gitstats: Added basic dispatching functionality for diff
[git-stats.git] / src / git_stats / diff.py
blobc0ba023c5f5cdd8ea590cca40002800cb1cb6238
1 #!/usr/bin/env python
3 import collections
4 import os
5 import re
6 import sys
8 from optparse import OptionParser
10 from git_python import Git
12 import commit
def diffContains(log, regexp):
    """Reports whether any line of a log matches a regular expression.

    Params:
      log: The lines to search through.
      regexp: The regular expression to search each line for.

    Returns: True if at least one line matches, False otherwise.
    """

    pattern = re.compile(regexp)

    # any() accepts on the first matching line and rejects if none matches
    return any(pattern.search(entry) for entry in log)
class fileDiff:
    """A class to store the information of a file diff in.

    Fields:
      afile: The file used as the left side of the diff.
      bfile: The file used as the right side of the diff.
      context: The context of this diff.
      astart: Where the left side of the diff starts.
      bstart: Where the right side of the diff starts.
      linesAdded: Which lines were added.
      linesDeleted: Which lines were deleted.
    """

    def __init__(self, diffHeader):
        """Creates a fileDiff from the header lines of a file diff.

        Params:
          diffHeader: The header lines; the text after '--- ' becomes afile
                      and the text after '+++ ' becomes bfile.
        """

        self.afile = ""
        self.bfile = ""
        self.context = ""

        # Give the positions a value so __str__ works on a fresh instance
        self.astart = 0
        self.bstart = 0

        # Created per instance: lists defined on the class would be shared
        # between all fileDiff objects
        self.linesAdded = []
        self.linesDeleted = []

        for line in diffHeader:
            if line.startswith("--- "):
                self.afile = line[4:]
            elif line.startswith("+++ "):
                self.bfile = line[4:]

    def __str__(self):
        """Returns a summary line followed by the added and deleted lines."""

        a = "Diff for '" + self.afile + "' (" + str(self.astart) + ") against '"
        b = self.bfile + "' (" + str(self.bstart) + ")" + self.context + "."

        return a + b + '\n' + str(self.linesAdded) + '\n' + str(self.linesDeleted)
def getCommitDiff(commit, ignoreWhitespace=True, noContext=False):
    """Runs diff-tree between 'commit^' and the specified commit

    Params:
      commit: The commit to get the diff for.
      ignoreWhitespace: Whether to pass '-w' to diff-tree.
      noContext: Whether to pass '-U0' to diff-tree.

    Returns: The result of the diff_tree call.
    """

    repo = Git(".")

    # Always ask for a patch, the other switches are optional
    switches = ["-p"]
    switches += ["-w"] if ignoreWhitespace else []
    switches += ["-U0"] if noContext else []

    # Compare 'commit^' with the commit itself
    revisions = [commit + "^", commit]

    return repo.diff_tree(*(switches + revisions))
def splitDiff(diff):
    """Splits off the diff in chunks, one for each file

    Params:
      diff: The lines of the diff to split up.

    Returns: A list containing a chunk (a list of lines) per file. Lines
             found before the first "diff" line form a chunk of their own.
             An empty diff gives an empty list.
    """

    chunks = []
    content = []

    for line in diff:
        # A line starting with "diff" opens a new chunk, so store the
        # previous one (if there is one)
        if line.startswith("diff") and content:
            chunks.append(content)
            content = []

        content.append(line)

    # Store the last chunk, but never store an empty one
    if content:
        chunks.append(content)

    return chunks
def splitFileDiff(diff):
    """Splits a file diff into chunks, one per area.

    Params:
      diff: The lines of the file diff to split up.

    Returns: The diff header (all lines before the first '@@' line) and a
             list with all the chunks, each starting with its '@@' line.
             The list is empty when the diff has no '@@' line.
    """

    header = []
    chunks = []

    # None for as long as we are still reading the header
    content = None

    for line in diff:
        if line.startswith("@@"):
            # A new area starts, store the previous one (if there is one)
            if content:
                chunks.append(content)

            content = []

        if content is None:
            header.append(line)
        else:
            content.append(line)

    # Store the last chunk, but never store an empty one
    if content:
        chunks.append(content)

    return header, chunks
def parseFileDiff(header, chunk):
    """Takes a file diff and returns the parsed result

    Params:
      header: The diff header.
      chunk: The chunk to parse, starting with its '@@' line.

    Returns: A fileDiff containing the parsed diff.

    Raises:
      IndexError: If chunk is empty or its '@@' line holds fewer than two
                  ranges.
      ValueError: If a range does not start with a number.
    """

    # NOTE(review): looks like leftover debug output; kept so that what is
    # written to stdout does not change
    print(header)
    for line in chunk:
        print(line)

    result = fileDiff(header)

    deleted = []
    added = []

    # Find out where the range section ends, skipping the first '@@'
    to = chunk[0].find("@@", 2)

    # Get the ranges, skipping the first and last '@@', split at the spaces
    ranges = chunk[0][3:to].split(' ')

    # A range is '-start' or '-start,count' ('+' for the right side): drop
    # the sign and the optional count so that only the start remains
    apos = int(ranges[0][1:].partition(',')[0])
    bpos = int(ranges[1][1:].partition(',')[0])

    result.astart = apos
    result.bstart = bpos

    # Start at the first line (skip the '@@' line)
    for line in chunk[1:]:
        if line.startswith("-"):
            deleted.append((apos, line[1:]))
            apos += 1
        elif line.startswith("+"):
            added.append((bpos, line[1:]))
            bpos += 1
        elif line.startswith(" "):
            # A context line is present on both sides, so both move on
            apos += 1
            bpos += 1

    result.linesDeleted = deleted
    result.linesAdded = added

    return result
def parseCommitDiff(diff):
    """Parses every chunk of every file diff found in a commit diff

    Params:
      diff: The diff to parse.

    Returns: A list with the result of parseFileDiff for each chunk, in the
             order in which the chunks occur in the diff.
    """

    parsed = []

    # One entry per file, as split off by splitDiff
    for fileLines in splitDiff(diff):
        header, areas = splitFileDiff(fileLines)

        # Parse each area of this file together with the file's header
        parsed.extend(parseFileDiff(header, area) for area in areas)

    return parsed
def compareFileDiffs(adiff, bdiff, invert=False):
    """Checks whether two fileDiffs changed the same lines

    Args:
      adiff: The first fileDiff.
      bdiff: The second fileDiff.
      invert: Whether to compare linesAdded with linesDeleted.

    Returns: Whether the two diffs are equal.
    """

    # Pick what the additions and deletions of adiff are compared with
    if invert:
        theirAdded, theirDeleted = bdiff.linesDeleted, bdiff.linesAdded
    else:
        theirAdded, theirDeleted = bdiff.linesAdded, bdiff.linesDeleted

    sameAdded = adiff.linesAdded == theirAdded
    sameDeleted = adiff.linesDeleted == theirDeleted

    # Accept only if both sides agree
    return bool(sameAdded and sameDeleted)
def compareDiffs(adiffs, bdiffs, compareChanges=False, invert=False):
    """Checks whether every diff in adiffs has a matching diff in bdiffs

    Args:
      adiffs: The first set of diffs.
      bdiffs: The second set of diffs.
      compareChanges: Whether to compare not only which lines changed.
      invert: When compareChanges, invert the comparison of deleted/added.

    Returns: Whether the diffs are equal.
    """

    # Marker for "went through all of bdiffs without finding a match"
    nothing = object()

    for ours in adiffs:
        # Take the first item in bdiffs that matches on the start positions
        partner = next(
            (candidate for candidate in bdiffs
             if candidate.astart <= ours.astart
             and candidate.bstart >= ours.bstart),
            nothing)

        # No matching pair for this item, reject
        if partner is nothing:
            return False

        # If we want to compare changes, reject when they are unequal
        if compareChanges and not compareFileDiffs(ours, partner, invert):
            return False

    # All items in adiffs checked, all had a matching pair, accept.
    return True
def difference(adiffs, bdiffs, compareChanges=False, invert=False):
    """Calculates the difference between two diffs and returns it

    Params:
      adiffs: The first set of diffs.
      bdiffs: The second set of diffs.
      compareChanges: Whether to compare not only which lines changed.
      invert: When compareChanges, invert the comparison of deleted/added.

    Returns: Which keys are missing and the difference between both diffs.
             A key is an (afile, bfile) pair that occurs in adiffs but not
             in bdiffs; a difference is a pair with the diffs of both sides
             for a key whose diffs do not compare equal.
    """

    # Group the diffs of both sides by the files they are about
    afiles = collections.defaultdict(list)
    bfiles = collections.defaultdict(list)

    missing = []
    differing = []

    for fd in adiffs:
        afiles[(fd.afile, fd.bfile)].append(fd)

    for fd in bdiffs:
        bfiles[(fd.afile, fd.bfile)].append(fd)

    # items() and 'in' are used because iteritems() and has_key() only
    # exist on Python 2 dictionaries
    for key, fds in afiles.items():
        if key not in bfiles:
            missing.append(key)
            continue

        theirs = bfiles[key]

        if not compareDiffs(fds, theirs, compareChanges, invert):
            differing.append((fds, theirs))

    return missing, differing
def commitdiffEqual(original, potentialMatch, threshold=0, compareChanges=True, invert=False):
    """Tests whether the diff of a commit matches the diff of another commit.

    Prints the missing keys or the differences found when the diffs do not
    match.

    Params:
      original: The original commit that is to be checked.
      potentialMatch: The commit that might match original.
      threshold: The threshold for how close they have to match.
                 NOTE(review): it is only printed, the comparison below
                 does not use it.
      compareChanges: Whether to compare the changes made or just the
                      changed lines.
      invert: Whether to compare deletions with insertions instead.

    Returns: Whether the commit diffs are equal.
    """

    print("threshold " + str(threshold))

    # Get both diffs without context lines, ignoring whitespace
    diffs = [getCommitDiff(rev, noContext=True).split('\n')
             for rev in (original, potentialMatch)]

    # The last element of each split is dropped before parsing
    parsedOriginal, parsedPotentialMatch = [
        parseCommitDiff(lines[:-1]) for lines in diffs]

    missing, diff = difference(parsedOriginal, parsedPotentialMatch,
                               compareChanges=compareChanges, invert=invert)

    if missing:
        print("Missing the following keys:")
        for key in missing:
            print(key)

        return False

    if diff:
        print("Found the following differences:")
        for ours, theirs in diff:
            print("---")
            for fd in ours:
                print(fd)
            print("\nDoes not match:\n")
            for fd in theirs:
                print(fd)
            print("----")

        return False

    return True
def dispatch(*args):
    """Parses the arguments of the diff command and runs what they ask for

    Params:
      args: The command line arguments to parse.
    """

    parser = OptionParser(
        option_class=commit.CommitOption,
        prog=os.path.basename(sys.argv[0]) + " diff")

    parser.add_option(
        "-e", "--equals",
        type="commit",
        nargs=2,
        help="show whether the two diffs for the specified commits match",
        metavar="PATHS")

    parser.add_option(
        "-t", "--threshold",
        type="int",
        help="the threshold for comparison")

    parser.add_option(
        "-n", "--no-compare",
        action="store_false",
        dest="compare",
        help="do not compare the diff content, just look at which lines were touched")

    parser.add_option(
        "-i", "--invert",
        action="store_true",
        help="compare additions with deletions instead of with additions, and vise versa")

    # The values used for options that were not specified
    parser.set_defaults(threshold=0, compare=True, invert=False)

    options, args = parser.parse_args(list(args))

    # --equals takes two commits, which become the two commits to compare
    if options.equals:
        result = commitdiffEqual(
            *options.equals,
            threshold=options.threshold,
            compareChanges=options.compare,
            invert=options.invert)