gitstats: Refactoring, cleanups and documentation
[git-stats.git] / src / git_stats / matcher.py
blob68883ae418b7dc4bc1f77e25ccc6f32a7b14ef57
1 #!/usr/bin/env python
3 import difflib
5 from git_stats import commit
6 from git_stats import diff
def calculateDiffSize(difference):
    """Calculates the true diff size

    A line is counted when it is exactly one character long, or when
    it starts with '+' or '-' immediately followed by a tab character.
    Every other line, including an empty string, is ignored.

    difference: an iterable of strings, one per diff line.

    Returns the number of counted lines.
    """

    # The two-character prefixes that mark a counted diff line
    prefixes = ('+\t', '-\t')

    size = 0

    # Take each line and only count it if it is part of the diff
    for line in difference:
        # Checking the length explicitly and using startswith keeps an
        # empty string from raising an IndexError on line[1]
        if len(line) == 1 or line.startswith(prefixes):
            size += 1

    return size
def findMatch(left, right):
    """Tries to find a match between left and right

    A unified diff without context lines is made of both sides and
    its size, as reported by calculateDiffSize, is compared with the
    number of lines on each side.

    If it is plausible that there is a match the difference is
    returned as a list of diff lines. Otherwise False is returned.
    """

    # n=0 leaves out the context lines, lineterm="" the line endings
    delta = list(difflib.unified_diff(left, right, n=0, lineterm=""))

    # Gather the numbers the checks below work with
    changed = calculateDiffSize(delta)
    sizeLeft = len(left)
    sizeRight = len(right)

    # More changed lines than one of the sides has lines
    exceedsSide = changed > sizeLeft or changed > sizeRight
    if exceedsSide:
        return False

    # More changed lines than the average of both sides
    if changed > (sizeLeft + sizeRight)/2:
        return False

    # Nothing speaks against a match, hand back the difference
    return delta
def match(target):
    """Tries to find a match between added and removed hunks

    The diff of the specified commit is retrieved and it is
    split into hunks. The hunks that were added are compared
    with the hunks that were deleted. If they are similar the
    pair is deemed a match.

    target: the commit to examine, passed on to commit.getDiff.

    Returns a list with one (added lines, deleted lines, difference)
    tuple for every pair of hunks that findMatch accepted.
    """

    # Retrieve the diff
    rawDiff = commit.getDiff(target)
    targetDiff = rawDiff.split('\n')

    # And have it parsed, but don't add line numbering to the hunks
    parsedDiffs = diff.parseCommitDiff(targetDiff, number=False)

    # To store the matches in
    result = []

    # Iterate over all the diffs, e.g., take all pairs
    for left in parsedDiffs:
        # A hunk without added lines is not interesting as a left
        # side; this does not depend on the right side, so it is
        # checked once here instead of for every pair
        if not left.linesAdded:
            continue

        for right in parsedDiffs:
            # Don't compare with self, that'd always match
            if left == right:
                continue

            # A hunk without deleted lines is not interesting as a
            # right side
            if not right.linesDeleted:
                continue

            # Try to find a match for this pair
            res = findMatch(left.linesAdded, right.linesDeleted)

            # Only an explicit False means there was no match; an
            # empty difference means both sides are identical and
            # must not be thrown away
            if res is False:
                continue

            # list.append takes a single argument, so the match is
            # stored as one tuple holding the two sides that were
            # actually compared and their difference
            result.append((left.linesAdded, right.linesDeleted, res))

    return result
def dispatch(*args):
    """Dispatches matching related commands

    Exactly one argument is expected, the commit to check. Every
    entry that match returns for it is printed on a line of its own.
    When the number of arguments is wrong a message is printed and
    128 is returned instead.
    """

    # We accept exactly one commit as argument
    if len(args) != 1:
        print("Please specify a commit to check")
        return 128

    # Look for matches in the commit and print each of them
    (target,) = args
    for entry in match(target):
        print(entry)