gitstats: Licensed GitStats under the Apache License, 2.0
[git-stats.git] / src / git_stats / matcher.py
blob03b96721c53d292e0c750475ea7b3a9dfe64457d
1 #!/usr/bin/env python
3 import difflib
4 import os
5 import sys
7 from optparse import OptionParser
8 from git_stats import commit
9 from git_stats import diff
10 from git_stats import parse
def calculateDiffSize(difference):
    """Calculates the true diff size

    A line is counted when it is exactly one character long, or when
    it starts with '+' or '-' immediately followed by a tab character.
    Every other line, including an empty one, is skipped.

    Args:
        difference: An iterable of diff lines (strings).

    Returns:
        The number of lines that were counted.
    """

    size = 0

    # Take each line and only count if it is part of the diff
    for line in difference:
        # A line of size 1 is counted whatever the character is
        if len(line) == 1:
            size += 1
            continue

        # Slicing instead of indexing keeps an empty line from raising
        # an IndexError; it simply fails both checks and is skipped
        if line[:1] in ('+', '-') and line[1:2] == '\t':
            size += 1

    return size
def findMatch(left, right):
    """Checks whether left and right plausibly hold the same lines

    A unified diff without context lines is taken between the two
    sides. Its size, as measured by calculateDiffSize, is then held
    against the number of lines on each side.

    Args:
        left: A sequence of lines.
        right: A sequence of lines to compare left with.

    Returns:
        The difference as a list of lines if it is plausible that
        there is a match, otherwise False. Note that two identical
        sides produce an empty (and therefore falsy) list.
    """

    # Materialise the diff, difflib only hands out a generator
    delta = list(difflib.unified_diff(left, right, n=0, lineterm=""))

    changed = calculateDiffSize(delta)
    sizes = (len(left), len(right))

    # The difference is larger than either side
    if changed > min(sizes):
        return False

    # The difference is larger than the average
    if changed > sum(sizes) / 2:
        return False

    # This is probably a match, hand back the difference
    return delta
def match(target):
    """Tries to find a match between added and removed hunks

    The diff of the specified commit is retrieved and it is
    split into hunks. The lines added by each hunk are compared
    with the lines deleted by every other hunk. If they are
    similar the pair is deemed a match.

    Args:
        target: The commit to analyse, passed on to commit.getDiff.

    Returns:
        A list with one tuple per matching pair of hunks, of the form
        (left.linesAdded, right.linesDeleted, difference), where
        difference is the value returned by findMatch for that pair.
    """

    # Retrieve the diff
    targetDiff = commit.getDiff(target).split('\n')

    # And have it parsed, but don't add line numbering to the hunks
    parsedDiffs = diff.parseCommitDiff(targetDiff, number=False)

    # To store the matches in
    matches = []

    # Iterate over all the diffs, e.g., take all pairs
    for left in parsedDiffs:
        # A hunk that adds nothing is not interesting as a left side,
        # no need to pair it with anything
        if not left.linesAdded:
            continue

        for right in parsedDiffs:
            # Don't compare with self, that'd always match
            if left == right:
                continue

            # A hunk that deletes nothing is not interesting as a right side
            if not right.linesDeleted:
                continue

            # Try to find a match for this pair
            res = findMatch(left.linesAdded, right.linesDeleted)

            # Only False means "no match"; an empty difference means both
            # sides are identical, which must not be thrown away
            if res is False:
                continue

            # list.append takes exactly one argument, so the match is
            # stored as a single tuple holding the two sides that were
            # actually compared together with their difference
            matches.append((left.linesAdded, right.linesDeleted, res))

    return matches
def dispatch(*args):
    """Dispatches matching related commands

    The arguments are parsed with an OptionParser that knows a single
    option, -m/--matcher, which defaults to "HEAD". The value of that
    option is handed to match and every entry of the result is printed.

    Args:
        args: The command line arguments to parse.
    """

    # Make the help show 'progname matcher' instead of just 'progname'
    progname = "%s matcher" % os.path.basename(sys.argv[0])

    parser = OptionParser(option_class=parse.GitOption, prog=progname)
    parser.add_option(
        "-m", "--matcher",
        type="commit",
        help="match the chunks of a diff to find code moves")
    parser.set_default("matcher", "HEAD")

    options, leftover = parser.parse_args(list(args))

    # Anything left over after option parsing is rejected
    if leftover:
        parser.error("Please specify a commit to analyse")

    # Get the result and print it, one entry per line
    for entry in match(options.matcher):
        print(entry)