From 00a3c465c01e335317857e47f02fdeb93c79a052 Mon Sep 17 00:00:00 2001 From: Sverre Rabbelier Date: Wed, 16 Jul 2008 21:53:36 +0200 Subject: [PATCH] Refactoring, cleanups and documentation Checked all files and added documentation where needed. Also added comments where appropriate as well as some minor cleanups and refactoring. --- src/git_stats/author.py | 2 +- src/git_stats/branch.py | 53 ++++++++++++++++++++++------ src/git_stats/bug.py | 73 ++++++++++++-------------------------- src/git_stats/commit.py | 93 +++++++++++++++++++++---------------------------- src/git_stats/config.py | 64 ++++++++++++++++++++++++++++++---- src/git_stats/diff.py | 76 +++++++++++++++++++++++++++------------- src/git_stats/index.py | 5 +-- src/git_stats/parse.py | 24 +++++++++---- 8 files changed, 234 insertions(+), 156 deletions(-) diff --git a/src/git_stats/author.py b/src/git_stats/author.py index b8f89f9..ae838d3 100644 --- a/src/git_stats/author.py +++ b/src/git_stats/author.py @@ -246,7 +246,7 @@ def _checkOptions(parser, options): opts = [options.aggregate, options.developer, options.file, options.everyone] - if not parse.isUnique(opts): + if not parse.isUnique(opts, atLeastOne=True): parser.error("Please choose exactly one mode") if options.file: diff --git a/src/git_stats/branch.py b/src/git_stats/branch.py index ddf865c..2b93f91 100644 --- a/src/git_stats/branch.py +++ b/src/git_stats/branch.py @@ -119,35 +119,44 @@ def _calculateDilution(target, start, parent_lists, global_memory, ignore=[]): if dilution <= current.dilution: continue + # Nor when already checked in a better branch if global_memory.has_key(current.commit): dilution = global_memory[current.commit] if dilution < current.dilution: continue + # Remember this commit memory[current.commit] = current.dilution global_memory[current.commit] = current.dilution - parents = parent_lists[current.commit] + # Defaulting to None in the case we only have a partial parent listing + parents = parent_lists.get(current.commit, None) # Root commit, we can't go any further if not parents: continue + # Check all the parents and give only the first one 0 extra dilution for parent in parents: if parent == parents[0]: newdilution = current.dilution else: newdilution = current.dilution + 1 + # Create the next target next = Metric() next.commit = parent next.branch = current.branch next.dilution = newdilution + + # Add it to the stack to be examined stack.append(next) + # Nothing new was found here if not metrics: return [] + # Return only the best metric min, champs = _bestMetric(metrics) return champs @@ -161,8 +170,19 @@ def debugPrint(debug, text): if debug: print(text) -def getBranchMap(branches, cut_off_prefix=True): - """ +def getBranchMap(branches): + """Creates a branchMap from the specified branches and returns it + + The hash of each branch is retreived and stored in a + dictionary with the condition that all values in the map + are unique. As such if multiple branches map to one hash, + only one of those branches (the first one) will be in the + returned map. + + Args: + branches: A list of branches that should be in the map. + + Returns: A dictionary with branch names and their hashes. """ git = Repo(".").git @@ -170,9 +190,7 @@ def getBranchMap(branches, cut_off_prefix=True): branch_map = {} for branch in branches: - if cut_off_prefix: - branch = branch[2:] - + # Retreive the hash of each branch branch_hash = git.rev_parse(branch) if branch_hash in branch_map.values(): @@ -183,7 +201,16 @@ def getBranchMap(branches, cut_off_prefix=True): return branch_map def getParentList(commits): - """ + """Retreives the parents of the specified commits and their parents + + The parents for all the specified commits, and for the + parents of those commits, are retreived and stored in + a dictionary. + + Args: + commits: A list of commits for which the parents should be retreived. + + Returns: A dictionary with commits and their parents. """ git = Repo(".").git @@ -195,13 +222,17 @@ def getParentList(commits): parent_list = {} for item in data: + # Each line contains the commit first, and then all it's parents splitline = item.split(' ') first = splitline[0] + # This commit has no parents at all if len(splitline) == 1: parent_list[first] = [] + # This commits has no parents either elif splitline[1] == '': parent_list[first] = [] + # Store all the parents else: parent_list[first] = splitline[1:] @@ -234,6 +265,7 @@ def belongsTo(commit, metrics = [] + # Retreive the parents of the target commit and ignore them if ignore_parents: result = git.rev_list(commit) ignore = set(result.split('\n')) @@ -244,9 +276,11 @@ def belongsTo(commit, memory = {} + # Collect the metric for all branches for branch_name, branch_hash in branch_map.iteritems(): debugPrint(debug, branch_name) + # Create the first metric metric = Metric() metric.branch = branch_hash metric.commit = branch_hash @@ -366,10 +400,7 @@ def dispatch(*args): (options, args) = parser.parse_args(list(args)) if options.belongs_to: - git = Repo(".").git - - result = git.branch("-a", "--contains", options.belongs_to) - branches = result.split('\n') + branches = branchList(options.belongs_to, includeRemotes=options.remotes) branch_map = getBranchMap(branches) parent_list = getParentList(branch_map.values()) diff --git a/src/git_stats/bug.py b/src/git_stats/bug.py index 514eecd..7b44e74 100644 --- a/src/git_stats/bug.py +++ b/src/git_stats/bug.py @@ -12,6 +12,19 @@ from git_stats import config from git_stats import diff from git_stats import parse +options_list = [ + "branch_filter", + "branch_filter_rating", + "debug", + "diff_filter", + "diff_filter_rating", + "msg_filter", + "msg_filter_rating", + "ignore_parents", + "limit", + "revert_rating", + ] + class CommitInfo: """Contains information about a specific commit """ @@ -114,7 +127,7 @@ class Memory(): format="%(refname)") branches = result.split('\n') - self.branch_map = branch.getBranchMap(branches, cut_off_prefix=False) + self.branch_map = branch.getBranchMap(branches) self.parent_list = branch.getParentList(self.branch_map.values()) for name, hash in self.branch_map.iteritems(): @@ -124,16 +137,6 @@ class Memory(): name = name[pos:] self.pretty_names[hash] = name -class Options(): - """Options class - """ - - def __init__(self): - """Creates a new Options instance. - """ - - self.branch_filter = "" - def aggregateType(options): """Uses the specified options to get type information for many commits """ @@ -142,7 +145,7 @@ def aggregateType(options): result = git.rev_list(options.start_from) commits = result.split('\n') - options = extractOptions(options) + options = config.extractOptions(options, options_list) # Unless overridden by the user ignore parents for large repositories if len(commits) > 1000 and options.ignore_parents == None: @@ -194,8 +197,11 @@ def determineType(target, memory, options): result = CommitInfo(target, options) - filter_branches = options.branch_filter.split(':') - match = set(filter_branches).intersection(set(branches)) + if not options.branch_filter: + match = None + else: + filter_branches = options.branch_filter.split(':') + match = set(filter_branches).intersection(set(branches)) if match: result.branches = match @@ -211,41 +217,6 @@ def determineType(target, memory, options): return result -def extractOptions(options): - """Extracts options and returns the result - - The options are extracted from the specified options and - from the 'config' file. - """ - - opts = config.read() - - result = Options() - - options_list = ["branch_filter", - "branch_filter_rating", - "debug", - "diff_filter", - "diff_filter_rating", - "msg_filter", - "msg_filter_rating", - "ignore_parents", - "limit", - "revert_rating"] - - for opt in options_list: - if getattr(options, opt, None) == None: - val = opts.get(opt, None) - else: - val = getattr(options, opt) - - if val == None and getattr(result, opt, None) != None: - continue - - setattr(result, opt, val) - - return result - def _checkOptions(parser, options): """Checks the specified options and uses the parser to indicate errors @@ -256,7 +227,7 @@ def _checkOptions(parser, options): opts = [options.aggregate, options.type] - if not parse.isUnique(opts): + if not parse.isUnique(opts, atLeastOne=True): parser.error("Please choose exactly one mode") def dispatch(*args): @@ -325,7 +296,7 @@ def dispatch(*args): if options.aggregate: result = aggregateType(options) elif options.type: - kwargs = extractOptions(options) + kwargs = config.extractOptions(options, options_list) memory = Memory() stats = determineType(options.type, memory, kwargs) result = [str(stats)] diff --git a/src/git_stats/commit.py b/src/git_stats/commit.py index 799201a..3a1390b 100644 --- a/src/git_stats/commit.py +++ b/src/git_stats/commit.py @@ -14,6 +14,9 @@ GIT_STATS_PRETTY_PRINT = os.environ.get("GIT_STATS_PRETTY_PRINT", "full") def _logContains(log, regexp): """Traverses the specified log and searches for the specified regexp. + If the first line of the log starts with 'diff' only + lines that start with '+' or '-' will be examined. + Params: log: The log to search through. regexp: The regexp to match for. @@ -36,20 +39,24 @@ def _logContains(log, regexp): return False def prettyPrint(commits): - """Pretty prints the specified commits with git log. + """Pretty prints the specified commits with git log + + Respects the GIT_STATS_PRETTY_PRINT environment variable. + Supported are 'full' and 'oneline', defaults to 'full'. """ git = Repo(".").git if GIT_STATS_PRETTY_PRINT == "full": opts = ["-1", "--name-only"] + # Enable raw output for the trailing newline, which is desired in this case kwargs = { "with_raw_output" : True } elif GIT_STATS_PRETTY_PRINT == "oneline": opts = ["-1", "--pretty=oneline"] kwargs = {} for commit in commits: - # Enable raw output for the trailing newline, which is desired in this case + # Copy the opts so that we don't have to kick off the commit later on thisopts = opts[:] thisopts.append(commit) result = git.log(*thisopts, **kwargs) @@ -93,17 +100,17 @@ def pathsTouchedBy(commit, ignoreAddedFiles=True): git = Repo(".").git - # Enable raw output, we use the last empty line as delimiter - result = git.diff_tree("--name-status", "--no-commit-id", "-r", commit, with_raw_output=True) - + result = git.diff_tree("--name-status", "--no-commit-id", "-r", commit) log = result.split('\n') paths = [] for line in log: + # Ignore empty lines if len(line.lstrip()) == 0: continue + # Split the status part and the file name splitline = line.split('\t') if splitline[0] == 'A' and ignoreAddedFiles: continue @@ -141,11 +148,10 @@ def commitsThatTouched(paths, relative=False, touched_files={}): return touched def commitTouched(commit): - """Shows what commit touched the same files as the specified commit. + """Shows what commit touched the same files as the specified commit """ touched = pathsTouchedBy(commit, True) - touched = commitsThatTouched(touched) prettyPrint(touched) @@ -153,8 +159,8 @@ def commitTouched(commit): def commitmsgMatches(commit, regexp): """Returns whether the specified commit matches the specified regexp. - The commit message for the specified commit is searched, if a - matching line is found, True is returned. Otherwise False is returned. + The commit message for the specified commit is searched, + returns true iff amatching line is found. Params: commit: The commit whose commit msg is to be searched. @@ -166,37 +172,14 @@ def commitmsgMatches(commit, regexp): git = Repo(".").git result = git.cat_file("-p", commit) - log = result.split('\n') - # Skip the first lines, the commit msg doesn't begin till after these - return _logContains(log[4:], regexp) - -def hasParent(commit): - """Checks if the specified commit has a parent - - Args: - commit: The commitish to check - - Returns: False if the commitish doesn't have a parent, - or if the committish is not a commit at all. - """ - - git = Repo(".").git - - hash = git.rev_parse("--verify", commit, with_exceptions=False) + # The commit msg begin after the first empty line + pos = log.index("") + pos += 1 + log = log[pos:] - if not hash: - return False - - hash = hash + "^" - - parent = git.rev_parse("--verify", hash, with_exceptions=False) - - if not parent: - return False - - return True + return _logContains(log, regexp) def getDiff(commit, ignoreWhitespace=True, noContext=False): """Returns the commit diff for the specified commit @@ -221,8 +204,10 @@ def getDiff(commit, ignoreWhitespace=True, noContext=False): args.append(commit) args.append("--") + # Don't look before we leap, just try with 'hash^' result = git.diff_tree(with_exceptions=False, *args) + # We missed, retry with '--root' instead if not result: args[-3] = "--root" result = git.diff_tree(*args) @@ -250,12 +235,12 @@ def commitdiffMatches(commit, regexp, raw_diffs={}): return _logContains(diff, regexp) -def commitList(logFilter=None, diffFilter=None): +def commitList(log_filter=None, diff_filter=None): """Returns a list of all commits that match the specified filters Args: - logFilter: If specified, only commits whose log match are included. - diffFilter: If specified, only commits whose diff match are included. + log_filter: If specified, only commits whose log match are included. + diff_filter: If specified, only commits whose diff match are included. """ git = Repo(".").git @@ -263,26 +248,25 @@ def commitList(logFilter=None, diffFilter=None): result = git.rev_list("HEAD") commits = result.split('\n') - if not logFilter and not diffFilter: + if not log_filter and not diff_filter: return commits result = [] for commit in commits: - addIt = True + add_it = True - if logFilter: - if not commitmsgMatches(commit, logFilter): - addIt = False + if log_filter: + if not commitmsgMatches(commit, log_filter): + add_it = False - if diffFilter: - if not commitdiffMatches(commit, diffFilter): - addIt = False + if diff_filter: + if not commitdiffMatches(commit, diff_filter): + add_it = False - if addIt: + if add_it: result.append(commit) - return result def _checkOptions(parser, options): @@ -291,19 +275,19 @@ def _checkOptions(parser, options): Args: parser: The parser to use to signal when the options are bad. options: The options to check. - args: The arguments to check, if used. """ opts = [options.touched, options.log_contains, options.diff_contains, options.all] - if not parse.isUnique(opts, True): + if not parse.isUnique(opts, atLeastOne=True): parser.error("Please choose exactly one mode") if not options.touched: if options.relative: parser.error("-r makes no sense without -t") else: + # Check if the specified file is valid try: parse.check_file(value=options.touched, relative=options.relative) except OptionValueError, e: @@ -313,6 +297,7 @@ def dispatch(*args): """Dispatches commit related commands """ + # Make the help show 'progname commit' instead of just 'progname' progname = os.path.basename(sys.argv[0]) + " commit" parser = OptionParser(option_class=parse.GitOption, prog=progname) @@ -351,8 +336,8 @@ def dispatch(*args): result = commitsThatTouched([options.touched], options.relative) if options.diff_contains or options.log_contains or options.all: - result = commitList(diffFilter=options.diff_contains, - logFilter=options.log_contains) + result = commitList(diff_filter=options.diff_contains, + log_filter=options.log_contains) prettyPrint(result) diff --git a/src/git_stats/config.py b/src/git_stats/config.py index 75724dc..4e2cfe6 100644 --- a/src/git_stats/config.py +++ b/src/git_stats/config.py @@ -1,10 +1,15 @@ #!/usr/bin/env python - import os from git import Repo +class Options(): + """Options class + """ + + pass + def readAll(convert_camel_case=True): """Reads all config files and returns the result @@ -30,12 +35,14 @@ def readAll(convert_camel_case=True): result = {} + # Check all the configuration files and parse them for path in paths: if not os.path.isfile(path): continue config = read(path, convert_camel_case) + # Add the found keys to our result for key, value in config.iteritems(): result[key] = value @@ -43,6 +50,10 @@ def readAll(convert_camel_case=True): def _readVerse(verse, config, convert_camel_case): """Reads in a verse from a config file and stores the result in config + + verse: The lines for the current verse, not including the header. + config: A dictionary to store the found configurations in. + convert_camel_case: Whether to convert camelCase words to dashed_form. """ for line in verse: @@ -64,6 +75,7 @@ def _readVerse(verse, config, convert_camel_case): except ValueError: pass + # Try to convert boolean values if value == "False": value = False @@ -73,20 +85,21 @@ def _readVerse(verse, config, convert_camel_case): if value == "None": value = None + # If desired, convert camelCase to dashed_form if convert_camel_case and key.lower() != key: - res = "" + dashed_form = "" for c in key: if c.isupper(): - res += '_' + dashed_form += '_' - res += c.lower() + dashed_form += c.lower() - key = res + key = dashed_form + # Store the parsed and converted result config[key] = value - def read(path, convert_camel_case=True): """Reads in the specified file @@ -121,11 +134,44 @@ def read(path, convert_camel_case=True): pos += 1 lines = lines[pos:] + # Read in this verse, updates config with the found configurations _readVerse(lines, config, convert_camel_case) return config -if __name__ == '__main__': +def extractOptions(options, options_list): + """Extracts options and returns the result + + The options are extracted from the specified options and + from the 'config' file. + """ + + opts = readAll() + + result = Options() + + for opt in options_list: + if getattr(options, opt, None) == None: + val = opts.get(opt, None) + else: + val = getattr(options, opt) + + if val == None and getattr(result, opt, None) != None: + continue + + setattr(result, opt, val) + + return result + +def main(args): + """Parses all config files and prints the result + + Runs once with convert_camel_case on, and once with it off. + + Params: + args: Not used + """ + result = readAll() print("convert_camel_case is ON:") @@ -139,3 +185,7 @@ if __name__ == '__main__': for key, value in result.iteritems(): print(str(key) + ": " + str(value)) +if __name__ == '__main__': + import sys + main(sys.argv) + diff --git a/src/git_stats/diff.py b/src/git_stats/diff.py index ab97158..1f3b913 100644 --- a/src/git_stats/diff.py +++ b/src/git_stats/diff.py @@ -43,7 +43,7 @@ class fileDiff: return a + b + '\n' + str(self.linesAdded) + '\n' + str(self.linesDeleted) -def splitDiff(diff): +def _splitDiff(diff): """Splits off the diff in chunks, one for each file Params: @@ -54,18 +54,21 @@ def splitDiff(diff): chunks = [] - content = [] + chunk = [] for line in diff: + # Start of a new chunk, add the old one if there is one if line.startswith("diff"): - if content: - chunks.append(content) + if chunk: + chunks.append(chunk) - content = [] + chunk = [] - content.append(line) + chunk.append(line) - chunks.append(content) + # Add the last one + if chunk: + chunks.append(chunk) return chunks @@ -81,27 +84,31 @@ def _splitFileDiff(diff): chunks = [] header = [] - content = [] + chunk = [] - start = 0 - - for line in diff: + # Find out where the header stops + for i, line in enumerate(diff): if line.startswith("@@"): + # Start at the first hunk, which is this line + start = i break header.append(line) - start += 1 + # Chop off the header and split up the chunks for line in diff[start:]: + # Start a new chunk and add the old one if there is one if line.startswith("@@"): - if content: - chunks.append(content) + if chunk: + chunks.append(chunk) - content = [] + chunk = [] - content.append(line) + chunk.append(line) - chunks.append(content) + # Add the last one + if chunk: + chunks.append(chunk) return header, chunks @@ -118,6 +125,7 @@ def _parseFileDiff(header, chunk, number=True): result = fileDiff(header) + # Empty diff, no need to do anything if not chunk: return result @@ -175,7 +183,7 @@ def parseCommitDiff(diff, number=True): result = [] # Split the diff in file sized chunks - chunks = splitDiff(diff) + chunks = _splitDiff(diff) # Loop over all the file diffs and parse them for chunk in chunks: @@ -201,13 +209,17 @@ def _compareFileDiffs(adiff, bdiff, invert=False): """ if invert: + # Cross compare added with deleted if not adiff.linesAdded == bdiff.linesDeleted: return False + # And vise versa if not adiff.linesDeleted == bdiff.linesAdded: return False else: + # Do a normal comparison between added lines if not adiff.linesAdded == bdiff.linesAdded: return False + # And between the deleted lines if not adiff.linesDeleted == bdiff.linesDeleted: return False @@ -280,26 +292,31 @@ def _difference(adiffs, bdiffs, compareChanges=False, invert=False): missing = [] difference = [] + # Group the diffs by file pair for fd in adiffs: afiles[(fd.afile, fd.bfile)].append(fd) + # Group the diffs by file pair for fd in bdiffs: bfiles[(fd.afile, fd.bfile)].append(fd) + # Examine all the diffs and see if they match for key, fds in afiles.iteritems(): + # There is no counterpart for this file, record that if not bfiles.has_key(key): missing.append(key) continue theirs = bfiles[key] + # Compare the diffs, if not equal record that if not _compareDiffs(fds, theirs, compareChanges, invert): difference.append((fds, theirs)) return missing, difference def _getParsedDiff(target, raw_diffs, parsed_diffs): - """Retreives a parsed commit diff for the specified file + """Retrieves a parsed commit diff for the specified file Args: target: The commit to get the diff for. @@ -344,12 +361,17 @@ def commitdiffEqual(original, Returns: Whether the commit diffs are equal. """ - - parsedOriginal = _getParsedDiff(original, raw_diffs, parsed_diffs) - parsedPotentialMatch = _getParsedDiff(potentialMatch, - raw_diffs, - parsed_diffs) + # Retrieved the parsed diffs + parsedOriginal = _getParsedDiff( original, + raw_diffs, + parsed_diffs) + + parsedPotentialMatch = _getParsedDiff( potentialMatch, + raw_diffs, + parsed_diffs) + + # Get the difference between both missing, diff = _difference(parsedOriginal, parsedPotentialMatch, compareChanges=compareChanges, @@ -372,6 +394,8 @@ def commitdiffEqual(original, print(fd) print("----") + # TODO use threshhold + # Unequal if something missing, or there is a difference return not (missing or diff) @@ -405,17 +429,21 @@ def findReverts(potentialRevert, touched_files: A dictionary with files and the commits that touched them. """ + # Find out what paths this commit touched paths = commit.pathsTouchedBy(potentialRevert) # If no paths were touched, there can't be any reverts if not paths: return [] + # Retrieve all commits that touched the same paths commits = commit.commitsThatTouched(paths, touched_files=touched_files) result = [] + # Check all the found commits to see if they are a revert for aCommit in commits: + # Don't compare to self if aCommit == potentialRevert: continue diff --git a/src/git_stats/index.py b/src/git_stats/index.py index 7e7c9c6..9fca3d0 100644 --- a/src/git_stats/index.py +++ b/src/git_stats/index.py @@ -54,9 +54,11 @@ def touched(showAll): staged = stagedChanges(not showAll) + # There were no starged changes, return 'None' explicitly if not staged: return None + # Find all commits that touched the same files and return them touched = commit.commitsThatTouched(staged) return touched @@ -84,8 +86,7 @@ def dispatch(*args): (options, args) = parser.parse_args(list(args)) - if options.touched: - result = touched(options.all) + result = touched(options.all) if not result: msg = "No changes staged" diff --git a/src/git_stats/parse.py b/src/git_stats/parse.py index e56c000..1a7d81b 100644 --- a/src/git_stats/parse.py +++ b/src/git_stats/parse.py @@ -6,14 +6,18 @@ from git import Repo from optparse import OptionParser, Option, OptionValueError def _check_file(option, opt, value): - """Checks whether value is a valid file and returns it + """Checks whether value is a valid file and returns it if so """ return check_file(value) def check_file(value, relative=False): - """Check whether value is a valid file and returns it + """Check whether value is a valid file and returns it if so + + Args: + value: The value to check. + relative: Whether to treat value as a path relative to the working dir. """ git = Repo(".").git @@ -31,7 +35,7 @@ def check_file(value, relative=False): return splitfile[0] def _check_commit(option, opt, value): - """Checks whether value is a valid rev and returns its hash + """Checks whether value is a valid refspec and returns its hash if so """ git = Repo(".").git @@ -45,10 +49,10 @@ def _check_commit(option, opt, value): def _check_bool(option, opt, value): """Checks whether a value is a boolean and returns it's value - Understands about 'True', 'False', and 'None. + Understands about 'True', 'False', and 'None'. """ - if value== "True": + if value == "True": return True if value == "False": @@ -60,11 +64,19 @@ def _check_bool(option, opt, value): raise OptionValueError("Not a boolean: '" + value + "'") class GitOption(Option): - """This parser understands a new type, "commit" + """This parser understands three new types: + commit: A commit must specify exactly 1 commit in the current repository. + file: A file must specify exactly 1 tracked file in the current repository. + bool: A bool must be 'True', 'False', or 'None'. """ + # Define the new types TYPES = Option.TYPES + ("commit",) + ("file",) + ("bool",) + + # Copy the type checker list so that we don't keep the default ones TYPE_CHECKER = copy.copy(Option.TYPE_CHECKER) + + # Add our own options TYPE_CHECKER["commit"] = _check_commit TYPE_CHECKER["file"] = _check_file TYPE_CHECKER["bool"] = _check_bool -- 2.11.4.GIT