From ec3fb47649e5f189c15318d196d6dbf5e8174309 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 26 Dec 2013 20:56:42 -0600 Subject: [PATCH] implied-commit-order: simplify the history by default Imagine a commit history like this: * README: modify heading and fix a typo * README: modify heading * Add a README where the first commit touches some parts that are touched by the second commit, but also some others. In this case, the implied commit order dictates that the third commit is both a direct implied parent as well as an implied *grand* parent. While this is technically correct, it is also confusing, and also unnecessary when all we want to know is what commits are tied by their changes and which ones are not: transitively-implied ancestors are just as implied as directly-implied ones. There is an additional problem with being "too correct" about implied parents: our trick to re-use Git's history visualization tools is to make temporary grafts to imply a different order than the one stored inside the commit objects. Now, the graft handling in Git predates the strbuf support, and therefore it still has a rather serious line length limitation of 1023 bytes per line. Given that the graft lines *must* list non-abbreviated commit names -- the only reason why we have to bother with long names at all -- that leaves space for only (int)(1023 + 1) / (40 + 1) parents, i.e. just one byte shy of allowing 24 parent commits to be listed. While it is rare to see commit histories coming even close to merging 23 branches at once, in the implied commit order that limitation is reached very quickly. Already Git for Windows' fork -- currently only puny 113 non-merge commits on top of upstream's 1.8.5.1 tag (a situation that is unlikely going to change if one trusts history's lessons) -- lists fourty-six implied parents for the final merge! Unfortunately, simplifying the implied order isn't enough: it brings down the (insane) number of 46 parents only to 29, still larger than upstream Git's limit of 23 parents. But it's something!!! While it is conceivable that there are more efficient methods to simplify the implied commit order than to traverse the implied ancestry and throwing out unnecessarily-implied parents, the current method is good enough for now, and in the interest of following the YAGNI principle we'll cross that bridge when -- and if -- we have to. Signed-off-by: Johannes Schindelin --- share/msysGit/implied-commit-order.perl | 56 +++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/share/msysGit/implied-commit-order.perl b/share/msysGit/implied-commit-order.perl index 2f3f8098..e792b1cc 100755 --- a/share/msysGit/implied-commit-order.perl +++ b/share/msysGit/implied-commit-order.perl @@ -36,6 +36,15 @@ sub ordered_set () { }; return { 'add' => $add, + 'remove' => sub ($) { + my $index = $seen{$_[0]}; + return if $index eq undef; + delete $seen{$_[0]}; + splice(@list, $index, 1); + for (; $i <= $#list; $i++) { + $seen{$list[$i]} = $i; + } + }, 'contains' => sub ($) { return $seen{$_[0]} ne undef; }, @@ -362,6 +371,7 @@ sub read_commits ($) { } my $use_gitk = 0; +my $simplify = 1; my $dashdash = -1; for (my $i = 0; $i <= $#ARGV; $i++) { if ($ARGV[$i] eq '--') { @@ -370,13 +380,55 @@ for (my $i = 0; $i <= $#ARGV; $i++) { } if ($ARGV[$i] eq '--gitk') { $use_gitk = 1; - splice(@ARGV, $i, 1); - $i--; + } elsif ($ARGV[$i] eq '--simplify') { + $simplify = 1; + } elsif ($ARGV[$i] eq '--no-simplify') { + $simplify = 0; + } else { + next; } + splice(@ARGV, $i, 1); + $i--; } read_commits(\@ARGV); +sub get_parents ($) { + my $parents = $forward{$_[0]}; + return [] if $parents eq undef; + return $parents->{'list'}(); +} + +# We can simplify the implied history by skipping parents that are ancestors of +# other parents (e.g. if a commit is already an implied grandparent, it does +# not have to be an implied parent, too). + +if ($simplify) { + foreach my $current (@commits) { + my @stack = (); + my %seen = (); + my %parents = (); + foreach my $parent (@{get_parents($current)}) { + $parents{$parent} = 1; + foreach my $grampy (@{get_parents($parent)}) { + push(@stack, $grampy); + } + } + while ($#stack >= 0) { + my $commit = pop(@stack); + next if $seen{$commit} ne undef; + if ($parents{$commit} ne undef) { + $forward{$current}->{'remove'}($commit); + delete $parents{$commit}; + } + foreach my $parent (@{get_parents($commit)}) { + push(@stack, $parent); + } + $seen{$commit} = 1; + } + } +} + # Unfortunately, there is no scriptable way to use the --graph support of `git # log`. # -- 2.11.4.GIT