From cbf2408ceade3e8f870dda7ec5692fbd9a3bd439 Mon Sep 17 00:00:00 2001 From: Lea Wiemann Date: Sun, 22 Jun 2008 00:26:13 +0200 Subject: [PATCH] gitweb: use new Git::Repo API, and add optional caching Gitweb now uses the Git::Repo API; this change is behavior-preserving, except for slightly more aggressive error handling; see below. This patch also adds an optional caching layer for caching repository data using any Cache::Cache-compatible caching engine (typically Cache::Memcached). Larger cacheable items, like blobs, snapshots or diffs, can be cached on disk. Other minor changes: - Gitweb would previously accept invalid input and either (a) display nothing, (b) display an obscure error message, or (c) proceed as normal since the parameter happens to be unused in the particular code path used. This has changed in that gitweb will check for parameter correctness more aggressively, and display meaningful error messages. This change is only relevant if you manually edit gitweb's CGI parameters, since gitweb only generates valid links. - Empty projects: - Only display summary link for empty projects in project list to avoid broken links (yielding 404). - Slim down summary page for empty projects to avoid some broken links and unnecessary vertical space. - Sort empty projects at the bottom of the project list when sorting by last change. - Add test for empty projects to t9503 (the Mechanize test), now that there are no broken links anymore. 
Signed-off-by: Lea Wiemann --- Makefile | 25 +- gitweb/INSTALL | 11 +- gitweb/README | 23 + gitweb/gitweb.css | 16 + gitweb/gitweb.perl | 1372 ++++++++++++++++++++------------ t/t9500-gitweb-standalone-no-errors.sh | 4 +- t/t9503-gitweb-Mechanize.sh | 10 + t/t9503/test.pl | 19 +- 8 files changed, 924 insertions(+), 556 deletions(-) diff --git a/Makefile b/Makefile index 161789a6cf..164e20a04a 100644 --- a/Makefile +++ b/Makefile @@ -1138,25 +1138,28 @@ $(patsubst %.perl,%,$(SCRIPT_PERL)): perl/perl.mak perl/perl.mak: GIT-CFLAGS perl/Makefile perl/Makefile.PL $(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' $(@F) +PERL_USE_LIB_REWRITE = \ + -e '1{' \ + -e ' s|\#!.*perl|\#!$(PERL_PATH_SQ)|' \ + -e ' h' \ + -e ' s=.*=use lib (split(/:/, \$$ENV{GITPERLLIB} || \"@@INSTLIBDIR@@\"));=' \ + -e ' H' \ + -e ' x' \ + -e '}' \ + -e 's|@@INSTLIBDIR@@|'"$$INSTLIBDIR"'|g' \ + -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' + $(patsubst %.perl,%,$(SCRIPT_PERL)): % : %.perl $(QUIET_GEN)$(RM) $@ $@+ && \ INSTLIBDIR=`MAKEFLAGS= $(MAKE) -C perl -s --no-print-directory instlibdir` && \ - sed -e '1{' \ - -e ' s|#!.*perl|#!$(PERL_PATH_SQ)|' \ - -e ' h' \ - -e ' s=.*=use lib (split(/:/, $$ENV{GITPERLLIB} || "@@INSTLIBDIR@@"));=' \ - -e ' H' \ - -e ' x' \ - -e '}' \ - -e 's|@@INSTLIBDIR@@|'"$$INSTLIBDIR"'|g' \ - -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ - $@.perl >$@+ && \ + sed $(PERL_USE_LIB_REWRITE) $@.perl >$@+ && \ chmod +x $@+ && \ mv $@+ $@ gitweb/gitweb.cgi: gitweb/gitweb.perl $(QUIET_GEN)$(RM) $@ $@+ && \ - sed -e '1s|#!.*perl|#!$(PERL_PATH_SQ)|' \ + INSTLIBDIR=`MAKEFLAGS= $(MAKE) -C perl -s --no-print-directory instlibdir` && \ + sed $(PERL_USE_LIB_REWRITE) \ -e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \ -e 's|++GIT_BINDIR++|$(bindir)|g' \ -e 's|++GITWEB_CONFIG++|$(GITWEB_CONFIG)|g' \ diff --git a/gitweb/INSTALL b/gitweb/INSTALL index 26967e201a..c4ab593b5f 100644 --- a/gitweb/INSTALL +++ b/gitweb/INSTALL @@ -6,8 +6,15 @@ First you have to generate 
gitweb.cgi from gitweb.perl using gitweb.css, git-logo.png and git-favicon.png) to their destination. For example if git was (or is) installed with /usr prefix, you can do - $ make prefix=/usr gitweb/gitweb.cgi ;# as yourself - # cp gitweb/git* /var/www/cgi-bin/ ;# as root + $ make prefix=/usr gitweb/gitweb.cgi ;# as yourself + # cp gitweb/git* /var/www/cgi-bin/ ;# as root + +If the Git Perl package isn't already installed system-wide, you also +need to copy the perl/blib/lib/Git directory to your cgi-bin +directory: + + $ make prefix=/usr -C perl ;# as yourself + # cp -r perl/blib/lib/Git /var/www/cgi-bin/ ;# as root Alternatively you can use autoconf generated ./configure script to set up path to git binaries (via config.mak.autogen), so you can write diff --git a/gitweb/README b/gitweb/README index 825162a0b6..9f0ba8a7d9 100644 --- a/gitweb/README +++ b/gitweb/README @@ -214,6 +214,29 @@ not include variables usually directly set during build): Rename detection options for git-diff and git-diff-tree. By default ('-M'); set it to ('-C') or ('-C', '-C') to also detect copies, or set it to () if you don't want to have renames detection. + * $cache + To activate caching, set this to an instance of a Cache::Cache + compatible cache. You'll typically use a Memcached instance here: + use Cache::Memcached; + $cache = Cache::Memcached->new( { servers => ['localhost:11211'], + compress_threshold => 1000 } ); + * $large_cache_root + A directory in which gitweb will store potentially large cache + items (like snapshots or blobs). Cache items in this directory are + not deleted, so you should clean it up yourself periodically. + Default: undef (no caching for large items). + * $page_info + Set to 1 to display cache statistics at the bottom of each page. 
+ After setting up caching, you should set this to 1 temporarily and + make sure that after opening some blob page (a=blob) for the second + time, the statistics at the bottom of the page show cache hits (and + no misses) for both the main cache and the large cache. If there + are nonzero "failed sets" statistics, then for the "main cache" + they indicate that memcached is not working (check that it is + actually running), and for the "large cache" they indicate that + writing to the $large_cache_root directory didn't work (typically + permission problems). Once you have verified that caching works, + set $page_info back to zero. Projects list file format diff --git a/gitweb/gitweb.css b/gitweb/gitweb.css index aa0eeca247..849bb8e5c7 100644 --- a/gitweb/gitweb.css +++ b/gitweb/gitweb.css @@ -71,6 +71,22 @@ div.page_footer_text { font-style: italic; } +div.page_info { + color: #777777; + padding: 4px 8px; + margin-top: 0.5em; + background-color: #ecebe8; +} + +div.page_info p { + margin: 0px; + padding: 0px; +} + +pre.cache_list { + margin: 0.5em 2em; +} + div.page_body { padding: 8px; font-family: monospace; diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 90cd99bf91..410fe0db96 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -16,6 +16,9 @@ use Encode; use Fcntl ':mode'; use File::Find qw(); use File::Basename qw(basename); + +use Git::RepoRoot; + binmode STDOUT, ':utf8'; BEGIN { @@ -161,6 +164,47 @@ our %known_snapshot_format_aliases = ( 'x-zip' => undef, '' => undef, ); +# Cache::Cache object to cache data from the repository, or undef for +# no cache. You would typically use a Cache::Memcached instance here. +our $cache = undef; + +# Expiration time in seconds for transient cache entries, or undef for +# no expiration. (Only used if $cache is defined.) 
+# +# Transient cache entries (like get_sha1('HEAD')) are automatically +# invalidated when an mtime of either the repository's root directory +# or of the refs directory or any subdirectory changes. This +# mechanism *should* detect changes to the repository reliably if you +# only use git or rsync to write to it, and hence this expiration time +# can be set very high. (This does not default to non-expiring +# [undef] just in case a change goes undetected for some reason.) You +# might want to set this to a lower time (e.g. a few minutes) if +# developers change files in the refs directories on your server by +# non-standard means (i.e. manually). +# +# You can usually go with the default here. +our $cache_transient_expiration_time = 60 * 60 * 24; + +# Directory on disk to hold potentially large cache items (in +# particular, snapshots, diffs and blobs), or undef for no cache. +# Cache files will be created in this directory, but they will not be +# expired; you should periodically delete old files yourself. Setting +# $large_cache_root but not $cache is possible, but usually not +# sensible. +our $large_cache_root = undef; + +# Extra cache key component to use. This should stringify to a string +# without null bytes; it is used as a means to discard all cache +# entries whenever this key changes. Since gitweb already uses its +# own version number as a cache key component, you don't normally need +# this unless you're doing development work on gitweb (in which case +# you might set it to gitweb's mtime, (stat 'gitweb.cgi')[9]). +our $cache_key = undef; + +# Display information in the footer of each page (currently only cache +# statistics): 0 = none, 1 = short, 2 = long. +our $page_info = 0; + # You define site-wide feature defaults here; override them with # $GITWEB_CONFIG as necessary. our %feature = ( @@ -379,6 +423,363 @@ if (-e $GITWEB_CONFIG) { # version of the core git binary our $git_version = qx("$GIT" --version) =~ m/git version (.*)$/ ? 
$1 : "unknown"; + +# ====================================================================== +# caching layer + +package CachedRepo; + +use Digest::MD5 qw(md5_hex); +use List::Util qw(max); + +use base qw(Git::Repo); + +# Global statistics, collected across repositories. +# Hits, misses, sets, and failed_sets are counters, and get_list is an +# arrayref of keys, where a key is an arrayref of key items. +our %cache_statistics = ( + hits => 0, misses => 0, sets => 0, failed_sets => 0, get_list => []); +our %large_cache_statistics = ( + hits => 0, misses => 0, sets => 0, failed_sets => 0, get_list => []); + +# Options: like Git::Repo->new, and the following: +# cache: a Cache::Cache conforming cache instance +# transient_expiration_time: expiration time in seconds for transient +# cache entries (like get_hash('HEAD')), or undef; do not set to +# 30 days or more, since it makes Cache::Memcached hiccup +sub new { + my ($class, %opts) = @_; + my $cache = delete $opts{cache}; + my $large_cache_root = delete $opts{large_cache_root}; + my $transient_expiration_time = delete $opts{transient_expiration_time}; + my $self = $class->SUPER::new(%opts); + $self->{cache} = $cache; + $self->{large_cache_root} = $large_cache_root; + $self->{transient_expiration_time} = $transient_expiration_time; + return $self; +} + +sub cache { shift->{cache} } +sub large_cache_root { shift->{large_cache_root} } +sub transient_expiration_time { shift->{transient_expiration_time} } + +# Fast function to generate a unique (short, hashed) key for the cache +# to use. None of the parameters *should* contain null bytes. Example: +# $repo->get_key('get_sha1', 'HEAD:file1') eq '05dd723732a45fcac864787ec1897cc1' +sub get_key { + my $self = shift; + # Some caches (like Cache::FileCache) hash keys themselves, + # but Cache::Memcached does not like long keys, so we need to + # hash them. 
MD5 is fine here, since (as of July 2008) there + # are only collision attacks, but no practical preimage + # attacks on MD5. Constructing two colliding keys doesn't + # seem to pose much of a threat for the cache. Digest::SHA1 + # is only in core as of Perl 5.9, so we cannot use it here. + return md5_hex(join "\0", $self->_key_items(@_)); +} + +# Return a list of strings that can be used to generate a key. +sub _key_items { + my $self = shift; + return map { defined $_ ? " $_" : '' } + ('gitweb', $version, $cache_key, 'project', $self->repo_dir, @_); +} + +# Convenience function: cache_set(\@key, $value, $expire); +# $expire is boolean and indicates whether an expiry time should be set. +sub cache_set { + my ($self, $key, $value, $expire) = @_; + return unless $self->cache; + my $expiration_token = $expire ? $self->get_last_modification() : undef; + my $ok = $self->cache->set( + $self->get_key($expiration_token, @$key), $value, + $expire ? $self->transient_expiration_time : ()); + $ok ? $cache_statistics{sets}++ : $cache_statistics{failed_sets}++; + return $ok; +} + +# Convenience function: cache_get(\@key, $expire) +# $expire must be the same that has been used for cache_set or the +# lookup will fail. +sub cache_get { + my ($self, $key, $expire) = @_; + return unless $self->cache; + my $expiration_token = $expire ? $self->get_last_modification() : undef; + my $val = $self->cache->get($self->get_key($expiration_token, @$key)); + defined $val ? $cache_statistics{hits}++ : $cache_statistics{misses}++; + push @{$cache_statistics{get_list}}, + [$self->repo_dir, $expire ? 1 : 0, @$key]; + return $val; +} + +# Return the output of the given git command as a string. +# Valid options are: +# cmd: An arrayref of arguments to pass to git (mandatory). +# max_exit_code: Die if the exit code of the git binary is greater +# than this (default: 0). +# cache: If 1, the output is cached but expires when the repo is +# modified; if 2, it is cached indefinitely. 
+sub cmd_output { + my ($self, %opts) = @_; + my $key = ['cmd', $opts{max_exit_code}, @{$opts{cmd}}]; + my $output; + unless ($opts{cache} && defined($output = $self->cache_get( + $key, $opts{cache} == 1))) { + my @cmd = ($self->_git_cmd, @{$opts{cmd}}); + my $cmd = join ' ', @cmd; + open my $fh, '-|', @cmd or die "cannot open pipe: $cmd"; + local $/; + $output = <$fh>; + if (!close $fh) { + die "error closing pipe ($!): $cmd" if $!; + my $exit_code = $? >> 8; + die "Command failed with exit code $exit_code: $cmd" + if $exit_code > ($opts{max_exit_code} || 0); + } + $self->cache_set($key, $output, $opts{cache} == 1) if $opts{cache}; + } + return $output; +} + +# progressive_cmd_output(%opts) +# +# Return a coderef that returns a chunk of the output of the given +# command each time it is called, or undef when the output is +# exhausted. For the output to be cached, it must be read until the +# coderef returns undef, otherwise it will leave a stale temporary +# file in the cache. +# +# Options: +# cmd: an arrayref or string of arguments to git; if it's a string, it will be +# passed to the shell +# max_exit_code: die if the command exits with a higher exit code (default: 0) +# separator: like $/; if undef, read the output in chunks of arbitrary size +# cache: if true, cache the output of the command (without expiration) +# +# Example: +# my $diff_read = $repo->progressive_cmd_output( +# cmd => ['diff', $from_sha1, $to_sha1], separator => "\n", cache => 1); +# while (my $line = $diff_read->()) { +# chomp $line; +# ... +# } +my $_file_seq = 0; +sub progressive_cmd_output { + die 'must pass an odd number of arguments' unless @_ % 2; + my ($self, %opts) = @_; + local $/ = defined $opts{separator} ? $opts{separator} : \32768; + my (@cmd, $cmd_str); + if (ref($opts{cmd}) eq 'ARRAY') { + @cmd = ($self->_git_cmd, @{$opts{cmd}}); + $cmd_str = join " ", @cmd; # only used for diagnostics + } else { + $cmd_str = main::quote_command($self->_git_cmd) . 
+ " $opts{cmd}"; # this will be passed to the shell + } + # We read from $fh, whether it's a pipe or a cache file. If + # it's a pipe, we also progressively cache it to + # $tmp_cache_file, and at the end move $tmp_cache_file_name to + # $cache_file_name. This avoids having partially written + # cache entries. + my ($fh, $cache_file_name, $tmp_cache_file_name, $tmp_cache_file); + if ($opts{cache} && $self->large_cache_root) { + my @key_items = ('cmd', $opts{max_exit_code}, + ref($opts{cmd}) eq 'ARRAY' ? + (@{$opts{cmd}}) : (undef, $opts{cmd})); + push @{$large_cache_statistics{get_list}}, + [$self->repo_dir, @key_items]; + $cache_file_name = File::Spec->catfile( + $self->large_cache_root, $self->get_key(@key_items)); + $tmp_cache_file_name = File::Spec->catfile( + $self->large_cache_root, + join('.', $$, $_file_seq++, 'tmp')); + } + unless ($cache_file_name && open $fh, '<', $cache_file_name) { + # Not in cache -- open pipe, and open cache file to write to. + if (@cmd) { + open $fh, '-|', @cmd; + } else { + open $fh, '-|', $cmd_str; + } + die "cannot open pipe: $cmd_str" unless $fh; + if ($tmp_cache_file_name) { + open $tmp_cache_file, '>', $tmp_cache_file_name + or $tmp_cache_file = undef; + } + # Increment failed_sets; it will be decremented upon + # successful finalization of the cache entry. + $large_cache_statistics{failed_sets}++; + # Record uncached calls as misses. + $large_cache_statistics{misses}++; + } else { + $large_cache_statistics{hits}++; + } + my $read = sub { + return undef unless $fh; # already closed + my $output = <$fh>; + if ($output) { + # Write to cache and return. + if ($tmp_cache_file && ! print $tmp_cache_file $output) { + # Writing to cache failed; clean up + # and stop caching this pipe. + close $tmp_cache_file; + $tmp_cache_file = undef; + unlink $tmp_cache_file_name; + } + return $output; + } + # End of output; close and finalize cache. 
+ if (close $fh) { + # We sometimes get mysterious "Bad file + # descriptor" errors here, but reading from + # the pipe worked fine, so let's not die. + #die "error closing pipe ($!): $cmd_str" if $!; + my $exit_code = $? >> 8; + die "Command died with exit code $exit_code: $cmd_str" + if $exit_code > ($opts{max_exit_code} || 0); + } + $fh = undef; + if ($tmp_cache_file && close $tmp_cache_file) { + # Cache file written OK, move it in place. + if (rename $tmp_cache_file_name, $cache_file_name) { + $large_cache_statistics{failed_sets}--; + $large_cache_statistics{sets}++; + } + } + return undef; + }; + # We can also provide a &close function here in case it + # becomes necessary to close pipes prematurely. + return $read; +} + +# The following methods override the base class (Git::Repo) methods to +# add caching. + +sub get_sha1 { + my ($self, $object_id) = @_; + my $expire = ($object_id !~ /^[0-9a-f]{40}(?![0-9a-f])/); + my $triple = $self->cache_get(['SHA1', $object_id], $expire); + unless (defined $triple) { + $triple = [$self->SUPER::get_sha1($object_id)]; + # Do not cache failed lookups -- missing SHA1s would + # be permanently cached, but a subsequent push to the + # repository might add those missing SHA1s to the + # repository. + return unless $triple->[0]; + $self->cache_set(['SHA1', $object_id], $triple, $expire); + } + return wantarray ? @$triple : $triple->[0]; +} + +sub get_object { + my ($self, $sha1) = @_; + my $type_content = $self->cache_get(['cat-file', $sha1], 0); + unless (defined $type_content) { + $type_content = [$self->SUPER::get_object($sha1)]; + die 'unexpected empty return value' unless @$type_content; + $self->cache_set(['cat-file', $sha1], $type_content, 0); + } + return wantarray ? @$type_content : $type_content->[1]; +} + +# get_commit and get_tag only return empty Commit and Tag objects, +# which when loaded (lazily), happen to call get_object and thus are +# cached as well. 
+ +sub name_rev { + my ($self, $sha1, $tags_only) = @_; + my $name = $self->cache_get(['name-rev', $sha1, $tags_only], 1); + unless (defined $name) { + # || '' is to cache failed lookups (name_rev doesn't + # ever return empty names). + $name = $self->SUPER::name_rev($sha1, $tags_only) || ''; + $self->cache_set(['name-rev', $sha1, $tags_only], $name, 1); + } + return $name || undef; +} + +# Return the seconds since epoch when the repository was last touched. +sub get_last_modification { + my $self = shift; + return $self->{last_modification} if $self->{last_modification}; + # Hashref mapping absolute paths of directories to mtimes. We + # rely on the fact here that every time git updates a file, it + # creates it under a different name and then moves it in + # place, thus causing the mtime of the containing directory to + # be updated. Hence it's enough to just stat the directories. + my $mtimes = $self->cache_get(['mtimes'], 0); + if ($mtimes) { + CHECK_CACHE: { + # Check if the cache response is up to date. + while (my ($dir, $mtime) = each %$mtimes) { + last CHECK_CACHE if (stat $dir)[9] != $mtime; + } + $self->{last_modification} = max(values %$mtimes); + return max(values %$mtimes); + } + } + # Either mtimes are not in cache, or at least one directory + # has been updated. Traverse the whole ref tree and record + # all directory mtimes -- this is a bit slower than the + # up-to-date-ness check above since we end up stat'ing all + # files in the refs directory. + my $time = time; + $mtimes = { $self->repo_dir => (stat $self->repo_dir)[9] }; + my $cacheable = 1; + # Traverse refs/ directory and record the mtimes of all + # directories. Implementation note: This stats *all* files in + # the tree; this can become quite inefficient for repositories + # with many refs (say, >100) that are pushed to frequently. + # There are two possible solutions: + # 1. 
If we dropped the requirement of following symlinks, it + # would in theory be possible to exploit the nlinks==2 + # feature of many Unix file systems. However, File::Find + # doesn't allow us to only traverse directories. + # 2. We could traverse only those directories whose mtimes have + # changed since the last time we checked (which we got from + # the cache); this makes the traversal code slightly more + # complicated, but should solve the worst efficiency issues. + File::Find::find( { + wanted => sub { + my $time = time; # get time first + # No way to avoid stat'ing unconditionally + # with File::Find. + my @stat = stat($_); + if (Fcntl::S_ISDIR($stat[2])) { + # Record the directory's mtime. + $mtimes->{$File::Find::name} = $stat[9]; + # Mtimes have a 1-second granularity, + # so if the directory has *just* been + # modified, we might miss subsequent + # modifications in the same second if + # we cached it. + $cacheable = 0 if $stat[9] >= $time; + } + }, + # The refs/ tree can contain symlinks -- e.g. as of + # Aug 08 repo.or.cz uses a 'forkee' symlink, which + # links to another repository's refs/ directory. 
+ follow => 1, + follow_skip => 2, # ignore duplicates + }, File::Spec->catfile($self->repo_dir, 'refs')); + $self->cache_set(['mtimes'], $mtimes, 0) if $cacheable; + $self->{last_modification} = max(values %$mtimes); + return max(values %$mtimes); +} + +package main; + + +our $repo_root = Git::RepoRoot->new(root_dir => $projectroot, + git_binary => $GIT, + cache => $cache, + large_cache_root => $large_cache_root, + transient_expiration_time => + $cache_transient_expiration_time, + repo_class => 'CachedRepo' + ); + $projects_list ||= $projectroot; # ====================================================================== @@ -392,6 +793,7 @@ if (defined $action) { # parameters which are pathnames our $project = $cgi->param('p'); +our $repo = $repo_root->repo(repo_dir => $project) if $project; if (defined $project) { if (!validate_pathname($project) || !(-d "$projectroot/$project") || @@ -509,13 +911,14 @@ sub evaluate_path_info { undef $project; return; } + $repo = $repo_root->repo(repo_dir => $project); # do not change any parameters if an action is given using the query string return if $action; $path_info =~ s,^\Q$project\E/*,,; my ($refname, $pathname) = split(/:/, $path_info, 2); if (defined $pathname) { # we got "project.git/branch:filename" or "project.git/branch:dir/" - # we could use git_get_type(branch:pathname), but it needs $git_dir + # we could use git_get_type(branch:pathname) here $pathname =~ s,^/+,,; if (!$pathname || substr($pathname, -1) eq "/") { $action ||= "tree"; @@ -533,10 +936,6 @@ sub evaluate_path_info { } evaluate_path_info(); -# path to the current git repository -our $git_dir; -$git_dir = "$projectroot/$project" if $project; - # dispatch my %actions = ( "blame" => \&git_blame, @@ -597,8 +996,7 @@ sub href (%) { # default is to use -absolute url() i.e. $my_uri my $href = $params{-full} ? $my_url : $my_uri; - # XXX: Warning: If you touch this, check the search form for updating, - # too. 
+ # If you touch this, check the search form for updating, too. my @mapping = ( project => "p", @@ -1309,10 +1707,12 @@ sub format_diff_cc_simplified { $result .= "
" . "diff --cc "; if (!is_deleted($diffinfo)) { - $result .= $cgi->a({-href => href(action=>"blob", - hash_base=>$hash, - hash=>$diffinfo->{'to_id'}, - file_name=>$diffinfo->{'to_file'}), + $result .= $cgi->a( + {-href => href(action=>"blob", + $hash && git_get_type($hash) eq 'commit' ? + (hash_base=>$hash) : (), + hash=>$diffinfo->{'to_id'}, + file_name=>$diffinfo->{'to_file'}), -class => "path"}, esc_path($diffinfo->{'to_file'})); } else { @@ -1332,8 +1732,6 @@ sub format_diff_line { my ($from, $to) = @_; my $diff_class = ""; - chomp $line; - if ($from && $to && ref($from->{'href'}) eq "ARRAY") { # combined diff my $prefix = substr($line, 0, scalar @{$from->{'href'}}); @@ -1495,11 +1893,6 @@ sub get_feed_info { ## ---------------------------------------------------------------------- ## git utility subroutines, invoking git commands -# returns path to the core git executable and the --git-dir parameter as list -sub git_cmd { - return $GIT, '--git-dir='.$git_dir; -} - # quote the given arguments for passing them to the shell # quote_command("command", "arg 1", "arg with ' and ! characters") # => "'command' 'arg 1' 'arg with '\'' and '\!' characters'" @@ -1509,33 +1902,56 @@ sub quote_command { map( { my $a = $_; $a =~ s/(['!])/'\\$1'/g; "'$a'" } @_ )); } -# get HEAD ref of given project as hash -sub git_get_head_hash { - my $project = shift; - my $o_git_dir = $git_dir; - my $retval = undef; - $git_dir = "$projectroot/$project"; - if (open my $fd, "-|", git_cmd(), "rev-parse", "--verify", "HEAD") { - my $head = <$fd>; - close $fd; - if (defined $head && $head =~ /^([0-9a-fA-F]{40})$/) { - $retval = $1; +# git_get_sha1_or_die ( EXTENDED_OBJECT_IDENTIFER [, TYPE] ) +# +# Look up the object referred to by C and +# return its SHA1 hash in scalar context or its ($hash, $type, $size) +# in list context. Return an error page to the browser if the object +# couldn't be found. 
+# +# If C<TYPE> is given, resolve tag and commit objects if necessary and +# die unless the object found has the right type. The $type return +# value is guaranteed to equal C<TYPE>. +sub git_get_sha1_or_die { + my ($object_id, $want_type) = @_; + # This method shouldn't be used for checking missing + # parameters, since it cannot generate proper error messages. + # Hence we die with 500. + die_error(500, 'No object given') unless $object_id; + my ($hash, $type, $size) = $repo->get_sha1($object_id); + unless ($hash) { + my $human_type = ucfirst($want_type || 'object'); + die_error(404, "$human_type not found: '$object_id'"); + } + if ($want_type && $want_type ne $type) { + if ($type eq 'tag') { + return git_get_sha1_or_die( + $repo->get_tag($hash)->object, $want_type); + } elsif ($type eq 'commit' && $want_type eq 'tree') { + return git_get_sha1_or_die( + $repo->get_commit($hash)->tree, $want_type); + } else { + # $object_id and $type can be off due to recursion, + # but fixing it complicates the code too much. + die_error(400, "Expected a $want_type object, but " . + "'$object_id' is a $type object"); } } - if (defined $o_git_dir) { - $git_dir = $o_git_dir; - } - return $retval; + return wantarray ? ($hash, $type, $size) : $hash; +} + +# get HEAD ref hash of current project or die if no HEAD ref was found +sub git_get_head_hash { + # Need to have the global $project variable defined. 
+ die_error(400, 'no project given') unless $project; + my $sha1 = $repo->get_sha1('HEAD') + or die_error(500, "HEAD ref not found for project '$project'"); + return $sha1; } # get type of given object sub git_get_type { - my $hash = shift; - - open my $fd, "-|", git_cmd(), "cat-file", '-t', $hash or return; - my $type = <$fd>; - close $fd or return; - chomp $type; + my ($sha1, $type, $size) = $repo->get_sha1(shift); return $type; } @@ -1563,20 +1979,14 @@ sub git_parse_project_config { my $section_regexp = shift; my %config; - local $/ = "\0"; - - open my $fh, "-|", git_cmd(), "config", '-z', '-l', - or return; - - while (my $keyval = <$fh>) { - chomp $keyval; + return unless $repo; + for my $keyval (split "\0", $repo->cmd_output( + cmd => [qw(config -z -l)], cache => 1)) { my ($key, $value) = split(/\n/, $keyval, 2); hash_set_multi(\%config, $key, $value) if (!defined $section_regexp || $key =~ /^(?:$section_regexp)\./o); } - close $fh; - return %config; } @@ -1639,9 +2049,9 @@ sub git_get_project_config { # get config if (!defined $config_file || - $config_file ne "$git_dir/config") { + $config_file ne "$projectroot/$project/config") { %config = git_parse_project_config('gitweb'); - $config_file = "$git_dir/config"; + $config_file = "$projectroot/$project/config"; } # ensure given type @@ -1656,65 +2066,56 @@ sub git_get_project_config { return $config{"gitweb.$key"}; } -# get hash of given path at given ref -sub git_get_hash_by_path { - my $base = shift; - my $path = shift || return undef; - my $type = shift; - +# Return the SHA1 of the blob or tree at the path in the given commit, +# or return undef if it does not exist. 
+sub git_get_sha1_by_path { + my ($base, $path, $type) = @_; $path =~ s,/+$,,; - - open my $fd, "-|", git_cmd(), "ls-tree", $base, "--", $path - or die_error(500, "Open git-ls-tree failed"); - my $line = <$fd>; - close $fd or return undef; - - if (!defined $line) { - # there is no tree or hash given by $path at $base - return undef; - } - - #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c' - $line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t/; - if (defined $type && $type ne $2) { - # type doesn't match - return undef; - } - return $3; + return $repo->get_sha1("$base:$path", $type); } -# get path of entry with given hash at given tree-ish (ref) -# used to get 'from' filename for combined diff (merge commit) for renames +# Get path of entry with given hash at given tree-ish (ref); used to +# get 'from' filename for combined diff (merge commit) for renames. +# Note that this does not resolve tag or commit objects in the $hash +# parameter, you must pass a tree or blob object. sub git_get_path_by_hash { my $base = shift || return; my $hash = shift || return; - local $/ = "\0"; - - open my $fd, "-|", git_cmd(), "ls-tree", '-r', '-t', '-z', $base - or return undef; - while (my $line = <$fd>) { - chomp $line; - - #'040000 tree 595596a6a9117ddba9fe379b6b012b558bac8423 gitweb' - #'100644 blob e02e90f0429be0d2a69b76571101f20b8f75530f gitweb/README' - if ($line =~ m/(?:[0-9]+) (?:.+) $hash\t(.+)$/) { - close $fd; + # This subroutine could be extracted into the Git::Tree API + # once it exists. + my $tree = git_get_sha1_or_die($base, 'tree'); + my ($file_sha1, $file_type) = $repo->get_sha1($hash); + die_error(404, "object not found: '$hash'") unless $file_sha1; + die_error(400, "'$hash' is a $file_type object, not a tree or blob object") + unless $file_type eq 'blob' || $file_type eq 'tree'; + + # The ls-tree output can be quite large, so use + # progressive_cmd_output. 
+ my $ls_tree_read = $repo->progressive_cmd_output( + cmd => [qw(ls-tree -r -t), $tree], separator => "\n", + cache => 1); + while (my $line = $ls_tree_read->()) { + if ($line =~ /^[0-9]+ [a-z]+ $file_sha1\t(.+)$/) { + while ($ls_tree_read->()) { } # cache it + # TODO: needs unquoting return $1; } } - close $fd; return undef; } ## ...................................................................... ## git utility functions, directly accessing git repository +# The following subroutines locally change the global $project +# variable as a side-effect so that their calls to +# git_get_project_config work. + sub git_get_project_description { - my $path = shift; + local $project = shift; - $git_dir = "$projectroot/$path"; - open my $fd, "$git_dir/description" + open my $fd, "$projectroot/$project/description" or return git_get_project_config('description'); my $descr = <$fd>; close $fd; @@ -1725,10 +2126,9 @@ sub git_get_project_description { } sub git_get_project_url_list { - my $path = shift; + local $project = shift; - $git_dir = "$projectroot/$path"; - open my $fd, "$git_dir/cloneurl" + open my $fd, "$projectroot/$project/cloneurl" or return wantarray ? 
@{ config_to_multi(git_get_project_config('url')) } : config_to_multi(git_get_project_config('url')); @@ -1863,11 +2263,10 @@ sub git_get_project_list_from_file { } sub git_get_project_owner { - my $project = shift; + local $project = shift; my $owner; return undef unless $project; - $git_dir = "$projectroot/$project"; if (!defined $gitweb_project_owner) { git_get_project_list_from_file(); @@ -1880,44 +2279,46 @@ sub git_get_project_owner { $owner = git_get_project_config('owner'); } if (!defined $owner) { - $owner = get_file_owner("$git_dir"); + $owner = get_file_owner("$projectroot/$project"); } return $owner; } sub git_get_last_activity { - my ($path) = @_; - my $fd; + my $path = shift; - $git_dir = "$projectroot/$path"; - open($fd, "-|", git_cmd(), 'for-each-ref', - '--format=%(committer)', - '--sort=-committerdate', - '--count=1', - 'refs/heads') or return; - my $most_recent = <$fd>; - close $fd or return; - if (defined $most_recent && - $most_recent =~ / (\d+) [-+][01]\d\d\d$/) { - my $timestamp = $1; - my $age = time - $timestamp; - return ($age, age_string($age)); - } - return (undef, undef); + chomp(my $most_recent = $repo_root->repo(repo_dir => $path)->cmd_output( + cmd => [ qw(for-each-ref --count=1 --format=%(committer)), + qw(--sort=-committerdate refs/heads) ], + cache => 1, max_exit_code => 255)) or return; + $most_recent =~ / (\d+) [-+][01]\d\d\d$/ or return; + my $timestamp = $1; + my $age = time - $timestamp; + return ($age, age_string($age)); } +# Return a hashref from SHA1s to arrayrefs of ref names. Example: +# { '7e51...' => ['tags/tag-object'], # tag SHA1 +# '51ba...' => ['tags/tag-object'], # referenced commit SHA1 +# '3c4a...' => ['heads/master', 'tags/another-tag'] } sub git_get_references { my $type = shift || ""; my %refs; - # 5dc01c595e6c6ec9ccda4f6f69c131c0dd945f8c refs/tags/v2.6.11 - # c39ae07f393806ccf406ef966e9a15afc43cc36a refs/tags/v2.6.11^{} - open my $fd, "-|", git_cmd(), "show-ref", "--dereference", - ($type ? 
("--", "refs/$type") : ()) # use -- if $type - or return; - - while (my $line = <$fd>) { - chomp $line; + # This is not implementable in terms of a simple for-each-refs + # call (e.g. extractable to Git::Repo->get_refs) because it + # doesn't dereference, and we cannot dereference a lot of + # SHA1s ourselves as long as there is no implementation that + # uses Cache::Memcached->get_multi. Hence, we use + # cmd_output. + my @lines = split "\n", $repo->cmd_output( + cmd => ['show-ref', '--dereference', + ($type ? ("--", "refs/$type") : ())], + max_exit_code => 1, # exits with status 1 on empty repos + cache => 1); + for my $line (@lines) { + # 5dc01c595e6c6ec9ccda4f6f69c131c0dd945f8c refs/tags/v2.6.11 + # c39ae07f393806ccf406ef966e9a15afc43cc36a refs/tags/v2.6.11^{} if ($line =~ m!^([0-9a-fA-F]{40})\srefs/($type/?[^^]+)!) { if (defined $refs{$1}) { push @{$refs{$1}}, $2; @@ -1926,24 +2327,15 @@ sub git_get_references { } } } - close $fd or return; return \%refs; } sub git_get_rev_name_tags { - my $hash = shift || return undef; - - open my $fd, "-|", git_cmd(), "name-rev", "--tags", $hash - or return; - my $name_rev = <$fd>; - close $fd; + my $hash = shift or die_error(); - if ($name_rev =~ m|^$hash tags/(.*)$|) { - return $1; - } else { - # catches also '$hash undefined' output - return undef; - } + my $name = $repo->name_rev($hash); + $name =~ s!^tags/!! 
if $name; + return $name; } ## ---------------------------------------------------------------------- @@ -1982,14 +2374,12 @@ sub parse_date { } sub parse_tag { - my $tag_id = shift; + my $sha1 = shift; my %tag; my @comment; - open my $fd, "-|", git_cmd(), "cat-file", "tag", $tag_id or return; - $tag{'id'} = $tag_id; - while (my $line = <$fd>) { - chomp $line; + my ($raw_header, $raw_comment) = split "\n\n", $repo->get_object($sha1), 2; + for my $line (split "\n", $raw_header) { if ($line =~ m/^object ([0-9a-fA-F]{40})$/) { $tag{'object'} = $1; } elsif ($line =~ m/^type (.+)$/) { @@ -2000,19 +2390,9 @@ sub parse_tag { $tag{'author'} = $1; $tag{'epoch'} = $2; $tag{'tz'} = $3; - } elsif ($line =~ m/--BEGIN/) { - push @comment, $line; - last; - } elsif ($line eq "") { - last; } } - push @comment, <$fd>; - $tag{'comment'} = \@comment; - close $fd or return; - if (!defined $tag{'name'}) { - return - }; + $tag{'comment'} = [split "\n", $raw_comment]; return %tag } @@ -2021,8 +2401,6 @@ sub parse_commit_text { my @commit_lines = split '\n', $commit_text; my %co; - pop @commit_lines; # Remove '\0' - if (! @commit_lines) { return; } @@ -2117,48 +2495,38 @@ sub parse_commit_text { sub parse_commit { my ($commit_id) = @_; - my %co; + # This currently supports arbitrary object names, so we + # resolve it here. + my $sha1 = git_get_sha1_or_die($commit_id, 'commit'); - local $/ = "\0"; - - open my $fd, "-|", git_cmd(), "rev-list", - "--parents", - "--header", - "--max-count=1", - $commit_id, - "--", - or die_error(500, "Open git-rev-list failed"); - %co = parse_commit_text(<$fd>, 1); - close $fd; - - return %co; + # This formats commits slightly differently than the raw + # cat-file, so we cannot use get_object here. This function + # should be replaced by $repo->get_commit anyway. 
+ my $commit_text = $repo->cmd_output( + cmd => [qw(rev-list --parents --header --max-count=1), $sha1, '--'], + cache => 2); + $commit_text =~ s/\x00$//; + return parse_commit_text($commit_text, 1); } sub parse_commits { my ($commit_id, $maxcount, $skip, $filename, @args) = @_; - my @cos; + # This currently supports arbitrary object names, so we + # resolve it here. + my $sha1 = git_get_sha1_or_die($commit_id); + git_get_sha1_or_die("$sha1:$filename") if $filename; # check existence $maxcount ||= 1; $skip ||= 0; - local $/ = "\0"; - - open my $fd, "-|", git_cmd(), "rev-list", - "--header", - @args, - ("--max-count=" . $maxcount), - ("--skip=" . $skip), - @extra_options, - $commit_id, - "--", - ($filename ? ($filename) : ()) - or die_error(500, "Open git-rev-list failed"); - while (my $line = <$fd>) { - my %co = parse_commit_text($line); - push @cos, \%co; - } - close $fd; - + # TODO: Integrate this into Git::Repo, and get rid of @args + # and @extra_options. + my @raw_commits = split "\0", $repo->cmd_output( + cmd => ['rev-list', '--header', @args, "--max-count=$maxcount", + "--skip=$skip", @extra_options, $sha1, "--", + $filename || ()], + cache => 2); + my @cos = map { { parse_commit_text($_) } } @raw_commits; return wantarray ? @cos : \@cos; } @@ -2263,7 +2631,9 @@ sub parse_from_to_diffinfo { # ordinary (not combined) diff $from->{'file'} = $diffinfo->{'from_file'}; if ($diffinfo->{'status'} ne "A") { # not new (added) file - $from->{'href'} = href(action=>"blob", hash_base=>$hash_parent, + $from->{'href'} = href(action=>"blob", + $hash_parent && git_get_type($hash_parent) eq 'commit' ? 
+ (hash_base=>$hash_parent) : (), hash=>$diffinfo->{'from_id'}, file_name=>$from->{'file'}); } else { @@ -2273,7 +2643,9 @@ sub parse_from_to_diffinfo { $to->{'file'} = $diffinfo->{'to_file'}; if (!is_deleted($diffinfo)) { # file exists in result - $to->{'href'} = href(action=>"blob", hash_base=>$hash, + $to->{'href'} = href(action=>"blob", + $hash && git_get_type($hash) eq 'commit' ? + (hash_base=>$hash) : (), hash=>$diffinfo->{'to_id'}, file_name=>$to->{'file'}); } else { @@ -2288,15 +2660,15 @@ sub git_get_heads_list { my $limit = shift; my @headslist; - open my $fd, '-|', git_cmd(), 'for-each-ref', - ($limit ? '--count='.($limit+1) : ()), '--sort=-committerdate', - '--format=%(objectname) %(refname) %(subject)%00%(committer)', - 'refs/heads' - or return; - while (my $line = <$fd>) { + my @lines = split "\n", $repo->cmd_output( + cmd => ['for-each-ref', '--sort=-committerdate', + $limit ? '--count='.($limit+1) : (), + '--format=%(objectname) %(refname) %(subject)%00%(committer)', + 'refs/heads'], + cache => 1); + for my $line (@lines) { my %ref_item; - chomp $line; my ($refinfo, $committerinfo) = split(/\0/, $line); my ($hash, $name, $title) = split(' ', $refinfo, 3); my ($committer, $epoch, $tz) = @@ -2316,7 +2688,6 @@ sub git_get_heads_list { push @headslist, \%ref_item; } - close $fd; return wantarray ? @headslist : \@headslist; } @@ -2325,16 +2696,16 @@ sub git_get_tags_list { my $limit = shift; my @tagslist; - open my $fd, '-|', git_cmd(), 'for-each-ref', - ($limit ? '--count='.($limit+1) : ()), '--sort=-creatordate', - '--format=%(objectname) %(objecttype) %(refname) '. - '%(*objectname) %(*objecttype) %(subject)%00%(creator)', - 'refs/tags' - or return; - while (my $line = <$fd>) { + my @lines = split "\n", $repo->cmd_output( + cmd => ['for-each-ref', '--sort=-creatordate', + ($limit ? '--count='.($limit+1) : ()), + '--format=%(objectname) %(objecttype) %(refname) '. 
+ '%(*objectname) %(*objecttype) %(subject)%00%(creator)', + 'refs/tags'], + cache => 1); + for my $line (@lines) { my %ref_item; - chomp $line; my ($refinfo, $creatorinfo) = split(/\0/, $line); my ($id, $type, $name, $refid, $reftype, $title) = split(' ', $refinfo, 6); my ($creator, $epoch, $tz) = @@ -2365,7 +2736,6 @@ sub git_get_tags_list { push @tagslist, \%ref_item; } - close $fd; return wantarray ? @tagslist : \@tagslist; } @@ -2429,8 +2799,17 @@ sub mimetype_guess { return $mime; } +# Replacement for (heuristic) -T operator. (perldoc -f -T) +sub is_ascii_text { + my $text = shift; + return 1 if length($text) == 0; + return ((grep { ord($_) > 127 } split('', $text)) / length $text) <= 0.3; +} + +# Determine the MIME type of a blob based on its file name ($filename) +# and its first n bytes ($snippet). sub blob_mimetype { - my $fd = shift; + my $snippet = shift; my $filename = shift; if ($filename) { @@ -2438,10 +2817,7 @@ sub blob_mimetype { $mime and return $mime; } - # just in case - return $default_blob_plain_mimetype unless $fd; - - if (-T $fd) { + if (is_ascii_text($snippet)) { return 'text/plain'; } elsif (! $filename) { return 'application/octet-stream'; @@ -2457,9 +2833,9 @@ sub blob_mimetype { } sub blob_contenttype { - my ($fd, $file_name, $type) = @_; + my ($snippet, $file_name, $type) = @_; - $type ||= blob_mimetype($fd, $file_name); + $type ||= blob_mimetype($snippet, $file_name); if ($type eq 'text/plain' && defined $default_text_plain_charset) { $type .= "; charset=$default_text_plain_charset"; } @@ -2472,7 +2848,7 @@ sub blob_contenttype { sub git_header_html { my $status = shift || "200 OK"; - my $expires = shift; + my $expires = shift || 'now'; my $title = "$site_name"; if (defined $project) { @@ -2573,7 +2949,7 @@ EOF print "\n" . "\n"; - if (-f $site_header) { + if ($site_header && -f $site_header) { open (my $fd, $site_header); print <$fd>; close $fd; @@ -2593,8 +2969,8 @@ EOF } print "
\n"; - my ($have_search) = gitweb_check_feature('search'); - if (defined $project && $have_search) { + if (defined $project && gitweb_check_feature('search') && + $repo->get_sha1('HEAD')) { if (!defined $searchtext) { $searchtext = ""; } @@ -2662,6 +3038,39 @@ sub git_footer_html { } print "\n"; # class="page_footer" + if ($page_info) { + print "
\n"; + my $print_stats = sub { + my ($name, $cache_exists) = (shift, shift); + my %s = @_; # statistics hash + if ($cache_exists) { + my $gets = $s{hits} + $s{misses}; + print "

" . ucfirst($name) . ": " . + "$gets gets " . + "($s{hits} hits + ". + "$s{misses} misses); " . + "$s{sets} sets, " . + "$s{failed_sets} failed sets.

\n"; + if ($page_info == 2 && @{$s{get_list}}) { + print "
";
+					print join("\n",
+						   map(esc_html(join ',',
+								map(defined $_ ? " $_" : '',
+								    @$_)),
+						       @{$s{get_list}}));
+					print "
\n"; + } + } else { + print "

No $name.

\n"; + } + }; + $print_stats->('main cache', $cache, + %CachedRepo::cache_statistics); + $print_stats->('large cache', $large_cache_root, + %CachedRepo::large_cache_statistics); + print "
\n"; # class="page_info" + } + if (-f $site_footer) { open (my $fd, $site_footer); print <$fd>; @@ -2710,6 +3119,8 @@ sub git_print_page_nav { my ($current, $suppress, $head, $treehead, $treebase, $extra) = @_; $extra = '' if !defined $extra; # pager or formats + return unless $repo->get_sha1('HEAD'); # no navigation for empty repos + my @navs = qw(summary shortlog log commit commitdiff tree); if ($suppress) { @navs = grep { $_ ne $suppress } @navs; @@ -2891,24 +3302,6 @@ sub git_print_log ($;%) { } } -# return link target (what link points to) -sub git_get_link_target { - my $hash = shift; - my $link_target; - - # read link - open my $fd, "-|", git_cmd(), "cat-file", "blob", $hash - or return; - { - local $/; - $link_target = <$fd>; - } - close $fd - or return; - - return $link_target; -} - # given link target, and the directory (basedir) the link is in, # return target of link relative to top directory (top tree); # return undef if it is not possible (including absolute links). @@ -2970,7 +3363,7 @@ sub git_print_tree_entry { file_name=>"$basedir$t->{'name'}", %base_key), -class => "list"}, esc_path($t->{'name'})); if (S_ISLNK(oct $t->{'mode'})) { - my $link_target = git_get_link_target($t->{'hash'}); + my $link_target = $repo->get_object($t->{'hash'}); if ($link_target) { my $norm_target = normalize_link_target($link_target, $basedir, $hash_base); if (defined $norm_target) { @@ -3383,7 +3776,7 @@ sub git_difftree_body { } sub git_patchset_body { - my ($fd, $difftree, $hash, @hash_parents) = @_; + my ($read, $difftree, $hash, @hash_parents) = @_; my ($hash_parent) = $hash_parents[0]; my $is_combined = (@hash_parents > 1); @@ -3397,7 +3790,7 @@ sub git_patchset_body { print "
\n"; # skip to first patch - while ($patch_line = <$fd>) { + while ($patch_line = $read->()) { chomp $patch_line; last if ($patch_line =~ m/^diff /); @@ -3465,7 +3858,7 @@ sub git_patchset_body { # print extended diff header print "
\n"; EXTENDED_HEADER: - while ($patch_line = <$fd>) { + while ($patch_line = $read->()) { chomp $patch_line; last EXTENDED_HEADER if ($patch_line =~ m/^--- |^diff /); @@ -3484,7 +3877,7 @@ sub git_patchset_body { #assert($patch_line =~ m/^---/) if DEBUG; my $last_patch_line = $patch_line; - $patch_line = <$fd>; + $patch_line = $read->(); chomp $patch_line; #assert($patch_line =~ m/^\+\+\+/) if DEBUG; @@ -3494,7 +3887,7 @@ sub git_patchset_body { # the patch itself LINE: - while ($patch_line = <$fd>) { + while ($patch_line = $read->()) { chomp $patch_line; next PATCH if ($patch_line =~ m/^diff /); @@ -3545,9 +3938,6 @@ sub fill_project_list_info { PROJECT: foreach my $pr (@$projlist) { my (@activity) = git_get_last_activity($pr->{'path'}); - unless (@activity) { - next PROJECT; - } ($pr->{'age'}, $pr->{'age_string'}) = @activity; if (!defined $pr->{'descr'}) { my $descr = git_get_project_description($pr->{'path'}) || ""; @@ -3585,7 +3975,9 @@ sub print_sort_th { if ($str_sort) { @$list = sort {$a->{$key} cmp $b->{$key}} @$list; } else { - @$list = sort {$a->{$key} <=> $b->{$key}} @$list; + # Sort undefined keys last. + @$list = sort { (defined $a->{$key} ? $a->{$key} : 1e30) <=> + (defined $b->{$key} ? $b->{$key} : 1e30)} @$list; } print "$header\n"; } else { @@ -3653,16 +4045,20 @@ sub git_project_list_body { "" . $cgi->a({-href => href(project=>$pr->{'path'}, action=>"summary"), -class => "list", -title => $pr->{'descr_long'}}, esc_html($pr->{'descr'})) . "\n" . - "" . chop_and_escape_str($pr->{'owner'}, 15) . "\n"; - print "{'age'}) . "\">" . + "" . chop_and_escape_str($pr->{'owner'}, 15) . "\n" . + "{'age'}) . "\">" . (defined $pr->{'age_string'} ? $pr->{'age_string'} : "No commits") . "\n" . "" . - $cgi->a({-href => href(project=>$pr->{'path'}, action=>"summary")}, "summary") . " | " . - $cgi->a({-href => href(project=>$pr->{'path'}, action=>"shortlog")}, "shortlog") . " | " . - $cgi->a({-href => href(project=>$pr->{'path'}, action=>"log")}, "log") . " | " . 
- $cgi->a({-href => href(project=>$pr->{'path'}, action=>"tree")}, "tree") . - ($pr->{'forks'} ? " | " . $cgi->a({-href => href(project=>$pr->{'path'}, action=>"forks")}, "forks") : '') . - "\n" . + $cgi->a({-href => href(project=>$pr->{'path'}, action=>"summary")}, "summary"); + if ($pr->{'age_string'}) { + # Non-empty repository. + print " | " . + $cgi->a({-href => href(project=>$pr->{'path'}, action=>"shortlog")}, "shortlog") . " | " . + $cgi->a({-href => href(project=>$pr->{'path'}, action=>"log")}, "log") . " | " . + $cgi->a({-href => href(project=>$pr->{'path'}, action=>"tree")}, "tree"); + } + print " | " . $cgi->a({-href => href(project=>$pr->{'path'}, action=>"forks")}, "forks") if $pr->{'forks'}; + print "\n" . "\n"; } if (defined $extra) { @@ -3760,8 +4156,8 @@ sub git_history_body { $cgi->a({-href => href(action=>"commitdiff", hash=>$commit)}, "commitdiff"); if ($ftype eq 'blob') { - my $blob_current = git_get_hash_by_path($hash_base, $file_name); - my $blob_parent = git_get_hash_by_path($commit, $file_name); + my $blob_current = git_get_sha1_by_path($hash_base, $file_name); + my $blob_parent = git_get_sha1_by_path($commit, $file_name); if (defined $blob_current && defined $blob_parent && $blob_current ne $blob_parent) { print " | " . @@ -4016,9 +4412,9 @@ sub git_project_index { sub git_summary { my $descr = git_get_project_description($project) || "none"; - my %co = parse_commit("HEAD"); - my %cd = %co ? parse_date($co{'committer_epoch'}, $co{'committer_tz'}) : (); - my $head = $co{'id'}; + my $head = $repo->get_sha1('HEAD', 'commit'); + my %co = parse_commit($head) if $head; + my %cd = parse_date($co{'committer_epoch'}, $co{'committer_tz'}) if $head; my $owner = git_get_project_owner($project); @@ -4037,7 +4433,7 @@ sub git_summary { git_header_html(); git_print_page_nav('summary','', $head); - print "
 
\n"; + print "
 
\n" if $head; print "\n" . "\n" . "\n"; @@ -4103,14 +4499,12 @@ sub git_summary { } sub git_tag { - my $head = git_get_head_hash($project); + my $head = git_get_head_hash(); + my $sha1 = git_get_sha1_or_die($hash, 'tag'); git_header_html(); git_print_page_nav('','', $head,undef,$head); - my %tag = parse_tag($hash); - - if (! %tag) { - die_error(404, "Unknown tag object"); - } + # TODO: This wants to become $repo->get_tag. + my %tag = parse_tag($sha1); git_print_header_div('commit', esc_html($tag{'name'}), $hash); print "
\n" . @@ -4134,7 +4528,6 @@ sub git_tag { print "
"; my $comment = $tag{'comment'}; foreach my $line (@$comment) { - chomp $line; print esc_html($line, -nbsp=>1) . "
\n"; } print "
\n"; @@ -4145,25 +4538,19 @@ sub git_blame { my $fd; my $ftype; - gitweb_check_feature('blame') - or die_error(403, "Blame view not allowed"); + die_error(403, "Blame view not allowed") + unless gitweb_check_feature('blame'); die_error(400, "No file name given") unless $file_name; - $hash_base ||= git_get_head_hash($project); - die_error(404, "Couldn't find base commit") unless ($hash_base); - my %co = parse_commit($hash_base) - or die_error(404, "Commit not found"); - if (!defined $hash) { - $hash = git_get_hash_by_path($hash_base, $file_name, "blob") - or die_error(404, "Error looking up file"); - } - $ftype = git_get_type($hash); - if ($ftype !~ "blob") { - die_error(400, "Object is not a blob"); - } - open ($fd, "-|", git_cmd(), "blame", '-p', '--', - $file_name, $hash_base) - or die_error(500, "Open git-blame failed"); + $hash_base ||= git_get_head_hash(); + my $hash_base_sha1 = git_get_sha1_or_die($hash_base, 'commit'); + my %co = parse_commit($hash_base); + $hash ||= git_get_sha1_by_path($hash_base, $file_name, 'blob') + or die_error(404, "Error looking up file: '$file_name'"); + git_get_sha1_or_die($hash, 'blob'); # check existence + my @blame_lines = split "\n", $repo->cmd_output( + cmd => ['blame', '-p', '--', $file_name, $hash_base_sha1], + cache => 2); git_header_html(); my $formats_nav = $cgi->a({-href => href(action=>"blob", -replay=>1)}, @@ -4188,7 +4575,7 @@ sub git_blame { HTML my %metainfo = (); while (1) { - $_ = <$fd>; + $_ = shift @blame_lines; last unless defined $_; my ($full_rev, $orig_lineno, $lineno, $group_size) = /^([0-9a-f]{40}) (\d+) (\d+)(?: (\d+))?$/; @@ -4196,14 +4583,13 @@ HTML $metainfo{$full_rev} = {}; } my $meta = $metainfo{$full_rev}; - while (<$fd>) { + while ($_ = shift @blame_lines) { last if (s/^\t//); if (/^(\S+) (.*)$/) { $meta->{$1} = $2; } } my $data = $_; - chomp $data; my $rev = substr($full_rev, 0, 8); my $author = $meta->{'author'}; my %date = parse_date($meta->{'author-time'}, @@ -4224,11 +4610,9 @@ HTML 
esc_html($rev)); print "\n"; } - open (my $dd, "-|", git_cmd(), "rev-parse", "$full_rev^") - or die_error(500, "Open git-rev-parse failed"); - my $parent_commit = <$dd>; - close $dd; - chomp($parent_commit); + # TODO: $parent_commit can be undef, in which case the + # link becomes invalid. + my $parent_commit = $repo->get_sha1("$full_rev^"); my $blamed = href(action => 'blame', file_name => $meta->{'filename'}, hash_base => $parent_commit); @@ -4243,13 +4627,11 @@ HTML } print "
description" . esc_html($descr) . "
owner" . esc_html($owner) . "
\n"; print "
"; - close $fd - or print "Reading blob failed\n"; git_footer_html(); } sub git_tags { - my $head = git_get_head_hash($project); + my $head = git_get_head_hash(); git_header_html(); git_print_page_nav('','', $head,undef,$head); git_print_header_div('summary', $project); @@ -4262,7 +4644,7 @@ sub git_tags { } sub git_heads { - my $head = git_get_head_hash($project); + my $head = git_get_head_hash(); git_header_html(); git_print_page_nav('','', $head,undef,$head); git_print_header_div('summary', $project); @@ -4280,9 +4662,9 @@ sub git_blob_plain { if (!defined $hash) { if (defined $file_name) { - my $base = $hash_base || git_get_head_hash($project); - $hash = git_get_hash_by_path($base, $file_name, "blob") - or die_error(404, "Cannot find file"); + my $base = $hash_base || git_get_head_hash(); + $hash = git_get_sha1_by_path($base, $file_name, 'blob') + or die_error(404, "Cannot find file: '$file_name'"); } else { die_error(400, "No file name defined"); } @@ -4291,11 +4673,15 @@ sub git_blob_plain { $expires = "+1d"; } - open my $fd, "-|", git_cmd(), "cat-file", "blob", $hash - or die_error(500, "Open git-cat-file blob '$hash' failed"); + my $sha1 = git_get_sha1_or_die($hash, 'blob'); + my $blob_read = $repo->progressive_cmd_output( + cmd => ['cat-file', 'blob', $sha1], separator => undef, + cache => 1); + my $first_chunk = $blob_read->() || ''; # content-type (can include charset) - $type = blob_contenttype($fd, $file_name, $type); + $type = blob_contenttype(substr($first_chunk, 0, 1024), + $file_name, $type); # "save as" filename, even when no $file_name is given my $save_as = "$hash"; @@ -4309,12 +4695,15 @@ sub git_blob_plain { -type => $type, -expires => $expires, -content_disposition => 'inline; filename="' . $save_as . 
'"'); - undef $/; - binmode STDOUT, ':raw'; - print <$fd>; - binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi - $/ = "\n"; - close $fd; + { + local $/; + binmode STDOUT, ':raw'; + print $first_chunk; + while (my $chunk = $blob_read->()) { + print $chunk; + } + binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi + } } sub git_blob { @@ -4322,9 +4711,9 @@ sub git_blob { if (!defined $hash) { if (defined $file_name) { - my $base = $hash_base || git_get_head_hash($project); - $hash = git_get_hash_by_path($base, $file_name, "blob") - or die_error(404, "Cannot find file"); + my $base = $hash_base || git_get_head_hash(); + $hash = git_get_sha1_by_path($base, $file_name, 'blob') + or die_error(404, "Cannot find file: '$file_name'"); } else { die_error(400, "No file name defined"); } @@ -4334,13 +4723,20 @@ sub git_blob { } my ($have_blame) = gitweb_check_feature('blame'); - open my $fd, "-|", git_cmd(), "cat-file", "blob", $hash - or die_error(500, "Couldn't cat $file_name, $hash"); - my $mimetype = blob_mimetype($fd, $file_name); - if ($mimetype !~ m!^(?:text/|image/(?:gif|png|jpeg)$)! && -B $fd) { - close $fd; - return git_blob_plain($mimetype); - } + my $sha1 = git_get_sha1_or_die($hash, 'blob'); + my $blob_read = $repo->progressive_cmd_output( + cmd => ['cat-file', 'blob', $sha1], separator => "\n", + cache => 1); + my @first_lines; + for my $i (0..20) { + my $line = $blob_read->() or last; + push @first_lines, $line; + } + my $test_snippet = join("\n", @first_lines); + my $mimetype = blob_mimetype($test_snippet, $file_name); + return git_blob_plain($mimetype) + if ($mimetype !~ m!^(?:text/|image/(?:gif|png|jpeg)$)! && + ! is_ascii_text($test_snippet)); # we can have blame only for text/* mimetype $have_blame &&= ($mimetype =~ m!^text/!); @@ -4389,7 +4785,7 @@ sub git_blob { qq!" 
/>\n!; } else { my $nr; - while (my $line = <$fd>) { + while (my $line = shift @first_lines || $blob_read->()) { chomp $line; $nr++; $line = untabify($line); @@ -4397,29 +4793,23 @@ sub git_blob { $nr, $nr, $nr, esc_html($line, -nbsp=>1); } } - close $fd - or print "Reading blob failed.\n"; print "
"; git_footer_html(); } sub git_tree { - if (!defined $hash_base) { - $hash_base = "HEAD"; - } + $hash_base ||= "HEAD"; if (!defined $hash) { if (defined $file_name) { - $hash = git_get_hash_by_path($hash_base, $file_name, "tree"); + $hash = git_get_sha1_by_path($hash_base, $file_name, 'tree') + or die_error(404, "Cannot find file: '$file_name'"); } else { $hash = $hash_base; } } - $/ = "\0"; - open my $fd, "-|", git_cmd(), "ls-tree", '-z', $hash - or die_error(500, "Open git-ls-tree failed"); - my @entries = map { chomp; $_ } <$fd>; - close $fd or die_error(404, "Reading tree failed"); - $/ = "\n"; + my $sha1 = git_get_sha1_or_die($hash, 'tree'); + my @entries = split "\0", $repo->cmd_output( + cmd => ['ls-tree', '-z', $sha1], cache => 2); my $refs = git_get_references(); my $ref = format_ref_marker($refs, $hash_base); @@ -4520,9 +4910,8 @@ sub git_snapshot { die_error(403, "Unsupported snapshot format"); } - if (!defined $hash) { - $hash = git_get_head_hash($project); - } + $hash ||= git_get_head_hash(); + my $sha1 = git_get_sha1_or_die($hash); my $name = $project; $name =~ s,([^/])/*\.git$,$1,; @@ -4532,9 +4921,9 @@ sub git_snapshot { my $cmd; $filename .= "-$hash$known_snapshot_formats{$format}{'suffix'}"; $cmd = quote_command( - git_cmd(), 'archive', + 'archive', "--format=$known_snapshot_formats{$format}{'format'}", - "--prefix=$name/", $hash); + "--prefix=$name/", $sha1); if (exists $known_snapshot_formats{$format}{'compressor'}) { $cmd .= ' | ' . quote_command(@{$known_snapshot_formats{$format}{'compressor'}}); } @@ -4544,16 +4933,17 @@ sub git_snapshot { -content_disposition => 'inline; filename="' . "$filename" . 
'"', -status => '200 OK'); - open my $fd, "-|", $cmd - or die_error(500, "Execute git-archive failed"); + my $snapshot_read = $repo->progressive_cmd_output( + cmd => $cmd, separator => undef, cache => 1); binmode STDOUT, ':raw'; - print <$fd>; + while (my $chunk = $snapshot_read->()) { + print $chunk; + } binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi - close $fd; } sub git_log { - my $head = git_get_head_hash($project); + my $head = git_get_head_hash(); if (!defined $hash) { $hash = $head; } @@ -4613,8 +5003,8 @@ sub git_log { sub git_commit { $hash ||= $hash_base || "HEAD"; - my %co = parse_commit($hash) - or die_error(404, "Unknown commit object"); + my $sha1 = git_get_sha1_or_die($hash, 'commit'); + my %co = parse_commit($hash); my %ad = parse_date($co{'author_epoch'}, $co{'author_tz'}); my %cd = parse_date($co{'committer_epoch'}, $co{'committer_tz'}); @@ -4649,14 +5039,10 @@ sub git_commit { if (!defined $parent) { $parent = "--root"; } - my @difftree; - open my $fd, "-|", git_cmd(), "diff-tree", '-r', "--no-commit-id", - @diff_opts, - (@$parents <= 1 ? $parent : '-c'), - $hash, "--" - or die_error(500, "Open git-diff-tree failed"); - @difftree = map { chomp; $_ } <$fd>; - close $fd or die_error(404, "Reading git-diff-tree failed"); + my @difftree = split "\n", $repo->cmd_output( + cmd => ['diff-tree', '-r', '--no-commit-id', @diff_opts, + (@$parents <= 1 ? $parent : '-c'), $sha1, '--'], + cache => 2); # non-textual hash id's can be cached my $expires; @@ -4743,41 +5129,16 @@ sub git_object { # - hash_base and file_name my $type; - # - hash or hash_base alone if ($hash || ($hash_base && !defined $file_name)) { - my $object_id = $hash || $hash_base; - - open my $fd, "-|", quote_command( - git_cmd(), 'cat-file', '-t', $object_id) . 
' 2> /dev/null' - or die_error(404, "Object does not exist"); - $type = <$fd>; - chomp $type; - close $fd - or die_error(404, "Object does not exist"); - - # - hash_base and file_name + # hash or hash_base alone + $type = (git_get_sha1_or_die($hash || $hash_base))[1]; } elsif ($hash_base && defined $file_name) { + # hash_base and file_name $file_name =~ s,/+$,,; - - system(git_cmd(), "cat-file", '-e', $hash_base) == 0 - or die_error(404, "Base object does not exist"); - - # here errors should not hapen - open my $fd, "-|", git_cmd(), "ls-tree", $hash_base, "--", $file_name - or die_error(500, "Open git-ls-tree failed"); - my $line = <$fd>; - close $fd; - - #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c' - unless ($line && $line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t/) { - die_error(404, "File or directory for given base does not exist"); - } - $type = $2; - $hash = $3; + ($hash, $type) = git_get_sha1_or_die("$hash_base:$file_name"); } else { die_error(400, "Not enough information to find object"); } - print $cgi->redirect(-uri => href(action=>$type, -full=>1, hash=>$hash, hash_base=>$hash_base, file_name=>$file_name), @@ -4787,47 +5148,26 @@ sub git_object { sub git_blobdiff { my $format = shift || 'html'; - my $fd; my @difftree; my %diffinfo; my $expires; + my $diff_read; - # preparing $fd and %diffinfo for git_patchset_body + # prepare $diff_read and %diffinfo for git_patchset_body # new style URI if (defined $hash_base && defined $hash_parent_base) { - if (defined $file_name) { - # read raw output - open $fd, "-|", git_cmd(), "diff-tree", '-r', @diff_opts, - $hash_parent_base, $hash_base, - "--", (defined $file_parent ? 
$file_parent : ()), $file_name - or die_error(500, "Open git-diff-tree failed"); - @difftree = map { chomp; $_ } <$fd>; - close $fd - or die_error(404, "Reading git-diff-tree failed"); - @difftree - or die_error(404, "Blob diff not found"); - - } elsif (defined $hash && - $hash =~ /[0-9a-fA-F]{40}/) { - # try to find filename from $hash - - # read filtered raw output - open $fd, "-|", git_cmd(), "diff-tree", '-r', @diff_opts, - $hash_parent_base, $hash_base, "--" - or die_error(500, "Open git-diff-tree failed"); - @difftree = - # ':100644 100644 03b21826... 3b93d5e7... M ls-files.c' - # $hash == to_id - grep { /^:[0-7]{6} [0-7]{6} [0-9a-fA-F]{40} $hash/ } - map { chomp; $_ } <$fd>; - close $fd - or die_error(404, "Reading git-diff-tree failed"); - @difftree - or die_error(404, "Blob diff not found"); - - } else { - die_error(400, "Missing one of the blob diff parameters"); - } + my $commit_sha1 = git_get_sha1_or_die($hash_base, 'commit'); + my $parent_base_sha1 = git_get_sha1_or_die($hash_parent_base, 'commit'); + git_get_sha1_or_die($file_parent, 'blob') if $file_parent; + # There used to be code to handle hash (h) parameters, + # but it's not used (anymore), so we can require $file_name. + die_error(400, "No file name given") unless $file_name; + # read raw output + @difftree = split "\n", $repo->cmd_output( + cmd => ['diff-tree', '-r', @diff_opts, $parent_base_sha1, + $commit_sha1, '--', $file_parent || (), + $file_name], + cache => 2); if (@difftree > 1) { die_error(400, "Ambiguous blob diff specification"); @@ -4847,14 +5187,17 @@ sub git_blobdiff { } # open patch output - open $fd, "-|", git_cmd(), "diff-tree", '-r', @diff_opts, - '-p', ($format eq 'html' ? "--full-index" : ()), - $hash_parent_base, $hash_base, - "--", (defined $file_parent ? $file_parent : ()), $file_name - or die_error(500, "Open git-diff-tree failed"); + # TODO: uncovered (?) 
+ my $hash_base_sha1 = git_get_sha1_or_die($hash_base, 'commit'); + $diff_read = $repo->progressive_cmd_output( + cmd => ['diff-tree', '-r', @diff_opts, '-p', + ($format eq 'html' ? '--full-index' : ()), + $parent_base_sha1, $hash_base_sha1, '--', + $file_parent || (), $file_name], + separator => "\n", cache => 1); } - # old/legacy style URI + # old/legacy style URI (still used in feed [Atom/RSS] view) if (!%diffinfo && # if new style URI failed defined $hash && defined $hash_parent) { # fake git-diff-tree raw output @@ -4884,10 +5227,13 @@ sub git_blobdiff { } # open patch output - open $fd, "-|", git_cmd(), "diff", @diff_opts, - '-p', ($format eq 'html' ? "--full-index" : ()), - $hash_parent, $hash, "--" - or die_error(500, "Open git-diff failed"); + my $parent_sha1 = git_get_sha1_or_die($hash_parent, 'blob'); + my $sha1 = git_get_sha1_or_die($hash, 'commit'); + $diff_read = $repo->progressive_cmd_output( + cmd => ['diff', @diff_opts, '-p', + ($format eq 'html' ? '--full-index' : ()), + $parent_sha1, $sha1, '--'], + separator => "\n", cache => 1); } else { die_error(400, "Missing one of the blob diff parameters") unless %diffinfo; @@ -4929,14 +5275,13 @@ sub git_blobdiff { if ($format eq 'html') { print "
\n"; - git_patchset_body($fd, [ \%diffinfo ], $hash_base, $hash_parent_base); - close $fd; + git_patchset_body($diff_read, [ \%diffinfo ], $hash_base, $hash_parent_base); print "
\n"; # class="page_body" git_footer_html(); } else { - while (my $line = <$fd>) { + while (my $line = $diff_read->()) { $line =~ s!a/($hash|$hash_parent)!'a/'.esc_path($diffinfo{'from_file'})!eg; $line =~ s!b/($hash|$hash_parent)!'b/'.esc_path($diffinfo{'to_file'})!eg; @@ -4944,9 +5289,9 @@ sub git_blobdiff { last if $line =~ m!^\+\+\+!; } - local $/ = undef; - print <$fd>; - close $fd; + while (my $line = $diff_read->()) { + print $line; + } } } @@ -4957,12 +5302,20 @@ sub git_blobdiff_plain { sub git_commitdiff { my $format = shift || 'html'; $hash ||= $hash_base || "HEAD"; - my %co = parse_commit($hash) - or die_error(404, "Unknown commit object"); + my $sha1 = git_get_sha1_or_die($hash, 'commit'); + my %co = parse_commit($hash); # choose format for commitdiff for merge - if (! defined $hash_parent && @{$co{'parents'}} > 1) { - $hash_parent = '--cc'; + my $hash_parent_param = $hash_parent; + # Unfortunately we can pass in command line options as + # $hash_parent. + if ($hash_parent_param && $hash_parent_param ne '-c' && + $hash_parent_param ne '--cc') { + $hash_parent_param = + git_get_sha1_or_die($hash_parent_param, 'commit'); + } + if (! defined $hash_parent_param && @{$co{'parents'}} > 1) { + $hash_parent_param = '--cc'; } # we need to prepare $formats_nav before almost any parameter munging my $formats_nav; @@ -4971,8 +5324,7 @@ sub git_commitdiff { $cgi->a({-href => href(action=>"commitdiff_plain", -replay=>1)}, "raw"); - if (defined $hash_parent && - $hash_parent ne '-c' && $hash_parent ne '--cc') { + if (defined $hash_parent) { # commitdiff with two commits given my $hash_parent_short = $hash_parent; if ($hash_parent =~ m/^[0-9a-fA-F]{40}$/) { @@ -5004,7 +5356,7 @@ sub git_commitdiff { ')'; } else { # merge commit - if ($hash_parent eq '--cc') { + if ($hash_parent && $hash_parent eq '--cc') { $formats_nav .= ' | ' . 
$cgi->a({-href => href(action=>"commitdiff", hash=>$hash, hash_parent=>'-c')}, @@ -5026,7 +5378,6 @@ sub git_commitdiff { } } - my $hash_parent_param = $hash_parent; if (!defined $hash_parent_param) { # --cc for multiple parents, --root for parentless $hash_parent_param = @@ -5034,28 +5385,26 @@ sub git_commitdiff { } # read commitdiff - my $fd; + my $diff_read; my @difftree; if ($format eq 'html') { - open $fd, "-|", git_cmd(), "diff-tree", '-r', @diff_opts, - "--no-commit-id", "--patch-with-raw", "--full-index", - $hash_parent_param, $hash, "--" - or die_error(500, "Open git-diff-tree failed"); - - while (my $line = <$fd>) { + $diff_read = $repo->progressive_cmd_output( + cmd => ['diff-tree', '-r', @diff_opts, '--no-commit-id', + '--patch-with-raw', '--full-index', + $hash_parent_param, $sha1, '--'], + separator => "\n", cache => 1); + while (my $line = $diff_read->()) { chomp $line; # empty line ends raw part of diff-tree output last unless $line; push @difftree, scalar parse_difftree_raw_line($line); } - - } elsif ($format eq 'plain') { - open $fd, "-|", git_cmd(), "diff-tree", '-r', @diff_opts, - '-p', $hash_parent_param, $hash, "--" - or die_error(500, "Open git-diff-tree failed"); - } else { - die_error(400, "Unknown commitdiff format"); + die unless $format eq 'plain'; + $diff_read = $repo->progressive_cmd_output( + cmd => ['diff-tree', '-r', @diff_opts, '-p', + $hash_parent_param, $sha1, '--'], + separator => "\n", cache => 1); } # non-textual hash id's can be cached @@ -5081,8 +5430,7 @@ sub git_commitdiff { } } elsif ($format eq 'plain') { - my $refs = git_get_references("tags"); - my $tagname = git_get_rev_name_tags($hash); + my $tagname = git_get_rev_name_tags($sha1); my $filename = basename($project) . "-$hash.patch"; print $cgi->header( @@ -5112,17 +5460,15 @@ sub git_commitdiff { $use_parents ? @{$co{'parents'}} : $hash_parent); print "
\n"; - git_patchset_body($fd, \@difftree, $hash, + git_patchset_body($diff_read, \@difftree, $hash, $use_parents ? @{$co{'parents'}} : $hash_parent); - close $fd; print "\n"; # class="page_body" git_footer_html(); } elsif ($format eq 'plain') { - local $/ = undef; - print <$fd>; - close $fd - or print "Reading git-diff-tree failed\n"; + while (my $line = $diff_read->()) { + print $line; + } } } @@ -5131,37 +5477,29 @@ sub git_commitdiff_plain { } sub git_history { - if (!defined $hash_base) { - $hash_base = git_get_head_hash($project); - } - if (!defined $page) { - $page = 0; - } + $hash_base ||= git_get_head_hash(); + $page ||= 0; my $ftype; - my %co = parse_commit($hash_base) - or die_error(404, "Unknown commit object"); + my %co = parse_commit($hash_base); my $refs = git_get_references(); my $limit = sprintf("--max-count=%i", (100 * ($page+1))); my @commitlist = parse_commits($hash_base, 101, (100 * $page), - $file_name, "--full-history") - or die_error(404, "No such file or directory on given branch"); + $file_name, "--full-history"); if (!defined $hash && defined $file_name) { # some commits could have deleted file in question, # and not have it in tree, but one of them has to have it for (my $i = 0; $i <= @commitlist; $i++) { - $hash = git_get_hash_by_path($commitlist[$i]{'id'}, $file_name); + $hash = git_get_sha1_by_path($commitlist[$i]{'id'}, $file_name); last if defined $hash; } } if (defined $hash) { + git_get_sha1_or_die($hash); $ftype = git_get_type($hash); } - if (!defined $ftype) { - die_error(500, "Unknown type of object"); - } my $paging_nav = ''; if ($page > 0) { @@ -5202,13 +5540,8 @@ sub git_search { if (!defined $searchtext) { die_error(400, "Text field is empty"); } - if (!defined $hash) { - $hash = git_get_head_hash($project); - } + $hash ||= git_get_head_hash(); my %co = parse_commit($hash); - if (!%co) { - die_error(404, "Unknown commit object"); - } if (!defined $page) { $page = 0; } @@ -5280,12 +5613,15 @@ sub git_search { print "\n"; my 
$alternate = 1; $/ = "\n"; - open my $fd, '-|', git_cmd(), '--no-pager', 'log', @diff_opts, - '--pretty=format:%H', '--no-abbrev', '--raw', "-S$searchtext", - ($search_use_regexp ? '--pickaxe-regex' : ()); + my $pickaxe_read = $repo->progressive_cmd_output( + cmd => ['log', @diff_opts, '--pretty=format:%H', + '--no-abbrev', '--raw', "-S$searchtext", + ($search_use_regexp ? '--pickaxe-regex' : ()), + git_get_head_hash()], + separator => "\n", cache => 1); undef %co; my @files; - while (my $line = <$fd>) { + while (my $line = $pickaxe_read->()) { chomp $line; next unless $line; @@ -5326,7 +5662,6 @@ sub git_search { "
\n"; } } - close $fd; # finish last commit (warning: repetition!) if (%co) { @@ -5349,12 +5684,13 @@ sub git_search { print "
\n"; my $alternate = 1; my $matches = 0; - $/ = "\n"; - open my $fd, "-|", git_cmd(), 'grep', '-n', - $search_use_regexp ? ('-E', '-i') : '-F', - $searchtext, $co{'tree'}; + my $grep_read = $repo->progressive_cmd_output( + cmd => ['grep', '-n', + $search_use_regexp ? ('-E', '-i') : '-F', + $searchtext, $co{'tree'}], + separator => "\n", cache => 1); my $lastfile = ''; - while (my $line = <$fd>) { + while (my $line = $grep_read->()) { chomp $line; my ($file, $lno, $ltext, $binary); last if ($matches++ > 1000); @@ -5406,7 +5742,6 @@ sub git_search { } else { print "
No matches found
\n"; } - close $fd; print "
\n"; } @@ -5458,7 +5793,7 @@ EOT } sub git_shortlog { - my $head = git_get_head_hash($project); + my $head = git_get_head_hash(); if (!defined $hash) { $hash = $head; } @@ -5500,33 +5835,22 @@ sub git_feed { } # log/feed of current (HEAD) branch, log of given branch, history of file/directory - my $head = $hash || 'HEAD'; - my @commitlist = parse_commits($head, 150, 0, $file_name); + my $head = $hash || $repo->get_sha1('HEAD'); # can be undef + my %last_modified = parse_date($repo->get_last_modification); - my %latest_commit; - my %latest_date; my $content_type = "application/$format+xml"; if (defined $cgi->http('HTTP_ACCEPT') && $cgi->Accept('text/xml') > $cgi->Accept($content_type)) { # browser (feed reader) prefers text/xml $content_type = 'text/xml'; } - if (defined($commitlist[0])) { - %latest_commit = %{$commitlist[0]}; - %latest_date = parse_date($latest_commit{'author_epoch'}); - print $cgi->header( - -type => $content_type, - -charset => 'utf-8', - -last_modified => $latest_date{'rfc2822'}); - } else { - print $cgi->header( - -type => $content_type, - -charset => 'utf-8'); - } + print $cgi->header( + -type => $content_type, -charset => 'utf-8', + -last_modified => $last_modified{'rfc2822'}); # Optimization: skip generating the body if client asks only # for Last-Modified date. - return if ($cgi->request_method() eq 'HEAD'); + return if $cgi->request_method() && $cgi->request_method() eq 'HEAD'; # header variables my $title = "$site_name - $project/$action"; @@ -5593,15 +5917,11 @@ XML # not twice as wide as tall: 72 x 27 pixels print "" . esc_url($logo) . "\n"; } - if (! 
%latest_date) { - # dummy date to keep the feed valid until commits trickle in: - print "1970-01-01T00:00:00Z\n"; - } else { - print "$latest_date{'iso-8601'}\n"; - } + print "$last_modified{'iso-8601'}\n"; } # contents + my @commitlist = parse_commits($head, 150, 0, $file_name) if $head; for (my $i = 0; $i <= $#commitlist; $i++) { my %co = %{$commitlist[$i]}; my $commit = $co{'id'}; @@ -5612,13 +5932,11 @@ XML my %cd = parse_date($co{'author_epoch'}); # get list of changed files - open my $fd, "-|", git_cmd(), "diff-tree", '-r', @diff_opts, - $co{'parent'} || "--root", - $co{'id'}, "--", (defined $file_name ? $file_name : ()) - or next; - my @difftree = map { chomp; $_ } <$fd>; - close $fd - or next; + my @difftree = split "\n", $repo->cmd_output( + cmd => ['diff-tree', '-r', @diff_opts, + $co{'parent'} || '--root', $co{'id'}, '--', + (defined $file_name ? $file_name : ())], + cache => 2); # print element (entry, item) my $co_url = href(-full=>1, action=>"commitdiff", hash=>$commit); @@ -5733,16 +6051,8 @@ XML foreach my $pr (@list) { my %proj = %$pr; - my $head = git_get_head_hash($proj{'path'}); - if (!defined $head) { - next; - } - $git_dir = "$projectroot/$proj{'path'}"; - my %co = parse_commit($head); - if (!%co) { - next; - } - + next unless $repo_root->repo(repo_dir => $proj{'path'}) + ->get_sha1('HEAD'); my $path = esc_html(chop_str($proj{'path'}, 25, 5)); my $rss = "$my_url?p=$proj{'path'};a=rss"; my $html = "$my_url?p=$proj{'path'};a=summary"; diff --git a/t/t9500-gitweb-standalone-no-errors.sh b/t/t9500-gitweb-standalone-no-errors.sh index 46ba19be7d..8edbfbd200 100755 --- a/t/t9500-gitweb-standalone-no-errors.sh +++ b/t/t9500-gitweb-standalone-no-errors.sh @@ -54,7 +54,7 @@ gitweb_run () { # written to web server logs, so we are not interested in that: # we are interested only in properly formatted errors/warnings rm -f gitweb.log && - perl -- "$TEST_DIRECTORY/../gitweb/gitweb.perl" \ + "$PERL_PATH" -- "$TEST_DIRECTORY/../gitweb/gitweb.perl" \ 
>/dev/null 2>gitweb.log && if grep -q -s "^[[]" gitweb.log >/dev/null; then false; else true; fi @@ -71,7 +71,7 @@ safe_chmod () { . ./test-lib.sh -perl -MEncode -e 'decode_utf8("", Encode::FB_CROAK)' >/dev/null 2>&1 || { +"$PERL_PATH" -MEncode -e 'decode_utf8("", Encode::FB_CROAK)' >/dev/null 2>&1 || { test_expect_success 'skipping gitweb tests, perl version is too old' : test_done exit diff --git a/t/t9503-gitweb-Mechanize.sh b/t/t9503-gitweb-Mechanize.sh index 4652980a09..169eed9ca5 100755 --- a/t/t9503-gitweb-Mechanize.sh +++ b/t/t9503-gitweb-Mechanize.sh @@ -89,6 +89,16 @@ test_expect_success 'set up test repository' ' test_tick && git merge b ' +# set up empty repository +# create this as a subdirectory of trash directory; not pretty, but simple +test_expect_success 'set up empty repository' ' + + mkdir empty.git && + cd empty.git && + git init --bare && + cd .. +' + # set up gitweb configuration safe_pwd="$("$PERL_PATH" -MPOSIX=getcwd -e 'print quotemeta(getcwd)')" large_cache_root="$TEST_DIRECTORY/t9503/large_cache.tmp" diff --git a/t/t9503/test.pl b/t/t9503/test.pl index f5d9d7edb0..7685352a10 100755 --- a/t/t9503/test.pl +++ b/t/t9503/test.pl @@ -271,6 +271,11 @@ if (test_page '', 'project list (implicit)') { "title contains $site_name"); $mech->content_contains('t9503-gitweb-Mechanize test repository', 'lists test repository (by description)'); + if (follow_link( { text => 'empty.git' }, 'empty git repository')) { + # Just check that the empty.git summary page is linked + # and doesn't die. 
+ $mech->back; + } } @@ -578,18 +583,12 @@ for my $action qw( blame blob blob_plain ) { "$action: look up existent file"); test_page("?p=.git;a=$action;f=does_not_exist;hb=HEAD", "$action: look up non-existent file", 404); - TODO: { - local $TODO = 'wrong error code (but using Git::Repo will fix this)'; - test_page("?p=.git;a=$action;f=$directories[0];hb=HEAD", - "$action: look up directory", 400); - } -} -TODO: { - local $TODO = 'wrong error code (but using Git::Repo will fix this)'; - test_page("?p=.git;a=tree;f=$files[0];hb=HEAD", - 'tree: look up existent file', 400); + test_page("?p=.git;a=$action;f=$directories[0];hb=HEAD", + "$action: look up directory", 400); } # Pass valid and invalid paths to tree action +test_page("?p=.git;a=tree;f=$files[0];hb=HEAD", + 'tree: look up existent file', 400); test_page("?p=.git;a=tree;f=does_not_exist;hb=HEAD", 'tree: look up non-existent file', 404); test_page("?p=.git;a=tree;f=$directories[0];hb=HEAD", -- 2.11.4.GIT