From f004b4b5a4f94032664ec66f9eb55cd58d0994ec Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Wed, 24 Aug 2016 14:10:59 -0700 Subject: [PATCH] fast-import mirrors: support false $Girocco::Config::delay_gfi_redelta Packs created by git fast-import are very poor quality (very poor deltas and very poor object order). Girocco ALWAYS performs redeltification on them at some point to correct this problem. Only foreign vcs mirrors that use git fast-import (e.g. darcs, bzr, hg) can generate such packs (git svn mirrors never generate such packs). Previously Girocco always delayed this redeltification until the next gc (full or mini) triggers. However this leaves open the possibility that a client could end up fetching some of these undesirable deltas during the period where these packs (generated by mirror update fetches) exist but gc has not yet triggered to redeltify them. Introduce a new setting $Girocco::Config::delay_gfi_redelta that is set to true (to preserve the previous behavior) but which can be changed to false in order to cause the poor quality git fast-import packs to undergo redeltification at the next opportunity (a "mini" gc is immediately requested when such a pack is generated). While changing $Girocco::Config::delay_gfi_redelta to false is not recommended, there may be some situations in which eliminating the poor git fast-import created packs as quickly as possible is desirable and so the option to do so is now available. Signed-off-by: Kyle J. McKay --- Girocco/Config.pm | 25 +++++++++++++++++++++++++ jobd/gc.sh | 8 ++++++++ jobd/update.sh | 9 ++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/Girocco/Config.pm b/Girocco/Config.pm index 64bb7f7..32b5046 100644 --- a/Girocco/Config.pm +++ b/Girocco/Config.pm @@ -760,6 +760,30 @@ our @blocked_tags = (); # a per-repository basis. See the description of it in gc.sh. 
our $new_delta_threshold = undef; +# This setting is irrelevant unless foreign vcs mirrors that use git fast-import +# are enabled (e.g. $mirror_darcs, $mirror_bzr or $mirror_hg -- $mirror_svn does +# NOT use git fast-import and is not affected by this setting). +# The packs generated by git fast-import are very poor quality. For this reason +# they ALWAYS have their deltas recomputed at some point. Normally this is +# delayed until the next full (or mini) gc takes place. For this reason a full +# gc is always scheduled immediately after a fresh mirror clone completes. +# However, in the case of normal mirror updates, several git fast-import created +# packs may exist as a result of changes fetched during the normal mirror update +# process. These packs will persist (with their git fast-import poor quality) +# until the next full (or mini) gc triggers. The bad deltas in these update +# packs could be sent down to clients who fetch updates before the next gc +# triggers. To reduce (i.e. practically eliminate) the likelihood of this +# occurring, this setting can be changed to a false (0 or undef) value in which +# case after each mirror update of a git fast-import mirror, any newly created +# git fast-import packs (as a result of the mirror update running) will have +# their deltas recomputed shortly thereafter instead of waiting for the next gc. +# Recomputing deltas immediately (almost immediately) will result in an extra +# redeltification step (with associated CPU cost) that would otherwise not +# occur and, in some cases (mainly large repositories), could ultimately result +# in slightly less efficient deltas being retained. +# RECOMMENDED VALUE: 1 +our $delay_gfi_redelta = 1; + # If this is set to a true value, then core.packedGitWindowSize will be set # to 1 MiB (the same as if Git was compiled with NO_MMAP set). 
If this is NOT # set, core.packedGitWindowSize will be set to 32 MiB (even on 64-bit) to avoid @@ -824,6 +848,7 @@ our $httpsdnsname = ($httpspushurl =~ m,https://([A-Za-z0-9.-]+),i) ? lc($1) : u or die "Girocco::Config \$svn_log_window_size must be undef or numeric"; (not $posix_sh_bin or $posix_sh_bin !~ /\s/) or die "Girocco::Config: \$posix_sh_bin must not contain any whitespace"; (not $perl_bin or $perl_bin !~ /\s/) or die "Girocco::Config: \$perl_bin must not contain any whitespace"; +!$delay_gfi_redelta and $delay_gfi_redelta = undef; !$git_no_mmap and $git_no_mmap = undef; !$suppress_x_girocco and $suppress_x_girocco = undef; diff --git a/jobd/gc.sh b/jobd/gc.sh index d59d8ef..27f1443 100755 --- a/jobd/gc.sh +++ b/jobd/gc.sh @@ -368,6 +368,14 @@ if [ -n "$isminigc" ]; then progress "+ [$proj] mini garbage check (`date`)" make_svn_pack fi + if [ -z "$cfg_delay_gfi_redelta" ] && [ -f gfi-packs -a -s gfi-packs ] && is_gfi_mirror; then + # $Girocco::Config::delay_gfi_redelta is false, force redeltification now + if [ -z "$miniactive" ]; then + miniactive=1 + progress "+ [$proj] mini garbage check (`date`)" + fi + repack_gfi_packs + fi # If there aren't at least 10 non-keep, non-bitmap, non-bndl packs then # don't actually process them yet lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl --quiet" diff --git a/jobd/update.sh b/jobd/update.sh index 4f264a4..a4bea39 100755 --- a/jobd/update.sh +++ b/jobd/update.sh @@ -279,7 +279,7 @@ case "$url" in sleep 1 fi GIT_SSL_NO_VERIFY=1 bang git remote update $pruneopt - if is_gfi_mirror_url "$url"; then + if [ -e .gfipack ] && is_gfi_mirror_url "$url"; then find objects/pack -type f -newer .gfipack -name "pack-$octet20.pack" -print >>gfi-packs rm -f .gfipack fi @@ -321,6 +321,13 @@ if [ -e .svnpack ] && ! [ -e .needsgc ]; then >.needsgc fi +# Force a mini-gc if $Girocco::Config::delay_gfi_redelta is false and there's +# at least one gfi pack present now +if [ -z "$cfg_delay_gfi_redelta" ] && ! 
[ -e .needsgc ] && \ + [ -f gfi-packs -a -s gfi-packs ] && is_gfi_mirror_url "$url"; then + >.needsgc +fi + # Activate a mini-gc if there are at least 20 packs present now if ! [ -e .needsgc ]; then packs="$(list_packs --quiet --count --exclude-no-idx objects/pack || :)" -- 2.11.4.GIT