From 03111eecdc29d073a1316c324b8ed47081c65131 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Sat, 8 Oct 2016 00:08:14 -0700 Subject: [PATCH] gc.sh: always generate optimized packs Since Git version 1.7.7, git pack-objects generates much more optimized packs, but ONLY if at least one of the commits being packed is the target of a ref in the refs/tags/* namespace (any kind of tag, doesn't matter). While these optimized packs are very slightly smaller, the big win involves disk accesses which can be significantly faster using these better optimized packs. (For example, git fsck can run in less than half the time with such a pack!) Therefore force git repack to always run in the presence of a refs/tags/* ref (a lightweight tag is used) that refers to one of the commits being packed so that optimized packs are always produced. This guarantees that very large repositories without any tags produce optimized packs when they are repacked. The strategy used is to create a temporary subdirectory with symbolic links to the relevant items from the parent (config, info, objects, refs) and a copy of HEAD and packed-refs where the extra ref has been appended to the copy of packed-refs. When git repack is run in the subdirectory it packs the same things and puts the resulting packs in the same place, but always produces optimized packs. Signed-off-by: Kyle J. 
McKay --- jobd/gc.sh | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/jobd/gc.sh b/jobd/gc.sh index 935fa99..548180d 100755 --- a/jobd/gc.sh +++ b/jobd/gc.sh @@ -319,6 +319,42 @@ lock_gc() { mv -f "$lockf.lock" "$lockf" } +# Create a repack subdirectory such that running repack in it will pack the +# same things that a pack in the normal directory would except that the pack +# is guaranteed to be generated in an optimized order by adding a suitable +# synthesized ref in the refs/tags namespace (yes, pack-objects.c really does +# behave differently depending on the contents of the refs/tags namespace). +# Before calling this, pack-refs --all MUST be performed or the wrong pack +# will end up being made. +# If a ref deletion is pushed after making the repack subdir but before the +# actual repack, the discarded objects will be packed -- no big deal, +# they'll get discarded the next time gc runs. +# If a fast-forward ref update is pushed after making the repack subdir but +# before the actual repack, it will be picked up and the new objects packed +# (subject to the normal git repack race about picking such updates up). +# If a non-fast-forward ref update is pushed after making the repack subdir but +# before the actual repack, it will be picked up like a fast-forward update but +# the discarded objects will be included like a ref deletion (until the next +# scheduled gc takes place). +make_repack_dir() { + ! [ -d repack ] || rm -rf repack + ! [ -d repack ] || { echo >&2 "[$proj] cannot remove repack subdirectory"; exit 1; } + mkdir repack + [ -d info ] || mkdir info + ln -s ../config repack/config + ln -s ../info repack/info + ln -s ../objects repack/objects + ln -s ../refs repack/refs + sed 's, refs/, refs/!/,' < packed-refs > repack/packed-refs + optref="$(git rev-list -n 1 --all 2>/dev/null || :)" + if [ -n "$optref" ]; then + echo "$optref refs/tags/!" 
>> repack/packed-refs + echo "$optref" > repack/HEAD + else + cat HEAD > repack/HEAD + fi +} + # Remove any crud that's been left behind by interrupted operations # that did not clean up after themselves remove_crud() { @@ -331,6 +367,10 @@ remove_crud() { # there were a lot of refs. rm -f FETCH_HEAD + # remove any existing pack_is_complete_test or repack subdirectories + # If either exists when this function is called it's crud + rm -rf pack_is_complete_test repack + # Remove any stale pack remnants that are more than an hour old. # Stale pack fragments are defined as any pack-.ext where .ext is NOT # .pack AND the corresponding .pack DOES NOT exist. A bunch of stale @@ -720,6 +760,7 @@ fi # git pack-refs --all +make_repack_dir touch .gc_in_progress rm -f .gc_failed bundles/* rm -f objects/pack/pack-*.bndl @@ -765,7 +806,12 @@ nobm= [ -z "$var_have_git_172" ] || ! [ -s objects/info/alternates ] || \ nobm='-c repack.writebitmaps=false -c pack.writebitmaps=false' progress "~ [$proj] running full gc repack${nobm:+ (bitmaps disabled)}" +cd repack +# We run git repack from the repack subdirectory so we can force optimized packs +# to be generated even for repositories that do not have any tagged commits git $nobm repack $packopts -A -d -l $quiet $newdeltas $@ +cd .. +rm -rf repack [ ! -e .gc_failed ] || exit 1 # These, if they exist, are now meaningless and need to be removed rm -f gfi-packs .needsgc .svnpack .svnpackgc -- 2.11.4.GIT