From 886f0f66ff9af7fbbf07bdf9426a6d740c804c74 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Sun, 14 Sep 2014 18:35:57 -0700 Subject: [PATCH] gc.sh: more crud removal and dirty detection Remove stale temporary pack files. In svn mirrors remove stale temp files. In git fast import mirrors remove stale crash files. Never skip gc if the repo is dirty (more than one .pack file or any loose objects present). --- jobd/gc.sh | 90 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 20 deletions(-) diff --git a/jobd/gc.sh b/jobd/gc.sh index 22206f5..2e460d4 100755 --- a/jobd/gc.sh +++ b/jobd/gc.sh @@ -47,6 +47,17 @@ createlock() { return 1 } +# return true if there's more than one objects/pack-<sha>.pack file or +# ANY sha-1 files in objects +is_dirty() { + _packs=$(find objects/pack -type f -name "pack-$octet20.pack" -print | wc -l) + if [ "$_packs" != 1 ] && [ "$_packs" != 0 ]; then + return 0 + fi + _objs=$(find objects/$octet -type f -name "$octet19" -print 2>/dev/null | wc -l) + [ "$_objs" != 0 ] +} + # if the current directory is_gfi_mirror then repack all packs listed in gfi-packs repack_gfi_packs() { is_gfi_mirror || return 0 @@ -130,6 +141,8 @@ fi # If lastgc is NOT set or lastreceive is NOT set we MUST run gc # If we are a fork and lastparentgc is NOT set we MUST run gc +# If the repo is dirty after removing any crud we MUST run gc + gcstart="$(date "$datefmt")" skipgc= isfork= @@ -165,7 +178,7 @@ if [ "$(createlock "$lockf")" ]; then # 2) contains two fields (pid hostname) NO trailing NL # 3) the hostname is different OR the pid is still alive # then we exit as another active process is holding the lock - if [ "$(find "$lockf" -mmin -720 -print 2>/dev/null)" ]; then + if [ "$(find "$lockf" -maxdepth 1 -mmin -720 -print 2>/dev/null)" ]; then apid= ahost= read -r apid ahost ajunk < "$lockf" || : @@ -188,10 +201,62 @@ printf "%s %s" "$$" "$hn" > "$lockf.lock" chmod 0664 "$lockf.lock" mv -f "$lockf.lock" "$lockf" -if [ -n 
"$skipgc" ]; then - progress "= [$proj] garbage check nothing to do (`date`)" +# Remove any stale pack remnants that are more than an hour old. +# Stale pack fragments are defined as any pack-<sha>.ext where .ext is NOT +# .pack AND the corresponding .pack DOES NOT exist. A bunch of stale +# pack-<sha>.idx files without their corresponding .pack files are worthless +# and just waste space. Normally there shouldn't be any remnants but actually +# this can happen when things are interrupted at just the wrong time. +# Note that the objects/pack directory is created by git init and should +# always exist. +find objects/pack -maxdepth 1 -type f -mmin +60 -name "pack-$octet20.?*" -print | \ +sed -e 's/^objects\/pack\/pack-//; s/\..*$//' | sort -u | \ +while read packsha; do + [ ! -e "objects/pack/pack-$packsha.pack" ] || continue + rm -f "objects/pack/pack-$packsha".?* +done + +# Remove any stale tmp_pack_* files that are more than 12 hours old. +find objects/pack -maxdepth 1 -type f -mmin +720 -name "tmp_pack_?*" -print | \ +while read packtmp; do + rm -f "$packtmp" +done +find objects/pack -maxdepth 1 -type f -mmin +720 -name "packtmp-?*" -print | \ +while read packtmp; do + rm -f "$packtmp" +done + +# Remove any stale git-svn temp files that are more than 12 hours old. +# The git-svn process creates temp files with random 10 character names +# in the root of $GIT_DIR. Unfortunately they do not have a recognizable +# prefix, so we just have to kill any files with a 10-character name. We +# do this only for git-svn mirrors. All characters are chosen from +# [A-Za-z0-9_] so we can at least check that and fortunately the only +# collision is 'FETCH_HEAD' but that shouldn't matter. +if is_svn_mirror; then + _randchar='[A-Za-z0-9_]' + _randchar2="$_randchar$_randchar" + _randchar4="$_randchar2$_randchar2" + _randchar10="$_randchar4$_randchar4$_randchar2" + find . 
-maxdepth 1 -type f -mmin +720 -name "$_randchar10" -print | \ + while read tmpcrud; do + rm -f "$tmpcrud" + done +fi + +# Remove any stale fast_import_crash_ files that are more than 3 days old. +if is_gfi_mirror; then + find . -maxdepth 1 -type f -mmin +4320 -name "fast_import_crash_?*" -print | \ + while read fastcrash; do + rm -f "$fastcrash" + done +fi + +# Do not skip gc if the repo is dirty +if [ -n "$skipgc" ] && ! is_dirty; then + progress "= [$proj] garbage check nothing but crud removal to do (`date`)" config_set lastgc "$gcstart" - rm "$lockf" + rm -f "$lockf" exit 0 fi @@ -213,21 +278,6 @@ fi progress "+ [$proj] garbage check (`date`)" -# Remove any stale pack remnants that are more than an hour old. -# Stale pack fragments are defined as any pack-<sha>.ext where .ext is NOT -# .pack AND the corresponding .pack DOES NOT exist. A bunch of stale -# pack-<sha>.idx files without their corresponding .pack files are worthless -# and just waste space. Normally there shouldn't be any remnants but actually -# this can happen when things are interrupted at just the wrong time. -# Note that the objects/pack directory is created by git init and should -# always exist. -find objects/pack -maxdepth 1 -type f -mmin +60 -name "pack-$octet20.?*" | \ -sed -e 's/^objects\/pack\/pack-//; s/\..*$//' | sort -u | \ -while read packsha; do - [ ! -e "objects/pack/pack-$packsha.pack" ] || continue - rm -f "objects/pack/pack-$packsha".?* -done - # safe pruning: we put all our objects in all forks, then we can # safely get rid of extra ones; repacks in forks will get rid of # the redundant ones again then @@ -339,6 +389,6 @@ config_set_raw girocco.reposizek "${reposizek:-0}" # and the next future gc could be incorrectly skipped if we used the current # timestamp here instead config_set lastgc "$gcstart" -rm "$lockf" +rm -f "$lockf" progress "- [$proj] garbage check (`date`)" -- 2.11.4.GIT