From 5ba0c323cb77e6d8b363aa6f156543f9727890a2 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Tue, 6 Sep 2016 14:56:08 -0700 Subject: [PATCH] gc.sh: compact non-current reflogs If there's a large amount of activity on a project and reflogs are kept for many days, they could potentially consume a fair amount of space (although still likely much less than the repository itself). Mitigate this by gzip compressing log files for other than the current date to greatly reduce the space required while being very careful to handle "accidents" where both a compressed and uncompressed log file for the same date end up somehow coexisting simultaneously. Signed-off-by: Kyle J. McKay --- jobd/gc.sh | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/jobd/gc.sh b/jobd/gc.sh index 7da3079..935fa99 100755 --- a/jobd/gc.sh +++ b/jobd/gc.sh @@ -66,12 +66,53 @@ createlock() { # find per-process ref log files then it must be a push project and the only # thing that would write directly to the main per-day log file would be a # mirror project so there's actually no conflict. +# Also, if the clock is wonky (or was futzed with) we may have both YYYYMMDD +# and YYYYMMDD.gz present in which case combine them into YYYYMMDD coalesce_reflogs() { [ -d reflogs ] || return 0 rm -f .gc_failed + find reflogs -maxdepth 1 -type f -name "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" -print | + while read -r rname; do + if [ -e "$rname.gz" ]; then + if [ -s "$rname" ]; then + # Presumably the .gz file must have been created before the non-gz + # file since it had to be uncompressed at some point therefore + # we need to append the non-gz contents to it but keep the non-gz + # contents timestamp so we rename to YYYYMMDD_ which will sort first + # and be picked up in the next step if we are interrupted in the middle. + # If a YYYYMMDD_ file already exists we append to it and transfer the + # timestamp. Finally we transfer the YYYYMMDD_ timestamp to the result + # and remove the YYYYMMDD_ temporary file leaving the result uncompressed. + if [ -e "${rname}_" ]; then + cat "$rname" >>"${rname}_" + touch -r "$rname" "${rname}_" + rm -f "$rname" + ! [ -e "$rname" ] + else + mv "$rname" "${rname}_" + fi + gzip -d "$rname.gz" >"$rname" + touch -r "${rname}_" "$rname" + rm -f "${rname}_" + else + # Just remove the empty file to resolve the problem + rm -f "$rname" + fi + fi + done find reflogs -maxdepth 1 -type f -name "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_*" -print | LC_ALL=C sort | while read -r rname; do logname="${rname%%_*}" + # If someone's been futzing with the date, the file we want to + # append to could already have been compressed, so we just uncompress + # it here. The previous block guarantees we do not have both a compressed + # and uncompressed version present at the same time. + if [ -e "$logname.gz" ]; then + gzip -d "$logname.gz" >"$logname" touch -r "$rname" "$logname" rm -f "$rname" @@ -93,6 +134,18 @@ prune_reflogs() { find reflogs -maxdepth 1 -type f -mmin "+$exp" -print0 | xargs -0 rm -f } +# Compact any reflogs that are not today's UTC date unless a .gz version exists +compact_reflogs() { + [ -d reflogs ] || return 0 + _td="reflogs/$(TZ=UTC date '+%Y%m%d')" + find reflogs -maxdepth 1 -type f -name "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" -print | + while read -r rname; do + [ "$rname" != "$_td" ] || continue + ! [ -e "$rname.gz" ] || continue + gzip -9 "$rname" .pack file or # ANY sha-1 files in objects is_dirty() { @@ -424,6 +477,7 @@ if [ -n "$isminigc" ]; then remove_crud coalesce_reflogs prune_reflogs + compact_reflogs miniactive= if [ -f .svnpack ] && [ -n "$svn_mirror" ]; then miniactive=1 @@ -529,6 +583,7 @@ remove_crud # Always perform reflogs maintenance coalesce_reflogs prune_reflogs +compact_reflogs # Run 'git svn gc' now for svn mirrors if [ -n "$svn_mirror" ]; then -- 2.11.4.GIT