From 6ddbb9e1ac16ff0a9ec09018af0fe4887d879000 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Wed, 31 Aug 2016 09:57:43 -0700 Subject: [PATCH] bang: throttle retries When a mirror becomes "banged", it does not wait until the next $Girocco::Config::min_mirror_interval before trying again; rather, it may try again as early as the jobd.pl restart delay between queue runs (which defaults to 300 seconds). While this retry behavior seems appropriate for brief transient failures, allowing the mirror to start updating again almost immediately after service is restored, it does not work so well for lasting outages of a mirror source, as we keep banging away on the non-working mirror trying to get an update. Instead, after an initial period of rapid retries (subject only to the jobd.pl restart delay) after a mirror update failure, back off and only retry the mirror once every min_mirror_interval so that we do not unnecessarily waste resources trying to restart updates for a mirror whose source is apparently suffering from a long-lasting outage. This reduces the impact on the rest of the mirrors, while the broken mirror will still get updated within one min_mirror_interval after it starts working again. We accomplish this feat by updating the gitweb.lastrefresh timestamp but NOT the modification date of the .last_refresh file. Since gitweb only looks at the last modification timestamp on the .last_refresh file when displaying the summary page, the summary page will continue to show the date of the last successful refresh even after we have updated the gitweb.lastrefresh timestamp in order to throttle back on unnecessary retries for a failed mirror. And since update.sh and jobd.pl only look at the gitweb.lastrefresh timestamp, they will properly throttle back on updates for failed mirrors. Signed-off-by: Kyle J.
McKay --- jobd/update.sh | 10 ++++++++++ shlib.sh | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/jobd/update.sh b/jobd/update.sh index add8c69..c69fac9 100755 --- a/jobd/update.sh +++ b/jobd/update.sh @@ -126,6 +126,16 @@ find objects/pack -maxdepth 1 -type f -name "tmp_idx_?*" -print0 | xargs -0 rm - bang_setup bang_action="update" +bang_trap() { + if [ -n "$1" ]; then + # Throttle retries + # Since gitweb shows the .last_refresh date, it's safe to update + # gitweb.lastrefresh to throttle the updates w/o corrupting the + # last refresh date display on the gitweb summary page + # It's therefore important that we do NOT touch .last_refresh here + config_set lastrefresh "$(date "$datefmt")" + fi +} bang echo "Project: $proj" bang echo "" diff --git a/shlib.sh b/shlib.sh index 41d7f42..af98afe 100644 --- a/shlib.sh +++ b/shlib.sh @@ -357,7 +357,11 @@ bang_failed() { } | mail -s "[$cfg_name] $proj $bang_action failed$rsubj" "$bangaddrs" git config --bool girocco.bang.messagesent true fi - bang_trap + bangthrottle= + [ $bangcount -lt 15 ] || \ + check_interval "girocco.bang.firstfail" $(( $cfg_min_mirror_interval * 3 / 2 )) || \ + bangthrottle=1 + bang_trap $bangthrottle [ -n "$bang_errcode" ] && [ "$bang_errcode" != "0" ] || bang_errcode=1 exit $bang_errcode } -- 2.11.4.GIT