From 878aff0a8b1dcf3673cc4f5e54c63eca68423b6d Mon Sep 17 00:00:00 2001
From: =?utf8?q?Jan=20Kr=C3=BCger?=
Date: Fri, 5 Nov 2010 01:54:41 +0100
Subject: [PATCH] Introduce new jobd written in Perl
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

New jobd, instead of calling the jobs one after another, can launch
several of them in parallel. It can also kill long-running jobs on the
assumption that they are hanging.

Signed-off-by: Jan Krüger
---
 jobd/Girocco |    1 +
 jobd/jobd.pl |  428 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 jobd/jobd.sh |   97 ++---------------
 3 files changed, 436 insertions(+), 90 deletions(-)
 create mode 120000 jobd/Girocco
 create mode 100755 jobd/jobd.pl
 rewrite jobd/jobd.sh (98%)

diff --git a/jobd/Girocco b/jobd/Girocco
new file mode 120000
index 0000000..f897157
--- /dev/null
+++ b/jobd/Girocco
@@ -0,0 +1 @@
+../Girocco
\ No newline at end of file
diff --git a/jobd/jobd.pl b/jobd/jobd.pl
new file mode 100755
index 0000000..ca33708
--- /dev/null
+++ b/jobd/jobd.pl
@@ -0,0 +1,428 @@
+#!/usr/bin/perl
+#
+# jobd - perform Girocco maintenance jobs
+#
+# Run with --help for details
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+use Pod::Usage;
+use POSIX ":sys_wait_h";
+
+use Girocco::Config;
+use Girocco::Project;
+use Girocco::User;
+
+# Options
+my $quiet;
+my $kill_after = 300;
+my $max_par = 3;
+my $lockfile = "/tmp/jobd.lock";
+my $all_once;
+my $one;
+
+######### Jobs {{{1
+
+# Job command: run update.sh for the job's project.  As in the old jobd.sh,
+# the update is only run when the repository has no .nofetch marker;
+# otherwise the job is skipped.
+sub update_project {
+	my $job = shift;
+	my $p = $job->{'project'};
+	check_project_exists($job) || return;
+	if (-e "$Girocco::Config::reporoot/$p.git/.nofetch") {
+		job_skip($job);
+		return;
+	}
+	exec_job_command($job, ["$Girocco::Config::basedir/jobd/update.sh", $p], $quiet);
+}
+
+# Job command: run gc.sh for the job's project.
+sub gc_project {
+	my $job = shift;
+	my $p = $job->{'project'};
+	check_project_exists($job) || return;
+	exec_job_command($job, ["$Girocco::Config::basedir/jobd/gc.sh", $p], $quiet);
+}
+
+# Completion callback: queue a gc job for the finished job's project.
+sub setup_gc {
+	my $job = shift;
+	queue_job(
+		project => $job->{'project'},
+		type => 'gc',
+		command => \&gc_project,
+	);
+}
+
+# Return 1 if the job's repository directory exists.  Otherwise print a
+# warning (unless --quiet), turn the job into a skipped one and return 0.
+sub check_project_exists {
+	my $job = shift;
+	my $p = $job->{'project'};
+	if (!-d "$Girocco::Config::reporoot/$p.git") {
+		error("Warning: skipping non-existent project: $job->{project}")
+			unless $quiet;
+		# job_skip needs the job to record the placeholder child on it.
+		job_skip($job);
+		return 0;
+	}
+	1;
+}
+
+# Queue an update job for $project; a gc job follows it whether the update
+# succeeds or fails.
+sub queue_one {
+	my $project = shift;
+	queue_job(
+		project => $project,
+		type => 'update',
+		command => \&update_project,
+		on_success => \&setup_gc,
+		on_error => \&setup_gc,
+	);
+}
+
+# Queue an update job for every project returned by get_full_list().
+sub queue_all {
+	queue_one($_) for (Girocco::Project->get_full_list());
+}
+
+######### Daemon operation {{{1
+
+my @queue;
+my @running;
+my $perpetual = 1;
+my $locked = 0;
+my $jobs_executed;
+my @jobs_killed;
+
+# SIGINT handler: drop the queued jobs, let the running ones finish and stop
+# looping.  A second SIGINT is routed to handle_exit.
+sub handle_softexit {
+	error("Waiting for outstanding jobs to finish... ".
+		"^C again to exit immediately");
+	@queue = ();
+	$perpetual = 0;
+	$SIG{'INT'} = \&handle_exit;
+}
+
+# SIGTERM / second SIGINT handler: kill all running jobs, remove the
+# lockfile if we created it, and exit.
+sub handle_exit {
+	error("Killing outstanding jobs...");
+	$SIG{'CHLD'} = 'IGNORE';
+	$SIG{'TERM'} = 'IGNORE';
+	for (@running) {
+		# A job whose fork failed has no pid, and kill with pid 0
+		# would signal our own process group.
+		kill 'KILL', $_->{'pid'} if $_->{'pid'};
+	}
+	unlink $lockfile if ($locked);
+	exit(0);
+}
+
+# SIGCHLD handler: reap exited children and mark their jobs as finished.
+sub handle_childgone {
+	# Do not clobber the error state of the interrupted code.
+	local ($!, $?);
+	# One signal may stand for several exited children, so reap until
+	# none is left; WNOHANG keeps this from blocking.
+	while ((my $pid = waitpid(-1, WNOHANG)) > 0) {
+		# List assignment: a scalar assignment would yield grep's match
+		# count instead of the job.
+		my ($child) = grep { $_->{'pid'} && $_->{'pid'} == $pid } @running;
+		# Unknown child, or a job already flagged as failed/killed.
+		next unless $child && $child->{'finished'} == 0;
+		if ($?) {
+			# XXX- we currently don't care
+		}
+		$child->{'finished'} = 2;
+		$jobs_executed++;
+	}
+	# Just to be safe
+	$SIG{'CHLD'} = \&handle_childgone;
+}
+
+# Append a job to the queue.  %opts holds project, type and command, plus
+# optional on_success / on_error callbacks.
+sub queue_job {
+	my %opts = @_;
+	$opts{'queued_at'} = time;
+	push @queue, \%opts;
+}
+
+# Put $job on the running list and start it through its command callback.
+sub run_job {
+	my $job = shift;
+
+	push @running, $job;
+	$job->{'command'}->($job);
+}
+
+# Log tag for a job: "[type::project]".
+sub _job_name {
+	my $job = shift;
+	"[".$job->{'type'}."::".$job->{'project'}."]";
+}
+
+# Fork a child that execs @$command for $job; if $err_only is true the
+# child's stdout goes to /dev/null.  'finished' is 0 while the child runs,
+# 1 on failure/kill and 2 once the child has been reaped.
+# Only one of those per job!
+sub exec_job_command {
+	my ($job, $command, $err_only) = @_;
+
+	# Set up the bookkeeping before forking: handle_childgone finds jobs
+	# by pid and must not meet one whose state is still undefined.
+	$job->{'finished'} = 0;
+	$job->{'started_at'} = time;
+	my $pid;
+	if (!defined($pid = fork)) {
+		error(_job_name($job) ." Can't fork job: $!");
+		$job->{'finished'} = 1;
+		return;
+	}
+	if (!$pid) {
+		# Child: it must never return into the daemon's code, so every
+		# failure ends in _exit (which skips END blocks and destructors).
+		if ($err_only) {
+			# "or", not "||": "||" would bind to the file name and
+			# the error branch could never run.
+			open(STDOUT, '>', '/dev/null') or do {
+				error(_job_name($job) ." Can't write to /dev/null: $!");
+				POSIX::_exit(1);
+			};
+		}
+		# Indirect-object form: never pass the command through a shell.
+		exec { $command->[0] } @$command
+			or error(_job_name($job) ." Can't exec $command->[0]: $!");
+		POSIX::_exit(1);
+	}
+	# NOTE(review): a child that exits before this line is reaped without
+	# being matched to its job; such a job only ends via the timeout.
+	$job->{'pid'} = $pid;
+}
+
+# Skip $job: run /bin/false in its place so that it still goes through the
+# normal child bookkeeping and its completion callbacks.
+sub job_skip {
+	my $job = shift;
+	exec_job_command($job, ['/bin/false']);
+}
+
+# Kill every running job that is older than $kill_after seconds.
+sub reap_hanging_jobs {
+	for (@running) {
+		# Only jobs with a live child qualify.  A failed fork leaves no
+		# pid, and kill with pid 0 would signal our own process group.
+		next if $_->{'finished'} != 0 || !$_->{'pid'};
+		if ((time - $_->{'started_at'}) > $kill_after) {
+			$_->{'finished'} = 1;
+			kill 'KILL', $_->{'pid'};
+			print STDERR _job_name($_) ." KILLED due to timeout\n";
+			push @jobs_killed, _job_name($_);
+		}
+	}
+}
+
+# Run the completion callbacks of all ended jobs and drop them from the
+# running list.
+sub reap_finished_jobs {
+	my @still_running;
+	for (@running) {
+		# Read the status once: handle_childgone may change it at any
+		# time, and a job must not be dropped without its callback.
+		my $status = $_->{'finished'};
+		if ($status == 0) {
+			push @still_running, $_;
+		} elsif ($status == 1 && defined($_->{'on_error'})) {
+			$_->{'on_error'}->($_);
+		} elsif ($status == 2 && defined($_->{'on_success'})) {
+			$_->{'on_success'}->($_);
+		}
+	}
+	@running = @still_running;
+}
+
+# Work through the queue, running at most $max_par jobs at a time, until
+# no job is queued or running any more.
+sub run_queue {
+	my $queue_steps = 0;
+	$jobs_executed = 0;
+	@jobs_killed = ();
+	unless ($quiet) {
+		# printf takes a list, so the counts need explicit scalar().
+		printf STDERR "--- Processing %d queued jobs\n", scalar(@queue);
+	}
+	while (@queue || @running) {
+		reap_hanging_jobs();
+		reap_finished_jobs();
+		# Back off if we're too busy
+		if (@running >= $max_par) {
+			sleep 10;
+			$queue_steps++;
+			unless ($quiet || ($queue_steps % 10)) {
+				printf STDERR "STATUS: %d queued, %d running, %d finished, %d killed\n", scalar(@queue), scalar(@running), $jobs_executed, scalar(@jobs_killed);
+			}
+			# Look again; leaving the loop would abandon the queue.
+			next;
+		}
+		# Nothing to start: wait for running jobs without spinning.
+		if (!@queue) {
+			sleep 1 if @running;
+			next;
+		}
+		# Run next
+		run_job(shift(@queue));
+	}
+	unless ($quiet) {
+		printf STDERR "--- Queue processed. %d jobs executed, %d killed due to timeouts. Now restarting.\n", $jobs_executed, scalar(@jobs_killed);
+	}
+}
+
+# Daemon mode: take the lockfile, then queue and process all projects over
+# and over until a soft exit is requested.
+sub run_perpetually {
+	if (-e $lockfile) {
+		die "Lockfile exists. Please make sure no other instance of jobd is running.";
+	}
+	# "or", not "||": "||" would bind to $lockfile and hide open failures.
+	open(my $lock, '>', $lockfile) or die "Cannot create lockfile $lockfile: $!";
+	print $lock $$;
+	close($lock) or die "Cannot write lockfile $lockfile: $!";
+	$locked = 1;
+
+	while ($perpetual) {
+		queue_all();
+		run_queue();
+		# Pause between rounds like the old jobd.sh did, so that an
+		# empty project list does not turn into a busy loop.
+		sleep 10 if $perpetual;
+	}
+	unlink $lockfile;
+}
+
+######### Helpers {{{1
+
+# Print a message to stderr.  No prototype: the subs above call this before
+# it is defined, where a prototype would only trigger warnings.
+sub error {
+	print STDERR shift()."\n";
+}
+# Print a message to stderr and exit with status 1.
+sub fatal {
+	error(shift);
+	exit 1;
+}
+
+######### Main {{{1
+
+# Parse options
+Getopt::Long::Configure('bundling', 'auto_help');
+my $parse_res = GetOptions(
+	'quiet|q' => \$quiet,
+	'kill-after|k=i' => \$kill_after,
+	'max-parallel|p=i' => \$max_par,
+	'lockfile|l=s' => \$lockfile,
+	'all-once|a' => \$all_once,
+	'one|o=s' => \$one,
+) || pod2usage(2);
+fatal("Error: can only use one out of --all-once and --one")
+	if ($all_once && $one);
+# run_queue could never start a job with fewer than one slot.
+fatal("Error: --max-parallel must be at least 1")
+	if ($max_par < 1);
+
+unless ($quiet) {
+	$ENV{'show_progress'} = '1';
+}
+
+# Install the handlers: without SIGCHLD handling no job is ever seen to
+# finish, and SIGINT/SIGTERM could not shut the daemon down cleanly.
+$SIG{'CHLD'} = \&handle_childgone;
+$SIG{'INT'} = \&handle_softexit;
+$SIG{'TERM'} = \&handle_exit;
+
+if ($one) {
+	queue_one($one);
+	run_queue();
+	exit;
+}
+
+if ($all_once) {
+	queue_all();
+	run_queue();
+	exit;
+}
+
+run_perpetually();
+
+########## Documentation {{{1
+
+__END__
+
+=head1 NAME
+
+jobd - Perform Girocco maintenance jobs
+
+=head1 SYNOPSIS
+
+jobd [options]
+
+ Options:
+   -h | --help                           detailed instructions
+   -q | --quiet                          run quietly
+   -k SECONDS | --kill-after=SECONDS     how long to wait before killing jobs
+   -p NUM | --max-parallel=NUM           how many jobs to run at the same time
+   -l FILE | --lockfile=FILE             create a lockfile in the given location
+   -a | --all-once                       process the list only once
+   -o PRJNAME | --one=PRJNAME            process only one project
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--help>
+
+Print the full description of jobd's options.
+
+=item B<--quiet>
+
+Suppress non-error messages, e.g. for use when running this task as a cronjob.
+
+=item B<--kill-after=SECONDS>
+
+Kill supervised jobs after a certain time to avoid hanging the daemon.
+
+=item B<--max-parallel=NUM>
+
+Run no more than that many jobs at the same time.
+
+=item B<--lockfile=FILE>
+
+For perpetual operation, create a lockfile in that place and clean it up after
+finishing/aborting.
+
+=item B<--all-once>
+
+Instead of perpetually processing all projects over and over again, process
+them just once and then exit.
+
+=item B<--one=PRJNAME>
+
+Process only the given project (given as just the project name without C<.git>
+suffix) and then exit.
+
+=back
+
+=head1 DESCRIPTION
+
+jobd is Girocco's repositories maintenance servant; it periodically checks all
+the repositories and updates mirrored repositories and repacks push-mode
+repositories when needed.
+
+=cut
diff --git a/jobd/jobd.sh b/jobd/jobd.sh
dissimilarity index 98%
index ec1d943..9361305 100755
--- a/jobd/jobd.sh
+++ b/jobd/jobd.sh
@@ -1,90 +1,7 @@
-#!/bin/bash
-#
-# jobd - Perform Girocco maintenance jobs
-#
-# jobd is Girocco repositories maintenance servant; it periodically
-# checks all the repositories and updates mirrored repositories and
-# repacks push-repositories when needed.
-#
-# Execute with parameter --all-once to run only once (on all projects)
-# instead of in infinite loop. Or call with parameter --one and name
-# of a project (not full path, without .git suffix) to run maintenance
-# only on that particular project.
-#
-# Use -q as VERY FIRST parameter to enable quiet mode (use in cronjobs).
-
-. @basedir@/shlib.sh
-
-set -e
-export show_progress=1
-
-# Lock setup
-
-if [ -e /tmp/jobd.lock ]; then
-	echo "Locked! Stale /tmp/jobd.lock?" >&2
-	exit 1
-fi
-echo $$ >/tmp/jobd.lock
-trap "rm /tmp/jobd.lock" SIGINT SIGTERM EXIT
-
-
-## Single-project routine
-
-check_one_proj()
-{
-	proj="$1"
-	if [ ! -d "$cfg_reporoot/$proj.git" ]; then
-		echo "WARNING: Skipping non-existing project $proj" >&2
-		return
-	fi
-	if [ ! -e "$cfg_reporoot/$proj.git"/.nofetch ]; then
-		"$cfg_basedir"/jobd/update.sh "$proj"
-	fi
-	if [ -n "$show_progress" ]; then
-		"$cfg_basedir"/jobd/gc.sh "$proj"
-	else
-		"$cfg_basedir"/jobd/gc.sh "$proj" 2>&1 | grep -v '^Pack.*created\.$'
-	fi
-}
-
-
-## Main loop body
-
-check_all_projects()
-{
-	start_by="$1"
-	get_repo_list | tail -n +"$start_by" | while read proj; do
-		check_one_proj "$proj"
-	done
-}
-
-
-## Main program
-
-if [ "$1" = "-q" ]; then
-	export show_progress=
-	shift
-fi
-
-case "$1" in
-	"")
-		# Start the mirroring at a random point; if there is
-		# some problem in the update process that requires
-		# frequent restarting of jobd, this tries to give even
-		# projects late in the list a chance to get an update.
-		check_all_projects "$((RANDOM%$(get_repo_list | wc -l)))"
-		while true; do
-			touch /tmp/jobd.lock
-			echo -n $(date)
-			echo " -+- Starting new check_all_projects run"
-			check_all_projects 1
-			sleep 10
-		done;;
-	"--all-once")
-		check_all_projects 1;;
-	"--one")
-		check_one_proj "$2";;
-	*)
-		echo "Usage: $0 [-q] [--all-once | --one PRJNAME]" >&2
-		exit 1;;
-esac
+#!/bin/bash
+# deprecated -- use jobd.pl instead
+
+. @basedir@/shlib.sh
+
+exec "$cfg_basedir"/jobd/jobd.pl "$@"
+
-- 
2.11.4.GIT