jobd: properly handle SIGINT and SIGTERM
[girocco/testingthisout.git] / jobd / jobd.pl
blobf0f55adc52de1e15c16a088f5caf51cbf5b19886
1 #!/usr/bin/perl
3 # jobd - perform Girocco maintenance jobs
5 # Run with --help for details
7 use strict;
8 use warnings;
10 use Getopt::Long;
11 use Pod::Usage;
13 use Girocco::Config;
14 use Girocco::Project;
15 use Girocco::User;
# Options
# These are the built-in defaults; the option parser in Main stores the
# command-line values into the very same variables.
my ($quiet, $progress, $all_once, $one);
my $kill_after = 900;               # seconds a job may run before it is killed
my $max_par    = 3;                 # upper bound on simultaneously running jobs
my $lockfile   = "/tmp/jobd.lock";  # lockfile used by perpetual operation
26 ######### Jobs {{{1
# Job command for 'update' jobs.
#
# Takes the job hashref; its 'project' entry names the repository.
# Nothing is started when the project does not exist.  When the repository
# carries a .nofetch marker the update is skipped and a gc job is queued
# right away; otherwise update.sh is launched for the project.
sub update_project {
    my ($job) = @_;
    my $name = $job->{'project'};

    return unless check_project_exists($job);

    if (-e "$Girocco::Config::reporoot/$name.git/.nofetch") {
        job_skip($job);
        setup_gc($job);
        return;
    }

    exec_job_command($job, ["$Girocco::Config::basedir/jobd/update.sh", $name], $quiet);
}
# Job command for 'gc' jobs: launch gc.sh for the job's project, provided
# the project's repository directory exists.
sub gc_project {
    my ($job) = @_;
    my $name = $job->{'project'};

    return unless check_project_exists($job);

    exec_job_command($job, ["$Girocco::Config::basedir/jobd/gc.sh", $name], $quiet);
}
# Queue a follow-up 'gc' job for the project of the given job.
sub setup_gc {
    my ($job) = @_;

    my @gc_spec = (
        project => $job->{'project'},
        type    => 'gc',
        command => \&gc_project,
    );
    queue_job(@gc_spec);
}
# Verify that the repository directory of the job's project exists.
#
# Returns 1 when "$reporoot/<project>.git" is a directory.  Otherwise a
# warning is printed (unless --quiet), the job is turned into a no-op via
# job_skip() and 0 is returned.
sub check_project_exists {
    my $job = shift;
    my $p = $job->{'project'};

    return 1 if -d "$Girocco::Config::reporoot/$p.git";

    error("Warning: skipping non-existent project: $p")
        unless $quiet;
    # The job itself must be handed to job_skip(): it is what records
    # pid/finished/started_at on the job.  Calling it without an argument
    # left the real job in @running with none of those fields set, so it was
    # never reaped and the timeout check ended up calling kill with an
    # undefined pid.
    job_skip($job);
    return 0;
}
# Queue an 'update' job for a single project.  A gc job is scheduled
# afterwards no matter whether the update succeeds or fails.
sub queue_one {
    my ($project) = @_;

    my @update_spec = (
        project    => $project,
        type       => 'update',
        command    => \&update_project,
        on_success => \&setup_gc,
        on_error   => \&setup_gc,
    );
    queue_job(@update_spec);
}
# Queue an update job for every project returned by
# Girocco::Project->get_full_list().
sub queue_all {
    foreach my $project (Girocco::Project->get_full_list()) {
        queue_one($project);
    }
}
83 ######### Daemon operation {{{1
# Shared daemon state.
my (@queue, @running);      # jobs waiting to start / jobs handed to run_job()
my $perpetual = 1;          # run_perpetually() loops while this is true
my $locked = 0;             # set once run_perpetually() has written the lockfile
my $jobs_executed;          # children reaped during the current run_queue()
my @jobs_killed;            # names of jobs killed for exceeding $kill_after
# SIGINT handler (first ^C): stop scheduling new work and let the jobs that
# are already running finish.  A second SIGINT is routed to handle_exit().
sub handle_softexit {
    error("Waiting for outstanding jobs to finish... ^C again to exit immediately");
    $perpetual = 0;
    @queue = ();
    $SIG{'INT'} = \&handle_exit;
}
# Hard-exit handler (SIGTERM, or a second SIGINT): kill every running job,
# remove the lockfile if this process created it, and exit.
sub handle_exit {
    error("Killing outstanding jobs...");
    # No further child or TERM notifications are wanted while shutting down.
    $SIG{'CHLD'} = 'IGNORE';
    $SIG{'TERM'} = 'IGNORE';
    for my $job (@running) {
        # A job can sit in @running without a pid (e.g. its fork failed).
        # An undefined pid numifies to 0, and kill with pid 0 signals the
        # caller's whole process group -- i.e. jobd itself, before it had a
        # chance to clean up the lockfile.
        next unless $job->{'pid'};
        kill 'KILL', $job->{'pid'};
    }
    unlink $lockfile if ($locked);
    exit(0);
}
# SIGCHLD handler: reap every child that has exited and mark the matching
# job in @running as finished (state 2).  The child's exit status is
# currently not evaluated.
sub handle_childgone {
    use POSIX ();

    # Do not leak the handler's errno / child status into whatever code the
    # signal interrupted.
    local ($!, $?);

    # Several children may exit before the handler gets to run, but they can
    # be reported by a single SIGCHLD -- so collect all of them, without
    # blocking, instead of calling wait() just once.
    while ((my $pid = waitpid(-1, POSIX::WNOHANG())) > 0) {
        my @child = grep { $_->{'pid'} && $_->{'pid'} == $pid } @running;
        # Keep an earlier verdict (state 1, e.g. killed on timeout) intact.
        $child[0]->{'finished'} = 2
            if (@child && !$child[0]->{'finished'});
        $jobs_executed++;
    }

    # Just to be safe
    $SIG{'CHLD'} = \&handle_childgone;
}
# Append a job to the queue.  Arguments are key/value pairs describing the
# job; a 'queued_at' timestamp is always (re)set to the current time.
sub queue_job {
    my %job = (@_, 'queued_at' => time);
    push @queue, \%job;
}
# Register the job as running and invoke its 'command' callback with the
# job itself as the only argument.
sub run_job {
    my ($job) = @_;

    push @running, $job;
    my $command = $job->{'command'};
    return $command->($job);
}
# Human-readable label for a job, formatted as "[type::project]".
sub _job_name {
    my ($job) = @_;
    return '[' . join('::', $job->{'type'}, $job->{'project'}) . ']';
}
# Only one of those per job!
#
# Fork a child that runs the given command for the job.
#
#   $job      - job hashref; receives 'pid', 'finished' and 'started_at'
#   $command  - arrayref: program followed by its arguments
#   $err_only - when true, the child's STDOUT is sent to /dev/null
#
# If the fork fails the job is marked as failed (finished = 1) and no pid is
# recorded.
sub exec_job_command {
    my ($job, $command, $err_only) = @_;

    my $pid = fork;
    if (!defined($pid)) {
        error(_job_name($job) ." Can't fork job: $!");
        $job->{'finished'} = 1;
        return;
    }
    if (!$pid) {
        # Child.  It must never return into the caller: that would leave a
        # second copy of the daemon loop running.
        if ($err_only) {
            # 'or', not '||': with '||' the alternative was bound to the
            # (always true) file name and a failed open went unnoticed.
            open(STDOUT, '>', '/dev/null') or do {
                error(_job_name($job) ." Can't write to /dev/null: $!");
                exit 1;
            };
        }
        # The indirect-object form never passes the command through a shell.
        exec { $command->[0] } @$command
            or error(_job_name($job) ." Can't exec $command->[0]: $!");
        # Only reached when exec failed; exit with a definite failure status.
        exit 127;
    }
    $job->{'pid'} = $pid;
    $job->{'finished'} = 0;
    $job->{'started_at'} = time;
}
# Turn the job into a no-op by running /bin/false in its place, so it still
# passes through the regular child bookkeeping.
sub job_skip {
    my ($job) = @_;
    my @noop = ('/bin/false');
    return exec_job_command($job, \@noop);
}
# Kill jobs that have been running for more than $kill_after seconds and
# mark them as failed (finished = 1); their names are recorded in
# @jobs_killed.
sub reap_hanging_jobs {
    for my $job (@running) {
        # Already has a verdict (failed fork, reaped child, earlier kill):
        # nothing to kill, and the recorded state must not be overwritten.
        next if $job->{'finished'};

        # No child process was ever recorded for this job, so there is
        # nothing to wait for.  Mark it failed so it leaves @running, and
        # above all never pass the missing pid to kill: it would numify to 0
        # and signal jobd's own process group.
        if (!$job->{'pid'} || !defined($job->{'started_at'})) {
            $job->{'finished'} = 1;
            next;
        }

        next unless (time - $job->{'started_at'}) > $kill_after;

        $job->{'finished'} = 1;
        kill 'KILL', $job->{'pid'};
        print STDERR _job_name($job) ." KILLED due to timeout\n";
        push @jobs_killed, _job_name($job);
    }
}
# Run the completion callback of every finished job and drop those jobs
# from @running.  State 1 triggers 'on_error', state 2 triggers
# 'on_success'; a missing callback is simply skipped.
sub reap_finished_jobs {
    foreach my $job (@running) {
        my $state = $job->{'finished'};
        next if $state == 0;

        if ($state == 1) {
            $job->{'on_error'}->($job) if defined($job->{'on_error'});
        }
        elsif ($state == 2) {
            $job->{'on_success'}->($job) if defined($job->{'on_success'});
        }
    }

    # Only jobs that are still in progress stay on the list.
    my @still_running = grep { $_->{'finished'} == 0 } @running;
    @running = @still_running;
}
# Process the job queue until it is empty and no job is running any more.
#
# Resets the per-run counters, installs the CHLD/INT/TERM handlers and then
# keeps starting queued jobs, never running more than $max_par at once.
# Progress is reported on STDERR at most once a minute unless silenced.
sub run_queue {
    my $last_progress = time;
    $jobs_executed = 0;
    @jobs_killed = ();
    unless ($quiet) {
        printf STDERR "--- Processing %d queued jobs\n", scalar(@queue);
    }
    $SIG{'CHLD'} = \&handle_childgone;
    $SIG{'INT'} = \&handle_softexit;
    $SIG{'TERM'} = \&handle_exit;
    while (@queue || @running) {
        reap_hanging_jobs();
        reap_finished_jobs();

        # Decide whether to wait instead of starting a job:
        #  - back off if we're too busy (all slots taken);
        #  - when nothing is left to start but jobs are still running, pause
        #    briefly rather than spinning through this loop at full speed.
        my $wait = 0;
        if (@running >= $max_par) {
            $wait = 10;
        }
        elsif (!@queue && @running) {
            $wait = 1;
        }

        if ($wait) {
            sleep $wait;
            unless (($quiet && !$progress) || (time - $last_progress) < 60) {
                printf STDERR "STATUS: %d queued, %d running, %d finished, %d killed\n", scalar(@queue), scalar(@running), $jobs_executed, scalar(@jobs_killed);
                if (@running) {
                    my @run_status;
                    for (@running) {
                        push @run_status, _job_name($_)." ". (time - $_->{'started_at'}) ."s";
                    }
                    error("STATUS: currently running: ". join(', ', @run_status));
                }
                $last_progress = time;
            }
            next;
        }

        # Run next
        run_job(shift(@queue)) if @queue;
    }
    unless ($quiet) {
        printf STDERR "--- Queue processed. %d jobs executed, %d killed due to timeouts. Now restarting.\n", $jobs_executed, scalar(@jobs_killed);
    }
}
# Perpetual operation: take the lockfile, then queue and process all
# projects over and over until $perpetual is cleared, and finally remove
# the lockfile again.
#
# Dies when the lockfile already exists or cannot be written.
sub run_perpetually {
    use Fcntl ();

    # Create the lockfile atomically.  O_EXCL makes the existence check and
    # the creation a single step (no window for a second instance) and
    # refuses to follow a symlink planted at the lockfile path.
    my $lock_fh;
    unless (sysopen($lock_fh, $lockfile,
            Fcntl::O_WRONLY() | Fcntl::O_CREAT() | Fcntl::O_EXCL(), 0644)) {
        die "Lockfile exists. Please make sure no other instance of jobd is running."
            if $!{EEXIST};
        die "Cannot create lockfile $lockfile: $!";
    }
    unless ((print {$lock_fh} $$) && close($lock_fh)) {
        my $err = $!;
        # Do not leave a half-written lock behind.
        unlink $lockfile;
        die "Cannot write lockfile $lockfile: $err";
    }
    $locked = 1;

    while ($perpetual) {
        queue_all();
        run_queue();
    }
    unlink $lockfile;
    $locked = 0;
}
251 ######### Helpers {{{1
# Print a message followed by a newline on STDERR.
#
# Declared without a prototype: every caller in this file is compiled before
# this definition, so a prototype could not be checked at any call site and
# only produced "called too early to check prototype" warnings.
sub error {
    my ($message) = @_;
    print STDERR $message."\n";
}
# Print a message on STDERR via error() and terminate with exit status 1.
#
# Declared without a prototype; callers pass exactly one message string, so
# a plain sub behaves the same and avoids prototype-related warnings.
sub fatal {
    my ($message) = @_;
    error($message);
    exit 1;
}
261 ######### Main {{{1
# Parse options
Getopt::Long::Configure('bundling');
my $parse_res = GetOptions(
    # -h is advertised in the synopsis; with bundling enabled it has to be
    # registered explicitly as a short option.
    'help|h|?' => sub { pod2usage(-verbose => 1, -exitval => 0); },
    'quiet|q' => \$quiet,
    'progress|P' => \$progress,
    'kill-after|k=i' => \$kill_after,
    'max-parallel|p=i' => \$max_par,
    'lockfile|l=s' => \$lockfile,
    'all-once|a' => \$all_once,
    'one|o=s' => \$one,
) || pod2usage(2);
fatal("Error: can only use one out of --all-once and --one")
    if ($all_once && $one);
# With fewer than one slot run_queue() would wait forever without ever
# starting a job.
fatal("Error: --max-parallel must be at least 1")
    if ($max_par < 1);

# Exported to the job scripts through the environment.
unless ($quiet) {
    $ENV{'show_progress'} = '1';
}

# Single project, then exit.
if ($one) {
    queue_one($one);
    run_queue();
    exit;
}

# All projects once, then exit.
if ($all_once) {
    queue_all();
    run_queue();
    exit;
}

# Default: keep processing all projects until told to stop.
run_perpetually();
296 ########## Documentation {{{1
298 __END__
300 =head1 NAME
302 jobd - Perform Girocco maintenance jobs
304 =head1 SYNOPSIS
306 jobd [options]
308 Options:
309 -h | --help detailed instructions
310 -q | --quiet run quietly
311 -P | --progress show occasional status updates
312 -k SECONDS | --kill-after=SECONDS how long to wait before killing jobs
313 -p NUM | --max-parallel=NUM how many jobs to run at the same time
314 -l FILE | --lockfile=FILE create a lockfile in the given location
315 -a | --all-once process the list only once
316 -o PRJNAME | --one=PRJNAME process only one project
318 =head1 OPTIONS
320 =over 8
322 =item B<--help>
324 Print the full description of jobd's options.
326 =item B<--quiet>
328 Suppress non-error messages, e.g. for use when running this task as a cronjob.
330 =item B<--progress>
332 Show information about the current status of the job queue occasionally. This
333 is automatically enabled if --quiet is not given.
335 =item B<--kill-after=SECONDS>
337 Kill supervised jobs after a certain time to avoid hanging the daemon.
339 =item B<--max-parallel=NUM>
341 Run no more than that many jobs at the same time.
343 =item B<--lockfile=FILE>
345 For perpetual operation, create a lockfile in that place and clean it up after
346 finishing/aborting.
348 =item B<--all-once>
350 Instead of perpetually processing all projects over and over again, process
351 them just once and then exit.
353 =item B<--one=PRJNAME>
355 Process only the given project (given as just the project name without C<.git>
356 suffix) and then exit.
358 =back
360 =head1 DESCRIPTION
362 jobd is Girocco's repositories maintenance servant; it periodically checks all
363 the repositories and updates mirrored repositories and repacks push-mode
364 repositories when needed.
366 =cut