jobd: distinguish load-intensive jobs
[girocco.git] / jobd / jobd.pl
blob61bf1530fd668204f469278941bd8ff495865524
1 #!/usr/bin/perl
3 # jobd - perform Girocco maintenance jobs
5 # Run with --help for details
7 use strict;
8 use warnings;
10 use Getopt::Long;
11 use Pod::Usage;
13 use Girocco::Config;
14 use Girocco::Project;
15 use Girocco::User;
# Options (defaults; the command-line parser further down binds to these)
my ($quiet, $progress);               # output control flags
my $kill_after = 900;                 # seconds a job may run before it is killed
my $max_par    = 3;                   # upper bound on concurrently running jobs
my $lockfile   = "/tmp/jobd.lock";    # lockfile used in perpetual mode
my ($all_once, $one);                 # single-pass mode / single project name
26 ######### Jobs {{{1
# Job command for 'update' jobs.
# Does nothing when the project does not exist. If the repository carries a
# ".nofetch" marker file, the job is flagged as skipped and a gc job is queued
# right away; otherwise update.sh is started for the project.
sub update_project {
    my ($job) = @_;
    my $name = $job->{'project'};

    return unless check_project_exists($job);

    if (-e "$Girocco::Config::reporoot/$name.git/.nofetch") {
        job_skip($job);
        setup_gc($job);
        return;
    }

    exec_job_command($job, ["$Girocco::Config::basedir/jobd/update.sh", $name], $quiet);
}
# Job command for 'gc' jobs: start gc.sh for the job's project, provided the
# project's repository directory exists.
sub gc_project {
    my ($job) = @_;
    my $name = $job->{'project'};

    return unless check_project_exists($job);

    exec_job_command($job, ["$Girocco::Config::basedir/jobd/gc.sh", $name], $quiet);
}
# Queue a gc job for the project of the given job. The new job is flagged
# as intensive.
sub setup_gc {
    my ($job) = @_;
    my %gc_job = (
        type      => 'gc',
        project   => $job->{'project'},
        command   => \&gc_project,
        intensive => 1,
    );
    queue_job(%gc_job);
}
# Check that the repository directory of the job's project exists.
#
# Returns 1 when "<reporoot>/<project>.git" is a directory. Otherwise a
# warning is printed (unless --quiet), the job is flagged as skipped and 0 is
# returned. Callers rely on the boolean result ("check_project_exists($job)
# || return").
sub check_project_exists {
    my ($job) = @_;
    my $p = $job->{'project'};

    return 1 if -d "$Girocco::Config::reporoot/$p.git";

    error("Warning: skipping non-existent project: $job->{project}")
        unless $quiet;
    # Pass the job along: job_skip() without an argument flags nothing, so
    # the job would never be counted as skipped.
    job_skip($job);
    return 0;
}
# Queue an update job for a single project. A gc job is queued afterwards
# both when the update succeeds and when it fails.
sub queue_one {
    my ($project) = @_;
    my $followup = \&setup_gc;
    queue_job(
        type       => 'update',
        project    => $project,
        command    => \&update_project,
        on_success => $followup,
        on_error   => $followup,
    );
}
# Queue an update job for every project returned by
# Girocco::Project->get_full_list().
sub queue_all {
    foreach my $project (Girocco::Project->get_full_list()) {
        queue_one($project);
    }
}
84 ######### Daemon operation {{{1
my (@queue, @running);                 # jobs waiting to run / jobs handed to run_job
my $perpetual = 1;                     # cleared on soft exit to stop the main loop
my $locked = 0;                        # set once the lockfile has been written
my ($jobs_executed, $jobs_skipped);    # per-run counters, reset by run_queue
my @jobs_killed;                       # names of jobs killed due to timeout
# First SIGINT: stop scheduling new work (drop the queue, leave perpetual
# mode) but let running jobs finish. A second SIGINT goes to handle_exit.
sub handle_softexit {
    my $notice = "Waiting for outstanding jobs to finish... "
               . "^C again to exit immediately";
    error($notice);
    $perpetual = 0;
    @queue = ();
    $SIG{'INT'} = \&handle_exit;
}
# Hard exit (SIGTERM, or second SIGINT): kill every running job, remove the
# lockfile if this process created it, and exit with status 0.
sub handle_exit {
    error("Killing outstanding jobs...");
    $SIG{'CHLD'} = 'IGNORE';
    $SIG{'TERM'} = 'IGNORE';
    for my $job (@running) {
        # A job whose fork failed sits in @running without a pid; never hand
        # an undefined/zero pid to kill (0 would address our process group).
        next unless $job->{'pid'};
        kill 'KILL', $job->{'pid'};
    }
    unlink $lockfile if ($locked);
    exit(0);
}
# SIGCHLD handler: reap exited children and mark their jobs as finished.
#
# One signal delivery can stand for several exited children, so every child
# that is ready is reaped in a non-blocking loop instead of calling wait once.
sub handle_childgone {
    use POSIX ();

    # Do not leak the handler's status/errno changes into interrupted code.
    local ($!, $?);

    while ((my $pid = waitpid(-1, POSIX::WNOHANG())) > 0) {
        my ($job) = grep { $_->{'pid'} && $_->{'pid'} == $pid } @running;
        # XXX- the child's exit status ($?) is currently not evaluated.
        # Keep a state that was already set (e.g. 1 for a job killed on
        # timeout) rather than overwriting it with "success".
        $job->{'finished'} = 2 if $job && !$job->{'finished'};
        $jobs_executed++;
    }

    # Just to be safe
    $SIG{'CHLD'} = \&handle_childgone;
}
# Append a job (given as key/value pairs) to the queue.
# 'queued_at' and 'dont_run' are always (re)set here; 'intensive' defaults
# to 0 when the caller did not supply it.
sub queue_job {
    my %job = (
        intensive => 0,
        @_,
        queued_at => time,
        dont_run  => 0,
    );
    push @queue, \%job;
}
# Start a job: register it as running and invoke its command callback.
# If the callback flagged the job as skipped, take it off the running list
# again and count it as skipped.
sub run_job {
    my ($job) = @_;

    push @running, $job;
    $job->{'command'}->($job);
    return unless $job->{'dont_run'};

    pop @running;
    $jobs_skipped++;
    return;
}
# Human-readable job label of the form "[type::project]".
sub _job_name {
    my ($job) = @_;
    return sprintf('[%s::%s]', $job->{'type'}, $job->{'project'});
}
# Only one of those per job!
#
# Fork and exec $command (array ref: program followed by its arguments) on
# behalf of $job. With a true $err_only the child's STDOUT is redirected to
# /dev/null. In the parent the job gets 'pid', 'started_at' and
# 'finished' = 0; if the fork fails the job is marked 'finished' = 1.
sub exec_job_command {
    my ($job, $command, $err_only) = @_;

    my $pid = fork;
    if (!defined $pid) {
        error(_job_name($job) ." Can't fork job: $!");
        $job->{'finished'} = 1;
        return;
    }

    if (!$pid) {
        # Child process. It must never return into the caller: that would
        # leave a second copy of the daemon loop running.
        if ($err_only) {
            # "or", not "||": with "||" the test binds to the file name
            # string and a failed open goes unnoticed.
            open(STDOUT, '>', '/dev/null') or do {
                error(_job_name($job) ." Can't write to /dev/null: $!");
                exit 1;
            };
        }
        exec @$command
            or error(_job_name($job) ." Can't exec $command->[0]: $!");
        exit 127;
    }

    $job->{'pid'} = $pid;
    $job->{'finished'} = 0;
    $job->{'started_at'} = time;
}
# Flag a job as skipped; run_job checks this flag after calling the job's
# command callback.
sub job_skip {
    my ($job) = @_;
    return $job->{'dont_run'} = 1;
}
# Kill every running job that has been running for more than $kill_after
# seconds, mark it as failed ('finished' = 1) and remember its name.
sub reap_hanging_jobs {
    for my $job (@running) {
        # Already finished (or already killed): nothing left to time out.
        next if $job->{'finished'};
        # A job whose fork failed has neither pid nor start time; without
        # this guard it would look infinitely old and kill would be called
        # with an undefined pid.
        next unless $job->{'pid'} && defined $job->{'started_at'};
        next unless (time - $job->{'started_at'}) > $kill_after;

        $job->{'finished'} = 1;
        kill 'KILL', $job->{'pid'};
        print STDERR _job_name($job) ." KILLED due to timeout\n";
        push @jobs_killed, _job_name($job);
    }
}
# Run the completion callback of every finished job (on_error for state 1,
# on_success for state 2, when defined) and drop finished jobs from the
# running list.
sub reap_finished_jobs {
    foreach my $job (@running) {
        my $state = $job->{'finished'};
        next if $state == 0;

        my $callback = $state == 1 ? $job->{'on_error'}
                     : $state == 2 ? $job->{'on_success'}
                     :               undef;
        $callback->($job) if defined $callback;
    }
    @running = grep { $_->{'finished'} == 0 } @running;
}
# Running jobs flagged as intensive: the matching jobs in list context,
# their count in scalar context.
sub have_intensive_jobs {
    return grep { 1 == $_->{'intensive'} } @running;
}
# Process the queue until no job is queued or running any more.
# Installs the CHLD/INT/TERM handlers, resets the per-run counters and then
# loops: reap timed-out and finished jobs, and either start the next queued
# job or - when at the parallelism limit, while an intensive job is running,
# or when nothing is queued - sleep for 10 seconds. With $progress set, a
# status report is printed at most once a minute while backing off.
sub run_queue {
    my $last_progress = time;
    ($jobs_executed, $jobs_skipped) = (0, 0);
    @jobs_killed = ();
    printf STDERR "--- Processing %d queued jobs\n", scalar(@queue)
        if $progress;

    $SIG{'CHLD'} = \&handle_childgone;
    $SIG{'INT'} = \&handle_softexit;
    $SIG{'TERM'} = \&handle_exit;

    while (@queue || @running) {
        reap_hanging_jobs();
        reap_finished_jobs();

        my $too_busy = @running >= $max_par || have_intensive_jobs() || !@queue;
        unless ($too_busy) {
            # Run next (the queue may have been emptied by a signal handler
            # in the meantime, hence the re-check)
            run_job(shift(@queue)) if @queue;
            next;
        }

        # Back off if we're too busy
        sleep 10;
        next unless $progress && (time - $last_progress) >= 60;

        printf STDERR "STATUS: %d queued, %d running, %d finished, %d skipped, %d killed\n", scalar(@queue), scalar(@running), $jobs_executed, $jobs_skipped, scalar(@jobs_killed);
        if (@running) {
            my @run_status =
                map { _job_name($_)." ". (time - $_->{'started_at'}) ."s" } @running;
            error("STATUS: currently running: ". join(', ', @run_status));
        }
        $last_progress = time;
    }

    printf STDERR "--- Queue processed. %d jobs executed, %d skipped, %d killed. Now restarting.\n", $jobs_executed, $jobs_skipped, scalar(@jobs_killed)
        if $progress;
}
# Perpetual mode: take the lockfile (dies if it already exists or cannot be
# written), then queue and process all projects over and over until
# $perpetual is cleared, and finally remove the lockfile.
sub run_perpetually {
    if (-e $lockfile) {
        die "Lockfile exists. Please make sure no other instance of jobd is running.";
    }
    # "or", not "||": with "||" the die is attached to $lockfile (always
    # true) and a failed open is silently ignored.
    open(my $lock_fh, '>', $lockfile)
        or die "Cannot create lockfile $lockfile: $!";
    print {$lock_fh} $$;
    # Buffered write errors only show up on close.
    close($lock_fh)
        or die "Cannot write lockfile $lockfile: $!";
    $locked = 1;

    while ($perpetual) {
        queue_all();
        run_queue();
    }
    unlink $lockfile;
}
265 ######### Helpers {{{1
# Print a message followed by a newline to STDERR.
# Declared without a prototype: this sub is called (with parentheses) from
# code that appears before this definition, where a prototype cannot be
# applied and only produces "called too early to check prototype" warnings.
sub error {
    my ($message) = @_;
    print STDERR $message."\n";
}
# Print a message to STDERR via error() and exit with status 1.
# No prototype: prototypes change how calls are parsed and do not validate
# arguments.
sub fatal {
    my ($message) = @_;
    error($message);
    exit 1;
}
275 ######### Main {{{1
# Parse options
Getopt::Long::Configure('bundling');
my $parse_res = GetOptions(
    # -h is what the SYNOPSIS advertises; with bundling enabled it has to be
    # declared explicitly, otherwise only --help and -? are recognized.
    'help|h|?' => sub { pod2usage(-verbose => 1, -exitval => 0); },
    'quiet|q' => \$quiet,
    'progress|P' => \$progress,
    'kill-after|k=i' => \$kill_after,
    'max-parallel|p=i' => \$max_par,
    'lockfile|l=s' => \$lockfile,
    'all-once|a' => \$all_once,
    'one|o=s' => \$one,
) || pod2usage(2);

# Test for presence rather than truth so that a project named "0" is honored.
my $have_one = defined($one) && length($one);
fatal("Error: can only use one out of --all-once and --one")
    if ($all_once && $have_one);

# Progress output is implied unless --quiet was given.
unless ($quiet) {
    $ENV{'show_progress'} = '1';
    $progress = 1;
}

if ($have_one) {
    queue_one($one);
    run_queue();
    exit;
}

if ($all_once) {
    queue_all();
    run_queue();
    exit;
}

run_perpetually();
311 ########## Documentation {{{1
313 __END__
315 =head1 NAME
317 jobd - Perform Girocco maintenance jobs
319 =head1 SYNOPSIS
321 jobd [options]
323 Options:
324 -h | --help detailed instructions
325 -q | --quiet run quietly
326 -P | --progress show occasional status updates
327 -k SECONDS | --kill-after=SECONDS how long to wait before killing jobs
328 -p NUM | --max-parallel=NUM how many jobs to run at the same time
329 -l FILE | --lockfile=FILE create a lockfile in the given location
330 -a | --all-once process the list only once
331 -o PRJNAME | --one=PRJNAME process only one project
333 =head1 OPTIONS
335 =over 8
337 =item B<--help>
339 Print the full description of jobd's options.
341 =item B<--quiet>
343 Suppress non-error messages, e.g. for use when running this task as a cronjob.
345 =item B<--progress>
347 Show information about the current status of the job queue occasionally. This
348 is automatically enabled if --quiet is not given.
350 =item B<--kill-after=SECONDS>
352 Kill supervised jobs after a certain time to avoid hanging the daemon.
354 =item B<--max-parallel=NUM>
356 Run no more than that many jobs at the same time.
358 =item B<--lockfile=FILE>
360 For perpetual operation, create a lockfile in that place and clean it up after
361 finishing/aborting.
363 =item B<--all-once>
365 Instead of perpetually processing all projects over and over again, process
366 them just once and then exit.
368 =item B<--one=PRJNAME>
370 Process only the given project (given as just the project name without C<.git>
371 suffix) and then exit.
373 =back
375 =head1 DESCRIPTION
377 jobd is Girocco's repository maintenance servant; it periodically checks all
378 the repositories, updating mirrored repositories and repacking push-mode
379 repositories when needed.
381 =cut