jobd/jobd.sh: outdated; make it run jobd.pl
[girocco/msimkins.git] / jobd / jobd.pl
blob9dda2828e2ffe2a0b55f9726f129a549781d7691
1 #!/usr/bin/perl
3 # jobd - perform Girocco maintenance jobs
5 # Run with --help for details
7 use strict;
8 use warnings;
10 use Getopt::Long;
11 use Pod::Usage;
12 use POSIX ":sys_wait_h";
14 use Girocco::Config;
15 use Girocco::Project;
16 use Girocco::User;
# Options (defaults; most of them can be overridden on the command line)
my ($quiet, $progress);                        # output verbosity switches
my $kill_after        = 900;                   # seconds a job may run before it is killed
my $max_par           = 20;                    # upper bound on concurrently running jobs
my $max_par_intensive = 3;                     # no command line option right now
my $lockfile          = '/tmp/jobd.lock';      # lockfile used in perpetual mode
my ($all_once, $one);                          # single-pass modes: all projects / one project
28 ######### Jobs {{{1
# Job command for 'update' jobs.
# Skips the job if the project does not exist. If the repository carries a
# .nofetch marker file, the job is marked as skipped and a gc job is queued
# right away; otherwise jobd/update.sh is started for the project.
sub update_project {
    my ($job) = @_;
    my $name = $job->{'project'};

    return unless check_project_exists($job);

    if (-e "$Girocco::Config::reporoot/$name.git/.nofetch") {
        job_skip($job);
        setup_gc($job);
        return;
    }

    my @cmd = ("$Girocco::Config::basedir/jobd/update.sh", $name);
    return exec_job_command($job, \@cmd, $quiet);
}
# Job command for 'gc' jobs: starts jobd/gc.sh for the job's project,
# unless the project does not exist (in which case the job is skipped).
sub gc_project {
    my ($job) = @_;
    my $name = $job->{'project'};

    return unless check_project_exists($job);

    my @cmd = ("$Girocco::Config::basedir/jobd/gc.sh", $name);
    return exec_job_command($job, \@cmd, $quiet);
}
# Queue a follow-up 'gc' job for the project of the given job.
# The gc job is flagged as intensive.
sub setup_gc {
    my ($job) = @_;
    my @spec = (
        project   => $job->{'project'},
        type      => 'gc',
        command   => \&gc_project,
        intensive => 1,
    );
    return queue_job(@spec);
}
# Check that the repository directory of the job's project exists.
#
# Returns 1 if "$reporoot/<project>.git" is a directory. Otherwise a warning
# is printed (unless --quiet), the job is marked as skipped and 0 is returned.
sub check_project_exists {
    my $job = shift;
    my $p = $job->{'project'};
    if (!-d "$Girocco::Config::reporoot/$p.git") {
        error("Warning: skipping non-existent project: $job->{project}")
            unless $quiet;
        # The job must be passed on: without it job_skip() flags a throwaway
        # hash, the real job is never marked 'dont_run', and it would stay
        # in the running list forever.
        job_skip($job);
        return 0;
    }
    return 1;
}
# Queue an 'update' job for a single project. A gc job is queued afterwards
# both when the update succeeds and when it fails.
sub queue_one {
    my ($name) = @_;
    my @spec = (
        project    => $name,
        type       => 'update',
        command    => \&update_project,
        on_success => \&setup_gc,
        on_error   => \&setup_gc,
    );
    return queue_job(@spec);
}
# Queue an update job for every project returned by
# Girocco::Project->get_full_list().
sub queue_all {
    for my $name (Girocco::Project->get_full_list()) {
        queue_one($name);
    }
}
86 ######### Daemon operation {{{1
my (@queue, @running);                  # jobs waiting to run / jobs currently tracked as running
my $perpetual = 1;                      # cleared by handle_softexit to stop the main loop
my $locked    = 0;                      # set once the lockfile has been written
my ($jobs_executed, $jobs_skipped);     # per-run counters, reset by run_queue
my @jobs_killed;                        # names of jobs killed due to timeout in the current run
# First-stage interrupt handler: drop everything still queued, stop the
# perpetual loop and let running jobs finish. A second interrupt is routed
# to handle_exit.
sub handle_softexit {
    my $notice = "Waiting for outstanding jobs to finish... "
               . "^C again to exit immediately";
    error($notice);
    $perpetual = 0;
    @queue = ();
    $SIG{'INT'} = \&handle_exit;
}
# Hard-exit handler: kill the process group of every running job, remove
# the lockfile if this instance created it, and exit with status 0.
sub handle_exit {
    error("Killing outstanding jobs...");
    $SIG{'TERM'} = 'IGNORE';
    for my $job (@running) {
        # A job that never got a child (e.g. fork failed) has no pid. Without
        # this guard -(undef) evaluates to 0, and signalling 0 addresses our
        # own process group -- jobd would kill itself before the lockfile
        # is cleaned up.
        next unless $job->{'pid'};
        kill 'KILL', -($job->{'pid'});
    }
    unlink $lockfile if ($locked);
    exit(0);
}
# Append a job to the queue.
# Takes the job description as key/value pairs. 'intensive' defaults to 0
# when not given; 'queued_at' and 'dont_run' are always (re)initialised.
sub queue_job {
    my %job = (
        intensive => 0,
        @_,
        queued_at => time,
        dont_run  => 0,
    );
    return push @queue, \%job;
}
# Start a job: register it as running and invoke its command callback.
# If the callback marked the job as skipped, it is taken off the running
# list again and counted as skipped.
sub run_job {
    my ($job) = @_;

    push @running, $job;
    $job->{'command'}->($job);

    return unless $job->{'dont_run'};

    pop @running;
    $jobs_skipped++;
    return;
}
# Human-readable job label of the form "[type::project]".
sub _job_name {
    my ($job) = @_;
    return '[' . join('::', $job->{'type'}, $job->{'project'}) . ']';
}
139 # Only one of those per job!
# Fork a child process that runs a command on behalf of a job.
#
# Parameters:
#   $job      - job hash; 'pid', 'finished' and 'started_at' are recorded in it
#   $command  - array ref: program followed by its arguments
#   $err_only - when true, the child's STDOUT is redirected to /dev/null
#
# In the parent: on fork failure the job is flagged as finished and the sub
# returns; otherwise the child's pid and start time are stored in the job.
# In the child: STDIN is redirected from /dev/null, a new process group is
# created and the command is exec'ed. The child never returns from this sub:
# on any setup failure it reports the error and exits non-zero, so it can
# never fall back into the daemon's own queue loop.
sub exec_job_command {
    my ($job, $command, $err_only) = @_;

    my $pid = fork;
    if (!defined $pid) {
        error(_job_name($job) ." Can't fork job: $!");
        $job->{'finished'} = 1;
        return;
    }
    if (!$pid) {
        # 'or' instead of '||': the latter binds to the filename argument,
        # which is always true, so a failing open would go unnoticed.
        open(STDIN, '<', '/dev/null') or do {
            error(_job_name($job) ." Can't read from /dev/null: $!");
            exit 1;
        };
        if ($err_only) {
            open(STDOUT, '>', '/dev/null') or do {
                error(_job_name($job) ." Can't write to /dev/null: $!");
                exit 1;
            };
        }
        # New process group so we can keep track of all of its children
        if (!defined(POSIX::setpgid(0, 0))) {
            error(_job_name($job) ." Can't create process group: $!");
            exit 1;
        }
        # "Prevent" races
        select(undef, undef, undef, 0.1);
        # exec only returns on failure; $? is meaningless at that point,
        # so report the reason and exit with a fixed non-zero status.
        exec(@$command)
            or error(_job_name($job) ." Can't exec $command->[0]: $!");
        exit 1;
    }
    $job->{'pid'} = $pid;
    $job->{'finished'} = 0;
    $job->{'started_at'} = time;
}
# Flag a job as skipped by setting its 'dont_run' entry; run_job inspects
# this flag after invoking the job's command.
sub job_skip {
    my ($job) = @_;
    return $job->{'dont_run'} = 1;
}
# Kill every running job that has been running for more than $kill_after
# seconds. The job's whole process group is killed, the job is flagged as
# finished and its name is recorded in @jobs_killed.
sub reap_hanging_jobs {
    for my $job (@running) {
        next unless defined($job->{'started_at'});
        next unless (time - $job->{'started_at'}) > $kill_after;

        $job->{'finished'} = 1;
        kill 'KILL', -($job->{'pid'});

        my $label = _job_name($job);
        print STDERR $label ." KILLED due to timeout\n";
        push @jobs_killed, $label;
    }
}
# Collect all children that have exited, without blocking.
#
# For each reaped pid the matching running job is looked up. A job that was
# not yet flagged as finished gets its on_success callback, is flagged as
# finished and counted as executed; a job already flagged as finished
# (i.e. killed on timeout) gets its on_error callback instead. Finished
# jobs are then dropped from @running.
#
# Returns 1 if at least one child was reaped, 0 otherwise.
sub reap_finished_jobs {
    my $reaped_any = 0;

    while ((my $pid = waitpid(-1, WNOHANG)) >= 1) {
        $reaped_any = 1;

        # XXX- the child's exit status ($?) is currently not looked at
        my ($job) = grep { $_->{'pid'} && $_->{'pid'} == $pid } @running;
        next unless $job;

        if ($job->{'finished'}) {
            my $on_error = $job->{'on_error'};
            $on_error->($job) if defined($on_error);
            next;
        }

        my $on_success = $job->{'on_success'};
        $on_success->($job) if defined($on_success);
        $job->{'finished'} = 1;
        $jobs_executed++;
    }

    @running = grep { $_->{'finished'} == 0 } @running;
    return $reaped_any;
}
# Running jobs flagged as intensive: the list of them in list context,
# their count in scalar context.
sub have_intensive_jobs {
    my @intensive = grep { $_->{'intensive'} == 1 } @running;
    return @intensive;
}
# Process the job queue until both the queue and the running list are empty.
#
# Resets the per-run counters, installs the INT/TERM handlers and then
# loops: reap timed-out and finished jobs, and start the next queued job
# unless the parallelism limits are reached. While backing off, a status
# report is printed at most once every 60 seconds when progress output is
# enabled.
sub run_queue {
    my $last_progress = time;
    ($jobs_executed, $jobs_skipped) = (0, 0);
    @jobs_killed = ();

    printf STDERR "--- Processing %d queued jobs\n", scalar(@queue)
        if $progress;

    $SIG{'INT'}  = \&handle_softexit;
    $SIG{'TERM'} = \&handle_exit;

    while (@queue || @running) {
        reap_hanging_jobs();
        my $proceed_immediately = reap_finished_jobs();

        # Back off if we're too busy (or there is nothing left to start)
        my $busy = @running >= $max_par
            || have_intensive_jobs() >= $max_par_intensive
            || !@queue;

        unless ($busy) {
            # Run next
            run_job(shift(@queue)) if @queue;
            next;
        }

        sleep 1 unless $proceed_immediately;
        next unless $progress && (time - $last_progress) >= 60;

        printf STDERR "STATUS: %d queued, %d running, %d finished, %d skipped, %d killed\n", scalar(@queue), scalar(@running), $jobs_executed, $jobs_skipped, scalar(@jobs_killed);
        if (@running) {
            my @run_status = map {
                _job_name($_)." ". (time - $_->{'started_at'}) ."s"
            } @running;
            error("STATUS: currently running: ". join(', ', @run_status));
        }
        $last_progress = time;
    }

    printf STDERR "--- Queue processed. %d jobs executed, %d skipped, %d killed. Now restarting.\n", $jobs_executed, $jobs_skipped, scalar(@jobs_killed)
        if $progress;
}
# Daemon mode: take the lockfile, then queue and process all projects over
# and over until $perpetual is cleared, and finally remove the lockfile.
#
# Dies if the lockfile already exists or cannot be created/written.
sub run_perpetually {
    if (-e $lockfile) {
        die "Lockfile exists. Please make sure no other instance of jobd is running.";
    }
    # 'or' instead of '||': the latter binds to $lockfile (always true), so
    # a failing open would never reach the die.
    open(my $lock_fh, '>', $lockfile)
        or die "Cannot create lockfile $lockfile: $!";
    my $written = print {$lock_fh} $$;
    # Buffered write errors only surface at close, so check both.
    unless (close($lock_fh) && $written) {
        my $reason = $!;
        unlink $lockfile;
        die "Cannot write lockfile $lockfile: $reason";
    }
    $locked = 1;

    while ($perpetual) {
        queue_all();
        run_queue();
    }
    unlink $lockfile;
}
276 ######### Helpers {{{1
# Print a message followed by a newline to STDERR.
# Returns the result of print.
#
# Deliberately declared without a prototype: most call sites are compiled
# before this definition, where a prototype cannot be applied and only
# triggers "called too early to check prototype" warnings.
sub error {
    my ($message) = @_;
    return print STDERR $message."\n";
}
# Print a message to STDERR via error() and terminate with exit status 1.
# Declared without a prototype; prototypes alter parsing and do not
# validate arguments.
sub fatal {
    my ($message) = @_;
    error($message);
    exit 1;
}
286 ######### Main {{{1
# Parse options
Getopt::Long::Configure('bundling');
my $parse_res = GetOptions(
    # 'h' is listed explicitly: with bundling enabled a single-dash -h is
    # looked up as a one-letter option and is not resolved as an
    # abbreviation of --help, although the usage text advertises it.
    'help|h|?' => sub { pod2usage(-verbose => 1, -exitval => 0); },
    'quiet|q' => \$quiet,
    'progress|P' => \$progress,
    'kill-after|k=i' => \$kill_after,
    'max-parallel|p=i' => \$max_par,
    'lockfile|l=s' => \$lockfile,
    'all-once|a' => \$all_once,
    'one|o=s' => \$one,
) || pod2usage(2);
fatal("Error: can only use one out of --all-once and --one")
    if ($all_once && $one);

# Without --quiet, progress reporting is always on; 'show_progress' is
# exported through the environment for the job scripts.
unless ($quiet) {
    $ENV{'show_progress'} = '1';
    $progress = 1;
}

# Single-project mode: process just that project, then exit.
if ($one) {
    queue_one($one);
    run_queue();
    exit;
}

# Single-pass mode: process every project once, then exit.
if ($all_once) {
    queue_all();
    run_queue();
    exit;
}

# Default: daemon mode.
run_perpetually();
322 ########## Documentation {{{1
324 __END__
326 =head1 NAME
328 jobd - Perform Girocco maintenance jobs
330 =head1 SYNOPSIS
332 jobd [options]
334 Options:
335 -h | --help detailed instructions
336 -q | --quiet run quietly
337 -P | --progress show occasional status updates
338 -k SECONDS | --kill-after=SECONDS how long to wait before killing jobs
339 -p NUM | --max-parallel=NUM how many jobs to run at the same time
340 -l FILE | --lockfile=FILE create a lockfile in the given location
341 -a | --all-once process the list only once
342 -o PRJNAME | --one=PRJNAME process only one project
344 =head1 OPTIONS
346 =over 8
348 =item B<--help>
350 Print the full description of jobd's options.
352 =item B<--quiet>
354 Suppress non-error messages, e.g. for use when running this task as a cronjob.
356 =item B<--progress>
358 Show information about the current status of the job queue occasionally. This
359 is automatically enabled if --quiet is not given.
361 =item B<--kill-after=SECONDS>
363 Kill supervised jobs after a certain time to avoid hanging the daemon.
365 =item B<--max-parallel=NUM>
367 Run no more than that many jobs at the same time.
369 =item B<--lockfile=FILE>
371 For perpetual operation, create a lockfile in that place and clean it up after
372 finishing/aborting.
374 =item B<--all-once>
376 Instead of perpetually processing all projects over and over again, process
377 them just once and then exit.
379 =item B<--one=PRJNAME>
381 Process only the given project (given as just the project name without C<.git>
382 suffix) and then exit.
384 =back
386 =head1 DESCRIPTION
388 jobd is Girocco's repository maintenance servant; it periodically checks all
389 the repositories, updates mirrored repositories, and repacks push-mode
390 repositories when needed.
392 =cut