#!/usr/bin/perl

# jobd - perform Girocco maintenance jobs

# Run with --help for details

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use POSIX ":sys_wait_h";

use Girocco::Config;
use Girocco::Project;
use Girocco::User;

# Options
my $quiet;
my $progress;
my $kill_after = 900;
my $max_par = 20;
my $max_par_intensive = 1;
my $load_triggers = '10,2';
my $lockfile = "/tmp/jobd.lock";
my $restart_delay = 60;
my $all_once;
my $one;

my ($load_trig, $load_untrig);

######### Jobs {{{1

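# Job handlers receive a job hashref; they either mark the job as skipped
# via job_skip() or hand the real work to exec_job_command(), which runs it
# in a child process.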
sub update_project {
	my $job = shift;
	my $p = $job->{'project'};
	check_project_exists($job) || return;
	if (-e get_project_path($p).".nofetch") {
		job_skip($job);
		return setup_gc($job);
	}
	if (-e get_project_path($p).".clone_in_progress") {
		job_skip($job, "initial mirroring not complete yet");
		return;
	}
	if (my $ts = is_operation_uptodate($p, 'lastrefresh', $Girocco::Config::min_mirror_interval)) {
		job_skip($job, "not needed right now, last run at $ts");
		setup_gc($job);
		return;
	}
	if (is_svn_clone($p)) {
		# git svn can be very, very slow at times
		$job->{'timeout_factor'} = 3;
	}
	exec_job_command($job, ["$Girocco::Config::basedir/jobd/update.sh", $p], $quiet);
}

sub gc_project {
	my $job = shift;
	my $p = $job->{'project'};
	check_project_exists($job) || return;
	if (my $ts = is_operation_uptodate($p, 'lastgc', $Girocco::Config::min_gc_interval)) {
		job_skip($job, "not needed right now, last run at $ts");
		return;
	}
	exec_job_command($job, ["$Girocco::Config::basedir/jobd/gc.sh", $p], $quiet);
}

sub setup_gc {
	my $job = shift;
	queue_job(
		project => $job->{'project'},
		type => 'gc',
		command => \&gc_project,
		intensive => 1,
	);
}

sub check_project_exists {
	my $job = shift;
	my $p = $job->{'project'};
	if (!-d get_project_path($p)) {
		job_skip($job, "non-existent project");
		return 0;
	}
	return 1;
}

sub get_project_path {
	"$Girocco::Config::reporoot/".shift().".git/";
}

sub is_operation_uptodate {
	my ($project, $which, $threshold) = @_;
	my $path = get_project_path($project);
	my $timestamp = `GIT_DIR="$path" $Girocco::Config::git_bin config "gitweb.$which"`;
	chomp $timestamp;	# strip the trailing newline from the command output
	my $unix_ts = `date +%s -d "$timestamp"`;
	chomp $unix_ts;
	(time - $unix_ts) <= $threshold ? $timestamp : undef;
}

sub is_svn_clone {
	my ($project) = @_;
	my $path = get_project_path($project);
	my $baseurl = `GIT_DIR="$path" $Girocco::Config::git_bin config "gitweb.baseurl"`;
	my $svnurl = `GIT_DIR="$path" $Girocco::Config::git_bin config "svn-remote.svn.url"`;
	return $baseurl =~ /^svn[:+]/i && $svnurl;
}

sub queue_one {
	my $project = shift;
	queue_job(
		project => $project,
		type => 'update',
		command => \&update_project,
		on_success => \&setup_gc,
		on_error => \&setup_gc,
	);
}

sub queue_all {
	queue_one($_) for (Girocco::Project->get_full_list());
}

######### Daemon operation {{{1

my @queue;
my @running;
my $perpetual = 1;
my $locked = 0;
my $jobs_executed;
my $jobs_skipped;
my @jobs_killed;

sub handle_softexit {
	error("Waiting for outstanding jobs to finish... ".
		"^C again to exit immediately");
	@queue = ();
	$perpetual = 0;
	$SIG{'INT'} = \&handle_exit;
}

sub handle_exit {
	error("Killing outstanding jobs...");
	$SIG{'TERM'} = 'IGNORE';
	for (@running) {
		kill 'KILL', -($_->{'pid'});
	}
	unlink $lockfile if ($locked);
	exit(0);
}

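# A job is a plain hashref.  The queueing helpers provide: project, type
# ('update' or 'gc'), command (handler coderef), optional on_success/on_error
# callbacks and an 'intensive' flag; update_project() may add a
# timeout_factor.  queue_job() records queued_at and dont_run, and
# exec_job_command() later fills in pid, started_at and finished.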
sub queue_job {
	my %opts = @_;
	$opts{'queued_at'} = time;
	$opts{'dont_run'} = 0;
	$opts{'intensive'} = 0 unless exists $opts{'intensive'};
	push @queue, \%opts;
}

sub run_job {
	my $job = shift;

	push @running, $job;
	$job->{'command'}->($job);
	if ($job->{'dont_run'}) {
		pop @running;
		$jobs_skipped++;
		return;
	}
}

sub _job_name {
	my $job = shift;
	"[".$job->{'type'}."::".$job->{'project'}."]";
}

# Only one of those per job!
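# Forks the given command into its own process group (so a later timeout kill
# can take out the whole tree), optionally discarding its stdout when
# $err_only is set, and records pid, started_at and finished on the job.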
sub exec_job_command {
	my ($job, $command, $err_only) = @_;

	my $pid;
	if (!defined($pid = fork)) {
		error(_job_name($job) ." Can't fork job: $!");
		$job->{'finished'} = 1;
		return;
	}
	if (!$pid) {
		# Child process: any setup failure must exit() here; returning
		# would let the child fall back into the parent's code path.
		open STDIN, '<', '/dev/null' or do {
			error(_job_name($job) ." Can't read from /dev/null: $!");
			exit 1;
		};
		if ($err_only) {
			open STDOUT, '>', '/dev/null' or do {
				error(_job_name($job) ." Can't write to /dev/null: $!");
				exit 1;
			};
		}
		# New process group so we can keep track of all of its children
		if (!defined(POSIX::setpgid(0, 0))) {
			error(_job_name($job) ." Can't create process group: $!");
			exit 1;
		}
		# "Prevent" races
		select(undef, undef, undef, 0.1);
		exec @$command;
		# Stop perl from complaining
		exit $?;
	}
	$job->{'pid'} = $pid;
	$job->{'finished'} = 0;
	$job->{'started_at'} = time;
}

sub job_skip {
	my ($job, $msg) = @_;
	$job->{'dont_run'} = 1;
	error(_job_name($job) ." Skipping job: $msg") unless $quiet || !$msg;
}

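# Kill the whole process group of any job that has been running longer than
# $kill_after seconds (scaled by the job's timeout_factor, if set).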
sub reap_hanging_jobs {
	for (@running) {
		my $factor = $_->{'timeout_factor'} || 1;
		if (defined($_->{'started_at'}) && (time - $_->{'started_at'}) > ($kill_after * $factor)) {
			$_->{'finished'} = 1;
			kill 'KILL', -($_->{'pid'});
			error(_job_name($_) ." KILLED due to timeout");
			push @jobs_killed, _job_name($_);
		}
	}
}

sub reap_finished_jobs {
	my $pid;
	my $finished_any = 0;
	while (1) {
		$pid = waitpid(-1, WNOHANG);
		last if $pid < 1;
		$finished_any = 1;

		my @child = grep { $_->{'pid'} && $_->{'pid'} == $pid } @running;
		if ($?) {
			# XXX- we currently don't care
		}
		if (@child && !$child[0]->{'finished'}) {
			$child[0]->{'on_success'}->($child[0]) if defined($child[0]->{'on_success'});
			$child[0]->{'finished'} = 1;
			$jobs_executed++;
		} elsif (@child) {
			$child[0]->{'on_error'}->($child[0]) if defined($child[0]->{'on_error'});
		}
	}
	@running = grep { $_->{'finished'} == 0 } @running;
	$finished_any;
}

sub have_intensive_jobs {
	grep { $_->{'intensive'} == 1 } @running;
}

sub ts {
	"[". scalar(localtime) ."] ";
}

sub get_load_info {
	if ($^O eq "linux") {
		# Read /proc/loadavg on Linux
		open(LOADAV, '<', '/proc/loadavg') or return undef;
		my $loadinfo = <LOADAV>;
		close LOADAV;
		return (split(/\s/, $loadinfo, 4))[0..2];
	} else {
		# Read the output of uptime everywhere else (works on Linux too)
		open(LOADAV, '-|', 'uptime') or return undef;
		my $loadinfo = <LOADAV>;
		close LOADAV;
		$loadinfo =~ /load average[^0-9.]*([0-9.]+)[^0-9.]+([0-9.]+)[^0-9.]+([0-9.]+)/iso or return undef;
		return ($1, $2, $3);
	}
}

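# Main scheduling loop: reap hung and finished jobs, pause while the system
# load is above the trigger threshold, emit periodic status output when
# --progress is active, and start queued jobs as long as the parallelism
# limits allow it.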
sub run_queue {
	my $last_progress = time;
	my $last_checkload = time - 5;
	my $current_load = $load_trig;
	my $overloaded = 0;
	my $load_info = '';
	$jobs_executed = 0;
	$jobs_skipped = 0;
	@jobs_killed = ();
	if ($progress) {
		ferror("--- Processing %d queued jobs", scalar(@queue));
	}
	$SIG{'INT'} = \&handle_softexit;
	$SIG{'TERM'} = \&handle_exit;
	while (@queue || @running) {
		reap_hanging_jobs();
		my $proceed_immediately = reap_finished_jobs();
		# Check current system load
		if ($load_trig && (time - $last_checkload) >= 5
		    && defined((my @loadinfo = get_load_info())[0])) {
			my $current_load = $loadinfo[0];
			if ($current_load > $load_trig && !$overloaded) {
				$overloaded = 1;
				error("PAUSE: system load is at $current_load > $load_trig") if $progress;
			} elsif ($current_load < $load_untrig && $overloaded) {
				$overloaded = 0;
				error("RESUME: system load is at $current_load < $load_untrig") if $progress;
			}
			if ($overloaded) {
				$load_info = ', paused (load '. $current_load .')';
			} else {
				$load_info = ', load '. $current_load;
			}
			$last_checkload = time;
		}
		# Status output
		if ($progress && (time - $last_progress) >= 60) {
			ferror("STATUS: %d queued, %d running, %d finished, %d skipped, %d killed$load_info",
				scalar(@queue), scalar(@running), $jobs_executed, $jobs_skipped, scalar(@jobs_killed));
			if (@running) {
				my @run_status;
				for (@running) {
					push @run_status, _job_name($_)." ". (time - $_->{'started_at'}) ."s";
				}
				error("STATUS: currently running: ". join(', ', @run_status));
			}
			$last_progress = time;
		}
		# Back off if we're too busy
		if (@running >= $max_par || have_intensive_jobs() >= $max_par_intensive || !@queue || $overloaded) {
			sleep 1 unless $proceed_immediately;
			next;
		}
		# Run next
		run_job(shift(@queue)) if @queue;
	}
	if ($progress) {
		ferror("--- Queue processed. %d jobs executed, %d skipped, %d killed.",
			$jobs_executed, $jobs_skipped, scalar(@jobs_killed));
	}
}

sub run_perpetually {
	if (-e $lockfile) {
		die "Lockfile exists. Please make sure no other instance of jobd is running.";
	}
	open LOCK, '>', $lockfile or die "Cannot create lockfile $lockfile: $!";
	print LOCK $$;
	close LOCK;
	$locked = 1;

	while ($perpetual) {
		queue_all();
		run_queue();
		sleep($restart_delay) if $perpetual; # Let the system breathe for a moment
	}
	unlink $lockfile;
}

######### Helpers {{{1

sub error($) {
	print STDERR ts().shift()."\n";
}

sub ferror(@) {
	error(sprintf($_[0], @_[1..$#_]));
}

sub fatal($) {
	error(shift);
	exit 1;
}

######### Main {{{1

# Parse options
Getopt::Long::Configure('bundling');
my $parse_res = GetOptions(
	'help|?' => sub { pod2usage(-verbose => 1, -exitval => 0); },
	'quiet|q' => \$quiet,
	'progress|P' => \$progress,
	'kill-after|k=i' => \$kill_after,
	'max-parallel|p=i' => \$max_par,
	'max-intensive-parallel|i=i' => \$max_par_intensive,
	'load-triggers=s' => \$load_triggers,
	'restart-delay|d=i' => \$restart_delay,
	'lockfile|l=s' => \$lockfile,
	'all-once|a' => \$all_once,
	'one|o=s' => \$one,
) || pod2usage(2);
fatal("Error: can only use one out of --all-once and --one")
	if ($all_once && $one);

unless ($quiet) {
	$ENV{'show_progress'} = '1';
	$progress = 1;
}

$load_triggers = '0,0' if (!-f '/proc/loadavg');
($load_trig, $load_untrig) = split(/,/, $load_triggers);

if ($one) {
	queue_one($one);
	run_queue();
	exit;
}

if ($all_once) {
	queue_all();
	run_queue();
	exit;
}

run_perpetually();

########## Documentation {{{1

__END__

=head1 NAME

jobd - Perform Girocco maintenance jobs

=head1 SYNOPSIS

jobd [options]

 Options:
   -? | --help                           detailed instructions
   -q | --quiet                          run quietly
   -P | --progress                       show occasional status updates
   -k SECONDS | --kill-after SECONDS     how long to wait before killing jobs
   -p NUM | --max-parallel NUM           how many jobs to run at the same time
   -i NUM | --max-intensive-parallel NUM how many resource-hungry jobs to run
                                         at the same time
   --load-triggers TRIG,UNTRIG           stop queueing jobs at load above
                                         TRIG and resume at load below UNTRIG
   -d SECONDS | --restart-delay SECONDS  wait for this many seconds between
                                         queue runs
   -l FILE | --lockfile FILE             create a lockfile in the given
                                         location
   -a | --all-once                       process the list only once
   -o PRJNAME | --one PRJNAME            process only one project

=head1 OPTIONS

=over 8

=item B<--help>

Print the full description of jobd's options.

=item B<--quiet>

Suppress non-error messages, e.g. for use when running this task as a cronjob.

=item B<--progress>

Show information about the current status of the job queue occasionally. This
is automatically enabled if --quiet is not given.

=item B<--kill-after SECONDS>

Kill supervised jobs that run longer than SECONDS seconds, to avoid hanging
the daemon. (Mirrors imported from SVN are given three times as long, since
git svn can be very slow.)

=item B<--max-parallel NUM>

Run no more than NUM jobs at the same time.

=item B<--max-intensive-parallel NUM>

Run no more than NUM resource-hungry jobs at the same time. Right now, this
refers to repacking jobs.

=item B<--load-triggers TRIG,UNTRIG>

If the first system load average (the 1-minute average) exceeds TRIG, don't
queue any more jobs until it drops below UNTRIG. This is currently supported
on Linux and on any other platform that provides an uptime command with load
average output.

If both values are zero, load checks are disabled. Note that this is not the
default.

=item B<--restart-delay SECONDS>

After processing the queue, wait this many seconds before the queue is
restarted.

=item B<--lockfile FILE>

For perpetual operation, create a lockfile at the given location and clean it
up after finishing/aborting.

=item B<--all-once>

Instead of perpetually processing all projects over and over again, process
them just once and then exit.

=item B<--one PRJNAME>

Process only the given project (given as just the project name without the
C<.git> suffix) and then exit.

=back

=head1 DESCRIPTION

jobd is Girocco's repository maintenance servant; it periodically checks all
the repositories, updates mirrored repositories and repacks push-mode
repositories when needed.

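For example (illustrative invocations only; the project name is a
placeholder):

    jobd --all-once --progress    # process every project once, with progress output
    jobd --one someproject        # update (and gc) a single project, then exit
    jobd --max-parallel 10        # run perpetually with at most 10 parallel jobs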

=cut