3 # jobd - perform Girocco maintenance jobs
5 # Run with --help for details
12 use POSIX
":sys_wait_h";
24 my $max_par_intensive = 1;
25 my $load_triggers = '10,2';
26 my $lockfile = "/tmp/jobd-$Girocco::Config::tmpsuffix.lock";
27 my $restart_delay = 60;
31 my ($load_trig, $load_untrig);
37 my $p = $job->{'project'};
38 check_project_exists
($job) || return;
39 if (-e get_project_path
($p).".nofetch") {
41 return setup_gc
($job);
43 if (-e get_project_path
($p).".clone_in_progress" && ! -e get_project_path
($p).".clone_failed") {
44 job_skip
($job, "initial mirroring not complete yet");
47 if (-e get_project_path
($p).".clone_failed") {
48 job_skip
($job, "initial mirroring failed");
49 # Still need to gc non top-level clones even if they've failed
50 # otherwise the objects copied into them from the parent will
51 # just accumulate without bound
52 setup_gc
($job) if $p =~ m
,/,;
55 if (my $ts = is_operation_uptodate
($p, 'lastrefresh', rand_adjust
($Girocco::Config
::min_mirror_interval
))) {
56 job_skip
($job, "not needed right now, last run at $ts");
60 if (is_svn_clone
($p)) {
61 # git svn can be very, very slow at times
62 $job->{'timeout_factor'} = 3;
64 exec_job_command
($job, ["$Girocco::Config::basedir/jobd/update.sh", $p], $quiet);
69 my $p = $job->{'project'};
70 check_project_exists
($job) || return;
71 if (my $ts = is_operation_uptodate
($p, 'lastgc', rand_adjust
($Girocco::Config
::min_gc_interval
))) {
72 job_skip
($job, "not needed right now, last run at $ts");
75 exec_job_command
($job, ["$Girocco::Config::basedir/jobd/gc.sh", $p], $quiet);
81 project
=> $job->{'project'},
83 command
=> \
&gc_project
,
88 sub check_project_exists
{
90 my $p = $job->{'project'};
91 if (!-d get_project_path
($p)) {
92 job_skip
($job, "non-existent project");
98 sub get_project_path
{
99 "$Girocco::Config::reporoot/".shift().".git/";
102 sub is_operation_uptodate
{
103 my ($project, $which, $threshold) = @_;
104 my $path = get_project_path
($project);
105 my $timestamp = `GIT_DIR="$path" $Girocco::Config::git_bin config "gitweb.$which"`;
106 my $unix_ts = parse_rfc2822_date
($timestamp) || 0;
107 (time - $unix_ts) <= $threshold ?
$timestamp : undef;
112 my $path = get_project_path
($project);
113 my $baseurl = `GIT_DIR="$path" $Girocco::Config::git_bin config "gitweb.baseurl"`;
114 my $svnurl = `GIT_DIR="$path" $Girocco::Config::git_bin config "svn-remote.svn.url"`;
115 return $baseurl =~ /^svn[:+]/i && $svnurl;
123 command
=> \
&update_project
,
124 on_success
=> \
&setup_gc
,
125 on_error
=> \
&setup_gc
,
130 queue_one
($_) for (Girocco
::Project
->get_full_list());
133 ######### Daemon operation {{{1
143 sub handle_softexit
{
144 error
("Waiting for outstanding jobs to finish... ".
145 "^C again to exit immediately");
148 $SIG{'INT'} = \
&handle_exit
;
152 error
("Killing outstanding jobs...");
153 $SIG{'TERM'} = 'IGNORE';
155 kill 'KILL', -($_->{'pid'});
157 unlink $lockfile if ($locked);
163 $opts{'queued_at'} = time;
164 $opts{'dont_run'} = 0;
165 $opts{'intensive'} = 0 unless exists $opts{'intensive'};
173 $job->{'command'}->($job);
174 if ($job->{'dont_run'}) {
183 "[".$job->{'type'}."::".$job->{'project'}."]";
186 # Call this at most once per job: it records a single pid and start time in $job!
187 sub exec_job_command
{
188 my ($job, $command, $err_only) = @_;
191 if (!defined($pid = fork)) {
192 error
(_job_name
($job) ." Can't fork job: $!");
193 $job->{'finished'} = 1;
197 open STDIN
, '/dev/null' || do {
198 error
(_job_name
($job) ."Can't read from /dev/null: $!");
199 $job->{'finished'} = 1;
203 open STDOUT
, '>/dev/null' || do {
204 error
(_job_name
($job) ." Can't write to /dev/null: $!");
205 $job->{'finished'} = 1;
209 # New process group so we can keep track of all of its children
210 if (!defined(POSIX
::setpgid
(0, 0))) {
211 error
(_job_name
($job) ." Can't create process group: $!");
212 $job->{'finished'} = 1;
216 select(undef, undef, undef, 0.1);
218 # Stop perl from complaining
221 $job->{'pid'} = $pid;
222 $job->{'finished'} = 0;
223 $job->{'started_at'} = time;
227 my ($job, $msg) = @_;
228 $job->{'dont_run'} = 1;
229 error
(_job_name
($job) ." Skipping job: $msg") unless $quiet || !$msg;
232 sub reap_hanging_jobs
{
234 my $factor = $_->{'timeout_factor'} || 1;
235 if (defined($_->{'started_at'}) && (time - $_->{'started_at'}) > ($kill_after * $factor)) {
236 $_->{'finished'} = 1;
237 kill 'KILL', -($_->{'pid'});
238 error
(_job_name
($_) ." KILLED due to timeout");
239 push @jobs_killed, _job_name
($_);
244 sub reap_finished_jobs
{
246 my $finished_any = 0;
248 $pid = waitpid(-1, WNOHANG
);
252 my @child = grep { $_->{'pid'} && $_->{'pid'} == $pid } @running;
254 # XXX- we currently don't care
256 if (@child && !$child[0]->{'finished'}) {
257 $child[0]->{'on_success'}->($child[0]) if defined($child[0]->{'on_success'});
258 $child[0]->{'finished'} = 1;
261 $child[0]->{'on_error'}->($child[0]) if defined($child[0]->{'on_error'});
264 @running = grep { $_->{'finished'} == 0 } @running;
268 sub have_intensive_jobs
{
269 grep { $_->{'intensive'} == 1 } @running;
273 "[". scalar(localtime) ."] ";
277 if ($^O
eq "linux") {
278 # Read /proc/loadavg on Linux
279 open(LOADAV
, '<', '/proc/loadavg') or return undef;
280 my $loadinfo = <LOADAV
>;
282 return (split(/\s/, $loadinfo, 4))[0..2];
284 # Read the output of uptime everywhere else (works on Linux too)
285 open(LOADAV
, '-|', 'uptime') or return undef;
286 my $loadinfo = <LOADAV
>;
288 $loadinfo =~ /load average[^0-9.]*([0-9.]+)[^0-9.]+([0-9.]+)[^0-9.]+([0-9.]+)/iso or return undef;
294 my $last_progress = time;
295 my $last_checkload = time - 5;
296 my $current_load = $load_trig;
303 ferror
("--- Processing %d queued jobs", scalar(@queue));
305 $SIG{'INT'} = \
&handle_softexit
;
306 $SIG{'TERM'} = \
&handle_exit
;
307 while (@queue || @running) {
309 my $proceed_immediately = reap_finished_jobs
();
310 # Check current system load
311 if ($load_trig && (time - $last_checkload) >= 5 && defined((my @loadinfo = get_load_info
())[0])) {
312 my $current_load = $loadinfo[0];
313 if ($current_load > $load_trig && !$overloaded) {
315 error
("PAUSE: system load is at $current_load > $load_trig") if $progress;
316 } elsif ($current_load < $load_untrig && $overloaded) {
318 error
("RESUME: system load is at $current_load < $load_untrig") if $progress;
321 $load_info = ', paused (load '. $current_load .')';
323 $load_info = ', load '. $current_load;
325 $last_checkload = time;
328 if ($progress && (time - $last_progress) >= 60) {
329 ferror
("STATUS: %d queued, %d running, %d finished, %d skipped, %d killed$load_info", scalar(@queue), scalar(@running), $jobs_executed, $jobs_skipped, scalar(@jobs_killed));
333 push @run_status, _job_name
($_)." ". (time - $_->{'started_at'}) ."s";
335 error
("STATUS: currently running: ". join(', ', @run_status));
337 $last_progress = time;
339 # Back off if we're too busy
340 if (@running >= $max_par || have_intensive_jobs
() >= $max_par_intensive || !@queue || $overloaded) {
341 sleep 1 unless $proceed_immediately;
345 run_job
(shift(@queue)) if @queue;
348 ferror
("--- Queue processed. %d jobs executed, %d skipped, %d killed.", $jobs_executed, $jobs_skipped, scalar(@jobs_killed));
352 sub run_perpetually
{
354 die "Lockfile exists. Please make sure no other instance of jobd is running.";
356 open LOCK
, '>', $lockfile || die "Cannot create lockfile $lockfile: $!";
364 sleep($restart_delay) if $perpetual; # Let the system breathe for a moment
369 ######### Helpers {{{1
372 print STDERR ts
().shift()."\n";
375 error
(sprintf($_[0], @_[1..$#_]));
385 Getopt
::Long
::Configure
('bundling');
386 my $parse_res = GetOptions
(
387 'help|?' => sub { pod2usage
(-verbose
=> 1, -exitval
=> 0); },
388 'quiet|q' => \
$quiet,
389 'progress|P' => \
$progress,
390 'kill-after|k=i' => \
$kill_after,
391 'max-parallel|p=i' => \
$max_par,
392 'max-intensive-parallel|i=i' => \
$max_par_intensive,
393 'load-triggers=s' => \
$load_triggers,
394 'restart-delay|d=i' => \
$restart_delay,
395 'lockfile|l=s' => \
$lockfile,
396 'all-once|a' => \
$all_once,
399 fatal
("Error: can only use one out of --all-once and --one")
400 if ($all_once && $one);
403 $ENV{'show_progress'} = '1';
407 $load_triggers = '0,0' if (!-f
'/proc/loadavg');
408 ($load_trig, $load_untrig) = split(/,/, $load_triggers);
424 ########## Documentation {{{1
430 jobd - Perform Girocco maintenance jobs
437 -h | --help detailed instructions
438 -q | --quiet run quietly
439 -P | --progress show occasional status updates
440 -k SECONDS | --kill-after SECONDS how long to wait before killing jobs
441 -p NUM | --max-parallel NUM how many jobs to run at the same time
442 -i NUM | --max-intensive-parallel NUM how many resource-hungry jobs to run
444 --load-triggers TRIG,UNTRIG stop queueing jobs at load above
445 TRIG and resume at load below UNTRIG
446 -d SECONDS | --restart-delay SECONDS wait for this many seconds between
448 -l FILE | --lockfile FILE create a lockfile in the given
450 -a | --all-once process the list only once
451 -o PRJNAME | --one PRJNAME process only one project
459 Print the full description of jobd's options.
463 Suppress non-error messages, e.g. for use when running this task as a cronjob.
467 Show information about the current status of the job queue occasionally. This
468 is automatically enabled if --quiet is not given.
470 =item B<--kill-after SECONDS>
472 Kill supervised jobs after a certain time to avoid hanging the daemon.
474 =item B<--max-parallel NUM>
476 Run no more than that many jobs at the same time.
478 =item B<--max-intensive-parallel NUM>
480 Run no more than that many resource-hungry jobs at the same time. Right now,
481 this refers to repacking jobs.
483 =item B<--load-triggers TRIG,UNTRIG>
485 If the first system load average (1 minute average) exceeds TRIG, don't queue
486 any more jobs until it goes below UNTRIG. This is currently only supported on
487 Linux and any other platforms that provide an uptime command with load average
490 If both values are zero, load checks are disabled. Note that this is not the
493 =item B<--restart-delay SECONDS>
495 After processing the queue, wait this many seconds until the queue is
498 =item B<--lockfile FILE>
500 For perpetual operation, create a lockfile in that place and clean it up after
505 Instead of perpetually processing all projects over and over again, process
506 them just once and then exit.
508 =item B<--one PRJNAME>
510 Process only the given project (given as just the project name without C<.git>
511 suffix) and then exit.
517 jobd is Girocco's repository maintenance servant; it periodically checks all
518 the repositories and updates mirrored repositories and repacks push-mode
519 repositories when needed.