3 # jobd - perform Girocco maintenance jobs
5 # Run with --help for details
12 use POSIX
":sys_wait_h";
23 my $max_par_intensive = 1;
24 my $load_triggers = '10,2';
25 my $lockfile = "/tmp/jobd.lock";
26 my $restart_delay = 60;
30 my ($load_trig, $load_untrig);
36 my $p = $job->{'project'};
37 check_project_exists
($job) || return;
38 if (-e get_project_path
($p).".nofetch") {
40 return setup_gc
($job);
42 if (-e get_project_path
($p).".clone_in_progress") {
43 job_skip
($job, "initial mirroring not complete yet");
46 if (my $ts = is_operation_uptodate
($p, 'lastrefresh', $Girocco::Config
::min_mirror_interval
)) {
47 job_skip
($job, "not needed right now, last run at $ts");
51 exec_job_command
($job, ["$Girocco::Config::basedir/jobd/update.sh", $p], $quiet);
56 my $p = $job->{'project'};
57 check_project_exists
($job) || return;
58 if (my $ts = is_operation_uptodate
($p, 'lastgc', $Girocco::Config
::min_gc_interval
)) {
59 job_skip
($job, "not needed right now, last run at $ts");
62 exec_job_command
($job, ["$Girocco::Config::basedir/jobd/gc.sh", $p], $quiet);
68 project
=> $job->{'project'},
70 command
=> \
&gc_project
,
# Verify that the project named in the job has a repository directory.  When
# the directory reported by get_project_path() is missing, the job is flagged
# through job_skip() with the reason "non-existent project".
# NOTE(review): callers use the result as a boolean ("|| return"); the
# statements that produce that value are outside this excerpt - confirm.
75 sub check_project_exists
{
77 my $p = $job->{'project'};
78 if (!-d get_project_path
($p)) {
79 job_skip
($job, "non-existent project");
# Build the on-disk path of a project's repository: the configured
# $Girocco::Config::reporoot, a slash, the project name taken from the first
# argument, and a ".git/" suffix.  The result ends in a slash, so callers can
# append a file name (e.g. ".nofetch") directly.
85 sub get_project_path
{
86 "$Girocco::Config::reporoot/".shift().".git/";
# Decide whether a periodic operation on a project ran recently enough to be
# skipped this time.
#   $project   - project name, resolved through get_project_path()
#   $which     - name of the timestamp key; it is looked up as "gitweb.$which"
#                in the repository's git configuration
#   $threshold - maximum age, in seconds, for the last run to count as recent
# Evaluates to the stored timestamp string when its age is within $threshold,
# and to undef otherwise.
89 sub is_operation_uptodate
{
90 my ($project, $which, $threshold) = @_;
91 my $path = get_project_path
($project);
# Read the recorded timestamp using the configured git binary.
92 my $timestamp = `GIT_DIR="$path" $Girocco::Config::git_bin config "gitweb.$which"`;
# Convert the timestamp to epoch seconds with the external date command.
# NOTE(review): $timestamp is used exactly as captured (any trailing newline
# included) and is not checked for being empty - confirm what should happen
# when the key has never been set.
93 my $unix_ts = `date +%s -d "$timestamp"`;
94 (time - $unix_ts) <= $threshold ?
$timestamp : undef;
102 command
=> \
&update_project
,
103 on_success
=> \
&setup_gc
,
104 on_error
=> \
&setup_gc
,
109 queue_one
($_) for (Girocco
::Project
->get_full_list());
112 ######### Daemon operation {{{1
122 sub handle_softexit
{
123 error
("Waiting for outstanding jobs to finish... ".
124 "^C again to exit immediately");
127 $SIG{'INT'} = \
&handle_exit
;
131 error
("Killing outstanding jobs...");
132 $SIG{'TERM'} = 'IGNORE';
134 kill 'KILL', -($_->{'pid'});
136 unlink $lockfile if ($locked);
142 $opts{'queued_at'} = time;
143 $opts{'dont_run'} = 0;
144 $opts{'intensive'} = 0 unless exists $opts{'intensive'};
152 $job->{'command'}->($job);
153 if ($job->{'dont_run'}) {
162 "[".$job->{'type'}."::".$job->{'project'}."]";
165 # Call this at most once per job: the job records a single child pid.
# Start the external command belonging to a job in a forked process.
#   $job      - job record; its 'pid', 'finished' and 'started_at' fields are
#               written here
#   $command  - the command to run (callers pass an array ref of program path
#               followed by its arguments)
#   $err_only - callers pass their "quiet" setting here
#               NOTE(review): presumably this guards the STDOUT redirection
#               below; the guarding line is outside this excerpt - confirm.
# Every failure path logs through error() and marks the job as finished.
166 sub exec_job_command
{
167 my ($job, $command, $err_only) = @_;
170 if (!defined($pid = fork)) {
171 error
(_job_name
($job) ." Can't fork job: $!");
172 $job->{'finished'} = 1;
# Low-precedence "or" is required here: with "||" the handler block attaches
# to the (always true) filename string and a failed open goes unnoticed.
176 open STDIN
, '/dev/null' or do {
177 error
(_job_name
($job) ." Can't read from /dev/null: $!");
178 $job->{'finished'} = 1;
# Same precedence consideration as for STDIN above.
182 open STDOUT
, '>/dev/null' or do {
183 error
(_job_name
($job) ." Can't write to /dev/null: $!");
184 $job->{'finished'} = 1;
188 # New process group so we can keep track of all of its children
189 if (!defined(POSIX
::setpgid
(0, 0))) {
190 error
(_job_name
($job) ." Can't create process group: $!");
191 $job->{'finished'} = 1;
# Four-argument select with no handles is a sub-second sleep (0.1 s).
195 select(undef, undef, undef, 0.1);
197 # Stop perl from complaining
# Bookkeeping for the supervisor: remember the child and when it started.
200 $job->{'pid'} = $pid;
201 $job->{'finished'} = 0;
202 $job->{'started_at'} = time;
206 my ($job, $msg) = @_;
207 $job->{'dont_run'} = 1;
208 error
(_job_name
($job) ." Skipping job: $msg") unless $quiet || !$msg;
211 sub reap_hanging_jobs
{
213 if (defined($_->{'started_at'}) && (time - $_->{'started_at'}) > $kill_after) {
214 $_->{'finished'} = 1;
215 kill 'KILL', -($_->{'pid'});
216 error
(_job_name
($_) ." KILLED due to timeout");
217 push @jobs_killed, _job_name
($_);
222 sub reap_finished_jobs
{
224 my $finished_any = 0;
226 $pid = waitpid(-1, WNOHANG
);
230 my @child = grep { $_->{'pid'} && $_->{'pid'} == $pid } @running;
232 # XXX- we currently don't care
234 if (@child && !$child[0]->{'finished'}) {
235 $child[0]->{'on_success'}->($child[0]) if defined($child[0]->{'on_success'});
236 $child[0]->{'finished'} = 1;
239 $child[0]->{'on_error'}->($child[0]) if defined($child[0]->{'on_error'});
242 @running = grep { $_->{'finished'} == 0 } @running;
# Select the entries of @running whose 'intensive' flag equals 1.  The sub's
# value is the grep result itself, so a numeric comparison at the call site
# sees the number of matching jobs.
246 sub have_intensive_jobs
{
247 grep { $_->{'intensive'} == 1 } @running;
251 "[". scalar(localtime) ."] ";
255 my $last_progress = time;
256 my $last_checkload = time - 5;
257 my $current_load = $load_trig;
264 ferror
("--- Processing %d queued jobs", scalar(@queue));
266 $SIG{'INT'} = \
&handle_softexit
;
267 $SIG{'TERM'} = \
&handle_exit
;
268 while (@queue || @running) {
270 my $proceed_immediately = reap_finished_jobs
();
271 # Check current system load
272 if ($load_trig && (time - $last_checkload) >= 5 && open(LOADAV
, '<', '/proc/loadavg')) {
273 my $loadinfo = <LOADAV
>;
275 my @loadinfo = split(/\s/, $loadinfo);
276 my $current_load = $loadinfo[0];
277 if ($current_load > $load_trig && !$overloaded) {
279 error
("PAUSE: system load is at $current_load > $load_trig") if $progress;
280 } elsif ($current_load < $load_untrig && $overloaded) {
282 error
("RESUME: system load is at $current_load < $load_untrig") if $progress;
285 $load_info = ', paused (load '. $current_load .')';
287 $load_info = ', load '. $current_load;
289 $last_checkload = time;
292 if ($progress && (time - $last_progress) >= 60) {
293 ferror
("STATUS: %d queued, %d running, %d finished, %d skipped, %d killed$load_info", scalar(@queue), scalar(@running), $jobs_executed, $jobs_skipped, scalar(@jobs_killed));
297 push @run_status, _job_name
($_)." ". (time - $_->{'started_at'}) ."s";
299 error
("STATUS: currently running: ". join(', ', @run_status));
301 $last_progress = time;
303 # Back off if we're too busy
304 if (@running >= $max_par || have_intensive_jobs
() >= $max_par_intensive || !@queue || $overloaded) {
305 sleep 1 unless $proceed_immediately;
309 run_job
(shift(@queue)) if @queue;
312 ferror
("--- Queue processed. %d jobs executed, %d skipped, %d killed.", $jobs_executed, $jobs_skipped, scalar(@jobs_killed));
# Long-running mode: refuses to start while a lockfile is present, creates the
# lockfile, and pauses $restart_delay seconds between rounds when $perpetual
# is set.
# NOTE(review): the lockfile existence test and the processing loop are on
# lines outside this excerpt - confirm against the full file.
316 sub run_perpetually
{
318 die "Lockfile exists. Please make sure no other instance of jobd is running.";
# Low-precedence "or" so that die fires when open() fails; with "||" it was
# tied to the truth of $lockfile and a failed open went unreported.
320 open LOCK
, '>', $lockfile or die "Cannot create lockfile $lockfile: $!";
328 sleep($restart_delay) if $perpetual; # Let the system breathe for a moment
333 ######### Helpers {{{1
336 print STDERR ts
().shift()."\n";
349 Getopt
::Long
::Configure
('bundling');
350 my $parse_res = GetOptions
(
351 'help|?' => sub { pod2usage
(-verbose
=> 1, -exitval
=> 0); },
352 'quiet|q' => \
$quiet,
353 'progress|P' => \
$progress,
354 'kill-after|k=i' => \
$kill_after,
355 'max-parallel|p=i' => \
$max_par,
356 'max-intensive-parallel|i=i' => \
$max_par_intensive,
357 'load-triggers=s' => \
$load_triggers,
358 'restart-delay|d=i' => \
$restart_delay,
359 'lockfile|l=s' => \
$lockfile,
360 'all-once|a' => \
$all_once,
363 fatal
("Error: can only use one out of --all-once and --one")
364 if ($all_once && $one);
367 $ENV{'show_progress'} = '1';
371 $load_triggers = '0,0' if (!-f
'/proc/loadavg');
372 ($load_trig, $load_untrig) = split(/,/, $load_triggers);
388 ########## Documentation {{{1
394 jobd - Perform Girocco maintenance jobs
401 -h | --help detailed instructions
402 -q | --quiet run quietly
403 -P | --progress show occasional status updates
404 -k SECONDS | --kill-after SECONDS how long to wait before killing jobs
405 -p NUM | --max-parallel NUM how many jobs to run at the same time
406 -i NUM | --max-intensive-parallel NUM how many resource-hungry jobs to run
408 --load-triggers TRIG,UNTRIG stop queueing jobs at load above
409 TRIG and resume at load below UNTRIG
410 -d SECONDS | --restart-delay SECONDS wait for this many seconds between
412 -l FILE | --lockfile FILE create a lockfile in the given
414 -a | --all-once process the list only once
415 -o PRJNAME | --one PRJNAME process only one project
423 Print the full description of jobd's options.
427 Suppress non-error messages, e.g. for use when running this task as a cronjob.
431 Show information about the current status of the job queue occasionally. This
432 is automatically enabled if --quiet is not given.
434 =item B<--kill-after SECONDS>
436 Kill supervised jobs after a certain time to avoid hanging the daemon.
438 =item B<--max-parallel NUM>
440 Run no more than that many jobs at the same time.
442 =item B<--max-intensive-parallel NUM>
444 Run no more than that many resource-hungry jobs at the same time. Right now,
445 this refers to repacking jobs.
447 =item B<--load-triggers TRIG,UNTRIG>
449 If the first system load average (1 minute average) exceeds TRIG, don't queue
450 any more jobs until it goes below UNTRIG. This is currently only supported on
453 If both values are zero, load checks are disabled. Note that this is not the
456 =item B<--restart-delay SECONDS>
458 After processing the queue, wait this many seconds until the queue is
461 =item B<--lockfile FILE>
463 For perpetual operation, create a lockfile in that place and clean it up after
468 Instead of perpetually processing all projects over and over again, process
469 them just once and then exit.
471 =item B<--one PRJNAME>
473 Process only the given project (given as just the project name without C<.git>
474 suffix) and then exit.
480 jobd is Girocco's repository maintenance servant; it periodically checks all
481 the repositories and updates mirrored repositories and repacks push-mode
482 repositories when needed.