--retries works (possibly breaking something else).
[parallel.git] / src / parallel
blobf2c8c4e63aab6531125b8f20275b6006f98fe641
1 #!/usr/bin/env perl
3 # Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, see <http://www.gnu.org/licenses/>
17 # or write to the Free Software Foundation, Inc., 51 Franklin St,
18 # Fifth Floor, Boston, MA 02110-1301 USA
20 # open3 used in Job::start
21 use IPC::Open3;
22 use POSIX;
23 # gensym used in Job::start
24 use Symbol qw(gensym);
25 # tempfile used in Job::start
26 use File::Temp qw(tempfile tempdir);
27 # mkpath used in openresultsfile
28 use File::Path;
29 # GetOptions used in get_options_from_array
30 use Getopt::Long;
31 # Used to ensure code quality
32 use strict;
33 use File::Basename;
35 sub set_input_source_header($$) {
36 my ($command_ref,$input_source_fh_ref) = @_;
37 if($opt::header and not $opt::pipe) {
38 # split with colsep or \t
39 # $header force $colsep = \t if undef?
40 my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
41 # regexp for {=
42 my $left = "\Q$Global::parensleft\E";
43 my $l = $Global::parensleft;
44 # regexp for =}
45 my $right = "\Q$Global::parensright\E";
46 my $r = $Global::parensright;
47 my $id = 1;
48 for my $fh (@$input_source_fh_ref) {
49 my $line = <$fh>;
50 chomp($line);
51 $line =~ s/\r$//;
52 ::debug("init", "Delimiter: '$delimiter'");
53 for my $s (split /$delimiter/o, $line) {
54 ::debug("init", "Colname: '$s'");
55 # Replace {colname} with {2}
56 for(@$command_ref, @Global::ret_files,
57 @Global::transfer_files, $opt::tagstring,
58 $opt::workdir, $opt::results, $opt::retries) {
59 # Skip if undefined
60 $_ or next;
61 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
62 # {=header1 ... =} => {=1 ... =}
63 s:$left $s (.*?) $right:$l$id$1$r:gx;
65 $Global::input_source_header{$id} = $s;
66 $id++;
69 } else {
70 my $id = 1;
71 for my $fh (@$input_source_fh_ref) {
72 $Global::input_source_header{$id} = $id;
73 $id++;
78 sub max_jobs_running() {
79 # Compute $Global::max_jobs_running as the max number of jobs
80 # running on each sshlogin.
81 # Returns:
82 # $Global::max_jobs_running
83 if(not $Global::max_jobs_running) {
84 for my $sshlogin (values %Global::host) {
85 $sshlogin->max_jobs_running();
88 if(not $Global::max_jobs_running) {
89 ::error("Cannot run any jobs.");
90 wait_and_exit(255);
92 return $Global::max_jobs_running;
95 sub halt() {
96 # Compute exit value,
97 # wait for children to complete
98 # and exit
99 if($opt::halt and $Global::halt_when ne "never") {
100 if(not defined $Global::halt_exitstatus) {
101 if($Global::halt_pct) {
102 $Global::halt_exitstatus =
103 ::ceil($Global::total_failed /
104 ($Global::total_started || 1) * 100);
105 } elsif($Global::halt_count) {
106 $Global::halt_exitstatus =
107 ::min(undef_as_zero($Global::total_failed),101);
110 wait_and_exit($Global::halt_exitstatus);
111 } else {
112 wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
117 sub __PIPE_MODE__() {}
120 sub pipepart_setup() {
121 # Compute the blocksize
122 # Generate the commands to extract the blocks
123 # Push the commands on queue
124 # Changes:
125 # @Global::cat_prepends
126 # $Global::JobQueue
127 if($opt::tee) {
128 # Prepend each command with
129 # < file
130 my $cat_string = "< ".Q($opt::a[0]);
131 for(1..$Global::JobQueue->total_jobs()) {
132 push @Global::cat_appends, $cat_string;
133 push @Global::cat_prepends, "";
135 } else {
136 if(not $opt::blocksize) {
137 # --blocksize with 10 jobs per jobslot
138 $opt::blocksize = -10;
140 if($opt::roundrobin) {
141 # --blocksize with 1 job per jobslot
142 $opt::blocksize = -1;
144 if($opt::blocksize < 0) {
145 my $size = 0;
146 # Compute size of -a
147 for(@opt::a) {
148 if(-f $_) {
149 $size += -s $_;
150 } elsif(-b $_) {
151 $size += size_of_block_dev($_);
152 } elsif(-e $_) {
153 ::error("$_ is neither a file nor a block device");
154 wait_and_exit(255);
155 } else {
156 ::error("File not found: $_");
157 wait_and_exit(255);
160 # Run in total $job_slots*(- $blocksize) jobs
161 # Set --blocksize = size / no of proc / (- $blocksize)
162 $Global::dummy_jobs = 1;
163 $Global::blocksize = 1 +
164 int($size / max_jobs_running() /
165 -multiply_binary_prefix($opt::blocksize));
167 @Global::cat_prepends = map { pipe_part_files($_) } @opt::a;
168 # Unget the empty arg as many times as there are parts
169 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
170 map { [Arg->new("\0noarg")] } @Global::cat_prepends
175 sub pipe_tee_setup() {
176 # Create temporary fifos
177 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
178 # This will spread the input to fifos
179 # Generate commands that reads from fifo1..N:
180 # cat fifo | user_command
181 # Changes:
182 # @Global::cat_prepends
183 my @fifos;
184 for(1..$Global::JobQueue->total_jobs()) {
185 push @fifos, tmpfifo();
187 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
188 if(not fork()){
189 # Test if tee supports --output-error=warn-nopipe
190 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
191 my $opt = $? ? "" : "--output-error=warn-nopipe";
192 ::debug("init","tee $opt");
193 # Let tee inherit our stdin
194 # and redirect stdout to null
195 open STDOUT, ">","/dev/null";
196 if($opt) {
197 exec "tee", $opt, @fifos;
198 } else {
199 exec "tee", @fifos;
202 # For each fifo
203 # (rm fifo1; grep 1) < fifo1
204 # (rm fifo2; grep 2) < fifo2
205 # (rm fifo3; grep 3) < fifo3
206 # Remove the tmpfifo as soon as it is open
207 @Global::cat_prepends = map { "(rm $_;" } @fifos;
208 @Global::cat_appends = map { ") < $_" } @fifos;
212 sub parcat_script() {
213 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
214 my $script = q'{
215 use POSIX qw(:errno_h);
216 use IO::Select;
217 use strict;
218 use threads;
219 use Thread::Queue;
220 use Fcntl qw(:DEFAULT :flock);
222 my $opened :shared;
223 my $q = Thread::Queue->new();
224 my $okq = Thread::Queue->new();
225 my @producers;
227 if(not @ARGV) {
228 if(-t *STDIN) {
229 print "Usage:\n";
230 print " parcat file(s)\n";
231 print " cat argfile | parcat\n";
232 } else {
233 # Read arguments from stdin
234 chomp(@ARGV = <STDIN>);
237 my $files_to_open = 0;
238 # Default: fd = stdout
239 my $fd = 1;
240 for (@ARGV) {
241 # --rm = remove file when opened
242 /^--rm$/ and do { $opt::rm = 1; next; };
243 # -1 = output to fd 1, -2 = output to fd 2
244 /^-(\d+)$/ and do { $fd = $1; next; };
245 push @producers, threads->create("producer", $_, $fd);
246 $files_to_open++;
249 sub producer {
250 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
251 my $file = shift;
252 my $output_fd = shift;
253 open(my $fh, "<", $file) || do {
254 print STDERR "parcat: Cannot open $file\n";
255 exit(1);
257 # Remove file when it has been opened
258 if($opt::rm) {
259 unlink $file;
261 set_fh_non_blocking($fh);
262 $opened++;
263 # Pass the fileno to parent
264 $q->enqueue(fileno($fh),$output_fd);
265 # Get an OK that the $fh is opened and we can release the $fh
266 while(1) {
267 my $ok = $okq->dequeue();
268 if($ok == fileno($fh)) { last; }
269 # Not ours - very unlikely to happen
270 $okq->enqueue($ok);
272 return;
275 my $s = IO::Select->new();
276 my %buffer;
278 sub add_file {
279 my $infd = shift;
280 my $outfd = shift;
281 open(my $infh, "<&=", $infd) || die;
282 open(my $outfh, ">&=", $outfd) || die;
283 $s->add($infh);
284 # Tell the producer now opened here and can be released
285 $okq->enqueue($infd);
286 # Initialize the buffer
287 @{$buffer{$infh}{$outfd}} = ();
288 $Global::fh{$outfd} = $outfh;
291 sub add_files {
292 # Non-blocking dequeue
293 my ($infd,$outfd);
294 do {
295 ($infd,$outfd) = $q->dequeue_nb(2);
296 if(defined($outfd)) {
297 add_file($infd,$outfd);
299 } while(defined($outfd));
302 sub add_files_block {
303 # Blocking dequeue
304 my ($infd,$outfd) = $q->dequeue(2);
305 add_file($infd,$outfd);
309 my $fd;
310 my (@ready,$infh,$rv,$buf);
311 do {
312 # Wait until at least one file is opened
313 add_files_block();
314 while($q->pending or keys %buffer) {
315 add_files();
316 while(keys %buffer) {
317 @ready = $s->can_read(0.01);
318 if(not @ready) {
319 add_files();
321 for $infh (@ready) {
322 # There is only one key, namely the output file descriptor
323 for my $outfd (keys %{$buffer{$infh}}) {
324 $rv = sysread($infh, $buf, 65536);
325 if (!$rv) {
326 if($! == EAGAIN) {
327 # Would block: Nothing read
328 next;
329 } else {
330 # Nothing read, but would not block:
331 # This file is done
332 $s->remove($infh);
333 for(@{$buffer{$infh}{$outfd}}) {
334 syswrite($Global::fh{$outfd},$_);
336 delete $buffer{$infh};
337 # Closing the $infh causes it to block
338 # close $infh;
339 add_files();
340 next;
343 # Something read.
344 # Find \n or \r for full line
345 my $i = (rindex($buf,"\n")+1);
346 if($i) {
347 # Print full line
348 for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
349 syswrite($Global::fh{$outfd},$_);
351 # @buffer = remaining half line
352 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
353 } else {
354 # Something read, but not a full line
355 push @{$buffer{$infh}{$outfd}}, $buf;
357 redo;
362 } while($opened < $files_to_open);
364 for (@producers) {
365 $_->join();
368 sub set_fh_non_blocking {
369 # Set filehandle as non-blocking
370 # Inputs:
371 # $fh = filehandle to be blocking
372 # Returns:
373 # N/A
374 my $fh = shift;
375 my $flags;
376 fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
377 $flags |= &O_NONBLOCK; # Add non-blocking to the flags
378 fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
381 return ::spacefree(3, $script);
384 sub sharder_script() {
385 my $script = q{
386 use B;
387 # Column separator
388 my $sep = shift;
389 # Which columns to shard on (count from 1)
390 my $col = shift;
391 # Which columns to shard on (count from 0)
392 my $col0 = $col - 1;
393 # Perl expression
394 my $perlexpr = shift;
395 my $bins = @ARGV;
396 # Open fifos for writing, fh{0..$bins}
397 my $t = 0;
398 my %fh;
399 for(@ARGV) {
400 open $fh{$t++}, ">", $_;
401 # open blocks until it is opened by reader
402 # so unlink only happens when it is ready
403 unlink $_;
405 if($perlexpr) {
406 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
407 while(<STDIN>) {
408 # Split into $col columns (no need to split into more)
409 @F = split $sep, $_, $col+1;
411 local $_ = $F[$col0];
412 &$subref();
413 $fh = $fh{ hex(B::hash($_))%$bins };
415 print $fh $_;
417 } else {
418 while(<STDIN>) {
419 # Split into $col columns (no need to split into more)
420 @F = split $sep, $_, $col+1;
421 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
422 print $fh $_;
425 # Close all open fifos
426 close values %fh;
428 return ::spacefree(1, $script);
431 sub binner_script() {
432 my $script = q{
433 use B;
434 # Column separator
435 my $sep = shift;
436 # Which columns to shard on (count from 1)
437 my $col = shift;
438 # Which columns to shard on (count from 0)
439 my $col0 = $col - 1;
440 # Perl expression
441 my $perlexpr = shift;
442 my $bins = @ARGV;
443 # Open fifos for writing, fh{0..$bins}
444 my $t = 0;
445 my %fh;
446 # Let the last output fifo be the 0'th
447 open $fh{$t++}, ">", pop @ARGV;
448 for(@ARGV) {
449 open $fh{$t++}, ">", $_;
450 # open blocks until it is opened by reader
451 # so unlink only happens when it is ready
452 unlink $_;
454 if($perlexpr) {
455 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
456 while(<STDIN>) {
457 # Split into $col columns (no need to split into more)
458 @F = split $sep, $_, $col+1;
460 local $_ = $F[$col0];
461 &$subref();
462 $fh = $fh{ $_%$bins };
464 print $fh $_;
466 } else {
467 while(<STDIN>) {
468 # Split into $col columns (no need to split into more)
469 @F = split $sep, $_, $col+1;
470 $fh = $fh{ $F[$col0]%$bins };
471 print $fh $_;
474 # Close all open fifos
475 close values %fh;
477 return ::spacefree(1, $script);
480 sub pipe_shard_setup() {
481 # Create temporary fifos
482 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
483 # This will spread the input to fifos
484 # Generate commands that reads from fifo1..N:
485 # cat fifo | user_command
486 # Changes:
487 # @Global::cat_prepends
488 my @shardfifos;
489 my @parcatfifos;
490 # TODO $opt::jobs should be evaluated (100%)
491 # TODO $opt::jobs should be number of total_jobs if there are argugemts
492 my $njobs = $opt::jobs;
493 for my $m (0..$njobs-1) {
494 for my $n (0..$njobs-1) {
495 # sharding to A B C D
496 # parcatting all As together
497 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
500 my $shardbin = ($opt::shard || $opt::bin);
501 my $script;
502 if($opt::bin) {
503 $script = binner_script();
504 } else {
505 $script = sharder_script();
508 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
510 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
511 # Group by column name
512 # (Yes, this will also wrongly match a perlexpr like: chop)
513 my($read,$char,@line);
514 # A full line, but nothing more (the rest must be read by the child)
515 # $Global::header used to prepend block to each job
516 do {
517 $read = sysread(STDIN,$char,1);
518 push @line, $char;
519 } while($read and $char ne "\n");
520 $Global::header = join "", @line;
522 my ($col, $perlexpr, $subref) =
523 column_perlexpr($shardbin, $Global::header, $opt::colsep);
524 if(not fork()) {
525 # Let the sharder inherit our stdin
526 # and redirect stdout to null
527 open STDOUT, ">","/dev/null";
528 # The PERL_HASH_SEED must be the same for all sharders
529 # so B::hash will return the same value for any given input
530 $ENV{'PERL_HASH_SEED'} = $$;
531 exec qw(parallel --block 100k -q --pipe -j), $njobs,
532 qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
533 $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos);
535 # For each fifo
536 # (rm fifo1; grep 1) < fifo1
537 # (rm fifo2; grep 2) < fifo2
538 # (rm fifo3; grep 3) < fifo3
539 my $parcat = Q(parcat_script());
540 if(not $parcat) {
541 ::error("'parcat' must be in path.");
542 ::wait_and_exit(255);
544 @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos;
547 sub pipe_part_files(@) {
548 # Given the bigfile
549 # find header and split positions
550 # make commands that 'cat's the partial file
551 # Input:
552 # $file = the file to read
553 # Returns:
554 # @commands that will cat_partial each part
555 my ($file) = @_;
556 my $buf = "";
557 if(not -f $file and not -b $file) {
558 ::error("$file is not a seekable file.");
559 ::wait_and_exit(255);
561 my $header = find_header(\$buf,open_or_exit($file));
562 # find positions
563 my @pos = find_split_positions($file,$Global::blocksize,$header);
564 # Make @cat_prepends
565 my @cat_prepends = ();
566 for(my $i=0; $i<$#pos; $i++) {
567 push(@cat_prepends,
568 cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]));
570 return @cat_prepends;
573 sub find_header($$) {
574 # Compute the header based on $opt::header
575 # Input:
576 # $buf_ref = reference to read-in buffer
577 # $fh = filehandle to read from
578 # Uses:
579 # $opt::header
580 # $Global::blocksize
581 # $Global::header
582 # Returns:
583 # $header string
584 my ($buf_ref, $fh) = @_;
585 my $header = "";
586 # $Global::header may be set in group_by_loop()
587 if($Global::header) { return $Global::header }
588 if($opt::header) {
589 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
590 # Number = number of lines
591 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
592 while(read($fh,substr($$buf_ref,length $$buf_ref,0),
593 $Global::blocksize)) {
594 if($$buf_ref =~ s/^($opt::header)//) {
595 $header = $1;
596 last;
600 return $header;
603 sub find_split_positions($$$) {
604 # Find positions in bigfile where recend is followed by recstart
605 # Input:
606 # $file = the file to read
607 # $block = (minimal) --block-size of each chunk
608 # $header = header to be skipped
609 # Uses:
610 # $opt::recstart
611 # $opt::recend
612 # Returns:
613 # @positions of block start/end
614 my($file, $block, $header) = @_;
615 my $headerlen = length $header;
616 my $size = -s $file;
617 if(-b $file) {
618 # $file is a blockdevice
619 $size = size_of_block_dev($file);
621 $block = int $block;
622 if($opt::groupby) {
623 return split_positions_for_group_by($file,$size,$block,$header);
625 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
626 # The optimal dd blocksize for freebsd = 2^15..2^17
627 my $dd_block_size = 131072; # 2^17
628 my @pos;
629 my ($recstart,$recend) = recstartrecend();
630 my $recendrecstart = $recend.$recstart;
631 my $fh = ::open_or_exit($file);
632 push(@pos,$headerlen);
633 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
634 my $buf;
635 if($recendrecstart eq "") {
636 # records ends anywhere
637 push(@pos,$pos);
638 } else {
639 # Seek the the block start
640 if(not seek($fh, $pos, 0)) {
641 ::error("Cannot seek to $pos in $file");
642 edit(255);
644 while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
645 if($opt::regexp) {
646 # If match /$recend$recstart/ => Record position
647 if($buf =~ m:^(.*$recend)$recstart:os) {
648 # Start looking for next record _after_ this match
649 $pos += length($1);
650 push(@pos,$pos);
651 last;
653 } else {
654 # If match $recend$recstart => Record position
655 # TODO optimize to only look at the appended
656 # $dd_block_size + len $recendrecstart
657 # TODO increase $dd_block_size to optimize for longer records
658 my $i = index64(\$buf,$recendrecstart);
659 if($i != -1) {
660 # Start looking for next record _after_ this match
661 $pos += $i + length($recend);
662 push(@pos,$pos);
663 last;
669 if($pos[$#pos] != $size) {
670 # Last splitpoint was not at end of the file: add it
671 push @pos, $size;
673 close $fh;
674 return @pos;
677 sub split_positions_for_group_by($$$$) {
678 my($fh);
679 sub value_at($) {
680 my $pos = shift;
681 if($pos != 0) {
682 seek($fh, $pos-1, 0) || die;
683 # Read half line
684 <$fh>;
686 # Read full line
687 my $linepos = tell($fh);
688 $_ = <$fh>;
689 if(defined $_) {
690 # Not end of file
691 my @F;
692 if(defined $group_by::col) {
693 $opt::colsep ||= "\t";
694 @F = split /$opt::colsep/, $_;
695 $_ = $F[$group_by::col];
697 eval $group_by::perlexpr;
699 return ($_,$linepos);
702 sub binary_search_end($$$) {
703 my ($s,$spos,$epos) = @_;
704 # value_at($spos) == $s
705 # value_at($epos) != $s
706 my $posdif = $epos - $spos;
707 my ($v,$vpos);
708 while($posdif) {
709 ($v,$vpos) = value_at($spos+$posdif);
710 if($v eq $s) {
711 $spos = $vpos;
712 $posdif = $epos - $spos;
713 } else {
714 $epos = $vpos;
716 $posdif = int($posdif/2);
718 return($v,$vpos);
721 sub binary_search_start($$$) {
722 my ($s,$spos,$epos) = @_;
723 # value_at($spos) != $s
724 # value_at($epos) == $s
725 my $posdif = $epos - $spos;
726 my ($v,$vpos);
727 while($posdif) {
728 ($v,$vpos) = value_at($spos+$posdif);
729 if($v eq $s) {
730 $epos = $vpos;
731 } else {
732 $spos = $vpos;
733 $posdif = $epos - $spos;
735 $posdif = int($posdif/2);
737 return($v,$vpos);
740 my ($file,$size,$block,$header) = @_;
741 my ($a,$b,$c,$apos,$bpos,$cpos);
742 my @pos;
743 $fh = open_or_exit($file);
744 # Set $Global::group_by_column $Global::group_by_perlexpr
745 group_by_loop($fh,$opt::recsep);
746 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
747 $apos = length $header;
748 for(($a,$apos) = value_at($apos); $apos < $size;) {
749 push @pos, $apos;
750 $bpos = $apos + $block;
751 ($b,$bpos) = value_at($bpos);
752 if(eof($fh)) {
753 push @pos, $size; last;
755 $cpos = $bpos + $block;
756 ($c,$cpos) = value_at($cpos);
757 if($a eq $b) {
758 while($b eq $c) {
759 # Move bpos, cpos a block forward until $a == $b != $c
760 $bpos = $cpos;
761 $cpos += $block;
762 ($c,$cpos) = value_at($cpos);
763 if($cpos >= $size) {
764 $cpos = $size;
765 last;
768 # $a == $b != $c
769 # Binary search for $b ending between ($bpos,$cpos)
770 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
771 } else {
772 if($b eq $c) {
773 # $a != $b == $c
774 # Binary search for $b starting between ($apos,$bpos)
775 ($b,$bpos) = binary_search_start($b,$apos,$bpos);
776 } else {
777 # $a != $b != $c
778 # Binary search for $b ending between ($bpos,$cpos)
779 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
782 ($a,$apos) = ($b,$bpos);
784 if($pos[$#pos] != $size) {
785 # Last splitpoint was not at end of the file: add it
786 push @pos, $size;
788 return @pos;
791 sub cat_partial($@) {
792 # Efficient command to copy from byte X to byte Y
793 # Input:
794 # $file = the file to read
795 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
796 # Returns:
797 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
798 my($file, @start_end) = @_;
799 my($start, $i);
800 # Convert (start,end) to (start,len)
801 my @start_len = map {
802 if(++$i % 2) { $start = $_; } else { $_-$start }
803 } @start_end;
804 # This can read 7 GB/s using a single core
805 my $script = spacefree
808 while(@ARGV) {
809 sysseek(STDIN,shift,0) || die;
810 $left = shift;
811 while($read =
812 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
813 $left -= $read;
814 syswrite(STDOUT,$buf);
818 return "<". Q($file) .
819 " perl -e '$script' @start_len |";
822 sub column_perlexpr($$$) {
823 # Compute the column number (if any), perlexpression from combined
824 # string (such as --shard key, --groupby key, {=n perlexpr=}
825 # Input:
826 # $column_perlexpr = string with column and perl expression
827 # $header = header from input file (if column is column name)
828 # $colsep = column separator regexp
829 # Returns:
830 # $col = column number
831 # $perlexpr = perl expression
832 # $subref = compiled perl expression as sub reference
833 my ($column_perlexpr, $header, $colsep) = @_;
834 my ($col, $perlexpr, $subref);
835 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
836 # Column name/number (possibly prefix)
837 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
838 # Column number (possibly prefix)
839 $col = $1;
840 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
841 # Column name (possibly prefix)
842 my $colname = $1;
843 # Split on --copsep pattern
844 my @headers = split /$colsep/, $header;
845 my %headers;
846 @headers{@headers} = (1..($#headers+1));
847 $col = $headers{$colname};
848 if(not defined $col) {
849 ::error("Column '$colname' $colsep not found in header",keys %headers);
850 ::wait_and_exit(255);
854 # What is left of $column_perlexpr is $perlexpr (possibly empty)
855 $perlexpr = $column_perlexpr;
856 $subref = eval("sub { no strict; no warnings; $perlexpr }");
857 return($col, $perlexpr, $subref);
860 sub group_by_loop($$) {
861 # Generate perl code for group-by loop
862 # Insert a $recsep when the column value changes
863 # The column value can be computed with $perexpr
864 my($fh,$recsep) = @_;
865 my $groupby = $opt::groupby;
866 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
867 # Group by column name
868 # (Yes, this will also wrongly match a perlexpr like: chop)
869 my($read,$char,@line);
870 # A full line, but nothing more (the rest must be read by the child)
871 # $Global::header used to prepend block to each job
872 do {
873 $read = sysread($fh,$char,1);
874 push @line, $char;
875 } while($read and $char ne "\n");
876 $Global::header = join "", @line;
878 $opt::colsep ||= "\t";
879 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
880 column_perlexpr($groupby, $Global::header, $opt::colsep);
881 # Numbered 0..n-1 due to being used by $F[n]
882 if($group_by::col) { $group_by::col--; }
884 my $loop = ::spacefree(0,'{
885 local $_=COLVALUE;
886 PERLEXPR;
887 if(! defined $last) { $last = $_ }
888 if(($last) ne $_) {
889 print "RECSEP";
890 $last = $_;
892 }');
894 if(defined $group_by::col) {
895 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
896 } else {
897 $loop =~ s/COLVALUE/\$_/g;
899 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
900 $loop =~ s/RECSEP/$recsep/g;
901 return $loop;
904 sub group_by_stdin_filter() {
905 # Record separator with 119 bit random value
906 $opt::recend = '';
907 $opt::recstart =
908 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
909 $opt::remove_rec_sep = 1;
910 my @filter;
911 push @filter, "perl";
912 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
913 # This is column number/name
914 # Use -a (auto-split)
915 push @filter, "-a";
916 $opt::colsep ||= "\t";
917 my $sep = $opt::colsep;
918 $sep =~ s/\t/\\t/g;
919 $sep =~ s/\"/\\"/g;
920 push @filter, "-F$sep";
922 push @filter, "-pe";
923 push @filter, group_by_loop(*STDIN,$opt::recstart);
924 ::debug("init", "@filter\n");
925 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
928 sub spreadstdin() {
929 # read a record
930 # Spawn a job and print the record to it.
931 # Uses:
932 # $Global::blocksize
933 # STDIN
934 # $opt::r
935 # $Global::max_lines
936 # $Global::max_number_of_args
937 # $opt::regexp
938 # $Global::start_no_new_jobs
939 # $opt::roundrobin
940 # %Global::running
941 # Returns: N/A
943 my $buf = "";
944 my ($recstart,$recend) = recstartrecend();
945 my $recendrecstart = $recend.$recstart;
946 my $chunk_number = 1;
947 my $one_time_through;
948 my $two_gb = 2**31-1;
949 my $blocksize = $Global::blocksize;
950 my $in = *STDIN;
951 my $header = find_header(\$buf,$in);
952 my $timeout = 3;
953 while(1) {
954 my $anything_written = 0;
955 my $buflen = length $buf;
956 my $readsize = ($buflen < $blocksize) ? $blocksize-$buflen : $blocksize;
957 # If $buf < $blocksize, append so it is $blocksize long after reading.
958 # Otherwise append a full $blocksize
959 local $SIG{ALRM} = sub {
960 ::set_fh_non_blocking($in);
961 read($in,substr($buf,$buflen,0),$readsize);
962 ::set_fh_blocking($in);
963 # warn("ee $buf");
964 alarm $timeout;
966 # alarm $timeout;
967 if(not read($in,substr($buf,$buflen,0),$readsize)) {
968 # warn("ff");
969 # End-of-file
970 $chunk_number != 1 and last;
971 # Force the while-loop once if everything was read by header reading
972 $one_time_through++ and last;
974 # warn("yy");
975 # alarm 0;
976 if($opt::r) {
977 # Remove empty lines
978 $buf =~ s/^\s*\n//gm;
979 if(length $buf == 0) {
980 next;
983 if($Global::max_lines and not $Global::max_number_of_args) {
984 # Read n-line records
985 my $n_lines = $buf =~ tr/\n/\n/;
986 my $last_newline_pos = rindex64(\$buf,"\n");
987 # Go backwards until there are full n-line records
988 while($n_lines % $Global::max_lines) {
989 $n_lines--;
990 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
992 # Chop at $last_newline_pos as that is where n-line record ends
993 $anything_written +=
994 write_record_to_pipe($chunk_number++,\$header,\$buf,
995 $recstart,$recend,$last_newline_pos+1);
996 shorten(\$buf,$last_newline_pos+1);
997 } elsif($opt::regexp) {
998 if($Global::max_number_of_args) {
999 # -N => (start..*?end){n}
1000 # -L -N => (start..*?end){n*l}
1001 my $read_n_lines = -1+
1002 $Global::max_number_of_args * ($Global::max_lines || 1);
1003 # (?!negative lookahead) is needed to avoid backtracking
1004 # See: https://unix.stackexchange.com/questions/439356/
1005 while($buf =~
1007 # Either recstart or at least one char from start
1008 ^(?: $recstart | .)
1009 # followed something
1010 (?:(?!$recend$recstart).)*?
1011 # and then recend
1012 $recend
1013 # Then n-1 times recstart.*recend
1014 (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
1016 # Followed by recstart
1017 (?=$recstart)/osx) {
1018 $anything_written +=
1019 write_record_to_pipe($chunk_number++,\$header,\$buf,
1020 $recstart,$recend,length $1);
1021 shorten(\$buf,length $1);
1023 } else {
1024 eof($in) and last;
1025 # Find the last recend-recstart in $buf
1026 if($buf =~ /^(.*$recend)$recstart.*?$/os) {
1027 $anything_written +=
1028 write_record_to_pipe($chunk_number++,\$header,\$buf,
1029 $recstart,$recend,length $1);
1030 shorten(\$buf,length $1);
1033 } elsif($opt::csv) {
1034 # Read a full CSV record
1035 # even number of " + end of line
1036 my $last_newline_pos = length $buf;
1037 do {
1038 # find last EOL
1039 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1040 # While uneven "
1041 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1042 and $last_newline_pos >= 0);
1043 # Chop at $last_newline_pos as that is where CSV record ends
1044 $anything_written +=
1045 write_record_to_pipe($chunk_number++,\$header,\$buf,
1046 $recstart,$recend,$last_newline_pos+1);
1047 shorten(\$buf,$last_newline_pos+1);
1048 } else {
1049 if($Global::max_number_of_args) {
1050 # -N => (start..*?end){n}
1051 my $i = 0;
1052 my $read_n_lines =
1053 $Global::max_number_of_args * ($Global::max_lines || 1);
1054 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1056 length $buf) {
1057 $i += length $recend; # find the actual splitting location
1058 $anything_written +=
1059 write_record_to_pipe($chunk_number++,\$header,\$buf,
1060 $recstart,$recend,$i);
1061 shorten(\$buf,$i);
1063 } else {
1064 eof($in) and last;
1065 # Find the last recend+recstart in $buf
1066 my $i = rindex64(\$buf,$recendrecstart);
1067 if($i != -1) {
1068 $i += length $recend; # find the actual splitting location
1069 $anything_written +=
1070 write_record_to_pipe($chunk_number++,\$header,\$buf,
1071 $recstart,$recend,$i);
1072 shorten(\$buf,$i);
1076 if(not $anything_written
1077 and not eof($in)
1078 and not $Global::no_autoexpand_block) {
1079 # Nothing was written - maybe the block size < record size?
1080 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1081 if($blocksize < $two_gb) {
1082 my $old_blocksize = $blocksize;
1083 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1084 ::warning("A record was longer than $old_blocksize. " .
1085 "Increasing to --blocksize $blocksize.");
1089 ::debug("init", "Done reading input\n");
1091 # If there is anything left in the buffer write it
1092 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1093 $recend, length $buf);
1095 if($opt::retries) {
1096 $Global::no_more_input = 1;
1097 # We need to start no more jobs: At most we need to retry some
1098 # of the already running.
1099 my @running = values %Global::running;
1100 # Stop any virgins.
1101 for my $job (@running) {
1102 if(defined $job and $job->virgin()) {
1103 close $job->fh(0,"w");
1106 # Wait for running jobs to be done
1107 my $sleep =1;
1108 while($Global::total_running > 0) {
1109 $sleep = ::reap_usleep($sleep);
1110 start_more_jobs();
1113 $Global::start_no_new_jobs ||= 1;
1114 if($opt::roundrobin) {
1115 # Flush blocks to roundrobin procs
1116 my $sleep = 1;
1117 while(%Global::running) {
1118 my $something_written = 0;
1119 for my $job (values %Global::running) {
1120 if($job->block_length()) {
1121 $something_written += $job->non_blocking_write();
1122 } else {
1123 close $job->fh(0,"w");
1126 if($something_written) {
1127 $sleep = $sleep/2+0.001;
1129 $sleep = ::reap_usleep($sleep);
1134 sub recstartrecend() {
1135 # Uses:
1136 # $opt::recstart
1137 # $opt::recend
1138 # Returns:
1139 # $recstart,$recend with default values and regexp conversion
1140 my($recstart,$recend);
1141 if(defined($opt::recstart) and defined($opt::recend)) {
1142 # If both --recstart and --recend is given then both must match
1143 $recstart = $opt::recstart;
1144 $recend = $opt::recend;
1145 } elsif(defined($opt::recstart)) {
1146 # If --recstart is given it must match start of record
1147 $recstart = $opt::recstart;
1148 $recend = "";
1149 } elsif(defined($opt::recend)) {
1150 # If --recend is given then it must match end of record
1151 $recstart = "";
1152 $recend = $opt::recend;
1153 if($opt::regexp and $recend eq '') {
1154 # --regexp --recend ''
1155 $recend = '.';
1159 if($opt::regexp) {
1160 # If $recstart/$recend contains '|'
1161 # this should only apply to the regexp
1162 $recstart = "(?:".$recstart.")";
1163 $recend = "(?:".$recend.")";
1164 } else {
1165 # $recstart/$recend = printf strings (\n)
1166 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1167 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1169 return ($recstart,$recend);
1172 sub nindex($$) {
1173 # See if string is in buffer N times
1174 # Returns:
1175 # the position where the Nth copy is found
1176 my ($buf_ref, $str, $n) = @_;
1177 my $i = 0;
1178 for(1..$n) {
1179 $i = index64($buf_ref,$str,$i+1);
1180 if($i == -1) { last }
1182 return $i;
1186 my @robin_queue;
1187 my $sleep = 1;
1189 sub round_robin_write($$$$$) {
1190 # Input:
1191 # $header_ref = ref to $header string
1192 # $block_ref = ref to $block to be written
1193 # $recstart = record start string
1194 # $recend = record end string
1195 # $endpos = end position of $block
1196 # Uses:
1197 # %Global::running
1198 # Returns:
1199 # $something_written = amount of bytes written
1200 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1201 my $written = 0;
1202 my $block_passed = 0;
1203 while(not $block_passed) {
1204 # Continue flushing existing buffers
1205 # until one is empty and a new block is passed
1206 if(@robin_queue) {
1207 # Rotate queue once so new blocks get a fair chance
1208 # to be given to another slot
1209 push @robin_queue, shift @robin_queue;
1210 } else {
1211 # Make a queue to spread the blocks evenly
1212 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1213 values %Global::running);
1215 do {
1216 $written = 0;
1217 for my $job (@robin_queue) {
1218 if($job->block_length() > 0) {
1219 $written += $job->non_blocking_write();
1220 } else {
1221 $job->set_block($header_ref, $buffer_ref,
1222 $endpos, $recstart, $recend);
1223 $block_passed = 1;
1224 $job->set_virgin(0);
1225 $written += $job->non_blocking_write();
1226 last;
1229 if($written) {
1230 $sleep = $sleep/1.5+0.001;
1232 # Don't sleep if something is written
1233 } while($written and not $block_passed);
1234 $sleep = ::reap_usleep($sleep);
1236 return $written;
1240 sub index64($$$) {
1241 # Do index on strings > 2GB.
1242 # index in Perl < v5.22 does not work for > 2GB
1243 # Input:
1244 # as index except STR which must be passed as a reference
1245 # Output:
1246 # as index
1247 my $ref = shift;
1248 my $match = shift;
1249 my $pos = shift || 0;
1250 my $block_size = 2**31-1;
1251 my $strlen = length($$ref);
1252 # No point in doing extra work if we don't need to.
1253 if($strlen < $block_size or $] > 5.022) {
1254 return index($$ref, $match, $pos);
1257 my $matchlen = length($match);
1258 my $ret;
1259 my $offset = $pos;
1260 while($offset < $strlen) {
1261 $ret = index(
1262 substr($$ref, $offset, $block_size),
1263 $match, $pos-$offset);
1264 if($ret != -1) {
1265 return $ret + $offset;
1267 $offset += ($block_size - $matchlen - 1);
1269 return -1;
1272 sub rindex64($@) {
1273 # Do rindex on strings > 2GB.
1274 # rindex in Perl < v5.22 does not work for > 2GB
1275 # Input:
1276 # as rindex except STR which must be passed as a reference
1277 # Output:
1278 # as rindex
1279 my $ref = shift;
1280 my $match = shift;
1281 my $pos = shift;
1282 my $block_size = 2**31-1;
1283 my $strlen = length($$ref);
1284 # Default: search from end
1285 $pos = defined $pos ? $pos : $strlen;
1286 # No point in doing extra work if we don't need to.
1287 if($strlen < $block_size) {
1288 return rindex($$ref, $match, $pos);
1291 my $matchlen = length($match);
1292 my $ret;
1293 my $offset = $pos - $block_size + $matchlen;
1294 if($offset < 0) {
1295 # The offset is less than a $block_size
1296 # Set the $offset to 0 and
1297 # Adjust block_size accordingly
1298 $block_size = $block_size + $offset;
1299 $offset = 0;
1301 while($offset >= 0) {
1302 $ret = rindex(
1303 substr($$ref, $offset, $block_size),
1304 $match);
1305 if($ret != -1) {
1306 return $ret + $offset;
1308 $offset -= ($block_size - $matchlen - 1);
1310 return -1;
1313 sub shorten($$) {
1314 # Do: substr($buf,0,$i) = "";
1315 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1316 # Input:
1317 # $buf_ref = \$buf
1318 # $i = position to shorten to
1319 # Returns: N/A
1320 my ($buf_ref, $i) = @_;
1321 my $two_gb = 2**31-1;
1322 while($i > $two_gb) {
1323 substr($$buf_ref,0,$two_gb) = "";
1324 $i -= $two_gb;
1326 substr($$buf_ref,0,$i) = "";
1329 sub write_record_to_pipe($$$$$$) {
1330 # Fork then
1331 # Write record from pos 0 .. $endpos to pipe
1332 # Input:
1333 # $chunk_number = sequence number - to see if already run
1334 # $header_ref = reference to header string to prepend
1335 # $buffer_ref = reference to record to write
1336 # $recstart = start string of record
1337 # $recend = end string of record
1338 # $endpos = position in $buffer_ref where record ends
1339 # Uses:
1340 # $Global::job_already_run
1341 # $opt::roundrobin
1342 # @Global::virgin_jobs
1343 # Returns:
1344 # Number of chunks written (0 or 1)
1345 my ($chunk_number, $header_ref, $buffer_ref,
1346 $recstart, $recend, $endpos) = @_;
1347 if($endpos == 0) { return 0; }
1348 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1349 if($opt::roundrobin) {
1350 # Write the block to one of the already running jobs
1351 return round_robin_write($header_ref, $buffer_ref,
1352 $recstart, $recend, $endpos);
1354 # If no virgin found, backoff
1355 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1356 while(not @Global::virgin_jobs) {
1357 ::debug("pipe", "No virgin jobs");
1358 $sleep = ::reap_usleep($sleep);
1359 # Jobs may not be started because of loadavg
1360 # or too little time between each ssh login
1361 # or retrying failed jobs.
1362 start_more_jobs();
1364 my $job = shift @Global::virgin_jobs;
1365 # Job is no longer virgin
1366 $job->set_virgin(0);
1368 if($opt::retries) {
1369 # Copy $buffer[0..$endpos] to $job->{'block'}
1370 # Remove rec_sep
1371 # Run $job->add_transfersize
1372 $job->set_block($header_ref, $buffer_ref, $endpos,
1373 $recstart, $recend);
1374 if(fork()) {
1375 # Skip
1376 } else {
1377 $job->write($job->block_ref());
1378 close $job->fh(0,"w");
1379 exit(0);
1381 } else {
1382 # We ignore the removed rec_sep which is technically wrong.
1383 $job->add_transfersize($endpos + length $$header_ref);
1384 if(fork()) {
1385 # Skip
1386 } else {
1387 # Chop of at $endpos as we do not know how many rec_sep will
1388 # be removed.
1389 substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
1390 # Remove rec_sep
1391 if($opt::remove_rec_sep) {
1392 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
1394 $job->write($header_ref);
1395 $job->write($buffer_ref);
1396 close $job->fh(0,"w");
1397 exit(0);
1400 close $job->fh(0,"w");
1401 return 1;
1405 sub __SEM_MODE__() {}
1408 sub acquire_semaphore() {
1409 # Acquires semaphore. If needed: spawns to the background
1410 # Uses:
1411 # @Global::host
1412 # Returns:
1413 # The semaphore to be released when jobs is complete
1414 $Global::host{':'} = SSHLogin->new(":");
1415 my $sem = Semaphore->new($Semaphore::name,
1416 $Global::host{':'}->max_jobs_running());
1417 $sem->acquire();
1418 if($Semaphore::fg) {
1419 # skip
1420 } else {
1421 if(fork()) {
1422 exit(0);
1423 } else {
1424 # If run in the background, the PID will change
1425 $sem->pid_change();
1428 return $sem;
1432 sub __PARSE_OPTIONS__() {}
1435 sub options_hash() {
1436 # Returns:
1437 # %hash = the GetOptions config
1438 return
1439 ("debug|D=s" => \$opt::D,
1440 "xargs" => \$opt::xargs,
1441 "m" => \$opt::m,
1442 "X" => \$opt::X,
1443 "v" => \@opt::v,
1444 "sql=s" => \$opt::retired,
1445 "sqlmaster=s" => \$opt::sqlmaster,
1446 "sqlworker=s" => \$opt::sqlworker,
1447 "sqlandworker=s" => \$opt::sqlandworker,
1448 "joblog|jl=s" => \$opt::joblog,
1449 "results|result|res=s" => \$opt::results,
1450 "resume" => \$opt::resume,
1451 "resume-failed|resumefailed" => \$opt::resume_failed,
1452 "retry-failed|retryfailed" => \$opt::retry_failed,
1453 "silent" => \$opt::silent,
1454 "keep-order|keeporder|k" => \$opt::keeporder,
1455 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1456 "group" => \$opt::group,
1457 "g" => \$opt::retired,
1458 "ungroup|u" => \$opt::ungroup,
1459 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1460 => \$opt::linebuffer,
1461 "tmux" => \$opt::tmux,
1462 "tmuxpane" => \$opt::tmuxpane,
1463 "null|0" => \$opt::null,
1464 "quote|q" => \$opt::quote,
1465 # Replacement strings
1466 "parens=s" => \$opt::parens,
1467 "rpl=s" => \@opt::rpl,
1468 "plus" => \$opt::plus,
1469 "I=s" => \$opt::I,
1470 "extensionreplace|er=s" => \$opt::U,
1471 "U=s" => \$opt::retired,
1472 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1473 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1474 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1475 "seqreplace=s" => \$opt::seqreplace,
1476 "slotreplace=s" => \$opt::slotreplace,
1477 "jobs|j=s" => \$opt::jobs,
1478 "delay=s" => \$opt::delay,
1479 "sshdelay=f" => \$opt::sshdelay,
1480 "load=s" => \$opt::load,
1481 "noswap" => \$opt::noswap,
1482 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1483 "number-of-cpus" => \$opt::number_of_cpus,
1484 "number-of-sockets" => \$opt::number_of_sockets,
1485 "number-of-cores" => \$opt::number_of_cores,
1486 "number-of-threads" => \$opt::number_of_threads,
1487 "use-sockets-instead-of-threads"
1488 => \$opt::use_sockets_instead_of_threads,
1489 "use-cores-instead-of-threads"
1490 => \$opt::use_cores_instead_of_threads,
1491 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1492 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1493 "nice=i" => \$opt::nice,
1494 "tag" => \$opt::tag,
1495 "tagstring|tag-string=s" => \$opt::tagstring,
1496 "onall" => \$opt::onall,
1497 "nonall" => \$opt::nonall,
1498 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1499 "sshlogin|S=s" => \@opt::sshlogin,
1500 "sshloginfile|slf=s" => \@opt::sshloginfile,
1501 "controlmaster|M" => \$opt::controlmaster,
1502 "ssh=s" => \$opt::ssh,
1503 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1504 => \@opt::transfer_files,
1505 "return=s" => \@opt::return,
1506 "trc=s" => \@opt::trc,
1507 "transfer" => \$opt::transfer,
1508 "cleanup" => \$opt::cleanup,
1509 "basefile|bf=s" => \@opt::basefile,
1510 "B=s" => \$opt::retired,
1511 "ctrlc|ctrl-c" => \$opt::retired,
1512 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1513 "workdir|work-dir|wd=s" => \$opt::workdir,
1514 "W=s" => \$opt::retired,
1515 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1516 "tmpdir|tempdir=s" => \$opt::tmpdir,
1517 "use-compress-program|compress-program=s" => \$opt::compress_program,
1518 "use-decompress-program|decompress-program=s"
1519 => \$opt::decompress_program,
1520 "compress" => \$opt::compress,
1521 "tty" => \$opt::tty,
1522 "T" => \$opt::retired,
1523 "H=i" => \$opt::retired,
1524 "dry-run|dryrun|dr" => \$opt::dryrun,
1525 "progress" => \$opt::progress,
1526 "eta" => \$opt::eta,
1527 "bar" => \$opt::bar,
1528 "shuf" => \$opt::shuf,
1529 "arg-sep|argsep=s" => \$opt::arg_sep,
1530 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1531 "trim=s" => \$opt::trim,
1532 "env=s" => \@opt::env,
1533 "recordenv|record-env" => \$opt::record_env,
1534 "session" => \$opt::session,
1535 "plain" => \$opt::plain,
1536 "profile|J=s" => \@opt::profile,
1537 "pipe|spreadstdin" => \$opt::pipe,
1538 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1539 "recstart=s" => \$opt::recstart,
1540 "recend=s" => \$opt::recend,
1541 "regexp|regex" => \$opt::regexp,
1542 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1543 "files|output-as-files|outputasfiles" => \$opt::files,
1544 "block|block-size|blocksize=s" => \$opt::blocksize,
1545 "tollef" => \$opt::tollef,
1546 "gnu" => \$opt::gnu,
1547 "link|xapply" => \$opt::link,
1548 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1549 # Before changing this line, please read
1550 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1551 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1552 "bibtex|citation" => \$opt::citation,
1553 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1554 # Termination and retries
1555 "halt-on-error|halt=s" => \$opt::halt,
1556 "limit=s" => \$opt::limit,
1557 "memfree=s" => \$opt::memfree,
1558 "retries=s" => \$opt::retries,
1559 "timeout=s" => \$opt::timeout,
1560 "termseq|term-seq=s" => \$opt::termseq,
1561 # xargs-compatibility - implemented, man, testsuite
1562 "max-procs|P=s" => \$opt::jobs,
1563 "delimiter|d=s" => \$opt::d,
1564 "max-chars|s=i" => \$opt::max_chars,
1565 "arg-file|a=s" => \@opt::a,
1566 "no-run-if-empty|r" => \$opt::r,
1567 "replace|i:s" => \$opt::i,
1568 "E=s" => \$opt::eof,
1569 "eof|e:s" => \$opt::eof,
1570 "max-args|maxargs|n=i" => \$opt::max_args,
1571 "max-replace-args|N=i" => \$opt::max_replace_args,
1572 "colsep|col-sep|C=s" => \$opt::colsep,
1573 "csv"=> \$opt::csv,
1574 "help|h" => \$opt::help,
1575 "L=f" => \$opt::L,
1576 "max-lines|l:f" => \$opt::max_lines,
1577 "interactive|p" => \$opt::interactive,
1578 "verbose|t" => \$opt::verbose,
1579 "version|V" => \$opt::version,
1580 "minversion|min-version=i" => \$opt::minversion,
1581 "show-limits|showlimits" => \$opt::show_limits,
1582 "exit|x" => \$opt::x,
1583 # Semaphore
1584 "semaphore" => \$opt::semaphore,
1585 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1586 "semaphorename|id=s" => \$opt::semaphorename,
1587 "fg" => \$opt::fg,
1588 "bg" => \$opt::bg,
1589 "wait" => \$opt::wait,
1590 # Shebang #!/usr/bin/parallel --shebang
1591 "shebang|hashbang" => \$opt::shebang,
1592 "internal-pipe-means-argfiles"
1593 => \$opt::internal_pipe_means_argfiles,
1594 "Y" => \$opt::retired,
1595 "skip-first-line" => \$opt::skip_first_line,
1596 "bug" => \$opt::bug,
1597 "header=s" => \$opt::header,
1598 "cat" => \$opt::cat,
1599 "fifo" => \$opt::fifo,
1600 "pipepart|pipe-part" => \$opt::pipepart,
1601 "tee" => \$opt::tee,
1602 "shard=s" => \$opt::shard,
1603 "bin=s" => \$opt::bin,
1604 "groupby|group-by=s" => \$opt::groupby,
1605 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1606 "embed" => \$opt::embed,
1610 sub get_options_from_array($@) {
1611 # Run GetOptions on @array
1612 # Input:
1613 # $array_ref = ref to @ARGV to parse
1614 # @keep_only = Keep only these options
1615 # Uses:
1616 # @ARGV
1617 # Returns:
1618 # true if parsing worked
1619 # false if parsing failed
1620 # @$array_ref is changed
1621 my ($array_ref, @keep_only) = @_;
1622 if(not @$array_ref) {
1623 # Empty array: No need to look more at that
1624 return 1;
1626 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1627 # supported everywhere
1628 my @save_argv;
1629 my $this_is_ARGV = (\@::ARGV == $array_ref);
1630 if(not $this_is_ARGV) {
1631 @save_argv = @::ARGV;
1632 @::ARGV = @{$array_ref};
1634 # If @keep_only set: Ignore all values except @keep_only
1635 my %options = options_hash();
1636 if(@keep_only) {
1637 my (%keep,@dummy);
1638 @keep{@keep_only} = @keep_only;
1639 for my $k (grep { not $keep{$_} } keys %options) {
1640 # Store the value of the option in @dummy
1641 $options{$k} = \@dummy;
1644 my $retval = GetOptions(%options);
1645 if(not $this_is_ARGV) {
1646 @{$array_ref} = @::ARGV;
1647 @::ARGV = @save_argv;
1649 return $retval;
1652 sub parse_options(@) {
1653 # Returns: N/A
1654 init_globals();
1655 my @argv_before = @ARGV;
1656 @ARGV = read_options();
1658 # Before changing this line, please read
1659 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1660 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1661 if(defined $opt::citation) {
1662 citation(\@argv_before,\@ARGV);
1663 wait_and_exit(0);
1665 # no-* overrides *
1666 if($opt::nokeeporder) { $opt::keeporder = undef; }
1668 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1669 if($opt::bug) { ::die_bug("test-bug"); }
1670 $Global::debug = $opt::D;
1671 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1672 || $ENV{'SHELL'} || "/bin/sh";
1673 if(not -x $Global::shell and not which($Global::shell)) {
1674 ::error("Shell '$Global::shell' not found.");
1675 wait_and_exit(255);
1677 ::debug("init","Global::shell $Global::shell\n");
1678 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1679 if(defined $opt::X) { $Global::ContextReplace = 1; }
1680 if(defined $opt::silent) { $Global::verbose = 0; }
1681 if(defined $opt::null) { $/ = "\0"; }
1682 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1683 parse_replacement_string_options();
1684 if(defined $opt::tagstring) {
1685 $opt::tagstring = unquote_printf($opt::tagstring);
1686 if($opt::tagstring =~ /\Q$Global::parensleft\E.*\Q$Global::parensright\E/
1688 $opt::linebuffer) {
1689 # --tagstring contains {= =} and --linebuffer =>
1690 # recompute replacement string for each use (do not cache)
1691 $Global::cache_replacement_eval = 0;
1694 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1695 if(defined $opt::quote) { $Global::quoting = 1; }
1696 if(defined $opt::r) { $Global::ignore_empty = 1; }
1697 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1698 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1699 if(defined $opt::max_args) {
1700 $Global::max_number_of_args = $opt::max_args;
1702 if(defined $opt::timeout) {
1703 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1705 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1706 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1707 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1708 # Default: Same nice level as GNU Parallel is started at
1709 $opt::nice ||= eval { getpriority(0,0) } || 0;
1710 if(defined $opt::help) { usage(); exit(0); }
1711 if(defined $opt::embed) { embed(); exit(0); }
1712 if(defined $opt::sqlandworker) {
1713 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1715 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1716 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1717 if(defined $opt::csv) {
1718 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1719 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1720 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1721 my $sep = $csv_setting->{sep_char};
1722 $Global::csv = Text::CSV->new($csv_setting)
1723 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1725 if(defined $opt::header) {
1726 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1728 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1729 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1730 if(defined $opt::arg_file_sep) {
1731 $Global::arg_file_sep = $opt::arg_file_sep;
1733 if(defined $opt::number_of_sockets) {
1734 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1736 if(defined $opt::number_of_cpus) {
1737 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1739 if(defined $opt::number_of_cores) {
1740 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1742 if(defined $opt::number_of_threads) {
1743 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1745 if(defined $opt::max_line_length_allowed) {
1746 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1748 if(defined $opt::version) { version(); wait_and_exit(0); }
1749 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1750 if(defined $opt::show_limits) { show_limits(); }
1751 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1752 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1753 if(@opt::return) { push @Global::ret_files, @opt::return; }
1754 if($opt::transfer) {
1755 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1757 push @Global::transfer_files, @opt::transfer_files;
1758 if(not defined $opt::recstart and
1759 not defined $opt::recend) { $opt::recend = "\n"; }
1760 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1761 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1762 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1763 $Global::blocksize = 2**31-1;
1765 if($^O eq "cygwin" and
1766 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1767 and $Global::blocksize > 65535) {
1768 warning("--blocksize >= 64K causes problems on Cygwin.");
1770 $opt::memfree = multiply_binary_prefix($opt::memfree);
1771 check_invalid_option_combinations();
1772 if((defined $opt::fifo or defined $opt::cat)
1773 and not $opt::pipepart) {
1774 $opt::pipe = 1;
1776 if(defined $opt::minversion) {
1777 print $Global::version,"\n";
1778 if($Global::version < $opt::minversion) {
1779 wait_and_exit(255);
1780 } else {
1781 wait_and_exit(0);
1784 if(not defined $opt::delay) {
1785 # Set --delay to --sshdelay if not set
1786 $opt::delay = $opt::sshdelay;
1788 $opt::delay = multiply_time_units($opt::delay);
1789 if($opt::compress_program) {
1790 $opt::compress = 1;
1791 $opt::decompress_program ||= $opt::compress_program." -dc";
1794 if(defined $opt::results) {
1795 # Is the output a dir or CSV-file?
1796 if($opt::results =~ /\.csv$/i) {
1797 # CSV with , as separator
1798 $Global::csvsep = ",";
1799 $Global::membuffer ||= 1;
1800 } elsif($opt::results =~ /\.tsv$/i) {
1801 # CSV with TAB as separator
1802 $Global::csvsep = "\t";
1803 $Global::membuffer ||= 1;
1806 if($opt::compress) {
1807 my ($compress, $decompress) = find_compression_program();
1808 $opt::compress_program ||= $compress;
1809 $opt::decompress_program ||= $decompress;
1810 if(($opt::results and not $Global::csvsep) or $opt::files) {
1811 # No need for decompressing
1812 $opt::decompress_program = "cat >/dev/null";
1815 if(defined $opt::dryrun) {
1816 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1817 $opt::ungroup = 0;
1818 $opt::group = 1;
1820 if(defined $opt::nonall) {
1821 # Append a dummy empty argument if there are no arguments
1822 # on the command line to avoid reading from STDIN.
1823 # arg_sep = random 50 char
1824 # \0noarg => nothing (not the empty string)
1825 $Global::arg_sep = join "",
1826 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1827 push @ARGV, $Global::arg_sep, "\0noarg";
1829 if(defined $opt::tee) {
1830 if(not defined $opt::jobs) {
1831 $opt::jobs = 0;
1834 if(defined $opt::tty) {
1835 # Defaults for --tty: -j1 -u
1836 # Can be overridden with -jXXX -g
1837 if(not defined $opt::jobs) {
1838 $opt::jobs = 1;
1840 if(not defined $opt::group) {
1841 $opt::ungroup = 1;
1844 if(@opt::trc) {
1845 push @Global::ret_files, @opt::trc;
1846 if(not @Global::transfer_files) {
1847 # Defaults to --transferfile {}
1848 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1850 $opt::cleanup = 1;
1852 if(defined $opt::max_lines) {
1853 if($opt::max_lines eq "-0") {
1854 # -l -0 (swallowed -0)
1855 $opt::max_lines = 1;
1856 $opt::null = 1;
1857 $/ = "\0";
1858 } elsif ($opt::max_lines == 0) {
1859 # If not given (or if 0 is given) => 1
1860 $opt::max_lines = 1;
1862 $Global::max_lines = $opt::max_lines;
1863 if(not $opt::pipe) {
1864 # --pipe -L means length of record - not max_number_of_args
1865 $Global::max_number_of_args ||= $Global::max_lines;
1869 # Read more than one arg at a time (-L, -N)
1870 if(defined $opt::L) {
1871 $Global::max_lines = $opt::L;
1872 if(not $opt::pipe) {
1873 # --pipe -L means length of record - not max_number_of_args
1874 $Global::max_number_of_args ||= $Global::max_lines;
1877 if(defined $opt::max_replace_args) {
1878 $Global::max_number_of_args = $opt::max_replace_args;
1879 $Global::ContextReplace = 1;
1881 if((defined $opt::L or defined $opt::max_replace_args)
1883 not ($opt::xargs or $opt::m)) {
1884 $Global::ContextReplace = 1;
1886 if(defined $opt::tag and not defined $opt::tagstring) {
1887 # Default = {}
1888 $opt::tagstring = $Global::parensleft.$Global::parensright;
1890 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1891 # Deal with ::: :::+ :::: and ::::+
1892 @ARGV = read_args_from_command_line();
1894 parse_semaphore();
1896 if(defined $opt::eta) { $opt::progress = $opt::eta; }
1897 if(defined $opt::bar) { $opt::progress = $opt::bar; }
1899 # Funding a free software project is hard. GNU Parallel is no
1900 # exception. On top of that it seems the less visible a project
1901 # is, the harder it is to get funding. And the nature of GNU
1902 # Parallel is that it will never be seen by "the guy with the
1903 # checkbook", but only by the people doing the actual work.
1905 # This problem has been covered by others - though no solution has
1906 # been found:
1907 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1908 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1910 # If you want GNU Parallel to be maintained in the future, and not
1911 # just wither away like so many other free software tools, you
1912 # need to help finance the development.
1914 # The citation notice is a simple way of doing so, as citations
1915 # makes it possible to me to get a job where I can maintain GNU
1916 # Parallel as part of the job.
1918 # This means you can help financing development
1920 # WITHOUT PAYING A SINGLE CENT!
1922 # Before implementing the citation notice it was discussed with
1923 # the users:
1924 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
1926 # Having to spend 10 seconds on running 'parallel --citation' once
1927 # is no doubt not an ideal solution, but no one has so far come up
1928 # with an ideal solution - neither for funding GNU Parallel nor
1929 # other free software.
1931 # If you believe you have the perfect solution, you should try it
1932 # out, and if it works, you should post it on the email
1933 # list. Ideas that will cost work and which have not been tested
1934 # are, however, unlikely to be prioritized.
1936 # Please note that GPL version 3 gives you the right to fork GNU
1937 # Parallel under a new name, but it does not give you the right to
1938 # distribute modified copies with the citation notice disabled in
1939 # a way where the software can be confused with GNU Parallel. To
1940 # do that you need to be the owner of the GNU Parallel
1941 # trademark. The xt:Commerce case shows this.
1943 # Description of the xt:Commerce case in OLG Duesseldorf
1944 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1945 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1947 # The verdict in German
1948 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1949 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1951 # Other free software limiting derivates by the same name
1952 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
1953 # https://tm.joomla.org/trademark-faq.html
1954 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
1956 # Running 'parallel --citation' one single time takes less than 10
1957 # seconds, and will silence the citation notice for future
1958 # runs. If that is too much trouble for you, why not use one of
1959 # the alternatives instead?
1960 # See a list in: 'man parallel_alternatives'
1962 # If you want GNU Parallel to be maintained in the future keep
1963 # this line.
1964 citation_notice();
1965 # Seriously: *You* will be harming free software by removing the
1966 # notice. You make it harder to justify spending time developing
1967 # it. If you *do* remove the line, please email
1968 # hallofshame@tange.dk if you want to avoid being put in a hall of
1969 # shame.
1971 parse_halt();
1973 if($ENV{'PARALLEL_ENV'}) {
1974 # Read environment and set $Global::parallel_env
1975 # Must be done before is_acceptable_command_line_length()
1976 my $penv = $ENV{'PARALLEL_ENV'};
1977 # unset $PARALLEL_ENV: It should not be given to children
1978 # because it takes up a lot of env space
1979 delete $ENV{'PARALLEL_ENV'};
1980 if(-e $penv) {
1981 # This is a file/fifo: Replace envvar with content of file
1982 open(my $parallel_env, "<", $penv) ||
1983 ::die_bug("Cannot read parallel_env from $penv");
1984 local $/; # Put <> in slurp mode
1985 $penv = <$parallel_env>;
1986 close $parallel_env;
1988 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
1989 $penv =~ s/\001/\n/g;
1990 if($penv =~ /\0/) {
1991 ::warning('\0 (NUL) in environment is not supported');
1993 $Global::parallel_env = $penv;
1996 parse_sshlogin();
1998 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
1999 # As we do not know the max line length on the remote machine
2000 # long commands generated by xargs may fail
2001 # If $opt::max_replace_args is set, it is probably safe
2002 ::warning("Using -X or -m with --sshlogin may fail.");
2005 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2006 open_joblog();
2007 open_csv();
2008 if($opt::sqlmaster or $opt::sqlworker) {
2009 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2011 if($opt::sqlworker) { $Global::membuffer ||= 1; }
2014 sub check_invalid_option_combinations() {
2015 if(defined $opt::timeout and
2016 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2017 ::error("--timeout must be seconds or percentage.");
2018 wait_and_exit(255);
2020 if(defined $opt::fifo and defined $opt::cat) {
2021 ::error("--fifo cannot be combined with --cat.");
2022 ::wait_and_exit(255);
2024 if(defined $opt::retries and defined $opt::roundrobin) {
2025 ::error("--retries cannot be combined with --roundrobin.");
2026 ::wait_and_exit(255);
2028 if(defined $opt::pipepart and
2029 (defined $opt::L or defined $opt::max_lines
2030 or defined $opt::max_replace_args)) {
2031 ::error("--pipepart is incompatible with --max-replace-args, ".
2032 "--max-lines, and -L.");
2033 wait_and_exit(255);
2035 if(defined $opt::group and $opt::ungroup) {
2036 ::error("--group cannot be combined with --ungroup.");
2037 ::wait_and_exit(255);
2039 if(defined $opt::group and $opt::linebuffer) {
2040 ::error("--group cannot be combined with --line-buffer.");
2041 ::wait_and_exit(255);
2043 if(defined $opt::ungroup and $opt::linebuffer) {
2044 ::error("--ungroup cannot be combined with --line-buffer.");
2045 ::wait_and_exit(255);
2047 if(defined $opt::tollef and not $opt::gnu) {
2048 ::error("--tollef has been retired.",
2049 "Remove --tollef or use --gnu to override --tollef.");
2050 ::wait_and_exit(255);
2052 if(defined $opt::retired) {
2053 ::error("-g has been retired. Use --group.",
2054 "-B has been retired. Use --bf.",
2055 "-T has been retired. Use --tty.",
2056 "-U has been retired. Use --er.",
2057 "-W has been retired. Use --wd.",
2058 "-Y has been retired. Use --shebang.",
2059 "-H has been retired. Use --halt.",
2060 "--sql has been retired. Use --sqlmaster.",
2061 "--ctrlc has been retired.",
2062 "--noctrlc has been retired.");
2063 ::wait_and_exit(255);
2065 if($opt::groupby) {
2066 if(not $opt::pipe and not $opt::pipepart) {
2067 $opt::pipe = 1;
2069 if($opt::remove_rec_sep) {
2070 ::error("--remove-rec-sep is not compatible with --groupby");
2071 ::wait_and_exit(255);
2073 if($opt::recstart) {
2074 ::error("--recstart is not compatible with --groupby");
2075 ::wait_and_exit(255);
2077 if($opt::recend ne "\n") {
2078 ::error("--recend is not compatible with --groupby");
2079 ::wait_and_exit(255);
2084 sub init_globals() {
2085 # Defaults:
2086 $Global::version = 20191023;
2087 $Global::progname = 'parallel';
2088 $::name = "GNU Parallel";
2089 $Global::infinity = 2**31;
2090 $Global::debug = 0;
2091 $Global::verbose = 0;
2092 # Don't quote every part of the command line
2093 $Global::quoting = 0;
2094 # Quote replacement strings
2095 $Global::quote_replace = 1;
2096 $Global::total_completed = 0;
2097 $Global::cache_replacement_eval = 1;
2098 # Read only table with default --rpl values
2099 %Global::replace =
2101 '{}' => '',
2102 '{#}' => '1 $_=$job->seq()',
2103 '{%}' => '1 $_=$job->slot()',
2104 '{/}' => 's:.*/::',
2105 '{//}' =>
2106 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2107 '$_ = dirname($_);'),
2108 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
2109 '{.}' => 's:\.[^/.]+$::',
2111 %Global::plus =
2113 # {} = {+/}/{/}
2114 # = {.}.{+.} = {+/}/{/.}.{+.}
2115 # = {..}.{+..} = {+/}/{/..}.{+..}
2116 # = {...}.{+...} = {+/}/{/...}.{+...}
2117 '{+/}' => 's:/[^/]*$::',
2118 '{+.}' => 's:.*\.::',
2119 '{+..}' => 's:.*\.([^.]*\.):$1:',
2120 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
2121 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
2122 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2123 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2124 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2125 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
2126 # {##} = number of jobs
2127 '{##}' => '$_=total_jobs()',
2128 # Bash ${a:-myval}
2129 '{:-([^}]+?)}' => '$_ ||= $$1',
2130 # Bash ${a:2}
2131 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2132 # Bash ${a:2:3}
2133 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2134 # Bash ${a#bc}
2135 '{#([^#}][^}]*?)}' => 's/^$$1//;',
2136 # Bash ${a%def}
2137 '{%([^}]+?)}' => 's/$$1$//;',
2138 # Bash ${a/def/ghi} ${a/def/}
2139 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2140 # Bash ${a^a}
2141 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2142 # Bash ${a^^a}
2143 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2144 # Bash ${a,A}
2145 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2146 # Bash ${a,,A}
2147 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2149 # Modifiable copy of %Global::replace
2150 %Global::rpl = %Global::replace;
2151 $/ = "\n";
2152 $Global::ignore_empty = 0;
2153 $Global::interactive = 0;
2154 $Global::stderr_verbose = 0;
2155 $Global::default_simultaneous_sshlogins = 9;
2156 $Global::exitstatus = 0;
2157 $Global::arg_sep = ":::";
2158 $Global::arg_file_sep = "::::";
2159 $Global::trim = 'n';
2160 $Global::max_jobs_running = 0;
2161 $Global::job_already_run = '';
2162 $ENV{'TMPDIR'} ||= "/tmp";
2163 $ENV{'OLDPWD'} = $ENV{'PWD'};
2164 if(not $ENV{HOME}) {
2165 # $ENV{HOME} is sometimes not set if called from PHP
2166 ::warning("\$HOME not set. Using /tmp.");
2167 $ENV{HOME} = "/tmp";
2169 # no warnings to allow for undefined $XDG_*
2170 no warnings 'uninitialized';
2171 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2172 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2173 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
2174 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
2175 # Keep only dirs that exist
2176 @Global::config_dirs =
2177 (grep { -d $_ }
2178 $ENV{'PARALLEL_HOME'},
2179 (map { "$_/parallel" }
2180 $xdg_config_home,
2181 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2182 $ENV{'HOME'} . "/.parallel");
2183 # Use first dir as config dir
2184 $Global::config_dir = $Global::config_dirs[0] ||
2185 $ENV{'HOME'} . "/.parallel";
2186 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
2187 # Keep only dirs that exist
2188 @Global::cache_dirs =
2189 (grep { -d $_ }
2190 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
2191 $Global::cache_dir = $Global::cache_dirs[0] ||
2192 $ENV{'HOME'} . "/.parallel";
2195 sub parse_halt() {
2196 # $opt::halt flavours
2197 # Uses:
2198 # $opt::halt
2199 # $Global::halt_when
2200 # $Global::halt_fail
2201 # $Global::halt_success
2202 # $Global::halt_pct
2203 # $Global::halt_count
2204 if(defined $opt::halt) {
2205 my %halt_expansion = (
2206 "0" => "never",
2207 "1" => "soon,fail=1",
2208 "2" => "now,fail=1",
2209 "-1" => "soon,success=1",
2210 "-2" => "now,success=1",
2212 # Expand -2,-1,0,1,2 into long form
2213 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2214 # --halt 5% == --halt soon,fail=5%
2215 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2216 # Split: soon,fail=5%
2217 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2218 if(not grep { $when eq $_ } qw(never soon now)) {
2219 ::error("--halt must have 'never', 'soon', or 'now'.");
2220 ::wait_and_exit(255);
2222 $Global::halt_when = $when;
2223 if($when ne "never") {
2224 if($fail_success eq "fail") {
2225 $Global::halt_fail = 1;
2226 } elsif($fail_success eq "success") {
2227 $Global::halt_success = 1;
2228 } elsif($fail_success eq "done") {
2229 $Global::halt_done = 1;
2230 } else {
2231 ::error("--halt $when must be followed by ,success or ,fail.");
2232 ::wait_and_exit(255);
2234 if($pct_count =~ /^(\d+)%$/) {
2235 $Global::halt_pct = $1/100;
2236 } elsif($pct_count =~ /^(\d+)$/) {
2237 $Global::halt_count = $1;
2238 } else {
2239 ::error("--halt $when,$fail_success ".
2240 "must be followed by ,number or ,percent%.");
2241 ::wait_and_exit(255);
2247 sub parse_replacement_string_options() {
2248 # Deal with --rpl
2249 # Uses:
2250 # %Global::rpl
2251 # $Global::parensleft
2252 # $Global::parensright
2253 # $opt::parens
2254 # $Global::parensleft
2255 # $Global::parensright
2256 # $opt::plus
2257 # %Global::plus
2258 # $opt::I
2259 # $opt::U
2260 # $opt::i
2261 # $opt::basenamereplace
2262 # $opt::dirnamereplace
2263 # $opt::seqreplace
2264 # $opt::slotreplace
2265 # $opt::basenameextensionreplace
2267 sub rpl($$) {
2268 # Modify %Global::rpl
2269 # Replace $old with $new
2270 my ($old,$new) = @_;
2271 if($old ne $new) {
2272 $Global::rpl{$new} = $Global::rpl{$old};
2273 delete $Global::rpl{$old};
2276 my $parens = "{==}";
2277 if(defined $opt::parens) { $parens = $opt::parens; }
2278 my $parenslen = 0.5*length $parens;
2279 $Global::parensleft = substr($parens,0,$parenslen);
2280 $Global::parensright = substr($parens,$parenslen);
2281 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
2282 if(defined $opt::I) { rpl('{}',$opt::I); }
2283 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
2284 if(defined $opt::U) { rpl('{.}',$opt::U); }
2285 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
2286 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
2287 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
2288 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
2289 if(defined $opt::basenameextensionreplace) {
2290 rpl('{/.}',$opt::basenameextensionreplace);
2292 for(@opt::rpl) {
2293 # Create $Global::rpl entries for --rpl options
2294 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2295 my ($shorthand,$long) = split/ /,$_,2;
2296 $Global::rpl{$shorthand} = $long;
2300 sub parse_semaphore() {
2301 # Semaphore defaults
2302 # Must be done before computing number of processes and max_line_length
2303 # because when running as a semaphore GNU Parallel does not read args
2304 # Uses:
2305 # $opt::semaphore
2306 # $Global::semaphore
2307 # $opt::semaphoretimeout
2308 # $Semaphore::timeout
2309 # $opt::semaphorename
2310 # $Semaphore::name
2311 # $opt::fg
2312 # $Semaphore::fg
2313 # $opt::wait
2314 # $Semaphore::wait
2315 # $opt::bg
2316 # @opt::a
2317 # @Global::unget_argv
2318 # $Global::default_simultaneous_sshlogins
2319 # $opt::jobs
2320 # $Global::interactive
2321 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
2322 if(defined $opt::semaphore) { $Global::semaphore = 1; }
2323 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
2324 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
2325 if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
2326 $Global::semaphore = 1;
2328 if(defined $opt::bg) { $Global::semaphore = 1; }
2329 if(defined $opt::wait and not $opt::sqlmaster) {
2330 $Global::semaphore = 1; @ARGV = "true";
2332 if($Global::semaphore) {
2333 if(@opt::a) {
2334 # A semaphore does not take input from neither stdin nor file
2335 ::error("A semaphore does not take input from neither stdin nor a file\n");
2336 ::wait_and_exit(255);
2338 @opt::a = ("/dev/null");
2339 # Append a dummy empty argument
2340 # \0 => nothing (not the empty string)
2341 push(@Global::unget_argv, [Arg->new("\0noarg")]);
2342 $Semaphore::timeout = $opt::semaphoretimeout || 0;
2343 if(defined $opt::semaphorename) {
2344 $Semaphore::name = $opt::semaphorename;
2345 } else {
2346 local $/ = "\n";
2347 $Semaphore::name = `tty`;
2348 chomp $Semaphore::name;
2350 $Semaphore::fg = $opt::fg;
2351 $Semaphore::wait = $opt::wait;
2352 $Global::default_simultaneous_sshlogins = 1;
2353 if(not defined $opt::jobs) {
2354 $opt::jobs = 1;
2356 if($Global::interactive and $opt::bg) {
2357 ::error("Jobs running in the ".
2358 "background cannot be interactive.");
2359 ::wait_and_exit(255);
2364 sub record_env() {
2365 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2366 # Returns: N/A
2367 my $ignore_filename = $Global::config_dir . "/ignored_vars";
2368 if(open(my $vars_fh, ">", $ignore_filename)) {
2369 print $vars_fh map { $_,"\n" } keys %ENV;
2370 } else {
2371 ::error("Cannot write to $ignore_filename.");
2372 ::wait_and_exit(255);
2376 sub open_joblog() {
2377 # Open joblog as specified by --joblog
2378 # Uses:
2379 # $opt::resume
2380 # $opt::resume_failed
2381 # $opt::joblog
2382 # $opt::results
2383 # $Global::job_already_run
2384 # %Global::fd
2385 my $append = 0;
2386 if(($opt::resume or $opt::resume_failed)
2388 not ($opt::joblog or $opt::results)) {
2389 ::error("--resume and --resume-failed require --joblog or --results.");
2390 ::wait_and_exit(255);
2392 if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
2393 # --joblog +filename = append to filename
2394 $append = 1;
2396 if($opt::joblog
2398 ($opt::sqlmaster
2400 not $opt::sqlworker)) {
2401 # Do not log if --sqlworker
2402 if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
2403 if(open(my $joblog_fh, "<", $opt::joblog)) {
2404 # Read the joblog
2405 # Override $/ with \n because -d might be set
2406 local $/ = "\n";
2407 # If there is a header: Open as append later
2408 $append = <$joblog_fh>;
2409 my $joblog_regexp;
2410 if($opt::retry_failed) {
2411 # Make a regexp that only matches commands with exit+signal=0
2412 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2413 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2414 my @group;
2415 while(<$joblog_fh>) {
2416 if(/$joblog_regexp/o) {
2417 # This is 30% faster than set_job_already_run($1);
2418 vec($Global::job_already_run,($1||0),1) = 1;
2419 $Global::total_completed++;
2420 $group[$1-1] = "true";
2421 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2422 # Grab out the command
2423 $group[$1-1] = $3;
2424 } else {
2425 chomp;
2426 ::error("Format of '$opt::joblog' is wrong: $_");
2427 ::wait_and_exit(255);
2430 if(@group) {
2431 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2432 unlink($name);
2433 # Put args into argfile
2434 if(grep /\0/, @group) {
2435 # force --null to deal with \n in commandlines
2436 ::warning("Command lines contain newline. Forcing --null.");
2437 $opt::null = 1;
2438 $/ = "\0";
2440 # Replace \0 with '\n' as used in print_joblog()
2441 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2442 seek $outfh, 0, 0;
2443 exit_if_disk_full();
2444 # Set filehandle to -a
2445 @opt::a = ($outfh);
2447 # Remove $command (so -a is run)
2448 @ARGV = ();
2450 if($opt::resume || $opt::resume_failed) {
2451 if($opt::resume_failed) {
2452 # Make a regexp that only matches commands with exit+signal=0
2453 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2454 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2455 } else {
2456 # Just match the job number
2457 $joblog_regexp='^(\d+)';
2459 while(<$joblog_fh>) {
2460 if(/$joblog_regexp/o) {
2461 # This is 30% faster than set_job_already_run($1);
2462 vec($Global::job_already_run,($1||0),1) = 1;
2463 $Global::total_completed++;
2464 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2465 ::error("Format of '$opt::joblog' is wrong: $_");
2466 ::wait_and_exit(255);
2470 close $joblog_fh;
2472 # $opt::null may be set if the commands contain \n
2473 if($opt::null) { $/ = "\0"; }
2475 if($opt::dryrun) {
2476 # Do not write to joblog in a dry-run
2477 if(not open($Global::joblog, ">", "/dev/null")) {
2478 ::error("Cannot write to --joblog $opt::joblog.");
2479 ::wait_and_exit(255);
2481 } elsif($append) {
2482 # Append to joblog
2483 if(not open($Global::joblog, ">>", $opt::joblog)) {
2484 ::error("Cannot append to --joblog $opt::joblog.");
2485 ::wait_and_exit(255);
2487 } else {
2488 if($opt::joblog eq "-") {
2489 # Use STDOUT as joblog
2490 $Global::joblog = $Global::fd{1};
2491 } elsif(not open($Global::joblog, ">", $opt::joblog)) {
2492 # Overwrite the joblog
2493 ::error("Cannot write to --joblog $opt::joblog.");
2494 ::wait_and_exit(255);
2496 print $Global::joblog
2497 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2498 "Send", "Receive", "Exitval", "Signal", "Command"
2499 ). "\n";
2504 sub open_csv() {
2505 if($opt::results) {
2506 # Output as CSV/TSV
2507 if($opt::results eq "-.csv"
2509 $opt::results eq "-.tsv") {
2510 # Output as CSV/TSV on stdout
2511 open $Global::csv_fh, ">&", "STDOUT" or
2512 ::die_bug("Can't dup STDOUT in csv: $!");
2513 # Do not print any other output to STDOUT
2514 # by forcing all other output to /dev/null
2515 open my $fd, ">", "/dev/null" or
2516 ::die_bug("Can't >/dev/null in csv: $!");
2517 $Global::fd{1} = $fd;
2518 $Global::fd{2} = $fd;
2519 } elsif($Global::csvsep) {
2520 if(not open($Global::csv_fh,">",$opt::results)) {
2521 ::error("Cannot open results file `$opt::results': ".
2522 "$!.");
2523 wait_and_exit(255);
2529 sub find_compression_program() {
2530 # Find a fast compression program
2531 # Returns:
2532 # $compress_program = compress program with options
2533 # $decompress_program = decompress program with options
2535 # Search for these. Sorted by speed on 128 core
2537 # seq 120000000|shuf > 1gb &
2538 # apt-get update
2539 # apt install make g++ htop
2540 # wget -O - pi.dk/3 | bash
2541 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2542 # git clone https://github.com/facebook/zstd.git
2543 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2544 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2545 # chmod +x /usr/local/bin/lrz
2546 # wait
2547 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2548 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2549 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2550 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2551 # sort -nk4 jl-?
2553 # 1-core:
2554 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2555 # 4-cores:
2556 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2557 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2558 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2559 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2560 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2562 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2563 lrz pxz bzip2 lzma xz clzip);
2564 for my $p (@prg) {
2565 if(which($p)) {
2566 return ("$p -c -1","$p -dc");
2569 # Fall back to cat
2570 return ("cat","cat");
2573 sub read_options() {
2574 # Read options from command line, profile and $PARALLEL
2575 # Uses:
2576 # $opt::shebang_wrap
2577 # $opt::shebang
2578 # @ARGV
2579 # $opt::plain
2580 # @opt::profile
2581 # $ENV{'HOME'}
2582 # $ENV{'PARALLEL'}
2583 # Returns:
2584 # @ARGV_no_opt = @ARGV without --options
2586 # This must be done first as this may exec myself
2587 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2588 $ARGV[0] =~ /^--shebang-?wrap/ or
2589 $ARGV[0] =~ /^--hashbang/)) {
2590 # Program is called from #! line in script
2591 # remove --shebang-wrap if it is set
2592 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
2593 # remove --shebang if it is set
2594 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
2595 # remove --hashbang if it is set
2596 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
2597 if($opt::shebang) {
2598 my $argfile = Q(pop @ARGV);
2599 # exec myself to split $ARGV[0] into separate fields
2600 exec "$0 --skip-first-line -a $argfile @ARGV";
2602 if($opt::shebang_wrap) {
2603 my @options;
2604 my @parser;
2605 if ($^O eq 'freebsd') {
2606 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2607 my @nooptions = @ARGV;
2608 get_options_from_array(\@nooptions);
2609 while($#ARGV > $#nooptions) {
2610 push @options, shift @ARGV;
2612 while(@ARGV and $ARGV[0] ne ":::") {
2613 push @parser, shift @ARGV;
2615 if(@ARGV and $ARGV[0] eq ":::") {
2616 shift @ARGV;
2618 } else {
2619 @options = shift @ARGV;
2621 my $script = Q(shift @ARGV);
2622 # exec myself to split $ARGV[0] into separate fields
2623 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2624 "::: @ARGV";
2627 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2628 ::warning("--shebang and --shebang-wrap must be the first argument.\n");
2631 Getopt::Long::Configure("bundling","require_order");
2632 my @ARGV_copy = @ARGV;
2633 my @ARGV_orig = @ARGV;
2634 # Check if there is a --profile to set @opt::profile
2635 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
2636 my @ARGV_profile = ();
2637 my @ARGV_env = ();
2638 if(not $opt::plain) {
2639 # Add options from $PARALLEL_HOME/config and other profiles
2640 my @config_profiles = (
2641 "/etc/parallel/config",
2642 (map { "$_/config" } @Global::config_dirs),
2643 $ENV{'HOME'}."/.parallelrc");
2644 my @profiles = @config_profiles;
2645 if(@opt::profile) {
2646 # --profile overrides default profiles
2647 @profiles = ();
2648 for my $profile (@opt::profile) {
2649 if($profile =~ m:^\./|^/:) {
2650 # Look for ./profile in .
2651 # Look for /profile in /
2652 push @profiles, grep { -r $_ } $profile;
2653 } else {
2654 # Look for the $profile in @Global::config_dirs
2655 push @profiles, grep { -r $_ }
2656 map { "$_/$profile" } @Global::config_dirs;
2660 for my $profile (@profiles) {
2661 if(-r $profile) {
2662 ::debug("init","Read $profile\n");
2663 local $/ = "\n";
2664 open (my $in_fh, "<", $profile) ||
2665 ::die_bug("read-profile: $profile");
2666 while(<$in_fh>) {
2667 /^\s*\#/ and next;
2668 chomp;
2669 push @ARGV_profile, shell_words($_);
2671 close $in_fh;
2672 } else {
2673 if(grep /^$profile$/, @config_profiles) {
2674 # config file is not required to exist
2675 } else {
2676 ::error("$profile not readable.");
2677 wait_and_exit(255);
2681 # Add options from shell variable $PARALLEL
2682 if($ENV{'PARALLEL'}) {
2683 push @ARGV_env, shell_words($ENV{'PARALLEL'});
2685 # Add options from env_parallel.csh via $PARALLEL_CSH
2686 if($ENV{'PARALLEL_CSH'}) {
2687 push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'});
2690 Getopt::Long::Configure("bundling","require_order");
2691 get_options_from_array(\@ARGV_profile) || die_usage();
2692 get_options_from_array(\@ARGV_env) || die_usage();
2693 get_options_from_array(\@ARGV) || die_usage();
2694 # What were the options given on the command line?
2695 # Used to start --sqlworker
2696 my $ai = arrayindex(\@ARGV_orig, \@ARGV);
2697 @Global::options_in_argv = @ARGV_orig[0..$ai-1];
2698 # Prepend non-options to @ARGV (such as commands like 'nice')
2699 unshift @ARGV, @ARGV_profile, @ARGV_env;
2700 return @ARGV;
2703 sub arrayindex() {
2704 # Similar to Perl's index function, but for arrays
2705 # Input:
2706 # $arr_ref1 = ref to @array1 to search in
2707 # $arr_ref2 = ref to @array2 to search for
2708 # Returns:
2709 # $pos = position of @array1 in @array2, -1 if not found
2710 my ($arr_ref1,$arr_ref2) = @_;
2711 my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
2712 my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
2713 my $i = index($array1_as_string,$array2_as_string,0);
2714 if($i == -1) { return -1 }
2715 my @before = split /\0/, substr($array1_as_string,0,$i);
2716 return $#before;
2719 sub read_args_from_command_line() {
2720 # Arguments given on the command line after:
2721 # ::: ($Global::arg_sep)
2722 # :::: ($Global::arg_file_sep)
2723 # :::+ ($Global::arg_sep with --link)
2724 # ::::+ ($Global::arg_file_sep with --link)
2725 # Removes the arguments from @ARGV and:
2726 # - puts filenames into -a
2727 # - puts arguments into files and add the files to -a
2728 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2729 # Input:
2730 # @::ARGV = command option ::: arg arg arg :::: argfiles
2731 # Uses:
2732 # $Global::arg_sep
2733 # $Global::arg_file_sep
2734 # $opt::internal_pipe_means_argfiles
2735 # $opt::pipe
2736 # @opt::a
2737 # Returns:
2738 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2739 my @new_argv = ();
2740 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2741 if($arg eq $Global::arg_sep
2743 $arg eq $Global::arg_sep."+"
2745 $arg eq $Global::arg_file_sep
2747 $arg eq $Global::arg_file_sep."+") {
2748 my $group_sep = $arg; # This group of arguments is args or argfiles
2749 my @group;
2750 while(defined ($arg = shift @ARGV)) {
2751 if($arg eq $Global::arg_sep
2753 $arg eq $Global::arg_sep."+"
2755 $arg eq $Global::arg_file_sep
2757 $arg eq $Global::arg_file_sep."+") {
2758 # exit while loop if finding new separator
2759 last;
2760 } else {
2761 # If not hitting ::: :::+ :::: or ::::+
2762 # Append it to the group
2763 push @group, $arg;
2766 my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0;
2767 my $is_file = ($group_sep eq $Global::arg_file_sep
2769 $group_sep eq $Global::arg_file_sep."+");
2770 if($is_file) {
2771 # :::: / ::::+
2772 push @opt::linkinputsource, map { $is_linked } @group;
2773 } else {
2774 # ::: / :::+
2775 push @opt::linkinputsource, $is_linked;
2777 if($is_file
2778 or ($opt::internal_pipe_means_argfiles and $opt::pipe)
2780 # Group of file names on the command line.
2781 # Append args into -a
2782 push @opt::a, @group;
2783 } else {
2784 # Group of arguments on the command line.
2785 # Put them into a file.
2786 # Create argfile
2787 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2788 unlink($name);
2789 # Put args into argfile
2790 print $outfh map { $_,$/ } @group;
2791 seek $outfh, 0, 0;
2792 exit_if_disk_full();
2793 # Append filehandle to -a
2794 push @opt::a, $outfh;
2796 if(defined($arg)) {
2797 # $arg is ::: :::+ :::: or ::::+
2798 # so there is another group
2799 redo;
2800 } else {
2801 # $arg is undef -> @ARGV empty
2802 last;
2805 push @new_argv, $arg;
2807 # Output: @ARGV = command to run with options
2808 return @new_argv;
2811 sub cleanup() {
2812 # Returns: N/A
2813 unlink keys %Global::unlink;
2814 map { rmdir $_ } keys %Global::unlink;
2815 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
2816 for(keys %Global::sshmaster) {
2817 # If 'ssh -M's are running: kill them
2818 kill "TERM", $_;
2823 sub __QUOTING_ARGUMENTS_FOR_SHELL__() {}
2825 sub shell_quote(@) {
2826 # Input:
2827 # @strings = strings to be quoted
2828 # Returns:
2829 # @shell_quoted_strings = string quoted as needed by the shell
2830 return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
2833 sub shell_quote_scalar_rc($) {
2834 # Quote for the rc-shell
2835 my $a = $_[0];
2836 if(defined $a) {
2837 if(($a =~ s/'/''/g)
2839 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2840 # A string was replaced
2841 # No need to test for "" or \0
2842 } elsif($a eq "") {
2843 $a = "''";
2844 } elsif($a eq "\0") {
2845 $a = "";
2848 return $a;
2851 sub shell_quote_scalar_csh($) {
2852 # Quote for (t)csh
2853 my $a = $_[0];
2854 if(defined $a) {
2855 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2856 # This is 1% faster than the above
2857 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2859 # quote newline in csh as \\\n
2860 ($a =~ s/[\n]/"\\\n"/go)) {
2861 # A string was replaced
2862 # No need to test for "" or \0
2863 } elsif($a eq "") {
2864 $a = "''";
2865 } elsif($a eq "\0") {
2866 $a = "";
2869 return $a;
2872 sub shell_quote_scalar_default($) {
2873 # Quote for other shells (Bourne compatibles)
2874 # Inputs:
2875 # $string = string to be quoted
2876 # Returns:
2877 # $shell_quoted = string quoted as needed by the shell
2878 my $s = $_[0];
2879 if($s =~ /[^-_.+a-z0-9\/]/i) {
2880 $s =~ s/'/'"'"'/g; # "-quote single quotes
2881 $s = "'$s'"; # '-quote entire string
2882 $s =~ s/^''//; # Remove unneeded '' at ends
2883 $s =~ s/''$//; # (faster than s/^''|''$//g)
2884 return $s;
2885 } elsif ($s eq "") {
2886 return "''";
2887 } else {
2888 # No quoting needed
2889 return $s;
2893 sub shell_quote_scalar($) {
2894 # Quote the string so the shell will not expand any special chars
2895 # Inputs:
2896 # $string = string to be quoted
2897 # Returns:
2898 # $shell_quoted = string quoted as needed by the shell
2900 # Speed optimization: Choose the correct shell_quote_scalar_*
2901 # and call that directly from now on
2902 no warnings 'redefine';
2903 if($Global::cshell) {
2904 # (t)csh
2905 *shell_quote_scalar = \&shell_quote_scalar_csh;
2906 } elsif($Global::shell =~ m:(^|/)rc$:) {
2907 # rc-shell
2908 *shell_quote_scalar = \&shell_quote_scalar_rc;
2909 } else {
2910 # other shells
2911 *shell_quote_scalar = \&shell_quote_scalar_default;
2913 # The sub is now redefined. Call it
2914 return shell_quote_scalar($_[0]);
2917 sub Q($) {
2918 # Q alias for ::shell_quote_scalar
2919 my $ret = shell_quote_scalar($_[0]);
2920 no warnings 'redefine';
2921 *Q = \&::shell_quote_scalar;
2922 return $ret;
2925 sub shell_quote_file($) {
2926 # Quote the string so shell will not expand any special chars
2927 # and prepend ./ if needed
2928 # Input:
2929 # $filename = filename to be shell quoted
2930 # Returns:
2931 # $quoted_filename = filename quoted with \ and ./ if needed
2932 my $a = shift;
2933 if(defined $a) {
2934 if($a =~ m:^/: or $a =~ m:^\./:) {
2935 # /abs/path or ./rel/path => skip
2936 } else {
2937 # rel/path => ./rel/path
2938 $a = "./".$a;
2941 return Q($a);
2944 sub shell_words(@) {
2945 # Input:
2946 # $string = shell line
2947 # Returns:
2948 # @shell_words = $string split into words as shell would do
2949 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
2950 return Text::ParseWords::shellwords(@_);
2953 sub perl_quote_scalar($) {
2954 # Quote the string so perl's eval will not expand any special chars
2955 # Inputs:
2956 # $string = string to be quoted
2957 # Returns:
2958 # $perl_quoted = string quoted with \ as needed by perl's eval
2959 my $a = $_[0];
2960 if(defined $a) {
2961 $a =~ s/[\\\"\$\@]/\\$&/go;
2963 return $a;
2966 # -w complains about prototype
2967 sub pQ($) {
2968 # pQ alias for ::perl_quote_scalar
2969 my $ret = perl_quote_scalar($_[0]);
2970 *pQ = \&::perl_quote_scalar;
2971 return $ret;
2974 sub unquote_printf() {
2975 # Convert \t \n \r \000 \0
2976 # Inputs:
2977 # $string = string with \t \n \r \num \0
2978 # Returns:
2979 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
2980 $_ = shift;
2981 s/\\t/\t/g;
2982 s/\\n/\n/g;
2983 s/\\r/\r/g;
2984 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
2985 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
2986 return $_;
2990 sub __FILEHANDLES__() {}
2993 sub save_stdin_stdout_stderr() {
2994 # Remember the original STDIN, STDOUT and STDERR
2995 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
2996 # Uses:
2997 # %Global::fd
2998 # $Global::original_stderr
2999 # $Global::original_stdin
3000 # Returns: N/A
3002 # TODO Disabled until we have an open3 that will take n filehandles
3003 # for my $fdno (1..61) {
3004 # # /dev/fd/62 and above are used by bash for <(cmd)
3005 # # Find file descriptors that are already opened (by the shell)
3006 # Only focus on stdout+stderr for now
3007 for my $fdno (1..2) {
3008 my $fh;
3009 # 2-argument-open is used to be compatible with old perl 5.8.0
3010 # bug #43570: Perl 5.8.0 creates 61 files
3011 if(open($fh,">&=$fdno")) {
3012 $Global::fd{$fdno}=$fh;
3015 open $Global::original_stderr, ">&", "STDERR" or
3016 ::die_bug("Can't dup STDERR: $!");
3017 open $Global::status_fd, ">&", "STDERR" or
3018 ::die_bug("Can't dup STDERR: $!");
3019 open $Global::original_stdin, "<&", "STDIN" or
3020 ::die_bug("Can't dup STDIN: $!");
3023 sub enough_file_handles() {
3024 # Check that we have enough filehandles available for starting
3025 # another job
3026 # Uses:
3027 # $opt::ungroup
3028 # %Global::fd
3029 # Returns:
3030 # 1 if ungrouped (thus not needing extra filehandles)
3031 # 0 if too few filehandles
3032 # 1 if enough filehandles
3033 if(not $opt::ungroup) {
3034 my %fh;
3035 my $enough_filehandles = 1;
3036 # perl uses 7 filehandles for something?
3037 # open3 uses 2 extra filehandles temporarily
3038 # We need a filehandle for each redirected file descriptor
3039 # (normally just STDOUT and STDERR)
3040 for my $i (1..(7+2+keys %Global::fd)) {
3041 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3043 for (values %fh) { close $_; }
3044 return $enough_filehandles;
3045 } else {
3046 # Ungrouped does not need extra file handles
3047 return 1;
3051 sub open_or_exit($) {
3052 # Open a file name or exit if the file cannot be opened
3053 # Inputs:
3054 # $file = filehandle or filename to open
3055 # Uses:
3056 # $Global::original_stdin
3057 # Returns:
3058 # $fh = file handle to read-opened file
3059 my $file = shift;
3060 if($file eq "-") {
3061 return ($Global::original_stdin || *STDIN);
3063 if(ref $file eq "GLOB") {
3064 # This is an open filehandle
3065 return $file;
3067 my $fh = gensym;
3068 if(not open($fh, "<", $file)) {
3069 ::error("Cannot open input file `$file': No such file or directory.");
3070 wait_and_exit(255);
3072 return $fh;
3075 sub set_fh_blocking($) {
3076 # Set filehandle as blocking
3077 # Inputs:
3078 # $fh = filehandle to be blocking
3079 # Returns:
3080 # N/A
3081 my $fh = shift;
3082 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3083 my $flags;
3084 # Get the current flags on the filehandle
3085 fcntl($fh, &F_GETFL, $flags) || die $!;
3086 # Remove non-blocking from the flags
3087 $flags &= ~&O_NONBLOCK;
3088 # Set the flags on the filehandle
3089 fcntl($fh, &F_SETFL, $flags) || die $!;
3092 sub set_fh_non_blocking($) {
3093 # Set filehandle as non-blocking
3094 # Inputs:
3095 # $fh = filehandle to be blocking
3096 # Returns:
3097 # N/A
3098 my $fh = shift;
3099 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3100 my $flags;
3101 # Get the current flags on the filehandle
3102 fcntl($fh, &F_GETFL, $flags) || die $!;
3103 # Add non-blocking to the flags
3104 $flags |= &O_NONBLOCK;
3105 # Set the flags on the filehandle
3106 fcntl($fh, &F_SETFL, $flags) || die $!;
3110 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3113 # Variable structure:
3115 # $Global::running{$pid} = Pointer to Job-object
3116 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3117 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3118 # $Global::total_running = total number of running jobs
3119 # $Global::total_started = total jobs started
3120 # $Global::max_procs_file = filename if --jobs is given a filename
3121 # $Global::JobQueue = JobQueue object for the queue of jobs
3122 # $Global::timeoutq = queue of times where jobs timeout
3123 # $Global::newest_job = Job object of the most recent job started
3124 # $Global::newest_starttime = timestamp of $Global::newest_job
3125 # @Global::sshlogin
3126 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
3127 # $Global::start_no_new_jobs = should more jobs be started?
3128 # $Global::original_stderr = file handle for STDERR when the program started
3129 # $Global::total_started = total number of jobs started
3130 # $Global::joblog = filehandle of joblog
3131 # $Global::debug = Is debugging on?
3132 # $Global::exitstatus = status code of GNU Parallel
3133 # $Global::quoting = quote the command to run
3135 sub init_run_jobs() {
3136 # Set Global variables and progress signal handlers
3137 # Do the copying of basefiles
3138 # Returns: N/A
3139 $Global::total_running = 0;
3140 $Global::total_started = 0;
3141 $SIG{USR1} = \&list_running_jobs;
3142 $SIG{USR2} = \&toggle_progress;
3143 if(@opt::basefile) { setup_basefile(); }
3147 my $last_time;
3148 my %last_mtime;
3149 my $max_procs_file_last_mod;
3151 sub changed_procs_file {
3152 # If --jobs is a file and it is modfied:
3153 # Force recomputing of max_jobs_running for each $sshlogin
3154 # Uses:
3155 # $Global::max_procs_file
3156 # %Global::host
3157 # Returns: N/A
3158 if($Global::max_procs_file) {
3159 # --jobs filename
3160 my $mtime = (stat($Global::max_procs_file))[9];
3161 $max_procs_file_last_mod ||= 0;
3162 if($mtime > $max_procs_file_last_mod) {
3163 # file changed: Force re-computing max_jobs_running
3164 $max_procs_file_last_mod = $mtime;
3165 for my $sshlogin (values %Global::host) {
3166 $sshlogin->set_max_jobs_running(undef);
3172 sub changed_sshloginfile {
3173 # If --slf is changed:
3174 # reload --slf
3175 # filter_hosts
3176 # setup_basefile
3177 # Uses:
3178 # @opt::sshloginfile
3179 # @Global::sshlogin
3180 # %Global::host
3181 # $opt::filter_hosts
3182 # Returns: N/A
3183 if(@opt::sshloginfile) {
3184 # Is --sshloginfile changed?
3185 for my $slf (@opt::sshloginfile) {
3186 my $actual_file = expand_slf_shorthand($slf);
3187 my $mtime = (stat($actual_file))[9];
3188 $last_mtime{$actual_file} ||= $mtime;
3189 if($mtime - $last_mtime{$actual_file} > 1) {
3190 ::debug("run","--sshloginfile $actual_file changed. reload\n");
3191 $last_mtime{$actual_file} = $mtime;
3192 # Reload $slf
3193 # Empty sshlogins
3194 @Global::sshlogin = ();
3195 for (values %Global::host) {
3196 # Don't start new jobs on any host
3197 # except the ones added back later
3198 $_->set_max_jobs_running(0);
3200 # This will set max_jobs_running on the SSHlogins
3201 read_sshloginfile($actual_file);
3202 parse_sshlogin();
3203 $opt::filter_hosts and filter_hosts();
3204 setup_basefile();
3210 sub start_more_jobs {
3211 # Run start_another_job() but only if:
3212 # * not $Global::start_no_new_jobs set
3213 # * not JobQueue is empty
3214 # * not load on server is too high
3215 # * not server swapping
3216 # * not too short time since last remote login
3217 # Uses:
3218 # %Global::host
3219 # $Global::start_no_new_jobs
3220 # $Global::JobQueue
3221 # $opt::pipe
3222 # $opt::load
3223 # $opt::noswap
3224 # $opt::delay
3225 # $Global::newest_starttime
3226 # Returns:
3227 # $jobs_started = number of jobs started
3228 my $jobs_started = 0;
3229 if($Global::start_no_new_jobs) {
3230 return $jobs_started;
3232 if(time - ($last_time||0) > 1) {
3233 # At most do this every second
3234 $last_time = time;
3235 changed_procs_file();
3236 changed_sshloginfile();
3238 # This will start 1 job on each --sshlogin (if possible)
3239 # thus distribute the jobs on the --sshlogins round robin
3240 for my $sshlogin (values %Global::host) {
3241 if($Global::JobQueue->empty() and not $opt::pipe) {
3242 # No more jobs in the queue
3243 last;
3245 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
3246 $sshlogin->jobs_running(), "\n");
3247 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
3248 if($opt::delay
3250 $opt::delay - 0.008 > ::now() - $Global::newest_starttime) {
3251 # It has been too short since last start
3252 next;
3254 if($opt::load and $sshlogin->loadavg_too_high()) {
3255 # The load is too high or unknown
3256 next;
3258 if($opt::noswap and $sshlogin->swapping()) {
3259 # The server is swapping
3260 next;
3262 if($opt::limit and $sshlogin->limit()) {
3263 # Over limit
3264 next;
3266 if($opt::memfree and $sshlogin->memfree() < $opt::memfree) {
3267 # The server has not enough mem free
3268 ::debug("mem", "Not starting job: not enough mem\n");
3269 next;
3271 if($sshlogin->too_fast_remote_login()) {
3272 # It has been too short since
3273 next;
3275 debug("run", $sshlogin->string(),
3276 " has ", $sshlogin->jobs_running(),
3277 " out of ", $sshlogin->max_jobs_running(),
3278 " jobs running. Start another.\n");
3279 if(start_another_job($sshlogin) == 0) {
3280 # No more jobs to start on this $sshlogin
3281 debug("run","No jobs started on ",
3282 $sshlogin->string(), "\n");
3283 next;
3285 $sshlogin->inc_jobs_running();
3286 $sshlogin->set_last_login_at(::now());
3287 $jobs_started++;
3289 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
3290 $sshlogin->jobs_running(), " of ",
3291 $sshlogin->max_jobs_running(), "\n");
3294 return $jobs_started;
3299 my $no_more_file_handles_warned;
3301 sub start_another_job() {
3302 # If there are enough filehandles
3303 # and JobQueue not empty
3304 # and not $job is in joblog
3305 # Then grab a job from Global::JobQueue,
3306 # start it at sshlogin
3307 # mark it as virgin_job
3308 # Inputs:
3309 # $sshlogin = the SSHLogin to start the job on
3310 # Uses:
3311 # $Global::JobQueue
3312 # $opt::pipe
3313 # $opt::results
3314 # $opt::resume
3315 # @Global::virgin_jobs
3316 # Returns:
3317 # 1 if another jobs was started
3318 # 0 otherwise
3319 my $sshlogin = shift;
3320 # Do we have enough file handles to start another job?
3321 if(enough_file_handles()) {
3322 if($Global::JobQueue->empty() and not $opt::pipe) {
3323 # No more commands to run
3324 debug("start", "Not starting: JobQueue empty\n");
3325 return 0;
3326 } else {
3327 my $job;
3328 # Skip jobs already in job log
3329 # Skip jobs already in results
3330 do {
3331 $job = get_job_with_sshlogin($sshlogin);
3332 if(not defined $job) {
3333 # No command available for that sshlogin
3334 debug("start", "Not starting: no jobs available for ",
3335 $sshlogin->string(), "\n");
3336 return 0;
3338 if($job->is_already_in_joblog()) {
3339 $job->free_slot();
3341 } while ($job->is_already_in_joblog()
3343 ($opt::results and $opt::resume and $job->is_already_in_results()));
3344 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
3345 $job->replaced(),"'\n");
3346 if($job->start()) {
3347 if($opt::pipe) {
3348 if($job->virgin()) {
3349 push(@Global::virgin_jobs,$job);
3350 } else {
3351 # Block already set: This is a retry
3352 if(fork()) {
3353 ::debug("pipe","\n\nWriting ",length ${$job->block_ref()},
3354 " to ", $job->seq(),"\n");
3355 close $job->fh(0,"w");
3356 } else {
3357 $job->write($job->block_ref());
3358 close $job->fh(0,"w");
3359 exit(0);
3363 debug("start", "Started as seq ", $job->seq(),
3364 " pid:", $job->pid(), "\n");
3365 return 1;
3366 } else {
3367 # Not enough processes to run the job.
3368 # Put it back on the queue.
3369 $Global::JobQueue->unget($job);
3370 # Count down the number of jobs to run for this SSHLogin.
3371 my $max = $sshlogin->max_jobs_running();
3372 if($max > 1) { $max--; } else {
3373 my @arg;
3374 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3375 push @arg, map { $_->orig() } @$record;
3377 ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
3378 ::wait_and_exit(255);
3380 $sshlogin->set_max_jobs_running($max);
3381 # Sleep up to 300 ms to give other processes time to die
3382 ::usleep(rand()*300);
3383 ::warning("No more processes: ".
3384 "Decreasing number of running jobs to $max.",
3385 "Raising ulimit -u or /etc/security/limits.conf may help.");
3386 return 0;
3389 } else {
3390 # No more file handles
3391 $no_more_file_handles_warned++ or
3392 ::warning("No more file handles. ",
3393 "Raising ulimit -n or /etc/security/limits.conf may help.");
3394 debug("start", "No more file handles. ");
3395 return 0;
3400 sub init_progress() {
3401 # Uses:
3402 # $opt::bar
3403 # Returns:
3404 # list of computers for progress output
3405 $|=1;
3406 if($opt::bar) {
3407 return("","");
3409 my %progress = progress();
3410 return ("\nComputers / CPU cores / Max jobs to run\n",
3411 $progress{'workerlist'});
3414 sub drain_job_queue(@) {
3415 # Uses:
3416 # $opt::progress
3417 # $Global::total_running
3418 # $Global::max_jobs_running
3419 # %Global::running
3420 # $Global::JobQueue
3421 # %Global::host
3422 # $Global::start_no_new_jobs
3423 # Returns: N/A
3424 my @command = @_;
3425 if($opt::progress) {
3426 ::status_no_nl(init_progress());
3428 my $last_header = "";
3429 my $sleep = 0.2;
3430 do {
3431 while($Global::total_running > 0) {
3432 debug("init",$Global::total_running, "==", scalar
3433 keys %Global::running," slots: ", $Global::max_jobs_running);
3434 if($opt::pipe) {
3435 # When using --pipe sometimes file handles are not
3436 # closed properly
3437 for my $job (values %Global::running) {
3438 close $job->fh(0,"w");
3441 if($opt::progress) {
3442 my %progress = progress();
3443 if($last_header ne $progress{'header'}) {
3444 ::status("", $progress{'header'});
3445 $last_header = $progress{'header'};
3447 ::status_no_nl("\r",$progress{'status'});
3449 if($Global::total_running < $Global::max_jobs_running
3450 and not $Global::JobQueue->empty()) {
3451 # These jobs may not be started because of loadavg
3452 # or too little time between each ssh login.
3453 if(start_more_jobs() > 0) {
3454 # Exponential back-on if jobs were started
3455 $sleep = $sleep/2+0.001;
3458 # Exponential back-off sleeping
3459 $sleep = ::reap_usleep($sleep);
3461 if(not $Global::JobQueue->empty()) {
3462 # These jobs may not be started:
3463 # * because there the --filter-hosts has removed all
3464 if(not %Global::host) {
3465 ::error("There are no hosts left to run on.");
3466 ::wait_and_exit(255);
3468 # * because of loadavg
3469 # * because of too little time between each ssh login.
3470 $sleep = ::reap_usleep($sleep);
3471 start_more_jobs();
3472 if($Global::max_jobs_running == 0) {
3473 ::warning("There are no job slots available. Increase --jobs.");
3476 while($opt::sqlmaster and not $Global::sql->finished()) {
3477 # SQL master
3478 $sleep = ::reap_usleep($sleep);
3479 start_more_jobs();
3480 if($Global::start_sqlworker) {
3481 # Start an SQL worker as we are now sure there is work to do
3482 $Global::start_sqlworker = 0;
3483 if(my $pid = fork()) {
3484 $Global::unkilled_sqlworker = $pid;
3485 } else {
3486 # Replace --sql/--sqlandworker with --sqlworker
3487 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3488 # exec the --sqlworker
3489 exec($0,@ARGV,@command);
3493 } while ($Global::total_running > 0
3495 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3497 $opt::sqlmaster and not $Global::sql->finished());
3498 if($opt::progress) {
3499 my %progress = progress();
3500 ::status("\r".$progress{'status'});
3504 sub toggle_progress() {
3505 # Turn on/off progress view
3506 # Uses:
3507 # $opt::progress
3508 # Returns: N/A
3509 $opt::progress = not $opt::progress;
3510 if($opt::progress) {
3511 ::status_no_nl(init_progress());
3515 sub progress() {
3516 # Uses:
3517 # $opt::bar
3518 # $opt::eta
3519 # %Global::host
3520 # $Global::total_started
3521 # Returns:
3522 # $workerlist = list of workers
3523 # $header = that will fit on the screen
3524 # $status = message that will fit on the screen
3525 if($opt::bar) {
3526 return ("workerlist" => "", "header" => "", "status" => bar());
3528 my $eta = "";
3529 my ($status,$header)=("","");
3530 if($opt::eta) {
3531 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3532 compute_eta();
3533 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
3534 $this_eta, $left, $avgtime);
3536 my $termcols = terminal_columns();
3537 my @workers = sort keys %Global::host;
3538 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3539 my $workerno = 1;
3540 my %workerno = map { ($_=>$workerno++) } @workers;
3541 my $workerlist = "";
3542 for my $w (@workers) {
3543 $workerlist .=
3544 $workerno{$w}.":".$sshlogin{$w} ." / ".
3545 ($Global::host{$w}->ncpus() || "-")." / ".
3546 $Global::host{$w}->max_jobs_running()."\n";
3548 $status = "x"x($termcols+1);
3549 # Select an output format that will fit on a single line
3550 if(length $status > $termcols) {
3551 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3552 $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
3553 $status = $eta .
3554 join(" ",map
3556 if($Global::total_started) {
3557 my $completed = ($Global::host{$_}->jobs_completed()||0);
3558 my $running = $Global::host{$_}->jobs_running();
3559 my $time = $completed ? (time-$^T)/($completed) : "0";
3560 sprintf("%s:%d/%d/%d%%/%.1fs ",
3561 $sshlogin{$_}, $running, $completed,
3562 ($running+$completed)*100
3563 / $Global::total_started, $time);
3565 } @workers);
3567 if(length $status > $termcols) {
3568 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3569 $header = "Computer:jobs running/jobs completed/%of started jobs";
3570 $status = $eta .
3571 join(" ",map
3573 if($Global::total_started) {
3574 my $completed = ($Global::host{$_}->jobs_completed()||0);
3575 my $running = $Global::host{$_}->jobs_running();
3576 my $time = $completed ? (time-$^T)/($completed) : "0";
3577 sprintf("%s:%d/%d/%d%%/%.1fs ",
3578 $workerno{$_}, $running, $completed,
3579 ($running+$completed)*100
3580 / $Global::total_started, $time);
3582 } @workers);
3584 if(length $status > $termcols) {
3585 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3586 $header = "Computer:jobs running/jobs completed/%of started jobs";
3587 $status = $eta .
3588 join(" ",map
3590 if($Global::total_started) {
3591 sprintf("%s:%d/%d/%d%%",
3592 $sshlogin{$_},
3593 $Global::host{$_}->jobs_running(),
3594 ($Global::host{$_}->jobs_completed()||0),
3595 ($Global::host{$_}->jobs_running()+
3596 ($Global::host{$_}->jobs_completed()||0))*100
3597 / $Global::total_started)
3600 @workers);
3602 if(length $status > $termcols) {
3603 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3604 $header = "Computer:jobs running/jobs completed/%of started jobs";
3605 $status = $eta .
3606 join(" ",map
3608 if($Global::total_started) {
3609 sprintf("%s:%d/%d/%d%%",
3610 $workerno{$_},
3611 $Global::host{$_}->jobs_running(),
3612 ($Global::host{$_}->jobs_completed()||0),
3613 ($Global::host{$_}->jobs_running()+
3614 ($Global::host{$_}->jobs_completed()||0))*100
3615 / $Global::total_started)
3618 @workers);
3620 if(length $status > $termcols) {
3621 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3622 $header = "Computer:jobs running/jobs completed";
3623 $status = $eta .
3624 join(" ",map
3625 { sprintf("%s:%d/%d",
3626 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3627 ($Global::host{$_}->jobs_completed()||0)) }
3628 @workers);
3630 if(length $status > $termcols) {
3631 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3632 $header = "Computer:jobs running/jobs completed";
3633 $status = $eta .
3634 join(" ",map
3635 { sprintf("%s:%d/%d",
3636 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3637 ($Global::host{$_}->jobs_completed()||0)) }
3638 @workers);
3640 if(length $status > $termcols) {
3641 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3642 $header = "Computer:jobs running/jobs completed";
3643 $status = $eta .
3644 join(" ",map
3645 { sprintf("%s:%d/%d",
3646 $workerno{$_}, $Global::host{$_}->jobs_running(),
3647 ($Global::host{$_}->jobs_completed()||0)) }
3648 @workers);
3650 if(length $status > $termcols) {
3651 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3652 $header = "Computer:jobs completed";
3653 $status = $eta .
3654 join(" ",map
3655 { sprintf("%s:%d",
3656 $sshlogin{$_},
3657 ($Global::host{$_}->jobs_completed()||0)) }
3658 @workers);
3660 if(length $status > $termcols) {
3661 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3662 $header = "Computer:jobs completed";
3663 $status = $eta .
3664 join(" ",map
3665 { sprintf("%s:%d",
3666 $workerno{$_},
3667 ($Global::host{$_}->jobs_completed()||0)) }
3668 @workers);
3670 return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
3675 my ($first_completed, $smoothed_avg_time, $last_eta);
3677 sub compute_eta {
3678 # Calculate important numbers for ETA
3679 # Returns:
3680 # $total = number of jobs in total
3681 # $completed = number of jobs completed
3682 # $left = number of jobs left
3683 # $pctcomplete = percent of jobs completed
3684 # $avgtime = averaged time
3685 # $eta = smoothed eta
3686 my $completed = $Global::total_completed;
3687 # In rare cases with -X will $completed > total_jobs()
3688 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3689 my $left = $total - $completed;
3690 if(not $completed) {
3691 return($total, $completed, $left, 0, 0, 0);
3693 my $pctcomplete = ::min($completed / $total,100);
3694 $first_completed ||= time;
3695 my $timepassed = (time - $first_completed);
3696 my $avgtime = $timepassed / $completed;
3697 $smoothed_avg_time ||= $avgtime;
3698 # Smooth the eta so it does not jump wildly
3699 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3700 $pctcomplete * $avgtime;
3701 my $eta = int($left * $smoothed_avg_time);
3702 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3703 # Eta jumped less that 10% up: Keep the last eta instead
3704 $eta = $last_eta;
3705 } else {
3706 $last_eta = $eta;
3708 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3713 my ($rev,$reset);
3715 sub bar() {
3716 # Return:
3717 # $status = bar with eta, completed jobs, arg and pct
3718 $rev ||= "\033[7m";
3719 $reset ||= "\033[0m";
3720 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3721 compute_eta();
3722 my $arg = $Global::newest_job ?
3723 $Global::newest_job->{'commandline'}->
3724 replace_placeholders(["\257<\257>"],0,0) : "";
3725 # These chars mess up display in the terminal
3726 $arg =~ tr/[\011-\016\033\302-\365]//d;
3727 my $eta_dhms = ::seconds_to_time_units($eta);
3728 my $bar_text =
3729 sprintf("%d%% %d:%d=%s %s",
3730 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3731 my $terminal_width = terminal_columns();
3732 my $s = sprintf("%-${terminal_width}s",
3733 substr($bar_text." "x$terminal_width,
3734 0,$terminal_width));
3735 my $width = int($terminal_width * $pctcomplete);
3736 substr($s,$width,0) = $reset;
3737 my $zenity = sprintf("%-${terminal_width}s",
3738 substr("# $eta sec $arg",
3739 0,$terminal_width));
3740 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3741 "\r" . $rev . $s . $reset;
3742 return $s;
3747 my ($columns,$last_column_time);
3749 sub terminal_columns() {
3750 # Get the number of columns of the terminal.
3751 # Only update once per second.
3752 # Returns:
3753 # number of columns of the screen
3754 if(not $columns or $last_column_time < time) {
3755 $last_column_time = time;
3756 $columns = $ENV{'COLUMNS'};
3757 if(not $columns) {
3758 my $stty = ::qqx("stty -a </dev/tty");
3759 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3760 # MacOSX/IRIX/AIX/Tru64
3761 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3762 # GNU/Linux/Solaris
3763 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3764 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3765 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3766 # QNX
3767 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3769 if(not $columns) {
3770 my $resize = ::qqx("resize");
3771 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3773 $columns ||= 80;
3775 return $columns;
3779 # Prototype forwarding
3780 sub get_job_with_sshlogin($);
3781 sub get_job_with_sshlogin($) {
3782 # Input:
3783 # $sshlogin = which host should the job be run on?
3784 # Uses:
3785 # $opt::hostgroups
3786 # $Global::JobQueue
3787 # Returns:
3788 # $job = next job object for $sshlogin if any available
3789 my $sshlogin = shift;
3790 my $job;
3792 if ($opt::hostgroups) {
3793 my @other_hostgroup_jobs = ();
3795 while($job = $Global::JobQueue->get()) {
3796 if($sshlogin->in_hostgroups($job->hostgroups())) {
3797 # Found a job to be run on a hostgroup of this
3798 # $sshlogin
3799 last;
3800 } else {
3801 # This job was not in the hostgroups of $sshlogin
3802 push @other_hostgroup_jobs, $job;
3805 $Global::JobQueue->unget(@other_hostgroup_jobs);
3806 if(not defined $job) {
3807 # No more jobs
3808 return undef;
3810 } else {
3811 $job = $Global::JobQueue->get();
3812 if(not defined $job) {
3813 # No more jobs
3814 ::debug("start", "No more jobs: JobQueue empty\n");
3815 return undef;
3818 $job->set_sshlogin($sshlogin);
3819 if($opt::retries and $job->failed_here()) {
3820 # This command with these args failed for this sshlogin
3821 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3822 # Only look at the Global::host that have > 0 jobslots
3823 if($no_of_failed_sshlogins ==
3824 grep { $_->max_jobs_running() > 0 } values %Global::host
3825 and $job->failed_here() == $min_failures) {
3826 # It failed the same or more times on another host:
3827 # run it on this host
3828 } else {
3829 # If it failed fewer times on another host:
3830 # Find another job to run
3831 my $nextjob;
3832 if(not $Global::JobQueue->empty()) {
3833 # This can potentially recurse for all args
3834 no warnings 'recursion';
3835 $nextjob = get_job_with_sshlogin($sshlogin);
3837 # Push the command back on the queue
3838 $Global::JobQueue->unget($job);
3839 return $nextjob;
3842 return $job;
3846 sub __REMOTE_SSH__() {}
3849 sub read_sshloginfiles(@) {
3850 # Read a list of --slf's
3851 # Input:
3852 # @files = files or symbolic file names to read
3853 # Returns: N/A
3854 for my $s (@_) {
3855 read_sshloginfile(expand_slf_shorthand($s));
3859 sub expand_slf_shorthand($) {
3860 # Expand --slf shorthand into a read file name
3861 # Input:
3862 # $file = file or symbolic file name to read
3863 # Returns:
3864 # $file = actual file name to read
3865 my $file = shift;
3866 if($file eq "-") {
3867 # skip: It is stdin
3868 } elsif($file eq "..") {
3869 $file = $Global::config_dir."/sshloginfile";
3870 } elsif($file eq ".") {
3871 $file = "/etc/parallel/sshloginfile";
3872 } elsif(not -r $file) {
3873 for(@Global::config_dirs) {
3874 if(not -r $_."/".$file) {
3875 # Try prepending $PARALLEL_HOME
3876 ::error("Cannot open $file.");
3877 ::wait_and_exit(255);
3878 } else {
3879 $file = $_."/".$file;
3880 last;
3884 return $file;
3887 sub read_sshloginfile($) {
3888 # Read sshloginfile into @Global::sshlogin
3889 # Input:
3890 # $file = file to read
3891 # Uses:
3892 # @Global::sshlogin
3893 # Returns: N/A
3894 local $/ = "\n";
3895 my $file = shift;
3896 my $close = 1;
3897 my $in_fh;
3898 ::debug("init","--slf ",$file);
3899 if($file eq "-") {
3900 $in_fh = *STDIN;
3901 $close = 0;
3902 } else {
3903 if(not open($in_fh, "<", $file)) {
3904 # Try the filename
3905 ::error("Cannot open $file.");
3906 ::wait_and_exit(255);
3909 while(<$in_fh>) {
3910 chomp;
3911 /^\s*#/ and next;
3912 /^\s*$/ and next;
3913 push @Global::sshlogin, $_;
3915 if($close) {
3916 close $in_fh;
3920 sub parse_sshlogin() {
3921 # Parse @Global::sshlogin into %Global::host.
3922 # Keep only hosts that are in one of the given ssh hostgroups.
3923 # Uses:
3924 # @Global::sshlogin
3925 # $Global::minimal_command_line_length
3926 # %Global::host
3927 # $opt::transfer
3928 # @opt::return
3929 # $opt::cleanup
3930 # @opt::basefile
3931 # @opt::trc
3932 # Returns: N/A
3933 my @login;
3934 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
3935 for my $sshlogin (@Global::sshlogin) {
3936 # Split up -S sshlogin,sshlogin
3937 for my $s (split /,|\n/, $sshlogin) {
3938 if ($s eq ".." or $s eq "-") {
3939 # This may add to @Global::sshlogin - possibly bug
3940 read_sshloginfile(expand_slf_shorthand($s));
3941 } else {
3942 $s =~ s/\s*$//;
3943 push (@login, $s);
3947 $Global::minimal_command_line_length = 100_000_000;
3948 my @allowed_hostgroups;
3949 for my $ncpu_sshlogin_string (::uniq(@login)) {
3950 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
3951 my $sshlogin_string = $sshlogin->string();
3952 if($sshlogin_string eq "") {
3953 # This is an ssh group: -S @webservers
3954 push @allowed_hostgroups, $sshlogin->hostgroups();
3955 next;
3957 if($Global::host{$sshlogin_string}) {
3958 # This sshlogin has already been added:
3959 # It is probably a host that has come back
3960 # Set the max_jobs_running back to the original
3961 debug("run","Already seen $sshlogin_string\n");
3962 if($sshlogin->{'ncpus'}) {
3963 # If ncpus set by '#/' of the sshlogin, overwrite it:
3964 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
3966 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
3967 next;
3969 $sshlogin->set_maxlength(Limits::Command::max_length());
3971 $Global::minimal_command_line_length =
3972 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
3973 $Global::host{$sshlogin_string} = $sshlogin;
3975 if(@allowed_hostgroups) {
3976 # Remove hosts that are not in these groups
3977 while (my ($string, $sshlogin) = each %Global::host) {
3978 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
3979 delete $Global::host{$string};
3984 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
3985 if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) {
3986 if(not remote_hosts()) {
3987 # There are no remote hosts
3988 if(@opt::trc) {
3989 ::warning("--trc ignored as there are no remote --sshlogin.");
3990 } elsif (defined $opt::transfer) {
3991 ::warning("--transfer ignored as there are no remote --sshlogin.");
3992 } elsif (@opt::transfer_files) {
3993 ::warning("--transferfile ignored as there are no remote --sshlogin.");
3994 } elsif (@opt::return) {
3995 ::warning("--return ignored as there are no remote --sshlogin.");
3996 } elsif (defined $opt::cleanup) {
3997 ::warning("--cleanup ignored as there are no remote --sshlogin.");
3998 } elsif (@opt::basefile) {
3999 ::warning("--basefile ignored as there are no remote --sshlogin.");
4005 sub remote_hosts() {
4006 # Return sshlogins that are not ':'
4007 # Uses:
4008 # %Global::host
4009 # Returns:
4010 # list of sshlogins with ':' removed
4011 return grep !/^:$/, keys %Global::host;
4014 sub setup_basefile() {
4015 # Transfer basefiles to each $sshlogin
4016 # This needs to be done before first jobs on $sshlogin is run
4017 # Uses:
4018 # %Global::host
4019 # @opt::basefile
4020 # Returns: N/A
4021 my @cmd;
4022 my $rsync_destdir;
4023 my $workdir;
4024 for my $sshlogin (values %Global::host) {
4025 if($sshlogin->string() eq ":") { next }
4026 for my $file (@opt::basefile) {
4027 if($file !~ m:^/: and $opt::workdir eq "...") {
4028 ::error("Work dir '...' will not work with relative basefiles.");
4029 ::wait_and_exit(255);
4031 if(not $workdir) {
4032 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],{},{},{});
4033 my $dummyjob = Job->new($dummycmdline);
4034 $workdir = $dummyjob->workdir();
4036 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4039 debug("init", "basesetup: @cmd\n");
4040 my ($exitstatus,$stdout_ref,$stderr_ref) =
4041 run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5);
4042 if($exitstatus) {
4043 my @stdout = @$stdout_ref;
4044 my @stderr = @$stderr_ref;
4045 ::error("Copying of --basefile failed: @stdout@stderr");
4046 ::wait_and_exit(255);
4050 sub cleanup_basefile() {
4051 # Remove the basefiles transferred
4052 # Uses:
4053 # %Global::host
4054 # @opt::basefile
4055 # Returns: N/A
4056 my @cmd;
4057 my $workdir;
4058 if(not $workdir) {
4059 my $dummycmdline = CommandLine->new(1,"true",0,0,0,0,0,{},{},{});
4060 my $dummyjob = Job->new($dummycmdline);
4061 $workdir = $dummyjob->workdir();
4063 for my $sshlogin (values %Global::host) {
4064 if($sshlogin->string() eq ":") { next }
4065 for my $file (@opt::basefile) {
4066 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
4069 debug("init", "basecleanup: @cmd\n");
4070 my ($exitstatus,$stdout_ref,$stderr_ref) =
4071 run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5);
4072 if($exitstatus) {
4073 my @stdout = @$stdout_ref;
4074 my @stderr = @$stderr_ref;
4075 ::error("Cleanup of --basefile failed: @stdout@stderr");
4076 ::wait_and_exit(255);
4080 sub run_gnu_parallel() {
4081 my ($stdin,@args) = @_;
4082 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
4083 print $Global::original_stderr ` $cmd wait` ;
4084 return 0
4087 sub _run_gnu_parallel() {
4088 # Run GNU Parallel
4089 # This should ideally just fork an internal copy
4090 # and not start it through a shell
4091 # Input:
4092 # $stdin = data to provide on stdin for GNU Parallel
4093 # @args = command line arguments
4094 # Returns:
4095 # $exitstatus = exitcode of GNU Parallel run
4096 # \@stdout = standard output
4097 # \@stderr = standard error
4098 my ($stdin,@args) = @_;
4099 my ($exitstatus,@stdout,@stderr);
4100 my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
4101 my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
4102 unlink $stderrname;
4104 my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
4105 $0,qw(--plain --shell /bin/sh --will-cite), @args);
4106 if(my $writerpid = fork()) {
4107 close $stdin_fh;
4108 @stdout = <$stdout_fh>;
4109 # Now stdout is closed:
4110 # These pids should be dead or die very soon
4111 while(kill 0, $writerpid) { ::usleep(1); }
4112 die;
4113 # reap $writerpid;
4114 # while(kill 0, $pid) { ::usleep(1); }
4115 # reap $writerpid;
4116 $exitstatus = $?;
4117 seek $stderr_fh, 0, 0;
4118 @stderr = <$stderr_fh>;
4119 close $stdout_fh;
4120 close $stderr_fh;
4121 } else {
4122 close $stdout_fh;
4123 close $stderr_fh;
4124 print $stdin_fh $stdin;
4125 close $stdin_fh;
4126 exit(0);
4128 return ($exitstatus,\@stdout,\@stderr);
4131 sub filter_hosts() {
4132 # Remove down --sshlogins from active duty.
4133 # Find ncpus, ncores, maxlen, time-to-login for each host.
4134 # Uses:
4135 # %Global::host
4136 # $Global::minimal_command_line_length
4137 # $opt::use_sockets_instead_of_threads
4138 # $opt::use_cores_instead_of_threads
4139 # $opt::use_cpus_instead_of_cores
4140 # Returns: N/A
4142 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
4143 $maxlen_ref, $echo_ref, $down_hosts_ref) =
4144 parse_host_filtering(parallelized_host_filtering());
4146 delete @Global::host{@$down_hosts_ref};
4147 @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
4149 $Global::minimal_command_line_length = 100_000_000;
4150 while (my ($sshlogin, $obj) = each %Global::host) {
4151 if($sshlogin eq ":") { next }
4152 $nsockets_ref->{$sshlogin} or
4153 ::die_bug("nsockets missing: ".$obj->serverlogin());
4154 $ncores_ref->{$sshlogin} or
4155 ::die_bug("ncores missing: ".$obj->serverlogin());
4156 $nthreads_ref->{$sshlogin} or
4157 ::die_bug("nthreads missing: ".$obj->serverlogin());
4158 $time_to_login_ref->{$sshlogin} or
4159 ::die_bug("time_to_login missing: ".$obj->serverlogin());
4160 $maxlen_ref->{$sshlogin} or
4161 ::die_bug("maxlen missing: ".$obj->serverlogin());
4162 $obj->set_ncpus($nthreads_ref->{$sshlogin});
4163 if($opt::use_cpus_instead_of_cores) {
4164 $obj->set_ncpus($ncores_ref->{$sshlogin});
4165 } elsif($opt::use_sockets_instead_of_threads) {
4166 $obj->set_ncpus($nsockets_ref->{$sshlogin});
4167 } elsif($opt::use_cores_instead_of_threads) {
4168 $obj->set_ncpus($ncores_ref->{$sshlogin});
4170 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
4171 $obj->set_maxlength($maxlen_ref->{$sshlogin});
4172 $Global::minimal_command_line_length =
4173 ::min($Global::minimal_command_line_length,
4174 int($maxlen_ref->{$sshlogin}/2));
4175 ::debug("init", "Timing from -S:$sshlogin ",
4176 " nsockets:",$nsockets_ref->{$sshlogin},
4177 " ncores:", $ncores_ref->{$sshlogin},
4178 " nthreads:",$nthreads_ref->{$sshlogin},
4179 " time_to_login:", $time_to_login_ref->{$sshlogin},
4180 " maxlen:", $maxlen_ref->{$sshlogin},
4181 " min_max_len:", $Global::minimal_command_line_length,"\n");
4185 sub parse_host_filtering() {
4186 # Input:
4187 # @lines = output from parallelized_host_filtering()
4188 # Returns:
4189 # \%nsockets = number of sockets of {host}
4190 # \%ncores = number of cores of {host}
4191 # \%nthreads = number of hyperthreaded cores of {host}
4192 # \%time_to_login = time_to_login on {host}
4193 # \%maxlen = max command len on {host}
4194 # \%echo = echo received from {host}
4195 # \@down_hosts = list of hosts with no answer
4196 local $/ = "\n";
4197 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
4198 @down_hosts);
4199 for (@_) {
4200 ::debug("init","Read: ",$_);
4201 chomp;
4202 my @col = split /\t/, $_;
4203 if($col[0] =~ /^parallel: Warning:/) {
4204 # Timed out job: Ignore it
4205 next;
4206 } elsif(defined $col[6]) {
4207 # This is a line from --joblog
4208 # seq host time spent sent received exit signal command
4209 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
4210 if($col[0] eq "Seq" and $col[1] eq "Host" and
4211 $col[2] eq "Starttime") {
4212 # Header => skip
4213 next;
4215 # Get server from: eval true server\;
4216 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
4217 ::die_bug("col8 does not contain host: $col[8]");
4218 my $host = $1;
4219 $host =~ tr/\\//d;
4220 $Global::host{$host} or next;
4221 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
4222 # exit == 255 or exit == timeout (-1): ssh failed/timedout
4223 # exit == 1: lsh failed
4224 # Remove sshlogin
4225 ::debug("init", "--filtered $host\n");
4226 push(@down_hosts, $host);
4227 } elsif($col[6] eq "127") {
4228 # signal == 127: parallel not installed remote
4229 # Set nsockets, ncores, nthreads = 1
4230 ::warning("Could not figure out ".
4231 "number of cpus on $host. Using 1.");
4232 $nsockets{$host} = 1;
4233 $ncores{$host} = 1;
4234 $nthreads{$host} = 1;
4235 $maxlen{$host} = Limits::Command::max_length();
4236 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
4237 # Remember how log it took to log in
4238 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
4239 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
4240 } else {
4241 ::die_bug("host check unmatched long jobline: $_");
4243 } elsif($Global::host{$col[0]}) {
4244 # This output from --number-of-cores, --number-of-cpus,
4245 # --max-line-length-allowed
4246 # ncores: server 8
4247 # ncpus: server 2
4248 # maxlen: server 131071
4249 if(/parallel: Warning: Cannot figure out number of/) {
4250 next;
4252 if(not $nsockets{$col[0]}) {
4253 $nsockets{$col[0]} = $col[1];
4254 } elsif(not $ncores{$col[0]}) {
4255 $ncores{$col[0]} = $col[1];
4256 } elsif(not $nthreads{$col[0]}) {
4257 $nthreads{$col[0]} = $col[1];
4258 } elsif(not $maxlen{$col[0]}) {
4259 $maxlen{$col[0]} = $col[1];
4260 } elsif(not $echo{$col[0]}) {
4261 $echo{$col[0]} = $col[1];
4262 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
4263 # Skip these:
4264 # perl: warning: Setting locale failed.
4265 # perl: warning: Please check that your locale settings:
4266 # LANGUAGE = (unset),
4267 # LC_ALL = (unset),
4268 # LANG = "en_US.UTF-8"
4269 # are supported and installed on your system.
4270 # perl: warning: Falling back to the standard locale ("C").
4271 } else {
4272 ::die_bug("host check too many col0: $_");
4274 } else {
4275 ::die_bug("host check unmatched short jobline ($col[0]): $_");
4278 @down_hosts = uniq(@down_hosts);
4279 return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
4280 \%maxlen, \%echo, \@down_hosts);
4283 sub parallelized_host_filtering() {
4284 # Uses:
4285 # %Global::host
4286 # Returns:
4287 # text entries with:
4288 # * joblog line
4289 # * hostname \t number of cores
4290 # * hostname \t number of cpus
4291 # * hostname \t max-line-length-allowed
4292 # * hostname \t empty
4294 sub sshwrapped {
4295 # Wrap with ssh and --env
4296 my $sshlogin = shift;
4297 my $command = shift;
4298 my $commandline = CommandLine->new(1,[$command],{},0,0,[],[],{},{},{});
4299 my $job = Job->new($commandline);
4300 $job->set_sshlogin($sshlogin);
4301 $job->wrapped();
4302 return($job->{'wrapped'});
4305 my(@sockets, @cores, @threads, @maxline, @echo);
4306 while (my ($host, $sshlogin) = each %Global::host) {
4307 if($host eq ":") { next }
4308 # The 'true' is used to get the $host out later
4309 push(@sockets, $host."\t"."true $host; ".
4310 sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0");
4311 push(@cores, $host."\t"."true $host; ".
4312 sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0");
4313 push(@threads, $host."\t"."true $host; ".
4314 sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0");
4315 push(@maxline, $host."\t"."true $host; ".
4316 sshwrapped($sshlogin,"parallel --max-line-length-allowed")."\n\0");
4317 # 'echo' is used to get the fastest possible ssh login time
4318 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4319 $sshlogin->serverlogin();
4320 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4323 # --timeout 10: Setting up an SSH connection and running a simple
4324 # command should never take > 10 sec.
4325 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4326 # will make it less likely to overload the ssh daemon.
4327 # --retries 3: If the ssh daemon is overloaded, try 3 times
4328 my $cmd =
4329 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4330 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4331 $cmd = $Global::shell." -c ".Q($cmd);
4332 ::debug("init", $cmd, "\n");
4333 my @out;
4334 my $prepend = "";
4336 my ($host_fh,$in,$err);
4337 open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
4338 if(not fork()) {
4339 # Give the commands to run to the $cmd
4340 close $host_fh;
4341 print $in @sockets, @cores, @threads, @maxline, @echo;
4342 close $in;
4343 exit();
4345 close $in;
4346 for(<$host_fh>) {
4347 # TODO incompatible with '-quoting. Needs to be fixed differently
4348 #if(/\'$/) {
4349 # # if last char = ' then append next line
4350 # # This may be due to quoting of \n in environment var
4351 # $prepend .= $_;
4352 # next;
4354 $_ = $prepend . $_;
4355 $prepend = "";
4356 push @out, $_;
4358 close $host_fh;
4359 return @out;
4362 sub onall($@) {
4363 # Runs @command on all hosts.
4364 # Uses parallel to run @command on each host.
4365 # --jobs = number of hosts to run on simultaneously.
4366 # For each host a parallel command with the args will be running.
4367 # Uses:
4368 # $Global::quoting
4369 # @opt::basefile
4370 # $opt::jobs
4371 # $opt::linebuffer
4372 # $opt::ungroup
4373 # $opt::group
4374 # $opt::keeporder
4375 # $opt::D
4376 # $opt::plain
4377 # $opt::max_chars
4378 # $opt::linebuffer
4379 # $opt::files
4380 # $opt::colsep
4381 # $opt::timeout
4382 # $opt::plain
4383 # $opt::retries
4384 # $opt::max_chars
4385 # $opt::arg_sep
4386 # $opt::arg_file_sep
4387 # @opt::v
4388 # @opt::env
4389 # %Global::host
4390 # $Global::exitstatus
4391 # $Global::debug
4392 # $Global::joblog
4393 # $opt::joblog
4394 # $opt::tag
4395 # $opt::tee
4396 # Input:
4397 # @command = command to run on all hosts
4398 # Returns: N/A
4399 sub tmp_joblog {
4400 # Input:
4401 # $joblog = filename of joblog - undef if none
4402 # Returns:
4403 # $tmpfile = temp file for joblog - undef if none
4404 my $joblog = shift;
4405 if(not defined $joblog) {
4406 return undef;
4408 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
4409 close $fh;
4410 return $tmpfile;
4412 my ($input_source_fh_ref,@command) = @_;
4413 if($Global::quoting) {
4414 @command = shell_quote(@command);
4417 # Copy all @input_source_fh (-a and :::) into tempfiles
4418 my @argfiles = ();
4419 for my $fh (@$input_source_fh_ref) {
4420 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
4421 print $outfh (<$fh>);
4422 close $outfh;
4423 push @argfiles, $name;
4425 if(@opt::basefile) { setup_basefile(); }
4426 # for each sshlogin do:
4427 # parallel -S $sshlogin $command :::: @argfiles
4429 # Pass some of the options to the sub-parallels, not all of them as
4430 # -P should only go to the first, and -S should not be copied at all.
4431 my $options =
4432 join(" ",
4433 ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""),
4434 ((defined $opt::D) ? "-D $opt::D" : ""),
4435 ((defined $opt::group) ? "-g" : ""),
4436 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
4437 ((defined $opt::keeporder) ? "--keeporder" : ""),
4438 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4439 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4440 ((defined $opt::plain) ? "--plain" : ""),
4441 ((defined $opt::ungroup) ? "-u" : ""),
4442 ((defined $opt::tee) ? "--tee" : ""),
4444 my $suboptions =
4445 join(" ",
4446 ((defined $opt::D) ? "-D $opt::D" : ""),
4447 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
4448 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
4449 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
4450 ((defined $opt::files) ? "--files" : ""),
4451 ((defined $opt::group) ? "-g" : ""),
4452 ((defined $opt::cleanup) ? "--cleanup" : ""),
4453 ((defined $opt::keeporder) ? "--keeporder" : ""),
4454 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4455 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4456 ((defined $opt::plain) ? "--plain" : ""),
4457 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
4458 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
4459 ((defined $opt::ungroup) ? "-u" : ""),
4460 ((defined $opt::tee) ? "--tee" : ""),
4461 ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
4462 (@Global::transfer_files ? map { "--tf ".Q($_) }
4463 @Global::transfer_files : ""),
4464 (@Global::ret_files ? map { "--return ".Q($_) }
4465 @Global::ret_files : ""),
4466 (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
4467 (map { "-v" } @opt::v),
4469 ::debug("init", "| $0 $options\n");
4470 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4471 ::die_bug("This does not run GNU Parallel: $0 $options");
4472 my @joblogs;
4473 for my $host (sort keys %Global::host) {
4474 my $sshlogin = $Global::host{$host};
4475 my $joblog = tmp_joblog($opt::joblog);
4476 if($joblog) {
4477 push @joblogs, $joblog;
4478 $joblog = "--joblog $joblog";
4480 my $quad = $opt::arg_file_sep || "::::";
4481 # If PARALLEL_ENV is set: Pass it on
4482 my $penv=$Global::parallel_env ?
4483 "PARALLEL_ENV=".Q($Global::parallel_env) :
4485 ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
4486 ((defined $opt::tag) ?
4487 "--tagstring ".Q($sshlogin->string()) : ""),
4488 " -S ", Q($sshlogin->string())," ",
4489 join(" ",shell_quote(@command))," $quad @argfiles\n");
4490 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4491 ((defined $opt::tag) ?
4492 "--tagstring ".Q($sshlogin->string()) : ""),
4493 " -S ", Q($sshlogin->string())," ",
4494 join(" ",shell_quote(@command))," $quad @argfiles\0";
4496 close $parallel_fh;
4497 $Global::exitstatus = $? >> 8;
4498 debug("init", "--onall exitvalue ", $?);
4499 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
4500 $Global::debug or unlink(@argfiles);
4501 my %seen;
4502 for my $joblog (@joblogs) {
4503 # Append to $joblog
4504 open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
4505 # Skip first line (header);
4506 <$fh>;
4507 print $Global::joblog (<$fh>);
4508 close $fh;
4509 unlink($joblog);
4514 sub __SIGNAL_HANDLING__() {}
4517 sub sigtstp() {
4518 # Send TSTP signal (Ctrl-Z) to all children process groups
4519 # Uses:
4520 # %SIG
4521 # Returns: N/A
4522 signal_children("TSTP");
4525 sub sigpipe() {
4526 # Send SIGPIPE signal to all children process groups
4527 # Uses:
4528 # %SIG
4529 # Returns: N/A
4530 signal_children("PIPE");
4533 sub signal_children() {
4534 # Send signal to all children process groups
4535 # and GNU Parallel itself
4536 # Uses:
4537 # %SIG
4538 # Returns: N/A
4539 my $signal = shift;
4540 debug("run", "Sending $signal ");
4541 kill $signal, map { -$_ } keys %Global::running;
4542 # Use default signal handler for GNU Parallel itself
4543 $SIG{$signal} = undef;
4544 kill $signal, $$;
4547 sub save_original_signal_handler() {
4548 # Remember the original signal handler
4549 # Uses:
4550 # %Global::original_sig
4551 # Returns: N/A
4552 $SIG{INT} = sub {
4553 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4554 wait_and_exit(255);
4556 $SIG{TERM} = sub {
4557 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4558 wait_and_exit(255);
4560 %Global::original_sig = %SIG;
4561 $SIG{TERM} = sub {}; # Dummy until jobs really start
4562 $SIG{ALRM} = 'IGNORE';
4563 # Allow Ctrl-Z to suspend and `fg` to continue
4564 $SIG{TSTP} = \&sigtstp;
4565 $SIG{PIPE} = \&sigpipe;
4566 $SIG{CONT} = sub {
4567 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4568 $SIG{TSTP} = \&sigtstp;
4569 # Send continue signal to all children process groups
4570 kill "CONT", map { -$_ } keys %Global::running;
4574 sub list_running_jobs() {
4575 # Print running jobs on tty
4576 # Uses:
4577 # %Global::running
4578 # Returns: N/A
4579 for my $job (values %Global::running) {
4580 ::status("$Global::progname: ".$job->replaced());
4584 sub start_no_new_jobs() {
4585 # Start no more jobs
4586 # Uses:
4587 # %Global::original_sig
4588 # %Global::unlink
4589 # $Global::start_no_new_jobs
4590 # Returns: N/A
4591 # $SIG{TERM} = $Global::original_sig{TERM};
4592 unlink keys %Global::unlink;
4593 ::status
4594 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4595 "$Global::progname: Waiting for these ".(keys %Global::running).
4596 " jobs to finish. Send SIGTERM to stop now.");
4597 list_running_jobs();
4598 $Global::start_no_new_jobs ||= 1;
4601 sub reapers() {
4602 # Run reaper until there are no more left
4603 # Returns:
4604 # @pids_reaped = pids of reaped processes
4605 my @pids_reaped;
4606 my $pid;
4607 while($pid = reaper()) {
4608 push @pids_reaped, $pid;
4610 return @pids_reaped;
4613 sub reaper() {
4614 # A job finished:
4615 # * Set exitstatus, exitsignal, endtime.
4616 # * Free ressources for new job
4617 # * Update median runtime
4618 # * Print output
4619 # * If --halt = now: Kill children
4620 # * Print progress
4621 # Uses:
4622 # %Global::running
4623 # $opt::timeout
4624 # $Global::timeoutq
4625 # $opt::keeporder
4626 # $Global::total_running
4627 # Returns:
4628 # $stiff = PID of child finished
4629 my $stiff;
4630 debug("run", "Reaper ");
4631 if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
4632 # No jobs waiting to be reaped
4633 return 0;
4636 # $stiff = pid of dead process
4637 my $job = $Global::running{$stiff};
4639 # '-a <(seq 10)' will give us a pid not in %Global::running
4640 # The same will one of the ssh -M: ignore
4641 $job or return 0;
4642 delete $Global::running{$stiff};
4643 $Global::total_running--;
4644 if($job->{'commandline'}{'skip'}) {
4645 # $job->skip() was called
4646 $job->set_exitstatus(-2);
4647 $job->set_exitsignal(0);
4648 } else {
4649 $job->set_exitstatus($? >> 8);
4650 $job->set_exitsignal($? & 127);
4653 debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4654 $job->set_endtime(::now());
4655 my $sshlogin = $job->sshlogin();
4656 $sshlogin->dec_jobs_running();
4657 if($job->should_be_retried()) {
4658 # Free up file handles
4659 $job->free_ressources();
4660 } else {
4661 # The job is done
4662 $sshlogin->inc_jobs_completed();
4663 # Free the jobslot
4664 $job->free_slot();
4665 if($opt::timeout and not $job->exitstatus()) {
4666 # Update average runtime for timeout only for successful jobs
4667 $Global::timeoutq->update_median_runtime($job->runtime());
4669 if($opt::keeporder) {
4670 $job->print_earlier_jobs();
4671 } else {
4672 $job->print();
4674 if($job->should_we_halt() eq "now") {
4675 # Kill children
4676 ::kill_sleep_seq($job->pid());
4677 ::killall();
4678 ::wait_and_exit($Global::halt_exitstatus);
4681 $job->cleanup();
4683 if($opt::progress) {
4684 my %progress = progress();
4685 ::status_no_nl("\r",$progress{'status'});
4688 debug("run", "done ");
4689 return $stiff;
4693 sub __USAGE__() {}
4696 sub killall() {
4697 # Kill all jobs by killing their process groups
4698 # Uses:
4699 # $Global::start_no_new_jobs = we are stopping
4700 # $Global::killall = Flag to not run reaper
4701 $Global::start_no_new_jobs ||= 1;
4702 # Do not reap killed children: Ignore them instead
4703 $Global::killall ||= 1;
4704 kill_sleep_seq(keys %Global::running);
4707 sub kill_sleep_seq(@) {
4708 # Send jobs TERM,TERM,KILL to processgroups
4709 # Input:
4710 # @pids = list of pids that are also processgroups
4711 # Convert pids to process groups ($processgroup = -$pid)
4712 my @pgrps = map { -$_ } @_;
4713 my @term_seq = split/,/,$opt::termseq;
4714 if(not @term_seq) {
4715 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4717 while(@term_seq) {
4718 @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
4722 sub kill_sleep() {
4723 # Kill pids with a signal and wait a while for them to die
4724 # Input:
4725 # $signal = signal to send to @pids
4726 # $sleep_max = number of ms to sleep at most before returning
4727 # @pids = pids to kill (actually process groups)
4728 # Uses:
4729 # $Global::killall = set by killall() to avoid calling reaper
4730 # Returns:
4731 # @pids = pids still alive
4732 my ($signal, $sleep_max, @pids) = @_;
4733 ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4734 kill $signal, @pids;
4735 my $sleepsum = 0;
4736 my $sleep = 0.001;
4738 while(@pids and $sleepsum < $sleep_max) {
4739 if($Global::killall) {
4740 # Killall => don't run reaper
4741 while(waitpid(-1, &WNOHANG) > 0) {
4742 $sleep = $sleep/2+0.001;
4744 } elsif(reapers()) {
4745 $sleep = $sleep/2+0.001;
4747 $sleep *= 1.1;
4748 ::usleep($sleep);
4749 $sleepsum += $sleep;
4750 # Keep only living children
4751 @pids = grep { kill(0, $_) } @pids;
4753 return @pids;
4756 sub wait_and_exit($) {
4757 # If we do not wait, we sometimes get segfault
4758 # Returns: N/A
4759 my $error = shift;
4760 unlink keys %Global::unlink;
4761 if($error) {
4762 # Kill all jobs without printing
4763 killall();
4765 for (keys %Global::unkilled_children) {
4766 # Kill any (non-jobs) children (e.g. reserved processes)
4767 kill 9, $_;
4768 waitpid($_,0);
4769 delete $Global::unkilled_children{$_};
4771 if($Global::unkilled_sqlworker) {
4772 waitpid($Global::unkilled_sqlworker,0);
4774 exit($error);
4777 sub die_usage() {
4778 # Returns: N/A
4779 usage();
4780 wait_and_exit(255);
4783 sub usage() {
4784 # Returns: N/A
4785 print join
4786 ("\n",
4787 "Usage:",
4789 "$Global::progname [options] [command [arguments]] < list_of_arguments",
4790 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4791 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4793 "-j n Run n jobs in parallel",
4794 "-k Keep same order",
4795 "-X Multiple arguments with context replace",
4796 "--colsep regexp Split input on regexp for positional replacements",
4797 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4798 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
4799 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4800 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4802 "-S sshlogin Example: foo\@server.example.com",
4803 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
4804 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
4805 "--onall Run the given command with argument on all sshlogins",
4806 "--nonall Run the given command with no arguments on all sshlogins",
4808 "--pipe Split stdin (standard input) to multiple jobs.",
4809 "--recend str Record end separator for --pipe.",
4810 "--recstart str Record start separator for --pipe.",
4812 "See 'man $Global::progname' for details",
4814 "Academic tradition requires you to cite works you base your article on.",
4815 "If you use programs that use GNU Parallel to process data for an article in a",
4816 "scientific publication, please cite:",
4818 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4819 " DOI https://doi.org/10.5281/zenodo.1146014",
4821 # Before changing this line, please read
4822 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4823 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4824 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4825 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4827 "",);
4830 sub citation_notice() {
4831 # if --will-cite or --plain: do nothing
4832 # if stderr redirected: do nothing
4833 # if $PARALLEL_HOME/will-cite: do nothing
4834 # else: print citation notice to stderr
4835 if($opt::willcite
4837 $opt::plain
4839 not -t $Global::original_stderr
4841 grep { -e "$_/will-cite" } @Global::config_dirs) {
4842 # skip
4843 } else {
4844 ::status
4845 ("Academic tradition requires you to cite works you base your article on.",
4846 "If you use programs that use GNU Parallel to process data for an article in a",
4847 "scientific publication, please cite:",
4849 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4850 " DOI https://doi.org/10.5281/zenodo.1146014",
4852 # Before changing this line, please read
4853 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4854 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4855 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4856 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4858 "More about funding GNU Parallel and the citation notice:",
4859 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4861 "To silence this citation notice: run 'parallel --citation' once.",
4864 mkdir $Global::config_dir;
4865 # Number of times the user has run GNU Parallel without showing
4866 # willingness to cite
4867 my $runs = 0;
4868 if(open (my $fh, "<", $Global::config_dir.
4869 "/runs-without-willing-to-cite")) {
4870 $runs = <$fh>;
4871 close $fh;
4873 $runs++;
4874 if(open (my $fh, ">", $Global::config_dir.
4875 "/runs-without-willing-to-cite")) {
4876 print $fh $runs;
4877 close $fh;
4878 if($runs >= 10) {
4879 ::status("Come on: You have run parallel $runs times. Isn't it about time ",
4880 "you run 'parallel --citation' once to silence the citation notice?",
4881 "");
4887 sub status(@) {
4888 my @w = @_;
4889 my $fh = $Global::status_fd || *STDERR;
4890 print $fh map { ($_, "\n") } @w;
4891 flush $fh;
4894 sub status_no_nl(@) {
4895 my @w = @_;
4896 my $fh = $Global::status_fd || *STDERR;
4897 print $fh @w;
4898 flush $fh;
4901 sub warning(@) {
4902 my @w = @_;
4903 my $prog = $Global::progname || "parallel";
4904 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
4907 sub error(@) {
4908 my @w = @_;
4909 my $prog = $Global::progname || "parallel";
4910 status(map { ($prog.": Error: ". $_); } @w);
4913 sub die_bug($) {
4914 my $bugid = shift;
4915 print STDERR
4916 ("$Global::progname: This should not happen. You have found a bug.\n",
4917 "Please contact <parallel\@gnu.org> and follow\n",
4918 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
4919 "\n",
4920 "Include this in the report:\n",
4921 "* The version number: $Global::version\n",
4922 "* The bugid: $bugid\n",
4923 "* The command line being run\n",
4924 "* The files being read (put the files on a webserver if they are big)\n",
4925 "\n",
4926 "If you get the error on smaller/fewer files, please include those instead.\n");
4927 ::wait_and_exit(255);
4930 sub version() {
4931 # Returns: N/A
4932 print join
4933 ("\n",
4934 "GNU $Global::progname $Global::version",
4935 "Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.",
4936 "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
4937 "This is free software: you are free to change and redistribute it.",
4938 "GNU $Global::progname comes with no warranty.",
4940 "Web site: http://www.gnu.org/software/${Global::progname}\n",
4941 "When using programs that use GNU Parallel to process data for publication",
4942 "please cite as described in 'parallel --citation'.\n",
4946 sub citation() {
4947 # Returns: N/A
4948 my ($all_argv_ref,$argv_options_removed_ref) = @_;
4949 my $all_argv = "@$all_argv_ref";
4950 my $no_opts = "@$argv_options_removed_ref";
4951 $all_argv=~s/--citation//;
4952 if($all_argv ne $no_opts) {
4953 ::warning("--citation ignores all other options and arguments.");
4954 ::status("");
4957 ::status(
4958 "Academic tradition requires you to cite works you base your article on.",
4959 "If you use programs that use GNU Parallel to process data for an article in a",
4960 "scientific publication, please cite:",
4962 "\@book{tange_ole_2018_1146014,",
4963 " author = {Tange, Ole},",
4964 " title = {GNU Parallel 2018},",
4965 " publisher = {Ole Tange},",
4966 " month = Mar,",
4967 " year = 2018,",
4968 " ISBN = {9781387509881},",
4969 " doi = {10.5281/zenodo.1146014},",
4970 " url = {https://doi.org/10.5281/zenodo.1146014}",
4971 "}",
4973 "(Feel free to use \\nocite{tange_ole_2018_1146014})",
4975 # Before changing this line, please read
4976 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4977 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4978 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4979 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4981 "More about funding GNU Parallel and the citation notice:",
4982 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
4983 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4984 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
4986 "If you send a copy of your published article to tange\@gnu.org, it will be",
4987 "mentioned in the release notes of next version of GNU Parallel.",
4990 while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
4991 print "\nType: 'will cite' and press enter.\n> ";
4992 my $input = <STDIN>;
4993 if(not defined $input) {
4994 exit(255);
4996 if($input =~ /will cite/i) {
4997 mkdir $Global::config_dir;
4998 if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
4999 close $fh;
5000 ::status(
5002 "Thank you for your support: You are the reason why there is funding to",
5003 "continue maintaining GNU Parallel. On behalf of future versions of",
5004 "GNU Parallel, which would not exist without your support:",
5006 " THANK YOU SO MUCH",
5008 "It is really appreciated. The citation notice is now silenced.",
5009 "");
5010 } else {
5011 ::status(
5013 "Thank you for your support. It is much appreciated. The citation",
5014 "cannot permanently be silenced. Use '--will-cite' instead.",
5016 "If you use '--will-cite' in scripts to be run by others you are making",
5017 "it harder for others to see the citation notice. The development of",
5018 "GNU Parallel is indirectly financed through citations, so if users",
5019 "do not know they should cite then you are making it harder to finance",
5020 "development. However, if you pay 10000 EUR, you should feel free to",
5021 "use '--will-cite' in scripts.",
5022 "");
5023 last;
5029 sub show_limits() {
5030 # Returns: N/A
5031 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
5032 "Maximal used size of command: ",Limits::Command::max_length(),"\n",
5033 "\n",
5034 "Execution of will continue now, and it will try to read its input\n",
5035 "and run commands; if this is not what you wanted to happen, please\n",
5036 "press CTRL-D or CTRL-C\n");
5039 sub embed() {
5040 # Give an embeddable version of GNU Parallel
5041 # Tested with: bash, zsh, ksh, ash, dash, sh
5042 my $randomstring = "cut-here-".join"",
5043 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
5044 if(not -f $0 or not -r $0) {
5045 ::error("--embed only works if parallel is a readable file");
5046 exit(255);
5048 if(open(my $fh, "<", $0)) {
5049 # Read the source from $0
5050 my @source = <$fh>;
5051 my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
5052 my @env_parallel_source = ();
5053 my $shell = $Global::shell;
5054 $shell =~ s:.*/::;
5055 for(which("env_parallel.$shell")) {
5056 -r $_ or next;
5057 # Read the source of env_parallel.shellname
5058 open(my $env_parallel_source_fh, $_) || die;
5059 @env_parallel_source = <$env_parallel_source_fh>;
5060 close $env_parallel_source_fh;
5061 last;
5063 print "#!$Global::shell
5065 # Copyright (C) 2007-2019 $user, Ole Tange and Free Software
5066 # Foundation, Inc.
5068 # This program is free software; you can redistribute it and/or modify
5069 # it under the terms of the GNU General Public License as published by
5070 # the Free Software Foundation; either version 3 of the License, or
5071 # (at your option) any later version.
5073 # This program is distributed in the hope that it will be useful, but
5074 # WITHOUT ANY WARRANTY; without even the implied warranty of
5075 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5076 # General Public License for more details.
5078 # You should have received a copy of the GNU General Public License
5079 # along with this program; if not, see <http://www.gnu.org/licenses/>
5080 # or write to the Free Software Foundation, Inc., 51 Franklin St,
5081 # Fifth Floor, Boston, MA 02110-1301 USA
5084 print q!
5085 # Embedded GNU Parallel created with --embed
5086 parallel() {
5087 # Start GNU Parallel without leaving temporary files
5089 # Not all shells support 'perl <(cat ...)'
5090 # This is a complex way of doing:
5091 # perl <(cat <<'cut-here'
5092 # [...]
5093 # ) "$@"
5094 # and also avoiding:
5095 # [1]+ Done cat
5097 # Make a temporary fifo that perl can read from
5098 _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo);
5099 do {
5100 $f = "/tmp/parallel-".join"",
5101 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5102 } while(-e $f);
5103 mkfifo($f,0600);
5104 print $f;'`
5105 # Put source code into temporary file
5106 # so it is easy to copy to the fifo
5107 _file_with_GNU_Parallel_source=`mktemp`;
5109 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
5110 @source,
5111 $randomstring,"\n",
5113 # Copy the source code from the file to the fifo
5114 # and remove the file and fifo ASAP
5115 # 'sh -c' is needed to avoid
5116 # [1]+ Done cat
5117 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
5119 # Read the source from the fifo
5120 perl $_fifo_with_GNU_Parallel_source "$@"
5123 @env_parallel_source,
5126 # This will call the functions above
5127 parallel -k echo ::: Put your code here
5128 env_parallel --session
5129 env_parallel -k echo ::: Put your code here
5130 parset p,y,c,h -k echo ::: Put your code here
5131 echo $p $y $c $h
5133 } else {
5134 ::error("Cannot open $0");
5135 exit(255);
5137 ::status("Redirect the output to a file and add your changes at the end:",
5138 " $0 --embed > new_script");
5142 sub __GENERIC_COMMON_FUNCTION__() {}
5145 sub mkdir_or_die($) {
5146 # If dir is not executable: die
5147 my $dir = shift;
5148 # The eval is needed to catch exception from mkdir
5149 eval { File::Path::mkpath($dir); };
5150 if(not -x $dir) {
5151 ::error("Cannot change into non-executable dir $dir: $!");
5152 ::wait_and_exit(255);
5156 sub tmpfile(@) {
5157 # Create tempfile as $TMPDIR/parXXXXX
5158 # Returns:
5159 # $filehandle = opened file handle
5160 # $filename = file name created
5161 my($filehandle,$filename) =
5162 ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
5163 if(wantarray) {
5164 return($filehandle,$filename);
5165 } else {
5166 # Separate unlink due to NFS dealing badly with File::Temp
5167 unlink $filename;
5168 return $filehandle;
5172 sub tmpname($) {
5173 # Select a name that does not exist
5174 # Do not create the file as it may be used for creating a socket (by tmux)
5175 # Remember the name in $Global::unlink to avoid hitting the same name twice
5176 my $name = shift;
5177 my($tmpname);
5178 if(not -w $ENV{'TMPDIR'}) {
5179 if(not -e $ENV{'TMPDIR'}) {
5180 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
5181 } else {
5182 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
5184 ::wait_and_exit(255);
5186 do {
5187 $tmpname = $ENV{'TMPDIR'}."/".$name.
5188 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5189 } while(-e $tmpname or $Global::unlink{$tmpname}++);
5190 return $tmpname;
5193 sub tmpfifo() {
5194 # Find an unused name and mkfifo on it
5195 my $tmpfifo = tmpname("fif");
5196 mkfifo($tmpfifo,0600);
5197 return $tmpfifo;
5200 sub rm(@) {
5201 # Remove file and remove it from %Global::unlink
5202 # Uses:
5203 # %Global::unlink
5204 delete @Global::unlink{@_};
5205 unlink @_;
5208 sub size_of_block_dev() {
5209 # Like -s but for block devices
5210 # Input:
5211 # $blockdev = file name of block device
5212 # Returns:
5213 # $size = in bytes, undef if error
5214 my $blockdev = shift;
5215 if(open(my $fh, "<", $blockdev)) {
5216 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
5217 my $size = tell($fh);
5218 close $fh;
5219 return $size;
5220 } else {
5221 ::error("cannot open $blockdev");
5222 wait_and_exit(255);
5226 sub qqx(@) {
5227 # Like qx but with clean environment (except for @keep)
5228 # and STDERR ignored
5229 # This is needed if the environment contains functions
5230 # that /bin/sh does not understand
5231 my $PATH = $ENV{'PATH'};
5232 my %env;
5233 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
5234 # ssh with Kerberos needs KRB5CCNAME
5235 # tmux needs LC_CTYPE
5236 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE);
5237 @env{@keep} = @ENV{@keep};
5238 local %ENV;
5239 %ENV = %env;
5240 if($Global::debug) {
5241 return qx{ @_ && true };
5242 } else {
5243 return qx{ ( @_ ) 2>/dev/null };
5247 sub uniq(@) {
5248 # Remove duplicates and return unique values
5249 return keys %{{ map { $_ => 1 } @_ }};
5252 sub min(@) {
5253 # Returns:
5254 # Minimum value of array
5255 my $min;
5256 for (@_) {
5257 # Skip undefs
5258 defined $_ or next;
5259 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
5260 $min = ($min < $_) ? $min : $_;
5262 return $min;
5265 sub max(@) {
5266 # Returns:
5267 # Maximum value of array
5268 my $max;
5269 for (@_) {
5270 # Skip undefs
5271 defined $_ or next;
5272 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
5273 $max = ($max > $_) ? $max : $_;
5275 return $max;
5278 sub sum() {
5279 # Returns:
5280 # Sum of values of array
5281 my @args = @_;
5282 my $sum = 0;
5283 for (@args) {
5284 # Skip undefs
5285 $_ and do { $sum += $_; }
5287 return $sum;
5290 sub undef_as_zero($) {
5291 my $a = shift;
5292 return $a ? $a : 0;
5295 sub undef_as_empty($) {
5296 my $a = shift;
5297 return $a ? $a : "";
5300 sub undef_if_empty($) {
5301 if(defined($_[0]) and $_[0] eq "") {
5302 return undef;
5304 return $_[0];
5307 sub multiply_binary_prefix(@) {
5308 # Evalualte numbers with binary prefix
5309 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5310 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5311 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5312 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5313 # 13G = 13*1024*1024*1024 = 13958643712
5314 # Input:
5315 # $s = string with prefixes
5316 # Returns:
5317 # $value = int with prefixes multiplied
5318 my @v = @_;
5319 for(@v) {
5320 defined $_ or next;
5321 s/ki/*1024/gi;
5322 s/mi/*1024*1024/gi;
5323 s/gi/*1024*1024*1024/gi;
5324 s/ti/*1024*1024*1024*1024/gi;
5325 s/pi/*1024*1024*1024*1024*1024/gi;
5326 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5327 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5328 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5329 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5331 s/K/*1024/g;
5332 s/M/*1024*1024/g;
5333 s/G/*1024*1024*1024/g;
5334 s/T/*1024*1024*1024*1024/g;
5335 s/P/*1024*1024*1024*1024*1024/g;
5336 s/E/*1024*1024*1024*1024*1024*1024/g;
5337 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5338 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5339 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5341 s/k/*1000/g;
5342 s/m/*1000*1000/g;
5343 s/g/*1000*1000*1000/g;
5344 s/t/*1000*1000*1000*1000/g;
5345 s/p/*1000*1000*1000*1000*1000/g;
5346 s/e/*1000*1000*1000*1000*1000*1000/g;
5347 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5348 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5349 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5351 $_ = eval $_;
5353 return wantarray ? @v : $v[0];
5356 sub multiply_time_units($) {
5357 # Evalualte numbers with time units
5358 # s=1, m=60, h=3600, d=86400
5359 # Input:
5360 # $s = string time units
5361 # Returns:
5362 # $value = int in seconds
5363 my @v = @_;
5364 for(@v) {
5365 defined $_ or next;
5366 if(/[dhms]/i) {
5367 s/s/*1+/gi;
5368 s/m/*60+/gi;
5369 s/h/*3600+/gi;
5370 s/d/*86400+/gi;
5371 $_ = eval $_."0";
5374 return wantarray ? @v : $v[0];
5377 sub seconds_to_time_units() {
5378 # Convert seconds into ??d??h??m??s
5379 # s=1, m=60, h=3600, d=86400
5380 # Input:
5381 # $s = int in seconds
5382 # Returns:
5383 # $str = string time units
5384 my $s = shift;
5385 my $str;
5386 my $d = int($s/86400);
5387 $s -= $d * 86400;
5388 my $h = int($s/3600);
5389 $s -= $h * 3600;
5390 my $m = int($s/60);
5391 $s -= $m * 60;
5392 if($d) {
5393 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5394 } elsif($h) {
5395 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5396 } elsif($m) {
5397 $str = sprintf("%dm%02ds",$m,$s);
5398 } else {
5399 $str = sprintf("%ds",$s);
5401 return $str;
5405 my ($disk_full_fh, $b8193, $error_printed);
5406 sub exit_if_disk_full() {
5407 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5408 # If the disk is full: Exit immediately.
5409 # Returns:
5410 # N/A
5411 if(not $disk_full_fh) {
5412 $disk_full_fh = ::tmpfile(SUFFIX => ".df");
5413 $b8193 = "x"x8193;
5415 # Linux does not discover if a disk is full if writing <= 8192
5416 # Tested on:
5417 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5418 # ntfs reiserfs tmpfs ubifs vfat xfs
5419 # TODO this should be tested on different OS similar to this:
5421 # doit() {
5422 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5423 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5424 # seq 6900000 > /mnt/loop/i && echo seq OK
5425 # seq 6980868 > /mnt/loop/i
5426 # seq 10000 > /mnt/loop/ii
5427 # sleep 3
5428 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5429 # echo >&2
5431 print $disk_full_fh $b8193;
5432 if(not $disk_full_fh
5434 tell $disk_full_fh != 8193) {
5435 # On raspbian the disk can be full except for 10 chars.
5436 if(not $error_printed) {
5437 ::error("Output is incomplete.",
5438 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5439 "Is the disk full?",
5440 "Change \$TMPDIR with --tmpdir or use --compress.");
5441 $error_printed = 1;
5443 ::wait_and_exit(255);
5445 truncate $disk_full_fh, 0;
5446 seek($disk_full_fh, 0, 0) || die;
5450 sub spacefree($$) {
5451 # Remove comments and spaces
5452 # Inputs:
5453 # $spaces = keep 1 space?
5454 # $s = string to remove spaces from
5455 # Returns:
5456 # $s = with spaces removed
5457 my $spaces = shift;
5458 my $s = shift;
5459 $s =~ s/#.*//mg;
5460 if(1 == $spaces) {
5461 $s =~ s/\s+/ /mg;
5462 } elsif(2 == $spaces) {
5463 # Keep newlines
5464 $s =~ s/\n\n+/\n/sg;
5465 $s =~ s/[ \t]+/ /mg;
5466 } elsif(3 == $spaces) {
5467 # Keep perl code required space
5468 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5469 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5470 } else {
5471 $s =~ s/\s//mg;
5473 return $s;
5477 my $hostname;
5478 sub hostname() {
5479 local $/ = "\n";
5480 if(not $hostname) {
5481 $hostname = `hostname`;
5482 chomp($hostname);
5483 $hostname ||= "nohostname";
5485 return $hostname;
5489 sub which(@) {
5490 # Input:
5491 # @programs = programs to find the path to
5492 # Returns:
5493 # @full_path = full paths to @programs. Nothing if not found
5494 my @which;
5495 ::debug("which", "@_ in $ENV{'PATH'}\n");
5496 for my $prg (@_) {
5497 push(@which, grep { not -d $_ and -x $_ }
5498 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5499 if($prg =~ m:/:) {
5500 # Including path
5501 push(@which, grep { not -d $_ and -x $_ } $prg);
5504 return wantarray ? @which : $which[0];
5508 my ($regexp,$shell,%fakename);
5510 sub parent_shell {
5511 # Input:
5512 # $pid = pid to see if (grand)*parent is a shell
5513 # Returns:
5514 # $shellpath = path to shell - undef if no shell found
5515 my $pid = shift;
5516 ::debug("init","Parent of $pid\n");
5517 if(not $regexp) {
5518 # All shells known to mankind
5520 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5521 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5523 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
5524 ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5525 static-sh tcsh yash zsh -sh -csh -bash),
5526 '-sh (sh)' # sh on FreeBSD
5528 # Can be formatted as:
5529 # [sh] -sh sh busybox sh -sh (sh)
5530 # /bin/sh /sbin/sh /opt/csw/sh
5531 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5532 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5533 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5534 '(-?)('. $shell. '))( *$| [^(])';
5535 %fakename = (
5536 # sh disguises itself as -sh (sh) on FreeBSD
5537 "-sh (sh)" => ["sh"],
5538 # csh and tcsh disguise themselves as -sh/-csh
5539 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5540 # but sh also disguises itself as -sh
5541 # (TODO When does that happen?)
5542 "-sh" => ["sh"],
5543 "-csh" => ["tcsh", "csh"],
5544 # ash disguises itself as -ash
5545 "-ash" => ["ash", "dash", "sh"],
5546 # dash disguises itself as -dash
5547 "-dash" => ["dash", "ash", "sh"],
5548 # bash disguises itself as -bash
5549 "-bash" => ["bash", "sh"],
5550 # ksh disguises itself as -ksh
5551 "-ksh" => ["ksh", "sh"],
5552 # zsh disguises itself as -zsh
5553 "-zsh" => ["zsh", "sh"],
5556 if($^O eq "linux") {
5557 # Optimized for GNU/Linux
5558 my $testpid = $pid;
5559 my $shellpath;
5560 my $shellline;
5561 while($testpid) {
5562 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
5563 local $/="\0";
5564 chomp($shellline = <$fd>);
5565 if($shellline =~ /$regexp/o) {
5566 my $shellname = $4 || $8;
5567 my $dash = $3 || $7;
5568 if($shellname eq "sh" and $dash) {
5569 # -sh => csh or sh
5570 if($shellpath = readlink "/proc/$testpid/exe") {
5571 ::debug("init","procpath $shellpath\n");
5572 if($shellpath =~ m:/$shell$:o) {
5573 ::debug("init", "proc which ".$shellpath." => ");
5574 return $shellpath;
5578 ::debug("init", "which ".$shellname." => ");
5579 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5580 ::debug("init", "shell path $shellpath\n");
5581 return $shellpath;
5584 # Get parent pid
5585 if(open(my $fd, "<", "/proc/$testpid/stat")) {
5586 my $line = <$fd>;
5587 close $fd;
5588 # Parent pid is field 4
5589 $testpid = (split /\s+/, $line)[3];
5590 } else {
5591 # Something is wrong: fall back to old method
5592 last;
5596 # if -sh or -csh try readlink /proc/$$/exe
5597 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
5598 my $shellpath;
5599 my $testpid = $pid;
5600 while($testpid) {
5601 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5602 my $shellname = $4 || $8;
5603 my $dash = $3 || $7;
5604 if($shellname eq "sh" and $dash) {
5605 # -sh => csh or sh
5606 if($shellpath = readlink "/proc/$testpid/exe") {
5607 ::debug("init","procpath $shellpath\n");
5608 if($shellpath =~ m:/$shell$:o) {
5609 ::debug("init", "proc which ".$shellpath." => ");
5610 return $shellpath;
5614 ::debug("init", "which ".$shellname." => ");
5615 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5616 ::debug("init", "shell path $shellpath\n");
5617 $shellpath and last;
5619 if($testpid == $parent_of_ref->{$testpid}) {
5620 # In Solaris zones, the PPID of the zsched process is itself
5621 last;
5623 $testpid = $parent_of_ref->{$testpid};
5625 return $shellpath;
5630 my %pid_parentpid_cmd;
5632 sub pid_table() {
5633 # Returns:
5634 # %children_of = { pid -> children of pid }
5635 # %parent_of = { pid -> pid of parent }
5636 # %name_of = { pid -> commandname }
5638 if(not %pid_parentpid_cmd) {
5639 # Filter for SysV-style `ps`
5640 my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5641 q(s/^.{$s}//; print "@F[1,2] $_"' );
5642 # Minix uses cols 2,3 and can have newlines in the command
5643 # so lines not having numbers in cols 2,3 must be ignored
5644 my $minix = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5645 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5646 # BSD-style `ps`
5647 my $bsd = q(ps -o pid,ppid,command -ax);
5648 %pid_parentpid_cmd =
5650 'aix' => $sysv,
5651 'android' => $sysv,
5652 'cygwin' => $sysv,
5653 'darwin' => $bsd,
5654 'dec_osf' => $sysv,
5655 'dragonfly' => $bsd,
5656 'freebsd' => $bsd,
5657 'gnu' => $sysv,
5658 'hpux' => $sysv,
5659 'linux' => $sysv,
5660 'mirbsd' => $bsd,
5661 'minix' => $minix,
5662 'msys' => $sysv,
5663 'MSWin32' => $sysv,
5664 'netbsd' => $bsd,
5665 'nto' => $sysv,
5666 'openbsd' => $bsd,
5667 'solaris' => $sysv,
5668 'svr5' => $sysv,
5669 'syllable' => "echo ps not supported",
5672 $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
5674 my (@pidtable,%parent_of,%children_of,%name_of);
5675 # Table with pid -> children of pid
5676 @pidtable = `$pid_parentpid_cmd{$^O}`;
5677 my $p=$$;
5678 for (@pidtable) {
5679 # must match: 24436 21224 busybox ash
5680 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5681 # must match: 24436 21224 <<empty on system running Viber>>
5682 # or: perl -e 'while($0=" "){}'
5683 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5685 /^\s*(\S+)\s+(\S+)\s+()$/) {
5686 $parent_of{$1} = $2;
5687 push @{$children_of{$2}}, $1;
5688 $name_of{$1} = $3;
5689 } else {
5690 ::die_bug("pidtable format: $_");
5693 return(\%children_of, \%parent_of, \%name_of);
5697 sub now() {
5698 # Returns time since epoch as in seconds with 3 decimals
5699 # Uses:
5700 # @Global::use
5701 # Returns:
5702 # $time = time now with millisecond accuracy
5703 if(not $Global::use{"Time::HiRes"}) {
5704 if(eval "use Time::HiRes qw ( time );") {
5705 eval "sub TimeHiRestime { return Time::HiRes::time };";
5706 } else {
5707 eval "sub TimeHiRestime { return time() };";
5709 $Global::use{"Time::HiRes"} = 1;
5712 return (int(TimeHiRestime()*1000))/1000;
5715 sub usleep($) {
5716 # Sleep this many milliseconds.
5717 # Input:
5718 # $ms = milliseconds to sleep
5719 my $ms = shift;
5720 ::debug("timing",int($ms),"ms ");
5721 select(undef, undef, undef, $ms/1000);
5724 sub __KILLER_REAPER__() {}
5726 sub reap_usleep() {
5727 # Reap dead children.
5728 # If no dead children: Sleep specified amount with exponential backoff
5729 # Input:
5730 # $ms = milliseconds to sleep
5731 # Returns:
5732 # $ms/2+0.001 if children reaped
5733 # $ms*1.1 if no children reaped
5734 my $ms = shift;
5735 if(reapers()) {
5736 if(not $Global::total_completed % 100) {
5737 if($opt::timeout) {
5738 # Force cleaning the timeout queue for every 100 jobs
5739 # Fixes potential memleak
5740 $Global::timeoutq->process_timeouts();
5743 # Sleep exponentially shorter (1/2^n) if a job finished
5744 return $ms/2+0.001;
5745 } else {
5746 if($opt::timeout) {
5747 $Global::timeoutq->process_timeouts();
5749 if($opt::memfree) {
5750 kill_youngster_if_not_enough_mem();
5752 if($opt::limit) {
5753 kill_youngest_if_over_limit();
5755 exit_if_disk_full();
5756 if($opt::linebuffer) {
5757 my $something_printed = 0;
5758 if($opt::keeporder) {
5759 for my $job (values %Global::running) {
5760 $something_printed += $job->print_earlier_jobs();
5762 } else {
5763 for my $job (values %Global::running) {
5764 $something_printed += $job->print();
5767 if($something_printed) {
5768 $ms = $ms/2+0.001;
5771 if($ms > 0.002) {
5772 # When a child dies, wake up from sleep (or select(,,,))
5773 $SIG{CHLD} = sub { kill "ALRM", $$ };
5774 if($opt::delay) {
5775 # The 0.004s is approximately the time it takes for one round
5776 usleep(1000*($Global::newest_starttime +
5777 $opt::delay - 0.004 - ::now()));
5778 } else {
5779 usleep($ms);
5781 # --compress needs $SIG{CHLD} unset
5782 $SIG{CHLD} = 'DEFAULT';
5784 # Sleep exponentially longer (1.1^n) if a job did not finish,
5785 # though at most 1000 ms.
5786 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
5790 sub kill_youngest_if_over_limit() {
5791 # Check each $sshlogin we are over limit
5792 # If over limit: kill off the youngest child
5793 # Put the child back in the queue.
5794 # Uses:
5795 # %Global::running
5796 my %jobs_of;
5797 my @sshlogins;
5799 for my $job (values %Global::running) {
5800 if(not $jobs_of{$job->sshlogin()}) {
5801 push @sshlogins, $job->sshlogin();
5803 push @{$jobs_of{$job->sshlogin()}}, $job;
5805 for my $sshlogin (@sshlogins) {
5806 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5807 if($sshlogin->limit() == 2) {
5808 $job->kill();
5809 last;
5815 sub kill_youngster_if_not_enough_mem() {
5816 # Check each $sshlogin if there is enough mem.
5817 # If less than 50% enough free mem: kill off the youngest child
5818 # Put the child back in the queue.
5819 # Uses:
5820 # %Global::running
5821 my %jobs_of;
5822 my @sshlogins;
5824 for my $job (values %Global::running) {
5825 if(not $jobs_of{$job->sshlogin()}) {
5826 push @sshlogins, $job->sshlogin();
5828 push @{$jobs_of{$job->sshlogin()}}, $job;
5830 for my $sshlogin (@sshlogins) {
5831 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5832 if($sshlogin->memfree() < $opt::memfree * 0.5) {
5833 ::debug("mem","\n",map { $_->seq()." " }
5834 (sort { $b->seq() <=> $a->seq() }
5835 @{$jobs_of{$sshlogin}}));
5836 ::debug("mem","\n", $job->seq(), "killed ",
5837 $sshlogin->memfree()," < ",$opt::memfree * 0.5);
5838 $job->kill();
5839 $sshlogin->memfree_recompute();
5840 } else {
5841 last;
5844 ::debug("mem","Free mem OK ",
5845 $sshlogin->memfree()," > ",$opt::memfree * 0.5);
5850 sub __DEBUGGING__() {}
5853 sub debug(@) {
5854 # Uses:
5855 # $Global::debug
5856 # %Global::fd
5857 # Returns: N/A
5858 $Global::debug or return;
5859 @_ = grep { defined $_ ? $_ : "" } @_;
5860 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
5861 if($Global::fd{1}) {
5862 # Original stdout was saved
5863 my $stdout = $Global::fd{1};
5864 print $stdout @_[1..$#_];
5865 } else {
5866 print @_[1..$#_];
5871 sub my_memory_usage() {
5872 # Returns:
5873 # memory usage if found
5874 # 0 otherwise
5875 use strict;
5876 use FileHandle;
5878 local $/ = "\n";
5879 my $pid = $$;
5880 if(-e "/proc/$pid/stat") {
5881 my $fh = FileHandle->new("</proc/$pid/stat");
5883 my $data = <$fh>;
5884 chomp $data;
5885 $fh->close;
5887 my @procinfo = split(/\s+/,$data);
5889 return undef_as_zero($procinfo[22]);
5890 } else {
5891 return 0;
5895 sub my_size() {
5896 # Returns:
5897 # $size = size of object if Devel::Size is installed
5898 # -1 otherwise
5899 my @size_this = (@_);
5900 eval "use Devel::Size qw(size total_size)";
5901 if ($@) {
5902 return -1;
5903 } else {
5904 return total_size(@_);
5908 sub my_dump(@) {
5909 # Returns:
5910 # ascii expression of object if Data::Dump(er) is installed
5911 # error code otherwise
5912 my @dump_this = (@_);
5913 eval "use Data::Dump qw(dump);";
5914 if ($@) {
5915 # Data::Dump not installed
5916 eval "use Data::Dumper;";
5917 if ($@) {
5918 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
5919 "Not dumping output\n";
5920 ::status($err);
5921 return $err;
5922 } else {
5923 return Dumper(@dump_this);
5925 } else {
5926 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
5927 # it undefined
5928 eval "sub Data::Dump:dump {}";
5929 eval "use Data::Dump qw(dump);";
5930 return (Data::Dump::dump(@dump_this));
5934 sub my_croak(@) {
5935 eval "use Carp; 1";
5936 $Carp::Verbose = 1;
5937 croak(@_);
5940 sub my_carp() {
5941 eval "use Carp; 1";
5942 $Carp::Verbose = 1;
5943 carp(@_);
5947 sub __OBJECT_ORIENTED_PARTS__() {}
5950 package SSHLogin;
5952 sub new($$) {
5953 my $class = shift;
5954 my $sshlogin_string = shift;
5955 my $ncpus;
5956 my %hostgroups;
5957 # SSHLogins can have these formats:
5958 # @grp+grp/ncpu//usr/bin/ssh user@server
5959 # ncpu//usr/bin/ssh user@server
5960 # /usr/bin/ssh user@server
5961 # user@server
5962 # ncpu/user@server
5963 # @grp+grp/user@server
5964 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
5965 # Look for SSHLogin hostgroups
5966 %hostgroups = map { $_ => 1 } split(/\+/, $1);
5968 # An SSHLogin is always in the hostgroup of its "numcpu/host"
5969 $hostgroups{$sshlogin_string} = 1;
5970 if ($sshlogin_string =~ s:^(\d+)/::) {
5971 # Override default autodetected ncpus unless missing
5972 $ncpus = $1;
5974 my $string = $sshlogin_string;
5975 # An SSHLogin is always in the hostgroup of its $string-name
5976 $hostgroups{$string} = 1;
5977 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
5978 my @unget = ();
5979 my $no_slash_string = $string;
5980 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
5981 return bless {
5982 'string' => $string,
5983 'jobs_running' => 0,
5984 'jobs_completed' => 0,
5985 'maxlength' => undef,
5986 'max_jobs_running' => undef,
5987 'orig_max_jobs_running' => undef,
5988 'ncpus' => $ncpus,
5989 'hostgroups' => \%hostgroups,
5990 'sshcommand' => undef,
5991 'serverlogin' => undef,
5992 'control_path_dir' => undef,
5993 'control_path' => undef,
5994 'time_to_login' => undef,
5995 'last_login_at' => undef,
5996 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
5997 $no_slash_string . "/loadavg",
5998 'loadavg' => undef,
5999 'last_loadavg_update' => 0,
6000 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6001 $no_slash_string . "/swap_activity",
6002 'swap_activity' => undef,
6003 }, ref($class) || $class;
6006 sub DESTROY($) {
6007 my $self = shift;
6008 # Remove temporary files if they are created.
6009 ::rm($self->{'loadavg_file'});
6010 ::rm($self->{'swap_activity_file'});
6013 sub string($) {
6014 my $self = shift;
6015 return $self->{'string'};
6018 sub jobs_running($) {
6019 my $self = shift;
6020 return ($self->{'jobs_running'} || "0");
6023 sub inc_jobs_running($) {
6024 my $self = shift;
6025 $self->{'jobs_running'}++;
6028 sub dec_jobs_running($) {
6029 my $self = shift;
6030 $self->{'jobs_running'}--;
6033 sub set_maxlength($$) {
6034 my $self = shift;
6035 $self->{'maxlength'} = shift;
6038 sub maxlength($) {
6039 my $self = shift;
6040 return $self->{'maxlength'};
6043 sub jobs_completed() {
6044 my $self = shift;
6045 return $self->{'jobs_completed'};
6048 sub in_hostgroups() {
6049 # Input:
6050 # @hostgroups = the hostgroups to look for
6051 # Returns:
6052 # true if intersection of @hostgroups and the hostgroups of this
6053 # SSHLogin is non-empty
6054 my $self = shift;
6055 return grep { defined $self->{'hostgroups'}{$_} } @_;
6058 sub hostgroups() {
6059 my $self = shift;
6060 return keys %{$self->{'hostgroups'}};
6063 sub inc_jobs_completed($) {
6064 my $self = shift;
6065 $self->{'jobs_completed'}++;
6066 $Global::total_completed++;
6069 sub set_max_jobs_running($$) {
6070 my $self = shift;
6071 if(defined $self->{'max_jobs_running'}) {
6072 $Global::max_jobs_running -= $self->{'max_jobs_running'};
6074 $self->{'max_jobs_running'} = shift;
6075 if(defined $self->{'max_jobs_running'}) {
6076 # max_jobs_running could be resat if -j is a changed file
6077 $Global::max_jobs_running += $self->{'max_jobs_running'};
6079 # Initialize orig to the first non-zero value that comes around
6080 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
6083 sub memfree() {
6084 # Returns:
6085 # $memfree in bytes
6086 my $self = shift;
6087 $self->memfree_recompute();
6088 # Return 1 if not defined.
6089 return (not defined $self->{'memfree'} or $self->{'memfree'})
6092 sub memfree_recompute() {
6093 my $self = shift;
6094 my $script = memfreescript();
6096 # TODO add sshlogin and backgrounding
6097 # Run the script twice if it gives 0 (typically intermittent error)
6098 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
6099 if(not $self->{'memfree'}) {
6100 ::die_bug("Less than 1 byte memory free");
6102 #::debug("mem","New free:",$self->{'memfree'}," ");
6106 my $script;
6108 sub memfreescript() {
6109 # Returns:
6110 # shellscript for giving available memory in bytes
6111 if(not $script) {
6112 my %script_of = (
6113 # /proc/meminfo
6114 # MemFree: 7012 kB
6115 # Buffers: 19876 kB
6116 # Cached: 431192 kB
6117 # SwapCached: 0 kB
6118 "linux" =>
6119 q[ print 1024 * qx{ ].
6120 q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ].
6121 q[ { sum += \$2} END { print sum }' ].
6122 q[ /proc/meminfo } ],
6123 # Android uses same code as GNU/Linux
6124 "android" =>
6125 q[ print 1024 * qx{ ].
6126 q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ].
6127 q[ { sum += \$2} END { print sum }' ].
6128 q[ /proc/meminfo } ],
6130 # $ vmstat 1 1
6131 # procs memory page faults cpu
6132 # r b w avm free re at pi po fr de sr in sy cs us sy id
6133 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
6134 "hpux" =>
6135 q[ print (((reverse `vmstat 1 1`)[0] ].
6136 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
6137 # $ vmstat 1 2
6138 # kthr memory page disk faults cpu
6139 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
6140 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
6141 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
6143 # The second free value is correct
6144 "solaris" =>
6145 q[ print (((reverse `vmstat 1 2`)[0] ].
6146 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
6147 "freebsd" => q{
6148 for(qx{/sbin/sysctl -a}) {
6149 if (/^([^:]+):\s+(.+)\s*$/s) {
6150 $sysctl->{$1} = $2;
6153 print $sysctl->{"hw.pagesize"} *
6154 ($sysctl->{"vm.stats.vm.v_cache_count"}
6155 + $sysctl->{"vm.stats.vm.v_inactive_count"}
6156 + $sysctl->{"vm.stats.vm.v_free_count"});
6158 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6159 # Pages free: 198061.
6160 # Pages active: 159701.
6161 # Pages inactive: 47378.
6162 # Pages speculative: 29707.
6163 # Pages wired down: 89231.
6164 # "Translation faults": 928901425.
6165 # Pages copy-on-write: 156988239.
6166 # Pages zero filled: 271267894.
6167 # Pages reactivated: 48895.
6168 # Pageins: 1798068.
6169 # Pageouts: 257.
6170 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
6171 'darwin' =>
6172 q[ $vm = `vm_stat`;
6173 print (($vm =~ /page size of (\d+)/)[0] *
6174 (($vm =~ /Pages free:\s+(\d+)/)[0] +
6175 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
6178 my $perlscript = "";
6179 # Make a perl script that detects the OS ($^O) and runs
6180 # the appropriate command
6181 for my $os (keys %script_of) {
6182 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
6184 $perlscript =~ s/[\t\n ]+/ /g;
6185 $script = "perl -e " . ::Q($perlscript);
6187 return $script;
6191 sub limit($) {
6192 # Returns:
6193 # 0 = Below limit. Start another job.
6194 # 1 = Over limit. Start no jobs.
6195 # 2 = Kill youngest job
6196 my $self = shift;
6198 if(not defined $self->{'limitscript'}) {
6199 my %limitscripts =
6200 ("io" => q!
6201 io() {
6202 limit=$1;
6203 io_file=$2;
6204 # Do the measurement in the background
6205 (tmp=$(tempfile);
6206 LANG=C iostat -x 1 2 > $tmp;
6207 mv $tmp $io_file) &
6208 perl -e '-e $ARGV[0] or exit(1);
6209 for(reverse <>) {
6210 /Device/ and last;
6211 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
6212 exit ($max < '$limit')' $io_file;
6214 export -f io;
6215 io %s %s
6217 "mem" => q!
6218 mem() {
6219 limit=$1;
6220 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
6221 END {
6222 if (sum*1024 < '$limit'/2) { exit 2; }
6223 else { exit (sum*1024 < '$limit') }
6224 }' /proc/meminfo;
6226 export -f mem;
6227 mem %s;
6229 "load" => q!
6230 load() {
6231 limit=$1;
6232 ps ax -o state,command |
6233 grep -E '^[DOR].[^[]' |
6234 wc -l |
6235 perl -ne 'exit ('$limit' < $_)';
6237 export -f load;
6238 load %s;
6241 my ($cmd,@args) = split /\s+/,$opt::limit;
6242 if($limitscripts{$cmd}) {
6243 my $tmpfile = ::tmpname("parlmt");
6244 ++$Global::unlink{$tmpfile};
6245 $self->{'limitscript'} =
6246 ::spacefree(1, sprintf($limitscripts{$cmd},
6247 ::multiply_binary_prefix(@args),$tmpfile));
6248 } else {
6249 $self->{'limitscript'} = $opt::limit;
6253 my %env = %ENV;
6254 local %ENV = %env;
6255 $ENV{'SSHLOGIN'} = $self->string();
6256 system($Global::shell,"-c",$self->{'limitscript'});
6257 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
6258 return $?>>8;
6262 sub swapping($) {
6263 my $self = shift;
6264 my $swapping = $self->swap_activity();
6265 return (not defined $swapping or $swapping)
6268 sub swap_activity($) {
6269 # If the currently known swap activity is too old:
6270 # Recompute a new one in the background
6271 # Returns:
6272 # last swap activity computed
6273 my $self = shift;
6274 # Should we update the swap_activity file?
6275 my $update_swap_activity_file = 0;
6276 if(-r $self->{'swap_activity_file'}) {
6277 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
6278 ::die_bug("swap_activity_file-r");
6279 my $swap_out = <$swap_fh>;
6280 close $swap_fh;
6281 if($swap_out =~ /^(\d+)$/) {
6282 $self->{'swap_activity'} = $1;
6283 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
6285 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
6286 if(time - $self->{'last_swap_activity_update'} > 10) {
6287 # last swap activity update was started 10 seconds ago
6288 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
6289 $update_swap_activity_file = 1;
6291 } else {
6292 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
6293 $self->{'swap_activity'} = undef;
6294 $update_swap_activity_file = 1;
6296 if($update_swap_activity_file) {
6297 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
6298 $self->{'last_swap_activity_update'} = time;
6299 my $dir = ::dirname($self->{'swap_activity_file'});
6300 -d $dir or eval { File::Path::mkpath($dir); };
6301 my $swap_activity;
6302 $swap_activity = swapactivityscript();
6303 if($self->{'string'} ne ":") {
6304 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
6305 ::Q($swap_activity);
6307 # Run swap_activity measuring.
6308 # As the command can take long to run if run remote
6309 # save it to a tmp file before moving it to the correct file
6310 my $file = $self->{'swap_activity_file'};
6311 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
6312 ::debug("swap", "\n", $swap_activity, "\n");
6313 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
6315 return $self->{'swap_activity'};
6319 my $script;
6321 sub swapactivityscript() {
6322 # Returns:
6323 # shellscript for detecting swap activity
6325 # arguments for vmstat are OS dependant
6326 # swap_in and swap_out are in different columns depending on OS
6328 if(not $script) {
6329 my %vmstat = (
6330 # linux: $7*$8
6331 # $ vmstat 1 2
6332 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6333 # r b swpd free buff cache si so bi bo in cs us sy id wa
6334 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6335 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6336 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6338 # solaris: $6*$7
6339 # $ vmstat -S 1 2
6340 # kthr memory page disk faults cpu
6341 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6342 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6343 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6344 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6346 # darwin (macosx): $21*$22
6347 # $ vm_stat -c 2 1
6348 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6349 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6350 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6351 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6352 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6354 # ultrix: $12*$13
6355 # $ vmstat -S 1 2
6356 # procs faults cpu memory page disk
6357 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6358 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6359 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6360 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6362 # aix: $6*$7
6363 # $ vmstat 1 2
6364 # System configuration: lcpu=1 mem=2048MB
6366 # kthr memory page faults cpu
6367 # ----- ----------- ------------------------ ------------ -----------
6368 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6369 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6370 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6371 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6373 # freebsd: $8*$9
6374 # $ vmstat -H 1 2
6375 # procs memory page disks faults cpu
6376 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6377 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6378 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6379 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6381 # mirbsd: $8*$9
6382 # $ vmstat 1 2
6383 # procs memory page disks traps cpu
6384 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6385 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6386 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6387 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6389 # netbsd: $7*$8
6390 # $ vmstat 1 2
6391 # procs memory page disks faults cpu
6392 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6393 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6394 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6395 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6397 # openbsd: $8*$9
6398 # $ vmstat 1 2
6399 # procs memory page disks traps cpu
6400 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6401 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6402 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6403 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6405 # hpux: $8*$9
6406 # $ vmstat 1 2
6407 # procs memory page faults cpu
6408 # r b w avm free re at pi po fr de sr in sy cs us sy id
6409 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6410 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6411 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6413 # dec_osf (tru64): $11*$12
6414 # $ vmstat 1 2
6415 # Virtual Memory Statistics: (pagesize = 8192)
6416 # procs memory pages intr cpu
6417 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6418 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6419 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6420 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6422 # gnu (hurd): $7*$8
6423 # $ vmstat -k 1 2
6424 # (pagesize: 4, size: 512288, swap size: 894972)
6425 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6426 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6427 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6428 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6430 # -nto (qnx has no swap)
6431 #-irix
6432 #-svr5 (scosysv)
6434 my $perlscript = "";
6435 # Make a perl script that detects the OS ($^O) and runs
6436 # the appropriate vmstat command
6437 for my $os (keys %vmstat) {
6438 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6439 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6440 $vmstat{$os}[1] . '}"` }';
6442 $script = "perl -e " . ::Q($perlscript);
6444 return $script;
6448 sub too_fast_remote_login($) {
6449 my $self = shift;
6450 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6451 # sshd normally allows 10 simultaneous logins
6452 # A login takes time_to_login
6453 # So time_to_login/5 should be safe
6454 # If now <= last_login + time_to_login/5: Then it is too soon.
6455 my $too_fast = (::now() <= $self->{'last_login_at'}
6456 + $self->{'time_to_login'}/5);
6457 ::debug("run", "Too fast? $too_fast ");
6458 return $too_fast;
6459 } else {
6460 # No logins so far (or time_to_login not computed): it is not too fast
6461 return 0;
6465 sub last_login_at($) {
6466 my $self = shift;
6467 return $self->{'last_login_at'};
6470 sub set_last_login_at($$) {
6471 my $self = shift;
6472 $self->{'last_login_at'} = shift;
6475 sub loadavg_too_high($) {
6476 my $self = shift;
6477 my $loadavg = $self->loadavg();
6478 if(defined $loadavg) {
6479 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6480 return $loadavg >= $self->max_loadavg();
6481 } else {
6482 # Unknown load: Assume load is too high
6483 return 1;
6488 my $cmd;
6489 sub loadavg_cmd() {
6490 if(not $cmd) {
6491 # aix => "ps -ae -o state,command" # state wrong
6492 # bsd => "ps ax -o state,command"
6493 # sysv => "ps -ef -o s -o comm"
6494 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6495 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6496 # awk '{print $2,$1}'
6497 # dec_osf => bsd
6498 # dragonfly => bsd
6499 # freebsd => bsd
6500 # gnu => bsd
6501 # hpux => ps -el|awk '{print $2,$14,$15}'
6502 # irix => ps -ef -o state -o comm
6503 # linux => bsd
6504 # minix => ps el|awk '{print \$1,\$11}'
6505 # mirbsd => bsd
6506 # netbsd => bsd
6507 # openbsd => bsd
6508 # solaris => sysv
6509 # svr5 => sysv
6510 # ultrix => ps -ax | awk '{print $3,$5}'
6511 # unixware => ps -el|awk '{print $2,$14,$15}'
6512 my $ps = ::spacefree(1,q{
6513 $sysv="ps -ef -o s -o comm";
6514 $sysv2="ps -ef -o state -o comm";
6515 $bsd="ps ax -o state,command";
6516 # Treat threads as processes
6517 $bsd2="ps axH -o state,command";
6518 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6519 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6520 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6521 awk '{print $2,$1}' };
6522 $dummy="echo S COMMAND;echo R dummy";
6523 %ps=(
6524 # TODO Find better code for AIX/Android
6525 'aix' => "uptime",
6526 'android' => "uptime",
6527 'cygwin' => $cygwin,
6528 'darwin' => $bsd,
6529 'dec_osf' => $sysv2,
6530 'dragonfly' => $bsd,
6531 'freebsd' => $bsd2,
6532 'gnu' => $bsd,
6533 'hpux' => $psel,
6534 'irix' => $sysv2,
6535 'linux' => $bsd2,
6536 'minix' => "ps el|awk '{print \$1,\$11}'",
6537 'mirbsd' => $bsd,
6538 'msys' => $cygwin,
6539 'netbsd' => $bsd,
6540 'nto' => $dummy,
6541 'openbsd' => $bsd,
6542 'solaris' => $sysv,
6543 'svr5' => $psel,
6544 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6545 'MSWin32' => $sysv,
6547 print `$ps{$^O}`;
6549 # The command is too long for csh, so base64_wrap the command
6550 $cmd = Job::base64_wrap($ps);
6552 return $cmd;
6557 sub loadavg($) {
6558 # If the currently know loadavg is too old:
6559 # Recompute a new one in the background
6560 # The load average is computed as the number of processes waiting for disk
6561 # or CPU right now. So it is the server load this instant and not averaged over
6562 # several minutes. This is needed so GNU Parallel will at most start one job
6563 # that will push the load over the limit.
6565 # Returns:
6566 # $last_loadavg = last load average computed (undef if none)
6567 my $self = shift;
6568 # Should we update the loadavg file?
6569 my $update_loadavg_file = 0;
6570 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6571 local $/; # $/ = undef => slurp whole file
6572 my $load_out = <$load_fh>;
6573 close $load_fh;
6574 if($load_out =~ /\S/) {
6575 # Content can be empty if ~/ is on NFS
6576 # due to reading being non-atomic.
6578 # Count lines starting with D,O,R but command does not start with [
6579 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6580 if($load > 0) {
6581 # load is overestimated by 1
6582 $self->{'loadavg'} = $load - 1;
6583 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6584 } elsif ($load_out=~/average: (\d+.\d+)/) {
6585 # AIX does not support instant load average
6586 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6587 $self->{'loadavg'} = $1;
6588 } else {
6589 ::die_bug("loadavg_invalid_content: " .
6590 $self->{'loadavg_file'} . "\n$load_out");
6593 $update_loadavg_file = 1;
6594 } else {
6595 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6596 $self->{'loadavg'} = undef;
6597 $update_loadavg_file = 1;
6599 if($update_loadavg_file) {
6600 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
6601 $self->{'last_loadavg_update'} = time;
6602 my $dir = ::dirname($self->{'swap_activity_file'});
6603 -d $dir or eval { File::Path::mkpath($dir); };
6604 -w $dir or ::die_bug("Cannot write to $dir");
6605 my $cmd = "";
6606 if($self->{'string'} ne ":") {
6607 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
6608 ::Q(loadavg_cmd());
6609 } else {
6610 $cmd .= loadavg_cmd();
6612 # As the command can take long to run if run remote
6613 # save it to a tmp file before moving it to the correct file
6614 ::debug("load", "Update load\n");
6615 my $file = $self->{'loadavg_file'};
6616 # tmpfile on same filesystem as $file
6617 my $tmpfile = $file.$$;
6618 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6620 return $self->{'loadavg'};
6623 sub max_loadavg($) {
6624 my $self = shift;
6625 # If --load is a file it might be changed
6626 if($Global::max_load_file) {
6627 my $mtime = (stat($Global::max_load_file))[9];
6628 if($mtime > $Global::max_load_file_last_mod) {
6629 $Global::max_load_file_last_mod = $mtime;
6630 for my $sshlogin (values %Global::host) {
6631 $sshlogin->set_max_loadavg(undef);
6635 if(not defined $self->{'max_loadavg'}) {
6636 $self->{'max_loadavg'} =
6637 $self->compute_max_loadavg($opt::load);
6639 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6640 return $self->{'max_loadavg'};
6643 sub set_max_loadavg($$) {
6644 my $self = shift;
6645 $self->{'max_loadavg'} = shift;
6648 sub compute_max_loadavg($) {
6649 # Parse the max loadaverage that the user asked for using --load
6650 # Returns:
6651 # max loadaverage
6652 my $self = shift;
6653 my $loadspec = shift;
6654 my $load;
6655 if(defined $loadspec) {
6656 if($loadspec =~ /^\+(\d+)$/) {
6657 # E.g. --load +2
6658 my $j = $1;
6659 $load =
6660 $self->ncpus() + $j;
6661 } elsif ($loadspec =~ /^-(\d+)$/) {
6662 # E.g. --load -2
6663 my $j = $1;
6664 $load =
6665 $self->ncpus() - $j;
6666 } elsif ($loadspec =~ /^(\d+)\%$/) {
6667 my $j = $1;
6668 $load =
6669 $self->ncpus() * $j / 100;
6670 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6671 $load = $1;
6672 } elsif (-f $loadspec) {
6673 $Global::max_load_file = $loadspec;
6674 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6675 if(open(my $in_fh, "<", $Global::max_load_file)) {
6676 my $opt_load_file = join("",<$in_fh>);
6677 close $in_fh;
6678 $load = $self->compute_max_loadavg($opt_load_file);
6679 } else {
6680 ::error("Cannot open $loadspec.");
6681 ::wait_and_exit(255);
6683 } else {
6684 ::error("Parsing of --load failed.");
6685 ::die_usage();
6687 if($load < 0.01) {
6688 $load = 0.01;
6691 return $load;
6694 sub time_to_login($) {
6695 my $self = shift;
6696 return $self->{'time_to_login'};
6699 sub set_time_to_login($$) {
6700 my $self = shift;
6701 $self->{'time_to_login'} = shift;
6704 sub max_jobs_running($) {
6705 my $self = shift;
6706 if(not defined $self->{'max_jobs_running'}) {
6707 my $nproc = $self->compute_number_of_processes($opt::jobs);
6708 $self->set_max_jobs_running($nproc);
6710 return $self->{'max_jobs_running'};
6713 sub orig_max_jobs_running($) {
6714 my $self = shift;
6715 return $self->{'orig_max_jobs_running'};
6718 sub compute_number_of_processes($) {
6719 # Number of processes wanted and limited by system resources
6720 # Returns:
6721 # Number of processes
6722 my $self = shift;
6723 my $opt_P = shift;
6724 my $wanted_processes = $self->user_requested_processes($opt_P);
6725 if(not defined $wanted_processes) {
6726 $wanted_processes = $Global::default_simultaneous_sshlogins;
6728 ::debug("load", "Wanted procs: $wanted_processes\n");
6729 my $system_limit =
6730 $self->processes_available_by_system_limit($wanted_processes);
6731 ::debug("load", "Limited to procs: $system_limit\n");
6732 return $system_limit;
6736 my @children;
6737 my $max_system_proc_reached;
6738 my $more_filehandles;
6739 my %fh;
6740 my $tmpfhname;
6741 my $count_jobs_already_read;
6742 my @jobs;
6743 my $job;
6744 my @args;
6745 my $arg;
6747 sub reserve_filehandles($) {
6748 # Reserves filehandle
6749 my $n = shift;
6750 for (1..$n) {
6751 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6755 sub reserve_process() {
6756 # Spawn a dummy process
6757 my $child;
6758 if($child = fork()) {
6759 push @children, $child;
6760 $Global::unkilled_children{$child} = 1;
6761 } elsif(defined $child) {
6762 # This is the child
6763 # The child takes one process slot
6764 # It will be killed later
6765 $SIG{'TERM'} = $Global::original_sig{'TERM'};
6766 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6767 # The exec does not work on Cygwin and QNX
6768 sleep 10101010;
6769 } else {
6770 # 'exec sleep' takes less RAM than sleeping in perl
6771 exec 'sleep', 10101;
6773 exit(0);
6774 } else {
6775 # Failed to spawn
6776 $max_system_proc_reached = 1;
6780 sub get_args_or_jobs() {
6781 # Get an arg or a job (depending on mode)
6782 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6783 # Skip: No need to get args
6784 return 1;
6785 } elsif(defined $opt::retries and $count_jobs_already_read) {
6786 # For retries we may need to run all jobs on this sshlogin
6787 # so include the already read jobs for this sshlogin
6788 $count_jobs_already_read--;
6789 return 1;
6790 } else {
6791 if($opt::X or $opt::m) {
6792 # The arguments may have to be re-spread over several jobslots
6793 # So pessimistically only read one arg per jobslot
6794 # instead of a full commandline
6795 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6796 if($Global::JobQueue->empty()) {
6797 return 0;
6798 } else {
6799 $job = $Global::JobQueue->get();
6800 push(@jobs, $job);
6801 return 1;
6803 } else {
6804 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6805 push(@args, $arg);
6806 return 1;
6808 } else {
6809 # If there are no more command lines, then we have a process
6810 # per command line, so no need to go further
6811 if($Global::JobQueue->empty()) {
6812 return 0;
6813 } else {
6814 $job = $Global::JobQueue->get();
6815 # Replacement must happen here due to seq()
6816 $job and $job->replaced();
6817 push(@jobs, $job);
6818 return 1;
6824 sub cleanup() {
6825 # Cleanup: Close the files
6826 for (values %fh) { close $_ }
6827 # Cleanup: Kill the children
6828 for my $pid (@children) {
6829 kill 9, $pid;
6830 waitpid($pid,0);
6831 delete $Global::unkilled_children{$pid};
6833 # Cleanup: Unget the command_lines or the @args
6834 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6835 @args = ();
6836 $Global::JobQueue->unget(@jobs);
6837 @jobs = ();
6840 sub processes_available_by_system_limit($) {
6841 # If the wanted number of processes is bigger than the system limits:
6842 # Limit them to the system limits
6843 # Limits are: File handles, number of input lines, processes,
6844 # and taking > 1 second to spawn 10 extra processes
6845 # Returns:
6846 # Number of processes
6847 my $self = shift;
6848 my $wanted_processes = shift;
6849 my $system_limit = 0;
6850 my $slow_spawning_warning_printed = 0;
6851 my $time = time;
6852 $more_filehandles = 1;
6853 $tmpfhname = "TmpFhNamE";
6855 # perl uses 7 filehandles for something?
6856 # parallel uses 1 for memory_usage
6857 # parallel uses 4 for ?
6858 reserve_filehandles(12);
6859 # Two processes for load avg and ?
6860 reserve_process();
6861 reserve_process();
6863 # For --retries count also jobs already run
6864 $count_jobs_already_read = $Global::JobQueue->next_seq();
6865 my $wait_time_for_getting_args = 0;
6866 my $start_time = time;
6867 while(1) {
6868 $system_limit >= $wanted_processes and last;
6869 not $more_filehandles and last;
6870 $max_system_proc_reached and last;
6872 my $before_getting_arg = time;
6873 if(!$Global::dummy_jobs) {
6874 get_args_or_jobs() or last;
6876 $wait_time_for_getting_args += time - $before_getting_arg;
6877 $system_limit++;
6879 # Every simultaneous process uses 2 filehandles to write to
6880 # and 2 filehandles to read from
6881 reserve_filehandles(4);
6883 # System process limit
6884 reserve_process();
6886 my $forktime = time - $time - $wait_time_for_getting_args;
6887 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
6888 $forktime,
6889 " (processes so far: ", $system_limit,")\n");
6890 if($system_limit > 10 and
6891 $forktime > 1 and
6892 $forktime > $system_limit * 0.01
6893 and not $slow_spawning_warning_printed) {
6894 # It took more than 0.01 second to fork a processes on avg.
6895 # Give the user a warning. He can press Ctrl-C if this
6896 # sucks.
6897 ::warning("Starting $system_limit processes took > $forktime sec.",
6898 "Consider adjusting -j. Press CTRL-C to stop.");
6899 $slow_spawning_warning_printed = 1;
6902 cleanup();
6904 if($system_limit < $wanted_processes) {
6905 # The system_limit is less than the wanted_processes
6906 if($system_limit < 1 and not $Global::JobQueue->empty()) {
6907 ::warning("Cannot spawn any jobs. ".
6908 "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
6909 "or /proc/sys/kernel/pid_max may help.");
6910 ::wait_and_exit(255);
6912 if(not $more_filehandles) {
6913 ::warning("Only enough file handles to run ".
6914 $system_limit. " jobs in parallel.",
6915 "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
6916 "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
6917 "or /proc/sys/fs/file-max may help.");
6919 if($max_system_proc_reached) {
6920 ::warning("Only enough available processes to run ".
6921 $system_limit. " jobs in parallel.",
6922 "Raising ulimit -u or /etc/security/limits.conf ",
6923 "or /proc/sys/kernel/pid_max may help.");
6926 if($] == 5.008008 and $system_limit > 1000) {
6927 # https://savannah.gnu.org/bugs/?36942
6928 $system_limit = 1000;
6930 if($Global::JobQueue->empty()) {
6931 $system_limit ||= 1;
6933 if($self->string() ne ":" and
6934 $system_limit > $Global::default_simultaneous_sshlogins) {
6935 $system_limit =
6936 $self->simultaneous_sshlogin_limit($system_limit);
6938 return $system_limit;
6942 sub simultaneous_sshlogin_limit($) {
6943 # Test by logging in wanted number of times simultaneously
6944 # Returns:
6945 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
6946 my $self = shift;
6947 my $wanted_processes = shift;
6948 if($self->{'time_to_login'}) {
6949 return $wanted_processes;
6952 # Try twice because it guesses wrong sometimes
6953 # Choose the minimal
6954 my $ssh_limit =
6955 ::min($self->simultaneous_sshlogin($wanted_processes),
6956 $self->simultaneous_sshlogin($wanted_processes));
6957 if($ssh_limit < $wanted_processes) {
6958 my $serverlogin = $self->serverlogin();
6959 ::warning("ssh to $serverlogin only allows ".
6960 "for $ssh_limit simultaneous logins.",
6961 "You may raise this by changing",
6962 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
6963 "You can also try --sshdelay 0.1",
6964 "Using only ".($ssh_limit-1)." connections ".
6965 "to avoid race conditions.");
6966 # Race condition can cause problem if using all sshs.
6967 if($ssh_limit > 1) { $ssh_limit -= 1; }
6969 return $ssh_limit;
6972 sub simultaneous_sshlogin($) {
6973 # Using $sshlogin try to see if we can do $wanted_processes
6974 # simultaneous logins
6975 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
6976 # grep simul|wc -l
6977 # Input:
6978 # $wanted_processes = Try for this many logins in parallel
6979 # Returns:
6980 # $ssh_limit = Number of succesful parallel logins
6981 local $/ = "\n";
6982 my $self = shift;
6983 my $wanted_processes = shift;
6984 my $sshcmd = $self->sshcommand();
6985 my $serverlogin = $self->serverlogin();
6986 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
6987 # TODO sh -c wrapper to work for csh
6988 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
6989 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
6990 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
6991 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
6992 ::die_bug("simultaneouslogin");
6993 my $ssh_limit = <$simul_fh>;
6994 close $simul_fh;
6995 chomp $ssh_limit;
6996 return $ssh_limit;
6999 sub set_ncpus($$) {
7000 my $self = shift;
7001 $self->{'ncpus'} = shift;
7004 sub user_requested_processes($) {
7005 # Parse the number of processes that the user asked for using -j
7006 # Input:
7007 # $opt_P = string formatted as for -P
7008 # Returns:
7009 # $processes = the number of processes to run on this sshlogin
7010 my $self = shift;
7011 my $opt_P = shift;
7012 my $processes;
7013 if(defined $opt_P) {
7014 if($opt_P =~ /^\+(\d+)$/) {
7015 # E.g. -P +2
7016 my $j = $1;
7017 $processes =
7018 $self->ncpus() + $j;
7019 } elsif ($opt_P =~ /^-(\d+)$/) {
7020 # E.g. -P -2
7021 my $j = $1;
7022 $processes =
7023 $self->ncpus() - $j;
7024 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
7025 # E.g. -P 10.5%
7026 my $j = $1;
7027 $processes =
7028 $self->ncpus() * $j / 100;
7029 } elsif ($opt_P =~ /^(\d+)$/) {
7030 $processes = $1;
7031 if($processes == 0) {
7032 # -P 0 = infinity (or at least close)
7033 $processes = $Global::infinity;
7035 } elsif (-f $opt_P) {
7036 $Global::max_procs_file = $opt_P;
7037 if(open(my $in_fh, "<", $Global::max_procs_file)) {
7038 my $opt_P_file = join("",<$in_fh>);
7039 close $in_fh;
7040 $processes = $self->user_requested_processes($opt_P_file);
7041 } else {
7042 ::error("Cannot open $opt_P.");
7043 ::wait_and_exit(255);
7045 } else {
7046 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
7047 ::die_usage();
7049 $processes = ::ceil($processes);
7051 return $processes;
7054 sub ncpus($) {
7055 # Number of CPU threads
7056 # --use_sockets_instead_of_threads = count socket instead
7057 # --use_cores_instead_of_threads = count physical cores instead
7058 # Returns:
7059 # $ncpus = number of cpu (threads) on this sshlogin
7060 local $/ = "\n";
7061 my $self = shift;
7062 if(not defined $self->{'ncpus'}) {
7063 my $sshcmd = $self->sshcommand();
7064 my $serverlogin = $self->serverlogin();
7065 if($serverlogin eq ":") {
7066 if($opt::use_sockets_instead_of_threads) {
7067 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
7068 } elsif($opt::use_cores_instead_of_threads) {
7069 $self->{'ncpus'} = socket_core_thread()->{'cores'};
7070 } else {
7071 $self->{'ncpus'} = socket_core_thread()->{'threads'};
7073 } else {
7074 my $ncpu;
7075 if($opt::use_sockets_instead_of_threads
7077 $opt::use_cpus_instead_of_cores) {
7078 $ncpu =
7079 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7080 } elsif($opt::use_cores_instead_of_threads) {
7081 $ncpu =
7082 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
7083 } else {
7084 $ncpu =
7085 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
7087 chomp $ncpu;
7088 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
7089 $self->{'ncpus'} = $ncpu;
7090 } else {
7091 ::warning("Could not figure out ".
7092 "number of cpus on $serverlogin ($ncpu). Using 1.");
7093 $self->{'ncpus'} = 1;
7097 return $self->{'ncpus'};
7101 sub nproc() {
7102 # Returns:
7103 # Number of threads using `nproc`
7104 my $no_of_threads = ::qqx("nproc");
7105 chomp $no_of_threads;
7106 return $no_of_threads;
7109 sub no_of_sockets() {
7110 return socket_core_thread()->{'sockets'};
7113 sub no_of_cores() {
7114 return socket_core_thread()->{'cores'};
7117 sub no_of_threads() {
7118 return socket_core_thread()->{'threads'};
7121 sub socket_core_thread() {
7122 # Returns:
7124 # 'sockets' => #sockets = number of socket with CPU present
7125 # 'cores' => #cores = number of physical cores
7126 # 'threads' => #threads = number of compute cores (hyperthreading)
7127 # 'active' => #taskset_threads = number of taskset limited cores
7129 my $cpu;
7130 my $cached_cpuspec = $Global::cache_dir . "/tmp/sshlogin/" .
7131 ::hostname() . "/cpuspec";
7132 if(-e $cached_cpuspec and -M $cached_cpuspec < 1) {
7133 # Reading cached copy instead of /proc/cpuinfo is 17 ms faster
7134 local $/ = "\n";
7135 if(open(my $in_fh, "<", $cached_cpuspec)) {
7136 ::debug("init","Read $cached_cpuspec\n");
7137 $cpu->{'sockets'} = int(<$in_fh>);
7138 $cpu->{'cores'} = int(<$in_fh>);
7139 $cpu->{'threads'} = int(<$in_fh>);
7140 close $in_fh;
7143 if ($^O eq 'linux') {
7144 $cpu = sct_gnu_linux($cpu);
7145 } elsif ($^O eq 'android') {
7146 $cpu = sct_android($cpu);
7147 } elsif ($^O eq 'freebsd') {
7148 $cpu = sct_freebsd($cpu);
7149 } elsif ($^O eq 'netbsd') {
7150 $cpu = sct_netbsd($cpu);
7151 } elsif ($^O eq 'openbsd') {
7152 $cpu = sct_openbsd($cpu);
7153 } elsif ($^O eq 'gnu') {
7154 $cpu = sct_hurd($cpu);
7155 } elsif ($^O eq 'darwin') {
7156 $cpu = sct_darwin($cpu);
7157 } elsif ($^O eq 'solaris') {
7158 $cpu = sct_solaris($cpu);
7159 } elsif ($^O eq 'aix') {
7160 $cpu = sct_aix($cpu);
7161 } elsif ($^O eq 'hpux') {
7162 $cpu = sct_hpux($cpu);
7163 } elsif ($^O eq 'nto') {
7164 $cpu = sct_qnx($cpu);
7165 } elsif ($^O eq 'svr5') {
7166 $cpu = sct_openserver($cpu);
7167 } elsif ($^O eq 'irix') {
7168 $cpu = sct_irix($cpu);
7169 } elsif ($^O eq 'dec_osf') {
7170 $cpu = sct_tru64($cpu);
7171 } else {
7172 # Try all methods until we find something that works
7173 $cpu = (sct_gnu_linux($cpu)
7174 || sct_android($cpu)
7175 || sct_freebsd($cpu)
7176 || sct_netbsd($cpu)
7177 || sct_openbsd($cpu)
7178 || sct_hurd($cpu)
7179 || sct_darwin($cpu)
7180 || sct_solaris($cpu)
7181 || sct_aix($cpu)
7182 || sct_hpux($cpu)
7183 || sct_qnx($cpu)
7184 || sct_openserver($cpu)
7185 || sct_irix($cpu)
7186 || sct_tru64($cpu)
7189 if(not grep { $_ > 0 } values %$cpu) {
7190 $cpu = undef;
7192 # Write cached copy instead of /proc/cpuinfo is 17 ms faster
7193 if($cpu and open(my $out_fh, ">", $cached_cpuspec)) {
7194 print $out_fh (map { chomp; "$_\n" }
7195 $cpu->{'sockets'},
7196 $cpu->{'cores'},
7197 $cpu->{'threads'});
7198 close $out_fh;
7200 if(not $cpu) {
7201 my $nproc = nproc();
7202 if($nproc) {
7203 $cpu->{'sockets'} =
7204 $cpu->{'cores'} =
7205 $cpu->{'threads'} =
7206 $cpu->{'active'} =
7207 $nproc;
7210 if(not $cpu) {
7211 ::warning("Cannot figure out number of cpus. Using 1.");
7212 $cpu->{'sockets'} =
7213 $cpu->{'cores'} =
7214 $cpu->{'threads'} =
7215 $cpu->{'active'} =
7218 $cpu->{'sockets'} ||= 1;
7219 $cpu->{'threads'} ||= $cpu->{'cores'};
7220 $cpu->{'active'} ||= $cpu->{'threads'};
7221 chomp($cpu->{'sockets'},
7222 $cpu->{'cores'},
7223 $cpu->{'threads'},
7224 $cpu->{'active'});
7225 # Choose minimum of active and actual
7226 my $mincpu;
7227 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
7228 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
7229 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
7230 return $mincpu;
7233 sub sct_gnu_linux($) {
7234 # Returns:
7235 # { 'sockets' => #sockets
7236 # 'cores' => #cores
7237 # 'threads' => #threads
7238 # 'active' => #taskset_threads }
7239 my $cpu = shift;
7240 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
7241 my @cpuinfo;
7242 if($ENV{'PARALLEL_CPUINFO'}) {
7243 # Use CPUINFO from environment - used for testing only
7244 @cpuinfo = split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'};
7245 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and
7246 $cpu->{'threads'}) {
7247 # Skip /proc/cpuinfo - already set
7248 } elsif(open(my $in_fh, "<", "/proc/cpuinfo")) {
7249 # Read /proc/cpuinfo
7250 @cpuinfo = <$in_fh>;
7252 if(@cpuinfo) {
7253 $cpu->{'sockets'} = 0;
7254 $cpu->{'cores'} = 0;
7255 $cpu->{'threads'} = 0;
7256 my %seen;
7257 my %phy_seen;
7258 my $physicalid;
7259 for(@cpuinfo) {
7260 if(/^physical id.*[:](.*)/) {
7261 $physicalid = $1;
7262 if(not $phy_seen{$1}++) {
7263 $cpu->{'sockets'}++;
7266 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
7267 $cpu->{'cores'}++;
7269 /^processor.*[:]/i and $cpu->{'threads'}++;
7271 $cpu->{'cores'} ||= $cpu->{'threads'} || $cpu->{'sockets'};
7273 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
7274 # if 'taskset' is used to limit number of threads
7275 if(open(my $in_fh, "<", "/proc/self/status")) {
7276 while(<$in_fh>) {
7277 if(/^Cpus_allowed:\s*(\S+)/) {
7278 my $a = $1;
7279 $a =~ tr/,//d;
7280 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
7283 close $in_fh;
7286 return $cpu;
7289 sub sct_android($) {
7290 # Returns:
7291 # { 'sockets' => #sockets
7292 # 'cores' => #cores
7293 # 'threads' => #threads
7294 # 'active' => #taskset_threads }
7295 # Use GNU/Linux
7296 return sct_gnu_linux($_[0]);
7299 sub sct_freebsd($) {
7300 # Returns:
7301 # { 'sockets' => #sockets
7302 # 'cores' => #cores
7303 # 'threads' => #threads
7304 # 'active' => #taskset_threads }
7305 local $/ = "\n";
7306 my $cpu = shift;
7307 $cpu->{'cores'} ||=
7308 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
7310 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
7311 $cpu->{'threads'} ||=
7312 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
7314 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
7315 return $cpu;
7318 sub sct_netbsd($) {
7319 # Returns:
7320 # { 'sockets' => #sockets
7321 # 'cores' => #cores
7322 # 'threads' => #threads
7323 # 'active' => #taskset_threads }
7324 local $/ = "\n";
7325 my $cpu = shift;
7326 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
7327 return $cpu;
7330 sub sct_openbsd($) {
7331 # Returns:
7332 # { 'sockets' => #sockets
7333 # 'cores' => #cores
7334 # 'threads' => #threads
7335 # 'active' => #taskset_threads }
7336 local $/ = "\n";
7337 my $cpu = shift;
7338 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
7339 return $cpu;
7342 sub sct_hurd($) {
7343 # Returns:
7344 # { 'sockets' => #sockets
7345 # 'cores' => #cores
7346 # 'threads' => #threads
7347 # 'active' => #taskset_threads }
7348 local $/ = "\n";
7349 my $cpu = shift;
7350 $cpu->{'cores'} ||= ::qqx("nproc");
7351 return $cpu;
7354 sub sct_darwin($) {
7355 # Returns:
7356 # { 'sockets' => #sockets
7357 # 'cores' => #cores
7358 # 'threads' => #threads
7359 # 'active' => #taskset_threads }
7360 local $/ = "\n";
7361 my $cpu = shift;
7362 $cpu->{'cores'} ||=
7363 (::qqx('sysctl -n hw.physicalcpu')
7365 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7366 $cpu->{'threads'} ||=
7367 (::qqx('sysctl -n hw.logicalcpu')
7369 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7370 return $cpu;
7373 sub sct_solaris($) {
7374 # Returns:
7375 # { 'sockets' => #sockets
7376 # 'cores' => #cores
7377 # 'threads' => #threads
7378 # 'active' => #taskset_threads }
7379 local $/ = "\n";
7380 my $cpu = shift;
7381 if(not $cpu->{'cores'}) {
7382 if(-x "/usr/bin/kstat") {
7383 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
7384 if($#chip_id >= 0) {
7385 $cpu->{'sockets'} ||= $#chip_id +1;
7387 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
7388 if($#core_id >= 0) {
7389 $cpu->{'cores'} ||= $#core_id +1;
7392 if(-x "/usr/sbin/psrinfo") {
7393 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
7394 if($#psrinfo >= 0) {
7395 $cpu->{'sockets'} ||= $psrinfo[0];
7398 if(-x "/usr/sbin/prtconf") {
7399 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7400 if($#prtconf >= 0) {
7401 $cpu->{'cores'} ||= $#prtconf +1;
7405 return $cpu;
7408 sub sct_aix($) {
7409 # Returns:
7410 # { 'sockets' => #sockets
7411 # 'cores' => #cores
7412 # 'threads' => #threads
7413 # 'active' => #taskset_threads }
7414 local $/ = "\n";
7415 my $cpu = shift;
7416 if(not $cpu->{'cores'}) {
7417 if(-x "/usr/sbin/lscfg") {
7418 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7419 $cpu->{'cores'} = <$in_fh>;
7420 close $in_fh;
7424 if(not $cpu->{'threads'}) {
7425 if(-x "/usr/bin/vmstat") {
7426 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7427 while(<$in_fh>) {
7428 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7430 close $in_fh;
7434 return $cpu;
7437 sub sct_hpux($) {
7438 # Returns:
7439 # { 'sockets' => #sockets
7440 # 'cores' => #cores
7441 # 'threads' => #threads
7442 # 'active' => #taskset_threads }
7443 local $/ = "\n";
7444 my $cpu = shift;
7445 $cpu->{'cores'} ||=
7446 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7447 $cpu->{'threads'} ||=
7448 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7449 return $cpu;
7452 sub sct_qnx($) {
7453 # Returns:
7454 # { 'sockets' => #sockets
7455 # 'cores' => #cores
7456 # 'threads' => #threads
7457 # 'active' => #taskset_threads }
7458 local $/ = "\n";
7459 my $cpu = shift;
7460 # BUG: It is not known how to calculate this.
7462 return $cpu;
7465 sub sct_openserver($) {
7466 # Returns:
7467 # { 'sockets' => #sockets
7468 # 'cores' => #cores
7469 # 'threads' => #threads
7470 # 'active' => #taskset_threads }
7471 local $/ = "\n";
7472 my $cpu = shift;
7473 if(not $cpu->{'cores'}) {
7474 if(-x "/usr/sbin/psrinfo") {
7475 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7476 if($#psrinfo >= 0) {
7477 $cpu->{'cores'} = $#psrinfo +1;
7481 $cpu->{'sockets'} ||= $cpu->{'cores'};
7482 return $cpu;
7485 sub sct_irix($) {
7486 # Returns:
7487 # { 'sockets' => #sockets
7488 # 'cores' => #cores
7489 # 'threads' => #threads
7490 # 'active' => #taskset_threads }
7491 local $/ = "\n";
7492 my $cpu = shift;
7493 $cpu->{'cores'} ||=
7494 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7495 return $cpu;
7498 sub sct_tru64($) {
7499 # Returns:
7500 # { 'sockets' => #sockets
7501 # 'cores' => #cores
7502 # 'threads' => #threads
7503 # 'active' => #taskset_threads }
7504 local $/ = "\n";
7505 my $cpu = shift;
7506 $cpu->{'cores'} ||= ::qqx("sizer -pr");
7507 $cpu->{'sockets'} ||= $cpu->{'cores'};
7508 $cpu->{'threads'} ||= $cpu->{'cores'};
7510 return $cpu;
7513 sub sshcommand($) {
7514 # Returns:
7515 # $sshcommand = the command (incl options) to run when using ssh
7516 my $self = shift;
7517 if (not defined $self->{'sshcommand'}) {
7518 $self->sshcommand_of_sshlogin();
7520 return $self->{'sshcommand'};
7523 sub serverlogin($) {
7524 # Returns:
7525 # $sshcommand = the command (incl options) to run when using ssh
7526 my $self = shift;
7527 if (not defined $self->{'serverlogin'}) {
7528 $self->sshcommand_of_sshlogin();
7530 return $self->{'serverlogin'};
7533 sub sshcommand_of_sshlogin($) {
7534 # Compute ssh command and serverlogin from sshlogin
7535 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
7536 # 'user@server' -> ('ssh','user@server')
7537 # 'myssh user@server' -> ('myssh','user@server')
7538 # 'myssh -l user server' -> ('myssh -l user','server')
7539 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
7540 # Sets:
7541 # $self->{'sshcommand'}
7542 # $self->{'serverlogin'}
7543 my $self = shift;
7544 my ($sshcmd, $serverlogin);
7545 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
7546 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
7547 if($self->{'string'} =~ /(.+) (\S+)$/) {
7548 # Own ssh command
7549 $sshcmd = $1; $serverlogin = $2;
7550 } else {
7551 # Normal ssh
7552 if($opt::controlmaster) {
7553 # Use control_path to make ssh faster
7554 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7555 $sshcmd = $opt::ssh." -S ".$control_path;
7556 $serverlogin = $self->{'string'};
7557 if(not $self->{'control_path'}{$control_path}++) {
7558 # Master is not running for this control_path
7559 # Start it
7560 my $pid = fork();
7561 if($pid) {
7562 $Global::sshmaster{$pid} ||= 1;
7563 } else {
7564 $SIG{'TERM'} = undef;
7565 # Ignore the 'foo' being printed
7566 open(STDOUT,">","/dev/null");
7567 # STDERR >/dev/null to ignore
7568 open(STDERR,">","/dev/null");
7569 open(STDIN,"<","/dev/null");
7570 # Run a sleep that outputs data, so it will discover
7571 # if the ssh connection closes.
7572 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7573 my @master = ($opt::ssh, "-MTS",
7574 $control_path, $serverlogin, "--", "perl", "-e",
7575 $sleep);
7576 exec(@master);
7579 } else {
7580 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7584 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7585 # convert user@server to '-l user server'
7586 # because lsh does not support user@server
7587 $sshcmd = $sshcmd." -l ".$1;
7590 $self->{'sshcommand'} = $sshcmd;
7591 $self->{'serverlogin'} = $serverlogin;
7594 sub control_path_dir($) {
7595 # Returns:
7596 # $control_path_dir = dir of control path (for -M)
7597 my $self = shift;
7598 if(not defined $self->{'control_path_dir'}) {
7599 $self->{'control_path_dir'} =
7600 # Use $ENV{'TMPDIR'} as that is typically not
7601 # NFS mounted
7602 File::Temp::tempdir($ENV{'TMPDIR'}
7603 . "/control_path_dir-XXXX",
7604 CLEANUP => 1);
7606 return $self->{'control_path_dir'};
7609 sub rsync_transfer_cmd($) {
7610 # Command to run to transfer a file
7611 # Input:
7612 # $file = filename of file to transfer
7613 # $workdir = destination dir
7614 # Returns:
7615 # $cmd = rsync command to run to transfer $file ("" if unreadable)
7616 my $self = shift;
7617 my $file = shift;
7618 my $workdir = shift;
7619 if(not -r $file) {
7620 ::warning($file. " is not readable and will not be transferred.");
7621 return "true";
7623 my $rsync_destdir;
7624 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7625 if($relpath) {
7626 $rsync_destdir = ::shell_quote_file($workdir);
7627 } else {
7628 # rsync /foo/bar /
7629 $rsync_destdir = "/";
7631 $file = ::shell_quote_file($file);
7632 my $sshcmd = $self->sshcommand();
7633 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7634 " -e".::Q($sshcmd);
7635 my $serverlogin = $self->serverlogin();
7636 # Make dir if it does not exist
7637 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7638 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7641 sub cleanup_cmd($$$) {
7642 # Command to run to remove the remote file
7643 # Input:
7644 # $file = filename to remove
7645 # $workdir = destination dir
7646 # Returns:
7647 # $cmd = ssh command to run to remove $file and empty parent dirs
7648 my $self = shift;
7649 my $file = shift;
7650 my $workdir = shift;
7651 my $f = $file;
7652 if($f =~ m:/\./:) {
7653 # foo/bar/./baz/quux => workdir/baz/quux
7654 # /foo/bar/./baz/quux => workdir/baz/quux
7655 $f =~ s:.*/\./:$workdir/:;
7656 } elsif($f =~ m:^[^/]:) {
7657 # foo/bar => workdir/foo/bar
7658 $f = $workdir."/".$f;
7660 my @subdirs = split m:/:, ::dirname($f);
7661 my @rmdir;
7662 my $dir = "";
7663 for(@subdirs) {
7664 $dir .= $_."/";
7665 unshift @rmdir, ::shell_quote_file($dir);
7667 my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7668 if(defined $opt::workdir and $opt::workdir eq "...") {
7669 $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7672 $f = ::shell_quote_file($f);
7673 my $sshcmd = $self->sshcommand();
7674 my $serverlogin = $self->serverlogin();
7675 return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7679 my $rsync;
7681 sub rsync {
7682 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7683 # If the version >= 3.1.0: downgrade to protocol 30
7684 # Returns:
7685 # $rsync = "rsync" or "rsync --protocol 30"
7686 if(not $rsync) {
7687 my @out = `rsync --version`;
7688 for (@out) {
7689 if(/version (\d+.\d+)(.\d+)?/) {
7690 if($1 >= 3.1) {
7691 # Version 3.1.0 or later: Downgrade to protocol 30
7692 $rsync = "rsync --protocol 30";
7693 } else {
7694 $rsync = "rsync";
7698 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7700 return $rsync;
7705 package JobQueue;
7707 sub new($) {
7708 my $class = shift;
7709 my $commandref = shift;
7710 my $read_from = shift;
7711 my $context_replace = shift;
7712 my $max_number_of_args = shift;
7713 my $transfer_files = shift;
7714 my $return_files = shift;
7715 my $commandlinequeue = CommandLineQueue->new
7716 ($commandref, $read_from, $context_replace, $max_number_of_args,
7717 $transfer_files, $return_files);
7718 my @unget = ();
7719 return bless {
7720 'unget' => \@unget,
7721 'commandlinequeue' => $commandlinequeue,
7722 'this_job_no' => 0,
7723 'total_jobs' => undef,
7724 }, ref($class) || $class;
7727 sub get($) {
7728 my $self = shift;
7730 $self->{'this_job_no'}++;
7731 if(@{$self->{'unget'}}) {
7732 return shift @{$self->{'unget'}};
7733 } else {
7734 my $commandline = $self->{'commandlinequeue'}->get();
7735 if(defined $commandline) {
7736 return Job->new($commandline);
7737 } else {
7738 $self->{'this_job_no'}--;
7739 return undef;
7744 sub unget($) {
7745 my $self = shift;
7746 unshift @{$self->{'unget'}}, @_;
7747 $self->{'this_job_no'} -= @_;
7750 sub empty($) {
7751 my $self = shift;
7752 my $empty = (not @{$self->{'unget'}}) &&
7753 $self->{'commandlinequeue'}->empty();
7754 ::debug("run", "JobQueue->empty $empty ");
7755 return $empty;
7758 sub total_jobs($) {
7759 my $self = shift;
7760 if(not defined $self->{'total_jobs'}) {
7761 if($opt::pipe and not $opt::tee) {
7762 ::error("--pipe is incompatible with --eta/--bar/--shuf");
7763 ::wait_and_exit(255);
7765 if($opt::sqlworker) {
7766 $self->{'total_jobs'} = $Global::sql->total_jobs();
7767 } else {
7768 my $record;
7769 my @arg_records;
7770 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
7771 my $start = time;
7772 while($record = $record_queue->get()) {
7773 push @arg_records, $record;
7774 if(time - $start > 10) {
7775 ::warning("Reading ".scalar(@arg_records).
7776 " arguments took longer than 10 seconds.");
7777 $opt::eta && ::warning("Consider removing --eta.");
7778 $opt::bar && ::warning("Consider removing --bar.");
7779 $opt::shuf && ::warning("Consider removing --shuf.");
7780 last;
7783 while($record = $record_queue->get()) {
7784 push @arg_records, $record;
7786 if($opt::shuf and @arg_records) {
7787 my $i = @arg_records;
7788 while (--$i) {
7789 my $j = int rand($i+1);
7790 @arg_records[$i,$j] = @arg_records[$j,$i];
7793 $record_queue->unget(@arg_records);
7794 # $#arg_records = number of args - 1
7795 # We have read one @arg_record for this job (so add 1 more)
7796 my $num_args = $#arg_records + 2;
7797 # This jobs is not started so -1
7798 my $started_jobs = $self->{'this_job_no'} - 1;
7799 my $max_args = ::max($Global::max_number_of_args,1);
7800 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
7801 + $started_jobs;
7802 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
7803 " ($num_args/$max_args + $started_jobs)\n");
7806 return $self->{'total_jobs'};
7809 sub flush_total_jobs($) {
7810 # Unset total_jobs to force recomputing
7811 my $self = shift;
7812 ::debug("init","flush Total jobs: ");
7813 $self->{'total_jobs'} = undef;
7816 sub next_seq($) {
7817 my $self = shift;
7819 return $self->{'commandlinequeue'}->seq();
7822 sub quote_args($) {
7823 my $self = shift;
7824 return $self->{'commandlinequeue'}->quote_args();
7828 package Job;
7830 sub new($) {
7831 my $class = shift;
7832 my $commandlineref = shift;
7833 return bless {
7834 'commandline' => $commandlineref, # CommandLine object
7835 'workdir' => undef, # --workdir
7836 # filehandle for stdin (used for --pipe)
7837 # filename for writing stdout to (used for --files)
7838 # remaining data not sent to stdin (used for --pipe)
7839 # tmpfiles to cleanup when job is done
7840 'unlink' => [],
7841 # amount of data sent via stdin (used for --pipe)
7842 'transfersize' => 0, # size of files using --transfer
7843 'returnsize' => 0, # size of files using --return
7844 'pid' => undef,
7845 # hash of { SSHLogins => number of times the command failed there }
7846 'failed' => undef,
7847 'sshlogin' => undef,
7848 # The commandline wrapped with rsync and ssh
7849 'sshlogin_wrap' => undef,
7850 'exitstatus' => undef,
7851 'exitsignal' => undef,
7852 # Timestamp for timeout if any
7853 'timeout' => undef,
7854 'virgin' => 1,
7855 # Output used for SQL and CSV-output
7856 'output' => { 1 => [], 2 => [] },
7857 'halfline' => { 1 => [], 2 => [] },
7858 }, ref($class) || $class;
7861 sub replaced($) {
7862 my $self = shift;
7863 $self->{'commandline'} or ::die_bug("commandline empty");
7864 return $self->{'commandline'}->replaced();
7867 sub seq($) {
7868 my $self = shift;
7869 return $self->{'commandline'}->seq();
7872 sub set_seq($$) {
7873 my $self = shift;
7874 return $self->{'commandline'}->set_seq(shift);
7877 sub slot($) {
7878 my $self = shift;
7879 return $self->{'commandline'}->slot();
7882 sub free_slot($) {
7883 my $self = shift;
7884 push @Global::slots, $self->slot();
7888 my($cattail);
7890 sub cattail() {
7891 # Returns:
7892 # $cattail = perl program for:
7893 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
7894 if(not $cattail) {
7895 $cattail = q{
7896 # cat followed by tail (possibly with rm as soon at the file is opened)
7897 # If $writerpid dead: finish after this round
7898 use Fcntl;
7899 $|=1;
7901 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
7902 if($read_file) {
7903 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
7904 } else {
7905 *IN = *STDIN;
7907 while(! -s $comfile) {
7908 # Writer has not opened the buffer file, so we cannot remove it yet
7909 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
7910 usleep($sleep);
7912 # The writer and we have both opened the file, so it is safe to unlink it
7913 unlink $unlink_file;
7914 unlink $comfile;
7916 my $first_round = 1;
7917 my $flags;
7918 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
7919 $flags |= O_NONBLOCK; # Add non-blocking to the flags
7920 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
7922 while(1) {
7923 # clear EOF
7924 seek(IN,0,1);
7925 my $writer_running = kill 0, $writerpid;
7926 $read = sysread(IN,$buf,131072);
7927 if($read) {
7928 if($first_round) {
7929 # Only start the command if there any input to process
7930 $first_round = 0;
7931 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
7934 # Blocking print
7935 while($buf) {
7936 my $bytes_written = syswrite(OUT,$buf);
7937 # syswrite may be interrupted by SIGHUP
7938 substr($buf,0,$bytes_written) = "";
7940 # Something printed: Wait less next time
7941 $sleep /= 2;
7942 } else {
7943 if(eof(IN) and not $writer_running) {
7944 # Writer dead: There will never be sent more to the decompressor
7945 close OUT;
7946 exit;
7948 # TODO This could probably be done more efficiently using select(2)
7949 # Nothing read: Wait longer before next read
7950 # Up to 100 milliseconds
7951 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
7952 usleep($sleep);
7956 sub usleep {
7957 # Sleep this many milliseconds.
7958 my $secs = shift;
7959 select(undef, undef, undef, $secs/1000);
7962 $cattail =~ s/#.*//mg;
7963 $cattail =~ s/\s+/ /g;
7965 return $cattail;
7970 my $which_sh;
7972 sub startshell($) {
7973 my $self = shift;
7975 # Shell must support 'exec >foo 2>bar'
7976 # Using sh always will cause functions to not be exported
7977 # Could perl be used instead?
7978 $which_sh = $Global::cshell ? "/bin/sh" : $Global::shell;
7979 my ($pid, $stdin);
7980 if($pid = open($stdin, "|-", $which_sh)) {
7981 my $fileno = fileno($stdin);
7982 # Assume we get pid, fileno from spawner child
7983 $self->{'pid'} = $pid;
7984 open(my $stdin_fh, ">&=$fileno") or die ($fileno);
7985 $self->set_fh(0,"w",$stdin_fh);
7986 } else {
7987 die();
7992 sub openoutputfiles($) {
7993 # Open files for STDOUT and STDERR
7994 # Set file handles in $self->fh
7995 my $self = shift;
7996 my ($outfhw, $errfhw, $outname, $errname);
7998 if($opt::linebuffer and not
7999 ($opt::keeporder or $opt::files or $opt::results or
8000 $opt::compress or $opt::compress_program or
8001 $opt::decompress_program)) {
8002 my ($outfhr, $errfhr);
8003 $outname = ::tmpfifo();
8004 $errname = ::tmpfifo();
8005 open($outfhr,"+<",$outname);
8006 open($errfhr,"+<",$errname);
8007 $self->set_fh(1,'r',$outfhr);
8008 $self->set_fh(2,'r',$errfhr);
8009 open($outfhw,"+>",$outname);
8010 open($errfhw,"+>",$errname);
8011 $self->set_fh(1,'w',$outfhw);
8012 $self->set_fh(2,'w',$errfhw);
8013 # Make it possible to read non-blocking from the pipe
8014 for my $fdno (1,2) {
8015 ::set_fh_non_blocking($self->fh($fdno,'r'));
8017 } elsif($opt::results and not $Global::csvsep) {
8018 my $out = $self->{'commandline'}->results_out();
8019 my $seqname;
8020 if($out eq $opt::results or $out =~ m:/$:) {
8021 # $opt::results = simple string or ending in /
8022 # => $out is a dir/
8023 # prefix/name1/val1/name2/val2/seq
8024 $seqname = $out."seq";
8025 # prefix/name1/val1/name2/val2/stdout
8026 $outname = $out."stdout";
8027 # prefix/name1/val1/name2/val2/stderr
8028 $errname = $out."stderr";
8029 } else {
8030 # $opt::results = replacement string not ending in /
8031 # => $out is a file
8032 $outname = $out;
8033 $errname = "$out.err";
8034 $seqname = "$out.seq";
8036 my $seqfhw;
8037 if(not open($seqfhw, "+>", $seqname)) {
8038 ::error("Cannot write to `$seqname'.");
8039 ::wait_and_exit(255);
8041 print $seqfhw $self->seq();
8042 close $seqfhw;
8043 if(not open($outfhw, "+>", $outname)) {
8044 ::error("Cannot write to `$outname'.");
8045 ::wait_and_exit(255);
8047 if(not open($errfhw, "+>", $errname)) {
8048 ::error("Cannot write to `$errname'.");
8049 ::wait_and_exit(255);
8051 $self->set_fh(1,"unlink","");
8052 $self->set_fh(2,"unlink","");
8053 if($opt::sqlworker) {
8054 # Save the filenames in SQL table
8055 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
8056 "WHERE Seq = ". $self->seq(),
8057 $outname, $errname);
8059 } elsif(not $opt::ungroup) {
8060 # To group we create temporary files for STDOUT and STDERR
8061 # To avoid the cleanup unlink the files immediately (but keep them open)
8062 if($opt::files) {
8063 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8064 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8065 # --files => only remove stderr
8066 $self->set_fh(1,"unlink","");
8067 $self->set_fh(2,"unlink",$errname);
8068 } else {
8069 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8070 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8071 $self->set_fh(1,"unlink",$outname);
8072 $self->set_fh(2,"unlink",$errname);
8074 } else {
8075 # --ungroup
8076 # open($outfhw,">&",$Global::fd{1}) || die;
8077 # open($errfhw,">&",$Global::fd{2}) || die;
8078 # File name must be empty as it will otherwise be printed
8079 $outname = "";
8080 $errname = "";
8081 $self->set_fh(1,"unlink",$outname);
8082 $self->set_fh(2,"unlink",$errname);
8084 # Set writing FD
8085 # $self->set_fh(1,'w',$outfhw);
8086 # $self->set_fh(2,'w',$errfhw);
8087 $self->set_fh(1,'name',$outname);
8088 $self->set_fh(2,'name',$errname);
8090 if($opt::compress) {
8091 $self->filter_through_compress();
8092 } elsif(not $opt::ungroup) {
8093 $self->grouped();
8095 my $in = $self->fh(0,'w');
8096 if($outname) {
8097 syswrite($in,"exec >$outname\n");
8098 # Must be unlinked by worker child
8099 my $n = $self->fh(1,"unlink");
8100 if(-e $n) { syswrite($in,"rm $n\n"); }
8101 } elsif($errname) {
8102 syswrite($in,"exec 2>$errname\n");
8103 # Must be unlinked by worker child
8104 my $n = $self->fh(2,"unlink");
8105 if(-e $n) { syswrite($in,"rm $n\n"); }
8107 # close $outfhw;
8108 # close $errfhw;
8110 if($opt::linebuffer) {
8111 # Make it possible to read non-blocking from
8112 # the buffer files
8113 # Used for --linebuffer with -k, --files, --res, --compress*
8114 for my $fdno (1,2) {
8115 ::set_fh_non_blocking($self->fh($fdno,'r'));
8120 sub print_verbose_dryrun($) {
8121 # If -v set: print command to stdout (possibly buffered)
8122 # This must be done before starting the command
8123 my $self = shift;
8124 if($Global::verbose or $opt::dryrun) {
8125 my $fh = $self->fh(1,"w");
8126 if($Global::verbose <= 1) {
8127 print $fh $self->replaced(),"\n";
8128 } else {
8129 # Verbose level > 1: Print the rsync and stuff
8130 print $fh $self->wrapped(),"\n";
8133 if($opt::sqlworker) {
8134 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
8135 $self->replaced());
8139 sub add_rm($) {
8140 # Files to remove when job is done
8141 my $self = shift;
8142 push @{$self->{'unlink'}}, @_;
8145 sub get_rm($) {
8146 # Files to remove when job is done
8147 my $self = shift;
8148 return @{$self->{'unlink'}};
8151 sub cleanup($) {
8152 # Remove files when job is done
8153 my $self = shift;
8154 unlink $self->get_rm();
8155 delete @Global::unlink{$self->get_rm()};
8158 sub grouped($) {
8159 my $self = shift;
8160 # Set reading FD if using --group (--ungroup does not need)
8161 for my $fdno (1,2) {
8162 # Re-open the file for reading
8163 # so fdw can be closed seperately
8164 # and fdr can be seeked seperately (for --line-buffer)
8165 open(my $fdr,"<", $self->fh($fdno,'name')) ||
8166 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
8167 $self->set_fh($fdno,'r',$fdr);
8168 # Unlink if not debugging
8169 # $Global::debug or ::rm($self->fh($fdno,"unlink"));
8173 sub empty_input_wrapper($) {
8174 # If no input: exit(0)
8175 # If some input: Pass input as input to command on STDIN
8176 # This avoids starting the command if there is no input.
8177 # Input:
8178 # $command = command to pipe data to
8179 # Returns:
8180 # $wrapped_command = the wrapped command
8181 my $command = shift;
8182 my $script =
8183 ::spacefree(0,q{
8184 if(sysread(STDIN, $buf, 1)) {
8185 open($fh, "|-", @ARGV) || die;
8186 syswrite($fh, $buf);
8187 # Align up to 128k block
8188 if($read = sysread(STDIN, $buf, 131071)) {
8189 syswrite($fh, $buf);
8191 while($read = sysread(STDIN, $buf, 131072)) {
8192 syswrite($fh, $buf);
8194 close $fh;
8195 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8198 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
8199 if($Global::cshell
8201 length $command > 499) {
8202 # csh does not like words longer than 1000 (499 quoted)
8203 # $command = "perl -e '".base64_zip_eval()."' ".
8204 # join" ",string_zip_base64(
8205 # 'exec "'.::perl_quote_scalar($command).'"');
8206 return 'perl -e '.::Q($script)." ".
8207 base64_wrap("exec \"$Global::shell\",'-c',\"".
8208 ::perl_quote_scalar($command).'"');
8209 } else {
8210 return 'perl -e '.::Q($script)." ".
8211 $Global::shell." -c ".::Q($command);
8215 sub filter_through_compress($) {
8216 my $self = shift;
8217 # Send stdout to stdin for $opt::compress_program(1)
8218 # Send stderr to stdin for $opt::compress_program(2)
8219 # cattail get pid: $pid = $self->fh($fdno,'rpid');
8220 my $cattail = cattail();
8222 for my $fdno (1,2) {
8223 # Make a communication file.
8224 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
8225 close $fh;
8226 # Compressor: (echo > $comfile; compress pipe) > output
8227 # When the echo is written to $comfile,
8228 # it is known that output file is opened,
8229 # thus output file can then be removed by the decompressor.
8230 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
8231 empty_input_wrapper($opt::compress_program).") >".
8232 $self->fh($fdno,'name')) || die $?;
8233 $self->set_fh($fdno,'w',$fdw);
8234 $self->set_fh($fdno,'wpid',$wpid);
8235 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
8236 # decompress output > stdout
8237 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
8238 $opt::decompress_program, $wpid,
8239 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
8240 || die $?;
8241 $self->set_fh($fdno,'r',$fdr);
8242 $self->set_fh($fdno,'rpid',$rpid);
8248 sub set_fh($$$$) {
8249 # Set file handle
8250 my ($self, $fd_no, $key, $fh) = @_;
8251 $self->{'fd'}{$fd_no,$key} = $fh;
8254 sub fh($) {
8255 # Get file handle
8256 my ($self, $fd_no, $key) = @_;
8257 return $self->{'fd'}{$fd_no,$key};
8260 sub write($) {
8261 my $self = shift;
8262 my $remaining_ref = shift;
8263 my $stdin_fh = $self->fh(0,"w");
8265 my $len = length $$remaining_ref;
8266 # syswrite may not write all in one go,
8267 # so make sure everything is written.
8268 my $written;
8270 # If writing is to a closed pipe:
8271 # Do not call signal handler, but let nothing be written
8272 local $SIG{PIPE} = undef;
8273 while($written = syswrite($stdin_fh,$$remaining_ref)){
8274 substr($$remaining_ref,0,$written) = "";
8278 sub set_block($$$$$$) {
8279 # Copy stdin buffer from $block_ref up to $endpos
8280 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
8281 # Remove $recstart and $recend if needed
8282 # Input:
8283 # $header_ref = ref to $header to prepend
8284 # $buffer_ref = ref to $buffer containing the block
8285 # $endpos = length of $block to pass on
8286 # $recstart = --recstart regexp
8287 # $recend = --recend regexp
8288 # Returns:
8289 # N/A
8290 my $self = shift;
8291 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
8292 $self->{'block'} = ($self->virgin() ? $$header_ref : "").
8293 substr($$buffer_ref,0,$endpos);
8294 if($opt::remove_rec_sep) {
8295 remove_rec_sep(\$self->{'block'},$recstart,$recend);
8297 $self->{'block_length'} = length $self->{'block'};
8298 $self->{'block_pos'} = 0;
8299 $self->add_transfersize($self->{'block_length'});
8302 sub block_ref($) {
8303 my $self = shift;
8304 return \$self->{'block'};
8308 sub block_length($) {
8309 my $self = shift;
8310 return $self->{'block_length'};
8313 sub remove_rec_sep($) {
8314 # Remove --recstart and --recend from $block
8315 # Input:
8316 # $block_ref = reference to $block to be modified
8317 # $recstart = --recstart
8318 # $recend = --recend
8319 # Uses:
8320 # $opt::regexp = Are --recstart/--recend regexp?
8321 # Returns:
8322 # N/A
8323 my ($block_ref,$recstart,$recend) = @_;
8324 # Remove record separator
8325 if($opt::regexp) {
8326 $$block_ref =~ s/$recend$recstart//gos;
8327 $$block_ref =~ s/^$recstart//os;
8328 $$block_ref =~ s/$recend$//os;
8329 } else {
8330 $$block_ref =~ s/\Q$recend$recstart\E//gos;
8331 $$block_ref =~ s/^\Q$recstart\E//os;
8332 $$block_ref =~ s/\Q$recend\E$//os;
8336 sub non_blocking_write($) {
8337 my $self = shift;
8338 my $something_written = 0;
8340 my $in = $self->fh(0,"w");
8341 my $rv = syswrite($in,
8342 substr($self->{'block'},$self->{'block_pos'}));
8343 if (!defined($rv) && $! == ::EAGAIN()) {
8344 # would block - but would have written
8345 $something_written = 0;
8346 # avoid triggering auto expanding block size
8347 $Global::no_autoexpand_block ||= 1;
8348 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8349 # incomplete write
8350 # Remove the written part
8351 $self->{'block_pos'} += $rv;
8352 $something_written = $rv;
8353 } else {
8354 # successfully wrote everything
8355 # Empty block to free memory
8356 my $a = "";
8357 $self->set_block(\$a,\$a,0,"","");
8358 $something_written = $rv;
8360 ::debug("pipe", "Non-block: ", $something_written);
8361 return $something_written;
8365 sub virgin($) {
8366 my $self = shift;
8367 return $self->{'virgin'};
8370 sub set_virgin($$) {
8371 my $self = shift;
8372 $self->{'virgin'} = shift;
8375 sub pid($) {
8376 my $self = shift;
8377 return $self->{'pid'};
8380 sub set_pid($$) {
8381 my $self = shift;
8382 $self->{'pid'} = shift;
8385 sub starttime($) {
8386 # Returns:
8387 # UNIX-timestamp this job started
8388 my $self = shift;
8389 return sprintf("%.3f",$self->{'starttime'});
8392 sub set_starttime($@) {
8393 my $self = shift;
8394 my $starttime = shift || ::now();
8395 $self->{'starttime'} = $starttime;
8396 $opt::sqlworker and
8397 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8398 $starttime);
8401 sub runtime($) {
8402 # Returns:
8403 # Run time in seconds with 3 decimals
8404 my $self = shift;
8405 return sprintf("%.3f",
8406 int(($self->endtime() - $self->starttime())*1000)/1000);
8409 sub endtime($) {
8410 # Returns:
8411 # UNIX-timestamp this job ended
8412 # 0 if not ended yet
8413 my $self = shift;
8414 return ($self->{'endtime'} || 0);
8417 sub set_endtime($$) {
8418 my $self = shift;
8419 my $endtime = shift;
8420 $self->{'endtime'} = $endtime;
8421 $opt::sqlworker and
8422 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8423 $self->runtime());
8426 sub is_timedout($) {
8427 # Is the job timedout?
8428 # Input:
8429 # $delta_time = time that the job may run
8430 # Returns:
8431 # True or false
8432 my $self = shift;
8433 my $delta_time = shift;
8434 return time > $self->{'starttime'} + $delta_time;
8437 sub kill($) {
8438 my $self = shift;
8439 $self->set_exitstatus(-1);
8440 ::kill_sleep_seq($self->pid());
8443 sub failed($) {
8444 # return number of times failed for this $sshlogin
8445 # Input:
8446 # $sshlogin
8447 # Returns:
8448 # Number of times failed for $sshlogin
8449 my $self = shift;
8450 my $sshlogin = shift;
8451 return $self->{'failed'}{$sshlogin};
8454 sub failed_here($) {
8455 # return number of times failed for the current $sshlogin
8456 # Returns:
8457 # Number of times failed for this sshlogin
8458 my $self = shift;
8459 return $self->{'failed'}{$self->sshlogin()};
8462 sub add_failed($) {
8463 # increase the number of times failed for this $sshlogin
8464 my $self = shift;
8465 my $sshlogin = shift;
8466 $self->{'failed'}{$sshlogin}++;
8469 sub add_failed_here($) {
8470 # increase the number of times failed for the current $sshlogin
8471 my $self = shift;
8472 $self->{'failed'}{$self->sshlogin()}++;
8475 sub reset_failed($) {
8476 # increase the number of times failed for this $sshlogin
8477 my $self = shift;
8478 my $sshlogin = shift;
8479 delete $self->{'failed'}{$sshlogin};
8482 sub reset_failed_here($) {
8483 # increase the number of times failed for this $sshlogin
8484 my $self = shift;
8485 delete $self->{'failed'}{$self->sshlogin()};
8488 sub min_failed($) {
8489 # Returns:
8490 # the number of sshlogins this command has failed on
8491 # the minimal number of times this command has failed
8492 my $self = shift;
8493 my $min_failures =
8494 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
8495 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
8496 return ($number_of_sshlogins_failed_on,$min_failures);
8499 sub total_failed($) {
8500 # Returns:
8501 # $total_failures = the number of times this command has failed
8502 my $self = shift;
8503 my $total_failures = 0;
8504 for (values %{$self->{'failed'}}) {
8505 $total_failures += $_;
8507 return $total_failures;
8511 my $script;
8513 sub postpone_exit_and_cleanup {
8514 # Command to remove files and dirs (given as args) without
8515 # affecting the exit value in $?/$status.
8516 if(not $script) {
8517 $script = "perl -e '".
8518 ::spacefree(0,q{
8519 $bash=shift;
8520 $csh=shift;
8521 for(@ARGV){
8522 unlink;
8523 rmdir;
8525 if($bash=~s/h//) {
8526 exit $bash;
8528 exit $csh;
8530 "' ".'"$?h" "$status" ';
8532 return $script
8537 my $script;
8539 sub fifo_wrap() {
8540 # Script to create a fifo, run a command on the fifo
8541 # while copying STDIN to the fifo, and finally
8542 # remove the fifo and return the exit code of the command.
8543 if(not $script) {
8544 # {} == $PARALLEL_TMP for --fifo
8545 # To make it csh compatible a wrapper needs to:
8546 # * mkfifo
8547 # * spawn $command &
8548 # * cat > fifo
8549 # * waitpid to get the exit code from $command
8550 # * be less than 1000 chars long
8551 $script = "perl -e '".
8552 (::spacefree
8553 (0, q{
8554 ($s,$c,$f) = @ARGV;
8555 # mkfifo $PARALLEL_TMP
8556 system "mkfifo", $f;
8557 # spawn $shell -c $command &
8558 $pid = fork || exec $s, "-c", $c;
8559 open($o,">",$f) || die $!;
8560 # cat > $PARALLEL_TMP
8561 while(sysread(STDIN,$buf,131072)){
8562 syswrite $o, $buf;
8564 close $o;
8565 # waitpid to get the exit code from $command
8566 waitpid $pid,0;
8567 # Cleanup
8568 unlink $f;
8569 exit $?/256;
8570 }))."'";
8572 return $script;
8576 sub wrapped($) {
8577 # Wrap command with:
8578 # * --shellquote
8579 # * --nice
8580 # * --cat
8581 # * --fifo
8582 # * --sshlogin
8583 # * --pipepart (@Global::cat_prepends)
8584 # * --tee (@Global::cat_prepends)
8585 # * --pipe
8586 # * --tmux
8587 # The ordering of the wrapping is important:
8588 # * --nice/--cat/--fifo should be done on the remote machine
8589 # * --pipepart/--pipe should be done on the local machine inside --tmux
8590 # Uses:
8591 # @opt::shellquote
8592 # $opt::nice
8593 # $Global::shell
8594 # $opt::cat
8595 # $opt::fifo
8596 # @Global::cat_prepends
8597 # $opt::pipe
8598 # $opt::tmux
8599 # Returns:
8600 # $self->{'wrapped'} = the command wrapped with the above
8601 my $self = shift;
8602 if(not defined $self->{'wrapped'}) {
8603 my $command = $self->replaced();
8604 # Bug in Bash and Ksh when running multiline aliases
8605 # This will force them to run correctly, but will fail in
8606 # tcsh so we do not do it.
8607 # $command .= "\n\n";
8608 if(@opt::shellquote) {
8609 # Quote one time for each --shellquote
8610 my $c = $command;
8611 for(@opt::shellquote) {
8612 $c = ::Q($c);
8614 # Prepend "echo" (it is written in perl because
8615 # quoting '-e' causes problem in some versions and
8616 # csh's version does something wrong)
8617 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
8619 if($Global::parallel_env) {
8620 # If $PARALLEL_ENV set, put that in front of the command
8621 # Used for env_parallel.*
8622 if($Global::shell =~ /zsh/) {
8623 # The extra 'eval' will make aliases work, too
8624 $command = $Global::parallel_env."\n".
8625 "eval ".::Q($command);
8626 } else {
8627 $command = $Global::parallel_env."\n".$command;
8630 if($opt::cat) {
8631 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8632 # This is to make it possible to compute $PARALLEL_TMP on
8633 # the fly when running remotely.
8634 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8635 # the command is run.
8637 # Prepend 'cat > $PARALLEL_TMP;'
8638 # Append 'unlink $PARALLEL_TMP without affecting $?'
8639 $command =
8640 'cat > $PARALLEL_TMP;'.
8641 $command.";". postpone_exit_and_cleanup().
8642 '$PARALLEL_TMP';
8643 } elsif($opt::fifo) {
8644 # Prepend fifo-wrapper. In essence:
8645 # mkfifo {}
8646 # ( $command ) &
8647 # # $command must read {}, otherwise this 'cat' will block
8648 # cat > {};
8649 # wait; rm {}
8650 # without affecting $?
8651 $command = fifo_wrap(). " ".
8652 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8654 # Wrap with ssh + tranferring of files
8655 $command = $self->sshlogin_wrap($command);
8656 if(@Global::cat_prepends) {
8657 # --pipepart: prepend:
8658 # < /tmp/foo perl -e 'while(@ARGV) {
8659 # sysseek(STDIN,shift,0) || die; $left = shift;
8660 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8661 # $left -= $read; syswrite(STDOUT,$buf);
8663 # }' 0 0 0 11 |
8665 # --pipepart --tee: prepend:
8666 # < dash-a-file
8668 # --pipe --tee: wrap:
8669 # (rm fifo; ... ) < fifo
8671 # --pipe --shard X:
8672 # (rm fifo; ... ) < fifo
8673 $command = (shift @Global::cat_prepends). "($command)".
8674 (shift @Global::cat_appends);
8675 } elsif($opt::pipe and not $opt::roundrobin) {
8676 # Wrap with EOF-detector to avoid starting $command if EOF.
8677 $command = empty_input_wrapper($command);
8679 if($opt::tmux) {
8680 # Wrap command with 'tmux'
8681 $command = $self->tmux_wrap($command);
8683 if($Global::cshell
8685 length $command > 499) {
8686 # csh does not like words longer than 1000 (499 quoted)
8687 # $command = "perl -e '".base64_zip_eval()."' ".
8688 # join" ",string_zip_base64(
8689 # 'exec "'.::perl_quote_scalar($command).'"');
8690 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8691 ::perl_quote_scalar($command).'"');
8693 $self->{'wrapped'} = $command;
8695 return $self->{'wrapped'};
8698 sub set_sshlogin($$) {
8699 my $self = shift;
8700 my $sshlogin = shift;
8701 $self->{'sshlogin'} = $sshlogin;
8702 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8703 delete $self->{'wrapped'};
8705 if($opt::sqlworker) {
8706 # Identify worker as --sqlworker often runs on different machines
8707 my $host = $sshlogin->string();
8708 if($host eq ":") {
8709 $host = ::hostname();
8711 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8715 sub sshlogin($) {
8716 my $self = shift;
8717 return $self->{'sshlogin'};
8720 sub string_base64($) {
8721 # Base64 encode strings into 1000 byte blocks.
8722 # 1000 bytes is the largest word size csh supports
8723 # Input:
8724 # @strings = to be encoded
8725 # Returns:
8726 # @base64 = 1000 byte block
8727 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8728 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8729 return @base64;
8732 sub string_zip_base64($) {
8733 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8734 # byte blocks.
8735 # 1000 bytes is the largest word size csh supports
8736 # Zipping will make exporting big environments work, too
8737 # Input:
8738 # @strings = to be encoded
8739 # Returns:
8740 # @base64 = 1000 byte block
8741 my($zipin_fh, $zipout_fh,@base64);
8742 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8743 if(fork) {
8744 close $zipin_fh;
8745 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8746 # Split base64 encoded into 1000 byte blocks
8747 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8748 close $zipout_fh;
8749 } else {
8750 close $zipout_fh;
8751 print $zipin_fh @_;
8752 close $zipin_fh;
8753 exit;
8755 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8756 return @base64;
8759 sub base64_zip_eval() {
8760 # Script that:
8761 # * reads base64 strings from @ARGV
8762 # * decodes them
8763 # * pipes through 'bzip2 -dc'
8764 # * evals the result
8765 # Reverse of string_zip_base64 + eval
8766 # Will be wrapped in ' so single quote is forbidden
8767 # Returns:
8768 # $script = 1-liner for perl -e
8769 my $script = ::spacefree(0,q{
8770 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8771 eval"@GNU_Parallel";
8772 $chld = $SIG{CHLD};
8773 $SIG{CHLD} = "IGNORE";
8774 # Search for bzip2. Not found => use default path
8775 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
8776 # $in = stdin on $zip, $out = stdout from $zip
8777 # Forget my() to save chars for csh
8778 # my($in, $out,$eval);
8779 open3($in,$out,">&STDERR",$zip,"-dc");
8780 if(my $perlpid = fork) {
8781 close $in;
8782 $eval = join "", <$out>;
8783 close $out;
8784 } else {
8785 close $out;
8786 # Pipe decoded base64 into 'bzip2 -dc'
8787 print $in (decode_base64(join"",@ARGV));
8788 close $in;
8789 exit;
8791 wait;
8792 $SIG{CHLD} = $chld;
8793 eval $eval;
8795 ::debug("base64",$script,"\n");
8796 return $script;
8799 sub base64_wrap($) {
8800 # base64 encode Perl code
8801 # Split it into chunks of < 1000 bytes
8802 # Prepend it with a decoder that eval's it
8803 # Input:
8804 # $eval_string = Perl code to run
8805 # Returns:
8806 # $shell_command = shell command that runs $eval_string
8807 my $eval_string = shift;
8808 return
8809 "perl -e ".
8810 ::Q(base64_zip_eval())." ".
8811 join" ",::shell_quote(string_zip_base64($eval_string));
8814 sub base64_eval($) {
8815 # Script that:
8816 # * reads base64 strings from @ARGV
8817 # * decodes them
8818 # * evals the result
8819 # Reverse of string_base64 + eval
8820 # Will be wrapped in ' so single quote is forbidden.
8821 # Spaces are stripped so spaces cannot be significant.
8822 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
8823 # to make it clear that this is a GNU Parallel command
8824 # when looking at the process table.
8825 # Returns:
8826 # $script = 1-liner for perl -e
8827 my $script = ::spacefree(0,q{
8828 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
8829 eval "@GNU_Parallel";
8830 my $eval = decode_base64(join"",@ARGV);
8831 eval $eval;
8833 ::debug("base64",$script,"\n");
8834 return $script;
8837 sub sshlogin_wrap($) {
8838 # Wrap the command with the commands needed to run remotely
8839 # Input:
8840 # $command = command to run
8841 # Returns:
8842 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
8843 sub monitor_parent_sshd_script {
8844 # This script is to solve the problem of
8845 # * not mixing STDERR and STDOUT
8846 # * terminating with ctrl-c
8847 # If its parent is ssh: all good
8848 # If its parent is init(1): ssh died, so kill children
8849 my $monitor_parent_sshd_script;
8851 if(not $monitor_parent_sshd_script) {
8852 $monitor_parent_sshd_script =
8853 # This will be packed in ', so only use "
8854 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
8855 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
8856 '$nice = '.$opt::nice.';'.
8858 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
8859 do {
8860 $ENV{PARALLEL_TMP} = $tmpdir."/par".
8861 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
8862 } while(-e $ENV{PARALLEL_TMP});
8863 $SIG{CHLD} = sub { $done = 1; };
8864 $pid = fork;
8865 unless($pid) {
8866 # Make own process group to be able to kill HUP it later
8867 eval { setpgrp };
8868 eval { setpriority(0,0,$nice) };
8869 exec $shell, "-c", ($bashfunc."@ARGV");
8870 die "exec: $!\n";
8872 do {
8873 # Parent is not init (ppid=1), so sshd is alive
8874 # Exponential sleep up to 1 sec
8875 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
8876 select(undef, undef, undef, $s);
8877 } until ($done || getppid == 1);
8878 # Kill HUP the process group if job not done
8879 kill(SIGHUP, -${pid}) unless $done;
8880 wait;
8881 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8884 return $monitor_parent_sshd_script;
8887 sub vars_to_export {
8888 # Uses:
8889 # @opt::env
8890 my @vars = ("parallel_bash_environment");
8891 for my $varstring (@opt::env) {
8892 # Split up --env VAR1,VAR2
8893 push @vars, split /,/, $varstring;
8895 for (@vars) {
8896 if(-r $_ and not -d) {
8897 # Read as environment definition bug #44041
8898 # TODO parse this
8899 my $fh = ::open_or_exit($_);
8900 $Global::envdef = join("",<$fh>);
8901 close $fh;
8904 if(grep { /^_$/ } @vars) {
8905 local $/ = "\n";
8906 # --env _
8907 # Include all vars that are not in a clean environment
8908 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
8909 my @ignore = <$vars_fh>;
8910 chomp @ignore;
8911 my %ignore;
8912 @ignore{@ignore} = @ignore;
8913 close $vars_fh;
8914 push @vars, grep { not defined $ignore{$_} } keys %ENV;
8915 @vars = grep { not /^_$/ } @vars;
8916 } else {
8917 ::error("Run '$Global::progname --record-env' ".
8918 "in a clean environment first.");
8919 ::wait_and_exit(255);
8922 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
8923 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
8924 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", "PARALLEL_SSHLOGIN",
8925 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
8926 # Keep only defined variables
8927 return grep { defined($ENV{$_}) } @vars;
8930 sub env_as_eval {
8931 # Returns:
8932 # $eval = '$ENV{"..."}=...; ...'
8933 my @vars = vars_to_export();
8934 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
8935 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
8936 my @non_functions = (grep { !/PARALLEL_ENV/ }
8937 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
8939 # eval of @envset will set %ENV
8940 my $envset = join"", map {
8941 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
8942 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
8944 # running @bashfunc on the command line, will set the functions
8945 my @bashfunc = map {
8946 my $v=$_;
8947 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
8948 "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
8949 # eval $bashfuncset will set $bashfunc
8950 my $bashfuncset;
8951 if(@bashfunc) {
8952 # Functions are not supported for all shells
8953 if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
8954 ::warning("Shell functions may not be supported in $Global::shell.");
8956 $bashfuncset =
8957 '@bash_functions=qw('."@bash_functions".");".
8958 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
8959 if($shell=~/csh/) {
8960 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
8961 exec "false";
8964 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
8965 } else {
8966 $bashfuncset = '$bashfunc = "";'
8968 if($ENV{'parallel_bash_environment'}) {
8969 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
8971 ::debug("base64",$envset,$bashfuncset,"\n");
8972 return $csh_friendly,$envset,$bashfuncset;
8975 my $self = shift;
8976 my $command = shift;
8977 # TODO test that *sh -c 'parallel --env' use *sh
8978 if(not defined $self->{'sshlogin_wrap'}{$command}) {
8979 my $sshlogin = $self->sshlogin();
8980 my $serverlogin = $sshlogin->serverlogin();
8981 my $quoted_remote_command;
8982 $ENV{'PARALLEL_SEQ'} = $self->seq();
8983 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
8984 $ENV{'PARALLEL_PID'} = $$;
8985 if($serverlogin eq ":") {
8986 if($opt::workdir) {
8987 # Create workdir if needed. Then cd to it.
8988 my $wd = $self->workdir();
8989 if($opt::workdir eq "." or $opt::workdir eq "...") {
8990 # If $wd does not start with '/': Prepend $HOME
8991 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
8993 ::mkdir_or_die($wd);
8994 my $post = "";
8995 if($opt::workdir eq "...") {
8996 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
8999 $command = "cd ".::Q($wd)." || exit 255; " .
9000 $command . $post;;
9002 if(@opt::env) {
9003 # Prepend with environment setter, which sets functions in zsh
9004 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9005 my $perl_code = $envset.$bashfuncset.
9006 '@ARGV="'.::perl_quote_scalar($command).'";'.
9007 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
9008 if(length $perl_code > 999
9010 not $csh_friendly
9012 $command =~ /\n/) {
9013 # csh does not deal well with > 1000 chars in one word
9014 # csh does not deal well with $ENV with \n
9015 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
9016 } else {
9017 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
9019 } else {
9020 $self->{'sshlogin_wrap'}{$command} = $command;
9022 } else {
9023 my $pwd = "";
9024 if($opt::workdir) {
9025 # Create remote workdir if needed. Then cd to it.
9026 my $wd = ::pQ($self->workdir());
9027 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
9028 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
9030 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9031 my $remote_command = $pwd.$envset.$bashfuncset.
9032 '@ARGV="'.::perl_quote_scalar($command).'";'.
9033 monitor_parent_sshd_script();
9034 $quoted_remote_command = "perl -e ". ::Q($remote_command);
9035 my $dq_remote_command = ::Q($quoted_remote_command);
9036 if(length $dq_remote_command > 999
9038 not $csh_friendly
9040 $command =~ /\n/) {
9041 # csh does not deal well with > 1000 chars in one word
9042 # csh does not deal well with $ENV with \n
9043 $quoted_remote_command =
9044 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
9045 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
9046 } else {
9047 $quoted_remote_command = $dq_remote_command;
9050 my $sshcmd = $sshlogin->sshcommand();
9051 my ($pre,$post,$cleanup)=("","","");
9052 # --transfer
9053 $pre .= $self->sshtransfer();
9054 # --return
9055 $post .= $self->sshreturn();
9056 # --cleanup
9057 $post .= $self->sshcleanup();
9058 if($post) {
9059 # We need to save the exit status of the job
9060 $post = exitstatuswrapper($post);
9062 $self->{'sshlogin_wrap'}{$command} =
9063 ($pre
9064 . "$sshcmd $serverlogin -- exec "
9065 . $quoted_remote_command
9066 . ";"
9067 . $post);
9070 return $self->{'sshlogin_wrap'}{$command};
9073 sub transfer($) {
9074 # Files to transfer
9075 # Non-quoted and with {...} substituted
9076 # Returns:
9077 # @transfer - File names of files to transfer
9078 my $self = shift;
9080 my $transfersize = 0;
9081 my @transfer = $self->{'commandline'}->
9082 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
9083 for(@transfer) {
9084 # filesize
9085 if(-e $_) {
9086 $transfersize += (stat($_))[7];
9089 $self->add_transfersize($transfersize);
9090 return @transfer;
9093 sub transfersize($) {
9094 my $self = shift;
9095 return $self->{'transfersize'};
9098 sub add_transfersize($) {
9099 my $self = shift;
9100 my $transfersize = shift;
9101 $self->{'transfersize'} += $transfersize;
9102 $opt::sqlworker and
9103 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
9104 $self->{'transfersize'});
9107 sub sshtransfer($) {
9108 # Returns for each transfer file:
9109 # rsync $file remote:$workdir
9110 my $self = shift;
9111 my @pre;
9112 my $sshlogin = $self->sshlogin();
9113 my $workdir = $self->workdir();
9114 for my $file ($self->transfer()) {
9115 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
9117 return join("",@pre);
9120 sub return($) {
9121 # Files to return
9122 # Non-quoted and with {...} substituted
9123 # Returns:
9124 # @non_quoted_filenames
9125 my $self = shift;
9126 return $self->{'commandline'}->
9127 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
9130 sub returnsize($) {
9131 # This is called after the job has finished
9132 # Returns:
9133 # $number_of_bytes transferred in return
9134 my $self = shift;
9135 for my $file ($self->return()) {
9136 if(-e $file) {
9137 $self->{'returnsize'} += (stat($file))[7];
9140 return $self->{'returnsize'};
9143 sub add_returnsize($) {
9144 my $self = shift;
9145 my $returnsize = shift;
9146 $self->{'returnsize'} += $returnsize;
9147 $opt::sqlworker and
9148 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
9149 $self->{'returnsize'});
9152 sub sshreturn($) {
9153 # Returns for each return-file:
9154 # rsync remote:$workdir/$file .
9155 my $self = shift;
9156 my $sshlogin = $self->sshlogin();
9157 my $sshcmd = $sshlogin->sshcommand();
9158 my $serverlogin = $sshlogin->serverlogin();
9159 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
9160 my $pre = "";
9161 for my $file ($self->return()) {
9162 $file =~ s:^\./::g; # Remove ./ if any
9163 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9164 my $cd = "";
9165 my $wd = "";
9166 if($relpath) {
9167 # rsync -avR /foo/./bar/baz.c remote:/tmp/
9168 # == (on old systems)
9169 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
9170 $wd = ::shell_quote_file($self->workdir()."/");
9172 # Only load File::Basename if actually needed
9173 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
9174 # dir/./file means relative to dir, so remove dir on remote
9175 $file =~ m:(.*)/\./:;
9176 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
9177 my $nobasedir = $file;
9178 $nobasedir =~ s:.*/\./::;
9179 $cd = ::shell_quote_file(::dirname($nobasedir));
9180 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
9181 my $basename = ::Q(::shell_quote_file(::basename($file)));
9182 # --return
9183 # mkdir -p /home/tange/dir/subdir/;
9184 # rsync (--protocol 30) -rlDzR
9185 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
9186 # server:file.gz /home/tange/dir/subdir/
9187 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
9188 " $rsync_cd $rsync_opts $serverlogin:".
9189 $basename . " ".$basedir.$cd.";";
9191 return $pre;
9194 sub sshcleanup($) {
9195 # Return the sshcommand needed to remove the file
9196 # Returns:
9197 # ssh command needed to remove files from sshlogin
9198 my $self = shift;
9199 my $sshlogin = $self->sshlogin();
9200 my $sshcmd = $sshlogin->sshcommand();
9201 my $serverlogin = $sshlogin->serverlogin();
9202 my $workdir = $self->workdir();
9203 my $cleancmd = "";
9205 for my $file ($self->remote_cleanup()) {
9206 my @subworkdirs = parentdirs_of($file);
9207 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
9209 if(defined $opt::workdir and $opt::workdir eq "...") {
9210 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
9212 return $cleancmd;
9215 sub remote_cleanup($) {
9216 # Returns:
9217 # Files to remove at cleanup
9218 my $self = shift;
9219 if($opt::cleanup) {
9220 my @transfer = $self->transfer();
9221 my @return = $self->return();
9222 return (@transfer,@return);
9223 } else {
9224 return ();
9228 sub exitstatuswrapper(@) {
9229 # Input:
9230 # @shellcode = shell code to execute
9231 # Returns:
9232 # shell script that returns current status after executing @shellcode
9233 if($Global::cshell) {
9234 return ('set _EXIT_status=$status; ' .
9235 join(" ",@_).
9236 'exit $_EXIT_status;');
9237 } else {
9238 return ('_EXIT_status=$?; ' .
9239 join(" ",@_).
9240 'exit $_EXIT_status;');
9244 sub workdir($) {
9245 # Returns:
9246 # the workdir on a remote machine
9247 my $self = shift;
9248 if(not defined $self->{'workdir'}) {
9249 my $workdir;
9250 if(defined $opt::workdir) {
9251 if($opt::workdir eq ".") {
9252 # . means current dir
9253 my $home = $ENV{'HOME'};
9254 eval 'use Cwd';
9255 my $cwd = cwd();
9256 $workdir = $cwd;
9257 if($home) {
9258 # If homedir exists: remove the homedir from
9259 # workdir if cwd starts with homedir
9260 # E.g. /home/foo/my/dir => my/dir
9261 # E.g. /tmp/my/dir => /tmp/my/dir
9262 my ($home_dev, $home_ino) = (stat($home))[0,1];
9263 my $parent = "";
9264 my @dir_parts = split(m:/:,$cwd);
9265 my $part;
9266 while(defined ($part = shift @dir_parts)) {
9267 $part eq "" and next;
9268 $parent .= "/".$part;
9269 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
9270 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
9271 # dev and ino is the same: We found the homedir.
9272 $workdir = join("/",@dir_parts);
9273 last;
9277 if($workdir eq "") {
9278 $workdir = ".";
9280 } elsif($opt::workdir eq "...") {
9281 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
9282 . "-" . $self->seq();
9283 } else {
9284 $workdir = $self->{'commandline'}->
9285 replace_placeholders([$opt::workdir],0,0);
9286 #$workdir = $opt::workdir;
9287 # Rsync treats /./ special. We dont want that
9288 $workdir =~ s:/\./:/:g; # Remove /./
9289 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
9290 $workdir =~ s:^\./::g; # Remove starting ./ if any
9292 } else {
9293 $workdir = ".";
9295 $self->{'workdir'} = $workdir;
9297 return $self->{'workdir'};
9300 sub parentdirs_of($) {
9301 # Return:
9302 # all parentdirs except . of this dir or file - sorted desc by length
9303 my $d = shift;
9304 my @parents = ();
9305 while($d =~ s:/[^/]+$::) {
9306 if($d ne ".") {
9307 push @parents, $d;
9310 return @parents;
9313 sub start($) {
9314 # Setup STDOUT and STDERR for a job and start it.
9315 # Returns:
9316 # job-object or undef if job not to run
9318 sub open3_setpgrp_internal {
9319 # Run open3+setpgrp followed by the command
9320 # Input:
9321 # $stdin_fh = Filehandle to use as STDIN
9322 # $stdout_fh = Filehandle to use as STDOUT
9323 # $stderr_fh = Filehandle to use as STDERR
9324 # $command = Command to run
9325 # Returns:
9326 # $pid = Process group of job started
9327 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9328 my $pid;
9329 local (*OUT,*ERR);
9330 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9331 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9332 # The eval is needed to catch exception from open3
9333 # eval {
9334 # if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9335 # # Each child gets its own process group to make it safe to killall
9336 # eval{ setpgrp(0,0) };
9337 # eval{ setpriority(0,0,$opt::nice) };
9338 # exec($Global::shell,"-c",$command)
9339 # || ::die_bug("open3-$stdin_fh $command");
9341 # };
9342 # my ($in,$out,$err);
9343 # if($pid = open($in, "|-", "/bin/sh")) {
9344 # $fileno = fileno($in);
9346 # $pid $fileno
9347 # $stdin_fh = $self->fh("w",0)
9348 # print $stdin_fh $command;
9350 return $pid;
9353 sub open3_setpgrp_external {
9354 # Run open3 on $command wrapped with a perl script doing setpgrp
9355 # Works on systems that do not support open3(,,,"-")
9356 # Input:
9357 # $stdin_fh = Filehandle to use as STDIN
9358 # $stdout_fh = Filehandle to use as STDOUT
9359 # $stderr_fh = Filehandle to use as STDERR
9360 # $command = Command to run
9361 # Returns:
9362 # $pid = Process group of job started
9363 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9364 local (*OUT,*ERR);
9365 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9366 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9368 my $pid;
9369 my @setpgrp_wrap =
9370 ('perl','-e',
9371 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9372 "exec '$Global::shell', '-c', \@ARGV");
9373 # The eval is needed to catch exception from open3
9374 eval {
9375 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9376 || ::die_bug("open3-$stdin_fh");
9379 return $pid;
9382 sub redefine_open3_setpgrp {
9383 my $setgprp_cache = shift;
9384 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9385 no warnings 'redefine';
9386 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9387 # Test to see if open3(x,x,x,"-") is fully supported
9388 # Can an exported bash function be called via open3?
9389 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9390 'else { exec("bash","-c","testfun && true"); }';
9391 my $bash =
9392 ::shell_quote_scalar_default(
9393 "testfun() { rm $name; }; export -f testfun; ".
9394 "perl -MIPC::Open3 -e ".
9395 ::shell_quote_scalar_default($script)
9397 my $redefine_eval;
9398 # Redirect STDERR temporarily,
9399 # so errors on MacOS X are ignored.
9400 open my $saveerr, ">&STDERR";
9401 open STDERR, '>', "/dev/null";
9402 # Run the test
9403 ::debug("init",qq{bash -c $bash 2>/dev/null});
9404 qx{ bash -c $bash 2>/dev/null };
9405 open STDERR, ">&", $saveerr;
9407 if(-e $name) {
9408 # Does not support open3(x,x,x,"-")
9409 # or does not have bash:
9410 # Use (slow) external version
9411 unlink($name);
9412 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
9413 ::debug("init","open3_setpgrp_external chosen\n");
9414 } else {
9415 # Supports open3(x,x,x,"-")
9416 # This is 0.5 ms faster to run
9417 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
9418 ::debug("init","open3_setpgrp_internal chosen\n");
9420 if(open(my $fh, ">", $setgprp_cache)) {
9421 print $fh $redefine_eval;
9422 close $fh;
9423 } else {
9424 ::debug("init","Cannot write to $setgprp_cache");
9426 eval $redefine_eval;
9429 sub _open3_setpgrp {
9430 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
9431 ::hostname() . "/setpgrp_func";
9432 if(-e $setgprp_cache) {
9433 local $/ = undef;
9434 open(my $fh, "<", $setgprp_cache) || die;
9435 eval <$fh> || die;
9436 close $fh;
9437 } else {
9438 redefine_open3_setpgrp($setgprp_cache);
9440 # The sub is now redefined. Call it
9441 return open3_setpgrp(@_);
9444 my $job = shift;
9445 # Get the shell command to be executed (possibly with ssh infront).
9446 my $command = $job->wrapped();
9447 my $pid;
9449 if($Global::interactive or $Global::stderr_verbose) {
9450 $job->interactive_start();
9452 # Must be run after $job->interactive_start():
9453 # $job->interactive_start() may call $job->skip()
9454 if($job->{'commandline'}{'skip'}) {
9455 # $job->skip() was called
9456 $command = "true";
9458 $ENV{'PARALLEL_SEQ'} = $job->seq();
9459 $ENV{'PARALLEL_PID'} = $$;
9460 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
9461 $job->startshell();
9462 $job->openoutputfiles();
9463 $job->print_verbose_dryrun();
9464 # Call slot to store the slot value
9465 $job->slot();
9466 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
9467 $job->add_rm($ENV{'PARALLEL_TMP'});
9468 ::debug("run", $Global::total_running, " processes . Starting (",
9469 $job->seq(), "): $command\n");
9471 my ($stdin_fh) = $job->fh(0,"w");
9472 if ($opt::tty and -c "/dev/tty" and
9473 open(my $devtty_fh, "<", "/dev/tty")) {
9474 # Give /dev/tty to the command if no one else is using it
9475 close $devtty_fh;
9476 syswrite($stdin_fh,"exec < /dev/tty\n");
9478 my @setpgrp = ('exec perl','-e',
9479 ::Q("eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9480 "exec '$Global::shell', '-c', \@ARGV"));
9481 syswrite($stdin_fh,"@setpgrp ".::Q($command)."\n");
9482 # print $stdin_fh "@setpgrp ",::Q($command),"\n";
9483 ::debug("run", "Run: $command\n");
9484 if($opt::pipe) {
9485 if($opt::roundrobin and not $opt::keeporder) {
9486 # --keep-order will make sure the order will be reproducible
9487 ::set_fh_non_blocking($stdin_fh);
9489 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
9490 } else {
9491 # Close stdin if not pipe input
9492 close $stdin_fh;
9493 $job->set_virgin(0);
9495 if($job->{'pid'}) {
9496 # A job was started
9497 $Global::total_running++;
9498 $Global::total_started++;
9499 $job->set_starttime();
9500 $Global::running{$job->pid()} = $job;
9501 if($opt::timeout) {
9502 $Global::timeoutq->insert($job);
9504 $Global::newest_job = $job;
9505 $Global::newest_starttime = ::now();
9506 return $job;
9507 } else {
9508 # No more processes
9509 ::debug("run", "Cannot spawn more jobs.\n");
9510 return undef;
9514 sub interactive_start($) {
9515 my $self = shift;
9516 my $command = $self->wrapped();
9517 if($Global::interactive) {
9518 my $answer;
9519 ::status_no_nl("$command ?...");
9521 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
9522 $answer = <$tty_fh>;
9523 close $tty_fh;
9524 # Sometime we get an empty string (not even \n)
9525 # Do not know why, so let us just ignore it and try again
9526 } while(length $answer < 1);
9527 if (not ($answer =~ /^\s*y/i)) {
9528 $self->{'commandline'}->skip();
9530 } else {
9531 print $Global::original_stderr "$command\n";
9536 my $tmuxsocket;
9538 sub tmux_wrap($) {
9539 # Wrap command with tmux for session pPID
9540 # Input:
9541 # $actual_command = the actual command being run (incl ssh wrap)
9542 my $self = shift;
9543 my $actual_command = shift;
9544 # Temporary file name. Used for fifo to communicate exit val
9545 my $tmpfifo = ::tmpname("tmx");
9546 $self->add_rm($tmpfifo);
9548 if(length($tmpfifo) >=100) {
9549 ::error("tmux does not support sockets with path > 100.");
9550 ::wait_and_exit(255);
9552 if($opt::tmuxpane) {
9553 # Move the command into a pane in window 0
9554 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
9555 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
9556 $actual_command;
9558 my $visual_command = $self->replaced();
9559 my $title = $visual_command;
9560 if($visual_command =~ /\0/) {
9561 ::error("Command line contains NUL. tmux is confused by NUL.");
9562 ::wait_and_exit(255);
9564 # ; causes problems
9565 # ascii 194-245 annoys tmux
9566 $title =~ tr/[\011-\016;\302-\365]/ /s;
9567 $title = ::Q($title);
9569 my $l_act = length($actual_command);
9570 my $l_tit = length($title);
9571 my $l_fifo = length($tmpfifo);
9572 # The line to run contains a 118 chars extra code + the title 2x
9573 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9575 my $quoted_space75 = ::Q(" ")x75;
9576 while($l_tit < 1000 and
9578 (890 < $l_tot and $l_tot < 1350)
9580 (9250 < $l_tot and $l_tot < 9800)
9581 )) {
9582 # tmux blocks for certain lengths:
9583 # 900 < title + command < 1200
9584 # 9250 < title + command < 9800
9585 # but only if title < 1000, so expand the title with 75 spaces
9586 # The measured lengths are:
9587 # 996 < (title + whole command) < 1127
9588 # 9331 < (title + whole command) < 9636
9589 $title .= $quoted_space75;
9590 $l_tit = length($title);
9591 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9594 my $tmux;
9595 $ENV{'PARALLEL_TMUX'} ||= "tmux";
9596 if(not $tmuxsocket) {
9597 $tmuxsocket = ::tmpname("tms");
9598 if($opt::fg) {
9599 if(not fork) {
9600 # Run tmux in the foreground
9601 # Wait for the socket to appear
9602 while (not -e $tmuxsocket) { }
9603 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
9604 exit;
9607 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
9609 $tmux = "sh -c '".
9610 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
9611 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
9613 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
9614 $Limits::Command::line_max_len, " tot ",
9615 $l_tot, "\n");
9617 return "mkfifo $tmpfifo && $tmux ".
9618 # Run in tmux
9621 "(".$actual_command.');'.
9622 # The triple print is needed - otherwise the testsuite fails
9623 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
9624 "echo $title; echo \007Job finished at: `date`;sleep 10"
9626 # Run outside tmux
9627 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
9628 # If csh the first will be 0h, so use the second as exit value.
9629 # Otherwise just use the first value as exit value.
9630 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
9634 sub is_already_in_results($) {
9635 # Do we already have results for this job?
9636 # Returns:
9637 # $job_already_run = bool whether there is output for this or not
9638 my $job = $_[0];
9639 my $out = $job->{'commandline'}->results_out();
9640 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
9641 return(-e $out."stdout" or -f $out);
9644 sub is_already_in_joblog($) {
9645 my $job = shift;
9646 return vec($Global::job_already_run,$job->seq(),1);
9649 sub set_job_in_joblog($) {
9650 my $job = shift;
9651 vec($Global::job_already_run,$job->seq(),1) = 1;
9654 sub should_be_retried($) {
9655 # Should this job be retried?
9656 # Returns
9657 # 0 - do not retry
9658 # 1 - job queued for retry
9659 my $self = shift;
9660 if (not $opt::retries) {
9661 return 0;
9663 if(not $self->exitstatus() and not $self->exitsignal()) {
9664 # Completed with success. If there is a recorded failure: forget it
9665 $self->reset_failed_here();
9666 return 0;
9667 } else {
9668 # The job failed. Should it be retried?
9669 $self->add_failed_here();
9670 my $retries = $self->{'commandline'}->
9671 replace_placeholders([$opt::retries],0,0);
9672 if($self->total_failed() == $retries) {
9673 # This has been retried enough
9674 return 0;
9675 } else {
9676 # This command should be retried
9677 $self->set_endtime(undef);
9678 $self->reset_exitstatus();
9679 $Global::JobQueue->unget($self);
9680 ::debug("run", "Retry ", $self->seq(), "\n");
9681 return 1;
9687 my (%print_later,$job_seq_to_print);
9689 sub print_earlier_jobs($) {
9690 # Print jobs whose output is postponed due to --keep-order
9691 # Returns: N/A
9692 my $job = shift;
9693 $print_later{$job->seq()} = $job;
9694 $job_seq_to_print ||= 1;
9695 my $returnsize = 0;
9696 ::debug("run", "Looking for: $job_seq_to_print ",
9697 "This: ", $job->seq(), "\n");
9698 for(;vec($Global::job_already_run,$job_seq_to_print,1);
9699 $job_seq_to_print++) {}
9700 while(my $j = $print_later{$job_seq_to_print}) {
9701 $returnsize += $j->print();
9702 if($j->endtime()) {
9703 # Job finished - look at the next
9704 delete $print_later{$job_seq_to_print};
9705 $job_seq_to_print++;
9706 next;
9707 } else {
9708 # Job not finished yet - look at it again next round
9709 last;
9712 return $returnsize;
9716 sub print($) {
9717 # Print the output of the jobs
9718 # Returns: N/A
9719 my $self = shift;
9721 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9722 if($opt::dryrun) {
9723 # Nothing was printed to this job:
9724 # cleanup tmp files if --files was set
9725 ::rm($self->fh(1,"name"));
9727 if($opt::pipe and $self->virgin() and not $opt::tee) {
9728 # Skip --joblog, --dryrun, --verbose
9729 } else {
9730 if($opt::ungroup) {
9731 # NULL returnsize = 0 returnsize
9732 $self->returnsize() or $self->add_returnsize(0);
9733 if($Global::joblog and defined $self->{'exitstatus'}) {
9734 # Add to joblog when finished
9735 $self->print_joblog();
9736 # Printing is only relevant for grouped/--line-buffer output.
9737 $opt::ungroup and return;
9741 # Check for disk full
9742 ::exit_if_disk_full();
9745 my $returnsize = $self->returnsize();
9746 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9747 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9748 $fdno == 0 and next;
9749 my $out_fd = $Global::fd{$fdno};
9750 my $in_fh = $self->fh($fdno,"r");
9751 if(not $in_fh) {
9752 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9753 # ::warning("File descriptor $fdno not defined\n");
9755 next;
9757 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
9758 if($opt::linebuffer) {
9759 # Line buffered print out
9760 $self->print_linebuffer($fdno,$in_fh,$out_fd);
9761 } elsif($opt::files) {
9762 $self->print_files($fdno,$in_fh,$out_fd);
9763 } elsif($opt::tag or defined $opt::tagstring) {
9764 $self->print_tag($fdno,$in_fh,$out_fd);
9765 } else {
9766 $self->print_normal($fdno,$in_fh,$out_fd);
9768 flush $out_fd;
9770 ::debug("print", "<<joboutput\n");
9771 if(defined $self->{'exitstatus'}
9772 and not ($self->virgin() and $opt::pipe)) {
9773 if($Global::joblog and not $opt::sqlworker) {
9774 # Add to joblog when finished
9775 $self->print_joblog();
9777 if($opt::sqlworker and not $opt::results) {
9778 $Global::sql->output($self);
9780 if($Global::csvsep) {
9781 # Add output to CSV when finished
9782 $self->print_csv();
9785 return $returnsize - $self->returnsize();
9789 my $header_printed;
9791 sub print_csv($) {
9792 my $self = shift;
9793 my $cmd;
9794 if($Global::verbose <= 1) {
9795 $cmd = $self->replaced();
9796 } else {
9797 # Verbose level > 1: Print the rsync and stuff
9798 $cmd = join " ", @{$self->{'commandline'}};
9800 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
9802 if(not $header_printed) {
9803 # Variable headers
9804 # Normal => V1..Vn
9805 # --header : => first value from column
9806 my @V;
9807 if($opt::header) {
9808 my $i = 1;
9809 @V = (map { $Global::input_source_header{$i++} }
9810 @$record_ref[1..$#$record_ref]);
9811 } else {
9812 my $V = "V1";
9813 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
9815 print $Global::csv_fh
9816 (map { $$_ }
9817 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
9818 "Send", "Receive", "Exitval", "Signal", "Command",
9820 "Stdout","Stderr"
9821 )),"\n";
9822 $header_printed++;
9824 # Memory optimization: Overwrite with the joined output
9825 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
9826 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
9827 print $Global::csv_fh
9828 (map { $$_ }
9829 combine_ref
9830 ($self->seq(),
9831 $self->sshlogin()->string(),
9832 $self->starttime(), sprintf("%0.3f",$self->runtime()),
9833 $self->transfersize(), $self->returnsize(),
9834 $self->exitstatus(), $self->exitsignal(), \$cmd,
9835 \@$record_ref[1..$#$record_ref],
9836 \$self->{'output'}{1},
9837 \$self->{'output'}{2})),"\n";
9841 sub combine_ref($) {
9842 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
9843 my @part = @_;
9844 my $sep = $Global::csvsep;
9845 my $quot = '"';
9846 my @out = ();
9848 my $must_be_quoted;
9849 for my $column (@part) {
9850 # Memory optimization: Content transferred as reference
9851 if(ref $column ne "SCALAR") {
9852 # Convert all columns to scalar references
9853 my $v = $column;
9854 $column = \$v;
9856 if(not defined $$column) {
9857 $$column = '';
9858 next;
9861 $must_be_quoted = 0;
9863 if($$column =~ s/$quot/$quot$quot/go){
9864 # Replace " => ""
9865 $must_be_quoted ||=1;
9867 if($$column =~ /[\s\Q$sep\E]/o){
9868 # Put quotes around if the column contains ,
9869 $must_be_quoted ||=1;
9872 $Global::use{"bytes"} ||= eval "use bytes; 1;";
9873 if ($$column =~ /\0/) {
9874 # Contains \0 => put quotes around
9875 $must_be_quoted ||=1;
9877 if($must_be_quoted){
9878 push @out, \$sep, \$quot, $column, \$quot;
9879 } else {
9880 push @out, \$sep, $column;
9883 # Pop off a $sep
9884 shift @out;
9885 return @out;
9888 sub print_files($) {
9889 # Print the name of the file containing stdout on stdout
9890 # Uses:
9891 # $opt::pipe
9892 # $opt::group = Print when job is done
9893 # $opt::linebuffer = Print ASAP
9894 # Returns: N/A
9895 my $self = shift;
9896 my ($fdno,$in_fh,$out_fd) = @_;
9898 # If the job is dead: close printing fh. Needed for --compress
9899 close $self->fh($fdno,"w");
9900 if($? and $opt::compress) {
9901 ::error($opt::compress_program." failed.");
9902 $self->set_exitstatus(255);
9904 if($opt::compress) {
9905 # Kill the decompressor which will not be needed
9906 CORE::kill "TERM", $self->fh($fdno,"rpid");
9908 close $in_fh;
9910 if($opt::pipe and $self->virgin()) {
9911 # Nothing was printed to this job:
9912 # cleanup unused tmp files because --files was set
9913 for my $fdno (1,2) {
9914 ::rm($self->fh($fdno,"name"));
9915 ::rm($self->fh($fdno,"unlink"));
9917 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
9918 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9919 if($Global::membuffer) {
9920 push @{$self->{'output'}{$fdno}},
9921 $self->tag(), $self->fh($fdno,"name");
9923 $self->add_returnsize(-s $self->fh($fdno,"name"));
9924 # Mark as printed - do not print again
9925 $self->set_fh($fdno,"name",undef);
9929 sub print_linebuffer($) {
9930 my $self = shift;
9931 my ($fdno,$in_fh,$out_fd) = @_;
9932 if(defined $self->{'exitstatus'}) {
9933 # If the job is dead: close printing fh. Needed for --compress
9934 close $self->fh($fdno,"w");
9935 if($? and $opt::compress) {
9936 ::error($opt::compress_program." failed.");
9937 $self->set_exitstatus(255);
9939 if($opt::compress) {
9940 # Blocked reading in final round
9941 for my $fdno (1,2) {
9942 ::set_fh_blocking($self->fh($fdno,'r'));
9946 if(not $self->virgin()) {
9947 if($opt::files or ($opt::results and not $Global::csvsep)) {
9948 # Print filename
9949 if($fdno == 1 and not $self->fh($fdno,"printed")) {
9950 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9951 if($Global::membuffer) {
9952 push(@{$self->{'output'}{$fdno}}, $self->tag(),
9953 $self->fh($fdno,"name"));
9955 $self->set_fh($fdno,"printed",1);
9957 # No need for reading $in_fh, as it is from "cat >/dev/null"
9958 } else {
9959 # Read halflines and print full lines
9960 my $outputlength = 0;
9961 my $halfline_ref = $self->{'halfline'}{$fdno};
9962 my ($buf,$i,$rv);
9963 # 1310720 gives 1.2 GB/s
9964 # 131072 gives 0.9 GB/s
9965 while($rv = sysread($in_fh, $buf,1310720)) {
9966 $outputlength += $rv;
9967 # TODO --recend
9968 # Treat both \n and \r as line end
9969 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
9970 if($i) {
9971 # One or more complete lines were found
9972 if($opt::tag or defined $opt::tagstring) {
9973 # Replace ^ with $tag within the full line
9974 if($Global::cache_replacement_eval) {
9975 # Replace with the same value for tag
9976 my $tag = $self->tag();
9977 unshift @$halfline_ref, $tag;
9978 # TODO --recend that can be partially in @$halfline_ref
9979 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$tag/gs;
9980 # The length changed, so find the new ending pos
9981 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
9982 } else {
9983 # Replace with freshly computed value of tag
9984 unshift @$halfline_ref, $self->tag();
9985 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$self->tag()/gse;
9986 # The length changed, so find the new ending pos
9987 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
9990 # Print the partial line (halfline) and the last half
9991 print $out_fd @$halfline_ref, substr($buf,0,$i);
9992 # Buffer in memory for SQL and CSV-output
9993 if($Global::membuffer) {
9994 push(@{$self->{'output'}{$fdno}},
9995 @$halfline_ref, substr($buf,0,$i));
9997 # Remove the printed part by keeping the unprinted part
9998 @$halfline_ref = (substr($buf,$i));
9999 } else {
10000 # No newline, so append to the halfline
10001 push @$halfline_ref, $buf;
10004 $self->add_returnsize($outputlength);
10006 if(defined $self->{'exitstatus'}) {
10007 if($opt::files or ($opt::results and not $Global::csvsep)) {
10008 $self->add_returnsize(-s $self->fh($fdno,"name"));
10009 } else {
10010 # If the job is dead: print the remaining partial line
10011 # read remaining
10012 my $halfline_ref = $self->{'halfline'}{$fdno};
10013 if(grep /./, @$halfline_ref) {
10014 my $returnsize = 0;
10015 for(@{$self->{'halfline'}{$fdno}}) {
10016 $returnsize += length $_;
10018 $self->add_returnsize($returnsize);
10019 if($opt::tag or defined $opt::tagstring) {
10020 # Prepend $tag the the remaining half line
10021 unshift @$halfline_ref, $self->tag();
10023 # Print the partial line (halfline)
10024 print $out_fd @{$self->{'halfline'}{$fdno}};
10025 # Buffer in memory for SQL and CSV-output
10026 if($Global::membuffer) {
10027 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
10029 @$halfline_ref = ();
10032 if($self->fh($fdno,"rpid") and
10033 CORE::kill 0, $self->fh($fdno,"rpid")) {
10034 # decompress still running
10035 } else {
10036 # decompress done: close fh
10037 close $in_fh;
10038 if($? and $opt::compress) {
10039 ::error($opt::decompress_program." failed.");
10040 $self->set_exitstatus(255);
10047 sub print_tag(@) {
10048 return print_normal(@_);
10051 sub free_ressources() {
10052 my $self = shift;
10053 if(not $opt::ungroup) {
10054 my $fh;
10055 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
10056 $fh = $self->fh($fdno,"w");
10057 $fh and close $fh;
10058 $fh = $self->fh($fdno,"r");
10059 $fh and close $fh;
10064 sub print_normal($) {
10065 my $self = shift;
10066 my ($fdno,$in_fh,$out_fd) = @_;
10067 my $buf;
10068 #close $self->fh($fdno,"w");
10069 if($? and $opt::compress) {
10070 ::error($opt::compress_program." failed.");
10071 $self->set_exitstatus(255);
10073 if(not $self->virgin()) {
10074 seek $in_fh, 0, 0;
10075 # $in_fh is now ready for reading at position 0
10076 my $outputlength = 0;
10077 my @output;
10079 if($opt::tag or $opt::tagstring) {
10080 # Read line by line
10081 local $/ = "\n";
10082 my $tag = $self->tag();
10083 while(<$in_fh>) {
10084 $outputlength += length $_;
10085 # Tag lines with \r, too
10086 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10087 print $out_fd $tag,$_;
10088 if($Global::membuffer) {
10089 push @{$self->{'output'}{$fdno}}, $tag, $_;
10092 } else {
10093 while(sysread($in_fh,$buf,131072)) {
10094 print $out_fd $buf;
10095 $outputlength += length $buf;
10096 if($Global::membuffer) {
10097 push @{$self->{'output'}{$fdno}}, $buf;
10101 if($fdno == 1) {
10102 $self->add_returnsize($outputlength);
10104 close $in_fh;
10105 if($? and $opt::compress) {
10106 ::error($opt::decompress_program." failed.");
10107 $self->set_exitstatus(255);
10112 sub print_joblog($) {
10113 my $self = shift;
10114 my $cmd;
10115 if($Global::verbose <= 1) {
10116 $cmd = $self->replaced();
10117 } else {
10118 # Verbose level > 1: Print the rsync and stuff
10119 $cmd = join " ", @{$self->{'commandline'}};
10121 # Newlines make it hard to parse the joblog
10122 $cmd =~ s/\n/\0/g;
10123 print $Global::joblog
10124 join("\t", $self->seq(), $self->sshlogin()->string(),
10125 $self->starttime(), sprintf("%10.3f",$self->runtime()),
10126 $self->transfersize(), $self->returnsize(),
10127 $self->exitstatus(), $self->exitsignal(), $cmd
10128 ). "\n";
10129 flush $Global::joblog;
10130 $self->set_job_in_joblog();
10133 sub tag($) {
10134 my $self = shift;
10135 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
10136 if($opt::tag or defined $opt::tagstring) {
10137 $self->{'tag'} = $self->{'commandline'}->
10138 replace_placeholders([$opt::tagstring],0,0)."\t";
10139 } else {
10140 $self->{'tag'} = "";
10143 return $self->{'tag'};
10146 sub hostgroups($) {
10147 my $self = shift;
10148 if(not defined $self->{'hostgroups'}) {
10149 $self->{'hostgroups'} =
10150 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
10152 return @{$self->{'hostgroups'}};
10155 sub exitstatus($) {
10156 my $self = shift;
10157 return $self->{'exitstatus'};
10160 sub set_exitstatus($$) {
10161 my $self = shift;
10162 my $exitstatus = shift;
10163 if($exitstatus) {
10164 # Overwrite status if non-zero
10165 $self->{'exitstatus'} = $exitstatus;
10166 } else {
10167 # Set status but do not overwrite
10168 # Status may have been set by --timeout
10169 $self->{'exitstatus'} ||= $exitstatus;
10171 $opt::sqlworker and
10172 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
10173 $exitstatus);
10176 sub reset_exitstatus($) {
10177 my $self = shift;
10178 undef $self->{'exitstatus'};
10181 sub exitsignal($) {
10182 my $self = shift;
10183 return $self->{'exitsignal'};
10186 sub set_exitsignal($$) {
10187 my $self = shift;
10188 my $exitsignal = shift;
10189 $self->{'exitsignal'} = $exitsignal;
10190 $opt::sqlworker and
10191 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
10192 $exitsignal);
10196 my $status_printed;
10197 my $total_jobs;
10199 sub should_we_halt {
10200 # Should we halt? Immediately? Gracefully?
10201 # Returns: N/A
10202 my $job = shift;
10203 my $limit;
10204 if($job->exitstatus() or $job->exitsignal()) {
10205 # Job failed
10206 $Global::exitstatus++;
10207 $Global::total_failed++;
10208 if($Global::halt_fail) {
10209 ::status("$Global::progname: This job failed:",
10210 $job->replaced());
10211 $limit = $Global::total_failed;
10213 } elsif($Global::halt_success) {
10214 ::status("$Global::progname: This job succeeded:",
10215 $job->replaced());
10216 $limit = $Global::total_completed - $Global::total_failed;
10218 if($Global::halt_done) {
10219 ::status("$Global::progname: This job finished:",
10220 $job->replaced());
10221 $limit = $Global::total_completed;
10223 if(not defined $limit) {
10224 return ""
10226 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
10227 # --halt % => 1..100 (pct of jobs failed)
10228 if($Global::halt_pct and not $Global::halt_count) {
10229 $total_jobs ||= $Global::JobQueue->total_jobs();
10230 # From the pct compute the number of jobs that must fail/succeed
10231 $Global::halt_count = $total_jobs * $Global::halt_pct;
10233 if($limit >= $Global::halt_count) {
10234 # At least N jobs have failed/succeded/completed
10235 # or at least N% have failed/succeded/completed
10236 # So we should prepare for exit
10237 if($Global::halt_fail or $Global::halt_done) {
10238 # Set exit status
10239 if(not defined $Global::halt_exitstatus) {
10240 if($Global::halt_pct) {
10241 # --halt now,fail=X% or soon,fail=X%
10242 # --halt now,done=X% or soon,done=X%
10243 $Global::halt_exitstatus =
10244 ::ceil($Global::total_failed / $total_jobs * 100);
10245 } elsif($Global::halt_count) {
10246 # --halt now,fail=X or soon,fail=X
10247 # --halt now,done=X or soon,done=X
10248 $Global::halt_exitstatus =
10249 ::min($Global::total_failed,101);
10251 if($Global::halt_count and $Global::halt_count == 1) {
10252 # --halt now,fail=1 or soon,fail=1
10253 # --halt now,done=1 or soon,done=1
10254 # Emulate Bash's +128 if there is a signal
10255 $Global::halt_exitstatus =
10256 ($job->exitstatus()
10258 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
10261 ::debug("halt","Pct: ",$Global::halt_pct,
10262 " count: ",$Global::halt_count,
10263 " status: ",$Global::halt_exitstatus,"\n");
10264 } elsif($Global::halt_success) {
10265 $Global::halt_exitstatus = 0;
10267 if($Global::halt_when eq "soon"
10269 (scalar(keys %Global::running) > 0
10271 $Global::max_jobs_running == 1)) {
10272 ::status
10273 ("$Global::progname: Starting no more jobs. ".
10274 "Waiting for ". (keys %Global::running).
10275 " jobs to finish.");
10276 $Global::start_no_new_jobs ||= 1;
10278 return($Global::halt_when);
10280 return "";
10285 package CommandLine;
10287 sub new($) {
10288 my $class = shift;
10289 my $seq = shift;
10290 my $commandref = shift;
10291 $commandref || die;
10292 my $arg_queue = shift;
10293 my $context_replace = shift;
10294 my $max_number_of_args = shift; # for -N and normal (-n1)
10295 my $transfer_files = shift;
10296 my $return_files = shift;
10297 my $replacecount_ref = shift;
10298 my $len_ref = shift;
10299 my %replacecount = %$replacecount_ref;
10300 my %len = %$len_ref;
10301 for (keys %$replacecount_ref) {
10302 # Total length of this replacement string {} replaced with all args
10303 $len{$_} = 0;
10305 return bless {
10306 'command' => $commandref,
10307 'seq' => $seq,
10308 'len' => \%len,
10309 'arg_list' => [],
10310 'arg_list_flat' => [],
10311 'arg_list_flat_orig' => [undef],
10312 'arg_queue' => $arg_queue,
10313 'max_number_of_args' => $max_number_of_args,
10314 'replacecount' => \%replacecount,
10315 'context_replace' => $context_replace,
10316 'transfer_files' => $transfer_files,
10317 'return_files' => $return_files,
10318 'replaced' => undef,
10319 }, ref($class) || $class;
10322 sub seq($) {
10323 my $self = shift;
10324 return $self->{'seq'};
10327 sub set_seq($$) {
10328 my $self = shift;
10329 $self->{'seq'} = shift;
10332 sub slot($) {
10333 # Find the number of a free job slot and return it
10334 # Uses:
10335 # @Global::slots - list with free jobslots
10336 # Returns:
10337 # $jobslot = number of jobslot
10338 my $self = shift;
10339 if(not $self->{'slot'}) {
10340 if(not @Global::slots) {
10341 # $max_slot_number will typically be $Global::max_jobs_running
10342 push @Global::slots, ++$Global::max_slot_number;
10344 $self->{'slot'} = shift @Global::slots;
10346 return $self->{'slot'};
10350 my $already_spread;
10352 sub populate($) {
10353 # Add arguments from arg_queue until the number of arguments or
10354 # max line length is reached
10355 # Uses:
10356 # $Global::minimal_command_line_length
10357 # $opt::cat
10358 # $opt::fifo
10359 # $Global::JobQueue
10360 # $opt::m
10361 # $opt::X
10362 # $Global::max_jobs_running
10363 # Returns: N/A
10364 my $self = shift;
10365 my $next_arg;
10366 my $max_len = $Global::minimal_command_line_length
10367 || Limits::Command::max_length();
10369 if($opt::cat or $opt::fifo) {
10370 # Get the empty arg added by --pipepart (if any)
10371 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
10372 # $PARALLEL_TMP will point to a tempfile that will be used as {}
10373 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
10374 unget([Arg->new('$PARALLEL_TMP')]);
10376 while (not $self->{'arg_queue'}->empty()) {
10377 $next_arg = $self->{'arg_queue'}->get();
10378 if(not defined $next_arg) {
10379 next;
10381 $self->push($next_arg);
10382 if($self->len() >= $max_len) {
10383 # Command length is now > max_length
10384 # If there are arguments: remove the last
10385 # If there are no arguments: Error
10386 # TODO stuff about -x opt_x
10387 if($self->number_of_args() > 1) {
10388 # There is something to work on
10389 $self->{'arg_queue'}->unget($self->pop());
10390 last;
10391 } else {
10392 my $args = join(" ", map { $_->orig() } @$next_arg);
10393 ::error("Command line too long (".
10394 $self->len(). " >= ".
10395 $max_len.
10396 ") at input ".
10397 $self->{'arg_queue'}->arg_number().
10398 ": ".
10399 ((length $args > 50) ?
10400 (substr($args,0,50))."..." :
10401 $args));
10402 $self->{'arg_queue'}->unget($self->pop());
10403 ::wait_and_exit(255);
10407 if(defined $self->{'max_number_of_args'}) {
10408 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
10409 last;
10413 if(($opt::m or $opt::X) and not $already_spread
10414 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
10415 # -m or -X and EOF => Spread the arguments over all jobslots
10416 # (unless they are already spread)
10417 $already_spread ||= 1;
10418 if($self->number_of_args() > 1) {
10419 $self->{'max_number_of_args'} =
10420 ::ceil($self->number_of_args()/$Global::max_jobs_running);
10421 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
10422 $self->{'max_number_of_args'};
10423 $self->{'arg_queue'}->unget($self->pop_all());
10424 while($self->number_of_args() < $self->{'max_number_of_args'}) {
10425 $self->push($self->{'arg_queue'}->get());
10428 $Global::JobQueue->flush_total_jobs();
10431 if($opt::sqlmaster) {
10432 # Insert the V1..Vn for this $seq in SQL table instead of generating one
10433 $Global::sql->insert_records($self->seq(), $self->{'command'},
10434 $self->{'arg_list_flat_orig'});
10439 sub push($) {
10440 # Add one or more records as arguments
10441 # Returns: N/A
10442 my $self = shift;
10443 my $record = shift;
10444 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
10445 push @{$self->{'arg_list_flat'}}, @$record;
10446 push @{$self->{'arg_list'}}, $record;
10447 # Make @arg available for {= =}
10448 *Arg::arg = $self->{'arg_list_flat_orig'};
10450 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10451 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10452 if($perlexpr =~ /^(\d+) /) {
10453 # Positional
10454 defined($record->[$1-1]) or next;
10455 $self->{'len'}{$perlexpr} +=
10456 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10457 } else {
10458 for my $arg (@$record) {
10459 if(defined $arg) {
10460 $self->{'len'}{$perlexpr} +=
10461 length $arg->replace($perlexpr,$quote_arg,$self);
10468 sub pop($) {
10469 # Remove last argument
10470 # Returns:
10471 # the last record
10472 my $self = shift;
10473 my $record = pop @{$self->{'arg_list'}};
10474 # pop off arguments from @$record
10475 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
10476 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
10477 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10478 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10479 if($perlexpr =~ /^(\d+) /) {
10480 # Positional
10481 defined($record->[$1-1]) or next;
10482 $self->{'len'}{$perlexpr} -=
10483 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10484 } else {
10485 for my $arg (@$record) {
10486 if(defined $arg) {
10487 $self->{'len'}{$perlexpr} -=
10488 length $arg->replace($perlexpr,$quote_arg,$self);
10493 return $record;
10496 sub pop_all($) {
10497 # Remove all arguments and zeros the length of replacement perlexpr
10498 # Returns:
10499 # all records
10500 my $self = shift;
10501 my @popped = @{$self->{'arg_list'}};
10502 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10503 $self->{'len'}{$perlexpr} = 0;
10505 $self->{'arg_list'} = [];
10506 $self->{'arg_list_flat_orig'} = [undef];
10507 $self->{'arg_list_flat'} = [];
10508 return @popped;
10511 sub number_of_args($) {
10512 # The number of records
10513 # Returns:
10514 # number of records
10515 my $self = shift;
10516 # This is really the number of records
10517 return $#{$self->{'arg_list'}}+1;
10520 sub number_of_recargs($) {
10521 # The number of args in records
10522 # Returns:
10523 # number of args records
10524 my $self = shift;
10525 my $sum = 0;
10526 my $nrec = scalar @{$self->{'arg_list'}};
10527 if($nrec) {
10528 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
10530 return $sum;
10533 sub args_as_string($) {
10534 # Returns:
10535 # all unmodified arguments joined with ' ' (similar to {})
10536 my $self = shift;
10537 return (join " ", map { $_->orig() }
10538 map { @$_ } @{$self->{'arg_list'}});
10541 sub results_out($) {
10542 sub max_file_name_length {
10543 # Figure out the max length of a subdir
10544 # TODO and the max total length
10545 # Ext4 = 255,130816
10546 # Uses:
10547 # $Global::max_file_length is set
10548 # Returns:
10549 # $Global::max_file_length
10550 my $testdir = shift;
10552 my $upper = 100_000_000;
10553 # Dir length of 8 chars is supported everywhere
10554 my $len = 8;
10555 my $dir = "x"x$len;
10556 do {
10557 rmdir($testdir."/".$dir);
10558 $len *= 16;
10559 $dir = "x"x$len;
10560 } while ($len < $upper and mkdir $testdir."/".$dir);
10561 # Then search for the actual max length between $len/16 and $len
10562 my $min = $len/16;
10563 my $max = $len;
10564 while($max-$min > 5) {
10565 # If we are within 5 chars of the exact value:
10566 # it is not worth the extra time to find the exact value
10567 my $test = int(($min+$max)/2);
10568 $dir = "x"x$test;
10569 if(mkdir $testdir."/".$dir) {
10570 rmdir($testdir."/".$dir);
10571 $min = $test;
10572 } else {
10573 $max = $test;
10576 $Global::max_file_length = $min;
10577 return $min;
10580 my $self = shift;
10581 my $out = $self->replace_placeholders([$opt::results],0,0);
10582 if($out eq $opt::results) {
10583 # $opt::results simple string: Append args_as_dirname
10584 my $args_as_dirname = $self->args_as_dirname();
10585 # Output in: prefix/name1/val1/name2/val2/stdout
10586 $out = $opt::results."/".$args_as_dirname;
10587 if(-d $out or eval{ File::Path::mkpath($out); }) {
10588 # OK
10589 } else {
10590 # mkpath failed: Argument probably too long.
10591 # Set $Global::max_file_length, which will keep the individual
10592 # dir names shorter than the max length
10593 max_file_name_length($opt::results);
10594 $args_as_dirname = $self->args_as_dirname();
10595 # prefix/name1/val1/name2/val2/
10596 $out = $opt::results."/".$args_as_dirname;
10597 File::Path::mkpath($out);
10599 $out .="/";
10600 } else {
10601 if($out =~ m:/$:) {
10602 # / = dir
10603 if(-d $out or eval{ File::Path::mkpath($out); }) {
10604 # OK
10605 } else {
10606 ::error("Cannot make dir '$out'.");
10607 ::wait_and_exit(255);
10609 } else {
10610 $out =~ m:(.*)/:;
10611 File::Path::mkpath($1);
10614 return $out;
10617 sub args_as_dirname($) {
10618 # Returns:
10619 # all unmodified arguments joined with '/' (similar to {})
10620 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10621 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
10622 my $self = shift;
10623 my @res = ();
10625 for my $rec_ref (@{$self->{'arg_list'}}) {
10626 # If headers are used, sort by them.
10627 # Otherwise keep the order from the command line.
10628 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
10629 for my $n (@header_indexes_sorted) {
10630 CORE::push(@res,
10631 $Global::input_source_header{$n},
10632 map { my $s = $_;
10633 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10634 $s =~ s/\\/\\\\/g;
10635 $s =~ s/\t/\\t/g;
10636 $s =~ s/\0/\\0/g;
10637 $s =~ s:/:\\_:g;
10638 if($Global::max_file_length) {
10639 # Keep each subdir shorter than the longest
10640 # allowed file name
10641 $s = substr($s,0,$Global::max_file_length);
10643 $s; }
10644 $rec_ref->[$n-1]->orig());
10647 return join "/", @res;
10650 sub header_indexes_sorted($) {
10651 # Sort headers first by number then by name.
10652 # E.g.: 1a 1b 11a 11b
10653 # Returns:
10654 # Indexes of %Global::input_source_header sorted
10655 my $max_col = shift;
10657 no warnings 'numeric';
10658 for my $col (1 .. $max_col) {
10659 # Make sure the header is defined. If it is not: use column number
10660 if(not defined $Global::input_source_header{$col}) {
10661 $Global::input_source_header{$col} = $col;
10664 my @header_indexes_sorted = sort {
10665 # Sort headers numerically then asciibetically
10666 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
10668 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
10669 } 1 .. $max_col;
10670 return @header_indexes_sorted;
10673 sub len($) {
10674 # Uses:
10675 # @opt::shellquote
10676 # The length of the command line with args substituted
10677 my $self = shift;
10678 my $len = 0;
10679 # Add length of the original command with no args
10680 # Length of command w/ all replacement args removed
10681 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
10682 ::debug("length", "noncontext + command: $len\n");
10683 my $recargs = $self->number_of_recargs();
10684 if($self->{'context_replace'}) {
10685 # Context is duplicated for each arg
10686 $len += $recargs * $self->{'len'}{'context'};
10687 for my $replstring (keys %{$self->{'replacecount'}}) {
10688 # If the replacements string is more than once: mulitply its length
10689 $len += $self->{'len'}{$replstring} *
10690 $self->{'replacecount'}{$replstring};
10691 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
10692 $self->{'replacecount'}{$replstring}, "\n");
10694 # echo 11 22 33 44 55 66 77 88 99 1010
10695 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
10696 # 5 + ctxgrp*arg
10697 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10698 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10699 # Add space between context groups
10700 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10701 } else {
10702 # Each replacement string may occur several times
10703 # Add the length for each time
10704 $len += 1*$self->{'len'}{'context'};
10705 ::debug("length", "context+noncontext + command: $len\n");
10706 for my $replstring (keys %{$self->{'replacecount'}}) {
10707 # (space between regargs + length of replacement)
10708 # * number this replacement is used
10709 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10710 $self->{'replacecount'}{$replstring};
10713 if(defined $Global::parallel_env) {
10714 # If we are using --env, add the prefix for that, too.
10715 $len += length $Global::parallel_env;
10717 if($Global::quoting) {
10718 # Pessimistic length if -q is set
10719 # Worse than worst case: ' => "'" + " => '"'
10720 # TODO can we count the number of expanding chars?
10721 # and count them in arguments, too?
10722 $len *= 3;
10724 if(@opt::shellquote) {
10725 # Pessimistic length if --shellquote is set
10726 # Worse than worst case: ' => "'"
10727 for(@opt::shellquote) {
10728 $len *= 3;
10730 $len *= 5;
10732 if(@opt::sshlogin) {
10733 # Pessimistic length if remote
10734 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
10735 $len = int($len*4/3);
10738 return $len;
10741 sub replaced($) {
10742 # Uses:
10743 # $Global::quote_replace
10744 # $Global::quoting
10745 # Returns:
10746 # $replaced = command with place holders replaced and prepended
10747 my $self = shift;
10748 if(not defined $self->{'replaced'}) {
10749 # Don't quote arguments if the input is the full command line
10750 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10751 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
10752 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
10753 $self->{'replaced'} = $self->
10754 replace_placeholders($self->{'command'},$Global::quoting,
10755 $quote_arg);
10756 my $len = length $self->{'replaced'};
10757 if ($len != $self->len()) {
10758 ::debug("length", $len, " != ", $self->len(),
10759 " ", $self->{'replaced'}, "\n");
10760 } else {
10761 ::debug("length", $len, " == ", $self->len(),
10762 " ", $self->{'replaced'}, "\n");
10765 return $self->{'replaced'};
10768 sub replace_placeholders($$$$) {
10769 # Replace foo{}bar with fooargbar
10770 # Input:
10771 # $targetref = command as shell words
10772 # $quote = should everything be quoted?
10773 # $quote_arg = should replaced arguments be quoted?
10774 # Uses:
10775 # @Arg::arg = arguments as strings to be use in {= =}
10776 # Returns:
10777 # @target with placeholders replaced
10778 my $self = shift;
10779 my $targetref = shift;
10780 my $quote = shift;
10781 my $quote_arg = shift;
10782 my %replace;
10784 # Token description:
10785 # \0spc = unquoted space
10786 # \0end = last token element
10787 # \0ign = dummy token to be ignored
10788 # \257<...\257> = replacement expression
10789 # " " = quoted space, that splits -X group
10790 # text = normal text - possibly part of -X group
10791 my $spacer = 0;
10792 my @tokens = grep { length $_ > 0 } map {
10793 if(/^\257<|^ $/) {
10794 # \257<...\257> or space
10796 } else {
10797 # Split each space/tab into a token
10798 split /(?=\s)|(?<=\s)/
10801 # Split \257< ... \257> into own token
10802 map { split /(?=\257<)|(?<=\257>)/ }
10803 # Insert "\0spc" between every element
10804 # This space should never be quoted
10805 map { $spacer++ ? ("\0spc",$_) : $_ }
10806 map { $_ eq "" ? "\0empty" : $_ }
10807 @$targetref;
10809 if(not @tokens) {
10810 # @tokens is empty: Return empty array
10811 return @tokens;
10813 ::debug("replace", "Tokens ".join":",@tokens,"\n");
10814 # Make it possible to use $arg[2] in {= =}
10815 *Arg::arg = $self->{'arg_list_flat_orig'};
10816 # Flat list:
10817 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
10818 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
10819 if(not @{$self->{'arg_list_flat'}}) {
10820 @{$self->{'arg_list_flat'}} = Arg->new("");
10822 my $argref = $self->{'arg_list_flat'};
10823 # Number of arguments - used for positional arguments
10824 my $n = $#$argref+1;
10826 # $self is actually a CommandLine-object,
10827 # but it looks nice to be able to say {= $job->slot() =}
10828 my $job = $self;
10829 # @replaced = tokens with \257< \257> replaced
10830 my @replaced;
10831 if($self->{'context_replace'}) {
10832 my @ctxgroup;
10833 for my $t (@tokens,"\0end") {
10834 # \0end = last token was end of tokens.
10835 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
10836 # Context group complete: Replace in it
10837 if(grep { /^\257</ } @ctxgroup) {
10838 # Context group contains a replacement string:
10839 # Copy once per arg
10840 my $space = "\0ign";
10841 for my $arg (@$argref) {
10842 my $normal_replace;
10843 # Push output
10844 # Put unquoted space before each context group
10845 # except the first
10846 CORE::push @replaced, $space, map {
10847 $a = $_;
10848 if($a =~
10849 s{\257<(-?\d+)?(.*)\257>}
10851 if($1) {
10852 # Positional replace
10853 # Find the relevant arg and replace it
10854 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
10855 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10856 replace($2,$quote_arg,$self)
10857 : "");
10858 } else {
10859 # Normal replace
10860 $normal_replace ||= 1;
10861 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10863 }sgxe) {
10864 # Token is \257<..\257>
10865 } else {
10866 if($Global::escape_string_present) {
10867 # Command line contains \257:
10868 # Unescape it \257\256 => \257
10869 $a =~ s/\257\256/\257/g;
10873 } @ctxgroup;
10874 $normal_replace or last;
10875 $space = "\0spc";
10877 } else {
10878 # Context group has no a replacement string: Copy it once
10879 CORE::push @replaced, map {
10880 $Global::escape_string_present and s/\257\256/\257/g; $_;
10881 } @ctxgroup;
10883 # New context group
10884 @ctxgroup=();
10886 if($t eq "\0spc" or $t eq " ") {
10887 CORE::push @replaced,$t;
10888 } else {
10889 CORE::push @ctxgroup,$t;
10892 } else {
10893 # @group = @token
10894 # Replace in group
10895 # Push output
10896 # repquote = no if {} first on line, no if $quote, yes otherwise
10897 for my $t (@tokens) {
10898 if($t =~ /^\257</) {
10899 my $space = "\0ign";
10900 for my $arg (@$argref) {
10901 my $normal_replace;
10902 $a = $t;
10903 $a =~
10904 s{\257<(-?\d+)?(.*)\257>}
10906 if($1) {
10907 # Positional replace
10908 # Find the relevant arg and replace it
10909 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
10910 # If defined: replace
10911 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10912 replace($2,$quote_arg,$self)
10913 : "");
10914 } else {
10915 # Normal replace
10916 $normal_replace ||= 1;
10917 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10919 }sgxe;
10920 CORE::push @replaced, $space, $a;
10921 $normal_replace or last;
10922 $space = "\0spc";
10924 } else {
10925 # No replacement
10926 CORE::push @replaced, map {
10927 $Global::escape_string_present and s/\257\256/\257/g; $_;
10928 } $t;
10932 *Arg::arg = [];
10933 ::debug("replace","Replaced: ".join":",@replaced,"\n");
10935 # Put tokens into groups that may be quoted.
10936 my @quotegroup;
10937 my @quoted;
10938 for (map { $_ eq "\0empty" ? "" : $_ }
10939 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
10940 @replaced, "\0end") {
10941 if($_ eq "\0spc" or $_ eq "\0end") {
10942 # \0spc splits quotable groups
10943 if($quote) {
10944 if(@quotegroup) {
10945 CORE::push @quoted, ::Q(join"",@quotegroup);;
10947 } else {
10948 CORE::push @quoted, join"",@quotegroup;
10950 @quotegroup = ();
10951 } else {
10952 CORE::push @quotegroup, $_;
10955 ::debug("replace","Quoted: ".join":",@quoted,"\n");
10956 return wantarray ? @quoted : "@quoted";
10959 sub skip($) {
10960 # Skip this job
10961 my $self = shift;
10962 $self->{'skip'} = 1;
10966 package CommandLineQueue;
10968 sub new($) {
10969 my $class = shift;
10970 my $commandref = shift;
10971 my $read_from = shift;
10972 my $context_replace = shift || 0;
10973 my $max_number_of_args = shift;
10974 my $transfer_files = shift;
10975 my $return_files = shift;
10976 my @unget = ();
10977 my $posrpl;
10978 my ($replacecount_ref, $len_ref);
10979 my @command = @$commandref;
10980 my $seq = 1;
10981 # Replace replacement strings with {= perl expr =}
10982 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
10983 @command = merge_rpl_parts(@command);
10985 # Protect matching inside {= perl expr =}
10986 # by replacing {= and =} with \257< and \257>
10987 # in options that can contain replacement strings:
10988 # @command, --transferfile, --return,
10989 # --tagstring, --workdir, --results
10990 for(@command, @$transfer_files, @$return_files,
10991 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
10992 # Skip if undefined
10993 $_ or next;
10994 # Escape \257 => \257\256
10995 $Global::escape_string_present += s/\257/\257\256/g;
10996 # Needs to match rightmost left parens (Perl defaults to leftmost)
10997 # to deal with: {={==} and {={==}=}
10998 # Replace {= -> \257< and =} -> \257>
11000 # Complex way to do:
11001 # s/{=(.*)=}/\257<$1\257>/g
11002 # which would not work
11003 s[\Q$Global::parensleft\E # Match {=
11004 # Match . unless the next string is {= or =}
11005 # needed to force matching the shortest {= =}
11006 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
11007 \Q$Global::parensright\E ] # Match =}
11008 {\257<$1\257>}gxs;
11009 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
11010 # Replace long --rpl's before short ones, as a short may be a
11011 # substring of a long:
11012 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
11014 # Replace the shorthand string (--rpl)
11015 # with the {= perl expr =}
11017 # Avoid searching for shorthand strings inside existing {= perl expr =}
11019 # Replace $$1 in {= perl expr =} with groupings in shorthand string
11021 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
11022 # echo {/.tar/.gz} ::: UU.tar.gz
11023 my ($prefix,$grp_regexp,$postfix) =
11024 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
11025 ( \(.*\) )? # Group capture regexp - e.g (.*)
11026 ( [^)]* )$ # Postfix - e.g }
11027 /xs;
11028 $grp_regexp ||= '';
11029 my $rplval = $Global::rpl{$rpl};
11030 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11031 # Don't replace after \257 unless \257>
11032 \Q$prefix\E $grp_regexp \Q$postfix\E}
11034 # The start remains the same
11035 my $unchanged = $1;
11036 # Dummy entry to start at 1.
11037 my @grp = (1);
11038 # $2 = first ()-group in $grp_regexp
11039 # Put $2 in $grp[1], Put $3 in $grp[2]
11040 # so first ()-group in $grp_regexp is $grp[1];
11041 for(my $i = 2; defined $grp[$#grp]; $i++) {
11042 push @grp, eval '$'.$i;
11044 my $rv = $rplval;
11045 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11046 # in the code to be executed
11047 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11048 # prepend with $_pAr_gRp1 = perlquote($1),
11049 my $set_args = "";
11050 for(my $i = 1;defined $grp[$i]; $i++) {
11051 $set_args .= "\$_pAr_gRp$i = \"" .
11052 ::perl_quote_scalar($grp[$i]) . "\";";
11054 $unchanged . "\257<" . $set_args . $rv . "\257>"
11055 }gxes) {
11057 # Do the same for the positional replacement strings
11058 $posrpl = $rpl;
11059 if($posrpl =~ s/^\{//) {
11060 # Only do this if the shorthand start with {
11061 $prefix=~s/^\{//;
11062 # Don't replace after \257 unless \257>
11063 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11064 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
11066 # The start remains the same
11067 my $unchanged = $1;
11068 my $position = $2;
11069 # Dummy entry to start at 1.
11070 my @grp = (1);
11071 # $3 = first ()-group in $grp_regexp
11072 # Put $3 in $grp[1], Put $4 in $grp[2]
11073 # so first ()-group in $grp_regexp is $grp[1];
11074 for(my $i = 3; defined $grp[$#grp]; $i++) {
11075 push @grp, eval '$'.$i;
11077 my $rv = $rplval;
11078 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11079 # in the code to be executed
11080 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11081 # prepend with $_pAr_gRp1 = perlquote($1),
11082 my $set_args = "";
11083 for(my $i = 1;defined $grp[$i]; $i++) {
11084 $set_args .= "\$_pAr_gRp$i = \"" .
11085 ::perl_quote_scalar($grp[$i]) . "\";";
11087 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
11088 }gxes) {
11094 # Add {} if no replacement strings in @command
11095 ($replacecount_ref, $len_ref, @command) =
11096 replacement_counts_and_lengths($transfer_files,$return_files,@command);
11097 if("@command" =~ /^[^ \t\n=]*\257</) {
11098 # Replacement string is (part of) the command (and not just
11099 # argument or variable definition V1={})
11100 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11101 # Do no quote (Otherwise it will fail if the input contains spaces)
11102 $Global::quote_replace = 0;
11105 if($opt::sqlmaster and $Global::sql->append()) {
11106 $seq = $Global::sql->max_seq() + 1;
11109 return bless {
11110 'unget' => \@unget,
11111 'command' => \@command,
11112 'replacecount' => $replacecount_ref,
11113 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
11114 'context_replace' => $context_replace,
11115 'len' => $len_ref,
11116 'max_number_of_args' => $max_number_of_args,
11117 'size' => undef,
11118 'transfer_files' => $transfer_files,
11119 'return_files' => $return_files,
11120 'seq' => $seq,
11121 }, ref($class) || $class;
11124 sub merge_rpl_parts($) {
11125 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11126 # Input:
11127 # @in = the @command as given by the user
11128 # Uses:
11129 # $Global::parensleft
11130 # $Global::parensright
11131 # Returns:
11132 # @command with parts merged to keep {= and =} as one
11133 my @in = @_;
11134 my @out;
11135 my $l = quotemeta($Global::parensleft);
11136 my $r = quotemeta($Global::parensright);
11138 while(@in) {
11139 my $s = shift @in;
11140 $_ = $s;
11141 # Remove matching (right most) parens
11142 while(s/(.*)$l.*?$r/$1/os) {}
11143 if(/$l/o) {
11144 # Missing right parens
11145 while(@in) {
11146 $s .= " ".shift @in;
11147 $_ = $s;
11148 while(s/(.*)$l.*?$r/$1/os) {}
11149 if(not /$l/o) {
11150 last;
11154 push @out, $s;
11156 return @out;
11159 sub replacement_counts_and_lengths($$@) {
11160 # Count the number of different replacement strings.
11161 # Find the lengths of context for context groups and non-context
11162 # groups.
11163 # If no {} found in @command: add it to @command
11165 # Input:
11166 # \@transfer_files = array of filenames to transfer
11167 # \@return_files = array of filenames to return
11168 # @command = command template
11169 # Output:
11170 # \%replacecount, \%len, @command
11171 my $transfer_files = shift;
11172 my $return_files = shift;
11173 my @command = @_;
11174 my (%replacecount,%len);
11175 my $sum = 0;
11176 while($sum == 0) {
11177 # Count how many times each replacement string is used
11178 my @cmd = @command;
11179 my $contextlen = 0;
11180 my $noncontextlen = 0;
11181 my $contextgroups = 0;
11182 for my $c (@cmd) {
11183 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
11184 # %replacecount = { "perlexpr" => number of times seen }
11185 # e.g { "s/a/b/" => 2 }
11186 $replacecount{$1}++;
11187 $sum++;
11189 # Measure the length of the context around the {= perl expr =}
11190 # Use that {=...=} has been replaced with \000 above
11191 # So there is no need to deal with \257<
11192 while($c =~ s/ (\S*\000\S*) //xs) {
11193 my $w = $1;
11194 $w =~ tr/\000//d; # Remove all \000's
11195 $contextlen += length($w);
11196 $contextgroups++;
11198 # All {= perl expr =} have been removed: The rest is non-context
11199 $noncontextlen += length $c;
11201 for(@$transfer_files, @$return_files,
11202 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11203 # Options that can contain replacement strings
11204 $_ or next;
11205 my $t = $_;
11206 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
11207 # %replacecount = { "perlexpr" => number of times seen }
11208 # e.g { "$_++" => 2 }
11209 # But for tagstring we just need to mark it as seen
11210 $replacecount{$1} ||= 1;
11213 if($opt::bar) {
11214 # If the command does not contain {} force it to be computed
11215 # as it is being used by --bar
11216 $replacecount{""} ||= 1;
11219 $len{'context'} = 0+$contextlen;
11220 $len{'noncontext'} = $noncontextlen;
11221 $len{'contextgroups'} = $contextgroups;
11222 $len{'noncontextgroups'} = @cmd-$contextgroups;
11223 ::debug("length", "@command Context: ", $len{'context'},
11224 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
11225 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
11226 if($sum == 0) {
11227 if(not @command) {
11228 # Default command = {}
11229 @command = ("\257<\257>");
11230 } elsif(($opt::pipe or $opt::pipepart)
11231 and not $opt::fifo and not $opt::cat) {
11232 # With --pipe / --pipe-part you can have no replacement
11233 last;
11234 } else {
11235 # Append {} to the command if there are no {...}'s and no {=...=}
11236 push @command, ("\257<\257>");
11240 return(\%replacecount,\%len,@command);
11243 sub get($) {
11244 my $self = shift;
11245 if(@{$self->{'unget'}}) {
11246 my $cmd_line = shift @{$self->{'unget'}};
11247 return ($cmd_line);
11248 } else {
11249 if($opt::sqlworker) {
11250 # Get the sequence number from the SQL table
11251 $self->set_seq($SQL::next_seq);
11252 # Get the command from the SQL table
11253 $self->{'command'} = $SQL::command_ref;
11254 my @command;
11255 # Recompute replace counts based on the read command
11256 ($self->{'replacecount'},
11257 $self->{'len'}, @command) =
11258 replacement_counts_and_lengths($self->{'transfer_files'},
11259 $self->{'return_files'},
11260 @$SQL::command_ref);
11261 if("@command" =~ /^[^ \t\n=]*\257</) {
11262 # Replacement string is (part of) the command (and not just
11263 # argument or variable definition V1={})
11264 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11265 # Do no quote (Otherwise it will fail if the input contains spaces)
11266 $Global::quote_replace = 0;
11270 my $cmd_line = CommandLine->new($self->seq(),
11271 $self->{'command'},
11272 $self->{'arg_queue'},
11273 $self->{'context_replace'},
11274 $self->{'max_number_of_args'},
11275 $self->{'transfer_files'},
11276 $self->{'return_files'},
11277 $self->{'replacecount'},
11278 $self->{'len'},
11280 $cmd_line->populate();
11281 ::debug("run","cmd_line->number_of_args ",
11282 $cmd_line->number_of_args(), "\n");
11283 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
11284 if($cmd_line->replaced() eq "") {
11285 # Empty command - pipe requires a command
11286 ::error("--pipe/--pipepart must have a command to pipe into ".
11287 "(e.g. 'cat').");
11288 ::wait_and_exit(255);
11290 } elsif($cmd_line->number_of_args() == 0) {
11291 # We did not get more args - maybe at EOF string?
11292 return undef;
11294 $self->set_seq($self->seq()+1);
11295 return $cmd_line;
11299 sub unget($) {
11300 my $self = shift;
11301 unshift @{$self->{'unget'}}, @_;
11304 sub empty($) {
11305 my $self = shift;
11306 my $empty = (not @{$self->{'unget'}}) &&
11307 $self->{'arg_queue'}->empty();
11308 ::debug("run", "CommandLineQueue->empty $empty");
11309 return $empty;
11312 sub seq($) {
11313 my $self = shift;
11314 return $self->{'seq'};
11317 sub set_seq($$) {
11318 my $self = shift;
11319 $self->{'seq'} = shift;
11322 sub quote_args($) {
11323 my $self = shift;
11324 # If there is not command emulate |bash
11325 return $self->{'command'};
11329 package Limits::Command;
11331 # Maximal command line length (for -m and -X)
11332 sub max_length($) {
11333 # Find the max_length of a command line and cache it
11334 # Returns:
11335 # number of chars on the longest command line allowed
11336 if(not $Limits::Command::line_max_len) {
11337 # Disk cache of max command line length
11338 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
11339 "/linelen";
11340 my $cached_limit;
11341 if(-e $len_cache) {
11342 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
11343 $cached_limit = <$fh>;
11344 close $fh;
11345 } else {
11346 $cached_limit = real_max_length();
11347 # If $HOME is write protected: Do not fail
11348 my $dir = ::dirname($len_cache);
11349 -d $dir or eval { File::Path::mkpath($dir); };
11350 open(my $fh, ">", $len_cache);
11351 print $fh $cached_limit;
11352 close $fh;
11354 $Limits::Command::line_max_len = tmux_length($cached_limit);
11355 if($opt::max_chars) {
11356 if($opt::max_chars <= $cached_limit) {
11357 $Limits::Command::line_max_len = $opt::max_chars;
11358 } else {
11359 ::warning("Value for -s option should be < $cached_limit.");
11363 return int($Limits::Command::line_max_len);
11366 sub real_max_length($) {
11367 # Find the max_length of a command line
11368 # Returns:
11369 # The maximal command line length
11370 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
11371 my $upper = 100_000_000;
11372 # 1000 is supported everywhere, so the search can start anywhere 1..999
11373 # 324 makes the search much faster on CygWin, so let us use that
11374 my $len = 324;
11375 do {
11376 if($len > $upper) { return $len };
11377 $len *= 16;
11378 } while (is_acceptable_command_line_length($len));
11379 # Then search for the actual max length between 0 and upper bound
11380 return binary_find_max_length(int($len/16),$len);
11383 # Prototype forwarding
11384 sub binary_find_max_length($$);
11385 sub binary_find_max_length($$) {
11386 # Given a lower and upper bound find the max_length of a command line
11387 # Returns:
11388 # number of chars on the longest command line allowed
11389 my ($lower, $upper) = (@_);
11390 if($lower == $upper or $lower == $upper-1) { return $lower; }
11391 my $middle = int (($upper-$lower)/2 + $lower);
11392 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
11393 if (is_acceptable_command_line_length($middle)) {
11394 return binary_find_max_length($middle,$upper);
11395 } else {
11396 return binary_find_max_length($lower,$middle);
11400 sub is_acceptable_command_line_length($) {
11401 # Test if a command line of this length can run
11402 # in the current environment
11403 # Returns:
11404 # 0 if the command line length is too long
11405 # 1 otherwise
11406 my $len = shift;
11407 if($Global::parallel_env) {
11408 $len += length $Global::parallel_env;
11410 ::qqx("true "."x"x$len);
11411 ::debug("init", "$len=$? ");
11412 return not $?;
11415 sub tmux_length($) {
11416 # If $opt::tmux set, find the limit for tmux
11417 # tmux 1.8 has a 2kB limit
11418 # tmux 1.9 has a 16kB limit
11419 # tmux 2.0 has a 16kB limit
11420 # tmux 2.1 has a 16kB limit
11421 # tmux 2.2 has a 16kB limit
11422 # Input:
11423 # $len = maximal command line length
11424 # Returns:
11425 # $tmux_len = maximal length runable in tmux
11426 local $/ = "\n";
11427 my $len = shift;
11428 if($opt::tmux) {
11429 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11430 if(not ::which($ENV{'PARALLEL_TMUX'})) {
11431 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
11432 ::wait_and_exit(255);
11434 my @out;
11435 for my $l (1, 2020, 16320, 100000, $len) {
11436 my $tmpfile = ::tmpname("tms");
11437 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
11438 " -S $tmpfile new-session -d -n echo $l".
11439 ("x"x$l). " && echo $l; rm -f $tmpfile";
11440 push @out, ::qqx($tmuxcmd);
11441 ::rm($tmpfile);
11443 ::debug("tmux","tmux-out ",@out);
11444 chomp @out;
11445 # The arguments is given 3 times on the command line
11446 # and the wrapping is around 30 chars
11447 # (29 for tmux1.9, 33 for tmux1.8)
11448 my $tmux_len = ::max(@out);
11449 $len = ::min($len,int($tmux_len/4-33));
11450 ::debug("tmux","tmux-length ",$len);
11452 return $len;
11456 package RecordQueue;
11458 sub new($) {
11459 my $class = shift;
11460 my $fhs = shift;
11461 my $colsep = shift;
11462 my @unget = ();
11463 my $arg_sub_queue;
11464 if($opt::sqlworker) {
11465 # Open SQL table
11466 $arg_sub_queue = SQLRecordQueue->new();
11467 } elsif(defined $colsep) {
11468 # Open one file with colsep or CSV
11469 $arg_sub_queue = RecordColQueue->new($fhs);
11470 } else {
11471 # Open one or more files if multiple -a
11472 $arg_sub_queue = MultifileQueue->new($fhs);
11474 return bless {
11475 'unget' => \@unget,
11476 'arg_number' => 0,
11477 'arg_sub_queue' => $arg_sub_queue,
11478 }, ref($class) || $class;
11481 sub get($) {
11482 # Returns:
11483 # reference to array of Arg-objects
11484 my $self = shift;
11485 if(@{$self->{'unget'}}) {
11486 $self->{'arg_number'}++;
11487 # Flush cached computed replacements in Arg-objects
11488 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11489 my $ret = shift @{$self->{'unget'}};
11490 if($ret) {
11491 map { $_->flush_cache() } @$ret;
11493 return $ret;
11495 my $ret = $self->{'arg_sub_queue'}->get();
11496 if($ret) {
11497 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
11498 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
11499 # to mean no-string
11500 ::warning("A NUL character in the input was replaced with \\0.",
11501 "NUL cannot be passed through in the argument list.",
11502 "Did you mean to use the --null option?");
11503 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
11504 # Replace \0 with \\0
11505 my $a = $_->orig();
11506 $a =~ s/\0/\\0/g;
11507 $_->set_orig($a);
11510 if(defined $Global::max_number_of_args
11511 and $Global::max_number_of_args == 0) {
11512 ::debug("run", "Read 1 but return 0 args\n");
11513 # \0noarg => nothing (not the empty string)
11514 map { $_->set_orig("\0noarg"); } @$ret;
11516 # Flush cached computed replacements in Arg-objects
11517 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11518 map { $_->flush_cache() } @$ret;
11520 return $ret;
11523 sub unget($) {
11524 my $self = shift;
11525 ::debug("run", "RecordQueue-unget\n");
11526 $self->{'arg_number'} -= @_;
11527 unshift @{$self->{'unget'}}, @_;
11530 sub empty($) {
11531 my $self = shift;
11532 my $empty = (not @{$self->{'unget'}}) &&
11533 $self->{'arg_sub_queue'}->empty();
11534 ::debug("run", "RecordQueue->empty $empty");
11535 return $empty;
11538 sub arg_number($) {
11539 my $self = shift;
11540 return $self->{'arg_number'};
11544 package RecordColQueue;
11546 sub new($) {
11547 my $class = shift;
11548 my $fhs = shift;
11549 my @unget = ();
11550 my $arg_sub_queue = MultifileQueue->new($fhs);
11551 return bless {
11552 'unget' => \@unget,
11553 'arg_sub_queue' => $arg_sub_queue,
11554 }, ref($class) || $class;
11557 sub get($) {
11558 # Returns:
11559 # reference to array of Arg-objects
11560 my $self = shift;
11561 if(@{$self->{'unget'}}) {
11562 return shift @{$self->{'unget'}};
11564 my $unget_ref = $self->{'unget'};
11565 if($self->{'arg_sub_queue'}->empty()) {
11566 return undef;
11568 my $in_record = $self->{'arg_sub_queue'}->get();
11569 if(defined $in_record) {
11570 my @out_record = ();
11571 for my $arg (@$in_record) {
11572 ::debug("run", "RecordColQueue::arg $arg\n");
11573 my $line = $arg->orig();
11574 ::debug("run", "line='$line'\n");
11575 if($line ne "") {
11576 if($opt::csv) {
11577 # Parse CSV
11578 chomp $line;
11579 if(not $Global::csv->parse($line)) {
11580 die "CSV has unexpected format: ^$line^";
11582 for($Global::csv->fields()) {
11583 push @out_record, Arg->new($_);
11585 } else {
11586 for my $s (split /$opt::colsep/o, $line, -1) {
11587 push @out_record, Arg->new($s);
11590 } else {
11591 push @out_record, Arg->new("");
11594 return \@out_record;
11595 } else {
11596 return undef;
11600 sub unget($) {
11601 my $self = shift;
11602 ::debug("run", "RecordColQueue-unget '@_'\n");
11603 unshift @{$self->{'unget'}}, @_;
11606 sub empty($) {
11607 my $self = shift;
11608 my $empty = (not @{$self->{'unget'}}) &&
11609 $self->{'arg_sub_queue'}->empty();
11610 ::debug("run", "RecordColQueue->empty $empty");
11611 return $empty;
11615 package SQLRecordQueue;
11617 sub new($) {
11618 my $class = shift;
11619 my @unget = ();
11620 return bless {
11621 'unget' => \@unget,
11622 }, ref($class) || $class;
11625 sub get($) {
11626 # Returns:
11627 # reference to array of Arg-objects
11628 my $self = shift;
11629 if(@{$self->{'unget'}}) {
11630 return shift @{$self->{'unget'}};
11632 return $Global::sql->get_record();
11635 sub unget($) {
11636 my $self = shift;
11637 ::debug("run", "SQLRecordQueue-unget '@_'\n");
11638 unshift @{$self->{'unget'}}, @_;
11641 sub empty($) {
11642 my $self = shift;
11643 if(@{$self->{'unget'}}) { return 0; }
11644 my $get = $self->get();
11645 if(defined $get) {
11646 $self->unget($get);
11648 my $empty = not $get;
11649 ::debug("run", "SQLRecordQueue->empty $empty");
11650 return $empty;
11654 package MultifileQueue;
11656 @Global::unget_argv=();
11658 sub new($$) {
11659 my $class = shift;
11660 my $fhs = shift;
11661 for my $fh (@$fhs) {
11662 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
11663 ::warning("Input is read from the terminal. You are either an expert",
11664 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
11665 "::: or :::: or -a or to pipe data into parallel. If so",
11666 "consider going through the tutorial: man parallel_tutorial",
11667 "Press CTRL-D to exit.");
11670 return bless {
11671 'unget' => \@Global::unget_argv,
11672 'fhs' => $fhs,
11673 'arg_matrix' => undef,
11674 }, ref($class) || $class;
11677 sub get($) {
11678 my $self = shift;
11679 if($opt::link) {
11680 return $self->link_get();
11681 } else {
11682 return $self->nest_get();
11686 sub unget($) {
11687 my $self = shift;
11688 ::debug("run", "MultifileQueue-unget '@_'\n");
11689 unshift @{$self->{'unget'}}, @_;
11692 sub empty($) {
11693 my $self = shift;
11694 my $empty = (not @Global::unget_argv) &&
11695 not @{$self->{'unget'}};
11696 for my $fh (@{$self->{'fhs'}}) {
11697 $empty &&= eof($fh);
11699 ::debug("run", "MultifileQueue->empty $empty ");
11700 return $empty;
11703 sub link_get($) {
11704 my $self = shift;
11705 if(@{$self->{'unget'}}) {
11706 return shift @{$self->{'unget'}};
11708 my @record = ();
11709 my $prepend;
11710 my $empty = 1;
11711 for my $fh (@{$self->{'fhs'}}) {
11712 my $arg = read_arg_from_fh($fh);
11713 if(defined $arg) {
11714 # Record $arg for recycling at end of file
11715 push @{$self->{'arg_matrix'}{$fh}}, $arg;
11716 push @record, $arg;
11717 $empty = 0;
11718 } else {
11719 ::debug("run", "EOA ");
11720 # End of file: Recycle arguments
11721 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
11722 # return last @{$args->{'args'}{$fh}};
11723 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
11726 if($empty) {
11727 return undef;
11728 } else {
11729 return \@record;
11733 sub nest_get($) {
11734 my $self = shift;
11735 if(@{$self->{'unget'}}) {
11736 return shift @{$self->{'unget'}};
11738 my @record = ();
11739 my $prepend;
11740 my $empty = 1;
11741 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
11742 if(not $self->{'arg_matrix'}) {
11743 # Initialize @arg_matrix with one arg from each file
11744 # read one line from each file
11745 my @first_arg_set;
11746 my $all_empty = 1;
11747 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
11748 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11749 if(defined $arg) {
11750 $all_empty = 0;
11752 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
11753 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
11755 if($all_empty) {
11756 # All filehandles were at eof or eof-string
11757 return undef;
11759 return [@first_arg_set];
11762 # Treat the case with one input source special. For multiple
11763 # input sources we need to remember all previously read values to
11764 # generate all combinations. But for one input source we can
11765 # forget the value after first use.
11766 if($no_of_inputsources == 1) {
11767 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
11768 if(defined($arg)) {
11769 return [$arg];
11771 return undef;
11773 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
11774 if(eof($self->{'fhs'}[$fhno])) {
11775 next;
11776 } else {
11777 # read one
11778 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11779 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
11780 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
11781 $self->{'arg_matrix'}[$fhno][$len] = $arg;
11782 # make all new combinations
11783 my @combarg = ();
11784 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
11785 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
11786 # Is input source --link'ed to the next?
11787 $opt::linkinputsource[$fhn+1]);
11789 # Find only combinations with this new entry
11790 $combarg[2*$fhno] = [$len,$len];
11791 # map combinations
11792 # [ 1, 3, 7 ], [ 2, 4, 1 ]
11793 # =>
11794 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
11795 my @mapped;
11796 for my $c (expand_combinations(@combarg)) {
11797 my @a;
11798 for my $n (0 .. $no_of_inputsources - 1 ) {
11799 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
11801 push @mapped, \@a;
11803 # append the mapped to the ungotten arguments
11804 push @{$self->{'unget'}}, @mapped;
11805 # get the first
11806 if(@mapped) {
11807 return shift @{$self->{'unget'}};
11811 # all are eof or at EOF string; return from the unget queue
11812 return shift @{$self->{'unget'}};
11815 sub read_arg_from_fh($) {
11816 # Read one Arg from filehandle
11817 # Returns:
11818 # Arg-object with one read line
11819 # undef if end of file
11820 my $fh = shift;
11821 my $prepend;
11822 my $arg;
11823 my $half_record = 0;
11824 do {{
11825 # This makes 10% faster
11826 if(not defined ($arg = <$fh>)) {
11827 if(defined $prepend) {
11828 return Arg->new($prepend);
11829 } else {
11830 return undef;
11833 if($opt::csv) {
11834 # We need to read a full CSV line.
11835 if(($arg =~ y/"/"/) % 2 ) {
11836 # The number of " on the line is uneven:
11837 # If we were in a half_record => we have a full record now
11838 # If we were ouside a half_record => we are in a half record now
11839 $half_record = not $half_record;
11841 if($half_record) {
11842 # CSV half-record with quoting:
11843 # col1,"col2 2""x3"" board newline <-this one
11844 # cont",col3
11845 $prepend .= $arg;
11846 redo;
11847 } else {
11848 # Now we have a full CSV record
11851 # Remove delimiter
11852 chomp $arg;
11853 if($Global::end_of_file_string and
11854 $arg eq $Global::end_of_file_string) {
11855 # Ignore the rest of input file
11856 close $fh;
11857 ::debug("run", "EOF-string ($arg) met\n");
11858 if(defined $prepend) {
11859 return Arg->new($prepend);
11860 } else {
11861 return undef;
11864 if(defined $prepend) {
11865 $arg = $prepend.$arg; # For line continuation
11866 undef $prepend;
11868 if($Global::ignore_empty) {
11869 if($arg =~ /^\s*$/) {
11870 redo; # Try the next line
11873 if($Global::max_lines) {
11874 if($arg =~ /\s$/) {
11875 # Trailing space => continued on next line
11876 $prepend = $arg;
11877 redo;
11880 }} while (1 == 0); # Dummy loop {{}} for redo
11881 if(defined $arg) {
11882 return Arg->new($arg);
11883 } else {
11884 ::die_bug("multiread arg undefined");
11888 # Prototype forwarding
11889 sub expand_combinations(@);
11890 sub expand_combinations(@) {
11891 # Input:
11892 # ([xmin,xmax], [ymin,ymax], ...)
11893 # Returns: ([x,y,...],[x,y,...])
11894 # where xmin <= x <= xmax and ymin <= y <= ymax
11895 my $minmax_ref = shift;
11896 my $link = shift; # This is linked to the next input source
11897 my $xmin = $$minmax_ref[0];
11898 my $xmax = $$minmax_ref[1];
11899 my @p;
11900 if(@_) {
11901 my @rest = expand_combinations(@_);
11902 if($link) {
11903 # Linked to next col with --link/:::+/::::+
11904 # TODO BUG does not wrap values if not same number of vals
11905 push(@p, map { [$$_[0], @$_] }
11906 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
11907 } else {
11908 # If there are more columns: Compute those recursively
11909 for(my $x = $xmin; $x <= $xmax; $x++) {
11910 push @p, map { [$x, @$_] } @rest;
11913 } else {
11914 for(my $x = $xmin; $x <= $xmax; $x++) {
11915 push @p, [$x];
11918 return @p;
11922 package Arg;
11924 sub new($) {
11925 my $class = shift;
11926 my $orig = shift;
11927 my @hostgroups;
11928 if($opt::hostgroups) {
11929 if($orig =~ s:@(.+)::) {
11930 # We found hostgroups on the arg
11931 @hostgroups = split(/\+/, $1);
11932 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
11933 # This hostgroup is not defined using -S
11934 # Add it
11935 ::warning("Adding hostgroups: @hostgroups");
11936 # Add sshlogin
11937 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
11938 my $sshlogin = SSHLogin->new($_);
11939 my $sshlogin_string = $sshlogin->string();
11940 $Global::host{$sshlogin_string} = $sshlogin;
11941 $Global::hostgroups{$sshlogin_string} = 1;
11944 } else {
11945 # No hostgroup on the arg => any hostgroup
11946 @hostgroups = (keys %Global::hostgroups);
11949 return bless {
11950 'orig' => $orig,
11951 'hostgroups' => \@hostgroups,
11952 }, ref($class) || $class;
11955 sub Q($) {
11956 # Q alias for ::shell_quote_scalar
11957 my $ret = ::Q($_[0]);
11958 no warnings 'redefine';
11959 *Q = \&::Q;
11960 return $ret;
11963 sub pQ($) {
11964 # pQ alias for ::perl_quote_scalar
11965 my $ret = ::pQ($_[0]);
11966 no warnings 'redefine';
11967 *pQ = \&::pQ;
11968 return $ret;
11971 sub total_jobs() {
11972 return $Global::JobQueue->total_jobs();
11976 my %perleval;
11977 my $job;
11978 sub skip() {
11979 # shorthand for $job->skip();
11980 $job->skip();
11982 sub slot() {
11983 # shorthand for $job->slot();
11984 $job->slot();
11986 sub seq() {
11987 # shorthand for $job->seq();
11988 $job->seq();
11990 sub uq() {
11991 # Do not quote this arg
11992 $Global::unquote_arg = 1;
11995 sub replace($$$$) {
11996 # Calculates the corresponding value for a given perl expression
11997 # Returns:
11998 # The calculated string (quoted if asked for)
11999 my $self = shift;
12000 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
12001 my $quote = shift; # should the string be quoted?
12002 # This is actually a CommandLine-object,
12003 # but it looks nice to be able to say {= $job->slot() =}
12004 $job = shift;
12005 $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
12006 if(not $Global::cache_replacement_eval
12008 not $self->{'cache'}{$perlexpr}) {
12009 # Only compute the value once
12010 # Use $_ as the variable to change
12011 local $_;
12012 if($Global::trim eq "n") {
12013 $_ = $self->{'orig'};
12014 } else {
12015 # Trim the input
12016 $_ = trim_of($self->{'orig'});
12018 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
12019 if(not $perleval{$perlexpr}) {
12020 # Make an anonymous function of the $perlexpr
12021 # And more importantly: Compile it only once
12022 if($perleval{$perlexpr} =
12023 eval('sub { no strict; no warnings; my $job = shift; '.
12024 $perlexpr.' }')) {
12025 # All is good
12026 } else {
12027 # The eval failed. Maybe $perlexpr is invalid perl?
12028 ::error("Cannot use $perlexpr: $@");
12029 ::wait_and_exit(255);
12032 # Execute the function
12033 $perleval{$perlexpr}->($job);
12034 $self->{'cache'}{$perlexpr} = $_;
12035 if($Global::unquote_arg) {
12036 # uq() was called in perlexpr
12037 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
12038 # Reset for next perlexpr
12039 $Global::unquote_arg = 0;
12042 # Return the value quoted if needed
12043 if($self->{'cache'}{'unquote'}{$perlexpr}) {
12044 return($self->{'cache'}{$perlexpr});
12045 } else {
12046 return($quote ? Q($self->{'cache'}{$perlexpr})
12047 : $self->{'cache'}{$perlexpr});
12052 sub flush_cache($) {
12053 # Flush cache of computed values
12054 my $self = shift;
12055 $self->{'cache'} = undef;
12058 sub orig($) {
12059 my $self = shift;
12060 return $self->{'orig'};
12063 sub set_orig($$) {
12064 my $self = shift;
12065 $self->{'orig'} = shift;
12068 sub trim_of($) {
12069 # Removes white space as specifed by --trim:
12070 # n = nothing
12071 # l = start
12072 # r = end
12073 # lr|rl = both
12074 # Returns:
12075 # string with white space removed as needed
12076 my @strings = map { defined $_ ? $_ : "" } (@_);
12077 my $arg;
12078 if($Global::trim eq "n") {
12079 # skip
12080 } elsif($Global::trim eq "l") {
12081 for my $arg (@strings) { $arg =~ s/^\s+//; }
12082 } elsif($Global::trim eq "r") {
12083 for my $arg (@strings) { $arg =~ s/\s+$//; }
12084 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
12085 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
12086 } else {
12087 ::error("--trim must be one of: r l rl lr.");
12088 ::wait_and_exit(255);
12090 return wantarray ? @strings : "@strings";
12094 package TimeoutQueue;
12096 sub new($) {
12097 my $class = shift;
12098 my $delta_time = shift;
12099 my ($pct);
12100 if($delta_time =~ /(\d+(\.\d+)?)%/) {
12101 # Timeout in percent
12102 $pct = $1/100;
12103 $delta_time = 1_000_000;
12105 $delta_time = ::multiply_time_units($delta_time);
12107 return bless {
12108 'queue' => [],
12109 'delta_time' => $delta_time,
12110 'pct' => $pct,
12111 'remedian_idx' => 0,
12112 'remedian_arr' => [],
12113 'remedian' => undef,
12114 }, ref($class) || $class;
12117 sub delta_time($) {
12118 my $self = shift;
12119 return $self->{'delta_time'};
12122 sub set_delta_time($$) {
12123 my $self = shift;
12124 $self->{'delta_time'} = shift;
12127 sub remedian($) {
12128 my $self = shift;
12129 return $self->{'remedian'};
12132 sub set_remedian($$) {
12133 # Set median of the last 999^3 (=997002999) values using Remedian
12135 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
12136 # robust averaging method for large data sets." Journal of the
12137 # American Statistical Association 85.409 (1990): 97-104.
12138 my $self = shift;
12139 my $val = shift;
12140 my $i = $self->{'remedian_idx'}++;
12141 my $rref = $self->{'remedian_arr'};
12142 $rref->[0][$i%999] = $val;
12143 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
12144 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
12145 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
12148 sub update_median_runtime($) {
12149 # Update delta_time based on runtime of finished job if timeout is
12150 # a percentage
12151 my $self = shift;
12152 my $runtime = shift;
12153 if($self->{'pct'}) {
12154 $self->set_remedian($runtime);
12155 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
12156 ::debug("run", "Timeout: $self->{'delta_time'}s ");
12160 sub process_timeouts($) {
12161 # Check if there was a timeout
12162 my $self = shift;
12163 # $self->{'queue'} is sorted by start time
12164 while (@{$self->{'queue'}}) {
12165 my $job = $self->{'queue'}[0];
12166 if($job->endtime()) {
12167 # Job already finished. No need to timeout the job
12168 # This could be because of --keep-order
12169 shift @{$self->{'queue'}};
12170 } elsif($job->is_timedout($self->{'delta_time'})) {
12171 # Need to shift off queue before kill
12172 # because kill calls usleep that calls process_timeouts
12173 shift @{$self->{'queue'}};
12174 ::warning("This job was killed because it timed out:",
12175 $job->replaced());
12176 $job->kill();
12177 } else {
12178 # Because they are sorted by start time the rest are later
12179 last;
12184 sub insert($) {
12185 my $self = shift;
12186 my $in = shift;
12187 push @{$self->{'queue'}}, $in;
12191 package SQL;
12193 sub new($) {
12194 my $class = shift;
12195 my $dburl = shift;
12196 $Global::use{"DBI"} ||= eval "use DBI; 1;";
12197 # +DBURL = append to this DBURL
12198 my $append = $dburl=~s/^\+//;
12199 my %options = parse_dburl(get_alias($dburl));
12200 my %driveralias = ("sqlite" => "SQLite",
12201 "sqlite3" => "SQLite",
12202 "pg" => "Pg",
12203 "postgres" => "Pg",
12204 "postgresql" => "Pg",
12205 "csv" => "CSV",
12206 "oracle" => "Oracle",
12207 "ora" => "Oracle");
12208 my $driver = $driveralias{$options{'databasedriver'}} ||
12209 $options{'databasedriver'};
12210 my $database = $options{'database'};
12211 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
12212 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
12213 my $dsn = "DBI:$driver:dbname=$database$host$port";
12214 my $userid = $options{'user'};
12215 my $password = $options{'password'};;
12216 if(not grep /$driver/, DBI->available_drivers) {
12217 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
12218 ::wait_and_exit(255);
12220 my $dbh = DBI->connect($dsn, $userid, $password,
12221 { RaiseError => 1, AutoInactiveDestroy => 1 })
12222 or die $DBI::errstr;
12224 $dbh->{'PrintWarn'} = $Global::debug || 0;
12225 $dbh->{'PrintError'} = $Global::debug || 0;
12226 $dbh->{'RaiseError'} = 1;
12227 $dbh->{'ShowErrorStatement'} = 1;
12228 $dbh->{'HandleError'} = sub {};
12230 if(not defined $options{'table'}) {
12231 ::error("The DBURL ($dburl) must contain a table.");
12232 ::wait_and_exit(255);
12235 return bless {
12236 'dbh' => $dbh,
12237 'driver' => $driver,
12238 'max_number_of_args' => undef,
12239 'table' => $options{'table'},
12240 'append' => $append,
12241 }, ref($class) || $class;
12244 # Prototype forwarding
12245 sub get_alias($);
12246 sub get_alias($) {
12247 my $alias = shift;
12248 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
12249 if ($alias !~ /^:/) {
12250 return $alias;
12253 # Find the alias
12254 my $path;
12255 if (-l $0) {
12256 ($path) = readlink($0) =~ m|^(.*)/|;
12257 } else {
12258 ($path) = $0 =~ m|^(.*)/|;
12261 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
12262 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12263 for (@deprecated) {
12264 if(-r $_) {
12265 ::warning("$_ is deprecated. ".
12266 "Use .sql/aliases instead (read man sql).");
12269 my @urlalias=();
12270 check_permissions("$ENV{HOME}/.sql/aliases");
12271 check_permissions("$ENV{HOME}/.dburl.aliases");
12272 my @search = ("$ENV{HOME}/.sql/aliases",
12273 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
12274 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12275 for my $alias_file (@search) {
12276 # local $/ needed if -0 set
12277 local $/ = "\n";
12278 if(-r $alias_file) {
12279 open(my $in, "<", $alias_file) || die;
12280 push @urlalias, <$in>;
12281 close $in;
12284 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
12285 # If we saw this before: we have an alias loop
12286 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
12287 ::error("$alias_part is a cyclic alias.");
12288 exit -1;
12289 } else {
12290 push @Private::seen_aliases, $alias_part;
12293 my $dburl;
12294 for (@urlalias) {
12295 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
12298 if($dburl) {
12299 return get_alias($dburl.$rest);
12300 } else {
12301 ::error("$alias is not defined in @search");
12302 exit(-1);
12306 sub check_permissions($) {
12307 my $file = shift;
12309 if(-e $file) {
12310 if(not -o $file) {
12311 my $username = (getpwuid($<))[0];
12312 ::warning("$file should be owned by $username: ".
12313 "chown $username $file");
12315 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
12316 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
12317 if($mode & 077) {
12318 my $username = (getpwuid($<))[0];
12319 ::warning("$file should be only be readable by $username: ".
12320 "chmod 600 $file");
12325 sub parse_dburl($) {
12326 my $url = shift;
12327 my %options = ();
12328 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
12330 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
12331 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
12332 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
12334 ([^:@/][^:@]*|) # Username ($2)
12336 :([^@]*) # Password ($3)
12339 ([^:/]*)? # Hostname ($4)
12342 ([^/]*)? # Port ($5)
12346 ([^/?]*)? # Database ($6)
12350 ([^?]*)? # Table ($7)
12354 (.*)? # Query ($8)
12356 $!ix) {
12357 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
12358 $options{user} = ::undef_if_empty(uri_unescape($2));
12359 $options{password} = ::undef_if_empty(uri_unescape($3));
12360 $options{host} = ::undef_if_empty(uri_unescape($4));
12361 $options{port} = ::undef_if_empty(uri_unescape($5));
12362 $options{database} = ::undef_if_empty(uri_unescape($6));
12363 $options{table} = ::undef_if_empty(uri_unescape($7));
12364 $options{query} = ::undef_if_empty(uri_unescape($8));
12365 ::debug("sql", "dburl $url\n");
12366 ::debug("sql", "databasedriver ", $options{databasedriver},
12367 " user ", $options{user},
12368 " password ", $options{password}, " host ", $options{host},
12369 " port ", $options{port}, " database ", $options{database},
12370 " table ", $options{table}, " query ", $options{query}, "\n");
12371 } else {
12372 ::error("$url is not a valid DBURL");
12373 exit 255;
12375 return %options;
12378 sub uri_unescape($) {
12379 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
12380 # to avoid depending on URI::Escape
12381 # This section is (C) Gisle Aas.
12382 # Note from RFC1630: "Sequences which start with a percent sign
12383 # but are not followed by two hexadecimal characters are reserved
12384 # for future extension"
12385 my $str = shift;
12386 if (@_ && wantarray) {
12387 # not executed for the common case of a single argument
12388 my @str = ($str, @_); # need to copy
12389 foreach (@str) {
12390 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
12392 return @str;
12394 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
12395 $str;
12398 sub run($) {
12399 my $self = shift;
12400 my $stmt = shift;
12401 if($self->{'driver'} eq "CSV") {
12402 $stmt=~ s/;$//;
12403 if($stmt eq "BEGIN" or
12404 $stmt eq "COMMIT") {
12405 return undef;
12408 my @retval;
12409 my $dbh = $self->{'dbh'};
12410 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
12411 # Execute with the rest of the args - if any
12412 my $rv;
12413 my $sth;
12414 my $lockretry = 0;
12415 while($lockretry < 10) {
12416 $sth = $dbh->prepare($stmt);
12417 if($sth
12419 eval { $rv = $sth->execute(@_) }) {
12420 last;
12421 } else {
12422 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
12424 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
12425 # This is fine:
12426 # It is just a worker that reported back too late -
12427 # another worker had finished the job first
12428 # and the table was then dropped
12429 $rv = $sth = 0;
12430 last;
12432 if($DBI::errstr =~ /locked/) {
12433 ::debug("sql", "Lock retry: $lockretry");
12434 $lockretry++;
12435 ::usleep(rand()*300);
12436 } elsif(not $sth) {
12437 # Try again
12438 $lockretry++;
12439 } else {
12440 ::error($DBI::errstr);
12441 ::wait_and_exit(255);
12445 if($lockretry >= 10) {
12446 ::die_bug("retry > 10: $DBI::errstr");
12448 if($rv < 0 and $DBI::errstr){
12449 ::error($DBI::errstr);
12450 ::wait_and_exit(255);
12452 return $sth;
12455 sub get($) {
12456 my $self = shift;
12457 my $sth = $self->run(@_);
12458 my @retval;
12459 # If $sth = 0 it means the table was dropped by another process
12460 while($sth) {
12461 my @row = $sth->fetchrow_array();
12462 @row or last;
12463 push @retval, \@row;
12465 return \@retval;
12468 sub table($) {
12469 my $self = shift;
12470 return $self->{'table'};
12473 sub append($) {
12474 my $self = shift;
12475 return $self->{'append'};
12478 sub update($) {
12479 my $self = shift;
12480 my $stmt = shift;
12481 my $table = $self->table();
12482 $self->run("UPDATE $table $stmt",@_);
12485 sub output($) {
12486 my $self = shift;
12487 my $commandline = shift;
12489 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
12490 $commandline->seq(),
12491 join("",@{$commandline->{'output'}{1}}),
12492 join("",@{$commandline->{'output'}{2}}));
12495 sub max_number_of_args($) {
12496 # Maximal number of args for this table
12497 my $self = shift;
12498 if(not $self->{'max_number_of_args'}) {
12499 # Read the number of args from the SQL table
12500 my $table = $self->table();
12501 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
12502 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
12503 Receive Exitval _Signal Command Stdout Stderr);
12504 if(not $v) {
12505 ::error("$table contains no records");
12507 # Count the number of Vx columns
12508 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
12510 return $self->{'max_number_of_args'};
12513 sub set_max_number_of_args($$) {
12514 my $self = shift;
12515 $self->{'max_number_of_args'} = shift;
12518 sub create_table($) {
12519 my $self = shift;
12520 if($self->append()) { return; }
12521 my $max_number_of_args = shift;
12522 $self->set_max_number_of_args($max_number_of_args);
12523 my $table = $self->table();
12524 $self->run(qq(DROP TABLE IF EXISTS $table;));
12525 # BIGINT and TEXT are not supported in these databases or are too small
12526 my %vartype = (
12527 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
12528 "TEXT" => "CLOB", },
12529 "mysql" => { "TEXT" => "LONGTEXT", },
12530 "CSV" => { "BIGINT" => "INT",
12531 "FLOAT" => "REAL", },
12533 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
12534 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
12535 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
12536 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
12537 $self->run(qq{CREATE TABLE $table
12538 (Seq $BIGINT,
12539 Host $TEXT,
12540 Starttime $FLOAT,
12541 JobRuntime $FLOAT,
12542 Send $BIGINT,
12543 Receive $BIGINT,
12544 Exitval $BIGINT,
12545 _Signal $BIGINT,
12546 Command $TEXT,}.
12547 $v_def.
12548 qq{Stdout $TEXT,
12549 Stderr $TEXT);});
12552 sub insert_records($) {
12553 my $self = shift;
12554 my $seq = shift;
12555 my $command_ref = shift;
12556 my $record_ref = shift;
12557 my $table = $self->table();
12558 # For SQL encode the command with \257 space as split points
12559 my $command = join("\257 ",@$command_ref);
12560 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12561 # Two extra value due to $seq, Exitval, Send
12562 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
12563 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
12564 "VALUES ($v_vals);", $seq, $command, -1000,
12565 0, @$record_ref[1..$#$record_ref]);
12568 sub get_record($) {
12569 my $self = shift;
12570 my @retval;
12571 my $table = $self->table();
12572 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12573 my $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12574 "WHERE Exitval = -1000 ORDER BY Seq LIMIT 1;");
12575 if($v->[0]) {
12576 my $val_ref = $v->[0];
12577 # Mark record as taken
12578 my $seq = shift @$val_ref;
12579 # Save the sequence number to use when running the job
12580 $SQL::next_seq = $seq;
12581 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
12582 my @command = split /\257 /, shift @$val_ref;
12583 $SQL::command_ref = \@command;
12584 for (@$val_ref) {
12585 push @retval, Arg->new($_);
12588 if(@retval) {
12589 return \@retval;
12590 } else {
12591 return undef;
12595 sub total_jobs($) {
12596 my $self = shift;
12597 my $table = $self->table();
12598 my $v = $self->get("SELECT count(*) FROM $table;");
12599 if($v->[0]) {
12600 return $v->[0]->[0];
12601 } else {
12602 ::die_bug("SQL::total_jobs");
12606 sub max_seq($) {
12607 my $self = shift;
12608 my $table = $self->table();
12609 my $v = $self->get("SELECT max(Seq) FROM $table;");
12610 if($v->[0]) {
12611 return $v->[0]->[0];
12612 } else {
12613 ::die_bug("SQL::max_seq");
12617 sub finished($) {
12618 # Check if there are any jobs left in the SQL table that do not
12619 # have a "real" exitval
12620 my $self = shift;
12621 if($opt::wait or $Global::start_sqlworker) {
12622 my $table = $self->table();
12623 my $rv = $self->get("select Seq,Exitval from $table ".
12624 "where Exitval <= -1000 limit 1");
12625 return not $rv->[0];
12626 } else {
12627 return 1;
12631 package Semaphore;
12633 # This package provides a counting semaphore
12635 # If a process dies without releasing the semaphore the next process
12636 # that needs that entry will clean up dead semaphores
12638 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
12639 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
12640 # process holding the entry. If the process dies, the entry can be
12641 # taken by another process.
12643 sub new($) {
12644 my $class = shift;
12645 my $id = shift;
12646 my $count = shift;
12647 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
12648 $id = "id-".$id; # To distinguish it from a process id
12649 my $parallel_locks = $Global::cache_dir . "/semaphores";
12650 -d $parallel_locks or ::mkdir_or_die($parallel_locks);
12651 my $lockdir = "$parallel_locks/$id";
12653 my $lockfile = $lockdir.".lock";
12654 if($count < 1) { ::die_bug("semaphore-count: $count"); }
12655 return bless {
12656 'lockfile' => $lockfile,
12657 'lockfh' => Symbol::gensym(),
12658 'lockdir' => $lockdir,
12659 'id' => $id,
12660 'idfile' => $lockdir."/".$id,
12661 'pid' => $$,
12662 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
12663 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
12664 }, ref($class) || $class;
12667 sub remove_dead_locks($) {
12668 my $self = shift;
12669 my $lockdir = $self->{'lockdir'};
12671 for my $d (glob "$lockdir/*") {
12672 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
12673 my ($pid, $host) = ($1, $2);
12674 if($host eq ::hostname()) {
12675 if(kill 0, $pid) {
12676 ::debug("sem", "Alive: $pid $d\n");
12677 } else {
12678 ::debug("sem", "Dead: $d\n");
12679 ::rm($d);
12685 sub acquire($) {
12686 my $self = shift;
12687 my $sleep = 1; # 1 ms
12688 my $start_time = time;
12689 while(1) {
12690 # Can we get a lock?
12691 $self->atomic_link_if_count_less_than() and last;
12692 $self->remove_dead_locks();
12693 # Retry slower and slower up to 1 second
12694 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
12695 # Random to avoid every sleeping job waking up at the same time
12696 ::usleep(rand()*$sleep);
12697 if($opt::semaphoretimeout) {
12698 if($opt::semaphoretimeout > 0
12700 time - $start_time > $opt::semaphoretimeout) {
12701 # Timeout: Take the semaphore anyway
12702 ::warning("Semaphore timed out. Stealing the semaphore.");
12703 if(not -e $self->{'idfile'}) {
12704 open (my $fh, ">", $self->{'idfile'}) or
12705 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
12706 close $fh;
12708 link $self->{'idfile'}, $self->{'pidfile'};
12709 last;
12711 if($opt::semaphoretimeout < 0
12713 time - $start_time > -$opt::semaphoretimeout) {
12714 # Timeout: Exit
12715 ::warning("Semaphore timed out. Exiting.");
12716 exit(1);
12717 last;
12721 ::debug("sem", "acquired $self->{'pid'}\n");
12724 sub release($) {
12725 my $self = shift;
12726 ::rm($self->{'pidfile'});
12727 if($self->nlinks() == 1) {
12728 # This is the last link, so atomic cleanup
12729 $self->lock();
12730 if($self->nlinks() == 1) {
12731 ::rm($self->{'idfile'});
12732 rmdir $self->{'lockdir'};
12734 $self->unlock();
12736 ::debug("run", "released $self->{'pid'}\n");
12739 sub pid_change($) {
12740 # This should do what release()+acquire() would do without having
12741 # to re-acquire the semaphore
12742 my $self = shift;
12744 my $old_pidfile = $self->{'pidfile'};
12745 $self->{'pid'} = $$;
12746 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
12747 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
12748 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12749 ::rm($old_pidfile);
12752 sub atomic_link_if_count_less_than($) {
12753 # Link $file1 to $file2 if nlinks to $file1 < $count
12754 my $self = shift;
12755 my $retval = 0;
12756 $self->lock();
12757 my $nlinks = $self->nlinks();
12758 ::debug("sem","$nlinks<$self->{'count'} ");
12759 if($nlinks < $self->{'count'}) {
12760 -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
12761 if(not -e $self->{'idfile'}) {
12762 open (my $fh, ">", $self->{'idfile'}) or
12763 ::die_bug("write_idfile: $self->{'idfile'}");
12764 close $fh;
12766 $retval = link $self->{'idfile'}, $self->{'pidfile'};
12767 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12769 $self->unlock();
12770 ::debug("sem", "atomic $retval");
12771 return $retval;
12774 sub nlinks($) {
12775 my $self = shift;
12776 if(-e $self->{'idfile'}) {
12777 return (stat(_))[3];
12778 } else {
12779 return 0;
12783 sub lock($) {
12784 my $self = shift;
12785 my $sleep = 100; # 100 ms
12786 my $total_sleep = 0;
12787 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
12788 my $locked = 0;
12789 while(not $locked) {
12790 if(tell($self->{'lockfh'}) == -1) {
12791 # File not open
12792 open($self->{'lockfh'}, ">", $self->{'lockfile'})
12793 or ::debug("run", "Cannot open $self->{'lockfile'}");
12795 if($self->{'lockfh'}) {
12796 # File is open
12797 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
12798 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
12799 # The file is locked: No need to retry
12800 $locked = 1;
12801 last;
12802 } else {
12803 if ($! =~ m/Function not implemented/) {
12804 ::warning("flock: $!",
12805 "Will wait for a random while.");
12806 ::usleep(rand(5000));
12807 # File cannot be locked: No need to retry
12808 $locked = 2;
12809 last;
12813 # Locking failed in first round
12814 # Sleep and try again
12815 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
12816 # Random to avoid every sleeping job waking up at the same time
12817 ::usleep(rand()*$sleep);
12818 $total_sleep += $sleep;
12819 if($opt::semaphoretimeout) {
12820 if($opt::semaphoretimeout > 0
12822 $total_sleep/1000 > $opt::semaphoretimeout) {
12823 # Timeout: Take the semaphore anyway
12824 ::warning("Semaphore timed out. Taking the semaphore.");
12825 $locked = 3;
12826 last;
12828 if($opt::semaphoretimeout < 0
12830 $total_sleep/1000 > -$opt::semaphoretimeout) {
12831 # Timeout: Exit
12832 ::warning("Semaphore timed out. Exiting.");
12833 $locked = 4;
12834 last;
12836 } else {
12837 if($total_sleep/1000 > 30) {
12838 ::warning("Semaphore stuck for 30 seconds. ".
12839 "Consider using --semaphoretimeout.");
12843 ::debug("run", "locked $self->{'lockfile'}");
12846 sub unlock($) {
12847 my $self = shift;
12848 ::rm($self->{'lockfile'});
12849 close $self->{'lockfh'};
12850 ::debug("run", "unlocked\n");
12853 # Keep perl -w happy
12855 $opt::x = $Semaphore::timeout = $Semaphore::wait =
12856 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
12857 $Global::max_slot_number = $opt::session;
12859 package main;
12861 sub main() {
12862 save_stdin_stdout_stderr();
12863 save_original_signal_handler();
12864 parse_options();
12865 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
12866 my $number_of_args;
12867 if($Global::max_number_of_args) {
12868 $number_of_args = $Global::max_number_of_args;
12869 } elsif ($opt::X or $opt::m or $opt::xargs) {
12870 $number_of_args = undef;
12871 } else {
12872 $number_of_args = 1;
12875 my @command = @ARGV;
12876 my @input_source_fh;
12877 if($opt::pipepart) {
12878 if($opt::tee) {
12879 @input_source_fh = map { open_or_exit($_) } @opt::a;
12880 # Remove the first: It will be the file piped.
12881 shift @input_source_fh;
12882 if(not @input_source_fh and not $opt::pipe) {
12883 @input_source_fh = (*STDIN);
12885 } else {
12886 # -a is used for data - not for command line args
12887 @input_source_fh = map { open_or_exit($_) } "/dev/null";
12889 } else {
12890 @input_source_fh = map { open_or_exit($_) } @opt::a;
12891 if(not @input_source_fh and not $opt::pipe) {
12892 @input_source_fh = (*STDIN);
12895 if($opt::sqlmaster) {
12896 # Create SQL table to hold joblog + output
12897 $Global::sql->create_table($#input_source_fh+1);
12898 if($opt::sqlworker) {
12899 # Start a real --sqlworker in the background later
12900 $Global::start_sqlworker = 1;
12901 $opt::sqlworker = undef;
12905 if($opt::skip_first_line) {
12906 # Skip the first line for the first file handle
12907 my $fh = $input_source_fh[0];
12908 <$fh>;
12911 set_input_source_header(\@command,\@input_source_fh);
12912 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
12913 # Parallel check all hosts are up. Remove hosts that are down
12914 filter_hosts();
12917 if($opt::nonall or $opt::onall) {
12918 onall(\@input_source_fh,@command);
12919 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
12922 $Global::JobQueue = JobQueue->new(
12923 \@command,\@input_source_fh,$Global::ContextReplace,
12924 $number_of_args,\@Global::transfer_files,\@Global::ret_files);
12926 if($opt::pipepart) {
12927 pipepart_setup();
12928 } elsif($opt::pipe and $opt::tee) {
12929 pipe_tee_setup();
12930 } elsif($opt::pipe and $opt::shard or $opt::bin) {
12931 pipe_shard_setup();
12934 if(not $opt::pipepart and $opt::groupby) {
12935 group_by_stdin_filter();
12937 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
12938 # Count the number of jobs or shuffle all jobs
12939 # before starting any.
12940 # Must be done after ungetting any --pipepart jobs.
12941 $Global::JobQueue->total_jobs();
12943 # Compute $Global::max_jobs_running
12944 # Must be done after ungetting any --pipepart jobs.
12945 max_jobs_running();
12946 init_run_jobs();
12947 my $sem;
12948 if($Global::semaphore) {
12949 $sem = acquire_semaphore();
12951 $SIG{TERM} = $Global::original_sig{TERM};
12952 $SIG{HUP} = \&start_no_new_jobs;
12954 if($opt::tee or $opt::shard or $opt::bin) {
12955 # All jobs must be running in parallel for --tee/--shard/--bin
12956 while(start_more_jobs()) {}
12957 $Global::start_no_new_jobs = 1;
12958 if(not $Global::JobQueue->empty()) {
12959 if($opt::tee) {
12960 ::error("--tee requires --jobs to be higher. Try --jobs 0.");
12961 } elsif($opt::bin) {
12962 ::error("--bin requires --jobs to be higher than the number of",
12963 "arguments. Increase --jobs.");
12964 } elsif($opt::shard) {
12965 ::error("--shard requires --jobs to be higher than the number of",
12966 "arguments. Increase --jobs.");
12967 } else {
12968 ::die_bug("--bin/--shard/--tee should not get here");
12970 ::wait_and_exit(255);
12972 } elsif($opt::pipe and not $opt::pipepart) {
12973 # Fill all jobslots
12974 while(start_more_jobs()) {}
12975 spreadstdin();
12976 } else {
12977 # Reap one - start one
12978 while(reaper() + start_more_jobs()) {}
12980 ::debug("init", "Start draining\n");
12981 drain_job_queue(@command);
12982 ::debug("init", "Done draining\n");
12983 reapers();
12984 ::debug("init", "Done reaping\n");
12985 if($Global::semaphore) {
12986 $sem->release();
12988 cleanup();
12989 ::debug("init", "Halt\n");
12990 halt();
12993 main();