Fixed bug #59146: Support --termseq for remote jobs.
[parallel.git] / src / parallel
blob3f719392d375d0d353d24037cc56a68d3c33c802
1 #!/usr/bin/env perl
3 # Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <http://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # open3 used in Job::start
22 use IPC::Open3;
23 use POSIX;
24 # gensym used in Job::start
25 use Symbol qw(gensym);
26 # tempfile used in Job::start
27 use File::Temp qw(tempfile tempdir);
28 # mkpath used in openresultsfile
29 use File::Path;
30 # GetOptions used in get_options_from_array
31 use Getopt::Long;
32 # Used to ensure code quality
33 use strict;
34 use File::Basename;
36 sub set_input_source_header($$) {
37 my ($command_ref,$input_source_fh_ref) = @_;
38 if($opt::header and not $opt::pipe) {
39 # split with colsep or \t
40 # $header force $colsep = \t if undef?
41 my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
42 # regexp for {=
43 my $left = "\Q$Global::parensleft\E";
44 my $l = $Global::parensleft;
45 # regexp for =}
46 my $right = "\Q$Global::parensright\E";
47 my $r = $Global::parensright;
48 my $id = 1;
49 for my $fh (@$input_source_fh_ref) {
50 my $line = <$fh>;
51 chomp($line);
52 $line =~ s/\r$//;
53 ::debug("init", "Delimiter: '$delimiter'");
54 for my $s (split /$delimiter/o, $line) {
55 ::debug("init", "Colname: '$s'");
56 # Replace {colname} with {2}
57 for(@$command_ref, @Global::ret_files,
58 @Global::transfer_files, $opt::tagstring,
59 $opt::workdir, $opt::results, $opt::retries) {
60 # Skip if undefined
61 $_ or next;
62 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
63 # {=header1 ... =} => {=1 ... =}
64 s:$left $s (.*?) $right:$l$id$1$r:gx;
66 $Global::input_source_header{$id} = $s;
67 $id++;
70 } else {
71 my $id = 1;
72 for my $fh (@$input_source_fh_ref) {
73 $Global::input_source_header{$id} = $id;
74 $id++;
79 sub max_jobs_running() {
80 # Compute $Global::max_jobs_running as the max number of jobs
81 # running on each sshlogin.
82 # Returns:
83 # $Global::max_jobs_running
84 if(not $Global::max_jobs_running) {
85 for my $sshlogin (values %Global::host) {
86 $sshlogin->max_jobs_running();
89 if(not $Global::max_jobs_running) {
90 ::error("Cannot run any jobs.");
91 wait_and_exit(255);
93 return $Global::max_jobs_running;
96 sub halt() {
97 # Compute exit value,
98 # wait for children to complete
99 # and exit
100 if($opt::halt and $Global::halt_when ne "never") {
101 if(not defined $Global::halt_exitstatus) {
102 if($Global::halt_pct) {
103 $Global::halt_exitstatus =
104 ::ceil($Global::total_failed /
105 ($Global::total_started || 1) * 100);
106 } elsif($Global::halt_count) {
107 $Global::halt_exitstatus =
108 ::min(undef_as_zero($Global::total_failed),101);
111 wait_and_exit($Global::halt_exitstatus);
112 } else {
113 wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
118 sub __PIPE_MODE__() {}
121 sub pipepart_setup() {
122 # Compute the blocksize
123 # Generate the commands to extract the blocks
124 # Push the commands on queue
125 # Changes:
126 # @Global::cat_prepends
127 # $Global::JobQueue
128 if($opt::tee) {
129 # Prepend each command with
130 # < file
131 my $cat_string = "< ".Q($opt::a[0]);
132 for(1..$Global::JobQueue->total_jobs()) {
133 push @Global::cat_appends, $cat_string;
134 push @Global::cat_prepends, "";
136 } else {
137 if(not $opt::blocksize) {
138 # --blocksize with 10 jobs per jobslot
139 $opt::blocksize = -10;
141 if($opt::roundrobin) {
142 # --blocksize with 1 job per jobslot
143 $opt::blocksize = -1;
145 if($opt::blocksize < 0) {
146 my $size = 0;
147 # Compute size of -a
148 for(@opt::a) {
149 if(-f $_) {
150 $size += -s $_;
151 } elsif(-b $_) {
152 $size += size_of_block_dev($_);
153 } elsif(-e $_) {
154 ::error("$_ is neither a file nor a block device");
155 wait_and_exit(255);
156 } else {
157 ::error("File not found: $_");
158 wait_and_exit(255);
161 # Run in total $job_slots*(- $blocksize) jobs
162 # Set --blocksize = size / no of proc / (- $blocksize)
163 $Global::dummy_jobs = 1;
164 $Global::blocksize = 1 +
165 int($size / max_jobs_running() /
166 -multiply_binary_prefix($opt::blocksize));
168 @Global::cat_prepends = map { pipe_part_files($_) } @opt::a;
169 # Unget the empty arg as many times as there are parts
170 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
171 map { [Arg->new("\0noarg")] } @Global::cat_prepends
176 sub pipe_tee_setup() {
177 # Create temporary fifos
178 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
179 # This will spread the input to fifos
180 # Generate commands that reads from fifo1..N:
181 # cat fifo | user_command
182 # Changes:
183 # @Global::cat_prepends
184 my @fifos;
185 for(1..$Global::JobQueue->total_jobs()) {
186 push @fifos, tmpfifo();
188 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
189 if(not fork()){
190 # Test if tee supports --output-error=warn-nopipe
191 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
192 my $opt = $? ? "" : "--output-error=warn-nopipe";
193 ::debug("init","tee $opt");
194 # Let tee inherit our stdin
195 # and redirect stdout to null
196 open STDOUT, ">","/dev/null";
197 if($opt) {
198 exec "tee", $opt, @fifos;
199 } else {
200 exec "tee", @fifos;
203 # For each fifo
204 # (rm fifo1; grep 1) < fifo1
205 # (rm fifo2; grep 2) < fifo2
206 # (rm fifo3; grep 3) < fifo3
207 # Remove the tmpfifo as soon as it is open
208 @Global::cat_prepends = map { "(rm $_;" } @fifos;
209 @Global::cat_appends = map { ") < $_" } @fifos;
213 sub parcat_script() {
214 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
215 my $script = q'{
216 use POSIX qw(:errno_h);
217 use IO::Select;
218 use strict;
219 use threads;
220 use Thread::Queue;
221 use Fcntl qw(:DEFAULT :flock);
223 my $opened :shared;
224 my $q = Thread::Queue->new();
225 my $okq = Thread::Queue->new();
226 my @producers;
228 if(not @ARGV) {
229 if(-t *STDIN) {
230 print "Usage:\n";
231 print " parcat file(s)\n";
232 print " cat argfile | parcat\n";
233 } else {
234 # Read arguments from stdin
235 chomp(@ARGV = <STDIN>);
238 my $files_to_open = 0;
239 # Default: fd = stdout
240 my $fd = 1;
241 for (@ARGV) {
242 # --rm = remove file when opened
243 /^--rm$/ and do { $opt::rm = 1; next; };
244 # -1 = output to fd 1, -2 = output to fd 2
245 /^-(\d+)$/ and do { $fd = $1; next; };
246 push @producers, threads->create("producer", $_, $fd);
247 $files_to_open++;
250 sub producer {
251 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
252 my $file = shift;
253 my $output_fd = shift;
254 open(my $fh, "<", $file) || do {
255 print STDERR "parcat: Cannot open $file\n";
256 exit(1);
258 # Remove file when it has been opened
259 if($opt::rm) {
260 unlink $file;
262 set_fh_non_blocking($fh);
263 $opened++;
264 # Pass the fileno to parent
265 $q->enqueue(fileno($fh),$output_fd);
266 # Get an OK that the $fh is opened and we can release the $fh
267 while(1) {
268 my $ok = $okq->dequeue();
269 if($ok == fileno($fh)) { last; }
270 # Not ours - very unlikely to happen
271 $okq->enqueue($ok);
273 return;
276 my $s = IO::Select->new();
277 my %buffer;
279 sub add_file {
280 my $infd = shift;
281 my $outfd = shift;
282 open(my $infh, "<&=", $infd) || die;
283 open(my $outfh, ">&=", $outfd) || die;
284 $s->add($infh);
285 # Tell the producer now opened here and can be released
286 $okq->enqueue($infd);
287 # Initialize the buffer
288 @{$buffer{$infh}{$outfd}} = ();
289 $Global::fh{$outfd} = $outfh;
292 sub add_files {
293 # Non-blocking dequeue
294 my ($infd,$outfd);
295 do {
296 ($infd,$outfd) = $q->dequeue_nb(2);
297 if(defined($outfd)) {
298 add_file($infd,$outfd);
300 } while(defined($outfd));
303 sub add_files_block {
304 # Blocking dequeue
305 my ($infd,$outfd) = $q->dequeue(2);
306 add_file($infd,$outfd);
310 my $fd;
311 my (@ready,$infh,$rv,$buf);
312 do {
313 # Wait until at least one file is opened
314 add_files_block();
315 while($q->pending or keys %buffer) {
316 add_files();
317 while(keys %buffer) {
318 @ready = $s->can_read(0.01);
319 if(not @ready) {
320 add_files();
322 for $infh (@ready) {
323 # There is only one key, namely the output file descriptor
324 for my $outfd (keys %{$buffer{$infh}}) {
325 $rv = sysread($infh, $buf, 65536);
326 if (!$rv) {
327 if($! == EAGAIN) {
328 # Would block: Nothing read
329 next;
330 } else {
331 # Nothing read, but would not block:
332 # This file is done
333 $s->remove($infh);
334 for(@{$buffer{$infh}{$outfd}}) {
335 syswrite($Global::fh{$outfd},$_);
337 delete $buffer{$infh};
338 # Closing the $infh causes it to block
339 # close $infh;
340 add_files();
341 next;
344 # Something read.
345 # Find \n or \r for full line
346 my $i = (rindex($buf,"\n")+1);
347 if($i) {
348 # Print full line
349 for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
350 syswrite($Global::fh{$outfd},$_);
352 # @buffer = remaining half line
353 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
354 } else {
355 # Something read, but not a full line
356 push @{$buffer{$infh}{$outfd}}, $buf;
358 redo;
363 } while($opened < $files_to_open);
365 for (@producers) {
366 $_->join();
369 sub set_fh_non_blocking {
370 # Set filehandle as non-blocking
371 # Inputs:
372 # $fh = filehandle to be blocking
373 # Returns:
374 # N/A
375 my $fh = shift;
376 my $flags;
377 fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
378 $flags |= &O_NONBLOCK; # Add non-blocking to the flags
379 fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
382 return ::spacefree(3, $script);
385 sub sharder_script() {
386 my $script = q{
387 use B;
388 # Column separator
389 my $sep = shift;
390 # Which columns to shard on (count from 1)
391 my $col = shift;
392 # Which columns to shard on (count from 0)
393 my $col0 = $col - 1;
394 # Perl expression
395 my $perlexpr = shift;
396 my $bins = @ARGV;
397 # Open fifos for writing, fh{0..$bins}
398 my $t = 0;
399 my %fh;
400 for(@ARGV) {
401 open $fh{$t++}, ">", $_;
402 # open blocks until it is opened by reader
403 # so unlink only happens when it is ready
404 unlink $_;
406 if($perlexpr) {
407 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
408 while(<STDIN>) {
409 # Split into $col columns (no need to split into more)
410 @F = split $sep, $_, $col+1;
412 local $_ = $F[$col0];
413 &$subref();
414 $fh = $fh{ hex(B::hash($_))%$bins };
416 print $fh $_;
418 } else {
419 while(<STDIN>) {
420 # Split into $col columns (no need to split into more)
421 @F = split $sep, $_, $col+1;
422 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
423 print $fh $_;
426 # Close all open fifos
427 close values %fh;
429 return ::spacefree(1, $script);
432 sub binner_script() {
433 my $script = q{
434 use B;
435 # Column separator
436 my $sep = shift;
437 # Which columns to shard on (count from 1)
438 my $col = shift;
439 # Which columns to shard on (count from 0)
440 my $col0 = $col - 1;
441 # Perl expression
442 my $perlexpr = shift;
443 my $bins = @ARGV;
444 # Open fifos for writing, fh{0..$bins}
445 my $t = 0;
446 my %fh;
447 # Let the last output fifo be the 0'th
448 open $fh{$t++}, ">", pop @ARGV;
449 for(@ARGV) {
450 open $fh{$t++}, ">", $_;
451 # open blocks until it is opened by reader
452 # so unlink only happens when it is ready
453 unlink $_;
455 if($perlexpr) {
456 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
457 while(<STDIN>) {
458 # Split into $col columns (no need to split into more)
459 @F = split $sep, $_, $col+1;
461 local $_ = $F[$col0];
462 &$subref();
463 $fh = $fh{ $_%$bins };
465 print $fh $_;
467 } else {
468 while(<STDIN>) {
469 # Split into $col columns (no need to split into more)
470 @F = split $sep, $_, $col+1;
471 $fh = $fh{ $F[$col0]%$bins };
472 print $fh $_;
475 # Close all open fifos
476 close values %fh;
478 return ::spacefree(1, $script);
481 sub pipe_shard_setup() {
482 # Create temporary fifos
483 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
484 # This will spread the input to fifos
485 # Generate commands that reads from fifo1..N:
486 # cat fifo | user_command
487 # Changes:
488 # @Global::cat_prepends
489 my @shardfifos;
490 my @parcatfifos;
491 # TODO $opt::jobs should be evaluated (100%)
492 # TODO $opt::jobs should be number of total_jobs if there are argugemts
493 my $njobs = $opt::jobs;
494 for my $m (0..$njobs-1) {
495 for my $n (0..$njobs-1) {
496 # sharding to A B C D
497 # parcatting all As together
498 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
501 my $shardbin = ($opt::shard || $opt::bin);
502 my $script;
503 if($opt::bin) {
504 $script = binner_script();
505 } else {
506 $script = sharder_script();
509 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
511 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
512 # Group by column name
513 # (Yes, this will also wrongly match a perlexpr like: chop)
514 my($read,$char,@line);
515 # A full line, but nothing more (the rest must be read by the child)
516 # $Global::header used to prepend block to each job
517 do {
518 $read = sysread(STDIN,$char,1);
519 push @line, $char;
520 } while($read and $char ne "\n");
521 $Global::header = join "", @line;
523 my ($col, $perlexpr, $subref) =
524 column_perlexpr($shardbin, $Global::header, $opt::colsep);
525 if(not fork()) {
526 # Let the sharder inherit our stdin
527 # and redirect stdout to null
528 open STDOUT, ">","/dev/null";
529 # The PERL_HASH_SEED must be the same for all sharders
530 # so B::hash will return the same value for any given input
531 $ENV{'PERL_HASH_SEED'} = $$;
532 exec qw(parallel --block 100k -q --pipe -j), $njobs,
533 qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
534 $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos);
536 # For each fifo
537 # (rm fifo1; grep 1) < fifo1
538 # (rm fifo2; grep 2) < fifo2
539 # (rm fifo3; grep 3) < fifo3
540 my $parcat = Q(parcat_script());
541 if(not $parcat) {
542 ::error("'parcat' must be in path.");
543 ::wait_and_exit(255);
545 @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos;
548 sub pipe_part_files(@) {
549 # Given the bigfile
550 # find header and split positions
551 # make commands that 'cat's the partial file
552 # Input:
553 # $file = the file to read
554 # Returns:
555 # @commands that will cat_partial each part
556 my ($file) = @_;
557 my $buf = "";
558 if(not -f $file and not -b $file) {
559 ::error("$file is not a seekable file.");
560 ::wait_and_exit(255);
562 my $header = find_header(\$buf,open_or_exit($file));
563 # find positions
564 my @pos = find_split_positions($file,$Global::blocksize,$header);
565 # Make @cat_prepends
566 my @cat_prepends = ();
567 for(my $i=0; $i<$#pos; $i++) {
568 push(@cat_prepends,
569 cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]));
571 return @cat_prepends;
574 sub find_header($$) {
575 # Compute the header based on $opt::header
576 # Input:
577 # $buf_ref = reference to read-in buffer
578 # $fh = filehandle to read from
579 # Uses:
580 # $opt::header
581 # $Global::blocksize
582 # $Global::header
583 # Returns:
584 # $header string
585 my ($buf_ref, $fh) = @_;
586 my $header = "";
587 # $Global::header may be set in group_by_loop()
588 if($Global::header) { return $Global::header }
589 if($opt::header) {
590 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
591 # Number = number of lines
592 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
593 while(sysread($fh,$$buf_ref,$Global::blocksize,length $$buf_ref)) {
594 if($$buf_ref =~ s/^($opt::header)//) {
595 $header = $1;
596 last;
600 return $header;
603 sub find_split_positions($$$) {
604 # Find positions in bigfile where recend is followed by recstart
605 # Input:
606 # $file = the file to read
607 # $block = (minimal) --block-size of each chunk
608 # $header = header to be skipped
609 # Uses:
610 # $opt::recstart
611 # $opt::recend
612 # Returns:
613 # @positions of block start/end
614 my($file, $block, $header) = @_;
615 my $headerlen = length $header;
616 my $size = -s $file;
617 if(-b $file) {
618 # $file is a blockdevice
619 $size = size_of_block_dev($file);
621 $block = int $block;
622 if($opt::groupby) {
623 return split_positions_for_group_by($file,$size,$block,$header);
625 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
626 # The optimal dd blocksize for freebsd = 2^15..2^17
627 my $dd_block_size = 131072; # 2^17
628 my @pos;
629 my ($recstart,$recend) = recstartrecend();
630 my $recendrecstart = $recend.$recstart;
631 my $fh = ::open_or_exit($file);
632 push(@pos,$headerlen);
633 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
634 my $buf;
635 if($recendrecstart eq "") {
636 # records ends anywhere
637 push(@pos,$pos);
638 } else {
639 # Seek the the block start
640 if(not sysseek($fh, $pos, 0)) {
641 ::error("Cannot seek to $pos in $file");
642 edit(255);
644 while(sysread($fh,$buf,$dd_block_size,length $buf)) {
645 if($opt::regexp) {
646 # If match /$recend$recstart/ => Record position
647 if($buf =~ m:^(.*$recend)$recstart:os) {
648 # Start looking for next record _after_ this match
649 $pos += length($1);
650 push(@pos,$pos);
651 last;
653 } else {
654 # If match $recend$recstart => Record position
655 # TODO optimize to only look at the appended
656 # $dd_block_size + len $recendrecstart
657 # TODO increase $dd_block_size to optimize for longer records
658 my $i = index64(\$buf,$recendrecstart);
659 if($i != -1) {
660 # Start looking for next record _after_ this match
661 $pos += $i + length($recend);
662 push(@pos,$pos);
663 last;
669 if($pos[$#pos] != $size) {
670 # Last splitpoint was not at end of the file: add $size as the last
671 push @pos, $size;
673 close $fh;
674 return @pos;
677 sub split_positions_for_group_by($$$$) {
678 my($fh);
679 sub value_at($) {
680 my $pos = shift;
681 if($pos != 0) {
682 seek($fh, $pos-1, 0) || die;
683 # Read half line
684 <$fh>;
686 # Read full line
687 my $linepos = tell($fh);
688 $_ = <$fh>;
689 if(defined $_) {
690 # Not end of file
691 my @F;
692 if(defined $group_by::col) {
693 $opt::colsep ||= "\t";
694 @F = split /$opt::colsep/, $_;
695 $_ = $F[$group_by::col];
697 eval $group_by::perlexpr;
699 return ($_,$linepos);
702 sub binary_search_end($$$) {
703 my ($s,$spos,$epos) = @_;
704 # value_at($spos) == $s
705 # value_at($epos) != $s
706 my $posdif = $epos - $spos;
707 my ($v,$vpos);
708 while($posdif) {
709 ($v,$vpos) = value_at($spos+$posdif);
710 if($v eq $s) {
711 $spos = $vpos;
712 $posdif = $epos - $spos;
713 } else {
714 $epos = $vpos;
716 $posdif = int($posdif/2);
718 return($v,$vpos);
721 sub binary_search_start($$$) {
722 my ($s,$spos,$epos) = @_;
723 # value_at($spos) != $s
724 # value_at($epos) == $s
725 my $posdif = $epos - $spos;
726 my ($v,$vpos);
727 while($posdif) {
728 ($v,$vpos) = value_at($spos+$posdif);
729 if($v eq $s) {
730 $epos = $vpos;
731 } else {
732 $spos = $vpos;
733 $posdif = $epos - $spos;
735 $posdif = int($posdif/2);
737 return($v,$vpos);
740 my ($file,$size,$block,$header) = @_;
741 my ($a,$b,$c,$apos,$bpos,$cpos);
742 my @pos;
743 $fh = open_or_exit($file);
744 # Set $Global::group_by_column $Global::group_by_perlexpr
745 group_by_loop($fh,$opt::recsep);
746 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
747 $apos = length $header;
748 for(($a,$apos) = value_at($apos); $apos < $size;) {
749 push @pos, $apos;
750 $bpos = $apos + $block;
751 ($b,$bpos) = value_at($bpos);
752 if(eof($fh)) {
753 push @pos, $size; last;
755 $cpos = $bpos + $block;
756 ($c,$cpos) = value_at($cpos);
757 if($a eq $b) {
758 while($b eq $c) {
759 # Move bpos, cpos a block forward until $a == $b != $c
760 $bpos = $cpos;
761 $cpos += $block;
762 ($c,$cpos) = value_at($cpos);
763 if($cpos >= $size) {
764 $cpos = $size;
765 last;
768 # $a == $b != $c
769 # Binary search for $b ending between ($bpos,$cpos)
770 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
771 } else {
772 if($b eq $c) {
773 # $a != $b == $c
774 # Binary search for $b starting between ($apos,$bpos)
775 ($b,$bpos) = binary_search_start($b,$apos,$bpos);
776 } else {
777 # $a != $b != $c
778 # Binary search for $b ending between ($bpos,$cpos)
779 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
782 ($a,$apos) = ($b,$bpos);
784 if($pos[$#pos] != $size) {
785 # Last splitpoint was not at end of the file: add it
786 push @pos, $size;
788 return @pos;
791 sub cat_partial($@) {
792 # Efficient command to copy from byte X to byte Y
793 # Input:
794 # $file = the file to read
795 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
796 # Returns:
797 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
798 my($file, @start_end) = @_;
799 my($start, $i);
800 # Convert (start,end) to (start,len)
801 my @start_len = map {
802 if(++$i % 2) { $start = $_; } else { $_-$start }
803 } @start_end;
804 # This can read 7 GB/s using a single core
805 my $script = spacefree
808 while(@ARGV) {
809 sysseek(STDIN,shift,0) || die;
810 $left = shift;
811 while($read =
812 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
813 $left -= $read;
814 syswrite(STDOUT,$buf);
818 return "<". Q($file) .
819 " perl -e '$script' @start_len |";
822 sub column_perlexpr($$$) {
823 # Compute the column number (if any), perlexpression from combined
824 # string (such as --shard key, --groupby key, {=n perlexpr=}
825 # Input:
826 # $column_perlexpr = string with column and perl expression
827 # $header = header from input file (if column is column name)
828 # $colsep = column separator regexp
829 # Returns:
830 # $col = column number
831 # $perlexpr = perl expression
832 # $subref = compiled perl expression as sub reference
833 my ($column_perlexpr, $header, $colsep) = @_;
834 my ($col, $perlexpr, $subref);
835 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
836 # Column name/number (possibly prefix)
837 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
838 # Column number (possibly prefix)
839 $col = $1;
840 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
841 # Column name (possibly prefix)
842 my $colname = $1;
843 # Split on --copsep pattern
844 my @headers = split /$colsep/, $header;
845 my %headers;
846 @headers{@headers} = (1..($#headers+1));
847 $col = $headers{$colname};
848 if(not defined $col) {
849 ::error("Column '$colname' $colsep not found in header",keys %headers);
850 ::wait_and_exit(255);
854 # What is left of $column_perlexpr is $perlexpr (possibly empty)
855 $perlexpr = $column_perlexpr;
856 $subref = eval("sub { no strict; no warnings; $perlexpr }");
857 return($col, $perlexpr, $subref);
860 sub group_by_loop($$) {
861 # Generate perl code for group-by loop
862 # Insert a $recsep when the column value changes
863 # The column value can be computed with $perexpr
864 my($fh,$recsep) = @_;
865 my $groupby = $opt::groupby;
866 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
867 # Group by column name
868 # (Yes, this will also wrongly match a perlexpr like: chop)
869 my($read,$char,@line);
870 # A full line, but nothing more (the rest must be read by the child)
871 # $Global::header used to prepend block to each job
872 do {
873 $read = sysread($fh,$char,1);
874 push @line, $char;
875 } while($read and $char ne "\n");
876 $Global::header = join "", @line;
878 $opt::colsep ||= "\t";
879 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
880 column_perlexpr($groupby, $Global::header, $opt::colsep);
881 # Numbered 0..n-1 due to being used by $F[n]
882 if($group_by::col) { $group_by::col--; }
884 my $loop = ::spacefree(0,q{
885 BEGIN{ $last = "RECSEP"; }
887 local $_=COLVALUE;
888 PERLEXPR;
889 if(($last) ne $_) {
890 print "RECSEP";
891 $last = $_;
895 if(defined $group_by::col) {
896 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
897 } else {
898 $loop =~ s/COLVALUE/\$_/g;
900 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
901 $loop =~ s/RECSEP/$recsep/g;
902 return $loop;
905 sub group_by_stdin_filter() {
906 # Record separator with 119 bit random value
907 $opt::recend = '';
908 $opt::recstart =
909 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
910 $opt::remove_rec_sep = 1;
911 my @filter;
912 push @filter, "perl";
913 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
914 # This is column number/name
915 # Use -a (auto-split)
916 push @filter, "-a";
917 $opt::colsep ||= "\t";
918 my $sep = $opt::colsep;
919 $sep =~ s/\t/\\t/g;
920 $sep =~ s/\"/\\"/g;
921 push @filter, "-F$sep";
923 push @filter, "-pe";
924 push @filter, group_by_loop(*STDIN,$opt::recstart);
925 ::debug("init", "@filter\n");
926 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
929 sub spreadstdin() {
930 # read a record
931 # Spawn a job and print the record to it.
932 # Uses:
933 # $Global::blocksize
934 # STDIN
935 # $opt::r
936 # $Global::max_lines
937 # $Global::max_number_of_args
938 # $opt::regexp
939 # $Global::start_no_new_jobs
940 # $opt::roundrobin
941 # %Global::running
942 # Returns: N/A
944 my $buf = "";
945 my ($recstart,$recend) = recstartrecend();
946 my $recendrecstart = $recend.$recstart;
947 my $chunk_number = 1;
948 my $one_time_through;
949 my $two_gb = 2**31-1;
950 my $blocksize = $Global::blocksize;
951 my $in = *STDIN;
952 my $timeout = $Global::blocktimeout;
954 my $header = find_header(\$buf,$in);
955 my $anything_written;
956 my $eof;
958 sub read_block() {
959 # Read a --blocksize from STDIN
960 # possibly interrupted by --blocktimeout
961 # Add up to the next full block
962 my $readsize = $blocksize - (length $buf) % $blocksize;
963 my ($nread,$alarm);
964 eval {
965 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
966 # --blocktimeout (or 0 if not set)
967 alarm $timeout;
968 if($] >= 5.026) {
969 do {
970 $nread = sysread $in, $buf, $readsize, length $buf;
971 $readsize -= $nread;
972 } while($readsize and $nread);
973 } else {
974 # Less efficient reading, but 32-bit sysread compatible
975 do {
976 $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
977 $readsize -= $nread;
978 } while($readsize and $nread);
980 alarm 0;
982 if ($@) {
983 die unless $@ eq "alarm\n"; # propagate unexpected errors
984 $alarm = 1;
985 } else {
986 $alarm = 0;
988 $eof = not ($nread or $alarm);
991 sub pass_n_line_records() {
992 # Pass records of N lines
993 my $n_lines = $buf =~ tr/\n/\n/;
994 my $last_newline_pos = rindex64(\$buf,"\n");
995 # Go backwards until there are full n-line records
996 while($n_lines % $Global::max_lines) {
997 $n_lines--;
998 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1000 # Chop at $last_newline_pos as that is where n-line record ends
1001 $anything_written +=
1002 write_record_to_pipe($chunk_number++,\$header,\$buf,
1003 $recstart,$recend,$last_newline_pos+1);
1004 shorten(\$buf,$last_newline_pos+1);
1007 sub pass_n_regexps() {
1008 # Pass records of N regexps
1009 # -N => (start..*?end){n}
1010 # -L -N => (start..*?end){n*l}
1011 my $read_n_lines = -1+
1012 $Global::max_number_of_args * ($Global::max_lines || 1);
1013 # (?!negative lookahead) is needed to avoid backtracking
1014 # See: https://unix.stackexchange.com/questions/439356/
1015 while($buf =~
1017 # Either recstart or at least one char from start
1018 ^(?: $recstart | .)
1019 # followed something
1020 (?:(?!$recend$recstart).)*?
1021 # and then recend
1022 $recend
1023 # Then n-1 times recstart.*recend
1024 (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
1026 # Followed by recstart
1027 (?=$recstart)/osx) {
1028 $anything_written +=
1029 write_record_to_pipe($chunk_number++,\$header,\$buf,
1030 $recstart,$recend,length $1);
1031 shorten(\$buf,length $1);
1035 sub pass_regexp() {
1036 # Find the last recend-recstart in $buf
1037 $eof and return;
1038 if($buf =~ /^(.*$recend)$recstart.*?$/os) {
1039 $anything_written +=
1040 write_record_to_pipe($chunk_number++,\$header,\$buf,
1041 $recstart,$recend,length $1);
1042 shorten(\$buf,length $1);
1046 sub pass_csv_record() {
1047 # Pass CVS record
1048 # We define a CSV record as an even number of " + end of line
1049 # This works if you use " as quoting character
1050 my $last_newline_pos = length $buf;
1051 # Go backwards from the last \n and search for a position
1052 # where there is an even number of "
1053 do {
1054 # find last EOL
1055 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1056 # While uneven "
1057 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1058 and $last_newline_pos >= 0);
1059 # Chop at $last_newline_pos as that is where CSV record ends
1060 $anything_written +=
1061 write_record_to_pipe($chunk_number++,\$header,\$buf,
1062 $recstart,$recend,$last_newline_pos+1);
1063 shorten(\$buf,$last_newline_pos+1);
1066 sub pass_n() {
1067 # Pass n records of --recend/--recstart
1068 # -N => (start..*?end){n}
1069 my $i = 0;
1070 my $read_n_lines =
1071 $Global::max_number_of_args * ($Global::max_lines || 1);
1072 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1074 length $buf) {
1075 $i += length $recend; # find the actual splitting location
1076 $anything_written +=
1077 write_record_to_pipe($chunk_number++,\$header,\$buf,
1078 $recstart,$recend,$i);
1079 shorten(\$buf,$i);
1083 sub pass() {
1084 # Pass records of --recend/--recstart
1085 # Split record at fixed string
1086 # Find the last recend+recstart in $buf
1087 $eof and return;
1088 my $i = rindex64(\$buf,$recendrecstart);
1089 if($i != -1) {
1090 $i += length $recend; # find the actual splitting location
1091 $anything_written +=
1092 write_record_to_pipe($chunk_number++,\$header,\$buf,
1093 $recstart,$recend,$i);
1094 shorten(\$buf,$i);
1098 sub increase_blocksize_maybe() {
1099 if(not $anything_written
1100 and not $opt::blocktimeout
1101 and not $Global::no_autoexpand_block) {
1102 # Nothing was written - maybe the block size < record size?
1103 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1104 if($blocksize < $two_gb) {
1105 my $old_blocksize = $blocksize;
1106 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1107 ::warning("A record was longer than $old_blocksize. " .
1108 "Increasing to --blocksize $blocksize.");
1113 while(1) {
1114 $anything_written = 0;
1115 read_block();
1116 if($opt::r) {
1117 # Remove empty lines
1118 $buf =~ s/^\s*\n//gm;
1119 if(length $buf == 0) {
1120 if($eof) {
1121 last;
1122 } else {
1123 next;
1127 if($Global::max_lines and not $Global::max_number_of_args) {
1128 # Pass n-line records
1129 pass_n_line_records();
1130 } elsif($opt::csv) {
1131 # Pass a full CSV record
1132 pass_csv_record();
1133 } elsif($opt::regexp) {
1134 # Split record at regexp
1135 if($Global::max_number_of_args) {
1136 pass_n_regexps();
1137 } else {
1138 pass_regexp();
1140 } else {
1141 # Pass normal --recend/--recstart record
1142 if($Global::max_number_of_args) {
1143 pass_n();
1144 } else {
1145 pass();
1148 $eof and last;
1149 increase_blocksize_maybe();
1150 ::debug("init", "Round\n");
1152 ::debug("init", "Done reading input\n");
1154 # If there is anything left in the buffer write it
1155 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1156 $recend, length $buf);
1158 if($opt::retries) {
1159 $Global::no_more_input = 1;
1160 # We need to start no more jobs: At most we need to retry some
1161 # of the already running.
1162 my @running = values %Global::running;
1163 # Stop any virgins.
1164 for my $job (@running) {
1165 if(defined $job and $job->virgin()) {
1166 close $job->fh(0,"w");
1169 # Wait for running jobs to be done
1170 my $sleep = 1;
1171 while($Global::total_running > 0) {
1172 $sleep = ::reap_usleep($sleep);
1173 start_more_jobs();
1176 $Global::start_no_new_jobs ||= 1;
1177 if($opt::roundrobin) {
1178 # Flush blocks to roundrobin procs
1179 my $sleep = 1;
1180 while(%Global::running) {
1181 my $something_written = 0;
1182 for my $job (values %Global::running) {
1183 if($job->block_length()) {
1184 $something_written += $job->non_blocking_write();
1185 } else {
1186 close $job->fh(0,"w");
1189 if($something_written) {
1190 $sleep = $sleep/2+0.001;
1192 $sleep = ::reap_usleep($sleep);
1197 sub recstartrecend() {
1198 # Uses:
1199 # $opt::recstart
1200 # $opt::recend
1201 # Returns:
1202 # $recstart,$recend with default values and regexp conversion
1203 my($recstart,$recend);
1204 if(defined($opt::recstart) and defined($opt::recend)) {
1205 # If both --recstart and --recend is given then both must match
1206 $recstart = $opt::recstart;
1207 $recend = $opt::recend;
1208 } elsif(defined($opt::recstart)) {
1209 # If --recstart is given it must match start of record
1210 $recstart = $opt::recstart;
1211 $recend = "";
1212 } elsif(defined($opt::recend)) {
1213 # If --recend is given then it must match end of record
1214 $recstart = "";
1215 $recend = $opt::recend;
1216 if($opt::regexp and $recend eq '') {
1217 # --regexp --recend ''
1218 $recend = '.';
1222 if($opt::regexp) {
1223 # If $recstart/$recend contains '|'
1224 # this should only apply to the regexp
1225 $recstart = "(?:".$recstart.")";
1226 $recend = "(?:".$recend.")";
1227 } else {
1228 # $recstart/$recend = printf strings (\n)
1229 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1230 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1232 return ($recstart,$recend);
1235 sub nindex($$) {
1236 # See if string is in buffer N times
1237 # Returns:
1238 # the position where the Nth copy is found
1239 my ($buf_ref, $str, $n) = @_;
1240 my $i = 0;
1241 for(1..$n) {
1242 $i = index64($buf_ref,$str,$i+1);
1243 if($i == -1) { last }
1245 return $i;
1249 my @robin_queue;
1250 my $sleep = 1;
1252 sub round_robin_write($$$$$) {
1253 # Input:
1254 # $header_ref = ref to $header string
1255 # $block_ref = ref to $block to be written
1256 # $recstart = record start string
1257 # $recend = record end string
1258 # $endpos = end position of $block
1259 # Uses:
1260 # %Global::running
1261 # Returns:
1262 # $something_written = amount of bytes written
1263 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1264 my $written = 0;
1265 my $block_passed = 0;
1266 while(not $block_passed) {
1267 # Continue flushing existing buffers
1268 # until one is empty and a new block is passed
1269 if(@robin_queue) {
1270 # Rotate queue once so new blocks get a fair chance
1271 # to be given to another slot
1272 push @robin_queue, shift @robin_queue;
1273 } else {
1274 # Make a queue to spread the blocks evenly
1275 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1276 values %Global::running);
1278 do {
1279 $written = 0;
1280 for my $job (@robin_queue) {
1281 if($job->block_length() > 0) {
1282 $written += $job->non_blocking_write();
1283 } else {
1284 $job->set_block($header_ref, $buffer_ref,
1285 $endpos, $recstart, $recend);
1286 $block_passed = 1;
1287 $job->set_virgin(0);
1288 $written += $job->non_blocking_write();
1289 last;
1292 if($written) {
1293 $sleep = $sleep/1.5+0.001;
1295 # Don't sleep if something is written
1296 } while($written and not $block_passed);
1297 $sleep = ::reap_usleep($sleep);
1299 return $written;
1303 sub index64($$$) {
1304 # Do index on strings > 2GB.
1305 # index in Perl < v5.22 does not work for > 2GB
1306 # Input:
1307 # as index except STR which must be passed as a reference
1308 # Output:
1309 # as index
1310 my $ref = shift;
1311 my $match = shift;
1312 my $pos = shift || 0;
1313 my $block_size = 2**31-1;
1314 my $strlen = length($$ref);
1315 # No point in doing extra work if we don't need to.
1316 if($strlen < $block_size or $] > 5.022) {
1317 return index($$ref, $match, $pos);
1320 my $matchlen = length($match);
1321 my $ret;
1322 my $offset = $pos;
1323 while($offset < $strlen) {
1324 $ret = index(
1325 substr($$ref, $offset, $block_size),
1326 $match, $pos-$offset);
1327 if($ret != -1) {
1328 return $ret + $offset;
1330 $offset += ($block_size - $matchlen - 1);
1332 return -1;
1335 sub rindex64($@) {
1336 # Do rindex on strings > 2GB.
1337 # rindex in Perl < v5.22 does not work for > 2GB
1338 # Input:
1339 # as rindex except STR which must be passed as a reference
1340 # Output:
1341 # as rindex
1342 my $ref = shift;
1343 my $match = shift;
1344 my $pos = shift;
1345 my $block_size = 2**31-1;
1346 my $strlen = length($$ref);
1347 # Default: search from end
1348 $pos = defined $pos ? $pos : $strlen;
1349 # No point in doing extra work if we don't need to.
1350 if($strlen < $block_size) {
1351 return rindex($$ref, $match, $pos);
1354 my $matchlen = length($match);
1355 my $ret;
1356 my $offset = $pos - $block_size + $matchlen;
1357 if($offset < 0) {
1358 # The offset is less than a $block_size
1359 # Set the $offset to 0 and
1360 # Adjust block_size accordingly
1361 $block_size = $block_size + $offset;
1362 $offset = 0;
1364 while($offset >= 0) {
1365 $ret = rindex(
1366 substr($$ref, $offset, $block_size),
1367 $match);
1368 if($ret != -1) {
1369 return $ret + $offset;
1371 $offset -= ($block_size - $matchlen - 1);
1373 return -1;
1376 sub shorten($$) {
1377 # Do: substr($buf,0,$i) = "";
1378 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1379 # Input:
1380 # $buf_ref = \$buf
1381 # $i = position to shorten to
1382 # Returns: N/A
1383 my ($buf_ref, $i) = @_;
1384 my $two_gb = 2**31-1;
1385 while($i > $two_gb) {
1386 substr($$buf_ref,0,$two_gb) = "";
1387 $i -= $two_gb;
1389 substr($$buf_ref,0,$i) = "";
1392 sub write_record_to_pipe($$$$$$) {
1393 # Fork then
1394 # Write record from pos 0 .. $endpos to pipe
1395 # Input:
1396 # $chunk_number = sequence number - to see if already run
1397 # $header_ref = reference to header string to prepend
1398 # $buffer_ref = reference to record to write
1399 # $recstart = start string of record
1400 # $recend = end string of record
1401 # $endpos = position in $buffer_ref where record ends
1402 # Uses:
1403 # $Global::job_already_run
1404 # $opt::roundrobin
1405 # @Global::virgin_jobs
1406 # Returns:
1407 # Number of chunks written (0 or 1)
1408 my ($chunk_number, $header_ref, $buffer_ref,
1409 $recstart, $recend, $endpos) = @_;
1410 if($endpos == 0) { return 0; }
1411 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1412 if($opt::roundrobin) {
1413 # Write the block to one of the already running jobs
1414 return round_robin_write($header_ref, $buffer_ref,
1415 $recstart, $recend, $endpos);
1417 # If no virgin found, backoff
1418 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1419 while(not @Global::virgin_jobs) {
1420 ::debug("pipe", "No virgin jobs");
1421 $sleep = ::reap_usleep($sleep);
1422 # Jobs may not be started because of loadavg
1423 # or too little time between each ssh login
1424 # or retrying failed jobs.
1425 start_more_jobs();
1427 my $job = shift @Global::virgin_jobs;
1428 # Job is no longer virgin
1429 $job->set_virgin(0);
1431 if($opt::retries) {
1432 # Copy $buffer[0..$endpos] to $job->{'block'}
1433 # Remove rec_sep
1434 # Run $job->add_transfersize
1435 $job->set_block($header_ref, $buffer_ref, $endpos,
1436 $recstart, $recend);
1437 if(fork()) {
1438 # Skip
1439 } else {
1440 $job->write($job->block_ref());
1441 close $job->fh(0,"w");
1442 exit(0);
1444 } else {
1445 # We ignore the removed rec_sep which is technically wrong.
1446 $job->add_transfersize($endpos + length $$header_ref);
1447 if(fork()) {
1448 # Skip
1449 } else {
1450 # Chop of at $endpos as we do not know how many rec_sep will
1451 # be removed.
1452 substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
1453 # Remove rec_sep
1454 if($opt::remove_rec_sep) {
1455 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
1457 $job->write($header_ref);
1458 $job->write($buffer_ref);
1459 close $job->fh(0,"w");
1460 exit(0);
1463 close $job->fh(0,"w");
1464 return 1;
1468 sub __SEM_MODE__() {}
1471 sub acquire_semaphore() {
1472 # Acquires semaphore. If needed: spawns to the background
1473 # Uses:
1474 # @Global::host
1475 # Returns:
1476 # The semaphore to be released when jobs is complete
1477 $Global::host{':'} = SSHLogin->new(":");
1478 my $sem = Semaphore->new($Semaphore::name,
1479 $Global::host{':'}->max_jobs_running());
1480 $sem->acquire();
1481 if($Semaphore::fg) {
1482 # skip
1483 } else {
1484 if(fork()) {
1485 exit(0);
1486 } else {
1487 # If run in the background, the PID will change
1488 $sem->pid_change();
1491 return $sem;
1495 sub __PARSE_OPTIONS__() {}
1498 sub options_hash() {
1499 # Returns:
1500 # %hash = the GetOptions config
1501 return
1502 ("debug|D=s" => \$opt::D,
1503 "xargs" => \$opt::xargs,
1504 "m" => \$opt::m,
1505 "X" => \$opt::X,
1506 "v" => \@opt::v,
1507 "sql=s" => \$opt::retired,
1508 "sqlmaster=s" => \$opt::sqlmaster,
1509 "sqlworker=s" => \$opt::sqlworker,
1510 "sqlandworker=s" => \$opt::sqlandworker,
1511 "joblog|jl=s" => \$opt::joblog,
1512 "results|result|res=s" => \$opt::results,
1513 "resume" => \$opt::resume,
1514 "resume-failed|resumefailed" => \$opt::resume_failed,
1515 "retry-failed|retryfailed" => \$opt::retry_failed,
1516 "silent" => \$opt::silent,
1517 "keep-order|keeporder|k" => \$opt::keeporder,
1518 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1519 "group" => \$opt::group,
1520 "g" => \$opt::retired,
1521 "ungroup|u" => \$opt::ungroup,
1522 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1523 => \$opt::linebuffer,
1524 "tmux" => \$opt::tmux,
1525 "tmuxpane" => \$opt::tmuxpane,
1526 "null|0" => \$opt::null,
1527 "quote|q" => \$opt::quote,
1528 # Replacement strings
1529 "parens=s" => \$opt::parens,
1530 "rpl=s" => \@opt::rpl,
1531 "plus" => \$opt::plus,
1532 "I=s" => \$opt::I,
1533 "extensionreplace|er=s" => \$opt::U,
1534 "U=s" => \$opt::retired,
1535 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1536 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1537 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1538 "seqreplace=s" => \$opt::seqreplace,
1539 "slotreplace=s" => \$opt::slotreplace,
1540 "jobs|j=s" => \$opt::jobs,
1541 "delay=s" => \$opt::delay,
1542 "sshdelay=f" => \$opt::sshdelay,
1543 "load=s" => \$opt::load,
1544 "noswap" => \$opt::noswap,
1545 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1546 "number-of-cpus" => \$opt::number_of_cpus,
1547 "number-of-sockets" => \$opt::number_of_sockets,
1548 "number-of-cores" => \$opt::number_of_cores,
1549 "number-of-threads" => \$opt::number_of_threads,
1550 "use-sockets-instead-of-threads"
1551 => \$opt::use_sockets_instead_of_threads,
1552 "use-cores-instead-of-threads"
1553 => \$opt::use_cores_instead_of_threads,
1554 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1555 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1556 "nice=i" => \$opt::nice,
1557 "tag" => \$opt::tag,
1558 "tagstring|tag-string=s" => \$opt::tagstring,
1559 "onall" => \$opt::onall,
1560 "nonall" => \$opt::nonall,
1561 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1562 "sshlogin|S=s" => \@opt::sshlogin,
1563 "sshloginfile|slf=s" => \@opt::sshloginfile,
1564 "controlmaster|M" => \$opt::controlmaster,
1565 "ssh=s" => \$opt::ssh,
1566 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1567 => \@opt::transfer_files,
1568 "return=s" => \@opt::return,
1569 "trc=s" => \@opt::trc,
1570 "transfer" => \$opt::transfer,
1571 "cleanup" => \$opt::cleanup,
1572 "basefile|bf=s" => \@opt::basefile,
1573 "B=s" => \$opt::retired,
1574 "ctrlc|ctrl-c" => \$opt::retired,
1575 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1576 "workdir|work-dir|wd=s" => \$opt::workdir,
1577 "W=s" => \$opt::retired,
1578 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1579 "tmpdir|tempdir=s" => \$opt::tmpdir,
1580 "use-compress-program|compress-program=s" => \$opt::compress_program,
1581 "use-decompress-program|decompress-program=s"
1582 => \$opt::decompress_program,
1583 "compress" => \$opt::compress,
1584 "tty" => \$opt::tty,
1585 "T" => \$opt::retired,
1586 "H=i" => \$opt::retired,
1587 "dry-run|dryrun|dr" => \$opt::dryrun,
1588 "progress" => \$opt::progress,
1589 "eta" => \$opt::eta,
1590 "bar" => \$opt::bar,
1591 "shuf" => \$opt::shuf,
1592 "arg-sep|argsep=s" => \$opt::arg_sep,
1593 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1594 "trim=s" => \$opt::trim,
1595 "env=s" => \@opt::env,
1596 "recordenv|record-env" => \$opt::record_env,
1597 "session" => \$opt::session,
1598 "plain" => \$opt::plain,
1599 "profile|J=s" => \@opt::profile,
1600 "pipe|spreadstdin" => \$opt::pipe,
1601 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1602 "recstart=s" => \$opt::recstart,
1603 "recend=s" => \$opt::recend,
1604 "regexp|regex" => \$opt::regexp,
1605 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1606 "files|output-as-files|outputasfiles" => \$opt::files,
1607 "block|block-size|blocksize=s" => \$opt::blocksize,
1608 "blocktimeout|block-timeout|bt=s" => \$opt::blocktimeout,
1609 "tollef" => \$opt::tollef,
1610 "gnu" => \$opt::gnu,
1611 "link|xapply" => \$opt::link,
1612 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1613 # Before changing this line, please read
1614 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1615 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1616 "bibtex|citation" => \$opt::citation,
1617 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1618 # Termination and retries
1619 "halt-on-error|halt=s" => \$opt::halt,
1620 "limit=s" => \$opt::limit,
1621 "memfree=s" => \$opt::memfree,
1622 "memsuspend=s" => \$opt::memsuspend,
1623 "retries=s" => \$opt::retries,
1624 "timeout=s" => \$opt::timeout,
1625 "termseq|term-seq=s" => \$opt::termseq,
1626 # xargs-compatibility - implemented, man, testsuite
1627 "max-procs|P=s" => \$opt::jobs,
1628 "delimiter|d=s" => \$opt::d,
1629 "max-chars|s=i" => \$opt::max_chars,
1630 "arg-file|a=s" => \@opt::a,
1631 "no-run-if-empty|r" => \$opt::r,
1632 "replace|i:s" => \$opt::i,
1633 "E=s" => \$opt::eof,
1634 "eof|e:s" => \$opt::eof,
1635 "max-args|maxargs|n=i" => \$opt::max_args,
1636 "max-replace-args|N=i" => \$opt::max_replace_args,
1637 "colsep|col-sep|C=s" => \$opt::colsep,
1638 "csv"=> \$opt::csv,
1639 "help|h" => \$opt::help,
1640 "L=f" => \$opt::L,
1641 "max-lines|l:f" => \$opt::max_lines,
1642 "interactive|p" => \$opt::interactive,
1643 "verbose|t" => \$opt::verbose,
1644 "version|V" => \$opt::version,
1645 "minversion|min-version=i" => \$opt::minversion,
1646 "show-limits|showlimits" => \$opt::show_limits,
1647 "exit|x" => \$opt::x,
1648 # Semaphore
1649 "semaphore" => \$opt::semaphore,
1650 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1651 "semaphorename|id=s" => \$opt::semaphorename,
1652 "fg" => \$opt::fg,
1653 "bg" => \$opt::bg,
1654 "wait" => \$opt::wait,
1655 # Shebang #!/usr/bin/parallel --shebang
1656 "shebang|hashbang" => \$opt::shebang,
1657 "internal-pipe-means-argfiles"
1658 => \$opt::internal_pipe_means_argfiles,
1659 "Y" => \$opt::retired,
1660 "skip-first-line" => \$opt::skip_first_line,
1661 "bug" => \$opt::bug,
1662 "header=s" => \$opt::header,
1663 "cat" => \$opt::cat,
1664 "fifo" => \$opt::fifo,
1665 "pipepart|pipe-part" => \$opt::pipepart,
1666 "tee" => \$opt::tee,
1667 "shard=s" => \$opt::shard,
1668 "bin=s" => \$opt::bin,
1669 "groupby|group-by=s" => \$opt::groupby,
1670 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1671 "embed" => \$opt::embed,
1675 sub get_options_from_array($@) {
1676 # Run GetOptions on @array
1677 # Input:
1678 # $array_ref = ref to @ARGV to parse
1679 # @keep_only = Keep only these options
1680 # Uses:
1681 # @ARGV
1682 # Returns:
1683 # true if parsing worked
1684 # false if parsing failed
1685 # @$array_ref is changed
1686 my ($array_ref, @keep_only) = @_;
1687 if(not @$array_ref) {
1688 # Empty array: No need to look more at that
1689 return 1;
1691 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1692 # supported everywhere
1693 my @save_argv;
1694 my $this_is_ARGV = (\@::ARGV == $array_ref);
1695 if(not $this_is_ARGV) {
1696 @save_argv = @::ARGV;
1697 @::ARGV = @{$array_ref};
1699 # If @keep_only set: Ignore all values except @keep_only
1700 my %options = options_hash();
1701 if(@keep_only) {
1702 my (%keep,@dummy);
1703 @keep{@keep_only} = @keep_only;
1704 for my $k (grep { not $keep{$_} } keys %options) {
1705 # Store the value of the option in @dummy
1706 $options{$k} = \@dummy;
1709 my $retval = GetOptions(%options);
1710 if(not $this_is_ARGV) {
1711 @{$array_ref} = @::ARGV;
1712 @::ARGV = @save_argv;
1714 return $retval;
1717 sub parse_options(@) {
1718 # Returns: N/A
1719 init_globals();
1720 my @argv_before = @ARGV;
1721 @ARGV = read_options();
1723 # Before changing this line, please read
1724 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1725 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1726 if(defined $opt::citation) {
1727 citation(\@argv_before,\@ARGV);
1728 wait_and_exit(0);
1730 # no-* overrides *
1731 if($opt::nokeeporder) { $opt::keeporder = undef; }
1733 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1734 if($opt::bug) { ::die_bug("test-bug"); }
1735 $Global::debug = $opt::D;
1736 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1737 || $ENV{'SHELL'} || "/bin/sh";
1738 if(not -x $Global::shell and not which($Global::shell)) {
1739 ::error("Shell '$Global::shell' not found.");
1740 wait_and_exit(255);
1742 ::debug("init","Global::shell $Global::shell\n");
1743 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1744 if(defined $opt::X) { $Global::ContextReplace = 1; }
1745 if(defined $opt::silent) { $Global::verbose = 0; }
1746 if(defined $opt::null) { $/ = "\0"; }
1747 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1748 parse_replacement_string_options();
1749 if(defined $opt::tagstring) {
1750 $opt::tagstring = unquote_printf($opt::tagstring);
1751 if($opt::tagstring =~ /\Q$Global::parensleft\E.*\Q$Global::parensright\E/
1753 $opt::linebuffer) {
1754 # --tagstring contains {= =} and --linebuffer =>
1755 # recompute replacement string for each use (do not cache)
1756 $Global::cache_replacement_eval = 0;
1759 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1760 if(defined $opt::quote) { $Global::quoting = 1; }
1761 if(defined $opt::r) { $Global::ignore_empty = 1; }
1762 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1763 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1764 if(defined $opt::max_args) {
1765 $Global::max_number_of_args = $opt::max_args;
1767 if(defined $opt::blocktimeout) {
1768 $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
1769 if($Global::blocktimeout < 1) {
1770 ::error("--block-timeout must be at least 1");
1771 wait_and_exit(255);
1774 if(defined $opt::timeout) {
1775 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1777 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1778 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1779 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1780 # Default: Same nice level as GNU Parallel is started at
1781 $opt::nice ||= eval { getpriority(0,0) } || 0;
1782 if(defined $opt::help) { usage(); exit(0); }
1783 if(defined $opt::embed) { embed(); exit(0); }
1784 if(defined $opt::sqlandworker) {
1785 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1787 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1788 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1789 if(defined $opt::csv) {
1790 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1791 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1792 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1793 my $sep = $csv_setting->{sep_char};
1794 $Global::csv = Text::CSV->new($csv_setting)
1795 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1797 if(defined $opt::header) {
1798 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1800 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1801 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1802 if(defined $opt::arg_file_sep) {
1803 $Global::arg_file_sep = $opt::arg_file_sep;
1805 if(defined $opt::number_of_sockets) {
1806 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1808 if(defined $opt::number_of_cpus) {
1809 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1811 if(defined $opt::number_of_cores) {
1812 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1814 if(defined $opt::number_of_threads) {
1815 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1817 if(defined $opt::max_line_length_allowed) {
1818 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1820 if(defined $opt::version) { version(); wait_and_exit(0); }
1821 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1822 if(defined $opt::show_limits) { show_limits(); }
1823 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1824 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1825 if(@opt::return) { push @Global::ret_files, @opt::return; }
1826 if($opt::transfer) {
1827 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1829 push @Global::transfer_files, @opt::transfer_files;
1830 if(not defined $opt::recstart and
1831 not defined $opt::recend) { $opt::recend = "\n"; }
1832 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1833 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1834 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1835 $Global::blocksize = 2**31-1;
1837 if($^O eq "cygwin" and
1838 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1839 and $Global::blocksize > 65535) {
1840 warning("--blocksize >= 64K causes problems on Cygwin.");
1842 $opt::memfree = multiply_binary_prefix($opt::memfree);
1843 $opt::memsuspend = multiply_binary_prefix($opt::memsuspend);
1844 $Global::memlimit = $opt::memsuspend + $opt::memfree;
1845 check_invalid_option_combinations();
1846 if((defined $opt::fifo or defined $opt::cat)
1847 and not $opt::pipepart) {
1848 $opt::pipe = 1;
1850 if(defined $opt::minversion) {
1851 print $Global::version,"\n";
1852 if($Global::version < $opt::minversion) {
1853 wait_and_exit(255);
1854 } else {
1855 wait_and_exit(0);
1858 if(not defined $opt::delay) {
1859 # Set --delay to --sshdelay if not set
1860 $opt::delay = $opt::sshdelay;
1862 $opt::delay = multiply_time_units($opt::delay);
1863 if($opt::compress_program) {
1864 $opt::compress = 1;
1865 $opt::decompress_program ||= $opt::compress_program." -dc";
1868 if(defined $opt::results) {
1869 # Is the output a dir or CSV-file?
1870 if($opt::results =~ /\.csv$/i) {
1871 # CSV with , as separator
1872 $Global::csvsep = ",";
1873 $Global::membuffer ||= 1;
1874 } elsif($opt::results =~ /\.tsv$/i) {
1875 # CSV with TAB as separator
1876 $Global::csvsep = "\t";
1877 $Global::membuffer ||= 1;
1880 if($opt::compress) {
1881 my ($compress, $decompress) = find_compression_program();
1882 $opt::compress_program ||= $compress;
1883 $opt::decompress_program ||= $decompress;
1884 if(($opt::results and not $Global::csvsep) or $opt::files) {
1885 # No need for decompressing
1886 $opt::decompress_program = "cat >/dev/null";
1889 if(defined $opt::dryrun) {
1890 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1891 $opt::ungroup = 0;
1892 $opt::group = 1;
1894 if(defined $opt::nonall) {
1895 # Append a dummy empty argument if there are no arguments
1896 # on the command line to avoid reading from STDIN.
1897 # arg_sep = random 50 char
1898 # \0noarg => nothing (not the empty string)
1899 $Global::arg_sep = join "",
1900 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1901 push @ARGV, $Global::arg_sep, "\0noarg";
1903 if(defined $opt::tee) {
1904 if(not defined $opt::jobs) {
1905 $opt::jobs = 0;
1908 if(defined $opt::tty) {
1909 # Defaults for --tty: -j1 -u
1910 # Can be overridden with -jXXX -g
1911 if(not defined $opt::jobs) {
1912 $opt::jobs = 1;
1914 if(not defined $opt::group) {
1915 $opt::ungroup = 1;
1918 if(@opt::trc) {
1919 push @Global::ret_files, @opt::trc;
1920 if(not @Global::transfer_files) {
1921 # Defaults to --transferfile {}
1922 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1924 $opt::cleanup = 1;
1926 if(defined $opt::max_lines) {
1927 if($opt::max_lines eq "-0") {
1928 # -l -0 (swallowed -0)
1929 $opt::max_lines = 1;
1930 $opt::null = 1;
1931 $/ = "\0";
1932 } elsif ($opt::max_lines == 0) {
1933 # If not given (or if 0 is given) => 1
1934 $opt::max_lines = 1;
1936 $Global::max_lines = $opt::max_lines;
1937 if(not $opt::pipe) {
1938 # --pipe -L means length of record - not max_number_of_args
1939 $Global::max_number_of_args ||= $Global::max_lines;
1943 # Read more than one arg at a time (-L, -N)
1944 if(defined $opt::L) {
1945 $Global::max_lines = $opt::L;
1946 if(not $opt::pipe) {
1947 # --pipe -L means length of record - not max_number_of_args
1948 $Global::max_number_of_args ||= $Global::max_lines;
1951 if(defined $opt::max_replace_args) {
1952 $Global::max_number_of_args = $opt::max_replace_args;
1953 $Global::ContextReplace = 1;
1955 if((defined $opt::L or defined $opt::max_replace_args)
1957 not ($opt::xargs or $opt::m)) {
1958 $Global::ContextReplace = 1;
1960 if(defined $opt::tag and not defined $opt::tagstring) {
1961 # Default = {}
1962 $opt::tagstring = $Global::parensleft.$Global::parensright;
1964 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1965 # Deal with ::: :::+ :::: and ::::+
1966 @ARGV = read_args_from_command_line();
1968 parse_semaphore();
1970 if(defined $opt::eta) { $opt::progress = $opt::eta; }
1971 if(defined $opt::bar) { $opt::progress = $opt::bar; }
1973 # Funding a free software project is hard. GNU Parallel is no
1974 # exception. On top of that it seems the less visible a project
1975 # is, the harder it is to get funding. And the nature of GNU
1976 # Parallel is that it will never be seen by "the guy with the
1977 # checkbook", but only by the people doing the actual work.
1979 # This problem has been covered by others - though no solution has
1980 # been found:
1981 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1982 # https://blog.licensezero.com/2019/08/24/process-of-elimination.html
1983 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1985 # The FAQ tells you why the citation notice exists:
1986 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1988 # If you want GNU Parallel to be maintained in the future, and not
1989 # just wither away like so many other free software tools, you
1990 # need to help finance the development.
1992 # The citation notice is a simple way of doing so, as citations
1993 # makes it possible to me to get a job where I can maintain GNU
1994 # Parallel as part of the job.
1996 # This means you can help financing development
1998 # WITHOUT PAYING A SINGLE CENT!
2000 # Before implementing the citation notice it was discussed with
2001 # the users:
2002 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2004 # Having to spend 10 seconds on running 'parallel --citation' once
2005 # is no doubt not an ideal solution, but no one has so far come up
2006 # with an ideal solution - neither for funding GNU Parallel nor
2007 # other free software.
2009 # If you believe you have the perfect solution, you should try it
2010 # out, and if it works, you should post it on the email
2011 # list. Ideas that will cost work and which have not been tested
2012 # are, however, unlikely to be prioritized.
2014 # Please note that GPL version 3 gives you the right to fork GNU
2015 # Parallel under a new name, but it does not give you the right to
2016 # distribute modified copies with the citation notice disabled in
2017 # a way where the software can be confused with GNU Parallel. To
2018 # do that you need to be the owner of the GNU Parallel
2019 # trademark. The xt:Commerce case shows this.
2021 # Description of the xt:Commerce case in OLG Duesseldorf
2022 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2023 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2025 # The verdict in German
2026 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2027 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2029 # Other free software limiting derivates by the same name:
2030 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2031 # https://tm.joomla.org/trademark-faq.html
2032 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2034 # Running 'parallel --citation' one single time takes less than 10
2035 # seconds, and will silence the citation notice for future
2036 # runs. If that is too much trouble for you, why not use one of
2037 # the alternatives instead?
2038 # See a list in: 'man parallel_alternatives'
2040 # If you want GNU Parallel to be maintained in the future keep
2041 # this line.
2042 citation_notice();
2043 # Seriously: _YOU_ will be harming free software by removing the
2044 # notice. _YOU_ make it harder to justify spending time developing
2045 # it. If you *do* remove the line, please email
2046 # hallofshame@tange.dk if you want to avoid being put in a hall of
2047 # shame.
2049 parse_halt();
2051 if($ENV{'PARALLEL_ENV'}) {
2052 # Read environment and set $Global::parallel_env
2053 # Must be done before is_acceptable_command_line_length()
2054 my $penv = $ENV{'PARALLEL_ENV'};
2055 # unset $PARALLEL_ENV: It should not be given to children
2056 # because it takes up a lot of env space
2057 delete $ENV{'PARALLEL_ENV'};
2058 if(-e $penv) {
2059 # This is a file/fifo: Replace envvar with content of file
2060 open(my $parallel_env, "<", $penv) ||
2061 ::die_bug("Cannot read parallel_env from $penv");
2062 local $/; # Put <> in slurp mode
2063 $penv = <$parallel_env>;
2064 close $parallel_env;
2066 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2067 $penv =~ s/\001/\n/g;
2068 if($penv =~ /\0/) {
2069 ::warning('\0 (NUL) in environment is not supported');
2071 $Global::parallel_env = $penv;
2074 parse_sshlogin();
2076 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
2077 # As we do not know the max line length on the remote machine
2078 # long commands generated by xargs may fail
2079 # If $opt::max_replace_args is set, it is probably safe
2080 ::warning("Using -X or -m with --sshlogin may fail.");
2083 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2084 open_joblog();
2085 open_csv();
2086 if($opt::sqlmaster or $opt::sqlworker) {
2087 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2089 if($opt::sqlworker) { $Global::membuffer ||= 1; }
2090 # The sqlmaster groups the arguments, so the should just read one
2091 if($opt::sqlworker and not $opt::sqlmaster) { $Global::max_number_of_args = 1; }
2095 sub check_invalid_option_combinations() {
2096 if(defined $opt::timeout and
2097 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2098 ::error("--timeout must be seconds or percentage.");
2099 wait_and_exit(255);
2101 if(defined $opt::fifo and defined $opt::cat) {
2102 ::error("--fifo cannot be combined with --cat.");
2103 ::wait_and_exit(255);
2105 if(defined $opt::retries and defined $opt::roundrobin) {
2106 ::error("--retries cannot be combined with --roundrobin.");
2107 ::wait_and_exit(255);
2109 if(defined $opt::pipepart and
2110 (defined $opt::L or defined $opt::max_lines
2111 or defined $opt::max_replace_args)) {
2112 ::error("--pipepart is incompatible with --max-replace-args, ".
2113 "--max-lines, and -L.");
2114 wait_and_exit(255);
2116 if(defined $opt::group and $opt::ungroup) {
2117 ::error("--group cannot be combined with --ungroup.");
2118 ::wait_and_exit(255);
2120 if(defined $opt::group and $opt::linebuffer) {
2121 ::error("--group cannot be combined with --line-buffer.");
2122 ::wait_and_exit(255);
2124 if(defined $opt::ungroup and $opt::linebuffer) {
2125 ::error("--ungroup cannot be combined with --line-buffer.");
2126 ::wait_and_exit(255);
2128 if(defined $opt::tollef and not $opt::gnu) {
2129 ::error("--tollef has been retired.",
2130 "Remove --tollef or use --gnu to override --tollef.");
2131 ::wait_and_exit(255);
2133 if(defined $opt::retired) {
2134 ::error("-g has been retired. Use --group.",
2135 "-B has been retired. Use --bf.",
2136 "-T has been retired. Use --tty.",
2137 "-U has been retired. Use --er.",
2138 "-W has been retired. Use --wd.",
2139 "-Y has been retired. Use --shebang.",
2140 "-H has been retired. Use --halt.",
2141 "--sql has been retired. Use --sqlmaster.",
2142 "--ctrlc has been retired.",
2143 "--noctrlc has been retired.");
2144 ::wait_and_exit(255);
2146 if($opt::groupby) {
2147 if(not $opt::pipe and not $opt::pipepart) {
2148 $opt::pipe = 1;
2150 if($opt::remove_rec_sep) {
2151 ::error("--remove-rec-sep is not compatible with --groupby");
2152 ::wait_and_exit(255);
2154 if($opt::recstart) {
2155 ::error("--recstart is not compatible with --groupby");
2156 ::wait_and_exit(255);
2158 if($opt::recend ne "\n") {
2159 ::error("--recend is not compatible with --groupby");
2160 ::wait_and_exit(255);
2165 sub init_globals() {
2166 # Defaults:
2167 $Global::version = 20200923;
2168 $Global::progname = 'parallel';
2169 $::name = "GNU Parallel";
2170 $Global::infinity = 2**31;
2171 $Global::debug = 0;
2172 $Global::verbose = 0;
2173 # Don't quote every part of the command line
2174 $Global::quoting = 0;
2175 # Quote replacement strings
2176 $Global::quote_replace = 1;
2177 $Global::total_completed = 0;
2178 $Global::cache_replacement_eval = 1;
2179 # Read only table with default --rpl values
2180 %Global::replace =
2182 '{}' => '',
2183 '{#}' => '1 $_=$job->seq()',
2184 '{%}' => '1 $_=$job->slot()',
2185 '{/}' => 's:.*/::',
2186 '{//}' =>
2187 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2188 '$_ = dirname($_);'),
2189 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
2190 '{.}' => 's:\.[^/.]+$::',
2192 %Global::plus =
2194 # {} = {+/}/{/}
2195 # = {.}.{+.} = {+/}/{/.}.{+.}
2196 # = {..}.{+..} = {+/}/{/..}.{+..}
2197 # = {...}.{+...} = {+/}/{/...}.{+...}
2198 '{+/}' => 's:/[^/]*$::',
2199 '{+.}' => 's:.*\.::',
2200 '{+..}' => 's:.*\.([^.]*\.):$1:',
2201 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
2202 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
2203 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2204 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2205 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2206 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
2207 # {##} = number of jobs
2208 '{##}' => '$_=total_jobs()',
2209 # Bash ${a:-myval}
2210 '{:-([^}]+?)}' => '$_ ||= $$1',
2211 # Bash ${a:2}
2212 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2213 # Bash ${a:2:3}
2214 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2215 # Bash ${a#bc}
2216 '{#([^#}][^}]*?)}' => 's/^$$1//;',
2217 # Bash ${a%def}
2218 '{%([^}]+?)}' => 's/$$1$//;',
2219 # Bash ${a/def/ghi} ${a/def/}
2220 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2221 # Bash ${a^a}
2222 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2223 # Bash ${a^^a}
2224 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2225 # Bash ${a,A}
2226 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2227 # Bash ${a,,A}
2228 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2229 # {slot} = $PARALLEL_JOBSLOT
2230 '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2231 # {host} = ssh host
2232 '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2233 # {sshlogin} = sshlogin
2234 '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2236 # Modifiable copy of %Global::replace
2237 %Global::rpl = %Global::replace;
2238 $/ = "\n";
2239 $Global::ignore_empty = 0;
2240 $Global::interactive = 0;
2241 $Global::stderr_verbose = 0;
2242 $Global::default_simultaneous_sshlogins = 9;
2243 $Global::exitstatus = 0;
2244 $Global::arg_sep = ":::";
2245 $Global::arg_file_sep = "::::";
2246 $Global::trim = 'n';
2247 $Global::max_jobs_running = 0;
2248 $Global::job_already_run = '';
2249 $ENV{'TMPDIR'} ||= "/tmp";
2250 $ENV{'OLDPWD'} = $ENV{'PWD'};
2251 if(not $ENV{HOME}) {
2252 # $ENV{HOME} is sometimes not set if called from PHP
2253 ::warning("\$HOME not set. Using /tmp.");
2254 $ENV{HOME} = "/tmp";
2256 # no warnings to allow for undefined $XDG_*
2257 no warnings 'uninitialized';
2258 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2259 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2260 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
2261 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
2262 # Keep only dirs that exist
2263 @Global::config_dirs =
2264 (grep { -d $_ }
2265 $ENV{'PARALLEL_HOME'},
2266 (map { "$_/parallel" }
2267 $xdg_config_home,
2268 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2269 $ENV{'HOME'} . "/.parallel");
2270 # Use first dir as config dir
2271 $Global::config_dir = $Global::config_dirs[0] ||
2272 $ENV{'HOME'} . "/.parallel";
2273 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
2274 # Keep only dirs that exist
2275 @Global::cache_dirs =
2276 (grep { -d $_ }
2277 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
2278 $Global::cache_dir = $Global::cache_dirs[0] ||
2279 $ENV{'HOME'} . "/.parallel";
2282 sub parse_halt() {
2283 # $opt::halt flavours
2284 # Uses:
2285 # $opt::halt
2286 # $Global::halt_when
2287 # $Global::halt_fail
2288 # $Global::halt_success
2289 # $Global::halt_pct
2290 # $Global::halt_count
2291 if(defined $opt::halt) {
2292 my %halt_expansion = (
2293 "0" => "never",
2294 "1" => "soon,fail=1",
2295 "2" => "now,fail=1",
2296 "-1" => "soon,success=1",
2297 "-2" => "now,success=1",
2299 # Expand -2,-1,0,1,2 into long form
2300 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2301 # --halt 5% == --halt soon,fail=5%
2302 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2303 # Split: soon,fail=5%
2304 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2305 if(not grep { $when eq $_ } qw(never soon now)) {
2306 ::error("--halt must have 'never', 'soon', or 'now'.");
2307 ::wait_and_exit(255);
2309 $Global::halt_when = $when;
2310 if($when ne "never") {
2311 if($fail_success eq "fail") {
2312 $Global::halt_fail = 1;
2313 } elsif($fail_success eq "success") {
2314 $Global::halt_success = 1;
2315 } elsif($fail_success eq "done") {
2316 $Global::halt_done = 1;
2317 } else {
2318 ::error("--halt $when must be followed by ,success or ,fail.");
2319 ::wait_and_exit(255);
2321 if($pct_count =~ /^(\d+)%$/) {
2322 $Global::halt_pct = $1/100;
2323 } elsif($pct_count =~ /^(\d+)$/) {
2324 $Global::halt_count = $1;
2325 } else {
2326 ::error("--halt $when,$fail_success ".
2327 "must be followed by ,number or ,percent%.");
2328 ::wait_and_exit(255);
2334 sub parse_replacement_string_options() {
2335 # Deal with --rpl
2336 # Uses:
2337 # %Global::rpl
2338 # $Global::parensleft
2339 # $Global::parensright
2340 # $opt::parens
2341 # $Global::parensleft
2342 # $Global::parensright
2343 # $opt::plus
2344 # %Global::plus
2345 # $opt::I
2346 # $opt::U
2347 # $opt::i
2348 # $opt::basenamereplace
2349 # $opt::dirnamereplace
2350 # $opt::seqreplace
2351 # $opt::slotreplace
2352 # $opt::basenameextensionreplace
2354 sub rpl($$) {
2355 # Modify %Global::rpl
2356 # Replace $old with $new
2357 my ($old,$new) = @_;
2358 if($old ne $new) {
2359 $Global::rpl{$new} = $Global::rpl{$old};
2360 delete $Global::rpl{$old};
2363 my $parens = "{==}";
2364 if(defined $opt::parens) { $parens = $opt::parens; }
2365 my $parenslen = 0.5*length $parens;
2366 $Global::parensleft = substr($parens,0,$parenslen);
2367 $Global::parensright = substr($parens,$parenslen);
2368 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
2369 if(defined $opt::I) { rpl('{}',$opt::I); }
2370 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
2371 if(defined $opt::U) { rpl('{.}',$opt::U); }
2372 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
2373 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
2374 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
2375 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
2376 if(defined $opt::basenameextensionreplace) {
2377 rpl('{/.}',$opt::basenameextensionreplace);
2379 for(@opt::rpl) {
2380 # Create $Global::rpl entries for --rpl options
2381 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2382 my ($shorthand,$long) = split/\s/,$_,2;
2383 $Global::rpl{$shorthand} = $long;
2387 sub parse_semaphore() {
2388 # Semaphore defaults
2389 # Must be done before computing number of processes and max_line_length
2390 # because when running as a semaphore GNU Parallel does not read args
2391 # Uses:
2392 # $opt::semaphore
2393 # $Global::semaphore
2394 # $opt::semaphoretimeout
2395 # $Semaphore::timeout
2396 # $opt::semaphorename
2397 # $Semaphore::name
2398 # $opt::fg
2399 # $Semaphore::fg
2400 # $opt::wait
2401 # $Semaphore::wait
2402 # $opt::bg
2403 # @opt::a
2404 # @Global::unget_argv
2405 # $Global::default_simultaneous_sshlogins
2406 # $opt::jobs
2407 # $Global::interactive
2408 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
2409 if(defined $opt::semaphore) { $Global::semaphore = 1; }
2410 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
2411 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
2412 if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
2413 $Global::semaphore = 1;
2415 if(defined $opt::bg) { $Global::semaphore = 1; }
2416 if(defined $opt::wait and not $opt::sqlmaster) {
2417 $Global::semaphore = 1; @ARGV = "true";
2419 if($Global::semaphore) {
2420 if(@opt::a) {
2421 # A semaphore does not take input from neither stdin nor file
2422 ::error("A semaphore does not take input from neither stdin nor a file\n");
2423 ::wait_and_exit(255);
2425 @opt::a = ("/dev/null");
2426 # Append a dummy empty argument
2427 # \0 => nothing (not the empty string)
2428 push(@Global::unget_argv, [Arg->new("\0noarg")]);
2429 $Semaphore::timeout = $opt::semaphoretimeout || 0;
2430 if(defined $opt::semaphorename) {
2431 $Semaphore::name = $opt::semaphorename;
2432 } else {
2433 local $/ = "\n";
2434 $Semaphore::name = `tty`;
2435 chomp $Semaphore::name;
2437 $Semaphore::fg = $opt::fg;
2438 $Semaphore::wait = $opt::wait;
2439 $Global::default_simultaneous_sshlogins = 1;
2440 if(not defined $opt::jobs) {
2441 $opt::jobs = 1;
2443 if($Global::interactive and $opt::bg) {
2444 ::error("Jobs running in the ".
2445 "background cannot be interactive.");
2446 ::wait_and_exit(255);
2451 sub record_env() {
2452 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2453 # Returns: N/A
2454 my $ignore_filename = $Global::config_dir . "/ignored_vars";
2455 if(open(my $vars_fh, ">", $ignore_filename)) {
2456 print $vars_fh map { $_,"\n" } keys %ENV;
2457 } else {
2458 ::error("Cannot write to $ignore_filename.");
2459 ::wait_and_exit(255);
2463 sub open_joblog() {
2464 # Open joblog as specified by --joblog
2465 # Uses:
2466 # $opt::resume
2467 # $opt::resume_failed
2468 # $opt::joblog
2469 # $opt::results
2470 # $Global::job_already_run
2471 # %Global::fd
2472 my $append = 0;
2473 if(($opt::resume or $opt::resume_failed)
2475 not ($opt::joblog or $opt::results)) {
2476 ::error("--resume and --resume-failed require --joblog or --results.");
2477 ::wait_and_exit(255);
2479 if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
2480 # --joblog +filename = append to filename
2481 $append = 1;
2483 if($opt::joblog
2485 ($opt::sqlmaster
2487 not $opt::sqlworker)) {
2488 # Do not log if --sqlworker
2489 if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
2490 if(open(my $joblog_fh, "<", $opt::joblog)) {
2491 # Read the joblog
2492 # Override $/ with \n because -d might be set
2493 local $/ = "\n";
2494 # If there is a header: Open as append later
2495 $append = <$joblog_fh>;
2496 my $joblog_regexp;
2497 if($opt::retry_failed) {
2498 # Make a regexp that only matches commands with exit+signal=0
2499 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2500 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2501 my @group;
2502 while(<$joblog_fh>) {
2503 if(/$joblog_regexp/o) {
2504 # This is 30% faster than set_job_already_run($1);
2505 vec($Global::job_already_run,($1||0),1) = 1;
2506 $Global::total_completed++;
2507 $group[$1-1] = "true";
2508 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2509 # Grab out the command
2510 $group[$1-1] = $3;
2511 } else {
2512 chomp;
2513 ::error("Format of '$opt::joblog' is wrong: $_");
2514 ::wait_and_exit(255);
2517 if(@group) {
2518 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2519 unlink($name);
2520 # Put args into argfile
2521 if(grep /\0/, @group) {
2522 # force --null to deal with \n in commandlines
2523 ::warning("Command lines contain newline. Forcing --null.");
2524 $opt::null = 1;
2525 $/ = "\0";
2527 # Replace \0 with '\n' as used in print_joblog()
2528 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2529 seek $outfh, 0, 0;
2530 exit_if_disk_full();
2531 # Set filehandle to -a
2532 @opt::a = ($outfh);
2534 # Remove $command (so -a is run)
2535 @ARGV = ();
2537 if($opt::resume || $opt::resume_failed) {
2538 if($opt::resume_failed) {
2539 # Make a regexp that only matches commands with exit+signal=0
2540 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2541 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2542 } else {
2543 # Just match the job number
2544 $joblog_regexp='^(\d+)';
2546 while(<$joblog_fh>) {
2547 if(/$joblog_regexp/o) {
2548 # This is 30% faster than set_job_already_run($1);
2549 vec($Global::job_already_run,($1||0),1) = 1;
2550 $Global::total_completed++;
2551 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2552 ::error("Format of '$opt::joblog' is wrong: $_");
2553 ::wait_and_exit(255);
2557 close $joblog_fh;
2559 # $opt::null may be set if the commands contain \n
2560 if($opt::null) { $/ = "\0"; }
2562 if($opt::dryrun) {
2563 # Do not write to joblog in a dry-run
2564 if(not open($Global::joblog, ">", "/dev/null")) {
2565 ::error("Cannot write to --joblog $opt::joblog.");
2566 ::wait_and_exit(255);
2568 } elsif($append) {
2569 # Append to joblog
2570 if(not open($Global::joblog, ">>", $opt::joblog)) {
2571 ::error("Cannot append to --joblog $opt::joblog.");
2572 ::wait_and_exit(255);
2574 } else {
2575 if($opt::joblog eq "-") {
2576 # Use STDOUT as joblog
2577 $Global::joblog = $Global::fd{1};
2578 } elsif(not open($Global::joblog, ">", $opt::joblog)) {
2579 # Overwrite the joblog
2580 ::error("Cannot write to --joblog $opt::joblog.");
2581 ::wait_and_exit(255);
2583 print $Global::joblog
2584 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2585 "Send", "Receive", "Exitval", "Signal", "Command"
2586 ). "\n";
2591 sub open_csv() {
2592 if($opt::results) {
2593 # Output as CSV/TSV
2594 if($opt::results eq "-.csv"
2596 $opt::results eq "-.tsv") {
2597 # Output as CSV/TSV on stdout
2598 open $Global::csv_fh, ">&", "STDOUT" or
2599 ::die_bug("Can't dup STDOUT in csv: $!");
2600 # Do not print any other output to STDOUT
2601 # by forcing all other output to /dev/null
2602 open my $fd, ">", "/dev/null" or
2603 ::die_bug("Can't >/dev/null in csv: $!");
2604 $Global::fd{1} = $fd;
2605 $Global::fd{2} = $fd;
2606 } elsif($Global::csvsep) {
2607 if(not open($Global::csv_fh,">",$opt::results)) {
2608 ::error("Cannot open results file `$opt::results': ".
2609 "$!.");
2610 wait_and_exit(255);
2616 sub find_compression_program() {
2617 # Find a fast compression program
2618 # Returns:
2619 # $compress_program = compress program with options
2620 # $decompress_program = decompress program with options
2622 # Search for these. Sorted by speed on 128 core
2624 # seq 120000000|shuf > 1gb &
2625 # apt-get update
2626 # apt install make g++ htop
2627 # wget -O - pi.dk/3 | bash
2628 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2629 # git clone https://github.com/facebook/zstd.git
2630 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2631 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2632 # chmod +x /usr/local/bin/lrz
2633 # wait
2634 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2635 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2636 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2637 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2638 # sort -nk4 jl-?
2640 # 1-core:
2641 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2642 # 4-cores:
2643 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2644 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2645 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2646 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2647 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2649 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2650 lrz pxz bzip2 lzma xz clzip);
2651 for my $p (@prg) {
2652 if(which($p)) {
2653 return ("$p -c -1","$p -dc");
2656 # Fall back to cat
2657 return ("cat","cat");
2660 sub read_options() {
2661 # Read options from command line, profile and $PARALLEL
2662 # Uses:
2663 # $opt::shebang_wrap
2664 # $opt::shebang
2665 # @ARGV
2666 # $opt::plain
2667 # @opt::profile
2668 # $ENV{'HOME'}
2669 # $ENV{'PARALLEL'}
2670 # Returns:
2671 # @ARGV_no_opt = @ARGV without --options
2673 # This must be done first as this may exec myself
2674 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2675 $ARGV[0] =~ /^--shebang-?wrap/ or
2676 $ARGV[0] =~ /^--hashbang/)) {
2677 # Program is called from #! line in script
2678 # remove --shebang-wrap if it is set
2679 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
2680 # remove --shebang if it is set
2681 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
2682 # remove --hashbang if it is set
2683 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
2684 if($opt::shebang) {
2685 my $argfile = Q(pop @ARGV);
2686 # exec myself to split $ARGV[0] into separate fields
2687 exec "$0 --skip-first-line -a $argfile @ARGV";
2689 if($opt::shebang_wrap) {
2690 my @options;
2691 my @parser;
2692 if ($^O eq 'freebsd') {
2693 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2694 my @nooptions = @ARGV;
2695 get_options_from_array(\@nooptions);
2696 while($#ARGV > $#nooptions) {
2697 push @options, shift @ARGV;
2699 while(@ARGV and $ARGV[0] ne ":::") {
2700 push @parser, shift @ARGV;
2702 if(@ARGV and $ARGV[0] eq ":::") {
2703 shift @ARGV;
2705 } else {
2706 @options = shift @ARGV;
2708 my $script = Q(shift @ARGV);
2709 # exec myself to split $ARGV[0] into separate fields
2710 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2711 "::: @ARGV";
2714 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2715 ::warning("--shebang and --shebang-wrap must be the first argument.\n");
2718 Getopt::Long::Configure("bundling","require_order");
2719 my @ARGV_copy = @ARGV;
2720 my @ARGV_orig = @ARGV;
2721 # Check if there is a --profile to set @opt::profile
2722 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
2723 my @ARGV_profile = ();
2724 my @ARGV_env = ();
2725 if(not $opt::plain) {
2726 # Add options from $PARALLEL_HOME/config and other profiles
2727 my @config_profiles = (
2728 "/etc/parallel/config",
2729 (map { "$_/config" } @Global::config_dirs),
2730 $ENV{'HOME'}."/.parallelrc");
2731 my @profiles = @config_profiles;
2732 if(@opt::profile) {
2733 # --profile overrides default profiles
2734 @profiles = ();
2735 for my $profile (@opt::profile) {
2736 if($profile =~ m:^\./|^/:) {
2737 # Look for ./profile in .
2738 # Look for /profile in /
2739 push @profiles, grep { -r $_ } $profile;
2740 } else {
2741 # Look for the $profile in @Global::config_dirs
2742 push @profiles, grep { -r $_ }
2743 map { "$_/$profile" } @Global::config_dirs;
2747 for my $profile (@profiles) {
2748 if(-r $profile) {
2749 ::debug("init","Read $profile\n");
2750 local $/ = "\n";
2751 open (my $in_fh, "<", $profile) ||
2752 ::die_bug("read-profile: $profile");
2753 while(<$in_fh>) {
2754 /^\s*\#/ and next;
2755 chomp;
2756 push @ARGV_profile, shell_words($_);
2758 close $in_fh;
2759 } else {
2760 if(grep /^$profile$/, @config_profiles) {
2761 # config file is not required to exist
2762 } else {
2763 ::error("$profile not readable.");
2764 wait_and_exit(255);
2768 # Add options from shell variable $PARALLEL
2769 if($ENV{'PARALLEL'}) {
2770 push @ARGV_env, shell_words($ENV{'PARALLEL'});
2772 # Add options from env_parallel.csh via $PARALLEL_CSH
2773 if($ENV{'PARALLEL_CSH'}) {
2774 push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'});
2777 Getopt::Long::Configure("bundling","require_order");
2778 get_options_from_array(\@ARGV_profile) || die_usage();
2779 get_options_from_array(\@ARGV_env) || die_usage();
2780 get_options_from_array(\@ARGV) || die_usage();
2781 # What were the options given on the command line?
2782 # Used to start --sqlworker
2783 my $ai = arrayindex(\@ARGV_orig, \@ARGV);
2784 @Global::options_in_argv = @ARGV_orig[0..$ai-1];
2785 # Prepend non-options to @ARGV (such as commands like 'nice')
2786 unshift @ARGV, @ARGV_profile, @ARGV_env;
2787 return @ARGV;
2790 sub arrayindex() {
2791 # Similar to Perl's index function, but for arrays
2792 # Input:
2793 # $arr_ref1 = ref to @array1 to search in
2794 # $arr_ref2 = ref to @array2 to search for
2795 # Returns:
2796 # $pos = position of @array1 in @array2, -1 if not found
2797 my ($arr_ref1,$arr_ref2) = @_;
2798 my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
2799 my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
2800 my $i = index($array1_as_string,$array2_as_string,0);
2801 if($i == -1) { return -1 }
2802 my @before = split /\0/, substr($array1_as_string,0,$i);
2803 return $#before;
2806 sub read_args_from_command_line() {
2807 # Arguments given on the command line after:
2808 # ::: ($Global::arg_sep)
2809 # :::: ($Global::arg_file_sep)
2810 # :::+ ($Global::arg_sep with --link)
2811 # ::::+ ($Global::arg_file_sep with --link)
2812 # Removes the arguments from @ARGV and:
2813 # - puts filenames into -a
2814 # - puts arguments into files and add the files to -a
2815 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2816 # Input:
2817 # @::ARGV = command option ::: arg arg arg :::: argfiles
2818 # Uses:
2819 # $Global::arg_sep
2820 # $Global::arg_file_sep
2821 # $opt::internal_pipe_means_argfiles
2822 # $opt::pipe
2823 # @opt::a
2824 # Returns:
2825 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2826 my @new_argv = ();
2827 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2828 if($arg eq $Global::arg_sep
2830 $arg eq $Global::arg_sep."+"
2832 $arg eq $Global::arg_file_sep
2834 $arg eq $Global::arg_file_sep."+") {
2835 my $group_sep = $arg; # This group of arguments is args or argfiles
2836 my @group;
2837 while(defined ($arg = shift @ARGV)) {
2838 if($arg eq $Global::arg_sep
2840 $arg eq $Global::arg_sep."+"
2842 $arg eq $Global::arg_file_sep
2844 $arg eq $Global::arg_file_sep."+") {
2845 # exit while loop if finding new separator
2846 last;
2847 } else {
2848 # If not hitting ::: :::+ :::: or ::::+
2849 # Append it to the group
2850 push @group, $arg;
2853 my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0;
2854 my $is_file = ($group_sep eq $Global::arg_file_sep
2856 $group_sep eq $Global::arg_file_sep."+");
2857 if($is_file) {
2858 # :::: / ::::+
2859 push @opt::linkinputsource, map { $is_linked } @group;
2860 } else {
2861 # ::: / :::+
2862 push @opt::linkinputsource, $is_linked;
2864 if($is_file
2865 or ($opt::internal_pipe_means_argfiles and $opt::pipe)
2867 # Group of file names on the command line.
2868 # Append args into -a
2869 push @opt::a, @group;
2870 } else {
2871 # Group of arguments on the command line.
2872 # Put them into a file.
2873 # Create argfile
2874 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2875 unlink($name);
2876 # Put args into argfile
2877 print $outfh map { $_,$/ } @group;
2878 seek $outfh, 0, 0;
2879 exit_if_disk_full();
2880 # Append filehandle to -a
2881 push @opt::a, $outfh;
2883 if(defined($arg)) {
2884 # $arg is ::: :::+ :::: or ::::+
2885 # so there is another group
2886 redo;
2887 } else {
2888 # $arg is undef -> @ARGV empty
2889 last;
2892 push @new_argv, $arg;
2894 # Output: @ARGV = command to run with options
2895 return @new_argv;
2898 sub cleanup() {
2899 # Returns: N/A
2900 unlink keys %Global::unlink;
2901 map { rmdir $_ } keys %Global::unlink;
2902 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
2903 for(keys %Global::sshmaster) {
2904 # If 'ssh -M's are running: kill them
2905 kill "TERM", $_;
2910 sub __QUOTING_ARGUMENTS_FOR_SHELL__() {}
2912 sub shell_quote(@) {
2913 # Input:
2914 # @strings = strings to be quoted
2915 # Returns:
2916 # @shell_quoted_strings = string quoted as needed by the shell
2917 return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
2920 sub shell_quote_scalar_rc($) {
2921 # Quote for the rc-shell
2922 my $a = $_[0];
2923 if(defined $a) {
2924 if(($a =~ s/'/''/g)
2926 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2927 # A string was replaced
2928 # No need to test for "" or \0
2929 } elsif($a eq "") {
2930 $a = "''";
2931 } elsif($a eq "\0") {
2932 $a = "";
2935 return $a;
2938 sub shell_quote_scalar_csh($) {
2939 # Quote for (t)csh
2940 my $a = $_[0];
2941 if(defined $a) {
2942 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2943 # This is 1% faster than the above
2944 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2946 # quote newline in csh as \\\n
2947 ($a =~ s/[\n]/"\\\n"/go)) {
2948 # A string was replaced
2949 # No need to test for "" or \0
2950 } elsif($a eq "") {
2951 $a = "''";
2952 } elsif($a eq "\0") {
2953 $a = "";
2956 return $a;
2959 sub shell_quote_scalar_default($) {
2960 # Quote for other shells (Bourne compatibles)
2961 # Inputs:
2962 # $string = string to be quoted
2963 # Returns:
2964 # $shell_quoted = string quoted as needed by the shell
2965 my $s = $_[0];
2966 if($s =~ /[^-_.+a-z0-9\/]/i) {
2967 $s =~ s/'/'"'"'/g; # "-quote single quotes
2968 $s = "'$s'"; # '-quote entire string
2969 $s =~ s/^''//; # Remove unneeded '' at ends
2970 $s =~ s/''$//; # (faster than s/^''|''$//g)
2971 return $s;
2972 } elsif ($s eq "") {
2973 return "''";
2974 } else {
2975 # No quoting needed
2976 return $s;
2980 sub shell_quote_scalar($) {
2981 # Quote the string so the shell will not expand any special chars
2982 # Inputs:
2983 # $string = string to be quoted
2984 # Returns:
2985 # $shell_quoted = string quoted as needed by the shell
2987 # Speed optimization: Choose the correct shell_quote_scalar_*
2988 # and call that directly from now on
2989 no warnings 'redefine';
2990 if($Global::cshell) {
2991 # (t)csh
2992 *shell_quote_scalar = \&shell_quote_scalar_csh;
2993 } elsif($Global::shell =~ m:(^|/)rc$:) {
2994 # rc-shell
2995 *shell_quote_scalar = \&shell_quote_scalar_rc;
2996 } else {
2997 # other shells
2998 *shell_quote_scalar = \&shell_quote_scalar_default;
3000 # The sub is now redefined. Call it
3001 return shell_quote_scalar($_[0]);
3004 sub Q($) {
3005 # Q alias for ::shell_quote_scalar
3006 my $ret = shell_quote_scalar($_[0]);
3007 no warnings 'redefine';
3008 *Q = \&::shell_quote_scalar;
3009 return $ret;
3012 sub shell_quote_file($) {
3013 # Quote the string so shell will not expand any special chars
3014 # and prepend ./ if needed
3015 # Input:
3016 # $filename = filename to be shell quoted
3017 # Returns:
3018 # $quoted_filename = filename quoted with \ and ./ if needed
3019 my $a = shift;
3020 if(defined $a) {
3021 if($a =~ m:^/: or $a =~ m:^\./:) {
3022 # /abs/path or ./rel/path => skip
3023 } else {
3024 # rel/path => ./rel/path
3025 $a = "./".$a;
3028 return Q($a);
3031 sub shell_words(@) {
3032 # Input:
3033 # $string = shell line
3034 # Returns:
3035 # @shell_words = $string split into words as shell would do
3036 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3037 return Text::ParseWords::shellwords(@_);
3040 sub perl_quote_scalar($) {
3041 # Quote the string so perl's eval will not expand any special chars
3042 # Inputs:
3043 # $string = string to be quoted
3044 # Returns:
3045 # $perl_quoted = string quoted with \ as needed by perl's eval
3046 my $a = $_[0];
3047 if(defined $a) {
3048 $a =~ s/[\\\"\$\@]/\\$&/go;
3050 return $a;
3053 # -w complains about prototype
3054 sub pQ($) {
3055 # pQ alias for ::perl_quote_scalar
3056 my $ret = perl_quote_scalar($_[0]);
3057 *pQ = \&::perl_quote_scalar;
3058 return $ret;
3061 sub unquote_printf() {
3062 # Convert \t \n \r \000 \0
3063 # Inputs:
3064 # $string = string with \t \n \r \num \0
3065 # Returns:
3066 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3067 $_ = shift;
3068 s/\\t/\t/g;
3069 s/\\n/\n/g;
3070 s/\\r/\r/g;
3071 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3072 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3073 return $_;
3077 sub __FILEHANDLES__() {}
3080 sub save_stdin_stdout_stderr() {
3081 # Remember the original STDIN, STDOUT and STDERR
3082 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3083 # Uses:
3084 # %Global::fd
3085 # $Global::original_stderr
3086 # $Global::original_stdin
3087 # Returns: N/A
3089 # TODO Disabled until we have an open3 that will take n filehandles
3090 # for my $fdno (1..61) {
3091 # # /dev/fd/62 and above are used by bash for <(cmd)
3092 # # Find file descriptors that are already opened (by the shell)
3093 # Only focus on stdout+stderr for now
3094 for my $fdno (1..2) {
3095 my $fh;
3096 # 2-argument-open is used to be compatible with old perl 5.8.0
3097 # bug #43570: Perl 5.8.0 creates 61 files
3098 if(open($fh,">&=$fdno")) {
3099 $Global::fd{$fdno}=$fh;
3102 open $Global::original_stderr, ">&", "STDERR" or
3103 ::die_bug("Can't dup STDERR: $!");
3104 open $Global::status_fd, ">&", "STDERR" or
3105 ::die_bug("Can't dup STDERR: $!");
3106 open $Global::original_stdin, "<&", "STDIN" or
3107 ::die_bug("Can't dup STDIN: $!");
3110 sub enough_file_handles() {
3111 # Check that we have enough filehandles available for starting
3112 # another job
3113 # Uses:
3114 # $opt::ungroup
3115 # %Global::fd
3116 # Returns:
3117 # 1 if ungrouped (thus not needing extra filehandles)
3118 # 0 if too few filehandles
3119 # 1 if enough filehandles
3120 if(not $opt::ungroup) {
3121 my %fh;
3122 my $enough_filehandles = 1;
3123 # perl uses 7 filehandles for something?
3124 # open3 uses 2 extra filehandles temporarily
3125 # We need a filehandle for each redirected file descriptor
3126 # (normally just STDOUT and STDERR)
3127 for my $i (1..(7+2+keys %Global::fd)) {
3128 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3130 for (values %fh) { close $_; }
3131 return $enough_filehandles;
3132 } else {
3133 # Ungrouped does not need extra file handles
3134 return 1;
3138 sub open_or_exit($) {
3139 # Open a file name or exit if the file cannot be opened
3140 # Inputs:
3141 # $file = filehandle or filename to open
3142 # Uses:
3143 # $Global::original_stdin
3144 # Returns:
3145 # $fh = file handle to read-opened file
3146 my $file = shift;
3147 if($file eq "-") {
3148 return ($Global::original_stdin || *STDIN);
3150 if(ref $file eq "GLOB") {
3151 # This is an open filehandle
3152 return $file;
3154 my $fh = gensym;
3155 if(not open($fh, "<", $file)) {
3156 ::error("Cannot open input file `$file': No such file or directory.");
3157 wait_and_exit(255);
3159 return $fh;
3162 sub set_fh_blocking($) {
3163 # Set filehandle as blocking
3164 # Inputs:
3165 # $fh = filehandle to be blocking
3166 # Returns:
3167 # N/A
3168 my $fh = shift;
3169 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3170 my $flags;
3171 # Get the current flags on the filehandle
3172 fcntl($fh, &F_GETFL, $flags) || die $!;
3173 # Remove non-blocking from the flags
3174 $flags &= ~&O_NONBLOCK;
3175 # Set the flags on the filehandle
3176 fcntl($fh, &F_SETFL, $flags) || die $!;
3179 sub set_fh_non_blocking($) {
3180 # Set filehandle as non-blocking
3181 # Inputs:
3182 # $fh = filehandle to be blocking
3183 # Returns:
3184 # N/A
3185 my $fh = shift;
3186 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3187 my $flags;
3188 # Get the current flags on the filehandle
3189 fcntl($fh, &F_GETFL, $flags) || die $!;
3190 # Add non-blocking to the flags
3191 $flags |= &O_NONBLOCK;
3192 # Set the flags on the filehandle
3193 fcntl($fh, &F_SETFL, $flags) || die $!;
3197 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3200 # Variable structure:
3202 # $Global::running{$pid} = Pointer to Job-object
3203 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3204 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3205 # $Global::total_running = total number of running jobs
3206 # $Global::total_started = total jobs started
3207 # $Global::max_procs_file = filename if --jobs is given a filename
3208 # $Global::JobQueue = JobQueue object for the queue of jobs
3209 # $Global::timeoutq = queue of times where jobs timeout
3210 # $Global::newest_job = Job object of the most recent job started
3211 # $Global::newest_starttime = timestamp of $Global::newest_job
3212 # @Global::sshlogin
3213 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
3214 # $Global::start_no_new_jobs = should more jobs be started?
3215 # $Global::original_stderr = file handle for STDERR when the program started
3216 # $Global::total_started = total number of jobs started
3217 # $Global::joblog = filehandle of joblog
3218 # $Global::debug = Is debugging on?
3219 # $Global::exitstatus = status code of GNU Parallel
3220 # $Global::quoting = quote the command to run
3222 sub init_run_jobs() {
3223 # Set Global variables and progress signal handlers
3224 # Do the copying of basefiles
3225 # Returns: N/A
3226 $Global::total_running = 0;
3227 $Global::total_started = 0;
3228 $SIG{USR1} = \&list_running_jobs;
3229 $SIG{USR2} = \&toggle_progress;
3230 if(@opt::basefile) { setup_basefile(); }
3234 my $last_time;
3235 my %last_mtime;
3236 my $max_procs_file_last_mod;
3238 sub changed_procs_file {
3239 # If --jobs is a file and it is modfied:
3240 # Force recomputing of max_jobs_running for each $sshlogin
3241 # Uses:
3242 # $Global::max_procs_file
3243 # %Global::host
3244 # Returns: N/A
3245 if($Global::max_procs_file) {
3246 # --jobs filename
3247 my $mtime = (stat($Global::max_procs_file))[9];
3248 $max_procs_file_last_mod ||= 0;
3249 if($mtime > $max_procs_file_last_mod) {
3250 # file changed: Force re-computing max_jobs_running
3251 $max_procs_file_last_mod = $mtime;
3252 for my $sshlogin (values %Global::host) {
3253 $sshlogin->set_max_jobs_running(undef);
3259 sub changed_sshloginfile {
3260 # If --slf is changed:
3261 # reload --slf
3262 # filter_hosts
3263 # setup_basefile
3264 # Uses:
3265 # @opt::sshloginfile
3266 # @Global::sshlogin
3267 # %Global::host
3268 # $opt::filter_hosts
3269 # Returns: N/A
3270 if(@opt::sshloginfile) {
3271 # Is --sshloginfile changed?
3272 for my $slf (@opt::sshloginfile) {
3273 my $actual_file = expand_slf_shorthand($slf);
3274 my $mtime = (stat($actual_file))[9];
3275 $last_mtime{$actual_file} ||= $mtime;
3276 if($mtime - $last_mtime{$actual_file} > 1) {
3277 ::debug("run","--sshloginfile $actual_file changed. reload\n");
3278 $last_mtime{$actual_file} = $mtime;
3279 # Reload $slf
3280 # Empty sshlogins
3281 @Global::sshlogin = ();
3282 for (values %Global::host) {
3283 # Don't start new jobs on any host
3284 # except the ones added back later
3285 $_->set_max_jobs_running(0);
3287 # This will set max_jobs_running on the SSHlogins
3288 read_sshloginfile($actual_file);
3289 parse_sshlogin();
3290 $opt::filter_hosts and filter_hosts();
3291 setup_basefile();
3297 sub start_more_jobs {
3298 # Run start_another_job() but only if:
3299 # * not $Global::start_no_new_jobs set
3300 # * not JobQueue is empty
3301 # * not load on server is too high
3302 # * not server swapping
3303 # * not too short time since last remote login
3304 # Uses:
3305 # %Global::host
3306 # $Global::start_no_new_jobs
3307 # $Global::JobQueue
3308 # $opt::pipe
3309 # $opt::load
3310 # $opt::noswap
3311 # $opt::delay
3312 # $Global::newest_starttime
3313 # Returns:
3314 # $jobs_started = number of jobs started
3315 my $jobs_started = 0;
3316 if($Global::start_no_new_jobs) {
3317 return $jobs_started;
3319 if(time - ($last_time||0) > 1) {
3320 # At most do this every second
3321 $last_time = time;
3322 changed_procs_file();
3323 changed_sshloginfile();
3325 # This will start 1 job on each --sshlogin (if possible)
3326 # thus distribute the jobs on the --sshlogins round robin
3327 for my $sshlogin (values %Global::host) {
3328 if($Global::JobQueue->empty() and not $opt::pipe) {
3329 # No more jobs in the queue
3330 last;
3332 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
3333 $sshlogin->jobs_running(), "\n");
3334 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
3335 if($opt::delay
3337 $opt::delay - 0.008 > ::now() - $Global::newest_starttime) {
3338 # It has been too short since last start
3339 next;
3341 if($opt::load and $sshlogin->loadavg_too_high()) {
3342 # The load is too high or unknown
3343 next;
3345 if($opt::noswap and $sshlogin->swapping()) {
3346 # The server is swapping
3347 next;
3349 if($opt::limit and $sshlogin->limit()) {
3350 # Over limit
3351 next;
3353 if(($opt::memfree or $opt::memsuspend)
3355 $sshlogin->memfree() < $Global::memlimit) {
3356 # The server has not enough mem free
3357 ::debug("mem", "Not starting job: not enough mem\n");
3358 next;
3360 if($sshlogin->too_fast_remote_login()) {
3361 # It has been too short since
3362 next;
3364 debug("run", $sshlogin->string(),
3365 " has ", $sshlogin->jobs_running(),
3366 " out of ", $sshlogin->max_jobs_running(),
3367 " jobs running. Start another.\n");
3368 if(start_another_job($sshlogin) == 0) {
3369 # No more jobs to start on this $sshlogin
3370 debug("run","No jobs started on ",
3371 $sshlogin->string(), "\n");
3372 next;
3374 $sshlogin->inc_jobs_running();
3375 $sshlogin->set_last_login_at(::now());
3376 $jobs_started++;
3378 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
3379 $sshlogin->jobs_running(), " of ",
3380 $sshlogin->max_jobs_running(), "\n");
3383 return $jobs_started;
3388 my $no_more_file_handles_warned;
3390 sub start_another_job() {
3391 # If there are enough filehandles
3392 # and JobQueue not empty
3393 # and not $job is in joblog
3394 # Then grab a job from Global::JobQueue,
3395 # start it at sshlogin
3396 # mark it as virgin_job
3397 # Inputs:
3398 # $sshlogin = the SSHLogin to start the job on
3399 # Uses:
3400 # $Global::JobQueue
3401 # $opt::pipe
3402 # $opt::results
3403 # $opt::resume
3404 # @Global::virgin_jobs
3405 # Returns:
3406 # 1 if another jobs was started
3407 # 0 otherwise
3408 my $sshlogin = shift;
3409 # Do we have enough file handles to start another job?
3410 if(enough_file_handles()) {
3411 if($Global::JobQueue->empty() and not $opt::pipe) {
3412 # No more commands to run
3413 debug("start", "Not starting: JobQueue empty\n");
3414 return 0;
3415 } else {
3416 my $job;
3417 # Skip jobs already in job log
3418 # Skip jobs already in results
3419 do {
3420 $job = get_job_with_sshlogin($sshlogin);
3421 if(not defined $job) {
3422 # No command available for that sshlogin
3423 debug("start", "Not starting: no jobs available for ",
3424 $sshlogin->string(), "\n");
3425 return 0;
3427 if($job->is_already_in_joblog()) {
3428 $job->free_slot();
3430 } while ($job->is_already_in_joblog()
3432 ($opt::results and $opt::resume and $job->is_already_in_results()));
3433 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
3434 $job->replaced(),"'\n");
3435 if($job->start()) {
3436 if($opt::pipe) {
3437 if($job->virgin()) {
3438 push(@Global::virgin_jobs,$job);
3439 } else {
3440 # Block already set: This is a retry
3441 if(fork()) {
3442 ::debug("pipe","\n\nWriting ",length ${$job->block_ref()},
3443 " to ", $job->seq(),"\n");
3444 close $job->fh(0,"w");
3445 } else {
3446 $job->write($job->block_ref());
3447 close $job->fh(0,"w");
3448 exit(0);
3452 debug("start", "Started as seq ", $job->seq(),
3453 " pid:", $job->pid(), "\n");
3454 return 1;
3455 } else {
3456 # Not enough processes to run the job.
3457 # Put it back on the queue.
3458 $Global::JobQueue->unget($job);
3459 # Count down the number of jobs to run for this SSHLogin.
3460 my $max = $sshlogin->max_jobs_running();
3461 if($max > 1) { $max--; } else {
3462 my @arg;
3463 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3464 push @arg, map { $_->orig() } @$record;
3466 ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
3467 ::wait_and_exit(255);
3469 $sshlogin->set_max_jobs_running($max);
3470 # Sleep up to 300 ms to give other processes time to die
3471 ::usleep(rand()*300);
3472 ::warning("No more processes: ".
3473 "Decreasing number of running jobs to $max.",
3474 "Raising ulimit -u or /etc/security/limits.conf may help.");
3475 return 0;
3478 } else {
3479 # No more file handles
3480 $no_more_file_handles_warned++ or
3481 ::warning("No more file handles. ",
3482 "Raising ulimit -n or /etc/security/limits.conf may help.");
3483 debug("start", "No more file handles. ");
3484 return 0;
3489 sub init_progress() {
3490 # Uses:
3491 # $opt::bar
3492 # Returns:
3493 # list of computers for progress output
3494 $|=1;
3495 if($opt::bar) {
3496 return("","");
3498 my %progress = progress();
3499 return ("\nComputers / CPU cores / Max jobs to run\n",
3500 $progress{'workerlist'});
3503 sub drain_job_queue(@) {
3504 # Uses:
3505 # $opt::progress
3506 # $Global::total_running
3507 # $Global::max_jobs_running
3508 # %Global::running
3509 # $Global::JobQueue
3510 # %Global::host
3511 # $Global::start_no_new_jobs
3512 # Returns: N/A
3513 my @command = @_;
3514 if($opt::progress) {
3515 ::status_no_nl(init_progress());
3517 my $last_header = "";
3518 my $sleep = 0.2;
3519 do {
3520 while($Global::total_running > 0) {
3521 debug("init",$Global::total_running, "==", scalar
3522 keys %Global::running," slots: ", $Global::max_jobs_running);
3523 if($opt::pipe) {
3524 # When using --pipe sometimes file handles are not
3525 # closed properly
3526 for my $job (values %Global::running) {
3527 close $job->fh(0,"w");
3530 if($opt::progress) {
3531 my %progress = progress();
3532 if($last_header ne $progress{'header'}) {
3533 ::status("", $progress{'header'});
3534 $last_header = $progress{'header'};
3536 ::status_no_nl("\r",$progress{'status'});
3538 if($Global::total_running < $Global::max_jobs_running
3539 and not $Global::JobQueue->empty()) {
3540 # These jobs may not be started because of loadavg
3541 # or too little time between each ssh login.
3542 if(start_more_jobs() > 0) {
3543 # Exponential back-on if jobs were started
3544 $sleep = $sleep/2+0.001;
3547 # Exponential back-off sleeping
3548 $sleep = ::reap_usleep($sleep);
3550 if(not $Global::JobQueue->empty()) {
3551 # These jobs may not be started:
3552 # * because there the --filter-hosts has removed all
3553 if(not %Global::host) {
3554 ::error("There are no hosts left to run on.");
3555 ::wait_and_exit(255);
3557 # * because of loadavg
3558 # * because of too little time between each ssh login.
3559 $sleep = ::reap_usleep($sleep);
3560 start_more_jobs();
3561 if($Global::max_jobs_running == 0) {
3562 ::warning("There are no job slots available. Increase --jobs.");
3565 while($opt::sqlmaster and not $Global::sql->finished()) {
3566 # SQL master
3567 $sleep = ::reap_usleep($sleep);
3568 start_more_jobs();
3569 if($Global::start_sqlworker) {
3570 # Start an SQL worker as we are now sure there is work to do
3571 $Global::start_sqlworker = 0;
3572 if(my $pid = fork()) {
3573 $Global::unkilled_sqlworker = $pid;
3574 } else {
3575 # Replace --sql/--sqlandworker with --sqlworker
3576 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3577 # exec the --sqlworker
3578 exec($0,@ARGV,@command);
3582 } while ($Global::total_running > 0
3584 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3586 $opt::sqlmaster and not $Global::sql->finished());
3587 if($opt::progress) {
3588 my %progress = progress();
3589 ::status("\r".$progress{'status'});
3593 sub toggle_progress() {
3594 # Turn on/off progress view
3595 # Uses:
3596 # $opt::progress
3597 # Returns: N/A
3598 $opt::progress = not $opt::progress;
3599 if($opt::progress) {
3600 ::status_no_nl(init_progress());
3604 sub progress() {
3605 # Uses:
3606 # $opt::bar
3607 # $opt::eta
3608 # %Global::host
3609 # $Global::total_started
3610 # Returns:
3611 # $workerlist = list of workers
3612 # $header = that will fit on the screen
3613 # $status = message that will fit on the screen
3614 if($opt::bar) {
3615 return ("workerlist" => "", "header" => "", "status" => bar());
3617 my $eta = "";
3618 my ($status,$header)=("","");
3619 if($opt::eta) {
3620 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3621 compute_eta();
3622 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
3623 $this_eta, $left, $avgtime);
3625 my $termcols = terminal_columns();
3626 my @workers = sort keys %Global::host;
3627 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3628 my $workerno = 1;
3629 my %workerno = map { ($_=>$workerno++) } @workers;
3630 my $workerlist = "";
3631 for my $w (@workers) {
3632 $workerlist .=
3633 $workerno{$w}.":".$sshlogin{$w} ." / ".
3634 ($Global::host{$w}->ncpus() || "-")." / ".
3635 $Global::host{$w}->max_jobs_running()."\n";
3637 $status = "x"x($termcols+1);
3638 # Select an output format that will fit on a single line
3639 if(length $status > $termcols) {
3640 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3641 $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
3642 $status = $eta .
3643 join(" ",map
3645 if($Global::total_started) {
3646 my $completed = ($Global::host{$_}->jobs_completed()||0);
3647 my $running = $Global::host{$_}->jobs_running();
3648 my $time = $completed ? (time-$^T)/($completed) : "0";
3649 sprintf("%s:%d/%d/%d%%/%.1fs ",
3650 $sshlogin{$_}, $running, $completed,
3651 ($running+$completed)*100
3652 / $Global::total_started, $time);
3654 } @workers);
3656 if(length $status > $termcols) {
3657 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3658 $header = "Computer:jobs running/jobs completed/%of started jobs";
3659 $status = $eta .
3660 join(" ",map
3662 if($Global::total_started) {
3663 my $completed = ($Global::host{$_}->jobs_completed()||0);
3664 my $running = $Global::host{$_}->jobs_running();
3665 my $time = $completed ? (time-$^T)/($completed) : "0";
3666 sprintf("%s:%d/%d/%d%%/%.1fs ",
3667 $workerno{$_}, $running, $completed,
3668 ($running+$completed)*100
3669 / $Global::total_started, $time);
3671 } @workers);
3673 if(length $status > $termcols) {
3674 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3675 $header = "Computer:jobs running/jobs completed/%of started jobs";
3676 $status = $eta .
3677 join(" ",map
3679 if($Global::total_started) {
3680 sprintf("%s:%d/%d/%d%%",
3681 $sshlogin{$_},
3682 $Global::host{$_}->jobs_running(),
3683 ($Global::host{$_}->jobs_completed()||0),
3684 ($Global::host{$_}->jobs_running()+
3685 ($Global::host{$_}->jobs_completed()||0))*100
3686 / $Global::total_started)
3689 @workers);
3691 if(length $status > $termcols) {
3692 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3693 $header = "Computer:jobs running/jobs completed/%of started jobs";
3694 $status = $eta .
3695 join(" ",map
3697 if($Global::total_started) {
3698 sprintf("%s:%d/%d/%d%%",
3699 $workerno{$_},
3700 $Global::host{$_}->jobs_running(),
3701 ($Global::host{$_}->jobs_completed()||0),
3702 ($Global::host{$_}->jobs_running()+
3703 ($Global::host{$_}->jobs_completed()||0))*100
3704 / $Global::total_started)
3707 @workers);
3709 if(length $status > $termcols) {
3710 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3711 $header = "Computer:jobs running/jobs completed";
3712 $status = $eta .
3713 join(" ",map
3714 { sprintf("%s:%d/%d",
3715 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3716 ($Global::host{$_}->jobs_completed()||0)) }
3717 @workers);
3719 if(length $status > $termcols) {
3720 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3721 $header = "Computer:jobs running/jobs completed";
3722 $status = $eta .
3723 join(" ",map
3724 { sprintf("%s:%d/%d",
3725 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3726 ($Global::host{$_}->jobs_completed()||0)) }
3727 @workers);
3729 if(length $status > $termcols) {
3730 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3731 $header = "Computer:jobs running/jobs completed";
3732 $status = $eta .
3733 join(" ",map
3734 { sprintf("%s:%d/%d",
3735 $workerno{$_}, $Global::host{$_}->jobs_running(),
3736 ($Global::host{$_}->jobs_completed()||0)) }
3737 @workers);
3739 if(length $status > $termcols) {
3740 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3741 $header = "Computer:jobs completed";
3742 $status = $eta .
3743 join(" ",map
3744 { sprintf("%s:%d",
3745 $sshlogin{$_},
3746 ($Global::host{$_}->jobs_completed()||0)) }
3747 @workers);
3749 if(length $status > $termcols) {
3750 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3751 $header = "Computer:jobs completed";
3752 $status = $eta .
3753 join(" ",map
3754 { sprintf("%s:%d",
3755 $workerno{$_},
3756 ($Global::host{$_}->jobs_completed()||0)) }
3757 @workers);
3759 return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
3764 my ($first_completed, $smoothed_avg_time, $last_eta);
3766 sub compute_eta {
3767 # Calculate important numbers for ETA
3768 # Returns:
3769 # $total = number of jobs in total
3770 # $completed = number of jobs completed
3771 # $left = number of jobs left
3772 # $pctcomplete = percent of jobs completed
3773 # $avgtime = averaged time
3774 # $eta = smoothed eta
3775 my $completed = $Global::total_completed;
3776 # In rare cases with -X will $completed > total_jobs()
3777 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3778 my $left = $total - $completed;
3779 if(not $completed) {
3780 return($total, $completed, $left, 0, 0, 0);
3782 my $pctcomplete = ::min($completed / $total,100);
3783 $first_completed ||= time;
3784 my $timepassed = (time - $first_completed);
3785 my $avgtime = $timepassed / $completed;
3786 $smoothed_avg_time ||= $avgtime;
3787 # Smooth the eta so it does not jump wildly
3788 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3789 $pctcomplete * $avgtime;
3790 my $eta = int($left * $smoothed_avg_time);
3791 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3792 # Eta jumped less that 10% up: Keep the last eta instead
3793 $eta = $last_eta;
3794 } else {
3795 $last_eta = $eta;
3797 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3802 my ($rev,$reset);
3804 sub bar() {
3805 # Return:
3806 # $status = bar with eta, completed jobs, arg and pct
3807 $rev ||= "\033[7m";
3808 $reset ||= "\033[0m";
3809 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3810 compute_eta();
3811 my $arg = $Global::newest_job ?
3812 $Global::newest_job->{'commandline'}->
3813 replace_placeholders(["\257<\257>"],0,0) : "";
3814 # These chars mess up display in the terminal
3815 $arg =~ tr/[\011-\016\033\302-\365]//d;
3816 my $eta_dhms = ::seconds_to_time_units($eta);
3817 my $bar_text =
3818 sprintf("%d%% %d:%d=%s %s",
3819 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3820 my $terminal_width = terminal_columns();
3821 my $s = sprintf("%-${terminal_width}s",
3822 substr($bar_text." "x$terminal_width,
3823 0,$terminal_width));
3824 my $width = int($terminal_width * $pctcomplete);
3825 substr($s,$width,0) = $reset;
3826 my $zenity = sprintf("%-${terminal_width}s",
3827 substr("# $eta sec $arg",
3828 0,$terminal_width));
3829 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3830 "\r" . $rev . $s . $reset;
3831 return $s;
3836 my ($columns,$last_column_time);
3838 sub terminal_columns() {
3839 # Get the number of columns of the terminal.
3840 # Only update once per second.
3841 # Returns:
3842 # number of columns of the screen
3843 if(not $columns or $last_column_time < time) {
3844 $last_column_time = time;
3845 $columns = $ENV{'COLUMNS'};
3846 if(not $columns) {
3847 # && true is to force spawning a shell and not just exec'ing
3848 my $stty = qx{stty -a </dev/tty 2>/dev/null && true};
3849 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3850 # MacOSX/IRIX/AIX/Tru64
3851 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3852 # GNU/Linux/Solaris
3853 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3854 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3855 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3856 # QNX
3857 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3859 if(not $columns) {
3860 # && true is to force spawning a shell and not just exec'ing
3861 my $resize = qx{resize 2>/dev/null && true};
3862 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3864 $columns ||= 80;
3866 return $columns;
3870 # Prototype forwarding
3871 sub get_job_with_sshlogin($);
3872 sub get_job_with_sshlogin($) {
3873 # Input:
3874 # $sshlogin = which host should the job be run on?
3875 # Uses:
3876 # $opt::hostgroups
3877 # $Global::JobQueue
3878 # Returns:
3879 # $job = next job object for $sshlogin if any available
3880 my $sshlogin = shift;
3881 my $job;
3883 if ($opt::hostgroups) {
3884 my @other_hostgroup_jobs = ();
3886 while($job = $Global::JobQueue->get()) {
3887 if($sshlogin->in_hostgroups($job->hostgroups())) {
3888 # Found a job to be run on a hostgroup of this
3889 # $sshlogin
3890 last;
3891 } else {
3892 # This job was not in the hostgroups of $sshlogin
3893 push @other_hostgroup_jobs, $job;
3896 $Global::JobQueue->unget(@other_hostgroup_jobs);
3897 if(not defined $job) {
3898 # No more jobs
3899 return undef;
3901 } else {
3902 $job = $Global::JobQueue->get();
3903 if(not defined $job) {
3904 # No more jobs
3905 ::debug("start", "No more jobs: JobQueue empty\n");
3906 return undef;
3909 $job->set_sshlogin($sshlogin);
3910 if($opt::retries and $job->failed_here()) {
3911 # This command with these args failed for this sshlogin
3912 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3913 # Only look at the Global::host that have > 0 jobslots
3914 if($no_of_failed_sshlogins ==
3915 grep { $_->max_jobs_running() > 0 } values %Global::host
3916 and $job->failed_here() == $min_failures) {
3917 # It failed the same or more times on another host:
3918 # run it on this host
3919 } else {
3920 # If it failed fewer times on another host:
3921 # Find another job to run
3922 my $nextjob;
3923 if(not $Global::JobQueue->empty()) {
3924 # This can potentially recurse for all args
3925 no warnings 'recursion';
3926 $nextjob = get_job_with_sshlogin($sshlogin);
3928 # Push the command back on the queue
3929 $Global::JobQueue->unget($job);
3930 return $nextjob;
3933 return $job;
3937 sub __REMOTE_SSH__() {}
3940 sub read_sshloginfiles(@) {
3941 # Read a list of --slf's
3942 # Input:
3943 # @files = files or symbolic file names to read
3944 # Returns: N/A
3945 for my $s (@_) {
3946 read_sshloginfile(expand_slf_shorthand($s));
3950 sub expand_slf_shorthand($) {
3951 # Expand --slf shorthand into a read file name
3952 # Input:
3953 # $file = file or symbolic file name to read
3954 # Returns:
3955 # $file = actual file name to read
3956 my $file = shift;
3957 if($file eq "-") {
3958 # skip: It is stdin
3959 } elsif($file eq "..") {
3960 $file = $Global::config_dir."/sshloginfile";
3961 } elsif($file eq ".") {
3962 $file = "/etc/parallel/sshloginfile";
3963 } elsif(not -r $file) {
3964 for(@Global::config_dirs) {
3965 if(not -r $_."/".$file) {
3966 # Try prepending $PARALLEL_HOME
3967 ::error("Cannot open $file.");
3968 ::wait_and_exit(255);
3969 } else {
3970 $file = $_."/".$file;
3971 last;
3975 return $file;
3978 sub read_sshloginfile($) {
3979 # Read sshloginfile into @Global::sshlogin
3980 # Input:
3981 # $file = file to read
3982 # Uses:
3983 # @Global::sshlogin
3984 # Returns: N/A
3985 local $/ = "\n";
3986 my $file = shift;
3987 my $close = 1;
3988 my $in_fh;
3989 ::debug("init","--slf ",$file);
3990 if($file eq "-") {
3991 $in_fh = *STDIN;
3992 $close = 0;
3993 } else {
3994 if(not open($in_fh, "<", $file)) {
3995 # Try the filename
3996 ::error("Cannot open $file.");
3997 ::wait_and_exit(255);
4000 while(<$in_fh>) {
4001 chomp;
4002 /^\s*#/ and next;
4003 /^\s*$/ and next;
4004 push @Global::sshlogin, $_;
4006 if($close) {
4007 close $in_fh;
4011 sub parse_sshlogin() {
4012 # Parse @Global::sshlogin into %Global::host.
4013 # Keep only hosts that are in one of the given ssh hostgroups.
4014 # Uses:
4015 # @Global::sshlogin
4016 # $Global::minimal_command_line_length
4017 # %Global::host
4018 # $opt::transfer
4019 # @opt::return
4020 # $opt::cleanup
4021 # @opt::basefile
4022 # @opt::trc
4023 # Returns: N/A
4024 my @login;
4025 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
4026 for my $sshlogin (@Global::sshlogin) {
4027 # Split up -S sshlogin,sshlogin
4028 for my $s (split /,|\n/, $sshlogin) {
4029 if ($s eq ".." or $s eq "-") {
4030 # This may add to @Global::sshlogin - possibly bug
4031 read_sshloginfile(expand_slf_shorthand($s));
4032 } else {
4033 $s =~ s/\s*$//;
4034 push (@login, $s);
4038 $Global::minimal_command_line_length = 100_000_000;
4039 my @allowed_hostgroups;
4040 for my $ncpu_sshlogin_string (::uniq(@login)) {
4041 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
4042 my $sshlogin_string = $sshlogin->string();
4043 if($sshlogin_string eq "") {
4044 # This is an ssh group: -S @webservers
4045 push @allowed_hostgroups, $sshlogin->hostgroups();
4046 next;
4048 if($Global::host{$sshlogin_string}) {
4049 # This sshlogin has already been added:
4050 # It is probably a host that has come back
4051 # Set the max_jobs_running back to the original
4052 debug("run","Already seen $sshlogin_string\n");
4053 if($sshlogin->{'ncpus'}) {
4054 # If ncpus set by '#/' of the sshlogin, overwrite it:
4055 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4057 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
4058 next;
4060 $sshlogin->set_maxlength(Limits::Command::max_length());
4062 $Global::minimal_command_line_length =
4063 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
4064 $Global::host{$sshlogin_string} = $sshlogin;
4066 if(@allowed_hostgroups) {
4067 # Remove hosts that are not in these groups
4068 while (my ($string, $sshlogin) = each %Global::host) {
4069 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4070 delete $Global::host{$string};
4075 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4076 if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) {
4077 if(not remote_hosts()) {
4078 # There are no remote hosts
4079 if(@opt::trc) {
4080 ::warning("--trc ignored as there are no remote --sshlogin.");
4081 } elsif (defined $opt::transfer) {
4082 ::warning("--transfer ignored as there are no remote --sshlogin.");
4083 } elsif (@opt::transfer_files) {
4084 ::warning("--transferfile ignored as there are no remote --sshlogin.");
4085 } elsif (@opt::return) {
4086 ::warning("--return ignored as there are no remote --sshlogin.");
4087 } elsif (defined $opt::cleanup) {
4088 ::warning("--cleanup ignored as there are no remote --sshlogin.");
4089 } elsif (@opt::basefile) {
4090 ::warning("--basefile ignored as there are no remote --sshlogin.");
4096 sub remote_hosts() {
4097 # Return sshlogins that are not ':'
4098 # Uses:
4099 # %Global::host
4100 # Returns:
4101 # list of sshlogins with ':' removed
4102 return grep !/^:$/, keys %Global::host;
4105 sub setup_basefile() {
4106 # Transfer basefiles to each $sshlogin
4107 # This needs to be done before first jobs on $sshlogin is run
4108 # Uses:
4109 # %Global::host
4110 # @opt::basefile
4111 # Returns: N/A
4112 my @cmd;
4113 my $rsync_destdir;
4114 my $workdir;
4115 for my $sshlogin (values %Global::host) {
4116 if($sshlogin->string() eq ":") { next }
4117 for my $file (@opt::basefile) {
4118 if($file !~ m:^/: and $opt::workdir eq "...") {
4119 ::error("Work dir '...' will not work with relative basefiles.");
4120 ::wait_and_exit(255);
4122 if(not $workdir) {
4123 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],{},{},{});
4124 my $dummyjob = Job->new($dummycmdline);
4125 $workdir = $dummyjob->workdir();
4127 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4130 debug("init", "basesetup: @cmd\n");
4131 my ($exitstatus,$stdout_ref,$stderr_ref) =
4132 run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5);
4133 if($exitstatus) {
4134 my @stdout = @$stdout_ref;
4135 my @stderr = @$stderr_ref;
4136 ::error("Copying of --basefile failed: @stdout@stderr");
4137 ::wait_and_exit(255);
4141 sub cleanup_basefile() {
4142 # Remove the basefiles transferred
4143 # Uses:
4144 # %Global::host
4145 # @opt::basefile
4146 # Returns: N/A
4147 my @cmd;
4148 my $workdir;
4149 if(not $workdir) {
4150 my $dummycmdline = CommandLine->new(1,"true",0,0,0,0,0,{},{},{});
4151 my $dummyjob = Job->new($dummycmdline);
4152 $workdir = $dummyjob->workdir();
4154 for my $sshlogin (values %Global::host) {
4155 if($sshlogin->string() eq ":") { next }
4156 for my $file (@opt::basefile) {
4157 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
4160 debug("init", "basecleanup: @cmd\n");
4161 my ($exitstatus,$stdout_ref,$stderr_ref) =
4162 run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5);
4163 if($exitstatus) {
4164 my @stdout = @$stdout_ref;
4165 my @stderr = @$stderr_ref;
4166 ::error("Cleanup of --basefile failed: @stdout@stderr");
4167 ::wait_and_exit(255);
4171 sub run_gnu_parallel() {
4172 my ($stdin,@args) = @_;
4173 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
4174 print $Global::original_stderr ` $cmd wait` ;
4175 return 0
4178 sub _run_gnu_parallel() {
4179 # Run GNU Parallel
4180 # This should ideally just fork an internal copy
4181 # and not start it through a shell
4182 # Input:
4183 # $stdin = data to provide on stdin for GNU Parallel
4184 # @args = command line arguments
4185 # Returns:
4186 # $exitstatus = exitcode of GNU Parallel run
4187 # \@stdout = standard output
4188 # \@stderr = standard error
4189 my ($stdin,@args) = @_;
4190 my ($exitstatus,@stdout,@stderr);
4191 my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
4192 my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
4193 unlink $stderrname;
4195 my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
4196 $0,qw(--plain --shell /bin/sh --will-cite), @args);
4197 if(my $writerpid = fork()) {
4198 close $stdin_fh;
4199 @stdout = <$stdout_fh>;
4200 # Now stdout is closed:
4201 # These pids should be dead or die very soon
4202 while(kill 0, $writerpid) { ::usleep(1); }
4203 die;
4204 # reap $writerpid;
4205 # while(kill 0, $pid) { ::usleep(1); }
4206 # reap $writerpid;
4207 $exitstatus = $?;
4208 seek $stderr_fh, 0, 0;
4209 @stderr = <$stderr_fh>;
4210 close $stdout_fh;
4211 close $stderr_fh;
4212 } else {
4213 close $stdout_fh;
4214 close $stderr_fh;
4215 print $stdin_fh $stdin;
4216 close $stdin_fh;
4217 exit(0);
4219 return ($exitstatus,\@stdout,\@stderr);
4222 sub filter_hosts() {
4223 # Remove down --sshlogins from active duty.
4224 # Find ncpus, ncores, maxlen, time-to-login for each host.
4225 # Uses:
4226 # %Global::host
4227 # $Global::minimal_command_line_length
4228 # $opt::use_sockets_instead_of_threads
4229 # $opt::use_cores_instead_of_threads
4230 # $opt::use_cpus_instead_of_cores
4231 # Returns: N/A
4233 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
4234 $maxlen_ref, $echo_ref, $down_hosts_ref) =
4235 parse_host_filtering(parallelized_host_filtering());
4237 delete @Global::host{@$down_hosts_ref};
4238 @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
4240 $Global::minimal_command_line_length = 100_000_000;
4241 while (my ($sshlogin, $obj) = each %Global::host) {
4242 if($sshlogin eq ":") { next }
4243 $nsockets_ref->{$sshlogin} or
4244 ::die_bug("nsockets missing: ".$obj->serverlogin());
4245 $ncores_ref->{$sshlogin} or
4246 ::die_bug("ncores missing: ".$obj->serverlogin());
4247 $nthreads_ref->{$sshlogin} or
4248 ::die_bug("nthreads missing: ".$obj->serverlogin());
4249 $time_to_login_ref->{$sshlogin} or
4250 ::die_bug("time_to_login missing: ".$obj->serverlogin());
4251 $maxlen_ref->{$sshlogin} or
4252 ::die_bug("maxlen missing: ".$obj->serverlogin());
4253 $obj->set_ncpus($nthreads_ref->{$sshlogin});
4254 if($opt::use_cpus_instead_of_cores) {
4255 $obj->set_ncpus($ncores_ref->{$sshlogin});
4256 } elsif($opt::use_sockets_instead_of_threads) {
4257 $obj->set_ncpus($nsockets_ref->{$sshlogin});
4258 } elsif($opt::use_cores_instead_of_threads) {
4259 $obj->set_ncpus($ncores_ref->{$sshlogin});
4261 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
4262 $obj->set_maxlength($maxlen_ref->{$sshlogin});
4263 $Global::minimal_command_line_length =
4264 ::min($Global::minimal_command_line_length,
4265 int($maxlen_ref->{$sshlogin}/2));
4266 ::debug("init", "Timing from -S:$sshlogin ",
4267 " nsockets:",$nsockets_ref->{$sshlogin},
4268 " ncores:", $ncores_ref->{$sshlogin},
4269 " nthreads:",$nthreads_ref->{$sshlogin},
4270 " time_to_login:", $time_to_login_ref->{$sshlogin},
4271 " maxlen:", $maxlen_ref->{$sshlogin},
4272 " min_max_len:", $Global::minimal_command_line_length,"\n");
4276 sub parse_host_filtering() {
4277 # Input:
4278 # @lines = output from parallelized_host_filtering()
4279 # Returns:
4280 # \%nsockets = number of sockets of {host}
4281 # \%ncores = number of cores of {host}
4282 # \%nthreads = number of hyperthreaded cores of {host}
4283 # \%time_to_login = time_to_login on {host}
4284 # \%maxlen = max command len on {host}
4285 # \%echo = echo received from {host}
4286 # \@down_hosts = list of hosts with no answer
4287 local $/ = "\n";
4288 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
4289 @down_hosts);
4290 for (@_) {
4291 ::debug("init","Read: ",$_);
4292 chomp;
4293 my @col = split /\t/, $_;
4294 if($col[0] =~ /^parallel: Warning:/) {
4295 # Timed out job: Ignore it
4296 next;
4297 } elsif(defined $col[6]) {
4298 # This is a line from --joblog
4299 # seq host time spent sent received exit signal command
4300 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
4301 if($col[0] eq "Seq" and $col[1] eq "Host" and
4302 $col[2] eq "Starttime") {
4303 # Header => skip
4304 next;
4306 # Get server from: eval true server\;
4307 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
4308 ::die_bug("col8 does not contain host: $col[8]");
4309 my $host = $1;
4310 $host =~ tr/\\//d;
4311 $Global::host{$host} or next;
4312 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
4313 # exit == 255 or exit == timeout (-1): ssh failed/timedout
4314 # exit == 1: lsh failed
4315 # Remove sshlogin
4316 ::debug("init", "--filtered $host\n");
4317 push(@down_hosts, $host);
4318 } elsif($col[6] eq "127") {
4319 # signal == 127: parallel not installed remote
4320 # Set nsockets, ncores, nthreads = 1
4321 ::warning("Could not figure out ".
4322 "number of cpus on $host. Using 1.");
4323 $nsockets{$host} = 1;
4324 $ncores{$host} = 1;
4325 $nthreads{$host} = 1;
4326 $maxlen{$host} = Limits::Command::max_length();
4327 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
4328 # Remember how log it took to log in
4329 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
4330 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
4331 } else {
4332 ::die_bug("host check unmatched long jobline: $_");
4334 } elsif($Global::host{$col[0]}) {
4335 # This output from --number-of-cores, --number-of-cpus,
4336 # --max-line-length-allowed
4337 # ncores: server 8
4338 # ncpus: server 2
4339 # maxlen: server 131071
4340 if(/parallel: Warning: Cannot figure out number of/) {
4341 next;
4343 if(not $nsockets{$col[0]}) {
4344 $nsockets{$col[0]} = $col[1];
4345 } elsif(not $ncores{$col[0]}) {
4346 $ncores{$col[0]} = $col[1];
4347 } elsif(not $nthreads{$col[0]}) {
4348 $nthreads{$col[0]} = $col[1];
4349 } elsif(not $maxlen{$col[0]}) {
4350 $maxlen{$col[0]} = $col[1];
4351 } elsif(not $echo{$col[0]}) {
4352 $echo{$col[0]} = $col[1];
4353 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
4354 # Skip these:
4355 # perl: warning: Setting locale failed.
4356 # perl: warning: Please check that your locale settings:
4357 # LANGUAGE = (unset),
4358 # LC_ALL = (unset),
4359 # LANG = "en_US.UTF-8"
4360 # are supported and installed on your system.
4361 # perl: warning: Falling back to the standard locale ("C").
4362 } else {
4363 ::die_bug("host check too many col0: $_");
4365 } else {
4366 ::die_bug("host check unmatched short jobline ($col[0]): $_");
4369 @down_hosts = uniq(@down_hosts);
4370 return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
4371 \%maxlen, \%echo, \@down_hosts);
4374 sub parallelized_host_filtering() {
4375 # Uses:
4376 # %Global::host
4377 # Returns:
4378 # text entries with:
4379 # * joblog line
4380 # * hostname \t number of cores
4381 # * hostname \t number of cpus
4382 # * hostname \t max-line-length-allowed
4383 # * hostname \t empty
4385 sub sshwrapped {
4386 # Wrap with ssh and --env
4387 # Return $default_value if command fails
4388 my $sshlogin = shift;
4389 my $command = shift;
4390 my $default_value = shift;
4391 # wrapper that returns $default_value if the command fails:
4392 # bug #57886: Errors when using different version on remote
4393 # perl -e '$a=`$command`; print $? ? "$default_value" : $a'
4394 my $wcmd = q(perl -e '$a=`).$command.q(`;).
4395 q(print $? ? ").::pQ($default_value).q(" : $a');
4396 my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],{},{},{});
4397 my $job = Job->new($commandline);
4398 $job->set_sshlogin($sshlogin);
4399 $job->wrapped();
4400 return($job->{'wrapped'});
4403 my(@sockets, @cores, @threads, @maxline, @echo);
4404 while (my ($host, $sshlogin) = each %Global::host) {
4405 if($host eq ":") { next }
4406 # The 'true' is used to get the $host out later
4407 push(@sockets, $host."\t"."true $host; ".
4408 sshwrapped($sshlogin,"parallel --number-of-sockets",0)."\n\0");
4409 push(@cores, $host."\t"."true $host; ".
4410 sshwrapped($sshlogin,"parallel --number-of-cores",0)."\n\0");
4411 push(@threads, $host."\t"."true $host; ".
4412 sshwrapped($sshlogin,"parallel --number-of-threads",0)."\n\0");
4413 push(@maxline, $host."\t"."true $host; ".
4414 sshwrapped($sshlogin,"parallel --max-line-length-allowed",0)."\n\0");
4415 # 'echo' is used to get the fastest possible ssh login time
4416 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4417 $sshlogin->serverlogin();
4418 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4420 # --timeout 10: Setting up an SSH connection and running a simple
4421 # command should never take > 10 sec.
4422 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4423 # will make it less likely to overload the ssh daemon.
4424 # --retries 3: If the ssh daemon is overloaded, try 3 times
4425 my $cmd =
4426 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4427 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4428 $cmd = $Global::shell." -c ".Q($cmd);
4429 ::debug("init", $cmd, "\n");
4430 my @out;
4431 my $prepend = "";
4433 my ($host_fh,$in,$err);
4434 open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
4435 ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
4437 if(not fork()) {
4438 # Give the commands to run to the $cmd
4439 close $host_fh;
4440 print $in @sockets, @cores, @threads, @maxline, @echo;
4441 close $in;
4442 exit();
4444 close $in;
4445 for(<$host_fh>) {
4446 # TODO incompatible with '-quoting. Needs to be fixed differently
4447 #if(/\'$/) {
4448 # # if last char = ' then append next line
4449 # # This may be due to quoting of \n in environment var
4450 # $prepend .= $_;
4451 # next;
4453 $_ = $prepend . $_;
4454 $prepend = "";
4455 push @out, $_;
4457 close $host_fh;
4458 return @out;
4461 sub onall($@) {
4462 # Runs @command on all hosts.
4463 # Uses parallel to run @command on each host.
4464 # --jobs = number of hosts to run on simultaneously.
4465 # For each host a parallel command with the args will be running.
4466 # Uses:
4467 # $Global::quoting
4468 # @opt::basefile
4469 # $opt::jobs
4470 # $opt::linebuffer
4471 # $opt::ungroup
4472 # $opt::group
4473 # $opt::keeporder
4474 # $opt::D
4475 # $opt::plain
4476 # $opt::max_chars
4477 # $opt::linebuffer
4478 # $opt::files
4479 # $opt::colsep
4480 # $opt::timeout
4481 # $opt::plain
4482 # $opt::retries
4483 # $opt::max_chars
4484 # $opt::arg_sep
4485 # $opt::arg_file_sep
4486 # @opt::v
4487 # @opt::env
4488 # %Global::host
4489 # $Global::exitstatus
4490 # $Global::debug
4491 # $Global::joblog
4492 # $opt::joblog
4493 # $opt::tag
4494 # $opt::tee
4495 # Input:
4496 # @command = command to run on all hosts
4497 # Returns: N/A
4498 sub tmp_joblog {
4499 # Input:
4500 # $joblog = filename of joblog - undef if none
4501 # Returns:
4502 # $tmpfile = temp file for joblog - undef if none
4503 my $joblog = shift;
4504 if(not defined $joblog) {
4505 return undef;
4507 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
4508 close $fh;
4509 return $tmpfile;
4511 my ($input_source_fh_ref,@command) = @_;
4512 if($Global::quoting) {
4513 @command = shell_quote(@command);
4516 # Copy all @input_source_fh (-a and :::) into tempfiles
4517 my @argfiles = ();
4518 for my $fh (@$input_source_fh_ref) {
4519 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
4520 print $outfh (<$fh>);
4521 close $outfh;
4522 push @argfiles, $name;
4524 if(@opt::basefile) { setup_basefile(); }
4525 # for each sshlogin do:
4526 # parallel -S $sshlogin $command :::: @argfiles
4528 # Pass some of the options to the sub-parallels, not all of them as
4529 # -P should only go to the first, and -S should not be copied at all.
4530 my $options =
4531 join(" ",
4532 ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""),
4533 ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""),
4534 ((defined $opt::D) ? "-D $opt::D" : ""),
4535 ((defined $opt::group) ? "-g" : ""),
4536 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
4537 ((defined $opt::keeporder) ? "--keeporder" : ""),
4538 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4539 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4540 ((defined $opt::plain) ? "--plain" : ""),
4541 ((defined $opt::ungroup) ? "-u" : ""),
4542 ((defined $opt::tee) ? "--tee" : ""),
4544 my $suboptions =
4545 join(" ",
4546 ((defined $opt::D) ? "-D $opt::D" : ""),
4547 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
4548 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
4549 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
4550 ((defined $opt::files) ? "--files" : ""),
4551 ((defined $opt::group) ? "-g" : ""),
4552 ((defined $opt::cleanup) ? "--cleanup" : ""),
4553 ((defined $opt::keeporder) ? "--keeporder" : ""),
4554 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4555 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4556 ((defined $opt::plain) ? "--plain" : ""),
4557 ((defined $opt::plus) ? "--plus" : ""),
4558 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
4559 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
4560 ((defined $opt::ungroup) ? "-u" : ""),
4561 ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""),
4562 ((defined $opt::tee) ? "--tee" : ""),
4563 ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
4564 (@Global::transfer_files ? map { "--tf ".Q($_) }
4565 @Global::transfer_files : ""),
4566 (@Global::ret_files ? map { "--return ".Q($_) }
4567 @Global::ret_files : ""),
4568 (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
4569 (map { "-v" } @opt::v),
4571 ::debug("init", "| $0 $options\n");
4572 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4573 ::die_bug("This does not run GNU Parallel: $0 $options");
4574 my @joblogs;
4575 for my $host (sort keys %Global::host) {
4576 my $sshlogin = $Global::host{$host};
4577 my $joblog = tmp_joblog($opt::joblog);
4578 if($joblog) {
4579 push @joblogs, $joblog;
4580 $joblog = "--joblog $joblog";
4582 my $quad = $opt::arg_file_sep || "::::";
4583 # If PARALLEL_ENV is set: Pass it on
4584 my $penv=$Global::parallel_env ?
4585 "PARALLEL_ENV=".Q($Global::parallel_env) :
4587 ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
4588 ((defined $opt::tag) ?
4589 "--tagstring ".Q($sshlogin->string()) : ""),
4590 " -S ", Q($sshlogin->string())," ",
4591 join(" ",shell_quote(@command))," $quad @argfiles\n");
4592 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4593 ((defined $opt::tag) ?
4594 "--tagstring ".Q($sshlogin->string()) : ""),
4595 " -S ", Q($sshlogin->string())," ",
4596 join(" ",shell_quote(@command))," $quad @argfiles\0";
4598 close $parallel_fh;
4599 $Global::exitstatus = $? >> 8;
4600 debug("init", "--onall exitvalue ", $?);
4601 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
4602 $Global::debug or unlink(@argfiles);
4603 my %seen;
4604 for my $joblog (@joblogs) {
4605 # Append to $joblog
4606 open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
4607 # Skip first line (header);
4608 <$fh>;
4609 print $Global::joblog (<$fh>);
4610 close $fh;
4611 unlink($joblog);
4616 sub __SIGNAL_HANDLING__() {}
4619 sub sigtstp() {
4620 # Send TSTP signal (Ctrl-Z) to all children process groups
4621 # Uses:
4622 # %SIG
4623 # Returns: N/A
4624 signal_children("TSTP");
4627 sub sigpipe() {
4628 # Send SIGPIPE signal to all children process groups
4629 # Uses:
4630 # %SIG
4631 # Returns: N/A
4632 signal_children("PIPE");
4635 sub signal_children() {
4636 # Send signal to all children process groups
4637 # and GNU Parallel itself
4638 # Uses:
4639 # %SIG
4640 # Returns: N/A
4641 my $signal = shift;
4642 debug("run", "Sending $signal ");
4643 kill $signal, map { -$_ } keys %Global::running;
4644 # Use default signal handler for GNU Parallel itself
4645 $SIG{$signal} = undef;
4646 kill $signal, $$;
4649 sub save_original_signal_handler() {
4650 # Remember the original signal handler
4651 # Uses:
4652 # %Global::original_sig
4653 # Returns: N/A
4654 $SIG{INT} = sub {
4655 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4656 wait_and_exit(255);
4658 $SIG{TERM} = sub {
4659 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4660 wait_and_exit(255);
4662 %Global::original_sig = %SIG;
4663 $SIG{TERM} = sub {}; # Dummy until jobs really start
4664 $SIG{ALRM} = 'IGNORE';
4665 # Allow Ctrl-Z to suspend and `fg` to continue
4666 $SIG{TSTP} = \&sigtstp;
4667 $SIG{PIPE} = \&sigpipe;
4668 $SIG{CONT} = sub {
4669 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4670 $SIG{TSTP} = \&sigtstp;
4671 # Send continue signal to all children process groups
4672 kill "CONT", map { -$_ } keys %Global::running;
4676 sub list_running_jobs() {
4677 # Print running jobs on tty
4678 # Uses:
4679 # %Global::running
4680 # Returns: N/A
4681 for my $job (values %Global::running) {
4682 ::status("$Global::progname: ".$job->replaced());
4686 sub start_no_new_jobs() {
4687 # Start no more jobs
4688 # Uses:
4689 # %Global::original_sig
4690 # %Global::unlink
4691 # $Global::start_no_new_jobs
4692 # Returns: N/A
4693 # $SIG{TERM} = $Global::original_sig{TERM};
4694 unlink keys %Global::unlink;
4695 ::status
4696 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4697 "$Global::progname: Waiting for these ".(keys %Global::running).
4698 " jobs to finish. Send SIGTERM to stop now.");
4699 list_running_jobs();
4700 $Global::start_no_new_jobs ||= 1;
4703 sub reapers() {
4704 # Run reaper until there are no more left
4705 # Returns:
4706 # @pids_reaped = pids of reaped processes
4707 my @pids_reaped;
4708 my $pid;
4709 while($pid = reaper()) {
4710 push @pids_reaped, $pid;
4712 return @pids_reaped;
4715 sub reaper() {
4716 # A job finished:
4717 # * Set exitstatus, exitsignal, endtime.
4718 # * Free ressources for new job
4719 # * Update median runtime
4720 # * Print output
4721 # * If --halt = now: Kill children
4722 # * Print progress
4723 # Uses:
4724 # %Global::running
4725 # $opt::timeout
4726 # $Global::timeoutq
4727 # $opt::keeporder
4728 # $Global::total_running
4729 # Returns:
4730 # $stiff = PID of child finished
4731 my $stiff;
4732 debug("run", "Reaper ");
4733 if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
4734 # No jobs waiting to be reaped
4735 return 0;
4738 # $stiff = pid of dead process
4739 my $job = $Global::running{$stiff};
4741 # '-a <(seq 10)' will give us a pid not in %Global::running
4742 # The same will one of the ssh -M: ignore
4743 $job or return 0;
4744 delete $Global::running{$stiff};
4745 $Global::total_running--;
4746 if($job->{'commandline'}{'skip'}) {
4747 # $job->skip() was called
4748 $job->set_exitstatus(-2);
4749 $job->set_exitsignal(0);
4750 } else {
4751 $job->set_exitstatus($? >> 8);
4752 $job->set_exitsignal($? & 127);
4755 debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4756 $job->set_endtime(::now());
4757 my $sshlogin = $job->sshlogin();
4758 $sshlogin->dec_jobs_running();
4759 if($job->should_be_retried()) {
4760 # Free up file handles
4761 $job->free_ressources();
4762 } else {
4763 # The job is done
4764 $sshlogin->inc_jobs_completed();
4765 # Free the jobslot
4766 $job->free_slot();
4767 if($opt::timeout and not $job->exitstatus()) {
4768 # Update average runtime for timeout only for successful jobs
4769 $Global::timeoutq->update_median_runtime($job->runtime());
4771 if($opt::keeporder) {
4772 $job->print_earlier_jobs();
4773 } else {
4774 $job->print();
4776 if($job->should_we_halt() eq "now") {
4777 # Kill children
4778 ::kill_sleep_seq($job->pid());
4779 ::killall();
4780 ::wait_and_exit($Global::halt_exitstatus);
4783 $job->cleanup();
4785 if($opt::progress) {
4786 my %progress = progress();
4787 ::status_no_nl("\r",$progress{'status'});
4790 debug("run", "done ");
4791 return $stiff;
4795 sub __USAGE__() {}
4798 sub killall() {
4799 # Kill all jobs by killing their process groups
4800 # Uses:
4801 # $Global::start_no_new_jobs = we are stopping
4802 # $Global::killall = Flag to not run reaper
4803 $Global::start_no_new_jobs ||= 1;
4804 # Do not reap killed children: Ignore them instead
4805 $Global::killall ||= 1;
4806 kill_sleep_seq(keys %Global::running);
4809 sub kill_sleep_seq(@) {
4810 # Send jobs TERM,TERM,KILL to processgroups
4811 # Input:
4812 # @pids = list of pids that are also processgroups
4813 # Convert pids to process groups ($processgroup = -$pid)
4814 my @pgrps = map { -$_ } @_;
4815 my @term_seq = split/,/,$opt::termseq;
4816 if($opt::memsuspend) {
4817 @term_seq = ("STOP",1);
4818 } else {
4819 if(not @term_seq) {
4820 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4823 while(@term_seq) {
4824 @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
4828 sub kill_sleep() {
4829 # Kill pids with a signal and wait a while for them to die
4830 # Input:
4831 # $signal = signal to send to @pids
4832 # $sleep_max = number of ms to sleep at most before returning
4833 # @pids = pids to kill (actually process groups)
4834 # Uses:
4835 # $Global::killall = set by killall() to avoid calling reaper
4836 # Returns:
4837 # @pids = pids still alive
4838 my ($signal, $sleep_max, @pids) = @_;
4839 ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4840 kill $signal, @pids;
4841 my $sleepsum = 0;
4842 my $sleep = 0.001;
4844 while(@pids and $sleepsum < $sleep_max) {
4845 if($Global::killall) {
4846 # Killall => don't run reaper
4847 while(waitpid(-1, &WNOHANG) > 0) {
4848 $sleep = $sleep/2+0.001;
4850 } elsif(reapers()) {
4851 $sleep = $sleep/2+0.001;
4853 $sleep *= 1.1;
4854 ::usleep($sleep);
4855 $sleepsum += $sleep;
4856 # Keep only living children
4857 @pids = grep { kill(0, $_) } @pids;
4859 return @pids;
4862 sub wait_and_exit($) {
4863 # If we do not wait, we sometimes get segfault
4864 # Returns: N/A
4865 my $error = shift;
4866 unlink keys %Global::unlink;
4867 if($error) {
4868 # Kill all jobs without printing
4869 killall();
4871 for (keys %Global::unkilled_children) {
4872 # Kill any (non-jobs) children (e.g. reserved processes)
4873 kill 9, $_;
4874 waitpid($_,0);
4875 delete $Global::unkilled_children{$_};
4877 if($Global::unkilled_sqlworker) {
4878 waitpid($Global::unkilled_sqlworker,0);
4880 # Avoid: Warning: unable to close filehandle properly: No space
4881 # left on device during global destruction.
4882 $SIG{__WARN__} = sub {};
4883 exit($error);
4886 sub die_usage() {
4887 # Returns: N/A
4888 usage();
4889 wait_and_exit(255);
4892 sub usage() {
4893 # Returns: N/A
4894 print join
4895 ("\n",
4896 "Usage:",
4898 "$Global::progname [options] [command [arguments]] < list_of_arguments",
4899 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4900 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4902 "-j n Run n jobs in parallel",
4903 "-k Keep same order",
4904 "-X Multiple arguments with context replace",
4905 "--colsep regexp Split input on regexp for positional replacements",
4906 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4907 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
4908 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4909 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4911 "-S sshlogin Example: foo\@server.example.com",
4912 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
4913 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
4914 "--onall Run the given command with argument on all sshlogins",
4915 "--nonall Run the given command with no arguments on all sshlogins",
4917 "--pipe Split stdin (standard input) to multiple jobs.",
4918 "--recend str Record end separator for --pipe.",
4919 "--recstart str Record start separator for --pipe.",
4921 "GNU Parallel can do much more. See 'man $Global::progname' for details",
4923 "Academic tradition requires you to cite works you base your article on.",
4924 "If you use programs that use GNU Parallel to process data for an article in a",
4925 "scientific publication, please cite:",
4927 " Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4928 " Zenodo. https://doi.org/10.5281/zenodo.4045386",
4930 # Before changing this line, please read
4931 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4932 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4933 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4934 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4936 "",);
4939 sub citation_notice() {
4940 # if --will-cite or --plain: do nothing
4941 # if stderr redirected: do nothing
4942 # if $PARALLEL_HOME/will-cite: do nothing
4943 # else: print citation notice to stderr
4944 if($opt::willcite
4946 $opt::plain
4948 not -t $Global::original_stderr
4950 grep { -e "$_/will-cite" } @Global::config_dirs) {
4951 # skip
4952 } else {
4953 ::status
4954 ("Academic tradition requires you to cite works you base your article on.",
4955 "If you use programs that use GNU Parallel to process data for an article in a",
4956 "scientific publication, please cite:",
4958 " Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4959 " Zenodo. https://doi.org/10.5281/zenodo.4045386",
4961 # Before changing this line, please read
4962 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
4963 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4964 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4965 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4967 "More about funding GNU Parallel and the citation notice:",
4968 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4970 "To silence this citation notice: run 'parallel --citation' once.",
4973 mkdir $Global::config_dir;
4974 # Number of times the user has run GNU Parallel without showing
4975 # willingness to cite
4976 my $runs = 0;
4977 if(open (my $fh, "<", $Global::config_dir.
4978 "/runs-without-willing-to-cite")) {
4979 $runs = <$fh>;
4980 close $fh;
4982 $runs++;
4983 if(open (my $fh, ">", $Global::config_dir.
4984 "/runs-without-willing-to-cite")) {
4985 print $fh $runs;
4986 close $fh;
4987 if($runs >= 10) {
4988 ::status("Come on: You have run parallel $runs times. Isn't it about time ",
4989 "you run 'parallel --citation' once to silence the citation notice?",
4990 "");
4996 sub status(@) {
4997 my @w = @_;
4998 my $fh = $Global::status_fd || *STDERR;
4999 print $fh map { ($_, "\n") } @w;
5000 flush $fh;
5003 sub status_no_nl(@) {
5004 my @w = @_;
5005 my $fh = $Global::status_fd || *STDERR;
5006 print $fh @w;
5007 flush $fh;
5010 sub warning(@) {
5011 my @w = @_;
5012 my $prog = $Global::progname || "parallel";
5013 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5016 sub error(@) {
5017 my @w = @_;
5018 my $prog = $Global::progname || "parallel";
5019 status(map { ($prog.": Error: ". $_); } @w);
5022 sub die_bug($) {
5023 my $bugid = shift;
5024 print STDERR
5025 ("$Global::progname: This should not happen. You have found a bug.\n",
5026 "Please contact <parallel\@gnu.org> and follow\n",
5027 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
5028 "\n",
5029 "Include this in the report:\n",
5030 "* The version number: $Global::version\n",
5031 "* The bugid: $bugid\n",
5032 "* The command line being run\n",
5033 "* The files being read (put the files on a webserver if they are big)\n",
5034 "\n",
5035 "If you get the error on smaller/fewer files, please include those instead.\n");
5036 ::wait_and_exit(255);
5039 sub version() {
5040 # Returns: N/A
5041 print join
5042 ("\n",
5043 "GNU $Global::progname $Global::version",
5044 "Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free Software",
5045 "Foundation, Inc.",
5046 "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5047 "This is free software: you are free to change and redistribute it.",
5048 "GNU $Global::progname comes with no warranty.",
5050 "Web site: https://www.gnu.org/software/${Global::progname}\n",
5051 "When using programs that use GNU Parallel to process data for publication",
5052 "please cite as described in 'parallel --citation'.\n",
5056 sub citation() {
5057 # Returns: N/A
5058 my ($all_argv_ref,$argv_options_removed_ref) = @_;
5059 my $all_argv = "@$all_argv_ref";
5060 my $no_opts = "@$argv_options_removed_ref";
5061 $all_argv=~s/--citation//;
5062 if($all_argv ne $no_opts) {
5063 ::warning("--citation ignores all other options and arguments.");
5064 ::status("");
5067 ::status(
5068 "Academic tradition requires you to cite works you base your article on.",
5069 "If you use programs that use GNU Parallel to process data for an article in a",
5070 "scientific publication, please cite:",
5072 "\@software{tange_2020_4045386,",
5073 " author = {Tange, Ole},",
5074 " title = {GNU Parallel 20200922 ('Ginsburg')},",
5075 " month = Sep,",
5076 " year = 2020,",
5077 " note = {{GNU Parallel is a general parallelizer to run",
5078 " multiple serial command line programs in parallel",
5079 " without changing them.}},",
5080 " publisher = {Zenodo},",
5081 " doi = {10.5281/zenodo.4045386},",
5082 " url = {https://doi.org/10.5281/zenodo.4045386}",
5083 "}",
5085 "(Feel free to use \\nocite{tange_2020_4045386})",
5087 # Before changing this line, please read
5088 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5089 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5090 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5091 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5093 "More about funding GNU Parallel and the citation notice:",
5094 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
5095 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5096 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
5098 "If you send a copy of your published article to tange\@gnu.org, it will be",
5099 "mentioned in the release notes of next version of GNU Parallel.",
5102 while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
5103 print "\nType: 'will cite' and press enter.\n> ";
5104 my $input = <STDIN>;
5105 if(not defined $input) {
5106 exit(255);
5108 if($input =~ /will cite/i) {
5109 mkdir $Global::config_dir;
5110 if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
5111 close $fh;
5112 ::status(
5114 "Thank you for your support: You are the reason why there is funding to",
5115 "continue maintaining GNU Parallel. On behalf of future versions of",
5116 "GNU Parallel, which would not exist without your support:",
5118 " THANK YOU SO MUCH",
5120 "It is really appreciated. The citation notice is now silenced.",
5121 "");
5122 } else {
5123 ::status(
5125 "Thank you for your support. It is much appreciated. The citation",
5126 "cannot permanently be silenced. Use '--will-cite' instead.",
5128 "If you use '--will-cite' in scripts to be run by others you are making",
5129 "it harder for others to see the citation notice. The development of",
5130 "GNU Parallel is indirectly financed through citations, so if users",
5131 "do not know they should cite then you are making it harder to finance",
5132 "development. However, if you pay 10000 EUR, you should feel free to",
5133 "use '--will-cite' in scripts.",
5134 "");
5135 last;
5141 sub show_limits() {
5142 # Returns: N/A
5143 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
5144 "Maximal used size of command: ",Limits::Command::max_length(),"\n",
5145 "\n",
5146 "Execution of will continue now, and it will try to read its input\n",
5147 "and run commands; if this is not what you wanted to happen, please\n",
5148 "press CTRL-D or CTRL-C\n");
5151 sub embed() {
5152 # Give an embeddable version of GNU Parallel
5153 # Tested with: bash, zsh, ksh, ash, dash, sh
5154 my $randomstring = "cut-here-".join"",
5155 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
5156 if(not -f $0 or not -r $0) {
5157 ::error("--embed only works if parallel is a readable file");
5158 exit(255);
5160 if(open(my $fh, "<", $0)) {
5161 # Read the source from $0
5162 my @source = <$fh>;
5163 my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
5164 my @env_parallel_source = ();
5165 my $shell = $Global::shell;
5166 $shell =~ s:.*/::;
5167 for(which("env_parallel.$shell")) {
5168 -r $_ or next;
5169 # Read the source of env_parallel.shellname
5170 open(my $env_parallel_source_fh, $_) || die;
5171 @env_parallel_source = <$env_parallel_source_fh>;
5172 close $env_parallel_source_fh;
5173 last;
5175 print "#!$Global::shell
5177 # Copyright (C) 2007-2020 $user, Ole Tange, http://ole.tange.dk
5178 # and Free Software Foundation, Inc.
5180 # This program is free software; you can redistribute it and/or modify
5181 # it under the terms of the GNU General Public License as published by
5182 # the Free Software Foundation; either version 3 of the License, or
5183 # (at your option) any later version.
5185 # This program is distributed in the hope that it will be useful, but
5186 # WITHOUT ANY WARRANTY; without even the implied warranty of
5187 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5188 # General Public License for more details.
5190 # You should have received a copy of the GNU General Public License
5191 # along with this program; if not, see <https://www.gnu.org/licenses/>
5192 # or write to the Free Software Foundation, Inc., 51 Franklin St,
5193 # Fifth Floor, Boston, MA 02110-1301 USA
5196 print q!
5197 # Embedded GNU Parallel created with --embed
5198 parallel() {
5199 # Start GNU Parallel without leaving temporary files
5201 # Not all shells support 'perl <(cat ...)'
5202 # This is a complex way of doing:
5203 # perl <(cat <<'cut-here'
5204 # [...]
5205 # ) "$@"
5206 # and also avoiding:
5207 # [1]+ Done cat
5209 # Make a temporary fifo that perl can read from
5210 _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo);
5211 do {
5212 $f = "/tmp/parallel-".join"",
5213 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5214 } while(-e $f);
5215 mkfifo($f,0600);
5216 print $f;'`
5217 # Put source code into temporary file
5218 # so it is easy to copy to the fifo
5219 _file_with_GNU_Parallel_source=`mktemp`;
5221 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
5222 @source,
5223 $randomstring,"\n",
5225 # Copy the source code from the file to the fifo
5226 # and remove the file and fifo ASAP
5227 # 'sh -c' is needed to avoid
5228 # [1]+ Done cat
5229 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
5231 # Read the source from the fifo
5232 perl $_fifo_with_GNU_Parallel_source "$@"
5235 @env_parallel_source,
5238 # This will call the functions above
5239 parallel -k echo ::: Put your code here
5240 env_parallel --session
5241 env_parallel -k echo ::: Put your code here
5242 parset p,y,c,h -k echo ::: Put your code here
5243 echo $p $y $c $h
5245 } else {
5246 ::error("Cannot open $0");
5247 exit(255);
5249 ::status("Redirect the output to a file and add your changes at the end:",
5250 " $0 --embed > new_script");
5254 sub __GENERIC_COMMON_FUNCTION__() {}
5257 sub mkdir_or_die($) {
5258 # If dir is not executable: die
5259 my $dir = shift;
5260 # The eval is needed to catch exception from mkdir
5261 eval { File::Path::mkpath($dir); };
5262 if(not -x $dir) {
5263 ::error("Cannot change into non-executable dir $dir: $!");
5264 ::wait_and_exit(255);
5268 sub tmpfile(@) {
5269 # Create tempfile as $TMPDIR/parXXXXX
5270 # Returns:
5271 # $filehandle = opened file handle
5272 # $filename = file name created
5273 my($filehandle,$filename) =
5274 ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
5275 if(wantarray) {
5276 return($filehandle,$filename);
5277 } else {
5278 # Separate unlink due to NFS dealing badly with File::Temp
5279 unlink $filename;
5280 return $filehandle;
5284 sub tmpname($) {
5285 # Select a name that does not exist
5286 # Do not create the file as it may be used for creating a socket (by tmux)
5287 # Remember the name in $Global::unlink to avoid hitting the same name twice
5288 my $name = shift;
5289 my($tmpname);
5290 if(not -w $ENV{'TMPDIR'}) {
5291 if(not -e $ENV{'TMPDIR'}) {
5292 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
5293 } else {
5294 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
5296 ::wait_and_exit(255);
5298 do {
5299 $tmpname = $ENV{'TMPDIR'}."/".$name.
5300 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5301 } while(-e $tmpname or $Global::unlink{$tmpname}++);
5302 return $tmpname;
5305 sub tmpfifo() {
5306 # Find an unused name and mkfifo on it
5307 my $tmpfifo = tmpname("fif");
5308 mkfifo($tmpfifo,0600);
5309 return $tmpfifo;
5312 sub rm(@) {
5313 # Remove file and remove it from %Global::unlink
5314 # Uses:
5315 # %Global::unlink
5316 delete @Global::unlink{@_};
5317 unlink @_;
5320 sub size_of_block_dev() {
5321 # Like -s but for block devices
5322 # Input:
5323 # $blockdev = file name of block device
5324 # Returns:
5325 # $size = in bytes, undef if error
5326 my $blockdev = shift;
5327 if(open(my $fh, "<", $blockdev)) {
5328 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
5329 my $size = tell($fh);
5330 close $fh;
5331 return $size;
5332 } else {
5333 ::error("cannot open $blockdev");
5334 wait_and_exit(255);
5338 sub qqx(@) {
5339 # Like qx but with clean environment (except for @keep)
5340 # and STDERR ignored
5341 # This is needed if the environment contains functions
5342 # that /bin/sh does not understand
5343 my %env;
5344 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
5345 # ssh with Kerberos needs KRB5CCNAME
5346 # tmux needs LC_CTYPE
5347 # lsh needs HOME LOGNAME
5348 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE HOME LOGNAME);
5349 @env{@keep} = @ENV{@keep};
5350 local %ENV;
5351 %ENV = %env;
5352 if($Global::debug) {
5353 # && true is to force spawning a shell and not just exec'ing
5354 return qx{ @_ && true };
5355 } else {
5356 # CygWin does not respect 2>/dev/null
5357 # so we do that by hand
5358 # This trick does not work:
5359 # https://stackoverflow.com/q/13833088/363028
5360 # local *STDERR;
5361 # open(STDERR, ">", "/dev/null");
5362 open(local *CHILD_STDIN, '<', '/dev/null') or die $!;
5363 open(local *CHILD_STDERR, '>', '/dev/null') or die $!;
5364 my $out;
5365 # eval is needed if open3 fails (e.g. command line too long)
5366 eval {
5367 my $pid = open3(
5368 '<&CHILD_STDIN',
5369 $out,
5370 '>&CHILD_STDERR',
5371 # && true is to force spawning a shell and not just exec'ing
5372 "@_ && true");
5373 my @arr = <$out>;
5374 close $out;
5375 # Make sure $? is set
5376 waitpid($pid, 0);
5377 return wantarray ? @arr : join "",@arr;
5378 } or do {
5379 # If eval fails, force $?=false
5380 `false`;
5385 sub uniq(@) {
5386 # Remove duplicates and return unique values
5387 return keys %{{ map { $_ => 1 } @_ }};
5390 sub min(@) {
5391 # Returns:
5392 # Minimum value of array
5393 my $min;
5394 for (@_) {
5395 # Skip undefs
5396 defined $_ or next;
5397 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
5398 $min = ($min < $_) ? $min : $_;
5400 return $min;
5403 sub max(@) {
5404 # Returns:
5405 # Maximum value of array
5406 my $max;
5407 for (@_) {
5408 # Skip undefs
5409 defined $_ or next;
5410 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
5411 $max = ($max > $_) ? $max : $_;
5413 return $max;
5416 sub sum(@) {
5417 # Returns:
5418 # Sum of values of array
5419 my @args = @_;
5420 my $sum = 0;
5421 for (@args) {
5422 # Skip undefs
5423 $_ and do { $sum += $_; }
5425 return $sum;
5428 sub undef_as_zero($) {
5429 my $a = shift;
5430 return $a ? $a : 0;
5433 sub undef_as_empty($) {
5434 my $a = shift;
5435 return $a ? $a : "";
5438 sub undef_if_empty($) {
5439 if(defined($_[0]) and $_[0] eq "") {
5440 return undef;
5442 return $_[0];
5445 sub multiply_binary_prefix(@) {
5446 # Evalualte numbers with binary prefix
5447 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5448 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5449 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5450 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5451 # 13G = 13*1024*1024*1024 = 13958643712
5452 # Input:
5453 # $s = string with prefixes
5454 # Returns:
5455 # $value = int with prefixes multiplied
5456 my @v = @_;
5457 for(@v) {
5458 defined $_ or next;
5459 s/ki/*1024/gi;
5460 s/mi/*1024*1024/gi;
5461 s/gi/*1024*1024*1024/gi;
5462 s/ti/*1024*1024*1024*1024/gi;
5463 s/pi/*1024*1024*1024*1024*1024/gi;
5464 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5465 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5466 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5467 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5469 s/K/*1024/g;
5470 s/M/*1024*1024/g;
5471 s/G/*1024*1024*1024/g;
5472 s/T/*1024*1024*1024*1024/g;
5473 s/P/*1024*1024*1024*1024*1024/g;
5474 s/E/*1024*1024*1024*1024*1024*1024/g;
5475 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5476 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5477 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5479 s/k/*1000/g;
5480 s/m/*1000*1000/g;
5481 s/g/*1000*1000*1000/g;
5482 s/t/*1000*1000*1000*1000/g;
5483 s/p/*1000*1000*1000*1000*1000/g;
5484 s/e/*1000*1000*1000*1000*1000*1000/g;
5485 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5486 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5487 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5489 $_ = eval $_;
5491 return wantarray ? @v : $v[0];
5494 sub multiply_time_units($) {
5495 # Evalualte numbers with time units
5496 # s=1, m=60, h=3600, d=86400
5497 # Input:
5498 # $s = string time units
5499 # Returns:
5500 # $value = int in seconds
5501 my @v = @_;
5502 for(@v) {
5503 defined $_ or next;
5504 if(/[dhms]/i) {
5505 s/s/*1+/gi;
5506 s/m/*60+/gi;
5507 s/h/*3600+/gi;
5508 s/d/*86400+/gi;
5509 $_ = eval $_."0";
5512 return wantarray ? @v : $v[0];
5515 sub seconds_to_time_units() {
5516 # Convert seconds into ??d??h??m??s
5517 # s=1, m=60, h=3600, d=86400
5518 # Input:
5519 # $s = int in seconds
5520 # Returns:
5521 # $str = string time units
5522 my $s = shift;
5523 my $str;
5524 my $d = int($s/86400);
5525 $s -= $d * 86400;
5526 my $h = int($s/3600);
5527 $s -= $h * 3600;
5528 my $m = int($s/60);
5529 $s -= $m * 60;
5530 if($d) {
5531 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5532 } elsif($h) {
5533 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5534 } elsif($m) {
5535 $str = sprintf("%dm%02ds",$m,$s);
5536 } else {
5537 $str = sprintf("%ds",$s);
5539 return $str;
5543 my ($disk_full_fh, $b8193, $error_printed);
5544 sub exit_if_disk_full() {
5545 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5546 # If the disk is full: Exit immediately.
5547 # Returns:
5548 # N/A
5549 if(not $disk_full_fh) {
5550 $disk_full_fh = ::tmpfile(SUFFIX => ".df");
5551 $b8193 = "b"x8193;
5553 # Linux does not discover if a disk is full if writing <= 8192
5554 # Tested on:
5555 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5556 # ntfs reiserfs tmpfs ubifs vfat xfs
5557 # TODO this should be tested on different OS similar to this:
5559 # doit() {
5560 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5561 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5562 # seq 6900000 > /mnt/loop/i && echo seq OK
5563 # seq 6980868 > /mnt/loop/i
5564 # seq 10000 > /mnt/loop/ii
5565 # sleep 3
5566 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5567 # echo >&2
5569 print $disk_full_fh $b8193;
5570 if(not $disk_full_fh
5572 tell $disk_full_fh != 8193) {
5573 # On raspbian the disk can be full except for 10 chars.
5574 if(not $error_printed) {
5575 ::error("Output is incomplete.",
5576 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5577 "Is the disk full?",
5578 "Change \$TMPDIR with --tmpdir or use --compress.");
5579 $error_printed = 1;
5581 ::wait_and_exit(255);
5583 truncate $disk_full_fh, 0;
5584 seek($disk_full_fh, 0, 0) || die;
5588 sub spacefree($$) {
5589 # Remove comments and spaces
5590 # Inputs:
5591 # $spaces = keep 1 space?
5592 # $s = string to remove spaces from
5593 # Returns:
5594 # $s = with spaces removed
5595 my $spaces = shift;
5596 my $s = shift;
5597 $s =~ s/#.*//mg;
5598 if(1 == $spaces) {
5599 $s =~ s/\s+/ /mg;
5600 } elsif(2 == $spaces) {
5601 # Keep newlines
5602 $s =~ s/\n\n+/\n/sg;
5603 $s =~ s/[ \t]+/ /mg;
5604 } elsif(3 == $spaces) {
5605 # Keep perl code required space
5606 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5607 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5608 } else {
5609 $s =~ s/\s//mg;
5611 return $s;
5615 my $hostname;
5616 sub hostname() {
5617 local $/ = "\n";
5618 if(not $hostname) {
5619 $hostname = `hostname`;
5620 chomp($hostname);
5621 $hostname ||= "nohostname";
5623 return $hostname;
5627 sub which(@) {
5628 # Input:
5629 # @programs = programs to find the path to
5630 # Returns:
5631 # @full_path = full paths to @programs. Nothing if not found
5632 my @which;
5633 for my $prg (@_) {
5634 push(@which, grep { not -d $_ and -x $_ }
5635 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5636 if($prg =~ m:/:) {
5637 # Including path
5638 push(@which, grep { not -d $_ and -x $_ } $prg);
5641 ::debug("which", "$which[0] in $ENV{'PATH'}\n");
5642 return wantarray ? @which : $which[0];
5646 my ($regexp,$shell,%fakename);
5648 sub parent_shell {
5649 # Input:
5650 # $pid = pid to see if (grand)*parent is a shell
5651 # Returns:
5652 # $shellpath = path to shell - undef if no shell found
5653 my $pid = shift;
5654 ::debug("init","Parent of $pid\n");
5655 if(not $regexp) {
5656 # All shells known to mankind
5658 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5659 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5661 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
5662 ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5663 static-sh tcsh yash zsh -sh -csh -bash),
5664 '-sh (sh)' # sh on FreeBSD
5666 # Can be formatted as:
5667 # [sh] -sh sh busybox sh -sh (sh)
5668 # /bin/sh /sbin/sh /opt/csw/sh
5669 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5670 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5671 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5672 '(-?)('. $shell. '))( *$| [^(])';
5673 %fakename = (
5674 # sh disguises itself as -sh (sh) on FreeBSD
5675 "-sh (sh)" => ["sh"],
5676 # csh and tcsh disguise themselves as -sh/-csh
5677 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5678 # but sh also disguises itself as -sh
5679 # (TODO When does that happen?)
5680 "-sh" => ["sh"],
5681 "-csh" => ["tcsh", "csh"],
5682 # ash disguises itself as -ash
5683 "-ash" => ["ash", "dash", "sh"],
5684 # dash disguises itself as -dash
5685 "-dash" => ["dash", "ash", "sh"],
5686 # bash disguises itself as -bash
5687 "-bash" => ["bash", "sh"],
5688 # ksh disguises itself as -ksh
5689 "-ksh" => ["ksh", "sh"],
5690 # zsh disguises itself as -zsh
5691 "-zsh" => ["zsh", "sh"],
5694 if($^O eq "linux") {
5695 # Optimized for GNU/Linux
5696 my $testpid = $pid;
5697 my $shellpath;
5698 my $shellline;
5699 while($testpid) {
5700 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
5701 local $/="\0";
5702 chomp($shellline = <$fd>);
5703 if($shellline =~ /$regexp/o) {
5704 my $shellname = $4 || $8;
5705 my $dash = $3 || $7;
5706 if($shellname eq "sh" and $dash) {
5707 # -sh => csh or sh
5708 if($shellpath = readlink "/proc/$testpid/exe") {
5709 ::debug("init","procpath $shellpath\n");
5710 if($shellpath =~ m:/$shell$:o) {
5711 ::debug("init", "proc which ".$shellpath." => ");
5712 return $shellpath;
5716 ::debug("init", "which ".$shellname." => ");
5717 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5718 ::debug("init", "shell path $shellpath\n");
5719 return $shellpath;
5722 # Get parent pid
5723 if(open(my $fd, "<", "/proc/$testpid/stat")) {
5724 my $line = <$fd>;
5725 close $fd;
5726 # Parent pid is field 4
5727 $testpid = (split /\s+/, $line)[3];
5728 } else {
5729 # Something is wrong: fall back to old method
5730 last;
5734 # if -sh or -csh try readlink /proc/$$/exe
5735 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
5736 my $shellpath;
5737 my $testpid = $pid;
5738 while($testpid) {
5739 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5740 my $shellname = $4 || $8;
5741 my $dash = $3 || $7;
5742 if($shellname eq "sh" and $dash) {
5743 # -sh => csh or sh
5744 if($shellpath = readlink "/proc/$testpid/exe") {
5745 ::debug("init","procpath $shellpath\n");
5746 if($shellpath =~ m:/$shell$:o) {
5747 ::debug("init", "proc which ".$shellpath." => ");
5748 return $shellpath;
5752 ::debug("init", "which ".$shellname." => ");
5753 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5754 ::debug("init", "shell path $shellpath\n");
5755 $shellpath and last;
5757 if($testpid == $parent_of_ref->{$testpid}) {
5758 # In Solaris zones, the PPID of the zsched process is itself
5759 last;
5761 $testpid = $parent_of_ref->{$testpid};
5763 return $shellpath;
5768 my %pid_parentpid_cmd;
5770 sub pid_table() {
5771 # Returns:
5772 # %children_of = { pid -> children of pid }
5773 # %parent_of = { pid -> pid of parent }
5774 # %name_of = { pid -> commandname }
5776 if(not %pid_parentpid_cmd) {
5777 # Filter for SysV-style `ps`
5778 my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5779 q(s/^.{$s}//; print "@F[1,2] $_"' );
5780 # Minix uses cols 2,3 and can have newlines in the command
5781 # so lines not having numbers in cols 2,3 must be ignored
5782 my $minix = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5783 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5784 # BSD-style `ps`
5785 my $bsd = q(ps -o pid,ppid,command -ax);
5786 %pid_parentpid_cmd =
5788 'aix' => $sysv,
5789 'android' => $sysv,
5790 'cygwin' => $sysv,
5791 'darwin' => $bsd,
5792 'dec_osf' => $sysv,
5793 'dragonfly' => $bsd,
5794 'freebsd' => $bsd,
5795 'gnu' => $sysv,
5796 'hpux' => $sysv,
5797 'linux' => $sysv,
5798 'mirbsd' => $bsd,
5799 'minix' => $minix,
5800 'msys' => $sysv,
5801 'MSWin32' => $sysv,
5802 'netbsd' => $bsd,
5803 'nto' => $sysv,
5804 'openbsd' => $bsd,
5805 'solaris' => $sysv,
5806 'svr5' => $sysv,
5807 'syllable' => "echo ps not supported",
5810 $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
5812 my (@pidtable,%parent_of,%children_of,%name_of);
5813 # Table with pid -> children of pid
5814 @pidtable = `$pid_parentpid_cmd{$^O}`;
5815 my $p=$$;
5816 for (@pidtable) {
5817 # must match: 24436 21224 busybox ash
5818 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5819 # must match: 24436 21224 <<empty on system running Viber>>
5820 # or: perl -e 'while($0=" "){}'
5821 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5823 /^\s*(\S+)\s+(\S+)\s+()$/) {
5824 $parent_of{$1} = $2;
5825 push @{$children_of{$2}}, $1;
5826 $name_of{$1} = $3;
5827 } else {
5828 ::die_bug("pidtable format: $_");
5831 return(\%children_of, \%parent_of, \%name_of);
5835 sub now() {
5836 # Returns time since epoch as in seconds with 3 decimals
5837 # Uses:
5838 # @Global::use
5839 # Returns:
5840 # $time = time now with millisecond accuracy
5841 if(not $Global::use{"Time::HiRes"}) {
5842 if(eval "use Time::HiRes qw ( time );") {
5843 eval "sub TimeHiRestime { return Time::HiRes::time };";
5844 } else {
5845 eval "sub TimeHiRestime { return time() };";
5847 $Global::use{"Time::HiRes"} = 1;
5850 return (int(TimeHiRestime()*1000))/1000;
5853 sub usleep($) {
5854 # Sleep this many milliseconds.
5855 # Input:
5856 # $ms = milliseconds to sleep
5857 my $ms = shift;
5858 ::debug("timing",int($ms),"ms ");
5859 select(undef, undef, undef, $ms/1000);
5862 sub __KILLER_REAPER__() {}
5864 sub reap_usleep() {
5865 # Reap dead children.
5866 # If no dead children: Sleep specified amount with exponential backoff
5867 # Input:
5868 # $ms = milliseconds to sleep
5869 # Returns:
5870 # $ms/2+0.001 if children reaped
5871 # $ms*1.1 if no children reaped
5872 my $ms = shift;
5873 if(reapers()) {
5874 if(not $Global::total_completed % 100) {
5875 if($opt::timeout) {
5876 # Force cleaning the timeout queue for every 100 jobs
5877 # Fixes potential memleak
5878 $Global::timeoutq->process_timeouts();
5881 # Sleep exponentially shorter (1/2^n) if a job finished
5882 return $ms/2+0.001;
5883 } else {
5884 if($opt::timeout) {
5885 $Global::timeoutq->process_timeouts();
5887 if($opt::memfree) {
5888 kill_youngster_if_not_enough_mem($opt::memfree*0.5);
5890 if($opt::memsuspend) {
5891 kill_youngster_if_not_enough_mem($opt::memsuspend*0.5);
5893 if($opt::limit) {
5894 kill_youngest_if_over_limit();
5896 exit_if_disk_full();
5897 if($opt::linebuffer) {
5898 my $something_printed = 0;
5899 if($opt::keeporder) {
5900 for my $job (values %Global::running) {
5901 $something_printed += $job->print_earlier_jobs();
5903 } else {
5904 for my $job (values %Global::running) {
5905 $something_printed += $job->print();
5908 if($something_printed) {
5909 $ms = $ms/2+0.001;
5912 if($ms > 0.002) {
5913 # When a child dies, wake up from sleep (or select(,,,))
5914 $SIG{CHLD} = sub { kill "ALRM", $$ };
5915 if($opt::delay) {
5916 # The 0.004s is approximately the time it takes for one round
5917 usleep(1000*($Global::newest_starttime +
5918 $opt::delay - 0.004 - ::now()));
5919 } else {
5920 usleep($ms);
5922 # --compress needs $SIG{CHLD} unset
5923 $SIG{CHLD} = 'DEFAULT';
5925 # Sleep exponentially longer (1.1^n) if a job did not finish,
5926 # though at most 1000 ms.
5927 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
5931 sub kill_youngest_if_over_limit() {
5932 # Check each $sshlogin we are over limit
5933 # If over limit: kill off the youngest child
5934 # Put the child back in the queue.
5935 # Uses:
5936 # %Global::running
5937 my %jobs_of;
5938 my @sshlogins;
5940 for my $job (values %Global::running) {
5941 if(not $jobs_of{$job->sshlogin()}) {
5942 push @sshlogins, $job->sshlogin();
5944 push @{$jobs_of{$job->sshlogin()}}, $job;
5946 for my $sshlogin (@sshlogins) {
5947 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5948 if($sshlogin->limit() == 2) {
5949 $job->kill();
5950 last;
5956 sub kill_youngster_if_not_enough_mem() {
5957 # Check each $sshlogin if there is enough mem.
5958 # If less than 50% enough free mem: kill off the youngest child
5959 # Put the child back in the queue.
5960 # Uses:
5961 # %Global::running
5962 my $limit = shift;
5963 my %jobs_of;
5964 my @sshlogins;
5966 for my $job (values %Global::running) {
5967 if(not $jobs_of{$job->sshlogin()}) {
5968 push @sshlogins, $job->sshlogin();
5970 push @{$jobs_of{$job->sshlogin()}}, $job;
5972 for my $sshlogin (@sshlogins) {
5973 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5974 if($sshlogin->memfree() < $limit) {
5975 ::debug("mem","\n",map { $_->seq()." " }
5976 (sort { $b->seq() <=> $a->seq() }
5977 @{$jobs_of{$sshlogin}}));
5978 ::debug("mem","\n", $job->seq(), "killed ",
5979 $sshlogin->memfree()," < ",$limit);
5980 $job->kill();
5981 $sshlogin->memfree_recompute();
5982 } else {
5983 last;
5986 ::debug("mem","Free mem OK ",
5987 $sshlogin->memfree()," > ",$limit);
5992 sub __DEBUGGING__() {}
5995 sub debug(@) {
5996 # Uses:
5997 # $Global::debug
5998 # %Global::fd
5999 # Returns: N/A
6000 $Global::debug or return;
6001 @_ = grep { defined $_ ? $_ : "" } @_;
6002 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
6003 if($Global::fd{1}) {
6004 # Original stdout was saved
6005 my $stdout = $Global::fd{1};
6006 print $stdout @_[1..$#_];
6007 } else {
6008 print @_[1..$#_];
6013 sub my_memory_usage() {
6014 # Returns:
6015 # memory usage if found
6016 # 0 otherwise
6017 use strict;
6018 use FileHandle;
6020 local $/ = "\n";
6021 my $pid = $$;
6022 if(-e "/proc/$pid/stat") {
6023 my $fh = FileHandle->new("</proc/$pid/stat");
6025 my $data = <$fh>;
6026 chomp $data;
6027 $fh->close;
6029 my @procinfo = split(/\s+/,$data);
6031 return undef_as_zero($procinfo[22]);
6032 } else {
6033 return 0;
6037 sub my_size() {
6038 # Returns:
6039 # $size = size of object if Devel::Size is installed
6040 # -1 otherwise
6041 my @size_this = (@_);
6042 eval "use Devel::Size qw(size total_size)";
6043 if ($@) {
6044 return -1;
6045 } else {
6046 return total_size(@_);
6050 sub my_dump(@) {
6051 # Returns:
6052 # ascii expression of object if Data::Dump(er) is installed
6053 # error code otherwise
6054 my @dump_this = (@_);
6055 eval "use Data::Dump qw(dump);";
6056 if ($@) {
6057 # Data::Dump not installed
6058 eval "use Data::Dumper;";
6059 if ($@) {
6060 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
6061 "Not dumping output\n";
6062 ::status($err);
6063 return $err;
6064 } else {
6065 return Dumper(@dump_this);
6067 } else {
6068 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
6069 # it undefined
6070 eval "sub Data::Dump:dump {}";
6071 eval "use Data::Dump qw(dump);";
6072 return (Data::Dump::dump(@dump_this));
6076 sub my_croak(@) {
6077 eval "use Carp; 1";
6078 $Carp::Verbose = 1;
6079 croak(@_);
6082 sub my_carp() {
6083 eval "use Carp; 1";
6084 $Carp::Verbose = 1;
6085 carp(@_);
6089 sub __OBJECT_ORIENTED_PARTS__() {}
6092 package SSHLogin;
6094 sub new($$) {
6095 my $class = shift;
6096 my $sshlogin_string = shift;
6097 my $ncpus;
6098 my %hostgroups;
6099 # SSHLogins can have these formats:
6100 # @grp+grp/ncpu//usr/bin/ssh user@server
6101 # ncpu//usr/bin/ssh user@server
6102 # /usr/bin/ssh user@server
6103 # user@server
6104 # ncpu/user@server
6105 # @grp+grp/user@server
6106 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
6107 # Look for SSHLogin hostgroups
6108 %hostgroups = map { $_ => 1 } split(/\+/, $1);
6110 # An SSHLogin is always in the hostgroup of its "numcpu/host"
6111 $hostgroups{$sshlogin_string} = 1;
6112 if ($sshlogin_string =~ s:^(\d+)/::) {
6113 # Override default autodetected ncpus unless missing
6114 $ncpus = $1;
6116 my $string = $sshlogin_string;
6117 # An SSHLogin is always in the hostgroup of its $string-name
6118 $hostgroups{$string} = 1;
6119 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
6120 my @unget = ();
6121 my $no_slash_string = $string;
6122 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
6123 return bless {
6124 'string' => $string,
6125 'jobs_running' => 0,
6126 'jobs_completed' => 0,
6127 'maxlength' => undef,
6128 'max_jobs_running' => undef,
6129 'orig_max_jobs_running' => undef,
6130 'ncpus' => $ncpus,
6131 'hostgroups' => \%hostgroups,
6132 'sshcommand' => undef,
6133 'serverlogin' => undef,
6134 'control_path_dir' => undef,
6135 'control_path' => undef,
6136 'time_to_login' => undef,
6137 'last_login_at' => undef,
6138 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6139 $no_slash_string . "/loadavg",
6140 'loadavg' => undef,
6141 'last_loadavg_update' => 0,
6142 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6143 $no_slash_string . "/swap_activity",
6144 'swap_activity' => undef,
6145 }, ref($class) || $class;
6148 sub DESTROY($) {
6149 my $self = shift;
6150 # Remove temporary files if they are created.
6151 ::rm($self->{'loadavg_file'});
6152 ::rm($self->{'swap_activity_file'});
6155 sub string($) {
6156 my $self = shift;
6157 return $self->{'string'};
6160 sub jobs_running($) {
6161 my $self = shift;
6162 return ($self->{'jobs_running'} || "0");
6165 sub inc_jobs_running($) {
6166 my $self = shift;
6167 $self->{'jobs_running'}++;
6170 sub dec_jobs_running($) {
6171 my $self = shift;
6172 $self->{'jobs_running'}--;
6175 sub set_maxlength($$) {
6176 my $self = shift;
6177 $self->{'maxlength'} = shift;
6180 sub maxlength($) {
6181 my $self = shift;
6182 return $self->{'maxlength'};
6185 sub jobs_completed() {
6186 my $self = shift;
6187 return $self->{'jobs_completed'};
6190 sub in_hostgroups() {
6191 # Input:
6192 # @hostgroups = the hostgroups to look for
6193 # Returns:
6194 # true if intersection of @hostgroups and the hostgroups of this
6195 # SSHLogin is non-empty
6196 my $self = shift;
6197 return grep { defined $self->{'hostgroups'}{$_} } @_;
6200 sub hostgroups() {
6201 my $self = shift;
6202 return keys %{$self->{'hostgroups'}};
6205 sub inc_jobs_completed($) {
6206 my $self = shift;
6207 $self->{'jobs_completed'}++;
6208 $Global::total_completed++;
6211 sub set_max_jobs_running($$) {
6212 my $self = shift;
6213 if(defined $self->{'max_jobs_running'}) {
6214 $Global::max_jobs_running -= $self->{'max_jobs_running'};
6216 $self->{'max_jobs_running'} = shift;
6217 if(defined $self->{'max_jobs_running'}) {
6218 # max_jobs_running could be resat if -j is a changed file
6219 $Global::max_jobs_running += $self->{'max_jobs_running'};
6221 # Initialize orig to the first non-zero value that comes around
6222 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
6225 sub memfree() {
6226 # Returns:
6227 # $memfree in bytes
6228 my $self = shift;
6229 $self->memfree_recompute();
6230 # Return 1 if not defined.
6231 return (not defined $self->{'memfree'} or $self->{'memfree'})
6234 sub memfree_recompute() {
6235 my $self = shift;
6236 my $script = memfreescript();
6238 # TODO add sshlogin and backgrounding
6239 # Run the script twice if it gives 0 (typically intermittent error)
6240 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
6241 if(not $self->{'memfree'}) {
6242 ::die_bug("Less than 1 byte memory free");
6244 #::debug("mem","New free:",$self->{'memfree'}," ");
6248 my $script;
6250 sub memfreescript() {
6251 # Returns:
6252 # shellscript for giving available memory in bytes
6253 if(not $script) {
6254 my %script_of = (
6255 # /proc/meminfo
6256 # MemFree: 7012 kB
6257 # Buffers: 19876 kB
6258 # Cached: 431192 kB
6259 # SwapCached: 0 kB
6260 "linux" => (
6262 print 1024 * qx{
6263 awk '/^((Swap)?Cached|MemFree|Buffers):/
6264 { sum += \$2} END { print sum }'
6265 /proc/meminfo }
6267 # Android uses same code as GNU/Linux
6268 "android" => (
6270 print 1024 * qx{
6271 awk '/^((Swap)?Cached|MemFree|Buffers):/
6272 { sum += \$2} END { print sum }'
6273 /proc/meminfo }
6275 # $ vmstat 1 1
6276 # procs memory page faults cpu
6277 # r b w avm free re at pi po fr de sr in sy cs us sy id
6278 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
6279 "hpux" => (
6281 print (((reverse `vmstat 1 1`)[0]
6282 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6284 # $ vmstat 1 2
6285 # kthr memory page disk faults cpu
6286 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
6287 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
6288 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
6290 # The second free value is correct
6291 "solaris" => (
6293 print (((reverse `vmstat 1 2`)[0]
6294 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6296 # hw.pagesize: 4096
6297 # vm.stats.vm.v_cache_count: 0
6298 # vm.stats.vm.v_inactive_count: 79574
6299 # vm.stats.vm.v_free_count: 4507
6300 "freebsd" => (
6302 for(qx{/sbin/sysctl -a}) {
6303 if (/^([^:]+):\s+(.+)\s*$/s) {
6304 $sysctl->{$1} = $2;
6307 print $sysctl->{"hw.pagesize"} *
6308 ($sysctl->{"vm.stats.vm.v_cache_count"}
6309 + $sysctl->{"vm.stats.vm.v_inactive_count"}
6310 + $sysctl->{"vm.stats.vm.v_free_count"});
6312 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6313 # Pages free: 198061.
6314 # Pages active: 159701.
6315 # Pages inactive: 47378.
6316 # Pages speculative: 29707.
6317 # Pages wired down: 89231.
6318 # "Translation faults": 928901425.
6319 # Pages copy-on-write: 156988239.
6320 # Pages zero filled: 271267894.
6321 # Pages reactivated: 48895.
6322 # Pageins: 1798068.
6323 # Pageouts: 257.
6324 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
6325 'darwin' => (
6327 $vm = `vm_stat`;
6328 print (($vm =~ /page size of (\d+)/)[0] *
6329 (($vm =~ /Pages free:\s+(\d+)/)[0] +
6330 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
6333 my $perlscript = "";
6334 # Make a perl script that detects the OS ($^O) and runs
6335 # the appropriate command
6336 for my $os (keys %script_of) {
6337 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
6339 $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
6341 return $script;
6345 sub limit($) {
6346 # Returns:
6347 # 0 = Below limit. Start another job.
6348 # 1 = Over limit. Start no jobs.
6349 # 2 = Kill youngest job
6350 my $self = shift;
6352 if(not defined $self->{'limitscript'}) {
6353 my %limitscripts =
6354 ("io" => q!
6355 io() {
6356 limit=$1;
6357 io_file=$2;
6358 # Do the measurement in the background
6359 (tmp=$(tempfile);
6360 LANG=C iostat -x 1 2 > $tmp;
6361 mv $tmp $io_file) &
6362 perl -e '-e $ARGV[0] or exit(1);
6363 for(reverse <>) {
6364 /Device/ and last;
6365 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
6366 exit ($max < '$limit')' $io_file;
6368 export -f io;
6369 io %s %s
6371 "mem" => q!
6372 mem() {
6373 limit=$1;
6374 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
6375 END {
6376 if (sum*1024 < '$limit'/2) { exit 2; }
6377 else { exit (sum*1024 < '$limit') }
6378 }' /proc/meminfo;
6380 export -f mem;
6381 mem %s;
6383 "load" => q!
6384 load() {
6385 limit=$1;
6386 ps ax -o state,command |
6387 grep -E '^[DOR].[^[]' |
6388 wc -l |
6389 perl -ne 'exit ('$limit' < $_)';
6391 export -f load;
6392 load %s;
6395 my ($cmd,@args) = split /\s+/,$opt::limit;
6396 if($limitscripts{$cmd}) {
6397 my $tmpfile = ::tmpname("parlmt");
6398 ++$Global::unlink{$tmpfile};
6399 $self->{'limitscript'} =
6400 ::spacefree(1, sprintf($limitscripts{$cmd},
6401 ::multiply_binary_prefix(@args),$tmpfile));
6402 } else {
6403 $self->{'limitscript'} = $opt::limit;
6407 my %env = %ENV;
6408 local %ENV = %env;
6409 $ENV{'SSHLOGIN'} = $self->string();
6410 system($Global::shell,"-c",$self->{'limitscript'});
6411 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
6412 return $?>>8;
6416 sub swapping($) {
6417 my $self = shift;
6418 my $swapping = $self->swap_activity();
6419 return (not defined $swapping or $swapping)
6422 sub swap_activity($) {
6423 # If the currently known swap activity is too old:
6424 # Recompute a new one in the background
6425 # Returns:
6426 # last swap activity computed
6427 my $self = shift;
6428 # Should we update the swap_activity file?
6429 my $update_swap_activity_file = 0;
6430 if(-r $self->{'swap_activity_file'}) {
6431 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
6432 ::die_bug("swap_activity_file-r");
6433 my $swap_out = <$swap_fh>;
6434 close $swap_fh;
6435 if($swap_out =~ /^(\d+)$/) {
6436 $self->{'swap_activity'} = $1;
6437 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
6439 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
6440 if(time - $self->{'last_swap_activity_update'} > 10) {
6441 # last swap activity update was started 10 seconds ago
6442 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
6443 $update_swap_activity_file = 1;
6445 } else {
6446 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
6447 $self->{'swap_activity'} = undef;
6448 $update_swap_activity_file = 1;
6450 if($update_swap_activity_file) {
6451 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
6452 $self->{'last_swap_activity_update'} = time;
6453 my $dir = ::dirname($self->{'swap_activity_file'});
6454 -d $dir or eval { File::Path::mkpath($dir); };
6455 my $swap_activity;
6456 $swap_activity = swapactivityscript();
6457 if($self->{'string'} ne ":") {
6458 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
6459 ::Q($swap_activity);
6461 # Run swap_activity measuring.
6462 # As the command can take long to run if run remote
6463 # save it to a tmp file before moving it to the correct file
6464 my $file = $self->{'swap_activity_file'};
6465 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
6466 ::debug("swap", "\n", $swap_activity, "\n");
6467 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
6469 return $self->{'swap_activity'};
6473 my $script;
6475 sub swapactivityscript() {
6476 # Returns:
6477 # shellscript for detecting swap activity
6479 # arguments for vmstat are OS dependant
6480 # swap_in and swap_out are in different columns depending on OS
6482 if(not $script) {
6483 my %vmstat = (
6484 # linux: $7*$8
6485 # $ vmstat 1 2
6486 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6487 # r b swpd free buff cache si so bi bo in cs us sy id wa
6488 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6489 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6490 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6492 # solaris: $6*$7
6493 # $ vmstat -S 1 2
6494 # kthr memory page disk faults cpu
6495 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6496 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6497 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6498 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6500 # darwin (macosx): $21*$22
6501 # $ vm_stat -c 2 1
6502 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6503 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6504 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6505 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6506 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6508 # ultrix: $12*$13
6509 # $ vmstat -S 1 2
6510 # procs faults cpu memory page disk
6511 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6512 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6513 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6514 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6516 # aix: $6*$7
6517 # $ vmstat 1 2
6518 # System configuration: lcpu=1 mem=2048MB
6520 # kthr memory page faults cpu
6521 # ----- ----------- ------------------------ ------------ -----------
6522 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6523 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6524 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6525 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6527 # freebsd: $8*$9
6528 # $ vmstat -H 1 2
6529 # procs memory page disks faults cpu
6530 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6531 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6532 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6533 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6535 # mirbsd: $8*$9
6536 # $ vmstat 1 2
6537 # procs memory page disks traps cpu
6538 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6539 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6540 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6541 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6543 # netbsd: $7*$8
6544 # $ vmstat 1 2
6545 # procs memory page disks faults cpu
6546 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6547 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6548 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6549 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6551 # openbsd: $8*$9
6552 # $ vmstat 1 2
6553 # procs memory page disks traps cpu
6554 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6555 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6556 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6557 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6559 # hpux: $8*$9
6560 # $ vmstat 1 2
6561 # procs memory page faults cpu
6562 # r b w avm free re at pi po fr de sr in sy cs us sy id
6563 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6564 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6565 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6567 # dec_osf (tru64): $11*$12
6568 # $ vmstat 1 2
6569 # Virtual Memory Statistics: (pagesize = 8192)
6570 # procs memory pages intr cpu
6571 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6572 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6573 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6574 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6576 # gnu (hurd): $7*$8
6577 # $ vmstat -k 1 2
6578 # (pagesize: 4, size: 512288, swap size: 894972)
6579 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6580 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6581 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6582 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6584 # -nto (qnx has no swap)
6585 #-irix
6586 #-svr5 (scosysv)
6588 my $perlscript = "";
6589 # Make a perl script that detects the OS ($^O) and runs
6590 # the appropriate vmstat command
6591 for my $os (keys %vmstat) {
6592 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6593 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6594 $vmstat{$os}[1] . '}"` }';
6596 $script = "perl -e " . ::Q($perlscript);
6598 return $script;
6602 sub too_fast_remote_login($) {
6603 my $self = shift;
6604 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6605 # sshd normally allows 10 simultaneous logins
6606 # A login takes time_to_login
6607 # So time_to_login/5 should be safe
6608 # If now <= last_login + time_to_login/5: Then it is too soon.
6609 my $too_fast = (::now() <= $self->{'last_login_at'}
6610 + $self->{'time_to_login'}/5);
6611 ::debug("run", "Too fast? $too_fast ");
6612 return $too_fast;
6613 } else {
6614 # No logins so far (or time_to_login not computed): it is not too fast
6615 return 0;
6619 sub last_login_at($) {
6620 my $self = shift;
6621 return $self->{'last_login_at'};
6624 sub set_last_login_at($$) {
6625 my $self = shift;
6626 $self->{'last_login_at'} = shift;
6629 sub loadavg_too_high($) {
6630 my $self = shift;
6631 my $loadavg = $self->loadavg();
6632 if(defined $loadavg) {
6633 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6634 return $loadavg >= $self->max_loadavg();
6635 } else {
6636 # Unknown load: Assume load is too high
6637 return 1;
6642 my $cmd;
6643 sub loadavg_cmd() {
6644 if(not $cmd) {
6645 # aix => "ps -ae -o state,command" # state wrong
6646 # bsd => "ps ax -o state,command"
6647 # sysv => "ps -ef -o s -o comm"
6648 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6649 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6650 # awk '{print $2,$1}'
6651 # dec_osf => bsd
6652 # dragonfly => bsd
6653 # freebsd => bsd
6654 # gnu => bsd
6655 # hpux => ps -el|awk '{print $2,$14,$15}'
6656 # irix => ps -ef -o state -o comm
6657 # linux => bsd
6658 # minix => ps el|awk '{print \$1,\$11}'
6659 # mirbsd => bsd
6660 # netbsd => bsd
6661 # openbsd => bsd
6662 # solaris => sysv
6663 # svr5 => sysv
6664 # ultrix => ps -ax | awk '{print $3,$5}'
6665 # unixware => ps -el|awk '{print $2,$14,$15}'
6666 my $ps = ::spacefree(1,q{
6667 $sysv="ps -ef -o s -o comm";
6668 $sysv2="ps -ef -o state -o comm";
6669 $bsd="ps ax -o state,command";
6670 # Treat threads as processes
6671 $bsd2="ps axH -o state,command";
6672 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6673 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6674 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6675 awk '{print $2,$1}' };
6676 $dummy="echo S COMMAND;echo R dummy";
6677 %ps=(
6678 # TODO Find better code for AIX/Android
6679 'aix' => "uptime",
6680 'android' => "uptime",
6681 'cygwin' => $cygwin,
6682 'darwin' => $bsd,
6683 'dec_osf' => $sysv2,
6684 'dragonfly' => $bsd,
6685 'freebsd' => $bsd2,
6686 'gnu' => $bsd,
6687 'hpux' => $psel,
6688 'irix' => $sysv2,
6689 'linux' => $bsd2,
6690 'minix' => "ps el|awk '{print \$1,\$11}'",
6691 'mirbsd' => $bsd,
6692 'msys' => $cygwin,
6693 'netbsd' => $bsd,
6694 'nto' => $dummy,
6695 'openbsd' => $bsd,
6696 'solaris' => $sysv,
6697 'svr5' => $psel,
6698 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6699 'MSWin32' => $sysv,
6701 print `$ps{$^O}`;
6703 # The command is too long for csh, so base64_wrap the command
6704 $cmd = Job::base64_wrap($ps);
6706 return $cmd;
6711 sub loadavg($) {
6712 # If the currently know loadavg is too old:
6713 # Recompute a new one in the background
6714 # The load average is computed as the number of processes waiting for disk
6715 # or CPU right now. So it is the server load this instant and not averaged over
6716 # several minutes. This is needed so GNU Parallel will at most start one job
6717 # that will push the load over the limit.
6719 # Returns:
6720 # $last_loadavg = last load average computed (undef if none)
6721 my $self = shift;
6722 # Should we update the loadavg file?
6723 my $update_loadavg_file = 0;
6724 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6725 local $/; # $/ = undef => slurp whole file
6726 my $load_out = <$load_fh>;
6727 close $load_fh;
6728 if($load_out =~ /\S/) {
6729 # Content can be empty if ~/ is on NFS
6730 # due to reading being non-atomic.
6732 # Count lines starting with D,O,R but command does not start with [
6733 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6734 if($load > 0) {
6735 # load is overestimated by 1
6736 $self->{'loadavg'} = $load - 1;
6737 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6738 } elsif ($load_out=~/average: (\d+.\d+)/) {
6739 # AIX does not support instant load average
6740 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6741 $self->{'loadavg'} = $1;
6742 } else {
6743 ::die_bug("loadavg_invalid_content: " .
6744 $self->{'loadavg_file'} . "\n$load_out");
6747 $update_loadavg_file = 1;
6748 } else {
6749 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6750 $self->{'loadavg'} = undef;
6751 $update_loadavg_file = 1;
6753 if($update_loadavg_file) {
6754 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
6755 $self->{'last_loadavg_update'} = time;
6756 my $dir = ::dirname($self->{'swap_activity_file'});
6757 -d $dir or eval { File::Path::mkpath($dir); };
6758 -w $dir or ::die_bug("Cannot write to $dir");
6759 my $cmd = "";
6760 if($self->{'string'} ne ":") {
6761 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
6762 ::Q(loadavg_cmd());
6763 } else {
6764 $cmd .= loadavg_cmd();
6766 # As the command can take long to run if run remote
6767 # save it to a tmp file before moving it to the correct file
6768 ::debug("load", "Update load\n");
6769 my $file = $self->{'loadavg_file'};
6770 # tmpfile on same filesystem as $file
6771 my $tmpfile = $file.$$;
6772 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6774 return $self->{'loadavg'};
6777 sub max_loadavg($) {
6778 my $self = shift;
6779 # If --load is a file it might be changed
6780 if($Global::max_load_file) {
6781 my $mtime = (stat($Global::max_load_file))[9];
6782 if($mtime > $Global::max_load_file_last_mod) {
6783 $Global::max_load_file_last_mod = $mtime;
6784 for my $sshlogin (values %Global::host) {
6785 $sshlogin->set_max_loadavg(undef);
6789 if(not defined $self->{'max_loadavg'}) {
6790 $self->{'max_loadavg'} =
6791 $self->compute_max_loadavg($opt::load);
6793 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6794 return $self->{'max_loadavg'};
6797 sub set_max_loadavg($$) {
6798 my $self = shift;
6799 $self->{'max_loadavg'} = shift;
6802 sub compute_max_loadavg($) {
6803 # Parse the max loadaverage that the user asked for using --load
6804 # Returns:
6805 # max loadaverage
6806 my $self = shift;
6807 my $loadspec = shift;
6808 my $load;
6809 if(defined $loadspec) {
6810 if($loadspec =~ /^\+(\d+)$/) {
6811 # E.g. --load +2
6812 my $j = $1;
6813 $load =
6814 $self->ncpus() + $j;
6815 } elsif ($loadspec =~ /^-(\d+)$/) {
6816 # E.g. --load -2
6817 my $j = $1;
6818 $load =
6819 $self->ncpus() - $j;
6820 } elsif ($loadspec =~ /^(\d+)\%$/) {
6821 my $j = $1;
6822 $load =
6823 $self->ncpus() * $j / 100;
6824 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6825 $load = $1;
6826 } elsif (-f $loadspec) {
6827 $Global::max_load_file = $loadspec;
6828 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6829 if(open(my $in_fh, "<", $Global::max_load_file)) {
6830 my $opt_load_file = join("",<$in_fh>);
6831 close $in_fh;
6832 $load = $self->compute_max_loadavg($opt_load_file);
6833 } else {
6834 ::error("Cannot open $loadspec.");
6835 ::wait_and_exit(255);
6837 } else {
6838 ::error("Parsing of --load failed.");
6839 ::die_usage();
6841 if($load < 0.01) {
6842 $load = 0.01;
6845 return $load;
6848 sub time_to_login($) {
6849 my $self = shift;
6850 return $self->{'time_to_login'};
6853 sub set_time_to_login($$) {
6854 my $self = shift;
6855 $self->{'time_to_login'} = shift;
6858 sub max_jobs_running($) {
6859 my $self = shift;
6860 if(not defined $self->{'max_jobs_running'}) {
6861 my $nproc = $self->compute_number_of_processes($opt::jobs);
6862 $self->set_max_jobs_running($nproc);
6864 return $self->{'max_jobs_running'};
6867 sub orig_max_jobs_running($) {
6868 my $self = shift;
6869 return $self->{'orig_max_jobs_running'};
6872 sub compute_number_of_processes($) {
6873 # Number of processes wanted and limited by system resources
6874 # Returns:
6875 # Number of processes
6876 my $self = shift;
6877 my $opt_P = shift;
6878 my $wanted_processes = $self->user_requested_processes($opt_P);
6879 if(not defined $wanted_processes) {
6880 $wanted_processes = $Global::default_simultaneous_sshlogins;
6882 ::debug("load", "Wanted procs: $wanted_processes\n");
6883 my $system_limit =
6884 $self->processes_available_by_system_limit($wanted_processes);
6885 ::debug("load", "Limited to procs: $system_limit\n");
6886 return $system_limit;
6890 my @children;
6891 my $max_system_proc_reached;
6892 my $more_filehandles;
6893 my %fh;
6894 my $tmpfhname;
6895 my $count_jobs_already_read;
6896 my @jobs;
6897 my $job;
6898 my @args;
6899 my $arg;
6901 sub reserve_filehandles($) {
6902 # Reserves filehandle
6903 my $n = shift;
6904 for (1..$n) {
6905 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6909 sub reserve_process() {
6910 # Spawn a dummy process
6911 my $child;
6912 if($child = fork()) {
6913 push @children, $child;
6914 $Global::unkilled_children{$child} = 1;
6915 } elsif(defined $child) {
6916 # This is the child
6917 # The child takes one process slot
6918 # It will be killed later
6919 $SIG{'TERM'} = $Global::original_sig{'TERM'};
6920 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6921 # The exec does not work on Cygwin and QNX
6922 sleep 10101010;
6923 } else {
6924 # 'exec sleep' takes less RAM than sleeping in perl
6925 exec 'sleep', 10101;
6927 exit(0);
6928 } else {
6929 # Failed to spawn
6930 $max_system_proc_reached = 1;
6934 sub get_args_or_jobs() {
6935 # Get an arg or a job (depending on mode)
6936 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6937 # Skip: No need to get args
6938 return 1;
6939 } elsif(defined $opt::retries and $count_jobs_already_read) {
6940 # For retries we may need to run all jobs on this sshlogin
6941 # so include the already read jobs for this sshlogin
6942 $count_jobs_already_read--;
6943 return 1;
6944 } else {
6945 if($opt::X or $opt::m) {
6946 # The arguments may have to be re-spread over several jobslots
6947 # So pessimistically only read one arg per jobslot
6948 # instead of a full commandline
6949 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6950 if($Global::JobQueue->empty()) {
6951 return 0;
6952 } else {
6953 $job = $Global::JobQueue->get();
6954 push(@jobs, $job);
6955 return 1;
6957 } else {
6958 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6959 push(@args, $arg);
6960 return 1;
6962 } else {
6963 # If there are no more command lines, then we have a process
6964 # per command line, so no need to go further
6965 if($Global::JobQueue->empty()) {
6966 return 0;
6967 } else {
6968 $job = $Global::JobQueue->get();
6969 # Replacement must happen here due to seq()
6970 $job and $job->replaced();
6971 push(@jobs, $job);
6972 return 1;
6978 sub cleanup() {
6979 # Cleanup: Close the files
6980 for (values %fh) { close $_ }
6981 # Cleanup: Kill the children
6982 for my $pid (@children) {
6983 kill 9, $pid;
6984 waitpid($pid,0);
6985 delete $Global::unkilled_children{$pid};
6987 # Cleanup: Unget the command_lines or the @args
6988 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6989 @args = ();
6990 $Global::JobQueue->unget(@jobs);
6991 @jobs = ();
6994 sub processes_available_by_system_limit($) {
6995 # If the wanted number of processes is bigger than the system limits:
6996 # Limit them to the system limits
6997 # Limits are: File handles, number of input lines, processes,
6998 # and taking > 1 second to spawn 10 extra processes
6999 # Returns:
7000 # Number of processes
7001 my $self = shift;
7002 my $wanted_processes = shift;
7003 my $system_limit = 0;
7004 my $slow_spawning_warning_printed = 0;
7005 my $time = time;
7006 $more_filehandles = 1;
7007 $tmpfhname = "TmpFhNamE";
7009 # perl uses 7 filehandles for something?
7010 # parallel uses 1 for memory_usage
7011 # parallel uses 4 for ?
7012 reserve_filehandles(12);
7013 # Two processes for load avg and ?
7014 reserve_process();
7015 reserve_process();
7017 # For --retries count also jobs already run
7018 $count_jobs_already_read = $Global::JobQueue->next_seq();
7019 my $wait_time_for_getting_args = 0;
7020 my $start_time = time;
7021 while(1) {
7022 $system_limit >= $wanted_processes and last;
7023 not $more_filehandles and last;
7024 $max_system_proc_reached and last;
7026 my $before_getting_arg = time;
7027 if(!$Global::dummy_jobs) {
7028 get_args_or_jobs() or last;
7030 $wait_time_for_getting_args += time - $before_getting_arg;
7031 $system_limit++;
7033 # Every simultaneous process uses 2 filehandles to write to
7034 # and 2 filehandles to read from
7035 reserve_filehandles(4);
7037 # System process limit
7038 reserve_process();
7040 my $forktime = time - $time - $wait_time_for_getting_args;
7041 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
7042 $forktime,
7043 " (processes so far: ", $system_limit,")\n");
7044 if($system_limit > 10 and
7045 $forktime > 1 and
7046 $forktime > $system_limit * 0.01
7047 and not $slow_spawning_warning_printed) {
7048 # It took more than 0.01 second to fork a processes on avg.
7049 # Give the user a warning. He can press Ctrl-C if this
7050 # sucks.
7051 ::warning("Starting $system_limit processes took > $forktime sec.",
7052 "Consider adjusting -j. Press CTRL-C to stop.");
7053 $slow_spawning_warning_printed = 1;
7056 cleanup();
7058 if($system_limit < $wanted_processes) {
7059 # The system_limit is less than the wanted_processes
7060 if($system_limit < 1 and not $Global::JobQueue->empty()) {
7061 ::warning("Cannot spawn any jobs. ".
7062 "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
7063 "or /proc/sys/kernel/pid_max may help.");
7064 ::wait_and_exit(255);
7066 if(not $more_filehandles) {
7067 ::warning("Only enough file handles to run ".
7068 $system_limit. " jobs in parallel.",
7069 "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
7070 "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
7071 "or /proc/sys/fs/file-max may help.");
7073 if($max_system_proc_reached) {
7074 ::warning("Only enough available processes to run ".
7075 $system_limit. " jobs in parallel.",
7076 "Raising ulimit -u or /etc/security/limits.conf ",
7077 "or /proc/sys/kernel/pid_max may help.");
7080 if($] == 5.008008 and $system_limit > 1000) {
7081 # https://savannah.gnu.org/bugs/?36942
7082 $system_limit = 1000;
7084 if($Global::JobQueue->empty()) {
7085 $system_limit ||= 1;
7087 if($self->string() ne ":" and
7088 $system_limit > $Global::default_simultaneous_sshlogins) {
7089 $system_limit =
7090 $self->simultaneous_sshlogin_limit($system_limit);
7092 return $system_limit;
7096 sub simultaneous_sshlogin_limit($) {
7097 # Test by logging in wanted number of times simultaneously
7098 # Returns:
7099 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
7100 my $self = shift;
7101 my $wanted_processes = shift;
7102 if($self->{'time_to_login'}) {
7103 return $wanted_processes;
7106 # Try twice because it guesses wrong sometimes
7107 # Choose the minimal
7108 my $ssh_limit =
7109 ::min($self->simultaneous_sshlogin($wanted_processes),
7110 $self->simultaneous_sshlogin($wanted_processes));
7111 if($ssh_limit < $wanted_processes) {
7112 my $serverlogin = $self->serverlogin();
7113 ::warning("ssh to $serverlogin only allows ".
7114 "for $ssh_limit simultaneous logins.",
7115 "You may raise this by changing",
7116 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
7117 "You can also try --sshdelay 0.1",
7118 "Using only ".($ssh_limit-1)." connections ".
7119 "to avoid race conditions.");
7120 # Race condition can cause problem if using all sshs.
7121 if($ssh_limit > 1) { $ssh_limit -= 1; }
7123 return $ssh_limit;
7126 sub simultaneous_sshlogin($) {
7127 # Using $sshlogin try to see if we can do $wanted_processes
7128 # simultaneous logins
7129 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
7130 # grep simul|wc -l
7131 # Input:
7132 # $wanted_processes = Try for this many logins in parallel
7133 # Returns:
7134 # $ssh_limit = Number of succesful parallel logins
7135 local $/ = "\n";
7136 my $self = shift;
7137 my $wanted_processes = shift;
7138 my $sshcmd = $self->sshcommand();
7139 my $serverlogin = $self->serverlogin();
7140 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
7141 # TODO sh -c wrapper to work for csh
7142 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
7143 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
7144 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
7145 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
7146 ::die_bug("simultaneouslogin");
7147 my $ssh_limit = <$simul_fh>;
7148 close $simul_fh;
7149 chomp $ssh_limit;
7150 return $ssh_limit;
7153 sub set_ncpus($$) {
7154 my $self = shift;
7155 $self->{'ncpus'} = shift;
7158 sub user_requested_processes($) {
7159 # Parse the number of processes that the user asked for using -j
7160 # Input:
7161 # $opt_P = string formatted as for -P
7162 # Returns:
7163 # $processes = the number of processes to run on this sshlogin
7164 my $self = shift;
7165 my $opt_P = shift;
7166 my $processes;
7167 if(defined $opt_P) {
7168 if($opt_P =~ /^\+(\d+)$/) {
7169 # E.g. -P +2
7170 my $j = $1;
7171 $processes =
7172 $self->ncpus() + $j;
7173 } elsif ($opt_P =~ /^-(\d+)$/) {
7174 # E.g. -P -2
7175 my $j = $1;
7176 $processes =
7177 $self->ncpus() - $j;
7178 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
7179 # E.g. -P 10.5%
7180 my $j = $1;
7181 $processes =
7182 $self->ncpus() * $j / 100;
7183 } elsif ($opt_P =~ /^(\d+)$/) {
7184 $processes = $1;
7185 if($processes == 0) {
7186 # -P 0 = infinity (or at least close)
7187 $processes = $Global::infinity;
7189 } elsif (-f $opt_P) {
7190 $Global::max_procs_file = $opt_P;
7191 if(open(my $in_fh, "<", $Global::max_procs_file)) {
7192 my $opt_P_file = join("",<$in_fh>);
7193 close $in_fh;
7194 $processes = $self->user_requested_processes($opt_P_file);
7195 } else {
7196 ::error("Cannot open $opt_P.");
7197 ::wait_and_exit(255);
7199 } else {
7200 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
7201 ::die_usage();
7203 $processes = ::ceil($processes);
7205 return $processes;
7208 sub ncpus($) {
7209 # Number of CPU threads
7210 # --use_sockets_instead_of_threads = count socket instead
7211 # --use_cores_instead_of_threads = count physical cores instead
7212 # Returns:
7213 # $ncpus = number of cpu (threads) on this sshlogin
7214 local $/ = "\n";
7215 my $self = shift;
7216 if(not defined $self->{'ncpus'}) {
7217 my $sshcmd = $self->sshcommand();
7218 my $serverlogin = $self->serverlogin();
7219 if($serverlogin eq ":") {
7220 if($opt::use_sockets_instead_of_threads) {
7221 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
7222 } elsif($opt::use_cores_instead_of_threads) {
7223 $self->{'ncpus'} = socket_core_thread()->{'cores'};
7224 } else {
7225 $self->{'ncpus'} = socket_core_thread()->{'threads'};
7227 } else {
7228 my $ncpu;
7229 ::debug("init","echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7230 if($opt::use_sockets_instead_of_threads
7232 $opt::use_cpus_instead_of_cores) {
7233 $ncpu =
7234 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7235 } elsif($opt::use_cores_instead_of_threads) {
7236 $ncpu =
7237 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
7238 } else {
7239 $ncpu =
7240 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
7242 chomp $ncpu;
7243 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
7244 $self->{'ncpus'} = $ncpu;
7245 } else {
7246 ::warning("Could not figure out ".
7247 "number of cpus on $serverlogin ($ncpu). Using 1.");
7248 $self->{'ncpus'} = 1;
7252 return $self->{'ncpus'};
7256 sub nproc() {
7257 # Returns:
7258 # Number of threads using `nproc`
7259 my $no_of_threads = ::qqx("nproc");
7260 chomp $no_of_threads;
7261 return $no_of_threads;
7264 sub no_of_sockets() {
7265 return socket_core_thread()->{'sockets'};
7268 sub no_of_cores() {
7269 return socket_core_thread()->{'cores'};
7272 sub no_of_threads() {
7273 return socket_core_thread()->{'threads'};
7276 sub socket_core_thread() {
7277 # Returns:
7279 # 'sockets' => #sockets = number of socket with CPU present
7280 # 'cores' => #cores = number of physical cores
7281 # 'threads' => #threads = number of compute cores (hyperthreading)
7282 # 'active' => #taskset_threads = number of taskset limited cores
7284 my $cpu;
7285 my $cached_cpuspec = $Global::cache_dir . "/tmp/sshlogin/" .
7286 ::hostname() . "/cpuspec";
7287 if(-e $cached_cpuspec and -M $cached_cpuspec < 1) {
7288 # Reading cached copy instead of /proc/cpuinfo is 17 ms faster
7289 local $/ = "\n";
7290 if(open(my $in_fh, "<", $cached_cpuspec)) {
7291 ::debug("init","Read $cached_cpuspec\n");
7292 $cpu->{'sockets'} = int(<$in_fh>);
7293 $cpu->{'cores'} = int(<$in_fh>);
7294 $cpu->{'threads'} = int(<$in_fh>);
7295 close $in_fh;
7298 if ($^O eq 'linux') {
7299 $cpu = sct_gnu_linux($cpu);
7300 } elsif ($^O eq 'android') {
7301 $cpu = sct_android($cpu);
7302 } elsif ($^O eq 'freebsd') {
7303 $cpu = sct_freebsd($cpu);
7304 } elsif ($^O eq 'netbsd') {
7305 $cpu = sct_netbsd($cpu);
7306 } elsif ($^O eq 'openbsd') {
7307 $cpu = sct_openbsd($cpu);
7308 } elsif ($^O eq 'gnu') {
7309 $cpu = sct_hurd($cpu);
7310 } elsif ($^O eq 'darwin') {
7311 $cpu = sct_darwin($cpu);
7312 } elsif ($^O eq 'solaris') {
7313 $cpu = sct_solaris($cpu);
7314 } elsif ($^O eq 'aix') {
7315 $cpu = sct_aix($cpu);
7316 } elsif ($^O eq 'hpux') {
7317 $cpu = sct_hpux($cpu);
7318 } elsif ($^O eq 'nto') {
7319 $cpu = sct_qnx($cpu);
7320 } elsif ($^O eq 'svr5') {
7321 $cpu = sct_openserver($cpu);
7322 } elsif ($^O eq 'irix') {
7323 $cpu = sct_irix($cpu);
7324 } elsif ($^O eq 'dec_osf') {
7325 $cpu = sct_tru64($cpu);
7326 } else {
7327 # Try all methods until we find something that works
7328 $cpu = (sct_gnu_linux($cpu)
7329 || sct_android($cpu)
7330 || sct_freebsd($cpu)
7331 || sct_netbsd($cpu)
7332 || sct_openbsd($cpu)
7333 || sct_hurd($cpu)
7334 || sct_darwin($cpu)
7335 || sct_solaris($cpu)
7336 || sct_aix($cpu)
7337 || sct_hpux($cpu)
7338 || sct_qnx($cpu)
7339 || sct_openserver($cpu)
7340 || sct_irix($cpu)
7341 || sct_tru64($cpu)
7344 if(not grep { $_ > 0 } values %$cpu) {
7345 $cpu = undef;
7347 # Write cached copy instead of /proc/cpuinfo is 17 ms faster
7348 if($cpu and open(my $out_fh, ">", $cached_cpuspec)) {
7349 print $out_fh (map { chomp; "$_\n" }
7350 $cpu->{'sockets'},
7351 $cpu->{'cores'},
7352 $cpu->{'threads'});
7353 close $out_fh;
7355 if(not $cpu) {
7356 my $nproc = nproc();
7357 if($nproc) {
7358 $cpu->{'sockets'} =
7359 $cpu->{'cores'} =
7360 $cpu->{'threads'} =
7361 $cpu->{'active'} =
7362 $nproc;
7365 if(not $cpu) {
7366 ::warning("Cannot figure out number of cpus. Using 1.");
7367 $cpu->{'sockets'} =
7368 $cpu->{'cores'} =
7369 $cpu->{'threads'} =
7370 $cpu->{'active'} =
7373 $cpu->{'sockets'} ||= 1;
7374 $cpu->{'threads'} ||= $cpu->{'cores'};
7375 $cpu->{'active'} ||= $cpu->{'threads'};
7376 chomp($cpu->{'sockets'},
7377 $cpu->{'cores'},
7378 $cpu->{'threads'},
7379 $cpu->{'active'});
7380 # Choose minimum of active and actual
7381 my $mincpu;
7382 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
7383 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
7384 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
7385 return $mincpu;
7388 sub sct_gnu_linux($) {
7389 # Returns:
7390 # { 'sockets' => #sockets
7391 # 'cores' => #cores
7392 # 'threads' => #threads
7393 # 'active' => #taskset_threads }
7394 my $cpu = shift;
7396 sub read_topology($) {
7397 my $prefix = shift;
7398 my %sibiling;
7399 my %socket;
7400 my $thread;
7401 for($thread = 0;
7402 -r "$prefix/cpu$thread/topology/physical_package_id";
7403 $thread++) {
7404 open(my $fh,"<",
7405 "$prefix/cpu$thread/topology/physical_package_id")
7406 || die;
7407 $socket{<$fh>}++;
7408 close $fh;
7410 for($thread = 0;
7411 -r "$prefix/cpu$thread/topology/thread_siblings";
7412 $thread++) {
7413 open(my $fh,"<",
7414 "$prefix/cpu$thread/topology/thread_siblings")
7415 || die;
7416 $sibiling{<$fh>}++;
7417 close $fh;
7419 $cpu->{'sockets'} = keys %socket;
7420 $cpu->{'cores'} = keys %sibiling;
7421 $cpu->{'threads'} = $thread;
7424 sub read_cpuinfo(@) {
7425 my @cpuinfo = @_;
7426 $cpu->{'sockets'} = 0;
7427 $cpu->{'cores'} = 0;
7428 $cpu->{'threads'} = 0;
7429 my %seen;
7430 my %phy_seen;
7431 my $physicalid;
7432 for(@cpuinfo) {
7433 # physical id : 0
7434 if(/^physical id.*[:](.*)/) {
7435 $physicalid = $1;
7436 if(not $phy_seen{$1}++) {
7437 $cpu->{'sockets'}++;
7440 # core id : 3
7441 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
7442 $cpu->{'cores'}++;
7444 # processor : 2
7445 /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
7447 $cpu->{'cores'} ||= $cpu->{'threads'};
7448 $cpu->{'cpus'} ||= $cpu->{'threads'};
7449 $cpu->{'sockets'} ||= 1;
7452 sub read_lscpu(@) {
7453 my @lscpu = @_;
7454 my $threads_per_core;
7455 my $cores_per_socket;
7456 for(@lscpu) {
7457 /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
7458 /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
7459 /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
7460 /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
7462 if($threads_per_core and $cpu->{'threads'}) {
7463 $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core;
7465 $cpu->{'cpus'} ||= $cpu->{'threads'};
7468 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
7469 my @cpuinfo;
7470 my @lscpu;
7471 if($ENV{'PARALLEL_CPUINFO'}) {
7472 # Use CPUINFO from environment - used for testing only
7473 read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
7474 } elsif($ENV{'PARALLEL_LSCPU'}) {
7475 # Use LSCPU from environment - used for testing only
7476 read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
7477 } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
7478 # Use CPUPREFIX from environment - used for testing only
7479 read_topology($ENV{'PARALLEL_CPUPREFIX'});
7480 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
7481 # Skip /proc/cpuinfo - already set
7482 } else {
7483 # Not debugging: Look at this computer
7484 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7486 open(my $in_fh, "-|", "lscpu")) {
7487 # Parse output from lscpu
7488 read_lscpu(<$in_fh>);
7489 close $in_fh;
7491 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7493 -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
7494 read_topology("/sys/devices/system/cpu");
7496 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7498 open(my $in_fh, "<", "/proc/cpuinfo")) {
7499 # Read /proc/cpuinfo
7500 read_cpuinfo(<$in_fh>);
7501 close $in_fh;
7504 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
7505 # if 'taskset' is used to limit number of threads
7506 if(open(my $in_fh, "<", "/proc/self/status")) {
7507 while(<$in_fh>) {
7508 if(/^Cpus_allowed:\s*(\S+)/) {
7509 my $a = $1;
7510 $a =~ tr/,//d;
7511 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
7514 close $in_fh;
7517 return $cpu;
7520 sub sct_android($) {
7521 # Returns:
7522 # { 'sockets' => #sockets
7523 # 'cores' => #cores
7524 # 'threads' => #threads
7525 # 'active' => #taskset_threads }
7526 # Use GNU/Linux
7527 return sct_gnu_linux($_[0]);
7530 sub sct_freebsd($) {
7531 # Returns:
7532 # { 'sockets' => #sockets
7533 # 'cores' => #cores
7534 # 'threads' => #threads
7535 # 'active' => #taskset_threads }
7536 local $/ = "\n";
7537 my $cpu = shift;
7538 $cpu->{'cores'} ||=
7539 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
7541 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
7542 $cpu->{'threads'} ||=
7543 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
7545 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
7546 return $cpu;
7549 sub sct_netbsd($) {
7550 # Returns:
7551 # { 'sockets' => #sockets
7552 # 'cores' => #cores
7553 # 'threads' => #threads
7554 # 'active' => #taskset_threads }
7555 local $/ = "\n";
7556 my $cpu = shift;
7557 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
7558 return $cpu;
7561 sub sct_openbsd($) {
7562 # Returns:
7563 # { 'sockets' => #sockets
7564 # 'cores' => #cores
7565 # 'threads' => #threads
7566 # 'active' => #taskset_threads }
7567 local $/ = "\n";
7568 my $cpu = shift;
7569 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
7570 return $cpu;
7573 sub sct_hurd($) {
7574 # Returns:
7575 # { 'sockets' => #sockets
7576 # 'cores' => #cores
7577 # 'threads' => #threads
7578 # 'active' => #taskset_threads }
7579 local $/ = "\n";
7580 my $cpu = shift;
7581 $cpu->{'cores'} ||= ::qqx("nproc");
7582 return $cpu;
7585 sub sct_darwin($) {
7586 # Returns:
7587 # { 'sockets' => #sockets
7588 # 'cores' => #cores
7589 # 'threads' => #threads
7590 # 'active' => #taskset_threads }
7591 local $/ = "\n";
7592 my $cpu = shift;
7593 $cpu->{'cores'} ||=
7594 (::qqx('sysctl -n hw.physicalcpu')
7596 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7597 $cpu->{'threads'} ||=
7598 (::qqx('sysctl -n hw.logicalcpu')
7600 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7601 return $cpu;
7604 sub sct_solaris($) {
7605 # Returns:
7606 # { 'sockets' => #sockets
7607 # 'cores' => #cores
7608 # 'threads' => #threads
7609 # 'active' => #taskset_threads }
7610 local $/ = "\n";
7611 my $cpu = shift;
7612 if(not $cpu->{'cores'}) {
7613 if(-x "/usr/bin/kstat") {
7614 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
7615 if($#chip_id >= 0) {
7616 $cpu->{'sockets'} ||= $#chip_id +1;
7618 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
7619 if($#core_id >= 0) {
7620 $cpu->{'cores'} ||= $#core_id +1;
7623 if(-x "/usr/sbin/psrinfo") {
7624 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
7625 if($#psrinfo >= 0) {
7626 $cpu->{'sockets'} ||= $psrinfo[0];
7629 if(-x "/usr/sbin/prtconf") {
7630 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7631 if($#prtconf >= 0) {
7632 $cpu->{'cores'} ||= $#prtconf +1;
7636 return $cpu;
7639 sub sct_aix($) {
7640 # Returns:
7641 # { 'sockets' => #sockets
7642 # 'cores' => #cores
7643 # 'threads' => #threads
7644 # 'active' => #taskset_threads }
7645 local $/ = "\n";
7646 my $cpu = shift;
7647 if(not $cpu->{'cores'}) {
7648 if(-x "/usr/sbin/lscfg") {
7649 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7650 $cpu->{'cores'} = <$in_fh>;
7651 close $in_fh;
7655 if(not $cpu->{'threads'}) {
7656 if(-x "/usr/bin/vmstat") {
7657 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7658 while(<$in_fh>) {
7659 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7661 close $in_fh;
7665 return $cpu;
7668 sub sct_hpux($) {
7669 # Returns:
7670 # { 'sockets' => #sockets
7671 # 'cores' => #cores
7672 # 'threads' => #threads
7673 # 'active' => #taskset_threads }
7674 local $/ = "\n";
7675 my $cpu = shift;
7676 $cpu->{'cores'} ||=
7677 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7678 $cpu->{'threads'} ||=
7679 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7680 return $cpu;
7683 sub sct_qnx($) {
7684 # Returns:
7685 # { 'sockets' => #sockets
7686 # 'cores' => #cores
7687 # 'threads' => #threads
7688 # 'active' => #taskset_threads }
7689 local $/ = "\n";
7690 my $cpu = shift;
7691 # BUG: It is not known how to calculate this.
7693 return $cpu;
7696 sub sct_openserver($) {
7697 # Returns:
7698 # { 'sockets' => #sockets
7699 # 'cores' => #cores
7700 # 'threads' => #threads
7701 # 'active' => #taskset_threads }
7702 local $/ = "\n";
7703 my $cpu = shift;
7704 if(not $cpu->{'cores'}) {
7705 if(-x "/usr/sbin/psrinfo") {
7706 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7707 if($#psrinfo >= 0) {
7708 $cpu->{'cores'} = $#psrinfo +1;
7712 $cpu->{'sockets'} ||= $cpu->{'cores'};
7713 return $cpu;
7716 sub sct_irix($) {
7717 # Returns:
7718 # { 'sockets' => #sockets
7719 # 'cores' => #cores
7720 # 'threads' => #threads
7721 # 'active' => #taskset_threads }
7722 local $/ = "\n";
7723 my $cpu = shift;
7724 $cpu->{'cores'} ||=
7725 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7726 return $cpu;
7729 sub sct_tru64($) {
7730 # Returns:
7731 # { 'sockets' => #sockets
7732 # 'cores' => #cores
7733 # 'threads' => #threads
7734 # 'active' => #taskset_threads }
7735 local $/ = "\n";
7736 my $cpu = shift;
7737 $cpu->{'cores'} ||= ::qqx("sizer -pr");
7738 $cpu->{'sockets'} ||= $cpu->{'cores'};
7739 $cpu->{'threads'} ||= $cpu->{'cores'};
7741 return $cpu;
7744 sub sshcommand($) {
7745 # Returns:
7746 # $sshcommand = the command (incl options) to run when using ssh
7747 my $self = shift;
7748 if (not defined $self->{'sshcommand'}) {
7749 $self->sshcommand_of_sshlogin();
7751 return $self->{'sshcommand'};
7754 sub serverlogin($) {
7755 # Returns:
7756 # $sshcommand = the command (incl options) to run when using ssh
7757 my $self = shift;
7758 if (not defined $self->{'serverlogin'}) {
7759 $self->sshcommand_of_sshlogin();
7761 return $self->{'serverlogin'};
7764 sub sshcommand_of_sshlogin($) {
7765 # Compute ssh command and serverlogin from sshlogin
7766 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
7767 # 'user@server' -> ('ssh','user@server')
7768 # 'myssh user@server' -> ('myssh','user@server')
7769 # 'myssh -l user server' -> ('myssh -l user','server')
7770 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
7771 # Sets:
7772 # $self->{'sshcommand'}
7773 # $self->{'serverlogin'}
7774 my $self = shift;
7775 my ($sshcmd, $serverlogin);
7776 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
7777 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
7778 if($self->{'string'} =~ /(.+) (\S+)$/) {
7779 # Own ssh command
7780 $sshcmd = $1; $serverlogin = $2;
7781 } else {
7782 # Normal ssh
7783 if($opt::controlmaster) {
7784 # Use control_path to make ssh faster
7785 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7786 $sshcmd = $opt::ssh." -S ".$control_path;
7787 $serverlogin = $self->{'string'};
7788 if(not $self->{'control_path'}{$control_path}++) {
7789 # Master is not running for this control_path
7790 # Start it
7791 my $pid = fork();
7792 if($pid) {
7793 $Global::sshmaster{$pid} ||= 1;
7794 } else {
7795 $SIG{'TERM'} = undef;
7796 # Ignore the 'foo' being printed
7797 open(STDOUT,">","/dev/null");
7798 # STDERR >/dev/null to ignore
7799 open(STDERR,">","/dev/null");
7800 open(STDIN,"<","/dev/null");
7801 # Run a sleep that outputs data, so it will discover
7802 # if the ssh connection closes.
7803 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7804 my @master = ($opt::ssh, "-MTS",
7805 $control_path, $serverlogin, "--", "perl", "-e",
7806 $sleep);
7807 exec(@master);
7810 } else {
7811 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7815 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7816 # convert user@server to '-l user server'
7817 # because lsh does not support user@server
7818 $sshcmd = $sshcmd." -l ".$1;
7821 $self->{'sshcommand'} = $sshcmd;
7822 $self->{'serverlogin'} = $serverlogin;
7825 sub control_path_dir($) {
7826 # Returns:
7827 # $control_path_dir = dir of control path (for -M)
7828 my $self = shift;
7829 if(not defined $self->{'control_path_dir'}) {
7830 $self->{'control_path_dir'} =
7831 # Use $ENV{'TMPDIR'} as that is typically not
7832 # NFS mounted
7833 File::Temp::tempdir($ENV{'TMPDIR'}
7834 . "/control_path_dir-XXXX",
7835 CLEANUP => 1);
7837 return $self->{'control_path_dir'};
7840 sub rsync_transfer_cmd($) {
7841 # Command to run to transfer a file
7842 # Input:
7843 # $file = filename of file to transfer
7844 # $workdir = destination dir
7845 # Returns:
7846 # $cmd = rsync command to run to transfer $file ("" if unreadable)
7847 my $self = shift;
7848 my $file = shift;
7849 my $workdir = shift;
7850 if(not -r $file) {
7851 ::warning($file. " is not readable and will not be transferred.");
7852 return "true";
7854 my $rsync_destdir;
7855 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7856 if($relpath) {
7857 $rsync_destdir = ::shell_quote_file($workdir);
7858 } else {
7859 # rsync /foo/bar /
7860 $rsync_destdir = "/";
7862 $file = ::shell_quote_file($file);
7863 my $sshcmd = $self->sshcommand();
7864 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7865 " -e".::Q($sshcmd);
7866 my $serverlogin = $self->serverlogin();
7867 # Make dir if it does not exist
7868 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7869 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7872 sub cleanup_cmd($$$) {
7873 # Command to run to remove the remote file
7874 # Input:
7875 # $file = filename to remove
7876 # $workdir = destination dir
7877 # Returns:
7878 # $cmd = ssh command to run to remove $file and empty parent dirs
7879 my $self = shift;
7880 my $file = shift;
7881 my $workdir = shift;
7882 my $f = $file;
7883 if($f =~ m:/\./:) {
7884 # foo/bar/./baz/quux => workdir/baz/quux
7885 # /foo/bar/./baz/quux => workdir/baz/quux
7886 $f =~ s:.*/\./:$workdir/:;
7887 } elsif($f =~ m:^[^/]:) {
7888 # foo/bar => workdir/foo/bar
7889 $f = $workdir."/".$f;
7891 my @subdirs = split m:/:, ::dirname($f);
7892 my @rmdir;
7893 my $dir = "";
7894 for(@subdirs) {
7895 $dir .= $_."/";
7896 unshift @rmdir, ::shell_quote_file($dir);
7898 my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7899 if(defined $opt::workdir and $opt::workdir eq "...") {
7900 $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7903 $f = ::shell_quote_file($f);
7904 my $sshcmd = $self->sshcommand();
7905 my $serverlogin = $self->serverlogin();
7906 return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7910 my $rsync;
7912 sub rsync {
7913 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7914 # If the version >= 3.1.0: downgrade to protocol 30
7915 # Returns:
7916 # $rsync = "rsync" or "rsync --protocol 30"
7917 if(not $rsync) {
7918 my @out = `rsync --version`;
7919 for (@out) {
7920 # rsync version 3.1.3 protocol version 31
7921 # rsync version v3.2.3 protocol version 31
7922 if(/version v?(\d+.\d+)(.\d+)?/) {
7923 if($1 >= 3.1) {
7924 # Version 3.1.0 or later: Downgrade to protocol 30
7925 $rsync = "rsync --protocol 30";
7926 } else {
7927 $rsync = "rsync";
7931 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7933 return $rsync;
7938 package JobQueue;
7940 sub new($) {
7941 my $class = shift;
7942 my $commandref = shift;
7943 my $read_from = shift;
7944 my $context_replace = shift;
7945 my $max_number_of_args = shift;
7946 my $transfer_files = shift;
7947 my $return_files = shift;
7948 my $commandlinequeue = CommandLineQueue->new
7949 ($commandref, $read_from, $context_replace, $max_number_of_args,
7950 $transfer_files, $return_files);
7951 my @unget = ();
7952 return bless {
7953 'unget' => \@unget,
7954 'commandlinequeue' => $commandlinequeue,
7955 'this_job_no' => 0,
7956 'total_jobs' => undef,
7957 }, ref($class) || $class;
7960 sub get($) {
7961 my $self = shift;
7963 $self->{'this_job_no'}++;
7964 if(@{$self->{'unget'}}) {
7965 return shift @{$self->{'unget'}};
7966 } else {
7967 my $commandline = $self->{'commandlinequeue'}->get();
7968 if(defined $commandline) {
7969 return Job->new($commandline);
7970 } else {
7971 $self->{'this_job_no'}--;
7972 return undef;
7977 sub unget($) {
7978 my $self = shift;
7979 unshift @{$self->{'unget'}}, @_;
7980 $self->{'this_job_no'} -= @_;
7983 sub empty($) {
7984 my $self = shift;
7985 my $empty = (not @{$self->{'unget'}}) &&
7986 $self->{'commandlinequeue'}->empty();
7987 ::debug("run", "JobQueue->empty $empty ");
7988 return $empty;
7991 sub total_jobs($) {
7992 my $self = shift;
7993 if(not defined $self->{'total_jobs'}) {
7994 if($opt::pipe and not $opt::tee) {
7995 ::error("--pipe is incompatible with --eta/--bar/--shuf");
7996 ::wait_and_exit(255);
7998 if($opt::sqlworker) {
7999 $self->{'total_jobs'} = $Global::sql->total_jobs();
8000 } else {
8001 my $record;
8002 my @arg_records;
8003 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
8004 my $start = time;
8005 while($record = $record_queue->get()) {
8006 push @arg_records, $record;
8007 if(time - $start > 10) {
8008 ::warning("Reading ".scalar(@arg_records).
8009 " arguments took longer than 10 seconds.");
8010 $opt::eta && ::warning("Consider removing --eta.");
8011 $opt::bar && ::warning("Consider removing --bar.");
8012 $opt::shuf && ::warning("Consider removing --shuf.");
8013 last;
8016 while($record = $record_queue->get()) {
8017 push @arg_records, $record;
8019 if($opt::shuf and @arg_records) {
8020 my $i = @arg_records;
8021 while (--$i) {
8022 my $j = int rand($i+1);
8023 @arg_records[$i,$j] = @arg_records[$j,$i];
8026 $record_queue->unget(@arg_records);
8027 # $#arg_records = number of args - 1
8028 # We have read one @arg_record for this job (so add 1 more)
8029 my $num_args = $#arg_records + 2;
8030 # This jobs is not started so -1
8031 my $started_jobs = $self->{'this_job_no'} - 1;
8032 my $max_args = ::max($Global::max_number_of_args,1);
8033 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
8034 + $started_jobs;
8035 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
8036 " ($num_args/$max_args + $started_jobs)\n");
8039 return $self->{'total_jobs'};
8042 sub flush_total_jobs($) {
8043 # Unset total_jobs to force recomputing
8044 my $self = shift;
8045 ::debug("init","flush Total jobs: ");
8046 $self->{'total_jobs'} = undef;
8049 sub next_seq($) {
8050 my $self = shift;
8052 return $self->{'commandlinequeue'}->seq();
8055 sub quote_args($) {
8056 my $self = shift;
8057 return $self->{'commandlinequeue'}->quote_args();
8061 package Job;
8063 sub new($) {
8064 my $class = shift;
8065 my $commandlineref = shift;
8066 return bless {
8067 'commandline' => $commandlineref, # CommandLine object
8068 'workdir' => undef, # --workdir
8069 # filehandle for stdin (used for --pipe)
8070 # filename for writing stdout to (used for --files)
8071 # remaining data not sent to stdin (used for --pipe)
8072 # tmpfiles to cleanup when job is done
8073 'unlink' => [],
8074 # amount of data sent via stdin (used for --pipe)
8075 'transfersize' => 0, # size of files using --transfer
8076 'returnsize' => 0, # size of files using --return
8077 'pid' => undef,
8078 # hash of { SSHLogins => number of times the command failed there }
8079 'failed' => undef,
8080 'sshlogin' => undef,
8081 # The commandline wrapped with rsync and ssh
8082 'sshlogin_wrap' => undef,
8083 'exitstatus' => undef,
8084 'exitsignal' => undef,
8085 # Timestamp for timeout if any
8086 'timeout' => undef,
8087 'virgin' => 1,
8088 # Output used for SQL and CSV-output
8089 'output' => { 1 => [], 2 => [] },
8090 'halfline' => { 1 => [], 2 => [] },
8091 }, ref($class) || $class;
8094 sub replaced($) {
8095 my $self = shift;
8096 $self->{'commandline'} or ::die_bug("commandline empty");
8097 return $self->{'commandline'}->replaced();
8100 sub seq($) {
8101 my $self = shift;
8102 return $self->{'commandline'}->seq();
8105 sub set_seq($$) {
8106 my $self = shift;
8107 return $self->{'commandline'}->set_seq(shift);
8110 sub slot($) {
8111 my $self = shift;
8112 return $self->{'commandline'}->slot();
8115 sub free_slot($) {
8116 my $self = shift;
8117 push @Global::slots, $self->slot();
8121 my($cattail);
8123 sub cattail() {
8124 # Returns:
8125 # $cattail = perl program for:
8126 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
8127 if(not $cattail) {
8128 $cattail = q{
8129 # cat followed by tail (possibly with rm as soon at the file is opened)
8130 # If $writerpid dead: finish after this round
8131 use Fcntl;
8132 $|=1;
8134 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
8135 if($read_file) {
8136 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
8137 } else {
8138 *IN = *STDIN;
8140 while(! -s $comfile) {
8141 # Writer has not opened the buffer file, so we cannot remove it yet
8142 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
8143 usleep($sleep);
8145 # The writer and we have both opened the file, so it is safe to unlink it
8146 unlink $unlink_file;
8147 unlink $comfile;
8149 my $first_round = 1;
8150 my $flags;
8151 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
8152 $flags |= O_NONBLOCK; # Add non-blocking to the flags
8153 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
8155 while(1) {
8156 # clear EOF
8157 seek(IN,0,1);
8158 my $writer_running = kill 0, $writerpid;
8159 $read = sysread(IN,$buf,131072);
8160 if($read) {
8161 if($first_round) {
8162 # Only start the command if there any input to process
8163 $first_round = 0;
8164 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
8167 # Blocking print
8168 while($buf) {
8169 my $bytes_written = syswrite(OUT,$buf);
8170 # syswrite may be interrupted by SIGHUP
8171 substr($buf,0,$bytes_written) = "";
8173 # Something printed: Wait less next time
8174 $sleep /= 2;
8175 } else {
8176 if(eof(IN) and not $writer_running) {
8177 # Writer dead: There will never be sent more to the decompressor
8178 close OUT;
8179 exit;
8181 # TODO This could probably be done more efficiently using select(2)
8182 # Nothing read: Wait longer before next read
8183 # Up to 100 milliseconds
8184 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
8185 usleep($sleep);
8189 sub usleep {
8190 # Sleep this many milliseconds.
8191 my $secs = shift;
8192 select(undef, undef, undef, $secs/1000);
8195 $cattail =~ s/#.*//mg;
8196 $cattail =~ s/\s+/ /g;
8198 return $cattail;
8202 sub openoutputfiles($) {
8203 # Open files for STDOUT and STDERR
8204 # Set file handles in $self->fh
8205 my $self = shift;
8206 my ($outfhw, $errfhw, $outname, $errname);
8208 if($opt::linebuffer and not
8209 ($opt::keeporder or $opt::files or $opt::results or
8210 $opt::compress or $opt::compress_program or
8211 $opt::decompress_program)) {
8212 # Do not save to files: Use non-blocking pipe
8213 my ($outfhr, $errfhr);
8214 pipe($outfhr, $outfhw) || die;
8215 pipe($errfhr, $errfhw) || die;
8216 $self->set_fh(1,'w',$outfhw);
8217 $self->set_fh(2,'w',$errfhw);
8218 $self->set_fh(1,'r',$outfhr);
8219 $self->set_fh(2,'r',$errfhr);
8220 # Make it possible to read non-blocking from the pipe
8221 for my $fdno (1,2) {
8222 ::set_fh_non_blocking($self->fh($fdno,'r'));
8224 # Return immediately because we do not need setting filenames
8225 return;
8226 } elsif($opt::results and not $Global::csvsep) {
8227 my $out = $self->{'commandline'}->results_out();
8228 my $seqname;
8229 if($out eq $opt::results or $out =~ m:/$:) {
8230 # $opt::results = simple string or ending in /
8231 # => $out is a dir/
8232 # prefix/name1/val1/name2/val2/seq
8233 $seqname = $out."seq";
8234 # prefix/name1/val1/name2/val2/stdout
8235 $outname = $out."stdout";
8236 # prefix/name1/val1/name2/val2/stderr
8237 $errname = $out."stderr";
8238 } else {
8239 # $opt::results = replacement string not ending in /
8240 # => $out is a file
8241 $outname = $out;
8242 $errname = "$out.err";
8243 $seqname = "$out.seq";
8245 my $seqfhw;
8246 if(not open($seqfhw, "+>", $seqname)) {
8247 ::error("Cannot write to `$seqname'.");
8248 ::wait_and_exit(255);
8250 print $seqfhw $self->seq();
8251 close $seqfhw;
8252 if(not open($outfhw, "+>", $outname)) {
8253 ::error("Cannot write to `$outname'.");
8254 ::wait_and_exit(255);
8256 if(not open($errfhw, "+>", $errname)) {
8257 ::error("Cannot write to `$errname'.");
8258 ::wait_and_exit(255);
8260 $self->set_fh(1,"unlink","");
8261 $self->set_fh(2,"unlink","");
8262 if($opt::sqlworker) {
8263 # Save the filenames in SQL table
8264 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
8265 "WHERE Seq = ". $self->seq(),
8266 $outname, $errname);
8268 } elsif(not $opt::ungroup) {
8269 # To group we create temporary files for STDOUT and STDERR
8270 # To avoid the cleanup unlink the files immediately (but keep them open)
8271 if($opt::files) {
8272 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8273 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8274 # --files => only remove stderr
8275 $self->set_fh(1,"unlink","");
8276 $self->set_fh(2,"unlink",$errname);
8277 } else {
8278 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8279 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8280 $self->set_fh(1,"unlink",$outname);
8281 $self->set_fh(2,"unlink",$errname);
8283 } else {
8284 # --ungroup
8285 open($outfhw,">&",$Global::fd{1}) || die;
8286 open($errfhw,">&",$Global::fd{2}) || die;
8287 # File name must be empty as it will otherwise be printed
8288 $outname = "";
8289 $errname = "";
8290 $self->set_fh(1,"unlink",$outname);
8291 $self->set_fh(2,"unlink",$errname);
8293 # Set writing FD
8294 $self->set_fh(1,'w',$outfhw);
8295 $self->set_fh(2,'w',$errfhw);
8296 $self->set_fh(1,'name',$outname);
8297 $self->set_fh(2,'name',$errname);
8298 if($opt::compress) {
8299 $self->filter_through_compress();
8300 } elsif(not $opt::ungroup) {
8301 $self->grouped();
8303 if($opt::linebuffer) {
8304 # Make it possible to read non-blocking from
8305 # the buffer files
8306 # Used for --linebuffer with -k, --files, --res, --compress*
8307 for my $fdno (1,2) {
8308 ::set_fh_non_blocking($self->fh($fdno,'r'));
8313 sub print_verbose_dryrun($) {
8314 # If -v set: print command to stdout (possibly buffered)
8315 # This must be done before starting the command
8316 my $self = shift;
8317 if($Global::verbose or $opt::dryrun) {
8318 my $fh = $self->fh(1,"w");
8319 if($Global::verbose <= 1) {
8320 print $fh $self->replaced(),"\n";
8321 } else {
8322 # Verbose level > 1: Print the rsync and stuff
8323 print $fh $self->wrapped(),"\n";
8326 if($opt::sqlworker) {
8327 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
8328 $self->replaced());
8332 sub add_rm($) {
8333 # Files to remove when job is done
8334 my $self = shift;
8335 push @{$self->{'unlink'}}, @_;
8338 sub get_rm($) {
8339 # Files to remove when job is done
8340 my $self = shift;
8341 return @{$self->{'unlink'}};
8344 sub cleanup($) {
8345 # Remove files when job is done
8346 my $self = shift;
8347 unlink $self->get_rm();
8348 delete @Global::unlink{$self->get_rm()};
8351 sub grouped($) {
8352 my $self = shift;
8353 # Set reading FD if using --group (--ungroup does not need)
8354 for my $fdno (1,2) {
8355 # Re-open the file for reading
8356 # so fdw can be closed seperately
8357 # and fdr can be seeked seperately (for --line-buffer)
8358 open(my $fdr,"<", $self->fh($fdno,'name')) ||
8359 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
8360 $self->set_fh($fdno,'r',$fdr);
8361 # Unlink if not debugging
8362 $Global::debug or ::rm($self->fh($fdno,"unlink"));
8366 sub empty_input_wrapper($) {
8367 # If no input: exit(0)
8368 # If some input: Pass input as input to command on STDIN
8369 # This avoids starting the command if there is no input.
8370 # Input:
8371 # $command = command to pipe data to
8372 # Returns:
8373 # $wrapped_command = the wrapped command
8374 my $command = shift;
8375 my $script =
8376 ::spacefree(0,q{
8377 if(sysread(STDIN, $buf, 1)) {
8378 open($fh, "|-", @ARGV) || die;
8379 syswrite($fh, $buf);
8380 # Align up to 128k block
8381 if($read = sysread(STDIN, $buf, 131071)) {
8382 syswrite($fh, $buf);
8384 while($read = sysread(STDIN, $buf, 131072)) {
8385 syswrite($fh, $buf);
8387 close $fh;
8388 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8391 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
8392 if($Global::cshell
8394 length $command > 499) {
8395 # csh does not like words longer than 1000 (499 quoted)
8396 # $command = "perl -e '".base64_zip_eval()."' ".
8397 # join" ",string_zip_base64(
8398 # 'exec "'.::perl_quote_scalar($command).'"');
8399 return 'perl -e '.::Q($script)." ".
8400 base64_wrap("exec \"$Global::shell\",'-c',\"".
8401 ::perl_quote_scalar($command).'"');
8402 } else {
8403 return 'perl -e '.::Q($script)." ".
8404 $Global::shell." -c ".::Q($command);
8408 sub filter_through_compress($) {
8409 my $self = shift;
8410 # Send stdout to stdin for $opt::compress_program(1)
8411 # Send stderr to stdin for $opt::compress_program(2)
8412 # cattail get pid: $pid = $self->fh($fdno,'rpid');
8413 my $cattail = cattail();
8415 for my $fdno (1,2) {
8416 # Make a communication file.
8417 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
8418 close $fh;
8419 # Compressor: (echo > $comfile; compress pipe) > output
8420 # When the echo is written to $comfile,
8421 # it is known that output file is opened,
8422 # thus output file can then be removed by the decompressor.
8423 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
8424 empty_input_wrapper($opt::compress_program).") >".
8425 $self->fh($fdno,'name')) || die $?;
8426 $self->set_fh($fdno,'w',$fdw);
8427 $self->set_fh($fdno,'wpid',$wpid);
8428 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
8429 # decompress output > stdout
8430 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
8431 $opt::decompress_program, $wpid,
8432 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
8433 || die $?;
8434 $self->set_fh($fdno,'r',$fdr);
8435 $self->set_fh($fdno,'rpid',$rpid);
8441 sub set_fh($$$$) {
8442 # Set file handle
8443 my ($self, $fd_no, $key, $fh) = @_;
8444 $self->{'fd'}{$fd_no,$key} = $fh;
8447 sub fh($) {
8448 # Get file handle
8449 my ($self, $fd_no, $key) = @_;
8450 return $self->{'fd'}{$fd_no,$key};
8453 sub write($) {
8454 my $self = shift;
8455 my $remaining_ref = shift;
8456 my $stdin_fh = $self->fh(0,"w");
8458 my $len = length $$remaining_ref;
8459 # syswrite may not write all in one go,
8460 # so make sure everything is written.
8461 my $written;
8463 # If writing is to a closed pipe:
8464 # Do not call signal handler, but let nothing be written
8465 local $SIG{PIPE} = undef;
8466 while($written = syswrite($stdin_fh,$$remaining_ref)){
8467 substr($$remaining_ref,0,$written) = "";
8471 sub set_block($$$$$$) {
8472 # Copy stdin buffer from $block_ref up to $endpos
8473 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
8474 # Remove $recstart and $recend if needed
8475 # Input:
8476 # $header_ref = ref to $header to prepend
8477 # $buffer_ref = ref to $buffer containing the block
8478 # $endpos = length of $block to pass on
8479 # $recstart = --recstart regexp
8480 # $recend = --recend regexp
8481 # Returns:
8482 # N/A
8483 my $self = shift;
8484 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
8485 $self->{'block'} = ($self->virgin() ? $$header_ref : "").
8486 substr($$buffer_ref,0,$endpos);
8487 if($opt::remove_rec_sep) {
8488 remove_rec_sep(\$self->{'block'},$recstart,$recend);
8490 $self->{'block_length'} = length $self->{'block'};
8491 $self->{'block_pos'} = 0;
8492 $self->add_transfersize($self->{'block_length'});
8495 sub block_ref($) {
8496 my $self = shift;
8497 return \$self->{'block'};
8501 sub block_length($) {
8502 my $self = shift;
8503 return $self->{'block_length'};
8506 sub remove_rec_sep($) {
8507 # Remove --recstart and --recend from $block
8508 # Input:
8509 # $block_ref = reference to $block to be modified
8510 # $recstart = --recstart
8511 # $recend = --recend
8512 # Uses:
8513 # $opt::regexp = Are --recstart/--recend regexp?
8514 # Returns:
8515 # N/A
8516 my ($block_ref,$recstart,$recend) = @_;
8517 # Remove record separator
8518 if($opt::regexp) {
8519 $$block_ref =~ s/$recend$recstart//gos;
8520 $$block_ref =~ s/^$recstart//os;
8521 $$block_ref =~ s/$recend$//os;
8522 } else {
8523 $$block_ref =~ s/\Q$recend$recstart\E//gos;
8524 $$block_ref =~ s/^\Q$recstart\E//os;
8525 $$block_ref =~ s/\Q$recend\E$//os;
8529 sub non_blocking_write($) {
8530 my $self = shift;
8531 my $something_written = 0;
8533 my $in = $self->fh(0,"w");
8534 my $rv = syswrite($in,
8535 substr($self->{'block'},$self->{'block_pos'}));
8536 if (!defined($rv) && $! == ::EAGAIN()) {
8537 # would block - but would have written
8538 $something_written = 0;
8539 # avoid triggering auto expanding block size
8540 $Global::no_autoexpand_block ||= 1;
8541 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8542 # incomplete write
8543 # Remove the written part
8544 $self->{'block_pos'} += $rv;
8545 $something_written = $rv;
8546 } else {
8547 # successfully wrote everything
8548 # Empty block to free memory
8549 my $a = "";
8550 $self->set_block(\$a,\$a,0,"","");
8551 $something_written = $rv;
8553 ::debug("pipe", "Non-block: ", $something_written);
8554 return $something_written;
8558 sub virgin($) {
8559 my $self = shift;
8560 return $self->{'virgin'};
8563 sub set_virgin($$) {
8564 my $self = shift;
8565 $self->{'virgin'} = shift;
8568 sub pid($) {
8569 my $self = shift;
8570 return $self->{'pid'};
8573 sub set_pid($$) {
8574 my $self = shift;
8575 $self->{'pid'} = shift;
8578 sub starttime($) {
8579 # Returns:
8580 # UNIX-timestamp this job started
8581 my $self = shift;
8582 return sprintf("%.3f",$self->{'starttime'});
8585 sub set_starttime($@) {
8586 my $self = shift;
8587 my $starttime = shift || ::now();
8588 $self->{'starttime'} = $starttime;
8589 $opt::sqlworker and
8590 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8591 $starttime);
8594 sub runtime($) {
8595 # Returns:
8596 # Run time in seconds with 3 decimals
8597 my $self = shift;
8598 return sprintf("%.3f",
8599 int(($self->endtime() - $self->starttime())*1000)/1000);
8602 sub endtime($) {
8603 # Returns:
8604 # UNIX-timestamp this job ended
8605 # 0 if not ended yet
8606 my $self = shift;
8607 return ($self->{'endtime'} || 0);
8610 sub set_endtime($$) {
8611 my $self = shift;
8612 my $endtime = shift;
8613 $self->{'endtime'} = $endtime;
8614 $opt::sqlworker and
8615 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8616 $self->runtime());
8619 sub is_timedout($) {
8620 # Is the job timedout?
8621 # Input:
8622 # $delta_time = time that the job may run
8623 # Returns:
8624 # True or false
8625 my $self = shift;
8626 my $delta_time = shift;
8627 return time > $self->{'starttime'} + $delta_time;
8630 sub kill($) {
8631 my $self = shift;
8632 $self->set_exitstatus(-1);
8633 ::kill_sleep_seq($self->pid());
8634 # push job onto start stack
8635 if($opt::memsuspend) {
8636 $self->{'suspended'} = 1;
8637 $Global::JobQueue->{'commandlinequeue'}->unget($self);
8641 sub set_suspended($$) {
8642 my $self = shift;
8643 $self->{'suspended'} = shift;
8646 sub suspended($) {
8647 my $self = shift;
8648 return $self->{'suspended'};
8651 sub failed($) {
8652 # return number of times failed for this $sshlogin
8653 # Input:
8654 # $sshlogin
8655 # Returns:
8656 # Number of times failed for $sshlogin
8657 my $self = shift;
8658 my $sshlogin = shift;
8659 return $self->{'failed'}{$sshlogin};
8662 sub failed_here($) {
8663 # return number of times failed for the current $sshlogin
8664 # Returns:
8665 # Number of times failed for this sshlogin
8666 my $self = shift;
8667 return $self->{'failed'}{$self->sshlogin()};
8670 sub add_failed($) {
8671 # increase the number of times failed for this $sshlogin
8672 my $self = shift;
8673 my $sshlogin = shift;
8674 $self->{'failed'}{$sshlogin}++;
8677 sub add_failed_here($) {
8678 # increase the number of times failed for the current $sshlogin
8679 my $self = shift;
8680 $self->{'failed'}{$self->sshlogin()}++;
8683 sub reset_failed($) {
8684 # increase the number of times failed for this $sshlogin
8685 my $self = shift;
8686 my $sshlogin = shift;
8687 delete $self->{'failed'}{$sshlogin};
8690 sub reset_failed_here($) {
8691 # increase the number of times failed for this $sshlogin
8692 my $self = shift;
8693 delete $self->{'failed'}{$self->sshlogin()};
8696 sub min_failed($) {
8697 # Returns:
8698 # the number of sshlogins this command has failed on
8699 # the minimal number of times this command has failed
8700 my $self = shift;
8701 my $min_failures =
8702 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
8703 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
8704 return ($number_of_sshlogins_failed_on,$min_failures);
8707 sub total_failed($) {
8708 # Returns:
8709 # $total_failures = the number of times this command has failed
8710 my $self = shift;
8711 my $total_failures = 0;
8712 for (values %{$self->{'failed'}}) {
8713 $total_failures += $_;
8715 return $total_failures;
8719 my $script;
8721 sub postpone_exit_and_cleanup {
8722 # Command to remove files and dirs (given as args) without
8723 # affecting the exit value in $?/$status.
8724 if(not $script) {
8725 $script = "perl -e '".
8726 ::spacefree(0,q{
8727 $bash=shift;
8728 $csh=shift;
8729 for(@ARGV){
8730 unlink;
8731 rmdir;
8733 if($bash=~s/(\d+)h/$1/) {
8734 exit $bash;
8736 exit $csh;
8738 # `echo \$?h` is needed to make fish not complain
8739 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
8741 return $script
8746 my $script;
8748 sub fifo_wrap() {
8749 # Script to create a fifo, run a command on the fifo
8750 # while copying STDIN to the fifo, and finally
8751 # remove the fifo and return the exit code of the command.
8752 if(not $script) {
8753 # {} == $PARALLEL_TMP for --fifo
8754 # To make it csh compatible a wrapper needs to:
8755 # * mkfifo
8756 # * spawn $command &
8757 # * cat > fifo
8758 # * waitpid to get the exit code from $command
8759 # * be less than 1000 chars long
8760 $script = "perl -e '".
8761 (::spacefree
8762 (0, q{
8763 ($s,$c,$f) = @ARGV;
8764 # mkfifo $PARALLEL_TMP
8765 system "mkfifo", $f;
8766 # spawn $shell -c $command &
8767 $pid = fork || exec $s, "-c", $c;
8768 open($o,">",$f) || die $!;
8769 # cat > $PARALLEL_TMP
8770 while(sysread(STDIN,$buf,131072)){
8771 syswrite $o, $buf;
8773 close $o;
8774 # waitpid to get the exit code from $command
8775 waitpid $pid,0;
8776 # Cleanup
8777 unlink $f;
8778 exit $?/256;
8779 }))."'";
8781 return $script;
8785 sub wrapped($) {
8786 # Wrap command with:
8787 # * --shellquote
8788 # * --nice
8789 # * --cat
8790 # * --fifo
8791 # * --sshlogin
8792 # * --pipepart (@Global::cat_prepends)
8793 # * --tee (@Global::cat_prepends)
8794 # * --pipe
8795 # * --tmux
8796 # The ordering of the wrapping is important:
8797 # * --nice/--cat/--fifo should be done on the remote machine
8798 # * --pipepart/--pipe should be done on the local machine inside --tmux
8799 # Uses:
8800 # @opt::shellquote
8801 # $opt::nice
8802 # $Global::shell
8803 # $opt::cat
8804 # $opt::fifo
8805 # @Global::cat_prepends
8806 # $opt::pipe
8807 # $opt::tmux
8808 # Returns:
8809 # $self->{'wrapped'} = the command wrapped with the above
8810 my $self = shift;
8811 if(not defined $self->{'wrapped'}) {
8812 my $command = $self->replaced();
8813 # Bug in Bash and Ksh when running multiline aliases
8814 # This will force them to run correctly, but will fail in
8815 # tcsh so we do not do it.
8816 # $command .= "\n\n";
8817 if(@opt::shellquote) {
8818 # Quote one time for each --shellquote
8819 my $c = $command;
8820 for(@opt::shellquote) {
8821 $c = ::Q($c);
8823 # Prepend "echo" (it is written in perl because
8824 # quoting '-e' causes problem in some versions and
8825 # csh's version does something wrong)
8826 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
8828 if($Global::parallel_env) {
8829 # If $PARALLEL_ENV set, put that in front of the command
8830 # Used for env_parallel.*
8831 if($Global::shell =~ /zsh/) {
8832 # The extra 'eval' will make aliases work, too
8833 $command = $Global::parallel_env."\n".
8834 "eval ".::Q($command);
8835 } else {
8836 $command = $Global::parallel_env."\n".$command;
8839 if($opt::cat) {
8840 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8841 # This is to make it possible to compute $PARALLEL_TMP on
8842 # the fly when running remotely.
8843 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8844 # the command is run.
8846 # Prepend 'cat > $PARALLEL_TMP;'
8847 # Append 'unlink $PARALLEL_TMP without affecting $?'
8848 $command =
8849 'cat > $PARALLEL_TMP;'.
8850 $command.";". postpone_exit_and_cleanup().
8851 '$PARALLEL_TMP';
8852 } elsif($opt::fifo) {
8853 # Prepend fifo-wrapper. In essence:
8854 # mkfifo {}
8855 # ( $command ) &
8856 # # $command must read {}, otherwise this 'cat' will block
8857 # cat > {};
8858 # wait; rm {}
8859 # without affecting $?
8860 $command = fifo_wrap(). " ".
8861 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8863 # Wrap with ssh + tranferring of files
8864 $command = $self->sshlogin_wrap($command);
8865 if(@Global::cat_prepends) {
8866 # --pipepart: prepend:
8867 # < /tmp/foo perl -e 'while(@ARGV) {
8868 # sysseek(STDIN,shift,0) || die; $left = shift;
8869 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8870 # $left -= $read; syswrite(STDOUT,$buf);
8872 # }' 0 0 0 11 |
8874 # --pipepart --tee: prepend:
8875 # < dash-a-file
8877 # --pipe --tee: wrap:
8878 # (rm fifo; ... ) < fifo
8880 # --pipe --shard X:
8881 # (rm fifo; ... ) < fifo
8882 $command = (shift @Global::cat_prepends). "($command)".
8883 (shift @Global::cat_appends);
8884 } elsif($opt::pipe and not $opt::roundrobin) {
8885 # Wrap with EOF-detector to avoid starting $command if EOF.
8886 $command = empty_input_wrapper($command);
8888 if($opt::tmux) {
8889 # Wrap command with 'tmux'
8890 $command = $self->tmux_wrap($command);
8892 if($Global::cshell
8894 length $command > 499) {
8895 # csh does not like words longer than 1000 (499 quoted)
8896 # $command = "perl -e '".base64_zip_eval()."' ".
8897 # join" ",string_zip_base64(
8898 # 'exec "'.::perl_quote_scalar($command).'"');
8899 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8900 ::perl_quote_scalar($command).'"');
8902 $self->{'wrapped'} = $command;
8904 return $self->{'wrapped'};
8907 sub set_sshlogin($$) {
8908 my $self = shift;
8909 my $sshlogin = shift;
8910 $self->{'sshlogin'} = $sshlogin;
8911 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8912 delete $self->{'wrapped'};
8914 if($opt::sqlworker) {
8915 # Identify worker as --sqlworker often runs on different machines
8916 my $host = $sshlogin->string();
8917 if($host eq ":") {
8918 $host = ::hostname();
8920 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8924 sub sshlogin($) {
8925 my $self = shift;
8926 return $self->{'sshlogin'};
8929 sub string_base64($) {
8930 # Base64 encode strings into 1000 byte blocks.
8931 # 1000 bytes is the largest word size csh supports
8932 # Input:
8933 # @strings = to be encoded
8934 # Returns:
8935 # @base64 = 1000 byte block
8936 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8937 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8938 return @base64;
8941 sub string_zip_base64($) {
8942 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8943 # byte blocks.
8944 # 1000 bytes is the largest word size csh supports
8945 # Zipping will make exporting big environments work, too
8946 # Input:
8947 # @strings = to be encoded
8948 # Returns:
8949 # @base64 = 1000 byte block
8950 my($zipin_fh, $zipout_fh,@base64);
8951 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8952 if(fork) {
8953 close $zipin_fh;
8954 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8955 # Split base64 encoded into 1000 byte blocks
8956 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8957 close $zipout_fh;
8958 } else {
8959 close $zipout_fh;
8960 print $zipin_fh @_;
8961 close $zipin_fh;
8962 exit;
8964 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8965 return @base64;
8968 sub base64_zip_eval() {
8969 # Script that:
8970 # * reads base64 strings from @ARGV
8971 # * decodes them
8972 # * pipes through 'bzip2 -dc'
8973 # * evals the result
8974 # Reverse of string_zip_base64 + eval
8975 # Will be wrapped in ' so single quote is forbidden
8976 # Returns:
8977 # $script = 1-liner for perl -e
8978 my $script = ::spacefree(0,q{
8979 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8980 eval"@GNU_Parallel";
8981 $chld = $SIG{CHLD};
8982 $SIG{CHLD} = "IGNORE";
8983 # Search for bzip2. Not found => use default path
8984 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
8985 # $in = stdin on $zip, $out = stdout from $zip
8986 # Forget my() to save chars for csh
8987 # my($in, $out,$eval);
8988 open3($in,$out,">&STDERR",$zip,"-dc");
8989 if(my $perlpid = fork) {
8990 close $in;
8991 $eval = join "", <$out>;
8992 close $out;
8993 } else {
8994 close $out;
8995 # Pipe decoded base64 into 'bzip2 -dc'
8996 print $in (decode_base64(join"",@ARGV));
8997 close $in;
8998 exit;
9000 wait;
9001 $SIG{CHLD} = $chld;
9002 eval $eval;
9004 ::debug("base64",$script,"\n");
9005 return $script;
9008 sub base64_wrap($) {
9009 # base64 encode Perl code
9010 # Split it into chunks of < 1000 bytes
9011 # Prepend it with a decoder that eval's it
9012 # Input:
9013 # $eval_string = Perl code to run
9014 # Returns:
9015 # $shell_command = shell command that runs $eval_string
9016 my $eval_string = shift;
9017 return
9018 "perl -e ".
9019 ::Q(base64_zip_eval())." ".
9020 join" ",::shell_quote(string_zip_base64($eval_string));
9023 sub base64_eval($) {
9024 # Script that:
9025 # * reads base64 strings from @ARGV
9026 # * decodes them
9027 # * evals the result
9028 # Reverse of string_base64 + eval
9029 # Will be wrapped in ' so single quote is forbidden.
9030 # Spaces are stripped so spaces cannot be significant.
9031 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
9032 # to make it clear that this is a GNU Parallel command
9033 # when looking at the process table.
9034 # Returns:
9035 # $script = 1-liner for perl -e
9036 my $script = ::spacefree(0,q{
9037 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
9038 eval "@GNU_Parallel";
9039 my $eval = decode_base64(join"",@ARGV);
9040 eval $eval;
9042 ::debug("base64",$script,"\n");
9043 return $script;
9046 sub sshlogin_wrap($) {
9047 # Wrap the command with the commands needed to run remotely
9048 # Input:
9049 # $command = command to run
9050 # Returns:
9051 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
9052 sub monitor_parent_sshd_script {
9053 # This script is to solve the problem of
9054 # * not mixing STDERR and STDOUT
9055 # * terminating with ctrl-c
9056 # If its parent is ssh: all good
9057 # If its parent is init(1): ssh died, so kill children
9058 my $monitor_parent_sshd_script;
9060 if(not $monitor_parent_sshd_script) {
9061 $monitor_parent_sshd_script =
9062 # This will be packed in ', so only use "
9063 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
9064 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
9065 '$nice = '.$opt::nice.';'.
9066 '$termseq = "'.$opt::termseq.'";'.
9068 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
9069 do {
9070 $ENV{PARALLEL_TMP} = $tmpdir."/par".
9071 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
9072 } while(-e $ENV{PARALLEL_TMP});
9073 $SIG{CHLD} = sub { $done = 1; };
9074 $pid = fork;
9075 unless($pid) {
9076 # Make own process group to be able to kill HUP it later
9077 eval { setpgrp };
9078 eval { setpriority(0,0,$nice) };
9079 exec $shell, "-c", ($bashfunc."@ARGV");
9080 die "exec: $!\n";
9082 do {
9083 # Parent is not init (ppid=1), so sshd is alive
9084 # Exponential sleep up to 1 sec
9085 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
9086 select(undef, undef, undef, $s);
9087 } until ($done || getppid == 1);
9088 if(not $done) {
9089 # Kill as per --termseq
9090 my @term_seq = split/,/,$termseq;
9091 if(not @term_seq) {
9092 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
9094 while(@term_seq && kill(0,-$pid)) {
9095 kill(shift @term_seq, -$pid);
9096 select(undef, undef, undef, (shift @term_seq)/1000);
9099 wait;
9100 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9103 return $monitor_parent_sshd_script;
9106 sub vars_to_export {
9107 # Uses:
9108 # @opt::env
9109 my @vars = ("parallel_bash_environment");
9110 for my $varstring (@opt::env) {
9111 # Split up --env VAR1,VAR2
9112 push @vars, split /,/, $varstring;
9114 for (@vars) {
9115 if(-r $_ and not -d) {
9116 # Read as environment definition bug #44041
9117 # TODO parse this
9118 my $fh = ::open_or_exit($_);
9119 $Global::envdef = join("",<$fh>);
9120 close $fh;
9123 if(grep { /^_$/ } @vars) {
9124 local $/ = "\n";
9125 # --env _
9126 # Include all vars that are not in a clean environment
9127 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
9128 my @ignore = <$vars_fh>;
9129 chomp @ignore;
9130 my %ignore;
9131 @ignore{@ignore} = @ignore;
9132 close $vars_fh;
9133 push @vars, grep { not defined $ignore{$_} } keys %ENV;
9134 @vars = grep { not /^_$/ } @vars;
9135 } else {
9136 ::error("Run '$Global::progname --record-env' ".
9137 "in a clean environment first.");
9138 ::wait_and_exit(255);
9141 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
9142 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
9144 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
9145 "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", "PARALLEL_JOBSLOT",
9146 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
9147 # Keep only defined variables
9148 return grep { defined($ENV{$_}) } @vars;
9151 sub env_as_eval {
9152 # Returns:
9153 # $eval = '$ENV{"..."}=...; ...'
9154 my @vars = vars_to_export();
9155 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
9156 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
9157 my @non_functions = (grep { !/PARALLEL_ENV/ }
9158 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
9160 # eval of @envset will set %ENV
9161 my $envset = join"", map {
9162 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
9163 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
9165 # running @bashfunc on the command line, will set the functions
9166 my @bashfunc = map {
9167 my $v=$_;
9168 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
9169 "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
9170 # eval $bashfuncset will set $bashfunc
9171 my $bashfuncset;
9172 if(@bashfunc) {
9173 # Functions are not supported for all shells
9174 if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
9175 ::warning("Shell functions may not be supported in $Global::shell.");
9177 $bashfuncset =
9178 '@bash_functions=qw('."@bash_functions".");".
9179 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
9180 if($shell=~/csh/) {
9181 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
9182 exec "false";
9185 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
9186 } else {
9187 $bashfuncset = '$bashfunc = "";'
9189 if($ENV{'parallel_bash_environment'}) {
9190 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
9192 ::debug("base64",$envset,$bashfuncset,"\n");
9193 return $csh_friendly,$envset,$bashfuncset;
9196 my $self = shift;
9197 my $command = shift;
9198 # TODO test that *sh -c 'parallel --env' use *sh
9199 if(not defined $self->{'sshlogin_wrap'}{$command}) {
9200 my $sshlogin = $self->sshlogin();
9201 my $serverlogin = $sshlogin->serverlogin();
9202 my $quoted_remote_command;
9203 $ENV{'PARALLEL_SEQ'} = $self->seq();
9204 $ENV{'PARALLEL_JOBSLOT'} = $self->slot();
9205 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
9206 $ENV{'PARALLEL_SSHHOST'} = $sshlogin->serverlogin();
9207 $ENV{'PARALLEL_PID'} = $$;
9208 if($serverlogin eq ":") {
9209 if($opt::workdir) {
9210 # Create workdir if needed. Then cd to it.
9211 my $wd = $self->workdir();
9212 if($opt::workdir eq "." or $opt::workdir eq "...") {
9213 # If $wd does not start with '/': Prepend $HOME
9214 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
9216 ::mkdir_or_die($wd);
9217 my $post = "";
9218 if($opt::workdir eq "...") {
9219 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
9222 $command = "cd ".::Q($wd)." || exit 255; " .
9223 $command . $post;;
9225 if(@opt::env) {
9226 # Prepend with environment setter, which sets functions in zsh
9227 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9228 my $perl_code = $envset.$bashfuncset.
9229 '@ARGV="'.::perl_quote_scalar($command).'";'.
9230 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
9231 if(length $perl_code > 999
9233 not $csh_friendly
9235 $command =~ /\n/) {
9236 # csh does not deal well with > 1000 chars in one word
9237 # csh does not deal well with $ENV with \n
9238 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
9239 } else {
9240 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
9242 } else {
9243 $self->{'sshlogin_wrap'}{$command} = $command;
9245 } else {
9246 my $pwd = "";
9247 if($opt::workdir) {
9248 # Create remote workdir if needed. Then cd to it.
9249 my $wd = ::pQ($self->workdir());
9250 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
9251 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
9253 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9254 my $remote_command = $pwd.$envset.$bashfuncset.
9255 '@ARGV="'.::perl_quote_scalar($command).'";'.
9256 monitor_parent_sshd_script();
9257 $quoted_remote_command = "perl -e ". ::Q($remote_command);
9258 my $dq_remote_command = ::Q($quoted_remote_command);
9259 if(length $dq_remote_command > 999
9261 not $csh_friendly
9263 $command =~ /\n/) {
9264 # csh does not deal well with > 1000 chars in one word
9265 # csh does not deal well with $ENV with \n
9266 $quoted_remote_command =
9267 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
9268 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
9269 } else {
9270 $quoted_remote_command = $dq_remote_command;
9273 my $sshcmd = $sshlogin->sshcommand();
9274 my ($pre,$post,$cleanup)=("","","");
9275 # --transfer
9276 $pre .= $self->sshtransfer();
9277 # --return
9278 $post .= $self->sshreturn();
9279 # --cleanup
9280 $post .= $self->sshcleanup();
9281 if($post) {
9282 # We need to save the exit status of the job
9283 $post = exitstatuswrapper($post);
9285 $self->{'sshlogin_wrap'}{$command} =
9286 ($pre
9287 . "$sshcmd $serverlogin -- exec "
9288 . $quoted_remote_command
9289 . ";"
9290 . $post);
9293 return $self->{'sshlogin_wrap'}{$command};
9296 sub transfer($) {
9297 # Files to transfer
9298 # Non-quoted and with {...} substituted
9299 # Returns:
9300 # @transfer - File names of files to transfer
9301 my $self = shift;
9303 my $transfersize = 0;
9304 my @transfer = $self->{'commandline'}->
9305 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
9306 for(@transfer) {
9307 # filesize
9308 if(-e $_) {
9309 $transfersize += (stat($_))[7];
9312 $self->add_transfersize($transfersize);
9313 return @transfer;
9316 sub transfersize($) {
9317 my $self = shift;
9318 return $self->{'transfersize'};
9321 sub add_transfersize($) {
9322 my $self = shift;
9323 my $transfersize = shift;
9324 $self->{'transfersize'} += $transfersize;
9325 $opt::sqlworker and
9326 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
9327 $self->{'transfersize'});
9330 sub sshtransfer($) {
9331 # Returns for each transfer file:
9332 # rsync $file remote:$workdir
9333 my $self = shift;
9334 my @pre;
9335 my $sshlogin = $self->sshlogin();
9336 my $workdir = $self->workdir();
9337 for my $file ($self->transfer()) {
9338 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
9340 return join("",@pre);
9343 sub return($) {
9344 # Files to return
9345 # Non-quoted and with {...} substituted
9346 # Returns:
9347 # @non_quoted_filenames
9348 my $self = shift;
9349 return $self->{'commandline'}->
9350 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
9353 sub returnsize($) {
9354 # This is called after the job has finished
9355 # Returns:
9356 # $number_of_bytes transferred in return
9357 my $self = shift;
9358 for my $file ($self->return()) {
9359 if(-e $file) {
9360 $self->{'returnsize'} += (stat($file))[7];
9363 return $self->{'returnsize'};
9366 sub add_returnsize($) {
9367 my $self = shift;
9368 my $returnsize = shift;
9369 $self->{'returnsize'} += $returnsize;
9370 $opt::sqlworker and
9371 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
9372 $self->{'returnsize'});
9375 sub sshreturn($) {
9376 # Returns for each return-file:
9377 # rsync remote:$workdir/$file .
9378 my $self = shift;
9379 my $sshlogin = $self->sshlogin();
9380 my $sshcmd = $sshlogin->sshcommand();
9381 my $serverlogin = $sshlogin->serverlogin();
9382 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
9383 my $pre = "";
9384 for my $file ($self->return()) {
9385 $file =~ s:^\./::g; # Remove ./ if any
9386 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9387 my $cd = "";
9388 my $wd = "";
9389 if($relpath) {
9390 # rsync -avR /foo/./bar/baz.c remote:/tmp/
9391 # == (on old systems)
9392 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
9393 $wd = ::shell_quote_file($self->workdir()."/");
9395 # Only load File::Basename if actually needed
9396 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
9397 # dir/./file means relative to dir, so remove dir on remote
9398 $file =~ m:(.*)/\./:;
9399 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
9400 my $nobasedir = $file;
9401 $nobasedir =~ s:.*/\./::;
9402 $cd = ::shell_quote_file(::dirname($nobasedir));
9403 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
9404 my $basename = ::Q(::shell_quote_file(::basename($file)));
9405 # --return
9406 # mkdir -p /home/tange/dir/subdir/;
9407 # rsync (--protocol 30) -rlDzR
9408 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
9409 # server:file.gz /home/tange/dir/subdir/
9410 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
9411 " $rsync_cd $rsync_opts $serverlogin:".
9412 $basename . " ".$basedir.$cd.";";
9414 return $pre;
9417 sub sshcleanup($) {
9418 # Return the sshcommand needed to remove the file
9419 # Returns:
9420 # ssh command needed to remove files from sshlogin
9421 my $self = shift;
9422 my $sshlogin = $self->sshlogin();
9423 my $sshcmd = $sshlogin->sshcommand();
9424 my $serverlogin = $sshlogin->serverlogin();
9425 my $workdir = $self->workdir();
9426 my $cleancmd = "";
9428 for my $file ($self->remote_cleanup()) {
9429 my @subworkdirs = parentdirs_of($file);
9430 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
9432 if(defined $opt::workdir and $opt::workdir eq "...") {
9433 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
9435 return $cleancmd;
9438 sub remote_cleanup($) {
9439 # Returns:
9440 # Files to remove at cleanup
9441 my $self = shift;
9442 if($opt::cleanup) {
9443 my @transfer = $self->transfer();
9444 my @return = $self->return();
9445 return (@transfer,@return);
9446 } else {
9447 return ();
9451 sub exitstatuswrapper(@) {
9452 # Input:
9453 # @shellcode = shell code to execute
9454 # Returns:
9455 # shell script that returns current status after executing @shellcode
9456 if($Global::cshell) {
9457 return ('set _EXIT_status=$status; ' .
9458 join(" ",@_).
9459 'exit $_EXIT_status;');
9460 } else {
9461 return ('_EXIT_status=$?; ' .
9462 join(" ",@_).
9463 'exit $_EXIT_status;');
9467 sub workdir($) {
9468 # Returns:
9469 # the workdir on a remote machine
9470 my $self = shift;
9471 if(not defined $self->{'workdir'}) {
9472 my $workdir;
9473 if(defined $opt::workdir) {
9474 if($opt::workdir eq ".") {
9475 # . means current dir
9476 my $home = $ENV{'HOME'};
9477 eval 'use Cwd';
9478 my $cwd = cwd();
9479 $workdir = $cwd;
9480 if($home) {
9481 # If homedir exists: remove the homedir from
9482 # workdir if cwd starts with homedir
9483 # E.g. /home/foo/my/dir => my/dir
9484 # E.g. /tmp/my/dir => /tmp/my/dir
9485 my ($home_dev, $home_ino) = (stat($home))[0,1];
9486 my $parent = "";
9487 my @dir_parts = split(m:/:,$cwd);
9488 my $part;
9489 while(defined ($part = shift @dir_parts)) {
9490 $part eq "" and next;
9491 $parent .= "/".$part;
9492 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
9493 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
9494 # dev and ino is the same: We found the homedir.
9495 $workdir = join("/",@dir_parts);
9496 last;
9500 if($workdir eq "") {
9501 $workdir = ".";
9503 } elsif($opt::workdir eq "...") {
9504 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
9505 . "-" . $self->seq();
9506 } else {
9507 $workdir = $self->{'commandline'}->
9508 replace_placeholders([$opt::workdir],0,0);
9509 #$workdir = $opt::workdir;
9510 # Rsync treats /./ special. We dont want that
9511 $workdir =~ s:/\./:/:g; # Remove /./
9512 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
9513 $workdir =~ s:^\./::g; # Remove starting ./ if any
9515 } else {
9516 $workdir = ".";
9518 $self->{'workdir'} = $workdir;
9520 return $self->{'workdir'};
9523 sub parentdirs_of($) {
9524 # Return:
9525 # all parentdirs except . of this dir or file - sorted desc by length
9526 my $d = shift;
9527 my @parents = ();
9528 while($d =~ s:/[^/]+$::) {
9529 if($d ne ".") {
9530 push @parents, $d;
9533 return @parents;
9536 sub start($) {
9537 # Setup STDOUT and STDERR for a job and start it.
9538 # Returns:
9539 # job-object or undef if job not to run
9541 sub open3_setpgrp_internal {
9542 # Run open3+setpgrp followed by the command
9543 # Input:
9544 # $stdin_fh = Filehandle to use as STDIN
9545 # $stdout_fh = Filehandle to use as STDOUT
9546 # $stderr_fh = Filehandle to use as STDERR
9547 # $command = Command to run
9548 # Returns:
9549 # $pid = Process group of job started
9550 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9551 my $pid;
9552 local (*OUT,*ERR);
9553 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9554 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9555 # The eval is needed to catch exception from open3
9556 eval {
9557 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9558 # Each child gets its own process group to make it safe to killall
9559 eval{ setpgrp(0,0) };
9560 eval{ setpriority(0,0,$opt::nice) };
9561 exec($Global::shell,"-c",$command)
9562 || ::die_bug("open3-$stdin_fh $command");
9565 return $pid;
9568 sub open3_setpgrp_external {
9569 # Run open3 on $command wrapped with a perl script doing setpgrp
9570 # Works on systems that do not support open3(,,,"-")
9571 # Input:
9572 # $stdin_fh = Filehandle to use as STDIN
9573 # $stdout_fh = Filehandle to use as STDOUT
9574 # $stderr_fh = Filehandle to use as STDERR
9575 # $command = Command to run
9576 # Returns:
9577 # $pid = Process group of job started
9578 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9579 local (*OUT,*ERR);
9580 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9581 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9583 my $pid;
9584 my @setpgrp_wrap =
9585 ('perl','-e',
9586 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9587 "exec '$Global::shell', '-c', \@ARGV");
9588 # The eval is needed to catch exception from open3
9589 eval {
9590 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9591 || ::die_bug("open3-$stdin_fh");
9594 return $pid;
9597 sub redefine_open3_setpgrp {
9598 my $setgprp_cache = shift;
9599 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9600 no warnings 'redefine';
9601 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9602 # Test to see if open3(x,x,x,"-") is fully supported
9603 # Can an exported bash function be called via open3?
9604 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9605 'else { exec("bash","-c","testfun && true"); }';
9606 my $bash =
9607 ::shell_quote_scalar_default(
9608 "testfun() { rm $name; }; export -f testfun; ".
9609 "perl -MIPC::Open3 -e ".
9610 ::shell_quote_scalar_default($script)
9612 my $redefine_eval;
9613 # Redirect STDERR temporarily,
9614 # so errors on MacOS X are ignored.
9615 open my $saveerr, ">&STDERR";
9616 open STDERR, '>', "/dev/null";
9617 # Run the test
9618 ::debug("init",qq{bash -c $bash 2>/dev/null});
9619 qx{ bash -c $bash 2>/dev/null };
9620 open STDERR, ">&", $saveerr;
9622 if(-e $name) {
9623 # Does not support open3(x,x,x,"-")
9624 # or does not have bash:
9625 # Use (slow) external version
9626 unlink($name);
9627 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
9628 ::debug("init","open3_setpgrp_external chosen\n");
9629 } else {
9630 # Supports open3(x,x,x,"-")
9631 # This is 0.5 ms faster to run
9632 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
9633 ::debug("init","open3_setpgrp_internal chosen\n");
9635 if(open(my $fh, ">", $setgprp_cache)) {
9636 print $fh $redefine_eval;
9637 close $fh;
9638 } else {
9639 ::debug("init","Cannot write to $setgprp_cache");
9641 eval $redefine_eval;
9644 sub open3_setpgrp {
9645 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
9646 ::hostname() . "/setpgrp_func";
9647 sub read_cache() {
9648 -e $setgprp_cache || return 0;
9649 local $/ = undef;
9650 open(my $fh, "<", $setgprp_cache) || return 0;
9651 eval <$fh> || return 0;
9652 close $fh;
9653 return 1;
9655 if(not read_cache()) {
9656 redefine_open3_setpgrp($setgprp_cache);
9658 # The sub is now redefined. Call it
9659 return open3_setpgrp(@_);
9662 my $job = shift;
9663 if($job->suspended()) {
9664 # Job is kill -STOP'ped: Restart it.
9665 kill "CONT", $job->pid();
9666 return $job;
9668 # Get the shell command to be executed (possibly with ssh infront).
9669 my $command = $job->wrapped();
9670 my $pid;
9672 if($Global::interactive or $Global::stderr_verbose) {
9673 $job->interactive_start();
9675 # Must be run after $job->interactive_start():
9676 # $job->interactive_start() may call $job->skip()
9677 if($job->{'commandline'}{'skip'}) {
9678 # $job->skip() was called
9679 $command = "true";
9681 $job->openoutputfiles();
9682 $job->print_verbose_dryrun();
9683 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
9684 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
9685 $ENV{'PARALLEL_SEQ'} = $job->seq();
9686 $ENV{'PARALLEL_PID'} = $$;
9687 $ENV{'PARALLEL_JOBSLOT'} = $job->slot();
9688 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
9689 $job->add_rm($ENV{'PARALLEL_TMP'});
9690 ::debug("run", $Global::total_running, " processes . Starting (",
9691 $job->seq(), "): $command\n");
9692 if($opt::pipe) {
9693 my ($stdin_fh) = ::gensym();
9694 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
9695 if($opt::roundrobin and not $opt::keeporder) {
9696 # --keep-order will make sure the order will be reproducible
9697 ::set_fh_non_blocking($stdin_fh);
9699 $job->set_fh(0,"w",$stdin_fh);
9700 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
9701 } elsif ($opt::tty and -c "/dev/tty" and
9702 open(my $devtty_fh, "<", "/dev/tty")) {
9703 # Give /dev/tty to the command if no one else is using it
9704 # The eval is needed to catch exception from open3
9705 local (*IN,*OUT,*ERR);
9706 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9707 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9708 *IN = $devtty_fh;
9709 # The eval is needed to catch exception from open3
9710 my @wrap = ('perl','-e',
9711 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
9712 "exec '$Global::shell', '-c', \@ARGV");
9713 eval {
9714 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
9715 || ::die_bug("open3-/dev/tty");
9718 close $devtty_fh;
9719 $job->set_virgin(0);
9720 } else {
9721 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
9722 $job->set_virgin(0);
9724 if($pid) {
9725 # A job was started
9726 $Global::total_running++;
9727 $Global::total_started++;
9728 $job->set_pid($pid);
9729 $job->set_starttime();
9730 $Global::running{$job->pid()} = $job;
9731 if($opt::timeout) {
9732 $Global::timeoutq->insert($job);
9734 $Global::newest_job = $job;
9735 $Global::newest_starttime = ::now();
9736 return $job;
9737 } else {
9738 # No more processes
9739 ::debug("run", "Cannot spawn more jobs.\n");
9740 return undef;
9744 sub interactive_start($) {
9745 my $self = shift;
9746 my $command = $self->wrapped();
9747 if($Global::interactive) {
9748 my $answer;
9749 ::status_no_nl("$command ?...");
9751 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
9752 $answer = <$tty_fh>;
9753 close $tty_fh;
9754 # Sometime we get an empty string (not even \n)
9755 # Do not know why, so let us just ignore it and try again
9756 } while(length $answer < 1);
9757 if (not ($answer =~ /^\s*y/i)) {
9758 $self->{'commandline'}->skip();
9760 } else {
9761 print $Global::original_stderr "$command\n";
9766 my $tmuxsocket;
9768 sub tmux_wrap($) {
9769 # Wrap command with tmux for session pPID
9770 # Input:
9771 # $actual_command = the actual command being run (incl ssh wrap)
9772 my $self = shift;
9773 my $actual_command = shift;
9774 # Temporary file name. Used for fifo to communicate exit val
9775 my $tmpfifo = ::tmpname("tmx");
9776 $self->add_rm($tmpfifo);
9778 if(length($tmpfifo) >=100) {
9779 ::error("tmux does not support sockets with path > 100.");
9780 ::wait_and_exit(255);
9782 if($opt::tmuxpane) {
9783 # Move the command into a pane in window 0
9784 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
9785 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
9786 $actual_command;
9788 my $visual_command = $self->replaced();
9789 my $title = $visual_command;
9790 if($visual_command =~ /\0/) {
9791 ::error("Command line contains NUL. tmux is confused by NUL.");
9792 ::wait_and_exit(255);
9794 # ; causes problems
9795 # ascii 194-245 annoys tmux
9796 $title =~ tr/[\011-\016;\302-\365]/ /s;
9797 $title = ::Q($title);
9799 my $l_act = length($actual_command);
9800 my $l_tit = length($title);
9801 my $l_fifo = length($tmpfifo);
9802 # The line to run contains a 118 chars extra code + the title 2x
9803 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9805 my $quoted_space75 = ::Q(" ")x75;
9806 while($l_tit < 1000 and
9808 (890 < $l_tot and $l_tot < 1350)
9810 (9250 < $l_tot and $l_tot < 9800)
9811 )) {
9812 # tmux blocks for certain lengths:
9813 # 900 < title + command < 1200
9814 # 9250 < title + command < 9800
9815 # but only if title < 1000, so expand the title with 75 spaces
9816 # The measured lengths are:
9817 # 996 < (title + whole command) < 1127
9818 # 9331 < (title + whole command) < 9636
9819 $title .= $quoted_space75;
9820 $l_tit = length($title);
9821 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9824 my $tmux;
9825 $ENV{'PARALLEL_TMUX'} ||= "tmux";
9826 if(not $tmuxsocket) {
9827 $tmuxsocket = ::tmpname("tms");
9828 if($opt::fg) {
9829 if(not fork) {
9830 # Run tmux in the foreground
9831 # Wait for the socket to appear
9832 while (not -e $tmuxsocket) { }
9833 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
9834 exit;
9837 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
9839 $tmux = "sh -c '".
9840 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
9841 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
9843 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
9844 $Limits::Command::line_max_len, " tot ",
9845 $l_tot, "\n");
9847 return "mkfifo $tmpfifo && $tmux ".
9848 # Run in tmux
9851 "(".$actual_command.');'.
9852 # The triple print is needed - otherwise the testsuite fails
9853 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
9854 "echo $title; echo \007Job finished at: `date`;sleep 10"
9856 # Run outside tmux
9857 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
9858 # If csh the first will be 0h, so use the second as exit value.
9859 # Otherwise just use the first value as exit value.
9860 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
9864 sub is_already_in_results($) {
9865 # Do we already have results for this job?
9866 # Returns:
9867 # $job_already_run = bool whether there is output for this or not
9868 my $job = $_[0];
9869 my $out = $job->{'commandline'}->results_out();
9870 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
9871 return(-e $out."stdout" or -f $out);
9874 sub is_already_in_joblog($) {
9875 my $job = shift;
9876 return vec($Global::job_already_run,$job->seq(),1);
9879 sub set_job_in_joblog($) {
9880 my $job = shift;
9881 vec($Global::job_already_run,$job->seq(),1) = 1;
9884 sub should_be_retried($) {
9885 # Should this job be retried?
9886 # Returns
9887 # 0 - do not retry
9888 # 1 - job queued for retry
9889 my $self = shift;
9890 if (not $opt::retries) {
9891 return 0;
9893 if(not $self->exitstatus() and not $self->exitsignal()) {
9894 # Completed with success. If there is a recorded failure: forget it
9895 $self->reset_failed_here();
9896 return 0;
9897 } else {
9898 # The job failed. Should it be retried?
9899 $self->add_failed_here();
9900 my $retries = $self->{'commandline'}->
9901 replace_placeholders([$opt::retries],0,0);
9902 if($self->total_failed() == $retries) {
9903 # This has been retried enough
9904 return 0;
9905 } else {
9906 # This command should be retried
9907 $self->set_endtime(undef);
9908 $self->reset_exitstatus();
9909 $Global::JobQueue->unget($self);
9910 ::debug("run", "Retry ", $self->seq(), "\n");
9911 return 1;
9917 my (%print_later,$job_seq_to_print);
9919 sub print_earlier_jobs($) {
9920 # Print jobs whose output is postponed due to --keep-order
9921 # Returns: N/A
9922 my $job = shift;
9923 $print_later{$job->seq()} = $job;
9924 $job_seq_to_print ||= 1;
9925 my $returnsize = 0;
9926 ::debug("run", "Looking for: $job_seq_to_print ",
9927 "This: ", $job->seq(), "\n");
9928 for(;vec($Global::job_already_run,$job_seq_to_print,1);
9929 $job_seq_to_print++) {}
9930 while(my $j = $print_later{$job_seq_to_print}) {
9931 $returnsize += $j->print();
9932 if($j->endtime()) {
9933 # Job finished - look at the next
9934 delete $print_later{$job_seq_to_print};
9935 $job_seq_to_print++;
9936 next;
9937 } else {
9938 # Job not finished yet - look at it again next round
9939 last;
9942 return $returnsize;
9946 sub print($) {
9947 # Print the output of the jobs
9948 # Returns: N/A
9949 my $self = shift;
9951 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9952 if($opt::dryrun) {
9953 # Nothing was printed to this job:
9954 # cleanup tmp files if --files was set
9955 ::rm($self->fh(1,"name"));
9957 if($opt::pipe and $self->virgin() and not $opt::tee) {
9958 # Skip --joblog, --dryrun, --verbose
9959 } else {
9960 if($opt::ungroup) {
9961 # NULL returnsize = 0 returnsize
9962 $self->returnsize() or $self->add_returnsize(0);
9963 if($Global::joblog and defined $self->{'exitstatus'}) {
9964 # Add to joblog when finished
9965 $self->print_joblog();
9966 # Printing is only relevant for grouped/--line-buffer output.
9967 $opt::ungroup and return;
9970 # Check for disk full
9971 ::exit_if_disk_full();
9974 my $returnsize = $self->returnsize();
9975 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9976 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9977 $fdno == 0 and next;
9978 my $out_fd = $Global::fd{$fdno};
9979 my $in_fh = $self->fh($fdno,"r");
9980 if(not $in_fh) {
9981 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9982 # ::warning("File descriptor $fdno not defined\n");
9984 next;
9986 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
9987 if($opt::linebuffer) {
9988 # Line buffered print out
9989 $self->print_linebuffer($fdno,$in_fh,$out_fd);
9990 } elsif($opt::files) {
9991 $self->print_files($fdno,$in_fh,$out_fd);
9992 } elsif($opt::tag or defined $opt::tagstring) {
9993 $self->print_tag($fdno,$in_fh,$out_fd);
9994 } else {
9995 $self->print_normal($fdno,$in_fh,$out_fd);
9997 flush $out_fd;
9999 ::debug("print", "<<joboutput\n");
10000 if(defined $self->{'exitstatus'}
10001 and not ($self->virgin() and $opt::pipe)) {
10002 if($Global::joblog and not $opt::sqlworker) {
10003 # Add to joblog when finished
10004 $self->print_joblog();
10006 if($opt::sqlworker and not $opt::results) {
10007 $Global::sql->output($self);
10009 if($Global::csvsep) {
10010 # Add output to CSV when finished
10011 $self->print_csv();
10014 return $returnsize - $self->returnsize();
10018 my $header_printed;
10020 sub print_csv($) {
10021 my $self = shift;
10022 my $cmd;
10023 if($Global::verbose <= 1) {
10024 $cmd = $self->replaced();
10025 } else {
10026 # Verbose level > 1: Print the rsync and stuff
10027 $cmd = join " ", @{$self->{'commandline'}};
10029 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10031 if(not $header_printed) {
10032 # Variable headers
10033 # Normal => V1..Vn
10034 # --header : => first value from column
10035 my @V;
10036 if($opt::header) {
10037 my $i = 1;
10038 @V = (map { $Global::input_source_header{$i++} }
10039 @$record_ref[1..$#$record_ref]);
10040 } else {
10041 my $V = "V1";
10042 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
10044 print $Global::csv_fh
10045 (map { $$_ }
10046 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
10047 "Send", "Receive", "Exitval", "Signal", "Command",
10049 "Stdout","Stderr"
10050 )),"\n";
10051 $header_printed++;
10053 # Memory optimization: Overwrite with the joined output
10054 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10055 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10056 print $Global::csv_fh
10057 (map { $$_ }
10058 combine_ref
10059 ($self->seq(),
10060 $self->sshlogin()->string(),
10061 $self->starttime(), sprintf("%0.3f",$self->runtime()),
10062 $self->transfersize(), $self->returnsize(),
10063 $self->exitstatus(), $self->exitsignal(), \$cmd,
10064 \@$record_ref[1..$#$record_ref],
10065 \$self->{'output'}{1},
10066 \$self->{'output'}{2})),"\n";
10070 sub combine_ref($) {
10071 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
10072 my @part = @_;
10073 my $sep = $Global::csvsep;
10074 my $quot = '"';
10075 my @out = ();
10077 my $must_be_quoted;
10078 for my $column (@part) {
10079 # Memory optimization: Content transferred as reference
10080 if(ref $column ne "SCALAR") {
10081 # Convert all columns to scalar references
10082 my $v = $column;
10083 $column = \$v;
10085 if(not defined $$column) {
10086 $$column = '';
10087 next;
10090 $must_be_quoted = 0;
10092 if($$column =~ s/$quot/$quot$quot/go){
10093 # Replace " => ""
10094 $must_be_quoted ||=1;
10096 if($$column =~ /[\s\Q$sep\E]/o){
10097 # Put quotes around if the column contains ,
10098 $must_be_quoted ||=1;
10101 $Global::use{"bytes"} ||= eval "use bytes; 1;";
10102 if ($$column =~ /\0/) {
10103 # Contains \0 => put quotes around
10104 $must_be_quoted ||=1;
10106 if($must_be_quoted){
10107 push @out, \$sep, \$quot, $column, \$quot;
10108 } else {
10109 push @out, \$sep, $column;
10112 # Pop off a $sep
10113 shift @out;
10114 return @out;
10117 sub print_files($) {
10118 # Print the name of the file containing stdout on stdout
10119 # Uses:
10120 # $opt::pipe
10121 # $opt::group = Print when job is done
10122 # $opt::linebuffer = Print ASAP
10123 # Returns: N/A
10124 my $self = shift;
10125 my ($fdno,$in_fh,$out_fd) = @_;
10127 # If the job is dead: close printing fh. Needed for --compress
10128 close $self->fh($fdno,"w");
10129 if($? and $opt::compress) {
10130 ::error($opt::compress_program." failed.");
10131 $self->set_exitstatus(255);
10133 if($opt::compress) {
10134 # Kill the decompressor which will not be needed
10135 CORE::kill "TERM", $self->fh($fdno,"rpid");
10137 close $in_fh;
10139 if($opt::pipe and $self->virgin()) {
10140 # Nothing was printed to this job:
10141 # cleanup unused tmp files because --files was set
10142 for my $fdno (1,2) {
10143 ::rm($self->fh($fdno,"name"));
10144 ::rm($self->fh($fdno,"unlink"));
10146 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
10147 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10148 if($Global::membuffer) {
10149 push @{$self->{'output'}{$fdno}},
10150 $self->tag(), $self->fh($fdno,"name");
10152 $self->add_returnsize(-s $self->fh($fdno,"name"));
10153 # Mark as printed - do not print again
10154 $self->set_fh($fdno,"name",undef);
10158 sub print_linebuffer($) {
10159 my $self = shift;
10160 my ($fdno,$in_fh,$out_fd) = @_;
10161 if(defined $self->{'exitstatus'}) {
10162 # If the job is dead: close printing fh. Needed for --compress
10163 close $self->fh($fdno,"w");
10164 if($? and $opt::compress) {
10165 ::error($opt::compress_program." failed.");
10166 $self->set_exitstatus(255);
10168 if($opt::compress) {
10169 # Blocked reading in final round
10170 for my $fdno (1,2) {
10171 ::set_fh_blocking($self->fh($fdno,'r'));
10175 if(not $self->virgin()) {
10176 if($opt::files or ($opt::results and not $Global::csvsep)) {
10177 # Print filename
10178 if($fdno == 1 and not $self->fh($fdno,"printed")) {
10179 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10180 if($Global::membuffer) {
10181 push(@{$self->{'output'}{$fdno}}, $self->tag(),
10182 $self->fh($fdno,"name"));
10184 $self->set_fh($fdno,"printed",1);
10186 # No need for reading $in_fh, as it is from "cat >/dev/null"
10187 } else {
10188 # Read halflines and print full lines
10189 my $outputlength = 0;
10190 my $halfline_ref = $self->{'halfline'}{$fdno};
10191 my ($buf,$i,$rv);
10192 # 1310720 gives 1.2 GB/s
10193 # 131072 gives 0.9 GB/s
10194 while($rv = sysread($in_fh, $buf,1310720)) {
10195 $outputlength += $rv;
10196 # TODO --recend
10197 # Treat both \n and \r as line end
10198 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10199 if($i) {
10200 # One or more complete lines were found
10201 if($opt::tag or defined $opt::tagstring) {
10202 # Replace ^ with $tag within the full line
10203 if($Global::cache_replacement_eval) {
10204 # Replace with the same value for tag
10205 my $tag = $self->tag();
10206 unshift @$halfline_ref, $tag;
10207 # TODO --recend that can be partially in @$halfline_ref
10208 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$tag/gs;
10209 # The length changed, so find the new ending pos
10210 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10211 } else {
10212 # Replace with freshly computed value of tag
10213 unshift @$halfline_ref, $self->tag();
10214 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$self->tag()/gse;
10215 # The length changed, so find the new ending pos
10216 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10219 # Print the partial line (halfline) and the last half
10220 print $out_fd @$halfline_ref, substr($buf,0,$i);
10221 # Buffer in memory for SQL and CSV-output
10222 if($Global::membuffer) {
10223 push(@{$self->{'output'}{$fdno}},
10224 @$halfline_ref, substr($buf,0,$i));
10226 # Remove the printed part by keeping the unprinted part
10227 @$halfline_ref = (substr($buf,$i));
10228 } else {
10229 # No newline, so append to the halfline
10230 push @$halfline_ref, $buf;
10233 $self->add_returnsize($outputlength);
10235 if(defined $self->{'exitstatus'}) {
10236 if($opt::files or ($opt::results and not $Global::csvsep)) {
10237 $self->add_returnsize(-s $self->fh($fdno,"name"));
10238 } else {
10239 # If the job is dead: print the remaining partial line
10240 # read remaining
10241 my $halfline_ref = $self->{'halfline'}{$fdno};
10242 if(grep /./, @$halfline_ref) {
10243 my $returnsize = 0;
10244 for(@{$self->{'halfline'}{$fdno}}) {
10245 $returnsize += length $_;
10247 $self->add_returnsize($returnsize);
10248 if($opt::tag or defined $opt::tagstring) {
10249 # Prepend $tag the the remaining half line
10250 unshift @$halfline_ref, $self->tag();
10252 # Print the partial line (halfline)
10253 print $out_fd @{$self->{'halfline'}{$fdno}};
10254 # Buffer in memory for SQL and CSV-output
10255 if($Global::membuffer) {
10256 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
10258 @$halfline_ref = ();
10261 if($self->fh($fdno,"rpid") and
10262 CORE::kill 0, $self->fh($fdno,"rpid")) {
10263 # decompress still running
10264 } else {
10265 # decompress done: close fh
10266 close $in_fh;
10267 if($? and $opt::compress) {
10268 ::error($opt::decompress_program." failed.");
10269 $self->set_exitstatus(255);
10276 sub print_tag(@) {
10277 return print_normal(@_);
10280 sub free_ressources() {
10281 my $self = shift;
10282 if(not $opt::ungroup) {
10283 my $fh;
10284 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
10285 $fh = $self->fh($fdno,"w");
10286 $fh and close $fh;
10287 $fh = $self->fh($fdno,"r");
10288 $fh and close $fh;
10293 sub print_normal($) {
10294 my $self = shift;
10295 my ($fdno,$in_fh,$out_fd) = @_;
10296 my $buf;
10297 close $self->fh($fdno,"w");
10298 if($? and $opt::compress) {
10299 ::error($opt::compress_program." failed.");
10300 $self->set_exitstatus(255);
10302 if(not $self->virgin()) {
10303 seek $in_fh, 0, 0;
10304 # $in_fh is now ready for reading at position 0
10305 my $outputlength = 0;
10306 my @output;
10308 if($opt::tag or $opt::tagstring) {
10309 # Read line by line
10310 local $/ = "\n";
10311 my $tag = $self->tag();
10312 while(<$in_fh>) {
10313 $outputlength += length $_;
10314 # Tag lines with \r, too
10315 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10316 print $out_fd $tag,$_;
10317 if($Global::membuffer) {
10318 push @{$self->{'output'}{$fdno}}, $tag, $_;
10321 } else {
10322 while(sysread($in_fh,$buf,131072)) {
10323 print $out_fd $buf;
10324 $outputlength += length $buf;
10325 if($Global::membuffer) {
10326 push @{$self->{'output'}{$fdno}}, $buf;
10330 if($fdno == 1) {
10331 $self->add_returnsize($outputlength);
10333 close $in_fh;
10334 if($? and $opt::compress) {
10335 ::error($opt::decompress_program." failed.");
10336 $self->set_exitstatus(255);
10341 sub print_joblog($) {
10342 my $self = shift;
10343 my $cmd;
10344 if($Global::verbose <= 1) {
10345 $cmd = $self->replaced();
10346 } else {
10347 # Verbose level > 1: Print the rsync and stuff
10348 $cmd = $self->wrapped();
10350 # Newlines make it hard to parse the joblog
10351 $cmd =~ s/\n/\0/g;
10352 print $Global::joblog
10353 join("\t", $self->seq(), $self->sshlogin()->string(),
10354 $self->starttime(), sprintf("%10.3f",$self->runtime()),
10355 $self->transfersize(), $self->returnsize(),
10356 $self->exitstatus(), $self->exitsignal(), $cmd
10357 ). "\n";
10358 flush $Global::joblog;
10359 $self->set_job_in_joblog();
10362 sub tag($) {
10363 my $self = shift;
10364 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
10365 if($opt::tag or defined $opt::tagstring) {
10366 $self->{'tag'} = $self->{'commandline'}->
10367 replace_placeholders([$opt::tagstring],0,0)."\t";
10368 } else {
10369 $self->{'tag'} = "";
10372 return $self->{'tag'};
10375 sub hostgroups($) {
10376 my $self = shift;
10377 if(not defined $self->{'hostgroups'}) {
10378 $self->{'hostgroups'} =
10379 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
10381 return @{$self->{'hostgroups'}};
10384 sub exitstatus($) {
10385 my $self = shift;
10386 return $self->{'exitstatus'};
10389 sub set_exitstatus($$) {
10390 my $self = shift;
10391 my $exitstatus = shift;
10392 if($exitstatus) {
10393 # Overwrite status if non-zero
10394 $self->{'exitstatus'} = $exitstatus;
10395 } else {
10396 # Set status but do not overwrite
10397 # Status may have been set by --timeout
10398 $self->{'exitstatus'} ||= $exitstatus;
10400 $opt::sqlworker and
10401 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
10402 $exitstatus);
10405 sub reset_exitstatus($) {
10406 my $self = shift;
10407 undef $self->{'exitstatus'};
10410 sub exitsignal($) {
10411 my $self = shift;
10412 return $self->{'exitsignal'};
10415 sub set_exitsignal($$) {
10416 my $self = shift;
10417 my $exitsignal = shift;
10418 $self->{'exitsignal'} = $exitsignal;
10419 $opt::sqlworker and
10420 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
10421 $exitsignal);
10425 my $status_printed;
10426 my $total_jobs;
10428 sub should_we_halt {
10429 # Should we halt? Immediately? Gracefully?
10430 # Returns: N/A
10431 my $job = shift;
10432 my $limit;
10433 if($job->exitstatus() or $job->exitsignal()) {
10434 # Job failed
10435 $Global::exitstatus++;
10436 $Global::total_failed++;
10437 if($Global::halt_fail) {
10438 ::status("$Global::progname: This job failed:",
10439 $job->replaced());
10440 $limit = $Global::total_failed;
10442 } elsif($Global::halt_success) {
10443 ::status("$Global::progname: This job succeeded:",
10444 $job->replaced());
10445 $limit = $Global::total_completed - $Global::total_failed;
10447 if($Global::halt_done) {
10448 ::status("$Global::progname: This job finished:",
10449 $job->replaced());
10450 $limit = $Global::total_completed;
10452 if(not defined $limit) {
10453 return ""
10455 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
10456 # --halt % => 1..100 (pct of jobs failed)
10457 if($Global::halt_pct and not $Global::halt_count) {
10458 $total_jobs ||= $Global::JobQueue->total_jobs();
10459 # From the pct compute the number of jobs that must fail/succeed
10460 $Global::halt_count = $total_jobs * $Global::halt_pct;
10462 if($limit >= $Global::halt_count) {
10463 # At least N jobs have failed/succeded/completed
10464 # or at least N% have failed/succeded/completed
10465 # So we should prepare for exit
10466 if($Global::halt_fail or $Global::halt_done) {
10467 # Set exit status
10468 if(not defined $Global::halt_exitstatus) {
10469 if($Global::halt_pct) {
10470 # --halt now,fail=X% or soon,fail=X%
10471 # --halt now,done=X% or soon,done=X%
10472 $Global::halt_exitstatus =
10473 ::ceil($Global::total_failed / $total_jobs * 100);
10474 } elsif($Global::halt_count) {
10475 # --halt now,fail=X or soon,fail=X
10476 # --halt now,done=X or soon,done=X
10477 $Global::halt_exitstatus =
10478 ::min($Global::total_failed,101);
10480 if($Global::halt_count and $Global::halt_count == 1) {
10481 # --halt now,fail=1 or soon,fail=1
10482 # --halt now,done=1 or soon,done=1
10483 # Emulate Bash's +128 if there is a signal
10484 $Global::halt_exitstatus =
10485 ($job->exitstatus()
10487 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
10490 ::debug("halt","Pct: ",$Global::halt_pct,
10491 " count: ",$Global::halt_count,
10492 " status: ",$Global::halt_exitstatus,"\n");
10493 } elsif($Global::halt_success) {
10494 $Global::halt_exitstatus = 0;
10496 if($Global::halt_when eq "soon"
10498 (scalar(keys %Global::running) > 0
10500 $Global::max_jobs_running == 1)) {
10501 ::status
10502 ("$Global::progname: Starting no more jobs. ".
10503 "Waiting for ". (keys %Global::running).
10504 " jobs to finish.");
10505 $Global::start_no_new_jobs ||= 1;
10507 return($Global::halt_when);
10509 return "";
10514 package CommandLine;
10516 sub new($) {
10517 my $class = shift;
10518 my $seq = shift;
10519 my $commandref = shift;
10520 $commandref || die;
10521 my $arg_queue = shift;
10522 my $context_replace = shift;
10523 my $max_number_of_args = shift; # for -N and normal (-n1)
10524 my $transfer_files = shift;
10525 my $return_files = shift;
10526 my $replacecount_ref = shift;
10527 my $len_ref = shift;
10528 my %replacecount = %$replacecount_ref;
10529 my %len = %$len_ref;
10530 for (keys %$replacecount_ref) {
10531 # Total length of this replacement string {} replaced with all args
10532 $len{$_} = 0;
10534 return bless {
10535 'command' => $commandref,
10536 'seq' => $seq,
10537 'len' => \%len,
10538 'arg_list' => [],
10539 'arg_list_flat' => [],
10540 'arg_list_flat_orig' => [undef],
10541 'arg_queue' => $arg_queue,
10542 'max_number_of_args' => $max_number_of_args,
10543 'replacecount' => \%replacecount,
10544 'context_replace' => $context_replace,
10545 'transfer_files' => $transfer_files,
10546 'return_files' => $return_files,
10547 'replaced' => undef,
10548 }, ref($class) || $class;
10551 sub seq($) {
10552 my $self = shift;
10553 return $self->{'seq'};
10556 sub set_seq($$) {
10557 my $self = shift;
10558 $self->{'seq'} = shift;
10561 sub slot($) {
10562 # Find the number of a free job slot and return it
10563 # Uses:
10564 # @Global::slots - list with free jobslots
10565 # Returns:
10566 # $jobslot = number of jobslot
10567 my $self = shift;
10568 if(not $self->{'slot'}) {
10569 if(not @Global::slots) {
10570 # $max_slot_number will typically be $Global::max_jobs_running
10571 push @Global::slots, ++$Global::max_slot_number;
10573 $self->{'slot'} = shift @Global::slots;
10575 return $self->{'slot'};
10579 my $already_spread;
10580 my $darwin_max_len;
10582 sub populate($) {
10583 # Add arguments from arg_queue until the number of arguments or
10584 # max line length is reached
10585 # Uses:
10586 # $Global::minimal_command_line_length
10587 # $opt::cat
10588 # $opt::fifo
10589 # $Global::JobQueue
10590 # $opt::m
10591 # $opt::X
10592 # $Global::max_jobs_running
10593 # Returns: N/A
10594 my $self = shift;
10595 my $next_arg;
10596 my $max_len = $Global::minimal_command_line_length
10597 || Limits::Command::max_length();
10598 if($^O eq "darwin") {
10599 # Darwin's limit is affected by:
10600 # * number of environment names (variables+functions)
10601 # * size of environment
10602 # * the length of arguments:
10603 # a one-char argument lowers the limit by 5
10604 # To be safe assume all arguments are one-char
10605 # The max_len is cached between runs, but if the size of
10606 # the environment is different we need to recompute the
10607 # usable max length for this run of GNU Parallel
10608 # See https://unix.stackexchange.com/a/604943/2972
10609 if(not $darwin_max_len) {
10610 my $envc = (keys %ENV);
10611 my $envn = length join"",(keys %ENV);
10612 my $envv = length join"",(values %ENV);
10613 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
10614 ::debug("init",
10615 "length: $darwin_max_len ".
10616 "3+($max_len - $envn - $envv)/5 - $envc*2");
10618 $max_len = $darwin_max_len;
10620 if($opt::cat or $opt::fifo) {
10621 # Get the empty arg added by --pipepart (if any)
10622 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
10623 # $PARALLEL_TMP will point to a tempfile that will be used as {}
10624 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
10625 unget([Arg->new('$PARALLEL_TMP')]);
10627 while (not $self->{'arg_queue'}->empty()) {
10628 $next_arg = $self->{'arg_queue'}->get();
10629 if(not defined $next_arg) {
10630 next;
10632 $self->push($next_arg);
10633 if($self->len() >= $max_len) {
10634 # Command length is now > max_length
10635 # If there are arguments: remove the last
10636 # If there are no arguments: Error
10637 # TODO stuff about -x opt_x
10638 if($self->number_of_args() > 1) {
10639 # There is something to work on
10640 $self->{'arg_queue'}->unget($self->pop());
10641 last;
10642 } else {
10643 my $args = join(" ", map { $_->orig() } @$next_arg);
10644 ::error("Command line too long (".
10645 $self->len(). " >= ".
10646 $max_len.
10647 ") at input ".
10648 $self->{'arg_queue'}->arg_number().
10649 ": ".
10650 ((length $args > 50) ?
10651 (substr($args,0,50))."..." :
10652 $args));
10653 $self->{'arg_queue'}->unget($self->pop());
10654 ::wait_and_exit(255);
10658 if(defined $self->{'max_number_of_args'}) {
10659 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
10660 last;
10664 if(($opt::m or $opt::X) and not $already_spread
10665 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
10666 # -m or -X and EOF => Spread the arguments over all jobslots
10667 # (unless they are already spread)
10668 $already_spread ||= 1;
10669 if($self->number_of_args() > 1) {
10670 $self->{'max_number_of_args'} =
10671 ::ceil($self->number_of_args()/$Global::max_jobs_running);
10672 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
10673 $self->{'max_number_of_args'};
10674 $self->{'arg_queue'}->unget($self->pop_all());
10675 while($self->number_of_args() < $self->{'max_number_of_args'}) {
10676 $self->push($self->{'arg_queue'}->get());
10679 $Global::JobQueue->flush_total_jobs();
10682 if($opt::sqlmaster) {
10683 # Insert the V1..Vn for this $seq in SQL table instead of generating one
10684 $Global::sql->insert_records($self->seq(), $self->{'command'},
10685 $self->{'arg_list_flat_orig'});
10690 sub push($) {
10691 # Add one or more records as arguments
10692 # Returns: N/A
10693 my $self = shift;
10694 my $record = shift;
10695 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
10696 push @{$self->{'arg_list_flat'}}, @$record;
10697 push @{$self->{'arg_list'}}, $record;
10698 # Make @arg available for {= =}
10699 *Arg::arg = $self->{'arg_list_flat_orig'};
10701 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10702 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10703 if($perlexpr =~ /^(\d+) /) {
10704 # Positional
10705 defined($record->[$1-1]) or next;
10706 $self->{'len'}{$perlexpr} +=
10707 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10708 } else {
10709 for my $arg (@$record) {
10710 if(defined $arg) {
10711 $self->{'len'}{$perlexpr} +=
10712 length $arg->replace($perlexpr,$quote_arg,$self);
10719 sub pop($) {
10720 # Remove last argument
10721 # Returns:
10722 # the last record
10723 my $self = shift;
10724 my $record = pop @{$self->{'arg_list'}};
10725 # pop off arguments from @$record
10726 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
10727 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
10728 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10729 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10730 if($perlexpr =~ /^(\d+) /) {
10731 # Positional
10732 defined($record->[$1-1]) or next;
10733 $self->{'len'}{$perlexpr} -=
10734 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10735 } else {
10736 for my $arg (@$record) {
10737 if(defined $arg) {
10738 $self->{'len'}{$perlexpr} -=
10739 length $arg->replace($perlexpr,$quote_arg,$self);
10744 return $record;
10747 sub pop_all($) {
10748 # Remove all arguments and zeros the length of replacement perlexpr
10749 # Returns:
10750 # all records
10751 my $self = shift;
10752 my @popped = @{$self->{'arg_list'}};
10753 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10754 $self->{'len'}{$perlexpr} = 0;
10756 $self->{'arg_list'} = [];
10757 $self->{'arg_list_flat_orig'} = [undef];
10758 $self->{'arg_list_flat'} = [];
10759 return @popped;
10762 sub number_of_args($) {
10763 # The number of records
10764 # Returns:
10765 # number of records
10766 my $self = shift;
10767 # This is really the number of records
10768 return $#{$self->{'arg_list'}}+1;
10771 sub number_of_recargs($) {
10772 # The number of args in records
10773 # Returns:
10774 # number of args records
10775 my $self = shift;
10776 my $sum = 0;
10777 my $nrec = scalar @{$self->{'arg_list'}};
10778 if($nrec) {
10779 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
10781 return $sum;
10784 sub args_as_string($) {
10785 # Returns:
10786 # all unmodified arguments joined with ' ' (similar to {})
10787 my $self = shift;
10788 return (join " ", map { $_->orig() }
10789 map { @$_ } @{$self->{'arg_list'}});
10792 sub results_out($) {
10793 sub max_file_name_length {
10794 # Figure out the max length of a subdir
10795 # TODO and the max total length
10796 # Ext4 = 255,130816
10797 # Uses:
10798 # $Global::max_file_length is set
10799 # Returns:
10800 # $Global::max_file_length
10801 my $testdir = shift;
10803 my $upper = 100_000_000;
10804 # Dir length of 8 chars is supported everywhere
10805 my $len = 8;
10806 my $dir = "x"x$len;
10807 do {
10808 rmdir($testdir."/".$dir);
10809 $len *= 16;
10810 $dir = "x"x$len;
10811 } while ($len < $upper and mkdir $testdir."/".$dir);
10812 # Then search for the actual max length between $len/16 and $len
10813 my $min = $len/16;
10814 my $max = $len;
10815 while($max-$min > 5) {
10816 # If we are within 5 chars of the exact value:
10817 # it is not worth the extra time to find the exact value
10818 my $test = int(($min+$max)/2);
10819 $dir = "x"x$test;
10820 if(mkdir $testdir."/".$dir) {
10821 rmdir($testdir."/".$dir);
10822 $min = $test;
10823 } else {
10824 $max = $test;
10827 $Global::max_file_length = $min;
10828 return $min;
10831 my $self = shift;
10832 my $out = $self->replace_placeholders([$opt::results],0,0);
10833 if($out eq $opt::results) {
10834 # $opt::results simple string: Append args_as_dirname
10835 my $args_as_dirname = $self->args_as_dirname();
10836 # Output in: prefix/name1/val1/name2/val2/stdout
10837 $out = $opt::results."/".$args_as_dirname;
10838 if(-d $out or eval{ File::Path::mkpath($out); }) {
10839 # OK
10840 } else {
10841 # mkpath failed: Argument probably too long.
10842 # Set $Global::max_file_length, which will keep the individual
10843 # dir names shorter than the max length
10844 max_file_name_length($opt::results);
10845 $args_as_dirname = $self->args_as_dirname();
10846 # prefix/name1/val1/name2/val2/
10847 $out = $opt::results."/".$args_as_dirname;
10848 File::Path::mkpath($out);
10850 $out .="/";
10851 } else {
10852 if($out =~ m:/$:) {
10853 # / = dir
10854 if(-d $out or eval{ File::Path::mkpath($out); }) {
10855 # OK
10856 } else {
10857 ::error("Cannot make dir '$out'.");
10858 ::wait_and_exit(255);
10860 } else {
10861 $out =~ m:(.*)/:;
10862 File::Path::mkpath($1);
10865 return $out;
10868 sub args_as_dirname($) {
10869 # Returns:
10870 # all unmodified arguments joined with '/' (similar to {})
10871 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10872 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
10873 my $self = shift;
10874 my @res = ();
10876 for my $rec_ref (@{$self->{'arg_list'}}) {
10877 # If headers are used, sort by them.
10878 # Otherwise keep the order from the command line.
10879 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
10880 for my $n (@header_indexes_sorted) {
10881 CORE::push(@res,
10882 $Global::input_source_header{$n},
10883 map { my $s = $_;
10884 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10885 $s =~ s/\\/\\\\/g;
10886 $s =~ s/\t/\\t/g;
10887 $s =~ s/\0/\\0/g;
10888 $s =~ s:/:\\_:g;
10889 if($Global::max_file_length) {
10890 # Keep each subdir shorter than the longest
10891 # allowed file name
10892 $s = substr($s,0,$Global::max_file_length);
10894 $s; }
10895 $rec_ref->[$n-1]->orig());
10898 return join "/", @res;
10901 sub header_indexes_sorted($) {
10902 # Sort headers first by number then by name.
10903 # E.g.: 1a 1b 11a 11b
10904 # Returns:
10905 # Indexes of %Global::input_source_header sorted
10906 my $max_col = shift;
10908 no warnings 'numeric';
10909 for my $col (1 .. $max_col) {
10910 # Make sure the header is defined. If it is not: use column number
10911 if(not defined $Global::input_source_header{$col}) {
10912 $Global::input_source_header{$col} = $col;
10915 my @header_indexes_sorted = sort {
10916 # Sort headers numerically then asciibetically
10917 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
10919 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
10920 } 1 .. $max_col;
10921 return @header_indexes_sorted;
10924 sub len($) {
10925 # Uses:
10926 # @opt::shellquote
10927 # The length of the command line with args substituted
10928 my $self = shift;
10929 my $len = 0;
10930 # Add length of the original command with no args
10931 # Length of command w/ all replacement args removed
10932 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
10933 ::debug("length", "noncontext + command: $len\n");
10934 # MacOS has an overhead of 8 bytes per argument
10935 my $darwin = ($^O eq "darwin") ? 8 : 0;
10936 my $recargs = $self->number_of_recargs();
10937 if($self->{'context_replace'}) {
10938 # Context is duplicated for each arg
10939 $len += $recargs * $self->{'len'}{'context'};
10940 for my $replstring (keys %{$self->{'replacecount'}}) {
10941 # If the replacements string is more than once: mulitply its length
10942 $len += $self->{'len'}{$replstring} *
10943 $self->{'replacecount'}{$replstring};
10944 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
10945 $self->{'replacecount'}{$replstring}, "\n");
10947 # echo 11 22 33 44 55 66 77 88 99 1010
10948 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
10949 # 5 + ctxgrp*arg
10950 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10951 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10952 # Add space between context groups
10953 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10954 if($darwin) {
10955 $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
10957 } else {
10958 # Each replacement string may occur several times
10959 # Add the length for each time
10960 $len += 1*$self->{'len'}{'context'};
10961 ::debug("length", "context+noncontext + command: $len\n");
10962 for my $replstring (keys %{$self->{'replacecount'}}) {
10963 # (space between recargs + length of replacement)
10964 # * number this replacement is used
10965 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10966 $self->{'replacecount'}{$replstring};
10967 if($darwin) {
10968 $len += ($recargs * $self->{'replacecount'}{$replstring}
10969 * $darwin);
10973 if(defined $Global::parallel_env) {
10974 # If we are using --env, add the prefix for that, too.
10975 $len += length $Global::parallel_env;
10977 if($Global::quoting) {
10978 # Pessimistic length if -q is set
10979 # Worse than worst case: ' => "'" + " => '"'
10980 # TODO can we count the number of expanding chars?
10981 # and count them in arguments, too?
10982 $len *= 3;
10984 if(@opt::shellquote) {
10985 # Pessimistic length if --shellquote is set
10986 # Worse than worst case: ' => "'"
10987 for(@opt::shellquote) {
10988 $len *= 3;
10990 $len *= 5;
10992 if(@opt::sshlogin) {
10993 # Pessimistic length if remote
10994 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
10995 $len = int($len*4/3);
10998 return $len;
11001 sub replaced($) {
11002 # Uses:
11003 # $Global::quote_replace
11004 # $Global::quoting
11005 # Returns:
11006 # $replaced = command with place holders replaced and prepended
11007 my $self = shift;
11008 if(not defined $self->{'replaced'}) {
11009 # Don't quote arguments if the input is the full command line
11010 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11011 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
11012 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
11013 $self->{'replaced'} = $self->
11014 replace_placeholders($self->{'command'},$Global::quoting,
11015 $quote_arg);
11016 my $len = length $self->{'replaced'};
11017 if ($len != $self->len()) {
11018 ::debug("length", $len, " != ", $self->len(),
11019 " ", $self->{'replaced'}, "\n");
11020 } else {
11021 ::debug("length", $len, " == ", $self->len(),
11022 " ", $self->{'replaced'}, "\n");
11025 return $self->{'replaced'};
11028 sub replace_placeholders($$$$) {
11029 # Replace foo{}bar with fooargbar
11030 # Input:
11031 # $targetref = command as shell words
11032 # $quote = should everything be quoted?
11033 # $quote_arg = should replaced arguments be quoted?
11034 # Uses:
11035 # @Arg::arg = arguments as strings to be use in {= =}
11036 # Returns:
11037 # @target with placeholders replaced
11038 my $self = shift;
11039 my $targetref = shift;
11040 my $quote = shift;
11041 my $quote_arg = shift;
11042 my %replace;
11044 # Token description:
11045 # \0spc = unquoted space
11046 # \0end = last token element
11047 # \0ign = dummy token to be ignored
11048 # \257<...\257> = replacement expression
11049 # " " = quoted space, that splits -X group
11050 # text = normal text - possibly part of -X group
11051 my $spacer = 0;
11052 my @tokens = grep { length $_ > 0 } map {
11053 if(/^\257<|^ $/) {
11054 # \257<...\257> or space
11056 } else {
11057 # Split each space/tab into a token
11058 split /(?=\s)|(?<=\s)/
11061 # Split \257< ... \257> into own token
11062 map { split /(?=\257<)|(?<=\257>)/ }
11063 # Insert "\0spc" between every element
11064 # This space should never be quoted
11065 map { $spacer++ ? ("\0spc",$_) : $_ }
11066 map { $_ eq "" ? "\0empty" : $_ }
11067 @$targetref;
11069 if(not @tokens) {
11070 # @tokens is empty: Return empty array
11071 return @tokens;
11073 ::debug("replace", "Tokens ".join":",@tokens,"\n");
11074 # Make it possible to use $arg[2] in {= =}
11075 *Arg::arg = $self->{'arg_list_flat_orig'};
11076 # Flat list:
11077 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
11078 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
11079 if(not @{$self->{'arg_list_flat'}}) {
11080 @{$self->{'arg_list_flat'}} = Arg->new("");
11082 my $argref = $self->{'arg_list_flat'};
11083 # Number of arguments - used for positional arguments
11084 my $n = $#$argref+1;
11086 # $self is actually a CommandLine-object,
11087 # but it looks nice to be able to say {= $job->slot() =}
11088 my $job = $self;
11089 # @replaced = tokens with \257< \257> replaced
11090 my @replaced;
11091 if($self->{'context_replace'}) {
11092 my @ctxgroup;
11093 for my $t (@tokens,"\0end") {
11094 # \0end = last token was end of tokens.
11095 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
11096 # Context group complete: Replace in it
11097 if(grep { /^\257</ } @ctxgroup) {
11098 # Context group contains a replacement string:
11099 # Copy once per arg
11100 my $space = "\0ign";
11101 for my $arg (@$argref) {
11102 my $normal_replace;
11103 # Push output
11104 # Put unquoted space before each context group
11105 # except the first
11106 CORE::push @replaced, $space, map {
11107 $a = $_;
11108 if($a =~
11109 s{\257<(-?\d+)?(.*)\257>}
11111 if($1) {
11112 # Positional replace
11113 # Find the relevant arg and replace it
11114 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
11115 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11116 replace($2,$quote_arg,$self)
11117 : "");
11118 } else {
11119 # Normal replace
11120 $normal_replace ||= 1;
11121 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11123 }sgxe) {
11124 # Token is \257<..\257>
11125 } else {
11126 if($Global::escape_string_present) {
11127 # Command line contains \257:
11128 # Unescape it \257\256 => \257
11129 $a =~ s/\257\256/\257/g;
11133 } @ctxgroup;
11134 $normal_replace or last;
11135 $space = "\0spc";
11137 } else {
11138 # Context group has no a replacement string: Copy it once
11139 CORE::push @replaced, map {
11140 $Global::escape_string_present and s/\257\256/\257/g; $_;
11141 } @ctxgroup;
11143 # New context group
11144 @ctxgroup=();
11146 if($t eq "\0spc" or $t eq " ") {
11147 CORE::push @replaced,$t;
11148 } else {
11149 CORE::push @ctxgroup,$t;
11152 } else {
11153 # @group = @token
11154 # Replace in group
11155 # Push output
11156 # repquote = no if {} first on line, no if $quote, yes otherwise
11157 for my $t (@tokens) {
11158 if($t =~ /^\257</) {
11159 my $space = "\0ign";
11160 for my $arg (@$argref) {
11161 my $normal_replace;
11162 $a = $t;
11163 $a =~
11164 s{\257<(-?\d+)?(.*)\257>}
11166 if($1) {
11167 # Positional replace
11168 # Find the relevant arg and replace it
11169 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
11170 # If defined: replace
11171 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11172 replace($2,$quote_arg,$self)
11173 : "");
11174 } else {
11175 # Normal replace
11176 $normal_replace ||= 1;
11177 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11179 }sgxe;
11180 CORE::push @replaced, $space, $a;
11181 $normal_replace or last;
11182 $space = "\0spc";
11184 } else {
11185 # No replacement
11186 CORE::push @replaced, map {
11187 $Global::escape_string_present and s/\257\256/\257/g; $_;
11188 } $t;
11192 *Arg::arg = [];
11193 ::debug("replace","Replaced: ".join":",@replaced,"\n");
11195 # Put tokens into groups that may be quoted.
11196 my @quotegroup;
11197 my @quoted;
11198 for (map { $_ eq "\0empty" ? "" : $_ }
11199 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
11200 @replaced, "\0end") {
11201 if($_ eq "\0spc" or $_ eq "\0end") {
11202 # \0spc splits quotable groups
11203 if($quote) {
11204 if(@quotegroup) {
11205 CORE::push @quoted, ::Q(join"",@quotegroup);;
11207 } else {
11208 CORE::push @quoted, join"",@quotegroup;
11210 @quotegroup = ();
11211 } else {
11212 CORE::push @quotegroup, $_;
11215 ::debug("replace","Quoted: ".join":",@quoted,"\n");
11216 return wantarray ? @quoted : "@quoted";
11219 sub skip($) {
11220 # Skip this job
11221 my $self = shift;
11222 $self->{'skip'} = 1;
11226 package CommandLineQueue;
11228 sub new($) {
11229 my $class = shift;
11230 my $commandref = shift;
11231 my $read_from = shift;
11232 my $context_replace = shift || 0;
11233 my $max_number_of_args = shift;
11234 my $transfer_files = shift;
11235 my $return_files = shift;
11236 my @unget = ();
11237 my $posrpl;
11238 my ($replacecount_ref, $len_ref);
11239 my @command = @$commandref;
11240 my $seq = 1;
11241 # Replace replacement strings with {= perl expr =}
11242 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11243 @command = merge_rpl_parts(@command);
11245 # Protect matching inside {= perl expr =}
11246 # by replacing {= and =} with \257< and \257>
11247 # in options that can contain replacement strings:
11248 # @command, --transferfile, --return,
11249 # --tagstring, --workdir, --results
11250 for(@command, @$transfer_files, @$return_files,
11251 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11252 # Skip if undefined
11253 $_ or next;
11254 # Escape \257 => \257\256
11255 $Global::escape_string_present += s/\257/\257\256/g;
11256 # Needs to match rightmost left parens (Perl defaults to leftmost)
11257 # to deal with: {={==} and {={==}=}
11258 # Replace {= -> \257< and =} -> \257>
11260 # Complex way to do:
11261 # s/{=(.*)=}/\257<$1\257>/g
11262 # which would not work
11263 s[\Q$Global::parensleft\E # Match {=
11264 # Match . unless the next string is {= or =}
11265 # needed to force matching the shortest {= =}
11266 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
11267 \Q$Global::parensright\E ] # Match =}
11268 {\257<$1\257>}gxs;
11269 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
11270 # Replace long --rpl's before short ones, as a short may be a
11271 # substring of a long:
11272 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
11274 # Replace the shorthand string (--rpl)
11275 # with the {= perl expr =}
11277 # Avoid searching for shorthand strings inside existing {= perl expr =}
11279 # Replace $$1 in {= perl expr =} with groupings in shorthand string
11281 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
11282 # echo {/.tar/.gz} ::: UU.tar.gz
11283 my ($prefix,$grp_regexp,$postfix) =
11284 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
11285 ( \(.*\) )? # Group capture regexp - e.g (.*)
11286 ( [^)]* )$ # Postfix - e.g }
11287 /xs;
11288 $grp_regexp ||= '';
11289 my $rplval = $Global::rpl{$rpl};
11290 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11291 # Don't replace after \257 unless \257>
11292 \Q$prefix\E $grp_regexp \Q$postfix\E}
11294 # The start remains the same
11295 my $unchanged = $1;
11296 # Dummy entry to start at 1.
11297 my @grp = (1);
11298 # $2 = first ()-group in $grp_regexp
11299 # Put $2 in $grp[1], Put $3 in $grp[2]
11300 # so first ()-group in $grp_regexp is $grp[1];
11301 for(my $i = 2; defined $grp[$#grp]; $i++) {
11302 push @grp, eval '$'.$i;
11304 my $rv = $rplval;
11305 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11306 # in the code to be executed
11307 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11308 # prepend with $_pAr_gRp1 = perlquote($1),
11309 my $set_args = "";
11310 for(my $i = 1;defined $grp[$i]; $i++) {
11311 $set_args .= "\$_pAr_gRp$i = \"" .
11312 ::perl_quote_scalar($grp[$i]) . "\";";
11314 $unchanged . "\257<" . $set_args . $rv . "\257>"
11315 }gxes) {
11317 # Do the same for the positional replacement strings
11318 $posrpl = $rpl;
11319 if($posrpl =~ s/^\{//) {
11320 # Only do this if the shorthand start with {
11321 $prefix=~s/^\{//;
11322 # Don't replace after \257 unless \257>
11323 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11324 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
11326 # The start remains the same
11327 my $unchanged = $1;
11328 my $position = $2;
11329 # Dummy entry to start at 1.
11330 my @grp = (1);
11331 # $3 = first ()-group in $grp_regexp
11332 # Put $3 in $grp[1], Put $4 in $grp[2]
11333 # so first ()-group in $grp_regexp is $grp[1];
11334 for(my $i = 3; defined $grp[$#grp]; $i++) {
11335 push @grp, eval '$'.$i;
11337 my $rv = $rplval;
11338 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11339 # in the code to be executed
11340 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11341 # prepend with $_pAr_gRp1 = perlquote($1),
11342 my $set_args = "";
11343 for(my $i = 1;defined $grp[$i]; $i++) {
11344 $set_args .= "\$_pAr_gRp$i = \"" .
11345 ::perl_quote_scalar($grp[$i]) . "\";";
11347 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
11348 }gxes) {
11354 # Add {} if no replacement strings in @command
11355 ($replacecount_ref, $len_ref, @command) =
11356 replacement_counts_and_lengths($transfer_files,$return_files,@command);
11357 if("@command" =~ /^[^ \t\n=]*\257</) {
11358 # Replacement string is (part of) the command (and not just
11359 # argument or variable definition V1={})
11360 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11361 # Do no quote (Otherwise it will fail if the input contains spaces)
11362 $Global::quote_replace = 0;
11365 if($opt::sqlmaster and $Global::sql->append()) {
11366 $seq = $Global::sql->max_seq() + 1;
11369 return bless {
11370 'unget' => \@unget,
11371 'command' => \@command,
11372 'replacecount' => $replacecount_ref,
11373 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
11374 'context_replace' => $context_replace,
11375 'len' => $len_ref,
11376 'max_number_of_args' => $max_number_of_args,
11377 'size' => undef,
11378 'transfer_files' => $transfer_files,
11379 'return_files' => $return_files,
11380 'seq' => $seq,
11381 }, ref($class) || $class;
11384 sub merge_rpl_parts($) {
11385 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11386 # Input:
11387 # @in = the @command as given by the user
11388 # Uses:
11389 # $Global::parensleft
11390 # $Global::parensright
11391 # Returns:
11392 # @command with parts merged to keep {= and =} as one
11393 my @in = @_;
11394 my @out;
11395 my $l = quotemeta($Global::parensleft);
11396 my $r = quotemeta($Global::parensright);
11398 while(@in) {
11399 my $s = shift @in;
11400 $_ = $s;
11401 # Remove matching (right most) parens
11402 while(s/(.*)$l.*?$r/$1/os) {}
11403 if(/$l/o) {
11404 # Missing right parens
11405 while(@in) {
11406 $s .= " ".shift @in;
11407 $_ = $s;
11408 while(s/(.*)$l.*?$r/$1/os) {}
11409 if(not /$l/o) {
11410 last;
11414 push @out, $s;
11416 return @out;
11419 sub replacement_counts_and_lengths($$@) {
11420 # Count the number of different replacement strings.
11421 # Find the lengths of context for context groups and non-context
11422 # groups.
11423 # If no {} found in @command: add it to @command
11425 # Input:
11426 # \@transfer_files = array of filenames to transfer
11427 # \@return_files = array of filenames to return
11428 # @command = command template
11429 # Output:
11430 # \%replacecount, \%len, @command
11431 my $transfer_files = shift;
11432 my $return_files = shift;
11433 my @command = @_;
11434 my (%replacecount,%len);
11435 my $sum = 0;
11436 while($sum == 0) {
11437 # Count how many times each replacement string is used
11438 my @cmd = @command;
11439 my $contextlen = 0;
11440 my $noncontextlen = 0;
11441 my $contextgroups = 0;
11442 for my $c (@cmd) {
11443 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
11444 # %replacecount = { "perlexpr" => number of times seen }
11445 # e.g { "s/a/b/" => 2 }
11446 $replacecount{$1}++;
11447 $sum++;
11449 # Measure the length of the context around the {= perl expr =}
11450 # Use that {=...=} has been replaced with \000 above
11451 # So there is no need to deal with \257<
11452 while($c =~ s/ (\S*\000\S*) //xs) {
11453 my $w = $1;
11454 $w =~ tr/\000//d; # Remove all \000's
11455 $contextlen += length($w);
11456 $contextgroups++;
11458 # All {= perl expr =} have been removed: The rest is non-context
11459 $noncontextlen += length $c;
11461 for(@$transfer_files, @$return_files,
11462 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11463 # Options that can contain replacement strings
11464 $_ or next;
11465 my $t = $_;
11466 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
11467 # %replacecount = { "perlexpr" => number of times seen }
11468 # e.g { "$_++" => 2 }
11469 # But for tagstring we just need to mark it as seen
11470 $replacecount{$1} ||= 1;
11473 if($opt::bar) {
11474 # If the command does not contain {} force it to be computed
11475 # as it is being used by --bar
11476 $replacecount{""} ||= 1;
11479 $len{'context'} = 0+$contextlen;
11480 $len{'noncontext'} = $noncontextlen;
11481 $len{'contextgroups'} = $contextgroups;
11482 $len{'noncontextgroups'} = @cmd-$contextgroups;
11483 ::debug("length", "@command Context: ", $len{'context'},
11484 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
11485 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
11486 if($sum == 0) {
11487 if(not @command) {
11488 # Default command = {}
11489 @command = ("\257<\257>");
11490 } elsif(($opt::pipe or $opt::pipepart)
11491 and not $opt::fifo and not $opt::cat) {
11492 # With --pipe / --pipe-part you can have no replacement
11493 last;
11494 } else {
11495 # Append {} to the command if there are no {...}'s and no {=...=}
11496 push @command, ("\257<\257>");
11500 return(\%replacecount,\%len,@command);
11503 sub get($) {
11504 my $self = shift;
11505 if(@{$self->{'unget'}}) {
11506 my $cmd_line = shift @{$self->{'unget'}};
11507 return ($cmd_line);
11508 } else {
11509 if($opt::sqlworker) {
11510 # Get the sequence number from the SQL table
11511 $self->set_seq($SQL::next_seq);
11512 # Get the command from the SQL table
11513 $self->{'command'} = $SQL::command_ref;
11514 my @command;
11515 # Recompute replace counts based on the read command
11516 ($self->{'replacecount'},
11517 $self->{'len'}, @command) =
11518 replacement_counts_and_lengths($self->{'transfer_files'},
11519 $self->{'return_files'},
11520 @$SQL::command_ref);
11521 if("@command" =~ /^[^ \t\n=]*\257</) {
11522 # Replacement string is (part of) the command (and not just
11523 # argument or variable definition V1={})
11524 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11525 # Do no quote (Otherwise it will fail if the input contains spaces)
11526 $Global::quote_replace = 0;
11530 my $cmd_line = CommandLine->new($self->seq(),
11531 $self->{'command'},
11532 $self->{'arg_queue'},
11533 $self->{'context_replace'},
11534 $self->{'max_number_of_args'},
11535 $self->{'transfer_files'},
11536 $self->{'return_files'},
11537 $self->{'replacecount'},
11538 $self->{'len'},
11540 $cmd_line->populate();
11541 ::debug("run","cmd_line->number_of_args ",
11542 $cmd_line->number_of_args(), "\n");
11543 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
11544 if($cmd_line->replaced() eq "") {
11545 # Empty command - pipe requires a command
11546 ::error("--pipe/--pipepart must have a command to pipe into ".
11547 "(e.g. 'cat').");
11548 ::wait_and_exit(255);
11550 } elsif($cmd_line->number_of_args() == 0) {
11551 # We did not get more args - maybe at EOF string?
11552 return undef;
11554 $self->set_seq($self->seq()+1);
11555 return $cmd_line;
11559 sub unget($) {
11560 my $self = shift;
11561 unshift @{$self->{'unget'}}, @_;
11564 sub empty($) {
11565 my $self = shift;
11566 my $empty = (not @{$self->{'unget'}}) &&
11567 $self->{'arg_queue'}->empty();
11568 ::debug("run", "CommandLineQueue->empty $empty");
11569 return $empty;
11572 sub seq($) {
11573 my $self = shift;
11574 return $self->{'seq'};
11577 sub set_seq($$) {
11578 my $self = shift;
11579 $self->{'seq'} = shift;
11582 sub quote_args($) {
11583 my $self = shift;
11584 # If there is not command emulate |bash
11585 return $self->{'command'};
11589 package Limits::Command;
11591 # Maximal command line length (for -m and -X)
11592 sub max_length($) {
11593 # Find the max_length of a command line and cache it
11594 # Returns:
11595 # number of chars on the longest command line allowed
11596 if(not $Limits::Command::line_max_len) {
11597 # Disk cache of max command line length
11598 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
11599 "/linelen";
11600 my $cached_limit;
11601 if(-e $len_cache) {
11602 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
11603 $cached_limit = <$fh>;
11604 close $fh;
11606 if(not $cached_limit) {
11607 $cached_limit = real_max_length();
11608 # If $HOME is write protected: Do not fail
11609 my $dir = ::dirname($len_cache);
11610 -d $dir or eval { File::Path::mkpath($dir); };
11611 open(my $fh, ">", $len_cache.$$);
11612 print $fh $cached_limit;
11613 close $fh;
11614 rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
11616 $Limits::Command::line_max_len = tmux_length($cached_limit);
11617 if($opt::max_chars) {
11618 if($opt::max_chars <= $cached_limit) {
11619 $Limits::Command::line_max_len = $opt::max_chars;
11620 } else {
11621 ::warning("Value for -s option should be < $cached_limit.");
11625 return int($Limits::Command::line_max_len);
11628 sub real_max_length() {
11629 # Find the max_length of a command line
11630 # Returns:
11631 # The maximal command line length with 1 byte arguments
11632 # return find_max(" x");
11633 return find_max("x");
11636 sub find_max($) {
11637 my $string = shift;
11638 # This is slow on Cygwin, so give Cygwin users a warning
11639 if($^O eq "cygwin") {
11640 ::warning("Finding the maximal command line length. This may take up to 30 seconds.")
11642 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
11643 my $upper = 100_000_000;
11644 # 1000 is supported everywhere, so the search can start anywhere 1..999
11645 # 324 makes the search much faster on Cygwin, so let us use that
11646 my $len = 324;
11647 do {
11648 if($len > $upper) { return $len };
11649 $len *= 16;
11650 } while (is_acceptable_command_line_length($len,$string));
11651 # Then search for the actual max length between 0 and upper bound
11652 return binary_find_max(int($len/16),$len,$string);
11655 # Prototype forwarding
11656 sub binary_find_max($$$);
11657 sub binary_find_max($$$) {
11658 # Given a lower and upper bound find the max (length or args) of a command line
11659 # Returns:
11660 # number of chars on the longest command line allowed
11661 my ($lower, $upper, $string) = (@_);
11662 if($lower == $upper or $lower == $upper-1) { return $lower; }
11663 my $middle = int (($upper-$lower)/2 + $lower);
11664 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
11665 if (is_acceptable_command_line_length($middle,$string)) {
11666 return binary_find_max($middle,$upper,$string);
11667 } else {
11668 return binary_find_max($lower,$middle,$string);
11672 sub is_acceptable_command_line_length($$) {
11673 # Test if a command line of this length can run
11674 # in the current environment
11675 # If the string is " x" it tests how many args are allowed
11676 # Returns:
11677 # 0 if the command line length is too long
11678 # 1 otherwise
11679 my $len = shift;
11680 my $string = shift;
11681 if($Global::parallel_env) {
11682 $len += length $Global::parallel_env;
11684 # Force using non-built-in command
11685 ::qqx("/bin/echo ".${string}x(($len-length "/bin/echo ")/length $string));
11686 ::debug("init", "$len=$? ");
11687 return not $?;
11690 sub tmux_length($) {
11691 # If $opt::tmux set, find the limit for tmux
11692 # tmux 1.8 has a 2kB limit
11693 # tmux 1.9 has a 16kB limit
11694 # tmux 2.0 has a 16kB limit
11695 # tmux 2.1 has a 16kB limit
11696 # tmux 2.2 has a 16kB limit
11697 # Input:
11698 # $len = maximal command line length
11699 # Returns:
11700 # $tmux_len = maximal length runable in tmux
11701 local $/ = "\n";
11702 my $len = shift;
11703 if($opt::tmux) {
11704 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11705 if(not ::which($ENV{'PARALLEL_TMUX'})) {
11706 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
11707 ::wait_and_exit(255);
11709 my @out;
11710 for my $l (1, 2020, 16320, 100000, $len) {
11711 my $tmpfile = ::tmpname("tms");
11712 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
11713 " -S $tmpfile new-session -d -n echo $l".
11714 ("x"x$l). " && echo $l; rm -f $tmpfile";
11715 push @out, ::qqx($tmuxcmd);
11716 ::rm($tmpfile);
11718 ::debug("tmux","tmux-out ",@out);
11719 chomp @out;
11720 # The arguments is given 3 times on the command line
11721 # and the wrapping is around 30 chars
11722 # (29 for tmux1.9, 33 for tmux1.8)
11723 my $tmux_len = ::max(@out);
11724 $len = ::min($len,int($tmux_len/4-33));
11725 ::debug("tmux","tmux-length ",$len);
11727 return $len;
11731 package RecordQueue;
11733 sub new($) {
11734 my $class = shift;
11735 my $fhs = shift;
11736 my $colsep = shift;
11737 my @unget = ();
11738 my $arg_sub_queue;
11739 if($opt::sqlworker) {
11740 # Open SQL table
11741 $arg_sub_queue = SQLRecordQueue->new();
11742 } elsif(defined $colsep) {
11743 # Open one file with colsep or CSV
11744 $arg_sub_queue = RecordColQueue->new($fhs);
11745 } else {
11746 # Open one or more files if multiple -a
11747 $arg_sub_queue = MultifileQueue->new($fhs);
11749 return bless {
11750 'unget' => \@unget,
11751 'arg_number' => 0,
11752 'arg_sub_queue' => $arg_sub_queue,
11753 }, ref($class) || $class;
11756 sub get($) {
11757 # Returns:
11758 # reference to array of Arg-objects
11759 my $self = shift;
11760 if(@{$self->{'unget'}}) {
11761 $self->{'arg_number'}++;
11762 # Flush cached computed replacements in Arg-objects
11763 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11764 my $ret = shift @{$self->{'unget'}};
11765 if($ret) {
11766 map { $_->flush_cache() } @$ret;
11768 return $ret;
11770 my $ret = $self->{'arg_sub_queue'}->get();
11771 if($ret) {
11772 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
11773 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
11774 # to mean no-string
11775 ::warning("A NUL character in the input was replaced with \\0.",
11776 "NUL cannot be passed through in the argument list.",
11777 "Did you mean to use the --null option?");
11778 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
11779 # Replace \0 with \\0
11780 my $a = $_->orig();
11781 $a =~ s/\0/\\0/g;
11782 $_->set_orig($a);
11785 if(defined $Global::max_number_of_args
11786 and $Global::max_number_of_args == 0) {
11787 ::debug("run", "Read 1 but return 0 args\n");
11788 # \0noarg => nothing (not the empty string)
11789 map { $_->set_orig("\0noarg"); } @$ret;
11791 # Flush cached computed replacements in Arg-objects
11792 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11793 map { $_->flush_cache() } @$ret;
11795 return $ret;
11798 sub unget($) {
11799 my $self = shift;
11800 ::debug("run", "RecordQueue-unget\n");
11801 $self->{'arg_number'} -= @_;
11802 unshift @{$self->{'unget'}}, @_;
11805 sub empty($) {
11806 my $self = shift;
11807 my $empty = (not @{$self->{'unget'}}) &&
11808 $self->{'arg_sub_queue'}->empty();
11809 ::debug("run", "RecordQueue->empty $empty");
11810 return $empty;
11813 sub arg_number($) {
11814 my $self = shift;
11815 return $self->{'arg_number'};
11819 package RecordColQueue;
11821 sub new($) {
11822 my $class = shift;
11823 my $fhs = shift;
11824 my @unget = ();
11825 my $arg_sub_queue = MultifileQueue->new($fhs);
11826 return bless {
11827 'unget' => \@unget,
11828 'arg_sub_queue' => $arg_sub_queue,
11829 }, ref($class) || $class;
11832 sub get($) {
11833 # Returns:
11834 # reference to array of Arg-objects
11835 my $self = shift;
11836 if(@{$self->{'unget'}}) {
11837 return shift @{$self->{'unget'}};
11839 my $unget_ref = $self->{'unget'};
11840 if($self->{'arg_sub_queue'}->empty()) {
11841 return undef;
11843 my $in_record = $self->{'arg_sub_queue'}->get();
11844 if(defined $in_record) {
11845 my @out_record = ();
11846 for my $arg (@$in_record) {
11847 ::debug("run", "RecordColQueue::arg $arg\n");
11848 my $line = $arg->orig();
11849 ::debug("run", "line='$line'\n");
11850 if($line ne "") {
11851 if($opt::csv) {
11852 # Parse CSV
11853 chomp $line;
11854 if(not $Global::csv->parse($line)) {
11855 die "CSV has unexpected format: ^$line^";
11857 for($Global::csv->fields()) {
11858 push @out_record, Arg->new($_);
11860 } else {
11861 for my $s (split /$opt::colsep/o, $line, -1) {
11862 push @out_record, Arg->new($s);
11865 } else {
11866 push @out_record, Arg->new("");
11869 return \@out_record;
11870 } else {
11871 return undef;
11875 sub unget($) {
11876 my $self = shift;
11877 ::debug("run", "RecordColQueue-unget '@_'\n");
11878 unshift @{$self->{'unget'}}, @_;
11881 sub empty($) {
11882 my $self = shift;
11883 my $empty = (not @{$self->{'unget'}}) &&
11884 $self->{'arg_sub_queue'}->empty();
11885 ::debug("run", "RecordColQueue->empty $empty");
11886 return $empty;
11890 package SQLRecordQueue;
11892 sub new($) {
11893 my $class = shift;
11894 my @unget = ();
11895 return bless {
11896 'unget' => \@unget,
11897 }, ref($class) || $class;
11900 sub get($) {
11901 # Returns:
11902 # reference to array of Arg-objects
11903 my $self = shift;
11904 if(@{$self->{'unget'}}) {
11905 return shift @{$self->{'unget'}};
11907 return $Global::sql->get_record();
11910 sub unget($) {
11911 my $self = shift;
11912 ::debug("run", "SQLRecordQueue-unget '@_'\n");
11913 unshift @{$self->{'unget'}}, @_;
11916 sub empty($) {
11917 my $self = shift;
11918 if(@{$self->{'unget'}}) { return 0; }
11919 my $get = $self->get();
11920 if(defined $get) {
11921 $self->unget($get);
11923 my $empty = not $get;
11924 ::debug("run", "SQLRecordQueue->empty $empty");
11925 return $empty;
11929 package MultifileQueue;
11931 @Global::unget_argv=();
11933 sub new($$) {
11934 my $class = shift;
11935 my $fhs = shift;
11936 for my $fh (@$fhs) {
11937 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
11938 ::warning("Input is read from the terminal. You are either an expert",
11939 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
11940 "::: or :::: or -a or to pipe data into parallel. If so",
11941 "consider going through the tutorial: man parallel_tutorial",
11942 "Press CTRL-D to exit.");
11945 return bless {
11946 'unget' => \@Global::unget_argv,
11947 'fhs' => $fhs,
11948 'arg_matrix' => undef,
11949 }, ref($class) || $class;
11952 sub get($) {
11953 my $self = shift;
11954 if($opt::link) {
11955 return $self->link_get();
11956 } else {
11957 return $self->nest_get();
11961 sub unget($) {
11962 my $self = shift;
11963 ::debug("run", "MultifileQueue-unget '@_'\n");
11964 unshift @{$self->{'unget'}}, @_;
11967 sub empty($) {
11968 my $self = shift;
11969 my $empty = (not @Global::unget_argv) &&
11970 not @{$self->{'unget'}};
11971 for my $fh (@{$self->{'fhs'}}) {
11972 $empty &&= eof($fh);
11974 ::debug("run", "MultifileQueue->empty $empty ");
11975 return $empty;
11978 sub link_get($) {
11979 my $self = shift;
11980 if(@{$self->{'unget'}}) {
11981 return shift @{$self->{'unget'}};
11983 my @record = ();
11984 my $prepend;
11985 my $empty = 1;
11986 for my $fh (@{$self->{'fhs'}}) {
11987 my $arg = read_arg_from_fh($fh);
11988 if(defined $arg) {
11989 # Record $arg for recycling at end of file
11990 push @{$self->{'arg_matrix'}{$fh}}, $arg;
11991 push @record, $arg;
11992 $empty = 0;
11993 } else {
11994 ::debug("run", "EOA ");
11995 # End of file: Recycle arguments
11996 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
11997 # return last @{$args->{'args'}{$fh}};
11998 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
12001 if($empty) {
12002 return undef;
12003 } else {
12004 return \@record;
12008 sub nest_get($) {
12009 my $self = shift;
12010 if(@{$self->{'unget'}}) {
12011 return shift @{$self->{'unget'}};
12013 my @record = ();
12014 my $prepend;
12015 my $empty = 1;
12016 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
12017 if(not $self->{'arg_matrix'}) {
12018 # Initialize @arg_matrix with one arg from each file
12019 # read one line from each file
12020 my @first_arg_set;
12021 my $all_empty = 1;
12022 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
12023 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12024 if(defined $arg) {
12025 $all_empty = 0;
12027 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
12028 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
12030 if($all_empty) {
12031 # All filehandles were at eof or eof-string
12032 return undef;
12034 return [@first_arg_set];
12037 # Treat the case with one input source special. For multiple
12038 # input sources we need to remember all previously read values to
12039 # generate all combinations. But for one input source we can
12040 # forget the value after first use.
12041 if($no_of_inputsources == 1) {
12042 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
12043 if(defined($arg)) {
12044 return [$arg];
12046 return undef;
12048 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
12049 if(eof($self->{'fhs'}[$fhno])) {
12050 next;
12051 } else {
12052 # read one
12053 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12054 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
12055 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
12056 $self->{'arg_matrix'}[$fhno][$len] = $arg;
12057 # make all new combinations
12058 my @combarg = ();
12059 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
12060 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
12061 # Is input source --link'ed to the next?
12062 $opt::linkinputsource[$fhn+1]);
12064 # Find only combinations with this new entry
12065 $combarg[2*$fhno] = [$len,$len];
12066 # map combinations
12067 # [ 1, 3, 7 ], [ 2, 4, 1 ]
12068 # =>
12069 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
12070 my @mapped;
12071 for my $c (expand_combinations(@combarg)) {
12072 my @a;
12073 for my $n (0 .. $no_of_inputsources - 1 ) {
12074 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
12076 push @mapped, \@a;
12078 # append the mapped to the ungotten arguments
12079 push @{$self->{'unget'}}, @mapped;
12080 # get the first
12081 if(@mapped) {
12082 return shift @{$self->{'unget'}};
12086 # all are eof or at EOF string; return from the unget queue
12087 return shift @{$self->{'unget'}};
12090 sub read_arg_from_fh($) {
12091 # Read one Arg from filehandle
12092 # Returns:
12093 # Arg-object with one read line
12094 # undef if end of file
12095 my $fh = shift;
12096 my $prepend;
12097 my $arg;
12098 my $half_record = 0;
12099 do {{
12100 # This makes 10% faster
12101 if(not defined ($arg = <$fh>)) {
12102 if(defined $prepend) {
12103 return Arg->new($prepend);
12104 } else {
12105 return undef;
12108 if($opt::csv) {
12109 # We need to read a full CSV line.
12110 if(($arg =~ y/"/"/) % 2 ) {
12111 # The number of " on the line is uneven:
12112 # If we were in a half_record => we have a full record now
12113 # If we were ouside a half_record => we are in a half record now
12114 $half_record = not $half_record;
12116 if($half_record) {
12117 # CSV half-record with quoting:
12118 # col1,"col2 2""x3"" board newline <-this one
12119 # cont",col3
12120 $prepend .= $arg;
12121 redo;
12122 } else {
12123 # Now we have a full CSV record
12126 # Remove delimiter
12127 chomp $arg;
12128 if($Global::end_of_file_string and
12129 $arg eq $Global::end_of_file_string) {
12130 # Ignore the rest of input file
12131 close $fh;
12132 ::debug("run", "EOF-string ($arg) met\n");
12133 if(defined $prepend) {
12134 return Arg->new($prepend);
12135 } else {
12136 return undef;
12139 if(defined $prepend) {
12140 $arg = $prepend.$arg; # For line continuation
12141 undef $prepend;
12143 if($Global::ignore_empty) {
12144 if($arg =~ /^\s*$/) {
12145 redo; # Try the next line
12148 if($Global::max_lines) {
12149 if($arg =~ /\s$/) {
12150 # Trailing space => continued on next line
12151 $prepend = $arg;
12152 redo;
12155 }} while (1 == 0); # Dummy loop {{}} for redo
12156 if(defined $arg) {
12157 return Arg->new($arg);
12158 } else {
12159 ::die_bug("multiread arg undefined");
12163 # Prototype forwarding
12164 sub expand_combinations(@);
12165 sub expand_combinations(@) {
12166 # Input:
12167 # ([xmin,xmax], [ymin,ymax], ...)
12168 # Returns: ([x,y,...],[x,y,...])
12169 # where xmin <= x <= xmax and ymin <= y <= ymax
12170 my $minmax_ref = shift;
12171 my $link = shift; # This is linked to the next input source
12172 my $xmin = $$minmax_ref[0];
12173 my $xmax = $$minmax_ref[1];
12174 my @p;
12175 if(@_) {
12176 my @rest = expand_combinations(@_);
12177 if($link) {
12178 # Linked to next col with --link/:::+/::::+
12179 # TODO BUG does not wrap values if not same number of vals
12180 push(@p, map { [$$_[0], @$_] }
12181 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
12182 } else {
12183 # If there are more columns: Compute those recursively
12184 for(my $x = $xmin; $x <= $xmax; $x++) {
12185 push @p, map { [$x, @$_] } @rest;
12188 } else {
12189 for(my $x = $xmin; $x <= $xmax; $x++) {
12190 push @p, [$x];
12193 return @p;
12197 package Arg;
12199 sub new($) {
12200 my $class = shift;
12201 my $orig = shift;
12202 my @hostgroups;
12203 if($opt::hostgroups) {
12204 if($orig =~ s:@(.+)::) {
12205 # We found hostgroups on the arg
12206 @hostgroups = split(/\+/, $1);
12207 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
12208 # This hostgroup is not defined using -S
12209 # Add it
12210 ::warning("Adding hostgroups: @hostgroups");
12211 # Add sshlogin
12212 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
12213 my $sshlogin = SSHLogin->new($_);
12214 my $sshlogin_string = $sshlogin->string();
12215 $Global::host{$sshlogin_string} = $sshlogin;
12216 $Global::hostgroups{$sshlogin_string} = 1;
12219 } else {
12220 # No hostgroup on the arg => any hostgroup
12221 @hostgroups = (keys %Global::hostgroups);
12224 return bless {
12225 'orig' => $orig,
12226 'hostgroups' => \@hostgroups,
12227 }, ref($class) || $class;
12230 sub Q($) {
12231 # Q alias for ::shell_quote_scalar
12232 my $ret = ::Q($_[0]);
12233 no warnings 'redefine';
12234 *Q = \&::Q;
12235 return $ret;
12238 sub pQ($) {
12239 # pQ alias for ::perl_quote_scalar
12240 my $ret = ::pQ($_[0]);
12241 no warnings 'redefine';
12242 *pQ = \&::pQ;
12243 return $ret;
12246 sub total_jobs() {
12247 return $Global::JobQueue->total_jobs();
12251 my %perleval;
12252 my $job;
12253 sub skip() {
12254 # shorthand for $job->skip();
12255 $job->skip();
12257 sub slot() {
12258 # shorthand for $job->slot();
12259 $job->slot();
12261 sub seq() {
12262 # shorthand for $job->seq();
12263 $job->seq();
12265 sub uq() {
12266 # Do not quote this arg
12267 $Global::unquote_arg = 1;
12270 sub replace($$$$) {
12271 # Calculates the corresponding value for a given perl expression
12272 # Returns:
12273 # The calculated string (quoted if asked for)
12274 my $self = shift;
12275 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
12276 my $quote = shift; # should the string be quoted?
12277 # This is actually a CommandLine-object,
12278 # but it looks nice to be able to say {= $job->slot() =}
12279 $job = shift;
12280 $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
12281 if(not $Global::cache_replacement_eval
12283 not $self->{'cache'}{$perlexpr}) {
12284 # Only compute the value once
12285 # Use $_ as the variable to change
12286 local $_;
12287 if($Global::trim eq "n") {
12288 $_ = $self->{'orig'};
12289 } else {
12290 # Trim the input
12291 $_ = trim_of($self->{'orig'});
12293 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
12294 if(not $perleval{$perlexpr}) {
12295 # Make an anonymous function of the $perlexpr
12296 # And more importantly: Compile it only once
12297 if($perleval{$perlexpr} =
12298 eval('sub { no strict; no warnings; my $job = shift; '.
12299 $perlexpr.' }')) {
12300 # All is good
12301 } else {
12302 # The eval failed. Maybe $perlexpr is invalid perl?
12303 ::error("Cannot use $perlexpr: $@");
12304 ::wait_and_exit(255);
12307 # Execute the function
12308 $perleval{$perlexpr}->($job);
12309 $self->{'cache'}{$perlexpr} = $_;
12310 if($Global::unquote_arg) {
12311 # uq() was called in perlexpr
12312 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
12313 # Reset for next perlexpr
12314 $Global::unquote_arg = 0;
12317 # Return the value quoted if needed
12318 if($self->{'cache'}{'unquote'}{$perlexpr}) {
12319 return($self->{'cache'}{$perlexpr});
12320 } else {
12321 return($quote ? Q($self->{'cache'}{$perlexpr})
12322 : $self->{'cache'}{$perlexpr});
12327 sub flush_cache($) {
12328 # Flush cache of computed values
12329 my $self = shift;
12330 $self->{'cache'} = undef;
12333 sub orig($) {
12334 my $self = shift;
12335 return $self->{'orig'};
12338 sub set_orig($$) {
12339 my $self = shift;
12340 $self->{'orig'} = shift;
12343 sub trim_of($) {
12344 # Removes white space as specifed by --trim:
12345 # n = nothing
12346 # l = start
12347 # r = end
12348 # lr|rl = both
12349 # Returns:
12350 # string with white space removed as needed
12351 my @strings = map { defined $_ ? $_ : "" } (@_);
12352 my $arg;
12353 if($Global::trim eq "n") {
12354 # skip
12355 } elsif($Global::trim eq "l") {
12356 for my $arg (@strings) { $arg =~ s/^\s+//; }
12357 } elsif($Global::trim eq "r") {
12358 for my $arg (@strings) { $arg =~ s/\s+$//; }
12359 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
12360 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
12361 } else {
12362 ::error("--trim must be one of: r l rl lr.");
12363 ::wait_and_exit(255);
12365 return wantarray ? @strings : "@strings";
12369 package TimeoutQueue;
12371 sub new($) {
12372 my $class = shift;
12373 my $delta_time = shift;
12374 my ($pct);
12375 if($delta_time =~ /(\d+(\.\d+)?)%/) {
12376 # Timeout in percent
12377 $pct = $1/100;
12378 $delta_time = 1_000_000;
12380 $delta_time = ::multiply_time_units($delta_time);
12382 return bless {
12383 'queue' => [],
12384 'delta_time' => $delta_time,
12385 'pct' => $pct,
12386 'remedian_idx' => 0,
12387 'remedian_arr' => [],
12388 'remedian' => undef,
12389 }, ref($class) || $class;
12392 sub delta_time($) {
12393 my $self = shift;
12394 return $self->{'delta_time'};
12397 sub set_delta_time($$) {
12398 my $self = shift;
12399 $self->{'delta_time'} = shift;
12402 sub remedian($) {
12403 my $self = shift;
12404 return $self->{'remedian'};
12407 sub set_remedian($$) {
12408 # Set median of the last 999^3 (=997002999) values using Remedian
12410 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
12411 # robust averaging method for large data sets." Journal of the
12412 # American Statistical Association 85.409 (1990): 97-104.
12413 my $self = shift;
12414 my $val = shift;
12415 my $i = $self->{'remedian_idx'}++;
12416 my $rref = $self->{'remedian_arr'};
12417 $rref->[0][$i%999] = $val;
12418 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
12419 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
12420 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
12423 sub update_median_runtime($) {
12424 # Update delta_time based on runtime of finished job if timeout is
12425 # a percentage
12426 my $self = shift;
12427 my $runtime = shift;
12428 if($self->{'pct'}) {
12429 $self->set_remedian($runtime);
12430 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
12431 ::debug("run", "Timeout: $self->{'delta_time'}s ");
12435 sub process_timeouts($) {
12436 # Check if there was a timeout
12437 my $self = shift;
12438 # $self->{'queue'} is sorted by start time
12439 while (@{$self->{'queue'}}) {
12440 my $job = $self->{'queue'}[0];
12441 if($job->endtime()) {
12442 # Job already finished. No need to timeout the job
12443 # This could be because of --keep-order
12444 shift @{$self->{'queue'}};
12445 } elsif($job->is_timedout($self->{'delta_time'})) {
12446 # Need to shift off queue before kill
12447 # because kill calls usleep that calls process_timeouts
12448 shift @{$self->{'queue'}};
12449 ::warning("This job was killed because it timed out:",
12450 $job->replaced());
12451 $job->kill();
12452 } else {
12453 # Because they are sorted by start time the rest are later
12454 last;
12459 sub insert($) {
12460 my $self = shift;
12461 my $in = shift;
12462 push @{$self->{'queue'}}, $in;
12466 package SQL;
12468 sub new($) {
12469 my $class = shift;
12470 my $dburl = shift;
12471 $Global::use{"DBI"} ||= eval "use DBI; 1;";
12472 # +DBURL = append to this DBURL
12473 my $append = $dburl=~s/^\+//;
12474 my %options = parse_dburl(get_alias($dburl));
12475 my %driveralias = ("sqlite" => "SQLite",
12476 "sqlite3" => "SQLite",
12477 "pg" => "Pg",
12478 "postgres" => "Pg",
12479 "postgresql" => "Pg",
12480 "csv" => "CSV",
12481 "oracle" => "Oracle",
12482 "ora" => "Oracle");
12483 my $driver = $driveralias{$options{'databasedriver'}} ||
12484 $options{'databasedriver'};
12485 my $database = $options{'database'};
12486 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
12487 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
12488 my $dsn = "DBI:$driver:dbname=$database$host$port";
12489 my $userid = $options{'user'};
12490 my $password = $options{'password'};;
12491 if(not grep /$driver/, DBI->available_drivers) {
12492 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
12493 ::wait_and_exit(255);
12495 my $dbh;
12496 if($driver eq "CSV") {
12497 # CSV does not use normal dsn
12498 if(-d $database) {
12499 $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
12500 or die $DBI::errstr;
12501 } else {
12502 ::error("$database is not a directory.");
12503 ::wait_and_exit(255);
12505 } else {
12506 $dbh = DBI->connect($dsn, $userid, $password,
12507 { RaiseError => 1, AutoInactiveDestroy => 1 })
12508 or die $DBI::errstr;
12510 $dbh->{'PrintWarn'} = $Global::debug || 0;
12511 $dbh->{'PrintError'} = $Global::debug || 0;
12512 $dbh->{'RaiseError'} = 1;
12513 $dbh->{'ShowErrorStatement'} = 1;
12514 $dbh->{'HandleError'} = sub {};
12515 if(not defined $options{'table'}) {
12516 ::error("The DBURL ($dburl) must contain a table.");
12517 ::wait_and_exit(255);
12520 return bless {
12521 'dbh' => $dbh,
12522 'driver' => $driver,
12523 'max_number_of_args' => undef,
12524 'table' => $options{'table'},
12525 'append' => $append,
12526 }, ref($class) || $class;
12529 # Prototype forwarding
12530 sub get_alias($);
12531 sub get_alias($) {
12532 my $alias = shift;
12533 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
12534 if ($alias !~ /^:/) {
12535 return $alias;
12538 # Find the alias
12539 my $path;
12540 if (-l $0) {
12541 ($path) = readlink($0) =~ m|^(.*)/|;
12542 } else {
12543 ($path) = $0 =~ m|^(.*)/|;
12546 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
12547 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12548 for (@deprecated) {
12549 if(-r $_) {
12550 ::warning("$_ is deprecated. ".
12551 "Use .sql/aliases instead (read man sql).");
12554 my @urlalias=();
12555 check_permissions("$ENV{HOME}/.sql/aliases");
12556 check_permissions("$ENV{HOME}/.dburl.aliases");
12557 my @search = ("$ENV{HOME}/.sql/aliases",
12558 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
12559 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12560 for my $alias_file (@search) {
12561 # local $/ needed if -0 set
12562 local $/ = "\n";
12563 if(-r $alias_file) {
12564 open(my $in, "<", $alias_file) || die;
12565 push @urlalias, <$in>;
12566 close $in;
12569 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
12570 # If we saw this before: we have an alias loop
12571 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
12572 ::error("$alias_part is a cyclic alias.");
12573 exit -1;
12574 } else {
12575 push @Private::seen_aliases, $alias_part;
12578 my $dburl;
12579 for (@urlalias) {
12580 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
12583 if($dburl) {
12584 return get_alias($dburl.$rest);
12585 } else {
12586 ::error("$alias is not defined in @search");
12587 exit(-1);
12591 sub check_permissions($) {
12592 my $file = shift;
12594 if(-e $file) {
12595 if(not -o $file) {
12596 my $username = (getpwuid($<))[0];
12597 ::warning("$file should be owned by $username: ".
12598 "chown $username $file");
12600 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
12601 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
12602 if($mode & 077) {
12603 my $username = (getpwuid($<))[0];
12604 ::warning("$file should be only be readable by $username: ".
12605 "chmod 600 $file");
12610 sub parse_dburl($) {
12611 my $url = shift;
12612 my %options = ();
12613 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
12615 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
12616 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
12617 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
12619 ([^:@/][^:@]*|) # Username ($2)
12621 :([^@]*) # Password ($3)
12624 ([^:/]*)? # Hostname ($4)
12627 ([^/]*)? # Port ($5)
12631 ([^/?]*)? # Database ($6)
12635 ([^?]*)? # Table ($7)
12639 (.*)? # Query ($8)
12641 $!ix) {
12642 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
12643 $options{user} = ::undef_if_empty(uri_unescape($2));
12644 $options{password} = ::undef_if_empty(uri_unescape($3));
12645 $options{host} = ::undef_if_empty(uri_unescape($4));
12646 $options{port} = ::undef_if_empty(uri_unescape($5));
12647 $options{database} = ::undef_if_empty(uri_unescape($6));
12648 $options{table} = ::undef_if_empty(uri_unescape($7));
12649 $options{query} = ::undef_if_empty(uri_unescape($8));
12650 ::debug("sql", "dburl $url\n");
12651 ::debug("sql", "databasedriver ", $options{databasedriver},
12652 " user ", $options{user},
12653 " password ", $options{password}, " host ", $options{host},
12654 " port ", $options{port}, " database ", $options{database},
12655 " table ", $options{table}, " query ", $options{query}, "\n");
12656 } else {
12657 ::error("$url is not a valid DBURL");
12658 exit 255;
12660 return %options;
12663 sub uri_unescape($) {
12664 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
12665 # to avoid depending on URI::Escape
12666 # This section is (C) Gisle Aas.
12667 # Note from RFC1630: "Sequences which start with a percent sign
12668 # but are not followed by two hexadecimal characters are reserved
12669 # for future extension"
12670 my $str = shift;
12671 if (@_ && wantarray) {
12672 # not executed for the common case of a single argument
12673 my @str = ($str, @_); # need to copy
12674 foreach (@str) {
12675 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
12677 return @str;
12679 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
12680 $str;
12683 sub run($) {
12684 my $self = shift;
12685 my $stmt = shift;
12686 if($self->{'driver'} eq "CSV") {
12687 $stmt=~ s/;$//;
12688 if($stmt eq "BEGIN" or
12689 $stmt eq "COMMIT") {
12690 return undef;
12693 my @retval;
12694 my $dbh = $self->{'dbh'};
12695 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
12696 # Execute with the rest of the args - if any
12697 my $rv;
12698 my $sth;
12699 my $lockretry = 0;
12700 while($lockretry < 10) {
12701 $sth = $dbh->prepare($stmt);
12702 if($sth
12704 eval { $rv = $sth->execute(@_) }) {
12705 last;
12706 } else {
12707 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
12709 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
12710 # This is fine:
12711 # It is just a worker that reported back too late -
12712 # another worker had finished the job first
12713 # and the table was then dropped
12714 $rv = $sth = 0;
12715 last;
12717 if($DBI::errstr =~ /locked/) {
12718 ::debug("sql", "Lock retry: $lockretry");
12719 $lockretry++;
12720 ::usleep(rand()*300);
12721 } elsif(not $sth) {
12722 # Try again
12723 $lockretry++;
12724 } else {
12725 ::error($DBI::errstr);
12726 ::wait_and_exit(255);
12730 if($lockretry >= 10) {
12731 ::die_bug("retry > 10: $DBI::errstr");
12733 if($rv < 0 and $DBI::errstr){
12734 ::error($DBI::errstr);
12735 ::wait_and_exit(255);
12737 return $sth;
12740 sub get($) {
12741 my $self = shift;
12742 my $sth = $self->run(@_);
12743 my @retval;
12744 # If $sth = 0 it means the table was dropped by another process
12745 while($sth) {
12746 my @row = $sth->fetchrow_array();
12747 @row or last;
12748 push @retval, \@row;
12750 return \@retval;
12753 sub table($) {
12754 my $self = shift;
12755 return $self->{'table'};
12758 sub append($) {
12759 my $self = shift;
12760 return $self->{'append'};
12763 sub update($) {
12764 my $self = shift;
12765 my $stmt = shift;
12766 my $table = $self->table();
12767 $self->run("UPDATE $table $stmt",@_);
12770 sub output($) {
12771 my $self = shift;
12772 my $commandline = shift;
12774 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
12775 $commandline->seq(),
12776 join("",@{$commandline->{'output'}{1}}),
12777 join("",@{$commandline->{'output'}{2}}));
12780 sub max_number_of_args($) {
12781 # Maximal number of args for this table
12782 my $self = shift;
12783 if(not $self->{'max_number_of_args'}) {
12784 # Read the number of args from the SQL table
12785 my $table = $self->table();
12786 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
12787 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
12788 Receive Exitval _Signal Command Stdout Stderr);
12789 if(not $v) {
12790 ::error("$table contains no records");
12792 # Count the number of Vx columns
12793 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
12795 return $self->{'max_number_of_args'};
12798 sub set_max_number_of_args($$) {
12799 my $self = shift;
12800 $self->{'max_number_of_args'} = shift;
12803 sub create_table($) {
12804 my $self = shift;
12805 if($self->append()) { return; }
12806 my $max_number_of_args = shift;
12807 $self->set_max_number_of_args($max_number_of_args);
12808 my $table = $self->table();
12809 $self->run(qq(DROP TABLE IF EXISTS $table;));
12810 # BIGINT and TEXT are not supported in these databases or are too small
12811 my %vartype = (
12812 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
12813 "TEXT" => "CLOB", },
12814 "mysql" => { "TEXT" => "BLOB", },
12815 "CSV" => { "BIGINT" => "INT",
12816 "FLOAT" => "REAL", },
12818 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
12819 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
12820 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
12821 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
12822 $self->run(qq{CREATE TABLE $table
12823 (Seq $BIGINT,
12824 Host $TEXT,
12825 Starttime $FLOAT,
12826 JobRuntime $FLOAT,
12827 Send $BIGINT,
12828 Receive $BIGINT,
12829 Exitval $BIGINT,
12830 _Signal $BIGINT,
12831 Command $TEXT,}.
12832 $v_def.
12833 qq{Stdout $TEXT,
12834 Stderr $TEXT);});
12837 sub insert_records($) {
12838 my $self = shift;
12839 my $seq = shift;
12840 my $command_ref = shift;
12841 my $record_ref = shift;
12842 my $table = $self->table();
12843 # For SQL encode the command with \257 space as split points
12844 my $command = join("\257 ",@$command_ref);
12845 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12846 # Two extra value due to $seq, Exitval, Send
12847 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
12848 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
12849 "VALUES ($v_vals);", $seq, $command, -1000,
12850 0, @$record_ref[1..$#$record_ref]);
12854 sub get_record($) {
12855 my $self = shift;
12856 my @retval;
12857 my $table = $self->table();
12858 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12859 my $rand = "Reserved-".$$.rand();
12860 my $v;
12861 my $more_pending;
12863 do {
12864 if($self->{'driver'} eq "CSV") {
12865 # Sub SELECT is not supported in CSV
12866 # So to minimize the race condition below select a job at random
12867 my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12868 "WHERE Exitval = -1000 LIMIT 100;");
12869 $v = [ sort { rand() > 0.5 } @$r ];
12870 } else {
12871 # Avoid race condition where multiple workers get the same job
12872 # by setting Stdout to a unique string
12873 # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
12874 $self->update("SET Stdout = ?,Exitval = ? ".
12875 "WHERE Seq = (".
12876 " SELECT * FROM (".
12877 " SELECT min(Seq) FROM $table WHERE Exitval = -1000".
12878 " ) AS dummy".
12879 ") AND Exitval = -1000;", $rand, -1210);
12880 # If a parallel worker overwrote the unique string this will get nothing
12881 $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12882 "WHERE Stdout = ?;", $rand);
12884 if($v->[0]) {
12885 my $val_ref = $v->[0];
12886 # Mark record as taken
12887 my $seq = shift @$val_ref;
12888 # Save the sequence number to use when running the job
12889 $SQL::next_seq = $seq;
12890 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
12891 # Command is encoded with '\257 space' as splitting char
12892 my @command = split /\257 /, shift @$val_ref;
12893 $SQL::command_ref = \@command;
12894 for (@$val_ref) {
12895 push @retval, Arg->new($_);
12897 } else {
12898 # If the record was updated by another job in parallel,
12899 # then we may not be done, so see if there are more jobs pending
12900 $more_pending =
12901 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
12903 } while (not $v->[0] and $more_pending->[0]);
12905 if(@retval) {
12906 return \@retval;
12907 } else {
12908 return undef;
12912 sub total_jobs($) {
12913 my $self = shift;
12914 my $table = $self->table();
12915 my $v = $self->get("SELECT count(*) FROM $table;");
12916 if($v->[0]) {
12917 return $v->[0]->[0];
12918 } else {
12919 ::die_bug("SQL::total_jobs");
12923 sub max_seq($) {
12924 my $self = shift;
12925 my $table = $self->table();
12926 my $v = $self->get("SELECT max(Seq) FROM $table;");
12927 if($v->[0]) {
12928 return $v->[0]->[0];
12929 } else {
12930 ::die_bug("SQL::max_seq");
12934 sub finished($) {
12935 # Check if there are any jobs left in the SQL table that do not
12936 # have a "real" exitval
12937 my $self = shift;
12938 if($opt::wait or $Global::start_sqlworker) {
12939 my $table = $self->table();
12940 my $rv = $self->get("select Seq,Exitval from $table ".
12941 "where Exitval <= -1000 limit 1");
12942 return not $rv->[0];
12943 } else {
12944 return 1;
12948 package Semaphore;
12950 # This package provides a counting semaphore
12952 # If a process dies without releasing the semaphore the next process
12953 # that needs that entry will clean up dead semaphores
12955 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
12956 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
12957 # process holding the entry. If the process dies, the entry can be
12958 # taken by another process.
12960 sub new($) {
12961 my $class = shift;
12962 my $id = shift;
12963 my $count = shift;
12964 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
12965 $id = "id-".$id; # To distinguish it from a process id
12966 my $parallel_locks = $Global::cache_dir . "/semaphores";
12967 -d $parallel_locks or ::mkdir_or_die($parallel_locks);
12968 my $lockdir = "$parallel_locks/$id";
12969 my $lockfile = $lockdir.".lock";
12970 if(-d $parallel_locks and -w $parallel_locks
12971 and -r $parallel_locks and -x $parallel_locks) {
12972 # skip
12973 } else {
12974 ::error("Semaphoredir must be writable: '$parallel_locks'");
12975 ::wait_and_exit(255);
12978 if($count < 1) { ::die_bug("semaphore-count: $count"); }
12979 return bless {
12980 'lockfile' => $lockfile,
12981 'lockfh' => Symbol::gensym(),
12982 'lockdir' => $lockdir,
12983 'id' => $id,
12984 'idfile' => $lockdir."/".$id,
12985 'pid' => $$,
12986 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
12987 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
12988 }, ref($class) || $class;
12991 sub remove_dead_locks($) {
12992 my $self = shift;
12993 my $lockdir = $self->{'lockdir'};
12995 for my $d (glob "$lockdir/*") {
12996 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
12997 my ($pid, $host) = ($1, $2);
12998 if($host eq ::hostname()) {
12999 if(kill 0, $pid) {
13000 ::debug("sem", "Alive: $pid $d\n");
13001 } else {
13002 ::debug("sem", "Dead: $d\n");
13003 ::rm($d);
13009 sub acquire($) {
13010 my $self = shift;
13011 my $sleep = 1; # 1 ms
13012 my $start_time = time;
13013 while(1) {
13014 # Can we get a lock?
13015 $self->atomic_link_if_count_less_than() and last;
13016 $self->remove_dead_locks();
13017 # Retry slower and slower up to 1 second
13018 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13019 # Random to avoid every sleeping job waking up at the same time
13020 ::usleep(rand()*$sleep);
13021 if($opt::semaphoretimeout) {
13022 if($opt::semaphoretimeout > 0
13024 time - $start_time > $opt::semaphoretimeout) {
13025 # Timeout: Take the semaphore anyway
13026 ::warning("Semaphore timed out. Stealing the semaphore.");
13027 if(not -e $self->{'idfile'}) {
13028 open (my $fh, ">", $self->{'idfile'}) or
13029 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
13030 close $fh;
13032 link $self->{'idfile'}, $self->{'pidfile'};
13033 last;
13035 if($opt::semaphoretimeout < 0
13037 time - $start_time > -$opt::semaphoretimeout) {
13038 # Timeout: Exit
13039 ::warning("Semaphore timed out. Exiting.");
13040 exit(1);
13041 last;
13045 ::debug("sem", "acquired $self->{'pid'}\n");
13048 sub release($) {
13049 my $self = shift;
13050 ::rm($self->{'pidfile'});
13051 if($self->nlinks() == 1) {
13052 # This is the last link, so atomic cleanup
13053 $self->lock();
13054 if($self->nlinks() == 1) {
13055 ::rm($self->{'idfile'});
13056 rmdir $self->{'lockdir'};
13058 $self->unlock();
13060 ::debug("run", "released $self->{'pid'}\n");
13063 sub pid_change($) {
13064 # This should do what release()+acquire() would do without having
13065 # to re-acquire the semaphore
13066 my $self = shift;
13068 my $old_pidfile = $self->{'pidfile'};
13069 $self->{'pid'} = $$;
13070 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
13071 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
13072 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13073 ::rm($old_pidfile);
13076 sub atomic_link_if_count_less_than($) {
13077 # Link $file1 to $file2 if nlinks to $file1 < $count
13078 my $self = shift;
13079 my $retval = 0;
13080 $self->lock();
13081 my $nlinks = $self->nlinks();
13082 ::debug("sem","$nlinks<$self->{'count'} ");
13083 if($nlinks < $self->{'count'}) {
13084 -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
13085 if(not -e $self->{'idfile'}) {
13086 open (my $fh, ">", $self->{'idfile'}) or
13087 ::die_bug("write_idfile: $self->{'idfile'}");
13088 close $fh;
13090 $retval = link $self->{'idfile'}, $self->{'pidfile'};
13091 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13093 $self->unlock();
13094 ::debug("sem", "atomic $retval");
13095 return $retval;
13098 sub nlinks($) {
13099 my $self = shift;
13100 if(-e $self->{'idfile'}) {
13101 return (stat(_))[3];
13102 } else {
13103 return 0;
13107 sub lock($) {
13108 my $self = shift;
13109 my $sleep = 100; # 100 ms
13110 my $total_sleep = 0;
13111 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
13112 my $locked = 0;
13113 while(not $locked) {
13114 if(tell($self->{'lockfh'}) == -1) {
13115 # File not open
13116 open($self->{'lockfh'}, ">", $self->{'lockfile'})
13117 or ::debug("run", "Cannot open $self->{'lockfile'}");
13119 if($self->{'lockfh'}) {
13120 # File is open
13121 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
13122 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
13123 # The file is locked: No need to retry
13124 $locked = 1;
13125 last;
13126 } else {
13127 if ($! =~ m/Function not implemented/) {
13128 ::warning("flock: $!",
13129 "Will wait for a random while.");
13130 ::usleep(rand(5000));
13131 # File cannot be locked: No need to retry
13132 $locked = 2;
13133 last;
13137 # Locking failed in first round
13138 # Sleep and try again
13139 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13140 # Random to avoid every sleeping job waking up at the same time
13141 ::usleep(rand()*$sleep);
13142 $total_sleep += $sleep;
13143 if($opt::semaphoretimeout) {
13144 if($opt::semaphoretimeout > 0
13146 $total_sleep/1000 > $opt::semaphoretimeout) {
13147 # Timeout: Take the semaphore anyway
13148 ::warning("Semaphore timed out. Taking the semaphore.");
13149 $locked = 3;
13150 last;
13152 if($opt::semaphoretimeout < 0
13154 $total_sleep/1000 > -$opt::semaphoretimeout) {
13155 # Timeout: Exit
13156 ::warning("Semaphore timed out. Exiting.");
13157 $locked = 4;
13158 last;
13160 } else {
13161 if($total_sleep/1000 > 30) {
13162 ::warning("Semaphore stuck for 30 seconds. ".
13163 "Consider using --semaphoretimeout.");
13167 ::debug("run", "locked $self->{'lockfile'}");
13170 sub unlock($) {
13171 my $self = shift;
13172 ::rm($self->{'lockfile'});
13173 close $self->{'lockfh'};
13174 ::debug("run", "unlocked\n");
13177 # Keep perl -w happy
13179 $opt::x = $Semaphore::timeout = $Semaphore::wait =
13180 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
13181 $Global::max_slot_number = $opt::session;
13183 package main;
13185 sub main() {
13186 save_stdin_stdout_stderr();
13187 save_original_signal_handler();
13188 parse_options();
13189 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
13190 my $number_of_args;
13191 if($Global::max_number_of_args) {
13192 $number_of_args = $Global::max_number_of_args;
13193 } elsif ($opt::X or $opt::m or $opt::xargs) {
13194 $number_of_args = undef;
13195 } else {
13196 $number_of_args = 1;
13199 my @command = @ARGV;
13200 my @input_source_fh;
13201 if($opt::pipepart) {
13202 if($opt::tee) {
13203 @input_source_fh = map { open_or_exit($_) } @opt::a;
13204 # Remove the first: It will be the file piped.
13205 shift @input_source_fh;
13206 if(not @input_source_fh and not $opt::pipe) {
13207 @input_source_fh = (*STDIN);
13209 } else {
13210 # -a is used for data - not for command line args
13211 @input_source_fh = map { open_or_exit($_) } "/dev/null";
13213 } else {
13214 @input_source_fh = map { open_or_exit($_) } @opt::a;
13215 if(not @input_source_fh and not $opt::pipe) {
13216 @input_source_fh = (*STDIN);
13220 if($opt::skip_first_line) {
13221 # Skip the first line for the first file handle
13222 my $fh = $input_source_fh[0];
13223 <$fh>;
13226 set_input_source_header(\@command,\@input_source_fh);
13227 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
13228 # Parallel check all hosts are up. Remove hosts that are down
13229 filter_hosts();
13233 if($opt::sqlmaster and $opt::sqlworker) {
13234 # Start a real --sqlworker in the background later
13235 $Global::start_sqlworker = 1;
13236 $opt::sqlworker = undef;
13239 if($opt::nonall or $opt::onall) {
13240 onall(\@input_source_fh,@command);
13241 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
13244 $Global::JobQueue = JobQueue->new(
13245 \@command,\@input_source_fh,$Global::ContextReplace,
13246 $number_of_args,\@Global::transfer_files,\@Global::ret_files);
13248 if($opt::sqlmaster) {
13249 # Create SQL table to hold joblog + output
13250 # Figure out how many arguments are in a job
13251 # (It is affected by --colsep, -N, $number_source_fh)
13252 my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
13253 my $record = $record_queue->get();
13254 my $no_of_values = $number_of_args * (1+$#{$record});
13255 $record_queue->unget($record);
13256 $Global::sql->create_table($no_of_values);
13257 if($opt::sqlworker) {
13258 # Start a real --sqlworker in the background later
13259 $Global::start_sqlworker = 1;
13260 $opt::sqlworker = undef;
13264 if($opt::pipepart) {
13265 pipepart_setup();
13266 } elsif($opt::pipe and $opt::tee) {
13267 pipe_tee_setup();
13268 } elsif($opt::pipe and $opt::shard or $opt::bin) {
13269 pipe_shard_setup();
13272 if(not $opt::pipepart and $opt::groupby) {
13273 group_by_stdin_filter();
13275 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
13276 # Count the number of jobs or shuffle all jobs
13277 # before starting any.
13278 # Must be done after ungetting any --pipepart jobs.
13279 $Global::JobQueue->total_jobs();
13281 # Compute $Global::max_jobs_running
13282 # Must be done after ungetting any --pipepart jobs.
13283 max_jobs_running();
13284 init_run_jobs();
13285 my $sem;
13286 if($Global::semaphore) {
13287 $sem = acquire_semaphore();
13289 $SIG{TERM} = $Global::original_sig{TERM};
13290 $SIG{HUP} = \&start_no_new_jobs;
13292 if($opt::tee or $opt::shard or $opt::bin) {
13293 # All jobs must be running in parallel for --tee/--shard/--bin
13294 while(start_more_jobs()) {}
13295 $Global::start_no_new_jobs = 1;
13296 if(not $Global::JobQueue->empty()) {
13297 if($opt::tee) {
13298 ::error("--tee requires --jobs to be higher. Try --jobs 0.");
13299 } elsif($opt::bin) {
13300 ::error("--bin requires --jobs to be higher than the number of",
13301 "arguments. Increase --jobs.");
13302 } elsif($opt::shard) {
13303 ::error("--shard requires --jobs to be higher than the number of",
13304 "arguments. Increase --jobs.");
13305 } else {
13306 ::die_bug("--bin/--shard/--tee should not get here");
13308 ::wait_and_exit(255);
13310 } elsif($opt::pipe and not $opt::pipepart) {
13311 # Fill all jobslots
13312 while(start_more_jobs()) {}
13313 spreadstdin();
13314 } else {
13315 # Reap one - start one
13316 while(reaper() + start_more_jobs()) {}
13318 ::debug("init", "Start draining\n");
13319 drain_job_queue(@command);
13320 ::debug("init", "Done draining\n");
13321 reapers();
13322 ::debug("init", "Done reaping\n");
13323 if($Global::semaphore) {
13324 $sem->release();
13326 cleanup();
13327 ::debug("init", "Halt\n");
13328 halt();
13331 main();