3 # Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <https://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # SPDX-FileCopyrightText: 2021 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc.
22 # SPDX-License-Identifier: GPL-3.0-or-later
24 # open3 used in Job::start
27 # gensym used in Job::start
28 use Symbol
qw(gensym);
29 # tempfile used in Job::start
30 use File
::Temp
qw(tempfile tempdir);
31 # mkpath used in openresultsfile
33 # GetOptions used in get_options_from_array
35 # Used to ensure code quality
39 sub set_input_source_header
($$) {
40 my ($command_ref,$input_source_fh_ref) = @_;
41 if($opt::header
and not $opt::pipe) {
42 # split with colsep or \t
43 # $header force $colsep = \t if undef?
44 my $delimiter = defined $opt::colsep ?
$opt::colsep
: "\t";
46 my $left = "\Q$Global::parensleft\E";
47 my $l = $Global::parensleft
;
49 my $right = "\Q$Global::parensright\E";
50 my $r = $Global::parensright
;
52 for my $fh (@
$input_source_fh_ref) {
56 ::debug
("init", "Delimiter: '$delimiter'");
57 for my $s (split /$delimiter/o, $line) {
58 ::debug
("init", "Colname: '$s'");
59 # Replace {colname} with {2}
60 for(@
$command_ref, @Global::ret_files
,
61 @Global::transfer_files
, $opt::tagstring
,
62 $opt::workdir
, $opt::results
, $opt::retries
,
63 @Global::template_contents
, @Global::template_names
,
67 s
:\
{$s(|/|//|\.|/\
.)\
}:\
{$id$1\
}:g
;
68 # {=header1 ... =} => {=1 ... =}
69 s
:$left $s (.*?
) $right:$l$id$1$r:gx
;
71 $Global::input_source_header
{$id} = $s;
77 for my $fh (@
$input_source_fh_ref) {
78 $Global::input_source_header
{$id} = $id;
84 sub max_jobs_running
() {
85 # Compute $Global::max_jobs_running as the max number of jobs
86 # running on each sshlogin.
88 # $Global::max_jobs_running
89 if(not $Global::max_jobs_running
) {
90 for my $sshlogin (values %Global::host
) {
91 $sshlogin->max_jobs_running();
94 if(not $Global::max_jobs_running
) {
95 ::error
("Cannot run any jobs.");
98 return $Global::max_jobs_running
;
102 # Compute exit value,
103 # wait for children to complete
105 if($opt::halt
and $Global::halt_when
ne "never") {
106 if(not defined $Global::halt_exitstatus
) {
107 if($Global::halt_pct
) {
108 $Global::halt_exitstatus
=
109 ::ceil
($Global::total_failed
/
110 ($Global::total_started
|| 1) * 100);
111 } elsif($Global::halt_count
) {
112 $Global::halt_exitstatus
=
113 ::min
(undef_as_zero
($Global::total_failed
),101);
116 wait_and_exit
($Global::halt_exitstatus
);
118 wait_and_exit
(min
(undef_as_zero
($Global::exitstatus
),101));
123 sub __PIPE_MODE__
() {}
126 sub pipepart_setup
() {
127 # Compute the blocksize
128 # Generate the commands to extract the blocks
129 # Push the commands on queue
131 # @Global::cat_prepends
134 # Prepend each command with
136 my $cat_string = "< ".Q
($opt::a
[0]);
137 for(1..$Global::JobQueue
->total_jobs()) {
138 push @Global::cat_appends
, $cat_string;
139 push @Global::cat_prepends
, "";
142 if(not $opt::blocksize
) {
143 # --blocksize with 10 jobs per jobslot
144 $opt::blocksize
= -10;
146 if($opt::roundrobin
) {
147 # --blocksize with 1 job per jobslot
148 $opt::blocksize
= -1;
150 if($opt::blocksize
< 0) {
157 $size += size_of_block_dev
($_);
159 ::error
("$_ is neither a file nor a block device");
162 ::error
("File not found: $_");
166 # Run in total $job_slots*(- $blocksize) jobs
167 # Set --blocksize = size / no of proc / (- $blocksize)
168 $Global::dummy_jobs
= 1;
169 $Global::blocksize
= 1 +
170 int($size / max_jobs_running() /
171 -multiply_binary_prefix
($opt::blocksize
));
173 @Global::cat_prepends
= map { pipe_part_files
($_) } @opt::a
;
174 # Unget the empty arg as many times as there are parts
175 $Global::JobQueue
->{'commandlinequeue'}{'arg_queue'}->unget(
176 map { [Arg
->new("\0noarg")] } @Global::cat_prepends
181 sub pipe_tee_setup
() {
182 # Create temporary fifos
183 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
184 # This will spread the input to fifos
185 # Generate commands that reads from fifo1..N:
186 # cat fifo | user_command
188 # @Global::cat_prepends
190 for(1..$Global::JobQueue
->total_jobs()) {
191 push @fifos, tmpfifo
();
193 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
195 # Test if tee supports --output-error=warn-nopipe
196 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
197 my $opt = $? ?
"" : "--output-error=warn-nopipe";
198 ::debug
("init","tee $opt");
199 # Let tee inherit our stdin
200 # and redirect stdout to null
201 open STDOUT
, ">","/dev/null";
203 exec "tee", $opt, @fifos;
209 # (rm fifo1; grep 1) < fifo1
210 # (rm fifo2; grep 2) < fifo2
211 # (rm fifo3; grep 3) < fifo3
212 # Remove the tmpfifo as soon as it is open
213 @Global::cat_prepends
= map { "(rm $_;" } @fifos;
214 @Global::cat_appends
= map { ") < $_" } @fifos;
218 sub parcat_script
() {
219 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
221 use POSIX qw(:errno_h);
226 use Fcntl
qw(:DEFAULT :flock);
229 my $q = Thread
::Queue
->new();
230 my $okq = Thread
::Queue
->new();
236 print " parcat file(s)\n";
237 print " cat argfile | parcat\n";
239 # Read arguments from stdin
240 chomp(@ARGV = <STDIN
>);
243 my $files_to_open = 0;
244 # Default: fd = stdout
247 # --rm = remove file when opened
248 /^--rm$/ and do { $opt::rm
= 1; next; };
249 # -1 = output to fd 1, -2 = output to fd 2
250 /^-(\d+)$/ and do { $fd = $1; next; };
251 push @producers, threads
->create("producer", $_, $fd);
256 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
258 my $output_fd = shift;
259 open(my $fh, "<", $file) || do {
260 print STDERR
"parcat: Cannot open $file\n";
263 # Remove file when it has been opened
267 set_fh_non_blocking
($fh);
269 # Pass the fileno to parent
270 $q->enqueue(fileno($fh),$output_fd);
271 # Get an OK that the $fh is opened and we can release the $fh
273 my $ok = $okq->dequeue();
274 if($ok == fileno($fh)) { last; }
275 # Not ours - very unlikely to happen
281 my $s = IO
::Select
->new();
287 open(my $infh, "<&=", $infd) || die;
288 open(my $outfh, ">&=", $outfd) || die;
290 # Tell the producer now opened here and can be released
291 $okq->enqueue($infd);
292 # Initialize the buffer
293 @
{$buffer{$infh}{$outfd}} = ();
294 $Global::fh
{$outfd} = $outfh;
298 # Non-blocking dequeue
301 ($infd,$outfd) = $q->dequeue_nb(2);
302 if(defined($outfd)) {
303 add_file
($infd,$outfd);
305 } while(defined($outfd));
308 sub add_files_block
{
310 my ($infd,$outfd) = $q->dequeue(2);
311 add_file
($infd,$outfd);
316 my (@ready,$infh,$rv,$buf);
318 # Wait until at least one file is opened
320 while($q->pending or keys %buffer) {
322 while(keys %buffer) {
323 @ready = $s->can_read(0.01);
328 # There is only one key, namely the output file descriptor
329 for my $outfd (keys %{$buffer{$infh}}) {
330 # TODO test if 65536 is optimal (2^17 is used elsewhere)
331 $rv = sysread($infh, $buf, 65536);
334 # Would block: Nothing read
337 # Nothing read, but would not block:
340 for(@
{$buffer{$infh}{$outfd}}) {
341 syswrite($Global::fh
{$outfd},$_);
343 delete $buffer{$infh};
344 # Closing the $infh causes it to block
351 # Find \n or \r for full line
352 my $i = (rindex($buf,"\n")+1);
355 for(@
{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
356 syswrite($Global::fh
{$outfd},$_);
358 # @buffer = remaining half line
359 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
361 # Something read, but not a full line
362 push @
{$buffer{$infh}{$outfd}}, $buf;
369 } while($opened < $files_to_open);
375 sub set_fh_non_blocking
{
376 # Set filehandle as non-blocking
378 # $fh = filehandle to be blocking
383 fcntl($fh, &F_GETFL
, $flags) || die $!; # Get the current flags on the filehandle
384 $flags |= &O_NONBLOCK
; # Add non-blocking to the flags
385 fcntl($fh, &F_SETFL
, $flags) || die $!; # Set the flags on the filehandle
388 return ::spacefree(3, $script);
391 sub sharder_script() {
396 # Which columns to shard on (count from 1)
398 # Which columns to shard on (count from 0)
401 my $perlexpr = shift;
403 # Open fifos for writing, fh{0..$bins}
407 open $fh{$t++}, ">", $_;
408 # open blocks until it is opened by reader
409 # so unlink only happens when it is ready
413 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
415 # Split into $col columns (no need to split into more)
416 @F = split $sep, $_, $col+1;
418 local $_ = $F[$col0];
420 $fh = $fh{ hex(B::hash($_))%$bins };
426 # Split into $col columns (no need to split into more)
427 @F = split $sep, $_, $col+1;
428 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
432 # Close all open fifos
435 return ::spacefree(1, $script);
438 sub binner_script() {
443 # Which columns to shard on (count from 1)
445 # Which columns to shard on (count from 0)
448 my $perlexpr = shift;
450 # Open fifos for writing, fh{0..$bins}
453 # Let the last output fifo be the 0'th
454 open $fh{$t++}, ">", pop @ARGV;
456 open $fh{$t++}, ">", $_;
457 # open blocks until it is opened by reader
458 # so unlink only happens when it is ready
462 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
464 # Split into $col columns (no need to split into more)
465 @F = split $sep, $_, $col+1;
467 local $_ = $F[$col0];
469 $fh = $fh{ $_%$bins };
475 # Split into $col columns (no need to split into more)
476 @F = split $sep, $_, $col+1;
477 $fh = $fh{ $F[$col0]%$bins };
481 # Close all open fifos
484 return ::spacefree
(1, $script);
487 sub pipe_shard_setup
() {
488 # Create temporary fifos
489 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
490 # This will spread the input to fifos
491 # Generate commands that reads from fifo1..N:
492 # cat fifo | user_command
494 # @Global::cat_prepends
497 # TODO $opt::jobs should be evaluated (100%)
498 # TODO $opt::jobs should be number of total_jobs if there are argugemts
499 my $njobs = $opt::jobs
;
500 for my $m (0..$njobs-1) {
501 for my $n (0..$njobs-1) {
502 # sharding to A B C D
503 # parcatting all As together
504 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo
();
507 my $shardbin = ($opt::shard
|| $opt::bin
);
510 $script = binner_script
();
512 $script = sharder_script
();
515 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
517 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
518 # Group by column name
519 # (Yes, this will also wrongly match a perlexpr like: chop)
520 my($read,$char,@line);
521 # A full line, but nothing more (the rest must be read by the child)
522 # $Global::header used to prepend block to each job
524 $read = sysread(STDIN
,$char,1);
526 } while($read and $char ne "\n");
527 $Global::header
= join "", @line;
529 my ($col, $perlexpr, $subref) =
530 column_perlexpr
($shardbin, $Global::header
, $opt::colsep
);
532 # Let the sharder inherit our stdin
533 # and redirect stdout to null
534 open STDOUT
, ">","/dev/null";
535 # The PERL_HASH_SEED must be the same for all sharders
536 # so B::hash will return the same value for any given input
537 $ENV{'PERL_HASH_SEED'} = $$;
538 exec qw(parallel --block 100k -q --pipe -j), $njobs,
539 qw(--roundrobin -u perl -e), $script, ($opt::colsep
|| ","),
540 $col, $perlexpr, '{}', (map { (':::+', @
{$_}) } @parcatfifos);
543 # (rm fifo1; grep 1) < fifo1
544 # (rm fifo2; grep 2) < fifo2
545 # (rm fifo3; grep 3) < fifo3
546 my $parcat = Q
(parcat_script
());
548 ::error
("'parcat' must be in path.");
549 ::wait_and_exit
(255);
551 @Global::cat_prepends
= map { "perl -e $parcat @$_ | " } @parcatfifos;
554 sub pipe_part_files
(@
) {
556 # find header and split positions
557 # make commands that 'cat's the partial file
559 # $file = the file to read
561 # @commands that will cat_partial each part
564 if(not -f
$file and not -b
$file) {
565 ::error
("--pipepart only works on seekable files, not streams/pipes.",
566 "$file is not a seekable file.");
567 ::wait_and_exit
(255);
569 my $header = find_header
(\
$buf,open_or_exit
($file));
571 my @pos = find_split_positions
($file,int($Global::blocksize
),$header);
573 my @cat_prepends = ();
574 for(my $i=0; $i<$#pos; $i++) {
576 cat_partial
($file, 0, length($header), $pos[$i], $pos[$i+1]));
578 return @cat_prepends;
581 sub find_header
($$) {
582 # Compute the header based on $opt::header
584 # $buf_ref = reference to read-in buffer
585 # $fh = filehandle to read from
592 my ($buf_ref, $fh) = @_;
594 # $Global::header may be set in group_by_loop()
595 if($Global::header
) { return $Global::header
}
597 if($opt::header
eq ":") { $opt::header
= "(.*\n)"; }
598 # Number = number of lines
599 $opt::header
=~ s/^(\d+)$/"(.*\n)"x$1/e;
600 while(sysread($fh,$$buf_ref,int($Global::blocksize
),length $$buf_ref)) {
601 if($$buf_ref =~ s/^($opt::header)//) {
610 sub find_split_positions
($$$) {
611 # Find positions in bigfile where recend is followed by recstart
613 # $file = the file to read
614 # $block = (minimal) --block-size of each chunk
615 # $header = header to be skipped
620 # @positions of block start/end
621 my($file, $block, $header) = @_;
622 my $headerlen = length $header;
625 # $file is a blockdevice
626 $size = size_of_block_dev
($file);
630 return split_positions_for_group_by
($file,$size,$block,$header);
632 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
633 # The optimal dd blocksize for freebsd = 2^15..2^17
634 # The optimal dd blocksize for ubuntu (AMD6376) = 2^16
635 my $dd_block_size = 131072; # 2^17
637 my ($recstart,$recend) = recstartrecend
();
638 my $recendrecstart = $recend.$recstart;
639 my $fh = ::open_or_exit
($file);
640 push(@pos,$headerlen);
641 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
643 if($recendrecstart eq "") {
644 # records ends anywhere
647 # Seek the the block start
648 if(not sysseek($fh, $pos, 0)) {
649 ::error
("Cannot seek to $pos in $file");
652 while(sysread($fh,$buf,$dd_block_size,length $buf)) {
654 # If match /$recend$recstart/ => Record position
655 if($buf =~ m
:^(.*$recend)$recstart:os
) {
656 # Start looking for next record _after_ this match
662 # If match $recend$recstart => Record position
663 # TODO optimize to only look at the appended
664 # $dd_block_size + len $recendrecstart
665 # TODO increase $dd_block_size to optimize for longer records
666 my $i = index64
(\
$buf,$recendrecstart);
668 # Start looking for next record _after_ this match
669 $pos += $i + length($recend);
677 if($pos[$#pos] != $size) {
678 # Last splitpoint was not at end of the file: add $size as the last
685 sub split_positions_for_group_by
($$$$) {
690 seek($fh, $pos-1, 0) || die;
695 my $linepos = tell($fh);
700 if(defined $group_by::col
) {
701 $opt::colsep
||= "\t";
702 @F = split /$opt::colsep/, $_;
703 $_ = $F[$group_by::col
];
705 eval $group_by::perlexpr
;
707 return ($_,$linepos);
710 sub binary_search_end
($$$) {
711 my ($s,$spos,$epos) = @_;
712 # value_at($spos) == $s
713 # value_at($epos) != $s
714 my $posdif = $epos - $spos;
717 ($v,$vpos) = value_at
($spos+$posdif);
720 $posdif = $epos - $spos;
724 $posdif = int($posdif/2);
729 sub binary_search_start
($$$) {
730 my ($s,$spos,$epos) = @_;
731 # value_at($spos) != $s
732 # value_at($epos) == $s
733 my $posdif = $epos - $spos;
736 ($v,$vpos) = value_at
($spos+$posdif);
741 $posdif = $epos - $spos;
743 $posdif = int($posdif/2);
748 my ($file,$size,$block,$header) = @_;
749 my ($a,$b,$c,$apos,$bpos,$cpos);
751 $fh = open_or_exit
($file);
752 # Set $Global::group_by_column $Global::group_by_perlexpr
753 group_by_loop
($fh,$opt::recsep
);
754 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
755 $apos = length $header;
756 for(($a,$apos) = value_at
($apos); $apos < $size;) {
758 $bpos = $apos + $block;
759 ($b,$bpos) = value_at
($bpos);
761 push @pos, $size; last;
763 $cpos = $bpos + $block;
764 ($c,$cpos) = value_at
($cpos);
767 # Move bpos, cpos a block forward until $a == $b != $c
770 ($c,$cpos) = value_at
($cpos);
777 # Binary search for $b ending between ($bpos,$cpos)
778 ($b,$bpos) = binary_search_end
($b,$bpos,$cpos);
782 # Binary search for $b starting between ($apos,$bpos)
783 ($b,$bpos) = binary_search_start
($b,$apos,$bpos);
786 # Binary search for $b ending between ($bpos,$cpos)
787 ($b,$bpos) = binary_search_end
($b,$bpos,$cpos);
790 ($a,$apos) = ($b,$bpos);
792 if($pos[$#pos] != $size) {
793 # Last splitpoint was not at end of the file: add it
799 sub cat_partial
($@
) {
800 # Efficient command to copy from byte X to byte Y
802 # $file = the file to read
803 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
805 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
806 my($file, @start_end) = @_;
808 # Convert (start,end) to (start,len)
809 my @start_len = map {
810 if(++$i % 2) { $start = $_; } else { $_-$start }
812 # This can read 7 GB/s using a single core
813 my $script = spacefree
817 sysseek(STDIN,shift,0) || die;
820 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
822 syswrite(STDOUT,$buf);
826 return "<". Q($file) .
827 " perl -e '$script' @start_len |";
830 sub column_perlexpr($$$) {
831 # Compute the column number (if any), perlexpression from combined
832 # string (such as --shard key, --groupby key, {=n perlexpr=}
834 # $column_perlexpr = string with column and perl expression
835 # $header = header from input file (if column is column name)
836 # $colsep = column separator regexp
838 # $col = column number
839 # $perlexpr = perl expression
840 # $subref = compiled perl expression as sub reference
841 my ($column_perlexpr, $header, $colsep) = @_;
842 my ($col, $perlexpr, $subref);
843 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
844 # Column name/number (possibly prefix)
845 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
846 # Column number (possibly prefix)
848 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
849 # Column name (possibly prefix)
851 # Split on --copsep pattern
852 my @headers = split /$colsep/, $header;
854 @headers{@headers} = (1..($#headers+1));
855 $col = $headers{$colname};
856 if(not defined $col) {
857 ::error("Column '$colname' $colsep not found in header",keys %headers);
858 ::wait_and_exit(255);
862 # What is left of $column_perlexpr is $perlexpr (possibly empty)
863 $perlexpr = $column_perlexpr;
864 $subref = eval("sub { no strict; no warnings; $perlexpr }");
865 return($col, $perlexpr, $subref);
868 sub group_by_loop($$) {
869 # Generate perl code for group-by loop
870 # Insert a $recsep when the column value changes
871 # The column value can be computed with $perlexpr
872 my($fh,$recsep) = @_;
873 my $groupby = $opt::groupby;
874 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
875 # Group by column name
876 # (Yes, this will also wrongly match a perlexpr like: chop)
877 my($read,$char,@line);
878 # Read a full line, but nothing more
879 # (the rest must be read by the child)
880 # $Global::header used to prepend block to each job
882 $read = sysread($fh,$char,1);
884 } while($read and $char ne "\n");
885 $Global::header = join "", @line;
887 $opt::colsep ||= "\t";
888 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
889 column_perlexpr($groupby, $Global::header, $opt::colsep);
890 # Numbered 0..n-1 due to being used by $F[n]
891 if($group_by::col) { $group_by::col--; }
893 my $loop = ::spacefree(0,q{
894 BEGIN{ $last = "RECSEP"; }
904 if(defined $group_by::col) {
905 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
907 $loop =~ s/COLVALUE/\$_/g;
909 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
910 $loop =~ s/RECSEP/$recsep/g;
914 sub group_by_stdin_filter() {
915 # Record separator with 119 bit random value
918 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
919 $opt::remove_rec_sep = 1;
921 push @filter, "perl";
922 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
923 # This is column number/name
924 # Use -a (auto-split)
926 $opt::colsep ||= "\t";
927 my $sep = $opt::colsep;
930 # man perlrun: -Fpattern [...] You can't use literal whitespace
931 $sep =~ s/ /\\040{1}/g;
932 push @filter, "-F$sep";
935 push @filter, group_by_loop(*STDIN,$opt::recstart);
936 ::debug("init", "@filter\n");
937 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
938 if(which("mbuffer")) {
939 # You get a speed up of 30% by going through mbuffer
940 open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") ||
941 die ("Cannot start mbuffer");
947 # Spawn a job and print the record to it.
953 # $Global::max_number_of_args
955 # $Global::start_no_new_jobs
961 my ($recstart,$recend) = recstartrecend();
962 my $recendrecstart = $recend.$recstart;
963 my $chunk_number = 1;
964 my $one_time_through;
965 my $two_gb = 2**31-1;
966 my $blocksize = int($Global::blocksize);
968 my $timeout = $Global::blocktimeout;
970 my $header = find_header(\$buf,$in);
971 my $anything_written;
976 # Read a --blocksize from STDIN
977 # possibly interrupted by --blocktimeout
978 # Add up to the next full block
979 my $readsize = $blocksize - (length $buf) % $blocksize;
982 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
983 # --blocktimeout (or 0 if not set)
987 $nread = sysread $in, $buf, $readsize, length $buf;
989 } while($readsize and $nread);
991 # Less efficient reading, but 32-bit sysread compatible
993 $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
995 } while($readsize and $nread);
1000 die unless $@ eq "alarm\n"; # propagate unexpected errors
1005 $eof = not ($nread or $alarm);
1008 sub pass_n_line_records() {
1009 # Pass records of N lines
1010 my $n_lines = $buf =~ tr/\n/\n/;
1011 my $last_newline_pos = rindex64(\$buf,"\n");
1012 # Go backwards until there are full n-line records
1013 while($n_lines % $Global::max_lines) {
1015 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1017 # Chop at $last_newline_pos as that is where n-line record ends
1018 $anything_written +=
1019 write_record_to_pipe($chunk_number++,\$header,\$buf,
1020 $recstart,$recend,$last_newline_pos+1);
1021 shorten(\$buf,$last_newline_pos+1);
1024 sub pass_n_regexps() {
1025 # Pass records of N regexps
1026 # -N => (start..*?end){n}
1027 # -L -N => (start..*?end){n*l}
1028 if(not $garbage_read) {
1030 if($buf !~ /^$recstart/o) {
1031 # Buf does not start with $recstart => There is garbage.
1032 # Make a single record of the garbage
1035 (?:(?:(?!$recend$recstart)(?s:.))*?$recend)
1037 # Followed by recstart
1038 (?=$recstart)/mox and length $1 > 0) {
1039 $anything_written +=
1040 write_record_to_pipe($chunk_number++,\$header,\$buf,
1041 $recstart,$recend,length $1);
1042 shorten(\$buf,length $1);
1048 $Global::max_number_of_args * ($Global::max_lines || 1);
1049 # (?!negative lookahead) is needed to avoid backtracking
1050 # See: https://unix.stackexchange.com/questions/439356/
1051 # (?s:.) = (.|[\n]) but faster
1054 # n more times recstart.*recend
1055 (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records}
1057 # Followed by recstart
1058 (?=$recstart)/mox and length $1 > 0) {
1059 $anything_written +=
1060 write_record_to_pipe($chunk_number++,\$header,\$buf,
1061 $recstart,$recend,length $1);
1062 shorten(\$buf,length $1);
1067 # Find the last recend-recstart in $buf
1069 # (?s:.) = (.|[\n]) but faster
1070 if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) {
1071 $anything_written +=
1072 write_record_to_pipe($chunk_number++,\$header,\$buf,
1073 $recstart,$recend,length $1);
1074 shorten(\$buf,length $1);
1078 sub pass_csv_record() {
1080 # We define a CSV record as an even number of " + end of line
1081 # This works if you use " as quoting character
1082 my $last_newline_pos = length $buf;
1083 # Go backwards from the last \n and search for a position
1084 # where there is an even number of "
1087 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1089 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1090 and $last_newline_pos >= 0);
1091 # Chop at $last_newline_pos as that is where CSV record ends
1092 $anything_written +=
1093 write_record_to_pipe($chunk_number++,\$header,\$buf,
1094 $recstart,$recend,$last_newline_pos+1);
1095 shorten(\$buf,$last_newline_pos+1);
1099 # Pass n records of --recend/--recstart
1100 # -N => (start..*?end){n}
1103 $Global::max_number_of_args * ($Global::max_lines || 1);
1104 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1107 $i += length $recend; # find the actual splitting location
1108 $anything_written +=
1109 write_record_to_pipe($chunk_number++,\$header,\$buf,
1110 $recstart,$recend,$i);
1116 # Pass records of --recend/--recstart
1117 # Split record at fixed string
1118 # Find the last recend+recstart in $buf
1120 my $i = rindex64(\$buf,$recendrecstart);
1122 $i += length $recend; # find the actual splitting location
1123 $anything_written +=
1124 write_record_to_pipe($chunk_number++,\$header,\$buf,
1125 $recstart,$recend,$i);
1130 sub increase_blocksize_maybe() {
1131 if(not $anything_written
1132 and not $opt::blocktimeout
1133 and not $Global::no_autoexpand_block) {
1134 # Nothing was written - maybe the block size < record size?
1135 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1136 if($blocksize < $two_gb) {
1137 my $old_blocksize = $blocksize;
1138 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1139 ::warning("A record was longer than $old_blocksize. " .
1140 "Increasing to --blocksize $blocksize.");
1146 $anything_written = 0;
1149 # Remove empty lines
1150 $buf =~ s/^\s*\n//gm;
1151 if(length $buf == 0) {
1159 if($Global::max_lines and not $Global::max_number_of_args) {
1160 # Pass n-line records
1161 pass_n_line_records();
1162 } elsif($opt::csv) {
1163 # Pass a full CSV record
1165 } elsif($opt::regexp) {
1166 # Split record at regexp
1167 if($Global::max_number_of_args) {
1173 # Pass normal --recend/--recstart record
1174 if($Global::max_number_of_args) {
1181 increase_blocksize_maybe();
1182 ::debug("init", "Round\n");
1184 ::debug("init", "Done reading input\n");
1186 # If there is anything left in the buffer write it
1187 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1188 $recend, length $buf);
1191 $Global::no_more_input = 1;
1192 # We need to start no more jobs: At most we need to retry some
1193 # of the already running.
1194 my @running = values %Global::running;
1196 for my $job (@running) {
1197 if(defined $job and $job->virgin()) {
1198 close $job->fh(0,"w");
1201 # Wait for running jobs to be done
1203 while($Global::total_running > 0) {
1204 $sleep = ::reap_usleep($sleep);
1208 $Global::start_no_new_jobs ||= 1;
1209 if($opt::roundrobin) {
1210 # Flush blocks to roundrobin procs
1212 while(%Global::running) {
1213 my $something_written = 0;
1214 for my $job (values %Global::running) {
1215 if($job->block_length()) {
1216 $something_written += $job->non_blocking_write();
1218 close $job->fh(0,"w");
1221 if($something_written) {
1222 $sleep = $sleep/2+0.001;
1224 $sleep = ::reap_usleep($sleep);
1229 sub recstartrecend() {
1234 # $recstart,$recend with default values and regexp conversion
1235 my($recstart,$recend);
1236 if(defined($opt::recstart) and defined($opt::recend)) {
1237 # If both --recstart and --recend is given then both must match
1238 $recstart = $opt::recstart;
1239 $recend = $opt::recend;
1240 } elsif(defined($opt::recstart)) {
1241 # If --recstart is given it must match start of record
1242 $recstart = $opt::recstart;
1244 } elsif(defined($opt::recend)) {
1245 # If --recend is given then it must match end of record
1247 $recend = $opt::recend;
1248 if($opt::regexp and $recend eq '') {
1249 # --regexp --recend ''
1255 # Do not allow /x comments - to avoid having to quote space
1256 $recstart = "(?-x:".$recstart.")";
1257 $recend = "(?-x:".$recend.")";
1258 # If $recstart/$recend contains '|'
1259 # the | should only apply to the regexp
1260 $recstart = "(?:".$recstart.")";
1261 $recend = "(?:".$recend.")";
1263 # $recstart/$recend = printf strings (\n)
1264 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1265 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1267 return ($recstart,$recend);
1271 # See if string is in buffer N times
1273 # the position where the Nth copy is found
1274 my ($buf_ref, $str, $n) = @_;
1277 $i = index64($buf_ref,$str,$i+1);
1278 if($i == -1) { last }
1287 sub round_robin_write($$$$$) {
1289 # $header_ref = ref to $header string
1290 # $block_ref = ref to $block to be written
1291 # $recstart = record start string
1292 # $recend = record end string
1293 # $endpos = end position of $block
1297 # $something_written = amount of bytes written
1298 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1300 my $block_passed = 0;
1301 while(not $block_passed) {
1302 # Continue flushing existing buffers
1303 # until one is empty and a new block is passed
1305 # Rotate queue once so new blocks get a fair chance
1306 # to be given to another slot
1307 push @robin_queue, shift @robin_queue;
1309 # Make a queue to spread the blocks evenly
1310 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1311 values %Global::running);
1315 for my $job (@robin_queue) {
1316 if($job->block_length() > 0) {
1317 $written += $job->non_blocking_write();
1319 $job->set_block($header_ref, $buffer_ref,
1320 $endpos, $recstart, $recend);
1322 $written += $job->non_blocking_write();
1327 $sleep = $sleep/1.5+0.001;
1329 # Don't sleep if something is written
1330 } while($written and not $block_passed);
1331 $sleep = ::reap_usleep($sleep);
1338 # Do index on strings > 2GB.
1339 # index in Perl < v5.22 does not work for > 2GB
1341 # as index except STR which must be passed as a reference
1346 my $pos = shift || 0;
1347 my $block_size = 2**31-1;
1348 my $strlen = length($$ref);
1349 # No point in doing extra work if we don't need to.
1350 if($strlen < $block_size or $] > 5.022) {
1351 return index($$ref, $match, $pos);
1354 my $matchlen = length($match);
1357 while($offset < $strlen) {
1359 substr($$ref, $offset, $block_size),
1360 $match, $pos-$offset);
1362 return $ret + $offset;
1364 $offset += ($block_size - $matchlen - 1);
1370 # Do rindex on strings > 2GB.
1371 # rindex in Perl < v5.22 does not work for > 2GB
1373 # as rindex except STR which must be passed as a reference
1379 my $block_size = 2**31-1;
1380 my $strlen = length($$ref);
1381 # Default: search from end
1382 $pos = defined $pos ? $pos : $strlen;
1383 # No point in doing extra work if we don't need to.
1384 if($strlen < $block_size or $] > 5.022) {
1385 return rindex($$ref, $match, $pos);
1388 my $matchlen = length($match);
1390 my $offset = $pos - $block_size + $matchlen;
1392 # The offset is less than a $block_size
1393 # Set the $offset to 0 and
1394 # Adjust block_size accordingly
1395 $block_size = $block_size + $offset;
1398 while($offset >= 0) {
1400 substr($$ref, $offset, $block_size),
1403 return $ret + $offset;
1405 $offset -= ($block_size - $matchlen - 1);
1411 # Do: substr($buf,0,$i) = "";
1412 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1415 # $i = position to shorten to
1417 my ($buf_ref, $i) = @_;
1418 my $two_gb = 2**31-1;
1419 while($i > $two_gb) {
1420 substr($$buf_ref,0,$two_gb) = "";
1423 substr($$buf_ref,0,$i) = "";
1426 sub write_record_to_pipe($$$$$$) {
1428 # Write record from pos 0 .. $endpos to pipe
1430 # $chunk_number = sequence number - to see if already run
1431 # $header_ref = reference to header string to prepend
1432 # $buffer_ref = reference to record to write
1433 # $recstart = start string of record
1434 # $recend = end string of record
1435 # $endpos = position in $buffer_ref where record ends
1437 # $Global::job_already_run
1439 # @Global::virgin_jobs
1441 # Number of chunks written (0 or 1)
1442 my ($chunk_number, $header_ref, $buffer_ref,
1443 $recstart, $recend, $endpos) = @_;
1444 if($endpos == 0) { return 0; }
1445 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1446 if($opt::roundrobin) {
1447 # Write the block to one of the already running jobs
1448 return round_robin_write($header_ref, $buffer_ref,
1449 $recstart, $recend, $endpos);
1451 # If no virgin found, backoff
1452 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1453 while(not @Global::virgin_jobs) {
1454 ::debug("pipe", "No virgin jobs");
1455 $sleep = ::reap_usleep($sleep);
1456 # Jobs may not be started because of loadavg
1457 # or too little time between each ssh login
1458 # or retrying failed jobs.
1461 my $job = shift @Global::virgin_jobs;
1462 $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend);
1463 $job->write_block();
1468 sub __SEM_MODE__() {}
1471 sub acquire_semaphore() {
1472 # Acquires semaphore. If needed: spawns to the background
1476 # The semaphore to be released when jobs is complete
1477 $Global::host{':'} = SSHLogin->new(":");
1478 my $sem = Semaphore->new($Semaphore::name,
1479 $Global::host{':'}->max_jobs_running());
1481 if($Semaphore::fg) {
1487 # If run in the background, the PID will change
1495 sub __PARSE_OPTIONS__() {}
1498 sub options_hash() {
1500 # %hash = the GetOptions config
1502 ("debug|D=s" => \$opt::D,
1503 "xargs" => \$opt::xargs,
1507 "sql=s" => \$opt::retired,
1508 "sqlmaster=s" => \$opt::sqlmaster,
1509 "sqlworker=s" => \$opt::sqlworker,
1510 "sqlandworker=s" => \$opt::sqlandworker,
1511 "joblog|jl=s" => \$opt::joblog,
1512 "results|result|res=s" => \$opt::results,
1513 "resume" => \$opt::resume,
1514 "resume-failed|resumefailed" => \$opt::resume_failed,
1515 "retry-failed|retryfailed" => \$opt::retry_failed,
1516 "silent" => \$opt::silent,
1517 "keep-order|keeporder|k" => \$opt::keeporder,
1518 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1519 "group" => \$opt::group,
1520 "g" => \$opt::retired,
1521 "ungroup|u" => \$opt::ungroup,
1522 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1523 => \$opt::linebuffer,
1524 "tmux" => \$opt::tmux,
1525 "tmuxpane" => \$opt::tmuxpane,
1526 "null|0" => \$opt::null,
1527 "quote|q" => \$opt::quote,
1528 # Replacement strings
1529 "parens=s" => \$opt::parens,
1530 "rpl=s" => \@opt::rpl,
1531 "plus" => \$opt::plus,
1533 "extensionreplace|er=s" => \$opt::U,
1534 "U=s" => \$opt::retired,
1535 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1536 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1537 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1538 "seqreplace=s" => \$opt::seqreplace,
1539 "slotreplace=s" => \$opt::slotreplace,
1540 "jobs|j=s" => \$opt::jobs,
1541 "delay=s" => \$opt::delay,
1542 "sshdelay=f" => \$opt::sshdelay,
1543 "load=s" => \$opt::load,
1544 "noswap" => \$opt::noswap,
1545 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1546 "number-of-cpus" => \$opt::number_of_cpus,
1547 "number-of-sockets" => \$opt::number_of_sockets,
1548 "number-of-cores" => \$opt::number_of_cores,
1549 "number-of-threads" => \$opt::number_of_threads,
1550 "use-sockets-instead-of-threads"
1551 => \$opt::use_sockets_instead_of_threads,
1552 "use-cores-instead-of-threads"
1553 => \$opt::use_cores_instead_of_threads,
1554 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1555 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1556 "nice=i" => \$opt::nice,
1557 "tag" => \$opt::tag,
1558 "tagstring|tag-string=s" => \$opt::tagstring,
1559 "ctag" => \$opt::ctag,
1560 "ctagstring|ctag-string=s" => \$opt::ctagstring,
1561 "onall" => \$opt::onall,
1562 "nonall" => \$opt::nonall,
1563 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1564 "sshlogin|S=s" => \@opt::sshlogin,
1565 "sshloginfile|slf=s" => \@opt::sshloginfile,
1566 "controlmaster|M" => \$opt::controlmaster,
1567 "ssh=s" => \$opt::ssh,
1568 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1569 => \@opt::transfer_files,
1570 "return=s" => \@opt::return,
1571 "trc=s" => \@opt::trc,
1572 "transfer" => \$opt::transfer,
1573 "cleanup" => \$opt::cleanup,
1574 "basefile|bf=s" => \@opt::basefile,
1575 "template|tmpl=s" => \%opt::template,
1576 "B=s" => \$opt::retired,
1577 "ctrlc|ctrl-c" => \$opt::retired,
1578 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1579 "workdir|work-dir|wd=s" => \$opt::workdir,
1580 "W=s" => \$opt::retired,
1581 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1582 "tmpdir|tempdir=s" => \$opt::tmpdir,
1583 "use-compress-program|compress-program=s" => \$opt::compress_program,
1584 "use-decompress-program|decompress-program=s"
1585 => \$opt::decompress_program,
1586 "compress" => \$opt::compress,
1587 "tty" => \$opt::tty,
1588 "T" => \$opt::retired,
1589 "H=i" => \$opt::retired,
1590 "dry-run|dryrun|dr" => \$opt::dryrun,
1591 "progress" => \$opt::progress,
1592 "eta" => \$opt::eta,
1593 "bar" => \$opt::bar,
1594 "shuf" => \$opt::shuf,
1595 "arg-sep|argsep=s" => \$opt::arg_sep,
1596 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1597 "trim=s" => \$opt::trim,
1598 "env=s" => \@opt::env,
1599 "recordenv|record-env" => \$opt::record_env,
1600 "session" => \$opt::session,
1601 "plain" => \$opt::plain,
1602 "profile|J=s" => \@opt::profile,
1603 "tollef" => \$opt::tollef,
1604 "gnu" => \$opt::gnu,
1605 "link|xapply" => \$opt::link,
1606 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1607 # Before changing these lines, please read
1608 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1609 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1610 # You accept to be put in a public hall of shame by removing
1612 "bibtex|citation" => \$opt::citation,
1613 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1614 # Termination and retries
1615 "halt-on-error|halt=s" => \$opt::halt,
1616 "limit=s" => \$opt::limit,
1617 "memfree=s" => \$opt::memfree,
1618 "memsuspend=s" => \$opt::memsuspend,
1619 "retries=s" => \$opt::retries,
1620 "timeout=s" => \$opt::timeout,
1621 "termseq|term-seq=s" => \$opt::termseq,
1622 # xargs-compatibility - implemented, man, testsuite
1623 "max-procs|P=s" => \$opt::jobs,
1624 "delimiter|d=s" => \$opt::d,
1625 "max-chars|s=s" => \$opt::max_chars,
1626 "arg-file|a=s" => \@opt::a,
1627 "no-run-if-empty|r" => \$opt::r,
1628 "replace|i:s" => \$opt::i,
1629 "E=s" => \$opt::eof,
1630 "eof|e:s" => \$opt::eof,
1631 "max-args|maxargs|n=s" => \$opt::max_args,
1632 "max-replace-args|N=s" => \$opt::max_replace_args,
1633 "colsep|col-sep|C=s" => \$opt::colsep,
1635 "help|h" => \$opt::help,
1637 "max-lines|l:f" => \$opt::max_lines,
1638 "interactive|p" => \$opt::interactive,
1639 "verbose|t" => \$opt::verbose,
1640 "version|V" => \$opt::version,
1641 "minversion|min-version=i" => \$opt::minversion,
1642 "show-limits|showlimits" => \$opt::show_limits,
1643 "exit|x" => \$opt::x,
1645 "semaphore" => \$opt::semaphore,
1646 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1647 "semaphorename|id=s" => \$opt::semaphorename,
1650 "wait" => \$opt::wait,
1651 # Shebang #!/usr/bin/parallel --shebang
1652 "shebang|hashbang" => \$opt::shebang,
1653 "internal-pipe-means-argfiles"
1654 => \$opt::internal_pipe_means_argfiles,
1655 "Y" => \$opt::retired,
1656 "skip-first-line" => \$opt::skip_first_line,
1657 "bug" => \$opt::bug,
1659 "pipe|spreadstdin" => \$opt::pipe,
1660 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1661 "recstart=s" => \$opt::recstart,
1662 "recend=s" => \$opt::recend,
1663 "regexp|regex" => \$opt::regexp,
1664 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1665 "files|output-as-files|outputasfiles" => \$opt::files,
1666 "block|block-size|blocksize=s" => \$opt::blocksize,
1667 "blocktimeout|block-timeout|bt=s" => \$opt::blocktimeout,
1668 "header=s" => \$opt::header,
1669 "cat" => \$opt::cat,
1670 "fifo" => \$opt::fifo,
1671 "pipepart|pipe-part" => \$opt::pipepart,
1672 "tee" => \$opt::tee,
1673 "shard=s" => \$opt::shard,
1674 "bin=s" => \$opt::bin,
1675 "groupby|group-by=s" => \$opt::groupby,
1677 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1678 "embed" => \$opt::embed,
1679 "filter=s" => \@opt::filter,
1680 "parset=s" => \$opt::parset,
1684 sub get_options_from_array($@) {
1685 # Run GetOptions on @array
1687 # $array_ref = ref to @ARGV to parse
1688 # @keep_only = Keep only these options
1692 # true if parsing worked
1693 # false if parsing failed
1694 # @$array_ref is changed
1695 my ($array_ref, @keep_only) = @_;
1696 if(not @$array_ref) {
1697 # Empty array: No need to look more at that
1700 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1701 # supported everywhere
1703 my $this_is_ARGV = (\@::ARGV == $array_ref);
1704 if(not $this_is_ARGV) {
1705 @save_argv = @::ARGV;
1706 @::ARGV = @{$array_ref};
1708 # If @keep_only set: Ignore all values except @keep_only
1709 my %options = options_hash();
1712 @keep{@keep_only} = @keep_only;
1713 for my $k (grep { not $keep{$_} } keys %options) {
1714 # Store the value of the option in @dummy
1715 $options{$k} = \@dummy;
1718 my $retval = GetOptions(%options);
1719 if(not $this_is_ARGV) {
1720 @{$array_ref} = @::ARGV;
1721 @::ARGV = @save_argv;
1726 sub parse_parset() {
1727 $Global::progname = "parset";
1728 @Global::parset_vars = split /[ ,]/, $opt::parset;
1729 my $var_or_assoc = shift @Global::parset_vars;
1730 # Legal names: var _v2ar arrayentry[2]
1731 my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ }
1732 @Global::parset_vars);
1735 ("@illegal is an invalid variable name.",
1736 "Variable names must be letter followed by letters or digits.",
1738 " parset varname GNU Parallel options and command");
1741 if($var_or_assoc eq "assoc") {
1742 my $var = shift @Global::parset_vars;
1744 $Global::parset = "assoc";
1745 $Global::parset_endstring=")\n";
1746 } elsif($var_or_assoc eq "var") {
1747 if($#Global::parset_vars > 0) {
1748 $Global::parset = "var";
1750 my $var = shift @Global::parset_vars;
1752 $Global::parset = "array";
1753 $Global::parset_endstring=")\n";
1756 ::die_bug("parset: unknown '$opt::parset'");
1760 sub parse_options(@) {
1763 my @argv_before = @ARGV;
1764 @ARGV = read_options();
1766 # Before changing these line, please read
1767 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1768 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1769 # You accept to be added to a public hall of shame by
1770 # removing the lines.
1771 if(defined $opt::citation) {
1772 citation(\@argv_before,\@ARGV);
1776 if($opt::nokeeporder) { $opt::keeporder = undef; }
1778 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1779 if($opt::bug) { ::die_bug("test-bug"); }
1780 $Global::debug = $opt::D;
1781 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1782 || $ENV{'SHELL'} || "/bin/sh";
1783 if(not -x $Global::shell and not which($Global::shell)) {
1784 ::error("Shell '$Global::shell' not found.");
1787 ::debug("init","Global::shell $Global::shell\n");
1788 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1789 if(defined $opt::parset) { parse_parset(); }
1790 if(defined $opt::X) { $Global::ContextReplace = 1; }
1791 if(defined $opt::silent) { $Global::verbose = 0; }
1792 if(defined $opt::null) { $/ = "\0"; }
1793 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1794 parse_replacement_string_options();
1795 $opt::tag ||= $opt::ctag;
1796 $opt::tagstring ||= $opt::ctagstring;
1797 if(defined $opt::ctag or defined $opt::ctagstring) {
1800 if(defined $opt::tag and not defined $opt::tagstring) {
1802 $opt::tagstring = $Global::parensleft.$Global::parensright;
1804 if(defined $opt::tagstring) {
1805 $opt::tagstring = unquote_printf($opt::tagstring);
1806 if($opt::tagstring =~ /\Q$Global::parensleft\E.*\Q$Global::parensright\E/
1809 # --tagstring contains {= =} and --linebuffer =>
1810 # recompute replacement string for each use (do not cache)
1811 $Global::cache_replacement_eval = 0;
1814 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1815 if(defined $opt::quote) { $Global::quoting = 1; }
1816 if(defined $opt::r) { $Global::ignore_empty = 1; }
1817 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1818 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1819 if(defined $opt::max_args) {
1820 $opt::max_args = multiply_binary_prefix($opt::max_args);
1821 $Global::max_number_of_args = $opt::max_args;
1823 if(defined $opt::blocktimeout) {
1824 $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
1825 if($Global::blocktimeout < 1) {
1826 ::error("--block-timeout must be at least 1");
1830 if(defined $opt::timeout) {
1831 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1833 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1834 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1835 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1836 # Default: Same nice level as GNU Parallel is started at
1837 $opt::nice ||= eval { getpriority(0,0) } || 0;
1838 if(defined $opt::help) { usage(); exit(0); }
1839 if(defined $opt::embed) { embed(); exit(0); }
1840 if(defined $opt::sqlandworker) {
1841 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1843 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1844 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1845 if(defined $opt::csv) {
1846 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1847 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1848 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1849 my $sep = $csv_setting->{sep_char};
1850 $Global::csv = Text::CSV->new($csv_setting)
1851 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1853 if(defined $opt::header) {
1854 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1856 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1857 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1858 if(defined $opt::arg_file_sep) {
1859 $Global::arg_file_sep = $opt::arg_file_sep;
1861 if(defined $opt::number_of_sockets) {
1862 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1864 if(defined $opt::number_of_cpus) {
1865 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1867 if(defined $opt::number_of_cores) {
1868 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1870 if(defined $opt::number_of_threads) {
1871 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1873 if(defined $opt::max_line_length_allowed) {
1874 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1876 if(defined $opt::max_chars) {
1877 $opt::max_chars = multiply_binary_prefix($opt::max_chars);
1879 if(defined $opt::version) { version(); wait_and_exit(0); }
1880 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1881 if(defined $opt::show_limits) { show_limits(); }
1882 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1883 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1884 if(@opt::return) { push @Global::ret_files, @opt::return; }
1885 if($opt::transfer) {
1886 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1888 push @Global::transfer_files, @opt::transfer_files;
1889 if(%opt::template) {
1890 while (my ($source, $template_name) = each %opt::template) {
1891 if(open(my $tmpl, "<", $source)) {
1892 local $/; # $/ = undef => slurp whole file
1893 my $content = <$tmpl>;
1894 push @Global::template_names, $template_name;
1895 push @Global::template_contents, $content;
1896 ::debug("tmpl","Name: $template_name\n$content\n");
1898 ::error("Cannot open '$source'.");
1903 if(not defined $opt::recstart and
1904 not defined $opt::recend) { $opt::recend = "\n"; }
1905 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1906 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1907 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1908 $Global::blocksize = 2**31-1;
1910 if($^O eq "cygwin" and
1911 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1912 and $Global::blocksize > 65535) {
1913 warning("--blocksize >= 64K causes problems on Cygwin.");
1915 $opt::memfree = multiply_binary_prefix($opt::memfree);
1916 $opt::memsuspend = multiply_binary_prefix($opt::memsuspend);
1917 $Global::memlimit = $opt::memsuspend + $opt::memfree;
1918 check_invalid_option_combinations();
1919 if((defined $opt::fifo or defined $opt::cat)
1920 and not $opt::pipepart) {
1923 if(defined $opt::minversion) {
1924 print $Global::version,"\n";
1925 if($Global::version < $opt::minversion) {
1931 if(not defined $opt::delay) {
1932 # Set --delay to --sshdelay if not set
1933 $opt::delay = $opt::sshdelay;
1935 $Global::sshdelayauto = $opt::sshdelay =~ s/auto$//;
1936 $opt::sshdelay = multiply_time_units($opt::sshdelay);
1937 $Global::delayauto = $opt::delay =~ s/auto$//;
1938 $opt::delay = multiply_time_units($opt::delay);
1939 if($opt::compress_program) {
1941 $opt::decompress_program ||= $opt::compress_program." -dc";
1944 if(defined $opt::results) {
1945 # Is the output a dir or CSV-file?
1946 if($opt::results =~ /\.csv$/i) {
1947 # CSV with , as separator
1948 $Global::csvsep = ",";
1949 $Global::membuffer ||= 1;
1950 } elsif($opt::results =~ /\.tsv$/i) {
1951 # CSV with TAB as separator
1952 $Global::csvsep = "\t";
1953 $Global::membuffer ||= 1;
1954 } elsif($opt::results =~ /\.json$/i) {
1956 $Global::jsonout ||= 1;
1957 $Global::membuffer ||= 1;
1960 if($opt::compress) {
1961 my ($compress, $decompress) = find_compression_program();
1962 $opt::compress_program ||= $compress;
1963 $opt::decompress_program ||= $decompress;
1964 if(($opt::results and not $Global::csvsep) or $opt::files) {
1965 # No need for decompressing
1966 $opt::decompress_program = "cat >/dev/null";
1969 if(defined $opt::dryrun) {
1970 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1974 if(defined $opt::nonall) {
1975 # Append a dummy empty argument if there are no arguments
1976 # on the command line to avoid reading from STDIN.
1977 # arg_sep = random 50 char
1978 # \0noarg => nothing (not the empty string)
1979 $Global::arg_sep = join "",
1980 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1981 push @ARGV, $Global::arg_sep, "\0noarg";
1983 if(defined $opt::tee) {
1984 if(not defined $opt::jobs) {
1988 if(defined $opt::tty) {
1989 # Defaults for --tty: -j1 -u
1990 # Can be overridden with -jXXX -g
1991 if(not defined $opt::jobs) {
1994 if(not defined $opt::group) {
1999 push @Global::ret_files, @opt::trc;
2000 if(not @Global::transfer_files) {
2001 # Defaults to --transferfile {}
2002 push @Global::transfer_files, $opt::i || $opt::I || "{}";
2006 if(defined $opt::max_lines) {
2007 if($opt::max_lines eq "-0") {
2008 # -l -0 (swallowed -0)
2009 $opt::max_lines = 1;
2013 $opt::max_lines = multiply_binary_prefix($opt::max_lines);
2014 if ($opt::max_lines == 0) {
2015 # If not given (or if 0 is given) => 1
2016 $opt::max_lines = 1;
2020 $Global::max_lines = $opt::max_lines;
2021 if(not $opt::pipe) {
2022 # --pipe -L means length of record - not max_number_of_args
2023 $Global::max_number_of_args ||= $Global::max_lines;
2027 # Read more than one arg at a time (-L, -N)
2028 if(defined $opt::L) {
2029 $opt::L = multiply_binary_prefix($opt::L);
2030 $Global::max_lines = $opt::L;
2031 if(not $opt::pipe) {
2032 # --pipe -L means length of record - not max_number_of_args
2033 $Global::max_number_of_args ||= $Global::max_lines;
2036 if(defined $opt::max_replace_args) {
2037 $opt::max_replace_args = multiply_binary_prefix($opt::max_replace_args);
2038 $Global::max_number_of_args = $opt::max_replace_args;
2039 $Global::ContextReplace = 1;
2041 if((defined $opt::L or defined $opt::max_replace_args)
2043 not ($opt::xargs or $opt::m)) {
2044 $Global::ContextReplace = 1;
2046 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
2047 # Deal with ::: :::+ :::: and ::::+
2048 @ARGV = read_args_from_command_line();
2052 if(defined $opt::eta) { $opt::progress = $opt::eta; }
2053 if(defined $opt::bar) { $opt::progress = $opt::bar; }
2055 # Funding a free software project is hard. GNU Parallel is no
2056 # exception. On top of that it seems the less visible a project
2057 # is, the harder it is to get funding. And the nature of GNU
2058 # Parallel is that it will never be seen by "the guy with the
2059 # checkbook", but only by the people doing the actual work.
2061 # This problem has been covered by others - though no solution has
2063 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
2064 # https://blog.licensezero.com/2019/08/24/process-of-elimination.html
2065 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
2067 # The FAQ tells you why the citation notice exists:
2068 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
2070 # If you want GNU Parallel to be maintained in the future, and not
2071 # just wither away like so many other free software tools, you
2072 # need to help finance the development.
2074 # The citation notice is a simple way of doing so, as citations
2075 # makes it possible to me to get a job where I can maintain GNU
2076 # Parallel as part of the job.
2078 # This means you can help financing development
2080 # WITHOUT PAYING A SINGLE CENT!
2082 # Before implementing the citation notice it was discussed with
2084 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2086 # Having to spend 10 seconds on running 'parallel --citation' once
2087 # is no doubt not an ideal solution, but no one has so far come up
2088 # with an ideal solution - neither for funding GNU Parallel nor
2089 # other free software.
2091 # If you believe you have the perfect solution, you should try it
2092 # out, and if it works, you should post it on the email
2093 # list. Ideas that will cost work and which have not been tested
2094 # are, however, unlikely to be prioritized.
2096 # Please note that GPL version 3 gives you the right to fork GNU
2097 # Parallel under a new name, but it does not give you the right to
2098 # distribute modified copies with the citation notice disabled in
2099 # a way where the software can be confused with GNU Parallel. To
2100 # do that you need to be the owner of the GNU Parallel
2101 # trademark. The xt:Commerce case shows this.
2103 # Description of the xt:Commerce case in OLG Duesseldorf
2104 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2105 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2107 # The verdict in German
2108 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2109 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2111 # Other free software limiting derivates by the same name:
2112 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2113 # https://tm.joomla.org/trademark-faq.html
2114 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2116 # Running 'parallel --citation' one single time takes less than 10
2117 # seconds, and will silence the citation notice for future
2118 # runs. If that is too much trouble for you, why not use one of
2119 # the alternatives instead?
2120 # See a list in: 'man parallel_alternatives'
2122 # If you want GNU Parallel to be maintained in the future you
2123 # should keep this line.
2125 # _YOU_ will be harming free software by removing the notice. You
2126 # accept to be added to a public hall of shame by removing the
2127 # line. This is because _YOU_ actively make it harder to justify
2128 # spending time developing GNU Parallel.
2132 if($ENV{'PARALLEL_ENV'}) {
2133 # Read environment and set $Global::parallel_env
2134 # Must be done before is_acceptable_command_line_length()
2135 my $penv = $ENV{'PARALLEL_ENV'};
2136 # unset $PARALLEL_ENV: It should not be given to children
2137 # because it takes up a lot of env space
2138 delete $ENV{'PARALLEL_ENV'};
2140 # This is a file/fifo: Replace envvar with content of file
2141 open(my $parallel_env, "<", $penv) ||
2142 ::die_bug("Cannot read parallel_env from $penv");
2143 local $/; # Put <> in slurp mode
2144 $penv = <$parallel_env>;
2145 close $parallel_env;
2147 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2148 $penv =~ s/\001/\n/g;
2150 ::warning('\0 (NUL) in environment is not supported');
2152 $Global::parallel_env = $penv;
2157 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
2158 # As we do not know the max line length on the remote machine
2159 # long commands generated by xargs may fail
2160 # If $opt::max_replace_args is set, it is probably safe
2161 ::warning("Using -X or -m with --sshlogin may fail.");
2164 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2167 if($opt::sqlmaster or $opt::sqlworker) {
2168 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2170 if($opt::sqlworker) { $Global::membuffer ||= 1; }
2171 # The sqlmaster groups the arguments, so the should just read one
2172 if($opt::sqlworker and not $opt::sqlmaster) { $Global::max_number_of_args = 1; }
2175 sub check_invalid_option_combinations() {
2176 if(defined $opt::timeout and
2177 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2178 ::error("--timeout must be seconds or percentage.");
2181 if(defined $opt::fifo and defined $opt::cat) {
2182 ::error("--fifo cannot be combined with --cat.");
2183 ::wait_and_exit(255);
2185 if(defined $opt::retries and defined $opt::roundrobin) {
2186 ::error("--retries cannot be combined with --roundrobin.");
2187 ::wait_and_exit(255);
2189 if(defined $opt::pipepart and
2190 (defined $opt::L or defined $opt::max_lines
2191 or defined $opt::max_replace_args)) {
2192 ::error("--pipepart is incompatible with --max-replace-args, ".
2193 "--max-lines, and -L.");
2196 if(defined $opt::group and $opt::ungroup) {
2197 ::error("--group cannot be combined with --ungroup.");
2198 ::wait_and_exit(255);
2200 if(defined $opt::group and $opt::linebuffer) {
2201 ::error("--group cannot be combined with --line-buffer.");
2202 ::wait_and_exit(255);
2204 if(defined $opt::ungroup and $opt::linebuffer) {
2205 ::error("--ungroup cannot be combined with --line-buffer.");
2206 ::wait_and_exit(255);
2208 if(defined $opt::tollef and not $opt::gnu) {
2209 ::error("--tollef has been retired.",
2210 "Remove --tollef or use --gnu to override --tollef.");
2211 ::wait_and_exit(255);
2213 if(defined $opt::retired) {
2214 ::error("-g has been retired. Use --group.",
2215 "-B has been retired. Use --bf.",
2216 "-T has been retired. Use --tty.",
2217 "-U has been retired. Use --er.",
2218 "-W has been retired. Use --wd.",
2219 "-Y has been retired. Use --shebang.",
2220 "-H has been retired. Use --halt.",
2221 "--sql has been retired. Use --sqlmaster.",
2222 "--ctrlc has been retired.",
2223 "--noctrlc has been retired.");
2224 ::wait_and_exit(255);
2227 if(not $opt::pipe and not $opt::pipepart) {
2230 if($opt::remove_rec_sep) {
2231 ::error("--remove-rec-sep is not compatible with --groupby");
2232 ::wait_and_exit(255);
2234 if($opt::recstart) {
2235 ::error("--recstart is not compatible with --groupby");
2236 ::wait_and_exit(255);
2238 if($opt::recend ne "\n") {
2239 ::error("--recend is not compatible with --groupby");
2240 ::wait_and_exit(255);
2245 sub init_globals() {
2247 $Global::version = 20210922;
2248 $Global::progname = 'parallel';
2249 $::name = "GNU Parallel";
2250 $Global::infinity = 2**31;
2252 $Global::verbose = 0;
2253 # Don't quote every part of the command line
2254 $Global::quoting = 0;
2255 # Quote replacement strings
2256 $Global::quote_replace = 1;
2257 $Global::total_completed = 0;
2258 $Global::cache_replacement_eval = 1;
2259 # Read only table with default --rpl values
2263 '{#}' => '1 $_=$job->seq()',
2264 '{%}' => '1 $_=$job->slot()',
2267 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2268 '$_ = dirname($_);'),
2269 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
2270 '{.}' => 's:\.[^/.]+$::',
2275 # = {.}.{+.} = {+/}/{/.}.{+.}
2276 # = {..}.{+..} = {+/}/{/..}.{+..}
2277 # = {...}.{+...} = {+/}/{/...}.{+...}
2278 '{+/}' => 's:/[^/]*$::',
2279 '{+.}' => 's:.*\.::',
2280 '{+..}' => 's:.*\.([^.]*\.):$1:',
2281 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
2282 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
2283 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2284 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2285 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2286 # n choose k = Binomial coefficient
2287 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
2288 # {##} = number of jobs
2289 '{##}' => '1 $_=total_jobs()',
2290 # {0%} = 0-padded jobslot
2291 '{0%}' => '1 $f=1+int((log($Global::max_jobs_running||1)/log(10))); $_=sprintf("%0${f}d",slot())',
2292 # {0%} = 0-padded seq
2293 '{0#}' => '1 $f=1+int((log(total_jobs())/log(10))); $_=sprintf("%0${f}d",seq())',
2295 ## Bash inspired replacement strings
2297 '{:-([^}]+?)}' => '$_ ||= $$1',
2299 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2301 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2302 # echo {#z.*z.} ::: z.z.z.foo => z.foo
2303 # echo {##z.*z.} ::: z.z.z.foo => foo
2305 '{#([^#}][^}]*?)}' =>
2306 '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;',
2308 '{##([^#}][^}]*?)}' => 's/^$$1//;',
2309 # echo {%.z.*z} ::: foo.z.z.z => foo.z
2310 # echo {%%.z.*z} ::: foo.z.z.z => foo
2313 '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;',
2315 '{%%([^}]+?)}' => 's/$$1$//;',
2316 # Bash ${a/def/ghi} ${a/def/}
2317 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2319 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2321 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2323 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2325 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2327 # {slot} = $PARALLEL_JOBSLOT
2328 '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2330 '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2331 # {sshlogin} = sshlogin
2332 '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2333 # {hgrp} = hostgroups of the host
2334 '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()',
2335 # {agrp} = hostgroups of the argument
2336 '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()',
2338 # Modifiable copy of %Global::replace
2339 %Global::rpl = %Global::replace;
2341 $Global::ignore_empty = 0;
2342 $Global::interactive = 0;
2343 $Global::stderr_verbose = 0;
2344 $Global::default_simultaneous_sshlogins = 9;
2345 $Global::exitstatus = 0;
2346 $Global::arg_sep = ":::";
2347 $Global::arg_file_sep = "::::";
2348 $Global::trim = 'n';
2349 $Global::max_jobs_running = 0;
2350 $Global::job_already_run = '';
2351 $ENV{'TMPDIR'} ||= "/tmp";
2352 $ENV{'OLDPWD'} = $ENV{'PWD'};
2353 if(not $ENV{HOME}) {
2354 # $ENV{HOME} is sometimes not set if called from PHP
2355 ::warning("\$HOME not set. Using /tmp.");
2356 $ENV{HOME} = "/tmp";
2358 # no warnings to allow for undefined $XDG_*
2359 no warnings 'uninitialized';
2360 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2361 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2362 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
2363 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
2364 # Keep only dirs that exist
2365 @Global::config_dirs =
2367 $ENV{'PARALLEL_HOME'},
2368 (map { "$_/parallel" }
2370 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2371 $ENV{'HOME'} . "/.parallel");
2372 # Use first dir as config dir
2373 $Global::config_dir = $Global::config_dirs[0] ||
2374 $ENV{'HOME'} . "/.parallel";
2375 if($ENV{'PARALLEL_HOME'} =~ /./ and not -d $ENV{'PARALLEL_HOME'}) {
2376 ::warning("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist.");
2377 ::warning("Using $Global::config_dir");
2379 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
2380 # Keep only dirs that exist
2381 @Global::cache_dirs =
2383 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
2384 $Global::cache_dir = $Global::cache_dirs[0] ||
2385 $ENV{'HOME'} . "/.parallel";
2389 # $opt::halt flavours
2392 # $Global::halt_when
2393 # $Global::halt_fail
2394 # $Global::halt_success
2396 # $Global::halt_count
2397 if(defined $opt::halt) {
2398 my %halt_expansion = (
2400 "1" => "soon,fail=1",
2401 "2" => "now,fail=1",
2402 "-1" => "soon,success=1",
2403 "-2" => "now,success=1",
2405 # Expand -2,-1,0,1,2 into long form
2406 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2407 # --halt 5% == --halt soon,fail=5%
2408 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2409 # Split: soon,fail=5%
2410 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2411 if(not grep { $when eq $_ } qw(never soon now)) {
2412 ::error
("--halt must have 'never', 'soon', or 'now'.");
2413 ::wait_and_exit
(255);
2415 $Global::halt_when
= $when;
2416 if($when ne "never") {
2417 if($fail_success eq "fail") {
2418 $Global::halt_fail
= 1;
2419 } elsif($fail_success eq "success") {
2420 $Global::halt_success
= 1;
2421 } elsif($fail_success eq "done") {
2422 $Global::halt_done
= 1;
2424 ::error
("--halt $when must be followed by ,success or ,fail.");
2425 ::wait_and_exit
(255);
2427 if($pct_count =~ /^(\d+)%$/) {
2428 $Global::halt_pct
= $1/100;
2429 } elsif($pct_count =~ /^(\d+)$/) {
2430 $Global::halt_count
= $1;
2432 ::error
("--halt $when,$fail_success ".
2433 "must be followed by ,number or ,percent%.");
2434 ::wait_and_exit
(255);
2440 sub parse_replacement_string_options
() {
2444 # $Global::parensleft
2445 # $Global::parensright
2447 # $Global::parensleft
2448 # $Global::parensright
2454 # $opt::basenamereplace
2455 # $opt::dirnamereplace
2458 # $opt::basenameextensionreplace
2461 # Modify %Global::rpl
2462 # Replace $old with $new
2463 my ($old,$new) = @_;
2465 $Global::rpl
{$new} = $Global::rpl
{$old};
2466 delete $Global::rpl
{$old};
2469 my $parens = "{==}";
2470 if(defined $opt::parens
) { $parens = $opt::parens
; }
2471 my $parenslen = 0.5*length $parens;
2472 $Global::parensleft
= substr($parens,0,$parenslen);
2473 $Global::parensright
= substr($parens,$parenslen);
2474 if(defined $opt::plus
) { %Global::rpl
= (%Global::plus
,%Global::rpl
); }
2475 if(defined $opt::I
) { rpl
('{}',$opt::I
); }
2476 if(defined $opt::i
and $opt::i
) { rpl
('{}',$opt::i
); }
2477 if(defined $opt::U
) { rpl
('{.}',$opt::U
); }
2478 if(defined $opt::basenamereplace
) { rpl
('{/}',$opt::basenamereplace
); }
2479 if(defined $opt::dirnamereplace
) { rpl
('{//}',$opt::dirnamereplace
); }
2480 if(defined $opt::seqreplace
) { rpl
('{#}',$opt::seqreplace
); }
2481 if(defined $opt::slotreplace
) { rpl
('{%}',$opt::slotreplace
); }
2482 if(defined $opt::basenameextensionreplace
) {
2483 rpl
('{/.}',$opt::basenameextensionreplace
);
2486 # Create $Global::rpl entries for --rpl options
2487 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2488 my ($shorthand,$long) = split/\s/,$_,2;
2489 $Global::rpl
{$shorthand} = $long;
2493 sub parse_semaphore
() {
2494 # Semaphore defaults
2495 # Must be done before computing number of processes and max_line_length
2496 # because when running as a semaphore GNU Parallel does not read args
2499 # $Global::semaphore
2500 # $opt::semaphoretimeout
2501 # $Semaphore::timeout
2502 # $opt::semaphorename
2510 # @Global::unget_argv
2511 # $Global::default_simultaneous_sshlogins
2513 # $Global::interactive
2514 $Global::semaphore
||= ($0 =~ m
:(^|/)sem
$:); # called as 'sem'
2515 if(defined $opt::semaphore
) { $Global::semaphore
= 1; }
2516 if(defined $opt::semaphoretimeout
) { $Global::semaphore
= 1; }
2517 if(defined $opt::semaphorename
) { $Global::semaphore
= 1; }
2518 if(defined $opt::fg
and not $opt::tmux
and not $opt::tmuxpane
) {
2519 $Global::semaphore
= 1;
2521 if(defined $opt::bg
) { $Global::semaphore
= 1; }
2522 if(defined $opt::wait and not $opt::sqlmaster
) {
2523 $Global::semaphore
= 1; @ARGV = "true";
2525 if($Global::semaphore
) {
2527 # A semaphore does not take input from neither stdin nor file
2528 ::error
("A semaphore does not take input from neither stdin nor a file\n");
2529 ::wait_and_exit
(255);
2531 @opt::a
= ("/dev/null");
2532 # Append a dummy empty argument
2533 # \0 => nothing (not the empty string)
2534 push(@Global::unget_argv
, [Arg
->new("\0noarg")]);
2535 $Semaphore::timeout
= $opt::semaphoretimeout
|| 0;
2536 if(defined $opt::semaphorename
) {
2537 $Semaphore::name
= $opt::semaphorename
;
2540 $Semaphore::name
= `tty`;
2541 chomp $Semaphore::name
;
2543 $Semaphore::fg
= $opt::fg
;
2544 $Semaphore::wait = $opt::wait;
2545 $Global::default_simultaneous_sshlogins
= 1;
2546 if(not defined $opt::jobs
) {
2549 if($Global::interactive
and $opt::bg
) {
2550 ::error
("Jobs running in the ".
2551 "background cannot be interactive.");
2552 ::wait_and_exit
(255);
2558 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2560 my $ignore_filename = $Global::config_dir
. "/ignored_vars";
2561 if(open(my $vars_fh, ">", $ignore_filename)) {
2562 print $vars_fh map { $_,"\n" } keys %ENV;
2564 ::error
("Cannot write to $ignore_filename.");
2565 ::wait_and_exit
(255);
2570 # Open joblog as specified by --joblog
2573 # $opt::resume_failed
2576 # $Global::job_already_run
2579 if(($opt::resume
or $opt::resume_failed
)
2581 not ($opt::joblog
or $opt::results
)) {
2582 ::error
("--resume and --resume-failed require --joblog or --results.");
2583 ::wait_and_exit
(255);
2585 if(defined $opt::joblog
and $opt::joblog
=~ s/^\+//) {
2586 # --joblog +filename = append to filename
2593 not $opt::sqlworker
)) {
2594 # Do not log if --sqlworker
2595 if($opt::resume
|| $opt::resume_failed
|| $opt::retry_failed
) {
2596 if(open(my $joblog_fh, "<", $opt::joblog
)) {
2598 # Override $/ with \n because -d might be set
2600 # If there is a header: Open as append later
2601 $append = <$joblog_fh>;
2603 if($opt::retry_failed
) {
2604 # Make a regexp that only matches commands with exit+signal=0
2605 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2606 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2608 while(<$joblog_fh>) {
2609 if(/$joblog_regexp/o) {
2610 # This is 30% faster than set_job_already_run($1);
2611 vec($Global::job_already_run
,($1||0),1) = 1;
2612 $Global::total_completed
++;
2613 $group[$1-1] = "true";
2614 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2615 # Grab out the command
2619 ::error
("Format of '$opt::joblog' is wrong: $_");
2620 ::wait_and_exit
(255);
2624 my ($outfh,$name) = ::tmpfile
(SUFFIX
=> ".arg");
2626 # Put args into argfile
2627 if(grep /\0/, @group) {
2628 # force --null to deal with \n in commandlines
2629 ::warning
("Command lines contain newline. Forcing --null.");
2633 # Replace \0 with '\n' as used in print_joblog()
2634 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2636 exit_if_disk_full
();
2637 # Set filehandle to -a
2640 # Remove $command (so -a is run)
2643 if($opt::resume
|| $opt::resume_failed
) {
2644 if($opt::resume_failed
) {
2645 # Make a regexp that only matches commands with exit+signal=0
2646 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2647 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2649 # Just match the job number
2650 $joblog_regexp='^(\d+)';
2652 while(<$joblog_fh>) {
2653 if(/$joblog_regexp/o) {
2654 # This is 30% faster than set_job_already_run($1);
2655 vec($Global::job_already_run
,($1||0),1) = 1;
2656 $Global::total_completed
++;
2657 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2658 ::error
("Format of '$opt::joblog' is wrong: $_");
2659 ::wait_and_exit
(255);
2665 # $opt::null may be set if the commands contain \n
2666 if($opt::null
) { $/ = "\0"; }
2669 # Do not write to joblog in a dry-run
2670 if(not open($Global::joblog
, ">", "/dev/null")) {
2671 ::error
("Cannot write to --joblog $opt::joblog.");
2672 ::wait_and_exit
(255);
2676 if(not open($Global::joblog
, ">>", $opt::joblog
)) {
2677 ::error
("Cannot append to --joblog $opt::joblog.");
2678 ::wait_and_exit
(255);
2681 if($opt::joblog
eq "-") {
2682 # Use STDOUT as joblog
2683 $Global::joblog
= $Global::fh
{1};
2684 } elsif(not open($Global::joblog
, ">", $opt::joblog
)) {
2685 # Overwrite the joblog
2686 ::error
("Cannot write to --joblog $opt::joblog.");
2687 ::wait_and_exit
(255);
2689 print $Global::joblog
2690 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2691 "Send", "Receive", "Exitval", "Signal", "Command"
2697 sub open_json_csv
() {
2699 # Output as JSON/CSV/TSV
2700 if($opt::results
eq "-.csv"
2702 $opt::results
eq "-.tsv"
2704 $opt::results
eq "-.json") {
2705 # Output as JSON/CSV/TSV on stdout
2706 open $Global::csv_fh
, ">&", "STDOUT" or
2707 ::die_bug
("Can't dup STDOUT in csv: $!");
2708 # Do not print any other output to STDOUT
2709 # by forcing all other output to /dev/null
2710 open my $fd, ">", "/dev/null" or
2711 ::die_bug
("Can't >/dev/null in csv: $!");
2712 $Global::fh
{1} = $fd;
2713 $Global::fh
{2} = $fd;
2714 } elsif($Global::csvsep
) {
2715 if(not open($Global::csv_fh
,">",$opt::results
)) {
2716 ::error
("Cannot open results file `$opt::results': ".
2724 sub find_compression_program
() {
2725 # Find a fast compression program
2727 # $compress_program = compress program with options
2728 # $decompress_program = decompress program with options
2730 # Search for these. Sorted by speed on 128 core
2732 # seq 120000000|shuf > 1gb &
2734 # apt install make g++ htop
2735 # wget -O - pi.dk/3 | bash
2736 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2737 # git clone https://github.com/facebook/zstd.git
2738 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2739 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2740 # chmod +x /usr/local/bin/lrz
2742 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2743 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2744 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2745 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2749 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2751 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2752 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2753 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2754 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2755 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2757 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2758 lrz pxz bzip2 lzma xz clzip);
2761 return ("$p -c -1","$p -dc");
2765 return ("cat","cat");
2768 sub read_options
() {
2769 # Read options from command line, profile and $PARALLEL
2771 # $opt::shebang_wrap
2779 # @ARGV_no_opt = @ARGV without --options
2781 # This must be done first as this may exec myself
2782 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2783 $ARGV[0] =~ /^--shebang-?wrap/ or
2784 $ARGV[0] =~ /^--hashbang/)) {
2785 # Program is called from #! line in script
2786 # remove --shebang-wrap if it is set
2787 $opt::shebang_wrap
= ($ARGV[0] =~ s/^--shebang-?wrap *//);
2788 # remove --shebang if it is set
2789 $opt::shebang
= ($ARGV[0] =~ s/^--shebang *//);
2790 # remove --hashbang if it is set
2791 $opt::shebang
.= ($ARGV[0] =~ s/^--hashbang *//);
2793 my $argfile = Q
(pop @ARGV);
2794 # exec myself to split $ARGV[0] into separate fields
2795 exec "$0 --skip-first-line -a $argfile @ARGV";
2797 if($opt::shebang_wrap
) {
2800 if ($^O
eq 'freebsd') {
2801 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2802 my @nooptions = @ARGV;
2803 get_options_from_array
(\
@nooptions);
2804 while($#ARGV > $#nooptions) {
2805 push @options, shift @ARGV;
2807 while(@ARGV and $ARGV[0] ne ":::") {
2808 push @parser, shift @ARGV;
2810 if(@ARGV and $ARGV[0] eq ":::") {
2814 @options = shift @ARGV;
2816 my $script = Q
(shift @ARGV);
2817 # exec myself to split $ARGV[0] into separate fields
2818 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2822 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2823 ::warning
("--shebang and --shebang-wrap must be the first argument.\n");
2826 Getopt
::Long
::Configure
("bundling","require_order");
2827 my @ARGV_copy = @ARGV;
2828 my @ARGV_orig = @ARGV;
2829 # Check if there is a --profile to set @opt::profile
2830 get_options_from_array
(\
@ARGV_copy,"profile|J=s","plain") || die_usage
();
2831 my @ARGV_profile = ();
2833 if(not $opt::plain
) {
2834 # Add options from $PARALLEL_HOME/config and other profiles
2835 my @config_profiles = (
2836 "/etc/parallel/config",
2837 (map { "$_/config" } @Global::config_dirs
),
2838 $ENV{'HOME'}."/.parallelrc");
2839 my @profiles = @config_profiles;
2841 # --profile overrides default profiles
2843 for my $profile (@opt::profile
) {
2844 if($profile =~ m
:^\
./|^/:) {
2845 # Look for ./profile in .
2846 # Look for /profile in /
2847 push @profiles, grep { -r
$_ } $profile;
2849 # Look for the $profile in @Global::config_dirs
2850 push @profiles, grep { -r
$_ }
2851 map { "$_/$profile" } @Global::config_dirs
;
2855 for my $profile (@profiles) {
2857 ::debug
("init","Read $profile\n");
2859 open (my $in_fh, "<", $profile) ||
2860 ::die_bug
("read-profile: $profile");
2864 push @ARGV_profile, shell_words
($_);
2868 if(grep /^\Q$profile\E$/, @config_profiles) {
2869 # config file is not required to exist
2871 ::error
("$profile not readable.");
2876 # Add options from shell variable $PARALLEL
2877 if($ENV{'PARALLEL'}) {
2878 push @ARGV_env, shell_words
($ENV{'PARALLEL'});
2880 # Add options from env_parallel.csh via $PARALLEL_CSH
2881 if($ENV{'PARALLEL_CSH'}) {
2882 push @ARGV_env, shell_words
($ENV{'PARALLEL_CSH'});
2885 Getopt
::Long
::Configure
("bundling","require_order");
2886 get_options_from_array
(\
@ARGV_profile) || die_usage
();
2887 get_options_from_array
(\
@ARGV_env) || die_usage
();
2888 get_options_from_array
(\
@ARGV) || die_usage
();
2889 # What were the options given on the command line?
2890 # Used to start --sqlworker
2891 my $ai = arrayindex
(\
@ARGV_orig, \
@ARGV);
2892 @Global::options_in_argv
= @ARGV_orig[0..$ai-1];
2893 # Prepend non-options to @ARGV (such as commands like 'nice')
2894 unshift @ARGV, @ARGV_profile, @ARGV_env;
2899 # Similar to Perl's index function, but for arrays
2901 # $arr_ref1 = ref to @array1 to search in
2902 # $arr_ref2 = ref to @array2 to search for
2904 # $pos = position of @array1 in @array2, -1 if not found
2905 my ($arr_ref1,$arr_ref2) = @_;
2906 my $array1_as_string = join "", map { "\0".$_ } @
$arr_ref1;
2907 my $array2_as_string = join "", map { "\0".$_ } @
$arr_ref2;
2908 my $i = index($array1_as_string,$array2_as_string,0);
2909 if($i == -1) { return -1 }
2910 my @before = split /\0/, substr($array1_as_string,0,$i);
2914 sub read_args_from_command_line
() {
2915 # Arguments given on the command line after:
2916 # ::: ($Global::arg_sep)
2917 # :::: ($Global::arg_file_sep)
2918 # :::+ ($Global::arg_sep with --link)
2919 # ::::+ ($Global::arg_file_sep with --link)
2920 # Removes the arguments from @ARGV and:
2921 # - puts filenames into -a
2922 # - puts arguments into files and add the files to -a
2923 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2925 # @::ARGV = command option ::: arg arg arg :::: argfiles
2928 # $Global::arg_file_sep
2929 # $opt::internal_pipe_means_argfiles
2933 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2935 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2936 if($arg eq $Global::arg_sep
2938 $arg eq $Global::arg_sep
."+"
2940 $arg eq $Global::arg_file_sep
2942 $arg eq $Global::arg_file_sep
."+") {
2943 my $group_sep = $arg; # This group of arguments is args or argfiles
2945 while(defined ($arg = shift @ARGV)) {
2946 if($arg eq $Global::arg_sep
2948 $arg eq $Global::arg_sep
."+"
2950 $arg eq $Global::arg_file_sep
2952 $arg eq $Global::arg_file_sep
."+") {
2953 # exit while loop if finding new separator
2956 # If not hitting ::: :::+ :::: or ::::+
2957 # Append it to the group
2961 my $is_linked = ($group_sep =~ /\+$/) ?
1 : 0;
2962 my $is_file = ($group_sep eq $Global::arg_file_sep
2964 $group_sep eq $Global::arg_file_sep
."+");
2967 push @opt::linkinputsource
, map { $is_linked } @group;
2970 push @opt::linkinputsource
, $is_linked;
2973 or ($opt::internal_pipe_means_argfiles
and $opt::pipe)
2975 # Group of file names on the command line.
2976 # Append args into -a
2977 push @opt::a
, @group;
2979 # Group of arguments on the command line.
2980 # Put them into a file.
2982 my ($outfh,$name) = ::tmpfile
(SUFFIX
=> ".arg");
2984 # Put args into argfile
2985 print $outfh map { $_,$/ } @group;
2987 exit_if_disk_full
();
2988 # Append filehandle to -a
2989 push @opt::a
, $outfh;
2992 # $arg is ::: :::+ :::: or ::::+
2993 # so there is another group
2996 # $arg is undef -> @ARGV empty
3000 push @new_argv, $arg;
3002 # Output: @ARGV = command to run with options
3008 unlink keys %Global::unlink;
3009 map { rmdir $_ } keys %Global::unlink;
3010 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
3011 for(keys %Global::sshmaster
) {
3012 # If 'ssh -M's are running: kill them
3018 sub __QUOTING_ARGUMENTS_FOR_SHELL__
() {}
3020 sub shell_quote
(@
) {
3022 # @strings = strings to be quoted
3024 # @shell_quoted_strings = string quoted as needed by the shell
3025 return wantarray ?
(map { Q
($_) } @_) : (join" ",map { Q
($_) } @_);
3028 sub shell_quote_scalar_rc
($) {
3029 # Quote for the rc-shell
3034 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
3035 # A string was replaced
3036 # No need to test for "" or \0
3039 } elsif($a eq "\0") {
3046 sub shell_quote_scalar_csh
($) {
3050 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
3051 # This is 1% faster than the above
3052 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
3054 # quote newline in csh as \\\n
3055 ($a =~ s/[\n]/"\\\n"/go)) {
3056 # A string was replaced
3057 # No need to test for "" or \0
3060 } elsif($a eq "\0") {
3067 sub shell_quote_scalar_default
($) {
3068 # Quote for other shells (Bourne compatibles)
3070 # $string = string to be quoted
3072 # $shell_quoted = string quoted as needed by the shell
3074 if($s =~ /[^-_.+a-z0-9\/]/i
) {
3075 $s =~ s/'/'"'"'/g; # "-quote single quotes
3076 $s = "'$s'"; # '-quote entire string
3077 $s =~ s/^''//; # Remove unneeded '' at ends
3078 $s =~ s/''$//; # (faster than s/^''|''$//g)
3080 } elsif ($s eq "") {
3088 sub shell_quote_scalar
($) {
3089 # Quote the string so the shell will not expand any special chars
3091 # $string = string to be quoted
3093 # $shell_quoted = string quoted as needed by the shell
3095 # Speed optimization: Choose the correct shell_quote_scalar_*
3096 # and call that directly from now on
3097 no warnings
'redefine';
3098 if($Global::cshell
) {
3100 *shell_quote_scalar
= \
&shell_quote_scalar_csh
;
3101 } elsif($Global::shell
=~ m
:(^|/)rc
$:) {
3103 *shell_quote_scalar
= \
&shell_quote_scalar_rc
;
3106 *shell_quote_scalar
= \
&shell_quote_scalar_default
;
3108 # The sub is now redefined. Call it
3109 return shell_quote_scalar
($_[0]);
3113 # Q alias for ::shell_quote_scalar
3114 my $ret = shell_quote_scalar
($_[0]);
3115 no warnings
'redefine';
3116 *Q
= \
&::shell_quote_scalar
;
3120 sub shell_quote_file
($) {
3121 # Quote the string so shell will not expand any special chars
3122 # and prepend ./ if needed
3124 # $filename = filename to be shell quoted
3126 # $quoted_filename = filename quoted with \ and ./ if needed
3129 if($a =~ m
:^/: or $a =~ m:^\./:) {
3130 # /abs/path or ./rel/path => skip
3132 # rel/path => ./rel/path
3139 sub shell_words
(@
) {
3141 # $string = shell line
3143 # @shell_words = $string split into words as shell would do
3144 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3145 return Text
::ParseWords
::shellwords
(@_);
3148 sub perl_quote_scalar
($) {
3149 # Quote the string so perl's eval will not expand any special chars
3151 # $string = string to be quoted
3153 # $perl_quoted = string quoted with \ as needed by perl's eval
3156 $a =~ s/[\\\"\$\@]/\\$&/go;
3161 # -w complains about prototype
3163 # pQ alias for ::perl_quote_scalar
3164 my $ret = perl_quote_scalar
($_[0]);
3165 *pQ
= \
&::perl_quote_scalar
;
3169 sub unquote_printf
() {
3170 # Convert \t \n \r \000 \0
3172 # $string = string with \t \n \r \num \0
3174 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3179 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3180 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3185 sub __FILEHANDLES__
() {}
3188 sub save_stdin_stdout_stderr
() {
3189 # Remember the original STDIN, STDOUT and STDERR
3190 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3193 # $Global::original_stderr
3194 # $Global::original_stdin
3197 # TODO Disabled until we have an open3 that will take n filehandles
3198 # for my $fdno (1..61) {
3199 # # /dev/fd/62 and above are used by bash for <(cmd)
3200 # # Find file descriptors that are already opened (by the shell)
3201 # Only focus on stdout+stderr for now
3202 for my $fdno (1..2) {
3204 # 2-argument-open is used to be compatible with old perl 5.8.0
3205 # bug #43570: Perl 5.8.0 creates 61 files
3206 if(open($fh,">&=$fdno")) {
3207 $Global::fh
{$fdno}=$fh;
3210 open $Global::original_stderr
, ">&", "STDERR" or
3211 ::die_bug
("Can't dup STDERR: $!");
3212 open $Global::status_fd
, ">&", "STDERR" or
3213 ::die_bug
("Can't dup STDERR: $!");
3214 open $Global::original_stdin
, "<&", "STDIN" or
3215 ::die_bug
("Can't dup STDIN: $!");
3218 sub enough_file_handles
() {
3219 # Check that we have enough filehandles available for starting
3225 # 1 if ungrouped (thus not needing extra filehandles)
3226 # 0 if too few filehandles
3227 # 1 if enough filehandles
3228 if(not $opt::ungroup
) {
3230 my $enough_filehandles = 1;
3231 # perl uses 7 filehandles for something?
3232 # open3 uses 2 extra filehandles temporarily
3233 # We need a filehandle for each redirected file descriptor
3234 # (normally just STDOUT and STDERR)
3235 for my $i (1..(7+2+keys %Global::fh
)) {
3236 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3238 for (values %fh) { close $_; }
3239 return $enough_filehandles;
3241 # Ungrouped does not need extra file handles
3246 sub open_or_exit
($) {
3247 # Open a file name or exit if the file cannot be opened
3249 # $file = filehandle or filename to open
3251 # $Global::original_stdin
3253 # $fh = file handle to read-opened file
3256 return ($Global::original_stdin
|| *STDIN
);
3258 if(ref $file eq "GLOB") {
3259 # This is an open filehandle
3263 if(not open($fh, "<", $file)) {
3264 ::error
("Cannot open input file `$file': No such file or directory.");
3270 sub set_fh_blocking
($) {
3271 # Set filehandle as blocking
3273 # $fh = filehandle to be blocking
3277 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3279 # Get the current flags on the filehandle
3280 fcntl($fh, &F_GETFL, $flags) || die $!;
3281 # Remove non-blocking from the flags
3282 $flags &= ~&O_NONBLOCK;
3283 # Set the flags on the filehandle
3284 fcntl($fh, &F_SETFL, $flags) || die $!;
3287 sub set_fh_non_blocking($) {
3288 # Set filehandle as non-blocking
3290 # $fh = filehandle to be blocking
3294 $Global::use{"Fcntl
"} ||= eval "use Fcntl
qw(:DEFAULT :flock); 1;";
3296 # Get the current flags on the filehandle
3297 fcntl($fh, &F_GETFL, $flags) || die $!;
3298 # Add non-blocking to the flags
3299 $flags |= &O_NONBLOCK;
3300 # Set the flags on the filehandle
3301 fcntl($fh, &F_SETFL, $flags) || die $!;
3305 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3308 # Variable structure:
3310 # $Global::running{$pid} = Pointer to Job-object
3311 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3312 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3313 # $Global::total_running = total number of running jobs
3314 # $Global::total_started = total jobs started
3315 # $Global::max_procs_file = filename if --jobs is given a filename
3316 # $Global::JobQueue = JobQueue object for the queue of jobs
3317 # $Global::timeoutq = queue of times where jobs timeout
3318 # $Global::newest_job = Job object of the most recent job started
3319 # $Global::newest_starttime = timestamp of $Global::newest_job
3321 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
3322 # $Global::start_no_new_jobs = should more jobs be started?
3323 # $Global::original_stderr = file handle for STDERR when the program started
3324 # $Global::total_started = total number of jobs started
3325 # $Global::joblog = filehandle of joblog
3326 # $Global::debug = Is debugging on?
3327 # $Global::exitstatus = status code of GNU Parallel
3328 # $Global::quoting = quote the command to run
3330 sub init_run_jobs() {
3331 # Set Global variables and progress signal handlers
3332 # Do the copying of basefiles
3334 $Global::total_running = 0;
3335 $Global::total_started = 0;
3336 $SIG{USR1} = \&list_running_jobs;
3337 $SIG{USR2} = \&toggle_progress;
3338 if(@opt::basefile) { setup_basefile(); }
3344 my $max_procs_file_last_mod;
3346 sub changed_procs_file {
3347 # If --jobs is a file and it is modfied:
3348 # Force recomputing of max_jobs_running for each $sshlogin
3350 # $Global::max_procs_file
3353 if($Global::max_procs_file) {
3355 my $mtime = (stat($Global::max_procs_file))[9];
3356 $max_procs_file_last_mod ||= 0;
3357 if($mtime > $max_procs_file_last_mod) {
3358 # file changed: Force re-computing max_jobs_running
3359 $max_procs_file_last_mod = $mtime;
3360 for my $sshlogin (values %Global::host) {
3361 $sshlogin->set_max_jobs_running(undef);
3367 sub changed_sshloginfile {
3368 # If --slf is changed:
3373 # @opt::sshloginfile
3376 # $opt::filter_hosts
3378 if(@opt::sshloginfile) {
3379 # Is --sshloginfile changed?
3380 for my $slf (@opt::sshloginfile) {
3381 my $actual_file = expand_slf_shorthand($slf);
3382 my $mtime = (stat($actual_file))[9];
3383 $last_mtime{$actual_file} ||= $mtime;
3384 if($mtime - $last_mtime{$actual_file} > 1) {
3385 ::debug("run
","--sshloginfile
$actual_file changed
. reload
\n");
3386 $last_mtime{$actual_file} = $mtime;
3389 @Global::sshlogin = ();
3390 for (values %Global::host) {
3391 # Don't start new jobs on any host
3392 # except the ones added back later
3393 $_->set_max_jobs_running(0);
3395 # This will set max_jobs_running on the SSHlogins
3396 read_sshloginfile($actual_file);
3398 $opt::filter_hosts and filter_hosts();
3405 sub start_more_jobs {
3406 # Run start_another_job() but only if:
3407 # * not $Global::start_no_new_jobs set
3408 # * not JobQueue is empty
3409 # * not load on server is too high
3410 # * not server swapping
3411 # * not too short time since last remote login
3414 # $Global::start_no_new_jobs
3420 # $Global::newest_starttime
3422 # $jobs_started = number of jobs started
3423 my $jobs_started = 0;
3424 if($Global::start_no_new_jobs) {
3425 return $jobs_started;
3427 if(time - ($last_time||0) > 1) {
3428 # At most do this every second
3430 changed_procs_file();
3431 changed_sshloginfile();
3433 # This will start 1 job on each --sshlogin (if possible)
3434 # thus distribute the jobs on the --sshlogins round robin
3435 for my $sshlogin (values %Global::host) {
3436 if($Global::JobQueue->empty() and not $opt::pipe) {
3437 # No more jobs in the queue
3440 debug("run
", "Running jobs before on
", $sshlogin->string(), ": ",
3441 $sshlogin->jobs_running(), "\n");
3442 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
3445 $opt::delay - 0.008 > ::now() - $Global::newest_starttime) {
3446 # It has been too short since last start
3449 if($opt::load and $sshlogin->loadavg_too_high()) {
3450 # The load is too high or unknown
3453 if($opt::noswap and $sshlogin->swapping()) {
3454 # The server is swapping
3457 if($opt::limit and $sshlogin->limit()) {
3461 if(($opt::memfree or $opt::memsuspend)
3463 $sshlogin->memfree() < $Global::memlimit) {
3464 # The server has not enough mem free
3465 ::debug("mem
", "Not starting job
: not enough mem
\n");
3468 if($sshlogin->too_fast_remote_login()) {
3469 # It has been too short since
3472 debug("run
", $sshlogin->string(),
3473 " has
", $sshlogin->jobs_running(),
3474 " out of
", $sshlogin->max_jobs_running(),
3475 " jobs running
. Start another
.\n");
3476 if(start_another_job($sshlogin) == 0) {
3477 # No more jobs to start on this $sshlogin
3478 debug("run
","No jobs started on
",
3479 $sshlogin->string(), "\n");
3482 $sshlogin->inc_jobs_running();
3483 $sshlogin->set_last_login_at(::now());
3486 debug("run
","Running jobs after on
", $sshlogin->string(), ": ",
3487 $sshlogin->jobs_running(), " of
",
3488 $sshlogin->max_jobs_running(), "\n");
3491 return $jobs_started;
3496 my $no_more_file_handles_warned;
3498 sub start_another_job() {
3499 # If there are enough filehandles
3500 # and JobQueue not empty
3501 # and not $job is in joblog
3502 # Then grab a job from Global::JobQueue,
3503 # start it at sshlogin
3504 # mark it as virgin_job
3506 # $sshlogin = the SSHLogin to start the job on
3512 # @Global::virgin_jobs
3514 # 1 if another jobs was started
3516 my $sshlogin = shift;
3517 # Do we have enough file handles to start another job?
3518 if(enough_file_handles()) {
3519 if($Global::JobQueue->empty() and not $opt::pipe) {
3520 # No more commands to run
3521 debug("start
", "Not starting
: JobQueue empty
\n");
3525 # Skip jobs already in job log
3526 # Skip jobs already in results
3528 $job = get_job_with_sshlogin($sshlogin);
3529 if(not defined $job) {
3530 # No command available for that sshlogin
3531 debug("start
", "Not starting
: no jobs available
for ",
3532 $sshlogin->string(), "\n");
3535 if($job->is_already_in_joblog()) {
3538 } while ($job->is_already_in_joblog()
3540 ($opt::results and $opt::resume and $job->is_already_in_results()));
3541 debug("start
", "Command to run on
'", $job->sshlogin()->string(), "': '",
3542 $job->replaced(),"'\n");
3545 if($job->virgin()) {
3546 push(@Global::virgin_jobs,$job);
3548 # Block already set: This is a retry
3549 $job->write_block();
3552 debug("start
", "Started as seq
", $job->seq(),
3553 " pid
:", $job->pid(), "\n");
3556 # Not enough processes to run the job.
3557 # Put it back on the queue.
3558 $Global::JobQueue->unget($job);
3559 # Count down the number of jobs to run for this SSHLogin.
3560 my $max = $sshlogin->max_jobs_running();
3561 if($max > 1) { $max--; } else {
3563 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3564 push @arg, map { $_->orig() } @$record;
3566 ::error("No more processes
: cannot run a single job
. Something is wrong at
@arg.");
3567 ::wait_and_exit(255);
3569 $sshlogin->set_max_jobs_running($max);
3570 # Sleep up to 300 ms to give other processes time to die
3571 ::usleep(rand()*300);
3572 ::warning("No more processes
: ".
3573 "Decreasing number of running jobs to
$max.",
3574 "Try increasing
'ulimit -u' (try
: ulimit
-u
`ulimit -Hu`)",
3575 "or increasing
'nproc' in /etc/security
/limits
.conf
",
3576 "or increasing
/proc/sys
/kernel/pid_max
");
3581 # No more file handles
3582 $no_more_file_handles_warned++ or
3583 ::warning("No more file handles
. ",
3584 "Try running
'parallel -j0 -N 100 --pipe parallel -j0'",
3585 "or increasing
'ulimit -n' (try
: ulimit
-n
`ulimit -Hn`)",
3586 "or increasing
'nofile' in /etc/security
/limits
.conf
",
3587 "or increasing
/proc/sys
/fs/file
-max
");
3588 debug("start
", "No more file handles
. ");
3594 sub init_progress() {
3598 # list of computers for progress output
3603 my %progress = progress();
3604 return ("\nComputers
/ CPU cores / Max jobs to run
\n",
3605 $progress{'workerlist'});
3608 sub drain_job_queue(@) {
3611 # $Global::total_running
3612 # $Global::max_jobs_running
3616 # $Global::start_no_new_jobs
3619 if($opt::progress) {
3620 ::status_no_nl(init_progress());
3622 my $last_header = "";
3625 while($Global::total_running > 0) {
3626 debug("init
",$Global::total_running, "==", scalar
3627 keys %Global::running," slots
: ", $Global::max_jobs_running);
3629 # When using --pipe sometimes file handles are not
3631 for my $job (values %Global::running) {
3632 close $job->fh(0,"w
");
3635 if($opt::progress) {
3636 my %progress = progress();
3637 if($last_header ne $progress{'header'}) {
3638 ::status("", $progress{'header'});
3639 $last_header = $progress{'header'};
3641 ::status_no_nl("\r",$progress{'status'});
3643 if($Global::total_running < $Global::max_jobs_running
3644 and not $Global::JobQueue->empty()) {
3645 # These jobs may not be started because of loadavg
3646 # or too little time between each ssh login.
3647 if(start_more_jobs() > 0) {
3648 # Exponential back-on if jobs were started
3649 $sleep = $sleep/2+0.001;
3652 # Exponential back-off sleeping
3653 $sleep = ::reap_usleep($sleep);
3655 if(not $Global::JobQueue->empty()) {
3656 # These jobs may not be started:
3657 # * because there the --filter-hosts has removed all
3658 if(not %Global::host) {
3659 ::error("There are
no hosts left to run on
.");
3660 ::wait_and_exit(255);
3662 # * because of loadavg
3663 # * because of too little time between each ssh login.
3664 $sleep = ::reap_usleep($sleep);
3666 if($Global::max_jobs_running == 0) {
3667 ::warning("There are
no job slots available
. Increase
--jobs
.");
3670 while($opt::sqlmaster and not $Global::sql->finished()) {
3672 $sleep = ::reap_usleep($sleep);
3674 if($Global::start_sqlworker) {
3675 # Start an SQL worker as we are now sure there is work to do
3676 $Global::start_sqlworker = 0;
3677 if(my $pid = fork()) {
3678 $Global::unkilled_sqlworker = $pid;
3680 # Replace --sql/--sqlandworker with --sqlworker
3681 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3682 # exec the --sqlworker
3683 exec($0,@ARGV,@command);
3687 } while ($Global::total_running > 0
3689 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3691 $opt::sqlmaster and not $Global::sql->finished());
3692 if($opt::progress) {
3693 my %progress = progress();
3694 ::status("\r".$progress{'status'});
3698 sub toggle_progress() {
3699 # Turn on/off progress view
3703 $opt::progress = not $opt::progress;
3704 if($opt::progress) {
3705 ::status_no_nl(init_progress());
3714 # $Global::total_started
3716 # $workerlist = list of workers
3717 # $header = that will fit on the screen
3718 # $status = message that will fit on the screen
3720 return ("workerlist
" => "", "header
" => "", "status
" => bar());
3723 my ($status,$header)=("","");
3725 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3727 $eta = sprintf("ETA
: %ds Left
: %d AVG
: %.2fs
",
3728 $this_eta, $left, $avgtime);
3730 my $termcols = terminal_columns();
3731 my @workers = sort keys %Global::host;
3732 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3734 my %workerno = map { ($_=>$workerno++) } @workers;
3735 my $workerlist = "";
3736 for my $w (@workers) {
3738 $workerno{$w}.":".$sshlogin{$w} ." / ".
3739 ($Global::host{$w}->ncpus() || "-")." / ".
3740 $Global::host{$w}->max_jobs_running()."\n";
3742 $status = "x
"x($termcols+1);
3743 # Select an output format that will fit on a single line
3744 if(length $status > $termcols) {
3745 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3746 $header = "Computer
:jobs running
/jobs completed/%of started jobs
/Average seconds to complete
";
3750 if($Global::total_started) {
3751 my $completed = ($Global::host{$_}->jobs_completed()||0);
3752 my $running = $Global::host{$_}->jobs_running();
3753 my $time = $completed ? (time-$^T)/($completed) : "0";
3754 sprintf("%s:%d/%d/%d%%/%.1fs
",
3755 $sshlogin{$_}, $running, $completed,
3756 ($running+$completed)*100
3757 / $Global::total_started, $time);
3761 if(length $status > $termcols) {
3762 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3763 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3767 if($Global::total_started) {
3768 my $completed = ($Global::host{$_}->jobs_completed()||0);
3769 my $running = $Global::host{$_}->jobs_running();
3770 my $time = $completed ? (time-$^T)/($completed) : "0";
3771 sprintf("%s:%d/%d/%d%%/%.1fs
",
3772 $workerno{$_}, $running, $completed,
3773 ($running+$completed)*100
3774 / $Global::total_started, $time);
3778 if(length $status > $termcols) {
3779 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3780 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3784 if($Global::total_started) {
3785 sprintf("%s:%d/%d/%d%%",
3787 $Global::host{$_}->jobs_running(),
3788 ($Global::host{$_}->jobs_completed()||0),
3789 ($Global::host{$_}->jobs_running()+
3790 ($Global::host{$_}->jobs_completed()||0))*100
3791 / $Global::total_started)
3796 if(length $status > $termcols) {
3797 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3798 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3802 if($Global::total_started) {
3803 sprintf("%s:%d/%d/%d%%",
3805 $Global::host{$_}->jobs_running(),
3806 ($Global::host{$_}->jobs_completed()||0),
3807 ($Global::host{$_}->jobs_running()+
3808 ($Global::host{$_}->jobs_completed()||0))*100
3809 / $Global::total_started)
3814 if(length $status > $termcols) {
3815 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3816 $header = "Computer
:jobs running
/jobs completed
";
3819 { sprintf("%s:%d/%d",
3820 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3821 ($Global::host{$_}->jobs_completed()||0)) }
3824 if(length $status > $termcols) {
3825 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3826 $header = "Computer
:jobs running
/jobs completed
";
3829 { sprintf("%s:%d/%d",
3830 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3831 ($Global::host{$_}->jobs_completed()||0)) }
3834 if(length $status > $termcols) {
3835 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3836 $header = "Computer
:jobs running
/jobs completed
";
3839 { sprintf("%s:%d/%d",
3840 $workerno{$_}, $Global::host{$_}->jobs_running(),
3841 ($Global::host{$_}->jobs_completed()||0)) }
3844 if(length $status > $termcols) {
3845 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3846 $header = "Computer
:jobs completed
";
3851 ($Global::host{$_}->jobs_completed()||0)) }
3854 if(length $status > $termcols) {
3855 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3856 $header = "Computer
:jobs completed
";
3861 ($Global::host{$_}->jobs_completed()||0)) }
3864 return ("workerlist
" => $workerlist, "header
" => $header, "status
" => $status);
3869 my ($first_completed, $smoothed_avg_time, $last_eta);
3872 # Calculate important numbers for ETA
3874 # $total = number of jobs in total
3875 # $completed = number of jobs completed
3876 # $left = number of jobs left
3877 # $pctcomplete = percent of jobs completed
3878 # $avgtime = averaged time
3879 # $eta = smoothed eta
3880 my $completed = $Global::total_completed;
3881 # In rare cases with -X will $completed > total_jobs()
3882 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3883 my $left = $total - $completed;
3884 if(not $completed) {
3885 return($total, $completed, $left, 0, 0, 0);
3887 my $pctcomplete = ::min($completed / $total,100);
3888 $first_completed ||= time;
3889 my $timepassed = (time - $first_completed);
3890 my $avgtime = $timepassed / $completed;
3891 $smoothed_avg_time ||= $avgtime;
3892 # Smooth the eta so it does not jump wildly
3893 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3894 $pctcomplete * $avgtime;
3895 my $eta = int($left * $smoothed_avg_time);
3896 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3897 # Eta jumped less that 10% up: Keep the last eta instead
3902 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3911 # $status = bar with eta, completed jobs, arg and pct
3913 $reset ||= "\033[0m
";
3914 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3916 my $arg = $Global::newest_job ?
3917 $Global::newest_job->{'commandline'}->
3918 replace_placeholders(["\257<\257>"],0,0) : "";
3919 # These chars mess up display in the terminal in US-ASCII
3920 # and in some combinations as UTF8 (e.g. ঐ ও ঔ ক 𐅪 𐅫 𐅬)
3921 $arg =~ tr/[\011-\016\033\302-\365]//d;
3922 my $eta_dhms = ::seconds_to_time_units($eta);
3924 sprintf("%d%% %d:%d=%s %s",
3925 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3926 my $terminal_width = terminal_columns();
3927 my $s = sprintf("%-${terminal_width
}s
",
3928 substr($bar_text." "x$terminal_width,
3929 0,$terminal_width));
3930 my $width = int($terminal_width * $pctcomplete);
3931 substr($s,$width,0) = $reset;
3932 my $zenity = sprintf("%-${terminal_width
}s
",
3933 substr("# $eta sec $arg",
3934 0,$terminal_width));
3935 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3936 "\r" . $rev . $s . $reset;
3942 my ($columns,$last_column_time);
3944 sub terminal_columns
() {
3945 # Get the number of columns of the terminal.
3946 # Only update once per second.
3948 # number of columns of the screen
3949 if(not $columns or $last_column_time < time) {
3950 $last_column_time = time;
3951 $columns = $ENV{'COLUMNS'};
3953 # && true is to force spawning a shell and not just exec'ing
3954 my $stty = qx{stty
-a
</dev/tty
2>/dev/null
&& true
};
3955 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3956 # MacOSX/IRIX/AIX/Tru64
3957 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3959 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3960 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3961 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3963 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3966 # && true is to force spawning a shell and not just exec'ing
3967 my $resize = qx{resize
2>/dev/null
&& true
};
3968 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3976 # Prototype forwarding
3977 sub get_job_with_sshlogin
($);
3978 sub get_job_with_sshlogin
($) {
3980 # $sshlogin = which host should the job be run on?
3985 # $job = next job object for $sshlogin if any available
3986 my $sshlogin = shift;
3989 if ($opt::hostgroups
) {
3990 my @other_hostgroup_jobs = ();
3992 while($job = $Global::JobQueue
->get()) {
3993 if($sshlogin->in_hostgroups($job->hostgroups())) {
3994 # Found a job to be run on a hostgroup of this
3998 # This job was not in the hostgroups of $sshlogin
3999 push @other_hostgroup_jobs, $job;
4002 $Global::JobQueue
->unget(@other_hostgroup_jobs);
4003 if(not defined $job) {
4008 $job = $Global::JobQueue
->get();
4009 if(not defined $job) {
4011 ::debug
("start", "No more jobs: JobQueue empty\n");
4015 if(not $job->suspended()) {
4016 $job->set_sshlogin($sshlogin);
4018 if($opt::retries
and $job->failed_here()) {
4019 # This command with these args failed for this sshlogin
4020 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
4021 # Only look at the Global::host that have > 0 jobslots
4022 if($no_of_failed_sshlogins ==
4023 grep { $_->max_jobs_running() > 0 } values %Global::host
4024 and $job->failed_here() == $min_failures) {
4025 # It failed the same or more times on another host:
4026 # run it on this host
4028 # If it failed fewer times on another host:
4029 # Find another job to run
4031 if(not $Global::JobQueue
->empty()) {
4032 # This can potentially recurse for all args
4033 no warnings
'recursion';
4034 $nextjob = get_job_with_sshlogin
($sshlogin);
4036 # Push the command back on the queue
4037 $Global::JobQueue
->unget($job);
4045 sub __REMOTE_SSH__
() {}
4048 sub read_sshloginfiles
(@
) {
4049 # Read a list of --slf's
4051 # @files = files or symbolic file names to read
4054 read_sshloginfile
(expand_slf_shorthand
($s));
4058 sub expand_slf_shorthand
($) {
4059 # Expand --slf shorthand into a read file name
4061 # $file = file or symbolic file name to read
4063 # $file = actual file name to read
4067 } elsif($file eq "..") {
4068 $file = $Global::config_dir
."/sshloginfile";
4069 } elsif($file eq ".") {
4070 $file = "/etc/parallel/sshloginfile";
4071 } elsif(not -r
$file) {
4072 for(@Global::config_dirs
) {
4073 if(not -r
$_."/".$file) {
4074 # Try prepending $PARALLEL_HOME
4075 ::error
("Cannot open $file.");
4076 ::wait_and_exit
(255);
4078 $file = $_."/".$file;
4086 sub read_sshloginfile
($) {
4087 # Read sshloginfile into @Global::sshlogin
4089 # $file = file to read
4097 ::debug
("init","--slf ",$file);
4102 if(not open($in_fh, "<", $file)) {
4104 ::error
("Cannot open $file.");
4105 ::wait_and_exit
(255);
4112 push @Global::sshlogin
, $_;
4119 sub parse_sshlogin
() {
4120 # Parse @Global::sshlogin into %Global::host.
4121 # Keep only hosts that are in one of the given ssh hostgroups.
4124 # $Global::minimal_command_line_length
4133 if(not @Global::sshlogin
) { @Global::sshlogin
= (":"); }
4134 for my $sshlogin (@Global::sshlogin
) {
4135 # Split up -S sshlogin,sshlogin
4136 for my $s (split /,|\n/, $sshlogin) {
4137 if ($s eq ".." or $s eq "-") {
4138 # This may add to @Global::sshlogin - possibly bug
4139 read_sshloginfile
(expand_slf_shorthand
($s));
4146 $Global::minimal_command_line_length
= 100_000_000
;
4147 my @allowed_hostgroups;
4148 for my $ncpu_sshlogin_string (::uniq
(@login)) {
4149 my $sshlogin = SSHLogin
->new($ncpu_sshlogin_string);
4150 my $sshlogin_string = $sshlogin->string();
4151 if($sshlogin_string eq "") {
4152 # This is an ssh group: -S @webservers
4153 push @allowed_hostgroups, $sshlogin->hostgroups();
4156 if($Global::host
{$sshlogin_string}) {
4157 # This sshlogin has already been added:
4158 # It is probably a host that has come back
4159 # Set the max_jobs_running back to the original
4160 debug
("run","Already seen $sshlogin_string\n");
4161 if($sshlogin->{'ncpus'}) {
4162 # If ncpus set by '#/' of the sshlogin, overwrite it:
4163 $Global::host
{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4165 $Global::host
{$sshlogin_string}->set_max_jobs_running(undef);
4168 $sshlogin->set_maxlength(Limits
::Command
::max_length
());
4170 $Global::minimal_command_line_length
=
4171 ::min
($Global::minimal_command_line_length
, $sshlogin->maxlength());
4172 $Global::host
{$sshlogin_string} = $sshlogin;
4174 if(@allowed_hostgroups) {
4175 # Remove hosts that are not in these groups
4176 while (my ($string, $sshlogin) = each %Global::host
) {
4177 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4178 delete $Global::host
{$string};
4183 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4184 if(@Global::transfer_files
or @opt::return or $opt::cleanup
or @opt::basefile
) {
4185 if(not remote_hosts
()) {
4186 # There are no remote hosts
4188 ::warning
("--trc ignored as there are no remote --sshlogin.");
4189 } elsif (defined $opt::transfer
) {
4190 ::warning
("--transfer ignored as there are no remote --sshlogin.");
4191 } elsif (@opt::transfer_files
) {
4192 ::warning
("--transferfile ignored as there are no remote --sshlogin.");
4193 } elsif (@opt::return) {
4194 ::warning
("--return ignored as there are no remote --sshlogin.");
4195 } elsif (defined $opt::cleanup
and not %opt::template
) {
4196 ::warning
("--cleanup ignored as there are no remote --sshlogin.");
4197 } elsif (@opt::basefile
) {
4198 ::warning
("--basefile ignored as there are no remote --sshlogin.");
4204 sub remote_hosts
() {
4205 # Return sshlogins that are not ':'
4209 # list of sshlogins with ':' removed
4210 return grep !/^:$/, keys %Global::host
;
4213 sub setup_basefile
() {
4214 # Transfer basefiles to each $sshlogin
4215 # This needs to be done before first jobs on $sshlogin is run
4223 for my $sshlogin (values %Global::host
) {
4224 if($sshlogin->string() eq ":") { next }
4225 for my $file (@opt::basefile
) {
4226 if($file !~ m
:^/: and $opt::workdir
eq "...") {
4227 ::error
("Work dir '...' will not work with relative basefiles.");
4228 ::wait_and_exit
(255);
4231 my $dummycmdline = CommandLine
->new(1,["true"],{},0,0,[],[],[],[],{},{});
4232 my $dummyjob = Job
->new($dummycmdline);
4233 $workdir = $dummyjob->workdir();
4235 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4238 debug
("init", "basesetup: @cmd\n");
4239 my ($exitstatus,$stdout_ref,$stderr_ref) =
4240 run_gnu_parallel
((join "\n",@cmd),"-j0","--retries",5);
4242 my @stdout = @
$stdout_ref;
4243 my @stderr = @
$stderr_ref;
4244 ::error
("Copying of --basefile failed: @stdout@stderr");
4245 ::wait_and_exit
(255);
4249 sub cleanup_basefile
() {
4250 # Remove the basefiles transferred
4258 my $dummycmdline = CommandLine
->new(1,["true"],{},0,0,[],[],[],[],{},{});
4259 my $dummyjob = Job
->new($dummycmdline);
4260 $workdir = $dummyjob->workdir();
4262 for my $sshlogin (values %Global::host
) {
4263 if($sshlogin->string() eq ":") { next }
4264 for my $file (@opt::basefile
) {
4265 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
4268 debug
("init", "basecleanup: @cmd\n");
4269 my ($exitstatus,$stdout_ref,$stderr_ref) =
4270 run_gnu_parallel
(join("\n",@cmd),"-j0","--retries",5);
4272 my @stdout = @
$stdout_ref;
4273 my @stderr = @
$stderr_ref;
4274 ::error
("Cleanup of --basefile failed: @stdout@stderr");
4275 ::wait_and_exit
(255);
4279 sub run_gnu_parallel
() {
4280 my ($stdin,@args) = @_;
4281 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
4282 print $Global::original_stderr
` $cmd wait` ;
4286 sub _run_gnu_parallel
() {
4288 # This should ideally just fork an internal copy
4289 # and not start it through a shell
4291 # $stdin = data to provide on stdin for GNU Parallel
4292 # @args = command line arguments
4294 # $exitstatus = exitcode of GNU Parallel run
4295 # \@stdout = standard output
4296 # \@stderr = standard error
4297 my ($stdin,@args) = @_;
4298 my ($exitstatus,@stdout,@stderr);
4299 my ($stdin_fh,$stdout_fh)=(gensym
(),gensym
());
4300 my ($stderr_fh, $stderrname) = ::tmpfile
(SUFFIX
=> ".par");
4303 my $pid = ::open3
($stdin_fh,$stdout_fh,$stderr_fh,
4304 $0,qw(--plain --shell /bin/sh --will-cite), @args);
4305 if(my $writerpid = fork()) {
4307 @stdout = <$stdout_fh>;
4308 # Now stdout is closed:
4309 # These pids should be dead or die very soon
4310 while(kill 0, $writerpid) { ::usleep
(1); }
4313 # while(kill 0, $pid) { ::usleep(1); }
4316 seek $stderr_fh, 0, 0;
4317 @stderr = <$stderr_fh>;
4323 print $stdin_fh $stdin;
4327 return ($exitstatus,\
@stdout,\
@stderr);
4330 sub filter_hosts
() {
4331 # Remove down --sshlogins from active duty.
4332 # Find ncpus, ncores, maxlen, time-to-login for each host.
4335 # $Global::minimal_command_line_length
4336 # $opt::use_sockets_instead_of_threads
4337 # $opt::use_cores_instead_of_threads
4338 # $opt::use_cpus_instead_of_cores
4341 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
4342 $maxlen_ref, $echo_ref, $down_hosts_ref) =
4343 parse_host_filtering
(parallelized_host_filtering
());
4345 delete @Global::host
{@
$down_hosts_ref};
4346 @
$down_hosts_ref and ::warning
("Removed @$down_hosts_ref.");
4348 $Global::minimal_command_line_length
= 100_000_000
;
4349 while (my ($sshlogin, $obj) = each %Global::host
) {
4350 if($sshlogin eq ":") { next }
4351 $nsockets_ref->{$sshlogin} or
4352 ::die_bug
("nsockets missing: ".$obj->serverlogin());
4353 $ncores_ref->{$sshlogin} or
4354 ::die_bug
("ncores missing: ".$obj->serverlogin());
4355 $nthreads_ref->{$sshlogin} or
4356 ::die_bug
("nthreads missing: ".$obj->serverlogin());
4357 $time_to_login_ref->{$sshlogin} or
4358 ::die_bug
("time_to_login missing: ".$obj->serverlogin());
4359 $maxlen_ref->{$sshlogin} or
4360 ::die_bug
("maxlen missing: ".$obj->serverlogin());
4361 $obj->set_ncpus($nthreads_ref->{$sshlogin});
4362 if($opt::use_cpus_instead_of_cores
) {
4363 $obj->set_ncpus($ncores_ref->{$sshlogin});
4364 } elsif($opt::use_sockets_instead_of_threads
) {
4365 $obj->set_ncpus($nsockets_ref->{$sshlogin});
4366 } elsif($opt::use_cores_instead_of_threads
) {
4367 $obj->set_ncpus($ncores_ref->{$sshlogin});
4369 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
4370 $obj->set_maxlength($maxlen_ref->{$sshlogin});
4371 $Global::minimal_command_line_length
=
4372 ::min
($Global::minimal_command_line_length
,
4373 int($maxlen_ref->{$sshlogin}/2));
4374 ::debug
("init", "Timing from -S:$sshlogin ",
4375 " nsockets:",$nsockets_ref->{$sshlogin},
4376 " ncores:", $ncores_ref->{$sshlogin},
4377 " nthreads:",$nthreads_ref->{$sshlogin},
4378 " time_to_login:", $time_to_login_ref->{$sshlogin},
4379 " maxlen:", $maxlen_ref->{$sshlogin},
4380 " min_max_len:", $Global::minimal_command_line_length
,"\n");
4384 sub parse_host_filtering
() {
4386 # @lines = output from parallelized_host_filtering()
4388 # \%nsockets = number of sockets of {host}
4389 # \%ncores = number of cores of {host}
4390 # \%nthreads = number of hyperthreaded cores of {host}
4391 # \%time_to_login = time_to_login on {host}
4392 # \%maxlen = max command len on {host}
4393 # \%echo = echo received from {host}
4394 # \@down_hosts = list of hosts with no answer
4396 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
4399 ::debug
("init","Read: ",$_);
4401 my @col = split /\t/, $_;
4402 if($col[0] =~ /^parallel: Warning:/) {
4403 # Timed out job: Ignore it
4405 } elsif(defined $col[6]) {
4406 # This is a line from --joblog
4407 # seq host time spent sent received exit signal command
4408 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
4409 if($col[0] eq "Seq" and $col[1] eq "Host" and
4410 $col[2] eq "Starttime") {
4414 # Get server from: eval true server\;
4415 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
4416 ::die_bug
("col8 does not contain host: $col[8]");
4419 $Global::host
{$host} or next;
4420 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
4421 # exit == 255 or exit == timeout (-1): ssh failed/timedout
4422 # exit == 1: lsh failed
4424 ::debug
("init", "--filtered $host\n");
4425 push(@down_hosts, $host);
4426 } elsif($col[6] eq "127") {
4427 # signal == 127: parallel not installed remote
4428 # Set nsockets, ncores, nthreads = 1
4429 ::warning
("Could not figure out ".
4430 "number of cpus on $host. Using 1.");
4431 $nsockets{$host} = 1;
4433 $nthreads{$host} = 1;
4434 $maxlen{$host} = Limits
::Command
::max_length
();
4435 } elsif($col[0] =~ /^\d+$/ and $Global::host
{$host}) {
4436 # Remember how log it took to log in
4437 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
4438 $time_to_login{$host} = ::min
($time_to_login{$host},$col[3]);
4440 ::die_bug
("host check unmatched long jobline: $_");
4442 } elsif($Global::host
{$col[0]}) {
4443 # This output from --number-of-cores, --number-of-cpus,
4444 # --max-line-length-allowed
4447 # maxlen: server 131071
4448 if(/parallel: Warning: Cannot figure out number of/) {
4451 if(not $nsockets{$col[0]}) {
4452 $nsockets{$col[0]} = $col[1];
4453 } elsif(not $ncores{$col[0]}) {
4454 $ncores{$col[0]} = $col[1];
4455 } elsif(not $nthreads{$col[0]}) {
4456 $nthreads{$col[0]} = $col[1];
4457 } elsif(not $maxlen{$col[0]}) {
4458 $maxlen{$col[0]} = $col[1];
4459 } elsif(not $echo{$col[0]}) {
4460 $echo{$col[0]} = $col[1];
4461 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from/) {
4463 # perl: warning: Setting locale failed.
4464 # perl: warning: Please check that your locale settings:
4465 # LANGUAGE = (unset),
4467 # LANG = "en_US.UTF-8"
4468 # are supported and installed on your system.
4469 # perl: warning: Falling back to the standard locale ("C").
4470 # Disconnected from 127.0.0.1 port 22
4472 ::die_bug
("host check too many col0: $_");
4475 ::die_bug
("host check unmatched short jobline ($col[0]): $_");
4478 @down_hosts = uniq
(@down_hosts);
4479 return(\
%nsockets, \
%ncores, \
%nthreads, \
%time_to_login,
4480 \
%maxlen, \
%echo, \
@down_hosts);
4483 sub parallelized_host_filtering
() {
4487 # text entries with:
4489 # * hostname \t number of cores
4490 # * hostname \t number of cpus
4491 # * hostname \t max-line-length-allowed
4492 # * hostname \t empty
4495 # Wrap with ssh and --env
4496 # Return $default_value if command fails
4497 my $sshlogin = shift;
4498 my $command = shift;
4499 my $default_value = shift;
4500 # wrapper that returns $default_value if the command fails:
4501 # bug #57886: Errors when using different version on remote
4502 # perl -e '$a=`$command`; print $? ? "$default_value" : $a'
4503 my $wcmd = q
(perl
-e
'$a=`).$command.q(`;).
4504 q(print $? ? ").::pQ($default_value).q(" : $a');
4505 my $commandline = CommandLine
->new(1,[$wcmd],{},0,0,[],[],[],[],{},{});
4506 my $job = Job
->new($commandline);
4507 $job->set_sshlogin($sshlogin);
4509 return($job->{'wrapped'});
4512 my(@sockets, @cores, @threads, @maxline, @echo);
4513 while (my ($host, $sshlogin) = each %Global::host
) {
4514 if($host eq ":") { next }
4515 # The 'true' is used to get the $host out later
4516 push(@sockets, $host."\t"."true $host; ".
4517 sshwrapped
($sshlogin,"parallel --number-of-sockets",0)."\n\0");
4518 push(@cores, $host."\t"."true $host; ".
4519 sshwrapped
($sshlogin,"parallel --number-of-cores",0)."\n\0");
4520 push(@threads, $host."\t"."true $host; ".
4521 sshwrapped
($sshlogin,"parallel --number-of-threads",0)."\n\0");
4522 push(@maxline, $host."\t"."true $host; ".
4523 sshwrapped
($sshlogin,"parallel --max-line-length-allowed",0)."\n\0");
4524 # 'echo' is used to get the fastest possible ssh login time
4525 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4526 $sshlogin->serverlogin();
4527 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4529 # --timeout 10: Setting up an SSH connection and running a simple
4530 # command should never take > 10 sec.
4531 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4532 # will make it less likely to overload the ssh daemon.
4533 # --retries 3: If the ssh daemon is overloaded, try 3 times
4535 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4536 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4537 $cmd = $Global::shell
." -c ".Q
($cmd);
4538 ::debug
("init", $cmd, "\n");
4542 my ($host_fh,$in,$err);
4543 open3
($in, $host_fh, $err, $cmd) || ::die_bug
("parallel host check: $cmd");
4544 ::debug
("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
4547 # Give the commands to run to the $cmd
4549 print $in @sockets, @cores, @threads, @maxline, @echo;
4555 # TODO incompatible with '-quoting. Needs to be fixed differently
4557 # # if last char = ' then append next line
4558 # # This may be due to quoting of \n in environment var
4571 # Runs @command on all hosts.
4572 # Uses parallel to run @command on each host.
4573 # --jobs = number of hosts to run on simultaneously.
4574 # For each host a parallel command with the args will be running.
4594 # $opt::arg_file_sep
4598 # $Global::exitstatus
4605 # @command = command to run on all hosts
4609 # $joblog = filename of joblog - undef if none
4611 # $tmpfile = temp file for joblog - undef if none
4613 if(not defined $joblog) {
4616 my ($fh, $tmpfile) = ::tmpfile
(SUFFIX
=> ".log");
4620 my ($input_source_fh_ref,@command) = @_;
4621 if($Global::quoting
) {
4622 @command = shell_quote
(@command);
4625 # Copy all @input_source_fh (-a and :::) into tempfiles
4627 for my $fh (@
$input_source_fh_ref) {
4628 my ($outfh, $name) = ::tmpfile
(SUFFIX
=> ".all", UNLINK
=> not $opt::D
);
4629 print $outfh (<$fh>);
4631 push @argfiles, $name;
4633 if(@opt::basefile
) { setup_basefile
(); }
4634 # for each sshlogin do:
4635 # parallel -S $sshlogin $command :::: @argfiles
4637 # Pass some of the options to the sub-parallels, not all of them as
4638 # -P should only go to the first, and -S should not be copied at all.
4641 ((defined $opt::sshdelay
) ?
"--delay ".$opt::sshdelay
: ""),
4642 ((defined $opt::memfree
) ?
"--memfree ".$opt::memfree
: ""),
4643 ((defined $opt::memsuspend
) ?
"--memfree ".$opt::memsuspend
: ""),
4644 ((defined $opt::D
) ?
"-D $opt::D" : ""),
4645 ((defined $opt::group
) ?
"-g" : ""),
4646 ((defined $opt::jobs
) ?
"-P $opt::jobs" : ""),
4647 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
4648 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
4649 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
4650 ((defined $opt::plain
) ?
"--plain" : ""),
4651 ((defined $opt::ungroup
) ?
"-u" : ""),
4652 ((defined $opt::tee
) ?
"--tee" : ""),
4656 ((defined $opt::sshdelay
) ?
"--delay ".$opt::sshdelay
: ""),
4657 ((defined $opt::D
) ?
"-D $opt::D" : ""),
4658 ((defined $opt::arg_file_sep
) ?
"--arg-file-sep ".$opt::arg_file_sep
: ""),
4659 ((defined $opt::arg_sep
) ?
"--arg-sep ".$opt::arg_sep
: ""),
4660 ((defined $opt::colsep
) ?
"--colsep ".shell_quote
($opt::colsep
) : ""),
4661 ((defined $opt::files
) ?
"--files" : ""),
4662 ((defined $opt::group
) ?
"-g" : ""),
4663 ((defined $opt::cleanup
) ?
"--cleanup" : ""),
4664 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
4665 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
4666 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
4667 ((defined $opt::plain
) ?
"--plain" : ""),
4668 ((defined $opt::plus
) ?
"--plus" : ""),
4669 ((defined $opt::retries
) ?
"--retries ".$opt::retries
: ""),
4670 ((defined $opt::timeout
) ?
"--timeout ".$opt::timeout
: ""),
4671 ((defined $opt::ungroup
) ?
"-u" : ""),
4672 ((defined $opt::ssh
) ?
"--ssh '".$opt::ssh
."'" : ""),
4673 ((defined $opt::tee
) ?
"--tee" : ""),
4674 ((defined $opt::workdir
) ?
"--wd ".Q
($opt::workdir
) : ""),
4675 (@Global::transfer_files ?
map { "--tf ".Q
($_) }
4676 @Global::transfer_files
: ""),
4677 (@Global::ret_files ?
map { "--return ".Q
($_) }
4678 @Global::ret_files
: ""),
4679 (@opt::env ?
map { "--env ".Q
($_) } @opt::env
: ""),
4680 (map { "-v" } @opt::v
),
4682 ::debug
("init", "| $0 $options\n");
4683 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4684 ::die_bug
("This does not run GNU Parallel: $0 $options");
4686 for my $host (sort keys %Global::host
) {
4687 my $sshlogin = $Global::host
{$host};
4688 my $joblog = tmp_joblog
($opt::joblog
);
4690 push @joblogs, $joblog;
4691 $joblog = "--joblog $joblog";
4693 my $quad = $opt::arg_file_sep
|| "::::";
4694 # If PARALLEL_ENV is set: Pass it on
4695 my $penv=$Global::parallel_env ?
4696 "PARALLEL_ENV=".Q
($Global::parallel_env
) :
4698 ::debug
("init", "$penv $0 $suboptions -j1 $joblog ",
4699 ((defined $opt::tag
) ?
4700 "--tagstring ".Q
($sshlogin->string()) : ""),
4701 " -S ", Q
($sshlogin->string())," ",
4702 join(" ",shell_quote
(@command))," $quad @argfiles\n");
4703 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4704 ((defined $opt::tag
) ?
4705 "--tagstring ".Q
($sshlogin->string()) : ""),
4706 " -S ", Q
($sshlogin->string())," ",
4707 join(" ",shell_quote
(@command))," $quad @argfiles\0";
4710 $Global::exitstatus
= $?
>> 8;
4711 debug
("init", "--onall exitvalue ", $?
);
4712 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
4713 $Global::debug
or unlink(@argfiles);
4715 for my $joblog (@joblogs) {
4717 open(my $fh, "<", $joblog) || ::die_bug
("Cannot open tmp joblog $joblog");
4718 # Skip first line (header);
4720 print $Global::joblog
(<$fh>);
4727 sub __SIGNAL_HANDLING__
() {}
4731 # Send TSTP signal (Ctrl-Z) to all children process groups
4735 signal_children
("TSTP");
4739 # Send SIGPIPE signal to all children process groups
4743 signal_children
("PIPE");
4746 sub signal_children
() {
4747 # Send signal to all children process groups
4748 # and GNU Parallel itself
4753 debug
("run", "Sending $signal ");
4754 kill $signal, map { -$_ } keys %Global::running
;
4755 # Use default signal handler for GNU Parallel itself
4756 $SIG{$signal} = undef;
4760 sub save_original_signal_handler
() {
4761 # Remember the original signal handler
4763 # %Global::original_sig
4766 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
4770 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
4773 %Global::original_sig
= %SIG;
4774 $SIG{TERM
} = sub {}; # Dummy until jobs really start
4775 $SIG{ALRM
} = 'IGNORE';
4776 # Allow Ctrl-Z to suspend and `fg` to continue
4777 $SIG{TSTP
} = \
&sigtstp
;
4778 $SIG{PIPE
} = \
&sigpipe
;
4780 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4781 $SIG{TSTP
} = \
&sigtstp
;
4782 for my $job (values %Global::running
) {
4783 if($job->suspended()) {
4784 # Force jobs to suspend, if they are marked as suspended.
4785 # --memsupspend can suspend a job that will be resumed
4786 # if the user presses CTRL-Z followed by `fg`.
4789 # Resume the rest of the jobs
4796 sub list_running_jobs
() {
4797 # Print running jobs on tty
4801 for my $job (values %Global::running
) {
4802 ::status
("$Global::progname: ".$job->replaced());
4806 sub start_no_new_jobs
() {
4807 # Start no more jobs
4809 # %Global::original_sig
4811 # $Global::start_no_new_jobs
4813 unlink keys %Global::unlink;
4815 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4816 "$Global::progname: Waiting for these ".(keys %Global::running
).
4817 " jobs to finish. Send SIGTERM to stop now.");
4818 list_running_jobs
();
4819 $Global::start_no_new_jobs
||= 1;
4823 # Run reaper until there are no more left
4825 # @pids_reaped = pids of reaped processes
4828 while($pid = reaper
()) {
4829 push @pids_reaped, $pid;
4831 return @pids_reaped;
4836 # * Set exitstatus, exitsignal, endtime.
4837 # * Free ressources for new job
4838 # * Update median runtime
4840 # * If --halt = now: Kill children
4847 # $Global::total_running
4849 # $stiff = PID of child finished
4851 debug
("run", "Reaper ");
4852 if(($stiff = waitpid(-1, &WNOHANG
)) <= 0) {
4853 # No jobs waiting to be reaped
4857 # $stiff = pid of dead process
4858 my $job = $Global::running
{$stiff};
4860 # '-a <(seq 10)' will give us a pid not in %Global::running
4861 # The same will one of the ssh -M: ignore
4863 delete $Global::running
{$stiff};
4864 $Global::total_running
--;
4865 if($job->{'commandline'}{'skip'}) {
4866 # $job->skip() was called
4867 $job->set_exitstatus(-2);
4868 $job->set_exitsignal(0);
4870 $job->set_exitstatus($?
>> 8);
4871 $job->set_exitsignal($?
& 127);
4874 debug
("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")");
4875 if($Global::delayauto
or $Global::sshdelayauto
) {
4876 if($job->exitstatus()) {
4877 # Job failed: Increase delay (if $opt::(ssh)delay set)
4878 $opt::delay
&&= $opt::delay
* 2;
4879 $opt::sshdelay
&&= $opt::sshdelay
* 2;
4881 # Job succeeded: Decrease delay (if $opt::(ssh)delay set)
4882 $opt::delay
&&= $opt::delay
* 0.9;
4883 $opt::sshdelay
&&= $opt::sshdelay
* 0.9;
4885 debug
("run", "delay:$opt::delay ssh:$opt::sshdelay ");
4887 $job->set_endtime(::now
());
4888 my $sshlogin = $job->sshlogin();
4889 $sshlogin->dec_jobs_running();
4890 if($job->should_be_retried()) {
4891 # Free up file handles
4892 $job->free_ressources();
4895 $sshlogin->inc_jobs_completed();
4898 if($opt::timeout
and not $job->exitstatus()) {
4899 # Update average runtime for timeout only for successful jobs
4900 $Global::timeoutq
->update_median_runtime($job->runtime());
4902 if($opt::keeporder
) {
4903 $job->print_earlier_jobs();
4907 if($job->should_we_halt() eq "now") {
4909 ::kill_sleep_seq
($job->pid());
4911 ::wait_and_exit
($Global::halt_exitstatus
);
4916 if($opt::progress
) {
4917 my %progress = progress
();
4918 ::status_no_nl
("\r",$progress{'status'});
4921 debug
("run", "jobdone \n");
4930 # Kill all jobs by killing their process groups
4932 # $Global::start_no_new_jobs = we are stopping
4933 # $Global::killall = Flag to not run reaper
4934 $Global::start_no_new_jobs
||= 1;
4935 # Do not reap killed children: Ignore them instead
4936 $Global::killall
||= 1;
4937 kill_sleep_seq
(keys %Global::running
);
4940 sub kill_sleep_seq
(@
) {
4941 # Send jobs TERM,TERM,KILL to processgroups
4943 # @pids = list of pids that are also processgroups
4944 # Convert pids to process groups ($processgroup = -$pid)
4945 my @pgrps = map { -$_ } @_;
4946 my @term_seq = split/,/,$opt::termseq
;
4948 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4951 @pgrps = kill_sleep
(shift @term_seq, shift @term_seq, @pgrps);
4956 # Kill pids with a signal and wait a while for them to die
4958 # $signal = signal to send to @pids
4959 # $sleep_max = number of ms to sleep at most before returning
4960 # @pids = pids to kill (actually process groups)
4962 # $Global::killall = set by killall() to avoid calling reaper
4964 # @pids = pids still alive
4965 my ($signal, $sleep_max, @pids) = @_;
4966 ::debug
("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4967 kill $signal, @pids;
4971 while(@pids and $sleepsum < $sleep_max) {
4972 if($Global::killall
) {
4973 # Killall => don't run reaper
4974 while(waitpid(-1, &WNOHANG
) > 0) {
4975 $sleep = $sleep/2+0.001;
4977 } elsif(reapers
()) {
4978 $sleep = $sleep/2+0.001;
4982 $sleepsum += $sleep;
4983 # Keep only living children
4984 @pids = grep { kill(0, $_) } @pids;
4989 sub wait_and_exit
($) {
4990 # If we do not wait, we sometimes get segfault
4993 unlink keys %Global::unlink;
4995 # Kill all jobs without printing
4998 for (keys %Global::unkilled_children
) {
4999 # Kill any (non-jobs) children (e.g. reserved processes)
5002 delete $Global::unkilled_children
{$_};
5004 if($Global::unkilled_sqlworker
) {
5005 waitpid($Global::unkilled_sqlworker
,0);
5007 # Avoid: Warning: unable to close filehandle properly: No space
5008 # left on device during global destruction.
5009 $SIG{__WARN__
} = sub {};
5011 # Make the shell script return $error
5012 print "$Global::parset_endstring\nreturn $error";
5029 "$Global::progname [options] [command [arguments]] < list_of_arguments",
5030 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
5031 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
5033 "-j n Run n jobs in parallel",
5034 "-k Keep same order",
5035 "-X Multiple arguments with context replace",
5036 "--colsep regexp Split input on regexp for positional replacements",
5037 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
5038 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
5039 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
5040 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
5042 "-S sshlogin Example: foo\@server.example.com",
5043 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
5044 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
5045 "--onall Run the given command with argument on all sshlogins",
5046 "--nonall Run the given command with no arguments on all sshlogins",
5048 "--pipe Split stdin (standard input) to multiple jobs.",
5049 "--recend str Record end separator for --pipe.",
5050 "--recstart str Record start separator for --pipe.",
5052 "GNU Parallel can do much more. See 'man $Global::progname' for details",
5054 "Academic tradition requires you to cite works you base your article on.",
5055 "If you use programs that use GNU Parallel to process data for an article in a",
5056 "scientific publication, please cite:",
5058 " Tange, O. (2021, September 22). GNU Parallel 20210922 ('Vindelev').",
5059 " Zenodo. https://doi.org/10.5281/zenodo.5523272",
5061 # Before changing these lines, please read
5062 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
5063 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5064 # You accept to be put in a public hall of shame by removing
5066 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5067 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5072 sub citation_notice
() {
5073 # if --will-cite or --plain: do nothing
5074 # if stderr redirected: do nothing
5075 # if $PARALLEL_HOME/will-cite: do nothing
5076 # else: print citation notice to stderr
5081 not -t
$Global::original_stderr
5083 grep { -e
"$_/will-cite" } @Global::config_dirs
) {
5087 ("Academic tradition requires you to cite works you base your article on.",
5088 "If you use programs that use GNU Parallel to process data for an article in a",
5089 "scientific publication, please cite:",
5091 " Tange, O. (2021, September 22). GNU Parallel 20210922 ('Vindelev').",
5092 " Zenodo. https://doi.org/10.5281/zenodo.5523272",
5094 # Before changing these line, please read
5095 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5096 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5097 # You accept to be put in a public hall of shame by
5098 # removing the lines.
5099 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5100 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5102 "More about funding GNU Parallel and the citation notice:",
5103 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5105 "To silence this citation notice: run 'parallel --citation' once.",
5108 mkdir $Global::config_dir
;
5109 # Number of times the user has run GNU Parallel without showing
5110 # willingness to cite
5112 if(open (my $fh, "<", $Global::config_dir
.
5113 "/runs-without-willing-to-cite")) {
5118 if(open (my $fh, ">", $Global::config_dir
.
5119 "/runs-without-willing-to-cite")) {
5123 ::status
("Come on: You have run parallel $runs times. Isn't it about time ",
5124 "you run 'parallel --citation' once to silence the citation notice?",
5133 my $fh = $Global::status_fd
|| *STDERR
;
5134 print $fh map { ($_, "\n") } @w;
5138 sub status_no_nl
(@
) {
5140 my $fh = $Global::status_fd
|| *STDERR
;
5147 my $prog = $Global::progname
|| "parallel";
5148 status_no_nl
(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5153 sub warning_once
(@
) {
5155 my $prog = $Global::progname
|| "parallel";
5157 status_no_nl
(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5163 my $prog = $Global::progname
|| "parallel";
5164 status
(map { ($prog.": Error: ". $_); } @w);
5170 ("$Global::progname: This should not happen. You have found a bug. ",
5172 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
5174 "Include this in the report:\n",
5175 "* The version number: $Global::version\n",
5176 "* The bugid: $bugid\n",
5177 "* The command line being run\n",
5178 "* The files being read (put the files on a webserver if they are big)\n",
5180 "If you get the error on smaller/fewer files, please include those instead.\n");
5181 ::wait_and_exit
(255);
5188 "GNU $Global::progname $Global::version",
5189 "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software",
5191 "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5192 "This is free software: you are free to change and redistribute it.",
5193 "GNU $Global::progname comes with no warranty.",
5195 "Web site: https://www.gnu.org/software/${Global::progname}\n",
5196 "When using programs that use GNU Parallel to process data for publication",
5197 "please cite as described in 'parallel --citation'.\n",
5203 my ($all_argv_ref,$argv_options_removed_ref) = @_;
5204 my $all_argv = "@$all_argv_ref";
5205 my $no_opts = "@$argv_options_removed_ref";
5206 $all_argv=~s/--citation//;
5207 if($all_argv ne $no_opts) {
5208 ::warning
("--citation ignores all other options and arguments.");
5213 "Academic tradition requires you to cite works you base your article on.",
5214 "If you use programs that use GNU Parallel to process data for an article in a",
5215 "scientific publication, please cite:",
5217 "\@software{tange_2021_5523272,",
5218 " author = {Tange, Ole},",
5219 " title = {GNU Parallel 20210922 ('Vindelev')},",
5222 " note = {{GNU Parallel is a general parallelizer to run",
5223 " multiple serial command line programs in parallel",
5224 " without changing them.}},",
5225 " publisher = {Zenodo},",
5226 " doi = {10.5281/zenodo.5523272},",
5227 " url = {https://doi.org/10.5281/zenodo.5523272}",
5230 "(Feel free to use \\nocite{tange_2021_5523272})",
5232 # Before changing these lines, please read
5233 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5234 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5235 # You accept to be put in a public hall of shame by removing
5237 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5238 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5240 "More about funding GNU Parallel and the citation notice:",
5241 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
5242 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5243 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
5245 "If you send a copy of your published article to tange\@gnu.org, it will be",
5246 "mentioned in the release notes of next version of GNU Parallel.",
5249 while(not grep { -e
"$_/will-cite" } @Global::config_dirs
) {
5250 print "\nType: 'will cite' and press enter.\n> ";
5251 my $input = <STDIN
>;
5252 if(not defined $input) {
5255 if($input =~ /will cite/i) {
5256 mkdir $Global::config_dir
;
5257 if(open (my $fh, ">", $Global::config_dir
."/will-cite")) {
5261 "Thank you for your support: You are the reason why there is funding to",
5262 "continue maintaining GNU Parallel. On behalf of future versions of",
5263 "GNU Parallel, which would not exist without your support:",
5265 " THANK YOU SO MUCH",
5267 "It is really appreciated. The citation notice is now silenced.",
5272 "Thank you for your support. It is much appreciated. The citation",
5273 "cannot permanently be silenced. Use '--will-cite' instead.",
5275 "If you use '--will-cite' in scripts to be run by others you are making",
5276 "it harder for others to see the citation notice. The development of",
5277 "GNU Parallel is indirectly financed through citations, so if users",
5278 "do not know they should cite then you are making it harder to finance",
5279 "development. However, if you pay 10000 EUR, you should feel free to",
5280 "use '--will-cite' in scripts.",
5290 print("Maximal size of command: ",Limits
::Command
::real_max_length
(),"\n",
5291 "Maximal used size of command: ",Limits
::Command
::max_length
(),"\n",
5293 "Execution of will continue now, and it will try to read its input\n",
5294 "and run commands; if this is not what you wanted to happen, please\n",
5295 "press CTRL-D or CTRL-C\n");
5299 # Give an embeddable version of GNU Parallel
5300 # Tested with: bash, zsh, ksh, ash, dash, sh
5301 my $randomstring = "cut-here-".join"",
5302 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
5303 if(not -f
$0 or not -r
$0) {
5304 ::error
("--embed only works if parallel is a readable file");
5307 if(open(my $fh, "<", $0)) {
5308 # Read the source from $0
5310 my $user = $ENV{LOGNAME
} || $ENV{USERNAME
} || $ENV{USER
};
5311 my @env_parallel_source = ();
5312 my $shell = $Global::shell
;
5314 for(which
("env_parallel.$shell")) {
5316 # Read the source of env_parallel.shellname
5317 open(my $env_parallel_source_fh, $_) || die;
5318 @env_parallel_source = <$env_parallel_source_fh>;
5319 close $env_parallel_source_fh;
5322 print "#!$Global::shell
5324 # Copyright (C) 2007-2021 $user, Ole Tange, http://ole.tange.dk
5325 # and Free Software Foundation, Inc.
5327 # This program is free software; you can redistribute it and/or modify
5328 # it under the terms of the GNU General Public License as published by
5329 # the Free Software Foundation; either version 3 of the License, or
5330 # (at your option) any later version.
5332 # This program is distributed in the hope that it will be useful, but
5333 # WITHOUT ANY WARRANTY; without even the implied warranty of
5334 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5335 # General Public License for more details.
5337 # You should have received a copy of the GNU General Public License
5338 # along with this program; if not, see <https://www.gnu.org/licenses/>
5339 # or write to the Free Software Foundation, Inc., 51 Franklin St,
5340 # Fifth Floor, Boston, MA 02110-1301 USA
5344 # Embedded GNU Parallel created with --embed
5346 # Start GNU Parallel without leaving temporary files
5348 # Not all shells support 'perl <(cat ...)'
5349 # This is a complex way of doing:
5350 # perl <(cat <<'cut-here'
5353 # and also avoiding:
5356 # Make a temporary fifo that perl can read from
5357 _fifo_with_GNU_Parallel_source
=`perl -e 'use POSIX qw(mkfifo);
5359 $f = "/tmp/parallel-".join"",
5360 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5364 # Put source code into temporary file
5365 # so it is easy to copy to the fifo
5366 _file_with_GNU_Parallel_source=`mktemp`;
5368 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
5372 # Copy the source code from the file to the fifo
5373 # and remove the file and fifo ASAP
5374 # 'sh
-c
' is needed to avoid
5376 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
5378 # Read the source from the fifo
5379 perl $_fifo_with_GNU_Parallel_source "$@"
5382 @env_parallel_source,
5385 # This will call the functions above
5386 parallel -k echo ::: Put your code here
5387 env_parallel --session
5388 env_parallel -k echo ::: Put your code here
5389 parset p,y,c,h -k echo ::: Put your code here
5391 echo You can also activate GNU Parallel for interactive use by:
5395 ::error("Cannot open $0");
5398 ::status("Redirect the output to a file and add your changes at the end:",
5399 " $0 --embed > new_script");
5403 sub __GENERIC_COMMON_FUNCTION__() {}
5406 sub mkdir_or_die($) {
5407 # If dir is not executable: die
5409 # The eval is needed to catch exception from mkdir
5410 eval { File::Path::mkpath($dir); };
5412 ::error("Cannot change into non-executable dir $dir: $!");
5413 ::wait_and_exit(255);
5418 # Create tempfile as $TMPDIR/parXXXXX
5420 # $filehandle = opened file handle
5421 # $filename = file name created
5422 my($filehandle,$filename) =
5423 ::tempfile(DIR=>$ENV{'TMPDIR
'}, TEMPLATE => 'parXXXXX
', @_);
5425 return($filehandle,$filename);
5427 # Separate unlink due to NFS dealing badly with File::Temp
5434 # Select a name that does not exist
5435 # Do not create the file as it may be used for creating a socket (by tmux)
5436 # Remember the name in $Global::unlink to avoid hitting the same name twice
5439 if(not -w $ENV{'TMPDIR
'}) {
5440 if(not -e $ENV{'TMPDIR
'}) {
5441 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
5443 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w
$ENV{'TMPDIR'}'");
5445 ::wait_and_exit(255);
5448 $tmpname = $ENV{'TMPDIR
'}."/".$name.
5449 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5450 } while(-e $tmpname or $Global::unlink{$tmpname}++);
5455 # Find an unused name and mkfifo on it
5456 my $tmpfifo = tmpname("fif");
5457 mkfifo($tmpfifo,0600);
5462 # Remove file and remove it from %Global::unlink
5465 delete @Global::unlink{@_};
5469 sub size_of_block_dev() {
5470 # Like -s but for block devices
5472 # $blockdev = file name of block device
5474 # $size = in bytes, undef if error
5475 my $blockdev = shift;
5476 if(open(my $fh, "<", $blockdev)) {
5477 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
5478 my $size = tell($fh);
5482 ::error("cannot open $blockdev");
5488 # Like qx but with clean environment (except for @keep)
5489 # and STDERR ignored
5490 # This is needed if the environment contains functions
5491 # that /bin/sh does not understand
5493 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
5494 # ssh with Kerberos needs KRB5CCNAME
5495 # tmux needs LC_CTYPE
5496 # lsh needs HOME LOGNAME
5497 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE HOME LOGNAME);
5498 @env{@keep} = @ENV{@keep};
5501 if($Global::debug
) {
5502 # && true is to force spawning a shell and not just exec'ing
5503 return qx{ @_ && true
};
5505 # CygWin does not respect 2>/dev/null
5506 # so we do that by hand
5507 # This trick does not work:
5508 # https://stackoverflow.com/q/13833088/363028
5510 # open(STDERR, ">", "/dev/null");
5511 open(local *CHILD_STDIN
, '<', '/dev/null') or die $!;
5512 open(local *CHILD_STDERR
, '>', '/dev/null') or die $!;
5514 # eval is needed if open3 fails (e.g. command line too long)
5520 # && true is to force spawning a shell and not just exec'ing
5524 # Make sure $? is set
5526 return wantarray ?
@arr : join "",@arr;
5528 # If eval fails, force $?=false
5535 # Remove duplicates and return unique values
5536 return keys %{{ map { $_ => 1 } @_ }};
5541 # Minimum value of array
5546 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
5547 $min = ($min < $_) ?
$min : $_;
5554 # Maximum value of array
5559 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
5560 $max = ($max > $_) ?
$max : $_;
5567 # Sum of values of array
5572 $_ and do { $sum += $_; }
5577 sub undef_as_zero
($) {
5582 sub undef_as_empty
($) {
5584 return $a ?
$a : "";
5587 sub undef_if_empty
($) {
5588 if(defined($_[0]) and $_[0] eq "") {
5594 sub multiply_binary_prefix
(@
) {
5595 # Evalualte numbers with binary prefix
5596 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5597 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5598 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5599 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5600 # 13G = 13*1024*1024*1024 = 13958643712
5602 # $s = string with prefixes
5604 # $value = int with prefixes multiplied
5610 s/gi/*1024*1024*1024/gi;
5611 s/ti/*1024*1024*1024*1024/gi;
5612 s/pi/*1024*1024*1024*1024*1024/gi;
5613 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5614 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5615 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5616 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5620 s/G/*1024*1024*1024/g;
5621 s/T/*1024*1024*1024*1024/g;
5622 s/P/*1024*1024*1024*1024*1024/g;
5623 s/E/*1024*1024*1024*1024*1024*1024/g;
5624 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5625 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5626 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5630 s/g/*1000*1000*1000/g;
5631 s/t/*1000*1000*1000*1000/g;
5632 s/p/*1000*1000*1000*1000*1000/g;
5633 s/e/*1000*1000*1000*1000*1000*1000/g;
5634 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5635 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5636 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5640 return wantarray ?
@v : $v[0];
5643 sub multiply_time_units
($) {
5644 # Evalualte numbers with time units
5645 # s=1, m=60, h=3600, d=86400
5647 # $s = string time units
5649 # $value = int in seconds
5661 return wantarray ?
@v : $v[0];
5664 sub seconds_to_time_units
() {
5665 # Convert seconds into ??d??h??m??s
5666 # s=1, m=60, h=3600, d=86400
5668 # $s = int in seconds
5670 # $str = string time units
5673 my $d = int($s/86400);
5675 my $h = int($s/3600);
5680 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5682 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5684 $str = sprintf("%dm%02ds",$m,$s);
5686 $str = sprintf("%ds",$s);
5692 my ($disk_full_fh, $b8193, $error_printed);
5693 sub exit_if_disk_full
() {
5694 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5695 # If the disk is full: Exit immediately.
5698 if(not $disk_full_fh) {
5699 $disk_full_fh = ::tmpfile
(SUFFIX
=> ".df");
5702 # Linux does not discover if a disk is full if writing <= 8192
5704 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5705 # ntfs reiserfs tmpfs ubifs vfat xfs
5706 # TODO this should be tested on different OS similar to this:
5709 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5710 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5711 # seq 6900000 > /mnt/loop/i && echo seq OK
5712 # seq 6980868 > /mnt/loop/i
5713 # seq 10000 > /mnt/loop/ii
5715 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5718 print $disk_full_fh $b8193;
5719 if(not $disk_full_fh
5721 tell $disk_full_fh != 8193) {
5722 # On raspbian the disk can be full except for 10 chars.
5723 if(not $error_printed) {
5724 ::error
("Output is incomplete.",
5725 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5726 "Is the disk full?",
5727 "Change \$TMPDIR with --tmpdir or use --compress.");
5730 ::wait_and_exit
(255);
5732 truncate $disk_full_fh, 0;
5733 seek($disk_full_fh, 0, 0) || die;
5738 # Remove comments and spaces
5740 # $spaces = keep 1 space?
5741 # $s = string to remove spaces from
5743 # $s = with spaces removed
5749 } elsif(2 == $spaces) {
5751 $s =~ s/\n\n+/\n/sg;
5752 $s =~ s/[ \t]+/ /mg;
5753 } elsif(3 == $spaces) {
5754 # Keep perl code required space
5755 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5756 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5768 $hostname = `hostname`;
5770 $hostname ||= "nohostname";
5778 # @programs = programs to find the path to
5780 # @full_path = full paths to @programs. Nothing if not found
5783 push(@which, grep { not -d
$_ and -x
$_ }
5784 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5786 # Test if program with full path exists
5787 push(@which, grep { not -d
$_ and -x
$_ } $prg);
5790 ::debug
("which", "$which[0] in $ENV{'PATH'}\n");
5791 return wantarray ?
@which : $which[0];
5795 my ($regexp,$shell,%fakename);
5799 # $pid = pid to see if (grand)*parent is a shell
5801 # $shellpath = path to shell - undef if no shell found
5803 ::debug
("init","Parent of $pid\n");
5805 # All shells known to mankind
5807 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5808 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5810 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ksh
5811 ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5812 static-sh tcsh yash zsh -sh -csh -bash),
5813 '-sh (sh)' # sh on FreeBSD
5815 # Can be formatted as:
5816 # [sh] -sh sh busybox sh -sh (sh)
5817 # /bin/sh /sbin/sh /opt/csw/sh
5818 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5819 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5820 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5821 '(-?)('. $shell. '))( *$| [^(])';
5823 # sh disguises itself as -sh (sh) on FreeBSD
5824 "-sh (sh)" => ["sh"],
5825 # csh and tcsh disguise themselves as -sh/-csh
5826 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5827 # but sh also disguises itself as -sh
5828 # (TODO When does that happen?)
5830 "-csh" => ["tcsh", "csh"],
5831 # ash disguises itself as -ash
5832 "-ash" => ["ash", "dash", "sh"],
5833 # dash disguises itself as -dash
5834 "-dash" => ["dash", "ash", "sh"],
5835 # bash disguises itself as -bash
5836 "-bash" => ["bash", "sh"],
5837 # ksh disguises itself as -ksh
5838 "-ksh" => ["ksh", "sh"],
5839 # zsh disguises itself as -zsh
5840 "-zsh" => ["zsh", "sh"],
5843 if($^O
eq "linux") {
5844 # Optimized for GNU/Linux
5849 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
5851 chomp($shellline = <$fd>);
5852 if($shellline =~ /$regexp/o) {
5853 my $shellname = $4 || $8;
5854 my $dash = $3 || $7;
5855 if($shellname eq "sh" and $dash) {
5857 if($shellpath = readlink "/proc/$testpid/exe") {
5858 ::debug
("init","procpath $shellpath\n");
5859 if($shellpath =~ m
:/$shell$:o
) {
5861 "proc which ".$shellpath." => ");
5866 ::debug
("init", "which ".$shellname." => ");
5867 $shellpath = (which
($shellname,
5868 @
{$fakename{$shellname}}))[0];
5869 ::debug
("init", "shell path $shellpath\n");
5874 if(open(my $fd, "<", "/proc/$testpid/stat")) {
5877 # Parent pid is field 4
5878 $testpid = (split /\s+/, $line)[3];
5880 # Something is wrong: fall back to old method
5885 # if -sh or -csh try readlink /proc/$$/exe
5886 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table
();
5890 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5891 my $shellname = $4 || $8;
5892 my $dash = $3 || $7;
5893 if($shellname eq "sh" and $dash) {
5895 if($shellpath = readlink "/proc/$testpid/exe") {
5896 ::debug
("init","procpath $shellpath\n");
5897 if($shellpath =~ m
:/$shell$:o
) {
5898 ::debug
("init", "proc which ".$shellpath." => ");
5903 ::debug
("init", "which ".$shellname." => ");
5904 $shellpath = (which
($shellname,@
{$fakename{$shellname}}))[0];
5905 ::debug
("init", "shell path $shellpath\n");
5906 $shellpath and last;
5908 if($testpid == $parent_of_ref->{$testpid}) {
5909 # In Solaris zones, the PPID of the zsched process is itself
5912 $testpid = $parent_of_ref->{$testpid};
5919 my %pid_parentpid_cmd;
5923 # %children_of = { pid -> children of pid }
5924 # %parent_of = { pid -> pid of parent }
5925 # %name_of = { pid -> commandname }
5927 if(not %pid_parentpid_cmd) {
5928 # Filter for SysV-style `ps`
5929 my $sysv = q
( ps
-ef
|).
5930 q
(perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5931 q(s/^.{$s}//; print "@F[1,2] $_"' );
5932 # Minix uses cols 2,3 and can have newlines in the command
5933 # so lines not having numbers in cols 2,3 must be ignored
5934 my $minix = q
( ps
-ef
|).
5935 q
(perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5936 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5938 my $bsd = q
(ps
-o pid
,ppid
,command
-ax
);
5939 %pid_parentpid_cmd =
5946 'dragonfly' => $bsd,
5960 'syllable' => "echo ps not supported",
5963 $pid_parentpid_cmd{$^O
} or
5964 ::die_bug
("pid_parentpid_cmd for $^O missing");
5966 my (@pidtable,%parent_of,%children_of,%name_of);
5967 # Table with pid -> children of pid
5968 @pidtable = `$pid_parentpid_cmd{$^O}`;
5971 # must match: 24436 21224 busybox ash
5972 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5973 # must match: 24436 21224 <<empty on system running Viber>>
5974 # or: perl -e 'while($0=" "){}'
5975 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5977 /^\s*(\S+)\s+(\S+)\s+()$/) {
5978 $parent_of{$1} = $2;
5979 push @
{$children_of{$2}}, $1;
5982 ::die_bug
("pidtable format: $_");
5985 return(\
%children_of, \
%parent_of, \
%name_of);
5990 # Returns time since epoch as in seconds with 3 decimals
5994 # $time = time now with millisecond accuracy
5995 if(not $Global::use{"Time::HiRes"}) {
5996 if(eval "use Time::HiRes qw ( time );") {
5997 eval "sub TimeHiRestime { return Time::HiRes::time };";
5999 eval "sub TimeHiRestime { return time() };";
6001 $Global::use{"Time::HiRes"} = 1;
6004 return (int(TimeHiRestime
()*1000))/1000;
6008 # Sleep this many milliseconds.
6010 # $ms = milliseconds to sleep
6012 ::debug
("timing",int($ms),"ms ");
6013 select(undef, undef, undef, $ms/1000);
6016 sub make_regexp_ungreedy
{
6019 my $class_state = 0;
6020 my $escape_state = 0;
6025 for $c (split (//, $regexp)) {
6027 if($c ne "?") { $ungreedy .= "?"; }
6032 if ($escape_state) { $escape_state = 0; next; }
6033 if ($c eq "\\") { $escape_state = 1; next; }
6034 if ($c eq '[') { $class_state = 1; next; }
6036 if($c eq ']') { $class_state = 0; }
6039 # Quantifiers: + * {...}
6040 if ($c =~ /[*}+]/) { $found = 1; }
6042 if($found) { $ungreedy .= '?'; }
6047 sub __KILLER_REAPER__
() {}
6050 # Reap dead children.
6051 # If no dead children: Sleep specified amount with exponential backoff
6053 # $ms = milliseconds to sleep
6055 # $ms/2+0.001 if children reaped
6056 # $ms*1.1 if no children reaped
6059 if(not $Global::total_completed
% 100) {
6061 # Force cleaning the timeout queue for every 100 jobs
6062 # Fixes potential memleak
6063 $Global::timeoutq
->process_timeouts();
6066 # Sleep exponentially shorter (1/2^n) if a job finished
6070 $Global::timeoutq
->process_timeouts();
6073 kill_youngster_if_not_enough_mem
($opt::memfree
*0.5);
6075 if($opt::memsuspend
) {
6076 suspend_young_if_not_enough_mem
($opt::memsuspend
);
6079 kill_youngest_if_over_limit
();
6081 exit_if_disk_full
();
6082 if($opt::linebuffer
) {
6083 my $something_printed = 0;
6084 if($opt::keeporder
) {
6085 for my $job (values %Global::running
) {
6086 $something_printed += $job->print_earlier_jobs();
6089 for my $job (values %Global::running
) {
6090 $something_printed += $job->print();
6093 if($something_printed) {
6098 # When a child dies, wake up from sleep (or select(,,,))
6099 $SIG{CHLD
} = sub { kill "ALRM", $$ };
6101 # The 0.004s is approximately the time it takes for one round
6102 my $next_earliest_start =
6103 $Global::newest_starttime
+ $opt::delay
- 0.004;
6104 my $remaining_ms = 1000 * ($next_earliest_start - ::now
());
6105 # The next job can only start at $next_earliest_start
6106 # so sleep until then (but sleep at least $ms)
6107 usleep
(::max
($ms,$remaining_ms));
6111 # --compress needs $SIG{CHLD} unset
6112 $SIG{CHLD
} = 'DEFAULT';
6114 # Sleep exponentially longer (1.1^n) if a job did not finish,
6115 # though at most 1000 ms.
6116 return (($ms < 1000) ?
($ms * 1.1) : ($ms));
6120 sub kill_youngest_if_over_limit
() {
6121 # Check each $sshlogin we are over limit
6122 # If over limit: kill off the youngest child
6123 # Put the child back in the queue.
6129 for my $job (values %Global::running
) {
6130 if(not $jobs_of{$job->sshlogin()}) {
6131 push @sshlogins, $job->sshlogin();
6133 push @
{$jobs_of{$job->sshlogin()}}, $job;
6135 for my $sshlogin (@sshlogins) {
6136 for my $job (sort { $b->seq() <=> $a->seq() }
6137 @
{$jobs_of{$sshlogin}}) {
6138 if($sshlogin->limit() == 2) {
6146 sub suspend_young_if_not_enough_mem
() {
6147 # Check each $sshlogin if there is enough mem.
6148 # If less than $limit free mem: suspend some of the young children
6149 # Else: Resume all jobs
6156 for my $job (values %Global::running
) {
6157 if(not $jobs_of{$job->sshlogin()}) {
6158 push @sshlogins, $job->sshlogin();
6160 push @
{$jobs_of{$job->sshlogin()}}, $job;
6162 for my $sshlogin (@sshlogins) {
6163 my $free = $sshlogin->memfree();
6164 if($free < 2*$limit) {
6165 # Suspend all jobs (resume some of them later)
6166 map { $_->suspended() or $_->suspend(); } @
{$jobs_of{$sshlogin}};
6167 my @jobs = (sort { $b->seq() <=> $a->seq() }
6168 @
{$jobs_of{$sshlogin}});
6169 # how many should be running?
6173 # free < limit*(2-1/2^n);
6175 # 1/(2-free/limit) < 2^n;
6176 my $run = int(1/(2-$free/$limit));
6177 $run = ::min
($run,$#jobs);
6178 # Resume the oldest running
6179 for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) {
6180 ::debug
("mem","\nResume ",$run+1, " jobs. Seq ",
6181 $job->seq(), " resumed ",
6182 $sshlogin->memfree()," < ",2*$limit);
6186 for my $job (@
{$jobs_of{$sshlogin}}) {
6187 if($job->suspended()) {
6189 ::debug
("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1,
6190 " jobs. Seq ", $job->seq(), " resumed ",
6191 $sshlogin->memfree()," > ",2*$limit);
6199 sub kill_youngster_if_not_enough_mem
() {
6200 # Check each $sshlogin if there is enough mem.
6201 # If less than 50% enough free mem: kill off the youngest child
6202 # Put the child back in the queue.
6209 for my $job (values %Global::running
) {
6210 if(not $jobs_of{$job->sshlogin()}) {
6211 push @sshlogins, $job->sshlogin();
6213 push @
{$jobs_of{$job->sshlogin()}}, $job;
6215 for my $sshlogin (@sshlogins) {
6216 for my $job (sort { $b->seq() <=> $a->seq() }
6217 @
{$jobs_of{$sshlogin}}) {
6218 if($sshlogin->memfree() < $limit) {
6219 ::debug
("mem","\n",map { $_->seq()." " }
6220 (sort { $b->seq() <=> $a->seq() }
6221 @
{$jobs_of{$sshlogin}}));
6222 ::debug
("mem","\n", $job->seq(), "killed ",
6223 $sshlogin->memfree()," < ",$limit);
6225 $sshlogin->memfree_recompute();
6230 ::debug
("mem","Free mem OK? ",
6231 $sshlogin->memfree()," > ",$limit);
6236 sub __DEBUGGING__
() {}
6244 $Global::debug
or return;
6245 @_ = grep { defined $_ ?
$_ : "" } @_;
6246 if($Global::debug
eq "all" or $Global::debug
eq $_[0]) {
6247 if($Global::fh
{2}) {
6248 # Original stderr was saved
6249 my $stderr = $Global::fh
{2};
6250 print $stderr @_[1..$#_];
6252 print STDERR
@_[1..$#_];
6257 sub my_memory_usage
() {
6259 # memory usage if found
6266 if(-e
"/proc/$pid/stat") {
6267 my $fh = FileHandle
->new("</proc/$pid/stat");
6273 my @procinfo = split(/\s+/,$data);
6275 return undef_as_zero
($procinfo[22]);
6283 # $size = size of object if Devel::Size is installed
6285 my @size_this = (@_);
6286 eval "use Devel::Size qw(size total_size)";
6290 return total_size(@_);
6296 # ascii expression of object if Data::Dump(er) is installed
6297 # error code otherwise
6298 my @dump_this = (@_);
6299 eval "use Data
::Dump
qw(dump);";
6301 # Data::Dump not installed
6302 eval "use Data
::Dumper
;";
6304 my $err = "Neither Data
::Dump nor Data
::Dumper is installed
\n".
6305 "Not dumping output
\n";
6309 return Dumper(@dump_this);
6312 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
6314 eval "sub Data
::Dump
:dump {}";
6315 eval "use Data
::Dump
qw(dump);";
6316 return (Data::Dump::dump(@dump_this));
6333 sub __OBJECT_ORIENTED_PARTS__() {}
6340 my $sshlogin_string = shift;
6343 # SSHLogins can have these formats:
6344 # @grp+grp/ncpu//usr/bin/ssh user@server
6345 # ncpu//usr/bin/ssh user@server
6346 # /usr/bin/ssh user@server
6349 # @grp+grp/user@server
6350 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
6351 # Look for SSHLogin hostgroups
6352 %hostgroups = map { $_ => 1 } split(/\+/, $1);
6354 # An SSHLogin is always in the hostgroup of its "numcpu
/host
"
6355 $hostgroups{$sshlogin_string} = 1;
6356 if ($sshlogin_string =~ s:^(\d+)/::) {
6357 # Override default autodetected ncpus unless missing
6360 my $string = $sshlogin_string;
6361 # An SSHLogin is always in the hostgroup of its $string-name
6362 $hostgroups{$string} = 1;
6363 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
6365 my $no_slash_string = $string;
6366 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
6368 'string' => $string,
6369 'jobs_running' => 0,
6370 'jobs_completed' => 0,
6371 'maxlength' => undef,
6372 'max_jobs_running' => undef,
6373 'orig_max_jobs_running' => undef,
6375 'hostgroups' => \%hostgroups,
6376 'sshcommand' => undef,
6377 'serverlogin' => undef,
6378 'control_path_dir' => undef,
6379 'control_path' => undef,
6380 'time_to_login' => undef,
6381 'last_login_at' => undef,
6382 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
6383 $no_slash_string . "/loadavg
",
6385 'last_loadavg_update' => 0,
6386 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
6387 $no_slash_string . "/swap_activity
",
6388 'swap_activity' => undef,
6389 }, ref($class) || $class;
6394 # Remove temporary files if they are created.
6395 ::rm($self->{'loadavg_file'});
6396 ::rm($self->{'swap_activity_file'});
6401 return $self->{'string'};
6404 sub jobs_running($) {
6406 return ($self->{'jobs_running'} || "0");
6409 sub inc_jobs_running($) {
6411 $self->{'jobs_running'}++;
6414 sub dec_jobs_running($) {
6416 $self->{'jobs_running'}--;
6419 sub set_maxlength($$) {
6421 $self->{'maxlength'} = shift;
6426 return $self->{'maxlength'};
6429 sub jobs_completed() {
6431 return $self->{'jobs_completed'};
6434 sub in_hostgroups() {
6436 # @hostgroups = the hostgroups to look for
6438 # true if intersection of @hostgroups and the hostgroups of this
6439 # SSHLogin is non-empty
6441 return grep { defined $self->{'hostgroups'}{$_} } @_;
6446 return keys %{$self->{'hostgroups'}};
6449 sub inc_jobs_completed($) {
6451 $self->{'jobs_completed'}++;
6452 $Global::total_completed++;
6455 sub set_max_jobs_running($$) {
6457 if(defined $self->{'max_jobs_running'}) {
6458 $Global::max_jobs_running -= $self->{'max_jobs_running'};
6460 $self->{'max_jobs_running'} = shift;
6462 if(defined $self->{'max_jobs_running'}) {
6463 # max_jobs_running could be resat if -j is a changed file
6464 $Global::max_jobs_running += $self->{'max_jobs_running'};
6466 # Initialize orig to the first non-zero value that comes around
6467 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
6474 $self->memfree_recompute();
6475 # Return 1 if not defined.
6476 return (not defined $self->{'memfree'} or $self->{'memfree'})
6479 sub memfree_recompute() {
6481 my $script = memfreescript();
6483 # TODO add sshlogin and backgrounding
6484 # Run the script twice if it gives 0 (typically intermittent error)
6485 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
6486 if(not $self->{'memfree'}) {
6487 ::die_bug("Less than
1 byte memory free
");
6489 #::debug("mem
","New free
:",$self->{'memfree'}," ");
6495 sub memfreescript() {
6497 # shellscript for giving available memory in bytes
6508 awk '/^((Swap)?Cached|MemFree|Buffers):/
6509 { sum += \$2} END { print sum }'
6512 # Android uses same code as GNU/Linux
6516 awk '/^((Swap)?Cached|MemFree|Buffers):/
6517 { sum += \$2} END { print sum }'
6521 # procs memory page faults cpu
6522 # r b w avm free re at pi po fr de sr in sy cs us sy id
6523 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
6526 print (((reverse `vmstat 1 1`)[0]
6527 =~ /(?:\d+\D+){4}(\d
+)/)[0]*1024)
6530 # kthr memory page disk faults cpu
6531 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
6532 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
6533 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
6535 # The second free value is correct
6538 print (((reverse `vmstat 1 2`)[0]
6539 =~ /(?:\d+\D+){4}(\d
+)/)[0]*1024)
6542 # vm.stats.vm.v_cache_count: 0
6543 # vm.stats.vm.v_inactive_count: 79574
6544 # vm.stats.vm.v_free_count: 4507
6547 for(qx{/sbin/sysctl -a}) {
6548 if (/^([^:]+):\s+(.+)\s*$/s) {
6552 print $sysctl->{"hw.pagesize"} *
6553 ($sysctl->{"vm.stats.vm.v_cache_count"}
6554 + $sysctl->{"vm.stats.vm.v_inactive_count"}
6555 + $sysctl->{"vm.stats.vm.v_free_count"});
6557 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6558 # Pages free: 198061.
6559 # Pages active: 159701.
6560 # Pages inactive: 47378.
6561 # Pages speculative: 29707.
6562 # Pages wired down: 89231.
6563 # "Translation faults": 928901425.
6564 # Pages copy-on-write: 156988239.
6565 # Pages zero filled: 271267894.
6566 # Pages reactivated: 48895.
6569 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
6573 print (($vm =~ /page size of (\d+)/)[0] *
6574 (($vm =~ /Pages free:\s+(\d+)/)[0] +
6575 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
6578 my $perlscript = "";
6579 # Make a perl script that detects the OS ($^O) and runs
6580 # the appropriate command
6581 for my $os (keys %script_of) {
6582 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
6584 $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
6592 # 0 = Below limit. Start another job.
6593 # 1 = Over limit. Start no jobs.
6594 # 2 = Kill youngest job
6597 if(not defined $self->{'limitscript'}) {
6603 # Do the measurement in the background
6605 LANG=C iostat -x 1 2 > $tmp;
6606 mv $tmp $io_file) </dev/null >/dev/null & );
6607 perl -e '-e $ARGV[0] or exit(1);
6610 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
6611 exit ('$limit' < $max)' $io_file;
6618 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
6620 if (sum*1024 < '$limit'/2) { exit 2; }
6621 else { exit (sum*1024 < '$limit') }
6630 ps ax -o state,command |
6631 grep -E '^[DOR].[^[]' |
6633 perl -ne 'exit ('$limit' < $_)';
6639 my ($cmd,@args) = split /\s+/,$opt::limit;
6640 if($limitscripts{$cmd}) {
6641 my $tmpfile = ::tmpname("parlmt");
6642 ++$Global::unlink{$tmpfile};
6643 $self->{'limitscript'} =
6644 ::spacefree(1, sprintf($limitscripts{$cmd},
6645 ::multiply_binary_prefix(@args),$tmpfile));
6647 $self->{'limitscript'} = $opt::limit;
6653 $ENV{'SSHLOGIN'} = $self->string();
6654 system($Global::shell,"-c",$self->{'limitscript'});
6655 #::qqx($self->{'limitscript'});
6656 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
6663 my $swapping = $self->swap_activity();
6664 return (not defined $swapping or $swapping)
6667 sub swap_activity($) {
6668 # If the currently known swap activity is too old:
6669 # Recompute a new one in the background
6671 # last swap activity computed
6673 # Should we update the swap_activity file?
6674 my $update_swap_activity_file = 0;
6675 if(-r $self->{'swap_activity_file'}) {
6676 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
6677 ::die_bug("swap_activity_file-r");
6678 my $swap_out = <$swap_fh>;
6680 if($swap_out =~ /^(\d+)$/) {
6681 $self->{'swap_activity'} = $1;
6682 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
6684 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
6685 if(time - $self->{'last_swap_activity_update'} > 10) {
6686 # last swap activity update was started 10 seconds ago
6687 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
6688 $update_swap_activity_file = 1;
6691 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
6692 $self->{'swap_activity'} = undef;
6693 $update_swap_activity_file = 1;
6695 if($update_swap_activity_file) {
6696 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
6697 $self->{'last_swap_activity_update'} = time;
6698 my $dir = ::dirname($self->{'swap_activity_file'});
6699 -d $dir or eval { File::Path::mkpath($dir); };
6701 $swap_activity = swapactivityscript();
6702 if($self->{'string'} ne ":") {
6703 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
6704 ::Q($swap_activity);
6706 # Run swap_activity measuring.
6707 # As the command can take long to run if run remote
6708 # save it to a tmp file before moving it to the correct file
6709 my $file = $self->{'swap_activity_file'};
6710 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
6711 ::debug("swap", "\n", $swap_activity, "\n");
6712 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
6714 return $self->{'swap_activity'};
6720 sub swapactivityscript() {
6722 # shellscript for detecting swap activity
6724 # arguments for vmstat are OS dependant
6725 # swap_in and swap_out are in different columns depending on OS
6731 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6732 # r b swpd free buff cache si so bi bo in cs us sy id wa
6733 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6734 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6735 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6739 # kthr memory page disk faults cpu
6740 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6741 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6742 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6743 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6745 # darwin (macosx): $21*$22
6747 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6748 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6749 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6750 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6751 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6755 # procs faults cpu memory page disk
6756 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6757 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6758 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6759 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6763 # System configuration: lcpu=1 mem=2048MB
6765 # kthr memory page faults cpu
6766 # ----- ----------- ------------------------ ------------ -----------
6767 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6768 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6769 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6770 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6774 # procs memory page disks faults cpu
6775 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6776 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6777 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6778 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6782 # procs memory page disks traps cpu
6783 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6784 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6785 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6786 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6790 # procs memory page disks faults cpu
6791 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6792 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6793 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6794 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6798 # procs memory page disks traps cpu
6799 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6800 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6801 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6802 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6806 # procs memory page faults cpu
6807 # r b w avm free re at pi po fr de sr in sy cs us sy id
6808 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6809 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6810 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6812 # dec_osf (tru64): $11*$12
6814 # Virtual Memory Statistics: (pagesize = 8192)
6815 # procs memory pages intr cpu
6816 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6817 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6818 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6819 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6823 # (pagesize: 4, size: 512288, swap size: 894972)
6824 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6825 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6826 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6827 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6829 # -nto (qnx has no swap)
6833 my $perlscript = "";
6834 # Make a perl script that detects the OS ($^O) and runs
6835 # the appropriate vmstat command
6836 for my $os (keys %vmstat) {
6837 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6838 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6839 $vmstat{$os}[1] . '}"` }';
6841 $script = "perl -e " . ::Q($perlscript);
6847 sub too_fast_remote_login($) {
6849 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6850 # sshd normally allows 10 simultaneous logins
6851 # A login takes time_to_login
6852 # So time_to_login/5 should be safe
6853 # If now <= last_login + time_to_login/5: Then it is too soon.
6854 my $too_fast = (::now() <= $self->{'last_login_at'}
6855 + $self->{'time_to_login'}/5);
6856 ::debug("run", "Too fast? $too_fast ");
6859 # No logins so far (or time_to_login not computed): it is not too fast
6864 sub last_login_at($) {
6866 return $self->{'last_login_at'};
6869 sub set_last_login_at($$) {
6871 $self->{'last_login_at'} = shift;
6874 sub loadavg_too_high($) {
6876 my $loadavg = $self->loadavg();
6877 if(defined $loadavg) {
6878 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6879 return $loadavg >= $self->max_loadavg();
6881 # Unknown load: Assume load is too high
6890 # aix => "ps -ae -o state,command" # state wrong
6891 # bsd => "ps ax -o state,command"
6892 # sysv => "ps -ef -o s -o comm"
6893 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6894 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6895 # awk '{print $2,$1}'
6900 # hpux => ps -el|awk '{print $2,$14,$15}'
6901 # irix => ps -ef -o state -o comm
6903 # minix => ps el|awk '{print \$1,\$11}'
6909 # ultrix => ps -ax | awk '{print $3,$5}'
6910 # unixware => ps -el|awk '{print $2,$14,$15}'
6911 my $ps = ::spacefree(1,q{
6912 $sysv="ps -ef -o s -o comm";
6913 $sysv2="ps -ef -o state -o comm";
6914 $bsd="ps ax -o state,command";
6915 # Treat threads as processes
6916 $bsd2="ps axH -o state,command";
6917 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6918 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6919 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6920 awk '{print $2,$1}' };
6921 $dummy="echo S COMMAND;echo R dummy";
6923 # TODO Find better code for AIX/Android
6925 'android' => "uptime",
6926 'cygwin' => $cygwin,
6928 'dec_osf' => $sysv2,
6929 'dragonfly' => $bsd,
6935 'minix' => "ps el|awk '{print \$1,\$11}'",
6943 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6948 # The command is too long for csh, so base64_wrap the command
6949 $cmd = Job::base64_wrap($ps);
6957 # If the currently know loadavg is too old:
6958 # Recompute a new one in the background
6959 # The load average is computed as the number of processes waiting for disk
6960 # or CPU right now. So it is the server load this instant and not averaged over
6961 # several minutes. This is needed so GNU Parallel will at most start one job
6962 # that will push the load over the limit.
6965 # $last_loadavg = last load average computed (undef if none)
6967 # Should we update the loadavg file?
6968 my $update_loadavg_file = 0;
6969 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6970 local $/; # $/ = undef => slurp whole file
6971 my $load_out = <$load_fh>;
6973 if($load_out =~ /\S/) {
6974 # Content can be empty if ~/ is on NFS
6975 # due to reading being non-atomic.
6977 # Count lines starting with D,O,R but command does not start with [
6978 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6980 # load is overestimated by 1
6981 $self->{'loadavg'} = $load - 1;
6982 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6983 } elsif ($load_out=~/average: (\d+.\d+)/) {
6984 # AIX does not support instant load average
6985 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6986 $self->{'loadavg'} = $1;
6988 ::die_bug("loadavg_invalid_content: " .
6989 $self->{'loadavg_file'} . "\n$load_out");
6992 $update_loadavg_file = 1;
6994 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6995 $self->{'loadavg'} = undef;
6996 $update_loadavg_file = 1;
6998 if($update_loadavg_file) {
6999 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
7000 $self->{'last_loadavg_update'} = time;
7001 my $dir = ::dirname($self->{'swap_activity_file'});
7002 -d $dir or eval { File::Path::mkpath($dir); };
7003 -w $dir or ::die_bug("Cannot write to $dir");
7005 if($self->{'string'} ne ":") {
7006 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
7009 $cmd .= loadavg_cmd();
7011 # As the command can take long to run if run remote
7012 # save it to a tmp file before moving it to the correct file
7013 ::debug("load", "Update load\n");
7014 my $file = $self->{'loadavg_file'};
7015 # tmpfile on same filesystem as $file
7016 my $tmpfile = $file.$$;
7017 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
7019 return $self->{'loadavg'};
7022 sub max_loadavg($) {
7024 # If --load is a file it might be changed
7025 if($Global::max_load_file) {
7026 my $mtime = (stat($Global::max_load_file))[9];
7027 if($mtime > $Global::max_load_file_last_mod) {
7028 $Global::max_load_file_last_mod = $mtime;
7029 for my $sshlogin (values %Global::host) {
7030 $sshlogin->set_max_loadavg(undef);
7034 if(not defined $self->{'max_loadavg'}) {
7035 $self->{'max_loadavg'} =
7036 $self->compute_max_loadavg($opt::load);
7038 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
7039 return $self->{'max_loadavg'};
7042 sub set_max_loadavg($$) {
7044 $self->{'max_loadavg'} = shift;
7047 sub compute_max_loadavg($) {
7048 # Parse the max loadaverage that the user asked for using --load
7052 my $loadspec = shift;
7054 if(defined $loadspec) {
7055 if($loadspec =~ /^\+(\d+)$/) {
7059 $self->ncpus() + $j;
7060 } elsif ($loadspec =~ /^-(\d+)$/) {
7064 $self->ncpus() - $j;
7065 } elsif ($loadspec =~ /^(\d+)\%$/) {
7068 $self->ncpus() * $j / 100;
7069 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
7071 } elsif (-f $loadspec) {
7072 $Global::max_load_file = $loadspec;
7073 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
7074 if(open(my $in_fh, "<", $Global::max_load_file)) {
7075 my $opt_load_file = join("",<$in_fh>);
7077 $load = $self->compute_max_loadavg($opt_load_file);
7079 ::error("Cannot open $loadspec.");
7080 ::wait_and_exit(255);
7083 ::error("Parsing of --load failed.");
7093 sub time_to_login($) {
7095 return $self->{'time_to_login'};
7098 sub set_time_to_login($$) {
7100 $self->{'time_to_login'} = shift;
7103 sub max_jobs_running($) {
7105 if(not defined $self->{'max_jobs_running'}) {
7106 my $nproc = $self->compute_number_of_processes($opt::jobs);
7107 $self->set_max_jobs_running($nproc);
7109 return $self->{'max_jobs_running'};
7112 sub orig_max_jobs_running($) {
7114 return $self->{'orig_max_jobs_running'};
7117 sub compute_number_of_processes($) {
7118 # Number of processes wanted and limited by system resources
7120 # Number of processes
7123 my $wanted_processes = $self->user_requested_processes($opt_P);
7124 if(not defined $wanted_processes) {
7125 $wanted_processes = $Global::default_simultaneous_sshlogins;
7127 ::debug("load", "Wanted procs: $wanted_processes\n");
7129 $self->processes_available_by_system_limit($wanted_processes);
7130 ::debug("load", "Limited to procs: $system_limit\n");
7131 return $system_limit;
7136 my $max_system_proc_reached;
7137 my $more_filehandles;
7140 my $count_jobs_already_read;
7146 sub reserve_filehandles($) {
7147 # Reserves filehandle
7150 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
7154 sub reserve_process() {
7155 # Spawn a dummy process
7157 if($child = fork()) {
7158 push @children, $child;
7159 $Global::unkilled_children{$child} = 1;
7160 } elsif(defined $child) {
7162 # The child takes one process slot
7163 # It will be killed later
7164 $SIG{'TERM'} = $Global::original_sig{'TERM'};
7165 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
7166 # The exec does not work on Cygwin and QNX
7169 # 'exec sleep' takes less RAM than sleeping in perl
7170 exec 'sleep', 10101;
7175 $max_system_proc_reached = 1;
7179 sub get_args_or_jobs() {
7180 # Get an arg or a job (depending on mode)
7181 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
7182 # Skip: No need to get args
7184 } elsif(defined $opt::retries and $count_jobs_already_read) {
7185 # For retries we may need to run all jobs on this sshlogin
7186 # so include the already read jobs for this sshlogin
7187 $count_jobs_already_read--;
7190 if($opt::X or $opt::m) {
7191 # The arguments may have to be re-spread over several jobslots
7192 # So pessimistically only read one arg per jobslot
7193 # instead of a full commandline
7194 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
7195 if($Global::JobQueue->empty()) {
7198 $job = $Global::JobQueue->get();
7203 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
7208 # If there are no more command lines, then we have a process
7209 # per command line, so no need to go further
7210 if($Global::JobQueue->empty()) {
7213 $job = $Global::JobQueue->get();
7214 # Replacement must happen here due to seq()
7215 $job and $job->replaced();
7224 # Cleanup: Close the files
7225 for (values %fh) { close $_ }
7226 # Cleanup: Kill the children
7227 for my $pid (@children) {
7230 delete $Global::unkilled_children{$pid};
7232 # Cleanup: Unget the command_lines or the @args
7233 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args);
7235 $Global::JobQueue->unget(@jobs);
7239 sub processes_available_by_system_limit($) {
7240 # If the wanted number of processes is bigger than the system limits:
7241 # Limit them to the system limits
7242 # Limits are: File handles, number of input lines, processes,
7243 # and taking > 1 second to spawn 10 extra processes
7245 # Number of processes
7247 my $wanted_processes = shift;
7248 my $system_limit = 0;
7249 my $slow_spawning_warning_printed = 0;
7251 $more_filehandles = 1;
7252 $tmpfhname = "TmpFhNamE";
7254 # perl uses 7 filehandles for something?
7255 # parallel uses 1 for memory_usage
7256 # parallel uses 4 for ?
7257 reserve_filehandles(12);
7258 # Two processes for load avg and ?
7262 # For --retries count also jobs already run
7263 $count_jobs_already_read = $Global::JobQueue->next_seq();
7264 my $wait_time_for_getting_args = 0;
7265 my $start_time = time;
7267 $system_limit >= $wanted_processes and last;
7268 not $more_filehandles and last;
7269 $max_system_proc_reached and last;
7271 my $before_getting_arg = time;
7272 if(!$Global::dummy_jobs) {
7273 get_args_or_jobs() or last;
7275 $wait_time_for_getting_args += time - $before_getting_arg;
7278 # Every simultaneous process uses 2 filehandles to write to
7279 # and 2 filehandles to read from
7280 reserve_filehandles(4);
7282 # System process limit
7285 my $forktime = time - $time - $wait_time_for_getting_args;
7286 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
7288 " (processes so far: ", $system_limit,")\n");
7289 if($system_limit > 10 and
7291 $forktime > $system_limit * 0.01) {
7292 # It took more than 0.01 second to fork a processes on avg.
7293 # Give the user a warning. He can press Ctrl-C if this
7296 "Starting $system_limit processes took > $forktime sec.",
7297 "Consider adjusting -j. Press CTRL-C to stop.");
7302 if($system_limit < $wanted_processes) {
7303 # The system_limit is less than the wanted_processes
7304 if($system_limit < 1 and not $Global::JobQueue->empty()) {
7305 ::warning("Cannot spawn any jobs.",
7306 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
7307 "or increasing 'nproc' in /etc/security/limits.conf",
7308 "or increasing /proc/sys/kernel/pid_max");
7309 ::wait_and_exit(255);
7311 if(not $more_filehandles) {
7312 ::warning("Only enough file handles to run ".
7313 $system_limit. " jobs in parallel.",
7314 "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'",
7315 "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)",
7316 "or increasing 'nofile' in /etc/security/limits.conf",
7317 "or increasing /proc/sys/fs/file-max");
7319 if($max_system_proc_reached) {
7320 ::warning("Only enough available processes to run ".
7321 $system_limit. " jobs in parallel.",
7322 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
7323 "or increasing 'nproc' in /etc/security/limits.conf",
7324 "or increasing /proc/sys/kernel/pid_max");
7327 if($] == 5.008008 and $system_limit > 1000) {
7328 # https://savannah.gnu.org/bugs/?36942
7329 $system_limit = 1000;
7331 if($Global::JobQueue->empty()) {
7332 $system_limit ||= 1;
7334 if($self->string() ne ":" and
7335 $system_limit > $Global::default_simultaneous_sshlogins) {
7337 $self->simultaneous_sshlogin_limit($system_limit);
7339 return $system_limit;
7343 sub simultaneous_sshlogin_limit($) {
7344 # Test by logging in wanted number of times simultaneously
7346 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
7348 my $wanted_processes = shift;
7349 if($self->{'time_to_login'}) {
7350 return $wanted_processes;
7353 # Try twice because it guesses wrong sometimes
7354 # Choose the minimal
7356 ::min($self->simultaneous_sshlogin($wanted_processes),
7357 $self->simultaneous_sshlogin($wanted_processes));
7358 if($ssh_limit < $wanted_processes) {
7359 my $serverlogin = $self->serverlogin();
7360 ::warning("ssh to $serverlogin only allows ".
7361 "for $ssh_limit simultaneous logins.",
7362 "You may raise this by changing",
7363 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
7364 "You can also try --sshdelay 0.1",
7365 "Using only ".($ssh_limit-1)." connections ".
7366 "to avoid race conditions.");
7367 # Race condition can cause problem if using all sshs.
7368 if($ssh_limit > 1) { $ssh_limit -= 1; }
7373 sub simultaneous_sshlogin($) {
7374 # Using $sshlogin try to see if we can do $wanted_processes
7375 # simultaneous logins
7376 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
7379 # $wanted_processes = Try for this many logins in parallel
7381 # $ssh_limit = Number of succesful parallel logins
7384 my $wanted_processes = shift;
7385 my $sshcmd = $self->sshcommand();
7386 my $serverlogin = $self->serverlogin();
7387 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
7388 # TODO sh -c wrapper to work for csh
7389 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
7390 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
7391 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
7392 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
7393 ::die_bug("simultaneouslogin");
7394 my $ssh_limit = <$simul_fh>;
7402 $self->{'ncpus'} = shift;
7405 sub user_requested_processes($) {
7406 # Parse the number of processes that the user asked for using -j
7408 # $opt_P = string formatted as for -P
7410 # $processes = the number of processes to run on this sshlogin
7414 if(defined $opt_P) {
7415 if($opt_P =~ /^\+(\d+)$/) {
7419 $self->ncpus() + $j;
7420 } elsif ($opt_P =~ /^-(\d+)$/) {
7424 $self->ncpus() - $j;
7425 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
7429 $self->ncpus() * $j / 100;
7430 } elsif ($opt_P =~ /^(\d+)$/) {
7432 if($processes == 0) {
7433 # -P 0 = infinity (or at least close)
7434 $processes = $Global::infinity;
7436 } elsif (-f $opt_P) {
7437 $Global::max_procs_file = $opt_P;
7438 if(open(my $in_fh, "<", $Global::max_procs_file)) {
7439 my $opt_P_file = join("",<$in_fh>);
7441 $processes = $self->user_requested_processes($opt_P_file);
7443 ::error("Cannot open $opt_P.");
7444 ::wait_and_exit(255);
7447 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
7450 $processes = ::ceil($processes);
7456 # Number of CPU threads
7457 # --use_sockets_instead_of_threads = count socket instead
7458 # --use_cores_instead_of_threads = count physical cores instead
7460 # $ncpus = number of cpu (threads) on this sshlogin
7463 if(not defined $self->{'ncpus'}) {
7464 my $sshcmd = $self->sshcommand();
7465 my $serverlogin = $self->serverlogin();
7466 if($serverlogin eq ":") {
7467 if($opt::use_sockets_instead_of_threads) {
7468 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
7469 } elsif($opt::use_cores_instead_of_threads) {
7470 $self->{'ncpus'} = socket_core_thread()->{'cores'};
7472 $self->{'ncpus'} = socket_core_thread()->{'threads'};
7476 ::debug("init","echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7477 if($opt::use_sockets_instead_of_threads
7479 $opt::use_cpus_instead_of_cores) {
7481 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7482 } elsif($opt::use_cores_instead_of_threads) {
7484 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
7487 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
7490 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
7491 $self->{'ncpus'} = $ncpu;
7493 ::warning("Could not figure out ".
7494 "number of cpus on $serverlogin ($ncpu). Using 1.");
7495 $self->{'ncpus'} = 1;
7499 return $self->{'ncpus'};
7505 # Number of threads using `nproc`
7506 my $no_of_threads = ::qqx("nproc");
7507 chomp $no_of_threads;
7508 return $no_of_threads;
7511 sub no_of_sockets() {
7512 return socket_core_thread()->{'sockets'};
7516 return socket_core_thread()->{'cores'};
7519 sub no_of_threads() {
7520 return socket_core_thread()->{'threads'};
7523 sub socket_core_thread() {
7526 # 'sockets' => #sockets = number of socket with CPU present
7527 # 'cores' => #cores = number of physical cores
7528 # 'threads' => #threads = number of compute cores (hyperthreading)
7529 # 'active' => #taskset_threads = number of taskset limited cores
7532 my $cached_cpuspec = $Global::cache_dir . "/tmp/sshlogin/" .
7533 ::hostname() . "/cpuspec";
7534 if(-e $cached_cpuspec and -M $cached_cpuspec < 1) {
7535 # Reading cached copy instead of /proc/cpuinfo is 17 ms faster
7537 if(open(my $in_fh, "<", $cached_cpuspec)) {
7538 ::debug("init","Read $cached_cpuspec\n");
7539 $cpu->{'sockets'} = int(<$in_fh>);
7540 $cpu->{'cores'} = int(<$in_fh>);
7541 $cpu->{'threads'} = int(<$in_fh>);
7545 if ($^O eq 'linux') {
7546 $cpu = sct_gnu_linux($cpu);
7547 } elsif ($^O eq 'android') {
7548 $cpu = sct_android($cpu);
7549 } elsif ($^O eq 'freebsd') {
7550 $cpu = sct_freebsd($cpu);
7551 } elsif ($^O eq 'netbsd') {
7552 $cpu = sct_netbsd($cpu);
7553 } elsif ($^O eq 'openbsd') {
7554 $cpu = sct_openbsd($cpu);
7555 } elsif ($^O eq 'gnu') {
7556 $cpu = sct_hurd($cpu);
7557 } elsif ($^O eq 'darwin') {
7558 $cpu = sct_darwin($cpu);
7559 } elsif ($^O eq 'solaris') {
7560 $cpu = sct_solaris($cpu);
7561 } elsif ($^O eq 'aix') {
7562 $cpu = sct_aix($cpu);
7563 } elsif ($^O eq 'hpux') {
7564 $cpu = sct_hpux($cpu);
7565 } elsif ($^O eq 'nto') {
7566 $cpu = sct_qnx($cpu);
7567 } elsif ($^O eq 'svr5') {
7568 $cpu = sct_openserver($cpu);
7569 } elsif ($^O eq 'irix') {
7570 $cpu = sct_irix($cpu);
7571 } elsif ($^O eq 'dec_osf') {
7572 $cpu = sct_tru64($cpu);
7574 # Try all methods until we find something that works
7575 $cpu = (sct_gnu_linux($cpu)
7576 || sct_android($cpu)
7577 || sct_freebsd($cpu)
7579 || sct_openbsd($cpu)
7582 || sct_solaris($cpu)
7586 || sct_openserver($cpu)
7591 if(not grep { $_ > 0 } values %$cpu) {
7594 # Write cached copy instead of /proc/cpuinfo is 17 ms faster
7595 if($cpu and open(my $out_fh, ">", $cached_cpuspec)) {
7596 print $out_fh (map { chomp; "$_\n" }
7603 my $nproc = nproc();
7613 ::warning("Cannot figure out number of cpus. Using 1.");
7620 $cpu->{'sockets'} ||= 1;
7621 $cpu->{'threads'} ||= $cpu->{'cores'};
7622 $cpu->{'active'} ||= $cpu->{'threads'};
7623 chomp($cpu->{'sockets'},
7627 # Choose minimum of active and actual
7629 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
7630 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
7631 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
7635 sub sct_gnu_linux($) {
7637 # { 'sockets' => #sockets
7639 # 'threads' => #threads
7640 # 'active' => #taskset_threads }
7643 sub read_topology($) {
7649 -r "$prefix/cpu$thread/topology/physical_package_id";
7652 "$prefix/cpu$thread/topology/physical_package_id")
7658 -r "$prefix/cpu$thread/topology/thread_siblings";
7661 "$prefix/cpu$thread/topology/thread_siblings")
7666 $cpu->{'sockets'} = keys %socket;
7667 $cpu->{'cores'} = keys %sibiling;
7668 $cpu->{'threads'} = $thread;
7671 sub read_cpuinfo(@) {
7673 $cpu->{'sockets'} = 0;
7674 $cpu->{'cores'} = 0;
7675 $cpu->{'threads'} = 0;
7681 if(/^physical id.*[:](.*)/) {
7683 if(not $phy_seen{$1}++) {
7684 $cpu->{'sockets'}++;
7688 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
7692 /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
7694 $cpu->{'cores'} ||= $cpu->{'threads'};
7695 $cpu->{'cpus'} ||= $cpu->{'threads'};
7696 $cpu->{'sockets'} ||= 1;
7701 my $threads_per_core;
7702 my $cores_per_socket;
7704 /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
7705 /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
7706 /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
7707 /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
7709 if($threads_per_core and $cpu->{'threads'}) {
7710 $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core;
7712 $cpu->{'cpus'} ||= $cpu->{'threads'};
7715 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
7718 if($ENV{'PARALLEL_CPUINFO'}) {
7719 # Use CPUINFO from environment - used for testing only
7720 read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
7721 } elsif($ENV{'PARALLEL_LSCPU'}) {
7722 # Use LSCPU from environment - used for testing only
7723 read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
7724 } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
7725 # Use CPUPREFIX from environment - used for testing only
7726 read_topology($ENV{'PARALLEL_CPUPREFIX'});
7727 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
7728 # Skip /proc/cpuinfo - already set
7730 # Not debugging: Look at this computer
7731 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7733 open(my $in_fh, "-|", "lscpu")) {
7734 # Parse output from lscpu
7735 read_lscpu(<$in_fh>);
7738 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7740 -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
7741 read_topology("/sys/devices/system/cpu");
7743 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7745 open(my $in_fh, "<", "/proc/cpuinfo")) {
7746 # Read /proc/cpuinfo
7747 read_cpuinfo(<$in_fh>);
7751 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
7752 # if 'taskset' is used to limit number of threads
7753 if(open(my $in_fh, "<", "/proc/self/status")) {
7755 if(/^Cpus_allowed:\s*(\S+)/) {
7758 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
7767 sub sct_android($) {
7769 # { 'sockets' => #sockets
7771 # 'threads' => #threads
7772 # 'active' => #taskset_threads }
7774 return sct_gnu_linux($_[0]);
7777 sub sct_freebsd($) {
7779 # { 'sockets' => #sockets
7781 # 'threads' => #threads
7782 # 'active' => #taskset_threads }
7786 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
7788 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
7789 $cpu->{'threads'} ||=
7790 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
7792 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
7798 # { 'sockets' => #sockets
7800 # 'threads' => #threads
7801 # 'active' => #taskset_threads }
7804 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
7808 sub sct_openbsd($) {
7810 # { 'sockets' => #sockets
7812 # 'threads' => #threads
7813 # 'active' => #taskset_threads }
7816 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
7822 # { 'sockets' => #sockets
7824 # 'threads' => #threads
7825 # 'active' => #taskset_threads }
7828 $cpu->{'cores'} ||= ::qqx("nproc");
7834 # { 'sockets' => #sockets
7836 # 'threads' => #threads
7837 # 'active' => #taskset_threads }
7841 (::qqx('sysctl -n hw.physicalcpu')
7843 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7844 $cpu->{'threads'} ||=
7845 (::qqx('sysctl -n hw.logicalcpu')
7847 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7851 sub sct_solaris($) {
7853 # { 'sockets' => #sockets
7855 # 'threads' => #threads
7856 # 'active' => #taskset_threads }
7859 if(not $cpu->{'cores'}) {
7860 if(-x "/usr/bin/kstat") {
7861 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
7862 if($#chip_id >= 0) {
7863 $cpu->{'sockets'} ||= $#chip_id +1;
7865 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
7866 if($#core_id >= 0) {
7867 $cpu->{'cores'} ||= $#core_id +1;
7870 if(-x "/usr/sbin/psrinfo") {
7871 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
7872 if($#psrinfo >= 0) {
7873 $cpu->{'sockets'} ||= $psrinfo[0];
7876 if(-x "/usr/sbin/prtconf") {
7877 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7878 if($#prtconf >= 0) {
7879 $cpu->{'cores'} ||= $#prtconf +1;
7888 # { 'sockets' => #sockets
7890 # 'threads' => #threads
7891 # 'active' => #taskset_threads }
7894 if(not $cpu->{'cores'}) {
7895 if(-x "/usr/sbin/lscfg") {
7896 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7897 $cpu->{'cores'} = <$in_fh>;
7902 if(not $cpu->{'threads'}) {
7903 if(-x "/usr/bin/vmstat") {
7904 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7906 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7917 # { 'sockets' => #sockets
7919 # 'threads' => #threads
7920 # 'active' => #taskset_threads }
7924 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7925 $cpu->{'threads'} ||=
7926 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7932 # { 'sockets' => #sockets
7934 # 'threads' => #threads
7935 # 'active' => #taskset_threads }
7938 # BUG: It is not known how to calculate this.
7943 sub sct_openserver($) {
7945 # { 'sockets' => #sockets
7947 # 'threads' => #threads
7948 # 'active' => #taskset_threads }
7951 if(not $cpu->{'cores'}) {
7952 if(-x "/usr/sbin/psrinfo") {
7953 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7954 if($#psrinfo >= 0) {
7955 $cpu->{'cores'} = $#psrinfo +1;
7959 $cpu->{'sockets'} ||= $cpu->{'cores'};
7965 # { 'sockets' => #sockets
7967 # 'threads' => #threads
7968 # 'active' => #taskset_threads }
7972 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7978 # { 'sockets' => #sockets
7980 # 'threads' => #threads
7981 # 'active' => #taskset_threads }
7984 $cpu->{'cores'} ||= ::qqx("sizer -pr");
7985 $cpu->{'sockets'} ||= $cpu->{'cores'};
7986 $cpu->{'threads'} ||= $cpu->{'cores'};
7993 # $sshcommand = the command (incl options) to run when using ssh
7995 if (not defined $self->{'sshcommand'}) {
7996 $self->sshcommand_of_sshlogin();
7998 return $self->{'sshcommand'};
8001 sub serverlogin($) {
8003 # $sshcommand = the command (incl options) to run when using ssh
8005 if (not defined $self->{'serverlogin'}) {
8006 $self->sshcommand_of_sshlogin();
8008 return $self->{'serverlogin'};
8011 sub sshcommand_of_sshlogin($) {
8012 # Compute ssh command and serverlogin from sshlogin
8013 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
8014 # 'user@server' -> ('ssh','user@server')
8015 # 'myssh user@server' -> ('myssh','user@server')
8016 # 'myssh -l user server' -> ('myssh -l user','server')
8017 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
8019 # $self->{'sshcommand'}
8020 # $self->{'serverlogin'}
8022 my ($sshcmd, $serverlogin);
8023 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
8024 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
8025 if($self->{'string'} =~ /(.+) (\S+)$/) {
8027 $sshcmd = $1; $serverlogin = $2;
8030 if($opt::controlmaster) {
8031 # Use control_path to make ssh faster
8032 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
8033 $sshcmd = $opt::ssh." -S ".$control_path;
8034 $serverlogin = $self->{'string'};
8035 if(not $self->{'control_path'}{$control_path}++) {
8036 # Master is not running for this control_path
8040 $Global::sshmaster{$pid} ||= 1;
8042 $SIG{'TERM'} = undef;
8043 # Ignore the 'foo' being printed
8044 open(STDOUT,">","/dev/null");
8045 # STDERR >/dev/null to ignore
8046 open(STDERR,">","/dev/null");
8047 open(STDIN,"<","/dev/null");
8048 # Run a sleep that outputs data, so it will discover
8049 # if the ssh connection closes.
8050 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
8051 my @master = ($opt::ssh, "-MTS",
8052 $control_path, $serverlogin, "--", "perl", "-e",
8058 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
8062 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
8063 # convert user@server to '-l user server'
8064 # because lsh does not support user@server
8065 $sshcmd = $sshcmd." -l ".$1;
8068 $self->{'sshcommand'} = $sshcmd;
8069 $self->{'serverlogin'} = $serverlogin;
8072 sub control_path_dir($) {
8074 # $control_path_dir = dir of control path (for -M)
8076 if(not defined $self->{'control_path_dir'}) {
8077 $self->{'control_path_dir'} =
8078 # Use $ENV{'TMPDIR'} as that is typically not
8080 File::Temp::tempdir($ENV{'TMPDIR'}
8081 . "/control_path_dir-XXXX",
8084 return $self->{'control_path_dir'};
8087 sub rsync_transfer_cmd($) {
8088 # Command to run to transfer a file
8090 # $file = filename of file to transfer
8091 # $workdir = destination dir
8093 # $cmd = rsync command to run to transfer $file ("" if unreadable)
8096 my $workdir = shift;
8098 ::warning($file. " is not readable and will not be transferred.");
8102 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
8104 $rsync_destdir = ::shell_quote_file($workdir);
8107 $rsync_destdir = "/";
8109 $file = ::shell_quote_file($file);
8110 my $sshcmd = $self->sshcommand();
8111 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
8113 my $serverlogin = $self->serverlogin();
8114 # Make dir if it does not exist
8115 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
8116 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
8119 sub cleanup_cmd($$$) {
8120 # Command to run to remove the remote file
8122 # $file = filename to remove
8123 # $workdir = destination dir
8125 # $cmd = ssh command to run to remove $file and empty parent dirs
8128 my $workdir = shift;
8131 # foo/bar/./baz/quux => workdir/baz/quux
8132 # /foo/bar/./baz/quux => workdir/baz/quux
8133 $f =~ s:.*/\./:$workdir/:;
8134 } elsif($f =~ m:^[^/]:) {
8135 # foo/bar => workdir/foo/bar
8136 $f = $workdir."/".$f;
8138 my @subdirs = split m:/:, ::dirname($f);
8143 unshift @rmdir, ::shell_quote_file($dir);
8145 my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : "";
8146 if(defined $opt::workdir and $opt::workdir eq "...") {
8147 $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
8150 ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir);
8151 my $sshcmd = $self->sshcommand();
8152 my $serverlogin = $self->serverlogin();
8153 return "$sshcmd $serverlogin -- ".::Q("$rmf");
8160 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
8161 # If the version >= 3.1.0: downgrade to protocol 30
8163 # $rsync = "rsync" or "rsync --protocol 30"
8165 my @out = `rsync --version`;
8167 # rsync version 3.1.3 protocol version 31
8168 # rsync version v3.2.3 protocol version 31
8169 if(/version v?(\d+.\d+)(.\d+)?/) {
8171 # Version 3.1.0 or later: Downgrade to protocol 30
8172 $rsync = "rsync --protocol 30";
8178 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
8189 my $commandref = shift;
8190 my $read_from = shift;
8191 my $context_replace = shift;
8192 my $max_number_of_args = shift;
8193 my $transfer_files = shift;
8194 my $return_files = shift;
8195 my $template_names = shift;
8196 my $template_contents = shift;
8197 my $commandlinequeue = CommandLineQueue->new
8198 ($commandref, $read_from, $context_replace, $max_number_of_args,
8199 $transfer_files, $return_files, $template_names, $template_contents);
8203 'commandlinequeue' => $commandlinequeue,
8205 'total_jobs' => undef,
8206 }, ref($class) || $class;
8212 $self->{'this_job_no'}++;
8213 if(@{$self->{'unget'}}) {
8214 my $job = shift @{$self->{'unget'}};
8215 # {%} may have changed, so flush computed values
8216 $job && $job->flush_cache();
8219 my $commandline = $self->{'commandlinequeue'}->get();
8220 if(defined $commandline) {
8221 return Job->new($commandline);
8223 $self->{'this_job_no'}--;
8231 unshift @{$self->{'unget'}}, @_;
8232 $self->{'this_job_no'} -= @_;
8237 my $empty = (not @{$self->{'unget'}}) &&
8238 $self->{'commandlinequeue'}->empty();
8239 ::debug("run", "JobQueue->empty $empty ");
8245 if(not defined $self->{'total_jobs'}) {
8246 if($opt::pipe and not $opt::tee) {
8247 ::error("--pipe is incompatible with --eta/--bar/--shuf");
8248 ::wait_and_exit(255);
8250 if($opt::sqlworker) {
8251 $self->{'total_jobs'} = $Global::sql->total_jobs();
8255 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
8257 while($record = $record_queue->get()) {
8258 push @arg_records, $record;
8259 if(time - $start > 10) {
8260 ::warning("Reading ".scalar(@arg_records).
8261 " arguments took longer than 10 seconds.");
8262 $opt::eta && ::warning("Consider removing --eta.");
8263 $opt::bar && ::warning("Consider removing --bar.");
8264 $opt::shuf && ::warning("Consider removing --shuf.");
8268 while($record = $record_queue->get()) {
8269 push @arg_records, $record;
8271 if($opt::shuf and @arg_records) {
8272 my $i = @arg_records;
8274 my $j = int rand($i+1);
8275 @arg_records[$i,$j] = @arg_records[$j,$i];
8278 $record_queue->unget(@arg_records);
8279 # $#arg_records = number of args - 1
8280 # We have read one @arg_record for this job (so add 1 more)
8281 my $num_args = $#arg_records + 2;
8282 # This jobs is not started so -1
8283 my $started_jobs = $self->{'this_job_no'} - 1;
8284 my $max_args = ::max($Global::max_number_of_args,1);
8285 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
8287 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
8288 " ($num_args/$max_args + $started_jobs)\n");
8291 return $self->{'total_jobs'};
8294 sub flush_total_jobs($) {
8295 # Unset total_jobs to force recomputing
8297 ::debug("init","flush Total jobs: ");
8298 $self->{'total_jobs'} = undef;
8304 return $self->{'commandlinequeue'}->seq();
8309 return $self->{'commandlinequeue'}->quote_args();
8317 my $commandlineref = shift;
8319 'commandline' => $commandlineref, # CommandLine object
8320 'workdir' => undef, # --workdir
8321 # filehandle for stdin (used for --pipe)
8322 # filename for writing stdout to (used for --files)
8323 # remaining data not sent to stdin (used for --pipe)
8324 # tmpfiles to cleanup when job is done
8326 # amount of data sent via stdin (used for --pipe)
8327 'transfersize' => 0, # size of files using --transfer
8328 'returnsize' => 0, # size of files using --return
8330 # hash of { SSHLogins => number of times the command failed there }
8332 'sshlogin' => undef,
8333 # The commandline wrapped with rsync and ssh
8334 'sshlogin_wrap' => undef,
8335 'exitstatus' => undef,
8336 'exitsignal' => undef,
8337 # Timestamp for timeout if any
8340 # Output used for SQL and CSV-output
8341 'output' => { 1 => [], 2 => [] },
8342 'halfline' => { 1 => [], 2 => [] },
8343 }, ref($class) || $class;
8346 sub flush_cache($) {
8348 $self->{'commandline'}->flush_cache();
8353 $self->{'commandline'} or ::die_bug("commandline empty");
8354 return $self->{'commandline'}->replaced();
8359 return $self->{'commandline'}->seq();
8364 return $self->{'commandline'}->set_seq(shift);
8369 return $self->{'commandline'}->slot();
8374 push @Global::slots, $self->slot();
8382 # $cattail = perl program for:
8383 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
8386 # cat followed by tail (possibly with rm as soon at the file is opened)
8387 # If $writerpid dead: finish after this round
8391 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
8393 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
8397 while(! -s $comfile) {
8398 # Writer has not opened the buffer file, so we cannot remove it yet
8399 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
8402 # The writer and we have both opened the file, so it is safe to unlink it
8403 unlink $unlink_file;
8406 my $first_round = 1;
8408 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
8409 $flags |= O_NONBLOCK; # Add non-blocking to the flags
8410 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
8415 my $writer_running = kill 0, $writerpid;
8416 $read = sysread(IN,$buf,131072);
8419 # Only start the command if there any input to process
8421 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
8426 my $bytes_written = syswrite(OUT,$buf);
8427 # syswrite may be interrupted by SIGHUP
8428 substr($buf,0,$bytes_written) = "";
8430 # Something printed: Wait less next time
8433 if(eof(IN) and not $writer_running) {
8434 # Writer dead: There will never be sent more to the decompressor
8438 # TODO This could probably be done more efficiently using select(2)
8439 # Nothing read: Wait longer before next read
8440 # Up to 100 milliseconds
8441 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
8447 # Sleep this many milliseconds.
8449 select(undef, undef, undef, $secs/1000);
8452 $cattail =~ s/#.*//mg;
8453 $cattail =~ s/\s+/ /g;
8459 sub openoutputfiles($) {
8460 # Open files for STDOUT and STDERR
8461 # Set file handles in $self->fh
8463 my ($outfhw, $errfhw, $outname, $errname);
8465 if($opt::linebuffer and not
8466 ($opt::keeporder or $opt::files or $opt::results or
8467 $opt::compress or $opt::compress_program or
8468 $opt::decompress_program)) {
8469 # Do not save to files: Use non-blocking pipe
8470 my ($outfhr, $errfhr);
8471 pipe($outfhr, $outfhw) || die;
8472 pipe($errfhr, $errfhw) || die;
8473 $self->set_fh(1,'w',$outfhw);
8474 $self->set_fh(2,'w',$errfhw);
8475 $self->set_fh(1,'r',$outfhr);
8476 $self->set_fh(2,'r',$errfhr);
8477 # Make it possible to read non-blocking from the pipe
8478 for my $fdno (1,2) {
8479 ::set_fh_non_blocking($self->fh($fdno,'r'));
8481 # Return immediately because we do not need setting filenames
8483 } elsif($opt::results and not $Global::csvsep) {
8484 # If --results, but not --results *.csv/*.tsv
8485 my $out = $self->{'commandline'}->results_out();
8487 if($out eq $opt::results or $out =~ m:/$:) {
8488 # $opt::results = simple string or ending in /
8490 # prefix/name1/val1/name2/val2/seq
8491 $seqname = $out."seq";
8492 # prefix/name1/val1/name2/val2/stdout
8493 $outname = $out."stdout";
8494 # prefix/name1/val1/name2/val2/stderr
8495 $errname = $out."stderr";
8497 # $opt::results = replacement string not ending in /
8500 $errname = "$out.err";
8501 $seqname = "$out.seq";
8504 if(not open($seqfhw, "+>", $seqname)) {
8505 ::error("Cannot write to `$seqname'.");
8506 ::wait_and_exit(255);
8508 print $seqfhw $self->seq();
8510 if(not open($outfhw, "+>", $outname)) {
8511 ::error("Cannot write to `$outname'.");
8512 ::wait_and_exit(255);
8514 if(not open($errfhw, "+>", $errname)) {
8515 ::error("Cannot write to `$errname'.");
8516 ::wait_and_exit(255);
8518 $self->set_fh(1,"unlink","");
8519 $self->set_fh(2,"unlink","");
8520 if($opt::sqlworker) {
8521 # Save the filenames in SQL table
8522 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
8523 "WHERE Seq = ". $self->seq(),
8524 $outname, $errname);
8526 } elsif(not $opt::ungroup) {
8527 # To group we create temporary files for STDOUT and STDERR
8528 # To avoid the cleanup unlink the files immediately (but keep them open)
8530 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8531 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8532 # --files => only remove stderr
8533 $self->set_fh(1,"unlink","");
8534 $self->set_fh(2,"unlink",$errname);
8536 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8537 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8538 $self->set_fh(1,"unlink",$outname);
8539 $self->set_fh(2,"unlink",$errname);
8543 open($outfhw,">&",$Global::fh{1}) || die;
8544 open($errfhw,">&",$Global::fh{2}) || die;
8545 # File name must be empty as it will otherwise be printed
8548 $self->set_fh(1,"unlink",$outname);
8549 $self->set_fh(2,"unlink",$errname);
8552 $self->set_fh(1,'w',$outfhw);
8553 $self->set_fh(2,'w',$errfhw);
8554 $self->set_fh(1,'name',$outname);
8555 $self->set_fh(2,'name',$errname);
8556 if($opt::compress) {
8557 $self->filter_through_compress();
8558 } elsif(not $opt::ungroup) {
8561 if($opt::linebuffer) {
8562 # Make it possible to read non-blocking from
8564 # Used for --linebuffer with -k, --files, --res, --compress*
8565 for my $fdno (1,2) {
8566 ::set_fh_non_blocking($self->fh($fdno,'r'));
8571 sub print_verbose_dryrun($) {
8572 # If -v set: print command to stdout (possibly buffered)
8573 # This must be done before starting the command
8575 if($Global::verbose or $opt::dryrun) {
8576 my $fh = $self->fh(1,"w");
8577 if($Global::verbose <= 1) {
8578 print $fh $self->replaced(),"\n";
8580 # Verbose level > 1: Print the rsync and stuff
8581 print $fh $self->wrapped(),"\n";
8584 if($opt::sqlworker) {
8585 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
8591 # Files to remove when job is done
8593 push @{$self->{'unlink'}}, @_;
8597 # Files to remove when job is done
8599 return @{$self->{'unlink'}};
8603 # Remove files when job is done
8605 unlink $self->get_rm();
8606 delete @Global::unlink{$self->get_rm()};
8611 # Set reading FD if using --group (--ungroup does not need)
8612 for my $fdno (1,2) {
8613 # Re-open the file for reading
8614 # so fdw can be closed seperately
8615 # and fdr can be seeked seperately (for --line-buffer)
8616 open(my $fdr,"<", $self->fh($fdno,'name')) ||
8617 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
8618 $self->set_fh($fdno,'r',$fdr);
8619 # Unlink if not debugging
8620 $Global::debug or ::rm($self->fh($fdno,"unlink"));
8624 sub empty_input_wrapper($) {
8625 # If no input: exit(0)
8626 # If some input: Pass input as input to command on STDIN
8627 # This avoids starting the command if there is no input.
8629 # $command = command to pipe data to
8631 # $wrapped_command = the wrapped command
8632 my $command = shift;
8635 if(sysread(STDIN, $buf, 1)) {
8636 open($fh, "|-", @ARGV) || die;
8637 syswrite($fh, $buf);
8638 # Align up to 128k block
8639 if($read = sysread(STDIN, $buf, 131071)) {
8640 syswrite($fh, $buf);
8642 while($read = sysread(STDIN, $buf, 131072)) {
8643 syswrite($fh, $buf);
8646 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8649 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
8652 length $command > 499) {
8653 # csh does not like words longer than 1000 (499 quoted)
8654 # $command = "perl -e '".base64_zip_eval()."' ".
8655 # join" ",string_zip_base64(
8656 # 'exec "'.::perl_quote_scalar($command).'"');
8657 return 'perl -e '.::Q($script)." ".
8658 base64_wrap("exec \"$Global::shell\",'-c',\"".
8659 ::perl_quote_scalar($command).'"');
8661 return 'perl -e '.::Q($script)." ".
8662 $Global::shell." -c ".::Q($command);
8666 sub filter_through_compress($) {
8668 # Send stdout to stdin for $opt::compress_program(1)
8669 # Send stderr to stdin for $opt::compress_program(2)
8670 # cattail get pid: $pid = $self->fh($fdno,'rpid');
8671 my $cattail = cattail();
8673 for my $fdno (1,2) {
8674 # Make a communication file.
8675 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
8677 # Compressor: (echo > $comfile; compress pipe) > output
8678 # When the echo is written to $comfile,
8679 # it is known that output file is opened,
8680 # thus output file can then be removed by the decompressor.
8681 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
8682 empty_input_wrapper($opt::compress_program).") >".
8683 ::Q($self->fh($fdno,'name'))) || die $?;
8684 $self->set_fh($fdno,'w',$fdw);
8685 $self->set_fh($fdno,'wpid',$wpid);
8686 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
8687 # decompress output > stdout
8688 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
8689 $opt::decompress_program, $wpid,
8690 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
8692 $self->set_fh($fdno,'r',$fdr);
8693 $self->set_fh($fdno,'rpid',$rpid);
8699 my ($self, $fd_no, $key, $fh) = @_;
8700 $self->{'fd'}{$fd_no,$key} = $fh;
8705 my ($self, $fd_no, $key) = @_;
8706 return $self->{'fd'}{$fd_no,$key};
8709 sub write_block($) {
8711 my $stdin_fh = $self->fh(0,"w");
8716 # If writing is to a closed pipe:
8717 # Do not call signal handler, but let nothing be written
8718 local $SIG{PIPE} = undef;
8722 $self->{'header'},$self->{'block'}) {
8723 # syswrite may not write all in one go,
8724 # so make sure everything is written.
8726 while($written = syswrite($stdin_fh,$$part)) {
8727 substr($$part,0,$written) = "";
8737 my $remaining_ref = shift;
8738 my $stdin_fh = $self->fh(0,"w");
8740 my $len = length $$remaining_ref;
8741 # syswrite may not write all in one go,
8742 # so make sure everything is written.
8745 # If writing is to a closed pipe:
8746 # Do not call signal handler, but let nothing be written
8747 local $SIG{PIPE} = undef;
8748 while($written = syswrite($stdin_fh,$$remaining_ref)){
8749 substr($$remaining_ref,0,$written) = "";
8753 sub set_block($$$$$$) {
8754 # Copy stdin buffer from $block_ref up to $endpos
8755 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
8756 # Remove $recstart and $recend if needed
8758 # $header_ref = ref to $header to prepend
8759 # $buffer_ref = ref to $buffer containing the block
8760 # $endpos = length of $block to pass on
8761 # $recstart = --recstart regexp
8762 # $recend = --recend regexp
8766 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
8767 $self->{'header'} = $header_ref;
8768 if($opt::roundrobin or $opt::remove_rec_sep or $opt::retries) {
8770 if(($opt::roundrobin or $opt::retries) and $self->virgin()) {
8773 # Job is no longer virgin
8774 $self->set_virgin(0);
8775 # Make a full copy because $buffer will change
8776 $a .= substr($$buffer_ref,0,$endpos);
8777 $self->{'block'} = \$a;
8778 if($opt::remove_rec_sep) {
8779 remove_rec_sep($self->{'block'},$recstart,$recend);
8781 $self->{'block_length'} = length ${$self->{'block'}};
8783 $self->set_virgin(0);
8784 for(substr($$buffer_ref,0,$endpos)) {
8785 $self->{'block'} = \$_;
8787 $self->{'block_length'} = $endpos + length ${$self->{'header'}};
8789 $self->{'block_pos'} = 0;
8790 $self->add_transfersize($self->{'block_length'});
8795 return $self->{'block'};
8798 sub block_length($) {
8800 return $self->{'block_length'};
8803 sub remove_rec_sep($) {
8804 # Remove --recstart and --recend from $block
8806 # $block_ref = reference to $block to be modified
8807 # $recstart = --recstart
8808 # $recend = --recend
8810 # $opt::regexp = Are --recstart/--recend regexp?
8813 my ($block_ref,$recstart,$recend) = @_;
8814 # Remove record separator
8816 $$block_ref =~ s/$recend$recstart//gom;
8817 $$block_ref =~ s/^$recstart//os;
8818 $$block_ref =~ s/$recend$//os;
8820 $$block_ref =~ s/\Q$recend$recstart\E//gom;
8821 $$block_ref =~ s/^\Q$recstart\E//os;
8822 $$block_ref =~ s/\Q$recend\E$//os;
8826 sub non_blocking_write($) {
8828 my $something_written = 0;
8830 my $in = $self->fh(0,"w");
8831 my $rv = syswrite($in,
8832 substr(${$self->{'block'}},$self->{'block_pos'}));
8833 if (!defined($rv) && $! == ::EAGAIN()) {
8834 # would block - but would have written
8835 $something_written = 0;
8836 # avoid triggering auto expanding block size
8837 $Global::no_autoexpand_block ||= 1;
8838 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8840 # Remove the written part
8841 $self->{'block_pos'} += $rv;
8842 $something_written = $rv;
8844 # successfully wrote everything
8845 # Empty block to free memory
8847 $self->set_block(\$a,\$a,0,"","");
8848 $something_written = $rv;
8850 ::debug("pipe", "Non-block: ", $something_written);
8851 return $something_written;
8857 return $self->{'virgin'};
8860 sub set_virgin($$) {
8862 $self->{'virgin'} = shift;
8867 return $self->{'pid'};
8872 $self->{'pid'} = shift;
8877 # UNIX-timestamp this job started
8879 return sprintf("%.3f",$self->{'starttime'});
8882 sub set_starttime($@) {
8884 my $starttime = shift || ::now();
8885 $self->{'starttime'} = $starttime;
8887 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8893 # Run time in seconds with 3 decimals
8895 return sprintf("%.3f",
8896 int(($self->endtime() - $self->starttime())*1000)/1000);
8901 # UNIX-timestamp this job ended
8902 # 0 if not ended yet
8904 return ($self->{'endtime'} || 0);
8907 sub set_endtime($$) {
8909 my $endtime = shift;
8910 $self->{'endtime'} = $endtime;
8912 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8916 sub is_timedout($) {
8917 # Is the job timedout?
8919 # $delta_time = time that the job may run
8923 my $delta_time = shift;
8924 return time > $self->{'starttime'} + $delta_time;
8929 $self->set_exitstatus(-1);
8930 ::kill_sleep_seq($self->pid());
8935 my @pgrps = map { -$_ } $self->pid();
8936 kill "STOP", @pgrps;
8937 $self->set_suspended(1);
8940 sub set_suspended($$) {
8942 $self->{'suspended'} = shift;
8947 return $self->{'suspended'};
8952 my @pgrps = map { -$_ } $self->pid();
8953 kill "CONT", @pgrps;
8954 $self->set_suspended(0);
8958 # return number of times failed for this $sshlogin
8962 # Number of times failed for $sshlogin
8964 my $sshlogin = shift;
8965 return $self->{'failed'}{$sshlogin};
8968 sub failed_here($) {
8969 # return number of times failed for the current $sshlogin
8971 # Number of times failed for this sshlogin
8973 return $self->{'failed'}{$self->sshlogin()};
8977 # increase the number of times failed for this $sshlogin
8979 my $sshlogin = shift;
8980 $self->{'failed'}{$sshlogin}++;
8983 sub add_failed_here($) {
8984 # increase the number of times failed for the current $sshlogin
8986 $self->{'failed'}{$self->sshlogin()}++;
8989 sub reset_failed($) {
8990 # increase the number of times failed for this $sshlogin
8992 my $sshlogin = shift;
8993 delete $self->{'failed'}{$sshlogin};
8996 sub reset_failed_here($) {
8997 # increase the number of times failed for this $sshlogin
8999 delete $self->{'failed'}{$self->sshlogin()};
9004 # the number of sshlogins this command has failed on
9005 # the minimal number of times this command has failed
9008 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
9009 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
9010 return ($number_of_sshlogins_failed_on,$min_failures);
9013 sub total_failed($) {
9015 # $total_failures = the number of times this command has failed
9017 my $total_failures = 0;
9018 for (values %{$self->{'failed'}}) {
9019 $total_failures += $_;
9021 return $total_failures;
9027 sub postpone_exit_and_cleanup {
9028 # Command to remove files and dirs (given as args) without
9029 # affecting the exit value in $?/$status.
9031 $script = "perl -e '".
9039 if($bash=~s/(\d+)h/$1/) {
9044 # `echo \$?h` is needed to make fish not complain
9045 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
9055 # Script to create a fifo, run a command on the fifo
9056 # while copying STDIN to the fifo, and finally
9057 # remove the fifo and return the exit code of the command.
9059 # {} == $PARALLEL_TMP for --fifo
9060 # To make it csh compatible a wrapper needs to:
9062 # * spawn $command &
9064 # * waitpid to get the exit code from $command
9065 # * be less than 1000 chars long
9066 $script = "perl -e '".
9070 # mkfifo $PARALLEL_TMP
9071 system "mkfifo", $f;
9072 # spawn $shell -c $command &
9073 $pid = fork || exec $s, "-c", $c;
9074 open($o,">",$f) || die $!;
9075 # cat > $PARALLEL_TMP
9076 while(sysread(STDIN,$buf,131072)){
9080 # waitpid to get the exit code from $command
9092 # Wrap command with:
9098 # * --pipepart (@Global::cat_prepends)
9099 # * --tee (@Global::cat_prepends)
9102 # The ordering of the wrapping is important:
9103 # * --nice/--cat/--fifo should be done on the remote machine
9104 # * --pipepart/--pipe should be done on the local machine inside --tmux
9111 # @Global::cat_prepends
9115 # $self->{'wrapped'} = the command wrapped with the above
9117 if(not defined $self->{'wrapped'}) {
9118 my $command = $self->replaced();
9119 # Bug in Bash and Ksh when running multiline aliases
9120 # This will force them to run correctly, but will fail in
9121 # tcsh so we do not do it.
9122 # $command .= "\n\n";
9123 if(@opt::shellquote) {
9124 # Quote one time for each --shellquote
9126 for(@opt::shellquote) {
9129 # Prepend "echo" (it is written in perl because
9130 # quoting '-e' causes problem in some versions and
9131 # csh's version does something wrong)
9132 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
9134 if($Global::parallel_env) {
9135 # If $PARALLEL_ENV set, put that in front of the command
9136 # Used for env_parallel.*
9137 if($Global::shell =~ /zsh/) {
9138 # The extra 'eval' will make aliases work, too
9139 $command = $Global::parallel_env."\n".
9140 "eval ".::Q($command);
9142 $command = $Global::parallel_env."\n".$command;
9146 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
9147 # This is to make it possible to compute $PARALLEL_TMP on
9148 # the fly when running remotely.
9149 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
9150 # the command is run.
9152 # Prepend 'cat > $PARALLEL_TMP;'
9153 # Append 'unlink $PARALLEL_TMP without affecting $?'
9155 'cat > $PARALLEL_TMP;'.
9156 $command.";". postpone_exit_and_cleanup().
9158 } elsif($opt::fifo) {
9159 # Prepend fifo-wrapper. In essence:
9162 # # $command must read {}, otherwise this 'cat' will block
9165 # without affecting $?
9166 $command = fifo_wrap(). " ".
9167 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
9169 # Wrap with ssh + tranferring of files
9170 $command = $self->sshlogin_wrap($command);
9171 if(@Global::cat_prepends) {
9172 # --pipepart: prepend:
9173 # < /tmp/foo perl -e 'while(@ARGV) {
9174 # sysseek(STDIN,shift,0) || die; $left = shift;
9175 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
9176 # $left -= $read; syswrite(STDOUT,$buf);
9180 # --pipepart --tee: prepend:
9183 # --pipe --tee: wrap:
9184 # (rm fifo; ... ) < fifo
9187 # (rm fifo; ... ) < fifo
9188 $command = (shift @Global::cat_prepends). "($command)".
9189 (shift @Global::cat_appends);
9190 } elsif($opt::pipe and not $opt::roundrobin) {
9191 # Wrap with EOF-detector to avoid starting $command if EOF.
9192 $command = empty_input_wrapper($command);
9195 # Wrap command with 'tmux'
9196 $command = $self->tmux_wrap($command);
9200 length $command > 499) {
9201 # csh does not like words longer than 1000 (499 quoted)
9202 # $command = "perl -e '".base64_zip_eval()."' ".
9203 # join" ",string_zip_base64(
9204 # 'exec "'.::perl_quote_scalar($command).'"');
9205 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
9206 ::perl_quote_scalar($command).'"');
9208 $self->{'wrapped'} = $command;
9210 return $self->{'wrapped'};
9213 sub set_sshlogin($$) {
9215 my $sshlogin = shift;
9216 $self->{'sshlogin'} = $sshlogin;
9217 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
9218 delete $self->{'wrapped'};
9220 if($opt::sqlworker) {
9221 # Identify worker as --sqlworker often runs on different machines
9222 my $host = $sshlogin->string();
9224 $host = ::hostname();
9226 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
9232 return $self->{'sshlogin'};
9235 sub string_base64($) {
9236 # Base64 encode strings into 1000 byte blocks.
9237 # 1000 bytes is the largest word size csh supports
9239 # @strings = to be encoded
9241 # @base64 = 1000 byte block
9242 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
9243 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
9247 sub string_zip_base64($) {
9248 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
9250 # 1000 bytes is the largest word size csh supports
9251 # Zipping will make exporting big environments work, too
9253 # @strings = to be encoded
9255 # @base64 = 1000 byte block
9256 my($zipin_fh, $zipout_fh,@base64);
9257 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
9260 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
9261 # Split base64 encoded into 1000 byte blocks
9262 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
9270 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
9274 sub base64_zip_eval() {
9276 # * reads base64 strings from @ARGV
9278 # * pipes through 'bzip2 -dc'
9279 # * evals the result
9280 # Reverse of string_zip_base64 + eval
9281 # Will be wrapped in ' so single quote is forbidden
9283 # $script = 1-liner for perl -e
9284 my $script = ::spacefree(0,q{
9285 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
9286 eval"@GNU_Parallel";
9288 $SIG{CHLD} = "IGNORE";
9289 # Search for bzip2. Not found => use default path
9290 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
9291 # $in = stdin on $zip, $out = stdout from $zip
9292 # Forget my() to save chars for csh
9293 # my($in, $out,$eval);
9294 open3($in,$out,">&STDERR",$zip,"-dc");
9295 if(my $perlpid = fork) {
9297 $eval = join "", <$out>;
9301 # Pipe decoded base64 into 'bzip2 -dc'
9302 print $in (decode_base64(join"",@ARGV));
9310 ::debug("base64",$script,"\n");
9314 sub base64_wrap($) {
9315 # base64 encode Perl code
9316 # Split it into chunks of < 1000 bytes
9317 # Prepend it with a decoder that eval's it
9319 # $eval_string = Perl code to run
9321 # $shell_command = shell command that runs $eval_string
9322 my $eval_string = shift;
9325 ::Q(base64_zip_eval())." ".
9326 join" ",::shell_quote(string_zip_base64($eval_string));
9329 sub base64_eval($) {
9331 # * reads base64 strings from @ARGV
9333 # * evals the result
9334 # Reverse of string_base64 + eval
9335 # Will be wrapped in ' so single quote is forbidden.
9336 # Spaces are stripped so spaces cannot be significant.
9337 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
9338 # to make it clear that this is a GNU Parallel command
9339 # when looking at the process table.
9341 # $script = 1-liner for perl -e
9342 my $script = ::spacefree(0,q{
9343 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
9344 eval "@GNU_Parallel";
9345 my $eval = decode_base64(join"",@ARGV);
9348 ::debug("base64",$script,"\n");
9352 sub sshlogin_wrap($) {
9353 # Wrap the command with the commands needed to run remotely
9355 # $command = command to run
9357 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
9358 sub monitor_parent_sshd_script {
9359 # This script is to solve the problem of
9360 # * not mixing STDERR and STDOUT
9361 # * terminating with ctrl-c
9362 # If its parent is ssh: all good
9363 # If its parent is init(1): ssh died, so kill children
9364 my $monitor_parent_sshd_script;
9366 if(not $monitor_parent_sshd_script) {
9367 $monitor_parent_sshd_script =
9368 # This will be packed in ', so only use "
9369 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
9370 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
9371 '$nice = '.$opt::nice.';'.
9372 '$termseq = "'.$opt::termseq.'";'.
9374 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
9376 $ENV{PARALLEL_TMP} = $tmpdir."/par".
9377 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
9378 } while(-e $ENV{PARALLEL_TMP});
9379 $SIG{CHLD} = sub { $done = 1; };
9382 # Make own process group to be able to kill HUP it later
9384 eval { setpriority(0,0,$nice) };
9385 exec $shell, "-c", ($bashfunc."@ARGV");
9388 my $parent = getppid;
9390 # Parent pid is not changed, so sshd is alive
9391 # Exponential sleep up to 1 sec
9392 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
9393 select(undef, undef, undef, $s);
9394 } until ($done || getppid != $parent);
9396 # Kill as per --termseq
9397 my @term_seq = split/,/,$termseq;
9399 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
9401 while(@term_seq && kill(0,-$pid)) {
9402 kill(shift @term_seq, -$pid);
9403 select(undef, undef, undef, (shift @term_seq)/1000);
9407 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9410 return $monitor_parent_sshd_script;
9413 sub vars_to_export {
9416 my @vars = ("parallel_bash_environment");
9417 for my $varstring (@opt::env) {
9418 # Split up --env VAR1,VAR2
9419 push @vars, split /,/, $varstring;
9422 if(-r $_ and not -d) {
9423 # Read as environment definition bug #44041
9425 my $fh = ::open_or_exit($_);
9426 $Global::envdef = join("",<$fh>);
9430 if(grep { /^_$/ } @vars) {
9433 # Include all vars that are not in a clean environment
9434 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
9435 my @ignore = <$vars_fh>;
9438 @ignore{@ignore} = @ignore;
9440 push @vars, grep { not defined $ignore{$_} } keys %ENV;
9441 @vars = grep { not /^_$/ } @vars;
9443 ::error("Run '$Global::progname --record-env' ".
9444 "in a clean environment first.");
9445 ::wait_and_exit(255);
9448 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
9449 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
9451 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
9452 "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST",
9453 "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS",
9454 "PARALLEL_JOBSLOT", map { ("BASH_FUNC_$_()",
9455 "BASH_FUNC_$_%%") } @vars);
9456 # Keep only defined variables
9457 return grep { defined($ENV{$_}) } @vars;
9462 # $eval = '$ENV{"..."}=...; ...'
9463 my @vars = vars_to_export();
9464 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
9465 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
9466 my @non_functions = (grep { !/PARALLEL_ENV/ }
9467 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
9469 # eval of @envset will set %ENV
9470 my $envset = join"", map {
9471 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
9472 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
9474 # running @bashfunc on the command line, will set the functions
9475 my @bashfunc = map {
9477 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
9478 "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions;
9479 # eval $bashfuncset will set $bashfunc
9482 # Functions are not supported for all shells
9483 if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) {
9484 ::warning("Shell functions may not be supported in $Global::shell.");
9487 '@bash_functions=qw('."@bash_functions".");".
9488 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
9490 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
9494 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
9496 $bashfuncset = '$bashfunc = "";'
9498 if($ENV{'parallel_bash_environment'}) {
9499 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
9501 ::debug("base64",$envset,$bashfuncset,"\n");
9502 return $csh_friendly,$envset,$bashfuncset;
9506 my $command = shift;
9507 # TODO test that *sh -c 'parallel --env' use *sh
9508 if(not defined $self->{'sshlogin_wrap'}{$command}) {
9509 my $sshlogin = $self->sshlogin();
9510 my $serverlogin = $sshlogin->serverlogin();
9511 my $quoted_remote_command;
9512 $ENV{'PARALLEL_SEQ'} = $self->seq();
9513 $ENV{'PARALLEL_JOBSLOT'} = $self->slot();
9514 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
9515 $ENV{'PARALLEL_SSHHOST'} = $sshlogin->serverlogin();
9516 if ($opt::hostgroups) {
9517 $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups();
9518 $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups();
9520 $ENV{'PARALLEL_PID'} = $$;
9521 if($serverlogin eq ":") {
9523 # Create workdir if needed. Then cd to it.
9524 my $wd = $self->workdir();
9525 if($opt::workdir eq "." or $opt::workdir eq "...") {
9526 # If $wd does not start with '/': Prepend $HOME
9527 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
9529 ::mkdir_or_die($wd);
9531 if($opt::workdir eq "...") {
9532 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
9535 $command = "cd ".::Q($wd)." || exit 255; " .
9539 # Prepend with environment setter, which sets functions in zsh
9540 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9541 my $perl_code = $envset.$bashfuncset.
9542 '@ARGV="'.::perl_quote_scalar($command).'";'.
9543 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
9544 if(length $perl_code > 999
9549 # csh does not deal well with > 1000 chars in one word
9550 # csh does not deal well with $ENV with \n
9551 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
9553 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
9556 $self->{'sshlogin_wrap'}{$command} = $command;
9561 # Create remote workdir if needed. Then cd to it.
9562 my $wd = ::pQ($self->workdir());
9563 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
9564 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
9566 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9567 my $remote_command = $pwd.$envset.$bashfuncset.
9568 '@ARGV="'.::perl_quote_scalar($command).'";'.
9569 monitor_parent_sshd_script();
9570 $quoted_remote_command = "perl -e ". ::Q($remote_command);
9571 my $dq_remote_command = ::Q($quoted_remote_command);
9572 if(length $dq_remote_command > 999
9577 # csh does not deal well with > 1000 chars in one word
9578 # csh does not deal well with $ENV with \n
9579 $quoted_remote_command =
9580 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
9581 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
9583 $quoted_remote_command = $dq_remote_command;
9586 my $sshcmd = $sshlogin->sshcommand();
9587 my ($pre,$post,$cleanup)=("","","");
9589 $pre .= $self->sshtransfer();
9591 $post .= $self->sshreturn();
9593 $post .= $self->sshcleanup();
9595 # We need to save the exit status of the job
9596 $post = exitstatuswrapper($post);
9598 $self->{'sshlogin_wrap'}{$command} =
9600 . "$sshcmd $serverlogin -- exec "
9601 . $quoted_remote_command
9606 return $self->{'sshlogin_wrap'}{$command};
9609 sub fill_templates($) {
9610 # Replace replacement strings in template(s)
9612 # @templates - File names of replaced templates
9615 if(%opt::template) {
9617 map { $self->{'commandline'}->replace_placeholders([$_],0,0) }
9618 @{$self->{'commandline'}{'template_names'}};
9619 ::debug("tmpl","Names: @template_name\n");
9620 for(my $i = 0; $i <= $#template_name; $i++) {
9621 open(my $fh, ">", $template_name[$i]) || die;
9622 print $fh $self->{'commandline'}->
9623 replace_placeholders([$self->{'commandline'}{'template_contents'}[$i]],0,0);
9627 $self->add_rm(@template_name);
9633 # Replace replacement strings in filter(s) and evaluate them
9635 # $run - 1=yes, undef=no
9639 for my $eval ($self->{'commandline'}->
9640 replace_placeholders(\@opt::filter,0,0)) {
9641 $run &&= eval $eval;
9643 $self->{'commandline'}{'skip'} ||= not $run;
9650 # Non-quoted and with {...} substituted
9652 # @transfer - File names of files to transfer
9655 my $transfersize = 0;
9656 my @transfer = $self->{'commandline'}->
9657 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
9661 $transfersize += (stat($_))[7];
9664 $self->add_transfersize($transfersize);
9668 sub transfersize($) {
9670 return $self->{'transfersize'};
9673 sub add_transfersize($) {
9675 my $transfersize = shift;
9676 $self->{'transfersize'} += $transfersize;
9678 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
9679 $self->{'transfersize'});
9682 sub sshtransfer($) {
9683 # Returns for each transfer file:
9684 # rsync $file remote:$workdir
9687 my $sshlogin = $self->sshlogin();
9688 my $workdir = $self->workdir();
9689 for my $file ($self->transfer()) {
9690 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
9692 return join("",@pre);
9697 # Non-quoted and with {...} substituted
9699 # @non_quoted_filenames
9701 return $self->{'commandline'}->
9702 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
9706 # This is called after the job has finished
9708 # $number_of_bytes transferred in return
9710 for my $file ($self->return()) {
9712 $self->{'returnsize'} += (stat($file))[7];
9715 return $self->{'returnsize'};
9718 sub add_returnsize($) {
9720 my $returnsize = shift;
9721 $self->{'returnsize'} += $returnsize;
9723 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
9724 $self->{'returnsize'});
9728 # Returns for each return-file:
9729 # rsync remote:$workdir/$file .
9731 my $sshlogin = $self->sshlogin();
9732 my $sshcmd = $sshlogin->sshcommand();
9733 my $serverlogin = $sshlogin->serverlogin();
9734 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
9736 for my $file ($self->return()) {
9737 $file =~ s:^\./::g; # Remove ./ if any
9738 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9742 # rsync -avR /foo/./bar/baz.c remote:/tmp/
9743 # == (on old systems)
9744 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
9745 $wd = ::shell_quote_file($self->workdir()."/");
9747 # Only load File::Basename if actually needed
9748 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
9749 # dir/./file means relative to dir, so remove dir on remote
9750 $file =~ m:(.*)/\./:;
9751 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
9752 my $nobasedir = $file;
9753 $nobasedir =~ s:.*/\./::;
9754 $cd = ::shell_quote_file(::dirname($nobasedir));
9755 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
9756 my $basename = ::Q(::shell_quote_file(::basename($file)));
9758 # mkdir -p /home/tange/dir/subdir/;
9759 # rsync (--protocol 30) -rlDzR
9760 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
9761 # server:file.gz /home/tange/dir/subdir/
9762 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
9763 " $rsync_cd $rsync_opts $serverlogin:".
9764 $basename . " ".$basedir.$cd.";";
9770 # Return the sshcommand needed to remove the file
9772 # ssh command needed to remove files from sshlogin
9774 my $sshlogin = $self->sshlogin();
9775 my $sshcmd = $sshlogin->sshcommand();
9776 my $serverlogin = $sshlogin->serverlogin();
9777 my $workdir = $self->workdir();
9780 for my $file ($self->remote_cleanup()) {
9781 my @subworkdirs = parentdirs_of($file);
9782 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
9784 if(defined $opt::workdir and $opt::workdir eq "...") {
9785 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
9790 sub remote_cleanup($) {
9792 # Files to remove at cleanup
9795 my @transfer = $self->transfer();
9796 my @return = $self->return();
9797 return (@transfer,@return);
9803 sub exitstatuswrapper(@) {
9805 # @shellcode = shell code to execute
9807 # shell script that returns current status after executing @shellcode
9808 if($Global::cshell) {
9809 return ('set _EXIT_status=$status; ' .
9811 'exit $_EXIT_status;');
9813 return ('_EXIT_status=$?; ' .
9815 'exit $_EXIT_status;');
9821 # the workdir on a remote machine
9823 if(not defined $self->{'workdir'}) {
9825 if(defined $opt::workdir) {
9826 if($opt::workdir eq ".") {
9827 # . means current dir
9828 my $home = $ENV{'HOME'};
9833 # If homedir exists: remove the homedir from
9834 # workdir if cwd starts with homedir
9835 # E.g. /home/foo/my/dir => my/dir
9836 # E.g. /tmp/my/dir => /tmp/my/dir
9837 my ($home_dev, $home_ino) = (stat($home))[0,1];
9839 my @dir_parts = split(m:/:,$cwd);
9841 while(defined ($part = shift @dir_parts)) {
9842 $part eq "" and next;
9843 $parent .= "/".$part;
9844 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
9845 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
9846 # dev and ino is the same: We found the homedir.
9847 $workdir = join("/",@dir_parts);
9852 if($workdir eq "") {
9855 } elsif($opt::workdir eq "...") {
9856 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
9857 . "-" . $self->seq();
9859 $workdir = $self->{'commandline'}->
9860 replace_placeholders([$opt::workdir],0,0);
9861 #$workdir = $opt::workdir;
9862 # Rsync treats /./ special. We dont want that
9863 $workdir =~ s:/\./:/:g; # Remove /./
9864 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
9865 $workdir =~ s:^\./::g; # Remove starting ./ if any
9870 $self->{'workdir'} = $workdir;
9872 return $self->{'workdir'};
9875 sub parentdirs_of($) {
9877 # all parentdirs except . of this dir or file - sorted desc by length
9880 while($d =~ s:/[^/]+$::) {
9889 # Setup STDOUT and STDERR for a job and start it.
9891 # job-object or undef if job not to run
9893 sub open3_setpgrp_internal {
9894 # Run open3+setpgrp followed by the command
9896 # $stdin_fh = Filehandle to use as STDIN
9897 # $stdout_fh = Filehandle to use as STDOUT
9898 # $stderr_fh = Filehandle to use as STDERR
9899 # $command = Command to run
9901 # $pid = Process group of job started
9902 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9905 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9906 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9907 # The eval is needed to catch exception from open3
9909 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9910 # Each child gets its own process group to make it safe to killall
9911 eval{ setpgrp(0,0) };
9912 eval{ setpriority(0,0,$opt::nice) };
9913 exec($Global::shell,"-c",$command)
9914 || ::die_bug("open3-$stdin_fh $command");
9920 sub open3_setpgrp_external {
9921 # Run open3 on $command wrapped with a perl script doing setpgrp
9922 # Works on systems that do not support open3(,,,"-")
9924 # $stdin_fh = Filehandle to use as STDIN
9925 # $stdout_fh = Filehandle to use as STDOUT
9926 # $stderr_fh = Filehandle to use as STDERR
9927 # $command = Command to run
9929 # $pid = Process group of job started
9930 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9932 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9933 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9938 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9939 "exec '$Global::shell', '-c', \@ARGV");
9940 # The eval is needed to catch exception from open3
9942 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9943 || ::die_bug("open3-$stdin_fh");
9949 sub redefine_open3_setpgrp {
9950 my $setgprp_cache = shift;
9951 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9952 no warnings 'redefine';
9953 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9954 # Test to see if open3(x,x,x,"-") is fully supported
9955 # Can an exported bash function be called via open3?
9956 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9957 'else { exec("bash","-c","testfun && true"); }';
9959 ::shell_quote_scalar_default(
9960 "testfun() { rm $name; }; export -f testfun; ".
9961 "perl -MIPC::Open3 -e ".
9962 ::shell_quote_scalar_default($script)
9965 # Redirect STDERR temporarily,
9966 # so errors on MacOS X are ignored.
9967 open my $saveerr, ">&STDERR";
9968 open STDERR, '>', "/dev/null";
9970 ::debug("init",qq{bash -c $bash 2>/dev/null});
9971 qx{ bash -c $bash 2>/dev/null };
9972 open STDERR, ">&", $saveerr;
9975 # Does not support open3(x,x,x,"-")
9976 # or does not have bash:
9977 # Use (slow) external version
9979 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
9980 ::debug("init","open3_setpgrp_external chosen\n");
9982 # Supports open3(x,x,x,"-")
9983 # This is 0.5 ms faster to run
9984 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
9985 ::debug("init","open3_setpgrp_internal chosen\n");
9987 if(open(my $fh, ">", $setgprp_cache)) {
9988 print $fh $redefine_eval;
9991 ::debug("init","Cannot write to $setgprp_cache");
9993 eval $redefine_eval;
9997 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
9998 ::hostname() . "/setpgrp_func";
10000 -e $setgprp_cache || return 0;
10002 open(my $fh, "<", $setgprp_cache) || return 0;
10003 eval <$fh> || return 0;
10007 if(not read_cache()) {
10008 redefine_open3_setpgrp($setgprp_cache);
10010 # The sub is now redefined. Call it
10011 return open3_setpgrp(@_);
10015 # Get the shell command to be executed (possibly with ssh infront).
10016 my $command = $job->wrapped();
10019 if($Global::interactive or $Global::stderr_verbose) {
10020 $job->interactive_start();
10022 # Must be run after $job->interactive_start():
10023 # $job->interactive_start() may call $job->skip()
10024 if($job->{'commandline'}{'skip'}
10026 not $job->filter()) {
10027 # $job->skip() was called or job filtered
10030 $job->openoutputfiles();
10031 $job->print_verbose_dryrun();
10032 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
10033 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
10034 $ENV{'PARALLEL_SEQ'} = $job->seq();
10035 $ENV{'PARALLEL_PID'} = $$;
10036 $ENV{'PARALLEL_JOBSLOT'} = $job->slot();
10037 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
10038 $job->add_rm($ENV{'PARALLEL_TMP'});
10039 $job->fill_templates();
10040 ::debug("run", $Global::total_running, " processes . Starting (",
10041 $job->seq(), "): $command\n");
10043 my ($stdin_fh) = ::gensym();
10044 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
10045 if($opt::roundrobin and not $opt::keeporder) {
10046 # --keep-order will make sure the order will be reproducible
10047 ::set_fh_non_blocking($stdin_fh);
10049 $job->set_fh(0,"w",$stdin_fh);
10050 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
10051 } elsif ($opt::tty and -c "/dev/tty" and
10052 open(my $devtty_fh, "<", "/dev/tty")) {
10053 # Give /dev/tty to the command if no one else is using it
10054 # The eval is needed to catch exception from open3
10055 local (*IN,*OUT,*ERR);
10056 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
10057 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
10059 # The eval is needed to catch exception from open3
10060 my @wrap = ('perl','-e',
10061 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
10062 "exec '$Global::shell', '-c', \@ARGV");
10064 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
10065 || ::die_bug("open3-/dev/tty");
10069 $job->set_virgin(0);
10071 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
10072 $job->set_virgin(0);
10075 # A job was started
10076 $Global::total_running++;
10077 $Global::total_started++;
10078 $job->set_pid($pid);
10079 $job->set_starttime();
10080 $Global::running{$job->pid()} = $job;
10081 if($opt::timeout) {
10082 $Global::timeoutq->insert($job);
10084 $Global::newest_job = $job;
10085 $Global::newest_starttime = ::now();
10088 # No more processes
10089 ::debug("run", "Cannot spawn more jobs.\n");
10094 sub interactive_start($) {
10096 my $command = $self->wrapped();
10097 if($Global::interactive) {
10099 ::status_no_nl("$command ?...");
10101 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
10102 $answer = <$tty_fh>;
10104 # Sometime we get an empty string (not even \n)
10105 # Do not know why, so let us just ignore it and try again
10106 } while(length $answer < 1);
10107 if (not ($answer =~ /^\s*y/i)) {
10108 $self->{'commandline'}->skip();
10111 print $Global::original_stderr "$command\n";
10119 # Wrap command with tmux for session pPID
10121 # $actual_command = the actual command being run (incl ssh wrap)
10123 my $actual_command = shift;
10124 # Temporary file name. Used for fifo to communicate exit val
10125 my $tmpfifo = ::tmpname("tmx");
10126 $self->add_rm($tmpfifo);
10128 if(length($tmpfifo) >=100) {
10129 ::error("tmux does not support sockets with path > 100.");
10130 ::wait_and_exit(255);
10132 if($opt::tmuxpane) {
10133 # Move the command into a pane in window 0
10134 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
10135 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
10138 my $visual_command = $self->replaced();
10139 my $title = $visual_command;
10140 if($visual_command =~ /\0/) {
10141 ::error("Command line contains NUL. tmux is confused by NUL.");
10142 ::wait_and_exit(255);
10144 # ; causes problems
10145 # ascii 194-245 annoys tmux
10146 $title =~ tr/[\011-\016;\302-\365]/ /s;
10147 $title = ::Q($title);
10149 my $l_act = length($actual_command);
10150 my $l_tit = length($title);
10151 my $l_fifo = length($tmpfifo);
10152 # The line to run contains a 118 chars extra code + the title 2x
10153 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
10155 my $quoted_space75 = ::Q(" ")x75;
10156 while($l_tit < 1000 and
10158 (890 < $l_tot and $l_tot < 1350)
10160 (9250 < $l_tot and $l_tot < 9800)
10162 # tmux blocks for certain lengths:
10163 # 900 < title + command < 1200
10164 # 9250 < title + command < 9800
10165 # but only if title < 1000, so expand the title with 75 spaces
10166 # The measured lengths are:
10167 # 996 < (title + whole command) < 1127
10168 # 9331 < (title + whole command) < 9636
10169 $title .= $quoted_space75;
10170 $l_tit = length($title);
10171 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
10175 $ENV{'PARALLEL_TMUX'} ||= "tmux";
10176 if(not $tmuxsocket) {
10177 $tmuxsocket = ::tmpname("tms");
10180 # Run tmux in the foreground
10181 # Wait for the socket to appear
10182 while (not -e $tmuxsocket) { }
10183 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
10187 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
10190 $ENV{'PARALLEL_TMUX'}.
10191 " -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
10192 $ENV{'PARALLEL_TMUX'}.
10193 " -S $tmuxsocket new-window -t p$$ -n $title";
10195 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
10196 $Limits::Command::line_max_len, " tot ",
10199 return "mkfifo $tmpfifo && $tmux ".
10203 "(".$actual_command.');'.
10204 # The triple print is needed - otherwise the testsuite fails
10205 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].
10207 "echo $title; echo \007Job finished at: `date`;sleep 10"
10210 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
10211 # If csh the first will be 0h, so use the second as exit value.
10212 # Otherwise just use the first value as exit value.
10213 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
10217 sub is_already_in_results($) {
10218 # Do we already have results for this job?
10220 # $job_already_run = bool whether there is output for this or not
10222 if($Global::csvsep) {
10224 # OK: You can look for job run in joblog
10228 "--resume --results .csv/.tsv/.json is not supported yet\n");
10229 # TODO read and parse the file
10233 my $out = $job->{'commandline'}->results_out();
10234 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
10235 return(-e $out."stdout" or -f $out);
10238 sub is_already_in_joblog($) {
10240 return vec($Global::job_already_run,$job->seq(),1);
10243 sub set_job_in_joblog($) {
10245 vec($Global::job_already_run,$job->seq(),1) = 1;
10248 sub should_be_retried($) {
10249 # Should this job be retried?
10252 # 1 - job queued for retry
10254 if (not $opt::retries) {
10257 if(not $self->exitstatus() and not $self->exitsignal()) {
10258 # Completed with success. If there is a recorded failure: forget it
10259 $self->reset_failed_here();
10262 # The job failed. Should it be retried?
10263 $self->add_failed_here();
10264 my $retries = $self->{'commandline'}->
10265 replace_placeholders([$opt::retries],0,0);
10266 if($self->total_failed() == $retries) {
10267 # This has been retried enough
10270 # This command should be retried
10271 $self->set_endtime(undef);
10272 $self->reset_exitstatus();
10273 $Global::JobQueue->unget($self);
10274 ::debug("run", "Retry ", $self->seq(), "\n");
10281 my (%print_later,$job_seq_to_print);
10283 sub print_earlier_jobs($) {
10284 # Print jobs whose output is postponed due to --keep-order
10287 $print_later{$job->seq()} = $job;
10288 $job_seq_to_print ||= 1;
10289 my $returnsize = 0;
10290 ::debug("run", "Looking for: $job_seq_to_print ",
10291 "This: ", $job->seq(), "\n");
10292 for(;vec($Global::job_already_run,$job_seq_to_print,1);
10293 $job_seq_to_print++) {}
10294 while(my $j = $print_later{$job_seq_to_print}) {
10295 $returnsize += $j->print();
10296 if($j->endtime()) {
10297 # Job finished - look at the next
10298 delete $print_later{$job_seq_to_print};
10299 $job_seq_to_print++;
10302 # Job not finished yet - look at it again next round
10306 return $returnsize;
10311 # Print the output of the jobs
10315 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
10317 # Nothing was printed to this job:
10318 # cleanup tmp files if --files was set
10319 ::rm($self->fh(1,"name"));
10321 if($opt::pipe and $self->virgin() and not $opt::tee) {
10322 # Skip --joblog, --dryrun, --verbose
10324 if($opt::ungroup) {
10325 # NULL returnsize = 0 returnsize
10326 $self->returnsize() or $self->add_returnsize(0);
10327 if($Global::joblog and defined $self->{'exitstatus'}) {
10328 # Add to joblog when finished
10329 $self->print_joblog();
10330 # Printing is only relevant for grouped/--line-buffer output.
10331 $opt::ungroup and return;
10334 # Check for disk full
10335 ::exit_if_disk_full();
10338 my $returnsize = $self->returnsize();
10339 for my $fdno (sort { $a <=> $b } keys %Global::fh) {
10340 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
10341 $fdno == 0 and next;
10342 my $out_fh = $Global::fh{$fdno};
10343 my $in_fh = $self->fh($fdno,"r");
10345 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
10346 # ::warning("File descriptor $fdno not defined\n");
10350 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
10351 if($opt::linebuffer) {
10352 # Line buffered print out
10353 $self->print_linebuffer($fdno,$in_fh,$out_fh);
10354 } elsif($opt::files) {
10355 $self->print_files($fdno,$in_fh,$out_fh);
10356 } elsif($opt::results) {
10357 $self->print_results($fdno,$in_fh,$out_fh);
10359 $self->print_normal($fdno,$in_fh,$out_fh);
10363 ::debug("print", "<<joboutput\n");
10364 if(defined $self->{'exitstatus'}
10365 and not ($self->virgin() and $opt::pipe)) {
10366 if($Global::joblog and not $opt::sqlworker) {
10367 # Add to joblog when finished
10368 $self->print_joblog();
10370 if($opt::sqlworker and not $opt::results) {
10371 $Global::sql->output($self);
10373 if($Global::csvsep) {
10374 # Add output to CSV when finished
10375 $self->print_csv();
10377 if($Global::jsonout) {
10378 $self->print_json();
10381 return $returnsize - $self->returnsize();
10387 sub print_json($) {
10391 if(not $jsonmap{"\001"}) {
10392 map { $jsonmap{sprintf("%c",$_)} =
10393 sprintf '\u%04x', $_ } 0..31;
10397 $a =~ s/([\000-\037])/$jsonmap{$1}/g;
10402 if($Global::verbose <= 1) {
10403 $cmd = jsonquote($self->replaced());
10405 # Verbose level > 1: Print the rsync and stuff
10406 $cmd = jsonquote(join " ", @{$self->{'commandline'}});
10408 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10410 # Memory optimization: Overwrite with the joined output
10411 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10412 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10415 # "Host": "/usr/bin/ssh foo@lo",
10416 # "Starttime": 1608344711.743,
10417 # "JobRuntime": 0.01,
10422 # "Command": "echo 1",
10430 printf($Global::csv_fh
10431 q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ).
10432 q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ).
10433 q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }).
10436 jsonquote($self->sshlogin()->string()),
10437 $self->starttime(), sprintf("%0.3f",$self->runtime()),
10438 $self->transfersize(), $self->returnsize(),
10439 $self->exitstatus(), $self->exitsignal(), $cmd,
10440 # \@$record_ref[1..$#$record_ref],
10442 map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref],
10444 jsonquote($self->{'output'}{1}),
10445 jsonquote($self->{'output'}{2})
10451 my $header_printed;
10456 if($Global::verbose <= 1) {
10457 $cmd = $self->replaced();
10459 # Verbose level > 1: Print the rsync and stuff
10460 $cmd = join " ", @{$self->{'commandline'}};
10462 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10464 if(not $header_printed) {
10467 # --header : => first value from column
10471 @V = (map { $Global::input_source_header{$i++} }
10472 @$record_ref[1..$#$record_ref]);
10475 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
10477 print $Global::csv_fh
10479 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
10480 "Send", "Receive", "Exitval", "Signal", "Command",
10486 # Memory optimization: Overwrite with the joined output
10487 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10488 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10489 print $Global::csv_fh
10493 $self->sshlogin()->string(),
10494 $self->starttime(), sprintf("%0.3f",$self->runtime()),
10495 $self->transfersize(), $self->returnsize(),
10496 $self->exitstatus(), $self->exitsignal(), \$cmd,
10497 \@$record_ref[1..$#$record_ref],
10498 \$self->{'output'}{1},
10499 \$self->{'output'}{2})),"\n";
10503 sub combine_ref($) {
10504 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
10506 my $sep = $Global::csvsep;
10510 my $must_be_quoted;
10511 for my $column (@part) {
10512 # Memory optimization: Content transferred as reference
10513 if(ref $column ne "SCALAR") {
10514 # Convert all columns to scalar references
10518 if(not defined $$column) {
10523 $must_be_quoted = 0;
10525 if($$column =~ s/$quot/$quot$quot/go){
10527 $must_be_quoted ||=1;
10529 if($$column =~ /[\s\Q$sep\E]/o){
10530 # Put quotes around if the column contains ,
10531 $must_be_quoted ||=1;
10534 $Global::use{"bytes"} ||= eval "use bytes; 1;";
10535 if ($$column =~ /\0/) {
10536 # Contains \0 => put quotes around
10537 $must_be_quoted ||=1;
10539 if($must_be_quoted){
10540 push @out, \$sep, \$quot, $column, \$quot;
10542 push @out, \$sep, $column;
10550 sub print_files($) {
10551 # Print the name of the file containing stdout on stdout
10554 # $opt::group = Print when job is done
10555 # $opt::linebuffer = Print ASAP
10558 my ($fdno,$in_fh,$out_fh) = @_;
10560 # If the job is dead: close printing fh. Needed for --compress
10561 close $self->fh($fdno,"w");
10562 if($? and $opt::compress) {
10563 ::error($opt::compress_program." failed.");
10564 $self->set_exitstatus(255);
10566 if($opt::compress) {
10567 # Kill the decompressor which will not be needed
10568 CORE::kill "TERM", $self->fh($fdno,"rpid");
10572 if($opt::pipe and $self->virgin()) {
10573 # Nothing was printed to this job:
10574 # cleanup unused tmp files because --files was set
10575 for my $fdno (1,2) {
10576 ::rm($self->fh($fdno,"name"));
10577 ::rm($self->fh($fdno,"unlink"));
10579 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
10580 print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n";
10581 if($Global::membuffer) {
10582 push @{$self->{'output'}{$fdno}},
10583 $self->tag(), $self->fh($fdno,"name");
10585 $self->add_returnsize(-s $self->fh($fdno,"name"));
10586 # Mark as printed - do not print again
10587 $self->set_fh($fdno,"name",undef);
10591 sub print_linebuffer($) {
10593 my ($fdno,$in_fh,$out_fh) = @_;
10594 if(defined $self->{'exitstatus'}) {
10595 # If the job is dead: close printing fh. Needed for --compress
10596 close $self->fh($fdno,"w");
10597 if($? and $opt::compress) {
10598 ::error($opt::compress_program." failed.");
10599 $self->set_exitstatus(255);
10601 if($opt::compress) {
10602 # Blocked reading in final round
10603 for my $fdno (1,2) {
10604 ::set_fh_blocking($self->fh($fdno,'r'));
10608 if(not $self->virgin()) {
10609 if($opt::files or ($opt::results and not $Global::csvsep)) {
10611 if($fdno == 1 and not $self->fh($fdno,"printed")) {
10612 print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n";
10613 if($Global::membuffer) {
10614 push(@{$self->{'output'}{$fdno}}, $self->tag(),
10615 $self->fh($fdno,"name"));
10617 $self->set_fh($fdno,"printed",1);
10619 # No need for reading $in_fh, as it is from "cat >/dev/null"
10621 # Read halflines and print full lines
10622 my $outputlength = 0;
10623 my $halfline_ref = $self->{'halfline'}{$fdno};
10625 # 1310720 gives 1.2 GB/s
10626 # 131072 gives 0.9 GB/s
10627 while($rv = sysread($in_fh, $buf,1310720)) {
10628 $outputlength += $rv;
10630 # Treat both \n and \r as line end
10631 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10633 # One or more complete lines were found
10634 if($opt::tag or defined $opt::tagstring) {
10635 # Replace ^ with $tag within the full line
10636 if($Global::cache_replacement_eval) {
10637 # Replace with the same value for tag
10638 my $tag = $self->tag();
10639 unshift @$halfline_ref, $tag;
10640 # TODO --recend that can be partially in @$halfline_ref
10641 substr($buf,0,$i-1) =~
10642 s/(?<=[\n\r])(?=.|$)/$tag/gs;
10643 # The length changed, so find the new ending pos
10644 $i = ::max((rindex($buf,"\n")+1),
10645 (rindex($buf,"\r")+1));
10647 # Replace with freshly computed value of tag
10648 unshift @$halfline_ref, $self->tag();
10649 substr($buf,0,$i-1) =~
10650 s/(?<=[\n\r])(?=.|$)/$self->tag()/gse;
10651 # The length changed, so find the new ending pos
10652 $i = ::max((rindex($buf,"\n")+1),
10653 (rindex($buf,"\r")+1));
10656 # Print the partial line (halfline) and the last half
10657 print $out_fh @$halfline_ref, substr($buf,0,$i);
10658 # Buffer in memory for SQL and CSV-output
10659 if($Global::membuffer) {
10660 push(@{$self->{'output'}{$fdno}},
10661 @$halfline_ref, substr($buf,0,$i));
10663 # Remove the printed part by keeping the unprinted part
10664 @$halfline_ref = (substr($buf,$i));
10666 # No newline, so append to the halfline
10667 push @$halfline_ref, $buf;
10670 $self->add_returnsize($outputlength);
10672 if(defined $self->{'exitstatus'}) {
10673 if($opt::files or ($opt::results and not $Global::csvsep)) {
10674 $self->add_returnsize(-s $self->fh($fdno,"name"));
10676 # If the job is dead: print the remaining partial line
10678 my $halfline_ref = $self->{'halfline'}{$fdno};
10679 if(grep /./, @$halfline_ref) {
10680 my $returnsize = 0;
10681 for(@{$self->{'halfline'}{$fdno}}) {
10682 $returnsize += length $_;
10684 $self->add_returnsize($returnsize);
10685 if($opt::tag or defined $opt::tagstring) {
10686 # Prepend $tag the the remaining half line
10687 unshift @$halfline_ref, $self->tag();
10689 # Print the partial line (halfline)
10690 print $out_fh @{$self->{'halfline'}{$fdno}};
10691 # Buffer in memory for SQL and CSV-output
10692 if($Global::membuffer) {
10693 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
10695 @$halfline_ref = ();
10698 if($self->fh($fdno,"rpid") and
10699 CORE::kill 0, $self->fh($fdno,"rpid")) {
10700 # decompress still running
10702 # decompress done: close fh
10704 if($? and $opt::compress) {
10705 ::error($opt::decompress_program." failed.");
10706 $self->set_exitstatus(255);
10713 sub free_ressources() {
10715 if(not $opt::ungroup) {
10717 for my $fdno (sort { $a <=> $b } keys %Global::fh) {
10718 $fh = $self->fh($fdno,"w");
10720 $fh = $self->fh($fdno,"r");
10726 sub print_parset($) {
10727 # Wrap output with shell script code to set as variables
10729 my ($fdno,$in_fh,$out_fh) = @_;
10730 my $outputlength = 0;
10732 ::debug("parset","print $Global::parset");
10733 if($Global::parset eq "assoc") {
10734 # eval "`echo 'declare -A myassoc; myassoc=(
10736 # [$'a\tb']=$'a\tb\tc ddd'
10739 print '[',::Q($self->{'commandline'}->
10740 replace_placeholders(["\257<\257>"],0,0)),']=';
10741 } elsif($Global::parset eq "array") {
10742 # eval "`echo 'myassoc=(
10747 } elsif($Global::parset eq "var") {
10748 # var=$'a\tb\tc ddd'
10749 if(not @Global::parset_vars) {
10750 ::error("Too few named destination variables");
10751 ::wait_and_exit(255);
10753 print shift @Global::parset_vars,"=";
10756 my $tag = $self->tag();
10759 $outputlength += length $_;
10760 # Tag lines with \r, too
10761 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10762 push @out, $tag,$_;
10764 # Remove last newline
10765 # This often makes it easier to use the output in shell
10766 @out and ${out[$#out]} =~ s/\n$//s;
10767 print ::Q(join("",@out)),"\n";
10768 return $outputlength;
10771 sub print_normal($) {
10773 my ($fdno,$in_fh,$out_fh) = @_;
10775 close $self->fh($fdno,"w");
10776 if($? and $opt::compress) {
10777 ::error($opt::compress_program." failed.");
10778 $self->set_exitstatus(255);
10780 if(not $self->virgin()) {
10782 # $in_fh is now ready for reading at position 0
10783 my $outputlength = 0;
10786 if($Global::parset and $fdno == 1) {
10787 $outputlength += $self->print_parset($fdno,$in_fh,$out_fh);
10788 } elsif($opt::tag or $opt::tagstring) {
10789 # Read line by line
10791 my $tag = $self->tag();
10793 $outputlength += length $_;
10794 # Tag lines with \r, too
10795 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10796 print $out_fh $tag,$_;
10797 if($Global::membuffer) {
10798 push @{$self->{'output'}{$fdno}}, $tag, $_;
10802 # Most efficient way of copying data from $in_fh to $out_fh
10803 while(sysread($in_fh,$buf,131072)) {
10804 print $out_fh $buf;
10805 $outputlength += length $buf;
10806 if($Global::membuffer) {
10807 push @{$self->{'output'}{$fdno}}, $buf;
10812 $self->add_returnsize($outputlength);
10815 if($? and $opt::compress) {
10816 ::error($opt::decompress_program." failed.");
10817 $self->set_exitstatus(255);
10822 sub print_results($) {
10824 my ($fdno,$in_fh,$out_fh) = @_;
10826 close $self->fh($fdno,"w");
10827 if($? and $opt::compress) {
10828 ::error($opt::compress_program." failed.");
10829 $self->set_exitstatus(255);
10831 if(not $self->virgin()) {
10833 # $in_fh is now ready for reading at position 0
10834 my $outputlength = 0;
10837 if($Global::membuffer) {
10838 # Read data into membuffer
10839 if($opt::tag or $opt::tagstring) {
10840 # Read line by line
10842 my $tag = $self->tag();
10844 $outputlength += length $_;
10845 # Tag lines with \r, too
10846 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10847 push @{$self->{'output'}{$fdno}}, $tag, $_;
10850 # Most efficient way of copying data from $in_fh to $out_fh
10851 while(sysread($in_fh,$buf,131072)) {
10852 $outputlength += length $buf;
10853 push @{$self->{'output'}{$fdno}}, $buf;
10857 # Not membuffer: No need to read the file
10858 if($opt::compress) {
10859 $outputlength = -1;
10861 # Determine $outputlength = file length
10862 seek($in_fh, 0, 2) || ::die_bug("cannot seek result");
10863 $outputlength = tell($in_fh);
10867 $self->add_returnsize($outputlength);
10870 if($? and $opt::compress) {
10871 ::error($opt::decompress_program." failed.");
10872 $self->set_exitstatus(255);
10877 sub print_joblog($) {
10880 if($Global::verbose <= 1) {
10881 $cmd = $self->replaced();
10883 # Verbose level > 1: Print the rsync and stuff
10884 $cmd = $self->wrapped();
10886 # Newlines make it hard to parse the joblog
10888 print $Global::joblog
10889 join("\t", $self->seq(), $self->sshlogin()->string(),
10890 $self->starttime(), sprintf("%10.3f",$self->runtime()),
10891 $self->transfersize(), $self->returnsize(),
10892 $self->exitstatus(), $self->exitsignal(), $cmd
10894 flush $Global::joblog;
10895 $self->set_job_in_joblog();
10903 # color combinations that are readable: black/white text
10904 # on colored background, but not white on yellow
10906 # Force each color code to have the same length in chars
10907 # This will make \t work as expected
10908 ((map { [sprintf("%03d",$_),"000"] }
10909 6..7,9..11,13..15,40..51,75..87,113..123,147..159,
10910 171..231,249..254),
10911 (map { [sprintf("%03d",$_),231] }
10912 1..9,12..13,16..45,52..81,88..116,124..151,153,
10913 160..180,182..185,187..189,196..214,232..252,
10915 # reorder list so adjacent colors are dissimilar
10916 # %7 and %17 were found experimentally
10918 sort { ($b%7 <=> $a%7) or ($a%17 <=> $b%17) } 0..$#color
10922 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
10923 if($opt::tag or defined $opt::tagstring) {
10924 if($Global::color) {
10925 if(not @color) { init_color() }
10926 # Choose a value based on the seq
10927 my $col = @color[$self->seq() % ($#color+1)];
10928 $self->{'tag'} = "\033[48;5;".$col->[0].
10929 ";38;5;".$col->[1]."m".
10930 ($self->{'commandline'}->
10931 replace_placeholders([$opt::tagstring],0,0)).
10934 $self->{'tag'} = $self->{'commandline'}->
10935 replace_placeholders([$opt::tagstring],0,0)."\t";
10938 $self->{'tag'} = "";
10941 return $self->{'tag'};
10945 sub hostgroups($) {
10947 if(not defined $self->{'hostgroups'}) {
10948 $self->{'hostgroups'} =
10949 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
10951 return @{$self->{'hostgroups'}};
10954 sub exitstatus($) {
10956 return $self->{'exitstatus'};
10959 sub set_exitstatus($$) {
10961 my $exitstatus = shift;
10963 # Overwrite status if non-zero
10964 $self->{'exitstatus'} = $exitstatus;
10966 # Set status but do not overwrite
10967 # Status may have been set by --timeout
10968 $self->{'exitstatus'} ||= $exitstatus;
10970 $opt::sqlworker and
10971 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
10975 sub reset_exitstatus($) {
10977 undef $self->{'exitstatus'};
10980 sub exitsignal($) {
10982 return $self->{'exitsignal'};
10985 sub set_exitsignal($$) {
10987 my $exitsignal = shift;
10988 $self->{'exitsignal'} = $exitsignal;
10989 $opt::sqlworker and
10990 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
10997 sub should_we_halt {
10998 # Should we halt? Immediately? Gracefully?
11002 if($job->exitstatus() or $job->exitsignal()) {
11004 $Global::exitstatus++;
11005 $Global::total_failed++;
11006 if($Global::halt_fail) {
11007 ::status("$Global::progname: This job failed:",
11009 $limit = $Global::total_failed;
11011 } elsif($Global::halt_success) {
11012 ::status("$Global::progname: This job succeeded:",
11014 $limit = $Global::total_completed - $Global::total_failed;
11016 if($Global::halt_done) {
11017 ::status("$Global::progname: This job finished:",
11019 $limit = $Global::total_completed;
11021 if(not defined $limit) {
11024 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
11025 # --halt % => 1..100 (pct of jobs failed)
11026 if($Global::halt_pct and not $Global::halt_count) {
11027 $total_jobs ||= $Global::JobQueue->total_jobs();
11028 # From the pct compute the number of jobs that must fail/succeed
11029 $Global::halt_count = $total_jobs * $Global::halt_pct;
11031 if($limit >= $Global::halt_count) {
11032 # At least N jobs have failed/succeded/completed
11033 # or at least N% have failed/succeded/completed
11034 # So we should prepare for exit
11035 if($Global::halt_fail or $Global::halt_done) {
11037 if(not defined $Global::halt_exitstatus) {
11038 if($Global::halt_pct) {
11039 # --halt now,fail=X% or soon,fail=X%
11040 # --halt now,done=X% or soon,done=X%
11041 $Global::halt_exitstatus =
11042 ::ceil($Global::total_failed / $total_jobs * 100);
11043 } elsif($Global::halt_count) {
11044 # --halt now,fail=X or soon,fail=X
11045 # --halt now,done=X or soon,done=X
11046 $Global::halt_exitstatus =
11047 ::min($Global::total_failed,101);
11049 if($Global::halt_count and $Global::halt_count == 1) {
11050 # --halt now,fail=1 or soon,fail=1
11051 # --halt now,done=1 or soon,done=1
11052 # Emulate Bash's +128 if there is a signal
11053 $Global::halt_exitstatus =
11054 ($job->exitstatus()
11056 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
11059 ::debug("halt","Pct: ",$Global::halt_pct,
11060 " count: ",$Global::halt_count,
11061 " status: ",$Global::halt_exitstatus,"\n");
11062 } elsif($Global::halt_success) {
11063 $Global::halt_exitstatus = 0;
11065 if($Global::halt_when eq "soon") {
11066 $Global::start_no_new_jobs ||= 1;
11067 if(scalar(keys %Global::running) > 0) {
11068 # Only warn if there are more jobs running
11070 ("$Global::progname: Starting no more jobs. ".
11071 "Waiting for ". (keys %Global::running).
11072 " jobs to finish.");
11075 return($Global::halt_when);
11082 package CommandLine;
11087 my $commandref = shift;
11088 $commandref || die;
11089 my $arg_queue = shift;
11090 my $context_replace = shift;
11091 my $max_number_of_args = shift; # for -N and normal (-n1)
11092 my $transfer_files = shift;
11093 my $return_files = shift;
11094 my $template_names = shift;
11095 my $template_contents = shift;
11096 my $replacecount_ref = shift;
11097 my $len_ref = shift;
11098 my %replacecount = %$replacecount_ref;
11099 my %len = %$len_ref;
11100 for (keys %$replacecount_ref) {
11101 # Total length of this replacement string {} replaced with all args
11105 'command' => $commandref,
11109 'arg_list_flat' => [],
11110 'arg_list_flat_orig' => [undef],
11111 'arg_queue' => $arg_queue,
11112 'max_number_of_args' => $max_number_of_args,
11113 'replacecount' => \%replacecount,
11114 'context_replace' => $context_replace,
11115 'transfer_files' => $transfer_files,
11116 'return_files' => $return_files,
11117 'template_names' => $template_names,
11118 'template_contents' => $template_contents,
11119 'replaced' => undef,
11120 }, ref($class) || $class;
11123 sub flush_cache() {
11125 for my $arglist (@{$self->{'arg_list'}}) {
11126 for my $arg (@$arglist) {
11127 $arg->flush_cache();
11130 $self->{'arg_queue'}->flush_cache();
11131 $self->{'replaced'} = undef;
11136 return $self->{'seq'};
11141 $self->{'seq'} = shift;
11145 # Find the number of a free job slot and return it
11147 # @Global::slots - list with free jobslots
11149 # $jobslot = number of jobslot
11151 if(not $self->{'slot'}) {
11152 if(not @Global::slots) {
11153 # $max_slot_number will typically be $Global::max_jobs_running
11154 push @Global::slots, ++$Global::max_slot_number;
11156 $self->{'slot'} = shift @Global::slots;
11158 return $self->{'slot'};
11162 my $already_spread;
11163 my $darwin_max_len;
11166 # Add arguments from arg_queue until the number of arguments or
11167 # max line length is reached
11169 # $Global::minimal_command_line_length
11172 # $Global::JobQueue
11175 # $Global::max_jobs_running
11179 my $max_len = $Global::minimal_command_line_length
11180 || Limits::Command::max_length();
11181 if($^O eq "darwin") {
11182 # Darwin's limit is affected by:
11183 # * number of environment names (variables+functions)
11184 # * size of environment
11185 # * the length of arguments:
11186 # a one-char argument lowers the limit by 5
11187 # To be safe assume all arguments are one-char
11188 # The max_len is cached between runs, but if the size of
11189 # the environment is different we need to recompute the
11190 # usable max length for this run of GNU Parallel
11191 # See https://unix.stackexchange.com/a/604943/2972
11192 if(not $darwin_max_len) {
11193 my $envc = (keys %ENV);
11194 my $envn = length join"",(keys %ENV);
11195 my $envv = length join"",(values %ENV);
11196 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
11198 "length: $darwin_max_len ".
11199 "3+($max_len - $envn - $envv)/5 - $envc*2");
11201 $max_len = $darwin_max_len;
11203 if($opt::cat or $opt::fifo) {
11204 # Get the empty arg added by --pipepart (if any)
11205 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
11206 # $PARALLEL_TMP will point to a tempfile that will be used as {}
11207 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
11208 unget([Arg->new('$PARALLEL_TMP')]);
11210 while (not $self->{'arg_queue'}->empty()) {
11211 $next_arg = $self->{'arg_queue'}->get();
11212 if(not defined $next_arg) {
11215 $self->push($next_arg);
11216 if($self->len() >= $max_len) {
11217 # Command length is now > max_length
11218 # If there are arguments: remove the last
11219 # If there are no arguments: Error
11220 # TODO stuff about -x opt_x
11221 if($self->number_of_args() > 1) {
11222 # There is something to work on
11223 $self->{'arg_queue'}->unget($self->pop());
11226 my $args = join(" ", map { $_->orig() } @$next_arg);
11227 ::error("Command line too long (".
11228 $self->len(). " >= ".
11231 $self->{'arg_queue'}->arg_number().
11233 ((length $args > 50) ?
11234 (substr($args,0,50))."..." :
11236 $self->{'arg_queue'}->unget($self->pop());
11237 ::wait_and_exit(255);
11241 if(defined $self->{'max_number_of_args'}) {
11242 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
11247 if(($opt::m or $opt::X) and not $already_spread
11248 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
11249 # -m or -X and EOF => Spread the arguments over all jobslots
11250 # (unless they are already spread)
11251 $already_spread ||= 1;
11252 if($self->number_of_args() > 1) {
11253 $self->{'max_number_of_args'} =
11254 ::ceil($self->number_of_args()/$Global::max_jobs_running);
11255 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
11256 $self->{'max_number_of_args'};
11257 $self->{'arg_queue'}->unget($self->pop_all());
11258 while($self->number_of_args() < $self->{'max_number_of_args'}) {
11259 $self->push($self->{'arg_queue'}->get());
11262 $Global::JobQueue->flush_total_jobs();
11265 if($opt::sqlmaster) {
11266 # Insert the V1..Vn for this $seq in SQL table instead of generating one
11267 $Global::sql->insert_records($self->seq(), $self->{'command'},
11268 $self->{'arg_list_flat_orig'});
11274 # Add one or more records as arguments
11277 my $record = shift;
11278 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
11279 push @{$self->{'arg_list_flat'}}, @$record;
11280 push @{$self->{'arg_list'}}, $record;
11281 # Make @arg available for {= =}
11282 *Arg::arg = $self->{'arg_list_flat_orig'};
11284 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11285 for my $perlexpr (keys %{$self->{'replacecount'}}) {
11286 if($perlexpr =~ /^(\d+) /) {
11288 defined($record->[$1-1]) or next;
11289 $self->{'len'}{$perlexpr} +=
11290 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
11292 for my $arg (@$record) {
11294 $self->{'len'}{$perlexpr} +=
11295 length $arg->replace($perlexpr,$quote_arg,$self);
11303 # Remove last argument
11307 my $record = pop @{$self->{'arg_list'}};
11308 # pop off arguments from @$record
11309 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
11310 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
11311 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11312 for my $perlexpr (keys %{$self->{'replacecount'}}) {
11313 if($perlexpr =~ /^(\d+) /) {
11315 defined($record->[$1-1]) or next;
11316 $self->{'len'}{$perlexpr} -=
11317 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
11319 for my $arg (@$record) {
11321 $self->{'len'}{$perlexpr} -=
11322 length $arg->replace($perlexpr,$quote_arg,$self);
11331 # Remove all arguments and zeros the length of replacement perlexpr
11335 my @popped = @{$self->{'arg_list'}};
11336 for my $perlexpr (keys %{$self->{'replacecount'}}) {
11337 $self->{'len'}{$perlexpr} = 0;
11339 $self->{'arg_list'} = [];
11340 $self->{'arg_list_flat_orig'} = [undef];
11341 $self->{'arg_list_flat'} = [];
11345 sub number_of_args($) {
11346 # The number of records
11348 # number of records
11350 # This is really the number of records
11351 return $#{$self->{'arg_list'}}+1;
11354 sub number_of_recargs($) {
11355 # The number of args in records
11357 # number of args records
11360 my $nrec = scalar @{$self->{'arg_list'}};
11362 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
11367 sub args_as_string($) {
11369 # all unmodified arguments joined with ' ' (similar to {})
11371 return (join " ", map { $_->orig() }
11372 map { @$_ } @{$self->{'arg_list'}});
11375 sub results_out($) {
11376 sub max_file_name_length {
11377 # Figure out the max length of a subdir
11378 # TODO and the max total length
11379 # Ext4 = 255,130816
11381 # $Global::max_file_length is set
11383 # $Global::max_file_length
11384 my $testdir = shift;
11386 my $upper = 100_000_000;
11387 # Dir length of 8 chars is supported everywhere
11389 my $dir = "x"x$len;
11391 rmdir($testdir."/".$dir);
11394 } while ($len < $upper and mkdir $testdir."/".$dir);
11395 # Then search for the actual max length between $len/16 and $len
11398 while($max-$min > 5) {
11399 # If we are within 5 chars of the exact value:
11400 # it is not worth the extra time to find the exact value
11401 my $test = int(($min+$max)/2);
11403 if(mkdir $testdir."/".$dir) {
11404 rmdir($testdir."/".$dir);
11410 $Global::max_file_length = $min;
11415 my $out = $self->replace_placeholders([$opt::results],0,0);
11416 if($out eq $opt::results) {
11417 # $opt::results simple string: Append args_as_dirname
11418 my $args_as_dirname = $self->args_as_dirname();
11419 # Output in: prefix/name1/val1/name2/val2/stdout
11420 $out = $opt::results."/".$args_as_dirname;
11421 if(-d $out or eval{ File::Path::mkpath($out); }) {
11424 # mkpath failed: Argument probably too long.
11425 # Set $Global::max_file_length, which will keep the individual
11426 # dir names shorter than the max length
11427 max_file_name_length($opt::results);
11428 $args_as_dirname = $self->args_as_dirname();
11429 # prefix/name1/val1/name2/val2/
11430 $out = $opt::results."/".$args_as_dirname;
11431 File::Path::mkpath($out);
11435 if($out =~ m:/$:) {
11437 if(-d $out or eval{ File::Path::mkpath($out); }) {
11440 ::error("Cannot make dir '$out'.");
11441 ::wait_and_exit(255);
11445 File::Path::mkpath($1);
11451 sub args_as_dirname($) {
11453 # all unmodified arguments joined with '/' (similar to {})
11454 # \t \0 \\ and / are quoted as: \t \0 \\ \_
11455 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
11459 for my $rec_ref (@{$self->{'arg_list'}}) {
11460 # If headers are used, sort by them.
11461 # Otherwise keep the order from the command line.
11462 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
11463 for my $n (@header_indexes_sorted) {
11465 $Global::input_source_header{$n},
11467 # \t \0 \\ and / are quoted as: \t \0 \\ \_
11472 if($Global::max_file_length) {
11473 # Keep each subdir shorter than the longest
11474 # allowed file name
11475 $s = substr($s,0,$Global::max_file_length);
11478 $rec_ref->[$n-1]->orig());
11481 return join "/", @res;
11484 sub header_indexes_sorted($) {
11485 # Sort headers first by number then by name.
11486 # E.g.: 1a 1b 11a 11b
11488 # Indexes of %Global::input_source_header sorted
11489 my $max_col = shift;
11491 no warnings 'numeric';
11492 for my $col (1 .. $max_col) {
11493 # Make sure the header is defined. If it is not: use column number
11494 if(not defined $Global::input_source_header{$col}) {
11495 $Global::input_source_header{$col} = $col;
11498 my @header_indexes_sorted = sort {
11499 # Sort headers numerically then asciibetically
11500 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
11502 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
11504 return @header_indexes_sorted;
11510 # The length of the command line with args substituted
11513 # Add length of the original command with no args
11514 # Length of command w/ all replacement args removed
11515 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
11516 ::debug("length", "noncontext + command: $len\n");
11517 # MacOS has an overhead of 8 bytes per argument
11518 my $darwin = ($^O eq "darwin") ? 8 : 0;
11519 my $recargs = $self->number_of_recargs();
11520 if($self->{'context_replace'}) {
11521 # Context is duplicated for each arg
11522 $len += $recargs * $self->{'len'}{'context'};
11523 for my $replstring (keys %{$self->{'replacecount'}}) {
11524 # If the replacements string is more than once: mulitply its length
11525 $len += $self->{'len'}{$replstring} *
11526 $self->{'replacecount'}{$replstring};
11527 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
11528 $self->{'replacecount'}{$replstring}, "\n");
11530 # echo 11 22 33 44 55 66 77 88 99 1010
11531 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
11533 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
11534 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
11535 # Add space between context groups
11536 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
11538 $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
11541 # Each replacement string may occur several times
11542 # Add the length for each time
11543 $len += 1*$self->{'len'}{'context'};
11544 ::debug("length", "context+noncontext + command: $len\n");
11545 for my $replstring (keys %{$self->{'replacecount'}}) {
11546 # (space between recargs + length of replacement)
11547 # * number this replacement is used
11548 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
11549 $self->{'replacecount'}{$replstring};
11551 $len += ($recargs * $self->{'replacecount'}{$replstring}
11556 if(defined $Global::parallel_env) {
11557 # If we are using --env, add the prefix for that, too.
11558 $len += length $Global::parallel_env;
11560 if($Global::quoting) {
11561 # Pessimistic length if -q is set
11562 # Worse than worst case: ' => "'" + " => '"'
11563 # TODO can we count the number of expanding chars?
11564 # and count them in arguments, too?
11567 if(@opt::shellquote) {
11568 # Pessimistic length if --shellquote is set
11569 # Worse than worst case: ' => "'"
11570 for(@opt::shellquote) {
11575 if(@opt::sshlogin) {
11576 # Pessimistic length if remote
11577 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
11578 $len = int($len*4/3);
11586 # $Global::quote_replace
11589 # $replaced = command with place holders replaced and prepended
11591 if(not defined $self->{'replaced'}) {
11592 # Don't quote arguments if the input is the full command line
11593 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11594 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
11595 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
11596 $self->{'replaced'} = $self->
11597 replace_placeholders($self->{'command'},$Global::quoting,
11599 my $len = length $self->{'replaced'};
11600 if ($len != $self->len()) {
11601 ::debug("length", $len, " != ", $self->len(),
11602 " ", $self->{'replaced'}, "\n");
11604 ::debug("length", $len, " == ", $self->len(),
11605 " ", $self->{'replaced'}, "\n");
11608 return $self->{'replaced'};
11611 sub replace_placeholders($$$$) {
11612 # Replace foo{}bar with fooargbar
11614 # $targetref = command as shell words
11615 # $quote = should everything be quoted?
11616 # $quote_arg = should replaced arguments be quoted?
11618 # @Arg::arg = arguments as strings to be use in {= =}
11620 # @target with placeholders replaced
11622 my $targetref = shift;
11624 my $quote_arg = shift;
11627 # Token description:
11628 # \0spc = unquoted space
11629 # \0end = last token element
11630 # \0ign = dummy token to be ignored
11631 # \257<...\257> = replacement expression
11632 # " " = quoted space, that splits -X group
11633 # text = normal text - possibly part of -X group
11635 my @tokens = grep { length $_ > 0 } map {
11637 # \257<...\257> or space
11640 # Split each space/tab into a token
11641 split /(?=\s)|(?<=\s)/
11644 # Split \257< ... \257> into own token
11645 map { split /(?=\257<)|(?<=\257>)/ }
11646 # Insert "\0spc" between every element
11647 # This space should never be quoted
11648 map { $spacer++ ? ("\0spc",$_) : $_ }
11649 map { $_ eq "" ? "\0empty" : $_ }
11653 # @tokens is empty: Return empty array
11656 ::debug("replace", "Tokens ".join":",@tokens,"\n");
11657 # Make it possible to use $arg[2] in {= =}
11658 *Arg::arg = $self->{'arg_list_flat_orig'};
11660 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
11661 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
11662 if(not @{$self->{'arg_list_flat'}}) {
11663 @{$self->{'arg_list_flat'}} = Arg->new("");
11665 my $argref = $self->{'arg_list_flat'};
11666 # Number of arguments - used for positional arguments
11667 my $n = $#$argref+1;
11669 # $self is actually a CommandLine-object,
11670 # but it looks nice to be able to say {= $job->slot() =}
11672 # @replaced = tokens with \257< \257> replaced
11674 if($self->{'context_replace'}) {
11676 for my $t (@tokens,"\0end") {
11677 # \0end = last token was end of tokens.
11678 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
11679 # Context group complete: Replace in it
11680 if(grep { /^\257</ } @ctxgroup) {
11681 # Context group contains a replacement string:
11682 # Copy once per arg
11683 my $space = "\0ign";
11684 for my $arg (@$argref) {
11685 my $normal_replace;
11687 # Put unquoted space before each context group
11689 CORE::push @replaced, $space, map {
11692 s{\257<(-?\d+)?(.*)\257>}
11695 # Positional replace
11696 # Find the relevant arg and replace it
11697 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
11698 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11699 replace($2,$quote_arg,$self)
11703 $normal_replace ||= 1;
11704 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11707 # Token is \257<..\257>
11709 if($Global::escape_string_present) {
11710 # Command line contains \257:
11711 # Unescape it \257\256 => \257
11712 $a =~ s/\257\256/\257/g;
11717 $normal_replace or last;
11721 # Context group has no a replacement string: Copy it once
11722 CORE::push @replaced, map {
11723 $Global::escape_string_present and s/\257\256/\257/g; $_;
11726 # New context group
11729 if($t eq "\0spc" or $t eq " ") {
11730 CORE::push @replaced,$t;
11732 CORE::push @ctxgroup,$t;
11739 # repquote = no if {} first on line, no if $quote, yes otherwise
11740 for my $t (@tokens) {
11741 if($t =~ /^\257</) {
11742 my $space = "\0ign";
11743 for my $arg (@$argref) {
11744 my $normal_replace;
11747 s{\257<(-?\d+)?(.*)\257>}
11750 # Positional replace
11751 # Find the relevant arg and replace it
11752 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
11753 # If defined: replace
11754 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11755 replace($2,$quote_arg,$self)
11759 $normal_replace ||= 1;
11760 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11763 CORE::push @replaced, $space, $a;
11764 $normal_replace or last;
11769 CORE::push @replaced, map {
11770 $Global::escape_string_present and s/\257\256/\257/g; $_;
11776 ::debug("replace","Replaced: ".join":",@replaced,"\n");
11778 # Put tokens into groups that may be quoted.
11781 for (map { $_ eq "\0empty" ? "" : $_ }
11782 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
11783 @replaced, "\0end") {
11784 if($_ eq "\0spc" or $_ eq "\0end") {
11785 # \0spc splits quotable groups
11788 CORE::push @quoted, ::Q(join"",@quotegroup);;
11791 CORE::push @quoted, join"",@quotegroup;
11795 CORE::push @quotegroup, $_;
11798 ::debug("replace","Quoted: ".join":",@quoted,"\n");
11799 return wantarray ? @quoted : "@quoted";
11805 $self->{'skip'} = 1;
11809 package CommandLineQueue;
11813 my $commandref = shift;
11814 my $read_from = shift;
11815 my $context_replace = shift || 0;
11816 my $max_number_of_args = shift;
11817 my $transfer_files = shift;
11818 my $return_files = shift;
11819 my $template_names = shift;
11820 my $template_contents = shift;
11823 my ($replacecount_ref, $len_ref);
11824 my @command = @$commandref;
11826 # Replace replacement strings with {= perl expr =}
11827 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11828 @command = merge_rpl_parts(@command);
11830 # Protect matching inside {= perl expr =}
11831 # by replacing {= and =} with \257< and \257>
11832 # in options that can contain replacement strings:
11833 # @command, --transferfile, --return,
11834 # --tagstring, --workdir, --results
11835 for(@command, @$transfer_files, @$return_files,
11836 @$template_names, @$template_contents,
11837 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries,
11839 # Skip if undefined
11841 # Escape \257 => \257\256
11842 $Global::escape_string_present += s/\257/\257\256/g;
11843 # Needs to match rightmost left parens (Perl defaults to leftmost)
11844 # to deal with: {={==} and {={==}=}
11845 # Replace {= -> \257< and =} -> \257>
11847 # Complex way to do:
11848 # s/{=(.*)=}/\257<$1\257>/g
11849 # which would not work
11850 s[\Q$Global::parensleft\E # Match {=
11851 # Match . unless the next string is {= or =}
11852 # needed to force matching the shortest {= =}
11853 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
11854 \Q$Global::parensright\E ] # Match =}
11856 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
11857 # Replace long --rpl's before short ones, as a short may be a
11858 # substring of a long:
11859 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
11861 # Replace the shorthand string (--rpl)
11862 # with the {= perl expr =}
11864 # Avoid searching for shorthand strings inside existing {= perl expr =}
11866 # Replace $$1 in {= perl expr =} with groupings in shorthand string
11868 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
11869 # echo {/.tar/.gz} ::: UU.tar.gz
11870 my ($prefix,$grp_regexp,$postfix) =
11871 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
11872 ( \(.*\) )? # Group capture regexp - e.g (.*)
11873 ( [^)]* )$ # Postfix - e.g }
11875 $grp_regexp ||= '';
11876 my $rplval = $Global::rpl{$rpl};
11877 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11878 # Don't replace after \257 unless \257>
11879 \Q$prefix\E $grp_regexp \Q$postfix\E}
11881 # The start remains the same
11882 my $unchanged = $1;
11883 # Dummy entry to start at 1.
11885 # $2 = first ()-group in $grp_regexp
11886 # Put $2 in $grp[1], Put $3 in $grp[2]
11887 # so first ()-group in $grp_regexp is $grp[1];
11888 for(my $i = 2; defined $grp[$#grp]; $i++) {
11889 push @grp, eval '$'.$i;
11892 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11893 # in the code to be executed
11894 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11895 # prepend with $_pAr_gRp1 = perlquote($1),
11897 for(my $i = 1;defined $grp[$i]; $i++) {
11898 $set_args .= "\$_pAr_gRp$i = \"" .
11899 ::perl_quote_scalar($grp[$i]) . "\";";
11901 $unchanged . "\257<" . $set_args . $rv . "\257>"
11904 # Do the same for the positional replacement strings
11906 if($posrpl =~ s/^\{//) {
11907 # Only do this if the shorthand start with {
11909 # Don't replace after \257 unless \257>
11910 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11911 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
11913 # The start remains the same
11914 my $unchanged = $1;
11916 # Dummy entry to start at 1.
11918 # $3 = first ()-group in $grp_regexp
11919 # Put $3 in $grp[1], Put $4 in $grp[2]
11920 # so first ()-group in $grp_regexp is $grp[1];
11921 for(my $i = 3; defined $grp[$#grp]; $i++) {
11922 push @grp, eval '$'.$i;
11925 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11926 # in the code to be executed
11927 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11928 # prepend with $_pAr_gRp1 = perlquote($1),
11930 for(my $i = 1;defined $grp[$i]; $i++) {
11931 $set_args .= "\$_pAr_gRp$i = \"" .
11932 ::perl_quote_scalar($grp[$i]) . "\";";
11934 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
11940 # Add {} if no replacement strings in @command
11941 ($replacecount_ref, $len_ref, @command) =
11942 replacement_counts_and_lengths($transfer_files, $return_files,
11943 $template_names, $template_contents,
11945 if("@command" =~ /^[^ \t\n=]*\257</) {
11946 # Replacement string is (part of) the command (and not just
11947 # argument or variable definition V1={})
11948 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11949 # Do no quote (Otherwise it will fail if the input contains spaces)
11950 $Global::quote_replace = 0;
11953 if($opt::sqlmaster and $Global::sql->append()) {
11954 $seq = $Global::sql->max_seq() + 1;
11958 ('unget' => \@unget,
11959 'command' => \@command,
11960 'replacecount' => $replacecount_ref,
11961 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
11962 'context_replace' => $context_replace,
11964 'max_number_of_args' => $max_number_of_args,
11966 'transfer_files' => $transfer_files,
11967 'return_files' => $return_files,
11968 'template_names' => $template_names,
11969 'template_contents' => $template_contents,
11972 }, ref($class) || $class;
11975 sub merge_rpl_parts($) {
11976 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11978 # @in = the @command as given by the user
11980 # $Global::parensleft
11981 # $Global::parensright
11983 # @command with parts merged to keep {= and =} as one
11986 my $l = quotemeta($Global::parensleft);
11987 my $r = quotemeta($Global::parensright);
11992 # Remove matching (right most) parens
11993 while(s/(.*)$l.*?$r/$1/os) {}
11995 # Missing right parens
11997 $s .= " ".shift @in;
11999 while(s/(.*)$l.*?$r/$1/os) {}
12010 sub replacement_counts_and_lengths($$@) {
12011 # Count the number of different replacement strings.
12012 # Find the lengths of context for context groups and non-context
12014 # If no {} found in @command: add it to @command
12017 # \@transfer_files = array of filenames to transfer
12018 # \@return_files = array of filenames to return
12019 # \@template_names = array of names to copy to
12020 # \@template_contents = array of contents to write
12021 # @command = command template
12023 # \%replacecount, \%len, @command
12024 my $transfer_files = shift;
12025 my $return_files = shift;
12026 my $template_names = shift;
12027 my $template_contents = shift;
12029 my (%replacecount,%len);
12032 # Count how many times each replacement string is used
12033 my @cmd = @command;
12034 my $contextlen = 0;
12035 my $noncontextlen = 0;
12036 my $contextgroups = 0;
12038 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
12039 # %replacecount = { "perlexpr" => number of times seen }
12040 # e.g { "s/a/b/" => 2 }
12041 $replacecount{$1}++;
12044 # Measure the length of the context around the {= perl expr =}
12045 # Use that {=...=} has been replaced with \000 above
12046 # So there is no need to deal with \257<
12047 while($c =~ s/ (\S*\000\S*) //xs) {
12049 $w =~ tr/\000//d; # Remove all \000's
12050 $contextlen += length($w);
12053 # All {= perl expr =} have been removed: The rest is non-context
12054 $noncontextlen += length $c;
12056 for(@$transfer_files, @$return_files,
12057 @$template_names, @$template_contents,
12059 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
12060 # Options that can contain replacement strings
12063 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
12064 # %replacecount = { "perlexpr" => number of times seen }
12065 # e.g { "$_++" => 2 }
12066 # But for tagstring we just need to mark it as seen
12067 $replacecount{$1} ||= 1;
12071 # If the command does not contain {} force it to be computed
12072 # as it is being used by --bar
12073 $replacecount{""} ||= 1;
12076 $len{'context'} = 0+$contextlen;
12077 $len{'noncontext'} = $noncontextlen;
12078 $len{'contextgroups'} = $contextgroups;
12079 $len{'noncontextgroups'} = @cmd-$contextgroups;
12080 ::debug("length", "@command Context: ", $len{'context'},
12081 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
12082 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
12085 # Default command = {}
12086 @command = ("\257<\257>");
12087 } elsif(($opt::pipe or $opt::pipepart)
12088 and not $opt::fifo and not $opt::cat) {
12089 # With --pipe / --pipe-part you can have no replacement
12092 # Append {} to the command if there are no {...}'s and no {=...=}
12093 push @command, ("\257<\257>");
12097 return(\%replacecount,\%len,@command);
12102 if(@{$self->{'unget'}}) {
12103 my $cmd_line = shift @{$self->{'unget'}};
12104 return ($cmd_line);
12106 if($opt::sqlworker) {
12107 # Get the sequence number from the SQL table
12108 $self->set_seq($SQL::next_seq);
12109 # Get the command from the SQL table
12110 $self->{'command'} = $SQL::command_ref;
12112 # Recompute replace counts based on the read command
12113 ($self->{'replacecount'},
12114 $self->{'len'}, @command) =
12115 replacement_counts_and_lengths($self->{'transfer_files'},
12116 $self->{'return_files'},
12117 $self->{'template_name'},
12118 $self->{'template_contents'},
12119 @$SQL::command_ref);
12120 if("@command" =~ /^[^ \t\n=]*\257</) {
12121 # Replacement string is (part of) the command (and not just
12122 # argument or variable definition V1={})
12123 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
12124 # Do no quote (Otherwise it will fail if the input contains spaces)
12125 $Global::quote_replace = 0;
12129 my $cmd_line = CommandLine->new($self->seq(),
12130 $self->{'command'},
12131 $self->{'arg_queue'},
12132 $self->{'context_replace'},
12133 $self->{'max_number_of_args'},
12134 $self->{'transfer_files'},
12135 $self->{'return_files'},
12136 $self->{'template_names'},
12137 $self->{'template_contents'},
12138 $self->{'replacecount'},
12141 $cmd_line->populate();
12142 ::debug("run","cmd_line->number_of_args ",
12143 $cmd_line->number_of_args(), "\n");
12144 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
12145 if($cmd_line->replaced() eq "") {
12146 # Empty command - pipe requires a command
12147 ::error("--pipe/--pipepart must have a command to pipe into ".
12149 ::wait_and_exit(255);
12151 } elsif($cmd_line->number_of_args() == 0) {
12152 # We did not get more args - maybe at EOF string?
12155 $self->set_seq($self->seq()+1);
12162 unshift @{$self->{'unget'}}, @_;
12167 my $empty = (not @{$self->{'unget'}}) &&
12168 $self->{'arg_queue'}->empty();
12169 ::debug("run", "CommandLineQueue->empty $empty");
12175 return $self->{'seq'};
12180 $self->{'seq'} = shift;
12183 sub quote_args($) {
12185 # If there is not command emulate |bash
12186 return $self->{'command'};
12190 package Limits::Command;
12192 # Maximal command line length (for -m and -X)
12193 sub max_length($) {
12194 # Find the max_length of a command line and cache it
12196 # number of chars on the longest command line allowed
12197 if(not $Limits::Command::line_max_len) {
12198 # Disk cache of max command line length
12199 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
12202 if(open(my $fh, "<", $len_cache)) {
12203 $cached_limit = <$fh>;
12204 $cached_limit || ::die_bug("Cannot read $len_cache");
12207 if(not $cached_limit) {
12208 $cached_limit = real_max_length();
12209 # If $HOME is write protected: Do not fail
12210 my $dir = ::dirname($len_cache);
12211 -d $dir or eval { File::Path::mkpath($dir); };
12212 open(my $fh, ">", $len_cache.$$);
12213 print $fh $cached_limit;
12215 rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
12217 $Limits::Command::line_max_len = tmux_length($cached_limit);
12218 if($opt::max_chars) {
12219 if($opt::max_chars <= $cached_limit) {
12220 $Limits::Command::line_max_len = $opt::max_chars;
12222 ::warning("Value for -s option should be < $cached_limit.");
12226 return int($Limits::Command::line_max_len);
12229 sub real_max_length() {
12230 # Find the max_length of a command line
12232 # The maximal command line length with 1 byte arguments
12233 # return find_max(" x");
12234 return find_max("x");
12238 my $string = shift;
12239 # This is slow on Cygwin, so give Cygwin users a warning
12240 if($^O eq "cygwin") {
12241 ::warning("Finding the maximal command line length. ".
12242 "This may take up to 30 seconds.")
12244 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
12245 my $upper = 100_000_000;
12246 # 1000 is supported everywhere, so the search can start anywhere 1..999
12247 # 324 makes the search much faster on Cygwin, so let us use that
12250 if($len > $upper) { return $len };
12252 } while (is_acceptable_command_line_length($len,$string));
12253 # Then search for the actual max length between
12254 # last successful length ($len/16) and upper bound
12255 return binary_find_max(int($len/16),$len,$string);
12258 # Prototype forwarding
12259 sub binary_find_max($$$);
12260 sub binary_find_max($$$) {
12261 # Given a lower and upper bound find the max (length or args) of a command line
12263 # number of chars on the longest command line allowed
12264 my ($lower, $upper, $string) = (@_);
12265 if($lower == $upper or $lower == $upper-1) { return $lower; }
12266 my $middle = int (($upper-$lower)/2 + $lower);
12267 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
12268 if (is_acceptable_command_line_length($middle,$string)) {
12269 return binary_find_max($middle,$upper,$string);
12271 return binary_find_max($lower,$middle,$string);
12278 sub is_acceptable_command_line_length($$) {
12279 # Test if a command line of this length can run
12280 # in the current environment
12281 # If the string is " x" it tests how many args are allowed
12283 # 0 if the command line length is too long
12286 my $string = shift;
12287 if($Global::parallel_env) {
12288 $len += length $Global::parallel_env;
12290 # Force using non-built-in command
12291 $prg ||= ::which("echo");
12292 ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string));
12293 ::debug("init", "$len=$? ");
12298 sub tmux_length($) {
12299 # If $opt::tmux set, find the limit for tmux
12300 # tmux 1.8 has a 2kB limit
12301 # tmux 1.9 has a 16kB limit
12302 # tmux 2.0 has a 16kB limit
12303 # tmux 2.1 has a 16kB limit
12304 # tmux 2.2 has a 16kB limit
12306 # $len = maximal command line length
12308 # $tmux_len = maximal length runable in tmux
12312 $ENV{'PARALLEL_TMUX'} ||= "tmux";
12313 if(not ::which($ENV{'PARALLEL_TMUX'})) {
12314 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
12315 ::wait_and_exit(255);
12318 for my $l (1, 2020, 16320, 100000, $len) {
12319 my $tmpfile = ::tmpname("tms");
12320 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
12321 " -S $tmpfile new-session -d -n echo $l".
12322 ("x"x$l). " && echo $l; rm -f $tmpfile";
12323 push @out, ::qqx($tmuxcmd);
12326 ::debug("tmux","tmux-out ",@out);
12328 # The arguments is given 3 times on the command line
12329 # and the wrapping is around 30 chars
12330 # (29 for tmux1.9, 33 for tmux1.8)
12331 my $tmux_len = ::max(@out);
12332 $len = ::min($len,int($tmux_len/4-33));
12333 ::debug("tmux","tmux-length ",$len);
12339 package RecordQueue;
12344 my $colsep = shift;
12347 if($opt::sqlworker) {
12349 $arg_sub_queue = SQLRecordQueue->new();
12350 } elsif(defined $colsep) {
12351 # Open one file with colsep or CSV
12352 $arg_sub_queue = RecordColQueue->new($fhs);
12354 # Open one or more files if multiple -a
12355 $arg_sub_queue = MultifileQueue->new($fhs);
12358 'unget' => \@unget,
12360 'arg_sub_queue' => $arg_sub_queue,
12361 }, ref($class) || $class;
12366 # reference to array of Arg-objects
12368 if(@{$self->{'unget'}}) {
12369 $self->{'arg_number'}++;
12370 # Flush cached computed replacements in Arg-objects
12371 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
12372 my $ret = shift @{$self->{'unget'}};
12374 map { $_->flush_cache() } @$ret;
12378 my $ret = $self->{'arg_sub_queue'}->get();
12380 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
12381 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
12382 # to mean no-string
12383 ::warning("A NUL character in the input was replaced with \\0.",
12384 "NUL cannot be passed through in the argument list.",
12385 "Did you mean to use the --null option?");
12386 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
12387 # Replace \0 with \\0
12388 my $a = $_->orig();
12393 if(defined $Global::max_number_of_args
12394 and $Global::max_number_of_args == 0) {
12395 ::debug("run", "Read 1 but return 0 args\n");
12396 # \0noarg => nothing (not the empty string)
12397 map { $_->set_orig("\0noarg"); } @$ret;
12399 # Flush cached computed replacements in Arg-objects
12400 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
12401 map { $_->flush_cache() } @$ret;
12408 ::debug("run", "RecordQueue-unget\n");
12409 $self->{'arg_number'} -= @_;
12410 unshift @{$self->{'unget'}}, @_;
12415 my $empty = (not @{$self->{'unget'}}) &&
12416 $self->{'arg_sub_queue'}->empty();
12417 ::debug("run", "RecordQueue->empty $empty");
12421 sub flush_cache($) {
12423 for my $record (@{$self->{'unget'}}) {
12424 for my $arg (@$record) {
12425 $arg->flush_cache();
12428 $self->{'arg_sub_queue'}->flush_cache();
12431 sub arg_number($) {
12433 return $self->{'arg_number'};
12437 package RecordColQueue;
12443 my $arg_sub_queue = MultifileQueue->new($fhs);
12445 'unget' => \@unget,
12446 'arg_sub_queue' => $arg_sub_queue,
12447 }, ref($class) || $class;
12452 # reference to array of Arg-objects
12454 if(@{$self->{'unget'}}) {
12455 return shift @{$self->{'unget'}};
12457 if($self->{'arg_sub_queue'}->empty()) {
12460 my $in_record = $self->{'arg_sub_queue'}->get();
12461 if(defined $in_record) {
12462 my @out_record = ();
12463 for my $arg (@$in_record) {
12464 ::debug("run", "RecordColQueue::arg $arg\n");
12465 my $line = $arg->orig();
12466 ::debug("run", "line='$line'\n");
12469 # Parse CSV and put it into a record
12471 if(not $Global::csv->parse($line)) {
12472 die "CSV has unexpected format: ^$line^";
12474 for($Global::csv->fields()) {
12475 push @out_record, Arg->new($_);
12478 # Split --colsep into record
12479 for my $s (split /$opt::colsep/o, $line, -1) {
12480 push @out_record, Arg->new($s);
12484 push @out_record, Arg->new("");
12487 return \@out_record;
12495 ::debug("run", "RecordColQueue-unget '@_'\n");
12496 unshift @{$self->{'unget'}}, @_;
12501 my $empty = (not @{$self->{'unget'}}) &&
12502 $self->{'arg_sub_queue'}->empty();
12503 ::debug("run", "RecordColQueue->empty $empty");
12507 sub flush_cache($) {
12509 for my $arg (@{$self->{'unget'}}) {
12510 $arg->flush_cache();
12512 $self->{'arg_sub_queue'}->flush_cache();
12516 package SQLRecordQueue;
12522 'unget' => \@unget,
12523 }, ref($class) || $class;
12528 # reference to array of Arg-objects
12530 if(@{$self->{'unget'}}) {
12531 return shift @{$self->{'unget'}};
12533 return $Global::sql->get_record();
12538 ::debug("run", "SQLRecordQueue-unget '@_'\n");
12539 unshift @{$self->{'unget'}}, @_;
12544 if(@{$self->{'unget'}}) { return 0; }
12545 my $get = $self->get();
12547 $self->unget($get);
12549 my $empty = not $get;
12550 ::debug("run", "SQLRecordQueue->empty $empty");
12554 sub flush_cache($) {
12556 for my $record (@{$self->{'unget'}}) {
12557 for my $arg (@$record) {
12558 $arg->flush_cache();
12564 package MultifileQueue;
12566 @Global::unget_argv=();
12571 for my $fh (@$fhs) {
12572 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
12574 "Input is read from the terminal. You are either an expert",
12575 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
12576 "::: or :::: or -a or to pipe data into parallel. If so",
12577 "consider going through the tutorial: man parallel_tutorial",
12578 "Press CTRL-D to exit.");
12582 'unget' => \@Global::unget_argv,
12584 'arg_matrix' => undef,
12585 }, ref($class) || $class;
12591 return $self->link_get();
12593 return $self->nest_get();
12599 ::debug("run", "MultifileQueue-unget '@_'\n");
12600 unshift @{$self->{'unget'}}, @_;
12605 my $empty = (not @Global::unget_argv) &&
12606 not @{$self->{'unget'}};
12607 for my $fh (@{$self->{'fhs'}}) {
12608 $empty &&= eof($fh);
12610 ::debug("run", "MultifileQueue->empty $empty ");
12614 sub flush_cache($) {
12616 for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) {
12617 for my $arg (@$record) {
12618 $arg->flush_cache();
12625 if(@{$self->{'unget'}}) {
12626 return shift @{$self->{'unget'}};
12631 for my $i (0..$#{$self->{'fhs'}}) {
12632 my $fh = $self->{'fhs'}[$i];
12633 my $arg = read_arg_from_fh($fh);
12635 # Record $arg for recycling at end of file
12636 push @{$self->{'arg_matrix'}[$i]}, $arg;
12637 push @record, $arg;
12640 ::debug("run", "EOA ");
12641 # End of file: Recycle arguments
12642 push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]};
12643 # return last @{$args->{'args'}{$fh}};
12644 push @record, @{$self->{'arg_matrix'}[$i]}[-1];
12656 if(@{$self->{'unget'}}) {
12657 return shift @{$self->{'unget'}};
12662 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
12663 if(not $self->{'arg_matrix'}) {
12664 # Initialize @arg_matrix with one arg from each file
12665 # read one line from each file
12668 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
12669 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12673 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
12674 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
12677 # All filehandles were at eof or eof-string
12680 return [@first_arg_set];
12683 # Treat the case with one input source special. For multiple
12684 # input sources we need to remember all previously read values to
12685 # generate all combinations. But for one input source we can
12686 # forget the value after first use.
12687 if($no_of_inputsources == 1) {
12688 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
12689 if(defined($arg)) {
12694 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
12695 if(eof($self->{'fhs'}[$fhno])) {
12699 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12700 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
12701 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
12702 $self->{'arg_matrix'}[$fhno][$len] = $arg;
12703 # make all new combinations
12705 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
12706 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
12707 # Is input source --link'ed to the next?
12708 $opt::linkinputsource[$fhn+1]);
12710 # Find only combinations with this new entry
12711 $combarg[2*$fhno] = [$len,$len];
12713 # [ 1, 3, 7 ], [ 2, 4, 1 ]
12715 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
12717 for my $c (expand_combinations(@combarg)) {
12719 for my $n (0 .. $no_of_inputsources - 1 ) {
12720 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
12724 # append the mapped to the ungotten arguments
12725 push @{$self->{'unget'}}, @mapped;
12728 return shift @{$self->{'unget'}};
12732 # all are eof or at EOF string; return from the unget queue
12733 return shift @{$self->{'unget'}};
12739 my $dos_crnl_determined;
12740 sub read_arg_from_fh($) {
12741 # Read one Arg from filehandle
12743 # Arg-object with one read line
12744 # undef if end of file
12748 my $half_record = 0;
12750 # This makes 10% faster
12751 if(not defined ($arg = <$fh>)) {
12752 if(defined $prepend) {
12753 return Arg->new($prepend);
12758 if(not $dos_crnl_determined and not $opt::d) {
12759 # Warn if input has CR-NL and -d is not set
12760 if($arg =~ /\r$/) {
12765 if($cr_count == 3 or $nl_count == 3) {
12766 $dos_crnl_determined = 1;
12767 if($nl_count == 0 and $cr_count == 3) {
12768 ::warning('The first three values end in CR-NL. '.
12769 'Consider using -d "\r\n"');
12774 # We need to read a full CSV line.
12775 if(($arg =~ y/"/"/) % 2 ) {
12776 # The number of " on the line is uneven:
12777 # If we were in a half_record => we have a full record now
12778 # If we were outside a half_record =>
12779 # we are in a half record now
12780 $half_record = not $half_record;
12783 # CSV half-record with quoting:
12784 # col1,"col2 2""x3"" board newline <-this one
12789 # Now we have a full CSV record
12794 if($Global::end_of_file_string and
12795 $arg eq $Global::end_of_file_string) {
12796 # Ignore the rest of input file
12798 ::debug("run", "EOF-string ($arg) met\n");
12799 if(defined $prepend) {
12800 return Arg->new($prepend);
12805 if(defined $prepend) {
12806 $arg = $prepend.$arg; # For line continuation
12809 if($Global::ignore_empty) {
12810 if($arg =~ /^\s*$/) {
12811 redo; # Try the next line
12814 if($Global::max_lines) {
12815 if($arg =~ /\s$/) {
12816 # Trailing space => continued on next line
12821 }} while (1 == 0); # Dummy loop {{}} for redo
12823 return Arg->new($arg);
12825 ::die_bug("multiread arg undefined");
12830 # Prototype forwarding
12831 sub expand_combinations(@);
12832 sub expand_combinations(@) {
12834 # ([xmin,xmax], [ymin,ymax], ...)
12835 # Returns: ([x,y,...],[x,y,...])
12836 # where xmin <= x <= xmax and ymin <= y <= ymax
12837 my $minmax_ref = shift;
12838 my $link = shift; # This is linked to the next input source
12839 my $xmin = $$minmax_ref[0];
12840 my $xmax = $$minmax_ref[1];
12843 my @rest = expand_combinations(@_);
12845 # Linked to next col with --link/:::+/::::+
12846 # TODO BUG does not wrap values if not same number of vals
12847 push(@p, map { [$$_[0], @$_] }
12848 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
12850 # If there are more columns: Compute those recursively
12851 for(my $x = $xmin; $x <= $xmax; $x++) {
12852 push @p, map { [$x, @$_] } @rest;
12856 for(my $x = $xmin; $x <= $xmax; $x++) {
12870 if($opt::hostgroups) {
12871 if($orig =~ s:@(.+)::) {
12872 # We found hostgroups on the arg
12873 @hostgroups = split(/\+/, $1);
12874 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
12875 # This hostgroup is not defined using -S
12877 ::warning("Adding hostgroups: @hostgroups");
12879 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
12880 my $sshlogin = SSHLogin->new($_);
12881 my $sshlogin_string = $sshlogin->string();
12882 $Global::host{$sshlogin_string} = $sshlogin;
12883 $Global::hostgroups{$sshlogin_string} = 1;
12887 # No hostgroup on the arg => any hostgroup
12888 @hostgroups = (keys %Global::hostgroups);
12893 'hostgroups' => \@hostgroups,
12894 }, ref($class) || $class;
12898 # Q alias for ::shell_quote_scalar
12899 my $ret = ::Q($_[0]);
12900 no warnings 'redefine';
12906 # pQ alias for ::perl_quote_scalar
12907 my $ret = ::pQ($_[0]);
12908 no warnings 'redefine';
12914 $Global::use{"DBI"} ||= eval "use B; 1;";
12919 return $Global::JobQueue->total_jobs();
12926 # shorthand for $job->skip();
12930 # shorthand for $job->slot();
12934 # shorthand for $job->seq();
12938 # Do not quote this arg
12939 $Global::unquote_arg = 1;
12941 sub yyyy_mm_dd_hh_mm_ss() {
12942 # ISO8601 2038-01-19T03:14:08
12943 ::strftime("%Y-%m-%dT%H:%M:%S", localtime(time()));
12945 sub yyyy_mm_dd_hh_mm() {
12946 # ISO8601 2038-01-19T03:14
12947 ::strftime("%Y-%m-%dT%H:%M", localtime(time()));
12950 # ISO8601 2038-01-19
12951 ::strftime("%Y-%m-%d", localtime(time()));
12953 sub yyyymmddhhmmss() {
12954 # ISO8601 20380119031408
12955 ::strftime("%Y%m%d%H%M%S", localtime(time()));
12957 sub yyyymmddhhmm() {
12958 # ISO8601 203801190314
12959 ::strftime("%Y%m%d%H%M", localtime(time()));
12963 ::strftime("%Y%m%d", localtime(time()));
12966 sub replace($$$$) {
12967 # Calculates the corresponding value for a given perl expression
12969 # The calculated string (quoted if asked for)
12971 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
12972 my $quote = shift; # should the string be quoted?
12973 # This is actually a CommandLine-object,
12974 # but it looks nice to be able to say {= $job->slot() =}
12976 # Positional replace treated as normal replace
12977 $perlexpr =~ s/^(-?\d+)? *//;
12978 if(not $Global::cache_replacement_eval
12980 not $self->{'cache'}{$perlexpr}) {
12981 # Only compute the value once
12982 # Use $_ as the variable to change
12984 if($Global::trim eq "n") {
12985 $_ = $self->{'orig'};
12988 $_ = trim_of($self->{'orig'});
12990 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
12991 if(not $perleval{$perlexpr}) {
12992 # Make an anonymous function of the $perlexpr
12993 # And more importantly: Compile it only once
12994 if($perleval{$perlexpr} =
12995 eval('sub { no strict; no warnings; my $job = shift; '.
12999 # The eval failed. Maybe $perlexpr is invalid perl?
13000 ::error("Cannot use $perlexpr: $@");
13001 ::wait_and_exit(255);
13004 # Execute the function
13005 $perleval{$perlexpr}->($job);
13006 $self->{'cache'}{$perlexpr} = $_;
13007 if($Global::unquote_arg) {
13008 # uq() was called in perlexpr
13009 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
13010 # Reset for next perlexpr
13011 $Global::unquote_arg = 0;
13014 # Return the value quoted if needed
13015 if($self->{'cache'}{'unquote'}{$perlexpr}) {
13016 return($self->{'cache'}{$perlexpr});
13018 return($quote ? Q($self->{'cache'}{$perlexpr})
13019 : $self->{'cache'}{$perlexpr});
13024 sub flush_cache($) {
13025 # Flush cache of computed values
13027 $self->{'cache'} = undef;
13032 return $self->{'orig'};
13037 $self->{'orig'} = shift;
13041 # Removes white space as specifed by --trim:
13047 # string with white space removed as needed
13048 my @strings = map { defined $_ ? $_ : "" } (@_);
13050 if($Global::trim eq "n") {
13052 } elsif($Global::trim eq "l") {
13053 for my $arg (@strings) { $arg =~ s/^\s+//; }
13054 } elsif($Global::trim eq "r") {
13055 for my $arg (@strings) { $arg =~ s/\s+$//; }
13056 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
13057 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
13059 ::error("--trim must be one of: r l rl lr.");
13060 ::wait_and_exit(255);
13062 return wantarray ? @strings : "@strings";
13066 package TimeoutQueue;
13070 my $delta_time = shift;
13072 if($delta_time =~ /(\d+(\.\d+)?)%/) {
13073 # Timeout in percent
13075 $delta_time = 1_000_000;
13077 $delta_time = ::multiply_time_units($delta_time);
13081 'delta_time' => $delta_time,
13083 'remedian_idx' => 0,
13084 'remedian_arr' => [],
13085 'remedian' => undef,
13086 }, ref($class) || $class;
13089 sub delta_time($) {
13091 return $self->{'delta_time'};
13094 sub set_delta_time($$) {
13096 $self->{'delta_time'} = shift;
13101 return $self->{'remedian'};
13104 sub set_remedian($$) {
13105 # Set median of the last 999^3 (=997002999) values using Remedian
13107 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
13108 # robust averaging method for large data sets." Journal of the
13109 # American Statistical Association 85.409 (1990): 97-104.
13112 my $i = $self->{'remedian_idx'}++;
13113 my $rref = $self->{'remedian_arr'};
13114 $rref->[0][$i%999] = $val;
13115 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
13116 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
13117 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
13120 sub update_median_runtime($) {
13121 # Update delta_time based on runtime of finished job if timeout is
13124 my $runtime = shift;
13125 if($self->{'pct'}) {
13126 $self->set_remedian($runtime);
13127 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
13128 ::debug("run", "Timeout: $self->{'delta_time'}s ");
13132 sub process_timeouts($) {
13133 # Check if there was a timeout
13135 # $self->{'queue'} is sorted by start time
13136 while (@{$self->{'queue'}}) {
13137 my $job = $self->{'queue'}[0];
13138 if($job->endtime()) {
13139 # Job already finished. No need to timeout the job
13140 # This could be because of --keep-order
13141 shift @{$self->{'queue'}};
13142 } elsif($job->is_timedout($self->{'delta_time'})) {
13143 # Need to shift off queue before kill
13144 # because kill calls usleep that calls process_timeouts
13145 shift @{$self->{'queue'}};
13146 ::warning("This job was killed because it timed out:",
13150 # Because they are sorted by start time the rest are later
13159 push @{$self->{'queue'}}, $in;
13168 $Global::use{"DBI"} ||= eval "use DBI; 1;";
13169 # +DBURL = append to this DBURL
13170 my $append = $dburl=~s/^\+//;
13171 my %options = parse_dburl(get_alias($dburl));
13172 my %driveralias = ("sqlite" => "SQLite",
13173 "sqlite3" => "SQLite",
13175 "postgres" => "Pg",
13176 "postgresql" => "Pg",
13178 "oracle" => "Oracle",
13179 "ora" => "Oracle");
13180 my $driver = $driveralias{$options{'databasedriver'}} ||
13181 $options{'databasedriver'};
13182 my $database = $options{'database'};
13183 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
13184 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
13185 my $dsn = "DBI:$driver:dbname=$database$host$port";
13186 my $userid = $options{'user'};
13187 my $password = $options{'password'};;
13188 if(not grep /$driver/, DBI->available_drivers) {
13189 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
13190 ::wait_and_exit(255);
13193 if($driver eq "CSV") {
13194 # CSV does not use normal dsn
13196 $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
13197 or die $DBI::errstr;
13199 ::error("$database is not a directory.");
13200 ::wait_and_exit(255);
13203 $dbh = DBI->connect($dsn, $userid, $password,
13204 { RaiseError => 1, AutoInactiveDestroy => 1 })
13205 or die $DBI::errstr;
13207 $dbh->{'PrintWarn'} = $Global::debug || 0;
13208 $dbh->{'PrintError'} = $Global::debug || 0;
13209 $dbh->{'RaiseError'} = 1;
13210 $dbh->{'ShowErrorStatement'} = 1;
13211 $dbh->{'HandleError'} = sub {};
13212 if(not defined $options{'table'}) {
13213 ::error("The DBURL ($dburl) must contain a table.");
13214 ::wait_and_exit(255);
13219 'driver' => $driver,
13220 'max_number_of_args' => undef,
13221 'table' => $options{'table'},
13222 'append' => $append,
13223 }, ref($class) || $class;
13226 # Prototype forwarding
13230 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
13231 if ($alias !~ /^:/) {
13238 ($path) = readlink($0) =~ m|^(.*)/|;
13240 ($path) = $0 =~ m|^(.*)/|;
13243 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
13244 "$path/dburl.aliases", "$path/dburl.aliases.dist");
13245 for (@deprecated) {
13247 ::warning("$_ is deprecated. ".
13248 "Use .sql/aliases instead (read man sql).");
13252 check_permissions("$ENV{HOME}/.sql/aliases");
13253 check_permissions("$ENV{HOME}/.dburl.aliases");
13254 my @search = ("$ENV{HOME}/.sql/aliases",
13255 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
13256 "$path/dburl.aliases", "$path/dburl.aliases.dist");
13257 for my $alias_file (@search) {
13258 # local $/ needed if -0 set
13260 if(-r $alias_file) {
13261 open(my $in, "<", $alias_file) || die;
13262 push @urlalias, <$in>;
13266 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
13267 # If we saw this before: we have an alias loop
13268 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
13269 ::error("$alias_part is a cyclic alias.");
13272 push @Private::seen_aliases, $alias_part;
13277 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
13281 return get_alias($dburl.$rest);
13283 ::error("$alias is not defined in @search");
13288 sub check_permissions($) {
13293 my $username = (getpwuid($<))[0];
13294 ::warning("$file should be owned by $username: ".
13295 "chown $username $file");
13297 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
13298 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
13300 my $username = (getpwuid($<))[0];
13301 ::warning("$file should be only be readable by $username: ".
13302 "chmod 600 $file");
13307 sub parse_dburl($) {
13310 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
13312 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
13313 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
13314 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
13316 ([^:@/][^:@]*|) # Username ($2)
13318 :([^@]*) # Password ($3)
13321 ([^:/]*)? # Hostname ($4)
13324 ([^/]*)? # Port ($5)
13328 ([^/?]*)? # Database ($6)
13332 ([^?]*)? # Table ($7)
13339 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
13340 $options{user} = ::undef_if_empty(uri_unescape($2));
13341 $options{password} = ::undef_if_empty(uri_unescape($3));
13342 $options{host} = ::undef_if_empty(uri_unescape($4));
13343 $options{port} = ::undef_if_empty(uri_unescape($5));
13344 $options{database} = ::undef_if_empty(uri_unescape($6));
13345 $options{table} = ::undef_if_empty(uri_unescape($7));
13346 $options{query} = ::undef_if_empty(uri_unescape($8));
13347 ::debug("sql", "dburl $url\n");
13348 ::debug("sql", "databasedriver ", $options{databasedriver},
13349 " user ", $options{user},
13350 " password ", $options{password}, " host ", $options{host},
13351 " port ", $options{port}, " database ", $options{database},
13352 " table ", $options{table}, " query ", $options{query}, "\n");
13354 ::error("$url is not a valid DBURL");
13360 sub uri_unescape($) {
13361 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
13362 # to avoid depending on URI::Escape
13363 # This section is (C) Gisle Aas.
13364 # Note from RFC1630: "Sequences which start with a percent sign
13365 # but are not followed by two hexadecimal characters are reserved
13366 # for future extension"
13368 if (@_ && wantarray) {
13369 # not executed for the common case of a single argument
13370 my @str = ($str, @_); # need to copy
13372 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
13376 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
13383 if($self->{'driver'} eq "CSV") {
13385 if($stmt eq "BEGIN" or
13386 $stmt eq "COMMIT") {
13391 my $dbh = $self->{'dbh'};
13392 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
13393 # Execute with the rest of the args - if any
13397 while($lockretry < 10) {
13398 $sth = $dbh->prepare($stmt);
13401 eval { $rv = $sth->execute(@_) }) {
13404 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
13406 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
13408 # It is just a worker that reported back too late -
13409 # another worker had finished the job first
13410 # and the table was then dropped
13414 if($DBI::errstr =~ /locked/) {
13415 ::debug("sql", "Lock retry: $lockretry");
13417 ::usleep(rand()*300);
13418 } elsif(not $sth) {
13422 ::error($DBI::errstr);
13423 ::wait_and_exit(255);
13427 if($lockretry >= 10) {
13428 ::die_bug("retry > 10: $DBI::errstr");
13430 if($rv < 0 and $DBI::errstr){
13431 ::error($DBI::errstr);
13432 ::wait_and_exit(255);
13439 my $sth = $self->run(@_);
13441 # If $sth = 0 it means the table was dropped by another process
13443 my @row = $sth->fetchrow_array();
13445 push @retval, \@row;
13452 return $self->{'table'};
13457 return $self->{'append'};
13463 my $table = $self->table();
13464 $self->run("UPDATE $table $stmt",@_);
13469 my $commandline = shift;
13471 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
13472 $commandline->seq(),
13473 join("",@{$commandline->{'output'}{1}}),
13474 join("",@{$commandline->{'output'}{2}}));
13477 sub max_number_of_args($) {
13478 # Maximal number of args for this table
13480 if(not $self->{'max_number_of_args'}) {
13481 # Read the number of args from the SQL table
13482 my $table = $self->table();
13483 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
13484 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
13485 Receive Exitval _Signal Command Stdout Stderr);
13487 ::error
("$table contains no records");
13489 # Count the number of Vx columns
13490 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
13492 return $self->{'max_number_of_args'};
13495 sub set_max_number_of_args
($$) {
13497 $self->{'max_number_of_args'} = shift;
13500 sub create_table
($) {
13502 if($self->append()) { return; }
13503 my $max_number_of_args = shift;
13504 $self->set_max_number_of_args($max_number_of_args);
13505 my $table = $self->table();
13506 $self->run(qq(DROP TABLE IF EXISTS
$table;));
13507 # BIGINT and TEXT are not supported in these databases or are too small
13509 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
13510 "TEXT" => "CLOB", },
13511 "mysql" => { "TEXT" => "BLOB", },
13512 "CSV" => { "BIGINT" => "INT",
13513 "FLOAT" => "REAL", },
13515 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
13516 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
13517 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
13518 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
13519 $self->run(qq{CREATE TABLE
$table
13534 sub insert_records
($) {
13537 my $command_ref = shift;
13538 my $record_ref = shift;
13539 my $table = $self->table();
13540 # For SQL encode the command with \257 space as split points
13541 my $command = join("\257 ",@
$command_ref);
13542 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
13543 # Two extra value due to $seq, Exitval, Send
13544 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
13545 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
13546 "VALUES ($v_vals);", $seq, $command, -1000,
13547 0, @
$record_ref[1..$#$record_ref]);
13551 sub get_record
($) {
13554 my $table = $self->table();
13555 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
13556 my $rand = "Reserved-".$$.rand();
13561 if($self->{'driver'} eq "CSV") {
13562 # Sub SELECT is not supported in CSV
13563 # So to minimize the race condition below select a job at random
13564 my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
13565 "WHERE Exitval = -1000 LIMIT 100;");
13566 $v = [ sort { rand() > 0.5 } @
$r ];
13568 # Avoid race condition where multiple workers get the same job
13569 # by setting Stdout to a unique string
13570 # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
13571 $self->update("SET Stdout = ?,Exitval = ? ".
13573 " SELECT * FROM (".
13574 " SELECT min(Seq) FROM $table WHERE Exitval = -1000".
13576 ") AND Exitval = -1000;", $rand, -1210);
13577 # If a parallel worker overwrote the unique string this will get nothing
13578 $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
13579 "WHERE Stdout = ?;", $rand);
13582 my $val_ref = $v->[0];
13583 # Mark record as taken
13584 my $seq = shift @
$val_ref;
13585 # Save the sequence number to use when running the job
13586 $SQL::next_seq
= $seq;
13587 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
13588 # Command is encoded with '\257 space' as splitting char
13589 my @command = split /\257 /, shift @
$val_ref;
13590 $SQL::command_ref
= \
@command;
13592 push @retval, Arg
->new($_);
13595 # If the record was updated by another job in parallel,
13596 # then we may not be done, so see if there are more jobs pending
13598 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
13600 } while (not $v->[0] and $more_pending->[0]);
13609 sub total_jobs
($) {
13611 my $table = $self->table();
13612 my $v = $self->get("SELECT count(*) FROM $table;");
13614 return $v->[0]->[0];
13616 ::die_bug
("SQL::total_jobs");
13622 my $table = $self->table();
13623 my $v = $self->get("SELECT max(Seq) FROM $table;");
13625 return $v->[0]->[0];
13627 ::die_bug
("SQL::max_seq");
13632 # Check if there are any jobs left in the SQL table that do not
13633 # have a "real" exitval
13635 if($opt::wait or $Global::start_sqlworker
) {
13636 my $table = $self->table();
13637 my $rv = $self->get("select Seq,Exitval from $table ".
13638 "where Exitval <= -1000 limit 1");
13639 return not $rv->[0];
13647 # This package provides a counting semaphore
13649 # If a process dies without releasing the semaphore the next process
13650 # that needs that entry will clean up dead semaphores
13652 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
13653 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
13654 # process holding the entry. If the process dies, the entry can be
13655 # taken by another process.
13661 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
13662 $id = "id-".$id; # To distinguish it from a process id
13663 my $parallel_locks = $Global::cache_dir
. "/semaphores";
13664 -d
$parallel_locks or ::mkdir_or_die
($parallel_locks);
13665 my $lockdir = "$parallel_locks/$id";
13666 my $lockfile = $lockdir.".lock";
13667 if(-d
$parallel_locks and -w
$parallel_locks
13668 and -r
$parallel_locks and -x
$parallel_locks) {
13671 ::error
("Semaphoredir must be writable: '$parallel_locks'");
13672 ::wait_and_exit
(255);
13675 if($count < 1) { ::die_bug
("semaphore-count: $count"); }
13677 'lockfile' => $lockfile,
13678 'lockfh' => Symbol
::gensym
(),
13679 'lockdir' => $lockdir,
13681 'idfile' => $lockdir."/".$id,
13683 'pidfile' => $lockdir."/".$$.'@'.::hostname
(),
13684 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
13685 }, ref($class) || $class;
13688 sub remove_dead_locks
($) {
13690 my $lockdir = $self->{'lockdir'};
13692 for my $d (glob "$lockdir/*") {
13693 $d =~ m
:$lockdir/([0-9]+)\@
([-\
._a
-z0
-9]+)$:o
or next;
13694 my ($pid, $host) = ($1, $2);
13695 if($host eq ::hostname
()) {
13697 ::debug
("sem", "Alive: $pid $d\n");
13699 ::debug
("sem", "Dead: $d\n");
13708 my $sleep = 1; # 1 ms
13709 my $start_time = time;
13711 # Can we get a lock?
13712 $self->atomic_link_if_count_less_than() and last;
13713 $self->remove_dead_locks();
13714 # Retry slower and slower up to 1 second
13715 $sleep = ($sleep < 1000) ?
($sleep * 1.1) : ($sleep);
13716 # Random to avoid every sleeping job waking up at the same time
13717 ::usleep
(rand()*$sleep);
13718 if($opt::semaphoretimeout
) {
13719 if($opt::semaphoretimeout
> 0
13721 time - $start_time > $opt::semaphoretimeout
) {
13722 # Timeout: Take the semaphore anyway
13723 ::warning
("Semaphore timed out. Stealing the semaphore.");
13724 if(not -e
$self->{'idfile'}) {
13725 open (my $fh, ">", $self->{'idfile'}) or
13726 ::die_bug
("timeout_write_idfile: $self->{'idfile'}");
13729 link $self->{'idfile'}, $self->{'pidfile'};
13732 if($opt::semaphoretimeout
< 0
13734 time - $start_time > -$opt::semaphoretimeout
) {
13736 ::warning
("Semaphore timed out. Exiting.");
13742 ::debug
("sem", "acquired $self->{'pid'}\n");
13747 ::rm
($self->{'pidfile'});
13748 if($self->nlinks() == 1) {
13749 # This is the last link, so atomic cleanup
13751 if($self->nlinks() == 1) {
13752 ::rm
($self->{'idfile'});
13753 rmdir $self->{'lockdir'};
13757 ::debug
("run", "released $self->{'pid'}\n");
13760 sub pid_change
($) {
13761 # This should do what release()+acquire() would do without having
13762 # to re-acquire the semaphore
13765 my $old_pidfile = $self->{'pidfile'};
13766 $self->{'pid'} = $$;
13767 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname
();
13768 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
13769 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13770 ::rm
($old_pidfile);
13773 sub atomic_link_if_count_less_than
($) {
13774 # Link $file1 to $file2 if nlinks to $file1 < $count
13778 my $nlinks = $self->nlinks();
13779 ::debug
("sem","$nlinks<$self->{'count'} ");
13780 if($nlinks < $self->{'count'}) {
13781 -d
$self->{'lockdir'} or ::mkdir_or_die
($self->{'lockdir'});
13782 if(not -e
$self->{'idfile'}) {
13783 open (my $fh, ">", $self->{'idfile'}) or
13784 ::die_bug
("write_idfile: $self->{'idfile'}");
13787 $retval = link $self->{'idfile'}, $self->{'pidfile'};
13788 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13791 ::debug
("sem", "atomic $retval");
13797 if(-e
$self->{'idfile'}) {
13798 return (stat(_
))[3];
13806 my $sleep = 100; # 100 ms
13807 my $total_sleep = 0;
13808 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
13810 while(not $locked) {
13811 if(tell($self->{'lockfh'}) == -1) {
13813 open($self->{'lockfh'}, ">", $self->{'lockfile'})
13814 or ::debug("run
", "Cannot
open $self->{'lockfile'}");
13816 if($self->{'lockfh'}) {
13818 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
13819 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
13820 # The file is locked: No need to retry
13824 if ($! =~ m/Function not implemented/) {
13825 ::warning("flock: $!",
13826 "Will
wait for a random
while.");
13827 ::usleep(rand(5000));
13828 # File cannot be locked: No need to retry
13834 # Locking failed in first round
13835 # Sleep and try again
13836 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13837 # Random to avoid every sleeping job waking up at the same time
13838 ::usleep(rand()*$sleep);
13839 $total_sleep += $sleep;
13840 if($opt::semaphoretimeout) {
13841 if($opt::semaphoretimeout > 0
13843 $total_sleep/1000 > $opt::semaphoretimeout) {
13844 # Timeout: Take the semaphore anyway
13845 ::warning("Semaphore timed out
. Taking the semaphore
.");
13849 if($opt::semaphoretimeout < 0
13851 $total_sleep/1000 > -$opt::semaphoretimeout) {
13853 ::warning("Semaphore timed out
. Exiting
.");
13858 if($total_sleep/1000 > 30) {
13859 ::warning("Semaphore stuck
for 30 seconds
. ".
13860 "Consider using
--semaphoretimeout
.");
13864 ::debug("run
", "locked
$self->{'lockfile'}");
13869 ::rm($self->{'lockfile'});
13870 close $self->{'lockfh'};
13871 ::debug("run
", "unlocked
\n");
13874 # Keep perl -w happy
13876 $opt::x = $Semaphore::timeout = $Semaphore::wait =
13877 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
13878 $Global::max_slot_number = $opt::session;
13883 save_stdin_stdout_stderr();
13884 save_original_signal_handler();
13886 ::debug("init
", "Open file descriptors
: ", join(" ",keys %Global::fh), "\n");
13887 my $number_of_args;
13888 if($Global::max_number_of_args) {
13889 $number_of_args = $Global::max_number_of_args;
13890 } elsif ($opt::X or $opt::m or $opt::xargs) {
13891 $number_of_args = undef;
13893 $number_of_args = 1;
13896 my @command = @ARGV;
13897 my @input_source_fh;
13898 if($opt::pipepart) {
13900 @input_source_fh = map { open_or_exit($_) } @opt::a;
13901 # Remove the first: It will be the file piped.
13902 shift @input_source_fh;
13903 if(not @input_source_fh and not $opt::pipe) {
13904 @input_source_fh = (*STDIN);
13907 # -a is used for data - not for command line args
13908 @input_source_fh = map { open_or_exit($_) } "/dev/null
";
13911 @input_source_fh = map { open_or_exit($_) } @opt::a;
13912 if(not @input_source_fh and not $opt::pipe) {
13913 @input_source_fh = (*STDIN);
13917 if($opt::skip_first_line) {
13918 # Skip the first line for the first file handle
13919 my $fh = $input_source_fh[0];
13923 set_input_source_header(\@command,\@input_source_fh);
13924 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
13925 # Parallel check all hosts are up. Remove hosts that are down
13930 if($opt::sqlmaster and $opt::sqlworker) {
13931 # Start a real --sqlworker in the background later
13932 $Global::start_sqlworker = 1;
13933 $opt::sqlworker = undef;
13936 if($opt::nonall or $opt::onall) {
13937 onall(\@input_source_fh,@command);
13938 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
13941 $Global::JobQueue = JobQueue->new(
13942 \@command, \@input_source_fh, $Global::ContextReplace,
13943 $number_of_args, \@Global::transfer_files, \@Global::ret_files,
13944 \@Global::template_names, \@Global::template_contents
13947 if($opt::sqlmaster) {
13948 # Create SQL table to hold joblog + output
13949 # Figure out how many arguments are in a job
13950 # (It is affected by --colsep, -N, $number_source_fh)
13951 my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
13952 my $record = $record_queue->get();
13953 my $no_of_values = $number_of_args * (1+$#{$record});
13954 $record_queue->unget($record);
13955 $Global::sql->create_table($no_of_values);
13956 if($opt::sqlworker) {
13957 # Start a real --sqlworker in the background later
13958 $Global::start_sqlworker = 1;
13959 $opt::sqlworker = undef;
13963 if($opt::pipepart) {
13965 } elsif($opt::pipe and $opt::tee) {
13967 } elsif($opt::pipe and $opt::shard or $opt::bin) {
13968 pipe_shard_setup();
13971 if(not $opt::pipepart and $opt::groupby) {
13972 group_by_stdin_filter();
13974 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
13975 # Count the number of jobs or shuffle all jobs
13976 # before starting any.
13977 # Must be done after ungetting any --pipepart jobs.
13978 $Global::JobQueue->total_jobs();
13980 # Compute $Global::max_jobs_running
13981 # Must be done after ungetting any --pipepart jobs.
13982 max_jobs_running();
13986 if($Global::semaphore) {
13987 $sem = acquire_semaphore();
13989 $SIG{TERM} = $Global::original_sig{TERM};
13990 $SIG{HUP} = \&start_no_new_jobs;
13992 if($opt::tee or $opt::shard or $opt::bin) {
13993 # All jobs must be running in parallel for --tee/--shard/--bin
13994 while(start_more_jobs()) {}
13995 $Global::start_no_new_jobs = 1;
13996 if(not $Global::JobQueue->empty()) {
13998 ::error("--tee requires
--jobs to be higher
. Try
--jobs
0.");
13999 } elsif($opt::bin) {
14000 ::error("--bin requires
--jobs to be higher than the number of
",
14001 "arguments
. Increase
--jobs
.");
14002 } elsif($opt::shard) {
14003 ::error("--shard requires
--jobs to be higher than the number of
",
14004 "arguments
. Increase
--jobs
.");
14006 ::die_bug("--bin
/--shard/--tee should
not get here
");
14008 ::wait_and_exit(255);
14010 } elsif($opt::pipe and not $opt::pipepart) {
14011 # Fill all jobslots
14012 while(start_more_jobs()) {}
14015 # Reap the finished jobs and start more
14016 while(reapers() + start_more_jobs()) {}
14018 ::debug("init
", "Start draining
\n");
14019 drain_job_queue(@command);
14020 ::debug("init
", "Done draining
\n");
14022 ::debug("init
", "Done reaping
\n");
14023 if($Global::semaphore) {
14027 ::debug("init
", "Halt
\n");