3 # Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <https://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # SPDX-FileCopyrightText: 2007-2024 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc.
22 # SPDX-License-Identifier: GPL-3.0-or-later
24 # open3 used in Job::start
27 # gensym used in Job::start
28 use Symbol
qw(gensym);
29 # tempfile used in Job::start
30 use File
::Temp
qw(tempfile tempdir);
31 # mkpath used in openresultsfile
33 # GetOptions used in get_options_from_array
35 # Used to ensure code quality
39 sub set_input_source_header
($$) {
40 my ($command_ref,$input_source_fh_ref) = @_;
41 if(defined $opt::header
and not $opt::pipe) {
42 # split with colsep or \t
43 # $header force $colsep = \t if undef?
44 my $delimiter = defined $opt::colsep ?
$opt::colsep
: "\t";
46 my $left = "\Q$Global::parensleft\E";
47 my $l = $Global::parensleft
;
49 my $right = "\Q$Global::parensright\E";
50 my $r = $Global::parensright
;
51 if($opt::header
ne "0") {
53 for my $fh (@
$input_source_fh_ref) {
57 ::debug
("init", "Delimiter: '$delimiter'");
58 for my $s (split /$delimiter/o, $line) {
59 ::debug
("init", "Colname: '$s'");
60 # Replace {colname} with {2}
61 for(@
$command_ref, @Global::ret_files
,
62 @Global::transfer_files
, $opt::tagstring
,
63 $opt::workdir
, $opt::results
, $opt::retries
,
64 @Global::template_contents
, @Global::template_names
,
68 s
:\
{$s(|/|//|\.|/\
.)\
}:\
{$id$1\
}:g
;
69 # {=header1 ... =} => {=1 ... =}
70 s
:$left $s (.*?
) $right:$l$id$1$r:gx
;
72 $Global::input_source_header
{$id} = $s;
77 # Make it possible to do:
78 # parallel --header 0 echo {file2} {file1} :::: file1 file2
81 # ::: are put into files and given a filehandle
82 # ignore these and only keep the filenames.
84 for(@
$command_ref, @Global::ret_files
,
85 @Global::transfer_files
, $opt::tagstring
,
86 $opt::workdir
, $opt::results
, $opt::retries
,
87 @Global::template_contents
, @Global::template_names
,
91 s
:\
{\Q
$s\E
(|/|//|\.|/\
.)\
}:\
{$id$1\
}:g
;
92 # {=header1 ... =} => {=1 ... =}
93 s
:$left \Q
$s\E
(.*?
) $right:$l$id$1$r:gx
;
95 $Global::input_source_header
{$id} = $s;
100 for my $fh (@
$input_source_fh_ref) {
101 $Global::input_source_header
{$id} = $id;
107 sub max_jobs_running
() {
108 # Compute $Global::max_jobs_running as the max number of jobs
109 # running on each sshlogin.
111 # $Global::max_jobs_running
112 if(not $Global::max_jobs_running
) {
113 for my $sshlogin (values %Global::host
) {
114 $sshlogin->max_jobs_running();
117 if(not $Global::max_jobs_running
) {
118 ::error
("Cannot run any jobs.");
121 return $Global::max_jobs_running
;
125 # Compute exit value,
126 # wait for children to complete
128 if($opt::halt
and $Global::halt_when
ne "never") {
129 if(not defined $Global::halt_exitstatus
) {
130 if($Global::halt_pct
) {
131 $Global::halt_exitstatus
=
132 ::ceil
($Global::total_failed
/
133 ($Global::total_started
|| 1) * 100);
134 } elsif($Global::halt_count
) {
135 $Global::halt_exitstatus
=
136 ::min
(undef_as_zero
($Global::total_failed
),101);
139 wait_and_exit
($Global::halt_exitstatus
);
141 if($Global::semaphore
) {
142 # --semaphore runs a single job:
143 # Use exit value of that
144 wait_and_exit
($Global::halt_exitstatus
);
146 # 0 = all jobs succeeded
147 # 1-100 = n jobs failed
148 # 101 = >100 jobs failed
149 wait_and_exit
(min
(undef_as_zero
($Global::exitstatus
),101));
155 sub __PIPE_MODE__
() {}
158 sub pipepart_setup
() {
159 # Compute the blocksize
160 # Generate the commands to extract the blocks
161 # Push the commands on queue
163 # @Global::cat_prepends
166 # Prepend each command with
168 my $cat_string = "< ".Q
($opt::a
[0]);
169 for(1..$Global::JobQueue
->total_jobs()) {
170 push @Global::cat_appends
, $cat_string;
171 push @Global::cat_prepends
, "";
174 if(not $opt::blocksize
) {
175 # --blocksize with 10 jobs per jobslot
176 $opt::blocksize
= -10;
178 if($opt::roundrobin
) {
179 # --blocksize with 1 job per jobslot
180 $opt::blocksize
= -1;
182 if($opt::blocksize
< 0) {
189 $size += size_of_block_dev
($_);
191 ::error
("$_ is neither a file nor a block device");
194 ::error
("File not found: $_");
198 # Run in total $job_slots*(- $blocksize) jobs
199 # Set --blocksize = size / no of proc / (- $blocksize)
200 $Global::dummy_jobs
= 1;
201 $Global::blocksize
= 1 +
202 int($size / max_jobs_running() /
203 -multiply_binary_prefix
($opt::blocksize
));
205 @Global::cat_prepends
= (map { pipe_part_files
($_) }
206 # ::: are put into files and given a filehandle
207 # ignore these and only keep the filenames.
208 grep { ! fileno $_ } @opt::a
);
209 # Unget the empty arg as many times as there are parts
210 $Global::JobQueue
->{'commandlinequeue'}{'arg_queue'}->unget(
211 map { [Arg
->new("\0noarg")] } @Global::cat_prepends
216 sub pipe_tee_setup
() {
217 # Create temporary fifos
218 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
219 # This will spread the input to fifos
220 # Generate commands that reads from fifo1..N:
221 # cat fifo | user_command
223 # @Global::cat_prepends
225 for(1..$Global::JobQueue
->total_jobs()) {
226 push @fifos, tmpfifo
();
228 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
230 # Test if tee supports --output-error=warn-nopipe
231 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
232 my $opt = $? ?
"" : "--output-error=warn-nopipe";
233 ::debug
("init","tee $opt");
235 # This is not exactly what is run, but it gives the basic idea
236 print "mkfifo @fifos\n";
237 print "tee $opt @fifos >/dev/null &\n";
239 # Let tee inherit our stdin
240 # and redirect stdout to null
241 open STDOUT
, ">","/dev/null";
243 exec "tee", $opt, @fifos;
251 # (rm fifo1; grep 1) < fifo1
252 # (rm fifo2; grep 2) < fifo2
253 # (rm fifo3; grep 3) < fifo3
254 # Remove the tmpfifo as soon as it is open
255 @Global::cat_prepends
= map { "(rm $_;" } shell_quote
(@fifos);
256 @Global::cat_appends
= map { ") < $_" } shell_quote
(@fifos);
260 sub parcat_script
() {
261 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
263 use POSIX qw(:errno_h);
268 use Fcntl
qw(:DEFAULT :flock);
271 my $q = Thread
::Queue
->new();
272 my $okq = Thread
::Queue
->new();
278 print " parcat file(s)\n";
279 print " cat argfile | parcat\n";
281 # Read arguments from stdin
282 chomp(@ARGV = <STDIN
>);
285 my $files_to_open = 0;
286 # Default: fd = stdout
289 # --rm = remove file when opened
290 /^--rm$/ and do { $opt::rm
= 1; next; };
291 # -1 = output to fd 1, -2 = output to fd 2
292 /^-(\d+)$/ and do { $fd = $1; next; };
293 push @producers, threads
->create("producer", $_, $fd);
298 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
300 my $output_fd = shift;
301 open(my $fh, "<", $file) || do {
302 print STDERR
"parcat: Cannot open $file: $!\n";
305 # Remove file when it has been opened
309 set_fh_non_blocking
($fh);
311 # Pass the fileno to parent
312 $q->enqueue(fileno($fh),$output_fd);
313 # Get an OK that the $fh is opened and we can release the $fh
315 my $ok = $okq->dequeue();
316 if($ok == fileno($fh)) { last; }
317 # Not ours - very unlikely to happen
323 my $s = IO
::Select
->new();
329 open(my $infh, "<&=", $infd) || die;
330 open(my $outfh, ">&=", $outfd) || die;
332 # Tell the producer now opened here and can be released
333 $okq->enqueue($infd);
334 # Initialize the buffer
335 @
{$buffer{$infh}{$outfd}} = ();
336 $Global::fh
{$outfd} = $outfh;
340 # Non-blocking dequeue
343 ($infd,$outfd) = $q->dequeue_nb(2);
344 if(defined($outfd)) {
345 add_file
($infd,$outfd);
347 } while(defined($outfd));
350 sub add_files_block
{
352 my ($infd,$outfd) = $q->dequeue(2);
353 add_file
($infd,$outfd);
358 my (@ready,$infh,$rv,$buf);
360 # Wait until at least one file is opened
362 while($q->pending or keys %buffer) {
364 while(keys %buffer) {
365 @ready = $s->can_read(0.01);
370 # There is only one key, namely the output file descriptor
371 for my $outfd (keys %{$buffer{$infh}}) {
372 # TODO test if 60800 is optimal (2^17 is used elsewhere)
373 $rv = sysread($infh, $buf, 60800);
376 # Would block: Nothing read
379 # Nothing read, but would not block:
382 for(@
{$buffer{$infh}{$outfd}}) {
383 syswrite($Global::fh
{$outfd},$_);
385 delete $buffer{$infh};
386 # Closing the $infh causes it to block
393 # Find \n or \r for full line
394 my $i = (rindex($buf,"\n")+1);
397 for(@
{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
398 syswrite($Global::fh
{$outfd},$_);
400 # @buffer = remaining half line
401 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
403 # Something read, but not a full line
404 push @
{$buffer{$infh}{$outfd}}, $buf;
411 } while($opened < $files_to_open);
417 sub set_fh_non_blocking
{
418 # Set filehandle as non-blocking
420 # $fh = filehandle to be blocking
425 fcntl($fh, &F_GETFL
, $flags) || die $!; # Get the current flags on the filehandle
426 $flags |= &O_NONBLOCK
; # Add non-blocking to the flags
427 fcntl($fh, &F_SETFL
, $flags) || die $!; # Set the flags on the filehandle
430 return ::spacefree(3, $script);
433 sub sharder_script() {
438 # Which columns to shard on (count from 1)
440 # Which columns to shard on (count from 0)
443 my $perlexpr = shift;
445 # Open fifos for writing, fh{0..$bins}
449 open $fh{$t++}, ">", $_;
450 # open blocks until it is opened by reader
451 # so unlink only happens when it is ready
455 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
457 # Split into $col columns (no need to split into more)
458 @F = split $sep, $_, $col+1;
460 local $_ = $F[$col0];
462 $fh = $fh{ hex(B::hash($_))%$bins };
468 # Split into $col columns (no need to split into more)
469 @F = split $sep, $_, $col+1;
470 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
474 # Close all open fifos
477 return ::spacefree(1, $script);
480 sub binner_script() {
485 # Which columns to shard on (count from 1)
487 # Which columns to shard on (count from 0)
490 my $perlexpr = shift;
492 # Open fifos for writing, fh{0..$bins}
495 # Let the last output fifo be the 0'th
496 open $fh{$t++}, ">", pop @ARGV;
498 open $fh{$t++}, ">", $_;
499 # open blocks until it is opened by reader
500 # so unlink only happens when it is ready
504 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
506 # Split into $col columns (no need to split into more)
507 @F = split $sep, $_, $col+1;
509 local $_ = $F[$col0];
511 $fh = $fh{ $_%$bins };
517 # Split into $col columns (no need to split into more)
518 @F = split $sep, $_, $col+1;
519 $fh = $fh{ $F[$col0]%$bins };
523 # Close all open fifos
526 return ::spacefree
(1, $script);
529 sub pipe_shard_setup
() {
530 # Create temporary fifos
531 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
532 # This will spread the input to fifos
533 # Generate commands that reads from fifo1..N:
534 # cat fifo | user_command
536 # @Global::cat_prepends
539 # TODO $opt::jobs should be evaluated (100%)
540 # TODO $opt::jobs should be number of total_jobs if there are arguments
542 my $njobs = $Global::max_jobs_running
;
543 for my $m (0..$njobs-1) {
544 for my $n (0..$njobs-1) {
545 # sharding to A B C D
546 # parcatting all As together
547 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo
();
550 my $shardbin = ($opt::shard
|| $opt::bin
);
553 $script = binner_script
();
555 $script = sharder_script
();
558 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
560 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
561 # Group by column name
562 # (Yes, this will also wrongly match a perlexpr like: chop)
563 my($read,$char,@line);
564 # A full line, but nothing more (the rest must be read by the child)
565 # $Global::header used to prepend block to each job
567 $read = sysread(STDIN
,$char,1);
569 } while($read and $char ne "\n");
570 $Global::header
= join "", @line;
572 my ($col, $perlexpr, $subref) =
573 column_perlexpr
($shardbin, $Global::header
, $opt::colsep
);
575 # Let the sharder inherit our stdin
576 # and redirect stdout to null
577 open STDOUT
, ">","/dev/null";
578 # The PERL_HASH_SEED must be the same for all sharders
579 # so B::hash will return the same value for any given input
580 $ENV{'PERL_HASH_SEED'} = $$;
581 exec qw(parallel -0 --block 100k -q --pipe -j), $njobs,
582 qw(--roundrobin -u perl -e), $script, ($opt::colsep
|| ","),
583 $col, $perlexpr, '{}', (map { (':::+', @
{$_}) } @parcatfifos);
586 # (rm fifo1; grep 1) < fifo1
587 # (rm fifo2; grep 2) < fifo2
588 # (rm fifo3; grep 3) < fifo3
589 my $parcat = Q
(parcat_script
());
591 ::error
("'parcat' must be in path.");
592 ::wait_and_exit
(255);
594 @Global::cat_prepends
=
595 map { "perl -e $parcat ".
596 join(" ",shell_quote
(@
$_))." | "} @parcatfifos;
599 sub pipe_part_files
(@
) {
601 # - find header and split positions
602 # - make commands that 'cat's the partial file
604 # $file = the file to read
606 # @commands that will cat_partial each part
609 if(not -f
$file and not -b
$file) {
610 ::error
("--pipepart only works on seekable files, not streams/pipes.",
611 "$file is not a seekable file.");
612 ::wait_and_exit
(255);
615 my $fh = open_or_exit
("<",$file);
616 my $firstlinelen = 0;
617 if($opt::skip_first_line
) {
619 # Read a full line one byte at a time
620 while($firstlinelen += sysread($fh,$newline,1,0)) {
621 $newline eq "\n" and last;
624 my $header = find_header
(\
$buf,$fh);
626 my @pos = find_split_positions
($file,int($Global::blocksize
),
627 $header,$firstlinelen);
629 my @cat_prepends = ();
630 for(my $i=0; $i<$#pos; $i++) {
632 cat_partial
($file, $firstlinelen, $firstlinelen+length($header),
633 $pos[$i], $pos[$i+1]));
635 return @cat_prepends;
638 sub find_header
($$) {
639 # Compute the header based on $opt::header
641 # $buf_ref = reference to read-in buffer
642 # $fh = filehandle to read from
649 my ($buf_ref, $fh) = @_;
651 # $Global::header may be set in group_by_loop()
652 if($Global::header
) { return $Global::header
}
654 if($opt::header
eq ":") { $opt::header
= "(.*\n)"; }
655 # Number = number of lines
656 $opt::header
=~ s/^(\d+)$/"(.*\n)"x$1/e;
657 while(sysread($fh,$$buf_ref,int($Global::blocksize
),length $$buf_ref)) {
658 if($$buf_ref =~ s/^($opt::header)//) {
667 sub find_split_positions
($$$) {
668 # Find positions in bigfile where recend is followed by recstart
670 # $file = the file to read
671 # $block = (minimal) --block-size of each chunk
672 # $header = header to be skipped
677 # @positions of block start/end
678 my($file, $block, $header, $firstlinelen) = @_;
679 my $skiplen = $firstlinelen + length $header;
682 # $file is a blockdevice
683 $size = size_of_block_dev
($file);
687 return split_positions_for_group_by
($file,$size,$block,
688 $header,$firstlinelen);
690 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
691 # The optimal dd blocksize for freebsd = 2^15..2^17
692 # The optimal dd blocksize for ubuntu (AMD6376) = 2^16
693 my $dd_block_size = 131072; # 2^17
695 my ($recstart,$recend) = recstartrecend
();
696 my $recendrecstart = $recend.$recstart;
697 my $fh = ::open_or_exit
("<",$file);
699 for(my $pos = $block+$skiplen; $pos < $size; $pos += $block) {
701 if($recendrecstart eq "") {
702 # records ends anywhere
705 # Seek the the block start
706 if(not sysseek($fh, $pos, 0)) {
707 ::error
("Cannot seek to $pos in $file");
710 while(sysread($fh,$buf,$dd_block_size,length $buf)) {
712 # If match /$recend$recstart/ => Record position
713 if($buf =~ m
:^(.*$recend)$recstart:os
) {
714 # Start looking for next record _after_ this match
720 # If match $recend$recstart => Record position
721 # TODO optimize to only look at the appended
722 # $dd_block_size + len $recendrecstart
723 # TODO increase $dd_block_size to optimize for longer records
724 my $i = index64
(\
$buf,$recendrecstart);
726 # Start looking for next record _after_ this match
727 $pos += $i + length($recend);
735 if($pos[$#pos] != $size) {
736 # Last splitpoint was not at end of the file: add $size as the last
743 sub split_positions_for_group_by
($$$$) {
748 if(not defined $value{$pos}) {
750 seek($fh, $pos-1, 0) || die;
755 my $linepos = tell($fh);
756 if(not defined $value{$linepos}) {
761 if(defined $group_by::col
) {
762 $opt::colsep
||= "\t";
763 @F = split /$opt::colsep/, $_;
764 $_ = $F[$group_by::col
];
766 eval $group_by::perlexpr
;
768 $value{$linepos} = [$_,$linepos];
770 $value{$pos} = $value{$linepos};
772 return (@
{$value{$pos}});
775 sub binary_search_end
($$$) {
776 my ($s,$spos,$epos) = @_;
777 # value_at($spos) == $s
778 # value_at($epos) != $s
779 my $posdif = $epos - $spos;
782 ($v,$vpos) = value_at
($spos+$posdif);
785 $posdif = $epos - $spos;
789 $posdif = int($posdif/2);
794 sub binary_search_start
($$$) {
795 my ($s,$spos,$epos) = @_;
796 # value_at($spos) != $s
797 # value_at($epos) == $s
798 my $posdif = $epos - $spos;
801 ($v,$vpos) = value_at
($spos+$posdif);
806 $posdif = $epos - $spos;
808 $posdif = int($posdif/2);
813 my ($file,$size,$block,$header,$firstlinelen) = @_;
815 $fh = open_or_exit
("<",$file);
816 # Set $Global::group_by_column $Global::group_by_perlexpr
817 group_by_loop
($fh,$opt::recsep
);
819 # Split after n values
821 # $xpos = linestart, $x = value at $xpos
822 $apos = $firstlinelen + length $header;
823 for(($a,$apos) = value_at
($apos); $apos < $size;) {
825 ($a,$apos) = binary_search_end
($a,$apos,$size);
827 push @pos, $size; last;
830 # @pos = start of every value
832 # -nX = keep every X'th position
834 @pos = grep { not ($i++ % $opt::max_args
) } @pos;
836 # Split after any value group
837 # Preferable < $blocksize
838 my ($a,$b,$c,$apos,$bpos,$cpos);
839 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
840 $apos = $firstlinelen + length $header;
841 for(($a,$apos) = value_at
($apos); $apos < $size;) {
843 $bpos = $apos + $block;
844 ($b,$bpos) = value_at
($bpos);
846 # EOF is less than 1 block away
847 push @pos, $size; last;
849 $cpos = $bpos + $block;
850 ($c,$cpos) = value_at
($cpos);
853 # Move bpos, cpos a block forward until $a == $b != $c
856 ($c,$cpos) = value_at
($cpos);
863 # Binary search for $b ending between ($bpos,$cpos)
864 ($b,$bpos) = binary_search_end
($b,$bpos,$cpos);
868 # Binary search for $b starting between ($apos,$bpos)
869 ($b,$bpos) = binary_search_start
($b,$apos,$bpos);
872 # Binary search for $b ending between ($bpos,$cpos)
873 ($b,$bpos) = binary_search_end
($b,$bpos,$cpos);
876 ($a,$apos) = ($b,$bpos);
879 if($pos[$#pos] != $size) {
880 # Last splitpoint was not at end of the file: add it
886 sub cat_partial
($@
) {
887 # Efficient command to copy from byte X to byte Y
889 # $file = the file to read
890 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
892 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
893 my($file, @start_end) = @_;
895 # Convert (start,end) to (start,len)
896 my @start_len = map {
897 if(++$i % 2) { $start = $_; } else { $_-$start }
899 # The optimal block size differs
900 # It has been measured on:
901 # AMD 6376: n*4k-1; small n
902 # AMD Neo N36L: 44k-200k
903 # Intel i7-3632QM: 55k-
904 # ARM Cortex A53: 4k-28k
905 # Intel i5-2410M: 36k-46k
907 # I choose 2^15-1 = 32767
912 # $first = shift || 1;
913 # $inc = shift || 1.03;
914 # for($i=$first; $i<=$last;$i*=$inc) { say int $i }
918 # seq 111111111 > big;
919 # f() { ppar --test $1 -a big --pipepart --block -1 'md5sum > /dev/null'; }
921 # expseq 1000 1.001 300000 | shuf | parallel -j1 --jl jl-md5sum f;
923 my $script = spacefree
927 sysseek(STDIN,shift,0) || die;
930 sysread(STDIN,$buf, $left > 32767 ? 32767 : $left)){
932 syswrite(STDOUT,$buf);
936 return "<". Q($file) .
937 " perl -e '$script' @start_len |";
940 sub column_perlexpr($$$) {
941 # Compute the column number (if any), perlexpression from combined
942 # string (such as --shard key, --groupby key, {=n perlexpr=}
944 # $column_perlexpr = string with column and perl expression
945 # $header = header from input file (if column is column name)
946 # $colsep = column separator regexp
948 # $col = column number
949 # $perlexpr = perl expression
950 # $subref = compiled perl expression as sub reference
951 my ($column_perlexpr, $header, $colsep) = @_;
952 my ($col, $perlexpr, $subref);
953 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
954 # Column name/number (possibly prefix)
955 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
956 # Column number (possibly prefix)
958 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
959 # Column name (possibly prefix)
961 # Split on --copsep pattern
962 my @headers = split /$colsep/, $header;
964 @headers{@headers} = (1..($#headers+1));
965 $col = $headers{$colname};
966 if(not defined $col) {
967 ::error("Column '$colname' $colsep not found in header",keys %headers);
968 ::wait_and_exit(255);
972 # What is left of $column_perlexpr is $perlexpr (possibly empty)
973 $perlexpr = $column_perlexpr;
974 $subref = eval("sub { no strict; no warnings; $perlexpr }");
975 return($col, $perlexpr, $subref);
978 sub group_by_loop($$) {
979 # Generate perl code for group-by loop
980 # Insert a $recsep when the column value changes
981 # The column value can be computed with $perlexpr
982 my($fh,$recsep) = @_;
983 my $groupby = $opt::groupby;
984 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
985 # Group by column name
986 # (Yes, this will also wrongly match a perlexpr like: chop)
987 my($read,$char,@line);
988 # Read a full line, but nothing more
989 # (the rest must be read by the child)
990 # $Global::header used to prepend block to each job
992 $read = sysread($fh,$char,1);
994 } while($read and $char ne "\n");
995 $Global::header = join "", @line;
997 $opt::colsep ||= "\t";
998 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
999 column_perlexpr($groupby, $Global::header, $opt::colsep);
1000 # Numbered 0..n-1 due to being used by $F[n]
1001 if($group_by::col) { $group_by::col--; }
1003 my $loop = ::spacefree(0,q{
1004 BEGIN{ $last = "RECSEP"; }
1014 if(defined $group_by::col) {
1015 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
1017 $loop =~ s/COLVALUE/\$_/g;
1019 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
1020 $loop =~ s/RECSEP/$recsep/g;
1024 sub pipe_group_by_setup() {
1025 # Record separator with 119 bit random value
1028 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
1029 $opt::remove_rec_sep = 1;
1031 push @filter, "perl";
1032 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
1033 # This is column number/name
1034 # Use -a (auto-split)
1036 $opt::colsep ||= "\t";
1037 my $sep = $opt::colsep;
1040 # man perlrun: -Fpattern [...] You can't use literal whitespace
1041 $sep =~ s/ /\\040/g;
1042 push @filter, "-F$sep";
1044 push @filter, "-pe";
1045 push @filter, group_by_loop(*STDIN,$opt::recstart);
1046 ::debug("init", "@filter\n");
1047 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
1048 if(which("mbuffer")) {
1049 # You get a speed up of 30% by going through mbuffer
1050 open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") ||
1051 die ("Cannot start mbuffer");
1057 # Spawn a job and print the record to it.
1059 # $Global::blocksize
1062 # $Global::max_lines
1063 # $Global::max_number_of_args
1065 # $Global::start_no_new_jobs
1071 my ($recstart,$recend) = recstartrecend();
1072 my $recendrecstart = $recend.$recstart;
1073 my $chunk_number = 1;
1074 my $one_time_through;
1075 my $two_gb = 2**31-1;
1076 my $blocksize = int($Global::blocksize);
1078 my $timeout = $Global::blocktimeout;
1080 if($opt::skip_first_line) {
1082 # Read a full line one byte at a time
1083 while(sysread($in,$newline,1,0)) {
1084 $newline eq "\n" and last;
1087 my $header = find_header(\$buf,$in);
1088 my $anything_written;
1093 # Read a --blocksize from STDIN
1094 # possibly interrupted by --blocktimeout
1095 # Add up to the next full block
1096 my $readsize = $blocksize - (length $buf) % $blocksize;
1099 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
1100 # --blocktimeout (or 0 if not set)
1104 $nread = sysread $in, $buf, $readsize, length $buf;
1105 $readsize -= $nread;
1106 } while($readsize and $nread);
1108 # Less efficient reading, but 32-bit sysread compatible
1110 $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
1111 $readsize -= $nread;
1112 } while($readsize and $nread);
1117 die unless $@ eq "alarm\n"; # propagate unexpected errors
1122 $eof = not ($nread or $alarm);
1125 sub pass_n_line_records() {
1126 # Pass records of N lines
1127 my $n_lines = $buf =~ tr/\n/\n/;
1128 my $last_newline_pos = rindex64(\$buf,"\n");
1129 # Go backwards until there are full n-line records
1130 while($n_lines % $Global::max_lines) {
1132 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1134 # Chop at $last_newline_pos as that is where n-line record ends
1135 $anything_written +=
1136 write_record_to_pipe($chunk_number++,\$header,\$buf,
1137 $recstart,$recend,$last_newline_pos+1);
1138 shorten(\$buf,$last_newline_pos+1);
1141 sub pass_n_regexps() {
1142 # Pass records of N regexps
1143 # -N => (start..*?end){n}
1144 # -L -N => (start..*?end){n*l}
1145 if(not $garbage_read) {
1147 if($buf !~ /^$recstart/o) {
1148 # Buf does not start with $recstart => There is garbage.
1149 # Make a single record of the garbage
1152 (?:(?:(?!$recend$recstart)(?s:.))*?$recend)
1154 # Followed by recstart
1155 (?=$recstart)/mox and length $1 > 0) {
1156 $anything_written +=
1157 write_record_to_pipe($chunk_number++,\$header,\$buf,
1158 $recstart,$recend,length $1);
1159 shorten(\$buf,length $1);
1165 $Global::max_number_of_args * ($Global::max_lines || 1);
1166 # (?!negative lookahead) is needed to avoid backtracking
1167 # See: https://unix.stackexchange.com/questions/439356/
1168 # (?s:.) = (.|[\n]) but faster
1171 # n more times recstart.*recend
1172 (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records}
1174 # Followed by recstart
1175 (?=$recstart)/mox and length $1 > 0) {
1176 $anything_written +=
1177 write_record_to_pipe($chunk_number++,\$header,\$buf,
1178 $recstart,$recend,length $1);
1179 shorten(\$buf,length $1);
1184 # Find the last recend-recstart in $buf
1186 # (?s:.) = (.|[\n]) but faster
1187 if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) {
1188 $anything_written +=
1189 write_record_to_pipe($chunk_number++,\$header,\$buf,
1190 $recstart,$recend,length $1);
1191 shorten(\$buf,length $1);
1195 sub pass_csv_record() {
1197 # We define a CSV record as an even number of " + end of line
1198 # This works if you use " as quoting character
1199 my $last_newline_pos = length $buf;
1200 # Go backwards from the last \n and search for a position
1201 # where there is an even number of "
1204 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1206 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1207 and $last_newline_pos >= 0);
1208 # Chop at $last_newline_pos as that is where CSV record ends
1209 $anything_written +=
1210 write_record_to_pipe($chunk_number++,\$header,\$buf,
1211 $recstart,$recend,$last_newline_pos+1);
1212 shorten(\$buf,$last_newline_pos+1);
1216 # Pass n records of --recend/--recstart
1217 # -N => (start..*?end){n}
1220 $Global::max_number_of_args * ($Global::max_lines || 1);
1221 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1224 $i += length $recend; # find the actual splitting location
1225 $anything_written +=
1226 write_record_to_pipe($chunk_number++,\$header,\$buf,
1227 $recstart,$recend,$i);
1233 # Pass records of --recend/--recstart
1234 # Split record at fixed string
1235 # Find the last recend+recstart in $buf
1237 my $i = rindex64(\$buf,$recendrecstart);
1239 $i += length $recend; # find the actual splitting location
1240 $anything_written +=
1241 write_record_to_pipe($chunk_number++,\$header,\$buf,
1242 $recstart,$recend,$i);
1247 sub increase_blocksize_maybe() {
1248 if(not $anything_written
1249 and not $opt::blocktimeout
1250 and not $Global::no_autoexpand_block) {
1251 # Nothing was written - maybe the block size < record size?
1252 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1253 if($blocksize < $two_gb) {
1254 my $old_blocksize = $blocksize;
1255 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1256 ::warning("A record was longer than $old_blocksize. " .
1257 "Increasing to --blocksize $blocksize.");
1263 $anything_written = 0;
1266 # Remove empty lines
1267 $buf =~ s/^\s*\n//gm;
1268 if(length $buf == 0) {
1276 if($Global::max_lines and not $Global::max_number_of_args) {
1277 # Pass n-line records
1278 pass_n_line_records();
1279 } elsif($opt::csv) {
1280 # Pass a full CSV record
1282 } elsif($opt::regexp) {
1283 # Split record at regexp
1284 if($Global::max_number_of_args) {
1290 # Pass normal --recend/--recstart record
1291 if($Global::max_number_of_args) {
1298 increase_blocksize_maybe();
1299 ::debug("init", "Round\n");
1301 ::debug("init", "Done reading input\n");
1303 # If there is anything left in the buffer write it
1304 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1305 $recend, length $buf);
1308 $Global::no_more_input = 1;
1309 # We need to start no more jobs: At most we need to retry some
1310 # of the already running.
1311 my @running = values %Global::running;
1313 for my $job (@running) {
1314 if(defined $job and $job->virgin()) {
1315 close $job->fh(0,"w");
1318 # Wait for running jobs to be done
1320 while($Global::total_running > 0) {
1321 $sleep = ::reap_usleep($sleep);
1325 $Global::start_no_new_jobs ||= 1;
1326 if($opt::roundrobin) {
1327 # Flush blocks to roundrobin procs
1329 while(%Global::running) {
1330 my $something_written = 0;
1331 for my $job (values %Global::running) {
1332 if($job->block_length()) {
1333 $something_written += $job->non_blocking_write();
1335 close $job->fh(0,"w");
1338 if($something_written) {
1339 $sleep = $sleep/2+0.001;
1341 $sleep = ::reap_usleep($sleep);
1346 sub recstartrecend() {
1351 # $recstart,$recend with default values and regexp conversion
1352 my($recstart,$recend);
1353 if(defined($opt::recstart) and defined($opt::recend)) {
1354 # If both --recstart and --recend is given then both must match
1355 $recstart = $opt::recstart;
1356 $recend = $opt::recend;
1357 } elsif(defined($opt::recstart)) {
1358 # If --recstart is given it must match start of record
1359 $recstart = $opt::recstart;
1361 } elsif(defined($opt::recend)) {
1362 # If --recend is given then it must match end of record
1364 $recend = $opt::recend;
1365 if($opt::regexp and $recend eq '') {
1366 # --regexp --recend ''
1372 # Do not allow /x comments - to avoid having to quote space
1373 $recstart = "(?-x:".$recstart.")";
1374 $recend = "(?-x:".$recend.")";
1375 # If $recstart/$recend contains '|'
1376 # the | should only apply to the regexp
1377 $recstart = "(?:".$recstart.")";
1378 $recend = "(?:".$recend.")";
1380 # $recstart/$recend = printf strings (\n)
1381 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1382 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1384 return ($recstart,$recend);
1388 # See if string is in buffer N times
1390 # the position where the Nth copy is found
1391 my ($buf_ref, $str, $n) = @_;
1394 $i = index64($buf_ref,$str,$i+1);
1395 if($i == -1) { last }
1404 sub round_robin_write($$$$$) {
1406 # $header_ref = ref to $header string
1407 # $block_ref = ref to $block to be written
1408 # $recstart = record start string
1409 # $recend = record end string
1410 # $endpos = end position of $block
1414 # $something_written = amount of bytes written
1415 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1417 my $block_passed = 0;
1418 while(not $block_passed) {
1419 # Continue flushing existing buffers
1420 # until one is empty and a new block is passed
1422 # Rotate queue once so new blocks get a fair chance
1423 # to be given to another slot
1424 push @robin_queue, shift @robin_queue;
1426 # Make a queue to spread the blocks evenly
1427 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1428 values %Global::running);
1432 for my $job (@robin_queue) {
1433 if($job->block_length() > 0) {
1434 $written += $job->non_blocking_write();
1436 $job->set_block($header_ref, $buffer_ref,
1437 $endpos, $recstart, $recend);
1439 $written += $job->non_blocking_write();
1444 $sleep = $sleep/1.5+0.001;
1446 # Don't sleep if something is written
1447 } while($written and not $block_passed);
1448 $sleep = ::reap_usleep($sleep);
1455 # Do index on strings > 2GB.
1456 # index in Perl < v5.22 does not work for > 2GB
1458 # as index except STR which must be passed as a reference
1463 my $pos = shift || 0;
1464 my $max2gb = 2**31-1;
1465 my $strlen = length($$ref);
1466 # No point in doing extra work if we don't need to.
1467 if($strlen < $max2gb or $] > 5.022) {
1468 return index($$ref, $match, $pos);
1471 my $matchlen = length($match);
1474 while($offset < $strlen) {
1476 substr($$ref, $offset, $max2gb),
1477 $match, $pos-$offset);
1479 return $ret + $offset;
1481 $offset += ($max2gb - $matchlen - 1);
1487 # Do rindex on strings > 2GB.
1488 # rindex in Perl < v5.22 does not work for > 2GB
1490 # as rindex except STR which must be passed as a reference
1496 my $block_size = 2**31-1;
1497 my $strlen = length($$ref);
1498 # Default: search from end
1499 $pos = defined $pos ? $pos : $strlen;
1500 # No point in doing extra work if we don't need to.
1501 if($strlen < $block_size or $] > 5.022) {
1502 return rindex($$ref, $match, $pos);
1505 my $matchlen = length($match);
1507 my $offset = $pos - $block_size + $matchlen;
1509 # The offset is less than a $block_size
1510 # Set the $offset to 0 and
1511 # Adjust block_size accordingly
1512 $block_size = $block_size + $offset;
1515 while($offset >= 0) {
1517 substr($$ref, $offset, $block_size),
1520 return $ret + $offset;
1522 $offset -= ($block_size - $matchlen - 1);
1528 # Do: substr($buf,0,$i) = "";
1529 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1532 # $i = position to shorten to
1534 my ($buf_ref, $i) = @_;
1535 my $two_gb = 2**31-1;
1536 while($i > $two_gb) {
1537 substr($$buf_ref,0,$two_gb) = "";
1540 substr($$buf_ref,0,$i) = "";
1543 sub write_record_to_pipe($$$$$$) {
1545 # Write record from pos 0 .. $endpos to pipe
1547 # $chunk_number = sequence number - to see if already run
1548 # $header_ref = reference to header string to prepend
1549 # $buffer_ref = reference to record to write
1550 # $recstart = start string of record
1551 # $recend = end string of record
1552 # $endpos = position in $buffer_ref where record ends
1554 # $Global::job_already_run
1556 # @Global::virgin_jobs
1558 # Number of chunks written (0 or 1)
1559 my ($chunk_number, $header_ref, $buffer_ref,
1560 $recstart, $recend, $endpos) = @_;
1561 if($endpos == 0) { return 0; }
1562 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1563 if($opt::roundrobin) {
1564 # Write the block to one of the already running jobs
1565 return round_robin_write($header_ref, $buffer_ref,
1566 $recstart, $recend, $endpos);
1568 # If no virgin found, backoff
1569 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1570 while(not @Global::virgin_jobs) {
1571 ::debug("pipe", "No virgin jobs");
1572 $sleep = ::reap_usleep($sleep);
1573 # Jobs may not be started because of loadavg
1574 # or too little time between each ssh login
1575 # or retrying failed jobs.
1578 my $job = shift @Global::virgin_jobs;
1579 $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend);
1580 $job->write_block();
1585 sub __SEM_MODE__() {}
1588 sub acquire_semaphore() {
1589 # Acquires semaphore. If needed: spawns to the background
1593 # The semaphore to be released when jobs is complete
1594 $Global::host{':'} = SSHLogin->new(":");
1595 my $sem = Semaphore->new($Semaphore::name,
1596 $Global::host{':'}->max_jobs_running());
1598 if($Semaphore::fg) {
1604 # If run in the background, the PID will change
1612 sub __PARSE_OPTIONS__() {}
1614 sub shell_completion() {
1615 if($opt::shellcompletion eq "zsh") {
1618 } elsif($opt::shellcompletion eq "bash") {
1621 } elsif($opt::shellcompletion eq "auto") {
1622 if($Global::shell =~ m:/zsh$|^zsh$:) {
1625 } elsif($Global::shell =~ m:/bash$|^bash$:) {
1629 ::error("--shellcompletion is not implemented for ".
1630 "'$Global::shell'.");
1634 ::error("--shellcompletion is not implemented for ".
1635 "'$opt::shellcompletion'.");
1640 sub bash_competion() {
1642 # complete -F _comp_parallel parallel;
1643 # _comp_parallel() {
1644 # COMPREPLY=($(compgen -W "--options" --
1645 # "${COMP_WORDS[$COMP_CWORD]}"));
1647 my @bash_completion =
1648 ("complete -F _comp_parallel parallel;",
1649 '_comp_parallel() { COMPREPLY=($(compgen -W "');
1650 my @och = options_completion_hash();
1654 # "joblog|jl=s[Logfile for executed jobs]:logfile:_files"
1655 if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) {
1663 # internal options start with --_
1666 push @bash_completion,
1667 (map { (length $_ == 1) ? "-$_ " : "--$_ " }
1673 push @bash_completion,'" -- "${COMP_WORDS[$COMP_CWORD]}")); };'."\n";
1674 print @bash_completion;
1677 sub zsh_competion() {
1678 # Print code used for completion in zsh
1679 my @zsh_completion =
1680 ("compdef _comp_parallel parallel; ",
1681 "setopt localoptions extended_glob; ",
1682 "_comp_parallel() { ",
1684 my @och = options_completion_hash();
1688 # "joblog|jl=s[Logfile for executed jobs]:logfile:_files"
1689 if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) {
1697 # internal options start with --_
1701 my $zsh_opt = join(",",
1702 (map { (length $_ == 1) ? "-$_" : "--$_" }
1704 if($zsh_opt =~ /,/) { $zsh_opt = "{$zsh_opt}"; }
1705 $desc =~ s/'/'"'"'/g;
1706 $argdesc =~ s/'/'"'"'/g;
1707 $func =~ s/'/'"'"'/g;
1708 push @zsh_completion, $zsh_opt."'".$desc.$argdesc.$func."' ";
1713 push @zsh_completion,
1714 q{'(-)1:command:{_command_names -e}' },
1715 q{'*::arguments
:_normal
'},
1717 print @zsh_completion;
1720 sub options_hash() {
1722 # %hash = for GetOptions
1723 my %och = options_completion_hash();
1726 while(($k,$v) = each %och) {
1727 # Remove description
1734 sub options_completion_hash() {
1736 # %hash = for GetOptions and shell completion
1738 ("debug|D=s" => \$opt::D,
1739 "xargs[Insert as many arguments as the command line length permits]"
1741 "m[Multiple arguments]" => \$opt::m,
1742 ("X[Insert as many arguments with context as the command line ".
1745 "v[Verbose]" => \@opt::v,
1746 "sql=s[Use --sql-master instead (obsolete)]:DBURL" => \$opt::retired,
1747 ("sql-master|sqlmaster=s".
1748 "[Submit jobs via SQL server. DBURL must point to a table, which ".
1749 "will contain --joblog, the values, and output]:DBURL"
1750 => \$opt::sqlmaster),
1751 ("sql-worker|sqlworker=s".
1752 "[Execute jobs via SQL server. Read the input sources variables ".
1753 "from the table pointed to by DBURL.]:DBURL"
1754 => \$opt::sqlworker),
1755 ("sql-and-worker|sqlandworker=s".
1756 "[--sql-master DBURL --sql-worker DBURL]:DBURL"
1757 => \$opt::sqlandworker),
1758 ("joblog|jl=s[Logfile for executed jobs]:logfile:_files"
1760 ("results|result|res=s[Save the output into files]:name:_files"
1762 "resume[Resumes from the last unfinished job]" => \$opt::resume,
1763 ("resume-failed|resumefailed".
1764 "[Retry all failed and resume from the last unfinished job]"
1765 => \$opt::resume_failed),
1766 ("retry-failed|retryfailed[Retry all failed jobs in joblog]"
1767 => \$opt::retry_failed),
1768 "silent[Silent]" => \$opt::silent,
1769 ("keep-order|keeporder|k".
1770 "[Keep sequence of output same as the order of input]"
1771 => \$opt::keeporder),
1772 ("no-keep-order|nokeeporder|nok|no-k".
1773 "[Overrides an earlier --keep-order (e.g. if set in ".
1774 "~/.parallel/config)]"
1775 => \$opt::nokeeporder),
1776 "group[Group output]" => \$opt::group,
1777 "g" => \$opt::retired,
1779 "[Output is printed as soon as possible and bypasses GNU parallel ".
1780 "internal processing]"
1782 ("latest-line|latestline|ll".
1783 "[Print latest line of each job]"
1784 => \$opt::latestline),
1785 ("line-buffer|line-buffered|linebuffer|linebuffered|lb".
1786 "[Buffer output on line basis]"
1787 => \$opt::linebuffer),
1789 "[Use tmux for output. Start a tmux session and run each job in a ".
1790 "window in that session. No other output will be produced]"
1792 ("tmux-pane|tmuxpane".
1793 "[Use tmux for output but put output into panes in the first ".
1794 "window. Useful if you want to monitor the progress of less than ".
1795 "100 concurrent jobs]"
1796 => \$opt::tmuxpane),
1797 "null|0[Use NUL as delimiter]" => \$opt::null,
1798 "quote|q[Quote command]" => \$opt::quote,
1799 # Replacement strings
1800 ("parens=s[Use parensstring instead of {==}]:parensstring"
1802 ('rpl
=s
[Define replacement string
]:"tag perl expression"'
1804 "plus[Add more replacement strings]" => \$opt::plus,
1806 "[Use the replacement string replace-str instead of {}]:replace-str"
1808 ("extensionreplace|er=s".
1809 "[Use the replacement string replace-str instead of {.} for input ".
1810 "line without extension]:replace-str"
1812 "U=s" => \$opt::retired,
1813 ("basenamereplace|bnr=s".
1814 "[Use the replacement string replace-str instead of {/} for ".
1815 "basename of input line]:replace-str"
1816 => \$opt::basenamereplace),
1817 ("dirnamereplace|dnr=s".
1818 "[Use the replacement string replace-str instead of {//} for ".
1819 "dirname of input line]:replace-str"
1820 => \$opt::dirnamereplace),
1821 ("basenameextensionreplace|bner=s".
1822 "[Use the replacement string replace-str instead of {/.} for ".
1823 "basename of input line without extension]:replace-str"
1824 => \$opt::basenameextensionreplace),
1826 "[Use the replacement string replace-str instead of {#} for job ".
1827 "sequence number]:replace-str"
1828 => \$opt::seqreplace),
1830 "[Use the replacement string replace-str instead of {%} for job ".
1831 "slot number]:replace-str"
1832 => \$opt::slotreplace),
1834 "[Delay starting next job by duration]:duration" => \$opt::delay),
1835 ("ssh-delay|sshdelay=f".
1836 "[Delay starting next ssh by duration]:duration"
1837 => \$opt::sshdelay),
1839 "[Only start jobs if load is less than max-load]:max-load"
1841 "noswap[Do not start job is computer is swapping]" => \$opt::noswap,
1842 ("max-line-length-allowed|maxlinelengthallowed".
1843 "[Print maximal command line length]"
1844 => \$opt::max_line_length_allowed),
1845 ("number-of-cpus|numberofcpus".
1846 "[Print the number of physical CPU cores and exit (obsolete)]"
1847 => \$opt::number_of_cpus),
1848 ("number-of-sockets|numberofsockets".
1849 "[Print the number of CPU sockets and exit]"
1850 => \$opt::number_of_sockets),
1851 ("number-of-cores|numberofcores".
1852 "[Print the number of physical CPU cores and exit]"
1853 => \$opt::number_of_cores),
1854 ("number-of-threads|numberofthreads".
1855 "[Print the number of hyperthreaded CPU cores and exit]"
1856 => \$opt::number_of_threads),
1857 ("use-sockets-instead-of-threads|usesocketsinsteadofthreads".
1858 "[Determine how GNU Parallel counts the number of CPUs]"
1859 => \$opt::use_sockets_instead_of_threads),
1860 ("use-cores-instead-of-threads|usecoresinsteadofthreads".
1861 "[Determine how GNU Parallel counts the number of CPUs]"
1862 => \$opt::use_cores_instead_of_threads),
1863 ("use-cpus-instead-of-cores|usecpusinsteadofcores".
1864 "[Determine how GNU Parallel counts the number of CPUs]"
1865 => \$opt::use_cpus_instead_of_cores),
1866 ("shell-quote|shellquote|shell_quote".
1867 "[Does not run the command but quotes it. Useful for making ".
1868 "quoted composed commands for GNU parallel]"
1869 => \@opt::shellquote),
1870 ('nice
=i
[Run the command at this niceness
]:niceness
:($(seq
-20 19))'
1872 "tag[Tag lines with arguments]" => \$opt::tag,
1873 ("tag-string|tagstring=s".
1874 "[Tag lines with a string]:str" => \$opt::tagstring),
1875 "ctag[Color tag]:str" => \$opt::ctag,
1876 "ctag-string|ctagstring=s[Colour tagstring]:str" => \$opt::ctagstring,
1877 "color|colour[Colourize output]" => \$opt::color,
1878 ("color-failed|colour-failed|colorfailed|colourfailed|".
1879 "color-fail|colour-fail|colorfail|colourfail|cf".
1880 "[Colour failed jobs red]"
1881 => \$opt::colorfailed),
1882 ("onall[Run all the jobs on all computers given with --sshlogin]"
1884 "nonall[--onall with no arguments]" => \$opt::nonall,
1885 ("filter-hosts|filterhosts|filter-host[Remove down hosts]"
1886 => \$opt::filter_hosts),
1888 '[Distribute jobs to remote computers
]'.
1889 ':[@hostgroups/][ncpus/]sshlogin
'.
1890 '[,[@hostgroups/][ncpus/]sshlogin
[,...]] or @hostgroup'.
1891 ':_users
') => \@opt::sshlogin,
1892 ("sshloginfile|slf=s".
1893 "[File with sshlogins on separate lines. Lines starting with '#' ".
1894 "are ignored.]:filename:_files"
1895 => \
@opt::sshloginfile
),
1897 "[Use ssh's ControlMaster to make ssh connections faster]"
1898 => \
$opt::controlmaster
),
1900 "[Use this command instead of ssh for remote access]:sshcommand"
1902 ("transfer-file|transferfile|transfer-files|transferfiles|tf=s".
1903 "[Transfer filename to remote computers]:filename:_files"
1904 => \
@opt::transfer_files
),
1905 ("return=s[Transfer files from remote computers]:filename:_files"
1907 ("trc=s[--transfer --return filename --cleanup]:filename:_files"
1909 "transfer[Transfer files to remote computers]" => \
$opt::transfer
,
1910 "cleanup[Remove transferred files]" => \
$opt::cleanup
,
1912 "[Transfer file to each sshlogin before first job is started]".
1914 => \
@opt::basefile
),
1916 "[Replace replacement strings in file and save it in repl]".
1918 => \
%opt::template
),
1919 "B=s" => \
$opt::retired
,
1920 "ctrl-c|ctrlc" => \
$opt::retired
,
1921 "no-ctrl-c|no-ctrlc|noctrlc" => \
$opt::retired
,
1922 ("work-dir|workdir|wd=s".
1923 "[Jobs will be run in the dir mydir. (default: the current dir ".
1924 "for the local machine, the login dir for remote computers)]".
1927 "W=s" => \
$opt::retired
,
1928 ("rsync-opts|rsyncopts=s[Options to pass on to rsync]:options"
1929 => \
$opt::rsync_opts
),
1930 ("tmpdir|tempdir=s[Directory for temporary files]:dirname:_cd"
1932 ("use-compress-program|compress-program|".
1933 "usecompressprogram|compressprogram=s".
1934 "[Use prg for compressing temporary files]:prg:_commands"
1935 => \
$opt::compress_program
),
1936 ("use-decompress-program|decompress-program|".
1937 "usedecompressprogram|decompressprogram=s".
1938 "[Use prg for decompressing temporary files]:prg:_commands"
1939 => \
$opt::decompress_program
),
1940 "compress[Compress temporary files]" => \
$opt::compress
,
1941 "open-tty|o[Open terminal tty]" => \
$opt::open_tty
,
1942 "tty[Open terminal tty]" => \
$opt::tty
,
1943 "T" => \
$opt::retired
,
1944 "H=i" => \
$opt::retired
,
1945 ("dry-run|dryrun|dr".
1946 "[Print the job to run on stdout (standard output), but do not ".
1949 "progress[Show progress of computations]" => \
$opt::progress
,
1950 ("eta[Show the estimated number of seconds before finishing]"
1952 "bar[Show progress as a progress bar]" => \
$opt::bar
,
1953 ("total-jobs|totaljobs|total=s".
1954 "[Set total number of jobs]" => \
$opt::totaljobs
),
1955 "shuf[Shuffle jobs]" => \
$opt::shuf
,
1956 ("arg-sep|argsep=s".
1957 "[Use sep-str instead of ::: as separator string]:sep-str"
1959 ("arg-file-sep|argfilesep=s".
1960 "[Use sep-str instead of :::: as separator string ".
1961 "between command and argument files]:sep-str"
1962 => \
$opt::arg_file_sep
),
1963 ('trim=s[Trim white space in input]:trim_method:'.
1964 '((n\:"No trim" l\:"Left\ trim" r\:"Right trim" '.
1965 'lr\:"Both trim" rl\:"Both trim"))'
1967 "env=s[Copy environment variable var]:var:_vars" => \
@opt::env
,
1968 "recordenv|record-env[Record environment]" => \
$opt::record_env
,
1970 '[Record names in current environment in $PARALLEL_IGNORED_NAMES '.
1971 'and exit. Only used with env_parallel. '.
1972 'Aliases, functions, and variables with names i]'
1974 ('plain[Ignore --profile, $PARALLEL, and ~/.parallel/config]'
1977 "[Use profile profilename for options]:profilename:_files"
1979 "tollef" => \
$opt::tollef
,
1980 "gnu[Behave like GNU parallel]" => \
$opt::gnu
,
1981 "link|xapply[Link input sources]" => \
$opt::link,
1982 "linkinputsource|xapplyinputsource=i" => \
@opt::linkinputsource
,
1983 # Before changing these lines, please read
1984 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice
1985 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1986 # You accept to be put in a public hall-of-shame by removing
1989 "[Print the citation notice and BibTeX entry for GNU parallel, ".
1990 "silence citation notice for all future runs, and exit. ".
1991 "It will not run any commands]"
1992 => \
$opt::citation
),
1993 "will-cite|willcite|nn|nonotice|no-notice" => \
$opt::willcite
,
1994 # Termination and retries
1995 ('halt-on-error|haltonerror|halt=s'.
1996 '[When should GNU parallel terminate]'.
1997 ':when:((now\:"kill all running jobs and halt immediately" '.
1998 'soon\:"wait for all running jobs to complete, start no new jobs"))'
2000 'limit=s[Dynamic job limit]:"command args"' => \
$opt::limit
,
2002 "[Minimum memory free when starting another job]:size"
2005 "[Suspend jobs when there is less memory available]:size"
2006 => \
$opt::memsuspend
),
2007 "retries=s[Try failing jobs n times]:n" => \
$opt::retries
,
2009 "[Time out for command. If the command runs for longer than ".
2010 "duration seconds it will get killed as per --term-seq]:duration"
2012 ("term-seq|termseq=s".
2013 "[Termination sequence]:sequence" => \
$opt::termseq
),
2014 # xargs-compatibility - implemented, man, testsuite
2015 ("max-procs|maxprocs|P|jobs|j=s".
2016 "[Add N to/Subtract N from/Multiply N% with/ the number of CPU ".
2017 "threads or read parameter from file]:+N/-N/N%/N/procfile:_files"
2019 ("delimiter|d=s[Input items are terminated by delim]:delim"
2021 ("max-chars|maxchars|s=s[Limit length of command]:max-chars"
2022 => \
$opt::max_chars
),
2023 ("arg-file|argfile|a=s".
2024 "[Use input-file as input source]:input-file:_files" => \
@opt::a
),
2025 "no-run-if-empty|norunifempty|r[Do not run empty input]" => \
$opt::r
,
2027 "[This option is deprecated; use -I instead]:replace-str"
2029 "E=s" => \
$opt::eof,
2030 ("eof|e:s[Set the end of file string to eof-str]:eof-str"
2032 ("process-slot-var|processslotvar=s".
2033 "[Set this variable to job slot number]:varname"
2034 => \
$opt::process_slot_var
),
2035 ("max-args|maxargs|n=s".
2036 "[Use at most max-args arguments per command line]:max-args"
2037 => \
$opt::max_args
),
2038 ("max-replace-args|maxreplaceargs|N=s".
2039 "[Use at most max-args arguments per command line]:max-args"
2040 => \
$opt::max_replace_args
),
2041 "col-sep|colsep|C=s[Column separator]:regexp" => \
$opt::colsep
,
2042 "csv[Treat input as CSV-format]"=> \
$opt::csv
,
2043 ("help|h[Print a summary of the options to GNU parallel and exit]"
2045 ("L=s[When used with --pipe: Read records of recsize]:recsize"
2047 ("max-lines|maxlines|l:f".
2048 "[When used with --pipe: Read records of recsize lines]:recsize"
2049 => \
$opt::max_lines
),
2050 "interactive|p[Ask user before running a job]" => \
$opt::interactive
,
2051 ("verbose|t[Print the job to be run on stderr (standard error)]"
2053 ("version|V[Print the version GNU parallel and exit]"
2055 ('min-version|minversion=i'.
2056 '[Print the version GNU parallel and exit]'.
2057 ':version:($(parallel --minversion 0))'
2058 => \
$opt::minversion
),
2059 ("show-limits|showlimits".
2060 "[Display limits given by the operating system]"
2061 => \
$opt::show_limits
),
2062 ("exit|x[Exit if the size (see the -s option) is exceeded]"
2065 "semaphore[Work as a counting semaphore]" => \
$opt::semaphore
,
2066 ("semaphore-timeout|semaphoretimeout|st=s".
2067 "[If secs > 0: If the semaphore is not released within secs ".
2068 "seconds, take it anyway]:secs"
2069 => \
$opt::semaphoretimeout
),
2070 ("semaphore-name|semaphorename|id=s".
2071 "[Use name as the name of the semaphore]:name"
2072 => \
$opt::semaphorename
),
2073 "fg[Run command in foreground]" => \
$opt::fg
,
2074 "bg[Run command in background]" => \
$opt::bg
,
2075 "wait[Wait for all commands to complete]" => \
$opt::wait,
2076 # Shebang #!/usr/bin/parallel --shebang
2077 ("shebang|hashbang".
2078 "[GNU parallel can be called as a shebang (#!) command as the ".
2079 "first line of a script. The content of the file will be treated ".
2082 ("_pipe-means-argfiles[internal]"
2083 => \
$opt::_pipe_means_argfiles
),
2084 "Y" => \
$opt::retired
,
2085 ("skip-first-line|skipfirstline".
2086 "[Do not use the first line of input]"
2087 => \
$opt::skip_first_line
),
2088 "_bug" => \
$opt::_bug
,
2089 "_unsafe" => \
$opt::_unsafe
,
2091 ("pipe|spreadstdin".
2092 "[Spread input to jobs on stdin (standard input)]" => \
$opt::pipe),
2093 ("round-robin|roundrobin|round".
2094 "[Distribute chunks of standard input in a round robin fashion]"
2095 => \
$opt::roundrobin
),
2096 "recstart=s" => \
$opt::recstart
,
2098 "[Split record between endstring and startstring]:endstring"
2101 "[Interpret --recstart and --recend as regular expressions]"
2103 ("remove-rec-sep|removerecsep|rrs".
2104 "[Remove record separator]" => \
$opt::remove_rec_sep
),
2105 ("output-as-files|outputasfiles|files[Save output to files]"
2107 ("output-as-files0|outputasfiles0|files0".
2108 "[Save output to files separated by NUL]"
2110 ("block-size|blocksize|block=s".
2111 "[Size of block in bytes to read at a time]:size"
2112 => \
$opt::blocksize
),
2113 ("block-timeout|blocktimeout|bt=s".
2114 "[Timeout for reading block when using --pipe]:duration"
2115 => \
$opt::blocktimeout
),
2116 "header=s[Use regexp as header]:regexp" => \
$opt::header
,
2117 "cat[Create a temporary file with content]" => \
$opt::cat
,
2118 "fifo[Create a temporary fifo with content]" => \
$opt::fifo
,
2119 ("pipe-part|pipepart[Pipe parts of a physical file]"
2120 => \
$opt::pipepart
),
2121 "tee[Pipe all data to all jobs]" => \
$opt::tee
,
2123 "[Use shardexpr as shard key and shard input to the jobs]:shardexpr"
2126 "[Use binexpr as binning key and bin input to the jobs]:binexpr"
2128 "group-by|groupby=s[Group input by value]:val" => \
$opt::groupby
,
2130 ("hgrp|hostgrp|hostgroup|hostgroups[Enable hostgroups on arguments]"
2131 => \
$opt::hostgroups
),
2132 "embed[Embed GNU parallel in a shell script]" => \
$opt::embed
,
2133 ("filter=s[Only run jobs where filter is true]:filter"
2135 "combineexec|combine-exec|combineexecutable|combine-executable=s".
2136 "[Embed GNU parallel in a shell script]" => \
$opt::combineexec
,
2137 ("filter=s[Only run jobs where filter is true]:filter"
2139 "_parset=s[Generate shell code for parset]" => \
$opt::_parset
,
2140 ("shell-completion|shellcompletion=s".
2141 "[Generate shell code for shell completion]:shell:(bash zsh)"
2142 => \
$opt::shellcompletion
),
2143 # Parameter for testing optimal values
2144 "_test=s" => \
$opt::_test
,
2148 sub get_options_from_array
($@
) {
2149 # Run GetOptions on @array
2151 # $array_ref = ref to @ARGV to parse
2152 # @keep_only = Keep only these options (e.g. --profile)
2156 # true if parsing worked
2157 # false if parsing failed
2158 # @$array_ref is changed
2159 my ($array_ref, @keep_only) = @_;
2160 if(not @
$array_ref) {
2161 # Empty array: No need to look more at that
2164 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
2165 # supported everywhere
2167 my $this_is_ARGV = (\@
::ARGV
== $array_ref);
2168 if(not $this_is_ARGV) {
2169 @save_argv = @
::ARGV
;
2170 @
::ARGV
= @
{$array_ref};
2172 # If @keep_only set: Ignore all values except @keep_only
2173 my %options = options_hash
();
2176 @keep{@keep_only} = @keep_only;
2177 for my $k (grep { not $keep{$_} } keys %options) {
2178 # Store the value of the option in @dummy
2179 $options{$k} = \
@dummy;
2182 my $retval = GetOptions
(%options);
2183 if(not $this_is_ARGV) {
2184 @
{$array_ref} = @
::ARGV
;
2185 @
::ARGV
= @save_argv;
2190 sub parse_parset
() {
2191 $Global::progname
= "parset";
2192 @Global::parset_vars
= split /[ ,]/, $opt::_parset
;
2193 my $var_or_assoc = shift @Global::parset_vars
;
2194 # Legal names: var _v2ar arrayentry[2]
2195 my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ }
2196 @Global::parset_vars
);
2199 ("@illegal is an invalid variable name.",
2200 "Variable names must be letter followed by letters or digits.",
2202 " parset varname GNU Parallel options and command");
2205 if($var_or_assoc eq "assoc") {
2206 my $var = shift @Global::parset_vars
;
2208 $Global::parset
= "assoc";
2209 $Global::parset_endstring
=")\n";
2210 } elsif($var_or_assoc eq "var") {
2211 if($#Global::parset_vars
> 0) {
2212 $Global::parset
= "var";
2214 my $var = shift @Global::parset_vars
;
2216 $Global::parset
= "array";
2217 $Global::parset_endstring
=")\n";
2220 ::die_bug
("parset: unknown '$opt::_parset'");
2224 sub parse_options
(@
) {
2227 my @argv_before = @ARGV;
2228 @ARGV = read_options
();
2229 # Before changing these line, please read
2230 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice
2231 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
2232 # You accept to be added to a public hall-of-shame by removing the lines
2233 if(defined $opt::citation
) {
2234 citation
(\
@argv_before,\
@ARGV);
2238 if($opt::nokeeporder
) { $opt::keeporder
= undef; }
2240 if(@opt::v
) { $Global::verbose
= $#opt::v
+1; } # Convert -v -v to v=2
2241 if($opt::_bug
) { ::die_bug
("test-bug"); }
2242 $Global::debug
= $opt::D
;
2246 $Global::shell
= $ENV{'PARALLEL_SHELL'} || parent_shell
($$)
2247 || $ENV{'SHELL'} || "/bin/sh";
2248 if(not -x
$Global::shell
and not which
($Global::shell
)) {
2249 ::error
("Shell '$Global::shell' not found.");
2252 ::debug
("init","Global::shell $Global::shell\n");
2253 $Global::cshell
= $Global::shell
=~ m
:(/[-a
-z
]*)?csh
:;
2254 $Global::fish
= $Global::shell
=~ m
:(/[-a
-z
]*)?fish
:;
2255 if(defined $opt::_parset
) { parse_parset
(); }
2256 if(defined $opt::X
) { $Global::ContextReplace
= 1; }
2257 if(defined $opt::silent
) { $Global::verbose
= 0; }
2258 if(defined $opt::null
) { $/ = "\0"; }
2259 if(defined $opt::files
) { $Global::files
= 1; $Global::files_sep
= "\n"; }
2260 if(defined $opt::files0
) { $Global::files
= 1; $Global::files_sep
= "\0"; }
2261 if(defined $opt::d
) { $/ = unquote_printf
($opt::d
) }
2262 parse_replacement_string_options
();
2263 $opt::tag
||= $opt::ctag
;
2264 $opt::tagstring
||= $opt::ctagstring
;
2265 if(defined $opt::ctag
or defined $opt::ctagstring
2266 or defined $opt::color
) {
2269 if($opt::linebuffer
or $opt::latestline
) {
2270 $Global::linebuffer
= 1;
2271 Job
::latestline_init
();
2273 if(defined $opt::tag
and not defined $opt::tagstring
) {
2275 $opt::tagstring
= $Global::parensleft
.$Global::parensright
;
2277 if(defined $opt::tagstring
) {
2278 $opt::tagstring
= unquote_printf
($opt::tagstring
);
2279 if($opt::tagstring
=~
2280 /\Q$Global::parensleft\E.*\S+.*\Q$Global::parensright\E/
2282 $Global::linebuffer
) {
2283 # --tagstring contains {= ... =} and --linebuffer =>
2284 # recompute replacement string for each use (do not cache)
2285 $Global::cache_replacement_eval
= 0;
2288 if(defined $opt::interactive
) { $Global::interactive
= $opt::interactive
; }
2289 if(defined $opt::quote
) { $Global::quoting
= 1; }
2290 if(defined $opt::r
) { $Global::ignore_empty
= 1; }
2291 if(defined $opt::verbose
) { $Global::stderr_verbose
= 1; }
2292 if(defined $opt::eof) { $Global::end_of_file_string
= $opt::eof; }
2293 if(defined $opt::max_args
) {
2294 $opt::max_args
= multiply_binary_prefix
($opt::max_args
);
2295 $Global::max_number_of_args
= $opt::max_args
;
2296 if($opt::pipepart
and $opt::groupby
) { $Global::max_number_of_args
= 1; }
2298 if(defined $opt::blocktimeout
) {
2299 $Global::blocktimeout
= int(multiply_time_units
($opt::blocktimeout
));
2300 if($Global::blocktimeout
< 1) {
2301 ::error
("--block-timeout must be at least 1");
2305 if(defined $opt::timeout
) {
2306 $Global::timeoutq
= TimeoutQueue
->new($opt::timeout
);
2308 if(defined $opt::tmpdir
) { $ENV{'TMPDIR'} = $opt::tmpdir
; }
2309 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts
||
2310 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
2311 # Default: Same nice level as GNU Parallel is started at
2312 $opt::nice
||= eval { getpriority(0,0) } || 0;
2313 if(defined $opt::help
) { usage
(); exit(0); }
2314 if(defined $opt::shellcompletion
) { shell_completion
(); exit(0); }
2315 if(defined $opt::embed
) { embed
(); exit(0); }
2316 if(defined $opt::sqlandworker
) {
2317 $opt::sqlmaster
= $opt::sqlworker
= $opt::sqlandworker
;
2319 if(defined $opt::tmuxpane
) { $opt::tmux
= $opt::tmuxpane
; }
2320 if(defined $opt::colsep
) { $Global::trim
= 'lr'; }
2321 if(defined $opt::csv
) {
2322 if(not $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;") {
2323 ::error
("The perl module Text::CSV is not installed.");
2324 ::error
("Try installing libtext-csv-perl or perl-Text-CSV.");
2327 $opt::colsep
= defined $opt::colsep ?
$opt::colsep
: ",";
2328 my $csv_setting = { binary
=> 1, sep_char
=> $opt::colsep
};
2329 my $sep = $csv_setting->{sep_char
};
2330 $Global::csv
= Text
::CSV
->new($csv_setting)
2331 or die "Cannot use CSV: ".Text
::CSV
->error_diag ();
2333 if(defined $opt::header
) {
2334 $opt::colsep
= defined $opt::colsep ?
$opt::colsep
: "\t";
2336 if(defined $opt::trim
) { $Global::trim
= $opt::trim
; }
2337 if(defined $opt::arg_sep
) { $Global::arg_sep
= $opt::arg_sep
; }
2338 if(defined $opt::arg_file_sep
) {
2339 $Global::arg_file_sep
= $opt::arg_file_sep
;
2341 if(not defined $opt::process_slot_var
) {
2342 $opt::process_slot_var
= 'PARALLEL_JOBSLOT0';
2344 if(defined $opt::number_of_sockets
) {
2345 print SSHLogin
::no_of_sockets
(),"\n"; wait_and_exit
(0);
2347 if(defined $opt::number_of_cpus
) {
2348 print SSHLogin
::no_of_cores
(),"\n"; wait_and_exit
(0);
2350 if(defined $opt::number_of_cores
) {
2351 print SSHLogin
::no_of_cores
(),"\n"; wait_and_exit
(0);
2353 if(defined $opt::number_of_threads
) {
2354 print SSHLogin
::no_of_threads
(),"\n"; wait_and_exit
(0);
2356 if(defined $opt::max_line_length_allowed
) {
2357 print Limits
::Command
::real_max_length
(),"\n"; wait_and_exit
(0);
2359 if(defined $opt::max_chars
) {
2360 $opt::max_chars
= multiply_binary_prefix
($opt::max_chars
);
2362 if(defined $opt::version
) { version
(); wait_and_exit
(0); }
2363 if(defined $opt::record_env
) { record_env
(); wait_and_exit
(0); }
2364 if(@opt::sshlogin
) { @Global::sshlogin
= @opt::sshlogin
; }
2365 if(@opt::sshloginfile
) { read_sshloginfiles
(@opt::sshloginfile
); }
2366 if(@opt::return) { push @Global::ret_files
, @opt::return; }
2367 if($opt::transfer
) {
2368 push @Global::transfer_files
, $opt::i
|| $opt::I
|| "{}";
2370 push @Global::transfer_files
, @opt::transfer_files
;
2371 if(%opt::template
) {
2372 while (my ($source, $template_name) = each %opt::template
) {
2373 push @Global::template_names
, $template_name;
2374 push @Global::template_contents
, slurp_or_exit
($source);
2377 if(not defined $opt::recstart
and
2378 not defined $opt::recend
) { $opt::recend
= "\n"; }
2379 $Global::blocksize
= multiply_binary_prefix
($opt::blocksize
|| "1M");
2380 if($Global::blocksize
> 2**31-1 and not $opt::pipepart
) {
2381 warning
("--blocksize >= 2G causes problems. Using 2G-1.");
2382 $Global::blocksize
= 2**31-1;
2384 if($^O
eq "cygwin" and
2385 ($opt::pipe or $opt::pipepart
or $opt::roundrobin
)
2386 and $Global::blocksize
> 65535) {
2387 warning
("--blocksize >= 64K causes problems on Cygwin.");
2389 $opt::memfree
= multiply_binary_prefix
($opt::memfree
);
2390 $opt::memsuspend
= multiply_binary_prefix
($opt::memsuspend
);
2391 $Global::memlimit
= $opt::memsuspend
+ $opt::memfree
;
2392 check_invalid_option_combinations
();
2393 if((defined $opt::fifo
or defined $opt::cat
) and not $opt::pipepart
) {
2396 if(defined $opt::minversion
) {
2397 print $Global::version
,"\n";
2398 if($Global::version
< $opt::minversion
) {
2404 if(not defined $opt::delay
) {
2405 # Set --delay to --sshdelay if not set
2406 $opt::delay
= $opt::sshdelay
;
2408 $Global::sshdelayauto
= $opt::sshdelay
=~ s/auto$//;
2409 $opt::sshdelay
= multiply_time_units
($opt::sshdelay
);
2410 $Global::delayauto
= $opt::delay
=~ s/auto$//;
2411 $opt::delay
= multiply_time_units
($opt::delay
);
2412 if($opt::compress_program
) {
2414 $opt::decompress_program
||= $opt::compress_program
." -dc";
2417 if(defined $opt::results
) {
2418 # Is the output a dir or CSV-file?
2419 if($opt::results
=~ /\.csv$/i) {
2420 # CSV with , as separator
2421 $Global::csvsep
= ",";
2422 $Global::membuffer
||= 1;
2423 } elsif($opt::results
=~ /\.tsv$/i) {
2424 # CSV with TAB as separator
2425 $Global::csvsep
= "\t";
2426 $Global::membuffer
||= 1;
2427 } elsif($opt::results
=~ /\.json$/i) {
2429 $Global::jsonout
||= 1;
2430 $Global::membuffer
||= 1;
2433 if($opt::compress
) {
2434 my ($compress, $decompress) = find_compression_program
();
2435 $opt::compress_program
||= $compress;
2436 $opt::decompress_program
||= $decompress;
2437 if(($opt::results
and not $Global::csvsep
) or $Global::files
) {
2438 # No need for decompressing
2439 $opt::decompress_program
= "cat >/dev/null";
2442 if(defined $opt::dryrun
) {
2443 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
2447 if(defined $opt::nonall
) {
2448 # Append a dummy empty argument if there are no arguments
2449 # on the command line to avoid reading from STDIN.
2450 # arg_sep = random 50 char
2451 # \0noarg => nothing (not the empty string)
2452 $Global::arg_sep
= join "",
2453 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
2454 push @ARGV, $Global::arg_sep
, "\0noarg";
2456 if(defined $opt::tee
) {
2457 if(not defined $opt::jobs
) {
2461 if(defined $opt::tty
) {
2462 # Defaults for --tty: -j1 -u
2463 # Can be overridden with -jXXX -g
2464 if(not defined $opt::jobs
) {
2467 if(not defined $opt::group
) {
2472 push @Global::ret_files
, @opt::trc
;
2473 if(not @Global::transfer_files
) {
2474 # Defaults to --transferfile {}
2475 push @Global::transfer_files
, $opt::i
|| $opt::I
|| "{}";
2479 if(defined $opt::max_lines
) {
2480 if($opt::max_lines
eq "-0") {
2481 # -l -0 (swallowed -0)
2482 $opt::max_lines
= 1;
2486 $opt::max_lines
= multiply_binary_prefix
($opt::max_lines
);
2487 if ($opt::max_lines
== 0) {
2488 # If not given (or if 0 is given) => 1
2489 $opt::max_lines
= 1;
2493 $Global::max_lines
= $opt::max_lines
;
2494 if(not $opt::pipe) {
2495 # --pipe -L means length of record - not max_number_of_args
2496 $Global::max_number_of_args
||= $Global::max_lines
;
2500 # Read more than one arg at a time (-L, -N)
2501 if(defined $opt::L
) {
2502 $opt::L
= multiply_binary_prefix
($opt::L
);
2503 $Global::max_lines
= $opt::L
;
2504 if(not $opt::pipe) {
2505 # --pipe -L means length of record - not max_number_of_args
2506 $Global::max_number_of_args
||= $Global::max_lines
;
2509 if(defined $opt::max_replace_args
) {
2510 $opt::max_replace_args
=
2511 multiply_binary_prefix
($opt::max_replace_args
);
2512 $Global::max_number_of_args
= $opt::max_replace_args
;
2513 $Global::ContextReplace
= 1;
2515 if((defined $opt::L
or defined $opt::max_replace_args
)
2517 not ($opt::xargs
or $opt::m
)) {
2518 $Global::ContextReplace
= 1;
2520 # Deal with ::: :::+ :::: ::::+ and -a +file
2521 my @ARGV_with_argsep = @ARGV;
2522 @ARGV = read_args_from_command_line
();
2523 if(defined $opt::combineexec
) {
2524 pack_combined_executable
(\
@argv_before,\
@ARGV_with_argsep,\
@ARGV);
2529 if(defined $opt::eta
) { $opt::progress
= $opt::eta
; }
2530 if(defined $opt::bar
) { $opt::progress
= $opt::bar
; }
2531 if(defined $opt::bar
or defined $opt::latestline
) {
2532 my $fh = $Global::status_fd
|| *STDERR
;
2533 # Activate decode_utf8
2535 # Enable utf8 if possible
2537 binmode $fh, "encoding(utf8)";
2538 *decode_utf8 = \&Encode::decode_utf8;
2540 if(eval { decode_utf8("x") }) {
2541 # Great: decode works
2543 # UTF8-decode not supported: Dummy decode
2544 eval q{sub decode_utf8($;$) { $_[0]; }};
2546 # Activate decode_utf8
2548 # Enable utf8 if possible
2550 use Encode qw( encode_utf8 );
2551 use Text
::CharWidth
qw( mbswidth );
2552 use Unicode
::Normalize
qw( NFC NFD );
2554 if(eval { mbswidth
("ヌー平行") }) {
2555 # Great: mbswidth works
2557 # mbswidth not supported: Dummy mbswidth
2558 eval q{ sub mbswidth { return length @_; } };
2562 # If you want GNU Parallel to be maintained in the future you
2564 # *YOU* will be harming free software by removing the notice.
2566 # Funding a free software project is hard. GNU Parallel is no
2567 # exception. On top of that it seems the less visible a project
2568 # is, the harder it is to get funding. And the nature of GNU
2569 # Parallel is that it will never be seen by "the guy with the
2570 # checkbook", but only by the people doing the actual work.
2572 # This problem has been covered by others - though no solution has
2574 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
2575 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
2577 # The FAQ tells you why the citation notice exists:
2578 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
2580 # If you want GNU Parallel to be maintained in the future, and not
2581 # just wither away like so many other free software tools, you
2582 # need to help finance the development.
2584 # The citation notice is a simple way of doing so, as citations
2585 # makes it possible to me to get a job where I can maintain GNU
2586 # Parallel as part of the job.
2588 # This means you can help financing development
2590 # WITHOUT PAYING A SINGLE CENT!
2592 # Before implementing the citation notice it was discussed with
2594 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2596 # Having to spend 10 seconds on running 'parallel --citation' once
2597 # is no doubt not an ideal solution, but no one has so far come up
2598 # with an ideal solution - neither for funding GNU Parallel nor
2599 # other free software.
2601 # If you believe you have the perfect solution, you should try it
2602 # out, and if it works, you should post it on the email
2603 # list. Ideas that will cost work and which have not been tested
2604 # are, however, unlikely to be prioritized.
2606 # Please note that GPL version 3 gives you the right to fork GNU
2607 # Parallel under a new name, but it does not give you the right to
2608 # distribute modified copies with the citation notice disabled in
2609 # a way where the software can be confused with GNU Parallel. To
2610 # do that you need to be the owner of the GNU Parallel
2611 # trademark. The xt:Commerce case shows this.
2613 # Description of the xt:Commerce case in OLG Duesseldorf
2614 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2616 # The verdict in German
2617 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2618 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2620 # Other free software limiting derivates by the same name:
2621 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2622 # https://tm.joomla.org/trademark-faq.html
2623 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2625 # Running 'parallel --citation' one single time takes less than 10
2626 # seconds, and will silence the citation notice for future
2627 # runs. If that is too much trouble for you, why not use one of
2628 # the alternatives instead?
2629 # See a list in: 'man parallel_alternatives'
2631 # If you want GNU Parallel to be maintained in the future, you
2632 # should keep this line:
2634 # This is because _YOU_ actively make it harder to justify
2635 # spending time developing GNU Parallel by removing it.
2637 # If you disagree, please read (especially 77-):
2638 # https://www.fordfoundation.org/media/2976/roads-and-bridges-the-unseen-labor-behind-our-digital-infrastructure.pdf
2640 # *YOU* will be harming free software by removing the notice. You
2641 # accept to be added to a public hall of shame by removing the
2642 # line. That includes you, George and Andreas.
2646 if($ENV{'PARALLEL_ENV'}) {
2647 # Read environment and set $Global::parallel_env
2648 # Must be done before is_acceptable_command_line_length()
2649 my $penv = $ENV{'PARALLEL_ENV'};
2650 # unset $PARALLEL_ENV: It should not be given to children
2651 # because it takes up a lot of env space
2652 delete $ENV{'PARALLEL_ENV'};
2654 # This is a file/fifo: Replace envvar with content of file
2655 $penv = slurp_or_exit($penv);
2657 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2658 $penv =~ s/\001/\n/g;
2660 ::warning('\0 (NUL) in environment is not supported');
2662 $Global::parallel_env = $penv;
2666 if(defined $opt::show_limits) { show_limits(); }
2668 if(remote_hosts() and
2669 (defined $opt::X or defined $opt::m or defined $opt::xargs)) {
2670 # As we do not know the max line length on the remote machine
2671 # long commands generated by xargs may fail
2672 # If $opt::max_replace_args is set, it is probably safe
2673 ::warning("Using -X or -m with --sshlogin may fail.");
2676 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2679 if(defined $opt::sqlmaster or defined $opt::sqlworker) {
2680 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2682 if(defined $opt::sqlworker) { $Global::membuffer ||= 1; }
2683 # The sqlmaster groups the arguments, so the should just read one
2684 if(defined $opt::sqlworker and not defined $opt::sqlmaster) {
2685 $Global::max_number_of_args = 1;
2687 if(defined $Global::color or defined $opt::colorfailed) {
2692 sub check_invalid_option_combinations() {
2693 if(defined $opt::timeout and
2694 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2695 ::error("--timeout must be seconds or percentage.");
2698 if(defined $opt::fifo and defined $opt::cat) {
2699 ::error("--fifo cannot be combined with --cat.");
2700 ::wait_and_exit(255);
2702 if(defined $opt::retries and defined $opt::roundrobin) {
2703 ::error("--retries cannot be combined with --roundrobin.");
2704 ::wait_and_exit(255);
2706 if(defined $opt::pipepart and
2707 (defined $opt::L or defined $opt::max_lines
2708 or defined $opt::max_replace_args)) {
2709 ::error("--pipepart is incompatible with --max-replace-args, ".
2710 "--max-lines, and -L.");
2713 if(defined $opt::group and defined $opt::ungroup) {
2714 ::error("--group cannot be combined with --ungroup.");
2715 ::wait_and_exit(255);
2717 if(defined $opt::group and defined $opt::linebuffer) {
2718 ::error("--group cannot be combined with --line-buffer.");
2719 ::wait_and_exit(255);
2721 if(defined $opt::ungroup and defined $opt::linebuffer) {
2722 ::error("--ungroup cannot be combined with --line-buffer.");
2723 ::wait_and_exit(255);
2725 if(defined $opt::tollef and not defined $opt::gnu) {
2726 ::error("--tollef has been retired.",
2727 "Remove --tollef or use --gnu to override --tollef.");
2728 ::wait_and_exit(255);
2730 if(defined $opt::retired) {
2731 ::error("-g has been retired. Use --group.",
2732 "-B has been retired. Use --bf.",
2733 "-T has been retired. Use --tty.",
2734 "-U has been retired. Use --er.",
2735 "-W has been retired. Use --wd.",
2736 "-Y has been retired. Use --shebang.",
2737 "-H has been retired. Use --halt.",
2738 "--sql has been retired. Use --sqlmaster.",
2739 "--ctrlc has been retired.",
2740 "--noctrlc has been retired.");
2741 ::wait_and_exit(255);
2743 if(defined $opt::groupby) {
2744 if(not defined $opt::pipe and not defined $opt::pipepart) {
2747 if(defined $opt::remove_rec_sep) {
2748 ::error("--remove-rec-sep is not compatible with --groupby");
2749 ::wait_and_exit(255);
2751 if(defined $opt::recstart) {
2752 ::error("--recstart is not compatible with --groupby");
2753 ::wait_and_exit(255);
2755 if($opt::recend ne "\n") {
2756 ::error("--recend is not compatible with --groupby");
2757 ::wait_and_exit(255);
2761 # use --_unsafe to only generate a warning
2762 if($opt::_unsafe) { ::warning(@_); } else { ::error(@_); exit(255); }
2764 if(defined $opt::results) {
2765 if($opt::nonall or $opt::onall) {
2766 unsafe_warn("--(n)onall + --results not supported (yet).");
2769 sub test_safe_chars {
2771 if($ENV{$var} =~ m{^[-a-z0-9_+,.%:/= ]*$}i) {
2774 unsafe_warn("\$$var can only contain [-a-z0-9_+,.%:/= ].");
2777 if($ENV{'TMPDIR'} =~ /\n/) {
2778 if(defined $opt::files) {
2779 ::warning("Use --files0 when \$TMPDIR contains newline.");
2780 } elsif($Global::cshell
2782 (defined $opt::cat or defined $opt::fifo)) {
2783 ::warning("--cat/--fifo fails under csh ".
2784 "if \$TMPDIR contains newline.");
2786 } elsif($ENV{'TMPDIR'} =~ /\257/) {
2787 unsafe_warn("\$TMPDIR with \\257 (\257) is not supported.");
2789 test_safe_chars('TMPDIR');
2791 map { test_safe_chars($_); } qw(PARALLEL_HOME XDG_CONFIG_DIRS
2792 PARALLEL_REMOTE_TMPDIR XDG_CACHE_HOME);
2795 sub init_globals
() {
2797 $Global::version
= 20240522;
2798 $Global::progname
= 'parallel';
2799 $::name
= "GNU Parallel";
2800 $Global::infinity
= 2**31;
2802 $Global::verbose
= 0;
2803 # Don't quote every part of the command line
2804 $Global::quoting
= 0;
2805 # Quote replacement strings
2806 $Global::quote_replace
= 1;
2807 $Global::total_completed
= 0;
2808 $Global::cache_replacement_eval
= 1;
2809 # Read only table with default --rpl values
2813 '{#}' => '1 $_=$job->seq()',
2814 '{%}' => '1 $_=$job->slot()',
2817 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2818 '$_ = dirname($_);'),
2819 '{/.}' => 's:.*/::; s:\.[^/.]*$::;',
2820 '{.}' => 's:\.[^/.]*$::',
2825 # = {.}.{+.} = {+/}/{/.}.{+.}
2826 # = {..}.{+..} = {+/}/{/..}.{+..}
2827 # = {...}.{+...} = {+/}/{/...}.{+...}
2828 '{+/}' => 's:/[^/]*$:: || s:.*$::',
2830 '{+.}' => 's:.*\.:: || s:.*$::',
2831 # a.b.c => b.c; a.b => ''; a => ''
2832 '{+..}' => 's:.*\.([^/.]*\.[^/.]*)$:$1: || s:.*$::',
2833 '{+...}' => 's:.*\.([^/.]*\.[^/.]*\.[^/.]*)$:$1: || s:.*$::',
2834 '{..}' => 's:\.[^/.]*\.[^/.]*$::',
2835 '{...}' => 's:\.[^/.]*\.[^/.]*\.[^/.]*$::',
2836 '{/..}' => 's:.*/::; s:\.[^/.]*\.[^/.]*$::',
2837 '{/...}' => 's:.*/::; s:\.[^/.]*\.[^/.]*\.[^/.]*$::',
2838 # n choose k = Binomial coefficient
2839 '{choose_k}' => ('for $t (2..$#arg)'.
2840 '{ if($arg[$t-1] ge $arg[$t]) { skip() } }'),
2841 # unique values: Skip job if any args are the same
2842 '{uniq}' => 'if(::uniq(@arg) != @arg) { skip(); }',
2843 # {##} = number of jobs
2844 '{##}' => '1 $_=total_jobs()',
2845 # {0#} = 0-padded seq
2846 '{0#}' => ('1 $f=1+int((log(total_jobs())/log(10)));'.
2847 '$_=sprintf("%0${f}d",seq())'),
2848 # {0%} = 0-padded jobslot
2849 '{0%}' => ('1 $f=1+int((log($Global::max_jobs_running||1)/log(10)));'.
2850 '$_=sprintf("%0${f}d",slot())'),
2851 # {seq-1} = seq-1 = counting from 0
2852 '{seq(.*?)}' => '$_=eval q{$job->seq()}.qq{$$1}',
2853 # {seq-1} = jobslot-1 = counting from 0
2854 '{slot(.*?)}' => '$_=eval q{$job->slot()}.qq{$$1}',
2856 ## Bash inspired replacement strings
2858 '{:-([^}]+?)}' => '$_ ||= $$1',
2860 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2862 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2863 # echo {#z.*z.} ::: z.z.z.foo => z.foo
2864 # echo {##z.*z.} ::: z.z.z.foo => foo
2866 '{#([^#}][^}]*?)}' =>
2867 '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;',
2869 '{##([^#}][^}]*?)}' => 's/^$$1//;',
2870 # echo {%.z.*z} ::: foo.z.z.z => foo.z
2871 # echo {%%.z.*z} ::: foo.z.z.z => foo
2874 '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;',
2876 '{%%([^}]+?)}' => 's/$$1$//;',
2877 # Bash ${a/def/ghi} ${a/def/}
2878 '{/([^#%}/]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2879 # Bash ${a/#def/ghi} ${a/#def/}
2880 '{/#([^}]+?)/([^}]*?)}' => 's/^$$1/$$2/g;',
2881 # Bash ${a/%def/ghi} ${a/%def/}
2882 '{/%([^}]+?)/([^}]*?)}' => 's/$$1$/$$2/g;',
2883 # Bash ${a//def/ghi} ${a//def/}
2884 '{//([^}]+?)/([^}]*?)}' => 's/$$1/$$2/g;',
2886 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2888 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2890 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2892 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2894 # {slot} = $PARALLEL_JOBSLOT
2895 '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2897 '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2898 # {sshlogin} = sshlogin
2899 '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2900 # {hgrp} = hostgroups of the host
2901 '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()',
2902 # {agrp} = hostgroups of the argument
2903 '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()',
2905 # Modifiable copy of %Global::replace
2906 %Global::rpl
= %Global::replace
;
2908 $Global::ignore_empty
= 0;
2909 $Global::interactive
= 0;
2910 $Global::stderr_verbose
= 0;
2911 $Global::default_simultaneous_sshlogins
= 9;
2912 $Global::exitstatus
= 0;
2913 $Global::arg_sep
= ":::";
2914 $Global::arg_file_sep
= "::::";
2915 $Global::trim
= 'n';
2916 $Global::max_jobs_running
= 0;
2917 $Global::job_already_run
= '';
2918 $ENV{'TMPDIR'} ||= "/tmp";
2919 $ENV{'PARALLEL_REMOTE_TMPDIR'} ||= "/tmp";
2920 # bug #55398: set $OLDPWD when using --wd
2921 $ENV{'OLDPWD'} = $ENV{'PWD'};
2922 if(not $ENV{HOME
}) {
2923 # $ENV{HOME} is sometimes not set if called from PHP
2924 ::warning
("\$HOME not set. Using /tmp.");
2925 $ENV{HOME
} = "/tmp";
2927 # no warnings to allow for undefined $XDG_*
2928 no warnings
'uninitialized';
2929 # If $PARALLEL_HOME is set, but does not exist, try making it.
2930 if(defined $ENV{'PARALLEL_HOME'}) {
2931 eval { File
::Path
::mkpath
($ENV{'PARALLEL_HOME'}); };
2933 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2934 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2935 # Use the first config dir that exists from:
2937 # $XDG_CONFIG_HOME/parallel
2938 # $(each XDG_CONFIG_DIRS)/parallel
2941 # Keep only dirs that exist
2942 @Global::config_dirs
=
2944 $ENV{'PARALLEL_HOME'},
2945 (map { "$_/parallel" }
2947 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2948 $ENV{'HOME'} . "/.parallel");
2949 # Use first dir as config dir
2950 $Global::config_dir
= $Global::config_dirs
[0] ||
2951 $ENV{'HOME'} . "/.parallel";
2952 if($ENV{'PARALLEL_HOME'} =~ /./ and not -d
$ENV{'PARALLEL_HOME'}) {
2953 ::warning
("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist.");
2954 ::warning
("Using $Global::config_dir");
2956 # Use the first cache dir that exists from:
2958 # $XDG_CACHE_HOME/parallel
2959 # Keep only dirs that exist
2960 @Global::cache_dirs
= (grep { -d
$_ }
2961 $ENV{'PARALLEL_HOME'},
2962 $ENV{'XDG_CACHE_HOME'}."/parallel");
2963 $Global::cache_dir
= $Global::cache_dirs
[0] ||
2964 $ENV{'HOME'} . "/.parallel";
2969 # $opt::halt flavours
2972 # $Global::halt_when
2973 # $Global::halt_fail
2974 # $Global::halt_success
2976 # $Global::halt_count
2977 if(defined $opt::halt
) {
2978 my %halt_expansion = (
2980 "1" => "soon,fail=1",
2981 "2" => "now,fail=1",
2982 "-1" => "soon,success=1",
2983 "-2" => "now,success=1",
2985 # Expand -2,-1,0,1,2 into long form
2986 $opt::halt
= $halt_expansion{$opt::halt
} || $opt::halt
;
2987 # --halt 5% == --halt soon,fail=5%
2988 $opt::halt
=~ s/^(\d+)%$/soon,fail=$1%/;
2989 # Split: soon,fail=5%
2990 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt
;
2991 if(not grep { $when eq $_ } qw(never soon now)) {
2992 ::error
("--halt must have 'never', 'soon', or 'now'.");
2993 ::wait_and_exit
(255);
2995 $Global::halt_when
= $when;
2996 if($when ne "never") {
2997 if($fail_success eq "fail") {
2998 $Global::halt_fail
= 1;
2999 } elsif($fail_success eq "success") {
3000 $Global::halt_success
= 1;
3001 } elsif($fail_success eq "done") {
3002 $Global::halt_done
= 1;
3004 ::error
("--halt $when must be followed by ,success or ,fail.");
3005 ::wait_and_exit
(255);
3007 if($pct_count =~ /^(\d+)%$/) {
3008 $Global::halt_pct
= $1/100;
3009 } elsif($pct_count =~ /^(\d+)$/) {
3010 $Global::halt_count
= $1;
3012 ::error
("--halt $when,$fail_success ".
3013 "must be followed by ,number or ,percent%.");
3014 ::wait_and_exit
(255);
3020 sub parse_replacement_string_options
() {
3024 # $Global::parensleft
3025 # $Global::parensright
3027 # $Global::parensleft
3028 # $Global::parensright
3034 # $opt::basenamereplace
3035 # $opt::dirnamereplace
3038 # $opt::basenameextensionreplace
3041 # Modify %Global::rpl
3042 # Replace $old with $new
3043 my ($old,$new) = @_;
3045 $Global::rpl
{$new} = $Global::rpl
{$old};
3046 delete $Global::rpl
{$old};
3049 my $parens = "{==}";
3050 if(defined $opt::parens
) { $parens = $opt::parens
; }
3051 my $parenslen = 0.5*length $parens;
3052 $Global::parensleft
= substr($parens,0,$parenslen);
3053 $Global::parensright
= substr($parens,$parenslen);
3054 if(defined $opt::plus
) { %Global::rpl
= (%Global::plus
,%Global::rpl
); }
3055 if(defined $opt::I
) { rpl
('{}',$opt::I
); }
3056 if(defined $opt::i
and $opt::i
) { rpl
('{}',$opt::i
); }
3057 if(defined $opt::U
) { rpl
('{.}',$opt::U
); }
3058 if(defined $opt::basenamereplace
) { rpl
('{/}',$opt::basenamereplace
); }
3059 if(defined $opt::dirnamereplace
) { rpl
('{//}',$opt::dirnamereplace
); }
3060 if(defined $opt::seqreplace
) { rpl
('{#}',$opt::seqreplace
); }
3061 if(defined $opt::slotreplace
) { rpl
('{%}',$opt::slotreplace
); }
3062 if(defined $opt::basenameextensionreplace
) {
3063 rpl
('{/.}',$opt::basenameextensionreplace
);
3066 # Create $Global::rpl entries for --rpl options
3067 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
3068 my ($shorthand,$long) = split/\s/,$_,2;
3069 $Global::rpl
{$shorthand} = $long;
3073 sub parse_semaphore
() {
3074 # Semaphore defaults
3075 # Must be done before computing number of processes and max_line_length
3076 # because when running as a semaphore GNU Parallel does not read args
3079 # $Global::semaphore
3080 # $opt::semaphoretimeout
3081 # $Semaphore::timeout
3082 # $opt::semaphorename
3090 # @Global::unget_argv
3091 # $Global::default_simultaneous_sshlogins
3093 # $Global::interactive
3094 $Global::semaphore
||= ($0 =~ m
:(^|/)sem
$:); # called as 'sem'
3095 if(defined $opt::semaphore
) { $Global::semaphore
= 1; }
3096 if(defined $opt::semaphoretimeout
) { $Global::semaphore
= 1; }
3097 if(defined $opt::semaphorename
) { $Global::semaphore
= 1; }
3098 if(defined $opt::fg
and not $opt::tmux
and not $opt::tmuxpane
) {
3099 $Global::semaphore
= 1;
3101 if(defined $opt::bg
) { $Global::semaphore
= 1; }
3102 if(defined $opt::wait and not $opt::sqlmaster
) {
3103 $Global::semaphore
= 1; @ARGV = "true";
3105 if($Global::semaphore
) {
3107 # Assign the first -a to STDIN
3108 open(STDIN
,"<",shift @opt::a
);
3110 # We currently have no way of dealing with more -a
3111 ::error
("A semaphore cannot take input from more files\n");
3112 ::wait_and_exit
(255);
3115 @opt::a
= ("/dev/null");
3116 # Append a dummy empty argument
3117 # \0 => nothing (not the empty string)
3118 push(@Global::unget_argv
, [Arg
->new("\0noarg")]);
3119 $Semaphore::timeout
= int(multiply_time_units
($opt::semaphoretimeout
))
3121 if(defined $opt::semaphorename
) {
3122 $Semaphore::name
= $opt::semaphorename
;
3125 $Semaphore::name
= `tty`;
3126 chomp $Semaphore::name
;
3128 $Semaphore::fg
= $opt::fg
;
3129 $Semaphore::wait = $opt::wait;
3130 $Global::default_simultaneous_sshlogins
= 1;
3131 if(not defined $opt::jobs
) {
3134 if($Global::interactive
and $opt::bg
) {
3135 ::error
("Jobs running in the ".
3136 "background cannot be interactive.");
3137 ::wait_and_exit
(255);
3143 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
3145 my $ignore_filename = $Global::config_dir
. "/ignored_vars";
3146 write_or_exit
($ignore_filename,map { $_,"\n" } keys %ENV);
3150 # Open joblog as specified by --joblog
3153 # $opt::resume_failed
3156 # $Global::job_already_run
3159 if(($opt::resume
or $opt::resume_failed
)
3161 not ($opt::joblog
or $opt::results
)) {
3162 ::error
("--resume and --resume-failed require --joblog or --results.");
3163 ::wait_and_exit
(255);
3165 if(defined $opt::joblog
and $opt::joblog
=~ s/^\+//) {
3166 # --joblog +filename = append to filename
3173 not $opt::sqlworker
)) {
3174 # Do not log if --sqlworker
3175 if($opt::resume
|| $opt::resume_failed
|| $opt::retry_failed
) {
3176 if(open(my $joblog_fh, "<", $opt::joblog
)) {
3177 # Enable utf8 if possible
3178 eval q{ binmode $joblog_fh, "encoding(utf8)"; };
3180 # Override $/ with \n because -d might be set
3182 # If there is a header: Open as append later
3183 $append = <$joblog_fh>;
3185 if($opt::retry_failed) {
3186 # Make a regexp that matches commands with exit+signal=0
3187 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
3188 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
3190 while(<$joblog_fh>) {
3191 if(/$joblog_regexp/o) {
3192 # This is 30% faster than set_job_already_run($1);
3193 vec($Global::job_already_run,($1||0),1) = 1;
3194 $Global::total_completed++;
3195 $group[$1-1] = "true";
3196 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
3197 # Grab out the command
3201 ::error("Format of '$opt::joblog' is wrong: $_");
3202 ::wait_and_exit(255);
3206 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
3208 # Put args into argfile
3209 if(grep /\0/, @group) {
3210 # force --null to deal with \n in commandlines
3211 ::warning("Command lines contain newline. ".
3216 # Replace \0 with '\n' as used in print_joblog()
3217 print $outfh (map { s/\0/\n/g; $_,$/ }
3220 exit_if_disk_full();
3221 # Set filehandle to -a
3224 # Remove $command (so -a is run)
3227 if($opt::resume || $opt::resume_failed) {
3228 if($opt::resume_failed) {
3229 # Make a regexp that matches commands with exit+signal=0
3230 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
3231 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
3233 # Just match the job number
3234 $joblog_regexp='^(\d+)';
3236 while(<$joblog_fh>) {
3237 if(/$joblog_regexp/o) {
3238 # This is 30% faster than set_job_already_run($1);
3239 vec($Global::job_already_run,($1||0),1) = 1;
3240 $Global::total_completed++;
3241 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
3242 ::error("Format of '$opt::joblog' is wrong: $_");
3243 ::wait_and_exit(255);
3249 # $opt::null may be set if the commands contain \n
3250 if($opt::null) { $/ = "\0"; }
3253 # Do not write to joblog in a dry-run
3257 $Global::joblog = open_or_exit(">>", $opt::joblog);
3259 if($opt::joblog eq "-") {
3260 # Use STDOUT as joblog
3261 $Global::joblog = $Global::fh{1};
3263 # Overwrite the joblog
3264 $Global::joblog = open_or_exit(">", $opt::joblog);
3266 print $Global::joblog
3267 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
3268 "Send", "Receive", "Exitval", "Signal", "Command"
3274 sub open_json_csv() {
3276 # Output as JSON/CSV/TSV
3277 if($opt::results eq "-.csv"
3279 $opt::results eq "-.tsv"
3281 $opt::results eq "-.json") {
3282 # Output as JSON/CSV/TSV on stdout
3283 open $Global::csv_fh, ">&", "STDOUT" or
3284 ::die_bug("Can't dup STDOUT in csv: $!");
3285 # Do not print any other output to STDOUT
3286 # by forcing all other output to /dev/null
3287 open my $fd, ">", "/dev/null" or
3288 ::die_bug("Can't >/dev/null in csv: $!");
3289 $Global::fh{1} = $fd;
3290 $Global::fh{2} = $fd;
3291 } elsif($Global::csvsep or $Global::jsonout) {
3292 $Global::csv_fh = open_or_exit(">",$opt::results);
3297 sub find_compression_program() {
3298 # Find a fast compression program
3300 # $compress_program = compress program with options
3301 # $decompress_program = decompress program with options
3303 # Search for these. Sorted by speed on 128 core
3305 # seq 120000000|shuf > 1gb &
3307 # apt install make g++ htop
3308 # wget -O - pi.dk/3 | bash
3309 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
3310 # git clone https://github.com/facebook/zstd.git
3311 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
3312 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
3313 # chmod +x /usr/local/bin/lrz
3315 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
3316 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
3317 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
3318 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
3322 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
3324 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
3325 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
3326 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
3327 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
3328 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
3330 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
3331 lrz pxz bzip2 lzma xz clzip);
3334 return ("$p -c -1","$p -dc");
3338 return ("cat","cat");
3341 sub read_options
() {
3342 # Read options from command line, profile and $PARALLEL
3344 # $opt::shebang_wrap
3352 # @ARGV_no_opt = @ARGV without --options
3354 # This must be done first as this may exec myself
3355 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
3356 $ARGV[0] =~ /^--shebang-?wrap/ or
3357 $ARGV[0] =~ /^--hashbang/)) {
3358 # Program is called from #! line in script
3359 # remove --shebang-wrap if it is set
3360 $opt::shebang_wrap
= ($ARGV[0] =~ s/^--shebang-?wrap *//);
3361 # remove --shebang if it is set
3362 $opt::shebang
= ($ARGV[0] =~ s/^--shebang *//);
3363 # remove --hashbang if it is set
3364 $opt::shebang
.= ($ARGV[0] =~ s/^--hashbang *//);
3366 my $argfile = Q
(pop @ARGV);
3367 # exec myself to split $ARGV[0] into separate fields
3368 exec "$0 --skip-first-line -a $argfile @ARGV";
3370 if($opt::shebang_wrap
) {
3373 if ($^O
eq 'freebsd') {
3374 # FreeBSD's #! puts different values in @ARGV than Linux' does
3375 my @nooptions = @ARGV;
3376 get_options_from_array
(\
@nooptions);
3377 while($#ARGV > $#nooptions) {
3378 push @options, shift @ARGV;
3380 while(@ARGV and $ARGV[0] ne ":::") {
3381 push @parser, shift @ARGV;
3383 if(@ARGV and $ARGV[0] eq ":::") {
3387 @options = shift @ARGV;
3389 my $script = Q
(Q
(shift @ARGV)); # TODO - test if script = " "
3390 my @args = map{ Q
($_) } @ARGV;
3391 # exec myself to split $ARGV[0] into separate fields
3392 exec "$0 --_pipe-means-argfiles @options @parser $script ".
3396 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
3397 ::warning
("--shebang and --shebang-wrap must be the first ".
3401 Getopt
::Long
::Configure
("bundling","require_order");
3402 my @ARGV_copy = @ARGV;
3403 my @ARGV_orig = @ARGV;
3404 # Check if there is a --profile to set @opt::profile
3405 get_options_from_array
(\
@ARGV_copy,"profile|J=s","plain") || die_usage
();
3406 my @ARGV_profile = ();
3408 if(not $opt::plain
) {
3409 # Add options from $PARALLEL_HOME/config and other profiles
3410 my @config_profiles = (
3411 "/etc/parallel/config",
3412 (map { "$_/config" } @Global::config_dirs
),
3413 $ENV{'HOME'}."/.parallelrc");
3414 my @profiles = @config_profiles;
3416 # --profile overrides default profiles
3418 for my $profile (@opt::profile
) {
3419 if($profile =~ m
:^\
./|^/:) {
3420 # Look for ./profile in .
3421 # Look for /profile in /
3422 push @profiles, grep { -r
$_ } $profile;
3424 # Look for the $profile in @Global::config_dirs
3425 push @profiles, grep { -r
$_ }
3426 map { "$_/$profile" } @Global::config_dirs
;
3430 for my $profile (@profiles) {
3432 ::debug
("init","Read $profile\n");
3434 open (my $in_fh, "<", $profile) ||
3435 ::die_bug
("read-profile: $profile");
3439 push @ARGV_profile, shell_words
($_);
3443 if(grep /^\Q$profile\E$/, @config_profiles) {
3444 # config file is not required to exist
3446 ::error
("$profile not readable.");
3451 # Add options from shell variable $PARALLEL
3452 if($ENV{'PARALLEL'}) {
3453 push @ARGV_env, shell_words
($ENV{'PARALLEL'});
3455 # Add options from env_parallel.csh via $PARALLEL_CSH
3456 if($ENV{'PARALLEL_CSH'}) {
3457 push @ARGV_env, shell_words
($ENV{'PARALLEL_CSH'});
3460 Getopt
::Long
::Configure
("bundling","require_order");
3461 get_options_from_array
(\
@ARGV_profile) || die_usage
();
3462 get_options_from_array
(\
@ARGV_env) || die_usage
();
3463 get_options_from_array
(\
@ARGV) || die_usage
();
3464 # What were the options given on the command line?
3465 # Used to start --sqlworker
3466 my $ai = arrayindex
(\
@ARGV_orig, \
@ARGV);
3467 @Global::options_in_argv
= @ARGV_orig[0..$ai-1];
3468 # Prepend non-options to @ARGV (such as commands like 'nice')
3469 unshift @ARGV, @ARGV_profile, @ARGV_env;
3473 sub arrayindex
($$) {
3474 # Similar to Perl's index function, but for arrays
3476 # $arr_ref1 = ref to @array1 to search in
3477 # $arr_ref2 = ref to @array2 to search for
3479 # $pos = position of @array1 in @array2, -1 if not found
3480 my ($arr_ref1,$arr_ref2) = @_;
3481 my $array1_as_string = join "", map { "\0".$_ } @
$arr_ref1;
3482 my $array2_as_string = join "", map { "\0".$_ } @
$arr_ref2;
3483 my $i = index($array1_as_string,$array2_as_string,0);
3484 if($i == -1) { return -1 }
3485 my @before = split /\0/, substr($array1_as_string,0,$i);
3489 sub read_args_from_command_line
() {
3490 # Arguments given on the command line after:
3491 # ::: ($Global::arg_sep)
3492 # :::: ($Global::arg_file_sep)
3493 # :::+ ($Global::arg_sep with --link)
3494 # ::::+ ($Global::arg_file_sep with --link)
3495 # Removes the arguments from @ARGV and:
3496 # - puts filenames into -a
3497 # - puts arguments into files and add the files to -a
3498 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
3500 # @::ARGV = command option ::: arg arg arg :::: argfiles
3503 # $Global::arg_file_sep
3504 # $opt::_pipe_means_argfiles
3508 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
3509 my %group_sep = ($Global::arg_sep
=> ":::",
3510 $Global::arg_sep
."+" => ":::+",
3511 $Global::arg_file_sep
=> "::::",
3512 $Global::arg_file_sep
."+" => "::::+");
3514 # file is linked if file starts with +
3518 if(-e
$_ and -e
$noplus) {
3519 ::error
("It is unclear whether you mean +./$noplus or ./+$noplus");
3521 } elsif(-e
$_ and not -e
$noplus) {
3522 # This is ./+file = this is not linked
3524 } elsif(not -e
$_ and -e
$noplus) {
3525 # This is +./file = this is linked
3527 } elsif(not -e
$_ and not -e
$noplus) {
3528 # File does not exist, maybe it is stdin?
3530 # This is - = this is not linked
3532 } elsif($_ eq "+-") {
3533 # This is +- = this is linked
3536 ::error
("File not found: $_");
3540 ::die_bug
("noplus: $noplus $_");
3546 sub cmd_template
() {
3547 # remove command template from @ARGV
3548 # keep ::: / :::: in @ARGV if any
3551 my $arg = shift @ARGV;
3552 if($group_sep{$arg}) {
3553 # Found separator: push it back and exit loop
3554 unshift @ARGV, $arg;
3557 push @cmd_template, $arg;
3559 return @cmd_template;
3561 sub divide_into_groups
() {
3562 # Split arguments from @ARGV into groups:
3563 # ::: 1 2 3 :::: a b c ::::+ d e f
3565 # [ ::: 1 2 3 ], [ :::: a b c ], [ ::::+ d e f ]
3569 my $arg = shift @ARGV;
3570 if($group_sep{$arg}) {
3573 @g = ($group_sep{$arg});
3579 shift @grp; # The first will always be empty
3582 sub save_to_file
(@
) {
3583 # Put args into a file, return open file handle of file
3585 my ($fh,$name) = ::tmpfile
(SUFFIX
=> ".arg");
3587 # Put args into argfile
3588 print $fh map { $_,$/ } @_;
3590 exit_if_disk_full
();
3593 my @cmd = cmd_template
();
3594 # The rest of @ARGV is ::: / :::: args
3595 # If there are any -a: Rewrite them to use ::::
3596 if(@opt::a
) { unshift @ARGV, $Global::arg_file_sep
, @opt::a
; }
3598 # Convert ::: and :::: into (linked) files and put those into @opt::a
3599 for my $g_ref (divide_into_groups
()) {
3600 my $group_sep = shift @
$g_ref;
3601 if($group_sep eq ":::" or $group_sep eq ":::+") {
3602 # Group starts with ::: / :::+
3603 if($opt::_pipe_means_argfiles
and $#$g_ref < 0) {
3605 # Deal with --shebang-wrap and ::: on the shebang line
3607 push @opt::a
, save_to_file
(@
$g_ref);
3608 # if $group_sep == ":::+": it is linked
3609 push @opt::linkinputsource
, ($group_sep eq ":::+");
3611 } elsif($group_sep eq "::::" or $group_sep eq "::::+") {
3612 # Group starts with :::: / ::::+
3613 for my $f (@
$g_ref) {
3614 if($group_sep eq "::::+") {
3617 push @opt::linkinputsource
, 1;
3618 } elsif($group_sep eq "::::") {
3619 # Auto detect linking
3622 push @opt::linkinputsource
, 1;
3626 push @opt::linkinputsource
, 0;
3630 ::die_bug
("arg link error");
3634 ::die_bug
("arg link error");
3637 # Output: command to run with options
3643 unlink keys %Global::unlink;
3644 map { rmdir $_ } keys %Global::unlink;
3645 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
3646 for(keys %Global::sshmaster
) {
3647 # If 'ssh -M's are running: kill them
3653 sub __QUOTING_ARGUMENTS_FOR_SHELL__
() {}
3655 sub shell_quote
(@
) {
3657 # @strings = strings to be quoted
3659 # @shell_quoted_strings = string quoted as needed by the shell
3660 return wantarray ?
(map { Q
($_) } @_) : (join" ",map { Q
($_) } @_);
3663 sub shell_quote_scalar_rc
($) {
3664 # Quote for the rc-shell
3669 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
3670 # A string was replaced
3671 # No need to test for "" or \0
3674 } elsif($a eq "\0") {
3681 sub shell_quote_scalar_csh
($) {
3685 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
3686 # This is 1% faster than the above
3687 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
3689 # quote newline in csh as \\\n
3690 ($a =~ s/[\n]/"\\\n"/go)) {
3691 # A string was replaced
3692 # No need to test for "" or \0
3695 } elsif($a eq "\0") {
3702 sub shell_quote_scalar_default
($) {
3703 # Quote for other shells (Bourne compatibles)
3705 # $string = string to be quoted
3707 # $shell_quoted = string quoted as needed by the shell
3709 if(/[^-_.+a-z0-9\/]/i
) {
3710 s/'+/'"$&"'/g; # "-quote '-quotes: ''' => "'''"
3711 $_ = "'$_'"; # '-quote entire string
3712 s/^''//; # Remove unneeded '' at ends
3713 s/''$//; # (faster than s/^''|''$//g)
3715 } elsif ($_ eq "") {
3723 sub shell_quote_scalar
($) {
3724 # Quote the string so the shell will not expand any special chars
3726 # $string = string to be quoted
3728 # $shell_quoted = string quoted as needed by the shell
3730 # Speed optimization: Choose the correct shell_quote_scalar_*
3731 # and call that directly from now on
3732 no warnings
'redefine';
3733 if($Global::cshell
) {
3735 *shell_quote_scalar
= \
&shell_quote_scalar_csh
;
3736 } elsif($Global::shell
=~ m
:(^|/)rc
$:) {
3738 *shell_quote_scalar
= \
&shell_quote_scalar_rc
;
3741 *shell_quote_scalar
= \
&shell_quote_scalar_default
;
3743 # The sub is now redefined. Call it
3744 return shell_quote_scalar
($_[0]);
3748 # Q alias for ::shell_quote_scalar
3749 my $ret = shell_quote_scalar
($_[0]);
3750 no warnings
'redefine';
3751 *Q
= \
&::shell_quote_scalar
;
3755 sub shell_quote_file
($) {
3756 # Quote the string so shell will not expand any special chars
3757 # and prepend ./ if needed
3759 # $filename = filename to be shell quoted
3761 # $quoted_filename = filename quoted with \ and ./ if needed
3764 if($a =~ m
:^/: or $a =~ m:^\./:) {
3765 # /abs/path or ./rel/path => skip
3767 # rel/path => ./rel/path
3774 sub shell_words
(@
) {
3776 # $string = shell line
3778 # @shell_words = $string split into words as shell would do
3779 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3780 return Text
::ParseWords
::shellwords
(@_);
3783 sub perl_quote_scalar
($) {
3784 # Quote the string so perl's eval will not expand any special chars
3786 # $string = string to be quoted
3788 # $perl_quoted = string quoted with \ as needed by perl's eval
3791 $a =~ s/[\\\"\$\@]/\\$&/go;
3796 # -w complains about prototype
3798 # pQ alias for ::perl_quote_scalar
3799 my $ret = perl_quote_scalar
($_[0]);
3800 *pQ
= \
&::perl_quote_scalar
;
3804 sub unquote_printf
() {
3805 # Convert \t \n \r \000 \0
3807 # $string = string with \t \n \r \num \0
3809 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3814 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3815 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3820 sub __FILEHANDLES__
() {}
3823 sub save_stdin_stdout_stderr
() {
3824 # Remember the original STDIN, STDOUT and STDERR
3825 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3828 # $Global::original_stderr
3829 # $Global::original_stdin
3832 # TODO Disabled until we have an open3 that will take n filehandles
3833 # for my $fdno (1..61) {
3834 # # /dev/fd/62 and above are used by bash for <(cmd)
3835 # # Find file descriptors that are already opened (by the shell)
3836 # Only focus on stdout+stderr for now
3837 for my $fdno (1..2) {
3839 # 2-argument-open is used to be compatible with old perl 5.8.0
3840 # bug #43570: Perl 5.8.0 creates 61 files
3841 if(open($fh,">&=$fdno")) {
3842 $Global::fh
{$fdno}=$fh;
3845 open $Global::original_stderr
, ">&", "STDERR" or
3846 ::die_bug
("Can't dup STDERR: $!");
3847 open $Global::status_fd
, ">&", "STDERR" or
3848 ::die_bug
("Can't dup STDERR: $!");
3849 open $Global::original_stdin
, "<&", "STDIN" or
3850 ::die_bug
("Can't dup STDIN: $!");
3853 sub enough_file_handles
() {
3854 # Check that we have enough filehandles available for starting
3860 # 1 if ungrouped (thus not needing extra filehandles)
3861 # 0 if too few filehandles
3862 # 1 if enough filehandles
3863 if(not $opt::ungroup
) {
3865 my $enough_filehandles = 1;
3866 # perl uses 7 filehandles for something?
3867 # open3 uses 2 extra filehandles temporarily
3868 # We need a filehandle for each redirected file descriptor
3869 # (normally just STDOUT and STDERR)
3870 for my $i (1..(7+2+keys %Global::fh
)) {
3871 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3873 for (values %fh) { close $_; }
3874 return $enough_filehandles;
3876 # Ungrouped does not need extra file handles
3881 sub open_or_exit
($$) {
3882 # Open a file name or exit if the file cannot be opened
3884 # $mode = read:"<" write:">"
3885 # $file = filehandle or filename to open
3887 # $Global::original_stdin
3889 # $fh = file handle to opened file
3894 return ($Global::original_stdin
|| *STDIN
);
3896 return ($Global::original_stderr
|| *STDERR
);
3899 if(ref $file eq "GLOB") {
3900 # This is an open filehandle
3904 if(not open($fh, $mode, $file)) {
3905 ::error
("Cannot open `$file': $!");
3911 sub slurp_or_exit
($) {
3912 # Read content of a file or exit if the file cannot be opened
3914 # $file = filehandle or filename to open
3916 # $content = content as scalar
3917 my $fh = open_or_exit
("<",shift);
3918 # $/ = undef => slurp whole file
3920 my $content = <$fh>;
3925 sub write_or_exit
(@
) {
3926 # Write content to a file or exit if the file cannot be opened
3928 # $file = filehandle or filename to open
3929 # @content = content to be written
3934 error
("Cannot write to `$file': $!");
3937 my $fh = open_or_exit
(">",$file);
3938 print($fh @_) or failed
();
3939 close($fh) or failed
();
3942 sub set_fh_blocking
($) {
3943 # Set filehandle as blocking
3945 # $fh = filehandle to be blocking
3949 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3951 # Get the current flags on the filehandle
3952 fcntl($fh, &F_GETFL, $flags) || die $!;
3953 # Remove non-blocking from the flags
3954 $flags &= ~&O_NONBLOCK;
3955 # Set the flags on the filehandle
3956 fcntl($fh, &F_SETFL, $flags) || die $!;
3959 sub set_fh_non_blocking($) {
3960 # Set filehandle as non-blocking
3962 # $fh = filehandle to be blocking
3966 $Global::use{"Fcntl
"} ||= eval "use Fcntl
qw(:DEFAULT :flock); 1;";
3968 # Get the current flags on the filehandle
3969 fcntl($fh, &F_GETFL, $flags) || die $!;
3970 # Add non-blocking to the flags
3971 $flags |= &O_NONBLOCK;
3972 # Set the flags on the filehandle
3973 fcntl($fh, &F_SETFL, $flags) || die $!;
3977 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3980 # Variable structure:
3982 # $Global::running{$pid} = Pointer to Job-object
3983 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3984 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3985 # $Global::total_running = total number of running jobs
3986 # $Global::total_started = total jobs started
3987 # $Global::max_procs_file = filename if --jobs is given a filename
3988 # $Global::JobQueue = JobQueue object for the queue of jobs
3989 # $Global::timeoutq = queue of times where jobs timeout
3990 # $Global::newest_job = Job object of the most recent job started
3991 # $Global::newest_starttime = timestamp of $Global::newest_job
3993 # $Global::minimal_command_line_length = min len supported by all sshlogins
3994 # $Global::start_no_new_jobs = should more jobs be started?
3995 # $Global::original_stderr = file handle for STDERR when the program started
3996 # $Global::total_started = total number of jobs started
3997 # $Global::joblog = filehandle of joblog
3998 # $Global::debug = Is debugging on?
3999 # $Global::exitstatus = status code of GNU Parallel
4000 # $Global::quoting = quote the command to run
4002 sub init_run_jobs() {
4003 # Set Global variables and progress signal handlers
4004 # Do the copying of basefiles
4006 $Global::total_running = 0;
4007 $Global::total_started = 0;
4008 $SIG{USR1} = \&list_running_jobs;
4009 $SIG{USR2} = \&toggle_progress;
4010 if(@opt::basefile) { setup_basefile(); }
4016 my $max_procs_file_last_mod;
4018 sub changed_procs_file {
4019 # If --jobs is a file and it is modfied:
4020 # Force recomputing of max_jobs_running for each $sshlogin
4022 # $Global::max_procs_file
4025 if($Global::max_procs_file) {
4027 my $mtime = (stat($Global::max_procs_file))[9];
4028 $max_procs_file_last_mod ||= 0;
4029 if($mtime > $max_procs_file_last_mod) {
4030 # file changed: Force re-computing max_jobs_running
4031 $max_procs_file_last_mod = $mtime;
4032 for my $sshlogin (values %Global::host) {
4033 $sshlogin->set_max_jobs_running(undef);
4039 sub changed_sshloginfile {
4040 # If --slf is changed:
4045 # @opt::sshloginfile
4048 # $opt::filter_hosts
4050 if(@opt::sshloginfile) {
4051 # Is --sshloginfile changed?
4052 for my $slf (@opt::sshloginfile) {
4053 my $actual_file = expand_slf_shorthand($slf);
4054 my $mtime = (stat($actual_file))[9];
4055 $last_mtime{$actual_file} ||= $mtime;
4056 if($mtime - $last_mtime{$actual_file} > 1) {
4058 "--sshloginfile
$actual_file changed
. reload
\n");
4059 $last_mtime{$actual_file} = $mtime;
4062 @Global::sshlogin = ();
4063 for (values %Global::host) {
4064 # Don't start new jobs on any host
4065 # except the ones added back later
4066 $_->set_max_jobs_running(0);
4068 # This will set max_jobs_running on the SSHlogins
4069 read_sshloginfile($actual_file);
4071 $opt::filter_hosts and filter_hosts();
4078 sub start_more_jobs {
4079 # Run start_another_job() but only if:
4080 # * not $Global::start_no_new_jobs set
4081 # * not JobQueue is empty
4082 # * not load on server is too high
4083 # * not server swapping
4084 # * not too short time since last remote login
4087 # $Global::start_no_new_jobs
4093 # $Global::newest_starttime
4095 # $jobs_started = number of jobs started
4096 my $jobs_started = 0;
4097 if($Global::start_no_new_jobs) {
4098 return $jobs_started;
4100 if(time - ($last_time||0) > 1) {
4101 # At most do this every second
4103 changed_procs_file();
4104 changed_sshloginfile();
4106 # This will start 1 job on each --sshlogin (if possible)
4107 # thus distribute the jobs on the --sshlogins round robin
4108 for my $sshlogin (values %Global::host) {
4109 if($Global::JobQueue->empty() and not $opt::pipe) {
4110 # No more jobs in the queue
4113 debug("run
", "Running jobs before on
", $sshlogin->string(), ": ",
4114 $sshlogin->jobs_running(), "\n");
4115 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
4118 $opt::delay-0.008 > ::now()-$Global::newest_starttime) {
4119 # It has been too short since last start
4122 if($opt::load and $sshlogin->loadavg_too_high()) {
4123 # The load is too high or unknown
4126 if($opt::noswap and $sshlogin->swapping()) {
4127 # The server is swapping
4130 if($opt::limit and $sshlogin->limit()) {
4134 if(($opt::memfree or $opt::memsuspend)
4136 $sshlogin->memfree() < $Global::memlimit) {
4137 # The server has not enough mem free
4138 ::debug("mem
", "Not starting job
: not enough mem
\n");
4141 if($sshlogin->too_fast_remote_login()) {
4142 # It has been too short since last login
4145 debug("run
", $sshlogin->string(),
4146 " has
", $sshlogin->jobs_running(),
4147 " out of
", $sshlogin->max_jobs_running(),
4148 " jobs running
. Start another
.\n");
4149 if(start_another_job($sshlogin) == 0) {
4150 # No more jobs to start on this $sshlogin
4151 debug("run
","No jobs started on
",
4152 $sshlogin->string(), "\n");
4155 $sshlogin->inc_jobs_running();
4156 $sshlogin->set_last_login_at(::now());
4159 debug("run
","Running jobs after on
", $sshlogin->string(), ": ",
4160 $sshlogin->jobs_running(), " of
",
4161 $sshlogin->max_jobs_running(), "\n");
4164 return $jobs_started;
4169 my $no_more_file_handles_warned;
4171 sub start_another_job() {
4172 # If there are enough filehandles
4173 # and JobQueue not empty
4174 # and not $job is in joblog
4175 # Then grab a job from Global::JobQueue,
4176 # start it at sshlogin
4177 # mark it as virgin_job
4179 # $sshlogin = the SSHLogin to start the job on
4185 # @Global::virgin_jobs
4187 # 1 if another jobs was started
4189 my $sshlogin = shift;
4190 # Do we have enough file handles to start another job?
4191 if(enough_file_handles()) {
4192 if($Global::JobQueue->empty() and not $opt::pipe) {
4193 # No more commands to run
4194 debug("start
", "Not starting
: JobQueue empty
\n");
4198 # Skip jobs already in job log
4199 # Skip jobs already in results
4201 $job = get_job_with_sshlogin($sshlogin);
4202 if(not defined $job) {
4203 # No command available for that sshlogin
4204 debug("start
", "Not starting
: no jobs available
for ",
4205 $sshlogin->string(), "\n");
4208 if($job->is_already_in_joblog()) {
4211 } while ($job->is_already_in_joblog()
4213 ($opt::results and $opt::resume
4214 and $job->is_already_in_results()));
4215 debug("start
", "Command to run on
'",
4216 $job->sshlogin()->string(), "': '",
4217 $job->replaced(),"'\n");
4220 if($job->virgin()) {
4221 push(@Global::virgin_jobs,$job);
4223 # Block already set: This is a retry
4224 $job->write_block();
4227 debug("start
", "Started as seq
", $job->seq(),
4228 " pid
:", $job->pid(), "\n");
4231 # Not enough processes to run the job.
4232 # Put it back on the queue.
4233 $Global::JobQueue->unget($job);
4234 # Count down the number of jobs to run for this SSHLogin.
4235 my $max = $sshlogin->max_jobs_running();
4236 if($max > 1) { $max--; } else {
4238 for my $record (@{$job->{'commandline'}{'arg_list'}}) {
4239 push @arg, map { $_->orig() } @$record;
4241 ::error("No more processes
: cannot run a single job
. ".
4242 "Something is wrong at
@arg.");
4243 ::wait_and_exit(255);
4245 $sshlogin->set_max_jobs_running($max);
4246 # Sleep up to 300 ms to give other processes time to die
4247 ::usleep(rand()*300);
4248 ::warning("No more processes
: ".
4249 "Decreasing number of running jobs to
$max.",
4250 "Try increasing
'ulimit -u' (try
: ulimit
-u
`ulimit -Hu`)",
4251 "or increasing
'nproc' in /etc/security
/limits
.conf
",
4252 "or increasing
/proc/sys
/kernel/pid_max
");
4257 # No more file handles
4258 $no_more_file_handles_warned++ or
4259 ::warning("No more file handles
. ",
4260 "Try running
'parallel -j0 -N 100 --pipe parallel -j0'",
4261 "or increasing
'ulimit -n' (try
: ulimit
-n
`ulimit -Hn`)",
4262 "or increasing
'nofile' in /etc/security
/limits
.conf
",
4263 "or increasing
/proc/sys
/fs/file
-max
");
4264 debug("start
", "No more file handles
. ");
4270 sub init_progress() {
4274 # list of computers for progress output
4279 my $progress = progress();
4280 my $cpu_units = $opt::use_sockets_instead_of_threads ? "CPU sockets
" :
4281 ($opt::use_cores_instead_of_threads ? "CPU cores
" : "CPU threads
");
4282 return ("\nComputers
/ $cpu_units / Max jobs to run
\n",
4283 $progress->{'workerlist'},"\n",$progress->{'header'});
4286 sub drain_job_queue(@) {
4289 # $Global::total_running
4290 # $Global::max_jobs_running
4294 # $Global::start_no_new_jobs
4300 while($Global::total_running > 0) {
4301 debug("init
",$Global::total_running, "==", scalar
4302 keys %Global::running," slots
: ", $Global::max_jobs_running);
4304 # When using --pipe sometimes file handles are not
4306 for my $job (values %Global::running) {
4307 close $job->fh(0,"w
");
4310 if($opt::progress) {
4311 my $progress = progress();
4312 ::status_no_nl("\r",$progress->{'status'});
4314 if($Global::total_running < $Global::max_jobs_running
4315 and not $Global::JobQueue->empty()) {
4316 # These jobs may not be started because of loadavg
4317 # or too little time between each ssh login.
4318 if(start_more_jobs() > 0) {
4319 # Exponential back-on if jobs were started
4320 $sleep = $sleep/2+0.001;
4323 # Exponential back-off sleeping
4324 $sleep = ::reap_usleep($sleep);
4325 $sleepsum += $sleep;
4326 if($sleepsum >= 1000) {
4327 # At most do this every second
4329 changed_procs_file();
4330 changed_sshloginfile();
4334 if(not $Global::JobQueue->empty()) {
4335 # These jobs may not be started:
4336 # * because there the --filter-hosts has removed all
4337 if(not %Global::host) {
4338 ::error("There are
no hosts left to run on
.");
4339 ::wait_and_exit(255);
4341 # * because of loadavg
4342 # * because of too little time between each ssh login.
4343 $sleep = ::reap_usleep($sleep);
4345 if($Global::max_jobs_running == 0) {
4346 ::warning("There are
no job slots available
. Increase
--jobs
.");
4349 while($opt::sqlmaster and not $Global::sql->finished()) {
4351 $sleep = ::reap_usleep($sleep);
4353 if($Global::start_sqlworker) {
4354 # Start an SQL worker as we are now sure there is work to do
4355 $Global::start_sqlworker = 0;
4356 if(my $pid = fork()) {
4357 $Global::unkilled_sqlworker = $pid;
4359 # Replace --sql/--sqlandworker with --sqlworker
4360 my @ARGV = (map { s/^--sql(andworker)?$/--sqlworker/; $_ }
4361 @Global::options_in_argv);
4362 # exec the --sqlworker
4363 exec($0,@ARGV,@command);
4367 } while ($Global::total_running > 0
4369 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
4371 $opt::sqlmaster and not $Global::sql->finished());
4372 $Global::all_jobs_done = 1;
4373 if($opt::progress) {
4374 my $progress = progress();
4375 ::status("\r".$progress->{'status'});
4379 sub toggle_progress() {
4380 # Turn on/off progress view
4384 $opt::progress = not $opt::progress;
4385 if($opt::progress) {
4386 ::status_no_nl(init_progress());
4399 # $Global::total_started
4401 # $workerlist = list of workers
4402 # $header = that will fit on the screen
4403 # $status = message that will fit on the screen
4405 return {"workerlist
" => "", "header
" => "", "status
" => bar()};
4408 my ($status,$header)=("","");
4410 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
4412 $eta = sprintf("ETA
: %ds Left
: %d AVG
: %.2fs
",
4413 $this_eta, $left, $avgtime);
4415 my $termcols = terminal_columns();
4416 my @workers = sort keys %Global::host;
4419 for my $w (@workers) {
4421 $i{'sshlogin'} = $w eq ":" ? "local" : $w;
4422 $i{'no'} = $workerno++;
4423 $i{'ncpu'} = ($Global::host{$w}->ncpus() || "-");
4424 $i{'jobslots'} = $Global::host{$w}->max_jobs_running();
4425 $i{'completed'} = ($Global::host{$w}->jobs_completed() || 0);
4426 $i{'running'} = $Global::host{$w}->jobs_running();
4427 $i{'pct'} = $Global::total_started ?
4428 (($i{'running'}+$i{'completed'})*100 /
4429 $Global::total_started) : 0;
4430 $i{'time'} = $i{'completed'} ? (time-$^T)/($i{'completed'}) : 0;
4434 my $workerlist = "";
4435 for my $w (@workers) {
4437 $wrk{$w}{'no'}.":".$wrk{$w}{'sshlogin'} ." / ".
4438 $wrk{$w}{'ncpu'}." / ".
4439 $wrk{$w}{'jobslots'}."\n";
4441 # Force $status to select one of the below formats
4442 $status = "c
"x($termcols+1);
4443 # Select an output format that will fit on a single line
4444 if(length $status > $termcols) {
4445 # sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs
4446 $header = "Computer
:jobs running
/jobs completed/".
4447 "%of started jobs
/Average seconds to complete
";
4448 $status = $eta . join(" ",map {
4449 sprintf("%s:%d/%d/%d%%/%.1fs
",
4451 {'sshlogin','running','completed','pct','time'}
4454 if(length $status > $termcols) {
4455 # 1:XX/XX/XX%/X.Xs 2:XX/XX/XX%/X.Xs 3:XX/XX/XX%/X.Xs
4456 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
4457 $status = $eta . join(" ",map {
4458 sprintf("%s:%d/%d/%d%%/%.1fs
",
4460 {'no','running','completed','pct','time'}
4463 if(length $status > $termcols) {
4464 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
4465 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
4466 $status = $eta . join(" ",map {
4467 sprintf("%s:%d/%d/%d%%",
4469 {'sshlogin','running','completed','pct'}
4472 if(length $status > $termcols) {
4473 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX%
4474 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
4475 $status = $eta . join(" ",map {
4476 sprintf("%s:%d/%d/%d%%",
4478 {'no','running','completed','pct'}
4481 if(length $status > $termcols) {
4482 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX
4483 $header = "Computer
:jobs running
/jobs completed
";
4484 $status = $eta . join(" ", map {
4487 {'sshlogin','running','completed'}
4490 if(length $status > $termcols) {
4491 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
4492 $header = "Computer
:jobs running
/jobs completed
";
4493 $status = $eta . join(" ", map {
4496 {'no','running','completed'}
4499 if(length $status > $termcols) {
4500 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
4501 $header = "Computer
:jobs completed
";
4502 $status = $eta . join(" ", map {
4505 {'sshlogin','completed'}
4508 if(length $status > $termcols) {
4509 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
4510 $header = "Computer
:jobs completed
";
4511 $status = $eta . join(" ", map {
4517 if($last_header ne $header) {
4519 $last_header = $header;
4524 $eol = `sh -c "tput el
</dev/tty
" 2>/dev/null`;
4526 if($eol eq "") { $eol = "\033[K
"; }
4529 return {"workerlist
" => $workerlist, "header
" => $header,
4530 "status
" => $status.$eol};
4536 my ($first_completed, $smoothed_avg_time, $last_eta);
4539 # Calculate important numbers for ETA
4541 # $total = number of jobs in total
4542 # $completed = number of jobs completed
4543 # $left = number of jobs left
4544 # $pctcomplete = percent of jobs completed
4545 # $avgtime = averaged time
4546 # $eta = smoothed eta
4547 my $completed = $Global::total_completed;
4548 # In rare cases with -X will $completed > total_jobs()
4549 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
4550 my $left = $total - $completed;
4551 if(not $completed) {
4552 return($total, $completed, $left, 0, 0, 0);
4554 my $pctcomplete = ::min($completed / $total,100);
4555 $first_completed ||= time;
4556 my $timepassed = (time - $first_completed);
4557 my $avgtime = $timepassed / $completed;
4558 $smoothed_avg_time ||= $avgtime;
4559 # Smooth the eta so it does not jump wildly
4560 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
4561 $pctcomplete * $avgtime;
4562 my $eta = int($left * $smoothed_avg_time);
4563 if($eta*0.90 < $last_eta and $last_eta < $eta) {
4564 # Eta jumped less that 10% up: Keep the last eta instead
4569 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
4578 # $status = bar with eta, completed jobs, arg and pct
4580 $reset ||= "\033[0m
";
4581 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
4583 if($Global::all_jobs_done) { $eta = now()-$Global::start_time; }
4584 my $arg = $Global::newest_job ?
4585 $Global::newest_job->{'commandline'}->
4586 replace_placeholders(["\257<\257>"],0,0) : "";
4587 $arg = decode_utf8($arg);
4588 my $eta_dhms = ::seconds_to_time_units($eta);
4590 sprintf("%d%% %d:%d=%s %s",
4591 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
4592 my $terminal_width = terminal_columns();
4593 my $s = sprintf("%-${terminal_width
}s
",
4594 substr($bar_text." "x$terminal_width,
4595 0,$terminal_width));
4596 my $width = int($terminal_width * $pctcomplete);
4597 substr($s,$width,0) = $reset;
4598 my $zenity = sprintf("%-${terminal_width
}s
",
4599 substr("# $eta sec $arg",
4600 0,$terminal_width));
4601 # Prefix with zenity header
4602 $s = "\r" . $zenity . "\r" . $pctcomplete*100 .
4603 "\r" . $rev . $s . $reset;
4609 my ($rows,$columns,$last_update_time);
4611 sub compute_terminal_size
() {
4612 # && true is to force spawning a shell and not just exec'ing
4613 my @tput = qx{ tput lines cols
</dev/tty
2>/dev/null
&& true
};
4614 $rows = 0 + $tput[0];
4615 $columns = 0 + $tput[1];
4616 if(not ($rows && $columns)) {
4617 # && true is to force spawning a shell and not just exec'ing
4618 my $stty = qx{ stty
-a
</dev/tty
2>/dev/null
&& true
};
4619 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
4620 # MacOSX/IRIX/AIX/Tru64
4621 $stty =~ /(\d+) columns/ and do { $columns = $1; };
4622 $stty =~ /(\d+) rows/ and do { $rows = $1; };
4624 $stty =~ /columns (\d+)/ and do { $columns = $1; };
4625 $stty =~ /rows (\d+)/ and do { $rows = $1; };
4626 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
4627 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
4628 $stty =~ /rows = (\d+)/ and do { $rows = $1; };
4630 $stty =~ /rows=(\d+),(\d+)/ and do { ($rows,$columns) = ($1,$2); };
4632 if(not ($rows && $columns)) {
4633 # && true is to force spawning a shell and not just exec'ing
4634 my $resize = qx{ resize
2>/dev/null
&& true
};
4635 $resize =~ /COLUMNS=(\d+);/ and do { $columns ||= $1; };
4636 $resize =~ /LINES=(\d+);/ and do { $rows ||= $1; };
4642 sub update_terminal_size
() {
4643 # Only update once per second.
4644 if($last_update_time < time) {
4645 $last_update_time = time;
4646 compute_terminal_size
();
4647 # Set signal WINdow CHange to force recompute
4648 $SIG{WINCH
} = \
&compute_terminal_size
;
4652 sub terminal_rows
() {
4653 # Get the number of rows of the terminal.
4655 # number of rows of the screen
4656 update_terminal_size
();
4660 sub terminal_columns
() {
4661 # Get the number of columns of the terminal.
4663 # number of columns of the screen
4664 update_terminal_size
();
4670 # Convert \t into spaces
4673 # Deal with multi-byte characters
4674 for my $src (split("\t",$_[0])) {
4675 push @out, $src. " "x
(8-mbswidth
($src)%8);
4677 return join "",@out;
4680 # Prototype forwarding
4681 sub get_job_with_sshlogin
($);
4682 sub get_job_with_sshlogin
($) {
4684 # $sshlogin = which host should the job be run on?
4689 # $job = next job object for $sshlogin if any available
4690 my $sshlogin = shift;
4693 if ($opt::hostgroups
) {
4694 my @other_hostgroup_jobs = ();
4696 while($job = $Global::JobQueue
->get()) {
4697 if($sshlogin->in_hostgroups($job->hostgroups())) {
4698 # Found a job to be run on a hostgroup of this
4702 # This job was not in the hostgroups of $sshlogin
4703 push @other_hostgroup_jobs, $job;
4706 $Global::JobQueue
->unget(@other_hostgroup_jobs);
4707 if(not defined $job) {
4712 $job = $Global::JobQueue
->get();
4713 if(not defined $job) {
4715 ::debug
("start", "No more jobs: JobQueue empty\n");
4719 if(not $job->suspended()) {
4720 $job->set_sshlogin($sshlogin);
4722 if(defined $opt::retries
and $job->failed_here()) {
4723 # This command with these args failed for this sshlogin
4724 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
4725 # Only look at the Global::host that have > 0 jobslots
4726 if($no_of_failed_sshlogins ==
4727 grep { $_->max_jobs_running() > 0 } values %Global::host
4728 and $job->failed_here() == $min_failures) {
4729 # It failed the same or more times on another host:
4730 # run it on this host
4732 # If it failed fewer times on another host:
4733 # Find another job to run
4735 if(not $Global::JobQueue
->empty()) {
4736 # This can potentially recurse for all args
4737 no warnings
'recursion';
4738 $nextjob = get_job_with_sshlogin
($sshlogin);
4740 # Push the command back on the queue
4741 $Global::JobQueue
->unget($job);
4749 sub __REMOTE_SSH__
() {}
4752 sub read_sshloginfiles
(@
) {
4753 # Read a list of --slf's
4755 # @files = files or symbolic file names to read
4758 read_sshloginfile
(expand_slf_shorthand
($s));
4762 sub expand_slf_shorthand
($) {
4763 # Expand --slf shorthand into a read file name
4765 # $file = file or symbolic file name to read
4767 # $file = actual file name to read
4771 } elsif($file eq "..") {
4772 $file = $Global::config_dir
."/sshloginfile";
4773 } elsif($file eq ".") {
4774 $file = "/etc/parallel/sshloginfile";
4775 } elsif(not -r
$file) {
4776 for(@Global::config_dirs
) {
4777 if(not -r
$_."/".$file) {
4778 # Try prepending $PARALLEL_HOME
4779 ::error
("Cannot open $file.");
4780 ::wait_and_exit
(255);
4782 $file = $_."/".$file;
4790 sub read_sshloginfile
($) {
4791 # Read sshloginfile into @Global::sshlogin
4793 # $file = file to read
4801 ::debug
("init","--slf ",$file);
4806 $in_fh = open_or_exit
("<", $file);
4812 push @Global::sshlogin
, $_;
4819 sub parse_sshlogin
() {
4820 # Parse @Global::sshlogin into %Global::host.
4821 # Keep only hosts that are in one of the given ssh hostgroups.
4824 # $Global::minimal_command_line_length
4832 sub expand_range
($) {
4833 # Expand host[9-11,15]a[09-11]b
4834 # [9-11,15] => 9 10 11 15
4835 # [09-11] => 09 10 11
4837 my ($prefix, $range, $suffix);
4838 if(($prefix, $range, $suffix) = $in =~ /^(.*?)\[([-0-9,]*)\](.*)$/) {
4840 while(length $range) {
4841 if($range =~ s/^,//) {
4843 } elsif($range =~ s/^(\d+)-(\d+)//) {
4844 my ($start, $end) = ($1, $2);
4845 push @res, map { $prefix . $_ . $suffix } $start..$end;
4846 } elsif($range =~ s/^(\d+)//) {
4847 push @res, map { $prefix . $_ . $suffix } $1;
4849 die "Cannot parse $in (at $range)";
4852 return map { expand_range
($_) } @res;
4858 if(not @Global::sshlogin
) { @Global::sshlogin
= (":"); }
4859 for my $sshlogin (@Global::sshlogin
) {
4860 # Split up -S sshlogin,sshlogin
4861 # Parse ,, and \, as , but do not split on that
4862 # -S "ssh -J jump1,,jump2 host1,host2" =>
4863 # ssh -J jump1,jump2 host1
4865 # Protect \, and ,, as \0
4866 $sshlogin =~ s/\\,|,,/\0/g;
4867 # Protect , in ranges: [___,___] => [___\0___]
4868 while($sshlogin =~ s/(\[[-0-9\0]*),(.*\])/$1\0$2/g) {}
4869 for my $s (split /,|\n/, $sshlogin) {
4872 if ($s eq ".." or $s eq "-") {
4873 # This may add to @Global::sshlogin - possibly bug
4874 read_sshloginfile
(expand_slf_shorthand
($s));
4877 # Expand host[1-12,15]a[01-10]b
4878 push @login, expand_range
($s);
4882 $Global::minimal_command_line_length
= 100_000_000
;
4883 my @allowed_hostgroups;
4884 for my $ncpu_sshlogin_string (::uniq
(@login)) {
4885 my $sshlogin = SSHLogin
->new($ncpu_sshlogin_string);
4886 my $sshlogin_string = $sshlogin->string();
4887 if($sshlogin_string eq "") {
4888 # This is an ssh group: -S @webservers
4889 push @allowed_hostgroups, $sshlogin->hostgroups();
4892 if($Global::host
{$sshlogin_string}) {
4893 # This sshlogin has already been added:
4894 # It is probably a host that has come back
4895 # Set the max_jobs_running back to the original
4896 debug
("run","Already seen $sshlogin_string\n");
4897 if($sshlogin->{'ncpus'}) {
4898 # If ncpus set by '#/' of the sshlogin, overwrite it:
4899 $Global::host
{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4901 $Global::host
{$sshlogin_string}->set_max_jobs_running(undef);
4904 $sshlogin->set_maxlength(Limits
::Command
::max_length
());
4906 $Global::minimal_command_line_length
=
4907 ::min
($Global::minimal_command_line_length
, $sshlogin->maxlength());
4908 $Global::host
{$sshlogin_string} = $sshlogin;
4910 $Global::usable_command_line_length
=
4911 # Usable len = maxlen - 3000 for wrapping, div 2 for hexing
4912 int(($Global::minimal_command_line_length
- 3000)/2);
4913 if($opt::max_chars
) {
4914 if($opt::max_chars
<= $Global::usable_command_line_length
) {
4915 $Global::usable_command_line_length
= $opt::max_chars
;
4917 ::warning
("Value for option -s should be < ".
4918 $Global::usable_command_line_length
.".");
4921 if(@allowed_hostgroups) {
4922 # Remove hosts that are not in these groups
4923 while (my ($string, $sshlogin) = each %Global::host
) {
4924 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4925 delete $Global::host
{$string};
4930 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4931 if(@Global::transfer_files
or @opt::return
4932 or $opt::cleanup
or @opt::basefile
) {
4933 if(not remote_hosts
()) {
4934 # There are no remote hosts
4936 ::warning
("--trc ignored as there are no remote --sshlogin.");
4937 } elsif (defined $opt::transfer
) {
4938 ::warning
("--transfer ignored as there are ".
4939 "no remote --sshlogin.");
4940 } elsif (@opt::transfer_files
) {
4941 ::warning
("--transferfile ignored as there ".
4942 "are no remote --sshlogin.");
4943 } elsif (@opt::return) {
4944 ::warning
("--return ignored as there are no remote --sshlogin.");
4945 } elsif (defined $opt::cleanup
and not %opt::template
) {
4946 ::warning
("--cleanup ignored as there ".
4947 "are no remote --sshlogin.");
4948 } elsif (@opt::basefile
) {
4949 ::warning
("--basefile ignored as there ".
4950 "are no remote --sshlogin.");
4956 sub remote_hosts
() {
4957 # Return sshlogins that are not ':'
4961 # list of sshlogins with ':' removed
4962 return grep !/^:$/, keys %Global::host
;
4965 sub setup_basefile
() {
4966 # Transfer basefiles to each $sshlogin
4967 # This needs to be done before first jobs on $sshlogin is run
4975 for my $sshlogin (values %Global::host
) {
4976 if($sshlogin->local()) { next }
4977 for my $file (@opt::basefile
) {
4978 if($file !~ m
:^/: and $opt::workdir
eq "...") {
4979 ::error
("Work dir '...' will not work with relative basefiles.");
4980 ::wait_and_exit
(255);
4984 CommandLine
->new(1,["true"],{},0,0,[],[],[],[],{},{});
4985 my $dummyjob = Job
->new($dummycmdline);
4986 $workdir = $dummyjob->workdir();
4988 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4991 debug
("init", "basesetup: @cmd\n");
4992 my ($exitstatus,$stdout_ref,$stderr_ref) =
4993 run_gnu_parallel
((join "\n",@cmd),"-j0","--retries",5);
4995 my @stdout = @
$stdout_ref;
4996 my @stderr = @
$stderr_ref;
4997 ::error
("Copying of --basefile failed: @stdout@stderr");
4998 ::wait_and_exit
(255);
5002 sub cleanup_basefile
() {
5003 # Remove the basefiles transferred
5011 my $dummycmdline = CommandLine
->new(1,["true"],{},0,0,[],[],[],[],{},{});
5012 my $dummyjob = Job
->new($dummycmdline);
5013 $workdir = $dummyjob->workdir();
5015 for my $sshlogin (values %Global::host
) {
5016 if($sshlogin->local()) { next }
5017 for my $file (@opt::basefile
) {
5018 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
5021 debug
("init", "basecleanup: @cmd\n");
5022 my ($exitstatus,$stdout_ref,$stderr_ref) =
5023 run_gnu_parallel
(join("\n",@cmd),"-j0","--retries",5);
5025 my @stdout = @
$stdout_ref;
5026 my @stderr = @
$stderr_ref;
5027 ::error
("Cleanup of --basefile failed: @stdout@stderr");
5028 ::wait_and_exit
(255);
5032 sub run_gnu_parallel
() {
5033 my ($stdin,@args) = @_;
5034 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
5035 print $Global::original_stderr
` $cmd wait` ;
5039 sub _run_gnu_parallel
() {
5041 # This should ideally just fork an internal copy
5042 # and not start it through a shell
5044 # $stdin = data to provide on stdin for GNU Parallel
5045 # @args = command line arguments
5047 # $exitstatus = exitcode of GNU Parallel run
5048 # \@stdout = standard output
5049 # \@stderr = standard error
5050 my ($stdin,@args) = @_;
5051 my ($exitstatus,@stdout,@stderr);
5052 my ($stdin_fh,$stdout_fh)=(gensym
(),gensym
());
5053 my ($stderr_fh, $stderrname) = ::tmpfile
(SUFFIX
=> ".par");
5056 my $pid = ::open3
($stdin_fh,$stdout_fh,$stderr_fh,
5057 $0,qw(--plain --shell /bin/sh --will-cite), @args);
5058 if(my $writerpid = fork()) {
5060 @stdout = <$stdout_fh>;
5061 # Now stdout is closed:
5062 # These pids should be dead or die very soon
5063 while(kill 0, $writerpid) { ::usleep
(1); }
5066 # while(kill 0, $pid) { ::usleep(1); }
5069 seek $stderr_fh, 0, 0;
5070 @stderr = <$stderr_fh>;
5076 print $stdin_fh $stdin;
5080 return ($exitstatus,\
@stdout,\
@stderr);
5083 sub filter_hosts
() {
5084 # Remove down --sshlogins from active duty.
5085 # Find ncpus, ncores, maxlen, time-to-login for each host.
5088 # $Global::minimal_command_line_length
5089 # $opt::use_sockets_instead_of_threads
5090 # $opt::use_cores_instead_of_threads
5091 # $opt::use_cpus_instead_of_cores
5094 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
5095 $maxlen_ref, $echo_ref, $down_hosts_ref) =
5096 parse_host_filtering
(parallelized_host_filtering
());
5098 delete @Global::host
{@
$down_hosts_ref};
5099 @
$down_hosts_ref and ::warning
("Removed @$down_hosts_ref.");
5101 $Global::minimal_command_line_length
= 100_000_000
;
5102 while (my ($string, $sshlogin) = each %Global::host
) {
5103 if($sshlogin->local()) { next }
5104 my ($nsockets,$ncores,$nthreads,$time_to_login,$maxlen) =
5105 ($nsockets_ref->{$string},$ncores_ref->{$string},
5106 $nthreads_ref->{$string},$time_to_login_ref->{$string},
5107 $maxlen_ref->{$string});
5108 defined $nsockets or ::die_bug
("nsockets missing: $string");
5109 defined $ncores or ::die_bug
("ncores missing: $string");
5110 defined $nthreads or ::die_bug
("nthreads missing: $string");
5111 defined $time_to_login or ::die_bug
("time_to_login missing: $string");
5112 defined $maxlen or ::die_bug
("maxlen missing: $string");
5113 # ncpus may be set by 4/hostname or may be undefined yet
5114 my $ncpus = $sshlogin->{'ncpus'};
5115 # $nthreads may be 0 if GNU Parallel is not installed remotely
5116 $ncpus = $nthreads || $ncpus || $sshlogin->ncpus();
5117 if($opt::use_cpus_instead_of_cores
) {
5118 $ncpus = $ncores || $ncpus;
5119 } elsif($opt::use_sockets_instead_of_threads
) {
5120 $ncpus = $nsockets || $ncpus;
5121 } elsif($opt::use_cores_instead_of_threads
) {
5122 $ncpus = $ncores || $ncpus;
5124 $sshlogin->set_ncpus($ncpus);
5125 $sshlogin->set_time_to_login($time_to_login);
5126 $maxlen = $maxlen || Limits
::Command
::max_length
();
5127 $sshlogin->set_maxlength($maxlen);
5128 ::debug
("init", "Timing from -S:$string ",
5130 " nsockets:",$nsockets,
5131 " ncores:", $ncores,
5132 " nthreads:",$nthreads,
5133 " time_to_login:", $time_to_login,
5134 " maxlen:", $maxlen,
5135 " min_max_len:", $Global::minimal_command_line_length
,"\n");
5139 sub parse_host_filtering
() {
5141 # @lines = output from parallelized_host_filtering()
5143 # \%nsockets = number of sockets of {host}
5144 # \%ncores = number of cores of {host}
5145 # \%nthreads = number of hyperthreaded cores of {host}
5146 # \%time_to_login = time_to_login on {host}
5147 # \%maxlen = max command len on {host}
5148 # \%echo = echo received from {host}
5149 # \@down_hosts = list of hosts with no answer
5151 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
5154 ::debug
("init","Read: ",$_);
5156 my @col = split /\t/, $_;
5157 if($col[0] =~ /^parallel: Warning:/) {
5158 # Timed out job: Ignore it
5160 } elsif(defined $col[6]) {
5161 # This is a line from --joblog
5162 # seq host time spent sent received exit signal command
5163 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
5164 if($col[0] eq "Seq" and $col[1] eq "Host" and
5165 $col[2] eq "Starttime") {
5169 # Get server from: eval true server\;
5170 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
5171 ::die_bug
("col8 does not contain host: $col[8] in $_");
5174 $Global::host
{$host} or next;
5175 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
5176 # exit == 255 or exit == timeout (-1): ssh failed/timedout
5177 # exit == 1: lsh failed
5179 ::debug
("init", "--filtered $host\n");
5180 push(@down_hosts, $host);
5181 } elsif($col[6] eq "127") {
5182 # signal == 127: parallel not installed remote
5183 # Set nsockets, ncores, nthreads = 1
5184 ::warning
("Could not figure out ".
5185 "number of cpus on $host. Using 1.");
5186 $nsockets{$host} = 1;
5188 $nthreads{$host} = 1;
5189 $maxlen{$host} = Limits
::Command
::max_length
();
5190 } elsif($col[0] =~ /^\d+$/ and $Global::host
{$host}) {
5191 # Remember how log it took to log in
5192 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
5193 $time_to_login{$host} = ::min
($time_to_login{$host},$col[3]);
5195 ::die_bug
("host check unmatched long jobline: $_");
5197 } elsif($Global::host
{$col[0]}) {
5198 # This output from --number-of-cores, --number-of-cpus,
5199 # --max-line-length-allowed
5202 # maxlen: server 131071
5203 if(/parallel: Warning: Cannot figure out number of/) {
5206 if(/\t(perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from)/
5210 /\t(Host key fingerprint is|\+-.*-\+|\|.*\|)/
5212 /\t\S+: Undefined variable./
5214 # Skip these (from perl):
5215 # perl: warning: Setting locale failed.
5216 # perl: warning: Please check that your locale settings:
5217 # LANGUAGE = (unset),
5219 # LANG = "en_US.UTF-8"
5220 # are supported and installed on your system.
5221 # perl: warning: Falling back to the standard locale ("C").
5222 # Disconnected from 127.0.0.1 port 22
5224 # Skip these (from ssh):
5225 # Warning: Permanently added * to the list of known hosts.
5226 # Warning: Identity file * not accessible: *
5227 # (VisualHostKey=yes)
5228 # Host key fingerprint is SHA256:...
5229 # +--[ED25519 256]--+
5231 # +----[SHA256]-----+
5233 # Skip these (from csh):
5234 # MANPATH: Undefined variable.
5235 } elsif(not defined $nsockets{$col[0]}) {
5236 $nsockets{$col[0]} = $col[1];
5237 } elsif(not defined $ncores{$col[0]}) {
5238 $ncores{$col[0]} = $col[1];
5239 } elsif(not defined $nthreads{$col[0]}) {
5240 $nthreads{$col[0]} = $col[1];
5241 } elsif(not defined $maxlen{$col[0]}) {
5242 $maxlen{$col[0]} = $col[1];
5243 } elsif(not defined $echo{$col[0]}) {
5244 $echo{$col[0]} = $col[1];
5246 ::die_bug
("host check too many col0: $_");
5249 ::die_bug
("host check unmatched short jobline ($col[0]): $_");
5252 @down_hosts = uniq
(@down_hosts);
5253 return(\
%nsockets, \
%ncores, \
%nthreads, \
%time_to_login,
5254 \
%maxlen, \
%echo, \
@down_hosts);
5257 sub parallelized_host_filtering
() {
5261 # text entries with:
5263 # * hostname \t number of cores
5264 # * hostname \t number of cpus
5265 # * hostname \t max-line-length-allowed
5266 # * hostname \t empty
5269 # Wrap with ssh and --env
5270 # Return $default_value if command fails
5271 my $sshlogin = shift;
5272 my $command = shift;
5273 # wrapper that returns output "0\n" if the command fails
5274 # E.g. parallel not installed => "0\n"
5275 my $wcmd = q
(perl
-e
'$a=`).$command.q(`; print $? ? "0".v010 : $a');
5276 my $commandline = CommandLine
->new(1,[$wcmd],{},0,0,[],[],[],[],{},{});
5277 my $job = Job
->new($commandline);
5278 $job->set_sshlogin($sshlogin);
5280 return($job->{'wrapped'});
5283 my(@sockets, @cores, @threads, @maxline, @echo);
5284 while (my ($host, $sshlogin) = each %Global::host
) {
5285 if($host eq ":") { next }
5286 # The 'true' is used to get the $host out later
5287 push(@sockets, $host."\t"."true $host; ".
5288 sshwrapped
($sshlogin,"parallel --number-of-sockets")."\n\0");
5289 push(@cores, $host."\t"."true $host; ".
5290 sshwrapped
($sshlogin,"parallel --number-of-cores")."\n\0");
5291 push(@threads, $host."\t"."true $host; ".
5292 sshwrapped
($sshlogin,"parallel --number-of-threads")."\n\0");
5293 push(@maxline, $host."\t"."true $host; ".
5294 sshwrapped
($sshlogin,
5295 "parallel --max-line-length-allowed")."\n\0");
5296 # 'echo' is used to get the fastest possible ssh login time
5297 push(@echo, $host."\t"."true $host; ".
5298 $sshlogin->wrap("echo $host")."\n\0");
5300 # --timeout 10: Setting up an SSH connection and running a simple
5301 # command should never take > 10 sec.
5302 # --delay 0.1: If multiple sshlogins use the same proxy the delay
5303 # will make it less likely to overload the ssh daemon.
5304 # --retries 3: If the ssh daemon is overloaded, try 3 times
5306 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
5307 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
5308 $cmd = $Global::shell
." -c ".Q
($cmd);
5309 ::debug
("init", $cmd, "\n");
5313 my ($host_fh,$in,$err);
5314 open3
($in, $host_fh, $err, $cmd) || ::die_bug
("parallel host check: $cmd");
5315 ::debug
("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
5318 # Give the commands to run to the $cmd
5320 print $in @sockets, @cores, @threads, @maxline, @echo;
5325 # If -0: $/ must be \n
5328 # TODO incompatible with '-quoting. Needs to be fixed differently
5330 # # if last char = ' then append next line
5331 # # This may be due to quoting of \n in environment var
5344 # Runs @command on all hosts.
5345 # Uses parallel to run @command on each host.
5346 # --jobs = number of hosts to run on simultaneously.
5347 # For each host a parallel command with the args will be running.
5350 # $Global::exitstatus
5354 # $opt::arg_file_sep
5376 # @command = command to run on all hosts
5380 # $joblog = filename of joblog - undef if none
5382 # $tmpfile = temp file for joblog - undef if none
5384 if(not defined $joblog) {
5387 my ($fh, $tmpfile) = ::tmpfile
(SUFFIX
=> ".log");
5391 my ($input_source_fh_ref,@command) = @_;
5392 if($Global::quoting
) {
5393 @command = shell_quote
(@command);
5396 # Copy all @input_source_fh (-a and :::) into tempfiles
5398 for my $fh (@
$input_source_fh_ref) {
5399 my ($outfh, $name) = ::tmpfile
(SUFFIX
=> ".all", UNLINK
=> not $opt::D
);
5400 print $outfh (<$fh>);
5402 push @argfiles, $name;
5404 if(@opt::basefile
) { setup_basefile
(); }
5405 # for each sshlogin do:
5406 # parallel -S $sshlogin $command :::: @argfiles
5408 # Pass some of the options to the sub-parallels, not all of them as
5409 # -P should only go to the first, and -S should not be copied at all.
5412 ((defined $opt::sshdelay
) ?
"--delay ".$opt::sshdelay
: ""),
5413 ((defined $opt::memfree
) ?
"--memfree ".$opt::memfree
: ""),
5414 ((defined $opt::memsuspend
) ?
"--memfree ".$opt::memsuspend
: ""),
5415 ((defined $opt::D
) ?
"-D $opt::D" : ""),
5416 ((defined $opt::group
) ?
"--group" : ""),
5417 ((defined $opt::jobs
) ?
"-P $opt::jobs" : ""),
5418 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
5419 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
5420 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
5421 ((defined $opt::plain
) ?
"--plain" : ""),
5422 (($opt::ungroup
== 1) ?
"-u" : ""),
5423 ((defined $opt::tee
) ?
"--tee" : ""),
5427 ((defined $opt::sshdelay
) ?
"--delay ".$opt::sshdelay
: ""),
5428 ((defined $opt::D
) ?
"-D $opt::D" : ""),
5429 ((defined $opt::arg_file_sep
) ?
"--arg-file-sep ".$opt::arg_file_sep
: ""),
5430 ((defined $opt::arg_sep
) ?
"--arg-sep ".$opt::arg_sep
: ""),
5431 ((defined $opt::colsep
) ?
"--colsep ".shell_quote
($opt::colsep
) : ""),
5432 ((defined $opt::files
) ?
"--files" : ""),
5433 ((defined $opt::files0
) ?
"--files0" : ""),
5434 ((defined $opt::group
) ?
"--group" : ""),
5435 ((defined $opt::cleanup
) ?
"--cleanup" : ""),
5436 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
5437 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
5438 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
5439 ((defined $opt::plain
) ?
"--plain" : ""),
5440 ((defined $opt::plus
) ?
"--plus" : ""),
5441 ((defined $opt::retries
) ?
"--retries ".$opt::retries
: ""),
5442 ((defined $opt::timeout
) ?
"--timeout ".$opt::timeout
: ""),
5443 (($opt::ungroup
== 1) ?
"-u" : ""),
5444 ((defined $opt::ssh
) ?
"--ssh '".$opt::ssh
."'" : ""),
5445 ((defined $opt::tee
) ?
"--tee" : ""),
5446 ((defined $opt::workdir
) ?
"--wd ".Q
($opt::workdir
) : ""),
5447 (@Global::transfer_files ?
map { "--tf ".Q
($_) }
5448 @Global::transfer_files
: ""),
5449 (@Global::ret_files ?
map { "--return ".Q
($_) }
5450 @Global::ret_files
: ""),
5451 (@opt::env ?
map { "--env ".Q
($_) } @opt::env
: ""),
5452 (map { "-v" } @opt::v
),
5454 ::debug
("init", "| $0 $options\n");
5455 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
5456 ::die_bug
("This does not run GNU Parallel: $0 $options");
5458 for my $host (sort keys %Global::host
) {
5459 my $sshlogin = $Global::host
{$host};
5460 my $qsshlogin = Q
($sshlogin->string());
5461 my $qsshloginpw = Q
($sshlogin->pwstring());
5462 if($qsshloginpw ne $qsshlogin) {
5463 ::warning_once
("Using password or SSHPASS with --(n)onall ".
5464 "exposes the password",
5465 "on the command line, ".
5466 "making it visible to local users via `ps`.");
5468 my $joblog = tmp_joblog
($opt::joblog
);
5470 push @joblogs, $joblog;
5471 $joblog = "--joblog ".::Q
($joblog);
5473 my $quad = $opt::arg_file_sep
|| "::::";
5474 # If PARALLEL_ENV is set: Pass it on
5475 my $penv=$Global::parallel_env ?
5476 "PARALLEL_ENV=".Q
($Global::parallel_env
) : '';
5478 if(defined $opt::results
) {
5479 $results = Q
($opt::results
) . $qsshlogin;
5481 ::debug
("init", "$penv $0 $suboptions -j1 $joblog ",
5482 ((defined $opt::tag
) ?
"--tagstring ".$qsshlogin : ""),
5483 ((defined $opt::results
) ?
"--results ".$results : ""),
5484 " -S $qsshloginpw ",
5485 join(" ",shell_quote
(@command,$quad,@argfiles)),"\n");
5486 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
5487 ((defined $opt::tag
) ?
"--tagstring ".$qsshlogin : ""),
5488 ((defined $opt::results
) ?
"--results ".$results : ""),
5489 " -S $qsshloginpw ",
5490 join(" ",shell_quote
(@command,$quad,@argfiles)),"\0";
5493 $Global::exitstatus
= $?
>> 8;
5494 debug
("init", "--onall exitvalue ", $?
);
5495 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
5496 $Global::debug
or unlink(@argfiles);
5498 for my $joblog (@joblogs) {
5500 my $fh = open_or_exit
("<", $joblog);
5501 # Skip first line (header);
5503 print $Global::joblog
(<$fh>);
5510 sub __SIGNAL_HANDLING__
() {}
5514 # Send TSTP signal (Ctrl-Z) to all children process groups
5518 signal_children
("TSTP");
5522 # Send SIGPIPE signal to all children process groups
5526 signal_children
("PIPE");
5529 sub signal_children
() {
5530 # Send signal to all children process groups
5531 # and GNU Parallel itself
5536 debug
("run", "Sending $signal ");
5537 kill $signal, map { -$_ } keys %Global::running
;
5538 # Use default signal handler for GNU Parallel itself
5539 $SIG{$signal} = undef;
5543 sub save_original_signal_handler
() {
5544 # Remember the original signal handler
5546 # %Global::original_sig
5549 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
5553 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
5556 %Global::original_sig
= %SIG;
5557 $SIG{TERM
} = sub {}; # Dummy until jobs really start
5558 $SIG{ALRM
} = 'IGNORE';
5559 # Allow Ctrl-Z to suspend and `fg` to continue
5560 $SIG{TSTP
} = \
&sigtstp
;
5561 $SIG{PIPE
} = \
&sigpipe
;
5563 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
5564 $SIG{TSTP
} = \
&sigtstp
;
5565 for my $job (values %Global::running
) {
5566 if($job->suspended()) {
5567 # Force jobs to suspend, if they are marked as suspended.
5568 # --memsupspend can suspend a job that will be resumed
5569 # if the user presses CTRL-Z followed by `fg`.
5572 # Resume the rest of the jobs
5579 sub list_running_jobs
() {
5580 # Print running jobs on tty
5584 for my $job (values %Global::running
) {
5585 ::status
("$Global::progname: ".$job->replaced());
5589 sub start_no_new_jobs
() {
5590 # Start no more jobs
5592 # %Global::original_sig
5594 # $Global::start_no_new_jobs
5596 unlink keys %Global::unlink;
5598 ("$Global::progname: SIGHUP received. No new jobs will be started.",
5599 "$Global::progname: Waiting for these ".(keys %Global::running
).
5600 " jobs to finish. Send SIGTERM to stop now.");
5601 list_running_jobs
();
5602 $Global::start_no_new_jobs
||= 1;
5606 # Run reaper until there are no more left
5608 # @pids_reaped = pids of reaped processes
5611 while($pid = reaper
()) {
5612 push @pids_reaped, $pid;
5614 return @pids_reaped;
5619 # * Set exitstatus, exitsignal, endtime.
5620 # * Free ressources for new job
5621 # * Update median runtime
5623 # * If --halt = now: Kill children
5630 # $Global::total_running
5632 # $stiff = PID of child finished
5634 debug
("run", "Reaper ");
5635 if(($stiff = waitpid(-1, &WNOHANG
)) <= 0) {
5636 # No jobs waiting to be reaped
5640 # $stiff = pid of dead process
5641 my $job = $Global::running
{$stiff};
5643 # '-a <(seq 10)' will give us a pid not in %Global::running
5644 # The same will one of the ssh -M: ignore
5646 delete $Global::running
{$stiff};
5647 $Global::total_running
--;
5648 if($job->{'commandline'}{'skip'}) {
5649 # $job->skip() was called
5650 $job->set_exitstatus(-2);
5651 $job->set_exitsignal(0);
5653 $job->set_exitsignal($?
& 127);
5654 if($job->exitstatus()) {
5655 # Exit status already set - probably by --timeout
5657 # Killed by signal. Many shells return: 128 | $signal
5658 $job->set_exitstatus(128 | $?
);
5661 $job->set_exitstatus($?
>> 8);
5665 debug
("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")");
5666 if($Global::delayauto
or $Global::sshdelayauto
) {
5667 if($job->exitstatus()) {
5668 # Job failed: Increase delay (if $opt::(ssh)delay set)
5669 $opt::delay
&&= $opt::delay
* 1.3;
5670 $opt::sshdelay
&&= $opt::sshdelay
* 1.3;
5672 # Job succeeded: Decrease delay (if $opt::(ssh)delay set)
5673 $opt::delay
&&= $opt::delay
* 0.9;
5674 $opt::sshdelay
&&= $opt::sshdelay
* 0.9;
5676 debug
("run", "delay:$opt::delay ssh:$opt::sshdelay ");
5678 $job->set_endtime(::now
());
5679 my $sshlogin = $job->sshlogin();
5680 $sshlogin->dec_jobs_running();
5681 if($job->should_be_retried()) {
5682 # Free up file handles
5683 $job->free_ressources();
5686 $sshlogin->inc_jobs_completed();
5689 if($opt::timeout
and not $job->exitstatus()) {
5690 # Update average runtime for timeout only for successful jobs
5691 $Global::timeoutq
->update_median_runtime($job->runtime());
5693 if($opt::keeporder
and not $opt::latestline
) {
5694 # --latestline fixes --keeporder in Job::row()
5695 $job->print_earlier_jobs();
5699 if($job->should_we_halt() eq "now") {
5701 ::kill_sleep_seq
($job->pid());
5703 ::wait_and_exit
($Global::halt_exitstatus
);
5708 if($opt::progress
) {
5709 my $progress = progress
();
5710 ::status_no_nl
("\r",$progress->{'status'});
5713 debug
("run", "jobdone \n");
5722 # Kill all jobs by killing their process groups
5724 # $Global::start_no_new_jobs = we are stopping
5725 # $Global::killall = Flag to not run reaper
5726 $Global::start_no_new_jobs
||= 1;
5727 # Do not reap killed children: Ignore them instead
5728 $Global::killall
||= 1;
5729 kill_sleep_seq
(keys %Global::running
);
5732 sub kill_sleep_seq
(@
) {
5733 # Send jobs TERM,TERM,KILL to processgroups
5735 # @pids = list of pids that are also processgroups
5736 # Killing can be slow if you follow @term_seq
5737 # So run the killing in parallel
5739 # Convert pids to process groups ($processgroup = -$pid)
5740 my @pgrps = map { -$_ } @_;
5741 my @term_seq = split/,/,$opt::termseq
;
5743 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
5745 # for each signal+waittime: kill process groups still not dead
5747 @pgrps = kill_sleep
(shift @term_seq, shift @term_seq, @pgrps);
5754 # Kill pids with a signal and wait a while for them to die
5756 # $signal = signal to send to @pids
5757 # $sleep_max = number of ms to sleep at most before returning
5758 # @pids = pids to kill (actually process groups)
5760 # $Global::killall = set by killall() to avoid calling reaper
5762 # @pids = pids still alive
5763 my ($signal, $sleep_max, @pids) = @_;
5764 ::debug
("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
5765 kill $signal, @pids;
5769 while(@pids and $sleepsum < $sleep_max) {
5770 if($Global::killall
) {
5771 # Killall => don't run reaper
5772 while(waitpid(-1, &WNOHANG
) > 0) {
5773 $sleep = $sleep/2+0.001;
5775 } elsif(reapers
()) {
5776 $sleep = $sleep/2+0.001;
5780 $sleepsum += $sleep;
5781 # Keep only living children
5782 @pids = grep { kill(0, $_) } @pids;
5787 sub wait_and_exit
($) {
5788 # If we do not wait, we sometimes get segfault
5791 unlink keys %Global::unlink;
5793 # Kill all jobs without printing
5796 for (keys %Global::unkilled_children
) {
5797 # Kill any (non-jobs) children (e.g. reserved processes)
5800 delete $Global::unkilled_children
{$_};
5802 if($Global::unkilled_sqlworker
) {
5803 waitpid($Global::unkilled_sqlworker
,0);
5805 # Avoid: Warning: unable to close filehandle properly: No space
5806 # left on device during global destruction.
5807 $SIG{__WARN__
} = sub {};
5809 # Make the shell script return $error
5810 print "$Global::parset_endstring\nreturn $error";
5827 "$Global::progname [options] [command [arguments]] < list_of_arguments",
5828 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
5829 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
5831 "-j n Run n jobs in parallel",
5832 "-k Keep same order",
5833 "-X Multiple arguments with context replace",
5834 "--colsep regexp Split input on regexp for positional replacements",
5835 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
5836 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
5837 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
5838 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
5840 "-S sshlogin Example: foo\@server.example.com",
5841 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
5842 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
5843 "--onall Run the given command with argument on all sshlogins",
5844 "--nonall Run the given command with no arguments on all sshlogins",
5846 "--pipe Split stdin (standard input) to multiple jobs.",
5847 "--recend str Record end separator for --pipe.",
5848 "--recstart str Record start separator for --pipe.",
5850 "GNU Parallel can do much more. See 'man $Global::progname' for details",
5852 "Academic tradition requires you to cite works you base your article on.",
5853 "If you use programs that use GNU Parallel to process data for an article in a",
5854 "scientific publication, please cite:",
5856 " Tange, O. (2024, May 22). GNU Parallel 20240522 ('Tbilisi').",
5857 " Zenodo. https://doi.org/10.5281/zenodo.11247979",
5859 # Before changing these lines, please read
5860 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice
5861 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5862 # You accept to be put in a public hall of shame by removing
5864 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5865 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5870 sub citation_notice
() {
5871 # if --will-cite or --plain: do nothing
5872 # if stderr redirected: do nothing
5873 # if $PARALLEL_HOME/will-cite: do nothing
5874 # else: print citation notice to stderr
5879 not -t
$Global::original_stderr
5881 grep { -e
"$_/will-cite" } @Global::config_dirs
) {
5885 ("Academic tradition requires you to cite works you base your article on.",
5886 "If you use programs that use GNU Parallel to process data for an article in a",
5887 "scientific publication, please cite:",
5889 " Tange, O. (2024, May 22). GNU Parallel 20240522 ('Tbilisi').",
5890 " Zenodo. https://doi.org/10.5281/zenodo.11247979",
5892 # Before changing these line, please read
5893 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and
5894 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5895 # You accept to be put in a public hall of shame by
5896 # removing these lines
5897 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5898 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5900 "More about funding GNU Parallel and the citation notice:",
5901 "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice",
5903 "To silence this citation notice: run 'parallel --citation' once.",
5906 mkdir $Global::config_dir
;
5907 # Number of times the user has run GNU Parallel without showing
5908 # willingness to cite
5910 if(open (my $fh, "<", $Global::config_dir
.
5911 "/runs-without-willing-to-cite")) {
5916 if(open (my $fh, ">", $Global::config_dir
.
5917 "/runs-without-willing-to-cite")) {
5921 ::status
("Come on: You have run parallel $runs times. ".
5922 "Isn't it about time ",
5923 "you run 'parallel --citation' once to silence ".
5924 "the citation notice?",
5933 my $fh = $Global::status_fd
|| *STDERR
;
5934 print $fh map { ($_, "\n") } @w;
5938 sub status_no_nl
(@
) {
5940 my $fh = $Global::status_fd
|| *STDERR
;
5947 my $prog = $Global::progname
|| "parallel";
5948 status_no_nl
(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5953 sub warning_once
(@
) {
5955 my $prog = $Global::progname
|| "parallel";
5957 status_no_nl
(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5963 my $prog = $Global::progname
|| "parallel";
5964 status
(map { ($prog.": Error: ". $_); } @w);
5970 ("$Global::progname: This should not happen. You have found a bug. ",
5972 "https://www.gnu.org/software/parallel/man.html#reporting-bugs\n",
5974 "Include this in the report:\n",
5975 "* The version number: $Global::version\n",
5976 "* The bugid: $bugid\n",
5977 "* The command line being run\n",
5978 "* The files being read (put the files on a webserver if they are big)\n",
5980 "If you get the error on smaller/fewer files, please include those instead.\n");
5981 ::wait_and_exit
(255);
5988 "GNU $Global::progname $Global::version",
5989 "Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free Software",
5991 "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5992 "This is free software: you are free to change and redistribute it.",
5993 "GNU $Global::progname comes with no warranty.",
5995 "Web site: https://www.gnu.org/software/${Global::progname}\n",
5996 "When using programs that use GNU Parallel to process data for publication",
5997 "please cite as described in 'parallel --citation'.\n",
6003 my ($all_argv_ref,$argv_options_removed_ref) = @_;
6004 my $all_argv = "@$all_argv_ref";
6005 my $no_opts = "@$argv_options_removed_ref";
6006 $all_argv=~s/--citation//;
6007 if($all_argv ne $no_opts) {
6008 ::warning
("--citation ignores all other options and arguments.");
6013 "Academic tradition requires you to cite works you base your article on.",
6014 "If you use programs that use GNU Parallel to process data for an article in a",
6015 "scientific publication, please cite:",
6017 "\@software{tange_2024_11247979,",
6018 " author = {Tange, Ole},",
6019 " title = {GNU Parallel 20240522 ('Tbilisi')},",
6022 " note = {{GNU Parallel is a general parallelizer to run",
6023 " multiple serial command line programs in parallel",
6024 " without changing them.}},",
6025 " publisher = {Zenodo},",
6026 " doi = {10.5281/zenodo.11247979},",
6027 " url = {https://doi.org/10.5281/zenodo.11247979}",
6030 "(Feel free to use \\nocite{tange_2024_11247979})",
6032 # Before changing these lines, please read
6033 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and
6034 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
6035 # You accept to be put in a public hall of shame by removing
6037 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
6038 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
6040 "More about funding GNU Parallel and the citation notice:",
6041 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
6042 "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice",
6043 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
6046 while(not grep { -e
"$_/will-cite" } @Global::config_dirs
) {
6047 print "\nType: 'will cite' and press enter.\n> ";
6048 my $input = <STDIN
>;
6049 if(not defined $input) {
6052 if($input =~ /will cite/i) {
6053 if(mkdir $Global::config_dir
) {
6054 # Recompute @Global::config_dirs so we can break out of the loop.
6057 if(open (my $fh, ">", $Global::config_dir
."/will-cite")) {
6061 "Thank you for your support: You are the reason why there is funding to",
6062 "continue maintaining GNU Parallel. On behalf of future versions of",
6063 "GNU Parallel, which would not exist without your support:",
6065 " THANK YOU SO MUCH",
6067 "It is really appreciated. The citation notice is now silenced.",
6072 "Thank you for your support. It is much appreciated. The citation",
6073 "cannot permanently be silenced. Use '--will-cite' instead.",
6075 "If you use '--will-cite' in scripts to be run by others you are making",
6076 "it harder for others to see the citation notice. The development of",
6077 "GNU Parallel is indirectly financed through citations, so if users",
6078 "do not know they should cite then you are making it harder to finance",
6079 "development. However, if you pay 10000 EUR, you should feel free to",
6080 "use '--will-cite' in scripts.",
6090 print("Maximal size of command: ",Limits
::Command
::real_max_length
(),"\n",
6091 "Maximal usable size of command: ",
6092 $Global::usable_command_line_length
,"\n",
6094 "Execution will continue now, ",
6095 "and it will try to read its input\n",
6096 "and run commands; if this is not ",
6097 "what you wanted to happen, please\n",
6098 "press CTRL-D or CTRL-C\n");
6102 # Give an embeddable version of GNU Parallel
6103 # Tested with: bash, zsh, ksh, ash, dash, sh
6104 my $randomstring = "cut-here-".join"",
6105 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
6106 if(not -f
$0 or not -r
$0) {
6107 ::error
("--embed only works if parallel is a readable file");
6110 # Read the source from $0
6111 my $source = slurp_or_exit
($0);
6112 my $user = $ENV{LOGNAME
} || $ENV{USERNAME
} || $ENV{USER
};
6113 my $env_parallel_source;
6114 my $shell = $Global::shell
;
6116 for(which
("env_parallel.$shell")) {
6118 # Read the source of env_parallel.shellname
6119 $env_parallel_source .= slurp_or_exit
($_);
6122 print "#!$Global::shell
6124 # Copyright (C) 2007-2024 $user, Ole Tange, http://ole.tange.dk
6125 # and Free Software Foundation, Inc.
6127 # This program is free software; you can redistribute it and/or modify
6128 # it under the terms of the GNU General Public License as published by
6129 # the Free Software Foundation; either version 3 of the License, or
6130 # (at your option) any later version.
6132 # This program is distributed in the hope that it will be useful, but
6133 # WITHOUT ANY WARRANTY; without even the implied warranty of
6134 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6135 # General Public License for more details.
6137 # You should have received a copy of the GNU General Public License
6138 # along with this program; if not, see <https://www.gnu.org/licenses/>
6139 # or write to the Free Software Foundation, Inc., 51 Franklin St,
6140 # Fifth Floor, Boston, MA 02110-1301 USA
6144 # Embedded GNU Parallel created with --embed
6146 # Start GNU Parallel without leaving temporary files
6148 # Not all shells support 'perl <(cat ...)'
6149 # This is a complex way of doing:
6150 # perl <(cat <<'cut-here'
6153 # and also avoiding:
6156 # Make a temporary fifo that perl can read from
6157 _fifo_with_GNU_Parallel_source
=`perl -e 'use POSIX qw(mkfifo);
6159 $f = "/tmp/parallel-".join"",
6160 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
6164 # Put source code into temporary file
6165 # so it is easy to copy to the fifo
6166 _file_with_GNU_Parallel_source=`mktemp`;
6168 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
6172 # Copy the source code from the file to the fifo
6173 # and remove the file and fifo ASAP
6174 # 'sh
-c
' is needed to avoid
6176 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
6178 # Read the source from the fifo
6179 perl $_fifo_with_GNU_Parallel_source "$@"
6182 $env_parallel_source,
6185 # This will call the functions above
6186 parallel -k echo ::: Put your code here
6187 env_parallel --session
6188 env_parallel -k echo ::: Put your code here
6189 parset p,y,c,h -k echo ::: Put your code here
6191 echo You can also activate GNU Parallel for interactive use by:
6194 ::status("Redirect the output to a file and add your changes at the end:",
6195 " $0 --embed > new_script");
6198 sub pack_combined_executable {
6199 my ($before_ref,$with_argsep_ref,$argv_ref) = @_;
6202 # Remove '--combine
-exec file
' from options
6203 for(@{$before_ref}[0..(arrayindex($before_ref,$with_argsep_ref))-1]) {
6204 if (/^--combine-?exec(utable)?$/ || $skip_next) {
6205 # Also skip the filename given to --combine-exec
6206 $skip_next = !$skip_next;
6209 push @parallelopts, $_;
6211 # From ::: and to end
6212 my @argsep = @{$with_argsep_ref}[($#ARGV+1)..$#$with_argsep_ref];
6213 # The executable is now the first in @ARGV
6214 my $execname = shift @ARGV;
6215 # The rest of @ARGV are options for $execname
6216 my @execopts = @ARGV;
6218 "Parallel opts: @parallelopts ",
6219 "Executable: $execname ",
6220 "Execopts: @execopts ",
6221 "Argsep: @argsep\n");
6222 # Read the the executable
6223 my $exec = slurp_or_exit(which($execname));
6224 # Read the source of GNU Parallel and the executable
6225 my $parallel = slurp_or_exit($0);
6226 # Remove possibly __END__ from GNU Parallel
6227 $parallel =~ s/^__END__.*//s;
6228 if(-t $Global::original_stderr) {
6230 "Please be aware that combining GNU Parallel and '$execname'",
6231 "into a combined executable will make the whole executable",
6232 "licensed under GPLv3 (section 5.c).",
6234 "If the license of '$execname' is incompatible with GPLv3,",
6235 "you cannot legally convey copies of the combined executable",
6236 "to others. You can, however, still run them yourself.",
6238 "The combined executable will not have a citation notice,",
6239 "so it is your resposibilty to advice that academic tradition",
6240 "requires the users to cite GNU Parallel.",
6245 ::status_no_nl("\nType: 'I agree
' and press enter.\n> ");
6247 if(not defined $input) {
6250 } until($input =~ /I agree/i);
6252 write_or_exit($opt::combineexec,
6255 (map { "$_\0\n" } @parallelopts), "\0\0\n",
6256 $execname, "\0\0\n",
6257 (map { "$_\0\n" } @execopts), "\0\0\n",
6258 (map { "$_\0\n" } @argsep), "\0\0\n",
6261 chmod 0700, $opt::combineexec;
6265 sub unpack_combined_executable {
6266 # If the script is a combined executable,
6267 # it will have stuff in <DATA> (I.e. after __END__)
6268 my $combine_exec = join("",<DATA>);
6269 if(length $combine_exec) {
6273 # Option for GNU Parallel\0\n
6274 # Option for GNU Parallel\0\n
6276 # Name of executable\0\0\n
6277 # Option for executable\0\n
6278 # Option for executable\0\n
6280 # argsep + args if any\0\n
6281 # argsep + args if any\0\n
6283 # <<binary of exec>>
6285 # parallel --combine --pipe -j10% --recend '' myscript --myopt myval
6289 # --recend\0\n --recend
6291 # \0\0\n end-of-parallel-options
6292 # myscript\0\0\n myscript
6293 # --myopt\0\n --myopt
6295 # \0\0\n end-of-myscript-options
6297 # <<binary of myscript>>
6299 # parallel --combine -j10% myscript :::
6302 # \0\0\n end-of-parallel-options
6304 # \0\0\n end-of-myscript-options
6307 # <<binary of myscript>>
6309 my ($opts,$execname,$execopts,$argsep,$exec) =
6310 split /\0\0\n/,$combine_exec,5;
6311 # Make a tmpdir with a file called $execname
6313 $ENV{TMPDIR} ||= "/tmp";
6314 my $dir = File::Temp::tempdir($ENV{'TMPDIR
'} . "/parXXXXX", CLEANUP => 1);
6315 my $script = $dir."/".$execname;
6316 write_or_exit($script,$exec);
6318 chmod 0700, $script;
6319 # Mark it for unlinking later
6320 $Global::unlink{$script}++;
6321 $Global::unlink{$dir}++;
6322 # pass the options for GNU Parallel
6323 my @opts = split /\0\n/, $opts;
6324 my @execopts = split /\0\n/, $execopts;
6325 if(length $argsep) {
6326 # Only add argsep if set
6327 unshift(@ARGV, split(/\0\n/,$argsep));
6329 unshift(@ARGV,@opts,$script,@execopts);
6334 sub __GENERIC_COMMON_FUNCTION__() {}
6337 sub mkdir_or_die($) {
6338 # If dir is not executable: die
6340 # The eval is needed to catch exception from mkdir
6341 eval { File::Path::mkpath($dir); };
6343 ::error("Cannot change into non-executable dir $dir: $!");
6344 ::wait_and_exit(255);
6349 # Create tempfile as $TMPDIR/parXXXXX
6351 # $filehandle = opened file handle
6352 # $filename = file name created
6353 my($filehandle,$filename) =
6354 ::tempfile(DIR=>$ENV{'TMPDIR
'}, TEMPLATE => 'parXXXXX
', @_);
6356 return($filehandle,$filename);
6358 # Separate unlink due to NFS dealing badly with File::Temp
6365 # Select a name that does not exist
6366 # Do not create the file as it may be used for creating a socket (by tmux)
6367 # Remember the name in $Global::unlink to avoid hitting the same name twice
6370 if(not -w $ENV{'TMPDIR
'}) {
6371 my $qtmp = ::Q($ENV{'TMPDIR
'});
6372 if(not -e $ENV{'TMPDIR
'}) {
6373 ::error("Tmpdir $qtmp does not exist.","Try: mkdir -p $qtmp");
6375 ::error("Tmpdir $qtmp is not writable.","Try: chmod +w $qtmp");
6377 ::wait_and_exit(255);
6380 $tmpname = $ENV{'TMPDIR
'}."/".$name.
6381 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
6382 } while(-e $tmpname or $Global::unlink{$tmpname}++);
6387 # Find an unused name and mkfifo on it
6388 my $tmpfifo = tmpname("fif");
6389 mkfifo($tmpfifo,0600);
6394 # Remove file and remove it from %Global::unlink
6397 delete @Global::unlink{@_};
6401 sub size_of_block_dev() {
6402 # Like -s but for block devices
6404 # $blockdev = file name of block device
6406 # $size = in bytes, undef if error
6407 my $blockdev = shift;
6408 my $fh = open_or_exit("<", $blockdev);
6409 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
6410 my $size = tell($fh);
6416 # Like qx but with clean environment (except for @keep)
6417 # and STDERR ignored
6418 # This is needed if the environment contains functions
6419 # that /bin/sh does not understand
6421 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
6422 # ssh with Kerberos needs KRB5CCNAME
6423 # sshpass needs SSHPASS
6424 # tmux needs LC_CTYPE
6425 # lsh needs HOME LOGNAME
6426 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE
6427 HOME LOGNAME SSHPASS);
6428 @env{@keep} = @ENV{@keep};
6431 if($Global::debug
) {
6432 # && true is to force spawning a shell and not just exec'ing
6433 return qx{ @_ && true
};
6435 # CygWin does not respect 2>/dev/null
6436 # so we do that by hand
6437 # This trick does not work:
6438 # https://stackoverflow.com/q/13833088/363028
6440 # open(STDERR, ">", "/dev/null");
6441 open(local *CHILD_STDIN
, '<', '/dev/null') or die $!;
6442 open(local *CHILD_STDERR
, '>', '/dev/null') or die $!;
6444 # eval is needed if open3 fails (e.g. command line too long)
6450 # && true is to force spawning a shell and not just exec'ing
6454 # Make sure $? is set
6456 return wantarray ?
@arr : join "",@arr;
6458 # If eval fails, force $?=false
6465 # Remove duplicates and return unique values
6466 return keys %{{ map { $_ => 1 } @_ }};
6471 # Minimum value of array
6476 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
6477 $min = ($min < $_) ?
$min : $_;
6484 # Maximum value of array
6489 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
6490 $max = ($max > $_) ?
$max : $_;
6497 # Sum of values of array
6502 $_ and do { $sum += $_; }
6507 sub undef_as_zero
($) {
6512 sub undef_as_empty
($) {
6514 return $a ?
$a : "";
6517 sub undef_if_empty
($) {
6518 if(defined($_[0]) and $_[0] eq "") {
6524 sub multiply_binary_prefix
(@
) {
6525 # Evalualte numbers with binary prefix
6526 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
6527 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
6528 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
6529 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
6530 # 13G = 13*1024*1024*1024 = 13958643712
6532 # $s = string with prefixes
6534 # $value = int with prefixes multiplied
6540 s/gi/*1024*1024*1024/gi;
6541 s/ti/*1024*1024*1024*1024/gi;
6542 s/pi/*1024*1024*1024*1024*1024/gi;
6543 s/ei/*1024*1024*1024*1024*1024*1024/gi;
6544 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
6545 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
6546 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
6550 s/G/*1024*1024*1024/g;
6551 s/T/*1024*1024*1024*1024/g;
6552 s/P/*1024*1024*1024*1024*1024/g;
6553 s/E/*1024*1024*1024*1024*1024*1024/g;
6554 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
6555 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
6556 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
6560 s/g/*1000*1000*1000/g;
6561 s/t/*1000*1000*1000*1000/g;
6562 s/p/*1000*1000*1000*1000*1000/g;
6563 s/e/*1000*1000*1000*1000*1000*1000/g;
6564 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
6565 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
6566 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
6570 return wantarray ?
@v : $v[0];
6573 sub multiply_time_units
($) {
6574 # Evalualte numbers with time units
6575 # s=1, m=60, h=3600, d=86400
6577 # $s = string time units
6579 # $value = int in seconds
6588 # 1m/3 => 1*60+/3 => 1*60/3
6593 return wantarray ?
@v : $v[0];
6596 sub seconds_to_time_units
() {
6597 # Convert seconds into ??d??h??m??s
6598 # s=1, m=60, h=3600, d=86400
6600 # $s = int in seconds
6602 # $str = string time units
6605 my $d = int($s/86400);
6607 my $h = int($s/3600);
6612 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
6614 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
6616 $str = sprintf("%dm%02ds",$m,$s);
6618 $str = sprintf("%ds",$s);
6624 my ($disk_full_fh, $b8193, $error_printed);
6625 sub exit_if_disk_full
() {
6626 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
6627 # If the disk is full: Exit immediately.
6630 if(not $disk_full_fh) {
6631 $disk_full_fh = ::tmpfile
(SUFFIX
=> ".df");
6634 # Linux does not discover if a disk is full if writing <= 8192
6636 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
6637 # ntfs reiserfs tmpfs ubifs vfat xfs
6638 # TODO this should be tested on different OS similar to this:
6641 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
6642 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
6643 # seq 6900000 > /mnt/loop/i && echo seq OK
6644 # seq 6980868 > /mnt/loop/i
6645 # seq 10000 > /mnt/loop/ii
6647 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
6650 print $disk_full_fh $b8193;
6651 if(not $disk_full_fh
6653 tell $disk_full_fh != 8193) {
6654 # On raspbian the disk can be full except for 10 chars.
6655 if(not $error_printed) {
6656 ::error
("Output is incomplete.",
6657 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
6658 "Is the disk full?",
6659 "Change \$TMPDIR with --tmpdir or use --compress.");
6662 ::wait_and_exit
(255);
6664 truncate $disk_full_fh, 0;
6665 seek($disk_full_fh, 0, 0) || die;
6670 # Remove comments and spaces
6672 # $spaces = keep 1 space?
6673 # $s = string to remove spaces from
6675 # $s = with spaces removed
6681 } elsif(2 == $spaces) {
6683 $s =~ s/\n\n+/\n/sg;
6684 $s =~ s/[ \t]+/ /mg;
6685 } elsif(3 == $spaces) {
6686 # Keep perl code required space
6687 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
6688 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
6700 $hostname = `hostname`;
6702 $hostname ||= "nohostname";
6710 # @programs = programs to find the path to
6712 # @full_path = full paths to @programs. Nothing if not found
6715 push(@which, grep { not -d
$_ and -x
$_ }
6716 map { $_."/".$prg } split(":",$ENV{'PATH'}));
6718 # Test if program with full path exists
6719 push(@which, grep { not -d
$_ and -x
$_ } $prg);
6722 ::debug
("which", "$which[0] in $ENV{'PATH'}\n");
6723 return wantarray ?
@which : $which[0];
6727 my ($regexp,$shell,%fakename);
6731 # $pid = pid to see if (grand)*parent is a shell
6733 # $shellpath = path to shell - undef if no shell found
6735 ::debug
("init","Parent of $pid\n");
6737 # All shells known to mankind
6739 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
6740 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
6742 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ksh
6743 ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
6744 static-sh tcsh yash zsh -sh -csh -bash),
6745 '-sh (sh)' # sh on FreeBSD
6747 # Can be formatted as:
6748 # [sh] -sh sh busybox sh -sh (sh)
6749 # /bin/sh /sbin/sh /opt/csw/sh
6750 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
6751 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
6752 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
6753 '(-?)('. $shell. '))( *$| [^(])';
6755 # sh disguises itself as -sh (sh) on FreeBSD
6756 "-sh (sh)" => ["sh"],
6757 # csh and tcsh disguise themselves as -sh/-csh
6758 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
6759 # but sh also disguises itself as -sh
6760 # (TODO When does that happen?)
6762 "-csh" => ["tcsh", "csh"],
6763 # ash disguises itself as -ash
6764 "-ash" => ["ash", "dash", "sh"],
6765 # dash disguises itself as -dash
6766 "-dash" => ["dash", "ash", "sh"],
6767 # bash disguises itself as -bash
6768 "-bash" => ["bash", "sh"],
6769 # ksh disguises itself as -ksh
6770 "-ksh" => ["ksh", "sh"],
6771 # zsh disguises itself as -zsh
6772 "-zsh" => ["zsh", "sh"],
6775 if($^O
eq "linux") {
6776 # Optimized for GNU/Linux
6781 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
6783 chomp($shellline = <$fd>);
6784 if($shellline =~ /$regexp/o) {
6785 my $shellname = $4 || $8;
6786 my $dash = $3 || $7;
6787 if($shellname eq "sh" and $dash) {
6789 if($shellpath = readlink "/proc/$testpid/exe") {
6790 ::debug
("init","procpath $shellpath\n");
6791 if($shellpath =~ m
:/$shell$:o
) {
6793 "proc which ".$shellpath." => ");
6798 ::debug
("init", "which ".$shellname." => ");
6799 $shellpath = (which
($shellname,
6800 @
{$fakename{$shellname}}))[0];
6801 ::debug
("init", "shell path $shellpath\n");
6806 if(open(my $fd, "<", "/proc/$testpid/stat")) {
6809 # Parent pid is field 4
6810 $testpid = (split /\s+/, $line)[3];
6812 # Something is wrong: fall back to old method
6817 # if -sh or -csh try readlink /proc/$$/exe
6818 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table
();
6822 if($name_of_ref->{$testpid} =~ /$regexp/o) {
6823 my $shellname = $4 || $8;
6824 my $dash = $3 || $7;
6825 if($shellname eq "sh" and $dash) {
6827 if($shellpath = readlink "/proc/$testpid/exe") {
6828 ::debug
("init","procpath $shellpath\n");
6829 if($shellpath =~ m
:/$shell$:o
) {
6830 ::debug
("init", "proc which ".$shellpath." => ");
6835 ::debug
("init", "which ".$shellname." => ");
6836 $shellpath = (which
($shellname,@
{$fakename{$shellname}}))[0];
6837 ::debug
("init", "shell path $shellpath\n");
6838 $shellpath and last;
6840 if($testpid == $parent_of_ref->{$testpid}) {
6841 # In Solaris zones, the PPID of the zsched process is itself
6844 $testpid = $parent_of_ref->{$testpid};
6851 my %pid_parentpid_cmd;
6855 # %children_of = { pid -> children of pid }
6856 # %parent_of = { pid -> pid of parent }
6857 # %name_of = { pid -> commandname }
6859 if(not %pid_parentpid_cmd) {
6860 # Filter for SysV-style `ps`
6861 my $sysv = q
( ps
-ef
|).
6862 q
(perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
6863 q(s/^.{$s}//; print "@F[1,2] $_"' );
6864 # Minix uses cols 2,3 and can have newlines in the command
6865 # so lines not having numbers in cols 2,3 must be ignored
6866 my $minix = q
( ps
-ef
|).
6867 q
(perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
6868 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
6870 my $bsd = q
(ps
-o pid
,ppid
,command
-ax
);
6871 %pid_parentpid_cmd =
6878 'dragonfly' => $bsd,
6892 'syllable' => "echo ps not supported",
6895 $pid_parentpid_cmd{$^O
} or
6896 ::die_bug
("pid_parentpid_cmd for $^O missing");
6898 my (@pidtable,%parent_of,%children_of,%name_of);
6899 # Table with pid -> children of pid
6900 @pidtable = `$pid_parentpid_cmd{$^O}`;
6903 # must match: 24436 21224 busybox ash
6904 # must match: 24436 21224 <<empty on MacOSX running cubase>>
6905 # must match: 24436 21224 <<empty on system running Viber>>
6906 # or: perl -e 'while($0=" "){}'
6907 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
6909 /^\s*(\S+)\s+(\S+)\s+()$/) {
6910 $parent_of{$1} = $2;
6911 push @
{$children_of{$2}}, $1;
6914 ::die_bug
("pidtable format: $_");
6917 return(\
%children_of, \
%parent_of, \
%name_of);
6922 # Returns time since epoch as in seconds with 3 decimals
6926 # $time = time now with millisecond accuracy
6927 if(not $Global::use{"Time::HiRes"}) {
6928 if(eval "use Time::HiRes qw ( time );") {
6929 eval "sub TimeHiRestime { return Time::HiRes::time };";
6931 eval "sub TimeHiRestime { return time() };";
6933 $Global::use{"Time::HiRes"} = 1;
6936 return (int(TimeHiRestime
()*1000))/1000;
6940 # Sleep this many milliseconds.
6942 # $ms = milliseconds to sleep
6944 ::debug
("timing",int($ms),"ms ");
6945 select(undef, undef, undef, $ms/1000);
6948 sub make_regexp_ungreedy
{
6950 my $class_state = 0;
6951 my $escape_state = 0;
6956 for $c (split (//, $regexp)) {
6958 if($c ne "?") { $ungreedy .= "?"; }
6963 if ($escape_state) { $escape_state = 0; next; }
6964 if ($c eq "\\") { $escape_state = 1; next; }
6965 if ($c eq '[') { $class_state = 1; next; }
6967 if($c eq ']') { $class_state = 0; }
6970 # Quantifiers: + * {...}
6971 if ($c =~ /[*}+]/) { $found = 1; }
6973 if($found) { $ungreedy .= '?'; }
6978 sub __KILLER_REAPER__
() {}
6981 # Reap dead children.
6982 # If no dead children: Sleep specified amount with exponential backoff
6984 # $ms = milliseconds to sleep
6986 # $ms/2+0.001 if children reaped
6987 # $ms*1.1 if no children reaped
6990 if(not $Global::total_completed
% 100) {
6992 # Force cleaning the timeout queue for every 100 jobs
6993 # Fixes potential memleak
6994 $Global::timeoutq
->process_timeouts();
6997 # Sleep exponentially shorter (1/2^n) if a job finished
7001 $Global::timeoutq
->process_timeouts();
7004 kill_youngster_if_not_enough_mem
($opt::memfree
*0.5);
7006 if($opt::memsuspend
) {
7007 suspend_young_if_not_enough_mem
($opt::memsuspend
);
7010 kill_youngest_if_over_limit
();
7012 exit_if_disk_full
();
7013 if($Global::linebuffer
) {
7014 my $something_printed = 0;
7015 if($opt::keeporder
and not $opt::latestline
) {
7016 for my $job (values %Global::running
) {
7017 $something_printed += $job->print_earlier_jobs();
7020 for my $job (values %Global::running
) {
7021 $something_printed += $job->print();
7024 if($something_printed) { $ms = $ms/2+0.001; }
7027 # When a child dies, wake up from sleep (or select(,,,))
7028 $SIG{CHLD
} = sub { kill "ALRM", $$ };
7029 if($opt::delay
and not $Global::linebuffer
) {
7030 # The 0.004s is approximately the time it takes for one round
7031 my $next_earliest_start =
7032 $Global::newest_starttime
+ $opt::delay
- 0.004;
7033 my $remaining_ms = 1000 * ($next_earliest_start - ::now
());
7034 # The next job can only start at $next_earliest_start
7035 # so sleep until then (but sleep at least $ms)
7036 usleep
(::max
($ms,$remaining_ms));
7040 # --compress needs $SIG{CHLD} unset
7041 $SIG{CHLD
} = 'DEFAULT';
7043 # Sleep exponentially longer (1.1^n) if a job did not finish,
7044 # though at most 1000 ms.
7045 return (($ms < 1000) ?
($ms * 1.1) : ($ms));
7049 sub kill_youngest_if_over_limit
() {
7050 # Check each $sshlogin we are over limit
7051 # If over limit: kill off the youngest child
7052 # Put the child back in the queue.
7058 for my $job (values %Global::running
) {
7059 if(not $jobs_of{$job->sshlogin()}) {
7060 push @sshlogins, $job->sshlogin();
7062 push @
{$jobs_of{$job->sshlogin()}}, $job;
7064 for my $sshlogin (@sshlogins) {
7065 for my $job (sort { $b->seq() <=> $a->seq() }
7066 @
{$jobs_of{$sshlogin}}) {
7067 if($sshlogin->limit() == 2) {
7075 sub suspend_young_if_not_enough_mem
() {
7076 # Check each $sshlogin if there is enough mem.
7077 # If less than $limit free mem: suspend some of the young children
7078 # Else: Resume all jobs
7085 for my $job (values %Global::running
) {
7086 if(not $jobs_of{$job->sshlogin()}) {
7087 push @sshlogins, $job->sshlogin();
7089 push @
{$jobs_of{$job->sshlogin()}}, $job;
7091 for my $sshlogin (@sshlogins) {
7092 my $free = $sshlogin->memfree();
7093 if($free < 2*$limit) {
7094 # Suspend all jobs (resume some of them later)
7095 map { $_->suspended() or $_->suspend(); } @
{$jobs_of{$sshlogin}};
7096 my @jobs = (sort { $b->seq() <=> $a->seq() }
7097 @
{$jobs_of{$sshlogin}});
7098 # how many should be running?
7102 # free < limit*(2-1/2^n);
7104 # 1/(2-free/limit) < 2^n;
7105 my $run = int(1/(2-$free/$limit));
7106 $run = ::min
($run,$#jobs);
7107 # Resume the oldest running
7108 for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) {
7109 ::debug
("mem","\nResume ",$run+1, " jobs. Seq ",
7110 $job->seq(), " resumed ",
7111 $sshlogin->memfree()," < ",2*$limit);
7115 for my $job (@
{$jobs_of{$sshlogin}}) {
7116 if($job->suspended()) {
7118 ::debug
("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1,
7119 " jobs. Seq ", $job->seq(), " resumed ",
7120 $sshlogin->memfree()," > ",2*$limit);
7128 sub kill_youngster_if_not_enough_mem
() {
7129 # Check each $sshlogin if there is enough mem.
7130 # If less than 50% enough free mem: kill off the youngest child
7131 # Put the child back in the queue.
7138 for my $job (values %Global::running
) {
7139 if(not $jobs_of{$job->sshlogin()}) {
7140 push @sshlogins, $job->sshlogin();
7142 push @
{$jobs_of{$job->sshlogin()}}, $job;
7144 for my $sshlogin (@sshlogins) {
7145 for my $job (sort { $b->seq() <=> $a->seq() }
7146 @
{$jobs_of{$sshlogin}}) {
7147 if($sshlogin->memfree() < $limit) {
7148 ::debug
("mem","\n",map { $_->seq()." " }
7149 (sort { $b->seq() <=> $a->seq() }
7150 @
{$jobs_of{$sshlogin}}));
7151 ::debug
("mem","\n", $job->seq(), "killed ",
7152 $sshlogin->memfree()," < ",$limit);
7154 $job->set_killreason("mem");
7155 $sshlogin->memfree_recompute();
7160 ::debug
("mem","Free mem OK? ",
7161 $sshlogin->memfree()," > ",$limit);
7166 sub __DEBUGGING__
() {}
7174 $Global::debug
or return;
7175 @_ = grep { defined $_ ?
$_ : "" } @_;
7176 if($Global::debug
eq "all" or $Global::debug
eq $_[0]) {
7177 if($Global::fh
{2}) {
7178 # Original stderr was saved
7179 my $stderr = $Global::fh
{2};
7180 print $stderr @_[1..$#_];
7182 print STDERR
@_[1..$#_];
7187 sub my_memory_usage
() {
7189 # memory usage if found
7196 if(-e
"/proc/$pid/stat") {
7197 my $fh = FileHandle
->new("</proc/$pid/stat");
7203 my @procinfo = split(/\s+/,$data);
7205 return undef_as_zero
($procinfo[22]);
7213 # $size = size of object if Devel::Size is installed
7215 my @size_this = (@_);
7216 eval "use Devel::Size qw(size total_size)";
7220 return total_size(@_);
7226 # ascii expression of object if Data::Dump(er) is installed
7227 # error code otherwise
7228 my @dump_this = (@_);
7229 eval "use Data
::Dump
qw(dump);";
7231 # Data::Dump not installed
7232 eval "use Data
::Dumper
;";
7234 my $err = "Neither Data
::Dump nor Data
::Dumper is installed
\n".
7235 "Not dumping output
\n";
7239 return Dumper(@dump_this);
7242 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
7244 eval "sub Data
::Dump
:dump {}";
7245 eval "use Data
::Dump
qw(dump);";
7246 return (Data::Dump::dump(@dump_this));
7263 sub __OBJECT_ORIENTED_PARTS__() {}
7281 # SSHLogins can have these formats:
7282 # @grp+grp/ncpu//usr/bin/ssh user@server
7283 # ncpu//usr/bin/ssh user@server
7284 # /usr/bin/ssh user@server
7287 # @grp+grp/user@server
7288 # above with: user:password@server
7289 # above with: user@server:port
7291 # [@grp+grp][ncpu/][ssh command ][[user][:password]@][server[:port]]
7293 # [@grp+grp]/ncpu//usr/bin/ssh user:pass@server:port
7294 if($s =~ s:^\@([^/]+)/?::) {
7295 # Look for SSHLogin hostgroups
7296 %hostgroups = map { $_ => 1 } split(/\+|,/, $1);
7298 # An SSHLogin is always in the hostgroup of its "numcpu
/host
"
7299 $hostgroups{$s} = 1;
7301 # [ncpu/]/usr/bin/ssh user:pass@server:port
7302 if ($s =~ s:^(\d+)/::) { $ncpus = $1; }
7304 # Why disallow space in password?
7306 # C:/bin/ssh user:C:/bin/ssh@host
7307 # Should this parse as:
7308 # user 'C' with password '/bin/ssh user:C:/bin/ssh'
7310 # cmd 'C:/bin/ssh' user 'user' with password 'C:/bin/ssh'
7311 # This is impossible to determine.
7312 # With space forbidden in password it uniquely parses as the 2nd.
7313 # [/usr/bin/ssh ]user:pass@server:port
7314 if($s =~ s/^(.*) //) { $sshcommand = $1; }
7316 # [user:pass@]server:port
7317 if($s =~ s/^(.+)@//) {
7320 if($userpw =~ s/:(.*)//) {
7322 if($password eq "") { $password = $ENV{'SSHPASS'} }
7323 if(not ::which("sshpass
")) {
7324 ::error("--sshlogin with password requires sshpass installed
");
7325 ::wait_and_exit(255);
7333 $s =~ s/^([-a-z0-9._]+)//i) {
7334 # Not IPv6 (IPv6 has 2 or more ':')
7336 } elsif($s =~ s/^(\\[\[\]box0-9a-f.]+)//i) {
7337 # RFC2673 allows for:
7338 # \[b11010000011101] \[o64072/14] \[xd074/14] \[208.116.0.0/14]
7340 } elsif($s =~ s/^\[([0-9a-f:]+)\]//i
7342 $s =~ s/^([0-9a-f:]+)//i) {
7348 # 2001:db8::1:80 - not supported
7349 # 2001:db8::1 port 80 - not supported
7354 if($s =~ s/^:(\w+)//i) {
7356 } elsif($s =~ s/^[p\.\#](\w+)//i) {
7364 if($s and $s ne ':') {
7365 ::die_bug("SSHLogin parser failed on
'$origs' => '$s'");
7369 # Only include the sshcommand in $string if it is set by user
7370 ($sshcommand && $sshcommand." ").
7371 ($user && $user."@
").
7373 ($port && ":$port");
7375 # Only include the sshcommand in $string if it is set by user
7376 ($sshcommand && $sshcommand." ").
7377 ($user && $user. ($password && ":".$password)."@
").
7379 ($port && ":$port");
7384 $sshcommand ||= $opt::ssh || $ENV{'PARALLEL_SSH'} || "ssh
";
7386 # An SSHLogin is always in the hostgroup of its $string-name
7387 $hostgroups{$string} = 1;
7388 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
7389 # Used for file names for loadavg
7390 my $no_slash_string = $string;
7391 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
7393 'string' => $string,
7394 'pwstring' => $pwstring,
7395 'jobs_running' => 0,
7396 'jobs_completed' => 0,
7397 'maxlength' => undef,
7398 'max_jobs_running' => undef,
7399 'orig_max_jobs_running' => undef,
7401 'sshcommand' => $sshcommand,
7403 'password' => $password,
7406 'hostgroups' => \%hostgroups,
7408 'control_path_dir' => undef,
7409 'control_path' => undef,
7410 'time_to_login' => undef,
7411 'last_login_at' => undef,
7412 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
7413 $no_slash_string . "/loadavg
",
7415 'last_loadavg_update' => 0,
7416 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
7417 $no_slash_string . "/swap_activity
",
7418 'swap_activity' => undef,
7419 }, ref($class) || $class;
7424 # Remove temporary files if they are created.
7425 ::rm($self->{'loadavg_file'});
7426 ::rm($self->{'swap_activity_file'});
7431 return $self->{'string'};
7436 return $self->{'pwstring'};
7441 return $self->{'host'};
7445 # Give the ssh command without hostname
7447 # "sshpass
-e ssh
-p port
-l user
"
7450 # [sshpass -e] ssh -p port -l user
7451 if($self->{'password'}) { push @local, "sshpass
-e
"; }
7452 # [ssh] -p port -l user
7453 # TODO sshpass + space
7454 push @local, $self->{'sshcommand'};
7456 if($self->{'port'}) { push @local, '-p',$self->{'port'}; }
7458 if($self->{'user'}) { push @local, '-l',$self->{'user'}; }
7459 if($opt::controlmaster) {
7460 # Use control_path to make ssh faster
7461 my $control_path = $self->control_path_dir()."/ssh
-%r@
%h:%p";
7463 if(not $self->{'control_path'}{$control_path}++) {
7464 # Master is not running for this control_path
7468 $Global::sshmaster{$pid} ||= 1;
7470 push @local, "-S
", $control_path;
7471 $SIG{'TERM'} = undef;
7472 # Run a sleep that outputs data, so it will discover
7473 # if the ssh connection closes.
7474 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo
\n"}');
7475 # Ignore the 'foo' being printed
7476 open(STDOUT,">","/dev/null
");
7477 # STDERR >/dev/null to ignore
7478 open(STDERR,">","/dev/null
");
7479 open(STDIN,"<","/dev/null
");
7480 exec(@local, "-MT
", $self->{'host'}, "--",
7481 "perl
", "-e
", $sleep);
7484 push @local, "-S
", ::Q($control_path);
7491 # @cmd = shell command to run on remote
7493 # $sshwrapped = ssh remote @cmd
7497 $self->sshcmd(), $self->{'host'}, "--", "exec", @remote);
7502 # @cmd = perl expresion to eval
7504 # $hexencoded = perl command that decodes hex and evals @cmd
7506 my $cmd = join("",@_);
7508 # "#" is needed because Perl on MacOS X adds NULs
7509 # when running pack q/H10000000/
7510 my $hex = unpack "H*", $cmd."#";
7511 # csh does not deal well with > 1000 chars in one word
7512 # Insert space every 1000 char
7513 $hex =~ s/\G.{1000}\K/ /sg;
7515 # Write this without special chars: eval pack 'H*', join '',@ARGV
7516 # GNU_Parallel_worker = String so people can see this is from GNU Parallel
7517 # eval+ = way to write 'eval ' without space (gives warning)
7518 # pack+ = way to write 'pack ' without space
7519 # q/H10000000/, = almost the same as "H*" but does not use *
7520 # join+q//, = join '',
7521 return('perl -X -e '.
7522 'GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV '.
7526 sub jobs_running
($) {
7528 return ($self->{'jobs_running'} || "0");
7531 sub inc_jobs_running
($) {
7533 $self->{'jobs_running'}++;
7536 sub dec_jobs_running
($) {
7538 $self->{'jobs_running'}--;
7541 sub set_maxlength
($$) {
7543 $self->{'maxlength'} = shift;
7548 return $self->{'maxlength'};
7551 sub jobs_completed
() {
7553 return $self->{'jobs_completed'};
7556 sub in_hostgroups
() {
7558 # @hostgroups = the hostgroups to look for
7560 # true if intersection of @hostgroups and the hostgroups of this
7561 # SSHLogin is non-empty
7563 return grep { defined $self->{'hostgroups'}{$_} } @_;
7568 return keys %{$self->{'hostgroups'}};
7571 sub inc_jobs_completed
($) {
7573 $self->{'jobs_completed'}++;
7574 $Global::total_completed
++;
7577 sub set_max_jobs_running
($$) {
7579 if(defined $self->{'max_jobs_running'}) {
7580 $Global::max_jobs_running
-= $self->{'max_jobs_running'};
7582 $self->{'max_jobs_running'} = shift;
7584 if(defined $self->{'max_jobs_running'}) {
7585 # max_jobs_running could be resat if -j is a changed file
7586 $Global::max_jobs_running
+= $self->{'max_jobs_running'};
7588 # Initialize orig to the first non-zero value that comes around
7589 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
7596 $self->memfree_recompute();
7597 # Return 1 if not defined.
7598 return (not defined $self->{'memfree'} or $self->{'memfree'})
7601 sub memfree_recompute
() {
7603 my $script = memfreescript
();
7605 # TODO add sshlogin and backgrounding
7606 # Run the script twice if it gives 0 (typically intermittent error)
7607 $self->{'memfree'} = ::qqx
($script) || ::qqx
($script);
7608 if(not $self->{'memfree'}) {
7609 ::die_bug
("Less than 1 byte memory free");
7611 #::debug("mem","New free:",$self->{'memfree'}," ");
7617 sub memfreescript
() {
7619 # shellscript for giving available memory in bytes
7630 awk '/^((Swap)?Cached|MemFree|Buffers):/
7631 { sum += \$2} END { print sum }'
7634 # Android uses same code as GNU/Linux
7638 awk '/^((Swap)?Cached|MemFree|Buffers):/
7639 { sum += \$2} END { print sum }'
7643 # procs memory page faults cpu
7644 # r b w avm free re at pi po fr de sr in sy cs us sy id
7645 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
7648 print (((reverse `vmstat 1 1`)[0]
7649 =~ /(?:\d+\D+){4}(\d
+)/)[0]*1024)
7652 # kthr memory page disk faults cpu
7653 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
7654 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
7655 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
7657 # The second free value is correct
7660 print (((reverse `vmstat 1 2`)[0]
7661 =~ /(?:\d+\D+){4}(\d
+)/)[0]*1024)
7664 # vm.stats.vm.v_cache_count: 0
7665 # vm.stats.vm.v_inactive_count: 79574
7666 # vm.stats.vm.v_free_count: 4507
7669 for(qx{/sbin/sysctl -a}) {
7670 if (/^([^:]+):\s+(.+)\s*$/s) {
7674 print $sysctl->{"hw.pagesize"} *
7675 ($sysctl->{"vm.stats.vm.v_cache_count"}
7676 + $sysctl->{"vm.stats.vm.v_inactive_count"}
7677 + $sysctl->{"vm.stats.vm.v_free_count"});
7679 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
7680 # Pages free: 198061.
7681 # Pages active: 159701.
7682 # Pages inactive: 47378.
7683 # Pages speculative: 29707.
7684 # Pages wired down: 89231.
7685 # "Translation faults": 928901425.
7686 # Pages copy-on-write: 156988239.
7687 # Pages zero filled: 271267894.
7688 # Pages reactivated: 48895.
7691 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
7695 print (($vm =~ /page size of (\d+)/)[0] *
7696 (($vm =~ /Pages free:\s+(\d+)/)[0] +
7697 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
7700 my $perlscript = "";
7701 # Make a perl script that detects the OS ($^O) and runs
7702 # the appropriate command
7703 for my $os (keys %script_of) {
7704 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
7706 $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
7714 # 0 = Below limit. Start another job.
7715 # 1 = Over limit. Start no jobs.
7716 # 2 = Kill youngest job
7719 if(not defined $self->{'limitscript'}) {
7725 # Do the measurement in the background
7727 LANG=C iostat -x 1 2 > $tmp;
7728 mv $tmp $io_file) </dev/null >/dev/null & );
7729 perl -e '-e $ARGV[0] or exit(1);
7732 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
7733 exit ('$limit' < $max)' $io_file;
7740 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
7742 if (sum*1024 < '$limit'/2) { exit 2; }
7743 else { exit (sum*1024 < '$limit') }
7751 ps ax -o state,command |
7752 grep -E '^[DOR].[^[]' |
7754 perl -ne 'exit ('$limit' < $_)';
7759 my ($cmd,@args) = split /\s+/,$opt::limit;
7760 if($limitscripts{$cmd}) {
7761 my $tmpfile = ::tmpname("parlmt");
7762 ++$Global::unlink{$tmpfile};
7763 $self->{'limitscript'} =
7764 ::spacefree(1, sprintf($limitscripts{$cmd},
7765 ::multiply_binary_prefix(@args),$tmpfile));
7767 $self->{'limitscript'} = $opt::limit;
7773 $ENV{'SSHLOGIN'} = $self->string();
7774 system($Global::shell,"-c",$self->{'limitscript'});
7775 #::qqx($self->{'limitscript'});
7776 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
7783 my $swapping = $self->swap_activity();
7784 return (not defined $swapping or $swapping)
7787 sub swap_activity($) {
7788 # If the currently known swap activity is too old:
7789 # Recompute a new one in the background
7791 # last swap activity computed
7793 # Should we update the swap_activity file?
7794 my $update_swap_activity_file = 0;
7795 # Test with (on 64 core machine):
7796 # seq 100 | parallel --lb -j100 'seq 1000 | parallel --noswap -j 1 true'
7797 if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) {
7798 my $swap_out = <$swap_fh>;
7800 if($swap_out =~ /^(\d+)$/) {
7801 $self->{'swap_activity'} = $1;
7802 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
7804 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
7805 if(time - $self->{'last_swap_activity_update'} > 10) {
7806 # last swap activity update was started 10 seconds ago
7807 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
7808 $update_swap_activity_file = 1;
7811 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
7812 $self->{'swap_activity'} = undef;
7813 $update_swap_activity_file = 1;
7815 if($update_swap_activity_file) {
7816 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
7817 $self->{'last_swap_activity_update'} = time;
7818 my $dir = ::dirname($self->{'swap_activity_file'});
7819 -d $dir or eval { File::Path::mkpath($dir); };
7821 $swap_activity = swapactivityscript();
7822 if(not $self->local()) {
7823 $swap_activity = $self->wrap($swap_activity);
7825 # Run swap_activity measuring.
7826 # As the command can take long to run if run remote
7827 # save it to a tmp file before moving it to the correct file
7828 my $file = $self->{'swap_activity_file'};
7829 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
7830 ::debug("swap", "\n", $swap_activity, "\n");
7831 my $qtmp = ::Q($tmpfile);
7832 my $qfile = ::Q($file);
7833 ::qqx("($swap_activity > $qtmp && mv $qtmp $qfile || rm $qtmp &)");
7835 return $self->{'swap_activity'};
7841 sub swapactivityscript() {
7843 # shellscript for detecting swap activity
7845 # arguments for vmstat are OS dependant
7846 # swap_in and swap_out are in different columns depending on OS
7852 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
7853 # r b swpd free buff cache si so bi bo in cs us sy id wa
7854 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
7855 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
7856 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
7860 # kthr memory page disk faults cpu
7861 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
7862 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
7863 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
7864 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
7866 # darwin (macosx): $21*$22
7868 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
7869 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
7870 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
7871 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
7872 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
7876 # procs faults cpu memory page disk
7877 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
7878 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
7879 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
7880 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
7884 # System configuration: lcpu=1 mem=2048MB
7886 # kthr memory page faults cpu
7887 # ----- ----------- ------------------------ ------------ -----------
7888 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
7889 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
7890 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
7891 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
7895 # procs memory page disks faults cpu
7896 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
7897 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
7898 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
7899 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
7903 # procs memory page disks traps cpu
7904 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
7905 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
7906 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
7907 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
7911 # procs memory page disks faults cpu
7912 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
7913 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
7914 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
7915 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
7919 # procs memory page disks traps cpu
7920 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
7921 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
7922 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
7923 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
7927 # procs memory page faults cpu
7928 # r b w avm free re at pi po fr de sr in sy cs us sy id
7929 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
7930 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
7931 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
7933 # dec_osf (tru64): $11*$12
7935 # Virtual Memory Statistics: (pagesize = 8192)
7936 # procs memory pages intr cpu
7937 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
7938 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
7939 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
7940 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
7944 # (pagesize: 4, size: 512288, swap size: 894972)
7945 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
7946 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
7947 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
7948 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
7950 # -nto (qnx has no swap)
7954 my $perlscript = "";
7955 # Make a perl script that detects the OS ($^O) and runs
7956 # the appropriate vmstat command
7957 for my $os (keys %vmstat) {
7958 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
7959 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
7960 $vmstat{$os}[1] . '}"` }';
7962 $script = "perl -e " . ::Q($perlscript);
7968 sub too_fast_remote_login($) {
7970 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
7971 # sshd normally allows 10 simultaneous logins
7972 # A login takes time_to_login
7973 # So time_to_login/5 should be safe
7974 # If now <= last_login + time_to_login/5: Then it is too soon.
7975 my $too_fast = (::now() <= $self->{'last_login_at'}
7976 + $self->{'time_to_login'}/5);
7977 ::debug("run", "Too fast? $too_fast ");
7980 # No logins so far (or time_to_login not computed): it is not too fast
7985 sub last_login_at($) {
7987 return $self->{'last_login_at'};
7990 sub set_last_login_at($$) {
7992 $self->{'last_login_at'} = shift;
7995 sub loadavg_too_high($) {
7997 my $loadavg = $self->loadavg();
7998 if(defined $loadavg) {
7999 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
8000 return $loadavg >= $self->max_loadavg();
8002 # Unknown load: Assume load is too high
8010 # If the currently know loadavg is too old:
8011 # Recompute a new one in the background
8012 # The load average is computed as the number of processes waiting
8013 # for disk or CPU right now. So it is the server load this instant
8014 # and not averaged over several minutes. This is needed so GNU
8015 # Parallel will at most start one job that will push the load over
8019 # $last_loadavg = last load average computed (undef if none)
8023 if(not $Global::loadavg_cmd) {
8024 # aix => "ps -ae -o state,command" # state wrong
8025 # bsd => "ps ax -o state,command"
8026 # sysv => "ps -ef -o s -o comm"
8027 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
8028 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
8029 # awk '{print $2,$1}'
8034 # hpux => ps -el|awk '{print $2,$14,$15}'
8035 # irix => ps -ef -o state -o comm
8037 # minix => ps el|awk '{print \$1,\$11}'
8043 # ultrix => ps -ax | awk '{print $3,$5}'
8044 # unixware => ps -el|awk '{print $2,$14,$15}'
8045 my $ps = ::spacefree(1,q{
8046 $sysv="ps -ef -o s -o comm";
8047 $sysv2="ps -ef -o state -o comm";
8048 $bsd="ps ax -o state,command";
8049 # Treat threads as processes
8050 $bsd2="ps axH -o state,command";
8051 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
8052 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
8053 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
8054 awk '{print $2,$1}' };
8055 $dummy="echo S COMMAND;echo R dummy";
8057 # TODO Find better code for AIX/Android
8059 'android' => "uptime",
8060 'cygwin' => $cygwin,
8062 'dec_osf' => $sysv2,
8063 'dragonfly' => $bsd,
8069 'minix' => "ps el|awk '{print \$1,\$11}'",
8077 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
8082 # The command is too long for csh, so base64_wrap the command
8083 $Global::loadavg_cmd = $self->hexwrap($ps);
8085 return $Global::loadavg_cmd;
8087 # Should we update the loadavg file?
8088 my $update_loadavg_file = 0;
8089 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
8090 local $/; # $/ = undef => slurp whole file
8091 my $load_out = <$load_fh>;
8093 if($load_out =~ /\S/) {
8094 # Content can be empty if ~/ is on NFS
8095 # due to reading being non-atomic.
8097 # Count lines starting with D,O,R but command does not start with [
8098 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
8100 # load is overestimated by 1
8101 $self->{'loadavg'} = $load - 1;
8102 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
8103 } elsif ($load_out=~/average: (\d+.\d+)/) {
8104 # AIX does not support instant load average
8105 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
8106 $self->{'loadavg'} = $1;
8108 ::die_bug("loadavg_invalid_content: " .
8109 $self->{'loadavg_file'} . "\n$load_out");
8112 $update_loadavg_file = 1;
8114 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
8115 $self->{'loadavg'} = undef;
8116 $update_loadavg_file = 1;
8118 if($update_loadavg_file) {
8119 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
8120 $self->{'last_loadavg_update'} = time;
8121 my $dir = ::dirname($self->{'swap_activity_file'});
8122 -d $dir or eval { File::Path::mkpath($dir); };
8123 -w $dir or ::die_bug("Cannot write to $dir");
8125 if($self->{'string'} ne ":") {
8126 $cmd = $self->wrap(loadavg_cmd());
8128 $cmd .= loadavg_cmd();
8130 # As the command can take long to run if run remote
8131 # save it to a tmp file before moving it to the correct file
8132 ::debug("load", "Update load\n");
8133 my $file = ::Q($self->{'loadavg_file'});
8134 # tmpfile on same filesystem as $file
8135 my $tmpfile = $file.$$;
8136 $ENV{'SSHPASS'} = $self->{'password'};
8137 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
8139 return $self->{'loadavg'};
8142 sub max_loadavg($) {
8144 # If --load is a file it might be changed
8145 if($Global::max_load_file) {
8146 my $mtime = (stat($Global::max_load_file))[9];
8147 if($mtime > $Global::max_load_file_last_mod) {
8148 $Global::max_load_file_last_mod = $mtime;
8149 for my $sshlogin (values %Global::host) {
8150 $sshlogin->set_max_loadavg(undef);
8154 if(not defined $self->{'max_loadavg'}) {
8155 $self->{'max_loadavg'} =
8156 $self->compute_max_loadavg($opt::load);
8158 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
8159 return $self->{'max_loadavg'};
8162 sub set_max_loadavg($$) {
8164 $self->{'max_loadavg'} = shift;
8167 sub compute_max_loadavg($) {
8168 # Parse the max loadaverage that the user asked for using --load
8172 my $loadspec = shift;
8174 if(defined $loadspec) {
8175 if($loadspec =~ /^\+(\d+)$/) {
8179 $self->ncpus() + $j;
8180 } elsif ($loadspec =~ /^-(\d+)$/) {
8184 $self->ncpus() - $j;
8185 } elsif ($loadspec =~ /^(\d+)\%$/) {
8188 $self->ncpus() * $j / 100;
8189 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
8191 } elsif (-f $loadspec) {
8192 $Global::max_load_file = $loadspec;
8193 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
8194 $load = $self->compute_max_loadavg(
8195 ::slurp_or_exit($Global::max_load_file)
8198 ::error("Parsing of --load failed.");
8208 sub time_to_login($) {
8210 return $self->{'time_to_login'};
8213 sub set_time_to_login($$) {
8215 $self->{'time_to_login'} = shift;
8218 sub max_jobs_running($) {
8220 if(not defined $self->{'max_jobs_running'}) {
8221 my $nproc = $self->compute_number_of_processes($opt::jobs);
8222 $self->set_max_jobs_running($nproc);
8224 return $self->{'max_jobs_running'};
8227 sub orig_max_jobs_running($) {
8229 return $self->{'orig_max_jobs_running'};
8232 sub compute_number_of_processes($) {
8233 # Number of processes wanted and limited by system resources
8235 # Number of processes
8238 my $wanted_processes = $self->user_requested_processes($opt_P);
8239 if(not defined $wanted_processes) {
8240 $wanted_processes = $Global::default_simultaneous_sshlogins;
8242 ::debug("load", "Wanted procs: $wanted_processes\n");
8244 $self->processes_available_by_system_limit($wanted_processes);
8245 ::debug("load", "Limited to procs: $system_limit\n");
8246 return $system_limit;
8251 my $max_system_proc_reached;
8252 my $more_filehandles;
8255 my $count_jobs_already_read;
8261 sub reserve_filehandles($) {
8262 # Reserves filehandle
8265 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
8269 sub reserve_process() {
8270 # Spawn a dummy process
8272 if($child = fork()) {
8273 push @children, $child;
8274 $Global::unkilled_children{$child} = 1;
8275 } elsif(defined $child) {
8277 # The child takes one process slot
8278 # It will be killed later
8279 $SIG{'TERM'} = $Global::original_sig{'TERM'};
8280 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
8281 # The exec does not work on Cygwin and QNX
8284 # 'exec sleep' takes less RAM than sleeping in perl
8285 exec 'sleep', 10101;
8290 $max_system_proc_reached = 1;
8294 sub get_args_or_jobs() {
8295 # Get an arg or a job (depending on mode)
8296 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
8297 # Skip: No need to get args
8299 } elsif(defined $opt::retries and $count_jobs_already_read) {
8300 # For retries we may need to run all jobs on this sshlogin
8301 # so include the already read jobs for this sshlogin
8302 $count_jobs_already_read--;
8305 if($opt::X or $opt::m) {
8306 # The arguments may have to be re-spread over several jobslots
8307 # So pessimistically only read one arg per jobslot
8308 # instead of a full commandline
8309 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
8310 if($Global::JobQueue->empty()) {
8313 $job = $Global::JobQueue->get();
8318 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
8323 # If there are no more command lines, then we have a process
8324 # per command line, so no need to go further
8325 if($Global::JobQueue->empty()) {
8328 $job = $Global::JobQueue->get();
8329 # Replacement must happen here due to seq()
8330 $job and $job->replaced();
8339 # Cleanup: Close the files
8340 for (values %fh) { close $_ }
8341 # Cleanup: Kill the children
8342 for my $pid (@children) {
8345 delete $Global::unkilled_children{$pid};
8347 # Cleanup: Unget the command_lines or the @args
8348 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args);
8350 $Global::JobQueue->unget(@jobs);
8354 sub processes_available_by_system_limit($) {
8355 # If the wanted number of processes is bigger than the system limits:
8356 # Limit them to the system limits
8357 # Limits are: File handles, number of input lines, processes,
8358 # and taking > 1 second to spawn 10 extra processes
8360 # Number of processes
8362 my $wanted_processes = shift;
8363 my $system_limit = 0;
8364 my $slow_spawning_warning_printed = 0;
8366 $more_filehandles = 1;
8367 $tmpfhname = "TmpFhNamE";
8369 # perl uses 7 filehandles for something?
8370 # parallel uses 1 for memory_usage
8371 # parallel uses 4 for ?
8372 reserve_filehandles(12);
8373 # Two processes for load avg and ?
8377 # For --retries count also jobs already run
8378 $count_jobs_already_read = $Global::JobQueue->next_seq();
8379 my $wait_time_for_getting_args = 0;
8380 my $start_time = time;
8381 if($wanted_processes < $Global::infinity) {
8382 $Global::dummy_jobs = 1;
8385 $system_limit >= $wanted_processes and last;
8386 not $more_filehandles and last;
8387 $max_system_proc_reached and last;
8389 my $before_getting_arg = time;
8390 if(!$Global::dummy_jobs) {
8391 get_args_or_jobs() or last;
8393 $wait_time_for_getting_args += time - $before_getting_arg;
8396 # Every simultaneous process uses 2 filehandles to write to
8397 # and 2 filehandles to read from
8398 reserve_filehandles(4);
8400 # System process limit
8403 my $forktime = time - $time - $wait_time_for_getting_args;
8404 ::debug("run", "Time to fork $system_limit procs: ".
8405 $wait_time_for_getting_args, " ", $forktime,
8406 " (processes so far: ", $system_limit,")\n");
8407 if($system_limit > 10 and
8409 $forktime > $system_limit * 0.01) {
8410 # It took more than 0.01 second to fork a processes on avg.
8411 # Give the user a warning. He can press Ctrl-C if this
8414 "Starting $system_limit processes took > $forktime sec.",
8415 "Consider adjusting -j. Press CTRL-C to stop.");
8420 if($system_limit < $wanted_processes) {
8421 # The system_limit is less than the wanted_processes
8422 if($system_limit < 1 and not $Global::JobQueue->empty()) {
8423 ::warning("Cannot spawn any jobs.",
8424 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
8425 "or increasing 'nproc' in /etc/security/limits.conf",
8426 "or increasing /proc/sys/kernel/pid_max");
8427 ::wait_and_exit(255);
8429 if(not $more_filehandles) {
8430 ::warning("Only enough file handles to run ".
8431 $system_limit. " jobs in parallel.",
8432 "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'",
8433 "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)",
8434 "or increasing 'nofile' in /etc/security/limits.conf",
8435 "or increasing /proc/sys/fs/file-max");
8437 if($max_system_proc_reached) {
8438 ::warning("Only enough available processes to run ".
8439 $system_limit. " jobs in parallel.",
8440 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
8441 "or increasing 'nproc' in /etc/security/limits.conf",
8442 "or increasing /proc/sys/kernel/pid_max");
8445 if($] == 5.008008 and $system_limit > 1000) {
8446 # https://savannah.gnu.org/bugs/?36942
8447 $system_limit = 1000;
8449 if($Global::JobQueue->empty()) {
8450 $system_limit ||= 1;
8452 if($self->string() ne ":" and
8453 $system_limit > $Global::default_simultaneous_sshlogins) {
8455 $self->simultaneous_sshlogin_limit($system_limit);
8457 return $system_limit;
8461 sub simultaneous_sshlogin_limit($) {
8462 # Test by logging in wanted number of times simultaneously
8464 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
8466 my $wanted_processes = shift;
8467 if($self->{'time_to_login'}) {
8468 return $wanted_processes;
8471 # Try twice because it guesses wrong sometimes
8472 # Choose the minimal
8474 ::min($self->simultaneous_sshlogin($wanted_processes),
8475 $self->simultaneous_sshlogin($wanted_processes));
8476 if($ssh_limit < $wanted_processes) {
8477 my $serverlogin = $self->string();
8478 ::warning("ssh to $serverlogin only allows ".
8479 "for $ssh_limit simultaneous logins.",
8480 "You may raise this by changing",
8481 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
8482 "You can also try --sshdelay 0.1",
8483 "Using only ".($ssh_limit-1)." connections ".
8484 "to avoid race conditions.");
8485 # Race condition can cause problem if using all sshs.
8486 if($ssh_limit > 1) { $ssh_limit -= 1; }
8491 sub simultaneous_sshlogin($) {
8492 # Using $sshlogin try to see if we can do $wanted_processes
8493 # simultaneous logins
8494 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
8497 # $wanted_processes = Try for this many logins in parallel
8499 # $ssh_limit = Number of succesful parallel logins
8502 my $wanted_processes = shift;
8503 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
8504 # TODO sh -c wrapper to work for csh
8505 my $cmd = ($sshdelay.$self->wrap("echo simultaneouslogin").
8506 "</dev/null 2>&1 &")x$wanted_processes;
8507 ::debug("init","Trying $wanted_processes logins at ".$self->string()."\n");
8508 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
8509 ::die_bug("simultaneouslogin");
8510 my $ssh_limit = <$simul_fh>;
8518 $self->{'ncpus'} = shift;
8521 sub user_requested_processes($) {
8522 # Parse the number of processes that the user asked for using -j
8524 # $opt_P = string formatted as for -P
8526 # $processes = the number of processes to run on this sshlogin
8530 if(defined $opt_P) {
8532 $Global::max_procs_file = $opt_P;
8533 my $opt_P_file = ::slurp_or_exit($Global::max_procs_file);
8534 if($opt_P_file !~ /\S/) {
8535 ::warning_once("$Global::max_procs_file is empty. ".
8537 $opt_P_file = "100%";
8539 $processes = $self->user_requested_processes($opt_P_file);
8542 # -P 0 = infinity (or at least close)
8543 $processes = $Global::infinity;
8546 $opt_P =~ s/^([-+])/\$self->ncpus()$1/;
8548 $opt_P =~ s:%$:*\$self->ncpus()/100:;
8549 $processes = eval $opt_P;
8550 if($processes <= 0) {
8556 $processes = ::ceil($processes);
8562 # Number of CPU threads
8563 # --use_sockets_instead_of_threads = count socket instead
8564 # --use_cores_instead_of_threads = count physical cores instead
8566 # $ncpus = number of cpu (threads) on this sshlogin
8569 if(not defined $self->{'ncpus'}) {
8570 if($self->local()) {
8571 if($opt::use_sockets_instead_of_threads) {
8572 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
8573 } elsif($opt::use_cores_instead_of_threads) {
8574 $self->{'ncpus'} = socket_core_thread()->{'cores'};
8576 $self->{'ncpus'} = socket_core_thread()->{'threads'};
8580 $ENV{'SSHPASS'} = $self->{'password'};
8581 ::debug("init",("echo | ".$self->wrap("parallel --number-of-sockets")));
8582 if($opt::use_sockets_instead_of_threads
8584 $opt::use_cpus_instead_of_cores) {
8585 $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-sockets"));
8586 } elsif($opt::use_cores_instead_of_threads) {
8587 $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-cores"));
8589 $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-threads"));
8592 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
8593 $self->{'ncpus'} = $ncpu;
8595 ::warning("Could not figure out ".
8596 "number of cpus on ".$self->string." ($ncpu). Using 1.");
8597 $self->{'ncpus'} = 1;
8601 return $self->{'ncpus'};
8607 # Number of threads using `nproc`
8608 my $no_of_threads = ::qqx("nproc");
8609 chomp $no_of_threads;
8610 return $no_of_threads;
8613 sub no_of_sockets() {
8614 return socket_core_thread()->{'sockets'};
8618 return socket_core_thread()->{'cores'};
8621 sub no_of_threads() {
8622 return socket_core_thread()->{'threads'};
8625 sub socket_core_thread() {
8628 # 'sockets' => #sockets = number of socket with CPU present
8629 # 'cores' => #cores = number of physical cores
8630 # 'threads' => #threads = number of compute cores (hyperthreading)
8631 # 'active' => #taskset_threads = number of taskset limited cores
8634 if ($^O eq 'linux') {
8635 $cpu = sct_gnu_linux($cpu);
8636 } elsif ($^O eq 'android') {
8637 $cpu = sct_android($cpu);
8638 } elsif ($^O eq 'freebsd') {
8639 $cpu = sct_freebsd($cpu);
8640 } elsif ($^O eq 'netbsd') {
8641 $cpu = sct_netbsd($cpu);
8642 } elsif ($^O eq 'openbsd') {
8643 $cpu = sct_openbsd($cpu);
8644 } elsif ($^O eq 'gnu') {
8645 $cpu = sct_hurd($cpu);
8646 } elsif ($^O eq 'darwin') {
8647 $cpu = sct_darwin($cpu);
8648 } elsif ($^O eq 'solaris') {
8649 $cpu = sct_solaris($cpu);
8650 } elsif ($^O eq 'aix') {
8651 $cpu = sct_aix($cpu);
8652 } elsif ($^O eq 'hpux') {
8653 $cpu = sct_hpux($cpu);
8654 } elsif ($^O eq 'nto') {
8655 $cpu = sct_qnx($cpu);
8656 } elsif ($^O eq 'svr5') {
8657 $cpu = sct_openserver($cpu);
8658 } elsif ($^O eq 'irix') {
8659 $cpu = sct_irix($cpu);
8660 } elsif ($^O eq 'dec_osf') {
8661 $cpu = sct_tru64($cpu);
8663 # Try all methods until we find something that works
8664 $cpu = (sct_gnu_linux($cpu)
8665 || sct_android($cpu)
8666 || sct_freebsd($cpu)
8668 || sct_openbsd($cpu)
8671 || sct_solaris($cpu)
8675 || sct_openserver($cpu)
8681 # Fall back: Set all to nproc
8682 my $nproc = nproc();
8692 ::warning("Cannot figure out number of cpus. Using 1.");
8699 $cpu->{'sockets'} ||= 1;
8700 $cpu->{'threads'} ||= $cpu->{'cores'};
8701 $cpu->{'active'} ||= $cpu->{'threads'};
8702 chomp($cpu->{'sockets'},
8706 # Choose minimum of active and actual
8708 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
8709 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
8710 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
8714 sub sct_gnu_linux($) {
8716 # { 'sockets' => #sockets
8718 # 'threads' => #threads
8719 # 'active' => #taskset_threads }
8722 sub read_topology($) {
8728 -r "$prefix/cpu$thread/topology/physical_package_id";
8730 $socket{::slurp_or_exit(
8731 "$prefix/cpu$thread/topology/physical_package_id")}++;
8734 -r "$prefix/cpu$thread/topology/thread_siblings";
8736 $sibiling{::slurp_or_exit(
8737 "$prefix/cpu$thread/topology/thread_siblings")}++;
8739 $cpu->{'sockets'} = keys %socket;
8740 $cpu->{'cores'} = keys %sibiling;
8741 $cpu->{'threads'} = $thread;
8744 sub read_cpuinfo(@) {
8746 $cpu->{'sockets'} = 0;
8747 $cpu->{'cores'} = 0;
8748 $cpu->{'threads'} = 0;
8754 if(/^physical id.*[:](.*)/) {
8756 if(not $phy_seen{$1}++) {
8757 $cpu->{'sockets'}++;
8761 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
8765 /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
8767 $cpu->{'cores'} ||= $cpu->{'threads'};
8768 $cpu->{'cpus'} ||= $cpu->{'threads'};
8769 $cpu->{'sockets'} ||= 1;
8774 my $threads_per_core;
8775 my $cores_per_socket;
8777 /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
8778 /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
8779 /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
8780 /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
8782 if($cores_per_socket and $cpu->{'sockets'}) {
8783 $cpu->{'cores'} = $cores_per_socket * $cpu->{'sockets'};
8785 if($threads_per_core and $cpu->{'cores'}) {
8786 $cpu->{'threads'} = $threads_per_core * $cpu->{'cores'};
8788 if($threads_per_core and $cpu->{'threads'}) {
8789 $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core;
8791 $cpu->{'cpus'} ||= $cpu->{'threads'};
8794 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
8797 if($ENV{'PARALLEL_CPUINFO'}) {
8798 # Use CPUINFO from environment - used for testing only
8799 read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
8800 } elsif($ENV{'PARALLEL_LSCPU'}) {
8801 # Use LSCPU from environment - used for testing only
8802 read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
8803 } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
8804 # Use CPUPREFIX from environment - used for testing only
8805 read_topology($ENV{'PARALLEL_CPUPREFIX'});
8806 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
8807 # Skip /proc/cpuinfo - already set
8809 # Not debugging: Look at this computer
8810 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
8812 open(my $in_fh, "-|", "lscpu")) {
8813 # Parse output from lscpu
8814 read_lscpu(<$in_fh>);
8817 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
8819 -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
8820 read_topology("/sys/devices/system/cpu");
8822 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
8824 open(my $in_fh, "<", "/proc/cpuinfo")) {
8825 # Read /proc/cpuinfo
8826 read_cpuinfo(<$in_fh>);
8830 if(-e "/proc/self/status"
8831 and not $ENV{'PARALLEL_CPUINFO'}
8832 and not $ENV{'PARALLEL_LSCPU'}) {
8833 # if 'taskset' is used to limit number of threads
8834 if(open(my $in_fh, "<", "/proc/self/status")) {
8836 if(/^Cpus_allowed:\s*(\S+)/) {
8839 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
8848 sub sct_android($) {
8850 # { 'sockets' => #sockets
8852 # 'threads' => #threads
8853 # 'active' => #taskset_threads }
8855 return sct_gnu_linux($_[0]);
8858 sub sct_freebsd($) {
8860 # { 'sockets' => #sockets
8862 # 'threads' => #threads
8863 # 'active' => #taskset_threads }
8867 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
8869 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
8870 $cpu->{'threads'} ||=
8871 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
8873 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
8879 # { 'sockets' => #sockets
8881 # 'threads' => #threads
8882 # 'active' => #taskset_threads }
8885 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
8889 sub sct_openbsd($) {
8891 # { 'sockets' => #sockets
8893 # 'threads' => #threads
8894 # 'active' => #taskset_threads }
8897 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
8903 # { 'sockets' => #sockets
8905 # 'threads' => #threads
8906 # 'active' => #taskset_threads }
8909 $cpu->{'cores'} ||= ::qqx("nproc");
8915 # { 'sockets' => #sockets
8917 # 'threads' => #threads
8918 # 'active' => #taskset_threads }
8922 (::qqx('sysctl -n hw.physicalcpu')
8924 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
8925 $cpu->{'threads'} ||=
8926 (::qqx('sysctl -n hw.logicalcpu')
8928 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
8932 sub sct_solaris($) {
8934 # { 'sockets' => #sockets
8936 # 'threads' => #threads
8937 # 'active' => #taskset_threads }
8940 if(not $cpu->{'cores'}) {
8941 if(-x "/usr/bin/kstat") {
8942 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
8943 if($#chip_id >= 0) {
8944 $cpu->{'sockets'} ||= $#chip_id +1;
8946 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
8947 if($#core_id >= 0) {
8948 $cpu->{'cores'} ||= $#core_id +1;
8951 if(-x "/usr/sbin/psrinfo") {
8952 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
8953 if($#psrinfo >= 0) {
8954 $cpu->{'sockets'} ||= $psrinfo[0];
8957 if(-x "/usr/sbin/prtconf") {
8958 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
8959 if($#prtconf >= 0) {
8960 $cpu->{'cores'} ||= $#prtconf +1;
8969 # { 'sockets' => #sockets
8971 # 'threads' => #threads
8972 # 'active' => #taskset_threads }
8975 if(not $cpu->{'cores'}) {
8976 if(-x "/usr/sbin/lscfg") {
8977 if(open(my $in_fh, "-|",
8978 "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
8979 $cpu->{'cores'} = <$in_fh>;
8984 if(not $cpu->{'threads'}) {
8985 if(-x "/usr/bin/vmstat") {
8986 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
8988 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
8999 # { 'sockets' => #sockets
9001 # 'threads' => #threads
9002 # 'active' => #taskset_threads }
9006 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
9007 $cpu->{'threads'} ||=
9008 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
9014 # { 'sockets' => #sockets
9016 # 'threads' => #threads
9017 # 'active' => #taskset_threads }
9020 # BUG: It is not known how to calculate this.
9025 sub sct_openserver($) {
9027 # { 'sockets' => #sockets
9029 # 'threads' => #threads
9030 # 'active' => #taskset_threads }
9033 if(not $cpu->{'cores'}) {
9034 if(-x "/usr/sbin/psrinfo") {
9035 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
9036 if($#psrinfo >= 0) {
9037 $cpu->{'cores'} = $#psrinfo +1;
9041 $cpu->{'sockets'} ||= $cpu->{'cores'};
9047 # { 'sockets' => #sockets
9049 # 'threads' => #threads
9050 # 'active' => #taskset_threads }
9054 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
9060 # { 'sockets' => #sockets
9062 # 'threads' => #threads
9063 # 'active' => #taskset_threads }
9066 $cpu->{'cores'} ||= ::qqx("sizer -pr");
9067 $cpu->{'sockets'} ||= $cpu->{'cores'};
9068 $cpu->{'threads'} ||= $cpu->{'cores'};
9075 # $sshcommand = the command (incl options) to run when using ssh
9077 if (not defined $self->{'sshcommand'}) {
9078 ::die_bug("sshcommand not set");
9080 return $self->{'sshcommand'};
9085 return $self->{'local'};
9088 sub control_path_dir($) {
9090 # $control_path_dir = dir of control path (for -M)
9092 if(not defined $self->{'control_path_dir'}) {
9093 $self->{'control_path_dir'} =
9094 # Use $ENV{'TMPDIR'} as that is typically not
9096 # The file system must support UNIX domain sockets
9097 File::Temp::tempdir($ENV{'TMPDIR'}
9101 return $self->{'control_path_dir'};
9104 sub rsync_transfer_cmd($) {
9105 # Command to run to transfer a file
9107 # $file = filename of file to transfer
9108 # $workdir = destination dir
9110 # $cmd = rsync command to run to transfer $file ("" if unreadable)
9113 my $workdir = shift;
9115 ::warning($file. " is not readable and will not be transferred.");
9119 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9121 $rsync_destdir = ::shell_quote_file($workdir);
9124 $rsync_destdir = "/";
9126 $file = ::shell_quote_file($file);
9127 # Make dir if it does not exist
9128 return($self->wrap("mkdir -p $rsync_destdir") . " && " .
9129 $self->rsync()." $file ".$self->{'host'}.":$rsync_destdir");
9137 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
9138 # If the version >= 3.1.0: downgrade to protocol 30
9139 # rsync 3.2.4 introduces a quoting bug: Add --old-args for that
9141 # $rsync = "rsync" or "rsync --protocol 30 --old-args"
9143 if(not $rsync_version) {
9144 my @out = `rsync --version`;
9146 if(::which("rsync")) {
9147 ::die_bug("'rsync --version' gave no output.");
9149 ::error("'rsync' is not in \$PATH.");
9150 ::wait_and_exit(255);
9154 # rsync version 3.1.3 protocol version 31
9155 # rsync version v3.2.3 protocol version 31
9156 if(/version v?(\d+)\.(\d+)(\.(\d+))?/) {
9158 $rsync_version = sprintf "%02d.%02d%02d",$1,$2,$4;
9162 ::die_bug("Cannot figure out version of rsync: @out");
9167 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
9168 # If the version >= 3.1.0: downgrade to protocol 30
9170 # $rsync = "rsync" or "rsync --protocol 30"
9171 if(not $rsync_fix) {
9173 if($rsync_version >= 3.01) {
9174 # Version 3.1.0 or later: Downgrade to protocol 30
9175 $rsync_fix .= " --protocol 30";
9177 if($rsync_version >= 3.0204) {
9178 # Version 3.2.4 .. 3.2.8: --old-args
9179 $rsync_fix .= " --old-args";
9186 return "rsync".rsync_fixup()." ".$ENV{'PARALLEL_RSYNC_OPTS'}.
9187 " -e".::Q($self->sshcmd());
9191 sub cleanup_cmd($$$) {
9192 # Command to run to remove the remote file
9194 # $file = filename to remove
9195 # $workdir = destination dir
9197 # $cmd = ssh command to run to remove $file and empty parent dirs
9200 my $workdir = shift;
9203 # foo/bar/./baz/quux => workdir/baz/quux
9204 # /foo/bar/./baz/quux => workdir/baz/quux
9205 $f =~ s:.*/\./:$workdir/:;
9206 } elsif($f =~ m:^[^/]:) {
9207 # foo/bar => workdir/foo/bar
9208 $f = $workdir."/".$f;
9210 my @subdirs = split m:/:, ::dirname($f);
9215 unshift @rmdir, ::shell_quote_file($dir);
9217 my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : "";
9218 if(defined $opt::workdir and $opt::workdir eq "...") {
9219 $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
9222 ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir);
9223 return $self->wrap(::Q($rmf));
9230 my $commandref = shift;
9231 my $read_from = shift;
9232 my $context_replace = shift;
9233 my $max_number_of_args = shift;
9234 my $transfer_files = shift;
9235 my $return_files = shift;
9236 my $template_names = shift;
9237 my $template_contents = shift;
9238 my $commandlinequeue = CommandLineQueue->new
9239 ($commandref, $read_from, $context_replace, $max_number_of_args,
9240 $transfer_files, $return_files, $template_names, $template_contents);
9244 'commandlinequeue' => $commandlinequeue,
9246 'total_jobs' => undef,
9247 }, ref($class) || $class;
9253 $self->{'this_job_no'}++;
9254 if(@{$self->{'unget'}}) {
9255 my $job = shift @{$self->{'unget'}};
9256 # {%} may have changed, so flush computed values
9257 $job && $job->flush_cache();
9260 my $commandline = $self->{'commandlinequeue'}->get();
9261 if(defined $commandline) {
9262 return Job->new($commandline);
9264 $self->{'this_job_no'}--;
9272 unshift @{$self->{'unget'}}, @_;
9273 $self->{'this_job_no'} -= @_;
9278 my $empty = (not @{$self->{'unget'}}) &&
9279 $self->{'commandlinequeue'}->empty();
9280 ::debug("run", "JobQueue->empty $empty ");
9286 if(not defined $self->{'total_jobs'}) {
9287 if($opt::pipe and not $opt::tee) {
9288 ::error("--pipe is incompatible with --eta/--bar/--shuf");
9289 ::wait_and_exit(255);
9291 if($opt::totaljobs) {
9292 $self->{'total_jobs'} = $opt::totaljobs;
9293 } elsif($opt::sqlworker) {
9294 $self->{'total_jobs'} = $Global::sql->total_jobs();
9298 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
9300 while($record = $record_queue->get()) {
9301 push @arg_records, $record;
9302 if(time - $start > 10) {
9303 ::warning("Reading ".scalar(@arg_records).
9304 " arguments took longer than 10 seconds.");
9305 $opt::eta && ::warning("Consider removing --eta.");
9306 $opt::bar && ::warning("Consider removing --bar.");
9307 $opt::shuf && ::warning("Consider removing --shuf.");
9311 while($record = $record_queue->get()) {
9312 push @arg_records, $record;
9314 if($opt::shuf and @arg_records) {
9315 my $i = @arg_records;
9317 my $j = int rand($i+1);
9318 @arg_records[$i,$j] = @arg_records[$j,$i];
9321 $record_queue->unget(@arg_records);
9322 # $#arg_records = number of args - 1
9323 # We have read one @arg_record for this job (so add 1 more)
9324 my $num_args = $#arg_records + 2;
9325 # This jobs is not started so -1
9326 my $started_jobs = $self->{'this_job_no'} - 1;
9327 my $max_args = ::max($Global::max_number_of_args,1);
9328 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
9330 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
9331 " ($num_args/$max_args + $started_jobs)\n");
9334 return $self->{'total_jobs'};
9337 sub flush_total_jobs($) {
9338 # Unset total_jobs to force recomputing
9340 ::debug("init","flush Total jobs: ");
9341 $self->{'total_jobs'} = undef;
9347 return $self->{'commandlinequeue'}->seq();
9352 return $self->{'commandlinequeue'}->quote_args();
9360 my $commandlineref = shift;
9362 'commandline' => $commandlineref, # CommandLine object
9363 'workdir' => undef, # --workdir
9364 # filehandle for stdin (used for --pipe)
9365 # filename for writing stdout to (used for --files)
9366 # remaining data not sent to stdin (used for --pipe)
9367 # tmpfiles to cleanup when job is done
9369 # amount of data sent via stdin (used for --pipe)
9370 'transfersize' => 0, # size of files using --transfer
9371 'returnsize' => 0, # size of files using --return
9373 # hash of { SSHLogins => number of times the command failed there }
9375 'sshlogin' => undef,
9376 # The commandline wrapped with rsync and ssh
9377 'sshlogin_wrap' => undef,
9378 'exitstatus' => undef,
9379 'exitsignal' => undef,
9380 # Timestamp for timeout if any
9383 # Output used for SQL and CSV-output
9384 'output' => { 1 => [], 2 => [] },
9385 'halfline' => { 1 => [], 2 => [] },
9386 }, ref($class) || $class;
9389 sub flush_cache($) {
9391 $self->{'commandline'}->flush_cache();
9396 $self->{'commandline'} or ::die_bug("commandline empty");
9397 return $self->{'commandline'}->replaced();
9401 my $next_available_row;
9405 if(not defined $self->{'row'}) {
9406 if($opt::keeporder) {
9407 $self->{'row'} = $self->seq();
9409 $self->{'row'} = ++$next_available_row;
9412 return $self->{'row'};
9418 return $self->{'commandline'}->seq();
9423 return $self->{'commandline'}->set_seq(shift);
9428 return $self->{'commandline'}->slot();
9433 push @Global::slots, $self->slot();
9441 # $cattail = perl program for:
9442 # cattail "decomp-prg" wpid [file_stdin] [file_to_unlink]
9443 # decomp-prg = decompress program
9444 # wpid = pid of writer program
9445 # file_stdin = file_to_decompress
9446 # file_to_unlink = unlink this file
9449 # cat followed by tail (possibly with rm as soon at the file is opened)
9450 # If $writerpid dead: finish after this round
9454 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
9456 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
9460 while(! -s $comfile) {
9461 # Writer has not opened the buffer file, so we cannot remove it yet
9462 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
9465 # The writer and we have both opened the file, so it is safe to unlink it
9466 unlink $unlink_file;
9469 my $first_round = 1;
9471 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
9472 $flags |= O_NONBLOCK; # Add non-blocking to the flags
9473 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
9478 my $writer_running = kill 0, $writerpid;
9479 $read = sysread(IN,$buf,131072);
9482 # Only start the command if there any input to process
9484 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
9489 my $bytes_written = syswrite(OUT,$buf);
9490 # syswrite may be interrupted by SIGHUP
9491 substr($buf,0,$bytes_written) = "";
9493 # Something printed: Wait less next time
9496 if(eof(IN) and not $writer_running) {
9497 # Writer dead: There will never be sent more to the decompressor
9501 # TODO This could probably be done more efficiently using select(2)
9502 # Nothing read: Wait longer before next read
9503 # Up to 100 milliseconds
9504 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
9510 # Sleep this many milliseconds.
9512 select(undef, undef, undef, $secs/1000);
9515 $cattail =~ s/#.*//mg;
9516 $cattail =~ s/\s+/ /g;
9522 sub openoutputfiles($) {
9523 # Open files for STDOUT and STDERR
9524 # Set file handles in $self->fh
9526 my ($outfhw, $errfhw, $outname, $errname);
9528 if($opt::latestline) {
9529 # Do not save to files: Use non-blocking pipe
9530 my ($outfhr, $errfhr);
9531 pipe($outfhr, $outfhw) || die;
9532 $self->set_fh(1,'w',$outfhw);
9533 $self->set_fh(2,'w',$outfhw);
9534 $self->set_fh(1,'r',$outfhr);
9535 $self->set_fh(2,'r',$outfhr);
9536 # Make it possible to read non-blocking from the pipe
9537 for my $fdno (1,2) {
9538 ::set_fh_non_blocking($self->fh($fdno,'r'));
9540 # Return immediately because we do not need setting filenames
9542 } elsif($Global::linebuffer and not
9543 ($opt::keeporder or $Global::files or $opt::results or
9544 $opt::compress or $opt::compress_program or
9545 $opt::decompress_program)) {
9546 # Do not save to files: Use non-blocking pipe
9547 my ($outfhr, $errfhr);
9548 pipe($outfhr, $outfhw) || die;
9549 pipe($errfhr, $errfhw) || die;
9550 $self->set_fh(1,'w',$outfhw);
9551 $self->set_fh(2,'w',$errfhw);
9552 $self->set_fh(1,'r',$outfhr);
9553 $self->set_fh(2,'r',$errfhr);
9554 # Make it possible to read non-blocking from the pipe
9555 for my $fdno (1,2) {
9556 ::set_fh_non_blocking($self->fh($fdno,'r'));
9558 # Return immediately because we do not need setting filenames
9560 } elsif($opt::results and not $Global::csvsep and not $Global::jsonout) {
9561 # If --results, but not --results *.csv/*.tsv
9562 my $out = $self->{'commandline'}->results_out();
9564 if($out eq $opt::results or $out =~ m:/$:) {
9565 # $opt::results = simple string or ending in /
9567 # prefix/name1/val1/name2/val2/seq
9568 $seqname = $out."seq";
9569 # prefix/name1/val1/name2/val2/stdout
9570 $outname = $out."stdout";
9571 # prefix/name1/val1/name2/val2/stderr
9572 $errname = $out."stderr";
9574 # $opt::results = replacement string not ending in /
9577 $errname = "$out.err";
9578 $seqname = "$out.seq";
9580 ::write_or_exit($seqname, $self->seq());
9581 $outfhw = ::open_or_exit("+>", $outname);
9582 $errfhw = ::open_or_exit("+>", $errname);
9583 $self->set_fh(1,"unlink","");
9584 $self->set_fh(2,"unlink","");
9585 if($opt::sqlworker) {
9586 # Save the filenames in SQL table
9587 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
9588 "WHERE Seq = ". $self->seq(),
9589 $outname, $errname);
9591 } elsif(not $opt::ungroup) {
9592 # To group we create temporary files for STDOUT and STDERR
9593 # To avoid the cleanup unlink the files immediately (but keep them open)
9594 if($Global::files) {
9595 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
9596 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
9597 # --files => only remove stderr
9598 $self->set_fh(1,"unlink","");
9599 $self->set_fh(2,"unlink",$errname);
9601 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
9602 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
9603 $self->set_fh(1,"unlink",$outname);
9604 $self->set_fh(2,"unlink",$errname);
9608 open($outfhw,">&",$Global::fh{1}) || die;
9609 open($errfhw,">&",$Global::fh{2}) || die;
9610 # File name must be empty as it will otherwise be printed
9613 $self->set_fh(1,"unlink",$outname);
9614 $self->set_fh(2,"unlink",$errname);
9617 $self->set_fh(1,'w',$outfhw);
9618 $self->set_fh(2,'w',$errfhw);
9619 $self->set_fh(1,'name',$outname);
9620 $self->set_fh(2,'name',$errname);
9621 if($opt::compress) {
9622 $self->filter_through_compress();
9623 } elsif(not $opt::ungroup) {
9626 if($Global::linebuffer) {
9627 # Make it possible to read non-blocking from
9629 # Used for --linebuffer with -k, --files, --res, --compress*
9630 for my $fdno (1,2) {
9631 ::set_fh_non_blocking($self->fh($fdno,'r'));
9636 sub print_verbose_dryrun($) {
9637 # If -v set: print command to stdout (possibly buffered)
9638 # This must be done before starting the command
9640 if($Global::verbose or $opt::dryrun) {
9641 my $fh = $self->fh(1,"w");
9642 if($Global::verbose <= 1) {
9643 print $fh $self->replaced(),"\n";
9645 # Verbose level > 1: Print the rsync and stuff
9646 print $fh $self->wrapped(),"\n";
9649 if($opt::sqlworker) {
9650 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
9656 # Files to remove when job is done
9658 push @{$self->{'unlink'}}, @_;
9662 # Files to remove when job is done
9664 return @{$self->{'unlink'}};
9668 # Remove files when job is done
9670 unlink $self->get_rm();
9671 delete @Global::unlink{$self->get_rm()};
9676 # Set reading FD if using --group (--ungroup does not need)
9677 for my $fdno (1,2) {
9678 # Re-open the file for reading
9679 # so fdw can be closed seperately
9680 # and fdr can be seeked seperately (for --line-buffer)
9681 my $fdr = ::open_or_exit("<", $self->fh($fdno,'name'));
9682 $self->set_fh($fdno,'r',$fdr);
9683 # Unlink if not debugging
9684 $Global::debug or ::rm($self->fh($fdno,"unlink"));
9688 sub empty_input_wrapper($) {
9689 # If no input: exit(0)
9690 # If some input: Pass input as input to command on STDIN
9691 # This avoids starting the command if there is no input.
9693 # $command = command to pipe data to
9695 # $wrapped_command = the wrapped command
9696 my $command = shift;
9697 # The optimal block size differs
9698 # It has been measured on:
9700 # <big ppar --pipe --block 100M --test $1 -j1 'cat >/dev/null';
9703 if(sysread(STDIN, $buf, 1)) {
9704 open($fh, "|-", @ARGV) || die;
9705 syswrite($fh, $buf);
9706 while($read = sysread(STDIN, $buf, 59000)) {
9707 syswrite($fh, $buf);
9710 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9713 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
9716 length $command > 499) {
9717 # csh does not like words longer than 1000 (499 quoted)
9718 # $command = "perl -e '".base64_zip_eval()."' ".
9719 # join" ",string_zip_base64(
9720 # 'exec "'.::perl_quote_scalar($command).'"');
9721 return 'perl -e '.::Q($script)." ".
9722 base64_wrap("exec \"$Global::shell\",'-c',\"".
9723 ::perl_quote_scalar($command).'"');
9725 return 'perl -e '.::Q($script)." ".
9726 $Global::shell." -c ".::Q($command);
9730 sub filter_through_compress($) {
9732 # Send stdout to stdin for $opt::compress_program(1)
9733 # Send stderr to stdin for $opt::compress_program(2)
9734 # cattail get pid: $pid = $self->fh($fdno,'rpid');
9735 my $cattail = cattail();
9737 for my $fdno (1,2) {
9738 # Make a communication file.
9739 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
9741 # Compressor: (echo > $comfile; compress pipe) > output
9742 # When the echo is written to $comfile,
9743 # it is known that output file is opened,
9744 # thus output file can then be removed by the decompressor.
9745 # empty_input_wrapper is needed for plzip
9746 my $qcom = ::Q($comfile);
9747 my $wpid = open(my $fdw,"|-", "(echo > $qcom; ".
9748 empty_input_wrapper($opt::compress_program).") >".
9749 ::Q($self->fh($fdno,'name'))) || die $?;
9750 $self->set_fh($fdno,'w',$fdw);
9751 $self->set_fh($fdno,'wpid',$wpid);
9752 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
9753 # decompress output > stdout
9754 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
9755 $opt::decompress_program, $wpid,
9756 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
9758 $self->set_fh($fdno,'r',$fdr);
9759 $self->set_fh($fdno,'rpid',$rpid);
9765 my ($self, $fd_no, $key, $fh) = @_;
9766 $self->{'fd'}{$fd_no,$key} = $fh;
9771 my ($self, $fd_no, $key) = @_;
9772 return $self->{'fd'}{$fd_no,$key};
9775 sub write_block($) {
9777 my $stdin_fh = $self->fh(0,"w");
9782 # If writing is to a closed pipe:
9783 # Do not call signal handler, but let nothing be written
9784 local $SIG{PIPE} = undef;
9788 $self->{'header'},$self->{'block'}) {
9789 # syswrite may not write all in one go,
9790 # so make sure everything is written.
9792 while($written = syswrite($stdin_fh,$$part)) {
9793 substr($$part,0,$written) = "";
9803 my $remaining_ref = shift;
9804 my $stdin_fh = $self->fh(0,"w");
9806 my $len = length $$remaining_ref;
9807 # syswrite may not write all in one go,
9808 # so make sure everything is written.
9811 # If writing is to a closed pipe:
9812 # Do not call signal handler, but let nothing be written
9813 local $SIG{PIPE} = undef;
9814 while($written = syswrite($stdin_fh,$$remaining_ref)){
9815 substr($$remaining_ref,0,$written) = "";
9819 sub set_block($$$$$$) {
9820 # Copy stdin buffer from $block_ref up to $endpos
9821 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
9822 # Remove $recstart and $recend if needed
9824 # $header_ref = ref to $header to prepend
9825 # $buffer_ref = ref to $buffer containing the block
9826 # $endpos = length of $block to pass on
9827 # $recstart = --recstart regexp
9828 # $recend = --recend regexp
9832 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
9833 $self->{'header'} = $header_ref;
9834 if($opt::roundrobin or $opt::remove_rec_sep or defined $opt::retries) {
9836 if(($opt::roundrobin or defined $opt::retries) and $self->virgin()) {
9839 # Job is no longer virgin
9840 $self->set_virgin(0);
9841 # Make a full copy because $buffer will change
9842 $a .= substr($$buffer_ref,0,$endpos);
9843 $self->{'block'} = \$a;
9844 if($opt::remove_rec_sep) {
9845 remove_rec_sep($self->{'block'},$recstart,$recend);
9847 $self->{'block_length'} = length ${$self->{'block'}};
9849 $self->set_virgin(0);
9850 for(substr($$buffer_ref,0,$endpos)) {
9851 $self->{'block'} = \$_;
9853 $self->{'block_length'} = $endpos + length ${$self->{'header'}};
9855 $self->{'block_pos'} = 0;
9856 $self->add_transfersize($self->{'block_length'});
9861 return $self->{'block'};
9864 sub block_length($) {
9866 return $self->{'block_length'};
9869 sub remove_rec_sep($) {
9870 # Remove --recstart and --recend from $block
9872 # $block_ref = reference to $block to be modified
9873 # $recstart = --recstart
9874 # $recend = --recend
9876 # $opt::regexp = Are --recstart/--recend regexp?
9879 my ($block_ref,$recstart,$recend) = @_;
9880 # Remove record separator
9882 $$block_ref =~ s/$recend$recstart//gom;
9883 $$block_ref =~ s/^$recstart//os;
9884 $$block_ref =~ s/$recend$//os;
9886 $$block_ref =~ s/\Q$recend$recstart\E//gom;
9887 $$block_ref =~ s/^\Q$recstart\E//os;
9888 $$block_ref =~ s/\Q$recend\E$//os;
9892 sub non_blocking_write($) {
9894 my $something_written = 0;
9896 my $in = $self->fh(0,"w");
9897 my $rv = syswrite($in,
9898 substr(${$self->{'block'}},$self->{'block_pos'}));
9899 if (!defined($rv) && $! == ::EAGAIN()) {
9900 # would block - but would have written
9901 $something_written = 0;
9902 # avoid triggering auto expanding block size
9903 $Global::no_autoexpand_block ||= 1;
9904 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
9906 # Remove the written part
9907 $self->{'block_pos'} += $rv;
9908 $something_written = $rv;
9910 # successfully wrote everything
9911 # Empty block to free memory
9913 $self->set_block(\$a,\$a,0,"","");
9914 $something_written = $rv;
9916 ::debug("pipe", "Non-block: ", $something_written);
9917 return $something_written;
9923 return $self->{'virgin'};
9926 sub set_virgin($$) {
9928 $self->{'virgin'} = shift;
9933 return $self->{'pid'};
9938 $self->{'pid'} = shift;
9943 # UNIX-timestamp this job started
9945 return sprintf("%.3f",$self->{'starttime'});
9948 sub set_starttime($@) {
9950 my $starttime = shift || ::now();
9951 $self->{'starttime'} = $starttime;
9953 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
9959 # Run time in seconds with 3 decimals
9961 return sprintf("%.3f",
9962 int(($self->endtime() - $self->starttime())*1000)/1000);
9967 # UNIX-timestamp this job ended
9968 # 0 if not ended yet
9970 return ($self->{'endtime'} || 0);
9973 sub set_endtime($$) {
9975 my $endtime = shift;
9976 $self->{'endtime'} = $endtime;
9978 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
9982 sub is_timedout($) {
9983 # Is the job timedout?
9985 # $delta_time = time that the job may run
9989 my $delta_time = shift;
9990 return time > $self->{'starttime'} + $delta_time;
9995 $self->set_exitstatus(-1);
9996 ::kill_sleep_seq($self->pid());
10001 return $self->{'killreason'};
10004 sub set_killreason($) {
10006 $self->{'killreason'} = shift;
10011 my @pgrps = map { -$_ } $self->pid();
10012 kill "STOP", @pgrps;
10013 $self->set_suspended(1);
10016 sub set_suspended($$) {
10018 $self->{'suspended'} = shift;
10023 return $self->{'suspended'};
10028 my @pgrps = map { -$_ } $self->pid();
10029 kill "CONT", @pgrps;
10030 $self->set_suspended(0);
10034 # return number of times failed for this $sshlogin
10038 # Number of times failed for $sshlogin
10040 my $sshlogin = shift;
10041 return $self->{'failed'}{$sshlogin};
10044 sub failed_here($) {
10045 # return number of times failed for the current $sshlogin
10047 # Number of times failed for this sshlogin
10049 return $self->{'failed'}{$self->sshlogin()};
10052 sub add_failed($) {
10053 # increase the number of times failed for this $sshlogin
10055 my $sshlogin = shift;
10056 $self->{'failed'}{$sshlogin}++;
10059 sub add_failed_here($) {
10060 # increase the number of times failed for the current $sshlogin
10062 $self->{'failed'}{$self->sshlogin()}++;
10065 sub reset_failed($) {
10066 # increase the number of times failed for this $sshlogin
10068 my $sshlogin = shift;
10069 delete $self->{'failed'}{$sshlogin};
10072 sub reset_failed_here($) {
10073 # increase the number of times failed for this $sshlogin
10075 delete $self->{'failed'}{$self->sshlogin()};
10078 sub min_failed($) {
10080 # the number of sshlogins this command has failed on
10081 # the minimal number of times this command has failed
10084 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
10085 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
10086 return ($number_of_sshlogins_failed_on,$min_failures);
10089 sub total_failed($) {
10091 # $total_failures = the number of times this command has failed
10093 my $total_failures = 0;
10094 for (values %{$self->{'failed'}}) {
10095 $total_failures += $_;
10097 return $total_failures;
10103 sub postpone_exit_and_cleanup {
10104 # Command to remove files and dirs (given as args) without
10105 # affecting the exit value in $?/$status.
10107 $script = "perl -e '".
10115 if($bash=~s/(\d+)h/$1/) {
10120 # `echo \$?h` is needed to make fish not complain
10121 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
10131 # Script to create a fifo, run a command on the fifo
10132 # while copying STDIN to the fifo, and finally
10133 # remove the fifo and return the exit code of the command.
10135 # {} == $PARALLEL_TMP for --fifo
10136 # To make it csh compatible a wrapper needs to:
10138 # * spawn $command &
10140 # * waitpid to get the exit code from $command
10141 # * be less than 1000 chars long
10143 # The optimal block size differs
10144 # It has been measured on:
10146 # ppar -a big --pipepart --block -1 --test $1 --fifo 'cat {} >/dev/null';
10147 $script = "perl -e '".
10150 ($s,$c,$f) = @ARGV;
10151 # mkfifo $PARALLEL_TMP
10152 system "mkfifo", $f;
10153 # spawn $shell -c $command &
10154 $pid = fork || exec $s, "-c", $c;
10155 open($o,">",$f) || die $!;
10156 # cat > $PARALLEL_TMP
10157 while(sysread(STDIN,$buf,4095)){
10161 # waitpid to get the exit code from $command
10173 # Wrap command with:
10179 # * --pipepart (@Global::cat_prepends)
10180 # * --tee (@Global::cat_prepends)
10183 # The ordering of the wrapping is important:
10184 # * --nice/--cat/--fifo should be done on the remote machine
10185 # * --pipepart/--pipe should be done on the local machine inside --tmux
10192 # @Global::cat_prepends
10196 # $self->{'wrapped'} = the command wrapped with the above
10198 if(not defined $self->{'wrapped'}) {
10199 my $command = $self->replaced();
10200 # Bug in Bash and Ksh when running multiline aliases
10201 # This will force them to run correctly, but will fail in
10202 # tcsh so we do not do it.
10203 # $command .= "\n\n";
10204 if(@opt::shellquote) {
10205 # Quote one time for each --shellquote
10207 for(@opt::shellquote) {
10210 # Prepend "echo" (it is written in perl because
10211 # quoting '-e' causes problem in some versions and
10212 # csh's version does something wrong)
10213 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
10215 if($Global::parallel_env) {
10216 # If $PARALLEL_ENV set, put that in front of the command
10217 # Used for env_parallel.*
10218 if($Global::shell =~ /zsh/) {
10219 # The extra 'eval' will make aliases work, too
10220 $command = $Global::parallel_env."\n".
10221 "eval ".::Q($command);
10223 $command = $Global::parallel_env."\n".$command;
10227 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
10228 # This is to make it possible to compute $PARALLEL_TMP on
10229 # the fly when running remotely.
10230 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
10231 # the command is run.
10233 # Prepend 'cat > $PARALLEL_TMP;'
10234 # Append 'unlink $PARALLEL_TMP without affecting $?'
10236 'cat > "$PARALLEL_TMP";'.
10237 $command.";". postpone_exit_and_cleanup().
10239 } elsif($opt::fifo) {
10240 # Prepend fifo-wrapper. In essence:
10243 # # $command must read {}, otherwise this 'cat' will block
10246 # without affecting $?
10247 $command = fifo_wrap(). " ".
10248 $Global::shell. " ". ::Q($command). ' "$PARALLEL_TMP"'. ';';
10250 # Wrap with ssh + tranferring of files
10251 $command = $self->sshlogin_wrap($command);
10252 if(@Global::cat_prepends) {
10253 # --pipepart: prepend:
10254 # < /tmp/foo perl -e 'while(@ARGV) {
10255 # sysseek(STDIN,shift,0) || die; $left = shift;
10256 # while($read = sysread(STDIN,$buf, ($left > 60800 ? 60800 : $left))){
10257 # $left -= $read; syswrite(STDOUT,$buf);
10261 # --pipepart --tee: prepend:
10264 # --pipe --tee: wrap:
10265 # (rm fifo; ... ) < fifo
10267 # --pipe --shard X:
10268 # (rm fifo; ... ) < fifo
10269 $command = (shift @Global::cat_prepends). "($command)".
10270 (shift @Global::cat_appends);
10271 } elsif($opt::pipe and not $opt::roundrobin) {
10272 # Wrap with EOF-detector to avoid starting $command if EOF.
10273 $command = empty_input_wrapper($command);
10276 # Wrap command with 'tmux'
10277 $command = $self->tmux_wrap($command);
10281 length $command > 499) {
10282 # csh does not like words longer than 1000 (499 quoted)
10283 # $command = "perl -e '".base64_zip_eval()."' ".
10284 # join" ",string_zip_base64(
10285 # 'exec "'.::perl_quote_scalar($command).'"');
10286 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
10287 ::perl_quote_scalar($command).'"');
10289 $self->{'wrapped'} = $command;
10291 return $self->{'wrapped'};
10294 sub set_sshlogin($$) {
10296 my $sshlogin = shift;
10297 $self->{'sshlogin'} = $sshlogin;
10298 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
10299 delete $self->{'wrapped'};
10301 if($opt::sqlworker) {
10302 # Identify worker as --sqlworker often runs on different machines
10303 # If local: Use hostname
10304 my $host = $sshlogin->local() ? ::hostname() : $sshlogin->host();
10305 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
10311 return $self->{'sshlogin'};
10314 sub string_base64($) {
10315 # Base64 encode strings into 1000 byte blocks.
10316 # 1000 bytes is the largest word size csh supports
10318 # @strings = to be encoded
10320 # @base64 = 1000 byte block
10321 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
10322 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
10326 sub string_zip_base64($) {
10327 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
10329 # 1000 bytes is the largest word size csh supports
10330 # Zipping will make exporting big environments work, too
10332 # @strings = to be encoded
10334 # @base64 = 1000 byte block
10335 my($zipin_fh, $zipout_fh,@base64);
10336 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
10339 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
10340 # Split base64 encoded into 1000 byte blocks
10341 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
10345 print $zipin_fh @_;
10349 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
10353 sub base64_zip_eval() {
10355 # * reads base64 strings from @ARGV
10357 # * pipes through 'bzip2 -dc'
10358 # * evals the result
10359 # Reverse of string_zip_base64 + eval
10360 # Will be wrapped in ' so single quote is forbidden
10362 # $script = 1-liner for perl -e
10363 my $script = ::spacefree(0,q{
10364 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
10365 eval"@GNU_Parallel";
10366 $chld = $SIG{CHLD};
10367 $SIG{CHLD} = "IGNORE";
10368 # Search for bzip2. Not found => use default path
10369 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
10370 # $in = stdin on $zip, $out = stdout from $zip
10371 # Forget my() to save chars for csh
10372 # my($in, $out,$eval);
10373 open3($in,$out,">&STDERR",$zip,"-dc");
10374 if(my $perlpid = fork) {
10376 $eval = join "", <$out>;
10380 # Pipe decoded base64 into 'bzip2 -dc'
10381 print $in (decode_base64(join"",@ARGV));
10386 $SIG{CHLD} = $chld;
10389 ::debug("base64",$script,"\n");
10393 sub base64_wrap($) {
10394 # base64 encode Perl code
10395 # Split it into chunks of < 1000 bytes
10396 # Prepend it with a decoder that eval's it
10398 # $eval_string = Perl code to run
10400 # $shell_command = shell command that runs $eval_string
10401 my $eval_string = shift;
10404 ::Q(base64_zip_eval())." ".
10405 join" ",::shell_quote(string_zip_base64($eval_string));
10408 sub base64_eval($) {
10410 # * reads base64 strings from @ARGV
10412 # * evals the result
10413 # Reverse of string_base64 + eval
10414 # Will be wrapped in ' so single quote is forbidden.
10415 # Spaces are stripped so spaces cannot be significant.
10416 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
10417 # to make it clear that this is a GNU Parallel command
10418 # when looking at the process table.
10420 # $script = 1-liner for perl -e
10421 my $script = ::spacefree(0,q{
10422 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
10423 eval "@GNU_Parallel";
10424 my $eval = decode_base64(join"",@ARGV);
10427 ::debug("base64",$script,"\n");
10431 sub sshlogin_wrap($) {
10432 # Wrap the command with the commands needed to run remotely
10434 # $command = command to run
10436 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
10437 sub monitor_parent_sshd_script {
10438 # This script is to solve the problem of
10439 # * not mixing STDERR and STDOUT
10440 # * terminating with ctrl-c
10441 # If its parent is ssh: all good
10442 # If its parent is init(1): ssh died, so kill children
10443 my $monitor_parent_sshd_script;
10445 if(not $monitor_parent_sshd_script) {
10446 $monitor_parent_sshd_script =
10447 # This will be packed in ', so only use "
10449 (0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
10450 '$tmpdir = $ENV{"TMPDIR"} || "'.
10451 ::perl_quote_scalar($ENV{'PARALLEL_REMOTE_TMPDIR'}).'";'.
10452 '$nice = '.$opt::nice.';'.
10453 '$termseq = "'.$opt::termseq.'";'.
10456 # Check that $tmpdir is writable
10458 die("$tmpdir\040is\040not\040writable.".
10459 "\040Set\040PARALLEL_REMOTE_TMPDIR");
10460 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
10462 $ENV{PARALLEL_TMP} = $tmpdir."/par".
10463 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
10464 } while(-e $ENV{PARALLEL_TMP});
10465 # Set $script to a non-existent file name in $TMPDIR
10467 $script = $tmpdir."/par-job-$ENV{PARALLEL_SEQ}_".
10468 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
10469 } while(-e $script);
10470 # Create a script from the hex code
10471 # that removes itself and runs the commands
10472 open($fh,">",$script) || die;
10473 # \040 = space - but we remove spaces in the script
10474 # ' needed due to rc-shell
10475 print($fh("rm\040\'$script\'\n",$bashfunc.$cmd));
10477 my $parent = getppid;
10479 $SIG{CHLD} = sub { $done = 1; };
10482 # Make own process group to be able to kill HUP it later
10485 eval { setpriority(0,0,$nice) };
10487 exec($shell,$script);
10488 die("exec\040failed: $!");
10490 while((not $done) and (getppid == $parent)) {
10491 # Parent pid is not changed, so sshd is alive
10492 # Exponential sleep up to 1 sec
10493 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
10494 select(undef, undef, undef, $s);
10497 # sshd is dead: User pressed Ctrl-C
10498 # Kill as per --termseq
10499 my @term_seq = split/,/,$termseq;
10500 if(not @term_seq) {
10501 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
10503 while(@term_seq && kill(0,-$pid)) {
10504 kill(shift @term_seq, -$pid);
10505 select(undef, undef, undef, (shift @term_seq)/1000);
10509 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
10512 return $monitor_parent_sshd_script;
10515 sub vars_to_export {
10518 my @vars = ("parallel_bash_environment");
10519 for my $varstring (@opt::env) {
10520 # Split up --env VAR1,VAR2
10521 push @vars, split /,/, $varstring;
10524 if(-r $_ and not -d) {
10525 # Read as environment definition bug #44041
10527 $Global::envdef = ::slurp_or_exit($_);
10530 if(grep { /^_$/ } @vars) {
10533 # Include all vars that are not in a clean environment
10534 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
10535 my @ignore = <$vars_fh>;
10538 @ignore{@ignore} = @ignore;
10540 push @vars, grep { not defined $ignore{$_} } keys %ENV;
10541 @vars = grep { not /^_$/ } @vars;
10543 ::error("Run '$Global::progname --record-env' ".
10544 "in a clean environment first.");
10545 ::wait_and_exit(255);
10548 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
10549 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
10551 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
10552 "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST",
10553 "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS",
10554 "PARALLEL_JOBSLOT", $opt::process_slot_var,
10555 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
10556 # Keep only defined variables
10557 return grep { defined($ENV{$_}) } @vars;
10562 # $eval = '$ENV{"..."}=...; ...'
10563 my @vars = vars_to_export();
10564 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
10565 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
10566 my @non_functions = (grep { !/PARALLEL_ENV/ }
10567 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
10569 # eval of @envset will set %ENV
10570 my $envset = join"", map {
10571 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
10572 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
10574 # running @bashfunc on the command line, will set the functions
10575 my @bashfunc = map {
10577 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
10578 "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions;
10579 # eval $bashfuncset will set $bashfunc
10582 # Functions are not supported for all shells
10583 if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) {
10584 ::warning("Shell functions may not be supported in $Global::shell.");
10587 '@bash_functions=qw('."@bash_functions".");".
10588 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
10589 if($shell=~/csh/) {
10590 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
10594 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
10596 $bashfuncset = '$bashfunc = "";'
10598 if($ENV{'parallel_bash_environment'}) {
10599 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
10601 ::debug("base64",$envset,$bashfuncset,"\n");
10602 return $csh_friendly,$envset,$bashfuncset;
10606 my $command = shift;
10607 # TODO test that *sh -c 'parallel --env' use *sh
10608 if(not defined $self->{'sshlogin_wrap'}{$command}) {
10609 my $sshlogin = $self->sshlogin();
10610 $ENV{'PARALLEL_SEQ'} = $self->seq();
10611 $ENV{$opt::process_slot_var} = -1 +
10612 ($ENV{'PARALLEL_JOBSLOT'} = $self->slot());
10613 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
10614 $ENV{'PARALLEL_SSHHOST'} = $sshlogin->host();
10615 if ($opt::hostgroups) {
10616 $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups();
10617 $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups();
10619 $ENV{'PARALLEL_PID'} = $$;
10620 if($sshlogin->local()) {
10621 if($opt::workdir) {
10622 # Create workdir if needed. Then cd to it.
10623 my $wd = $self->workdir();
10624 if($opt::workdir eq "." or $opt::workdir eq "...") {
10625 # If $wd does not start with '/': Prepend $HOME
10626 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
10628 ::mkdir_or_die($wd);
10630 if($opt::workdir eq "...") {
10631 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
10634 $command = "cd ".::Q($wd)." || exit 255; " .
10638 # Prepend with environment setter, which sets functions in zsh
10639 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
10640 my $perl_code = $envset.$bashfuncset.
10641 '@ARGV="'.::perl_quote_scalar($command).'";'.
10642 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
10643 if(length $perl_code > 999
10647 $command =~ /\n/) {
10648 # csh does not deal well with > 1000 chars in one word
10649 # csh does not deal well with $ENV with \n
10650 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
10652 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
10655 $self->{'sshlogin_wrap'}{$command} = $command;
10659 if($opt::workdir) {
10660 # Create remote workdir if needed. Then cd to it.
10661 my $wd = ::pQ($self->workdir());
10662 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
10663 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") &&}.
10666 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
10667 my $cmd = $command;
10668 # q// does not quote \, so we must do that
10669 $cmd =~ s/\\/\\\\/g;
10671 my $remote_command = $sshlogin->hexwrap
10672 ($pwd.$envset.$bashfuncset.'$cmd='."q\0".$cmd."\0;".
10673 monitor_parent_sshd_script());
10674 my ($pre,$post,$cleanup)=("","","");
10676 $pre .= $self->sshtransfer();
10678 $post .= $self->sshreturn();
10680 $post .= $self->sshcleanup();
10682 # We need to save the exit status of the job
10683 $post = exitstatuswrapper($post);
10685 $self->{'sshlogin_wrap'}{$command} =
10687 . $sshlogin->wrap($remote_command)
10692 return $self->{'sshlogin_wrap'}{$command};
10695 sub fill_templates($) {
10696 # Replace replacement strings in template(s)
10698 # @templates - File names of replaced templates
10701 if(%opt::template) {
10702 my @template_name =
10703 map { $self->{'commandline'}->replace_placeholders([$_],0,0) }
10704 @{$self->{'commandline'}{'template_names'}};
10705 ::debug("tmpl","Names: @template_name\n");
10706 for(my $i = 0; $i <= $#template_name; $i++) {
10708 ($template_name[$i],
10709 $self->{'commandline'}->
10710 replace_placeholders([$self->{'commandline'}
10711 {'template_contents'}[$i]],0,0));
10713 if($opt::cleanup) {
10714 $self->add_rm(@template_name);
10720 # Replace replacement strings in filter(s) and evaluate them
10722 # $run - 1=yes, undef=no
10726 for my $eval ($self->{'commandline'}->
10727 replace_placeholders(\@opt::filter,0,0)) {
10728 $run &&= eval $eval;
10730 $self->{'commandline'}{'skip'} ||= not $run;
10736 # Files to transfer
10737 # Non-quoted and with {...} substituted
10739 # @transfer - File names of files to transfer
10742 my $transfersize = 0;
10743 my @transfer = $self->{'commandline'}->
10744 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
10748 $transfersize += (stat($_))[7];
10751 $self->add_transfersize($transfersize);
10755 sub transfersize($) {
10757 return $self->{'transfersize'};
10760 sub add_transfersize($) {
10762 my $transfersize = shift;
10763 $self->{'transfersize'} += $transfersize;
10764 $opt::sqlworker and
10765 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
10766 $self->{'transfersize'});
10769 sub sshtransfer($) {
10770 # Returns for each transfer file:
10771 # rsync $file remote:$workdir
10774 my $sshlogin = $self->sshlogin();
10775 my $workdir = $self->workdir();
10776 for my $file ($self->transfer()) {
10777 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
10779 return join("",@pre);
10784 # Non-quoted and with {...} substituted
10786 # @non_quoted_filenames
10788 return $self->{'commandline'}->
10789 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
10792 sub returnsize($) {
10793 # This is called after the job has finished
10795 # $number_of_bytes transferred in return
10797 for my $file ($self->return()) {
10799 $self->{'returnsize'} += (stat($file))[7];
10802 return $self->{'returnsize'};
10805 sub add_returnsize($) {
10807 my $returnsize = shift;
10808 $self->{'returnsize'} += $returnsize;
10809 $opt::sqlworker and
10810 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
10811 $self->{'returnsize'});
10815 # Returns for each return-file:
10816 # rsync remote:$workdir/$file .
10818 my $sshlogin = $self->sshlogin();
10820 for my $file ($self->return()) {
10821 $file =~ s:^\./::g; # Remove ./ if any
10822 my $relpath = ($file !~ m:^/:) ||
10823 ($file =~ m:/\./:); # Is the path relative or /./?
10827 # rsync -avR /foo/./bar/baz.c remote:/tmp/
10828 # == (on old systems)
10829 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
10830 $wd = ::shell_quote_file($self->workdir()."/");
10832 # Only load File::Basename if actually needed
10833 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
10834 # dir/./file means relative to dir, so remove dir on remote
10835 $file =~ m:(.*)/\./:;
10836 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
10837 my $nobasedir = $file;
10838 $nobasedir =~ s:.*/\./::;
10839 $cd = ::shell_quote_file(::dirname($nobasedir));
10840 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
10841 my $basename = ::Q(::shell_quote_file(::basename($file)));
10843 # mkdir -p /home/tange/dir/subdir/;
10844 # rsync (--protocol 30) -rlDzR
10845 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
10846 # server:file.gz /home/tange/dir/subdir/
10847 $pre .= "mkdir -p $basedir$cd" . " && " .
10848 $sshlogin->rsync(). " $rsync_cd -- ".$sshlogin->host().':'.
10849 $basename . " ".$basedir.$cd.";";
10854 sub sshcleanup($) {
10855 # Return the sshcommand needed to remove the file
10857 # ssh command needed to remove files from sshlogin
10859 my $sshlogin = $self->sshlogin();
10860 my $workdir = $self->workdir();
10863 for my $file ($self->remote_cleanup()) {
10864 my @subworkdirs = parentdirs_of($file);
10865 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
10867 if(defined $opt::workdir and $opt::workdir eq "...") {
10868 $cleancmd .= $sshlogin->wrap("rm -rf " . ::Q($workdir).';');
10873 sub remote_cleanup($) {
10875 # Files to remove at cleanup
10877 if($opt::cleanup) {
10878 my @transfer = $self->transfer();
10879 my @return = $self->return();
10880 return (@transfer,@return);
10886 sub exitstatuswrapper(@) {
10888 # @shellcode = shell code to execute
10890 # shell script that returns current status after executing @shellcode
10891 if($Global::cshell) {
10892 return ('set _EXIT_status=$status; ' .
10894 'exit $_EXIT_status;');
10895 } elsif($Global::fish) {
10896 return ('export _EXIT_status=$status; ' .
10898 'exit $_EXIT_status;');
10900 return ('_EXIT_status=$?; ' .
10902 'exit $_EXIT_status;');
10908 # the workdir on a remote machine
10910 if(not defined $self->{'workdir'}) {
10912 if(defined $opt::workdir) {
10913 if($opt::workdir eq ".") {
10914 # . means current dir
10915 my $home = $ENV{'HOME'};
10920 # If homedir exists: remove the homedir from
10921 # workdir if cwd starts with homedir
10922 # E.g. /home/foo/my/dir => my/dir
10923 # E.g. /tmp/my/dir => /tmp/my/dir
10924 my ($home_dev, $home_ino) = (stat($home))[0,1];
10926 my @dir_parts = split(m:/:,$cwd);
10928 while(defined ($part = shift @dir_parts)) {
10929 $part eq "" and next;
10930 $parent .= "/".$part;
10931 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
10932 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
10933 # dev and ino is the same: We found the homedir.
10934 $workdir = join("/",@dir_parts);
10939 if($workdir eq "") {
10942 } elsif($opt::workdir eq "...") {
10943 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
10944 . "-" . $self->seq();
10946 $workdir = $self->{'commandline'}->
10947 replace_placeholders([$opt::workdir],0,0);
10948 #$workdir = $opt::workdir;
10949 # Rsync treats /./ special. We dont want that
10950 $workdir =~ s:/\./:/:g; # Remove /./
10951 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
10952 $workdir =~ s:^\./::g; # Remove starting ./ if any
10957 $self->{'workdir'} = $workdir;
10959 return $self->{'workdir'};
10962 sub parentdirs_of($) {
10964 # all parentdirs except . of this dir or file - sorted desc by length
10967 while($d =~ s:/[^/]+$::) {
10976 # Setup STDOUT and STDERR for a job and start it.
10978 # job-object or undef if job not to run
10980 sub open3_setpgrp_internal {
10981 # Run open3+setpgrp followed by the command
10983 # $stdin_fh = Filehandle to use as STDIN
10984 # $stdout_fh = Filehandle to use as STDOUT
10985 # $stderr_fh = Filehandle to use as STDERR
10986 # $command = Command to run
10988 # $pid = Process group of job started
10989 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
10992 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
10993 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
10994 # The eval is needed to catch exception from open3
10996 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
10997 # Each child gets its own process group to make it safe to killall
10998 eval{ setpgrp(0,0) };
10999 eval{ setpriority(0,0,$opt::nice) };
11000 exec($Global::shell,"-c",$command)
11001 || ::die_bug("open3-$stdin_fh ".substr($command,0,200));
11007 sub open3_setpgrp_external {
11008 # Run open3 on $command wrapped with a perl script doing setpgrp
11009 # Works on systems that do not support open3(,,,"-")
11011 # $stdin_fh = Filehandle to use as STDIN
11012 # $stdout_fh = Filehandle to use as STDOUT
11013 # $stderr_fh = Filehandle to use as STDERR
11014 # $command = Command to run
11016 # $pid = Process group of job started
11017 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
11019 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
11020 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
11025 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
11026 "exec '$Global::shell', '-c', \@ARGV");
11027 # The eval is needed to catch exception from open3
11029 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
11030 || ::die_bug("open3-$stdin_fh");
11036 sub redefine_open3_setpgrp {
11037 my $setgprp_cache = shift;
11038 # Select and run open3_setpgrp_internal/open3_setpgrp_external
11039 no warnings 'redefine';
11040 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
11041 # Test to see if open3(x,x,x,"-") is fully supported
11042 # Can an exported bash function be called via open3?
11043 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
11044 'else { exec("bash","-c","testfun && true"); }';
11046 ::shell_quote_scalar_default(
11047 "testfun() { rm $name; }; export -f testfun; ".
11048 "perl -MIPC::Open3 -e ".
11052 # Redirect STDERR temporarily,
11053 # so errors on MacOS X are ignored.
11054 open my $saveerr, ">&STDERR";
11055 open STDERR, '>', "/dev/null";
11057 ::debug("init",qq{bash -c $bash 2>/dev/null});
11058 qx{ bash -c $bash 2>/dev/null };
11059 open STDERR, ">&", $saveerr;
11062 # Does not support open3(x,x,x,"-")
11063 # or does not have bash:
11064 # Use (slow) external version
11066 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
11067 ::debug("init","open3_setpgrp_external chosen\n");
11069 # Supports open3(x,x,x,"-")
11070 # This is 0.5 ms faster to run
11071 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
11072 ::debug("init","open3_setpgrp_internal chosen\n");
11074 if(open(my $fh, ">", $setgprp_cache)) {
11075 print $fh $redefine_eval;
11078 ::debug("init","Cannot write to $setgprp_cache");
11080 eval $redefine_eval;
11083 sub open3_setpgrp {
11084 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
11085 ::hostname() . "/setpgrp_func";
11087 -e $setgprp_cache || return 0;
11089 open(my $fh, "<", $setgprp_cache) || return 0;
11090 eval <$fh> || return 0;
11094 if(not read_cache()) {
11095 redefine_open3_setpgrp($setgprp_cache);
11097 # The sub is now redefined. Call it
11098 return open3_setpgrp(@_);
11102 # Get the shell command to be executed (possibly with ssh infront).
11103 my $command = $job->wrapped();
11106 if($Global::interactive or $Global::stderr_verbose) {
11107 $job->interactive_start();
11109 # Must be run after $job->interactive_start():
11110 # $job->interactive_start() may call $job->skip()
11111 if($job->{'commandline'}{'skip'}
11113 not $job->filter()) {
11114 # $job->skip() was called or job filtered
11117 $job->openoutputfiles();
11118 $job->print_verbose_dryrun();
11119 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
11120 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
11121 $ENV{'PARALLEL_SEQ'} = $job->seq();
11122 $ENV{'PARALLEL_PID'} = $$;
11123 $ENV{$opt::process_slot_var} = -1 +
11124 ($ENV{'PARALLEL_JOBSLOT'} = $job->slot());
11125 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
11126 $job->add_rm($ENV{'PARALLEL_TMP'});
11127 $job->fill_templates();
11128 $ENV{'SSHPASS'} = $job->{'sshlogin'}->{'password'};
11129 ::debug("run", $Global::total_running, " processes . Starting (",
11130 $job->seq(), "): $command\n");
11132 my ($stdin_fh) = ::gensym();
11133 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
11134 if($opt::roundrobin and not $opt::keeporder) {
11135 # --keep-order will make sure the order will be reproducible
11136 ::set_fh_non_blocking($stdin_fh);
11138 $job->set_fh(0,"w",$stdin_fh);
11139 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
11140 } elsif(($opt::tty or $opt::open_tty) and -c "/dev/tty" and
11141 open(my $devtty_fh, "<", "/dev/tty")) {
11142 # Give /dev/tty to the command if no one else is using it
11143 # The eval is needed to catch exception from open3
11144 local (*IN,*OUT,*ERR);
11145 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
11146 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
11148 # The eval is needed to catch exception from open3
11149 my @wrap = ('perl','-e',
11150 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
11151 "exec '$Global::shell', '-c', \@ARGV");
11153 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
11154 || ::die_bug("open3-/dev/tty");
11158 $job->set_virgin(0);
11159 } elsif($Global::semaphore) {
11160 # Allow sem to read from stdin
11161 $pid = open3_setpgrp("<&STDIN",$stdout_fh,$stderr_fh,$command);
11162 $job->set_virgin(0);
11164 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
11165 $job->set_virgin(0);
11168 # A job was started
11169 $Global::total_running++;
11170 $Global::total_started++;
11171 $job->set_pid($pid);
11172 $job->set_starttime();
11173 $Global::running{$job->pid()} = $job;
11174 if($opt::timeout) {
11175 $Global::timeoutq->insert($job);
11177 $Global::newest_job = $job;
11178 $Global::newest_starttime = ::now();
11181 # No more processes
11182 ::debug("run", "Cannot spawn more jobs.\n");
11187 sub interactive_start($) {
11189 my $command = $self->wrapped();
11190 if($Global::interactive) {
11192 ::status_no_nl("$command ?...");
11194 my $tty_fh = ::open_or_exit("<","/dev/tty");
11195 $answer = <$tty_fh>;
11197 # Sometime we get an empty string (not even \n)
11198 # Do not know why, so let us just ignore it and try again
11199 } while(length $answer < 1);
11200 if (not ($answer =~ /^\s*y/i)) {
11201 $self->{'commandline'}->skip();
11204 print $Global::original_stderr "$command\n";
11213 # Wrap command with tmux for session pPID
11215 # $actual_command = the actual command being run (incl ssh wrap)
11217 my $actual_command = shift;
11218 # Temporary file name. Used for fifo to communicate exit val
11219 my $tmpfifo = ::tmpname("tmx");
11220 $self->add_rm($tmpfifo);
11221 if(length($tmpfifo) >=100) {
11222 ::error("tmux does not support sockets with path > 100.");
11223 ::wait_and_exit(255);
11225 if($opt::tmuxpane) {
11226 # Move the command into a pane in window 0
11227 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
11228 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
11231 my $visual_command = $self->replaced();
11232 my $title = $visual_command;
11233 if($visual_command =~ /\0/) {
11234 ::error("Command line contains NUL. tmux is confused by NUL.");
11235 ::wait_and_exit(255);
11237 # ; causes problems
11238 # ascii 194-245 annoys tmux
11239 $title =~ tr/[\011-\016;\302-\365]/ /s;
11240 $title = ::Q($title);
11242 my $l_act = length($actual_command);
11243 my $l_tit = length($title);
11244 my $l_fifo = length($tmpfifo);
11245 # The line to run contains a 118 chars extra code + the title 2x
11246 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
11248 my $quoted_space75 = ::Q(" ")x75;
11249 while($l_tit < 1000 and
11251 (890 < $l_tot and $l_tot < 1350)
11253 (9250 < $l_tot and $l_tot < 9800)
11255 # tmux blocks for certain lengths:
11256 # 900 < title + command < 1200
11257 # 9250 < title + command < 9800
11258 # but only if title < 1000, so expand the title with 75 spaces
11259 # The measured lengths are:
11260 # 996 < (title + whole command) < 1127
11261 # 9331 < (title + whole command) < 9636
11262 $title .= $quoted_space75;
11263 $l_tit = length($title);
11264 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
11268 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11269 if(not $tmuxsocket) {
11270 $tmuxsocket = ::tmpname("tms");
11271 $qsocket = ::Q($tmuxsocket);
11272 ::debug("tmux", "Start: $ENV{'PARALLEL_TMUX'} -S $qsocket attach");
11275 # Run tmux in the foreground
11276 # Wait for the socket to appear
11277 while (not -e $tmuxsocket) { }
11278 `$ENV{'PARALLEL_TMUX'} -S $qsocket attach`;
11282 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $qsocket attach");
11284 $tmux = "sh -c ".::Q(
11285 $ENV{'PARALLEL_TMUX'}.
11286 " -S $qsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1").";" .
11287 $ENV{'PARALLEL_TMUX'}.
11288 " -S $qsocket new-window -t p$$ -n $title";
11290 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
11291 $Limits::Command::line_max_len, " tot ",
11293 return "mkfifo ".::Q($tmpfifo)." && $tmux ".
11297 "(".$actual_command.');'.
11298 # The triple print is needed - otherwise the testsuite fails
11299 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].
11301 "echo $title; echo \007Job finished at: `date`;sleep 10"
11304 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
11305 # If csh the first will be 0h, so use the second as exit value.
11306 # Otherwise just use the first value as exit value.
11307 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; }.
11308 q{/(\d+)h/ and exit($1);exit$c' }.::Q($tmpfifo);
11312 sub is_already_in_results($) {
11313 # Do we already have results for this job?
11315 # $job_already_run = bool whether there is output for this or not
11317 if($Global::csvsep) {
11319 # OK: You can look for job run in joblog
11323 "--resume --results .csv/.tsv/.json is not supported yet\n");
11324 # TODO read and parse the file
11328 my $out = $job->{'commandline'}->results_out();
11329 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
11330 return(-e $out."stdout" or -f $out);
11333 sub is_already_in_joblog($) {
11335 return vec($Global::job_already_run,$job->seq(),1);
11338 sub set_job_in_joblog($) {
11340 vec($Global::job_already_run,$job->seq(),1) = 1;
11344 # This command should be retried
11347 $self->set_endtime(undef);
11348 $self->reset_exitstatus();
11349 $self->set_killreason(undef);
11350 $Global::JobQueue->unget($self);
11351 ::debug("run", "Retry ", $self->seq(), "\n");
11355 sub should_be_retried($) {
11356 # Should this job be retried?
11359 # 1 - job queued for retry
11361 if($opt::memfree and $self->killreason() eq "mem") {
11362 # Job was killed due to memfree => retry
11363 return $self->retry();
11365 if (not defined $opt::retries) { return 0; }
11366 if(not $self->exitstatus() and not $self->exitsignal()) {
11367 # Completed with success. If there is a recorded failure: forget it
11368 $self->reset_failed_here();
11371 # The job failed. Should it be retried?
11372 $self->add_failed_here();
11373 my $retries = $self->{'commandline'}->
11374 replace_placeholders([$opt::retries],0,0);
11376 if($retries == 0) { $retries = 2**31; }
11377 # Ignore files already unlinked to avoid memory leak
11378 $self->{'unlink'} = [ grep { -e $_ } @{$self->{'unlink'}} ];
11379 map { -e $_ or delete $Global::unlink{$_} } keys %Global::unlink;
11380 if($self->total_failed() == $retries) {
11381 # This has been retried enough
11384 # This command should be retried
11385 return $self->retry();
11391 my (%print_later,$job_seq_to_print);
11393 sub print_earlier_jobs($) {
11394 # Print jobs whose output is postponed due to --keep-order
11397 $print_later{$job->seq()} = $job;
11398 $job_seq_to_print ||= 1;
11399 my $returnsize = 0;
11400 ::debug("run", "Looking for: $job_seq_to_print ",
11401 "This: ", $job->seq(), "\n");
11402 for(;vec($Global::job_already_run,$job_seq_to_print,1);
11403 $job_seq_to_print++) {}
11404 while(my $j = $print_later{$job_seq_to_print}) {
11405 $returnsize += $j->print();
11406 if($j->endtime()) {
11407 # Job finished - look at the next
11408 delete $print_later{$job_seq_to_print};
11409 $job_seq_to_print++;
11412 # Job not finished yet - look at it again next round
11416 return $returnsize;
11421 # Print the output of the jobs
11425 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
11427 # Nothing was printed to this job:
11428 # cleanup tmp files if --files was set
11429 ::rm($self->fh(1,"name"));
11431 if($opt::pipe and $self->virgin() and not $opt::tee) {
11432 # Skip --joblog, --dryrun, --verbose
11434 if($opt::ungroup) {
11435 # NULL returnsize = 0 returnsize
11436 $self->returnsize() or $self->add_returnsize(0);
11437 if($Global::joblog and defined $self->{'exitstatus'}) {
11438 # Add to joblog when finished
11439 $self->print_joblog();
11440 # Printing is only relevant for grouped/--line-buffer output.
11441 $opt::ungroup and return;
11444 # Check for disk full
11445 ::exit_if_disk_full();
11448 my $returnsize = $self->returnsize();
11450 if($opt::latestline) {
11453 @fdno = (sort { $a <=> $b } keys %Global::fh);
11455 for my $fdno (@fdno) {
11456 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
11457 $fdno == 0 and next;
11458 my $out_fh = $Global::fh{$fdno};
11459 my $in_fh = $self->fh($fdno,"r");
11461 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
11462 # ::warning("File descriptor $fdno not defined\n");
11466 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
11467 if($Global::linebuffer) {
11468 # Line buffered print out
11469 $self->print_linebuffer($fdno,$in_fh,$out_fh);
11470 } elsif($Global::files) {
11471 $self->print_files($fdno,$in_fh,$out_fh);
11472 } elsif($opt::results) {
11473 $self->print_results($fdno,$in_fh,$out_fh);
11475 $self->print_normal($fdno,$in_fh,$out_fh);
11479 ::debug("print", "<<joboutput\n");
11480 if(defined $self->{'exitstatus'}
11481 and not ($self->virgin() and $opt::pipe)) {
11482 if($Global::joblog and not $opt::sqlworker) {
11483 # Add to joblog when finished
11484 $self->print_joblog();
11486 if($opt::sqlworker and not $opt::results) {
11487 $Global::sql->output($self);
11489 if($Global::csvsep) {
11490 # Add output to CSV when finished
11491 $self->print_csv();
11493 if($Global::jsonout) {
11494 $self->print_json();
11497 return $returnsize - $self->returnsize();
11503 sub print_json($) {
11507 if(not $jsonmap{"\001"}) {
11508 map { $jsonmap{sprintf("%c",$_)} =
11509 sprintf '\u%04x', $_ } 0..31;
11513 $a =~ s/([\000-\037])/$jsonmap{$1}/g;
11518 if($Global::verbose <= 1) {
11519 $cmd = jsonquote($self->replaced());
11521 # Verbose level > 1: Print the rsync and stuff
11522 $cmd = jsonquote(join " ", @{$self->{'commandline'}});
11524 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
11526 # Memory optimization: Overwrite with the joined output
11527 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
11528 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
11531 # "Host": "/usr/bin/ssh foo@lo",
11532 # "Starttime": 1608344711.743,
11533 # "JobRuntime": 0.01,
11538 # "Command": "echo 1",
11546 printf($Global::csv_fh
11547 q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ).
11548 q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ).
11549 q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }).
11552 jsonquote($self->sshlogin()->string()),
11553 $self->starttime(), sprintf("%0.3f",$self->runtime()),
11554 $self->transfersize(), $self->returnsize(),
11555 $self->exitstatus(), $self->exitsignal(), $cmd,
11557 map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref],
11559 jsonquote($self->{'output'}{1}),
11560 jsonquote($self->{'output'}{2})
11566 my $header_printed;
11571 if($Global::verbose <= 1) {
11572 $cmd = $self->replaced();
11574 # Verbose level > 1: Print the rsync and stuff
11575 $cmd = join " ", @{$self->{'commandline'}};
11577 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
11579 if(not $header_printed) {
11582 # --header : => first value from column
11586 @V = (map { $Global::input_source_header{$i++} }
11587 @$record_ref[1..$#$record_ref]);
11590 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
11592 print $Global::csv_fh
11594 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
11595 "Send", "Receive", "Exitval", "Signal", "Command",
11601 # Memory optimization: Overwrite with the joined output
11602 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
11603 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
11604 print $Global::csv_fh
11608 $self->sshlogin()->string(),
11609 $self->starttime(), sprintf("%0.3f",$self->runtime()),
11610 $self->transfersize(), $self->returnsize(),
11611 $self->exitstatus(), $self->exitsignal(), \$cmd,
11612 \@$record_ref[1..$#$record_ref],
11613 \$self->{'output'}{1},
11614 \$self->{'output'}{2})),"\n";
11618 sub combine_ref($) {
11619 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
11621 my $sep = $Global::csvsep;
11625 my $must_be_quoted;
11626 for my $column (@part) {
11627 # Memory optimization: Content transferred as reference
11628 if(ref $column ne "SCALAR") {
11629 # Convert all columns to scalar references
11633 if(not defined $$column) {
11638 $must_be_quoted = 0;
11640 if($$column =~ s/$quot/$quot$quot/go){
11642 $must_be_quoted ||=1;
11644 if($$column =~ /[\s\Q$sep\E]/o){
11645 # Put quotes around if the column contains ,
11646 $must_be_quoted ||=1;
11649 $Global::use{"bytes"} ||= eval "use bytes; 1;";
11650 if ($$column =~ /\0/) {
11651 # Contains \0 => put quotes around
11652 $must_be_quoted ||=1;
11654 if($must_be_quoted){
11655 push @out, \$sep, \$quot, $column, \$quot;
11657 push @out, \$sep, $column;
11660 # Remove the first $sep: ,val,"val" => val,"val"
11665 sub print_files($) {
11666 # Print the name of the file containing stdout on stdout
11669 # $opt::group = Print when job is done
11670 # $opt::linebuffer = Print ASAP
11673 my ($fdno,$in_fh,$out_fh) = @_;
11675 # If the job is dead: close printing fh. Needed for --compress
11676 close $self->fh($fdno,"w");
11677 if($? and $opt::compress) {
11678 ::error($opt::compress_program." failed.");
11679 $self->set_exitstatus(255);
11681 if($opt::compress) {
11682 # Kill the decompressor which will not be needed
11683 CORE::kill "TERM", $self->fh($fdno,"rpid");
11687 if($opt::pipe and $self->virgin()) {
11688 # Nothing was printed to this job:
11689 # cleanup unused tmp files because --files was set
11690 for my $fdno (1,2) {
11691 ::rm($self->fh($fdno,"name"));
11692 ::rm($self->fh($fdno,"unlink"));
11694 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
11695 print $out_fh $self->tag(),$self->fh($fdno,"name"), $Global::files_sep;
11696 if($Global::membuffer) {
11697 push @{$self->{'output'}{$fdno}},
11698 $self->tag(), $self->fh($fdno,"name");
11700 $self->add_returnsize(-s $self->fh($fdno,"name"));
11701 # Mark as printed - do not print again
11702 $self->set_fh($fdno,"name",undef);
11707 # Different print types
11708 # (--ll | --ll --bar | --lb | --group | --parset | --sql-worker)
11709 # (--files | --results (.json|.csv|.tsv) )
11716 my ($up,$eol,$currow,$maxrow);
11717 my ($minvisible,%print_later,%notvisible);
11718 my (%binmodeset,%tab);
11720 sub latestline_init() {
11721 # cursor_up cuu1 = up one line
11722 $up = `sh -c "tput cuu1 </dev/tty" 2>/dev/null`;
11724 $eol = `sh -c "tput el </dev/tty" 2>/dev/null`;
11726 if($eol eq "") { $eol = "\033[K"; }
11731 $tab{$_} = " "x(8-($_%8));
11736 # Simple mbtrunc to avoid using Text::WideChar::Util
11739 if(::mbswidth($str) == length($str)) {
11740 $str = substr($str,0,$len);
11742 # mb chars (ヌー平行) are wider than 1 char on screen
11743 # We need at most $len chars - they may be wide
11744 $str =~ s/(.{$len}).*/$1/;
11745 my $rlen = int((::mbswidth($str) - $len)/2+0.5);
11747 $str =~ s/.{$rlen}$//;
11748 $rlen = int((::mbswidth($str) - $len)/2+0.5);
11749 } while($rlen >= 1);
11754 sub print_latest_line($) {
11756 my $out_fh = shift;
11757 if(not defined $self->{$out_fh,'latestline'}) { return; }
11758 my $row = $self->row();
11760 if(not ($minvisible <= $row
11762 $row < $minvisible + ::terminal_rows() - 1)) {
11765 if(not $binmodeset{$out_fh}++) {
11766 # Enable utf8 if possible
11767 eval q{ binmode $out_fh, "encoding(utf8)"; };
11769 my ($color,$reset_color) = $self->color();
11770 my $termcol = ::terminal_columns();
11771 my $untabify_tag = ::decode_utf8($self->untabtag());
11773 ::untabify(::decode_utf8($self->{$out_fh,'latestline'}));
11774 # -1 to make space for $truncated_str
11775 my $maxtaglen = $termcol - 1;
11776 $untabify_tag = mbtrunc($untabify_tag,$maxtaglen);
11777 my $taglen = ::mbswidth($untabify_tag);
11778 my $maxstrlen = $termcol - $taglen - 1;
11779 $untabify_str = mbtrunc($untabify_str,$maxstrlen);
11780 my $strlen = ::mbswidth($untabify_str);
11781 my $truncated_tag = "";
11782 my $truncated_str = "";
11783 if($termcol - $taglen < 2) {
11784 $truncated_tag = ">";
11786 if($termcol - $taglen - $strlen <= 2) {
11787 $truncated_str = ">";
11790 $maxrow = ($row > $maxrow) ? $row : $maxrow;
11792 ("%s%s%s%s". # up down \r eol
11793 "%s%s". # tag trunc_tag
11794 "%s%s%s%s". # color line trunc reset_color
11797 "$up"x($currow - $row), "\n"x($row - $currow), "\r", $eol,
11798 $untabify_tag,$truncated_tag,
11799 $color, $untabify_str, $truncated_str, $reset_color,
11800 "\n"x($maxrow - $row + 1));
11801 $currow = $maxrow + 1;
11804 sub print_linebuffer($) {
11806 my ($fdno,$in_fh,$out_fh) = @_;
11807 if(defined $self->{'exitstatus'}) {
11808 # If the job is dead: close printing fh. Needed for --compress
11809 close $self->fh($fdno,"w");
11810 if($opt::compress) {
11812 ::error($opt::compress_program." failed.");
11813 $self->set_exitstatus(255);
11815 # Blocked reading in final round
11816 for my $fdno (1,2) { ::set_fh_blocking($self->fh($fdno,'r')); }
11818 if($opt::latestline) { $print_later{$self->row()} = $self; }
11820 if(not $self->virgin()) {
11821 if($Global::files or ($opt::results and not $Global::csvsep)) {
11823 if($fdno == 1 and not $self->fh($fdno,"printed")) {
11824 print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n";
11825 if($Global::membuffer) {
11826 push(@{$self->{'output'}{$fdno}}, $self->tag(),
11827 $self->fh($fdno,"name"));
11829 $self->set_fh($fdno,"printed",1);
11831 # No need for reading $in_fh, as it is from "cat >/dev/null"
11833 # Read halflines and print full lines
11834 my $outputlength = 0;
11835 my $halfline_ref = $self->{'halfline'}{$fdno};
11837 # 1310720 gives 1.2 GB/s
11838 # 131072 gives 0.9 GB/s
11839 # The optimal block size differs
11840 # It has been measured on:
11841 # AMD 6376: 60800 (>70k is also reasonable)
11842 # Intel i7-3632QM: 52-59k, 170-175k
11843 # seq 64 | ppar --_test $1 --lb \
11844 # 'yes {} `seq 1000`|head -c 10000000' >/dev/null
11845 while($rv = sysread($in_fh, $buf, 60800)) {
11846 $outputlength += $rv;
11848 # Treat both \n and \r as line end
11849 # Only test for \r if there is no \n
11851 # perl -e '$a="x"x1000000;
11852 # $b="$a\r$a\n$a\r$a\n";
11853 # map { print $b,$_ } 1..10'
11854 $i = ((rindex($buf,"\n")+1) || (rindex($buf,"\r")+1));
11856 if($opt::latestline) {
11857 # Keep the latest full line
11858 my $l = join('', @$halfline_ref,
11859 substr($buf,0,$i-1));
11860 # "ab\rb\n" = "bb", but we cannot process that correctly.
11863 # foo \r bar \r baz \r
11864 # If so: Remove 'foo \r'
11866 my $j = ((rindex($l,"\n")+1) ||
11867 (rindex($l,"\r")+1));
11868 $self->{$out_fh,'latestline'} = substr($l,$j);
11869 # Remove the processed part
11870 # by keeping the unprocessed part
11871 @$halfline_ref = (substr($buf,$i));
11873 # One or more complete lines were found
11874 if($Global::color) {
11875 my $print = join("",@$halfline_ref,
11876 substr($buf,0,$i));
11878 my ($color,$reset_color) = $self->color();
11879 my $colortag = $color.$self->tag();
11880 # \n => reset \n color tag
11881 $print =~ s{([\n\r])(?=.|$)}
11882 {$reset_color$1$colortag}gs;
11883 print($out_fh $colortag, $print,
11884 $reset_color, "\n");
11885 } elsif($opt::tag or defined $opt::tagstring) {
11886 # Replace ^ with $tag within the full line
11887 if($Global::cache_replacement_eval) {
11888 # Replace with the same value for tag
11889 my $tag = $self->tag();
11890 unshift @$halfline_ref, $tag;
11891 # TODO --recend that can be partially in
11893 substr($buf,0,$i-1) =~
11894 s/([\n\r])(?=.|$)/$1$tag/gs;
11896 # Replace with freshly computed tag-value
11897 unshift @$halfline_ref, $self->tag();
11898 substr($buf,0,$i-1) =~
11899 s/([\n\r])(?=.|$)/$1.$self->tag()/gse;
11901 # The length changed,
11902 # so find the new ending pos
11903 $i = ::max((rindex($buf,"\n")+1),
11904 (rindex($buf,"\r")+1));
11905 # Print the partial line (halfline)
11906 # and the last half
11907 print $out_fh @$halfline_ref, substr($buf,0,$i);
11909 # Print the partial line (halfline)
11910 # and the last half
11911 print $out_fh @$halfline_ref, substr($buf,0,$i);
11913 # Buffer in memory for SQL and CSV-output
11914 if($Global::membuffer) {
11915 push(@{$self->{'output'}{$fdno}},
11916 @$halfline_ref, substr($buf,0,$i));
11918 # Remove the printed part by keeping the unprinted
11919 @$halfline_ref = (substr($buf,$i));
11922 # No newline, so append to the halfline
11923 push @$halfline_ref, $buf;
11926 $self->add_returnsize($outputlength);
11927 if($opt::latestline) { $self->print_latest_line($out_fh); }
11929 if(defined $self->{'exitstatus'}) {
11930 if($Global::files or ($opt::results and not $Global::csvsep)) {
11931 $self->add_returnsize(-s $self->fh($fdno,"name"));
11933 if($opt::latestline) {
11934 # Force re-computing color if --colorfailed
11935 if($opt::colorfailed) { delete $self->{'color'}; }
11936 if($self->{$out_fh,'latestline'} ne "") {
11937 $self->print_latest_line($out_fh);
11939 if(@{$self->{'halfline'}{$fdno}}) {
11940 my $l = join('', @{$self->{'halfline'}{$fdno}});
11942 $self->{$out_fh,'latestline'} = $l;
11945 $self->{$out_fh,'latestline'} = undef;
11947 # Print latest line from jobs that are already done
11948 while($print_later{$minvisible}) {
11949 $print_later{$minvisible}->print_latest_line($out_fh);
11950 delete $print_later{$minvisible};
11953 # Print latest line from jobs that are on screen now
11954 for(my $row = $minvisible;
11955 $row < $minvisible -1 + ::terminal_rows();
11957 $print_later{$row} and
11958 $print_later{$row}->print_latest_line($out_fh);
11961 # If the job is dead: print the remaining partial line
11962 # read remaining (already done for $opt::latestline)
11963 my $halfline_ref = $self->{'halfline'}{$fdno};
11964 if(grep /./, @$halfline_ref) {
11965 my $returnsize = 0;
11966 for(@{$self->{'halfline'}{$fdno}}) {
11967 $returnsize += length $_;
11969 $self->add_returnsize($returnsize);
11970 if($opt::tag or defined $opt::tagstring) {
11971 # Prepend $tag the the remaining half line
11972 unshift @$halfline_ref, $self->tag();
11974 # Print the partial line (halfline)
11975 print $out_fh @{$self->{'halfline'}{$fdno}};
11976 # Buffer in memory for SQL and CSV-output
11977 if($Global::membuffer) {
11978 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
11980 @$halfline_ref = ();
11984 if($self->fh($fdno,"rpid") and
11985 CORE::kill 0, $self->fh($fdno,"rpid")) {
11986 # decompress still running
11988 # decompress done: close fh
11990 if($? and $opt::compress) {
11991 ::error($opt::decompress_program." failed.");
11992 $self->set_exitstatus(255);
12000 sub free_ressources() {
12002 if(not $opt::ungroup) {
12004 for my $fdno (sort { $a <=> $b } keys %Global::fh) {
12005 $fh = $self->fh($fdno,"w");
12007 $fh = $self->fh($fdno,"r");
12013 sub print_parset($) {
12014 # Wrap output with shell script code to set as variables
12016 my ($fdno,$in_fh,$out_fh) = @_;
12017 my $outputlength = 0;
12019 ::debug("parset","print $Global::parset");
12020 if($Global::parset eq "assoc") {
12021 # Start: (done in parse_parset())
12022 # eval "`echo 'declare -A myassoc; myassoc=(
12023 # Each: (done here)
12024 # [$'a\tb']=$'a\tb\tc ddd'
12025 # End: (done in wait_and_exit())
12027 print '[',::Q($self->{'commandline'}->
12028 replace_placeholders(["\257<\257>"],0,0)),']=';
12029 } elsif($Global::parset eq "array") {
12030 # Start: (done in parse_parset())
12031 # eval "`echo 'myassoc=(
12032 # Each: (done here)
12034 # End: (done in wait_and_exit())
12036 } elsif($Global::parset eq "var") {
12037 # Start: (done in parse_parset())
12039 # Each: (done here)
12040 # var=$'a\tb\tc ddd'
12041 # End: (done in wait_and_exit())
12043 if(not @Global::parset_vars) {
12044 ::error("Too few named destination variables");
12045 ::wait_and_exit(255);
12047 print shift @Global::parset_vars,"=";
12050 my $tag = $self->tag();
12053 $outputlength += length $_;
12054 # Tag lines with \r, too
12055 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
12056 push @out, $tag,$_;
12058 # Remove last newline
12059 # This often makes it easier to use the output in shell
12060 @out and ${out[$#out]} =~ s/\n$//s;
12061 print ::Q(join("",@out)),"\n";
12062 return $outputlength;
12065 sub print_normal($) {
12067 my ($fdno,$in_fh,$out_fh) = @_;
12069 close $self->fh($fdno,"w");
12070 if($? and $opt::compress) {
12071 ::error($opt::compress_program." failed.");
12072 $self->set_exitstatus(255);
12074 if(not $self->virgin()) {
12076 # $in_fh is now ready for reading at position 0
12077 my $outputlength = 0;
12080 if($Global::parset and $fdno == 1) {
12081 $outputlength += $self->print_parset($fdno,$in_fh,$out_fh);
12082 } elsif(defined $opt::tag or defined $opt::tagstring
12083 or $Global::color or $opt::colorfailed) {
12084 if($Global::color or $opt::colorfailed) {
12085 my ($color,$reset_color) = $self->color();
12086 my $colortag = $color.$self->tag();
12087 # Read line by line
12090 $outputlength += length $_;
12091 # Tag lines with \r, too
12093 s{([\n\r])(?=.|$)}{$reset_color$1$colortag}gs;
12094 print $out_fh $colortag,$_,$reset_color,"\n";
12097 my $tag = $self->tag();
12100 while(sysread($in_fh,$buf,32767)) {
12101 $outputlength += length $buf;
12102 $buf =~ s/(?<=[\r\n])(?=.)/$tag/gs;
12103 print $out_fh ($pretag ? $tag : ""),$buf;
12104 if($Global::membuffer) {
12105 push @{$self->{'output'}{$fdno}},
12106 ($pretag ? $tag : ""),$buf;
12108 # Should next print start with a tag?
12109 $s = substr($buf, -1);
12110 # This is faster than ($s eq "\n") || ($s eq "\r")
12111 $pretag = ($s eq "\n") ? 1 : ($s eq "\r");
12115 # Most efficient way of copying data from $in_fh to $out_fh
12116 # Intel i7-3632QM: 25k-
12117 while(sysread($in_fh,$buf,32767)) {
12118 print $out_fh $buf;
12119 $outputlength += length $buf;
12120 if($Global::membuffer) {
12121 push @{$self->{'output'}{$fdno}}, $buf;
12126 $self->add_returnsize($outputlength);
12129 if($? and $opt::compress) {
12130 ::error($opt::decompress_program." failed.");
12131 $self->set_exitstatus(255);
12136 sub print_results($) {
12138 my ($fdno,$in_fh,$out_fh) = @_;
12140 close $self->fh($fdno,"w");
12141 if($? and $opt::compress) {
12142 ::error($opt::compress_program." failed.");
12143 $self->set_exitstatus(255);
12145 if(not $self->virgin()) {
12147 # $in_fh is now ready for reading at position 0
12148 my $outputlength = 0;
12151 if($Global::membuffer) {
12152 # Read data into membuffer
12153 if($opt::tag or $opt::tagstring) {
12154 # Read line by line
12156 my $tag = $self->tag();
12158 $outputlength += length $_;
12159 # Tag lines with \r, too
12160 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
12161 push @{$self->{'output'}{$fdno}}, $tag, $_;
12164 # Most efficient way of copying data from $in_fh to $out_fh
12165 while(sysread($in_fh,$buf,60000)) {
12166 $outputlength += length $buf;
12167 push @{$self->{'output'}{$fdno}}, $buf;
12171 # Not membuffer: No need to read the file
12172 if($opt::compress) {
12173 $outputlength = -1;
12175 # Determine $outputlength = file length
12176 seek($in_fh, 0, 2) || ::die_bug("cannot seek result");
12177 $outputlength = tell($in_fh);
12180 if($fdno == 1) { $self->add_returnsize($outputlength); }
12182 if($? and $opt::compress) {
12183 ::error($opt::decompress_program." failed.");
12184 $self->set_exitstatus(255);
12189 sub print_joblog($) {
12192 if($Global::verbose <= 1) {
12193 $cmd = $self->replaced();
12195 # Verbose level > 1: Print the rsync and stuff
12196 $cmd = $self->wrapped();
12198 # Newlines make it hard to parse the joblog
12200 print $Global::joblog
12201 join("\t", $self->seq(), $self->sshlogin()->string(),
12202 $self->starttime(), sprintf("%10.3f",$self->runtime()),
12203 $self->transfersize(), $self->returnsize(),
12204 $self->exitstatus(), $self->exitsignal(), $cmd
12206 flush $Global::joblog;
12207 $self->set_job_in_joblog();
12212 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
12213 if(defined $opt::tag or defined $opt::tagstring) {
12215 ($self->{'commandline'}->
12216 replace_placeholders([$opt::tagstring],0,0)).
12220 $self->{'tag'} = "";
12223 return $self->{'tag'};
12227 # tag with \t replaced with spaces
12229 my $tag = $self->tag();
12230 if(not defined $self->{'untab'}{$tag}) {
12231 $self->{'untab'}{$tag} = ::untabify($tag);
12233 return $self->{'untab'}{$tag};
12237 my (@color,$eol,$reset_color,$init);
12242 # color combinations that are readable: black/white text
12243 # on colored background, but not white on yellow
12244 my @color_combinations =
12245 # Force each color code to have the same length in chars
12246 # This will make \t work as expected
12247 ((map { [sprintf("%03d",$_),"000"] }
12248 6..7,9..11,13..15,40..51,75..87,113..123,147..159,
12249 171..182,185..231,249..254),
12250 (map { [sprintf("%03d",$_),231] }
12251 1..9,12..13,16..45,52..81,88..114,124..149,
12252 160..178,180,182..184,196..214,232..250));
12253 # reorder list so adjacent colors are dissimilar
12254 # %23 and %7 were found experimentally
12255 my @order = reverse sort {
12256 (($a%23) <=> ($b%23))
12258 (($b%7) <=> ($a%7));
12259 } 0..$#color_combinations;
12260 @order = @order[54 .. $#color_combinations, 0 .. 53];
12262 # TODO Can this be done with `tput` codes?
12263 "\033[48;5;".$_->[0].";38;5;".$_->[1]."m"
12264 } @color_combinations[ @order ];
12266 # clr_eol el = clear to end of line
12267 $eol = `sh -c "tput el </dev/tty" 2>/dev/null`;
12269 if($eol eq "") { $eol = "\033[K"; }
12270 # exit_attribute_mode sgr0 = turn off all attributes
12271 $reset_color = `sh -c "tput sgr0 </dev/tty" 2>/dev/null`;
12272 chomp($reset_color);
12273 if($reset_color eq "") { $reset_color = "\033[m"; }
12279 if(not defined $self->{'color'}) {
12280 if($Global::color) {
12281 # Choose a value based on the seq
12282 $self->{'color'} = $color[$self->seq() % ($#color+1)].$eol;
12283 $self->{'reset_color'} = $reset_color;
12285 $self->{'color'} = "";
12286 $self->{'reset_color'} = "";
12288 if($opt::colorfailed) {
12289 if($self->exitstatus()) {
12291 # Can this be done more generally?
12293 "\033[48;5;"."196".";38;5;"."231"."m".$eol;
12294 $self->{'reset_color'} = $reset_color;
12298 return ($self->{'color'},$self->{'reset_color'});
12302 sub hostgroups($) {
12304 if(not defined $self->{'hostgroups'}) {
12305 $self->{'hostgroups'} =
12306 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
12308 return @{$self->{'hostgroups'}};
12311 sub exitstatus($) {
12313 return $self->{'exitstatus'};
12316 sub set_exitstatus($$) {
12318 my $exitstatus = shift;
12320 # Overwrite status if non-zero
12321 $self->{'exitstatus'} = $exitstatus;
12323 # Set status but do not overwrite
12324 # Status may have been set by --timeout
12325 $self->{'exitstatus'} ||= $exitstatus;
12327 $opt::sqlworker and
12328 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
12332 sub reset_exitstatus($) {
12334 undef $self->{'exitstatus'};
12337 sub exitsignal($) {
12339 return $self->{'exitsignal'};
12342 sub set_exitsignal($$) {
12344 my $exitsignal = shift;
12345 $self->{'exitsignal'} = $exitsignal;
12346 $opt::sqlworker and
12347 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
12354 sub should_we_halt {
12355 # Should we halt? Immediately? Gracefully?
12359 if($Global::semaphore) {
12360 # Emulate Bash's +128 if there is a signal
12361 $Global::halt_exitstatus =
12362 ($job->exitstatus()
12364 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
12366 if($job->exitstatus() or $job->exitsignal()) {
12368 $Global::exitstatus++;
12369 $Global::total_failed++;
12370 if($Global::halt_fail) {
12371 ::status("$Global::progname: This job failed:",
12373 $limit = $Global::total_failed;
12375 } elsif($Global::halt_success) {
12376 ::status("$Global::progname: This job succeeded:",
12378 $limit = $Global::total_completed - $Global::total_failed;
12380 if($Global::halt_done) {
12381 ::status("$Global::progname: This job finished:",
12383 $limit = $Global::total_completed;
12385 if(not defined $limit) {
12388 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
12389 # --halt % => 1..100 (pct of jobs failed)
12390 if($Global::halt_pct and not $Global::halt_count) {
12391 $total_jobs ||= $Global::JobQueue->total_jobs();
12392 # From the pct compute the number of jobs that must fail/succeed
12393 $Global::halt_count = $total_jobs * $Global::halt_pct;
12395 if($limit >= $Global::halt_count) {
12396 # At least N jobs have failed/succeded/completed
12397 # or at least N% have failed/succeded/completed
12398 # So we should prepare for exit
12399 if($Global::halt_fail or $Global::halt_done) {
12401 if(not defined $Global::halt_exitstatus) {
12402 if($Global::halt_pct) {
12403 # --halt now,fail=X% or soon,fail=X%
12404 # --halt now,done=X% or soon,done=X%
12405 $Global::halt_exitstatus =
12406 ::ceil($Global::total_failed / $total_jobs * 100);
12407 } elsif($Global::halt_count) {
12408 # --halt now,fail=X or soon,fail=X
12409 # --halt now,done=X or soon,done=X
12410 $Global::halt_exitstatus =
12411 ::min($Global::total_failed,101);
12413 if($Global::halt_count and $Global::halt_count == 1) {
12414 # --halt now,fail=1 or soon,fail=1
12415 # --halt now,done=1 or soon,done=1
12416 # Emulate Bash's +128 if there is a signal
12417 $Global::halt_exitstatus =
12418 ($job->exitstatus()
12420 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
12423 ::debug("halt","Pct: ",$Global::halt_pct,
12424 " count: ",$Global::halt_count,
12425 " status: ",$Global::halt_exitstatus,"\n");
12426 } elsif($Global::halt_success) {
12427 $Global::halt_exitstatus = 0;
12429 if($Global::halt_when eq "soon") {
12430 $Global::start_no_new_jobs ||= 1;
12431 if(scalar(keys %Global::running) > 0) {
12432 # Only warn if there are more jobs running
12434 ("$Global::progname: Starting no more jobs. ".
12435 "Waiting for ". (keys %Global::running).
12436 " jobs to finish.");
12439 return($Global::halt_when);
12446 package CommandLine;
12451 my $commandref = shift;
12452 $commandref || die;
12453 my $arg_queue = shift;
12454 my $context_replace = shift;
12455 my $max_number_of_args = shift; # for -N and normal (-n1)
12456 my $transfer_files = shift;
12457 my $return_files = shift;
12458 my $template_names = shift;
12459 my $template_contents = shift;
12460 my $replacecount_ref = shift;
12461 my $len_ref = shift;
12462 my %replacecount = %$replacecount_ref;
12463 my %len = %$len_ref;
12464 for (keys %$replacecount_ref) {
12465 # Total length of this replacement string {} replaced with all args
12469 'command' => $commandref,
12473 'arg_list_flat' => [],
12474 'arg_list_flat_orig' => [undef],
12475 'arg_queue' => $arg_queue,
12476 'max_number_of_args' => $max_number_of_args,
12477 'replacecount' => \%replacecount,
12478 'context_replace' => $context_replace,
12479 'transfer_files' => $transfer_files,
12480 'return_files' => $return_files,
12481 'template_names' => $template_names,
12482 'template_contents' => $template_contents,
12483 'replaced' => undef,
12484 }, ref($class) || $class;
12487 sub flush_cache() {
12489 for my $arglist (@{$self->{'arg_list'}}) {
12490 for my $arg (@$arglist) {
12491 $arg->flush_cache();
12494 $self->{'arg_queue'}->flush_cache();
12495 $self->{'replaced'} = undef;
12500 return $self->{'seq'};
12505 $self->{'seq'} = shift;
12509 # Find the number of a free job slot and return it
12511 # @Global::slots - list with free jobslots
12513 # $jobslot = number of jobslot
12515 if(not $self->{'slot'}) {
12516 if(not @Global::slots) {
12517 # $max_slot_number will typically be $Global::max_jobs_running
12518 push @Global::slots, ++$Global::max_slot_number;
12520 $self->{'slot'} = shift @Global::slots;
12522 return $self->{'slot'};
12526 my $already_spread;
12527 my $darwin_max_len;
12530 # Add arguments from arg_queue until the number of arguments or
12531 # max line length is reached
12533 # $Global::usable_command_line_length
12536 # $Global::JobQueue
12539 # $Global::max_jobs_running
12543 my $max_len = $Global::usable_command_line_length || die;
12544 if($^O eq "darwin") {
12545 # Darwin's limit is affected by:
12546 # * number of environment names (variables+functions)
12547 # * size of environment
12548 # * the length of arguments:
12549 # a one-char argument lowers the limit by 5
12550 # To be safe assume all arguments are one-char
12551 # The max_len is cached between runs, but if the size of
12552 # the environment is different we need to recompute the
12553 # usable max length for this run of GNU Parallel
12554 # See https://unix.stackexchange.com/a/604943/2972
12555 if(not $darwin_max_len) {
12556 my $envc = (keys %ENV);
12557 my $envn = length join"",(keys %ENV);
12558 my $envv = length join"",(values %ENV);
12559 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
12561 "length: $darwin_max_len ".
12562 "3+($max_len - $envn - $envv)/5 - $envc*2");
12564 $max_len = $darwin_max_len;
12566 if($opt::cat or $opt::fifo) {
12567 # Get the empty arg added by --pipepart (if any)
12568 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
12569 # $PARALLEL_TMP will point to a tempfile that will be used as {}
12570 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
12571 unget([Arg->new('"$PARALLEL_TMP"')]);
12573 while (not $self->{'arg_queue'}->empty()) {
12574 $next_arg = $self->{'arg_queue'}->get();
12575 if(not defined $next_arg) {
12578 $self->push($next_arg);
12579 if($self->len() >= $max_len) {
12580 # Command length is now > max_length
12581 # If there are arguments: remove the last
12582 # If there are no arguments: Error
12583 # TODO stuff about -x opt_x
12584 if($self->number_of_args() > 1) {
12585 # There is something to work on
12586 $self->{'arg_queue'}->unget($self->pop());
12589 my $args = join(" ", map { $_->orig() } @$next_arg);
12590 ::error("Command line too long (".
12591 $self->len(). " >= ".
12594 $self->{'arg_queue'}->arg_number().
12596 ((length $args > 50) ?
12597 (substr($args,0,50))."..." :
12599 $self->{'arg_queue'}->unget($self->pop());
12600 ::wait_and_exit(255);
12604 if(defined $self->{'max_number_of_args'}) {
12605 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
12610 if(($opt::m or $opt::X) and not $already_spread
12611 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
12612 # -m or -X and EOF => Spread the arguments over all jobslots
12613 # (unless they are already spread)
12614 $already_spread ||= 1;
12615 if($self->number_of_args() > 1) {
12616 $self->{'max_number_of_args'} =
12617 ::ceil($self->number_of_args()/$Global::max_jobs_running);
12618 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
12619 $self->{'max_number_of_args'};
12620 $self->{'arg_queue'}->unget($self->pop_all());
12621 while($self->number_of_args() < $self->{'max_number_of_args'}) {
12622 $self->push($self->{'arg_queue'}->get());
12625 $Global::JobQueue->flush_total_jobs();
12628 if($opt::sqlmaster) {
12629 # Insert the V1..Vn for this $seq in SQL table
12630 # instead of generating one
12631 $Global::sql->insert_records($self->seq(), $self->{'command'},
12632 $self->{'arg_list_flat_orig'});
12638 # Add one or more records as arguments
12641 my $record = shift;
12642 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
12643 push @{$self->{'arg_list_flat'}}, @$record;
12644 push @{$self->{'arg_list'}}, $record;
12645 # Make @arg available for {= =}
12646 *Arg::arg = $self->{'arg_list_flat_orig'};
12648 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
12650 for my $perlexpr (keys %{$self->{'replacecount'}}) {
12651 if($perlexpr =~ /^(-?\d+)(?:\D.*|)$/) {
12652 # Positional replacement string
12653 # Deal with negative positional replacement string
12654 $col = ($1 < 0) ? $1 : $1-1;
12655 if(defined($record->[$col])) {
12656 $self->{'len'}{$perlexpr} +=
12657 length $record->[$col]->replace($perlexpr,$quote_arg,$self);
12660 for my $arg (@$record) {
12662 $self->{'len'}{$perlexpr} +=
12663 length $arg->replace($perlexpr,$quote_arg,$self);
12671 # Remove last argument
12675 my $record = pop @{$self->{'arg_list'}};
12676 # pop off arguments from @$record
12677 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
12678 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
12679 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
12680 for my $perlexpr (keys %{$self->{'replacecount'}}) {
12681 if($perlexpr =~ /^(\d+) /) {
12683 defined($record->[$1-1]) or next;
12684 $self->{'len'}{$perlexpr} -=
12685 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
12687 for my $arg (@$record) {
12689 $self->{'len'}{$perlexpr} -=
12690 length $arg->replace($perlexpr,$quote_arg,$self);
12699 # Remove all arguments and zeros the length of replacement perlexpr
12703 my @popped = @{$self->{'arg_list'}};
12704 for my $perlexpr (keys %{$self->{'replacecount'}}) {
12705 $self->{'len'}{$perlexpr} = 0;
12707 $self->{'arg_list'} = [];
12708 $self->{'arg_list_flat_orig'} = [undef];
12709 $self->{'arg_list_flat'} = [];
12713 sub number_of_args($) {
12714 # The number of records
12716 # number of records
12718 # This is really the number of records
12719 return $#{$self->{'arg_list'}}+1;
12722 sub number_of_recargs($) {
12723 # The number of args in records
12725 # number of args records
12728 my $nrec = scalar @{$self->{'arg_list'}};
12730 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
12735 sub args_as_string($) {
12737 # all unmodified arguments joined with ' ' (similar to {})
12739 return (join " ", map { $_->orig() }
12740 map { @$_ } @{$self->{'arg_list'}});
12743 sub results_out($) {
12744 sub max_file_name_length {
12745 # Figure out the max length of a subdir
12746 # TODO and the max total length
12747 # Ext4 = 255,130816
12749 # $Global::max_file_length is set
12751 # $Global::max_file_length
12752 my $testdir = shift;
12754 my $upper = 100_000_000;
12755 # Dir length of 8 chars is supported everywhere
12757 my $dir = "d"x$len;
12759 rmdir($testdir."/".$dir);
12762 } while ($len < $upper and mkdir $testdir."/".$dir);
12763 # Then search for the actual max length between $len/16 and $len
12766 while($max-$min > 5) {
12767 # If we are within 5 chars of the exact value:
12768 # it is not worth the extra time to find the exact value
12769 my $test = int(($min+$max)/2);
12771 if(mkdir $testdir."/".$dir) {
12772 rmdir($testdir."/".$dir);
12778 $Global::max_file_length = $min;
12783 my $out = $self->replace_placeholders([$opt::results],0,0);
12784 if($out eq $opt::results) {
12785 # $opt::results simple string: Append args_as_dirname
12786 my $args_as_dirname = $self->args_as_dirname(0);
12787 # Output in: prefix/name1/val1/name2/val2/stdout
12788 $out = $opt::results."/".$args_as_dirname;
12789 if(-d $out or eval{ File::Path::mkpath($out); }) {
12792 # mkpath failed: Argument too long or not quoted
12793 # Set $Global::max_file_length, which will keep the individual
12794 # dir names shorter than the max length
12795 max_file_name_length($opt::results);
12796 # Quote dirnames with +
12797 $args_as_dirname = $self->args_as_dirname(1);
12798 # prefix/name1/val1/name2/val2/
12799 $out = $opt::results."/".$args_as_dirname;
12800 File::Path::mkpath($out);
12804 if($out =~ m:/$:s) {
12806 if(-d $out or eval{ File::Path::mkpath($out); }) {
12809 ::error("Cannot make dir '$out'.");
12810 ::wait_and_exit(255);
12814 File::Path::mkpath($1);
12825 # test: '' . .. a. a.. + ++ 0..255 on fat12 ext4
12826 sub args_as_dirname($) {
12828 # all arguments joined with '/' (similar to {})
12829 # Chars that are not safe on all file systems are quoted.
12831 # ext4: / \t \n \0 \\ \r
12832 # fat: 0..31 " * / : < > ? \ | Maybe also: # [ ] ; = ,
12834 # Other FS: , [ ] { } ( ) ! ; " ' * ? < > |
12840 # \\ = +b (backslash)
12845 # " = +d (double quote)
12847 # * = +a (asterisk)
12848 # < = +l (less than)
12849 # > = +g (greater than)
12851 # ! = +x (eXclamation)
12854 # ; = +s (semicolon)
12857 # 1..32 128..255 = +XX (hex value)
12861 # Quote '' as +m (eMpty)
12893 # 1..32 128..255 = +XX (hex value)
12894 map { $map{sprintf "%c",$_} = sprintf "+%02x",$_ } 1..32, 128..255;
12895 # Default value = itself
12896 map { $map{sprintf "%c",$_} ||= sprintf "%c",$_ } 0..255;
12897 # Quote '' as +m (eMpty)
12898 $stringmap{""} = "+m";
12900 $stringmap{"."} = "+_";
12902 $stringmap{".."} = "+__";
12903 # Set dir separator
12904 eval 'use File::Spec; $sep = File::Spec->catfile("", "");';
12907 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
12911 if(not $sep) { init(); }
12913 for my $rec_ref (@{$self->{'arg_list'}}) {
12914 # If headers are used, sort by them.
12915 # Otherwise keep the order from the command line.
12916 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
12917 for my $n (@header_indexes_sorted) {
12919 $Global::input_source_header{$n},
12922 grep { $_ ne "\0noarg" } map {
12925 $s =~ s/(.)/$map{$1}/gs;
12926 if($Global::max_file_length) {
12927 # Keep each subdir shorter than the longest
12928 # allowed file name
12929 $s = substr($s,0,$Global::max_file_length);
12932 $rec_ref->[$n-1]->orig()
12935 grep { $_ ne "\0noarg" } map {
12937 # Quote / as +z and + as ++
12938 $s =~ s/($sep|\+)/$map{$1}/gos;
12939 if($Global::max_file_length) {
12940 # Keep each subdir shorter than the longest
12941 # allowed file name
12942 $s = substr($s,0,$Global::max_file_length);
12945 $rec_ref->[$n-1]->orig()
12950 return join $sep, map { $stringmap{$_} || $_ } @res;
12954 sub header_indexes_sorted($) {
12955 # Sort headers first by number then by name.
12956 # E.g.: 1a 1b 11a 11b
12958 # Indexes of %Global::input_source_header sorted
12959 my $max_col = shift;
12961 no warnings 'numeric';
12962 for my $col (1 .. $max_col) {
12963 # Make sure the header is defined. If it is not: use column number
12964 if(not defined $Global::input_source_header{$col}) {
12965 $Global::input_source_header{$col} = $col;
12968 my @header_indexes_sorted = sort {
12969 # Sort headers numerically then asciibetically
12970 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
12972 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
12974 return @header_indexes_sorted;
12980 # The length of the command line with args substituted
12983 # Add length of the original command with no args
12984 # Length of command w/ all replacement args removed
12985 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
12986 ::debug("length", "noncontext + command: $len\n");
12987 # MacOS has an overhead of 8 bytes per argument
12988 my $darwin = ($^O eq "darwin") ? 8 : 0;
12989 my $recargs = $self->number_of_recargs();
12990 if($self->{'context_replace'}) {
12991 # Context is duplicated for each arg
12992 $len += $recargs * $self->{'len'}{'context'};
12993 for my $replstring (keys %{$self->{'replacecount'}}) {
12994 # If the replacements string is more than once: mulitply its length
12995 $len += $self->{'len'}{$replstring} *
12996 $self->{'replacecount'}{$replstring};
12997 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
12998 $self->{'replacecount'}{$replstring}, "\n");
13000 # echo 11 22 33 44 55 66 77 88 99 1010
13001 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
13003 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
13004 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
13005 # Add space between context groups
13006 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
13008 $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
13011 # Each replacement string may occur several times
13012 # Add the length for each time
13013 $len += 1*$self->{'len'}{'context'};
13014 ::debug("length", "context+noncontext + command: $len\n");
13015 for my $replstring (keys %{$self->{'replacecount'}}) {
13016 # (space between recargs + length of replacement)
13017 # * number this replacement is used
13018 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
13019 $self->{'replacecount'}{$replstring};
13021 $len += ($recargs * $self->{'replacecount'}{$replstring}
13026 if(defined $Global::parallel_env) {
13027 # If we are using --env, add the prefix for that, too.
13028 $len += length $Global::parallel_env;
13030 if($Global::quoting) {
13031 # Pessimistic length if -q is set
13032 # Worse than worst case: ' => "'" + " => '"'
13033 # TODO can we count the number of expanding chars?
13034 # and count them in arguments, too?
13037 if(@opt::shellquote) {
13038 # Pessimistic length if --shellquote is set
13039 # Worse than worst case: ' => "'"
13040 for(@opt::shellquote) {
13045 if(@opt::sshlogin) {
13046 # Pessimistic length if remote
13047 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
13048 $len = int($len*4/3);
13055 # $Global::quote_replace
13058 # $replaced = command with place holders replaced and prepended
13060 if(not defined $self->{'replaced'}) {
13061 # Don't quote arguments if the input is the full command line
13062 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
13063 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
13064 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
13065 $self->{'replaced'} = $self->
13066 replace_placeholders($self->{'command'},$Global::quoting,
13068 my $len = length $self->{'replaced'};
13069 if ($len != $self->len()) {
13070 ::debug("length", $len, " != ", $self->len(),
13071 " ", $self->{'replaced'}, "\n");
13073 ::debug("length", $len, " == ", $self->len(),
13074 " ", $self->{'replaced'}, "\n");
13077 return $self->{'replaced'};
13080 sub replace_placeholders($$$$) {
13081 # Replace foo{}bar with fooargbar
13083 # $targetref = command as shell words
13084 # $quote = should everything be quoted?
13085 # $quote_arg = should replaced arguments be quoted?
13087 # @Arg::arg = arguments as strings to be use in {= =}
13089 # @target with placeholders replaced
13091 my $targetref = shift;
13093 my $quote_arg = shift;
13096 # Token description:
13097 # \0spc = unquoted space
13098 # \0end = last token element
13099 # \0ign = dummy token to be ignored
13100 # \257<...\257> = replacement expression
13101 # " " = quoted space, that splits -X group
13102 # text = normal text - possibly part of -X group
13104 my @tokens = grep { length $_ > 0 } map {
13106 # \257<...\257> or space
13109 # Split each space/tab into a token
13110 split /(?=\s)|(?<=\s)/
13113 # Split \257< ... \257> into own token
13114 map { split /(?=\257<)|(?<=\257>)/ }
13115 # Insert "\0spc" between every element
13116 # This space should never be quoted
13117 map { $spacer++ ? ("\0spc",$_) : $_ }
13118 map { $_ eq "" ? "\0empty" : $_ }
13122 # @tokens is empty: Return empty array
13125 ::debug("replace", "Tokens ".join":",@tokens,"\n");
13126 # Make it possible to use $arg[2] in {= =}
13127 *Arg::arg = $self->{'arg_list_flat_orig'};
13129 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
13130 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
13131 if(not @{$self->{'arg_list_flat'}}) {
13132 @{$self->{'arg_list_flat'}} = Arg->new("");
13134 my $argref = $self->{'arg_list_flat'};
13135 # Number of arguments - used for positional arguments
13136 my $n = $#$argref+1;
13138 # $self is actually a CommandLine-object,
13139 # but it looks nice to be able to say {= $job->slot() =}
13141 # @replaced = tokens with \257< \257> replaced
13143 if($self->{'context_replace'}) {
13145 for my $t (@tokens,"\0end") {
13146 # \0end = last token was end of tokens.
13147 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
13148 # Context group complete: Replace in it
13149 if(grep { /^\257</ } @ctxgroup) {
13150 # Context group contains a replacement string:
13151 # Copy once per arg
13152 my $space = "\0ign";
13153 for my $arg (@$argref) {
13154 my $normal_replace;
13156 # Put unquoted space before each context group
13158 CORE::push @replaced, $space, map {
13161 s{\257<(-?\d+)?(.*)\257>}
13164 # Positional replace
13165 # Find the relevant arg and replace it
13166 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
13167 $argref->[$1 > 0 ? $1-1 : $n+$1]->
13168 replace($2,$quote_arg,$self)
13172 $normal_replace ||= 1;
13173 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
13176 # Token is \257<..\257>
13178 if($Global::escape_string_present) {
13179 # Command line contains \257:
13180 # Unescape it \257\256 => \257
13181 $a =~ s/\257\256/\257/g;
13186 $normal_replace or last;
13190 # Context group has no a replacement string: Copy it once
13191 CORE::push @replaced, map {
13192 $Global::escape_string_present and s/\257\256/\257/g; $_;
13195 # New context group
13198 if($t eq "\0spc" or $t eq " ") {
13199 CORE::push @replaced,$t;
13201 CORE::push @ctxgroup,$t;
13208 # repquote = no if {} first on line, no if $quote, yes otherwise
13209 for my $t (@tokens) {
13210 if($t =~ /^\257</) {
13211 my $space = "\0ign";
13212 for my $arg (@$argref) {
13213 my $normal_replace;
13216 s{\257<(-?\d+)?(.*)\257>}
13219 # Positional replace
13220 # Find the relevant arg and replace it
13221 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
13222 # If defined: replace
13223 $argref->[$1 > 0 ? $1-1 : $n+$1]->
13224 replace($2,$quote_arg,$self)
13228 $normal_replace ||= 1;
13229 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
13232 CORE::push @replaced, $space, $a;
13233 $normal_replace or last;
13238 CORE::push @replaced, map {
13239 $Global::escape_string_present and s/\257\256/\257/g; $_;
13245 ::debug("replace","Replaced: ".join":",@replaced,"\n");
13247 # Put tokens into groups that may be quoted.
13250 for (map { $_ eq "\0empty" ? "" : $_ }
13251 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
13252 @replaced, "\0end") {
13253 if($_ eq "\0spc" or $_ eq "\0end") {
13254 # \0spc splits quotable groups
13257 CORE::push @quoted, ::Q(join"",@quotegroup);;
13260 CORE::push @quoted, join"",@quotegroup;
13264 CORE::push @quotegroup, $_;
13267 ::debug("replace","Quoted: ".join":",@quoted,"\n");
13268 return wantarray ? @quoted : "@quoted";
13274 $self->{'skip'} = 1;
13278 package CommandLineQueue;
13282 my $commandref = shift;
13283 my $read_from = shift;
13284 my $context_replace = shift || 0;
13285 my $max_number_of_args = shift;
13286 my $transfer_files = shift;
13287 my $return_files = shift;
13288 my $template_names = shift;
13289 my $template_contents = shift;
13292 my ($replacecount_ref, $len_ref);
13293 my @command = @$commandref;
13295 # Replace replacement strings with {= perl expr =}
13296 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
13297 @command = merge_rpl_parts(@command);
13299 # Protect matching inside {= perl expr =}
13300 # by replacing {= and =} with \257< and \257>
13301 # in options that can contain replacement strings:
13302 # @command, --transferfile, --return,
13303 # --tagstring, --workdir, --results
13304 for(@command, @$transfer_files, @$return_files,
13305 @$template_names, @$template_contents,
13306 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries,
13308 # Skip if undefined
13309 defined($_) or next;
13310 # Escape \257 => \257\256
13311 $Global::escape_string_present += s/\257/\257\256/g;
13312 # Needs to match rightmost left parens (Perl defaults to leftmost)
13313 # to deal with: {={==} and {={==}=}
13314 # Replace {= -> \257< and =} -> \257>
13316 # Complex way to do:
13317 # s/{=(.*)=}/\257<$1\257>/g
13318 # which would not work
13319 s[\Q$Global::parensleft\E # Match {=
13320 # Match . unless the next string is {= or =}
13321 # needed to force matching the shortest {= =}
13322 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
13323 \Q$Global::parensright\E ] # Match =}
13325 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
13326 # Replace long --rpl's before short ones, as a short may be a
13327 # substring of a long:
13328 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
13330 # Replace the shorthand string (--rpl)
13331 # with the {= perl expr =}
13333 # Avoid searching for shorthand strings inside existing {= perl expr =}
13335 # Replace $$1 in {= perl expr =} with groupings in shorthand string
13337 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
13338 # echo {/.tar/.gz} ::: UU.tar.gz
13339 my ($prefix,$grp_regexp,$postfix) =
13340 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
13341 ( \(.*\) )? # Group capture regexp - e.g (.*)
13342 ( [^)]* )$ # Postfix - e.g }
13344 $grp_regexp ||= '';
13345 my $rplval = $Global::rpl{$rpl};
13346 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
13347 # Don't replace after \257 unless \257>
13348 \Q$prefix\E $grp_regexp \Q$postfix\E}
13350 # The start remains the same
13351 my $unchanged = $1;
13352 # Dummy entry to start at 1.
13354 # $2 = first ()-group in $grp_regexp
13355 # Put $2 in $grp[1], Put $3 in $grp[2]
13356 # so first ()-group in $grp_regexp is $grp[1];
13357 for(my $i = 2; defined $grp[$#grp]; $i++) {
13358 push @grp, eval '$'.$i;
13361 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
13362 # in the code to be executed
13363 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
13364 # prepend with $_pAr_gRp1 = perlquote($1),
13366 for(my $i = 1;defined $grp[$i]; $i++) {
13367 $set_args .= "\$_pAr_gRp$i = \"" .
13368 ::perl_quote_scalar($grp[$i]) . "\";";
13370 $unchanged . "\257<" . $set_args . $rv . "\257>"
13373 # Do the same for the positional replacement strings
13375 if($posrpl =~ s/^\{//) {
13376 # Only do this if the shorthand start with {
13378 # Don't replace after \257 unless \257>
13379 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
13380 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
13382 # The start remains the same
13383 my $unchanged = $1;
13385 # Dummy entry to start at 1.
13387 # $3 = first ()-group in $grp_regexp
13388 # Put $3 in $grp[1], Put $4 in $grp[2]
13389 # so first ()-group in $grp_regexp is $grp[1];
13390 for(my $i = 3; defined $grp[$#grp]; $i++) {
13391 push @grp, eval '$'.$i;
13394 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
13395 # in the code to be executed
13396 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
13397 # prepend with $_pAr_gRp1 = perlquote($1),
13399 for(my $i = 1;defined $grp[$i]; $i++) {
13400 $set_args .= "\$_pAr_gRp$i = \"" .
13401 ::perl_quote_scalar($grp[$i]) . "\";";
13403 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
13409 # Add {} if no replacement strings in @command
13410 ($replacecount_ref, $len_ref, @command) =
13411 replacement_counts_and_lengths($transfer_files, $return_files,
13412 $template_names, $template_contents,
13414 if("@command" =~ /^[^ \t\n=]*\257</) {
13415 # Replacement string is (part of) the command (and not just
13416 # argument or variable definition V1={})
13417 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
13418 # Do no quote (Otherwise it will fail if the input contains spaces)
13419 $Global::quote_replace = 0;
13422 if($opt::sqlmaster and $Global::sql->append()) {
13423 $seq = $Global::sql->max_seq() + 1;
13427 ('unget' => \@unget,
13428 'command' => \@command,
13429 'replacecount' => $replacecount_ref,
13430 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
13431 'context_replace' => $context_replace,
13433 'max_number_of_args' => $max_number_of_args,
13435 'transfer_files' => $transfer_files,
13436 'return_files' => $return_files,
13437 'template_names' => $template_names,
13438 'template_contents' => $template_contents,
13441 }, ref($class) || $class;
13444 sub merge_rpl_parts($) {
13445 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
13447 # @in = the @command as given by the user
13449 # $Global::parensleft
13450 # $Global::parensright
13452 # @command with parts merged to keep {= and =} as one
13455 my $l = quotemeta($Global::parensleft);
13456 my $r = quotemeta($Global::parensright);
13461 # Remove matching (right most) parens
13462 while(s/(.*)$l.*?$r/$1/os) {}
13464 # Missing right parens
13466 $s .= " ".shift @in;
13468 while(s/(.*)$l.*?$r/$1/os) {}
13479 sub replacement_counts_and_lengths($$@) {
13480 # Count the number of different replacement strings.
13481 # Find the lengths of context for context groups and non-context
13483 # If no {} found in @command: add it to @command
13486 # \@transfer_files = array of filenames to transfer
13487 # \@return_files = array of filenames to return
13488 # \@template_names = array of names to copy to
13489 # \@template_contents = array of contents to write
13490 # @command = command template
13492 # \%replacecount, \%len, @command
13493 my $transfer_files = shift;
13494 my $return_files = shift;
13495 my $template_names = shift;
13496 my $template_contents = shift;
13498 my (%replacecount,%len);
13501 # Count how many times each replacement string is used
13502 my @cmd = @command;
13503 my $contextlen = 0;
13504 my $noncontextlen = 0;
13505 my $contextgroups = 0;
13507 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
13508 # %replacecount = { "perlexpr" => number of times seen }
13509 # e.g { "s/a/b/" => 2 }
13510 $replacecount{$1}++;
13513 # Measure the length of the context around the {= perl expr =}
13514 # Use that {=...=} has been replaced with \000 above
13515 # So there is no need to deal with \257<
13516 while($c =~ s/ (\S*\000\S*) //xs) {
13518 $w =~ tr/\000//d; # Remove all \000's
13519 $contextlen += length($w);
13522 # All {= perl expr =} have been removed: The rest is non-context
13523 $noncontextlen += length $c;
13525 for(@$transfer_files, @$return_files,
13526 @$template_names, @$template_contents,
13528 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
13529 # Options that can contain replacement strings
13530 defined($_) or next;
13532 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
13533 # %replacecount = { "perlexpr" => number of times seen }
13534 # e.g { "$_++" => 2 }
13535 # But for tagstring we just need to mark it as seen
13536 $replacecount{$1} ||= 1;
13540 # If the command does not contain {} force it to be computed
13541 # as it is being used by --bar
13542 $replacecount{""} ||= 1;
13545 $len{'context'} = 0+$contextlen;
13546 $len{'noncontext'} = $noncontextlen;
13547 $len{'contextgroups'} = $contextgroups;
13548 $len{'noncontextgroups'} = @cmd-$contextgroups;
13549 ::debug("length", "@command Context: ", $len{'context'},
13550 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
13551 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
13554 # Default command = {}
13555 @command = ("\257<\257>");
13556 } elsif(($opt::pipe or $opt::pipepart)
13557 and not $opt::fifo and not $opt::cat) {
13558 # With --pipe / --pipe-part you can have no replacement
13561 # Append {} to the command if there are no {...}'s and no {=...=}
13562 push @command, ("\257<\257>");
13566 return(\%replacecount,\%len,@command);
13571 if(@{$self->{'unget'}}) {
13572 my $cmd_line = shift @{$self->{'unget'}};
13573 return ($cmd_line);
13575 if($opt::sqlworker) {
13576 # Get the sequence number from the SQL table
13577 $self->set_seq($SQL::next_seq);
13578 # Get the command from the SQL table
13579 $self->{'command'} = $SQL::command_ref;
13581 # Recompute replace counts based on the read command
13582 ($self->{'replacecount'},
13583 $self->{'len'}, @command) =
13584 replacement_counts_and_lengths($self->{'transfer_files'},
13585 $self->{'return_files'},
13586 $self->{'template_name'},
13587 $self->{'template_contents'},
13588 @$SQL::command_ref);
13589 if("@command" =~ /^[^ \t\n=]*\257</) {
13590 # Replacement string is (part of) the command (and not just
13591 # argument or variable definition V1={})
13592 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
13593 # Do no quote (Otherwise it will fail if the input contains spaces)
13594 $Global::quote_replace = 0;
13598 my $cmd_line = CommandLine->new($self->seq(),
13599 $self->{'command'},
13600 $self->{'arg_queue'},
13601 $self->{'context_replace'},
13602 $self->{'max_number_of_args'},
13603 $self->{'transfer_files'},
13604 $self->{'return_files'},
13605 $self->{'template_names'},
13606 $self->{'template_contents'},
13607 $self->{'replacecount'},
13610 $cmd_line->populate();
13611 ::debug("run","cmd_line->number_of_args ",
13612 $cmd_line->number_of_args(), "\n");
13613 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
13614 if($cmd_line->replaced() eq "") {
13615 # Empty command - pipe requires a command
13616 ::error("--pipe/--pipepart must have a command to pipe into ".
13618 ::wait_and_exit(255);
13620 } elsif($cmd_line->number_of_args() == 0) {
13621 # We did not get more args - maybe at EOF string?
13624 $self->set_seq($self->seq()+1);
13631 unshift @{$self->{'unget'}}, @_;
13636 my $empty = (not @{$self->{'unget'}}) &&
13637 $self->{'arg_queue'}->empty();
13638 ::debug("run", "CommandLineQueue->empty $empty");
13644 return $self->{'seq'};
13649 $self->{'seq'} = shift;
13652 sub quote_args($) {
13654 # If there is not command emulate |bash
13655 return $self->{'command'};
13659 package Limits::Command;
13661 # Maximal command line length (for -m and -X)
13662 sub max_length($) {
13663 # Find the max_length of a command line and cache it
13665 # number of chars on the longest command line allowed
13666 if(not $Limits::Command::line_max_len) {
13667 # Disk cache of max command line length
13668 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
13672 if(open(my $fh, "<", $len_cache)) {
13673 $cached_limit = <$fh>;
13674 $cached_limit || ::warning("Invalid content in $len_cache");
13677 if(not $cached_limit) {
13678 $cached_limit = real_max_length();
13679 # If $HOME is write protected: Do not fail
13680 my $dir = ::dirname($len_cache);
13681 -d $dir or eval { File::Path::mkpath($dir); };
13682 open(my $fh, ">", $len_cache.$$);
13683 print $fh $cached_limit;
13685 rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
13687 $Limits::Command::line_max_len = tmux_length($cached_limit);
13689 return int($Limits::Command::line_max_len);
13692 sub real_max_length() {
13693 # Find the max_length of a command line
13695 # The maximal command line length with 1 byte arguments
13696 # return find_max(" c");
13697 return find_max("c");
13701 my $string = shift;
13702 # This is slow on Cygwin, so give Cygwin users a warning
13703 if($^O eq "cygwin" or $^O eq "msys") {
13704 ::warning("Finding the maximal command line length. ".
13705 "This may take up to 1 minute.")
13707 # Use an upper bound of 100 MB if the shell allows for infinite
13709 my $upper = 100_000_000;
13711 # 1000 is supported everywhere, so the search can start anywhere 1..999
13712 # 324 makes the search much faster on Cygwin, so let us use that
13715 if($len > $upper) { return $len };
13718 ::debug("init", "Maxlen: $lower<$len<$upper(".($upper-$lower)."): ");
13719 } while (is_acceptable_command_line_length($len,$string));
13720 # Then search for the actual max length between
13721 # last successful length ($len/16) and upper bound
13722 return binary_find_max(int($len/16),$len,$string);
13726 # Prototype forwarding
13727 sub binary_find_max($$$);
13728 sub binary_find_max($$$) {
13729 # Given a lower and upper bound find the max (length or args) of a
13732 # number of chars on the longest command line allowed
13733 my ($lower, $upper, $string) = (@_);
13734 if($lower == $upper
13735 or $lower == $upper-1
13736 or $lower/$upper > 0.99) {
13737 # $lower is +- 1 or within 1%: Don't search more
13740 # Unevenly split binary search which is faster for Microsoft Windows.
13741 # Guessing too high is cheap. Guessing too low is expensive.
13742 my $split = ($^O eq "cygwin" or $^O eq "msys") ? 0.93 : 0.5;
13743 my $middle = int (($upper-$lower)*$split + $lower);
13744 ::debug("init", "Maxlen: $lower<$middle<$upper(".($upper-$lower)."): ");
13745 if (is_acceptable_command_line_length($middle,$string)) {
13746 return binary_find_max($middle,$upper,$string);
13748 return binary_find_max($lower,$middle,$string);
13755 sub is_acceptable_command_line_length($$) {
13756 # Test if a command line of this length can run
13757 # in the current environment
13758 # If the string is " x" it tests how many args are allowed
13760 # 0 if the command line length is too long
13763 my $string = shift;
13764 if($Global::parallel_env) {
13765 $len += length $Global::parallel_env;
13767 # Force using non-built-in command
13768 $prg ||= ::which("echo");
13769 my $l = length ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string));
13771 # The command returned OK, but did not output $len chars
13772 # => this failed (Centos3 does this craziness)
13775 ::debug("init", "$len=$?\n");
13780 sub tmux_length($) {
13781 # If $opt::tmux set, find the limit for tmux
13782 # tmux 1.8 has a 2kB limit
13783 # tmux 1.9 has a 16kB limit
13784 # tmux 2.0 has a 16kB limit
13785 # tmux 2.1 has a 16kB limit
13786 # tmux 2.2 has a 16kB limit
13788 # $len = maximal command line length
13790 # $tmux_len = maximal length runable in tmux
13794 $ENV{'PARALLEL_TMUX'} ||= "tmux";
13795 if(not ::which($ENV{'PARALLEL_TMUX'})) {
13796 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
13797 ::wait_and_exit(255);
13800 for my $l (1, 2020, 16320, 30000, $len) {
13801 my $tmpfile = ::tmpname("tms");
13802 my $qtmp = ::Q($tmpfile);
13803 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
13804 " -S $qtmp new-session -d -n echo $l".
13805 ("t"x$l). " && echo $l; rm -f $qtmp";
13806 push @out, ::qqx($tmuxcmd);
13809 ::debug("tmux","tmux-out ",@out);
13811 # The arguments is given 3 times on the command line
13812 # and the tmux wrapping is around 30 chars
13813 # (29 for tmux1.9, 33 for tmux1.8)
13814 my $tmux_len = ::max(@out);
13815 $len = ::min($len,int($tmux_len/4-33));
13816 ::debug("tmux","tmux-length ",$len);
13822 package RecordQueue;
13827 my $colsep = shift;
13830 if($opt::sqlworker) {
13832 $arg_sub_queue = SQLRecordQueue->new();
13833 } elsif(defined $colsep) {
13834 # Open one file with colsep or CSV
13835 $arg_sub_queue = RecordColQueue->new($fhs);
13837 # Open one or more files if multiple -a
13838 $arg_sub_queue = MultifileQueue->new($fhs);
13841 'unget' => \@unget,
13843 'arg_sub_queue' => $arg_sub_queue,
13844 }, ref($class) || $class;
13849 # reference to array of Arg-objects
13851 if(@{$self->{'unget'}}) {
13852 $self->{'arg_number'}++;
13853 # Flush cached computed replacements in Arg-objects
13854 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
13855 my $ret = shift @{$self->{'unget'}};
13857 map { $_->flush_cache() } @$ret;
13861 my $ret = $self->{'arg_sub_queue'}->get();
13863 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
13864 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
13865 # to mean no-string
13866 ::warning("A NUL character in the input was replaced with \\0.",
13867 "NUL cannot be passed through in the argument list.",
13868 "Did you mean to use the --null option?");
13869 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
13870 # Replace \0 with \\0
13871 my $a = $_->orig();
13876 if(defined $Global::max_number_of_args
13877 and $Global::max_number_of_args == 0) {
13878 ::debug("run", "Read 1 but return 0 args\n");
13879 # \0noarg => nothing (not the empty string)
13880 map { $_->set_orig("\0noarg"); } @$ret;
13882 # Flush cached computed replacements in Arg-objects
13883 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
13884 map { $_->flush_cache() } @$ret;
13891 ::debug("run", "RecordQueue-unget\n");
13892 $self->{'arg_number'} -= @_;
13893 unshift @{$self->{'unget'}}, @_;
13898 my $empty = (not @{$self->{'unget'}}) &&
13899 $self->{'arg_sub_queue'}->empty();
13900 ::debug("run", "RecordQueue->empty $empty");
13904 sub flush_cache($) {
13906 for my $record (@{$self->{'unget'}}) {
13907 for my $arg (@$record) {
13908 $arg->flush_cache();
13911 $self->{'arg_sub_queue'}->flush_cache();
13914 sub arg_number($) {
13916 return $self->{'arg_number'};
13920 package RecordColQueue;
13926 my $arg_sub_queue = MultifileQueue->new($fhs);
13928 'unget' => \@unget,
13929 'arg_sub_queue' => $arg_sub_queue,
13930 }, ref($class) || $class;
13935 # reference to array of Arg-objects
13937 if(@{$self->{'unget'}}) {
13938 return shift @{$self->{'unget'}};
13940 if($self->{'arg_sub_queue'}->empty()) {
13943 my $in_record = $self->{'arg_sub_queue'}->get();
13944 if(defined $in_record) {
13945 my @out_record = ();
13946 for my $arg (@$in_record) {
13947 ::debug("run", "RecordColQueue::arg $arg\n");
13948 my $line = $arg->orig();
13949 ::debug("run", "line='$line'\n");
13952 # Parse CSV and put it into a record
13954 if(not $Global::csv->parse($line)) {
13955 die "CSV has unexpected format: ^$line^";
13957 for($Global::csv->fields()) {
13958 push @out_record, Arg->new($_);
13961 # Split --colsep into record
13962 for my $s (split /$opt::colsep/o, $line, -1) {
13963 push @out_record, Arg->new($s);
13967 push @out_record, Arg->new("");
13970 return \@out_record;
13978 ::debug("run", "RecordColQueue-unget '@_'\n");
13979 unshift @{$self->{'unget'}}, @_;
13984 my $empty = (not @{$self->{'unget'}}) &&
13985 $self->{'arg_sub_queue'}->empty();
13986 ::debug("run", "RecordColQueue->empty $empty");
13990 sub flush_cache($) {
13992 for my $arg (@{$self->{'unget'}}) {
13993 $arg->flush_cache();
13995 $self->{'arg_sub_queue'}->flush_cache();
13999 package SQLRecordQueue;
14005 'unget' => \@unget,
14006 }, ref($class) || $class;
14011 # reference to array of Arg-objects
14013 if(@{$self->{'unget'}}) {
14014 return shift @{$self->{'unget'}};
14016 return $Global::sql->get_record();
14021 ::debug("run", "SQLRecordQueue-unget '@_'\n");
14022 unshift @{$self->{'unget'}}, @_;
14027 if(@{$self->{'unget'}}) { return 0; }
14028 my $get = $self->get();
14030 $self->unget($get);
14032 my $empty = not $get;
14033 ::debug("run", "SQLRecordQueue->empty $empty");
14037 sub flush_cache($) {
14039 for my $record (@{$self->{'unget'}}) {
14040 for my $arg (@$record) {
14041 $arg->flush_cache();
14047 package MultifileQueue;
14049 @Global::unget_argv=();
14054 for my $fh (@$fhs) {
14055 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
14057 "Input is read from the terminal. You are either an expert",
14058 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
14059 "::: or :::: or -a or to pipe data into parallel. If so",
14060 "consider going through the tutorial: man parallel_tutorial",
14061 "Press CTRL-D to exit.");
14065 'unget' => \@Global::unget_argv,
14067 'arg_matrix' => undef,
14068 }, ref($class) || $class;
14074 return $self->link_get();
14076 return $self->nest_get();
14082 ::debug("run", "MultifileQueue-unget '@_'\n");
14083 unshift @{$self->{'unget'}}, @_;
14088 my $empty = (not @Global::unget_argv) &&
14089 not @{$self->{'unget'}};
14090 for my $fh (@{$self->{'fhs'}}) {
14091 $empty &&= eof($fh);
14093 ::debug("run", "MultifileQueue->empty $empty ");
14097 sub flush_cache($) {
14099 for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) {
14100 for my $arg (@$record) {
14101 $arg->flush_cache();
14108 if(@{$self->{'unget'}}) {
14109 return shift @{$self->{'unget'}};
14114 for my $i (0..$#{$self->{'fhs'}}) {
14115 my $fh = $self->{'fhs'}[$i];
14116 my $arg = read_arg_from_fh($fh);
14118 # Record $arg for recycling at end of file
14119 push @{$self->{'arg_matrix'}[$i]}, $arg;
14120 push @record, $arg;
14123 ::debug("run", "EOA ");
14124 # End of file: Recycle arguments
14125 push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]};
14126 # return last @{$args->{'args'}{$fh}};
14127 push @record, @{$self->{'arg_matrix'}[$i]}[-1];
14139 if(@{$self->{'unget'}}) {
14140 return shift @{$self->{'unget'}};
14145 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
14146 if(not $self->{'arg_matrix'}) {
14147 # Initialize @arg_matrix with one arg from each file
14148 # read one line from each file
14151 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
14152 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
14156 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
14157 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
14160 # All filehandles were at eof or eof-string
14163 return [@first_arg_set];
14166 # Treat the case with one input source special. For multiple
14167 # input sources we need to remember all previously read values to
14168 # generate all combinations. But for one input source we can
14169 # forget the value after first use.
14170 if($no_of_inputsources == 1) {
14171 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
14172 if(defined($arg)) {
14177 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
14178 if(eof($self->{'fhs'}[$fhno])) {
14182 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
14183 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
14184 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
14185 $self->{'arg_matrix'}[$fhno][$len] = $arg;
14186 # make all new combinations
14188 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
14189 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
14190 # Is input source --link'ed to the next?
14191 $opt::linkinputsource[$fhn+1]);
14193 # Find only combinations with this new entry
14194 $combarg[2*$fhno] = [$len,$len];
14196 # [ 1, 3, 7 ], [ 2, 4, 1 ]
14198 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
14200 for my $c (expand_combinations(@combarg)) {
14202 for my $n (0 .. $no_of_inputsources - 1 ) {
14203 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
14207 # append the mapped to the ungotten arguments
14208 push @{$self->{'unget'}}, @mapped;
14211 return shift @{$self->{'unget'}};
14215 # all are eof or at EOF string; return from the unget queue
14216 return shift @{$self->{'unget'}};
14222 my $dos_crnl_determined;
14223 sub read_arg_from_fh($) {
14224 # Read one Arg from filehandle
14226 # Arg-object with one read line
14227 # undef if end of file
14231 my $half_record = 0;
14233 # This makes 10% faster
14234 if(not defined ($arg = <$fh>)) {
14235 if(defined $prepend) {
14236 return Arg->new($prepend);
14241 if(not $dos_crnl_determined and not defined $opt::d) {
14242 # Warn if input has CR-NL and -d is not set
14243 if($arg =~ /\r$/) {
14248 if($cr_count == 3 or $nl_count == 3) {
14249 $dos_crnl_determined = 1;
14250 if($nl_count == 0 and $cr_count == 3) {
14251 ::warning('The first three values end in CR-NL. '.
14252 'Consider using -d "\r\n"');
14257 # We need to read a full CSV line.
14258 if(($arg =~ y/"/"/) % 2 ) {
14259 # The number of " on the line is uneven:
14260 # If we were in a half_record => we have a full record now
14261 # If we were outside a half_record =>
14262 # we are in a half record now
14263 $half_record = not $half_record;
14266 # CSV half-record with quoting:
14267 # col1,"col2 2""x3"" board newline <-this one
14272 # Now we have a full CSV record
14277 if($Global::end_of_file_string and
14278 $arg eq $Global::end_of_file_string) {
14279 # Ignore the rest of input file
14281 ::debug("run", "EOF-string ($arg) met\n");
14282 if(defined $prepend) {
14283 return Arg->new($prepend);
14288 if(defined $prepend) {
14289 $arg = $prepend.$arg; # For line continuation
14292 if($Global::ignore_empty) {
14293 if($arg =~ /^\s*$/) {
14294 redo; # Try the next line
14297 if($Global::max_lines) {
14298 if($arg =~ /\s$/) {
14299 # Trailing space => continued on next line
14304 }} while (1 == 0); # Dummy loop {{}} for redo
14306 return Arg->new($arg);
14308 ::die_bug("multiread arg undefined");
14313 # Prototype forwarding
14314 sub expand_combinations(@);
14315 sub expand_combinations(@) {
14317 # ([xmin,xmax], [ymin,ymax], ...)
14318 # Returns: ([x,y,...],[x,y,...])
14319 # where xmin <= x <= xmax and ymin <= y <= ymax
14320 my $minmax_ref = shift;
14321 my $link = shift; # This is linked to the next input source
14322 my $xmin = $$minmax_ref[0];
14323 my $xmax = $$minmax_ref[1];
14326 my @rest = expand_combinations(@_);
14328 # Linked to next col with --link/:::+/::::+
14329 # TODO BUG does not wrap values if not same number of vals
14330 push(@p, map { [$$_[0], @$_] }
14331 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
14333 # If there are more columns: Compute those recursively
14334 for(my $x = $xmin; $x <= $xmax; $x++) {
14335 push @p, map { [$x, @$_] } @rest;
14339 for(my $x = $xmin; $x <= $xmax; $x++) {
14353 if($opt::hostgroups) {
14354 if($orig =~ s:@(.+)::) {
14355 # We found hostgroups on the arg
14356 @hostgroups = split(/\+|,/, $1);
14357 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
14358 # This hostgroup is not defined using -S
14360 ::warning("Adding hostgroups: @hostgroups");
14362 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
14363 my $sshlogin = SSHLogin->new($_);
14364 my $sshlogin_string = $sshlogin->string();
14365 $Global::host{$sshlogin_string} = $sshlogin;
14366 $Global::hostgroups{$sshlogin_string} = 1;
14370 # No hostgroup on the arg => any hostgroup
14371 @hostgroups = (keys %Global::hostgroups);
14376 'hostgroups' => \@hostgroups,
14377 }, ref($class) || $class;
14381 # Q alias for ::shell_quote_scalar
14382 my $ret = ::Q($_[0]);
14383 no warnings 'redefine';
14389 # pQ alias for ::perl_quote_scalar
14390 my $ret = ::pQ($_[0]);
14391 no warnings 'redefine';
14397 $Global::use{"DBI"} ||= eval "use B; 1;";
14402 return $Global::JobQueue->total_jobs();
14409 # shorthand for $job->skip();
14413 # shorthand for $job->slot();
14417 # shorthand for $job->seq();
14421 # Do not quote this arg
14422 $Global::unquote_arg = 1;
14424 sub yyyy_mm_dd_hh_mm_ss(@) {
14425 # ISO8601 2038-01-19T03:14:08
14426 ::strftime("%Y-%m-%dT%H:%M:%S", localtime(shift || time()));
14428 sub yyyy_mm_dd_hh_mm(@) {
14429 # ISO8601 2038-01-19T03:14
14430 ::strftime("%Y-%m-%dT%H:%M", localtime(shift || time()));
14432 sub yyyy_mm_dd(@) {
14433 # ISO8601 2038-01-19
14434 ::strftime("%Y-%m-%d", localtime(shift || time()));
14438 ::strftime("%H:%M:%S", localtime(shift || time()));
14442 ::strftime("%H:%M", localtime(shift || time()));
14444 sub yyyymmddhhmmss(@) {
14445 # ISO8601 20380119 + ISO8601 031408
14446 ::strftime("%Y%m%d%H%M%S", localtime(shift || time()));
14448 sub yyyymmddhhmm(@) {
14449 # ISO8601 20380119 + ISO8601 0314
14450 ::strftime("%Y%m%d%H%M", localtime(shift || time()));
14454 ::strftime("%Y%m%d", localtime(shift || time()));
14458 ::strftime("%H%M%S", localtime(shift || time()));
14462 ::strftime("%H%M", localtime(shift || time()));
14465 sub replace($$$$) {
14466 # Calculates the corresponding value for a given perl expression
14468 # The calculated string (quoted if asked for)
14470 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
14471 my $quote = shift; # should the string be quoted?
14472 # This is actually a CommandLine-object,
14473 # but it looks nice to be able to say {= $job->slot() =}
14475 # Positional replace treated as normal replace
14476 $perlexpr =~ s/^(-?\d+)? *//;
14477 if(not $Global::cache_replacement_eval
14479 not $self->{'cache'}{$perlexpr}) {
14480 # Only compute the value once
14481 # Use $_ as the variable to change
14483 if($Global::trim eq "n") {
14484 $_ = $self->{'orig'};
14487 $_ = trim_of($self->{'orig'});
14489 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
14490 if(not $perleval{$perlexpr}) {
14491 # Make an anonymous function of the $perlexpr
14492 # And more importantly: Compile it only once
14493 if($perleval{$perlexpr} =
14494 eval('sub { no strict; no warnings; my $job = shift; '.
14498 # The eval failed. Maybe $perlexpr is invalid perl?
14499 ::error("Cannot use $perlexpr: $@");
14500 ::wait_and_exit(255);
14503 # Execute the function
14504 $perleval{$perlexpr}->($job);
14505 $self->{'cache'}{$perlexpr} = $_;
14506 if($Global::unquote_arg) {
14507 # uq() was called in perlexpr
14508 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
14509 # Reset for next perlexpr
14510 $Global::unquote_arg = 0;
14513 # Return the value quoted if needed
14514 if($self->{'cache'}{'unquote'}{$perlexpr}) {
14515 return($self->{'cache'}{$perlexpr});
14517 return($quote ? Q($self->{'cache'}{$perlexpr})
14518 : $self->{'cache'}{$perlexpr});
14523 sub flush_cache($) {
14524 # Flush cache of computed values
14526 $self->{'cache'} = undef;
14531 return $self->{'orig'};
14536 $self->{'orig'} = shift;
14540 # Removes white space as specifed by --trim:
14546 # string with white space removed as needed
14547 my @strings = map { defined $_ ? $_ : "" } (@_);
14549 if($Global::trim eq "n") {
14551 } elsif($Global::trim eq "l") {
14552 for my $arg (@strings) { $arg =~ s/^\s+//; }
14553 } elsif($Global::trim eq "r") {
14554 for my $arg (@strings) { $arg =~ s/\s+$//; }
14555 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
14556 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
14558 ::error("--trim must be one of: r l rl lr.");
14559 ::wait_and_exit(255);
14561 return wantarray ? @strings : "@strings";
14565 package TimeoutQueue;
14569 my $delta_time = shift;
14571 if($delta_time =~ /(\d+(\.\d+)?)%/) {
14572 # Timeout in percent
14574 $delta_time = 1_000_000;
14576 $delta_time = ::multiply_time_units($delta_time);
14580 'delta_time' => $delta_time,
14582 'remedian_idx' => 0,
14583 'remedian_arr' => [],
14584 'remedian' => undef,
14585 }, ref($class) || $class;
14588 sub delta_time($) {
14590 return $self->{'delta_time'};
14593 sub set_delta_time($$) {
14595 $self->{'delta_time'} = shift;
14600 return $self->{'remedian'};
14603 sub set_remedian($$) {
14604 # Set median of the last 999^3 (=997002999) values using Remedian
14606 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
14607 # robust averaging method for large data sets." Journal of the
14608 # American Statistical Association 85.409 (1990): 97-104.
14611 my $i = $self->{'remedian_idx'}++;
14612 my $rref = $self->{'remedian_arr'};
14613 $rref->[0][$i%999] = $val;
14614 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
14615 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
14616 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
14619 sub update_median_runtime($) {
14620 # Update delta_time based on runtime of finished job if timeout is
14623 my $runtime = shift;
14624 if($self->{'pct'}) {
14625 $self->set_remedian($runtime);
14626 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
14627 ::debug("run", "Timeout: $self->{'delta_time'}s ");
14631 sub process_timeouts($) {
14632 # Check if there was a timeout
14634 # $self->{'queue'} is sorted by start time
14635 while (@{$self->{'queue'}}) {
14636 my $job = $self->{'queue'}[0];
14637 if($job->endtime()) {
14638 # Job already finished. No need to timeout the job
14639 # This could be because of --keep-order
14640 shift @{$self->{'queue'}};
14641 } elsif($job->is_timedout($self->{'delta_time'})) {
14642 # Need to shift off queue before kill
14643 # because kill calls usleep that calls process_timeouts
14644 shift @{$self->{'queue'}};
14645 ::warning("This job was killed because it timed out:",
14649 # Because they are sorted by start time the rest are later
14658 push @{$self->{'queue'}}, $in;
14667 $Global::use{"DBI"} ||= eval "use DBI; 1;";
14668 # +DBURL = append to this DBURL
14669 my $append = $dburl=~s/^\+//;
14670 my %options = parse_dburl(get_alias($dburl));
14671 my %driveralias = ("sqlite" => "SQLite",
14672 "sqlite3" => "SQLite",
14674 "postgres" => "Pg",
14675 "postgresql" => "Pg",
14677 "oracle" => "Oracle",
14678 "ora" => "Oracle");
14679 my $driver = $driveralias{$options{'databasedriver'}} ||
14680 $options{'databasedriver'};
14681 my $database = $options{'database'};
14682 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
14683 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
14684 my $dsn = "DBI:$driver:dbname=$database$host$port";
14685 my $userid = $options{'user'};
14686 my $password = $options{'password'};;
14687 if(not grep /$driver/, DBI->available_drivers) {
14688 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
14689 ::wait_and_exit(255);
14692 if($driver eq "CSV") {
14693 # CSV does not use normal dsn
14695 $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
14696 or die $DBI::errstr;
14698 ::error("$database is not a directory.");
14699 ::wait_and_exit(255);
14702 $dbh = DBI->connect($dsn, $userid, $password,
14703 { RaiseError => 1, AutoInactiveDestroy => 1 })
14704 or die $DBI::errstr;
14706 $dbh->{'PrintWarn'} = $Global::debug || 0;
14707 $dbh->{'PrintError'} = $Global::debug || 0;
14708 $dbh->{'RaiseError'} = 1;
14709 $dbh->{'ShowErrorStatement'} = 1;
14710 $dbh->{'HandleError'} = sub {};
14711 if(not defined $options{'table'}) {
14712 ::error("The DBURL ($dburl) must contain a table.");
14713 ::wait_and_exit(255);
14718 'driver' => $driver,
14719 'max_number_of_args' => undef,
14720 'table' => $options{'table'},
14721 'append' => $append,
14722 }, ref($class) || $class;
14725 # Prototype forwarding
14729 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
14730 if ($alias !~ /^:/) {
14737 ($path) = readlink($0) =~ m|^(.*)/|;
14739 ($path) = $0 =~ m|^(.*)/|;
14742 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
14743 "$path/dburl.aliases", "$path/dburl.aliases.dist");
14744 for (@deprecated) {
14746 ::warning("$_ is deprecated. ".
14747 "Use .sql/aliases instead (read man sql).");
14751 check_permissions("$ENV{HOME}/.sql/aliases");
14752 check_permissions("$ENV{HOME}/.dburl.aliases");
14753 my @search = ("$ENV{HOME}/.sql/aliases",
14754 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
14755 "$path/dburl.aliases", "$path/dburl.aliases.dist");
14756 for my $alias_file (@search) {
14757 # local $/ needed if -0 set
14759 if(-r $alias_file) {
14760 my $in = ::open_or_exit("<",$alias_file);
14761 push @urlalias, <$in>;
14765 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
14766 # If we saw this before: we have an alias loop
14767 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
14768 ::error("$alias_part is a cyclic alias.");
14771 push @Private::seen_aliases, $alias_part;
14776 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
14780 return get_alias($dburl.$rest);
14782 ::error("$alias is not defined in @search");
14787 sub check_permissions($) {
14792 my $username = (getpwuid($<))[0];
14793 ::warning("$file should be owned by $username: ".
14794 "chown $username $file");
14796 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
14797 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
14799 my $username = (getpwuid($<))[0];
14800 ::warning("$file should be only be readable by $username: ".
14801 "chmod 600 $file");
14806 sub parse_dburl($) {
14809 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
14811 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
14812 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
14813 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
14815 ([^:@/][^:@]*|) # Username ($2)
14817 :([^@]*) # Password ($3)
14820 ([^:/]*)? # Hostname ($4)
14823 ([^/]*)? # Port ($5)
14827 ([^/?]*)? # Database ($6)
14831 ([^?]*)? # Table ($7)
14838 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
14839 $options{user} = ::undef_if_empty(uri_unescape($2));
14840 $options{password} = ::undef_if_empty(uri_unescape($3));
14841 $options{host} = ::undef_if_empty(uri_unescape($4));
14842 $options{port} = ::undef_if_empty(uri_unescape($5));
14843 $options{database} = ::undef_if_empty(uri_unescape($6));
14844 $options{table} = ::undef_if_empty(uri_unescape($7));
14845 $options{query} = ::undef_if_empty(uri_unescape($8));
14846 ::debug("sql", "dburl $url\n");
14847 ::debug("sql", "databasedriver ", $options{databasedriver},
14848 " user ", $options{user},
14849 " password ", $options{password}, " host ", $options{host},
14850 " port ", $options{port}, " database ", $options{database},
14851 " table ", $options{table}, " query ", $options{query}, "\n");
14853 ::error("$url is not a valid DBURL");
14859 sub uri_unescape($) {
14860 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
14861 # to avoid depending on URI::Escape
14862 # This section is (C) Gisle Aas.
14863 # Note from RFC1630: "Sequences which start with a percent sign
14864 # but are not followed by two hexadecimal characters are reserved
14865 # for future extension"
14867 if (@_ && wantarray) {
14868 # not executed for the common case of a single argument
14869 my @str = ($str, @_); # need to copy
14871 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
14875 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
14882 if($self->{'driver'} eq "CSV") {
14884 if($stmt eq "BEGIN" or
14885 $stmt eq "COMMIT") {
14890 my $dbh = $self->{'dbh'};
14891 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
14892 # Execute with the rest of the args - if any
14896 while($lockretry < 10) {
14897 $sth = $dbh->prepare($stmt);
14900 eval { $rv = $sth->execute(@_) }) {
14903 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
14905 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
14907 # It is just a worker that reported back too late -
14908 # another worker had finished the job first
14909 # and the table was then dropped
14913 if($DBI::errstr =~ /locked/) {
14914 ::debug("sql", "Lock retry: $lockretry");
14916 ::usleep(rand()*300);
14917 } elsif(not $sth) {
14921 ::error($DBI::errstr);
14922 ::wait_and_exit(255);
14926 if($lockretry >= 10) {
14927 ::die_bug("retry > 10: $DBI::errstr");
14929 if($rv < 0 and $DBI::errstr){
14930 ::error($DBI::errstr);
14931 ::wait_and_exit(255);
14938 my $sth = $self->run(@_);
14940 # If $sth = 0 it means the table was dropped by another process
14942 my @row = $sth->fetchrow_array();
14944 push @retval, \@row;
14951 return $self->{'table'};
14956 return $self->{'append'};
14962 my $table = $self->table();
14963 $self->run("UPDATE $table $stmt",@_);
14968 my $commandline = shift;
14970 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
14971 $commandline->seq(),
14972 join("",@{$commandline->{'output'}{1}}),
14973 join("",@{$commandline->{'output'}{2}}));
14976 sub max_number_of_args($) {
14977 # Maximal number of args for this table
14979 if(not $self->{'max_number_of_args'}) {
14980 # Read the number of args from the SQL table
14981 my $table = $self->table();
14982 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
14983 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
14984 Receive Exitval _Signal Command Stdout Stderr);
14986 ::error
("$table contains no records");
14988 # Count the number of Vx columns
14989 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
14991 return $self->{'max_number_of_args'};
14994 sub set_max_number_of_args
($$) {
14996 $self->{'max_number_of_args'} = shift;
14999 sub create_table
($) {
15001 if($self->append()) { return; }
15002 my $max_number_of_args = shift;
15003 $self->set_max_number_of_args($max_number_of_args);
15004 my $table = $self->table();
15005 $self->run(qq(DROP TABLE IF EXISTS
$table;));
15006 # BIGINT and TEXT are not supported in these databases or are too small
15008 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
15009 "TEXT" => "CLOB", },
15010 "mysql" => { "TEXT" => "BLOB", },
15011 "CSV" => { "BIGINT" => "INT",
15012 "FLOAT" => "REAL", },
15014 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
15015 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
15016 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
15017 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
15018 $self->run(qq{CREATE TABLE
$table
15033 sub insert_records
($) {
15036 my $command_ref = shift;
15037 my $record_ref = shift;
15038 my $table = $self->table();
15039 # For SQL encode the command with \257 space as split points
15040 my $command = join("\257 ",@
$command_ref);
15041 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
15042 # Two extra value due to $seq, Exitval, Send
15043 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
15044 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
15045 "VALUES ($v_vals);", $seq, $command, -1000,
15046 0, @
$record_ref[1..$#$record_ref]);
15050 sub get_record
($) {
15053 my $table = $self->table();
15054 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
15055 my $rand = "Reserved-".$$.rand();
15060 if($self->{'driver'} eq "CSV") {
15061 # Sub SELECT is not supported in CSV
15062 # So to minimize the race condition below select a job at random
15063 my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
15064 "WHERE Exitval = -1000 LIMIT 100;");
15065 $v = [ sort { rand() > 0.5 } @
$r ];
15067 # Avoid race condition where multiple workers get the same job
15068 # by setting Stdout to a unique string
15069 # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
15070 $self->update("SET Stdout = ?,Exitval = ? ".
15072 " SELECT * FROM (".
15073 " SELECT min(Seq) FROM $table WHERE Exitval = -1000".
15075 ") AND Exitval = -1000;", $rand, -1210);
15076 # If a parallel worker overwrote the unique string this will get nothing
15077 $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
15078 "WHERE Stdout = ?;", $rand);
15081 my $val_ref = $v->[0];
15082 # Mark record as taken
15083 my $seq = shift @
$val_ref;
15084 # Save the sequence number to use when running the job
15085 $SQL::next_seq
= $seq;
15086 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
15087 # Command is encoded with '\257 space' as splitting char
15088 my @command = split /\257 /, shift @
$val_ref;
15089 $SQL::command_ref
= \
@command;
15091 push @retval, Arg
->new($_);
15094 # If the record was updated by another job in parallel,
15095 # then we may not be done, so see if there are more jobs pending
15097 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
15099 } while (not $v->[0] and $more_pending->[0]);
15108 sub total_jobs
($) {
15110 my $table = $self->table();
15111 my $v = $self->get("SELECT count(*) FROM $table;");
15113 return $v->[0]->[0];
15115 ::die_bug
("SQL::total_jobs");
15121 my $table = $self->table();
15122 my $v = $self->get("SELECT max(Seq) FROM $table;");
15124 return $v->[0]->[0];
15126 ::die_bug
("SQL::max_seq");
15131 # Check if there are any jobs left in the SQL table that do not
15132 # have a "real" exitval
15134 if($opt::wait or $Global::start_sqlworker
) {
15135 my $table = $self->table();
15136 my $rv = $self->get("select Seq,Exitval from $table ".
15137 "where Exitval <= -1000 limit 1");
15138 return not $rv->[0];
15146 # This package provides a counting semaphore
15148 # If a process dies without releasing the semaphore the next process
15149 # that needs that entry will clean up dead semaphores
15151 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
15152 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
15153 # process holding the entry. If the process dies, the entry can be
15154 # taken by another process.
15160 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
15161 $id = "id-".$id; # To distinguish it from a process id
15162 my $parallel_locks = $Global::cache_dir
. "/semaphores";
15163 -d
$parallel_locks or ::mkdir_or_die
($parallel_locks);
15164 my $lockdir = "$parallel_locks/$id";
15165 my $lockfile = $lockdir.".lock";
15166 if(-d
$parallel_locks and -w
$parallel_locks
15167 and -r
$parallel_locks and -x
$parallel_locks) {
15170 ::error
("Semaphoredir must be writable: '$parallel_locks'");
15171 ::wait_and_exit
(255);
15174 if($count < 1) { ::die_bug
("semaphore-count: $count"); }
15176 'lockfile' => $lockfile,
15177 'lockfh' => Symbol
::gensym
(),
15178 'lockdir' => $lockdir,
15180 'idfile' => $lockdir."/".$id,
15182 'pidfile' => $lockdir."/".$$.'@'.::hostname
(),
15183 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
15184 }, ref($class) || $class;
15187 sub remove_dead_locks
($) {
15189 my $lockdir = $self->{'lockdir'};
15191 for my $d (glob "$lockdir/*") {
15192 $d =~ m
:$lockdir/([0-9]+)\@
([-\
._a
-z0
-9]+)$:o
or next;
15193 my ($pid, $host) = ($1, $2);
15194 if($host eq ::hostname
()) {
15196 ::debug
("sem", "Alive: $pid $d\n");
15198 ::debug
("sem", "Dead: $d\n");
15207 my $sleep = 1; # 1 ms
15208 my $start_time = time;
15210 # Can we get a lock?
15211 $self->atomic_link_if_count_less_than() and last;
15212 $self->remove_dead_locks();
15213 # Retry slower and slower up to 1 second
15214 $sleep = ($sleep < 1000) ?
($sleep * 1.1) : ($sleep);
15215 # Random to avoid every sleeping job waking up at the same time
15216 ::usleep
(rand()*$sleep);
15217 if($opt::semaphoretimeout
) {
15218 if($opt::semaphoretimeout
> 0
15220 time - $start_time > $opt::semaphoretimeout
) {
15221 # Timeout: Take the semaphore anyway
15222 ::warning
("Semaphore timed out. Stealing the semaphore.");
15223 if(not -e
$self->{'idfile'}) {
15224 open (my $fh, ">", $self->{'idfile'}) or
15225 ::die_bug
("timeout_write_idfile: $self->{'idfile'}");
15228 link $self->{'idfile'}, $self->{'pidfile'};
15231 if($opt::semaphoretimeout
< 0
15233 time - $start_time > -$opt::semaphoretimeout
) {
15235 ::warning
("Semaphore timed out. Exiting.");
15241 ::debug
("sem", "acquired $self->{'pid'}\n");
15246 ::rm
($self->{'pidfile'});
15247 if($self->nlinks() == 1) {
15248 # This is the last link, so atomic cleanup
15250 if($self->nlinks() == 1) {
15251 ::rm
($self->{'idfile'});
15252 rmdir $self->{'lockdir'};
15256 ::debug
("run", "released $self->{'pid'}\n");
15259 sub pid_change
($) {
15260 # This should do what release()+acquire() would do without having
15261 # to re-acquire the semaphore
15264 my $old_pidfile = $self->{'pidfile'};
15265 $self->{'pid'} = $$;
15266 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname
();
15267 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
15268 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
15269 ::rm
($old_pidfile);
15272 sub atomic_link_if_count_less_than
($) {
15273 # Link $file1 to $file2 if nlinks to $file1 < $count
15277 my $nlinks = $self->nlinks();
15278 ::debug
("sem","$nlinks<$self->{'count'} ");
15279 if($nlinks < $self->{'count'}) {
15280 -d
$self->{'lockdir'} or ::mkdir_or_die
($self->{'lockdir'});
15281 if(not -e
$self->{'idfile'}) {
15282 open (my $fh, ">", $self->{'idfile'}) or
15283 ::die_bug
("write_idfile: $self->{'idfile'}");
15286 $retval = link $self->{'idfile'}, $self->{'pidfile'};
15287 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
15290 ::debug
("sem", "atomic $retval");
15296 if(-e
$self->{'idfile'}) {
15297 return (stat(_
))[3];
15305 my $sleep = 100; # 100 ms
15306 my $total_sleep = 0;
15307 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
15309 while(not $locked) {
15310 if(tell($self->{'lockfh'}) == -1) {
15312 open($self->{'lockfh'}, ">", $self->{'lockfile'})
15313 or ::debug("run
", "Cannot
open $self->{'lockfile'}");
15315 if($self->{'lockfh'}) {
15317 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
15318 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
15319 # The file is locked: No need to retry
15323 if ($! =~ m/Function not implemented/) {
15324 ::warning("flock: $!",
15325 "Will
wait for a random
while.");
15326 ::usleep(rand(5000));
15327 # File cannot be locked: No need to retry
15333 # Locking failed in first round
15334 # Sleep and try again
15335 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
15336 # Random to avoid every sleeping job waking up at the same time
15337 ::usleep(rand()*$sleep);
15338 $total_sleep += $sleep;
15339 if($opt::semaphoretimeout) {
15340 if($opt::semaphoretimeout > 0
15342 $total_sleep/1000 > $opt::semaphoretimeout) {
15343 # Timeout: Take the semaphore anyway
15344 ::warning("Semaphore timed out
. Taking the semaphore
.");
15348 if($opt::semaphoretimeout < 0
15350 $total_sleep/1000 > -$opt::semaphoretimeout) {
15352 ::warning("Semaphore timed out
. Exiting
.");
15357 if($total_sleep/1000 > 30) {
15358 ::warning("Semaphore stuck
for 30 seconds
. ".
15359 "Consider using
--semaphoretimeout
.");
15363 ::debug("run
", "locked
$self->{'lockfile'}");
15368 ::rm($self->{'lockfile'});
15369 close $self->{'lockfh'};
15370 ::debug("run
", "unlocked
\n");
15373 # Keep perl -w happy
15375 $opt::x = $Semaphore::timeout = $Semaphore::wait =
15376 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
15377 $Global::max_slot_number = $opt::session;
15383 unpack_combined_executable();
15384 save_stdin_stdout_stderr();
15385 save_original_signal_handler();
15387 ::debug("init
", "Open file descriptors
: ", join(" ",keys %Global::fh), "\n");
15388 my $number_of_args;
15389 if($Global::max_number_of_args) {
15390 $number_of_args = $Global::max_number_of_args;
15391 } elsif ($opt::X or $opt::m or $opt::xargs) {
15392 $number_of_args = undef;
15394 $number_of_args = 1;
15397 my @command = @ARGV;
15398 my @input_source_fh;
15399 if($opt::pipepart) {
15401 @input_source_fh = map { open_or_exit("<",$_) } @opt::a;
15402 # Remove the first: It will be the file piped.
15403 shift @input_source_fh;
15404 if(not @input_source_fh and not $opt::pipe) {
15405 @input_source_fh = (*STDIN);
15408 # -a is used for data - not for command line args
15409 @input_source_fh = map { open_or_exit("<",$_) } "/dev/null
";
15412 @input_source_fh = map { open_or_exit("<",$_) } @opt::a;
15413 if(not @input_source_fh and not $opt::pipe) {
15414 @input_source_fh = (*STDIN);
15418 if($opt::skip_first_line) {
15419 # Skip the first line for the first file handle
15420 my $fh = $input_source_fh[0];
15424 set_input_source_header(\@command,\@input_source_fh);
15425 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
15426 # Parallel check all hosts are up. Remove hosts that are down
15429 if($opt::sqlmaster and $opt::sqlworker) {
15430 # Start a real --sqlworker in the background later
15431 $Global::start_sqlworker = 1;
15432 $opt::sqlworker = undef;
15435 $Global::start_time = ::now();
15436 if($opt::nonall or $opt::onall) {
15437 onall(\@input_source_fh,@command);
15438 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
15441 $Global::JobQueue = JobQueue->new(
15442 \@command, \@input_source_fh, $Global::ContextReplace,
15443 $number_of_args, \@Global::transfer_files, \@Global::ret_files,
15444 \@Global::template_names, \@Global::template_contents
15447 if($opt::sqlmaster) {
15448 # Create SQL table to hold joblog + output
15449 # Figure out how many arguments are in a job
15450 # (It is affected by --colsep, -N, $number_source_fh)
15451 my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
15452 my $record = $record_queue->get();
15453 my $no_of_values = $number_of_args * (1+$#{$record});
15454 $record_queue->unget($record);
15455 $Global::sql->create_table($no_of_values);
15456 if($opt::sqlworker) {
15457 # Start a real --sqlworker in the background later
15458 $Global::start_sqlworker = 1;
15459 $opt::sqlworker = undef;
15463 if($opt::pipepart) {
15465 } elsif($opt::pipe) {
15468 } elsif($opt::shard or $opt::bin) {
15469 pipe_shard_setup();
15470 } elsif($opt::groupby) {
15471 pipe_group_by_setup();
15475 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
15476 # Count the number of jobs or shuffle all jobs
15477 # before starting any.
15478 # Must be done after ungetting any --pipepart jobs.
15479 $Global::JobQueue->total_jobs();
15481 # Compute $Global::max_jobs_running
15482 # Must be done after ungetting any --pipepart jobs.
15483 max_jobs_running();
15486 if($Global::semaphore) {
15487 $sem = acquire_semaphore();
15489 $SIG{TERM} = $Global::original_sig{TERM};
15490 $SIG{HUP} = \&start_no_new_jobs;
15492 if($opt::progress) {
15493 ::status_no_nl(init_progress());
15495 if($opt::tee or $opt::shard or $opt::bin) {
15496 # All jobs must be running in parallel for --tee/--shard/--bin
15497 while(start_more_jobs()) {}
15498 $Global::start_no_new_jobs = 1;
15499 if(not $Global::JobQueue->empty()) {
15501 ::error("--tee requires
--jobs to be higher
. Try
--jobs
0.");
15502 } elsif($opt::bin) {
15503 ::error("--bin requires
--jobs to be higher than the number of
",
15504 "arguments
. Increase
--jobs
.");
15505 } elsif($opt::shard) {
15506 ::error("--shard requires
--jobs to be higher than the number of
",
15507 "arguments
. Increase
--jobs
.");
15509 ::die_bug("--bin
/--shard/--tee should
not get here
");
15511 ::wait_and_exit(255);
15513 } elsif($opt::pipe and not $opt::pipepart and not $opt::semaphore) {
15514 # Fill all jobslots
15515 while(start_more_jobs()) {}
15518 # Reap the finished jobs and start more
15519 while(reapers() + start_more_jobs()) {}
15521 ::debug("init
", "Start draining
\n");
15522 drain_job_queue(@command);
15523 ::debug("init
", "Done draining
\n");
15525 ::debug("init
", "Done reaping
\n");
15526 if($Global::semaphore) { $sem->release(); }
15528 ::debug("init
", "Halt
\n");