3 # Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <http://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # open3 used in Job::start
24 # gensym used in Job::start
25 use Symbol
qw(gensym);
26 # tempfile used in Job::start
27 use File
::Temp
qw(tempfile tempdir);
28 # mkpath used in openresultsfile
30 # GetOptions used in get_options_from_array
32 # Used to ensure code quality
36 sub set_input_source_header
($$) {
37 my ($command_ref,$input_source_fh_ref) = @_;
38 if($opt::header
and not $opt::pipe) {
39 # split with colsep or \t
40 # $header force $colsep = \t if undef?
41 my $delimiter = defined $opt::colsep ?
$opt::colsep
: "\t";
43 my $left = "\Q$Global::parensleft\E";
44 my $l = $Global::parensleft
;
46 my $right = "\Q$Global::parensright\E";
47 my $r = $Global::parensright
;
49 for my $fh (@
$input_source_fh_ref) {
53 ::debug
("init", "Delimiter: '$delimiter'");
54 for my $s (split /$delimiter/o, $line) {
55 ::debug
("init", "Colname: '$s'");
56 # Replace {colname} with {2}
57 for(@
$command_ref, @Global::ret_files
,
58 @Global::transfer_files
, $opt::tagstring
,
59 $opt::workdir
, $opt::results
, $opt::retries
) {
62 s
:\
{$s(|/|//|\.|/\
.)\
}:\
{$id$1\
}:g
;
63 # {=header1 ... =} => {=1 ... =}
64 s
:$left $s (.*?
) $right:$l$id$1$r:gx
;
66 $Global::input_source_header
{$id} = $s;
72 for my $fh (@
$input_source_fh_ref) {
73 $Global::input_source_header
{$id} = $id;
79 sub max_jobs_running
() {
80 # Compute $Global::max_jobs_running as the max number of jobs
81 # running on each sshlogin.
83 # $Global::max_jobs_running
84 if(not $Global::max_jobs_running
) {
85 for my $sshlogin (values %Global::host
) {
86 $sshlogin->max_jobs_running();
89 if(not $Global::max_jobs_running
) {
90 ::error
("Cannot run any jobs.");
93 return $Global::max_jobs_running
;
98 # wait for children to complete
100 if($opt::halt
and $Global::halt_when
ne "never") {
101 if(not defined $Global::halt_exitstatus
) {
102 if($Global::halt_pct
) {
103 $Global::halt_exitstatus
=
104 ::ceil
($Global::total_failed
/
105 ($Global::total_started
|| 1) * 100);
106 } elsif($Global::halt_count
) {
107 $Global::halt_exitstatus
=
108 ::min
(undef_as_zero
($Global::total_failed
),101);
111 wait_and_exit
($Global::halt_exitstatus
);
113 wait_and_exit
(min
(undef_as_zero
($Global::exitstatus
),101));
118 sub __PIPE_MODE__
() {}
121 sub pipepart_setup
() {
122 # Compute the blocksize
123 # Generate the commands to extract the blocks
124 # Push the commands on queue
126 # @Global::cat_prepends
129 # Prepend each command with
131 my $cat_string = "< ".Q
($opt::a
[0]);
132 for(1..$Global::JobQueue
->total_jobs()) {
133 push @Global::cat_appends
, $cat_string;
134 push @Global::cat_prepends
, "";
137 if(not $opt::blocksize
) {
138 # --blocksize with 10 jobs per jobslot
139 $opt::blocksize
= -10;
141 if($opt::roundrobin
) {
142 # --blocksize with 1 job per jobslot
143 $opt::blocksize
= -1;
145 if($opt::blocksize
< 0) {
152 $size += size_of_block_dev
($_);
154 ::error
("$_ is neither a file nor a block device");
157 ::error
("File not found: $_");
161 # Run in total $job_slots*(- $blocksize) jobs
162 # Set --blocksize = size / no of proc / (- $blocksize)
163 $Global::dummy_jobs
= 1;
164 $Global::blocksize
= 1 +
165 int($size / max_jobs_running() /
166 -multiply_binary_prefix
($opt::blocksize
));
168 @Global::cat_prepends
= map { pipe_part_files
($_) } @opt::a
;
169 # Unget the empty arg as many times as there are parts
170 $Global::JobQueue
->{'commandlinequeue'}{'arg_queue'}->unget(
171 map { [Arg
->new("\0noarg")] } @Global::cat_prepends
176 sub pipe_tee_setup
() {
177 # Create temporary fifos
178 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
179 # This will spread the input to fifos
180 # Generate commands that reads from fifo1..N:
181 # cat fifo | user_command
183 # @Global::cat_prepends
185 for(1..$Global::JobQueue
->total_jobs()) {
186 push @fifos, tmpfifo
();
188 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
190 # Test if tee supports --output-error=warn-nopipe
191 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
192 my $opt = $? ?
"" : "--output-error=warn-nopipe";
193 ::debug
("init","tee $opt");
194 # Let tee inherit our stdin
195 # and redirect stdout to null
196 open STDOUT
, ">","/dev/null";
198 exec "tee", $opt, @fifos;
204 # (rm fifo1; grep 1) < fifo1
205 # (rm fifo2; grep 2) < fifo2
206 # (rm fifo3; grep 3) < fifo3
207 # Remove the tmpfifo as soon as it is open
208 @Global::cat_prepends
= map { "(rm $_;" } @fifos;
209 @Global::cat_appends
= map { ") < $_" } @fifos;
213 sub parcat_script
() {
214 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
216 use POSIX qw(:errno_h);
221 use Fcntl
qw(:DEFAULT :flock);
224 my $q = Thread
::Queue
->new();
225 my $okq = Thread
::Queue
->new();
231 print " parcat file(s)\n";
232 print " cat argfile | parcat\n";
234 # Read arguments from stdin
235 chomp(@ARGV = <STDIN
>);
238 my $files_to_open = 0;
239 # Default: fd = stdout
242 # --rm = remove file when opened
243 /^--rm$/ and do { $opt::rm
= 1; next; };
244 # -1 = output to fd 1, -2 = output to fd 2
245 /^-(\d+)$/ and do { $fd = $1; next; };
246 push @producers, threads
->create("producer", $_, $fd);
251 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
253 my $output_fd = shift;
254 open(my $fh, "<", $file) || do {
255 print STDERR
"parcat: Cannot open $file\n";
258 # Remove file when it has been opened
262 set_fh_non_blocking
($fh);
264 # Pass the fileno to parent
265 $q->enqueue(fileno($fh),$output_fd);
266 # Get an OK that the $fh is opened and we can release the $fh
268 my $ok = $okq->dequeue();
269 if($ok == fileno($fh)) { last; }
270 # Not ours - very unlikely to happen
276 my $s = IO
::Select
->new();
282 open(my $infh, "<&=", $infd) || die;
283 open(my $outfh, ">&=", $outfd) || die;
285 # Tell the producer now opened here and can be released
286 $okq->enqueue($infd);
287 # Initialize the buffer
288 @
{$buffer{$infh}{$outfd}} = ();
289 $Global::fh
{$outfd} = $outfh;
293 # Non-blocking dequeue
296 ($infd,$outfd) = $q->dequeue_nb(2);
297 if(defined($outfd)) {
298 add_file
($infd,$outfd);
300 } while(defined($outfd));
303 sub add_files_block
{
305 my ($infd,$outfd) = $q->dequeue(2);
306 add_file
($infd,$outfd);
311 my (@ready,$infh,$rv,$buf);
313 # Wait until at least one file is opened
315 while($q->pending or keys %buffer) {
317 while(keys %buffer) {
318 @ready = $s->can_read(0.01);
323 # There is only one key, namely the output file descriptor
324 for my $outfd (keys %{$buffer{$infh}}) {
325 $rv = sysread($infh, $buf, 65536);
328 # Would block: Nothing read
331 # Nothing read, but would not block:
334 for(@
{$buffer{$infh}{$outfd}}) {
335 syswrite($Global::fh
{$outfd},$_);
337 delete $buffer{$infh};
338 # Closing the $infh causes it to block
345 # Find \n or \r for full line
346 my $i = (rindex($buf,"\n")+1);
349 for(@
{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
350 syswrite($Global::fh
{$outfd},$_);
352 # @buffer = remaining half line
353 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
355 # Something read, but not a full line
356 push @
{$buffer{$infh}{$outfd}}, $buf;
363 } while($opened < $files_to_open);
369 sub set_fh_non_blocking
{
370 # Set filehandle as non-blocking
372 # $fh = filehandle to be blocking
377 fcntl($fh, &F_GETFL
, $flags) || die $!; # Get the current flags on the filehandle
378 $flags |= &O_NONBLOCK
; # Add non-blocking to the flags
379 fcntl($fh, &F_SETFL
, $flags) || die $!; # Set the flags on the filehandle
382 return ::spacefree(3, $script);
385 sub sharder_script() {
390 # Which columns to shard on (count from 1)
392 # Which columns to shard on (count from 0)
395 my $perlexpr = shift;
397 # Open fifos for writing, fh{0..$bins}
401 open $fh{$t++}, ">", $_;
402 # open blocks until it is opened by reader
403 # so unlink only happens when it is ready
407 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
409 # Split into $col columns (no need to split into more)
410 @F = split $sep, $_, $col+1;
412 local $_ = $F[$col0];
414 $fh = $fh{ hex(B::hash($_))%$bins };
420 # Split into $col columns (no need to split into more)
421 @F = split $sep, $_, $col+1;
422 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
426 # Close all open fifos
429 return ::spacefree(1, $script);
432 sub binner_script() {
437 # Which columns to shard on (count from 1)
439 # Which columns to shard on (count from 0)
442 my $perlexpr = shift;
444 # Open fifos for writing, fh{0..$bins}
447 # Let the last output fifo be the 0'th
448 open $fh{$t++}, ">", pop @ARGV;
450 open $fh{$t++}, ">", $_;
451 # open blocks until it is opened by reader
452 # so unlink only happens when it is ready
456 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
458 # Split into $col columns (no need to split into more)
459 @F = split $sep, $_, $col+1;
461 local $_ = $F[$col0];
463 $fh = $fh{ $_%$bins };
469 # Split into $col columns (no need to split into more)
470 @F = split $sep, $_, $col+1;
471 $fh = $fh{ $F[$col0]%$bins };
475 # Close all open fifos
478 return ::spacefree
(1, $script);
481 sub pipe_shard_setup
() {
482 # Create temporary fifos
483 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
484 # This will spread the input to fifos
485 # Generate commands that reads from fifo1..N:
486 # cat fifo | user_command
488 # @Global::cat_prepends
491 # TODO $opt::jobs should be evaluated (100%)
492 # TODO $opt::jobs should be number of total_jobs if there are argugemts
493 my $njobs = $opt::jobs
;
494 for my $m (0..$njobs-1) {
495 for my $n (0..$njobs-1) {
496 # sharding to A B C D
497 # parcatting all As together
498 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo
();
501 my $shardbin = ($opt::shard
|| $opt::bin
);
504 $script = binner_script
();
506 $script = sharder_script
();
509 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
511 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
512 # Group by column name
513 # (Yes, this will also wrongly match a perlexpr like: chop)
514 my($read,$char,@line);
515 # A full line, but nothing more (the rest must be read by the child)
516 # $Global::header used to prepend block to each job
518 $read = sysread(STDIN
,$char,1);
520 } while($read and $char ne "\n");
521 $Global::header
= join "", @line;
523 my ($col, $perlexpr, $subref) =
524 column_perlexpr
($shardbin, $Global::header
, $opt::colsep
);
526 # Let the sharder inherit our stdin
527 # and redirect stdout to null
528 open STDOUT
, ">","/dev/null";
529 # The PERL_HASH_SEED must be the same for all sharders
530 # so B::hash will return the same value for any given input
531 $ENV{'PERL_HASH_SEED'} = $$;
532 exec qw(parallel --block 100k -q --pipe -j), $njobs,
533 qw(--roundrobin -u perl -e), $script, ($opt::colsep
|| ","),
534 $col, $perlexpr, '{}', (map { (':::+', @
{$_}) } @parcatfifos);
537 # (rm fifo1; grep 1) < fifo1
538 # (rm fifo2; grep 2) < fifo2
539 # (rm fifo3; grep 3) < fifo3
540 my $parcat = Q
(parcat_script
());
542 ::error
("'parcat' must be in path.");
543 ::wait_and_exit
(255);
545 @Global::cat_prepends
= map { "perl -e $parcat @$_ | " } @parcatfifos;
548 sub pipe_part_files
(@
) {
550 # find header and split positions
551 # make commands that 'cat's the partial file
553 # $file = the file to read
555 # @commands that will cat_partial each part
558 if(not -f
$file and not -b
$file) {
559 ::error
("$file is not a seekable file.");
560 ::wait_and_exit
(255);
562 my $header = find_header
(\
$buf,open_or_exit
($file));
564 my @pos = find_split_positions
($file,$Global::blocksize
,$header);
566 my @cat_prepends = ();
567 for(my $i=0; $i<$#pos; $i++) {
569 cat_partial
($file, 0, length($header), $pos[$i], $pos[$i+1]));
571 return @cat_prepends;
574 sub find_header
($$) {
575 # Compute the header based on $opt::header
577 # $buf_ref = reference to read-in buffer
578 # $fh = filehandle to read from
585 my ($buf_ref, $fh) = @_;
587 # $Global::header may be set in group_by_loop()
588 if($Global::header
) { return $Global::header
}
590 if($opt::header
eq ":") { $opt::header
= "(.*\n)"; }
591 # Number = number of lines
592 $opt::header
=~ s/^(\d+)$/"(.*\n)"x$1/e;
593 while(sysread($fh,$$buf_ref,$Global::blocksize
,length $$buf_ref)) {
594 if($$buf_ref =~ s/^($opt::header)//) {
603 sub find_split_positions
($$$) {
604 # Find positions in bigfile where recend is followed by recstart
606 # $file = the file to read
607 # $block = (minimal) --block-size of each chunk
608 # $header = header to be skipped
613 # @positions of block start/end
614 my($file, $block, $header) = @_;
615 my $headerlen = length $header;
618 # $file is a blockdevice
619 $size = size_of_block_dev
($file);
623 return split_positions_for_group_by
($file,$size,$block,$header);
625 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
626 # The optimal dd blocksize for freebsd = 2^15..2^17
627 my $dd_block_size = 131072; # 2^17
629 my ($recstart,$recend) = recstartrecend
();
630 my $recendrecstart = $recend.$recstart;
631 my $fh = ::open_or_exit
($file);
632 push(@pos,$headerlen);
633 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
635 if($recendrecstart eq "") {
636 # records ends anywhere
639 # Seek the the block start
640 if(not sysseek($fh, $pos, 0)) {
641 ::error
("Cannot seek to $pos in $file");
644 while(sysread($fh,$buf,$dd_block_size,length $buf)) {
646 # If match /$recend$recstart/ => Record position
647 if($buf =~ m
:^(.*$recend)$recstart:os
) {
648 # Start looking for next record _after_ this match
654 # If match $recend$recstart => Record position
655 # TODO optimize to only look at the appended
656 # $dd_block_size + len $recendrecstart
657 # TODO increase $dd_block_size to optimize for longer records
658 my $i = index64
(\
$buf,$recendrecstart);
660 # Start looking for next record _after_ this match
661 $pos += $i + length($recend);
669 if($pos[$#pos] != $size) {
670 # Last splitpoint was not at end of the file: add $size as the last
677 sub split_positions_for_group_by
($$$$) {
682 seek($fh, $pos-1, 0) || die;
687 my $linepos = tell($fh);
692 if(defined $group_by::col
) {
693 $opt::colsep
||= "\t";
694 @F = split /$opt::colsep/, $_;
695 $_ = $F[$group_by::col
];
697 eval $group_by::perlexpr
;
699 return ($_,$linepos);
702 sub binary_search_end
($$$) {
703 my ($s,$spos,$epos) = @_;
704 # value_at($spos) == $s
705 # value_at($epos) != $s
706 my $posdif = $epos - $spos;
709 ($v,$vpos) = value_at
($spos+$posdif);
712 $posdif = $epos - $spos;
716 $posdif = int($posdif/2);
721 sub binary_search_start
($$$) {
722 my ($s,$spos,$epos) = @_;
723 # value_at($spos) != $s
724 # value_at($epos) == $s
725 my $posdif = $epos - $spos;
728 ($v,$vpos) = value_at
($spos+$posdif);
733 $posdif = $epos - $spos;
735 $posdif = int($posdif/2);
740 my ($file,$size,$block,$header) = @_;
741 my ($a,$b,$c,$apos,$bpos,$cpos);
743 $fh = open_or_exit
($file);
744 # Set $Global::group_by_column $Global::group_by_perlexpr
745 group_by_loop
($fh,$opt::recsep
);
746 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
747 $apos = length $header;
748 for(($a,$apos) = value_at
($apos); $apos < $size;) {
750 $bpos = $apos + $block;
751 ($b,$bpos) = value_at
($bpos);
753 push @pos, $size; last;
755 $cpos = $bpos + $block;
756 ($c,$cpos) = value_at
($cpos);
759 # Move bpos, cpos a block forward until $a == $b != $c
762 ($c,$cpos) = value_at
($cpos);
769 # Binary search for $b ending between ($bpos,$cpos)
770 ($b,$bpos) = binary_search_end
($b,$bpos,$cpos);
774 # Binary search for $b starting between ($apos,$bpos)
775 ($b,$bpos) = binary_search_start
($b,$apos,$bpos);
778 # Binary search for $b ending between ($bpos,$cpos)
779 ($b,$bpos) = binary_search_end
($b,$bpos,$cpos);
782 ($a,$apos) = ($b,$bpos);
784 if($pos[$#pos] != $size) {
785 # Last splitpoint was not at end of the file: add it
791 sub cat_partial
($@
) {
792 # Efficient command to copy from byte X to byte Y
794 # $file = the file to read
795 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
797 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
798 my($file, @start_end) = @_;
800 # Convert (start,end) to (start,len)
801 my @start_len = map {
802 if(++$i % 2) { $start = $_; } else { $_-$start }
804 # This can read 7 GB/s using a single core
805 my $script = spacefree
809 sysseek(STDIN,shift,0) || die;
812 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
814 syswrite(STDOUT,$buf);
818 return "<". Q($file) .
819 " perl -e '$script' @start_len |";
822 sub column_perlexpr($$$) {
823 # Compute the column number (if any), perlexpression from combined
824 # string (such as --shard key, --groupby key, {=n perlexpr=}
826 # $column_perlexpr = string with column and perl expression
827 # $header = header from input file (if column is column name)
828 # $colsep = column separator regexp
830 # $col = column number
831 # $perlexpr = perl expression
832 # $subref = compiled perl expression as sub reference
833 my ($column_perlexpr, $header, $colsep) = @_;
834 my ($col, $perlexpr, $subref);
835 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
836 # Column name/number (possibly prefix)
837 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
838 # Column number (possibly prefix)
840 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
841 # Column name (possibly prefix)
843 # Split on --copsep pattern
844 my @headers = split /$colsep/, $header;
846 @headers{@headers} = (1..($#headers+1));
847 $col = $headers{$colname};
848 if(not defined $col) {
849 ::error("Column '$colname' $colsep not found in header",keys %headers);
850 ::wait_and_exit(255);
854 # What is left of $column_perlexpr is $perlexpr (possibly empty)
855 $perlexpr = $column_perlexpr;
856 $subref = eval("sub { no strict; no warnings; $perlexpr }");
857 return($col, $perlexpr, $subref);
860 sub group_by_loop($$) {
861 # Generate perl code for group-by loop
862 # Insert a $recsep when the column value changes
863 # The column value can be computed with $perexpr
864 my($fh,$recsep) = @_;
865 my $groupby = $opt::groupby;
866 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
867 # Group by column name
868 # (Yes, this will also wrongly match a perlexpr like: chop)
869 my($read,$char,@line);
870 # A full line, but nothing more (the rest must be read by the child)
871 # $Global::header used to prepend block to each job
873 $read = sysread($fh,$char,1);
875 } while($read and $char ne "\n");
876 $Global::header = join "", @line;
878 $opt::colsep ||= "\t";
879 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
880 column_perlexpr($groupby, $Global::header, $opt::colsep);
881 # Numbered 0..n-1 due to being used by $F[n]
882 if($group_by::col) { $group_by::col--; }
884 my $loop = ::spacefree(0,q{
885 BEGIN{ $last = "RECSEP"; }
895 if(defined $group_by::col) {
896 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
898 $loop =~ s/COLVALUE/\$_/g;
900 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
901 $loop =~ s/RECSEP/$recsep/g;
905 sub group_by_stdin_filter() {
906 # Record separator with 119 bit random value
909 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
910 $opt::remove_rec_sep = 1;
912 push @filter, "perl";
913 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
914 # This is column number/name
915 # Use -a (auto-split)
917 $opt::colsep ||= "\t";
918 my $sep = $opt::colsep;
921 push @filter, "-F$sep";
924 push @filter, group_by_loop(*STDIN,$opt::recstart);
925 ::debug("init", "@filter\n");
926 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
931 # Spawn a job and print the record to it.
937 # $Global::max_number_of_args
939 # $Global::start_no_new_jobs
945 my ($recstart,$recend) = recstartrecend();
946 my $recendrecstart = $recend.$recstart;
947 my $chunk_number = 1;
948 my $one_time_through;
949 my $two_gb = 2**31-1;
950 my $blocksize = $Global::blocksize;
952 my $timeout = $Global::blocktimeout;
954 my $header = find_header(\$buf,$in);
955 my $anything_written;
959 # Read a --blocksize from STDIN
960 # possibly interrupted by --blocktimeout
961 # Add up to the next full block
962 my $readsize = $blocksize - (length $buf) % $blocksize;
965 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
966 # --blocktimeout (or 0 if not set)
970 $nread = sysread $in, $buf, $readsize, length $buf;
972 } while($readsize and $nread);
974 # Less efficient reading, but 32-bit sysread compatible
976 $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
978 } while($readsize and $nread);
983 die unless $@ eq "alarm\n"; # propagate unexpected errors
988 $eof = not ($nread or $alarm);
991 sub pass_n_line_records() {
992 # Pass records of N lines
993 my $n_lines = $buf =~ tr/\n/\n/;
994 my $last_newline_pos = rindex64(\$buf,"\n");
995 # Go backwards until there are full n-line records
996 while($n_lines % $Global::max_lines) {
998 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1000 # Chop at $last_newline_pos as that is where n-line record ends
1001 $anything_written +=
1002 write_record_to_pipe($chunk_number++,\$header,\$buf,
1003 $recstart,$recend,$last_newline_pos+1);
1004 shorten(\$buf,$last_newline_pos+1);
1007 sub pass_n_regexps() {
1008 # Pass records of N regexps
1009 # -N => (start..*?end){n}
1010 # -L -N => (start..*?end){n*l}
1011 my $read_n_lines = -1+
1012 $Global::max_number_of_args * ($Global::max_lines || 1);
1013 # (?!negative lookahead) is needed to avoid backtracking
1014 # See: https://unix.stackexchange.com/questions/439356/
1017 # Either recstart or at least one char from start
1019 # followed something
1020 (?:(?!$recend$recstart).)*?
1023 # Then n-1 times recstart.*recend
1024 (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
1026 # Followed by recstart
1027 (?=$recstart)/osx) {
1028 $anything_written +=
1029 write_record_to_pipe($chunk_number++,\$header,\$buf,
1030 $recstart,$recend,length $1);
1031 shorten(\$buf,length $1);
1036 # Find the last recend-recstart in $buf
1038 if($buf =~ /^(.*$recend)$recstart.*?$/os) {
1039 $anything_written +=
1040 write_record_to_pipe($chunk_number++,\$header,\$buf,
1041 $recstart,$recend,length $1);
1042 shorten(\$buf,length $1);
1046 sub pass_csv_record() {
1048 # We define a CSV record as an even number of " + end of line
1049 # This works if you use " as quoting character
1050 my $last_newline_pos = length $buf;
1051 # Go backwards from the last \n and search for a position
1052 # where there is an even number of "
1055 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1057 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1058 and $last_newline_pos >= 0);
1059 # Chop at $last_newline_pos as that is where CSV record ends
1060 $anything_written +=
1061 write_record_to_pipe($chunk_number++,\$header,\$buf,
1062 $recstart,$recend,$last_newline_pos+1);
1063 shorten(\$buf,$last_newline_pos+1);
1067 # Pass n records of --recend/--recstart
1068 # -N => (start..*?end){n}
1071 $Global::max_number_of_args * ($Global::max_lines || 1);
1072 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1075 $i += length $recend; # find the actual splitting location
1076 $anything_written +=
1077 write_record_to_pipe($chunk_number++,\$header,\$buf,
1078 $recstart,$recend,$i);
1084 # Pass records of --recend/--recstart
1085 # Split record at fixed string
1086 # Find the last recend+recstart in $buf
1088 my $i = rindex64(\$buf,$recendrecstart);
1090 $i += length $recend; # find the actual splitting location
1091 $anything_written +=
1092 write_record_to_pipe($chunk_number++,\$header,\$buf,
1093 $recstart,$recend,$i);
1098 sub increase_blocksize_maybe() {
1099 if(not $anything_written
1100 and not $opt::blocktimeout
1101 and not $Global::no_autoexpand_block) {
1102 # Nothing was written - maybe the block size < record size?
1103 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1104 if($blocksize < $two_gb) {
1105 my $old_blocksize = $blocksize;
1106 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1107 ::warning("A record was longer than $old_blocksize. " .
1108 "Increasing to --blocksize $blocksize.");
1114 $anything_written = 0;
1117 # Remove empty lines
1118 $buf =~ s/^\s*\n//gm;
1119 if(length $buf == 0) {
1127 if($Global::max_lines and not $Global::max_number_of_args) {
1128 # Pass n-line records
1129 pass_n_line_records();
1130 } elsif($opt::csv) {
1131 # Pass a full CSV record
1133 } elsif($opt::regexp) {
1134 # Split record at regexp
1135 if($Global::max_number_of_args) {
1141 # Pass normal --recend/--recstart record
1142 if($Global::max_number_of_args) {
1149 increase_blocksize_maybe();
1150 ::debug("init", "Round\n");
1152 ::debug("init", "Done reading input\n");
1154 # If there is anything left in the buffer write it
1155 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1156 $recend, length $buf);
1159 $Global::no_more_input = 1;
1160 # We need to start no more jobs: At most we need to retry some
1161 # of the already running.
1162 my @running = values %Global::running;
1164 for my $job (@running) {
1165 if(defined $job and $job->virgin()) {
1166 close $job->fh(0,"w");
1169 # Wait for running jobs to be done
1171 while($Global::total_running > 0) {
1172 $sleep = ::reap_usleep($sleep);
1176 $Global::start_no_new_jobs ||= 1;
1177 if($opt::roundrobin) {
1178 # Flush blocks to roundrobin procs
1180 while(%Global::running) {
1181 my $something_written = 0;
1182 for my $job (values %Global::running) {
1183 if($job->block_length()) {
1184 $something_written += $job->non_blocking_write();
1186 close $job->fh(0,"w");
1189 if($something_written) {
1190 $sleep = $sleep/2+0.001;
1192 $sleep = ::reap_usleep($sleep);
1197 sub recstartrecend() {
1202 # $recstart,$recend with default values and regexp conversion
1203 my($recstart,$recend);
1204 if(defined($opt::recstart) and defined($opt::recend)) {
1205 # If both --recstart and --recend is given then both must match
1206 $recstart = $opt::recstart;
1207 $recend = $opt::recend;
1208 } elsif(defined($opt::recstart)) {
1209 # If --recstart is given it must match start of record
1210 $recstart = $opt::recstart;
1212 } elsif(defined($opt::recend)) {
1213 # If --recend is given then it must match end of record
1215 $recend = $opt::recend;
1216 if($opt::regexp and $recend eq '') {
1217 # --regexp --recend ''
1223 # If $recstart/$recend contains '|'
1224 # this should only apply to the regexp
1225 $recstart = "(?:".$recstart.")";
1226 $recend = "(?:".$recend.")";
1228 # $recstart/$recend = printf strings (\n)
1229 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1230 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1232 return ($recstart,$recend);
1236 # See if string is in buffer N times
1238 # the position where the Nth copy is found
1239 my ($buf_ref, $str, $n) = @_;
1242 $i = index64($buf_ref,$str,$i+1);
1243 if($i == -1) { last }
1252 sub round_robin_write($$$$$) {
1254 # $header_ref = ref to $header string
1255 # $block_ref = ref to $block to be written
1256 # $recstart = record start string
1257 # $recend = record end string
1258 # $endpos = end position of $block
1262 # $something_written = amount of bytes written
1263 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1265 my $block_passed = 0;
1266 while(not $block_passed) {
1267 # Continue flushing existing buffers
1268 # until one is empty and a new block is passed
1270 # Rotate queue once so new blocks get a fair chance
1271 # to be given to another slot
1272 push @robin_queue, shift @robin_queue;
1274 # Make a queue to spread the blocks evenly
1275 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1276 values %Global::running);
1280 for my $job (@robin_queue) {
1281 if($job->block_length() > 0) {
1282 $written += $job->non_blocking_write();
1284 $job->set_block($header_ref, $buffer_ref,
1285 $endpos, $recstart, $recend);
1287 $job->set_virgin(0);
1288 $written += $job->non_blocking_write();
1293 $sleep = $sleep/1.5+0.001;
1295 # Don't sleep if something is written
1296 } while($written and not $block_passed);
1297 $sleep = ::reap_usleep($sleep);
1304 # Do index on strings > 2GB.
1305 # index in Perl < v5.22 does not work for > 2GB
1307 # as index except STR which must be passed as a reference
1312 my $pos = shift || 0;
1313 my $block_size = 2**31-1;
1314 my $strlen = length($$ref);
1315 # No point in doing extra work if we don't need to.
1316 if($strlen < $block_size or $] > 5.022) {
1317 return index($$ref, $match, $pos);
1320 my $matchlen = length($match);
1323 while($offset < $strlen) {
1325 substr($$ref, $offset, $block_size),
1326 $match, $pos-$offset);
1328 return $ret + $offset;
1330 $offset += ($block_size - $matchlen - 1);
1336 # Do rindex on strings > 2GB.
1337 # rindex in Perl < v5.22 does not work for > 2GB
1339 # as rindex except STR which must be passed as a reference
1345 my $block_size = 2**31-1;
1346 my $strlen = length($$ref);
1347 # Default: search from end
1348 $pos = defined $pos ? $pos : $strlen;
1349 # No point in doing extra work if we don't need to.
1350 if($strlen < $block_size) {
1351 return rindex($$ref, $match, $pos);
1354 my $matchlen = length($match);
1356 my $offset = $pos - $block_size + $matchlen;
1358 # The offset is less than a $block_size
1359 # Set the $offset to 0 and
1360 # Adjust block_size accordingly
1361 $block_size = $block_size + $offset;
1364 while($offset >= 0) {
1366 substr($$ref, $offset, $block_size),
1369 return $ret + $offset;
1371 $offset -= ($block_size - $matchlen - 1);
1377 # Do: substr($buf,0,$i) = "";
1378 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1381 # $i = position to shorten to
1383 my ($buf_ref, $i) = @_;
1384 my $two_gb = 2**31-1;
1385 while($i > $two_gb) {
1386 substr($$buf_ref,0,$two_gb) = "";
1389 substr($$buf_ref,0,$i) = "";
1392 sub write_record_to_pipe($$$$$$) {
1394 # Write record from pos 0 .. $endpos to pipe
1396 # $chunk_number = sequence number - to see if already run
1397 # $header_ref = reference to header string to prepend
1398 # $buffer_ref = reference to record to write
1399 # $recstart = start string of record
1400 # $recend = end string of record
1401 # $endpos = position in $buffer_ref where record ends
1403 # $Global::job_already_run
1405 # @Global::virgin_jobs
1407 # Number of chunks written (0 or 1)
1408 my ($chunk_number, $header_ref, $buffer_ref,
1409 $recstart, $recend, $endpos) = @_;
1410 if($endpos == 0) { return 0; }
1411 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1412 if($opt::roundrobin) {
1413 # Write the block to one of the already running jobs
1414 return round_robin_write($header_ref, $buffer_ref,
1415 $recstart, $recend, $endpos);
1417 # If no virgin found, backoff
1418 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1419 while(not @Global::virgin_jobs) {
1420 ::debug("pipe", "No virgin jobs");
1421 $sleep = ::reap_usleep($sleep);
1422 # Jobs may not be started because of loadavg
1423 # or too little time between each ssh login
1424 # or retrying failed jobs.
1427 my $job = shift @Global::virgin_jobs;
1428 # Job is no longer virgin
1429 $job->set_virgin(0);
1432 # Copy $buffer[0..$endpos] to $job->{'block'}
1434 # Run $job->add_transfersize
1435 $job->set_block($header_ref, $buffer_ref, $endpos,
1436 $recstart, $recend);
1440 $job->write($job->block_ref());
1441 close $job->fh(0,"w");
1445 # We ignore the removed rec_sep which is technically wrong.
1446 $job->add_transfersize($endpos + length $$header_ref);
1450 # Chop of at $endpos as we do not know how many rec_sep will
1452 substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
1454 if($opt::remove_rec_sep) {
1455 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
1457 $job->write($header_ref);
1458 $job->write($buffer_ref);
1459 close $job->fh(0,"w");
1463 close $job->fh(0,"w");
1468 sub __SEM_MODE__() {}
1471 sub acquire_semaphore() {
1472 # Acquires semaphore. If needed: spawns to the background
1476 # The semaphore to be released when jobs is complete
1477 $Global::host{':'} = SSHLogin->new(":");
1478 my $sem = Semaphore->new($Semaphore::name,
1479 $Global::host{':'}->max_jobs_running());
1481 if($Semaphore::fg) {
1487 # If run in the background, the PID will change
1495 sub __PARSE_OPTIONS__() {}
1498 sub options_hash() {
1500 # %hash = the GetOptions config
1502 ("debug|D=s" => \$opt::D,
1503 "xargs" => \$opt::xargs,
1507 "sql=s" => \$opt::retired,
1508 "sqlmaster=s" => \$opt::sqlmaster,
1509 "sqlworker=s" => \$opt::sqlworker,
1510 "sqlandworker=s" => \$opt::sqlandworker,
1511 "joblog|jl=s" => \$opt::joblog,
1512 "results|result|res=s" => \$opt::results,
1513 "resume" => \$opt::resume,
1514 "resume-failed|resumefailed" => \$opt::resume_failed,
1515 "retry-failed|retryfailed" => \$opt::retry_failed,
1516 "silent" => \$opt::silent,
1517 "keep-order|keeporder|k" => \$opt::keeporder,
1518 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1519 "group" => \$opt::group,
1520 "g" => \$opt::retired,
1521 "ungroup|u" => \$opt::ungroup,
1522 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1523 => \$opt::linebuffer,
1524 "tmux" => \$opt::tmux,
1525 "tmuxpane" => \$opt::tmuxpane,
1526 "null|0" => \$opt::null,
1527 "quote|q" => \$opt::quote,
1528 # Replacement strings
1529 "parens=s" => \$opt::parens,
1530 "rpl=s" => \@opt::rpl,
1531 "plus" => \$opt::plus,
1533 "extensionreplace|er=s" => \$opt::U,
1534 "U=s" => \$opt::retired,
1535 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1536 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1537 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1538 "seqreplace=s" => \$opt::seqreplace,
1539 "slotreplace=s" => \$opt::slotreplace,
1540 "jobs|j=s" => \$opt::jobs,
1541 "delay=s" => \$opt::delay,
1542 "sshdelay=f" => \$opt::sshdelay,
1543 "load=s" => \$opt::load,
1544 "noswap" => \$opt::noswap,
1545 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1546 "number-of-cpus" => \$opt::number_of_cpus,
1547 "number-of-sockets" => \$opt::number_of_sockets,
1548 "number-of-cores" => \$opt::number_of_cores,
1549 "number-of-threads" => \$opt::number_of_threads,
1550 "use-sockets-instead-of-threads"
1551 => \$opt::use_sockets_instead_of_threads,
1552 "use-cores-instead-of-threads"
1553 => \$opt::use_cores_instead_of_threads,
1554 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1555 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1556 "nice=i" => \$opt::nice,
1557 "tag" => \$opt::tag,
1558 "tagstring|tag-string=s" => \$opt::tagstring,
1559 "onall" => \$opt::onall,
1560 "nonall" => \$opt::nonall,
1561 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1562 "sshlogin|S=s" => \@opt::sshlogin,
1563 "sshloginfile|slf=s" => \@opt::sshloginfile,
1564 "controlmaster|M" => \$opt::controlmaster,
1565 "ssh=s" => \$opt::ssh,
1566 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1567 => \@opt::transfer_files,
1568 "return=s" => \@opt::return,
1569 "trc=s" => \@opt::trc,
1570 "transfer" => \$opt::transfer,
1571 "cleanup" => \$opt::cleanup,
1572 "basefile|bf=s" => \@opt::basefile,
1573 "B=s" => \$opt::retired,
1574 "ctrlc|ctrl-c" => \$opt::retired,
1575 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1576 "workdir|work-dir|wd=s" => \$opt::workdir,
1577 "W=s" => \$opt::retired,
1578 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1579 "tmpdir|tempdir=s" => \$opt::tmpdir,
1580 "use-compress-program|compress-program=s" => \$opt::compress_program,
1581 "use-decompress-program|decompress-program=s"
1582 => \$opt::decompress_program,
1583 "compress" => \$opt::compress,
1584 "tty" => \$opt::tty,
1585 "T" => \$opt::retired,
1586 "H=i" => \$opt::retired,
1587 "dry-run|dryrun|dr" => \$opt::dryrun,
1588 "progress" => \$opt::progress,
1589 "eta" => \$opt::eta,
1590 "bar" => \$opt::bar,
1591 "shuf" => \$opt::shuf,
1592 "arg-sep|argsep=s" => \$opt::arg_sep,
1593 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1594 "trim=s" => \$opt::trim,
1595 "env=s" => \@opt::env,
1596 "recordenv|record-env" => \$opt::record_env,
1597 "session" => \$opt::session,
1598 "plain" => \$opt::plain,
1599 "profile|J=s" => \@opt::profile,
1600 "pipe|spreadstdin" => \$opt::pipe,
1601 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1602 "recstart=s" => \$opt::recstart,
1603 "recend=s" => \$opt::recend,
1604 "regexp|regex" => \$opt::regexp,
1605 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1606 "files|output-as-files|outputasfiles" => \$opt::files,
1607 "block|block-size|blocksize=s" => \$opt::blocksize,
1608 "blocktimeout|block-timeout|bt=s" => \$opt::blocktimeout,
1609 "tollef" => \$opt::tollef,
1610 "gnu" => \$opt::gnu,
1611 "link|xapply" => \$opt::link,
1612 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1613 # Before changing this line, please read
1614 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1615 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1616 "bibtex|citation" => \$opt::citation,
1617 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1618 # Termination and retries
1619 "halt-on-error|halt=s" => \$opt::halt,
1620 "limit=s" => \$opt::limit,
1621 "memfree=s" => \$opt::memfree,
1622 "memsuspend=s" => \$opt::memsuspend,
1623 "retries=s" => \$opt::retries,
1624 "timeout=s" => \$opt::timeout,
1625 "termseq|term-seq=s" => \$opt::termseq,
1626 # xargs-compatibility - implemented, man, testsuite
1627 "max-procs|P=s" => \$opt::jobs,
1628 "delimiter|d=s" => \$opt::d,
1629 "max-chars|s=i" => \$opt::max_chars,
1630 "arg-file|a=s" => \@opt::a,
1631 "no-run-if-empty|r" => \$opt::r,
1632 "replace|i:s" => \$opt::i,
1633 "E=s" => \$opt::eof,
1634 "eof|e:s" => \$opt::eof,
1635 "max-args|maxargs|n=i" => \$opt::max_args,
1636 "max-replace-args|N=i" => \$opt::max_replace_args,
1637 "colsep|col-sep|C=s" => \$opt::colsep,
1639 "help|h" => \$opt::help,
1641 "max-lines|l:f" => \$opt::max_lines,
1642 "interactive|p" => \$opt::interactive,
1643 "verbose|t" => \$opt::verbose,
1644 "version|V" => \$opt::version,
1645 "minversion|min-version=i" => \$opt::minversion,
1646 "show-limits|showlimits" => \$opt::show_limits,
1647 "exit|x" => \$opt::x,
1649 "semaphore" => \$opt::semaphore,
1650 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1651 "semaphorename|id=s" => \$opt::semaphorename,
1654 "wait" => \$opt::wait,
1655 # Shebang #!/usr/bin/parallel --shebang
1656 "shebang|hashbang" => \$opt::shebang,
1657 "internal-pipe-means-argfiles"
1658 => \$opt::internal_pipe_means_argfiles,
1659 "Y" => \$opt::retired,
1660 "skip-first-line" => \$opt::skip_first_line,
1661 "bug" => \$opt::bug,
1662 "header=s" => \$opt::header,
1663 "cat" => \$opt::cat,
1664 "fifo" => \$opt::fifo,
1665 "pipepart|pipe-part" => \$opt::pipepart,
1666 "tee" => \$opt::tee,
1667 "shard=s" => \$opt::shard,
1668 "bin=s" => \$opt::bin,
1669 "groupby|group-by=s" => \$opt::groupby,
1670 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1671 "embed" => \$opt::embed,
1675 sub get_options_from_array($@) {
1676 # Run GetOptions on @array
1678 # $array_ref = ref to @ARGV to parse
1679 # @keep_only = Keep only these options
1683 # true if parsing worked
1684 # false if parsing failed
1685 # @$array_ref is changed
1686 my ($array_ref, @keep_only) = @_;
1687 if(not @$array_ref) {
1688 # Empty array: No need to look more at that
1691 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1692 # supported everywhere
1694 my $this_is_ARGV = (\@::ARGV == $array_ref);
1695 if(not $this_is_ARGV) {
1696 @save_argv = @::ARGV;
1697 @::ARGV = @{$array_ref};
1699 # If @keep_only set: Ignore all values except @keep_only
1700 my %options = options_hash();
1703 @keep{@keep_only} = @keep_only;
1704 for my $k (grep { not $keep{$_} } keys %options) {
1705 # Store the value of the option in @dummy
1706 $options{$k} = \@dummy;
1709 my $retval = GetOptions(%options);
1710 if(not $this_is_ARGV) {
1711 @{$array_ref} = @::ARGV;
1712 @::ARGV = @save_argv;
1717 sub parse_options(@) {
1720 my @argv_before = @ARGV;
1721 @ARGV = read_options();
1723 # Before changing this line, please read
1724 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1725 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1726 if(defined $opt::citation) {
1727 citation(\@argv_before,\@ARGV);
1731 if($opt::nokeeporder) { $opt::keeporder = undef; }
1733 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1734 if($opt::bug) { ::die_bug("test-bug"); }
1735 $Global::debug = $opt::D;
1736 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1737 || $ENV{'SHELL'} || "/bin/sh";
1738 if(not -x $Global::shell and not which($Global::shell)) {
1739 ::error("Shell '$Global::shell' not found.");
1742 ::debug("init","Global::shell $Global::shell\n");
1743 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1744 if(defined $opt::X) { $Global::ContextReplace = 1; }
1745 if(defined $opt::silent) { $Global::verbose = 0; }
1746 if(defined $opt::null) { $/ = "\0"; }
1747 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1748 parse_replacement_string_options();
1749 if(defined $opt::tagstring) {
1750 $opt::tagstring = unquote_printf($opt::tagstring);
1751 if($opt::tagstring =~ /\Q$Global::parensleft\E.*\Q$Global::parensright\E/
1754 # --tagstring contains {= =} and --linebuffer =>
1755 # recompute replacement string for each use (do not cache)
1756 $Global::cache_replacement_eval = 0;
1759 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1760 if(defined $opt::quote) { $Global::quoting = 1; }
1761 if(defined $opt::r) { $Global::ignore_empty = 1; }
1762 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1763 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1764 if(defined $opt::max_args) {
1765 $Global::max_number_of_args = $opt::max_args;
1767 if(defined $opt::blocktimeout) {
1768 $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
1769 if($Global::blocktimeout < 1) {
1770 ::error("--block-timeout must be at least 1");
1774 if(defined $opt::timeout) {
1775 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1777 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1778 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1779 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1780 # Default: Same nice level as GNU Parallel is started at
1781 $opt::nice ||= eval { getpriority(0,0) } || 0;
1782 if(defined $opt::help) { usage(); exit(0); }
1783 if(defined $opt::embed) { embed(); exit(0); }
1784 if(defined $opt::sqlandworker) {
1785 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1787 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1788 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1789 if(defined $opt::csv) {
1790 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1791 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1792 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1793 my $sep = $csv_setting->{sep_char};
1794 $Global::csv = Text::CSV->new($csv_setting)
1795 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1797 if(defined $opt::header) {
1798 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1800 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1801 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1802 if(defined $opt::arg_file_sep) {
1803 $Global::arg_file_sep = $opt::arg_file_sep;
1805 if(defined $opt::number_of_sockets) {
1806 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1808 if(defined $opt::number_of_cpus) {
1809 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1811 if(defined $opt::number_of_cores) {
1812 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1814 if(defined $opt::number_of_threads) {
1815 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1817 if(defined $opt::max_line_length_allowed) {
1818 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1820 if(defined $opt::version) { version(); wait_and_exit(0); }
1821 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1822 if(defined $opt::show_limits) { show_limits(); }
1823 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1824 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1825 if(@opt::return) { push @Global::ret_files, @opt::return; }
1826 if($opt::transfer) {
1827 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1829 push @Global::transfer_files, @opt::transfer_files;
1830 if(not defined $opt::recstart and
1831 not defined $opt::recend) { $opt::recend = "\n"; }
1832 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1833 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1834 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1835 $Global::blocksize = 2**31-1;
1837 if($^O eq "cygwin" and
1838 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1839 and $Global::blocksize > 65535) {
1840 warning("--blocksize >= 64K causes problems on Cygwin.");
1842 $opt::memfree = multiply_binary_prefix($opt::memfree);
1843 $opt::memsuspend = multiply_binary_prefix($opt::memsuspend);
1844 $Global::memlimit = $opt::memsuspend + $opt::memfree;
1845 check_invalid_option_combinations();
1846 if((defined $opt::fifo or defined $opt::cat)
1847 and not $opt::pipepart) {
1850 if(defined $opt::minversion) {
1851 print $Global::version,"\n";
1852 if($Global::version < $opt::minversion) {
1858 if(not defined $opt::delay) {
1859 # Set --delay to --sshdelay if not set
1860 $opt::delay = $opt::sshdelay;
1862 $opt::delay = multiply_time_units($opt::delay);
1863 if($opt::compress_program) {
1865 $opt::decompress_program ||= $opt::compress_program." -dc";
1868 if(defined $opt::results) {
1869 # Is the output a dir or CSV-file?
1870 if($opt::results =~ /\.csv$/i) {
1871 # CSV with , as separator
1872 $Global::csvsep = ",";
1873 $Global::membuffer ||= 1;
1874 } elsif($opt::results =~ /\.tsv$/i) {
1875 # CSV with TAB as separator
1876 $Global::csvsep = "\t";
1877 $Global::membuffer ||= 1;
1880 if($opt::compress) {
1881 my ($compress, $decompress) = find_compression_program();
1882 $opt::compress_program ||= $compress;
1883 $opt::decompress_program ||= $decompress;
1884 if(($opt::results and not $Global::csvsep) or $opt::files) {
1885 # No need for decompressing
1886 $opt::decompress_program = "cat >/dev/null";
1889 if(defined $opt::dryrun) {
1890 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1894 if(defined $opt::nonall) {
1895 # Append a dummy empty argument if there are no arguments
1896 # on the command line to avoid reading from STDIN.
1897 # arg_sep = random 50 char
1898 # \0noarg => nothing (not the empty string)
1899 $Global::arg_sep = join "",
1900 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1901 push @ARGV, $Global::arg_sep, "\0noarg";
1903 if(defined $opt::tee) {
1904 if(not defined $opt::jobs) {
1908 if(defined $opt::tty) {
1909 # Defaults for --tty: -j1 -u
1910 # Can be overridden with -jXXX -g
1911 if(not defined $opt::jobs) {
1914 if(not defined $opt::group) {
1919 push @Global::ret_files, @opt::trc;
1920 if(not @Global::transfer_files) {
1921 # Defaults to --transferfile {}
1922 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1926 if(defined $opt::max_lines) {
1927 if($opt::max_lines eq "-0") {
1928 # -l -0 (swallowed -0)
1929 $opt::max_lines = 1;
1932 } elsif ($opt::max_lines == 0) {
1933 # If not given (or if 0 is given) => 1
1934 $opt::max_lines = 1;
1936 $Global::max_lines = $opt::max_lines;
1937 if(not $opt::pipe) {
1938 # --pipe -L means length of record - not max_number_of_args
1939 $Global::max_number_of_args ||= $Global::max_lines;
1943 # Read more than one arg at a time (-L, -N)
1944 if(defined $opt::L) {
1945 $Global::max_lines = $opt::L;
1946 if(not $opt::pipe) {
1947 # --pipe -L means length of record - not max_number_of_args
1948 $Global::max_number_of_args ||= $Global::max_lines;
1951 if(defined $opt::max_replace_args) {
1952 $Global::max_number_of_args = $opt::max_replace_args;
1953 $Global::ContextReplace = 1;
1955 if((defined $opt::L or defined $opt::max_replace_args)
1957 not ($opt::xargs or $opt::m)) {
1958 $Global::ContextReplace = 1;
1960 if(defined $opt::tag and not defined $opt::tagstring) {
1962 $opt::tagstring = $Global::parensleft.$Global::parensright;
1964 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1965 # Deal with ::: :::+ :::: and ::::+
1966 @ARGV = read_args_from_command_line();
1970 if(defined $opt::eta) { $opt::progress = $opt::eta; }
1971 if(defined $opt::bar) { $opt::progress = $opt::bar; }
1973 # Funding a free software project is hard. GNU Parallel is no
1974 # exception. On top of that it seems the less visible a project
1975 # is, the harder it is to get funding. And the nature of GNU
1976 # Parallel is that it will never be seen by "the guy with the
1977 # checkbook", but only by the people doing the actual work.
1979 # This problem has been covered by others - though no solution has
1981 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1982 # https://blog.licensezero.com/2019/08/24/process-of-elimination.html
1983 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1985 # The FAQ tells you why the citation notice exists:
1986 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1988 # If you want GNU Parallel to be maintained in the future, and not
1989 # just wither away like so many other free software tools, you
1990 # need to help finance the development.
1992 # The citation notice is a simple way of doing so, as citations
1993 # makes it possible to me to get a job where I can maintain GNU
1994 # Parallel as part of the job.
1996 # This means you can help financing development
1998 # WITHOUT PAYING A SINGLE CENT!
2000 # Before implementing the citation notice it was discussed with
2002 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2004 # Having to spend 10 seconds on running 'parallel --citation' once
2005 # is no doubt not an ideal solution, but no one has so far come up
2006 # with an ideal solution - neither for funding GNU Parallel nor
2007 # other free software.
2009 # If you believe you have the perfect solution, you should try it
2010 # out, and if it works, you should post it on the email
2011 # list. Ideas that will cost work and which have not been tested
2012 # are, however, unlikely to be prioritized.
2014 # Please note that GPL version 3 gives you the right to fork GNU
2015 # Parallel under a new name, but it does not give you the right to
2016 # distribute modified copies with the citation notice disabled in
2017 # a way where the software can be confused with GNU Parallel. To
2018 # do that you need to be the owner of the GNU Parallel
2019 # trademark. The xt:Commerce case shows this.
2021 # Description of the xt:Commerce case in OLG Duesseldorf
2022 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2023 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2025 # The verdict in German
2026 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2027 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2029 # Other free software limiting derivates by the same name:
2030 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2031 # https://tm.joomla.org/trademark-faq.html
2032 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2034 # Running 'parallel --citation' one single time takes less than 10
2035 # seconds, and will silence the citation notice for future
2036 # runs. If that is too much trouble for you, why not use one of
2037 # the alternatives instead?
2038 # See a list in: 'man parallel_alternatives'
2040 # If you want GNU Parallel to be maintained in the future keep
2043 # Seriously: _YOU_ will be harming free software by removing the
2044 # notice. _YOU_ make it harder to justify spending time developing
2045 # it. If you *do* remove the line, please email
2046 # hallofshame@tange.dk if you want to avoid being put in a hall of
2051 if($ENV{'PARALLEL_ENV'}) {
2052 # Read environment and set $Global::parallel_env
2053 # Must be done before is_acceptable_command_line_length()
2054 my $penv = $ENV{'PARALLEL_ENV'};
2055 # unset $PARALLEL_ENV: It should not be given to children
2056 # because it takes up a lot of env space
2057 delete $ENV{'PARALLEL_ENV'};
2059 # This is a file/fifo: Replace envvar with content of file
2060 open(my $parallel_env, "<", $penv) ||
2061 ::die_bug("Cannot read parallel_env from $penv");
2062 local $/; # Put <> in slurp mode
2063 $penv = <$parallel_env>;
2064 close $parallel_env;
2066 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2067 $penv =~ s/\001/\n/g;
2069 ::warning('\0 (NUL) in environment is not supported');
2071 $Global::parallel_env = $penv;
2076 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
2077 # As we do not know the max line length on the remote machine
2078 # long commands generated by xargs may fail
2079 # If $opt::max_replace_args is set, it is probably safe
2080 ::warning("Using -X or -m with --sshlogin may fail.");
2083 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2086 if($opt::sqlmaster or $opt::sqlworker) {
2087 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2089 if($opt::sqlworker) { $Global::membuffer ||= 1; }
2090 # The sqlmaster groups the arguments, so the should just read one
2091 if($opt::sqlworker and not $opt::sqlmaster) { $Global::max_number_of_args = 1; }
2095 sub check_invalid_option_combinations() {
2096 if(defined $opt::timeout and
2097 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2098 ::error("--timeout must be seconds or percentage.");
2101 if(defined $opt::fifo and defined $opt::cat) {
2102 ::error("--fifo cannot be combined with --cat.");
2103 ::wait_and_exit(255);
2105 if(defined $opt::retries and defined $opt::roundrobin) {
2106 ::error("--retries cannot be combined with --roundrobin.");
2107 ::wait_and_exit(255);
2109 if(defined $opt::pipepart and
2110 (defined $opt::L or defined $opt::max_lines
2111 or defined $opt::max_replace_args)) {
2112 ::error("--pipepart is incompatible with --max-replace-args, ".
2113 "--max-lines, and -L.");
2116 if(defined $opt::group and $opt::ungroup) {
2117 ::error("--group cannot be combined with --ungroup.");
2118 ::wait_and_exit(255);
2120 if(defined $opt::group and $opt::linebuffer) {
2121 ::error("--group cannot be combined with --line-buffer.");
2122 ::wait_and_exit(255);
2124 if(defined $opt::ungroup and $opt::linebuffer) {
2125 ::error("--ungroup cannot be combined with --line-buffer.");
2126 ::wait_and_exit(255);
2128 if(defined $opt::tollef and not $opt::gnu) {
2129 ::error("--tollef has been retired.",
2130 "Remove --tollef or use --gnu to override --tollef.");
2131 ::wait_and_exit(255);
2133 if(defined $opt::retired) {
2134 ::error("-g has been retired. Use --group.",
2135 "-B has been retired. Use --bf.",
2136 "-T has been retired. Use --tty.",
2137 "-U has been retired. Use --er.",
2138 "-W has been retired. Use --wd.",
2139 "-Y has been retired. Use --shebang.",
2140 "-H has been retired. Use --halt.",
2141 "--sql has been retired. Use --sqlmaster.",
2142 "--ctrlc has been retired.",
2143 "--noctrlc has been retired.");
2144 ::wait_and_exit(255);
2147 if(not $opt::pipe and not $opt::pipepart) {
2150 if($opt::remove_rec_sep) {
2151 ::error("--remove-rec-sep is not compatible with --groupby");
2152 ::wait_and_exit(255);
2154 if($opt::recstart) {
2155 ::error("--recstart is not compatible with --groupby");
2156 ::wait_and_exit(255);
2158 if($opt::recend ne "\n") {
2159 ::error("--recend is not compatible with --groupby");
2160 ::wait_and_exit(255);
2165 sub init_globals() {
2167 $Global::version = 20200923;
2168 $Global::progname = 'parallel';
2169 $::name = "GNU Parallel";
2170 $Global::infinity = 2**31;
2172 $Global::verbose = 0;
2173 # Don't quote every part of the command line
2174 $Global::quoting = 0;
2175 # Quote replacement strings
2176 $Global::quote_replace = 1;
2177 $Global::total_completed = 0;
2178 $Global::cache_replacement_eval = 1;
2179 # Read only table with default --rpl values
2183 '{#}' => '1 $_=$job->seq()',
2184 '{%}' => '1 $_=$job->slot()',
2187 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2188 '$_ = dirname($_);'),
2189 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
2190 '{.}' => 's:\.[^/.]+$::',
2195 # = {.}.{+.} = {+/}/{/.}.{+.}
2196 # = {..}.{+..} = {+/}/{/..}.{+..}
2197 # = {...}.{+...} = {+/}/{/...}.{+...}
2198 '{+/}' => 's:/[^/]*$::',
2199 '{+.}' => 's:.*\.::',
2200 '{+..}' => 's:.*\.([^.]*\.):$1:',
2201 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
2202 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
2203 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2204 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2205 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2206 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
2207 # {##} = number of jobs
2208 '{##}' => '$_=total_jobs()',
2210 '{:-([^}]+?)}' => '$_ ||= $$1',
2212 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2214 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2216 '{#([^#}][^}]*?)}' => 's/^$$1//;',
2218 '{%([^}]+?)}' => 's/$$1$//;',
2219 # Bash ${a/def/ghi} ${a/def/}
2220 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2222 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2224 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2226 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2228 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2229 # {slot} = $PARALLEL_JOBSLOT
2230 '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2232 '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2233 # {sshlogin} = sshlogin
2234 '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2236 # Modifiable copy of %Global::replace
2237 %Global::rpl = %Global::replace;
2239 $Global::ignore_empty = 0;
2240 $Global::interactive = 0;
2241 $Global::stderr_verbose = 0;
2242 $Global::default_simultaneous_sshlogins = 9;
2243 $Global::exitstatus = 0;
2244 $Global::arg_sep = ":::";
2245 $Global::arg_file_sep = "::::";
2246 $Global::trim = 'n';
2247 $Global::max_jobs_running = 0;
2248 $Global::job_already_run = '';
2249 $ENV{'TMPDIR'} ||= "/tmp";
2250 $ENV{'OLDPWD'} = $ENV{'PWD'};
2251 if(not $ENV{HOME}) {
2252 # $ENV{HOME} is sometimes not set if called from PHP
2253 ::warning("\$HOME not set. Using /tmp.");
2254 $ENV{HOME} = "/tmp";
2256 # no warnings to allow for undefined $XDG_*
2257 no warnings 'uninitialized';
2258 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2259 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2260 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
2261 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
2262 # Keep only dirs that exist
2263 @Global::config_dirs =
2265 $ENV{'PARALLEL_HOME'},
2266 (map { "$_/parallel" }
2268 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2269 $ENV{'HOME'} . "/.parallel");
2270 # Use first dir as config dir
2271 $Global::config_dir = $Global::config_dirs[0] ||
2272 $ENV{'HOME'} . "/.parallel";
2273 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
2274 # Keep only dirs that exist
2275 @Global::cache_dirs =
2277 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
2278 $Global::cache_dir = $Global::cache_dirs[0] ||
2279 $ENV{'HOME'} . "/.parallel";
2283 # $opt::halt flavours
2286 # $Global::halt_when
2287 # $Global::halt_fail
2288 # $Global::halt_success
2290 # $Global::halt_count
2291 if(defined $opt::halt) {
2292 my %halt_expansion = (
2294 "1" => "soon,fail=1",
2295 "2" => "now,fail=1",
2296 "-1" => "soon,success=1",
2297 "-2" => "now,success=1",
2299 # Expand -2,-1,0,1,2 into long form
2300 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2301 # --halt 5% == --halt soon,fail=5%
2302 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2303 # Split: soon,fail=5%
2304 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2305 if(not grep { $when eq $_ } qw(never soon now)) {
2306 ::error
("--halt must have 'never', 'soon', or 'now'.");
2307 ::wait_and_exit
(255);
2309 $Global::halt_when
= $when;
2310 if($when ne "never") {
2311 if($fail_success eq "fail") {
2312 $Global::halt_fail
= 1;
2313 } elsif($fail_success eq "success") {
2314 $Global::halt_success
= 1;
2315 } elsif($fail_success eq "done") {
2316 $Global::halt_done
= 1;
2318 ::error
("--halt $when must be followed by ,success or ,fail.");
2319 ::wait_and_exit
(255);
2321 if($pct_count =~ /^(\d+)%$/) {
2322 $Global::halt_pct
= $1/100;
2323 } elsif($pct_count =~ /^(\d+)$/) {
2324 $Global::halt_count
= $1;
2326 ::error
("--halt $when,$fail_success ".
2327 "must be followed by ,number or ,percent%.");
2328 ::wait_and_exit
(255);
2334 sub parse_replacement_string_options
() {
2338 # $Global::parensleft
2339 # $Global::parensright
2341 # $Global::parensleft
2342 # $Global::parensright
2348 # $opt::basenamereplace
2349 # $opt::dirnamereplace
2352 # $opt::basenameextensionreplace
2355 # Modify %Global::rpl
2356 # Replace $old with $new
2357 my ($old,$new) = @_;
2359 $Global::rpl
{$new} = $Global::rpl
{$old};
2360 delete $Global::rpl
{$old};
2363 my $parens = "{==}";
2364 if(defined $opt::parens
) { $parens = $opt::parens
; }
2365 my $parenslen = 0.5*length $parens;
2366 $Global::parensleft
= substr($parens,0,$parenslen);
2367 $Global::parensright
= substr($parens,$parenslen);
2368 if(defined $opt::plus
) { %Global::rpl
= (%Global::plus
,%Global::rpl
); }
2369 if(defined $opt::I
) { rpl
('{}',$opt::I
); }
2370 if(defined $opt::i
and $opt::i
) { rpl
('{}',$opt::i
); }
2371 if(defined $opt::U
) { rpl
('{.}',$opt::U
); }
2372 if(defined $opt::basenamereplace
) { rpl
('{/}',$opt::basenamereplace
); }
2373 if(defined $opt::dirnamereplace
) { rpl
('{//}',$opt::dirnamereplace
); }
2374 if(defined $opt::seqreplace
) { rpl
('{#}',$opt::seqreplace
); }
2375 if(defined $opt::slotreplace
) { rpl
('{%}',$opt::slotreplace
); }
2376 if(defined $opt::basenameextensionreplace
) {
2377 rpl
('{/.}',$opt::basenameextensionreplace
);
2380 # Create $Global::rpl entries for --rpl options
2381 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2382 my ($shorthand,$long) = split/\s/,$_,2;
2383 $Global::rpl
{$shorthand} = $long;
2387 sub parse_semaphore
() {
2388 # Semaphore defaults
2389 # Must be done before computing number of processes and max_line_length
2390 # because when running as a semaphore GNU Parallel does not read args
2393 # $Global::semaphore
2394 # $opt::semaphoretimeout
2395 # $Semaphore::timeout
2396 # $opt::semaphorename
2404 # @Global::unget_argv
2405 # $Global::default_simultaneous_sshlogins
2407 # $Global::interactive
2408 $Global::semaphore
||= ($0 =~ m
:(^|/)sem
$:); # called as 'sem'
2409 if(defined $opt::semaphore
) { $Global::semaphore
= 1; }
2410 if(defined $opt::semaphoretimeout
) { $Global::semaphore
= 1; }
2411 if(defined $opt::semaphorename
) { $Global::semaphore
= 1; }
2412 if(defined $opt::fg
and not $opt::tmux
and not $opt::tmuxpane
) {
2413 $Global::semaphore
= 1;
2415 if(defined $opt::bg
) { $Global::semaphore
= 1; }
2416 if(defined $opt::wait and not $opt::sqlmaster
) {
2417 $Global::semaphore
= 1; @ARGV = "true";
2419 if($Global::semaphore
) {
2421 # A semaphore does not take input from neither stdin nor file
2422 ::error
("A semaphore does not take input from neither stdin nor a file\n");
2423 ::wait_and_exit
(255);
2425 @opt::a
= ("/dev/null");
2426 # Append a dummy empty argument
2427 # \0 => nothing (not the empty string)
2428 push(@Global::unget_argv
, [Arg
->new("\0noarg")]);
2429 $Semaphore::timeout
= $opt::semaphoretimeout
|| 0;
2430 if(defined $opt::semaphorename
) {
2431 $Semaphore::name
= $opt::semaphorename
;
2434 $Semaphore::name
= `tty`;
2435 chomp $Semaphore::name
;
2437 $Semaphore::fg
= $opt::fg
;
2438 $Semaphore::wait = $opt::wait;
2439 $Global::default_simultaneous_sshlogins
= 1;
2440 if(not defined $opt::jobs
) {
2443 if($Global::interactive
and $opt::bg
) {
2444 ::error
("Jobs running in the ".
2445 "background cannot be interactive.");
2446 ::wait_and_exit
(255);
2452 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2454 my $ignore_filename = $Global::config_dir
. "/ignored_vars";
2455 if(open(my $vars_fh, ">", $ignore_filename)) {
2456 print $vars_fh map { $_,"\n" } keys %ENV;
2458 ::error
("Cannot write to $ignore_filename.");
2459 ::wait_and_exit
(255);
2464 # Open joblog as specified by --joblog
2467 # $opt::resume_failed
2470 # $Global::job_already_run
2473 if(($opt::resume
or $opt::resume_failed
)
2475 not ($opt::joblog
or $opt::results
)) {
2476 ::error
("--resume and --resume-failed require --joblog or --results.");
2477 ::wait_and_exit
(255);
2479 if(defined $opt::joblog
and $opt::joblog
=~ s/^\+//) {
2480 # --joblog +filename = append to filename
2487 not $opt::sqlworker
)) {
2488 # Do not log if --sqlworker
2489 if($opt::resume
|| $opt::resume_failed
|| $opt::retry_failed
) {
2490 if(open(my $joblog_fh, "<", $opt::joblog
)) {
2492 # Override $/ with \n because -d might be set
2494 # If there is a header: Open as append later
2495 $append = <$joblog_fh>;
2497 if($opt::retry_failed
) {
2498 # Make a regexp that only matches commands with exit+signal=0
2499 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2500 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2502 while(<$joblog_fh>) {
2503 if(/$joblog_regexp/o) {
2504 # This is 30% faster than set_job_already_run($1);
2505 vec($Global::job_already_run
,($1||0),1) = 1;
2506 $Global::total_completed
++;
2507 $group[$1-1] = "true";
2508 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2509 # Grab out the command
2513 ::error
("Format of '$opt::joblog' is wrong: $_");
2514 ::wait_and_exit
(255);
2518 my ($outfh,$name) = ::tmpfile
(SUFFIX
=> ".arg");
2520 # Put args into argfile
2521 if(grep /\0/, @group) {
2522 # force --null to deal with \n in commandlines
2523 ::warning
("Command lines contain newline. Forcing --null.");
2527 # Replace \0 with '\n' as used in print_joblog()
2528 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2530 exit_if_disk_full
();
2531 # Set filehandle to -a
2534 # Remove $command (so -a is run)
2537 if($opt::resume
|| $opt::resume_failed
) {
2538 if($opt::resume_failed
) {
2539 # Make a regexp that only matches commands with exit+signal=0
2540 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2541 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2543 # Just match the job number
2544 $joblog_regexp='^(\d+)';
2546 while(<$joblog_fh>) {
2547 if(/$joblog_regexp/o) {
2548 # This is 30% faster than set_job_already_run($1);
2549 vec($Global::job_already_run
,($1||0),1) = 1;
2550 $Global::total_completed
++;
2551 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2552 ::error
("Format of '$opt::joblog' is wrong: $_");
2553 ::wait_and_exit
(255);
2559 # $opt::null may be set if the commands contain \n
2560 if($opt::null
) { $/ = "\0"; }
2563 # Do not write to joblog in a dry-run
2564 if(not open($Global::joblog
, ">", "/dev/null")) {
2565 ::error
("Cannot write to --joblog $opt::joblog.");
2566 ::wait_and_exit
(255);
2570 if(not open($Global::joblog
, ">>", $opt::joblog
)) {
2571 ::error
("Cannot append to --joblog $opt::joblog.");
2572 ::wait_and_exit
(255);
2575 if($opt::joblog
eq "-") {
2576 # Use STDOUT as joblog
2577 $Global::joblog
= $Global::fd
{1};
2578 } elsif(not open($Global::joblog
, ">", $opt::joblog
)) {
2579 # Overwrite the joblog
2580 ::error
("Cannot write to --joblog $opt::joblog.");
2581 ::wait_and_exit
(255);
2583 print $Global::joblog
2584 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2585 "Send", "Receive", "Exitval", "Signal", "Command"
2594 if($opt::results
eq "-.csv"
2596 $opt::results
eq "-.tsv") {
2597 # Output as CSV/TSV on stdout
2598 open $Global::csv_fh
, ">&", "STDOUT" or
2599 ::die_bug
("Can't dup STDOUT in csv: $!");
2600 # Do not print any other output to STDOUT
2601 # by forcing all other output to /dev/null
2602 open my $fd, ">", "/dev/null" or
2603 ::die_bug
("Can't >/dev/null in csv: $!");
2604 $Global::fd
{1} = $fd;
2605 $Global::fd
{2} = $fd;
2606 } elsif($Global::csvsep
) {
2607 if(not open($Global::csv_fh
,">",$opt::results
)) {
2608 ::error
("Cannot open results file `$opt::results': ".
2616 sub find_compression_program
() {
2617 # Find a fast compression program
2619 # $compress_program = compress program with options
2620 # $decompress_program = decompress program with options
2622 # Search for these. Sorted by speed on 128 core
2624 # seq 120000000|shuf > 1gb &
2626 # apt install make g++ htop
2627 # wget -O - pi.dk/3 | bash
2628 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2629 # git clone https://github.com/facebook/zstd.git
2630 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2631 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2632 # chmod +x /usr/local/bin/lrz
2634 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2635 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2636 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2637 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2641 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2643 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2644 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2645 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2646 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2647 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2649 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2650 lrz pxz bzip2 lzma xz clzip);
2653 return ("$p -c -1","$p -dc");
2657 return ("cat","cat");
2660 sub read_options
() {
2661 # Read options from command line, profile and $PARALLEL
2663 # $opt::shebang_wrap
2671 # @ARGV_no_opt = @ARGV without --options
2673 # This must be done first as this may exec myself
2674 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2675 $ARGV[0] =~ /^--shebang-?wrap/ or
2676 $ARGV[0] =~ /^--hashbang/)) {
2677 # Program is called from #! line in script
2678 # remove --shebang-wrap if it is set
2679 $opt::shebang_wrap
= ($ARGV[0] =~ s/^--shebang-?wrap *//);
2680 # remove --shebang if it is set
2681 $opt::shebang
= ($ARGV[0] =~ s/^--shebang *//);
2682 # remove --hashbang if it is set
2683 $opt::shebang
.= ($ARGV[0] =~ s/^--hashbang *//);
2685 my $argfile = Q
(pop @ARGV);
2686 # exec myself to split $ARGV[0] into separate fields
2687 exec "$0 --skip-first-line -a $argfile @ARGV";
2689 if($opt::shebang_wrap
) {
2692 if ($^O
eq 'freebsd') {
2693 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2694 my @nooptions = @ARGV;
2695 get_options_from_array
(\
@nooptions);
2696 while($#ARGV > $#nooptions) {
2697 push @options, shift @ARGV;
2699 while(@ARGV and $ARGV[0] ne ":::") {
2700 push @parser, shift @ARGV;
2702 if(@ARGV and $ARGV[0] eq ":::") {
2706 @options = shift @ARGV;
2708 my $script = Q
(shift @ARGV);
2709 # exec myself to split $ARGV[0] into separate fields
2710 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2714 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2715 ::warning
("--shebang and --shebang-wrap must be the first argument.\n");
2718 Getopt
::Long
::Configure
("bundling","require_order");
2719 my @ARGV_copy = @ARGV;
2720 my @ARGV_orig = @ARGV;
2721 # Check if there is a --profile to set @opt::profile
2722 get_options_from_array
(\
@ARGV_copy,"profile|J=s","plain") || die_usage
();
2723 my @ARGV_profile = ();
2725 if(not $opt::plain
) {
2726 # Add options from $PARALLEL_HOME/config and other profiles
2727 my @config_profiles = (
2728 "/etc/parallel/config",
2729 (map { "$_/config" } @Global::config_dirs
),
2730 $ENV{'HOME'}."/.parallelrc");
2731 my @profiles = @config_profiles;
2733 # --profile overrides default profiles
2735 for my $profile (@opt::profile
) {
2736 if($profile =~ m
:^\
./|^/:) {
2737 # Look for ./profile in .
2738 # Look for /profile in /
2739 push @profiles, grep { -r
$_ } $profile;
2741 # Look for the $profile in @Global::config_dirs
2742 push @profiles, grep { -r
$_ }
2743 map { "$_/$profile" } @Global::config_dirs
;
2747 for my $profile (@profiles) {
2749 ::debug
("init","Read $profile\n");
2751 open (my $in_fh, "<", $profile) ||
2752 ::die_bug
("read-profile: $profile");
2756 push @ARGV_profile, shell_words
($_);
2760 if(grep /^$profile$/, @config_profiles) {
2761 # config file is not required to exist
2763 ::error
("$profile not readable.");
2768 # Add options from shell variable $PARALLEL
2769 if($ENV{'PARALLEL'}) {
2770 push @ARGV_env, shell_words
($ENV{'PARALLEL'});
2772 # Add options from env_parallel.csh via $PARALLEL_CSH
2773 if($ENV{'PARALLEL_CSH'}) {
2774 push @ARGV_env, shell_words
($ENV{'PARALLEL_CSH'});
2777 Getopt
::Long
::Configure
("bundling","require_order");
2778 get_options_from_array
(\
@ARGV_profile) || die_usage
();
2779 get_options_from_array
(\
@ARGV_env) || die_usage
();
2780 get_options_from_array
(\
@ARGV) || die_usage
();
2781 # What were the options given on the command line?
2782 # Used to start --sqlworker
2783 my $ai = arrayindex
(\
@ARGV_orig, \
@ARGV);
2784 @Global::options_in_argv
= @ARGV_orig[0..$ai-1];
2785 # Prepend non-options to @ARGV (such as commands like 'nice')
2786 unshift @ARGV, @ARGV_profile, @ARGV_env;
2791 # Similar to Perl's index function, but for arrays
2793 # $arr_ref1 = ref to @array1 to search in
2794 # $arr_ref2 = ref to @array2 to search for
2796 # $pos = position of @array1 in @array2, -1 if not found
2797 my ($arr_ref1,$arr_ref2) = @_;
2798 my $array1_as_string = join "", map { "\0".$_ } @
$arr_ref1;
2799 my $array2_as_string = join "", map { "\0".$_ } @
$arr_ref2;
2800 my $i = index($array1_as_string,$array2_as_string,0);
2801 if($i == -1) { return -1 }
2802 my @before = split /\0/, substr($array1_as_string,0,$i);
2806 sub read_args_from_command_line
() {
2807 # Arguments given on the command line after:
2808 # ::: ($Global::arg_sep)
2809 # :::: ($Global::arg_file_sep)
2810 # :::+ ($Global::arg_sep with --link)
2811 # ::::+ ($Global::arg_file_sep with --link)
2812 # Removes the arguments from @ARGV and:
2813 # - puts filenames into -a
2814 # - puts arguments into files and add the files to -a
2815 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2817 # @::ARGV = command option ::: arg arg arg :::: argfiles
2820 # $Global::arg_file_sep
2821 # $opt::internal_pipe_means_argfiles
2825 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2827 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2828 if($arg eq $Global::arg_sep
2830 $arg eq $Global::arg_sep
."+"
2832 $arg eq $Global::arg_file_sep
2834 $arg eq $Global::arg_file_sep
."+") {
2835 my $group_sep = $arg; # This group of arguments is args or argfiles
2837 while(defined ($arg = shift @ARGV)) {
2838 if($arg eq $Global::arg_sep
2840 $arg eq $Global::arg_sep
."+"
2842 $arg eq $Global::arg_file_sep
2844 $arg eq $Global::arg_file_sep
."+") {
2845 # exit while loop if finding new separator
2848 # If not hitting ::: :::+ :::: or ::::+
2849 # Append it to the group
2853 my $is_linked = ($group_sep =~ /\+$/) ?
1 : 0;
2854 my $is_file = ($group_sep eq $Global::arg_file_sep
2856 $group_sep eq $Global::arg_file_sep
."+");
2859 push @opt::linkinputsource
, map { $is_linked } @group;
2862 push @opt::linkinputsource
, $is_linked;
2865 or ($opt::internal_pipe_means_argfiles
and $opt::pipe)
2867 # Group of file names on the command line.
2868 # Append args into -a
2869 push @opt::a
, @group;
2871 # Group of arguments on the command line.
2872 # Put them into a file.
2874 my ($outfh,$name) = ::tmpfile
(SUFFIX
=> ".arg");
2876 # Put args into argfile
2877 print $outfh map { $_,$/ } @group;
2879 exit_if_disk_full
();
2880 # Append filehandle to -a
2881 push @opt::a
, $outfh;
2884 # $arg is ::: :::+ :::: or ::::+
2885 # so there is another group
2888 # $arg is undef -> @ARGV empty
2892 push @new_argv, $arg;
2894 # Output: @ARGV = command to run with options
2900 unlink keys %Global::unlink;
2901 map { rmdir $_ } keys %Global::unlink;
2902 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
2903 for(keys %Global::sshmaster
) {
2904 # If 'ssh -M's are running: kill them
2910 sub __QUOTING_ARGUMENTS_FOR_SHELL__
() {}
2912 sub shell_quote
(@
) {
2914 # @strings = strings to be quoted
2916 # @shell_quoted_strings = string quoted as needed by the shell
2917 return wantarray ?
(map { Q
($_) } @_) : (join" ",map { Q
($_) } @_);
2920 sub shell_quote_scalar_rc
($) {
2921 # Quote for the rc-shell
2926 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2927 # A string was replaced
2928 # No need to test for "" or \0
2931 } elsif($a eq "\0") {
2938 sub shell_quote_scalar_csh
($) {
2942 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2943 # This is 1% faster than the above
2944 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2946 # quote newline in csh as \\\n
2947 ($a =~ s/[\n]/"\\\n"/go)) {
2948 # A string was replaced
2949 # No need to test for "" or \0
2952 } elsif($a eq "\0") {
2959 sub shell_quote_scalar_default
($) {
2960 # Quote for other shells (Bourne compatibles)
2962 # $string = string to be quoted
2964 # $shell_quoted = string quoted as needed by the shell
2966 if($s =~ /[^-_.+a-z0-9\/]/i
) {
2967 $s =~ s/'/'"'"'/g; # "-quote single quotes
2968 $s = "'$s'"; # '-quote entire string
2969 $s =~ s/^''//; # Remove unneeded '' at ends
2970 $s =~ s/''$//; # (faster than s/^''|''$//g)
2972 } elsif ($s eq "") {
2980 sub shell_quote_scalar
($) {
2981 # Quote the string so the shell will not expand any special chars
2983 # $string = string to be quoted
2985 # $shell_quoted = string quoted as needed by the shell
2987 # Speed optimization: Choose the correct shell_quote_scalar_*
2988 # and call that directly from now on
2989 no warnings
'redefine';
2990 if($Global::cshell
) {
2992 *shell_quote_scalar
= \
&shell_quote_scalar_csh
;
2993 } elsif($Global::shell
=~ m
:(^|/)rc
$:) {
2995 *shell_quote_scalar
= \
&shell_quote_scalar_rc
;
2998 *shell_quote_scalar
= \
&shell_quote_scalar_default
;
3000 # The sub is now redefined. Call it
3001 return shell_quote_scalar
($_[0]);
3005 # Q alias for ::shell_quote_scalar
3006 my $ret = shell_quote_scalar
($_[0]);
3007 no warnings
'redefine';
3008 *Q
= \
&::shell_quote_scalar
;
3012 sub shell_quote_file
($) {
3013 # Quote the string so shell will not expand any special chars
3014 # and prepend ./ if needed
3016 # $filename = filename to be shell quoted
3018 # $quoted_filename = filename quoted with \ and ./ if needed
3021 if($a =~ m
:^/: or $a =~ m:^\./:) {
3022 # /abs/path or ./rel/path => skip
3024 # rel/path => ./rel/path
3031 sub shell_words
(@
) {
3033 # $string = shell line
3035 # @shell_words = $string split into words as shell would do
3036 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3037 return Text
::ParseWords
::shellwords
(@_);
3040 sub perl_quote_scalar
($) {
3041 # Quote the string so perl's eval will not expand any special chars
3043 # $string = string to be quoted
3045 # $perl_quoted = string quoted with \ as needed by perl's eval
3048 $a =~ s/[\\\"\$\@]/\\$&/go;
3053 # -w complains about prototype
3055 # pQ alias for ::perl_quote_scalar
3056 my $ret = perl_quote_scalar
($_[0]);
3057 *pQ
= \
&::perl_quote_scalar
;
3061 sub unquote_printf
() {
3062 # Convert \t \n \r \000 \0
3064 # $string = string with \t \n \r \num \0
3066 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3071 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3072 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3077 sub __FILEHANDLES__
() {}
3080 sub save_stdin_stdout_stderr
() {
3081 # Remember the original STDIN, STDOUT and STDERR
3082 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3085 # $Global::original_stderr
3086 # $Global::original_stdin
3089 # TODO Disabled until we have an open3 that will take n filehandles
3090 # for my $fdno (1..61) {
3091 # # /dev/fd/62 and above are used by bash for <(cmd)
3092 # # Find file descriptors that are already opened (by the shell)
3093 # Only focus on stdout+stderr for now
3094 for my $fdno (1..2) {
3096 # 2-argument-open is used to be compatible with old perl 5.8.0
3097 # bug #43570: Perl 5.8.0 creates 61 files
3098 if(open($fh,">&=$fdno")) {
3099 $Global::fd
{$fdno}=$fh;
3102 open $Global::original_stderr
, ">&", "STDERR" or
3103 ::die_bug
("Can't dup STDERR: $!");
3104 open $Global::status_fd
, ">&", "STDERR" or
3105 ::die_bug
("Can't dup STDERR: $!");
3106 open $Global::original_stdin
, "<&", "STDIN" or
3107 ::die_bug
("Can't dup STDIN: $!");
3110 sub enough_file_handles
() {
3111 # Check that we have enough filehandles available for starting
3117 # 1 if ungrouped (thus not needing extra filehandles)
3118 # 0 if too few filehandles
3119 # 1 if enough filehandles
3120 if(not $opt::ungroup
) {
3122 my $enough_filehandles = 1;
3123 # perl uses 7 filehandles for something?
3124 # open3 uses 2 extra filehandles temporarily
3125 # We need a filehandle for each redirected file descriptor
3126 # (normally just STDOUT and STDERR)
3127 for my $i (1..(7+2+keys %Global::fd
)) {
3128 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3130 for (values %fh) { close $_; }
3131 return $enough_filehandles;
3133 # Ungrouped does not need extra file handles
3138 sub open_or_exit
($) {
3139 # Open a file name or exit if the file cannot be opened
3141 # $file = filehandle or filename to open
3143 # $Global::original_stdin
3145 # $fh = file handle to read-opened file
3148 return ($Global::original_stdin
|| *STDIN
);
3150 if(ref $file eq "GLOB") {
3151 # This is an open filehandle
3155 if(not open($fh, "<", $file)) {
3156 ::error
("Cannot open input file `$file': No such file or directory.");
3162 sub set_fh_blocking
($) {
3163 # Set filehandle as blocking
3165 # $fh = filehandle to be blocking
3169 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3171 # Get the current flags on the filehandle
3172 fcntl($fh, &F_GETFL, $flags) || die $!;
3173 # Remove non-blocking from the flags
3174 $flags &= ~&O_NONBLOCK;
3175 # Set the flags on the filehandle
3176 fcntl($fh, &F_SETFL, $flags) || die $!;
3179 sub set_fh_non_blocking($) {
3180 # Set filehandle as non-blocking
3182 # $fh = filehandle to be blocking
3186 $Global::use{"Fcntl
"} ||= eval "use Fcntl
qw(:DEFAULT :flock); 1;";
3188 # Get the current flags on the filehandle
3189 fcntl($fh, &F_GETFL, $flags) || die $!;
3190 # Add non-blocking to the flags
3191 $flags |= &O_NONBLOCK;
3192 # Set the flags on the filehandle
3193 fcntl($fh, &F_SETFL, $flags) || die $!;
3197 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3200 # Variable structure:
3202 # $Global::running{$pid} = Pointer to Job-object
3203 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3204 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3205 # $Global::total_running = total number of running jobs
3206 # $Global::total_started = total jobs started
3207 # $Global::max_procs_file = filename if --jobs is given a filename
3208 # $Global::JobQueue = JobQueue object for the queue of jobs
3209 # $Global::timeoutq = queue of times where jobs timeout
3210 # $Global::newest_job = Job object of the most recent job started
3211 # $Global::newest_starttime = timestamp of $Global::newest_job
3213 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
3214 # $Global::start_no_new_jobs = should more jobs be started?
3215 # $Global::original_stderr = file handle for STDERR when the program started
3216 # $Global::total_started = total number of jobs started
3217 # $Global::joblog = filehandle of joblog
3218 # $Global::debug = Is debugging on?
3219 # $Global::exitstatus = status code of GNU Parallel
3220 # $Global::quoting = quote the command to run
3222 sub init_run_jobs() {
3223 # Set Global variables and progress signal handlers
3224 # Do the copying of basefiles
3226 $Global::total_running = 0;
3227 $Global::total_started = 0;
3228 $SIG{USR1} = \&list_running_jobs;
3229 $SIG{USR2} = \&toggle_progress;
3230 if(@opt::basefile) { setup_basefile(); }
3236 my $max_procs_file_last_mod;
3238 sub changed_procs_file {
3239 # If --jobs is a file and it is modfied:
3240 # Force recomputing of max_jobs_running for each $sshlogin
3242 # $Global::max_procs_file
3245 if($Global::max_procs_file) {
3247 my $mtime = (stat($Global::max_procs_file))[9];
3248 $max_procs_file_last_mod ||= 0;
3249 if($mtime > $max_procs_file_last_mod) {
3250 # file changed: Force re-computing max_jobs_running
3251 $max_procs_file_last_mod = $mtime;
3252 for my $sshlogin (values %Global::host) {
3253 $sshlogin->set_max_jobs_running(undef);
3259 sub changed_sshloginfile {
3260 # If --slf is changed:
3265 # @opt::sshloginfile
3268 # $opt::filter_hosts
3270 if(@opt::sshloginfile) {
3271 # Is --sshloginfile changed?
3272 for my $slf (@opt::sshloginfile) {
3273 my $actual_file = expand_slf_shorthand($slf);
3274 my $mtime = (stat($actual_file))[9];
3275 $last_mtime{$actual_file} ||= $mtime;
3276 if($mtime - $last_mtime{$actual_file} > 1) {
3277 ::debug("run
","--sshloginfile
$actual_file changed
. reload
\n");
3278 $last_mtime{$actual_file} = $mtime;
3281 @Global::sshlogin = ();
3282 for (values %Global::host) {
3283 # Don't start new jobs on any host
3284 # except the ones added back later
3285 $_->set_max_jobs_running(0);
3287 # This will set max_jobs_running on the SSHlogins
3288 read_sshloginfile($actual_file);
3290 $opt::filter_hosts and filter_hosts();
3297 sub start_more_jobs {
3298 # Run start_another_job() but only if:
3299 # * not $Global::start_no_new_jobs set
3300 # * not JobQueue is empty
3301 # * not load on server is too high
3302 # * not server swapping
3303 # * not too short time since last remote login
3306 # $Global::start_no_new_jobs
3312 # $Global::newest_starttime
3314 # $jobs_started = number of jobs started
3315 my $jobs_started = 0;
3316 if($Global::start_no_new_jobs) {
3317 return $jobs_started;
3319 if(time - ($last_time||0) > 1) {
3320 # At most do this every second
3322 changed_procs_file();
3323 changed_sshloginfile();
3325 # This will start 1 job on each --sshlogin (if possible)
3326 # thus distribute the jobs on the --sshlogins round robin
3327 for my $sshlogin (values %Global::host) {
3328 if($Global::JobQueue->empty() and not $opt::pipe) {
3329 # No more jobs in the queue
3332 debug("run
", "Running jobs before on
", $sshlogin->string(), ": ",
3333 $sshlogin->jobs_running(), "\n");
3334 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
3337 $opt::delay - 0.008 > ::now() - $Global::newest_starttime) {
3338 # It has been too short since last start
3341 if($opt::load and $sshlogin->loadavg_too_high()) {
3342 # The load is too high or unknown
3345 if($opt::noswap and $sshlogin->swapping()) {
3346 # The server is swapping
3349 if($opt::limit and $sshlogin->limit()) {
3353 if(($opt::memfree or $opt::memsuspend)
3355 $sshlogin->memfree() < $Global::memlimit) {
3356 # The server has not enough mem free
3357 ::debug("mem
", "Not starting job
: not enough mem
\n");
3360 if($sshlogin->too_fast_remote_login()) {
3361 # It has been too short since
3364 debug("run
", $sshlogin->string(),
3365 " has
", $sshlogin->jobs_running(),
3366 " out of
", $sshlogin->max_jobs_running(),
3367 " jobs running
. Start another
.\n");
3368 if(start_another_job($sshlogin) == 0) {
3369 # No more jobs to start on this $sshlogin
3370 debug("run
","No jobs started on
",
3371 $sshlogin->string(), "\n");
3374 $sshlogin->inc_jobs_running();
3375 $sshlogin->set_last_login_at(::now());
3378 debug("run
","Running jobs after on
", $sshlogin->string(), ": ",
3379 $sshlogin->jobs_running(), " of
",
3380 $sshlogin->max_jobs_running(), "\n");
3383 return $jobs_started;
3388 my $no_more_file_handles_warned;
3390 sub start_another_job() {
3391 # If there are enough filehandles
3392 # and JobQueue not empty
3393 # and not $job is in joblog
3394 # Then grab a job from Global::JobQueue,
3395 # start it at sshlogin
3396 # mark it as virgin_job
3398 # $sshlogin = the SSHLogin to start the job on
3404 # @Global::virgin_jobs
3406 # 1 if another jobs was started
3408 my $sshlogin = shift;
3409 # Do we have enough file handles to start another job?
3410 if(enough_file_handles()) {
3411 if($Global::JobQueue->empty() and not $opt::pipe) {
3412 # No more commands to run
3413 debug("start
", "Not starting
: JobQueue empty
\n");
3417 # Skip jobs already in job log
3418 # Skip jobs already in results
3420 $job = get_job_with_sshlogin($sshlogin);
3421 if(not defined $job) {
3422 # No command available for that sshlogin
3423 debug("start
", "Not starting
: no jobs available
for ",
3424 $sshlogin->string(), "\n");
3427 if($job->is_already_in_joblog()) {
3430 } while ($job->is_already_in_joblog()
3432 ($opt::results and $opt::resume and $job->is_already_in_results()));
3433 debug("start
", "Command to run on
'", $job->sshlogin()->string(), "': '",
3434 $job->replaced(),"'\n");
3437 if($job->virgin()) {
3438 push(@Global::virgin_jobs,$job);
3440 # Block already set: This is a retry
3442 ::debug("pipe","\n\nWriting
",length ${$job->block_ref()},
3443 " to
", $job->seq(),"\n");
3444 close $job->fh(0,"w
");
3446 $job->write($job->block_ref());
3447 close $job->fh(0,"w
");
3452 debug("start
", "Started as seq
", $job->seq(),
3453 " pid
:", $job->pid(), "\n");
3456 # Not enough processes to run the job.
3457 # Put it back on the queue.
3458 $Global::JobQueue->unget($job);
3459 # Count down the number of jobs to run for this SSHLogin.
3460 my $max = $sshlogin->max_jobs_running();
3461 if($max > 1) { $max--; } else {
3463 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3464 push @arg, map { $_->orig() } @$record;
3466 ::error("No more processes
: cannot run a single job
. Something is wrong at
@arg.");
3467 ::wait_and_exit(255);
3469 $sshlogin->set_max_jobs_running($max);
3470 # Sleep up to 300 ms to give other processes time to die
3471 ::usleep(rand()*300);
3472 ::warning("No more processes
: ".
3473 "Decreasing number of running jobs to
$max.",
3474 "Raising ulimit
-u
or /etc/security
/limits
.conf may help
.");
3479 # No more file handles
3480 $no_more_file_handles_warned++ or
3481 ::warning("No more file handles
. ",
3482 "Raising ulimit
-n
or /etc/security
/limits
.conf may help
.");
3483 debug("start
", "No more file handles
. ");
3489 sub init_progress() {
3493 # list of computers for progress output
3498 my %progress = progress();
3499 return ("\nComputers
/ CPU cores / Max jobs to run
\n",
3500 $progress{'workerlist'});
3503 sub drain_job_queue(@) {
3506 # $Global::total_running
3507 # $Global::max_jobs_running
3511 # $Global::start_no_new_jobs
3514 if($opt::progress) {
3515 ::status_no_nl(init_progress());
3517 my $last_header = "";
3520 while($Global::total_running > 0) {
3521 debug("init
",$Global::total_running, "==", scalar
3522 keys %Global::running," slots
: ", $Global::max_jobs_running);
3524 # When using --pipe sometimes file handles are not
3526 for my $job (values %Global::running) {
3527 close $job->fh(0,"w
");
3530 if($opt::progress) {
3531 my %progress = progress();
3532 if($last_header ne $progress{'header'}) {
3533 ::status("", $progress{'header'});
3534 $last_header = $progress{'header'};
3536 ::status_no_nl("\r",$progress{'status'});
3538 if($Global::total_running < $Global::max_jobs_running
3539 and not $Global::JobQueue->empty()) {
3540 # These jobs may not be started because of loadavg
3541 # or too little time between each ssh login.
3542 if(start_more_jobs() > 0) {
3543 # Exponential back-on if jobs were started
3544 $sleep = $sleep/2+0.001;
3547 # Exponential back-off sleeping
3548 $sleep = ::reap_usleep($sleep);
3550 if(not $Global::JobQueue->empty()) {
3551 # These jobs may not be started:
3552 # * because there the --filter-hosts has removed all
3553 if(not %Global::host) {
3554 ::error("There are
no hosts left to run on
.");
3555 ::wait_and_exit(255);
3557 # * because of loadavg
3558 # * because of too little time between each ssh login.
3559 $sleep = ::reap_usleep($sleep);
3561 if($Global::max_jobs_running == 0) {
3562 ::warning("There are
no job slots available
. Increase
--jobs
.");
3565 while($opt::sqlmaster and not $Global::sql->finished()) {
3567 $sleep = ::reap_usleep($sleep);
3569 if($Global::start_sqlworker) {
3570 # Start an SQL worker as we are now sure there is work to do
3571 $Global::start_sqlworker = 0;
3572 if(my $pid = fork()) {
3573 $Global::unkilled_sqlworker = $pid;
3575 # Replace --sql/--sqlandworker with --sqlworker
3576 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3577 # exec the --sqlworker
3578 exec($0,@ARGV,@command);
3582 } while ($Global::total_running > 0
3584 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3586 $opt::sqlmaster and not $Global::sql->finished());
3587 if($opt::progress) {
3588 my %progress = progress();
3589 ::status("\r".$progress{'status'});
3593 sub toggle_progress() {
3594 # Turn on/off progress view
3598 $opt::progress = not $opt::progress;
3599 if($opt::progress) {
3600 ::status_no_nl(init_progress());
3609 # $Global::total_started
3611 # $workerlist = list of workers
3612 # $header = that will fit on the screen
3613 # $status = message that will fit on the screen
3615 return ("workerlist
" => "", "header
" => "", "status
" => bar());
3618 my ($status,$header)=("","");
3620 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3622 $eta = sprintf("ETA
: %ds Left
: %d AVG
: %.2fs
",
3623 $this_eta, $left, $avgtime);
3625 my $termcols = terminal_columns();
3626 my @workers = sort keys %Global::host;
3627 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3629 my %workerno = map { ($_=>$workerno++) } @workers;
3630 my $workerlist = "";
3631 for my $w (@workers) {
3633 $workerno{$w}.":".$sshlogin{$w} ." / ".
3634 ($Global::host{$w}->ncpus() || "-")." / ".
3635 $Global::host{$w}->max_jobs_running()."\n";
3637 $status = "x
"x($termcols+1);
3638 # Select an output format that will fit on a single line
3639 if(length $status > $termcols) {
3640 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3641 $header = "Computer
:jobs running
/jobs completed/%of started jobs
/Average seconds to complete
";
3645 if($Global::total_started) {
3646 my $completed = ($Global::host{$_}->jobs_completed()||0);
3647 my $running = $Global::host{$_}->jobs_running();
3648 my $time = $completed ? (time-$^T)/($completed) : "0";
3649 sprintf("%s:%d/%d/%d%%/%.1fs
",
3650 $sshlogin{$_}, $running, $completed,
3651 ($running+$completed)*100
3652 / $Global::total_started, $time);
3656 if(length $status > $termcols) {
3657 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3658 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3662 if($Global::total_started) {
3663 my $completed = ($Global::host{$_}->jobs_completed()||0);
3664 my $running = $Global::host{$_}->jobs_running();
3665 my $time = $completed ? (time-$^T)/($completed) : "0";
3666 sprintf("%s:%d/%d/%d%%/%.1fs
",
3667 $workerno{$_}, $running, $completed,
3668 ($running+$completed)*100
3669 / $Global::total_started, $time);
3673 if(length $status > $termcols) {
3674 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3675 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3679 if($Global::total_started) {
3680 sprintf("%s:%d/%d/%d%%",
3682 $Global::host{$_}->jobs_running(),
3683 ($Global::host{$_}->jobs_completed()||0),
3684 ($Global::host{$_}->jobs_running()+
3685 ($Global::host{$_}->jobs_completed()||0))*100
3686 / $Global::total_started)
3691 if(length $status > $termcols) {
3692 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3693 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3697 if($Global::total_started) {
3698 sprintf("%s:%d/%d/%d%%",
3700 $Global::host{$_}->jobs_running(),
3701 ($Global::host{$_}->jobs_completed()||0),
3702 ($Global::host{$_}->jobs_running()+
3703 ($Global::host{$_}->jobs_completed()||0))*100
3704 / $Global::total_started)
3709 if(length $status > $termcols) {
3710 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3711 $header = "Computer
:jobs running
/jobs completed
";
3714 { sprintf("%s:%d/%d",
3715 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3716 ($Global::host{$_}->jobs_completed()||0)) }
3719 if(length $status > $termcols) {
3720 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3721 $header = "Computer
:jobs running
/jobs completed
";
3724 { sprintf("%s:%d/%d",
3725 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3726 ($Global::host{$_}->jobs_completed()||0)) }
3729 if(length $status > $termcols) {
3730 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3731 $header = "Computer
:jobs running
/jobs completed
";
3734 { sprintf("%s:%d/%d",
3735 $workerno{$_}, $Global::host{$_}->jobs_running(),
3736 ($Global::host{$_}->jobs_completed()||0)) }
3739 if(length $status > $termcols) {
3740 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3741 $header = "Computer
:jobs completed
";
3746 ($Global::host{$_}->jobs_completed()||0)) }
3749 if(length $status > $termcols) {
3750 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3751 $header = "Computer
:jobs completed
";
3756 ($Global::host{$_}->jobs_completed()||0)) }
3759 return ("workerlist
" => $workerlist, "header
" => $header, "status
" => $status);
3764 my ($first_completed, $smoothed_avg_time, $last_eta);
3767 # Calculate important numbers for ETA
3769 # $total = number of jobs in total
3770 # $completed = number of jobs completed
3771 # $left = number of jobs left
3772 # $pctcomplete = percent of jobs completed
3773 # $avgtime = averaged time
3774 # $eta = smoothed eta
3775 my $completed = $Global::total_completed;
3776 # In rare cases with -X will $completed > total_jobs()
3777 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3778 my $left = $total - $completed;
3779 if(not $completed) {
3780 return($total, $completed, $left, 0, 0, 0);
3782 my $pctcomplete = ::min($completed / $total,100);
3783 $first_completed ||= time;
3784 my $timepassed = (time - $first_completed);
3785 my $avgtime = $timepassed / $completed;
3786 $smoothed_avg_time ||= $avgtime;
3787 # Smooth the eta so it does not jump wildly
3788 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3789 $pctcomplete * $avgtime;
3790 my $eta = int($left * $smoothed_avg_time);
3791 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3792 # Eta jumped less that 10% up: Keep the last eta instead
3797 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3806 # $status = bar with eta, completed jobs, arg and pct
3808 $reset ||= "\033[0m
";
3809 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3811 my $arg = $Global::newest_job ?
3812 $Global::newest_job->{'commandline'}->
3813 replace_placeholders(["\257<\257>"],0,0) : "";
3814 # These chars mess up display in the terminal
3815 $arg =~ tr/[\011-\016\033\302-\365]//d;
3816 my $eta_dhms = ::seconds_to_time_units($eta);
3818 sprintf("%d%% %d:%d=%s %s",
3819 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3820 my $terminal_width = terminal_columns();
3821 my $s = sprintf("%-${terminal_width
}s
",
3822 substr($bar_text." "x$terminal_width,
3823 0,$terminal_width));
3824 my $width = int($terminal_width * $pctcomplete);
3825 substr($s,$width,0) = $reset;
3826 my $zenity = sprintf("%-${terminal_width
}s
",
3827 substr("# $eta sec $arg",
3828 0,$terminal_width));
3829 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3830 "\r" . $rev . $s . $reset;
3836 my ($columns,$last_column_time);
3838 sub terminal_columns
() {
3839 # Get the number of columns of the terminal.
3840 # Only update once per second.
3842 # number of columns of the screen
3843 if(not $columns or $last_column_time < time) {
3844 $last_column_time = time;
3845 $columns = $ENV{'COLUMNS'};
3847 # && true is to force spawning a shell and not just exec'ing
3848 my $stty = qx{stty
-a
</dev/tty
2>/dev/null
&& true
};
3849 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3850 # MacOSX/IRIX/AIX/Tru64
3851 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3853 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3854 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3855 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3857 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3860 # && true is to force spawning a shell and not just exec'ing
3861 my $resize = qx{resize
2>/dev/null
&& true
};
3862 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3870 # Prototype forwarding
3871 sub get_job_with_sshlogin
($);
3872 sub get_job_with_sshlogin
($) {
3874 # $sshlogin = which host should the job be run on?
3879 # $job = next job object for $sshlogin if any available
3880 my $sshlogin = shift;
3883 if ($opt::hostgroups
) {
3884 my @other_hostgroup_jobs = ();
3886 while($job = $Global::JobQueue
->get()) {
3887 if($sshlogin->in_hostgroups($job->hostgroups())) {
3888 # Found a job to be run on a hostgroup of this
3892 # This job was not in the hostgroups of $sshlogin
3893 push @other_hostgroup_jobs, $job;
3896 $Global::JobQueue
->unget(@other_hostgroup_jobs);
3897 if(not defined $job) {
3902 $job = $Global::JobQueue
->get();
3903 if(not defined $job) {
3905 ::debug
("start", "No more jobs: JobQueue empty\n");
3909 if(not $job->suspended()) {
3910 $job->set_sshlogin($sshlogin);
3912 if($opt::retries
and $job->failed_here()) {
3913 # This command with these args failed for this sshlogin
3914 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3915 # Only look at the Global::host that have > 0 jobslots
3916 if($no_of_failed_sshlogins ==
3917 grep { $_->max_jobs_running() > 0 } values %Global::host
3918 and $job->failed_here() == $min_failures) {
3919 # It failed the same or more times on another host:
3920 # run it on this host
3922 # If it failed fewer times on another host:
3923 # Find another job to run
3925 if(not $Global::JobQueue
->empty()) {
3926 # This can potentially recurse for all args
3927 no warnings
'recursion';
3928 $nextjob = get_job_with_sshlogin
($sshlogin);
3930 # Push the command back on the queue
3931 $Global::JobQueue
->unget($job);
3939 sub __REMOTE_SSH__
() {}
3942 sub read_sshloginfiles
(@
) {
3943 # Read a list of --slf's
3945 # @files = files or symbolic file names to read
3948 read_sshloginfile
(expand_slf_shorthand
($s));
3952 sub expand_slf_shorthand
($) {
3953 # Expand --slf shorthand into a read file name
3955 # $file = file or symbolic file name to read
3957 # $file = actual file name to read
3961 } elsif($file eq "..") {
3962 $file = $Global::config_dir
."/sshloginfile";
3963 } elsif($file eq ".") {
3964 $file = "/etc/parallel/sshloginfile";
3965 } elsif(not -r
$file) {
3966 for(@Global::config_dirs
) {
3967 if(not -r
$_."/".$file) {
3968 # Try prepending $PARALLEL_HOME
3969 ::error
("Cannot open $file.");
3970 ::wait_and_exit
(255);
3972 $file = $_."/".$file;
3980 sub read_sshloginfile
($) {
3981 # Read sshloginfile into @Global::sshlogin
3983 # $file = file to read
3991 ::debug
("init","--slf ",$file);
3996 if(not open($in_fh, "<", $file)) {
3998 ::error
("Cannot open $file.");
3999 ::wait_and_exit
(255);
4006 push @Global::sshlogin
, $_;
4013 sub parse_sshlogin
() {
4014 # Parse @Global::sshlogin into %Global::host.
4015 # Keep only hosts that are in one of the given ssh hostgroups.
4018 # $Global::minimal_command_line_length
4027 if(not @Global::sshlogin
) { @Global::sshlogin
= (":"); }
4028 for my $sshlogin (@Global::sshlogin
) {
4029 # Split up -S sshlogin,sshlogin
4030 for my $s (split /,|\n/, $sshlogin) {
4031 if ($s eq ".." or $s eq "-") {
4032 # This may add to @Global::sshlogin - possibly bug
4033 read_sshloginfile
(expand_slf_shorthand
($s));
4040 $Global::minimal_command_line_length
= 100_000_000
;
4041 my @allowed_hostgroups;
4042 for my $ncpu_sshlogin_string (::uniq
(@login)) {
4043 my $sshlogin = SSHLogin
->new($ncpu_sshlogin_string);
4044 my $sshlogin_string = $sshlogin->string();
4045 if($sshlogin_string eq "") {
4046 # This is an ssh group: -S @webservers
4047 push @allowed_hostgroups, $sshlogin->hostgroups();
4050 if($Global::host
{$sshlogin_string}) {
4051 # This sshlogin has already been added:
4052 # It is probably a host that has come back
4053 # Set the max_jobs_running back to the original
4054 debug
("run","Already seen $sshlogin_string\n");
4055 if($sshlogin->{'ncpus'}) {
4056 # If ncpus set by '#/' of the sshlogin, overwrite it:
4057 $Global::host
{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4059 $Global::host
{$sshlogin_string}->set_max_jobs_running(undef);
4062 $sshlogin->set_maxlength(Limits
::Command
::max_length
());
4064 $Global::minimal_command_line_length
=
4065 ::min
($Global::minimal_command_line_length
, $sshlogin->maxlength());
4066 $Global::host
{$sshlogin_string} = $sshlogin;
4068 if(@allowed_hostgroups) {
4069 # Remove hosts that are not in these groups
4070 while (my ($string, $sshlogin) = each %Global::host
) {
4071 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4072 delete $Global::host
{$string};
4077 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4078 if(@Global::transfer_files
or @opt::return or $opt::cleanup
or @opt::basefile
) {
4079 if(not remote_hosts
()) {
4080 # There are no remote hosts
4082 ::warning
("--trc ignored as there are no remote --sshlogin.");
4083 } elsif (defined $opt::transfer
) {
4084 ::warning
("--transfer ignored as there are no remote --sshlogin.");
4085 } elsif (@opt::transfer_files
) {
4086 ::warning
("--transferfile ignored as there are no remote --sshlogin.");
4087 } elsif (@opt::return) {
4088 ::warning
("--return ignored as there are no remote --sshlogin.");
4089 } elsif (defined $opt::cleanup
) {
4090 ::warning
("--cleanup ignored as there are no remote --sshlogin.");
4091 } elsif (@opt::basefile
) {
4092 ::warning
("--basefile ignored as there are no remote --sshlogin.");
4098 sub remote_hosts
() {
4099 # Return sshlogins that are not ':'
4103 # list of sshlogins with ':' removed
4104 return grep !/^:$/, keys %Global::host
;
4107 sub setup_basefile
() {
4108 # Transfer basefiles to each $sshlogin
4109 # This needs to be done before first jobs on $sshlogin is run
4117 for my $sshlogin (values %Global::host
) {
4118 if($sshlogin->string() eq ":") { next }
4119 for my $file (@opt::basefile
) {
4120 if($file !~ m
:^/: and $opt::workdir
eq "...") {
4121 ::error
("Work dir '...' will not work with relative basefiles.");
4122 ::wait_and_exit
(255);
4125 my $dummycmdline = CommandLine
->new(1,["true"],{},0,0,[],[],{},{},{});
4126 my $dummyjob = Job
->new($dummycmdline);
4127 $workdir = $dummyjob->workdir();
4129 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4132 debug
("init", "basesetup: @cmd\n");
4133 my ($exitstatus,$stdout_ref,$stderr_ref) =
4134 run_gnu_parallel
((join "\n",@cmd),"-j0","--retries",5);
4136 my @stdout = @
$stdout_ref;
4137 my @stderr = @
$stderr_ref;
4138 ::error
("Copying of --basefile failed: @stdout@stderr");
4139 ::wait_and_exit
(255);
4143 sub cleanup_basefile
() {
4144 # Remove the basefiles transferred
4152 my $dummycmdline = CommandLine
->new(1,"true",0,0,0,0,0,{},{},{});
4153 my $dummyjob = Job
->new($dummycmdline);
4154 $workdir = $dummyjob->workdir();
4156 for my $sshlogin (values %Global::host
) {
4157 if($sshlogin->string() eq ":") { next }
4158 for my $file (@opt::basefile
) {
4159 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
4162 debug
("init", "basecleanup: @cmd\n");
4163 my ($exitstatus,$stdout_ref,$stderr_ref) =
4164 run_gnu_parallel
(join("\n",@cmd),"-j0","--retries",5);
4166 my @stdout = @
$stdout_ref;
4167 my @stderr = @
$stderr_ref;
4168 ::error
("Cleanup of --basefile failed: @stdout@stderr");
4169 ::wait_and_exit
(255);
4173 sub run_gnu_parallel
() {
4174 my ($stdin,@args) = @_;
4175 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
4176 print $Global::original_stderr
` $cmd wait` ;
4180 sub _run_gnu_parallel
() {
4182 # This should ideally just fork an internal copy
4183 # and not start it through a shell
4185 # $stdin = data to provide on stdin for GNU Parallel
4186 # @args = command line arguments
4188 # $exitstatus = exitcode of GNU Parallel run
4189 # \@stdout = standard output
4190 # \@stderr = standard error
4191 my ($stdin,@args) = @_;
4192 my ($exitstatus,@stdout,@stderr);
4193 my ($stdin_fh,$stdout_fh)=(gensym
(),gensym
());
4194 my ($stderr_fh, $stderrname) = ::tmpfile
(SUFFIX
=> ".par");
4197 my $pid = ::open3
($stdin_fh,$stdout_fh,$stderr_fh,
4198 $0,qw(--plain --shell /bin/sh --will-cite), @args);
4199 if(my $writerpid = fork()) {
4201 @stdout = <$stdout_fh>;
4202 # Now stdout is closed:
4203 # These pids should be dead or die very soon
4204 while(kill 0, $writerpid) { ::usleep
(1); }
4207 # while(kill 0, $pid) { ::usleep(1); }
4210 seek $stderr_fh, 0, 0;
4211 @stderr = <$stderr_fh>;
4217 print $stdin_fh $stdin;
4221 return ($exitstatus,\
@stdout,\
@stderr);
4224 sub filter_hosts
() {
4225 # Remove down --sshlogins from active duty.
4226 # Find ncpus, ncores, maxlen, time-to-login for each host.
4229 # $Global::minimal_command_line_length
4230 # $opt::use_sockets_instead_of_threads
4231 # $opt::use_cores_instead_of_threads
4232 # $opt::use_cpus_instead_of_cores
4235 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
4236 $maxlen_ref, $echo_ref, $down_hosts_ref) =
4237 parse_host_filtering
(parallelized_host_filtering
());
4239 delete @Global::host
{@
$down_hosts_ref};
4240 @
$down_hosts_ref and ::warning
("Removed @$down_hosts_ref.");
4242 $Global::minimal_command_line_length
= 100_000_000
;
4243 while (my ($sshlogin, $obj) = each %Global::host
) {
4244 if($sshlogin eq ":") { next }
4245 $nsockets_ref->{$sshlogin} or
4246 ::die_bug
("nsockets missing: ".$obj->serverlogin());
4247 $ncores_ref->{$sshlogin} or
4248 ::die_bug
("ncores missing: ".$obj->serverlogin());
4249 $nthreads_ref->{$sshlogin} or
4250 ::die_bug
("nthreads missing: ".$obj->serverlogin());
4251 $time_to_login_ref->{$sshlogin} or
4252 ::die_bug
("time_to_login missing: ".$obj->serverlogin());
4253 $maxlen_ref->{$sshlogin} or
4254 ::die_bug
("maxlen missing: ".$obj->serverlogin());
4255 $obj->set_ncpus($nthreads_ref->{$sshlogin});
4256 if($opt::use_cpus_instead_of_cores
) {
4257 $obj->set_ncpus($ncores_ref->{$sshlogin});
4258 } elsif($opt::use_sockets_instead_of_threads
) {
4259 $obj->set_ncpus($nsockets_ref->{$sshlogin});
4260 } elsif($opt::use_cores_instead_of_threads
) {
4261 $obj->set_ncpus($ncores_ref->{$sshlogin});
4263 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
4264 $obj->set_maxlength($maxlen_ref->{$sshlogin});
4265 $Global::minimal_command_line_length
=
4266 ::min
($Global::minimal_command_line_length
,
4267 int($maxlen_ref->{$sshlogin}/2));
4268 ::debug
("init", "Timing from -S:$sshlogin ",
4269 " nsockets:",$nsockets_ref->{$sshlogin},
4270 " ncores:", $ncores_ref->{$sshlogin},
4271 " nthreads:",$nthreads_ref->{$sshlogin},
4272 " time_to_login:", $time_to_login_ref->{$sshlogin},
4273 " maxlen:", $maxlen_ref->{$sshlogin},
4274 " min_max_len:", $Global::minimal_command_line_length
,"\n");
4278 sub parse_host_filtering
() {
4280 # @lines = output from parallelized_host_filtering()
4282 # \%nsockets = number of sockets of {host}
4283 # \%ncores = number of cores of {host}
4284 # \%nthreads = number of hyperthreaded cores of {host}
4285 # \%time_to_login = time_to_login on {host}
4286 # \%maxlen = max command len on {host}
4287 # \%echo = echo received from {host}
4288 # \@down_hosts = list of hosts with no answer
4290 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
4293 ::debug
("init","Read: ",$_);
4295 my @col = split /\t/, $_;
4296 if($col[0] =~ /^parallel: Warning:/) {
4297 # Timed out job: Ignore it
4299 } elsif(defined $col[6]) {
4300 # This is a line from --joblog
4301 # seq host time spent sent received exit signal command
4302 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
4303 if($col[0] eq "Seq" and $col[1] eq "Host" and
4304 $col[2] eq "Starttime") {
4308 # Get server from: eval true server\;
4309 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
4310 ::die_bug
("col8 does not contain host: $col[8]");
4313 $Global::host
{$host} or next;
4314 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
4315 # exit == 255 or exit == timeout (-1): ssh failed/timedout
4316 # exit == 1: lsh failed
4318 ::debug
("init", "--filtered $host\n");
4319 push(@down_hosts, $host);
4320 } elsif($col[6] eq "127") {
4321 # signal == 127: parallel not installed remote
4322 # Set nsockets, ncores, nthreads = 1
4323 ::warning
("Could not figure out ".
4324 "number of cpus on $host. Using 1.");
4325 $nsockets{$host} = 1;
4327 $nthreads{$host} = 1;
4328 $maxlen{$host} = Limits
::Command
::max_length
();
4329 } elsif($col[0] =~ /^\d+$/ and $Global::host
{$host}) {
4330 # Remember how log it took to log in
4331 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
4332 $time_to_login{$host} = ::min
($time_to_login{$host},$col[3]);
4334 ::die_bug
("host check unmatched long jobline: $_");
4336 } elsif($Global::host
{$col[0]}) {
4337 # This output from --number-of-cores, --number-of-cpus,
4338 # --max-line-length-allowed
4341 # maxlen: server 131071
4342 if(/parallel: Warning: Cannot figure out number of/) {
4345 if(not $nsockets{$col[0]}) {
4346 $nsockets{$col[0]} = $col[1];
4347 } elsif(not $ncores{$col[0]}) {
4348 $ncores{$col[0]} = $col[1];
4349 } elsif(not $nthreads{$col[0]}) {
4350 $nthreads{$col[0]} = $col[1];
4351 } elsif(not $maxlen{$col[0]}) {
4352 $maxlen{$col[0]} = $col[1];
4353 } elsif(not $echo{$col[0]}) {
4354 $echo{$col[0]} = $col[1];
4355 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
4357 # perl: warning: Setting locale failed.
4358 # perl: warning: Please check that your locale settings:
4359 # LANGUAGE = (unset),
4361 # LANG = "en_US.UTF-8"
4362 # are supported and installed on your system.
4363 # perl: warning: Falling back to the standard locale ("C").
4365 ::die_bug
("host check too many col0: $_");
4368 ::die_bug
("host check unmatched short jobline ($col[0]): $_");
4371 @down_hosts = uniq
(@down_hosts);
4372 return(\
%nsockets, \
%ncores, \
%nthreads, \
%time_to_login,
4373 \
%maxlen, \
%echo, \
@down_hosts);
4376 sub parallelized_host_filtering
() {
4380 # text entries with:
4382 # * hostname \t number of cores
4383 # * hostname \t number of cpus
4384 # * hostname \t max-line-length-allowed
4385 # * hostname \t empty
4388 # Wrap with ssh and --env
4389 # Return $default_value if command fails
4390 my $sshlogin = shift;
4391 my $command = shift;
4392 my $default_value = shift;
4393 # wrapper that returns $default_value if the command fails:
4394 # bug #57886: Errors when using different version on remote
4395 # perl -e '$a=`$command`; print $? ? "$default_value" : $a'
4396 my $wcmd = q
(perl
-e
'$a=`).$command.q(`;).
4397 q(print $? ? ").::pQ($default_value).q(" : $a');
4398 my $commandline = CommandLine
->new(1,[$wcmd],{},0,0,[],[],{},{},{});
4399 my $job = Job
->new($commandline);
4400 $job->set_sshlogin($sshlogin);
4402 return($job->{'wrapped'});
4405 my(@sockets, @cores, @threads, @maxline, @echo);
4406 while (my ($host, $sshlogin) = each %Global::host
) {
4407 if($host eq ":") { next }
4408 # The 'true' is used to get the $host out later
4409 push(@sockets, $host."\t"."true $host; ".
4410 sshwrapped
($sshlogin,"parallel --number-of-sockets",0)."\n\0");
4411 push(@cores, $host."\t"."true $host; ".
4412 sshwrapped
($sshlogin,"parallel --number-of-cores",0)."\n\0");
4413 push(@threads, $host."\t"."true $host; ".
4414 sshwrapped
($sshlogin,"parallel --number-of-threads",0)."\n\0");
4415 push(@maxline, $host."\t"."true $host; ".
4416 sshwrapped
($sshlogin,"parallel --max-line-length-allowed",0)."\n\0");
4417 # 'echo' is used to get the fastest possible ssh login time
4418 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4419 $sshlogin->serverlogin();
4420 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4422 # --timeout 10: Setting up an SSH connection and running a simple
4423 # command should never take > 10 sec.
4424 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4425 # will make it less likely to overload the ssh daemon.
4426 # --retries 3: If the ssh daemon is overloaded, try 3 times
4428 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4429 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4430 $cmd = $Global::shell
." -c ".Q
($cmd);
4431 ::debug
("init", $cmd, "\n");
4435 my ($host_fh,$in,$err);
4436 open3
($in, $host_fh, $err, $cmd) || ::die_bug
("parallel host check: $cmd");
4437 ::debug
("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
4440 # Give the commands to run to the $cmd
4442 print $in @sockets, @cores, @threads, @maxline, @echo;
4448 # TODO incompatible with '-quoting. Needs to be fixed differently
4450 # # if last char = ' then append next line
4451 # # This may be due to quoting of \n in environment var
4464 # Runs @command on all hosts.
4465 # Uses parallel to run @command on each host.
4466 # --jobs = number of hosts to run on simultaneously.
4467 # For each host a parallel command with the args will be running.
4487 # $opt::arg_file_sep
4491 # $Global::exitstatus
4498 # @command = command to run on all hosts
4502 # $joblog = filename of joblog - undef if none
4504 # $tmpfile = temp file for joblog - undef if none
4506 if(not defined $joblog) {
4509 my ($fh, $tmpfile) = ::tmpfile
(SUFFIX
=> ".log");
4513 my ($input_source_fh_ref,@command) = @_;
4514 if($Global::quoting
) {
4515 @command = shell_quote
(@command);
4518 # Copy all @input_source_fh (-a and :::) into tempfiles
4520 for my $fh (@
$input_source_fh_ref) {
4521 my ($outfh, $name) = ::tmpfile
(SUFFIX
=> ".all", UNLINK
=> not $opt::D
);
4522 print $outfh (<$fh>);
4524 push @argfiles, $name;
4526 if(@opt::basefile
) { setup_basefile
(); }
4527 # for each sshlogin do:
4528 # parallel -S $sshlogin $command :::: @argfiles
4530 # Pass some of the options to the sub-parallels, not all of them as
4531 # -P should only go to the first, and -S should not be copied at all.
4534 ((defined $opt::sshdelay
) ?
"--delay ".$opt::sshdelay
: ""),
4535 ((defined $opt::memfree
) ?
"--memfree ".$opt::memfree
: ""),
4536 ((defined $opt::memsuspend
) ?
"--memfree ".$opt::memsuspend
: ""),
4537 ((defined $opt::D
) ?
"-D $opt::D" : ""),
4538 ((defined $opt::group
) ?
"-g" : ""),
4539 ((defined $opt::jobs
) ?
"-P $opt::jobs" : ""),
4540 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
4541 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
4542 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
4543 ((defined $opt::plain
) ?
"--plain" : ""),
4544 ((defined $opt::ungroup
) ?
"-u" : ""),
4545 ((defined $opt::tee
) ?
"--tee" : ""),
4549 ((defined $opt::sshdelay
) ?
"--delay ".$opt::sshdelay
: ""),
4550 ((defined $opt::D
) ?
"-D $opt::D" : ""),
4551 ((defined $opt::arg_file_sep
) ?
"--arg-file-sep ".$opt::arg_file_sep
: ""),
4552 ((defined $opt::arg_sep
) ?
"--arg-sep ".$opt::arg_sep
: ""),
4553 ((defined $opt::colsep
) ?
"--colsep ".shell_quote
($opt::colsep
) : ""),
4554 ((defined $opt::files
) ?
"--files" : ""),
4555 ((defined $opt::group
) ?
"-g" : ""),
4556 ((defined $opt::cleanup
) ?
"--cleanup" : ""),
4557 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
4558 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
4559 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
4560 ((defined $opt::plain
) ?
"--plain" : ""),
4561 ((defined $opt::plus
) ?
"--plus" : ""),
4562 ((defined $opt::retries
) ?
"--retries ".$opt::retries
: ""),
4563 ((defined $opt::timeout
) ?
"--timeout ".$opt::timeout
: ""),
4564 ((defined $opt::ungroup
) ?
"-u" : ""),
4565 ((defined $opt::ssh
) ?
"--ssh '".$opt::ssh
."'" : ""),
4566 ((defined $opt::tee
) ?
"--tee" : ""),
4567 ((defined $opt::workdir
) ?
"--wd ".Q
($opt::workdir
) : ""),
4568 (@Global::transfer_files ?
map { "--tf ".Q
($_) }
4569 @Global::transfer_files
: ""),
4570 (@Global::ret_files ?
map { "--return ".Q
($_) }
4571 @Global::ret_files
: ""),
4572 (@opt::env ?
map { "--env ".Q
($_) } @opt::env
: ""),
4573 (map { "-v" } @opt::v
),
4575 ::debug
("init", "| $0 $options\n");
4576 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4577 ::die_bug
("This does not run GNU Parallel: $0 $options");
4579 for my $host (sort keys %Global::host
) {
4580 my $sshlogin = $Global::host
{$host};
4581 my $joblog = tmp_joblog
($opt::joblog
);
4583 push @joblogs, $joblog;
4584 $joblog = "--joblog $joblog";
4586 my $quad = $opt::arg_file_sep
|| "::::";
4587 # If PARALLEL_ENV is set: Pass it on
4588 my $penv=$Global::parallel_env ?
4589 "PARALLEL_ENV=".Q
($Global::parallel_env
) :
4591 ::debug
("init", "$penv $0 $suboptions -j1 $joblog ",
4592 ((defined $opt::tag
) ?
4593 "--tagstring ".Q
($sshlogin->string()) : ""),
4594 " -S ", Q
($sshlogin->string())," ",
4595 join(" ",shell_quote
(@command))," $quad @argfiles\n");
4596 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4597 ((defined $opt::tag
) ?
4598 "--tagstring ".Q
($sshlogin->string()) : ""),
4599 " -S ", Q
($sshlogin->string())," ",
4600 join(" ",shell_quote
(@command))," $quad @argfiles\0";
4603 $Global::exitstatus
= $?
>> 8;
4604 debug
("init", "--onall exitvalue ", $?
);
4605 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
4606 $Global::debug
or unlink(@argfiles);
4608 for my $joblog (@joblogs) {
4610 open(my $fh, "<", $joblog) || ::die_bug
("Cannot open tmp joblog $joblog");
4611 # Skip first line (header);
4613 print $Global::joblog
(<$fh>);
4620 sub __SIGNAL_HANDLING__
() {}
4624 # Send TSTP signal (Ctrl-Z) to all children process groups
4628 signal_children
("TSTP");
4632 # Send SIGPIPE signal to all children process groups
4636 signal_children
("PIPE");
4639 sub signal_children
() {
4640 # Send signal to all children process groups
4641 # and GNU Parallel itself
4646 debug
("run", "Sending $signal ");
4647 kill $signal, map { -$_ } keys %Global::running
;
4648 # Use default signal handler for GNU Parallel itself
4649 $SIG{$signal} = undef;
4653 sub save_original_signal_handler
() {
4654 # Remember the original signal handler
4656 # %Global::original_sig
4659 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
4663 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
4666 %Global::original_sig
= %SIG;
4667 $SIG{TERM
} = sub {}; # Dummy until jobs really start
4668 $SIG{ALRM
} = 'IGNORE';
4669 # Allow Ctrl-Z to suspend and `fg` to continue
4670 $SIG{TSTP
} = \
&sigtstp
;
4671 $SIG{PIPE
} = \
&sigpipe
;
4673 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4674 $SIG{TSTP
} = \
&sigtstp
;
4675 # Send continue signal to all children process groups
4676 kill "CONT", map { -$_ } keys %Global::running
;
4680 sub list_running_jobs
() {
4681 # Print running jobs on tty
4685 for my $job (values %Global::running
) {
4686 ::status
("$Global::progname: ".$job->replaced());
4690 sub start_no_new_jobs
() {
4691 # Start no more jobs
4693 # %Global::original_sig
4695 # $Global::start_no_new_jobs
4697 # $SIG{TERM} = $Global::original_sig{TERM};
4698 unlink keys %Global::unlink;
4700 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4701 "$Global::progname: Waiting for these ".(keys %Global::running
).
4702 " jobs to finish. Send SIGTERM to stop now.");
4703 list_running_jobs
();
4704 $Global::start_no_new_jobs
||= 1;
4708 # Run reaper until there are no more left
4710 # @pids_reaped = pids of reaped processes
4713 while($pid = reaper
()) {
4714 push @pids_reaped, $pid;
4716 return @pids_reaped;
4721 # * Set exitstatus, exitsignal, endtime.
4722 # * Free ressources for new job
4723 # * Update median runtime
4725 # * If --halt = now: Kill children
4732 # $Global::total_running
4734 # $stiff = PID of child finished
4736 debug
("run", "Reaper ");
4737 if(($stiff = waitpid(-1, &WNOHANG
)) <= 0) {
4738 # No jobs waiting to be reaped
4742 # $stiff = pid of dead process
4743 my $job = $Global::running
{$stiff};
4745 # '-a <(seq 10)' will give us a pid not in %Global::running
4746 # The same will one of the ssh -M: ignore
4748 delete $Global::running
{$stiff};
4749 $Global::total_running
--;
4750 if($job->{'commandline'}{'skip'}) {
4751 # $job->skip() was called
4752 $job->set_exitstatus(-2);
4753 $job->set_exitsignal(0);
4755 $job->set_exitstatus($?
>> 8);
4756 $job->set_exitsignal($?
& 127);
4759 debug
("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4760 $job->set_endtime(::now
());
4761 my $sshlogin = $job->sshlogin();
4762 $sshlogin->dec_jobs_running();
4763 if($job->should_be_retried()) {
4764 # Free up file handles
4765 $job->free_ressources();
4768 $sshlogin->inc_jobs_completed();
4771 if($opt::timeout
and not $job->exitstatus()) {
4772 # Update average runtime for timeout only for successful jobs
4773 $Global::timeoutq
->update_median_runtime($job->runtime());
4775 if($opt::keeporder
) {
4776 $job->print_earlier_jobs();
4780 if($job->should_we_halt() eq "now") {
4782 ::kill_sleep_seq
($job->pid());
4784 ::wait_and_exit
($Global::halt_exitstatus
);
4789 if($opt::progress
) {
4790 my %progress = progress
();
4791 ::status_no_nl
("\r",$progress{'status'});
4794 debug
("run", "done ");
4803 # Kill all jobs by killing their process groups
4805 # $Global::start_no_new_jobs = we are stopping
4806 # $Global::killall = Flag to not run reaper
4807 $Global::start_no_new_jobs
||= 1;
4808 # Do not reap killed children: Ignore them instead
4809 $Global::killall
||= 1;
4810 kill_sleep_seq
(keys %Global::running
);
4813 sub kill_sleep_seq
(@
) {
4814 # Send jobs TERM,TERM,KILL to processgroups
4816 # @pids = list of pids that are also processgroups
4817 # Convert pids to process groups ($processgroup = -$pid)
4818 my @pgrps = map { -$_ } @_;
4819 my @term_seq = split/,/,$opt::termseq
;
4821 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4824 @pgrps = kill_sleep
(shift @term_seq, shift @term_seq, @pgrps);
4829 # Kill pids with a signal and wait a while for them to die
4831 # $signal = signal to send to @pids
4832 # $sleep_max = number of ms to sleep at most before returning
4833 # @pids = pids to kill (actually process groups)
4835 # $Global::killall = set by killall() to avoid calling reaper
4837 # @pids = pids still alive
4838 my ($signal, $sleep_max, @pids) = @_;
4839 ::debug
("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4840 kill $signal, @pids;
4844 while(@pids and $sleepsum < $sleep_max) {
4845 if($Global::killall
) {
4846 # Killall => don't run reaper
4847 while(waitpid(-1, &WNOHANG
) > 0) {
4848 $sleep = $sleep/2+0.001;
4850 } elsif(reapers
()) {
4851 $sleep = $sleep/2+0.001;
4855 $sleepsum += $sleep;
4856 # Keep only living children
4857 @pids = grep { kill(0, $_) } @pids;
4862 sub wait_and_exit
($) {
4863 # If we do not wait, we sometimes get segfault
4866 unlink keys %Global::unlink;
4868 # Kill all jobs without printing
4871 for (keys %Global::unkilled_children
) {
4872 # Kill any (non-jobs) children (e.g. reserved processes)
4875 delete $Global::unkilled_children
{$_};
4877 if($Global::unkilled_sqlworker
) {
4878 waitpid($Global::unkilled_sqlworker
,0);
4880 # Avoid: Warning: unable to close filehandle properly: No space
4881 # left on device during global destruction.
4882 $SIG{__WARN__
} = sub {};
4898 "$Global::progname [options] [command [arguments]] < list_of_arguments",
4899 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4900 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4902 "-j n Run n jobs in parallel",
4903 "-k Keep same order",
4904 "-X Multiple arguments with context replace",
4905 "--colsep regexp Split input on regexp for positional replacements",
4906 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4907 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
4908 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4909 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4911 "-S sshlogin Example: foo\@server.example.com",
4912 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
4913 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
4914 "--onall Run the given command with argument on all sshlogins",
4915 "--nonall Run the given command with no arguments on all sshlogins",
4917 "--pipe Split stdin (standard input) to multiple jobs.",
4918 "--recend str Record end separator for --pipe.",
4919 "--recstart str Record start separator for --pipe.",
4921 "GNU Parallel can do much more. See 'man $Global::progname' for details",
4923 "Academic tradition requires you to cite works you base your article on.",
4924 "If you use programs that use GNU Parallel to process data for an article in a",
4925 "scientific publication, please cite:",
4927 " Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4928 " Zenodo. https://doi.org/10.5281/zenodo.4045386",
4930 # Before changing this line, please read
4931 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4932 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4933 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4934 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4939 sub citation_notice
() {
4940 # if --will-cite or --plain: do nothing
4941 # if stderr redirected: do nothing
4942 # if $PARALLEL_HOME/will-cite: do nothing
4943 # else: print citation notice to stderr
4948 not -t
$Global::original_stderr
4950 grep { -e
"$_/will-cite" } @Global::config_dirs
) {
4954 ("Academic tradition requires you to cite works you base your article on.",
4955 "If you use programs that use GNU Parallel to process data for an article in a",
4956 "scientific publication, please cite:",
4958 " Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4959 " Zenodo. https://doi.org/10.5281/zenodo.4045386",
4961 # Before changing this line, please read
4962 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
4963 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4964 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4965 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4967 "More about funding GNU Parallel and the citation notice:",
4968 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4970 "To silence this citation notice: run 'parallel --citation' once.",
4973 mkdir $Global::config_dir
;
4974 # Number of times the user has run GNU Parallel without showing
4975 # willingness to cite
4977 if(open (my $fh, "<", $Global::config_dir
.
4978 "/runs-without-willing-to-cite")) {
4983 if(open (my $fh, ">", $Global::config_dir
.
4984 "/runs-without-willing-to-cite")) {
4988 ::status
("Come on: You have run parallel $runs times. Isn't it about time ",
4989 "you run 'parallel --citation' once to silence the citation notice?",
4998 my $fh = $Global::status_fd
|| *STDERR
;
4999 print $fh map { ($_, "\n") } @w;
5003 sub status_no_nl
(@
) {
5005 my $fh = $Global::status_fd
|| *STDERR
;
5012 my $prog = $Global::progname
|| "parallel";
5013 status_no_nl
(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5018 my $prog = $Global::progname
|| "parallel";
5019 status
(map { ($prog.": Error: ". $_); } @w);
5025 ("$Global::progname: This should not happen. You have found a bug.\n",
5026 "Please contact <parallel\@gnu.org> and follow\n",
5027 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
5029 "Include this in the report:\n",
5030 "* The version number: $Global::version\n",
5031 "* The bugid: $bugid\n",
5032 "* The command line being run\n",
5033 "* The files being read (put the files on a webserver if they are big)\n",
5035 "If you get the error on smaller/fewer files, please include those instead.\n");
5036 ::wait_and_exit
(255);
5043 "GNU $Global::progname $Global::version",
5044 "Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free Software",
5046 "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5047 "This is free software: you are free to change and redistribute it.",
5048 "GNU $Global::progname comes with no warranty.",
5050 "Web site: https://www.gnu.org/software/${Global::progname}\n",
5051 "When using programs that use GNU Parallel to process data for publication",
5052 "please cite as described in 'parallel --citation'.\n",
5058 my ($all_argv_ref,$argv_options_removed_ref) = @_;
5059 my $all_argv = "@$all_argv_ref";
5060 my $no_opts = "@$argv_options_removed_ref";
5061 $all_argv=~s/--citation//;
5062 if($all_argv ne $no_opts) {
5063 ::warning
("--citation ignores all other options and arguments.");
5068 "Academic tradition requires you to cite works you base your article on.",
5069 "If you use programs that use GNU Parallel to process data for an article in a",
5070 "scientific publication, please cite:",
5072 "\@software{tange_2020_4045386,",
5073 " author = {Tange, Ole},",
5074 " title = {GNU Parallel 20200922 ('Ginsburg')},",
5077 " note = {{GNU Parallel is a general parallelizer to run",
5078 " multiple serial command line programs in parallel",
5079 " without changing them.}},",
5080 " publisher = {Zenodo},",
5081 " doi = {10.5281/zenodo.4045386},",
5082 " url = {https://doi.org/10.5281/zenodo.4045386}",
5085 "(Feel free to use \\nocite{tange_2020_4045386})",
5087 # Before changing this line, please read
5088 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5089 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5090 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5091 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5093 "More about funding GNU Parallel and the citation notice:",
5094 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
5095 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5096 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
5098 "If you send a copy of your published article to tange\@gnu.org, it will be",
5099 "mentioned in the release notes of next version of GNU Parallel.",
5102 while(not grep { -e
"$_/will-cite" } @Global::config_dirs
) {
5103 print "\nType: 'will cite' and press enter.\n> ";
5104 my $input = <STDIN
>;
5105 if(not defined $input) {
5108 if($input =~ /will cite/i) {
5109 mkdir $Global::config_dir
;
5110 if(open (my $fh, ">", $Global::config_dir
."/will-cite")) {
5114 "Thank you for your support: You are the reason why there is funding to",
5115 "continue maintaining GNU Parallel. On behalf of future versions of",
5116 "GNU Parallel, which would not exist without your support:",
5118 " THANK YOU SO MUCH",
5120 "It is really appreciated. The citation notice is now silenced.",
5125 "Thank you for your support. It is much appreciated. The citation",
5126 "cannot permanently be silenced. Use '--will-cite' instead.",
5128 "If you use '--will-cite' in scripts to be run by others you are making",
5129 "it harder for others to see the citation notice. The development of",
5130 "GNU Parallel is indirectly financed through citations, so if users",
5131 "do not know they should cite then you are making it harder to finance",
5132 "development. However, if you pay 10000 EUR, you should feel free to",
5133 "use '--will-cite' in scripts.",
5143 print("Maximal size of command: ",Limits
::Command
::real_max_length
(),"\n",
5144 "Maximal used size of command: ",Limits
::Command
::max_length
(),"\n",
5146 "Execution of will continue now, and it will try to read its input\n",
5147 "and run commands; if this is not what you wanted to happen, please\n",
5148 "press CTRL-D or CTRL-C\n");
5152 # Give an embeddable version of GNU Parallel
5153 # Tested with: bash, zsh, ksh, ash, dash, sh
5154 my $randomstring = "cut-here-".join"",
5155 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
5156 if(not -f
$0 or not -r
$0) {
5157 ::error
("--embed only works if parallel is a readable file");
5160 if(open(my $fh, "<", $0)) {
5161 # Read the source from $0
5163 my $user = $ENV{LOGNAME
} || $ENV{USERNAME
} || $ENV{USER
};
5164 my @env_parallel_source = ();
5165 my $shell = $Global::shell
;
5167 for(which
("env_parallel.$shell")) {
5169 # Read the source of env_parallel.shellname
5170 open(my $env_parallel_source_fh, $_) || die;
5171 @env_parallel_source = <$env_parallel_source_fh>;
5172 close $env_parallel_source_fh;
5175 print "#!$Global::shell
5177 # Copyright (C) 2007-2020 $user, Ole Tange, http://ole.tange.dk
5178 # and Free Software Foundation, Inc.
5180 # This program is free software; you can redistribute it and/or modify
5181 # it under the terms of the GNU General Public License as published by
5182 # the Free Software Foundation; either version 3 of the License, or
5183 # (at your option) any later version.
5185 # This program is distributed in the hope that it will be useful, but
5186 # WITHOUT ANY WARRANTY; without even the implied warranty of
5187 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5188 # General Public License for more details.
5190 # You should have received a copy of the GNU General Public License
5191 # along with this program; if not, see <https://www.gnu.org/licenses/>
5192 # or write to the Free Software Foundation, Inc., 51 Franklin St,
5193 # Fifth Floor, Boston, MA 02110-1301 USA
5197 # Embedded GNU Parallel created with --embed
5199 # Start GNU Parallel without leaving temporary files
5201 # Not all shells support 'perl <(cat ...)'
5202 # This is a complex way of doing:
5203 # perl <(cat <<'cut-here'
5206 # and also avoiding:
5209 # Make a temporary fifo that perl can read from
5210 _fifo_with_GNU_Parallel_source
=`perl -e 'use POSIX qw(mkfifo);
5212 $f = "/tmp/parallel-".join"",
5213 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5217 # Put source code into temporary file
5218 # so it is easy to copy to the fifo
5219 _file_with_GNU_Parallel_source=`mktemp`;
5221 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
5225 # Copy the source code from the file to the fifo
5226 # and remove the file and fifo ASAP
5227 # 'sh
-c
' is needed to avoid
5229 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
5231 # Read the source from the fifo
5232 perl $_fifo_with_GNU_Parallel_source "$@"
5235 @env_parallel_source,
5238 # This will call the functions above
5239 parallel -k echo ::: Put your code here
5240 env_parallel --session
5241 env_parallel -k echo ::: Put your code here
5242 parset p,y,c,h -k echo ::: Put your code here
5246 ::error("Cannot open $0");
5249 ::status("Redirect the output to a file and add your changes at the end:",
5250 " $0 --embed > new_script");
5254 sub __GENERIC_COMMON_FUNCTION__() {}
5257 sub mkdir_or_die($) {
5258 # If dir is not executable: die
5260 # The eval is needed to catch exception from mkdir
5261 eval { File::Path::mkpath($dir); };
5263 ::error("Cannot change into non-executable dir $dir: $!");
5264 ::wait_and_exit(255);
5269 # Create tempfile as $TMPDIR/parXXXXX
5271 # $filehandle = opened file handle
5272 # $filename = file name created
5273 my($filehandle,$filename) =
5274 ::tempfile(DIR=>$ENV{'TMPDIR
'}, TEMPLATE => 'parXXXXX
', @_);
5276 return($filehandle,$filename);
5278 # Separate unlink due to NFS dealing badly with File::Temp
5285 # Select a name that does not exist
5286 # Do not create the file as it may be used for creating a socket (by tmux)
5287 # Remember the name in $Global::unlink to avoid hitting the same name twice
5290 if(not -w $ENV{'TMPDIR
'}) {
5291 if(not -e $ENV{'TMPDIR
'}) {
5292 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
5294 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w
$ENV{'TMPDIR'}'");
5296 ::wait_and_exit(255);
5299 $tmpname = $ENV{'TMPDIR
'}."/".$name.
5300 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5301 } while(-e $tmpname or $Global::unlink{$tmpname}++);
5306 # Find an unused name and mkfifo on it
5307 my $tmpfifo = tmpname("fif");
5308 mkfifo($tmpfifo,0600);
5313 # Remove file and remove it from %Global::unlink
5316 delete @Global::unlink{@_};
5320 sub size_of_block_dev() {
5321 # Like -s but for block devices
5323 # $blockdev = file name of block device
5325 # $size = in bytes, undef if error
5326 my $blockdev = shift;
5327 if(open(my $fh, "<", $blockdev)) {
5328 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
5329 my $size = tell($fh);
5333 ::error("cannot open $blockdev");
5339 # Like qx but with clean environment (except for @keep)
5340 # and STDERR ignored
5341 # This is needed if the environment contains functions
5342 # that /bin/sh does not understand
5344 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
5345 # ssh with Kerberos needs KRB5CCNAME
5346 # tmux needs LC_CTYPE
5347 # lsh needs HOME LOGNAME
5348 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE HOME LOGNAME);
5349 @env{@keep} = @ENV{@keep};
5352 if($Global::debug
) {
5353 # && true is to force spawning a shell and not just exec'ing
5354 return qx{ @_ && true
};
5356 # CygWin does not respect 2>/dev/null
5357 # so we do that by hand
5358 # This trick does not work:
5359 # https://stackoverflow.com/q/13833088/363028
5361 # open(STDERR, ">", "/dev/null");
5362 open(local *CHILD_STDIN
, '<', '/dev/null') or die $!;
5363 open(local *CHILD_STDERR
, '>', '/dev/null') or die $!;
5365 # eval is needed if open3 fails (e.g. command line too long)
5371 # && true is to force spawning a shell and not just exec'ing
5375 # Make sure $? is set
5377 return wantarray ?
@arr : join "",@arr;
5379 # If eval fails, force $?=false
5386 # Remove duplicates and return unique values
5387 return keys %{{ map { $_ => 1 } @_ }};
5392 # Minimum value of array
5397 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
5398 $min = ($min < $_) ?
$min : $_;
5405 # Maximum value of array
5410 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
5411 $max = ($max > $_) ?
$max : $_;
5418 # Sum of values of array
5423 $_ and do { $sum += $_; }
5428 sub undef_as_zero
($) {
5433 sub undef_as_empty
($) {
5435 return $a ?
$a : "";
5438 sub undef_if_empty
($) {
5439 if(defined($_[0]) and $_[0] eq "") {
5445 sub multiply_binary_prefix
(@
) {
5446 # Evalualte numbers with binary prefix
5447 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5448 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5449 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5450 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5451 # 13G = 13*1024*1024*1024 = 13958643712
5453 # $s = string with prefixes
5455 # $value = int with prefixes multiplied
5461 s/gi/*1024*1024*1024/gi;
5462 s/ti/*1024*1024*1024*1024/gi;
5463 s/pi/*1024*1024*1024*1024*1024/gi;
5464 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5465 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5466 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5467 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5471 s/G/*1024*1024*1024/g;
5472 s/T/*1024*1024*1024*1024/g;
5473 s/P/*1024*1024*1024*1024*1024/g;
5474 s/E/*1024*1024*1024*1024*1024*1024/g;
5475 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5476 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5477 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5481 s/g/*1000*1000*1000/g;
5482 s/t/*1000*1000*1000*1000/g;
5483 s/p/*1000*1000*1000*1000*1000/g;
5484 s/e/*1000*1000*1000*1000*1000*1000/g;
5485 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5486 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5487 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5491 return wantarray ?
@v : $v[0];
5494 sub multiply_time_units
($) {
5495 # Evalualte numbers with time units
5496 # s=1, m=60, h=3600, d=86400
5498 # $s = string time units
5500 # $value = int in seconds
5512 return wantarray ?
@v : $v[0];
5515 sub seconds_to_time_units
() {
5516 # Convert seconds into ??d??h??m??s
5517 # s=1, m=60, h=3600, d=86400
5519 # $s = int in seconds
5521 # $str = string time units
5524 my $d = int($s/86400);
5526 my $h = int($s/3600);
5531 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5533 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5535 $str = sprintf("%dm%02ds",$m,$s);
5537 $str = sprintf("%ds",$s);
5543 my ($disk_full_fh, $b8193, $error_printed);
5544 sub exit_if_disk_full
() {
5545 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5546 # If the disk is full: Exit immediately.
5549 if(not $disk_full_fh) {
5550 $disk_full_fh = ::tmpfile
(SUFFIX
=> ".df");
5553 # Linux does not discover if a disk is full if writing <= 8192
5555 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5556 # ntfs reiserfs tmpfs ubifs vfat xfs
5557 # TODO this should be tested on different OS similar to this:
5560 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5561 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5562 # seq 6900000 > /mnt/loop/i && echo seq OK
5563 # seq 6980868 > /mnt/loop/i
5564 # seq 10000 > /mnt/loop/ii
5566 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5569 print $disk_full_fh $b8193;
5570 if(not $disk_full_fh
5572 tell $disk_full_fh != 8193) {
5573 # On raspbian the disk can be full except for 10 chars.
5574 if(not $error_printed) {
5575 ::error
("Output is incomplete.",
5576 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5577 "Is the disk full?",
5578 "Change \$TMPDIR with --tmpdir or use --compress.");
5581 ::wait_and_exit
(255);
5583 truncate $disk_full_fh, 0;
5584 seek($disk_full_fh, 0, 0) || die;
5589 # Remove comments and spaces
5591 # $spaces = keep 1 space?
5592 # $s = string to remove spaces from
5594 # $s = with spaces removed
5600 } elsif(2 == $spaces) {
5602 $s =~ s/\n\n+/\n/sg;
5603 $s =~ s/[ \t]+/ /mg;
5604 } elsif(3 == $spaces) {
5605 # Keep perl code required space
5606 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5607 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5619 $hostname = `hostname`;
5621 $hostname ||= "nohostname";
5629 # @programs = programs to find the path to
5631 # @full_path = full paths to @programs. Nothing if not found
5634 push(@which, grep { not -d
$_ and -x
$_ }
5635 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5638 push(@which, grep { not -d
$_ and -x
$_ } $prg);
5641 ::debug
("which", "$which[0] in $ENV{'PATH'}\n");
5642 return wantarray ?
@which : $which[0];
5646 my ($regexp,$shell,%fakename);
5650 # $pid = pid to see if (grand)*parent is a shell
5652 # $shellpath = path to shell - undef if no shell found
5654 ::debug
("init","Parent of $pid\n");
5656 # All shells known to mankind
5658 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5659 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5661 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
5662 ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5663 static-sh tcsh yash zsh -sh -csh -bash),
5664 '-sh (sh)' # sh on FreeBSD
5666 # Can be formatted as:
5667 # [sh] -sh sh busybox sh -sh (sh)
5668 # /bin/sh /sbin/sh /opt/csw/sh
5669 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5670 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5671 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5672 '(-?)('. $shell. '))( *$| [^(])';
5674 # sh disguises itself as -sh (sh) on FreeBSD
5675 "-sh (sh)" => ["sh"],
5676 # csh and tcsh disguise themselves as -sh/-csh
5677 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5678 # but sh also disguises itself as -sh
5679 # (TODO When does that happen?)
5681 "-csh" => ["tcsh", "csh"],
5682 # ash disguises itself as -ash
5683 "-ash" => ["ash", "dash", "sh"],
5684 # dash disguises itself as -dash
5685 "-dash" => ["dash", "ash", "sh"],
5686 # bash disguises itself as -bash
5687 "-bash" => ["bash", "sh"],
5688 # ksh disguises itself as -ksh
5689 "-ksh" => ["ksh", "sh"],
5690 # zsh disguises itself as -zsh
5691 "-zsh" => ["zsh", "sh"],
5694 if($^O
eq "linux") {
5695 # Optimized for GNU/Linux
5700 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
5702 chomp($shellline = <$fd>);
5703 if($shellline =~ /$regexp/o) {
5704 my $shellname = $4 || $8;
5705 my $dash = $3 || $7;
5706 if($shellname eq "sh" and $dash) {
5708 if($shellpath = readlink "/proc/$testpid/exe") {
5709 ::debug
("init","procpath $shellpath\n");
5710 if($shellpath =~ m
:/$shell$:o
) {
5711 ::debug
("init", "proc which ".$shellpath." => ");
5716 ::debug
("init", "which ".$shellname." => ");
5717 $shellpath = (which
($shellname,@
{$fakename{$shellname}}))[0];
5718 ::debug
("init", "shell path $shellpath\n");
5723 if(open(my $fd, "<", "/proc/$testpid/stat")) {
5726 # Parent pid is field 4
5727 $testpid = (split /\s+/, $line)[3];
5729 # Something is wrong: fall back to old method
5734 # if -sh or -csh try readlink /proc/$$/exe
5735 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table
();
5739 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5740 my $shellname = $4 || $8;
5741 my $dash = $3 || $7;
5742 if($shellname eq "sh" and $dash) {
5744 if($shellpath = readlink "/proc/$testpid/exe") {
5745 ::debug
("init","procpath $shellpath\n");
5746 if($shellpath =~ m
:/$shell$:o
) {
5747 ::debug
("init", "proc which ".$shellpath." => ");
5752 ::debug
("init", "which ".$shellname." => ");
5753 $shellpath = (which
($shellname,@
{$fakename{$shellname}}))[0];
5754 ::debug
("init", "shell path $shellpath\n");
5755 $shellpath and last;
5757 if($testpid == $parent_of_ref->{$testpid}) {
5758 # In Solaris zones, the PPID of the zsched process is itself
5761 $testpid = $parent_of_ref->{$testpid};
5768 my %pid_parentpid_cmd;
5772 # %children_of = { pid -> children of pid }
5773 # %parent_of = { pid -> pid of parent }
5774 # %name_of = { pid -> commandname }
5776 if(not %pid_parentpid_cmd) {
5777 # Filter for SysV-style `ps`
5778 my $sysv = q
( ps
-ef
| perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5779 q(s/^.{$s}//; print "@F[1,2] $_"' );
5780 # Minix uses cols 2,3 and can have newlines in the command
5781 # so lines not having numbers in cols 2,3 must be ignored
5782 my $minix = q
( ps
-ef
| perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5783 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5785 my $bsd = q
(ps
-o pid
,ppid
,command
-ax
);
5786 %pid_parentpid_cmd =
5793 'dragonfly' => $bsd,
5807 'syllable' => "echo ps not supported",
5810 $pid_parentpid_cmd{$^O
} or ::die_bug
("pid_parentpid_cmd for $^O missing");
5812 my (@pidtable,%parent_of,%children_of,%name_of);
5813 # Table with pid -> children of pid
5814 @pidtable = `$pid_parentpid_cmd{$^O}`;
5817 # must match: 24436 21224 busybox ash
5818 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5819 # must match: 24436 21224 <<empty on system running Viber>>
5820 # or: perl -e 'while($0=" "){}'
5821 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5823 /^\s*(\S+)\s+(\S+)\s+()$/) {
5824 $parent_of{$1} = $2;
5825 push @
{$children_of{$2}}, $1;
5828 ::die_bug
("pidtable format: $_");
5831 return(\
%children_of, \
%parent_of, \
%name_of);
5836 # Returns time since epoch as in seconds with 3 decimals
5840 # $time = time now with millisecond accuracy
5841 if(not $Global::use{"Time::HiRes"}) {
5842 if(eval "use Time::HiRes qw ( time );") {
5843 eval "sub TimeHiRestime { return Time::HiRes::time };";
5845 eval "sub TimeHiRestime { return time() };";
5847 $Global::use{"Time::HiRes"} = 1;
5850 return (int(TimeHiRestime
()*1000))/1000;
5854 # Sleep this many milliseconds.
5856 # $ms = milliseconds to sleep
5858 ::debug
("timing",int($ms),"ms ");
5859 select(undef, undef, undef, $ms/1000);
5862 sub __KILLER_REAPER__
() {}
5865 # Reap dead children.
5866 # If no dead children: Sleep specified amount with exponential backoff
5868 # $ms = milliseconds to sleep
5870 # $ms/2+0.001 if children reaped
5871 # $ms*1.1 if no children reaped
5874 if(not $Global::total_completed
% 100) {
5876 # Force cleaning the timeout queue for every 100 jobs
5877 # Fixes potential memleak
5878 $Global::timeoutq
->process_timeouts();
5881 # Sleep exponentially shorter (1/2^n) if a job finished
5885 $Global::timeoutq
->process_timeouts();
5888 kill_youngster_if_not_enough_mem
($opt::memfree
*0.5);
5890 if($opt::memsuspend
) {
5891 kill_youngster_if_not_enough_mem
($opt::memsuspend
*0.5);
5894 kill_youngest_if_over_limit
();
5896 exit_if_disk_full
();
5897 if($opt::linebuffer
) {
5898 my $something_printed = 0;
5899 if($opt::keeporder
) {
5900 for my $job (values %Global::running
) {
5901 $something_printed += $job->print_earlier_jobs();
5904 for my $job (values %Global::running
) {
5905 $something_printed += $job->print();
5908 if($something_printed) {
5913 # When a child dies, wake up from sleep (or select(,,,))
5914 $SIG{CHLD
} = sub { kill "ALRM", $$ };
5916 # The 0.004s is approximately the time it takes for one round
5917 usleep
(1000*($Global::newest_starttime
+
5918 $opt::delay
- 0.004 - ::now
()));
5922 # --compress needs $SIG{CHLD} unset
5923 $SIG{CHLD
} = 'DEFAULT';
5925 # Sleep exponentially longer (1.1^n) if a job did not finish,
5926 # though at most 1000 ms.
5927 return (($ms < 1000) ?
($ms * 1.1) : ($ms));
5931 sub kill_youngest_if_over_limit
() {
5932 # Check each $sshlogin we are over limit
5933 # If over limit: kill off the youngest child
5934 # Put the child back in the queue.
5940 for my $job (values %Global::running
) {
5941 if(not $jobs_of{$job->sshlogin()}) {
5942 push @sshlogins, $job->sshlogin();
5944 push @
{$jobs_of{$job->sshlogin()}}, $job;
5946 for my $sshlogin (@sshlogins) {
5947 for my $job (sort { $b->seq() <=> $a->seq() } @
{$jobs_of{$sshlogin}}) {
5948 if($sshlogin->limit() == 2) {
5956 sub kill_youngster_if_not_enough_mem
() {
5957 # Check each $sshlogin if there is enough mem.
5958 # If less than 50% enough free mem: kill off the youngest child
5959 # Put the child back in the queue.
5966 for my $job (values %Global::running
) {
5967 if(not $jobs_of{$job->sshlogin()}) {
5968 push @sshlogins, $job->sshlogin();
5970 push @
{$jobs_of{$job->sshlogin()}}, $job;
5972 for my $sshlogin (@sshlogins) {
5973 for my $job (sort { $b->seq() <=> $a->seq() } @
{$jobs_of{$sshlogin}}) {
5974 if($sshlogin->memfree() < $limit) {
5975 ::debug
("mem","\n",map { $_->seq()." " }
5976 (sort { $b->seq() <=> $a->seq() }
5977 @
{$jobs_of{$sshlogin}}));
5978 ::debug
("mem","\n", $job->seq(), "killed ",
5979 $sshlogin->memfree()," < ",$limit);
5980 if($opt::memsuspend
) {
5985 $sshlogin->memfree_recompute();
5990 ::debug
("mem","Free mem OK? ",
5991 $sshlogin->memfree()," > ",$limit);
5996 sub __DEBUGGING__
() {}
6004 $Global::debug
or return;
6005 @_ = grep { defined $_ ?
$_ : "" } @_;
6006 if($Global::debug
eq "all" or $Global::debug
eq $_[0]) {
6007 if($Global::fd
{1}) {
6008 # Original stdout was saved
6009 my $stdout = $Global::fd
{1};
6010 print $stdout @_[1..$#_];
6017 sub my_memory_usage
() {
6019 # memory usage if found
6026 if(-e
"/proc/$pid/stat") {
6027 my $fh = FileHandle
->new("</proc/$pid/stat");
6033 my @procinfo = split(/\s+/,$data);
6035 return undef_as_zero
($procinfo[22]);
6043 # $size = size of object if Devel::Size is installed
6045 my @size_this = (@_);
6046 eval "use Devel::Size qw(size total_size)";
6050 return total_size(@_);
6056 # ascii expression of object if Data::Dump(er) is installed
6057 # error code otherwise
6058 my @dump_this = (@_);
6059 eval "use Data
::Dump
qw(dump);";
6061 # Data::Dump not installed
6062 eval "use Data
::Dumper
;";
6064 my $err = "Neither Data
::Dump nor Data
::Dumper is installed
\n".
6065 "Not dumping output
\n";
6069 return Dumper(@dump_this);
6072 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
6074 eval "sub Data
::Dump
:dump {}";
6075 eval "use Data
::Dump
qw(dump);";
6076 return (Data::Dump::dump(@dump_this));
6093 sub __OBJECT_ORIENTED_PARTS__() {}
6100 my $sshlogin_string = shift;
6103 # SSHLogins can have these formats:
6104 # @grp+grp/ncpu//usr/bin/ssh user@server
6105 # ncpu//usr/bin/ssh user@server
6106 # /usr/bin/ssh user@server
6109 # @grp+grp/user@server
6110 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
6111 # Look for SSHLogin hostgroups
6112 %hostgroups = map { $_ => 1 } split(/\+/, $1);
6114 # An SSHLogin is always in the hostgroup of its "numcpu
/host
"
6115 $hostgroups{$sshlogin_string} = 1;
6116 if ($sshlogin_string =~ s:^(\d+)/::) {
6117 # Override default autodetected ncpus unless missing
6120 my $string = $sshlogin_string;
6121 # An SSHLogin is always in the hostgroup of its $string-name
6122 $hostgroups{$string} = 1;
6123 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
6125 my $no_slash_string = $string;
6126 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
6128 'string' => $string,
6129 'jobs_running' => 0,
6130 'jobs_completed' => 0,
6131 'maxlength' => undef,
6132 'max_jobs_running' => undef,
6133 'orig_max_jobs_running' => undef,
6135 'hostgroups' => \%hostgroups,
6136 'sshcommand' => undef,
6137 'serverlogin' => undef,
6138 'control_path_dir' => undef,
6139 'control_path' => undef,
6140 'time_to_login' => undef,
6141 'last_login_at' => undef,
6142 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
6143 $no_slash_string . "/loadavg
",
6145 'last_loadavg_update' => 0,
6146 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
6147 $no_slash_string . "/swap_activity
",
6148 'swap_activity' => undef,
6149 }, ref($class) || $class;
6154 # Remove temporary files if they are created.
6155 ::rm($self->{'loadavg_file'});
6156 ::rm($self->{'swap_activity_file'});
6161 return $self->{'string'};
6164 sub jobs_running($) {
6166 return ($self->{'jobs_running'} || "0");
6169 sub inc_jobs_running($) {
6171 $self->{'jobs_running'}++;
6174 sub dec_jobs_running($) {
6176 $self->{'jobs_running'}--;
6179 sub set_maxlength($$) {
6181 $self->{'maxlength'} = shift;
6186 return $self->{'maxlength'};
6189 sub jobs_completed() {
6191 return $self->{'jobs_completed'};
6194 sub in_hostgroups() {
6196 # @hostgroups = the hostgroups to look for
6198 # true if intersection of @hostgroups and the hostgroups of this
6199 # SSHLogin is non-empty
6201 return grep { defined $self->{'hostgroups'}{$_} } @_;
6206 return keys %{$self->{'hostgroups'}};
6209 sub inc_jobs_completed($) {
6211 $self->{'jobs_completed'}++;
6212 $Global::total_completed++;
6215 sub set_max_jobs_running($$) {
6217 if(defined $self->{'max_jobs_running'}) {
6218 $Global::max_jobs_running -= $self->{'max_jobs_running'};
6220 $self->{'max_jobs_running'} = shift;
6221 if(defined $self->{'max_jobs_running'}) {
6222 # max_jobs_running could be resat if -j is a changed file
6223 $Global::max_jobs_running += $self->{'max_jobs_running'};
6225 # Initialize orig to the first non-zero value that comes around
6226 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
6233 $self->memfree_recompute();
6234 # Return 1 if not defined.
6235 return (not defined $self->{'memfree'} or $self->{'memfree'})
6238 sub memfree_recompute() {
6240 my $script = memfreescript();
6242 # TODO add sshlogin and backgrounding
6243 # Run the script twice if it gives 0 (typically intermittent error)
6244 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
6245 if(not $self->{'memfree'}) {
6246 ::die_bug("Less than
1 byte memory free
");
6248 #::debug("mem
","New free
:",$self->{'memfree'}," ");
6254 sub memfreescript() {
6256 # shellscript for giving available memory in bytes
6267 awk '/^((Swap)?Cached|MemFree|Buffers):/
6268 { sum += \$2} END { print sum }'
6271 # Android uses same code as GNU/Linux
6275 awk '/^((Swap)?Cached|MemFree|Buffers):/
6276 { sum += \$2} END { print sum }'
6280 # procs memory page faults cpu
6281 # r b w avm free re at pi po fr de sr in sy cs us sy id
6282 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
6285 print (((reverse `vmstat 1 1`)[0]
6286 =~ /(?:\d+\D+){4}(\d
+)/)[0]*1024)
6289 # kthr memory page disk faults cpu
6290 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
6291 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
6292 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
6294 # The second free value is correct
6297 print (((reverse `vmstat 1 2`)[0]
6298 =~ /(?:\d+\D+){4}(\d
+)/)[0]*1024)
6301 # vm.stats.vm.v_cache_count: 0
6302 # vm.stats.vm.v_inactive_count: 79574
6303 # vm.stats.vm.v_free_count: 4507
6306 for(qx{/sbin/sysctl -a}) {
6307 if (/^([^:]+):\s+(.+)\s*$/s) {
6311 print $sysctl->{"hw.pagesize"} *
6312 ($sysctl->{"vm.stats.vm.v_cache_count"}
6313 + $sysctl->{"vm.stats.vm.v_inactive_count"}
6314 + $sysctl->{"vm.stats.vm.v_free_count"});
6316 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6317 # Pages free: 198061.
6318 # Pages active: 159701.
6319 # Pages inactive: 47378.
6320 # Pages speculative: 29707.
6321 # Pages wired down: 89231.
6322 # "Translation faults": 928901425.
6323 # Pages copy-on-write: 156988239.
6324 # Pages zero filled: 271267894.
6325 # Pages reactivated: 48895.
6328 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
6332 print (($vm =~ /page size of (\d+)/)[0] *
6333 (($vm =~ /Pages free:\s+(\d+)/)[0] +
6334 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
6337 my $perlscript = "";
6338 # Make a perl script that detects the OS ($^O) and runs
6339 # the appropriate command
6340 for my $os (keys %script_of) {
6341 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
6343 $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
6351 # 0 = Below limit. Start another job.
6352 # 1 = Over limit. Start no jobs.
6353 # 2 = Kill youngest job
6356 if(not defined $self->{'limitscript'}) {
6362 # Do the measurement in the background
6364 LANG=C iostat -x 1 2 > $tmp;
6366 perl -e '-e $ARGV[0] or exit(1);
6369 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
6370 exit ($max < '$limit')' $io_file;
6378 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
6380 if (sum*1024 < '$limit'/2) { exit 2; }
6381 else { exit (sum*1024 < '$limit') }
6390 ps ax -o state,command |
6391 grep -E '^[DOR].[^[]' |
6393 perl -ne 'exit ('$limit' < $_)';
6399 my ($cmd,@args) = split /\s+/,$opt::limit;
6400 if($limitscripts{$cmd}) {
6401 my $tmpfile = ::tmpname("parlmt");
6402 ++$Global::unlink{$tmpfile};
6403 $self->{'limitscript'} =
6404 ::spacefree(1, sprintf($limitscripts{$cmd},
6405 ::multiply_binary_prefix(@args),$tmpfile));
6407 $self->{'limitscript'} = $opt::limit;
6413 $ENV{'SSHLOGIN'} = $self->string();
6414 system($Global::shell,"-c",$self->{'limitscript'});
6415 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
6422 my $swapping = $self->swap_activity();
6423 return (not defined $swapping or $swapping)
6426 sub swap_activity($) {
6427 # If the currently known swap activity is too old:
6428 # Recompute a new one in the background
6430 # last swap activity computed
6432 # Should we update the swap_activity file?
6433 my $update_swap_activity_file = 0;
6434 if(-r $self->{'swap_activity_file'}) {
6435 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
6436 ::die_bug("swap_activity_file-r");
6437 my $swap_out = <$swap_fh>;
6439 if($swap_out =~ /^(\d+)$/) {
6440 $self->{'swap_activity'} = $1;
6441 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
6443 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
6444 if(time - $self->{'last_swap_activity_update'} > 10) {
6445 # last swap activity update was started 10 seconds ago
6446 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
6447 $update_swap_activity_file = 1;
6450 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
6451 $self->{'swap_activity'} = undef;
6452 $update_swap_activity_file = 1;
6454 if($update_swap_activity_file) {
6455 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
6456 $self->{'last_swap_activity_update'} = time;
6457 my $dir = ::dirname($self->{'swap_activity_file'});
6458 -d $dir or eval { File::Path::mkpath($dir); };
6460 $swap_activity = swapactivityscript();
6461 if($self->{'string'} ne ":") {
6462 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
6463 ::Q($swap_activity);
6465 # Run swap_activity measuring.
6466 # As the command can take long to run if run remote
6467 # save it to a tmp file before moving it to the correct file
6468 my $file = $self->{'swap_activity_file'};
6469 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
6470 ::debug("swap", "\n", $swap_activity, "\n");
6471 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
6473 return $self->{'swap_activity'};
6479 sub swapactivityscript() {
6481 # shellscript for detecting swap activity
6483 # arguments for vmstat are OS dependant
6484 # swap_in and swap_out are in different columns depending on OS
6490 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6491 # r b swpd free buff cache si so bi bo in cs us sy id wa
6492 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6493 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6494 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6498 # kthr memory page disk faults cpu
6499 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6500 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6501 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6502 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6504 # darwin (macosx): $21*$22
6506 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6507 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6508 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6509 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6510 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6514 # procs faults cpu memory page disk
6515 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6516 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6517 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6518 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6522 # System configuration: lcpu=1 mem=2048MB
6524 # kthr memory page faults cpu
6525 # ----- ----------- ------------------------ ------------ -----------
6526 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6527 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6528 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6529 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6533 # procs memory page disks faults cpu
6534 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6535 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6536 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6537 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6541 # procs memory page disks traps cpu
6542 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6543 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6544 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6545 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6549 # procs memory page disks faults cpu
6550 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6551 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6552 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6553 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6557 # procs memory page disks traps cpu
6558 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6559 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6560 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6561 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6565 # procs memory page faults cpu
6566 # r b w avm free re at pi po fr de sr in sy cs us sy id
6567 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6568 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6569 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6571 # dec_osf (tru64): $11*$12
6573 # Virtual Memory Statistics: (pagesize = 8192)
6574 # procs memory pages intr cpu
6575 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6576 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6577 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6578 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6582 # (pagesize: 4, size: 512288, swap size: 894972)
6583 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6584 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6585 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6586 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6588 # -nto (qnx has no swap)
6592 my $perlscript = "";
6593 # Make a perl script that detects the OS ($^O) and runs
6594 # the appropriate vmstat command
6595 for my $os (keys %vmstat) {
6596 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6597 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6598 $vmstat{$os}[1] . '}"` }';
6600 $script = "perl -e " . ::Q($perlscript);
6606 sub too_fast_remote_login($) {
6608 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6609 # sshd normally allows 10 simultaneous logins
6610 # A login takes time_to_login
6611 # So time_to_login/5 should be safe
6612 # If now <= last_login + time_to_login/5: Then it is too soon.
6613 my $too_fast = (::now() <= $self->{'last_login_at'}
6614 + $self->{'time_to_login'}/5);
6615 ::debug("run", "Too fast? $too_fast ");
6618 # No logins so far (or time_to_login not computed): it is not too fast
6623 sub last_login_at($) {
6625 return $self->{'last_login_at'};
6628 sub set_last_login_at($$) {
6630 $self->{'last_login_at'} = shift;
6633 sub loadavg_too_high($) {
6635 my $loadavg = $self->loadavg();
6636 if(defined $loadavg) {
6637 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6638 return $loadavg >= $self->max_loadavg();
6640 # Unknown load: Assume load is too high
6649 # aix => "ps -ae -o state,command" # state wrong
6650 # bsd => "ps ax -o state,command"
6651 # sysv => "ps -ef -o s -o comm"
6652 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6653 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6654 # awk '{print $2,$1}'
6659 # hpux => ps -el|awk '{print $2,$14,$15}'
6660 # irix => ps -ef -o state -o comm
6662 # minix => ps el|awk '{print \$1,\$11}'
6668 # ultrix => ps -ax | awk '{print $3,$5}'
6669 # unixware => ps -el|awk '{print $2,$14,$15}'
6670 my $ps = ::spacefree(1,q{
6671 $sysv="ps -ef -o s -o comm";
6672 $sysv2="ps -ef -o state -o comm";
6673 $bsd="ps ax -o state,command";
6674 # Treat threads as processes
6675 $bsd2="ps axH -o state,command";
6676 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6677 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6678 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6679 awk '{print $2,$1}' };
6680 $dummy="echo S COMMAND;echo R dummy";
6682 # TODO Find better code for AIX/Android
6684 'android' => "uptime",
6685 'cygwin' => $cygwin,
6687 'dec_osf' => $sysv2,
6688 'dragonfly' => $bsd,
6694 'minix' => "ps el|awk '{print \$1,\$11}'",
6702 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6707 # The command is too long for csh, so base64_wrap the command
6708 $cmd = Job::base64_wrap($ps);
6716 # If the currently know loadavg is too old:
6717 # Recompute a new one in the background
6718 # The load average is computed as the number of processes waiting for disk
6719 # or CPU right now. So it is the server load this instant and not averaged over
6720 # several minutes. This is needed so GNU Parallel will at most start one job
6721 # that will push the load over the limit.
6724 # $last_loadavg = last load average computed (undef if none)
6726 # Should we update the loadavg file?
6727 my $update_loadavg_file = 0;
6728 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6729 local $/; # $/ = undef => slurp whole file
6730 my $load_out = <$load_fh>;
6732 if($load_out =~ /\S/) {
6733 # Content can be empty if ~/ is on NFS
6734 # due to reading being non-atomic.
6736 # Count lines starting with D,O,R but command does not start with [
6737 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6739 # load is overestimated by 1
6740 $self->{'loadavg'} = $load - 1;
6741 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6742 } elsif ($load_out=~/average: (\d+.\d+)/) {
6743 # AIX does not support instant load average
6744 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6745 $self->{'loadavg'} = $1;
6747 ::die_bug("loadavg_invalid_content: " .
6748 $self->{'loadavg_file'} . "\n$load_out");
6751 $update_loadavg_file = 1;
6753 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6754 $self->{'loadavg'} = undef;
6755 $update_loadavg_file = 1;
6757 if($update_loadavg_file) {
6758 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
6759 $self->{'last_loadavg_update'} = time;
6760 my $dir = ::dirname($self->{'swap_activity_file'});
6761 -d $dir or eval { File::Path::mkpath($dir); };
6762 -w $dir or ::die_bug("Cannot write to $dir");
6764 if($self->{'string'} ne ":") {
6765 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
6768 $cmd .= loadavg_cmd();
6770 # As the command can take long to run if run remote
6771 # save it to a tmp file before moving it to the correct file
6772 ::debug("load", "Update load\n");
6773 my $file = $self->{'loadavg_file'};
6774 # tmpfile on same filesystem as $file
6775 my $tmpfile = $file.$$;
6776 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6778 return $self->{'loadavg'};
6781 sub max_loadavg($) {
6783 # If --load is a file it might be changed
6784 if($Global::max_load_file) {
6785 my $mtime = (stat($Global::max_load_file))[9];
6786 if($mtime > $Global::max_load_file_last_mod) {
6787 $Global::max_load_file_last_mod = $mtime;
6788 for my $sshlogin (values %Global::host) {
6789 $sshlogin->set_max_loadavg(undef);
6793 if(not defined $self->{'max_loadavg'}) {
6794 $self->{'max_loadavg'} =
6795 $self->compute_max_loadavg($opt::load);
6797 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6798 return $self->{'max_loadavg'};
6801 sub set_max_loadavg($$) {
6803 $self->{'max_loadavg'} = shift;
6806 sub compute_max_loadavg($) {
6807 # Parse the max loadaverage that the user asked for using --load
6811 my $loadspec = shift;
6813 if(defined $loadspec) {
6814 if($loadspec =~ /^\+(\d+)$/) {
6818 $self->ncpus() + $j;
6819 } elsif ($loadspec =~ /^-(\d+)$/) {
6823 $self->ncpus() - $j;
6824 } elsif ($loadspec =~ /^(\d+)\%$/) {
6827 $self->ncpus() * $j / 100;
6828 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6830 } elsif (-f $loadspec) {
6831 $Global::max_load_file = $loadspec;
6832 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6833 if(open(my $in_fh, "<", $Global::max_load_file)) {
6834 my $opt_load_file = join("",<$in_fh>);
6836 $load = $self->compute_max_loadavg($opt_load_file);
6838 ::error("Cannot open $loadspec.");
6839 ::wait_and_exit(255);
6842 ::error("Parsing of --load failed.");
6852 sub time_to_login($) {
6854 return $self->{'time_to_login'};
6857 sub set_time_to_login($$) {
6859 $self->{'time_to_login'} = shift;
6862 sub max_jobs_running($) {
6864 if(not defined $self->{'max_jobs_running'}) {
6865 my $nproc = $self->compute_number_of_processes($opt::jobs);
6866 $self->set_max_jobs_running($nproc);
6868 return $self->{'max_jobs_running'};
6871 sub orig_max_jobs_running($) {
6873 return $self->{'orig_max_jobs_running'};
6876 sub compute_number_of_processes($) {
6877 # Number of processes wanted and limited by system resources
6879 # Number of processes
6882 my $wanted_processes = $self->user_requested_processes($opt_P);
6883 if(not defined $wanted_processes) {
6884 $wanted_processes = $Global::default_simultaneous_sshlogins;
6886 ::debug("load", "Wanted procs: $wanted_processes\n");
6888 $self->processes_available_by_system_limit($wanted_processes);
6889 ::debug("load", "Limited to procs: $system_limit\n");
6890 return $system_limit;
6895 my $max_system_proc_reached;
6896 my $more_filehandles;
6899 my $count_jobs_already_read;
6905 sub reserve_filehandles($) {
6906 # Reserves filehandle
6909 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6913 sub reserve_process() {
6914 # Spawn a dummy process
6916 if($child = fork()) {
6917 push @children, $child;
6918 $Global::unkilled_children{$child} = 1;
6919 } elsif(defined $child) {
6921 # The child takes one process slot
6922 # It will be killed later
6923 $SIG{'TERM'} = $Global::original_sig{'TERM'};
6924 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6925 # The exec does not work on Cygwin and QNX
6928 # 'exec sleep' takes less RAM than sleeping in perl
6929 exec 'sleep', 10101;
6934 $max_system_proc_reached = 1;
6938 sub get_args_or_jobs() {
6939 # Get an arg or a job (depending on mode)
6940 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6941 # Skip: No need to get args
6943 } elsif(defined $opt::retries and $count_jobs_already_read) {
6944 # For retries we may need to run all jobs on this sshlogin
6945 # so include the already read jobs for this sshlogin
6946 $count_jobs_already_read--;
6949 if($opt::X or $opt::m) {
6950 # The arguments may have to be re-spread over several jobslots
6951 # So pessimistically only read one arg per jobslot
6952 # instead of a full commandline
6953 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6954 if($Global::JobQueue->empty()) {
6957 $job = $Global::JobQueue->get();
6962 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6967 # If there are no more command lines, then we have a process
6968 # per command line, so no need to go further
6969 if($Global::JobQueue->empty()) {
6972 $job = $Global::JobQueue->get();
6973 # Replacement must happen here due to seq()
6974 $job and $job->replaced();
6983 # Cleanup: Close the files
6984 for (values %fh) { close $_ }
6985 # Cleanup: Kill the children
6986 for my $pid (@children) {
6989 delete $Global::unkilled_children{$pid};
6991 # Cleanup: Unget the command_lines or the @args
6992 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6994 $Global::JobQueue->unget(@jobs);
6998 sub processes_available_by_system_limit($) {
6999 # If the wanted number of processes is bigger than the system limits:
7000 # Limit them to the system limits
7001 # Limits are: File handles, number of input lines, processes,
7002 # and taking > 1 second to spawn 10 extra processes
7004 # Number of processes
7006 my $wanted_processes = shift;
7007 my $system_limit = 0;
7008 my $slow_spawning_warning_printed = 0;
7010 $more_filehandles = 1;
7011 $tmpfhname = "TmpFhNamE";
7013 # perl uses 7 filehandles for something?
7014 # parallel uses 1 for memory_usage
7015 # parallel uses 4 for ?
7016 reserve_filehandles(12);
7017 # Two processes for load avg and ?
7021 # For --retries count also jobs already run
7022 $count_jobs_already_read = $Global::JobQueue->next_seq();
7023 my $wait_time_for_getting_args = 0;
7024 my $start_time = time;
7026 $system_limit >= $wanted_processes and last;
7027 not $more_filehandles and last;
7028 $max_system_proc_reached and last;
7030 my $before_getting_arg = time;
7031 if(!$Global::dummy_jobs) {
7032 get_args_or_jobs() or last;
7034 $wait_time_for_getting_args += time - $before_getting_arg;
7037 # Every simultaneous process uses 2 filehandles to write to
7038 # and 2 filehandles to read from
7039 reserve_filehandles(4);
7041 # System process limit
7044 my $forktime = time - $time - $wait_time_for_getting_args;
7045 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
7047 " (processes so far: ", $system_limit,")\n");
7048 if($system_limit > 10 and
7050 $forktime > $system_limit * 0.01
7051 and not $slow_spawning_warning_printed) {
7052 # It took more than 0.01 second to fork a processes on avg.
7053 # Give the user a warning. He can press Ctrl-C if this
7055 ::warning("Starting $system_limit processes took > $forktime sec.",
7056 "Consider adjusting -j. Press CTRL-C to stop.");
7057 $slow_spawning_warning_printed = 1;
7062 if($system_limit < $wanted_processes) {
7063 # The system_limit is less than the wanted_processes
7064 if($system_limit < 1 and not $Global::JobQueue->empty()) {
7065 ::warning("Cannot spawn any jobs. ".
7066 "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
7067 "or /proc/sys/kernel/pid_max may help.");
7068 ::wait_and_exit(255);
7070 if(not $more_filehandles) {
7071 ::warning("Only enough file handles to run ".
7072 $system_limit. " jobs in parallel.",
7073 "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
7074 "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
7075 "or /proc/sys/fs/file-max may help.");
7077 if($max_system_proc_reached) {
7078 ::warning("Only enough available processes to run ".
7079 $system_limit. " jobs in parallel.",
7080 "Raising ulimit -u or /etc/security/limits.conf ",
7081 "or /proc/sys/kernel/pid_max may help.");
7084 if($] == 5.008008 and $system_limit > 1000) {
7085 # https://savannah.gnu.org/bugs/?36942
7086 $system_limit = 1000;
7088 if($Global::JobQueue->empty()) {
7089 $system_limit ||= 1;
7091 if($self->string() ne ":" and
7092 $system_limit > $Global::default_simultaneous_sshlogins) {
7094 $self->simultaneous_sshlogin_limit($system_limit);
7096 return $system_limit;
7100 sub simultaneous_sshlogin_limit($) {
7101 # Test by logging in wanted number of times simultaneously
7103 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
7105 my $wanted_processes = shift;
7106 if($self->{'time_to_login'}) {
7107 return $wanted_processes;
7110 # Try twice because it guesses wrong sometimes
7111 # Choose the minimal
7113 ::min($self->simultaneous_sshlogin($wanted_processes),
7114 $self->simultaneous_sshlogin($wanted_processes));
7115 if($ssh_limit < $wanted_processes) {
7116 my $serverlogin = $self->serverlogin();
7117 ::warning("ssh to $serverlogin only allows ".
7118 "for $ssh_limit simultaneous logins.",
7119 "You may raise this by changing",
7120 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
7121 "You can also try --sshdelay 0.1",
7122 "Using only ".($ssh_limit-1)." connections ".
7123 "to avoid race conditions.");
7124 # Race condition can cause problem if using all sshs.
7125 if($ssh_limit > 1) { $ssh_limit -= 1; }
7130 sub simultaneous_sshlogin($) {
7131 # Using $sshlogin try to see if we can do $wanted_processes
7132 # simultaneous logins
7133 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
7136 # $wanted_processes = Try for this many logins in parallel
7138 # $ssh_limit = Number of succesful parallel logins
7141 my $wanted_processes = shift;
7142 my $sshcmd = $self->sshcommand();
7143 my $serverlogin = $self->serverlogin();
7144 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
7145 # TODO sh -c wrapper to work for csh
7146 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
7147 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
7148 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
7149 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
7150 ::die_bug("simultaneouslogin");
7151 my $ssh_limit = <$simul_fh>;
7159 $self->{'ncpus'} = shift;
7162 sub user_requested_processes($) {
7163 # Parse the number of processes that the user asked for using -j
7165 # $opt_P = string formatted as for -P
7167 # $processes = the number of processes to run on this sshlogin
7171 if(defined $opt_P) {
7172 if($opt_P =~ /^\+(\d+)$/) {
7176 $self->ncpus() + $j;
7177 } elsif ($opt_P =~ /^-(\d+)$/) {
7181 $self->ncpus() - $j;
7182 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
7186 $self->ncpus() * $j / 100;
7187 } elsif ($opt_P =~ /^(\d+)$/) {
7189 if($processes == 0) {
7190 # -P 0 = infinity (or at least close)
7191 $processes = $Global::infinity;
7193 } elsif (-f $opt_P) {
7194 $Global::max_procs_file = $opt_P;
7195 if(open(my $in_fh, "<", $Global::max_procs_file)) {
7196 my $opt_P_file = join("",<$in_fh>);
7198 $processes = $self->user_requested_processes($opt_P_file);
7200 ::error("Cannot open $opt_P.");
7201 ::wait_and_exit(255);
7204 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
7207 $processes = ::ceil($processes);
7213 # Number of CPU threads
7214 # --use_sockets_instead_of_threads = count socket instead
7215 # --use_cores_instead_of_threads = count physical cores instead
7217 # $ncpus = number of cpu (threads) on this sshlogin
7220 if(not defined $self->{'ncpus'}) {
7221 my $sshcmd = $self->sshcommand();
7222 my $serverlogin = $self->serverlogin();
7223 if($serverlogin eq ":") {
7224 if($opt::use_sockets_instead_of_threads) {
7225 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
7226 } elsif($opt::use_cores_instead_of_threads) {
7227 $self->{'ncpus'} = socket_core_thread()->{'cores'};
7229 $self->{'ncpus'} = socket_core_thread()->{'threads'};
7233 ::debug("init","echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7234 if($opt::use_sockets_instead_of_threads
7236 $opt::use_cpus_instead_of_cores) {
7238 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7239 } elsif($opt::use_cores_instead_of_threads) {
7241 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
7244 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
7247 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
7248 $self->{'ncpus'} = $ncpu;
7250 ::warning("Could not figure out ".
7251 "number of cpus on $serverlogin ($ncpu). Using 1.");
7252 $self->{'ncpus'} = 1;
7256 return $self->{'ncpus'};
7262 # Number of threads using `nproc`
7263 my $no_of_threads = ::qqx("nproc");
7264 chomp $no_of_threads;
7265 return $no_of_threads;
7268 sub no_of_sockets() {
7269 return socket_core_thread()->{'sockets'};
7273 return socket_core_thread()->{'cores'};
7276 sub no_of_threads() {
7277 return socket_core_thread()->{'threads'};
7280 sub socket_core_thread() {
7283 # 'sockets' => #sockets = number of socket with CPU present
7284 # 'cores' => #cores = number of physical cores
7285 # 'threads' => #threads = number of compute cores (hyperthreading)
7286 # 'active' => #taskset_threads = number of taskset limited cores
7289 my $cached_cpuspec = $Global::cache_dir . "/tmp/sshlogin/" .
7290 ::hostname() . "/cpuspec";
7291 if(-e $cached_cpuspec and -M $cached_cpuspec < 1) {
7292 # Reading cached copy instead of /proc/cpuinfo is 17 ms faster
7294 if(open(my $in_fh, "<", $cached_cpuspec)) {
7295 ::debug("init","Read $cached_cpuspec\n");
7296 $cpu->{'sockets'} = int(<$in_fh>);
7297 $cpu->{'cores'} = int(<$in_fh>);
7298 $cpu->{'threads'} = int(<$in_fh>);
7302 if ($^O eq 'linux') {
7303 $cpu = sct_gnu_linux($cpu);
7304 } elsif ($^O eq 'android') {
7305 $cpu = sct_android($cpu);
7306 } elsif ($^O eq 'freebsd') {
7307 $cpu = sct_freebsd($cpu);
7308 } elsif ($^O eq 'netbsd') {
7309 $cpu = sct_netbsd($cpu);
7310 } elsif ($^O eq 'openbsd') {
7311 $cpu = sct_openbsd($cpu);
7312 } elsif ($^O eq 'gnu') {
7313 $cpu = sct_hurd($cpu);
7314 } elsif ($^O eq 'darwin') {
7315 $cpu = sct_darwin($cpu);
7316 } elsif ($^O eq 'solaris') {
7317 $cpu = sct_solaris($cpu);
7318 } elsif ($^O eq 'aix') {
7319 $cpu = sct_aix($cpu);
7320 } elsif ($^O eq 'hpux') {
7321 $cpu = sct_hpux($cpu);
7322 } elsif ($^O eq 'nto') {
7323 $cpu = sct_qnx($cpu);
7324 } elsif ($^O eq 'svr5') {
7325 $cpu = sct_openserver($cpu);
7326 } elsif ($^O eq 'irix') {
7327 $cpu = sct_irix($cpu);
7328 } elsif ($^O eq 'dec_osf') {
7329 $cpu = sct_tru64($cpu);
7331 # Try all methods until we find something that works
7332 $cpu = (sct_gnu_linux($cpu)
7333 || sct_android($cpu)
7334 || sct_freebsd($cpu)
7336 || sct_openbsd($cpu)
7339 || sct_solaris($cpu)
7343 || sct_openserver($cpu)
7348 if(not grep { $_ > 0 } values %$cpu) {
7351 # Write cached copy instead of /proc/cpuinfo is 17 ms faster
7352 if($cpu and open(my $out_fh, ">", $cached_cpuspec)) {
7353 print $out_fh (map { chomp; "$_\n" }
7360 my $nproc = nproc();
7370 ::warning("Cannot figure out number of cpus. Using 1.");
7377 $cpu->{'sockets'} ||= 1;
7378 $cpu->{'threads'} ||= $cpu->{'cores'};
7379 $cpu->{'active'} ||= $cpu->{'threads'};
7380 chomp($cpu->{'sockets'},
7384 # Choose minimum of active and actual
7386 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
7387 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
7388 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
7392 sub sct_gnu_linux($) {
7394 # { 'sockets' => #sockets
7396 # 'threads' => #threads
7397 # 'active' => #taskset_threads }
7400 sub read_topology($) {
7406 -r "$prefix/cpu$thread/topology/physical_package_id";
7409 "$prefix/cpu$thread/topology/physical_package_id")
7415 -r "$prefix/cpu$thread/topology/thread_siblings";
7418 "$prefix/cpu$thread/topology/thread_siblings")
7423 $cpu->{'sockets'} = keys %socket;
7424 $cpu->{'cores'} = keys %sibiling;
7425 $cpu->{'threads'} = $thread;
7428 sub read_cpuinfo(@) {
7430 $cpu->{'sockets'} = 0;
7431 $cpu->{'cores'} = 0;
7432 $cpu->{'threads'} = 0;
7438 if(/^physical id.*[:](.*)/) {
7440 if(not $phy_seen{$1}++) {
7441 $cpu->{'sockets'}++;
7445 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
7449 /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
7451 $cpu->{'cores'} ||= $cpu->{'threads'};
7452 $cpu->{'cpus'} ||= $cpu->{'threads'};
7453 $cpu->{'sockets'} ||= 1;
7458 my $threads_per_core;
7459 my $cores_per_socket;
7461 /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
7462 /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
7463 /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
7464 /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
7466 if($threads_per_core and $cpu->{'threads'}) {
7467 $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core;
7469 $cpu->{'cpus'} ||= $cpu->{'threads'};
7472 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
7475 if($ENV{'PARALLEL_CPUINFO'}) {
7476 # Use CPUINFO from environment - used for testing only
7477 read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
7478 } elsif($ENV{'PARALLEL_LSCPU'}) {
7479 # Use LSCPU from environment - used for testing only
7480 read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
7481 } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
7482 # Use CPUPREFIX from environment - used for testing only
7483 read_topology($ENV{'PARALLEL_CPUPREFIX'});
7484 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
7485 # Skip /proc/cpuinfo - already set
7487 # Not debugging: Look at this computer
7488 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7490 open(my $in_fh, "-|", "lscpu")) {
7491 # Parse output from lscpu
7492 read_lscpu(<$in_fh>);
7495 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7497 -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
7498 read_topology("/sys/devices/system/cpu");
7500 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7502 open(my $in_fh, "<", "/proc/cpuinfo")) {
7503 # Read /proc/cpuinfo
7504 read_cpuinfo(<$in_fh>);
7508 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
7509 # if 'taskset' is used to limit number of threads
7510 if(open(my $in_fh, "<", "/proc/self/status")) {
7512 if(/^Cpus_allowed:\s*(\S+)/) {
7515 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
7524 sub sct_android($) {
7526 # { 'sockets' => #sockets
7528 # 'threads' => #threads
7529 # 'active' => #taskset_threads }
7531 return sct_gnu_linux($_[0]);
7534 sub sct_freebsd($) {
7536 # { 'sockets' => #sockets
7538 # 'threads' => #threads
7539 # 'active' => #taskset_threads }
7543 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
7545 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
7546 $cpu->{'threads'} ||=
7547 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
7549 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
7555 # { 'sockets' => #sockets
7557 # 'threads' => #threads
7558 # 'active' => #taskset_threads }
7561 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
7565 sub sct_openbsd($) {
7567 # { 'sockets' => #sockets
7569 # 'threads' => #threads
7570 # 'active' => #taskset_threads }
7573 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
7579 # { 'sockets' => #sockets
7581 # 'threads' => #threads
7582 # 'active' => #taskset_threads }
7585 $cpu->{'cores'} ||= ::qqx("nproc");
7591 # { 'sockets' => #sockets
7593 # 'threads' => #threads
7594 # 'active' => #taskset_threads }
7598 (::qqx('sysctl -n hw.physicalcpu')
7600 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7601 $cpu->{'threads'} ||=
7602 (::qqx('sysctl -n hw.logicalcpu')
7604 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7608 sub sct_solaris($) {
7610 # { 'sockets' => #sockets
7612 # 'threads' => #threads
7613 # 'active' => #taskset_threads }
7616 if(not $cpu->{'cores'}) {
7617 if(-x "/usr/bin/kstat") {
7618 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
7619 if($#chip_id >= 0) {
7620 $cpu->{'sockets'} ||= $#chip_id +1;
7622 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
7623 if($#core_id >= 0) {
7624 $cpu->{'cores'} ||= $#core_id +1;
7627 if(-x "/usr/sbin/psrinfo") {
7628 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
7629 if($#psrinfo >= 0) {
7630 $cpu->{'sockets'} ||= $psrinfo[0];
7633 if(-x "/usr/sbin/prtconf") {
7634 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7635 if($#prtconf >= 0) {
7636 $cpu->{'cores'} ||= $#prtconf +1;
7645 # { 'sockets' => #sockets
7647 # 'threads' => #threads
7648 # 'active' => #taskset_threads }
7651 if(not $cpu->{'cores'}) {
7652 if(-x "/usr/sbin/lscfg") {
7653 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7654 $cpu->{'cores'} = <$in_fh>;
7659 if(not $cpu->{'threads'}) {
7660 if(-x "/usr/bin/vmstat") {
7661 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7663 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7674 # { 'sockets' => #sockets
7676 # 'threads' => #threads
7677 # 'active' => #taskset_threads }
7681 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7682 $cpu->{'threads'} ||=
7683 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7689 # { 'sockets' => #sockets
7691 # 'threads' => #threads
7692 # 'active' => #taskset_threads }
7695 # BUG: It is not known how to calculate this.
7700 sub sct_openserver($) {
7702 # { 'sockets' => #sockets
7704 # 'threads' => #threads
7705 # 'active' => #taskset_threads }
7708 if(not $cpu->{'cores'}) {
7709 if(-x "/usr/sbin/psrinfo") {
7710 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7711 if($#psrinfo >= 0) {
7712 $cpu->{'cores'} = $#psrinfo +1;
7716 $cpu->{'sockets'} ||= $cpu->{'cores'};
7722 # { 'sockets' => #sockets
7724 # 'threads' => #threads
7725 # 'active' => #taskset_threads }
7729 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7735 # { 'sockets' => #sockets
7737 # 'threads' => #threads
7738 # 'active' => #taskset_threads }
7741 $cpu->{'cores'} ||= ::qqx("sizer -pr");
7742 $cpu->{'sockets'} ||= $cpu->{'cores'};
7743 $cpu->{'threads'} ||= $cpu->{'cores'};
7750 # $sshcommand = the command (incl options) to run when using ssh
7752 if (not defined $self->{'sshcommand'}) {
7753 $self->sshcommand_of_sshlogin();
7755 return $self->{'sshcommand'};
7758 sub serverlogin($) {
7760 # $sshcommand = the command (incl options) to run when using ssh
7762 if (not defined $self->{'serverlogin'}) {
7763 $self->sshcommand_of_sshlogin();
7765 return $self->{'serverlogin'};
7768 sub sshcommand_of_sshlogin($) {
7769 # Compute ssh command and serverlogin from sshlogin
7770 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
7771 # 'user@server' -> ('ssh','user@server')
7772 # 'myssh user@server' -> ('myssh','user@server')
7773 # 'myssh -l user server' -> ('myssh -l user','server')
7774 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
7776 # $self->{'sshcommand'}
7777 # $self->{'serverlogin'}
7779 my ($sshcmd, $serverlogin);
7780 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
7781 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
7782 if($self->{'string'} =~ /(.+) (\S+)$/) {
7784 $sshcmd = $1; $serverlogin = $2;
7787 if($opt::controlmaster) {
7788 # Use control_path to make ssh faster
7789 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7790 $sshcmd = $opt::ssh." -S ".$control_path;
7791 $serverlogin = $self->{'string'};
7792 if(not $self->{'control_path'}{$control_path}++) {
7793 # Master is not running for this control_path
7797 $Global::sshmaster{$pid} ||= 1;
7799 $SIG{'TERM'} = undef;
7800 # Ignore the 'foo' being printed
7801 open(STDOUT,">","/dev/null");
7802 # STDERR >/dev/null to ignore
7803 open(STDERR,">","/dev/null");
7804 open(STDIN,"<","/dev/null");
7805 # Run a sleep that outputs data, so it will discover
7806 # if the ssh connection closes.
7807 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7808 my @master = ($opt::ssh, "-MTS",
7809 $control_path, $serverlogin, "--", "perl", "-e",
7815 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7819 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7820 # convert user@server to '-l user server'
7821 # because lsh does not support user@server
7822 $sshcmd = $sshcmd." -l ".$1;
7825 $self->{'sshcommand'} = $sshcmd;
7826 $self->{'serverlogin'} = $serverlogin;
7829 sub control_path_dir($) {
7831 # $control_path_dir = dir of control path (for -M)
7833 if(not defined $self->{'control_path_dir'}) {
7834 $self->{'control_path_dir'} =
7835 # Use $ENV{'TMPDIR'} as that is typically not
7837 File::Temp::tempdir($ENV{'TMPDIR'}
7838 . "/control_path_dir-XXXX",
7841 return $self->{'control_path_dir'};
7844 sub rsync_transfer_cmd($) {
7845 # Command to run to transfer a file
7847 # $file = filename of file to transfer
7848 # $workdir = destination dir
7850 # $cmd = rsync command to run to transfer $file ("" if unreadable)
7853 my $workdir = shift;
7855 ::warning($file. " is not readable and will not be transferred.");
7859 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7861 $rsync_destdir = ::shell_quote_file($workdir);
7864 $rsync_destdir = "/";
7866 $file = ::shell_quote_file($file);
7867 my $sshcmd = $self->sshcommand();
7868 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7870 my $serverlogin = $self->serverlogin();
7871 # Make dir if it does not exist
7872 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7873 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7876 sub cleanup_cmd($$$) {
7877 # Command to run to remove the remote file
7879 # $file = filename to remove
7880 # $workdir = destination dir
7882 # $cmd = ssh command to run to remove $file and empty parent dirs
7885 my $workdir = shift;
7888 # foo/bar/./baz/quux => workdir/baz/quux
7889 # /foo/bar/./baz/quux => workdir/baz/quux
7890 $f =~ s:.*/\./:$workdir/:;
7891 } elsif($f =~ m:^[^/]:) {
7892 # foo/bar => workdir/foo/bar
7893 $f = $workdir."/".$f;
7895 my @subdirs = split m:/:, ::dirname($f);
7900 unshift @rmdir, ::shell_quote_file($dir);
7902 my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7903 if(defined $opt::workdir and $opt::workdir eq "...") {
7904 $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7907 $f = ::shell_quote_file($f);
7908 my $sshcmd = $self->sshcommand();
7909 my $serverlogin = $self->serverlogin();
7910 return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7917 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7918 # If the version >= 3.1.0: downgrade to protocol 30
7920 # $rsync = "rsync" or "rsync --protocol 30"
7922 my @out = `rsync --version`;
7924 # rsync version 3.1.3 protocol version 31
7925 # rsync version v3.2.3 protocol version 31
7926 if(/version v?(\d+.\d+)(.\d+)?/) {
7928 # Version 3.1.0 or later: Downgrade to protocol 30
7929 $rsync = "rsync --protocol 30";
7935 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7946 my $commandref = shift;
7947 my $read_from = shift;
7948 my $context_replace = shift;
7949 my $max_number_of_args = shift;
7950 my $transfer_files = shift;
7951 my $return_files = shift;
7952 my $commandlinequeue = CommandLineQueue->new
7953 ($commandref, $read_from, $context_replace, $max_number_of_args,
7954 $transfer_files, $return_files);
7958 'commandlinequeue' => $commandlinequeue,
7960 'total_jobs' => undef,
7961 }, ref($class) || $class;
7967 $self->{'this_job_no'}++;
7968 if(@{$self->{'unget'}}) {
7969 return shift @{$self->{'unget'}};
7971 my $commandline = $self->{'commandlinequeue'}->get();
7972 if(defined $commandline) {
7973 return Job->new($commandline);
7975 $self->{'this_job_no'}--;
7983 unshift @{$self->{'unget'}}, @_;
7984 $self->{'this_job_no'} -= @_;
7989 my $empty = (not @{$self->{'unget'}}) &&
7990 $self->{'commandlinequeue'}->empty();
7991 ::debug("run", "JobQueue->empty $empty ");
7997 if(not defined $self->{'total_jobs'}) {
7998 if($opt::pipe and not $opt::tee) {
7999 ::error("--pipe is incompatible with --eta/--bar/--shuf");
8000 ::wait_and_exit(255);
8002 if($opt::sqlworker) {
8003 $self->{'total_jobs'} = $Global::sql->total_jobs();
8007 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
8009 while($record = $record_queue->get()) {
8010 push @arg_records, $record;
8011 if(time - $start > 10) {
8012 ::warning("Reading ".scalar(@arg_records).
8013 " arguments took longer than 10 seconds.");
8014 $opt::eta && ::warning("Consider removing --eta.");
8015 $opt::bar && ::warning("Consider removing --bar.");
8016 $opt::shuf && ::warning("Consider removing --shuf.");
8020 while($record = $record_queue->get()) {
8021 push @arg_records, $record;
8023 if($opt::shuf and @arg_records) {
8024 my $i = @arg_records;
8026 my $j = int rand($i+1);
8027 @arg_records[$i,$j] = @arg_records[$j,$i];
8030 $record_queue->unget(@arg_records);
8031 # $#arg_records = number of args - 1
8032 # We have read one @arg_record for this job (so add 1 more)
8033 my $num_args = $#arg_records + 2;
8034 # This jobs is not started so -1
8035 my $started_jobs = $self->{'this_job_no'} - 1;
8036 my $max_args = ::max($Global::max_number_of_args,1);
8037 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
8039 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
8040 " ($num_args/$max_args + $started_jobs)\n");
8043 return $self->{'total_jobs'};
8046 sub flush_total_jobs($) {
8047 # Unset total_jobs to force recomputing
8049 ::debug("init","flush Total jobs: ");
8050 $self->{'total_jobs'} = undef;
8056 return $self->{'commandlinequeue'}->seq();
8061 return $self->{'commandlinequeue'}->quote_args();
8069 my $commandlineref = shift;
8071 'commandline' => $commandlineref, # CommandLine object
8072 'workdir' => undef, # --workdir
8073 # filehandle for stdin (used for --pipe)
8074 # filename for writing stdout to (used for --files)
8075 # remaining data not sent to stdin (used for --pipe)
8076 # tmpfiles to cleanup when job is done
8078 # amount of data sent via stdin (used for --pipe)
8079 'transfersize' => 0, # size of files using --transfer
8080 'returnsize' => 0, # size of files using --return
8082 # hash of { SSHLogins => number of times the command failed there }
8084 'sshlogin' => undef,
8085 # The commandline wrapped with rsync and ssh
8086 'sshlogin_wrap' => undef,
8087 'exitstatus' => undef,
8088 'exitsignal' => undef,
8089 # Timestamp for timeout if any
8092 # Output used for SQL and CSV-output
8093 'output' => { 1 => [], 2 => [] },
8094 'halfline' => { 1 => [], 2 => [] },
8095 }, ref($class) || $class;
8100 $self->{'commandline'} or ::die_bug("commandline empty");
8101 return $self->{'commandline'}->replaced();
8106 return $self->{'commandline'}->seq();
8111 return $self->{'commandline'}->set_seq(shift);
8116 return $self->{'commandline'}->slot();
8121 push @Global::slots, $self->slot();
8129 # $cattail = perl program for:
8130 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
8133 # cat followed by tail (possibly with rm as soon at the file is opened)
8134 # If $writerpid dead: finish after this round
8138 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
8140 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
8144 while(! -s $comfile) {
8145 # Writer has not opened the buffer file, so we cannot remove it yet
8146 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
8149 # The writer and we have both opened the file, so it is safe to unlink it
8150 unlink $unlink_file;
8153 my $first_round = 1;
8155 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
8156 $flags |= O_NONBLOCK; # Add non-blocking to the flags
8157 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
8162 my $writer_running = kill 0, $writerpid;
8163 $read = sysread(IN,$buf,131072);
8166 # Only start the command if there any input to process
8168 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
8173 my $bytes_written = syswrite(OUT,$buf);
8174 # syswrite may be interrupted by SIGHUP
8175 substr($buf,0,$bytes_written) = "";
8177 # Something printed: Wait less next time
8180 if(eof(IN) and not $writer_running) {
8181 # Writer dead: There will never be sent more to the decompressor
8185 # TODO This could probably be done more efficiently using select(2)
8186 # Nothing read: Wait longer before next read
8187 # Up to 100 milliseconds
8188 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
8194 # Sleep this many milliseconds.
8196 select(undef, undef, undef, $secs/1000);
8199 $cattail =~ s/#.*//mg;
8200 $cattail =~ s/\s+/ /g;
8206 sub openoutputfiles($) {
8207 # Open files for STDOUT and STDERR
8208 # Set file handles in $self->fh
8210 my ($outfhw, $errfhw, $outname, $errname);
8212 if($opt::linebuffer and not
8213 ($opt::keeporder or $opt::files or $opt::results or
8214 $opt::compress or $opt::compress_program or
8215 $opt::decompress_program)) {
8216 # Do not save to files: Use non-blocking pipe
8217 my ($outfhr, $errfhr);
8218 pipe($outfhr, $outfhw) || die;
8219 pipe($errfhr, $errfhw) || die;
8220 $self->set_fh(1,'w',$outfhw);
8221 $self->set_fh(2,'w',$errfhw);
8222 $self->set_fh(1,'r',$outfhr);
8223 $self->set_fh(2,'r',$errfhr);
8224 # Make it possible to read non-blocking from the pipe
8225 for my $fdno (1,2) {
8226 ::set_fh_non_blocking($self->fh($fdno,'r'));
8228 # Return immediately because we do not need setting filenames
8230 } elsif($opt::results and not $Global::csvsep) {
8231 my $out = $self->{'commandline'}->results_out();
8233 if($out eq $opt::results or $out =~ m:/$:) {
8234 # $opt::results = simple string or ending in /
8236 # prefix/name1/val1/name2/val2/seq
8237 $seqname = $out."seq";
8238 # prefix/name1/val1/name2/val2/stdout
8239 $outname = $out."stdout";
8240 # prefix/name1/val1/name2/val2/stderr
8241 $errname = $out."stderr";
8243 # $opt::results = replacement string not ending in /
8246 $errname = "$out.err";
8247 $seqname = "$out.seq";
8250 if(not open($seqfhw, "+>", $seqname)) {
8251 ::error("Cannot write to `$seqname'.");
8252 ::wait_and_exit(255);
8254 print $seqfhw $self->seq();
8256 if(not open($outfhw, "+>", $outname)) {
8257 ::error("Cannot write to `$outname'.");
8258 ::wait_and_exit(255);
8260 if(not open($errfhw, "+>", $errname)) {
8261 ::error("Cannot write to `$errname'.");
8262 ::wait_and_exit(255);
8264 $self->set_fh(1,"unlink","");
8265 $self->set_fh(2,"unlink","");
8266 if($opt::sqlworker) {
8267 # Save the filenames in SQL table
8268 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
8269 "WHERE Seq = ". $self->seq(),
8270 $outname, $errname);
8272 } elsif(not $opt::ungroup) {
8273 # To group we create temporary files for STDOUT and STDERR
8274 # To avoid the cleanup unlink the files immediately (but keep them open)
8276 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8277 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8278 # --files => only remove stderr
8279 $self->set_fh(1,"unlink","");
8280 $self->set_fh(2,"unlink",$errname);
8282 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8283 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8284 $self->set_fh(1,"unlink",$outname);
8285 $self->set_fh(2,"unlink",$errname);
8289 open($outfhw,">&",$Global::fd{1}) || die;
8290 open($errfhw,">&",$Global::fd{2}) || die;
8291 # File name must be empty as it will otherwise be printed
8294 $self->set_fh(1,"unlink",$outname);
8295 $self->set_fh(2,"unlink",$errname);
8298 $self->set_fh(1,'w',$outfhw);
8299 $self->set_fh(2,'w',$errfhw);
8300 $self->set_fh(1,'name',$outname);
8301 $self->set_fh(2,'name',$errname);
8302 if($opt::compress) {
8303 $self->filter_through_compress();
8304 } elsif(not $opt::ungroup) {
8307 if($opt::linebuffer) {
8308 # Make it possible to read non-blocking from
8310 # Used for --linebuffer with -k, --files, --res, --compress*
8311 for my $fdno (1,2) {
8312 ::set_fh_non_blocking($self->fh($fdno,'r'));
8317 sub print_verbose_dryrun($) {
8318 # If -v set: print command to stdout (possibly buffered)
8319 # This must be done before starting the command
8321 if($Global::verbose or $opt::dryrun) {
8322 my $fh = $self->fh(1,"w");
8323 if($Global::verbose <= 1) {
8324 print $fh $self->replaced(),"\n";
8326 # Verbose level > 1: Print the rsync and stuff
8327 print $fh $self->wrapped(),"\n";
8330 if($opt::sqlworker) {
8331 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
8337 # Files to remove when job is done
8339 push @{$self->{'unlink'}}, @_;
8343 # Files to remove when job is done
8345 return @{$self->{'unlink'}};
8349 # Remove files when job is done
8351 unlink $self->get_rm();
8352 delete @Global::unlink{$self->get_rm()};
8357 # Set reading FD if using --group (--ungroup does not need)
8358 for my $fdno (1,2) {
8359 # Re-open the file for reading
8360 # so fdw can be closed seperately
8361 # and fdr can be seeked seperately (for --line-buffer)
8362 open(my $fdr,"<", $self->fh($fdno,'name')) ||
8363 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
8364 $self->set_fh($fdno,'r',$fdr);
8365 # Unlink if not debugging
8366 $Global::debug or ::rm($self->fh($fdno,"unlink"));
8370 sub empty_input_wrapper($) {
8371 # If no input: exit(0)
8372 # If some input: Pass input as input to command on STDIN
8373 # This avoids starting the command if there is no input.
8375 # $command = command to pipe data to
8377 # $wrapped_command = the wrapped command
8378 my $command = shift;
8381 if(sysread(STDIN, $buf, 1)) {
8382 open($fh, "|-", @ARGV) || die;
8383 syswrite($fh, $buf);
8384 # Align up to 128k block
8385 if($read = sysread(STDIN, $buf, 131071)) {
8386 syswrite($fh, $buf);
8388 while($read = sysread(STDIN, $buf, 131072)) {
8389 syswrite($fh, $buf);
8392 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8395 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
8398 length $command > 499) {
8399 # csh does not like words longer than 1000 (499 quoted)
8400 # $command = "perl -e '".base64_zip_eval()."' ".
8401 # join" ",string_zip_base64(
8402 # 'exec "'.::perl_quote_scalar($command).'"');
8403 return 'perl -e '.::Q($script)." ".
8404 base64_wrap("exec \"$Global::shell\",'-c',\"".
8405 ::perl_quote_scalar($command).'"');
8407 return 'perl -e '.::Q($script)." ".
8408 $Global::shell." -c ".::Q($command);
8412 sub filter_through_compress($) {
8414 # Send stdout to stdin for $opt::compress_program(1)
8415 # Send stderr to stdin for $opt::compress_program(2)
8416 # cattail get pid: $pid = $self->fh($fdno,'rpid');
8417 my $cattail = cattail();
8419 for my $fdno (1,2) {
8420 # Make a communication file.
8421 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
8423 # Compressor: (echo > $comfile; compress pipe) > output
8424 # When the echo is written to $comfile,
8425 # it is known that output file is opened,
8426 # thus output file can then be removed by the decompressor.
8427 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
8428 empty_input_wrapper($opt::compress_program).") >".
8429 $self->fh($fdno,'name')) || die $?;
8430 $self->set_fh($fdno,'w',$fdw);
8431 $self->set_fh($fdno,'wpid',$wpid);
8432 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
8433 # decompress output > stdout
8434 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
8435 $opt::decompress_program, $wpid,
8436 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
8438 $self->set_fh($fdno,'r',$fdr);
8439 $self->set_fh($fdno,'rpid',$rpid);
8447 my ($self, $fd_no, $key, $fh) = @_;
8448 $self->{'fd'}{$fd_no,$key} = $fh;
8453 my ($self, $fd_no, $key) = @_;
8454 return $self->{'fd'}{$fd_no,$key};
8459 my $remaining_ref = shift;
8460 my $stdin_fh = $self->fh(0,"w");
8462 my $len = length $$remaining_ref;
8463 # syswrite may not write all in one go,
8464 # so make sure everything is written.
8467 # If writing is to a closed pipe:
8468 # Do not call signal handler, but let nothing be written
8469 local $SIG{PIPE} = undef;
8470 while($written = syswrite($stdin_fh,$$remaining_ref)){
8471 substr($$remaining_ref,0,$written) = "";
8475 sub set_block($$$$$$) {
8476 # Copy stdin buffer from $block_ref up to $endpos
8477 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
8478 # Remove $recstart and $recend if needed
8480 # $header_ref = ref to $header to prepend
8481 # $buffer_ref = ref to $buffer containing the block
8482 # $endpos = length of $block to pass on
8483 # $recstart = --recstart regexp
8484 # $recend = --recend regexp
8488 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
8489 $self->{'block'} = ($self->virgin() ? $$header_ref : "").
8490 substr($$buffer_ref,0,$endpos);
8491 if($opt::remove_rec_sep) {
8492 remove_rec_sep(\$self->{'block'},$recstart,$recend);
8494 $self->{'block_length'} = length $self->{'block'};
8495 $self->{'block_pos'} = 0;
8496 $self->add_transfersize($self->{'block_length'});
8501 return \$self->{'block'};
8505 sub block_length($) {
8507 return $self->{'block_length'};
8510 sub remove_rec_sep($) {
8511 # Remove --recstart and --recend from $block
8513 # $block_ref = reference to $block to be modified
8514 # $recstart = --recstart
8515 # $recend = --recend
8517 # $opt::regexp = Are --recstart/--recend regexp?
8520 my ($block_ref,$recstart,$recend) = @_;
8521 # Remove record separator
8523 $$block_ref =~ s/$recend$recstart//gos;
8524 $$block_ref =~ s/^$recstart//os;
8525 $$block_ref =~ s/$recend$//os;
8527 $$block_ref =~ s/\Q$recend$recstart\E//gos;
8528 $$block_ref =~ s/^\Q$recstart\E//os;
8529 $$block_ref =~ s/\Q$recend\E$//os;
8533 sub non_blocking_write($) {
8535 my $something_written = 0;
8537 my $in = $self->fh(0,"w");
8538 my $rv = syswrite($in,
8539 substr($self->{'block'},$self->{'block_pos'}));
8540 if (!defined($rv) && $! == ::EAGAIN()) {
8541 # would block - but would have written
8542 $something_written = 0;
8543 # avoid triggering auto expanding block size
8544 $Global::no_autoexpand_block ||= 1;
8545 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8547 # Remove the written part
8548 $self->{'block_pos'} += $rv;
8549 $something_written = $rv;
8551 # successfully wrote everything
8552 # Empty block to free memory
8554 $self->set_block(\$a,\$a,0,"","");
8555 $something_written = $rv;
8557 ::debug("pipe", "Non-block: ", $something_written);
8558 return $something_written;
8564 return $self->{'virgin'};
8567 sub set_virgin($$) {
8569 $self->{'virgin'} = shift;
8574 return $self->{'pid'};
8579 $self->{'pid'} = shift;
8584 # UNIX-timestamp this job started
8586 return sprintf("%.3f",$self->{'starttime'});
8589 sub set_starttime($@) {
8591 my $starttime = shift || ::now();
8592 $self->{'starttime'} = $starttime;
8594 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8600 # Run time in seconds with 3 decimals
8602 return sprintf("%.3f",
8603 int(($self->endtime() - $self->starttime())*1000)/1000);
8608 # UNIX-timestamp this job ended
8609 # 0 if not ended yet
8611 return ($self->{'endtime'} || 0);
8614 sub set_endtime($$) {
8616 my $endtime = shift;
8617 $self->{'endtime'} = $endtime;
8619 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8623 sub is_timedout($) {
8624 # Is the job timedout?
8626 # $delta_time = time that the job may run
8630 my $delta_time = shift;
8631 return time > $self->{'starttime'} + $delta_time;
8636 $self->set_exitstatus(-1);
8637 ::kill_sleep_seq($self->pid());
8642 my @pgrps = map { -$_ } $self->pid();
8643 kill "STOP", @pgrps;
8644 $self->set_suspended(1);
8645 # push job onto start stack
8646 $Global::JobQueue->unget($self);
8649 sub set_suspended($$) {
8651 $self->{'suspended'} = shift;
8656 return $self->{'suspended'};
8661 my @pgrps = map { -$_ } $self->pid();
8662 kill "CONT", @pgrps;
8663 $self->set_suspended(0);
8667 # return number of times failed for this $sshlogin
8671 # Number of times failed for $sshlogin
8673 my $sshlogin = shift;
8674 return $self->{'failed'}{$sshlogin};
8677 sub failed_here($) {
8678 # return number of times failed for the current $sshlogin
8680 # Number of times failed for this sshlogin
8682 return $self->{'failed'}{$self->sshlogin()};
8686 # increase the number of times failed for this $sshlogin
8688 my $sshlogin = shift;
8689 $self->{'failed'}{$sshlogin}++;
8692 sub add_failed_here($) {
8693 # increase the number of times failed for the current $sshlogin
8695 $self->{'failed'}{$self->sshlogin()}++;
8698 sub reset_failed($) {
8699 # increase the number of times failed for this $sshlogin
8701 my $sshlogin = shift;
8702 delete $self->{'failed'}{$sshlogin};
8705 sub reset_failed_here($) {
8706 # increase the number of times failed for this $sshlogin
8708 delete $self->{'failed'}{$self->sshlogin()};
8713 # the number of sshlogins this command has failed on
8714 # the minimal number of times this command has failed
8717 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
8718 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
8719 return ($number_of_sshlogins_failed_on,$min_failures);
8722 sub total_failed($) {
8724 # $total_failures = the number of times this command has failed
8726 my $total_failures = 0;
8727 for (values %{$self->{'failed'}}) {
8728 $total_failures += $_;
8730 return $total_failures;
8736 sub postpone_exit_and_cleanup {
8737 # Command to remove files and dirs (given as args) without
8738 # affecting the exit value in $?/$status.
8740 $script = "perl -e '".
8748 if($bash=~s/(\d+)h/$1/) {
8753 # `echo \$?h` is needed to make fish not complain
8754 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
8764 # Script to create a fifo, run a command on the fifo
8765 # while copying STDIN to the fifo, and finally
8766 # remove the fifo and return the exit code of the command.
8768 # {} == $PARALLEL_TMP for --fifo
8769 # To make it csh compatible a wrapper needs to:
8771 # * spawn $command &
8773 # * waitpid to get the exit code from $command
8774 # * be less than 1000 chars long
8775 $script = "perl -e '".
8779 # mkfifo $PARALLEL_TMP
8780 system "mkfifo", $f;
8781 # spawn $shell -c $command &
8782 $pid = fork || exec $s, "-c", $c;
8783 open($o,">",$f) || die $!;
8784 # cat > $PARALLEL_TMP
8785 while(sysread(STDIN,$buf,131072)){
8789 # waitpid to get the exit code from $command
8801 # Wrap command with:
8807 # * --pipepart (@Global::cat_prepends)
8808 # * --tee (@Global::cat_prepends)
8811 # The ordering of the wrapping is important:
8812 # * --nice/--cat/--fifo should be done on the remote machine
8813 # * --pipepart/--pipe should be done on the local machine inside --tmux
8820 # @Global::cat_prepends
8824 # $self->{'wrapped'} = the command wrapped with the above
8826 if(not defined $self->{'wrapped'}) {
8827 my $command = $self->replaced();
8828 # Bug in Bash and Ksh when running multiline aliases
8829 # This will force them to run correctly, but will fail in
8830 # tcsh so we do not do it.
8831 # $command .= "\n\n";
8832 if(@opt::shellquote) {
8833 # Quote one time for each --shellquote
8835 for(@opt::shellquote) {
8838 # Prepend "echo" (it is written in perl because
8839 # quoting '-e' causes problem in some versions and
8840 # csh's version does something wrong)
8841 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
8843 if($Global::parallel_env) {
8844 # If $PARALLEL_ENV set, put that in front of the command
8845 # Used for env_parallel.*
8846 if($Global::shell =~ /zsh/) {
8847 # The extra 'eval' will make aliases work, too
8848 $command = $Global::parallel_env."\n".
8849 "eval ".::Q($command);
8851 $command = $Global::parallel_env."\n".$command;
8855 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8856 # This is to make it possible to compute $PARALLEL_TMP on
8857 # the fly when running remotely.
8858 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8859 # the command is run.
8861 # Prepend 'cat > $PARALLEL_TMP;'
8862 # Append 'unlink $PARALLEL_TMP without affecting $?'
8864 'cat > $PARALLEL_TMP;'.
8865 $command.";". postpone_exit_and_cleanup().
8867 } elsif($opt::fifo) {
8868 # Prepend fifo-wrapper. In essence:
8871 # # $command must read {}, otherwise this 'cat' will block
8874 # without affecting $?
8875 $command = fifo_wrap(). " ".
8876 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8878 # Wrap with ssh + tranferring of files
8879 $command = $self->sshlogin_wrap($command);
8880 if(@Global::cat_prepends) {
8881 # --pipepart: prepend:
8882 # < /tmp/foo perl -e 'while(@ARGV) {
8883 # sysseek(STDIN,shift,0) || die; $left = shift;
8884 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8885 # $left -= $read; syswrite(STDOUT,$buf);
8889 # --pipepart --tee: prepend:
8892 # --pipe --tee: wrap:
8893 # (rm fifo; ... ) < fifo
8896 # (rm fifo; ... ) < fifo
8897 $command = (shift @Global::cat_prepends). "($command)".
8898 (shift @Global::cat_appends);
8899 } elsif($opt::pipe and not $opt::roundrobin) {
8900 # Wrap with EOF-detector to avoid starting $command if EOF.
8901 $command = empty_input_wrapper($command);
8904 # Wrap command with 'tmux'
8905 $command = $self->tmux_wrap($command);
8909 length $command > 499) {
8910 # csh does not like words longer than 1000 (499 quoted)
8911 # $command = "perl -e '".base64_zip_eval()."' ".
8912 # join" ",string_zip_base64(
8913 # 'exec "'.::perl_quote_scalar($command).'"');
8914 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8915 ::perl_quote_scalar($command).'"');
8917 $self->{'wrapped'} = $command;
8919 return $self->{'wrapped'};
8922 sub set_sshlogin($$) {
8924 my $sshlogin = shift;
8925 $self->{'sshlogin'} = $sshlogin;
8926 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8927 delete $self->{'wrapped'};
8929 if($opt::sqlworker) {
8930 # Identify worker as --sqlworker often runs on different machines
8931 my $host = $sshlogin->string();
8933 $host = ::hostname();
8935 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8941 return $self->{'sshlogin'};
8944 sub string_base64($) {
8945 # Base64 encode strings into 1000 byte blocks.
8946 # 1000 bytes is the largest word size csh supports
8948 # @strings = to be encoded
8950 # @base64 = 1000 byte block
8951 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8952 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8956 sub string_zip_base64($) {
8957 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8959 # 1000 bytes is the largest word size csh supports
8960 # Zipping will make exporting big environments work, too
8962 # @strings = to be encoded
8964 # @base64 = 1000 byte block
8965 my($zipin_fh, $zipout_fh,@base64);
8966 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8969 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8970 # Split base64 encoded into 1000 byte blocks
8971 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8979 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8983 sub base64_zip_eval() {
8985 # * reads base64 strings from @ARGV
8987 # * pipes through 'bzip2 -dc'
8988 # * evals the result
8989 # Reverse of string_zip_base64 + eval
8990 # Will be wrapped in ' so single quote is forbidden
8992 # $script = 1-liner for perl -e
8993 my $script = ::spacefree(0,q{
8994 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8995 eval"@GNU_Parallel";
8997 $SIG{CHLD} = "IGNORE";
8998 # Search for bzip2. Not found => use default path
8999 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
9000 # $in = stdin on $zip, $out = stdout from $zip
9001 # Forget my() to save chars for csh
9002 # my($in, $out,$eval);
9003 open3($in,$out,">&STDERR",$zip,"-dc");
9004 if(my $perlpid = fork) {
9006 $eval = join "", <$out>;
9010 # Pipe decoded base64 into 'bzip2 -dc'
9011 print $in (decode_base64(join"",@ARGV));
9019 ::debug("base64",$script,"\n");
9023 sub base64_wrap($) {
9024 # base64 encode Perl code
9025 # Split it into chunks of < 1000 bytes
9026 # Prepend it with a decoder that eval's it
9028 # $eval_string = Perl code to run
9030 # $shell_command = shell command that runs $eval_string
9031 my $eval_string = shift;
9034 ::Q(base64_zip_eval())." ".
9035 join" ",::shell_quote(string_zip_base64($eval_string));
9038 sub base64_eval($) {
9040 # * reads base64 strings from @ARGV
9042 # * evals the result
9043 # Reverse of string_base64 + eval
9044 # Will be wrapped in ' so single quote is forbidden.
9045 # Spaces are stripped so spaces cannot be significant.
9046 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
9047 # to make it clear that this is a GNU Parallel command
9048 # when looking at the process table.
9050 # $script = 1-liner for perl -e
9051 my $script = ::spacefree(0,q{
9052 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
9053 eval "@GNU_Parallel";
9054 my $eval = decode_base64(join"",@ARGV);
9057 ::debug("base64",$script,"\n");
9061 sub sshlogin_wrap($) {
9062 # Wrap the command with the commands needed to run remotely
9064 # $command = command to run
9066 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
9067 sub monitor_parent_sshd_script {
9068 # This script is to solve the problem of
9069 # * not mixing STDERR and STDOUT
9070 # * terminating with ctrl-c
9071 # If its parent is ssh: all good
9072 # If its parent is init(1): ssh died, so kill children
9073 my $monitor_parent_sshd_script;
9075 if(not $monitor_parent_sshd_script) {
9076 $monitor_parent_sshd_script =
9077 # This will be packed in ', so only use "
9078 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
9079 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
9080 '$nice = '.$opt::nice.';'.
9081 '$termseq = "'.$opt::termseq.'";'.
9083 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
9085 $ENV{PARALLEL_TMP} = $tmpdir."/par".
9086 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
9087 } while(-e $ENV{PARALLEL_TMP});
9088 $SIG{CHLD} = sub { $done = 1; };
9091 # Make own process group to be able to kill HUP it later
9093 eval { setpriority(0,0,$nice) };
9094 exec $shell, "-c", ($bashfunc."@ARGV");
9098 # Parent is not init (ppid=1), so sshd is alive
9099 # Exponential sleep up to 1 sec
9100 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
9101 select(undef, undef, undef, $s);
9102 } until ($done || getppid == 1);
9104 # Kill as per --termseq
9105 my @term_seq = split/,/,$termseq;
9107 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
9109 while(@term_seq && kill(0,-$pid)) {
9110 kill(shift @term_seq, -$pid);
9111 select(undef, undef, undef, (shift @term_seq)/1000);
9115 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9118 return $monitor_parent_sshd_script;
9121 sub vars_to_export {
9124 my @vars = ("parallel_bash_environment");
9125 for my $varstring (@opt::env) {
9126 # Split up --env VAR1,VAR2
9127 push @vars, split /,/, $varstring;
9130 if(-r $_ and not -d) {
9131 # Read as environment definition bug #44041
9133 my $fh = ::open_or_exit($_);
9134 $Global::envdef = join("",<$fh>);
9138 if(grep { /^_$/ } @vars) {
9141 # Include all vars that are not in a clean environment
9142 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
9143 my @ignore = <$vars_fh>;
9146 @ignore{@ignore} = @ignore;
9148 push @vars, grep { not defined $ignore{$_} } keys %ENV;
9149 @vars = grep { not /^_$/ } @vars;
9151 ::error("Run '$Global::progname --record-env' ".
9152 "in a clean environment first.");
9153 ::wait_and_exit(255);
9156 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
9157 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
9159 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
9160 "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", "PARALLEL_JOBSLOT",
9161 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
9162 # Keep only defined variables
9163 return grep { defined($ENV{$_}) } @vars;
9168 # $eval = '$ENV{"..."}=...; ...'
9169 my @vars = vars_to_export();
9170 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
9171 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
9172 my @non_functions = (grep { !/PARALLEL_ENV/ }
9173 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
9175 # eval of @envset will set %ENV
9176 my $envset = join"", map {
9177 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
9178 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
9180 # running @bashfunc on the command line, will set the functions
9181 my @bashfunc = map {
9183 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
9184 "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
9185 # eval $bashfuncset will set $bashfunc
9188 # Functions are not supported for all shells
9189 if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
9190 ::warning("Shell functions may not be supported in $Global::shell.");
9193 '@bash_functions=qw('."@bash_functions".");".
9194 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
9196 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
9200 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
9202 $bashfuncset = '$bashfunc = "";'
9204 if($ENV{'parallel_bash_environment'}) {
9205 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
9207 ::debug("base64",$envset,$bashfuncset,"\n");
9208 return $csh_friendly,$envset,$bashfuncset;
9212 my $command = shift;
9213 # TODO test that *sh -c 'parallel --env' use *sh
9214 if(not defined $self->{'sshlogin_wrap'}{$command}) {
9215 my $sshlogin = $self->sshlogin();
9216 my $serverlogin = $sshlogin->serverlogin();
9217 my $quoted_remote_command;
9218 $ENV{'PARALLEL_SEQ'} = $self->seq();
9219 $ENV{'PARALLEL_JOBSLOT'} = $self->slot();
9220 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
9221 $ENV{'PARALLEL_SSHHOST'} = $sshlogin->serverlogin();
9222 $ENV{'PARALLEL_PID'} = $$;
9223 if($serverlogin eq ":") {
9225 # Create workdir if needed. Then cd to it.
9226 my $wd = $self->workdir();
9227 if($opt::workdir eq "." or $opt::workdir eq "...") {
9228 # If $wd does not start with '/': Prepend $HOME
9229 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
9231 ::mkdir_or_die($wd);
9233 if($opt::workdir eq "...") {
9234 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
9237 $command = "cd ".::Q($wd)." || exit 255; " .
9241 # Prepend with environment setter, which sets functions in zsh
9242 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9243 my $perl_code = $envset.$bashfuncset.
9244 '@ARGV="'.::perl_quote_scalar($command).'";'.
9245 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
9246 if(length $perl_code > 999
9251 # csh does not deal well with > 1000 chars in one word
9252 # csh does not deal well with $ENV with \n
9253 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
9255 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
9258 $self->{'sshlogin_wrap'}{$command} = $command;
9263 # Create remote workdir if needed. Then cd to it.
9264 my $wd = ::pQ($self->workdir());
9265 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
9266 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
9268 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9269 my $remote_command = $pwd.$envset.$bashfuncset.
9270 '@ARGV="'.::perl_quote_scalar($command).'";'.
9271 monitor_parent_sshd_script();
9272 $quoted_remote_command = "perl -e ". ::Q($remote_command);
9273 my $dq_remote_command = ::Q($quoted_remote_command);
9274 if(length $dq_remote_command > 999
9279 # csh does not deal well with > 1000 chars in one word
9280 # csh does not deal well with $ENV with \n
9281 $quoted_remote_command =
9282 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
9283 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
9285 $quoted_remote_command = $dq_remote_command;
9288 my $sshcmd = $sshlogin->sshcommand();
9289 my ($pre,$post,$cleanup)=("","","");
9291 $pre .= $self->sshtransfer();
9293 $post .= $self->sshreturn();
9295 $post .= $self->sshcleanup();
9297 # We need to save the exit status of the job
9298 $post = exitstatuswrapper($post);
9300 $self->{'sshlogin_wrap'}{$command} =
9302 . "$sshcmd $serverlogin -- exec "
9303 . $quoted_remote_command
9308 return $self->{'sshlogin_wrap'}{$command};
9313 # Non-quoted and with {...} substituted
9315 # @transfer - File names of files to transfer
9318 my $transfersize = 0;
9319 my @transfer = $self->{'commandline'}->
9320 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
9324 $transfersize += (stat($_))[7];
9327 $self->add_transfersize($transfersize);
9331 sub transfersize($) {
9333 return $self->{'transfersize'};
9336 sub add_transfersize($) {
9338 my $transfersize = shift;
9339 $self->{'transfersize'} += $transfersize;
9341 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
9342 $self->{'transfersize'});
9345 sub sshtransfer($) {
9346 # Returns for each transfer file:
9347 # rsync $file remote:$workdir
9350 my $sshlogin = $self->sshlogin();
9351 my $workdir = $self->workdir();
9352 for my $file ($self->transfer()) {
9353 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
9355 return join("",@pre);
9360 # Non-quoted and with {...} substituted
9362 # @non_quoted_filenames
9364 return $self->{'commandline'}->
9365 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
9369 # This is called after the job has finished
9371 # $number_of_bytes transferred in return
9373 for my $file ($self->return()) {
9375 $self->{'returnsize'} += (stat($file))[7];
9378 return $self->{'returnsize'};
9381 sub add_returnsize($) {
9383 my $returnsize = shift;
9384 $self->{'returnsize'} += $returnsize;
9386 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
9387 $self->{'returnsize'});
9391 # Returns for each return-file:
9392 # rsync remote:$workdir/$file .
9394 my $sshlogin = $self->sshlogin();
9395 my $sshcmd = $sshlogin->sshcommand();
9396 my $serverlogin = $sshlogin->serverlogin();
9397 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
9399 for my $file ($self->return()) {
9400 $file =~ s:^\./::g; # Remove ./ if any
9401 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9405 # rsync -avR /foo/./bar/baz.c remote:/tmp/
9406 # == (on old systems)
9407 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
9408 $wd = ::shell_quote_file($self->workdir()."/");
9410 # Only load File::Basename if actually needed
9411 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
9412 # dir/./file means relative to dir, so remove dir on remote
9413 $file =~ m:(.*)/\./:;
9414 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
9415 my $nobasedir = $file;
9416 $nobasedir =~ s:.*/\./::;
9417 $cd = ::shell_quote_file(::dirname($nobasedir));
9418 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
9419 my $basename = ::Q(::shell_quote_file(::basename($file)));
9421 # mkdir -p /home/tange/dir/subdir/;
9422 # rsync (--protocol 30) -rlDzR
9423 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
9424 # server:file.gz /home/tange/dir/subdir/
9425 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
9426 " $rsync_cd $rsync_opts $serverlogin:".
9427 $basename . " ".$basedir.$cd.";";
9433 # Return the sshcommand needed to remove the file
9435 # ssh command needed to remove files from sshlogin
9437 my $sshlogin = $self->sshlogin();
9438 my $sshcmd = $sshlogin->sshcommand();
9439 my $serverlogin = $sshlogin->serverlogin();
9440 my $workdir = $self->workdir();
9443 for my $file ($self->remote_cleanup()) {
9444 my @subworkdirs = parentdirs_of($file);
9445 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
9447 if(defined $opt::workdir and $opt::workdir eq "...") {
9448 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
9453 sub remote_cleanup($) {
9455 # Files to remove at cleanup
9458 my @transfer = $self->transfer();
9459 my @return = $self->return();
9460 return (@transfer,@return);
9466 sub exitstatuswrapper(@) {
9468 # @shellcode = shell code to execute
9470 # shell script that returns current status after executing @shellcode
9471 if($Global::cshell) {
9472 return ('set _EXIT_status=$status; ' .
9474 'exit $_EXIT_status;');
9476 return ('_EXIT_status=$?; ' .
9478 'exit $_EXIT_status;');
9484 # the workdir on a remote machine
9486 if(not defined $self->{'workdir'}) {
9488 if(defined $opt::workdir) {
9489 if($opt::workdir eq ".") {
9490 # . means current dir
9491 my $home = $ENV{'HOME'};
9496 # If homedir exists: remove the homedir from
9497 # workdir if cwd starts with homedir
9498 # E.g. /home/foo/my/dir => my/dir
9499 # E.g. /tmp/my/dir => /tmp/my/dir
9500 my ($home_dev, $home_ino) = (stat($home))[0,1];
9502 my @dir_parts = split(m:/:,$cwd);
9504 while(defined ($part = shift @dir_parts)) {
9505 $part eq "" and next;
9506 $parent .= "/".$part;
9507 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
9508 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
9509 # dev and ino is the same: We found the homedir.
9510 $workdir = join("/",@dir_parts);
9515 if($workdir eq "") {
9518 } elsif($opt::workdir eq "...") {
9519 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
9520 . "-" . $self->seq();
9522 $workdir = $self->{'commandline'}->
9523 replace_placeholders([$opt::workdir],0,0);
9524 #$workdir = $opt::workdir;
9525 # Rsync treats /./ special. We dont want that
9526 $workdir =~ s:/\./:/:g; # Remove /./
9527 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
9528 $workdir =~ s:^\./::g; # Remove starting ./ if any
9533 $self->{'workdir'} = $workdir;
9535 return $self->{'workdir'};
9538 sub parentdirs_of($) {
9540 # all parentdirs except . of this dir or file - sorted desc by length
9543 while($d =~ s:/[^/]+$::) {
9552 # Setup STDOUT and STDERR for a job and start it.
9554 # job-object or undef if job not to run
9556 sub open3_setpgrp_internal {
9557 # Run open3+setpgrp followed by the command
9559 # $stdin_fh = Filehandle to use as STDIN
9560 # $stdout_fh = Filehandle to use as STDOUT
9561 # $stderr_fh = Filehandle to use as STDERR
9562 # $command = Command to run
9564 # $pid = Process group of job started
9565 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9568 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9569 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9570 # The eval is needed to catch exception from open3
9572 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9573 # Each child gets its own process group to make it safe to killall
9574 eval{ setpgrp(0,0) };
9575 eval{ setpriority(0,0,$opt::nice) };
9576 exec($Global::shell,"-c",$command)
9577 || ::die_bug("open3-$stdin_fh $command");
9583 sub open3_setpgrp_external {
9584 # Run open3 on $command wrapped with a perl script doing setpgrp
9585 # Works on systems that do not support open3(,,,"-")
9587 # $stdin_fh = Filehandle to use as STDIN
9588 # $stdout_fh = Filehandle to use as STDOUT
9589 # $stderr_fh = Filehandle to use as STDERR
9590 # $command = Command to run
9592 # $pid = Process group of job started
9593 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9595 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9596 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9601 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9602 "exec '$Global::shell', '-c', \@ARGV");
9603 # The eval is needed to catch exception from open3
9605 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9606 || ::die_bug("open3-$stdin_fh");
9612 sub redefine_open3_setpgrp {
9613 my $setgprp_cache = shift;
9614 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9615 no warnings 'redefine';
9616 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9617 # Test to see if open3(x,x,x,"-") is fully supported
9618 # Can an exported bash function be called via open3?
9619 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9620 'else { exec("bash","-c","testfun && true"); }';
9622 ::shell_quote_scalar_default(
9623 "testfun() { rm $name; }; export -f testfun; ".
9624 "perl -MIPC::Open3 -e ".
9625 ::shell_quote_scalar_default($script)
9628 # Redirect STDERR temporarily,
9629 # so errors on MacOS X are ignored.
9630 open my $saveerr, ">&STDERR";
9631 open STDERR, '>', "/dev/null";
9633 ::debug("init",qq{bash -c $bash 2>/dev/null});
9634 qx{ bash -c $bash 2>/dev/null };
9635 open STDERR, ">&", $saveerr;
9638 # Does not support open3(x,x,x,"-")
9639 # or does not have bash:
9640 # Use (slow) external version
9642 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
9643 ::debug("init","open3_setpgrp_external chosen\n");
9645 # Supports open3(x,x,x,"-")
9646 # This is 0.5 ms faster to run
9647 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
9648 ::debug("init","open3_setpgrp_internal chosen\n");
9650 if(open(my $fh, ">", $setgprp_cache)) {
9651 print $fh $redefine_eval;
9654 ::debug("init","Cannot write to $setgprp_cache");
9656 eval $redefine_eval;
9660 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
9661 ::hostname() . "/setpgrp_func";
9663 -e $setgprp_cache || return 0;
9665 open(my $fh, "<", $setgprp_cache) || return 0;
9666 eval <$fh> || return 0;
9670 if(not read_cache()) {
9671 redefine_open3_setpgrp($setgprp_cache);
9673 # The sub is now redefined. Call it
9674 return open3_setpgrp(@_);
9678 if($job->suspended()) {
9679 # Job is kill -STOP'ped: Restart it.
9683 # Get the shell command to be executed (possibly with ssh infront).
9684 my $command = $job->wrapped();
9687 if($Global::interactive or $Global::stderr_verbose) {
9688 $job->interactive_start();
9690 # Must be run after $job->interactive_start():
9691 # $job->interactive_start() may call $job->skip()
9692 if($job->{'commandline'}{'skip'}) {
9693 # $job->skip() was called
9696 $job->openoutputfiles();
9697 $job->print_verbose_dryrun();
9698 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
9699 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
9700 $ENV{'PARALLEL_SEQ'} = $job->seq();
9701 $ENV{'PARALLEL_PID'} = $$;
9702 $ENV{'PARALLEL_JOBSLOT'} = $job->slot();
9703 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
9704 $job->add_rm($ENV{'PARALLEL_TMP'});
9705 ::debug("run", $Global::total_running, " processes . Starting (",
9706 $job->seq(), "): $command\n");
9708 my ($stdin_fh) = ::gensym();
9709 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
9710 if($opt::roundrobin and not $opt::keeporder) {
9711 # --keep-order will make sure the order will be reproducible
9712 ::set_fh_non_blocking($stdin_fh);
9714 $job->set_fh(0,"w",$stdin_fh);
9715 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
9716 } elsif ($opt::tty and -c "/dev/tty" and
9717 open(my $devtty_fh, "<", "/dev/tty")) {
9718 # Give /dev/tty to the command if no one else is using it
9719 # The eval is needed to catch exception from open3
9720 local (*IN,*OUT,*ERR);
9721 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9722 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9724 # The eval is needed to catch exception from open3
9725 my @wrap = ('perl','-e',
9726 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
9727 "exec '$Global::shell', '-c', \@ARGV");
9729 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
9730 || ::die_bug("open3-/dev/tty");
9734 $job->set_virgin(0);
9736 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
9737 $job->set_virgin(0);
9741 $Global::total_running++;
9742 $Global::total_started++;
9743 $job->set_pid($pid);
9744 $job->set_starttime();
9745 $Global::running{$job->pid()} = $job;
9747 $Global::timeoutq->insert($job);
9749 $Global::newest_job = $job;
9750 $Global::newest_starttime = ::now();
9754 ::debug("run", "Cannot spawn more jobs.\n");
9759 sub interactive_start($) {
9761 my $command = $self->wrapped();
9762 if($Global::interactive) {
9764 ::status_no_nl("$command ?...");
9766 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
9767 $answer = <$tty_fh>;
9769 # Sometime we get an empty string (not even \n)
9770 # Do not know why, so let us just ignore it and try again
9771 } while(length $answer < 1);
9772 if (not ($answer =~ /^\s*y/i)) {
9773 $self->{'commandline'}->skip();
9776 print $Global::original_stderr "$command\n";
9784 # Wrap command with tmux for session pPID
9786 # $actual_command = the actual command being run (incl ssh wrap)
9788 my $actual_command = shift;
9789 # Temporary file name. Used for fifo to communicate exit val
9790 my $tmpfifo = ::tmpname("tmx");
9791 $self->add_rm($tmpfifo);
9793 if(length($tmpfifo) >=100) {
9794 ::error("tmux does not support sockets with path > 100.");
9795 ::wait_and_exit(255);
9797 if($opt::tmuxpane) {
9798 # Move the command into a pane in window 0
9799 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
9800 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
9803 my $visual_command = $self->replaced();
9804 my $title = $visual_command;
9805 if($visual_command =~ /\0/) {
9806 ::error("Command line contains NUL. tmux is confused by NUL.");
9807 ::wait_and_exit(255);
9810 # ascii 194-245 annoys tmux
9811 $title =~ tr/[\011-\016;\302-\365]/ /s;
9812 $title = ::Q($title);
9814 my $l_act = length($actual_command);
9815 my $l_tit = length($title);
9816 my $l_fifo = length($tmpfifo);
9817 # The line to run contains a 118 chars extra code + the title 2x
9818 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9820 my $quoted_space75 = ::Q(" ")x75;
9821 while($l_tit < 1000 and
9823 (890 < $l_tot and $l_tot < 1350)
9825 (9250 < $l_tot and $l_tot < 9800)
9827 # tmux blocks for certain lengths:
9828 # 900 < title + command < 1200
9829 # 9250 < title + command < 9800
9830 # but only if title < 1000, so expand the title with 75 spaces
9831 # The measured lengths are:
9832 # 996 < (title + whole command) < 1127
9833 # 9331 < (title + whole command) < 9636
9834 $title .= $quoted_space75;
9835 $l_tit = length($title);
9836 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9840 $ENV{'PARALLEL_TMUX'} ||= "tmux";
9841 if(not $tmuxsocket) {
9842 $tmuxsocket = ::tmpname("tms");
9845 # Run tmux in the foreground
9846 # Wait for the socket to appear
9847 while (not -e $tmuxsocket) { }
9848 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
9852 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
9855 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
9856 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
9858 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
9859 $Limits::Command::line_max_len, " tot ",
9862 return "mkfifo $tmpfifo && $tmux ".
9866 "(".$actual_command.');'.
9867 # The triple print is needed - otherwise the testsuite fails
9868 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
9869 "echo $title; echo \007Job finished at: `date`;sleep 10"
9872 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
9873 # If csh the first will be 0h, so use the second as exit value.
9874 # Otherwise just use the first value as exit value.
9875 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
9879 sub is_already_in_results($) {
9880 # Do we already have results for this job?
9882 # $job_already_run = bool whether there is output for this or not
9884 my $out = $job->{'commandline'}->results_out();
9885 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
9886 return(-e $out."stdout" or -f $out);
9889 sub is_already_in_joblog($) {
9891 return vec($Global::job_already_run,$job->seq(),1);
9894 sub set_job_in_joblog($) {
9896 vec($Global::job_already_run,$job->seq(),1) = 1;
9899 sub should_be_retried($) {
9900 # Should this job be retried?
9903 # 1 - job queued for retry
9905 if (not $opt::retries) {
9908 if(not $self->exitstatus() and not $self->exitsignal()) {
9909 # Completed with success. If there is a recorded failure: forget it
9910 $self->reset_failed_here();
9913 # The job failed. Should it be retried?
9914 $self->add_failed_here();
9915 my $retries = $self->{'commandline'}->
9916 replace_placeholders([$opt::retries],0,0);
9917 if($self->total_failed() == $retries) {
9918 # This has been retried enough
9921 # This command should be retried
9922 $self->set_endtime(undef);
9923 $self->reset_exitstatus();
9924 $Global::JobQueue->unget($self);
9925 ::debug("run", "Retry ", $self->seq(), "\n");
9932 my (%print_later,$job_seq_to_print);
9934 sub print_earlier_jobs($) {
9935 # Print jobs whose output is postponed due to --keep-order
9938 $print_later{$job->seq()} = $job;
9939 $job_seq_to_print ||= 1;
9941 ::debug("run", "Looking for: $job_seq_to_print ",
9942 "This: ", $job->seq(), "\n");
9943 for(;vec($Global::job_already_run,$job_seq_to_print,1);
9944 $job_seq_to_print++) {}
9945 while(my $j = $print_later{$job_seq_to_print}) {
9946 $returnsize += $j->print();
9948 # Job finished - look at the next
9949 delete $print_later{$job_seq_to_print};
9950 $job_seq_to_print++;
9953 # Job not finished yet - look at it again next round
9962 # Print the output of the jobs
9966 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9968 # Nothing was printed to this job:
9969 # cleanup tmp files if --files was set
9970 ::rm($self->fh(1,"name"));
9972 if($opt::pipe and $self->virgin() and not $opt::tee) {
9973 # Skip --joblog, --dryrun, --verbose
9976 # NULL returnsize = 0 returnsize
9977 $self->returnsize() or $self->add_returnsize(0);
9978 if($Global::joblog and defined $self->{'exitstatus'}) {
9979 # Add to joblog when finished
9980 $self->print_joblog();
9981 # Printing is only relevant for grouped/--line-buffer output.
9982 $opt::ungroup and return;
9985 # Check for disk full
9986 ::exit_if_disk_full();
9989 my $returnsize = $self->returnsize();
9990 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9991 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9992 $fdno == 0 and next;
9993 my $out_fd = $Global::fd{$fdno};
9994 my $in_fh = $self->fh($fdno,"r");
9996 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9997 # ::warning("File descriptor $fdno not defined\n");
10001 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
10002 if($opt::linebuffer) {
10003 # Line buffered print out
10004 $self->print_linebuffer($fdno,$in_fh,$out_fd);
10005 } elsif($opt::files) {
10006 $self->print_files($fdno,$in_fh,$out_fd);
10007 } elsif($opt::tag or defined $opt::tagstring) {
10008 $self->print_tag($fdno,$in_fh,$out_fd);
10010 $self->print_normal($fdno,$in_fh,$out_fd);
10014 ::debug("print", "<<joboutput\n");
10015 if(defined $self->{'exitstatus'}
10016 and not ($self->virgin() and $opt::pipe)) {
10017 if($Global::joblog and not $opt::sqlworker) {
10018 # Add to joblog when finished
10019 $self->print_joblog();
10021 if($opt::sqlworker and not $opt::results) {
10022 $Global::sql->output($self);
10024 if($Global::csvsep) {
10025 # Add output to CSV when finished
10026 $self->print_csv();
10029 return $returnsize - $self->returnsize();
10033 my $header_printed;
10038 if($Global::verbose <= 1) {
10039 $cmd = $self->replaced();
10041 # Verbose level > 1: Print the rsync and stuff
10042 $cmd = join " ", @{$self->{'commandline'}};
10044 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10046 if(not $header_printed) {
10049 # --header : => first value from column
10053 @V = (map { $Global::input_source_header{$i++} }
10054 @$record_ref[1..$#$record_ref]);
10057 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
10059 print $Global::csv_fh
10061 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
10062 "Send", "Receive", "Exitval", "Signal", "Command",
10068 # Memory optimization: Overwrite with the joined output
10069 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10070 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10071 print $Global::csv_fh
10075 $self->sshlogin()->string(),
10076 $self->starttime(), sprintf("%0.3f",$self->runtime()),
10077 $self->transfersize(), $self->returnsize(),
10078 $self->exitstatus(), $self->exitsignal(), \$cmd,
10079 \@$record_ref[1..$#$record_ref],
10080 \$self->{'output'}{1},
10081 \$self->{'output'}{2})),"\n";
10085 sub combine_ref($) {
10086 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
10088 my $sep = $Global::csvsep;
10092 my $must_be_quoted;
10093 for my $column (@part) {
10094 # Memory optimization: Content transferred as reference
10095 if(ref $column ne "SCALAR") {
10096 # Convert all columns to scalar references
10100 if(not defined $$column) {
10105 $must_be_quoted = 0;
10107 if($$column =~ s/$quot/$quot$quot/go){
10109 $must_be_quoted ||=1;
10111 if($$column =~ /[\s\Q$sep\E]/o){
10112 # Put quotes around if the column contains ,
10113 $must_be_quoted ||=1;
10116 $Global::use{"bytes"} ||= eval "use bytes; 1;";
10117 if ($$column =~ /\0/) {
10118 # Contains \0 => put quotes around
10119 $must_be_quoted ||=1;
10121 if($must_be_quoted){
10122 push @out, \$sep, \$quot, $column, \$quot;
10124 push @out, \$sep, $column;
10132 sub print_files($) {
10133 # Print the name of the file containing stdout on stdout
10136 # $opt::group = Print when job is done
10137 # $opt::linebuffer = Print ASAP
10140 my ($fdno,$in_fh,$out_fd) = @_;
10142 # If the job is dead: close printing fh. Needed for --compress
10143 close $self->fh($fdno,"w");
10144 if($? and $opt::compress) {
10145 ::error($opt::compress_program." failed.");
10146 $self->set_exitstatus(255);
10148 if($opt::compress) {
10149 # Kill the decompressor which will not be needed
10150 CORE::kill "TERM", $self->fh($fdno,"rpid");
10154 if($opt::pipe and $self->virgin()) {
10155 # Nothing was printed to this job:
10156 # cleanup unused tmp files because --files was set
10157 for my $fdno (1,2) {
10158 ::rm($self->fh($fdno,"name"));
10159 ::rm($self->fh($fdno,"unlink"));
10161 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
10162 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10163 if($Global::membuffer) {
10164 push @{$self->{'output'}{$fdno}},
10165 $self->tag(), $self->fh($fdno,"name");
10167 $self->add_returnsize(-s $self->fh($fdno,"name"));
10168 # Mark as printed - do not print again
10169 $self->set_fh($fdno,"name",undef);
10173 sub print_linebuffer($) {
10175 my ($fdno,$in_fh,$out_fd) = @_;
10176 if(defined $self->{'exitstatus'}) {
10177 # If the job is dead: close printing fh. Needed for --compress
10178 close $self->fh($fdno,"w");
10179 if($? and $opt::compress) {
10180 ::error($opt::compress_program." failed.");
10181 $self->set_exitstatus(255);
10183 if($opt::compress) {
10184 # Blocked reading in final round
10185 for my $fdno (1,2) {
10186 ::set_fh_blocking($self->fh($fdno,'r'));
10190 if(not $self->virgin()) {
10191 if($opt::files or ($opt::results and not $Global::csvsep)) {
10193 if($fdno == 1 and not $self->fh($fdno,"printed")) {
10194 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10195 if($Global::membuffer) {
10196 push(@{$self->{'output'}{$fdno}}, $self->tag(),
10197 $self->fh($fdno,"name"));
10199 $self->set_fh($fdno,"printed",1);
10201 # No need for reading $in_fh, as it is from "cat >/dev/null"
10203 # Read halflines and print full lines
10204 my $outputlength = 0;
10205 my $halfline_ref = $self->{'halfline'}{$fdno};
10207 # 1310720 gives 1.2 GB/s
10208 # 131072 gives 0.9 GB/s
10209 while($rv = sysread($in_fh, $buf,1310720)) {
10210 $outputlength += $rv;
10212 # Treat both \n and \r as line end
10213 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10215 # One or more complete lines were found
10216 if($opt::tag or defined $opt::tagstring) {
10217 # Replace ^ with $tag within the full line
10218 if($Global::cache_replacement_eval) {
10219 # Replace with the same value for tag
10220 my $tag = $self->tag();
10221 unshift @$halfline_ref, $tag;
10222 # TODO --recend that can be partially in @$halfline_ref
10223 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$tag/gs;
10224 # The length changed, so find the new ending pos
10225 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10227 # Replace with freshly computed value of tag
10228 unshift @$halfline_ref, $self->tag();
10229 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$self->tag()/gse;
10230 # The length changed, so find the new ending pos
10231 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10234 # Print the partial line (halfline) and the last half
10235 print $out_fd @$halfline_ref, substr($buf,0,$i);
10236 # Buffer in memory for SQL and CSV-output
10237 if($Global::membuffer) {
10238 push(@{$self->{'output'}{$fdno}},
10239 @$halfline_ref, substr($buf,0,$i));
10241 # Remove the printed part by keeping the unprinted part
10242 @$halfline_ref = (substr($buf,$i));
10244 # No newline, so append to the halfline
10245 push @$halfline_ref, $buf;
10248 $self->add_returnsize($outputlength);
10250 if(defined $self->{'exitstatus'}) {
10251 if($opt::files or ($opt::results and not $Global::csvsep)) {
10252 $self->add_returnsize(-s $self->fh($fdno,"name"));
10254 # If the job is dead: print the remaining partial line
10256 my $halfline_ref = $self->{'halfline'}{$fdno};
10257 if(grep /./, @$halfline_ref) {
10258 my $returnsize = 0;
10259 for(@{$self->{'halfline'}{$fdno}}) {
10260 $returnsize += length $_;
10262 $self->add_returnsize($returnsize);
10263 if($opt::tag or defined $opt::tagstring) {
10264 # Prepend $tag the the remaining half line
10265 unshift @$halfline_ref, $self->tag();
10267 # Print the partial line (halfline)
10268 print $out_fd @{$self->{'halfline'}{$fdno}};
10269 # Buffer in memory for SQL and CSV-output
10270 if($Global::membuffer) {
10271 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
10273 @$halfline_ref = ();
10276 if($self->fh($fdno,"rpid") and
10277 CORE::kill 0, $self->fh($fdno,"rpid")) {
10278 # decompress still running
10280 # decompress done: close fh
10282 if($? and $opt::compress) {
10283 ::error($opt::decompress_program." failed.");
10284 $self->set_exitstatus(255);
10292 return print_normal(@_);
10295 sub free_ressources() {
10297 if(not $opt::ungroup) {
10299 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
10300 $fh = $self->fh($fdno,"w");
10302 $fh = $self->fh($fdno,"r");
10308 sub print_normal($) {
10310 my ($fdno,$in_fh,$out_fd) = @_;
10312 close $self->fh($fdno,"w");
10313 if($? and $opt::compress) {
10314 ::error($opt::compress_program." failed.");
10315 $self->set_exitstatus(255);
10317 if(not $self->virgin()) {
10319 # $in_fh is now ready for reading at position 0
10320 my $outputlength = 0;
10323 if($opt::tag or $opt::tagstring) {
10324 # Read line by line
10326 my $tag = $self->tag();
10328 $outputlength += length $_;
10329 # Tag lines with \r, too
10330 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10331 print $out_fd $tag,$_;
10332 if($Global::membuffer) {
10333 push @{$self->{'output'}{$fdno}}, $tag, $_;
10337 while(sysread($in_fh,$buf,131072)) {
10338 print $out_fd $buf;
10339 $outputlength += length $buf;
10340 if($Global::membuffer) {
10341 push @{$self->{'output'}{$fdno}}, $buf;
10346 $self->add_returnsize($outputlength);
10349 if($? and $opt::compress) {
10350 ::error($opt::decompress_program." failed.");
10351 $self->set_exitstatus(255);
10356 sub print_joblog($) {
10359 if($Global::verbose <= 1) {
10360 $cmd = $self->replaced();
10362 # Verbose level > 1: Print the rsync and stuff
10363 $cmd = $self->wrapped();
10365 # Newlines make it hard to parse the joblog
10367 print $Global::joblog
10368 join("\t", $self->seq(), $self->sshlogin()->string(),
10369 $self->starttime(), sprintf("%10.3f",$self->runtime()),
10370 $self->transfersize(), $self->returnsize(),
10371 $self->exitstatus(), $self->exitsignal(), $cmd
10373 flush $Global::joblog;
10374 $self->set_job_in_joblog();
10379 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
10380 if($opt::tag or defined $opt::tagstring) {
10381 $self->{'tag'} = $self->{'commandline'}->
10382 replace_placeholders([$opt::tagstring],0,0)."\t";
10384 $self->{'tag'} = "";
10387 return $self->{'tag'};
10390 sub hostgroups($) {
10392 if(not defined $self->{'hostgroups'}) {
10393 $self->{'hostgroups'} =
10394 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
10396 return @{$self->{'hostgroups'}};
10399 sub exitstatus($) {
10401 return $self->{'exitstatus'};
10404 sub set_exitstatus($$) {
10406 my $exitstatus = shift;
10408 # Overwrite status if non-zero
10409 $self->{'exitstatus'} = $exitstatus;
10411 # Set status but do not overwrite
10412 # Status may have been set by --timeout
10413 $self->{'exitstatus'} ||= $exitstatus;
10415 $opt::sqlworker and
10416 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
10420 sub reset_exitstatus($) {
10422 undef $self->{'exitstatus'};
10425 sub exitsignal($) {
10427 return $self->{'exitsignal'};
10430 sub set_exitsignal($$) {
10432 my $exitsignal = shift;
10433 $self->{'exitsignal'} = $exitsignal;
10434 $opt::sqlworker and
10435 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
10440 my $status_printed;
10443 sub should_we_halt {
10444 # Should we halt? Immediately? Gracefully?
10448 if($job->exitstatus() or $job->exitsignal()) {
10450 $Global::exitstatus++;
10451 $Global::total_failed++;
10452 if($Global::halt_fail) {
10453 ::status("$Global::progname: This job failed:",
10455 $limit = $Global::total_failed;
10457 } elsif($Global::halt_success) {
10458 ::status("$Global::progname: This job succeeded:",
10460 $limit = $Global::total_completed - $Global::total_failed;
10462 if($Global::halt_done) {
10463 ::status("$Global::progname: This job finished:",
10465 $limit = $Global::total_completed;
10467 if(not defined $limit) {
10470 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
10471 # --halt % => 1..100 (pct of jobs failed)
10472 if($Global::halt_pct and not $Global::halt_count) {
10473 $total_jobs ||= $Global::JobQueue->total_jobs();
10474 # From the pct compute the number of jobs that must fail/succeed
10475 $Global::halt_count = $total_jobs * $Global::halt_pct;
10477 if($limit >= $Global::halt_count) {
10478 # At least N jobs have failed/succeded/completed
10479 # or at least N% have failed/succeded/completed
10480 # So we should prepare for exit
10481 if($Global::halt_fail or $Global::halt_done) {
10483 if(not defined $Global::halt_exitstatus) {
10484 if($Global::halt_pct) {
10485 # --halt now,fail=X% or soon,fail=X%
10486 # --halt now,done=X% or soon,done=X%
10487 $Global::halt_exitstatus =
10488 ::ceil($Global::total_failed / $total_jobs * 100);
10489 } elsif($Global::halt_count) {
10490 # --halt now,fail=X or soon,fail=X
10491 # --halt now,done=X or soon,done=X
10492 $Global::halt_exitstatus =
10493 ::min($Global::total_failed,101);
10495 if($Global::halt_count and $Global::halt_count == 1) {
10496 # --halt now,fail=1 or soon,fail=1
10497 # --halt now,done=1 or soon,done=1
10498 # Emulate Bash's +128 if there is a signal
10499 $Global::halt_exitstatus =
10500 ($job->exitstatus()
10502 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
10505 ::debug("halt","Pct: ",$Global::halt_pct,
10506 " count: ",$Global::halt_count,
10507 " status: ",$Global::halt_exitstatus,"\n");
10508 } elsif($Global::halt_success) {
10509 $Global::halt_exitstatus = 0;
10511 if($Global::halt_when eq "soon"
10513 (scalar(keys %Global::running) > 0
10515 $Global::max_jobs_running == 1)) {
10517 ("$Global::progname: Starting no more jobs. ".
10518 "Waiting for ". (keys %Global::running).
10519 " jobs to finish.");
10520 $Global::start_no_new_jobs ||= 1;
10522 return($Global::halt_when);
10529 package CommandLine;
10534 my $commandref = shift;
10535 $commandref || die;
10536 my $arg_queue = shift;
10537 my $context_replace = shift;
10538 my $max_number_of_args = shift; # for -N and normal (-n1)
10539 my $transfer_files = shift;
10540 my $return_files = shift;
10541 my $replacecount_ref = shift;
10542 my $len_ref = shift;
10543 my %replacecount = %$replacecount_ref;
10544 my %len = %$len_ref;
10545 for (keys %$replacecount_ref) {
10546 # Total length of this replacement string {} replaced with all args
10550 'command' => $commandref,
10554 'arg_list_flat' => [],
10555 'arg_list_flat_orig' => [undef],
10556 'arg_queue' => $arg_queue,
10557 'max_number_of_args' => $max_number_of_args,
10558 'replacecount' => \%replacecount,
10559 'context_replace' => $context_replace,
10560 'transfer_files' => $transfer_files,
10561 'return_files' => $return_files,
10562 'replaced' => undef,
10563 }, ref($class) || $class;
10568 return $self->{'seq'};
10573 $self->{'seq'} = shift;
10577 # Find the number of a free job slot and return it
10579 # @Global::slots - list with free jobslots
10581 # $jobslot = number of jobslot
10583 if(not $self->{'slot'}) {
10584 if(not @Global::slots) {
10585 # $max_slot_number will typically be $Global::max_jobs_running
10586 push @Global::slots, ++$Global::max_slot_number;
10588 $self->{'slot'} = shift @Global::slots;
10590 return $self->{'slot'};
10594 my $already_spread;
10595 my $darwin_max_len;
10598 # Add arguments from arg_queue until the number of arguments or
10599 # max line length is reached
10601 # $Global::minimal_command_line_length
10604 # $Global::JobQueue
10607 # $Global::max_jobs_running
10611 my $max_len = $Global::minimal_command_line_length
10612 || Limits::Command::max_length();
10613 if($^O eq "darwin") {
10614 # Darwin's limit is affected by:
10615 # * number of environment names (variables+functions)
10616 # * size of environment
10617 # * the length of arguments:
10618 # a one-char argument lowers the limit by 5
10619 # To be safe assume all arguments are one-char
10620 # The max_len is cached between runs, but if the size of
10621 # the environment is different we need to recompute the
10622 # usable max length for this run of GNU Parallel
10623 # See https://unix.stackexchange.com/a/604943/2972
10624 if(not $darwin_max_len) {
10625 my $envc = (keys %ENV);
10626 my $envn = length join"",(keys %ENV);
10627 my $envv = length join"",(values %ENV);
10628 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
10630 "length: $darwin_max_len ".
10631 "3+($max_len - $envn - $envv)/5 - $envc*2");
10633 $max_len = $darwin_max_len;
10635 if($opt::cat or $opt::fifo) {
10636 # Get the empty arg added by --pipepart (if any)
10637 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
10638 # $PARALLEL_TMP will point to a tempfile that will be used as {}
10639 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
10640 unget([Arg->new('$PARALLEL_TMP')]);
10642 while (not $self->{'arg_queue'}->empty()) {
10643 $next_arg = $self->{'arg_queue'}->get();
10644 if(not defined $next_arg) {
10647 $self->push($next_arg);
10648 if($self->len() >= $max_len) {
10649 # Command length is now > max_length
10650 # If there are arguments: remove the last
10651 # If there are no arguments: Error
10652 # TODO stuff about -x opt_x
10653 if($self->number_of_args() > 1) {
10654 # There is something to work on
10655 $self->{'arg_queue'}->unget($self->pop());
10658 my $args = join(" ", map { $_->orig() } @$next_arg);
10659 ::error("Command line too long (".
10660 $self->len(). " >= ".
10663 $self->{'arg_queue'}->arg_number().
10665 ((length $args > 50) ?
10666 (substr($args,0,50))."..." :
10668 $self->{'arg_queue'}->unget($self->pop());
10669 ::wait_and_exit(255);
10673 if(defined $self->{'max_number_of_args'}) {
10674 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
10679 if(($opt::m or $opt::X) and not $already_spread
10680 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
10681 # -m or -X and EOF => Spread the arguments over all jobslots
10682 # (unless they are already spread)
10683 $already_spread ||= 1;
10684 if($self->number_of_args() > 1) {
10685 $self->{'max_number_of_args'} =
10686 ::ceil($self->number_of_args()/$Global::max_jobs_running);
10687 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
10688 $self->{'max_number_of_args'};
10689 $self->{'arg_queue'}->unget($self->pop_all());
10690 while($self->number_of_args() < $self->{'max_number_of_args'}) {
10691 $self->push($self->{'arg_queue'}->get());
10694 $Global::JobQueue->flush_total_jobs();
10697 if($opt::sqlmaster) {
10698 # Insert the V1..Vn for this $seq in SQL table instead of generating one
10699 $Global::sql->insert_records($self->seq(), $self->{'command'},
10700 $self->{'arg_list_flat_orig'});
10706 # Add one or more records as arguments
10709 my $record = shift;
10710 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
10711 push @{$self->{'arg_list_flat'}}, @$record;
10712 push @{$self->{'arg_list'}}, $record;
10713 # Make @arg available for {= =}
10714 *Arg::arg = $self->{'arg_list_flat_orig'};
10716 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10717 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10718 if($perlexpr =~ /^(\d+) /) {
10720 defined($record->[$1-1]) or next;
10721 $self->{'len'}{$perlexpr} +=
10722 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10724 for my $arg (@$record) {
10726 $self->{'len'}{$perlexpr} +=
10727 length $arg->replace($perlexpr,$quote_arg,$self);
10735 # Remove last argument
10739 my $record = pop @{$self->{'arg_list'}};
10740 # pop off arguments from @$record
10741 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
10742 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
10743 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10744 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10745 if($perlexpr =~ /^(\d+) /) {
10747 defined($record->[$1-1]) or next;
10748 $self->{'len'}{$perlexpr} -=
10749 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10751 for my $arg (@$record) {
10753 $self->{'len'}{$perlexpr} -=
10754 length $arg->replace($perlexpr,$quote_arg,$self);
10763 # Remove all arguments and zeros the length of replacement perlexpr
10767 my @popped = @{$self->{'arg_list'}};
10768 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10769 $self->{'len'}{$perlexpr} = 0;
10771 $self->{'arg_list'} = [];
10772 $self->{'arg_list_flat_orig'} = [undef];
10773 $self->{'arg_list_flat'} = [];
10777 sub number_of_args($) {
10778 # The number of records
10780 # number of records
10782 # This is really the number of records
10783 return $#{$self->{'arg_list'}}+1;
10786 sub number_of_recargs($) {
10787 # The number of args in records
10789 # number of args records
10792 my $nrec = scalar @{$self->{'arg_list'}};
10794 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
10799 sub args_as_string($) {
10801 # all unmodified arguments joined with ' ' (similar to {})
10803 return (join " ", map { $_->orig() }
10804 map { @$_ } @{$self->{'arg_list'}});
10807 sub results_out($) {
10808 sub max_file_name_length {
10809 # Figure out the max length of a subdir
10810 # TODO and the max total length
10811 # Ext4 = 255,130816
10813 # $Global::max_file_length is set
10815 # $Global::max_file_length
10816 my $testdir = shift;
10818 my $upper = 100_000_000;
10819 # Dir length of 8 chars is supported everywhere
10821 my $dir = "x"x$len;
10823 rmdir($testdir."/".$dir);
10826 } while ($len < $upper and mkdir $testdir."/".$dir);
10827 # Then search for the actual max length between $len/16 and $len
10830 while($max-$min > 5) {
10831 # If we are within 5 chars of the exact value:
10832 # it is not worth the extra time to find the exact value
10833 my $test = int(($min+$max)/2);
10835 if(mkdir $testdir."/".$dir) {
10836 rmdir($testdir."/".$dir);
10842 $Global::max_file_length = $min;
10847 my $out = $self->replace_placeholders([$opt::results],0,0);
10848 if($out eq $opt::results) {
10849 # $opt::results simple string: Append args_as_dirname
10850 my $args_as_dirname = $self->args_as_dirname();
10851 # Output in: prefix/name1/val1/name2/val2/stdout
10852 $out = $opt::results."/".$args_as_dirname;
10853 if(-d $out or eval{ File::Path::mkpath($out); }) {
10856 # mkpath failed: Argument probably too long.
10857 # Set $Global::max_file_length, which will keep the individual
10858 # dir names shorter than the max length
10859 max_file_name_length($opt::results);
10860 $args_as_dirname = $self->args_as_dirname();
10861 # prefix/name1/val1/name2/val2/
10862 $out = $opt::results."/".$args_as_dirname;
10863 File::Path::mkpath($out);
10867 if($out =~ m:/$:) {
10869 if(-d $out or eval{ File::Path::mkpath($out); }) {
10872 ::error("Cannot make dir '$out'.");
10873 ::wait_and_exit(255);
10877 File::Path::mkpath($1);
10883 sub args_as_dirname($) {
10885 # all unmodified arguments joined with '/' (similar to {})
10886 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10887 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
10891 for my $rec_ref (@{$self->{'arg_list'}}) {
10892 # If headers are used, sort by them.
10893 # Otherwise keep the order from the command line.
10894 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
10895 for my $n (@header_indexes_sorted) {
10897 $Global::input_source_header{$n},
10899 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10904 if($Global::max_file_length) {
10905 # Keep each subdir shorter than the longest
10906 # allowed file name
10907 $s = substr($s,0,$Global::max_file_length);
10910 $rec_ref->[$n-1]->orig());
10913 return join "/", @res;
10916 sub header_indexes_sorted($) {
10917 # Sort headers first by number then by name.
10918 # E.g.: 1a 1b 11a 11b
10920 # Indexes of %Global::input_source_header sorted
10921 my $max_col = shift;
10923 no warnings 'numeric';
10924 for my $col (1 .. $max_col) {
10925 # Make sure the header is defined. If it is not: use column number
10926 if(not defined $Global::input_source_header{$col}) {
10927 $Global::input_source_header{$col} = $col;
10930 my @header_indexes_sorted = sort {
10931 # Sort headers numerically then asciibetically
10932 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
10934 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
10936 return @header_indexes_sorted;
10942 # The length of the command line with args substituted
10945 # Add length of the original command with no args
10946 # Length of command w/ all replacement args removed
10947 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
10948 ::debug("length", "noncontext + command: $len\n");
10949 # MacOS has an overhead of 8 bytes per argument
10950 my $darwin = ($^O eq "darwin") ? 8 : 0;
10951 my $recargs = $self->number_of_recargs();
10952 if($self->{'context_replace'}) {
10953 # Context is duplicated for each arg
10954 $len += $recargs * $self->{'len'}{'context'};
10955 for my $replstring (keys %{$self->{'replacecount'}}) {
10956 # If the replacements string is more than once: mulitply its length
10957 $len += $self->{'len'}{$replstring} *
10958 $self->{'replacecount'}{$replstring};
10959 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
10960 $self->{'replacecount'}{$replstring}, "\n");
10962 # echo 11 22 33 44 55 66 77 88 99 1010
10963 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
10965 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10966 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10967 # Add space between context groups
10968 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10970 $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
10973 # Each replacement string may occur several times
10974 # Add the length for each time
10975 $len += 1*$self->{'len'}{'context'};
10976 ::debug("length", "context+noncontext + command: $len\n");
10977 for my $replstring (keys %{$self->{'replacecount'}}) {
10978 # (space between recargs + length of replacement)
10979 # * number this replacement is used
10980 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10981 $self->{'replacecount'}{$replstring};
10983 $len += ($recargs * $self->{'replacecount'}{$replstring}
10988 if(defined $Global::parallel_env) {
10989 # If we are using --env, add the prefix for that, too.
10990 $len += length $Global::parallel_env;
10992 if($Global::quoting) {
10993 # Pessimistic length if -q is set
10994 # Worse than worst case: ' => "'" + " => '"'
10995 # TODO can we count the number of expanding chars?
10996 # and count them in arguments, too?
10999 if(@opt::shellquote) {
11000 # Pessimistic length if --shellquote is set
11001 # Worse than worst case: ' => "'"
11002 for(@opt::shellquote) {
11007 if(@opt::sshlogin) {
11008 # Pessimistic length if remote
11009 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
11010 $len = int($len*4/3);
11018 # $Global::quote_replace
11021 # $replaced = command with place holders replaced and prepended
11023 if(not defined $self->{'replaced'}) {
11024 # Don't quote arguments if the input is the full command line
11025 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11026 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
11027 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
11028 $self->{'replaced'} = $self->
11029 replace_placeholders($self->{'command'},$Global::quoting,
11031 my $len = length $self->{'replaced'};
11032 if ($len != $self->len()) {
11033 ::debug("length", $len, " != ", $self->len(),
11034 " ", $self->{'replaced'}, "\n");
11036 ::debug("length", $len, " == ", $self->len(),
11037 " ", $self->{'replaced'}, "\n");
11040 return $self->{'replaced'};
11043 sub replace_placeholders($$$$) {
11044 # Replace foo{}bar with fooargbar
11046 # $targetref = command as shell words
11047 # $quote = should everything be quoted?
11048 # $quote_arg = should replaced arguments be quoted?
11050 # @Arg::arg = arguments as strings to be use in {= =}
11052 # @target with placeholders replaced
11054 my $targetref = shift;
11056 my $quote_arg = shift;
11059 # Token description:
11060 # \0spc = unquoted space
11061 # \0end = last token element
11062 # \0ign = dummy token to be ignored
11063 # \257<...\257> = replacement expression
11064 # " " = quoted space, that splits -X group
11065 # text = normal text - possibly part of -X group
11067 my @tokens = grep { length $_ > 0 } map {
11069 # \257<...\257> or space
11072 # Split each space/tab into a token
11073 split /(?=\s)|(?<=\s)/
11076 # Split \257< ... \257> into own token
11077 map { split /(?=\257<)|(?<=\257>)/ }
11078 # Insert "\0spc" between every element
11079 # This space should never be quoted
11080 map { $spacer++ ? ("\0spc",$_) : $_ }
11081 map { $_ eq "" ? "\0empty" : $_ }
11085 # @tokens is empty: Return empty array
11088 ::debug("replace", "Tokens ".join":",@tokens,"\n");
11089 # Make it possible to use $arg[2] in {= =}
11090 *Arg::arg = $self->{'arg_list_flat_orig'};
11092 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
11093 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
11094 if(not @{$self->{'arg_list_flat'}}) {
11095 @{$self->{'arg_list_flat'}} = Arg->new("");
11097 my $argref = $self->{'arg_list_flat'};
11098 # Number of arguments - used for positional arguments
11099 my $n = $#$argref+1;
11101 # $self is actually a CommandLine-object,
11102 # but it looks nice to be able to say {= $job->slot() =}
11104 # @replaced = tokens with \257< \257> replaced
11106 if($self->{'context_replace'}) {
11108 for my $t (@tokens,"\0end") {
11109 # \0end = last token was end of tokens.
11110 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
11111 # Context group complete: Replace in it
11112 if(grep { /^\257</ } @ctxgroup) {
11113 # Context group contains a replacement string:
11114 # Copy once per arg
11115 my $space = "\0ign";
11116 for my $arg (@$argref) {
11117 my $normal_replace;
11119 # Put unquoted space before each context group
11121 CORE::push @replaced, $space, map {
11124 s{\257<(-?\d+)?(.*)\257>}
11127 # Positional replace
11128 # Find the relevant arg and replace it
11129 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
11130 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11131 replace($2,$quote_arg,$self)
11135 $normal_replace ||= 1;
11136 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11139 # Token is \257<..\257>
11141 if($Global::escape_string_present) {
11142 # Command line contains \257:
11143 # Unescape it \257\256 => \257
11144 $a =~ s/\257\256/\257/g;
11149 $normal_replace or last;
11153 # Context group has no a replacement string: Copy it once
11154 CORE::push @replaced, map {
11155 $Global::escape_string_present and s/\257\256/\257/g; $_;
11158 # New context group
11161 if($t eq "\0spc" or $t eq " ") {
11162 CORE::push @replaced,$t;
11164 CORE::push @ctxgroup,$t;
11171 # repquote = no if {} first on line, no if $quote, yes otherwise
11172 for my $t (@tokens) {
11173 if($t =~ /^\257</) {
11174 my $space = "\0ign";
11175 for my $arg (@$argref) {
11176 my $normal_replace;
11179 s{\257<(-?\d+)?(.*)\257>}
11182 # Positional replace
11183 # Find the relevant arg and replace it
11184 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
11185 # If defined: replace
11186 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11187 replace($2,$quote_arg,$self)
11191 $normal_replace ||= 1;
11192 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11195 CORE::push @replaced, $space, $a;
11196 $normal_replace or last;
11201 CORE::push @replaced, map {
11202 $Global::escape_string_present and s/\257\256/\257/g; $_;
11208 ::debug("replace","Replaced: ".join":",@replaced,"\n");
11210 # Put tokens into groups that may be quoted.
11213 for (map { $_ eq "\0empty" ? "" : $_ }
11214 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
11215 @replaced, "\0end") {
11216 if($_ eq "\0spc" or $_ eq "\0end") {
11217 # \0spc splits quotable groups
11220 CORE::push @quoted, ::Q(join"",@quotegroup);;
11223 CORE::push @quoted, join"",@quotegroup;
11227 CORE::push @quotegroup, $_;
11230 ::debug("replace","Quoted: ".join":",@quoted,"\n");
11231 return wantarray ? @quoted : "@quoted";
11237 $self->{'skip'} = 1;
11241 package CommandLineQueue;
11245 my $commandref = shift;
11246 my $read_from = shift;
11247 my $context_replace = shift || 0;
11248 my $max_number_of_args = shift;
11249 my $transfer_files = shift;
11250 my $return_files = shift;
11253 my ($replacecount_ref, $len_ref);
11254 my @command = @$commandref;
11256 # Replace replacement strings with {= perl expr =}
11257 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11258 @command = merge_rpl_parts(@command);
11260 # Protect matching inside {= perl expr =}
11261 # by replacing {= and =} with \257< and \257>
11262 # in options that can contain replacement strings:
11263 # @command, --transferfile, --return,
11264 # --tagstring, --workdir, --results
11265 for(@command, @$transfer_files, @$return_files,
11266 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11267 # Skip if undefined
11269 # Escape \257 => \257\256
11270 $Global::escape_string_present += s/\257/\257\256/g;
11271 # Needs to match rightmost left parens (Perl defaults to leftmost)
11272 # to deal with: {={==} and {={==}=}
11273 # Replace {= -> \257< and =} -> \257>
11275 # Complex way to do:
11276 # s/{=(.*)=}/\257<$1\257>/g
11277 # which would not work
11278 s[\Q$Global::parensleft\E # Match {=
11279 # Match . unless the next string is {= or =}
11280 # needed to force matching the shortest {= =}
11281 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
11282 \Q$Global::parensright\E ] # Match =}
11284 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
11285 # Replace long --rpl's before short ones, as a short may be a
11286 # substring of a long:
11287 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
11289 # Replace the shorthand string (--rpl)
11290 # with the {= perl expr =}
11292 # Avoid searching for shorthand strings inside existing {= perl expr =}
11294 # Replace $$1 in {= perl expr =} with groupings in shorthand string
11296 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
11297 # echo {/.tar/.gz} ::: UU.tar.gz
11298 my ($prefix,$grp_regexp,$postfix) =
11299 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
11300 ( \(.*\) )? # Group capture regexp - e.g (.*)
11301 ( [^)]* )$ # Postfix - e.g }
11303 $grp_regexp ||= '';
11304 my $rplval = $Global::rpl{$rpl};
11305 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11306 # Don't replace after \257 unless \257>
11307 \Q$prefix\E $grp_regexp \Q$postfix\E}
11309 # The start remains the same
11310 my $unchanged = $1;
11311 # Dummy entry to start at 1.
11313 # $2 = first ()-group in $grp_regexp
11314 # Put $2 in $grp[1], Put $3 in $grp[2]
11315 # so first ()-group in $grp_regexp is $grp[1];
11316 for(my $i = 2; defined $grp[$#grp]; $i++) {
11317 push @grp, eval '$'.$i;
11320 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11321 # in the code to be executed
11322 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11323 # prepend with $_pAr_gRp1 = perlquote($1),
11325 for(my $i = 1;defined $grp[$i]; $i++) {
11326 $set_args .= "\$_pAr_gRp$i = \"" .
11327 ::perl_quote_scalar($grp[$i]) . "\";";
11329 $unchanged . "\257<" . $set_args . $rv . "\257>"
11332 # Do the same for the positional replacement strings
11334 if($posrpl =~ s/^\{//) {
11335 # Only do this if the shorthand start with {
11337 # Don't replace after \257 unless \257>
11338 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11339 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
11341 # The start remains the same
11342 my $unchanged = $1;
11344 # Dummy entry to start at 1.
11346 # $3 = first ()-group in $grp_regexp
11347 # Put $3 in $grp[1], Put $4 in $grp[2]
11348 # so first ()-group in $grp_regexp is $grp[1];
11349 for(my $i = 3; defined $grp[$#grp]; $i++) {
11350 push @grp, eval '$'.$i;
11353 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11354 # in the code to be executed
11355 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11356 # prepend with $_pAr_gRp1 = perlquote($1),
11358 for(my $i = 1;defined $grp[$i]; $i++) {
11359 $set_args .= "\$_pAr_gRp$i = \"" .
11360 ::perl_quote_scalar($grp[$i]) . "\";";
11362 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
11369 # Add {} if no replacement strings in @command
11370 ($replacecount_ref, $len_ref, @command) =
11371 replacement_counts_and_lengths($transfer_files,$return_files,@command);
11372 if("@command" =~ /^[^ \t\n=]*\257</) {
11373 # Replacement string is (part of) the command (and not just
11374 # argument or variable definition V1={})
11375 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11376 # Do no quote (Otherwise it will fail if the input contains spaces)
11377 $Global::quote_replace = 0;
11380 if($opt::sqlmaster and $Global::sql->append()) {
11381 $seq = $Global::sql->max_seq() + 1;
11385 'unget' => \@unget,
11386 'command' => \@command,
11387 'replacecount' => $replacecount_ref,
11388 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
11389 'context_replace' => $context_replace,
11391 'max_number_of_args' => $max_number_of_args,
11393 'transfer_files' => $transfer_files,
11394 'return_files' => $return_files,
11396 }, ref($class) || $class;
11399 sub merge_rpl_parts($) {
11400 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11402 # @in = the @command as given by the user
11404 # $Global::parensleft
11405 # $Global::parensright
11407 # @command with parts merged to keep {= and =} as one
11410 my $l = quotemeta($Global::parensleft);
11411 my $r = quotemeta($Global::parensright);
11416 # Remove matching (right most) parens
11417 while(s/(.*)$l.*?$r/$1/os) {}
11419 # Missing right parens
11421 $s .= " ".shift @in;
11423 while(s/(.*)$l.*?$r/$1/os) {}
11434 sub replacement_counts_and_lengths($$@) {
11435 # Count the number of different replacement strings.
11436 # Find the lengths of context for context groups and non-context
11438 # If no {} found in @command: add it to @command
11441 # \@transfer_files = array of filenames to transfer
11442 # \@return_files = array of filenames to return
11443 # @command = command template
11445 # \%replacecount, \%len, @command
11446 my $transfer_files = shift;
11447 my $return_files = shift;
11449 my (%replacecount,%len);
11452 # Count how many times each replacement string is used
11453 my @cmd = @command;
11454 my $contextlen = 0;
11455 my $noncontextlen = 0;
11456 my $contextgroups = 0;
11458 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
11459 # %replacecount = { "perlexpr" => number of times seen }
11460 # e.g { "s/a/b/" => 2 }
11461 $replacecount{$1}++;
11464 # Measure the length of the context around the {= perl expr =}
11465 # Use that {=...=} has been replaced with \000 above
11466 # So there is no need to deal with \257<
11467 while($c =~ s/ (\S*\000\S*) //xs) {
11469 $w =~ tr/\000//d; # Remove all \000's
11470 $contextlen += length($w);
11473 # All {= perl expr =} have been removed: The rest is non-context
11474 $noncontextlen += length $c;
11476 for(@$transfer_files, @$return_files,
11477 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11478 # Options that can contain replacement strings
11481 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
11482 # %replacecount = { "perlexpr" => number of times seen }
11483 # e.g { "$_++" => 2 }
11484 # But for tagstring we just need to mark it as seen
11485 $replacecount{$1} ||= 1;
11489 # If the command does not contain {} force it to be computed
11490 # as it is being used by --bar
11491 $replacecount{""} ||= 1;
11494 $len{'context'} = 0+$contextlen;
11495 $len{'noncontext'} = $noncontextlen;
11496 $len{'contextgroups'} = $contextgroups;
11497 $len{'noncontextgroups'} = @cmd-$contextgroups;
11498 ::debug("length", "@command Context: ", $len{'context'},
11499 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
11500 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
11503 # Default command = {}
11504 @command = ("\257<\257>");
11505 } elsif(($opt::pipe or $opt::pipepart)
11506 and not $opt::fifo and not $opt::cat) {
11507 # With --pipe / --pipe-part you can have no replacement
11510 # Append {} to the command if there are no {...}'s and no {=...=}
11511 push @command, ("\257<\257>");
11515 return(\%replacecount,\%len,@command);
11520 if(@{$self->{'unget'}}) {
11521 my $cmd_line = shift @{$self->{'unget'}};
11522 return ($cmd_line);
11524 if($opt::sqlworker) {
11525 # Get the sequence number from the SQL table
11526 $self->set_seq($SQL::next_seq);
11527 # Get the command from the SQL table
11528 $self->{'command'} = $SQL::command_ref;
11530 # Recompute replace counts based on the read command
11531 ($self->{'replacecount'},
11532 $self->{'len'}, @command) =
11533 replacement_counts_and_lengths($self->{'transfer_files'},
11534 $self->{'return_files'},
11535 @$SQL::command_ref);
11536 if("@command" =~ /^[^ \t\n=]*\257</) {
11537 # Replacement string is (part of) the command (and not just
11538 # argument or variable definition V1={})
11539 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11540 # Do no quote (Otherwise it will fail if the input contains spaces)
11541 $Global::quote_replace = 0;
11545 my $cmd_line = CommandLine->new($self->seq(),
11546 $self->{'command'},
11547 $self->{'arg_queue'},
11548 $self->{'context_replace'},
11549 $self->{'max_number_of_args'},
11550 $self->{'transfer_files'},
11551 $self->{'return_files'},
11552 $self->{'replacecount'},
11555 $cmd_line->populate();
11556 ::debug("run","cmd_line->number_of_args ",
11557 $cmd_line->number_of_args(), "\n");
11558 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
11559 if($cmd_line->replaced() eq "") {
11560 # Empty command - pipe requires a command
11561 ::error("--pipe/--pipepart must have a command to pipe into ".
11563 ::wait_and_exit(255);
11565 } elsif($cmd_line->number_of_args() == 0) {
11566 # We did not get more args - maybe at EOF string?
11569 $self->set_seq($self->seq()+1);
11576 unshift @{$self->{'unget'}}, @_;
11581 my $empty = (not @{$self->{'unget'}}) &&
11582 $self->{'arg_queue'}->empty();
11583 ::debug("run", "CommandLineQueue->empty $empty");
11589 return $self->{'seq'};
11594 $self->{'seq'} = shift;
11597 sub quote_args($) {
11599 # If there is not command emulate |bash
11600 return $self->{'command'};
11604 package Limits::Command;
11606 # Maximal command line length (for -m and -X)
11607 sub max_length($) {
11608 # Find the max_length of a command line and cache it
11610 # number of chars on the longest command line allowed
11611 if(not $Limits::Command::line_max_len) {
11612 # Disk cache of max command line length
11613 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
11616 if(-e $len_cache) {
11617 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
11618 $cached_limit = <$fh>;
11621 if(not $cached_limit) {
11622 $cached_limit = real_max_length();
11623 # If $HOME is write protected: Do not fail
11624 my $dir = ::dirname($len_cache);
11625 -d $dir or eval { File::Path::mkpath($dir); };
11626 open(my $fh, ">", $len_cache.$$);
11627 print $fh $cached_limit;
11629 rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
11631 $Limits::Command::line_max_len = tmux_length($cached_limit);
11632 if($opt::max_chars) {
11633 if($opt::max_chars <= $cached_limit) {
11634 $Limits::Command::line_max_len = $opt::max_chars;
11636 ::warning("Value for -s option should be < $cached_limit.");
11640 return int($Limits::Command::line_max_len);
11643 sub real_max_length() {
11644 # Find the max_length of a command line
11646 # The maximal command line length with 1 byte arguments
11647 # return find_max(" x");
11648 return find_max("x");
11652 my $string = shift;
11653 # This is slow on Cygwin, so give Cygwin users a warning
11654 if($^O eq "cygwin") {
11655 ::warning("Finding the maximal command line length. This may take up to 30 seconds.")
11657 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
11658 my $upper = 100_000_000;
11659 # 1000 is supported everywhere, so the search can start anywhere 1..999
11660 # 324 makes the search much faster on Cygwin, so let us use that
11663 if($len > $upper) { return $len };
11665 } while (is_acceptable_command_line_length($len,$string));
11666 # Then search for the actual max length between 0 and upper bound
11667 return binary_find_max(int($len/16),$len,$string);
11670 # Prototype forwarding
11671 sub binary_find_max($$$);
11672 sub binary_find_max($$$) {
11673 # Given a lower and upper bound find the max (length or args) of a command line
11675 # number of chars on the longest command line allowed
11676 my ($lower, $upper, $string) = (@_);
11677 if($lower == $upper or $lower == $upper-1) { return $lower; }
11678 my $middle = int (($upper-$lower)/2 + $lower);
11679 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
11680 if (is_acceptable_command_line_length($middle,$string)) {
11681 return binary_find_max($middle,$upper,$string);
11683 return binary_find_max($lower,$middle,$string);
11687 sub is_acceptable_command_line_length($$) {
11688 # Test if a command line of this length can run
11689 # in the current environment
11690 # If the string is " x" it tests how many args are allowed
11692 # 0 if the command line length is too long
11695 my $string = shift;
11696 if($Global::parallel_env) {
11697 $len += length $Global::parallel_env;
11699 # Force using non-built-in command
11700 ::qqx("/bin/echo ".${string}x(($len-length "/bin/echo ")/length $string));
11701 ::debug("init", "$len=$? ");
11705 sub tmux_length($) {
11706 # If $opt::tmux set, find the limit for tmux
11707 # tmux 1.8 has a 2kB limit
11708 # tmux 1.9 has a 16kB limit
11709 # tmux 2.0 has a 16kB limit
11710 # tmux 2.1 has a 16kB limit
11711 # tmux 2.2 has a 16kB limit
11713 # $len = maximal command line length
11715 # $tmux_len = maximal length runable in tmux
11719 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11720 if(not ::which($ENV{'PARALLEL_TMUX'})) {
11721 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
11722 ::wait_and_exit(255);
11725 for my $l (1, 2020, 16320, 100000, $len) {
11726 my $tmpfile = ::tmpname("tms");
11727 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
11728 " -S $tmpfile new-session -d -n echo $l".
11729 ("x"x$l). " && echo $l; rm -f $tmpfile";
11730 push @out, ::qqx($tmuxcmd);
11733 ::debug("tmux","tmux-out ",@out);
11735 # The arguments is given 3 times on the command line
11736 # and the wrapping is around 30 chars
11737 # (29 for tmux1.9, 33 for tmux1.8)
11738 my $tmux_len = ::max(@out);
11739 $len = ::min($len,int($tmux_len/4-33));
11740 ::debug("tmux","tmux-length ",$len);
11746 package RecordQueue;
11751 my $colsep = shift;
11754 if($opt::sqlworker) {
11756 $arg_sub_queue = SQLRecordQueue->new();
11757 } elsif(defined $colsep) {
11758 # Open one file with colsep or CSV
11759 $arg_sub_queue = RecordColQueue->new($fhs);
11761 # Open one or more files if multiple -a
11762 $arg_sub_queue = MultifileQueue->new($fhs);
11765 'unget' => \@unget,
11767 'arg_sub_queue' => $arg_sub_queue,
11768 }, ref($class) || $class;
11773 # reference to array of Arg-objects
11775 if(@{$self->{'unget'}}) {
11776 $self->{'arg_number'}++;
11777 # Flush cached computed replacements in Arg-objects
11778 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11779 my $ret = shift @{$self->{'unget'}};
11781 map { $_->flush_cache() } @$ret;
11785 my $ret = $self->{'arg_sub_queue'}->get();
11787 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
11788 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
11789 # to mean no-string
11790 ::warning("A NUL character in the input was replaced with \\0.",
11791 "NUL cannot be passed through in the argument list.",
11792 "Did you mean to use the --null option?");
11793 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
11794 # Replace \0 with \\0
11795 my $a = $_->orig();
11800 if(defined $Global::max_number_of_args
11801 and $Global::max_number_of_args == 0) {
11802 ::debug("run", "Read 1 but return 0 args\n");
11803 # \0noarg => nothing (not the empty string)
11804 map { $_->set_orig("\0noarg"); } @$ret;
11806 # Flush cached computed replacements in Arg-objects
11807 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11808 map { $_->flush_cache() } @$ret;
11815 ::debug("run", "RecordQueue-unget\n");
11816 $self->{'arg_number'} -= @_;
11817 unshift @{$self->{'unget'}}, @_;
11822 my $empty = (not @{$self->{'unget'}}) &&
11823 $self->{'arg_sub_queue'}->empty();
11824 ::debug("run", "RecordQueue->empty $empty");
11828 sub arg_number($) {
11830 return $self->{'arg_number'};
11834 package RecordColQueue;
11840 my $arg_sub_queue = MultifileQueue->new($fhs);
11842 'unget' => \@unget,
11843 'arg_sub_queue' => $arg_sub_queue,
11844 }, ref($class) || $class;
11849 # reference to array of Arg-objects
11851 if(@{$self->{'unget'}}) {
11852 return shift @{$self->{'unget'}};
11854 my $unget_ref = $self->{'unget'};
11855 if($self->{'arg_sub_queue'}->empty()) {
11858 my $in_record = $self->{'arg_sub_queue'}->get();
11859 if(defined $in_record) {
11860 my @out_record = ();
11861 for my $arg (@$in_record) {
11862 ::debug("run", "RecordColQueue::arg $arg\n");
11863 my $line = $arg->orig();
11864 ::debug("run", "line='$line'\n");
11869 if(not $Global::csv->parse($line)) {
11870 die "CSV has unexpected format: ^$line^";
11872 for($Global::csv->fields()) {
11873 push @out_record, Arg->new($_);
11876 for my $s (split /$opt::colsep/o, $line, -1) {
11877 push @out_record, Arg->new($s);
11881 push @out_record, Arg->new("");
11884 return \@out_record;
11892 ::debug("run", "RecordColQueue-unget '@_'\n");
11893 unshift @{$self->{'unget'}}, @_;
11898 my $empty = (not @{$self->{'unget'}}) &&
11899 $self->{'arg_sub_queue'}->empty();
11900 ::debug("run", "RecordColQueue->empty $empty");
11905 package SQLRecordQueue;
11911 'unget' => \@unget,
11912 }, ref($class) || $class;
11917 # reference to array of Arg-objects
11919 if(@{$self->{'unget'}}) {
11920 return shift @{$self->{'unget'}};
11922 return $Global::sql->get_record();
11927 ::debug("run", "SQLRecordQueue-unget '@_'\n");
11928 unshift @{$self->{'unget'}}, @_;
11933 if(@{$self->{'unget'}}) { return 0; }
11934 my $get = $self->get();
11936 $self->unget($get);
11938 my $empty = not $get;
11939 ::debug("run", "SQLRecordQueue->empty $empty");
11944 package MultifileQueue;
11946 @Global::unget_argv=();
11951 for my $fh (@$fhs) {
11952 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
11953 ::warning("Input is read from the terminal. You are either an expert",
11954 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
11955 "::: or :::: or -a or to pipe data into parallel. If so",
11956 "consider going through the tutorial: man parallel_tutorial",
11957 "Press CTRL-D to exit.");
11961 'unget' => \@Global::unget_argv,
11963 'arg_matrix' => undef,
11964 }, ref($class) || $class;
11970 return $self->link_get();
11972 return $self->nest_get();
11978 ::debug("run", "MultifileQueue-unget '@_'\n");
11979 unshift @{$self->{'unget'}}, @_;
11984 my $empty = (not @Global::unget_argv) &&
11985 not @{$self->{'unget'}};
11986 for my $fh (@{$self->{'fhs'}}) {
11987 $empty &&= eof($fh);
11989 ::debug("run", "MultifileQueue->empty $empty ");
11995 if(@{$self->{'unget'}}) {
11996 return shift @{$self->{'unget'}};
12001 for my $fh (@{$self->{'fhs'}}) {
12002 my $arg = read_arg_from_fh($fh);
12004 # Record $arg for recycling at end of file
12005 push @{$self->{'arg_matrix'}{$fh}}, $arg;
12006 push @record, $arg;
12009 ::debug("run", "EOA ");
12010 # End of file: Recycle arguments
12011 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
12012 # return last @{$args->{'args'}{$fh}};
12013 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
12025 if(@{$self->{'unget'}}) {
12026 return shift @{$self->{'unget'}};
12031 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
12032 if(not $self->{'arg_matrix'}) {
12033 # Initialize @arg_matrix with one arg from each file
12034 # read one line from each file
12037 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
12038 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12042 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
12043 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
12046 # All filehandles were at eof or eof-string
12049 return [@first_arg_set];
12052 # Treat the case with one input source special. For multiple
12053 # input sources we need to remember all previously read values to
12054 # generate all combinations. But for one input source we can
12055 # forget the value after first use.
12056 if($no_of_inputsources == 1) {
12057 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
12058 if(defined($arg)) {
12063 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
12064 if(eof($self->{'fhs'}[$fhno])) {
12068 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12069 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
12070 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
12071 $self->{'arg_matrix'}[$fhno][$len] = $arg;
12072 # make all new combinations
12074 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
12075 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
12076 # Is input source --link'ed to the next?
12077 $opt::linkinputsource[$fhn+1]);
12079 # Find only combinations with this new entry
12080 $combarg[2*$fhno] = [$len,$len];
12082 # [ 1, 3, 7 ], [ 2, 4, 1 ]
12084 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
12086 for my $c (expand_combinations(@combarg)) {
12088 for my $n (0 .. $no_of_inputsources - 1 ) {
12089 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
12093 # append the mapped to the ungotten arguments
12094 push @{$self->{'unget'}}, @mapped;
12097 return shift @{$self->{'unget'}};
12101 # all are eof or at EOF string; return from the unget queue
12102 return shift @{$self->{'unget'}};
12105 sub read_arg_from_fh($) {
12106 # Read one Arg from filehandle
12108 # Arg-object with one read line
12109 # undef if end of file
12113 my $half_record = 0;
12115 # This makes 10% faster
12116 if(not defined ($arg = <$fh>)) {
12117 if(defined $prepend) {
12118 return Arg->new($prepend);
12124 # We need to read a full CSV line.
12125 if(($arg =~ y/"/"/) % 2 ) {
12126 # The number of " on the line is uneven:
12127 # If we were in a half_record => we have a full record now
12128 # If we were ouside a half_record => we are in a half record now
12129 $half_record = not $half_record;
12132 # CSV half-record with quoting:
12133 # col1,"col2 2""x3"" board newline <-this one
12138 # Now we have a full CSV record
12143 if($Global::end_of_file_string and
12144 $arg eq $Global::end_of_file_string) {
12145 # Ignore the rest of input file
12147 ::debug("run", "EOF-string ($arg) met\n");
12148 if(defined $prepend) {
12149 return Arg->new($prepend);
12154 if(defined $prepend) {
12155 $arg = $prepend.$arg; # For line continuation
12158 if($Global::ignore_empty) {
12159 if($arg =~ /^\s*$/) {
12160 redo; # Try the next line
12163 if($Global::max_lines) {
12164 if($arg =~ /\s$/) {
12165 # Trailing space => continued on next line
12170 }} while (1 == 0); # Dummy loop {{}} for redo
12172 return Arg->new($arg);
12174 ::die_bug("multiread arg undefined");
12178 # Prototype forwarding
12179 sub expand_combinations(@);
12180 sub expand_combinations(@) {
12182 # ([xmin,xmax], [ymin,ymax], ...)
12183 # Returns: ([x,y,...],[x,y,...])
12184 # where xmin <= x <= xmax and ymin <= y <= ymax
12185 my $minmax_ref = shift;
12186 my $link = shift; # This is linked to the next input source
12187 my $xmin = $$minmax_ref[0];
12188 my $xmax = $$minmax_ref[1];
12191 my @rest = expand_combinations(@_);
12193 # Linked to next col with --link/:::+/::::+
12194 # TODO BUG does not wrap values if not same number of vals
12195 push(@p, map { [$$_[0], @$_] }
12196 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
12198 # If there are more columns: Compute those recursively
12199 for(my $x = $xmin; $x <= $xmax; $x++) {
12200 push @p, map { [$x, @$_] } @rest;
12204 for(my $x = $xmin; $x <= $xmax; $x++) {
12218 if($opt::hostgroups) {
12219 if($orig =~ s:@(.+)::) {
12220 # We found hostgroups on the arg
12221 @hostgroups = split(/\+/, $1);
12222 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
12223 # This hostgroup is not defined using -S
12225 ::warning("Adding hostgroups: @hostgroups");
12227 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
12228 my $sshlogin = SSHLogin->new($_);
12229 my $sshlogin_string = $sshlogin->string();
12230 $Global::host{$sshlogin_string} = $sshlogin;
12231 $Global::hostgroups{$sshlogin_string} = 1;
12235 # No hostgroup on the arg => any hostgroup
12236 @hostgroups = (keys %Global::hostgroups);
12241 'hostgroups' => \@hostgroups,
12242 }, ref($class) || $class;
12246 # Q alias for ::shell_quote_scalar
12247 my $ret = ::Q($_[0]);
12248 no warnings 'redefine';
12254 # pQ alias for ::perl_quote_scalar
12255 my $ret = ::pQ($_[0]);
12256 no warnings 'redefine';
12262 return $Global::JobQueue->total_jobs();
12269 # shorthand for $job->skip();
12273 # shorthand for $job->slot();
12277 # shorthand for $job->seq();
12281 # Do not quote this arg
12282 $Global::unquote_arg = 1;
12285 sub replace($$$$) {
12286 # Calculates the corresponding value for a given perl expression
12288 # The calculated string (quoted if asked for)
12290 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
12291 my $quote = shift; # should the string be quoted?
12292 # This is actually a CommandLine-object,
12293 # but it looks nice to be able to say {= $job->slot() =}
12295 $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
12296 if(not $Global::cache_replacement_eval
12298 not $self->{'cache'}{$perlexpr}) {
12299 # Only compute the value once
12300 # Use $_ as the variable to change
12302 if($Global::trim eq "n") {
12303 $_ = $self->{'orig'};
12306 $_ = trim_of($self->{'orig'});
12308 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
12309 if(not $perleval{$perlexpr}) {
12310 # Make an anonymous function of the $perlexpr
12311 # And more importantly: Compile it only once
12312 if($perleval{$perlexpr} =
12313 eval('sub { no strict; no warnings; my $job = shift; '.
12317 # The eval failed. Maybe $perlexpr is invalid perl?
12318 ::error("Cannot use $perlexpr: $@");
12319 ::wait_and_exit(255);
12322 # Execute the function
12323 $perleval{$perlexpr}->($job);
12324 $self->{'cache'}{$perlexpr} = $_;
12325 if($Global::unquote_arg) {
12326 # uq() was called in perlexpr
12327 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
12328 # Reset for next perlexpr
12329 $Global::unquote_arg = 0;
12332 # Return the value quoted if needed
12333 if($self->{'cache'}{'unquote'}{$perlexpr}) {
12334 return($self->{'cache'}{$perlexpr});
12336 return($quote ? Q($self->{'cache'}{$perlexpr})
12337 : $self->{'cache'}{$perlexpr});
12342 sub flush_cache($) {
12343 # Flush cache of computed values
12345 $self->{'cache'} = undef;
12350 return $self->{'orig'};
12355 $self->{'orig'} = shift;
12359 # Removes white space as specifed by --trim:
12365 # string with white space removed as needed
12366 my @strings = map { defined $_ ? $_ : "" } (@_);
12368 if($Global::trim eq "n") {
12370 } elsif($Global::trim eq "l") {
12371 for my $arg (@strings) { $arg =~ s/^\s+//; }
12372 } elsif($Global::trim eq "r") {
12373 for my $arg (@strings) { $arg =~ s/\s+$//; }
12374 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
12375 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
12377 ::error("--trim must be one of: r l rl lr.");
12378 ::wait_and_exit(255);
12380 return wantarray ? @strings : "@strings";
12384 package TimeoutQueue;
12388 my $delta_time = shift;
12390 if($delta_time =~ /(\d+(\.\d+)?)%/) {
12391 # Timeout in percent
12393 $delta_time = 1_000_000;
12395 $delta_time = ::multiply_time_units($delta_time);
12399 'delta_time' => $delta_time,
12401 'remedian_idx' => 0,
12402 'remedian_arr' => [],
12403 'remedian' => undef,
12404 }, ref($class) || $class;
12407 sub delta_time($) {
12409 return $self->{'delta_time'};
12412 sub set_delta_time($$) {
12414 $self->{'delta_time'} = shift;
12419 return $self->{'remedian'};
12422 sub set_remedian($$) {
12423 # Set median of the last 999^3 (=997002999) values using Remedian
12425 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
12426 # robust averaging method for large data sets." Journal of the
12427 # American Statistical Association 85.409 (1990): 97-104.
12430 my $i = $self->{'remedian_idx'}++;
12431 my $rref = $self->{'remedian_arr'};
12432 $rref->[0][$i%999] = $val;
12433 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
12434 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
12435 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
12438 sub update_median_runtime($) {
12439 # Update delta_time based on runtime of finished job if timeout is
12442 my $runtime = shift;
12443 if($self->{'pct'}) {
12444 $self->set_remedian($runtime);
12445 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
12446 ::debug("run", "Timeout: $self->{'delta_time'}s ");
12450 sub process_timeouts($) {
12451 # Check if there was a timeout
12453 # $self->{'queue'} is sorted by start time
12454 while (@{$self->{'queue'}}) {
12455 my $job = $self->{'queue'}[0];
12456 if($job->endtime()) {
12457 # Job already finished. No need to timeout the job
12458 # This could be because of --keep-order
12459 shift @{$self->{'queue'}};
12460 } elsif($job->is_timedout($self->{'delta_time'})) {
12461 # Need to shift off queue before kill
12462 # because kill calls usleep that calls process_timeouts
12463 shift @{$self->{'queue'}};
12464 ::warning("This job was killed because it timed out:",
12468 # Because they are sorted by start time the rest are later
12477 push @{$self->{'queue'}}, $in;
12486 $Global::use{"DBI"} ||= eval "use DBI; 1;";
12487 # +DBURL = append to this DBURL
12488 my $append = $dburl=~s/^\+//;
12489 my %options = parse_dburl(get_alias($dburl));
12490 my %driveralias = ("sqlite" => "SQLite",
12491 "sqlite3" => "SQLite",
12493 "postgres" => "Pg",
12494 "postgresql" => "Pg",
12496 "oracle" => "Oracle",
12497 "ora" => "Oracle");
12498 my $driver = $driveralias{$options{'databasedriver'}} ||
12499 $options{'databasedriver'};
12500 my $database = $options{'database'};
12501 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
12502 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
12503 my $dsn = "DBI:$driver:dbname=$database$host$port";
12504 my $userid = $options{'user'};
12505 my $password = $options{'password'};;
12506 if(not grep /$driver/, DBI->available_drivers) {
12507 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
12508 ::wait_and_exit(255);
12511 if($driver eq "CSV") {
12512 # CSV does not use normal dsn
12514 $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
12515 or die $DBI::errstr;
12517 ::error("$database is not a directory.");
12518 ::wait_and_exit(255);
12521 $dbh = DBI->connect($dsn, $userid, $password,
12522 { RaiseError => 1, AutoInactiveDestroy => 1 })
12523 or die $DBI::errstr;
12525 $dbh->{'PrintWarn'} = $Global::debug || 0;
12526 $dbh->{'PrintError'} = $Global::debug || 0;
12527 $dbh->{'RaiseError'} = 1;
12528 $dbh->{'ShowErrorStatement'} = 1;
12529 $dbh->{'HandleError'} = sub {};
12530 if(not defined $options{'table'}) {
12531 ::error("The DBURL ($dburl) must contain a table.");
12532 ::wait_and_exit(255);
12537 'driver' => $driver,
12538 'max_number_of_args' => undef,
12539 'table' => $options{'table'},
12540 'append' => $append,
12541 }, ref($class) || $class;
12544 # Prototype forwarding
12548 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
12549 if ($alias !~ /^:/) {
12556 ($path) = readlink($0) =~ m|^(.*)/|;
12558 ($path) = $0 =~ m|^(.*)/|;
12561 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
12562 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12563 for (@deprecated) {
12565 ::warning("$_ is deprecated. ".
12566 "Use .sql/aliases instead (read man sql).");
12570 check_permissions("$ENV{HOME}/.sql/aliases");
12571 check_permissions("$ENV{HOME}/.dburl.aliases");
12572 my @search = ("$ENV{HOME}/.sql/aliases",
12573 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
12574 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12575 for my $alias_file (@search) {
12576 # local $/ needed if -0 set
12578 if(-r $alias_file) {
12579 open(my $in, "<", $alias_file) || die;
12580 push @urlalias, <$in>;
12584 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
12585 # If we saw this before: we have an alias loop
12586 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
12587 ::error("$alias_part is a cyclic alias.");
12590 push @Private::seen_aliases, $alias_part;
12595 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
12599 return get_alias($dburl.$rest);
12601 ::error("$alias is not defined in @search");
12606 sub check_permissions($) {
12611 my $username = (getpwuid($<))[0];
12612 ::warning("$file should be owned by $username: ".
12613 "chown $username $file");
12615 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
12616 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
12618 my $username = (getpwuid($<))[0];
12619 ::warning("$file should be only be readable by $username: ".
12620 "chmod 600 $file");
12625 sub parse_dburl($) {
12628 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
12630 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
12631 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
12632 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
12634 ([^:@/][^:@]*|) # Username ($2)
12636 :([^@]*) # Password ($3)
12639 ([^:/]*)? # Hostname ($4)
12642 ([^/]*)? # Port ($5)
12646 ([^/?]*)? # Database ($6)
12650 ([^?]*)? # Table ($7)
12657 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
12658 $options{user} = ::undef_if_empty(uri_unescape($2));
12659 $options{password} = ::undef_if_empty(uri_unescape($3));
12660 $options{host} = ::undef_if_empty(uri_unescape($4));
12661 $options{port} = ::undef_if_empty(uri_unescape($5));
12662 $options{database} = ::undef_if_empty(uri_unescape($6));
12663 $options{table} = ::undef_if_empty(uri_unescape($7));
12664 $options{query} = ::undef_if_empty(uri_unescape($8));
12665 ::debug("sql", "dburl $url\n");
12666 ::debug("sql", "databasedriver ", $options{databasedriver},
12667 " user ", $options{user},
12668 " password ", $options{password}, " host ", $options{host},
12669 " port ", $options{port}, " database ", $options{database},
12670 " table ", $options{table}, " query ", $options{query}, "\n");
12672 ::error("$url is not a valid DBURL");
12678 sub uri_unescape($) {
12679 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
12680 # to avoid depending on URI::Escape
12681 # This section is (C) Gisle Aas.
12682 # Note from RFC1630: "Sequences which start with a percent sign
12683 # but are not followed by two hexadecimal characters are reserved
12684 # for future extension"
12686 if (@_ && wantarray) {
12687 # not executed for the common case of a single argument
12688 my @str = ($str, @_); # need to copy
12690 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
12694 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
12701 if($self->{'driver'} eq "CSV") {
12703 if($stmt eq "BEGIN" or
12704 $stmt eq "COMMIT") {
12709 my $dbh = $self->{'dbh'};
12710 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
12711 # Execute with the rest of the args - if any
12715 while($lockretry < 10) {
12716 $sth = $dbh->prepare($stmt);
12719 eval { $rv = $sth->execute(@_) }) {
12722 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
12724 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
12726 # It is just a worker that reported back too late -
12727 # another worker had finished the job first
12728 # and the table was then dropped
12732 if($DBI::errstr =~ /locked/) {
12733 ::debug("sql", "Lock retry: $lockretry");
12735 ::usleep(rand()*300);
12736 } elsif(not $sth) {
12740 ::error($DBI::errstr);
12741 ::wait_and_exit(255);
12745 if($lockretry >= 10) {
12746 ::die_bug("retry > 10: $DBI::errstr");
12748 if($rv < 0 and $DBI::errstr){
12749 ::error($DBI::errstr);
12750 ::wait_and_exit(255);
12757 my $sth = $self->run(@_);
12759 # If $sth = 0 it means the table was dropped by another process
12761 my @row = $sth->fetchrow_array();
12763 push @retval, \@row;
12770 return $self->{'table'};
12775 return $self->{'append'};
12781 my $table = $self->table();
12782 $self->run("UPDATE $table $stmt",@_);
12787 my $commandline = shift;
12789 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
12790 $commandline->seq(),
12791 join("",@{$commandline->{'output'}{1}}),
12792 join("",@{$commandline->{'output'}{2}}));
12795 sub max_number_of_args($) {
12796 # Maximal number of args for this table
12798 if(not $self->{'max_number_of_args'}) {
12799 # Read the number of args from the SQL table
12800 my $table = $self->table();
12801 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
12802 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
12803 Receive Exitval _Signal Command Stdout Stderr);
12805 ::error
("$table contains no records");
12807 # Count the number of Vx columns
12808 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
12810 return $self->{'max_number_of_args'};
12813 sub set_max_number_of_args
($$) {
12815 $self->{'max_number_of_args'} = shift;
12818 sub create_table
($) {
12820 if($self->append()) { return; }
12821 my $max_number_of_args = shift;
12822 $self->set_max_number_of_args($max_number_of_args);
12823 my $table = $self->table();
12824 $self->run(qq(DROP TABLE IF EXISTS
$table;));
12825 # BIGINT and TEXT are not supported in these databases or are too small
12827 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
12828 "TEXT" => "CLOB", },
12829 "mysql" => { "TEXT" => "BLOB", },
12830 "CSV" => { "BIGINT" => "INT",
12831 "FLOAT" => "REAL", },
12833 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
12834 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
12835 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
12836 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
12837 $self->run(qq{CREATE TABLE
$table
12852 sub insert_records
($) {
12855 my $command_ref = shift;
12856 my $record_ref = shift;
12857 my $table = $self->table();
12858 # For SQL encode the command with \257 space as split points
12859 my $command = join("\257 ",@
$command_ref);
12860 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12861 # Two extra value due to $seq, Exitval, Send
12862 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
12863 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
12864 "VALUES ($v_vals);", $seq, $command, -1000,
12865 0, @
$record_ref[1..$#$record_ref]);
12869 sub get_record
($) {
12872 my $table = $self->table();
12873 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12874 my $rand = "Reserved-".$$.rand();
12879 if($self->{'driver'} eq "CSV") {
12880 # Sub SELECT is not supported in CSV
12881 # So to minimize the race condition below select a job at random
12882 my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12883 "WHERE Exitval = -1000 LIMIT 100;");
12884 $v = [ sort { rand() > 0.5 } @
$r ];
12886 # Avoid race condition where multiple workers get the same job
12887 # by setting Stdout to a unique string
12888 # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
12889 $self->update("SET Stdout = ?,Exitval = ? ".
12891 " SELECT * FROM (".
12892 " SELECT min(Seq) FROM $table WHERE Exitval = -1000".
12894 ") AND Exitval = -1000;", $rand, -1210);
12895 # If a parallel worker overwrote the unique string this will get nothing
12896 $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12897 "WHERE Stdout = ?;", $rand);
12900 my $val_ref = $v->[0];
12901 # Mark record as taken
12902 my $seq = shift @
$val_ref;
12903 # Save the sequence number to use when running the job
12904 $SQL::next_seq
= $seq;
12905 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
12906 # Command is encoded with '\257 space' as splitting char
12907 my @command = split /\257 /, shift @
$val_ref;
12908 $SQL::command_ref
= \
@command;
12910 push @retval, Arg
->new($_);
12913 # If the record was updated by another job in parallel,
12914 # then we may not be done, so see if there are more jobs pending
12916 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
12918 } while (not $v->[0] and $more_pending->[0]);
12927 sub total_jobs
($) {
12929 my $table = $self->table();
12930 my $v = $self->get("SELECT count(*) FROM $table;");
12932 return $v->[0]->[0];
12934 ::die_bug
("SQL::total_jobs");
12940 my $table = $self->table();
12941 my $v = $self->get("SELECT max(Seq) FROM $table;");
12943 return $v->[0]->[0];
12945 ::die_bug
("SQL::max_seq");
12950 # Check if there are any jobs left in the SQL table that do not
12951 # have a "real" exitval
12953 if($opt::wait or $Global::start_sqlworker
) {
12954 my $table = $self->table();
12955 my $rv = $self->get("select Seq,Exitval from $table ".
12956 "where Exitval <= -1000 limit 1");
12957 return not $rv->[0];
12965 # This package provides a counting semaphore
12967 # If a process dies without releasing the semaphore the next process
12968 # that needs that entry will clean up dead semaphores
12970 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
12971 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
12972 # process holding the entry. If the process dies, the entry can be
12973 # taken by another process.
12979 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
12980 $id = "id-".$id; # To distinguish it from a process id
12981 my $parallel_locks = $Global::cache_dir
. "/semaphores";
12982 -d
$parallel_locks or ::mkdir_or_die
($parallel_locks);
12983 my $lockdir = "$parallel_locks/$id";
12984 my $lockfile = $lockdir.".lock";
12985 if(-d
$parallel_locks and -w
$parallel_locks
12986 and -r
$parallel_locks and -x
$parallel_locks) {
12989 ::error
("Semaphoredir must be writable: '$parallel_locks'");
12990 ::wait_and_exit
(255);
12993 if($count < 1) { ::die_bug
("semaphore-count: $count"); }
12995 'lockfile' => $lockfile,
12996 'lockfh' => Symbol
::gensym
(),
12997 'lockdir' => $lockdir,
12999 'idfile' => $lockdir."/".$id,
13001 'pidfile' => $lockdir."/".$$.'@'.::hostname
(),
13002 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
13003 }, ref($class) || $class;
13006 sub remove_dead_locks
($) {
13008 my $lockdir = $self->{'lockdir'};
13010 for my $d (glob "$lockdir/*") {
13011 $d =~ m
:$lockdir/([0-9]+)\@
([-\
._a
-z0
-9]+)$:o
or next;
13012 my ($pid, $host) = ($1, $2);
13013 if($host eq ::hostname
()) {
13015 ::debug
("sem", "Alive: $pid $d\n");
13017 ::debug
("sem", "Dead: $d\n");
13026 my $sleep = 1; # 1 ms
13027 my $start_time = time;
13029 # Can we get a lock?
13030 $self->atomic_link_if_count_less_than() and last;
13031 $self->remove_dead_locks();
13032 # Retry slower and slower up to 1 second
13033 $sleep = ($sleep < 1000) ?
($sleep * 1.1) : ($sleep);
13034 # Random to avoid every sleeping job waking up at the same time
13035 ::usleep
(rand()*$sleep);
13036 if($opt::semaphoretimeout
) {
13037 if($opt::semaphoretimeout
> 0
13039 time - $start_time > $opt::semaphoretimeout
) {
13040 # Timeout: Take the semaphore anyway
13041 ::warning
("Semaphore timed out. Stealing the semaphore.");
13042 if(not -e
$self->{'idfile'}) {
13043 open (my $fh, ">", $self->{'idfile'}) or
13044 ::die_bug
("timeout_write_idfile: $self->{'idfile'}");
13047 link $self->{'idfile'}, $self->{'pidfile'};
13050 if($opt::semaphoretimeout
< 0
13052 time - $start_time > -$opt::semaphoretimeout
) {
13054 ::warning
("Semaphore timed out. Exiting.");
13060 ::debug
("sem", "acquired $self->{'pid'}\n");
13065 ::rm
($self->{'pidfile'});
13066 if($self->nlinks() == 1) {
13067 # This is the last link, so atomic cleanup
13069 if($self->nlinks() == 1) {
13070 ::rm
($self->{'idfile'});
13071 rmdir $self->{'lockdir'};
13075 ::debug
("run", "released $self->{'pid'}\n");
13078 sub pid_change
($) {
13079 # This should do what release()+acquire() would do without having
13080 # to re-acquire the semaphore
13083 my $old_pidfile = $self->{'pidfile'};
13084 $self->{'pid'} = $$;
13085 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname
();
13086 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
13087 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13088 ::rm
($old_pidfile);
13091 sub atomic_link_if_count_less_than
($) {
13092 # Link $file1 to $file2 if nlinks to $file1 < $count
13096 my $nlinks = $self->nlinks();
13097 ::debug
("sem","$nlinks<$self->{'count'} ");
13098 if($nlinks < $self->{'count'}) {
13099 -d
$self->{'lockdir'} or ::mkdir_or_die
($self->{'lockdir'});
13100 if(not -e
$self->{'idfile'}) {
13101 open (my $fh, ">", $self->{'idfile'}) or
13102 ::die_bug
("write_idfile: $self->{'idfile'}");
13105 $retval = link $self->{'idfile'}, $self->{'pidfile'};
13106 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13109 ::debug
("sem", "atomic $retval");
13115 if(-e
$self->{'idfile'}) {
13116 return (stat(_
))[3];
13124 my $sleep = 100; # 100 ms
13125 my $total_sleep = 0;
13126 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
13128 while(not $locked) {
13129 if(tell($self->{'lockfh'}) == -1) {
13131 open($self->{'lockfh'}, ">", $self->{'lockfile'})
13132 or ::debug("run
", "Cannot
open $self->{'lockfile'}");
13134 if($self->{'lockfh'}) {
13136 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
13137 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
13138 # The file is locked: No need to retry
13142 if ($! =~ m/Function not implemented/) {
13143 ::warning("flock: $!",
13144 "Will
wait for a random
while.");
13145 ::usleep(rand(5000));
13146 # File cannot be locked: No need to retry
13152 # Locking failed in first round
13153 # Sleep and try again
13154 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13155 # Random to avoid every sleeping job waking up at the same time
13156 ::usleep(rand()*$sleep);
13157 $total_sleep += $sleep;
13158 if($opt::semaphoretimeout) {
13159 if($opt::semaphoretimeout > 0
13161 $total_sleep/1000 > $opt::semaphoretimeout) {
13162 # Timeout: Take the semaphore anyway
13163 ::warning("Semaphore timed out
. Taking the semaphore
.");
13167 if($opt::semaphoretimeout < 0
13169 $total_sleep/1000 > -$opt::semaphoretimeout) {
13171 ::warning("Semaphore timed out
. Exiting
.");
13176 if($total_sleep/1000 > 30) {
13177 ::warning("Semaphore stuck
for 30 seconds
. ".
13178 "Consider using
--semaphoretimeout
.");
13182 ::debug("run
", "locked
$self->{'lockfile'}");
13187 ::rm($self->{'lockfile'});
13188 close $self->{'lockfh'};
13189 ::debug("run
", "unlocked
\n");
13192 # Keep perl -w happy
13194 $opt::x = $Semaphore::timeout = $Semaphore::wait =
13195 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
13196 $Global::max_slot_number = $opt::session;
13201 save_stdin_stdout_stderr();
13202 save_original_signal_handler();
13204 ::debug("init
", "Open file descriptors
: ", join(" ",keys %Global::fd), "\n");
13205 my $number_of_args;
13206 if($Global::max_number_of_args) {
13207 $number_of_args = $Global::max_number_of_args;
13208 } elsif ($opt::X or $opt::m or $opt::xargs) {
13209 $number_of_args = undef;
13211 $number_of_args = 1;
13214 my @command = @ARGV;
13215 my @input_source_fh;
13216 if($opt::pipepart) {
13218 @input_source_fh = map { open_or_exit($_) } @opt::a;
13219 # Remove the first: It will be the file piped.
13220 shift @input_source_fh;
13221 if(not @input_source_fh and not $opt::pipe) {
13222 @input_source_fh = (*STDIN);
13225 # -a is used for data - not for command line args
13226 @input_source_fh = map { open_or_exit($_) } "/dev/null
";
13229 @input_source_fh = map { open_or_exit($_) } @opt::a;
13230 if(not @input_source_fh and not $opt::pipe) {
13231 @input_source_fh = (*STDIN);
13235 if($opt::skip_first_line) {
13236 # Skip the first line for the first file handle
13237 my $fh = $input_source_fh[0];
13241 set_input_source_header(\@command,\@input_source_fh);
13242 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
13243 # Parallel check all hosts are up. Remove hosts that are down
13248 if($opt::sqlmaster and $opt::sqlworker) {
13249 # Start a real --sqlworker in the background later
13250 $Global::start_sqlworker = 1;
13251 $opt::sqlworker = undef;
13254 if($opt::nonall or $opt::onall) {
13255 onall(\@input_source_fh,@command);
13256 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
13259 $Global::JobQueue = JobQueue->new(
13260 \@command,\@input_source_fh,$Global::ContextReplace,
13261 $number_of_args,\@Global::transfer_files,\@Global::ret_files);
13263 if($opt::sqlmaster) {
13264 # Create SQL table to hold joblog + output
13265 # Figure out how many arguments are in a job
13266 # (It is affected by --colsep, -N, $number_source_fh)
13267 my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
13268 my $record = $record_queue->get();
13269 my $no_of_values = $number_of_args * (1+$#{$record});
13270 $record_queue->unget($record);
13271 $Global::sql->create_table($no_of_values);
13272 if($opt::sqlworker) {
13273 # Start a real --sqlworker in the background later
13274 $Global::start_sqlworker = 1;
13275 $opt::sqlworker = undef;
13279 if($opt::pipepart) {
13281 } elsif($opt::pipe and $opt::tee) {
13283 } elsif($opt::pipe and $opt::shard or $opt::bin) {
13284 pipe_shard_setup();
13287 if(not $opt::pipepart and $opt::groupby) {
13288 group_by_stdin_filter();
13290 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
13291 # Count the number of jobs or shuffle all jobs
13292 # before starting any.
13293 # Must be done after ungetting any --pipepart jobs.
13294 $Global::JobQueue->total_jobs();
13296 # Compute $Global::max_jobs_running
13297 # Must be done after ungetting any --pipepart jobs.
13298 max_jobs_running();
13301 if($Global::semaphore) {
13302 $sem = acquire_semaphore();
13304 $SIG{TERM} = $Global::original_sig{TERM};
13305 $SIG{HUP} = \&start_no_new_jobs;
13307 if($opt::tee or $opt::shard or $opt::bin) {
13308 # All jobs must be running in parallel for --tee/--shard/--bin
13309 while(start_more_jobs()) {}
13310 $Global::start_no_new_jobs = 1;
13311 if(not $Global::JobQueue->empty()) {
13313 ::error("--tee requires
--jobs to be higher
. Try
--jobs
0.");
13314 } elsif($opt::bin) {
13315 ::error("--bin requires
--jobs to be higher than the number of
",
13316 "arguments
. Increase
--jobs
.");
13317 } elsif($opt::shard) {
13318 ::error("--shard requires
--jobs to be higher than the number of
",
13319 "arguments
. Increase
--jobs
.");
13321 ::die_bug("--bin
/--shard/--tee should
not get here
");
13323 ::wait_and_exit(255);
13325 } elsif($opt::pipe and not $opt::pipepart) {
13326 # Fill all jobslots
13327 while(start_more_jobs()) {}
13330 # Reap one - start one
13331 while(reaper() + start_more_jobs()) {}
13333 ::debug("init
", "Start draining
\n");
13334 drain_job_queue(@command);
13335 ::debug("init
", "Done draining
\n");
13337 ::debug("init
", "Done reaping
\n");
13338 if($Global::semaphore) {
13342 ::debug("init
", "Halt
\n");