src/parallel

   1 #!/usr/bin/env perl
   2
   3 # Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free
   4 # Software Foundation, Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, see <http://www.gnu.org/licenses/>
  18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
  19 # Fifth Floor, Boston, MA 02110-1301 USA
  20
  21 # open3 used in Job::start
  22 use IPC::Open3;
  23 use POSIX;
  24 # gensym used in Job::start
  25 use Symbol qw(gensym);
  26 # tempfile used in Job::start
  27 use File::Temp qw(tempfile tempdir);
  28 # mkpath used in openresultsfile
  29 use File::Path;
  30 # GetOptions used in get_options_from_array
  31 use Getopt::Long;
  32 # Used to ensure code quality
  33 use strict;
  34 use File::Basename;
  35
  36 sub set_input_source_header($$) {
  37     my ($command_ref,$input_source_fh_ref) = @_;
  38     if($opt::header and not $opt::pipe) {
  39         # split with colsep or \t
  40         # $header force $colsep = \t if undef?
  41         my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
  42         # regexp for {=
  43         my $left = "\Q$Global::parensleft\E";
  44         my $l = $Global::parensleft;
  45         # regexp for =}
  46         my $right = "\Q$Global::parensright\E";
  47         my $r = $Global::parensright;
  48         my $id = 1;
  49         for my $fh (@$input_source_fh_ref) {
  50             my $line = <$fh>;
  51             chomp($line);
  52             $line =~ s/\r$//;
  53             ::debug("init", "Delimiter: '$delimiter'");
  54             for my $s (split /$delimiter/o, $line) {
  55                 ::debug("init", "Colname: '$s'");
  56                 # Replace {colname} with {2}
  57                 for(@$command_ref, @Global::ret_files,
  58                     @Global::transfer_files, $opt::tagstring,
  59                     $opt::workdir, $opt::results, $opt::retries) {
  60                     # Skip if undefined
  61                     $_ or next;
  62                   s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
  63                     # {=header1 ... =}  =>  {=1 ... =}
  64                   s:$left $s (.*?) $right:$l$id$1$r:gx;
  65                 }
  66                 $Global::input_source_header{$id} = $s;
  67                 $id++;
  68             }
  69         }
  70     } else {
  71         my $id = 1;
  72         for my $fh (@$input_source_fh_ref) {
  73             $Global::input_source_header{$id} = $id;
  74             $id++;
  75         }
  76     }
  77 }
  78
  79 sub max_jobs_running() {
  80     # Compute $Global::max_jobs_running as the max number of jobs
  81     # running on each sshlogin.
  82     # Returns:
  83     #   $Global::max_jobs_running
  84     if(not $Global::max_jobs_running) {
  85         for my $sshlogin (values %Global::host) {
  86             $sshlogin->max_jobs_running();
  87         }
  88     }
  89     if(not $Global::max_jobs_running) {
  90         ::error("Cannot run any jobs.");
  91         wait_and_exit(255);
  92     }
  93     return $Global::max_jobs_running;
  94 }
  95
  96 sub halt() {
  97     # Compute exit value,
  98     # wait for children to complete
  99     # and exit
 100     if($opt::halt and $Global::halt_when ne "never") {
 101         if(not defined $Global::halt_exitstatus) {
 102             if($Global::halt_pct) {
 103                 $Global::halt_exitstatus =
 104                     ::ceil($Global::total_failed /
 105                            ($Global::total_started || 1) * 100);
 106             } elsif($Global::halt_count) {
 107                 $Global::halt_exitstatus =
 108                     ::min(undef_as_zero($Global::total_failed),101);
 109             }
 110         }
 111         wait_and_exit($Global::halt_exitstatus);
 112     } else {
 113         wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
 114     }
 115 }
 116
 117
 118 sub __PIPE_MODE__() {}
 119
 120
 121 sub pipepart_setup() {
 122     # Compute the blocksize
 123     # Generate the commands to extract the blocks
 124     # Push the commands on queue
 125     # Changes:
 126     #   @Global::cat_prepends
 127     #   $Global::JobQueue
 128     if($opt::tee) {
 129         # Prepend each command with
 130         #   < file
 131         my $cat_string = "< ".Q($opt::a[0]);
 132         for(1..$Global::JobQueue->total_jobs()) {
 133             push @Global::cat_appends, $cat_string;
 134             push @Global::cat_prepends, "";
 135         }
 136     } else {
 137         if(not $opt::blocksize) {
 138             # --blocksize with 10 jobs per jobslot
 139             $opt::blocksize = -10;
 140         }
 141         if($opt::roundrobin) {
 142             # --blocksize with 1 job per jobslot
 143             $opt::blocksize = -1;
 144         }
 145         if($opt::blocksize < 0) {
 146             my $size = 0;
 147             # Compute size of -a
 148             for(@opt::a) {
 149                 if(-f $_) {
 150                     $size += -s $_;
 151                 } elsif(-b $_) {
 152                     $size += size_of_block_dev($_);
 153                 } elsif(-e $_) {
 154                     ::error("$_ is neither a file nor a block device");
 155                     wait_and_exit(255);
 156                 } else {
 157                     ::error("File not found: $_");
 158                     wait_and_exit(255);
 159                 }
 160             }
 161             # Run in total $job_slots*(- $blocksize) jobs
 162             # Set --blocksize = size / no of proc / (- $blocksize)
 163             $Global::dummy_jobs = 1;
 164             $Global::blocksize = 1 +
 165                 int($size / max_jobs_running() /
 166                     -multiply_binary_prefix($opt::blocksize));
 167         }
 168         @Global::cat_prepends = map { pipe_part_files($_) } @opt::a;
 169         # Unget the empty arg as many times as there are parts
 170         $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
 171             map { [Arg->new("\0noarg")] } @Global::cat_prepends
 172             );
 173     }
 174 }
 175
 176 sub pipe_tee_setup() {
 177     # Create temporary fifos
 178     # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
 179     # This will spread the input to fifos
 180     # Generate commands that reads from fifo1..N:
 181     #   cat fifo | user_command
 182     # Changes:
 183     #   @Global::cat_prepends
 184     my @fifos;
 185     for(1..$Global::JobQueue->total_jobs()) {
 186         push @fifos, tmpfifo();
 187     }
 188     # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
 189     if(not fork()){
 190         # Test if tee supports --output-error=warn-nopipe
 191         `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
 192         my $opt = $? ? "" : "--output-error=warn-nopipe";
 193         ::debug("init","tee $opt");
 194         # Let tee inherit our stdin
 195         # and redirect stdout to null
 196         open STDOUT, ">","/dev/null";
 197         if($opt) {
 198             exec "tee", $opt, @fifos;
 199         } else {
 200             exec "tee", @fifos;
 201         }
 202     }
 203     # For each fifo
 204     #   (rm fifo1; grep 1) < fifo1
 205     #   (rm fifo2; grep 2) < fifo2
 206     #   (rm fifo3; grep 3) < fifo3
 207     # Remove the tmpfifo as soon as it is open
 208     @Global::cat_prepends = map { "(rm $_;" } @fifos;
 209     @Global::cat_appends = map { ") < $_" } @fifos;
 210 }
 211
 212
 213 sub parcat_script() {
 214     # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
 215     my $script = q'{
 216         use POSIX qw(:errno_h);
 217         use IO::Select;
 218         use strict;
 219         use threads;
 220         use Thread::Queue;
 221         use Fcntl qw(:DEFAULT :flock);
 222
 223         my $opened :shared;
 224         my $q = Thread::Queue->new();
 225         my $okq = Thread::Queue->new();
 226         my @producers;
 227
 228         if(not @ARGV) {
 229             if(-t *STDIN) {
 230                 print "Usage:\n";
 231                 print "  parcat file(s)\n";
 232                 print "  cat argfile | parcat\n";
 233             } else {
 234                 # Read arguments from stdin
 235                 chomp(@ARGV = <STDIN>);
 236             }
 237         }
 238         my $files_to_open = 0;
 239         # Default: fd = stdout
 240         my $fd = 1;
 241         for (@ARGV) {
 242             # --rm = remove file when opened
 243             /^--rm$/ and do { $opt::rm = 1; next; };
 244             # -1 = output to fd 1, -2 = output to fd 2
 245             /^-(\d+)$/ and do { $fd = $1; next; };
 246             push @producers, threads->create("producer", $_, $fd);
 247             $files_to_open++;
 248         }
 249
 250         sub producer {
 251             # Open a file/fifo, set non blocking, enqueue fileno of the file handle
 252             my $file = shift;
 253             my $output_fd = shift;
 254             open(my $fh, "<", $file) || do {
 255                 print STDERR "parcat: Cannot open $file\n";
 256                 exit(1);
 257             };
 258             # Remove file when it has been opened
 259             if($opt::rm) {
 260                 unlink $file;
 261             }
 262             set_fh_non_blocking($fh);
 263             $opened++;
 264             # Pass the fileno to parent
 265             $q->enqueue(fileno($fh),$output_fd);
 266             # Get an OK that the $fh is opened and we can release the $fh
 267             while(1) {
 268                 my $ok = $okq->dequeue();
 269                 if($ok == fileno($fh)) { last; }
 270                 # Not ours - very unlikely to happen
 271                 $okq->enqueue($ok);
 272             }
 273             return;
 274         }
 275
 276         my $s = IO::Select->new();
 277         my %buffer;
 278
 279         sub add_file {
 280             my $infd = shift;
 281             my $outfd = shift;
 282             open(my $infh, "<&=", $infd) || die;
 283             open(my $outfh, ">&=", $outfd) || die;
 284             $s->add($infh);
 285             # Tell the producer now opened here and can be released
 286             $okq->enqueue($infd);
 287             # Initialize the buffer
 288             @{$buffer{$infh}{$outfd}} = ();
 289             $Global::fh{$outfd} = $outfh;
 290         }
 291
 292         sub add_files {
 293             # Non-blocking dequeue
 294             my ($infd,$outfd);
 295             do {
 296                 ($infd,$outfd) = $q->dequeue_nb(2);
 297                 if(defined($outfd)) {
 298                     add_file($infd,$outfd);
 299                 }
 300             } while(defined($outfd));
 301         }
 302
 303         sub add_files_block {
 304             # Blocking dequeue
 305             my ($infd,$outfd) = $q->dequeue(2);
 306             add_file($infd,$outfd);
 307         }
 308
 309
 310         my $fd;
 311         my (@ready,$infh,$rv,$buf);
 312         do {
 313             # Wait until at least one file is opened
 314             add_files_block();
 315             while($q->pending or keys %buffer) {
 316                 add_files();
 317                 while(keys %buffer) {
 318                     @ready = $s->can_read(0.01);
 319                     if(not @ready) {
 320                         add_files();
 321                     }
 322                     for $infh (@ready) {
 323                         # There is only one key, namely the output file descriptor
 324                         for my $outfd (keys %{$buffer{$infh}}) {
 325                             $rv = sysread($infh, $buf, 65536);
 326                             if (!$rv) {
 327                                 if($! == EAGAIN) {
 328                                     # Would block: Nothing read
 329                                     next;
 330                                 } else {
 331                                     # Nothing read, but would not block:
 332                                     # This file is done
 333                                     $s->remove($infh);
 334                                     for(@{$buffer{$infh}{$outfd}}) {
 335                                         syswrite($Global::fh{$outfd},$_);
 336                                     }
 337                                     delete $buffer{$infh};
 338                                     # Closing the $infh causes it to block
 339                                     # close $infh;
 340                                     add_files();
 341                                     next;
 342                                 }
 343                             }
 344                             # Something read.
 345                             # Find \n or \r for full line
 346                             my $i = (rindex($buf,"\n")+1);
 347                             if($i) {
 348                                 # Print full line
 349                                 for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
 350                                     syswrite($Global::fh{$outfd},$_);
 351                                 }
 352                                 # @buffer = remaining half line
 353                                 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
 354                             } else {
 355                                 # Something read, but not a full line
 356                                 push @{$buffer{$infh}{$outfd}}, $buf;
 357                             }
 358                             redo;
 359                         }
 360                     }
 361                 }
 362             }
 363         } while($opened < $files_to_open);
 364
 365         for (@producers) {
 366             $_->join();
 367         }
 368
 369         sub set_fh_non_blocking {
 370             # Set filehandle as non-blocking
 371             # Inputs:
 372             #   $fh = filehandle to be blocking
 373             # Returns:
 374             #   N/A
 375             my $fh = shift;
 376             my $flags;
 377             fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
 378             $flags |= &O_NONBLOCK; # Add non-blocking to the flags
 379             fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
 380         }
 381     }';
 382     return ::spacefree(3, $script);
 383 }
 384
 385 sub sharder_script() {
 386     my $script = q{
 387         use B;
 388         # Column separator
 389         my $sep = shift;
 390         # Which columns to shard on (count from 1)
 391         my $col = shift;
 392         # Which columns to shard on (count from 0)
 393         my $col0 = $col - 1;
 394         # Perl expression
 395         my $perlexpr = shift;
 396         my $bins = @ARGV;
 397         # Open fifos for writing, fh{0..$bins}
 398         my $t = 0;
 399         my %fh;
 400         for(@ARGV) {
 401             open $fh{$t++}, ">", $_;
 402             # open blocks until it is opened by reader
 403             # so unlink only happens when it is ready
 404             unlink $_;
 405         }
 406         if($perlexpr) {
 407             my $subref = eval("sub { no strict; no warnings; $perlexpr }");
 408             while(<STDIN>) {
 409                 # Split into $col columns (no need to split into more)
 410                 @F = split $sep, $_, $col+1;
 411                 {
 412                     local $_ = $F[$col0];
 413                     &$subref();
 414                     $fh = $fh{ hex(B::hash($_))%$bins };
 415                 }
 416                 print $fh $_;
 417             }
 418         } else {
 419             while(<STDIN>) {
 420                 # Split into $col columns (no need to split into more)
 421                 @F = split $sep, $_, $col+1;
 422                 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
 423                 print $fh $_;
 424             }
 425         }
 426         # Close all open fifos
 427         close values %fh;
 428     };
 429     return ::spacefree(1, $script);
 430 }
 431
 432 sub binner_script() {
 433     my $script = q{
 434         use B;
 435         # Column separator
 436         my $sep = shift;
 437         # Which columns to shard on (count from 1)
 438         my $col = shift;
 439         # Which columns to shard on (count from 0)
 440         my $col0 = $col - 1;
 441         # Perl expression
 442         my $perlexpr = shift;
 443         my $bins = @ARGV;
 444         # Open fifos for writing, fh{0..$bins}
 445         my $t = 0;
 446         my %fh;
 447         # Let the last output fifo be the 0'th
 448         open $fh{$t++}, ">", pop @ARGV;
 449         for(@ARGV) {
 450             open $fh{$t++}, ">", $_;
 451             # open blocks until it is opened by reader
 452             # so unlink only happens when it is ready
 453             unlink $_;
 454         }
 455         if($perlexpr) {
 456             my $subref = eval("sub { no strict; no warnings; $perlexpr }");
 457             while(<STDIN>) {
 458                 # Split into $col columns (no need to split into more)
 459                 @F = split $sep, $_, $col+1;
 460                 {
 461                     local $_ = $F[$col0];
 462                     &$subref();
 463                     $fh = $fh{ $_%$bins };
 464                 }
 465                 print $fh $_;
 466             }
 467         } else {
 468             while(<STDIN>) {
 469                 # Split into $col columns (no need to split into more)
 470                 @F = split $sep, $_, $col+1;
 471                 $fh = $fh{ $F[$col0]%$bins };
 472                 print $fh $_;
 473             }
 474         }
 475         # Close all open fifos
 476         close values %fh;
 477     };
 478     return ::spacefree(1, $script);
 479 }
 480
 481 sub pipe_shard_setup() {
 482     # Create temporary fifos
 483     # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
 484     # This will spread the input to fifos
 485     # Generate commands that reads from fifo1..N:
 486     #   cat fifo | user_command
 487     # Changes:
 488     #   @Global::cat_prepends
 489     my @shardfifos;
 490     my @parcatfifos;
 491     # TODO $opt::jobs should be evaluated (100%)
 492     # TODO $opt::jobs should be number of total_jobs if there are argugemts
 493     my $njobs = $opt::jobs;
 494     for my $m (0..$njobs-1) {
 495         for my $n (0..$njobs-1) {
 496             # sharding to A B C D
 497             # parcatting all As together
 498             $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
 499         }
 500     }
 501     my $shardbin = ($opt::shard || $opt::bin);
 502     my $script;
 503     if($opt::bin) {
 504         $script = binner_script();
 505     } else {
 506         $script = sharder_script();
 507     }
 508
 509     # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
 510
 511     if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
 512         # Group by column name
 513         # (Yes, this will also wrongly match a perlexpr like: chop)
 514         my($read,$char,@line);
 515         # A full line, but nothing more (the rest must be read by the child)
 516         # $Global::header used to prepend block to each job
 517         do {
 518             $read = sysread(STDIN,$char,1);
 519             push @line, $char;
 520         } while($read and $char ne "\n");
 521         $Global::header = join "", @line;
 522     }
 523     my ($col, $perlexpr, $subref) =
 524         column_perlexpr($shardbin, $Global::header, $opt::colsep);
 525     if(not fork()) {
 526         # Let the sharder inherit our stdin
 527         # and redirect stdout to null
 528         open STDOUT, ">","/dev/null";
 529         # The PERL_HASH_SEED must be the same for all sharders
 530         # so B::hash will return the same value for any given input
 531         $ENV{'PERL_HASH_SEED'} = $$;
 532         exec qw(parallel --block 100k -q --pipe -j), $njobs,
 533             qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
 534             $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos);
 535     }
 536     # For each fifo
 537     #   (rm fifo1; grep 1) < fifo1
 538     #   (rm fifo2; grep 2) < fifo2
 539     #   (rm fifo3; grep 3) < fifo3
 540     my $parcat = Q(parcat_script());
 541     if(not $parcat) {
 542         ::error("'parcat' must be in path.");
 543         ::wait_and_exit(255);
 544     }
 545     @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos;
 546 }
 547
 548 sub pipe_part_files(@) {
 549     # Given the bigfile
 550     # find header and split positions
 551     # make commands that 'cat's the partial file
 552     # Input:
 553     #   $file = the file to read
 554     # Returns:
 555     #   @commands that will cat_partial each part
 556     my ($file) = @_;
 557     my $buf = "";
 558     if(not -f $file and not -b $file) {
 559         ::error("$file is not a seekable file.");
 560         ::wait_and_exit(255);
 561     }
 562     my $header = find_header(\$buf,open_or_exit($file));
 563     # find positions
 564     my @pos = find_split_positions($file,$Global::blocksize,$header);
 565     # Make @cat_prepends
 566     my @cat_prepends = ();
 567     for(my $i=0; $i<$#pos; $i++) {
 568         push(@cat_prepends,
 569              cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]));
 570     }
 571     return @cat_prepends;
 572 }
 573
 574 sub find_header($$) {
 575     # Compute the header based on $opt::header
 576     # Input:
 577     #   $buf_ref = reference to read-in buffer
 578     #   $fh = filehandle to read from
 579     # Uses:
 580     #   $opt::header
 581     #   $Global::blocksize
 582     #   $Global::header
 583     # Returns:
 584     #   $header string
 585     my ($buf_ref, $fh) = @_;
 586     my $header = "";
 587     # $Global::header may be set in group_by_loop()
 588     if($Global::header) { return $Global::header }
 589     if($opt::header) {
 590         if($opt::header eq ":") { $opt::header = "(.*\n)"; }
 591         # Number = number of lines
 592         $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
 593         while(sysread($fh,$$buf_ref,$Global::blocksize,length $$buf_ref)) {
 594             if($$buf_ref =~ s/^($opt::header)//) {
 595                 $header = $1;
 596                 last;
 597             }
 598         }
 599     }
 600     return $header;
 601 }
 602
 603 sub find_split_positions($$$) {
 604     # Find positions in bigfile where recend is followed by recstart
 605     # Input:
 606     #   $file = the file to read
 607     #   $block = (minimal) --block-size of each chunk
 608     #   $header = header to be skipped
 609     # Uses:
 610     #   $opt::recstart
 611     #   $opt::recend
 612     # Returns:
 613     #   @positions of block start/end
 614     my($file, $block, $header) = @_;
 615     my $headerlen = length $header;
 616     my $size = -s $file;
 617     if(-b $file) {
 618         # $file is a blockdevice
 619         $size = size_of_block_dev($file);
 620     }
 621     $block = int $block;
 622     if($opt::groupby) {
 623         return split_positions_for_group_by($file,$size,$block,$header);
 624     }
 625     # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
 626     # The optimal dd blocksize for freebsd = 2^15..2^17
 627     my $dd_block_size = 131072; # 2^17
 628     my @pos;
 629     my ($recstart,$recend) = recstartrecend();
 630     my $recendrecstart = $recend.$recstart;
 631     my $fh = ::open_or_exit($file);
 632     push(@pos,$headerlen);
 633     for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
 634         my $buf;
 635         if($recendrecstart eq "") {
 636             # records ends anywhere
 637             push(@pos,$pos);
 638         } else {
 639             # Seek the the block start
 640             if(not sysseek($fh, $pos, 0)) {
 641                 ::error("Cannot seek to $pos in $file");
 642                 edit(255);
 643             }
 644             while(sysread($fh,$buf,$dd_block_size,length $buf)) {
 645                 if($opt::regexp) {
 646                     # If match /$recend$recstart/ => Record position
 647                     if($buf =~ m:^(.*$recend)$recstart:os) {
 648                         # Start looking for next record _after_ this match
 649                         $pos += length($1);
 650                         push(@pos,$pos);
 651                         last;
 652                     }
 653                 } else {
 654                     # If match $recend$recstart => Record position
 655                     # TODO optimize to only look at the appended
 656                     #   $dd_block_size + len $recendrecstart
 657                     # TODO increase $dd_block_size to optimize for longer records
 658                     my $i = index64(\$buf,$recendrecstart);
 659                     if($i != -1) {
 660                         # Start looking for next record _after_ this match
 661                         $pos += $i + length($recend);
 662                         push(@pos,$pos);
 663                         last;
 664                     }
 665                 }
 666             }
 667         }
 668     }
 669     if($pos[$#pos] != $size) {
 670         # Last splitpoint was not at end of the file: add $size as the last
 671         push @pos, $size;
 672     }
 673     close $fh;
 674     return @pos;
 675 }
 676
 677 sub split_positions_for_group_by($$$$) {
 678     my($fh);
 679     sub value_at($) {
 680         my $pos = shift;
 681         if($pos != 0) {
 682             seek($fh, $pos-1, 0) || die;
 683             # Read half line
 684             <$fh>;
 685         }
 686         # Read full line
 687         my $linepos = tell($fh);
 688         $_ = <$fh>;
 689         if(defined $_) {
 690             # Not end of file
 691             my @F;
 692             if(defined $group_by::col) {
 693                 $opt::colsep ||= "\t";
 694                 @F = split /$opt::colsep/, $_;
 695                 $_ = $F[$group_by::col];
 696             }
 697             eval $group_by::perlexpr;
 698         }
 699         return ($_,$linepos);
 700     }
 701
 702     sub binary_search_end($$$) {
 703         my ($s,$spos,$epos) = @_;
 704         # value_at($spos) == $s
 705         # value_at($epos) != $s
 706         my $posdif = $epos - $spos;
 707         my ($v,$vpos);
 708         while($posdif) {
 709             ($v,$vpos) = value_at($spos+$posdif);
 710             if($v eq $s) {
 711                 $spos = $vpos;
 712                 $posdif = $epos - $spos;
 713             } else {
 714                 $epos = $vpos;
 715             }
 716             $posdif = int($posdif/2);
 717         }
 718         return($v,$vpos);
 719     }
 720
 721     sub binary_search_start($$$) {
 722         my ($s,$spos,$epos) = @_;
 723         # value_at($spos) != $s
 724         # value_at($epos) == $s
 725         my $posdif = $epos - $spos;
 726         my ($v,$vpos);
 727         while($posdif) {
 728             ($v,$vpos) = value_at($spos+$posdif);
 729             if($v eq $s) {
 730                 $epos = $vpos;
 731             } else {
 732                 $spos = $vpos;
 733                 $posdif = $epos - $spos;
 734             }
 735             $posdif = int($posdif/2);
 736         }
 737         return($v,$vpos);
 738     }
 739
 740     my ($file,$size,$block,$header) = @_;
 741     my ($a,$b,$c,$apos,$bpos,$cpos);
 742     my @pos;
 743     $fh = open_or_exit($file);
 744     # Set $Global::group_by_column $Global::group_by_perlexpr
 745     group_by_loop($fh,$opt::recsep);
 746     # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
 747     $apos = length $header;
 748     for(($a,$apos) = value_at($apos); $apos < $size;) {
 749         push @pos, $apos;
 750         $bpos = $apos + $block;
 751         ($b,$bpos) = value_at($bpos);
 752         if(eof($fh)) {
 753             push @pos, $size; last;
 754         }
 755         $cpos = $bpos + $block;
 756         ($c,$cpos) = value_at($cpos);
 757         if($a eq $b) {
 758             while($b eq $c) {
 759                 # Move bpos, cpos a block forward until $a == $b != $c
 760                 $bpos = $cpos;
 761                 $cpos += $block;
 762                 ($c,$cpos) = value_at($cpos);
 763                 if($cpos >= $size) {
 764                     $cpos = $size;
 765                     last;
 766                 }
 767             }
 768             # $a == $b != $c
 769             # Binary search for $b ending between ($bpos,$cpos)
 770             ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
 771         } else {
 772             if($b eq $c) {
 773                 # $a != $b == $c
 774                 # Binary search for $b starting between ($apos,$bpos)
 775                 ($b,$bpos) = binary_search_start($b,$apos,$bpos);
 776             } else {
 777                 # $a != $b != $c
 778                 # Binary search for $b ending between ($bpos,$cpos)
 779                 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
 780             }
 781         }
 782         ($a,$apos) = ($b,$bpos);
 783     }
 784     if($pos[$#pos] != $size) {
 785         # Last splitpoint was not at end of the file: add it
 786         push @pos, $size;
 787     }
 788     return @pos;
 789 }
 790
 791 sub cat_partial($@) {
 792     # Efficient command to copy from byte X to byte Y
 793     # Input:
 794     #   $file = the file to read
 795     #   ($start, $end, [$start2, $end2, ...]) = start byte, end byte
 796     # Returns:
 797     #   Efficient command to copy $start..$end, $start2..$end2, ... to stdout
 798     my($file, @start_end) = @_;
 799     my($start, $i);
 800     # Convert (start,end) to (start,len)
 801     my @start_len = map {
 802         if(++$i % 2) { $start = $_; } else { $_-$start }
 803     } @start_end;
 804     # This can read 7 GB/s using a single core
 805     my $script = spacefree
 806         (0,
 807          q{
 808              while(@ARGV) {
 809                  sysseek(STDIN,shift,0) || die;
 810                  $left = shift;
 811                  while($read =
 812                        sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
 813                      $left -= $read;
 814                      syswrite(STDOUT,$buf);
 815                  }
 816              }
 817          });
 818     return "<". Q($file) .
 819         " perl -e '$script' @start_len |";
 820 }
 821
 822 sub column_perlexpr($$$) {
 823     # Compute the column number (if any), perlexpression from combined
 824     # string (such as --shard key, --groupby key, {=n perlexpr=}
 825     # Input:
 826     #   $column_perlexpr = string with column and perl expression
 827     #   $header = header from input file (if column is column name)
 828     #   $colsep = column separator regexp
 829     # Returns:
 830     #   $col = column number
 831     #   $perlexpr = perl expression
 832     #   $subref = compiled perl expression as sub reference
 833     my ($column_perlexpr, $header, $colsep) = @_;
 834     my ($col, $perlexpr, $subref);
 835     if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
 836         # Column name/number (possibly prefix)
 837         if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
 838             # Column number (possibly prefix)
 839             $col = $1;
 840         } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
 841             # Column name (possibly prefix)
 842             my $colname = $1;
 843             # Split on --copsep pattern
 844             my @headers = split /$colsep/, $header;
 845             my %headers;
 846             @headers{@headers} = (1..($#headers+1));
 847             $col = $headers{$colname};
 848             if(not defined $col) {
 849                 ::error("Column '$colname' $colsep not found in header",keys %headers);
 850                 ::wait_and_exit(255);
 851             }
 852         }
 853     }
 854     # What is left of $column_perlexpr is $perlexpr (possibly empty)
 855     $perlexpr = $column_perlexpr;
 856     $subref = eval("sub { no strict; no warnings; $perlexpr }");
 857     return($col, $perlexpr, $subref);
 858 }
 859
 860 sub group_by_loop($$) {
 861     # Generate perl code for group-by loop
 862     # Insert a $recsep when the column value changes
 863     # The column value can be computed with $perexpr
 864     my($fh,$recsep) = @_;
 865     my $groupby = $opt::groupby;
 866     if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
 867         # Group by column name
 868         # (Yes, this will also wrongly match a perlexpr like: chop)
 869         my($read,$char,@line);
 870         # A full line, but nothing more (the rest must be read by the child)
 871         # $Global::header used to prepend block to each job
 872         do {
 873             $read = sysread($fh,$char,1);
 874             push @line, $char;
 875         } while($read and $char ne "\n");
 876         $Global::header = join "", @line;
 877     }
 878     $opt::colsep ||= "\t";
 879     ($group_by::col, $group_by::perlexpr, $group_by::subref) =
 880         column_perlexpr($groupby, $Global::header, $opt::colsep);
 881     # Numbered 0..n-1 due to being used by $F[n]
 882     if($group_by::col) { $group_by::col--; }
 883
 884     my $loop = ::spacefree(0,q{
 885         BEGIN{ $last = "RECSEP"; }
 886         {
 887             local $_=COLVALUE;
 888             PERLEXPR;
 889             if(($last) ne $_) {
 890                 print "RECSEP";
 891                 $last = $_;
 892             }
 893         }
 894                            });
 895     if(defined $group_by::col) {
 896         $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
 897     } else {
 898         $loop =~ s/COLVALUE/\$_/g;
 899     }
 900     $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
 901     $loop =~ s/RECSEP/$recsep/g;
 902     return $loop;
 903 }
 904
 905 sub group_by_stdin_filter() {
 906     # Record separator with 119 bit random value
 907     $opt::recend = '';
 908     $opt::recstart =
 909         join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
 910     $opt::remove_rec_sep = 1;
 911     my @filter;
 912     push @filter, "perl";
 913     if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
 914         # This is column number/name
 915         # Use -a (auto-split)
 916         push @filter, "-a";
 917         $opt::colsep ||= "\t";
 918         my $sep = $opt::colsep;
 919         $sep =~ s/\t/\\t/g;
 920         $sep =~ s/\"/\\"/g;
 921         push @filter, "-F$sep";
 922     }
 923     push @filter, "-pe";
 924     push @filter, group_by_loop(*STDIN,$opt::recstart);
 925     ::debug("init", "@filter\n");
 926     open(STDIN, '-|', @filter) || die ("Cannot start @filter");
 927 }
 928
 929 sub spreadstdin() {
 930     # read a record
 931     # Spawn a job and print the record to it.
 932     # Uses:
 933     #   $Global::blocksize
 934     #   STDIN
 935     #   $opt::r
 936     #   $Global::max_lines
 937     #   $Global::max_number_of_args
 938     #   $opt::regexp
 939     #   $Global::start_no_new_jobs
 940     #   $opt::roundrobin
 941     #   %Global::running
 942     # Returns: N/A
 943
 944     my $buf = "";
 945     my ($recstart,$recend) = recstartrecend();
 946     my $recendrecstart = $recend.$recstart;
 947     my $chunk_number = 1;
 948     my $one_time_through;
 949     my $two_gb = 2**31-1;
 950     my $blocksize = $Global::blocksize;
 951     my $in = *STDIN;
 952     my $timeout = $Global::blocktimeout;
 953
 954     my $header = find_header(\$buf,$in);
 955     my $anything_written;
 956     my $eof;
 957
 958     sub read_block() {
 959         # Read a --blocksize from STDIN
 960         # possibly interrupted by --blocktimeout
 961         # Add up to the next full block
 962         my $readsize = $blocksize - (length $buf) % $blocksize;
 963         my ($nread,$alarm);
 964         eval {
 965             local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
 966             # --blocktimeout (or 0 if not set)
 967             alarm $timeout;
 968             if($] >= 5.026) {
 969                 do {
 970                     $nread = sysread $in, $buf, $readsize, length $buf;
 971                     $readsize -= $nread;
 972                 } while($readsize and $nread);
 973             } else {
 974                 # Less efficient reading, but 32-bit sysread compatible
 975                 do {
 976                     $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
 977                     $readsize -= $nread;
 978                 } while($readsize and $nread);
 979             }
 980             alarm 0;
 981         };
 982         if ($@) {
 983             die unless $@ eq "alarm\n";   # propagate unexpected errors
 984             $alarm = 1;
 985         } else {
 986             $alarm = 0;
 987         }
 988         $eof = not ($nread or $alarm);
 989     }
 990
 991     sub pass_n_line_records() {
 992         # Pass records of N lines
 993         my $n_lines = $buf =~ tr/\n/\n/;
 994         my $last_newline_pos = rindex64(\$buf,"\n");
 995         # Go backwards until there are full n-line records
 996         while($n_lines % $Global::max_lines) {
 997             $n_lines--;
 998             $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
 999         }
1000         # Chop at $last_newline_pos as that is where n-line record ends
1001         $anything_written +=
1002             write_record_to_pipe($chunk_number++,\$header,\$buf,
1003                                  $recstart,$recend,$last_newline_pos+1);
1004         shorten(\$buf,$last_newline_pos+1);
1005     }
1006
1007     sub pass_n_regexps() {
1008         # Pass records of N regexps
1009         # -N => (start..*?end){n}
1010         # -L -N => (start..*?end){n*l}
1011         my $read_n_lines = -1+
1012             $Global::max_number_of_args * ($Global::max_lines || 1);
1013         # (?!negative lookahead) is needed to avoid backtracking
1014         # See: https://unix.stackexchange.com/questions/439356/
1015         while($buf =~
1016               /(
1017               # Either recstart or at least one char from start
1018               ^(?: $recstart | .)
1019               # followed something
1020               (?:(?!$recend$recstart).)*?
1021               # and then recend
1022               $recend
1023               # Then n-1 times recstart.*recend
1024               (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
1025               )
1026               # Followed by recstart
1027               (?=$recstart)/osx) {
1028             $anything_written +=
1029                 write_record_to_pipe($chunk_number++,\$header,\$buf,
1030                                      $recstart,$recend,length $1);
1031             shorten(\$buf,length $1);
1032         }
1033     }
1034
1035     sub pass_regexp() {
1036         # Find the last recend-recstart in $buf
1037         $eof and return;
1038         if($buf =~ /^(.*$recend)$recstart.*?$/os) {
1039             $anything_written +=
1040                 write_record_to_pipe($chunk_number++,\$header,\$buf,
1041                                      $recstart,$recend,length $1);
1042             shorten(\$buf,length $1);
1043         }
1044     }
1045
1046     sub pass_csv_record() {
1047         # Pass CVS record
1048         # We define a CSV record as an even number of " + end of line
1049         # This works if you use " as quoting character
1050         my $last_newline_pos = length $buf;
1051         # Go backwards from the last \n and search for a position
1052         # where there is an even number of "
1053         do {
1054             # find last EOL
1055             $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1056             # While uneven "
1057         } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1058                 and $last_newline_pos >= 0);
1059         # Chop at $last_newline_pos as that is where CSV record ends
1060         $anything_written +=
1061             write_record_to_pipe($chunk_number++,\$header,\$buf,
1062                                  $recstart,$recend,$last_newline_pos+1);
1063         shorten(\$buf,$last_newline_pos+1);
1064     }
1065
1066     sub pass_n() {
1067         # Pass n records of --recend/--recstart
1068         # -N => (start..*?end){n}
1069         my $i = 0;
1070         my $read_n_lines =
1071             $Global::max_number_of_args * ($Global::max_lines || 1);
1072             while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1073                   and
1074                   length $buf) {
1075                 $i += length $recend; # find the actual splitting location
1076                 $anything_written +=
1077                     write_record_to_pipe($chunk_number++,\$header,\$buf,
1078                                          $recstart,$recend,$i);
1079                 shorten(\$buf,$i);
1080             }
1081     }
1082
1083     sub pass() {
1084         # Pass records of --recend/--recstart
1085         # Split record at fixed string
1086         # Find the last recend+recstart in $buf
1087         $eof and return;
1088         my $i = rindex64(\$buf,$recendrecstart);
1089         if($i != -1) {
1090             $i += length $recend; # find the actual splitting location
1091             $anything_written +=
1092                 write_record_to_pipe($chunk_number++,\$header,\$buf,
1093                                      $recstart,$recend,$i);
1094             shorten(\$buf,$i);
1095         }
1096     }
1097
1098     sub increase_blocksize_maybe() {
1099         if(not $anything_written
1100            and not $opt::blocktimeout
1101            and not $Global::no_autoexpand_block) {
1102             # Nothing was written - maybe the block size < record size?
1103             # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1104             if($blocksize < $two_gb) {
1105                 my $old_blocksize = $blocksize;
1106                 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1107                 ::warning("A record was longer than $old_blocksize. " .
1108                           "Increasing to --blocksize $blocksize.");
1109             }
1110         }
1111     }
1112
1113     while(1) {
1114         $anything_written = 0;
1115         read_block();
1116         if($opt::r) {
1117             # Remove empty lines
1118             $buf =~ s/^\s*\n//gm;
1119             if(length $buf == 0) {
1120                 if($eof) {
1121                     last;
1122                 } else {
1123                     next;
1124                 }
1125             }
1126         }
1127         if($Global::max_lines and not $Global::max_number_of_args) {
1128             # Pass n-line records
1129             pass_n_line_records();
1130         } elsif($opt::csv) {
1131             # Pass a full CSV record
1132             pass_csv_record();
1133         } elsif($opt::regexp) {
1134             # Split record at regexp
1135             if($Global::max_number_of_args) {
1136                 pass_n_regexps();
1137             } else {
1138                 pass_regexp();
1139             }
1140         } else {
1141             # Pass normal --recend/--recstart record
1142             if($Global::max_number_of_args) {
1143                 pass_n();
1144             } else {
1145                 pass();
1146             }
1147         }
1148         $eof and last;
1149         increase_blocksize_maybe();
1150         ::debug("init", "Round\n");
1151     }
1152     ::debug("init", "Done reading input\n");
1153
1154     # If there is anything left in the buffer write it
1155     write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1156                          $recend, length $buf);
1157
1158     if($opt::retries) {
1159         $Global::no_more_input = 1;
1160         # We need to start no more jobs: At most we need to retry some
1161         # of the already running.
1162         my @running = values %Global::running;
1163         # Stop any virgins.
1164         for my $job (@running) {
1165             if(defined $job and $job->virgin()) {
1166                 close $job->fh(0,"w");
1167             }
1168         }
1169         # Wait for running jobs to be done
1170         my $sleep = 1;
1171         while($Global::total_running > 0) {
1172             $sleep = ::reap_usleep($sleep);
1173             start_more_jobs();
1174         }
1175     }
1176     $Global::start_no_new_jobs ||= 1;
1177     if($opt::roundrobin) {
1178         # Flush blocks to roundrobin procs
1179         my $sleep = 1;
1180         while(%Global::running) {
1181             my $something_written = 0;
1182             for my $job (values %Global::running) {
1183                 if($job->block_length()) {
1184                     $something_written += $job->non_blocking_write();
1185                 } else {
1186                     close $job->fh(0,"w");
1187                 }
1188             }
1189             if($something_written) {
1190                 $sleep = $sleep/2+0.001;
1191             }
1192             $sleep = ::reap_usleep($sleep);
1193         }
1194     }
1195 }
1196
1197 sub recstartrecend() {
1198     # Uses:
1199     #   $opt::recstart
1200     #   $opt::recend
1201     # Returns:
1202     #   $recstart,$recend with default values and regexp conversion
1203     my($recstart,$recend);
1204     if(defined($opt::recstart) and defined($opt::recend)) {
1205         # If both --recstart and --recend is given then both must match
1206         $recstart = $opt::recstart;
1207         $recend = $opt::recend;
1208     } elsif(defined($opt::recstart)) {
1209         # If --recstart is given it must match start of record
1210         $recstart = $opt::recstart;
1211         $recend = "";
1212     } elsif(defined($opt::recend)) {
1213         # If --recend is given then it must match end of record
1214         $recstart = "";
1215         $recend = $opt::recend;
1216         if($opt::regexp and $recend eq '') {
1217             # --regexp --recend ''
1218             $recend = '.';
1219         }
1220     }
1221
1222     if($opt::regexp) {
1223         # If $recstart/$recend contains '|'
1224         # this should only apply to the regexp
1225         $recstart = "(?:".$recstart.")";
1226         $recend = "(?:".$recend.")";
1227     } else {
1228         # $recstart/$recend = printf strings (\n)
1229         $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1230         $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1231     }
1232     return ($recstart,$recend);
1233 }
1234
1235 sub nindex($$) {
1236     # See if string is in buffer N times
1237     # Returns:
1238     #   the position where the Nth copy is found
1239     my ($buf_ref, $str, $n) = @_;
1240     my $i = 0;
1241     for(1..$n) {
1242         $i = index64($buf_ref,$str,$i+1);
1243         if($i == -1) { last }
1244     }
1245     return $i;
1246 }
1247
1248 {
1249     my @robin_queue;
1250     my $sleep = 1;
1251
1252     sub round_robin_write($$$$$) {
1253         # Input:
1254         #   $header_ref = ref to $header string
1255         #   $block_ref = ref to $block to be written
1256         #   $recstart = record start string
1257         #   $recend = record end string
1258         #   $endpos = end position of $block
1259         # Uses:
1260         #   %Global::running
1261         # Returns:
1262         #   $something_written = amount of bytes written
1263         my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1264         my $written = 0;
1265         my $block_passed = 0;
1266         while(not $block_passed) {
1267             # Continue flushing existing buffers
1268             # until one is empty and a new block is passed
1269             if(@robin_queue) {
1270                 # Rotate queue once so new blocks get a fair chance
1271                 # to be given to another slot
1272                 push @robin_queue, shift @robin_queue;
1273             } else {
1274                 # Make a queue to spread the blocks evenly
1275                 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1276                                     values %Global::running);
1277             }
1278             do {
1279                 $written = 0;
1280                 for my $job (@robin_queue) {
1281                     if($job->block_length() > 0) {
1282                         $written += $job->non_blocking_write();
1283                     } else {
1284                         $job->set_block($header_ref, $buffer_ref,
1285                                         $endpos, $recstart, $recend);
1286                         $block_passed = 1;
1287                         $job->set_virgin(0);
1288                         $written += $job->non_blocking_write();
1289                         last;
1290                     }
1291                 }
1292                 if($written) {
1293                     $sleep = $sleep/1.5+0.001;
1294                 }
1295                 # Don't sleep if something is written
1296             } while($written and not $block_passed);
1297             $sleep = ::reap_usleep($sleep);
1298         }
1299         return $written;
1300     }
1301 }
1302
1303 sub index64($$$) {
1304     # Do index on strings > 2GB.
1305     # index in Perl < v5.22 does not work for > 2GB
1306     # Input:
1307     #   as index except STR which must be passed as a reference
1308     # Output:
1309     #   as index
1310     my $ref = shift;
1311     my $match = shift;
1312     my $pos = shift || 0;
1313     my $block_size = 2**31-1;
1314     my $strlen   = length($$ref);
1315     # No point in doing extra work if we don't need to.
1316     if($strlen < $block_size or $] > 5.022) {
1317         return index($$ref, $match, $pos);
1318     }
1319
1320     my $matchlen = length($match);
1321     my $ret;
1322     my $offset = $pos;
1323     while($offset < $strlen) {
1324         $ret = index(
1325             substr($$ref, $offset, $block_size),
1326             $match, $pos-$offset);
1327         if($ret != -1) {
1328             return $ret + $offset;
1329         }
1330         $offset += ($block_size - $matchlen - 1);
1331     }
1332     return -1;
1333 }
1334
1335 sub rindex64($@) {
1336     # Do rindex on strings > 2GB.
1337     # rindex in Perl < v5.22 does not work for > 2GB
1338     # Input:
1339     #   as rindex except STR which must be passed as a reference
1340     # Output:
1341     #   as rindex
1342     my $ref = shift;
1343     my $match = shift;
1344     my $pos = shift;
1345     my $block_size = 2**31-1;
1346     my $strlen = length($$ref);
1347     # Default: search from end
1348     $pos = defined $pos ? $pos : $strlen;
1349     # No point in doing extra work if we don't need to.
1350     if($strlen < $block_size) {
1351         return rindex($$ref, $match, $pos);
1352     }
1353
1354     my $matchlen = length($match);
1355     my $ret;
1356     my $offset = $pos - $block_size + $matchlen;
1357     if($offset < 0) {
1358         # The offset is less than a $block_size
1359         # Set the $offset to 0 and
1360         # Adjust block_size accordingly
1361         $block_size = $block_size + $offset;
1362         $offset = 0;
1363     }
1364     while($offset >= 0) {
1365         $ret = rindex(
1366             substr($$ref, $offset, $block_size),
1367             $match);
1368         if($ret != -1) {
1369             return $ret + $offset;
1370         }
1371         $offset -= ($block_size - $matchlen - 1);
1372     }
1373     return -1;
1374 }
1375
1376 sub shorten($$) {
1377     # Do: substr($buf,0,$i) = "";
1378     # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1379     # Input:
1380     #   $buf_ref = \$buf
1381     #   $i = position to shorten to
1382     # Returns: N/A
1383     my ($buf_ref, $i) = @_;
1384     my $two_gb = 2**31-1;
1385     while($i > $two_gb) {
1386         substr($$buf_ref,0,$two_gb) = "";
1387         $i -= $two_gb;
1388     }
1389     substr($$buf_ref,0,$i) = "";
1390 }
1391
1392 sub write_record_to_pipe($$$$$$) {
1393     # Fork then
1394     # Write record from pos 0 .. $endpos to pipe
1395     # Input:
1396     #   $chunk_number = sequence number - to see if already run
1397     #   $header_ref = reference to header string to prepend
1398     #   $buffer_ref = reference to record to write
1399     #   $recstart = start string of record
1400     #   $recend = end string of record
1401     #   $endpos = position in $buffer_ref where record ends
1402     # Uses:
1403     #   $Global::job_already_run
1404     #   $opt::roundrobin
1405     #   @Global::virgin_jobs
1406     # Returns:
1407     #   Number of chunks written (0 or 1)
1408     my ($chunk_number, $header_ref, $buffer_ref,
1409         $recstart, $recend, $endpos) = @_;
1410     if($endpos == 0) { return 0; }
1411     if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1412     if($opt::roundrobin) {
1413         # Write the block to one of the already running jobs
1414         return round_robin_write($header_ref, $buffer_ref,
1415                                  $recstart, $recend, $endpos);
1416     }
1417     # If no virgin found, backoff
1418     my $sleep = 0.0001; # 0.01 ms - better performance on highend
1419     while(not @Global::virgin_jobs) {
1420         ::debug("pipe", "No virgin jobs");
1421         $sleep = ::reap_usleep($sleep);
1422         # Jobs may not be started because of loadavg
1423         # or too little time between each ssh login
1424         # or retrying failed jobs.
1425         start_more_jobs();
1426     }
1427     my $job = shift @Global::virgin_jobs;
1428     # Job is no longer virgin
1429     $job->set_virgin(0);
1430
1431     if($opt::retries) {
1432         # Copy $buffer[0..$endpos] to $job->{'block'}
1433         # Remove rec_sep
1434         # Run $job->add_transfersize
1435         $job->set_block($header_ref, $buffer_ref, $endpos,
1436                         $recstart, $recend);
1437         if(fork()) {
1438             # Skip
1439         } else {
1440             $job->write($job->block_ref());
1441             close $job->fh(0,"w");
1442             exit(0);
1443         }
1444     } else {
1445         # We ignore the removed rec_sep which is technically wrong.
1446         $job->add_transfersize($endpos + length $$header_ref);
1447         if(fork()) {
1448             # Skip
1449         } else {
1450             # Chop of at $endpos as we do not know how many rec_sep will
1451             # be removed.
1452             substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
1453             # Remove rec_sep
1454             if($opt::remove_rec_sep) {
1455                 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
1456             }
1457             $job->write($header_ref);
1458             $job->write($buffer_ref);
1459             close $job->fh(0,"w");
1460             exit(0);
1461         }
1462     }
1463     close $job->fh(0,"w");
1464     return 1;
1465 }
1466
1467
1468 sub __SEM_MODE__() {}
1469
1470
1471 sub acquire_semaphore() {
1472     # Acquires semaphore. If needed: spawns to the background
1473     # Uses:
1474     #   @Global::host
1475     # Returns:
1476     #   The semaphore to be released when jobs is complete
1477     $Global::host{':'} = SSHLogin->new(":");
1478     my $sem = Semaphore->new($Semaphore::name,
1479                              $Global::host{':'}->max_jobs_running());
1480     $sem->acquire();
1481     if($Semaphore::fg) {
1482         # skip
1483     } else {
1484         if(fork()) {
1485             exit(0);
1486         } else {
1487             # If run in the background, the PID will change
1488             $sem->pid_change();
1489         }
1490     }
1491     return $sem;
1492 }
1493
1494
1495 sub __PARSE_OPTIONS__() {}
1496
1497
1498 sub options_hash() {
1499     # Returns:
1500     #   %hash = the GetOptions config
1501     return
1502         ("debug|D=s" => \$opt::D,
1503          "xargs" => \$opt::xargs,
1504          "m" => \$opt::m,
1505          "X" => \$opt::X,
1506          "v" => \@opt::v,
1507          "sql=s" => \$opt::retired,
1508          "sqlmaster=s" => \$opt::sqlmaster,
1509          "sqlworker=s" => \$opt::sqlworker,
1510          "sqlandworker=s" => \$opt::sqlandworker,
1511          "joblog|jl=s" => \$opt::joblog,
1512          "results|result|res=s" => \$opt::results,
1513          "resume" => \$opt::resume,
1514          "resume-failed|resumefailed" => \$opt::resume_failed,
1515          "retry-failed|retryfailed" => \$opt::retry_failed,
1516          "silent" => \$opt::silent,
1517          "keep-order|keeporder|k" => \$opt::keeporder,
1518          "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1519          "group" => \$opt::group,
1520          "g" => \$opt::retired,
1521          "ungroup|u" => \$opt::ungroup,
1522          "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1523          => \$opt::linebuffer,
1524          "tmux" => \$opt::tmux,
1525          "tmuxpane" => \$opt::tmuxpane,
1526          "null|0" => \$opt::null,
1527          "quote|q" => \$opt::quote,
1528          # Replacement strings
1529          "parens=s" => \$opt::parens,
1530          "rpl=s" => \@opt::rpl,
1531          "plus" => \$opt::plus,
1532          "I=s" => \$opt::I,
1533          "extensionreplace|er=s" => \$opt::U,
1534          "U=s" => \$opt::retired,
1535          "basenamereplace|bnr=s" => \$opt::basenamereplace,
1536          "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1537          "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1538          "seqreplace=s" => \$opt::seqreplace,
1539          "slotreplace=s" => \$opt::slotreplace,
1540          "jobs|j=s" => \$opt::jobs,
1541          "delay=s" => \$opt::delay,
1542          "sshdelay=f" => \$opt::sshdelay,
1543          "load=s" => \$opt::load,
1544          "noswap" => \$opt::noswap,
1545          "max-line-length-allowed" => \$opt::max_line_length_allowed,
1546          "number-of-cpus" => \$opt::number_of_cpus,
1547          "number-of-sockets" => \$opt::number_of_sockets,
1548          "number-of-cores" => \$opt::number_of_cores,
1549          "number-of-threads" => \$opt::number_of_threads,
1550          "use-sockets-instead-of-threads"
1551          => \$opt::use_sockets_instead_of_threads,
1552          "use-cores-instead-of-threads"
1553          => \$opt::use_cores_instead_of_threads,
1554          "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1555          "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1556          "nice=i" => \$opt::nice,
1557          "tag" => \$opt::tag,
1558          "tagstring|tag-string=s" => \$opt::tagstring,
1559          "onall" => \$opt::onall,
1560          "nonall" => \$opt::nonall,
1561          "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1562          "sshlogin|S=s" => \@opt::sshlogin,
1563          "sshloginfile|slf=s" => \@opt::sshloginfile,
1564          "controlmaster|M" => \$opt::controlmaster,
1565          "ssh=s" => \$opt::ssh,
1566          "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1567          => \@opt::transfer_files,
1568          "return=s" => \@opt::return,
1569          "trc=s" => \@opt::trc,
1570          "transfer" => \$opt::transfer,
1571          "cleanup" => \$opt::cleanup,
1572          "basefile|bf=s" => \@opt::basefile,
1573          "B=s" => \$opt::retired,
1574          "ctrlc|ctrl-c" => \$opt::retired,
1575          "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1576          "workdir|work-dir|wd=s" => \$opt::workdir,
1577          "W=s" => \$opt::retired,
1578          "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1579          "tmpdir|tempdir=s" => \$opt::tmpdir,
1580          "use-compress-program|compress-program=s" => \$opt::compress_program,
1581          "use-decompress-program|decompress-program=s"
1582          => \$opt::decompress_program,
1583          "compress" => \$opt::compress,
1584          "tty" => \$opt::tty,
1585          "T" => \$opt::retired,
1586          "H=i" => \$opt::retired,
1587          "dry-run|dryrun|dr" => \$opt::dryrun,
1588          "progress" => \$opt::progress,
1589          "eta" => \$opt::eta,
1590          "bar" => \$opt::bar,
1591          "shuf" => \$opt::shuf,
1592          "arg-sep|argsep=s" => \$opt::arg_sep,
1593          "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1594          "trim=s" => \$opt::trim,
1595          "env=s" => \@opt::env,
1596          "recordenv|record-env" => \$opt::record_env,
1597          "session" => \$opt::session,
1598          "plain" => \$opt::plain,
1599          "profile|J=s" => \@opt::profile,
1600          "pipe|spreadstdin" => \$opt::pipe,
1601          "robin|round-robin|roundrobin" => \$opt::roundrobin,
1602          "recstart=s" => \$opt::recstart,
1603          "recend=s" => \$opt::recend,
1604          "regexp|regex" => \$opt::regexp,
1605          "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1606          "files|output-as-files|outputasfiles" => \$opt::files,
1607          "block|block-size|blocksize=s" => \$opt::blocksize,
1608          "blocktimeout|block-timeout|bt=s" => \$opt::blocktimeout,
1609          "tollef" => \$opt::tollef,
1610          "gnu" => \$opt::gnu,
1611          "link|xapply" => \$opt::link,
1612          "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1613          # Before changing this line, please read
1614          # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1615          # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1616          "bibtex|citation" => \$opt::citation,
1617          "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1618          # Termination and retries
1619          "halt-on-error|halt=s" => \$opt::halt,
1620          "limit=s" => \$opt::limit,
1621          "memfree=s" => \$opt::memfree,
1622          "memsuspend=s" => \$opt::memsuspend,
1623          "retries=s" => \$opt::retries,
1624          "timeout=s" => \$opt::timeout,
1625          "termseq|term-seq=s" => \$opt::termseq,
1626          # xargs-compatibility - implemented, man, testsuite
1627          "max-procs|P=s" => \$opt::jobs,
1628          "delimiter|d=s" => \$opt::d,
1629          "max-chars|s=i" => \$opt::max_chars,
1630          "arg-file|a=s" => \@opt::a,
1631          "no-run-if-empty|r" => \$opt::r,
1632          "replace|i:s" => \$opt::i,
1633          "E=s" => \$opt::eof,
1634          "eof|e:s" => \$opt::eof,
1635          "max-args|maxargs|n=i" => \$opt::max_args,
1636          "max-replace-args|N=i" => \$opt::max_replace_args,
1637          "colsep|col-sep|C=s" => \$opt::colsep,
1638          "csv"=> \$opt::csv,
1639          "help|h" => \$opt::help,
1640          "L=f" => \$opt::L,
1641          "max-lines|l:f" => \$opt::max_lines,
1642          "interactive|p" => \$opt::interactive,
1643          "verbose|t" => \$opt::verbose,
1644          "version|V" => \$opt::version,
1645          "minversion|min-version=i" => \$opt::minversion,
1646          "show-limits|showlimits" => \$opt::show_limits,
1647          "exit|x" => \$opt::x,
1648          # Semaphore
1649          "semaphore" => \$opt::semaphore,
1650          "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1651          "semaphorename|id=s" => \$opt::semaphorename,
1652          "fg" => \$opt::fg,
1653          "bg" => \$opt::bg,
1654          "wait" => \$opt::wait,
1655          # Shebang #!/usr/bin/parallel --shebang
1656          "shebang|hashbang" => \$opt::shebang,
1657          "internal-pipe-means-argfiles"
1658          => \$opt::internal_pipe_means_argfiles,
1659          "Y" => \$opt::retired,
1660          "skip-first-line" => \$opt::skip_first_line,
1661          "bug" => \$opt::bug,
1662          "header=s" => \$opt::header,
1663          "cat" => \$opt::cat,
1664          "fifo" => \$opt::fifo,
1665          "pipepart|pipe-part" => \$opt::pipepart,
1666          "tee" => \$opt::tee,
1667          "shard=s" => \$opt::shard,
1668          "bin=s" => \$opt::bin,
1669          "groupby|group-by=s" => \$opt::groupby,
1670          "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1671          "embed" => \$opt::embed,
1672         );
1673 }
1674
1675 sub get_options_from_array($@) {
1676     # Run GetOptions on @array
1677     # Input:
1678     #   $array_ref = ref to @ARGV to parse
1679     #   @keep_only = Keep only these options
1680     # Uses:
1681     #   @ARGV
1682     # Returns:
1683     #   true if parsing worked
1684     #   false if parsing failed
1685     #   @$array_ref is changed
1686     my ($array_ref, @keep_only) = @_;
1687     if(not @$array_ref) {
1688         # Empty array: No need to look more at that
1689         return 1;
1690     }
1691     # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1692     # supported everywhere
1693     my @save_argv;
1694     my $this_is_ARGV = (\@::ARGV == $array_ref);
1695     if(not $this_is_ARGV) {
1696         @save_argv = @::ARGV;
1697         @::ARGV = @{$array_ref};
1698     }
1699     # If @keep_only set: Ignore all values except @keep_only
1700     my %options = options_hash();
1701     if(@keep_only) {
1702         my (%keep,@dummy);
1703         @keep{@keep_only} = @keep_only;
1704         for my $k (grep { not $keep{$_} } keys %options) {
1705             # Store the value of the option in @dummy
1706             $options{$k} = \@dummy;
1707         }
1708     }
1709     my $retval = GetOptions(%options);
1710     if(not $this_is_ARGV) {
1711         @{$array_ref} = @::ARGV;
1712         @::ARGV = @save_argv;
1713     }
1714     return $retval;
1715 }
1716
1717 sub parse_options(@) {
1718     # Returns: N/A
1719     init_globals();
1720     my @argv_before = @ARGV;
1721     @ARGV = read_options();
1722
1723     # Before changing this line, please read
1724     # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1725     # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1726     if(defined $opt::citation) {
1727         citation(\@argv_before,\@ARGV);
1728         wait_and_exit(0);
1729     }
1730     # no-* overrides *
1731     if($opt::nokeeporder) { $opt::keeporder = undef; }
1732
1733     if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1734     if($opt::bug) { ::die_bug("test-bug"); }
1735     $Global::debug = $opt::D;
1736     $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1737         || $ENV{'SHELL'} || "/bin/sh";
1738     if(not -x $Global::shell and not which($Global::shell)) {
1739         ::error("Shell '$Global::shell' not found.");
1740         wait_and_exit(255);
1741     }
1742     ::debug("init","Global::shell $Global::shell\n");
1743     $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1744     if(defined $opt::X) { $Global::ContextReplace = 1; }
1745     if(defined $opt::silent) { $Global::verbose = 0; }
1746     if(defined $opt::null) { $/ = "\0"; }
1747     if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1748     parse_replacement_string_options();
1749     if(defined $opt::tagstring) {
1750         $opt::tagstring = unquote_printf($opt::tagstring);
1751         if($opt::tagstring =~ /\Q$Global::parensleft\E.*\Q$Global::parensright\E/
1752            and
1753            $opt::linebuffer) {
1754             # --tagstring contains {= =} and --linebuffer =>
1755             #   recompute replacement string for each use (do not cache)
1756             $Global::cache_replacement_eval = 0;
1757         }
1758     }
1759     if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1760     if(defined $opt::quote) { $Global::quoting = 1; }
1761     if(defined $opt::r) { $Global::ignore_empty = 1; }
1762     if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1763     if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1764     if(defined $opt::max_args) {
1765         $Global::max_number_of_args = $opt::max_args;
1766     }
1767     if(defined $opt::blocktimeout) {
1768         $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
1769         if($Global::blocktimeout < 1) {
1770             ::error("--block-timeout must be at least 1");
1771             wait_and_exit(255);
1772         }
1773     }
1774     if(defined $opt::timeout) {
1775         $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1776     }
1777     if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1778     $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1779         $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1780     # Default: Same nice level as GNU  Parallel is started at
1781     $opt::nice ||= eval { getpriority(0,0) } || 0;
1782     if(defined $opt::help) { usage(); exit(0); }
1783     if(defined $opt::embed) { embed(); exit(0); }
1784     if(defined $opt::sqlandworker) {
1785         $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1786     }
1787     if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1788     if(defined $opt::colsep) { $Global::trim = 'lr'; }
1789     if(defined $opt::csv) {
1790         $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1791         $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1792         my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1793         my $sep = $csv_setting->{sep_char};
1794         $Global::csv = Text::CSV->new($csv_setting)
1795             or die "Cannot use CSV: ".Text::CSV->error_diag ();
1796     }
1797     if(defined $opt::header) {
1798         $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1799     }
1800     if(defined $opt::trim) { $Global::trim = $opt::trim; }
1801     if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1802     if(defined $opt::arg_file_sep) {
1803         $Global::arg_file_sep = $opt::arg_file_sep;
1804     }
1805     if(defined $opt::number_of_sockets) {
1806         print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1807     }
1808     if(defined $opt::number_of_cpus) {
1809         print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1810     }
1811     if(defined $opt::number_of_cores) {
1812         print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1813     }
1814     if(defined $opt::number_of_threads) {
1815         print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1816     }
1817     if(defined $opt::max_line_length_allowed) {
1818         print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1819     }
1820     if(defined $opt::version) { version(); wait_and_exit(0); }
1821     if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1822     if(defined $opt::show_limits) { show_limits(); }
1823     if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1824     if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1825     if(@opt::return) { push @Global::ret_files, @opt::return; }
1826     if($opt::transfer) {
1827         push @Global::transfer_files, $opt::i || $opt::I || "{}";
1828     }
1829     push @Global::transfer_files, @opt::transfer_files;
1830     if(not defined $opt::recstart and
1831        not defined $opt::recend) { $opt::recend = "\n"; }
1832     $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1833     if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1834         warning("--blocksize >= 2G causes problems. Using 2G-1.");
1835         $Global::blocksize = 2**31-1;
1836     }
1837     if($^O eq "cygwin" and
1838        ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1839        and $Global::blocksize > 65535) {
1840         warning("--blocksize >= 64K causes problems on Cygwin.");
1841     }
1842     $opt::memfree = multiply_binary_prefix($opt::memfree);
1843     $opt::memsuspend = multiply_binary_prefix($opt::memsuspend);
1844     $Global::memlimit = $opt::memsuspend + $opt::memfree;
1845     check_invalid_option_combinations();
1846     if((defined $opt::fifo or defined $opt::cat)
1847        and not $opt::pipepart) {
1848         $opt::pipe = 1;
1849     }
1850     if(defined $opt::minversion) {
1851         print $Global::version,"\n";
1852         if($Global::version < $opt::minversion) {
1853             wait_and_exit(255);
1854         } else {
1855             wait_and_exit(0);
1856         }
1857     }
1858     if(not defined $opt::delay) {
1859         # Set --delay to --sshdelay if not set
1860         $opt::delay = $opt::sshdelay;
1861     }
1862     $opt::delay = multiply_time_units($opt::delay);
1863     if($opt::compress_program) {
1864         $opt::compress = 1;
1865         $opt::decompress_program ||= $opt::compress_program." -dc";
1866     }
1867
1868     if(defined $opt::results) {
1869         # Is the output a dir or CSV-file?
1870         if($opt::results =~ /\.csv$/i) {
1871             # CSV with , as separator
1872             $Global::csvsep = ",";
1873             $Global::membuffer ||= 1;
1874         } elsif($opt::results =~ /\.tsv$/i) {
1875             # CSV with TAB as separator
1876             $Global::csvsep = "\t";
1877             $Global::membuffer ||= 1;
1878         }
1879     }
1880     if($opt::compress) {
1881         my ($compress, $decompress) = find_compression_program();
1882         $opt::compress_program ||= $compress;
1883         $opt::decompress_program ||= $decompress;
1884         if(($opt::results and not $Global::csvsep) or $opt::files) {
1885             # No need for decompressing
1886             $opt::decompress_program = "cat >/dev/null";
1887         }
1888     }
1889     if(defined $opt::dryrun) {
1890         # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1891         $opt::ungroup = 0;
1892         $opt::group = 1;
1893     }
1894     if(defined $opt::nonall) {
1895         # Append a dummy empty argument if there are no arguments
1896         # on the command line to avoid reading from STDIN.
1897         # arg_sep = random 50 char
1898         # \0noarg => nothing (not the empty string)
1899         $Global::arg_sep = join "",
1900         map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1901         push @ARGV, $Global::arg_sep, "\0noarg";
1902     }
1903     if(defined $opt::tee) {
1904         if(not defined $opt::jobs) {
1905             $opt::jobs = 0;
1906         }
1907     }
1908     if(defined $opt::tty) {
1909         # Defaults for --tty: -j1 -u
1910         # Can be overridden with -jXXX -g
1911         if(not defined $opt::jobs) {
1912             $opt::jobs = 1;
1913         }
1914         if(not defined $opt::group) {
1915             $opt::ungroup = 1;
1916         }
1917     }
1918     if(@opt::trc) {
1919         push @Global::ret_files, @opt::trc;
1920         if(not @Global::transfer_files) {
1921             # Defaults to --transferfile {}
1922             push @Global::transfer_files, $opt::i || $opt::I || "{}";
1923         }
1924         $opt::cleanup = 1;
1925     }
1926     if(defined $opt::max_lines) {
1927         if($opt::max_lines eq "-0") {
1928             # -l -0 (swallowed -0)
1929             $opt::max_lines = 1;
1930             $opt::null = 1;
1931             $/ = "\0";
1932         } elsif ($opt::max_lines == 0) {
1933             # If not given (or if 0 is given) => 1
1934             $opt::max_lines = 1;
1935         }
1936         $Global::max_lines = $opt::max_lines;
1937         if(not $opt::pipe) {
1938             # --pipe -L means length of record - not max_number_of_args
1939             $Global::max_number_of_args ||= $Global::max_lines;
1940         }
1941     }
1942
1943     # Read more than one arg at a time (-L, -N)
1944     if(defined $opt::L) {
1945         $Global::max_lines = $opt::L;
1946         if(not $opt::pipe) {
1947             # --pipe -L means length of record - not max_number_of_args
1948             $Global::max_number_of_args ||= $Global::max_lines;
1949         }
1950     }
1951     if(defined $opt::max_replace_args) {
1952         $Global::max_number_of_args = $opt::max_replace_args;
1953         $Global::ContextReplace = 1;
1954     }
1955     if((defined $opt::L or defined $opt::max_replace_args)
1956        and
1957        not ($opt::xargs or $opt::m)) {
1958         $Global::ContextReplace = 1;
1959     }
1960     if(defined $opt::tag and not defined $opt::tagstring) {
1961         # Default = {}
1962         $opt::tagstring = $Global::parensleft.$Global::parensright;
1963     }
1964     if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1965         # Deal with ::: :::+ :::: and ::::+
1966         @ARGV = read_args_from_command_line();
1967     }
1968     parse_semaphore();
1969
1970     if(defined $opt::eta) { $opt::progress = $opt::eta; }
1971     if(defined $opt::bar) { $opt::progress = $opt::bar; }
1972
1973     # Funding a free software project is hard. GNU Parallel is no
1974     # exception. On top of that it seems the less visible a project
1975     # is, the harder it is to get funding. And the nature of GNU
1976     # Parallel is that it will never be seen by "the guy with the
1977     # checkbook", but only by the people doing the actual work.
1978     #
1979     # This problem has been covered by others - though no solution has
1980     # been found:
1981     # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1982     # https://blog.licensezero.com/2019/08/24/process-of-elimination.html
1983     # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1984     #
1985     # The FAQ tells you why the citation notice exists:
1986     # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1987     #
1988     # If you want GNU Parallel to be maintained in the future, and not
1989     # just wither away like so many other free software tools, you
1990     # need to help finance the development.
1991     #
1992     # The citation notice is a simple way of doing so, as citations
1993     # makes it possible to me to get a job where I can maintain GNU
1994     # Parallel as part of the job.
1995     #
1996     # This means you can help financing development
1997     #
1998     #   WITHOUT PAYING A SINGLE CENT!
1999     #
2000     # Before implementing the citation notice it was discussed with
2001     # the users:
2002     # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2003     #
2004     # Having to spend 10 seconds on running 'parallel --citation' once
2005     # is no doubt not an ideal solution, but no one has so far come up
2006     # with an ideal solution - neither for funding GNU Parallel nor
2007     # other free software.
2008     #
2009     # If you believe you have the perfect solution, you should try it
2010     # out, and if it works, you should post it on the email
2011     # list. Ideas that will cost work and which have not been tested
2012     # are, however, unlikely to be prioritized.
2013     #
2014     # Please note that GPL version 3 gives you the right to fork GNU
2015     # Parallel under a new name, but it does not give you the right to
2016     # distribute modified copies with the citation notice disabled in
2017     # a way where the software can be confused with GNU Parallel. To
2018     # do that you need to be the owner of the GNU Parallel
2019     # trademark. The xt:Commerce case shows this.
2020     #
2021     # Description of the xt:Commerce case in OLG Duesseldorf
2022     # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2023     # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2024     #
2025     # The verdict in German
2026     # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2027     # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2028     #
2029     # Other free software limiting derivates by the same name:
2030     # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2031     # https://tm.joomla.org/trademark-faq.html
2032     # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2033     #
2034     # Running 'parallel --citation' one single time takes less than 10
2035     # seconds, and will silence the citation notice for future
2036     # runs. If that is too much trouble for you, why not use one of
2037     # the alternatives instead?
2038     # See a list in: 'man parallel_alternatives'
2039     #
2040     # If you want GNU Parallel to be maintained in the future keep
2041     # this line.
2042     citation_notice();
2043     # Seriously: _YOU_ will be harming free software by removing the
2044     # notice.  _YOU_ make it harder to justify spending time developing
2045     # it. If you *do* remove the line, please email
2046     # hallofshame@tange.dk if you want to avoid being put in a hall of
2047     # shame.
2048
2049     parse_halt();
2050
2051     if($ENV{'PARALLEL_ENV'}) {
2052         # Read environment and set $Global::parallel_env
2053         # Must be done before is_acceptable_command_line_length()
2054         my $penv = $ENV{'PARALLEL_ENV'};
2055         # unset $PARALLEL_ENV: It should not be given to children
2056         # because it takes up a lot of env space
2057         delete $ENV{'PARALLEL_ENV'};
2058         if(-e $penv) {
2059             # This is a file/fifo: Replace envvar with content of file
2060             open(my $parallel_env, "<", $penv) ||
2061                 ::die_bug("Cannot read parallel_env from $penv");
2062             local $/; # Put <> in slurp mode
2063             $penv = <$parallel_env>;
2064             close $parallel_env;
2065         }
2066         # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2067         $penv =~ s/\001/\n/g;
2068         if($penv =~ /\0/) {
2069             ::warning('\0 (NUL) in environment is not supported');
2070         }
2071         $Global::parallel_env = $penv;
2072     }
2073
2074     parse_sshlogin();
2075
2076     if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
2077         # As we do not know the max line length on the remote machine
2078         # long commands generated by xargs may fail
2079         # If $opt::max_replace_args is set, it is probably safe
2080         ::warning("Using -X or -m with --sshlogin may fail.");
2081     }
2082
2083     if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2084     open_joblog();
2085     open_csv();
2086     if($opt::sqlmaster or $opt::sqlworker) {
2087         $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2088     }
2089     if($opt::sqlworker) { $Global::membuffer ||= 1; }
2090     # The sqlmaster groups the arguments, so the should just read one
2091     if($opt::sqlworker and not $opt::sqlmaster) { $Global::max_number_of_args = 1; }
2092
2093 }
2094
2095 sub check_invalid_option_combinations() {
2096     if(defined $opt::timeout and
2097        $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2098         ::error("--timeout must be seconds or percentage.");
2099         wait_and_exit(255);
2100     }
2101     if(defined $opt::fifo and defined $opt::cat) {
2102         ::error("--fifo cannot be combined with --cat.");
2103         ::wait_and_exit(255);
2104     }
2105     if(defined $opt::retries and defined $opt::roundrobin) {
2106         ::error("--retries cannot be combined with --roundrobin.");
2107         ::wait_and_exit(255);
2108     }
2109     if(defined $opt::pipepart and
2110        (defined $opt::L or defined $opt::max_lines
2111         or defined $opt::max_replace_args)) {
2112         ::error("--pipepart is incompatible with --max-replace-args, ".
2113                 "--max-lines, and -L.");
2114         wait_and_exit(255);
2115     }
2116     if(defined $opt::group and $opt::ungroup) {
2117         ::error("--group cannot be combined with --ungroup.");
2118         ::wait_and_exit(255);
2119     }
2120     if(defined $opt::group and $opt::linebuffer) {
2121         ::error("--group cannot be combined with --line-buffer.");
2122         ::wait_and_exit(255);
2123     }
2124     if(defined $opt::ungroup and $opt::linebuffer) {
2125         ::error("--ungroup cannot be combined with --line-buffer.");
2126         ::wait_and_exit(255);
2127     }
2128     if(defined $opt::tollef and not $opt::gnu) {
2129        ::error("--tollef has been retired.",
2130                "Remove --tollef or use --gnu to override --tollef.");
2131        ::wait_and_exit(255);
2132     }
2133     if(defined $opt::retired) {
2134             ::error("-g has been retired. Use --group.",
2135                     "-B has been retired. Use --bf.",
2136                     "-T has been retired. Use --tty.",
2137                     "-U has been retired. Use --er.",
2138                     "-W has been retired. Use --wd.",
2139                     "-Y has been retired. Use --shebang.",
2140                     "-H has been retired. Use --halt.",
2141                     "--sql has been retired. Use --sqlmaster.",
2142                     "--ctrlc has been retired.",
2143                     "--noctrlc has been retired.");
2144             ::wait_and_exit(255);
2145     }
2146     if($opt::groupby) {
2147         if(not $opt::pipe and not $opt::pipepart) {
2148             $opt::pipe = 1;
2149         }
2150         if($opt::remove_rec_sep) {
2151             ::error("--remove-rec-sep is not compatible with --groupby");
2152             ::wait_and_exit(255);
2153         }
2154         if($opt::recstart) {
2155             ::error("--recstart is not compatible with --groupby");
2156             ::wait_and_exit(255);
2157         }
2158         if($opt::recend ne "\n") {
2159             ::error("--recend is not compatible with --groupby");
2160             ::wait_and_exit(255);
2161         }
2162     }
2163 }
2164
2165 sub init_globals() {
2166     # Defaults:
2167     $Global::version = 20200923;
2168     $Global::progname = 'parallel';
2169     $::name = "GNU Parallel";
2170     $Global::infinity = 2**31;
2171     $Global::debug = 0;
2172     $Global::verbose = 0;
2173     # Don't quote every part of the command line
2174     $Global::quoting = 0;
2175     # Quote replacement strings
2176     $Global::quote_replace = 1;
2177     $Global::total_completed = 0;
2178     $Global::cache_replacement_eval = 1;
2179     # Read only table with default --rpl values
2180     %Global::replace =
2181         (
2182          '{}'   => '',
2183          '{#}'  => '1 $_=$job->seq()',
2184          '{%}'  => '1 $_=$job->slot()',
2185          '{/}'  => 's:.*/::',
2186          '{//}' =>
2187          ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2188           '$_ = dirname($_);'),
2189          '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
2190          '{.}'  => 's:\.[^/.]+$::',
2191         );
2192     %Global::plus =
2193         (
2194          # {} = {+/}/{/}
2195          #    = {.}.{+.}     = {+/}/{/.}.{+.}
2196          #    = {..}.{+..}   = {+/}/{/..}.{+..}
2197          #    = {...}.{+...} = {+/}/{/...}.{+...}
2198          '{+/}' => 's:/[^/]*$::',
2199          '{+.}' => 's:.*\.::',
2200          '{+..}' => 's:.*\.([^.]*\.):$1:',
2201          '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
2202          '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
2203          '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2204          '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2205          '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2206          '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
2207          # {##} = number of jobs
2208          '{##}' => '$_=total_jobs()',
2209          # Bash ${a:-myval}
2210          '{:-([^}]+?)}' => '$_ ||= $$1',
2211          # Bash ${a:2}
2212          '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2213          # Bash ${a:2:3}
2214          '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2215          # Bash ${a#bc}
2216          '{#([^#}][^}]*?)}' => 's/^$$1//;',
2217          # Bash ${a%def}
2218          '{%([^}]+?)}' => 's/$$1$//;',
2219          # Bash ${a/def/ghi} ${a/def/}
2220          '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2221          # Bash ${a^a}
2222          '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2223          # Bash ${a^^a}
2224          '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2225          # Bash ${a,A}
2226          '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2227          # Bash ${a,,A}
2228          '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2229          # {slot} = $PARALLEL_JOBSLOT
2230          '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2231          # {host} = ssh host
2232          '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2233          # {sshlogin} = sshlogin
2234          '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2235         );
2236     # Modifiable copy of %Global::replace
2237     %Global::rpl = %Global::replace;
2238     $/ = "\n";
2239     $Global::ignore_empty = 0;
2240     $Global::interactive = 0;
2241     $Global::stderr_verbose = 0;
2242     $Global::default_simultaneous_sshlogins = 9;
2243     $Global::exitstatus = 0;
2244     $Global::arg_sep = ":::";
2245     $Global::arg_file_sep = "::::";
2246     $Global::trim = 'n';
2247     $Global::max_jobs_running = 0;
2248     $Global::job_already_run = '';
2249     $ENV{'TMPDIR'} ||= "/tmp";
2250     $ENV{'OLDPWD'} = $ENV{'PWD'};
2251     if(not $ENV{HOME}) {
2252         # $ENV{HOME} is sometimes not set if called from PHP
2253         ::warning("\$HOME not set. Using /tmp.");
2254         $ENV{HOME} = "/tmp";
2255     }
2256     # no warnings to allow for undefined $XDG_*
2257     no warnings 'uninitialized';
2258     # $xdg_config_home is needed to make env_parallel.fish stop complaining
2259     my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2260     # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
2261     #   $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
2262     # Keep only dirs that exist
2263     @Global::config_dirs =
2264         (grep { -d $_ }
2265          $ENV{'PARALLEL_HOME'},
2266          (map { "$_/parallel" }
2267           $xdg_config_home,
2268           split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2269          $ENV{'HOME'} . "/.parallel");
2270     # Use first dir as config dir
2271     $Global::config_dir = $Global::config_dirs[0] ||
2272         $ENV{'HOME'} . "/.parallel";
2273     # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
2274     # Keep only dirs that exist
2275     @Global::cache_dirs =
2276         (grep { -d $_ }
2277          $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
2278     $Global::cache_dir = $Global::cache_dirs[0] ||
2279         $ENV{'HOME'} . "/.parallel";
2280 }
2281
2282 sub parse_halt() {
2283     # $opt::halt flavours
2284     # Uses:
2285     #   $opt::halt
2286     #   $Global::halt_when
2287     #   $Global::halt_fail
2288     #   $Global::halt_success
2289     #   $Global::halt_pct
2290     #   $Global::halt_count
2291     if(defined $opt::halt) {
2292         my %halt_expansion = (
2293             "0" => "never",
2294             "1" => "soon,fail=1",
2295             "2" => "now,fail=1",
2296             "-1" => "soon,success=1",
2297             "-2" => "now,success=1",
2298         );
2299         # Expand -2,-1,0,1,2 into long form
2300         $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2301         # --halt 5% == --halt soon,fail=5%
2302         $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2303         # Split: soon,fail=5%
2304         my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2305         if(not grep { $when eq $_ } qw(never soon now)) {
2306             ::error("--halt must have 'never', 'soon', or 'now'.");
2307             ::wait_and_exit(255);
2308         }
2309         $Global::halt_when = $when;
2310         if($when ne "never") {
2311             if($fail_success eq "fail") {
2312                 $Global::halt_fail = 1;
2313             } elsif($fail_success eq "success") {
2314                 $Global::halt_success = 1;
2315             } elsif($fail_success eq "done") {
2316                 $Global::halt_done = 1;
2317             } else {
2318                 ::error("--halt $when must be followed by ,success or ,fail.");
2319                 ::wait_and_exit(255);
2320             }
2321             if($pct_count =~ /^(\d+)%$/) {
2322                 $Global::halt_pct = $1/100;
2323             } elsif($pct_count =~ /^(\d+)$/) {
2324                 $Global::halt_count = $1;
2325             } else {
2326                 ::error("--halt $when,$fail_success ".
2327                         "must be followed by ,number or ,percent%.");
2328                 ::wait_and_exit(255);
2329             }
2330         }
2331     }
2332 }
2333
2334 sub parse_replacement_string_options() {
2335     # Deal with --rpl
2336     # Uses:
2337     #   %Global::rpl
2338     #   $Global::parensleft
2339     #   $Global::parensright
2340     #   $opt::parens
2341     #   $Global::parensleft
2342     #   $Global::parensright
2343     #   $opt::plus
2344     #   %Global::plus
2345     #   $opt::I
2346     #   $opt::U
2347     #   $opt::i
2348     #   $opt::basenamereplace
2349     #   $opt::dirnamereplace
2350     #   $opt::seqreplace
2351     #   $opt::slotreplace
2352     #   $opt::basenameextensionreplace
2353
2354     sub rpl($$) {
2355         # Modify %Global::rpl
2356         # Replace $old with $new
2357         my ($old,$new) =  @_;
2358         if($old ne $new) {
2359             $Global::rpl{$new} = $Global::rpl{$old};
2360             delete $Global::rpl{$old};
2361         }
2362     }
2363     my $parens = "{==}";
2364     if(defined $opt::parens) { $parens = $opt::parens; }
2365     my $parenslen = 0.5*length $parens;
2366     $Global::parensleft = substr($parens,0,$parenslen);
2367     $Global::parensright = substr($parens,$parenslen);
2368     if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
2369     if(defined $opt::I) { rpl('{}',$opt::I); }
2370     if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
2371     if(defined $opt::U) { rpl('{.}',$opt::U); }
2372     if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
2373     if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
2374     if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
2375     if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
2376     if(defined $opt::basenameextensionreplace) {
2377        rpl('{/.}',$opt::basenameextensionreplace);
2378     }
2379     for(@opt::rpl) {
2380         # Create $Global::rpl entries for --rpl options
2381         # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2382         my ($shorthand,$long) = split/\s/,$_,2;
2383         $Global::rpl{$shorthand} = $long;
2384     }
2385 }
2386
2387 sub parse_semaphore() {
2388     # Semaphore defaults
2389     # Must be done before computing number of processes and max_line_length
2390     # because when running as a semaphore GNU Parallel does not read args
2391     # Uses:
2392     #   $opt::semaphore
2393     #   $Global::semaphore
2394     #   $opt::semaphoretimeout
2395     #   $Semaphore::timeout
2396     #   $opt::semaphorename
2397     #   $Semaphore::name
2398     #   $opt::fg
2399     #   $Semaphore::fg
2400     #   $opt::wait
2401     #   $Semaphore::wait
2402     #   $opt::bg
2403     #   @opt::a
2404     #   @Global::unget_argv
2405     #   $Global::default_simultaneous_sshlogins
2406     #   $opt::jobs
2407     #   $Global::interactive
2408     $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
2409     if(defined $opt::semaphore) { $Global::semaphore = 1; }
2410     if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
2411     if(defined $opt::semaphorename) { $Global::semaphore = 1; }
2412     if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
2413         $Global::semaphore = 1;
2414     }
2415     if(defined $opt::bg) { $Global::semaphore = 1; }
2416     if(defined $opt::wait and not $opt::sqlmaster) {
2417         $Global::semaphore = 1; @ARGV = "true";
2418     }
2419     if($Global::semaphore) {
2420         if(@opt::a) {
2421             # A semaphore does not take input from neither stdin nor file
2422             ::error("A semaphore does not take input from neither stdin nor a file\n");
2423             ::wait_and_exit(255);
2424         }
2425         @opt::a = ("/dev/null");
2426         # Append a dummy empty argument
2427         # \0 => nothing (not the empty string)
2428         push(@Global::unget_argv, [Arg->new("\0noarg")]);
2429         $Semaphore::timeout = $opt::semaphoretimeout || 0;
2430         if(defined $opt::semaphorename) {
2431             $Semaphore::name = $opt::semaphorename;
2432         } else {
2433             local $/ = "\n";
2434             $Semaphore::name = `tty`;
2435             chomp $Semaphore::name;
2436         }
2437         $Semaphore::fg = $opt::fg;
2438         $Semaphore::wait = $opt::wait;
2439         $Global::default_simultaneous_sshlogins = 1;
2440         if(not defined $opt::jobs) {
2441             $opt::jobs = 1;
2442         }
2443         if($Global::interactive and $opt::bg) {
2444             ::error("Jobs running in the ".
2445                     "background cannot be interactive.");
2446             ::wait_and_exit(255);
2447         }
2448     }
2449 }
2450
2451 sub record_env() {
2452     # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2453     # Returns: N/A
2454     my $ignore_filename = $Global::config_dir . "/ignored_vars";
2455     if(open(my $vars_fh, ">", $ignore_filename)) {
2456         print $vars_fh map { $_,"\n" } keys %ENV;
2457     } else {
2458         ::error("Cannot write to $ignore_filename.");
2459         ::wait_and_exit(255);
2460     }
2461 }
2462
2463 sub open_joblog() {
2464     # Open joblog as specified by --joblog
2465     # Uses:
2466     #   $opt::resume
2467     #   $opt::resume_failed
2468     #   $opt::joblog
2469     #   $opt::results
2470     #   $Global::job_already_run
2471     #   %Global::fd
2472     my $append = 0;
2473     if(($opt::resume or $opt::resume_failed)
2474        and
2475        not ($opt::joblog or $opt::results)) {
2476         ::error("--resume and --resume-failed require --joblog or --results.");
2477         ::wait_and_exit(255);
2478     }
2479     if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
2480         # --joblog +filename = append to filename
2481         $append = 1;
2482     }
2483     if($opt::joblog
2484        and
2485        ($opt::sqlmaster
2486         or
2487         not $opt::sqlworker)) {
2488         # Do not log if --sqlworker
2489         if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
2490             if(open(my $joblog_fh, "<", $opt::joblog)) {
2491                 # Read the joblog
2492                 # Override $/ with \n because -d might be set
2493                 local $/ = "\n";
2494                 # If there is a header: Open as append later
2495                 $append = <$joblog_fh>;
2496                 my $joblog_regexp;
2497                 if($opt::retry_failed) {
2498                     # Make a regexp that only matches commands with exit+signal=0
2499                     # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2500                     $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2501                     my @group;
2502                     while(<$joblog_fh>) {
2503                         if(/$joblog_regexp/o) {
2504                             # This is 30% faster than set_job_already_run($1);
2505                             vec($Global::job_already_run,($1||0),1) = 1;
2506                             $Global::total_completed++;
2507                             $group[$1-1] = "true";
2508                         } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2509                             # Grab out the command
2510                             $group[$1-1] = $3;
2511                         } else {
2512                             chomp;
2513                             ::error("Format of '$opt::joblog' is wrong: $_");
2514                             ::wait_and_exit(255);
2515                         }
2516                     }
2517                     if(@group) {
2518                         my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2519                         unlink($name);
2520                         # Put args into argfile
2521                         if(grep /\0/, @group) {
2522                             # force --null to deal with \n in commandlines
2523                             ::warning("Command lines contain newline. Forcing --null.");
2524                             $opt::null = 1;
2525                             $/ = "\0";
2526                         }
2527                         # Replace \0 with '\n' as used in print_joblog()
2528                         print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2529                         seek $outfh, 0, 0;
2530                         exit_if_disk_full();
2531                         # Set filehandle to -a
2532                         @opt::a = ($outfh);
2533                     }
2534                     # Remove $command (so -a is run)
2535                     @ARGV = ();
2536                 }
2537                 if($opt::resume || $opt::resume_failed) {
2538                     if($opt::resume_failed) {
2539                         # Make a regexp that only matches commands with exit+signal=0
2540                         # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2541                         $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2542                     } else {
2543                         # Just match the job number
2544                         $joblog_regexp='^(\d+)';
2545                     }
2546                     while(<$joblog_fh>) {
2547                         if(/$joblog_regexp/o) {
2548                             # This is 30% faster than set_job_already_run($1);
2549                             vec($Global::job_already_run,($1||0),1) = 1;
2550                             $Global::total_completed++;
2551                         } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2552                             ::error("Format of '$opt::joblog' is wrong: $_");
2553                             ::wait_and_exit(255);
2554                         }
2555                     }
2556                 }
2557                 close $joblog_fh;
2558             }
2559             # $opt::null may be set if the commands contain \n
2560             if($opt::null) { $/ = "\0"; }
2561         }
2562         if($opt::dryrun) {
2563             # Do not write to joblog in a dry-run
2564             if(not open($Global::joblog, ">", "/dev/null")) {
2565                 ::error("Cannot write to --joblog $opt::joblog.");
2566                 ::wait_and_exit(255);
2567             }
2568         } elsif($append) {
2569             # Append to joblog
2570             if(not open($Global::joblog, ">>", $opt::joblog)) {
2571                 ::error("Cannot append to --joblog $opt::joblog.");
2572                 ::wait_and_exit(255);
2573             }
2574         } else {
2575             if($opt::joblog eq "-") {
2576                 # Use STDOUT as joblog
2577                 $Global::joblog = $Global::fd{1};
2578             } elsif(not open($Global::joblog, ">", $opt::joblog)) {
2579                 # Overwrite the joblog
2580                 ::error("Cannot write to --joblog $opt::joblog.");
2581                 ::wait_and_exit(255);
2582             }
2583             print $Global::joblog
2584                 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2585                      "Send", "Receive", "Exitval", "Signal", "Command"
2586                 ). "\n";
2587         }
2588     }
2589 }
2590
2591 sub open_csv() {
2592     if($opt::results) {
2593         # Output as CSV/TSV
2594         if($opt::results eq "-.csv"
2595            or
2596            $opt::results eq "-.tsv") {
2597             # Output as CSV/TSV on stdout
2598             open $Global::csv_fh, ">&", "STDOUT" or
2599                 ::die_bug("Can't dup STDOUT in csv: $!");
2600             # Do not print any other output to STDOUT
2601             # by forcing all other output to /dev/null
2602             open my $fd, ">", "/dev/null" or
2603                 ::die_bug("Can't >/dev/null in csv: $!");
2604             $Global::fd{1} = $fd;
2605             $Global::fd{2} = $fd;
2606         } elsif($Global::csvsep) {
2607             if(not open($Global::csv_fh,">",$opt::results)) {
2608                 ::error("Cannot open results file `$opt::results': ".
2609                         "$!.");
2610                 wait_and_exit(255);
2611             }
2612         }
2613     }
2614 }
2615
2616 sub find_compression_program() {
2617     # Find a fast compression program
2618     # Returns:
2619     #   $compress_program = compress program with options
2620     #   $decompress_program = decompress program with options
2621
2622     # Search for these. Sorted by speed on 128 core
2623
2624     # seq 120000000|shuf > 1gb &
2625     # apt-get update
2626     # apt install make g++ htop
2627     # wget -O - pi.dk/3 | bash
2628     # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2629     # git clone https://github.com/facebook/zstd.git
2630     # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2631     # echo 'lrzip -L $((-$1))'  >/usr/local/bin/lrz
2632     # chmod +x /usr/local/bin/lrz
2633     # wait
2634     # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2635     # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2636     # parallel --shuf -j1  --joblog jl-m --arg-sep ,  parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 ,  {1..3} , $multithread
2637     # parallel --shuf -j50% --delay 1  --joblog jl-s --arg-sep ,  parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 ,  {1..3} , $onethread
2638     # sort -nk4 jl-?
2639
2640     # 1-core:
2641     # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2642     # 4-cores:
2643     # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2644     # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2645     # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2646     # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2647     # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2648
2649     my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2650                  lrz pxz bzip2 lzma xz clzip);
2651     for my $p (@prg) {
2652         if(which($p)) {
2653             return ("$p -c -1","$p -dc");
2654         }
2655     }
2656     # Fall back to cat
2657     return ("cat","cat");
2658 }
2659
2660 sub read_options() {
2661     # Read options from command line, profile and $PARALLEL
2662     # Uses:
2663     #   $opt::shebang_wrap
2664     #   $opt::shebang
2665     #   @ARGV
2666     #   $opt::plain
2667     #   @opt::profile
2668     #   $ENV{'HOME'}
2669     #   $ENV{'PARALLEL'}
2670     # Returns:
2671     #   @ARGV_no_opt = @ARGV without --options
2672
2673     # This must be done first as this may exec myself
2674     if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2675                              $ARGV[0] =~ /^--shebang-?wrap/ or
2676                              $ARGV[0] =~ /^--hashbang/)) {
2677         # Program is called from #! line in script
2678         # remove --shebang-wrap if it is set
2679         $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
2680         # remove --shebang if it is set
2681         $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
2682         # remove --hashbang if it is set
2683         $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
2684         if($opt::shebang) {
2685             my $argfile = Q(pop @ARGV);
2686             # exec myself to split $ARGV[0] into separate fields
2687             exec "$0 --skip-first-line -a $argfile @ARGV";
2688         }
2689         if($opt::shebang_wrap) {
2690             my @options;
2691             my @parser;
2692             if ($^O eq 'freebsd') {
2693                 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2694                 my @nooptions = @ARGV;
2695                 get_options_from_array(\@nooptions);
2696                 while($#ARGV > $#nooptions) {
2697                     push @options, shift @ARGV;
2698                 }
2699                 while(@ARGV and $ARGV[0] ne ":::") {
2700                     push @parser, shift @ARGV;
2701                 }
2702                 if(@ARGV and $ARGV[0] eq ":::") {
2703                     shift @ARGV;
2704                 }
2705             } else {
2706                 @options = shift @ARGV;
2707             }
2708             my $script = Q(shift @ARGV);
2709             # exec myself to split $ARGV[0] into separate fields
2710             exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2711                 "::: @ARGV";
2712         }
2713     }
2714     if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2715         ::warning("--shebang and --shebang-wrap must be the first argument.\n");
2716     }
2717
2718     Getopt::Long::Configure("bundling","require_order");
2719     my @ARGV_copy = @ARGV;
2720     my @ARGV_orig = @ARGV;
2721     # Check if there is a --profile to set @opt::profile
2722     get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
2723     my @ARGV_profile = ();
2724     my @ARGV_env = ();
2725     if(not $opt::plain) {
2726         # Add options from $PARALLEL_HOME/config and other profiles
2727         my @config_profiles = (
2728             "/etc/parallel/config",
2729             (map { "$_/config" } @Global::config_dirs),
2730             $ENV{'HOME'}."/.parallelrc");
2731         my @profiles = @config_profiles;
2732         if(@opt::profile) {
2733             # --profile overrides default profiles
2734             @profiles = ();
2735             for my $profile (@opt::profile) {
2736                 if($profile =~ m:^\./|^/:) {
2737                     # Look for ./profile in .
2738                     # Look for /profile in /
2739                     push @profiles, grep { -r $_ } $profile;
2740                 } else {
2741                     # Look for the $profile in @Global::config_dirs
2742                     push @profiles, grep { -r $_ }
2743                     map { "$_/$profile" } @Global::config_dirs;
2744                 }
2745             }
2746         }
2747         for my $profile (@profiles) {
2748             if(-r $profile) {
2749                 ::debug("init","Read $profile\n");
2750                 local $/ = "\n";
2751                 open (my $in_fh, "<", $profile) ||
2752                     ::die_bug("read-profile: $profile");
2753                 while(<$in_fh>) {
2754                     /^\s*\#/ and next;
2755                     chomp;
2756                     push @ARGV_profile, shell_words($_);
2757                 }
2758                 close $in_fh;
2759             } else {
2760                 if(grep /^$profile$/, @config_profiles) {
2761                     # config file is not required to exist
2762                 } else {
2763                     ::error("$profile not readable.");
2764                     wait_and_exit(255);
2765                 }
2766             }
2767         }
2768         # Add options from shell variable $PARALLEL
2769         if($ENV{'PARALLEL'}) {
2770             push @ARGV_env, shell_words($ENV{'PARALLEL'});
2771         }
2772         # Add options from env_parallel.csh via $PARALLEL_CSH
2773         if($ENV{'PARALLEL_CSH'}) {
2774             push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'});
2775         }
2776     }
2777     Getopt::Long::Configure("bundling","require_order");
2778     get_options_from_array(\@ARGV_profile) || die_usage();
2779     get_options_from_array(\@ARGV_env) || die_usage();
2780     get_options_from_array(\@ARGV) || die_usage();
2781     # What were the options given on the command line?
2782     # Used to start --sqlworker
2783     my $ai = arrayindex(\@ARGV_orig, \@ARGV);
2784     @Global::options_in_argv = @ARGV_orig[0..$ai-1];
2785     # Prepend non-options to @ARGV (such as commands like 'nice')
2786     unshift @ARGV, @ARGV_profile, @ARGV_env;
2787     return @ARGV;
2788 }
2789
2790 sub arrayindex() {
2791     # Similar to Perl's index function, but for arrays
2792     # Input:
2793     #   $arr_ref1 = ref to @array1 to search in
2794     #   $arr_ref2 = ref to @array2 to search for
2795     # Returns:
2796     #   $pos = position of @array1 in @array2, -1 if not found
2797     my ($arr_ref1,$arr_ref2) = @_;
2798     my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
2799     my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
2800     my $i = index($array1_as_string,$array2_as_string,0);
2801     if($i == -1) { return -1 }
2802     my @before = split /\0/, substr($array1_as_string,0,$i);
2803     return $#before;
2804 }
2805
2806 sub read_args_from_command_line() {
2807     # Arguments given on the command line after:
2808     #   ::: ($Global::arg_sep)
2809     #   :::: ($Global::arg_file_sep)
2810     #   :::+ ($Global::arg_sep with --link)
2811     #   ::::+ ($Global::arg_file_sep with --link)
2812     # Removes the arguments from @ARGV and:
2813     # - puts filenames into -a
2814     # - puts arguments into files and add the files to -a
2815     # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2816     # Input:
2817     #   @::ARGV = command option ::: arg arg arg :::: argfiles
2818     # Uses:
2819     #   $Global::arg_sep
2820     #   $Global::arg_file_sep
2821     #   $opt::internal_pipe_means_argfiles
2822     #   $opt::pipe
2823     #   @opt::a
2824     # Returns:
2825     #   @argv_no_argsep = @::ARGV without ::: and :::: and following args
2826     my @new_argv = ();
2827     for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2828         if($arg eq $Global::arg_sep
2829            or
2830            $arg eq $Global::arg_sep."+"
2831            or
2832            $arg eq $Global::arg_file_sep
2833            or
2834            $arg eq $Global::arg_file_sep."+") {
2835             my $group_sep = $arg; # This group of arguments is args or argfiles
2836             my @group;
2837             while(defined ($arg = shift @ARGV)) {
2838                 if($arg eq $Global::arg_sep
2839                    or
2840                    $arg eq $Global::arg_sep."+"
2841                    or
2842                    $arg eq $Global::arg_file_sep
2843                    or
2844                    $arg eq $Global::arg_file_sep."+") {
2845                     # exit while loop if finding new separator
2846                     last;
2847                 } else {
2848                     # If not hitting ::: :::+ :::: or ::::+
2849                     # Append it to the group
2850                     push @group, $arg;
2851                 }
2852             }
2853             my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0;
2854             my $is_file = ($group_sep eq $Global::arg_file_sep
2855                            or
2856                            $group_sep eq $Global::arg_file_sep."+");
2857             if($is_file) {
2858                 # :::: / ::::+
2859                 push @opt::linkinputsource, map { $is_linked } @group;
2860             } else {
2861                 # ::: / :::+
2862                 push @opt::linkinputsource, $is_linked;
2863             }
2864             if($is_file
2865                or ($opt::internal_pipe_means_argfiles and $opt::pipe)
2866                 ) {
2867                 # Group of file names on the command line.
2868                 # Append args into -a
2869                 push @opt::a, @group;
2870             } else {
2871                 # Group of arguments on the command line.
2872                 # Put them into a file.
2873                 # Create argfile
2874                 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2875                 unlink($name);
2876                 # Put args into argfile
2877                 print $outfh map { $_,$/ } @group;
2878                 seek $outfh, 0, 0;
2879                 exit_if_disk_full();
2880                 # Append filehandle to -a
2881                 push @opt::a, $outfh;
2882             }
2883             if(defined($arg)) {
2884                 # $arg is ::: :::+ :::: or ::::+
2885                 # so there is another group
2886                 redo;
2887             } else {
2888                 # $arg is undef -> @ARGV empty
2889                 last;
2890             }
2891         }
2892         push @new_argv, $arg;
2893     }
2894     # Output: @ARGV = command to run with options
2895     return @new_argv;
2896 }
2897
2898 sub cleanup() {
2899     # Returns: N/A
2900     unlink keys %Global::unlink;
2901     map { rmdir $_ } keys %Global::unlink;
2902     if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
2903     for(keys %Global::sshmaster) {
2904         # If 'ssh -M's are running: kill them
2905         kill "TERM", $_;
2906     }
2907 }
2908
2909
2910 sub __QUOTING_ARGUMENTS_FOR_SHELL__() {}
2911
2912 sub shell_quote(@) {
2913     # Input:
2914     #   @strings = strings to be quoted
2915     # Returns:
2916     #   @shell_quoted_strings = string quoted as needed by the shell
2917     return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
2918 }
2919
2920 sub shell_quote_scalar_rc($) {
2921     # Quote for the rc-shell
2922     my $a = $_[0];
2923     if(defined $a) {
2924         if(($a =~ s/'/''/g)
2925            +
2926            ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2927             # A string was replaced
2928             # No need to test for "" or \0
2929         } elsif($a eq "") {
2930             $a = "''";
2931         } elsif($a eq "\0") {
2932             $a = "";
2933         }
2934     }
2935     return $a;
2936 }
2937
2938 sub shell_quote_scalar_csh($) {
2939     # Quote for (t)csh
2940     my $a = $_[0];
2941     if(defined $a) {
2942         # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2943         # This is 1% faster than the above
2944         if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2945            +
2946            # quote newline in csh as \\\n
2947            ($a =~ s/[\n]/"\\\n"/go)) {
2948             # A string was replaced
2949             # No need to test for "" or \0
2950         } elsif($a eq "") {
2951             $a = "''";
2952         } elsif($a eq "\0") {
2953             $a = "";
2954         }
2955     }
2956     return $a;
2957 }
2958
2959 sub shell_quote_scalar_default($) {
2960     # Quote for other shells (Bourne compatibles)
2961     # Inputs:
2962     #   $string = string to be quoted
2963     # Returns:
2964     #   $shell_quoted = string quoted as needed by the shell
2965     my $s = $_[0];
2966     if($s =~ /[^-_.+a-z0-9\/]/i) {
2967         $s =~ s/'/'"'"'/g; # "-quote single quotes
2968         $s = "'$s'";       # '-quote entire string
2969         $s =~ s/^''//;     # Remove unneeded '' at ends
2970         $s =~ s/''$//;     # (faster than s/^''|''$//g)
2971         return $s;
2972     } elsif ($s eq "") {
2973         return "''";
2974     } else {
2975         # No quoting needed
2976         return $s;
2977     }
2978 }
2979
2980 sub shell_quote_scalar($) {
2981     # Quote the string so the shell will not expand any special chars
2982     # Inputs:
2983     #   $string = string to be quoted
2984     # Returns:
2985     #   $shell_quoted = string quoted as needed by the shell
2986
2987     # Speed optimization: Choose the correct shell_quote_scalar_*
2988     # and call that directly from now on
2989     no warnings 'redefine';
2990     if($Global::cshell) {
2991         # (t)csh
2992         *shell_quote_scalar = \&shell_quote_scalar_csh;
2993     } elsif($Global::shell =~ m:(^|/)rc$:) {
2994         # rc-shell
2995         *shell_quote_scalar = \&shell_quote_scalar_rc;
2996     } else {
2997         # other shells
2998         *shell_quote_scalar = \&shell_quote_scalar_default;
2999     }
3000     # The sub is now redefined. Call it
3001     return shell_quote_scalar($_[0]);
3002 }
3003
3004 sub Q($) {
3005     # Q alias for ::shell_quote_scalar
3006     my $ret = shell_quote_scalar($_[0]);
3007     no warnings 'redefine';
3008     *Q = \&::shell_quote_scalar;
3009     return $ret;
3010 }
3011
3012 sub shell_quote_file($) {
3013     # Quote the string so shell will not expand any special chars
3014     # and prepend ./ if needed
3015     # Input:
3016     #   $filename = filename to be shell quoted
3017     # Returns:
3018     #   $quoted_filename = filename quoted with \ and ./ if needed
3019     my $a = shift;
3020     if(defined $a) {
3021         if($a =~ m:^/: or $a =~ m:^\./:) {
3022             # /abs/path or ./rel/path => skip
3023         } else {
3024             # rel/path => ./rel/path
3025             $a = "./".$a;
3026         }
3027     }
3028     return Q($a);
3029 }
3030
3031 sub shell_words(@) {
3032     # Input:
3033     #   $string = shell line
3034     # Returns:
3035     #   @shell_words = $string split into words as shell would do
3036     $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3037     return Text::ParseWords::shellwords(@_);
3038 }
3039
3040 sub perl_quote_scalar($) {
3041     # Quote the string so perl's eval will not expand any special chars
3042     # Inputs:
3043     #   $string = string to be quoted
3044     # Returns:
3045     #   $perl_quoted = string quoted with \ as needed by perl's eval
3046     my $a = $_[0];
3047     if(defined $a) {
3048         $a =~ s/[\\\"\$\@]/\\$&/go;
3049     }
3050     return $a;
3051 }
3052
3053 # -w complains about prototype
3054 sub pQ($) {
3055     # pQ alias for ::perl_quote_scalar
3056     my $ret = perl_quote_scalar($_[0]);
3057     *pQ = \&::perl_quote_scalar;
3058     return $ret;
3059 }
3060
3061 sub unquote_printf() {
3062     # Convert \t \n \r \000 \0
3063     # Inputs:
3064     #   $string = string with \t \n \r \num \0
3065     # Returns:
3066     #   $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3067     $_ = shift;
3068     s/\\t/\t/g;
3069     s/\\n/\n/g;
3070     s/\\r/\r/g;
3071     s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3072     s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3073     return $_;
3074 }
3075
3076
3077 sub __FILEHANDLES__() {}
3078
3079
3080 sub save_stdin_stdout_stderr() {
3081     # Remember the original STDIN, STDOUT and STDERR
3082     # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3083     # Uses:
3084     #   %Global::fd
3085     #   $Global::original_stderr
3086     #   $Global::original_stdin
3087     # Returns: N/A
3088
3089     # TODO Disabled until we have an open3 that will take n filehandles
3090     # for my $fdno (1..61) {
3091     # # /dev/fd/62 and above are used by bash for <(cmd)
3092     # # Find file descriptors that are already opened (by the shell)
3093     # Only focus on stdout+stderr for now
3094     for my $fdno (1..2) {
3095         my $fh;
3096         # 2-argument-open is used to be compatible with old perl 5.8.0
3097         # bug #43570: Perl 5.8.0 creates 61 files
3098         if(open($fh,">&=$fdno")) {
3099             $Global::fd{$fdno}=$fh;
3100         }
3101     }
3102     open $Global::original_stderr, ">&", "STDERR" or
3103         ::die_bug("Can't dup STDERR: $!");
3104     open $Global::status_fd, ">&", "STDERR" or
3105         ::die_bug("Can't dup STDERR: $!");
3106     open $Global::original_stdin, "<&", "STDIN" or
3107         ::die_bug("Can't dup STDIN: $!");
3108 }
3109
3110 sub enough_file_handles() {
3111     # Check that we have enough filehandles available for starting
3112     # another job
3113     # Uses:
3114     #   $opt::ungroup
3115     #   %Global::fd
3116     # Returns:
3117     #   1 if ungrouped (thus not needing extra filehandles)
3118     #   0 if too few filehandles
3119     #   1 if enough filehandles
3120     if(not $opt::ungroup) {
3121         my %fh;
3122         my $enough_filehandles = 1;
3123         # perl uses 7 filehandles for something?
3124         # open3 uses 2 extra filehandles temporarily
3125         # We need a filehandle for each redirected file descriptor
3126         # (normally just STDOUT and STDERR)
3127         for my $i (1..(7+2+keys %Global::fd)) {
3128             $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3129         }
3130         for (values %fh) { close $_; }
3131         return $enough_filehandles;
3132     } else {
3133         # Ungrouped does not need extra file handles
3134         return 1;
3135     }
3136 }
3137
3138 sub open_or_exit($) {
3139     # Open a file name or exit if the file cannot be opened
3140     # Inputs:
3141     #   $file = filehandle or filename to open
3142     # Uses:
3143     #   $Global::original_stdin
3144     # Returns:
3145     #   $fh = file handle to read-opened file
3146     my $file = shift;
3147     if($file eq "-") {
3148         return ($Global::original_stdin || *STDIN);
3149     }
3150     if(ref $file eq "GLOB") {
3151         # This is an open filehandle
3152         return $file;
3153     }
3154     my $fh = gensym;
3155     if(not open($fh, "<", $file)) {
3156         ::error("Cannot open input file `$file': No such file or directory.");
3157         wait_and_exit(255);
3158     }
3159     return $fh;
3160 }
3161
3162 sub set_fh_blocking($) {
3163     # Set filehandle as blocking
3164     # Inputs:
3165     #   $fh = filehandle to be blocking
3166     # Returns:
3167     #   N/A
3168     my $fh = shift;
3169     $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3170     my $flags;
3171     # Get the current flags on the filehandle
3172     fcntl($fh, &F_GETFL, $flags) || die $!;
3173     # Remove non-blocking from the flags
3174     $flags &= ~&O_NONBLOCK;
3175     # Set the flags on the filehandle
3176     fcntl($fh, &F_SETFL, $flags) || die $!;
3177 }
3178
3179 sub set_fh_non_blocking($) {
3180     # Set filehandle as non-blocking
3181     # Inputs:
3182     #   $fh = filehandle to be blocking
3183     # Returns:
3184     #   N/A
3185     my $fh = shift;
3186     $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3187     my $flags;
3188     # Get the current flags on the filehandle
3189     fcntl($fh, &F_GETFL, $flags) || die $!;
3190     # Add non-blocking to the flags
3191     $flags |= &O_NONBLOCK;
3192     # Set the flags on the filehandle
3193     fcntl($fh, &F_SETFL, $flags) || die $!;
3194 }
3195
3196
3197 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3198
3199
3200 # Variable structure:
3201 #
3202 #    $Global::running{$pid} = Pointer to Job-object
3203 #    @Global::virgin_jobs = Pointer to Job-object that have received no input
3204 #    $Global::host{$sshlogin} = Pointer to SSHLogin-object
3205 #    $Global::total_running = total number of running jobs
3206 #    $Global::total_started = total jobs started
3207 #    $Global::max_procs_file = filename if --jobs is given a filename
3208 #    $Global::JobQueue = JobQueue object for the queue of jobs
3209 #    $Global::timeoutq = queue of times where jobs timeout
3210 #    $Global::newest_job = Job object of the most recent job started
3211 #    $Global::newest_starttime = timestamp of $Global::newest_job
3212 #    @Global::sshlogin
3213 #    $Global::minimal_command_line_length = minimum length supported by all sshlogins
3214 #    $Global::start_no_new_jobs = should more jobs be started?
3215 #    $Global::original_stderr = file handle for STDERR when the program started
3216 #    $Global::total_started = total number of jobs started
3217 #    $Global::joblog = filehandle of joblog
3218 #    $Global::debug = Is debugging on?
3219 #    $Global::exitstatus = status code of GNU   Parallel
3220 #    $Global::quoting = quote the command to run
3221
3222 sub init_run_jobs() {
3223     # Set Global variables and progress signal handlers
3224     # Do the copying of basefiles
3225     # Returns: N/A
3226     $Global::total_running = 0;
3227     $Global::total_started = 0;
3228     $SIG{USR1} = \&list_running_jobs;
3229     $SIG{USR2} = \&toggle_progress;
3230     if(@opt::basefile) { setup_basefile(); }
3231 }
3232
3233 {
3234     my $last_time;
3235     my %last_mtime;
3236     my $max_procs_file_last_mod;
3237
3238     sub changed_procs_file {
3239         # If --jobs is a file and it is modfied:
3240         # Force recomputing of max_jobs_running for each $sshlogin
3241         # Uses:
3242         #   $Global::max_procs_file
3243         #   %Global::host
3244         # Returns: N/A
3245         if($Global::max_procs_file) {
3246             # --jobs filename
3247             my $mtime = (stat($Global::max_procs_file))[9];
3248             $max_procs_file_last_mod ||= 0;
3249             if($mtime > $max_procs_file_last_mod) {
3250                 # file changed: Force re-computing max_jobs_running
3251                 $max_procs_file_last_mod = $mtime;
3252                 for my $sshlogin (values %Global::host) {
3253                     $sshlogin->set_max_jobs_running(undef);
3254                 }
3255             }
3256         }
3257     }
3258
3259     sub changed_sshloginfile {
3260         # If --slf is changed:
3261         #   reload --slf
3262         #   filter_hosts
3263         #   setup_basefile
3264         # Uses:
3265         #   @opt::sshloginfile
3266         #   @Global::sshlogin
3267         #   %Global::host
3268         #   $opt::filter_hosts
3269         # Returns: N/A
3270         if(@opt::sshloginfile) {
3271             # Is --sshloginfile changed?
3272             for my $slf (@opt::sshloginfile) {
3273                 my $actual_file = expand_slf_shorthand($slf);
3274                 my $mtime = (stat($actual_file))[9];
3275                 $last_mtime{$actual_file} ||= $mtime;
3276                 if($mtime - $last_mtime{$actual_file} > 1) {
3277                     ::debug("run","--sshloginfile $actual_file changed. reload\n");
3278                     $last_mtime{$actual_file} = $mtime;
3279                     # Reload $slf
3280                     # Empty sshlogins
3281                     @Global::sshlogin = ();
3282                     for (values %Global::host) {
3283                         # Don't start new jobs on any host
3284                         # except the ones added back later
3285                         $_->set_max_jobs_running(0);
3286                     }
3287                     # This will set max_jobs_running on the SSHlogins
3288                     read_sshloginfile($actual_file);
3289                     parse_sshlogin();
3290                     $opt::filter_hosts and filter_hosts();
3291                     setup_basefile();
3292                 }
3293             }
3294         }
3295     }
3296
3297     sub start_more_jobs {
3298         # Run start_another_job() but only if:
3299         #   * not $Global::start_no_new_jobs set
3300         #   * not JobQueue is empty
3301         #   * not load on server is too high
3302         #   * not server swapping
3303         #   * not too short time since last remote login
3304         # Uses:
3305         #   %Global::host
3306         #   $Global::start_no_new_jobs
3307         #   $Global::JobQueue
3308         #   $opt::pipe
3309         #   $opt::load
3310         #   $opt::noswap
3311         #   $opt::delay
3312         #   $Global::newest_starttime
3313         # Returns:
3314         #   $jobs_started = number of jobs started
3315         my $jobs_started = 0;
3316         if($Global::start_no_new_jobs) {
3317             return $jobs_started;
3318         }
3319         if(time - ($last_time||0) > 1) {
3320             # At most do this every second
3321             $last_time = time;
3322             changed_procs_file();
3323             changed_sshloginfile();
3324         }
3325         # This will start 1 job on each --sshlogin (if possible)
3326         # thus distribute the jobs on the --sshlogins round robin
3327         for my $sshlogin (values %Global::host) {
3328             if($Global::JobQueue->empty() and not $opt::pipe) {
3329                 # No more jobs in the queue
3330                 last;
3331             }
3332             debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
3333                   $sshlogin->jobs_running(), "\n");
3334             if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
3335                 if($opt::delay
3336                    and
3337                    $opt::delay - 0.008 > ::now() - $Global::newest_starttime) {
3338                     # It has been too short since last start
3339                     next;
3340                 }
3341                 if($opt::load and $sshlogin->loadavg_too_high()) {
3342                     # The load is too high or unknown
3343                     next;
3344                 }
3345                 if($opt::noswap and $sshlogin->swapping()) {
3346                     # The server is swapping
3347                     next;
3348                 }
3349                 if($opt::limit and $sshlogin->limit()) {
3350                     # Over limit
3351                     next;
3352                 }
3353                 if(($opt::memfree or $opt::memsuspend)
3354                    and
3355                    $sshlogin->memfree() < $Global::memlimit) {
3356                     # The server has not enough mem free
3357                     ::debug("mem", "Not starting job: not enough mem\n");
3358                     next;
3359                 }
3360                 if($sshlogin->too_fast_remote_login()) {
3361                     # It has been too short since
3362                     next;
3363                 }
3364                 debug("run", $sshlogin->string(),
3365                       " has ", $sshlogin->jobs_running(),
3366                       " out of ", $sshlogin->max_jobs_running(),
3367                       " jobs running. Start another.\n");
3368                 if(start_another_job($sshlogin) == 0) {
3369                     # No more jobs to start on this $sshlogin
3370                     debug("run","No jobs started on ",
3371                           $sshlogin->string(), "\n");
3372                     next;
3373                 }
3374                 $sshlogin->inc_jobs_running();
3375                 $sshlogin->set_last_login_at(::now());
3376                 $jobs_started++;
3377             }
3378             debug("run","Running jobs after on ", $sshlogin->string(), ": ",
3379                   $sshlogin->jobs_running(), " of ",
3380                   $sshlogin->max_jobs_running(), "\n");
3381         }
3382
3383         return $jobs_started;
3384     }
3385 }
3386
3387 {
3388     my $no_more_file_handles_warned;
3389
3390     sub start_another_job() {
3391         # If there are enough filehandles
3392         #   and JobQueue not empty
3393         #   and not $job is in joblog
3394         # Then grab a job from Global::JobQueue,
3395         #   start it at sshlogin
3396         #   mark it as virgin_job
3397         # Inputs:
3398         #   $sshlogin = the SSHLogin to start the job on
3399         # Uses:
3400         #   $Global::JobQueue
3401         #   $opt::pipe
3402         #   $opt::results
3403         #   $opt::resume
3404         #   @Global::virgin_jobs
3405         # Returns:
3406         #   1 if another jobs was started
3407         #   0 otherwise
3408         my $sshlogin = shift;
3409         # Do we have enough file handles to start another job?
3410         if(enough_file_handles()) {
3411             if($Global::JobQueue->empty() and not $opt::pipe) {
3412                 # No more commands to run
3413                 debug("start", "Not starting: JobQueue empty\n");
3414                 return 0;
3415             } else {
3416                 my $job;
3417                 # Skip jobs already in job log
3418                 # Skip jobs already in results
3419                 do {
3420                     $job = get_job_with_sshlogin($sshlogin);
3421                     if(not defined $job) {
3422                         # No command available for that sshlogin
3423                         debug("start", "Not starting: no jobs available for ",
3424                               $sshlogin->string(), "\n");
3425                         return 0;
3426                     }
3427                     if($job->is_already_in_joblog()) {
3428                         $job->free_slot();
3429                     }
3430                 } while ($job->is_already_in_joblog()
3431                          or
3432                          ($opt::results and $opt::resume and $job->is_already_in_results()));
3433                 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
3434                       $job->replaced(),"'\n");
3435                 if($job->start()) {
3436                     if($opt::pipe) {
3437                         if($job->virgin()) {
3438                             push(@Global::virgin_jobs,$job);
3439                         } else {
3440                             # Block already set: This is a retry
3441                             if(fork()) {
3442                                 ::debug("pipe","\n\nWriting ",length ${$job->block_ref()},
3443                                         " to ", $job->seq(),"\n");
3444                                 close $job->fh(0,"w");
3445                             } else {
3446                                 $job->write($job->block_ref());
3447                                 close $job->fh(0,"w");
3448                                 exit(0);
3449                             }
3450                         }
3451                     }
3452                     debug("start", "Started as seq ", $job->seq(),
3453                           " pid:", $job->pid(), "\n");
3454                     return 1;
3455                 } else {
3456                     # Not enough processes to run the job.
3457                     # Put it back on the queue.
3458                     $Global::JobQueue->unget($job);
3459                     # Count down the number of jobs to run for this SSHLogin.
3460                     my $max = $sshlogin->max_jobs_running();
3461                     if($max > 1) { $max--; } else {
3462                         my @arg;
3463                         for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3464                             push @arg, map { $_->orig() } @$record;
3465                         }
3466                         ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
3467                         ::wait_and_exit(255);
3468                     }
3469                     $sshlogin->set_max_jobs_running($max);
3470                     # Sleep up to 300 ms to give other processes time to die
3471                     ::usleep(rand()*300);
3472                     ::warning("No more processes: ".
3473                               "Decreasing number of running jobs to $max.",
3474                               "Raising ulimit -u or /etc/security/limits.conf may help.");
3475                     return 0;
3476                 }
3477             }
3478         } else {
3479             # No more file handles
3480             $no_more_file_handles_warned++ or
3481                 ::warning("No more file handles. ",
3482                           "Raising ulimit -n or /etc/security/limits.conf may help.");
3483             debug("start", "No more file handles. ");
3484             return 0;
3485         }
3486     }
3487 }
3488
3489 sub init_progress() {
3490     # Uses:
3491     #   $opt::bar
3492     # Returns:
3493     #   list of computers for progress output
3494     $|=1;
3495     if($opt::bar) {
3496         return("","");
3497     }
3498     my %progress = progress();
3499     return ("\nComputers / CPU cores / Max jobs to run\n",
3500             $progress{'workerlist'});
3501 }
3502
3503 sub drain_job_queue(@) {
3504     # Uses:
3505     #   $opt::progress
3506     #   $Global::total_running
3507     #   $Global::max_jobs_running
3508     #   %Global::running
3509     #   $Global::JobQueue
3510     #   %Global::host
3511     #   $Global::start_no_new_jobs
3512     # Returns: N/A
3513     my @command = @_;
3514     if($opt::progress) {
3515         ::status_no_nl(init_progress());
3516     }
3517     my $last_header = "";
3518     my $sleep = 0.2;
3519     do {
3520         while($Global::total_running > 0) {
3521             debug("init",$Global::total_running, "==", scalar
3522                   keys %Global::running," slots: ", $Global::max_jobs_running);
3523             if($opt::pipe) {
3524                 # When using --pipe sometimes file handles are not
3525                 # closed properly
3526                 for my $job (values %Global::running) {
3527                     close $job->fh(0,"w");
3528                 }
3529             }
3530             if($opt::progress) {
3531                 my %progress = progress();
3532                 if($last_header ne $progress{'header'}) {
3533                     ::status("", $progress{'header'});
3534                     $last_header = $progress{'header'};
3535                 }
3536                 ::status_no_nl("\r",$progress{'status'});
3537             }
3538             if($Global::total_running < $Global::max_jobs_running
3539                and not $Global::JobQueue->empty()) {
3540                 # These jobs may not be started because of loadavg
3541                 # or too little time between each ssh login.
3542                 if(start_more_jobs() > 0) {
3543                     # Exponential back-on if jobs were started
3544                     $sleep = $sleep/2+0.001;
3545                 }
3546             }
3547             # Exponential back-off sleeping
3548             $sleep = ::reap_usleep($sleep);
3549         }
3550         if(not $Global::JobQueue->empty()) {
3551             # These jobs may not be started:
3552             # * because there the --filter-hosts has removed all
3553             if(not %Global::host) {
3554                 ::error("There are no hosts left to run on.");
3555                 ::wait_and_exit(255);
3556             }
3557             # * because of loadavg
3558             # * because of too little time between each ssh login.
3559             $sleep = ::reap_usleep($sleep);
3560             start_more_jobs();
3561             if($Global::max_jobs_running == 0) {
3562                 ::warning("There are no job slots available. Increase --jobs.");
3563             }
3564         }
3565         while($opt::sqlmaster and not $Global::sql->finished()) {
3566             # SQL master
3567             $sleep = ::reap_usleep($sleep);
3568             start_more_jobs();
3569             if($Global::start_sqlworker) {
3570                 # Start an SQL worker as we are now sure there is work to do
3571                 $Global::start_sqlworker = 0;
3572                 if(my $pid = fork()) {
3573                     $Global::unkilled_sqlworker = $pid;
3574                 } else {
3575                     # Replace --sql/--sqlandworker with --sqlworker
3576                     my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3577                     # exec the --sqlworker
3578                     exec($0,@ARGV,@command);
3579                 }
3580             }
3581         }
3582     } while ($Global::total_running > 0
3583              or
3584              not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3585              or
3586              $opt::sqlmaster and not $Global::sql->finished());
3587     if($opt::progress) {
3588         my %progress = progress();
3589         ::status("\r".$progress{'status'});
3590     }
3591 }
3592
3593 sub toggle_progress() {
3594     # Turn on/off progress view
3595     # Uses:
3596     #   $opt::progress
3597     # Returns: N/A
3598     $opt::progress = not $opt::progress;
3599     if($opt::progress) {
3600         ::status_no_nl(init_progress());
3601     }
3602 }
3603
3604 sub progress() {
3605     # Uses:
3606     #   $opt::bar
3607     #   $opt::eta
3608     #   %Global::host
3609     #   $Global::total_started
3610     # Returns:
3611     #   $workerlist = list of workers
3612     #   $header = that will fit on the screen
3613     #   $status = message that will fit on the screen
3614     if($opt::bar) {
3615         return ("workerlist" => "", "header" => "", "status" => bar());
3616     }
3617     my $eta = "";
3618     my ($status,$header)=("","");
3619     if($opt::eta) {
3620         my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3621             compute_eta();
3622         $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs  ",
3623                        $this_eta, $left, $avgtime);
3624     }
3625     my $termcols = terminal_columns();
3626     my @workers = sort keys %Global::host;
3627     my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3628     my $workerno = 1;
3629     my %workerno = map { ($_=>$workerno++) } @workers;
3630     my $workerlist = "";
3631     for my $w (@workers) {
3632         $workerlist .=
3633         $workerno{$w}.":".$sshlogin{$w} ." / ".
3634             ($Global::host{$w}->ncpus() || "-")." / ".
3635             $Global::host{$w}->max_jobs_running()."\n";
3636     }
3637     $status = "x"x($termcols+1);
3638     # Select an output format that will fit on a single line
3639     if(length $status > $termcols) {
3640         # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3641         $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
3642         $status = $eta .
3643             join(" ",map
3644                  {
3645                      if($Global::total_started) {
3646                          my $completed = ($Global::host{$_}->jobs_completed()||0);
3647                          my $running = $Global::host{$_}->jobs_running();
3648                          my $time = $completed ? (time-$^T)/($completed) : "0";
3649                          sprintf("%s:%d/%d/%d%%/%.1fs ",
3650                                  $sshlogin{$_}, $running, $completed,
3651                                  ($running+$completed)*100
3652                                  / $Global::total_started, $time);
3653                      }
3654                  } @workers);
3655     }
3656     if(length $status > $termcols) {
3657         # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3658         $header = "Computer:jobs running/jobs completed/%of started jobs";
3659         $status = $eta .
3660             join(" ",map
3661                  {
3662                      if($Global::total_started) {
3663                          my $completed = ($Global::host{$_}->jobs_completed()||0);
3664                          my $running = $Global::host{$_}->jobs_running();
3665                          my $time = $completed ? (time-$^T)/($completed) : "0";
3666                          sprintf("%s:%d/%d/%d%%/%.1fs ",
3667                                  $workerno{$_}, $running, $completed,
3668                                  ($running+$completed)*100
3669                                  / $Global::total_started, $time);
3670                      }
3671                  } @workers);
3672     }
3673     if(length $status > $termcols) {
3674         # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3675         $header = "Computer:jobs running/jobs completed/%of started jobs";
3676         $status = $eta .
3677             join(" ",map
3678                  {
3679                      if($Global::total_started) {
3680                          sprintf("%s:%d/%d/%d%%",
3681                                  $sshlogin{$_},
3682                                  $Global::host{$_}->jobs_running(),
3683                                  ($Global::host{$_}->jobs_completed()||0),
3684                                  ($Global::host{$_}->jobs_running()+
3685                                   ($Global::host{$_}->jobs_completed()||0))*100
3686                                  / $Global::total_started)
3687                      }
3688                  }
3689                  @workers);
3690     }
3691     if(length $status > $termcols) {
3692         # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3693         $header = "Computer:jobs running/jobs completed/%of started jobs";
3694         $status = $eta .
3695             join(" ",map
3696                  {
3697                      if($Global::total_started) {
3698                          sprintf("%s:%d/%d/%d%%",
3699                                  $workerno{$_},
3700                                  $Global::host{$_}->jobs_running(),
3701                                  ($Global::host{$_}->jobs_completed()||0),
3702                                  ($Global::host{$_}->jobs_running()+
3703                                   ($Global::host{$_}->jobs_completed()||0))*100
3704                                  / $Global::total_started)
3705                      }
3706                  }
3707                  @workers);
3708     }
3709     if(length $status > $termcols) {
3710         # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3711         $header = "Computer:jobs running/jobs completed";
3712         $status = $eta .
3713             join(" ",map
3714                        { sprintf("%s:%d/%d",
3715                                  $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3716                                  ($Global::host{$_}->jobs_completed()||0)) }
3717                        @workers);
3718     }
3719     if(length $status > $termcols) {
3720         # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3721         $header = "Computer:jobs running/jobs completed";
3722         $status = $eta .
3723             join(" ",map
3724                        { sprintf("%s:%d/%d",
3725                                  $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3726                                  ($Global::host{$_}->jobs_completed()||0)) }
3727                        @workers);
3728     }
3729     if(length $status > $termcols) {
3730         # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3731         $header = "Computer:jobs running/jobs completed";
3732         $status = $eta .
3733             join(" ",map
3734                        { sprintf("%s:%d/%d",
3735                                  $workerno{$_}, $Global::host{$_}->jobs_running(),
3736                                  ($Global::host{$_}->jobs_completed()||0)) }
3737                        @workers);
3738     }
3739     if(length $status > $termcols) {
3740         # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3741         $header = "Computer:jobs completed";
3742         $status = $eta .
3743             join(" ",map
3744                        { sprintf("%s:%d",
3745                                  $sshlogin{$_},
3746                                  ($Global::host{$_}->jobs_completed()||0)) }
3747                        @workers);
3748     }
3749     if(length $status > $termcols) {
3750         # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3751         $header = "Computer:jobs completed";
3752         $status = $eta .
3753             join(" ",map
3754                        { sprintf("%s:%d",
3755                                  $workerno{$_},
3756                                  ($Global::host{$_}->jobs_completed()||0)) }
3757                        @workers);
3758     }
3759     return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
3760 }
3761
3762 {
3763
3764     my ($first_completed, $smoothed_avg_time, $last_eta);
3765
3766     sub compute_eta {
3767         # Calculate important numbers for ETA
3768         # Returns:
3769         #   $total = number of jobs in total
3770         #   $completed = number of jobs completed
3771         #   $left = number of jobs left
3772         #   $pctcomplete = percent of jobs completed
3773         #   $avgtime = averaged time
3774         #   $eta = smoothed eta
3775         my $completed = $Global::total_completed;
3776         # In rare cases with -X will $completed > total_jobs()
3777         my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3778         my $left = $total - $completed;
3779         if(not $completed) {
3780             return($total, $completed, $left, 0, 0, 0);
3781         }
3782         my $pctcomplete = ::min($completed / $total,100);
3783         $first_completed ||= time;
3784         my $timepassed = (time - $first_completed);
3785         my $avgtime = $timepassed / $completed;
3786         $smoothed_avg_time ||= $avgtime;
3787         # Smooth the eta so it does not jump wildly
3788         $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3789             $pctcomplete * $avgtime;
3790         my $eta = int($left * $smoothed_avg_time);
3791         if($eta*0.90 < $last_eta and $last_eta < $eta) {
3792             # Eta jumped less that 10% up: Keep the last eta instead
3793             $eta = $last_eta;
3794         } else {
3795             $last_eta = $eta;
3796         }
3797         return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3798     }
3799 }
3800
3801 {
3802     my ($rev,$reset);
3803
3804     sub bar() {
3805         # Return:
3806         #   $status = bar with eta, completed jobs, arg and pct
3807         $rev ||= "\033[7m";
3808         $reset ||= "\033[0m";
3809         my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3810             compute_eta();
3811         my $arg = $Global::newest_job ?
3812             $Global::newest_job->{'commandline'}->
3813             replace_placeholders(["\257<\257>"],0,0) : "";
3814         # These chars mess up display in the terminal
3815         $arg =~ tr/[\011-\016\033\302-\365]//d;
3816         my $eta_dhms = ::seconds_to_time_units($eta);
3817         my $bar_text =
3818             sprintf("%d%% %d:%d=%s %s",
3819                     $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3820         my $terminal_width = terminal_columns();
3821         my $s = sprintf("%-${terminal_width}s",
3822                         substr($bar_text." "x$terminal_width,
3823                                0,$terminal_width));
3824         my $width = int($terminal_width * $pctcomplete);
3825         substr($s,$width,0) = $reset;
3826         my $zenity = sprintf("%-${terminal_width}s",
3827                              substr("#   $eta sec $arg",
3828                                     0,$terminal_width));
3829         $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3830             "\r" . $rev . $s . $reset;
3831         return $s;
3832     }
3833 }
3834
3835 {
3836     my ($columns,$last_column_time);
3837
3838     sub terminal_columns() {
3839         # Get the number of columns of the terminal.
3840         # Only update once per second.
3841         # Returns:
3842         #   number of columns of the screen
3843         if(not $columns or $last_column_time < time) {
3844             $last_column_time = time;
3845             $columns = $ENV{'COLUMNS'};
3846             if(not $columns) {
3847                 # && true is to force spawning a shell and not just exec'ing
3848                 my $stty = qx{stty -a </dev/tty 2>/dev/null && true};
3849                 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3850                 # MacOSX/IRIX/AIX/Tru64
3851                 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3852                 # GNU/Linux/Solaris
3853                 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3854                 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3855                 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3856                 # QNX
3857                 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3858             }
3859             if(not $columns) {
3860                 # && true is to force spawning a shell and not just exec'ing
3861                 my $resize = qx{resize 2>/dev/null && true};
3862                 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3863             }
3864             $columns ||= 80;
3865         }
3866         return $columns;
3867     }
3868 }
3869
3870 # Prototype forwarding
3871 sub get_job_with_sshlogin($);
3872 sub get_job_with_sshlogin($) {
3873     # Input:
3874     #   $sshlogin = which host should the job be run on?
3875     # Uses:
3876     #   $opt::hostgroups
3877     #   $Global::JobQueue
3878     # Returns:
3879     #   $job = next job object for $sshlogin if any available
3880     my $sshlogin = shift;
3881     my $job;
3882
3883     if ($opt::hostgroups) {
3884         my @other_hostgroup_jobs = ();
3885
3886         while($job = $Global::JobQueue->get()) {
3887             if($sshlogin->in_hostgroups($job->hostgroups())) {
3888                 # Found a job to be run on a hostgroup of this
3889                 # $sshlogin
3890                 last;
3891             } else {
3892                 # This job was not in the hostgroups of $sshlogin
3893                 push @other_hostgroup_jobs, $job;
3894             }
3895         }
3896         $Global::JobQueue->unget(@other_hostgroup_jobs);
3897         if(not defined $job) {
3898             # No more jobs
3899             return undef;
3900         }
3901     } else {
3902         $job = $Global::JobQueue->get();
3903         if(not defined $job) {
3904             # No more jobs
3905             ::debug("start", "No more jobs: JobQueue empty\n");
3906             return undef;
3907         }
3908     }
3909     if(not $job->suspended()) {
3910         $job->set_sshlogin($sshlogin);
3911     }
3912     if($opt::retries and $job->failed_here()) {
3913         # This command with these args failed for this sshlogin
3914         my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3915         # Only look at the Global::host that have > 0 jobslots
3916         if($no_of_failed_sshlogins ==
3917            grep { $_->max_jobs_running() > 0 } values %Global::host
3918            and $job->failed_here() == $min_failures) {
3919             # It failed the same or more times on another host:
3920             # run it on this host
3921         } else {
3922             # If it failed fewer times on another host:
3923             # Find another job to run
3924             my $nextjob;
3925             if(not $Global::JobQueue->empty()) {
3926                 # This can potentially recurse for all args
3927                 no warnings 'recursion';
3928                 $nextjob = get_job_with_sshlogin($sshlogin);
3929             }
3930             # Push the command back on the queue
3931             $Global::JobQueue->unget($job);
3932             return $nextjob;
3933         }
3934     }
3935     return $job;
3936 }
3937
3938
3939 sub __REMOTE_SSH__() {}
3940
3941
3942 sub read_sshloginfiles(@) {
3943     # Read a list of --slf's
3944     # Input:
3945     #   @files = files or symbolic file names to read
3946     # Returns: N/A
3947     for my $s (@_) {
3948         read_sshloginfile(expand_slf_shorthand($s));
3949     }
3950 }
3951
3952 sub expand_slf_shorthand($) {
3953     # Expand --slf shorthand into a read file name
3954     # Input:
3955     #   $file = file or symbolic file name to read
3956     # Returns:
3957     #   $file = actual file name to read
3958     my $file = shift;
3959     if($file eq "-") {
3960         # skip: It is stdin
3961     } elsif($file eq "..") {
3962         $file = $Global::config_dir."/sshloginfile";
3963     } elsif($file eq ".") {
3964         $file = "/etc/parallel/sshloginfile";
3965     } elsif(not -r $file) {
3966         for(@Global::config_dirs) {
3967             if(not -r $_."/".$file) {
3968                 # Try prepending $PARALLEL_HOME
3969                 ::error("Cannot open $file.");
3970                 ::wait_and_exit(255);
3971             } else {
3972                 $file = $_."/".$file;
3973                 last;
3974             }
3975         }
3976     }
3977     return $file;
3978 }
3979
3980 sub read_sshloginfile($) {
3981     # Read sshloginfile into @Global::sshlogin
3982     # Input:
3983     #   $file = file to read
3984     # Uses:
3985     #   @Global::sshlogin
3986     # Returns: N/A
3987     local $/ = "\n";
3988     my $file = shift;
3989     my $close = 1;
3990     my $in_fh;
3991     ::debug("init","--slf ",$file);
3992     if($file eq "-") {
3993         $in_fh = *STDIN;
3994         $close = 0;
3995     } else {
3996         if(not open($in_fh, "<", $file)) {
3997             # Try the filename
3998             ::error("Cannot open $file.");
3999             ::wait_and_exit(255);
4000         }
4001     }
4002     while(<$in_fh>) {
4003         chomp;
4004         /^\s*#/ and next;
4005         /^\s*$/ and next;
4006         push @Global::sshlogin, $_;
4007     }
4008     if($close) {
4009         close $in_fh;
4010     }
4011 }
4012
4013 sub parse_sshlogin() {
4014     # Parse @Global::sshlogin into %Global::host.
4015     # Keep only hosts that are in one of the given ssh hostgroups.
4016     # Uses:
4017     #   @Global::sshlogin
4018     #   $Global::minimal_command_line_length
4019     #   %Global::host
4020     #   $opt::transfer
4021     #   @opt::return
4022     #   $opt::cleanup
4023     #   @opt::basefile
4024     #   @opt::trc
4025     # Returns: N/A
4026     my @login;
4027     if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
4028     for my $sshlogin (@Global::sshlogin) {
4029         # Split up -S sshlogin,sshlogin
4030         for my $s (split /,|\n/, $sshlogin) {
4031             if ($s eq ".." or $s eq "-") {
4032                 # This may add to @Global::sshlogin - possibly bug
4033                 read_sshloginfile(expand_slf_shorthand($s));
4034             } else {
4035                 $s =~ s/\s*$//;
4036                 push (@login, $s);
4037             }
4038         }
4039     }
4040     $Global::minimal_command_line_length = 100_000_000;
4041     my @allowed_hostgroups;
4042     for my $ncpu_sshlogin_string (::uniq(@login)) {
4043         my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
4044         my $sshlogin_string = $sshlogin->string();
4045         if($sshlogin_string eq "") {
4046             # This is an ssh group: -S @webservers
4047             push @allowed_hostgroups, $sshlogin->hostgroups();
4048             next;
4049         }
4050         if($Global::host{$sshlogin_string}) {
4051             # This sshlogin has already been added:
4052             # It is probably a host that has come back
4053             # Set the max_jobs_running back to the original
4054             debug("run","Already seen $sshlogin_string\n");
4055             if($sshlogin->{'ncpus'}) {
4056                 # If ncpus set by '#/' of the sshlogin, overwrite it:
4057                 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4058             }
4059             $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
4060             next;
4061         }
4062         $sshlogin->set_maxlength(Limits::Command::max_length());
4063
4064         $Global::minimal_command_line_length =
4065             ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
4066         $Global::host{$sshlogin_string} = $sshlogin;
4067     }
4068     if(@allowed_hostgroups) {
4069         # Remove hosts that are not in these groups
4070         while (my ($string, $sshlogin) = each %Global::host) {
4071             if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4072                 delete $Global::host{$string};
4073             }
4074         }
4075     }
4076
4077     # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4078     if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) {
4079         if(not remote_hosts()) {
4080             # There are no remote hosts
4081             if(@opt::trc) {
4082                 ::warning("--trc ignored as there are no remote --sshlogin.");
4083             } elsif (defined $opt::transfer) {
4084                 ::warning("--transfer ignored as there are no remote --sshlogin.");
4085             } elsif (@opt::transfer_files) {
4086                 ::warning("--transferfile ignored as there are no remote --sshlogin.");
4087             } elsif (@opt::return) {
4088                 ::warning("--return ignored as there are no remote --sshlogin.");
4089             } elsif (defined $opt::cleanup) {
4090                 ::warning("--cleanup ignored as there are no remote --sshlogin.");
4091             } elsif (@opt::basefile) {
4092                 ::warning("--basefile ignored as there are no remote --sshlogin.");
4093             }
4094         }
4095     }
4096 }
4097
4098 sub remote_hosts() {
4099     # Return sshlogins that are not ':'
4100     # Uses:
4101     #   %Global::host
4102     # Returns:
4103     #   list of sshlogins with ':' removed
4104     return grep !/^:$/, keys %Global::host;
4105 }
4106
4107 sub setup_basefile() {
4108     # Transfer basefiles to each $sshlogin
4109     # This needs to be done before first jobs on $sshlogin is run
4110     # Uses:
4111     #   %Global::host
4112     #   @opt::basefile
4113     # Returns: N/A
4114     my @cmd;
4115     my $rsync_destdir;
4116     my $workdir;
4117     for my $sshlogin (values %Global::host) {
4118       if($sshlogin->string() eq ":") { next }
4119       for my $file (@opt::basefile) {
4120         if($file !~ m:^/: and $opt::workdir eq "...") {
4121           ::error("Work dir '...' will not work with relative basefiles.");
4122           ::wait_and_exit(255);
4123         }
4124         if(not $workdir) {
4125             my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],{},{},{});
4126             my $dummyjob = Job->new($dummycmdline);
4127             $workdir = $dummyjob->workdir();
4128         }
4129         push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4130       }
4131     }
4132     debug("init", "basesetup: @cmd\n");
4133     my ($exitstatus,$stdout_ref,$stderr_ref) =
4134         run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5);
4135     if($exitstatus) {
4136         my @stdout = @$stdout_ref;
4137         my @stderr = @$stderr_ref;
4138         ::error("Copying of --basefile failed: @stdout@stderr");
4139         ::wait_and_exit(255);
4140     }
4141 }
4142
4143 sub cleanup_basefile() {
4144     # Remove the basefiles transferred
4145     # Uses:
4146     #   %Global::host
4147     #   @opt::basefile
4148     # Returns: N/A
4149     my @cmd;
4150     my $workdir;
4151     if(not $workdir) {
4152         my $dummycmdline = CommandLine->new(1,"true",0,0,0,0,0,{},{},{});
4153         my $dummyjob = Job->new($dummycmdline);
4154         $workdir = $dummyjob->workdir();
4155     }
4156     for my $sshlogin (values %Global::host) {
4157         if($sshlogin->string() eq ":") { next }
4158         for my $file (@opt::basefile) {
4159             push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
4160         }
4161     }
4162     debug("init", "basecleanup: @cmd\n");
4163     my ($exitstatus,$stdout_ref,$stderr_ref) =
4164         run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5);
4165     if($exitstatus) {
4166         my @stdout = @$stdout_ref;
4167         my @stderr = @$stderr_ref;
4168         ::error("Cleanup of --basefile failed: @stdout@stderr");
4169         ::wait_and_exit(255);
4170     }
4171 }
4172
4173 sub run_gnu_parallel() {
4174     my ($stdin,@args) = @_;
4175     my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
4176     print $Global::original_stderr ` $cmd wait` ;
4177     return 0
4178 }
4179
4180 sub _run_gnu_parallel() {
4181     # Run GNU Parallel
4182     # This should ideally just fork an internal copy
4183     # and not start it through a shell
4184     # Input:
4185     #   $stdin = data to provide on stdin for GNU   Parallel
4186     #   @args = command line arguments
4187     # Returns:
4188     #   $exitstatus = exitcode of GNU Parallel run
4189     #   \@stdout = standard output
4190     #   \@stderr = standard error
4191     my ($stdin,@args) = @_;
4192     my ($exitstatus,@stdout,@stderr);
4193     my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
4194     my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
4195     unlink $stderrname;
4196
4197     my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
4198                       $0,qw(--plain --shell /bin/sh --will-cite), @args);
4199     if(my $writerpid = fork()) {
4200         close $stdin_fh;
4201         @stdout = <$stdout_fh>;
4202         # Now stdout is closed:
4203         # These pids should be dead or die very soon
4204         while(kill 0, $writerpid) { ::usleep(1); }
4205         die;
4206 #       reap $writerpid;
4207 #       while(kill 0, $pid) { ::usleep(1); }
4208 #       reap $writerpid;
4209         $exitstatus = $?;
4210         seek $stderr_fh, 0, 0;
4211         @stderr = <$stderr_fh>;
4212         close $stdout_fh;
4213         close $stderr_fh;
4214     } else {
4215         close $stdout_fh;
4216         close $stderr_fh;
4217         print $stdin_fh $stdin;
4218         close $stdin_fh;
4219         exit(0);
4220     }
4221     return ($exitstatus,\@stdout,\@stderr);
4222 }
4223
4224 sub filter_hosts() {
4225     # Remove down --sshlogins from active duty.
4226     # Find ncpus, ncores, maxlen, time-to-login for each host.
4227     # Uses:
4228     #   %Global::host
4229     #   $Global::minimal_command_line_length
4230     #   $opt::use_sockets_instead_of_threads
4231     #   $opt::use_cores_instead_of_threads
4232     #   $opt::use_cpus_instead_of_cores
4233     # Returns: N/A
4234
4235     my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
4236         $maxlen_ref, $echo_ref, $down_hosts_ref) =
4237         parse_host_filtering(parallelized_host_filtering());
4238
4239     delete @Global::host{@$down_hosts_ref};
4240     @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
4241
4242     $Global::minimal_command_line_length = 100_000_000;
4243     while (my ($sshlogin, $obj) = each %Global::host) {
4244         if($sshlogin eq ":") { next }
4245         $nsockets_ref->{$sshlogin} or
4246             ::die_bug("nsockets missing: ".$obj->serverlogin());
4247         $ncores_ref->{$sshlogin} or
4248             ::die_bug("ncores missing: ".$obj->serverlogin());
4249         $nthreads_ref->{$sshlogin} or
4250             ::die_bug("nthreads missing: ".$obj->serverlogin());
4251         $time_to_login_ref->{$sshlogin} or
4252             ::die_bug("time_to_login missing: ".$obj->serverlogin());
4253         $maxlen_ref->{$sshlogin} or
4254             ::die_bug("maxlen missing: ".$obj->serverlogin());
4255         $obj->set_ncpus($nthreads_ref->{$sshlogin});
4256         if($opt::use_cpus_instead_of_cores) {
4257             $obj->set_ncpus($ncores_ref->{$sshlogin});
4258         } elsif($opt::use_sockets_instead_of_threads) {
4259             $obj->set_ncpus($nsockets_ref->{$sshlogin});
4260         } elsif($opt::use_cores_instead_of_threads) {
4261             $obj->set_ncpus($ncores_ref->{$sshlogin});
4262         }
4263         $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
4264         $obj->set_maxlength($maxlen_ref->{$sshlogin});
4265         $Global::minimal_command_line_length =
4266             ::min($Global::minimal_command_line_length,
4267                   int($maxlen_ref->{$sshlogin}/2));
4268         ::debug("init", "Timing from -S:$sshlogin ",
4269                 " nsockets:",$nsockets_ref->{$sshlogin},
4270                 " ncores:", $ncores_ref->{$sshlogin},
4271                 " nthreads:",$nthreads_ref->{$sshlogin},
4272                 " time_to_login:", $time_to_login_ref->{$sshlogin},
4273                 " maxlen:", $maxlen_ref->{$sshlogin},
4274                 " min_max_len:", $Global::minimal_command_line_length,"\n");
4275     }
4276 }
4277
4278 sub parse_host_filtering() {
4279     # Input:
4280     #   @lines = output from parallelized_host_filtering()
4281     # Returns:
4282     #   \%nsockets = number of sockets of {host}
4283     #   \%ncores = number of cores of {host}
4284     #   \%nthreads = number of hyperthreaded cores of {host}
4285     #   \%time_to_login = time_to_login on {host}
4286     #   \%maxlen = max command len on {host}
4287     #   \%echo = echo received from {host}
4288     #   \@down_hosts = list of hosts with no answer
4289     local $/ = "\n";
4290     my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
4291         @down_hosts);
4292     for (@_) {
4293         ::debug("init","Read: ",$_);
4294         chomp;
4295         my @col = split /\t/, $_;
4296         if($col[0] =~ /^parallel: Warning:/) {
4297             # Timed out job: Ignore it
4298             next;
4299         } elsif(defined $col[6]) {
4300             # This is a line from --joblog
4301             # seq host time spent sent received exit signal command
4302             # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
4303             if($col[0] eq "Seq" and $col[1] eq "Host" and
4304                     $col[2] eq "Starttime") {
4305                 # Header => skip
4306                 next;
4307             }
4308             # Get server from: eval true server\;
4309             $col[8] =~ /eval .?true.?\s([^\;]+);/ or
4310                 ::die_bug("col8 does not contain host: $col[8]");
4311             my $host = $1;
4312             $host =~ tr/\\//d;
4313             $Global::host{$host} or next;
4314             if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
4315                 # exit == 255 or exit == timeout (-1): ssh failed/timedout
4316                 # exit == 1: lsh failed
4317                 # Remove sshlogin
4318                 ::debug("init", "--filtered $host\n");
4319                 push(@down_hosts, $host);
4320             } elsif($col[6] eq "127") {
4321                 # signal == 127: parallel not installed remote
4322                 # Set nsockets, ncores, nthreads = 1
4323                 ::warning("Could not figure out ".
4324                           "number of cpus on $host. Using 1.");
4325                 $nsockets{$host} = 1;
4326                 $ncores{$host} = 1;
4327                 $nthreads{$host} = 1;
4328                 $maxlen{$host} = Limits::Command::max_length();
4329             } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
4330                 # Remember how log it took to log in
4331                 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
4332                 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
4333             } else {
4334                 ::die_bug("host check unmatched long jobline: $_");
4335             }
4336         } elsif($Global::host{$col[0]}) {
4337             # This output from --number-of-cores, --number-of-cpus,
4338             # --max-line-length-allowed
4339             # ncores: server       8
4340             # ncpus:  server       2
4341             # maxlen: server       131071
4342             if(/parallel: Warning: Cannot figure out number of/) {
4343                 next;
4344             }
4345             if(not $nsockets{$col[0]}) {
4346                 $nsockets{$col[0]} = $col[1];
4347             } elsif(not $ncores{$col[0]}) {
4348                 $ncores{$col[0]} = $col[1];
4349             } elsif(not $nthreads{$col[0]}) {
4350                 $nthreads{$col[0]} = $col[1];
4351             } elsif(not $maxlen{$col[0]}) {
4352                 $maxlen{$col[0]} = $col[1];
4353             } elsif(not $echo{$col[0]}) {
4354                 $echo{$col[0]} = $col[1];
4355             } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
4356                 # Skip these:
4357                 # perl: warning: Setting locale failed.
4358                 # perl: warning: Please check that your locale settings:
4359                 #         LANGUAGE = (unset),
4360                 #         LC_ALL = (unset),
4361                 #         LANG = "en_US.UTF-8"
4362                 #     are supported and installed on your system.
4363                 # perl: warning: Falling back to the standard locale ("C").
4364             } else {
4365                 ::die_bug("host check too many col0: $_");
4366             }
4367         } else {
4368             ::die_bug("host check unmatched short jobline ($col[0]): $_");
4369         }
4370     }
4371     @down_hosts = uniq(@down_hosts);
4372     return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
4373            \%maxlen, \%echo, \@down_hosts);
4374 }
4375
4376 sub parallelized_host_filtering() {
4377     # Uses:
4378     #   %Global::host
4379     # Returns:
4380     #   text entries with:
4381     #   * joblog line
4382     #   * hostname \t number of cores
4383     #   * hostname \t number of cpus
4384     #   * hostname \t max-line-length-allowed
4385     #   * hostname \t empty
4386
4387     sub sshwrapped {
4388         # Wrap with ssh and --env
4389         # Return $default_value if command fails
4390         my $sshlogin = shift;
4391         my $command = shift;
4392         my $default_value = shift;
4393         # wrapper that returns $default_value if the command fails:
4394         # bug #57886: Errors when using different version on remote
4395         #   perl -e '$a=`$command`; print $? ? "$default_value" : $a'
4396         my $wcmd = q(perl -e '$a=`).$command.q(`;).
4397             q(print $? ? ").::pQ($default_value).q(" : $a');
4398         my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],{},{},{});
4399         my $job = Job->new($commandline);
4400         $job->set_sshlogin($sshlogin);
4401         $job->wrapped();
4402         return($job->{'wrapped'});
4403     }
4404
4405     my(@sockets, @cores, @threads, @maxline, @echo);
4406     while (my ($host, $sshlogin) = each %Global::host) {
4407         if($host eq ":") { next }
4408         # The 'true' is used to get the $host out later
4409         push(@sockets, $host."\t"."true $host; ".
4410              sshwrapped($sshlogin,"parallel --number-of-sockets",0)."\n\0");
4411         push(@cores, $host."\t"."true $host; ".
4412              sshwrapped($sshlogin,"parallel --number-of-cores",0)."\n\0");
4413         push(@threads, $host."\t"."true $host; ".
4414              sshwrapped($sshlogin,"parallel --number-of-threads",0)."\n\0");
4415         push(@maxline, $host."\t"."true $host; ".
4416              sshwrapped($sshlogin,"parallel --max-line-length-allowed",0)."\n\0");
4417         # 'echo' is used to get the fastest possible ssh login time
4418         my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4419             $sshlogin->serverlogin();
4420         push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4421     }
4422     # --timeout 10: Setting up an SSH connection and running a simple
4423     #               command should never take > 10 sec.
4424     # --delay 0.1:  If multiple sshlogins use the same proxy the delay
4425     #               will make it less likely to overload the ssh daemon.
4426     # --retries 3:  If the ssh daemon is overloaded, try 3 times
4427     my $cmd =
4428         "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4429         "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4430     $cmd = $Global::shell." -c ".Q($cmd);
4431     ::debug("init", $cmd, "\n");
4432     my @out;
4433     my $prepend = "";
4434
4435     my ($host_fh,$in,$err);
4436     open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
4437     ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
4438
4439     if(not fork()) {
4440         # Give the commands to run to the $cmd
4441         close $host_fh;
4442         print $in @sockets, @cores, @threads, @maxline, @echo;
4443         close $in;
4444         exit();
4445     }
4446     close $in;
4447     for(<$host_fh>) {
4448         # TODO incompatible with '-quoting. Needs to be fixed differently
4449         #if(/\'$/) {
4450         #    # if last char = ' then append next line
4451         #    # This may be due to quoting of \n in environment var
4452         #    $prepend .= $_;
4453         #    next;
4454         #}
4455         $_ = $prepend . $_;
4456         $prepend = "";
4457         push @out, $_;
4458     }
4459     close $host_fh;
4460     return @out;
4461 }
4462
4463 sub onall($@) {
4464     # Runs @command on all hosts.
4465     # Uses parallel to run @command on each host.
4466     # --jobs = number of hosts to run on simultaneously.
4467     # For each host a parallel command with the args will be running.
4468     # Uses:
4469     #   $Global::quoting
4470     #   @opt::basefile
4471     #   $opt::jobs
4472     #   $opt::linebuffer
4473     #   $opt::ungroup
4474     #   $opt::group
4475     #   $opt::keeporder
4476     #   $opt::D
4477     #   $opt::plain
4478     #   $opt::max_chars
4479     #   $opt::linebuffer
4480     #   $opt::files
4481     #   $opt::colsep
4482     #   $opt::timeout
4483     #   $opt::plain
4484     #   $opt::retries
4485     #   $opt::max_chars
4486     #   $opt::arg_sep
4487     #   $opt::arg_file_sep
4488     #   @opt::v
4489     #   @opt::env
4490     #   %Global::host
4491     #   $Global::exitstatus
4492     #   $Global::debug
4493     #   $Global::joblog
4494     #   $opt::joblog
4495     #   $opt::tag
4496     #   $opt::tee
4497     # Input:
4498     #   @command = command to run on all hosts
4499     # Returns: N/A
4500     sub tmp_joblog {
4501         # Input:
4502         #   $joblog = filename of joblog - undef if none
4503         # Returns:
4504         #   $tmpfile = temp file for joblog - undef if none
4505         my $joblog = shift;
4506         if(not defined $joblog) {
4507             return undef;
4508         }
4509         my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
4510         close $fh;
4511         return $tmpfile;
4512     }
4513     my ($input_source_fh_ref,@command) = @_;
4514     if($Global::quoting) {
4515        @command = shell_quote(@command);
4516     }
4517
4518     # Copy all @input_source_fh (-a and :::) into tempfiles
4519     my @argfiles = ();
4520     for my $fh (@$input_source_fh_ref) {
4521         my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
4522         print $outfh (<$fh>);
4523         close $outfh;
4524         push @argfiles, $name;
4525     }
4526     if(@opt::basefile) { setup_basefile(); }
4527     # for each sshlogin do:
4528     # parallel -S $sshlogin $command :::: @argfiles
4529     #
4530     # Pass some of the options to the sub-parallels, not all of them as
4531     # -P should only go to the first, and -S should not be copied at all.
4532     my $options =
4533         join(" ",
4534              ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
4535              ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""),
4536              ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""),
4537              ((defined $opt::D) ? "-D $opt::D" : ""),
4538              ((defined $opt::group) ? "-g" : ""),
4539              ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
4540              ((defined $opt::keeporder) ? "--keeporder" : ""),
4541              ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4542              ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4543              ((defined $opt::plain) ? "--plain" : ""),
4544              ((defined $opt::ungroup) ? "-u" : ""),
4545              ((defined $opt::tee) ? "--tee" : ""),
4546         );
4547     my $suboptions =
4548         join(" ",
4549              ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
4550              ((defined $opt::D) ? "-D $opt::D" : ""),
4551              ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
4552              ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
4553              ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
4554              ((defined $opt::files) ? "--files" : ""),
4555              ((defined $opt::group) ? "-g" : ""),
4556              ((defined $opt::cleanup) ? "--cleanup" : ""),
4557              ((defined $opt::keeporder) ? "--keeporder" : ""),
4558              ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4559              ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4560              ((defined $opt::plain) ? "--plain" : ""),
4561              ((defined $opt::plus) ? "--plus" : ""),
4562              ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
4563              ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
4564              ((defined $opt::ungroup) ? "-u" : ""),
4565              ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""),
4566              ((defined $opt::tee) ? "--tee" : ""),
4567              ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
4568              (@Global::transfer_files ? map { "--tf ".Q($_) }
4569               @Global::transfer_files : ""),
4570              (@Global::ret_files ? map { "--return ".Q($_) }
4571               @Global::ret_files : ""),
4572              (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
4573              (map { "-v" } @opt::v),
4574         );
4575     ::debug("init", "| $0 $options\n");
4576     open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4577         ::die_bug("This does not run GNU Parallel: $0 $options");
4578     my @joblogs;
4579     for my $host (sort keys %Global::host) {
4580         my $sshlogin = $Global::host{$host};
4581         my $joblog = tmp_joblog($opt::joblog);
4582         if($joblog) {
4583             push @joblogs, $joblog;
4584             $joblog = "--joblog $joblog";
4585         }
4586         my $quad = $opt::arg_file_sep || "::::";
4587         # If PARALLEL_ENV is set: Pass it on
4588         my $penv=$Global::parallel_env ?
4589             "PARALLEL_ENV=".Q($Global::parallel_env) :
4590             '';
4591         ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
4592             ((defined $opt::tag) ?
4593              "--tagstring ".Q($sshlogin->string()) : ""),
4594              " -S ", Q($sshlogin->string())," ",
4595              join(" ",shell_quote(@command))," $quad @argfiles\n");
4596         print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4597             ((defined $opt::tag) ?
4598              "--tagstring ".Q($sshlogin->string()) : ""),
4599              " -S ", Q($sshlogin->string())," ",
4600              join(" ",shell_quote(@command))," $quad @argfiles\0";
4601     }
4602     close $parallel_fh;
4603     $Global::exitstatus = $? >> 8;
4604     debug("init", "--onall exitvalue ", $?);
4605     if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
4606     $Global::debug or unlink(@argfiles);
4607     my %seen;
4608     for my $joblog (@joblogs) {
4609         # Append to $joblog
4610         open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
4611         # Skip first line (header);
4612         <$fh>;
4613         print $Global::joblog (<$fh>);
4614         close $fh;
4615         unlink($joblog);
4616     }
4617 }
4618
4619
4620 sub __SIGNAL_HANDLING__() {}
4621
4622
4623 sub sigtstp() {
4624     # Send TSTP signal (Ctrl-Z) to all children process groups
4625     # Uses:
4626     #   %SIG
4627     # Returns: N/A
4628     signal_children("TSTP");
4629 }
4630
4631 sub sigpipe() {
4632     # Send SIGPIPE signal to all children process groups
4633     # Uses:
4634     #   %SIG
4635     # Returns: N/A
4636     signal_children("PIPE");
4637 }
4638
4639 sub signal_children() {
4640     # Send signal to all children process groups
4641     # and GNU   Parallel itself
4642     # Uses:
4643     #   %SIG
4644     # Returns: N/A
4645     my $signal = shift;
4646     debug("run", "Sending $signal ");
4647     kill $signal, map { -$_ } keys %Global::running;
4648     # Use default signal handler for GNU Parallel itself
4649     $SIG{$signal} = undef;
4650     kill $signal, $$;
4651 }
4652
4653 sub save_original_signal_handler() {
4654     # Remember the original signal handler
4655     # Uses:
4656     #   %Global::original_sig
4657     # Returns: N/A
4658     $SIG{INT} = sub {
4659         if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4660         wait_and_exit(255);
4661     };
4662     $SIG{TERM} = sub {
4663         if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4664         wait_and_exit(255);
4665     };
4666     %Global::original_sig = %SIG;
4667     $SIG{TERM} = sub {}; # Dummy until jobs really start
4668     $SIG{ALRM} = 'IGNORE';
4669     # Allow Ctrl-Z to suspend and `fg` to continue
4670     $SIG{TSTP} = \&sigtstp;
4671     $SIG{PIPE} = \&sigpipe;
4672     $SIG{CONT} = sub {
4673         # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4674         $SIG{TSTP} = \&sigtstp;
4675         # Send continue signal to all children process groups
4676         kill "CONT", map { -$_ } keys %Global::running;
4677     };
4678 }
4679
4680 sub list_running_jobs() {
4681     # Print running jobs on tty
4682     # Uses:
4683     #   %Global::running
4684     # Returns: N/A
4685     for my $job (values %Global::running) {
4686         ::status("$Global::progname: ".$job->replaced());
4687     }
4688 }
4689
4690 sub start_no_new_jobs() {
4691     # Start no more jobs
4692     # Uses:
4693     #    %Global::original_sig
4694     #    %Global::unlink
4695     #    $Global::start_no_new_jobs
4696     # Returns: N/A
4697 #    $SIG{TERM} = $Global::original_sig{TERM};
4698     unlink keys %Global::unlink;
4699     ::status
4700         ("$Global::progname: SIGHUP received. No new jobs will be started.",
4701          "$Global::progname: Waiting for these ".(keys %Global::running).
4702          " jobs to finish. Send SIGTERM to stop now.");
4703     list_running_jobs();
4704     $Global::start_no_new_jobs ||= 1;
4705 }
4706
4707 sub reapers() {
4708     # Run reaper until there are no more left
4709     # Returns:
4710     #   @pids_reaped = pids of reaped processes
4711     my @pids_reaped;
4712     my $pid;
4713     while($pid = reaper()) {
4714         push @pids_reaped, $pid;
4715     }
4716     return @pids_reaped;
4717 }
4718
4719 sub reaper() {
4720     # A job finished:
4721     # * Set exitstatus, exitsignal, endtime.
4722     # * Free ressources for new job
4723     # * Update median runtime
4724     # * Print output
4725     # * If --halt = now: Kill children
4726     # * Print progress
4727     # Uses:
4728     #   %Global::running
4729     #   $opt::timeout
4730     #   $Global::timeoutq
4731     #   $opt::keeporder
4732     #   $Global::total_running
4733     # Returns:
4734     #   $stiff = PID of child finished
4735     my $stiff;
4736     debug("run", "Reaper ");
4737     if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
4738         # No jobs waiting to be reaped
4739         return 0;
4740     }
4741
4742     # $stiff = pid of dead process
4743     my $job = $Global::running{$stiff};
4744
4745     # '-a <(seq 10)' will give us a pid not in %Global::running
4746     # The same will one of the ssh -M: ignore
4747     $job or return 0;
4748     delete $Global::running{$stiff};
4749     $Global::total_running--;
4750     if($job->{'commandline'}{'skip'}) {
4751         # $job->skip() was called
4752         $job->set_exitstatus(-2);
4753         $job->set_exitsignal(0);
4754     } else {
4755         $job->set_exitstatus($? >> 8);
4756         $job->set_exitsignal($? & 127);
4757     }
4758
4759     debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4760     $job->set_endtime(::now());
4761     my $sshlogin = $job->sshlogin();
4762     $sshlogin->dec_jobs_running();
4763     if($job->should_be_retried()) {
4764         # Free up file handles
4765         $job->free_ressources();
4766     } else {
4767         # The job is done
4768         $sshlogin->inc_jobs_completed();
4769         # Free the jobslot
4770         $job->free_slot();
4771         if($opt::timeout and not $job->exitstatus()) {
4772             # Update average runtime for timeout only for successful jobs
4773             $Global::timeoutq->update_median_runtime($job->runtime());
4774         }
4775         if($opt::keeporder) {
4776             $job->print_earlier_jobs();
4777         } else {
4778             $job->print();
4779         }
4780         if($job->should_we_halt() eq "now") {
4781             # Kill children
4782             ::kill_sleep_seq($job->pid());
4783             ::killall();
4784             ::wait_and_exit($Global::halt_exitstatus);
4785         }
4786     }
4787     $job->cleanup();
4788
4789     if($opt::progress) {
4790         my %progress = progress();
4791         ::status_no_nl("\r",$progress{'status'});
4792     }
4793
4794     debug("run", "done ");
4795     return $stiff;
4796 }
4797
4798
4799 sub __USAGE__() {}
4800
4801
4802 sub killall() {
4803     # Kill all jobs by killing their process groups
4804     # Uses:
4805     #   $Global::start_no_new_jobs = we are stopping
4806     #   $Global::killall = Flag to not run reaper
4807     $Global::start_no_new_jobs ||= 1;
4808     # Do not reap killed children: Ignore them instead
4809     $Global::killall ||= 1;
4810     kill_sleep_seq(keys %Global::running);
4811 }
4812
4813 sub kill_sleep_seq(@) {
4814     # Send jobs TERM,TERM,KILL to processgroups
4815     # Input:
4816     #   @pids = list of pids that are also processgroups
4817     # Convert pids to process groups ($processgroup = -$pid)
4818     my @pgrps = map { -$_ } @_;
4819     my @term_seq = split/,/,$opt::termseq;
4820     if(not @term_seq) {
4821         @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4822     }
4823     while(@term_seq) {
4824         @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
4825     }
4826 }
4827
4828 sub kill_sleep() {
4829     # Kill pids with a signal and wait a while for them to die
4830     # Input:
4831     #   $signal = signal to send to @pids
4832     #   $sleep_max = number of ms to sleep at most before returning
4833     #   @pids = pids to kill (actually process groups)
4834     # Uses:
4835     #   $Global::killall = set by killall() to avoid calling reaper
4836     # Returns:
4837     #   @pids = pids still alive
4838     my ($signal, $sleep_max, @pids) = @_;
4839     ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4840     kill $signal, @pids;
4841     my $sleepsum = 0;
4842     my $sleep = 0.001;
4843
4844     while(@pids and $sleepsum < $sleep_max) {
4845         if($Global::killall) {
4846             # Killall => don't run reaper
4847             while(waitpid(-1, &WNOHANG) > 0) {
4848                 $sleep = $sleep/2+0.001;
4849             }
4850         } elsif(reapers()) {
4851             $sleep = $sleep/2+0.001;
4852         }
4853         $sleep *= 1.1;
4854         ::usleep($sleep);
4855         $sleepsum += $sleep;
4856         # Keep only living children
4857         @pids = grep { kill(0, $_) } @pids;
4858     }
4859     return @pids;
4860 }
4861
4862 sub wait_and_exit($) {
4863     # If we do not wait, we sometimes get segfault
4864     # Returns: N/A
4865     my $error = shift;
4866     unlink keys %Global::unlink;
4867     if($error) {
4868         # Kill all jobs without printing
4869         killall();
4870     }
4871     for (keys %Global::unkilled_children) {
4872         # Kill any (non-jobs) children (e.g. reserved processes)
4873         kill 9, $_;
4874         waitpid($_,0);
4875         delete $Global::unkilled_children{$_};
4876     }
4877     if($Global::unkilled_sqlworker) {
4878         waitpid($Global::unkilled_sqlworker,0);
4879     }
4880     # Avoid: Warning: unable to close filehandle properly: No space
4881     #        left on device during global destruction.
4882     $SIG{__WARN__} = sub {};
4883     exit($error);
4884 }
4885
4886 sub die_usage() {
4887     # Returns: N/A
4888     usage();
4889     wait_and_exit(255);
4890 }
4891
4892 sub usage() {
4893     # Returns: N/A
4894     print join
4895         ("\n",
4896          "Usage:",
4897          "",
4898          "$Global::progname [options] [command [arguments]] < list_of_arguments",
4899          "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4900          "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4901          "",
4902          "-j n            Run n jobs in parallel",
4903          "-k              Keep same order",
4904          "-X              Multiple arguments with context replace",
4905          "--colsep regexp Split input on regexp for positional replacements",
4906          "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4907          "{3} {3.} {3/} {3/.} {=3 perl code =}    Positional replacement strings",
4908          "With --plus:    {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4909          "                {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4910          "",
4911          "-S sshlogin     Example: foo\@server.example.com",
4912          "--slf ..        Use ~/.parallel/sshloginfile as the list of sshlogins",
4913          "--trc {}.bar    Shorthand for --transfer --return {}.bar --cleanup",
4914          "--onall         Run the given command with argument on all sshlogins",
4915          "--nonall        Run the given command with no arguments on all sshlogins",
4916          "",
4917          "--pipe          Split stdin (standard input) to multiple jobs.",
4918          "--recend str    Record end separator for --pipe.",
4919          "--recstart str  Record start separator for --pipe.",
4920          "",
4921          "GNU Parallel can do much more. See 'man $Global::progname' for details",
4922          "",
4923          "Academic tradition requires you to cite works you base your article on.",
4924          "If you use programs that use GNU Parallel to process data for an article in a",
4925          "scientific publication, please cite:",
4926          "",
4927          "  Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4928          "  Zenodo. https://doi.org/10.5281/zenodo.4045386",
4929          "",
4930          # Before changing this line,  please read
4931          # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4932          # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4933          "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4934          "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4935          "",
4936          "",);
4937 }
4938
4939 sub citation_notice() {
4940     # if --will-cite or --plain: do nothing
4941     # if stderr redirected: do nothing
4942     # if $PARALLEL_HOME/will-cite: do nothing
4943     # else: print citation notice to stderr
4944     if($opt::willcite
4945        or
4946        $opt::plain
4947        or
4948        not -t $Global::original_stderr
4949        or
4950        grep { -e "$_/will-cite" } @Global::config_dirs) {
4951         # skip
4952     } else {
4953         ::status
4954             ("Academic tradition requires you to cite works you base your article on.",
4955              "If you use programs that use GNU Parallel to process data for an article in a",
4956              "scientific publication, please cite:",
4957              "",
4958              "  Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4959              "  Zenodo. https://doi.org/10.5281/zenodo.4045386",
4960              "",
4961              # Before changing this line,  please read
4962              # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
4963              # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4964              "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4965              "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4966              "",
4967              "More about funding GNU Parallel and the citation notice:",
4968              "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4969              "",
4970              "To silence this citation notice: run 'parallel --citation' once.",
4971              ""
4972             );
4973         mkdir $Global::config_dir;
4974         # Number of times the user has run GNU Parallel without showing
4975         # willingness to cite
4976         my $runs = 0;
4977         if(open (my $fh, "<", $Global::config_dir.
4978                  "/runs-without-willing-to-cite")) {
4979             $runs = <$fh>;
4980             close $fh;
4981         }
4982         $runs++;
4983         if(open (my $fh, ">", $Global::config_dir.
4984                  "/runs-without-willing-to-cite")) {
4985             print $fh $runs;
4986             close $fh;
4987             if($runs >= 10) {
4988                 ::status("Come on: You have run parallel $runs times. Isn't it about time ",
4989                          "you run 'parallel --citation' once to silence the citation notice?",
4990                          "");
4991             }
4992         }
4993     }
4994 }
4995
4996 sub status(@) {
4997     my @w = @_;
4998     my $fh = $Global::status_fd || *STDERR;
4999     print $fh map { ($_, "\n") } @w;
5000     flush $fh;
5001 }
5002
5003 sub status_no_nl(@) {
5004     my @w = @_;
5005     my $fh = $Global::status_fd || *STDERR;
5006     print $fh @w;
5007     flush $fh;
5008 }
5009
5010 sub warning(@) {
5011     my @w = @_;
5012     my $prog = $Global::progname || "parallel";
5013     status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5014 }
5015
5016 sub error(@) {
5017     my @w = @_;
5018     my $prog = $Global::progname || "parallel";
5019     status(map { ($prog.": Error: ". $_); } @w);
5020 }
5021
5022 sub die_bug($) {
5023     my $bugid = shift;
5024     print STDERR
5025         ("$Global::progname: This should not happen. You have found a bug.\n",
5026          "Please contact <parallel\@gnu.org> and follow\n",
5027          "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
5028          "\n",
5029          "Include this in the report:\n",
5030          "* The version number: $Global::version\n",
5031          "* The bugid: $bugid\n",
5032          "* The command line being run\n",
5033          "* The files being read (put the files on a webserver if they are big)\n",
5034          "\n",
5035          "If you get the error on smaller/fewer files, please include those instead.\n");
5036     ::wait_and_exit(255);
5037 }
5038
5039 sub version() {
5040     # Returns: N/A
5041     print join
5042         ("\n",
5043          "GNU $Global::progname $Global::version",
5044          "Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free Software",
5045          "Foundation, Inc.",
5046          "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5047          "This is free software: you are free to change and redistribute it.",
5048          "GNU $Global::progname comes with no warranty.",
5049          "",
5050          "Web site: https://www.gnu.org/software/${Global::progname}\n",
5051          "When using programs that use GNU Parallel to process data for publication",
5052          "please cite as described in 'parallel --citation'.\n",
5053         );
5054 }
5055
5056 sub citation() {
5057     # Returns: N/A
5058     my ($all_argv_ref,$argv_options_removed_ref) = @_;
5059     my $all_argv = "@$all_argv_ref";
5060     my $no_opts = "@$argv_options_removed_ref";
5061     $all_argv=~s/--citation//;
5062     if($all_argv ne $no_opts) {
5063         ::warning("--citation ignores all other options and arguments.");
5064         ::status("");
5065     }
5066
5067     ::status(
5068         "Academic tradition requires you to cite works you base your article on.",
5069         "If you use programs that use GNU Parallel to process data for an article in a",
5070         "scientific publication, please cite:",
5071         "",
5072         "\@software{tange_2020_4045386,",
5073         "      author       = {Tange, Ole},",
5074         "      title        = {GNU Parallel 20200922 ('Ginsburg')},",
5075         "      month        = Sep,",
5076         "      year         = 2020,",
5077         "      note         = {{GNU Parallel is a general parallelizer to run",
5078         "                       multiple serial command line programs in parallel",
5079         "                       without changing them.}},",
5080         "      publisher    = {Zenodo},",
5081         "      doi          = {10.5281/zenodo.4045386},",
5082         "      url          = {https://doi.org/10.5281/zenodo.4045386}",
5083         "}",
5084         "",
5085         "(Feel free to use \\nocite{tange_2020_4045386})",
5086         "",
5087         # Before changing this line, please read
5088         # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5089         # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5090         "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5091         "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5092         "",
5093         "More about funding GNU Parallel and the citation notice:",
5094         "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
5095         "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5096         "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
5097         "",
5098         "If you send a copy of your published article to tange\@gnu.org, it will be",
5099         "mentioned in the release notes of next version of GNU Parallel.",
5100         ""
5101         );
5102     while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
5103         print "\nType: 'will cite' and press enter.\n> ";
5104         my $input = <STDIN>;
5105         if(not defined $input) {
5106             exit(255);
5107         }
5108         if($input =~ /will cite/i) {
5109             mkdir $Global::config_dir;
5110             if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
5111                 close $fh;
5112                 ::status(
5113                     "",
5114                     "Thank you for your support: You are the reason why there is funding to",
5115                     "continue maintaining GNU Parallel. On behalf of future versions of",
5116                     "GNU Parallel, which would not exist without your support:",
5117                     "",
5118                     "  THANK YOU SO MUCH",
5119                     "",
5120                     "It is really appreciated. The citation notice is now silenced.",
5121                     "");
5122             } else {
5123                 ::status(
5124                       "",
5125                       "Thank you for your support. It is much appreciated. The citation",
5126                       "cannot permanently be silenced. Use '--will-cite' instead.",
5127                       "",
5128                       "If you use '--will-cite' in scripts to be run by others you are making",
5129                       "it harder for others to see the citation notice.  The development of",
5130                       "GNU Parallel is indirectly financed through citations, so if users",
5131                       "do not know they should cite then you are making it harder to finance",
5132                       "development. However, if you pay 10000 EUR, you should feel free to",
5133                       "use '--will-cite' in scripts.",
5134                       "");
5135                 last;
5136             }
5137         }
5138     }
5139 }
5140
5141 sub show_limits() {
5142     # Returns: N/A
5143     print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
5144           "Maximal used size of command: ",Limits::Command::max_length(),"\n",
5145           "\n",
5146           "Execution of  will continue now, and it will try to read its input\n",
5147           "and run commands; if this is not what you wanted to happen, please\n",
5148           "press CTRL-D or CTRL-C\n");
5149 }
5150
5151 sub embed() {
5152     # Give an embeddable version of GNU  Parallel
5153     # Tested with: bash, zsh, ksh, ash, dash, sh
5154     my $randomstring = "cut-here-".join"",
5155         map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
5156     if(not -f $0 or not -r $0) {
5157         ::error("--embed only works if parallel is a readable file");
5158         exit(255);
5159     }
5160     if(open(my $fh, "<", $0)) {
5161         # Read the source from $0
5162         my @source = <$fh>;
5163         my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
5164         my @env_parallel_source = ();
5165         my $shell = $Global::shell;
5166         $shell =~ s:.*/::;
5167         for(which("env_parallel.$shell")) {
5168             -r $_ or next;
5169             # Read the source of env_parallel.shellname
5170             open(my $env_parallel_source_fh, $_) || die;
5171             @env_parallel_source = <$env_parallel_source_fh>;
5172             close $env_parallel_source_fh;
5173             last;
5174         }
5175         print "#!$Global::shell
5176
5177 # Copyright (C) 2007-2020 $user, Ole Tange, http://ole.tange.dk
5178 # and Free Software Foundation, Inc.
5179 #
5180 # This program is free software; you can redistribute it and/or modify
5181 # it under the terms of the GNU General Public License as published by
5182 # the Free Software Foundation; either version 3 of the License, or
5183 # (at your option) any later version.
5184 #
5185 # This program is distributed in the hope that it will be useful, but
5186 # WITHOUT ANY WARRANTY; without even the implied warranty of
5187 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
5188 # General Public License for more details.
5189 #
5190 # You should have received a copy of the GNU General Public License
5191 # along with this program; if not, see <https://www.gnu.org/licenses/>
5192 # or write to the Free Software Foundation, Inc., 51 Franklin St,
5193 # Fifth Floor, Boston, MA 02110-1301 USA
5194 ";
5195
5196         print q!
5197 # Embedded GNU  Parallel created with --embed
5198 parallel() {
5199     # Start GNU  Parallel without leaving temporary files
5200     #
5201     # Not all shells support 'perl <(cat ...)'
5202     # This is a complex way of doing:
5203     #   perl <(cat <<'cut-here'
5204     #     [...]
5205     #     ) "$@"
5206     # and also avoiding:
5207     #   [1]+  Done   cat
5208
5209     # Make a temporary fifo that perl can read from
5210     _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo);
5211       do {
5212         $f = "/tmp/parallel-".join"",
5213           map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5214       } while(-e $f);
5215       mkfifo($f,0600);
5216       print $f;'`
5217     # Put source code into temporary file
5218     # so it is easy to copy to the fifo
5219     _file_with_GNU_Parallel_source=`mktemp`;
5220 !,
5221         "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
5222         @source,
5223         $randomstring,"\n",
5224         q!
5225     # Copy the source code from the file to the fifo
5226     # and remove the file and fifo ASAP
5227     # 'sh -c' is needed to avoid
5228     #   [1]+  Done   cat
5229     sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
5230
5231     # Read the source from the fifo
5232     perl $_fifo_with_GNU_Parallel_source "$@"
5233 }
5234 !,
5235         @env_parallel_source,
5236         q!
5237
5238 # This will call the functions above
5239 parallel -k echo ::: Put your code here
5240 env_parallel --session
5241 env_parallel -k echo ::: Put your code here
5242 parset p,y,c,h -k echo ::: Put your code here
5243 echo $p $y $c $h
5244 !;
5245     } else {
5246         ::error("Cannot open $0");
5247         exit(255);
5248     }
5249     ::status("Redirect the output to a file and add your changes at the end:",
5250         "  $0 --embed > new_script");
5251 }
5252
5253
5254 sub __GENERIC_COMMON_FUNCTION__() {}
5255
5256
5257 sub mkdir_or_die($) {
5258     # If dir is not executable: die
5259     my $dir = shift;
5260     # The eval is needed to catch exception from mkdir
5261     eval { File::Path::mkpath($dir); };
5262     if(not -x $dir) {
5263         ::error("Cannot change into non-executable dir $dir: $!");
5264         ::wait_and_exit(255);
5265     }
5266 }
5267
5268 sub tmpfile(@) {
5269     # Create tempfile as $TMPDIR/parXXXXX
5270     # Returns:
5271     #   $filehandle = opened file handle
5272     #   $filename = file name created
5273     my($filehandle,$filename) =
5274         ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
5275     if(wantarray) {
5276         return($filehandle,$filename);
5277     } else {
5278         # Separate unlink due to NFS dealing badly with File::Temp
5279         unlink $filename;
5280         return $filehandle;
5281     }
5282 }
5283
5284 sub tmpname($) {
5285     # Select a name that does not exist
5286     # Do not create the file as it may be used for creating a socket (by tmux)
5287     # Remember the name in $Global::unlink to avoid hitting the same name twice
5288     my $name = shift;
5289     my($tmpname);
5290     if(not -w $ENV{'TMPDIR'}) {
5291         if(not -e $ENV{'TMPDIR'}) {
5292             ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
5293         } else {
5294             ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
5295         }
5296         ::wait_and_exit(255);
5297     }
5298     do {
5299         $tmpname = $ENV{'TMPDIR'}."/".$name.
5300             join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5301     } while(-e $tmpname or $Global::unlink{$tmpname}++);
5302     return $tmpname;
5303 }
5304
5305 sub tmpfifo() {
5306     # Find an unused name and mkfifo on it
5307     my $tmpfifo = tmpname("fif");
5308     mkfifo($tmpfifo,0600);
5309     return $tmpfifo;
5310 }
5311
5312 sub rm(@) {
5313     # Remove file and remove it from %Global::unlink
5314     # Uses:
5315     #   %Global::unlink
5316     delete @Global::unlink{@_};
5317     unlink @_;
5318 }
5319
5320 sub size_of_block_dev() {
5321     # Like -s but for block devices
5322     # Input:
5323     #   $blockdev = file name of block device
5324     # Returns:
5325     #   $size = in bytes, undef if error
5326     my $blockdev = shift;
5327     if(open(my $fh, "<", $blockdev)) {
5328         seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
5329         my $size = tell($fh);
5330         close $fh;
5331         return $size;
5332     } else {
5333         ::error("cannot open $blockdev");
5334         wait_and_exit(255);
5335     }
5336 }
5337
5338 sub qqx(@) {
5339     # Like qx but with clean environment (except for @keep)
5340     # and STDERR ignored
5341     # This is needed if the environment contains functions
5342     # that /bin/sh does not understand
5343     my %env;
5344     # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
5345     # ssh with Kerberos needs KRB5CCNAME
5346     # tmux needs LC_CTYPE
5347     # lsh needs HOME LOGNAME
5348     my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE HOME LOGNAME);
5349     @env{@keep} = @ENV{@keep};
5350     local %ENV;
5351     %ENV = %env;
5352     if($Global::debug) {
5353         # && true is to force spawning a shell and not just exec'ing
5354         return qx{ @_ && true };
5355     } else {
5356         # CygWin does not respect 2>/dev/null
5357         # so we do that by hand
5358         # This trick does not work:
5359         # https://stackoverflow.com/q/13833088/363028
5360         # local *STDERR;
5361         # open(STDERR, ">", "/dev/null");
5362         open(local *CHILD_STDIN,  '<', '/dev/null') or die $!;
5363         open(local *CHILD_STDERR, '>', '/dev/null') or die $!;
5364         my $out;
5365         # eval is needed if open3 fails (e.g. command line too long)
5366         eval {
5367             my $pid = open3(
5368                 '<&CHILD_STDIN',
5369                 $out,
5370                 '>&CHILD_STDERR',
5371                 # && true is to force spawning a shell and not just exec'ing
5372                 "@_ && true");
5373             my @arr = <$out>;
5374             close $out;
5375             # Make sure $? is set
5376             waitpid($pid, 0);
5377             return wantarray ? @arr : join "",@arr;
5378         } or do {
5379             # If eval fails, force $?=false
5380             `false`;
5381         };
5382     }
5383 }
5384
5385 sub uniq(@) {
5386     # Remove duplicates and return unique values
5387     return keys %{{ map { $_ => 1 } @_ }};
5388 }
5389
5390 sub min(@) {
5391     # Returns:
5392     #   Minimum value of array
5393     my $min;
5394     for (@_) {
5395         # Skip undefs
5396         defined $_ or next;
5397         defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
5398         $min = ($min < $_) ? $min : $_;
5399     }
5400     return $min;
5401 }
5402
5403 sub max(@) {
5404     # Returns:
5405     #   Maximum value of array
5406     my $max;
5407     for (@_) {
5408         # Skip undefs
5409         defined $_ or next;
5410         defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
5411         $max = ($max > $_) ? $max : $_;
5412     }
5413     return $max;
5414 }
5415
5416 sub sum(@) {
5417     # Returns:
5418     #   Sum of values of array
5419     my @args = @_;
5420     my $sum = 0;
5421     for (@args) {
5422         # Skip undefs
5423         $_ and do { $sum += $_; }
5424     }
5425     return $sum;
5426 }
5427
5428 sub undef_as_zero($) {
5429     my $a = shift;
5430     return $a ? $a : 0;
5431 }
5432
5433 sub undef_as_empty($) {
5434     my $a = shift;
5435     return $a ? $a : "";
5436 }
5437
5438 sub undef_if_empty($) {
5439     if(defined($_[0]) and $_[0] eq "") {
5440         return undef;
5441     }
5442     return $_[0];
5443 }
5444
5445 sub multiply_binary_prefix(@) {
5446     # Evalualte numbers with binary prefix
5447     # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5448     # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5449     # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5450     # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5451     # 13G = 13*1024*1024*1024 = 13958643712
5452     # Input:
5453     #   $s = string with prefixes
5454     # Returns:
5455     #   $value = int with prefixes multiplied
5456     my @v = @_;
5457     for(@v) {
5458         defined $_ or next;
5459         s/ki/*1024/gi;
5460         s/mi/*1024*1024/gi;
5461         s/gi/*1024*1024*1024/gi;
5462         s/ti/*1024*1024*1024*1024/gi;
5463         s/pi/*1024*1024*1024*1024*1024/gi;
5464         s/ei/*1024*1024*1024*1024*1024*1024/gi;
5465         s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5466         s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5467         s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5468
5469         s/K/*1024/g;
5470         s/M/*1024*1024/g;
5471         s/G/*1024*1024*1024/g;
5472         s/T/*1024*1024*1024*1024/g;
5473         s/P/*1024*1024*1024*1024*1024/g;
5474         s/E/*1024*1024*1024*1024*1024*1024/g;
5475         s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5476         s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5477         s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5478
5479         s/k/*1000/g;
5480         s/m/*1000*1000/g;
5481         s/g/*1000*1000*1000/g;
5482         s/t/*1000*1000*1000*1000/g;
5483         s/p/*1000*1000*1000*1000*1000/g;
5484         s/e/*1000*1000*1000*1000*1000*1000/g;
5485         s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5486         s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5487         s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5488
5489         $_ = eval $_;
5490     }
5491     return wantarray ? @v : $v[0];
5492 }
5493
5494 sub multiply_time_units($) {
5495     # Evalualte numbers with time units
5496     # s=1, m=60, h=3600, d=86400
5497     # Input:
5498     #   $s = string time units
5499     # Returns:
5500     #   $value = int in seconds
5501     my @v = @_;
5502     for(@v) {
5503         defined $_ or next;
5504         if(/[dhms]/i) {
5505             s/s/*1+/gi;
5506             s/m/*60+/gi;
5507             s/h/*3600+/gi;
5508             s/d/*86400+/gi;
5509             $_ = eval $_."0";
5510         }
5511     }
5512     return wantarray ? @v : $v[0];
5513 }
5514
5515 sub seconds_to_time_units() {
5516     # Convert seconds into ??d??h??m??s
5517     # s=1, m=60, h=3600, d=86400
5518     # Input:
5519     #   $s = int in seconds
5520     # Returns:
5521     #   $str = string time units
5522     my $s = shift;
5523     my $str;
5524     my $d = int($s/86400);
5525     $s -= $d * 86400;
5526     my $h = int($s/3600);
5527     $s -= $h * 3600;
5528     my $m = int($s/60);
5529     $s -= $m * 60;
5530     if($d) {
5531         $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5532     } elsif($h) {
5533         $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5534     } elsif($m) {
5535         $str = sprintf("%dm%02ds",$m,$s);
5536     } else {
5537         $str = sprintf("%ds",$s);
5538     }
5539     return $str;
5540 }
5541
5542 {
5543     my ($disk_full_fh, $b8193, $error_printed);
5544     sub exit_if_disk_full() {
5545         # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5546         # If the disk is full: Exit immediately.
5547         # Returns:
5548         #   N/A
5549         if(not $disk_full_fh) {
5550             $disk_full_fh = ::tmpfile(SUFFIX => ".df");
5551             $b8193 = "b"x8193;
5552         }
5553         # Linux does not discover if a disk is full if writing <= 8192
5554         # Tested on:
5555         # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5556         # ntfs reiserfs tmpfs ubifs vfat xfs
5557         # TODO this should be tested on different OS similar to this:
5558         #
5559         # doit() {
5560         #   sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5561         #   seq 100000 | parallel --tmpdir /mnt/loop/ true &
5562         #   seq 6900000 > /mnt/loop/i && echo seq OK
5563         #   seq 6980868 > /mnt/loop/i
5564         #   seq 10000 > /mnt/loop/ii
5565         #   sleep 3
5566         #   sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5567         #   echo >&2
5568         # }
5569         print $disk_full_fh $b8193;
5570         if(not $disk_full_fh
5571            or
5572            tell $disk_full_fh != 8193) {
5573             # On raspbian the disk can be full except for 10 chars.
5574             if(not $error_printed) {
5575                 ::error("Output is incomplete.",
5576                         "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5577                         "Is the disk full?",
5578                         "Change \$TMPDIR with --tmpdir or use --compress.");
5579                 $error_printed = 1;
5580             }
5581             ::wait_and_exit(255);
5582         }
5583         truncate $disk_full_fh, 0;
5584         seek($disk_full_fh, 0, 0) || die;
5585     }
5586 }
5587
5588 sub spacefree($$) {
5589     # Remove comments and spaces
5590     # Inputs:
5591     #   $spaces = keep 1 space?
5592     #   $s = string to remove spaces from
5593     # Returns:
5594     #   $s = with spaces removed
5595     my $spaces = shift;
5596     my $s = shift;
5597     $s =~ s/#.*//mg;
5598     if(1 == $spaces) {
5599         $s =~ s/\s+/ /mg;
5600     } elsif(2 == $spaces) {
5601         # Keep newlines
5602         $s =~ s/\n\n+/\n/sg;
5603         $s =~ s/[ \t]+/ /mg;
5604     } elsif(3 == $spaces) {
5605         # Keep perl code required space
5606         $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5607         $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5608     } else {
5609         $s =~ s/\s//mg;
5610     }
5611     return $s;
5612 }
5613
5614 {
5615     my $hostname;
5616     sub hostname() {
5617         local $/ = "\n";
5618         if(not $hostname) {
5619             $hostname = `hostname`;
5620             chomp($hostname);
5621             $hostname ||= "nohostname";
5622         }
5623         return $hostname;
5624     }
5625 }
5626
5627 sub which(@) {
5628     # Input:
5629     #   @programs = programs to find the path to
5630     # Returns:
5631     #   @full_path = full paths to @programs. Nothing if not found
5632     my @which;
5633     for my $prg (@_) {
5634         push(@which, grep { not -d $_ and -x $_ }
5635              map { $_."/".$prg } split(":",$ENV{'PATH'}));
5636         if($prg =~ m:/:) {
5637             # Including path
5638             push(@which, grep { not -d $_ and -x $_ } $prg);
5639         }
5640     }
5641     ::debug("which", "$which[0] in $ENV{'PATH'}\n");
5642     return wantarray ? @which : $which[0];
5643 }
5644
5645 {
5646     my ($regexp,$shell,%fakename);
5647
5648     sub parent_shell {
5649         # Input:
5650         #   $pid = pid to see if (grand)*parent is a shell
5651         # Returns:
5652         #   $shellpath = path to shell - undef if no shell found
5653         my $pid = shift;
5654         ::debug("init","Parent of $pid\n");
5655         if(not $regexp) {
5656             # All shells known to mankind
5657             #
5658             # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5659             # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5660
5661             my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
5662             ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5663             static-sh tcsh yash zsh -sh -csh -bash),
5664                           '-sh (sh)' # sh on FreeBSD
5665                 );
5666             # Can be formatted as:
5667             #   [sh]  -sh  sh  busybox sh  -sh (sh)
5668             #   /bin/sh /sbin/sh /opt/csw/sh
5669             # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5670             $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5671             $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5672                 '(-?)('. $shell. '))( *$| [^(])';
5673             %fakename = (
5674                 # sh disguises itself as -sh (sh) on FreeBSD
5675                 "-sh (sh)" => ["sh"],
5676                 # csh and tcsh disguise themselves as -sh/-csh
5677                 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5678                 # but sh also disguises itself as -sh
5679                 # (TODO When does that happen?)
5680                 "-sh" => ["sh"],
5681                 "-csh" => ["tcsh", "csh"],
5682                 # ash disguises itself as -ash
5683                 "-ash" => ["ash", "dash", "sh"],
5684                 # dash disguises itself as -dash
5685                 "-dash" => ["dash", "ash", "sh"],
5686                 # bash disguises itself as -bash
5687                 "-bash" => ["bash", "sh"],
5688                 # ksh disguises itself as -ksh
5689                 "-ksh" => ["ksh", "sh"],
5690                 # zsh disguises itself as -zsh
5691                 "-zsh" => ["zsh", "sh"],
5692                 );
5693         }
5694         if($^O eq "linux") {
5695             # Optimized for GNU/Linux
5696             my $testpid = $pid;
5697             my $shellpath;
5698             my $shellline;
5699             while($testpid) {
5700                 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
5701                     local $/="\0";
5702                     chomp($shellline = <$fd>);
5703                     if($shellline =~ /$regexp/o) {
5704                         my $shellname = $4 || $8;
5705                         my $dash = $3 || $7;
5706                         if($shellname eq "sh" and $dash) {
5707                             # -sh => csh or sh
5708                             if($shellpath = readlink "/proc/$testpid/exe") {
5709                                 ::debug("init","procpath $shellpath\n");
5710                                 if($shellpath =~ m:/$shell$:o) {
5711                                     ::debug("init", "proc which ".$shellpath." => ");
5712                                     return $shellpath;
5713                                 }
5714                             }
5715                         }
5716                         ::debug("init", "which ".$shellname." => ");
5717                         $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5718                         ::debug("init", "shell path $shellpath\n");
5719                         return $shellpath;
5720                     }
5721                 }
5722                 # Get parent pid
5723                 if(open(my $fd, "<", "/proc/$testpid/stat")) {
5724                     my $line = <$fd>;
5725                     close $fd;
5726                     # Parent pid is field 4
5727                     $testpid = (split /\s+/, $line)[3];
5728                 } else {
5729                     # Something is wrong: fall back to old method
5730                     last;
5731                 }
5732             }
5733         }
5734         # if -sh or -csh try readlink /proc/$$/exe
5735         my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
5736         my $shellpath;
5737         my $testpid = $pid;
5738         while($testpid) {
5739             if($name_of_ref->{$testpid} =~ /$regexp/o) {
5740                 my $shellname = $4 || $8;
5741                 my $dash = $3 || $7;
5742                 if($shellname eq "sh" and $dash) {
5743                     # -sh => csh or sh
5744                     if($shellpath = readlink "/proc/$testpid/exe") {
5745                         ::debug("init","procpath $shellpath\n");
5746                         if($shellpath =~ m:/$shell$:o) {
5747                             ::debug("init", "proc which ".$shellpath." => ");
5748                             return $shellpath;
5749                         }
5750                     }
5751                 }
5752                 ::debug("init", "which ".$shellname." => ");
5753                 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5754                 ::debug("init", "shell path $shellpath\n");
5755                 $shellpath and last;
5756             }
5757             if($testpid == $parent_of_ref->{$testpid}) {
5758                 # In Solaris zones, the PPID of the zsched process is itself
5759                 last;
5760             }
5761             $testpid = $parent_of_ref->{$testpid};
5762         }
5763         return $shellpath;
5764     }
5765 }
5766
5767 {
5768     my %pid_parentpid_cmd;
5769
5770     sub pid_table() {
5771         # Returns:
5772         #   %children_of = { pid -> children of pid }
5773         #   %parent_of = { pid -> pid of parent }
5774         #   %name_of = { pid -> commandname }
5775
5776         if(not %pid_parentpid_cmd) {
5777             # Filter for SysV-style `ps`
5778             my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5779                 q(s/^.{$s}//; print "@F[1,2] $_"' );
5780             # Minix uses cols 2,3 and can have newlines in the command
5781             # so lines not having numbers in cols 2,3 must be ignored
5782             my $minix = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5783                 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5784             # BSD-style `ps`
5785             my $bsd = q(ps -o pid,ppid,command -ax);
5786             %pid_parentpid_cmd =
5787             (
5788              'aix' => $sysv,
5789              'android' => $sysv,
5790              'cygwin' => $sysv,
5791              'darwin' => $bsd,
5792              'dec_osf' => $sysv,
5793              'dragonfly' => $bsd,
5794              'freebsd' => $bsd,
5795              'gnu' => $sysv,
5796              'hpux' => $sysv,
5797              'linux' => $sysv,
5798              'mirbsd' => $bsd,
5799              'minix' => $minix,
5800              'msys' => $sysv,
5801              'MSWin32' => $sysv,
5802              'netbsd' => $bsd,
5803              'nto' => $sysv,
5804              'openbsd' => $bsd,
5805              'solaris' => $sysv,
5806              'svr5' => $sysv,
5807              'syllable' => "echo ps not supported",
5808             );
5809         }
5810         $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
5811
5812         my (@pidtable,%parent_of,%children_of,%name_of);
5813         # Table with pid -> children of pid
5814         @pidtable = `$pid_parentpid_cmd{$^O}`;
5815         my $p=$$;
5816         for (@pidtable) {
5817             # must match: 24436 21224 busybox ash
5818             # must match: 24436 21224 <<empty on MacOSX running cubase>>
5819             # must match: 24436 21224 <<empty on system running Viber>>
5820             #   or: perl -e 'while($0=" "){}'
5821             if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5822                or
5823                /^\s*(\S+)\s+(\S+)\s+()$/) {
5824                 $parent_of{$1} = $2;
5825                 push @{$children_of{$2}}, $1;
5826                 $name_of{$1} = $3;
5827             } else {
5828                 ::die_bug("pidtable format: $_");
5829             }
5830         }
5831         return(\%children_of, \%parent_of, \%name_of);
5832     }
5833 }
5834
5835 sub now() {
5836     # Returns time since epoch as in seconds with 3 decimals
5837     # Uses:
5838     #   @Global::use
5839     # Returns:
5840     #   $time = time now with millisecond accuracy
5841     if(not $Global::use{"Time::HiRes"}) {
5842         if(eval "use Time::HiRes qw ( time );") {
5843             eval "sub TimeHiRestime { return Time::HiRes::time };";
5844         } else {
5845             eval "sub TimeHiRestime { return time() };";
5846         }
5847         $Global::use{"Time::HiRes"} = 1;
5848     }
5849
5850     return (int(TimeHiRestime()*1000))/1000;
5851 }
5852
5853 sub usleep($) {
5854     # Sleep this many milliseconds.
5855     # Input:
5856     #   $ms = milliseconds to sleep
5857     my $ms = shift;
5858     ::debug("timing",int($ms),"ms ");
5859     select(undef, undef, undef, $ms/1000);
5860 }
5861
5862 sub __KILLER_REAPER__() {}
5863
5864 sub reap_usleep() {
5865     # Reap dead children.
5866     # If no dead children: Sleep specified amount with exponential backoff
5867     # Input:
5868     #   $ms = milliseconds to sleep
5869     # Returns:
5870     #   $ms/2+0.001 if children reaped
5871     #   $ms*1.1 if no children reaped
5872     my $ms = shift;
5873     if(reapers()) {
5874         if(not $Global::total_completed % 100) {
5875             if($opt::timeout) {
5876                 # Force cleaning the timeout queue for every 100 jobs
5877                 # Fixes potential memleak
5878                 $Global::timeoutq->process_timeouts();
5879             }
5880         }
5881         # Sleep exponentially shorter (1/2^n) if a job finished
5882         return $ms/2+0.001;
5883     } else {
5884         if($opt::timeout) {
5885             $Global::timeoutq->process_timeouts();
5886         }
5887         if($opt::memfree) {
5888             kill_youngster_if_not_enough_mem($opt::memfree*0.5);
5889         }
5890         if($opt::memsuspend) {
5891             kill_youngster_if_not_enough_mem($opt::memsuspend*0.5);
5892         }
5893         if($opt::limit) {
5894             kill_youngest_if_over_limit();
5895         }
5896         exit_if_disk_full();
5897         if($opt::linebuffer) {
5898             my $something_printed = 0;
5899             if($opt::keeporder) {
5900                 for my $job (values %Global::running) {
5901                     $something_printed += $job->print_earlier_jobs();
5902                 }
5903             } else {
5904                 for my $job (values %Global::running) {
5905                     $something_printed += $job->print();
5906                 }
5907             }
5908             if($something_printed) {
5909                 $ms = $ms/2+0.001;
5910             }
5911         }
5912         if($ms > 0.002) {
5913             # When a child dies, wake up from sleep (or select(,,,))
5914             $SIG{CHLD} = sub { kill "ALRM", $$ };
5915             if($opt::delay) {
5916                 # The 0.004s is approximately the time it takes for one round
5917                 usleep(1000*($Global::newest_starttime +
5918                              $opt::delay - 0.004 - ::now()));
5919             } else {
5920                 usleep($ms);
5921             }
5922             # --compress needs $SIG{CHLD} unset
5923             $SIG{CHLD} = 'DEFAULT';
5924         }
5925         # Sleep exponentially longer (1.1^n) if a job did not finish,
5926         # though at most 1000 ms.
5927         return (($ms < 1000) ? ($ms * 1.1) : ($ms));
5928     }
5929 }
5930
5931 sub kill_youngest_if_over_limit() {
5932     # Check each $sshlogin we are over limit
5933     # If over limit: kill off the youngest child
5934     # Put the child back in the queue.
5935     # Uses:
5936     #   %Global::running
5937     my %jobs_of;
5938     my @sshlogins;
5939
5940     for my $job (values %Global::running) {
5941         if(not $jobs_of{$job->sshlogin()}) {
5942             push @sshlogins, $job->sshlogin();
5943         }
5944         push @{$jobs_of{$job->sshlogin()}}, $job;
5945     }
5946     for my $sshlogin (@sshlogins) {
5947         for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5948             if($sshlogin->limit() == 2) {
5949                 $job->kill();
5950                 last;
5951             }
5952         }
5953     }
5954 }
5955
5956 sub kill_youngster_if_not_enough_mem() {
5957     # Check each $sshlogin if there is enough mem.
5958     # If less than 50% enough free mem: kill off the youngest child
5959     # Put the child back in the queue.
5960     # Uses:
5961     #   %Global::running
5962     my $limit = shift;
5963     my %jobs_of;
5964     my @sshlogins;
5965
5966     for my $job (values %Global::running) {
5967         if(not $jobs_of{$job->sshlogin()}) {
5968             push @sshlogins, $job->sshlogin();
5969         }
5970         push @{$jobs_of{$job->sshlogin()}}, $job;
5971     }
5972     for my $sshlogin (@sshlogins) {
5973         for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5974             if($sshlogin->memfree() < $limit) {
5975                 ::debug("mem","\n",map { $_->seq()." " }
5976                         (sort { $b->seq() <=> $a->seq() }
5977                          @{$jobs_of{$sshlogin}}));
5978                 ::debug("mem","\n", $job->seq(), "killed ",
5979                         $sshlogin->memfree()," < ",$limit);
5980                 if($opt::memsuspend) {
5981                     $job->suspend();
5982                 } else {
5983                     $job->kill();
5984                 }
5985                 $sshlogin->memfree_recompute();
5986             } else {
5987                 last;
5988             }
5989         }
5990         ::debug("mem","Free mem OK? ",
5991                 $sshlogin->memfree()," > ",$limit);
5992     }
5993 }
5994
5995
5996 sub __DEBUGGING__() {}
5997
5998
5999 sub debug(@) {
6000     # Uses:
6001     #   $Global::debug
6002     #   %Global::fd
6003     # Returns: N/A
6004     $Global::debug or return;
6005     @_ = grep { defined $_ ? $_ : "" } @_;
6006     if($Global::debug eq "all" or $Global::debug eq $_[0]) {
6007         if($Global::fd{1}) {
6008             # Original stdout was saved
6009             my $stdout = $Global::fd{1};
6010             print $stdout @_[1..$#_];
6011         } else {
6012             print @_[1..$#_];
6013         }
6014     }
6015 }
6016
6017 sub my_memory_usage() {
6018     # Returns:
6019     #   memory usage if found
6020     #   0 otherwise
6021     use strict;
6022     use FileHandle;
6023
6024     local $/ = "\n";
6025     my $pid = $$;
6026     if(-e "/proc/$pid/stat") {
6027         my $fh = FileHandle->new("</proc/$pid/stat");
6028
6029         my $data = <$fh>;
6030         chomp $data;
6031         $fh->close;
6032
6033         my @procinfo = split(/\s+/,$data);
6034
6035         return undef_as_zero($procinfo[22]);
6036     } else {
6037         return 0;
6038     }
6039 }
6040
6041 sub my_size() {
6042     # Returns:
6043     #   $size = size of object if Devel::Size is installed
6044     #   -1 otherwise
6045     my @size_this = (@_);
6046     eval "use Devel::Size qw(size total_size)";
6047     if ($@) {
6048         return -1;
6049     } else {
6050         return total_size(@_);
6051     }
6052 }
6053
6054 sub my_dump(@) {
6055     # Returns:
6056     #   ascii expression of object if Data::Dump(er) is installed
6057     #   error code otherwise
6058     my @dump_this = (@_);
6059     eval "use Data::Dump qw(dump);";
6060     if ($@) {
6061         # Data::Dump not installed
6062         eval "use Data::Dumper;";
6063         if ($@) {
6064             my $err =  "Neither Data::Dump nor Data::Dumper is installed\n".
6065                 "Not dumping output\n";
6066             ::status($err);
6067             return $err;
6068         } else {
6069             return Dumper(@dump_this);
6070         }
6071     } else {
6072         # Create a dummy Data::Dump:dump as Hans Schou sometimes has
6073         # it undefined
6074         eval "sub Data::Dump:dump {}";
6075         eval "use Data::Dump qw(dump);";
6076         return (Data::Dump::dump(@dump_this));
6077     }
6078 }
6079
6080 sub my_croak(@) {
6081     eval "use Carp; 1";
6082     $Carp::Verbose = 1;
6083     croak(@_);
6084 }
6085
6086 sub my_carp() {
6087     eval "use Carp; 1";
6088     $Carp::Verbose = 1;
6089     carp(@_);
6090 }
6091
6092
6093 sub __OBJECT_ORIENTED_PARTS__() {}
6094
6095
6096 package SSHLogin;
6097
6098 sub new($$) {
6099     my $class = shift;
6100     my $sshlogin_string = shift;
6101     my $ncpus;
6102     my %hostgroups;
6103     # SSHLogins can have these formats:
6104     #   @grp+grp/ncpu//usr/bin/ssh user@server
6105     #   ncpu//usr/bin/ssh user@server
6106     #   /usr/bin/ssh user@server
6107     #   user@server
6108     #   ncpu/user@server
6109     #   @grp+grp/user@server
6110     if($sshlogin_string =~ s:^\@([^/]+)/?::) {
6111         # Look for SSHLogin hostgroups
6112         %hostgroups = map { $_ => 1 } split(/\+/, $1);
6113     }
6114     # An SSHLogin is always in the hostgroup of its "numcpu/host"
6115     $hostgroups{$sshlogin_string} = 1;
6116     if ($sshlogin_string =~ s:^(\d+)/::) {
6117         # Override default autodetected ncpus unless missing
6118         $ncpus = $1;
6119     }
6120     my $string = $sshlogin_string;
6121     # An SSHLogin is always in the hostgroup of its $string-name
6122     $hostgroups{$string} = 1;
6123     @Global::hostgroups{keys %hostgroups} = values %hostgroups;
6124     my @unget = ();
6125     my $no_slash_string = $string;
6126     $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
6127     return bless {
6128         'string' => $string,
6129         'jobs_running' => 0,
6130         'jobs_completed' => 0,
6131         'maxlength' => undef,
6132         'max_jobs_running' => undef,
6133         'orig_max_jobs_running' => undef,
6134         'ncpus' => $ncpus,
6135         'hostgroups' => \%hostgroups,
6136         'sshcommand' => undef,
6137         'serverlogin' => undef,
6138         'control_path_dir' => undef,
6139         'control_path' => undef,
6140         'time_to_login' => undef,
6141         'last_login_at' => undef,
6142         'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6143             $no_slash_string . "/loadavg",
6144         'loadavg' => undef,
6145         'last_loadavg_update' => 0,
6146         'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6147             $no_slash_string . "/swap_activity",
6148         'swap_activity' => undef,
6149     }, ref($class) || $class;
6150 }
6151
6152 sub DESTROY($) {
6153     my $self = shift;
6154     # Remove temporary files if they are created.
6155     ::rm($self->{'loadavg_file'});
6156     ::rm($self->{'swap_activity_file'});
6157 }
6158
6159 sub string($) {
6160     my $self = shift;
6161     return $self->{'string'};
6162 }
6163
6164 sub jobs_running($) {
6165     my $self = shift;
6166     return ($self->{'jobs_running'} || "0");
6167 }
6168
6169 sub inc_jobs_running($) {
6170     my $self = shift;
6171     $self->{'jobs_running'}++;
6172 }
6173
6174 sub dec_jobs_running($) {
6175     my $self = shift;
6176     $self->{'jobs_running'}--;
6177 }
6178
6179 sub set_maxlength($$) {
6180     my $self = shift;
6181     $self->{'maxlength'} = shift;
6182 }
6183
6184 sub maxlength($) {
6185     my $self = shift;
6186     return $self->{'maxlength'};
6187 }
6188
6189 sub jobs_completed() {
6190     my $self = shift;
6191     return $self->{'jobs_completed'};
6192 }
6193
6194 sub in_hostgroups() {
6195     # Input:
6196     #   @hostgroups = the hostgroups to look for
6197     # Returns:
6198     #   true if intersection of @hostgroups and the hostgroups of this
6199     #        SSHLogin is non-empty
6200     my $self = shift;
6201     return grep { defined $self->{'hostgroups'}{$_} } @_;
6202 }
6203
6204 sub hostgroups() {
6205     my $self = shift;
6206     return keys %{$self->{'hostgroups'}};
6207 }
6208
6209 sub inc_jobs_completed($) {
6210     my $self = shift;
6211     $self->{'jobs_completed'}++;
6212     $Global::total_completed++;
6213 }
6214
6215 sub set_max_jobs_running($$) {
6216     my $self = shift;
6217     if(defined $self->{'max_jobs_running'}) {
6218         $Global::max_jobs_running -= $self->{'max_jobs_running'};
6219     }
6220     $self->{'max_jobs_running'} = shift;
6221     if(defined $self->{'max_jobs_running'}) {
6222         # max_jobs_running could be resat if -j is a changed file
6223         $Global::max_jobs_running += $self->{'max_jobs_running'};
6224     }
6225     # Initialize orig to the first non-zero value that comes around
6226     $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
6227 }
6228
6229 sub memfree() {
6230     # Returns:
6231     #   $memfree in bytes
6232     my $self = shift;
6233     $self->memfree_recompute();
6234     # Return 1 if not defined.
6235     return (not defined $self->{'memfree'} or $self->{'memfree'})
6236 }
6237
6238 sub memfree_recompute() {
6239     my $self = shift;
6240     my $script = memfreescript();
6241
6242     # TODO add sshlogin and backgrounding
6243     # Run the script twice if it gives 0 (typically intermittent error)
6244     $self->{'memfree'} = ::qqx($script) || ::qqx($script);
6245     if(not $self->{'memfree'}) {
6246         ::die_bug("Less than 1 byte memory free");
6247     }
6248     #::debug("mem","New free:",$self->{'memfree'}," ");
6249 }
6250
6251 {
6252     my $script;
6253
6254     sub memfreescript() {
6255         # Returns:
6256         #   shellscript for giving available memory in bytes
6257         if(not $script) {
6258             my %script_of = (
6259                 # /proc/meminfo
6260                 # MemFree:          7012 kB
6261                 # Buffers:         19876 kB
6262                 # Cached:         431192 kB
6263                 # SwapCached:          0 kB
6264                 "linux" => (
6265                     q{
6266                         print 1024 * qx{
6267                         awk '/^((Swap)?Cached|MemFree|Buffers):/
6268                         { sum += \$2} END { print sum }'
6269                             /proc/meminfo }
6270                     }),
6271                 # Android uses same code as GNU/Linux
6272                 "android" => (
6273                     q{
6274                         print 1024 * qx{
6275                         awk '/^((Swap)?Cached|MemFree|Buffers):/
6276                         { sum += \$2} END { print sum }'
6277                             /proc/meminfo }
6278                     }),
6279                 # $ vmstat 1 1
6280                 #   procs         memory         page                   faults       cpu
6281                 # r  b  w    avm    free re at  pi po  fr de  sr   in   sy  cs  us sy id
6282                 # 1  0  0 242793  389737  5  1   0  0   0  0   0  107  978  60   1  1 99
6283                 "hpux" => (
6284                     q{
6285                         print (((reverse `vmstat 1 1`)[0]
6286                             =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6287                     }),
6288                 # $ vmstat 1 2
6289                 # kthr      memory            page            disk          faults      cpu
6290                 # r b w   swap  free  re  mf pi po fr de sr s3 s4 -- --   in   sy   cs us sy id
6291                 # 0 0 0 6496720 5170320 68 260 8 2  1  0  0 -0  3  0  0  309 1371  255  1  2 97
6292                 # 0 0 0 6434088 5072656  7  15 8 0  0  0  0  0 261 0  0 1889 1899 3222  0  8 92
6293                 #
6294                 # The second free value is correct
6295                 "solaris" => (
6296                     q{
6297                         print (((reverse `vmstat 1 2`)[0]
6298                                 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6299                     }),
6300                 # hw.pagesize: 4096
6301                 # vm.stats.vm.v_cache_count: 0
6302                 # vm.stats.vm.v_inactive_count: 79574
6303                 # vm.stats.vm.v_free_count: 4507
6304                 "freebsd" => (
6305                     q{
6306                         for(qx{/sbin/sysctl -a}) {
6307                             if (/^([^:]+):\s+(.+)\s*$/s) {
6308                                 $sysctl->{$1} = $2;
6309                             }
6310                         }
6311                         print $sysctl->{"hw.pagesize"} *
6312                             ($sysctl->{"vm.stats.vm.v_cache_count"}
6313                              + $sysctl->{"vm.stats.vm.v_inactive_count"}
6314                              + $sysctl->{"vm.stats.vm.v_free_count"});
6315                     }),
6316                 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6317                 # Pages free:                         198061.
6318                 # Pages active:                       159701.
6319                 # Pages inactive:                      47378.
6320                 # Pages speculative:                   29707.
6321                 # Pages wired down:                    89231.
6322                 # "Translation faults":            928901425.
6323                 # Pages copy-on-write:             156988239.
6324                 # Pages zero filled:               271267894.
6325                 # Pages reactivated:                   48895.
6326                 # Pageins:                           1798068.
6327                 # Pageouts:                              257.
6328                 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
6329                 'darwin' => (
6330                     q{
6331                         $vm = `vm_stat`;
6332                         print (($vm =~ /page size of (\d+)/)[0] *
6333                                (($vm =~ /Pages free:\s+(\d+)/)[0] +
6334                                 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
6335                     }),
6336                 );
6337             my $perlscript = "";
6338             # Make a perl script that detects the OS ($^O) and runs
6339             # the appropriate command
6340             for my $os (keys %script_of) {
6341                 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
6342             }
6343             $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
6344         }
6345         return $script;
6346     }
6347 }
6348
6349 sub limit($) {
6350     # Returns:
6351     #  0 = Below limit. Start another job.
6352     #  1 = Over limit. Start no jobs.
6353     #  2 = Kill youngest job
6354     my $self = shift;
6355
6356     if(not defined $self->{'limitscript'}) {
6357         my %limitscripts =
6358             ("io" => q!
6359              io() {
6360                  limit=$1;
6361                  io_file=$2;
6362                  # Do the measurement in the background
6363                  (tmp=$(tempfile);
6364                  LANG=C iostat -x 1 2 > $tmp;
6365                  mv $tmp $io_file) &
6366                  perl -e '-e $ARGV[0] or exit(1);
6367                    for(reverse <>) {
6368                      /Device/ and last;
6369                      /(\S+)$/ and $max = $max > $1 ? $max : $1; }
6370                    exit ($max < '$limit')' $io_file;
6371              };
6372              export -f io;
6373              io %s %s
6374              !,
6375              "mem" => q!
6376              mem() {
6377                  limit=$1;
6378                  awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
6379                       END {
6380                          if (sum*1024 < '$limit'/2) { exit 2; }
6381                          else { exit (sum*1024 < '$limit') }
6382                       }' /proc/meminfo;
6383              };
6384              export -f mem;
6385              mem %s;
6386              !,
6387              "load" => q!
6388              load() {
6389                  limit=$1;
6390                  ps ax -o state,command |
6391                      grep -E '^[DOR].[^[]' |
6392                      wc -l |
6393                  perl -ne 'exit ('$limit' < $_)';
6394              };
6395              export -f load;
6396              load %s;
6397              !,
6398             );
6399         my ($cmd,@args) = split /\s+/,$opt::limit;
6400         if($limitscripts{$cmd}) {
6401             my $tmpfile = ::tmpname("parlmt");
6402             ++$Global::unlink{$tmpfile};
6403             $self->{'limitscript'} =
6404             ::spacefree(1, sprintf($limitscripts{$cmd},
6405                                    ::multiply_binary_prefix(@args),$tmpfile));
6406         } else {
6407             $self->{'limitscript'} = $opt::limit;
6408         }
6409     }
6410
6411     my %env = %ENV;
6412     local %ENV = %env;
6413     $ENV{'SSHLOGIN'} = $self->string();
6414     system($Global::shell,"-c",$self->{'limitscript'});
6415     ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
6416     return $?>>8;
6417 }
6418
6419
6420 sub swapping($) {
6421     my $self = shift;
6422     my $swapping = $self->swap_activity();
6423     return (not defined $swapping or $swapping)
6424 }
6425
6426 sub swap_activity($) {
6427     # If the currently known swap activity is too old:
6428     #   Recompute a new one in the background
6429     # Returns:
6430     #   last swap activity computed
6431     my $self = shift;
6432     # Should we update the swap_activity file?
6433     my $update_swap_activity_file = 0;
6434     if(-r $self->{'swap_activity_file'}) {
6435         open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
6436             ::die_bug("swap_activity_file-r");
6437         my $swap_out = <$swap_fh>;
6438         close $swap_fh;
6439         if($swap_out =~ /^(\d+)$/) {
6440             $self->{'swap_activity'} = $1;
6441             ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
6442         }
6443         ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
6444         if(time - $self->{'last_swap_activity_update'} > 10) {
6445             # last swap activity update was started 10 seconds ago
6446             ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
6447             $update_swap_activity_file = 1;
6448         }
6449     } else {
6450         ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
6451         $self->{'swap_activity'} = undef;
6452         $update_swap_activity_file = 1;
6453     }
6454     if($update_swap_activity_file) {
6455         ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
6456         $self->{'last_swap_activity_update'} = time;
6457         my $dir = ::dirname($self->{'swap_activity_file'});
6458         -d $dir or eval { File::Path::mkpath($dir); };
6459         my $swap_activity;
6460         $swap_activity = swapactivityscript();
6461         if($self->{'string'} ne ":") {
6462             $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
6463                 ::Q($swap_activity);
6464         }
6465         # Run swap_activity measuring.
6466         # As the command can take long to run if run remote
6467         # save it to a tmp file before moving it to the correct file
6468         my $file = $self->{'swap_activity_file'};
6469         my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
6470         ::debug("swap", "\n", $swap_activity, "\n");
6471         ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
6472     }
6473     return $self->{'swap_activity'};
6474 }
6475
6476 {
6477     my $script;
6478
6479     sub swapactivityscript() {
6480         # Returns:
6481         #   shellscript for detecting swap activity
6482         #
6483         # arguments for vmstat are OS dependant
6484         # swap_in and swap_out are in different columns depending on OS
6485         #
6486         if(not $script) {
6487             my %vmstat = (
6488                 # linux: $7*$8
6489                 # $ vmstat 1 2
6490                 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6491                 #  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
6492                 #  5  0  51208 1701096 198012 18857888    0    0    37   153   28   19 56 11 33  1
6493                 #  3  0  51208 1701288 198012 18857972    0    0     0     0 3638 10412 15  3 82  0
6494                 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6495
6496                 # solaris: $6*$7
6497                 # $ vmstat -S 1 2
6498                 #  kthr      memory            page            disk          faults      cpu
6499                 #  r b w   swap  free  si  so pi po fr de sr s3 s4 -- --   in   sy   cs us sy id
6500                 #  0 0 0 4628952 3208408 0  0  3  1  1  0  0 -0  2  0  0  263  613  246  1  2 97
6501                 #  0 0 0 4552504 3166360 0  0  0  0  0  0  0  0  0  0  0  246  213  240  1  1 98
6502                 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6503
6504                 # darwin (macosx): $21*$22
6505                 # $ vm_stat -c 2 1
6506                 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6507                 #     free   active   specul inactive throttle    wired  prgable   faults     copy    0fill reactive   purged file-backed anonymous cmprssed cmprssor  dcomprs   comprs  pageins  pageout  swapins swapouts
6508                 #   346306   829050    74871   606027        0   240231    90367  544858K 62343596  270837K    14178   415070      570102    939846      356      370      116      922  4019813        4        0        0
6509                 #   345740   830383    74875   606031        0   239234    90369     2696      359      553        0        0      570110    941179      356      370        0        0        0        0        0        0
6510                 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6511
6512                 # ultrix: $12*$13
6513                 # $ vmstat -S 1 2
6514                 #  procs      faults    cpu      memory              page             disk
6515                 #  r b w   in  sy  cs us sy id  avm  fre  si so  pi  po  fr  de  sr s0
6516                 #  1 0 0    4  23   2  3  0 97 7743 217k   0  0   0   0   0   0   0  0
6517                 #  1 0 0    6  40   8  0  1 99 7743 217k   0  0   3   0   0   0   0  0
6518                 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6519
6520                 # aix: $6*$7
6521                 # $ vmstat 1 2
6522                 # System configuration: lcpu=1 mem=2048MB
6523                 #
6524                 # kthr    memory              page              faults        cpu
6525                 # ----- ----------- ------------------------ ------------ -----------
6526                 #  r  b   avm   fre  re  pi  po  fr   sr  cy  in   sy  cs us sy id wa
6527                 #  0  0 333933 241803   0   0   0   0    0   0  10  143  90  0  0 99  0
6528                 #  0  0 334125 241569   0   0   0   0    0   0  37 5368 184  0  9 86  5
6529                 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6530
6531                 # freebsd: $8*$9
6532                 # $ vmstat -H 1 2
6533                 #  procs      memory      page                    disks     faults         cpu
6534                 #  r b w     avm    fre   flt  re  pi  po    fr  sr ad0 ad1   in   sy   cs us sy id
6535                 #  1 0 0  596716   19560    32   0   0   0    33   8   0   0   11  220  277  0  0 99
6536                 #  0 0 0  596716   19560     2   0   0   0     0   0   0   0   11  144  263  0  1 99
6537                 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6538
6539                 # mirbsd: $8*$9
6540                 # $ vmstat 1 2
6541                 #  procs   memory        page                    disks     traps         cpu
6542                 #  r b w    avm    fre   flt  re  pi  po  fr  sr wd0 cd0  int   sys   cs us sy id
6543                 #  0 0 0  25776 164968    34   0   0   0   0   0   0   0  230   259   38  4  0 96
6544                 #  0 0 0  25776 164968    24   0   0   0   0   0   0   0  237   275   37  0  0 100
6545                 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6546
6547                 # netbsd: $7*$8
6548                 # $ vmstat 1 2
6549                 #  procs    memory      page                       disks   faults      cpu
6550                 #  r b      avm    fre  flt  re  pi   po   fr   sr w0 w1   in   sy  cs us sy id
6551                 #  0 0   138452   6012   54   0   0    0    1    2  3  0    4  100  23  0  0 100
6552                 #  0 0   138456   6008    1   0   0    0    0    0  0  0    7   26  19  0 0 100
6553                 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6554
6555                 # openbsd: $8*$9
6556                 # $ vmstat 1 2
6557                 #  procs    memory       page                    disks    traps          cpu
6558                 #  r b w    avm     fre  flt  re  pi  po  fr  sr wd0 wd1  int   sys   cs us sy id
6559                 #  0 0 0  76596  109944   73   0   0   0   0   0   0   1    5   259   22  0  1 99
6560                 #  0 0 0  76604  109936   24   0   0   0   0   0   0   0    7   114   20  0  1 99
6561                 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6562
6563                 # hpux: $8*$9
6564                 # $ vmstat 1 2
6565                 #          procs           memory                   page                              faults       cpu
6566                 #     r     b     w      avm    free   re   at    pi   po    fr   de    sr     in     sy    cs  us sy id
6567                 #     1     0     0   247211  216476    4    1     0    0     0    0     0    102  73005    54   6 11 83
6568                 #     1     0     0   247211  216421   43    9     0    0     0    0     0    144   1675    96  25269512791222387000 25269512791222387000 105
6569                 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6570
6571                 # dec_osf (tru64): $11*$12
6572                 # $ vmstat  1 2
6573                 # Virtual Memory Statistics: (pagesize = 8192)
6574                 #   procs      memory        pages                            intr       cpu
6575                 #   r   w   u  act free wire fault  cow zero react  pin pout  in  sy  cs us sy id
6576                 #   3 181  36  51K 1895 8696  348M  59M 122M   259  79M    0   5 218 302  4  1 94
6577                 #   3 181  36  51K 1893 8696     3   15   21     0   28    0   4  81 321  1  1 98
6578                 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6579
6580                 # gnu (hurd): $7*$8
6581                 # $ vmstat -k 1 2
6582                 # (pagesize: 4, size: 512288, swap size: 894972)
6583                 #   free   actv  inact  wired   zeroed  react    pgins   pgouts  pfaults  cowpfs hrat    caobj  cache swfree
6584                 # 371940  30844  89228  20276   298348      0    48192    19016   756105   99808  98%      876  20628 894972
6585                 # 371940  30844  89228  20276       +0     +0       +0       +0      +42      +2  98%      876  20628 894972
6586                 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6587
6588                 # -nto (qnx has no swap)
6589                 #-irix
6590                 #-svr5 (scosysv)
6591                 );
6592             my $perlscript = "";
6593             # Make a perl script that detects the OS ($^O) and runs
6594             # the appropriate vmstat command
6595             for my $os (keys %vmstat) {
6596                 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6597                 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6598                     $vmstat{$os}[1] . '}"` }';
6599             }
6600             $script = "perl -e " . ::Q($perlscript);
6601         }
6602         return $script;
6603     }
6604 }
6605
6606 sub too_fast_remote_login($) {
6607     my $self = shift;
6608     if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6609         # sshd normally allows 10 simultaneous logins
6610         # A login takes time_to_login
6611         # So time_to_login/5 should be safe
6612         # If now <= last_login + time_to_login/5: Then it is too soon.
6613         my $too_fast = (::now() <= $self->{'last_login_at'}
6614                         + $self->{'time_to_login'}/5);
6615         ::debug("run", "Too fast? $too_fast ");
6616         return $too_fast;
6617     } else {
6618         # No logins so far (or time_to_login not computed): it is not too fast
6619         return 0;
6620     }
6621 }
6622
6623 sub last_login_at($) {
6624     my $self = shift;
6625     return $self->{'last_login_at'};
6626 }
6627
6628 sub set_last_login_at($$) {
6629     my $self = shift;
6630     $self->{'last_login_at'} = shift;
6631 }
6632
6633 sub loadavg_too_high($) {
6634     my $self = shift;
6635     my $loadavg = $self->loadavg();
6636     if(defined $loadavg) {
6637         ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6638         return $loadavg >= $self->max_loadavg();
6639     } else {
6640         # Unknown load: Assume load is too high
6641         return 1;
6642     }
6643 }
6644
6645 {
6646     my $cmd;
6647     sub loadavg_cmd() {
6648         if(not $cmd) {
6649             # aix => "ps -ae -o state,command" # state wrong
6650             # bsd => "ps ax -o state,command"
6651             # sysv => "ps -ef -o s -o comm"
6652             # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6653             #    /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6654             #    awk '{print $2,$1}'
6655             # dec_osf => bsd
6656             # dragonfly => bsd
6657             # freebsd => bsd
6658             # gnu => bsd
6659             # hpux => ps -el|awk '{print $2,$14,$15}'
6660             # irix => ps -ef -o state -o comm
6661             # linux => bsd
6662             # minix => ps el|awk '{print \$1,\$11}'
6663             # mirbsd => bsd
6664             # netbsd => bsd
6665             # openbsd => bsd
6666             # solaris => sysv
6667             # svr5 => sysv
6668             # ultrix => ps -ax | awk '{print $3,$5}'
6669             # unixware => ps -el|awk '{print $2,$14,$15}'
6670             my $ps = ::spacefree(1,q{
6671                 $sysv="ps -ef -o s -o comm";
6672                 $sysv2="ps -ef -o state -o comm";
6673                 $bsd="ps ax -o state,command";
6674                 # Treat threads as processes
6675                 $bsd2="ps axH -o state,command";
6676                 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6677                 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6678                     /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6679                     awk '{print $2,$1}' };
6680                 $dummy="echo S COMMAND;echo R dummy";
6681                 %ps=(
6682                     # TODO Find better code for AIX/Android
6683                     'aix' => "uptime",
6684                     'android' => "uptime",
6685                     'cygwin' => $cygwin,
6686                     'darwin' => $bsd,
6687                     'dec_osf' => $sysv2,
6688                     'dragonfly' => $bsd,
6689                     'freebsd' => $bsd2,
6690                     'gnu' => $bsd,
6691                     'hpux' => $psel,
6692                     'irix' => $sysv2,
6693                     'linux' => $bsd2,
6694                     'minix' => "ps el|awk '{print \$1,\$11}'",
6695                     'mirbsd' => $bsd,
6696                     'msys' => $cygwin,
6697                     'netbsd' => $bsd,
6698                     'nto' => $dummy,
6699                     'openbsd' => $bsd,
6700                     'solaris' => $sysv,
6701                     'svr5' => $psel,
6702                     'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6703                     'MSWin32' => $sysv,
6704                     );
6705                 print `$ps{$^O}`;
6706             });
6707             # The command is too long for csh, so base64_wrap the command
6708             $cmd = Job::base64_wrap($ps);
6709         }
6710         return $cmd;
6711     }
6712 }
6713
6714
6715 sub loadavg($) {
6716     # If the currently know loadavg is too old:
6717     #   Recompute a new one in the background
6718     # The load average is computed as the number of processes waiting for disk
6719     # or CPU right now. So it is the server load this instant and not averaged over
6720     # several minutes. This is needed so GNU  Parallel will at most start one job
6721     # that will push the load over the limit.
6722     #
6723     # Returns:
6724     #   $last_loadavg = last load average computed (undef if none)
6725     my $self = shift;
6726     # Should we update the loadavg file?
6727     my $update_loadavg_file = 0;
6728     if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6729         local $/; # $/ = undef => slurp whole file
6730         my $load_out = <$load_fh>;
6731         close $load_fh;
6732         if($load_out =~ /\S/) {
6733             # Content can be empty if ~/ is on NFS
6734             # due to reading being non-atomic.
6735             #
6736             # Count lines starting with D,O,R but command does not start with [
6737             my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6738             if($load > 0) {
6739                 # load is overestimated by 1
6740                 $self->{'loadavg'} = $load - 1;
6741                 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6742             } elsif ($load_out=~/average: (\d+.\d+)/) {
6743                 # AIX does not support instant load average
6744                 # 04:11AM   up 21 days,  12:55,  1 user,  load average: 1.85, 1.57, 1.55
6745                 $self->{'loadavg'} = $1;
6746             } else {
6747                 ::die_bug("loadavg_invalid_content: " .
6748                           $self->{'loadavg_file'} . "\n$load_out");
6749             }
6750         }
6751         $update_loadavg_file = 1;
6752     } else {
6753         ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6754         $self->{'loadavg'} = undef;
6755         $update_loadavg_file = 1;
6756     }
6757     if($update_loadavg_file) {
6758         ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
6759         $self->{'last_loadavg_update'} = time;
6760         my $dir = ::dirname($self->{'swap_activity_file'});
6761         -d $dir or eval { File::Path::mkpath($dir); };
6762         -w $dir or ::die_bug("Cannot write to $dir");
6763         my $cmd = "";
6764         if($self->{'string'} ne ":") {
6765             $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
6766                 ::Q(loadavg_cmd());
6767         } else {
6768             $cmd .= loadavg_cmd();
6769         }
6770         # As the command can take long to run if run remote
6771         # save it to a tmp file before moving it to the correct file
6772         ::debug("load", "Update load\n");
6773         my $file = $self->{'loadavg_file'};
6774         # tmpfile on same filesystem as $file
6775         my $tmpfile = $file.$$;
6776         ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6777     }
6778     return $self->{'loadavg'};
6779 }
6780
6781 sub max_loadavg($) {
6782     my $self = shift;
6783     # If --load is a file it might be changed
6784     if($Global::max_load_file) {
6785         my $mtime = (stat($Global::max_load_file))[9];
6786         if($mtime > $Global::max_load_file_last_mod) {
6787             $Global::max_load_file_last_mod = $mtime;
6788             for my $sshlogin (values %Global::host) {
6789                 $sshlogin->set_max_loadavg(undef);
6790             }
6791         }
6792     }
6793     if(not defined $self->{'max_loadavg'}) {
6794         $self->{'max_loadavg'} =
6795             $self->compute_max_loadavg($opt::load);
6796     }
6797     ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6798     return $self->{'max_loadavg'};
6799 }
6800
6801 sub set_max_loadavg($$) {
6802     my $self = shift;
6803     $self->{'max_loadavg'} = shift;
6804 }
6805
6806 sub compute_max_loadavg($) {
6807     # Parse the max loadaverage that the user asked for using --load
6808     # Returns:
6809     #   max loadaverage
6810     my $self = shift;
6811     my $loadspec = shift;
6812     my $load;
6813     if(defined $loadspec) {
6814         if($loadspec =~ /^\+(\d+)$/) {
6815             # E.g. --load +2
6816             my $j = $1;
6817             $load =
6818                 $self->ncpus() + $j;
6819         } elsif ($loadspec =~ /^-(\d+)$/) {
6820             # E.g. --load -2
6821             my $j = $1;
6822             $load =
6823                 $self->ncpus() - $j;
6824         } elsif ($loadspec =~ /^(\d+)\%$/) {
6825             my $j = $1;
6826             $load =
6827                 $self->ncpus() * $j / 100;
6828         } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6829             $load = $1;
6830         } elsif (-f $loadspec) {
6831             $Global::max_load_file = $loadspec;
6832             $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6833             if(open(my $in_fh, "<", $Global::max_load_file)) {
6834                 my $opt_load_file = join("",<$in_fh>);
6835                 close $in_fh;
6836                 $load = $self->compute_max_loadavg($opt_load_file);
6837             } else {
6838                 ::error("Cannot open $loadspec.");
6839                 ::wait_and_exit(255);
6840             }
6841         } else {
6842             ::error("Parsing of --load failed.");
6843             ::die_usage();
6844         }
6845         if($load < 0.01) {
6846             $load = 0.01;
6847         }
6848     }
6849     return $load;
6850 }
6851
6852 sub time_to_login($) {
6853     my $self = shift;
6854     return $self->{'time_to_login'};
6855 }
6856
6857 sub set_time_to_login($$) {
6858     my $self = shift;
6859     $self->{'time_to_login'} = shift;
6860 }
6861
6862 sub max_jobs_running($) {
6863     my $self = shift;
6864     if(not defined $self->{'max_jobs_running'}) {
6865         my $nproc = $self->compute_number_of_processes($opt::jobs);
6866         $self->set_max_jobs_running($nproc);
6867     }
6868     return $self->{'max_jobs_running'};
6869 }
6870
6871 sub orig_max_jobs_running($) {
6872     my $self = shift;
6873     return $self->{'orig_max_jobs_running'};
6874 }
6875
6876 sub compute_number_of_processes($) {
6877     # Number of processes wanted and limited by system resources
6878     # Returns:
6879     #   Number of processes
6880     my $self = shift;
6881     my $opt_P = shift;
6882     my $wanted_processes = $self->user_requested_processes($opt_P);
6883     if(not defined $wanted_processes) {
6884         $wanted_processes = $Global::default_simultaneous_sshlogins;
6885     }
6886     ::debug("load", "Wanted procs: $wanted_processes\n");
6887     my $system_limit =
6888         $self->processes_available_by_system_limit($wanted_processes);
6889     ::debug("load", "Limited to procs: $system_limit\n");
6890     return $system_limit;
6891 }
6892
6893 {
6894     my @children;
6895     my $max_system_proc_reached;
6896     my $more_filehandles;
6897     my %fh;
6898     my $tmpfhname;
6899     my $count_jobs_already_read;
6900     my @jobs;
6901     my $job;
6902     my @args;
6903     my $arg;
6904
6905     sub reserve_filehandles($) {
6906         # Reserves filehandle
6907         my $n = shift;
6908         for (1..$n) {
6909             $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6910         }
6911     }
6912
6913     sub reserve_process() {
6914         # Spawn a dummy process
6915         my $child;
6916         if($child = fork()) {
6917             push @children, $child;
6918             $Global::unkilled_children{$child} = 1;
6919         } elsif(defined $child) {
6920             # This is the child
6921             # The child takes one process slot
6922             # It will be killed later
6923             $SIG{'TERM'} = $Global::original_sig{'TERM'};
6924             if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6925                 # The exec does not work on Cygwin and QNX
6926                 sleep 10101010;
6927             } else {
6928                 # 'exec sleep' takes less RAM than sleeping in perl
6929                 exec 'sleep', 10101;
6930             }
6931             exit(0);
6932         } else {
6933             # Failed to spawn
6934             $max_system_proc_reached = 1;
6935         }
6936     }
6937
6938     sub get_args_or_jobs() {
6939         # Get an arg or a job (depending on mode)
6940         if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6941             # Skip: No need to get args
6942             return 1;
6943         } elsif(defined $opt::retries and $count_jobs_already_read) {
6944             # For retries we may need to run all jobs on this sshlogin
6945             # so include the already read jobs for this sshlogin
6946             $count_jobs_already_read--;
6947             return 1;
6948         } else {
6949             if($opt::X or $opt::m) {
6950                 # The arguments may have to be re-spread over several jobslots
6951                 # So pessimistically only read one arg per jobslot
6952                 # instead of a full commandline
6953                 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6954                     if($Global::JobQueue->empty()) {
6955                         return 0;
6956                     } else {
6957                         $job = $Global::JobQueue->get();
6958                         push(@jobs, $job);
6959                         return 1;
6960                     }
6961                 } else {
6962                     $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6963                     push(@args, $arg);
6964                     return 1;
6965                 }
6966             } else {
6967                 # If there are no more command lines, then we have a process
6968                 # per command line, so no need to go further
6969                 if($Global::JobQueue->empty()) {
6970                     return 0;
6971                 } else {
6972                     $job = $Global::JobQueue->get();
6973                     # Replacement must happen here due to seq()
6974                     $job and $job->replaced();
6975                     push(@jobs, $job);
6976                     return 1;
6977                 }
6978             }
6979         }
6980     }
6981
6982     sub cleanup() {
6983         # Cleanup: Close the files
6984         for (values %fh) { close $_ }
6985         # Cleanup: Kill the children
6986         for my $pid (@children) {
6987             kill 9, $pid;
6988             waitpid($pid,0);
6989             delete $Global::unkilled_children{$pid};
6990         }
6991         # Cleanup: Unget the command_lines or the @args
6992         $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6993         @args = ();
6994         $Global::JobQueue->unget(@jobs);
6995         @jobs = ();
6996     }
6997
6998     sub processes_available_by_system_limit($) {
6999         # If the wanted number of processes is bigger than the system limits:
7000         # Limit them to the system limits
7001         # Limits are: File handles, number of input lines, processes,
7002         # and taking > 1 second to spawn 10 extra processes
7003         # Returns:
7004         #   Number of processes
7005         my $self = shift;
7006         my $wanted_processes = shift;
7007         my $system_limit = 0;
7008         my $slow_spawning_warning_printed = 0;
7009         my $time = time;
7010         $more_filehandles = 1;
7011         $tmpfhname = "TmpFhNamE";
7012
7013         # perl uses 7 filehandles for something?
7014         # parallel uses 1 for memory_usage
7015         # parallel uses 4 for ?
7016         reserve_filehandles(12);
7017         # Two processes for load avg and ?
7018         reserve_process();
7019         reserve_process();
7020
7021         # For --retries count also jobs already run
7022         $count_jobs_already_read = $Global::JobQueue->next_seq();
7023         my $wait_time_for_getting_args = 0;
7024         my $start_time = time;
7025         while(1) {
7026             $system_limit >= $wanted_processes and last;
7027             not $more_filehandles and last;
7028             $max_system_proc_reached and last;
7029
7030             my $before_getting_arg = time;
7031             if(!$Global::dummy_jobs) {
7032                 get_args_or_jobs() or last;
7033             }
7034             $wait_time_for_getting_args += time - $before_getting_arg;
7035             $system_limit++;
7036
7037             # Every simultaneous process uses 2 filehandles to write to
7038             # and 2 filehandles to read from
7039             reserve_filehandles(4);
7040
7041             # System process limit
7042             reserve_process();
7043
7044             my $forktime = time - $time - $wait_time_for_getting_args;
7045             ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
7046                     $forktime,
7047                     " (processes so far: ", $system_limit,")\n");
7048             if($system_limit > 10 and
7049                $forktime > 1 and
7050                $forktime > $system_limit * 0.01
7051                and not $slow_spawning_warning_printed) {
7052                 # It took more than 0.01 second to fork a processes on avg.
7053                 # Give the user a warning. He can press Ctrl-C if this
7054                 # sucks.
7055                 ::warning("Starting $system_limit processes took > $forktime sec.",
7056                           "Consider adjusting -j. Press CTRL-C to stop.");
7057                 $slow_spawning_warning_printed = 1;
7058             }
7059         }
7060         cleanup();
7061
7062         if($system_limit < $wanted_processes) {
7063             # The system_limit is less than the wanted_processes
7064             if($system_limit < 1 and not $Global::JobQueue->empty()) {
7065                 ::warning("Cannot spawn any jobs. ".
7066                           "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
7067                           "or /proc/sys/kernel/pid_max may help.");
7068                 ::wait_and_exit(255);
7069             }
7070             if(not $more_filehandles) {
7071                 ::warning("Only enough file handles to run ".
7072                           $system_limit. " jobs in parallel.",
7073                           "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
7074                           "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
7075                           "or /proc/sys/fs/file-max may help.");
7076             }
7077             if($max_system_proc_reached) {
7078                 ::warning("Only enough available processes to run ".
7079                           $system_limit. " jobs in parallel.",
7080                           "Raising ulimit -u or /etc/security/limits.conf ",
7081                           "or /proc/sys/kernel/pid_max may help.");
7082             }
7083         }
7084         if($] == 5.008008 and $system_limit > 1000) {
7085             # https://savannah.gnu.org/bugs/?36942
7086             $system_limit = 1000;
7087         }
7088         if($Global::JobQueue->empty()) {
7089             $system_limit ||= 1;
7090         }
7091         if($self->string() ne ":" and
7092            $system_limit > $Global::default_simultaneous_sshlogins) {
7093             $system_limit =
7094                 $self->simultaneous_sshlogin_limit($system_limit);
7095         }
7096         return $system_limit;
7097     }
7098 }
7099
7100 sub simultaneous_sshlogin_limit($) {
7101     # Test by logging in wanted number of times simultaneously
7102     # Returns:
7103     #   min($wanted_processes,$working_simultaneous_ssh_logins-1)
7104     my $self = shift;
7105     my $wanted_processes = shift;
7106     if($self->{'time_to_login'}) {
7107         return $wanted_processes;
7108     }
7109
7110     # Try twice because it guesses wrong sometimes
7111     # Choose the minimal
7112     my $ssh_limit =
7113         ::min($self->simultaneous_sshlogin($wanted_processes),
7114               $self->simultaneous_sshlogin($wanted_processes));
7115     if($ssh_limit < $wanted_processes) {
7116         my $serverlogin = $self->serverlogin();
7117         ::warning("ssh to $serverlogin only allows ".
7118                   "for $ssh_limit simultaneous logins.",
7119                   "You may raise this by changing",
7120                   "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
7121                   "You can also try --sshdelay 0.1",
7122                   "Using only ".($ssh_limit-1)." connections ".
7123                   "to avoid race conditions.");
7124         # Race condition can cause problem if using all sshs.
7125         if($ssh_limit > 1) { $ssh_limit -= 1; }
7126     }
7127     return $ssh_limit;
7128 }
7129
7130 sub simultaneous_sshlogin($) {
7131     # Using $sshlogin try to see if we can do $wanted_processes
7132     # simultaneous logins
7133     # (ssh host echo simul-login & ssh host echo simul-login & ...) |
7134     #   grep simul|wc -l
7135     # Input:
7136     #   $wanted_processes = Try for this many logins in parallel
7137     # Returns:
7138     #   $ssh_limit = Number of succesful parallel logins
7139     local $/ = "\n";
7140     my $self = shift;
7141     my $wanted_processes = shift;
7142     my $sshcmd = $self->sshcommand();
7143     my $serverlogin = $self->serverlogin();
7144     my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
7145     # TODO sh -c wrapper to work for csh
7146     my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
7147                "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
7148     ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
7149     open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
7150         ::die_bug("simultaneouslogin");
7151     my $ssh_limit = <$simul_fh>;
7152     close $simul_fh;
7153     chomp $ssh_limit;
7154     return $ssh_limit;
7155 }
7156
7157 sub set_ncpus($$) {
7158     my $self = shift;
7159     $self->{'ncpus'} = shift;
7160 }
7161
7162 sub user_requested_processes($) {
7163     # Parse the number of processes that the user asked for using -j
7164     # Input:
7165     #   $opt_P = string formatted as for -P
7166     # Returns:
7167     #   $processes = the number of processes to run on this sshlogin
7168     my $self = shift;
7169     my $opt_P = shift;
7170     my $processes;
7171     if(defined $opt_P) {
7172         if($opt_P =~ /^\+(\d+)$/) {
7173             # E.g. -P +2
7174             my $j = $1;
7175             $processes =
7176                 $self->ncpus() + $j;
7177         } elsif ($opt_P =~ /^-(\d+)$/) {
7178             # E.g. -P -2
7179             my $j = $1;
7180             $processes =
7181                 $self->ncpus() - $j;
7182         } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
7183             # E.g. -P 10.5%
7184             my $j = $1;
7185             $processes =
7186                 $self->ncpus() * $j / 100;
7187         } elsif ($opt_P =~ /^(\d+)$/) {
7188             $processes = $1;
7189             if($processes == 0) {
7190                 # -P 0 = infinity (or at least close)
7191                 $processes = $Global::infinity;
7192             }
7193         } elsif (-f $opt_P) {
7194             $Global::max_procs_file = $opt_P;
7195             if(open(my $in_fh, "<", $Global::max_procs_file)) {
7196                 my $opt_P_file = join("",<$in_fh>);
7197                 close $in_fh;
7198                 $processes = $self->user_requested_processes($opt_P_file);
7199             } else {
7200                 ::error("Cannot open $opt_P.");
7201                 ::wait_and_exit(255);
7202             }
7203         } else {
7204             ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
7205             ::die_usage();
7206         }
7207         $processes = ::ceil($processes);
7208     }
7209     return $processes;
7210 }
7211
7212 sub ncpus($) {
7213     # Number of CPU threads
7214     # --use_sockets_instead_of_threads = count socket instead
7215     # --use_cores_instead_of_threads = count physical cores instead
7216     # Returns:
7217     #   $ncpus = number of cpu (threads) on this sshlogin
7218     local $/ = "\n";
7219     my $self = shift;
7220     if(not defined $self->{'ncpus'}) {
7221         my $sshcmd = $self->sshcommand();
7222         my $serverlogin = $self->serverlogin();
7223         if($serverlogin eq ":") {
7224             if($opt::use_sockets_instead_of_threads) {
7225                 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
7226             } elsif($opt::use_cores_instead_of_threads) {
7227                 $self->{'ncpus'} = socket_core_thread()->{'cores'};
7228             } else {
7229                 $self->{'ncpus'} = socket_core_thread()->{'threads'};
7230             }
7231         } else {
7232             my $ncpu;
7233             ::debug("init","echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7234             if($opt::use_sockets_instead_of_threads
7235                or
7236                 $opt::use_cpus_instead_of_cores) {
7237                 $ncpu =
7238                     ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7239             } elsif($opt::use_cores_instead_of_threads) {
7240                 $ncpu =
7241                     ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
7242             } else {
7243                 $ncpu =
7244                     ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
7245             }
7246             chomp $ncpu;
7247             if($ncpu =~ /^\s*[0-9]+\s*$/s) {
7248                 $self->{'ncpus'} = $ncpu;
7249             } else {
7250                 ::warning("Could not figure out ".
7251                           "number of cpus on $serverlogin ($ncpu). Using 1.");
7252                 $self->{'ncpus'} = 1;
7253             }
7254         }
7255     }
7256     return $self->{'ncpus'};
7257 }
7258
7259
7260 sub nproc() {
7261     # Returns:
7262     #   Number of threads using `nproc`
7263     my $no_of_threads = ::qqx("nproc");
7264     chomp $no_of_threads;
7265     return $no_of_threads;
7266 }
7267
7268 sub no_of_sockets() {
7269     return socket_core_thread()->{'sockets'};
7270 }
7271
7272 sub no_of_cores() {
7273     return socket_core_thread()->{'cores'};
7274 }
7275
7276 sub no_of_threads() {
7277     return socket_core_thread()->{'threads'};
7278 }
7279
7280 sub socket_core_thread() {
7281     # Returns:
7282     #   {
7283     #     'sockets' => #sockets = number of socket with CPU present
7284     #     'cores' => #cores = number of physical cores
7285     #     'threads' => #threads = number of compute cores (hyperthreading)
7286     #     'active' => #taskset_threads = number of taskset limited cores
7287     #   }
7288     my $cpu;
7289     my $cached_cpuspec = $Global::cache_dir . "/tmp/sshlogin/" .
7290         ::hostname() . "/cpuspec";
7291     if(-e $cached_cpuspec and -M $cached_cpuspec < 1) {
7292         # Reading cached copy instead of /proc/cpuinfo is 17 ms faster
7293         local $/ = "\n";
7294         if(open(my $in_fh, "<", $cached_cpuspec)) {
7295             ::debug("init","Read $cached_cpuspec\n");
7296             $cpu->{'sockets'} = int(<$in_fh>);
7297             $cpu->{'cores'} = int(<$in_fh>);
7298             $cpu->{'threads'} = int(<$in_fh>);
7299             close $in_fh;
7300         }
7301     }
7302     if ($^O eq 'linux') {
7303         $cpu = sct_gnu_linux($cpu);
7304     } elsif ($^O eq 'android') {
7305         $cpu = sct_android($cpu);
7306     } elsif ($^O eq 'freebsd') {
7307         $cpu = sct_freebsd($cpu);
7308     } elsif ($^O eq 'netbsd') {
7309         $cpu = sct_netbsd($cpu);
7310     } elsif ($^O eq 'openbsd') {
7311         $cpu = sct_openbsd($cpu);
7312     } elsif ($^O eq 'gnu') {
7313         $cpu = sct_hurd($cpu);
7314     } elsif ($^O eq 'darwin') {
7315         $cpu = sct_darwin($cpu);
7316     } elsif ($^O eq 'solaris') {
7317         $cpu = sct_solaris($cpu);
7318     } elsif ($^O eq 'aix') {
7319         $cpu = sct_aix($cpu);
7320     } elsif ($^O eq 'hpux') {
7321         $cpu = sct_hpux($cpu);
7322     } elsif ($^O eq 'nto') {
7323         $cpu = sct_qnx($cpu);
7324     } elsif ($^O eq 'svr5') {
7325         $cpu = sct_openserver($cpu);
7326     } elsif ($^O eq 'irix') {
7327         $cpu = sct_irix($cpu);
7328     } elsif ($^O eq 'dec_osf') {
7329         $cpu = sct_tru64($cpu);
7330     } else {
7331         # Try all methods until we find something that works
7332         $cpu = (sct_gnu_linux($cpu)
7333                 || sct_android($cpu)
7334                 || sct_freebsd($cpu)
7335                 || sct_netbsd($cpu)
7336                 || sct_openbsd($cpu)
7337                 || sct_hurd($cpu)
7338                 || sct_darwin($cpu)
7339                 || sct_solaris($cpu)
7340                 || sct_aix($cpu)
7341                 || sct_hpux($cpu)
7342                 || sct_qnx($cpu)
7343                 || sct_openserver($cpu)
7344                 || sct_irix($cpu)
7345                 || sct_tru64($cpu)
7346             );
7347     }
7348     if(not grep { $_ > 0 } values %$cpu) {
7349         $cpu = undef;
7350     }
7351     # Write cached copy instead of /proc/cpuinfo is 17 ms faster
7352     if($cpu and open(my $out_fh, ">", $cached_cpuspec)) {
7353         print $out_fh (map { chomp; "$_\n" }
7354                        $cpu->{'sockets'},
7355                        $cpu->{'cores'},
7356                        $cpu->{'threads'});
7357         close $out_fh;
7358     }
7359     if(not $cpu) {
7360         my $nproc = nproc();
7361         if($nproc) {
7362             $cpu->{'sockets'} =
7363                 $cpu->{'cores'} =
7364                 $cpu->{'threads'} =
7365                 $cpu->{'active'} =
7366                 $nproc;
7367         }
7368     }
7369     if(not $cpu) {
7370         ::warning("Cannot figure out number of cpus. Using 1.");
7371         $cpu->{'sockets'} =
7372             $cpu->{'cores'} =
7373             $cpu->{'threads'} =
7374             $cpu->{'active'} =
7375             1
7376     }
7377     $cpu->{'sockets'} ||= 1;
7378     $cpu->{'threads'} ||= $cpu->{'cores'};
7379     $cpu->{'active'} ||= $cpu->{'threads'};
7380     chomp($cpu->{'sockets'},
7381           $cpu->{'cores'},
7382           $cpu->{'threads'},
7383           $cpu->{'active'});
7384     # Choose minimum of active and actual
7385     my $mincpu;
7386     $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
7387     $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
7388     $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
7389     return $mincpu;
7390 }
7391
7392 sub sct_gnu_linux($) {
7393     # Returns:
7394     #   { 'sockets' => #sockets
7395     #     'cores' => #cores
7396     #     'threads' => #threads
7397     #     'active' => #taskset_threads }
7398     my $cpu = shift;
7399
7400     sub read_topology($) {
7401         my $prefix = shift;
7402         my %sibiling;
7403         my %socket;
7404         my $thread;
7405         for($thread = 0;
7406             -r "$prefix/cpu$thread/topology/physical_package_id";
7407             $thread++) {
7408             open(my $fh,"<",
7409                  "$prefix/cpu$thread/topology/physical_package_id")
7410                 || die;
7411             $socket{<$fh>}++;
7412             close $fh;
7413         }
7414         for($thread = 0;
7415             -r "$prefix/cpu$thread/topology/thread_siblings";
7416             $thread++) {
7417             open(my $fh,"<",
7418                  "$prefix/cpu$thread/topology/thread_siblings")
7419                 || die;
7420             $sibiling{<$fh>}++;
7421             close $fh;
7422         }
7423         $cpu->{'sockets'} = keys %socket;
7424         $cpu->{'cores'} = keys %sibiling;
7425         $cpu->{'threads'} = $thread;
7426     }
7427
7428     sub read_cpuinfo(@) {
7429         my @cpuinfo = @_;
7430         $cpu->{'sockets'} = 0;
7431         $cpu->{'cores'} = 0;
7432         $cpu->{'threads'} = 0;
7433         my %seen;
7434         my %phy_seen;
7435         my $physicalid;
7436         for(@cpuinfo) {
7437             # physical id : 0
7438             if(/^physical id.*[:](.*)/) {
7439                 $physicalid = $1;
7440                 if(not $phy_seen{$1}++) {
7441                     $cpu->{'sockets'}++;
7442                 }
7443             }
7444             # core id : 3
7445             if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
7446                 $cpu->{'cores'}++;
7447             }
7448             # processor : 2
7449             /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
7450         }
7451         $cpu->{'cores'} ||= $cpu->{'threads'};
7452         $cpu->{'cpus'} ||= $cpu->{'threads'};
7453         $cpu->{'sockets'} ||= 1;
7454     }
7455
7456     sub read_lscpu(@) {
7457         my @lscpu = @_;
7458         my $threads_per_core;
7459         my $cores_per_socket;
7460         for(@lscpu) {
7461             /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
7462             /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
7463             /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
7464             /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
7465         }
7466         if($threads_per_core and $cpu->{'threads'}) {
7467             $cpu->{'cores'} =  $cpu->{'threads'} / $threads_per_core;
7468         }
7469         $cpu->{'cpus'} ||= $cpu->{'threads'};
7470     }
7471
7472     local $/ = "\n"; # If delimiter is set, then $/ will be wrong
7473     my @cpuinfo;
7474     my @lscpu;
7475     if($ENV{'PARALLEL_CPUINFO'}) {
7476         # Use CPUINFO from environment - used for testing only
7477         read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
7478     } elsif($ENV{'PARALLEL_LSCPU'}) {
7479         # Use LSCPU from environment - used for testing only
7480         read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
7481     } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
7482         # Use CPUPREFIX from environment - used for testing only
7483         read_topology($ENV{'PARALLEL_CPUPREFIX'});
7484     } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
7485         # Skip /proc/cpuinfo - already set
7486     } else {
7487         # Not debugging: Look at this computer
7488         if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7489            and
7490            open(my $in_fh, "-|", "lscpu")) {
7491             # Parse output from lscpu
7492             read_lscpu(<$in_fh>);
7493             close $in_fh;
7494         }
7495         if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7496            and
7497            -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
7498             read_topology("/sys/devices/system/cpu");
7499         }
7500         if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7501            and
7502            open(my $in_fh, "<", "/proc/cpuinfo")) {
7503             # Read /proc/cpuinfo
7504             read_cpuinfo(<$in_fh>);
7505             close $in_fh;
7506         }
7507     }
7508     if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
7509         # if 'taskset' is used to limit number of threads
7510         if(open(my $in_fh, "<", "/proc/self/status")) {
7511             while(<$in_fh>) {
7512                 if(/^Cpus_allowed:\s*(\S+)/) {
7513                     my $a = $1;
7514                     $a =~ tr/,//d;
7515                     $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
7516                 }
7517             }
7518             close $in_fh;
7519         }
7520     }
7521     return $cpu;
7522 }
7523
7524 sub sct_android($) {
7525     # Returns:
7526     #   { 'sockets' => #sockets
7527     #     'cores' => #cores
7528     #     'threads' => #threads
7529     #     'active' => #taskset_threads }
7530     # Use GNU/Linux
7531     return sct_gnu_linux($_[0]);
7532 }
7533
7534 sub sct_freebsd($) {
7535     # Returns:
7536     #   { 'sockets' => #sockets
7537     #     'cores' => #cores
7538     #     'threads' => #threads
7539     #     'active' => #taskset_threads }
7540     local $/ = "\n";
7541     my $cpu = shift;
7542     $cpu->{'cores'} ||=
7543         (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
7544          or
7545          ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
7546     $cpu->{'threads'} ||=
7547         (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
7548          or
7549          ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
7550     return $cpu;
7551 }
7552
7553 sub sct_netbsd($) {
7554     # Returns:
7555     #   { 'sockets' => #sockets
7556     #     'cores' => #cores
7557     #     'threads' => #threads
7558     #     'active' => #taskset_threads }
7559     local $/ = "\n";
7560     my $cpu = shift;
7561     $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
7562     return $cpu;
7563 }
7564
7565 sub sct_openbsd($) {
7566     # Returns:
7567     #   { 'sockets' => #sockets
7568     #     'cores' => #cores
7569     #     'threads' => #threads
7570     #     'active' => #taskset_threads }
7571     local $/ = "\n";
7572     my $cpu = shift;
7573     $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
7574     return $cpu;
7575 }
7576
7577 sub sct_hurd($) {
7578     # Returns:
7579     #   { 'sockets' => #sockets
7580     #     'cores' => #cores
7581     #     'threads' => #threads
7582     #     'active' => #taskset_threads }
7583     local $/ = "\n";
7584     my $cpu = shift;
7585     $cpu->{'cores'} ||= ::qqx("nproc");
7586     return $cpu;
7587 }
7588
7589 sub sct_darwin($) {
7590     # Returns:
7591     #   { 'sockets' => #sockets
7592     #     'cores' => #cores
7593     #     'threads' => #threads
7594     #     'active' => #taskset_threads }
7595     local $/ = "\n";
7596     my $cpu = shift;
7597     $cpu->{'cores'} ||=
7598         (::qqx('sysctl -n hw.physicalcpu')
7599          or
7600          ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7601     $cpu->{'threads'} ||=
7602         (::qqx('sysctl -n hw.logicalcpu')
7603          or
7604          ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7605     return $cpu;
7606 }
7607
7608 sub sct_solaris($) {
7609     # Returns:
7610     #   { 'sockets' => #sockets
7611     #     'cores' => #cores
7612     #     'threads' => #threads
7613     #     'active' => #taskset_threads }
7614     local $/ = "\n";
7615     my $cpu = shift;
7616     if(not $cpu->{'cores'}) {
7617         if(-x "/usr/bin/kstat") {
7618             my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
7619             if($#chip_id >= 0) {
7620                 $cpu->{'sockets'} ||= $#chip_id +1;
7621             }
7622             my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
7623             if($#core_id >= 0) {
7624                 $cpu->{'cores'} ||= $#core_id +1;
7625             }
7626         }
7627         if(-x "/usr/sbin/psrinfo") {
7628             my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
7629             if($#psrinfo >= 0) {
7630                 $cpu->{'sockets'} ||= $psrinfo[0];
7631             }
7632         }
7633         if(-x "/usr/sbin/prtconf") {
7634             my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7635             if($#prtconf >= 0) {
7636                 $cpu->{'cores'} ||= $#prtconf +1;
7637             }
7638         }
7639     }
7640     return $cpu;
7641 }
7642
7643 sub sct_aix($) {
7644     # Returns:
7645     #   { 'sockets' => #sockets
7646     #     'cores' => #cores
7647     #     'threads' => #threads
7648     #     'active' => #taskset_threads }
7649     local $/ = "\n";
7650     my $cpu = shift;
7651     if(not $cpu->{'cores'}) {
7652         if(-x "/usr/sbin/lscfg") {
7653             if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7654                 $cpu->{'cores'} = <$in_fh>;
7655                 close $in_fh;
7656             }
7657         }
7658     }
7659     if(not $cpu->{'threads'}) {
7660         if(-x "/usr/bin/vmstat") {
7661             if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7662                 while(<$in_fh>) {
7663                     /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7664                 }
7665                 close $in_fh;
7666             }
7667         }
7668     }
7669     return $cpu;
7670 }
7671
7672 sub sct_hpux($) {
7673     # Returns:
7674     #   { 'sockets' => #sockets
7675     #     'cores' => #cores
7676     #     'threads' => #threads
7677     #     'active' => #taskset_threads }
7678     local $/ = "\n";
7679     my $cpu = shift;
7680     $cpu->{'cores'} ||=
7681         ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7682     $cpu->{'threads'} ||=
7683         ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7684     return $cpu;
7685 }
7686
7687 sub sct_qnx($) {
7688     # Returns:
7689     #   { 'sockets' => #sockets
7690     #     'cores' => #cores
7691     #     'threads' => #threads
7692     #     'active' => #taskset_threads }
7693     local $/ = "\n";
7694     my $cpu = shift;
7695     # BUG: It is not known how to calculate this.
7696
7697     return $cpu;
7698 }
7699
7700 sub sct_openserver($) {
7701     # Returns:
7702     #   { 'sockets' => #sockets
7703     #     'cores' => #cores
7704     #     'threads' => #threads
7705     #     'active' => #taskset_threads }
7706     local $/ = "\n";
7707     my $cpu = shift;
7708     if(not $cpu->{'cores'}) {
7709         if(-x "/usr/sbin/psrinfo") {
7710             my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7711             if($#psrinfo >= 0) {
7712                 $cpu->{'cores'} = $#psrinfo +1;
7713             }
7714         }
7715     }
7716     $cpu->{'sockets'} ||= $cpu->{'cores'};
7717     return $cpu;
7718 }
7719
7720 sub sct_irix($) {
7721     # Returns:
7722     #   { 'sockets' => #sockets
7723     #     'cores' => #cores
7724     #     'threads' => #threads
7725     #     'active' => #taskset_threads }
7726     local $/ = "\n";
7727     my $cpu = shift;
7728     $cpu->{'cores'} ||=
7729         ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7730     return $cpu;
7731 }
7732
7733 sub sct_tru64($) {
7734     # Returns:
7735     #   { 'sockets' => #sockets
7736     #     'cores' => #cores
7737     #     'threads' => #threads
7738     #     'active' => #taskset_threads }
7739     local $/ = "\n";
7740     my $cpu = shift;
7741     $cpu->{'cores'} ||= ::qqx("sizer -pr");
7742     $cpu->{'sockets'} ||= $cpu->{'cores'};
7743     $cpu->{'threads'} ||= $cpu->{'cores'};
7744
7745     return $cpu;
7746 }
7747
7748 sub sshcommand($) {
7749     # Returns:
7750     #   $sshcommand = the command (incl options) to run when using ssh
7751     my $self = shift;
7752     if (not defined $self->{'sshcommand'}) {
7753         $self->sshcommand_of_sshlogin();
7754     }
7755     return $self->{'sshcommand'};
7756 }
7757
7758 sub serverlogin($) {
7759     # Returns:
7760     #   $sshcommand = the command (incl options) to run when using ssh
7761     my $self = shift;
7762     if (not defined $self->{'serverlogin'}) {
7763         $self->sshcommand_of_sshlogin();
7764     }
7765     return $self->{'serverlogin'};
7766 }
7767
7768 sub sshcommand_of_sshlogin($) {
7769     # Compute ssh command and serverlogin from sshlogin
7770     # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
7771     # 'user@server' -> ('ssh','user@server')
7772     # 'myssh user@server' -> ('myssh','user@server')
7773     # 'myssh -l user server' -> ('myssh -l user','server')
7774     # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
7775     # Sets:
7776     #   $self->{'sshcommand'}
7777     #   $self->{'serverlogin'}
7778     my $self = shift;
7779     my ($sshcmd, $serverlogin);
7780     # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
7781     $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
7782     if($self->{'string'} =~ /(.+) (\S+)$/) {
7783         # Own ssh command
7784         $sshcmd = $1; $serverlogin = $2;
7785     } else {
7786         # Normal ssh
7787         if($opt::controlmaster) {
7788             # Use control_path to make ssh faster
7789             my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7790             $sshcmd = $opt::ssh." -S ".$control_path;
7791             $serverlogin = $self->{'string'};
7792             if(not $self->{'control_path'}{$control_path}++) {
7793                 # Master is not running for this control_path
7794                 # Start it
7795                 my $pid = fork();
7796                 if($pid) {
7797                     $Global::sshmaster{$pid} ||= 1;
7798                 } else {
7799                     $SIG{'TERM'} = undef;
7800                     # Ignore the 'foo' being printed
7801                     open(STDOUT,">","/dev/null");
7802                     # STDERR >/dev/null to ignore
7803                     open(STDERR,">","/dev/null");
7804                     open(STDIN,"<","/dev/null");
7805                     # Run a sleep that outputs data, so it will discover
7806                     # if the ssh connection closes.
7807                     my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7808                     my @master = ($opt::ssh, "-MTS",
7809                                   $control_path, $serverlogin, "--", "perl", "-e",
7810                                   $sleep);
7811                     exec(@master);
7812                 }
7813             }
7814         } else {
7815             $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7816         }
7817     }
7818
7819     if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7820         # convert user@server to '-l user server'
7821         # because lsh does not support user@server
7822         $sshcmd = $sshcmd." -l ".$1;
7823     }
7824
7825     $self->{'sshcommand'} = $sshcmd;
7826     $self->{'serverlogin'} = $serverlogin;
7827 }
7828
7829 sub control_path_dir($) {
7830     # Returns:
7831     #   $control_path_dir = dir of control path (for -M)
7832     my $self = shift;
7833     if(not defined $self->{'control_path_dir'}) {
7834         $self->{'control_path_dir'} =
7835             # Use $ENV{'TMPDIR'} as that is typically not
7836             # NFS mounted
7837             File::Temp::tempdir($ENV{'TMPDIR'}
7838                                 . "/control_path_dir-XXXX",
7839                                 CLEANUP => 1);
7840     }
7841     return $self->{'control_path_dir'};
7842 }
7843
7844 sub rsync_transfer_cmd($) {
7845     # Command to run to transfer a file
7846     # Input:
7847     #   $file = filename of file to transfer
7848     #   $workdir = destination dir
7849     # Returns:
7850     #   $cmd = rsync command to run to transfer $file ("" if unreadable)
7851     my $self = shift;
7852     my $file = shift;
7853     my $workdir = shift;
7854     if(not -r $file) {
7855         ::warning($file. " is not readable and will not be transferred.");
7856         return "true";
7857     }
7858     my $rsync_destdir;
7859     my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7860     if($relpath) {
7861         $rsync_destdir = ::shell_quote_file($workdir);
7862     } else {
7863         # rsync /foo/bar /
7864         $rsync_destdir = "/";
7865     }
7866     $file = ::shell_quote_file($file);
7867     my $sshcmd = $self->sshcommand();
7868     my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7869         " -e".::Q($sshcmd);
7870     my $serverlogin = $self->serverlogin();
7871     # Make dir if it does not exist
7872     return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7873         rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7874 }
7875
7876 sub cleanup_cmd($$$) {
7877     # Command to run to remove the remote file
7878     # Input:
7879     #   $file = filename to remove
7880     #   $workdir = destination dir
7881     # Returns:
7882     #   $cmd = ssh command to run to remove $file and empty parent dirs
7883     my $self = shift;
7884     my $file = shift;
7885     my $workdir = shift;
7886     my $f = $file;
7887     if($f =~ m:/\./:) {
7888         # foo/bar/./baz/quux => workdir/baz/quux
7889         # /foo/bar/./baz/quux => workdir/baz/quux
7890         $f =~ s:.*/\./:$workdir/:;
7891     } elsif($f =~ m:^[^/]:) {
7892         # foo/bar => workdir/foo/bar
7893         $f = $workdir."/".$f;
7894     }
7895     my @subdirs = split m:/:, ::dirname($f);
7896     my @rmdir;
7897     my $dir = "";
7898     for(@subdirs) {
7899         $dir .= $_."/";
7900         unshift @rmdir, ::shell_quote_file($dir);
7901     }
7902     my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7903     if(defined $opt::workdir and $opt::workdir eq "...") {
7904         $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7905     }
7906
7907     $f = ::shell_quote_file($f);
7908     my $sshcmd = $self->sshcommand();
7909     my $serverlogin = $self->serverlogin();
7910     return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7911 }
7912
7913 {
7914     my $rsync;
7915
7916     sub rsync {
7917         # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7918         # If the version >= 3.1.0: downgrade to protocol 30
7919         # Returns:
7920         #   $rsync = "rsync" or "rsync --protocol 30"
7921         if(not $rsync) {
7922             my @out = `rsync --version`;
7923             for (@out) {
7924                 # rsync  version 3.1.3  protocol version 31
7925                 # rsync  version v3.2.3  protocol version 31
7926                 if(/version v?(\d+.\d+)(.\d+)?/) {
7927                     if($1 >= 3.1) {
7928                         # Version 3.1.0 or later: Downgrade to protocol 30
7929                         $rsync = "rsync --protocol 30";
7930                     } else {
7931                         $rsync = "rsync";
7932                     }
7933                 }
7934             }
7935             $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7936         }
7937         return $rsync;
7938     }
7939 }
7940
7941
7942 package JobQueue;
7943
7944 sub new($) {
7945     my $class = shift;
7946     my $commandref = shift;
7947     my $read_from = shift;
7948     my $context_replace = shift;
7949     my $max_number_of_args = shift;
7950     my $transfer_files = shift;
7951     my $return_files = shift;
7952     my $commandlinequeue = CommandLineQueue->new
7953         ($commandref, $read_from, $context_replace, $max_number_of_args,
7954          $transfer_files, $return_files);
7955     my @unget = ();
7956     return bless {
7957         'unget' => \@unget,
7958         'commandlinequeue' => $commandlinequeue,
7959         'this_job_no' => 0,
7960         'total_jobs' => undef,
7961     }, ref($class) || $class;
7962 }
7963
7964 sub get($) {
7965     my $self = shift;
7966
7967     $self->{'this_job_no'}++;
7968     if(@{$self->{'unget'}}) {
7969         return shift @{$self->{'unget'}};
7970     } else {
7971         my $commandline = $self->{'commandlinequeue'}->get();
7972         if(defined $commandline) {
7973             return Job->new($commandline);
7974         } else {
7975             $self->{'this_job_no'}--;
7976             return undef;
7977         }
7978     }
7979 }
7980
7981 sub unget($) {
7982     my $self = shift;
7983     unshift @{$self->{'unget'}}, @_;
7984     $self->{'this_job_no'} -= @_;
7985 }
7986
7987 sub empty($) {
7988     my $self = shift;
7989     my $empty = (not @{$self->{'unget'}}) &&
7990         $self->{'commandlinequeue'}->empty();
7991     ::debug("run", "JobQueue->empty $empty ");
7992     return $empty;
7993 }
7994
7995 sub total_jobs($) {
7996     my $self = shift;
7997     if(not defined $self->{'total_jobs'}) {
7998         if($opt::pipe and not $opt::tee) {
7999             ::error("--pipe is incompatible with --eta/--bar/--shuf");
8000             ::wait_and_exit(255);
8001         }
8002         if($opt::sqlworker) {
8003             $self->{'total_jobs'} = $Global::sql->total_jobs();
8004         } else {
8005             my $record;
8006             my @arg_records;
8007             my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
8008             my $start = time;
8009             while($record = $record_queue->get()) {
8010                 push @arg_records, $record;
8011                 if(time - $start > 10) {
8012                     ::warning("Reading ".scalar(@arg_records).
8013                               " arguments took longer than 10 seconds.");
8014                     $opt::eta && ::warning("Consider removing --eta.");
8015                     $opt::bar && ::warning("Consider removing --bar.");
8016                     $opt::shuf && ::warning("Consider removing --shuf.");
8017                     last;
8018                 }
8019             }
8020             while($record = $record_queue->get()) {
8021                 push @arg_records, $record;
8022             }
8023             if($opt::shuf and @arg_records) {
8024                 my $i = @arg_records;
8025                 while (--$i) {
8026                     my $j = int rand($i+1);
8027                     @arg_records[$i,$j] = @arg_records[$j,$i];
8028                 }
8029             }
8030             $record_queue->unget(@arg_records);
8031             # $#arg_records = number of args - 1
8032             # We have read one @arg_record for this job (so add 1 more)
8033             my $num_args = $#arg_records + 2;
8034             # This jobs is not started so -1
8035             my $started_jobs = $self->{'this_job_no'} - 1;
8036             my $max_args = ::max($Global::max_number_of_args,1);
8037             $self->{'total_jobs'} = ::ceil($num_args / $max_args)
8038                 + $started_jobs;
8039             ::debug("init","Total jobs: ".$self->{'total_jobs'}.
8040                     " ($num_args/$max_args + $started_jobs)\n");
8041         }
8042     }
8043     return $self->{'total_jobs'};
8044 }
8045
8046 sub flush_total_jobs($) {
8047     # Unset total_jobs to force recomputing
8048     my $self = shift;
8049     ::debug("init","flush Total jobs: ");
8050     $self->{'total_jobs'} = undef;
8051 }
8052
8053 sub next_seq($) {
8054     my $self = shift;
8055
8056     return $self->{'commandlinequeue'}->seq();
8057 }
8058
8059 sub quote_args($) {
8060     my $self = shift;
8061     return $self->{'commandlinequeue'}->quote_args();
8062 }
8063
8064
8065 package Job;
8066
8067 sub new($) {
8068     my $class = shift;
8069     my $commandlineref = shift;
8070     return bless {
8071         'commandline' => $commandlineref, # CommandLine object
8072         'workdir' => undef, # --workdir
8073         # filehandle for stdin (used for --pipe)
8074         # filename for writing stdout to (used for --files)
8075         # remaining data not sent to stdin (used for --pipe)
8076         # tmpfiles to cleanup when job is done
8077         'unlink' => [],
8078         # amount of data sent via stdin (used for --pipe)
8079         'transfersize' => 0, # size of files using --transfer
8080         'returnsize' => 0, # size of files using --return
8081         'pid' => undef,
8082         # hash of { SSHLogins => number of times the command failed there }
8083         'failed' => undef,
8084         'sshlogin' => undef,
8085         # The commandline wrapped with rsync and ssh
8086         'sshlogin_wrap' => undef,
8087         'exitstatus' => undef,
8088         'exitsignal' => undef,
8089         # Timestamp for timeout if any
8090         'timeout' => undef,
8091         'virgin' => 1,
8092         # Output used for SQL and CSV-output
8093         'output' => { 1 => [], 2 => [] },
8094         'halfline' => { 1 => [], 2 => [] },
8095     }, ref($class) || $class;
8096 }
8097
8098 sub replaced($) {
8099     my $self = shift;
8100     $self->{'commandline'} or ::die_bug("commandline empty");
8101     return $self->{'commandline'}->replaced();
8102 }
8103
8104 sub seq($) {
8105     my $self = shift;
8106     return $self->{'commandline'}->seq();
8107 }
8108
8109 sub set_seq($$) {
8110     my $self = shift;
8111     return $self->{'commandline'}->set_seq(shift);
8112 }
8113
8114 sub slot($) {
8115     my $self = shift;
8116     return $self->{'commandline'}->slot();
8117 }
8118
8119 sub free_slot($) {
8120     my $self = shift;
8121     push @Global::slots, $self->slot();
8122 }
8123
8124 {
8125     my($cattail);
8126
8127     sub cattail() {
8128         # Returns:
8129         #   $cattail = perl program for:
8130         #     cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
8131         if(not $cattail) {
8132             $cattail = q{
8133                 # cat followed by tail (possibly with rm as soon at the file is opened)
8134                 # If $writerpid dead: finish after this round
8135                 use Fcntl;
8136                 $|=1;
8137
8138                 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
8139                 if($read_file) {
8140                     open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
8141                 } else {
8142                     *IN = *STDIN;
8143                 }
8144                 while(! -s $comfile) {
8145                     # Writer has not opened the buffer file, so we cannot remove it yet
8146                     $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
8147                     usleep($sleep);
8148                 }
8149                 # The writer and we have both opened the file, so it is safe to unlink it
8150                 unlink $unlink_file;
8151                 unlink $comfile;
8152
8153                 my $first_round = 1;
8154                 my $flags;
8155                 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
8156                 $flags |= O_NONBLOCK; # Add non-blocking to the flags
8157                 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
8158
8159                 while(1) {
8160                     # clear EOF
8161                     seek(IN,0,1);
8162                     my $writer_running = kill 0, $writerpid;
8163                     $read = sysread(IN,$buf,131072);
8164                     if($read) {
8165                         if($first_round) {
8166                             # Only start the command if there any input to process
8167                             $first_round = 0;
8168                             open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
8169                         }
8170
8171                         # Blocking print
8172                         while($buf) {
8173                             my $bytes_written = syswrite(OUT,$buf);
8174                             # syswrite may be interrupted by SIGHUP
8175                             substr($buf,0,$bytes_written) = "";
8176                         }
8177                         # Something printed: Wait less next time
8178                         $sleep /= 2;
8179                     } else {
8180                         if(eof(IN) and not $writer_running) {
8181                             # Writer dead: There will never be sent more to the decompressor
8182                             close OUT;
8183                             exit;
8184                         }
8185                         # TODO This could probably be done more efficiently using select(2)
8186                         # Nothing read: Wait longer before next read
8187                         # Up to 100 milliseconds
8188                         $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
8189                         usleep($sleep);
8190                     }
8191                 }
8192
8193                 sub usleep {
8194                     # Sleep this many milliseconds.
8195                     my $secs = shift;
8196                     select(undef, undef, undef, $secs/1000);
8197                 }
8198             };
8199             $cattail =~ s/#.*//mg;
8200             $cattail =~ s/\s+/ /g;
8201         }
8202         return $cattail;
8203     }
8204 }
8205
8206 sub openoutputfiles($) {
8207     # Open files for STDOUT and STDERR
8208     # Set file handles in $self->fh
8209     my $self = shift;
8210     my ($outfhw, $errfhw, $outname, $errname);
8211
8212     if($opt::linebuffer and not
8213        ($opt::keeporder or $opt::files or $opt::results or
8214         $opt::compress or $opt::compress_program or
8215         $opt::decompress_program)) {
8216         # Do not save to files: Use non-blocking pipe
8217         my ($outfhr, $errfhr);
8218         pipe($outfhr, $outfhw) || die;
8219         pipe($errfhr, $errfhw) || die;
8220         $self->set_fh(1,'w',$outfhw);
8221         $self->set_fh(2,'w',$errfhw);
8222         $self->set_fh(1,'r',$outfhr);
8223         $self->set_fh(2,'r',$errfhr);
8224         # Make it possible to read non-blocking from the pipe
8225         for my $fdno (1,2) {
8226             ::set_fh_non_blocking($self->fh($fdno,'r'));
8227         }
8228         # Return immediately because we do not need setting filenames
8229         return;
8230     } elsif($opt::results and not $Global::csvsep) {
8231         my $out = $self->{'commandline'}->results_out();
8232         my $seqname;
8233         if($out eq $opt::results or $out =~ m:/$:) {
8234             # $opt::results = simple string or ending in /
8235             # => $out is a dir/
8236             # prefix/name1/val1/name2/val2/seq
8237             $seqname = $out."seq";
8238             # prefix/name1/val1/name2/val2/stdout
8239             $outname = $out."stdout";
8240             # prefix/name1/val1/name2/val2/stderr
8241             $errname = $out."stderr";
8242         } else {
8243             # $opt::results = replacement string not ending in /
8244             # => $out is a file
8245             $outname = $out;
8246             $errname = "$out.err";
8247             $seqname = "$out.seq";
8248         }
8249         my $seqfhw;
8250         if(not open($seqfhw, "+>", $seqname)) {
8251             ::error("Cannot write to `$seqname'.");
8252             ::wait_and_exit(255);
8253         }
8254         print $seqfhw $self->seq();
8255         close $seqfhw;
8256         if(not open($outfhw, "+>", $outname)) {
8257             ::error("Cannot write to `$outname'.");
8258             ::wait_and_exit(255);
8259         }
8260         if(not open($errfhw, "+>", $errname)) {
8261             ::error("Cannot write to `$errname'.");
8262             ::wait_and_exit(255);
8263         }
8264         $self->set_fh(1,"unlink","");
8265         $self->set_fh(2,"unlink","");
8266         if($opt::sqlworker) {
8267             # Save the filenames in SQL table
8268             $Global::sql->update("SET Stdout = ?, Stderr = ? ".
8269                                  "WHERE Seq = ". $self->seq(),
8270                                  $outname, $errname);
8271         }
8272     } elsif(not $opt::ungroup) {
8273         # To group we create temporary files for STDOUT and STDERR
8274         # To avoid the cleanup unlink the files immediately (but keep them open)
8275         if($opt::files) {
8276             ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8277             ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8278             # --files => only remove stderr
8279             $self->set_fh(1,"unlink","");
8280             $self->set_fh(2,"unlink",$errname);
8281         } else {
8282             ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8283             ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8284             $self->set_fh(1,"unlink",$outname);
8285             $self->set_fh(2,"unlink",$errname);
8286         }
8287     } else {
8288         # --ungroup
8289         open($outfhw,">&",$Global::fd{1}) || die;
8290         open($errfhw,">&",$Global::fd{2}) || die;
8291         # File name must be empty as it will otherwise be printed
8292         $outname = "";
8293         $errname = "";
8294         $self->set_fh(1,"unlink",$outname);
8295         $self->set_fh(2,"unlink",$errname);
8296     }
8297     # Set writing FD
8298     $self->set_fh(1,'w',$outfhw);
8299     $self->set_fh(2,'w',$errfhw);
8300     $self->set_fh(1,'name',$outname);
8301     $self->set_fh(2,'name',$errname);
8302     if($opt::compress) {
8303         $self->filter_through_compress();
8304     } elsif(not $opt::ungroup) {
8305         $self->grouped();
8306     }
8307     if($opt::linebuffer) {
8308         # Make it possible to read non-blocking from
8309         # the buffer files
8310         # Used for --linebuffer with -k, --files, --res, --compress*
8311         for my $fdno (1,2) {
8312             ::set_fh_non_blocking($self->fh($fdno,'r'));
8313         }
8314     }
8315 }
8316
8317 sub print_verbose_dryrun($) {
8318     # If -v set: print command to stdout (possibly buffered)
8319     # This must be done before starting the command
8320     my $self = shift;
8321     if($Global::verbose or $opt::dryrun) {
8322         my $fh = $self->fh(1,"w");
8323         if($Global::verbose <= 1) {
8324             print $fh $self->replaced(),"\n";
8325         } else {
8326             # Verbose level > 1: Print the rsync and stuff
8327             print $fh $self->wrapped(),"\n";
8328         }
8329     }
8330     if($opt::sqlworker) {
8331         $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
8332                              $self->replaced());
8333     }
8334 }
8335
8336 sub add_rm($) {
8337     # Files to remove when job is done
8338     my $self = shift;
8339     push @{$self->{'unlink'}}, @_;
8340 }
8341
8342 sub get_rm($) {
8343     # Files to remove when job is done
8344     my $self = shift;
8345     return @{$self->{'unlink'}};
8346 }
8347
8348 sub cleanup($) {
8349     # Remove files when job is done
8350     my $self = shift;
8351     unlink $self->get_rm();
8352     delete @Global::unlink{$self->get_rm()};
8353 }
8354
8355 sub grouped($) {
8356     my $self = shift;
8357     # Set reading FD if using --group (--ungroup does not need)
8358     for my $fdno (1,2) {
8359         # Re-open the file for reading
8360         # so fdw can be closed seperately
8361         # and fdr can be seeked seperately (for --line-buffer)
8362         open(my $fdr,"<", $self->fh($fdno,'name')) ||
8363             ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
8364         $self->set_fh($fdno,'r',$fdr);
8365         # Unlink if not debugging
8366         $Global::debug or ::rm($self->fh($fdno,"unlink"));
8367     }
8368 }
8369
8370 sub empty_input_wrapper($) {
8371     # If no input: exit(0)
8372     # If some input: Pass input as input to command on STDIN
8373     # This avoids starting the command if there is no input.
8374     # Input:
8375     #   $command = command to pipe data to
8376     # Returns:
8377     #   $wrapped_command = the wrapped command
8378     my $command = shift;
8379     my $script =
8380         ::spacefree(0,q{
8381             if(sysread(STDIN, $buf, 1)) {
8382                 open($fh, "|-", @ARGV) || die;
8383                 syswrite($fh, $buf);
8384                 # Align up to 128k block
8385                 if($read = sysread(STDIN, $buf, 131071)) {
8386                     syswrite($fh, $buf);
8387                 }
8388                 while($read = sysread(STDIN, $buf, 131072)) {
8389                     syswrite($fh, $buf);
8390                 }
8391                 close $fh;
8392                 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8393             }
8394                   });
8395     ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
8396     if($Global::cshell
8397        and
8398        length $command > 499) {
8399         # csh does not like words longer than 1000 (499 quoted)
8400         # $command = "perl -e '".base64_zip_eval()."' ".
8401         # join" ",string_zip_base64(
8402         #         'exec "'.::perl_quote_scalar($command).'"');
8403         return 'perl -e '.::Q($script)." ".
8404             base64_wrap("exec \"$Global::shell\",'-c',\"".
8405                                    ::perl_quote_scalar($command).'"');
8406     } else {
8407         return 'perl -e '.::Q($script)." ".
8408             $Global::shell." -c ".::Q($command);
8409     }
8410 }
8411
8412 sub filter_through_compress($) {
8413     my $self = shift;
8414     # Send stdout to stdin for $opt::compress_program(1)
8415     # Send stderr to stdin for $opt::compress_program(2)
8416     # cattail get pid:  $pid = $self->fh($fdno,'rpid');
8417     my $cattail = cattail();
8418
8419     for my $fdno (1,2) {
8420         # Make a communication file.
8421         my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
8422         close $fh;
8423         # Compressor: (echo > $comfile; compress pipe) > output
8424         # When the echo is written to $comfile,
8425         # it is known that output file is opened,
8426         # thus output file can then be removed by the decompressor.
8427         my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
8428                         empty_input_wrapper($opt::compress_program).") >".
8429                         $self->fh($fdno,'name')) || die $?;
8430         $self->set_fh($fdno,'w',$fdw);
8431         $self->set_fh($fdno,'wpid',$wpid);
8432         # Decompressor: open output; -s $comfile > 0: rm $comfile output;
8433         #               decompress output > stdout
8434         my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
8435                         $opt::decompress_program, $wpid,
8436                         $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
8437             || die $?;
8438         $self->set_fh($fdno,'r',$fdr);
8439         $self->set_fh($fdno,'rpid',$rpid);
8440     }
8441 }
8442
8443
8444
8445 sub set_fh($$$$) {
8446     # Set file handle
8447     my ($self, $fd_no, $key, $fh) = @_;
8448     $self->{'fd'}{$fd_no,$key} = $fh;
8449 }
8450
8451 sub fh($) {
8452     # Get file handle
8453     my ($self, $fd_no, $key) = @_;
8454     return $self->{'fd'}{$fd_no,$key};
8455 }
8456
8457 sub write($) {
8458     my $self = shift;
8459     my $remaining_ref = shift;
8460     my $stdin_fh = $self->fh(0,"w");
8461
8462     my $len = length $$remaining_ref;
8463     # syswrite may not write all in one go,
8464     # so make sure everything is written.
8465     my $written;
8466
8467     # If writing is to a closed pipe:
8468     #   Do not call signal handler, but let nothing be written
8469     local $SIG{PIPE} = undef;
8470     while($written = syswrite($stdin_fh,$$remaining_ref)){
8471         substr($$remaining_ref,0,$written) = "";
8472     }
8473 }
8474
8475 sub set_block($$$$$$) {
8476     # Copy stdin buffer from $block_ref up to $endpos
8477     # Prepend with $header_ref if virgin (i.e. not --roundrobin)
8478     # Remove $recstart and $recend if needed
8479     # Input:
8480     #   $header_ref = ref to $header to prepend
8481     #   $buffer_ref = ref to $buffer containing the block
8482     #   $endpos = length of $block to pass on
8483     #   $recstart = --recstart regexp
8484     #   $recend = --recend regexp
8485     # Returns:
8486     #   N/A
8487     my $self = shift;
8488     my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
8489     $self->{'block'} = ($self->virgin() ? $$header_ref : "").
8490         substr($$buffer_ref,0,$endpos);
8491     if($opt::remove_rec_sep) {
8492         remove_rec_sep(\$self->{'block'},$recstart,$recend);
8493     }
8494     $self->{'block_length'} = length $self->{'block'};
8495     $self->{'block_pos'} = 0;
8496     $self->add_transfersize($self->{'block_length'});
8497 }
8498
8499 sub block_ref($) {
8500     my $self = shift;
8501     return \$self->{'block'};
8502 }
8503
8504
8505 sub block_length($) {
8506     my $self = shift;
8507     return $self->{'block_length'};
8508 }
8509
8510 sub remove_rec_sep($) {
8511     # Remove --recstart and --recend from $block
8512     # Input:
8513     #   $block_ref = reference to $block to be modified
8514     #   $recstart = --recstart
8515     #   $recend = --recend
8516     # Uses:
8517     #   $opt::regexp = Are --recstart/--recend regexp?
8518     # Returns:
8519     #   N/A
8520     my ($block_ref,$recstart,$recend) = @_;
8521     # Remove record separator
8522     if($opt::regexp) {
8523         $$block_ref =~ s/$recend$recstart//gos;
8524         $$block_ref =~ s/^$recstart//os;
8525         $$block_ref =~ s/$recend$//os;
8526     } else {
8527         $$block_ref =~ s/\Q$recend$recstart\E//gos;
8528         $$block_ref =~ s/^\Q$recstart\E//os;
8529         $$block_ref =~ s/\Q$recend\E$//os;
8530     }
8531 }
8532
8533 sub non_blocking_write($) {
8534     my $self = shift;
8535     my $something_written = 0;
8536
8537     my $in = $self->fh(0,"w");
8538     my $rv = syswrite($in,
8539                       substr($self->{'block'},$self->{'block_pos'}));
8540     if (!defined($rv) && $! == ::EAGAIN()) {
8541         # would block - but would have written
8542         $something_written = 0;
8543         # avoid triggering auto expanding block size
8544         $Global::no_autoexpand_block ||= 1;
8545     } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8546         # incomplete write
8547         # Remove the written part
8548         $self->{'block_pos'} += $rv;
8549         $something_written = $rv;
8550     } else {
8551         # successfully wrote everything
8552         # Empty block to free memory
8553         my $a = "";
8554         $self->set_block(\$a,\$a,0,"","");
8555         $something_written = $rv;
8556     }
8557     ::debug("pipe", "Non-block: ", $something_written);
8558     return $something_written;
8559 }
8560
8561
8562 sub virgin($) {
8563     my $self = shift;
8564     return $self->{'virgin'};
8565 }
8566
8567 sub set_virgin($$) {
8568     my $self = shift;
8569     $self->{'virgin'} = shift;
8570 }
8571
8572 sub pid($) {
8573     my $self = shift;
8574     return $self->{'pid'};
8575 }
8576
8577 sub set_pid($$) {
8578     my $self = shift;
8579     $self->{'pid'} = shift;
8580 }
8581
8582 sub starttime($) {
8583     # Returns:
8584     #   UNIX-timestamp this job started
8585     my $self = shift;
8586     return sprintf("%.3f",$self->{'starttime'});
8587 }
8588
8589 sub set_starttime($@) {
8590     my $self = shift;
8591     my $starttime = shift || ::now();
8592     $self->{'starttime'} = $starttime;
8593     $opt::sqlworker and
8594         $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8595                              $starttime);
8596 }
8597
8598 sub runtime($) {
8599     # Returns:
8600     #   Run time in seconds with 3 decimals
8601     my $self = shift;
8602     return sprintf("%.3f",
8603                    int(($self->endtime() - $self->starttime())*1000)/1000);
8604 }
8605
8606 sub endtime($) {
8607     # Returns:
8608     #   UNIX-timestamp this job ended
8609     #   0 if not ended yet
8610     my $self = shift;
8611     return ($self->{'endtime'} || 0);
8612 }
8613
8614 sub set_endtime($$) {
8615     my $self = shift;
8616     my $endtime = shift;
8617     $self->{'endtime'} = $endtime;
8618     $opt::sqlworker and
8619         $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8620                              $self->runtime());
8621 }
8622
8623 sub is_timedout($) {
8624     # Is the job timedout?
8625     # Input:
8626     #   $delta_time = time that the job may run
8627     # Returns:
8628     #   True or false
8629     my $self = shift;
8630     my $delta_time = shift;
8631     return time > $self->{'starttime'} + $delta_time;
8632 }
8633
8634 sub kill($) {
8635     my $self = shift;
8636     $self->set_exitstatus(-1);
8637     ::kill_sleep_seq($self->pid());
8638 }
8639
8640 sub suspend($) {
8641     my $self = shift;
8642     my @pgrps = map { -$_ } $self->pid();
8643     kill "STOP", @pgrps;
8644     $self->set_suspended(1);
8645     # push job onto start stack
8646     $Global::JobQueue->unget($self);
8647 }
8648
8649 sub set_suspended($$) {
8650     my $self = shift;
8651     $self->{'suspended'} = shift;
8652 }
8653
8654 sub suspended($) {
8655     my $self = shift;
8656     return $self->{'suspended'};
8657 }
8658
8659 sub resume($) {
8660     my $self = shift;
8661     my @pgrps = map { -$_ } $self->pid();
8662     kill "CONT", @pgrps;
8663     $self->set_suspended(0);
8664 }
8665
8666 sub failed($) {
8667     # return number of times failed for this $sshlogin
8668     # Input:
8669     #   $sshlogin
8670     # Returns:
8671     #   Number of times failed for $sshlogin
8672     my $self = shift;
8673     my $sshlogin = shift;
8674     return $self->{'failed'}{$sshlogin};
8675 }
8676
8677 sub failed_here($) {
8678     # return number of times failed for the current $sshlogin
8679     # Returns:
8680     #   Number of times failed for this sshlogin
8681     my $self = shift;
8682     return $self->{'failed'}{$self->sshlogin()};
8683 }
8684
8685 sub add_failed($) {
8686     # increase the number of times failed for this $sshlogin
8687     my $self = shift;
8688     my $sshlogin = shift;
8689     $self->{'failed'}{$sshlogin}++;
8690 }
8691
8692 sub add_failed_here($) {
8693     # increase the number of times failed for the current $sshlogin
8694     my $self = shift;
8695     $self->{'failed'}{$self->sshlogin()}++;
8696 }
8697
8698 sub reset_failed($) {
8699     # increase the number of times failed for this $sshlogin
8700     my $self = shift;
8701     my $sshlogin = shift;
8702     delete $self->{'failed'}{$sshlogin};
8703 }
8704
8705 sub reset_failed_here($) {
8706     # increase the number of times failed for this $sshlogin
8707     my $self = shift;
8708     delete $self->{'failed'}{$self->sshlogin()};
8709 }
8710
8711 sub min_failed($) {
8712     # Returns:
8713     #   the number of sshlogins this command has failed on
8714     #   the minimal number of times this command has failed
8715     my $self = shift;
8716     my $min_failures =
8717         ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
8718     my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
8719     return ($number_of_sshlogins_failed_on,$min_failures);
8720 }
8721
8722 sub total_failed($) {
8723     # Returns:
8724     #   $total_failures = the number of times this command has failed
8725     my $self = shift;
8726     my $total_failures = 0;
8727     for (values %{$self->{'failed'}}) {
8728         $total_failures += $_;
8729     }
8730     return $total_failures;
8731 }
8732
8733 {
8734     my $script;
8735
8736     sub postpone_exit_and_cleanup {
8737         # Command to remove files and dirs (given as args) without
8738         # affecting the exit value in $?/$status.
8739         if(not $script) {
8740             $script = "perl -e '".
8741                 ::spacefree(0,q{
8742                     $bash=shift;
8743                     $csh=shift;
8744                     for(@ARGV){
8745                         unlink;
8746                         rmdir;
8747                     }
8748                     if($bash=~s/(\d+)h/$1/) {
8749                         exit $bash;
8750                     }
8751                     exit $csh;
8752                             }).
8753                 # `echo \$?h` is needed to make fish not complain
8754                 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
8755         }
8756         return $script
8757     }
8758 }
8759
8760 {
8761     my $script;
8762
8763     sub fifo_wrap() {
8764         # Script to create a fifo, run a command on the fifo
8765         # while copying STDIN to the fifo, and finally
8766         # remove the fifo and return the exit code of the command.
8767         if(not $script) {
8768             # {} == $PARALLEL_TMP for --fifo
8769             # To make it csh compatible a wrapper needs to:
8770             # * mkfifo
8771             # * spawn $command &
8772             # * cat > fifo
8773             # * waitpid to get the exit code from $command
8774             # * be less than 1000 chars long
8775             $script = "perl -e '".
8776                 (::spacefree
8777                  (0, q{
8778                      ($s,$c,$f) = @ARGV;
8779                      # mkfifo $PARALLEL_TMP
8780                      system "mkfifo", $f;
8781                      # spawn $shell -c $command &
8782                      $pid = fork || exec $s, "-c", $c;
8783                      open($o,">",$f) || die $!;
8784                      # cat > $PARALLEL_TMP
8785                      while(sysread(STDIN,$buf,131072)){
8786                          syswrite $o, $buf;
8787                      }
8788                      close $o;
8789                      # waitpid to get the exit code from $command
8790                      waitpid $pid,0;
8791                      # Cleanup
8792                      unlink $f;
8793                      exit $?/256;
8794                   }))."'";
8795         }
8796         return $script;
8797     }
8798 }
8799
8800 sub wrapped($) {
8801     # Wrap command with:
8802     # * --shellquote
8803     # * --nice
8804     # * --cat
8805     # * --fifo
8806     # * --sshlogin
8807     # * --pipepart (@Global::cat_prepends)
8808     # * --tee (@Global::cat_prepends)
8809     # * --pipe
8810     # * --tmux
8811     # The ordering of the wrapping is important:
8812     # * --nice/--cat/--fifo should be done on the remote machine
8813     # * --pipepart/--pipe should be done on the local machine inside --tmux
8814     # Uses:
8815     #   @opt::shellquote
8816     #   $opt::nice
8817     #   $Global::shell
8818     #   $opt::cat
8819     #   $opt::fifo
8820     #   @Global::cat_prepends
8821     #   $opt::pipe
8822     #   $opt::tmux
8823     # Returns:
8824     #   $self->{'wrapped'} = the command wrapped with the above
8825     my $self = shift;
8826     if(not defined $self->{'wrapped'}) {
8827         my $command = $self->replaced();
8828         # Bug in Bash and Ksh when running multiline aliases
8829         # This will force them to run correctly, but will fail in
8830         # tcsh so we do not do it.
8831         # $command .= "\n\n";
8832         if(@opt::shellquote) {
8833             # Quote one time for each --shellquote
8834             my $c = $command;
8835             for(@opt::shellquote) {
8836                 $c = ::Q($c);
8837             }
8838             # Prepend "echo" (it is written in perl because
8839             # quoting '-e' causes problem in some versions and
8840             # csh's version does something wrong)
8841             $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
8842         }
8843         if($Global::parallel_env) {
8844             # If $PARALLEL_ENV set, put that in front of the command
8845             # Used for env_parallel.*
8846             if($Global::shell =~ /zsh/) {
8847                 # The extra 'eval' will make aliases work, too
8848                 $command = $Global::parallel_env."\n".
8849                     "eval ".::Q($command);
8850             } else {
8851                 $command = $Global::parallel_env."\n".$command;
8852             }
8853         }
8854         if($opt::cat) {
8855             # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8856             # This is to make it possible to compute $PARALLEL_TMP on
8857             # the fly when running remotely.
8858             # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8859             # the command is run.
8860             #
8861             # Prepend 'cat > $PARALLEL_TMP;'
8862             # Append 'unlink $PARALLEL_TMP without affecting $?'
8863             $command =
8864                 'cat > $PARALLEL_TMP;'.
8865                 $command.";". postpone_exit_and_cleanup().
8866                                          '$PARALLEL_TMP';
8867         } elsif($opt::fifo) {
8868             # Prepend fifo-wrapper. In essence:
8869             #   mkfifo {}
8870             #   ( $command ) &
8871             #   # $command must read {}, otherwise this 'cat' will block
8872             #   cat > {};
8873             #   wait; rm {}
8874             # without affecting $?
8875             $command = fifo_wrap(). " ".
8876                 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8877         }
8878         # Wrap with ssh + tranferring of files
8879         $command = $self->sshlogin_wrap($command);
8880         if(@Global::cat_prepends) {
8881             # --pipepart: prepend:
8882             # < /tmp/foo perl -e 'while(@ARGV) {
8883             #   sysseek(STDIN,shift,0) || die; $left = shift;
8884             #   while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8885             #     $left -= $read; syswrite(STDOUT,$buf);
8886             #   }
8887             # }'  0 0 0 11 |
8888             #
8889             # --pipepart --tee: prepend:
8890             #   < dash-a-file
8891             #
8892             # --pipe --tee: wrap:
8893             #   (rm fifo; ... ) < fifo
8894             #
8895             # --pipe --shard X:
8896             #   (rm fifo; ... ) < fifo
8897             $command = (shift @Global::cat_prepends). "($command)".
8898                 (shift @Global::cat_appends);
8899         } elsif($opt::pipe and not $opt::roundrobin) {
8900             # Wrap with EOF-detector to avoid starting $command if EOF.
8901             $command = empty_input_wrapper($command);
8902         }
8903         if($opt::tmux) {
8904             # Wrap command with 'tmux'
8905             $command = $self->tmux_wrap($command);
8906         }
8907         if($Global::cshell
8908            and
8909            length $command > 499) {
8910             # csh does not like words longer than 1000 (499 quoted)
8911             # $command = "perl -e '".base64_zip_eval()."' ".
8912             # join" ",string_zip_base64(
8913             #         'exec "'.::perl_quote_scalar($command).'"');
8914             $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8915                                    ::perl_quote_scalar($command).'"');
8916         }
8917         $self->{'wrapped'} = $command;
8918     }
8919     return $self->{'wrapped'};
8920 }
8921
8922 sub set_sshlogin($$) {
8923     my $self = shift;
8924     my $sshlogin = shift;
8925     $self->{'sshlogin'} = $sshlogin;
8926     delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8927     delete $self->{'wrapped'};
8928
8929     if($opt::sqlworker) {
8930         # Identify worker as --sqlworker often runs on different machines
8931         my $host = $sshlogin->string();
8932         if($host eq ":") {
8933             $host = ::hostname();
8934         }
8935         $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8936     }
8937 }
8938
8939 sub sshlogin($) {
8940     my $self = shift;
8941     return $self->{'sshlogin'};
8942 }
8943
8944 sub string_base64($) {
8945     # Base64 encode strings into 1000 byte blocks.
8946     # 1000 bytes is the largest word size csh supports
8947     # Input:
8948     #   @strings = to be encoded
8949     # Returns:
8950     #   @base64 = 1000 byte block
8951     $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8952     my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8953     return @base64;
8954 }
8955
8956 sub string_zip_base64($) {
8957     # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8958     # byte blocks.
8959     # 1000 bytes is the largest word size csh supports
8960     # Zipping will make exporting big environments work, too
8961     # Input:
8962     #   @strings = to be encoded
8963     # Returns:
8964     #   @base64 = 1000 byte block
8965     my($zipin_fh, $zipout_fh,@base64);
8966     ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8967     if(fork) {
8968         close $zipin_fh;
8969         $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8970         # Split base64 encoded into 1000 byte blocks
8971         @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8972         close $zipout_fh;
8973     } else {
8974         close $zipout_fh;
8975         print $zipin_fh @_;
8976         close $zipin_fh;
8977         exit;
8978     }
8979     ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8980     return @base64;
8981 }
8982
8983 sub base64_zip_eval() {
8984     # Script that:
8985     #   * reads base64 strings from @ARGV
8986     #   * decodes them
8987     #   * pipes through 'bzip2 -dc'
8988     #   * evals the result
8989     # Reverse of string_zip_base64 + eval
8990     # Will be wrapped in ' so single quote is forbidden
8991     # Returns:
8992     #   $script = 1-liner for perl -e
8993     my $script = ::spacefree(0,q{
8994         @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8995         eval"@GNU_Parallel";
8996         $chld = $SIG{CHLD};
8997         $SIG{CHLD} = "IGNORE";
8998         # Search for bzip2. Not found => use default path
8999         my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
9000         # $in = stdin on $zip, $out = stdout from $zip
9001         # Forget my() to save chars for csh
9002         # my($in, $out,$eval);
9003         open3($in,$out,">&STDERR",$zip,"-dc");
9004         if(my $perlpid = fork) {
9005             close $in;
9006             $eval = join "", <$out>;
9007             close $out;
9008         } else {
9009             close $out;
9010             # Pipe decoded base64 into 'bzip2 -dc'
9011             print $in (decode_base64(join"",@ARGV));
9012             close $in;
9013             exit;
9014         }
9015         wait;
9016         $SIG{CHLD} = $chld;
9017         eval $eval;
9018                              });
9019     ::debug("base64",$script,"\n");
9020     return $script;
9021 }
9022
9023 sub base64_wrap($) {
9024     # base64 encode Perl code
9025     # Split it into chunks of < 1000 bytes
9026     # Prepend it with a decoder that eval's it
9027     # Input:
9028     #   $eval_string = Perl code to run
9029     # Returns:
9030     #   $shell_command = shell command that runs $eval_string
9031     my $eval_string = shift;
9032     return
9033         "perl -e ".
9034         ::Q(base64_zip_eval())." ".
9035         join" ",::shell_quote(string_zip_base64($eval_string));
9036 }
9037
9038 sub base64_eval($) {
9039     # Script that:
9040     #   * reads base64 strings from @ARGV
9041     #   * decodes them
9042     #   * evals the result
9043     # Reverse of string_base64 + eval
9044     # Will be wrapped in ' so single quote is forbidden.
9045     # Spaces are stripped so spaces cannot be significant.
9046     # The funny 'use IPC::Open3'-syntax is to avoid spaces and
9047     # to make it clear that this is a GNU  Parallel command
9048     # when looking at the process table.
9049     # Returns:
9050     #   $script = 1-liner for perl -e
9051     my $script = ::spacefree(0,q{
9052         @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
9053         eval "@GNU_Parallel";
9054         my $eval = decode_base64(join"",@ARGV);
9055         eval $eval;
9056                              });
9057     ::debug("base64",$script,"\n");
9058     return $script;
9059 }
9060
9061 sub sshlogin_wrap($) {
9062     # Wrap the command with the commands needed to run remotely
9063     # Input:
9064     #   $command = command to run
9065     # Returns:
9066     #   $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
9067     sub monitor_parent_sshd_script {
9068         # This script is to solve the problem of
9069         # * not mixing STDERR and STDOUT
9070         # * terminating with ctrl-c
9071         # If its parent is ssh: all good
9072         # If its parent is init(1): ssh died, so kill children
9073         my $monitor_parent_sshd_script;
9074
9075         if(not $monitor_parent_sshd_script) {
9076             $monitor_parent_sshd_script =
9077                 # This will be packed in ', so only use "
9078                 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
9079                             '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
9080                             '$nice = '.$opt::nice.';'.
9081                             '$termseq = "'.$opt::termseq.'";'.
9082                             q{
9083                 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
9084                 do {
9085                     $ENV{PARALLEL_TMP} = $tmpdir."/par".
9086                         join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
9087                 } while(-e $ENV{PARALLEL_TMP});
9088                 $SIG{CHLD} = sub { $done = 1; };
9089                 $pid = fork;
9090                 unless($pid) {
9091                     # Make own process group to be able to kill HUP it later
9092                     eval { setpgrp };
9093                     eval { setpriority(0,0,$nice) };
9094                     exec $shell, "-c", ($bashfunc."@ARGV");
9095                     die "exec: $!\n";
9096                 }
9097                 do {
9098                     # Parent is not init (ppid=1), so sshd is alive
9099                     # Exponential sleep up to 1 sec
9100                     $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
9101                     select(undef, undef, undef, $s);
9102                 } until ($done || getppid == 1);
9103                 if(not $done) {
9104                     # Kill as per --termseq
9105                     my @term_seq = split/,/,$termseq;
9106                     if(not @term_seq) {
9107                         @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
9108                     }
9109                     while(@term_seq && kill(0,-$pid)) {
9110                         kill(shift @term_seq, -$pid);
9111                         select(undef, undef, undef, (shift @term_seq)/1000);
9112                     }
9113                 }
9114                 wait;
9115                 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9116             });
9117         }
9118         return $monitor_parent_sshd_script;
9119     }
9120
9121     sub vars_to_export {
9122         # Uses:
9123         #   @opt::env
9124         my @vars = ("parallel_bash_environment");
9125         for my $varstring (@opt::env) {
9126             # Split up --env VAR1,VAR2
9127             push @vars, split /,/, $varstring;
9128         }
9129         for (@vars) {
9130             if(-r $_ and not -d) {
9131                 # Read as environment definition bug #44041
9132                 # TODO parse this
9133                 my $fh = ::open_or_exit($_);
9134                 $Global::envdef = join("",<$fh>);
9135                 close $fh;
9136             }
9137         }
9138         if(grep { /^_$/ } @vars) {
9139             local $/ = "\n";
9140             # --env _
9141             # Include all vars that are not in a clean environment
9142             if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
9143                 my @ignore = <$vars_fh>;
9144                 chomp @ignore;
9145                 my %ignore;
9146                 @ignore{@ignore} = @ignore;
9147                 close $vars_fh;
9148                 push @vars, grep { not defined $ignore{$_} } keys %ENV;
9149                 @vars = grep { not /^_$/ } @vars;
9150             } else {
9151                 ::error("Run '$Global::progname --record-env' ".
9152                         "in a clean environment first.");
9153                 ::wait_and_exit(255);
9154             }
9155         }
9156         # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
9157         # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
9158
9159         push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
9160              "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", "PARALLEL_JOBSLOT",
9161              map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
9162         # Keep only defined variables
9163         return grep { defined($ENV{$_}) } @vars;
9164     }
9165
9166     sub env_as_eval {
9167         # Returns:
9168         #   $eval = '$ENV{"..."}=...; ...'
9169         my @vars = vars_to_export();
9170         my $csh_friendly = not grep { /\n/ } @ENV{@vars};
9171         my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
9172         my @non_functions = (grep { !/PARALLEL_ENV/ }
9173                              grep { substr($ENV{$_},0,4) ne "() {" } @vars);
9174
9175         # eval of @envset will set %ENV
9176         my $envset = join"", map {
9177             '$ENV{"'.::perl_quote_scalar($_).'"}="'.
9178                 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
9179
9180         # running @bashfunc on the command line, will set the functions
9181         my @bashfunc = map {
9182             my $v=$_;
9183             s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
9184             "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
9185         # eval $bashfuncset will set $bashfunc
9186         my $bashfuncset;
9187         if(@bashfunc) {
9188             # Functions are not supported for all shells
9189             if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
9190                 ::warning("Shell functions may not be supported in $Global::shell.");
9191             }
9192             $bashfuncset =
9193                 '@bash_functions=qw('."@bash_functions".");".
9194                 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
9195                     if($shell=~/csh/) {
9196                         print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
9197                         exec "false";
9198                     }
9199                             }).
9200                                 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
9201         } else {
9202             $bashfuncset = '$bashfunc = "";'
9203         }
9204         if($ENV{'parallel_bash_environment'}) {
9205             $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
9206         }
9207         ::debug("base64",$envset,$bashfuncset,"\n");
9208         return $csh_friendly,$envset,$bashfuncset;
9209     }
9210
9211     my $self = shift;
9212     my $command = shift;
9213     # TODO test that *sh -c 'parallel --env' use *sh
9214     if(not defined $self->{'sshlogin_wrap'}{$command}) {
9215         my $sshlogin = $self->sshlogin();
9216         my $serverlogin = $sshlogin->serverlogin();
9217         my $quoted_remote_command;
9218         $ENV{'PARALLEL_SEQ'} = $self->seq();
9219         $ENV{'PARALLEL_JOBSLOT'} = $self->slot();
9220         $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
9221         $ENV{'PARALLEL_SSHHOST'} = $sshlogin->serverlogin();
9222         $ENV{'PARALLEL_PID'} = $$;
9223         if($serverlogin eq ":") {
9224             if($opt::workdir) {
9225                 # Create workdir if needed. Then cd to it.
9226                 my $wd = $self->workdir();
9227                 if($opt::workdir eq "." or $opt::workdir eq "...") {
9228                     # If $wd does not start with '/': Prepend $HOME
9229                     $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
9230                 }
9231                 ::mkdir_or_die($wd);
9232                 my $post = "";
9233                 if($opt::workdir eq "...") {
9234                     $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
9235
9236                 }
9237                 $command = "cd ".::Q($wd)." || exit 255; " .
9238                     $command . $post;;
9239             }
9240             if(@opt::env) {
9241                 # Prepend with environment setter, which sets functions in zsh
9242                 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9243                 my $perl_code = $envset.$bashfuncset.
9244                     '@ARGV="'.::perl_quote_scalar($command).'";'.
9245                     "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
9246                 if(length $perl_code > 999
9247                    or
9248                    not $csh_friendly
9249                    or
9250                    $command =~ /\n/) {
9251                     # csh does not deal well with > 1000 chars in one word
9252                     # csh does not deal well with $ENV with \n
9253                     $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
9254                 } else {
9255                     $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
9256                 }
9257             } else {
9258                 $self->{'sshlogin_wrap'}{$command} = $command;
9259             }
9260         } else {
9261             my $pwd = "";
9262             if($opt::workdir) {
9263                 # Create remote workdir if needed. Then cd to it.
9264                 my $wd = ::pQ($self->workdir());
9265                 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
9266                     qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
9267             }
9268             my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9269             my $remote_command = $pwd.$envset.$bashfuncset.
9270                 '@ARGV="'.::perl_quote_scalar($command).'";'.
9271                 monitor_parent_sshd_script();
9272             $quoted_remote_command = "perl -e ". ::Q($remote_command);
9273             my $dq_remote_command = ::Q($quoted_remote_command);
9274             if(length $dq_remote_command > 999
9275                or
9276                not $csh_friendly
9277                or
9278                $command =~ /\n/) {
9279                 # csh does not deal well with > 1000 chars in one word
9280                 # csh does not deal well with $ENV with \n
9281                 $quoted_remote_command =
9282                     "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
9283                     join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
9284             } else {
9285                 $quoted_remote_command = $dq_remote_command;
9286             }
9287
9288             my $sshcmd = $sshlogin->sshcommand();
9289             my ($pre,$post,$cleanup)=("","","");
9290             # --transfer
9291             $pre .= $self->sshtransfer();
9292             # --return
9293             $post .= $self->sshreturn();
9294             # --cleanup
9295             $post .= $self->sshcleanup();
9296             if($post) {
9297                 # We need to save the exit status of the job
9298                 $post = exitstatuswrapper($post);
9299             }
9300             $self->{'sshlogin_wrap'}{$command} =
9301                 ($pre
9302                  . "$sshcmd $serverlogin -- exec "
9303                  . $quoted_remote_command
9304                  . ";"
9305                  . $post);
9306         }
9307     }
9308     return $self->{'sshlogin_wrap'}{$command};
9309 }
9310
9311 sub transfer($) {
9312     # Files to transfer
9313     # Non-quoted and with {...} substituted
9314     # Returns:
9315     #   @transfer - File names of files to transfer
9316     my $self = shift;
9317
9318     my $transfersize = 0;
9319     my @transfer = $self->{'commandline'}->
9320         replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
9321     for(@transfer) {
9322         # filesize
9323         if(-e $_) {
9324             $transfersize += (stat($_))[7];
9325         }
9326     }
9327     $self->add_transfersize($transfersize);
9328     return @transfer;
9329 }
9330
9331 sub transfersize($) {
9332     my $self = shift;
9333     return $self->{'transfersize'};
9334 }
9335
9336 sub add_transfersize($) {
9337     my $self = shift;
9338     my $transfersize = shift;
9339     $self->{'transfersize'} += $transfersize;
9340     $opt::sqlworker and
9341         $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
9342                              $self->{'transfersize'});
9343 }
9344
9345 sub sshtransfer($) {
9346     # Returns for each transfer file:
9347     #   rsync $file remote:$workdir
9348     my $self = shift;
9349     my @pre;
9350     my $sshlogin = $self->sshlogin();
9351     my $workdir = $self->workdir();
9352     for my $file ($self->transfer()) {
9353         push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
9354     }
9355     return join("",@pre);
9356 }
9357
9358 sub return($) {
9359     # Files to return
9360     # Non-quoted and with {...} substituted
9361     # Returns:
9362     #   @non_quoted_filenames
9363     my $self = shift;
9364     return $self->{'commandline'}->
9365         replace_placeholders($self->{'commandline'}{'return_files'},0,0);
9366 }
9367
9368 sub returnsize($) {
9369     # This is called after the job has finished
9370     # Returns:
9371     #   $number_of_bytes transferred in return
9372     my $self = shift;
9373     for my $file ($self->return()) {
9374         if(-e $file) {
9375             $self->{'returnsize'} += (stat($file))[7];
9376         }
9377     }
9378     return $self->{'returnsize'};
9379 }
9380
9381 sub add_returnsize($) {
9382     my $self = shift;
9383     my $returnsize = shift;
9384     $self->{'returnsize'} += $returnsize;
9385     $opt::sqlworker and
9386         $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
9387                              $self->{'returnsize'});
9388 }
9389
9390 sub sshreturn($) {
9391     # Returns for each return-file:
9392     #   rsync remote:$workdir/$file .
9393     my $self = shift;
9394     my $sshlogin = $self->sshlogin();
9395     my $sshcmd = $sshlogin->sshcommand();
9396     my $serverlogin = $sshlogin->serverlogin();
9397     my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
9398     my $pre = "";
9399     for my $file ($self->return()) {
9400         $file =~ s:^\./::g; # Remove ./ if any
9401         my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9402         my $cd = "";
9403         my $wd = "";
9404         if($relpath) {
9405             #   rsync -avR /foo/./bar/baz.c remote:/tmp/
9406             # == (on old systems)
9407             #   rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
9408             $wd = ::shell_quote_file($self->workdir()."/");
9409         }
9410         # Only load File::Basename if actually needed
9411         $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
9412         # dir/./file means relative to dir, so remove dir on remote
9413         $file =~ m:(.*)/\./:;
9414         my $basedir = $1 ? ::shell_quote_file($1."/") : "";
9415         my $nobasedir = $file;
9416         $nobasedir =~ s:.*/\./::;
9417         $cd = ::shell_quote_file(::dirname($nobasedir));
9418         my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
9419         my $basename = ::Q(::shell_quote_file(::basename($file)));
9420         # --return
9421         #   mkdir -p /home/tange/dir/subdir/;
9422         #   rsync (--protocol 30) -rlDzR
9423         #     --rsync-path="cd /home/tange/dir/subdir/; rsync"
9424         #     server:file.gz /home/tange/dir/subdir/
9425         $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
9426             " $rsync_cd $rsync_opts $serverlogin:".
9427              $basename . " ".$basedir.$cd.";";
9428     }
9429     return $pre;
9430 }
9431
9432 sub sshcleanup($) {
9433     # Return the sshcommand needed to remove the file
9434     # Returns:
9435     #   ssh command needed to remove files from sshlogin
9436     my $self = shift;
9437     my $sshlogin = $self->sshlogin();
9438     my $sshcmd = $sshlogin->sshcommand();
9439     my $serverlogin = $sshlogin->serverlogin();
9440     my $workdir = $self->workdir();
9441     my $cleancmd = "";
9442
9443     for my $file ($self->remote_cleanup()) {
9444         my @subworkdirs = parentdirs_of($file);
9445         $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
9446     }
9447     if(defined $opt::workdir and $opt::workdir eq "...") {
9448        $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
9449     }
9450     return $cleancmd;
9451 }
9452
9453 sub remote_cleanup($) {
9454     # Returns:
9455     #   Files to remove at cleanup
9456     my $self = shift;
9457     if($opt::cleanup) {
9458         my @transfer = $self->transfer();
9459         my @return = $self->return();
9460         return (@transfer,@return);
9461     } else {
9462         return ();
9463     }
9464 }
9465
9466 sub exitstatuswrapper(@) {
9467     # Input:
9468     #   @shellcode = shell code to execute
9469     # Returns:
9470     #   shell script that returns current status after executing @shellcode
9471     if($Global::cshell) {
9472         return ('set _EXIT_status=$status; ' .
9473                 join(" ",@_).
9474                 'exit $_EXIT_status;');
9475     } else {
9476         return ('_EXIT_status=$?; ' .
9477                 join(" ",@_).
9478                 'exit $_EXIT_status;');
9479     }
9480 }
9481
9482 sub workdir($) {
9483     # Returns:
9484     #   the workdir on a remote machine
9485     my $self = shift;
9486     if(not defined $self->{'workdir'}) {
9487         my $workdir;
9488         if(defined $opt::workdir) {
9489             if($opt::workdir eq ".") {
9490                 # . means current dir
9491                 my $home = $ENV{'HOME'};
9492                 eval 'use Cwd';
9493                 my $cwd = cwd();
9494                 $workdir = $cwd;
9495                 if($home) {
9496                     # If homedir exists: remove the homedir from
9497                     # workdir if cwd starts with homedir
9498                     # E.g. /home/foo/my/dir => my/dir
9499                     # E.g. /tmp/my/dir => /tmp/my/dir
9500                     my ($home_dev, $home_ino) = (stat($home))[0,1];
9501                     my $parent = "";
9502                     my @dir_parts = split(m:/:,$cwd);
9503                     my $part;
9504                     while(defined ($part = shift @dir_parts)) {
9505                         $part eq "" and next;
9506                         $parent .= "/".$part;
9507                         my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
9508                         if($parent_dev == $home_dev and $parent_ino == $home_ino) {
9509                             # dev and ino is the same: We found the homedir.
9510                             $workdir = join("/",@dir_parts);
9511                             last;
9512                         }
9513                     }
9514                 }
9515                 if($workdir eq "") {
9516                     $workdir = ".";
9517                 }
9518             } elsif($opt::workdir eq "...") {
9519                 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
9520                     . "-" . $self->seq();
9521             } else {
9522                 $workdir = $self->{'commandline'}->
9523                     replace_placeholders([$opt::workdir],0,0);
9524                 #$workdir = $opt::workdir;
9525                 # Rsync treats /./ special. We dont want that
9526                 $workdir =~ s:/\./:/:g; # Remove /./
9527                 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
9528                 $workdir =~ s:^\./::g; # Remove starting ./ if any
9529             }
9530         } else {
9531             $workdir = ".";
9532         }
9533         $self->{'workdir'} = $workdir;
9534     }
9535     return $self->{'workdir'};
9536 }
9537
9538 sub parentdirs_of($) {
9539     # Return:
9540     #   all parentdirs except . of this dir or file - sorted desc by length
9541     my $d = shift;
9542     my @parents = ();
9543     while($d =~ s:/[^/]+$::) {
9544         if($d ne ".") {
9545             push @parents, $d;
9546         }
9547     }
9548     return @parents;
9549 }
9550
9551 sub start($) {
9552     # Setup STDOUT and STDERR for a job and start it.
9553     # Returns:
9554     #   job-object or undef if job not to run
9555
9556     sub open3_setpgrp_internal {
9557         # Run open3+setpgrp followed by the command
9558         # Input:
9559         #   $stdin_fh = Filehandle to use as STDIN
9560         #   $stdout_fh = Filehandle to use as STDOUT
9561         #   $stderr_fh = Filehandle to use as STDERR
9562         #   $command = Command to run
9563         # Returns:
9564         #   $pid = Process group of job started
9565         my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9566         my $pid;
9567         local (*OUT,*ERR);
9568         open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9569         open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9570         # The eval is needed to catch exception from open3
9571         eval {
9572             if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9573                 # Each child gets its own process group to make it safe to killall
9574                 eval{ setpgrp(0,0) };
9575                 eval{ setpriority(0,0,$opt::nice) };
9576                 exec($Global::shell,"-c",$command)
9577                     || ::die_bug("open3-$stdin_fh $command");
9578             }
9579         };
9580         return $pid;
9581     }
9582
9583     sub open3_setpgrp_external {
9584         # Run open3 on $command wrapped with a perl script doing setpgrp
9585         # Works on systems that do not support open3(,,,"-")
9586         # Input:
9587         #   $stdin_fh = Filehandle to use as STDIN
9588         #   $stdout_fh = Filehandle to use as STDOUT
9589         #   $stderr_fh = Filehandle to use as STDERR
9590         #   $command = Command to run
9591         # Returns:
9592         #   $pid = Process group of job started
9593         my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9594         local (*OUT,*ERR);
9595         open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9596         open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9597
9598         my $pid;
9599         my @setpgrp_wrap =
9600             ('perl','-e',
9601              "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9602              "exec '$Global::shell', '-c', \@ARGV");
9603         # The eval is needed to catch exception from open3
9604         eval {
9605             $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9606                 || ::die_bug("open3-$stdin_fh");
9607             1;
9608         };
9609         return $pid;
9610     }
9611
9612     sub redefine_open3_setpgrp {
9613         my $setgprp_cache = shift;
9614         # Select and run open3_setpgrp_internal/open3_setpgrp_external
9615         no warnings 'redefine';
9616         my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9617         # Test to see if open3(x,x,x,"-") is fully supported
9618         # Can an exported bash function be called via open3?
9619         my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9620             'else { exec("bash","-c","testfun && true"); }';
9621         my $bash =
9622             ::shell_quote_scalar_default(
9623             "testfun() { rm $name; }; export -f testfun; ".
9624             "perl -MIPC::Open3 -e ".
9625             ::shell_quote_scalar_default($script)
9626             );
9627         my $redefine_eval;
9628         # Redirect STDERR temporarily,
9629         # so errors on MacOS X are ignored.
9630         open my $saveerr, ">&STDERR";
9631         open STDERR, '>', "/dev/null";
9632         # Run the test
9633         ::debug("init",qq{bash -c $bash 2>/dev/null});
9634         qx{ bash -c $bash 2>/dev/null };
9635         open STDERR, ">&", $saveerr;
9636
9637         if(-e $name) {
9638             # Does not support open3(x,x,x,"-")
9639             # or does not have bash:
9640             # Use (slow) external version
9641             unlink($name);
9642             $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
9643             ::debug("init","open3_setpgrp_external chosen\n");
9644         } else {
9645             # Supports open3(x,x,x,"-")
9646             # This is 0.5 ms faster to run
9647             $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
9648             ::debug("init","open3_setpgrp_internal chosen\n");
9649         }
9650         if(open(my $fh, ">", $setgprp_cache)) {
9651             print $fh $redefine_eval;
9652             close $fh;
9653         } else {
9654             ::debug("init","Cannot write to $setgprp_cache");
9655         }
9656         eval $redefine_eval;
9657     }
9658
9659     sub open3_setpgrp {
9660         my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
9661             ::hostname() . "/setpgrp_func";
9662         sub read_cache() {
9663             -e $setgprp_cache || return 0;
9664             local $/ = undef;
9665             open(my $fh, "<", $setgprp_cache) || return 0;
9666             eval <$fh> || return 0;
9667             close $fh;
9668             return 1;
9669         }
9670         if(not read_cache()) {
9671             redefine_open3_setpgrp($setgprp_cache);
9672         }
9673         # The sub is now redefined. Call it
9674         return open3_setpgrp(@_);
9675     }
9676
9677     my $job = shift;
9678     if($job->suspended()) {
9679         # Job is kill -STOP'ped: Restart it.
9680         $job->resume();
9681         return $job;
9682     }
9683     # Get the shell command to be executed (possibly with ssh infront).
9684     my $command = $job->wrapped();
9685     my $pid;
9686
9687     if($Global::interactive or $Global::stderr_verbose) {
9688         $job->interactive_start();
9689     }
9690     # Must be run after $job->interactive_start():
9691     # $job->interactive_start() may call $job->skip()
9692     if($job->{'commandline'}{'skip'}) {
9693         # $job->skip() was called
9694         $command = "true";
9695     }
9696     $job->openoutputfiles();
9697     $job->print_verbose_dryrun();
9698     my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
9699     if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
9700     $ENV{'PARALLEL_SEQ'} = $job->seq();
9701     $ENV{'PARALLEL_PID'} = $$;
9702     $ENV{'PARALLEL_JOBSLOT'} = $job->slot();
9703     $ENV{'PARALLEL_TMP'} = ::tmpname("par");
9704     $job->add_rm($ENV{'PARALLEL_TMP'});
9705     ::debug("run", $Global::total_running, " processes . Starting (",
9706             $job->seq(), "): $command\n");
9707     if($opt::pipe) {
9708         my ($stdin_fh) = ::gensym();
9709         $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
9710         if($opt::roundrobin and not $opt::keeporder) {
9711             # --keep-order will make sure the order will be reproducible
9712             ::set_fh_non_blocking($stdin_fh);
9713         }
9714         $job->set_fh(0,"w",$stdin_fh);
9715         if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
9716     } elsif ($opt::tty and -c "/dev/tty" and
9717              open(my $devtty_fh, "<", "/dev/tty")) {
9718         # Give /dev/tty to the command if no one else is using it
9719         # The eval is needed to catch exception from open3
9720         local (*IN,*OUT,*ERR);
9721         open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9722         open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9723         *IN = $devtty_fh;
9724         # The eval is needed to catch exception from open3
9725         my @wrap = ('perl','-e',
9726                             "eval\{setpriority\(0,0,$opt::nice\)\}\;".
9727                             "exec '$Global::shell', '-c', \@ARGV");
9728         eval {
9729             $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
9730                 || ::die_bug("open3-/dev/tty");
9731             1;
9732         };
9733         close $devtty_fh;
9734         $job->set_virgin(0);
9735     } else {
9736         $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
9737         $job->set_virgin(0);
9738     }
9739     if($pid) {
9740         # A job was started
9741         $Global::total_running++;
9742         $Global::total_started++;
9743         $job->set_pid($pid);
9744         $job->set_starttime();
9745         $Global::running{$job->pid()} = $job;
9746         if($opt::timeout) {
9747             $Global::timeoutq->insert($job);
9748         }
9749         $Global::newest_job = $job;
9750         $Global::newest_starttime = ::now();
9751         return $job;
9752     } else {
9753         # No more processes
9754         ::debug("run", "Cannot spawn more jobs.\n");
9755         return undef;
9756     }
9757 }
9758
9759 sub interactive_start($) {
9760     my $self = shift;
9761     my $command = $self->wrapped();
9762     if($Global::interactive) {
9763         my $answer;
9764         ::status_no_nl("$command ?...");
9765         do{
9766             open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
9767             $answer = <$tty_fh>;
9768             close $tty_fh;
9769             # Sometime we get an empty string (not even \n)
9770             # Do not know why, so let us just ignore it and try again
9771         } while(length $answer < 1);
9772         if (not ($answer =~ /^\s*y/i)) {
9773             $self->{'commandline'}->skip();
9774         }
9775     } else {
9776         print $Global::original_stderr "$command\n";
9777     }
9778 }
9779
9780 {
9781     my $tmuxsocket;
9782
9783     sub tmux_wrap($) {
9784         # Wrap command with tmux for session pPID
9785         # Input:
9786         #   $actual_command = the actual command being run (incl ssh wrap)
9787         my $self = shift;
9788         my $actual_command = shift;
9789         # Temporary file name. Used for fifo to communicate exit val
9790         my $tmpfifo = ::tmpname("tmx");
9791         $self->add_rm($tmpfifo);
9792
9793         if(length($tmpfifo) >=100) {
9794             ::error("tmux does not support sockets with path > 100.");
9795             ::wait_and_exit(255);
9796         }
9797         if($opt::tmuxpane) {
9798             # Move the command into a pane in window 0
9799             $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
9800                 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
9801                 $actual_command;
9802         }
9803         my $visual_command = $self->replaced();
9804         my $title = $visual_command;
9805         if($visual_command =~ /\0/) {
9806             ::error("Command line contains NUL. tmux is confused by NUL.");
9807             ::wait_and_exit(255);
9808         }
9809         # ; causes problems
9810         # ascii 194-245 annoys tmux
9811         $title =~ tr/[\011-\016;\302-\365]/ /s;
9812         $title = ::Q($title);
9813
9814         my $l_act = length($actual_command);
9815         my $l_tit = length($title);
9816         my $l_fifo = length($tmpfifo);
9817         # The line to run contains a 118 chars extra code + the title 2x
9818         my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9819
9820         my $quoted_space75 = ::Q(" ")x75;
9821         while($l_tit < 1000 and
9822               (
9823                (890 < $l_tot and $l_tot < 1350)
9824                or
9825                (9250 < $l_tot and $l_tot < 9800)
9826               )) {
9827             # tmux blocks for certain lengths:
9828             # 900 < title + command < 1200
9829             # 9250 < title + command < 9800
9830             # but only if title < 1000, so expand the title with 75 spaces
9831             # The measured lengths are:
9832             # 996 < (title + whole command) < 1127
9833             # 9331 < (title + whole command) < 9636
9834             $title .= $quoted_space75;
9835             $l_tit = length($title);
9836             $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9837         }
9838
9839         my $tmux;
9840         $ENV{'PARALLEL_TMUX'} ||= "tmux";
9841         if(not $tmuxsocket) {
9842             $tmuxsocket = ::tmpname("tms");
9843             if($opt::fg) {
9844                 if(not fork) {
9845                     # Run tmux in the foreground
9846                     # Wait for the socket to appear
9847                     while (not -e $tmuxsocket) { }
9848                     `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
9849                     exit;
9850                 }
9851             }
9852             ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
9853         }
9854         $tmux = "sh -c '".
9855             $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
9856             $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
9857
9858         ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
9859                 $Limits::Command::line_max_len, " tot ",
9860                 $l_tot, "\n");
9861
9862         return "mkfifo $tmpfifo && $tmux ".
9863             # Run in tmux
9864             ::Q
9865             (
9866              "(".$actual_command.');'.
9867              # The triple print is needed - otherwise the testsuite fails
9868              q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
9869              "echo $title; echo \007Job finished at: `date`;sleep 10"
9870             ).
9871             # Run outside tmux
9872             # Read a / separated line: 0h/2 for csh, 2/0 for bash.
9873             # If csh the first will be 0h, so use the second as exit value.
9874             # Otherwise just use the first value as exit value.
9875             q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
9876     }
9877 }
9878
9879 sub is_already_in_results($) {
9880     # Do we already have results for this job?
9881     # Returns:
9882     #   $job_already_run = bool whether there is output for this or not
9883     my $job = $_[0];
9884     my $out = $job->{'commandline'}->results_out();
9885     ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
9886     return(-e $out."stdout" or -f $out);
9887 }
9888
9889 sub is_already_in_joblog($) {
9890     my $job = shift;
9891     return vec($Global::job_already_run,$job->seq(),1);
9892 }
9893
9894 sub set_job_in_joblog($) {
9895     my $job = shift;
9896     vec($Global::job_already_run,$job->seq(),1) = 1;
9897 }
9898
9899 sub should_be_retried($) {
9900     # Should this job be retried?
9901     # Returns
9902     #   0 - do not retry
9903     #   1 - job queued for retry
9904     my $self = shift;
9905     if (not $opt::retries) {
9906         return 0;
9907     }
9908     if(not $self->exitstatus() and not $self->exitsignal()) {
9909         # Completed with success. If there is a recorded failure: forget it
9910         $self->reset_failed_here();
9911         return 0;
9912     } else {
9913         # The job failed. Should it be retried?
9914         $self->add_failed_here();
9915         my $retries = $self->{'commandline'}->
9916             replace_placeholders([$opt::retries],0,0);
9917         if($self->total_failed() == $retries) {
9918             # This has been retried enough
9919             return 0;
9920         } else {
9921             # This command should be retried
9922             $self->set_endtime(undef);
9923             $self->reset_exitstatus();
9924             $Global::JobQueue->unget($self);
9925             ::debug("run", "Retry ", $self->seq(), "\n");
9926             return 1;
9927         }
9928     }
9929 }
9930
9931 {
9932     my (%print_later,$job_seq_to_print);
9933
9934     sub print_earlier_jobs($) {
9935         # Print jobs whose output is postponed due to --keep-order
9936         # Returns: N/A
9937         my $job = shift;
9938         $print_later{$job->seq()} = $job;
9939         $job_seq_to_print ||= 1;
9940         my $returnsize = 0;
9941         ::debug("run", "Looking for: $job_seq_to_print ",
9942                 "This: ", $job->seq(), "\n");
9943         for(;vec($Global::job_already_run,$job_seq_to_print,1);
9944             $job_seq_to_print++) {}
9945         while(my $j = $print_later{$job_seq_to_print}) {
9946             $returnsize += $j->print();
9947             if($j->endtime()) {
9948                 # Job finished - look at the next
9949                 delete $print_later{$job_seq_to_print};
9950                 $job_seq_to_print++;
9951                 next;
9952             } else {
9953                 # Job not finished yet - look at it again next round
9954                 last;
9955             }
9956         }
9957         return $returnsize;
9958     }
9959 }
9960
9961 sub print($) {
9962     # Print the output of the jobs
9963     # Returns: N/A
9964     my $self = shift;
9965
9966     ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9967     if($opt::dryrun) {
9968         # Nothing was printed to this job:
9969         # cleanup tmp files if --files was set
9970         ::rm($self->fh(1,"name"));
9971     }
9972     if($opt::pipe and $self->virgin() and not $opt::tee) {
9973         # Skip --joblog, --dryrun, --verbose
9974     } else {
9975         if($opt::ungroup) {
9976             # NULL returnsize = 0 returnsize
9977             $self->returnsize() or $self->add_returnsize(0);
9978             if($Global::joblog and defined $self->{'exitstatus'}) {
9979                 # Add to joblog when finished
9980                 $self->print_joblog();
9981                 # Printing is only relevant for grouped/--line-buffer output.
9982                 $opt::ungroup and return;
9983             }
9984         }
9985         # Check for disk full
9986         ::exit_if_disk_full();
9987     }
9988
9989     my $returnsize = $self->returnsize();
9990     for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9991         # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9992         $fdno == 0 and next;
9993         my $out_fd = $Global::fd{$fdno};
9994         my $in_fh = $self->fh($fdno,"r");
9995         if(not $in_fh) {
9996             if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9997                 # ::warning("File descriptor $fdno not defined\n");
9998             }
9999             next;
10000         }
10001         ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
10002         if($opt::linebuffer) {
10003             # Line buffered print out
10004             $self->print_linebuffer($fdno,$in_fh,$out_fd);
10005         } elsif($opt::files) {
10006             $self->print_files($fdno,$in_fh,$out_fd);
10007         } elsif($opt::tag or defined $opt::tagstring) {
10008             $self->print_tag($fdno,$in_fh,$out_fd);
10009         } else {
10010             $self->print_normal($fdno,$in_fh,$out_fd);
10011         }
10012         flush $out_fd;
10013     }
10014     ::debug("print", "<<joboutput\n");
10015     if(defined $self->{'exitstatus'}
10016        and not ($self->virgin() and $opt::pipe)) {
10017         if($Global::joblog and not $opt::sqlworker) {
10018             # Add to joblog when finished
10019             $self->print_joblog();
10020         }
10021         if($opt::sqlworker and not $opt::results) {
10022             $Global::sql->output($self);
10023         }
10024         if($Global::csvsep) {
10025             # Add output to CSV when finished
10026             $self->print_csv();
10027         }
10028     }
10029     return $returnsize - $self->returnsize();
10030 }
10031
10032 {
10033     my $header_printed;
10034
10035     sub print_csv($) {
10036         my $self = shift;
10037         my $cmd;
10038         if($Global::verbose <= 1) {
10039             $cmd = $self->replaced();
10040         } else {
10041             # Verbose level > 1: Print the rsync and stuff
10042             $cmd = join " ", @{$self->{'commandline'}};
10043         }
10044         my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10045
10046         if(not $header_printed) {
10047             # Variable headers
10048             # Normal => V1..Vn
10049             # --header : => first value from column
10050             my @V;
10051             if($opt::header) {
10052                 my $i = 1;
10053                 @V = (map { $Global::input_source_header{$i++} }
10054                       @$record_ref[1..$#$record_ref]);
10055             } else {
10056                 my $V = "V1";
10057                 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
10058             }
10059             print $Global::csv_fh
10060                 (map { $$_ }
10061                  combine_ref("Seq", "Host", "Starttime", "JobRuntime",
10062                              "Send", "Receive", "Exitval", "Signal", "Command",
10063                              @V,
10064                              "Stdout","Stderr"
10065                  )),"\n";
10066             $header_printed++;
10067         }
10068         # Memory optimization: Overwrite with the joined output
10069         $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10070         $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10071         print $Global::csv_fh
10072             (map { $$_ }
10073              combine_ref
10074              ($self->seq(),
10075               $self->sshlogin()->string(),
10076               $self->starttime(), sprintf("%0.3f",$self->runtime()),
10077               $self->transfersize(), $self->returnsize(),
10078               $self->exitstatus(), $self->exitsignal(), \$cmd,
10079               \@$record_ref[1..$#$record_ref],
10080               \$self->{'output'}{1},
10081               \$self->{'output'}{2})),"\n";
10082     }
10083 }
10084
10085 sub combine_ref($) {
10086     # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
10087     my @part = @_;
10088     my $sep = $Global::csvsep;
10089     my $quot = '"';
10090     my @out = ();
10091
10092     my $must_be_quoted;
10093     for my $column (@part) {
10094         # Memory optimization: Content transferred as reference
10095         if(ref $column ne "SCALAR") {
10096             # Convert all columns to scalar references
10097             my $v = $column;
10098             $column = \$v;
10099         }
10100         if(not defined $$column) {
10101             $$column = '';
10102             next;
10103         }
10104
10105         $must_be_quoted = 0;
10106
10107         if($$column =~ s/$quot/$quot$quot/go){
10108             # Replace " => ""
10109             $must_be_quoted ||=1;
10110         }
10111         if($$column =~ /[\s\Q$sep\E]/o){
10112             # Put quotes around if the column contains ,
10113             $must_be_quoted ||=1;
10114         }
10115
10116         $Global::use{"bytes"} ||= eval "use bytes; 1;";
10117         if ($$column =~ /\0/) {
10118             # Contains \0 => put quotes around
10119             $must_be_quoted ||=1;
10120         }
10121         if($must_be_quoted){
10122             push @out, \$sep, \$quot, $column, \$quot;
10123         } else {
10124             push @out, \$sep, $column;
10125         }
10126     }
10127     # Pop off a $sep
10128     shift @out;
10129     return @out;
10130 }
10131
10132 sub print_files($) {
10133     # Print the name of the file containing stdout on stdout
10134     # Uses:
10135     #   $opt::pipe
10136     #   $opt::group = Print when job is done
10137     #   $opt::linebuffer = Print ASAP
10138     # Returns: N/A
10139     my $self = shift;
10140     my ($fdno,$in_fh,$out_fd) = @_;
10141
10142     # If the job is dead: close printing fh. Needed for --compress
10143     close $self->fh($fdno,"w");
10144     if($? and $opt::compress) {
10145         ::error($opt::compress_program." failed.");
10146         $self->set_exitstatus(255);
10147     }
10148     if($opt::compress) {
10149         # Kill the decompressor which will not be needed
10150         CORE::kill "TERM", $self->fh($fdno,"rpid");
10151     }
10152     close $in_fh;
10153
10154     if($opt::pipe and $self->virgin()) {
10155         # Nothing was printed to this job:
10156         # cleanup unused tmp files because --files was set
10157         for my $fdno (1,2) {
10158             ::rm($self->fh($fdno,"name"));
10159             ::rm($self->fh($fdno,"unlink"));
10160         }
10161     } elsif($fdno == 1 and $self->fh($fdno,"name")) {
10162         print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10163         if($Global::membuffer) {
10164             push @{$self->{'output'}{$fdno}},
10165                 $self->tag(), $self->fh($fdno,"name");
10166         }
10167         $self->add_returnsize(-s $self->fh($fdno,"name"));
10168         # Mark as printed - do not print again
10169         $self->set_fh($fdno,"name",undef);
10170     }
10171 }
10172
10173 sub print_linebuffer($) {
10174     my $self = shift;
10175     my ($fdno,$in_fh,$out_fd) = @_;
10176     if(defined $self->{'exitstatus'}) {
10177         # If the job is dead: close printing fh. Needed for --compress
10178         close $self->fh($fdno,"w");
10179         if($? and $opt::compress) {
10180             ::error($opt::compress_program." failed.");
10181             $self->set_exitstatus(255);
10182         }
10183         if($opt::compress) {
10184             # Blocked reading in final round
10185             for my $fdno (1,2) {
10186                 ::set_fh_blocking($self->fh($fdno,'r'));
10187             }
10188         }
10189     }
10190     if(not $self->virgin()) {
10191         if($opt::files or ($opt::results and not $Global::csvsep)) {
10192             # Print filename
10193             if($fdno == 1 and not $self->fh($fdno,"printed")) {
10194                 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10195                 if($Global::membuffer) {
10196                     push(@{$self->{'output'}{$fdno}}, $self->tag(),
10197                          $self->fh($fdno,"name"));
10198                 }
10199                 $self->set_fh($fdno,"printed",1);
10200             }
10201             # No need for reading $in_fh, as it is from "cat >/dev/null"
10202         } else {
10203             # Read halflines and print full lines
10204             my $outputlength = 0;
10205             my $halfline_ref = $self->{'halfline'}{$fdno};
10206             my ($buf,$i,$rv);
10207             # 1310720 gives 1.2 GB/s
10208             # 131072  gives 0.9 GB/s
10209             while($rv = sysread($in_fh, $buf,1310720)) {
10210                 $outputlength += $rv;
10211                 # TODO --recend
10212                 # Treat both \n and \r as line end
10213                 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10214                 if($i) {
10215                     # One or more complete lines were found
10216                     if($opt::tag or defined $opt::tagstring) {
10217                         # Replace ^ with $tag within the full line
10218                         if($Global::cache_replacement_eval) {
10219                             # Replace with the same value for tag
10220                             my $tag = $self->tag();
10221                             unshift @$halfline_ref, $tag;
10222                             # TODO --recend that can be partially in @$halfline_ref
10223                             substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$tag/gs;
10224                             # The length changed, so find the new ending pos
10225                             $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10226                         } else {
10227                             # Replace with freshly computed value of tag
10228                             unshift @$halfline_ref, $self->tag();
10229                             substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$self->tag()/gse;
10230                             # The length changed, so find the new ending pos
10231                             $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10232                         }
10233                     }
10234                     # Print the partial line (halfline) and the last half
10235                     print $out_fd @$halfline_ref, substr($buf,0,$i);
10236                     # Buffer in memory for SQL and CSV-output
10237                     if($Global::membuffer) {
10238                         push(@{$self->{'output'}{$fdno}},
10239                              @$halfline_ref, substr($buf,0,$i));
10240                     }
10241                     # Remove the printed part by keeping the unprinted part
10242                     @$halfline_ref = (substr($buf,$i));
10243                 } else {
10244                     # No newline, so append to the halfline
10245                     push @$halfline_ref, $buf;
10246                 }
10247             }
10248             $self->add_returnsize($outputlength);
10249         }
10250         if(defined $self->{'exitstatus'}) {
10251             if($opt::files or ($opt::results and not $Global::csvsep)) {
10252                 $self->add_returnsize(-s $self->fh($fdno,"name"));
10253             } else {
10254                 # If the job is dead: print the remaining partial line
10255                 # read remaining
10256                 my $halfline_ref = $self->{'halfline'}{$fdno};
10257                 if(grep /./, @$halfline_ref) {
10258                     my $returnsize = 0;
10259                     for(@{$self->{'halfline'}{$fdno}}) {
10260                         $returnsize += length $_;
10261                     }
10262                     $self->add_returnsize($returnsize);
10263                     if($opt::tag or defined $opt::tagstring) {
10264                         # Prepend $tag the the remaining half line
10265                         unshift @$halfline_ref, $self->tag();
10266                     }
10267                     # Print the partial line (halfline)
10268                     print $out_fd @{$self->{'halfline'}{$fdno}};
10269                     # Buffer in memory for SQL and CSV-output
10270                     if($Global::membuffer) {
10271                         push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
10272                     }
10273                     @$halfline_ref = ();
10274                 }
10275             }
10276             if($self->fh($fdno,"rpid") and
10277                CORE::kill 0, $self->fh($fdno,"rpid")) {
10278                 # decompress still running
10279             } else {
10280                 # decompress done: close fh
10281                 close $in_fh;
10282                 if($? and $opt::compress) {
10283                     ::error($opt::decompress_program." failed.");
10284                     $self->set_exitstatus(255);
10285                 }
10286             }
10287         }
10288     }
10289 }
10290
10291 sub print_tag(@) {
10292     return print_normal(@_);
10293 }
10294
10295 sub free_ressources() {
10296     my $self = shift;
10297     if(not $opt::ungroup) {
10298         my $fh;
10299         for my $fdno (sort { $a <=> $b } keys %Global::fd) {
10300             $fh = $self->fh($fdno,"w");
10301             $fh and close $fh;
10302             $fh = $self->fh($fdno,"r");
10303             $fh and close $fh;
10304         }
10305     }
10306 }
10307
10308 sub print_normal($) {
10309     my $self = shift;
10310     my ($fdno,$in_fh,$out_fd) = @_;
10311     my $buf;
10312     close $self->fh($fdno,"w");
10313     if($? and $opt::compress) {
10314         ::error($opt::compress_program." failed.");
10315         $self->set_exitstatus(255);
10316     }
10317     if(not $self->virgin()) {
10318         seek $in_fh, 0, 0;
10319         # $in_fh is now ready for reading at position 0
10320         my $outputlength = 0;
10321         my @output;
10322
10323         if($opt::tag or $opt::tagstring) {
10324             # Read line by line
10325             local $/ = "\n";
10326             my $tag = $self->tag();
10327             while(<$in_fh>) {
10328                 $outputlength += length $_;
10329                 # Tag lines with \r, too
10330                 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10331                 print $out_fd $tag,$_;
10332                 if($Global::membuffer) {
10333                     push @{$self->{'output'}{$fdno}}, $tag, $_;
10334                 }
10335             }
10336         } else {
10337             while(sysread($in_fh,$buf,131072)) {
10338                 print $out_fd $buf;
10339                 $outputlength += length $buf;
10340                 if($Global::membuffer) {
10341                     push @{$self->{'output'}{$fdno}}, $buf;
10342                 }
10343             }
10344         }
10345         if($fdno == 1) {
10346             $self->add_returnsize($outputlength);
10347         }
10348         close $in_fh;
10349         if($? and $opt::compress) {
10350             ::error($opt::decompress_program." failed.");
10351             $self->set_exitstatus(255);
10352         }
10353     }
10354 }
10355
10356 sub print_joblog($) {
10357     my $self = shift;
10358     my $cmd;
10359     if($Global::verbose <= 1) {
10360         $cmd = $self->replaced();
10361     } else {
10362         # Verbose level > 1: Print the rsync and stuff
10363         $cmd = $self->wrapped();
10364     }
10365     # Newlines make it hard to parse the joblog
10366     $cmd =~ s/\n/\0/g;
10367     print $Global::joblog
10368         join("\t", $self->seq(), $self->sshlogin()->string(),
10369              $self->starttime(), sprintf("%10.3f",$self->runtime()),
10370              $self->transfersize(), $self->returnsize(),
10371              $self->exitstatus(), $self->exitsignal(), $cmd
10372         ). "\n";
10373     flush $Global::joblog;
10374     $self->set_job_in_joblog();
10375 }
10376
10377 sub tag($) {
10378     my $self = shift;
10379     if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
10380         if($opt::tag or defined $opt::tagstring) {
10381             $self->{'tag'} = $self->{'commandline'}->
10382                 replace_placeholders([$opt::tagstring],0,0)."\t";
10383         } else {
10384             $self->{'tag'} = "";
10385         }
10386     }
10387     return $self->{'tag'};
10388 }
10389
10390 sub hostgroups($) {
10391     my $self = shift;
10392     if(not defined $self->{'hostgroups'}) {
10393         $self->{'hostgroups'} =
10394             $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
10395     }
10396     return @{$self->{'hostgroups'}};
10397 }
10398
10399 sub exitstatus($) {
10400     my $self = shift;
10401     return $self->{'exitstatus'};
10402 }
10403
10404 sub set_exitstatus($$) {
10405     my $self = shift;
10406     my $exitstatus = shift;
10407     if($exitstatus) {
10408         # Overwrite status if non-zero
10409         $self->{'exitstatus'} = $exitstatus;
10410     } else {
10411         # Set status but do not overwrite
10412         # Status may have been set by --timeout
10413         $self->{'exitstatus'} ||= $exitstatus;
10414     }
10415     $opt::sqlworker and
10416         $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
10417                              $exitstatus);
10418 }
10419
10420 sub reset_exitstatus($) {
10421     my $self = shift;
10422     undef $self->{'exitstatus'};
10423 }
10424
10425 sub exitsignal($) {
10426     my $self = shift;
10427     return $self->{'exitsignal'};
10428 }
10429
10430 sub set_exitsignal($$) {
10431     my $self = shift;
10432     my $exitsignal = shift;
10433     $self->{'exitsignal'} = $exitsignal;
10434     $opt::sqlworker and
10435         $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
10436                              $exitsignal);
10437 }
10438
10439 {
10440     my $status_printed;
10441     my $total_jobs;
10442
10443     sub should_we_halt {
10444         # Should we halt? Immediately? Gracefully?
10445         # Returns: N/A
10446         my $job = shift;
10447         my $limit;
10448         if($job->exitstatus() or $job->exitsignal()) {
10449             # Job failed
10450             $Global::exitstatus++;
10451             $Global::total_failed++;
10452             if($Global::halt_fail) {
10453                 ::status("$Global::progname: This job failed:",
10454                          $job->replaced());
10455                 $limit = $Global::total_failed;
10456             }
10457         } elsif($Global::halt_success) {
10458             ::status("$Global::progname: This job succeeded:",
10459                      $job->replaced());
10460             $limit = $Global::total_completed - $Global::total_failed;
10461         }
10462         if($Global::halt_done) {
10463             ::status("$Global::progname: This job finished:",
10464                      $job->replaced());
10465             $limit = $Global::total_completed;
10466         }
10467         if(not defined $limit) {
10468             return ""
10469         }
10470         #  --halt # => 1..100 (number of jobs failed, 101 means > 100)
10471         #  --halt % => 1..100 (pct of jobs failed)
10472         if($Global::halt_pct and not $Global::halt_count) {
10473             $total_jobs ||= $Global::JobQueue->total_jobs();
10474             # From the pct compute the number of jobs that must fail/succeed
10475             $Global::halt_count = $total_jobs * $Global::halt_pct;
10476         }
10477         if($limit >= $Global::halt_count) {
10478             # At least N jobs have failed/succeded/completed
10479             # or at least N% have failed/succeded/completed
10480             # So we should prepare for exit
10481             if($Global::halt_fail or $Global::halt_done) {
10482                 # Set exit status
10483                 if(not defined $Global::halt_exitstatus) {
10484                     if($Global::halt_pct) {
10485                         # --halt now,fail=X% or soon,fail=X%
10486                         # --halt now,done=X% or soon,done=X%
10487                         $Global::halt_exitstatus =
10488                             ::ceil($Global::total_failed / $total_jobs * 100);
10489                     } elsif($Global::halt_count) {
10490                         # --halt now,fail=X or soon,fail=X
10491                         # --halt now,done=X or soon,done=X
10492                         $Global::halt_exitstatus =
10493                             ::min($Global::total_failed,101);
10494                     }
10495                     if($Global::halt_count and $Global::halt_count == 1) {
10496                         # --halt now,fail=1 or soon,fail=1
10497                         # --halt now,done=1 or soon,done=1
10498                         # Emulate Bash's +128 if there is a signal
10499                         $Global::halt_exitstatus =
10500                             ($job->exitstatus()
10501                              or
10502                              $job->exitsignal() ? $job->exitsignal() + 128 : 0);
10503                     }
10504                 }
10505                 ::debug("halt","Pct: ",$Global::halt_pct,
10506                         " count: ",$Global::halt_count,
10507                         " status: ",$Global::halt_exitstatus,"\n");
10508             } elsif($Global::halt_success) {
10509                 $Global::halt_exitstatus = 0;
10510             }
10511             if($Global::halt_when eq "soon"
10512                and
10513                (scalar(keys %Global::running) > 0
10514                 or
10515                 $Global::max_jobs_running == 1)) {
10516                 ::status
10517                     ("$Global::progname: Starting no more jobs. ".
10518                      "Waiting for ". (keys %Global::running).
10519                      " jobs to finish.");
10520                 $Global::start_no_new_jobs ||= 1;
10521             }
10522             return($Global::halt_when);
10523         }
10524         return "";
10525     }
10526 }
10527
10528
10529 package CommandLine;
10530
10531 sub new($) {
10532     my $class = shift;
10533     my $seq = shift;
10534     my $commandref = shift;
10535     $commandref || die;
10536     my $arg_queue = shift;
10537     my $context_replace = shift;
10538     my $max_number_of_args = shift; # for -N and normal (-n1)
10539     my $transfer_files = shift;
10540     my $return_files = shift;
10541     my $replacecount_ref = shift;
10542     my $len_ref = shift;
10543     my %replacecount = %$replacecount_ref;
10544     my %len = %$len_ref;
10545     for (keys %$replacecount_ref) {
10546         # Total length of this replacement string {} replaced with all args
10547         $len{$_} = 0;
10548     }
10549     return bless {
10550         'command' => $commandref,
10551         'seq' => $seq,
10552         'len' => \%len,
10553         'arg_list' => [],
10554         'arg_list_flat' => [],
10555         'arg_list_flat_orig' => [undef],
10556         'arg_queue' => $arg_queue,
10557         'max_number_of_args' => $max_number_of_args,
10558         'replacecount' => \%replacecount,
10559         'context_replace' => $context_replace,
10560         'transfer_files' => $transfer_files,
10561         'return_files' => $return_files,
10562         'replaced' => undef,
10563     }, ref($class) || $class;
10564 }
10565
10566 sub seq($) {
10567     my $self = shift;
10568     return $self->{'seq'};
10569 }
10570
10571 sub set_seq($$) {
10572     my $self = shift;
10573     $self->{'seq'} = shift;
10574 }
10575
10576 sub slot($) {
10577     # Find the number of a free job slot and return it
10578     # Uses:
10579     #   @Global::slots - list with free jobslots
10580     # Returns:
10581     #   $jobslot = number of jobslot
10582     my $self = shift;
10583     if(not $self->{'slot'}) {
10584         if(not @Global::slots) {
10585             # $max_slot_number will typically be $Global::max_jobs_running
10586             push @Global::slots, ++$Global::max_slot_number;
10587         }
10588         $self->{'slot'} = shift @Global::slots;
10589     }
10590     return $self->{'slot'};
10591 }
10592
10593 {
10594     my $already_spread;
10595     my $darwin_max_len;
10596
10597     sub populate($) {
10598         # Add arguments from arg_queue until the number of arguments or
10599         # max line length is reached
10600         # Uses:
10601         #   $Global::minimal_command_line_length
10602         #   $opt::cat
10603         #   $opt::fifo
10604         #   $Global::JobQueue
10605         #   $opt::m
10606         #   $opt::X
10607         #   $Global::max_jobs_running
10608         # Returns: N/A
10609         my $self = shift;
10610         my $next_arg;
10611         my $max_len = $Global::minimal_command_line_length
10612             || Limits::Command::max_length();
10613         if($^O eq "darwin") {
10614             # Darwin's limit is affected by:
10615             # * number of environment names (variables+functions)
10616             # * size of environment
10617             # * the length of arguments:
10618             #   a one-char argument lowers the limit by 5
10619             #   To be safe assume all arguments are one-char
10620             # The max_len is cached between runs, but if the size of
10621             # the environment is different we need to recompute the
10622             # usable max length for this run of GNU Parallel
10623             # See https://unix.stackexchange.com/a/604943/2972
10624             if(not $darwin_max_len) {
10625                 my $envc = (keys %ENV);
10626                 my $envn = length join"",(keys %ENV);
10627                 my $envv = length join"",(values %ENV);
10628                 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
10629                 ::debug("init",
10630                         "length: $darwin_max_len ".
10631                         "3+($max_len - $envn - $envv)/5 - $envc*2");
10632             }
10633             $max_len = $darwin_max_len;
10634         }
10635         if($opt::cat or $opt::fifo) {
10636             # Get the empty arg added by --pipepart (if any)
10637             $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
10638             # $PARALLEL_TMP will point to a tempfile that will be used as {}
10639             $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
10640                 unget([Arg->new('$PARALLEL_TMP')]);
10641         }
10642         while (not $self->{'arg_queue'}->empty()) {
10643             $next_arg = $self->{'arg_queue'}->get();
10644             if(not defined $next_arg) {
10645                 next;
10646             }
10647             $self->push($next_arg);
10648             if($self->len() >= $max_len) {
10649                 # Command length is now > max_length
10650                 # If there are arguments: remove the last
10651                 # If there are no arguments: Error
10652                 # TODO stuff about -x opt_x
10653                 if($self->number_of_args() > 1) {
10654                     # There is something to work on
10655                     $self->{'arg_queue'}->unget($self->pop());
10656                     last;
10657                 } else {
10658                     my $args = join(" ", map { $_->orig() } @$next_arg);
10659                     ::error("Command line too long (".
10660                             $self->len(). " >= ".
10661                             $max_len.
10662                             ") at input ".
10663                             $self->{'arg_queue'}->arg_number().
10664                             ": ".
10665                             ((length $args > 50) ?
10666                              (substr($args,0,50))."..." :
10667                              $args));
10668                     $self->{'arg_queue'}->unget($self->pop());
10669                     ::wait_and_exit(255);
10670                 }
10671             }
10672
10673             if(defined $self->{'max_number_of_args'}) {
10674                 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
10675                     last;
10676                 }
10677             }
10678         }
10679         if(($opt::m or $opt::X) and not $already_spread
10680            and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
10681             # -m or -X and EOF => Spread the arguments over all jobslots
10682             # (unless they are already spread)
10683             $already_spread ||= 1;
10684             if($self->number_of_args() > 1) {
10685                 $self->{'max_number_of_args'} =
10686                     ::ceil($self->number_of_args()/$Global::max_jobs_running);
10687                 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
10688                     $self->{'max_number_of_args'};
10689                 $self->{'arg_queue'}->unget($self->pop_all());
10690                 while($self->number_of_args() < $self->{'max_number_of_args'}) {
10691                     $self->push($self->{'arg_queue'}->get());
10692                 }
10693             }
10694             $Global::JobQueue->flush_total_jobs();
10695         }
10696
10697         if($opt::sqlmaster) {
10698             # Insert the V1..Vn for this $seq in SQL table instead of generating one
10699             $Global::sql->insert_records($self->seq(), $self->{'command'},
10700                                          $self->{'arg_list_flat_orig'});
10701         }
10702     }
10703 }
10704
10705 sub push($) {
10706     # Add one or more records as arguments
10707     # Returns: N/A
10708     my $self = shift;
10709     my $record = shift;
10710     push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
10711     push @{$self->{'arg_list_flat'}}, @$record;
10712     push @{$self->{'arg_list'}}, $record;
10713     # Make @arg available for {= =}
10714     *Arg::arg = $self->{'arg_list_flat_orig'};
10715
10716     my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10717     for my $perlexpr (keys %{$self->{'replacecount'}}) {
10718         if($perlexpr =~ /^(\d+) /) {
10719             # Positional
10720             defined($record->[$1-1]) or next;
10721             $self->{'len'}{$perlexpr} +=
10722                 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10723         } else {
10724             for my $arg (@$record) {
10725                 if(defined $arg) {
10726                     $self->{'len'}{$perlexpr} +=
10727                         length $arg->replace($perlexpr,$quote_arg,$self);
10728                 }
10729             }
10730         }
10731     }
10732 }
10733
10734 sub pop($) {
10735     # Remove last argument
10736     # Returns:
10737     #   the last record
10738     my $self = shift;
10739     my $record = pop @{$self->{'arg_list'}};
10740     # pop off arguments from @$record
10741     splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
10742     splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
10743     my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10744     for my $perlexpr (keys %{$self->{'replacecount'}}) {
10745         if($perlexpr =~ /^(\d+) /) {
10746             # Positional
10747             defined($record->[$1-1]) or next;
10748             $self->{'len'}{$perlexpr} -=
10749                 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10750         } else {
10751             for my $arg (@$record) {
10752                 if(defined $arg) {
10753                     $self->{'len'}{$perlexpr} -=
10754                         length $arg->replace($perlexpr,$quote_arg,$self);
10755                 }
10756             }
10757         }
10758     }
10759     return $record;
10760 }
10761
10762 sub pop_all($) {
10763     # Remove all arguments and zeros the length of replacement perlexpr
10764     # Returns:
10765     #   all records
10766     my $self = shift;
10767     my @popped = @{$self->{'arg_list'}};
10768     for my $perlexpr (keys %{$self->{'replacecount'}}) {
10769         $self->{'len'}{$perlexpr} = 0;
10770     }
10771     $self->{'arg_list'} = [];
10772     $self->{'arg_list_flat_orig'} = [undef];
10773     $self->{'arg_list_flat'} = [];
10774     return @popped;
10775 }
10776
10777 sub number_of_args($) {
10778     # The number of records
10779     # Returns:
10780     #   number of records
10781     my $self = shift;
10782     # This is really the number of records
10783     return $#{$self->{'arg_list'}}+1;
10784 }
10785
10786 sub number_of_recargs($) {
10787     # The number of args in records
10788     # Returns:
10789     #   number of args records
10790     my $self = shift;
10791     my $sum = 0;
10792     my $nrec = scalar @{$self->{'arg_list'}};
10793     if($nrec) {
10794         $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
10795     }
10796     return $sum;
10797 }
10798
10799 sub args_as_string($) {
10800     # Returns:
10801     #  all unmodified arguments joined with ' ' (similar to {})
10802     my $self = shift;
10803     return (join " ", map { $_->orig() }
10804             map { @$_ } @{$self->{'arg_list'}});
10805 }
10806
10807 sub results_out($) {
10808     sub max_file_name_length {
10809         # Figure out the max length of a subdir
10810         # TODO and the max total length
10811         # Ext4 = 255,130816
10812         # Uses:
10813         #   $Global::max_file_length is set
10814         # Returns:
10815         #   $Global::max_file_length
10816         my $testdir = shift;
10817
10818         my $upper = 100_000_000;
10819         # Dir length of 8 chars is supported everywhere
10820         my $len = 8;
10821         my $dir = "x"x$len;
10822         do {
10823             rmdir($testdir."/".$dir);
10824             $len *= 16;
10825             $dir = "x"x$len;
10826         } while ($len < $upper and mkdir $testdir."/".$dir);
10827         # Then search for the actual max length between $len/16 and $len
10828         my $min = $len/16;
10829         my $max = $len;
10830         while($max-$min > 5) {
10831             # If we are within 5 chars of the exact value:
10832             # it is not worth the extra time to find the exact value
10833             my $test = int(($min+$max)/2);
10834             $dir = "x"x$test;
10835             if(mkdir $testdir."/".$dir) {
10836                 rmdir($testdir."/".$dir);
10837                 $min = $test;
10838             } else {
10839                 $max = $test;
10840             }
10841         }
10842         $Global::max_file_length = $min;
10843         return $min;
10844     }
10845
10846     my $self = shift;
10847     my $out = $self->replace_placeholders([$opt::results],0,0);
10848     if($out eq $opt::results) {
10849         # $opt::results simple string: Append args_as_dirname
10850         my $args_as_dirname = $self->args_as_dirname();
10851         # Output in: prefix/name1/val1/name2/val2/stdout
10852         $out = $opt::results."/".$args_as_dirname;
10853         if(-d $out or eval{ File::Path::mkpath($out); }) {
10854             # OK
10855         } else {
10856             # mkpath failed: Argument probably too long.
10857             # Set $Global::max_file_length, which will keep the individual
10858             # dir names shorter than the max length
10859             max_file_name_length($opt::results);
10860             $args_as_dirname = $self->args_as_dirname();
10861             # prefix/name1/val1/name2/val2/
10862             $out = $opt::results."/".$args_as_dirname;
10863             File::Path::mkpath($out);
10864         }
10865         $out .="/";
10866     } else {
10867         if($out =~ m:/$:) {
10868             # / = dir
10869             if(-d $out or eval{ File::Path::mkpath($out); }) {
10870                 # OK
10871             } else {
10872                 ::error("Cannot make dir '$out'.");
10873                 ::wait_and_exit(255);
10874             }
10875         } else {
10876             $out =~ m:(.*)/:;
10877             File::Path::mkpath($1);
10878         }
10879     }
10880     return $out;
10881 }
10882
10883 sub args_as_dirname($) {
10884     # Returns:
10885     #  all unmodified arguments joined with '/' (similar to {})
10886     #  \t \0 \\ and / are quoted as: \t \0 \\ \_
10887     # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
10888     my $self = shift;
10889     my @res = ();
10890
10891     for my $rec_ref (@{$self->{'arg_list'}}) {
10892         # If headers are used, sort by them.
10893         # Otherwise keep the order from the command line.
10894         my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
10895         for my $n (@header_indexes_sorted) {
10896             CORE::push(@res,
10897                  $Global::input_source_header{$n},
10898                  map { my $s = $_;
10899                        #  \t \0 \\ and / are quoted as: \t \0 \\ \_
10900                        $s =~ s/\\/\\\\/g;
10901                        $s =~ s/\t/\\t/g;
10902                        $s =~ s/\0/\\0/g;
10903                        $s =~ s:/:\\_:g;
10904                        if($Global::max_file_length) {
10905                            # Keep each subdir shorter than the longest
10906                            # allowed file name
10907                            $s = substr($s,0,$Global::max_file_length);
10908                        }
10909                        $s; }
10910                  $rec_ref->[$n-1]->orig());
10911         }
10912     }
10913     return join "/", @res;
10914 }
10915
10916 sub header_indexes_sorted($) {
10917     # Sort headers first by number then by name.
10918     # E.g.: 1a 1b 11a 11b
10919     # Returns:
10920     #  Indexes of %Global::input_source_header sorted
10921     my $max_col = shift;
10922
10923     no warnings 'numeric';
10924     for my $col (1 .. $max_col) {
10925         # Make sure the header is defined. If it is not: use column number
10926         if(not defined $Global::input_source_header{$col}) {
10927             $Global::input_source_header{$col} = $col;
10928         }
10929     }
10930     my @header_indexes_sorted = sort {
10931         # Sort headers numerically then asciibetically
10932         $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
10933         or
10934             $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
10935     } 1 .. $max_col;
10936     return @header_indexes_sorted;
10937 }
10938
10939 sub len($) {
10940     # Uses:
10941     #   @opt::shellquote
10942     # The length of the command line with args substituted
10943     my $self = shift;
10944     my $len = 0;
10945     # Add length of the original command with no args
10946     # Length of command w/ all replacement args removed
10947     $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
10948     ::debug("length", "noncontext + command: $len\n");
10949     # MacOS has an overhead of 8 bytes per argument
10950     my $darwin = ($^O eq "darwin") ? 8 : 0;
10951     my $recargs = $self->number_of_recargs();
10952     if($self->{'context_replace'}) {
10953         # Context is duplicated for each arg
10954         $len += $recargs * $self->{'len'}{'context'};
10955         for my $replstring (keys %{$self->{'replacecount'}}) {
10956             # If the replacements string is more than once: mulitply its length
10957             $len += $self->{'len'}{$replstring} *
10958                 $self->{'replacecount'}{$replstring};
10959             ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
10960                     $self->{'replacecount'}{$replstring}, "\n");
10961         }
10962         # echo 11 22 33 44 55 66 77 88 99 1010
10963         # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
10964         # 5 +  ctxgrp*arg
10965         ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10966                 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10967         # Add space between context groups
10968         $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10969         if($darwin) {
10970             $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
10971         }
10972     } else {
10973         # Each replacement string may occur several times
10974         # Add the length for each time
10975         $len += 1*$self->{'len'}{'context'};
10976         ::debug("length", "context+noncontext + command: $len\n");
10977         for my $replstring (keys %{$self->{'replacecount'}}) {
10978             # (space between recargs + length of replacement)
10979             # * number this replacement is used
10980             $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10981                 $self->{'replacecount'}{$replstring};
10982             if($darwin) {
10983                 $len += ($recargs * $self->{'replacecount'}{$replstring}
10984                          * $darwin);
10985             }
10986         }
10987     }
10988     if(defined $Global::parallel_env) {
10989         # If we are using --env, add the prefix for that, too.
10990         $len += length $Global::parallel_env;
10991     }
10992     if($Global::quoting) {
10993         # Pessimistic length if -q is set
10994         # Worse than worst case: ' => "'" + " => '"'
10995         # TODO can we count the number of expanding chars?
10996         #   and count them in arguments, too?
10997         $len *= 3;
10998     }
10999     if(@opt::shellquote) {
11000         # Pessimistic length if --shellquote is set
11001         # Worse than worst case: ' => "'"
11002         for(@opt::shellquote) {
11003             $len *= 3;
11004         }
11005         $len *= 5;
11006     }
11007     if(@opt::sshlogin) {
11008         # Pessimistic length if remote
11009         # Worst case is BASE64 encoding 3 bytes -> 4 bytes
11010         $len = int($len*4/3);
11011     }
11012
11013     return $len;
11014 }
11015
11016 sub replaced($) {
11017     # Uses:
11018     #   $Global::quote_replace
11019     #   $Global::quoting
11020     # Returns:
11021     #   $replaced = command with place holders replaced and prepended
11022     my $self = shift;
11023     if(not defined $self->{'replaced'}) {
11024         # Don't quote arguments if the input is the full command line
11025         my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11026         # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
11027         $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
11028         $self->{'replaced'} = $self->
11029             replace_placeholders($self->{'command'},$Global::quoting,
11030                                  $quote_arg);
11031         my $len = length $self->{'replaced'};
11032         if ($len != $self->len()) {
11033             ::debug("length", $len, " != ", $self->len(),
11034                     " ", $self->{'replaced'}, "\n");
11035         } else {
11036             ::debug("length", $len, " == ", $self->len(),
11037                     " ", $self->{'replaced'}, "\n");
11038         }
11039     }
11040     return $self->{'replaced'};
11041 }
11042
11043 sub replace_placeholders($$$$) {
11044     # Replace foo{}bar with fooargbar
11045     # Input:
11046     #   $targetref = command as shell words
11047     #   $quote = should everything be quoted?
11048     #   $quote_arg = should replaced arguments be quoted?
11049     # Uses:
11050     #   @Arg::arg = arguments as strings to be use in {= =}
11051     # Returns:
11052     #   @target with placeholders replaced
11053     my $self = shift;
11054     my $targetref = shift;
11055     my $quote = shift;
11056     my $quote_arg = shift;
11057     my %replace;
11058
11059     # Token description:
11060     #   \0spc = unquoted space
11061     #   \0end = last token element
11062     #   \0ign = dummy token to be ignored
11063     #   \257<...\257> = replacement expression
11064     #   " " = quoted space, that splits -X group
11065     #   text = normal text - possibly part of -X group
11066     my $spacer = 0;
11067     my @tokens = grep { length $_ > 0 } map {
11068         if(/^\257<|^ $/) {
11069             # \257<...\257> or space
11070             $_
11071         } else {
11072             # Split each space/tab into a token
11073             split /(?=\s)|(?<=\s)/
11074         }
11075     }
11076     # Split \257< ... \257> into own token
11077     map { split /(?=\257<)|(?<=\257>)/ }
11078     # Insert "\0spc" between every element
11079     # This space should never be quoted
11080     map { $spacer++ ? ("\0spc",$_) : $_ }
11081     map { $_ eq "" ? "\0empty" : $_ }
11082     @$targetref;
11083
11084     if(not @tokens) {
11085         # @tokens is empty: Return empty array
11086         return @tokens;
11087     }
11088     ::debug("replace", "Tokens ".join":",@tokens,"\n");
11089     # Make it possible to use $arg[2] in {= =}
11090     *Arg::arg = $self->{'arg_list_flat_orig'};
11091     # Flat list:
11092     # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
11093     # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
11094     if(not @{$self->{'arg_list_flat'}}) {
11095         @{$self->{'arg_list_flat'}} = Arg->new("");
11096     }
11097     my $argref = $self->{'arg_list_flat'};
11098     # Number of arguments - used for positional arguments
11099     my $n = $#$argref+1;
11100
11101     # $self is actually a CommandLine-object,
11102     # but it looks nice to be able to say {= $job->slot() =}
11103     my $job = $self;
11104     # @replaced = tokens with \257< \257> replaced
11105     my @replaced;
11106     if($self->{'context_replace'}) {
11107         my @ctxgroup;
11108         for my $t (@tokens,"\0end") {
11109             # \0end = last token was end of tokens.
11110             if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
11111                 # Context group complete: Replace in it
11112                 if(grep { /^\257</ } @ctxgroup) {
11113                     # Context group contains a replacement string:
11114                     # Copy once per arg
11115                     my $space = "\0ign";
11116                     for my $arg (@$argref) {
11117                         my $normal_replace;
11118                         # Push output
11119                         # Put unquoted space before each context group
11120                         # except the first
11121                         CORE::push @replaced, $space, map {
11122                             $a = $_;
11123                             if($a =~
11124                                 s{\257<(-?\d+)?(.*)\257>}
11125                             {
11126                                 if($1) {
11127                                     # Positional replace
11128                                     # Find the relevant arg and replace it
11129                                     ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
11130                                      $argref->[$1 > 0 ? $1-1 : $n+$1]->
11131                                      replace($2,$quote_arg,$self)
11132                                      : "");
11133                                 } else {
11134                                     # Normal replace
11135                                     $normal_replace ||= 1;
11136                                     ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11137                                 }
11138                             }sgxe) {
11139                                 # Token is \257<..\257>
11140                             } else {
11141                                 if($Global::escape_string_present) {
11142                                     # Command line contains \257:
11143                                     # Unescape it \257\256 => \257
11144                                     $a =~ s/\257\256/\257/g;
11145                                 }
11146                             }
11147                             $a
11148                         } @ctxgroup;
11149                         $normal_replace or last;
11150                         $space = "\0spc";
11151                     }
11152                 } else {
11153                     # Context group has no a replacement string: Copy it once
11154                     CORE::push @replaced, map {
11155                         $Global::escape_string_present and s/\257\256/\257/g; $_;
11156                     } @ctxgroup;
11157                 }
11158                 # New context group
11159                 @ctxgroup=();
11160             }
11161             if($t eq "\0spc" or $t eq " ") {
11162                 CORE::push @replaced,$t;
11163             } else {
11164                 CORE::push @ctxgroup,$t;
11165             }
11166         }
11167     } else {
11168         # @group = @token
11169         # Replace in group
11170         # Push output
11171         # repquote = no if {} first on line, no if $quote, yes otherwise
11172         for my $t (@tokens) {
11173             if($t =~ /^\257</) {
11174                 my $space = "\0ign";
11175                 for my $arg (@$argref) {
11176                     my $normal_replace;
11177                     $a = $t;
11178                     $a =~
11179                         s{\257<(-?\d+)?(.*)\257>}
11180                     {
11181                         if($1) {
11182                             # Positional replace
11183                             # Find the relevant arg and replace it
11184                             ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
11185                              # If defined: replace
11186                              $argref->[$1 > 0 ? $1-1 : $n+$1]->
11187                              replace($2,$quote_arg,$self)
11188                              : "");
11189                         } else {
11190                             # Normal replace
11191                             $normal_replace ||= 1;
11192                             ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11193                         }
11194                     }sgxe;
11195                     CORE::push @replaced, $space, $a;
11196                     $normal_replace or last;
11197                     $space = "\0spc";
11198                 }
11199             } else {
11200                 # No replacement
11201                 CORE::push @replaced, map {
11202                     $Global::escape_string_present and s/\257\256/\257/g; $_;
11203                 } $t;
11204             }
11205         }
11206     }
11207     *Arg::arg = [];
11208     ::debug("replace","Replaced: ".join":",@replaced,"\n");
11209
11210     # Put tokens into groups that may be quoted.
11211     my @quotegroup;
11212     my @quoted;
11213     for (map { $_ eq "\0empty" ? "" : $_ }
11214          grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
11215          @replaced, "\0end") {
11216         if($_ eq "\0spc" or $_ eq "\0end") {
11217             # \0spc splits quotable groups
11218             if($quote) {
11219                 if(@quotegroup) {
11220                     CORE::push @quoted, ::Q(join"",@quotegroup);;
11221                 }
11222             } else {
11223                 CORE::push @quoted, join"",@quotegroup;
11224             }
11225             @quotegroup = ();
11226         } else {
11227             CORE::push @quotegroup, $_;
11228         }
11229     }
11230     ::debug("replace","Quoted: ".join":",@quoted,"\n");
11231     return wantarray ? @quoted : "@quoted";
11232 }
11233
11234 sub skip($) {
11235     # Skip this job
11236     my $self = shift;
11237     $self->{'skip'} = 1;
11238 }
11239
11240
11241 package CommandLineQueue;
11242
11243 sub new($) {
11244     my $class = shift;
11245     my $commandref = shift;
11246     my $read_from = shift;
11247     my $context_replace = shift || 0;
11248     my $max_number_of_args = shift;
11249     my $transfer_files = shift;
11250     my $return_files = shift;
11251     my @unget = ();
11252     my $posrpl;
11253     my ($replacecount_ref, $len_ref);
11254     my @command = @$commandref;
11255     my $seq = 1;
11256     # Replace replacement strings with {= perl expr =}
11257     # '{=' 'perlexpr' '=}'  => '{= perlexpr =}'
11258     @command = merge_rpl_parts(@command);
11259
11260     # Protect matching inside {= perl expr =}
11261     # by replacing {= and =} with \257< and \257>
11262     # in options that can contain replacement strings:
11263     # @command, --transferfile, --return,
11264     # --tagstring, --workdir, --results
11265     for(@command, @$transfer_files, @$return_files,
11266         $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11267         # Skip if undefined
11268         $_ or next;
11269         # Escape \257 => \257\256
11270         $Global::escape_string_present += s/\257/\257\256/g;
11271         # Needs to match rightmost left parens (Perl defaults to leftmost)
11272         # to deal with: {={==} and {={==}=}
11273         # Replace {= -> \257<  and  =} -> \257>
11274         #
11275         # Complex way to do:
11276         #   s/{=(.*)=}/\257<$1\257>/g
11277         # which would not work
11278         s[\Q$Global::parensleft\E # Match {=
11279           # Match . unless the next string is {= or =}
11280           #   needed to force matching the shortest {= =}
11281           ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
11282           \Q$Global::parensright\E ] # Match =}
11283         {\257<$1\257>}gxs;
11284         for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
11285             # Replace long --rpl's before short ones, as a short may be a
11286             # substring of a long:
11287             #   --rpl '% s/a/b/' --rpl '%% s/b/a/'
11288             #
11289             # Replace the shorthand string (--rpl)
11290             # with the {= perl expr =}
11291             #
11292             # Avoid searching for shorthand strings inside existing {= perl expr =}
11293             #
11294             # Replace $$1 in {= perl expr =} with groupings in shorthand string
11295             #
11296             #   --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
11297             #     echo {/.tar/.gz} ::: UU.tar.gz
11298             my ($prefix,$grp_regexp,$postfix) =
11299                 $rpl =~ /^( [^(]*  )    # Prefix - e.g. {%%
11300                           ( \(.*\) )?   # Group capture regexp - e.g (.*)
11301                           ( [^)]*  )$   # Postfix - e.g }
11302                         /xs;
11303             $grp_regexp ||= '';
11304             my $rplval = $Global::rpl{$rpl};
11305             while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11306                       # Don't replace after \257 unless \257>
11307                     \Q$prefix\E $grp_regexp \Q$postfix\E}
11308                   {
11309                       # The start remains the same
11310                       my $unchanged = $1;
11311                       # Dummy entry to start at 1.
11312                       my @grp = (1);
11313                       # $2 = first ()-group in $grp_regexp
11314                       # Put $2 in $grp[1], Put $3 in $grp[2]
11315                       # so first ()-group in $grp_regexp is $grp[1];
11316                       for(my $i = 2; defined $grp[$#grp]; $i++) {
11317                           push @grp, eval '$'.$i;
11318                       }
11319                       my $rv = $rplval;
11320                       # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11321                       # in the code to be executed
11322                       $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11323                       # prepend with $_pAr_gRp1 = perlquote($1),
11324                       my $set_args = "";
11325                       for(my $i = 1;defined $grp[$i]; $i++) {
11326                           $set_args .= "\$_pAr_gRp$i = \"" .
11327                               ::perl_quote_scalar($grp[$i]) . "\";";
11328                       }
11329                       $unchanged . "\257<" . $set_args . $rv . "\257>"
11330                   }gxes) {
11331             }
11332             # Do the same for the positional replacement strings
11333             $posrpl = $rpl;
11334             if($posrpl =~ s/^\{//) {
11335                 # Only do this if the shorthand start with {
11336                 $prefix=~s/^\{//;
11337                 # Don't replace after \257 unless \257>
11338                 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11339                   \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
11340                       {
11341                       # The start remains the same
11342                       my $unchanged = $1;
11343                       my $position = $2;
11344                       # Dummy entry to start at 1.
11345                       my @grp = (1);
11346                       # $3 = first ()-group in $grp_regexp
11347                       # Put $3 in $grp[1], Put $4 in $grp[2]
11348                       # so first ()-group in $grp_regexp is $grp[1];
11349                       for(my $i = 3; defined $grp[$#grp]; $i++) {
11350                           push @grp, eval '$'.$i;
11351                       }
11352                       my $rv = $rplval;
11353                       # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11354                       # in the code to be executed
11355                       $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11356                       # prepend with $_pAr_gRp1 = perlquote($1),
11357                       my $set_args = "";
11358                       for(my $i = 1;defined $grp[$i]; $i++) {
11359                           $set_args .= "\$_pAr_gRp$i = \"" .
11360                               ::perl_quote_scalar($grp[$i]) . "\";";
11361                       }
11362                       $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
11363                   }gxes) {
11364                 }
11365             }
11366         }
11367     }
11368
11369     # Add {} if no replacement strings in @command
11370     ($replacecount_ref, $len_ref, @command) =
11371         replacement_counts_and_lengths($transfer_files,$return_files,@command);
11372     if("@command" =~ /^[^ \t\n=]*\257</) {
11373         # Replacement string is (part of) the command (and not just
11374         # argument or variable definition V1={})
11375         # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11376         # Do no quote (Otherwise it will fail if the input contains spaces)
11377         $Global::quote_replace = 0;
11378     }
11379
11380     if($opt::sqlmaster and $Global::sql->append()) {
11381         $seq = $Global::sql->max_seq() + 1;
11382     }
11383
11384     return bless {
11385         'unget' => \@unget,
11386         'command' => \@command,
11387         'replacecount' => $replacecount_ref,
11388         'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
11389         'context_replace' => $context_replace,
11390         'len' => $len_ref,
11391         'max_number_of_args' => $max_number_of_args,
11392         'size' => undef,
11393         'transfer_files' => $transfer_files,
11394         'return_files' => $return_files,
11395         'seq' => $seq,
11396     }, ref($class) || $class;
11397 }
11398
11399 sub merge_rpl_parts($) {
11400     # '{=' 'perlexpr' '=}'  => '{= perlexpr =}'
11401     # Input:
11402     #   @in = the @command as given by the user
11403     # Uses:
11404     #   $Global::parensleft
11405     #   $Global::parensright
11406     # Returns:
11407     #   @command with parts merged to keep {= and =} as one
11408     my @in = @_;
11409     my @out;
11410     my $l = quotemeta($Global::parensleft);
11411     my $r = quotemeta($Global::parensright);
11412
11413     while(@in) {
11414         my $s = shift @in;
11415         $_ = $s;
11416         # Remove matching (right most) parens
11417         while(s/(.*)$l.*?$r/$1/os) {}
11418         if(/$l/o) {
11419             # Missing right parens
11420             while(@in) {
11421                 $s .= " ".shift @in;
11422                 $_ = $s;
11423                 while(s/(.*)$l.*?$r/$1/os) {}
11424                 if(not /$l/o) {
11425                     last;
11426                 }
11427             }
11428         }
11429         push @out, $s;
11430     }
11431     return @out;
11432 }
11433
11434 sub replacement_counts_and_lengths($$@) {
11435     # Count the number of different replacement strings.
11436     # Find the lengths of context for context groups and non-context
11437     # groups.
11438     # If no {} found in @command: add it to @command
11439     #
11440     # Input:
11441     #   \@transfer_files = array of filenames to transfer
11442     #   \@return_files = array of filenames to return
11443     #   @command = command template
11444     # Output:
11445     #   \%replacecount, \%len, @command
11446     my $transfer_files = shift;
11447     my $return_files = shift;
11448     my @command = @_;
11449     my (%replacecount,%len);
11450     my $sum = 0;
11451     while($sum == 0) {
11452         # Count how many times each replacement string is used
11453         my @cmd = @command;
11454         my $contextlen = 0;
11455         my $noncontextlen = 0;
11456         my $contextgroups = 0;
11457         for my $c (@cmd) {
11458             while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
11459                 # %replacecount = { "perlexpr" => number of times seen }
11460                 # e.g { "s/a/b/" => 2 }
11461                 $replacecount{$1}++;
11462                 $sum++;
11463             }
11464             # Measure the length of the context around the {= perl expr =}
11465             # Use that {=...=} has been replaced with \000 above
11466             # So there is no need to deal with \257<
11467             while($c =~ s/ (\S*\000\S*) //xs) {
11468                 my $w = $1;
11469                 $w =~ tr/\000//d; # Remove all \000's
11470                 $contextlen += length($w);
11471                 $contextgroups++;
11472             }
11473             # All {= perl expr =} have been removed: The rest is non-context
11474             $noncontextlen += length $c;
11475         }
11476         for(@$transfer_files, @$return_files,
11477             $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11478             # Options that can contain replacement strings
11479             $_ or next;
11480             my $t = $_;
11481             while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
11482                 # %replacecount = { "perlexpr" => number of times seen }
11483                 # e.g { "$_++" => 2 }
11484                 # But for tagstring we just need to mark it as seen
11485                 $replacecount{$1} ||= 1;
11486             }
11487         }
11488         if($opt::bar) {
11489             # If the command does not contain {} force it to be computed
11490             # as it is being used by --bar
11491             $replacecount{""} ||= 1;
11492         }
11493
11494         $len{'context'} = 0+$contextlen;
11495         $len{'noncontext'} = $noncontextlen;
11496         $len{'contextgroups'} = $contextgroups;
11497         $len{'noncontextgroups'} = @cmd-$contextgroups;
11498         ::debug("length", "@command Context: ", $len{'context'},
11499                 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
11500                 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
11501         if($sum == 0) {
11502             if(not @command) {
11503                 # Default command = {}
11504                 @command = ("\257<\257>");
11505             } elsif(($opt::pipe or $opt::pipepart)
11506                     and not $opt::fifo and not $opt::cat) {
11507                 # With --pipe / --pipe-part you can have no replacement
11508                 last;
11509             } else {
11510                 # Append {} to the command if there are no {...}'s and no {=...=}
11511                 push @command, ("\257<\257>");
11512             }
11513         }
11514     }
11515     return(\%replacecount,\%len,@command);
11516 }
11517
11518 sub get($) {
11519     my $self = shift;
11520     if(@{$self->{'unget'}}) {
11521         my $cmd_line = shift @{$self->{'unget'}};
11522         return ($cmd_line);
11523     } else {
11524         if($opt::sqlworker) {
11525             # Get the sequence number from the SQL table
11526             $self->set_seq($SQL::next_seq);
11527             # Get the command from the SQL table
11528             $self->{'command'} = $SQL::command_ref;
11529             my @command;
11530             # Recompute replace counts based on the read command
11531             ($self->{'replacecount'},
11532              $self->{'len'}, @command) =
11533                 replacement_counts_and_lengths($self->{'transfer_files'},
11534                                                $self->{'return_files'},
11535                                                @$SQL::command_ref);
11536             if("@command" =~ /^[^ \t\n=]*\257</) {
11537                 # Replacement string is (part of) the command (and not just
11538                 # argument or variable definition V1={})
11539                 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11540                 # Do no quote (Otherwise it will fail if the input contains spaces)
11541                 $Global::quote_replace = 0;
11542             }
11543         }
11544
11545         my $cmd_line = CommandLine->new($self->seq(),
11546                                         $self->{'command'},
11547                                         $self->{'arg_queue'},
11548                                         $self->{'context_replace'},
11549                                         $self->{'max_number_of_args'},
11550                                         $self->{'transfer_files'},
11551                                         $self->{'return_files'},
11552                                         $self->{'replacecount'},
11553                                         $self->{'len'},
11554             );
11555         $cmd_line->populate();
11556         ::debug("run","cmd_line->number_of_args ",
11557                 $cmd_line->number_of_args(), "\n");
11558         if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
11559             if($cmd_line->replaced() eq "") {
11560                 # Empty command - pipe requires a command
11561                 ::error("--pipe/--pipepart must have a command to pipe into ".
11562                         "(e.g. 'cat').");
11563                 ::wait_and_exit(255);
11564             }
11565         } elsif($cmd_line->number_of_args() == 0) {
11566             # We did not get more args - maybe at EOF string?
11567             return undef;
11568         }
11569         $self->set_seq($self->seq()+1);
11570         return $cmd_line;
11571     }
11572 }
11573
11574 sub unget($) {
11575     my $self = shift;
11576     unshift @{$self->{'unget'}}, @_;
11577 }
11578
11579 sub empty($) {
11580     my $self = shift;
11581     my $empty = (not @{$self->{'unget'}}) &&
11582         $self->{'arg_queue'}->empty();
11583     ::debug("run", "CommandLineQueue->empty $empty");
11584     return $empty;
11585 }
11586
11587 sub seq($) {
11588     my $self = shift;
11589     return $self->{'seq'};
11590 }
11591
11592 sub set_seq($$) {
11593     my $self = shift;
11594     $self->{'seq'} = shift;
11595 }
11596
11597 sub quote_args($) {
11598     my $self = shift;
11599     # If there is not command emulate |bash
11600     return $self->{'command'};
11601 }
11602
11603
11604 package Limits::Command;
11605
11606 # Maximal command line length (for -m and -X)
11607 sub max_length($) {
11608     # Find the max_length of a command line and cache it
11609     # Returns:
11610     #   number of chars on the longest command line allowed
11611     if(not $Limits::Command::line_max_len) {
11612         # Disk cache of max command line length
11613         my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
11614             "/linelen";
11615         my $cached_limit;
11616         if(-e $len_cache) {
11617             open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
11618             $cached_limit = <$fh>;
11619             close $fh;
11620         }
11621         if(not $cached_limit) {
11622             $cached_limit = real_max_length();
11623             # If $HOME is write protected: Do not fail
11624             my $dir = ::dirname($len_cache);
11625             -d $dir or eval { File::Path::mkpath($dir); };
11626             open(my $fh, ">", $len_cache.$$);
11627             print $fh $cached_limit;
11628             close $fh;
11629             rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
11630         }
11631         $Limits::Command::line_max_len = tmux_length($cached_limit);
11632         if($opt::max_chars) {
11633             if($opt::max_chars <= $cached_limit) {
11634                 $Limits::Command::line_max_len = $opt::max_chars;
11635             } else {
11636                 ::warning("Value for -s option should be < $cached_limit.");
11637             }
11638         }
11639     }
11640     return int($Limits::Command::line_max_len);
11641 }
11642
11643 sub real_max_length() {
11644     # Find the max_length of a command line
11645     # Returns:
11646     #   The maximal command line length with 1 byte arguments
11647     # return find_max(" x");
11648     return find_max("x");
11649 }
11650
11651 sub find_max($) {
11652     my $string = shift;
11653     # This is slow on Cygwin, so give Cygwin users a warning
11654     if($^O eq "cygwin") {
11655         ::warning("Finding the maximal command line length. This may take up to 30 seconds.")
11656     }
11657     # Use an upper bound of 100 MB if the shell allows for infinite long lengths
11658     my $upper = 100_000_000;
11659     # 1000 is supported everywhere, so the search can start anywhere 1..999
11660     # 324 makes the search much faster on Cygwin, so let us use that
11661     my $len = 324;
11662     do {
11663         if($len > $upper) { return $len };
11664         $len *= 16;
11665     } while (is_acceptable_command_line_length($len,$string));
11666     # Then search for the actual max length between 0 and upper bound
11667     return binary_find_max(int($len/16),$len,$string);
11668 }
11669
11670 # Prototype forwarding
11671 sub binary_find_max($$$);
11672 sub binary_find_max($$$) {
11673     # Given a lower and upper bound find the max (length or args) of a command line
11674     # Returns:
11675     #   number of chars on the longest command line allowed
11676     my ($lower, $upper, $string) = (@_);
11677     if($lower == $upper or $lower == $upper-1) { return $lower; }
11678     my $middle = int (($upper-$lower)/2 + $lower);
11679     ::debug("init", "Maxlen: $lower<$middle<$upper: ");
11680     if (is_acceptable_command_line_length($middle,$string)) {
11681         return binary_find_max($middle,$upper,$string);
11682     } else {
11683         return binary_find_max($lower,$middle,$string);
11684     }
11685 }
11686
11687 sub is_acceptable_command_line_length($$) {
11688     # Test if a command line of this length can run
11689     # in the current environment
11690     # If the string is " x" it tests how many args are allowed
11691     # Returns:
11692     #   0 if the command line length is too long
11693     #   1 otherwise
11694     my $len = shift;
11695     my $string = shift;
11696     if($Global::parallel_env) {
11697         $len += length $Global::parallel_env;
11698     }
11699     # Force using non-built-in command
11700     ::qqx("/bin/echo ".${string}x(($len-length "/bin/echo ")/length $string));
11701     ::debug("init", "$len=$? ");
11702     return not $?;
11703 }
11704
11705 sub tmux_length($) {
11706     # If $opt::tmux set, find the limit for tmux
11707     # tmux 1.8 has a 2kB limit
11708     # tmux 1.9 has a 16kB limit
11709     # tmux 2.0 has a 16kB limit
11710     # tmux 2.1 has a 16kB limit
11711     # tmux 2.2 has a 16kB limit
11712     # Input:
11713     #   $len = maximal command line length
11714     # Returns:
11715     #   $tmux_len = maximal length runable in tmux
11716     local $/ = "\n";
11717     my $len = shift;
11718     if($opt::tmux) {
11719         $ENV{'PARALLEL_TMUX'} ||= "tmux";
11720         if(not ::which($ENV{'PARALLEL_TMUX'})) {
11721             ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
11722             ::wait_and_exit(255);
11723         }
11724         my @out;
11725         for my $l (1, 2020, 16320, 100000, $len) {
11726             my $tmpfile = ::tmpname("tms");
11727             my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
11728                 " -S $tmpfile new-session -d -n echo $l".
11729                 ("x"x$l). " && echo $l; rm -f $tmpfile";
11730             push @out, ::qqx($tmuxcmd);
11731             ::rm($tmpfile);
11732         }
11733         ::debug("tmux","tmux-out ",@out);
11734         chomp @out;
11735         # The arguments is given 3 times on the command line
11736         # and the wrapping is around 30 chars
11737         # (29 for tmux1.9, 33 for tmux1.8)
11738         my $tmux_len = ::max(@out);
11739         $len = ::min($len,int($tmux_len/4-33));
11740         ::debug("tmux","tmux-length ",$len);
11741     }
11742     return $len;
11743 }
11744
11745
11746 package RecordQueue;
11747
11748 sub new($) {
11749     my $class = shift;
11750     my $fhs = shift;
11751     my $colsep = shift;
11752     my @unget = ();
11753     my $arg_sub_queue;
11754     if($opt::sqlworker) {
11755         # Open SQL table
11756         $arg_sub_queue = SQLRecordQueue->new();
11757     } elsif(defined $colsep) {
11758         # Open one file with colsep or CSV
11759         $arg_sub_queue = RecordColQueue->new($fhs);
11760     } else {
11761         # Open one or more files if multiple -a
11762         $arg_sub_queue = MultifileQueue->new($fhs);
11763     }
11764     return bless {
11765         'unget' => \@unget,
11766         'arg_number' => 0,
11767         'arg_sub_queue' => $arg_sub_queue,
11768     }, ref($class) || $class;
11769 }
11770
11771 sub get($) {
11772     # Returns:
11773     #   reference to array of Arg-objects
11774     my $self = shift;
11775     if(@{$self->{'unget'}}) {
11776         $self->{'arg_number'}++;
11777         # Flush cached computed replacements in Arg-objects
11778         # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11779         my $ret = shift @{$self->{'unget'}};
11780         if($ret) {
11781             map { $_->flush_cache() } @$ret;
11782         }
11783         return $ret;
11784     }
11785     my $ret = $self->{'arg_sub_queue'}->get();
11786     if($ret) {
11787         if(grep { index($_->orig(),"\0") > 0 } @$ret) {
11788             # Allow for \0 in position 0 because GNU  Parallel uses "\0noarg"
11789             # to mean no-string
11790             ::warning("A NUL character in the input was replaced with \\0.",
11791                       "NUL cannot be passed through in the argument list.",
11792                       "Did you mean to use the --null option?");
11793             for(grep { index($_->orig(),"\0") > 0 } @$ret) {
11794                 # Replace \0 with \\0
11795                 my $a = $_->orig();
11796                 $a =~ s/\0/\\0/g;
11797                 $_->set_orig($a);
11798             }
11799         }
11800         if(defined $Global::max_number_of_args
11801            and $Global::max_number_of_args == 0) {
11802             ::debug("run", "Read 1 but return 0 args\n");
11803             # \0noarg => nothing (not the empty string)
11804             map { $_->set_orig("\0noarg"); } @$ret;
11805         }
11806         # Flush cached computed replacements in Arg-objects
11807         # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11808         map { $_->flush_cache() } @$ret;
11809     }
11810     return $ret;
11811 }
11812
11813 sub unget($) {
11814     my $self = shift;
11815     ::debug("run", "RecordQueue-unget\n");
11816     $self->{'arg_number'} -= @_;
11817     unshift @{$self->{'unget'}}, @_;
11818 }
11819
11820 sub empty($) {
11821     my $self = shift;
11822     my $empty = (not @{$self->{'unget'}}) &&
11823         $self->{'arg_sub_queue'}->empty();
11824     ::debug("run", "RecordQueue->empty $empty");
11825     return $empty;
11826 }
11827
11828 sub arg_number($) {
11829     my $self = shift;
11830     return $self->{'arg_number'};
11831 }
11832
11833
11834 package RecordColQueue;
11835
11836 sub new($) {
11837     my $class = shift;
11838     my $fhs = shift;
11839     my @unget = ();
11840     my $arg_sub_queue = MultifileQueue->new($fhs);
11841     return bless {
11842         'unget' => \@unget,
11843         'arg_sub_queue' => $arg_sub_queue,
11844     }, ref($class) || $class;
11845 }
11846
11847 sub get($) {
11848     # Returns:
11849     #   reference to array of Arg-objects
11850     my $self = shift;
11851     if(@{$self->{'unget'}}) {
11852         return shift @{$self->{'unget'}};
11853     }
11854     my $unget_ref = $self->{'unget'};
11855     if($self->{'arg_sub_queue'}->empty()) {
11856         return undef;
11857     }
11858     my $in_record = $self->{'arg_sub_queue'}->get();
11859     if(defined $in_record) {
11860         my @out_record = ();
11861         for my $arg (@$in_record) {
11862             ::debug("run", "RecordColQueue::arg $arg\n");
11863             my $line = $arg->orig();
11864             ::debug("run", "line='$line'\n");
11865             if($line ne "") {
11866                 if($opt::csv) {
11867                     # Parse CSV
11868                     chomp $line;
11869                     if(not $Global::csv->parse($line)) {
11870                         die "CSV has unexpected format: ^$line^";
11871                     }
11872                     for($Global::csv->fields()) {
11873                         push @out_record, Arg->new($_);
11874                     }
11875                 } else {
11876                     for my $s (split /$opt::colsep/o, $line, -1) {
11877                         push @out_record, Arg->new($s);
11878                     }
11879                 }
11880             } else {
11881                 push @out_record, Arg->new("");
11882             }
11883         }
11884         return \@out_record;
11885     } else {
11886         return undef;
11887     }
11888 }
11889
11890 sub unget($) {
11891     my $self = shift;
11892     ::debug("run", "RecordColQueue-unget '@_'\n");
11893     unshift @{$self->{'unget'}}, @_;
11894 }
11895
11896 sub empty($) {
11897     my $self = shift;
11898     my $empty = (not @{$self->{'unget'}}) &&
11899         $self->{'arg_sub_queue'}->empty();
11900     ::debug("run", "RecordColQueue->empty $empty");
11901     return $empty;
11902 }
11903
11904
11905 package SQLRecordQueue;
11906
11907 sub new($) {
11908     my $class = shift;
11909     my @unget = ();
11910     return bless {
11911         'unget' => \@unget,
11912     }, ref($class) || $class;
11913 }
11914
11915 sub get($) {
11916     # Returns:
11917     #   reference to array of Arg-objects
11918     my $self = shift;
11919     if(@{$self->{'unget'}}) {
11920         return shift @{$self->{'unget'}};
11921     }
11922     return $Global::sql->get_record();
11923 }
11924
11925 sub unget($) {
11926     my $self = shift;
11927     ::debug("run", "SQLRecordQueue-unget '@_'\n");
11928     unshift @{$self->{'unget'}}, @_;
11929 }
11930
11931 sub empty($) {
11932     my $self = shift;
11933     if(@{$self->{'unget'}}) { return 0; }
11934     my $get = $self->get();
11935     if(defined $get) {
11936         $self->unget($get);
11937     }
11938     my $empty = not $get;
11939     ::debug("run", "SQLRecordQueue->empty $empty");
11940     return $empty;
11941 }
11942
11943
11944 package MultifileQueue;
11945
11946 @Global::unget_argv=();
11947
11948 sub new($$) {
11949     my $class = shift;
11950     my $fhs = shift;
11951     for my $fh (@$fhs) {
11952         if(-t $fh and -t ($Global::status_fd || *STDERR)) {
11953             ::warning("Input is read from the terminal. You are either an expert",
11954                       "(in which case: YOU ARE AWESOME!) or maybe you forgot",
11955                       "::: or :::: or -a or to pipe data into parallel. If so",
11956                       "consider going through the tutorial: man parallel_tutorial",
11957                       "Press CTRL-D to exit.");
11958         }
11959     }
11960     return bless {
11961         'unget' => \@Global::unget_argv,
11962         'fhs' => $fhs,
11963         'arg_matrix' => undef,
11964     }, ref($class) || $class;
11965 }
11966
11967 sub get($) {
11968     my $self = shift;
11969     if($opt::link) {
11970         return $self->link_get();
11971     } else {
11972         return $self->nest_get();
11973     }
11974 }
11975
11976 sub unget($) {
11977     my $self = shift;
11978     ::debug("run", "MultifileQueue-unget '@_'\n");
11979     unshift @{$self->{'unget'}}, @_;
11980 }
11981
11982 sub empty($) {
11983     my $self = shift;
11984     my $empty = (not @Global::unget_argv) &&
11985         not @{$self->{'unget'}};
11986     for my $fh (@{$self->{'fhs'}}) {
11987         $empty &&= eof($fh);
11988     }
11989     ::debug("run", "MultifileQueue->empty $empty ");
11990     return $empty;
11991 }
11992
11993 sub link_get($) {
11994     my $self = shift;
11995     if(@{$self->{'unget'}}) {
11996         return shift @{$self->{'unget'}};
11997     }
11998     my @record = ();
11999     my $prepend;
12000     my $empty = 1;
12001     for my $fh (@{$self->{'fhs'}}) {
12002         my $arg = read_arg_from_fh($fh);
12003         if(defined $arg) {
12004             # Record $arg for recycling at end of file
12005             push @{$self->{'arg_matrix'}{$fh}}, $arg;
12006             push @record, $arg;
12007             $empty = 0;
12008         } else {
12009             ::debug("run", "EOA ");
12010             # End of file: Recycle arguments
12011             push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
12012             # return last @{$args->{'args'}{$fh}};
12013             push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
12014         }
12015     }
12016     if($empty) {
12017         return undef;
12018     } else {
12019         return \@record;
12020     }
12021 }
12022
12023 sub nest_get($) {
12024     my $self = shift;
12025     if(@{$self->{'unget'}}) {
12026         return shift @{$self->{'unget'}};
12027     }
12028     my @record = ();
12029     my $prepend;
12030     my $empty = 1;
12031     my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
12032     if(not $self->{'arg_matrix'}) {
12033         # Initialize @arg_matrix with one arg from each file
12034         # read one line from each file
12035         my @first_arg_set;
12036         my $all_empty = 1;
12037         for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
12038             my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12039             if(defined $arg) {
12040                 $all_empty = 0;
12041             }
12042             $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
12043             push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
12044         }
12045         if($all_empty) {
12046             # All filehandles were at eof or eof-string
12047             return undef;
12048         }
12049         return [@first_arg_set];
12050     }
12051
12052     # Treat the case with one input source special.  For multiple
12053     # input sources we need to remember all previously read values to
12054     # generate all combinations. But for one input source we can
12055     # forget the value after first use.
12056     if($no_of_inputsources == 1) {
12057         my $arg = read_arg_from_fh($self->{'fhs'}[0]);
12058         if(defined($arg)) {
12059             return [$arg];
12060         }
12061         return undef;
12062     }
12063     for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
12064         if(eof($self->{'fhs'}[$fhno])) {
12065             next;
12066         } else {
12067             # read one
12068             my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12069             defined($arg) || next; # If we just read an EOF string: Treat this as EOF
12070             my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
12071             $self->{'arg_matrix'}[$fhno][$len] = $arg;
12072             # make all new combinations
12073             my @combarg = ();
12074             for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
12075                 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
12076                      # Is input source --link'ed to the next?
12077                      $opt::linkinputsource[$fhn+1]);
12078             }
12079             # Find only combinations with this new entry
12080             $combarg[2*$fhno] = [$len,$len];
12081             # map combinations
12082             # [ 1, 3, 7 ], [ 2, 4, 1 ]
12083             # =>
12084             # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
12085             my @mapped;
12086             for my $c (expand_combinations(@combarg)) {
12087                 my @a;
12088                 for my $n (0 .. $no_of_inputsources - 1 ) {
12089                     push @a,  $self->{'arg_matrix'}[$n][$$c[$n]];
12090                 }
12091                 push @mapped, \@a;
12092             }
12093             # append the mapped to the ungotten arguments
12094             push @{$self->{'unget'}}, @mapped;
12095             # get the first
12096             if(@mapped) {
12097                 return shift @{$self->{'unget'}};
12098             }
12099         }
12100     }
12101     # all are eof or at EOF string; return from the unget queue
12102     return shift @{$self->{'unget'}};
12103 }
12104
12105 sub read_arg_from_fh($) {
12106     # Read one Arg from filehandle
12107     # Returns:
12108     #   Arg-object with one read line
12109     #   undef if end of file
12110     my $fh = shift;
12111     my $prepend;
12112     my $arg;
12113     my $half_record = 0;
12114     do {{
12115         # This makes 10% faster
12116         if(not defined ($arg = <$fh>)) {
12117             if(defined $prepend) {
12118                 return Arg->new($prepend);
12119             } else {
12120                 return undef;
12121             }
12122         }
12123         if($opt::csv) {
12124             # We need to read a full CSV line.
12125             if(($arg =~ y/"/"/) % 2 ) {
12126                 # The number of " on the line is uneven:
12127                 # If we were in a half_record => we have a full record now
12128                 # If we were ouside a half_record => we are in a half record now
12129                 $half_record = not $half_record;
12130             }
12131             if($half_record) {
12132                 # CSV half-record with quoting:
12133                 #   col1,"col2 2""x3"" board newline  <-this one
12134                 #   cont",col3
12135                 $prepend .= $arg;
12136                 redo;
12137             } else {
12138                 # Now we have a full CSV record
12139             }
12140         }
12141         # Remove delimiter
12142         chomp $arg;
12143         if($Global::end_of_file_string and
12144            $arg eq $Global::end_of_file_string) {
12145             # Ignore the rest of input file
12146             close $fh;
12147             ::debug("run", "EOF-string ($arg) met\n");
12148             if(defined $prepend) {
12149                 return Arg->new($prepend);
12150             } else {
12151                 return undef;
12152             }
12153         }
12154         if(defined $prepend) {
12155             $arg = $prepend.$arg; # For line continuation
12156             undef $prepend;
12157         }
12158         if($Global::ignore_empty) {
12159             if($arg =~ /^\s*$/) {
12160                 redo; # Try the next line
12161             }
12162         }
12163         if($Global::max_lines) {
12164             if($arg =~ /\s$/) {
12165                 # Trailing space => continued on next line
12166                 $prepend = $arg;
12167                 redo;
12168             }
12169         }
12170     }} while (1 == 0); # Dummy loop {{}} for redo
12171     if(defined $arg) {
12172         return Arg->new($arg);
12173     } else {
12174         ::die_bug("multiread arg undefined");
12175     }
12176 }
12177
12178 # Prototype forwarding
12179 sub expand_combinations(@);
12180 sub expand_combinations(@) {
12181     # Input:
12182     #   ([xmin,xmax], [ymin,ymax], ...)
12183     # Returns: ([x,y,...],[x,y,...])
12184     # where xmin <= x <= xmax and ymin <= y <= ymax
12185     my $minmax_ref = shift;
12186     my $link = shift; # This is linked to the next input source
12187     my $xmin = $$minmax_ref[0];
12188     my $xmax = $$minmax_ref[1];
12189     my @p;
12190     if(@_) {
12191         my @rest = expand_combinations(@_);
12192         if($link) {
12193             # Linked to next col with --link/:::+/::::+
12194             # TODO BUG does not wrap values if not same number of vals
12195             push(@p, map { [$$_[0], @$_] }
12196                  grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
12197         } else {
12198             # If there are more columns: Compute those recursively
12199             for(my $x = $xmin; $x <= $xmax; $x++) {
12200                 push @p, map { [$x, @$_] } @rest;
12201             }
12202         }
12203     } else {
12204         for(my $x = $xmin; $x <= $xmax; $x++) {
12205             push @p, [$x];
12206         }
12207     }
12208     return @p;
12209 }
12210
12211
12212 package Arg;
12213
12214 sub new($) {
12215     my $class = shift;
12216     my $orig = shift;
12217     my @hostgroups;
12218     if($opt::hostgroups) {
12219         if($orig =~ s:@(.+)::) {
12220             # We found hostgroups on the arg
12221             @hostgroups = split(/\+/, $1);
12222             if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
12223                 # This hostgroup is not defined using -S
12224                 # Add it
12225                 ::warning("Adding hostgroups: @hostgroups");
12226                 # Add sshlogin
12227                 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
12228                     my $sshlogin = SSHLogin->new($_);
12229                     my $sshlogin_string = $sshlogin->string();
12230                     $Global::host{$sshlogin_string} = $sshlogin;
12231                     $Global::hostgroups{$sshlogin_string} = 1;
12232                 }
12233             }
12234         } else {
12235             # No hostgroup on the arg => any hostgroup
12236             @hostgroups = (keys %Global::hostgroups);
12237         }
12238     }
12239     return bless {
12240         'orig' => $orig,
12241         'hostgroups' => \@hostgroups,
12242     }, ref($class) || $class;
12243 }
12244
12245 sub Q($) {
12246     # Q alias for ::shell_quote_scalar
12247     my $ret = ::Q($_[0]);
12248     no warnings 'redefine';
12249     *Q = \&::Q;
12250     return $ret;
12251 }
12252
12253 sub pQ($) {
12254     # pQ alias for ::perl_quote_scalar
12255     my $ret = ::pQ($_[0]);
12256     no warnings 'redefine';
12257     *pQ = \&::pQ;
12258     return $ret;
12259 }
12260
12261 sub total_jobs() {
12262     return $Global::JobQueue->total_jobs();
12263 }
12264
12265 {
12266     my %perleval;
12267     my $job;
12268     sub skip() {
12269         # shorthand for $job->skip();
12270         $job->skip();
12271     }
12272     sub slot() {
12273         # shorthand for $job->slot();
12274         $job->slot();
12275     }
12276     sub seq() {
12277         # shorthand for $job->seq();
12278         $job->seq();
12279     }
12280     sub uq() {
12281         # Do not quote this arg
12282         $Global::unquote_arg = 1;
12283     }
12284
12285     sub replace($$$$) {
12286         # Calculates the corresponding value for a given perl expression
12287         # Returns:
12288         #   The calculated string (quoted if asked for)
12289         my $self = shift;
12290         my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
12291         my $quote = shift; # should the string be quoted?
12292         # This is actually a CommandLine-object,
12293         # but it looks nice to be able to say {= $job->slot() =}
12294         $job = shift;
12295         $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
12296         if(not $Global::cache_replacement_eval
12297            or
12298            not $self->{'cache'}{$perlexpr}) {
12299             # Only compute the value once
12300             # Use $_ as the variable to change
12301             local $_;
12302             if($Global::trim eq "n") {
12303                 $_ = $self->{'orig'};
12304             } else {
12305                 # Trim the input
12306                 $_ = trim_of($self->{'orig'});
12307             }
12308             ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
12309             if(not $perleval{$perlexpr}) {
12310                 # Make an anonymous function of the $perlexpr
12311                 # And more importantly: Compile it only once
12312                 if($perleval{$perlexpr} =
12313                    eval('sub { no strict; no warnings; my $job = shift; '.
12314                         $perlexpr.' }')) {
12315                     # All is good
12316                 } else {
12317                     # The eval failed. Maybe $perlexpr is invalid perl?
12318                     ::error("Cannot use $perlexpr: $@");
12319                     ::wait_and_exit(255);
12320                 }
12321             }
12322             # Execute the function
12323             $perleval{$perlexpr}->($job);
12324             $self->{'cache'}{$perlexpr} = $_;
12325             if($Global::unquote_arg) {
12326                 # uq() was called in perlexpr
12327                 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
12328                 # Reset for next perlexpr
12329                 $Global::unquote_arg = 0;
12330             }
12331         }
12332         # Return the value quoted if needed
12333         if($self->{'cache'}{'unquote'}{$perlexpr}) {
12334             return($self->{'cache'}{$perlexpr});
12335         } else {
12336             return($quote ? Q($self->{'cache'}{$perlexpr})
12337                    : $self->{'cache'}{$perlexpr});
12338         }
12339     }
12340 }
12341
12342 sub flush_cache($) {
12343     # Flush cache of computed values
12344     my $self = shift;
12345     $self->{'cache'} = undef;
12346 }
12347
12348 sub orig($) {
12349     my $self = shift;
12350     return $self->{'orig'};
12351 }
12352
12353 sub set_orig($$) {
12354     my $self = shift;
12355     $self->{'orig'} = shift;
12356 }
12357
12358 sub trim_of($) {
12359     # Removes white space as specifed by --trim:
12360     # n = nothing
12361     # l = start
12362     # r = end
12363     # lr|rl = both
12364     # Returns:
12365     #   string with white space removed as needed
12366     my @strings = map { defined $_ ? $_ : "" } (@_);
12367     my $arg;
12368     if($Global::trim eq "n") {
12369         # skip
12370     } elsif($Global::trim eq "l") {
12371         for my $arg (@strings) { $arg =~ s/^\s+//; }
12372     } elsif($Global::trim eq "r") {
12373         for my $arg (@strings) { $arg =~ s/\s+$//; }
12374     } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
12375         for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
12376     } else {
12377         ::error("--trim must be one of: r l rl lr.");
12378         ::wait_and_exit(255);
12379     }
12380     return wantarray ? @strings : "@strings";
12381 }
12382
12383
12384 package TimeoutQueue;
12385
12386 sub new($) {
12387     my $class = shift;
12388     my $delta_time = shift;
12389     my ($pct);
12390     if($delta_time =~ /(\d+(\.\d+)?)%/) {
12391         # Timeout in percent
12392         $pct = $1/100;
12393         $delta_time = 1_000_000;
12394     }
12395     $delta_time = ::multiply_time_units($delta_time);
12396
12397     return bless {
12398         'queue' => [],
12399         'delta_time' => $delta_time,
12400         'pct' => $pct,
12401         'remedian_idx' => 0,
12402         'remedian_arr' => [],
12403         'remedian' => undef,
12404     }, ref($class) || $class;
12405 }
12406
12407 sub delta_time($) {
12408     my $self = shift;
12409     return $self->{'delta_time'};
12410 }
12411
12412 sub set_delta_time($$) {
12413     my $self = shift;
12414     $self->{'delta_time'} = shift;
12415 }
12416
12417 sub remedian($) {
12418     my $self = shift;
12419     return $self->{'remedian'};
12420 }
12421
12422 sub set_remedian($$) {
12423     # Set median of the last 999^3 (=997002999) values using Remedian
12424     #
12425     # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
12426     # robust averaging method for large data sets." Journal of the
12427     # American Statistical Association 85.409 (1990): 97-104.
12428     my $self = shift;
12429     my $val = shift;
12430     my $i = $self->{'remedian_idx'}++;
12431     my $rref = $self->{'remedian_arr'};
12432     $rref->[0][$i%999] = $val;
12433     $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
12434     $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
12435     $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
12436 }
12437
12438 sub update_median_runtime($) {
12439     # Update delta_time based on runtime of finished job if timeout is
12440     # a percentage
12441     my $self = shift;
12442     my $runtime = shift;
12443     if($self->{'pct'}) {
12444         $self->set_remedian($runtime);
12445         $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
12446         ::debug("run", "Timeout: $self->{'delta_time'}s ");
12447     }
12448 }
12449
12450 sub process_timeouts($) {
12451     # Check if there was a timeout
12452     my $self = shift;
12453     # $self->{'queue'} is sorted by start time
12454     while (@{$self->{'queue'}}) {
12455         my $job = $self->{'queue'}[0];
12456         if($job->endtime()) {
12457             # Job already finished. No need to timeout the job
12458             # This could be because of --keep-order
12459             shift @{$self->{'queue'}};
12460         } elsif($job->is_timedout($self->{'delta_time'})) {
12461             # Need to shift off queue before kill
12462             # because kill calls usleep that calls process_timeouts
12463             shift @{$self->{'queue'}};
12464             ::warning("This job was killed because it timed out:",
12465                 $job->replaced());
12466             $job->kill();
12467         } else {
12468             # Because they are sorted by start time the rest are later
12469             last;
12470         }
12471     }
12472 }
12473
12474 sub insert($) {
12475     my $self = shift;
12476     my $in = shift;
12477     push @{$self->{'queue'}}, $in;
12478 }
12479
12480
12481 package SQL;
12482
12483 sub new($) {
12484     my $class = shift;
12485     my $dburl = shift;
12486     $Global::use{"DBI"} ||= eval "use DBI; 1;";
12487     # +DBURL = append to this DBURL
12488     my $append = $dburl=~s/^\+//;
12489     my %options = parse_dburl(get_alias($dburl));
12490     my %driveralias = ("sqlite" => "SQLite",
12491                        "sqlite3" => "SQLite",
12492                        "pg" => "Pg",
12493                        "postgres" => "Pg",
12494                        "postgresql" => "Pg",
12495                        "csv" => "CSV",
12496                        "oracle" => "Oracle",
12497                        "ora" => "Oracle");
12498     my $driver = $driveralias{$options{'databasedriver'}} ||
12499         $options{'databasedriver'};
12500     my $database = $options{'database'};
12501     my $host = $options{'host'} ? ";host=".$options{'host'} : "";
12502     my $port = $options{'port'} ? ";port=".$options{'port'} : "";
12503     my $dsn = "DBI:$driver:dbname=$database$host$port";
12504     my $userid = $options{'user'};
12505     my $password = $options{'password'};;
12506     if(not grep /$driver/, DBI->available_drivers) {
12507         ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
12508         ::wait_and_exit(255);
12509     }
12510     my $dbh;
12511     if($driver eq "CSV") {
12512         # CSV does not use normal dsn
12513         if(-d $database) {
12514             $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
12515                 or die $DBI::errstr;
12516         } else {
12517             ::error("$database is not a directory.");
12518             ::wait_and_exit(255);
12519         }
12520     } else {
12521         $dbh = DBI->connect($dsn, $userid, $password,
12522                            { RaiseError => 1, AutoInactiveDestroy => 1 })
12523         or die $DBI::errstr;
12524     }
12525     $dbh->{'PrintWarn'} = $Global::debug || 0;
12526     $dbh->{'PrintError'} = $Global::debug || 0;
12527     $dbh->{'RaiseError'} = 1;
12528     $dbh->{'ShowErrorStatement'} = 1;
12529     $dbh->{'HandleError'} = sub {};
12530     if(not defined $options{'table'}) {
12531         ::error("The DBURL ($dburl) must contain a table.");
12532         ::wait_and_exit(255);
12533     }
12534
12535     return bless {
12536         'dbh' => $dbh,
12537         'driver' => $driver,
12538         'max_number_of_args' => undef,
12539         'table' => $options{'table'},
12540         'append' => $append,
12541     }, ref($class) || $class;
12542 }
12543
12544 # Prototype forwarding
12545 sub get_alias($);
12546 sub get_alias($) {
12547     my $alias = shift;
12548     $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
12549     if ($alias !~ /^:/) {
12550         return $alias;
12551     }
12552
12553     # Find the alias
12554     my $path;
12555     if (-l $0) {
12556         ($path) = readlink($0) =~ m|^(.*)/|;
12557     } else {
12558         ($path) = $0 =~ m|^(.*)/|;
12559     }
12560
12561     my @deprecated = ("$ENV{HOME}/.dburl.aliases",
12562                       "$path/dburl.aliases", "$path/dburl.aliases.dist");
12563     for (@deprecated) {
12564         if(-r $_) {
12565             ::warning("$_ is deprecated. ".
12566                       "Use .sql/aliases instead (read man sql).");
12567         }
12568     }
12569     my @urlalias=();
12570     check_permissions("$ENV{HOME}/.sql/aliases");
12571     check_permissions("$ENV{HOME}/.dburl.aliases");
12572     my @search = ("$ENV{HOME}/.sql/aliases",
12573                   "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
12574                   "$path/dburl.aliases", "$path/dburl.aliases.dist");
12575     for my $alias_file (@search) {
12576         # local $/ needed if -0 set
12577         local $/ = "\n";
12578         if(-r $alias_file) {
12579             open(my $in, "<", $alias_file) || die;
12580             push @urlalias, <$in>;
12581             close $in;
12582         }
12583     }
12584     my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
12585     # If we saw this before: we have an alias loop
12586     if(grep {$_ eq $alias_part } @Private::seen_aliases) {
12587         ::error("$alias_part is a cyclic alias.");
12588         exit -1;
12589     } else {
12590         push @Private::seen_aliases, $alias_part;
12591     }
12592
12593     my $dburl;
12594     for (@urlalias) {
12595         /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
12596     }
12597
12598     if($dburl) {
12599         return get_alias($dburl.$rest);
12600     } else {
12601         ::error("$alias is not defined in @search");
12602         exit(-1);
12603     }
12604 }
12605
12606 sub check_permissions($) {
12607     my $file = shift;
12608
12609     if(-e $file) {
12610         if(not -o $file) {
12611             my $username = (getpwuid($<))[0];
12612             ::warning("$file should be owned by $username: ".
12613                       "chown $username $file");
12614         }
12615         my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
12616             $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
12617         if($mode & 077) {
12618             my $username = (getpwuid($<))[0];
12619             ::warning("$file should be only be readable by $username: ".
12620                       "chmod 600 $file");
12621         }
12622     }
12623 }
12624
12625 sub parse_dburl($) {
12626     my $url = shift;
12627     my %options = ();
12628     # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
12629
12630     if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
12631                ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
12632                  (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
12633                (?:
12634                 ([^:@/][^:@]*|) # Username ($2)
12635                 (?:
12636                  :([^@]*) # Password ($3)
12637                 )?
12638                @)?
12639                ([^:/]*)? # Hostname ($4)
12640                (?:
12641                 :
12642                 ([^/]*)? # Port ($5)
12643                )?
12644                (?:
12645                 /
12646                 ([^/?]*)? # Database ($6)
12647                )?
12648                (?:
12649                 /
12650                 ([^?]*)? # Table ($7)
12651                )?
12652                (?:
12653                 \?
12654                 (.*)? # Query ($8)
12655                )?
12656               $!ix) {
12657         $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
12658         $options{user} = ::undef_if_empty(uri_unescape($2));
12659         $options{password} = ::undef_if_empty(uri_unescape($3));
12660         $options{host} = ::undef_if_empty(uri_unescape($4));
12661         $options{port} = ::undef_if_empty(uri_unescape($5));
12662         $options{database} = ::undef_if_empty(uri_unescape($6));
12663         $options{table} = ::undef_if_empty(uri_unescape($7));
12664         $options{query} = ::undef_if_empty(uri_unescape($8));
12665         ::debug("sql", "dburl $url\n");
12666         ::debug("sql", "databasedriver ", $options{databasedriver},
12667                 " user ", $options{user},
12668                 " password ", $options{password}, " host ", $options{host},
12669                 " port ", $options{port}, " database ", $options{database},
12670                 " table ", $options{table}, " query ", $options{query}, "\n");
12671     } else {
12672         ::error("$url is not a valid DBURL");
12673         exit 255;
12674     }
12675     return %options;
12676 }
12677
12678 sub uri_unescape($) {
12679     # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
12680     # to avoid depending on URI::Escape
12681     # This section is (C) Gisle Aas.
12682     # Note from RFC1630:  "Sequences which start with a percent sign
12683     # but are not followed by two hexadecimal characters are reserved
12684     # for future extension"
12685     my $str = shift;
12686     if (@_ && wantarray) {
12687         # not executed for the common case of a single argument
12688         my @str = ($str, @_);  # need to copy
12689         foreach (@str) {
12690             s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
12691         }
12692         return @str;
12693     }
12694     $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
12695     $str;
12696 }
12697
12698 sub run($) {
12699     my $self = shift;
12700     my $stmt = shift;
12701     if($self->{'driver'} eq "CSV") {
12702         $stmt=~ s/;$//;
12703         if($stmt eq "BEGIN" or
12704            $stmt eq "COMMIT") {
12705             return undef;
12706         }
12707     }
12708     my @retval;
12709     my $dbh = $self->{'dbh'};
12710     ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
12711     # Execute with the rest of the args - if any
12712     my $rv;
12713     my $sth;
12714     my $lockretry = 0;
12715     while($lockretry < 10) {
12716         $sth = $dbh->prepare($stmt);
12717         if($sth
12718            and
12719            eval { $rv = $sth->execute(@_) }) {
12720             last;
12721         } else {
12722             if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
12723                or
12724                $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
12725                 # This is fine:
12726                 # It is just a worker that reported back too late -
12727                 # another worker had finished the job first
12728                 # and the table was then dropped
12729                 $rv = $sth = 0;
12730                 last;
12731             }
12732             if($DBI::errstr =~ /locked/) {
12733                 ::debug("sql", "Lock retry: $lockretry");
12734                 $lockretry++;
12735                 ::usleep(rand()*300);
12736             } elsif(not $sth) {
12737                 # Try again
12738                 $lockretry++;
12739             } else {
12740                 ::error($DBI::errstr);
12741                 ::wait_and_exit(255);
12742             }
12743         }
12744     }
12745     if($lockretry >= 10) {
12746         ::die_bug("retry > 10: $DBI::errstr");
12747     }
12748     if($rv < 0 and $DBI::errstr){
12749         ::error($DBI::errstr);
12750         ::wait_and_exit(255);
12751     }
12752     return $sth;
12753 }
12754
12755 sub get($) {
12756     my $self = shift;
12757     my $sth = $self->run(@_);
12758     my @retval;
12759     # If $sth = 0 it means the table was dropped by another process
12760     while($sth) {
12761         my @row = $sth->fetchrow_array();
12762         @row or last;
12763         push @retval, \@row;
12764     }
12765     return \@retval;
12766 }
12767
12768 sub table($) {
12769     my $self = shift;
12770     return $self->{'table'};
12771 }
12772
12773 sub append($) {
12774     my $self = shift;
12775     return $self->{'append'};
12776 }
12777
12778 sub update($) {
12779     my $self = shift;
12780     my $stmt = shift;
12781     my $table = $self->table();
12782     $self->run("UPDATE $table $stmt",@_);
12783 }
12784
12785 sub output($) {
12786     my $self = shift;
12787     my $commandline = shift;
12788
12789     $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
12790                   $commandline->seq(),
12791                   join("",@{$commandline->{'output'}{1}}),
12792                   join("",@{$commandline->{'output'}{2}}));
12793 }
12794
12795 sub max_number_of_args($) {
12796     # Maximal number of args for this table
12797     my $self = shift;
12798     if(not $self->{'max_number_of_args'}) {
12799         # Read the number of args from the SQL table
12800         my $table = $self->table();
12801         my $v = $self->get("SELECT * FROM $table LIMIT 1;");
12802         my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
12803             Receive Exitval _Signal Command Stdout Stderr);
12804         if(not $v) {
12805             ::error("$table contains no records");
12806         }
12807         # Count the number of Vx columns
12808         $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
12809     }
12810     return $self->{'max_number_of_args'};
12811 }
12812
12813 sub set_max_number_of_args($$) {
12814     my $self = shift;
12815     $self->{'max_number_of_args'} = shift;
12816 }
12817
12818 sub create_table($) {
12819     my $self = shift;
12820     if($self->append()) { return; }
12821     my $max_number_of_args = shift;
12822     $self->set_max_number_of_args($max_number_of_args);
12823     my $table = $self->table();
12824     $self->run(qq(DROP TABLE IF EXISTS $table;));
12825     # BIGINT and TEXT are not supported in these databases or are too small
12826     my %vartype = (
12827         "Oracle" => { "BIGINT" => "NUMBER(19,0)",
12828                       "TEXT" => "CLOB", },
12829         "mysql" => { "TEXT" => "BLOB", },
12830         "CSV" => { "BIGINT" => "INT",
12831                    "FLOAT" => "REAL", },
12832         );
12833     my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
12834     my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
12835     my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
12836     my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
12837     $self->run(qq{CREATE TABLE $table
12838                 (Seq $BIGINT,
12839                  Host $TEXT,
12840                  Starttime $FLOAT,
12841                  JobRuntime $FLOAT,
12842                  Send $BIGINT,
12843                  Receive $BIGINT,
12844                  Exitval $BIGINT,
12845                  _Signal $BIGINT,
12846                  Command $TEXT,}.
12847                $v_def.
12848                qq{Stdout $TEXT,
12849                  Stderr $TEXT);});
12850 }
12851
12852 sub insert_records($) {
12853     my $self = shift;
12854     my $seq = shift;
12855     my $command_ref = shift;
12856     my $record_ref = shift;
12857     my $table = $self->table();
12858     # For SQL encode the command with \257 space as split points
12859     my $command = join("\257 ",@$command_ref);
12860     my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12861     # Two extra value due to $seq, Exitval, Send
12862     my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
12863     $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
12864                "VALUES ($v_vals);", $seq, $command, -1000,
12865                0, @$record_ref[1..$#$record_ref]);
12866 }
12867
12868
12869 sub get_record($) {
12870     my $self = shift;
12871     my @retval;
12872     my $table = $self->table();
12873     my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12874     my $rand = "Reserved-".$$.rand();
12875     my $v;
12876     my $more_pending;
12877
12878     do {
12879         if($self->{'driver'} eq "CSV") {
12880             # Sub SELECT is not supported in CSV
12881             # So to minimize the race condition below select a job at random
12882             my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12883                                "WHERE Exitval = -1000 LIMIT 100;");
12884             $v = [ sort { rand() > 0.5 } @$r ];
12885         } else {
12886             # Avoid race condition where multiple workers get the same job
12887             # by setting Stdout to a unique string
12888             # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
12889             $self->update("SET Stdout = ?,Exitval = ? ".
12890                           "WHERE Seq = (".
12891                           "  SELECT * FROM (".
12892                           "    SELECT min(Seq) FROM $table WHERE Exitval = -1000".
12893                           "  ) AS dummy".
12894                           ") AND Exitval = -1000;", $rand, -1210);
12895             # If a parallel worker overwrote the unique string this will get nothing
12896             $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12897                             "WHERE Stdout = ?;", $rand);
12898         }
12899         if($v->[0]) {
12900             my $val_ref = $v->[0];
12901             # Mark record as taken
12902             my $seq = shift @$val_ref;
12903             # Save the sequence number to use when running the job
12904             $SQL::next_seq = $seq;
12905             $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
12906             # Command is encoded with '\257 space' as splitting char
12907             my @command = split /\257 /, shift @$val_ref;
12908             $SQL::command_ref = \@command;
12909             for (@$val_ref) {
12910                 push @retval, Arg->new($_);
12911             }
12912         } else {
12913             # If the record was updated by another job in parallel,
12914             # then we may not be done, so see if there are more jobs pending
12915             $more_pending =
12916                 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
12917         }
12918     } while (not $v->[0] and $more_pending->[0]);
12919
12920     if(@retval) {
12921         return \@retval;
12922     } else {
12923         return undef;
12924     }
12925 }
12926
12927 sub total_jobs($) {
12928     my $self = shift;
12929     my $table = $self->table();
12930     my $v = $self->get("SELECT count(*) FROM $table;");
12931     if($v->[0]) {
12932         return $v->[0]->[0];
12933     } else {
12934         ::die_bug("SQL::total_jobs");
12935     }
12936 }
12937
12938 sub max_seq($) {
12939     my $self = shift;
12940     my $table = $self->table();
12941     my $v = $self->get("SELECT max(Seq) FROM $table;");
12942     if($v->[0]) {
12943         return $v->[0]->[0];
12944     } else {
12945         ::die_bug("SQL::max_seq");
12946     }
12947 }
12948
12949 sub finished($) {
12950     # Check if there are any jobs left in the SQL table that do not
12951     # have a "real" exitval
12952     my $self = shift;
12953     if($opt::wait or $Global::start_sqlworker) {
12954         my $table = $self->table();
12955         my $rv = $self->get("select Seq,Exitval from $table ".
12956                             "where Exitval <= -1000 limit 1");
12957         return not $rv->[0];
12958     } else {
12959         return 1;
12960     }
12961 }
12962
12963 package Semaphore;
12964
12965 # This package provides a counting semaphore
12966 #
12967 # If a process dies without releasing the semaphore the next process
12968 # that needs that entry will clean up dead semaphores
12969 #
12970 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
12971 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
12972 # process holding the entry. If the process dies, the entry can be
12973 # taken by another process.
12974
12975 sub new($) {
12976     my $class = shift;
12977     my $id = shift;
12978     my $count = shift;
12979     $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
12980     $id = "id-".$id; # To distinguish it from a process id
12981     my $parallel_locks = $Global::cache_dir . "/semaphores";
12982     -d $parallel_locks or ::mkdir_or_die($parallel_locks);
12983     my $lockdir = "$parallel_locks/$id";
12984     my $lockfile = $lockdir.".lock";
12985     if(-d $parallel_locks and -w $parallel_locks
12986        and -r $parallel_locks and -x $parallel_locks) {
12987         # skip
12988     } else {
12989         ::error("Semaphoredir must be writable: '$parallel_locks'");
12990         ::wait_and_exit(255);
12991     }
12992
12993     if($count < 1) { ::die_bug("semaphore-count: $count"); }
12994     return bless {
12995         'lockfile' => $lockfile,
12996         'lockfh' => Symbol::gensym(),
12997         'lockdir' => $lockdir,
12998         'id' => $id,
12999         'idfile' => $lockdir."/".$id,
13000         'pid' => $$,
13001         'pidfile' => $lockdir."/".$$.'@'.::hostname(),
13002         'count' => $count + 1 # nlinks returns a link for the 'id-' as well
13003     }, ref($class) || $class;
13004 }
13005
13006 sub remove_dead_locks($) {
13007     my $self = shift;
13008     my $lockdir = $self->{'lockdir'};
13009
13010     for my $d (glob "$lockdir/*") {
13011         $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
13012         my ($pid, $host) = ($1, $2);
13013         if($host eq ::hostname()) {
13014             if(kill 0, $pid) {
13015                 ::debug("sem", "Alive: $pid $d\n");
13016             } else {
13017                 ::debug("sem", "Dead: $d\n");
13018                 ::rm($d);
13019             }
13020         }
13021     }
13022 }
13023
13024 sub acquire($) {
13025     my $self = shift;
13026     my $sleep = 1; # 1 ms
13027     my $start_time = time;
13028     while(1) {
13029         # Can we get a lock?
13030         $self->atomic_link_if_count_less_than() and last;
13031         $self->remove_dead_locks();
13032         # Retry slower and slower up to 1 second
13033         $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13034         # Random to avoid every sleeping job waking up at the same time
13035         ::usleep(rand()*$sleep);
13036         if($opt::semaphoretimeout) {
13037             if($opt::semaphoretimeout > 0
13038                and
13039                time - $start_time > $opt::semaphoretimeout) {
13040                 # Timeout: Take the semaphore anyway
13041                 ::warning("Semaphore timed out. Stealing the semaphore.");
13042                 if(not -e $self->{'idfile'}) {
13043                     open (my $fh, ">", $self->{'idfile'}) or
13044                         ::die_bug("timeout_write_idfile: $self->{'idfile'}");
13045                     close $fh;
13046                 }
13047                 link $self->{'idfile'}, $self->{'pidfile'};
13048                 last;
13049             }
13050             if($opt::semaphoretimeout < 0
13051                and
13052                time - $start_time > -$opt::semaphoretimeout) {
13053                 # Timeout: Exit
13054                 ::warning("Semaphore timed out. Exiting.");
13055                 exit(1);
13056                 last;
13057             }
13058         }
13059     }
13060     ::debug("sem", "acquired $self->{'pid'}\n");
13061 }
13062
13063 sub release($) {
13064     my $self = shift;
13065     ::rm($self->{'pidfile'});
13066     if($self->nlinks() == 1) {
13067         # This is the last link, so atomic cleanup
13068         $self->lock();
13069         if($self->nlinks() == 1) {
13070             ::rm($self->{'idfile'});
13071             rmdir $self->{'lockdir'};
13072         }
13073         $self->unlock();
13074     }
13075     ::debug("run", "released $self->{'pid'}\n");
13076 }
13077
13078 sub pid_change($) {
13079     # This should do what release()+acquire() would do without having
13080     # to re-acquire the semaphore
13081     my $self = shift;
13082
13083     my $old_pidfile =  $self->{'pidfile'};
13084     $self->{'pid'} = $$;
13085     $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
13086     my $retval = link $self->{'idfile'}, $self->{'pidfile'};
13087     ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13088     ::rm($old_pidfile);
13089 }
13090
13091 sub atomic_link_if_count_less_than($) {
13092     # Link $file1 to $file2 if nlinks to $file1 < $count
13093     my $self = shift;
13094     my $retval = 0;
13095     $self->lock();
13096     my $nlinks = $self->nlinks();
13097     ::debug("sem","$nlinks<$self->{'count'} ");
13098     if($nlinks < $self->{'count'}) {
13099         -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
13100         if(not -e $self->{'idfile'}) {
13101             open (my $fh, ">", $self->{'idfile'}) or
13102                 ::die_bug("write_idfile: $self->{'idfile'}");
13103             close $fh;
13104         }
13105         $retval = link $self->{'idfile'}, $self->{'pidfile'};
13106         ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13107     }
13108     $self->unlock();
13109     ::debug("sem", "atomic $retval");
13110     return $retval;
13111 }
13112
13113 sub nlinks($) {
13114     my $self = shift;
13115     if(-e $self->{'idfile'}) {
13116         return (stat(_))[3];
13117     } else {
13118         return 0;
13119     }
13120 }
13121
13122 sub lock($) {
13123     my $self = shift;
13124     my $sleep = 100; # 100 ms
13125     my $total_sleep = 0;
13126     $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
13127     my $locked = 0;
13128     while(not $locked) {
13129         if(tell($self->{'lockfh'}) == -1) {
13130             # File not open
13131             open($self->{'lockfh'}, ">", $self->{'lockfile'})
13132                 or ::debug("run", "Cannot open $self->{'lockfile'}");
13133         }
13134         if($self->{'lockfh'}) {
13135             # File is open
13136             chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
13137             if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
13138                 # The file is locked: No need to retry
13139                 $locked = 1;
13140                 last;
13141             } else {
13142                 if ($! =~ m/Function not implemented/) {
13143                     ::warning("flock: $!",
13144                               "Will wait for a random while.");
13145                     ::usleep(rand(5000));
13146                     # File cannot be locked: No need to retry
13147                     $locked = 2;
13148                     last;
13149                 }
13150             }
13151         }
13152         # Locking failed in first round
13153         # Sleep and try again
13154         $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13155         # Random to avoid every sleeping job waking up at the same time
13156         ::usleep(rand()*$sleep);
13157         $total_sleep += $sleep;
13158         if($opt::semaphoretimeout) {
13159             if($opt::semaphoretimeout > 0
13160                and
13161                $total_sleep/1000 > $opt::semaphoretimeout) {
13162                 # Timeout: Take the semaphore anyway
13163                 ::warning("Semaphore timed out. Taking the semaphore.");
13164                 $locked = 3;
13165                 last;
13166             }
13167             if($opt::semaphoretimeout < 0
13168                and
13169                $total_sleep/1000 > -$opt::semaphoretimeout) {
13170                 # Timeout: Exit
13171                 ::warning("Semaphore timed out. Exiting.");
13172                 $locked = 4;
13173                 last;
13174             }
13175         } else {
13176             if($total_sleep/1000 > 30) {
13177                 ::warning("Semaphore stuck for 30 seconds. ".
13178                           "Consider using --semaphoretimeout.");
13179             }
13180         }
13181     }
13182     ::debug("run", "locked $self->{'lockfile'}");
13183 }
13184
13185 sub unlock($) {
13186     my $self = shift;
13187     ::rm($self->{'lockfile'});
13188     close $self->{'lockfh'};
13189     ::debug("run", "unlocked\n");
13190 }
13191
13192 # Keep perl -w happy
13193
13194 $opt::x = $Semaphore::timeout = $Semaphore::wait =
13195 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
13196 $Global::max_slot_number = $opt::session;
13197
13198 package main;
13199
13200 sub main() {
13201     save_stdin_stdout_stderr();
13202     save_original_signal_handler();
13203     parse_options();
13204     ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
13205     my $number_of_args;
13206     if($Global::max_number_of_args) {
13207         $number_of_args = $Global::max_number_of_args;
13208     } elsif ($opt::X or $opt::m or $opt::xargs) {
13209         $number_of_args = undef;
13210     } else {
13211         $number_of_args = 1;
13212     }
13213
13214     my @command = @ARGV;
13215     my @input_source_fh;
13216     if($opt::pipepart) {
13217         if($opt::tee) {
13218             @input_source_fh = map { open_or_exit($_) } @opt::a;
13219             # Remove the first: It will be the file piped.
13220             shift @input_source_fh;
13221             if(not @input_source_fh and not $opt::pipe) {
13222                 @input_source_fh = (*STDIN);
13223             }
13224         } else {
13225             # -a is used for data - not for command line args
13226             @input_source_fh = map { open_or_exit($_) } "/dev/null";
13227         }
13228     } else {
13229         @input_source_fh = map { open_or_exit($_) } @opt::a;
13230         if(not @input_source_fh and not $opt::pipe) {
13231             @input_source_fh = (*STDIN);
13232         }
13233     }
13234
13235     if($opt::skip_first_line) {
13236         # Skip the first line for the first file handle
13237         my $fh = $input_source_fh[0];
13238         <$fh>;
13239     }
13240
13241     set_input_source_header(\@command,\@input_source_fh);
13242     if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
13243         # Parallel check all hosts are up. Remove hosts that are down
13244         filter_hosts();
13245     }
13246
13247
13248     if($opt::sqlmaster and $opt::sqlworker) {
13249         # Start a real --sqlworker in the background later
13250         $Global::start_sqlworker = 1;
13251         $opt::sqlworker = undef;
13252     }
13253
13254     if($opt::nonall or $opt::onall) {
13255         onall(\@input_source_fh,@command);
13256         wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
13257     }
13258
13259     $Global::JobQueue = JobQueue->new(
13260         \@command,\@input_source_fh,$Global::ContextReplace,
13261         $number_of_args,\@Global::transfer_files,\@Global::ret_files);
13262
13263     if($opt::sqlmaster) {
13264         # Create SQL table to hold joblog + output
13265         # Figure out how many arguments are in a job
13266         # (It is affected by --colsep, -N, $number_source_fh)
13267         my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
13268         my $record = $record_queue->get();
13269         my $no_of_values = $number_of_args * (1+$#{$record});
13270         $record_queue->unget($record);
13271         $Global::sql->create_table($no_of_values);
13272         if($opt::sqlworker) {
13273             # Start a real --sqlworker in the background later
13274             $Global::start_sqlworker = 1;
13275             $opt::sqlworker = undef;
13276         }
13277     }
13278
13279     if($opt::pipepart) {
13280         pipepart_setup();
13281     } elsif($opt::pipe and $opt::tee) {
13282         pipe_tee_setup();
13283     } elsif($opt::pipe and $opt::shard or $opt::bin) {
13284         pipe_shard_setup();
13285     }
13286
13287     if(not $opt::pipepart and $opt::groupby) {
13288         group_by_stdin_filter();
13289     }
13290     if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
13291         # Count the number of jobs or shuffle all jobs
13292         # before starting any.
13293         # Must be done after ungetting any --pipepart jobs.
13294         $Global::JobQueue->total_jobs();
13295     }
13296     # Compute $Global::max_jobs_running
13297     # Must be done after ungetting any --pipepart jobs.
13298     max_jobs_running();
13299     init_run_jobs();
13300     my $sem;
13301     if($Global::semaphore) {
13302         $sem = acquire_semaphore();
13303     }
13304     $SIG{TERM} = $Global::original_sig{TERM};
13305     $SIG{HUP} = \&start_no_new_jobs;
13306
13307     if($opt::tee or $opt::shard or $opt::bin) {
13308         # All jobs must be running in parallel for --tee/--shard/--bin
13309         while(start_more_jobs()) {}
13310         $Global::start_no_new_jobs = 1;
13311         if(not $Global::JobQueue->empty()) {
13312             if($opt::tee) {
13313                 ::error("--tee requires --jobs to be higher. Try --jobs 0.");
13314             } elsif($opt::bin) {
13315                 ::error("--bin requires --jobs to be higher than the number of",
13316                         "arguments. Increase --jobs.");
13317             } elsif($opt::shard) {
13318                 ::error("--shard requires --jobs to be higher than the number of",
13319                         "arguments. Increase --jobs.");
13320             } else {
13321                 ::die_bug("--bin/--shard/--tee should not get here");
13322             }
13323             ::wait_and_exit(255);
13324         }
13325     } elsif($opt::pipe and not $opt::pipepart) {
13326         # Fill all jobslots
13327         while(start_more_jobs()) {}
13328         spreadstdin();
13329     } else {
13330         # Reap one - start one
13331         while(reaper() + start_more_jobs()) {}
13332     }
13333     ::debug("init", "Start draining\n");
13334     drain_job_queue(@command);
13335     ::debug("init", "Done draining\n");
13336     reapers();
13337     ::debug("init", "Done reaping\n");
13338     if($Global::semaphore) {
13339         $sem->release();
13340     }
13341     cleanup();
13342     ::debug("init", "Halt\n");
13343     halt();
13344 }
13345
13346 main();