parallel: --(n)onall and --sshdelay fixed.
[parallel.git] / src / parallel
blob97682ba2b9e08f7a8c8e892c360b61184804600e
1 #!/usr/bin/env perl
3 # Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <http://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # open3 used in Job::start
22 use IPC::Open3;
23 use POSIX;
24 # gensym used in Job::start
25 use Symbol qw(gensym);
26 # tempfile used in Job::start
27 use File::Temp qw(tempfile tempdir);
28 # mkpath used in openresultsfile
29 use File::Path;
30 # GetOptions used in get_options_from_array
31 use Getopt::Long;
32 # Used to ensure code quality
33 use strict;
34 use File::Basename;
36 sub set_input_source_header($$) {
37 my ($command_ref,$input_source_fh_ref) = @_;
38 if($opt::header and not $opt::pipe) {
39 # split with colsep or \t
40 # $header force $colsep = \t if undef?
41 my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
42 # regexp for {=
43 my $left = "\Q$Global::parensleft\E";
44 my $l = $Global::parensleft;
45 # regexp for =}
46 my $right = "\Q$Global::parensright\E";
47 my $r = $Global::parensright;
48 my $id = 1;
49 for my $fh (@$input_source_fh_ref) {
50 my $line = <$fh>;
51 chomp($line);
52 $line =~ s/\r$//;
53 ::debug("init", "Delimiter: '$delimiter'");
54 for my $s (split /$delimiter/o, $line) {
55 ::debug("init", "Colname: '$s'");
56 # Replace {colname} with {2}
57 for(@$command_ref, @Global::ret_files,
58 @Global::transfer_files, $opt::tagstring,
59 $opt::workdir, $opt::results, $opt::retries) {
60 # Skip if undefined
61 $_ or next;
62 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
63 # {=header1 ... =} => {=1 ... =}
64 s:$left $s (.*?) $right:$l$id$1$r:gx;
66 $Global::input_source_header{$id} = $s;
67 $id++;
70 } else {
71 my $id = 1;
72 for my $fh (@$input_source_fh_ref) {
73 $Global::input_source_header{$id} = $id;
74 $id++;
79 sub max_jobs_running() {
80 # Compute $Global::max_jobs_running as the max number of jobs
81 # running on each sshlogin.
82 # Returns:
83 # $Global::max_jobs_running
84 if(not $Global::max_jobs_running) {
85 for my $sshlogin (values %Global::host) {
86 $sshlogin->max_jobs_running();
89 if(not $Global::max_jobs_running) {
90 ::error("Cannot run any jobs.");
91 wait_and_exit(255);
93 return $Global::max_jobs_running;
96 sub halt() {
97 # Compute exit value,
98 # wait for children to complete
99 # and exit
100 if($opt::halt and $Global::halt_when ne "never") {
101 if(not defined $Global::halt_exitstatus) {
102 if($Global::halt_pct) {
103 $Global::halt_exitstatus =
104 ::ceil($Global::total_failed /
105 ($Global::total_started || 1) * 100);
106 } elsif($Global::halt_count) {
107 $Global::halt_exitstatus =
108 ::min(undef_as_zero($Global::total_failed),101);
111 wait_and_exit($Global::halt_exitstatus);
112 } else {
113 wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
118 sub __PIPE_MODE__() {}
121 sub pipepart_setup() {
122 # Compute the blocksize
123 # Generate the commands to extract the blocks
124 # Push the commands on queue
125 # Changes:
126 # @Global::cat_prepends
127 # $Global::JobQueue
128 if($opt::tee) {
129 # Prepend each command with
130 # < file
131 my $cat_string = "< ".Q($opt::a[0]);
132 for(1..$Global::JobQueue->total_jobs()) {
133 push @Global::cat_appends, $cat_string;
134 push @Global::cat_prepends, "";
136 } else {
137 if(not $opt::blocksize) {
138 # --blocksize with 10 jobs per jobslot
139 $opt::blocksize = -10;
141 if($opt::roundrobin) {
142 # --blocksize with 1 job per jobslot
143 $opt::blocksize = -1;
145 if($opt::blocksize < 0) {
146 my $size = 0;
147 # Compute size of -a
148 for(@opt::a) {
149 if(-f $_) {
150 $size += -s $_;
151 } elsif(-b $_) {
152 $size += size_of_block_dev($_);
153 } elsif(-e $_) {
154 ::error("$_ is neither a file nor a block device");
155 wait_and_exit(255);
156 } else {
157 ::error("File not found: $_");
158 wait_and_exit(255);
161 # Run in total $job_slots*(- $blocksize) jobs
162 # Set --blocksize = size / no of proc / (- $blocksize)
163 $Global::dummy_jobs = 1;
164 $Global::blocksize = 1 +
165 int($size / max_jobs_running() /
166 -multiply_binary_prefix($opt::blocksize));
168 @Global::cat_prepends = map { pipe_part_files($_) } @opt::a;
169 # Unget the empty arg as many times as there are parts
170 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
171 map { [Arg->new("\0noarg")] } @Global::cat_prepends
176 sub pipe_tee_setup() {
177 # Create temporary fifos
178 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
179 # This will spread the input to fifos
180 # Generate commands that reads from fifo1..N:
181 # cat fifo | user_command
182 # Changes:
183 # @Global::cat_prepends
184 my @fifos;
185 for(1..$Global::JobQueue->total_jobs()) {
186 push @fifos, tmpfifo();
188 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
189 if(not fork()){
190 # Test if tee supports --output-error=warn-nopipe
191 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
192 my $opt = $? ? "" : "--output-error=warn-nopipe";
193 ::debug("init","tee $opt");
194 # Let tee inherit our stdin
195 # and redirect stdout to null
196 open STDOUT, ">","/dev/null";
197 if($opt) {
198 exec "tee", $opt, @fifos;
199 } else {
200 exec "tee", @fifos;
203 # For each fifo
204 # (rm fifo1; grep 1) < fifo1
205 # (rm fifo2; grep 2) < fifo2
206 # (rm fifo3; grep 3) < fifo3
207 # Remove the tmpfifo as soon as it is open
208 @Global::cat_prepends = map { "(rm $_;" } @fifos;
209 @Global::cat_appends = map { ") < $_" } @fifos;
213 sub parcat_script() {
214 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
215 my $script = q'{
216 use POSIX qw(:errno_h);
217 use IO::Select;
218 use strict;
219 use threads;
220 use Thread::Queue;
221 use Fcntl qw(:DEFAULT :flock);
223 my $opened :shared;
224 my $q = Thread::Queue->new();
225 my $okq = Thread::Queue->new();
226 my @producers;
228 if(not @ARGV) {
229 if(-t *STDIN) {
230 print "Usage:\n";
231 print " parcat file(s)\n";
232 print " cat argfile | parcat\n";
233 } else {
234 # Read arguments from stdin
235 chomp(@ARGV = <STDIN>);
238 my $files_to_open = 0;
239 # Default: fd = stdout
240 my $fd = 1;
241 for (@ARGV) {
242 # --rm = remove file when opened
243 /^--rm$/ and do { $opt::rm = 1; next; };
244 # -1 = output to fd 1, -2 = output to fd 2
245 /^-(\d+)$/ and do { $fd = $1; next; };
246 push @producers, threads->create("producer", $_, $fd);
247 $files_to_open++;
250 sub producer {
251 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
252 my $file = shift;
253 my $output_fd = shift;
254 open(my $fh, "<", $file) || do {
255 print STDERR "parcat: Cannot open $file\n";
256 exit(1);
258 # Remove file when it has been opened
259 if($opt::rm) {
260 unlink $file;
262 set_fh_non_blocking($fh);
263 $opened++;
264 # Pass the fileno to parent
265 $q->enqueue(fileno($fh),$output_fd);
266 # Get an OK that the $fh is opened and we can release the $fh
267 while(1) {
268 my $ok = $okq->dequeue();
269 if($ok == fileno($fh)) { last; }
270 # Not ours - very unlikely to happen
271 $okq->enqueue($ok);
273 return;
276 my $s = IO::Select->new();
277 my %buffer;
279 sub add_file {
280 my $infd = shift;
281 my $outfd = shift;
282 open(my $infh, "<&=", $infd) || die;
283 open(my $outfh, ">&=", $outfd) || die;
284 $s->add($infh);
285 # Tell the producer now opened here and can be released
286 $okq->enqueue($infd);
287 # Initialize the buffer
288 @{$buffer{$infh}{$outfd}} = ();
289 $Global::fh{$outfd} = $outfh;
292 sub add_files {
293 # Non-blocking dequeue
294 my ($infd,$outfd);
295 do {
296 ($infd,$outfd) = $q->dequeue_nb(2);
297 if(defined($outfd)) {
298 add_file($infd,$outfd);
300 } while(defined($outfd));
303 sub add_files_block {
304 # Blocking dequeue
305 my ($infd,$outfd) = $q->dequeue(2);
306 add_file($infd,$outfd);
310 my $fd;
311 my (@ready,$infh,$rv,$buf);
312 do {
313 # Wait until at least one file is opened
314 add_files_block();
315 while($q->pending or keys %buffer) {
316 add_files();
317 while(keys %buffer) {
318 @ready = $s->can_read(0.01);
319 if(not @ready) {
320 add_files();
322 for $infh (@ready) {
323 # There is only one key, namely the output file descriptor
324 for my $outfd (keys %{$buffer{$infh}}) {
325 $rv = sysread($infh, $buf, 65536);
326 if (!$rv) {
327 if($! == EAGAIN) {
328 # Would block: Nothing read
329 next;
330 } else {
331 # Nothing read, but would not block:
332 # This file is done
333 $s->remove($infh);
334 for(@{$buffer{$infh}{$outfd}}) {
335 syswrite($Global::fh{$outfd},$_);
337 delete $buffer{$infh};
338 # Closing the $infh causes it to block
339 # close $infh;
340 add_files();
341 next;
344 # Something read.
345 # Find \n or \r for full line
346 my $i = (rindex($buf,"\n")+1);
347 if($i) {
348 # Print full line
349 for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
350 syswrite($Global::fh{$outfd},$_);
352 # @buffer = remaining half line
353 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
354 } else {
355 # Something read, but not a full line
356 push @{$buffer{$infh}{$outfd}}, $buf;
358 redo;
363 } while($opened < $files_to_open);
365 for (@producers) {
366 $_->join();
369 sub set_fh_non_blocking {
370 # Set filehandle as non-blocking
371 # Inputs:
372 # $fh = filehandle to be blocking
373 # Returns:
374 # N/A
375 my $fh = shift;
376 my $flags;
377 fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
378 $flags |= &O_NONBLOCK; # Add non-blocking to the flags
379 fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
382 return ::spacefree(3, $script);
385 sub sharder_script() {
386 my $script = q{
387 use B;
388 # Column separator
389 my $sep = shift;
390 # Which columns to shard on (count from 1)
391 my $col = shift;
392 # Which columns to shard on (count from 0)
393 my $col0 = $col - 1;
394 # Perl expression
395 my $perlexpr = shift;
396 my $bins = @ARGV;
397 # Open fifos for writing, fh{0..$bins}
398 my $t = 0;
399 my %fh;
400 for(@ARGV) {
401 open $fh{$t++}, ">", $_;
402 # open blocks until it is opened by reader
403 # so unlink only happens when it is ready
404 unlink $_;
406 if($perlexpr) {
407 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
408 while(<STDIN>) {
409 # Split into $col columns (no need to split into more)
410 @F = split $sep, $_, $col+1;
412 local $_ = $F[$col0];
413 &$subref();
414 $fh = $fh{ hex(B::hash($_))%$bins };
416 print $fh $_;
418 } else {
419 while(<STDIN>) {
420 # Split into $col columns (no need to split into more)
421 @F = split $sep, $_, $col+1;
422 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
423 print $fh $_;
426 # Close all open fifos
427 close values %fh;
429 return ::spacefree(1, $script);
432 sub binner_script() {
433 my $script = q{
434 use B;
435 # Column separator
436 my $sep = shift;
437 # Which columns to shard on (count from 1)
438 my $col = shift;
439 # Which columns to shard on (count from 0)
440 my $col0 = $col - 1;
441 # Perl expression
442 my $perlexpr = shift;
443 my $bins = @ARGV;
444 # Open fifos for writing, fh{0..$bins}
445 my $t = 0;
446 my %fh;
447 # Let the last output fifo be the 0'th
448 open $fh{$t++}, ">", pop @ARGV;
449 for(@ARGV) {
450 open $fh{$t++}, ">", $_;
451 # open blocks until it is opened by reader
452 # so unlink only happens when it is ready
453 unlink $_;
455 if($perlexpr) {
456 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
457 while(<STDIN>) {
458 # Split into $col columns (no need to split into more)
459 @F = split $sep, $_, $col+1;
461 local $_ = $F[$col0];
462 &$subref();
463 $fh = $fh{ $_%$bins };
465 print $fh $_;
467 } else {
468 while(<STDIN>) {
469 # Split into $col columns (no need to split into more)
470 @F = split $sep, $_, $col+1;
471 $fh = $fh{ $F[$col0]%$bins };
472 print $fh $_;
475 # Close all open fifos
476 close values %fh;
478 return ::spacefree(1, $script);
481 sub pipe_shard_setup() {
482 # Create temporary fifos
483 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
484 # This will spread the input to fifos
485 # Generate commands that reads from fifo1..N:
486 # cat fifo | user_command
487 # Changes:
488 # @Global::cat_prepends
489 my @shardfifos;
490 my @parcatfifos;
491 # TODO $opt::jobs should be evaluated (100%)
492 # TODO $opt::jobs should be number of total_jobs if there are argugemts
493 my $njobs = $opt::jobs;
494 for my $m (0..$njobs-1) {
495 for my $n (0..$njobs-1) {
496 # sharding to A B C D
497 # parcatting all As together
498 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
501 my $shardbin = ($opt::shard || $opt::bin);
502 my $script;
503 if($opt::bin) {
504 $script = binner_script();
505 } else {
506 $script = sharder_script();
509 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
511 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
512 # Group by column name
513 # (Yes, this will also wrongly match a perlexpr like: chop)
514 my($read,$char,@line);
515 # A full line, but nothing more (the rest must be read by the child)
516 # $Global::header used to prepend block to each job
517 do {
518 $read = sysread(STDIN,$char,1);
519 push @line, $char;
520 } while($read and $char ne "\n");
521 $Global::header = join "", @line;
523 my ($col, $perlexpr, $subref) =
524 column_perlexpr($shardbin, $Global::header, $opt::colsep);
525 if(not fork()) {
526 # Let the sharder inherit our stdin
527 # and redirect stdout to null
528 open STDOUT, ">","/dev/null";
529 # The PERL_HASH_SEED must be the same for all sharders
530 # so B::hash will return the same value for any given input
531 $ENV{'PERL_HASH_SEED'} = $$;
532 exec qw(parallel --block 100k -q --pipe -j), $njobs,
533 qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
534 $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos);
536 # For each fifo
537 # (rm fifo1; grep 1) < fifo1
538 # (rm fifo2; grep 2) < fifo2
539 # (rm fifo3; grep 3) < fifo3
540 my $parcat = Q(parcat_script());
541 if(not $parcat) {
542 ::error("'parcat' must be in path.");
543 ::wait_and_exit(255);
545 @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos;
548 sub pipe_part_files(@) {
549 # Given the bigfile
550 # find header and split positions
551 # make commands that 'cat's the partial file
552 # Input:
553 # $file = the file to read
554 # Returns:
555 # @commands that will cat_partial each part
556 my ($file) = @_;
557 my $buf = "";
558 if(not -f $file and not -b $file) {
559 ::error("$file is not a seekable file.");
560 ::wait_and_exit(255);
562 my $header = find_header(\$buf,open_or_exit($file));
563 # find positions
564 my @pos = find_split_positions($file,$Global::blocksize,$header);
565 # Make @cat_prepends
566 my @cat_prepends = ();
567 for(my $i=0; $i<$#pos; $i++) {
568 push(@cat_prepends,
569 cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]));
571 return @cat_prepends;
574 sub find_header($$) {
575 # Compute the header based on $opt::header
576 # Input:
577 # $buf_ref = reference to read-in buffer
578 # $fh = filehandle to read from
579 # Uses:
580 # $opt::header
581 # $Global::blocksize
582 # $Global::header
583 # Returns:
584 # $header string
585 my ($buf_ref, $fh) = @_;
586 my $header = "";
587 # $Global::header may be set in group_by_loop()
588 if($Global::header) { return $Global::header }
589 if($opt::header) {
590 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
591 # Number = number of lines
592 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
593 while(sysread($fh,$$buf_ref,$Global::blocksize,length $$buf_ref)) {
594 if($$buf_ref =~ s/^($opt::header)//) {
595 $header = $1;
596 last;
600 return $header;
603 sub find_split_positions($$$) {
604 # Find positions in bigfile where recend is followed by recstart
605 # Input:
606 # $file = the file to read
607 # $block = (minimal) --block-size of each chunk
608 # $header = header to be skipped
609 # Uses:
610 # $opt::recstart
611 # $opt::recend
612 # Returns:
613 # @positions of block start/end
614 my($file, $block, $header) = @_;
615 my $headerlen = length $header;
616 my $size = -s $file;
617 if(-b $file) {
618 # $file is a blockdevice
619 $size = size_of_block_dev($file);
621 $block = int $block;
622 if($opt::groupby) {
623 return split_positions_for_group_by($file,$size,$block,$header);
625 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
626 # The optimal dd blocksize for freebsd = 2^15..2^17
627 my $dd_block_size = 131072; # 2^17
628 my @pos;
629 my ($recstart,$recend) = recstartrecend();
630 my $recendrecstart = $recend.$recstart;
631 my $fh = ::open_or_exit($file);
632 push(@pos,$headerlen);
633 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
634 my $buf;
635 if($recendrecstart eq "") {
636 # records ends anywhere
637 push(@pos,$pos);
638 } else {
639 # Seek the the block start
640 if(not sysseek($fh, $pos, 0)) {
641 ::error("Cannot seek to $pos in $file");
642 edit(255);
644 while(sysread($fh,$buf,$dd_block_size,length $buf)) {
645 if($opt::regexp) {
646 # If match /$recend$recstart/ => Record position
647 if($buf =~ m:^(.*$recend)$recstart:os) {
648 # Start looking for next record _after_ this match
649 $pos += length($1);
650 push(@pos,$pos);
651 last;
653 } else {
654 # If match $recend$recstart => Record position
655 # TODO optimize to only look at the appended
656 # $dd_block_size + len $recendrecstart
657 # TODO increase $dd_block_size to optimize for longer records
658 my $i = index64(\$buf,$recendrecstart);
659 if($i != -1) {
660 # Start looking for next record _after_ this match
661 $pos += $i + length($recend);
662 push(@pos,$pos);
663 last;
669 if($pos[$#pos] != $size) {
670 # Last splitpoint was not at end of the file: add $size as the last
671 push @pos, $size;
673 close $fh;
674 return @pos;
677 sub split_positions_for_group_by($$$$) {
678 my($fh);
679 sub value_at($) {
680 my $pos = shift;
681 if($pos != 0) {
682 seek($fh, $pos-1, 0) || die;
683 # Read half line
684 <$fh>;
686 # Read full line
687 my $linepos = tell($fh);
688 $_ = <$fh>;
689 if(defined $_) {
690 # Not end of file
691 my @F;
692 if(defined $group_by::col) {
693 $opt::colsep ||= "\t";
694 @F = split /$opt::colsep/, $_;
695 $_ = $F[$group_by::col];
697 eval $group_by::perlexpr;
699 return ($_,$linepos);
702 sub binary_search_end($$$) {
703 my ($s,$spos,$epos) = @_;
704 # value_at($spos) == $s
705 # value_at($epos) != $s
706 my $posdif = $epos - $spos;
707 my ($v,$vpos);
708 while($posdif) {
709 ($v,$vpos) = value_at($spos+$posdif);
710 if($v eq $s) {
711 $spos = $vpos;
712 $posdif = $epos - $spos;
713 } else {
714 $epos = $vpos;
716 $posdif = int($posdif/2);
718 return($v,$vpos);
721 sub binary_search_start($$$) {
722 my ($s,$spos,$epos) = @_;
723 # value_at($spos) != $s
724 # value_at($epos) == $s
725 my $posdif = $epos - $spos;
726 my ($v,$vpos);
727 while($posdif) {
728 ($v,$vpos) = value_at($spos+$posdif);
729 if($v eq $s) {
730 $epos = $vpos;
731 } else {
732 $spos = $vpos;
733 $posdif = $epos - $spos;
735 $posdif = int($posdif/2);
737 return($v,$vpos);
740 my ($file,$size,$block,$header) = @_;
741 my ($a,$b,$c,$apos,$bpos,$cpos);
742 my @pos;
743 $fh = open_or_exit($file);
744 # Set $Global::group_by_column $Global::group_by_perlexpr
745 group_by_loop($fh,$opt::recsep);
746 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
747 $apos = length $header;
748 for(($a,$apos) = value_at($apos); $apos < $size;) {
749 push @pos, $apos;
750 $bpos = $apos + $block;
751 ($b,$bpos) = value_at($bpos);
752 if(eof($fh)) {
753 push @pos, $size; last;
755 $cpos = $bpos + $block;
756 ($c,$cpos) = value_at($cpos);
757 if($a eq $b) {
758 while($b eq $c) {
759 # Move bpos, cpos a block forward until $a == $b != $c
760 $bpos = $cpos;
761 $cpos += $block;
762 ($c,$cpos) = value_at($cpos);
763 if($cpos >= $size) {
764 $cpos = $size;
765 last;
768 # $a == $b != $c
769 # Binary search for $b ending between ($bpos,$cpos)
770 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
771 } else {
772 if($b eq $c) {
773 # $a != $b == $c
774 # Binary search for $b starting between ($apos,$bpos)
775 ($b,$bpos) = binary_search_start($b,$apos,$bpos);
776 } else {
777 # $a != $b != $c
778 # Binary search for $b ending between ($bpos,$cpos)
779 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
782 ($a,$apos) = ($b,$bpos);
784 if($pos[$#pos] != $size) {
785 # Last splitpoint was not at end of the file: add it
786 push @pos, $size;
788 return @pos;
791 sub cat_partial($@) {
792 # Efficient command to copy from byte X to byte Y
793 # Input:
794 # $file = the file to read
795 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
796 # Returns:
797 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
798 my($file, @start_end) = @_;
799 my($start, $i);
800 # Convert (start,end) to (start,len)
801 my @start_len = map {
802 if(++$i % 2) { $start = $_; } else { $_-$start }
803 } @start_end;
804 # This can read 7 GB/s using a single core
805 my $script = spacefree
808 while(@ARGV) {
809 sysseek(STDIN,shift,0) || die;
810 $left = shift;
811 while($read =
812 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
813 $left -= $read;
814 syswrite(STDOUT,$buf);
818 return "<". Q($file) .
819 " perl -e '$script' @start_len |";
822 sub column_perlexpr($$$) {
823 # Compute the column number (if any), perlexpression from combined
824 # string (such as --shard key, --groupby key, {=n perlexpr=}
825 # Input:
826 # $column_perlexpr = string with column and perl expression
827 # $header = header from input file (if column is column name)
828 # $colsep = column separator regexp
829 # Returns:
830 # $col = column number
831 # $perlexpr = perl expression
832 # $subref = compiled perl expression as sub reference
833 my ($column_perlexpr, $header, $colsep) = @_;
834 my ($col, $perlexpr, $subref);
835 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
836 # Column name/number (possibly prefix)
837 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
838 # Column number (possibly prefix)
839 $col = $1;
840 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
841 # Column name (possibly prefix)
842 my $colname = $1;
843 # Split on --copsep pattern
844 my @headers = split /$colsep/, $header;
845 my %headers;
846 @headers{@headers} = (1..($#headers+1));
847 $col = $headers{$colname};
848 if(not defined $col) {
849 ::error("Column '$colname' $colsep not found in header",keys %headers);
850 ::wait_and_exit(255);
854 # What is left of $column_perlexpr is $perlexpr (possibly empty)
855 $perlexpr = $column_perlexpr;
856 $subref = eval("sub { no strict; no warnings; $perlexpr }");
857 return($col, $perlexpr, $subref);
860 sub group_by_loop($$) {
861 # Generate perl code for group-by loop
862 # Insert a $recsep when the column value changes
863 # The column value can be computed with $perexpr
864 my($fh,$recsep) = @_;
865 my $groupby = $opt::groupby;
866 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
867 # Group by column name
868 # (Yes, this will also wrongly match a perlexpr like: chop)
869 my($read,$char,@line);
870 # A full line, but nothing more (the rest must be read by the child)
871 # $Global::header used to prepend block to each job
872 do {
873 $read = sysread($fh,$char,1);
874 push @line, $char;
875 } while($read and $char ne "\n");
876 $Global::header = join "", @line;
878 $opt::colsep ||= "\t";
879 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
880 column_perlexpr($groupby, $Global::header, $opt::colsep);
881 # Numbered 0..n-1 due to being used by $F[n]
882 if($group_by::col) { $group_by::col--; }
884 my $loop = ::spacefree(0,q{
885 BEGIN{ $last = "RECSEP"; }
887 local $_=COLVALUE;
888 PERLEXPR;
889 if(($last) ne $_) {
890 print "RECSEP";
891 $last = $_;
895 if(defined $group_by::col) {
896 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
897 } else {
898 $loop =~ s/COLVALUE/\$_/g;
900 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
901 $loop =~ s/RECSEP/$recsep/g;
902 return $loop;
905 sub group_by_stdin_filter() {
906 # Record separator with 119 bit random value
907 $opt::recend = '';
908 $opt::recstart =
909 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
910 $opt::remove_rec_sep = 1;
911 my @filter;
912 push @filter, "perl";
913 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
914 # This is column number/name
915 # Use -a (auto-split)
916 push @filter, "-a";
917 $opt::colsep ||= "\t";
918 my $sep = $opt::colsep;
919 $sep =~ s/\t/\\t/g;
920 $sep =~ s/\"/\\"/g;
921 push @filter, "-F$sep";
923 push @filter, "-pe";
924 push @filter, group_by_loop(*STDIN,$opt::recstart);
925 ::debug("init", "@filter\n");
926 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
929 sub spreadstdin() {
930 # read a record
931 # Spawn a job and print the record to it.
932 # Uses:
933 # $Global::blocksize
934 # STDIN
935 # $opt::r
936 # $Global::max_lines
937 # $Global::max_number_of_args
938 # $opt::regexp
939 # $Global::start_no_new_jobs
940 # $opt::roundrobin
941 # %Global::running
942 # Returns: N/A
944 my $buf = "";
945 my ($recstart,$recend) = recstartrecend();
946 my $recendrecstart = $recend.$recstart;
947 my $chunk_number = 1;
948 my $one_time_through;
949 my $two_gb = 2**31-1;
950 my $blocksize = $Global::blocksize;
951 my $in = *STDIN;
952 my $timeout = $Global::blocktimeout;
954 my $header = find_header(\$buf,$in);
955 my $anything_written;
956 my $eof;
958 sub read_block() {
959 # Read a --blocksize from STDIN
960 # possibly interrupted by --blocktimeout
961 # Add up to the next full block
962 my $readsize = $blocksize - (length $buf) % $blocksize;
963 my ($nread,$alarm);
964 eval {
965 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
966 # --blocktimeout (or 0 if not set)
967 alarm $timeout;
968 if($] >= 5.026) {
969 do {
970 $nread = sysread $in, $buf, $readsize, length $buf;
971 $readsize -= $nread;
972 } while($readsize and $nread);
973 } else {
974 # Less efficient reading, but 32-bit sysread compatible
975 do {
976 $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
977 $readsize -= $nread;
978 } while($readsize and $nread);
980 alarm 0;
982 if ($@) {
983 die unless $@ eq "alarm\n"; # propagate unexpected errors
984 $alarm = 1;
985 } else {
986 $alarm = 0;
988 $eof = not ($nread or $alarm);
991 sub pass_n_line_records() {
992 # Pass records of N lines
993 my $n_lines = $buf =~ tr/\n/\n/;
994 my $last_newline_pos = rindex64(\$buf,"\n");
995 # Go backwards until there are full n-line records
996 while($n_lines % $Global::max_lines) {
997 $n_lines--;
998 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1000 # Chop at $last_newline_pos as that is where n-line record ends
1001 $anything_written +=
1002 write_record_to_pipe($chunk_number++,\$header,\$buf,
1003 $recstart,$recend,$last_newline_pos+1);
1004 shorten(\$buf,$last_newline_pos+1);
1007 sub pass_n_regexps() {
1008 # Pass records of N regexps
1009 # -N => (start..*?end){n}
1010 # -L -N => (start..*?end){n*l}
1011 my $read_n_lines = -1+
1012 $Global::max_number_of_args * ($Global::max_lines || 1);
1013 # (?!negative lookahead) is needed to avoid backtracking
1014 # See: https://unix.stackexchange.com/questions/439356/
1015 while($buf =~
1017 # Either recstart or at least one char from start
1018 ^(?: $recstart | .)
1019 # followed something
1020 (?:(?!$recend$recstart).)*?
1021 # and then recend
1022 $recend
1023 # Then n-1 times recstart.*recend
1024 (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
1026 # Followed by recstart
1027 (?=$recstart)/osx) {
1028 $anything_written +=
1029 write_record_to_pipe($chunk_number++,\$header,\$buf,
1030 $recstart,$recend,length $1);
1031 shorten(\$buf,length $1);
1035 sub pass_regexp() {
1036 # Find the last recend-recstart in $buf
1037 $eof and return;
1038 if($buf =~ /^(.*$recend)$recstart.*?$/os) {
1039 $anything_written +=
1040 write_record_to_pipe($chunk_number++,\$header,\$buf,
1041 $recstart,$recend,length $1);
1042 shorten(\$buf,length $1);
1046 sub pass_csv_record() {
1047 # Pass CVS record
1048 # We define a CSV record as an even number of " + end of line
1049 # This works if you use " as quoting character
1050 my $last_newline_pos = length $buf;
1051 # Go backwards from the last \n and search for a position
1052 # where there is an even number of "
1053 do {
1054 # find last EOL
1055 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1056 # While uneven "
1057 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1058 and $last_newline_pos >= 0);
1059 # Chop at $last_newline_pos as that is where CSV record ends
1060 $anything_written +=
1061 write_record_to_pipe($chunk_number++,\$header,\$buf,
1062 $recstart,$recend,$last_newline_pos+1);
1063 shorten(\$buf,$last_newline_pos+1);
1066 sub pass_n() {
1067 # Pass n records of --recend/--recstart
1068 # -N => (start..*?end){n}
1069 my $i = 0;
1070 my $read_n_lines =
1071 $Global::max_number_of_args * ($Global::max_lines || 1);
1072 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1074 length $buf) {
1075 $i += length $recend; # find the actual splitting location
1076 $anything_written +=
1077 write_record_to_pipe($chunk_number++,\$header,\$buf,
1078 $recstart,$recend,$i);
1079 shorten(\$buf,$i);
1083 sub pass() {
1084 # Pass records of --recend/--recstart
1085 # Split record at fixed string
1086 # Find the last recend+recstart in $buf
1087 $eof and return;
1088 my $i = rindex64(\$buf,$recendrecstart);
1089 if($i != -1) {
1090 $i += length $recend; # find the actual splitting location
1091 $anything_written +=
1092 write_record_to_pipe($chunk_number++,\$header,\$buf,
1093 $recstart,$recend,$i);
1094 shorten(\$buf,$i);
1098 sub increase_blocksize_maybe() {
1099 if(not $anything_written
1100 and not $opt::blocktimeout
1101 and not $Global::no_autoexpand_block) {
1102 # Nothing was written - maybe the block size < record size?
1103 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1104 if($blocksize < $two_gb) {
1105 my $old_blocksize = $blocksize;
1106 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1107 ::warning("A record was longer than $old_blocksize. " .
1108 "Increasing to --blocksize $blocksize.");
1113 while(1) {
1114 $anything_written = 0;
1115 read_block();
1116 if($opt::r) {
1117 # Remove empty lines
1118 $buf =~ s/^\s*\n//gm;
1119 if(length $buf == 0) {
1120 if($eof) {
1121 last;
1122 } else {
1123 next;
1127 if($Global::max_lines and not $Global::max_number_of_args) {
1128 # Pass n-line records
1129 pass_n_line_records();
1130 } elsif($opt::csv) {
1131 # Pass a full CSV record
1132 pass_csv_record();
1133 } elsif($opt::regexp) {
1134 # Split record at regexp
1135 if($Global::max_number_of_args) {
1136 pass_n_regexps();
1137 } else {
1138 pass_regexp();
1140 } else {
1141 # Pass normal --recend/--recstart record
1142 if($Global::max_number_of_args) {
1143 pass_n();
1144 } else {
1145 pass();
1148 $eof and last;
1149 increase_blocksize_maybe();
1150 ::debug("init", "Round\n");
1152 ::debug("init", "Done reading input\n");
1154 # If there is anything left in the buffer write it
1155 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1156 $recend, length $buf);
1158 if($opt::retries) {
1159 $Global::no_more_input = 1;
1160 # We need to start no more jobs: At most we need to retry some
1161 # of the already running.
1162 my @running = values %Global::running;
1163 # Stop any virgins.
1164 for my $job (@running) {
1165 if(defined $job and $job->virgin()) {
1166 close $job->fh(0,"w");
1169 # Wait for running jobs to be done
1170 my $sleep = 1;
1171 while($Global::total_running > 0) {
1172 $sleep = ::reap_usleep($sleep);
1173 start_more_jobs();
1176 $Global::start_no_new_jobs ||= 1;
1177 if($opt::roundrobin) {
1178 # Flush blocks to roundrobin procs
1179 my $sleep = 1;
1180 while(%Global::running) {
1181 my $something_written = 0;
1182 for my $job (values %Global::running) {
1183 if($job->block_length()) {
1184 $something_written += $job->non_blocking_write();
1185 } else {
1186 close $job->fh(0,"w");
1189 if($something_written) {
1190 $sleep = $sleep/2+0.001;
1192 $sleep = ::reap_usleep($sleep);
1197 sub recstartrecend() {
1198 # Uses:
1199 # $opt::recstart
1200 # $opt::recend
1201 # Returns:
1202 # $recstart,$recend with default values and regexp conversion
1203 my($recstart,$recend);
1204 if(defined($opt::recstart) and defined($opt::recend)) {
1205 # If both --recstart and --recend is given then both must match
1206 $recstart = $opt::recstart;
1207 $recend = $opt::recend;
1208 } elsif(defined($opt::recstart)) {
1209 # If --recstart is given it must match start of record
1210 $recstart = $opt::recstart;
1211 $recend = "";
1212 } elsif(defined($opt::recend)) {
1213 # If --recend is given then it must match end of record
1214 $recstart = "";
1215 $recend = $opt::recend;
1216 if($opt::regexp and $recend eq '') {
1217 # --regexp --recend ''
1218 $recend = '.';
1222 if($opt::regexp) {
1223 # If $recstart/$recend contains '|'
1224 # this should only apply to the regexp
1225 $recstart = "(?:".$recstart.")";
1226 $recend = "(?:".$recend.")";
1227 } else {
1228 # $recstart/$recend = printf strings (\n)
1229 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1230 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1232 return ($recstart,$recend);
1235 sub nindex($$) {
1236 # See if string is in buffer N times
1237 # Returns:
1238 # the position where the Nth copy is found
1239 my ($buf_ref, $str, $n) = @_;
1240 my $i = 0;
1241 for(1..$n) {
1242 $i = index64($buf_ref,$str,$i+1);
1243 if($i == -1) { last }
1245 return $i;
1249 my @robin_queue;
1250 my $sleep = 1;
1252 sub round_robin_write($$$$$) {
1253 # Input:
1254 # $header_ref = ref to $header string
1255 # $block_ref = ref to $block to be written
1256 # $recstart = record start string
1257 # $recend = record end string
1258 # $endpos = end position of $block
1259 # Uses:
1260 # %Global::running
1261 # Returns:
1262 # $something_written = amount of bytes written
1263 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1264 my $written = 0;
1265 my $block_passed = 0;
1266 while(not $block_passed) {
1267 # Continue flushing existing buffers
1268 # until one is empty and a new block is passed
1269 if(@robin_queue) {
1270 # Rotate queue once so new blocks get a fair chance
1271 # to be given to another slot
1272 push @robin_queue, shift @robin_queue;
1273 } else {
1274 # Make a queue to spread the blocks evenly
1275 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1276 values %Global::running);
1278 do {
1279 $written = 0;
1280 for my $job (@robin_queue) {
1281 if($job->block_length() > 0) {
1282 $written += $job->non_blocking_write();
1283 } else {
1284 $job->set_block($header_ref, $buffer_ref,
1285 $endpos, $recstart, $recend);
1286 $block_passed = 1;
1287 $job->set_virgin(0);
1288 $written += $job->non_blocking_write();
1289 last;
1292 if($written) {
1293 $sleep = $sleep/1.5+0.001;
1295 # Don't sleep if something is written
1296 } while($written and not $block_passed);
1297 $sleep = ::reap_usleep($sleep);
1299 return $written;
1303 sub index64($$$) {
1304 # Do index on strings > 2GB.
1305 # index in Perl < v5.22 does not work for > 2GB
1306 # Input:
1307 # as index except STR which must be passed as a reference
1308 # Output:
1309 # as index
1310 my $ref = shift;
1311 my $match = shift;
1312 my $pos = shift || 0;
1313 my $block_size = 2**31-1;
1314 my $strlen = length($$ref);
1315 # No point in doing extra work if we don't need to.
1316 if($strlen < $block_size or $] > 5.022) {
1317 return index($$ref, $match, $pos);
1320 my $matchlen = length($match);
1321 my $ret;
1322 my $offset = $pos;
1323 while($offset < $strlen) {
1324 $ret = index(
1325 substr($$ref, $offset, $block_size),
1326 $match, $pos-$offset);
1327 if($ret != -1) {
1328 return $ret + $offset;
1330 $offset += ($block_size - $matchlen - 1);
1332 return -1;
1335 sub rindex64($@) {
1336 # Do rindex on strings > 2GB.
1337 # rindex in Perl < v5.22 does not work for > 2GB
1338 # Input:
1339 # as rindex except STR which must be passed as a reference
1340 # Output:
1341 # as rindex
1342 my $ref = shift;
1343 my $match = shift;
1344 my $pos = shift;
1345 my $block_size = 2**31-1;
1346 my $strlen = length($$ref);
1347 # Default: search from end
1348 $pos = defined $pos ? $pos : $strlen;
1349 # No point in doing extra work if we don't need to.
1350 if($strlen < $block_size) {
1351 return rindex($$ref, $match, $pos);
1354 my $matchlen = length($match);
1355 my $ret;
1356 my $offset = $pos - $block_size + $matchlen;
1357 if($offset < 0) {
1358 # The offset is less than a $block_size
1359 # Set the $offset to 0 and
1360 # Adjust block_size accordingly
1361 $block_size = $block_size + $offset;
1362 $offset = 0;
1364 while($offset >= 0) {
1365 $ret = rindex(
1366 substr($$ref, $offset, $block_size),
1367 $match);
1368 if($ret != -1) {
1369 return $ret + $offset;
1371 $offset -= ($block_size - $matchlen - 1);
1373 return -1;
1376 sub shorten($$) {
1377 # Do: substr($buf,0,$i) = "";
1378 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1379 # Input:
1380 # $buf_ref = \$buf
1381 # $i = position to shorten to
1382 # Returns: N/A
1383 my ($buf_ref, $i) = @_;
1384 my $two_gb = 2**31-1;
1385 while($i > $two_gb) {
1386 substr($$buf_ref,0,$two_gb) = "";
1387 $i -= $two_gb;
1389 substr($$buf_ref,0,$i) = "";
1392 sub write_record_to_pipe($$$$$$) {
1393 # Fork then
1394 # Write record from pos 0 .. $endpos to pipe
1395 # Input:
1396 # $chunk_number = sequence number - to see if already run
1397 # $header_ref = reference to header string to prepend
1398 # $buffer_ref = reference to record to write
1399 # $recstart = start string of record
1400 # $recend = end string of record
1401 # $endpos = position in $buffer_ref where record ends
1402 # Uses:
1403 # $Global::job_already_run
1404 # $opt::roundrobin
1405 # @Global::virgin_jobs
1406 # Returns:
1407 # Number of chunks written (0 or 1)
1408 my ($chunk_number, $header_ref, $buffer_ref,
1409 $recstart, $recend, $endpos) = @_;
1410 if($endpos == 0) { return 0; }
1411 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1412 if($opt::roundrobin) {
1413 # Write the block to one of the already running jobs
1414 return round_robin_write($header_ref, $buffer_ref,
1415 $recstart, $recend, $endpos);
1417 # If no virgin found, backoff
1418 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1419 while(not @Global::virgin_jobs) {
1420 ::debug("pipe", "No virgin jobs");
1421 $sleep = ::reap_usleep($sleep);
1422 # Jobs may not be started because of loadavg
1423 # or too little time between each ssh login
1424 # or retrying failed jobs.
1425 start_more_jobs();
1427 my $job = shift @Global::virgin_jobs;
1428 # Job is no longer virgin
1429 $job->set_virgin(0);
1431 if($opt::retries) {
1432 # Copy $buffer[0..$endpos] to $job->{'block'}
1433 # Remove rec_sep
1434 # Run $job->add_transfersize
1435 $job->set_block($header_ref, $buffer_ref, $endpos,
1436 $recstart, $recend);
1437 if(fork()) {
1438 # Skip
1439 } else {
1440 $job->write($job->block_ref());
1441 close $job->fh(0,"w");
1442 exit(0);
1444 } else {
1445 # We ignore the removed rec_sep which is technically wrong.
1446 $job->add_transfersize($endpos + length $$header_ref);
1447 if(fork()) {
1448 # Skip
1449 } else {
1450 # Chop of at $endpos as we do not know how many rec_sep will
1451 # be removed.
1452 substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
1453 # Remove rec_sep
1454 if($opt::remove_rec_sep) {
1455 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
1457 $job->write($header_ref);
1458 $job->write($buffer_ref);
1459 close $job->fh(0,"w");
1460 exit(0);
1463 close $job->fh(0,"w");
1464 return 1;
1468 sub __SEM_MODE__() {}
1471 sub acquire_semaphore() {
1472 # Acquires semaphore. If needed: spawns to the background
1473 # Uses:
1474 # @Global::host
1475 # Returns:
1476 # The semaphore to be released when jobs is complete
1477 $Global::host{':'} = SSHLogin->new(":");
1478 my $sem = Semaphore->new($Semaphore::name,
1479 $Global::host{':'}->max_jobs_running());
1480 $sem->acquire();
1481 if($Semaphore::fg) {
1482 # skip
1483 } else {
1484 if(fork()) {
1485 exit(0);
1486 } else {
1487 # If run in the background, the PID will change
1488 $sem->pid_change();
1491 return $sem;
1495 sub __PARSE_OPTIONS__() {}
1498 sub options_hash() {
1499 # Returns:
1500 # %hash = the GetOptions config
1501 return
1502 ("debug|D=s" => \$opt::D,
1503 "xargs" => \$opt::xargs,
1504 "m" => \$opt::m,
1505 "X" => \$opt::X,
1506 "v" => \@opt::v,
1507 "sql=s" => \$opt::retired,
1508 "sqlmaster=s" => \$opt::sqlmaster,
1509 "sqlworker=s" => \$opt::sqlworker,
1510 "sqlandworker=s" => \$opt::sqlandworker,
1511 "joblog|jl=s" => \$opt::joblog,
1512 "results|result|res=s" => \$opt::results,
1513 "resume" => \$opt::resume,
1514 "resume-failed|resumefailed" => \$opt::resume_failed,
1515 "retry-failed|retryfailed" => \$opt::retry_failed,
1516 "silent" => \$opt::silent,
1517 "keep-order|keeporder|k" => \$opt::keeporder,
1518 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1519 "group" => \$opt::group,
1520 "g" => \$opt::retired,
1521 "ungroup|u" => \$opt::ungroup,
1522 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1523 => \$opt::linebuffer,
1524 "tmux" => \$opt::tmux,
1525 "tmuxpane" => \$opt::tmuxpane,
1526 "null|0" => \$opt::null,
1527 "quote|q" => \$opt::quote,
1528 # Replacement strings
1529 "parens=s" => \$opt::parens,
1530 "rpl=s" => \@opt::rpl,
1531 "plus" => \$opt::plus,
1532 "I=s" => \$opt::I,
1533 "extensionreplace|er=s" => \$opt::U,
1534 "U=s" => \$opt::retired,
1535 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1536 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1537 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1538 "seqreplace=s" => \$opt::seqreplace,
1539 "slotreplace=s" => \$opt::slotreplace,
1540 "jobs|j=s" => \$opt::jobs,
1541 "delay=s" => \$opt::delay,
1542 "sshdelay=f" => \$opt::sshdelay,
1543 "load=s" => \$opt::load,
1544 "noswap" => \$opt::noswap,
1545 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1546 "number-of-cpus" => \$opt::number_of_cpus,
1547 "number-of-sockets" => \$opt::number_of_sockets,
1548 "number-of-cores" => \$opt::number_of_cores,
1549 "number-of-threads" => \$opt::number_of_threads,
1550 "use-sockets-instead-of-threads"
1551 => \$opt::use_sockets_instead_of_threads,
1552 "use-cores-instead-of-threads"
1553 => \$opt::use_cores_instead_of_threads,
1554 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1555 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1556 "nice=i" => \$opt::nice,
1557 "tag" => \$opt::tag,
1558 "tagstring|tag-string=s" => \$opt::tagstring,
1559 "onall" => \$opt::onall,
1560 "nonall" => \$opt::nonall,
1561 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1562 "sshlogin|S=s" => \@opt::sshlogin,
1563 "sshloginfile|slf=s" => \@opt::sshloginfile,
1564 "controlmaster|M" => \$opt::controlmaster,
1565 "ssh=s" => \$opt::ssh,
1566 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1567 => \@opt::transfer_files,
1568 "return=s" => \@opt::return,
1569 "trc=s" => \@opt::trc,
1570 "transfer" => \$opt::transfer,
1571 "cleanup" => \$opt::cleanup,
1572 "basefile|bf=s" => \@opt::basefile,
1573 "B=s" => \$opt::retired,
1574 "ctrlc|ctrl-c" => \$opt::retired,
1575 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1576 "workdir|work-dir|wd=s" => \$opt::workdir,
1577 "W=s" => \$opt::retired,
1578 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1579 "tmpdir|tempdir=s" => \$opt::tmpdir,
1580 "use-compress-program|compress-program=s" => \$opt::compress_program,
1581 "use-decompress-program|decompress-program=s"
1582 => \$opt::decompress_program,
1583 "compress" => \$opt::compress,
1584 "tty" => \$opt::tty,
1585 "T" => \$opt::retired,
1586 "H=i" => \$opt::retired,
1587 "dry-run|dryrun|dr" => \$opt::dryrun,
1588 "progress" => \$opt::progress,
1589 "eta" => \$opt::eta,
1590 "bar" => \$opt::bar,
1591 "shuf" => \$opt::shuf,
1592 "arg-sep|argsep=s" => \$opt::arg_sep,
1593 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1594 "trim=s" => \$opt::trim,
1595 "env=s" => \@opt::env,
1596 "recordenv|record-env" => \$opt::record_env,
1597 "session" => \$opt::session,
1598 "plain" => \$opt::plain,
1599 "profile|J=s" => \@opt::profile,
1600 "pipe|spreadstdin" => \$opt::pipe,
1601 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1602 "recstart=s" => \$opt::recstart,
1603 "recend=s" => \$opt::recend,
1604 "regexp|regex" => \$opt::regexp,
1605 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1606 "files|output-as-files|outputasfiles" => \$opt::files,
1607 "block|block-size|blocksize=s" => \$opt::blocksize,
1608 "blocktimeout|block-timeout|bt=s" => \$opt::blocktimeout,
1609 "tollef" => \$opt::tollef,
1610 "gnu" => \$opt::gnu,
1611 "link|xapply" => \$opt::link,
1612 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1613 # Before changing this line, please read
1614 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1615 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1616 "bibtex|citation" => \$opt::citation,
1617 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1618 # Termination and retries
1619 "halt-on-error|halt=s" => \$opt::halt,
1620 "limit=s" => \$opt::limit,
1621 "memfree=s" => \$opt::memfree,
1622 "memsuspend=s" => \$opt::memsuspend,
1623 "retries=s" => \$opt::retries,
1624 "timeout=s" => \$opt::timeout,
1625 "termseq|term-seq=s" => \$opt::termseq,
1626 # xargs-compatibility - implemented, man, testsuite
1627 "max-procs|P=s" => \$opt::jobs,
1628 "delimiter|d=s" => \$opt::d,
1629 "max-chars|s=i" => \$opt::max_chars,
1630 "arg-file|a=s" => \@opt::a,
1631 "no-run-if-empty|r" => \$opt::r,
1632 "replace|i:s" => \$opt::i,
1633 "E=s" => \$opt::eof,
1634 "eof|e:s" => \$opt::eof,
1635 "max-args|maxargs|n=i" => \$opt::max_args,
1636 "max-replace-args|N=i" => \$opt::max_replace_args,
1637 "colsep|col-sep|C=s" => \$opt::colsep,
1638 "csv"=> \$opt::csv,
1639 "help|h" => \$opt::help,
1640 "L=f" => \$opt::L,
1641 "max-lines|l:f" => \$opt::max_lines,
1642 "interactive|p" => \$opt::interactive,
1643 "verbose|t" => \$opt::verbose,
1644 "version|V" => \$opt::version,
1645 "minversion|min-version=i" => \$opt::minversion,
1646 "show-limits|showlimits" => \$opt::show_limits,
1647 "exit|x" => \$opt::x,
1648 # Semaphore
1649 "semaphore" => \$opt::semaphore,
1650 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1651 "semaphorename|id=s" => \$opt::semaphorename,
1652 "fg" => \$opt::fg,
1653 "bg" => \$opt::bg,
1654 "wait" => \$opt::wait,
1655 # Shebang #!/usr/bin/parallel --shebang
1656 "shebang|hashbang" => \$opt::shebang,
1657 "internal-pipe-means-argfiles"
1658 => \$opt::internal_pipe_means_argfiles,
1659 "Y" => \$opt::retired,
1660 "skip-first-line" => \$opt::skip_first_line,
1661 "bug" => \$opt::bug,
1662 "header=s" => \$opt::header,
1663 "cat" => \$opt::cat,
1664 "fifo" => \$opt::fifo,
1665 "pipepart|pipe-part" => \$opt::pipepart,
1666 "tee" => \$opt::tee,
1667 "shard=s" => \$opt::shard,
1668 "bin=s" => \$opt::bin,
1669 "groupby|group-by=s" => \$opt::groupby,
1670 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1671 "embed" => \$opt::embed,
1675 sub get_options_from_array($@) {
1676 # Run GetOptions on @array
1677 # Input:
1678 # $array_ref = ref to @ARGV to parse
1679 # @keep_only = Keep only these options
1680 # Uses:
1681 # @ARGV
1682 # Returns:
1683 # true if parsing worked
1684 # false if parsing failed
1685 # @$array_ref is changed
1686 my ($array_ref, @keep_only) = @_;
1687 if(not @$array_ref) {
1688 # Empty array: No need to look more at that
1689 return 1;
1691 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1692 # supported everywhere
1693 my @save_argv;
1694 my $this_is_ARGV = (\@::ARGV == $array_ref);
1695 if(not $this_is_ARGV) {
1696 @save_argv = @::ARGV;
1697 @::ARGV = @{$array_ref};
1699 # If @keep_only set: Ignore all values except @keep_only
1700 my %options = options_hash();
1701 if(@keep_only) {
1702 my (%keep,@dummy);
1703 @keep{@keep_only} = @keep_only;
1704 for my $k (grep { not $keep{$_} } keys %options) {
1705 # Store the value of the option in @dummy
1706 $options{$k} = \@dummy;
1709 my $retval = GetOptions(%options);
1710 if(not $this_is_ARGV) {
1711 @{$array_ref} = @::ARGV;
1712 @::ARGV = @save_argv;
1714 return $retval;
1717 sub parse_options(@) {
1718 # Returns: N/A
1719 init_globals();
1720 my @argv_before = @ARGV;
1721 @ARGV = read_options();
1723 # Before changing this line, please read
1724 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1725 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1726 if(defined $opt::citation) {
1727 citation(\@argv_before,\@ARGV);
1728 wait_and_exit(0);
1730 # no-* overrides *
1731 if($opt::nokeeporder) { $opt::keeporder = undef; }
1733 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1734 if($opt::bug) { ::die_bug("test-bug"); }
1735 $Global::debug = $opt::D;
1736 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1737 || $ENV{'SHELL'} || "/bin/sh";
1738 if(not -x $Global::shell and not which($Global::shell)) {
1739 ::error("Shell '$Global::shell' not found.");
1740 wait_and_exit(255);
1742 ::debug("init","Global::shell $Global::shell\n");
1743 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1744 if(defined $opt::X) { $Global::ContextReplace = 1; }
1745 if(defined $opt::silent) { $Global::verbose = 0; }
1746 if(defined $opt::null) { $/ = "\0"; }
1747 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1748 parse_replacement_string_options();
1749 if(defined $opt::tagstring) {
1750 $opt::tagstring = unquote_printf($opt::tagstring);
1751 if($opt::tagstring =~ /\Q$Global::parensleft\E.*\Q$Global::parensright\E/
1753 $opt::linebuffer) {
1754 # --tagstring contains {= =} and --linebuffer =>
1755 # recompute replacement string for each use (do not cache)
1756 $Global::cache_replacement_eval = 0;
1759 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1760 if(defined $opt::quote) { $Global::quoting = 1; }
1761 if(defined $opt::r) { $Global::ignore_empty = 1; }
1762 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1763 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1764 if(defined $opt::max_args) {
1765 $Global::max_number_of_args = $opt::max_args;
1767 if(defined $opt::blocktimeout) {
1768 $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
1769 if($Global::blocktimeout < 1) {
1770 ::error("--block-timeout must be at least 1");
1771 wait_and_exit(255);
1774 if(defined $opt::timeout) {
1775 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1777 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1778 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1779 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1780 # Default: Same nice level as GNU Parallel is started at
1781 $opt::nice ||= eval { getpriority(0,0) } || 0;
1782 if(defined $opt::help) { usage(); exit(0); }
1783 if(defined $opt::embed) { embed(); exit(0); }
1784 if(defined $opt::sqlandworker) {
1785 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1787 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1788 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1789 if(defined $opt::csv) {
1790 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1791 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1792 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1793 my $sep = $csv_setting->{sep_char};
1794 $Global::csv = Text::CSV->new($csv_setting)
1795 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1797 if(defined $opt::header) {
1798 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1800 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1801 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1802 if(defined $opt::arg_file_sep) {
1803 $Global::arg_file_sep = $opt::arg_file_sep;
1805 if(defined $opt::number_of_sockets) {
1806 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1808 if(defined $opt::number_of_cpus) {
1809 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1811 if(defined $opt::number_of_cores) {
1812 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1814 if(defined $opt::number_of_threads) {
1815 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1817 if(defined $opt::max_line_length_allowed) {
1818 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1820 if(defined $opt::version) { version(); wait_and_exit(0); }
1821 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1822 if(defined $opt::show_limits) { show_limits(); }
1823 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1824 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1825 if(@opt::return) { push @Global::ret_files, @opt::return; }
1826 if($opt::transfer) {
1827 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1829 push @Global::transfer_files, @opt::transfer_files;
1830 if(not defined $opt::recstart and
1831 not defined $opt::recend) { $opt::recend = "\n"; }
1832 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1833 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1834 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1835 $Global::blocksize = 2**31-1;
1837 if($^O eq "cygwin" and
1838 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1839 and $Global::blocksize > 65535) {
1840 warning("--blocksize >= 64K causes problems on Cygwin.");
1842 $opt::memfree = multiply_binary_prefix($opt::memfree);
1843 $opt::memsuspend = multiply_binary_prefix($opt::memsuspend);
1844 $Global::memlimit = $opt::memsuspend + $opt::memfree;
1845 check_invalid_option_combinations();
1846 if((defined $opt::fifo or defined $opt::cat)
1847 and not $opt::pipepart) {
1848 $opt::pipe = 1;
1850 if(defined $opt::minversion) {
1851 print $Global::version,"\n";
1852 if($Global::version < $opt::minversion) {
1853 wait_and_exit(255);
1854 } else {
1855 wait_and_exit(0);
1858 if(not defined $opt::delay) {
1859 # Set --delay to --sshdelay if not set
1860 $opt::delay = $opt::sshdelay;
1862 $opt::delay = multiply_time_units($opt::delay);
1863 if($opt::compress_program) {
1864 $opt::compress = 1;
1865 $opt::decompress_program ||= $opt::compress_program." -dc";
1868 if(defined $opt::results) {
1869 # Is the output a dir or CSV-file?
1870 if($opt::results =~ /\.csv$/i) {
1871 # CSV with , as separator
1872 $Global::csvsep = ",";
1873 $Global::membuffer ||= 1;
1874 } elsif($opt::results =~ /\.tsv$/i) {
1875 # CSV with TAB as separator
1876 $Global::csvsep = "\t";
1877 $Global::membuffer ||= 1;
1880 if($opt::compress) {
1881 my ($compress, $decompress) = find_compression_program();
1882 $opt::compress_program ||= $compress;
1883 $opt::decompress_program ||= $decompress;
1884 if(($opt::results and not $Global::csvsep) or $opt::files) {
1885 # No need for decompressing
1886 $opt::decompress_program = "cat >/dev/null";
1889 if(defined $opt::dryrun) {
1890 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1891 $opt::ungroup = 0;
1892 $opt::group = 1;
1894 if(defined $opt::nonall) {
1895 # Append a dummy empty argument if there are no arguments
1896 # on the command line to avoid reading from STDIN.
1897 # arg_sep = random 50 char
1898 # \0noarg => nothing (not the empty string)
1899 $Global::arg_sep = join "",
1900 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1901 push @ARGV, $Global::arg_sep, "\0noarg";
1903 if(defined $opt::tee) {
1904 if(not defined $opt::jobs) {
1905 $opt::jobs = 0;
1908 if(defined $opt::tty) {
1909 # Defaults for --tty: -j1 -u
1910 # Can be overridden with -jXXX -g
1911 if(not defined $opt::jobs) {
1912 $opt::jobs = 1;
1914 if(not defined $opt::group) {
1915 $opt::ungroup = 1;
1918 if(@opt::trc) {
1919 push @Global::ret_files, @opt::trc;
1920 if(not @Global::transfer_files) {
1921 # Defaults to --transferfile {}
1922 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1924 $opt::cleanup = 1;
1926 if(defined $opt::max_lines) {
1927 if($opt::max_lines eq "-0") {
1928 # -l -0 (swallowed -0)
1929 $opt::max_lines = 1;
1930 $opt::null = 1;
1931 $/ = "\0";
1932 } elsif ($opt::max_lines == 0) {
1933 # If not given (or if 0 is given) => 1
1934 $opt::max_lines = 1;
1936 $Global::max_lines = $opt::max_lines;
1937 if(not $opt::pipe) {
1938 # --pipe -L means length of record - not max_number_of_args
1939 $Global::max_number_of_args ||= $Global::max_lines;
1943 # Read more than one arg at a time (-L, -N)
1944 if(defined $opt::L) {
1945 $Global::max_lines = $opt::L;
1946 if(not $opt::pipe) {
1947 # --pipe -L means length of record - not max_number_of_args
1948 $Global::max_number_of_args ||= $Global::max_lines;
1951 if(defined $opt::max_replace_args) {
1952 $Global::max_number_of_args = $opt::max_replace_args;
1953 $Global::ContextReplace = 1;
1955 if((defined $opt::L or defined $opt::max_replace_args)
1957 not ($opt::xargs or $opt::m)) {
1958 $Global::ContextReplace = 1;
1960 if(defined $opt::tag and not defined $opt::tagstring) {
1961 # Default = {}
1962 $opt::tagstring = $Global::parensleft.$Global::parensright;
1964 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1965 # Deal with ::: :::+ :::: and ::::+
1966 @ARGV = read_args_from_command_line();
1968 parse_semaphore();
1970 if(defined $opt::eta) { $opt::progress = $opt::eta; }
1971 if(defined $opt::bar) { $opt::progress = $opt::bar; }
1973 # Funding a free software project is hard. GNU Parallel is no
1974 # exception. On top of that it seems the less visible a project
1975 # is, the harder it is to get funding. And the nature of GNU
1976 # Parallel is that it will never be seen by "the guy with the
1977 # checkbook", but only by the people doing the actual work.
1979 # This problem has been covered by others - though no solution has
1980 # been found:
1981 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1982 # https://blog.licensezero.com/2019/08/24/process-of-elimination.html
1983 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1985 # The FAQ tells you why the citation notice exists:
1986 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1988 # If you want GNU Parallel to be maintained in the future, and not
1989 # just wither away like so many other free software tools, you
1990 # need to help finance the development.
1992 # The citation notice is a simple way of doing so, as citations
1993 # makes it possible to me to get a job where I can maintain GNU
1994 # Parallel as part of the job.
1996 # This means you can help financing development
1998 # WITHOUT PAYING A SINGLE CENT!
2000 # Before implementing the citation notice it was discussed with
2001 # the users:
2002 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2004 # Having to spend 10 seconds on running 'parallel --citation' once
2005 # is no doubt not an ideal solution, but no one has so far come up
2006 # with an ideal solution - neither for funding GNU Parallel nor
2007 # other free software.
2009 # If you believe you have the perfect solution, you should try it
2010 # out, and if it works, you should post it on the email
2011 # list. Ideas that will cost work and which have not been tested
2012 # are, however, unlikely to be prioritized.
2014 # Please note that GPL version 3 gives you the right to fork GNU
2015 # Parallel under a new name, but it does not give you the right to
2016 # distribute modified copies with the citation notice disabled in
2017 # a way where the software can be confused with GNU Parallel. To
2018 # do that you need to be the owner of the GNU Parallel
2019 # trademark. The xt:Commerce case shows this.
2021 # Description of the xt:Commerce case in OLG Duesseldorf
2022 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2023 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2025 # The verdict in German
2026 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2027 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2029 # Other free software limiting derivates by the same name:
2030 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2031 # https://tm.joomla.org/trademark-faq.html
2032 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2034 # Running 'parallel --citation' one single time takes less than 10
2035 # seconds, and will silence the citation notice for future
2036 # runs. If that is too much trouble for you, why not use one of
2037 # the alternatives instead?
2038 # See a list in: 'man parallel_alternatives'
2040 # If you want GNU Parallel to be maintained in the future keep
2041 # this line.
2042 citation_notice();
2043 # Seriously: _YOU_ will be harming free software by removing the
2044 # notice. _YOU_ make it harder to justify spending time developing
2045 # it. If you *do* remove the line, please email
2046 # hallofshame@tange.dk if you want to avoid being put in a hall of
2047 # shame.
2049 parse_halt();
2051 if($ENV{'PARALLEL_ENV'}) {
2052 # Read environment and set $Global::parallel_env
2053 # Must be done before is_acceptable_command_line_length()
2054 my $penv = $ENV{'PARALLEL_ENV'};
2055 # unset $PARALLEL_ENV: It should not be given to children
2056 # because it takes up a lot of env space
2057 delete $ENV{'PARALLEL_ENV'};
2058 if(-e $penv) {
2059 # This is a file/fifo: Replace envvar with content of file
2060 open(my $parallel_env, "<", $penv) ||
2061 ::die_bug("Cannot read parallel_env from $penv");
2062 local $/; # Put <> in slurp mode
2063 $penv = <$parallel_env>;
2064 close $parallel_env;
2066 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2067 $penv =~ s/\001/\n/g;
2068 if($penv =~ /\0/) {
2069 ::warning('\0 (NUL) in environment is not supported');
2071 $Global::parallel_env = $penv;
2074 parse_sshlogin();
2076 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
2077 # As we do not know the max line length on the remote machine
2078 # long commands generated by xargs may fail
2079 # If $opt::max_replace_args is set, it is probably safe
2080 ::warning("Using -X or -m with --sshlogin may fail.");
2083 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2084 open_joblog();
2085 open_csv();
2086 if($opt::sqlmaster or $opt::sqlworker) {
2087 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2089 if($opt::sqlworker) { $Global::membuffer ||= 1; }
2090 # The sqlmaster groups the arguments, so the should just read one
2091 if($opt::sqlworker and not $opt::sqlmaster) { $Global::max_number_of_args = 1; }
2095 sub check_invalid_option_combinations() {
2096 if(defined $opt::timeout and
2097 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2098 ::error("--timeout must be seconds or percentage.");
2099 wait_and_exit(255);
2101 if(defined $opt::fifo and defined $opt::cat) {
2102 ::error("--fifo cannot be combined with --cat.");
2103 ::wait_and_exit(255);
2105 if(defined $opt::retries and defined $opt::roundrobin) {
2106 ::error("--retries cannot be combined with --roundrobin.");
2107 ::wait_and_exit(255);
2109 if(defined $opt::pipepart and
2110 (defined $opt::L or defined $opt::max_lines
2111 or defined $opt::max_replace_args)) {
2112 ::error("--pipepart is incompatible with --max-replace-args, ".
2113 "--max-lines, and -L.");
2114 wait_and_exit(255);
2116 if(defined $opt::group and $opt::ungroup) {
2117 ::error("--group cannot be combined with --ungroup.");
2118 ::wait_and_exit(255);
2120 if(defined $opt::group and $opt::linebuffer) {
2121 ::error("--group cannot be combined with --line-buffer.");
2122 ::wait_and_exit(255);
2124 if(defined $opt::ungroup and $opt::linebuffer) {
2125 ::error("--ungroup cannot be combined with --line-buffer.");
2126 ::wait_and_exit(255);
2128 if(defined $opt::tollef and not $opt::gnu) {
2129 ::error("--tollef has been retired.",
2130 "Remove --tollef or use --gnu to override --tollef.");
2131 ::wait_and_exit(255);
2133 if(defined $opt::retired) {
2134 ::error("-g has been retired. Use --group.",
2135 "-B has been retired. Use --bf.",
2136 "-T has been retired. Use --tty.",
2137 "-U has been retired. Use --er.",
2138 "-W has been retired. Use --wd.",
2139 "-Y has been retired. Use --shebang.",
2140 "-H has been retired. Use --halt.",
2141 "--sql has been retired. Use --sqlmaster.",
2142 "--ctrlc has been retired.",
2143 "--noctrlc has been retired.");
2144 ::wait_and_exit(255);
2146 if($opt::groupby) {
2147 if(not $opt::pipe and not $opt::pipepart) {
2148 $opt::pipe = 1;
2150 if($opt::remove_rec_sep) {
2151 ::error("--remove-rec-sep is not compatible with --groupby");
2152 ::wait_and_exit(255);
2154 if($opt::recstart) {
2155 ::error("--recstart is not compatible with --groupby");
2156 ::wait_and_exit(255);
2158 if($opt::recend ne "\n") {
2159 ::error("--recend is not compatible with --groupby");
2160 ::wait_and_exit(255);
2165 sub init_globals() {
2166 # Defaults:
2167 $Global::version = 20200923;
2168 $Global::progname = 'parallel';
2169 $::name = "GNU Parallel";
2170 $Global::infinity = 2**31;
2171 $Global::debug = 0;
2172 $Global::verbose = 0;
2173 # Don't quote every part of the command line
2174 $Global::quoting = 0;
2175 # Quote replacement strings
2176 $Global::quote_replace = 1;
2177 $Global::total_completed = 0;
2178 $Global::cache_replacement_eval = 1;
2179 # Read only table with default --rpl values
2180 %Global::replace =
2182 '{}' => '',
2183 '{#}' => '1 $_=$job->seq()',
2184 '{%}' => '1 $_=$job->slot()',
2185 '{/}' => 's:.*/::',
2186 '{//}' =>
2187 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2188 '$_ = dirname($_);'),
2189 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
2190 '{.}' => 's:\.[^/.]+$::',
2192 %Global::plus =
2194 # {} = {+/}/{/}
2195 # = {.}.{+.} = {+/}/{/.}.{+.}
2196 # = {..}.{+..} = {+/}/{/..}.{+..}
2197 # = {...}.{+...} = {+/}/{/...}.{+...}
2198 '{+/}' => 's:/[^/]*$::',
2199 '{+.}' => 's:.*\.::',
2200 '{+..}' => 's:.*\.([^.]*\.):$1:',
2201 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
2202 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
2203 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2204 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2205 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2206 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
2207 # {##} = number of jobs
2208 '{##}' => '$_=total_jobs()',
2209 # Bash ${a:-myval}
2210 '{:-([^}]+?)}' => '$_ ||= $$1',
2211 # Bash ${a:2}
2212 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2213 # Bash ${a:2:3}
2214 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2215 # Bash ${a#bc}
2216 '{#([^#}][^}]*?)}' => 's/^$$1//;',
2217 # Bash ${a%def}
2218 '{%([^}]+?)}' => 's/$$1$//;',
2219 # Bash ${a/def/ghi} ${a/def/}
2220 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2221 # Bash ${a^a}
2222 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2223 # Bash ${a^^a}
2224 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2225 # Bash ${a,A}
2226 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2227 # Bash ${a,,A}
2228 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2229 # {slot} = $PARALLEL_JOBSLOT
2230 '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2231 # {host} = ssh host
2232 '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2233 # {sshlogin} = sshlogin
2234 '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2236 # Modifiable copy of %Global::replace
2237 %Global::rpl = %Global::replace;
2238 $/ = "\n";
2239 $Global::ignore_empty = 0;
2240 $Global::interactive = 0;
2241 $Global::stderr_verbose = 0;
2242 $Global::default_simultaneous_sshlogins = 9;
2243 $Global::exitstatus = 0;
2244 $Global::arg_sep = ":::";
2245 $Global::arg_file_sep = "::::";
2246 $Global::trim = 'n';
2247 $Global::max_jobs_running = 0;
2248 $Global::job_already_run = '';
2249 $ENV{'TMPDIR'} ||= "/tmp";
2250 $ENV{'OLDPWD'} = $ENV{'PWD'};
2251 if(not $ENV{HOME}) {
2252 # $ENV{HOME} is sometimes not set if called from PHP
2253 ::warning("\$HOME not set. Using /tmp.");
2254 $ENV{HOME} = "/tmp";
2256 # no warnings to allow for undefined $XDG_*
2257 no warnings 'uninitialized';
2258 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2259 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2260 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
2261 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
2262 # Keep only dirs that exist
2263 @Global::config_dirs =
2264 (grep { -d $_ }
2265 $ENV{'PARALLEL_HOME'},
2266 (map { "$_/parallel" }
2267 $xdg_config_home,
2268 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2269 $ENV{'HOME'} . "/.parallel");
2270 # Use first dir as config dir
2271 $Global::config_dir = $Global::config_dirs[0] ||
2272 $ENV{'HOME'} . "/.parallel";
2273 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
2274 # Keep only dirs that exist
2275 @Global::cache_dirs =
2276 (grep { -d $_ }
2277 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
2278 $Global::cache_dir = $Global::cache_dirs[0] ||
2279 $ENV{'HOME'} . "/.parallel";
2282 sub parse_halt() {
2283 # $opt::halt flavours
2284 # Uses:
2285 # $opt::halt
2286 # $Global::halt_when
2287 # $Global::halt_fail
2288 # $Global::halt_success
2289 # $Global::halt_pct
2290 # $Global::halt_count
2291 if(defined $opt::halt) {
2292 my %halt_expansion = (
2293 "0" => "never",
2294 "1" => "soon,fail=1",
2295 "2" => "now,fail=1",
2296 "-1" => "soon,success=1",
2297 "-2" => "now,success=1",
2299 # Expand -2,-1,0,1,2 into long form
2300 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2301 # --halt 5% == --halt soon,fail=5%
2302 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2303 # Split: soon,fail=5%
2304 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2305 if(not grep { $when eq $_ } qw(never soon now)) {
2306 ::error("--halt must have 'never', 'soon', or 'now'.");
2307 ::wait_and_exit(255);
2309 $Global::halt_when = $when;
2310 if($when ne "never") {
2311 if($fail_success eq "fail") {
2312 $Global::halt_fail = 1;
2313 } elsif($fail_success eq "success") {
2314 $Global::halt_success = 1;
2315 } elsif($fail_success eq "done") {
2316 $Global::halt_done = 1;
2317 } else {
2318 ::error("--halt $when must be followed by ,success or ,fail.");
2319 ::wait_and_exit(255);
2321 if($pct_count =~ /^(\d+)%$/) {
2322 $Global::halt_pct = $1/100;
2323 } elsif($pct_count =~ /^(\d+)$/) {
2324 $Global::halt_count = $1;
2325 } else {
2326 ::error("--halt $when,$fail_success ".
2327 "must be followed by ,number or ,percent%.");
2328 ::wait_and_exit(255);
2334 sub parse_replacement_string_options() {
2335 # Deal with --rpl
2336 # Uses:
2337 # %Global::rpl
2338 # $Global::parensleft
2339 # $Global::parensright
2340 # $opt::parens
2341 # $Global::parensleft
2342 # $Global::parensright
2343 # $opt::plus
2344 # %Global::plus
2345 # $opt::I
2346 # $opt::U
2347 # $opt::i
2348 # $opt::basenamereplace
2349 # $opt::dirnamereplace
2350 # $opt::seqreplace
2351 # $opt::slotreplace
2352 # $opt::basenameextensionreplace
2354 sub rpl($$) {
2355 # Modify %Global::rpl
2356 # Replace $old with $new
2357 my ($old,$new) = @_;
2358 if($old ne $new) {
2359 $Global::rpl{$new} = $Global::rpl{$old};
2360 delete $Global::rpl{$old};
2363 my $parens = "{==}";
2364 if(defined $opt::parens) { $parens = $opt::parens; }
2365 my $parenslen = 0.5*length $parens;
2366 $Global::parensleft = substr($parens,0,$parenslen);
2367 $Global::parensright = substr($parens,$parenslen);
2368 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
2369 if(defined $opt::I) { rpl('{}',$opt::I); }
2370 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
2371 if(defined $opt::U) { rpl('{.}',$opt::U); }
2372 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
2373 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
2374 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
2375 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
2376 if(defined $opt::basenameextensionreplace) {
2377 rpl('{/.}',$opt::basenameextensionreplace);
2379 for(@opt::rpl) {
2380 # Create $Global::rpl entries for --rpl options
2381 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2382 my ($shorthand,$long) = split/\s/,$_,2;
2383 $Global::rpl{$shorthand} = $long;
2387 sub parse_semaphore() {
2388 # Semaphore defaults
2389 # Must be done before computing number of processes and max_line_length
2390 # because when running as a semaphore GNU Parallel does not read args
2391 # Uses:
2392 # $opt::semaphore
2393 # $Global::semaphore
2394 # $opt::semaphoretimeout
2395 # $Semaphore::timeout
2396 # $opt::semaphorename
2397 # $Semaphore::name
2398 # $opt::fg
2399 # $Semaphore::fg
2400 # $opt::wait
2401 # $Semaphore::wait
2402 # $opt::bg
2403 # @opt::a
2404 # @Global::unget_argv
2405 # $Global::default_simultaneous_sshlogins
2406 # $opt::jobs
2407 # $Global::interactive
2408 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
2409 if(defined $opt::semaphore) { $Global::semaphore = 1; }
2410 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
2411 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
2412 if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
2413 $Global::semaphore = 1;
2415 if(defined $opt::bg) { $Global::semaphore = 1; }
2416 if(defined $opt::wait and not $opt::sqlmaster) {
2417 $Global::semaphore = 1; @ARGV = "true";
2419 if($Global::semaphore) {
2420 if(@opt::a) {
2421 # A semaphore does not take input from neither stdin nor file
2422 ::error("A semaphore does not take input from neither stdin nor a file\n");
2423 ::wait_and_exit(255);
2425 @opt::a = ("/dev/null");
2426 # Append a dummy empty argument
2427 # \0 => nothing (not the empty string)
2428 push(@Global::unget_argv, [Arg->new("\0noarg")]);
2429 $Semaphore::timeout = $opt::semaphoretimeout || 0;
2430 if(defined $opt::semaphorename) {
2431 $Semaphore::name = $opt::semaphorename;
2432 } else {
2433 local $/ = "\n";
2434 $Semaphore::name = `tty`;
2435 chomp $Semaphore::name;
2437 $Semaphore::fg = $opt::fg;
2438 $Semaphore::wait = $opt::wait;
2439 $Global::default_simultaneous_sshlogins = 1;
2440 if(not defined $opt::jobs) {
2441 $opt::jobs = 1;
2443 if($Global::interactive and $opt::bg) {
2444 ::error("Jobs running in the ".
2445 "background cannot be interactive.");
2446 ::wait_and_exit(255);
2451 sub record_env() {
2452 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2453 # Returns: N/A
2454 my $ignore_filename = $Global::config_dir . "/ignored_vars";
2455 if(open(my $vars_fh, ">", $ignore_filename)) {
2456 print $vars_fh map { $_,"\n" } keys %ENV;
2457 } else {
2458 ::error("Cannot write to $ignore_filename.");
2459 ::wait_and_exit(255);
2463 sub open_joblog() {
2464 # Open joblog as specified by --joblog
2465 # Uses:
2466 # $opt::resume
2467 # $opt::resume_failed
2468 # $opt::joblog
2469 # $opt::results
2470 # $Global::job_already_run
2471 # %Global::fd
2472 my $append = 0;
2473 if(($opt::resume or $opt::resume_failed)
2475 not ($opt::joblog or $opt::results)) {
2476 ::error("--resume and --resume-failed require --joblog or --results.");
2477 ::wait_and_exit(255);
2479 if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
2480 # --joblog +filename = append to filename
2481 $append = 1;
2483 if($opt::joblog
2485 ($opt::sqlmaster
2487 not $opt::sqlworker)) {
2488 # Do not log if --sqlworker
2489 if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
2490 if(open(my $joblog_fh, "<", $opt::joblog)) {
2491 # Read the joblog
2492 # Override $/ with \n because -d might be set
2493 local $/ = "\n";
2494 # If there is a header: Open as append later
2495 $append = <$joblog_fh>;
2496 my $joblog_regexp;
2497 if($opt::retry_failed) {
2498 # Make a regexp that only matches commands with exit+signal=0
2499 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2500 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2501 my @group;
2502 while(<$joblog_fh>) {
2503 if(/$joblog_regexp/o) {
2504 # This is 30% faster than set_job_already_run($1);
2505 vec($Global::job_already_run,($1||0),1) = 1;
2506 $Global::total_completed++;
2507 $group[$1-1] = "true";
2508 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2509 # Grab out the command
2510 $group[$1-1] = $3;
2511 } else {
2512 chomp;
2513 ::error("Format of '$opt::joblog' is wrong: $_");
2514 ::wait_and_exit(255);
2517 if(@group) {
2518 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2519 unlink($name);
2520 # Put args into argfile
2521 if(grep /\0/, @group) {
2522 # force --null to deal with \n in commandlines
2523 ::warning("Command lines contain newline. Forcing --null.");
2524 $opt::null = 1;
2525 $/ = "\0";
2527 # Replace \0 with '\n' as used in print_joblog()
2528 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2529 seek $outfh, 0, 0;
2530 exit_if_disk_full();
2531 # Set filehandle to -a
2532 @opt::a = ($outfh);
2534 # Remove $command (so -a is run)
2535 @ARGV = ();
2537 if($opt::resume || $opt::resume_failed) {
2538 if($opt::resume_failed) {
2539 # Make a regexp that only matches commands with exit+signal=0
2540 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2541 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2542 } else {
2543 # Just match the job number
2544 $joblog_regexp='^(\d+)';
2546 while(<$joblog_fh>) {
2547 if(/$joblog_regexp/o) {
2548 # This is 30% faster than set_job_already_run($1);
2549 vec($Global::job_already_run,($1||0),1) = 1;
2550 $Global::total_completed++;
2551 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2552 ::error("Format of '$opt::joblog' is wrong: $_");
2553 ::wait_and_exit(255);
2557 close $joblog_fh;
2559 # $opt::null may be set if the commands contain \n
2560 if($opt::null) { $/ = "\0"; }
2562 if($opt::dryrun) {
2563 # Do not write to joblog in a dry-run
2564 if(not open($Global::joblog, ">", "/dev/null")) {
2565 ::error("Cannot write to --joblog $opt::joblog.");
2566 ::wait_and_exit(255);
2568 } elsif($append) {
2569 # Append to joblog
2570 if(not open($Global::joblog, ">>", $opt::joblog)) {
2571 ::error("Cannot append to --joblog $opt::joblog.");
2572 ::wait_and_exit(255);
2574 } else {
2575 if($opt::joblog eq "-") {
2576 # Use STDOUT as joblog
2577 $Global::joblog = $Global::fd{1};
2578 } elsif(not open($Global::joblog, ">", $opt::joblog)) {
2579 # Overwrite the joblog
2580 ::error("Cannot write to --joblog $opt::joblog.");
2581 ::wait_and_exit(255);
2583 print $Global::joblog
2584 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2585 "Send", "Receive", "Exitval", "Signal", "Command"
2586 ). "\n";
2591 sub open_csv() {
2592 if($opt::results) {
2593 # Output as CSV/TSV
2594 if($opt::results eq "-.csv"
2596 $opt::results eq "-.tsv") {
2597 # Output as CSV/TSV on stdout
2598 open $Global::csv_fh, ">&", "STDOUT" or
2599 ::die_bug("Can't dup STDOUT in csv: $!");
2600 # Do not print any other output to STDOUT
2601 # by forcing all other output to /dev/null
2602 open my $fd, ">", "/dev/null" or
2603 ::die_bug("Can't >/dev/null in csv: $!");
2604 $Global::fd{1} = $fd;
2605 $Global::fd{2} = $fd;
2606 } elsif($Global::csvsep) {
2607 if(not open($Global::csv_fh,">",$opt::results)) {
2608 ::error("Cannot open results file `$opt::results': ".
2609 "$!.");
2610 wait_and_exit(255);
2616 sub find_compression_program() {
2617 # Find a fast compression program
2618 # Returns:
2619 # $compress_program = compress program with options
2620 # $decompress_program = decompress program with options
2622 # Search for these. Sorted by speed on 128 core
2624 # seq 120000000|shuf > 1gb &
2625 # apt-get update
2626 # apt install make g++ htop
2627 # wget -O - pi.dk/3 | bash
2628 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2629 # git clone https://github.com/facebook/zstd.git
2630 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2631 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2632 # chmod +x /usr/local/bin/lrz
2633 # wait
2634 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2635 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2636 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2637 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2638 # sort -nk4 jl-?
2640 # 1-core:
2641 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2642 # 4-cores:
2643 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2644 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2645 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2646 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2647 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2649 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2650 lrz pxz bzip2 lzma xz clzip);
2651 for my $p (@prg) {
2652 if(which($p)) {
2653 return ("$p -c -1","$p -dc");
2656 # Fall back to cat
2657 return ("cat","cat");
2660 sub read_options() {
2661 # Read options from command line, profile and $PARALLEL
2662 # Uses:
2663 # $opt::shebang_wrap
2664 # $opt::shebang
2665 # @ARGV
2666 # $opt::plain
2667 # @opt::profile
2668 # $ENV{'HOME'}
2669 # $ENV{'PARALLEL'}
2670 # Returns:
2671 # @ARGV_no_opt = @ARGV without --options
2673 # This must be done first as this may exec myself
2674 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2675 $ARGV[0] =~ /^--shebang-?wrap/ or
2676 $ARGV[0] =~ /^--hashbang/)) {
2677 # Program is called from #! line in script
2678 # remove --shebang-wrap if it is set
2679 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
2680 # remove --shebang if it is set
2681 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
2682 # remove --hashbang if it is set
2683 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
2684 if($opt::shebang) {
2685 my $argfile = Q(pop @ARGV);
2686 # exec myself to split $ARGV[0] into separate fields
2687 exec "$0 --skip-first-line -a $argfile @ARGV";
2689 if($opt::shebang_wrap) {
2690 my @options;
2691 my @parser;
2692 if ($^O eq 'freebsd') {
2693 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2694 my @nooptions = @ARGV;
2695 get_options_from_array(\@nooptions);
2696 while($#ARGV > $#nooptions) {
2697 push @options, shift @ARGV;
2699 while(@ARGV and $ARGV[0] ne ":::") {
2700 push @parser, shift @ARGV;
2702 if(@ARGV and $ARGV[0] eq ":::") {
2703 shift @ARGV;
2705 } else {
2706 @options = shift @ARGV;
2708 my $script = Q(shift @ARGV);
2709 # exec myself to split $ARGV[0] into separate fields
2710 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2711 "::: @ARGV";
2714 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2715 ::warning("--shebang and --shebang-wrap must be the first argument.\n");
2718 Getopt::Long::Configure("bundling","require_order");
2719 my @ARGV_copy = @ARGV;
2720 my @ARGV_orig = @ARGV;
2721 # Check if there is a --profile to set @opt::profile
2722 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
2723 my @ARGV_profile = ();
2724 my @ARGV_env = ();
2725 if(not $opt::plain) {
2726 # Add options from $PARALLEL_HOME/config and other profiles
2727 my @config_profiles = (
2728 "/etc/parallel/config",
2729 (map { "$_/config" } @Global::config_dirs),
2730 $ENV{'HOME'}."/.parallelrc");
2731 my @profiles = @config_profiles;
2732 if(@opt::profile) {
2733 # --profile overrides default profiles
2734 @profiles = ();
2735 for my $profile (@opt::profile) {
2736 if($profile =~ m:^\./|^/:) {
2737 # Look for ./profile in .
2738 # Look for /profile in /
2739 push @profiles, grep { -r $_ } $profile;
2740 } else {
2741 # Look for the $profile in @Global::config_dirs
2742 push @profiles, grep { -r $_ }
2743 map { "$_/$profile" } @Global::config_dirs;
2747 for my $profile (@profiles) {
2748 if(-r $profile) {
2749 ::debug("init","Read $profile\n");
2750 local $/ = "\n";
2751 open (my $in_fh, "<", $profile) ||
2752 ::die_bug("read-profile: $profile");
2753 while(<$in_fh>) {
2754 /^\s*\#/ and next;
2755 chomp;
2756 push @ARGV_profile, shell_words($_);
2758 close $in_fh;
2759 } else {
2760 if(grep /^$profile$/, @config_profiles) {
2761 # config file is not required to exist
2762 } else {
2763 ::error("$profile not readable.");
2764 wait_and_exit(255);
2768 # Add options from shell variable $PARALLEL
2769 if($ENV{'PARALLEL'}) {
2770 push @ARGV_env, shell_words($ENV{'PARALLEL'});
2772 # Add options from env_parallel.csh via $PARALLEL_CSH
2773 if($ENV{'PARALLEL_CSH'}) {
2774 push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'});
2777 Getopt::Long::Configure("bundling","require_order");
2778 get_options_from_array(\@ARGV_profile) || die_usage();
2779 get_options_from_array(\@ARGV_env) || die_usage();
2780 get_options_from_array(\@ARGV) || die_usage();
2781 # What were the options given on the command line?
2782 # Used to start --sqlworker
2783 my $ai = arrayindex(\@ARGV_orig, \@ARGV);
2784 @Global::options_in_argv = @ARGV_orig[0..$ai-1];
2785 # Prepend non-options to @ARGV (such as commands like 'nice')
2786 unshift @ARGV, @ARGV_profile, @ARGV_env;
2787 return @ARGV;
2790 sub arrayindex() {
2791 # Similar to Perl's index function, but for arrays
2792 # Input:
2793 # $arr_ref1 = ref to @array1 to search in
2794 # $arr_ref2 = ref to @array2 to search for
2795 # Returns:
2796 # $pos = position of @array1 in @array2, -1 if not found
2797 my ($arr_ref1,$arr_ref2) = @_;
2798 my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
2799 my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
2800 my $i = index($array1_as_string,$array2_as_string,0);
2801 if($i == -1) { return -1 }
2802 my @before = split /\0/, substr($array1_as_string,0,$i);
2803 return $#before;
2806 sub read_args_from_command_line() {
2807 # Arguments given on the command line after:
2808 # ::: ($Global::arg_sep)
2809 # :::: ($Global::arg_file_sep)
2810 # :::+ ($Global::arg_sep with --link)
2811 # ::::+ ($Global::arg_file_sep with --link)
2812 # Removes the arguments from @ARGV and:
2813 # - puts filenames into -a
2814 # - puts arguments into files and add the files to -a
2815 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2816 # Input:
2817 # @::ARGV = command option ::: arg arg arg :::: argfiles
2818 # Uses:
2819 # $Global::arg_sep
2820 # $Global::arg_file_sep
2821 # $opt::internal_pipe_means_argfiles
2822 # $opt::pipe
2823 # @opt::a
2824 # Returns:
2825 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2826 my @new_argv = ();
2827 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2828 if($arg eq $Global::arg_sep
2830 $arg eq $Global::arg_sep."+"
2832 $arg eq $Global::arg_file_sep
2834 $arg eq $Global::arg_file_sep."+") {
2835 my $group_sep = $arg; # This group of arguments is args or argfiles
2836 my @group;
2837 while(defined ($arg = shift @ARGV)) {
2838 if($arg eq $Global::arg_sep
2840 $arg eq $Global::arg_sep."+"
2842 $arg eq $Global::arg_file_sep
2844 $arg eq $Global::arg_file_sep."+") {
2845 # exit while loop if finding new separator
2846 last;
2847 } else {
2848 # If not hitting ::: :::+ :::: or ::::+
2849 # Append it to the group
2850 push @group, $arg;
2853 my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0;
2854 my $is_file = ($group_sep eq $Global::arg_file_sep
2856 $group_sep eq $Global::arg_file_sep."+");
2857 if($is_file) {
2858 # :::: / ::::+
2859 push @opt::linkinputsource, map { $is_linked } @group;
2860 } else {
2861 # ::: / :::+
2862 push @opt::linkinputsource, $is_linked;
2864 if($is_file
2865 or ($opt::internal_pipe_means_argfiles and $opt::pipe)
2867 # Group of file names on the command line.
2868 # Append args into -a
2869 push @opt::a, @group;
2870 } else {
2871 # Group of arguments on the command line.
2872 # Put them into a file.
2873 # Create argfile
2874 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2875 unlink($name);
2876 # Put args into argfile
2877 print $outfh map { $_,$/ } @group;
2878 seek $outfh, 0, 0;
2879 exit_if_disk_full();
2880 # Append filehandle to -a
2881 push @opt::a, $outfh;
2883 if(defined($arg)) {
2884 # $arg is ::: :::+ :::: or ::::+
2885 # so there is another group
2886 redo;
2887 } else {
2888 # $arg is undef -> @ARGV empty
2889 last;
2892 push @new_argv, $arg;
2894 # Output: @ARGV = command to run with options
2895 return @new_argv;
2898 sub cleanup() {
2899 # Returns: N/A
2900 unlink keys %Global::unlink;
2901 map { rmdir $_ } keys %Global::unlink;
2902 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
2903 for(keys %Global::sshmaster) {
2904 # If 'ssh -M's are running: kill them
2905 kill "TERM", $_;
2910 sub __QUOTING_ARGUMENTS_FOR_SHELL__() {}
2912 sub shell_quote(@) {
2913 # Input:
2914 # @strings = strings to be quoted
2915 # Returns:
2916 # @shell_quoted_strings = string quoted as needed by the shell
2917 return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
2920 sub shell_quote_scalar_rc($) {
2921 # Quote for the rc-shell
2922 my $a = $_[0];
2923 if(defined $a) {
2924 if(($a =~ s/'/''/g)
2926 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2927 # A string was replaced
2928 # No need to test for "" or \0
2929 } elsif($a eq "") {
2930 $a = "''";
2931 } elsif($a eq "\0") {
2932 $a = "";
2935 return $a;
2938 sub shell_quote_scalar_csh($) {
2939 # Quote for (t)csh
2940 my $a = $_[0];
2941 if(defined $a) {
2942 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2943 # This is 1% faster than the above
2944 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2946 # quote newline in csh as \\\n
2947 ($a =~ s/[\n]/"\\\n"/go)) {
2948 # A string was replaced
2949 # No need to test for "" or \0
2950 } elsif($a eq "") {
2951 $a = "''";
2952 } elsif($a eq "\0") {
2953 $a = "";
2956 return $a;
2959 sub shell_quote_scalar_default($) {
2960 # Quote for other shells (Bourne compatibles)
2961 # Inputs:
2962 # $string = string to be quoted
2963 # Returns:
2964 # $shell_quoted = string quoted as needed by the shell
2965 my $s = $_[0];
2966 if($s =~ /[^-_.+a-z0-9\/]/i) {
2967 $s =~ s/'/'"'"'/g; # "-quote single quotes
2968 $s = "'$s'"; # '-quote entire string
2969 $s =~ s/^''//; # Remove unneeded '' at ends
2970 $s =~ s/''$//; # (faster than s/^''|''$//g)
2971 return $s;
2972 } elsif ($s eq "") {
2973 return "''";
2974 } else {
2975 # No quoting needed
2976 return $s;
2980 sub shell_quote_scalar($) {
2981 # Quote the string so the shell will not expand any special chars
2982 # Inputs:
2983 # $string = string to be quoted
2984 # Returns:
2985 # $shell_quoted = string quoted as needed by the shell
2987 # Speed optimization: Choose the correct shell_quote_scalar_*
2988 # and call that directly from now on
2989 no warnings 'redefine';
2990 if($Global::cshell) {
2991 # (t)csh
2992 *shell_quote_scalar = \&shell_quote_scalar_csh;
2993 } elsif($Global::shell =~ m:(^|/)rc$:) {
2994 # rc-shell
2995 *shell_quote_scalar = \&shell_quote_scalar_rc;
2996 } else {
2997 # other shells
2998 *shell_quote_scalar = \&shell_quote_scalar_default;
3000 # The sub is now redefined. Call it
3001 return shell_quote_scalar($_[0]);
3004 sub Q($) {
3005 # Q alias for ::shell_quote_scalar
3006 my $ret = shell_quote_scalar($_[0]);
3007 no warnings 'redefine';
3008 *Q = \&::shell_quote_scalar;
3009 return $ret;
3012 sub shell_quote_file($) {
3013 # Quote the string so shell will not expand any special chars
3014 # and prepend ./ if needed
3015 # Input:
3016 # $filename = filename to be shell quoted
3017 # Returns:
3018 # $quoted_filename = filename quoted with \ and ./ if needed
3019 my $a = shift;
3020 if(defined $a) {
3021 if($a =~ m:^/: or $a =~ m:^\./:) {
3022 # /abs/path or ./rel/path => skip
3023 } else {
3024 # rel/path => ./rel/path
3025 $a = "./".$a;
3028 return Q($a);
3031 sub shell_words(@) {
3032 # Input:
3033 # $string = shell line
3034 # Returns:
3035 # @shell_words = $string split into words as shell would do
3036 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3037 return Text::ParseWords::shellwords(@_);
3040 sub perl_quote_scalar($) {
3041 # Quote the string so perl's eval will not expand any special chars
3042 # Inputs:
3043 # $string = string to be quoted
3044 # Returns:
3045 # $perl_quoted = string quoted with \ as needed by perl's eval
3046 my $a = $_[0];
3047 if(defined $a) {
3048 $a =~ s/[\\\"\$\@]/\\$&/go;
3050 return $a;
3053 # -w complains about prototype
3054 sub pQ($) {
3055 # pQ alias for ::perl_quote_scalar
3056 my $ret = perl_quote_scalar($_[0]);
3057 *pQ = \&::perl_quote_scalar;
3058 return $ret;
3061 sub unquote_printf() {
3062 # Convert \t \n \r \000 \0
3063 # Inputs:
3064 # $string = string with \t \n \r \num \0
3065 # Returns:
3066 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3067 $_ = shift;
3068 s/\\t/\t/g;
3069 s/\\n/\n/g;
3070 s/\\r/\r/g;
3071 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3072 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3073 return $_;
3077 sub __FILEHANDLES__() {}
3080 sub save_stdin_stdout_stderr() {
3081 # Remember the original STDIN, STDOUT and STDERR
3082 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3083 # Uses:
3084 # %Global::fd
3085 # $Global::original_stderr
3086 # $Global::original_stdin
3087 # Returns: N/A
3089 # TODO Disabled until we have an open3 that will take n filehandles
3090 # for my $fdno (1..61) {
3091 # # /dev/fd/62 and above are used by bash for <(cmd)
3092 # # Find file descriptors that are already opened (by the shell)
3093 # Only focus on stdout+stderr for now
3094 for my $fdno (1..2) {
3095 my $fh;
3096 # 2-argument-open is used to be compatible with old perl 5.8.0
3097 # bug #43570: Perl 5.8.0 creates 61 files
3098 if(open($fh,">&=$fdno")) {
3099 $Global::fd{$fdno}=$fh;
3102 open $Global::original_stderr, ">&", "STDERR" or
3103 ::die_bug("Can't dup STDERR: $!");
3104 open $Global::status_fd, ">&", "STDERR" or
3105 ::die_bug("Can't dup STDERR: $!");
3106 open $Global::original_stdin, "<&", "STDIN" or
3107 ::die_bug("Can't dup STDIN: $!");
3110 sub enough_file_handles() {
3111 # Check that we have enough filehandles available for starting
3112 # another job
3113 # Uses:
3114 # $opt::ungroup
3115 # %Global::fd
3116 # Returns:
3117 # 1 if ungrouped (thus not needing extra filehandles)
3118 # 0 if too few filehandles
3119 # 1 if enough filehandles
3120 if(not $opt::ungroup) {
3121 my %fh;
3122 my $enough_filehandles = 1;
3123 # perl uses 7 filehandles for something?
3124 # open3 uses 2 extra filehandles temporarily
3125 # We need a filehandle for each redirected file descriptor
3126 # (normally just STDOUT and STDERR)
3127 for my $i (1..(7+2+keys %Global::fd)) {
3128 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3130 for (values %fh) { close $_; }
3131 return $enough_filehandles;
3132 } else {
3133 # Ungrouped does not need extra file handles
3134 return 1;
3138 sub open_or_exit($) {
3139 # Open a file name or exit if the file cannot be opened
3140 # Inputs:
3141 # $file = filehandle or filename to open
3142 # Uses:
3143 # $Global::original_stdin
3144 # Returns:
3145 # $fh = file handle to read-opened file
3146 my $file = shift;
3147 if($file eq "-") {
3148 return ($Global::original_stdin || *STDIN);
3150 if(ref $file eq "GLOB") {
3151 # This is an open filehandle
3152 return $file;
3154 my $fh = gensym;
3155 if(not open($fh, "<", $file)) {
3156 ::error("Cannot open input file `$file': No such file or directory.");
3157 wait_and_exit(255);
3159 return $fh;
3162 sub set_fh_blocking($) {
3163 # Set filehandle as blocking
3164 # Inputs:
3165 # $fh = filehandle to be blocking
3166 # Returns:
3167 # N/A
3168 my $fh = shift;
3169 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3170 my $flags;
3171 # Get the current flags on the filehandle
3172 fcntl($fh, &F_GETFL, $flags) || die $!;
3173 # Remove non-blocking from the flags
3174 $flags &= ~&O_NONBLOCK;
3175 # Set the flags on the filehandle
3176 fcntl($fh, &F_SETFL, $flags) || die $!;
3179 sub set_fh_non_blocking($) {
3180 # Set filehandle as non-blocking
3181 # Inputs:
3182 # $fh = filehandle to be blocking
3183 # Returns:
3184 # N/A
3185 my $fh = shift;
3186 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3187 my $flags;
3188 # Get the current flags on the filehandle
3189 fcntl($fh, &F_GETFL, $flags) || die $!;
3190 # Add non-blocking to the flags
3191 $flags |= &O_NONBLOCK;
3192 # Set the flags on the filehandle
3193 fcntl($fh, &F_SETFL, $flags) || die $!;
3197 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3200 # Variable structure:
3202 # $Global::running{$pid} = Pointer to Job-object
3203 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3204 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3205 # $Global::total_running = total number of running jobs
3206 # $Global::total_started = total jobs started
3207 # $Global::max_procs_file = filename if --jobs is given a filename
3208 # $Global::JobQueue = JobQueue object for the queue of jobs
3209 # $Global::timeoutq = queue of times where jobs timeout
3210 # $Global::newest_job = Job object of the most recent job started
3211 # $Global::newest_starttime = timestamp of $Global::newest_job
3212 # @Global::sshlogin
3213 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
3214 # $Global::start_no_new_jobs = should more jobs be started?
3215 # $Global::original_stderr = file handle for STDERR when the program started
3216 # $Global::total_started = total number of jobs started
3217 # $Global::joblog = filehandle of joblog
3218 # $Global::debug = Is debugging on?
3219 # $Global::exitstatus = status code of GNU Parallel
3220 # $Global::quoting = quote the command to run
3222 sub init_run_jobs() {
3223 # Set Global variables and progress signal handlers
3224 # Do the copying of basefiles
3225 # Returns: N/A
3226 $Global::total_running = 0;
3227 $Global::total_started = 0;
3228 $SIG{USR1} = \&list_running_jobs;
3229 $SIG{USR2} = \&toggle_progress;
3230 if(@opt::basefile) { setup_basefile(); }
3234 my $last_time;
3235 my %last_mtime;
3236 my $max_procs_file_last_mod;
3238 sub changed_procs_file {
3239 # If --jobs is a file and it is modfied:
3240 # Force recomputing of max_jobs_running for each $sshlogin
3241 # Uses:
3242 # $Global::max_procs_file
3243 # %Global::host
3244 # Returns: N/A
3245 if($Global::max_procs_file) {
3246 # --jobs filename
3247 my $mtime = (stat($Global::max_procs_file))[9];
3248 $max_procs_file_last_mod ||= 0;
3249 if($mtime > $max_procs_file_last_mod) {
3250 # file changed: Force re-computing max_jobs_running
3251 $max_procs_file_last_mod = $mtime;
3252 for my $sshlogin (values %Global::host) {
3253 $sshlogin->set_max_jobs_running(undef);
3259 sub changed_sshloginfile {
3260 # If --slf is changed:
3261 # reload --slf
3262 # filter_hosts
3263 # setup_basefile
3264 # Uses:
3265 # @opt::sshloginfile
3266 # @Global::sshlogin
3267 # %Global::host
3268 # $opt::filter_hosts
3269 # Returns: N/A
3270 if(@opt::sshloginfile) {
3271 # Is --sshloginfile changed?
3272 for my $slf (@opt::sshloginfile) {
3273 my $actual_file = expand_slf_shorthand($slf);
3274 my $mtime = (stat($actual_file))[9];
3275 $last_mtime{$actual_file} ||= $mtime;
3276 if($mtime - $last_mtime{$actual_file} > 1) {
3277 ::debug("run","--sshloginfile $actual_file changed. reload\n");
3278 $last_mtime{$actual_file} = $mtime;
3279 # Reload $slf
3280 # Empty sshlogins
3281 @Global::sshlogin = ();
3282 for (values %Global::host) {
3283 # Don't start new jobs on any host
3284 # except the ones added back later
3285 $_->set_max_jobs_running(0);
3287 # This will set max_jobs_running on the SSHlogins
3288 read_sshloginfile($actual_file);
3289 parse_sshlogin();
3290 $opt::filter_hosts and filter_hosts();
3291 setup_basefile();
3297 sub start_more_jobs {
3298 # Run start_another_job() but only if:
3299 # * not $Global::start_no_new_jobs set
3300 # * not JobQueue is empty
3301 # * not load on server is too high
3302 # * not server swapping
3303 # * not too short time since last remote login
3304 # Uses:
3305 # %Global::host
3306 # $Global::start_no_new_jobs
3307 # $Global::JobQueue
3308 # $opt::pipe
3309 # $opt::load
3310 # $opt::noswap
3311 # $opt::delay
3312 # $Global::newest_starttime
3313 # Returns:
3314 # $jobs_started = number of jobs started
3315 my $jobs_started = 0;
3316 if($Global::start_no_new_jobs) {
3317 return $jobs_started;
3319 if(time - ($last_time||0) > 1) {
3320 # At most do this every second
3321 $last_time = time;
3322 changed_procs_file();
3323 changed_sshloginfile();
3325 # This will start 1 job on each --sshlogin (if possible)
3326 # thus distribute the jobs on the --sshlogins round robin
3327 for my $sshlogin (values %Global::host) {
3328 if($Global::JobQueue->empty() and not $opt::pipe) {
3329 # No more jobs in the queue
3330 last;
3332 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
3333 $sshlogin->jobs_running(), "\n");
3334 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
3335 if($opt::delay
3337 $opt::delay - 0.008 > ::now() - $Global::newest_starttime) {
3338 # It has been too short since last start
3339 next;
3341 if($opt::load and $sshlogin->loadavg_too_high()) {
3342 # The load is too high or unknown
3343 next;
3345 if($opt::noswap and $sshlogin->swapping()) {
3346 # The server is swapping
3347 next;
3349 if($opt::limit and $sshlogin->limit()) {
3350 # Over limit
3351 next;
3353 if(($opt::memfree or $opt::memsuspend)
3355 $sshlogin->memfree() < $Global::memlimit) {
3356 # The server has not enough mem free
3357 ::debug("mem", "Not starting job: not enough mem\n");
3358 next;
3360 if($sshlogin->too_fast_remote_login()) {
3361 # It has been too short since
3362 next;
3364 debug("run", $sshlogin->string(),
3365 " has ", $sshlogin->jobs_running(),
3366 " out of ", $sshlogin->max_jobs_running(),
3367 " jobs running. Start another.\n");
3368 if(start_another_job($sshlogin) == 0) {
3369 # No more jobs to start on this $sshlogin
3370 debug("run","No jobs started on ",
3371 $sshlogin->string(), "\n");
3372 next;
3374 $sshlogin->inc_jobs_running();
3375 $sshlogin->set_last_login_at(::now());
3376 $jobs_started++;
3378 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
3379 $sshlogin->jobs_running(), " of ",
3380 $sshlogin->max_jobs_running(), "\n");
3383 return $jobs_started;
3388 my $no_more_file_handles_warned;
3390 sub start_another_job() {
3391 # If there are enough filehandles
3392 # and JobQueue not empty
3393 # and not $job is in joblog
3394 # Then grab a job from Global::JobQueue,
3395 # start it at sshlogin
3396 # mark it as virgin_job
3397 # Inputs:
3398 # $sshlogin = the SSHLogin to start the job on
3399 # Uses:
3400 # $Global::JobQueue
3401 # $opt::pipe
3402 # $opt::results
3403 # $opt::resume
3404 # @Global::virgin_jobs
3405 # Returns:
3406 # 1 if another jobs was started
3407 # 0 otherwise
3408 my $sshlogin = shift;
3409 # Do we have enough file handles to start another job?
3410 if(enough_file_handles()) {
3411 if($Global::JobQueue->empty() and not $opt::pipe) {
3412 # No more commands to run
3413 debug("start", "Not starting: JobQueue empty\n");
3414 return 0;
3415 } else {
3416 my $job;
3417 # Skip jobs already in job log
3418 # Skip jobs already in results
3419 do {
3420 $job = get_job_with_sshlogin($sshlogin);
3421 if(not defined $job) {
3422 # No command available for that sshlogin
3423 debug("start", "Not starting: no jobs available for ",
3424 $sshlogin->string(), "\n");
3425 return 0;
3427 if($job->is_already_in_joblog()) {
3428 $job->free_slot();
3430 } while ($job->is_already_in_joblog()
3432 ($opt::results and $opt::resume and $job->is_already_in_results()));
3433 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
3434 $job->replaced(),"'\n");
3435 if($job->start()) {
3436 if($opt::pipe) {
3437 if($job->virgin()) {
3438 push(@Global::virgin_jobs,$job);
3439 } else {
3440 # Block already set: This is a retry
3441 if(fork()) {
3442 ::debug("pipe","\n\nWriting ",length ${$job->block_ref()},
3443 " to ", $job->seq(),"\n");
3444 close $job->fh(0,"w");
3445 } else {
3446 $job->write($job->block_ref());
3447 close $job->fh(0,"w");
3448 exit(0);
3452 debug("start", "Started as seq ", $job->seq(),
3453 " pid:", $job->pid(), "\n");
3454 return 1;
3455 } else {
3456 # Not enough processes to run the job.
3457 # Put it back on the queue.
3458 $Global::JobQueue->unget($job);
3459 # Count down the number of jobs to run for this SSHLogin.
3460 my $max = $sshlogin->max_jobs_running();
3461 if($max > 1) { $max--; } else {
3462 my @arg;
3463 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3464 push @arg, map { $_->orig() } @$record;
3466 ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
3467 ::wait_and_exit(255);
3469 $sshlogin->set_max_jobs_running($max);
3470 # Sleep up to 300 ms to give other processes time to die
3471 ::usleep(rand()*300);
3472 ::warning("No more processes: ".
3473 "Decreasing number of running jobs to $max.",
3474 "Raising ulimit -u or /etc/security/limits.conf may help.");
3475 return 0;
3478 } else {
3479 # No more file handles
3480 $no_more_file_handles_warned++ or
3481 ::warning("No more file handles. ",
3482 "Raising ulimit -n or /etc/security/limits.conf may help.");
3483 debug("start", "No more file handles. ");
3484 return 0;
3489 sub init_progress() {
3490 # Uses:
3491 # $opt::bar
3492 # Returns:
3493 # list of computers for progress output
3494 $|=1;
3495 if($opt::bar) {
3496 return("","");
3498 my %progress = progress();
3499 return ("\nComputers / CPU cores / Max jobs to run\n",
3500 $progress{'workerlist'});
3503 sub drain_job_queue(@) {
3504 # Uses:
3505 # $opt::progress
3506 # $Global::total_running
3507 # $Global::max_jobs_running
3508 # %Global::running
3509 # $Global::JobQueue
3510 # %Global::host
3511 # $Global::start_no_new_jobs
3512 # Returns: N/A
3513 my @command = @_;
3514 if($opt::progress) {
3515 ::status_no_nl(init_progress());
3517 my $last_header = "";
3518 my $sleep = 0.2;
3519 do {
3520 while($Global::total_running > 0) {
3521 debug("init",$Global::total_running, "==", scalar
3522 keys %Global::running," slots: ", $Global::max_jobs_running);
3523 if($opt::pipe) {
3524 # When using --pipe sometimes file handles are not
3525 # closed properly
3526 for my $job (values %Global::running) {
3527 close $job->fh(0,"w");
3530 if($opt::progress) {
3531 my %progress = progress();
3532 if($last_header ne $progress{'header'}) {
3533 ::status("", $progress{'header'});
3534 $last_header = $progress{'header'};
3536 ::status_no_nl("\r",$progress{'status'});
3538 if($Global::total_running < $Global::max_jobs_running
3539 and not $Global::JobQueue->empty()) {
3540 # These jobs may not be started because of loadavg
3541 # or too little time between each ssh login.
3542 if(start_more_jobs() > 0) {
3543 # Exponential back-on if jobs were started
3544 $sleep = $sleep/2+0.001;
3547 # Exponential back-off sleeping
3548 $sleep = ::reap_usleep($sleep);
3550 if(not $Global::JobQueue->empty()) {
3551 # These jobs may not be started:
3552 # * because there the --filter-hosts has removed all
3553 if(not %Global::host) {
3554 ::error("There are no hosts left to run on.");
3555 ::wait_and_exit(255);
3557 # * because of loadavg
3558 # * because of too little time between each ssh login.
3559 $sleep = ::reap_usleep($sleep);
3560 start_more_jobs();
3561 if($Global::max_jobs_running == 0) {
3562 ::warning("There are no job slots available. Increase --jobs.");
3565 while($opt::sqlmaster and not $Global::sql->finished()) {
3566 # SQL master
3567 $sleep = ::reap_usleep($sleep);
3568 start_more_jobs();
3569 if($Global::start_sqlworker) {
3570 # Start an SQL worker as we are now sure there is work to do
3571 $Global::start_sqlworker = 0;
3572 if(my $pid = fork()) {
3573 $Global::unkilled_sqlworker = $pid;
3574 } else {
3575 # Replace --sql/--sqlandworker with --sqlworker
3576 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3577 # exec the --sqlworker
3578 exec($0,@ARGV,@command);
3582 } while ($Global::total_running > 0
3584 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3586 $opt::sqlmaster and not $Global::sql->finished());
3587 if($opt::progress) {
3588 my %progress = progress();
3589 ::status("\r".$progress{'status'});
3593 sub toggle_progress() {
3594 # Turn on/off progress view
3595 # Uses:
3596 # $opt::progress
3597 # Returns: N/A
3598 $opt::progress = not $opt::progress;
3599 if($opt::progress) {
3600 ::status_no_nl(init_progress());
3604 sub progress() {
3605 # Uses:
3606 # $opt::bar
3607 # $opt::eta
3608 # %Global::host
3609 # $Global::total_started
3610 # Returns:
3611 # $workerlist = list of workers
3612 # $header = that will fit on the screen
3613 # $status = message that will fit on the screen
3614 if($opt::bar) {
3615 return ("workerlist" => "", "header" => "", "status" => bar());
3617 my $eta = "";
3618 my ($status,$header)=("","");
3619 if($opt::eta) {
3620 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3621 compute_eta();
3622 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
3623 $this_eta, $left, $avgtime);
3625 my $termcols = terminal_columns();
3626 my @workers = sort keys %Global::host;
3627 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3628 my $workerno = 1;
3629 my %workerno = map { ($_=>$workerno++) } @workers;
3630 my $workerlist = "";
3631 for my $w (@workers) {
3632 $workerlist .=
3633 $workerno{$w}.":".$sshlogin{$w} ." / ".
3634 ($Global::host{$w}->ncpus() || "-")." / ".
3635 $Global::host{$w}->max_jobs_running()."\n";
3637 $status = "x"x($termcols+1);
3638 # Select an output format that will fit on a single line
3639 if(length $status > $termcols) {
3640 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3641 $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
3642 $status = $eta .
3643 join(" ",map
3645 if($Global::total_started) {
3646 my $completed = ($Global::host{$_}->jobs_completed()||0);
3647 my $running = $Global::host{$_}->jobs_running();
3648 my $time = $completed ? (time-$^T)/($completed) : "0";
3649 sprintf("%s:%d/%d/%d%%/%.1fs ",
3650 $sshlogin{$_}, $running, $completed,
3651 ($running+$completed)*100
3652 / $Global::total_started, $time);
3654 } @workers);
3656 if(length $status > $termcols) {
3657 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3658 $header = "Computer:jobs running/jobs completed/%of started jobs";
3659 $status = $eta .
3660 join(" ",map
3662 if($Global::total_started) {
3663 my $completed = ($Global::host{$_}->jobs_completed()||0);
3664 my $running = $Global::host{$_}->jobs_running();
3665 my $time = $completed ? (time-$^T)/($completed) : "0";
3666 sprintf("%s:%d/%d/%d%%/%.1fs ",
3667 $workerno{$_}, $running, $completed,
3668 ($running+$completed)*100
3669 / $Global::total_started, $time);
3671 } @workers);
3673 if(length $status > $termcols) {
3674 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3675 $header = "Computer:jobs running/jobs completed/%of started jobs";
3676 $status = $eta .
3677 join(" ",map
3679 if($Global::total_started) {
3680 sprintf("%s:%d/%d/%d%%",
3681 $sshlogin{$_},
3682 $Global::host{$_}->jobs_running(),
3683 ($Global::host{$_}->jobs_completed()||0),
3684 ($Global::host{$_}->jobs_running()+
3685 ($Global::host{$_}->jobs_completed()||0))*100
3686 / $Global::total_started)
3689 @workers);
3691 if(length $status > $termcols) {
3692 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3693 $header = "Computer:jobs running/jobs completed/%of started jobs";
3694 $status = $eta .
3695 join(" ",map
3697 if($Global::total_started) {
3698 sprintf("%s:%d/%d/%d%%",
3699 $workerno{$_},
3700 $Global::host{$_}->jobs_running(),
3701 ($Global::host{$_}->jobs_completed()||0),
3702 ($Global::host{$_}->jobs_running()+
3703 ($Global::host{$_}->jobs_completed()||0))*100
3704 / $Global::total_started)
3707 @workers);
3709 if(length $status > $termcols) {
3710 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3711 $header = "Computer:jobs running/jobs completed";
3712 $status = $eta .
3713 join(" ",map
3714 { sprintf("%s:%d/%d",
3715 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3716 ($Global::host{$_}->jobs_completed()||0)) }
3717 @workers);
3719 if(length $status > $termcols) {
3720 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3721 $header = "Computer:jobs running/jobs completed";
3722 $status = $eta .
3723 join(" ",map
3724 { sprintf("%s:%d/%d",
3725 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3726 ($Global::host{$_}->jobs_completed()||0)) }
3727 @workers);
3729 if(length $status > $termcols) {
3730 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3731 $header = "Computer:jobs running/jobs completed";
3732 $status = $eta .
3733 join(" ",map
3734 { sprintf("%s:%d/%d",
3735 $workerno{$_}, $Global::host{$_}->jobs_running(),
3736 ($Global::host{$_}->jobs_completed()||0)) }
3737 @workers);
3739 if(length $status > $termcols) {
3740 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3741 $header = "Computer:jobs completed";
3742 $status = $eta .
3743 join(" ",map
3744 { sprintf("%s:%d",
3745 $sshlogin{$_},
3746 ($Global::host{$_}->jobs_completed()||0)) }
3747 @workers);
3749 if(length $status > $termcols) {
3750 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3751 $header = "Computer:jobs completed";
3752 $status = $eta .
3753 join(" ",map
3754 { sprintf("%s:%d",
3755 $workerno{$_},
3756 ($Global::host{$_}->jobs_completed()||0)) }
3757 @workers);
3759 return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
3764 my ($first_completed, $smoothed_avg_time, $last_eta);
3766 sub compute_eta {
3767 # Calculate important numbers for ETA
3768 # Returns:
3769 # $total = number of jobs in total
3770 # $completed = number of jobs completed
3771 # $left = number of jobs left
3772 # $pctcomplete = percent of jobs completed
3773 # $avgtime = averaged time
3774 # $eta = smoothed eta
3775 my $completed = $Global::total_completed;
3776 # In rare cases with -X will $completed > total_jobs()
3777 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3778 my $left = $total - $completed;
3779 if(not $completed) {
3780 return($total, $completed, $left, 0, 0, 0);
3782 my $pctcomplete = ::min($completed / $total,100);
3783 $first_completed ||= time;
3784 my $timepassed = (time - $first_completed);
3785 my $avgtime = $timepassed / $completed;
3786 $smoothed_avg_time ||= $avgtime;
3787 # Smooth the eta so it does not jump wildly
3788 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3789 $pctcomplete * $avgtime;
3790 my $eta = int($left * $smoothed_avg_time);
3791 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3792 # Eta jumped less that 10% up: Keep the last eta instead
3793 $eta = $last_eta;
3794 } else {
3795 $last_eta = $eta;
3797 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3802 my ($rev,$reset);
3804 sub bar() {
3805 # Return:
3806 # $status = bar with eta, completed jobs, arg and pct
3807 $rev ||= "\033[7m";
3808 $reset ||= "\033[0m";
3809 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3810 compute_eta();
3811 my $arg = $Global::newest_job ?
3812 $Global::newest_job->{'commandline'}->
3813 replace_placeholders(["\257<\257>"],0,0) : "";
3814 # These chars mess up display in the terminal
3815 $arg =~ tr/[\011-\016\033\302-\365]//d;
3816 my $eta_dhms = ::seconds_to_time_units($eta);
3817 my $bar_text =
3818 sprintf("%d%% %d:%d=%s %s",
3819 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3820 my $terminal_width = terminal_columns();
3821 my $s = sprintf("%-${terminal_width}s",
3822 substr($bar_text." "x$terminal_width,
3823 0,$terminal_width));
3824 my $width = int($terminal_width * $pctcomplete);
3825 substr($s,$width,0) = $reset;
3826 my $zenity = sprintf("%-${terminal_width}s",
3827 substr("# $eta sec $arg",
3828 0,$terminal_width));
3829 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3830 "\r" . $rev . $s . $reset;
3831 return $s;
3836 my ($columns,$last_column_time);
3838 sub terminal_columns() {
3839 # Get the number of columns of the terminal.
3840 # Only update once per second.
3841 # Returns:
3842 # number of columns of the screen
3843 if(not $columns or $last_column_time < time) {
3844 $last_column_time = time;
3845 $columns = $ENV{'COLUMNS'};
3846 if(not $columns) {
3847 # && true is to force spawning a shell and not just exec'ing
3848 my $stty = qx{stty -a </dev/tty 2>/dev/null && true};
3849 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3850 # MacOSX/IRIX/AIX/Tru64
3851 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3852 # GNU/Linux/Solaris
3853 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3854 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3855 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3856 # QNX
3857 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3859 if(not $columns) {
3860 # && true is to force spawning a shell and not just exec'ing
3861 my $resize = qx{resize 2>/dev/null && true};
3862 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3864 $columns ||= 80;
3866 return $columns;
3870 # Prototype forwarding
3871 sub get_job_with_sshlogin($);
3872 sub get_job_with_sshlogin($) {
3873 # Input:
3874 # $sshlogin = which host should the job be run on?
3875 # Uses:
3876 # $opt::hostgroups
3877 # $Global::JobQueue
3878 # Returns:
3879 # $job = next job object for $sshlogin if any available
3880 my $sshlogin = shift;
3881 my $job;
3883 if ($opt::hostgroups) {
3884 my @other_hostgroup_jobs = ();
3886 while($job = $Global::JobQueue->get()) {
3887 if($sshlogin->in_hostgroups($job->hostgroups())) {
3888 # Found a job to be run on a hostgroup of this
3889 # $sshlogin
3890 last;
3891 } else {
3892 # This job was not in the hostgroups of $sshlogin
3893 push @other_hostgroup_jobs, $job;
3896 $Global::JobQueue->unget(@other_hostgroup_jobs);
3897 if(not defined $job) {
3898 # No more jobs
3899 return undef;
3901 } else {
3902 $job = $Global::JobQueue->get();
3903 if(not defined $job) {
3904 # No more jobs
3905 ::debug("start", "No more jobs: JobQueue empty\n");
3906 return undef;
3909 if(not $job->suspended()) {
3910 $job->set_sshlogin($sshlogin);
3912 if($opt::retries and $job->failed_here()) {
3913 # This command with these args failed for this sshlogin
3914 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3915 # Only look at the Global::host that have > 0 jobslots
3916 if($no_of_failed_sshlogins ==
3917 grep { $_->max_jobs_running() > 0 } values %Global::host
3918 and $job->failed_here() == $min_failures) {
3919 # It failed the same or more times on another host:
3920 # run it on this host
3921 } else {
3922 # If it failed fewer times on another host:
3923 # Find another job to run
3924 my $nextjob;
3925 if(not $Global::JobQueue->empty()) {
3926 # This can potentially recurse for all args
3927 no warnings 'recursion';
3928 $nextjob = get_job_with_sshlogin($sshlogin);
3930 # Push the command back on the queue
3931 $Global::JobQueue->unget($job);
3932 return $nextjob;
3935 return $job;
3939 sub __REMOTE_SSH__() {}
3942 sub read_sshloginfiles(@) {
3943 # Read a list of --slf's
3944 # Input:
3945 # @files = files or symbolic file names to read
3946 # Returns: N/A
3947 for my $s (@_) {
3948 read_sshloginfile(expand_slf_shorthand($s));
3952 sub expand_slf_shorthand($) {
3953 # Expand --slf shorthand into a read file name
3954 # Input:
3955 # $file = file or symbolic file name to read
3956 # Returns:
3957 # $file = actual file name to read
3958 my $file = shift;
3959 if($file eq "-") {
3960 # skip: It is stdin
3961 } elsif($file eq "..") {
3962 $file = $Global::config_dir."/sshloginfile";
3963 } elsif($file eq ".") {
3964 $file = "/etc/parallel/sshloginfile";
3965 } elsif(not -r $file) {
3966 for(@Global::config_dirs) {
3967 if(not -r $_."/".$file) {
3968 # Try prepending $PARALLEL_HOME
3969 ::error("Cannot open $file.");
3970 ::wait_and_exit(255);
3971 } else {
3972 $file = $_."/".$file;
3973 last;
3977 return $file;
3980 sub read_sshloginfile($) {
3981 # Read sshloginfile into @Global::sshlogin
3982 # Input:
3983 # $file = file to read
3984 # Uses:
3985 # @Global::sshlogin
3986 # Returns: N/A
3987 local $/ = "\n";
3988 my $file = shift;
3989 my $close = 1;
3990 my $in_fh;
3991 ::debug("init","--slf ",$file);
3992 if($file eq "-") {
3993 $in_fh = *STDIN;
3994 $close = 0;
3995 } else {
3996 if(not open($in_fh, "<", $file)) {
3997 # Try the filename
3998 ::error("Cannot open $file.");
3999 ::wait_and_exit(255);
4002 while(<$in_fh>) {
4003 chomp;
4004 /^\s*#/ and next;
4005 /^\s*$/ and next;
4006 push @Global::sshlogin, $_;
4008 if($close) {
4009 close $in_fh;
4013 sub parse_sshlogin() {
4014 # Parse @Global::sshlogin into %Global::host.
4015 # Keep only hosts that are in one of the given ssh hostgroups.
4016 # Uses:
4017 # @Global::sshlogin
4018 # $Global::minimal_command_line_length
4019 # %Global::host
4020 # $opt::transfer
4021 # @opt::return
4022 # $opt::cleanup
4023 # @opt::basefile
4024 # @opt::trc
4025 # Returns: N/A
4026 my @login;
4027 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
4028 for my $sshlogin (@Global::sshlogin) {
4029 # Split up -S sshlogin,sshlogin
4030 for my $s (split /,|\n/, $sshlogin) {
4031 if ($s eq ".." or $s eq "-") {
4032 # This may add to @Global::sshlogin - possibly bug
4033 read_sshloginfile(expand_slf_shorthand($s));
4034 } else {
4035 $s =~ s/\s*$//;
4036 push (@login, $s);
4040 $Global::minimal_command_line_length = 100_000_000;
4041 my @allowed_hostgroups;
4042 for my $ncpu_sshlogin_string (::uniq(@login)) {
4043 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
4044 my $sshlogin_string = $sshlogin->string();
4045 if($sshlogin_string eq "") {
4046 # This is an ssh group: -S @webservers
4047 push @allowed_hostgroups, $sshlogin->hostgroups();
4048 next;
4050 if($Global::host{$sshlogin_string}) {
4051 # This sshlogin has already been added:
4052 # It is probably a host that has come back
4053 # Set the max_jobs_running back to the original
4054 debug("run","Already seen $sshlogin_string\n");
4055 if($sshlogin->{'ncpus'}) {
4056 # If ncpus set by '#/' of the sshlogin, overwrite it:
4057 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4059 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
4060 next;
4062 $sshlogin->set_maxlength(Limits::Command::max_length());
4064 $Global::minimal_command_line_length =
4065 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
4066 $Global::host{$sshlogin_string} = $sshlogin;
4068 if(@allowed_hostgroups) {
4069 # Remove hosts that are not in these groups
4070 while (my ($string, $sshlogin) = each %Global::host) {
4071 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4072 delete $Global::host{$string};
4077 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4078 if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) {
4079 if(not remote_hosts()) {
4080 # There are no remote hosts
4081 if(@opt::trc) {
4082 ::warning("--trc ignored as there are no remote --sshlogin.");
4083 } elsif (defined $opt::transfer) {
4084 ::warning("--transfer ignored as there are no remote --sshlogin.");
4085 } elsif (@opt::transfer_files) {
4086 ::warning("--transferfile ignored as there are no remote --sshlogin.");
4087 } elsif (@opt::return) {
4088 ::warning("--return ignored as there are no remote --sshlogin.");
4089 } elsif (defined $opt::cleanup) {
4090 ::warning("--cleanup ignored as there are no remote --sshlogin.");
4091 } elsif (@opt::basefile) {
4092 ::warning("--basefile ignored as there are no remote --sshlogin.");
4098 sub remote_hosts() {
4099 # Return sshlogins that are not ':'
4100 # Uses:
4101 # %Global::host
4102 # Returns:
4103 # list of sshlogins with ':' removed
4104 return grep !/^:$/, keys %Global::host;
4107 sub setup_basefile() {
4108 # Transfer basefiles to each $sshlogin
4109 # This needs to be done before first jobs on $sshlogin is run
4110 # Uses:
4111 # %Global::host
4112 # @opt::basefile
4113 # Returns: N/A
4114 my @cmd;
4115 my $rsync_destdir;
4116 my $workdir;
4117 for my $sshlogin (values %Global::host) {
4118 if($sshlogin->string() eq ":") { next }
4119 for my $file (@opt::basefile) {
4120 if($file !~ m:^/: and $opt::workdir eq "...") {
4121 ::error("Work dir '...' will not work with relative basefiles.");
4122 ::wait_and_exit(255);
4124 if(not $workdir) {
4125 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],{},{},{});
4126 my $dummyjob = Job->new($dummycmdline);
4127 $workdir = $dummyjob->workdir();
4129 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4132 debug("init", "basesetup: @cmd\n");
4133 my ($exitstatus,$stdout_ref,$stderr_ref) =
4134 run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5);
4135 if($exitstatus) {
4136 my @stdout = @$stdout_ref;
4137 my @stderr = @$stderr_ref;
4138 ::error("Copying of --basefile failed: @stdout@stderr");
4139 ::wait_and_exit(255);
4143 sub cleanup_basefile() {
4144 # Remove the basefiles transferred
4145 # Uses:
4146 # %Global::host
4147 # @opt::basefile
4148 # Returns: N/A
4149 my @cmd;
4150 my $workdir;
4151 if(not $workdir) {
4152 my $dummycmdline = CommandLine->new(1,"true",0,0,0,0,0,{},{},{});
4153 my $dummyjob = Job->new($dummycmdline);
4154 $workdir = $dummyjob->workdir();
4156 for my $sshlogin (values %Global::host) {
4157 if($sshlogin->string() eq ":") { next }
4158 for my $file (@opt::basefile) {
4159 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
4162 debug("init", "basecleanup: @cmd\n");
4163 my ($exitstatus,$stdout_ref,$stderr_ref) =
4164 run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5);
4165 if($exitstatus) {
4166 my @stdout = @$stdout_ref;
4167 my @stderr = @$stderr_ref;
4168 ::error("Cleanup of --basefile failed: @stdout@stderr");
4169 ::wait_and_exit(255);
4173 sub run_gnu_parallel() {
4174 my ($stdin,@args) = @_;
4175 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
4176 print $Global::original_stderr ` $cmd wait` ;
4177 return 0
4180 sub _run_gnu_parallel() {
4181 # Run GNU Parallel
4182 # This should ideally just fork an internal copy
4183 # and not start it through a shell
4184 # Input:
4185 # $stdin = data to provide on stdin for GNU Parallel
4186 # @args = command line arguments
4187 # Returns:
4188 # $exitstatus = exitcode of GNU Parallel run
4189 # \@stdout = standard output
4190 # \@stderr = standard error
4191 my ($stdin,@args) = @_;
4192 my ($exitstatus,@stdout,@stderr);
4193 my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
4194 my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
4195 unlink $stderrname;
4197 my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
4198 $0,qw(--plain --shell /bin/sh --will-cite), @args);
4199 if(my $writerpid = fork()) {
4200 close $stdin_fh;
4201 @stdout = <$stdout_fh>;
4202 # Now stdout is closed:
4203 # These pids should be dead or die very soon
4204 while(kill 0, $writerpid) { ::usleep(1); }
4205 die;
4206 # reap $writerpid;
4207 # while(kill 0, $pid) { ::usleep(1); }
4208 # reap $writerpid;
4209 $exitstatus = $?;
4210 seek $stderr_fh, 0, 0;
4211 @stderr = <$stderr_fh>;
4212 close $stdout_fh;
4213 close $stderr_fh;
4214 } else {
4215 close $stdout_fh;
4216 close $stderr_fh;
4217 print $stdin_fh $stdin;
4218 close $stdin_fh;
4219 exit(0);
4221 return ($exitstatus,\@stdout,\@stderr);
4224 sub filter_hosts() {
4225 # Remove down --sshlogins from active duty.
4226 # Find ncpus, ncores, maxlen, time-to-login for each host.
4227 # Uses:
4228 # %Global::host
4229 # $Global::minimal_command_line_length
4230 # $opt::use_sockets_instead_of_threads
4231 # $opt::use_cores_instead_of_threads
4232 # $opt::use_cpus_instead_of_cores
4233 # Returns: N/A
4235 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
4236 $maxlen_ref, $echo_ref, $down_hosts_ref) =
4237 parse_host_filtering(parallelized_host_filtering());
4239 delete @Global::host{@$down_hosts_ref};
4240 @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
4242 $Global::minimal_command_line_length = 100_000_000;
4243 while (my ($sshlogin, $obj) = each %Global::host) {
4244 if($sshlogin eq ":") { next }
4245 $nsockets_ref->{$sshlogin} or
4246 ::die_bug("nsockets missing: ".$obj->serverlogin());
4247 $ncores_ref->{$sshlogin} or
4248 ::die_bug("ncores missing: ".$obj->serverlogin());
4249 $nthreads_ref->{$sshlogin} or
4250 ::die_bug("nthreads missing: ".$obj->serverlogin());
4251 $time_to_login_ref->{$sshlogin} or
4252 ::die_bug("time_to_login missing: ".$obj->serverlogin());
4253 $maxlen_ref->{$sshlogin} or
4254 ::die_bug("maxlen missing: ".$obj->serverlogin());
4255 $obj->set_ncpus($nthreads_ref->{$sshlogin});
4256 if($opt::use_cpus_instead_of_cores) {
4257 $obj->set_ncpus($ncores_ref->{$sshlogin});
4258 } elsif($opt::use_sockets_instead_of_threads) {
4259 $obj->set_ncpus($nsockets_ref->{$sshlogin});
4260 } elsif($opt::use_cores_instead_of_threads) {
4261 $obj->set_ncpus($ncores_ref->{$sshlogin});
4263 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
4264 $obj->set_maxlength($maxlen_ref->{$sshlogin});
4265 $Global::minimal_command_line_length =
4266 ::min($Global::minimal_command_line_length,
4267 int($maxlen_ref->{$sshlogin}/2));
4268 ::debug("init", "Timing from -S:$sshlogin ",
4269 " nsockets:",$nsockets_ref->{$sshlogin},
4270 " ncores:", $ncores_ref->{$sshlogin},
4271 " nthreads:",$nthreads_ref->{$sshlogin},
4272 " time_to_login:", $time_to_login_ref->{$sshlogin},
4273 " maxlen:", $maxlen_ref->{$sshlogin},
4274 " min_max_len:", $Global::minimal_command_line_length,"\n");
4278 sub parse_host_filtering() {
4279 # Input:
4280 # @lines = output from parallelized_host_filtering()
4281 # Returns:
4282 # \%nsockets = number of sockets of {host}
4283 # \%ncores = number of cores of {host}
4284 # \%nthreads = number of hyperthreaded cores of {host}
4285 # \%time_to_login = time_to_login on {host}
4286 # \%maxlen = max command len on {host}
4287 # \%echo = echo received from {host}
4288 # \@down_hosts = list of hosts with no answer
4289 local $/ = "\n";
4290 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
4291 @down_hosts);
4292 for (@_) {
4293 ::debug("init","Read: ",$_);
4294 chomp;
4295 my @col = split /\t/, $_;
4296 if($col[0] =~ /^parallel: Warning:/) {
4297 # Timed out job: Ignore it
4298 next;
4299 } elsif(defined $col[6]) {
4300 # This is a line from --joblog
4301 # seq host time spent sent received exit signal command
4302 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
4303 if($col[0] eq "Seq" and $col[1] eq "Host" and
4304 $col[2] eq "Starttime") {
4305 # Header => skip
4306 next;
4308 # Get server from: eval true server\;
4309 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
4310 ::die_bug("col8 does not contain host: $col[8]");
4311 my $host = $1;
4312 $host =~ tr/\\//d;
4313 $Global::host{$host} or next;
4314 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
4315 # exit == 255 or exit == timeout (-1): ssh failed/timedout
4316 # exit == 1: lsh failed
4317 # Remove sshlogin
4318 ::debug("init", "--filtered $host\n");
4319 push(@down_hosts, $host);
4320 } elsif($col[6] eq "127") {
4321 # signal == 127: parallel not installed remote
4322 # Set nsockets, ncores, nthreads = 1
4323 ::warning("Could not figure out ".
4324 "number of cpus on $host. Using 1.");
4325 $nsockets{$host} = 1;
4326 $ncores{$host} = 1;
4327 $nthreads{$host} = 1;
4328 $maxlen{$host} = Limits::Command::max_length();
4329 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
4330 # Remember how log it took to log in
4331 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
4332 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
4333 } else {
4334 ::die_bug("host check unmatched long jobline: $_");
4336 } elsif($Global::host{$col[0]}) {
4337 # This output from --number-of-cores, --number-of-cpus,
4338 # --max-line-length-allowed
4339 # ncores: server 8
4340 # ncpus: server 2
4341 # maxlen: server 131071
4342 if(/parallel: Warning: Cannot figure out number of/) {
4343 next;
4345 if(not $nsockets{$col[0]}) {
4346 $nsockets{$col[0]} = $col[1];
4347 } elsif(not $ncores{$col[0]}) {
4348 $ncores{$col[0]} = $col[1];
4349 } elsif(not $nthreads{$col[0]}) {
4350 $nthreads{$col[0]} = $col[1];
4351 } elsif(not $maxlen{$col[0]}) {
4352 $maxlen{$col[0]} = $col[1];
4353 } elsif(not $echo{$col[0]}) {
4354 $echo{$col[0]} = $col[1];
4355 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
4356 # Skip these:
4357 # perl: warning: Setting locale failed.
4358 # perl: warning: Please check that your locale settings:
4359 # LANGUAGE = (unset),
4360 # LC_ALL = (unset),
4361 # LANG = "en_US.UTF-8"
4362 # are supported and installed on your system.
4363 # perl: warning: Falling back to the standard locale ("C").
4364 } else {
4365 ::die_bug("host check too many col0: $_");
4367 } else {
4368 ::die_bug("host check unmatched short jobline ($col[0]): $_");
4371 @down_hosts = uniq(@down_hosts);
4372 return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
4373 \%maxlen, \%echo, \@down_hosts);
4376 sub parallelized_host_filtering() {
4377 # Uses:
4378 # %Global::host
4379 # Returns:
4380 # text entries with:
4381 # * joblog line
4382 # * hostname \t number of cores
4383 # * hostname \t number of cpus
4384 # * hostname \t max-line-length-allowed
4385 # * hostname \t empty
4387 sub sshwrapped {
4388 # Wrap with ssh and --env
4389 # Return $default_value if command fails
4390 my $sshlogin = shift;
4391 my $command = shift;
4392 my $default_value = shift;
4393 # wrapper that returns $default_value if the command fails:
4394 # bug #57886: Errors when using different version on remote
4395 # perl -e '$a=`$command`; print $? ? "$default_value" : $a'
4396 my $wcmd = q(perl -e '$a=`).$command.q(`;).
4397 q(print $? ? ").::pQ($default_value).q(" : $a');
4398 my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],{},{},{});
4399 my $job = Job->new($commandline);
4400 $job->set_sshlogin($sshlogin);
4401 $job->wrapped();
4402 return($job->{'wrapped'});
4405 my(@sockets, @cores, @threads, @maxline, @echo);
4406 while (my ($host, $sshlogin) = each %Global::host) {
4407 if($host eq ":") { next }
4408 # The 'true' is used to get the $host out later
4409 push(@sockets, $host."\t"."true $host; ".
4410 sshwrapped($sshlogin,"parallel --number-of-sockets",0)."\n\0");
4411 push(@cores, $host."\t"."true $host; ".
4412 sshwrapped($sshlogin,"parallel --number-of-cores",0)."\n\0");
4413 push(@threads, $host."\t"."true $host; ".
4414 sshwrapped($sshlogin,"parallel --number-of-threads",0)."\n\0");
4415 push(@maxline, $host."\t"."true $host; ".
4416 sshwrapped($sshlogin,"parallel --max-line-length-allowed",0)."\n\0");
4417 # 'echo' is used to get the fastest possible ssh login time
4418 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4419 $sshlogin->serverlogin();
4420 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4422 # --timeout 10: Setting up an SSH connection and running a simple
4423 # command should never take > 10 sec.
4424 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4425 # will make it less likely to overload the ssh daemon.
4426 # --retries 3: If the ssh daemon is overloaded, try 3 times
4427 my $cmd =
4428 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4429 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4430 $cmd = $Global::shell." -c ".Q($cmd);
4431 ::debug("init", $cmd, "\n");
4432 my @out;
4433 my $prepend = "";
4435 my ($host_fh,$in,$err);
4436 open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
4437 ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
4439 if(not fork()) {
4440 # Give the commands to run to the $cmd
4441 close $host_fh;
4442 print $in @sockets, @cores, @threads, @maxline, @echo;
4443 close $in;
4444 exit();
4446 close $in;
4447 for(<$host_fh>) {
4448 # TODO incompatible with '-quoting. Needs to be fixed differently
4449 #if(/\'$/) {
4450 # # if last char = ' then append next line
4451 # # This may be due to quoting of \n in environment var
4452 # $prepend .= $_;
4453 # next;
4455 $_ = $prepend . $_;
4456 $prepend = "";
4457 push @out, $_;
4459 close $host_fh;
4460 return @out;
4463 sub onall($@) {
4464 # Runs @command on all hosts.
4465 # Uses parallel to run @command on each host.
4466 # --jobs = number of hosts to run on simultaneously.
4467 # For each host a parallel command with the args will be running.
4468 # Uses:
4469 # $Global::quoting
4470 # @opt::basefile
4471 # $opt::jobs
4472 # $opt::linebuffer
4473 # $opt::ungroup
4474 # $opt::group
4475 # $opt::keeporder
4476 # $opt::D
4477 # $opt::plain
4478 # $opt::max_chars
4479 # $opt::linebuffer
4480 # $opt::files
4481 # $opt::colsep
4482 # $opt::timeout
4483 # $opt::plain
4484 # $opt::retries
4485 # $opt::max_chars
4486 # $opt::arg_sep
4487 # $opt::arg_file_sep
4488 # @opt::v
4489 # @opt::env
4490 # %Global::host
4491 # $Global::exitstatus
4492 # $Global::debug
4493 # $Global::joblog
4494 # $opt::joblog
4495 # $opt::tag
4496 # $opt::tee
4497 # Input:
4498 # @command = command to run on all hosts
4499 # Returns: N/A
4500 sub tmp_joblog {
4501 # Input:
4502 # $joblog = filename of joblog - undef if none
4503 # Returns:
4504 # $tmpfile = temp file for joblog - undef if none
4505 my $joblog = shift;
4506 if(not defined $joblog) {
4507 return undef;
4509 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
4510 close $fh;
4511 return $tmpfile;
4513 my ($input_source_fh_ref,@command) = @_;
4514 if($Global::quoting) {
4515 @command = shell_quote(@command);
4518 # Copy all @input_source_fh (-a and :::) into tempfiles
4519 my @argfiles = ();
4520 for my $fh (@$input_source_fh_ref) {
4521 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
4522 print $outfh (<$fh>);
4523 close $outfh;
4524 push @argfiles, $name;
4526 if(@opt::basefile) { setup_basefile(); }
4527 # for each sshlogin do:
4528 # parallel -S $sshlogin $command :::: @argfiles
4530 # Pass some of the options to the sub-parallels, not all of them as
4531 # -P should only go to the first, and -S should not be copied at all.
4532 my $options =
4533 join(" ",
4534 ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
4535 ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""),
4536 ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""),
4537 ((defined $opt::D) ? "-D $opt::D" : ""),
4538 ((defined $opt::group) ? "-g" : ""),
4539 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
4540 ((defined $opt::keeporder) ? "--keeporder" : ""),
4541 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4542 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4543 ((defined $opt::plain) ? "--plain" : ""),
4544 ((defined $opt::ungroup) ? "-u" : ""),
4545 ((defined $opt::tee) ? "--tee" : ""),
4547 my $suboptions =
4548 join(" ",
4549 ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
4550 ((defined $opt::D) ? "-D $opt::D" : ""),
4551 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
4552 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
4553 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
4554 ((defined $opt::files) ? "--files" : ""),
4555 ((defined $opt::group) ? "-g" : ""),
4556 ((defined $opt::cleanup) ? "--cleanup" : ""),
4557 ((defined $opt::keeporder) ? "--keeporder" : ""),
4558 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4559 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4560 ((defined $opt::plain) ? "--plain" : ""),
4561 ((defined $opt::plus) ? "--plus" : ""),
4562 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
4563 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
4564 ((defined $opt::ungroup) ? "-u" : ""),
4565 ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""),
4566 ((defined $opt::tee) ? "--tee" : ""),
4567 ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
4568 (@Global::transfer_files ? map { "--tf ".Q($_) }
4569 @Global::transfer_files : ""),
4570 (@Global::ret_files ? map { "--return ".Q($_) }
4571 @Global::ret_files : ""),
4572 (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
4573 (map { "-v" } @opt::v),
4575 ::debug("init", "| $0 $options\n");
4576 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4577 ::die_bug("This does not run GNU Parallel: $0 $options");
4578 my @joblogs;
4579 for my $host (sort keys %Global::host) {
4580 my $sshlogin = $Global::host{$host};
4581 my $joblog = tmp_joblog($opt::joblog);
4582 if($joblog) {
4583 push @joblogs, $joblog;
4584 $joblog = "--joblog $joblog";
4586 my $quad = $opt::arg_file_sep || "::::";
4587 # If PARALLEL_ENV is set: Pass it on
4588 my $penv=$Global::parallel_env ?
4589 "PARALLEL_ENV=".Q($Global::parallel_env) :
4591 ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
4592 ((defined $opt::tag) ?
4593 "--tagstring ".Q($sshlogin->string()) : ""),
4594 " -S ", Q($sshlogin->string())," ",
4595 join(" ",shell_quote(@command))," $quad @argfiles\n");
4596 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4597 ((defined $opt::tag) ?
4598 "--tagstring ".Q($sshlogin->string()) : ""),
4599 " -S ", Q($sshlogin->string())," ",
4600 join(" ",shell_quote(@command))," $quad @argfiles\0";
4602 close $parallel_fh;
4603 $Global::exitstatus = $? >> 8;
4604 debug("init", "--onall exitvalue ", $?);
4605 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
4606 $Global::debug or unlink(@argfiles);
4607 my %seen;
4608 for my $joblog (@joblogs) {
4609 # Append to $joblog
4610 open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
4611 # Skip first line (header);
4612 <$fh>;
4613 print $Global::joblog (<$fh>);
4614 close $fh;
4615 unlink($joblog);
4620 sub __SIGNAL_HANDLING__() {}
4623 sub sigtstp() {
4624 # Send TSTP signal (Ctrl-Z) to all children process groups
4625 # Uses:
4626 # %SIG
4627 # Returns: N/A
4628 signal_children("TSTP");
4631 sub sigpipe() {
4632 # Send SIGPIPE signal to all children process groups
4633 # Uses:
4634 # %SIG
4635 # Returns: N/A
4636 signal_children("PIPE");
4639 sub signal_children() {
4640 # Send signal to all children process groups
4641 # and GNU Parallel itself
4642 # Uses:
4643 # %SIG
4644 # Returns: N/A
4645 my $signal = shift;
4646 debug("run", "Sending $signal ");
4647 kill $signal, map { -$_ } keys %Global::running;
4648 # Use default signal handler for GNU Parallel itself
4649 $SIG{$signal} = undef;
4650 kill $signal, $$;
4653 sub save_original_signal_handler() {
4654 # Remember the original signal handler
4655 # Uses:
4656 # %Global::original_sig
4657 # Returns: N/A
4658 $SIG{INT} = sub {
4659 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4660 wait_and_exit(255);
4662 $SIG{TERM} = sub {
4663 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4664 wait_and_exit(255);
4666 %Global::original_sig = %SIG;
4667 $SIG{TERM} = sub {}; # Dummy until jobs really start
4668 $SIG{ALRM} = 'IGNORE';
4669 # Allow Ctrl-Z to suspend and `fg` to continue
4670 $SIG{TSTP} = \&sigtstp;
4671 $SIG{PIPE} = \&sigpipe;
4672 $SIG{CONT} = sub {
4673 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4674 $SIG{TSTP} = \&sigtstp;
4675 # Send continue signal to all children process groups
4676 kill "CONT", map { -$_ } keys %Global::running;
4680 sub list_running_jobs() {
4681 # Print running jobs on tty
4682 # Uses:
4683 # %Global::running
4684 # Returns: N/A
4685 for my $job (values %Global::running) {
4686 ::status("$Global::progname: ".$job->replaced());
4690 sub start_no_new_jobs() {
4691 # Start no more jobs
4692 # Uses:
4693 # %Global::original_sig
4694 # %Global::unlink
4695 # $Global::start_no_new_jobs
4696 # Returns: N/A
4697 # $SIG{TERM} = $Global::original_sig{TERM};
4698 unlink keys %Global::unlink;
4699 ::status
4700 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4701 "$Global::progname: Waiting for these ".(keys %Global::running).
4702 " jobs to finish. Send SIGTERM to stop now.");
4703 list_running_jobs();
4704 $Global::start_no_new_jobs ||= 1;
4707 sub reapers() {
4708 # Run reaper until there are no more left
4709 # Returns:
4710 # @pids_reaped = pids of reaped processes
4711 my @pids_reaped;
4712 my $pid;
4713 while($pid = reaper()) {
4714 push @pids_reaped, $pid;
4716 return @pids_reaped;
4719 sub reaper() {
4720 # A job finished:
4721 # * Set exitstatus, exitsignal, endtime.
4722 # * Free ressources for new job
4723 # * Update median runtime
4724 # * Print output
4725 # * If --halt = now: Kill children
4726 # * Print progress
4727 # Uses:
4728 # %Global::running
4729 # $opt::timeout
4730 # $Global::timeoutq
4731 # $opt::keeporder
4732 # $Global::total_running
4733 # Returns:
4734 # $stiff = PID of child finished
4735 my $stiff;
4736 debug("run", "Reaper ");
4737 if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
4738 # No jobs waiting to be reaped
4739 return 0;
4742 # $stiff = pid of dead process
4743 my $job = $Global::running{$stiff};
4745 # '-a <(seq 10)' will give us a pid not in %Global::running
4746 # The same will one of the ssh -M: ignore
4747 $job or return 0;
4748 delete $Global::running{$stiff};
4749 $Global::total_running--;
4750 if($job->{'commandline'}{'skip'}) {
4751 # $job->skip() was called
4752 $job->set_exitstatus(-2);
4753 $job->set_exitsignal(0);
4754 } else {
4755 $job->set_exitstatus($? >> 8);
4756 $job->set_exitsignal($? & 127);
4759 debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4760 $job->set_endtime(::now());
4761 my $sshlogin = $job->sshlogin();
4762 $sshlogin->dec_jobs_running();
4763 if($job->should_be_retried()) {
4764 # Free up file handles
4765 $job->free_ressources();
4766 } else {
4767 # The job is done
4768 $sshlogin->inc_jobs_completed();
4769 # Free the jobslot
4770 $job->free_slot();
4771 if($opt::timeout and not $job->exitstatus()) {
4772 # Update average runtime for timeout only for successful jobs
4773 $Global::timeoutq->update_median_runtime($job->runtime());
4775 if($opt::keeporder) {
4776 $job->print_earlier_jobs();
4777 } else {
4778 $job->print();
4780 if($job->should_we_halt() eq "now") {
4781 # Kill children
4782 ::kill_sleep_seq($job->pid());
4783 ::killall();
4784 ::wait_and_exit($Global::halt_exitstatus);
4787 $job->cleanup();
4789 if($opt::progress) {
4790 my %progress = progress();
4791 ::status_no_nl("\r",$progress{'status'});
4794 debug("run", "done ");
4795 return $stiff;
4799 sub __USAGE__() {}
4802 sub killall() {
4803 # Kill all jobs by killing their process groups
4804 # Uses:
4805 # $Global::start_no_new_jobs = we are stopping
4806 # $Global::killall = Flag to not run reaper
4807 $Global::start_no_new_jobs ||= 1;
4808 # Do not reap killed children: Ignore them instead
4809 $Global::killall ||= 1;
4810 kill_sleep_seq(keys %Global::running);
4813 sub kill_sleep_seq(@) {
4814 # Send jobs TERM,TERM,KILL to processgroups
4815 # Input:
4816 # @pids = list of pids that are also processgroups
4817 # Convert pids to process groups ($processgroup = -$pid)
4818 my @pgrps = map { -$_ } @_;
4819 my @term_seq = split/,/,$opt::termseq;
4820 if(not @term_seq) {
4821 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4823 while(@term_seq) {
4824 @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
4828 sub kill_sleep() {
4829 # Kill pids with a signal and wait a while for them to die
4830 # Input:
4831 # $signal = signal to send to @pids
4832 # $sleep_max = number of ms to sleep at most before returning
4833 # @pids = pids to kill (actually process groups)
4834 # Uses:
4835 # $Global::killall = set by killall() to avoid calling reaper
4836 # Returns:
4837 # @pids = pids still alive
4838 my ($signal, $sleep_max, @pids) = @_;
4839 ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4840 kill $signal, @pids;
4841 my $sleepsum = 0;
4842 my $sleep = 0.001;
4844 while(@pids and $sleepsum < $sleep_max) {
4845 if($Global::killall) {
4846 # Killall => don't run reaper
4847 while(waitpid(-1, &WNOHANG) > 0) {
4848 $sleep = $sleep/2+0.001;
4850 } elsif(reapers()) {
4851 $sleep = $sleep/2+0.001;
4853 $sleep *= 1.1;
4854 ::usleep($sleep);
4855 $sleepsum += $sleep;
4856 # Keep only living children
4857 @pids = grep { kill(0, $_) } @pids;
4859 return @pids;
4862 sub wait_and_exit($) {
4863 # If we do not wait, we sometimes get segfault
4864 # Returns: N/A
4865 my $error = shift;
4866 unlink keys %Global::unlink;
4867 if($error) {
4868 # Kill all jobs without printing
4869 killall();
4871 for (keys %Global::unkilled_children) {
4872 # Kill any (non-jobs) children (e.g. reserved processes)
4873 kill 9, $_;
4874 waitpid($_,0);
4875 delete $Global::unkilled_children{$_};
4877 if($Global::unkilled_sqlworker) {
4878 waitpid($Global::unkilled_sqlworker,0);
4880 # Avoid: Warning: unable to close filehandle properly: No space
4881 # left on device during global destruction.
4882 $SIG{__WARN__} = sub {};
4883 exit($error);
4886 sub die_usage() {
4887 # Returns: N/A
4888 usage();
4889 wait_and_exit(255);
4892 sub usage() {
4893 # Returns: N/A
4894 print join
4895 ("\n",
4896 "Usage:",
4898 "$Global::progname [options] [command [arguments]] < list_of_arguments",
4899 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4900 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4902 "-j n Run n jobs in parallel",
4903 "-k Keep same order",
4904 "-X Multiple arguments with context replace",
4905 "--colsep regexp Split input on regexp for positional replacements",
4906 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4907 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
4908 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4909 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4911 "-S sshlogin Example: foo\@server.example.com",
4912 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
4913 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
4914 "--onall Run the given command with argument on all sshlogins",
4915 "--nonall Run the given command with no arguments on all sshlogins",
4917 "--pipe Split stdin (standard input) to multiple jobs.",
4918 "--recend str Record end separator for --pipe.",
4919 "--recstart str Record start separator for --pipe.",
4921 "GNU Parallel can do much more. See 'man $Global::progname' for details",
4923 "Academic tradition requires you to cite works you base your article on.",
4924 "If you use programs that use GNU Parallel to process data for an article in a",
4925 "scientific publication, please cite:",
4927 " Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4928 " Zenodo. https://doi.org/10.5281/zenodo.4045386",
4930 # Before changing this line, please read
4931 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4932 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4933 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4934 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4936 "",);
4939 sub citation_notice() {
4940 # if --will-cite or --plain: do nothing
4941 # if stderr redirected: do nothing
4942 # if $PARALLEL_HOME/will-cite: do nothing
4943 # else: print citation notice to stderr
4944 if($opt::willcite
4946 $opt::plain
4948 not -t $Global::original_stderr
4950 grep { -e "$_/will-cite" } @Global::config_dirs) {
4951 # skip
4952 } else {
4953 ::status
4954 ("Academic tradition requires you to cite works you base your article on.",
4955 "If you use programs that use GNU Parallel to process data for an article in a",
4956 "scientific publication, please cite:",
4958 " Tange, O. (2020, September 22). GNU Parallel 20200922 ('Ginsburg').",
4959 " Zenodo. https://doi.org/10.5281/zenodo.4045386",
4961 # Before changing this line, please read
4962 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
4963 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4964 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4965 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4967 "More about funding GNU Parallel and the citation notice:",
4968 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4970 "To silence this citation notice: run 'parallel --citation' once.",
4973 mkdir $Global::config_dir;
4974 # Number of times the user has run GNU Parallel without showing
4975 # willingness to cite
4976 my $runs = 0;
4977 if(open (my $fh, "<", $Global::config_dir.
4978 "/runs-without-willing-to-cite")) {
4979 $runs = <$fh>;
4980 close $fh;
4982 $runs++;
4983 if(open (my $fh, ">", $Global::config_dir.
4984 "/runs-without-willing-to-cite")) {
4985 print $fh $runs;
4986 close $fh;
4987 if($runs >= 10) {
4988 ::status("Come on: You have run parallel $runs times. Isn't it about time ",
4989 "you run 'parallel --citation' once to silence the citation notice?",
4990 "");
4996 sub status(@) {
4997 my @w = @_;
4998 my $fh = $Global::status_fd || *STDERR;
4999 print $fh map { ($_, "\n") } @w;
5000 flush $fh;
5003 sub status_no_nl(@) {
5004 my @w = @_;
5005 my $fh = $Global::status_fd || *STDERR;
5006 print $fh @w;
5007 flush $fh;
5010 sub warning(@) {
5011 my @w = @_;
5012 my $prog = $Global::progname || "parallel";
5013 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5016 sub error(@) {
5017 my @w = @_;
5018 my $prog = $Global::progname || "parallel";
5019 status(map { ($prog.": Error: ". $_); } @w);
5022 sub die_bug($) {
5023 my $bugid = shift;
5024 print STDERR
5025 ("$Global::progname: This should not happen. You have found a bug.\n",
5026 "Please contact <parallel\@gnu.org> and follow\n",
5027 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
5028 "\n",
5029 "Include this in the report:\n",
5030 "* The version number: $Global::version\n",
5031 "* The bugid: $bugid\n",
5032 "* The command line being run\n",
5033 "* The files being read (put the files on a webserver if they are big)\n",
5034 "\n",
5035 "If you get the error on smaller/fewer files, please include those instead.\n");
5036 ::wait_and_exit(255);
5039 sub version() {
5040 # Returns: N/A
5041 print join
5042 ("\n",
5043 "GNU $Global::progname $Global::version",
5044 "Copyright (C) 2007-2020 Ole Tange, http://ole.tange.dk and Free Software",
5045 "Foundation, Inc.",
5046 "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5047 "This is free software: you are free to change and redistribute it.",
5048 "GNU $Global::progname comes with no warranty.",
5050 "Web site: https://www.gnu.org/software/${Global::progname}\n",
5051 "When using programs that use GNU Parallel to process data for publication",
5052 "please cite as described in 'parallel --citation'.\n",
5056 sub citation() {
5057 # Returns: N/A
5058 my ($all_argv_ref,$argv_options_removed_ref) = @_;
5059 my $all_argv = "@$all_argv_ref";
5060 my $no_opts = "@$argv_options_removed_ref";
5061 $all_argv=~s/--citation//;
5062 if($all_argv ne $no_opts) {
5063 ::warning("--citation ignores all other options and arguments.");
5064 ::status("");
5067 ::status(
5068 "Academic tradition requires you to cite works you base your article on.",
5069 "If you use programs that use GNU Parallel to process data for an article in a",
5070 "scientific publication, please cite:",
5072 "\@software{tange_2020_4045386,",
5073 " author = {Tange, Ole},",
5074 " title = {GNU Parallel 20200922 ('Ginsburg')},",
5075 " month = Sep,",
5076 " year = 2020,",
5077 " note = {{GNU Parallel is a general parallelizer to run",
5078 " multiple serial command line programs in parallel",
5079 " without changing them.}},",
5080 " publisher = {Zenodo},",
5081 " doi = {10.5281/zenodo.4045386},",
5082 " url = {https://doi.org/10.5281/zenodo.4045386}",
5083 "}",
5085 "(Feel free to use \\nocite{tange_2020_4045386})",
5087 # Before changing this line, please read
5088 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5089 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5090 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5091 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5093 "More about funding GNU Parallel and the citation notice:",
5094 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
5095 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5096 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
5098 "If you send a copy of your published article to tange\@gnu.org, it will be",
5099 "mentioned in the release notes of next version of GNU Parallel.",
5102 while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
5103 print "\nType: 'will cite' and press enter.\n> ";
5104 my $input = <STDIN>;
5105 if(not defined $input) {
5106 exit(255);
5108 if($input =~ /will cite/i) {
5109 mkdir $Global::config_dir;
5110 if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
5111 close $fh;
5112 ::status(
5114 "Thank you for your support: You are the reason why there is funding to",
5115 "continue maintaining GNU Parallel. On behalf of future versions of",
5116 "GNU Parallel, which would not exist without your support:",
5118 " THANK YOU SO MUCH",
5120 "It is really appreciated. The citation notice is now silenced.",
5121 "");
5122 } else {
5123 ::status(
5125 "Thank you for your support. It is much appreciated. The citation",
5126 "cannot permanently be silenced. Use '--will-cite' instead.",
5128 "If you use '--will-cite' in scripts to be run by others you are making",
5129 "it harder for others to see the citation notice. The development of",
5130 "GNU Parallel is indirectly financed through citations, so if users",
5131 "do not know they should cite then you are making it harder to finance",
5132 "development. However, if you pay 10000 EUR, you should feel free to",
5133 "use '--will-cite' in scripts.",
5134 "");
5135 last;
5141 sub show_limits() {
5142 # Returns: N/A
5143 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
5144 "Maximal used size of command: ",Limits::Command::max_length(),"\n",
5145 "\n",
5146 "Execution of will continue now, and it will try to read its input\n",
5147 "and run commands; if this is not what you wanted to happen, please\n",
5148 "press CTRL-D or CTRL-C\n");
5151 sub embed() {
5152 # Give an embeddable version of GNU Parallel
5153 # Tested with: bash, zsh, ksh, ash, dash, sh
5154 my $randomstring = "cut-here-".join"",
5155 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
5156 if(not -f $0 or not -r $0) {
5157 ::error("--embed only works if parallel is a readable file");
5158 exit(255);
5160 if(open(my $fh, "<", $0)) {
5161 # Read the source from $0
5162 my @source = <$fh>;
5163 my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
5164 my @env_parallel_source = ();
5165 my $shell = $Global::shell;
5166 $shell =~ s:.*/::;
5167 for(which("env_parallel.$shell")) {
5168 -r $_ or next;
5169 # Read the source of env_parallel.shellname
5170 open(my $env_parallel_source_fh, $_) || die;
5171 @env_parallel_source = <$env_parallel_source_fh>;
5172 close $env_parallel_source_fh;
5173 last;
5175 print "#!$Global::shell
5177 # Copyright (C) 2007-2020 $user, Ole Tange, http://ole.tange.dk
5178 # and Free Software Foundation, Inc.
5180 # This program is free software; you can redistribute it and/or modify
5181 # it under the terms of the GNU General Public License as published by
5182 # the Free Software Foundation; either version 3 of the License, or
5183 # (at your option) any later version.
5185 # This program is distributed in the hope that it will be useful, but
5186 # WITHOUT ANY WARRANTY; without even the implied warranty of
5187 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5188 # General Public License for more details.
5190 # You should have received a copy of the GNU General Public License
5191 # along with this program; if not, see <https://www.gnu.org/licenses/>
5192 # or write to the Free Software Foundation, Inc., 51 Franklin St,
5193 # Fifth Floor, Boston, MA 02110-1301 USA
5196 print q!
5197 # Embedded GNU Parallel created with --embed
5198 parallel() {
5199 # Start GNU Parallel without leaving temporary files
5201 # Not all shells support 'perl <(cat ...)'
5202 # This is a complex way of doing:
5203 # perl <(cat <<'cut-here'
5204 # [...]
5205 # ) "$@"
5206 # and also avoiding:
5207 # [1]+ Done cat
5209 # Make a temporary fifo that perl can read from
5210 _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo);
5211 do {
5212 $f = "/tmp/parallel-".join"",
5213 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5214 } while(-e $f);
5215 mkfifo($f,0600);
5216 print $f;'`
5217 # Put source code into temporary file
5218 # so it is easy to copy to the fifo
5219 _file_with_GNU_Parallel_source=`mktemp`;
5221 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
5222 @source,
5223 $randomstring,"\n",
5225 # Copy the source code from the file to the fifo
5226 # and remove the file and fifo ASAP
5227 # 'sh -c' is needed to avoid
5228 # [1]+ Done cat
5229 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
5231 # Read the source from the fifo
5232 perl $_fifo_with_GNU_Parallel_source "$@"
5235 @env_parallel_source,
5238 # This will call the functions above
5239 parallel -k echo ::: Put your code here
5240 env_parallel --session
5241 env_parallel -k echo ::: Put your code here
5242 parset p,y,c,h -k echo ::: Put your code here
5243 echo $p $y $c $h
5245 } else {
5246 ::error("Cannot open $0");
5247 exit(255);
5249 ::status("Redirect the output to a file and add your changes at the end:",
5250 " $0 --embed > new_script");
5254 sub __GENERIC_COMMON_FUNCTION__() {}
5257 sub mkdir_or_die($) {
5258 # If dir is not executable: die
5259 my $dir = shift;
5260 # The eval is needed to catch exception from mkdir
5261 eval { File::Path::mkpath($dir); };
5262 if(not -x $dir) {
5263 ::error("Cannot change into non-executable dir $dir: $!");
5264 ::wait_and_exit(255);
5268 sub tmpfile(@) {
5269 # Create tempfile as $TMPDIR/parXXXXX
5270 # Returns:
5271 # $filehandle = opened file handle
5272 # $filename = file name created
5273 my($filehandle,$filename) =
5274 ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
5275 if(wantarray) {
5276 return($filehandle,$filename);
5277 } else {
5278 # Separate unlink due to NFS dealing badly with File::Temp
5279 unlink $filename;
5280 return $filehandle;
5284 sub tmpname($) {
5285 # Select a name that does not exist
5286 # Do not create the file as it may be used for creating a socket (by tmux)
5287 # Remember the name in $Global::unlink to avoid hitting the same name twice
5288 my $name = shift;
5289 my($tmpname);
5290 if(not -w $ENV{'TMPDIR'}) {
5291 if(not -e $ENV{'TMPDIR'}) {
5292 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
5293 } else {
5294 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
5296 ::wait_and_exit(255);
5298 do {
5299 $tmpname = $ENV{'TMPDIR'}."/".$name.
5300 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5301 } while(-e $tmpname or $Global::unlink{$tmpname}++);
5302 return $tmpname;
5305 sub tmpfifo() {
5306 # Find an unused name and mkfifo on it
5307 my $tmpfifo = tmpname("fif");
5308 mkfifo($tmpfifo,0600);
5309 return $tmpfifo;
5312 sub rm(@) {
5313 # Remove file and remove it from %Global::unlink
5314 # Uses:
5315 # %Global::unlink
5316 delete @Global::unlink{@_};
5317 unlink @_;
5320 sub size_of_block_dev() {
5321 # Like -s but for block devices
5322 # Input:
5323 # $blockdev = file name of block device
5324 # Returns:
5325 # $size = in bytes, undef if error
5326 my $blockdev = shift;
5327 if(open(my $fh, "<", $blockdev)) {
5328 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
5329 my $size = tell($fh);
5330 close $fh;
5331 return $size;
5332 } else {
5333 ::error("cannot open $blockdev");
5334 wait_and_exit(255);
5338 sub qqx(@) {
5339 # Like qx but with clean environment (except for @keep)
5340 # and STDERR ignored
5341 # This is needed if the environment contains functions
5342 # that /bin/sh does not understand
5343 my %env;
5344 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
5345 # ssh with Kerberos needs KRB5CCNAME
5346 # tmux needs LC_CTYPE
5347 # lsh needs HOME LOGNAME
5348 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE HOME LOGNAME);
5349 @env{@keep} = @ENV{@keep};
5350 local %ENV;
5351 %ENV = %env;
5352 if($Global::debug) {
5353 # && true is to force spawning a shell and not just exec'ing
5354 return qx{ @_ && true };
5355 } else {
5356 # CygWin does not respect 2>/dev/null
5357 # so we do that by hand
5358 # This trick does not work:
5359 # https://stackoverflow.com/q/13833088/363028
5360 # local *STDERR;
5361 # open(STDERR, ">", "/dev/null");
5362 open(local *CHILD_STDIN, '<', '/dev/null') or die $!;
5363 open(local *CHILD_STDERR, '>', '/dev/null') or die $!;
5364 my $out;
5365 # eval is needed if open3 fails (e.g. command line too long)
5366 eval {
5367 my $pid = open3(
5368 '<&CHILD_STDIN',
5369 $out,
5370 '>&CHILD_STDERR',
5371 # && true is to force spawning a shell and not just exec'ing
5372 "@_ && true");
5373 my @arr = <$out>;
5374 close $out;
5375 # Make sure $? is set
5376 waitpid($pid, 0);
5377 return wantarray ? @arr : join "",@arr;
5378 } or do {
5379 # If eval fails, force $?=false
5380 `false`;
5385 sub uniq(@) {
5386 # Remove duplicates and return unique values
5387 return keys %{{ map { $_ => 1 } @_ }};
5390 sub min(@) {
5391 # Returns:
5392 # Minimum value of array
5393 my $min;
5394 for (@_) {
5395 # Skip undefs
5396 defined $_ or next;
5397 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
5398 $min = ($min < $_) ? $min : $_;
5400 return $min;
5403 sub max(@) {
5404 # Returns:
5405 # Maximum value of array
5406 my $max;
5407 for (@_) {
5408 # Skip undefs
5409 defined $_ or next;
5410 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
5411 $max = ($max > $_) ? $max : $_;
5413 return $max;
5416 sub sum(@) {
5417 # Returns:
5418 # Sum of values of array
5419 my @args = @_;
5420 my $sum = 0;
5421 for (@args) {
5422 # Skip undefs
5423 $_ and do { $sum += $_; }
5425 return $sum;
5428 sub undef_as_zero($) {
5429 my $a = shift;
5430 return $a ? $a : 0;
5433 sub undef_as_empty($) {
5434 my $a = shift;
5435 return $a ? $a : "";
5438 sub undef_if_empty($) {
5439 if(defined($_[0]) and $_[0] eq "") {
5440 return undef;
5442 return $_[0];
5445 sub multiply_binary_prefix(@) {
5446 # Evalualte numbers with binary prefix
5447 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5448 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5449 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5450 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5451 # 13G = 13*1024*1024*1024 = 13958643712
5452 # Input:
5453 # $s = string with prefixes
5454 # Returns:
5455 # $value = int with prefixes multiplied
5456 my @v = @_;
5457 for(@v) {
5458 defined $_ or next;
5459 s/ki/*1024/gi;
5460 s/mi/*1024*1024/gi;
5461 s/gi/*1024*1024*1024/gi;
5462 s/ti/*1024*1024*1024*1024/gi;
5463 s/pi/*1024*1024*1024*1024*1024/gi;
5464 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5465 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5466 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5467 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5469 s/K/*1024/g;
5470 s/M/*1024*1024/g;
5471 s/G/*1024*1024*1024/g;
5472 s/T/*1024*1024*1024*1024/g;
5473 s/P/*1024*1024*1024*1024*1024/g;
5474 s/E/*1024*1024*1024*1024*1024*1024/g;
5475 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5476 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5477 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5479 s/k/*1000/g;
5480 s/m/*1000*1000/g;
5481 s/g/*1000*1000*1000/g;
5482 s/t/*1000*1000*1000*1000/g;
5483 s/p/*1000*1000*1000*1000*1000/g;
5484 s/e/*1000*1000*1000*1000*1000*1000/g;
5485 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5486 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5487 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5489 $_ = eval $_;
5491 return wantarray ? @v : $v[0];
5494 sub multiply_time_units($) {
5495 # Evalualte numbers with time units
5496 # s=1, m=60, h=3600, d=86400
5497 # Input:
5498 # $s = string time units
5499 # Returns:
5500 # $value = int in seconds
5501 my @v = @_;
5502 for(@v) {
5503 defined $_ or next;
5504 if(/[dhms]/i) {
5505 s/s/*1+/gi;
5506 s/m/*60+/gi;
5507 s/h/*3600+/gi;
5508 s/d/*86400+/gi;
5509 $_ = eval $_."0";
5512 return wantarray ? @v : $v[0];
5515 sub seconds_to_time_units() {
5516 # Convert seconds into ??d??h??m??s
5517 # s=1, m=60, h=3600, d=86400
5518 # Input:
5519 # $s = int in seconds
5520 # Returns:
5521 # $str = string time units
5522 my $s = shift;
5523 my $str;
5524 my $d = int($s/86400);
5525 $s -= $d * 86400;
5526 my $h = int($s/3600);
5527 $s -= $h * 3600;
5528 my $m = int($s/60);
5529 $s -= $m * 60;
5530 if($d) {
5531 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5532 } elsif($h) {
5533 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5534 } elsif($m) {
5535 $str = sprintf("%dm%02ds",$m,$s);
5536 } else {
5537 $str = sprintf("%ds",$s);
5539 return $str;
5543 my ($disk_full_fh, $b8193, $error_printed);
5544 sub exit_if_disk_full() {
5545 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5546 # If the disk is full: Exit immediately.
5547 # Returns:
5548 # N/A
5549 if(not $disk_full_fh) {
5550 $disk_full_fh = ::tmpfile(SUFFIX => ".df");
5551 $b8193 = "b"x8193;
5553 # Linux does not discover if a disk is full if writing <= 8192
5554 # Tested on:
5555 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5556 # ntfs reiserfs tmpfs ubifs vfat xfs
5557 # TODO this should be tested on different OS similar to this:
5559 # doit() {
5560 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5561 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5562 # seq 6900000 > /mnt/loop/i && echo seq OK
5563 # seq 6980868 > /mnt/loop/i
5564 # seq 10000 > /mnt/loop/ii
5565 # sleep 3
5566 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5567 # echo >&2
5569 print $disk_full_fh $b8193;
5570 if(not $disk_full_fh
5572 tell $disk_full_fh != 8193) {
5573 # On raspbian the disk can be full except for 10 chars.
5574 if(not $error_printed) {
5575 ::error("Output is incomplete.",
5576 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5577 "Is the disk full?",
5578 "Change \$TMPDIR with --tmpdir or use --compress.");
5579 $error_printed = 1;
5581 ::wait_and_exit(255);
5583 truncate $disk_full_fh, 0;
5584 seek($disk_full_fh, 0, 0) || die;
5588 sub spacefree($$) {
5589 # Remove comments and spaces
5590 # Inputs:
5591 # $spaces = keep 1 space?
5592 # $s = string to remove spaces from
5593 # Returns:
5594 # $s = with spaces removed
5595 my $spaces = shift;
5596 my $s = shift;
5597 $s =~ s/#.*//mg;
5598 if(1 == $spaces) {
5599 $s =~ s/\s+/ /mg;
5600 } elsif(2 == $spaces) {
5601 # Keep newlines
5602 $s =~ s/\n\n+/\n/sg;
5603 $s =~ s/[ \t]+/ /mg;
5604 } elsif(3 == $spaces) {
5605 # Keep perl code required space
5606 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5607 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5608 } else {
5609 $s =~ s/\s//mg;
5611 return $s;
5615 my $hostname;
5616 sub hostname() {
5617 local $/ = "\n";
5618 if(not $hostname) {
5619 $hostname = `hostname`;
5620 chomp($hostname);
5621 $hostname ||= "nohostname";
5623 return $hostname;
5627 sub which(@) {
5628 # Input:
5629 # @programs = programs to find the path to
5630 # Returns:
5631 # @full_path = full paths to @programs. Nothing if not found
5632 my @which;
5633 for my $prg (@_) {
5634 push(@which, grep { not -d $_ and -x $_ }
5635 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5636 if($prg =~ m:/:) {
5637 # Including path
5638 push(@which, grep { not -d $_ and -x $_ } $prg);
5641 ::debug("which", "$which[0] in $ENV{'PATH'}\n");
5642 return wantarray ? @which : $which[0];
5646 my ($regexp,$shell,%fakename);
5648 sub parent_shell {
5649 # Input:
5650 # $pid = pid to see if (grand)*parent is a shell
5651 # Returns:
5652 # $shellpath = path to shell - undef if no shell found
5653 my $pid = shift;
5654 ::debug("init","Parent of $pid\n");
5655 if(not $regexp) {
5656 # All shells known to mankind
5658 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5659 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5661 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
5662 ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5663 static-sh tcsh yash zsh -sh -csh -bash),
5664 '-sh (sh)' # sh on FreeBSD
5666 # Can be formatted as:
5667 # [sh] -sh sh busybox sh -sh (sh)
5668 # /bin/sh /sbin/sh /opt/csw/sh
5669 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5670 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5671 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5672 '(-?)('. $shell. '))( *$| [^(])';
5673 %fakename = (
5674 # sh disguises itself as -sh (sh) on FreeBSD
5675 "-sh (sh)" => ["sh"],
5676 # csh and tcsh disguise themselves as -sh/-csh
5677 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5678 # but sh also disguises itself as -sh
5679 # (TODO When does that happen?)
5680 "-sh" => ["sh"],
5681 "-csh" => ["tcsh", "csh"],
5682 # ash disguises itself as -ash
5683 "-ash" => ["ash", "dash", "sh"],
5684 # dash disguises itself as -dash
5685 "-dash" => ["dash", "ash", "sh"],
5686 # bash disguises itself as -bash
5687 "-bash" => ["bash", "sh"],
5688 # ksh disguises itself as -ksh
5689 "-ksh" => ["ksh", "sh"],
5690 # zsh disguises itself as -zsh
5691 "-zsh" => ["zsh", "sh"],
5694 if($^O eq "linux") {
5695 # Optimized for GNU/Linux
5696 my $testpid = $pid;
5697 my $shellpath;
5698 my $shellline;
5699 while($testpid) {
5700 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
5701 local $/="\0";
5702 chomp($shellline = <$fd>);
5703 if($shellline =~ /$regexp/o) {
5704 my $shellname = $4 || $8;
5705 my $dash = $3 || $7;
5706 if($shellname eq "sh" and $dash) {
5707 # -sh => csh or sh
5708 if($shellpath = readlink "/proc/$testpid/exe") {
5709 ::debug("init","procpath $shellpath\n");
5710 if($shellpath =~ m:/$shell$:o) {
5711 ::debug("init", "proc which ".$shellpath." => ");
5712 return $shellpath;
5716 ::debug("init", "which ".$shellname." => ");
5717 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5718 ::debug("init", "shell path $shellpath\n");
5719 return $shellpath;
5722 # Get parent pid
5723 if(open(my $fd, "<", "/proc/$testpid/stat")) {
5724 my $line = <$fd>;
5725 close $fd;
5726 # Parent pid is field 4
5727 $testpid = (split /\s+/, $line)[3];
5728 } else {
5729 # Something is wrong: fall back to old method
5730 last;
5734 # if -sh or -csh try readlink /proc/$$/exe
5735 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
5736 my $shellpath;
5737 my $testpid = $pid;
5738 while($testpid) {
5739 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5740 my $shellname = $4 || $8;
5741 my $dash = $3 || $7;
5742 if($shellname eq "sh" and $dash) {
5743 # -sh => csh or sh
5744 if($shellpath = readlink "/proc/$testpid/exe") {
5745 ::debug("init","procpath $shellpath\n");
5746 if($shellpath =~ m:/$shell$:o) {
5747 ::debug("init", "proc which ".$shellpath." => ");
5748 return $shellpath;
5752 ::debug("init", "which ".$shellname." => ");
5753 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5754 ::debug("init", "shell path $shellpath\n");
5755 $shellpath and last;
5757 if($testpid == $parent_of_ref->{$testpid}) {
5758 # In Solaris zones, the PPID of the zsched process is itself
5759 last;
5761 $testpid = $parent_of_ref->{$testpid};
5763 return $shellpath;
5768 my %pid_parentpid_cmd;
5770 sub pid_table() {
5771 # Returns:
5772 # %children_of = { pid -> children of pid }
5773 # %parent_of = { pid -> pid of parent }
5774 # %name_of = { pid -> commandname }
5776 if(not %pid_parentpid_cmd) {
5777 # Filter for SysV-style `ps`
5778 my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5779 q(s/^.{$s}//; print "@F[1,2] $_"' );
5780 # Minix uses cols 2,3 and can have newlines in the command
5781 # so lines not having numbers in cols 2,3 must be ignored
5782 my $minix = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5783 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5784 # BSD-style `ps`
5785 my $bsd = q(ps -o pid,ppid,command -ax);
5786 %pid_parentpid_cmd =
5788 'aix' => $sysv,
5789 'android' => $sysv,
5790 'cygwin' => $sysv,
5791 'darwin' => $bsd,
5792 'dec_osf' => $sysv,
5793 'dragonfly' => $bsd,
5794 'freebsd' => $bsd,
5795 'gnu' => $sysv,
5796 'hpux' => $sysv,
5797 'linux' => $sysv,
5798 'mirbsd' => $bsd,
5799 'minix' => $minix,
5800 'msys' => $sysv,
5801 'MSWin32' => $sysv,
5802 'netbsd' => $bsd,
5803 'nto' => $sysv,
5804 'openbsd' => $bsd,
5805 'solaris' => $sysv,
5806 'svr5' => $sysv,
5807 'syllable' => "echo ps not supported",
5810 $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
5812 my (@pidtable,%parent_of,%children_of,%name_of);
5813 # Table with pid -> children of pid
5814 @pidtable = `$pid_parentpid_cmd{$^O}`;
5815 my $p=$$;
5816 for (@pidtable) {
5817 # must match: 24436 21224 busybox ash
5818 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5819 # must match: 24436 21224 <<empty on system running Viber>>
5820 # or: perl -e 'while($0=" "){}'
5821 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5823 /^\s*(\S+)\s+(\S+)\s+()$/) {
5824 $parent_of{$1} = $2;
5825 push @{$children_of{$2}}, $1;
5826 $name_of{$1} = $3;
5827 } else {
5828 ::die_bug("pidtable format: $_");
5831 return(\%children_of, \%parent_of, \%name_of);
5835 sub now() {
5836 # Returns time since epoch as in seconds with 3 decimals
5837 # Uses:
5838 # @Global::use
5839 # Returns:
5840 # $time = time now with millisecond accuracy
5841 if(not $Global::use{"Time::HiRes"}) {
5842 if(eval "use Time::HiRes qw ( time );") {
5843 eval "sub TimeHiRestime { return Time::HiRes::time };";
5844 } else {
5845 eval "sub TimeHiRestime { return time() };";
5847 $Global::use{"Time::HiRes"} = 1;
5850 return (int(TimeHiRestime()*1000))/1000;
5853 sub usleep($) {
5854 # Sleep this many milliseconds.
5855 # Input:
5856 # $ms = milliseconds to sleep
5857 my $ms = shift;
5858 ::debug("timing",int($ms),"ms ");
5859 select(undef, undef, undef, $ms/1000);
5862 sub __KILLER_REAPER__() {}
5864 sub reap_usleep() {
5865 # Reap dead children.
5866 # If no dead children: Sleep specified amount with exponential backoff
5867 # Input:
5868 # $ms = milliseconds to sleep
5869 # Returns:
5870 # $ms/2+0.001 if children reaped
5871 # $ms*1.1 if no children reaped
5872 my $ms = shift;
5873 if(reapers()) {
5874 if(not $Global::total_completed % 100) {
5875 if($opt::timeout) {
5876 # Force cleaning the timeout queue for every 100 jobs
5877 # Fixes potential memleak
5878 $Global::timeoutq->process_timeouts();
5881 # Sleep exponentially shorter (1/2^n) if a job finished
5882 return $ms/2+0.001;
5883 } else {
5884 if($opt::timeout) {
5885 $Global::timeoutq->process_timeouts();
5887 if($opt::memfree) {
5888 kill_youngster_if_not_enough_mem($opt::memfree*0.5);
5890 if($opt::memsuspend) {
5891 kill_youngster_if_not_enough_mem($opt::memsuspend*0.5);
5893 if($opt::limit) {
5894 kill_youngest_if_over_limit();
5896 exit_if_disk_full();
5897 if($opt::linebuffer) {
5898 my $something_printed = 0;
5899 if($opt::keeporder) {
5900 for my $job (values %Global::running) {
5901 $something_printed += $job->print_earlier_jobs();
5903 } else {
5904 for my $job (values %Global::running) {
5905 $something_printed += $job->print();
5908 if($something_printed) {
5909 $ms = $ms/2+0.001;
5912 if($ms > 0.002) {
5913 # When a child dies, wake up from sleep (or select(,,,))
5914 $SIG{CHLD} = sub { kill "ALRM", $$ };
5915 if($opt::delay) {
5916 # The 0.004s is approximately the time it takes for one round
5917 usleep(1000*($Global::newest_starttime +
5918 $opt::delay - 0.004 - ::now()));
5919 } else {
5920 usleep($ms);
5922 # --compress needs $SIG{CHLD} unset
5923 $SIG{CHLD} = 'DEFAULT';
5925 # Sleep exponentially longer (1.1^n) if a job did not finish,
5926 # though at most 1000 ms.
5927 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
5931 sub kill_youngest_if_over_limit() {
5932 # Check each $sshlogin we are over limit
5933 # If over limit: kill off the youngest child
5934 # Put the child back in the queue.
5935 # Uses:
5936 # %Global::running
5937 my %jobs_of;
5938 my @sshlogins;
5940 for my $job (values %Global::running) {
5941 if(not $jobs_of{$job->sshlogin()}) {
5942 push @sshlogins, $job->sshlogin();
5944 push @{$jobs_of{$job->sshlogin()}}, $job;
5946 for my $sshlogin (@sshlogins) {
5947 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5948 if($sshlogin->limit() == 2) {
5949 $job->kill();
5950 last;
5956 sub kill_youngster_if_not_enough_mem() {
5957 # Check each $sshlogin if there is enough mem.
5958 # If less than 50% enough free mem: kill off the youngest child
5959 # Put the child back in the queue.
5960 # Uses:
5961 # %Global::running
5962 my $limit = shift;
5963 my %jobs_of;
5964 my @sshlogins;
5966 for my $job (values %Global::running) {
5967 if(not $jobs_of{$job->sshlogin()}) {
5968 push @sshlogins, $job->sshlogin();
5970 push @{$jobs_of{$job->sshlogin()}}, $job;
5972 for my $sshlogin (@sshlogins) {
5973 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5974 if($sshlogin->memfree() < $limit) {
5975 ::debug("mem","\n",map { $_->seq()." " }
5976 (sort { $b->seq() <=> $a->seq() }
5977 @{$jobs_of{$sshlogin}}));
5978 ::debug("mem","\n", $job->seq(), "killed ",
5979 $sshlogin->memfree()," < ",$limit);
5980 if($opt::memsuspend) {
5981 $job->suspend();
5982 } else {
5983 $job->kill();
5985 $sshlogin->memfree_recompute();
5986 } else {
5987 last;
5990 ::debug("mem","Free mem OK? ",
5991 $sshlogin->memfree()," > ",$limit);
5996 sub __DEBUGGING__() {}
5999 sub debug(@) {
6000 # Uses:
6001 # $Global::debug
6002 # %Global::fd
6003 # Returns: N/A
6004 $Global::debug or return;
6005 @_ = grep { defined $_ ? $_ : "" } @_;
6006 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
6007 if($Global::fd{1}) {
6008 # Original stdout was saved
6009 my $stdout = $Global::fd{1};
6010 print $stdout @_[1..$#_];
6011 } else {
6012 print @_[1..$#_];
6017 sub my_memory_usage() {
6018 # Returns:
6019 # memory usage if found
6020 # 0 otherwise
6021 use strict;
6022 use FileHandle;
6024 local $/ = "\n";
6025 my $pid = $$;
6026 if(-e "/proc/$pid/stat") {
6027 my $fh = FileHandle->new("</proc/$pid/stat");
6029 my $data = <$fh>;
6030 chomp $data;
6031 $fh->close;
6033 my @procinfo = split(/\s+/,$data);
6035 return undef_as_zero($procinfo[22]);
6036 } else {
6037 return 0;
6041 sub my_size() {
6042 # Returns:
6043 # $size = size of object if Devel::Size is installed
6044 # -1 otherwise
6045 my @size_this = (@_);
6046 eval "use Devel::Size qw(size total_size)";
6047 if ($@) {
6048 return -1;
6049 } else {
6050 return total_size(@_);
6054 sub my_dump(@) {
6055 # Returns:
6056 # ascii expression of object if Data::Dump(er) is installed
6057 # error code otherwise
6058 my @dump_this = (@_);
6059 eval "use Data::Dump qw(dump);";
6060 if ($@) {
6061 # Data::Dump not installed
6062 eval "use Data::Dumper;";
6063 if ($@) {
6064 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
6065 "Not dumping output\n";
6066 ::status($err);
6067 return $err;
6068 } else {
6069 return Dumper(@dump_this);
6071 } else {
6072 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
6073 # it undefined
6074 eval "sub Data::Dump:dump {}";
6075 eval "use Data::Dump qw(dump);";
6076 return (Data::Dump::dump(@dump_this));
6080 sub my_croak(@) {
6081 eval "use Carp; 1";
6082 $Carp::Verbose = 1;
6083 croak(@_);
6086 sub my_carp() {
6087 eval "use Carp; 1";
6088 $Carp::Verbose = 1;
6089 carp(@_);
6093 sub __OBJECT_ORIENTED_PARTS__() {}
6096 package SSHLogin;
6098 sub new($$) {
6099 my $class = shift;
6100 my $sshlogin_string = shift;
6101 my $ncpus;
6102 my %hostgroups;
6103 # SSHLogins can have these formats:
6104 # @grp+grp/ncpu//usr/bin/ssh user@server
6105 # ncpu//usr/bin/ssh user@server
6106 # /usr/bin/ssh user@server
6107 # user@server
6108 # ncpu/user@server
6109 # @grp+grp/user@server
6110 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
6111 # Look for SSHLogin hostgroups
6112 %hostgroups = map { $_ => 1 } split(/\+/, $1);
6114 # An SSHLogin is always in the hostgroup of its "numcpu/host"
6115 $hostgroups{$sshlogin_string} = 1;
6116 if ($sshlogin_string =~ s:^(\d+)/::) {
6117 # Override default autodetected ncpus unless missing
6118 $ncpus = $1;
6120 my $string = $sshlogin_string;
6121 # An SSHLogin is always in the hostgroup of its $string-name
6122 $hostgroups{$string} = 1;
6123 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
6124 my @unget = ();
6125 my $no_slash_string = $string;
6126 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
6127 return bless {
6128 'string' => $string,
6129 'jobs_running' => 0,
6130 'jobs_completed' => 0,
6131 'maxlength' => undef,
6132 'max_jobs_running' => undef,
6133 'orig_max_jobs_running' => undef,
6134 'ncpus' => $ncpus,
6135 'hostgroups' => \%hostgroups,
6136 'sshcommand' => undef,
6137 'serverlogin' => undef,
6138 'control_path_dir' => undef,
6139 'control_path' => undef,
6140 'time_to_login' => undef,
6141 'last_login_at' => undef,
6142 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6143 $no_slash_string . "/loadavg",
6144 'loadavg' => undef,
6145 'last_loadavg_update' => 0,
6146 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6147 $no_slash_string . "/swap_activity",
6148 'swap_activity' => undef,
6149 }, ref($class) || $class;
6152 sub DESTROY($) {
6153 my $self = shift;
6154 # Remove temporary files if they are created.
6155 ::rm($self->{'loadavg_file'});
6156 ::rm($self->{'swap_activity_file'});
6159 sub string($) {
6160 my $self = shift;
6161 return $self->{'string'};
6164 sub jobs_running($) {
6165 my $self = shift;
6166 return ($self->{'jobs_running'} || "0");
6169 sub inc_jobs_running($) {
6170 my $self = shift;
6171 $self->{'jobs_running'}++;
6174 sub dec_jobs_running($) {
6175 my $self = shift;
6176 $self->{'jobs_running'}--;
6179 sub set_maxlength($$) {
6180 my $self = shift;
6181 $self->{'maxlength'} = shift;
6184 sub maxlength($) {
6185 my $self = shift;
6186 return $self->{'maxlength'};
6189 sub jobs_completed() {
6190 my $self = shift;
6191 return $self->{'jobs_completed'};
6194 sub in_hostgroups() {
6195 # Input:
6196 # @hostgroups = the hostgroups to look for
6197 # Returns:
6198 # true if intersection of @hostgroups and the hostgroups of this
6199 # SSHLogin is non-empty
6200 my $self = shift;
6201 return grep { defined $self->{'hostgroups'}{$_} } @_;
6204 sub hostgroups() {
6205 my $self = shift;
6206 return keys %{$self->{'hostgroups'}};
6209 sub inc_jobs_completed($) {
6210 my $self = shift;
6211 $self->{'jobs_completed'}++;
6212 $Global::total_completed++;
6215 sub set_max_jobs_running($$) {
6216 my $self = shift;
6217 if(defined $self->{'max_jobs_running'}) {
6218 $Global::max_jobs_running -= $self->{'max_jobs_running'};
6220 $self->{'max_jobs_running'} = shift;
6221 if(defined $self->{'max_jobs_running'}) {
6222 # max_jobs_running could be resat if -j is a changed file
6223 $Global::max_jobs_running += $self->{'max_jobs_running'};
6225 # Initialize orig to the first non-zero value that comes around
6226 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
6229 sub memfree() {
6230 # Returns:
6231 # $memfree in bytes
6232 my $self = shift;
6233 $self->memfree_recompute();
6234 # Return 1 if not defined.
6235 return (not defined $self->{'memfree'} or $self->{'memfree'})
6238 sub memfree_recompute() {
6239 my $self = shift;
6240 my $script = memfreescript();
6242 # TODO add sshlogin and backgrounding
6243 # Run the script twice if it gives 0 (typically intermittent error)
6244 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
6245 if(not $self->{'memfree'}) {
6246 ::die_bug("Less than 1 byte memory free");
6248 #::debug("mem","New free:",$self->{'memfree'}," ");
6252 my $script;
6254 sub memfreescript() {
6255 # Returns:
6256 # shellscript for giving available memory in bytes
6257 if(not $script) {
6258 my %script_of = (
6259 # /proc/meminfo
6260 # MemFree: 7012 kB
6261 # Buffers: 19876 kB
6262 # Cached: 431192 kB
6263 # SwapCached: 0 kB
6264 "linux" => (
6266 print 1024 * qx{
6267 awk '/^((Swap)?Cached|MemFree|Buffers):/
6268 { sum += \$2} END { print sum }'
6269 /proc/meminfo }
6271 # Android uses same code as GNU/Linux
6272 "android" => (
6274 print 1024 * qx{
6275 awk '/^((Swap)?Cached|MemFree|Buffers):/
6276 { sum += \$2} END { print sum }'
6277 /proc/meminfo }
6279 # $ vmstat 1 1
6280 # procs memory page faults cpu
6281 # r b w avm free re at pi po fr de sr in sy cs us sy id
6282 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
6283 "hpux" => (
6285 print (((reverse `vmstat 1 1`)[0]
6286 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6288 # $ vmstat 1 2
6289 # kthr memory page disk faults cpu
6290 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
6291 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
6292 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
6294 # The second free value is correct
6295 "solaris" => (
6297 print (((reverse `vmstat 1 2`)[0]
6298 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6300 # hw.pagesize: 4096
6301 # vm.stats.vm.v_cache_count: 0
6302 # vm.stats.vm.v_inactive_count: 79574
6303 # vm.stats.vm.v_free_count: 4507
6304 "freebsd" => (
6306 for(qx{/sbin/sysctl -a}) {
6307 if (/^([^:]+):\s+(.+)\s*$/s) {
6308 $sysctl->{$1} = $2;
6311 print $sysctl->{"hw.pagesize"} *
6312 ($sysctl->{"vm.stats.vm.v_cache_count"}
6313 + $sysctl->{"vm.stats.vm.v_inactive_count"}
6314 + $sysctl->{"vm.stats.vm.v_free_count"});
6316 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6317 # Pages free: 198061.
6318 # Pages active: 159701.
6319 # Pages inactive: 47378.
6320 # Pages speculative: 29707.
6321 # Pages wired down: 89231.
6322 # "Translation faults": 928901425.
6323 # Pages copy-on-write: 156988239.
6324 # Pages zero filled: 271267894.
6325 # Pages reactivated: 48895.
6326 # Pageins: 1798068.
6327 # Pageouts: 257.
6328 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
6329 'darwin' => (
6331 $vm = `vm_stat`;
6332 print (($vm =~ /page size of (\d+)/)[0] *
6333 (($vm =~ /Pages free:\s+(\d+)/)[0] +
6334 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
6337 my $perlscript = "";
6338 # Make a perl script that detects the OS ($^O) and runs
6339 # the appropriate command
6340 for my $os (keys %script_of) {
6341 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
6343 $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
6345 return $script;
6349 sub limit($) {
6350 # Returns:
6351 # 0 = Below limit. Start another job.
6352 # 1 = Over limit. Start no jobs.
6353 # 2 = Kill youngest job
6354 my $self = shift;
6356 if(not defined $self->{'limitscript'}) {
6357 my %limitscripts =
6358 ("io" => q!
6359 io() {
6360 limit=$1;
6361 io_file=$2;
6362 # Do the measurement in the background
6363 (tmp=$(tempfile);
6364 LANG=C iostat -x 1 2 > $tmp;
6365 mv $tmp $io_file) &
6366 perl -e '-e $ARGV[0] or exit(1);
6367 for(reverse <>) {
6368 /Device/ and last;
6369 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
6370 exit ($max < '$limit')' $io_file;
6372 export -f io;
6373 io %s %s
6375 "mem" => q!
6376 mem() {
6377 limit=$1;
6378 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
6379 END {
6380 if (sum*1024 < '$limit'/2) { exit 2; }
6381 else { exit (sum*1024 < '$limit') }
6382 }' /proc/meminfo;
6384 export -f mem;
6385 mem %s;
6387 "load" => q!
6388 load() {
6389 limit=$1;
6390 ps ax -o state,command |
6391 grep -E '^[DOR].[^[]' |
6392 wc -l |
6393 perl -ne 'exit ('$limit' < $_)';
6395 export -f load;
6396 load %s;
6399 my ($cmd,@args) = split /\s+/,$opt::limit;
6400 if($limitscripts{$cmd}) {
6401 my $tmpfile = ::tmpname("parlmt");
6402 ++$Global::unlink{$tmpfile};
6403 $self->{'limitscript'} =
6404 ::spacefree(1, sprintf($limitscripts{$cmd},
6405 ::multiply_binary_prefix(@args),$tmpfile));
6406 } else {
6407 $self->{'limitscript'} = $opt::limit;
6411 my %env = %ENV;
6412 local %ENV = %env;
6413 $ENV{'SSHLOGIN'} = $self->string();
6414 system($Global::shell,"-c",$self->{'limitscript'});
6415 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
6416 return $?>>8;
6420 sub swapping($) {
6421 my $self = shift;
6422 my $swapping = $self->swap_activity();
6423 return (not defined $swapping or $swapping)
6426 sub swap_activity($) {
6427 # If the currently known swap activity is too old:
6428 # Recompute a new one in the background
6429 # Returns:
6430 # last swap activity computed
6431 my $self = shift;
6432 # Should we update the swap_activity file?
6433 my $update_swap_activity_file = 0;
6434 if(-r $self->{'swap_activity_file'}) {
6435 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
6436 ::die_bug("swap_activity_file-r");
6437 my $swap_out = <$swap_fh>;
6438 close $swap_fh;
6439 if($swap_out =~ /^(\d+)$/) {
6440 $self->{'swap_activity'} = $1;
6441 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
6443 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
6444 if(time - $self->{'last_swap_activity_update'} > 10) {
6445 # last swap activity update was started 10 seconds ago
6446 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
6447 $update_swap_activity_file = 1;
6449 } else {
6450 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
6451 $self->{'swap_activity'} = undef;
6452 $update_swap_activity_file = 1;
6454 if($update_swap_activity_file) {
6455 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
6456 $self->{'last_swap_activity_update'} = time;
6457 my $dir = ::dirname($self->{'swap_activity_file'});
6458 -d $dir or eval { File::Path::mkpath($dir); };
6459 my $swap_activity;
6460 $swap_activity = swapactivityscript();
6461 if($self->{'string'} ne ":") {
6462 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
6463 ::Q($swap_activity);
6465 # Run swap_activity measuring.
6466 # As the command can take long to run if run remote
6467 # save it to a tmp file before moving it to the correct file
6468 my $file = $self->{'swap_activity_file'};
6469 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
6470 ::debug("swap", "\n", $swap_activity, "\n");
6471 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
6473 return $self->{'swap_activity'};
6477 my $script;
6479 sub swapactivityscript() {
6480 # Returns:
6481 # shellscript for detecting swap activity
6483 # arguments for vmstat are OS dependant
6484 # swap_in and swap_out are in different columns depending on OS
6486 if(not $script) {
6487 my %vmstat = (
6488 # linux: $7*$8
6489 # $ vmstat 1 2
6490 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6491 # r b swpd free buff cache si so bi bo in cs us sy id wa
6492 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6493 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6494 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6496 # solaris: $6*$7
6497 # $ vmstat -S 1 2
6498 # kthr memory page disk faults cpu
6499 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6500 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6501 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6502 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6504 # darwin (macosx): $21*$22
6505 # $ vm_stat -c 2 1
6506 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6507 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6508 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6509 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6510 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6512 # ultrix: $12*$13
6513 # $ vmstat -S 1 2
6514 # procs faults cpu memory page disk
6515 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6516 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6517 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6518 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6520 # aix: $6*$7
6521 # $ vmstat 1 2
6522 # System configuration: lcpu=1 mem=2048MB
6524 # kthr memory page faults cpu
6525 # ----- ----------- ------------------------ ------------ -----------
6526 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6527 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6528 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6529 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6531 # freebsd: $8*$9
6532 # $ vmstat -H 1 2
6533 # procs memory page disks faults cpu
6534 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6535 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6536 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6537 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6539 # mirbsd: $8*$9
6540 # $ vmstat 1 2
6541 # procs memory page disks traps cpu
6542 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6543 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6544 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6545 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6547 # netbsd: $7*$8
6548 # $ vmstat 1 2
6549 # procs memory page disks faults cpu
6550 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6551 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6552 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6553 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6555 # openbsd: $8*$9
6556 # $ vmstat 1 2
6557 # procs memory page disks traps cpu
6558 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6559 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6560 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6561 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6563 # hpux: $8*$9
6564 # $ vmstat 1 2
6565 # procs memory page faults cpu
6566 # r b w avm free re at pi po fr de sr in sy cs us sy id
6567 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6568 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6569 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6571 # dec_osf (tru64): $11*$12
6572 # $ vmstat 1 2
6573 # Virtual Memory Statistics: (pagesize = 8192)
6574 # procs memory pages intr cpu
6575 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6576 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6577 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6578 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6580 # gnu (hurd): $7*$8
6581 # $ vmstat -k 1 2
6582 # (pagesize: 4, size: 512288, swap size: 894972)
6583 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6584 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6585 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6586 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6588 # -nto (qnx has no swap)
6589 #-irix
6590 #-svr5 (scosysv)
6592 my $perlscript = "";
6593 # Make a perl script that detects the OS ($^O) and runs
6594 # the appropriate vmstat command
6595 for my $os (keys %vmstat) {
6596 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6597 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6598 $vmstat{$os}[1] . '}"` }';
6600 $script = "perl -e " . ::Q($perlscript);
6602 return $script;
6606 sub too_fast_remote_login($) {
6607 my $self = shift;
6608 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6609 # sshd normally allows 10 simultaneous logins
6610 # A login takes time_to_login
6611 # So time_to_login/5 should be safe
6612 # If now <= last_login + time_to_login/5: Then it is too soon.
6613 my $too_fast = (::now() <= $self->{'last_login_at'}
6614 + $self->{'time_to_login'}/5);
6615 ::debug("run", "Too fast? $too_fast ");
6616 return $too_fast;
6617 } else {
6618 # No logins so far (or time_to_login not computed): it is not too fast
6619 return 0;
6623 sub last_login_at($) {
6624 my $self = shift;
6625 return $self->{'last_login_at'};
6628 sub set_last_login_at($$) {
6629 my $self = shift;
6630 $self->{'last_login_at'} = shift;
6633 sub loadavg_too_high($) {
6634 my $self = shift;
6635 my $loadavg = $self->loadavg();
6636 if(defined $loadavg) {
6637 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6638 return $loadavg >= $self->max_loadavg();
6639 } else {
6640 # Unknown load: Assume load is too high
6641 return 1;
6646 my $cmd;
6647 sub loadavg_cmd() {
6648 if(not $cmd) {
6649 # aix => "ps -ae -o state,command" # state wrong
6650 # bsd => "ps ax -o state,command"
6651 # sysv => "ps -ef -o s -o comm"
6652 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6653 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6654 # awk '{print $2,$1}'
6655 # dec_osf => bsd
6656 # dragonfly => bsd
6657 # freebsd => bsd
6658 # gnu => bsd
6659 # hpux => ps -el|awk '{print $2,$14,$15}'
6660 # irix => ps -ef -o state -o comm
6661 # linux => bsd
6662 # minix => ps el|awk '{print \$1,\$11}'
6663 # mirbsd => bsd
6664 # netbsd => bsd
6665 # openbsd => bsd
6666 # solaris => sysv
6667 # svr5 => sysv
6668 # ultrix => ps -ax | awk '{print $3,$5}'
6669 # unixware => ps -el|awk '{print $2,$14,$15}'
6670 my $ps = ::spacefree(1,q{
6671 $sysv="ps -ef -o s -o comm";
6672 $sysv2="ps -ef -o state -o comm";
6673 $bsd="ps ax -o state,command";
6674 # Treat threads as processes
6675 $bsd2="ps axH -o state,command";
6676 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6677 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6678 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6679 awk '{print $2,$1}' };
6680 $dummy="echo S COMMAND;echo R dummy";
6681 %ps=(
6682 # TODO Find better code for AIX/Android
6683 'aix' => "uptime",
6684 'android' => "uptime",
6685 'cygwin' => $cygwin,
6686 'darwin' => $bsd,
6687 'dec_osf' => $sysv2,
6688 'dragonfly' => $bsd,
6689 'freebsd' => $bsd2,
6690 'gnu' => $bsd,
6691 'hpux' => $psel,
6692 'irix' => $sysv2,
6693 'linux' => $bsd2,
6694 'minix' => "ps el|awk '{print \$1,\$11}'",
6695 'mirbsd' => $bsd,
6696 'msys' => $cygwin,
6697 'netbsd' => $bsd,
6698 'nto' => $dummy,
6699 'openbsd' => $bsd,
6700 'solaris' => $sysv,
6701 'svr5' => $psel,
6702 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6703 'MSWin32' => $sysv,
6705 print `$ps{$^O}`;
6707 # The command is too long for csh, so base64_wrap the command
6708 $cmd = Job::base64_wrap($ps);
6710 return $cmd;
6715 sub loadavg($) {
6716 # If the currently know loadavg is too old:
6717 # Recompute a new one in the background
6718 # The load average is computed as the number of processes waiting for disk
6719 # or CPU right now. So it is the server load this instant and not averaged over
6720 # several minutes. This is needed so GNU Parallel will at most start one job
6721 # that will push the load over the limit.
6723 # Returns:
6724 # $last_loadavg = last load average computed (undef if none)
6725 my $self = shift;
6726 # Should we update the loadavg file?
6727 my $update_loadavg_file = 0;
6728 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6729 local $/; # $/ = undef => slurp whole file
6730 my $load_out = <$load_fh>;
6731 close $load_fh;
6732 if($load_out =~ /\S/) {
6733 # Content can be empty if ~/ is on NFS
6734 # due to reading being non-atomic.
6736 # Count lines starting with D,O,R but command does not start with [
6737 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6738 if($load > 0) {
6739 # load is overestimated by 1
6740 $self->{'loadavg'} = $load - 1;
6741 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6742 } elsif ($load_out=~/average: (\d+.\d+)/) {
6743 # AIX does not support instant load average
6744 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6745 $self->{'loadavg'} = $1;
6746 } else {
6747 ::die_bug("loadavg_invalid_content: " .
6748 $self->{'loadavg_file'} . "\n$load_out");
6751 $update_loadavg_file = 1;
6752 } else {
6753 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6754 $self->{'loadavg'} = undef;
6755 $update_loadavg_file = 1;
6757 if($update_loadavg_file) {
6758 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
6759 $self->{'last_loadavg_update'} = time;
6760 my $dir = ::dirname($self->{'swap_activity_file'});
6761 -d $dir or eval { File::Path::mkpath($dir); };
6762 -w $dir or ::die_bug("Cannot write to $dir");
6763 my $cmd = "";
6764 if($self->{'string'} ne ":") {
6765 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
6766 ::Q(loadavg_cmd());
6767 } else {
6768 $cmd .= loadavg_cmd();
6770 # As the command can take long to run if run remote
6771 # save it to a tmp file before moving it to the correct file
6772 ::debug("load", "Update load\n");
6773 my $file = $self->{'loadavg_file'};
6774 # tmpfile on same filesystem as $file
6775 my $tmpfile = $file.$$;
6776 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6778 return $self->{'loadavg'};
6781 sub max_loadavg($) {
6782 my $self = shift;
6783 # If --load is a file it might be changed
6784 if($Global::max_load_file) {
6785 my $mtime = (stat($Global::max_load_file))[9];
6786 if($mtime > $Global::max_load_file_last_mod) {
6787 $Global::max_load_file_last_mod = $mtime;
6788 for my $sshlogin (values %Global::host) {
6789 $sshlogin->set_max_loadavg(undef);
6793 if(not defined $self->{'max_loadavg'}) {
6794 $self->{'max_loadavg'} =
6795 $self->compute_max_loadavg($opt::load);
6797 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6798 return $self->{'max_loadavg'};
6801 sub set_max_loadavg($$) {
6802 my $self = shift;
6803 $self->{'max_loadavg'} = shift;
6806 sub compute_max_loadavg($) {
6807 # Parse the max loadaverage that the user asked for using --load
6808 # Returns:
6809 # max loadaverage
6810 my $self = shift;
6811 my $loadspec = shift;
6812 my $load;
6813 if(defined $loadspec) {
6814 if($loadspec =~ /^\+(\d+)$/) {
6815 # E.g. --load +2
6816 my $j = $1;
6817 $load =
6818 $self->ncpus() + $j;
6819 } elsif ($loadspec =~ /^-(\d+)$/) {
6820 # E.g. --load -2
6821 my $j = $1;
6822 $load =
6823 $self->ncpus() - $j;
6824 } elsif ($loadspec =~ /^(\d+)\%$/) {
6825 my $j = $1;
6826 $load =
6827 $self->ncpus() * $j / 100;
6828 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6829 $load = $1;
6830 } elsif (-f $loadspec) {
6831 $Global::max_load_file = $loadspec;
6832 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6833 if(open(my $in_fh, "<", $Global::max_load_file)) {
6834 my $opt_load_file = join("",<$in_fh>);
6835 close $in_fh;
6836 $load = $self->compute_max_loadavg($opt_load_file);
6837 } else {
6838 ::error("Cannot open $loadspec.");
6839 ::wait_and_exit(255);
6841 } else {
6842 ::error("Parsing of --load failed.");
6843 ::die_usage();
6845 if($load < 0.01) {
6846 $load = 0.01;
6849 return $load;
6852 sub time_to_login($) {
6853 my $self = shift;
6854 return $self->{'time_to_login'};
6857 sub set_time_to_login($$) {
6858 my $self = shift;
6859 $self->{'time_to_login'} = shift;
6862 sub max_jobs_running($) {
6863 my $self = shift;
6864 if(not defined $self->{'max_jobs_running'}) {
6865 my $nproc = $self->compute_number_of_processes($opt::jobs);
6866 $self->set_max_jobs_running($nproc);
6868 return $self->{'max_jobs_running'};
6871 sub orig_max_jobs_running($) {
6872 my $self = shift;
6873 return $self->{'orig_max_jobs_running'};
6876 sub compute_number_of_processes($) {
6877 # Number of processes wanted and limited by system resources
6878 # Returns:
6879 # Number of processes
6880 my $self = shift;
6881 my $opt_P = shift;
6882 my $wanted_processes = $self->user_requested_processes($opt_P);
6883 if(not defined $wanted_processes) {
6884 $wanted_processes = $Global::default_simultaneous_sshlogins;
6886 ::debug("load", "Wanted procs: $wanted_processes\n");
6887 my $system_limit =
6888 $self->processes_available_by_system_limit($wanted_processes);
6889 ::debug("load", "Limited to procs: $system_limit\n");
6890 return $system_limit;
6894 my @children;
6895 my $max_system_proc_reached;
6896 my $more_filehandles;
6897 my %fh;
6898 my $tmpfhname;
6899 my $count_jobs_already_read;
6900 my @jobs;
6901 my $job;
6902 my @args;
6903 my $arg;
6905 sub reserve_filehandles($) {
6906 # Reserves filehandle
6907 my $n = shift;
6908 for (1..$n) {
6909 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6913 sub reserve_process() {
6914 # Spawn a dummy process
6915 my $child;
6916 if($child = fork()) {
6917 push @children, $child;
6918 $Global::unkilled_children{$child} = 1;
6919 } elsif(defined $child) {
6920 # This is the child
6921 # The child takes one process slot
6922 # It will be killed later
6923 $SIG{'TERM'} = $Global::original_sig{'TERM'};
6924 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6925 # The exec does not work on Cygwin and QNX
6926 sleep 10101010;
6927 } else {
6928 # 'exec sleep' takes less RAM than sleeping in perl
6929 exec 'sleep', 10101;
6931 exit(0);
6932 } else {
6933 # Failed to spawn
6934 $max_system_proc_reached = 1;
6938 sub get_args_or_jobs() {
6939 # Get an arg or a job (depending on mode)
6940 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6941 # Skip: No need to get args
6942 return 1;
6943 } elsif(defined $opt::retries and $count_jobs_already_read) {
6944 # For retries we may need to run all jobs on this sshlogin
6945 # so include the already read jobs for this sshlogin
6946 $count_jobs_already_read--;
6947 return 1;
6948 } else {
6949 if($opt::X or $opt::m) {
6950 # The arguments may have to be re-spread over several jobslots
6951 # So pessimistically only read one arg per jobslot
6952 # instead of a full commandline
6953 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6954 if($Global::JobQueue->empty()) {
6955 return 0;
6956 } else {
6957 $job = $Global::JobQueue->get();
6958 push(@jobs, $job);
6959 return 1;
6961 } else {
6962 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6963 push(@args, $arg);
6964 return 1;
6966 } else {
6967 # If there are no more command lines, then we have a process
6968 # per command line, so no need to go further
6969 if($Global::JobQueue->empty()) {
6970 return 0;
6971 } else {
6972 $job = $Global::JobQueue->get();
6973 # Replacement must happen here due to seq()
6974 $job and $job->replaced();
6975 push(@jobs, $job);
6976 return 1;
6982 sub cleanup() {
6983 # Cleanup: Close the files
6984 for (values %fh) { close $_ }
6985 # Cleanup: Kill the children
6986 for my $pid (@children) {
6987 kill 9, $pid;
6988 waitpid($pid,0);
6989 delete $Global::unkilled_children{$pid};
6991 # Cleanup: Unget the command_lines or the @args
6992 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6993 @args = ();
6994 $Global::JobQueue->unget(@jobs);
6995 @jobs = ();
6998 sub processes_available_by_system_limit($) {
6999 # If the wanted number of processes is bigger than the system limits:
7000 # Limit them to the system limits
7001 # Limits are: File handles, number of input lines, processes,
7002 # and taking > 1 second to spawn 10 extra processes
7003 # Returns:
7004 # Number of processes
7005 my $self = shift;
7006 my $wanted_processes = shift;
7007 my $system_limit = 0;
7008 my $slow_spawning_warning_printed = 0;
7009 my $time = time;
7010 $more_filehandles = 1;
7011 $tmpfhname = "TmpFhNamE";
7013 # perl uses 7 filehandles for something?
7014 # parallel uses 1 for memory_usage
7015 # parallel uses 4 for ?
7016 reserve_filehandles(12);
7017 # Two processes for load avg and ?
7018 reserve_process();
7019 reserve_process();
7021 # For --retries count also jobs already run
7022 $count_jobs_already_read = $Global::JobQueue->next_seq();
7023 my $wait_time_for_getting_args = 0;
7024 my $start_time = time;
7025 while(1) {
7026 $system_limit >= $wanted_processes and last;
7027 not $more_filehandles and last;
7028 $max_system_proc_reached and last;
7030 my $before_getting_arg = time;
7031 if(!$Global::dummy_jobs) {
7032 get_args_or_jobs() or last;
7034 $wait_time_for_getting_args += time - $before_getting_arg;
7035 $system_limit++;
7037 # Every simultaneous process uses 2 filehandles to write to
7038 # and 2 filehandles to read from
7039 reserve_filehandles(4);
7041 # System process limit
7042 reserve_process();
7044 my $forktime = time - $time - $wait_time_for_getting_args;
7045 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
7046 $forktime,
7047 " (processes so far: ", $system_limit,")\n");
7048 if($system_limit > 10 and
7049 $forktime > 1 and
7050 $forktime > $system_limit * 0.01
7051 and not $slow_spawning_warning_printed) {
7052 # It took more than 0.01 second to fork a processes on avg.
7053 # Give the user a warning. He can press Ctrl-C if this
7054 # sucks.
7055 ::warning("Starting $system_limit processes took > $forktime sec.",
7056 "Consider adjusting -j. Press CTRL-C to stop.");
7057 $slow_spawning_warning_printed = 1;
7060 cleanup();
7062 if($system_limit < $wanted_processes) {
7063 # The system_limit is less than the wanted_processes
7064 if($system_limit < 1 and not $Global::JobQueue->empty()) {
7065 ::warning("Cannot spawn any jobs. ".
7066 "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
7067 "or /proc/sys/kernel/pid_max may help.");
7068 ::wait_and_exit(255);
7070 if(not $more_filehandles) {
7071 ::warning("Only enough file handles to run ".
7072 $system_limit. " jobs in parallel.",
7073 "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
7074 "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
7075 "or /proc/sys/fs/file-max may help.");
7077 if($max_system_proc_reached) {
7078 ::warning("Only enough available processes to run ".
7079 $system_limit. " jobs in parallel.",
7080 "Raising ulimit -u or /etc/security/limits.conf ",
7081 "or /proc/sys/kernel/pid_max may help.");
7084 if($] == 5.008008 and $system_limit > 1000) {
7085 # https://savannah.gnu.org/bugs/?36942
7086 $system_limit = 1000;
7088 if($Global::JobQueue->empty()) {
7089 $system_limit ||= 1;
7091 if($self->string() ne ":" and
7092 $system_limit > $Global::default_simultaneous_sshlogins) {
7093 $system_limit =
7094 $self->simultaneous_sshlogin_limit($system_limit);
7096 return $system_limit;
7100 sub simultaneous_sshlogin_limit($) {
7101 # Test by logging in wanted number of times simultaneously
7102 # Returns:
7103 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
7104 my $self = shift;
7105 my $wanted_processes = shift;
7106 if($self->{'time_to_login'}) {
7107 return $wanted_processes;
7110 # Try twice because it guesses wrong sometimes
7111 # Choose the minimal
7112 my $ssh_limit =
7113 ::min($self->simultaneous_sshlogin($wanted_processes),
7114 $self->simultaneous_sshlogin($wanted_processes));
7115 if($ssh_limit < $wanted_processes) {
7116 my $serverlogin = $self->serverlogin();
7117 ::warning("ssh to $serverlogin only allows ".
7118 "for $ssh_limit simultaneous logins.",
7119 "You may raise this by changing",
7120 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
7121 "You can also try --sshdelay 0.1",
7122 "Using only ".($ssh_limit-1)." connections ".
7123 "to avoid race conditions.");
7124 # Race condition can cause problem if using all sshs.
7125 if($ssh_limit > 1) { $ssh_limit -= 1; }
7127 return $ssh_limit;
7130 sub simultaneous_sshlogin($) {
7131 # Using $sshlogin try to see if we can do $wanted_processes
7132 # simultaneous logins
7133 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
7134 # grep simul|wc -l
7135 # Input:
7136 # $wanted_processes = Try for this many logins in parallel
7137 # Returns:
7138 # $ssh_limit = Number of succesful parallel logins
7139 local $/ = "\n";
7140 my $self = shift;
7141 my $wanted_processes = shift;
7142 my $sshcmd = $self->sshcommand();
7143 my $serverlogin = $self->serverlogin();
7144 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
7145 # TODO sh -c wrapper to work for csh
7146 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
7147 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
7148 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
7149 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
7150 ::die_bug("simultaneouslogin");
7151 my $ssh_limit = <$simul_fh>;
7152 close $simul_fh;
7153 chomp $ssh_limit;
7154 return $ssh_limit;
7157 sub set_ncpus($$) {
7158 my $self = shift;
7159 $self->{'ncpus'} = shift;
7162 sub user_requested_processes($) {
7163 # Parse the number of processes that the user asked for using -j
7164 # Input:
7165 # $opt_P = string formatted as for -P
7166 # Returns:
7167 # $processes = the number of processes to run on this sshlogin
7168 my $self = shift;
7169 my $opt_P = shift;
7170 my $processes;
7171 if(defined $opt_P) {
7172 if($opt_P =~ /^\+(\d+)$/) {
7173 # E.g. -P +2
7174 my $j = $1;
7175 $processes =
7176 $self->ncpus() + $j;
7177 } elsif ($opt_P =~ /^-(\d+)$/) {
7178 # E.g. -P -2
7179 my $j = $1;
7180 $processes =
7181 $self->ncpus() - $j;
7182 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
7183 # E.g. -P 10.5%
7184 my $j = $1;
7185 $processes =
7186 $self->ncpus() * $j / 100;
7187 } elsif ($opt_P =~ /^(\d+)$/) {
7188 $processes = $1;
7189 if($processes == 0) {
7190 # -P 0 = infinity (or at least close)
7191 $processes = $Global::infinity;
7193 } elsif (-f $opt_P) {
7194 $Global::max_procs_file = $opt_P;
7195 if(open(my $in_fh, "<", $Global::max_procs_file)) {
7196 my $opt_P_file = join("",<$in_fh>);
7197 close $in_fh;
7198 $processes = $self->user_requested_processes($opt_P_file);
7199 } else {
7200 ::error("Cannot open $opt_P.");
7201 ::wait_and_exit(255);
7203 } else {
7204 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
7205 ::die_usage();
7207 $processes = ::ceil($processes);
7209 return $processes;
7212 sub ncpus($) {
7213 # Number of CPU threads
7214 # --use_sockets_instead_of_threads = count socket instead
7215 # --use_cores_instead_of_threads = count physical cores instead
7216 # Returns:
7217 # $ncpus = number of cpu (threads) on this sshlogin
7218 local $/ = "\n";
7219 my $self = shift;
7220 if(not defined $self->{'ncpus'}) {
7221 my $sshcmd = $self->sshcommand();
7222 my $serverlogin = $self->serverlogin();
7223 if($serverlogin eq ":") {
7224 if($opt::use_sockets_instead_of_threads) {
7225 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
7226 } elsif($opt::use_cores_instead_of_threads) {
7227 $self->{'ncpus'} = socket_core_thread()->{'cores'};
7228 } else {
7229 $self->{'ncpus'} = socket_core_thread()->{'threads'};
7231 } else {
7232 my $ncpu;
7233 ::debug("init","echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7234 if($opt::use_sockets_instead_of_threads
7236 $opt::use_cpus_instead_of_cores) {
7237 $ncpu =
7238 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7239 } elsif($opt::use_cores_instead_of_threads) {
7240 $ncpu =
7241 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
7242 } else {
7243 $ncpu =
7244 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
7246 chomp $ncpu;
7247 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
7248 $self->{'ncpus'} = $ncpu;
7249 } else {
7250 ::warning("Could not figure out ".
7251 "number of cpus on $serverlogin ($ncpu). Using 1.");
7252 $self->{'ncpus'} = 1;
7256 return $self->{'ncpus'};
7260 sub nproc() {
7261 # Returns:
7262 # Number of threads using `nproc`
7263 my $no_of_threads = ::qqx("nproc");
7264 chomp $no_of_threads;
7265 return $no_of_threads;
7268 sub no_of_sockets() {
7269 return socket_core_thread()->{'sockets'};
7272 sub no_of_cores() {
7273 return socket_core_thread()->{'cores'};
7276 sub no_of_threads() {
7277 return socket_core_thread()->{'threads'};
7280 sub socket_core_thread() {
7281 # Returns:
7283 # 'sockets' => #sockets = number of socket with CPU present
7284 # 'cores' => #cores = number of physical cores
7285 # 'threads' => #threads = number of compute cores (hyperthreading)
7286 # 'active' => #taskset_threads = number of taskset limited cores
7288 my $cpu;
7289 my $cached_cpuspec = $Global::cache_dir . "/tmp/sshlogin/" .
7290 ::hostname() . "/cpuspec";
7291 if(-e $cached_cpuspec and -M $cached_cpuspec < 1) {
7292 # Reading cached copy instead of /proc/cpuinfo is 17 ms faster
7293 local $/ = "\n";
7294 if(open(my $in_fh, "<", $cached_cpuspec)) {
7295 ::debug("init","Read $cached_cpuspec\n");
7296 $cpu->{'sockets'} = int(<$in_fh>);
7297 $cpu->{'cores'} = int(<$in_fh>);
7298 $cpu->{'threads'} = int(<$in_fh>);
7299 close $in_fh;
7302 if ($^O eq 'linux') {
7303 $cpu = sct_gnu_linux($cpu);
7304 } elsif ($^O eq 'android') {
7305 $cpu = sct_android($cpu);
7306 } elsif ($^O eq 'freebsd') {
7307 $cpu = sct_freebsd($cpu);
7308 } elsif ($^O eq 'netbsd') {
7309 $cpu = sct_netbsd($cpu);
7310 } elsif ($^O eq 'openbsd') {
7311 $cpu = sct_openbsd($cpu);
7312 } elsif ($^O eq 'gnu') {
7313 $cpu = sct_hurd($cpu);
7314 } elsif ($^O eq 'darwin') {
7315 $cpu = sct_darwin($cpu);
7316 } elsif ($^O eq 'solaris') {
7317 $cpu = sct_solaris($cpu);
7318 } elsif ($^O eq 'aix') {
7319 $cpu = sct_aix($cpu);
7320 } elsif ($^O eq 'hpux') {
7321 $cpu = sct_hpux($cpu);
7322 } elsif ($^O eq 'nto') {
7323 $cpu = sct_qnx($cpu);
7324 } elsif ($^O eq 'svr5') {
7325 $cpu = sct_openserver($cpu);
7326 } elsif ($^O eq 'irix') {
7327 $cpu = sct_irix($cpu);
7328 } elsif ($^O eq 'dec_osf') {
7329 $cpu = sct_tru64($cpu);
7330 } else {
7331 # Try all methods until we find something that works
7332 $cpu = (sct_gnu_linux($cpu)
7333 || sct_android($cpu)
7334 || sct_freebsd($cpu)
7335 || sct_netbsd($cpu)
7336 || sct_openbsd($cpu)
7337 || sct_hurd($cpu)
7338 || sct_darwin($cpu)
7339 || sct_solaris($cpu)
7340 || sct_aix($cpu)
7341 || sct_hpux($cpu)
7342 || sct_qnx($cpu)
7343 || sct_openserver($cpu)
7344 || sct_irix($cpu)
7345 || sct_tru64($cpu)
7348 if(not grep { $_ > 0 } values %$cpu) {
7349 $cpu = undef;
7351 # Write cached copy instead of /proc/cpuinfo is 17 ms faster
7352 if($cpu and open(my $out_fh, ">", $cached_cpuspec)) {
7353 print $out_fh (map { chomp; "$_\n" }
7354 $cpu->{'sockets'},
7355 $cpu->{'cores'},
7356 $cpu->{'threads'});
7357 close $out_fh;
7359 if(not $cpu) {
7360 my $nproc = nproc();
7361 if($nproc) {
7362 $cpu->{'sockets'} =
7363 $cpu->{'cores'} =
7364 $cpu->{'threads'} =
7365 $cpu->{'active'} =
7366 $nproc;
7369 if(not $cpu) {
7370 ::warning("Cannot figure out number of cpus. Using 1.");
7371 $cpu->{'sockets'} =
7372 $cpu->{'cores'} =
7373 $cpu->{'threads'} =
7374 $cpu->{'active'} =
7377 $cpu->{'sockets'} ||= 1;
7378 $cpu->{'threads'} ||= $cpu->{'cores'};
7379 $cpu->{'active'} ||= $cpu->{'threads'};
7380 chomp($cpu->{'sockets'},
7381 $cpu->{'cores'},
7382 $cpu->{'threads'},
7383 $cpu->{'active'});
7384 # Choose minimum of active and actual
7385 my $mincpu;
7386 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
7387 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
7388 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
7389 return $mincpu;
7392 sub sct_gnu_linux($) {
7393 # Returns:
7394 # { 'sockets' => #sockets
7395 # 'cores' => #cores
7396 # 'threads' => #threads
7397 # 'active' => #taskset_threads }
7398 my $cpu = shift;
7400 sub read_topology($) {
7401 my $prefix = shift;
7402 my %sibiling;
7403 my %socket;
7404 my $thread;
7405 for($thread = 0;
7406 -r "$prefix/cpu$thread/topology/physical_package_id";
7407 $thread++) {
7408 open(my $fh,"<",
7409 "$prefix/cpu$thread/topology/physical_package_id")
7410 || die;
7411 $socket{<$fh>}++;
7412 close $fh;
7414 for($thread = 0;
7415 -r "$prefix/cpu$thread/topology/thread_siblings";
7416 $thread++) {
7417 open(my $fh,"<",
7418 "$prefix/cpu$thread/topology/thread_siblings")
7419 || die;
7420 $sibiling{<$fh>}++;
7421 close $fh;
7423 $cpu->{'sockets'} = keys %socket;
7424 $cpu->{'cores'} = keys %sibiling;
7425 $cpu->{'threads'} = $thread;
7428 sub read_cpuinfo(@) {
7429 my @cpuinfo = @_;
7430 $cpu->{'sockets'} = 0;
7431 $cpu->{'cores'} = 0;
7432 $cpu->{'threads'} = 0;
7433 my %seen;
7434 my %phy_seen;
7435 my $physicalid;
7436 for(@cpuinfo) {
7437 # physical id : 0
7438 if(/^physical id.*[:](.*)/) {
7439 $physicalid = $1;
7440 if(not $phy_seen{$1}++) {
7441 $cpu->{'sockets'}++;
7444 # core id : 3
7445 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
7446 $cpu->{'cores'}++;
7448 # processor : 2
7449 /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
7451 $cpu->{'cores'} ||= $cpu->{'threads'};
7452 $cpu->{'cpus'} ||= $cpu->{'threads'};
7453 $cpu->{'sockets'} ||= 1;
7456 sub read_lscpu(@) {
7457 my @lscpu = @_;
7458 my $threads_per_core;
7459 my $cores_per_socket;
7460 for(@lscpu) {
7461 /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
7462 /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
7463 /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
7464 /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
7466 if($threads_per_core and $cpu->{'threads'}) {
7467 $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core;
7469 $cpu->{'cpus'} ||= $cpu->{'threads'};
7472 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
7473 my @cpuinfo;
7474 my @lscpu;
7475 if($ENV{'PARALLEL_CPUINFO'}) {
7476 # Use CPUINFO from environment - used for testing only
7477 read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
7478 } elsif($ENV{'PARALLEL_LSCPU'}) {
7479 # Use LSCPU from environment - used for testing only
7480 read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
7481 } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
7482 # Use CPUPREFIX from environment - used for testing only
7483 read_topology($ENV{'PARALLEL_CPUPREFIX'});
7484 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
7485 # Skip /proc/cpuinfo - already set
7486 } else {
7487 # Not debugging: Look at this computer
7488 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7490 open(my $in_fh, "-|", "lscpu")) {
7491 # Parse output from lscpu
7492 read_lscpu(<$in_fh>);
7493 close $in_fh;
7495 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7497 -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
7498 read_topology("/sys/devices/system/cpu");
7500 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7502 open(my $in_fh, "<", "/proc/cpuinfo")) {
7503 # Read /proc/cpuinfo
7504 read_cpuinfo(<$in_fh>);
7505 close $in_fh;
7508 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
7509 # if 'taskset' is used to limit number of threads
7510 if(open(my $in_fh, "<", "/proc/self/status")) {
7511 while(<$in_fh>) {
7512 if(/^Cpus_allowed:\s*(\S+)/) {
7513 my $a = $1;
7514 $a =~ tr/,//d;
7515 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
7518 close $in_fh;
7521 return $cpu;
7524 sub sct_android($) {
7525 # Returns:
7526 # { 'sockets' => #sockets
7527 # 'cores' => #cores
7528 # 'threads' => #threads
7529 # 'active' => #taskset_threads }
7530 # Use GNU/Linux
7531 return sct_gnu_linux($_[0]);
7534 sub sct_freebsd($) {
7535 # Returns:
7536 # { 'sockets' => #sockets
7537 # 'cores' => #cores
7538 # 'threads' => #threads
7539 # 'active' => #taskset_threads }
7540 local $/ = "\n";
7541 my $cpu = shift;
7542 $cpu->{'cores'} ||=
7543 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
7545 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
7546 $cpu->{'threads'} ||=
7547 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
7549 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
7550 return $cpu;
7553 sub sct_netbsd($) {
7554 # Returns:
7555 # { 'sockets' => #sockets
7556 # 'cores' => #cores
7557 # 'threads' => #threads
7558 # 'active' => #taskset_threads }
7559 local $/ = "\n";
7560 my $cpu = shift;
7561 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
7562 return $cpu;
7565 sub sct_openbsd($) {
7566 # Returns:
7567 # { 'sockets' => #sockets
7568 # 'cores' => #cores
7569 # 'threads' => #threads
7570 # 'active' => #taskset_threads }
7571 local $/ = "\n";
7572 my $cpu = shift;
7573 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
7574 return $cpu;
7577 sub sct_hurd($) {
7578 # Returns:
7579 # { 'sockets' => #sockets
7580 # 'cores' => #cores
7581 # 'threads' => #threads
7582 # 'active' => #taskset_threads }
7583 local $/ = "\n";
7584 my $cpu = shift;
7585 $cpu->{'cores'} ||= ::qqx("nproc");
7586 return $cpu;
7589 sub sct_darwin($) {
7590 # Returns:
7591 # { 'sockets' => #sockets
7592 # 'cores' => #cores
7593 # 'threads' => #threads
7594 # 'active' => #taskset_threads }
7595 local $/ = "\n";
7596 my $cpu = shift;
7597 $cpu->{'cores'} ||=
7598 (::qqx('sysctl -n hw.physicalcpu')
7600 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7601 $cpu->{'threads'} ||=
7602 (::qqx('sysctl -n hw.logicalcpu')
7604 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7605 return $cpu;
7608 sub sct_solaris($) {
7609 # Returns:
7610 # { 'sockets' => #sockets
7611 # 'cores' => #cores
7612 # 'threads' => #threads
7613 # 'active' => #taskset_threads }
7614 local $/ = "\n";
7615 my $cpu = shift;
7616 if(not $cpu->{'cores'}) {
7617 if(-x "/usr/bin/kstat") {
7618 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
7619 if($#chip_id >= 0) {
7620 $cpu->{'sockets'} ||= $#chip_id +1;
7622 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
7623 if($#core_id >= 0) {
7624 $cpu->{'cores'} ||= $#core_id +1;
7627 if(-x "/usr/sbin/psrinfo") {
7628 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
7629 if($#psrinfo >= 0) {
7630 $cpu->{'sockets'} ||= $psrinfo[0];
7633 if(-x "/usr/sbin/prtconf") {
7634 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7635 if($#prtconf >= 0) {
7636 $cpu->{'cores'} ||= $#prtconf +1;
7640 return $cpu;
7643 sub sct_aix($) {
7644 # Returns:
7645 # { 'sockets' => #sockets
7646 # 'cores' => #cores
7647 # 'threads' => #threads
7648 # 'active' => #taskset_threads }
7649 local $/ = "\n";
7650 my $cpu = shift;
7651 if(not $cpu->{'cores'}) {
7652 if(-x "/usr/sbin/lscfg") {
7653 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7654 $cpu->{'cores'} = <$in_fh>;
7655 close $in_fh;
7659 if(not $cpu->{'threads'}) {
7660 if(-x "/usr/bin/vmstat") {
7661 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7662 while(<$in_fh>) {
7663 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7665 close $in_fh;
7669 return $cpu;
7672 sub sct_hpux($) {
7673 # Returns:
7674 # { 'sockets' => #sockets
7675 # 'cores' => #cores
7676 # 'threads' => #threads
7677 # 'active' => #taskset_threads }
7678 local $/ = "\n";
7679 my $cpu = shift;
7680 $cpu->{'cores'} ||=
7681 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7682 $cpu->{'threads'} ||=
7683 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7684 return $cpu;
7687 sub sct_qnx($) {
7688 # Returns:
7689 # { 'sockets' => #sockets
7690 # 'cores' => #cores
7691 # 'threads' => #threads
7692 # 'active' => #taskset_threads }
7693 local $/ = "\n";
7694 my $cpu = shift;
7695 # BUG: It is not known how to calculate this.
7697 return $cpu;
7700 sub sct_openserver($) {
7701 # Returns:
7702 # { 'sockets' => #sockets
7703 # 'cores' => #cores
7704 # 'threads' => #threads
7705 # 'active' => #taskset_threads }
7706 local $/ = "\n";
7707 my $cpu = shift;
7708 if(not $cpu->{'cores'}) {
7709 if(-x "/usr/sbin/psrinfo") {
7710 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7711 if($#psrinfo >= 0) {
7712 $cpu->{'cores'} = $#psrinfo +1;
7716 $cpu->{'sockets'} ||= $cpu->{'cores'};
7717 return $cpu;
7720 sub sct_irix($) {
7721 # Returns:
7722 # { 'sockets' => #sockets
7723 # 'cores' => #cores
7724 # 'threads' => #threads
7725 # 'active' => #taskset_threads }
7726 local $/ = "\n";
7727 my $cpu = shift;
7728 $cpu->{'cores'} ||=
7729 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7730 return $cpu;
7733 sub sct_tru64($) {
7734 # Returns:
7735 # { 'sockets' => #sockets
7736 # 'cores' => #cores
7737 # 'threads' => #threads
7738 # 'active' => #taskset_threads }
7739 local $/ = "\n";
7740 my $cpu = shift;
7741 $cpu->{'cores'} ||= ::qqx("sizer -pr");
7742 $cpu->{'sockets'} ||= $cpu->{'cores'};
7743 $cpu->{'threads'} ||= $cpu->{'cores'};
7745 return $cpu;
7748 sub sshcommand($) {
7749 # Returns:
7750 # $sshcommand = the command (incl options) to run when using ssh
7751 my $self = shift;
7752 if (not defined $self->{'sshcommand'}) {
7753 $self->sshcommand_of_sshlogin();
7755 return $self->{'sshcommand'};
7758 sub serverlogin($) {
7759 # Returns:
7760 # $sshcommand = the command (incl options) to run when using ssh
7761 my $self = shift;
7762 if (not defined $self->{'serverlogin'}) {
7763 $self->sshcommand_of_sshlogin();
7765 return $self->{'serverlogin'};
7768 sub sshcommand_of_sshlogin($) {
7769 # Compute ssh command and serverlogin from sshlogin
7770 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
7771 # 'user@server' -> ('ssh','user@server')
7772 # 'myssh user@server' -> ('myssh','user@server')
7773 # 'myssh -l user server' -> ('myssh -l user','server')
7774 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
7775 # Sets:
7776 # $self->{'sshcommand'}
7777 # $self->{'serverlogin'}
7778 my $self = shift;
7779 my ($sshcmd, $serverlogin);
7780 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
7781 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
7782 if($self->{'string'} =~ /(.+) (\S+)$/) {
7783 # Own ssh command
7784 $sshcmd = $1; $serverlogin = $2;
7785 } else {
7786 # Normal ssh
7787 if($opt::controlmaster) {
7788 # Use control_path to make ssh faster
7789 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7790 $sshcmd = $opt::ssh." -S ".$control_path;
7791 $serverlogin = $self->{'string'};
7792 if(not $self->{'control_path'}{$control_path}++) {
7793 # Master is not running for this control_path
7794 # Start it
7795 my $pid = fork();
7796 if($pid) {
7797 $Global::sshmaster{$pid} ||= 1;
7798 } else {
7799 $SIG{'TERM'} = undef;
7800 # Ignore the 'foo' being printed
7801 open(STDOUT,">","/dev/null");
7802 # STDERR >/dev/null to ignore
7803 open(STDERR,">","/dev/null");
7804 open(STDIN,"<","/dev/null");
7805 # Run a sleep that outputs data, so it will discover
7806 # if the ssh connection closes.
7807 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7808 my @master = ($opt::ssh, "-MTS",
7809 $control_path, $serverlogin, "--", "perl", "-e",
7810 $sleep);
7811 exec(@master);
7814 } else {
7815 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7819 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7820 # convert user@server to '-l user server'
7821 # because lsh does not support user@server
7822 $sshcmd = $sshcmd." -l ".$1;
7825 $self->{'sshcommand'} = $sshcmd;
7826 $self->{'serverlogin'} = $serverlogin;
7829 sub control_path_dir($) {
7830 # Returns:
7831 # $control_path_dir = dir of control path (for -M)
7832 my $self = shift;
7833 if(not defined $self->{'control_path_dir'}) {
7834 $self->{'control_path_dir'} =
7835 # Use $ENV{'TMPDIR'} as that is typically not
7836 # NFS mounted
7837 File::Temp::tempdir($ENV{'TMPDIR'}
7838 . "/control_path_dir-XXXX",
7839 CLEANUP => 1);
7841 return $self->{'control_path_dir'};
7844 sub rsync_transfer_cmd($) {
7845 # Command to run to transfer a file
7846 # Input:
7847 # $file = filename of file to transfer
7848 # $workdir = destination dir
7849 # Returns:
7850 # $cmd = rsync command to run to transfer $file ("" if unreadable)
7851 my $self = shift;
7852 my $file = shift;
7853 my $workdir = shift;
7854 if(not -r $file) {
7855 ::warning($file. " is not readable and will not be transferred.");
7856 return "true";
7858 my $rsync_destdir;
7859 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7860 if($relpath) {
7861 $rsync_destdir = ::shell_quote_file($workdir);
7862 } else {
7863 # rsync /foo/bar /
7864 $rsync_destdir = "/";
7866 $file = ::shell_quote_file($file);
7867 my $sshcmd = $self->sshcommand();
7868 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7869 " -e".::Q($sshcmd);
7870 my $serverlogin = $self->serverlogin();
7871 # Make dir if it does not exist
7872 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7873 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7876 sub cleanup_cmd($$$) {
7877 # Command to run to remove the remote file
7878 # Input:
7879 # $file = filename to remove
7880 # $workdir = destination dir
7881 # Returns:
7882 # $cmd = ssh command to run to remove $file and empty parent dirs
7883 my $self = shift;
7884 my $file = shift;
7885 my $workdir = shift;
7886 my $f = $file;
7887 if($f =~ m:/\./:) {
7888 # foo/bar/./baz/quux => workdir/baz/quux
7889 # /foo/bar/./baz/quux => workdir/baz/quux
7890 $f =~ s:.*/\./:$workdir/:;
7891 } elsif($f =~ m:^[^/]:) {
7892 # foo/bar => workdir/foo/bar
7893 $f = $workdir."/".$f;
7895 my @subdirs = split m:/:, ::dirname($f);
7896 my @rmdir;
7897 my $dir = "";
7898 for(@subdirs) {
7899 $dir .= $_."/";
7900 unshift @rmdir, ::shell_quote_file($dir);
7902 my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7903 if(defined $opt::workdir and $opt::workdir eq "...") {
7904 $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7907 $f = ::shell_quote_file($f);
7908 my $sshcmd = $self->sshcommand();
7909 my $serverlogin = $self->serverlogin();
7910 return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7914 my $rsync;
7916 sub rsync {
7917 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7918 # If the version >= 3.1.0: downgrade to protocol 30
7919 # Returns:
7920 # $rsync = "rsync" or "rsync --protocol 30"
7921 if(not $rsync) {
7922 my @out = `rsync --version`;
7923 for (@out) {
7924 # rsync version 3.1.3 protocol version 31
7925 # rsync version v3.2.3 protocol version 31
7926 if(/version v?(\d+.\d+)(.\d+)?/) {
7927 if($1 >= 3.1) {
7928 # Version 3.1.0 or later: Downgrade to protocol 30
7929 $rsync = "rsync --protocol 30";
7930 } else {
7931 $rsync = "rsync";
7935 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7937 return $rsync;
7942 package JobQueue;
7944 sub new($) {
7945 my $class = shift;
7946 my $commandref = shift;
7947 my $read_from = shift;
7948 my $context_replace = shift;
7949 my $max_number_of_args = shift;
7950 my $transfer_files = shift;
7951 my $return_files = shift;
7952 my $commandlinequeue = CommandLineQueue->new
7953 ($commandref, $read_from, $context_replace, $max_number_of_args,
7954 $transfer_files, $return_files);
7955 my @unget = ();
7956 return bless {
7957 'unget' => \@unget,
7958 'commandlinequeue' => $commandlinequeue,
7959 'this_job_no' => 0,
7960 'total_jobs' => undef,
7961 }, ref($class) || $class;
7964 sub get($) {
7965 my $self = shift;
7967 $self->{'this_job_no'}++;
7968 if(@{$self->{'unget'}}) {
7969 return shift @{$self->{'unget'}};
7970 } else {
7971 my $commandline = $self->{'commandlinequeue'}->get();
7972 if(defined $commandline) {
7973 return Job->new($commandline);
7974 } else {
7975 $self->{'this_job_no'}--;
7976 return undef;
7981 sub unget($) {
7982 my $self = shift;
7983 unshift @{$self->{'unget'}}, @_;
7984 $self->{'this_job_no'} -= @_;
7987 sub empty($) {
7988 my $self = shift;
7989 my $empty = (not @{$self->{'unget'}}) &&
7990 $self->{'commandlinequeue'}->empty();
7991 ::debug("run", "JobQueue->empty $empty ");
7992 return $empty;
7995 sub total_jobs($) {
7996 my $self = shift;
7997 if(not defined $self->{'total_jobs'}) {
7998 if($opt::pipe and not $opt::tee) {
7999 ::error("--pipe is incompatible with --eta/--bar/--shuf");
8000 ::wait_and_exit(255);
8002 if($opt::sqlworker) {
8003 $self->{'total_jobs'} = $Global::sql->total_jobs();
8004 } else {
8005 my $record;
8006 my @arg_records;
8007 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
8008 my $start = time;
8009 while($record = $record_queue->get()) {
8010 push @arg_records, $record;
8011 if(time - $start > 10) {
8012 ::warning("Reading ".scalar(@arg_records).
8013 " arguments took longer than 10 seconds.");
8014 $opt::eta && ::warning("Consider removing --eta.");
8015 $opt::bar && ::warning("Consider removing --bar.");
8016 $opt::shuf && ::warning("Consider removing --shuf.");
8017 last;
8020 while($record = $record_queue->get()) {
8021 push @arg_records, $record;
8023 if($opt::shuf and @arg_records) {
8024 my $i = @arg_records;
8025 while (--$i) {
8026 my $j = int rand($i+1);
8027 @arg_records[$i,$j] = @arg_records[$j,$i];
8030 $record_queue->unget(@arg_records);
8031 # $#arg_records = number of args - 1
8032 # We have read one @arg_record for this job (so add 1 more)
8033 my $num_args = $#arg_records + 2;
8034 # This jobs is not started so -1
8035 my $started_jobs = $self->{'this_job_no'} - 1;
8036 my $max_args = ::max($Global::max_number_of_args,1);
8037 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
8038 + $started_jobs;
8039 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
8040 " ($num_args/$max_args + $started_jobs)\n");
8043 return $self->{'total_jobs'};
8046 sub flush_total_jobs($) {
8047 # Unset total_jobs to force recomputing
8048 my $self = shift;
8049 ::debug("init","flush Total jobs: ");
8050 $self->{'total_jobs'} = undef;
8053 sub next_seq($) {
8054 my $self = shift;
8056 return $self->{'commandlinequeue'}->seq();
8059 sub quote_args($) {
8060 my $self = shift;
8061 return $self->{'commandlinequeue'}->quote_args();
8065 package Job;
8067 sub new($) {
8068 my $class = shift;
8069 my $commandlineref = shift;
8070 return bless {
8071 'commandline' => $commandlineref, # CommandLine object
8072 'workdir' => undef, # --workdir
8073 # filehandle for stdin (used for --pipe)
8074 # filename for writing stdout to (used for --files)
8075 # remaining data not sent to stdin (used for --pipe)
8076 # tmpfiles to cleanup when job is done
8077 'unlink' => [],
8078 # amount of data sent via stdin (used for --pipe)
8079 'transfersize' => 0, # size of files using --transfer
8080 'returnsize' => 0, # size of files using --return
8081 'pid' => undef,
8082 # hash of { SSHLogins => number of times the command failed there }
8083 'failed' => undef,
8084 'sshlogin' => undef,
8085 # The commandline wrapped with rsync and ssh
8086 'sshlogin_wrap' => undef,
8087 'exitstatus' => undef,
8088 'exitsignal' => undef,
8089 # Timestamp for timeout if any
8090 'timeout' => undef,
8091 'virgin' => 1,
8092 # Output used for SQL and CSV-output
8093 'output' => { 1 => [], 2 => [] },
8094 'halfline' => { 1 => [], 2 => [] },
8095 }, ref($class) || $class;
8098 sub replaced($) {
8099 my $self = shift;
8100 $self->{'commandline'} or ::die_bug("commandline empty");
8101 return $self->{'commandline'}->replaced();
8104 sub seq($) {
8105 my $self = shift;
8106 return $self->{'commandline'}->seq();
8109 sub set_seq($$) {
8110 my $self = shift;
8111 return $self->{'commandline'}->set_seq(shift);
8114 sub slot($) {
8115 my $self = shift;
8116 return $self->{'commandline'}->slot();
8119 sub free_slot($) {
8120 my $self = shift;
8121 push @Global::slots, $self->slot();
8125 my($cattail);
8127 sub cattail() {
8128 # Returns:
8129 # $cattail = perl program for:
8130 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
8131 if(not $cattail) {
8132 $cattail = q{
8133 # cat followed by tail (possibly with rm as soon at the file is opened)
8134 # If $writerpid dead: finish after this round
8135 use Fcntl;
8136 $|=1;
8138 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
8139 if($read_file) {
8140 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
8141 } else {
8142 *IN = *STDIN;
8144 while(! -s $comfile) {
8145 # Writer has not opened the buffer file, so we cannot remove it yet
8146 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
8147 usleep($sleep);
8149 # The writer and we have both opened the file, so it is safe to unlink it
8150 unlink $unlink_file;
8151 unlink $comfile;
8153 my $first_round = 1;
8154 my $flags;
8155 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
8156 $flags |= O_NONBLOCK; # Add non-blocking to the flags
8157 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
8159 while(1) {
8160 # clear EOF
8161 seek(IN,0,1);
8162 my $writer_running = kill 0, $writerpid;
8163 $read = sysread(IN,$buf,131072);
8164 if($read) {
8165 if($first_round) {
8166 # Only start the command if there any input to process
8167 $first_round = 0;
8168 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
8171 # Blocking print
8172 while($buf) {
8173 my $bytes_written = syswrite(OUT,$buf);
8174 # syswrite may be interrupted by SIGHUP
8175 substr($buf,0,$bytes_written) = "";
8177 # Something printed: Wait less next time
8178 $sleep /= 2;
8179 } else {
8180 if(eof(IN) and not $writer_running) {
8181 # Writer dead: There will never be sent more to the decompressor
8182 close OUT;
8183 exit;
8185 # TODO This could probably be done more efficiently using select(2)
8186 # Nothing read: Wait longer before next read
8187 # Up to 100 milliseconds
8188 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
8189 usleep($sleep);
8193 sub usleep {
8194 # Sleep this many milliseconds.
8195 my $secs = shift;
8196 select(undef, undef, undef, $secs/1000);
8199 $cattail =~ s/#.*//mg;
8200 $cattail =~ s/\s+/ /g;
8202 return $cattail;
8206 sub openoutputfiles($) {
8207 # Open files for STDOUT and STDERR
8208 # Set file handles in $self->fh
8209 my $self = shift;
8210 my ($outfhw, $errfhw, $outname, $errname);
8212 if($opt::linebuffer and not
8213 ($opt::keeporder or $opt::files or $opt::results or
8214 $opt::compress or $opt::compress_program or
8215 $opt::decompress_program)) {
8216 # Do not save to files: Use non-blocking pipe
8217 my ($outfhr, $errfhr);
8218 pipe($outfhr, $outfhw) || die;
8219 pipe($errfhr, $errfhw) || die;
8220 $self->set_fh(1,'w',$outfhw);
8221 $self->set_fh(2,'w',$errfhw);
8222 $self->set_fh(1,'r',$outfhr);
8223 $self->set_fh(2,'r',$errfhr);
8224 # Make it possible to read non-blocking from the pipe
8225 for my $fdno (1,2) {
8226 ::set_fh_non_blocking($self->fh($fdno,'r'));
8228 # Return immediately because we do not need setting filenames
8229 return;
8230 } elsif($opt::results and not $Global::csvsep) {
8231 my $out = $self->{'commandline'}->results_out();
8232 my $seqname;
8233 if($out eq $opt::results or $out =~ m:/$:) {
8234 # $opt::results = simple string or ending in /
8235 # => $out is a dir/
8236 # prefix/name1/val1/name2/val2/seq
8237 $seqname = $out."seq";
8238 # prefix/name1/val1/name2/val2/stdout
8239 $outname = $out."stdout";
8240 # prefix/name1/val1/name2/val2/stderr
8241 $errname = $out."stderr";
8242 } else {
8243 # $opt::results = replacement string not ending in /
8244 # => $out is a file
8245 $outname = $out;
8246 $errname = "$out.err";
8247 $seqname = "$out.seq";
8249 my $seqfhw;
8250 if(not open($seqfhw, "+>", $seqname)) {
8251 ::error("Cannot write to `$seqname'.");
8252 ::wait_and_exit(255);
8254 print $seqfhw $self->seq();
8255 close $seqfhw;
8256 if(not open($outfhw, "+>", $outname)) {
8257 ::error("Cannot write to `$outname'.");
8258 ::wait_and_exit(255);
8260 if(not open($errfhw, "+>", $errname)) {
8261 ::error("Cannot write to `$errname'.");
8262 ::wait_and_exit(255);
8264 $self->set_fh(1,"unlink","");
8265 $self->set_fh(2,"unlink","");
8266 if($opt::sqlworker) {
8267 # Save the filenames in SQL table
8268 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
8269 "WHERE Seq = ". $self->seq(),
8270 $outname, $errname);
8272 } elsif(not $opt::ungroup) {
8273 # To group we create temporary files for STDOUT and STDERR
8274 # To avoid the cleanup unlink the files immediately (but keep them open)
8275 if($opt::files) {
8276 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8277 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8278 # --files => only remove stderr
8279 $self->set_fh(1,"unlink","");
8280 $self->set_fh(2,"unlink",$errname);
8281 } else {
8282 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8283 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8284 $self->set_fh(1,"unlink",$outname);
8285 $self->set_fh(2,"unlink",$errname);
8287 } else {
8288 # --ungroup
8289 open($outfhw,">&",$Global::fd{1}) || die;
8290 open($errfhw,">&",$Global::fd{2}) || die;
8291 # File name must be empty as it will otherwise be printed
8292 $outname = "";
8293 $errname = "";
8294 $self->set_fh(1,"unlink",$outname);
8295 $self->set_fh(2,"unlink",$errname);
8297 # Set writing FD
8298 $self->set_fh(1,'w',$outfhw);
8299 $self->set_fh(2,'w',$errfhw);
8300 $self->set_fh(1,'name',$outname);
8301 $self->set_fh(2,'name',$errname);
8302 if($opt::compress) {
8303 $self->filter_through_compress();
8304 } elsif(not $opt::ungroup) {
8305 $self->grouped();
8307 if($opt::linebuffer) {
8308 # Make it possible to read non-blocking from
8309 # the buffer files
8310 # Used for --linebuffer with -k, --files, --res, --compress*
8311 for my $fdno (1,2) {
8312 ::set_fh_non_blocking($self->fh($fdno,'r'));
8317 sub print_verbose_dryrun($) {
8318 # If -v set: print command to stdout (possibly buffered)
8319 # This must be done before starting the command
8320 my $self = shift;
8321 if($Global::verbose or $opt::dryrun) {
8322 my $fh = $self->fh(1,"w");
8323 if($Global::verbose <= 1) {
8324 print $fh $self->replaced(),"\n";
8325 } else {
8326 # Verbose level > 1: Print the rsync and stuff
8327 print $fh $self->wrapped(),"\n";
8330 if($opt::sqlworker) {
8331 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
8332 $self->replaced());
8336 sub add_rm($) {
8337 # Files to remove when job is done
8338 my $self = shift;
8339 push @{$self->{'unlink'}}, @_;
8342 sub get_rm($) {
8343 # Files to remove when job is done
8344 my $self = shift;
8345 return @{$self->{'unlink'}};
8348 sub cleanup($) {
8349 # Remove files when job is done
8350 my $self = shift;
8351 unlink $self->get_rm();
8352 delete @Global::unlink{$self->get_rm()};
8355 sub grouped($) {
8356 my $self = shift;
8357 # Set reading FD if using --group (--ungroup does not need)
8358 for my $fdno (1,2) {
8359 # Re-open the file for reading
8360 # so fdw can be closed seperately
8361 # and fdr can be seeked seperately (for --line-buffer)
8362 open(my $fdr,"<", $self->fh($fdno,'name')) ||
8363 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
8364 $self->set_fh($fdno,'r',$fdr);
8365 # Unlink if not debugging
8366 $Global::debug or ::rm($self->fh($fdno,"unlink"));
8370 sub empty_input_wrapper($) {
8371 # If no input: exit(0)
8372 # If some input: Pass input as input to command on STDIN
8373 # This avoids starting the command if there is no input.
8374 # Input:
8375 # $command = command to pipe data to
8376 # Returns:
8377 # $wrapped_command = the wrapped command
8378 my $command = shift;
8379 my $script =
8380 ::spacefree(0,q{
8381 if(sysread(STDIN, $buf, 1)) {
8382 open($fh, "|-", @ARGV) || die;
8383 syswrite($fh, $buf);
8384 # Align up to 128k block
8385 if($read = sysread(STDIN, $buf, 131071)) {
8386 syswrite($fh, $buf);
8388 while($read = sysread(STDIN, $buf, 131072)) {
8389 syswrite($fh, $buf);
8391 close $fh;
8392 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8395 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
8396 if($Global::cshell
8398 length $command > 499) {
8399 # csh does not like words longer than 1000 (499 quoted)
8400 # $command = "perl -e '".base64_zip_eval()."' ".
8401 # join" ",string_zip_base64(
8402 # 'exec "'.::perl_quote_scalar($command).'"');
8403 return 'perl -e '.::Q($script)." ".
8404 base64_wrap("exec \"$Global::shell\",'-c',\"".
8405 ::perl_quote_scalar($command).'"');
8406 } else {
8407 return 'perl -e '.::Q($script)." ".
8408 $Global::shell." -c ".::Q($command);
8412 sub filter_through_compress($) {
8413 my $self = shift;
8414 # Send stdout to stdin for $opt::compress_program(1)
8415 # Send stderr to stdin for $opt::compress_program(2)
8416 # cattail get pid: $pid = $self->fh($fdno,'rpid');
8417 my $cattail = cattail();
8419 for my $fdno (1,2) {
8420 # Make a communication file.
8421 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
8422 close $fh;
8423 # Compressor: (echo > $comfile; compress pipe) > output
8424 # When the echo is written to $comfile,
8425 # it is known that output file is opened,
8426 # thus output file can then be removed by the decompressor.
8427 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
8428 empty_input_wrapper($opt::compress_program).") >".
8429 $self->fh($fdno,'name')) || die $?;
8430 $self->set_fh($fdno,'w',$fdw);
8431 $self->set_fh($fdno,'wpid',$wpid);
8432 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
8433 # decompress output > stdout
8434 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
8435 $opt::decompress_program, $wpid,
8436 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
8437 || die $?;
8438 $self->set_fh($fdno,'r',$fdr);
8439 $self->set_fh($fdno,'rpid',$rpid);
8445 sub set_fh($$$$) {
8446 # Set file handle
8447 my ($self, $fd_no, $key, $fh) = @_;
8448 $self->{'fd'}{$fd_no,$key} = $fh;
8451 sub fh($) {
8452 # Get file handle
8453 my ($self, $fd_no, $key) = @_;
8454 return $self->{'fd'}{$fd_no,$key};
8457 sub write($) {
8458 my $self = shift;
8459 my $remaining_ref = shift;
8460 my $stdin_fh = $self->fh(0,"w");
8462 my $len = length $$remaining_ref;
8463 # syswrite may not write all in one go,
8464 # so make sure everything is written.
8465 my $written;
8467 # If writing is to a closed pipe:
8468 # Do not call signal handler, but let nothing be written
8469 local $SIG{PIPE} = undef;
8470 while($written = syswrite($stdin_fh,$$remaining_ref)){
8471 substr($$remaining_ref,0,$written) = "";
8475 sub set_block($$$$$$) {
8476 # Copy stdin buffer from $block_ref up to $endpos
8477 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
8478 # Remove $recstart and $recend if needed
8479 # Input:
8480 # $header_ref = ref to $header to prepend
8481 # $buffer_ref = ref to $buffer containing the block
8482 # $endpos = length of $block to pass on
8483 # $recstart = --recstart regexp
8484 # $recend = --recend regexp
8485 # Returns:
8486 # N/A
8487 my $self = shift;
8488 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
8489 $self->{'block'} = ($self->virgin() ? $$header_ref : "").
8490 substr($$buffer_ref,0,$endpos);
8491 if($opt::remove_rec_sep) {
8492 remove_rec_sep(\$self->{'block'},$recstart,$recend);
8494 $self->{'block_length'} = length $self->{'block'};
8495 $self->{'block_pos'} = 0;
8496 $self->add_transfersize($self->{'block_length'});
8499 sub block_ref($) {
8500 my $self = shift;
8501 return \$self->{'block'};
8505 sub block_length($) {
8506 my $self = shift;
8507 return $self->{'block_length'};
8510 sub remove_rec_sep($) {
8511 # Remove --recstart and --recend from $block
8512 # Input:
8513 # $block_ref = reference to $block to be modified
8514 # $recstart = --recstart
8515 # $recend = --recend
8516 # Uses:
8517 # $opt::regexp = Are --recstart/--recend regexp?
8518 # Returns:
8519 # N/A
8520 my ($block_ref,$recstart,$recend) = @_;
8521 # Remove record separator
8522 if($opt::regexp) {
8523 $$block_ref =~ s/$recend$recstart//gos;
8524 $$block_ref =~ s/^$recstart//os;
8525 $$block_ref =~ s/$recend$//os;
8526 } else {
8527 $$block_ref =~ s/\Q$recend$recstart\E//gos;
8528 $$block_ref =~ s/^\Q$recstart\E//os;
8529 $$block_ref =~ s/\Q$recend\E$//os;
8533 sub non_blocking_write($) {
8534 my $self = shift;
8535 my $something_written = 0;
8537 my $in = $self->fh(0,"w");
8538 my $rv = syswrite($in,
8539 substr($self->{'block'},$self->{'block_pos'}));
8540 if (!defined($rv) && $! == ::EAGAIN()) {
8541 # would block - but would have written
8542 $something_written = 0;
8543 # avoid triggering auto expanding block size
8544 $Global::no_autoexpand_block ||= 1;
8545 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8546 # incomplete write
8547 # Remove the written part
8548 $self->{'block_pos'} += $rv;
8549 $something_written = $rv;
8550 } else {
8551 # successfully wrote everything
8552 # Empty block to free memory
8553 my $a = "";
8554 $self->set_block(\$a,\$a,0,"","");
8555 $something_written = $rv;
8557 ::debug("pipe", "Non-block: ", $something_written);
8558 return $something_written;
8562 sub virgin($) {
8563 my $self = shift;
8564 return $self->{'virgin'};
8567 sub set_virgin($$) {
8568 my $self = shift;
8569 $self->{'virgin'} = shift;
8572 sub pid($) {
8573 my $self = shift;
8574 return $self->{'pid'};
8577 sub set_pid($$) {
8578 my $self = shift;
8579 $self->{'pid'} = shift;
8582 sub starttime($) {
8583 # Returns:
8584 # UNIX-timestamp this job started
8585 my $self = shift;
8586 return sprintf("%.3f",$self->{'starttime'});
8589 sub set_starttime($@) {
8590 my $self = shift;
8591 my $starttime = shift || ::now();
8592 $self->{'starttime'} = $starttime;
8593 $opt::sqlworker and
8594 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8595 $starttime);
8598 sub runtime($) {
8599 # Returns:
8600 # Run time in seconds with 3 decimals
8601 my $self = shift;
8602 return sprintf("%.3f",
8603 int(($self->endtime() - $self->starttime())*1000)/1000);
8606 sub endtime($) {
8607 # Returns:
8608 # UNIX-timestamp this job ended
8609 # 0 if not ended yet
8610 my $self = shift;
8611 return ($self->{'endtime'} || 0);
8614 sub set_endtime($$) {
8615 my $self = shift;
8616 my $endtime = shift;
8617 $self->{'endtime'} = $endtime;
8618 $opt::sqlworker and
8619 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8620 $self->runtime());
8623 sub is_timedout($) {
8624 # Is the job timedout?
8625 # Input:
8626 # $delta_time = time that the job may run
8627 # Returns:
8628 # True or false
8629 my $self = shift;
8630 my $delta_time = shift;
8631 return time > $self->{'starttime'} + $delta_time;
8634 sub kill($) {
8635 my $self = shift;
8636 $self->set_exitstatus(-1);
8637 ::kill_sleep_seq($self->pid());
8640 sub suspend($) {
8641 my $self = shift;
8642 my @pgrps = map { -$_ } $self->pid();
8643 kill "STOP", @pgrps;
8644 $self->set_suspended(1);
8645 # push job onto start stack
8646 $Global::JobQueue->unget($self);
8649 sub set_suspended($$) {
8650 my $self = shift;
8651 $self->{'suspended'} = shift;
8654 sub suspended($) {
8655 my $self = shift;
8656 return $self->{'suspended'};
8659 sub resume($) {
8660 my $self = shift;
8661 my @pgrps = map { -$_ } $self->pid();
8662 kill "CONT", @pgrps;
8663 $self->set_suspended(0);
8666 sub failed($) {
8667 # return number of times failed for this $sshlogin
8668 # Input:
8669 # $sshlogin
8670 # Returns:
8671 # Number of times failed for $sshlogin
8672 my $self = shift;
8673 my $sshlogin = shift;
8674 return $self->{'failed'}{$sshlogin};
8677 sub failed_here($) {
8678 # return number of times failed for the current $sshlogin
8679 # Returns:
8680 # Number of times failed for this sshlogin
8681 my $self = shift;
8682 return $self->{'failed'}{$self->sshlogin()};
8685 sub add_failed($) {
8686 # increase the number of times failed for this $sshlogin
8687 my $self = shift;
8688 my $sshlogin = shift;
8689 $self->{'failed'}{$sshlogin}++;
8692 sub add_failed_here($) {
8693 # increase the number of times failed for the current $sshlogin
8694 my $self = shift;
8695 $self->{'failed'}{$self->sshlogin()}++;
8698 sub reset_failed($) {
8699 # increase the number of times failed for this $sshlogin
8700 my $self = shift;
8701 my $sshlogin = shift;
8702 delete $self->{'failed'}{$sshlogin};
8705 sub reset_failed_here($) {
8706 # increase the number of times failed for this $sshlogin
8707 my $self = shift;
8708 delete $self->{'failed'}{$self->sshlogin()};
8711 sub min_failed($) {
8712 # Returns:
8713 # the number of sshlogins this command has failed on
8714 # the minimal number of times this command has failed
8715 my $self = shift;
8716 my $min_failures =
8717 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
8718 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
8719 return ($number_of_sshlogins_failed_on,$min_failures);
8722 sub total_failed($) {
8723 # Returns:
8724 # $total_failures = the number of times this command has failed
8725 my $self = shift;
8726 my $total_failures = 0;
8727 for (values %{$self->{'failed'}}) {
8728 $total_failures += $_;
8730 return $total_failures;
8734 my $script;
8736 sub postpone_exit_and_cleanup {
8737 # Command to remove files and dirs (given as args) without
8738 # affecting the exit value in $?/$status.
8739 if(not $script) {
8740 $script = "perl -e '".
8741 ::spacefree(0,q{
8742 $bash=shift;
8743 $csh=shift;
8744 for(@ARGV){
8745 unlink;
8746 rmdir;
8748 if($bash=~s/(\d+)h/$1/) {
8749 exit $bash;
8751 exit $csh;
8753 # `echo \$?h` is needed to make fish not complain
8754 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
8756 return $script
8761 my $script;
8763 sub fifo_wrap() {
8764 # Script to create a fifo, run a command on the fifo
8765 # while copying STDIN to the fifo, and finally
8766 # remove the fifo and return the exit code of the command.
8767 if(not $script) {
8768 # {} == $PARALLEL_TMP for --fifo
8769 # To make it csh compatible a wrapper needs to:
8770 # * mkfifo
8771 # * spawn $command &
8772 # * cat > fifo
8773 # * waitpid to get the exit code from $command
8774 # * be less than 1000 chars long
8775 $script = "perl -e '".
8776 (::spacefree
8777 (0, q{
8778 ($s,$c,$f) = @ARGV;
8779 # mkfifo $PARALLEL_TMP
8780 system "mkfifo", $f;
8781 # spawn $shell -c $command &
8782 $pid = fork || exec $s, "-c", $c;
8783 open($o,">",$f) || die $!;
8784 # cat > $PARALLEL_TMP
8785 while(sysread(STDIN,$buf,131072)){
8786 syswrite $o, $buf;
8788 close $o;
8789 # waitpid to get the exit code from $command
8790 waitpid $pid,0;
8791 # Cleanup
8792 unlink $f;
8793 exit $?/256;
8794 }))."'";
8796 return $script;
8800 sub wrapped($) {
8801 # Wrap command with:
8802 # * --shellquote
8803 # * --nice
8804 # * --cat
8805 # * --fifo
8806 # * --sshlogin
8807 # * --pipepart (@Global::cat_prepends)
8808 # * --tee (@Global::cat_prepends)
8809 # * --pipe
8810 # * --tmux
8811 # The ordering of the wrapping is important:
8812 # * --nice/--cat/--fifo should be done on the remote machine
8813 # * --pipepart/--pipe should be done on the local machine inside --tmux
8814 # Uses:
8815 # @opt::shellquote
8816 # $opt::nice
8817 # $Global::shell
8818 # $opt::cat
8819 # $opt::fifo
8820 # @Global::cat_prepends
8821 # $opt::pipe
8822 # $opt::tmux
8823 # Returns:
8824 # $self->{'wrapped'} = the command wrapped with the above
8825 my $self = shift;
8826 if(not defined $self->{'wrapped'}) {
8827 my $command = $self->replaced();
8828 # Bug in Bash and Ksh when running multiline aliases
8829 # This will force them to run correctly, but will fail in
8830 # tcsh so we do not do it.
8831 # $command .= "\n\n";
8832 if(@opt::shellquote) {
8833 # Quote one time for each --shellquote
8834 my $c = $command;
8835 for(@opt::shellquote) {
8836 $c = ::Q($c);
8838 # Prepend "echo" (it is written in perl because
8839 # quoting '-e' causes problem in some versions and
8840 # csh's version does something wrong)
8841 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
8843 if($Global::parallel_env) {
8844 # If $PARALLEL_ENV set, put that in front of the command
8845 # Used for env_parallel.*
8846 if($Global::shell =~ /zsh/) {
8847 # The extra 'eval' will make aliases work, too
8848 $command = $Global::parallel_env."\n".
8849 "eval ".::Q($command);
8850 } else {
8851 $command = $Global::parallel_env."\n".$command;
8854 if($opt::cat) {
8855 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8856 # This is to make it possible to compute $PARALLEL_TMP on
8857 # the fly when running remotely.
8858 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8859 # the command is run.
8861 # Prepend 'cat > $PARALLEL_TMP;'
8862 # Append 'unlink $PARALLEL_TMP without affecting $?'
8863 $command =
8864 'cat > $PARALLEL_TMP;'.
8865 $command.";". postpone_exit_and_cleanup().
8866 '$PARALLEL_TMP';
8867 } elsif($opt::fifo) {
8868 # Prepend fifo-wrapper. In essence:
8869 # mkfifo {}
8870 # ( $command ) &
8871 # # $command must read {}, otherwise this 'cat' will block
8872 # cat > {};
8873 # wait; rm {}
8874 # without affecting $?
8875 $command = fifo_wrap(). " ".
8876 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8878 # Wrap with ssh + tranferring of files
8879 $command = $self->sshlogin_wrap($command);
8880 if(@Global::cat_prepends) {
8881 # --pipepart: prepend:
8882 # < /tmp/foo perl -e 'while(@ARGV) {
8883 # sysseek(STDIN,shift,0) || die; $left = shift;
8884 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8885 # $left -= $read; syswrite(STDOUT,$buf);
8887 # }' 0 0 0 11 |
8889 # --pipepart --tee: prepend:
8890 # < dash-a-file
8892 # --pipe --tee: wrap:
8893 # (rm fifo; ... ) < fifo
8895 # --pipe --shard X:
8896 # (rm fifo; ... ) < fifo
8897 $command = (shift @Global::cat_prepends). "($command)".
8898 (shift @Global::cat_appends);
8899 } elsif($opt::pipe and not $opt::roundrobin) {
8900 # Wrap with EOF-detector to avoid starting $command if EOF.
8901 $command = empty_input_wrapper($command);
8903 if($opt::tmux) {
8904 # Wrap command with 'tmux'
8905 $command = $self->tmux_wrap($command);
8907 if($Global::cshell
8909 length $command > 499) {
8910 # csh does not like words longer than 1000 (499 quoted)
8911 # $command = "perl -e '".base64_zip_eval()."' ".
8912 # join" ",string_zip_base64(
8913 # 'exec "'.::perl_quote_scalar($command).'"');
8914 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8915 ::perl_quote_scalar($command).'"');
8917 $self->{'wrapped'} = $command;
8919 return $self->{'wrapped'};
8922 sub set_sshlogin($$) {
8923 my $self = shift;
8924 my $sshlogin = shift;
8925 $self->{'sshlogin'} = $sshlogin;
8926 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8927 delete $self->{'wrapped'};
8929 if($opt::sqlworker) {
8930 # Identify worker as --sqlworker often runs on different machines
8931 my $host = $sshlogin->string();
8932 if($host eq ":") {
8933 $host = ::hostname();
8935 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8939 sub sshlogin($) {
8940 my $self = shift;
8941 return $self->{'sshlogin'};
8944 sub string_base64($) {
8945 # Base64 encode strings into 1000 byte blocks.
8946 # 1000 bytes is the largest word size csh supports
8947 # Input:
8948 # @strings = to be encoded
8949 # Returns:
8950 # @base64 = 1000 byte block
8951 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8952 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8953 return @base64;
8956 sub string_zip_base64($) {
8957 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8958 # byte blocks.
8959 # 1000 bytes is the largest word size csh supports
8960 # Zipping will make exporting big environments work, too
8961 # Input:
8962 # @strings = to be encoded
8963 # Returns:
8964 # @base64 = 1000 byte block
8965 my($zipin_fh, $zipout_fh,@base64);
8966 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8967 if(fork) {
8968 close $zipin_fh;
8969 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8970 # Split base64 encoded into 1000 byte blocks
8971 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8972 close $zipout_fh;
8973 } else {
8974 close $zipout_fh;
8975 print $zipin_fh @_;
8976 close $zipin_fh;
8977 exit;
8979 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8980 return @base64;
8983 sub base64_zip_eval() {
8984 # Script that:
8985 # * reads base64 strings from @ARGV
8986 # * decodes them
8987 # * pipes through 'bzip2 -dc'
8988 # * evals the result
8989 # Reverse of string_zip_base64 + eval
8990 # Will be wrapped in ' so single quote is forbidden
8991 # Returns:
8992 # $script = 1-liner for perl -e
8993 my $script = ::spacefree(0,q{
8994 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8995 eval"@GNU_Parallel";
8996 $chld = $SIG{CHLD};
8997 $SIG{CHLD} = "IGNORE";
8998 # Search for bzip2. Not found => use default path
8999 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
9000 # $in = stdin on $zip, $out = stdout from $zip
9001 # Forget my() to save chars for csh
9002 # my($in, $out,$eval);
9003 open3($in,$out,">&STDERR",$zip,"-dc");
9004 if(my $perlpid = fork) {
9005 close $in;
9006 $eval = join "", <$out>;
9007 close $out;
9008 } else {
9009 close $out;
9010 # Pipe decoded base64 into 'bzip2 -dc'
9011 print $in (decode_base64(join"",@ARGV));
9012 close $in;
9013 exit;
9015 wait;
9016 $SIG{CHLD} = $chld;
9017 eval $eval;
9019 ::debug("base64",$script,"\n");
9020 return $script;
9023 sub base64_wrap($) {
9024 # base64 encode Perl code
9025 # Split it into chunks of < 1000 bytes
9026 # Prepend it with a decoder that eval's it
9027 # Input:
9028 # $eval_string = Perl code to run
9029 # Returns:
9030 # $shell_command = shell command that runs $eval_string
9031 my $eval_string = shift;
9032 return
9033 "perl -e ".
9034 ::Q(base64_zip_eval())." ".
9035 join" ",::shell_quote(string_zip_base64($eval_string));
9038 sub base64_eval($) {
9039 # Script that:
9040 # * reads base64 strings from @ARGV
9041 # * decodes them
9042 # * evals the result
9043 # Reverse of string_base64 + eval
9044 # Will be wrapped in ' so single quote is forbidden.
9045 # Spaces are stripped so spaces cannot be significant.
9046 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
9047 # to make it clear that this is a GNU Parallel command
9048 # when looking at the process table.
9049 # Returns:
9050 # $script = 1-liner for perl -e
9051 my $script = ::spacefree(0,q{
9052 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
9053 eval "@GNU_Parallel";
9054 my $eval = decode_base64(join"",@ARGV);
9055 eval $eval;
9057 ::debug("base64",$script,"\n");
9058 return $script;
9061 sub sshlogin_wrap($) {
9062 # Wrap the command with the commands needed to run remotely
9063 # Input:
9064 # $command = command to run
9065 # Returns:
9066 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
9067 sub monitor_parent_sshd_script {
9068 # This script is to solve the problem of
9069 # * not mixing STDERR and STDOUT
9070 # * terminating with ctrl-c
9071 # If its parent is ssh: all good
9072 # If its parent is init(1): ssh died, so kill children
9073 my $monitor_parent_sshd_script;
9075 if(not $monitor_parent_sshd_script) {
9076 $monitor_parent_sshd_script =
9077 # This will be packed in ', so only use "
9078 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
9079 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
9080 '$nice = '.$opt::nice.';'.
9081 '$termseq = "'.$opt::termseq.'";'.
9083 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
9084 do {
9085 $ENV{PARALLEL_TMP} = $tmpdir."/par".
9086 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
9087 } while(-e $ENV{PARALLEL_TMP});
9088 $SIG{CHLD} = sub { $done = 1; };
9089 $pid = fork;
9090 unless($pid) {
9091 # Make own process group to be able to kill HUP it later
9092 eval { setpgrp };
9093 eval { setpriority(0,0,$nice) };
9094 exec $shell, "-c", ($bashfunc."@ARGV");
9095 die "exec: $!\n";
9097 do {
9098 # Parent is not init (ppid=1), so sshd is alive
9099 # Exponential sleep up to 1 sec
9100 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
9101 select(undef, undef, undef, $s);
9102 } until ($done || getppid == 1);
9103 if(not $done) {
9104 # Kill as per --termseq
9105 my @term_seq = split/,/,$termseq;
9106 if(not @term_seq) {
9107 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
9109 while(@term_seq && kill(0,-$pid)) {
9110 kill(shift @term_seq, -$pid);
9111 select(undef, undef, undef, (shift @term_seq)/1000);
9114 wait;
9115 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9118 return $monitor_parent_sshd_script;
9121 sub vars_to_export {
9122 # Uses:
9123 # @opt::env
9124 my @vars = ("parallel_bash_environment");
9125 for my $varstring (@opt::env) {
9126 # Split up --env VAR1,VAR2
9127 push @vars, split /,/, $varstring;
9129 for (@vars) {
9130 if(-r $_ and not -d) {
9131 # Read as environment definition bug #44041
9132 # TODO parse this
9133 my $fh = ::open_or_exit($_);
9134 $Global::envdef = join("",<$fh>);
9135 close $fh;
9138 if(grep { /^_$/ } @vars) {
9139 local $/ = "\n";
9140 # --env _
9141 # Include all vars that are not in a clean environment
9142 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
9143 my @ignore = <$vars_fh>;
9144 chomp @ignore;
9145 my %ignore;
9146 @ignore{@ignore} = @ignore;
9147 close $vars_fh;
9148 push @vars, grep { not defined $ignore{$_} } keys %ENV;
9149 @vars = grep { not /^_$/ } @vars;
9150 } else {
9151 ::error("Run '$Global::progname --record-env' ".
9152 "in a clean environment first.");
9153 ::wait_and_exit(255);
9156 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
9157 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
9159 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
9160 "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", "PARALLEL_JOBSLOT",
9161 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
9162 # Keep only defined variables
9163 return grep { defined($ENV{$_}) } @vars;
9166 sub env_as_eval {
9167 # Returns:
9168 # $eval = '$ENV{"..."}=...; ...'
9169 my @vars = vars_to_export();
9170 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
9171 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
9172 my @non_functions = (grep { !/PARALLEL_ENV/ }
9173 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
9175 # eval of @envset will set %ENV
9176 my $envset = join"", map {
9177 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
9178 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
9180 # running @bashfunc on the command line, will set the functions
9181 my @bashfunc = map {
9182 my $v=$_;
9183 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
9184 "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
9185 # eval $bashfuncset will set $bashfunc
9186 my $bashfuncset;
9187 if(@bashfunc) {
9188 # Functions are not supported for all shells
9189 if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
9190 ::warning("Shell functions may not be supported in $Global::shell.");
9192 $bashfuncset =
9193 '@bash_functions=qw('."@bash_functions".");".
9194 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
9195 if($shell=~/csh/) {
9196 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
9197 exec "false";
9200 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
9201 } else {
9202 $bashfuncset = '$bashfunc = "";'
9204 if($ENV{'parallel_bash_environment'}) {
9205 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
9207 ::debug("base64",$envset,$bashfuncset,"\n");
9208 return $csh_friendly,$envset,$bashfuncset;
9211 my $self = shift;
9212 my $command = shift;
9213 # TODO test that *sh -c 'parallel --env' use *sh
9214 if(not defined $self->{'sshlogin_wrap'}{$command}) {
9215 my $sshlogin = $self->sshlogin();
9216 my $serverlogin = $sshlogin->serverlogin();
9217 my $quoted_remote_command;
9218 $ENV{'PARALLEL_SEQ'} = $self->seq();
9219 $ENV{'PARALLEL_JOBSLOT'} = $self->slot();
9220 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
9221 $ENV{'PARALLEL_SSHHOST'} = $sshlogin->serverlogin();
9222 $ENV{'PARALLEL_PID'} = $$;
9223 if($serverlogin eq ":") {
9224 if($opt::workdir) {
9225 # Create workdir if needed. Then cd to it.
9226 my $wd = $self->workdir();
9227 if($opt::workdir eq "." or $opt::workdir eq "...") {
9228 # If $wd does not start with '/': Prepend $HOME
9229 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
9231 ::mkdir_or_die($wd);
9232 my $post = "";
9233 if($opt::workdir eq "...") {
9234 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
9237 $command = "cd ".::Q($wd)." || exit 255; " .
9238 $command . $post;;
9240 if(@opt::env) {
9241 # Prepend with environment setter, which sets functions in zsh
9242 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9243 my $perl_code = $envset.$bashfuncset.
9244 '@ARGV="'.::perl_quote_scalar($command).'";'.
9245 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
9246 if(length $perl_code > 999
9248 not $csh_friendly
9250 $command =~ /\n/) {
9251 # csh does not deal well with > 1000 chars in one word
9252 # csh does not deal well with $ENV with \n
9253 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
9254 } else {
9255 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
9257 } else {
9258 $self->{'sshlogin_wrap'}{$command} = $command;
9260 } else {
9261 my $pwd = "";
9262 if($opt::workdir) {
9263 # Create remote workdir if needed. Then cd to it.
9264 my $wd = ::pQ($self->workdir());
9265 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
9266 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
9268 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9269 my $remote_command = $pwd.$envset.$bashfuncset.
9270 '@ARGV="'.::perl_quote_scalar($command).'";'.
9271 monitor_parent_sshd_script();
9272 $quoted_remote_command = "perl -e ". ::Q($remote_command);
9273 my $dq_remote_command = ::Q($quoted_remote_command);
9274 if(length $dq_remote_command > 999
9276 not $csh_friendly
9278 $command =~ /\n/) {
9279 # csh does not deal well with > 1000 chars in one word
9280 # csh does not deal well with $ENV with \n
9281 $quoted_remote_command =
9282 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
9283 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
9284 } else {
9285 $quoted_remote_command = $dq_remote_command;
9288 my $sshcmd = $sshlogin->sshcommand();
9289 my ($pre,$post,$cleanup)=("","","");
9290 # --transfer
9291 $pre .= $self->sshtransfer();
9292 # --return
9293 $post .= $self->sshreturn();
9294 # --cleanup
9295 $post .= $self->sshcleanup();
9296 if($post) {
9297 # We need to save the exit status of the job
9298 $post = exitstatuswrapper($post);
9300 $self->{'sshlogin_wrap'}{$command} =
9301 ($pre
9302 . "$sshcmd $serverlogin -- exec "
9303 . $quoted_remote_command
9304 . ";"
9305 . $post);
9308 return $self->{'sshlogin_wrap'}{$command};
9311 sub transfer($) {
9312 # Files to transfer
9313 # Non-quoted and with {...} substituted
9314 # Returns:
9315 # @transfer - File names of files to transfer
9316 my $self = shift;
9318 my $transfersize = 0;
9319 my @transfer = $self->{'commandline'}->
9320 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
9321 for(@transfer) {
9322 # filesize
9323 if(-e $_) {
9324 $transfersize += (stat($_))[7];
9327 $self->add_transfersize($transfersize);
9328 return @transfer;
9331 sub transfersize($) {
9332 my $self = shift;
9333 return $self->{'transfersize'};
9336 sub add_transfersize($) {
9337 my $self = shift;
9338 my $transfersize = shift;
9339 $self->{'transfersize'} += $transfersize;
9340 $opt::sqlworker and
9341 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
9342 $self->{'transfersize'});
9345 sub sshtransfer($) {
9346 # Returns for each transfer file:
9347 # rsync $file remote:$workdir
9348 my $self = shift;
9349 my @pre;
9350 my $sshlogin = $self->sshlogin();
9351 my $workdir = $self->workdir();
9352 for my $file ($self->transfer()) {
9353 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
9355 return join("",@pre);
9358 sub return($) {
9359 # Files to return
9360 # Non-quoted and with {...} substituted
9361 # Returns:
9362 # @non_quoted_filenames
9363 my $self = shift;
9364 return $self->{'commandline'}->
9365 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
9368 sub returnsize($) {
9369 # This is called after the job has finished
9370 # Returns:
9371 # $number_of_bytes transferred in return
9372 my $self = shift;
9373 for my $file ($self->return()) {
9374 if(-e $file) {
9375 $self->{'returnsize'} += (stat($file))[7];
9378 return $self->{'returnsize'};
9381 sub add_returnsize($) {
9382 my $self = shift;
9383 my $returnsize = shift;
9384 $self->{'returnsize'} += $returnsize;
9385 $opt::sqlworker and
9386 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
9387 $self->{'returnsize'});
9390 sub sshreturn($) {
9391 # Returns for each return-file:
9392 # rsync remote:$workdir/$file .
9393 my $self = shift;
9394 my $sshlogin = $self->sshlogin();
9395 my $sshcmd = $sshlogin->sshcommand();
9396 my $serverlogin = $sshlogin->serverlogin();
9397 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
9398 my $pre = "";
9399 for my $file ($self->return()) {
9400 $file =~ s:^\./::g; # Remove ./ if any
9401 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9402 my $cd = "";
9403 my $wd = "";
9404 if($relpath) {
9405 # rsync -avR /foo/./bar/baz.c remote:/tmp/
9406 # == (on old systems)
9407 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
9408 $wd = ::shell_quote_file($self->workdir()."/");
9410 # Only load File::Basename if actually needed
9411 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
9412 # dir/./file means relative to dir, so remove dir on remote
9413 $file =~ m:(.*)/\./:;
9414 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
9415 my $nobasedir = $file;
9416 $nobasedir =~ s:.*/\./::;
9417 $cd = ::shell_quote_file(::dirname($nobasedir));
9418 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
9419 my $basename = ::Q(::shell_quote_file(::basename($file)));
9420 # --return
9421 # mkdir -p /home/tange/dir/subdir/;
9422 # rsync (--protocol 30) -rlDzR
9423 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
9424 # server:file.gz /home/tange/dir/subdir/
9425 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
9426 " $rsync_cd $rsync_opts $serverlogin:".
9427 $basename . " ".$basedir.$cd.";";
9429 return $pre;
9432 sub sshcleanup($) {
9433 # Return the sshcommand needed to remove the file
9434 # Returns:
9435 # ssh command needed to remove files from sshlogin
9436 my $self = shift;
9437 my $sshlogin = $self->sshlogin();
9438 my $sshcmd = $sshlogin->sshcommand();
9439 my $serverlogin = $sshlogin->serverlogin();
9440 my $workdir = $self->workdir();
9441 my $cleancmd = "";
9443 for my $file ($self->remote_cleanup()) {
9444 my @subworkdirs = parentdirs_of($file);
9445 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
9447 if(defined $opt::workdir and $opt::workdir eq "...") {
9448 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
9450 return $cleancmd;
9453 sub remote_cleanup($) {
9454 # Returns:
9455 # Files to remove at cleanup
9456 my $self = shift;
9457 if($opt::cleanup) {
9458 my @transfer = $self->transfer();
9459 my @return = $self->return();
9460 return (@transfer,@return);
9461 } else {
9462 return ();
9466 sub exitstatuswrapper(@) {
9467 # Input:
9468 # @shellcode = shell code to execute
9469 # Returns:
9470 # shell script that returns current status after executing @shellcode
9471 if($Global::cshell) {
9472 return ('set _EXIT_status=$status; ' .
9473 join(" ",@_).
9474 'exit $_EXIT_status;');
9475 } else {
9476 return ('_EXIT_status=$?; ' .
9477 join(" ",@_).
9478 'exit $_EXIT_status;');
9482 sub workdir($) {
9483 # Returns:
9484 # the workdir on a remote machine
9485 my $self = shift;
9486 if(not defined $self->{'workdir'}) {
9487 my $workdir;
9488 if(defined $opt::workdir) {
9489 if($opt::workdir eq ".") {
9490 # . means current dir
9491 my $home = $ENV{'HOME'};
9492 eval 'use Cwd';
9493 my $cwd = cwd();
9494 $workdir = $cwd;
9495 if($home) {
9496 # If homedir exists: remove the homedir from
9497 # workdir if cwd starts with homedir
9498 # E.g. /home/foo/my/dir => my/dir
9499 # E.g. /tmp/my/dir => /tmp/my/dir
9500 my ($home_dev, $home_ino) = (stat($home))[0,1];
9501 my $parent = "";
9502 my @dir_parts = split(m:/:,$cwd);
9503 my $part;
9504 while(defined ($part = shift @dir_parts)) {
9505 $part eq "" and next;
9506 $parent .= "/".$part;
9507 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
9508 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
9509 # dev and ino is the same: We found the homedir.
9510 $workdir = join("/",@dir_parts);
9511 last;
9515 if($workdir eq "") {
9516 $workdir = ".";
9518 } elsif($opt::workdir eq "...") {
9519 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
9520 . "-" . $self->seq();
9521 } else {
9522 $workdir = $self->{'commandline'}->
9523 replace_placeholders([$opt::workdir],0,0);
9524 #$workdir = $opt::workdir;
9525 # Rsync treats /./ special. We dont want that
9526 $workdir =~ s:/\./:/:g; # Remove /./
9527 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
9528 $workdir =~ s:^\./::g; # Remove starting ./ if any
9530 } else {
9531 $workdir = ".";
9533 $self->{'workdir'} = $workdir;
9535 return $self->{'workdir'};
9538 sub parentdirs_of($) {
9539 # Return:
9540 # all parentdirs except . of this dir or file - sorted desc by length
9541 my $d = shift;
9542 my @parents = ();
9543 while($d =~ s:/[^/]+$::) {
9544 if($d ne ".") {
9545 push @parents, $d;
9548 return @parents;
9551 sub start($) {
9552 # Setup STDOUT and STDERR for a job and start it.
9553 # Returns:
9554 # job-object or undef if job not to run
9556 sub open3_setpgrp_internal {
9557 # Run open3+setpgrp followed by the command
9558 # Input:
9559 # $stdin_fh = Filehandle to use as STDIN
9560 # $stdout_fh = Filehandle to use as STDOUT
9561 # $stderr_fh = Filehandle to use as STDERR
9562 # $command = Command to run
9563 # Returns:
9564 # $pid = Process group of job started
9565 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9566 my $pid;
9567 local (*OUT,*ERR);
9568 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9569 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9570 # The eval is needed to catch exception from open3
9571 eval {
9572 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9573 # Each child gets its own process group to make it safe to killall
9574 eval{ setpgrp(0,0) };
9575 eval{ setpriority(0,0,$opt::nice) };
9576 exec($Global::shell,"-c",$command)
9577 || ::die_bug("open3-$stdin_fh $command");
9580 return $pid;
9583 sub open3_setpgrp_external {
9584 # Run open3 on $command wrapped with a perl script doing setpgrp
9585 # Works on systems that do not support open3(,,,"-")
9586 # Input:
9587 # $stdin_fh = Filehandle to use as STDIN
9588 # $stdout_fh = Filehandle to use as STDOUT
9589 # $stderr_fh = Filehandle to use as STDERR
9590 # $command = Command to run
9591 # Returns:
9592 # $pid = Process group of job started
9593 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9594 local (*OUT,*ERR);
9595 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9596 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9598 my $pid;
9599 my @setpgrp_wrap =
9600 ('perl','-e',
9601 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9602 "exec '$Global::shell', '-c', \@ARGV");
9603 # The eval is needed to catch exception from open3
9604 eval {
9605 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9606 || ::die_bug("open3-$stdin_fh");
9609 return $pid;
9612 sub redefine_open3_setpgrp {
9613 my $setgprp_cache = shift;
9614 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9615 no warnings 'redefine';
9616 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9617 # Test to see if open3(x,x,x,"-") is fully supported
9618 # Can an exported bash function be called via open3?
9619 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9620 'else { exec("bash","-c","testfun && true"); }';
9621 my $bash =
9622 ::shell_quote_scalar_default(
9623 "testfun() { rm $name; }; export -f testfun; ".
9624 "perl -MIPC::Open3 -e ".
9625 ::shell_quote_scalar_default($script)
9627 my $redefine_eval;
9628 # Redirect STDERR temporarily,
9629 # so errors on MacOS X are ignored.
9630 open my $saveerr, ">&STDERR";
9631 open STDERR, '>', "/dev/null";
9632 # Run the test
9633 ::debug("init",qq{bash -c $bash 2>/dev/null});
9634 qx{ bash -c $bash 2>/dev/null };
9635 open STDERR, ">&", $saveerr;
9637 if(-e $name) {
9638 # Does not support open3(x,x,x,"-")
9639 # or does not have bash:
9640 # Use (slow) external version
9641 unlink($name);
9642 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
9643 ::debug("init","open3_setpgrp_external chosen\n");
9644 } else {
9645 # Supports open3(x,x,x,"-")
9646 # This is 0.5 ms faster to run
9647 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
9648 ::debug("init","open3_setpgrp_internal chosen\n");
9650 if(open(my $fh, ">", $setgprp_cache)) {
9651 print $fh $redefine_eval;
9652 close $fh;
9653 } else {
9654 ::debug("init","Cannot write to $setgprp_cache");
9656 eval $redefine_eval;
9659 sub open3_setpgrp {
9660 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
9661 ::hostname() . "/setpgrp_func";
9662 sub read_cache() {
9663 -e $setgprp_cache || return 0;
9664 local $/ = undef;
9665 open(my $fh, "<", $setgprp_cache) || return 0;
9666 eval <$fh> || return 0;
9667 close $fh;
9668 return 1;
9670 if(not read_cache()) {
9671 redefine_open3_setpgrp($setgprp_cache);
9673 # The sub is now redefined. Call it
9674 return open3_setpgrp(@_);
9677 my $job = shift;
9678 if($job->suspended()) {
9679 # Job is kill -STOP'ped: Restart it.
9680 $job->resume();
9681 return $job;
9683 # Get the shell command to be executed (possibly with ssh infront).
9684 my $command = $job->wrapped();
9685 my $pid;
9687 if($Global::interactive or $Global::stderr_verbose) {
9688 $job->interactive_start();
9690 # Must be run after $job->interactive_start():
9691 # $job->interactive_start() may call $job->skip()
9692 if($job->{'commandline'}{'skip'}) {
9693 # $job->skip() was called
9694 $command = "true";
9696 $job->openoutputfiles();
9697 $job->print_verbose_dryrun();
9698 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
9699 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
9700 $ENV{'PARALLEL_SEQ'} = $job->seq();
9701 $ENV{'PARALLEL_PID'} = $$;
9702 $ENV{'PARALLEL_JOBSLOT'} = $job->slot();
9703 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
9704 $job->add_rm($ENV{'PARALLEL_TMP'});
9705 ::debug("run", $Global::total_running, " processes . Starting (",
9706 $job->seq(), "): $command\n");
9707 if($opt::pipe) {
9708 my ($stdin_fh) = ::gensym();
9709 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
9710 if($opt::roundrobin and not $opt::keeporder) {
9711 # --keep-order will make sure the order will be reproducible
9712 ::set_fh_non_blocking($stdin_fh);
9714 $job->set_fh(0,"w",$stdin_fh);
9715 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
9716 } elsif ($opt::tty and -c "/dev/tty" and
9717 open(my $devtty_fh, "<", "/dev/tty")) {
9718 # Give /dev/tty to the command if no one else is using it
9719 # The eval is needed to catch exception from open3
9720 local (*IN,*OUT,*ERR);
9721 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9722 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9723 *IN = $devtty_fh;
9724 # The eval is needed to catch exception from open3
9725 my @wrap = ('perl','-e',
9726 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
9727 "exec '$Global::shell', '-c', \@ARGV");
9728 eval {
9729 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
9730 || ::die_bug("open3-/dev/tty");
9733 close $devtty_fh;
9734 $job->set_virgin(0);
9735 } else {
9736 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
9737 $job->set_virgin(0);
9739 if($pid) {
9740 # A job was started
9741 $Global::total_running++;
9742 $Global::total_started++;
9743 $job->set_pid($pid);
9744 $job->set_starttime();
9745 $Global::running{$job->pid()} = $job;
9746 if($opt::timeout) {
9747 $Global::timeoutq->insert($job);
9749 $Global::newest_job = $job;
9750 $Global::newest_starttime = ::now();
9751 return $job;
9752 } else {
9753 # No more processes
9754 ::debug("run", "Cannot spawn more jobs.\n");
9755 return undef;
9759 sub interactive_start($) {
9760 my $self = shift;
9761 my $command = $self->wrapped();
9762 if($Global::interactive) {
9763 my $answer;
9764 ::status_no_nl("$command ?...");
9766 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
9767 $answer = <$tty_fh>;
9768 close $tty_fh;
9769 # Sometime we get an empty string (not even \n)
9770 # Do not know why, so let us just ignore it and try again
9771 } while(length $answer < 1);
9772 if (not ($answer =~ /^\s*y/i)) {
9773 $self->{'commandline'}->skip();
9775 } else {
9776 print $Global::original_stderr "$command\n";
9781 my $tmuxsocket;
9783 sub tmux_wrap($) {
9784 # Wrap command with tmux for session pPID
9785 # Input:
9786 # $actual_command = the actual command being run (incl ssh wrap)
9787 my $self = shift;
9788 my $actual_command = shift;
9789 # Temporary file name. Used for fifo to communicate exit val
9790 my $tmpfifo = ::tmpname("tmx");
9791 $self->add_rm($tmpfifo);
9793 if(length($tmpfifo) >=100) {
9794 ::error("tmux does not support sockets with path > 100.");
9795 ::wait_and_exit(255);
9797 if($opt::tmuxpane) {
9798 # Move the command into a pane in window 0
9799 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
9800 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
9801 $actual_command;
9803 my $visual_command = $self->replaced();
9804 my $title = $visual_command;
9805 if($visual_command =~ /\0/) {
9806 ::error("Command line contains NUL. tmux is confused by NUL.");
9807 ::wait_and_exit(255);
9809 # ; causes problems
9810 # ascii 194-245 annoys tmux
9811 $title =~ tr/[\011-\016;\302-\365]/ /s;
9812 $title = ::Q($title);
9814 my $l_act = length($actual_command);
9815 my $l_tit = length($title);
9816 my $l_fifo = length($tmpfifo);
9817 # The line to run contains a 118 chars extra code + the title 2x
9818 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9820 my $quoted_space75 = ::Q(" ")x75;
9821 while($l_tit < 1000 and
9823 (890 < $l_tot and $l_tot < 1350)
9825 (9250 < $l_tot and $l_tot < 9800)
9826 )) {
9827 # tmux blocks for certain lengths:
9828 # 900 < title + command < 1200
9829 # 9250 < title + command < 9800
9830 # but only if title < 1000, so expand the title with 75 spaces
9831 # The measured lengths are:
9832 # 996 < (title + whole command) < 1127
9833 # 9331 < (title + whole command) < 9636
9834 $title .= $quoted_space75;
9835 $l_tit = length($title);
9836 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9839 my $tmux;
9840 $ENV{'PARALLEL_TMUX'} ||= "tmux";
9841 if(not $tmuxsocket) {
9842 $tmuxsocket = ::tmpname("tms");
9843 if($opt::fg) {
9844 if(not fork) {
9845 # Run tmux in the foreground
9846 # Wait for the socket to appear
9847 while (not -e $tmuxsocket) { }
9848 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
9849 exit;
9852 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
9854 $tmux = "sh -c '".
9855 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
9856 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
9858 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
9859 $Limits::Command::line_max_len, " tot ",
9860 $l_tot, "\n");
9862 return "mkfifo $tmpfifo && $tmux ".
9863 # Run in tmux
9866 "(".$actual_command.');'.
9867 # The triple print is needed - otherwise the testsuite fails
9868 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
9869 "echo $title; echo \007Job finished at: `date`;sleep 10"
9871 # Run outside tmux
9872 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
9873 # If csh the first will be 0h, so use the second as exit value.
9874 # Otherwise just use the first value as exit value.
9875 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
9879 sub is_already_in_results($) {
9880 # Do we already have results for this job?
9881 # Returns:
9882 # $job_already_run = bool whether there is output for this or not
9883 my $job = $_[0];
9884 my $out = $job->{'commandline'}->results_out();
9885 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
9886 return(-e $out."stdout" or -f $out);
9889 sub is_already_in_joblog($) {
9890 my $job = shift;
9891 return vec($Global::job_already_run,$job->seq(),1);
9894 sub set_job_in_joblog($) {
9895 my $job = shift;
9896 vec($Global::job_already_run,$job->seq(),1) = 1;
9899 sub should_be_retried($) {
9900 # Should this job be retried?
9901 # Returns
9902 # 0 - do not retry
9903 # 1 - job queued for retry
9904 my $self = shift;
9905 if (not $opt::retries) {
9906 return 0;
9908 if(not $self->exitstatus() and not $self->exitsignal()) {
9909 # Completed with success. If there is a recorded failure: forget it
9910 $self->reset_failed_here();
9911 return 0;
9912 } else {
9913 # The job failed. Should it be retried?
9914 $self->add_failed_here();
9915 my $retries = $self->{'commandline'}->
9916 replace_placeholders([$opt::retries],0,0);
9917 if($self->total_failed() == $retries) {
9918 # This has been retried enough
9919 return 0;
9920 } else {
9921 # This command should be retried
9922 $self->set_endtime(undef);
9923 $self->reset_exitstatus();
9924 $Global::JobQueue->unget($self);
9925 ::debug("run", "Retry ", $self->seq(), "\n");
9926 return 1;
9932 my (%print_later,$job_seq_to_print);
9934 sub print_earlier_jobs($) {
9935 # Print jobs whose output is postponed due to --keep-order
9936 # Returns: N/A
9937 my $job = shift;
9938 $print_later{$job->seq()} = $job;
9939 $job_seq_to_print ||= 1;
9940 my $returnsize = 0;
9941 ::debug("run", "Looking for: $job_seq_to_print ",
9942 "This: ", $job->seq(), "\n");
9943 for(;vec($Global::job_already_run,$job_seq_to_print,1);
9944 $job_seq_to_print++) {}
9945 while(my $j = $print_later{$job_seq_to_print}) {
9946 $returnsize += $j->print();
9947 if($j->endtime()) {
9948 # Job finished - look at the next
9949 delete $print_later{$job_seq_to_print};
9950 $job_seq_to_print++;
9951 next;
9952 } else {
9953 # Job not finished yet - look at it again next round
9954 last;
9957 return $returnsize;
9961 sub print($) {
9962 # Print the output of the jobs
9963 # Returns: N/A
9964 my $self = shift;
9966 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9967 if($opt::dryrun) {
9968 # Nothing was printed to this job:
9969 # cleanup tmp files if --files was set
9970 ::rm($self->fh(1,"name"));
9972 if($opt::pipe and $self->virgin() and not $opt::tee) {
9973 # Skip --joblog, --dryrun, --verbose
9974 } else {
9975 if($opt::ungroup) {
9976 # NULL returnsize = 0 returnsize
9977 $self->returnsize() or $self->add_returnsize(0);
9978 if($Global::joblog and defined $self->{'exitstatus'}) {
9979 # Add to joblog when finished
9980 $self->print_joblog();
9981 # Printing is only relevant for grouped/--line-buffer output.
9982 $opt::ungroup and return;
9985 # Check for disk full
9986 ::exit_if_disk_full();
9989 my $returnsize = $self->returnsize();
9990 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9991 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9992 $fdno == 0 and next;
9993 my $out_fd = $Global::fd{$fdno};
9994 my $in_fh = $self->fh($fdno,"r");
9995 if(not $in_fh) {
9996 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9997 # ::warning("File descriptor $fdno not defined\n");
9999 next;
10001 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
10002 if($opt::linebuffer) {
10003 # Line buffered print out
10004 $self->print_linebuffer($fdno,$in_fh,$out_fd);
10005 } elsif($opt::files) {
10006 $self->print_files($fdno,$in_fh,$out_fd);
10007 } elsif($opt::tag or defined $opt::tagstring) {
10008 $self->print_tag($fdno,$in_fh,$out_fd);
10009 } else {
10010 $self->print_normal($fdno,$in_fh,$out_fd);
10012 flush $out_fd;
10014 ::debug("print", "<<joboutput\n");
10015 if(defined $self->{'exitstatus'}
10016 and not ($self->virgin() and $opt::pipe)) {
10017 if($Global::joblog and not $opt::sqlworker) {
10018 # Add to joblog when finished
10019 $self->print_joblog();
10021 if($opt::sqlworker and not $opt::results) {
10022 $Global::sql->output($self);
10024 if($Global::csvsep) {
10025 # Add output to CSV when finished
10026 $self->print_csv();
10029 return $returnsize - $self->returnsize();
10033 my $header_printed;
10035 sub print_csv($) {
10036 my $self = shift;
10037 my $cmd;
10038 if($Global::verbose <= 1) {
10039 $cmd = $self->replaced();
10040 } else {
10041 # Verbose level > 1: Print the rsync and stuff
10042 $cmd = join " ", @{$self->{'commandline'}};
10044 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10046 if(not $header_printed) {
10047 # Variable headers
10048 # Normal => V1..Vn
10049 # --header : => first value from column
10050 my @V;
10051 if($opt::header) {
10052 my $i = 1;
10053 @V = (map { $Global::input_source_header{$i++} }
10054 @$record_ref[1..$#$record_ref]);
10055 } else {
10056 my $V = "V1";
10057 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
10059 print $Global::csv_fh
10060 (map { $$_ }
10061 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
10062 "Send", "Receive", "Exitval", "Signal", "Command",
10064 "Stdout","Stderr"
10065 )),"\n";
10066 $header_printed++;
10068 # Memory optimization: Overwrite with the joined output
10069 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10070 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10071 print $Global::csv_fh
10072 (map { $$_ }
10073 combine_ref
10074 ($self->seq(),
10075 $self->sshlogin()->string(),
10076 $self->starttime(), sprintf("%0.3f",$self->runtime()),
10077 $self->transfersize(), $self->returnsize(),
10078 $self->exitstatus(), $self->exitsignal(), \$cmd,
10079 \@$record_ref[1..$#$record_ref],
10080 \$self->{'output'}{1},
10081 \$self->{'output'}{2})),"\n";
10085 sub combine_ref($) {
10086 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
10087 my @part = @_;
10088 my $sep = $Global::csvsep;
10089 my $quot = '"';
10090 my @out = ();
10092 my $must_be_quoted;
10093 for my $column (@part) {
10094 # Memory optimization: Content transferred as reference
10095 if(ref $column ne "SCALAR") {
10096 # Convert all columns to scalar references
10097 my $v = $column;
10098 $column = \$v;
10100 if(not defined $$column) {
10101 $$column = '';
10102 next;
10105 $must_be_quoted = 0;
10107 if($$column =~ s/$quot/$quot$quot/go){
10108 # Replace " => ""
10109 $must_be_quoted ||=1;
10111 if($$column =~ /[\s\Q$sep\E]/o){
10112 # Put quotes around if the column contains ,
10113 $must_be_quoted ||=1;
10116 $Global::use{"bytes"} ||= eval "use bytes; 1;";
10117 if ($$column =~ /\0/) {
10118 # Contains \0 => put quotes around
10119 $must_be_quoted ||=1;
10121 if($must_be_quoted){
10122 push @out, \$sep, \$quot, $column, \$quot;
10123 } else {
10124 push @out, \$sep, $column;
10127 # Pop off a $sep
10128 shift @out;
10129 return @out;
10132 sub print_files($) {
10133 # Print the name of the file containing stdout on stdout
10134 # Uses:
10135 # $opt::pipe
10136 # $opt::group = Print when job is done
10137 # $opt::linebuffer = Print ASAP
10138 # Returns: N/A
10139 my $self = shift;
10140 my ($fdno,$in_fh,$out_fd) = @_;
10142 # If the job is dead: close printing fh. Needed for --compress
10143 close $self->fh($fdno,"w");
10144 if($? and $opt::compress) {
10145 ::error($opt::compress_program." failed.");
10146 $self->set_exitstatus(255);
10148 if($opt::compress) {
10149 # Kill the decompressor which will not be needed
10150 CORE::kill "TERM", $self->fh($fdno,"rpid");
10152 close $in_fh;
10154 if($opt::pipe and $self->virgin()) {
10155 # Nothing was printed to this job:
10156 # cleanup unused tmp files because --files was set
10157 for my $fdno (1,2) {
10158 ::rm($self->fh($fdno,"name"));
10159 ::rm($self->fh($fdno,"unlink"));
10161 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
10162 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10163 if($Global::membuffer) {
10164 push @{$self->{'output'}{$fdno}},
10165 $self->tag(), $self->fh($fdno,"name");
10167 $self->add_returnsize(-s $self->fh($fdno,"name"));
10168 # Mark as printed - do not print again
10169 $self->set_fh($fdno,"name",undef);
10173 sub print_linebuffer($) {
10174 my $self = shift;
10175 my ($fdno,$in_fh,$out_fd) = @_;
10176 if(defined $self->{'exitstatus'}) {
10177 # If the job is dead: close printing fh. Needed for --compress
10178 close $self->fh($fdno,"w");
10179 if($? and $opt::compress) {
10180 ::error($opt::compress_program." failed.");
10181 $self->set_exitstatus(255);
10183 if($opt::compress) {
10184 # Blocked reading in final round
10185 for my $fdno (1,2) {
10186 ::set_fh_blocking($self->fh($fdno,'r'));
10190 if(not $self->virgin()) {
10191 if($opt::files or ($opt::results and not $Global::csvsep)) {
10192 # Print filename
10193 if($fdno == 1 and not $self->fh($fdno,"printed")) {
10194 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
10195 if($Global::membuffer) {
10196 push(@{$self->{'output'}{$fdno}}, $self->tag(),
10197 $self->fh($fdno,"name"));
10199 $self->set_fh($fdno,"printed",1);
10201 # No need for reading $in_fh, as it is from "cat >/dev/null"
10202 } else {
10203 # Read halflines and print full lines
10204 my $outputlength = 0;
10205 my $halfline_ref = $self->{'halfline'}{$fdno};
10206 my ($buf,$i,$rv);
10207 # 1310720 gives 1.2 GB/s
10208 # 131072 gives 0.9 GB/s
10209 while($rv = sysread($in_fh, $buf,1310720)) {
10210 $outputlength += $rv;
10211 # TODO --recend
10212 # Treat both \n and \r as line end
10213 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10214 if($i) {
10215 # One or more complete lines were found
10216 if($opt::tag or defined $opt::tagstring) {
10217 # Replace ^ with $tag within the full line
10218 if($Global::cache_replacement_eval) {
10219 # Replace with the same value for tag
10220 my $tag = $self->tag();
10221 unshift @$halfline_ref, $tag;
10222 # TODO --recend that can be partially in @$halfline_ref
10223 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$tag/gs;
10224 # The length changed, so find the new ending pos
10225 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10226 } else {
10227 # Replace with freshly computed value of tag
10228 unshift @$halfline_ref, $self->tag();
10229 substr($buf,0,$i-1) =~ s/(?<=[\n\r])(?=.|$)/$self->tag()/gse;
10230 # The length changed, so find the new ending pos
10231 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10234 # Print the partial line (halfline) and the last half
10235 print $out_fd @$halfline_ref, substr($buf,0,$i);
10236 # Buffer in memory for SQL and CSV-output
10237 if($Global::membuffer) {
10238 push(@{$self->{'output'}{$fdno}},
10239 @$halfline_ref, substr($buf,0,$i));
10241 # Remove the printed part by keeping the unprinted part
10242 @$halfline_ref = (substr($buf,$i));
10243 } else {
10244 # No newline, so append to the halfline
10245 push @$halfline_ref, $buf;
10248 $self->add_returnsize($outputlength);
10250 if(defined $self->{'exitstatus'}) {
10251 if($opt::files or ($opt::results and not $Global::csvsep)) {
10252 $self->add_returnsize(-s $self->fh($fdno,"name"));
10253 } else {
10254 # If the job is dead: print the remaining partial line
10255 # read remaining
10256 my $halfline_ref = $self->{'halfline'}{$fdno};
10257 if(grep /./, @$halfline_ref) {
10258 my $returnsize = 0;
10259 for(@{$self->{'halfline'}{$fdno}}) {
10260 $returnsize += length $_;
10262 $self->add_returnsize($returnsize);
10263 if($opt::tag or defined $opt::tagstring) {
10264 # Prepend $tag the the remaining half line
10265 unshift @$halfline_ref, $self->tag();
10267 # Print the partial line (halfline)
10268 print $out_fd @{$self->{'halfline'}{$fdno}};
10269 # Buffer in memory for SQL and CSV-output
10270 if($Global::membuffer) {
10271 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
10273 @$halfline_ref = ();
10276 if($self->fh($fdno,"rpid") and
10277 CORE::kill 0, $self->fh($fdno,"rpid")) {
10278 # decompress still running
10279 } else {
10280 # decompress done: close fh
10281 close $in_fh;
10282 if($? and $opt::compress) {
10283 ::error($opt::decompress_program." failed.");
10284 $self->set_exitstatus(255);
10291 sub print_tag(@) {
10292 return print_normal(@_);
10295 sub free_ressources() {
10296 my $self = shift;
10297 if(not $opt::ungroup) {
10298 my $fh;
10299 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
10300 $fh = $self->fh($fdno,"w");
10301 $fh and close $fh;
10302 $fh = $self->fh($fdno,"r");
10303 $fh and close $fh;
10308 sub print_normal($) {
10309 my $self = shift;
10310 my ($fdno,$in_fh,$out_fd) = @_;
10311 my $buf;
10312 close $self->fh($fdno,"w");
10313 if($? and $opt::compress) {
10314 ::error($opt::compress_program." failed.");
10315 $self->set_exitstatus(255);
10317 if(not $self->virgin()) {
10318 seek $in_fh, 0, 0;
10319 # $in_fh is now ready for reading at position 0
10320 my $outputlength = 0;
10321 my @output;
10323 if($opt::tag or $opt::tagstring) {
10324 # Read line by line
10325 local $/ = "\n";
10326 my $tag = $self->tag();
10327 while(<$in_fh>) {
10328 $outputlength += length $_;
10329 # Tag lines with \r, too
10330 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10331 print $out_fd $tag,$_;
10332 if($Global::membuffer) {
10333 push @{$self->{'output'}{$fdno}}, $tag, $_;
10336 } else {
10337 while(sysread($in_fh,$buf,131072)) {
10338 print $out_fd $buf;
10339 $outputlength += length $buf;
10340 if($Global::membuffer) {
10341 push @{$self->{'output'}{$fdno}}, $buf;
10345 if($fdno == 1) {
10346 $self->add_returnsize($outputlength);
10348 close $in_fh;
10349 if($? and $opt::compress) {
10350 ::error($opt::decompress_program." failed.");
10351 $self->set_exitstatus(255);
10356 sub print_joblog($) {
10357 my $self = shift;
10358 my $cmd;
10359 if($Global::verbose <= 1) {
10360 $cmd = $self->replaced();
10361 } else {
10362 # Verbose level > 1: Print the rsync and stuff
10363 $cmd = $self->wrapped();
10365 # Newlines make it hard to parse the joblog
10366 $cmd =~ s/\n/\0/g;
10367 print $Global::joblog
10368 join("\t", $self->seq(), $self->sshlogin()->string(),
10369 $self->starttime(), sprintf("%10.3f",$self->runtime()),
10370 $self->transfersize(), $self->returnsize(),
10371 $self->exitstatus(), $self->exitsignal(), $cmd
10372 ). "\n";
10373 flush $Global::joblog;
10374 $self->set_job_in_joblog();
10377 sub tag($) {
10378 my $self = shift;
10379 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
10380 if($opt::tag or defined $opt::tagstring) {
10381 $self->{'tag'} = $self->{'commandline'}->
10382 replace_placeholders([$opt::tagstring],0,0)."\t";
10383 } else {
10384 $self->{'tag'} = "";
10387 return $self->{'tag'};
10390 sub hostgroups($) {
10391 my $self = shift;
10392 if(not defined $self->{'hostgroups'}) {
10393 $self->{'hostgroups'} =
10394 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
10396 return @{$self->{'hostgroups'}};
10399 sub exitstatus($) {
10400 my $self = shift;
10401 return $self->{'exitstatus'};
10404 sub set_exitstatus($$) {
10405 my $self = shift;
10406 my $exitstatus = shift;
10407 if($exitstatus) {
10408 # Overwrite status if non-zero
10409 $self->{'exitstatus'} = $exitstatus;
10410 } else {
10411 # Set status but do not overwrite
10412 # Status may have been set by --timeout
10413 $self->{'exitstatus'} ||= $exitstatus;
10415 $opt::sqlworker and
10416 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
10417 $exitstatus);
10420 sub reset_exitstatus($) {
10421 my $self = shift;
10422 undef $self->{'exitstatus'};
10425 sub exitsignal($) {
10426 my $self = shift;
10427 return $self->{'exitsignal'};
10430 sub set_exitsignal($$) {
10431 my $self = shift;
10432 my $exitsignal = shift;
10433 $self->{'exitsignal'} = $exitsignal;
10434 $opt::sqlworker and
10435 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
10436 $exitsignal);
10440 my $status_printed;
10441 my $total_jobs;
10443 sub should_we_halt {
10444 # Should we halt? Immediately? Gracefully?
10445 # Returns: N/A
10446 my $job = shift;
10447 my $limit;
10448 if($job->exitstatus() or $job->exitsignal()) {
10449 # Job failed
10450 $Global::exitstatus++;
10451 $Global::total_failed++;
10452 if($Global::halt_fail) {
10453 ::status("$Global::progname: This job failed:",
10454 $job->replaced());
10455 $limit = $Global::total_failed;
10457 } elsif($Global::halt_success) {
10458 ::status("$Global::progname: This job succeeded:",
10459 $job->replaced());
10460 $limit = $Global::total_completed - $Global::total_failed;
10462 if($Global::halt_done) {
10463 ::status("$Global::progname: This job finished:",
10464 $job->replaced());
10465 $limit = $Global::total_completed;
10467 if(not defined $limit) {
10468 return ""
10470 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
10471 # --halt % => 1..100 (pct of jobs failed)
10472 if($Global::halt_pct and not $Global::halt_count) {
10473 $total_jobs ||= $Global::JobQueue->total_jobs();
10474 # From the pct compute the number of jobs that must fail/succeed
10475 $Global::halt_count = $total_jobs * $Global::halt_pct;
10477 if($limit >= $Global::halt_count) {
10478 # At least N jobs have failed/succeded/completed
10479 # or at least N% have failed/succeded/completed
10480 # So we should prepare for exit
10481 if($Global::halt_fail or $Global::halt_done) {
10482 # Set exit status
10483 if(not defined $Global::halt_exitstatus) {
10484 if($Global::halt_pct) {
10485 # --halt now,fail=X% or soon,fail=X%
10486 # --halt now,done=X% or soon,done=X%
10487 $Global::halt_exitstatus =
10488 ::ceil($Global::total_failed / $total_jobs * 100);
10489 } elsif($Global::halt_count) {
10490 # --halt now,fail=X or soon,fail=X
10491 # --halt now,done=X or soon,done=X
10492 $Global::halt_exitstatus =
10493 ::min($Global::total_failed,101);
10495 if($Global::halt_count and $Global::halt_count == 1) {
10496 # --halt now,fail=1 or soon,fail=1
10497 # --halt now,done=1 or soon,done=1
10498 # Emulate Bash's +128 if there is a signal
10499 $Global::halt_exitstatus =
10500 ($job->exitstatus()
10502 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
10505 ::debug("halt","Pct: ",$Global::halt_pct,
10506 " count: ",$Global::halt_count,
10507 " status: ",$Global::halt_exitstatus,"\n");
10508 } elsif($Global::halt_success) {
10509 $Global::halt_exitstatus = 0;
10511 if($Global::halt_when eq "soon"
10513 (scalar(keys %Global::running) > 0
10515 $Global::max_jobs_running == 1)) {
10516 ::status
10517 ("$Global::progname: Starting no more jobs. ".
10518 "Waiting for ". (keys %Global::running).
10519 " jobs to finish.");
10520 $Global::start_no_new_jobs ||= 1;
10522 return($Global::halt_when);
10524 return "";
10529 package CommandLine;
10531 sub new($) {
10532 my $class = shift;
10533 my $seq = shift;
10534 my $commandref = shift;
10535 $commandref || die;
10536 my $arg_queue = shift;
10537 my $context_replace = shift;
10538 my $max_number_of_args = shift; # for -N and normal (-n1)
10539 my $transfer_files = shift;
10540 my $return_files = shift;
10541 my $replacecount_ref = shift;
10542 my $len_ref = shift;
10543 my %replacecount = %$replacecount_ref;
10544 my %len = %$len_ref;
10545 for (keys %$replacecount_ref) {
10546 # Total length of this replacement string {} replaced with all args
10547 $len{$_} = 0;
10549 return bless {
10550 'command' => $commandref,
10551 'seq' => $seq,
10552 'len' => \%len,
10553 'arg_list' => [],
10554 'arg_list_flat' => [],
10555 'arg_list_flat_orig' => [undef],
10556 'arg_queue' => $arg_queue,
10557 'max_number_of_args' => $max_number_of_args,
10558 'replacecount' => \%replacecount,
10559 'context_replace' => $context_replace,
10560 'transfer_files' => $transfer_files,
10561 'return_files' => $return_files,
10562 'replaced' => undef,
10563 }, ref($class) || $class;
10566 sub seq($) {
10567 my $self = shift;
10568 return $self->{'seq'};
10571 sub set_seq($$) {
10572 my $self = shift;
10573 $self->{'seq'} = shift;
10576 sub slot($) {
10577 # Find the number of a free job slot and return it
10578 # Uses:
10579 # @Global::slots - list with free jobslots
10580 # Returns:
10581 # $jobslot = number of jobslot
10582 my $self = shift;
10583 if(not $self->{'slot'}) {
10584 if(not @Global::slots) {
10585 # $max_slot_number will typically be $Global::max_jobs_running
10586 push @Global::slots, ++$Global::max_slot_number;
10588 $self->{'slot'} = shift @Global::slots;
10590 return $self->{'slot'};
10594 my $already_spread;
10595 my $darwin_max_len;
10597 sub populate($) {
10598 # Add arguments from arg_queue until the number of arguments or
10599 # max line length is reached
10600 # Uses:
10601 # $Global::minimal_command_line_length
10602 # $opt::cat
10603 # $opt::fifo
10604 # $Global::JobQueue
10605 # $opt::m
10606 # $opt::X
10607 # $Global::max_jobs_running
10608 # Returns: N/A
10609 my $self = shift;
10610 my $next_arg;
10611 my $max_len = $Global::minimal_command_line_length
10612 || Limits::Command::max_length();
10613 if($^O eq "darwin") {
10614 # Darwin's limit is affected by:
10615 # * number of environment names (variables+functions)
10616 # * size of environment
10617 # * the length of arguments:
10618 # a one-char argument lowers the limit by 5
10619 # To be safe assume all arguments are one-char
10620 # The max_len is cached between runs, but if the size of
10621 # the environment is different we need to recompute the
10622 # usable max length for this run of GNU Parallel
10623 # See https://unix.stackexchange.com/a/604943/2972
10624 if(not $darwin_max_len) {
10625 my $envc = (keys %ENV);
10626 my $envn = length join"",(keys %ENV);
10627 my $envv = length join"",(values %ENV);
10628 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
10629 ::debug("init",
10630 "length: $darwin_max_len ".
10631 "3+($max_len - $envn - $envv)/5 - $envc*2");
10633 $max_len = $darwin_max_len;
10635 if($opt::cat or $opt::fifo) {
10636 # Get the empty arg added by --pipepart (if any)
10637 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
10638 # $PARALLEL_TMP will point to a tempfile that will be used as {}
10639 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
10640 unget([Arg->new('$PARALLEL_TMP')]);
10642 while (not $self->{'arg_queue'}->empty()) {
10643 $next_arg = $self->{'arg_queue'}->get();
10644 if(not defined $next_arg) {
10645 next;
10647 $self->push($next_arg);
10648 if($self->len() >= $max_len) {
10649 # Command length is now > max_length
10650 # If there are arguments: remove the last
10651 # If there are no arguments: Error
10652 # TODO stuff about -x opt_x
10653 if($self->number_of_args() > 1) {
10654 # There is something to work on
10655 $self->{'arg_queue'}->unget($self->pop());
10656 last;
10657 } else {
10658 my $args = join(" ", map { $_->orig() } @$next_arg);
10659 ::error("Command line too long (".
10660 $self->len(). " >= ".
10661 $max_len.
10662 ") at input ".
10663 $self->{'arg_queue'}->arg_number().
10664 ": ".
10665 ((length $args > 50) ?
10666 (substr($args,0,50))."..." :
10667 $args));
10668 $self->{'arg_queue'}->unget($self->pop());
10669 ::wait_and_exit(255);
10673 if(defined $self->{'max_number_of_args'}) {
10674 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
10675 last;
10679 if(($opt::m or $opt::X) and not $already_spread
10680 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
10681 # -m or -X and EOF => Spread the arguments over all jobslots
10682 # (unless they are already spread)
10683 $already_spread ||= 1;
10684 if($self->number_of_args() > 1) {
10685 $self->{'max_number_of_args'} =
10686 ::ceil($self->number_of_args()/$Global::max_jobs_running);
10687 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
10688 $self->{'max_number_of_args'};
10689 $self->{'arg_queue'}->unget($self->pop_all());
10690 while($self->number_of_args() < $self->{'max_number_of_args'}) {
10691 $self->push($self->{'arg_queue'}->get());
10694 $Global::JobQueue->flush_total_jobs();
10697 if($opt::sqlmaster) {
10698 # Insert the V1..Vn for this $seq in SQL table instead of generating one
10699 $Global::sql->insert_records($self->seq(), $self->{'command'},
10700 $self->{'arg_list_flat_orig'});
10705 sub push($) {
10706 # Add one or more records as arguments
10707 # Returns: N/A
10708 my $self = shift;
10709 my $record = shift;
10710 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
10711 push @{$self->{'arg_list_flat'}}, @$record;
10712 push @{$self->{'arg_list'}}, $record;
10713 # Make @arg available for {= =}
10714 *Arg::arg = $self->{'arg_list_flat_orig'};
10716 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10717 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10718 if($perlexpr =~ /^(\d+) /) {
10719 # Positional
10720 defined($record->[$1-1]) or next;
10721 $self->{'len'}{$perlexpr} +=
10722 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10723 } else {
10724 for my $arg (@$record) {
10725 if(defined $arg) {
10726 $self->{'len'}{$perlexpr} +=
10727 length $arg->replace($perlexpr,$quote_arg,$self);
10734 sub pop($) {
10735 # Remove last argument
10736 # Returns:
10737 # the last record
10738 my $self = shift;
10739 my $record = pop @{$self->{'arg_list'}};
10740 # pop off arguments from @$record
10741 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
10742 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
10743 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
10744 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10745 if($perlexpr =~ /^(\d+) /) {
10746 # Positional
10747 defined($record->[$1-1]) or next;
10748 $self->{'len'}{$perlexpr} -=
10749 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10750 } else {
10751 for my $arg (@$record) {
10752 if(defined $arg) {
10753 $self->{'len'}{$perlexpr} -=
10754 length $arg->replace($perlexpr,$quote_arg,$self);
10759 return $record;
10762 sub pop_all($) {
10763 # Remove all arguments and zeros the length of replacement perlexpr
10764 # Returns:
10765 # all records
10766 my $self = shift;
10767 my @popped = @{$self->{'arg_list'}};
10768 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10769 $self->{'len'}{$perlexpr} = 0;
10771 $self->{'arg_list'} = [];
10772 $self->{'arg_list_flat_orig'} = [undef];
10773 $self->{'arg_list_flat'} = [];
10774 return @popped;
10777 sub number_of_args($) {
10778 # The number of records
10779 # Returns:
10780 # number of records
10781 my $self = shift;
10782 # This is really the number of records
10783 return $#{$self->{'arg_list'}}+1;
10786 sub number_of_recargs($) {
10787 # The number of args in records
10788 # Returns:
10789 # number of args records
10790 my $self = shift;
10791 my $sum = 0;
10792 my $nrec = scalar @{$self->{'arg_list'}};
10793 if($nrec) {
10794 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
10796 return $sum;
10799 sub args_as_string($) {
10800 # Returns:
10801 # all unmodified arguments joined with ' ' (similar to {})
10802 my $self = shift;
10803 return (join " ", map { $_->orig() }
10804 map { @$_ } @{$self->{'arg_list'}});
10807 sub results_out($) {
10808 sub max_file_name_length {
10809 # Figure out the max length of a subdir
10810 # TODO and the max total length
10811 # Ext4 = 255,130816
10812 # Uses:
10813 # $Global::max_file_length is set
10814 # Returns:
10815 # $Global::max_file_length
10816 my $testdir = shift;
10818 my $upper = 100_000_000;
10819 # Dir length of 8 chars is supported everywhere
10820 my $len = 8;
10821 my $dir = "x"x$len;
10822 do {
10823 rmdir($testdir."/".$dir);
10824 $len *= 16;
10825 $dir = "x"x$len;
10826 } while ($len < $upper and mkdir $testdir."/".$dir);
10827 # Then search for the actual max length between $len/16 and $len
10828 my $min = $len/16;
10829 my $max = $len;
10830 while($max-$min > 5) {
10831 # If we are within 5 chars of the exact value:
10832 # it is not worth the extra time to find the exact value
10833 my $test = int(($min+$max)/2);
10834 $dir = "x"x$test;
10835 if(mkdir $testdir."/".$dir) {
10836 rmdir($testdir."/".$dir);
10837 $min = $test;
10838 } else {
10839 $max = $test;
10842 $Global::max_file_length = $min;
10843 return $min;
10846 my $self = shift;
10847 my $out = $self->replace_placeholders([$opt::results],0,0);
10848 if($out eq $opt::results) {
10849 # $opt::results simple string: Append args_as_dirname
10850 my $args_as_dirname = $self->args_as_dirname();
10851 # Output in: prefix/name1/val1/name2/val2/stdout
10852 $out = $opt::results."/".$args_as_dirname;
10853 if(-d $out or eval{ File::Path::mkpath($out); }) {
10854 # OK
10855 } else {
10856 # mkpath failed: Argument probably too long.
10857 # Set $Global::max_file_length, which will keep the individual
10858 # dir names shorter than the max length
10859 max_file_name_length($opt::results);
10860 $args_as_dirname = $self->args_as_dirname();
10861 # prefix/name1/val1/name2/val2/
10862 $out = $opt::results."/".$args_as_dirname;
10863 File::Path::mkpath($out);
10865 $out .="/";
10866 } else {
10867 if($out =~ m:/$:) {
10868 # / = dir
10869 if(-d $out or eval{ File::Path::mkpath($out); }) {
10870 # OK
10871 } else {
10872 ::error("Cannot make dir '$out'.");
10873 ::wait_and_exit(255);
10875 } else {
10876 $out =~ m:(.*)/:;
10877 File::Path::mkpath($1);
10880 return $out;
10883 sub args_as_dirname($) {
10884 # Returns:
10885 # all unmodified arguments joined with '/' (similar to {})
10886 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10887 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
10888 my $self = shift;
10889 my @res = ();
10891 for my $rec_ref (@{$self->{'arg_list'}}) {
10892 # If headers are used, sort by them.
10893 # Otherwise keep the order from the command line.
10894 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
10895 for my $n (@header_indexes_sorted) {
10896 CORE::push(@res,
10897 $Global::input_source_header{$n},
10898 map { my $s = $_;
10899 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10900 $s =~ s/\\/\\\\/g;
10901 $s =~ s/\t/\\t/g;
10902 $s =~ s/\0/\\0/g;
10903 $s =~ s:/:\\_:g;
10904 if($Global::max_file_length) {
10905 # Keep each subdir shorter than the longest
10906 # allowed file name
10907 $s = substr($s,0,$Global::max_file_length);
10909 $s; }
10910 $rec_ref->[$n-1]->orig());
10913 return join "/", @res;
10916 sub header_indexes_sorted($) {
10917 # Sort headers first by number then by name.
10918 # E.g.: 1a 1b 11a 11b
10919 # Returns:
10920 # Indexes of %Global::input_source_header sorted
10921 my $max_col = shift;
10923 no warnings 'numeric';
10924 for my $col (1 .. $max_col) {
10925 # Make sure the header is defined. If it is not: use column number
10926 if(not defined $Global::input_source_header{$col}) {
10927 $Global::input_source_header{$col} = $col;
10930 my @header_indexes_sorted = sort {
10931 # Sort headers numerically then asciibetically
10932 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
10934 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
10935 } 1 .. $max_col;
10936 return @header_indexes_sorted;
10939 sub len($) {
10940 # Uses:
10941 # @opt::shellquote
10942 # The length of the command line with args substituted
10943 my $self = shift;
10944 my $len = 0;
10945 # Add length of the original command with no args
10946 # Length of command w/ all replacement args removed
10947 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
10948 ::debug("length", "noncontext + command: $len\n");
10949 # MacOS has an overhead of 8 bytes per argument
10950 my $darwin = ($^O eq "darwin") ? 8 : 0;
10951 my $recargs = $self->number_of_recargs();
10952 if($self->{'context_replace'}) {
10953 # Context is duplicated for each arg
10954 $len += $recargs * $self->{'len'}{'context'};
10955 for my $replstring (keys %{$self->{'replacecount'}}) {
10956 # If the replacements string is more than once: mulitply its length
10957 $len += $self->{'len'}{$replstring} *
10958 $self->{'replacecount'}{$replstring};
10959 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
10960 $self->{'replacecount'}{$replstring}, "\n");
10962 # echo 11 22 33 44 55 66 77 88 99 1010
10963 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
10964 # 5 + ctxgrp*arg
10965 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10966 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10967 # Add space between context groups
10968 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10969 if($darwin) {
10970 $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
10972 } else {
10973 # Each replacement string may occur several times
10974 # Add the length for each time
10975 $len += 1*$self->{'len'}{'context'};
10976 ::debug("length", "context+noncontext + command: $len\n");
10977 for my $replstring (keys %{$self->{'replacecount'}}) {
10978 # (space between recargs + length of replacement)
10979 # * number this replacement is used
10980 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10981 $self->{'replacecount'}{$replstring};
10982 if($darwin) {
10983 $len += ($recargs * $self->{'replacecount'}{$replstring}
10984 * $darwin);
10988 if(defined $Global::parallel_env) {
10989 # If we are using --env, add the prefix for that, too.
10990 $len += length $Global::parallel_env;
10992 if($Global::quoting) {
10993 # Pessimistic length if -q is set
10994 # Worse than worst case: ' => "'" + " => '"'
10995 # TODO can we count the number of expanding chars?
10996 # and count them in arguments, too?
10997 $len *= 3;
10999 if(@opt::shellquote) {
11000 # Pessimistic length if --shellquote is set
11001 # Worse than worst case: ' => "'"
11002 for(@opt::shellquote) {
11003 $len *= 3;
11005 $len *= 5;
11007 if(@opt::sshlogin) {
11008 # Pessimistic length if remote
11009 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
11010 $len = int($len*4/3);
11013 return $len;
11016 sub replaced($) {
11017 # Uses:
11018 # $Global::quote_replace
11019 # $Global::quoting
11020 # Returns:
11021 # $replaced = command with place holders replaced and prepended
11022 my $self = shift;
11023 if(not defined $self->{'replaced'}) {
11024 # Don't quote arguments if the input is the full command line
11025 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11026 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
11027 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
11028 $self->{'replaced'} = $self->
11029 replace_placeholders($self->{'command'},$Global::quoting,
11030 $quote_arg);
11031 my $len = length $self->{'replaced'};
11032 if ($len != $self->len()) {
11033 ::debug("length", $len, " != ", $self->len(),
11034 " ", $self->{'replaced'}, "\n");
11035 } else {
11036 ::debug("length", $len, " == ", $self->len(),
11037 " ", $self->{'replaced'}, "\n");
11040 return $self->{'replaced'};
11043 sub replace_placeholders($$$$) {
11044 # Replace foo{}bar with fooargbar
11045 # Input:
11046 # $targetref = command as shell words
11047 # $quote = should everything be quoted?
11048 # $quote_arg = should replaced arguments be quoted?
11049 # Uses:
11050 # @Arg::arg = arguments as strings to be use in {= =}
11051 # Returns:
11052 # @target with placeholders replaced
11053 my $self = shift;
11054 my $targetref = shift;
11055 my $quote = shift;
11056 my $quote_arg = shift;
11057 my %replace;
11059 # Token description:
11060 # \0spc = unquoted space
11061 # \0end = last token element
11062 # \0ign = dummy token to be ignored
11063 # \257<...\257> = replacement expression
11064 # " " = quoted space, that splits -X group
11065 # text = normal text - possibly part of -X group
11066 my $spacer = 0;
11067 my @tokens = grep { length $_ > 0 } map {
11068 if(/^\257<|^ $/) {
11069 # \257<...\257> or space
11071 } else {
11072 # Split each space/tab into a token
11073 split /(?=\s)|(?<=\s)/
11076 # Split \257< ... \257> into own token
11077 map { split /(?=\257<)|(?<=\257>)/ }
11078 # Insert "\0spc" between every element
11079 # This space should never be quoted
11080 map { $spacer++ ? ("\0spc",$_) : $_ }
11081 map { $_ eq "" ? "\0empty" : $_ }
11082 @$targetref;
11084 if(not @tokens) {
11085 # @tokens is empty: Return empty array
11086 return @tokens;
11088 ::debug("replace", "Tokens ".join":",@tokens,"\n");
11089 # Make it possible to use $arg[2] in {= =}
11090 *Arg::arg = $self->{'arg_list_flat_orig'};
11091 # Flat list:
11092 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
11093 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
11094 if(not @{$self->{'arg_list_flat'}}) {
11095 @{$self->{'arg_list_flat'}} = Arg->new("");
11097 my $argref = $self->{'arg_list_flat'};
11098 # Number of arguments - used for positional arguments
11099 my $n = $#$argref+1;
11101 # $self is actually a CommandLine-object,
11102 # but it looks nice to be able to say {= $job->slot() =}
11103 my $job = $self;
11104 # @replaced = tokens with \257< \257> replaced
11105 my @replaced;
11106 if($self->{'context_replace'}) {
11107 my @ctxgroup;
11108 for my $t (@tokens,"\0end") {
11109 # \0end = last token was end of tokens.
11110 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
11111 # Context group complete: Replace in it
11112 if(grep { /^\257</ } @ctxgroup) {
11113 # Context group contains a replacement string:
11114 # Copy once per arg
11115 my $space = "\0ign";
11116 for my $arg (@$argref) {
11117 my $normal_replace;
11118 # Push output
11119 # Put unquoted space before each context group
11120 # except the first
11121 CORE::push @replaced, $space, map {
11122 $a = $_;
11123 if($a =~
11124 s{\257<(-?\d+)?(.*)\257>}
11126 if($1) {
11127 # Positional replace
11128 # Find the relevant arg and replace it
11129 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
11130 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11131 replace($2,$quote_arg,$self)
11132 : "");
11133 } else {
11134 # Normal replace
11135 $normal_replace ||= 1;
11136 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11138 }sgxe) {
11139 # Token is \257<..\257>
11140 } else {
11141 if($Global::escape_string_present) {
11142 # Command line contains \257:
11143 # Unescape it \257\256 => \257
11144 $a =~ s/\257\256/\257/g;
11148 } @ctxgroup;
11149 $normal_replace or last;
11150 $space = "\0spc";
11152 } else {
11153 # Context group has no a replacement string: Copy it once
11154 CORE::push @replaced, map {
11155 $Global::escape_string_present and s/\257\256/\257/g; $_;
11156 } @ctxgroup;
11158 # New context group
11159 @ctxgroup=();
11161 if($t eq "\0spc" or $t eq " ") {
11162 CORE::push @replaced,$t;
11163 } else {
11164 CORE::push @ctxgroup,$t;
11167 } else {
11168 # @group = @token
11169 # Replace in group
11170 # Push output
11171 # repquote = no if {} first on line, no if $quote, yes otherwise
11172 for my $t (@tokens) {
11173 if($t =~ /^\257</) {
11174 my $space = "\0ign";
11175 for my $arg (@$argref) {
11176 my $normal_replace;
11177 $a = $t;
11178 $a =~
11179 s{\257<(-?\d+)?(.*)\257>}
11181 if($1) {
11182 # Positional replace
11183 # Find the relevant arg and replace it
11184 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
11185 # If defined: replace
11186 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11187 replace($2,$quote_arg,$self)
11188 : "");
11189 } else {
11190 # Normal replace
11191 $normal_replace ||= 1;
11192 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11194 }sgxe;
11195 CORE::push @replaced, $space, $a;
11196 $normal_replace or last;
11197 $space = "\0spc";
11199 } else {
11200 # No replacement
11201 CORE::push @replaced, map {
11202 $Global::escape_string_present and s/\257\256/\257/g; $_;
11203 } $t;
11207 *Arg::arg = [];
11208 ::debug("replace","Replaced: ".join":",@replaced,"\n");
11210 # Put tokens into groups that may be quoted.
11211 my @quotegroup;
11212 my @quoted;
11213 for (map { $_ eq "\0empty" ? "" : $_ }
11214 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
11215 @replaced, "\0end") {
11216 if($_ eq "\0spc" or $_ eq "\0end") {
11217 # \0spc splits quotable groups
11218 if($quote) {
11219 if(@quotegroup) {
11220 CORE::push @quoted, ::Q(join"",@quotegroup);;
11222 } else {
11223 CORE::push @quoted, join"",@quotegroup;
11225 @quotegroup = ();
11226 } else {
11227 CORE::push @quotegroup, $_;
11230 ::debug("replace","Quoted: ".join":",@quoted,"\n");
11231 return wantarray ? @quoted : "@quoted";
11234 sub skip($) {
11235 # Skip this job
11236 my $self = shift;
11237 $self->{'skip'} = 1;
11241 package CommandLineQueue;
11243 sub new($) {
11244 my $class = shift;
11245 my $commandref = shift;
11246 my $read_from = shift;
11247 my $context_replace = shift || 0;
11248 my $max_number_of_args = shift;
11249 my $transfer_files = shift;
11250 my $return_files = shift;
11251 my @unget = ();
11252 my $posrpl;
11253 my ($replacecount_ref, $len_ref);
11254 my @command = @$commandref;
11255 my $seq = 1;
11256 # Replace replacement strings with {= perl expr =}
11257 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11258 @command = merge_rpl_parts(@command);
11260 # Protect matching inside {= perl expr =}
11261 # by replacing {= and =} with \257< and \257>
11262 # in options that can contain replacement strings:
11263 # @command, --transferfile, --return,
11264 # --tagstring, --workdir, --results
11265 for(@command, @$transfer_files, @$return_files,
11266 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11267 # Skip if undefined
11268 $_ or next;
11269 # Escape \257 => \257\256
11270 $Global::escape_string_present += s/\257/\257\256/g;
11271 # Needs to match rightmost left parens (Perl defaults to leftmost)
11272 # to deal with: {={==} and {={==}=}
11273 # Replace {= -> \257< and =} -> \257>
11275 # Complex way to do:
11276 # s/{=(.*)=}/\257<$1\257>/g
11277 # which would not work
11278 s[\Q$Global::parensleft\E # Match {=
11279 # Match . unless the next string is {= or =}
11280 # needed to force matching the shortest {= =}
11281 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
11282 \Q$Global::parensright\E ] # Match =}
11283 {\257<$1\257>}gxs;
11284 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
11285 # Replace long --rpl's before short ones, as a short may be a
11286 # substring of a long:
11287 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
11289 # Replace the shorthand string (--rpl)
11290 # with the {= perl expr =}
11292 # Avoid searching for shorthand strings inside existing {= perl expr =}
11294 # Replace $$1 in {= perl expr =} with groupings in shorthand string
11296 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
11297 # echo {/.tar/.gz} ::: UU.tar.gz
11298 my ($prefix,$grp_regexp,$postfix) =
11299 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
11300 ( \(.*\) )? # Group capture regexp - e.g (.*)
11301 ( [^)]* )$ # Postfix - e.g }
11302 /xs;
11303 $grp_regexp ||= '';
11304 my $rplval = $Global::rpl{$rpl};
11305 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11306 # Don't replace after \257 unless \257>
11307 \Q$prefix\E $grp_regexp \Q$postfix\E}
11309 # The start remains the same
11310 my $unchanged = $1;
11311 # Dummy entry to start at 1.
11312 my @grp = (1);
11313 # $2 = first ()-group in $grp_regexp
11314 # Put $2 in $grp[1], Put $3 in $grp[2]
11315 # so first ()-group in $grp_regexp is $grp[1];
11316 for(my $i = 2; defined $grp[$#grp]; $i++) {
11317 push @grp, eval '$'.$i;
11319 my $rv = $rplval;
11320 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11321 # in the code to be executed
11322 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11323 # prepend with $_pAr_gRp1 = perlquote($1),
11324 my $set_args = "";
11325 for(my $i = 1;defined $grp[$i]; $i++) {
11326 $set_args .= "\$_pAr_gRp$i = \"" .
11327 ::perl_quote_scalar($grp[$i]) . "\";";
11329 $unchanged . "\257<" . $set_args . $rv . "\257>"
11330 }gxes) {
11332 # Do the same for the positional replacement strings
11333 $posrpl = $rpl;
11334 if($posrpl =~ s/^\{//) {
11335 # Only do this if the shorthand start with {
11336 $prefix=~s/^\{//;
11337 # Don't replace after \257 unless \257>
11338 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11339 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
11341 # The start remains the same
11342 my $unchanged = $1;
11343 my $position = $2;
11344 # Dummy entry to start at 1.
11345 my @grp = (1);
11346 # $3 = first ()-group in $grp_regexp
11347 # Put $3 in $grp[1], Put $4 in $grp[2]
11348 # so first ()-group in $grp_regexp is $grp[1];
11349 for(my $i = 3; defined $grp[$#grp]; $i++) {
11350 push @grp, eval '$'.$i;
11352 my $rv = $rplval;
11353 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11354 # in the code to be executed
11355 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11356 # prepend with $_pAr_gRp1 = perlquote($1),
11357 my $set_args = "";
11358 for(my $i = 1;defined $grp[$i]; $i++) {
11359 $set_args .= "\$_pAr_gRp$i = \"" .
11360 ::perl_quote_scalar($grp[$i]) . "\";";
11362 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
11363 }gxes) {
11369 # Add {} if no replacement strings in @command
11370 ($replacecount_ref, $len_ref, @command) =
11371 replacement_counts_and_lengths($transfer_files,$return_files,@command);
11372 if("@command" =~ /^[^ \t\n=]*\257</) {
11373 # Replacement string is (part of) the command (and not just
11374 # argument or variable definition V1={})
11375 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11376 # Do no quote (Otherwise it will fail if the input contains spaces)
11377 $Global::quote_replace = 0;
11380 if($opt::sqlmaster and $Global::sql->append()) {
11381 $seq = $Global::sql->max_seq() + 1;
11384 return bless {
11385 'unget' => \@unget,
11386 'command' => \@command,
11387 'replacecount' => $replacecount_ref,
11388 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
11389 'context_replace' => $context_replace,
11390 'len' => $len_ref,
11391 'max_number_of_args' => $max_number_of_args,
11392 'size' => undef,
11393 'transfer_files' => $transfer_files,
11394 'return_files' => $return_files,
11395 'seq' => $seq,
11396 }, ref($class) || $class;
11399 sub merge_rpl_parts($) {
11400 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11401 # Input:
11402 # @in = the @command as given by the user
11403 # Uses:
11404 # $Global::parensleft
11405 # $Global::parensright
11406 # Returns:
11407 # @command with parts merged to keep {= and =} as one
11408 my @in = @_;
11409 my @out;
11410 my $l = quotemeta($Global::parensleft);
11411 my $r = quotemeta($Global::parensright);
11413 while(@in) {
11414 my $s = shift @in;
11415 $_ = $s;
11416 # Remove matching (right most) parens
11417 while(s/(.*)$l.*?$r/$1/os) {}
11418 if(/$l/o) {
11419 # Missing right parens
11420 while(@in) {
11421 $s .= " ".shift @in;
11422 $_ = $s;
11423 while(s/(.*)$l.*?$r/$1/os) {}
11424 if(not /$l/o) {
11425 last;
11429 push @out, $s;
11431 return @out;
11434 sub replacement_counts_and_lengths($$@) {
11435 # Count the number of different replacement strings.
11436 # Find the lengths of context for context groups and non-context
11437 # groups.
11438 # If no {} found in @command: add it to @command
11440 # Input:
11441 # \@transfer_files = array of filenames to transfer
11442 # \@return_files = array of filenames to return
11443 # @command = command template
11444 # Output:
11445 # \%replacecount, \%len, @command
11446 my $transfer_files = shift;
11447 my $return_files = shift;
11448 my @command = @_;
11449 my (%replacecount,%len);
11450 my $sum = 0;
11451 while($sum == 0) {
11452 # Count how many times each replacement string is used
11453 my @cmd = @command;
11454 my $contextlen = 0;
11455 my $noncontextlen = 0;
11456 my $contextgroups = 0;
11457 for my $c (@cmd) {
11458 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
11459 # %replacecount = { "perlexpr" => number of times seen }
11460 # e.g { "s/a/b/" => 2 }
11461 $replacecount{$1}++;
11462 $sum++;
11464 # Measure the length of the context around the {= perl expr =}
11465 # Use that {=...=} has been replaced with \000 above
11466 # So there is no need to deal with \257<
11467 while($c =~ s/ (\S*\000\S*) //xs) {
11468 my $w = $1;
11469 $w =~ tr/\000//d; # Remove all \000's
11470 $contextlen += length($w);
11471 $contextgroups++;
11473 # All {= perl expr =} have been removed: The rest is non-context
11474 $noncontextlen += length $c;
11476 for(@$transfer_files, @$return_files,
11477 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
11478 # Options that can contain replacement strings
11479 $_ or next;
11480 my $t = $_;
11481 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
11482 # %replacecount = { "perlexpr" => number of times seen }
11483 # e.g { "$_++" => 2 }
11484 # But for tagstring we just need to mark it as seen
11485 $replacecount{$1} ||= 1;
11488 if($opt::bar) {
11489 # If the command does not contain {} force it to be computed
11490 # as it is being used by --bar
11491 $replacecount{""} ||= 1;
11494 $len{'context'} = 0+$contextlen;
11495 $len{'noncontext'} = $noncontextlen;
11496 $len{'contextgroups'} = $contextgroups;
11497 $len{'noncontextgroups'} = @cmd-$contextgroups;
11498 ::debug("length", "@command Context: ", $len{'context'},
11499 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
11500 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
11501 if($sum == 0) {
11502 if(not @command) {
11503 # Default command = {}
11504 @command = ("\257<\257>");
11505 } elsif(($opt::pipe or $opt::pipepart)
11506 and not $opt::fifo and not $opt::cat) {
11507 # With --pipe / --pipe-part you can have no replacement
11508 last;
11509 } else {
11510 # Append {} to the command if there are no {...}'s and no {=...=}
11511 push @command, ("\257<\257>");
11515 return(\%replacecount,\%len,@command);
11518 sub get($) {
11519 my $self = shift;
11520 if(@{$self->{'unget'}}) {
11521 my $cmd_line = shift @{$self->{'unget'}};
11522 return ($cmd_line);
11523 } else {
11524 if($opt::sqlworker) {
11525 # Get the sequence number from the SQL table
11526 $self->set_seq($SQL::next_seq);
11527 # Get the command from the SQL table
11528 $self->{'command'} = $SQL::command_ref;
11529 my @command;
11530 # Recompute replace counts based on the read command
11531 ($self->{'replacecount'},
11532 $self->{'len'}, @command) =
11533 replacement_counts_and_lengths($self->{'transfer_files'},
11534 $self->{'return_files'},
11535 @$SQL::command_ref);
11536 if("@command" =~ /^[^ \t\n=]*\257</) {
11537 # Replacement string is (part of) the command (and not just
11538 # argument or variable definition V1={})
11539 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11540 # Do no quote (Otherwise it will fail if the input contains spaces)
11541 $Global::quote_replace = 0;
11545 my $cmd_line = CommandLine->new($self->seq(),
11546 $self->{'command'},
11547 $self->{'arg_queue'},
11548 $self->{'context_replace'},
11549 $self->{'max_number_of_args'},
11550 $self->{'transfer_files'},
11551 $self->{'return_files'},
11552 $self->{'replacecount'},
11553 $self->{'len'},
11555 $cmd_line->populate();
11556 ::debug("run","cmd_line->number_of_args ",
11557 $cmd_line->number_of_args(), "\n");
11558 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
11559 if($cmd_line->replaced() eq "") {
11560 # Empty command - pipe requires a command
11561 ::error("--pipe/--pipepart must have a command to pipe into ".
11562 "(e.g. 'cat').");
11563 ::wait_and_exit(255);
11565 } elsif($cmd_line->number_of_args() == 0) {
11566 # We did not get more args - maybe at EOF string?
11567 return undef;
11569 $self->set_seq($self->seq()+1);
11570 return $cmd_line;
11574 sub unget($) {
11575 my $self = shift;
11576 unshift @{$self->{'unget'}}, @_;
11579 sub empty($) {
11580 my $self = shift;
11581 my $empty = (not @{$self->{'unget'}}) &&
11582 $self->{'arg_queue'}->empty();
11583 ::debug("run", "CommandLineQueue->empty $empty");
11584 return $empty;
11587 sub seq($) {
11588 my $self = shift;
11589 return $self->{'seq'};
11592 sub set_seq($$) {
11593 my $self = shift;
11594 $self->{'seq'} = shift;
11597 sub quote_args($) {
11598 my $self = shift;
11599 # If there is not command emulate |bash
11600 return $self->{'command'};
11604 package Limits::Command;
11606 # Maximal command line length (for -m and -X)
11607 sub max_length($) {
11608 # Find the max_length of a command line and cache it
11609 # Returns:
11610 # number of chars on the longest command line allowed
11611 if(not $Limits::Command::line_max_len) {
11612 # Disk cache of max command line length
11613 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
11614 "/linelen";
11615 my $cached_limit;
11616 if(-e $len_cache) {
11617 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
11618 $cached_limit = <$fh>;
11619 close $fh;
11621 if(not $cached_limit) {
11622 $cached_limit = real_max_length();
11623 # If $HOME is write protected: Do not fail
11624 my $dir = ::dirname($len_cache);
11625 -d $dir or eval { File::Path::mkpath($dir); };
11626 open(my $fh, ">", $len_cache.$$);
11627 print $fh $cached_limit;
11628 close $fh;
11629 rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
11631 $Limits::Command::line_max_len = tmux_length($cached_limit);
11632 if($opt::max_chars) {
11633 if($opt::max_chars <= $cached_limit) {
11634 $Limits::Command::line_max_len = $opt::max_chars;
11635 } else {
11636 ::warning("Value for -s option should be < $cached_limit.");
11640 return int($Limits::Command::line_max_len);
11643 sub real_max_length() {
11644 # Find the max_length of a command line
11645 # Returns:
11646 # The maximal command line length with 1 byte arguments
11647 # return find_max(" x");
11648 return find_max("x");
11651 sub find_max($) {
11652 my $string = shift;
11653 # This is slow on Cygwin, so give Cygwin users a warning
11654 if($^O eq "cygwin") {
11655 ::warning("Finding the maximal command line length. This may take up to 30 seconds.")
11657 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
11658 my $upper = 100_000_000;
11659 # 1000 is supported everywhere, so the search can start anywhere 1..999
11660 # 324 makes the search much faster on Cygwin, so let us use that
11661 my $len = 324;
11662 do {
11663 if($len > $upper) { return $len };
11664 $len *= 16;
11665 } while (is_acceptable_command_line_length($len,$string));
11666 # Then search for the actual max length between 0 and upper bound
11667 return binary_find_max(int($len/16),$len,$string);
11670 # Prototype forwarding
11671 sub binary_find_max($$$);
11672 sub binary_find_max($$$) {
11673 # Given a lower and upper bound find the max (length or args) of a command line
11674 # Returns:
11675 # number of chars on the longest command line allowed
11676 my ($lower, $upper, $string) = (@_);
11677 if($lower == $upper or $lower == $upper-1) { return $lower; }
11678 my $middle = int (($upper-$lower)/2 + $lower);
11679 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
11680 if (is_acceptable_command_line_length($middle,$string)) {
11681 return binary_find_max($middle,$upper,$string);
11682 } else {
11683 return binary_find_max($lower,$middle,$string);
11687 sub is_acceptable_command_line_length($$) {
11688 # Test if a command line of this length can run
11689 # in the current environment
11690 # If the string is " x" it tests how many args are allowed
11691 # Returns:
11692 # 0 if the command line length is too long
11693 # 1 otherwise
11694 my $len = shift;
11695 my $string = shift;
11696 if($Global::parallel_env) {
11697 $len += length $Global::parallel_env;
11699 # Force using non-built-in command
11700 ::qqx("/bin/echo ".${string}x(($len-length "/bin/echo ")/length $string));
11701 ::debug("init", "$len=$? ");
11702 return not $?;
11705 sub tmux_length($) {
11706 # If $opt::tmux set, find the limit for tmux
11707 # tmux 1.8 has a 2kB limit
11708 # tmux 1.9 has a 16kB limit
11709 # tmux 2.0 has a 16kB limit
11710 # tmux 2.1 has a 16kB limit
11711 # tmux 2.2 has a 16kB limit
11712 # Input:
11713 # $len = maximal command line length
11714 # Returns:
11715 # $tmux_len = maximal length runable in tmux
11716 local $/ = "\n";
11717 my $len = shift;
11718 if($opt::tmux) {
11719 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11720 if(not ::which($ENV{'PARALLEL_TMUX'})) {
11721 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
11722 ::wait_and_exit(255);
11724 my @out;
11725 for my $l (1, 2020, 16320, 100000, $len) {
11726 my $tmpfile = ::tmpname("tms");
11727 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
11728 " -S $tmpfile new-session -d -n echo $l".
11729 ("x"x$l). " && echo $l; rm -f $tmpfile";
11730 push @out, ::qqx($tmuxcmd);
11731 ::rm($tmpfile);
11733 ::debug("tmux","tmux-out ",@out);
11734 chomp @out;
11735 # The arguments is given 3 times on the command line
11736 # and the wrapping is around 30 chars
11737 # (29 for tmux1.9, 33 for tmux1.8)
11738 my $tmux_len = ::max(@out);
11739 $len = ::min($len,int($tmux_len/4-33));
11740 ::debug("tmux","tmux-length ",$len);
11742 return $len;
11746 package RecordQueue;
11748 sub new($) {
11749 my $class = shift;
11750 my $fhs = shift;
11751 my $colsep = shift;
11752 my @unget = ();
11753 my $arg_sub_queue;
11754 if($opt::sqlworker) {
11755 # Open SQL table
11756 $arg_sub_queue = SQLRecordQueue->new();
11757 } elsif(defined $colsep) {
11758 # Open one file with colsep or CSV
11759 $arg_sub_queue = RecordColQueue->new($fhs);
11760 } else {
11761 # Open one or more files if multiple -a
11762 $arg_sub_queue = MultifileQueue->new($fhs);
11764 return bless {
11765 'unget' => \@unget,
11766 'arg_number' => 0,
11767 'arg_sub_queue' => $arg_sub_queue,
11768 }, ref($class) || $class;
11771 sub get($) {
11772 # Returns:
11773 # reference to array of Arg-objects
11774 my $self = shift;
11775 if(@{$self->{'unget'}}) {
11776 $self->{'arg_number'}++;
11777 # Flush cached computed replacements in Arg-objects
11778 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11779 my $ret = shift @{$self->{'unget'}};
11780 if($ret) {
11781 map { $_->flush_cache() } @$ret;
11783 return $ret;
11785 my $ret = $self->{'arg_sub_queue'}->get();
11786 if($ret) {
11787 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
11788 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
11789 # to mean no-string
11790 ::warning("A NUL character in the input was replaced with \\0.",
11791 "NUL cannot be passed through in the argument list.",
11792 "Did you mean to use the --null option?");
11793 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
11794 # Replace \0 with \\0
11795 my $a = $_->orig();
11796 $a =~ s/\0/\\0/g;
11797 $_->set_orig($a);
11800 if(defined $Global::max_number_of_args
11801 and $Global::max_number_of_args == 0) {
11802 ::debug("run", "Read 1 but return 0 args\n");
11803 # \0noarg => nothing (not the empty string)
11804 map { $_->set_orig("\0noarg"); } @$ret;
11806 # Flush cached computed replacements in Arg-objects
11807 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11808 map { $_->flush_cache() } @$ret;
11810 return $ret;
11813 sub unget($) {
11814 my $self = shift;
11815 ::debug("run", "RecordQueue-unget\n");
11816 $self->{'arg_number'} -= @_;
11817 unshift @{$self->{'unget'}}, @_;
11820 sub empty($) {
11821 my $self = shift;
11822 my $empty = (not @{$self->{'unget'}}) &&
11823 $self->{'arg_sub_queue'}->empty();
11824 ::debug("run", "RecordQueue->empty $empty");
11825 return $empty;
11828 sub arg_number($) {
11829 my $self = shift;
11830 return $self->{'arg_number'};
11834 package RecordColQueue;
11836 sub new($) {
11837 my $class = shift;
11838 my $fhs = shift;
11839 my @unget = ();
11840 my $arg_sub_queue = MultifileQueue->new($fhs);
11841 return bless {
11842 'unget' => \@unget,
11843 'arg_sub_queue' => $arg_sub_queue,
11844 }, ref($class) || $class;
11847 sub get($) {
11848 # Returns:
11849 # reference to array of Arg-objects
11850 my $self = shift;
11851 if(@{$self->{'unget'}}) {
11852 return shift @{$self->{'unget'}};
11854 my $unget_ref = $self->{'unget'};
11855 if($self->{'arg_sub_queue'}->empty()) {
11856 return undef;
11858 my $in_record = $self->{'arg_sub_queue'}->get();
11859 if(defined $in_record) {
11860 my @out_record = ();
11861 for my $arg (@$in_record) {
11862 ::debug("run", "RecordColQueue::arg $arg\n");
11863 my $line = $arg->orig();
11864 ::debug("run", "line='$line'\n");
11865 if($line ne "") {
11866 if($opt::csv) {
11867 # Parse CSV
11868 chomp $line;
11869 if(not $Global::csv->parse($line)) {
11870 die "CSV has unexpected format: ^$line^";
11872 for($Global::csv->fields()) {
11873 push @out_record, Arg->new($_);
11875 } else {
11876 for my $s (split /$opt::colsep/o, $line, -1) {
11877 push @out_record, Arg->new($s);
11880 } else {
11881 push @out_record, Arg->new("");
11884 return \@out_record;
11885 } else {
11886 return undef;
11890 sub unget($) {
11891 my $self = shift;
11892 ::debug("run", "RecordColQueue-unget '@_'\n");
11893 unshift @{$self->{'unget'}}, @_;
11896 sub empty($) {
11897 my $self = shift;
11898 my $empty = (not @{$self->{'unget'}}) &&
11899 $self->{'arg_sub_queue'}->empty();
11900 ::debug("run", "RecordColQueue->empty $empty");
11901 return $empty;
11905 package SQLRecordQueue;
11907 sub new($) {
11908 my $class = shift;
11909 my @unget = ();
11910 return bless {
11911 'unget' => \@unget,
11912 }, ref($class) || $class;
11915 sub get($) {
11916 # Returns:
11917 # reference to array of Arg-objects
11918 my $self = shift;
11919 if(@{$self->{'unget'}}) {
11920 return shift @{$self->{'unget'}};
11922 return $Global::sql->get_record();
11925 sub unget($) {
11926 my $self = shift;
11927 ::debug("run", "SQLRecordQueue-unget '@_'\n");
11928 unshift @{$self->{'unget'}}, @_;
11931 sub empty($) {
11932 my $self = shift;
11933 if(@{$self->{'unget'}}) { return 0; }
11934 my $get = $self->get();
11935 if(defined $get) {
11936 $self->unget($get);
11938 my $empty = not $get;
11939 ::debug("run", "SQLRecordQueue->empty $empty");
11940 return $empty;
11944 package MultifileQueue;
11946 @Global::unget_argv=();
11948 sub new($$) {
11949 my $class = shift;
11950 my $fhs = shift;
11951 for my $fh (@$fhs) {
11952 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
11953 ::warning("Input is read from the terminal. You are either an expert",
11954 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
11955 "::: or :::: or -a or to pipe data into parallel. If so",
11956 "consider going through the tutorial: man parallel_tutorial",
11957 "Press CTRL-D to exit.");
11960 return bless {
11961 'unget' => \@Global::unget_argv,
11962 'fhs' => $fhs,
11963 'arg_matrix' => undef,
11964 }, ref($class) || $class;
11967 sub get($) {
11968 my $self = shift;
11969 if($opt::link) {
11970 return $self->link_get();
11971 } else {
11972 return $self->nest_get();
11976 sub unget($) {
11977 my $self = shift;
11978 ::debug("run", "MultifileQueue-unget '@_'\n");
11979 unshift @{$self->{'unget'}}, @_;
11982 sub empty($) {
11983 my $self = shift;
11984 my $empty = (not @Global::unget_argv) &&
11985 not @{$self->{'unget'}};
11986 for my $fh (@{$self->{'fhs'}}) {
11987 $empty &&= eof($fh);
11989 ::debug("run", "MultifileQueue->empty $empty ");
11990 return $empty;
11993 sub link_get($) {
11994 my $self = shift;
11995 if(@{$self->{'unget'}}) {
11996 return shift @{$self->{'unget'}};
11998 my @record = ();
11999 my $prepend;
12000 my $empty = 1;
12001 for my $fh (@{$self->{'fhs'}}) {
12002 my $arg = read_arg_from_fh($fh);
12003 if(defined $arg) {
12004 # Record $arg for recycling at end of file
12005 push @{$self->{'arg_matrix'}{$fh}}, $arg;
12006 push @record, $arg;
12007 $empty = 0;
12008 } else {
12009 ::debug("run", "EOA ");
12010 # End of file: Recycle arguments
12011 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
12012 # return last @{$args->{'args'}{$fh}};
12013 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
12016 if($empty) {
12017 return undef;
12018 } else {
12019 return \@record;
12023 sub nest_get($) {
12024 my $self = shift;
12025 if(@{$self->{'unget'}}) {
12026 return shift @{$self->{'unget'}};
12028 my @record = ();
12029 my $prepend;
12030 my $empty = 1;
12031 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
12032 if(not $self->{'arg_matrix'}) {
12033 # Initialize @arg_matrix with one arg from each file
12034 # read one line from each file
12035 my @first_arg_set;
12036 my $all_empty = 1;
12037 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
12038 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12039 if(defined $arg) {
12040 $all_empty = 0;
12042 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
12043 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
12045 if($all_empty) {
12046 # All filehandles were at eof or eof-string
12047 return undef;
12049 return [@first_arg_set];
12052 # Treat the case with one input source special. For multiple
12053 # input sources we need to remember all previously read values to
12054 # generate all combinations. But for one input source we can
12055 # forget the value after first use.
12056 if($no_of_inputsources == 1) {
12057 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
12058 if(defined($arg)) {
12059 return [$arg];
12061 return undef;
12063 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
12064 if(eof($self->{'fhs'}[$fhno])) {
12065 next;
12066 } else {
12067 # read one
12068 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12069 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
12070 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
12071 $self->{'arg_matrix'}[$fhno][$len] = $arg;
12072 # make all new combinations
12073 my @combarg = ();
12074 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
12075 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
12076 # Is input source --link'ed to the next?
12077 $opt::linkinputsource[$fhn+1]);
12079 # Find only combinations with this new entry
12080 $combarg[2*$fhno] = [$len,$len];
12081 # map combinations
12082 # [ 1, 3, 7 ], [ 2, 4, 1 ]
12083 # =>
12084 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
12085 my @mapped;
12086 for my $c (expand_combinations(@combarg)) {
12087 my @a;
12088 for my $n (0 .. $no_of_inputsources - 1 ) {
12089 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
12091 push @mapped, \@a;
12093 # append the mapped to the ungotten arguments
12094 push @{$self->{'unget'}}, @mapped;
12095 # get the first
12096 if(@mapped) {
12097 return shift @{$self->{'unget'}};
12101 # all are eof or at EOF string; return from the unget queue
12102 return shift @{$self->{'unget'}};
12105 sub read_arg_from_fh($) {
12106 # Read one Arg from filehandle
12107 # Returns:
12108 # Arg-object with one read line
12109 # undef if end of file
12110 my $fh = shift;
12111 my $prepend;
12112 my $arg;
12113 my $half_record = 0;
12114 do {{
12115 # This makes 10% faster
12116 if(not defined ($arg = <$fh>)) {
12117 if(defined $prepend) {
12118 return Arg->new($prepend);
12119 } else {
12120 return undef;
12123 if($opt::csv) {
12124 # We need to read a full CSV line.
12125 if(($arg =~ y/"/"/) % 2 ) {
12126 # The number of " on the line is uneven:
12127 # If we were in a half_record => we have a full record now
12128 # If we were ouside a half_record => we are in a half record now
12129 $half_record = not $half_record;
12131 if($half_record) {
12132 # CSV half-record with quoting:
12133 # col1,"col2 2""x3"" board newline <-this one
12134 # cont",col3
12135 $prepend .= $arg;
12136 redo;
12137 } else {
12138 # Now we have a full CSV record
12141 # Remove delimiter
12142 chomp $arg;
12143 if($Global::end_of_file_string and
12144 $arg eq $Global::end_of_file_string) {
12145 # Ignore the rest of input file
12146 close $fh;
12147 ::debug("run", "EOF-string ($arg) met\n");
12148 if(defined $prepend) {
12149 return Arg->new($prepend);
12150 } else {
12151 return undef;
12154 if(defined $prepend) {
12155 $arg = $prepend.$arg; # For line continuation
12156 undef $prepend;
12158 if($Global::ignore_empty) {
12159 if($arg =~ /^\s*$/) {
12160 redo; # Try the next line
12163 if($Global::max_lines) {
12164 if($arg =~ /\s$/) {
12165 # Trailing space => continued on next line
12166 $prepend = $arg;
12167 redo;
12170 }} while (1 == 0); # Dummy loop {{}} for redo
12171 if(defined $arg) {
12172 return Arg->new($arg);
12173 } else {
12174 ::die_bug("multiread arg undefined");
12178 # Prototype forwarding
12179 sub expand_combinations(@);
12180 sub expand_combinations(@) {
12181 # Input:
12182 # ([xmin,xmax], [ymin,ymax], ...)
12183 # Returns: ([x,y,...],[x,y,...])
12184 # where xmin <= x <= xmax and ymin <= y <= ymax
12185 my $minmax_ref = shift;
12186 my $link = shift; # This is linked to the next input source
12187 my $xmin = $$minmax_ref[0];
12188 my $xmax = $$minmax_ref[1];
12189 my @p;
12190 if(@_) {
12191 my @rest = expand_combinations(@_);
12192 if($link) {
12193 # Linked to next col with --link/:::+/::::+
12194 # TODO BUG does not wrap values if not same number of vals
12195 push(@p, map { [$$_[0], @$_] }
12196 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
12197 } else {
12198 # If there are more columns: Compute those recursively
12199 for(my $x = $xmin; $x <= $xmax; $x++) {
12200 push @p, map { [$x, @$_] } @rest;
12203 } else {
12204 for(my $x = $xmin; $x <= $xmax; $x++) {
12205 push @p, [$x];
12208 return @p;
12212 package Arg;
12214 sub new($) {
12215 my $class = shift;
12216 my $orig = shift;
12217 my @hostgroups;
12218 if($opt::hostgroups) {
12219 if($orig =~ s:@(.+)::) {
12220 # We found hostgroups on the arg
12221 @hostgroups = split(/\+/, $1);
12222 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
12223 # This hostgroup is not defined using -S
12224 # Add it
12225 ::warning("Adding hostgroups: @hostgroups");
12226 # Add sshlogin
12227 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
12228 my $sshlogin = SSHLogin->new($_);
12229 my $sshlogin_string = $sshlogin->string();
12230 $Global::host{$sshlogin_string} = $sshlogin;
12231 $Global::hostgroups{$sshlogin_string} = 1;
12234 } else {
12235 # No hostgroup on the arg => any hostgroup
12236 @hostgroups = (keys %Global::hostgroups);
12239 return bless {
12240 'orig' => $orig,
12241 'hostgroups' => \@hostgroups,
12242 }, ref($class) || $class;
12245 sub Q($) {
12246 # Q alias for ::shell_quote_scalar
12247 my $ret = ::Q($_[0]);
12248 no warnings 'redefine';
12249 *Q = \&::Q;
12250 return $ret;
12253 sub pQ($) {
12254 # pQ alias for ::perl_quote_scalar
12255 my $ret = ::pQ($_[0]);
12256 no warnings 'redefine';
12257 *pQ = \&::pQ;
12258 return $ret;
12261 sub total_jobs() {
12262 return $Global::JobQueue->total_jobs();
12266 my %perleval;
12267 my $job;
12268 sub skip() {
12269 # shorthand for $job->skip();
12270 $job->skip();
12272 sub slot() {
12273 # shorthand for $job->slot();
12274 $job->slot();
12276 sub seq() {
12277 # shorthand for $job->seq();
12278 $job->seq();
12280 sub uq() {
12281 # Do not quote this arg
12282 $Global::unquote_arg = 1;
12285 sub replace($$$$) {
12286 # Calculates the corresponding value for a given perl expression
12287 # Returns:
12288 # The calculated string (quoted if asked for)
12289 my $self = shift;
12290 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
12291 my $quote = shift; # should the string be quoted?
12292 # This is actually a CommandLine-object,
12293 # but it looks nice to be able to say {= $job->slot() =}
12294 $job = shift;
12295 $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
12296 if(not $Global::cache_replacement_eval
12298 not $self->{'cache'}{$perlexpr}) {
12299 # Only compute the value once
12300 # Use $_ as the variable to change
12301 local $_;
12302 if($Global::trim eq "n") {
12303 $_ = $self->{'orig'};
12304 } else {
12305 # Trim the input
12306 $_ = trim_of($self->{'orig'});
12308 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
12309 if(not $perleval{$perlexpr}) {
12310 # Make an anonymous function of the $perlexpr
12311 # And more importantly: Compile it only once
12312 if($perleval{$perlexpr} =
12313 eval('sub { no strict; no warnings; my $job = shift; '.
12314 $perlexpr.' }')) {
12315 # All is good
12316 } else {
12317 # The eval failed. Maybe $perlexpr is invalid perl?
12318 ::error("Cannot use $perlexpr: $@");
12319 ::wait_and_exit(255);
12322 # Execute the function
12323 $perleval{$perlexpr}->($job);
12324 $self->{'cache'}{$perlexpr} = $_;
12325 if($Global::unquote_arg) {
12326 # uq() was called in perlexpr
12327 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
12328 # Reset for next perlexpr
12329 $Global::unquote_arg = 0;
12332 # Return the value quoted if needed
12333 if($self->{'cache'}{'unquote'}{$perlexpr}) {
12334 return($self->{'cache'}{$perlexpr});
12335 } else {
12336 return($quote ? Q($self->{'cache'}{$perlexpr})
12337 : $self->{'cache'}{$perlexpr});
12342 sub flush_cache($) {
12343 # Flush cache of computed values
12344 my $self = shift;
12345 $self->{'cache'} = undef;
12348 sub orig($) {
12349 my $self = shift;
12350 return $self->{'orig'};
12353 sub set_orig($$) {
12354 my $self = shift;
12355 $self->{'orig'} = shift;
12358 sub trim_of($) {
12359 # Removes white space as specifed by --trim:
12360 # n = nothing
12361 # l = start
12362 # r = end
12363 # lr|rl = both
12364 # Returns:
12365 # string with white space removed as needed
12366 my @strings = map { defined $_ ? $_ : "" } (@_);
12367 my $arg;
12368 if($Global::trim eq "n") {
12369 # skip
12370 } elsif($Global::trim eq "l") {
12371 for my $arg (@strings) { $arg =~ s/^\s+//; }
12372 } elsif($Global::trim eq "r") {
12373 for my $arg (@strings) { $arg =~ s/\s+$//; }
12374 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
12375 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
12376 } else {
12377 ::error("--trim must be one of: r l rl lr.");
12378 ::wait_and_exit(255);
12380 return wantarray ? @strings : "@strings";
12384 package TimeoutQueue;
12386 sub new($) {
12387 my $class = shift;
12388 my $delta_time = shift;
12389 my ($pct);
12390 if($delta_time =~ /(\d+(\.\d+)?)%/) {
12391 # Timeout in percent
12392 $pct = $1/100;
12393 $delta_time = 1_000_000;
12395 $delta_time = ::multiply_time_units($delta_time);
12397 return bless {
12398 'queue' => [],
12399 'delta_time' => $delta_time,
12400 'pct' => $pct,
12401 'remedian_idx' => 0,
12402 'remedian_arr' => [],
12403 'remedian' => undef,
12404 }, ref($class) || $class;
12407 sub delta_time($) {
12408 my $self = shift;
12409 return $self->{'delta_time'};
12412 sub set_delta_time($$) {
12413 my $self = shift;
12414 $self->{'delta_time'} = shift;
12417 sub remedian($) {
12418 my $self = shift;
12419 return $self->{'remedian'};
12422 sub set_remedian($$) {
12423 # Set median of the last 999^3 (=997002999) values using Remedian
12425 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
12426 # robust averaging method for large data sets." Journal of the
12427 # American Statistical Association 85.409 (1990): 97-104.
12428 my $self = shift;
12429 my $val = shift;
12430 my $i = $self->{'remedian_idx'}++;
12431 my $rref = $self->{'remedian_arr'};
12432 $rref->[0][$i%999] = $val;
12433 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
12434 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
12435 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
12438 sub update_median_runtime($) {
12439 # Update delta_time based on runtime of finished job if timeout is
12440 # a percentage
12441 my $self = shift;
12442 my $runtime = shift;
12443 if($self->{'pct'}) {
12444 $self->set_remedian($runtime);
12445 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
12446 ::debug("run", "Timeout: $self->{'delta_time'}s ");
12450 sub process_timeouts($) {
12451 # Check if there was a timeout
12452 my $self = shift;
12453 # $self->{'queue'} is sorted by start time
12454 while (@{$self->{'queue'}}) {
12455 my $job = $self->{'queue'}[0];
12456 if($job->endtime()) {
12457 # Job already finished. No need to timeout the job
12458 # This could be because of --keep-order
12459 shift @{$self->{'queue'}};
12460 } elsif($job->is_timedout($self->{'delta_time'})) {
12461 # Need to shift off queue before kill
12462 # because kill calls usleep that calls process_timeouts
12463 shift @{$self->{'queue'}};
12464 ::warning("This job was killed because it timed out:",
12465 $job->replaced());
12466 $job->kill();
12467 } else {
12468 # Because they are sorted by start time the rest are later
12469 last;
12474 sub insert($) {
12475 my $self = shift;
12476 my $in = shift;
12477 push @{$self->{'queue'}}, $in;
12481 package SQL;
12483 sub new($) {
12484 my $class = shift;
12485 my $dburl = shift;
12486 $Global::use{"DBI"} ||= eval "use DBI; 1;";
12487 # +DBURL = append to this DBURL
12488 my $append = $dburl=~s/^\+//;
12489 my %options = parse_dburl(get_alias($dburl));
12490 my %driveralias = ("sqlite" => "SQLite",
12491 "sqlite3" => "SQLite",
12492 "pg" => "Pg",
12493 "postgres" => "Pg",
12494 "postgresql" => "Pg",
12495 "csv" => "CSV",
12496 "oracle" => "Oracle",
12497 "ora" => "Oracle");
12498 my $driver = $driveralias{$options{'databasedriver'}} ||
12499 $options{'databasedriver'};
12500 my $database = $options{'database'};
12501 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
12502 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
12503 my $dsn = "DBI:$driver:dbname=$database$host$port";
12504 my $userid = $options{'user'};
12505 my $password = $options{'password'};;
12506 if(not grep /$driver/, DBI->available_drivers) {
12507 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
12508 ::wait_and_exit(255);
12510 my $dbh;
12511 if($driver eq "CSV") {
12512 # CSV does not use normal dsn
12513 if(-d $database) {
12514 $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
12515 or die $DBI::errstr;
12516 } else {
12517 ::error("$database is not a directory.");
12518 ::wait_and_exit(255);
12520 } else {
12521 $dbh = DBI->connect($dsn, $userid, $password,
12522 { RaiseError => 1, AutoInactiveDestroy => 1 })
12523 or die $DBI::errstr;
12525 $dbh->{'PrintWarn'} = $Global::debug || 0;
12526 $dbh->{'PrintError'} = $Global::debug || 0;
12527 $dbh->{'RaiseError'} = 1;
12528 $dbh->{'ShowErrorStatement'} = 1;
12529 $dbh->{'HandleError'} = sub {};
12530 if(not defined $options{'table'}) {
12531 ::error("The DBURL ($dburl) must contain a table.");
12532 ::wait_and_exit(255);
12535 return bless {
12536 'dbh' => $dbh,
12537 'driver' => $driver,
12538 'max_number_of_args' => undef,
12539 'table' => $options{'table'},
12540 'append' => $append,
12541 }, ref($class) || $class;
12544 # Prototype forwarding
12545 sub get_alias($);
12546 sub get_alias($) {
12547 my $alias = shift;
12548 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
12549 if ($alias !~ /^:/) {
12550 return $alias;
12553 # Find the alias
12554 my $path;
12555 if (-l $0) {
12556 ($path) = readlink($0) =~ m|^(.*)/|;
12557 } else {
12558 ($path) = $0 =~ m|^(.*)/|;
12561 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
12562 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12563 for (@deprecated) {
12564 if(-r $_) {
12565 ::warning("$_ is deprecated. ".
12566 "Use .sql/aliases instead (read man sql).");
12569 my @urlalias=();
12570 check_permissions("$ENV{HOME}/.sql/aliases");
12571 check_permissions("$ENV{HOME}/.dburl.aliases");
12572 my @search = ("$ENV{HOME}/.sql/aliases",
12573 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
12574 "$path/dburl.aliases", "$path/dburl.aliases.dist");
12575 for my $alias_file (@search) {
12576 # local $/ needed if -0 set
12577 local $/ = "\n";
12578 if(-r $alias_file) {
12579 open(my $in, "<", $alias_file) || die;
12580 push @urlalias, <$in>;
12581 close $in;
12584 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
12585 # If we saw this before: we have an alias loop
12586 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
12587 ::error("$alias_part is a cyclic alias.");
12588 exit -1;
12589 } else {
12590 push @Private::seen_aliases, $alias_part;
12593 my $dburl;
12594 for (@urlalias) {
12595 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
12598 if($dburl) {
12599 return get_alias($dburl.$rest);
12600 } else {
12601 ::error("$alias is not defined in @search");
12602 exit(-1);
12606 sub check_permissions($) {
12607 my $file = shift;
12609 if(-e $file) {
12610 if(not -o $file) {
12611 my $username = (getpwuid($<))[0];
12612 ::warning("$file should be owned by $username: ".
12613 "chown $username $file");
12615 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
12616 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
12617 if($mode & 077) {
12618 my $username = (getpwuid($<))[0];
12619 ::warning("$file should be only be readable by $username: ".
12620 "chmod 600 $file");
12625 sub parse_dburl($) {
12626 my $url = shift;
12627 my %options = ();
12628 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
12630 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
12631 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
12632 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
12634 ([^:@/][^:@]*|) # Username ($2)
12636 :([^@]*) # Password ($3)
12639 ([^:/]*)? # Hostname ($4)
12642 ([^/]*)? # Port ($5)
12646 ([^/?]*)? # Database ($6)
12650 ([^?]*)? # Table ($7)
12654 (.*)? # Query ($8)
12656 $!ix) {
12657 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
12658 $options{user} = ::undef_if_empty(uri_unescape($2));
12659 $options{password} = ::undef_if_empty(uri_unescape($3));
12660 $options{host} = ::undef_if_empty(uri_unescape($4));
12661 $options{port} = ::undef_if_empty(uri_unescape($5));
12662 $options{database} = ::undef_if_empty(uri_unescape($6));
12663 $options{table} = ::undef_if_empty(uri_unescape($7));
12664 $options{query} = ::undef_if_empty(uri_unescape($8));
12665 ::debug("sql", "dburl $url\n");
12666 ::debug("sql", "databasedriver ", $options{databasedriver},
12667 " user ", $options{user},
12668 " password ", $options{password}, " host ", $options{host},
12669 " port ", $options{port}, " database ", $options{database},
12670 " table ", $options{table}, " query ", $options{query}, "\n");
12671 } else {
12672 ::error("$url is not a valid DBURL");
12673 exit 255;
12675 return %options;
12678 sub uri_unescape($) {
12679 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
12680 # to avoid depending on URI::Escape
12681 # This section is (C) Gisle Aas.
12682 # Note from RFC1630: "Sequences which start with a percent sign
12683 # but are not followed by two hexadecimal characters are reserved
12684 # for future extension"
12685 my $str = shift;
12686 if (@_ && wantarray) {
12687 # not executed for the common case of a single argument
12688 my @str = ($str, @_); # need to copy
12689 foreach (@str) {
12690 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
12692 return @str;
12694 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
12695 $str;
12698 sub run($) {
12699 my $self = shift;
12700 my $stmt = shift;
12701 if($self->{'driver'} eq "CSV") {
12702 $stmt=~ s/;$//;
12703 if($stmt eq "BEGIN" or
12704 $stmt eq "COMMIT") {
12705 return undef;
12708 my @retval;
12709 my $dbh = $self->{'dbh'};
12710 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
12711 # Execute with the rest of the args - if any
12712 my $rv;
12713 my $sth;
12714 my $lockretry = 0;
12715 while($lockretry < 10) {
12716 $sth = $dbh->prepare($stmt);
12717 if($sth
12719 eval { $rv = $sth->execute(@_) }) {
12720 last;
12721 } else {
12722 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
12724 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
12725 # This is fine:
12726 # It is just a worker that reported back too late -
12727 # another worker had finished the job first
12728 # and the table was then dropped
12729 $rv = $sth = 0;
12730 last;
12732 if($DBI::errstr =~ /locked/) {
12733 ::debug("sql", "Lock retry: $lockretry");
12734 $lockretry++;
12735 ::usleep(rand()*300);
12736 } elsif(not $sth) {
12737 # Try again
12738 $lockretry++;
12739 } else {
12740 ::error($DBI::errstr);
12741 ::wait_and_exit(255);
12745 if($lockretry >= 10) {
12746 ::die_bug("retry > 10: $DBI::errstr");
12748 if($rv < 0 and $DBI::errstr){
12749 ::error($DBI::errstr);
12750 ::wait_and_exit(255);
12752 return $sth;
12755 sub get($) {
12756 my $self = shift;
12757 my $sth = $self->run(@_);
12758 my @retval;
12759 # If $sth = 0 it means the table was dropped by another process
12760 while($sth) {
12761 my @row = $sth->fetchrow_array();
12762 @row or last;
12763 push @retval, \@row;
12765 return \@retval;
12768 sub table($) {
12769 my $self = shift;
12770 return $self->{'table'};
12773 sub append($) {
12774 my $self = shift;
12775 return $self->{'append'};
12778 sub update($) {
12779 my $self = shift;
12780 my $stmt = shift;
12781 my $table = $self->table();
12782 $self->run("UPDATE $table $stmt",@_);
12785 sub output($) {
12786 my $self = shift;
12787 my $commandline = shift;
12789 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
12790 $commandline->seq(),
12791 join("",@{$commandline->{'output'}{1}}),
12792 join("",@{$commandline->{'output'}{2}}));
12795 sub max_number_of_args($) {
12796 # Maximal number of args for this table
12797 my $self = shift;
12798 if(not $self->{'max_number_of_args'}) {
12799 # Read the number of args from the SQL table
12800 my $table = $self->table();
12801 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
12802 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
12803 Receive Exitval _Signal Command Stdout Stderr);
12804 if(not $v) {
12805 ::error("$table contains no records");
12807 # Count the number of Vx columns
12808 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
12810 return $self->{'max_number_of_args'};
12813 sub set_max_number_of_args($$) {
12814 my $self = shift;
12815 $self->{'max_number_of_args'} = shift;
12818 sub create_table($) {
12819 my $self = shift;
12820 if($self->append()) { return; }
12821 my $max_number_of_args = shift;
12822 $self->set_max_number_of_args($max_number_of_args);
12823 my $table = $self->table();
12824 $self->run(qq(DROP TABLE IF EXISTS $table;));
12825 # BIGINT and TEXT are not supported in these databases or are too small
12826 my %vartype = (
12827 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
12828 "TEXT" => "CLOB", },
12829 "mysql" => { "TEXT" => "BLOB", },
12830 "CSV" => { "BIGINT" => "INT",
12831 "FLOAT" => "REAL", },
12833 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
12834 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
12835 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
12836 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
12837 $self->run(qq{CREATE TABLE $table
12838 (Seq $BIGINT,
12839 Host $TEXT,
12840 Starttime $FLOAT,
12841 JobRuntime $FLOAT,
12842 Send $BIGINT,
12843 Receive $BIGINT,
12844 Exitval $BIGINT,
12845 _Signal $BIGINT,
12846 Command $TEXT,}.
12847 $v_def.
12848 qq{Stdout $TEXT,
12849 Stderr $TEXT);});
12852 sub insert_records($) {
12853 my $self = shift;
12854 my $seq = shift;
12855 my $command_ref = shift;
12856 my $record_ref = shift;
12857 my $table = $self->table();
12858 # For SQL encode the command with \257 space as split points
12859 my $command = join("\257 ",@$command_ref);
12860 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12861 # Two extra value due to $seq, Exitval, Send
12862 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
12863 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
12864 "VALUES ($v_vals);", $seq, $command, -1000,
12865 0, @$record_ref[1..$#$record_ref]);
12869 sub get_record($) {
12870 my $self = shift;
12871 my @retval;
12872 my $table = $self->table();
12873 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12874 my $rand = "Reserved-".$$.rand();
12875 my $v;
12876 my $more_pending;
12878 do {
12879 if($self->{'driver'} eq "CSV") {
12880 # Sub SELECT is not supported in CSV
12881 # So to minimize the race condition below select a job at random
12882 my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12883 "WHERE Exitval = -1000 LIMIT 100;");
12884 $v = [ sort { rand() > 0.5 } @$r ];
12885 } else {
12886 # Avoid race condition where multiple workers get the same job
12887 # by setting Stdout to a unique string
12888 # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
12889 $self->update("SET Stdout = ?,Exitval = ? ".
12890 "WHERE Seq = (".
12891 " SELECT * FROM (".
12892 " SELECT min(Seq) FROM $table WHERE Exitval = -1000".
12893 " ) AS dummy".
12894 ") AND Exitval = -1000;", $rand, -1210);
12895 # If a parallel worker overwrote the unique string this will get nothing
12896 $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12897 "WHERE Stdout = ?;", $rand);
12899 if($v->[0]) {
12900 my $val_ref = $v->[0];
12901 # Mark record as taken
12902 my $seq = shift @$val_ref;
12903 # Save the sequence number to use when running the job
12904 $SQL::next_seq = $seq;
12905 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
12906 # Command is encoded with '\257 space' as splitting char
12907 my @command = split /\257 /, shift @$val_ref;
12908 $SQL::command_ref = \@command;
12909 for (@$val_ref) {
12910 push @retval, Arg->new($_);
12912 } else {
12913 # If the record was updated by another job in parallel,
12914 # then we may not be done, so see if there are more jobs pending
12915 $more_pending =
12916 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
12918 } while (not $v->[0] and $more_pending->[0]);
12920 if(@retval) {
12921 return \@retval;
12922 } else {
12923 return undef;
12927 sub total_jobs($) {
12928 my $self = shift;
12929 my $table = $self->table();
12930 my $v = $self->get("SELECT count(*) FROM $table;");
12931 if($v->[0]) {
12932 return $v->[0]->[0];
12933 } else {
12934 ::die_bug("SQL::total_jobs");
12938 sub max_seq($) {
12939 my $self = shift;
12940 my $table = $self->table();
12941 my $v = $self->get("SELECT max(Seq) FROM $table;");
12942 if($v->[0]) {
12943 return $v->[0]->[0];
12944 } else {
12945 ::die_bug("SQL::max_seq");
12949 sub finished($) {
12950 # Check if there are any jobs left in the SQL table that do not
12951 # have a "real" exitval
12952 my $self = shift;
12953 if($opt::wait or $Global::start_sqlworker) {
12954 my $table = $self->table();
12955 my $rv = $self->get("select Seq,Exitval from $table ".
12956 "where Exitval <= -1000 limit 1");
12957 return not $rv->[0];
12958 } else {
12959 return 1;
12963 package Semaphore;
12965 # This package provides a counting semaphore
12967 # If a process dies without releasing the semaphore the next process
12968 # that needs that entry will clean up dead semaphores
12970 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
12971 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
12972 # process holding the entry. If the process dies, the entry can be
12973 # taken by another process.
12975 sub new($) {
12976 my $class = shift;
12977 my $id = shift;
12978 my $count = shift;
12979 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
12980 $id = "id-".$id; # To distinguish it from a process id
12981 my $parallel_locks = $Global::cache_dir . "/semaphores";
12982 -d $parallel_locks or ::mkdir_or_die($parallel_locks);
12983 my $lockdir = "$parallel_locks/$id";
12984 my $lockfile = $lockdir.".lock";
12985 if(-d $parallel_locks and -w $parallel_locks
12986 and -r $parallel_locks and -x $parallel_locks) {
12987 # skip
12988 } else {
12989 ::error("Semaphoredir must be writable: '$parallel_locks'");
12990 ::wait_and_exit(255);
12993 if($count < 1) { ::die_bug("semaphore-count: $count"); }
12994 return bless {
12995 'lockfile' => $lockfile,
12996 'lockfh' => Symbol::gensym(),
12997 'lockdir' => $lockdir,
12998 'id' => $id,
12999 'idfile' => $lockdir."/".$id,
13000 'pid' => $$,
13001 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
13002 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
13003 }, ref($class) || $class;
13006 sub remove_dead_locks($) {
13007 my $self = shift;
13008 my $lockdir = $self->{'lockdir'};
13010 for my $d (glob "$lockdir/*") {
13011 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
13012 my ($pid, $host) = ($1, $2);
13013 if($host eq ::hostname()) {
13014 if(kill 0, $pid) {
13015 ::debug("sem", "Alive: $pid $d\n");
13016 } else {
13017 ::debug("sem", "Dead: $d\n");
13018 ::rm($d);
13024 sub acquire($) {
13025 my $self = shift;
13026 my $sleep = 1; # 1 ms
13027 my $start_time = time;
13028 while(1) {
13029 # Can we get a lock?
13030 $self->atomic_link_if_count_less_than() and last;
13031 $self->remove_dead_locks();
13032 # Retry slower and slower up to 1 second
13033 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13034 # Random to avoid every sleeping job waking up at the same time
13035 ::usleep(rand()*$sleep);
13036 if($opt::semaphoretimeout) {
13037 if($opt::semaphoretimeout > 0
13039 time - $start_time > $opt::semaphoretimeout) {
13040 # Timeout: Take the semaphore anyway
13041 ::warning("Semaphore timed out. Stealing the semaphore.");
13042 if(not -e $self->{'idfile'}) {
13043 open (my $fh, ">", $self->{'idfile'}) or
13044 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
13045 close $fh;
13047 link $self->{'idfile'}, $self->{'pidfile'};
13048 last;
13050 if($opt::semaphoretimeout < 0
13052 time - $start_time > -$opt::semaphoretimeout) {
13053 # Timeout: Exit
13054 ::warning("Semaphore timed out. Exiting.");
13055 exit(1);
13056 last;
13060 ::debug("sem", "acquired $self->{'pid'}\n");
13063 sub release($) {
13064 my $self = shift;
13065 ::rm($self->{'pidfile'});
13066 if($self->nlinks() == 1) {
13067 # This is the last link, so atomic cleanup
13068 $self->lock();
13069 if($self->nlinks() == 1) {
13070 ::rm($self->{'idfile'});
13071 rmdir $self->{'lockdir'};
13073 $self->unlock();
13075 ::debug("run", "released $self->{'pid'}\n");
13078 sub pid_change($) {
13079 # This should do what release()+acquire() would do without having
13080 # to re-acquire the semaphore
13081 my $self = shift;
13083 my $old_pidfile = $self->{'pidfile'};
13084 $self->{'pid'} = $$;
13085 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
13086 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
13087 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13088 ::rm($old_pidfile);
13091 sub atomic_link_if_count_less_than($) {
13092 # Link $file1 to $file2 if nlinks to $file1 < $count
13093 my $self = shift;
13094 my $retval = 0;
13095 $self->lock();
13096 my $nlinks = $self->nlinks();
13097 ::debug("sem","$nlinks<$self->{'count'} ");
13098 if($nlinks < $self->{'count'}) {
13099 -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
13100 if(not -e $self->{'idfile'}) {
13101 open (my $fh, ">", $self->{'idfile'}) or
13102 ::die_bug("write_idfile: $self->{'idfile'}");
13103 close $fh;
13105 $retval = link $self->{'idfile'}, $self->{'pidfile'};
13106 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13108 $self->unlock();
13109 ::debug("sem", "atomic $retval");
13110 return $retval;
13113 sub nlinks($) {
13114 my $self = shift;
13115 if(-e $self->{'idfile'}) {
13116 return (stat(_))[3];
13117 } else {
13118 return 0;
13122 sub lock($) {
13123 my $self = shift;
13124 my $sleep = 100; # 100 ms
13125 my $total_sleep = 0;
13126 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
13127 my $locked = 0;
13128 while(not $locked) {
13129 if(tell($self->{'lockfh'}) == -1) {
13130 # File not open
13131 open($self->{'lockfh'}, ">", $self->{'lockfile'})
13132 or ::debug("run", "Cannot open $self->{'lockfile'}");
13134 if($self->{'lockfh'}) {
13135 # File is open
13136 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
13137 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
13138 # The file is locked: No need to retry
13139 $locked = 1;
13140 last;
13141 } else {
13142 if ($! =~ m/Function not implemented/) {
13143 ::warning("flock: $!",
13144 "Will wait for a random while.");
13145 ::usleep(rand(5000));
13146 # File cannot be locked: No need to retry
13147 $locked = 2;
13148 last;
13152 # Locking failed in first round
13153 # Sleep and try again
13154 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13155 # Random to avoid every sleeping job waking up at the same time
13156 ::usleep(rand()*$sleep);
13157 $total_sleep += $sleep;
13158 if($opt::semaphoretimeout) {
13159 if($opt::semaphoretimeout > 0
13161 $total_sleep/1000 > $opt::semaphoretimeout) {
13162 # Timeout: Take the semaphore anyway
13163 ::warning("Semaphore timed out. Taking the semaphore.");
13164 $locked = 3;
13165 last;
13167 if($opt::semaphoretimeout < 0
13169 $total_sleep/1000 > -$opt::semaphoretimeout) {
13170 # Timeout: Exit
13171 ::warning("Semaphore timed out. Exiting.");
13172 $locked = 4;
13173 last;
13175 } else {
13176 if($total_sleep/1000 > 30) {
13177 ::warning("Semaphore stuck for 30 seconds. ".
13178 "Consider using --semaphoretimeout.");
13182 ::debug("run", "locked $self->{'lockfile'}");
13185 sub unlock($) {
13186 my $self = shift;
13187 ::rm($self->{'lockfile'});
13188 close $self->{'lockfh'};
13189 ::debug("run", "unlocked\n");
13192 # Keep perl -w happy
13194 $opt::x = $Semaphore::timeout = $Semaphore::wait =
13195 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
13196 $Global::max_slot_number = $opt::session;
13198 package main;
13200 sub main() {
13201 save_stdin_stdout_stderr();
13202 save_original_signal_handler();
13203 parse_options();
13204 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
13205 my $number_of_args;
13206 if($Global::max_number_of_args) {
13207 $number_of_args = $Global::max_number_of_args;
13208 } elsif ($opt::X or $opt::m or $opt::xargs) {
13209 $number_of_args = undef;
13210 } else {
13211 $number_of_args = 1;
13214 my @command = @ARGV;
13215 my @input_source_fh;
13216 if($opt::pipepart) {
13217 if($opt::tee) {
13218 @input_source_fh = map { open_or_exit($_) } @opt::a;
13219 # Remove the first: It will be the file piped.
13220 shift @input_source_fh;
13221 if(not @input_source_fh and not $opt::pipe) {
13222 @input_source_fh = (*STDIN);
13224 } else {
13225 # -a is used for data - not for command line args
13226 @input_source_fh = map { open_or_exit($_) } "/dev/null";
13228 } else {
13229 @input_source_fh = map { open_or_exit($_) } @opt::a;
13230 if(not @input_source_fh and not $opt::pipe) {
13231 @input_source_fh = (*STDIN);
13235 if($opt::skip_first_line) {
13236 # Skip the first line for the first file handle
13237 my $fh = $input_source_fh[0];
13238 <$fh>;
13241 set_input_source_header(\@command,\@input_source_fh);
13242 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
13243 # Parallel check all hosts are up. Remove hosts that are down
13244 filter_hosts();
13248 if($opt::sqlmaster and $opt::sqlworker) {
13249 # Start a real --sqlworker in the background later
13250 $Global::start_sqlworker = 1;
13251 $opt::sqlworker = undef;
13254 if($opt::nonall or $opt::onall) {
13255 onall(\@input_source_fh,@command);
13256 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
13259 $Global::JobQueue = JobQueue->new(
13260 \@command,\@input_source_fh,$Global::ContextReplace,
13261 $number_of_args,\@Global::transfer_files,\@Global::ret_files);
13263 if($opt::sqlmaster) {
13264 # Create SQL table to hold joblog + output
13265 # Figure out how many arguments are in a job
13266 # (It is affected by --colsep, -N, $number_source_fh)
13267 my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
13268 my $record = $record_queue->get();
13269 my $no_of_values = $number_of_args * (1+$#{$record});
13270 $record_queue->unget($record);
13271 $Global::sql->create_table($no_of_values);
13272 if($opt::sqlworker) {
13273 # Start a real --sqlworker in the background later
13274 $Global::start_sqlworker = 1;
13275 $opt::sqlworker = undef;
13279 if($opt::pipepart) {
13280 pipepart_setup();
13281 } elsif($opt::pipe and $opt::tee) {
13282 pipe_tee_setup();
13283 } elsif($opt::pipe and $opt::shard or $opt::bin) {
13284 pipe_shard_setup();
13287 if(not $opt::pipepart and $opt::groupby) {
13288 group_by_stdin_filter();
13290 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
13291 # Count the number of jobs or shuffle all jobs
13292 # before starting any.
13293 # Must be done after ungetting any --pipepart jobs.
13294 $Global::JobQueue->total_jobs();
13296 # Compute $Global::max_jobs_running
13297 # Must be done after ungetting any --pipepart jobs.
13298 max_jobs_running();
13299 init_run_jobs();
13300 my $sem;
13301 if($Global::semaphore) {
13302 $sem = acquire_semaphore();
13304 $SIG{TERM} = $Global::original_sig{TERM};
13305 $SIG{HUP} = \&start_no_new_jobs;
13307 if($opt::tee or $opt::shard or $opt::bin) {
13308 # All jobs must be running in parallel for --tee/--shard/--bin
13309 while(start_more_jobs()) {}
13310 $Global::start_no_new_jobs = 1;
13311 if(not $Global::JobQueue->empty()) {
13312 if($opt::tee) {
13313 ::error("--tee requires --jobs to be higher. Try --jobs 0.");
13314 } elsif($opt::bin) {
13315 ::error("--bin requires --jobs to be higher than the number of",
13316 "arguments. Increase --jobs.");
13317 } elsif($opt::shard) {
13318 ::error("--shard requires --jobs to be higher than the number of",
13319 "arguments. Increase --jobs.");
13320 } else {
13321 ::die_bug("--bin/--shard/--tee should not get here");
13323 ::wait_and_exit(255);
13325 } elsif($opt::pipe and not $opt::pipepart) {
13326 # Fill all jobslots
13327 while(start_more_jobs()) {}
13328 spreadstdin();
13329 } else {
13330 # Reap one - start one
13331 while(reaper() + start_more_jobs()) {}
13333 ::debug("init", "Start draining\n");
13334 drain_job_queue(@command);
13335 ::debug("init", "Done draining\n");
13336 reapers();
13337 ::debug("init", "Done reaping\n");
13338 if($Global::semaphore) {
13339 $sem->release();
13341 cleanup();
13342 ::debug("init", "Halt\n");
13343 halt();
13346 main();