Released as 20240522 ('Tbilisi')
[parallel.git] / src / parallel
blobab7681707ecfabae330c821de4334d398adc9b7d
1 #!/usr/bin/env perl
3 # Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <https://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # SPDX-FileCopyrightText: 2007-2024 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc.
22 # SPDX-License-Identifier: GPL-3.0-or-later
24 # open3 used in Job::start
25 use IPC::Open3;
26 use POSIX;
27 # gensym used in Job::start
28 use Symbol qw(gensym);
29 # tempfile used in Job::start
30 use File::Temp qw(tempfile tempdir);
31 # mkpath used in openresultsfile
32 use File::Path;
33 # GetOptions used in get_options_from_array
34 use Getopt::Long;
35 # Used to ensure code quality
36 use strict;
37 use File::Basename;
39 sub set_input_source_header($$) {
40 my ($command_ref,$input_source_fh_ref) = @_;
41 if(defined $opt::header and not $opt::pipe) {
42 # split with colsep or \t
43 # $header force $colsep = \t if undef?
44 my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
45 # regexp for {=
46 my $left = "\Q$Global::parensleft\E";
47 my $l = $Global::parensleft;
48 # regexp for =}
49 my $right = "\Q$Global::parensright\E";
50 my $r = $Global::parensright;
51 if($opt::header ne "0") {
52 my $id = 1;
53 for my $fh (@$input_source_fh_ref) {
54 my $line = <$fh>;
55 chomp($line);
56 $line =~ s/\r$//;
57 ::debug("init", "Delimiter: '$delimiter'");
58 for my $s (split /$delimiter/o, $line) {
59 ::debug("init", "Colname: '$s'");
60 # Replace {colname} with {2}
61 for(@$command_ref, @Global::ret_files,
62 @Global::transfer_files, $opt::tagstring,
63 $opt::workdir, $opt::results, $opt::retries,
64 @Global::template_contents, @Global::template_names,
65 @opt::filter) {
66 # Skip if undefined
67 $_ or next;
68 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
69 # {=header1 ... =} => {=1 ... =}
70 s:$left $s (.*?) $right:$l$id$1$r:gx;
72 $Global::input_source_header{$id} = $s;
73 $id++;
77 # Make it possible to do:
78 # parallel --header 0 echo {file2} {file1} :::: file1 file2
79 my $id = 1;
80 for my $s (@opt::a) {
81 # ::: are put into files and given a filehandle
82 # ignore these and only keep the filenames.
83 fileno $s and next;
84 for(@$command_ref, @Global::ret_files,
85 @Global::transfer_files, $opt::tagstring,
86 $opt::workdir, $opt::results, $opt::retries,
87 @Global::template_contents, @Global::template_names,
88 @opt::filter) {
89 # Skip if undefined
90 $_ or next;
91 s:\{\Q$s\E(|/|//|\.|/\.)\}:\{$id$1\}:g;
92 # {=header1 ... =} => {=1 ... =}
93 s:$left \Q$s\E (.*?) $right:$l$id$1$r:gx;
95 $Global::input_source_header{$id} = $s;
96 $id++;
98 } else {
99 my $id = 1;
100 for my $fh (@$input_source_fh_ref) {
101 $Global::input_source_header{$id} = $id;
102 $id++;
107 sub max_jobs_running() {
108 # Compute $Global::max_jobs_running as the max number of jobs
109 # running on each sshlogin.
110 # Returns:
111 # $Global::max_jobs_running
112 if(not $Global::max_jobs_running) {
113 for my $sshlogin (values %Global::host) {
114 $sshlogin->max_jobs_running();
117 if(not $Global::max_jobs_running) {
118 ::error("Cannot run any jobs.");
119 wait_and_exit(255);
121 return $Global::max_jobs_running;
124 sub halt() {
125 # Compute exit value,
126 # wait for children to complete
127 # and exit
128 if($opt::halt and $Global::halt_when ne "never") {
129 if(not defined $Global::halt_exitstatus) {
130 if($Global::halt_pct) {
131 $Global::halt_exitstatus =
132 ::ceil($Global::total_failed /
133 ($Global::total_started || 1) * 100);
134 } elsif($Global::halt_count) {
135 $Global::halt_exitstatus =
136 ::min(undef_as_zero($Global::total_failed),101);
139 wait_and_exit($Global::halt_exitstatus);
140 } else {
141 if($Global::semaphore) {
142 # --semaphore runs a single job:
143 # Use exit value of that
144 wait_and_exit($Global::halt_exitstatus);
145 } else {
146 # 0 = all jobs succeeded
147 # 1-100 = n jobs failed
148 # 101 = >100 jobs failed
149 wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
155 sub __PIPE_MODE__() {}
158 sub pipepart_setup() {
159 # Compute the blocksize
160 # Generate the commands to extract the blocks
161 # Push the commands on queue
162 # Changes:
163 # @Global::cat_prepends
164 # $Global::JobQueue
165 if($opt::tee) {
166 # Prepend each command with
167 # < file
168 my $cat_string = "< ".Q($opt::a[0]);
169 for(1..$Global::JobQueue->total_jobs()) {
170 push @Global::cat_appends, $cat_string;
171 push @Global::cat_prepends, "";
173 } else {
174 if(not $opt::blocksize) {
175 # --blocksize with 10 jobs per jobslot
176 $opt::blocksize = -10;
178 if($opt::roundrobin) {
179 # --blocksize with 1 job per jobslot
180 $opt::blocksize = -1;
182 if($opt::blocksize < 0) {
183 my $size = 0;
184 # Compute size of -a
185 for(@opt::a) {
186 if(-f $_) {
187 $size += -s $_;
188 } elsif(-b $_) {
189 $size += size_of_block_dev($_);
190 } elsif(-e $_) {
191 ::error("$_ is neither a file nor a block device");
192 wait_and_exit(255);
193 } else {
194 ::error("File not found: $_");
195 wait_and_exit(255);
198 # Run in total $job_slots*(- $blocksize) jobs
199 # Set --blocksize = size / no of proc / (- $blocksize)
200 $Global::dummy_jobs = 1;
201 $Global::blocksize = 1 +
202 int($size / max_jobs_running() /
203 -multiply_binary_prefix($opt::blocksize));
205 @Global::cat_prepends = (map { pipe_part_files($_) }
206 # ::: are put into files and given a filehandle
207 # ignore these and only keep the filenames.
208 grep { ! fileno $_ } @opt::a);
209 # Unget the empty arg as many times as there are parts
210 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
211 map { [Arg->new("\0noarg")] } @Global::cat_prepends
216 sub pipe_tee_setup() {
217 # Create temporary fifos
218 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
219 # This will spread the input to fifos
220 # Generate commands that reads from fifo1..N:
221 # cat fifo | user_command
222 # Changes:
223 # @Global::cat_prepends
224 my @fifos;
225 for(1..$Global::JobQueue->total_jobs()) {
226 push @fifos, tmpfifo();
228 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
229 if(not fork()){
230 # Test if tee supports --output-error=warn-nopipe
231 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
232 my $opt = $? ? "" : "--output-error=warn-nopipe";
233 ::debug("init","tee $opt");
234 if($opt::dryrun) {
235 # This is not exactly what is run, but it gives the basic idea
236 print "mkfifo @fifos\n";
237 print "tee $opt @fifos >/dev/null &\n";
238 } else {
239 # Let tee inherit our stdin
240 # and redirect stdout to null
241 open STDOUT, ">","/dev/null";
242 if($opt) {
243 exec "tee", $opt, @fifos;
244 } else {
245 exec "tee", @fifos;
248 exit(0);
250 # For each fifo
251 # (rm fifo1; grep 1) < fifo1
252 # (rm fifo2; grep 2) < fifo2
253 # (rm fifo3; grep 3) < fifo3
254 # Remove the tmpfifo as soon as it is open
255 @Global::cat_prepends = map { "(rm $_;" } shell_quote(@fifos);
256 @Global::cat_appends = map { ") < $_" } shell_quote(@fifos);
260 sub parcat_script() {
261 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
262 my $script = q'{
263 use POSIX qw(:errno_h);
264 use IO::Select;
265 use strict;
266 use threads;
267 use Thread::Queue;
268 use Fcntl qw(:DEFAULT :flock);
270 my $opened :shared;
271 my $q = Thread::Queue->new();
272 my $okq = Thread::Queue->new();
273 my @producers;
275 if(not @ARGV) {
276 if(-t *STDIN) {
277 print "Usage:\n";
278 print " parcat file(s)\n";
279 print " cat argfile | parcat\n";
280 } else {
281 # Read arguments from stdin
282 chomp(@ARGV = <STDIN>);
285 my $files_to_open = 0;
286 # Default: fd = stdout
287 my $fd = 1;
288 for (@ARGV) {
289 # --rm = remove file when opened
290 /^--rm$/ and do { $opt::rm = 1; next; };
291 # -1 = output to fd 1, -2 = output to fd 2
292 /^-(\d+)$/ and do { $fd = $1; next; };
293 push @producers, threads->create("producer", $_, $fd);
294 $files_to_open++;
297 sub producer {
298 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
299 my $file = shift;
300 my $output_fd = shift;
301 open(my $fh, "<", $file) || do {
302 print STDERR "parcat: Cannot open $file: $!\n";
303 exit(1);
305 # Remove file when it has been opened
306 if($opt::rm) {
307 unlink $file;
309 set_fh_non_blocking($fh);
310 $opened++;
311 # Pass the fileno to parent
312 $q->enqueue(fileno($fh),$output_fd);
313 # Get an OK that the $fh is opened and we can release the $fh
314 while(1) {
315 my $ok = $okq->dequeue();
316 if($ok == fileno($fh)) { last; }
317 # Not ours - very unlikely to happen
318 $okq->enqueue($ok);
320 return;
323 my $s = IO::Select->new();
324 my %buffer;
326 sub add_file {
327 my $infd = shift;
328 my $outfd = shift;
329 open(my $infh, "<&=", $infd) || die;
330 open(my $outfh, ">&=", $outfd) || die;
331 $s->add($infh);
332 # Tell the producer now opened here and can be released
333 $okq->enqueue($infd);
334 # Initialize the buffer
335 @{$buffer{$infh}{$outfd}} = ();
336 $Global::fh{$outfd} = $outfh;
339 sub add_files {
340 # Non-blocking dequeue
341 my ($infd,$outfd);
342 do {
343 ($infd,$outfd) = $q->dequeue_nb(2);
344 if(defined($outfd)) {
345 add_file($infd,$outfd);
347 } while(defined($outfd));
350 sub add_files_block {
351 # Blocking dequeue
352 my ($infd,$outfd) = $q->dequeue(2);
353 add_file($infd,$outfd);
357 my $fd;
358 my (@ready,$infh,$rv,$buf);
359 do {
360 # Wait until at least one file is opened
361 add_files_block();
362 while($q->pending or keys %buffer) {
363 add_files();
364 while(keys %buffer) {
365 @ready = $s->can_read(0.01);
366 if(not @ready) {
367 add_files();
369 for $infh (@ready) {
370 # There is only one key, namely the output file descriptor
371 for my $outfd (keys %{$buffer{$infh}}) {
372 # TODO test if 60800 is optimal (2^17 is used elsewhere)
373 $rv = sysread($infh, $buf, 60800);
374 if (!$rv) {
375 if($! == EAGAIN) {
376 # Would block: Nothing read
377 next;
378 } else {
379 # Nothing read, but would not block:
380 # This file is done
381 $s->remove($infh);
382 for(@{$buffer{$infh}{$outfd}}) {
383 syswrite($Global::fh{$outfd},$_);
385 delete $buffer{$infh};
386 # Closing the $infh causes it to block
387 # close $infh;
388 add_files();
389 next;
392 # Something read.
393 # Find \n or \r for full line
394 my $i = (rindex($buf,"\n")+1);
395 if($i) {
396 # Print full line
397 for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
398 syswrite($Global::fh{$outfd},$_);
400 # @buffer = remaining half line
401 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
402 } else {
403 # Something read, but not a full line
404 push @{$buffer{$infh}{$outfd}}, $buf;
406 redo;
411 } while($opened < $files_to_open);
413 for (@producers) {
414 $_->join();
417 sub set_fh_non_blocking {
418 # Set filehandle as non-blocking
419 # Inputs:
420 # $fh = filehandle to be blocking
421 # Returns:
422 # N/A
423 my $fh = shift;
424 my $flags;
425 fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
426 $flags |= &O_NONBLOCK; # Add non-blocking to the flags
427 fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
430 return ::spacefree(3, $script);
433 sub sharder_script() {
434 my $script = q{
435 use B;
436 # Column separator
437 my $sep = shift;
438 # Which columns to shard on (count from 1)
439 my $col = shift;
440 # Which columns to shard on (count from 0)
441 my $col0 = $col - 1;
442 # Perl expression
443 my $perlexpr = shift;
444 my $bins = @ARGV;
445 # Open fifos for writing, fh{0..$bins}
446 my $t = 0;
447 my %fh;
448 for(@ARGV) {
449 open $fh{$t++}, ">", $_;
450 # open blocks until it is opened by reader
451 # so unlink only happens when it is ready
452 unlink $_;
454 if($perlexpr) {
455 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
456 while(<STDIN>) {
457 # Split into $col columns (no need to split into more)
458 @F = split $sep, $_, $col+1;
460 local $_ = $F[$col0];
461 &$subref();
462 $fh = $fh{ hex(B::hash($_))%$bins };
464 print $fh $_;
466 } else {
467 while(<STDIN>) {
468 # Split into $col columns (no need to split into more)
469 @F = split $sep, $_, $col+1;
470 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
471 print $fh $_;
474 # Close all open fifos
475 close values %fh;
477 return ::spacefree(1, $script);
480 sub binner_script() {
481 my $script = q{
482 use B;
483 # Column separator
484 my $sep = shift;
485 # Which columns to shard on (count from 1)
486 my $col = shift;
487 # Which columns to shard on (count from 0)
488 my $col0 = $col - 1;
489 # Perl expression
490 my $perlexpr = shift;
491 my $bins = @ARGV;
492 # Open fifos for writing, fh{0..$bins}
493 my $t = 0;
494 my %fh;
495 # Let the last output fifo be the 0'th
496 open $fh{$t++}, ">", pop @ARGV;
497 for(@ARGV) {
498 open $fh{$t++}, ">", $_;
499 # open blocks until it is opened by reader
500 # so unlink only happens when it is ready
501 unlink $_;
503 if($perlexpr) {
504 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
505 while(<STDIN>) {
506 # Split into $col columns (no need to split into more)
507 @F = split $sep, $_, $col+1;
509 local $_ = $F[$col0];
510 &$subref();
511 $fh = $fh{ $_%$bins };
513 print $fh $_;
515 } else {
516 while(<STDIN>) {
517 # Split into $col columns (no need to split into more)
518 @F = split $sep, $_, $col+1;
519 $fh = $fh{ $F[$col0]%$bins };
520 print $fh $_;
523 # Close all open fifos
524 close values %fh;
526 return ::spacefree(1, $script);
529 sub pipe_shard_setup() {
530 # Create temporary fifos
531 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
532 # This will spread the input to fifos
533 # Generate commands that reads from fifo1..N:
534 # cat fifo | user_command
535 # Changes:
536 # @Global::cat_prepends
537 my @shardfifos;
538 my @parcatfifos;
539 # TODO $opt::jobs should be evaluated (100%)
540 # TODO $opt::jobs should be number of total_jobs if there are arguments
541 max_jobs_running();
542 my $njobs = $Global::max_jobs_running;
543 for my $m (0..$njobs-1) {
544 for my $n (0..$njobs-1) {
545 # sharding to A B C D
546 # parcatting all As together
547 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
550 my $shardbin = ($opt::shard || $opt::bin);
551 my $script;
552 if($opt::bin) {
553 $script = binner_script();
554 } else {
555 $script = sharder_script();
558 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
560 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
561 # Group by column name
562 # (Yes, this will also wrongly match a perlexpr like: chop)
563 my($read,$char,@line);
564 # A full line, but nothing more (the rest must be read by the child)
565 # $Global::header used to prepend block to each job
566 do {
567 $read = sysread(STDIN,$char,1);
568 push @line, $char;
569 } while($read and $char ne "\n");
570 $Global::header = join "", @line;
572 my ($col, $perlexpr, $subref) =
573 column_perlexpr($shardbin, $Global::header, $opt::colsep);
574 if(not fork()) {
575 # Let the sharder inherit our stdin
576 # and redirect stdout to null
577 open STDOUT, ">","/dev/null";
578 # The PERL_HASH_SEED must be the same for all sharders
579 # so B::hash will return the same value for any given input
580 $ENV{'PERL_HASH_SEED'} = $$;
581 exec qw(parallel -0 --block 100k -q --pipe -j), $njobs,
582 qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
583 $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos);
585 # For each fifo
586 # (rm fifo1; grep 1) < fifo1
587 # (rm fifo2; grep 2) < fifo2
588 # (rm fifo3; grep 3) < fifo3
589 my $parcat = Q(parcat_script());
590 if(not $parcat) {
591 ::error("'parcat' must be in path.");
592 ::wait_and_exit(255);
594 @Global::cat_prepends =
595 map { "perl -e $parcat ".
596 join(" ",shell_quote(@$_))." | "} @parcatfifos;
599 sub pipe_part_files(@) {
600 # Given the bigfile:
601 # - find header and split positions
602 # - make commands that 'cat's the partial file
603 # Input:
604 # $file = the file to read
605 # Returns:
606 # @commands that will cat_partial each part
607 my ($file) = @_;
608 my $buf = "";
609 if(not -f $file and not -b $file) {
610 ::error("--pipepart only works on seekable files, not streams/pipes.",
611 "$file is not a seekable file.");
612 ::wait_and_exit(255);
615 my $fh = open_or_exit("<",$file);
616 my $firstlinelen = 0;
617 if($opt::skip_first_line) {
618 my $newline;
619 # Read a full line one byte at a time
620 while($firstlinelen += sysread($fh,$newline,1,0)) {
621 $newline eq "\n" and last;
624 my $header = find_header(\$buf,$fh);
625 # find positions
626 my @pos = find_split_positions($file,int($Global::blocksize),
627 $header,$firstlinelen);
628 # Make @cat_prepends
629 my @cat_prepends = ();
630 for(my $i=0; $i<$#pos; $i++) {
631 push(@cat_prepends,
632 cat_partial($file, $firstlinelen, $firstlinelen+length($header),
633 $pos[$i], $pos[$i+1]));
635 return @cat_prepends;
638 sub find_header($$) {
639 # Compute the header based on $opt::header
640 # Input:
641 # $buf_ref = reference to read-in buffer
642 # $fh = filehandle to read from
643 # Uses:
644 # $opt::header
645 # $Global::blocksize
646 # $Global::header
647 # Returns:
648 # $header string
649 my ($buf_ref, $fh) = @_;
650 my $header = "";
651 # $Global::header may be set in group_by_loop()
652 if($Global::header) { return $Global::header }
653 if($opt::header) {
654 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
655 # Number = number of lines
656 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
657 while(sysread($fh,$$buf_ref,int($Global::blocksize),length $$buf_ref)) {
658 if($$buf_ref =~ s/^($opt::header)//) {
659 $header = $1;
660 last;
664 return $header;
667 sub find_split_positions($$$) {
668 # Find positions in bigfile where recend is followed by recstart
669 # Input:
670 # $file = the file to read
671 # $block = (minimal) --block-size of each chunk
672 # $header = header to be skipped
673 # Uses:
674 # $opt::recstart
675 # $opt::recend
676 # Returns:
677 # @positions of block start/end
678 my($file, $block, $header, $firstlinelen) = @_;
679 my $skiplen = $firstlinelen + length $header;
680 my $size = -s $file;
681 if(-b $file) {
682 # $file is a blockdevice
683 $size = size_of_block_dev($file);
685 $block = int $block;
686 if($opt::groupby) {
687 return split_positions_for_group_by($file,$size,$block,
688 $header,$firstlinelen);
690 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
691 # The optimal dd blocksize for freebsd = 2^15..2^17
692 # The optimal dd blocksize for ubuntu (AMD6376) = 2^16
693 my $dd_block_size = 131072; # 2^17
694 my @pos;
695 my ($recstart,$recend) = recstartrecend();
696 my $recendrecstart = $recend.$recstart;
697 my $fh = ::open_or_exit("<",$file);
698 push(@pos,$skiplen);
699 for(my $pos = $block+$skiplen; $pos < $size; $pos += $block) {
700 my $buf;
701 if($recendrecstart eq "") {
702 # records ends anywhere
703 push(@pos,$pos);
704 } else {
705 # Seek the the block start
706 if(not sysseek($fh, $pos, 0)) {
707 ::error("Cannot seek to $pos in $file");
708 edit(255);
710 while(sysread($fh,$buf,$dd_block_size,length $buf)) {
711 if($opt::regexp) {
712 # If match /$recend$recstart/ => Record position
713 if($buf =~ m:^(.*$recend)$recstart:os) {
714 # Start looking for next record _after_ this match
715 $pos += length($1);
716 push(@pos,$pos);
717 last;
719 } else {
720 # If match $recend$recstart => Record position
721 # TODO optimize to only look at the appended
722 # $dd_block_size + len $recendrecstart
723 # TODO increase $dd_block_size to optimize for longer records
724 my $i = index64(\$buf,$recendrecstart);
725 if($i != -1) {
726 # Start looking for next record _after_ this match
727 $pos += $i + length($recend);
728 push(@pos,$pos);
729 last;
735 if($pos[$#pos] != $size) {
736 # Last splitpoint was not at end of the file: add $size as the last
737 push @pos, $size;
739 close $fh;
740 return @pos;
743 sub split_positions_for_group_by($$$$) {
744 my($fh);
745 my %value;
746 sub value_at($) {
747 my $pos = shift;
748 if(not defined $value{$pos}) {
749 if($pos != 0) {
750 seek($fh, $pos-1, 0) || die;
751 # Read half line
752 <$fh>;
754 # Read full line
755 my $linepos = tell($fh);
756 if(not defined $value{$linepos}) {
757 $_ = <$fh>;
758 if(defined $_) {
759 # Not end of file
760 my @F;
761 if(defined $group_by::col) {
762 $opt::colsep ||= "\t";
763 @F = split /$opt::colsep/, $_;
764 $_ = $F[$group_by::col];
766 eval $group_by::perlexpr;
768 $value{$linepos} = [$_,$linepos];
770 $value{$pos} = $value{$linepos};
772 return (@{$value{$pos}});
775 sub binary_search_end($$$) {
776 my ($s,$spos,$epos) = @_;
777 # value_at($spos) == $s
778 # value_at($epos) != $s
779 my $posdif = $epos - $spos;
780 my ($v,$vpos);
781 while($posdif) {
782 ($v,$vpos) = value_at($spos+$posdif);
783 if($v eq $s) {
784 $spos = $vpos;
785 $posdif = $epos - $spos;
786 } else {
787 $epos = $vpos;
789 $posdif = int($posdif/2);
791 return($v,$vpos);
794 sub binary_search_start($$$) {
795 my ($s,$spos,$epos) = @_;
796 # value_at($spos) != $s
797 # value_at($epos) == $s
798 my $posdif = $epos - $spos;
799 my ($v,$vpos);
800 while($posdif) {
801 ($v,$vpos) = value_at($spos+$posdif);
802 if($v eq $s) {
803 $epos = $vpos;
804 } else {
805 $spos = $vpos;
806 $posdif = $epos - $spos;
808 $posdif = int($posdif/2);
810 return($v,$vpos);
813 my ($file,$size,$block,$header,$firstlinelen) = @_;
814 my @pos;
815 $fh = open_or_exit("<",$file);
816 # Set $Global::group_by_column $Global::group_by_perlexpr
817 group_by_loop($fh,$opt::recsep);
818 if($opt::max_args) {
819 # Split after n values
820 my ($a,$apos);
821 # $xpos = linestart, $x = value at $xpos
822 $apos = $firstlinelen + length $header;
823 for(($a,$apos) = value_at($apos); $apos < $size;) {
824 push @pos, $apos;
825 ($a,$apos) = binary_search_end($a,$apos,$size);
826 if(eof($fh)) {
827 push @pos, $size; last;
830 # @pos = start of every value
831 # Merge n values
832 # -nX = keep every X'th position
833 my $i = 0;
834 @pos = grep { not ($i++ % $opt::max_args) } @pos;
835 } else {
836 # Split after any value group
837 # Preferable < $blocksize
838 my ($a,$b,$c,$apos,$bpos,$cpos);
839 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
840 $apos = $firstlinelen + length $header;
841 for(($a,$apos) = value_at($apos); $apos < $size;) {
842 push @pos, $apos;
843 $bpos = $apos + $block;
844 ($b,$bpos) = value_at($bpos);
845 if(eof($fh)) {
846 # EOF is less than 1 block away
847 push @pos, $size; last;
849 $cpos = $bpos + $block;
850 ($c,$cpos) = value_at($cpos);
851 if($a eq $b) {
852 while($b eq $c) {
853 # Move bpos, cpos a block forward until $a == $b != $c
854 $bpos = $cpos;
855 $cpos += $block;
856 ($c,$cpos) = value_at($cpos);
857 if($cpos >= $size) {
858 $cpos = $size;
859 last;
862 # $a == $b != $c
863 # Binary search for $b ending between ($bpos,$cpos)
864 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
865 } else {
866 if($b eq $c) {
867 # $a != $b == $c
868 # Binary search for $b starting between ($apos,$bpos)
869 ($b,$bpos) = binary_search_start($b,$apos,$bpos);
870 } else {
871 # $a != $b != $c
872 # Binary search for $b ending between ($bpos,$cpos)
873 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
876 ($a,$apos) = ($b,$bpos);
879 if($pos[$#pos] != $size) {
880 # Last splitpoint was not at end of the file: add it
881 push @pos, $size;
883 return @pos;
886 sub cat_partial($@) {
887 # Efficient command to copy from byte X to byte Y
888 # Input:
889 # $file = the file to read
890 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
891 # Returns:
892 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
893 my($file, @start_end) = @_;
894 my($start, $i);
895 # Convert (start,end) to (start,len)
896 my @start_len = map {
897 if(++$i % 2) { $start = $_; } else { $_-$start }
898 } @start_end;
899 # The optimal block size differs
900 # It has been measured on:
901 # AMD 6376: n*4k-1; small n
902 # AMD Neo N36L: 44k-200k
903 # Intel i7-3632QM: 55k-
904 # ARM Cortex A53: 4k-28k
905 # Intel i5-2410M: 36k-46k
907 # I choose 2^15-1 = 32767
908 # q{
909 # expseq() {
910 # perl -E '
911 # $last = pop @ARGV;
912 # $first = shift || 1;
913 # $inc = shift || 1.03;
914 # for($i=$first; $i<=$last;$i*=$inc) { say int $i }
915 # ' "$@"
918 # seq 111111111 > big;
919 # f() { ppar --test $1 -a big --pipepart --block -1 'md5sum > /dev/null'; }
920 # export -f f;
921 # expseq 1000 1.001 300000 | shuf | parallel -j1 --jl jl-md5sum f;
922 # };
923 my $script = spacefree
926 while(@ARGV) {
927 sysseek(STDIN,shift,0) || die;
928 $left = shift;
929 while($read =
930 sysread(STDIN,$buf, $left > 32767 ? 32767 : $left)){
931 $left -= $read;
932 syswrite(STDOUT,$buf);
936 return "<". Q($file) .
937 " perl -e '$script' @start_len |";
940 sub column_perlexpr($$$) {
941 # Compute the column number (if any), perlexpression from combined
942 # string (such as --shard key, --groupby key, {=n perlexpr=}
943 # Input:
944 # $column_perlexpr = string with column and perl expression
945 # $header = header from input file (if column is column name)
946 # $colsep = column separator regexp
947 # Returns:
948 # $col = column number
949 # $perlexpr = perl expression
950 # $subref = compiled perl expression as sub reference
951 my ($column_perlexpr, $header, $colsep) = @_;
952 my ($col, $perlexpr, $subref);
953 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
954 # Column name/number (possibly prefix)
955 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
956 # Column number (possibly prefix)
957 $col = $1;
958 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
959 # Column name (possibly prefix)
960 my $colname = $1;
961 # Split on --copsep pattern
962 my @headers = split /$colsep/, $header;
963 my %headers;
964 @headers{@headers} = (1..($#headers+1));
965 $col = $headers{$colname};
966 if(not defined $col) {
967 ::error("Column '$colname' $colsep not found in header",keys %headers);
968 ::wait_and_exit(255);
972 # What is left of $column_perlexpr is $perlexpr (possibly empty)
973 $perlexpr = $column_perlexpr;
974 $subref = eval("sub { no strict; no warnings; $perlexpr }");
975 return($col, $perlexpr, $subref);
978 sub group_by_loop($$) {
979 # Generate perl code for group-by loop
980 # Insert a $recsep when the column value changes
981 # The column value can be computed with $perlexpr
982 my($fh,$recsep) = @_;
983 my $groupby = $opt::groupby;
984 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
985 # Group by column name
986 # (Yes, this will also wrongly match a perlexpr like: chop)
987 my($read,$char,@line);
988 # Read a full line, but nothing more
989 # (the rest must be read by the child)
990 # $Global::header used to prepend block to each job
991 do {
992 $read = sysread($fh,$char,1);
993 push @line, $char;
994 } while($read and $char ne "\n");
995 $Global::header = join "", @line;
997 $opt::colsep ||= "\t";
998 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
999 column_perlexpr($groupby, $Global::header, $opt::colsep);
1000 # Numbered 0..n-1 due to being used by $F[n]
1001 if($group_by::col) { $group_by::col--; }
1003 my $loop = ::spacefree(0,q{
1004 BEGIN{ $last = "RECSEP"; }
1006 local $_=COLVALUE;
1007 PERLEXPR;
1008 if(($last) ne $_) {
1009 print "RECSEP";
1010 $last = $_;
1014 if(defined $group_by::col) {
1015 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
1016 } else {
1017 $loop =~ s/COLVALUE/\$_/g;
1019 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
1020 $loop =~ s/RECSEP/$recsep/g;
1021 return $loop;
1024 sub pipe_group_by_setup() {
1025 # Record separator with 119 bit random value
1026 $opt::recend = '';
1027 $opt::recstart =
1028 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
1029 $opt::remove_rec_sep = 1;
1030 my @filter;
1031 push @filter, "perl";
1032 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
1033 # This is column number/name
1034 # Use -a (auto-split)
1035 push @filter, "-a";
1036 $opt::colsep ||= "\t";
1037 my $sep = $opt::colsep;
1038 $sep =~ s/\t/\\t/g;
1039 $sep =~ s/\"/\\"/g;
1040 # man perlrun: -Fpattern [...] You can't use literal whitespace
1041 $sep =~ s/ /\\040/g;
1042 push @filter, "-F$sep";
1044 push @filter, "-pe";
1045 push @filter, group_by_loop(*STDIN,$opt::recstart);
1046 ::debug("init", "@filter\n");
1047 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
1048 if(which("mbuffer")) {
1049 # You get a speed up of 30% by going through mbuffer
1050 open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") ||
1051 die ("Cannot start mbuffer");
1055 sub spreadstdin() {
1056 # read a record
1057 # Spawn a job and print the record to it.
1058 # Uses:
1059 # $Global::blocksize
1060 # STDIN
1061 # $opt::r
1062 # $Global::max_lines
1063 # $Global::max_number_of_args
1064 # $opt::regexp
1065 # $Global::start_no_new_jobs
1066 # $opt::roundrobin
1067 # %Global::running
1068 # Returns: N/A
1070 my $buf = "";
1071 my ($recstart,$recend) = recstartrecend();
1072 my $recendrecstart = $recend.$recstart;
1073 my $chunk_number = 1;
1074 my $one_time_through;
1075 my $two_gb = 2**31-1;
1076 my $blocksize = int($Global::blocksize);
1077 my $in = *STDIN;
1078 my $timeout = $Global::blocktimeout;
1080 if($opt::skip_first_line) {
1081 my $newline;
1082 # Read a full line one byte at a time
1083 while(sysread($in,$newline,1,0)) {
1084 $newline eq "\n" and last;
1087 my $header = find_header(\$buf,$in);
1088 my $anything_written;
1089 my $eof;
1090 my $garbage_read;
1092 sub read_block() {
1093 # Read a --blocksize from STDIN
1094 # possibly interrupted by --blocktimeout
1095 # Add up to the next full block
1096 my $readsize = $blocksize - (length $buf) % $blocksize;
1097 my ($nread,$alarm);
1098 eval {
1099 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
1100 # --blocktimeout (or 0 if not set)
1101 alarm $timeout;
1102 if($] >= 5.026) {
1103 do {
1104 $nread = sysread $in, $buf, $readsize, length $buf;
1105 $readsize -= $nread;
1106 } while($readsize and $nread);
1107 } else {
1108 # Less efficient reading, but 32-bit sysread compatible
1109 do {
1110 $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
1111 $readsize -= $nread;
1112 } while($readsize and $nread);
1114 alarm 0;
1116 if ($@) {
1117 die unless $@ eq "alarm\n"; # propagate unexpected errors
1118 $alarm = 1;
1119 } else {
1120 $alarm = 0;
1122 $eof = not ($nread or $alarm);
1125 sub pass_n_line_records() {
1126 # Pass records of N lines
1127 my $n_lines = $buf =~ tr/\n/\n/;
1128 my $last_newline_pos = rindex64(\$buf,"\n");
1129 # Go backwards until there are full n-line records
1130 while($n_lines % $Global::max_lines) {
1131 $n_lines--;
1132 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1134 # Chop at $last_newline_pos as that is where n-line record ends
1135 $anything_written +=
1136 write_record_to_pipe($chunk_number++,\$header,\$buf,
1137 $recstart,$recend,$last_newline_pos+1);
1138 shorten(\$buf,$last_newline_pos+1);
1141 sub pass_n_regexps() {
1142 # Pass records of N regexps
1143 # -N => (start..*?end){n}
1144 # -L -N => (start..*?end){n*l}
1145 if(not $garbage_read) {
1146 $garbage_read = 1;
1147 if($buf !~ /^$recstart/o) {
1148 # Buf does not start with $recstart => There is garbage.
1149 # Make a single record of the garbage
1150 if($buf =~
1151 /(?s:^)(
1152 (?:(?:(?!$recend$recstart)(?s:.))*?$recend)
1154 # Followed by recstart
1155 (?=$recstart)/mox and length $1 > 0) {
1156 $anything_written +=
1157 write_record_to_pipe($chunk_number++,\$header,\$buf,
1158 $recstart,$recend,length $1);
1159 shorten(\$buf,length $1);
1164 my $n_records =
1165 $Global::max_number_of_args * ($Global::max_lines || 1);
1166 # (?!negative lookahead) is needed to avoid backtracking
1167 # See: https://unix.stackexchange.com/questions/439356/
1168 # (?s:.) = (.|[\n]) but faster
1169 while($buf =~
1170 /(?s:^)(
1171 # n more times recstart.*recend
1172 (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records}
1174 # Followed by recstart
1175 (?=$recstart)/mox and length $1 > 0) {
1176 $anything_written +=
1177 write_record_to_pipe($chunk_number++,\$header,\$buf,
1178 $recstart,$recend,length $1);
1179 shorten(\$buf,length $1);
1183 sub pass_regexp() {
1184 # Find the last recend-recstart in $buf
1185 $eof and return;
1186 # (?s:.) = (.|[\n]) but faster
1187 if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) {
1188 $anything_written +=
1189 write_record_to_pipe($chunk_number++,\$header,\$buf,
1190 $recstart,$recend,length $1);
1191 shorten(\$buf,length $1);
1195 sub pass_csv_record() {
1196 # Pass CVS record
1197 # We define a CSV record as an even number of " + end of line
1198 # This works if you use " as quoting character
1199 my $last_newline_pos = length $buf;
1200 # Go backwards from the last \n and search for a position
1201 # where there is an even number of "
1202 do {
1203 # find last EOL
1204 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1205 # While uneven "
1206 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1207 and $last_newline_pos >= 0);
1208 # Chop at $last_newline_pos as that is where CSV record ends
1209 $anything_written +=
1210 write_record_to_pipe($chunk_number++,\$header,\$buf,
1211 $recstart,$recend,$last_newline_pos+1);
1212 shorten(\$buf,$last_newline_pos+1);
1215 sub pass_n() {
1216 # Pass n records of --recend/--recstart
1217 # -N => (start..*?end){n}
1218 my $i = 0;
1219 my $read_n_lines =
1220 $Global::max_number_of_args * ($Global::max_lines || 1);
1221 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1223 length $buf) {
1224 $i += length $recend; # find the actual splitting location
1225 $anything_written +=
1226 write_record_to_pipe($chunk_number++,\$header,\$buf,
1227 $recstart,$recend,$i);
1228 shorten(\$buf,$i);
1232 sub pass() {
1233 # Pass records of --recend/--recstart
1234 # Split record at fixed string
1235 # Find the last recend+recstart in $buf
1236 $eof and return;
1237 my $i = rindex64(\$buf,$recendrecstart);
1238 if($i != -1) {
1239 $i += length $recend; # find the actual splitting location
1240 $anything_written +=
1241 write_record_to_pipe($chunk_number++,\$header,\$buf,
1242 $recstart,$recend,$i);
1243 shorten(\$buf,$i);
1247 sub increase_blocksize_maybe() {
1248 if(not $anything_written
1249 and not $opt::blocktimeout
1250 and not $Global::no_autoexpand_block) {
1251 # Nothing was written - maybe the block size < record size?
1252 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1253 if($blocksize < $two_gb) {
1254 my $old_blocksize = $blocksize;
1255 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1256 ::warning("A record was longer than $old_blocksize. " .
1257 "Increasing to --blocksize $blocksize.");
1262 while(1) {
1263 $anything_written = 0;
1264 read_block();
1265 if($opt::r) {
1266 # Remove empty lines
1267 $buf =~ s/^\s*\n//gm;
1268 if(length $buf == 0) {
1269 if($eof) {
1270 last;
1271 } else {
1272 next;
1276 if($Global::max_lines and not $Global::max_number_of_args) {
1277 # Pass n-line records
1278 pass_n_line_records();
1279 } elsif($opt::csv) {
1280 # Pass a full CSV record
1281 pass_csv_record();
1282 } elsif($opt::regexp) {
1283 # Split record at regexp
1284 if($Global::max_number_of_args) {
1285 pass_n_regexps();
1286 } else {
1287 pass_regexp();
1289 } else {
1290 # Pass normal --recend/--recstart record
1291 if($Global::max_number_of_args) {
1292 pass_n();
1293 } else {
1294 pass();
1297 $eof and last;
1298 increase_blocksize_maybe();
1299 ::debug("init", "Round\n");
1301 ::debug("init", "Done reading input\n");
1303 # If there is anything left in the buffer write it
1304 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1305 $recend, length $buf);
1307 if($opt::retries) {
1308 $Global::no_more_input = 1;
1309 # We need to start no more jobs: At most we need to retry some
1310 # of the already running.
1311 my @running = values %Global::running;
1312 # Stop any virgins.
1313 for my $job (@running) {
1314 if(defined $job and $job->virgin()) {
1315 close $job->fh(0,"w");
1318 # Wait for running jobs to be done
1319 my $sleep = 1;
1320 while($Global::total_running > 0) {
1321 $sleep = ::reap_usleep($sleep);
1322 start_more_jobs();
1325 $Global::start_no_new_jobs ||= 1;
1326 if($opt::roundrobin) {
1327 # Flush blocks to roundrobin procs
1328 my $sleep = 1;
1329 while(%Global::running) {
1330 my $something_written = 0;
1331 for my $job (values %Global::running) {
1332 if($job->block_length()) {
1333 $something_written += $job->non_blocking_write();
1334 } else {
1335 close $job->fh(0,"w");
1338 if($something_written) {
1339 $sleep = $sleep/2+0.001;
1341 $sleep = ::reap_usleep($sleep);
1346 sub recstartrecend() {
1347 # Uses:
1348 # $opt::recstart
1349 # $opt::recend
1350 # Returns:
1351 # $recstart,$recend with default values and regexp conversion
1352 my($recstart,$recend);
1353 if(defined($opt::recstart) and defined($opt::recend)) {
1354 # If both --recstart and --recend is given then both must match
1355 $recstart = $opt::recstart;
1356 $recend = $opt::recend;
1357 } elsif(defined($opt::recstart)) {
1358 # If --recstart is given it must match start of record
1359 $recstart = $opt::recstart;
1360 $recend = "";
1361 } elsif(defined($opt::recend)) {
1362 # If --recend is given then it must match end of record
1363 $recstart = "";
1364 $recend = $opt::recend;
1365 if($opt::regexp and $recend eq '') {
1366 # --regexp --recend ''
1367 $recend = '(?s:.)';
1371 if($opt::regexp) {
1372 # Do not allow /x comments - to avoid having to quote space
1373 $recstart = "(?-x:".$recstart.")";
1374 $recend = "(?-x:".$recend.")";
1375 # If $recstart/$recend contains '|'
1376 # the | should only apply to the regexp
1377 $recstart = "(?:".$recstart.")";
1378 $recend = "(?:".$recend.")";
1379 } else {
1380 # $recstart/$recend = printf strings (\n)
1381 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1382 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1384 return ($recstart,$recend);
1387 sub nindex($$) {
1388 # See if string is in buffer N times
1389 # Returns:
1390 # the position where the Nth copy is found
1391 my ($buf_ref, $str, $n) = @_;
1392 my $i = 0;
1393 for(1..$n) {
1394 $i = index64($buf_ref,$str,$i+1);
1395 if($i == -1) { last }
1397 return $i;
1401 my @robin_queue;
1402 my $sleep = 1;
1404 sub round_robin_write($$$$$) {
1405 # Input:
1406 # $header_ref = ref to $header string
1407 # $block_ref = ref to $block to be written
1408 # $recstart = record start string
1409 # $recend = record end string
1410 # $endpos = end position of $block
1411 # Uses:
1412 # %Global::running
1413 # Returns:
1414 # $something_written = amount of bytes written
1415 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1416 my $written = 0;
1417 my $block_passed = 0;
1418 while(not $block_passed) {
1419 # Continue flushing existing buffers
1420 # until one is empty and a new block is passed
1421 if(@robin_queue) {
1422 # Rotate queue once so new blocks get a fair chance
1423 # to be given to another slot
1424 push @robin_queue, shift @robin_queue;
1425 } else {
1426 # Make a queue to spread the blocks evenly
1427 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1428 values %Global::running);
1430 do {
1431 $written = 0;
1432 for my $job (@robin_queue) {
1433 if($job->block_length() > 0) {
1434 $written += $job->non_blocking_write();
1435 } else {
1436 $job->set_block($header_ref, $buffer_ref,
1437 $endpos, $recstart, $recend);
1438 $block_passed = 1;
1439 $written += $job->non_blocking_write();
1440 last;
1443 if($written) {
1444 $sleep = $sleep/1.5+0.001;
1446 # Don't sleep if something is written
1447 } while($written and not $block_passed);
1448 $sleep = ::reap_usleep($sleep);
1450 return $written;
1454 sub index64($$$) {
1455 # Do index on strings > 2GB.
1456 # index in Perl < v5.22 does not work for > 2GB
1457 # Input:
1458 # as index except STR which must be passed as a reference
1459 # Output:
1460 # as index
1461 my $ref = shift;
1462 my $match = shift;
1463 my $pos = shift || 0;
1464 my $max2gb = 2**31-1;
1465 my $strlen = length($$ref);
1466 # No point in doing extra work if we don't need to.
1467 if($strlen < $max2gb or $] > 5.022) {
1468 return index($$ref, $match, $pos);
1471 my $matchlen = length($match);
1472 my $ret;
1473 my $offset = $pos;
1474 while($offset < $strlen) {
1475 $ret = index(
1476 substr($$ref, $offset, $max2gb),
1477 $match, $pos-$offset);
1478 if($ret != -1) {
1479 return $ret + $offset;
1481 $offset += ($max2gb - $matchlen - 1);
1483 return -1;
1486 sub rindex64($@) {
1487 # Do rindex on strings > 2GB.
1488 # rindex in Perl < v5.22 does not work for > 2GB
1489 # Input:
1490 # as rindex except STR which must be passed as a reference
1491 # Output:
1492 # as rindex
1493 my $ref = shift;
1494 my $match = shift;
1495 my $pos = shift;
1496 my $block_size = 2**31-1;
1497 my $strlen = length($$ref);
1498 # Default: search from end
1499 $pos = defined $pos ? $pos : $strlen;
1500 # No point in doing extra work if we don't need to.
1501 if($strlen < $block_size or $] > 5.022) {
1502 return rindex($$ref, $match, $pos);
1505 my $matchlen = length($match);
1506 my $ret;
1507 my $offset = $pos - $block_size + $matchlen;
1508 if($offset < 0) {
1509 # The offset is less than a $block_size
1510 # Set the $offset to 0 and
1511 # Adjust block_size accordingly
1512 $block_size = $block_size + $offset;
1513 $offset = 0;
1515 while($offset >= 0) {
1516 $ret = rindex(
1517 substr($$ref, $offset, $block_size),
1518 $match);
1519 if($ret != -1) {
1520 return $ret + $offset;
1522 $offset -= ($block_size - $matchlen - 1);
1524 return -1;
1527 sub shorten($$) {
1528 # Do: substr($buf,0,$i) = "";
1529 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1530 # Input:
1531 # $buf_ref = \$buf
1532 # $i = position to shorten to
1533 # Returns: N/A
1534 my ($buf_ref, $i) = @_;
1535 my $two_gb = 2**31-1;
1536 while($i > $two_gb) {
1537 substr($$buf_ref,0,$two_gb) = "";
1538 $i -= $two_gb;
1540 substr($$buf_ref,0,$i) = "";
1543 sub write_record_to_pipe($$$$$$) {
1544 # Fork then
1545 # Write record from pos 0 .. $endpos to pipe
1546 # Input:
1547 # $chunk_number = sequence number - to see if already run
1548 # $header_ref = reference to header string to prepend
1549 # $buffer_ref = reference to record to write
1550 # $recstart = start string of record
1551 # $recend = end string of record
1552 # $endpos = position in $buffer_ref where record ends
1553 # Uses:
1554 # $Global::job_already_run
1555 # $opt::roundrobin
1556 # @Global::virgin_jobs
1557 # Returns:
1558 # Number of chunks written (0 or 1)
1559 my ($chunk_number, $header_ref, $buffer_ref,
1560 $recstart, $recend, $endpos) = @_;
1561 if($endpos == 0) { return 0; }
1562 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1563 if($opt::roundrobin) {
1564 # Write the block to one of the already running jobs
1565 return round_robin_write($header_ref, $buffer_ref,
1566 $recstart, $recend, $endpos);
1568 # If no virgin found, backoff
1569 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1570 while(not @Global::virgin_jobs) {
1571 ::debug("pipe", "No virgin jobs");
1572 $sleep = ::reap_usleep($sleep);
1573 # Jobs may not be started because of loadavg
1574 # or too little time between each ssh login
1575 # or retrying failed jobs.
1576 start_more_jobs();
1578 my $job = shift @Global::virgin_jobs;
1579 $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend);
1580 $job->write_block();
1581 return 1;
1585 sub __SEM_MODE__() {}
1588 sub acquire_semaphore() {
1589 # Acquires semaphore. If needed: spawns to the background
1590 # Uses:
1591 # @Global::host
1592 # Returns:
1593 # The semaphore to be released when jobs is complete
1594 $Global::host{':'} = SSHLogin->new(":");
1595 my $sem = Semaphore->new($Semaphore::name,
1596 $Global::host{':'}->max_jobs_running());
1597 $sem->acquire();
1598 if($Semaphore::fg) {
1599 # skip
1600 } else {
1601 if(fork()) {
1602 exit(0);
1603 } else {
1604 # If run in the background, the PID will change
1605 $sem->pid_change();
1608 return $sem;
1612 sub __PARSE_OPTIONS__() {}
1614 sub shell_completion() {
1615 if($opt::shellcompletion eq "zsh") {
1616 # if shell == zsh
1617 zsh_competion();
1618 } elsif($opt::shellcompletion eq "bash") {
1619 # if shell == bash
1620 bash_competion();
1621 } elsif($opt::shellcompletion eq "auto") {
1622 if($Global::shell =~ m:/zsh$|^zsh$:) {
1623 # if shell == zsh
1624 zsh_competion();
1625 } elsif($Global::shell =~ m:/bash$|^bash$:) {
1626 # if shell == bash
1627 bash_competion();
1628 } else {
1629 ::error("--shellcompletion is not implemented for ".
1630 "'$Global::shell'.");
1631 wait_and_exit(255);
1633 } else {
1634 ::error("--shellcompletion is not implemented for ".
1635 "'$opt::shellcompletion'.");
1636 wait_and_exit(255);
1640 sub bash_competion() {
1641 # Print:
1642 # complete -F _comp_parallel parallel;
1643 # _comp_parallel() {
1644 # COMPREPLY=($(compgen -W "--options" --
1645 # "${COMP_WORDS[$COMP_CWORD]}"));
1646 # };
1647 my @bash_completion =
1648 ("complete -F _comp_parallel parallel;",
1649 '_comp_parallel() { COMPREPLY=($(compgen -W "');
1650 my @och = options_completion_hash();
1651 while(@och) {
1652 $_ = shift @och;
1653 # Split input like:
1654 # "joblog|jl=s[Logfile for executed jobs]:logfile:_files"
1655 if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) {
1656 my $opt = $1;
1657 my $desc = $2;
1658 my $argdesc = $3;
1659 my $func = $4;
1660 # opt=s => opt
1661 $opt =~ s/[:=].$//;
1662 if($opt =~ /^_/) {
1663 # internal options start with --_
1664 # skip
1665 } else {
1666 push @bash_completion,
1667 (map { (length $_ == 1) ? "-$_ " : "--$_ " }
1668 split /\|/, $opt);
1671 shift @och;
1673 push @bash_completion,'" -- "${COMP_WORDS[$COMP_CWORD]}")); };'."\n";
1674 print @bash_completion;
1677 sub zsh_competion() {
1678 # Print code used for completion in zsh
1679 my @zsh_completion =
1680 ("compdef _comp_parallel parallel; ",
1681 "setopt localoptions extended_glob; ",
1682 "_comp_parallel() { ",
1683 "_arguments ");
1684 my @och = options_completion_hash();
1685 while(@och) {
1686 $_ = shift @och;
1687 # Split input like:
1688 # "joblog|jl=s[Logfile for executed jobs]:logfile:_files"
1689 if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) {
1690 my $opt = $1;
1691 my $desc = $2;
1692 my $argdesc = $3;
1693 my $func = $4;
1694 # opt=s => opt
1695 $opt =~ s/[:=].$//;
1696 if($opt =~ /^_/) {
1697 # internal options start with --_
1698 # skip
1699 } else {
1700 # {-o,--option}
1701 my $zsh_opt = join(",",
1702 (map { (length $_ == 1) ? "-$_" : "--$_" }
1703 split /\|/, $opt));
1704 if($zsh_opt =~ /,/) { $zsh_opt = "{$zsh_opt}"; }
1705 $desc =~ s/'/'"'"'/g;
1706 $argdesc =~ s/'/'"'"'/g;
1707 $func =~ s/'/'"'"'/g;
1708 push @zsh_completion, $zsh_opt."'".$desc.$argdesc.$func."' ";
1711 shift @och;
1713 push @zsh_completion,
1714 q{'(-)1:command:{_command_names -e}' },
1715 q{'*::arguments:_normal'},
1716 "};\n";
1717 print @zsh_completion;
1720 sub options_hash() {
1721 # Returns:
1722 # %hash = for GetOptions
1723 my %och = options_completion_hash();
1724 my %oh;
1725 my ($k,$v);
1726 while(($k,$v) = each %och) {
1727 # Remove description
1728 $k =~ s/\[.*//;
1729 $oh{$k} = $v;
1731 return %oh;
1734 sub options_completion_hash() {
1735 # Returns:
1736 # %hash = for GetOptions and shell completion
1737 return
1738 ("debug|D=s" => \$opt::D,
1739 "xargs[Insert as many arguments as the command line length permits]"
1740 => \$opt::xargs,
1741 "m[Multiple arguments]" => \$opt::m,
1742 ("X[Insert as many arguments with context as the command line ".
1743 "length permits]"
1744 => \$opt::X),
1745 "v[Verbose]" => \@opt::v,
1746 "sql=s[Use --sql-master instead (obsolete)]:DBURL" => \$opt::retired,
1747 ("sql-master|sqlmaster=s".
1748 "[Submit jobs via SQL server. DBURL must point to a table, which ".
1749 "will contain --joblog, the values, and output]:DBURL"
1750 => \$opt::sqlmaster),
1751 ("sql-worker|sqlworker=s".
1752 "[Execute jobs via SQL server. Read the input sources variables ".
1753 "from the table pointed to by DBURL.]:DBURL"
1754 => \$opt::sqlworker),
1755 ("sql-and-worker|sqlandworker=s".
1756 "[--sql-master DBURL --sql-worker DBURL]:DBURL"
1757 => \$opt::sqlandworker),
1758 ("joblog|jl=s[Logfile for executed jobs]:logfile:_files"
1759 => \$opt::joblog),
1760 ("results|result|res=s[Save the output into files]:name:_files"
1761 => \$opt::results),
1762 "resume[Resumes from the last unfinished job]" => \$opt::resume,
1763 ("resume-failed|resumefailed".
1764 "[Retry all failed and resume from the last unfinished job]"
1765 => \$opt::resume_failed),
1766 ("retry-failed|retryfailed[Retry all failed jobs in joblog]"
1767 => \$opt::retry_failed),
1768 "silent[Silent]" => \$opt::silent,
1769 ("keep-order|keeporder|k".
1770 "[Keep sequence of output same as the order of input]"
1771 => \$opt::keeporder),
1772 ("no-keep-order|nokeeporder|nok|no-k".
1773 "[Overrides an earlier --keep-order (e.g. if set in ".
1774 "~/.parallel/config)]"
1775 => \$opt::nokeeporder),
1776 "group[Group output]" => \$opt::group,
1777 "g" => \$opt::retired,
1778 ("ungroup|u".
1779 "[Output is printed as soon as possible and bypasses GNU parallel ".
1780 "internal processing]"
1781 => \$opt::ungroup),
1782 ("latest-line|latestline|ll".
1783 "[Print latest line of each job]"
1784 => \$opt::latestline),
1785 ("line-buffer|line-buffered|linebuffer|linebuffered|lb".
1786 "[Buffer output on line basis]"
1787 => \$opt::linebuffer),
1788 ("tmux".
1789 "[Use tmux for output. Start a tmux session and run each job in a ".
1790 "window in that session. No other output will be produced]"
1791 => \$opt::tmux),
1792 ("tmux-pane|tmuxpane".
1793 "[Use tmux for output but put output into panes in the first ".
1794 "window. Useful if you want to monitor the progress of less than ".
1795 "100 concurrent jobs]"
1796 => \$opt::tmuxpane),
1797 "null|0[Use NUL as delimiter]" => \$opt::null,
1798 "quote|q[Quote command]" => \$opt::quote,
1799 # Replacement strings
1800 ("parens=s[Use parensstring instead of {==}]:parensstring"
1801 => \$opt::parens),
1802 ('rpl=s[Define replacement string]:"tag perl expression"'
1803 => \@opt::rpl),
1804 "plus[Add more replacement strings]" => \$opt::plus,
1805 ("I=s".
1806 "[Use the replacement string replace-str instead of {}]:replace-str"
1807 => \$opt::I),
1808 ("extensionreplace|er=s".
1809 "[Use the replacement string replace-str instead of {.} for input ".
1810 "line without extension]:replace-str"
1811 => \$opt::U),
1812 "U=s" => \$opt::retired,
1813 ("basenamereplace|bnr=s".
1814 "[Use the replacement string replace-str instead of {/} for ".
1815 "basename of input line]:replace-str"
1816 => \$opt::basenamereplace),
1817 ("dirnamereplace|dnr=s".
1818 "[Use the replacement string replace-str instead of {//} for ".
1819 "dirname of input line]:replace-str"
1820 => \$opt::dirnamereplace),
1821 ("basenameextensionreplace|bner=s".
1822 "[Use the replacement string replace-str instead of {/.} for ".
1823 "basename of input line without extension]:replace-str"
1824 => \$opt::basenameextensionreplace),
1825 ("seqreplace=s".
1826 "[Use the replacement string replace-str instead of {#} for job ".
1827 "sequence number]:replace-str"
1828 => \$opt::seqreplace),
1829 ("slotreplace=s".
1830 "[Use the replacement string replace-str instead of {%} for job ".
1831 "slot number]:replace-str"
1832 => \$opt::slotreplace),
1833 ("delay=s".
1834 "[Delay starting next job by duration]:duration" => \$opt::delay),
1835 ("ssh-delay|sshdelay=f".
1836 "[Delay starting next ssh by duration]:duration"
1837 => \$opt::sshdelay),
1838 ("load=s".
1839 "[Only start jobs if load is less than max-load]:max-load"
1840 => \$opt::load),
1841 "noswap[Do not start job is computer is swapping]" => \$opt::noswap,
1842 ("max-line-length-allowed|maxlinelengthallowed".
1843 "[Print maximal command line length]"
1844 => \$opt::max_line_length_allowed),
1845 ("number-of-cpus|numberofcpus".
1846 "[Print the number of physical CPU cores and exit (obsolete)]"
1847 => \$opt::number_of_cpus),
1848 ("number-of-sockets|numberofsockets".
1849 "[Print the number of CPU sockets and exit]"
1850 => \$opt::number_of_sockets),
1851 ("number-of-cores|numberofcores".
1852 "[Print the number of physical CPU cores and exit]"
1853 => \$opt::number_of_cores),
1854 ("number-of-threads|numberofthreads".
1855 "[Print the number of hyperthreaded CPU cores and exit]"
1856 => \$opt::number_of_threads),
1857 ("use-sockets-instead-of-threads|usesocketsinsteadofthreads".
1858 "[Determine how GNU Parallel counts the number of CPUs]"
1859 => \$opt::use_sockets_instead_of_threads),
1860 ("use-cores-instead-of-threads|usecoresinsteadofthreads".
1861 "[Determine how GNU Parallel counts the number of CPUs]"
1862 => \$opt::use_cores_instead_of_threads),
1863 ("use-cpus-instead-of-cores|usecpusinsteadofcores".
1864 "[Determine how GNU Parallel counts the number of CPUs]"
1865 => \$opt::use_cpus_instead_of_cores),
1866 ("shell-quote|shellquote|shell_quote".
1867 "[Does not run the command but quotes it. Useful for making ".
1868 "quoted composed commands for GNU parallel]"
1869 => \@opt::shellquote),
1870 ('nice=i[Run the command at this niceness]:niceness:($(seq -20 19))'
1871 => \$opt::nice),
1872 "tag[Tag lines with arguments]" => \$opt::tag,
1873 ("tag-string|tagstring=s".
1874 "[Tag lines with a string]:str" => \$opt::tagstring),
1875 "ctag[Color tag]:str" => \$opt::ctag,
1876 "ctag-string|ctagstring=s[Colour tagstring]:str" => \$opt::ctagstring,
1877 "color|colour[Colourize output]" => \$opt::color,
1878 ("color-failed|colour-failed|colorfailed|colourfailed|".
1879 "color-fail|colour-fail|colorfail|colourfail|cf".
1880 "[Colour failed jobs red]"
1881 => \$opt::colorfailed),
1882 ("onall[Run all the jobs on all computers given with --sshlogin]"
1883 => \$opt::onall),
1884 "nonall[--onall with no arguments]" => \$opt::nonall,
1885 ("filter-hosts|filterhosts|filter-host[Remove down hosts]"
1886 => \$opt::filter_hosts),
1887 ('sshlogin|S=s'.
1888 '[Distribute jobs to remote computers]'.
1889 ':[@hostgroups/][ncpus/]sshlogin'.
1890 '[,[@hostgroups/][ncpus/]sshlogin[,...]] or @hostgroup'.
1891 ':_users') => \@opt::sshlogin,
1892 ("sshloginfile|slf=s".
1893 "[File with sshlogins on separate lines. Lines starting with '#' ".
1894 "are ignored.]:filename:_files"
1895 => \@opt::sshloginfile),
1896 ("controlmaster|M".
1897 "[Use ssh's ControlMaster to make ssh connections faster]"
1898 => \$opt::controlmaster),
1899 ("ssh=s".
1900 "[Use this command instead of ssh for remote access]:sshcommand"
1901 => \$opt::ssh),
1902 ("transfer-file|transferfile|transfer-files|transferfiles|tf=s".
1903 "[Transfer filename to remote computers]:filename:_files"
1904 => \@opt::transfer_files),
1905 ("return=s[Transfer files from remote computers]:filename:_files"
1906 => \@opt::return),
1907 ("trc=s[--transfer --return filename --cleanup]:filename:_files"
1908 => \@opt::trc),
1909 "transfer[Transfer files to remote computers]" => \$opt::transfer,
1910 "cleanup[Remove transferred files]" => \$opt::cleanup,
1911 ("basefile|bf=s".
1912 "[Transfer file to each sshlogin before first job is started]".
1913 ":file:_files"
1914 => \@opt::basefile),
1915 ("template|tmpl=s".
1916 "[Replace replacement strings in file and save it in repl]".
1917 ":file=repl:_files"
1918 => \%opt::template),
1919 "B=s" => \$opt::retired,
1920 "ctrl-c|ctrlc" => \$opt::retired,
1921 "no-ctrl-c|no-ctrlc|noctrlc" => \$opt::retired,
1922 ("work-dir|workdir|wd=s".
1923 "[Jobs will be run in the dir mydir. (default: the current dir ".
1924 "for the local machine, the login dir for remote computers)]".
1925 ":mydir:_cd"
1926 => \$opt::workdir),
1927 "W=s" => \$opt::retired,
1928 ("rsync-opts|rsyncopts=s[Options to pass on to rsync]:options"
1929 => \$opt::rsync_opts),
1930 ("tmpdir|tempdir=s[Directory for temporary files]:dirname:_cd"
1931 => \$opt::tmpdir),
1932 ("use-compress-program|compress-program|".
1933 "usecompressprogram|compressprogram=s".
1934 "[Use prg for compressing temporary files]:prg:_commands"
1935 => \$opt::compress_program),
1936 ("use-decompress-program|decompress-program|".
1937 "usedecompressprogram|decompressprogram=s".
1938 "[Use prg for decompressing temporary files]:prg:_commands"
1939 => \$opt::decompress_program),
1940 "compress[Compress temporary files]" => \$opt::compress,
1941 "open-tty|o[Open terminal tty]" => \$opt::open_tty,
1942 "tty[Open terminal tty]" => \$opt::tty,
1943 "T" => \$opt::retired,
1944 "H=i" => \$opt::retired,
1945 ("dry-run|dryrun|dr".
1946 "[Print the job to run on stdout (standard output), but do not ".
1947 "run the job]"
1948 => \$opt::dryrun),
1949 "progress[Show progress of computations]" => \$opt::progress,
1950 ("eta[Show the estimated number of seconds before finishing]"
1951 => \$opt::eta),
1952 "bar[Show progress as a progress bar]" => \$opt::bar,
1953 ("total-jobs|totaljobs|total=s".
1954 "[Set total number of jobs]" => \$opt::totaljobs),
1955 "shuf[Shuffle jobs]" => \$opt::shuf,
1956 ("arg-sep|argsep=s".
1957 "[Use sep-str instead of ::: as separator string]:sep-str"
1958 => \$opt::arg_sep),
1959 ("arg-file-sep|argfilesep=s".
1960 "[Use sep-str instead of :::: as separator string ".
1961 "between command and argument files]:sep-str"
1962 => \$opt::arg_file_sep),
1963 ('trim=s[Trim white space in input]:trim_method:'.
1964 '((n\:"No trim" l\:"Left\ trim" r\:"Right trim" '.
1965 'lr\:"Both trim" rl\:"Both trim"))'
1966 => \$opt::trim),
1967 "env=s[Copy environment variable var]:var:_vars" => \@opt::env,
1968 "recordenv|record-env[Record environment]" => \$opt::record_env,
1969 ('session'.
1970 '[Record names in current environment in $PARALLEL_IGNORED_NAMES '.
1971 'and exit. Only used with env_parallel. '.
1972 'Aliases, functions, and variables with names i]'
1973 => \$opt::session),
1974 ('plain[Ignore --profile, $PARALLEL, and ~/.parallel/config]'
1975 => \$opt::plain),
1976 ("profile|J=s".
1977 "[Use profile profilename for options]:profilename:_files"
1978 => \@opt::profile),
1979 "tollef" => \$opt::tollef,
1980 "gnu[Behave like GNU parallel]" => \$opt::gnu,
1981 "link|xapply[Link input sources]" => \$opt::link,
1982 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1983 # Before changing these lines, please read
1984 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice
1985 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1986 # You accept to be put in a public hall-of-shame by removing
1987 # these lines
1988 ("bibtex|citation".
1989 "[Print the citation notice and BibTeX entry for GNU parallel, ".
1990 "silence citation notice for all future runs, and exit. ".
1991 "It will not run any commands]"
1992 => \$opt::citation),
1993 "will-cite|willcite|nn|nonotice|no-notice" => \$opt::willcite,
1994 # Termination and retries
1995 ('halt-on-error|haltonerror|halt=s'.
1996 '[When should GNU parallel terminate]'.
1997 ':when:((now\:"kill all running jobs and halt immediately" '.
1998 'soon\:"wait for all running jobs to complete, start no new jobs"))'
1999 => \$opt::halt),
2000 'limit=s[Dynamic job limit]:"command args"' => \$opt::limit,
2001 ("memfree=s".
2002 "[Minimum memory free when starting another job]:size"
2003 => \$opt::memfree),
2004 ("memsuspend=s".
2005 "[Suspend jobs when there is less memory available]:size"
2006 => \$opt::memsuspend),
2007 "retries=s[Try failing jobs n times]:n" => \$opt::retries,
2008 ("timeout=s".
2009 "[Time out for command. If the command runs for longer than ".
2010 "duration seconds it will get killed as per --term-seq]:duration"
2011 => \$opt::timeout),
2012 ("term-seq|termseq=s".
2013 "[Termination sequence]:sequence" => \$opt::termseq),
2014 # xargs-compatibility - implemented, man, testsuite
2015 ("max-procs|maxprocs|P|jobs|j=s".
2016 "[Add N to/Subtract N from/Multiply N% with/ the number of CPU ".
2017 "threads or read parameter from file]:+N/-N/N%/N/procfile:_files"
2018 => \$opt::jobs),
2019 ("delimiter|d=s[Input items are terminated by delim]:delim"
2020 => \$opt::d),
2021 ("max-chars|maxchars|s=s[Limit length of command]:max-chars"
2022 => \$opt::max_chars),
2023 ("arg-file|argfile|a=s".
2024 "[Use input-file as input source]:input-file:_files" => \@opt::a),
2025 "no-run-if-empty|norunifempty|r[Do not run empty input]" => \$opt::r,
2026 ("replace|i:s".
2027 "[This option is deprecated; use -I instead]:replace-str"
2028 => \$opt::i),
2029 "E=s" => \$opt::eof,
2030 ("eof|e:s[Set the end of file string to eof-str]:eof-str"
2031 => \$opt::eof),
2032 ("process-slot-var|processslotvar=s".
2033 "[Set this variable to job slot number]:varname"
2034 => \$opt::process_slot_var),
2035 ("max-args|maxargs|n=s".
2036 "[Use at most max-args arguments per command line]:max-args"
2037 => \$opt::max_args),
2038 ("max-replace-args|maxreplaceargs|N=s".
2039 "[Use at most max-args arguments per command line]:max-args"
2040 => \$opt::max_replace_args),
2041 "col-sep|colsep|C=s[Column separator]:regexp" => \$opt::colsep,
2042 "csv[Treat input as CSV-format]"=> \$opt::csv,
2043 ("help|h[Print a summary of the options to GNU parallel and exit]"
2044 => \$opt::help),
2045 ("L=s[When used with --pipe: Read records of recsize]:recsize"
2046 => \$opt::L),
2047 ("max-lines|maxlines|l:f".
2048 "[When used with --pipe: Read records of recsize lines]:recsize"
2049 => \$opt::max_lines),
2050 "interactive|p[Ask user before running a job]" => \$opt::interactive,
2051 ("verbose|t[Print the job to be run on stderr (standard error)]"
2052 => \$opt::verbose),
2053 ("version|V[Print the version GNU parallel and exit]"
2054 => \$opt::version),
2055 ('min-version|minversion=i'.
2056 '[Print the version GNU parallel and exit]'.
2057 ':version:($(parallel --minversion 0))'
2058 => \$opt::minversion),
2059 ("show-limits|showlimits".
2060 "[Display limits given by the operating system]"
2061 => \$opt::show_limits),
2062 ("exit|x[Exit if the size (see the -s option) is exceeded]"
2063 => \$opt::x),
2064 # Semaphore
2065 "semaphore[Work as a counting semaphore]" => \$opt::semaphore,
2066 ("semaphore-timeout|semaphoretimeout|st=s".
2067 "[If secs > 0: If the semaphore is not released within secs ".
2068 "seconds, take it anyway]:secs"
2069 => \$opt::semaphoretimeout),
2070 ("semaphore-name|semaphorename|id=s".
2071 "[Use name as the name of the semaphore]:name"
2072 => \$opt::semaphorename),
2073 "fg[Run command in foreground]" => \$opt::fg,
2074 "bg[Run command in background]" => \$opt::bg,
2075 "wait[Wait for all commands to complete]" => \$opt::wait,
2076 # Shebang #!/usr/bin/parallel --shebang
2077 ("shebang|hashbang".
2078 "[GNU parallel can be called as a shebang (#!) command as the ".
2079 "first line of a script. The content of the file will be treated ".
2080 "as inputsource]"
2081 => \$opt::shebang),
2082 ("_pipe-means-argfiles[internal]"
2083 => \$opt::_pipe_means_argfiles),
2084 "Y" => \$opt::retired,
2085 ("skip-first-line|skipfirstline".
2086 "[Do not use the first line of input]"
2087 => \$opt::skip_first_line),
2088 "_bug" => \$opt::_bug,
2089 "_unsafe" => \$opt::_unsafe,
2090 # --pipe
2091 ("pipe|spreadstdin".
2092 "[Spread input to jobs on stdin (standard input)]" => \$opt::pipe),
2093 ("round-robin|roundrobin|round".
2094 "[Distribute chunks of standard input in a round robin fashion]"
2095 => \$opt::roundrobin),
2096 "recstart=s" => \$opt::recstart,
2097 ("recend=s".
2098 "[Split record between endstring and startstring]:endstring"
2099 => \$opt::recend),
2100 ("regexp|regex".
2101 "[Interpret --recstart and --recend as regular expressions]"
2102 => \$opt::regexp),
2103 ("remove-rec-sep|removerecsep|rrs".
2104 "[Remove record separator]" => \$opt::remove_rec_sep),
2105 ("output-as-files|outputasfiles|files[Save output to files]"
2106 => \$opt::files),
2107 ("output-as-files0|outputasfiles0|files0".
2108 "[Save output to files separated by NUL]"
2109 => \$opt::files0),
2110 ("block-size|blocksize|block=s".
2111 "[Size of block in bytes to read at a time]:size"
2112 => \$opt::blocksize),
2113 ("block-timeout|blocktimeout|bt=s".
2114 "[Timeout for reading block when using --pipe]:duration"
2115 => \$opt::blocktimeout),
2116 "header=s[Use regexp as header]:regexp" => \$opt::header,
2117 "cat[Create a temporary file with content]" => \$opt::cat,
2118 "fifo[Create a temporary fifo with content]" => \$opt::fifo,
2119 ("pipe-part|pipepart[Pipe parts of a physical file]"
2120 => \$opt::pipepart),
2121 "tee[Pipe all data to all jobs]" => \$opt::tee,
2122 ("shard=s".
2123 "[Use shardexpr as shard key and shard input to the jobs]:shardexpr"
2124 => \$opt::shard),
2125 ("bin=s".
2126 "[Use binexpr as binning key and bin input to the jobs]:binexpr"
2127 => \$opt::bin),
2128 "group-by|groupby=s[Group input by value]:val" => \$opt::groupby,
2130 ("hgrp|hostgrp|hostgroup|hostgroups[Enable hostgroups on arguments]"
2131 => \$opt::hostgroups),
2132 "embed[Embed GNU parallel in a shell script]" => \$opt::embed,
2133 ("filter=s[Only run jobs where filter is true]:filter"
2134 => \@opt::filter),
2135 "combineexec|combine-exec|combineexecutable|combine-executable=s".
2136 "[Embed GNU parallel in a shell script]" => \$opt::combineexec,
2137 ("filter=s[Only run jobs where filter is true]:filter"
2138 => \@opt::filter),
2139 "_parset=s[Generate shell code for parset]" => \$opt::_parset,
2140 ("shell-completion|shellcompletion=s".
2141 "[Generate shell code for shell completion]:shell:(bash zsh)"
2142 => \$opt::shellcompletion),
2143 # Parameter for testing optimal values
2144 "_test=s" => \$opt::_test,
2148 sub get_options_from_array($@) {
2149 # Run GetOptions on @array
2150 # Input:
2151 # $array_ref = ref to @ARGV to parse
2152 # @keep_only = Keep only these options (e.g. --profile)
2153 # Uses:
2154 # @ARGV
2155 # Returns:
2156 # true if parsing worked
2157 # false if parsing failed
2158 # @$array_ref is changed
2159 my ($array_ref, @keep_only) = @_;
2160 if(not @$array_ref) {
2161 # Empty array: No need to look more at that
2162 return 1;
2164 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
2165 # supported everywhere
2166 my @save_argv;
2167 my $this_is_ARGV = (\@::ARGV == $array_ref);
2168 if(not $this_is_ARGV) {
2169 @save_argv = @::ARGV;
2170 @::ARGV = @{$array_ref};
2172 # If @keep_only set: Ignore all values except @keep_only
2173 my %options = options_hash();
2174 if(@keep_only) {
2175 my (%keep,@dummy);
2176 @keep{@keep_only} = @keep_only;
2177 for my $k (grep { not $keep{$_} } keys %options) {
2178 # Store the value of the option in @dummy
2179 $options{$k} = \@dummy;
2182 my $retval = GetOptions(%options);
2183 if(not $this_is_ARGV) {
2184 @{$array_ref} = @::ARGV;
2185 @::ARGV = @save_argv;
2187 return $retval;
2190 sub parse_parset() {
2191 $Global::progname = "parset";
2192 @Global::parset_vars = split /[ ,]/, $opt::_parset;
2193 my $var_or_assoc = shift @Global::parset_vars;
2194 # Legal names: var _v2ar arrayentry[2]
2195 my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ }
2196 @Global::parset_vars);
2197 if(@illegal) {
2198 ::error
2199 ("@illegal is an invalid variable name.",
2200 "Variable names must be letter followed by letters or digits.",
2201 "Usage:",
2202 " parset varname GNU Parallel options and command");
2203 wait_and_exit(255);
2205 if($var_or_assoc eq "assoc") {
2206 my $var = shift @Global::parset_vars;
2207 print "$var=(";
2208 $Global::parset = "assoc";
2209 $Global::parset_endstring=")\n";
2210 } elsif($var_or_assoc eq "var") {
2211 if($#Global::parset_vars > 0) {
2212 $Global::parset = "var";
2213 } else {
2214 my $var = shift @Global::parset_vars;
2215 print "$var=(";
2216 $Global::parset = "array";
2217 $Global::parset_endstring=")\n";
2219 } else {
2220 ::die_bug("parset: unknown '$opt::_parset'");
2224 sub parse_options(@) {
2225 # Returns: N/A
2226 init_globals();
2227 my @argv_before = @ARGV;
2228 @ARGV = read_options();
2229 # Before changing these line, please read
2230 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice
2231 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
2232 # You accept to be added to a public hall-of-shame by removing the lines
2233 if(defined $opt::citation) {
2234 citation(\@argv_before,\@ARGV);
2235 wait_and_exit(0);
2237 # no-* overrides *
2238 if($opt::nokeeporder) { $opt::keeporder = undef; }
2240 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
2241 if($opt::_bug) { ::die_bug("test-bug"); }
2242 $Global::debug = $opt::D;
2244 ## Shell
2246 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
2247 || $ENV{'SHELL'} || "/bin/sh";
2248 if(not -x $Global::shell and not which($Global::shell)) {
2249 ::error("Shell '$Global::shell' not found.");
2250 wait_and_exit(255);
2252 ::debug("init","Global::shell $Global::shell\n");
2253 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
2254 $Global::fish = $Global::shell =~ m:(/[-a-z]*)?fish:;
2255 if(defined $opt::_parset) { parse_parset(); }
2256 if(defined $opt::X) { $Global::ContextReplace = 1; }
2257 if(defined $opt::silent) { $Global::verbose = 0; }
2258 if(defined $opt::null) { $/ = "\0"; }
2259 if(defined $opt::files) { $Global::files = 1; $Global::files_sep = "\n"; }
2260 if(defined $opt::files0) { $Global::files = 1; $Global::files_sep = "\0"; }
2261 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
2262 parse_replacement_string_options();
2263 $opt::tag ||= $opt::ctag;
2264 $opt::tagstring ||= $opt::ctagstring;
2265 if(defined $opt::ctag or defined $opt::ctagstring
2266 or defined $opt::color) {
2267 $Global::color = 1;
2269 if($opt::linebuffer or $opt::latestline) {
2270 $Global::linebuffer = 1;
2271 Job::latestline_init();
2273 if(defined $opt::tag and not defined $opt::tagstring) {
2274 # Default = {}
2275 $opt::tagstring = $Global::parensleft.$Global::parensright;
2277 if(defined $opt::tagstring) {
2278 $opt::tagstring = unquote_printf($opt::tagstring);
2279 if($opt::tagstring =~
2280 /\Q$Global::parensleft\E.*\S+.*\Q$Global::parensright\E/
2282 $Global::linebuffer) {
2283 # --tagstring contains {= ... =} and --linebuffer =>
2284 # recompute replacement string for each use (do not cache)
2285 $Global::cache_replacement_eval = 0;
2288 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
2289 if(defined $opt::quote) { $Global::quoting = 1; }
2290 if(defined $opt::r) { $Global::ignore_empty = 1; }
2291 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
2292 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
2293 if(defined $opt::max_args) {
2294 $opt::max_args = multiply_binary_prefix($opt::max_args);
2295 $Global::max_number_of_args = $opt::max_args;
2296 if($opt::pipepart and $opt::groupby) { $Global::max_number_of_args = 1; }
2298 if(defined $opt::blocktimeout) {
2299 $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
2300 if($Global::blocktimeout < 1) {
2301 ::error("--block-timeout must be at least 1");
2302 wait_and_exit(255);
2305 if(defined $opt::timeout) {
2306 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
2308 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
2309 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
2310 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
2311 # Default: Same nice level as GNU Parallel is started at
2312 $opt::nice ||= eval { getpriority(0,0) } || 0;
2313 if(defined $opt::help) { usage(); exit(0); }
2314 if(defined $opt::shellcompletion) { shell_completion(); exit(0); }
2315 if(defined $opt::embed) { embed(); exit(0); }
2316 if(defined $opt::sqlandworker) {
2317 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
2319 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
2320 if(defined $opt::colsep) { $Global::trim = 'lr'; }
2321 if(defined $opt::csv) {
2322 if(not $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;") {
2323 ::error("The perl module Text::CSV is not installed.");
2324 ::error("Try installing libtext-csv-perl or perl-Text-CSV.");
2325 wait_and_exit(255);
2327 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
2328 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
2329 my $sep = $csv_setting->{sep_char};
2330 $Global::csv = Text::CSV->new($csv_setting)
2331 or die "Cannot use CSV: ".Text::CSV->error_diag ();
2333 if(defined $opt::header) {
2334 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
2336 if(defined $opt::trim) { $Global::trim = $opt::trim; }
2337 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
2338 if(defined $opt::arg_file_sep) {
2339 $Global::arg_file_sep = $opt::arg_file_sep;
2341 if(not defined $opt::process_slot_var) {
2342 $opt::process_slot_var = 'PARALLEL_JOBSLOT0';
2344 if(defined $opt::number_of_sockets) {
2345 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
2347 if(defined $opt::number_of_cpus) {
2348 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
2350 if(defined $opt::number_of_cores) {
2351 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
2353 if(defined $opt::number_of_threads) {
2354 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
2356 if(defined $opt::max_line_length_allowed) {
2357 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
2359 if(defined $opt::max_chars) {
2360 $opt::max_chars = multiply_binary_prefix($opt::max_chars);
2362 if(defined $opt::version) { version(); wait_and_exit(0); }
2363 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
2364 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
2365 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
2366 if(@opt::return) { push @Global::ret_files, @opt::return; }
2367 if($opt::transfer) {
2368 push @Global::transfer_files, $opt::i || $opt::I || "{}";
2370 push @Global::transfer_files, @opt::transfer_files;
2371 if(%opt::template) {
2372 while (my ($source, $template_name) = each %opt::template) {
2373 push @Global::template_names, $template_name;
2374 push @Global::template_contents, slurp_or_exit($source);
2377 if(not defined $opt::recstart and
2378 not defined $opt::recend) { $opt::recend = "\n"; }
2379 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
2380 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
2381 warning("--blocksize >= 2G causes problems. Using 2G-1.");
2382 $Global::blocksize = 2**31-1;
2384 if($^O eq "cygwin" and
2385 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
2386 and $Global::blocksize > 65535) {
2387 warning("--blocksize >= 64K causes problems on Cygwin.");
2389 $opt::memfree = multiply_binary_prefix($opt::memfree);
2390 $opt::memsuspend = multiply_binary_prefix($opt::memsuspend);
2391 $Global::memlimit = $opt::memsuspend + $opt::memfree;
2392 check_invalid_option_combinations();
2393 if((defined $opt::fifo or defined $opt::cat) and not $opt::pipepart) {
2394 $opt::pipe = 1;
2396 if(defined $opt::minversion) {
2397 print $Global::version,"\n";
2398 if($Global::version < $opt::minversion) {
2399 wait_and_exit(255);
2400 } else {
2401 wait_and_exit(0);
2404 if(not defined $opt::delay) {
2405 # Set --delay to --sshdelay if not set
2406 $opt::delay = $opt::sshdelay;
2408 $Global::sshdelayauto = $opt::sshdelay =~ s/auto$//;
2409 $opt::sshdelay = multiply_time_units($opt::sshdelay);
2410 $Global::delayauto = $opt::delay =~ s/auto$//;
2411 $opt::delay = multiply_time_units($opt::delay);
2412 if($opt::compress_program) {
2413 $opt::compress = 1;
2414 $opt::decompress_program ||= $opt::compress_program." -dc";
2417 if(defined $opt::results) {
2418 # Is the output a dir or CSV-file?
2419 if($opt::results =~ /\.csv$/i) {
2420 # CSV with , as separator
2421 $Global::csvsep = ",";
2422 $Global::membuffer ||= 1;
2423 } elsif($opt::results =~ /\.tsv$/i) {
2424 # CSV with TAB as separator
2425 $Global::csvsep = "\t";
2426 $Global::membuffer ||= 1;
2427 } elsif($opt::results =~ /\.json$/i) {
2428 # JSON output
2429 $Global::jsonout ||= 1;
2430 $Global::membuffer ||= 1;
2433 if($opt::compress) {
2434 my ($compress, $decompress) = find_compression_program();
2435 $opt::compress_program ||= $compress;
2436 $opt::decompress_program ||= $decompress;
2437 if(($opt::results and not $Global::csvsep) or $Global::files) {
2438 # No need for decompressing
2439 $opt::decompress_program = "cat >/dev/null";
2442 if(defined $opt::dryrun) {
2443 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
2444 $opt::ungroup = 0;
2445 $opt::group = 1;
2447 if(defined $opt::nonall) {
2448 # Append a dummy empty argument if there are no arguments
2449 # on the command line to avoid reading from STDIN.
2450 # arg_sep = random 50 char
2451 # \0noarg => nothing (not the empty string)
2452 $Global::arg_sep = join "",
2453 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
2454 push @ARGV, $Global::arg_sep, "\0noarg";
2456 if(defined $opt::tee) {
2457 if(not defined $opt::jobs) {
2458 $opt::jobs = 0;
2461 if(defined $opt::tty) {
2462 # Defaults for --tty: -j1 -u
2463 # Can be overridden with -jXXX -g
2464 if(not defined $opt::jobs) {
2465 $opt::jobs = 1;
2467 if(not defined $opt::group) {
2468 $opt::ungroup = 1;
2471 if(@opt::trc) {
2472 push @Global::ret_files, @opt::trc;
2473 if(not @Global::transfer_files) {
2474 # Defaults to --transferfile {}
2475 push @Global::transfer_files, $opt::i || $opt::I || "{}";
2477 $opt::cleanup = 1;
2479 if(defined $opt::max_lines) {
2480 if($opt::max_lines eq "-0") {
2481 # -l -0 (swallowed -0)
2482 $opt::max_lines = 1;
2483 $opt::null = 1;
2484 $/ = "\0";
2485 } else {
2486 $opt::max_lines = multiply_binary_prefix($opt::max_lines);
2487 if ($opt::max_lines == 0) {
2488 # If not given (or if 0 is given) => 1
2489 $opt::max_lines = 1;
2493 $Global::max_lines = $opt::max_lines;
2494 if(not $opt::pipe) {
2495 # --pipe -L means length of record - not max_number_of_args
2496 $Global::max_number_of_args ||= $Global::max_lines;
2500 # Read more than one arg at a time (-L, -N)
2501 if(defined $opt::L) {
2502 $opt::L = multiply_binary_prefix($opt::L);
2503 $Global::max_lines = $opt::L;
2504 if(not $opt::pipe) {
2505 # --pipe -L means length of record - not max_number_of_args
2506 $Global::max_number_of_args ||= $Global::max_lines;
2509 if(defined $opt::max_replace_args) {
2510 $opt::max_replace_args =
2511 multiply_binary_prefix($opt::max_replace_args);
2512 $Global::max_number_of_args = $opt::max_replace_args;
2513 $Global::ContextReplace = 1;
2515 if((defined $opt::L or defined $opt::max_replace_args)
2517 not ($opt::xargs or $opt::m)) {
2518 $Global::ContextReplace = 1;
2520 # Deal with ::: :::+ :::: ::::+ and -a +file
2521 my @ARGV_with_argsep = @ARGV;
2522 @ARGV = read_args_from_command_line();
2523 if(defined $opt::combineexec) {
2524 pack_combined_executable(\@argv_before,\@ARGV_with_argsep,\@ARGV);
2525 exit(0);
2527 parse_semaphore();
2529 if(defined $opt::eta) { $opt::progress = $opt::eta; }
2530 if(defined $opt::bar) { $opt::progress = $opt::bar; }
2531 if(defined $opt::bar or defined $opt::latestline) {
2532 my $fh = $Global::status_fd || *STDERR;
2533 # Activate decode_utf8
2534 eval q{
2535 # Enable utf8 if possible
2536 use utf8;
2537 binmode $fh, "encoding(utf8)";
2538 *decode_utf8 = \&Encode::decode_utf8;
2540 if(eval { decode_utf8("x") }) {
2541 # Great: decode works
2542 } else {
2543 # UTF8-decode not supported: Dummy decode
2544 eval q{sub decode_utf8($;$) { $_[0]; }};
2546 # Activate decode_utf8
2547 eval q{
2548 # Enable utf8 if possible
2549 use utf8;
2550 use Encode qw( encode_utf8 );
2551 use Text::CharWidth qw( mbswidth );
2552 use Unicode::Normalize qw( NFC NFD );
2554 if(eval { mbswidth("ヌー平行") }) {
2555 # Great: mbswidth works
2556 } else {
2557 # mbswidth not supported: Dummy mbswidth
2558 eval q{ sub mbswidth { return length @_; } };
2562 # If you want GNU Parallel to be maintained in the future you
2563 # should keep this.
2564 # *YOU* will be harming free software by removing the notice.
2566 # Funding a free software project is hard. GNU Parallel is no
2567 # exception. On top of that it seems the less visible a project
2568 # is, the harder it is to get funding. And the nature of GNU
2569 # Parallel is that it will never be seen by "the guy with the
2570 # checkbook", but only by the people doing the actual work.
2572 # This problem has been covered by others - though no solution has
2573 # been found:
2574 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
2575 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
2577 # The FAQ tells you why the citation notice exists:
2578 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
2580 # If you want GNU Parallel to be maintained in the future, and not
2581 # just wither away like so many other free software tools, you
2582 # need to help finance the development.
2584 # The citation notice is a simple way of doing so, as citations
2585 # makes it possible to me to get a job where I can maintain GNU
2586 # Parallel as part of the job.
2588 # This means you can help financing development
2590 # WITHOUT PAYING A SINGLE CENT!
2592 # Before implementing the citation notice it was discussed with
2593 # the users:
2594 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2596 # Having to spend 10 seconds on running 'parallel --citation' once
2597 # is no doubt not an ideal solution, but no one has so far come up
2598 # with an ideal solution - neither for funding GNU Parallel nor
2599 # other free software.
2601 # If you believe you have the perfect solution, you should try it
2602 # out, and if it works, you should post it on the email
2603 # list. Ideas that will cost work and which have not been tested
2604 # are, however, unlikely to be prioritized.
2606 # Please note that GPL version 3 gives you the right to fork GNU
2607 # Parallel under a new name, but it does not give you the right to
2608 # distribute modified copies with the citation notice disabled in
2609 # a way where the software can be confused with GNU Parallel. To
2610 # do that you need to be the owner of the GNU Parallel
2611 # trademark. The xt:Commerce case shows this.
2613 # Description of the xt:Commerce case in OLG Duesseldorf
2614 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2616 # The verdict in German
2617 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2618 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2620 # Other free software limiting derivates by the same name:
2621 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2622 # https://tm.joomla.org/trademark-faq.html
2623 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2625 # Running 'parallel --citation' one single time takes less than 10
2626 # seconds, and will silence the citation notice for future
2627 # runs. If that is too much trouble for you, why not use one of
2628 # the alternatives instead?
2629 # See a list in: 'man parallel_alternatives'
2631 # If you want GNU Parallel to be maintained in the future, you
2632 # should keep this line:
2633 citation_notice();
2634 # This is because _YOU_ actively make it harder to justify
2635 # spending time developing GNU Parallel by removing it.
2637 # If you disagree, please read (especially 77-):
2638 # https://www.fordfoundation.org/media/2976/roads-and-bridges-the-unseen-labor-behind-our-digital-infrastructure.pdf
2640 # *YOU* will be harming free software by removing the notice. You
2641 # accept to be added to a public hall of shame by removing the
2642 # line. That includes you, George and Andreas.
2644 parse_halt();
2646 if($ENV{'PARALLEL_ENV'}) {
2647 # Read environment and set $Global::parallel_env
2648 # Must be done before is_acceptable_command_line_length()
2649 my $penv = $ENV{'PARALLEL_ENV'};
2650 # unset $PARALLEL_ENV: It should not be given to children
2651 # because it takes up a lot of env space
2652 delete $ENV{'PARALLEL_ENV'};
2653 if(-e $penv) {
2654 # This is a file/fifo: Replace envvar with content of file
2655 $penv = slurp_or_exit($penv);
2657 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2658 $penv =~ s/\001/\n/g;
2659 if($penv =~ /\0/) {
2660 ::warning('\0 (NUL) in environment is not supported');
2662 $Global::parallel_env = $penv;
2665 parse_sshlogin();
2666 if(defined $opt::show_limits) { show_limits(); }
2668 if(remote_hosts() and
2669 (defined $opt::X or defined $opt::m or defined $opt::xargs)) {
2670 # As we do not know the max line length on the remote machine
2671 # long commands generated by xargs may fail
2672 # If $opt::max_replace_args is set, it is probably safe
2673 ::warning("Using -X or -m with --sshlogin may fail.");
2676 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2677 open_joblog();
2678 open_json_csv();
2679 if(defined $opt::sqlmaster or defined $opt::sqlworker) {
2680 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2682 if(defined $opt::sqlworker) { $Global::membuffer ||= 1; }
2683 # The sqlmaster groups the arguments, so the should just read one
2684 if(defined $opt::sqlworker and not defined $opt::sqlmaster) {
2685 $Global::max_number_of_args = 1;
2687 if(defined $Global::color or defined $opt::colorfailed) {
2688 Job::init_color();
2692 sub check_invalid_option_combinations() {
2693 if(defined $opt::timeout and
2694 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2695 ::error("--timeout must be seconds or percentage.");
2696 wait_and_exit(255);
2698 if(defined $opt::fifo and defined $opt::cat) {
2699 ::error("--fifo cannot be combined with --cat.");
2700 ::wait_and_exit(255);
2702 if(defined $opt::retries and defined $opt::roundrobin) {
2703 ::error("--retries cannot be combined with --roundrobin.");
2704 ::wait_and_exit(255);
2706 if(defined $opt::pipepart and
2707 (defined $opt::L or defined $opt::max_lines
2708 or defined $opt::max_replace_args)) {
2709 ::error("--pipepart is incompatible with --max-replace-args, ".
2710 "--max-lines, and -L.");
2711 wait_and_exit(255);
2713 if(defined $opt::group and defined $opt::ungroup) {
2714 ::error("--group cannot be combined with --ungroup.");
2715 ::wait_and_exit(255);
2717 if(defined $opt::group and defined $opt::linebuffer) {
2718 ::error("--group cannot be combined with --line-buffer.");
2719 ::wait_and_exit(255);
2721 if(defined $opt::ungroup and defined $opt::linebuffer) {
2722 ::error("--ungroup cannot be combined with --line-buffer.");
2723 ::wait_and_exit(255);
2725 if(defined $opt::tollef and not defined $opt::gnu) {
2726 ::error("--tollef has been retired.",
2727 "Remove --tollef or use --gnu to override --tollef.");
2728 ::wait_and_exit(255);
2730 if(defined $opt::retired) {
2731 ::error("-g has been retired. Use --group.",
2732 "-B has been retired. Use --bf.",
2733 "-T has been retired. Use --tty.",
2734 "-U has been retired. Use --er.",
2735 "-W has been retired. Use --wd.",
2736 "-Y has been retired. Use --shebang.",
2737 "-H has been retired. Use --halt.",
2738 "--sql has been retired. Use --sqlmaster.",
2739 "--ctrlc has been retired.",
2740 "--noctrlc has been retired.");
2741 ::wait_and_exit(255);
2743 if(defined $opt::groupby) {
2744 if(not defined $opt::pipe and not defined $opt::pipepart) {
2745 $opt::pipe = 1;
2747 if(defined $opt::remove_rec_sep) {
2748 ::error("--remove-rec-sep is not compatible with --groupby");
2749 ::wait_and_exit(255);
2751 if(defined $opt::recstart) {
2752 ::error("--recstart is not compatible with --groupby");
2753 ::wait_and_exit(255);
2755 if($opt::recend ne "\n") {
2756 ::error("--recend is not compatible with --groupby");
2757 ::wait_and_exit(255);
2760 sub unsafe_warn {
2761 # use --_unsafe to only generate a warning
2762 if($opt::_unsafe) { ::warning(@_); } else { ::error(@_); exit(255); }
2764 if(defined $opt::results) {
2765 if($opt::nonall or $opt::onall) {
2766 unsafe_warn("--(n)onall + --results not supported (yet).");
2769 sub test_safe_chars {
2770 my $var = shift;
2771 if($ENV{$var} =~ m{^[-a-z0-9_+,.%:/= ]*$}i) {
2772 # OK
2773 } else {
2774 unsafe_warn("\$$var can only contain [-a-z0-9_+,.%:/= ].");
2777 if($ENV{'TMPDIR'} =~ /\n/) {
2778 if(defined $opt::files) {
2779 ::warning("Use --files0 when \$TMPDIR contains newline.");
2780 } elsif($Global::cshell
2782 (defined $opt::cat or defined $opt::fifo)) {
2783 ::warning("--cat/--fifo fails under csh ".
2784 "if \$TMPDIR contains newline.");
2786 } elsif($ENV{'TMPDIR'} =~ /\257/) {
2787 unsafe_warn("\$TMPDIR with \\257 (\257) is not supported.");
2788 } else{
2789 test_safe_chars('TMPDIR');
2791 map { test_safe_chars($_); } qw(PARALLEL_HOME XDG_CONFIG_DIRS
2792 PARALLEL_REMOTE_TMPDIR XDG_CACHE_HOME);
2795 sub init_globals() {
2796 # Defaults:
2797 $Global::version = 20240522;
2798 $Global::progname = 'parallel';
2799 $::name = "GNU Parallel";
2800 $Global::infinity = 2**31;
2801 $Global::debug = 0;
2802 $Global::verbose = 0;
2803 # Don't quote every part of the command line
2804 $Global::quoting = 0;
2805 # Quote replacement strings
2806 $Global::quote_replace = 1;
2807 $Global::total_completed = 0;
2808 $Global::cache_replacement_eval = 1;
2809 # Read only table with default --rpl values
2810 %Global::replace =
2812 '{}' => '',
2813 '{#}' => '1 $_=$job->seq()',
2814 '{%}' => '1 $_=$job->slot()',
2815 '{/}' => 's:.*/::',
2816 '{//}' =>
2817 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2818 '$_ = dirname($_);'),
2819 '{/.}' => 's:.*/::; s:\.[^/.]*$::;',
2820 '{.}' => 's:\.[^/.]*$::',
2822 %Global::plus =
2824 # {} = {+/}/{/}
2825 # = {.}.{+.} = {+/}/{/.}.{+.}
2826 # = {..}.{+..} = {+/}/{/..}.{+..}
2827 # = {...}.{+...} = {+/}/{/...}.{+...}
2828 '{+/}' => 's:/[^/]*$:: || s:.*$::',
2829 # a.b => b; a => ''
2830 '{+.}' => 's:.*\.:: || s:.*$::',
2831 # a.b.c => b.c; a.b => ''; a => ''
2832 '{+..}' => 's:.*\.([^/.]*\.[^/.]*)$:$1: || s:.*$::',
2833 '{+...}' => 's:.*\.([^/.]*\.[^/.]*\.[^/.]*)$:$1: || s:.*$::',
2834 '{..}' => 's:\.[^/.]*\.[^/.]*$::',
2835 '{...}' => 's:\.[^/.]*\.[^/.]*\.[^/.]*$::',
2836 '{/..}' => 's:.*/::; s:\.[^/.]*\.[^/.]*$::',
2837 '{/...}' => 's:.*/::; s:\.[^/.]*\.[^/.]*\.[^/.]*$::',
2838 # n choose k = Binomial coefficient
2839 '{choose_k}' => ('for $t (2..$#arg)'.
2840 '{ if($arg[$t-1] ge $arg[$t]) { skip() } }'),
2841 # unique values: Skip job if any args are the same
2842 '{uniq}' => 'if(::uniq(@arg) != @arg) { skip(); }',
2843 # {##} = number of jobs
2844 '{##}' => '1 $_=total_jobs()',
2845 # {0#} = 0-padded seq
2846 '{0#}' => ('1 $f=1+int((log(total_jobs())/log(10)));'.
2847 '$_=sprintf("%0${f}d",seq())'),
2848 # {0%} = 0-padded jobslot
2849 '{0%}' => ('1 $f=1+int((log($Global::max_jobs_running||1)/log(10)));'.
2850 '$_=sprintf("%0${f}d",slot())'),
2851 # {seq-1} = seq-1 = counting from 0
2852 '{seq(.*?)}' => '$_=eval q{$job->seq()}.qq{$$1}',
2853 # {seq-1} = jobslot-1 = counting from 0
2854 '{slot(.*?)}' => '$_=eval q{$job->slot()}.qq{$$1}',
2856 ## Bash inspired replacement strings
2857 # Bash ${a:-myval}
2858 '{:-([^}]+?)}' => '$_ ||= $$1',
2859 # Bash ${a:2}
2860 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2861 # Bash ${a:2:3}
2862 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2863 # echo {#z.*z.} ::: z.z.z.foo => z.foo
2864 # echo {##z.*z.} ::: z.z.z.foo => foo
2865 # Bash ${a#bc}
2866 '{#([^#}][^}]*?)}' =>
2867 '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;',
2868 # Bash ${a##bc}
2869 '{##([^#}][^}]*?)}' => 's/^$$1//;',
2870 # echo {%.z.*z} ::: foo.z.z.z => foo.z
2871 # echo {%%.z.*z} ::: foo.z.z.z => foo
2872 # Bash ${a%def}
2873 '{%([^}]+?)}' =>
2874 '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;',
2875 # Bash ${a%%def}
2876 '{%%([^}]+?)}' => 's/$$1$//;',
2877 # Bash ${a/def/ghi} ${a/def/}
2878 '{/([^#%}/]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2879 # Bash ${a/#def/ghi} ${a/#def/}
2880 '{/#([^}]+?)/([^}]*?)}' => 's/^$$1/$$2/g;',
2881 # Bash ${a/%def/ghi} ${a/%def/}
2882 '{/%([^}]+?)/([^}]*?)}' => 's/$$1$/$$2/g;',
2883 # Bash ${a//def/ghi} ${a//def/}
2884 '{//([^}]+?)/([^}]*?)}' => 's/$$1/$$2/g;',
2885 # Bash ${a^a}
2886 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2887 # Bash ${a^^a}
2888 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2889 # Bash ${a,A}
2890 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2891 # Bash ${a,,A}
2892 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2894 # {slot} = $PARALLEL_JOBSLOT
2895 '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2896 # {host} = ssh host
2897 '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2898 # {sshlogin} = sshlogin
2899 '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2900 # {hgrp} = hostgroups of the host
2901 '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()',
2902 # {agrp} = hostgroups of the argument
2903 '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()',
2905 # Modifiable copy of %Global::replace
2906 %Global::rpl = %Global::replace;
2907 $/ = "\n";
2908 $Global::ignore_empty = 0;
2909 $Global::interactive = 0;
2910 $Global::stderr_verbose = 0;
2911 $Global::default_simultaneous_sshlogins = 9;
2912 $Global::exitstatus = 0;
2913 $Global::arg_sep = ":::";
2914 $Global::arg_file_sep = "::::";
2915 $Global::trim = 'n';
2916 $Global::max_jobs_running = 0;
2917 $Global::job_already_run = '';
2918 $ENV{'TMPDIR'} ||= "/tmp";
2919 $ENV{'PARALLEL_REMOTE_TMPDIR'} ||= "/tmp";
2920 # bug #55398: set $OLDPWD when using --wd
2921 $ENV{'OLDPWD'} = $ENV{'PWD'};
2922 if(not $ENV{HOME}) {
2923 # $ENV{HOME} is sometimes not set if called from PHP
2924 ::warning("\$HOME not set. Using /tmp.");
2925 $ENV{HOME} = "/tmp";
2927 # no warnings to allow for undefined $XDG_*
2928 no warnings 'uninitialized';
2929 # If $PARALLEL_HOME is set, but does not exist, try making it.
2930 if(defined $ENV{'PARALLEL_HOME'}) {
2931 eval { File::Path::mkpath($ENV{'PARALLEL_HOME'}); };
2933 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2934 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2935 # Use the first config dir that exists from:
2936 # $PARALLEL_HOME
2937 # $XDG_CONFIG_HOME/parallel
2938 # $(each XDG_CONFIG_DIRS)/parallel
2939 # $HOME/.parallel
2941 # Keep only dirs that exist
2942 @Global::config_dirs =
2943 (grep { -d $_ }
2944 $ENV{'PARALLEL_HOME'},
2945 (map { "$_/parallel" }
2946 $xdg_config_home,
2947 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2948 $ENV{'HOME'} . "/.parallel");
2949 # Use first dir as config dir
2950 $Global::config_dir = $Global::config_dirs[0] ||
2951 $ENV{'HOME'} . "/.parallel";
2952 if($ENV{'PARALLEL_HOME'} =~ /./ and not -d $ENV{'PARALLEL_HOME'}) {
2953 ::warning("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist.");
2954 ::warning("Using $Global::config_dir");
2956 # Use the first cache dir that exists from:
2957 # $PARALLEL_HOME
2958 # $XDG_CACHE_HOME/parallel
2959 # Keep only dirs that exist
2960 @Global::cache_dirs = (grep { -d $_ }
2961 $ENV{'PARALLEL_HOME'},
2962 $ENV{'XDG_CACHE_HOME'}."/parallel");
2963 $Global::cache_dir = $Global::cache_dirs[0] ||
2964 $ENV{'HOME'} . "/.parallel";
2965 Job::init_color();
2968 sub parse_halt() {
2969 # $opt::halt flavours
2970 # Uses:
2971 # $opt::halt
2972 # $Global::halt_when
2973 # $Global::halt_fail
2974 # $Global::halt_success
2975 # $Global::halt_pct
2976 # $Global::halt_count
2977 if(defined $opt::halt) {
2978 my %halt_expansion = (
2979 "0" => "never",
2980 "1" => "soon,fail=1",
2981 "2" => "now,fail=1",
2982 "-1" => "soon,success=1",
2983 "-2" => "now,success=1",
2985 # Expand -2,-1,0,1,2 into long form
2986 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2987 # --halt 5% == --halt soon,fail=5%
2988 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2989 # Split: soon,fail=5%
2990 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2991 if(not grep { $when eq $_ } qw(never soon now)) {
2992 ::error("--halt must have 'never', 'soon', or 'now'.");
2993 ::wait_and_exit(255);
2995 $Global::halt_when = $when;
2996 if($when ne "never") {
2997 if($fail_success eq "fail") {
2998 $Global::halt_fail = 1;
2999 } elsif($fail_success eq "success") {
3000 $Global::halt_success = 1;
3001 } elsif($fail_success eq "done") {
3002 $Global::halt_done = 1;
3003 } else {
3004 ::error("--halt $when must be followed by ,success or ,fail.");
3005 ::wait_and_exit(255);
3007 if($pct_count =~ /^(\d+)%$/) {
3008 $Global::halt_pct = $1/100;
3009 } elsif($pct_count =~ /^(\d+)$/) {
3010 $Global::halt_count = $1;
3011 } else {
3012 ::error("--halt $when,$fail_success ".
3013 "must be followed by ,number or ,percent%.");
3014 ::wait_and_exit(255);
3020 sub parse_replacement_string_options() {
3021 # Deal with --rpl
3022 # Uses:
3023 # %Global::rpl
3024 # $Global::parensleft
3025 # $Global::parensright
3026 # $opt::parens
3027 # $Global::parensleft
3028 # $Global::parensright
3029 # $opt::plus
3030 # %Global::plus
3031 # $opt::I
3032 # $opt::U
3033 # $opt::i
3034 # $opt::basenamereplace
3035 # $opt::dirnamereplace
3036 # $opt::seqreplace
3037 # $opt::slotreplace
3038 # $opt::basenameextensionreplace
3040 sub rpl($$) {
3041 # Modify %Global::rpl
3042 # Replace $old with $new
3043 my ($old,$new) = @_;
3044 if($old ne $new) {
3045 $Global::rpl{$new} = $Global::rpl{$old};
3046 delete $Global::rpl{$old};
3049 my $parens = "{==}";
3050 if(defined $opt::parens) { $parens = $opt::parens; }
3051 my $parenslen = 0.5*length $parens;
3052 $Global::parensleft = substr($parens,0,$parenslen);
3053 $Global::parensright = substr($parens,$parenslen);
3054 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
3055 if(defined $opt::I) { rpl('{}',$opt::I); }
3056 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
3057 if(defined $opt::U) { rpl('{.}',$opt::U); }
3058 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
3059 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
3060 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
3061 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
3062 if(defined $opt::basenameextensionreplace) {
3063 rpl('{/.}',$opt::basenameextensionreplace);
3065 for(@opt::rpl) {
3066 # Create $Global::rpl entries for --rpl options
3067 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
3068 my ($shorthand,$long) = split/\s/,$_,2;
3069 $Global::rpl{$shorthand} = $long;
3073 sub parse_semaphore() {
3074 # Semaphore defaults
3075 # Must be done before computing number of processes and max_line_length
3076 # because when running as a semaphore GNU Parallel does not read args
3077 # Uses:
3078 # $opt::semaphore
3079 # $Global::semaphore
3080 # $opt::semaphoretimeout
3081 # $Semaphore::timeout
3082 # $opt::semaphorename
3083 # $Semaphore::name
3084 # $opt::fg
3085 # $Semaphore::fg
3086 # $opt::wait
3087 # $Semaphore::wait
3088 # $opt::bg
3089 # @opt::a
3090 # @Global::unget_argv
3091 # $Global::default_simultaneous_sshlogins
3092 # $opt::jobs
3093 # $Global::interactive
3094 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
3095 if(defined $opt::semaphore) { $Global::semaphore = 1; }
3096 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
3097 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
3098 if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
3099 $Global::semaphore = 1;
3101 if(defined $opt::bg) { $Global::semaphore = 1; }
3102 if(defined $opt::wait and not $opt::sqlmaster) {
3103 $Global::semaphore = 1; @ARGV = "true";
3105 if($Global::semaphore) {
3106 if(@opt::a) {
3107 # Assign the first -a to STDIN
3108 open(STDIN,"<",shift @opt::a);
3109 if(@opt::a) {
3110 # We currently have no way of dealing with more -a
3111 ::error("A semaphore cannot take input from more files\n");
3112 ::wait_and_exit(255);
3115 @opt::a = ("/dev/null");
3116 # Append a dummy empty argument
3117 # \0 => nothing (not the empty string)
3118 push(@Global::unget_argv, [Arg->new("\0noarg")]);
3119 $Semaphore::timeout = int(multiply_time_units($opt::semaphoretimeout))
3120 || 0;
3121 if(defined $opt::semaphorename) {
3122 $Semaphore::name = $opt::semaphorename;
3123 } else {
3124 local $/ = "\n";
3125 $Semaphore::name = `tty`;
3126 chomp $Semaphore::name;
3128 $Semaphore::fg = $opt::fg;
3129 $Semaphore::wait = $opt::wait;
3130 $Global::default_simultaneous_sshlogins = 1;
3131 if(not defined $opt::jobs) {
3132 $opt::jobs = 1;
3134 if($Global::interactive and $opt::bg) {
3135 ::error("Jobs running in the ".
3136 "background cannot be interactive.");
3137 ::wait_and_exit(255);
3142 sub record_env() {
3143 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
3144 # Returns: N/A
3145 my $ignore_filename = $Global::config_dir . "/ignored_vars";
3146 write_or_exit($ignore_filename,map { $_,"\n" } keys %ENV);
3149 sub open_joblog() {
3150 # Open joblog as specified by --joblog
3151 # Uses:
3152 # $opt::resume
3153 # $opt::resume_failed
3154 # $opt::joblog
3155 # $opt::results
3156 # $Global::job_already_run
3157 # %Global::fh
3158 my $append = 0;
3159 if(($opt::resume or $opt::resume_failed)
3161 not ($opt::joblog or $opt::results)) {
3162 ::error("--resume and --resume-failed require --joblog or --results.");
3163 ::wait_and_exit(255);
3165 if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
3166 # --joblog +filename = append to filename
3167 $append = 1;
3169 if($opt::joblog
3171 ($opt::sqlmaster
3173 not $opt::sqlworker)) {
3174 # Do not log if --sqlworker
3175 if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
3176 if(open(my $joblog_fh, "<", $opt::joblog)) {
3177 # Enable utf8 if possible
3178 eval q{ binmode $joblog_fh, "encoding(utf8)"; };
3179 # Read the joblog
3180 # Override $/ with \n because -d might be set
3181 local $/ = "\n";
3182 # If there is a header: Open as append later
3183 $append = <$joblog_fh>;
3184 my $joblog_regexp;
3185 if($opt::retry_failed) {
3186 # Make a regexp that matches commands with exit+signal=0
3187 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
3188 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
3189 my @group;
3190 while(<$joblog_fh>) {
3191 if(/$joblog_regexp/o) {
3192 # This is 30% faster than set_job_already_run($1);
3193 vec($Global::job_already_run,($1||0),1) = 1;
3194 $Global::total_completed++;
3195 $group[$1-1] = "true";
3196 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
3197 # Grab out the command
3198 $group[$1-1] = $3;
3199 } else {
3200 chomp;
3201 ::error("Format of '$opt::joblog' is wrong: $_");
3202 ::wait_and_exit(255);
3205 if(@group) {
3206 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
3207 unlink($name);
3208 # Put args into argfile
3209 if(grep /\0/, @group) {
3210 # force --null to deal with \n in commandlines
3211 ::warning("Command lines contain newline. ".
3212 "Forcing --null.");
3213 $opt::null = 1;
3214 $/ = "\0";
3216 # Replace \0 with '\n' as used in print_joblog()
3217 print $outfh (map { s/\0/\n/g; $_,$/ }
3218 map { $_ } @group);
3219 seek $outfh, 0, 0;
3220 exit_if_disk_full();
3221 # Set filehandle to -a
3222 @opt::a = ($outfh);
3224 # Remove $command (so -a is run)
3225 @ARGV = ();
3227 if($opt::resume || $opt::resume_failed) {
3228 if($opt::resume_failed) {
3229 # Make a regexp that matches commands with exit+signal=0
3230 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
3231 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
3232 } else {
3233 # Just match the job number
3234 $joblog_regexp='^(\d+)';
3236 while(<$joblog_fh>) {
3237 if(/$joblog_regexp/o) {
3238 # This is 30% faster than set_job_already_run($1);
3239 vec($Global::job_already_run,($1||0),1) = 1;
3240 $Global::total_completed++;
3241 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
3242 ::error("Format of '$opt::joblog' is wrong: $_");
3243 ::wait_and_exit(255);
3247 close $joblog_fh;
3249 # $opt::null may be set if the commands contain \n
3250 if($opt::null) { $/ = "\0"; }
3252 if($opt::dryrun) {
3253 # Do not write to joblog in a dry-run
3255 } elsif($append) {
3256 # Append to joblog
3257 $Global::joblog = open_or_exit(">>", $opt::joblog);
3258 } else {
3259 if($opt::joblog eq "-") {
3260 # Use STDOUT as joblog
3261 $Global::joblog = $Global::fh{1};
3262 } else {
3263 # Overwrite the joblog
3264 $Global::joblog = open_or_exit(">", $opt::joblog);
3266 print $Global::joblog
3267 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
3268 "Send", "Receive", "Exitval", "Signal", "Command"
3269 ). "\n";
3274 sub open_json_csv() {
3275 if($opt::results) {
3276 # Output as JSON/CSV/TSV
3277 if($opt::results eq "-.csv"
3279 $opt::results eq "-.tsv"
3281 $opt::results eq "-.json") {
3282 # Output as JSON/CSV/TSV on stdout
3283 open $Global::csv_fh, ">&", "STDOUT" or
3284 ::die_bug("Can't dup STDOUT in csv: $!");
3285 # Do not print any other output to STDOUT
3286 # by forcing all other output to /dev/null
3287 open my $fd, ">", "/dev/null" or
3288 ::die_bug("Can't >/dev/null in csv: $!");
3289 $Global::fh{1} = $fd;
3290 $Global::fh{2} = $fd;
3291 } elsif($Global::csvsep or $Global::jsonout) {
3292 $Global::csv_fh = open_or_exit(">",$opt::results);
3297 sub find_compression_program() {
3298 # Find a fast compression program
3299 # Returns:
3300 # $compress_program = compress program with options
3301 # $decompress_program = decompress program with options
3303 # Search for these. Sorted by speed on 128 core
3305 # seq 120000000|shuf > 1gb &
3306 # apt-get update
3307 # apt install make g++ htop
3308 # wget -O - pi.dk/3 | bash
3309 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
3310 # git clone https://github.com/facebook/zstd.git
3311 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
3312 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
3313 # chmod +x /usr/local/bin/lrz
3314 # wait
3315 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
3316 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
3317 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
3318 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
3319 # sort -nk4 jl-?
3321 # 1-core:
3322 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
3323 # 4-cores:
3324 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
3325 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
3326 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
3327 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
3328 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
3330 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
3331 lrz pxz bzip2 lzma xz clzip);
3332 for my $p (@prg) {
3333 if(which($p)) {
3334 return ("$p -c -1","$p -dc");
3337 # Fall back to cat
3338 return ("cat","cat");
3341 sub read_options() {
3342 # Read options from command line, profile and $PARALLEL
3343 # Uses:
3344 # $opt::shebang_wrap
3345 # $opt::shebang
3346 # @ARGV
3347 # $opt::plain
3348 # @opt::profile
3349 # $ENV{'HOME'}
3350 # $ENV{'PARALLEL'}
3351 # Returns:
3352 # @ARGV_no_opt = @ARGV without --options
3354 # This must be done first as this may exec myself
3355 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
3356 $ARGV[0] =~ /^--shebang-?wrap/ or
3357 $ARGV[0] =~ /^--hashbang/)) {
3358 # Program is called from #! line in script
3359 # remove --shebang-wrap if it is set
3360 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
3361 # remove --shebang if it is set
3362 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
3363 # remove --hashbang if it is set
3364 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
3365 if($opt::shebang) {
3366 my $argfile = Q(pop @ARGV);
3367 # exec myself to split $ARGV[0] into separate fields
3368 exec "$0 --skip-first-line -a $argfile @ARGV";
3370 if($opt::shebang_wrap) {
3371 my @options;
3372 my @parser;
3373 if ($^O eq 'freebsd') {
3374 # FreeBSD's #! puts different values in @ARGV than Linux' does
3375 my @nooptions = @ARGV;
3376 get_options_from_array(\@nooptions);
3377 while($#ARGV > $#nooptions) {
3378 push @options, shift @ARGV;
3380 while(@ARGV and $ARGV[0] ne ":::") {
3381 push @parser, shift @ARGV;
3383 if(@ARGV and $ARGV[0] eq ":::") {
3384 shift @ARGV;
3386 } else {
3387 @options = shift @ARGV;
3389 my $script = Q(Q(shift @ARGV)); # TODO - test if script = " "
3390 my @args = map{ Q($_) } @ARGV;
3391 # exec myself to split $ARGV[0] into separate fields
3392 exec "$0 --_pipe-means-argfiles @options @parser $script ".
3393 "::: @args";
3396 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
3397 ::warning("--shebang and --shebang-wrap must be the first ".
3398 "argument.\n");
3401 Getopt::Long::Configure("bundling","require_order");
3402 my @ARGV_copy = @ARGV;
3403 my @ARGV_orig = @ARGV;
3404 # Check if there is a --profile to set @opt::profile
3405 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
3406 my @ARGV_profile = ();
3407 my @ARGV_env = ();
3408 if(not $opt::plain) {
3409 # Add options from $PARALLEL_HOME/config and other profiles
3410 my @config_profiles = (
3411 "/etc/parallel/config",
3412 (map { "$_/config" } @Global::config_dirs),
3413 $ENV{'HOME'}."/.parallelrc");
3414 my @profiles = @config_profiles;
3415 if(@opt::profile) {
3416 # --profile overrides default profiles
3417 @profiles = ();
3418 for my $profile (@opt::profile) {
3419 if($profile =~ m:^\./|^/:) {
3420 # Look for ./profile in .
3421 # Look for /profile in /
3422 push @profiles, grep { -r $_ } $profile;
3423 } else {
3424 # Look for the $profile in @Global::config_dirs
3425 push @profiles, grep { -r $_ }
3426 map { "$_/$profile" } @Global::config_dirs;
3430 for my $profile (@profiles) {
3431 if(-r $profile) {
3432 ::debug("init","Read $profile\n");
3433 local $/ = "\n";
3434 open (my $in_fh, "<", $profile) ||
3435 ::die_bug("read-profile: $profile");
3436 while(<$in_fh>) {
3437 /^\s*\#/ and next;
3438 chomp;
3439 push @ARGV_profile, shell_words($_);
3441 close $in_fh;
3442 } else {
3443 if(grep /^\Q$profile\E$/, @config_profiles) {
3444 # config file is not required to exist
3445 } else {
3446 ::error("$profile not readable.");
3447 wait_and_exit(255);
3451 # Add options from shell variable $PARALLEL
3452 if($ENV{'PARALLEL'}) {
3453 push @ARGV_env, shell_words($ENV{'PARALLEL'});
3455 # Add options from env_parallel.csh via $PARALLEL_CSH
3456 if($ENV{'PARALLEL_CSH'}) {
3457 push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'});
3460 Getopt::Long::Configure("bundling","require_order");
3461 get_options_from_array(\@ARGV_profile) || die_usage();
3462 get_options_from_array(\@ARGV_env) || die_usage();
3463 get_options_from_array(\@ARGV) || die_usage();
3464 # What were the options given on the command line?
3465 # Used to start --sqlworker
3466 my $ai = arrayindex(\@ARGV_orig, \@ARGV);
3467 @Global::options_in_argv = @ARGV_orig[0..$ai-1];
3468 # Prepend non-options to @ARGV (such as commands like 'nice')
3469 unshift @ARGV, @ARGV_profile, @ARGV_env;
3470 return @ARGV;
3473 sub arrayindex($$) {
3474 # Similar to Perl's index function, but for arrays
3475 # Input:
3476 # $arr_ref1 = ref to @array1 to search in
3477 # $arr_ref2 = ref to @array2 to search for
3478 # Returns:
3479 # $pos = position of @array1 in @array2, -1 if not found
3480 my ($arr_ref1,$arr_ref2) = @_;
3481 my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
3482 my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
3483 my $i = index($array1_as_string,$array2_as_string,0);
3484 if($i == -1) { return -1 }
3485 my @before = split /\0/, substr($array1_as_string,0,$i);
3486 return $#before;
3489 sub read_args_from_command_line() {
3490 # Arguments given on the command line after:
3491 # ::: ($Global::arg_sep)
3492 # :::: ($Global::arg_file_sep)
3493 # :::+ ($Global::arg_sep with --link)
3494 # ::::+ ($Global::arg_file_sep with --link)
3495 # Removes the arguments from @ARGV and:
3496 # - puts filenames into -a
3497 # - puts arguments into files and add the files to -a
3498 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
3499 # Input:
3500 # @::ARGV = command option ::: arg arg arg :::: argfiles
3501 # Uses:
3502 # $Global::arg_sep
3503 # $Global::arg_file_sep
3504 # $opt::_pipe_means_argfiles
3505 # $opt::pipe
3506 # @opt::a
3507 # Returns:
3508 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
3509 my %group_sep = ($Global::arg_sep => ":::",
3510 $Global::arg_sep."+" => ":::+",
3511 $Global::arg_file_sep => "::::",
3512 $Global::arg_file_sep."+" => "::::+");
3513 sub is_linked($) {
3514 # file is linked if file starts with +
3515 local $_ = shift;
3516 if(/^\+(.*)/) {
3517 my $noplus = $1;
3518 if(-e $_ and -e $noplus) {
3519 ::error("It is unclear whether you mean +./$noplus or ./+$noplus");
3520 wait_and_exit(255);
3521 } elsif(-e $_ and not -e $noplus) {
3522 # This is ./+file = this is not linked
3523 return 0;
3524 } elsif(not -e $_ and -e $noplus) {
3525 # This is +./file = this is linked
3526 return 1;
3527 } elsif(not -e $_ and not -e $noplus) {
3528 # File does not exist, maybe it is stdin?
3529 if($_ eq "-") {
3530 # This is - = this is not linked
3531 return 0;
3532 } elsif($_ eq "+-") {
3533 # This is +- = this is linked
3534 return 1;
3535 } else {
3536 ::error("File not found: $_");
3537 wait_and_exit(255);
3539 } else {
3540 ::die_bug("noplus: $noplus $_");
3543 # not linked
3544 return 0;
3546 sub cmd_template() {
3547 # remove command template from @ARGV
3548 # keep ::: / :::: in @ARGV if any
3549 my @cmd_template;
3550 while(@ARGV) {
3551 my $arg = shift @ARGV;
3552 if($group_sep{$arg}) {
3553 # Found separator: push it back and exit loop
3554 unshift @ARGV, $arg;
3555 last;
3557 push @cmd_template, $arg;
3559 return @cmd_template;
3561 sub divide_into_groups() {
3562 # Split arguments from @ARGV into groups:
3563 # ::: 1 2 3 :::: a b c ::::+ d e f
3564 # =>
3565 # [ ::: 1 2 3 ], [ :::: a b c ], [ ::::+ d e f ]
3566 my @g;
3567 my @grp;
3568 while(@ARGV) {
3569 my $arg = shift @ARGV;
3570 if($group_sep{$arg}) {
3571 # start a new group
3572 push @grp, [@g];
3573 @g = ($group_sep{$arg});
3574 } else {
3575 push @g, $arg;
3578 push @grp, [@g];
3579 shift @grp; # The first will always be empty
3580 return @grp;
3582 sub save_to_file(@) {
3583 # Put args into a file, return open file handle of file
3584 # Create argfile
3585 my ($fh,$name) = ::tmpfile(SUFFIX => ".arg");
3586 unlink($name);
3587 # Put args into argfile
3588 print $fh map { $_,$/ } @_;
3589 seek $fh, 0, 0;
3590 exit_if_disk_full();
3591 return $fh;
3593 my @cmd = cmd_template();
3594 # The rest of @ARGV is ::: / :::: args
3595 # If there are any -a: Rewrite them to use ::::
3596 if(@opt::a) { unshift @ARGV, $Global::arg_file_sep, @opt::a; }
3597 @opt::a = ();
3598 # Convert ::: and :::: into (linked) files and put those into @opt::a
3599 for my $g_ref (divide_into_groups()) {
3600 my $group_sep = shift @$g_ref;
3601 if($group_sep eq ":::" or $group_sep eq ":::+") {
3602 # Group starts with ::: / :::+
3603 if($opt::_pipe_means_argfiles and $#$g_ref < 0) {
3604 # TODO
3605 # Deal with --shebang-wrap and ::: on the shebang line
3606 } else {
3607 push @opt::a, save_to_file(@$g_ref);
3608 # if $group_sep == ":::+": it is linked
3609 push @opt::linkinputsource, ($group_sep eq ":::+");
3611 } elsif($group_sep eq "::::" or $group_sep eq "::::+") {
3612 # Group starts with :::: / ::::+
3613 for my $f (@$g_ref) {
3614 if($group_sep eq "::::+") {
3615 # Linking forced
3616 push @opt::a, $f;
3617 push @opt::linkinputsource, 1;
3618 } elsif($group_sep eq "::::") {
3619 # Auto detect linking
3620 if(is_linked($f)) {
3621 # +file
3622 push @opt::linkinputsource, 1;
3623 $f =~ s/^\+//;
3624 } else {
3625 # file (no plus)
3626 push @opt::linkinputsource, 0;
3628 push @opt::a, $f;
3629 } else {
3630 ::die_bug("arg link error");
3633 } else {
3634 ::die_bug("arg link error");
3637 # Output: command to run with options
3638 return @cmd;
3641 sub cleanup() {
3642 # Returns: N/A
3643 unlink keys %Global::unlink;
3644 map { rmdir $_ } keys %Global::unlink;
3645 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
3646 for(keys %Global::sshmaster) {
3647 # If 'ssh -M's are running: kill them
3648 kill "TERM", $_;
3653 sub __QUOTING_ARGUMENTS_FOR_SHELL__() {}
3655 sub shell_quote(@) {
3656 # Input:
3657 # @strings = strings to be quoted
3658 # Returns:
3659 # @shell_quoted_strings = string quoted as needed by the shell
3660 return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
3663 sub shell_quote_scalar_rc($) {
3664 # Quote for the rc-shell
3665 my $a = $_[0];
3666 if(defined $a) {
3667 if(($a =~ s/'/''/g)
3669 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
3670 # A string was replaced
3671 # No need to test for "" or \0
3672 } elsif($a eq "") {
3673 $a = "''";
3674 } elsif($a eq "\0") {
3675 $a = "";
3678 return $a;
3681 sub shell_quote_scalar_csh($) {
3682 # Quote for (t)csh
3683 my $a = $_[0];
3684 if(defined $a) {
3685 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
3686 # This is 1% faster than the above
3687 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
3689 # quote newline in csh as \\\n
3690 ($a =~ s/[\n]/"\\\n"/go)) {
3691 # A string was replaced
3692 # No need to test for "" or \0
3693 } elsif($a eq "") {
3694 $a = "''";
3695 } elsif($a eq "\0") {
3696 $a = "";
3699 return $a;
3702 sub shell_quote_scalar_default($) {
3703 # Quote for other shells (Bourne compatibles)
3704 # Inputs:
3705 # $string = string to be quoted
3706 # Returns:
3707 # $shell_quoted = string quoted as needed by the shell
3708 local $_ = $_[0];
3709 if(/[^-_.+a-z0-9\/]/i) {
3710 s/'+/'"$&"'/g; # "-quote '-quotes: ''' => "'''"
3711 $_ = "'$_'"; # '-quote entire string
3712 s/^''//; # Remove unneeded '' at ends
3713 s/''$//; # (faster than s/^''|''$//g)
3714 return $_;
3715 } elsif ($_ eq "") {
3716 return "''";
3717 } else {
3718 # No quoting needed
3719 return $_;
3723 sub shell_quote_scalar($) {
3724 # Quote the string so the shell will not expand any special chars
3725 # Inputs:
3726 # $string = string to be quoted
3727 # Returns:
3728 # $shell_quoted = string quoted as needed by the shell
3730 # Speed optimization: Choose the correct shell_quote_scalar_*
3731 # and call that directly from now on
3732 no warnings 'redefine';
3733 if($Global::cshell) {
3734 # (t)csh
3735 *shell_quote_scalar = \&shell_quote_scalar_csh;
3736 } elsif($Global::shell =~ m:(^|/)rc$:) {
3737 # rc-shell
3738 *shell_quote_scalar = \&shell_quote_scalar_rc;
3739 } else {
3740 # other shells
3741 *shell_quote_scalar = \&shell_quote_scalar_default;
3743 # The sub is now redefined. Call it
3744 return shell_quote_scalar($_[0]);
3747 sub Q($) {
3748 # Q alias for ::shell_quote_scalar
3749 my $ret = shell_quote_scalar($_[0]);
3750 no warnings 'redefine';
3751 *Q = \&::shell_quote_scalar;
3752 return $ret;
3755 sub shell_quote_file($) {
3756 # Quote the string so shell will not expand any special chars
3757 # and prepend ./ if needed
3758 # Input:
3759 # $filename = filename to be shell quoted
3760 # Returns:
3761 # $quoted_filename = filename quoted with \ and ./ if needed
3762 my $a = shift;
3763 if(defined $a) {
3764 if($a =~ m:^/: or $a =~ m:^\./:) {
3765 # /abs/path or ./rel/path => skip
3766 } else {
3767 # rel/path => ./rel/path
3768 $a = "./".$a;
3771 return Q($a);
3774 sub shell_words(@) {
3775 # Input:
3776 # $string = shell line
3777 # Returns:
3778 # @shell_words = $string split into words as shell would do
3779 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3780 return Text::ParseWords::shellwords(@_);
3783 sub perl_quote_scalar($) {
3784 # Quote the string so perl's eval will not expand any special chars
3785 # Inputs:
3786 # $string = string to be quoted
3787 # Returns:
3788 # $perl_quoted = string quoted with \ as needed by perl's eval
3789 my $a = $_[0];
3790 if(defined $a) {
3791 $a =~ s/[\\\"\$\@]/\\$&/go;
3793 return $a;
3796 # -w complains about prototype
3797 sub pQ($) {
3798 # pQ alias for ::perl_quote_scalar
3799 my $ret = perl_quote_scalar($_[0]);
3800 *pQ = \&::perl_quote_scalar;
3801 return $ret;
3804 sub unquote_printf() {
3805 # Convert \t \n \r \000 \0
3806 # Inputs:
3807 # $string = string with \t \n \r \num \0
3808 # Returns:
3809 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3810 $_ = shift;
3811 s/\\t/\t/g;
3812 s/\\n/\n/g;
3813 s/\\r/\r/g;
3814 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3815 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3816 return $_;
3820 sub __FILEHANDLES__() {}
3823 sub save_stdin_stdout_stderr() {
3824 # Remember the original STDIN, STDOUT and STDERR
3825 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3826 # Uses:
3827 # %Global::fh
3828 # $Global::original_stderr
3829 # $Global::original_stdin
3830 # Returns: N/A
3832 # TODO Disabled until we have an open3 that will take n filehandles
3833 # for my $fdno (1..61) {
3834 # # /dev/fd/62 and above are used by bash for <(cmd)
3835 # # Find file descriptors that are already opened (by the shell)
3836 # Only focus on stdout+stderr for now
3837 for my $fdno (1..2) {
3838 my $fh;
3839 # 2-argument-open is used to be compatible with old perl 5.8.0
3840 # bug #43570: Perl 5.8.0 creates 61 files
3841 if(open($fh,">&=$fdno")) {
3842 $Global::fh{$fdno}=$fh;
3845 open $Global::original_stderr, ">&", "STDERR" or
3846 ::die_bug("Can't dup STDERR: $!");
3847 open $Global::status_fd, ">&", "STDERR" or
3848 ::die_bug("Can't dup STDERR: $!");
3849 open $Global::original_stdin, "<&", "STDIN" or
3850 ::die_bug("Can't dup STDIN: $!");
3853 sub enough_file_handles() {
3854 # Check that we have enough filehandles available for starting
3855 # another job
3856 # Uses:
3857 # $opt::ungroup
3858 # %Global::fh
3859 # Returns:
3860 # 1 if ungrouped (thus not needing extra filehandles)
3861 # 0 if too few filehandles
3862 # 1 if enough filehandles
3863 if(not $opt::ungroup) {
3864 my %fh;
3865 my $enough_filehandles = 1;
3866 # perl uses 7 filehandles for something?
3867 # open3 uses 2 extra filehandles temporarily
3868 # We need a filehandle for each redirected file descriptor
3869 # (normally just STDOUT and STDERR)
3870 for my $i (1..(7+2+keys %Global::fh)) {
3871 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3873 for (values %fh) { close $_; }
3874 return $enough_filehandles;
3875 } else {
3876 # Ungrouped does not need extra file handles
3877 return 1;
3881 sub open_or_exit($$) {
3882 # Open a file name or exit if the file cannot be opened
3883 # Inputs:
3884 # $mode = read:"<" write:">"
3885 # $file = filehandle or filename to open
3886 # Uses:
3887 # $Global::original_stdin
3888 # Returns:
3889 # $fh = file handle to opened file
3890 my $mode = shift;
3891 my $file = shift;
3892 if($file eq "-") {
3893 if($mode eq "<") {
3894 return ($Global::original_stdin || *STDIN);
3895 } else {
3896 return ($Global::original_stderr || *STDERR);
3899 if(ref $file eq "GLOB") {
3900 # This is an open filehandle
3901 return $file;
3903 my $fh = gensym;
3904 if(not open($fh, $mode, $file)) {
3905 ::error("Cannot open `$file': $!");
3906 wait_and_exit(255);
3908 return $fh;
3911 sub slurp_or_exit($) {
3912 # Read content of a file or exit if the file cannot be opened
3913 # Inputs:
3914 # $file = filehandle or filename to open
3915 # Returns:
3916 # $content = content as scalar
3917 my $fh = open_or_exit("<",shift);
3918 # $/ = undef => slurp whole file
3919 local $/;
3920 my $content = <$fh>;
3921 close $fh;
3922 return $content;
3925 sub write_or_exit(@) {
3926 # Write content to a file or exit if the file cannot be opened
3927 # Inputs:
3928 # $file = filehandle or filename to open
3929 # @content = content to be written
3930 # Returns:
3931 # N/A
3932 my $file = shift;
3933 sub failed {
3934 error("Cannot write to `$file': $!");
3935 wait_and_exit(255);
3937 my $fh = open_or_exit(">",$file);
3938 print($fh @_) or failed();
3939 close($fh) or failed();
3942 sub set_fh_blocking($) {
3943 # Set filehandle as blocking
3944 # Inputs:
3945 # $fh = filehandle to be blocking
3946 # Returns:
3947 # N/A
3948 my $fh = shift;
3949 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3950 my $flags;
3951 # Get the current flags on the filehandle
3952 fcntl($fh, &F_GETFL, $flags) || die $!;
3953 # Remove non-blocking from the flags
3954 $flags &= ~&O_NONBLOCK;
3955 # Set the flags on the filehandle
3956 fcntl($fh, &F_SETFL, $flags) || die $!;
3959 sub set_fh_non_blocking($) {
3960 # Set filehandle as non-blocking
3961 # Inputs:
3962 # $fh = filehandle to be blocking
3963 # Returns:
3964 # N/A
3965 my $fh = shift;
3966 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3967 my $flags;
3968 # Get the current flags on the filehandle
3969 fcntl($fh, &F_GETFL, $flags) || die $!;
3970 # Add non-blocking to the flags
3971 $flags |= &O_NONBLOCK;
3972 # Set the flags on the filehandle
3973 fcntl($fh, &F_SETFL, $flags) || die $!;
3977 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3980 # Variable structure:
3982 # $Global::running{$pid} = Pointer to Job-object
3983 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3984 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3985 # $Global::total_running = total number of running jobs
3986 # $Global::total_started = total jobs started
3987 # $Global::max_procs_file = filename if --jobs is given a filename
3988 # $Global::JobQueue = JobQueue object for the queue of jobs
3989 # $Global::timeoutq = queue of times where jobs timeout
3990 # $Global::newest_job = Job object of the most recent job started
3991 # $Global::newest_starttime = timestamp of $Global::newest_job
3992 # @Global::sshlogin
3993 # $Global::minimal_command_line_length = min len supported by all sshlogins
3994 # $Global::start_no_new_jobs = should more jobs be started?
3995 # $Global::original_stderr = file handle for STDERR when the program started
3996 # $Global::total_started = total number of jobs started
3997 # $Global::joblog = filehandle of joblog
3998 # $Global::debug = Is debugging on?
3999 # $Global::exitstatus = status code of GNU Parallel
4000 # $Global::quoting = quote the command to run
4002 sub init_run_jobs() {
4003 # Set Global variables and progress signal handlers
4004 # Do the copying of basefiles
4005 # Returns: N/A
4006 $Global::total_running = 0;
4007 $Global::total_started = 0;
4008 $SIG{USR1} = \&list_running_jobs;
4009 $SIG{USR2} = \&toggle_progress;
4010 if(@opt::basefile) { setup_basefile(); }
4014 my $last_time;
4015 my %last_mtime;
4016 my $max_procs_file_last_mod;
4018 sub changed_procs_file {
4019 # If --jobs is a file and it is modfied:
4020 # Force recomputing of max_jobs_running for each $sshlogin
4021 # Uses:
4022 # $Global::max_procs_file
4023 # %Global::host
4024 # Returns: N/A
4025 if($Global::max_procs_file) {
4026 # --jobs filename
4027 my $mtime = (stat($Global::max_procs_file))[9];
4028 $max_procs_file_last_mod ||= 0;
4029 if($mtime > $max_procs_file_last_mod) {
4030 # file changed: Force re-computing max_jobs_running
4031 $max_procs_file_last_mod = $mtime;
4032 for my $sshlogin (values %Global::host) {
4033 $sshlogin->set_max_jobs_running(undef);
4039 sub changed_sshloginfile {
4040 # If --slf is changed:
4041 # reload --slf
4042 # filter_hosts
4043 # setup_basefile
4044 # Uses:
4045 # @opt::sshloginfile
4046 # @Global::sshlogin
4047 # %Global::host
4048 # $opt::filter_hosts
4049 # Returns: N/A
4050 if(@opt::sshloginfile) {
4051 # Is --sshloginfile changed?
4052 for my $slf (@opt::sshloginfile) {
4053 my $actual_file = expand_slf_shorthand($slf);
4054 my $mtime = (stat($actual_file))[9];
4055 $last_mtime{$actual_file} ||= $mtime;
4056 if($mtime - $last_mtime{$actual_file} > 1) {
4057 ::debug("run",
4058 "--sshloginfile $actual_file changed. reload\n");
4059 $last_mtime{$actual_file} = $mtime;
4060 # Reload $slf
4061 # Empty sshlogins
4062 @Global::sshlogin = ();
4063 for (values %Global::host) {
4064 # Don't start new jobs on any host
4065 # except the ones added back later
4066 $_->set_max_jobs_running(0);
4068 # This will set max_jobs_running on the SSHlogins
4069 read_sshloginfile($actual_file);
4070 parse_sshlogin();
4071 $opt::filter_hosts and filter_hosts();
4072 setup_basefile();
4078 sub start_more_jobs {
4079 # Run start_another_job() but only if:
4080 # * not $Global::start_no_new_jobs set
4081 # * not JobQueue is empty
4082 # * not load on server is too high
4083 # * not server swapping
4084 # * not too short time since last remote login
4085 # Uses:
4086 # %Global::host
4087 # $Global::start_no_new_jobs
4088 # $Global::JobQueue
4089 # $opt::pipe
4090 # $opt::load
4091 # $opt::noswap
4092 # $opt::delay
4093 # $Global::newest_starttime
4094 # Returns:
4095 # $jobs_started = number of jobs started
4096 my $jobs_started = 0;
4097 if($Global::start_no_new_jobs) {
4098 return $jobs_started;
4100 if(time - ($last_time||0) > 1) {
4101 # At most do this every second
4102 $last_time = time;
4103 changed_procs_file();
4104 changed_sshloginfile();
4106 # This will start 1 job on each --sshlogin (if possible)
4107 # thus distribute the jobs on the --sshlogins round robin
4108 for my $sshlogin (values %Global::host) {
4109 if($Global::JobQueue->empty() and not $opt::pipe) {
4110 # No more jobs in the queue
4111 last;
4113 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
4114 $sshlogin->jobs_running(), "\n");
4115 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
4116 if($opt::delay
4118 $opt::delay-0.008 > ::now()-$Global::newest_starttime) {
4119 # It has been too short since last start
4120 next;
4122 if($opt::load and $sshlogin->loadavg_too_high()) {
4123 # The load is too high or unknown
4124 next;
4126 if($opt::noswap and $sshlogin->swapping()) {
4127 # The server is swapping
4128 next;
4130 if($opt::limit and $sshlogin->limit()) {
4131 # Over limit
4132 next;
4134 if(($opt::memfree or $opt::memsuspend)
4136 $sshlogin->memfree() < $Global::memlimit) {
4137 # The server has not enough mem free
4138 ::debug("mem", "Not starting job: not enough mem\n");
4139 next;
4141 if($sshlogin->too_fast_remote_login()) {
4142 # It has been too short since last login
4143 next;
4145 debug("run", $sshlogin->string(),
4146 " has ", $sshlogin->jobs_running(),
4147 " out of ", $sshlogin->max_jobs_running(),
4148 " jobs running. Start another.\n");
4149 if(start_another_job($sshlogin) == 0) {
4150 # No more jobs to start on this $sshlogin
4151 debug("run","No jobs started on ",
4152 $sshlogin->string(), "\n");
4153 next;
4155 $sshlogin->inc_jobs_running();
4156 $sshlogin->set_last_login_at(::now());
4157 $jobs_started++;
4159 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
4160 $sshlogin->jobs_running(), " of ",
4161 $sshlogin->max_jobs_running(), "\n");
4164 return $jobs_started;
4169 my $no_more_file_handles_warned;
4171 sub start_another_job() {
4172 # If there are enough filehandles
4173 # and JobQueue not empty
4174 # and not $job is in joblog
4175 # Then grab a job from Global::JobQueue,
4176 # start it at sshlogin
4177 # mark it as virgin_job
4178 # Inputs:
4179 # $sshlogin = the SSHLogin to start the job on
4180 # Uses:
4181 # $Global::JobQueue
4182 # $opt::pipe
4183 # $opt::results
4184 # $opt::resume
4185 # @Global::virgin_jobs
4186 # Returns:
4187 # 1 if another jobs was started
4188 # 0 otherwise
4189 my $sshlogin = shift;
4190 # Do we have enough file handles to start another job?
4191 if(enough_file_handles()) {
4192 if($Global::JobQueue->empty() and not $opt::pipe) {
4193 # No more commands to run
4194 debug("start", "Not starting: JobQueue empty\n");
4195 return 0;
4196 } else {
4197 my $job;
4198 # Skip jobs already in job log
4199 # Skip jobs already in results
4200 do {
4201 $job = get_job_with_sshlogin($sshlogin);
4202 if(not defined $job) {
4203 # No command available for that sshlogin
4204 debug("start", "Not starting: no jobs available for ",
4205 $sshlogin->string(), "\n");
4206 return 0;
4208 if($job->is_already_in_joblog()) {
4209 $job->free_slot();
4211 } while ($job->is_already_in_joblog()
4213 ($opt::results and $opt::resume
4214 and $job->is_already_in_results()));
4215 debug("start", "Command to run on '",
4216 $job->sshlogin()->string(), "': '",
4217 $job->replaced(),"'\n");
4218 if($job->start()) {
4219 if($opt::pipe) {
4220 if($job->virgin()) {
4221 push(@Global::virgin_jobs,$job);
4222 } else {
4223 # Block already set: This is a retry
4224 $job->write_block();
4227 debug("start", "Started as seq ", $job->seq(),
4228 " pid:", $job->pid(), "\n");
4229 return 1;
4230 } else {
4231 # Not enough processes to run the job.
4232 # Put it back on the queue.
4233 $Global::JobQueue->unget($job);
4234 # Count down the number of jobs to run for this SSHLogin.
4235 my $max = $sshlogin->max_jobs_running();
4236 if($max > 1) { $max--; } else {
4237 my @arg;
4238 for my $record (@{$job->{'commandline'}{'arg_list'}}) {
4239 push @arg, map { $_->orig() } @$record;
4241 ::error("No more processes: cannot run a single job. ".
4242 "Something is wrong at @arg.");
4243 ::wait_and_exit(255);
4245 $sshlogin->set_max_jobs_running($max);
4246 # Sleep up to 300 ms to give other processes time to die
4247 ::usleep(rand()*300);
4248 ::warning("No more processes: ".
4249 "Decreasing number of running jobs to $max.",
4250 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
4251 "or increasing 'nproc' in /etc/security/limits.conf",
4252 "or increasing /proc/sys/kernel/pid_max");
4253 return 0;
4256 } else {
4257 # No more file handles
4258 $no_more_file_handles_warned++ or
4259 ::warning("No more file handles. ",
4260 "Try running 'parallel -j0 -N 100 --pipe parallel -j0'",
4261 "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)",
4262 "or increasing 'nofile' in /etc/security/limits.conf",
4263 "or increasing /proc/sys/fs/file-max");
4264 debug("start", "No more file handles. ");
4265 return 0;
4270 sub init_progress() {
4271 # Uses:
4272 # $opt::bar
4273 # Returns:
4274 # list of computers for progress output
4275 $|=1;
4276 if($opt::bar) {
4277 return("","");
4279 my $progress = progress();
4280 my $cpu_units = $opt::use_sockets_instead_of_threads ? "CPU sockets" :
4281 ($opt::use_cores_instead_of_threads ? "CPU cores" : "CPU threads");
4282 return ("\nComputers / $cpu_units / Max jobs to run\n",
4283 $progress->{'workerlist'},"\n",$progress->{'header'});
4286 sub drain_job_queue(@) {
4287 # Uses:
4288 # $opt::progress
4289 # $Global::total_running
4290 # $Global::max_jobs_running
4291 # %Global::running
4292 # $Global::JobQueue
4293 # %Global::host
4294 # $Global::start_no_new_jobs
4295 # Returns: N/A
4296 my @command = @_;
4297 my $sleep = 0.2;
4298 my $sleepsum = 0;
4299 do {
4300 while($Global::total_running > 0) {
4301 debug("init",$Global::total_running, "==", scalar
4302 keys %Global::running," slots: ", $Global::max_jobs_running);
4303 if($opt::pipe) {
4304 # When using --pipe sometimes file handles are not
4305 # closed properly
4306 for my $job (values %Global::running) {
4307 close $job->fh(0,"w");
4310 if($opt::progress) {
4311 my $progress = progress();
4312 ::status_no_nl("\r",$progress->{'status'});
4314 if($Global::total_running < $Global::max_jobs_running
4315 and not $Global::JobQueue->empty()) {
4316 # These jobs may not be started because of loadavg
4317 # or too little time between each ssh login.
4318 if(start_more_jobs() > 0) {
4319 # Exponential back-on if jobs were started
4320 $sleep = $sleep/2+0.001;
4323 # Exponential back-off sleeping
4324 $sleep = ::reap_usleep($sleep);
4325 $sleepsum += $sleep;
4326 if($sleepsum >= 1000) {
4327 # At most do this every second
4328 $sleepsum = 0;
4329 changed_procs_file();
4330 changed_sshloginfile();
4331 start_more_jobs();
4334 if(not $Global::JobQueue->empty()) {
4335 # These jobs may not be started:
4336 # * because there the --filter-hosts has removed all
4337 if(not %Global::host) {
4338 ::error("There are no hosts left to run on.");
4339 ::wait_and_exit(255);
4341 # * because of loadavg
4342 # * because of too little time between each ssh login.
4343 $sleep = ::reap_usleep($sleep);
4344 start_more_jobs();
4345 if($Global::max_jobs_running == 0) {
4346 ::warning("There are no job slots available. Increase --jobs.");
4349 while($opt::sqlmaster and not $Global::sql->finished()) {
4350 # SQL master
4351 $sleep = ::reap_usleep($sleep);
4352 start_more_jobs();
4353 if($Global::start_sqlworker) {
4354 # Start an SQL worker as we are now sure there is work to do
4355 $Global::start_sqlworker = 0;
4356 if(my $pid = fork()) {
4357 $Global::unkilled_sqlworker = $pid;
4358 } else {
4359 # Replace --sql/--sqlandworker with --sqlworker
4360 my @ARGV = (map { s/^--sql(andworker)?$/--sqlworker/; $_ }
4361 @Global::options_in_argv);
4362 # exec the --sqlworker
4363 exec($0,@ARGV,@command);
4367 } while ($Global::total_running > 0
4369 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
4371 $opt::sqlmaster and not $Global::sql->finished());
4372 $Global::all_jobs_done = 1;
4373 if($opt::progress) {
4374 my $progress = progress();
4375 ::status("\r".$progress->{'status'});
4379 sub toggle_progress() {
4380 # Turn on/off progress view
4381 # Uses:
4382 # $opt::progress
4383 # Returns: N/A
4384 $opt::progress = not $opt::progress;
4385 if($opt::progress) {
4386 ::status_no_nl(init_progress());
4391 my $last_header;
4392 my $eol;
4394 sub progress() {
4395 # Uses:
4396 # $opt::bar
4397 # $opt::eta
4398 # %Global::host
4399 # $Global::total_started
4400 # Returns:
4401 # $workerlist = list of workers
4402 # $header = that will fit on the screen
4403 # $status = message that will fit on the screen
4404 if($opt::bar) {
4405 return {"workerlist" => "", "header" => "", "status" => bar()};
4407 my $eta = "";
4408 my ($status,$header)=("","");
4409 if($opt::eta) {
4410 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
4411 compute_eta();
4412 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
4413 $this_eta, $left, $avgtime);
4415 my $termcols = terminal_columns();
4416 my @workers = sort keys %Global::host;
4417 my $workerno = 1;
4418 my %wrk;
4419 for my $w (@workers) {
4420 my %i;
4421 $i{'sshlogin'} = $w eq ":" ? "local" : $w;
4422 $i{'no'} = $workerno++;
4423 $i{'ncpu'} = ($Global::host{$w}->ncpus() || "-");
4424 $i{'jobslots'} = $Global::host{$w}->max_jobs_running();
4425 $i{'completed'} = ($Global::host{$w}->jobs_completed() || 0);
4426 $i{'running'} = $Global::host{$w}->jobs_running();
4427 $i{'pct'} = $Global::total_started ?
4428 (($i{'running'}+$i{'completed'})*100 /
4429 $Global::total_started) : 0;
4430 $i{'time'} = $i{'completed'} ? (time-$^T)/($i{'completed'}) : 0;
4431 $wrk{$w} = \%i;
4434 my $workerlist = "";
4435 for my $w (@workers) {
4436 $workerlist .=
4437 $wrk{$w}{'no'}.":".$wrk{$w}{'sshlogin'} ." / ".
4438 $wrk{$w}{'ncpu'}." / ".
4439 $wrk{$w}{'jobslots'}."\n";
4441 # Force $status to select one of the below formats
4442 $status = "c"x($termcols+1);
4443 # Select an output format that will fit on a single line
4444 if(length $status > $termcols) {
4445 # sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs
4446 $header = "Computer:jobs running/jobs completed/".
4447 "%of started jobs/Average seconds to complete";
4448 $status = $eta . join(" ",map {
4449 sprintf("%s:%d/%d/%d%%/%.1fs ",
4450 @{$wrk{$_}}
4451 {'sshlogin','running','completed','pct','time'}
4452 ); } @workers);
4454 if(length $status > $termcols) {
4455 # 1:XX/XX/XX%/X.Xs 2:XX/XX/XX%/X.Xs 3:XX/XX/XX%/X.Xs
4456 $header = "Computer:jobs running/jobs completed/%of started jobs";
4457 $status = $eta . join(" ",map {
4458 sprintf("%s:%d/%d/%d%%/%.1fs ",
4459 @{$wrk{$_}}
4460 {'no','running','completed','pct','time'}
4461 ); } @workers);
4463 if(length $status > $termcols) {
4464 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
4465 $header = "Computer:jobs running/jobs completed/%of started jobs";
4466 $status = $eta . join(" ",map {
4467 sprintf("%s:%d/%d/%d%%",
4468 @{$wrk{$_}}
4469 {'sshlogin','running','completed','pct'}
4470 ); } @workers);
4472 if(length $status > $termcols) {
4473 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX%
4474 $header = "Computer:jobs running/jobs completed/%of started jobs";
4475 $status = $eta . join(" ",map {
4476 sprintf("%s:%d/%d/%d%%",
4477 @{$wrk{$_}}
4478 {'no','running','completed','pct'}
4479 ); } @workers);
4481 if(length $status > $termcols) {
4482 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX
4483 $header = "Computer:jobs running/jobs completed";
4484 $status = $eta . join(" ", map {
4485 sprintf("%s:%d/%d",
4486 @{$wrk{$_}}
4487 {'sshlogin','running','completed'}
4488 ); } @workers);
4490 if(length $status > $termcols) {
4491 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
4492 $header = "Computer:jobs running/jobs completed";
4493 $status = $eta . join(" ", map {
4494 sprintf("%s:%d/%d",
4495 @{$wrk{$_}}
4496 {'no','running','completed'}
4497 ); } @workers);
4499 if(length $status > $termcols) {
4500 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
4501 $header = "Computer:jobs completed";
4502 $status = $eta . join(" ", map {
4503 sprintf("%s:%d",
4504 @{$wrk{$_}}
4505 {'sshlogin','completed'}
4506 ); } @workers);
4508 if(length $status > $termcols) {
4509 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
4510 $header = "Computer:jobs completed";
4511 $status = $eta . join(" ", map {
4512 sprintf("%s:%d",
4513 @{$wrk{$_}}
4514 {'no','completed'}
4515 ); } @workers);
4517 if($last_header ne $header) {
4518 $header .= "\n";
4519 $last_header = $header;
4520 } else {
4521 $header = "";
4523 if(not $eol) {
4524 $eol = `sh -c "tput el </dev/tty" 2>/dev/null`;
4525 chomp($eol);
4526 if($eol eq "") { $eol = "\033[K"; }
4529 return {"workerlist" => $workerlist, "header" => $header,
4530 "status" => $status.$eol};
4536 my ($first_completed, $smoothed_avg_time, $last_eta);
4538 sub compute_eta {
4539 # Calculate important numbers for ETA
4540 # Returns:
4541 # $total = number of jobs in total
4542 # $completed = number of jobs completed
4543 # $left = number of jobs left
4544 # $pctcomplete = percent of jobs completed
4545 # $avgtime = averaged time
4546 # $eta = smoothed eta
4547 my $completed = $Global::total_completed;
4548 # In rare cases with -X will $completed > total_jobs()
4549 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
4550 my $left = $total - $completed;
4551 if(not $completed) {
4552 return($total, $completed, $left, 0, 0, 0);
4554 my $pctcomplete = ::min($completed / $total,100);
4555 $first_completed ||= time;
4556 my $timepassed = (time - $first_completed);
4557 my $avgtime = $timepassed / $completed;
4558 $smoothed_avg_time ||= $avgtime;
4559 # Smooth the eta so it does not jump wildly
4560 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
4561 $pctcomplete * $avgtime;
4562 my $eta = int($left * $smoothed_avg_time);
4563 if($eta*0.90 < $last_eta and $last_eta < $eta) {
4564 # Eta jumped less that 10% up: Keep the last eta instead
4565 $eta = $last_eta;
4566 } else {
4567 $last_eta = $eta;
4569 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
4574 my ($rev,$reset);
4576 sub bar() {
4577 # Return:
4578 # $status = bar with eta, completed jobs, arg and pct
4579 $rev ||= "\033[7m";
4580 $reset ||= "\033[0m";
4581 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
4582 compute_eta();
4583 if($Global::all_jobs_done) { $eta = now()-$Global::start_time; }
4584 my $arg = $Global::newest_job ?
4585 $Global::newest_job->{'commandline'}->
4586 replace_placeholders(["\257<\257>"],0,0) : "";
4587 $arg = decode_utf8($arg);
4588 my $eta_dhms = ::seconds_to_time_units($eta);
4589 my $bar_text =
4590 sprintf("%d%% %d:%d=%s %s",
4591 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
4592 my $terminal_width = terminal_columns();
4593 my $s = sprintf("%-${terminal_width}s",
4594 substr($bar_text." "x$terminal_width,
4595 0,$terminal_width));
4596 my $width = int($terminal_width * $pctcomplete);
4597 substr($s,$width,0) = $reset;
4598 my $zenity = sprintf("%-${terminal_width}s",
4599 substr("# $eta sec $arg",
4600 0,$terminal_width));
4601 # Prefix with zenity header
4602 $s = "\r" . $zenity . "\r" . $pctcomplete*100 .
4603 "\r" . $rev . $s . $reset;
4604 return $s;
4609 my ($rows,$columns,$last_update_time);
4611 sub compute_terminal_size() {
4612 # && true is to force spawning a shell and not just exec'ing
4613 my @tput = qx{ tput lines cols </dev/tty 2>/dev/null && true };
4614 $rows = 0 + $tput[0];
4615 $columns = 0 + $tput[1];
4616 if(not ($rows && $columns)) {
4617 # && true is to force spawning a shell and not just exec'ing
4618 my $stty = qx{ stty -a </dev/tty 2>/dev/null && true };
4619 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
4620 # MacOSX/IRIX/AIX/Tru64
4621 $stty =~ /(\d+) columns/ and do { $columns = $1; };
4622 $stty =~ /(\d+) rows/ and do { $rows = $1; };
4623 # GNU/Linux/Solaris
4624 $stty =~ /columns (\d+)/ and do { $columns = $1; };
4625 $stty =~ /rows (\d+)/ and do { $rows = $1; };
4626 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
4627 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
4628 $stty =~ /rows = (\d+)/ and do { $rows = $1; };
4629 # QNX
4630 $stty =~ /rows=(\d+),(\d+)/ and do { ($rows,$columns) = ($1,$2); };
4632 if(not ($rows && $columns)) {
4633 # && true is to force spawning a shell and not just exec'ing
4634 my $resize = qx{ resize 2>/dev/null && true };
4635 $resize =~ /COLUMNS=(\d+);/ and do { $columns ||= $1; };
4636 $resize =~ /LINES=(\d+);/ and do { $rows ||= $1; };
4638 $rows ||= 24;
4639 $columns ||= 80;
4642 sub update_terminal_size() {
4643 # Only update once per second.
4644 if($last_update_time < time) {
4645 $last_update_time = time;
4646 compute_terminal_size();
4647 # Set signal WINdow CHange to force recompute
4648 $SIG{WINCH} = \&compute_terminal_size;
4652 sub terminal_rows() {
4653 # Get the number of rows of the terminal.
4654 # Returns:
4655 # number of rows of the screen
4656 update_terminal_size();
4657 return $rows;
4660 sub terminal_columns() {
4661 # Get the number of columns of the terminal.
4662 # Returns:
4663 # number of columns of the screen
4664 update_terminal_size();
4665 return $columns;
4669 sub untabify($) {
4670 # Convert \t into spaces
4671 my @out;
4672 my ($src);
4673 # Deal with multi-byte characters
4674 for my $src (split("\t",$_[0])) {
4675 push @out, $src. " "x(8-mbswidth($src)%8);
4677 return join "",@out;
4680 # Prototype forwarding
4681 sub get_job_with_sshlogin($);
4682 sub get_job_with_sshlogin($) {
4683 # Input:
4684 # $sshlogin = which host should the job be run on?
4685 # Uses:
4686 # $opt::hostgroups
4687 # $Global::JobQueue
4688 # Returns:
4689 # $job = next job object for $sshlogin if any available
4690 my $sshlogin = shift;
4691 my $job;
4693 if ($opt::hostgroups) {
4694 my @other_hostgroup_jobs = ();
4696 while($job = $Global::JobQueue->get()) {
4697 if($sshlogin->in_hostgroups($job->hostgroups())) {
4698 # Found a job to be run on a hostgroup of this
4699 # $sshlogin
4700 last;
4701 } else {
4702 # This job was not in the hostgroups of $sshlogin
4703 push @other_hostgroup_jobs, $job;
4706 $Global::JobQueue->unget(@other_hostgroup_jobs);
4707 if(not defined $job) {
4708 # No more jobs
4709 return undef;
4711 } else {
4712 $job = $Global::JobQueue->get();
4713 if(not defined $job) {
4714 # No more jobs
4715 ::debug("start", "No more jobs: JobQueue empty\n");
4716 return undef;
4719 if(not $job->suspended()) {
4720 $job->set_sshlogin($sshlogin);
4722 if(defined $opt::retries and $job->failed_here()) {
4723 # This command with these args failed for this sshlogin
4724 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
4725 # Only look at the Global::host that have > 0 jobslots
4726 if($no_of_failed_sshlogins ==
4727 grep { $_->max_jobs_running() > 0 } values %Global::host
4728 and $job->failed_here() == $min_failures) {
4729 # It failed the same or more times on another host:
4730 # run it on this host
4731 } else {
4732 # If it failed fewer times on another host:
4733 # Find another job to run
4734 my $nextjob;
4735 if(not $Global::JobQueue->empty()) {
4736 # This can potentially recurse for all args
4737 no warnings 'recursion';
4738 $nextjob = get_job_with_sshlogin($sshlogin);
4740 # Push the command back on the queue
4741 $Global::JobQueue->unget($job);
4742 return $nextjob;
4745 return $job;
4749 sub __REMOTE_SSH__() {}
4752 sub read_sshloginfiles(@) {
4753 # Read a list of --slf's
4754 # Input:
4755 # @files = files or symbolic file names to read
4756 # Returns: N/A
4757 for my $s (@_) {
4758 read_sshloginfile(expand_slf_shorthand($s));
4762 sub expand_slf_shorthand($) {
4763 # Expand --slf shorthand into a read file name
4764 # Input:
4765 # $file = file or symbolic file name to read
4766 # Returns:
4767 # $file = actual file name to read
4768 my $file = shift;
4769 if($file eq "-") {
4770 # skip: It is stdin
4771 } elsif($file eq "..") {
4772 $file = $Global::config_dir."/sshloginfile";
4773 } elsif($file eq ".") {
4774 $file = "/etc/parallel/sshloginfile";
4775 } elsif(not -r $file) {
4776 for(@Global::config_dirs) {
4777 if(not -r $_."/".$file) {
4778 # Try prepending $PARALLEL_HOME
4779 ::error("Cannot open $file.");
4780 ::wait_and_exit(255);
4781 } else {
4782 $file = $_."/".$file;
4783 last;
4787 return $file;
4790 sub read_sshloginfile($) {
4791 # Read sshloginfile into @Global::sshlogin
4792 # Input:
4793 # $file = file to read
4794 # Uses:
4795 # @Global::sshlogin
4796 # Returns: N/A
4797 local $/ = "\n";
4798 my $file = shift;
4799 my $close = 1;
4800 my $in_fh;
4801 ::debug("init","--slf ",$file);
4802 if($file eq "-") {
4803 $in_fh = *STDIN;
4804 $close = 0;
4805 } else {
4806 $in_fh = open_or_exit("<", $file);
4808 while(<$in_fh>) {
4809 chomp;
4810 /^\s*#/ and next;
4811 /^\s*$/ and next;
4812 push @Global::sshlogin, $_;
4814 if($close) {
4815 close $in_fh;
4819 sub parse_sshlogin() {
4820 # Parse @Global::sshlogin into %Global::host.
4821 # Keep only hosts that are in one of the given ssh hostgroups.
4822 # Uses:
4823 # @Global::sshlogin
4824 # $Global::minimal_command_line_length
4825 # %Global::host
4826 # $opt::transfer
4827 # @opt::return
4828 # $opt::cleanup
4829 # @opt::basefile
4830 # @opt::trc
4831 # Returns: N/A
4832 sub expand_range($) {
4833 # Expand host[9-11,15]a[09-11]b
4834 # [9-11,15] => 9 10 11 15
4835 # [09-11] => 09 10 11
4836 my ($in) = @_;
4837 my ($prefix, $range, $suffix);
4838 if(($prefix, $range, $suffix) = $in =~ /^(.*?)\[([-0-9,]*)\](.*)$/) {
4839 my @res;
4840 while(length $range) {
4841 if($range =~ s/^,//) {
4842 # skip
4843 } elsif($range =~ s/^(\d+)-(\d+)//) {
4844 my ($start, $end) = ($1, $2);
4845 push @res, map { $prefix . $_ . $suffix } $start..$end;
4846 } elsif($range =~ s/^(\d+)//) {
4847 push @res, map { $prefix . $_ . $suffix } $1;
4848 } else {
4849 die "Cannot parse $in (at $range)";
4852 return map { expand_range($_) } @res;
4853 } else {
4854 return $in;
4857 my @login;
4858 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
4859 for my $sshlogin (@Global::sshlogin) {
4860 # Split up -S sshlogin,sshlogin
4861 # Parse ,, and \, as , but do not split on that
4862 # -S "ssh -J jump1,,jump2 host1,host2" =>
4863 # ssh -J jump1,jump2 host1
4864 # host2
4865 # Protect \, and ,, as \0
4866 $sshlogin =~ s/\\,|,,/\0/g;
4867 # Protect , in ranges: [___,___] => [___\0___]
4868 while($sshlogin =~ s/(\[[-0-9\0]*),(.*\])/$1\0$2/g) {}
4869 for my $s (split /,|\n/, $sshlogin) {
4870 # Replace \0 => ,
4871 $s =~ s/\0/,/g;
4872 if ($s eq ".." or $s eq "-") {
4873 # This may add to @Global::sshlogin - possibly bug
4874 read_sshloginfile(expand_slf_shorthand($s));
4875 } else {
4876 $s =~ s/\s*$//;
4877 # Expand host[1-12,15]a[01-10]b
4878 push @login, expand_range($s);
4882 $Global::minimal_command_line_length = 100_000_000;
4883 my @allowed_hostgroups;
4884 for my $ncpu_sshlogin_string (::uniq(@login)) {
4885 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
4886 my $sshlogin_string = $sshlogin->string();
4887 if($sshlogin_string eq "") {
4888 # This is an ssh group: -S @webservers
4889 push @allowed_hostgroups, $sshlogin->hostgroups();
4890 next;
4892 if($Global::host{$sshlogin_string}) {
4893 # This sshlogin has already been added:
4894 # It is probably a host that has come back
4895 # Set the max_jobs_running back to the original
4896 debug("run","Already seen $sshlogin_string\n");
4897 if($sshlogin->{'ncpus'}) {
4898 # If ncpus set by '#/' of the sshlogin, overwrite it:
4899 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4901 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
4902 next;
4904 $sshlogin->set_maxlength(Limits::Command::max_length());
4906 $Global::minimal_command_line_length =
4907 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
4908 $Global::host{$sshlogin_string} = $sshlogin;
4910 $Global::usable_command_line_length =
4911 # Usable len = maxlen - 3000 for wrapping, div 2 for hexing
4912 int(($Global::minimal_command_line_length - 3000)/2);
4913 if($opt::max_chars) {
4914 if($opt::max_chars <= $Global::usable_command_line_length) {
4915 $Global::usable_command_line_length = $opt::max_chars;
4916 } else {
4917 ::warning("Value for option -s should be < ".
4918 $Global::usable_command_line_length.".");
4921 if(@allowed_hostgroups) {
4922 # Remove hosts that are not in these groups
4923 while (my ($string, $sshlogin) = each %Global::host) {
4924 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4925 delete $Global::host{$string};
4930 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4931 if(@Global::transfer_files or @opt::return
4932 or $opt::cleanup or @opt::basefile) {
4933 if(not remote_hosts()) {
4934 # There are no remote hosts
4935 if(@opt::trc) {
4936 ::warning("--trc ignored as there are no remote --sshlogin.");
4937 } elsif (defined $opt::transfer) {
4938 ::warning("--transfer ignored as there are ".
4939 "no remote --sshlogin.");
4940 } elsif (@opt::transfer_files) {
4941 ::warning("--transferfile ignored as there ".
4942 "are no remote --sshlogin.");
4943 } elsif (@opt::return) {
4944 ::warning("--return ignored as there are no remote --sshlogin.");
4945 } elsif (defined $opt::cleanup and not %opt::template) {
4946 ::warning("--cleanup ignored as there ".
4947 "are no remote --sshlogin.");
4948 } elsif (@opt::basefile) {
4949 ::warning("--basefile ignored as there ".
4950 "are no remote --sshlogin.");
4956 sub remote_hosts() {
4957 # Return sshlogins that are not ':'
4958 # Uses:
4959 # %Global::host
4960 # Returns:
4961 # list of sshlogins with ':' removed
4962 return grep !/^:$/, keys %Global::host;
4965 sub setup_basefile() {
4966 # Transfer basefiles to each $sshlogin
4967 # This needs to be done before first jobs on $sshlogin is run
4968 # Uses:
4969 # %Global::host
4970 # @opt::basefile
4971 # Returns: N/A
4972 my @cmd;
4973 my $rsync_destdir;
4974 my $workdir;
4975 for my $sshlogin (values %Global::host) {
4976 if($sshlogin->local()) { next }
4977 for my $file (@opt::basefile) {
4978 if($file !~ m:^/: and $opt::workdir eq "...") {
4979 ::error("Work dir '...' will not work with relative basefiles.");
4980 ::wait_and_exit(255);
4982 if(not $workdir) {
4983 my $dummycmdline =
4984 CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{});
4985 my $dummyjob = Job->new($dummycmdline);
4986 $workdir = $dummyjob->workdir();
4988 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4991 debug("init", "basesetup: @cmd\n");
4992 my ($exitstatus,$stdout_ref,$stderr_ref) =
4993 run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5);
4994 if($exitstatus) {
4995 my @stdout = @$stdout_ref;
4996 my @stderr = @$stderr_ref;
4997 ::error("Copying of --basefile failed: @stdout@stderr");
4998 ::wait_and_exit(255);
5002 sub cleanup_basefile() {
5003 # Remove the basefiles transferred
5004 # Uses:
5005 # %Global::host
5006 # @opt::basefile
5007 # Returns: N/A
5008 my @cmd;
5009 my $workdir;
5010 if(not $workdir) {
5011 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{});
5012 my $dummyjob = Job->new($dummycmdline);
5013 $workdir = $dummyjob->workdir();
5015 for my $sshlogin (values %Global::host) {
5016 if($sshlogin->local()) { next }
5017 for my $file (@opt::basefile) {
5018 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
5021 debug("init", "basecleanup: @cmd\n");
5022 my ($exitstatus,$stdout_ref,$stderr_ref) =
5023 run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5);
5024 if($exitstatus) {
5025 my @stdout = @$stdout_ref;
5026 my @stderr = @$stderr_ref;
5027 ::error("Cleanup of --basefile failed: @stdout@stderr");
5028 ::wait_and_exit(255);
5032 sub run_gnu_parallel() {
5033 my ($stdin,@args) = @_;
5034 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
5035 print $Global::original_stderr ` $cmd wait` ;
5036 return 0
5039 sub _run_gnu_parallel() {
5040 # Run GNU Parallel
5041 # This should ideally just fork an internal copy
5042 # and not start it through a shell
5043 # Input:
5044 # $stdin = data to provide on stdin for GNU Parallel
5045 # @args = command line arguments
5046 # Returns:
5047 # $exitstatus = exitcode of GNU Parallel run
5048 # \@stdout = standard output
5049 # \@stderr = standard error
5050 my ($stdin,@args) = @_;
5051 my ($exitstatus,@stdout,@stderr);
5052 my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
5053 my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
5054 unlink $stderrname;
5056 my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
5057 $0,qw(--plain --shell /bin/sh --will-cite), @args);
5058 if(my $writerpid = fork()) {
5059 close $stdin_fh;
5060 @stdout = <$stdout_fh>;
5061 # Now stdout is closed:
5062 # These pids should be dead or die very soon
5063 while(kill 0, $writerpid) { ::usleep(1); }
5064 die;
5065 # reap $writerpid;
5066 # while(kill 0, $pid) { ::usleep(1); }
5067 # reap $writerpid;
5068 $exitstatus = $?;
5069 seek $stderr_fh, 0, 0;
5070 @stderr = <$stderr_fh>;
5071 close $stdout_fh;
5072 close $stderr_fh;
5073 } else {
5074 close $stdout_fh;
5075 close $stderr_fh;
5076 print $stdin_fh $stdin;
5077 close $stdin_fh;
5078 exit(0);
5080 return ($exitstatus,\@stdout,\@stderr);
5083 sub filter_hosts() {
5084 # Remove down --sshlogins from active duty.
5085 # Find ncpus, ncores, maxlen, time-to-login for each host.
5086 # Uses:
5087 # %Global::host
5088 # $Global::minimal_command_line_length
5089 # $opt::use_sockets_instead_of_threads
5090 # $opt::use_cores_instead_of_threads
5091 # $opt::use_cpus_instead_of_cores
5092 # Returns: N/A
5094 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
5095 $maxlen_ref, $echo_ref, $down_hosts_ref) =
5096 parse_host_filtering(parallelized_host_filtering());
5098 delete @Global::host{@$down_hosts_ref};
5099 @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
5101 $Global::minimal_command_line_length = 100_000_000;
5102 while (my ($string, $sshlogin) = each %Global::host) {
5103 if($sshlogin->local()) { next }
5104 my ($nsockets,$ncores,$nthreads,$time_to_login,$maxlen) =
5105 ($nsockets_ref->{$string},$ncores_ref->{$string},
5106 $nthreads_ref->{$string},$time_to_login_ref->{$string},
5107 $maxlen_ref->{$string});
5108 defined $nsockets or ::die_bug("nsockets missing: $string");
5109 defined $ncores or ::die_bug("ncores missing: $string");
5110 defined $nthreads or ::die_bug("nthreads missing: $string");
5111 defined $time_to_login or ::die_bug("time_to_login missing: $string");
5112 defined $maxlen or ::die_bug("maxlen missing: $string");
5113 # ncpus may be set by 4/hostname or may be undefined yet
5114 my $ncpus = $sshlogin->{'ncpus'};
5115 # $nthreads may be 0 if GNU Parallel is not installed remotely
5116 $ncpus = $nthreads || $ncpus || $sshlogin->ncpus();
5117 if($opt::use_cpus_instead_of_cores) {
5118 $ncpus = $ncores || $ncpus;
5119 } elsif($opt::use_sockets_instead_of_threads) {
5120 $ncpus = $nsockets || $ncpus;
5121 } elsif($opt::use_cores_instead_of_threads) {
5122 $ncpus = $ncores || $ncpus;
5124 $sshlogin->set_ncpus($ncpus);
5125 $sshlogin->set_time_to_login($time_to_login);
5126 $maxlen = $maxlen || Limits::Command::max_length();
5127 $sshlogin->set_maxlength($maxlen);
5128 ::debug("init", "Timing from -S:$string ",
5129 " ncpus:", $ncpus,
5130 " nsockets:",$nsockets,
5131 " ncores:", $ncores,
5132 " nthreads:",$nthreads,
5133 " time_to_login:", $time_to_login,
5134 " maxlen:", $maxlen,
5135 " min_max_len:", $Global::minimal_command_line_length,"\n");
5139 sub parse_host_filtering() {
5140 # Input:
5141 # @lines = output from parallelized_host_filtering()
5142 # Returns:
5143 # \%nsockets = number of sockets of {host}
5144 # \%ncores = number of cores of {host}
5145 # \%nthreads = number of hyperthreaded cores of {host}
5146 # \%time_to_login = time_to_login on {host}
5147 # \%maxlen = max command len on {host}
5148 # \%echo = echo received from {host}
5149 # \@down_hosts = list of hosts with no answer
5150 local $/ = "\n";
5151 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
5152 @down_hosts);
5153 for (@_) {
5154 ::debug("init","Read: ",$_);
5155 chomp;
5156 my @col = split /\t/, $_;
5157 if($col[0] =~ /^parallel: Warning:/) {
5158 # Timed out job: Ignore it
5159 next;
5160 } elsif(defined $col[6]) {
5161 # This is a line from --joblog
5162 # seq host time spent sent received exit signal command
5163 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
5164 if($col[0] eq "Seq" and $col[1] eq "Host" and
5165 $col[2] eq "Starttime") {
5166 # Header => skip
5167 next;
5169 # Get server from: eval true server\;
5170 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
5171 ::die_bug("col8 does not contain host: $col[8] in $_");
5172 my $host = $1;
5173 $host =~ tr/\\//d;
5174 $Global::host{$host} or next;
5175 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
5176 # exit == 255 or exit == timeout (-1): ssh failed/timedout
5177 # exit == 1: lsh failed
5178 # Remove sshlogin
5179 ::debug("init", "--filtered $host\n");
5180 push(@down_hosts, $host);
5181 } elsif($col[6] eq "127") {
5182 # signal == 127: parallel not installed remote
5183 # Set nsockets, ncores, nthreads = 1
5184 ::warning("Could not figure out ".
5185 "number of cpus on $host. Using 1.");
5186 $nsockets{$host} = 1;
5187 $ncores{$host} = 1;
5188 $nthreads{$host} = 1;
5189 $maxlen{$host} = Limits::Command::max_length();
5190 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
5191 # Remember how log it took to log in
5192 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
5193 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
5194 } else {
5195 ::die_bug("host check unmatched long jobline: $_");
5197 } elsif($Global::host{$col[0]}) {
5198 # This output from --number-of-cores, --number-of-cpus,
5199 # --max-line-length-allowed
5200 # ncores: server 8
5201 # ncpus: server 2
5202 # maxlen: server 131071
5203 if(/parallel: Warning: Cannot figure out number of/) {
5204 next;
5206 if(/\t(perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from)/
5208 /\tWarning: /
5210 /\t(Host key fingerprint is|\+-.*-\+|\|.*\|)/
5212 /\t\S+: Undefined variable./
5214 # Skip these (from perl):
5215 # perl: warning: Setting locale failed.
5216 # perl: warning: Please check that your locale settings:
5217 # LANGUAGE = (unset),
5218 # LC_ALL = (unset),
5219 # LANG = "en_US.UTF-8"
5220 # are supported and installed on your system.
5221 # perl: warning: Falling back to the standard locale ("C").
5222 # Disconnected from 127.0.0.1 port 22
5224 # Skip these (from ssh):
5225 # Warning: Permanently added * to the list of known hosts.
5226 # Warning: Identity file * not accessible: *
5227 # (VisualHostKey=yes)
5228 # Host key fingerprint is SHA256:...
5229 # +--[ED25519 256]--+
5230 # | o |
5231 # +----[SHA256]-----+
5233 # Skip these (from csh):
5234 # MANPATH: Undefined variable.
5235 } elsif(not defined $nsockets{$col[0]}) {
5236 $nsockets{$col[0]} = $col[1];
5237 } elsif(not defined $ncores{$col[0]}) {
5238 $ncores{$col[0]} = $col[1];
5239 } elsif(not defined $nthreads{$col[0]}) {
5240 $nthreads{$col[0]} = $col[1];
5241 } elsif(not defined $maxlen{$col[0]}) {
5242 $maxlen{$col[0]} = $col[1];
5243 } elsif(not defined $echo{$col[0]}) {
5244 $echo{$col[0]} = $col[1];
5245 } else {
5246 ::die_bug("host check too many col0: $_");
5248 } else {
5249 ::die_bug("host check unmatched short jobline ($col[0]): $_");
5252 @down_hosts = uniq(@down_hosts);
5253 return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
5254 \%maxlen, \%echo, \@down_hosts);
5257 sub parallelized_host_filtering() {
5258 # Uses:
5259 # %Global::host
5260 # Returns:
5261 # text entries with:
5262 # * joblog line
5263 # * hostname \t number of cores
5264 # * hostname \t number of cpus
5265 # * hostname \t max-line-length-allowed
5266 # * hostname \t empty
5268 sub sshwrapped {
5269 # Wrap with ssh and --env
5270 # Return $default_value if command fails
5271 my $sshlogin = shift;
5272 my $command = shift;
5273 # wrapper that returns output "0\n" if the command fails
5274 # E.g. parallel not installed => "0\n"
5275 my $wcmd = q(perl -e '$a=`).$command.q(`; print $? ? "0".v010 : $a');
5276 my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],[],[],{},{});
5277 my $job = Job->new($commandline);
5278 $job->set_sshlogin($sshlogin);
5279 $job->wrapped();
5280 return($job->{'wrapped'});
5283 my(@sockets, @cores, @threads, @maxline, @echo);
5284 while (my ($host, $sshlogin) = each %Global::host) {
5285 if($host eq ":") { next }
5286 # The 'true' is used to get the $host out later
5287 push(@sockets, $host."\t"."true $host; ".
5288 sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0");
5289 push(@cores, $host."\t"."true $host; ".
5290 sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0");
5291 push(@threads, $host."\t"."true $host; ".
5292 sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0");
5293 push(@maxline, $host."\t"."true $host; ".
5294 sshwrapped($sshlogin,
5295 "parallel --max-line-length-allowed")."\n\0");
5296 # 'echo' is used to get the fastest possible ssh login time
5297 push(@echo, $host."\t"."true $host; ".
5298 $sshlogin->wrap("echo $host")."\n\0");
5300 # --timeout 10: Setting up an SSH connection and running a simple
5301 # command should never take > 10 sec.
5302 # --delay 0.1: If multiple sshlogins use the same proxy the delay
5303 # will make it less likely to overload the ssh daemon.
5304 # --retries 3: If the ssh daemon is overloaded, try 3 times
5305 my $cmd =
5306 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
5307 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
5308 $cmd = $Global::shell." -c ".Q($cmd);
5309 ::debug("init", $cmd, "\n");
5310 my @out;
5311 my $prepend = "";
5313 my ($host_fh,$in,$err);
5314 open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
5315 ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
5317 if(not fork()) {
5318 # Give the commands to run to the $cmd
5319 close $host_fh;
5320 print $in @sockets, @cores, @threads, @maxline, @echo;
5321 close $in;
5322 exit();
5324 close $in;
5325 # If -0: $/ must be \n
5326 local $/ = "\n";
5327 for(<$host_fh>) {
5328 # TODO incompatible with '-quoting. Needs to be fixed differently
5329 #if(/\'$/) {
5330 # # if last char = ' then append next line
5331 # # This may be due to quoting of \n in environment var
5332 # $prepend .= $_;
5333 # next;
5335 $_ = $prepend . $_;
5336 $prepend = "";
5337 push @out, $_;
5339 close $host_fh;
5340 return @out;
5343 sub onall($@) {
5344 # Runs @command on all hosts.
5345 # Uses parallel to run @command on each host.
5346 # --jobs = number of hosts to run on simultaneously.
5347 # For each host a parallel command with the args will be running.
5348 # Uses:
5349 # $Global::debug
5350 # $Global::exitstatus
5351 # $Global::joblog
5352 # $Global::quoting
5353 # $opt::D
5354 # $opt::arg_file_sep
5355 # $opt::arg_sep
5356 # $opt::colsep
5357 # $opt::files
5358 # $opt::files0
5359 # $opt::group
5360 # $opt::joblog
5361 # $opt::jobs
5362 # $opt::keeporder
5363 # $opt::linebuffer
5364 # $opt::max_chars
5365 # $opt::plain
5366 # $opt::retries
5367 # $opt::tag
5368 # $opt::tee
5369 # $opt::timeout
5370 # $opt::ungroup
5371 # %Global::host
5372 # @opt::basefile
5373 # @opt::env
5374 # @opt::v
5375 # Input:
5376 # @command = command to run on all hosts
5377 # Returns: N/A
5378 sub tmp_joblog {
5379 # Input:
5380 # $joblog = filename of joblog - undef if none
5381 # Returns:
5382 # $tmpfile = temp file for joblog - undef if none
5383 my $joblog = shift;
5384 if(not defined $joblog) {
5385 return undef;
5387 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
5388 close $fh;
5389 return $tmpfile;
5391 my ($input_source_fh_ref,@command) = @_;
5392 if($Global::quoting) {
5393 @command = shell_quote(@command);
5396 # Copy all @input_source_fh (-a and :::) into tempfiles
5397 my @argfiles = ();
5398 for my $fh (@$input_source_fh_ref) {
5399 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
5400 print $outfh (<$fh>);
5401 close $outfh;
5402 push @argfiles, $name;
5404 if(@opt::basefile) { setup_basefile(); }
5405 # for each sshlogin do:
5406 # parallel -S $sshlogin $command :::: @argfiles
5408 # Pass some of the options to the sub-parallels, not all of them as
5409 # -P should only go to the first, and -S should not be copied at all.
5410 my $options =
5411 join(" ",
5412 ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
5413 ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""),
5414 ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""),
5415 ((defined $opt::D) ? "-D $opt::D" : ""),
5416 ((defined $opt::group) ? "--group" : ""),
5417 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
5418 ((defined $opt::keeporder) ? "--keeporder" : ""),
5419 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
5420 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
5421 ((defined $opt::plain) ? "--plain" : ""),
5422 (($opt::ungroup == 1) ? "-u" : ""),
5423 ((defined $opt::tee) ? "--tee" : ""),
5425 my $suboptions =
5426 join(" ",
5427 ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
5428 ((defined $opt::D) ? "-D $opt::D" : ""),
5429 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
5430 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
5431 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
5432 ((defined $opt::files) ? "--files" : ""),
5433 ((defined $opt::files0) ? "--files0" : ""),
5434 ((defined $opt::group) ? "--group" : ""),
5435 ((defined $opt::cleanup) ? "--cleanup" : ""),
5436 ((defined $opt::keeporder) ? "--keeporder" : ""),
5437 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
5438 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
5439 ((defined $opt::plain) ? "--plain" : ""),
5440 ((defined $opt::plus) ? "--plus" : ""),
5441 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
5442 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
5443 (($opt::ungroup == 1) ? "-u" : ""),
5444 ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""),
5445 ((defined $opt::tee) ? "--tee" : ""),
5446 ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
5447 (@Global::transfer_files ? map { "--tf ".Q($_) }
5448 @Global::transfer_files : ""),
5449 (@Global::ret_files ? map { "--return ".Q($_) }
5450 @Global::ret_files : ""),
5451 (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
5452 (map { "-v" } @opt::v),
5454 ::debug("init", "| $0 $options\n");
5455 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
5456 ::die_bug("This does not run GNU Parallel: $0 $options");
5457 my @joblogs;
5458 for my $host (sort keys %Global::host) {
5459 my $sshlogin = $Global::host{$host};
5460 my $qsshlogin = Q($sshlogin->string());
5461 my $qsshloginpw = Q($sshlogin->pwstring());
5462 if($qsshloginpw ne $qsshlogin) {
5463 ::warning_once("Using password or SSHPASS with --(n)onall ".
5464 "exposes the password",
5465 "on the command line, ".
5466 "making it visible to local users via `ps`.");
5468 my $joblog = tmp_joblog($opt::joblog);
5469 if($joblog) {
5470 push @joblogs, $joblog;
5471 $joblog = "--joblog ".::Q($joblog);
5473 my $quad = $opt::arg_file_sep || "::::";
5474 # If PARALLEL_ENV is set: Pass it on
5475 my $penv=$Global::parallel_env ?
5476 "PARALLEL_ENV=".Q($Global::parallel_env) : '';
5477 my $results;
5478 if(defined $opt::results) {
5479 $results = Q($opt::results) . $qsshlogin;
5481 ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
5482 ((defined $opt::tag) ? "--tagstring ".$qsshlogin : ""),
5483 ((defined $opt::results) ? "--results ".$results : ""),
5484 " -S $qsshloginpw ",
5485 join(" ",shell_quote(@command,$quad,@argfiles)),"\n");
5486 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
5487 ((defined $opt::tag) ? "--tagstring ".$qsshlogin : ""),
5488 ((defined $opt::results) ? "--results ".$results : ""),
5489 " -S $qsshloginpw ",
5490 join(" ",shell_quote(@command,$quad,@argfiles)),"\0";
5492 close $parallel_fh;
5493 $Global::exitstatus = $? >> 8;
5494 debug("init", "--onall exitvalue ", $?);
5495 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
5496 $Global::debug or unlink(@argfiles);
5497 my %seen;
5498 for my $joblog (@joblogs) {
5499 # Append to $joblog
5500 my $fh = open_or_exit("<", $joblog);
5501 # Skip first line (header);
5502 <$fh>;
5503 print $Global::joblog (<$fh>);
5504 close $fh;
5505 unlink($joblog);
5510 sub __SIGNAL_HANDLING__() {}
5513 sub sigtstp() {
5514 # Send TSTP signal (Ctrl-Z) to all children process groups
5515 # Uses:
5516 # %SIG
5517 # Returns: N/A
5518 signal_children("TSTP");
5521 sub sigpipe() {
5522 # Send SIGPIPE signal to all children process groups
5523 # Uses:
5524 # %SIG
5525 # Returns: N/A
5526 signal_children("PIPE");
5529 sub signal_children() {
5530 # Send signal to all children process groups
5531 # and GNU Parallel itself
5532 # Uses:
5533 # %SIG
5534 # Returns: N/A
5535 my $signal = shift;
5536 debug("run", "Sending $signal ");
5537 kill $signal, map { -$_ } keys %Global::running;
5538 # Use default signal handler for GNU Parallel itself
5539 $SIG{$signal} = undef;
5540 kill $signal, $$;
5543 sub save_original_signal_handler() {
5544 # Remember the original signal handler
5545 # Uses:
5546 # %Global::original_sig
5547 # Returns: N/A
5548 $SIG{INT} = sub {
5549 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
5550 wait_and_exit(255);
5552 $SIG{TERM} = sub {
5553 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
5554 wait_and_exit(255);
5556 %Global::original_sig = %SIG;
5557 $SIG{TERM} = sub {}; # Dummy until jobs really start
5558 $SIG{ALRM} = 'IGNORE';
5559 # Allow Ctrl-Z to suspend and `fg` to continue
5560 $SIG{TSTP} = \&sigtstp;
5561 $SIG{PIPE} = \&sigpipe;
5562 $SIG{CONT} = sub {
5563 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
5564 $SIG{TSTP} = \&sigtstp;
5565 for my $job (values %Global::running) {
5566 if($job->suspended()) {
5567 # Force jobs to suspend, if they are marked as suspended.
5568 # --memsupspend can suspend a job that will be resumed
5569 # if the user presses CTRL-Z followed by `fg`.
5570 $job->suspend();
5571 } else {
5572 # Resume the rest of the jobs
5573 $job->resume();
5579 sub list_running_jobs() {
5580 # Print running jobs on tty
5581 # Uses:
5582 # %Global::running
5583 # Returns: N/A
5584 for my $job (values %Global::running) {
5585 ::status("$Global::progname: ".$job->replaced());
5589 sub start_no_new_jobs() {
5590 # Start no more jobs
5591 # Uses:
5592 # %Global::original_sig
5593 # %Global::unlink
5594 # $Global::start_no_new_jobs
5595 # Returns: N/A
5596 unlink keys %Global::unlink;
5597 ::status
5598 ("$Global::progname: SIGHUP received. No new jobs will be started.",
5599 "$Global::progname: Waiting for these ".(keys %Global::running).
5600 " jobs to finish. Send SIGTERM to stop now.");
5601 list_running_jobs();
5602 $Global::start_no_new_jobs ||= 1;
5605 sub reapers() {
5606 # Run reaper until there are no more left
5607 # Returns:
5608 # @pids_reaped = pids of reaped processes
5609 my @pids_reaped;
5610 my $pid;
5611 while($pid = reaper()) {
5612 push @pids_reaped, $pid;
5614 return @pids_reaped;
5617 sub reaper() {
5618 # A job finished:
5619 # * Set exitstatus, exitsignal, endtime.
5620 # * Free ressources for new job
5621 # * Update median runtime
5622 # * Print output
5623 # * If --halt = now: Kill children
5624 # * Print progress
5625 # Uses:
5626 # %Global::running
5627 # $opt::timeout
5628 # $Global::timeoutq
5629 # $opt::keeporder
5630 # $Global::total_running
5631 # Returns:
5632 # $stiff = PID of child finished
5633 my $stiff;
5634 debug("run", "Reaper ");
5635 if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
5636 # No jobs waiting to be reaped
5637 return 0;
5640 # $stiff = pid of dead process
5641 my $job = $Global::running{$stiff};
5643 # '-a <(seq 10)' will give us a pid not in %Global::running
5644 # The same will one of the ssh -M: ignore
5645 $job or return 0;
5646 delete $Global::running{$stiff};
5647 $Global::total_running--;
5648 if($job->{'commandline'}{'skip'}) {
5649 # $job->skip() was called
5650 $job->set_exitstatus(-2);
5651 $job->set_exitsignal(0);
5652 } else {
5653 $job->set_exitsignal($? & 127);
5654 if($job->exitstatus()) {
5655 # Exit status already set - probably by --timeout
5656 } elsif($? & 127) {
5657 # Killed by signal. Many shells return: 128 | $signal
5658 $job->set_exitstatus(128 | $?);
5659 } else {
5660 # Normal exit
5661 $job->set_exitstatus($? >> 8);
5665 debug("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")");
5666 if($Global::delayauto or $Global::sshdelayauto) {
5667 if($job->exitstatus()) {
5668 # Job failed: Increase delay (if $opt::(ssh)delay set)
5669 $opt::delay &&= $opt::delay * 1.3;
5670 $opt::sshdelay &&= $opt::sshdelay * 1.3;
5671 } else {
5672 # Job succeeded: Decrease delay (if $opt::(ssh)delay set)
5673 $opt::delay &&= $opt::delay * 0.9;
5674 $opt::sshdelay &&= $opt::sshdelay * 0.9;
5676 debug("run", "delay:$opt::delay ssh:$opt::sshdelay ");
5678 $job->set_endtime(::now());
5679 my $sshlogin = $job->sshlogin();
5680 $sshlogin->dec_jobs_running();
5681 if($job->should_be_retried()) {
5682 # Free up file handles
5683 $job->free_ressources();
5684 } else {
5685 # The job is done
5686 $sshlogin->inc_jobs_completed();
5687 # Free the jobslot
5688 $job->free_slot();
5689 if($opt::timeout and not $job->exitstatus()) {
5690 # Update average runtime for timeout only for successful jobs
5691 $Global::timeoutq->update_median_runtime($job->runtime());
5693 if($opt::keeporder and not $opt::latestline) {
5694 # --latestline fixes --keeporder in Job::row()
5695 $job->print_earlier_jobs();
5696 } else {
5697 $job->print();
5699 if($job->should_we_halt() eq "now") {
5700 # Kill children
5701 ::kill_sleep_seq($job->pid());
5702 ::killall();
5703 ::wait_and_exit($Global::halt_exitstatus);
5706 $job->cleanup();
5708 if($opt::progress) {
5709 my $progress = progress();
5710 ::status_no_nl("\r",$progress->{'status'});
5713 debug("run", "jobdone \n");
5714 return $stiff;
5718 sub __USAGE__() {}
5721 sub killall() {
5722 # Kill all jobs by killing their process groups
5723 # Uses:
5724 # $Global::start_no_new_jobs = we are stopping
5725 # $Global::killall = Flag to not run reaper
5726 $Global::start_no_new_jobs ||= 1;
5727 # Do not reap killed children: Ignore them instead
5728 $Global::killall ||= 1;
5729 kill_sleep_seq(keys %Global::running);
5732 sub kill_sleep_seq(@) {
5733 # Send jobs TERM,TERM,KILL to processgroups
5734 # Input:
5735 # @pids = list of pids that are also processgroups
5736 # Killing can be slow if you follow @term_seq
5737 # So run the killing in parallel
5738 if(not fork()) {
5739 # Convert pids to process groups ($processgroup = -$pid)
5740 my @pgrps = map { -$_ } @_;
5741 my @term_seq = split/,/,$opt::termseq;
5742 if(not @term_seq) {
5743 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
5745 # for each signal+waittime: kill process groups still not dead
5746 while(@term_seq) {
5747 @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
5749 exit(0);
5753 sub kill_sleep() {
5754 # Kill pids with a signal and wait a while for them to die
5755 # Input:
5756 # $signal = signal to send to @pids
5757 # $sleep_max = number of ms to sleep at most before returning
5758 # @pids = pids to kill (actually process groups)
5759 # Uses:
5760 # $Global::killall = set by killall() to avoid calling reaper
5761 # Returns:
5762 # @pids = pids still alive
5763 my ($signal, $sleep_max, @pids) = @_;
5764 ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
5765 kill $signal, @pids;
5766 my $sleepsum = 0;
5767 my $sleep = 0.001;
5769 while(@pids and $sleepsum < $sleep_max) {
5770 if($Global::killall) {
5771 # Killall => don't run reaper
5772 while(waitpid(-1, &WNOHANG) > 0) {
5773 $sleep = $sleep/2+0.001;
5775 } elsif(reapers()) {
5776 $sleep = $sleep/2+0.001;
5778 $sleep *= 1.1;
5779 ::usleep($sleep);
5780 $sleepsum += $sleep;
5781 # Keep only living children
5782 @pids = grep { kill(0, $_) } @pids;
5784 return @pids;
5787 sub wait_and_exit($) {
5788 # If we do not wait, we sometimes get segfault
5789 # Returns: N/A
5790 my $error = shift;
5791 unlink keys %Global::unlink;
5792 if($error) {
5793 # Kill all jobs without printing
5794 killall();
5796 for (keys %Global::unkilled_children) {
5797 # Kill any (non-jobs) children (e.g. reserved processes)
5798 kill 9, $_;
5799 waitpid($_,0);
5800 delete $Global::unkilled_children{$_};
5802 if($Global::unkilled_sqlworker) {
5803 waitpid($Global::unkilled_sqlworker,0);
5805 # Avoid: Warning: unable to close filehandle properly: No space
5806 # left on device during global destruction.
5807 $SIG{__WARN__} = sub {};
5808 if($opt::_parset) {
5809 # Make the shell script return $error
5810 print "$Global::parset_endstring\nreturn $error";
5812 exit($error);
5815 sub die_usage() {
5816 # Returns: N/A
5817 usage();
5818 wait_and_exit(255);
5821 sub usage() {
5822 # Returns: N/A
5823 print join
5824 ("\n",
5825 "Usage:",
5827 "$Global::progname [options] [command [arguments]] < list_of_arguments",
5828 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
5829 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
5831 "-j n Run n jobs in parallel",
5832 "-k Keep same order",
5833 "-X Multiple arguments with context replace",
5834 "--colsep regexp Split input on regexp for positional replacements",
5835 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
5836 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
5837 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
5838 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
5840 "-S sshlogin Example: foo\@server.example.com",
5841 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
5842 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
5843 "--onall Run the given command with argument on all sshlogins",
5844 "--nonall Run the given command with no arguments on all sshlogins",
5846 "--pipe Split stdin (standard input) to multiple jobs.",
5847 "--recend str Record end separator for --pipe.",
5848 "--recstart str Record start separator for --pipe.",
5850 "GNU Parallel can do much more. See 'man $Global::progname' for details",
5852 "Academic tradition requires you to cite works you base your article on.",
5853 "If you use programs that use GNU Parallel to process data for an article in a",
5854 "scientific publication, please cite:",
5856 " Tange, O. (2024, May 22). GNU Parallel 20240522 ('Tbilisi').",
5857 " Zenodo. https://doi.org/10.5281/zenodo.11247979",
5859 # Before changing these lines, please read
5860 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice
5861 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5862 # You accept to be put in a public hall of shame by removing
5863 # these lines
5864 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5865 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5867 "",);
5870 sub citation_notice() {
5871 # if --will-cite or --plain: do nothing
5872 # if stderr redirected: do nothing
5873 # if $PARALLEL_HOME/will-cite: do nothing
5874 # else: print citation notice to stderr
5875 if($opt::willcite
5877 $opt::plain
5879 not -t $Global::original_stderr
5881 grep { -e "$_/will-cite" } @Global::config_dirs) {
5882 # skip
5883 } else {
5884 ::status
5885 ("Academic tradition requires you to cite works you base your article on.",
5886 "If you use programs that use GNU Parallel to process data for an article in a",
5887 "scientific publication, please cite:",
5889 " Tange, O. (2024, May 22). GNU Parallel 20240522 ('Tbilisi').",
5890 " Zenodo. https://doi.org/10.5281/zenodo.11247979",
5892 # Before changing these line, please read
5893 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and
5894 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5895 # You accept to be put in a public hall of shame by
5896 # removing these lines
5897 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5898 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5900 "More about funding GNU Parallel and the citation notice:",
5901 "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice",
5903 "To silence this citation notice: run 'parallel --citation' once.",
5906 mkdir $Global::config_dir;
5907 # Number of times the user has run GNU Parallel without showing
5908 # willingness to cite
5909 my $runs = 0;
5910 if(open (my $fh, "<", $Global::config_dir.
5911 "/runs-without-willing-to-cite")) {
5912 $runs = <$fh>;
5913 close $fh;
5915 $runs++;
5916 if(open (my $fh, ">", $Global::config_dir.
5917 "/runs-without-willing-to-cite")) {
5918 print $fh $runs;
5919 close $fh;
5920 if($runs >= 10) {
5921 ::status("Come on: You have run parallel $runs times. ".
5922 "Isn't it about time ",
5923 "you run 'parallel --citation' once to silence ".
5924 "the citation notice?",
5925 "");
5931 sub status(@) {
5932 my @w = @_;
5933 my $fh = $Global::status_fd || *STDERR;
5934 print $fh map { ($_, "\n") } @w;
5935 flush $fh;
5938 sub status_no_nl(@) {
5939 my @w = @_;
5940 my $fh = $Global::status_fd || *STDERR;
5941 print $fh @w;
5942 flush $fh;
5945 sub warning(@) {
5946 my @w = @_;
5947 my $prog = $Global::progname || "parallel";
5948 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5952 my %warnings;
5953 sub warning_once(@) {
5954 my @w = @_;
5955 my $prog = $Global::progname || "parallel";
5956 $warnings{@w}++ or
5957 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5961 sub error(@) {
5962 my @w = @_;
5963 my $prog = $Global::progname || "parallel";
5964 status(map { ($prog.": Error: ". $_); } @w);
5967 sub die_bug($) {
5968 my $bugid = shift;
5969 print STDERR
5970 ("$Global::progname: This should not happen. You have found a bug. ",
5971 "Please follow\n",
5972 "https://www.gnu.org/software/parallel/man.html#reporting-bugs\n",
5973 "\n",
5974 "Include this in the report:\n",
5975 "* The version number: $Global::version\n",
5976 "* The bugid: $bugid\n",
5977 "* The command line being run\n",
5978 "* The files being read (put the files on a webserver if they are big)\n",
5979 "\n",
5980 "If you get the error on smaller/fewer files, please include those instead.\n");
5981 ::wait_and_exit(255);
5984 sub version() {
5985 # Returns: N/A
5986 print join
5987 ("\n",
5988 "GNU $Global::progname $Global::version",
5989 "Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free Software",
5990 "Foundation, Inc.",
5991 "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5992 "This is free software: you are free to change and redistribute it.",
5993 "GNU $Global::progname comes with no warranty.",
5995 "Web site: https://www.gnu.org/software/${Global::progname}\n",
5996 "When using programs that use GNU Parallel to process data for publication",
5997 "please cite as described in 'parallel --citation'.\n",
6001 sub citation() {
6002 # Returns: N/A
6003 my ($all_argv_ref,$argv_options_removed_ref) = @_;
6004 my $all_argv = "@$all_argv_ref";
6005 my $no_opts = "@$argv_options_removed_ref";
6006 $all_argv=~s/--citation//;
6007 if($all_argv ne $no_opts) {
6008 ::warning("--citation ignores all other options and arguments.");
6009 ::status("");
6012 ::status(
6013 "Academic tradition requires you to cite works you base your article on.",
6014 "If you use programs that use GNU Parallel to process data for an article in a",
6015 "scientific publication, please cite:",
6017 "\@software{tange_2024_11247979,",
6018 " author = {Tange, Ole},",
6019 " title = {GNU Parallel 20240522 ('Tbilisi')},",
6020 " month = May,",
6021 " year = 2023,",
6022 " note = {{GNU Parallel is a general parallelizer to run",
6023 " multiple serial command line programs in parallel",
6024 " without changing them.}},",
6025 " publisher = {Zenodo},",
6026 " doi = {10.5281/zenodo.11247979},",
6027 " url = {https://doi.org/10.5281/zenodo.11247979}",
6028 "}",
6030 "(Feel free to use \\nocite{tange_2024_11247979})",
6032 # Before changing these lines, please read
6033 # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and
6034 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
6035 # You accept to be put in a public hall of shame by removing
6036 # these lines
6037 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
6038 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
6040 "More about funding GNU Parallel and the citation notice:",
6041 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
6042 "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice",
6043 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
6046 while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
6047 print "\nType: 'will cite' and press enter.\n> ";
6048 my $input = <STDIN>;
6049 if(not defined $input) {
6050 exit(255);
6052 if($input =~ /will cite/i) {
6053 if(mkdir $Global::config_dir) {
6054 # Recompute @Global::config_dirs so we can break out of the loop.
6055 init_globals();
6057 if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
6058 close $fh;
6059 ::status(
6061 "Thank you for your support: You are the reason why there is funding to",
6062 "continue maintaining GNU Parallel. On behalf of future versions of",
6063 "GNU Parallel, which would not exist without your support:",
6065 " THANK YOU SO MUCH",
6067 "It is really appreciated. The citation notice is now silenced.",
6068 "");
6069 } else {
6070 ::status(
6072 "Thank you for your support. It is much appreciated. The citation",
6073 "cannot permanently be silenced. Use '--will-cite' instead.",
6075 "If you use '--will-cite' in scripts to be run by others you are making",
6076 "it harder for others to see the citation notice. The development of",
6077 "GNU Parallel is indirectly financed through citations, so if users",
6078 "do not know they should cite then you are making it harder to finance",
6079 "development. However, if you pay 10000 EUR, you should feel free to",
6080 "use '--will-cite' in scripts.",
6081 "");
6082 last;
6088 sub show_limits() {
6089 # Returns: N/A
6090 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
6091 "Maximal usable size of command: ",
6092 $Global::usable_command_line_length,"\n",
6093 "\n",
6094 "Execution will continue now, ",
6095 "and it will try to read its input\n",
6096 "and run commands; if this is not ",
6097 "what you wanted to happen, please\n",
6098 "press CTRL-D or CTRL-C\n");
6101 sub embed() {
6102 # Give an embeddable version of GNU Parallel
6103 # Tested with: bash, zsh, ksh, ash, dash, sh
6104 my $randomstring = "cut-here-".join"",
6105 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
6106 if(not -f $0 or not -r $0) {
6107 ::error("--embed only works if parallel is a readable file");
6108 exit(255);
6110 # Read the source from $0
6111 my $source = slurp_or_exit($0);
6112 my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
6113 my $env_parallel_source;
6114 my $shell = $Global::shell;
6115 $shell =~ s:.*/::;
6116 for(which("env_parallel.$shell")) {
6117 -r $_ or next;
6118 # Read the source of env_parallel.shellname
6119 $env_parallel_source .= slurp_or_exit($_);
6120 last;
6122 print "#!$Global::shell
6124 # Copyright (C) 2007-2024 $user, Ole Tange, http://ole.tange.dk
6125 # and Free Software Foundation, Inc.
6127 # This program is free software; you can redistribute it and/or modify
6128 # it under the terms of the GNU General Public License as published by
6129 # the Free Software Foundation; either version 3 of the License, or
6130 # (at your option) any later version.
6132 # This program is distributed in the hope that it will be useful, but
6133 # WITHOUT ANY WARRANTY; without even the implied warranty of
6134 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6135 # General Public License for more details.
6137 # You should have received a copy of the GNU General Public License
6138 # along with this program; if not, see <https://www.gnu.org/licenses/>
6139 # or write to the Free Software Foundation, Inc., 51 Franklin St,
6140 # Fifth Floor, Boston, MA 02110-1301 USA
6143 print q!
6144 # Embedded GNU Parallel created with --embed
6145 parallel() {
6146 # Start GNU Parallel without leaving temporary files
6148 # Not all shells support 'perl <(cat ...)'
6149 # This is a complex way of doing:
6150 # perl <(cat <<'cut-here'
6151 # [...]
6152 # ) "$@"
6153 # and also avoiding:
6154 # [1]+ Done cat
6156 # Make a temporary fifo that perl can read from
6157 _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo);
6158 do {
6159 $f = "/tmp/parallel-".join"",
6160 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
6161 } while(-e $f);
6162 mkfifo($f,0600);
6163 print $f;'`
6164 # Put source code into temporary file
6165 # so it is easy to copy to the fifo
6166 _file_with_GNU_Parallel_source=`mktemp`;
6168 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
6169 $source,
6170 $randomstring,"\n",
6172 # Copy the source code from the file to the fifo
6173 # and remove the file and fifo ASAP
6174 # 'sh -c' is needed to avoid
6175 # [1]+ Done cat
6176 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
6178 # Read the source from the fifo
6179 perl $_fifo_with_GNU_Parallel_source "$@"
6182 $env_parallel_source,
6185 # This will call the functions above
6186 parallel -k echo ::: Put your code here
6187 env_parallel --session
6188 env_parallel -k echo ::: Put your code here
6189 parset p,y,c,h -k echo ::: Put your code here
6190 echo $p $y $c $h
6191 echo You can also activate GNU Parallel for interactive use by:
6192 echo . "$0"
6194 ::status("Redirect the output to a file and add your changes at the end:",
6195 " $0 --embed > new_script");
6198 sub pack_combined_executable {
6199 my ($before_ref,$with_argsep_ref,$argv_ref) = @_;
6200 my @parallelopts;
6201 my $skip_next;
6202 # Remove '--combine-exec file' from options
6203 for(@{$before_ref}[0..(arrayindex($before_ref,$with_argsep_ref))-1]) {
6204 if (/^--combine-?exec(utable)?$/ || $skip_next) {
6205 # Also skip the filename given to --combine-exec
6206 $skip_next = !$skip_next;
6207 next;
6209 push @parallelopts, $_;
6211 # From ::: and to end
6212 my @argsep = @{$with_argsep_ref}[($#ARGV+1)..$#$with_argsep_ref];
6213 # The executable is now the first in @ARGV
6214 my $execname = shift @ARGV;
6215 # The rest of @ARGV are options for $execname
6216 my @execopts = @ARGV;
6217 debug("combine",
6218 "Parallel opts: @parallelopts ",
6219 "Executable: $execname ",
6220 "Execopts: @execopts ",
6221 "Argsep: @argsep\n");
6222 # Read the the executable
6223 my $exec = slurp_or_exit(which($execname));
6224 # Read the source of GNU Parallel and the executable
6225 my $parallel = slurp_or_exit($0);
6226 # Remove possibly __END__ from GNU Parallel
6227 $parallel =~ s/^__END__.*//s;
6228 if(-t $Global::original_stderr) {
6229 ::status(
6230 "Please be aware that combining GNU Parallel and '$execname'",
6231 "into a combined executable will make the whole executable",
6232 "licensed under GPLv3 (section 5.c).",
6234 "If the license of '$execname' is incompatible with GPLv3,",
6235 "you cannot legally convey copies of the combined executable",
6236 "to others. You can, however, still run them yourself.",
6238 "The combined executable will not have a citation notice,",
6239 "so it is your resposibilty to advice that academic tradition",
6240 "requires the users to cite GNU Parallel.",
6243 my $input;
6244 do {
6245 ::status_no_nl("\nType: 'I agree' and press enter.\n> ");
6246 $input = <STDIN>;
6247 if(not defined $input) {
6248 exit(255);
6250 } until($input =~ /I agree/i);
6252 write_or_exit($opt::combineexec,
6253 $parallel,
6254 "\n__END__\n",
6255 (map { "$_\0\n" } @parallelopts), "\0\0\n",
6256 $execname, "\0\0\n",
6257 (map { "$_\0\n" } @execopts), "\0\0\n",
6258 (map { "$_\0\n" } @argsep), "\0\0\n",
6259 $exec);
6260 # Set +x permission
6261 chmod 0700, $opt::combineexec;
6262 exit(0);
6265 sub unpack_combined_executable {
6266 # If the script is a combined executable,
6267 # it will have stuff in <DATA> (I.e. after __END__)
6268 my $combine_exec = join("",<DATA>);
6269 if(length $combine_exec) {
6270 # Parse the <DATA>
6272 # __END__
6273 # Option for GNU Parallel\0\n
6274 # Option for GNU Parallel\0\n
6275 # \0\0\n
6276 # Name of executable\0\0\n
6277 # Option for executable\0\n
6278 # Option for executable\0\n
6279 # \0\0\n
6280 # argsep + args if any\0\n
6281 # argsep + args if any\0\n
6282 # \0\0\n
6283 # <<binary of exec>>
6285 # parallel --combine --pipe -j10% --recend '' myscript --myopt myval
6286 # __END__
6287 # --pipe\0\n --pipe
6288 # -j10%\0\n -j10%
6289 # --recend\0\n --recend
6290 # \0\n ''
6291 # \0\0\n end-of-parallel-options
6292 # myscript\0\0\n myscript
6293 # --myopt\0\n --myopt
6294 # myval\0\n myval
6295 # \0\0\n end-of-myscript-options
6296 # \0\0\n no argsep
6297 # <<binary of myscript>>
6299 # parallel --combine -j10% myscript :::
6300 # __END__
6301 # -j10%\0\n
6302 # \0\0\n end-of-parallel-options
6303 # myscript\0\0\n
6304 # \0\0\n end-of-myscript-options
6305 # :::\0\n
6306 # \0\0\n
6307 # <<binary of myscript>>
6309 my ($opts,$execname,$execopts,$argsep,$exec) =
6310 split /\0\0\n/,$combine_exec,5;
6311 # Make a tmpdir with a file called $execname
6312 local %ENV;
6313 $ENV{TMPDIR} ||= "/tmp";
6314 my $dir = File::Temp::tempdir($ENV{'TMPDIR'} . "/parXXXXX", CLEANUP => 1);
6315 my $script = $dir."/".$execname;
6316 write_or_exit($script,$exec);
6317 # Set +x permission
6318 chmod 0700, $script;
6319 # Mark it for unlinking later
6320 $Global::unlink{$script}++;
6321 $Global::unlink{$dir}++;
6322 # pass the options for GNU Parallel
6323 my @opts = split /\0\n/, $opts;
6324 my @execopts = split /\0\n/, $execopts;
6325 if(length $argsep) {
6326 # Only add argsep if set
6327 unshift(@ARGV, split(/\0\n/,$argsep));
6329 unshift(@ARGV,@opts,$script,@execopts);
6334 sub __GENERIC_COMMON_FUNCTION__() {}
6337 sub mkdir_or_die($) {
6338 # If dir is not executable: die
6339 my $dir = shift;
6340 # The eval is needed to catch exception from mkdir
6341 eval { File::Path::mkpath($dir); };
6342 if(not -x $dir) {
6343 ::error("Cannot change into non-executable dir $dir: $!");
6344 ::wait_and_exit(255);
6348 sub tmpfile(@) {
6349 # Create tempfile as $TMPDIR/parXXXXX
6350 # Returns:
6351 # $filehandle = opened file handle
6352 # $filename = file name created
6353 my($filehandle,$filename) =
6354 ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
6355 if(wantarray) {
6356 return($filehandle,$filename);
6357 } else {
6358 # Separate unlink due to NFS dealing badly with File::Temp
6359 unlink $filename;
6360 return $filehandle;
6364 sub tmpname($) {
6365 # Select a name that does not exist
6366 # Do not create the file as it may be used for creating a socket (by tmux)
6367 # Remember the name in $Global::unlink to avoid hitting the same name twice
6368 my $name = shift;
6369 my($tmpname);
6370 if(not -w $ENV{'TMPDIR'}) {
6371 my $qtmp = ::Q($ENV{'TMPDIR'});
6372 if(not -e $ENV{'TMPDIR'}) {
6373 ::error("Tmpdir $qtmp does not exist.","Try: mkdir -p $qtmp");
6374 } else {
6375 ::error("Tmpdir $qtmp is not writable.","Try: chmod +w $qtmp");
6377 ::wait_and_exit(255);
6379 do {
6380 $tmpname = $ENV{'TMPDIR'}."/".$name.
6381 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
6382 } while(-e $tmpname or $Global::unlink{$tmpname}++);
6383 return $tmpname;
6386 sub tmpfifo() {
6387 # Find an unused name and mkfifo on it
6388 my $tmpfifo = tmpname("fif");
6389 mkfifo($tmpfifo,0600);
6390 return $tmpfifo;
6393 sub rm(@) {
6394 # Remove file and remove it from %Global::unlink
6395 # Uses:
6396 # %Global::unlink
6397 delete @Global::unlink{@_};
6398 unlink @_;
6401 sub size_of_block_dev() {
6402 # Like -s but for block devices
6403 # Input:
6404 # $blockdev = file name of block device
6405 # Returns:
6406 # $size = in bytes, undef if error
6407 my $blockdev = shift;
6408 my $fh = open_or_exit("<", $blockdev);
6409 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
6410 my $size = tell($fh);
6411 close $fh;
6412 return $size;
6415 sub qqx(@) {
6416 # Like qx but with clean environment (except for @keep)
6417 # and STDERR ignored
6418 # This is needed if the environment contains functions
6419 # that /bin/sh does not understand
6420 my %env;
6421 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
6422 # ssh with Kerberos needs KRB5CCNAME
6423 # sshpass needs SSHPASS
6424 # tmux needs LC_CTYPE
6425 # lsh needs HOME LOGNAME
6426 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE
6427 HOME LOGNAME SSHPASS);
6428 @env{@keep} = @ENV{@keep};
6429 local %ENV;
6430 %ENV = %env;
6431 if($Global::debug) {
6432 # && true is to force spawning a shell and not just exec'ing
6433 return qx{ @_ && true };
6434 } else {
6435 # CygWin does not respect 2>/dev/null
6436 # so we do that by hand
6437 # This trick does not work:
6438 # https://stackoverflow.com/q/13833088/363028
6439 # local *STDERR;
6440 # open(STDERR, ">", "/dev/null");
6441 open(local *CHILD_STDIN, '<', '/dev/null') or die $!;
6442 open(local *CHILD_STDERR, '>', '/dev/null') or die $!;
6443 my $out;
6444 # eval is needed if open3 fails (e.g. command line too long)
6445 eval {
6446 my $pid = open3(
6447 '<&CHILD_STDIN',
6448 $out,
6449 '>&CHILD_STDERR',
6450 # && true is to force spawning a shell and not just exec'ing
6451 "@_ && true");
6452 my @arr = <$out>;
6453 close $out;
6454 # Make sure $? is set
6455 waitpid($pid, 0);
6456 return wantarray ? @arr : join "",@arr;
6457 } or do {
6458 # If eval fails, force $?=false
6459 `false`;
6464 sub uniq(@) {
6465 # Remove duplicates and return unique values
6466 return keys %{{ map { $_ => 1 } @_ }};
6469 sub min(@) {
6470 # Returns:
6471 # Minimum value of array
6472 my $min;
6473 for (@_) {
6474 # Skip undefs
6475 defined $_ or next;
6476 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
6477 $min = ($min < $_) ? $min : $_;
6479 return $min;
6482 sub max(@) {
6483 # Returns:
6484 # Maximum value of array
6485 my $max;
6486 for (@_) {
6487 # Skip undefs
6488 defined $_ or next;
6489 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
6490 $max = ($max > $_) ? $max : $_;
6492 return $max;
6495 sub sum(@) {
6496 # Returns:
6497 # Sum of values of array
6498 my @args = @_;
6499 my $sum = 0;
6500 for (@args) {
6501 # Skip undefs
6502 $_ and do { $sum += $_; }
6504 return $sum;
6507 sub undef_as_zero($) {
6508 my $a = shift;
6509 return $a ? $a : 0;
6512 sub undef_as_empty($) {
6513 my $a = shift;
6514 return $a ? $a : "";
6517 sub undef_if_empty($) {
6518 if(defined($_[0]) and $_[0] eq "") {
6519 return undef;
6521 return $_[0];
6524 sub multiply_binary_prefix(@) {
6525 # Evalualte numbers with binary prefix
6526 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
6527 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
6528 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
6529 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
6530 # 13G = 13*1024*1024*1024 = 13958643712
6531 # Input:
6532 # $s = string with prefixes
6533 # Returns:
6534 # $value = int with prefixes multiplied
6535 my @v = @_;
6536 for(@v) {
6537 defined $_ or next;
6538 s/ki/*1024/gi;
6539 s/mi/*1024*1024/gi;
6540 s/gi/*1024*1024*1024/gi;
6541 s/ti/*1024*1024*1024*1024/gi;
6542 s/pi/*1024*1024*1024*1024*1024/gi;
6543 s/ei/*1024*1024*1024*1024*1024*1024/gi;
6544 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
6545 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
6546 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
6548 s/K/*1024/g;
6549 s/M/*1024*1024/g;
6550 s/G/*1024*1024*1024/g;
6551 s/T/*1024*1024*1024*1024/g;
6552 s/P/*1024*1024*1024*1024*1024/g;
6553 s/E/*1024*1024*1024*1024*1024*1024/g;
6554 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
6555 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
6556 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
6558 s/k/*1000/g;
6559 s/m/*1000*1000/g;
6560 s/g/*1000*1000*1000/g;
6561 s/t/*1000*1000*1000*1000/g;
6562 s/p/*1000*1000*1000*1000*1000/g;
6563 s/e/*1000*1000*1000*1000*1000*1000/g;
6564 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
6565 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
6566 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
6568 $_ = eval $_;
6570 return wantarray ? @v : $v[0];
6573 sub multiply_time_units($) {
6574 # Evalualte numbers with time units
6575 # s=1, m=60, h=3600, d=86400
6576 # Input:
6577 # $s = string time units
6578 # Returns:
6579 # $value = int in seconds
6580 my @v = @_;
6581 for(@v) {
6582 defined $_ or next;
6583 if(/[dhms]/i) {
6584 s/s/*1+/gi;
6585 s/m/*60+/gi;
6586 s/h/*3600+/gi;
6587 s/d/*86400+/gi;
6588 # 1m/3 => 1*60+/3 => 1*60/3
6589 s/\+(\D)/$1/gi;
6591 $_ = eval $_."-0";
6593 return wantarray ? @v : $v[0];
6596 sub seconds_to_time_units() {
6597 # Convert seconds into ??d??h??m??s
6598 # s=1, m=60, h=3600, d=86400
6599 # Input:
6600 # $s = int in seconds
6601 # Returns:
6602 # $str = string time units
6603 my $s = shift;
6604 my $str;
6605 my $d = int($s/86400);
6606 $s -= $d * 86400;
6607 my $h = int($s/3600);
6608 $s -= $h * 3600;
6609 my $m = int($s/60);
6610 $s -= $m * 60;
6611 if($d) {
6612 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
6613 } elsif($h) {
6614 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
6615 } elsif($m) {
6616 $str = sprintf("%dm%02ds",$m,$s);
6617 } else {
6618 $str = sprintf("%ds",$s);
6620 return $str;
6624 my ($disk_full_fh, $b8193, $error_printed);
6625 sub exit_if_disk_full() {
6626 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
6627 # If the disk is full: Exit immediately.
6628 # Returns:
6629 # N/A
6630 if(not $disk_full_fh) {
6631 $disk_full_fh = ::tmpfile(SUFFIX => ".df");
6632 $b8193 = "b"x8193;
6634 # Linux does not discover if a disk is full if writing <= 8192
6635 # Tested on:
6636 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
6637 # ntfs reiserfs tmpfs ubifs vfat xfs
6638 # TODO this should be tested on different OS similar to this:
6640 # doit() {
6641 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
6642 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
6643 # seq 6900000 > /mnt/loop/i && echo seq OK
6644 # seq 6980868 > /mnt/loop/i
6645 # seq 10000 > /mnt/loop/ii
6646 # sleep 3
6647 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
6648 # echo >&2
6650 print $disk_full_fh $b8193;
6651 if(not $disk_full_fh
6653 tell $disk_full_fh != 8193) {
6654 # On raspbian the disk can be full except for 10 chars.
6655 if(not $error_printed) {
6656 ::error("Output is incomplete.",
6657 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
6658 "Is the disk full?",
6659 "Change \$TMPDIR with --tmpdir or use --compress.");
6660 $error_printed = 1;
6662 ::wait_and_exit(255);
6664 truncate $disk_full_fh, 0;
6665 seek($disk_full_fh, 0, 0) || die;
6669 sub spacefree($$) {
6670 # Remove comments and spaces
6671 # Inputs:
6672 # $spaces = keep 1 space?
6673 # $s = string to remove spaces from
6674 # Returns:
6675 # $s = with spaces removed
6676 my $spaces = shift;
6677 my $s = shift;
6678 $s =~ s/#.*//mg;
6679 if(1 == $spaces) {
6680 $s =~ s/\s+/ /mg;
6681 } elsif(2 == $spaces) {
6682 # Keep newlines
6683 $s =~ s/\n\n+/\n/sg;
6684 $s =~ s/[ \t]+/ /mg;
6685 } elsif(3 == $spaces) {
6686 # Keep perl code required space
6687 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
6688 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
6689 } else {
6690 $s =~ s/\s//mg;
6692 return $s;
6696 my $hostname;
6697 sub hostname() {
6698 local $/ = "\n";
6699 if(not $hostname) {
6700 $hostname = `hostname`;
6701 chomp($hostname);
6702 $hostname ||= "nohostname";
6704 return $hostname;
6708 sub which(@) {
6709 # Input:
6710 # @programs = programs to find the path to
6711 # Returns:
6712 # @full_path = full paths to @programs. Nothing if not found
6713 my @which;
6714 for my $prg (@_) {
6715 push(@which, grep { not -d $_ and -x $_ }
6716 map { $_."/".$prg } split(":",$ENV{'PATH'}));
6717 if($prg =~ m:/:) {
6718 # Test if program with full path exists
6719 push(@which, grep { not -d $_ and -x $_ } $prg);
6722 ::debug("which", "$which[0] in $ENV{'PATH'}\n");
6723 return wantarray ? @which : $which[0];
6727 my ($regexp,$shell,%fakename);
6729 sub parent_shell {
6730 # Input:
6731 # $pid = pid to see if (grand)*parent is a shell
6732 # Returns:
6733 # $shellpath = path to shell - undef if no shell found
6734 my $pid = shift;
6735 ::debug("init","Parent of $pid\n");
6736 if(not $regexp) {
6737 # All shells known to mankind
6739 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
6740 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
6742 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ksh
6743 ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
6744 static-sh tcsh yash zsh -sh -csh -bash),
6745 '-sh (sh)' # sh on FreeBSD
6747 # Can be formatted as:
6748 # [sh] -sh sh busybox sh -sh (sh)
6749 # /bin/sh /sbin/sh /opt/csw/sh
6750 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
6751 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
6752 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
6753 '(-?)('. $shell. '))( *$| [^(])';
6754 %fakename = (
6755 # sh disguises itself as -sh (sh) on FreeBSD
6756 "-sh (sh)" => ["sh"],
6757 # csh and tcsh disguise themselves as -sh/-csh
6758 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
6759 # but sh also disguises itself as -sh
6760 # (TODO When does that happen?)
6761 "-sh" => ["sh"],
6762 "-csh" => ["tcsh", "csh"],
6763 # ash disguises itself as -ash
6764 "-ash" => ["ash", "dash", "sh"],
6765 # dash disguises itself as -dash
6766 "-dash" => ["dash", "ash", "sh"],
6767 # bash disguises itself as -bash
6768 "-bash" => ["bash", "sh"],
6769 # ksh disguises itself as -ksh
6770 "-ksh" => ["ksh", "sh"],
6771 # zsh disguises itself as -zsh
6772 "-zsh" => ["zsh", "sh"],
6775 if($^O eq "linux") {
6776 # Optimized for GNU/Linux
6777 my $testpid = $pid;
6778 my $shellpath;
6779 my $shellline;
6780 while($testpid) {
6781 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
6782 local $/="\0";
6783 chomp($shellline = <$fd>);
6784 if($shellline =~ /$regexp/o) {
6785 my $shellname = $4 || $8;
6786 my $dash = $3 || $7;
6787 if($shellname eq "sh" and $dash) {
6788 # -sh => csh or sh
6789 if($shellpath = readlink "/proc/$testpid/exe") {
6790 ::debug("init","procpath $shellpath\n");
6791 if($shellpath =~ m:/$shell$:o) {
6792 ::debug("init",
6793 "proc which ".$shellpath." => ");
6794 return $shellpath;
6798 ::debug("init", "which ".$shellname." => ");
6799 $shellpath = (which($shellname,
6800 @{$fakename{$shellname}}))[0];
6801 ::debug("init", "shell path $shellpath\n");
6802 return $shellpath;
6805 # Get parent pid
6806 if(open(my $fd, "<", "/proc/$testpid/stat")) {
6807 my $line = <$fd>;
6808 close $fd;
6809 # Parent pid is field 4
6810 $testpid = (split /\s+/, $line)[3];
6811 } else {
6812 # Something is wrong: fall back to old method
6813 last;
6817 # if -sh or -csh try readlink /proc/$$/exe
6818 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
6819 my $shellpath;
6820 my $testpid = $pid;
6821 while($testpid) {
6822 if($name_of_ref->{$testpid} =~ /$regexp/o) {
6823 my $shellname = $4 || $8;
6824 my $dash = $3 || $7;
6825 if($shellname eq "sh" and $dash) {
6826 # -sh => csh or sh
6827 if($shellpath = readlink "/proc/$testpid/exe") {
6828 ::debug("init","procpath $shellpath\n");
6829 if($shellpath =~ m:/$shell$:o) {
6830 ::debug("init", "proc which ".$shellpath." => ");
6831 return $shellpath;
6835 ::debug("init", "which ".$shellname." => ");
6836 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
6837 ::debug("init", "shell path $shellpath\n");
6838 $shellpath and last;
6840 if($testpid == $parent_of_ref->{$testpid}) {
6841 # In Solaris zones, the PPID of the zsched process is itself
6842 last;
6844 $testpid = $parent_of_ref->{$testpid};
6846 return $shellpath;
6851 my %pid_parentpid_cmd;
6853 sub pid_table() {
6854 # Returns:
6855 # %children_of = { pid -> children of pid }
6856 # %parent_of = { pid -> pid of parent }
6857 # %name_of = { pid -> commandname }
6859 if(not %pid_parentpid_cmd) {
6860 # Filter for SysV-style `ps`
6861 my $sysv = q( ps -ef |).
6862 q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
6863 q(s/^.{$s}//; print "@F[1,2] $_"' );
6864 # Minix uses cols 2,3 and can have newlines in the command
6865 # so lines not having numbers in cols 2,3 must be ignored
6866 my $minix = q( ps -ef |).
6867 q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
6868 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
6869 # BSD-style `ps`
6870 my $bsd = q(ps -o pid,ppid,command -ax);
6871 %pid_parentpid_cmd =
6873 'aix' => $sysv,
6874 'android' => $sysv,
6875 'cygwin' => $sysv,
6876 'darwin' => $bsd,
6877 'dec_osf' => $sysv,
6878 'dragonfly' => $bsd,
6879 'freebsd' => $bsd,
6880 'gnu' => $sysv,
6881 'hpux' => $sysv,
6882 'linux' => $sysv,
6883 'mirbsd' => $bsd,
6884 'minix' => $minix,
6885 'msys' => $sysv,
6886 'MSWin32' => $sysv,
6887 'netbsd' => $bsd,
6888 'nto' => $sysv,
6889 'openbsd' => $bsd,
6890 'solaris' => $sysv,
6891 'svr5' => $sysv,
6892 'syllable' => "echo ps not supported",
6895 $pid_parentpid_cmd{$^O} or
6896 ::die_bug("pid_parentpid_cmd for $^O missing");
6898 my (@pidtable,%parent_of,%children_of,%name_of);
6899 # Table with pid -> children of pid
6900 @pidtable = `$pid_parentpid_cmd{$^O}`;
6901 my $p=$$;
6902 for (@pidtable) {
6903 # must match: 24436 21224 busybox ash
6904 # must match: 24436 21224 <<empty on MacOSX running cubase>>
6905 # must match: 24436 21224 <<empty on system running Viber>>
6906 # or: perl -e 'while($0=" "){}'
6907 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
6909 /^\s*(\S+)\s+(\S+)\s+()$/) {
6910 $parent_of{$1} = $2;
6911 push @{$children_of{$2}}, $1;
6912 $name_of{$1} = $3;
6913 } else {
6914 ::die_bug("pidtable format: $_");
6917 return(\%children_of, \%parent_of, \%name_of);
6921 sub now() {
6922 # Returns time since epoch as in seconds with 3 decimals
6923 # Uses:
6924 # @Global::use
6925 # Returns:
6926 # $time = time now with millisecond accuracy
6927 if(not $Global::use{"Time::HiRes"}) {
6928 if(eval "use Time::HiRes qw ( time );") {
6929 eval "sub TimeHiRestime { return Time::HiRes::time };";
6930 } else {
6931 eval "sub TimeHiRestime { return time() };";
6933 $Global::use{"Time::HiRes"} = 1;
6936 return (int(TimeHiRestime()*1000))/1000;
6939 sub usleep($) {
6940 # Sleep this many milliseconds.
6941 # Input:
6942 # $ms = milliseconds to sleep
6943 my $ms = shift;
6944 ::debug("timing",int($ms),"ms ");
6945 select(undef, undef, undef, $ms/1000);
6948 sub make_regexp_ungreedy {
6949 my $regexp = shift;
6950 my $class_state = 0;
6951 my $escape_state = 0;
6952 my $found = 0;
6953 my $ungreedy = "";
6954 my $c;
6956 for $c (split (//, $regexp)) {
6957 if ($found) {
6958 if($c ne "?") { $ungreedy .= "?"; }
6959 $found = 0;
6961 $ungreedy .= $c;
6963 if ($escape_state) { $escape_state = 0; next; }
6964 if ($c eq "\\") { $escape_state = 1; next; }
6965 if ($c eq '[') { $class_state = 1; next; }
6966 if ($class_state) {
6967 if($c eq ']') { $class_state = 0; }
6968 next;
6970 # Quantifiers: + * {...}
6971 if ($c =~ /[*}+]/) { $found = 1; }
6973 if($found) { $ungreedy .= '?'; }
6974 return $ungreedy;
6978 sub __KILLER_REAPER__() {}
6980 sub reap_usleep() {
6981 # Reap dead children.
6982 # If no dead children: Sleep specified amount with exponential backoff
6983 # Input:
6984 # $ms = milliseconds to sleep
6985 # Returns:
6986 # $ms/2+0.001 if children reaped
6987 # $ms*1.1 if no children reaped
6988 my $ms = shift;
6989 if(reapers()) {
6990 if(not $Global::total_completed % 100) {
6991 if($opt::timeout) {
6992 # Force cleaning the timeout queue for every 100 jobs
6993 # Fixes potential memleak
6994 $Global::timeoutq->process_timeouts();
6997 # Sleep exponentially shorter (1/2^n) if a job finished
6998 return $ms/2+0.001;
6999 } else {
7000 if($opt::timeout) {
7001 $Global::timeoutq->process_timeouts();
7003 if($opt::memfree) {
7004 kill_youngster_if_not_enough_mem($opt::memfree*0.5);
7006 if($opt::memsuspend) {
7007 suspend_young_if_not_enough_mem($opt::memsuspend);
7009 if($opt::limit) {
7010 kill_youngest_if_over_limit();
7012 exit_if_disk_full();
7013 if($Global::linebuffer) {
7014 my $something_printed = 0;
7015 if($opt::keeporder and not $opt::latestline) {
7016 for my $job (values %Global::running) {
7017 $something_printed += $job->print_earlier_jobs();
7019 } else {
7020 for my $job (values %Global::running) {
7021 $something_printed += $job->print();
7024 if($something_printed) { $ms = $ms/2+0.001; }
7026 if($ms > 0.002) {
7027 # When a child dies, wake up from sleep (or select(,,,))
7028 $SIG{CHLD} = sub { kill "ALRM", $$ };
7029 if($opt::delay and not $Global::linebuffer) {
7030 # The 0.004s is approximately the time it takes for one round
7031 my $next_earliest_start =
7032 $Global::newest_starttime + $opt::delay - 0.004;
7033 my $remaining_ms = 1000 * ($next_earliest_start - ::now());
7034 # The next job can only start at $next_earliest_start
7035 # so sleep until then (but sleep at least $ms)
7036 usleep(::max($ms,$remaining_ms));
7037 } else {
7038 usleep($ms);
7040 # --compress needs $SIG{CHLD} unset
7041 $SIG{CHLD} = 'DEFAULT';
7043 # Sleep exponentially longer (1.1^n) if a job did not finish,
7044 # though at most 1000 ms.
7045 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
7049 sub kill_youngest_if_over_limit() {
7050 # Check each $sshlogin we are over limit
7051 # If over limit: kill off the youngest child
7052 # Put the child back in the queue.
7053 # Uses:
7054 # %Global::running
7055 my %jobs_of;
7056 my @sshlogins;
7058 for my $job (values %Global::running) {
7059 if(not $jobs_of{$job->sshlogin()}) {
7060 push @sshlogins, $job->sshlogin();
7062 push @{$jobs_of{$job->sshlogin()}}, $job;
7064 for my $sshlogin (@sshlogins) {
7065 for my $job (sort { $b->seq() <=> $a->seq() }
7066 @{$jobs_of{$sshlogin}}) {
7067 if($sshlogin->limit() == 2) {
7068 $job->kill();
7069 last;
7075 sub suspend_young_if_not_enough_mem() {
7076 # Check each $sshlogin if there is enough mem.
7077 # If less than $limit free mem: suspend some of the young children
7078 # Else: Resume all jobs
7079 # Uses:
7080 # %Global::running
7081 my $limit = shift;
7082 my %jobs_of;
7083 my @sshlogins;
7085 for my $job (values %Global::running) {
7086 if(not $jobs_of{$job->sshlogin()}) {
7087 push @sshlogins, $job->sshlogin();
7089 push @{$jobs_of{$job->sshlogin()}}, $job;
7091 for my $sshlogin (@sshlogins) {
7092 my $free = $sshlogin->memfree();
7093 if($free < 2*$limit) {
7094 # Suspend all jobs (resume some of them later)
7095 map { $_->suspended() or $_->suspend(); } @{$jobs_of{$sshlogin}};
7096 my @jobs = (sort { $b->seq() <=> $a->seq() }
7097 @{$jobs_of{$sshlogin}});
7098 # how many should be running?
7099 # limit*1 => 1;
7100 # limit*1.5 => 2;
7101 # limit*1.75 => 4;
7102 # free < limit*(2-1/2^n);
7103 # =>
7104 # 1/(2-free/limit) < 2^n;
7105 my $run = int(1/(2-$free/$limit));
7106 $run = ::min($run,$#jobs);
7107 # Resume the oldest running
7108 for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) {
7109 ::debug("mem","\nResume ",$run+1, " jobs. Seq ",
7110 $job->seq(), " resumed ",
7111 $sshlogin->memfree()," < ",2*$limit);
7112 $job->resume();
7114 } else {
7115 for my $job (@{$jobs_of{$sshlogin}}) {
7116 if($job->suspended()) {
7117 $job->resume();
7118 ::debug("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1,
7119 " jobs. Seq ", $job->seq(), " resumed ",
7120 $sshlogin->memfree()," > ",2*$limit);
7121 last;
7128 sub kill_youngster_if_not_enough_mem() {
7129 # Check each $sshlogin if there is enough mem.
7130 # If less than 50% enough free mem: kill off the youngest child
7131 # Put the child back in the queue.
7132 # Uses:
7133 # %Global::running
7134 my $limit = shift;
7135 my %jobs_of;
7136 my @sshlogins;
7138 for my $job (values %Global::running) {
7139 if(not $jobs_of{$job->sshlogin()}) {
7140 push @sshlogins, $job->sshlogin();
7142 push @{$jobs_of{$job->sshlogin()}}, $job;
7144 for my $sshlogin (@sshlogins) {
7145 for my $job (sort { $b->seq() <=> $a->seq() }
7146 @{$jobs_of{$sshlogin}}) {
7147 if($sshlogin->memfree() < $limit) {
7148 ::debug("mem","\n",map { $_->seq()." " }
7149 (sort { $b->seq() <=> $a->seq() }
7150 @{$jobs_of{$sshlogin}}));
7151 ::debug("mem","\n", $job->seq(), "killed ",
7152 $sshlogin->memfree()," < ",$limit);
7153 $job->kill();
7154 $job->set_killreason("mem");
7155 $sshlogin->memfree_recompute();
7156 } else {
7157 last;
7160 ::debug("mem","Free mem OK? ",
7161 $sshlogin->memfree()," > ",$limit);
7166 sub __DEBUGGING__() {}
7169 sub debug(@) {
7170 # Uses:
7171 # $Global::debug
7172 # %Global::fh
7173 # Returns: N/A
7174 $Global::debug or return;
7175 @_ = grep { defined $_ ? $_ : "" } @_;
7176 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
7177 if($Global::fh{2}) {
7178 # Original stderr was saved
7179 my $stderr = $Global::fh{2};
7180 print $stderr @_[1..$#_];
7181 } else {
7182 print STDERR @_[1..$#_];
7187 sub my_memory_usage() {
7188 # Returns:
7189 # memory usage if found
7190 # 0 otherwise
7191 use strict;
7192 use FileHandle;
7194 local $/ = "\n";
7195 my $pid = $$;
7196 if(-e "/proc/$pid/stat") {
7197 my $fh = FileHandle->new("</proc/$pid/stat");
7199 my $data = <$fh>;
7200 chomp $data;
7201 $fh->close;
7203 my @procinfo = split(/\s+/,$data);
7205 return undef_as_zero($procinfo[22]);
7206 } else {
7207 return 0;
7211 sub my_size() {
7212 # Returns:
7213 # $size = size of object if Devel::Size is installed
7214 # -1 otherwise
7215 my @size_this = (@_);
7216 eval "use Devel::Size qw(size total_size)";
7217 if ($@) {
7218 return -1;
7219 } else {
7220 return total_size(@_);
7224 sub my_dump(@) {
7225 # Returns:
7226 # ascii expression of object if Data::Dump(er) is installed
7227 # error code otherwise
7228 my @dump_this = (@_);
7229 eval "use Data::Dump qw(dump);";
7230 if ($@) {
7231 # Data::Dump not installed
7232 eval "use Data::Dumper;";
7233 if ($@) {
7234 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
7235 "Not dumping output\n";
7236 ::status($err);
7237 return $err;
7238 } else {
7239 return Dumper(@dump_this);
7241 } else {
7242 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
7243 # it undefined
7244 eval "sub Data::Dump:dump {}";
7245 eval "use Data::Dump qw(dump);";
7246 return (Data::Dump::dump(@dump_this));
7250 sub my_croak(@) {
7251 eval "use Carp; 1";
7252 $Carp::Verbose = 1;
7253 croak(@_);
7256 sub my_carp() {
7257 eval "use Carp; 1";
7258 $Carp::Verbose = 1;
7259 carp(@_);
7263 sub __OBJECT_ORIENTED_PARTS__() {}
7266 package SSHLogin;
7268 sub new($$) {
7269 my $class = shift;
7270 my $s = shift;
7271 my $origs = $s;
7272 my %hostgroups;
7273 my $ncpus;
7274 my $sshcommand;
7275 my $user;
7276 my $password;
7277 my $host;
7278 my $port;
7279 my $local;
7280 my $string;
7281 # SSHLogins can have these formats:
7282 # @grp+grp/ncpu//usr/bin/ssh user@server
7283 # ncpu//usr/bin/ssh user@server
7284 # /usr/bin/ssh user@server
7285 # user@server
7286 # ncpu/user@server
7287 # @grp+grp/user@server
7288 # above with: user:password@server
7289 # above with: user@server:port
7290 # So:
7291 # [@grp+grp][ncpu/][ssh command ][[user][:password]@][server[:port]]
7293 # [@grp+grp]/ncpu//usr/bin/ssh user:pass@server:port
7294 if($s =~ s:^\@([^/]+)/?::) {
7295 # Look for SSHLogin hostgroups
7296 %hostgroups = map { $_ => 1 } split(/\+|,/, $1);
7298 # An SSHLogin is always in the hostgroup of its "numcpu/host"
7299 $hostgroups{$s} = 1;
7301 # [ncpu/]/usr/bin/ssh user:pass@server:port
7302 if ($s =~ s:^(\d+)/::) { $ncpus = $1; }
7304 # Why disallow space in password?
7305 # Example:
7306 # C:/bin/ssh user:C:/bin/ssh@host
7307 # Should this parse as:
7308 # user 'C' with password '/bin/ssh user:C:/bin/ssh'
7309 # or
7310 # cmd 'C:/bin/ssh' user 'user' with password 'C:/bin/ssh'
7311 # This is impossible to determine.
7312 # With space forbidden in password it uniquely parses as the 2nd.
7313 # [/usr/bin/ssh ]user:pass@server:port
7314 if($s =~ s/^(.*) //) { $sshcommand = $1; }
7316 # [user:pass@]server:port
7317 if($s =~ s/^(.+)@//) {
7318 my $userpw = $1;
7319 # user[:pass]
7320 if($userpw =~ s/:(.*)//) {
7321 $password = $1;
7322 if($password eq "") { $password = $ENV{'SSHPASS'} }
7323 if(not ::which("sshpass")) {
7324 ::error("--sshlogin with password requires sshpass installed");
7325 ::wait_and_exit(255);
7328 $user = $userpw;
7330 # [server]:port
7331 if(not $s =~ /:.*:/
7333 $s =~ s/^([-a-z0-9._]+)//i) {
7334 # Not IPv6 (IPv6 has 2 or more ':')
7335 $host = $1;
7336 } elsif($s =~ s/^(\\[\[\]box0-9a-f.]+)//i) {
7337 # RFC2673 allows for:
7338 # \[b11010000011101] \[o64072/14] \[xd074/14] \[208.116.0.0/14]
7339 $host = $1;
7340 } elsif($s =~ s/^\[([0-9a-f:]+)\]//i
7342 $s =~ s/^([0-9a-f:]+)//i) {
7343 # RFC5952
7344 # [2001:db8::1]:80
7345 # 2001:db8::1.80
7346 # 2001:db8::1p80
7347 # 2001:db8::1#80
7348 # 2001:db8::1:80 - not supported
7349 # 2001:db8::1 port 80 - not supported
7350 $host = $1;
7353 # [:port]
7354 if($s =~ s/^:(\w+)//i) {
7355 $port = $1;
7356 } elsif($s =~ s/^[p\.\#](\w+)//i) {
7357 # RFC5952
7358 # 2001:db8::1.80
7359 # 2001:db8::1p80
7360 # 2001:db8::1#80
7361 $port = $1;
7364 if($s and $s ne ':') {
7365 ::die_bug("SSHLogin parser failed on '$origs' => '$s'");
7368 $string =
7369 # Only include the sshcommand in $string if it is set by user
7370 ($sshcommand && $sshcommand." ").
7371 ($user && $user."@").
7372 ($host && $host).
7373 ($port && ":$port");
7374 my $pwstring =
7375 # Only include the sshcommand in $string if it is set by user
7376 ($sshcommand && $sshcommand." ").
7377 ($user && $user. ($password && ":".$password)."@").
7378 ($host && $host).
7379 ($port && ":$port");
7380 if($host eq ':') {
7381 $local = 1;
7382 $string = ":";
7383 } else {
7384 $sshcommand ||= $opt::ssh || $ENV{'PARALLEL_SSH'} || "ssh";
7386 # An SSHLogin is always in the hostgroup of its $string-name
7387 $hostgroups{$string} = 1;
7388 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
7389 # Used for file names for loadavg
7390 my $no_slash_string = $string;
7391 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
7392 return bless {
7393 'string' => $string,
7394 'pwstring' => $pwstring,
7395 'jobs_running' => 0,
7396 'jobs_completed' => 0,
7397 'maxlength' => undef,
7398 'max_jobs_running' => undef,
7399 'orig_max_jobs_running' => undef,
7400 'ncpus' => $ncpus,
7401 'sshcommand' => $sshcommand,
7402 'user' => $user,
7403 'password' => $password,
7404 'host' => $host,
7405 'port' => $port,
7406 'hostgroups' => \%hostgroups,
7407 'local' => $local,
7408 'control_path_dir' => undef,
7409 'control_path' => undef,
7410 'time_to_login' => undef,
7411 'last_login_at' => undef,
7412 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
7413 $no_slash_string . "/loadavg",
7414 'loadavg' => undef,
7415 'last_loadavg_update' => 0,
7416 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
7417 $no_slash_string . "/swap_activity",
7418 'swap_activity' => undef,
7419 }, ref($class) || $class;
7422 sub DESTROY($) {
7423 my $self = shift;
7424 # Remove temporary files if they are created.
7425 ::rm($self->{'loadavg_file'});
7426 ::rm($self->{'swap_activity_file'});
7429 sub string($) {
7430 my $self = shift;
7431 return $self->{'string'};
7434 sub pwstring($) {
7435 my $self = shift;
7436 return $self->{'pwstring'};
7439 sub host($) {
7440 my $self = shift;
7441 return $self->{'host'};
7444 sub sshcmd($) {
7445 # Give the ssh command without hostname
7446 # Returns:
7447 # "sshpass -e ssh -p port -l user"
7448 my $self = shift;
7449 my @local;
7450 # [sshpass -e] ssh -p port -l user
7451 if($self->{'password'}) { push @local, "sshpass -e"; }
7452 # [ssh] -p port -l user
7453 # TODO sshpass + space
7454 push @local, $self->{'sshcommand'};
7455 # [-p port] -l user
7456 if($self->{'port'}) { push @local, '-p',$self->{'port'}; }
7457 # [-l user]
7458 if($self->{'user'}) { push @local, '-l',$self->{'user'}; }
7459 if($opt::controlmaster) {
7460 # Use control_path to make ssh faster
7461 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7463 if(not $self->{'control_path'}{$control_path}++) {
7464 # Master is not running for this control_path
7465 # Start it
7466 my $pid = fork();
7467 if($pid) {
7468 $Global::sshmaster{$pid} ||= 1;
7469 } else {
7470 push @local, "-S", $control_path;
7471 $SIG{'TERM'} = undef;
7472 # Run a sleep that outputs data, so it will discover
7473 # if the ssh connection closes.
7474 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7475 # Ignore the 'foo' being printed
7476 open(STDOUT,">","/dev/null");
7477 # STDERR >/dev/null to ignore
7478 open(STDERR,">","/dev/null");
7479 open(STDIN,"<","/dev/null");
7480 exec(@local, "-MT", $self->{'host'}, "--",
7481 "perl", "-e", $sleep);
7484 push @local, "-S", ::Q($control_path);
7486 return "@local";
7489 sub wrap($@) {
7490 # Input:
7491 # @cmd = shell command to run on remote
7492 # Returns:
7493 # $sshwrapped = ssh remote @cmd
7494 my $self = shift;
7495 my @remote = @_;
7496 return(join " ",
7497 $self->sshcmd(), $self->{'host'}, "--", "exec", @remote);
7500 sub hexwrap($@) {
7501 # Input:
7502 # @cmd = perl expresion to eval
7503 # Returns:
7504 # $hexencoded = perl command that decodes hex and evals @cmd
7505 my $self = shift;
7506 my $cmd = join("",@_);
7508 # "#" is needed because Perl on MacOS X adds NULs
7509 # when running pack q/H10000000/
7510 my $hex = unpack "H*", $cmd."#";
7511 # csh does not deal well with > 1000 chars in one word
7512 # Insert space every 1000 char
7513 $hex =~ s/\G.{1000}\K/ /sg;
7514 # Explanation:
7515 # Write this without special chars: eval pack 'H*', join '',@ARGV
7516 # GNU_Parallel_worker = String so people can see this is from GNU Parallel
7517 # eval+ = way to write 'eval ' without space (gives warning)
7518 # pack+ = way to write 'pack ' without space
7519 # q/H10000000/, = almost the same as "H*" but does not use *
7520 # join+q//, = join '',
7521 return('perl -X -e '.
7522 'GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV '.
7523 $hex);
7526 sub jobs_running($) {
7527 my $self = shift;
7528 return ($self->{'jobs_running'} || "0");
7531 sub inc_jobs_running($) {
7532 my $self = shift;
7533 $self->{'jobs_running'}++;
7536 sub dec_jobs_running($) {
7537 my $self = shift;
7538 $self->{'jobs_running'}--;
7541 sub set_maxlength($$) {
7542 my $self = shift;
7543 $self->{'maxlength'} = shift;
7546 sub maxlength($) {
7547 my $self = shift;
7548 return $self->{'maxlength'};
7551 sub jobs_completed() {
7552 my $self = shift;
7553 return $self->{'jobs_completed'};
7556 sub in_hostgroups() {
7557 # Input:
7558 # @hostgroups = the hostgroups to look for
7559 # Returns:
7560 # true if intersection of @hostgroups and the hostgroups of this
7561 # SSHLogin is non-empty
7562 my $self = shift;
7563 return grep { defined $self->{'hostgroups'}{$_} } @_;
7566 sub hostgroups() {
7567 my $self = shift;
7568 return keys %{$self->{'hostgroups'}};
7571 sub inc_jobs_completed($) {
7572 my $self = shift;
7573 $self->{'jobs_completed'}++;
7574 $Global::total_completed++;
7577 sub set_max_jobs_running($$) {
7578 my $self = shift;
7579 if(defined $self->{'max_jobs_running'}) {
7580 $Global::max_jobs_running -= $self->{'max_jobs_running'};
7582 $self->{'max_jobs_running'} = shift;
7584 if(defined $self->{'max_jobs_running'}) {
7585 # max_jobs_running could be resat if -j is a changed file
7586 $Global::max_jobs_running += $self->{'max_jobs_running'};
7588 # Initialize orig to the first non-zero value that comes around
7589 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
7592 sub memfree() {
7593 # Returns:
7594 # $memfree in bytes
7595 my $self = shift;
7596 $self->memfree_recompute();
7597 # Return 1 if not defined.
7598 return (not defined $self->{'memfree'} or $self->{'memfree'})
7601 sub memfree_recompute() {
7602 my $self = shift;
7603 my $script = memfreescript();
7605 # TODO add sshlogin and backgrounding
7606 # Run the script twice if it gives 0 (typically intermittent error)
7607 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
7608 if(not $self->{'memfree'}) {
7609 ::die_bug("Less than 1 byte memory free");
7611 #::debug("mem","New free:",$self->{'memfree'}," ");
7615 my $script;
7617 sub memfreescript() {
7618 # Returns:
7619 # shellscript for giving available memory in bytes
7620 if(not $script) {
7621 my %script_of = (
7622 # /proc/meminfo
7623 # MemFree: 7012 kB
7624 # Buffers: 19876 kB
7625 # Cached: 431192 kB
7626 # SwapCached: 0 kB
7627 "linux" => (
7629 print 1024 * qx{
7630 awk '/^((Swap)?Cached|MemFree|Buffers):/
7631 { sum += \$2} END { print sum }'
7632 /proc/meminfo }
7634 # Android uses same code as GNU/Linux
7635 "android" => (
7637 print 1024 * qx{
7638 awk '/^((Swap)?Cached|MemFree|Buffers):/
7639 { sum += \$2} END { print sum }'
7640 /proc/meminfo }
7642 # $ vmstat 1 1
7643 # procs memory page faults cpu
7644 # r b w avm free re at pi po fr de sr in sy cs us sy id
7645 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
7646 "hpux" => (
7648 print (((reverse `vmstat 1 1`)[0]
7649 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
7651 # $ vmstat 1 2
7652 # kthr memory page disk faults cpu
7653 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
7654 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
7655 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
7657 # The second free value is correct
7658 "solaris" => (
7660 print (((reverse `vmstat 1 2`)[0]
7661 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
7663 # hw.pagesize: 4096
7664 # vm.stats.vm.v_cache_count: 0
7665 # vm.stats.vm.v_inactive_count: 79574
7666 # vm.stats.vm.v_free_count: 4507
7667 "freebsd" => (
7669 for(qx{/sbin/sysctl -a}) {
7670 if (/^([^:]+):\s+(.+)\s*$/s) {
7671 $sysctl->{$1} = $2;
7674 print $sysctl->{"hw.pagesize"} *
7675 ($sysctl->{"vm.stats.vm.v_cache_count"}
7676 + $sysctl->{"vm.stats.vm.v_inactive_count"}
7677 + $sysctl->{"vm.stats.vm.v_free_count"});
7679 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
7680 # Pages free: 198061.
7681 # Pages active: 159701.
7682 # Pages inactive: 47378.
7683 # Pages speculative: 29707.
7684 # Pages wired down: 89231.
7685 # "Translation faults": 928901425.
7686 # Pages copy-on-write: 156988239.
7687 # Pages zero filled: 271267894.
7688 # Pages reactivated: 48895.
7689 # Pageins: 1798068.
7690 # Pageouts: 257.
7691 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
7692 'darwin' => (
7694 $vm = `vm_stat`;
7695 print (($vm =~ /page size of (\d+)/)[0] *
7696 (($vm =~ /Pages free:\s+(\d+)/)[0] +
7697 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
7700 my $perlscript = "";
7701 # Make a perl script that detects the OS ($^O) and runs
7702 # the appropriate command
7703 for my $os (keys %script_of) {
7704 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
7706 $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
7708 return $script;
7712 sub limit($) {
7713 # Returns:
7714 # 0 = Below limit. Start another job.
7715 # 1 = Over limit. Start no jobs.
7716 # 2 = Kill youngest job
7717 my $self = shift;
7719 if(not defined $self->{'limitscript'}) {
7720 my %limitscripts =
7721 ("io" => q!
7722 io() {
7723 limit=$1;
7724 io_file=$2;
7725 # Do the measurement in the background
7726 ((tmp=$(tempfile);
7727 LANG=C iostat -x 1 2 > $tmp;
7728 mv $tmp $io_file) </dev/null >/dev/null & );
7729 perl -e '-e $ARGV[0] or exit(1);
7730 for(reverse <>) {
7731 /Device/ and last;
7732 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
7733 exit ('$limit' < $max)' $io_file;
7735 io %s %s
7737 "mem" => q!
7738 mem() {
7739 limit=$1;
7740 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
7741 END {
7742 if (sum*1024 < '$limit'/2) { exit 2; }
7743 else { exit (sum*1024 < '$limit') }
7744 }' /proc/meminfo;
7746 mem %s;
7748 "load" => q!
7749 load() {
7750 limit=$1;
7751 ps ax -o state,command |
7752 grep -E '^[DOR].[^[]' |
7753 wc -l |
7754 perl -ne 'exit ('$limit' < $_)';
7756 load %s
7759 my ($cmd,@args) = split /\s+/,$opt::limit;
7760 if($limitscripts{$cmd}) {
7761 my $tmpfile = ::tmpname("parlmt");
7762 ++$Global::unlink{$tmpfile};
7763 $self->{'limitscript'} =
7764 ::spacefree(1, sprintf($limitscripts{$cmd},
7765 ::multiply_binary_prefix(@args),$tmpfile));
7766 } else {
7767 $self->{'limitscript'} = $opt::limit;
7771 my %env = %ENV;
7772 local %ENV = %env;
7773 $ENV{'SSHLOGIN'} = $self->string();
7774 system($Global::shell,"-c",$self->{'limitscript'});
7775 #::qqx($self->{'limitscript'});
7776 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
7777 return $?>>8;
7781 sub swapping($) {
7782 my $self = shift;
7783 my $swapping = $self->swap_activity();
7784 return (not defined $swapping or $swapping)
7787 sub swap_activity($) {
7788 # If the currently known swap activity is too old:
7789 # Recompute a new one in the background
7790 # Returns:
7791 # last swap activity computed
7792 my $self = shift;
7793 # Should we update the swap_activity file?
7794 my $update_swap_activity_file = 0;
7795 # Test with (on 64 core machine):
7796 # seq 100 | parallel --lb -j100 'seq 1000 | parallel --noswap -j 1 true'
7797 if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) {
7798 my $swap_out = <$swap_fh>;
7799 close $swap_fh;
7800 if($swap_out =~ /^(\d+)$/) {
7801 $self->{'swap_activity'} = $1;
7802 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
7804 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
7805 if(time - $self->{'last_swap_activity_update'} > 10) {
7806 # last swap activity update was started 10 seconds ago
7807 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
7808 $update_swap_activity_file = 1;
7810 } else {
7811 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
7812 $self->{'swap_activity'} = undef;
7813 $update_swap_activity_file = 1;
7815 if($update_swap_activity_file) {
7816 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
7817 $self->{'last_swap_activity_update'} = time;
7818 my $dir = ::dirname($self->{'swap_activity_file'});
7819 -d $dir or eval { File::Path::mkpath($dir); };
7820 my $swap_activity;
7821 $swap_activity = swapactivityscript();
7822 if(not $self->local()) {
7823 $swap_activity = $self->wrap($swap_activity);
7825 # Run swap_activity measuring.
7826 # As the command can take long to run if run remote
7827 # save it to a tmp file before moving it to the correct file
7828 my $file = $self->{'swap_activity_file'};
7829 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
7830 ::debug("swap", "\n", $swap_activity, "\n");
7831 my $qtmp = ::Q($tmpfile);
7832 my $qfile = ::Q($file);
7833 ::qqx("($swap_activity > $qtmp && mv $qtmp $qfile || rm $qtmp &)");
7835 return $self->{'swap_activity'};
7839 my $script;
7841 sub swapactivityscript() {
7842 # Returns:
7843 # shellscript for detecting swap activity
7845 # arguments for vmstat are OS dependant
7846 # swap_in and swap_out are in different columns depending on OS
7848 if(not $script) {
7849 my %vmstat = (
7850 # linux: $7*$8
7851 # $ vmstat 1 2
7852 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
7853 # r b swpd free buff cache si so bi bo in cs us sy id wa
7854 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
7855 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
7856 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
7858 # solaris: $6*$7
7859 # $ vmstat -S 1 2
7860 # kthr memory page disk faults cpu
7861 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
7862 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
7863 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
7864 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
7866 # darwin (macosx): $21*$22
7867 # $ vm_stat -c 2 1
7868 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
7869 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
7870 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
7871 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
7872 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
7874 # ultrix: $12*$13
7875 # $ vmstat -S 1 2
7876 # procs faults cpu memory page disk
7877 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
7878 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
7879 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
7880 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
7882 # aix: $6*$7
7883 # $ vmstat 1 2
7884 # System configuration: lcpu=1 mem=2048MB
7886 # kthr memory page faults cpu
7887 # ----- ----------- ------------------------ ------------ -----------
7888 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
7889 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
7890 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
7891 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
7893 # freebsd: $8*$9
7894 # $ vmstat -H 1 2
7895 # procs memory page disks faults cpu
7896 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
7897 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
7898 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
7899 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
7901 # mirbsd: $8*$9
7902 # $ vmstat 1 2
7903 # procs memory page disks traps cpu
7904 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
7905 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
7906 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
7907 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
7909 # netbsd: $7*$8
7910 # $ vmstat 1 2
7911 # procs memory page disks faults cpu
7912 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
7913 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
7914 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
7915 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
7917 # openbsd: $8*$9
7918 # $ vmstat 1 2
7919 # procs memory page disks traps cpu
7920 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
7921 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
7922 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
7923 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
7925 # hpux: $8*$9
7926 # $ vmstat 1 2
7927 # procs memory page faults cpu
7928 # r b w avm free re at pi po fr de sr in sy cs us sy id
7929 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
7930 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
7931 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
7933 # dec_osf (tru64): $11*$12
7934 # $ vmstat 1 2
7935 # Virtual Memory Statistics: (pagesize = 8192)
7936 # procs memory pages intr cpu
7937 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
7938 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
7939 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
7940 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
7942 # gnu (hurd): $7*$8
7943 # $ vmstat -k 1 2
7944 # (pagesize: 4, size: 512288, swap size: 894972)
7945 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
7946 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
7947 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
7948 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
7950 # -nto (qnx has no swap)
7951 #-irix
7952 #-svr5 (scosysv)
7954 my $perlscript = "";
7955 # Make a perl script that detects the OS ($^O) and runs
7956 # the appropriate vmstat command
7957 for my $os (keys %vmstat) {
7958 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
7959 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
7960 $vmstat{$os}[1] . '}"` }';
7962 $script = "perl -e " . ::Q($perlscript);
7964 return $script;
7968 sub too_fast_remote_login($) {
7969 my $self = shift;
7970 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
7971 # sshd normally allows 10 simultaneous logins
7972 # A login takes time_to_login
7973 # So time_to_login/5 should be safe
7974 # If now <= last_login + time_to_login/5: Then it is too soon.
7975 my $too_fast = (::now() <= $self->{'last_login_at'}
7976 + $self->{'time_to_login'}/5);
7977 ::debug("run", "Too fast? $too_fast ");
7978 return $too_fast;
7979 } else {
7980 # No logins so far (or time_to_login not computed): it is not too fast
7981 return 0;
7985 sub last_login_at($) {
7986 my $self = shift;
7987 return $self->{'last_login_at'};
7990 sub set_last_login_at($$) {
7991 my $self = shift;
7992 $self->{'last_login_at'} = shift;
7995 sub loadavg_too_high($) {
7996 my $self = shift;
7997 my $loadavg = $self->loadavg();
7998 if(defined $loadavg) {
7999 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
8000 return $loadavg >= $self->max_loadavg();
8001 } else {
8002 # Unknown load: Assume load is too high
8003 return 1;
8009 sub loadavg($) {
8010 # If the currently know loadavg is too old:
8011 # Recompute a new one in the background
8012 # The load average is computed as the number of processes waiting
8013 # for disk or CPU right now. So it is the server load this instant
8014 # and not averaged over several minutes. This is needed so GNU
8015 # Parallel will at most start one job that will push the load over
8016 # the limit.
8018 # Returns:
8019 # $last_loadavg = last load average computed (undef if none)
8021 my $self = shift;
8022 sub loadavg_cmd() {
8023 if(not $Global::loadavg_cmd) {
8024 # aix => "ps -ae -o state,command" # state wrong
8025 # bsd => "ps ax -o state,command"
8026 # sysv => "ps -ef -o s -o comm"
8027 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
8028 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
8029 # awk '{print $2,$1}'
8030 # dec_osf => bsd
8031 # dragonfly => bsd
8032 # freebsd => bsd
8033 # gnu => bsd
8034 # hpux => ps -el|awk '{print $2,$14,$15}'
8035 # irix => ps -ef -o state -o comm
8036 # linux => bsd
8037 # minix => ps el|awk '{print \$1,\$11}'
8038 # mirbsd => bsd
8039 # netbsd => bsd
8040 # openbsd => bsd
8041 # solaris => sysv
8042 # svr5 => sysv
8043 # ultrix => ps -ax | awk '{print $3,$5}'
8044 # unixware => ps -el|awk '{print $2,$14,$15}'
8045 my $ps = ::spacefree(1,q{
8046 $sysv="ps -ef -o s -o comm";
8047 $sysv2="ps -ef -o state -o comm";
8048 $bsd="ps ax -o state,command";
8049 # Treat threads as processes
8050 $bsd2="ps axH -o state,command";
8051 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
8052 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
8053 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
8054 awk '{print $2,$1}' };
8055 $dummy="echo S COMMAND;echo R dummy";
8056 %ps=(
8057 # TODO Find better code for AIX/Android
8058 'aix' => "uptime",
8059 'android' => "uptime",
8060 'cygwin' => $cygwin,
8061 'darwin' => $bsd,
8062 'dec_osf' => $sysv2,
8063 'dragonfly' => $bsd,
8064 'freebsd' => $bsd2,
8065 'gnu' => $bsd,
8066 'hpux' => $psel,
8067 'irix' => $sysv2,
8068 'linux' => $bsd2,
8069 'minix' => "ps el|awk '{print \$1,\$11}'",
8070 'mirbsd' => $bsd,
8071 'msys' => $cygwin,
8072 'netbsd' => $bsd,
8073 'nto' => $dummy,
8074 'openbsd' => $bsd,
8075 'solaris' => $sysv,
8076 'svr5' => $psel,
8077 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
8078 'MSWin32' => $sysv,
8080 print `$ps{$^O}`;
8082 # The command is too long for csh, so base64_wrap the command
8083 $Global::loadavg_cmd = $self->hexwrap($ps);
8085 return $Global::loadavg_cmd;
8087 # Should we update the loadavg file?
8088 my $update_loadavg_file = 0;
8089 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
8090 local $/; # $/ = undef => slurp whole file
8091 my $load_out = <$load_fh>;
8092 close $load_fh;
8093 if($load_out =~ /\S/) {
8094 # Content can be empty if ~/ is on NFS
8095 # due to reading being non-atomic.
8097 # Count lines starting with D,O,R but command does not start with [
8098 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
8099 if($load > 0) {
8100 # load is overestimated by 1
8101 $self->{'loadavg'} = $load - 1;
8102 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
8103 } elsif ($load_out=~/average: (\d+.\d+)/) {
8104 # AIX does not support instant load average
8105 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
8106 $self->{'loadavg'} = $1;
8107 } else {
8108 ::die_bug("loadavg_invalid_content: " .
8109 $self->{'loadavg_file'} . "\n$load_out");
8112 $update_loadavg_file = 1;
8113 } else {
8114 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
8115 $self->{'loadavg'} = undef;
8116 $update_loadavg_file = 1;
8118 if($update_loadavg_file) {
8119 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
8120 $self->{'last_loadavg_update'} = time;
8121 my $dir = ::dirname($self->{'swap_activity_file'});
8122 -d $dir or eval { File::Path::mkpath($dir); };
8123 -w $dir or ::die_bug("Cannot write to $dir");
8124 my $cmd = "";
8125 if($self->{'string'} ne ":") {
8126 $cmd = $self->wrap(loadavg_cmd());
8127 } else {
8128 $cmd .= loadavg_cmd();
8130 # As the command can take long to run if run remote
8131 # save it to a tmp file before moving it to the correct file
8132 ::debug("load", "Update load\n");
8133 my $file = ::Q($self->{'loadavg_file'});
8134 # tmpfile on same filesystem as $file
8135 my $tmpfile = $file.$$;
8136 $ENV{'SSHPASS'} = $self->{'password'};
8137 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
8139 return $self->{'loadavg'};
8142 sub max_loadavg($) {
8143 my $self = shift;
8144 # If --load is a file it might be changed
8145 if($Global::max_load_file) {
8146 my $mtime = (stat($Global::max_load_file))[9];
8147 if($mtime > $Global::max_load_file_last_mod) {
8148 $Global::max_load_file_last_mod = $mtime;
8149 for my $sshlogin (values %Global::host) {
8150 $sshlogin->set_max_loadavg(undef);
8154 if(not defined $self->{'max_loadavg'}) {
8155 $self->{'max_loadavg'} =
8156 $self->compute_max_loadavg($opt::load);
8158 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
8159 return $self->{'max_loadavg'};
8162 sub set_max_loadavg($$) {
8163 my $self = shift;
8164 $self->{'max_loadavg'} = shift;
8167 sub compute_max_loadavg($) {
8168 # Parse the max loadaverage that the user asked for using --load
8169 # Returns:
8170 # max loadaverage
8171 my $self = shift;
8172 my $loadspec = shift;
8173 my $load;
8174 if(defined $loadspec) {
8175 if($loadspec =~ /^\+(\d+)$/) {
8176 # E.g. --load +2
8177 my $j = $1;
8178 $load =
8179 $self->ncpus() + $j;
8180 } elsif ($loadspec =~ /^-(\d+)$/) {
8181 # E.g. --load -2
8182 my $j = $1;
8183 $load =
8184 $self->ncpus() - $j;
8185 } elsif ($loadspec =~ /^(\d+)\%$/) {
8186 my $j = $1;
8187 $load =
8188 $self->ncpus() * $j / 100;
8189 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
8190 $load = $1;
8191 } elsif (-f $loadspec) {
8192 $Global::max_load_file = $loadspec;
8193 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
8194 $load = $self->compute_max_loadavg(
8195 ::slurp_or_exit($Global::max_load_file)
8197 } else {
8198 ::error("Parsing of --load failed.");
8199 ::die_usage();
8201 if($load < 0.01) {
8202 $load = 0.01;
8205 return $load;
8208 sub time_to_login($) {
8209 my $self = shift;
8210 return $self->{'time_to_login'};
8213 sub set_time_to_login($$) {
8214 my $self = shift;
8215 $self->{'time_to_login'} = shift;
8218 sub max_jobs_running($) {
8219 my $self = shift;
8220 if(not defined $self->{'max_jobs_running'}) {
8221 my $nproc = $self->compute_number_of_processes($opt::jobs);
8222 $self->set_max_jobs_running($nproc);
8224 return $self->{'max_jobs_running'};
8227 sub orig_max_jobs_running($) {
8228 my $self = shift;
8229 return $self->{'orig_max_jobs_running'};
8232 sub compute_number_of_processes($) {
8233 # Number of processes wanted and limited by system resources
8234 # Returns:
8235 # Number of processes
8236 my $self = shift;
8237 my $opt_P = shift;
8238 my $wanted_processes = $self->user_requested_processes($opt_P);
8239 if(not defined $wanted_processes) {
8240 $wanted_processes = $Global::default_simultaneous_sshlogins;
8242 ::debug("load", "Wanted procs: $wanted_processes\n");
8243 my $system_limit =
8244 $self->processes_available_by_system_limit($wanted_processes);
8245 ::debug("load", "Limited to procs: $system_limit\n");
8246 return $system_limit;
8250 my @children;
8251 my $max_system_proc_reached;
8252 my $more_filehandles;
8253 my %fh;
8254 my $tmpfhname;
8255 my $count_jobs_already_read;
8256 my @jobs;
8257 my $job;
8258 my @args;
8259 my $arg;
8261 sub reserve_filehandles($) {
8262 # Reserves filehandle
8263 my $n = shift;
8264 for (1..$n) {
8265 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
8269 sub reserve_process() {
8270 # Spawn a dummy process
8271 my $child;
8272 if($child = fork()) {
8273 push @children, $child;
8274 $Global::unkilled_children{$child} = 1;
8275 } elsif(defined $child) {
8276 # This is the child
8277 # The child takes one process slot
8278 # It will be killed later
8279 $SIG{'TERM'} = $Global::original_sig{'TERM'};
8280 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
8281 # The exec does not work on Cygwin and QNX
8282 sleep 10101010;
8283 } else {
8284 # 'exec sleep' takes less RAM than sleeping in perl
8285 exec 'sleep', 10101;
8287 exit(0);
8288 } else {
8289 # Failed to spawn
8290 $max_system_proc_reached = 1;
8294 sub get_args_or_jobs() {
8295 # Get an arg or a job (depending on mode)
8296 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
8297 # Skip: No need to get args
8298 return 1;
8299 } elsif(defined $opt::retries and $count_jobs_already_read) {
8300 # For retries we may need to run all jobs on this sshlogin
8301 # so include the already read jobs for this sshlogin
8302 $count_jobs_already_read--;
8303 return 1;
8304 } else {
8305 if($opt::X or $opt::m) {
8306 # The arguments may have to be re-spread over several jobslots
8307 # So pessimistically only read one arg per jobslot
8308 # instead of a full commandline
8309 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
8310 if($Global::JobQueue->empty()) {
8311 return 0;
8312 } else {
8313 $job = $Global::JobQueue->get();
8314 push(@jobs, $job);
8315 return 1;
8317 } else {
8318 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
8319 push(@args, $arg);
8320 return 1;
8322 } else {
8323 # If there are no more command lines, then we have a process
8324 # per command line, so no need to go further
8325 if($Global::JobQueue->empty()) {
8326 return 0;
8327 } else {
8328 $job = $Global::JobQueue->get();
8329 # Replacement must happen here due to seq()
8330 $job and $job->replaced();
8331 push(@jobs, $job);
8332 return 1;
8338 sub cleanup() {
8339 # Cleanup: Close the files
8340 for (values %fh) { close $_ }
8341 # Cleanup: Kill the children
8342 for my $pid (@children) {
8343 kill 9, $pid;
8344 waitpid($pid,0);
8345 delete $Global::unkilled_children{$pid};
8347 # Cleanup: Unget the command_lines or the @args
8348 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args);
8349 @args = ();
8350 $Global::JobQueue->unget(@jobs);
8351 @jobs = ();
8354 sub processes_available_by_system_limit($) {
8355 # If the wanted number of processes is bigger than the system limits:
8356 # Limit them to the system limits
8357 # Limits are: File handles, number of input lines, processes,
8358 # and taking > 1 second to spawn 10 extra processes
8359 # Returns:
8360 # Number of processes
8361 my $self = shift;
8362 my $wanted_processes = shift;
8363 my $system_limit = 0;
8364 my $slow_spawning_warning_printed = 0;
8365 my $time = time;
8366 $more_filehandles = 1;
8367 $tmpfhname = "TmpFhNamE";
8369 # perl uses 7 filehandles for something?
8370 # parallel uses 1 for memory_usage
8371 # parallel uses 4 for ?
8372 reserve_filehandles(12);
8373 # Two processes for load avg and ?
8374 reserve_process();
8375 reserve_process();
8377 # For --retries count also jobs already run
8378 $count_jobs_already_read = $Global::JobQueue->next_seq();
8379 my $wait_time_for_getting_args = 0;
8380 my $start_time = time;
8381 if($wanted_processes < $Global::infinity) {
8382 $Global::dummy_jobs = 1;
8384 while(1) {
8385 $system_limit >= $wanted_processes and last;
8386 not $more_filehandles and last;
8387 $max_system_proc_reached and last;
8389 my $before_getting_arg = time;
8390 if(!$Global::dummy_jobs) {
8391 get_args_or_jobs() or last;
8393 $wait_time_for_getting_args += time - $before_getting_arg;
8394 $system_limit++;
8396 # Every simultaneous process uses 2 filehandles to write to
8397 # and 2 filehandles to read from
8398 reserve_filehandles(4);
8400 # System process limit
8401 reserve_process();
8403 my $forktime = time - $time - $wait_time_for_getting_args;
8404 ::debug("run", "Time to fork $system_limit procs: ".
8405 $wait_time_for_getting_args, " ", $forktime,
8406 " (processes so far: ", $system_limit,")\n");
8407 if($system_limit > 10 and
8408 $forktime > 1 and
8409 $forktime > $system_limit * 0.01) {
8410 # It took more than 0.01 second to fork a processes on avg.
8411 # Give the user a warning. He can press Ctrl-C if this
8412 # sucks.
8413 ::warning_once(
8414 "Starting $system_limit processes took > $forktime sec.",
8415 "Consider adjusting -j. Press CTRL-C to stop.");
8418 cleanup();
8420 if($system_limit < $wanted_processes) {
8421 # The system_limit is less than the wanted_processes
8422 if($system_limit < 1 and not $Global::JobQueue->empty()) {
8423 ::warning("Cannot spawn any jobs.",
8424 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
8425 "or increasing 'nproc' in /etc/security/limits.conf",
8426 "or increasing /proc/sys/kernel/pid_max");
8427 ::wait_and_exit(255);
8429 if(not $more_filehandles) {
8430 ::warning("Only enough file handles to run ".
8431 $system_limit. " jobs in parallel.",
8432 "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'",
8433 "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)",
8434 "or increasing 'nofile' in /etc/security/limits.conf",
8435 "or increasing /proc/sys/fs/file-max");
8437 if($max_system_proc_reached) {
8438 ::warning("Only enough available processes to run ".
8439 $system_limit. " jobs in parallel.",
8440 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
8441 "or increasing 'nproc' in /etc/security/limits.conf",
8442 "or increasing /proc/sys/kernel/pid_max");
8445 if($] == 5.008008 and $system_limit > 1000) {
8446 # https://savannah.gnu.org/bugs/?36942
8447 $system_limit = 1000;
8449 if($Global::JobQueue->empty()) {
8450 $system_limit ||= 1;
8452 if($self->string() ne ":" and
8453 $system_limit > $Global::default_simultaneous_sshlogins) {
8454 $system_limit =
8455 $self->simultaneous_sshlogin_limit($system_limit);
8457 return $system_limit;
8461 sub simultaneous_sshlogin_limit($) {
8462 # Test by logging in wanted number of times simultaneously
8463 # Returns:
8464 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
8465 my $self = shift;
8466 my $wanted_processes = shift;
8467 if($self->{'time_to_login'}) {
8468 return $wanted_processes;
8471 # Try twice because it guesses wrong sometimes
8472 # Choose the minimal
8473 my $ssh_limit =
8474 ::min($self->simultaneous_sshlogin($wanted_processes),
8475 $self->simultaneous_sshlogin($wanted_processes));
8476 if($ssh_limit < $wanted_processes) {
8477 my $serverlogin = $self->string();
8478 ::warning("ssh to $serverlogin only allows ".
8479 "for $ssh_limit simultaneous logins.",
8480 "You may raise this by changing",
8481 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
8482 "You can also try --sshdelay 0.1",
8483 "Using only ".($ssh_limit-1)." connections ".
8484 "to avoid race conditions.");
8485 # Race condition can cause problem if using all sshs.
8486 if($ssh_limit > 1) { $ssh_limit -= 1; }
8488 return $ssh_limit;
8491 sub simultaneous_sshlogin($) {
8492 # Using $sshlogin try to see if we can do $wanted_processes
8493 # simultaneous logins
8494 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
8495 # grep simul|wc -l
8496 # Input:
8497 # $wanted_processes = Try for this many logins in parallel
8498 # Returns:
8499 # $ssh_limit = Number of succesful parallel logins
8500 local $/ = "\n";
8501 my $self = shift;
8502 my $wanted_processes = shift;
8503 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
8504 # TODO sh -c wrapper to work for csh
8505 my $cmd = ($sshdelay.$self->wrap("echo simultaneouslogin").
8506 "</dev/null 2>&1 &")x$wanted_processes;
8507 ::debug("init","Trying $wanted_processes logins at ".$self->string()."\n");
8508 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
8509 ::die_bug("simultaneouslogin");
8510 my $ssh_limit = <$simul_fh>;
8511 close $simul_fh;
8512 chomp $ssh_limit;
8513 return $ssh_limit;
8516 sub set_ncpus($$) {
8517 my $self = shift;
8518 $self->{'ncpus'} = shift;
8521 sub user_requested_processes($) {
8522 # Parse the number of processes that the user asked for using -j
8523 # Input:
8524 # $opt_P = string formatted as for -P
8525 # Returns:
8526 # $processes = the number of processes to run on this sshlogin
8527 my $self = shift;
8528 my $opt_P = shift;
8529 my $processes;
8530 if(defined $opt_P) {
8531 if (-f $opt_P) {
8532 $Global::max_procs_file = $opt_P;
8533 my $opt_P_file = ::slurp_or_exit($Global::max_procs_file);
8534 if($opt_P_file !~ /\S/) {
8535 ::warning_once("$Global::max_procs_file is empty. ".
8536 "Treated as 100%");
8537 $opt_P_file = "100%";
8539 $processes = $self->user_requested_processes($opt_P_file);
8540 } else {
8541 if($opt_P eq "0") {
8542 # -P 0 = infinity (or at least close)
8543 $processes = $Global::infinity;
8544 } else {
8545 # -P +3 and -P -1
8546 $opt_P =~ s/^([-+])/\$self->ncpus()$1/;
8547 # -P 40%
8548 $opt_P =~ s:%$:*\$self->ncpus()/100:;
8549 $processes = eval $opt_P;
8550 if($processes <= 0) {
8551 # Do not go below 1
8552 $processes = 1;
8556 $processes = ::ceil($processes);
8558 return $processes;
8561 sub ncpus($) {
8562 # Number of CPU threads
8563 # --use_sockets_instead_of_threads = count socket instead
8564 # --use_cores_instead_of_threads = count physical cores instead
8565 # Returns:
8566 # $ncpus = number of cpu (threads) on this sshlogin
8567 local $/ = "\n";
8568 my $self = shift;
8569 if(not defined $self->{'ncpus'}) {
8570 if($self->local()) {
8571 if($opt::use_sockets_instead_of_threads) {
8572 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
8573 } elsif($opt::use_cores_instead_of_threads) {
8574 $self->{'ncpus'} = socket_core_thread()->{'cores'};
8575 } else {
8576 $self->{'ncpus'} = socket_core_thread()->{'threads'};
8578 } else {
8579 my $ncpu;
8580 $ENV{'SSHPASS'} = $self->{'password'};
8581 ::debug("init",("echo | ".$self->wrap("parallel --number-of-sockets")));
8582 if($opt::use_sockets_instead_of_threads
8584 $opt::use_cpus_instead_of_cores) {
8585 $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-sockets"));
8586 } elsif($opt::use_cores_instead_of_threads) {
8587 $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-cores"));
8588 } else {
8589 $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-threads"));
8591 chomp $ncpu;
8592 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
8593 $self->{'ncpus'} = $ncpu;
8594 } else {
8595 ::warning("Could not figure out ".
8596 "number of cpus on ".$self->string." ($ncpu). Using 1.");
8597 $self->{'ncpus'} = 1;
8601 return $self->{'ncpus'};
8605 sub nproc() {
8606 # Returns:
8607 # Number of threads using `nproc`
8608 my $no_of_threads = ::qqx("nproc");
8609 chomp $no_of_threads;
8610 return $no_of_threads;
8613 sub no_of_sockets() {
8614 return socket_core_thread()->{'sockets'};
8617 sub no_of_cores() {
8618 return socket_core_thread()->{'cores'};
8621 sub no_of_threads() {
8622 return socket_core_thread()->{'threads'};
8625 sub socket_core_thread() {
8626 # Returns:
8628 # 'sockets' => #sockets = number of socket with CPU present
8629 # 'cores' => #cores = number of physical cores
8630 # 'threads' => #threads = number of compute cores (hyperthreading)
8631 # 'active' => #taskset_threads = number of taskset limited cores
8633 my $cpu;
8634 if ($^O eq 'linux') {
8635 $cpu = sct_gnu_linux($cpu);
8636 } elsif ($^O eq 'android') {
8637 $cpu = sct_android($cpu);
8638 } elsif ($^O eq 'freebsd') {
8639 $cpu = sct_freebsd($cpu);
8640 } elsif ($^O eq 'netbsd') {
8641 $cpu = sct_netbsd($cpu);
8642 } elsif ($^O eq 'openbsd') {
8643 $cpu = sct_openbsd($cpu);
8644 } elsif ($^O eq 'gnu') {
8645 $cpu = sct_hurd($cpu);
8646 } elsif ($^O eq 'darwin') {
8647 $cpu = sct_darwin($cpu);
8648 } elsif ($^O eq 'solaris') {
8649 $cpu = sct_solaris($cpu);
8650 } elsif ($^O eq 'aix') {
8651 $cpu = sct_aix($cpu);
8652 } elsif ($^O eq 'hpux') {
8653 $cpu = sct_hpux($cpu);
8654 } elsif ($^O eq 'nto') {
8655 $cpu = sct_qnx($cpu);
8656 } elsif ($^O eq 'svr5') {
8657 $cpu = sct_openserver($cpu);
8658 } elsif ($^O eq 'irix') {
8659 $cpu = sct_irix($cpu);
8660 } elsif ($^O eq 'dec_osf') {
8661 $cpu = sct_tru64($cpu);
8662 } else {
8663 # Try all methods until we find something that works
8664 $cpu = (sct_gnu_linux($cpu)
8665 || sct_android($cpu)
8666 || sct_freebsd($cpu)
8667 || sct_netbsd($cpu)
8668 || sct_openbsd($cpu)
8669 || sct_hurd($cpu)
8670 || sct_darwin($cpu)
8671 || sct_solaris($cpu)
8672 || sct_aix($cpu)
8673 || sct_hpux($cpu)
8674 || sct_qnx($cpu)
8675 || sct_openserver($cpu)
8676 || sct_irix($cpu)
8677 || sct_tru64($cpu)
8680 if(not $cpu) {
8681 # Fall back: Set all to nproc
8682 my $nproc = nproc();
8683 if($nproc) {
8684 $cpu->{'sockets'} =
8685 $cpu->{'cores'} =
8686 $cpu->{'threads'} =
8687 $cpu->{'active'} =
8688 $nproc;
8691 if(not $cpu) {
8692 ::warning("Cannot figure out number of cpus. Using 1.");
8693 $cpu->{'sockets'} =
8694 $cpu->{'cores'} =
8695 $cpu->{'threads'} =
8696 $cpu->{'active'} =
8699 $cpu->{'sockets'} ||= 1;
8700 $cpu->{'threads'} ||= $cpu->{'cores'};
8701 $cpu->{'active'} ||= $cpu->{'threads'};
8702 chomp($cpu->{'sockets'},
8703 $cpu->{'cores'},
8704 $cpu->{'threads'},
8705 $cpu->{'active'});
8706 # Choose minimum of active and actual
8707 my $mincpu;
8708 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
8709 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
8710 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
8711 return $mincpu;
8714 sub sct_gnu_linux($) {
8715 # Returns:
8716 # { 'sockets' => #sockets
8717 # 'cores' => #cores
8718 # 'threads' => #threads
8719 # 'active' => #taskset_threads }
8720 my $cpu = shift;
8722 sub read_topology($) {
8723 my $prefix = shift;
8724 my %sibiling;
8725 my %socket;
8726 my $thread;
8727 for($thread = 0;
8728 -r "$prefix/cpu$thread/topology/physical_package_id";
8729 $thread++) {
8730 $socket{::slurp_or_exit(
8731 "$prefix/cpu$thread/topology/physical_package_id")}++;
8733 for($thread = 0;
8734 -r "$prefix/cpu$thread/topology/thread_siblings";
8735 $thread++) {
8736 $sibiling{::slurp_or_exit(
8737 "$prefix/cpu$thread/topology/thread_siblings")}++;
8739 $cpu->{'sockets'} = keys %socket;
8740 $cpu->{'cores'} = keys %sibiling;
8741 $cpu->{'threads'} = $thread;
8744 sub read_cpuinfo(@) {
8745 my @cpuinfo = @_;
8746 $cpu->{'sockets'} = 0;
8747 $cpu->{'cores'} = 0;
8748 $cpu->{'threads'} = 0;
8749 my %seen;
8750 my %phy_seen;
8751 my $physicalid;
8752 for(@cpuinfo) {
8753 # physical id : 0
8754 if(/^physical id.*[:](.*)/) {
8755 $physicalid = $1;
8756 if(not $phy_seen{$1}++) {
8757 $cpu->{'sockets'}++;
8760 # core id : 3
8761 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
8762 $cpu->{'cores'}++;
8764 # processor : 2
8765 /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
8767 $cpu->{'cores'} ||= $cpu->{'threads'};
8768 $cpu->{'cpus'} ||= $cpu->{'threads'};
8769 $cpu->{'sockets'} ||= 1;
8772 sub read_lscpu(@) {
8773 my @lscpu = @_;
8774 my $threads_per_core;
8775 my $cores_per_socket;
8776 for(@lscpu) {
8777 /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
8778 /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
8779 /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
8780 /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
8782 if($cores_per_socket and $cpu->{'sockets'}) {
8783 $cpu->{'cores'} = $cores_per_socket * $cpu->{'sockets'};
8785 if($threads_per_core and $cpu->{'cores'}) {
8786 $cpu->{'threads'} = $threads_per_core * $cpu->{'cores'};
8788 if($threads_per_core and $cpu->{'threads'}) {
8789 $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core;
8791 $cpu->{'cpus'} ||= $cpu->{'threads'};
8794 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
8795 my @cpuinfo;
8796 my @lscpu;
8797 if($ENV{'PARALLEL_CPUINFO'}) {
8798 # Use CPUINFO from environment - used for testing only
8799 read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
8800 } elsif($ENV{'PARALLEL_LSCPU'}) {
8801 # Use LSCPU from environment - used for testing only
8802 read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
8803 } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
8804 # Use CPUPREFIX from environment - used for testing only
8805 read_topology($ENV{'PARALLEL_CPUPREFIX'});
8806 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
8807 # Skip /proc/cpuinfo - already set
8808 } else {
8809 # Not debugging: Look at this computer
8810 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
8812 open(my $in_fh, "-|", "lscpu")) {
8813 # Parse output from lscpu
8814 read_lscpu(<$in_fh>);
8815 close $in_fh;
8817 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
8819 -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
8820 read_topology("/sys/devices/system/cpu");
8822 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
8824 open(my $in_fh, "<", "/proc/cpuinfo")) {
8825 # Read /proc/cpuinfo
8826 read_cpuinfo(<$in_fh>);
8827 close $in_fh;
8830 if(-e "/proc/self/status"
8831 and not $ENV{'PARALLEL_CPUINFO'}
8832 and not $ENV{'PARALLEL_LSCPU'}) {
8833 # if 'taskset' is used to limit number of threads
8834 if(open(my $in_fh, "<", "/proc/self/status")) {
8835 while(<$in_fh>) {
8836 if(/^Cpus_allowed:\s*(\S+)/) {
8837 my $a = $1;
8838 $a =~ tr/,//d;
8839 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
8842 close $in_fh;
8845 return $cpu;
8848 sub sct_android($) {
8849 # Returns:
8850 # { 'sockets' => #sockets
8851 # 'cores' => #cores
8852 # 'threads' => #threads
8853 # 'active' => #taskset_threads }
8854 # Use GNU/Linux
8855 return sct_gnu_linux($_[0]);
8858 sub sct_freebsd($) {
8859 # Returns:
8860 # { 'sockets' => #sockets
8861 # 'cores' => #cores
8862 # 'threads' => #threads
8863 # 'active' => #taskset_threads }
8864 local $/ = "\n";
8865 my $cpu = shift;
8866 $cpu->{'cores'} ||=
8867 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
8869 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
8870 $cpu->{'threads'} ||=
8871 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
8873 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
8874 return $cpu;
8877 sub sct_netbsd($) {
8878 # Returns:
8879 # { 'sockets' => #sockets
8880 # 'cores' => #cores
8881 # 'threads' => #threads
8882 # 'active' => #taskset_threads }
8883 local $/ = "\n";
8884 my $cpu = shift;
8885 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
8886 return $cpu;
8889 sub sct_openbsd($) {
8890 # Returns:
8891 # { 'sockets' => #sockets
8892 # 'cores' => #cores
8893 # 'threads' => #threads
8894 # 'active' => #taskset_threads }
8895 local $/ = "\n";
8896 my $cpu = shift;
8897 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
8898 return $cpu;
8901 sub sct_hurd($) {
8902 # Returns:
8903 # { 'sockets' => #sockets
8904 # 'cores' => #cores
8905 # 'threads' => #threads
8906 # 'active' => #taskset_threads }
8907 local $/ = "\n";
8908 my $cpu = shift;
8909 $cpu->{'cores'} ||= ::qqx("nproc");
8910 return $cpu;
8913 sub sct_darwin($) {
8914 # Returns:
8915 # { 'sockets' => #sockets
8916 # 'cores' => #cores
8917 # 'threads' => #threads
8918 # 'active' => #taskset_threads }
8919 local $/ = "\n";
8920 my $cpu = shift;
8921 $cpu->{'cores'} ||=
8922 (::qqx('sysctl -n hw.physicalcpu')
8924 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
8925 $cpu->{'threads'} ||=
8926 (::qqx('sysctl -n hw.logicalcpu')
8928 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
8929 return $cpu;
8932 sub sct_solaris($) {
8933 # Returns:
8934 # { 'sockets' => #sockets
8935 # 'cores' => #cores
8936 # 'threads' => #threads
8937 # 'active' => #taskset_threads }
8938 local $/ = "\n";
8939 my $cpu = shift;
8940 if(not $cpu->{'cores'}) {
8941 if(-x "/usr/bin/kstat") {
8942 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
8943 if($#chip_id >= 0) {
8944 $cpu->{'sockets'} ||= $#chip_id +1;
8946 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
8947 if($#core_id >= 0) {
8948 $cpu->{'cores'} ||= $#core_id +1;
8951 if(-x "/usr/sbin/psrinfo") {
8952 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
8953 if($#psrinfo >= 0) {
8954 $cpu->{'sockets'} ||= $psrinfo[0];
8957 if(-x "/usr/sbin/prtconf") {
8958 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
8959 if($#prtconf >= 0) {
8960 $cpu->{'cores'} ||= $#prtconf +1;
8964 return $cpu;
8967 sub sct_aix($) {
8968 # Returns:
8969 # { 'sockets' => #sockets
8970 # 'cores' => #cores
8971 # 'threads' => #threads
8972 # 'active' => #taskset_threads }
8973 local $/ = "\n";
8974 my $cpu = shift;
8975 if(not $cpu->{'cores'}) {
8976 if(-x "/usr/sbin/lscfg") {
8977 if(open(my $in_fh, "-|",
8978 "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
8979 $cpu->{'cores'} = <$in_fh>;
8980 close $in_fh;
8984 if(not $cpu->{'threads'}) {
8985 if(-x "/usr/bin/vmstat") {
8986 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
8987 while(<$in_fh>) {
8988 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
8990 close $in_fh;
8994 return $cpu;
8997 sub sct_hpux($) {
8998 # Returns:
8999 # { 'sockets' => #sockets
9000 # 'cores' => #cores
9001 # 'threads' => #threads
9002 # 'active' => #taskset_threads }
9003 local $/ = "\n";
9004 my $cpu = shift;
9005 $cpu->{'cores'} ||=
9006 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
9007 $cpu->{'threads'} ||=
9008 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
9009 return $cpu;
9012 sub sct_qnx($) {
9013 # Returns:
9014 # { 'sockets' => #sockets
9015 # 'cores' => #cores
9016 # 'threads' => #threads
9017 # 'active' => #taskset_threads }
9018 local $/ = "\n";
9019 my $cpu = shift;
9020 # BUG: It is not known how to calculate this.
9022 return $cpu;
9025 sub sct_openserver($) {
9026 # Returns:
9027 # { 'sockets' => #sockets
9028 # 'cores' => #cores
9029 # 'threads' => #threads
9030 # 'active' => #taskset_threads }
9031 local $/ = "\n";
9032 my $cpu = shift;
9033 if(not $cpu->{'cores'}) {
9034 if(-x "/usr/sbin/psrinfo") {
9035 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
9036 if($#psrinfo >= 0) {
9037 $cpu->{'cores'} = $#psrinfo +1;
9041 $cpu->{'sockets'} ||= $cpu->{'cores'};
9042 return $cpu;
9045 sub sct_irix($) {
9046 # Returns:
9047 # { 'sockets' => #sockets
9048 # 'cores' => #cores
9049 # 'threads' => #threads
9050 # 'active' => #taskset_threads }
9051 local $/ = "\n";
9052 my $cpu = shift;
9053 $cpu->{'cores'} ||=
9054 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
9055 return $cpu;
9058 sub sct_tru64($) {
9059 # Returns:
9060 # { 'sockets' => #sockets
9061 # 'cores' => #cores
9062 # 'threads' => #threads
9063 # 'active' => #taskset_threads }
9064 local $/ = "\n";
9065 my $cpu = shift;
9066 $cpu->{'cores'} ||= ::qqx("sizer -pr");
9067 $cpu->{'sockets'} ||= $cpu->{'cores'};
9068 $cpu->{'threads'} ||= $cpu->{'cores'};
9070 return $cpu;
9073 sub sshcommand($) {
9074 # Returns:
9075 # $sshcommand = the command (incl options) to run when using ssh
9076 my $self = shift;
9077 if (not defined $self->{'sshcommand'}) {
9078 ::die_bug("sshcommand not set");
9080 return $self->{'sshcommand'};
9083 sub local($) {
9084 my $self = shift;
9085 return $self->{'local'};
9088 sub control_path_dir($) {
9089 # Returns:
9090 # $control_path_dir = dir of control path (for -M)
9091 my $self = shift;
9092 if(not defined $self->{'control_path_dir'}) {
9093 $self->{'control_path_dir'} =
9094 # Use $ENV{'TMPDIR'} as that is typically not
9095 # NFS mounted.
9096 # The file system must support UNIX domain sockets
9097 File::Temp::tempdir($ENV{'TMPDIR'}
9098 . "/ctrlpath-XXXX",
9099 CLEANUP => 1);
9101 return $self->{'control_path_dir'};
9104 sub rsync_transfer_cmd($) {
9105 # Command to run to transfer a file
9106 # Input:
9107 # $file = filename of file to transfer
9108 # $workdir = destination dir
9109 # Returns:
9110 # $cmd = rsync command to run to transfer $file ("" if unreadable)
9111 my $self = shift;
9112 my $file = shift;
9113 my $workdir = shift;
9114 if(not -r $file) {
9115 ::warning($file. " is not readable and will not be transferred.");
9116 return "true";
9118 my $rsync_destdir;
9119 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9120 if($relpath) {
9121 $rsync_destdir = ::shell_quote_file($workdir);
9122 } else {
9123 # rsync /foo/bar /
9124 $rsync_destdir = "/";
9126 $file = ::shell_quote_file($file);
9127 # Make dir if it does not exist
9128 return($self->wrap("mkdir -p $rsync_destdir") . " && " .
9129 $self->rsync()." $file ".$self->{'host'}.":$rsync_destdir");
9133 my $rsync_fix;
9134 my $rsync_version;
9136 sub rsync($) {
9137 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
9138 # If the version >= 3.1.0: downgrade to protocol 30
9139 # rsync 3.2.4 introduces a quoting bug: Add --old-args for that
9140 # Returns:
9141 # $rsync = "rsync" or "rsync --protocol 30 --old-args"
9142 sub rsync_version {
9143 if(not $rsync_version) {
9144 my @out = `rsync --version`;
9145 if(not @out) {
9146 if(::which("rsync")) {
9147 ::die_bug("'rsync --version' gave no output.");
9148 } else {
9149 ::error("'rsync' is not in \$PATH.");
9150 ::wait_and_exit(255);
9153 for (@out) {
9154 # rsync version 3.1.3 protocol version 31
9155 # rsync version v3.2.3 protocol version 31
9156 if(/version v?(\d+)\.(\d+)(\.(\d+))?/) {
9157 # 3.2.27 => 03.0227
9158 $rsync_version = sprintf "%02d.%02d%02d",$1,$2,$4;
9161 $rsync_version or
9162 ::die_bug("Cannot figure out version of rsync: @out");
9166 sub rsync_fixup {
9167 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
9168 # If the version >= 3.1.0: downgrade to protocol 30
9169 # Returns:
9170 # $rsync = "rsync" or "rsync --protocol 30"
9171 if(not $rsync_fix) {
9172 rsync_version();
9173 if($rsync_version >= 3.01) {
9174 # Version 3.1.0 or later: Downgrade to protocol 30
9175 $rsync_fix .= " --protocol 30";
9177 if($rsync_version >= 3.0204) {
9178 # Version 3.2.4 .. 3.2.8: --old-args
9179 $rsync_fix .= " --old-args";
9182 return $rsync_fix;
9184 my $self = shift;
9186 return "rsync".rsync_fixup()." ".$ENV{'PARALLEL_RSYNC_OPTS'}.
9187 " -e".::Q($self->sshcmd());
9191 sub cleanup_cmd($$$) {
9192 # Command to run to remove the remote file
9193 # Input:
9194 # $file = filename to remove
9195 # $workdir = destination dir
9196 # Returns:
9197 # $cmd = ssh command to run to remove $file and empty parent dirs
9198 my $self = shift;
9199 my $file = shift;
9200 my $workdir = shift;
9201 my $f = $file;
9202 if($f =~ m:/\./:) {
9203 # foo/bar/./baz/quux => workdir/baz/quux
9204 # /foo/bar/./baz/quux => workdir/baz/quux
9205 $f =~ s:.*/\./:$workdir/:;
9206 } elsif($f =~ m:^[^/]:) {
9207 # foo/bar => workdir/foo/bar
9208 $f = $workdir."/".$f;
9210 my @subdirs = split m:/:, ::dirname($f);
9211 my @rmdir;
9212 my $dir = "";
9213 for(@subdirs) {
9214 $dir .= $_."/";
9215 unshift @rmdir, ::shell_quote_file($dir);
9217 my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : "";
9218 if(defined $opt::workdir and $opt::workdir eq "...") {
9219 $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
9221 my $rmf = "sh -c ".
9222 ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir);
9223 return $self->wrap(::Q($rmf));
9226 package JobQueue;
9228 sub new($) {
9229 my $class = shift;
9230 my $commandref = shift;
9231 my $read_from = shift;
9232 my $context_replace = shift;
9233 my $max_number_of_args = shift;
9234 my $transfer_files = shift;
9235 my $return_files = shift;
9236 my $template_names = shift;
9237 my $template_contents = shift;
9238 my $commandlinequeue = CommandLineQueue->new
9239 ($commandref, $read_from, $context_replace, $max_number_of_args,
9240 $transfer_files, $return_files, $template_names, $template_contents);
9241 my @unget = ();
9242 return bless {
9243 'unget' => \@unget,
9244 'commandlinequeue' => $commandlinequeue,
9245 'this_job_no' => 0,
9246 'total_jobs' => undef,
9247 }, ref($class) || $class;
9250 sub get($) {
9251 my $self = shift;
9253 $self->{'this_job_no'}++;
9254 if(@{$self->{'unget'}}) {
9255 my $job = shift @{$self->{'unget'}};
9256 # {%} may have changed, so flush computed values
9257 $job && $job->flush_cache();
9258 return $job;
9259 } else {
9260 my $commandline = $self->{'commandlinequeue'}->get();
9261 if(defined $commandline) {
9262 return Job->new($commandline);
9263 } else {
9264 $self->{'this_job_no'}--;
9265 return undef;
9270 sub unget($) {
9271 my $self = shift;
9272 unshift @{$self->{'unget'}}, @_;
9273 $self->{'this_job_no'} -= @_;
9276 sub empty($) {
9277 my $self = shift;
9278 my $empty = (not @{$self->{'unget'}}) &&
9279 $self->{'commandlinequeue'}->empty();
9280 ::debug("run", "JobQueue->empty $empty ");
9281 return $empty;
9284 sub total_jobs($) {
9285 my $self = shift;
9286 if(not defined $self->{'total_jobs'}) {
9287 if($opt::pipe and not $opt::tee) {
9288 ::error("--pipe is incompatible with --eta/--bar/--shuf");
9289 ::wait_and_exit(255);
9291 if($opt::totaljobs) {
9292 $self->{'total_jobs'} = $opt::totaljobs;
9293 } elsif($opt::sqlworker) {
9294 $self->{'total_jobs'} = $Global::sql->total_jobs();
9295 } else {
9296 my $record;
9297 my @arg_records;
9298 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
9299 my $start = time;
9300 while($record = $record_queue->get()) {
9301 push @arg_records, $record;
9302 if(time - $start > 10) {
9303 ::warning("Reading ".scalar(@arg_records).
9304 " arguments took longer than 10 seconds.");
9305 $opt::eta && ::warning("Consider removing --eta.");
9306 $opt::bar && ::warning("Consider removing --bar.");
9307 $opt::shuf && ::warning("Consider removing --shuf.");
9308 last;
9311 while($record = $record_queue->get()) {
9312 push @arg_records, $record;
9314 if($opt::shuf and @arg_records) {
9315 my $i = @arg_records;
9316 while (--$i) {
9317 my $j = int rand($i+1);
9318 @arg_records[$i,$j] = @arg_records[$j,$i];
9321 $record_queue->unget(@arg_records);
9322 # $#arg_records = number of args - 1
9323 # We have read one @arg_record for this job (so add 1 more)
9324 my $num_args = $#arg_records + 2;
9325 # This jobs is not started so -1
9326 my $started_jobs = $self->{'this_job_no'} - 1;
9327 my $max_args = ::max($Global::max_number_of_args,1);
9328 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
9329 + $started_jobs;
9330 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
9331 " ($num_args/$max_args + $started_jobs)\n");
9334 return $self->{'total_jobs'};
9337 sub flush_total_jobs($) {
9338 # Unset total_jobs to force recomputing
9339 my $self = shift;
9340 ::debug("init","flush Total jobs: ");
9341 $self->{'total_jobs'} = undef;
9344 sub next_seq($) {
9345 my $self = shift;
9347 return $self->{'commandlinequeue'}->seq();
9350 sub quote_args($) {
9351 my $self = shift;
9352 return $self->{'commandlinequeue'}->quote_args();
9356 package Job;
9358 sub new($) {
9359 my $class = shift;
9360 my $commandlineref = shift;
9361 return bless {
9362 'commandline' => $commandlineref, # CommandLine object
9363 'workdir' => undef, # --workdir
9364 # filehandle for stdin (used for --pipe)
9365 # filename for writing stdout to (used for --files)
9366 # remaining data not sent to stdin (used for --pipe)
9367 # tmpfiles to cleanup when job is done
9368 'unlink' => [],
9369 # amount of data sent via stdin (used for --pipe)
9370 'transfersize' => 0, # size of files using --transfer
9371 'returnsize' => 0, # size of files using --return
9372 'pid' => undef,
9373 # hash of { SSHLogins => number of times the command failed there }
9374 'failed' => undef,
9375 'sshlogin' => undef,
9376 # The commandline wrapped with rsync and ssh
9377 'sshlogin_wrap' => undef,
9378 'exitstatus' => undef,
9379 'exitsignal' => undef,
9380 # Timestamp for timeout if any
9381 'timeout' => undef,
9382 'virgin' => 1,
9383 # Output used for SQL and CSV-output
9384 'output' => { 1 => [], 2 => [] },
9385 'halfline' => { 1 => [], 2 => [] },
9386 }, ref($class) || $class;
9389 sub flush_cache($) {
9390 my $self = shift;
9391 $self->{'commandline'}->flush_cache();
9394 sub replaced($) {
9395 my $self = shift;
9396 $self->{'commandline'} or ::die_bug("commandline empty");
9397 return $self->{'commandline'}->replaced();
9401 my $next_available_row;
9403 sub row($) {
9404 my $self = shift;
9405 if(not defined $self->{'row'}) {
9406 if($opt::keeporder) {
9407 $self->{'row'} = $self->seq();
9408 } else {
9409 $self->{'row'} = ++$next_available_row;
9412 return $self->{'row'};
9416 sub seq($) {
9417 my $self = shift;
9418 return $self->{'commandline'}->seq();
9421 sub set_seq($$) {
9422 my $self = shift;
9423 return $self->{'commandline'}->set_seq(shift);
9426 sub slot($) {
9427 my $self = shift;
9428 return $self->{'commandline'}->slot();
9431 sub free_slot($) {
9432 my $self = shift;
9433 push @Global::slots, $self->slot();
9437 my($cattail);
9439 sub cattail() {
9440 # Returns:
9441 # $cattail = perl program for:
9442 # cattail "decomp-prg" wpid [file_stdin] [file_to_unlink]
9443 # decomp-prg = decompress program
9444 # wpid = pid of writer program
9445 # file_stdin = file_to_decompress
9446 # file_to_unlink = unlink this file
9447 if(not $cattail) {
9448 $cattail = q{
9449 # cat followed by tail (possibly with rm as soon at the file is opened)
9450 # If $writerpid dead: finish after this round
9451 use Fcntl;
9452 $|=1;
9454 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
9455 if($read_file) {
9456 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
9457 } else {
9458 *IN = *STDIN;
9460 while(! -s $comfile) {
9461 # Writer has not opened the buffer file, so we cannot remove it yet
9462 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
9463 usleep($sleep);
9465 # The writer and we have both opened the file, so it is safe to unlink it
9466 unlink $unlink_file;
9467 unlink $comfile;
9469 my $first_round = 1;
9470 my $flags;
9471 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
9472 $flags |= O_NONBLOCK; # Add non-blocking to the flags
9473 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
9475 while(1) {
9476 # clear EOF
9477 seek(IN,0,1);
9478 my $writer_running = kill 0, $writerpid;
9479 $read = sysread(IN,$buf,131072);
9480 if($read) {
9481 if($first_round) {
9482 # Only start the command if there any input to process
9483 $first_round = 0;
9484 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
9487 # Blocking print
9488 while($buf) {
9489 my $bytes_written = syswrite(OUT,$buf);
9490 # syswrite may be interrupted by SIGHUP
9491 substr($buf,0,$bytes_written) = "";
9493 # Something printed: Wait less next time
9494 $sleep /= 2;
9495 } else {
9496 if(eof(IN) and not $writer_running) {
9497 # Writer dead: There will never be sent more to the decompressor
9498 close OUT;
9499 exit;
9501 # TODO This could probably be done more efficiently using select(2)
9502 # Nothing read: Wait longer before next read
9503 # Up to 100 milliseconds
9504 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
9505 usleep($sleep);
9509 sub usleep {
9510 # Sleep this many milliseconds.
9511 my $secs = shift;
9512 select(undef, undef, undef, $secs/1000);
9515 $cattail =~ s/#.*//mg;
9516 $cattail =~ s/\s+/ /g;
9518 return $cattail;
9522 sub openoutputfiles($) {
9523 # Open files for STDOUT and STDERR
9524 # Set file handles in $self->fh
9525 my $self = shift;
9526 my ($outfhw, $errfhw, $outname, $errname);
9528 if($opt::latestline) {
9529 # Do not save to files: Use non-blocking pipe
9530 my ($outfhr, $errfhr);
9531 pipe($outfhr, $outfhw) || die;
9532 $self->set_fh(1,'w',$outfhw);
9533 $self->set_fh(2,'w',$outfhw);
9534 $self->set_fh(1,'r',$outfhr);
9535 $self->set_fh(2,'r',$outfhr);
9536 # Make it possible to read non-blocking from the pipe
9537 for my $fdno (1,2) {
9538 ::set_fh_non_blocking($self->fh($fdno,'r'));
9540 # Return immediately because we do not need setting filenames
9541 return;
9542 } elsif($Global::linebuffer and not
9543 ($opt::keeporder or $Global::files or $opt::results or
9544 $opt::compress or $opt::compress_program or
9545 $opt::decompress_program)) {
9546 # Do not save to files: Use non-blocking pipe
9547 my ($outfhr, $errfhr);
9548 pipe($outfhr, $outfhw) || die;
9549 pipe($errfhr, $errfhw) || die;
9550 $self->set_fh(1,'w',$outfhw);
9551 $self->set_fh(2,'w',$errfhw);
9552 $self->set_fh(1,'r',$outfhr);
9553 $self->set_fh(2,'r',$errfhr);
9554 # Make it possible to read non-blocking from the pipe
9555 for my $fdno (1,2) {
9556 ::set_fh_non_blocking($self->fh($fdno,'r'));
9558 # Return immediately because we do not need setting filenames
9559 return;
9560 } elsif($opt::results and not $Global::csvsep and not $Global::jsonout) {
9561 # If --results, but not --results *.csv/*.tsv
9562 my $out = $self->{'commandline'}->results_out();
9563 my $seqname;
9564 if($out eq $opt::results or $out =~ m:/$:) {
9565 # $opt::results = simple string or ending in /
9566 # => $out is a dir/
9567 # prefix/name1/val1/name2/val2/seq
9568 $seqname = $out."seq";
9569 # prefix/name1/val1/name2/val2/stdout
9570 $outname = $out."stdout";
9571 # prefix/name1/val1/name2/val2/stderr
9572 $errname = $out."stderr";
9573 } else {
9574 # $opt::results = replacement string not ending in /
9575 # => $out is a file
9576 $outname = $out;
9577 $errname = "$out.err";
9578 $seqname = "$out.seq";
9580 ::write_or_exit($seqname, $self->seq());
9581 $outfhw = ::open_or_exit("+>", $outname);
9582 $errfhw = ::open_or_exit("+>", $errname);
9583 $self->set_fh(1,"unlink","");
9584 $self->set_fh(2,"unlink","");
9585 if($opt::sqlworker) {
9586 # Save the filenames in SQL table
9587 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
9588 "WHERE Seq = ". $self->seq(),
9589 $outname, $errname);
9591 } elsif(not $opt::ungroup) {
9592 # To group we create temporary files for STDOUT and STDERR
9593 # To avoid the cleanup unlink the files immediately (but keep them open)
9594 if($Global::files) {
9595 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
9596 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
9597 # --files => only remove stderr
9598 $self->set_fh(1,"unlink","");
9599 $self->set_fh(2,"unlink",$errname);
9600 } else {
9601 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
9602 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
9603 $self->set_fh(1,"unlink",$outname);
9604 $self->set_fh(2,"unlink",$errname);
9606 } else {
9607 # --ungroup
9608 open($outfhw,">&",$Global::fh{1}) || die;
9609 open($errfhw,">&",$Global::fh{2}) || die;
9610 # File name must be empty as it will otherwise be printed
9611 $outname = "";
9612 $errname = "";
9613 $self->set_fh(1,"unlink",$outname);
9614 $self->set_fh(2,"unlink",$errname);
9616 # Set writing FD
9617 $self->set_fh(1,'w',$outfhw);
9618 $self->set_fh(2,'w',$errfhw);
9619 $self->set_fh(1,'name',$outname);
9620 $self->set_fh(2,'name',$errname);
9621 if($opt::compress) {
9622 $self->filter_through_compress();
9623 } elsif(not $opt::ungroup) {
9624 $self->grouped();
9626 if($Global::linebuffer) {
9627 # Make it possible to read non-blocking from
9628 # the buffer files
9629 # Used for --linebuffer with -k, --files, --res, --compress*
9630 for my $fdno (1,2) {
9631 ::set_fh_non_blocking($self->fh($fdno,'r'));
9636 sub print_verbose_dryrun($) {
9637 # If -v set: print command to stdout (possibly buffered)
9638 # This must be done before starting the command
9639 my $self = shift;
9640 if($Global::verbose or $opt::dryrun) {
9641 my $fh = $self->fh(1,"w");
9642 if($Global::verbose <= 1) {
9643 print $fh $self->replaced(),"\n";
9644 } else {
9645 # Verbose level > 1: Print the rsync and stuff
9646 print $fh $self->wrapped(),"\n";
9649 if($opt::sqlworker) {
9650 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
9651 $self->replaced());
9655 sub add_rm($) {
9656 # Files to remove when job is done
9657 my $self = shift;
9658 push @{$self->{'unlink'}}, @_;
9661 sub get_rm($) {
9662 # Files to remove when job is done
9663 my $self = shift;
9664 return @{$self->{'unlink'}};
9667 sub cleanup($) {
9668 # Remove files when job is done
9669 my $self = shift;
9670 unlink $self->get_rm();
9671 delete @Global::unlink{$self->get_rm()};
9674 sub grouped($) {
9675 my $self = shift;
9676 # Set reading FD if using --group (--ungroup does not need)
9677 for my $fdno (1,2) {
9678 # Re-open the file for reading
9679 # so fdw can be closed seperately
9680 # and fdr can be seeked seperately (for --line-buffer)
9681 my $fdr = ::open_or_exit("<", $self->fh($fdno,'name'));
9682 $self->set_fh($fdno,'r',$fdr);
9683 # Unlink if not debugging
9684 $Global::debug or ::rm($self->fh($fdno,"unlink"));
9688 sub empty_input_wrapper($) {
9689 # If no input: exit(0)
9690 # If some input: Pass input as input to command on STDIN
9691 # This avoids starting the command if there is no input.
9692 # Input:
9693 # $command = command to pipe data to
9694 # Returns:
9695 # $wrapped_command = the wrapped command
9696 my $command = shift;
9697 # The optimal block size differs
9698 # It has been measured on:
9699 # AMD 6376: 59000
9700 # <big ppar --pipe --block 100M --test $1 -j1 'cat >/dev/null';
9701 my $script =
9702 ::spacefree(0,q{
9703 if(sysread(STDIN, $buf, 1)) {
9704 open($fh, "|-", @ARGV) || die;
9705 syswrite($fh, $buf);
9706 while($read = sysread(STDIN, $buf, 59000)) {
9707 syswrite($fh, $buf);
9709 close $fh;
9710 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9713 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
9714 if($Global::cshell
9716 length $command > 499) {
9717 # csh does not like words longer than 1000 (499 quoted)
9718 # $command = "perl -e '".base64_zip_eval()."' ".
9719 # join" ",string_zip_base64(
9720 # 'exec "'.::perl_quote_scalar($command).'"');
9721 return 'perl -e '.::Q($script)." ".
9722 base64_wrap("exec \"$Global::shell\",'-c',\"".
9723 ::perl_quote_scalar($command).'"');
9724 } else {
9725 return 'perl -e '.::Q($script)." ".
9726 $Global::shell." -c ".::Q($command);
9730 sub filter_through_compress($) {
9731 my $self = shift;
9732 # Send stdout to stdin for $opt::compress_program(1)
9733 # Send stderr to stdin for $opt::compress_program(2)
9734 # cattail get pid: $pid = $self->fh($fdno,'rpid');
9735 my $cattail = cattail();
9737 for my $fdno (1,2) {
9738 # Make a communication file.
9739 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
9740 close $fh;
9741 # Compressor: (echo > $comfile; compress pipe) > output
9742 # When the echo is written to $comfile,
9743 # it is known that output file is opened,
9744 # thus output file can then be removed by the decompressor.
9745 # empty_input_wrapper is needed for plzip
9746 my $qcom = ::Q($comfile);
9747 my $wpid = open(my $fdw,"|-", "(echo > $qcom; ".
9748 empty_input_wrapper($opt::compress_program).") >".
9749 ::Q($self->fh($fdno,'name'))) || die $?;
9750 $self->set_fh($fdno,'w',$fdw);
9751 $self->set_fh($fdno,'wpid',$wpid);
9752 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
9753 # decompress output > stdout
9754 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
9755 $opt::decompress_program, $wpid,
9756 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
9757 || die $?;
9758 $self->set_fh($fdno,'r',$fdr);
9759 $self->set_fh($fdno,'rpid',$rpid);
9763 sub set_fh($$$$) {
9764 # Set file handle
9765 my ($self, $fd_no, $key, $fh) = @_;
9766 $self->{'fd'}{$fd_no,$key} = $fh;
9769 sub fh($) {
9770 # Get file handle
9771 my ($self, $fd_no, $key) = @_;
9772 return $self->{'fd'}{$fd_no,$key};
9775 sub write_block($) {
9776 my $self = shift;
9777 my $stdin_fh = $self->fh(0,"w");
9778 if(fork()) {
9779 # Close in parent
9780 close $stdin_fh;
9781 } else {
9782 # If writing is to a closed pipe:
9783 # Do not call signal handler, but let nothing be written
9784 local $SIG{PIPE} = undef;
9786 for my $part (
9787 grep { defined $_ }
9788 $self->{'header'},$self->{'block'}) {
9789 # syswrite may not write all in one go,
9790 # so make sure everything is written.
9791 my $written;
9792 while($written = syswrite($stdin_fh,$$part)) {
9793 substr($$part,0,$written) = "";
9796 close $stdin_fh;
9797 exit(0);
9801 sub write($) {
9802 my $self = shift;
9803 my $remaining_ref = shift;
9804 my $stdin_fh = $self->fh(0,"w");
9806 my $len = length $$remaining_ref;
9807 # syswrite may not write all in one go,
9808 # so make sure everything is written.
9809 my $written;
9811 # If writing is to a closed pipe:
9812 # Do not call signal handler, but let nothing be written
9813 local $SIG{PIPE} = undef;
9814 while($written = syswrite($stdin_fh,$$remaining_ref)){
9815 substr($$remaining_ref,0,$written) = "";
9819 sub set_block($$$$$$) {
9820 # Copy stdin buffer from $block_ref up to $endpos
9821 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
9822 # Remove $recstart and $recend if needed
9823 # Input:
9824 # $header_ref = ref to $header to prepend
9825 # $buffer_ref = ref to $buffer containing the block
9826 # $endpos = length of $block to pass on
9827 # $recstart = --recstart regexp
9828 # $recend = --recend regexp
9829 # Returns:
9830 # N/A
9831 my $self = shift;
9832 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
9833 $self->{'header'} = $header_ref;
9834 if($opt::roundrobin or $opt::remove_rec_sep or defined $opt::retries) {
9835 my $a = "";
9836 if(($opt::roundrobin or defined $opt::retries) and $self->virgin()) {
9837 $a .= $$header_ref;
9839 # Job is no longer virgin
9840 $self->set_virgin(0);
9841 # Make a full copy because $buffer will change
9842 $a .= substr($$buffer_ref,0,$endpos);
9843 $self->{'block'} = \$a;
9844 if($opt::remove_rec_sep) {
9845 remove_rec_sep($self->{'block'},$recstart,$recend);
9847 $self->{'block_length'} = length ${$self->{'block'}};
9848 } else {
9849 $self->set_virgin(0);
9850 for(substr($$buffer_ref,0,$endpos)) {
9851 $self->{'block'} = \$_;
9853 $self->{'block_length'} = $endpos + length ${$self->{'header'}};
9855 $self->{'block_pos'} = 0;
9856 $self->add_transfersize($self->{'block_length'});
9859 sub block_ref($) {
9860 my $self = shift;
9861 return $self->{'block'};
9864 sub block_length($) {
9865 my $self = shift;
9866 return $self->{'block_length'};
9869 sub remove_rec_sep($) {
9870 # Remove --recstart and --recend from $block
9871 # Input:
9872 # $block_ref = reference to $block to be modified
9873 # $recstart = --recstart
9874 # $recend = --recend
9875 # Uses:
9876 # $opt::regexp = Are --recstart/--recend regexp?
9877 # Returns:
9878 # N/A
9879 my ($block_ref,$recstart,$recend) = @_;
9880 # Remove record separator
9881 if($opt::regexp) {
9882 $$block_ref =~ s/$recend$recstart//gom;
9883 $$block_ref =~ s/^$recstart//os;
9884 $$block_ref =~ s/$recend$//os;
9885 } else {
9886 $$block_ref =~ s/\Q$recend$recstart\E//gom;
9887 $$block_ref =~ s/^\Q$recstart\E//os;
9888 $$block_ref =~ s/\Q$recend\E$//os;
9892 sub non_blocking_write($) {
9893 my $self = shift;
9894 my $something_written = 0;
9896 my $in = $self->fh(0,"w");
9897 my $rv = syswrite($in,
9898 substr(${$self->{'block'}},$self->{'block_pos'}));
9899 if (!defined($rv) && $! == ::EAGAIN()) {
9900 # would block - but would have written
9901 $something_written = 0;
9902 # avoid triggering auto expanding block size
9903 $Global::no_autoexpand_block ||= 1;
9904 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
9905 # incomplete write
9906 # Remove the written part
9907 $self->{'block_pos'} += $rv;
9908 $something_written = $rv;
9909 } else {
9910 # successfully wrote everything
9911 # Empty block to free memory
9912 my $a = "";
9913 $self->set_block(\$a,\$a,0,"","");
9914 $something_written = $rv;
9916 ::debug("pipe", "Non-block: ", $something_written);
9917 return $something_written;
9921 sub virgin($) {
9922 my $self = shift;
9923 return $self->{'virgin'};
9926 sub set_virgin($$) {
9927 my $self = shift;
9928 $self->{'virgin'} = shift;
9931 sub pid($) {
9932 my $self = shift;
9933 return $self->{'pid'};
9936 sub set_pid($$) {
9937 my $self = shift;
9938 $self->{'pid'} = shift;
9941 sub starttime($) {
9942 # Returns:
9943 # UNIX-timestamp this job started
9944 my $self = shift;
9945 return sprintf("%.3f",$self->{'starttime'});
9948 sub set_starttime($@) {
9949 my $self = shift;
9950 my $starttime = shift || ::now();
9951 $self->{'starttime'} = $starttime;
9952 $opt::sqlworker and
9953 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
9954 $starttime);
9957 sub runtime($) {
9958 # Returns:
9959 # Run time in seconds with 3 decimals
9960 my $self = shift;
9961 return sprintf("%.3f",
9962 int(($self->endtime() - $self->starttime())*1000)/1000);
9965 sub endtime($) {
9966 # Returns:
9967 # UNIX-timestamp this job ended
9968 # 0 if not ended yet
9969 my $self = shift;
9970 return ($self->{'endtime'} || 0);
9973 sub set_endtime($$) {
9974 my $self = shift;
9975 my $endtime = shift;
9976 $self->{'endtime'} = $endtime;
9977 $opt::sqlworker and
9978 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
9979 $self->runtime());
9982 sub is_timedout($) {
9983 # Is the job timedout?
9984 # Input:
9985 # $delta_time = time that the job may run
9986 # Returns:
9987 # True or false
9988 my $self = shift;
9989 my $delta_time = shift;
9990 return time > $self->{'starttime'} + $delta_time;
9993 sub kill($) {
9994 my $self = shift;
9995 $self->set_exitstatus(-1);
9996 ::kill_sleep_seq($self->pid());
9999 sub killreason($) {
10000 my $self = shift;
10001 return $self->{'killreason'};
10004 sub set_killreason($) {
10005 my $self = shift;
10006 $self->{'killreason'} = shift;
10009 sub suspend($) {
10010 my $self = shift;
10011 my @pgrps = map { -$_ } $self->pid();
10012 kill "STOP", @pgrps;
10013 $self->set_suspended(1);
10016 sub set_suspended($$) {
10017 my $self = shift;
10018 $self->{'suspended'} = shift;
10021 sub suspended($) {
10022 my $self = shift;
10023 return $self->{'suspended'};
10026 sub resume($) {
10027 my $self = shift;
10028 my @pgrps = map { -$_ } $self->pid();
10029 kill "CONT", @pgrps;
10030 $self->set_suspended(0);
10033 sub failed($) {
10034 # return number of times failed for this $sshlogin
10035 # Input:
10036 # $sshlogin
10037 # Returns:
10038 # Number of times failed for $sshlogin
10039 my $self = shift;
10040 my $sshlogin = shift;
10041 return $self->{'failed'}{$sshlogin};
10044 sub failed_here($) {
10045 # return number of times failed for the current $sshlogin
10046 # Returns:
10047 # Number of times failed for this sshlogin
10048 my $self = shift;
10049 return $self->{'failed'}{$self->sshlogin()};
10052 sub add_failed($) {
10053 # increase the number of times failed for this $sshlogin
10054 my $self = shift;
10055 my $sshlogin = shift;
10056 $self->{'failed'}{$sshlogin}++;
10059 sub add_failed_here($) {
10060 # increase the number of times failed for the current $sshlogin
10061 my $self = shift;
10062 $self->{'failed'}{$self->sshlogin()}++;
10065 sub reset_failed($) {
10066 # increase the number of times failed for this $sshlogin
10067 my $self = shift;
10068 my $sshlogin = shift;
10069 delete $self->{'failed'}{$sshlogin};
10072 sub reset_failed_here($) {
10073 # increase the number of times failed for this $sshlogin
10074 my $self = shift;
10075 delete $self->{'failed'}{$self->sshlogin()};
10078 sub min_failed($) {
10079 # Returns:
10080 # the number of sshlogins this command has failed on
10081 # the minimal number of times this command has failed
10082 my $self = shift;
10083 my $min_failures =
10084 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
10085 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
10086 return ($number_of_sshlogins_failed_on,$min_failures);
10089 sub total_failed($) {
10090 # Returns:
10091 # $total_failures = the number of times this command has failed
10092 my $self = shift;
10093 my $total_failures = 0;
10094 for (values %{$self->{'failed'}}) {
10095 $total_failures += $_;
10097 return $total_failures;
10101 my $script;
10103 sub postpone_exit_and_cleanup {
10104 # Command to remove files and dirs (given as args) without
10105 # affecting the exit value in $?/$status.
10106 if(not $script) {
10107 $script = "perl -e '".
10108 ::spacefree(0,q{
10109 $bash=shift;
10110 $csh=shift;
10111 for(@ARGV){
10112 unlink;
10113 rmdir;
10115 if($bash=~s/(\d+)h/$1/) {
10116 exit $bash;
10118 exit $csh;
10120 # `echo \$?h` is needed to make fish not complain
10121 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
10123 return $script
10128 my $script;
10130 sub fifo_wrap() {
10131 # Script to create a fifo, run a command on the fifo
10132 # while copying STDIN to the fifo, and finally
10133 # remove the fifo and return the exit code of the command.
10134 if(not $script) {
10135 # {} == $PARALLEL_TMP for --fifo
10136 # To make it csh compatible a wrapper needs to:
10137 # * mkfifo
10138 # * spawn $command &
10139 # * cat > fifo
10140 # * waitpid to get the exit code from $command
10141 # * be less than 1000 chars long
10143 # The optimal block size differs
10144 # It has been measured on:
10145 # AMD 6376: 4095
10146 # ppar -a big --pipepart --block -1 --test $1 --fifo 'cat {} >/dev/null';
10147 $script = "perl -e '".
10148 (::spacefree
10149 (0, q{
10150 ($s,$c,$f) = @ARGV;
10151 # mkfifo $PARALLEL_TMP
10152 system "mkfifo", $f;
10153 # spawn $shell -c $command &
10154 $pid = fork || exec $s, "-c", $c;
10155 open($o,">",$f) || die $!;
10156 # cat > $PARALLEL_TMP
10157 while(sysread(STDIN,$buf,4095)){
10158 syswrite $o, $buf;
10160 close $o;
10161 # waitpid to get the exit code from $command
10162 waitpid $pid,0;
10163 # Cleanup
10164 unlink $f;
10165 exit $?/256;
10166 }))."'";
10168 return $script;
10172 sub wrapped($) {
10173 # Wrap command with:
10174 # * --shellquote
10175 # * --nice
10176 # * --cat
10177 # * --fifo
10178 # * --sshlogin
10179 # * --pipepart (@Global::cat_prepends)
10180 # * --tee (@Global::cat_prepends)
10181 # * --pipe
10182 # * --tmux
10183 # The ordering of the wrapping is important:
10184 # * --nice/--cat/--fifo should be done on the remote machine
10185 # * --pipepart/--pipe should be done on the local machine inside --tmux
10186 # Uses:
10187 # @opt::shellquote
10188 # $opt::nice
10189 # $Global::shell
10190 # $opt::cat
10191 # $opt::fifo
10192 # @Global::cat_prepends
10193 # $opt::pipe
10194 # $opt::tmux
10195 # Returns:
10196 # $self->{'wrapped'} = the command wrapped with the above
10197 my $self = shift;
10198 if(not defined $self->{'wrapped'}) {
10199 my $command = $self->replaced();
10200 # Bug in Bash and Ksh when running multiline aliases
10201 # This will force them to run correctly, but will fail in
10202 # tcsh so we do not do it.
10203 # $command .= "\n\n";
10204 if(@opt::shellquote) {
10205 # Quote one time for each --shellquote
10206 my $c = $command;
10207 for(@opt::shellquote) {
10208 $c = ::Q($c);
10210 # Prepend "echo" (it is written in perl because
10211 # quoting '-e' causes problem in some versions and
10212 # csh's version does something wrong)
10213 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
10215 if($Global::parallel_env) {
10216 # If $PARALLEL_ENV set, put that in front of the command
10217 # Used for env_parallel.*
10218 if($Global::shell =~ /zsh/) {
10219 # The extra 'eval' will make aliases work, too
10220 $command = $Global::parallel_env."\n".
10221 "eval ".::Q($command);
10222 } else {
10223 $command = $Global::parallel_env."\n".$command;
10226 if($opt::cat) {
10227 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
10228 # This is to make it possible to compute $PARALLEL_TMP on
10229 # the fly when running remotely.
10230 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
10231 # the command is run.
10233 # Prepend 'cat > $PARALLEL_TMP;'
10234 # Append 'unlink $PARALLEL_TMP without affecting $?'
10235 $command =
10236 'cat > "$PARALLEL_TMP";'.
10237 $command.";". postpone_exit_and_cleanup().
10238 '"$PARALLEL_TMP"';
10239 } elsif($opt::fifo) {
10240 # Prepend fifo-wrapper. In essence:
10241 # mkfifo {}
10242 # ( $command ) &
10243 # # $command must read {}, otherwise this 'cat' will block
10244 # cat > {};
10245 # wait; rm {}
10246 # without affecting $?
10247 $command = fifo_wrap(). " ".
10248 $Global::shell. " ". ::Q($command). ' "$PARALLEL_TMP"'. ';';
10250 # Wrap with ssh + tranferring of files
10251 $command = $self->sshlogin_wrap($command);
10252 if(@Global::cat_prepends) {
10253 # --pipepart: prepend:
10254 # < /tmp/foo perl -e 'while(@ARGV) {
10255 # sysseek(STDIN,shift,0) || die; $left = shift;
10256 # while($read = sysread(STDIN,$buf, ($left > 60800 ? 60800 : $left))){
10257 # $left -= $read; syswrite(STDOUT,$buf);
10259 # }' 0 0 0 11 |
10261 # --pipepart --tee: prepend:
10262 # < dash-a-file
10264 # --pipe --tee: wrap:
10265 # (rm fifo; ... ) < fifo
10267 # --pipe --shard X:
10268 # (rm fifo; ... ) < fifo
10269 $command = (shift @Global::cat_prepends). "($command)".
10270 (shift @Global::cat_appends);
10271 } elsif($opt::pipe and not $opt::roundrobin) {
10272 # Wrap with EOF-detector to avoid starting $command if EOF.
10273 $command = empty_input_wrapper($command);
10275 if($opt::tmux) {
10276 # Wrap command with 'tmux'
10277 $command = $self->tmux_wrap($command);
10279 if($Global::cshell
10281 length $command > 499) {
10282 # csh does not like words longer than 1000 (499 quoted)
10283 # $command = "perl -e '".base64_zip_eval()."' ".
10284 # join" ",string_zip_base64(
10285 # 'exec "'.::perl_quote_scalar($command).'"');
10286 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
10287 ::perl_quote_scalar($command).'"');
10289 $self->{'wrapped'} = $command;
10291 return $self->{'wrapped'};
10294 sub set_sshlogin($$) {
10295 my $self = shift;
10296 my $sshlogin = shift;
10297 $self->{'sshlogin'} = $sshlogin;
10298 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
10299 delete $self->{'wrapped'};
10301 if($opt::sqlworker) {
10302 # Identify worker as --sqlworker often runs on different machines
10303 # If local: Use hostname
10304 my $host = $sshlogin->local() ? ::hostname() : $sshlogin->host();
10305 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
10309 sub sshlogin($) {
10310 my $self = shift;
10311 return $self->{'sshlogin'};
10314 sub string_base64($) {
10315 # Base64 encode strings into 1000 byte blocks.
10316 # 1000 bytes is the largest word size csh supports
10317 # Input:
10318 # @strings = to be encoded
10319 # Returns:
10320 # @base64 = 1000 byte block
10321 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
10322 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
10323 return @base64;
10326 sub string_zip_base64($) {
10327 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
10328 # byte blocks.
10329 # 1000 bytes is the largest word size csh supports
10330 # Zipping will make exporting big environments work, too
10331 # Input:
10332 # @strings = to be encoded
10333 # Returns:
10334 # @base64 = 1000 byte block
10335 my($zipin_fh, $zipout_fh,@base64);
10336 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
10337 if(fork) {
10338 close $zipin_fh;
10339 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
10340 # Split base64 encoded into 1000 byte blocks
10341 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
10342 close $zipout_fh;
10343 } else {
10344 close $zipout_fh;
10345 print $zipin_fh @_;
10346 close $zipin_fh;
10347 exit;
10349 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
10350 return @base64;
10353 sub base64_zip_eval() {
10354 # Script that:
10355 # * reads base64 strings from @ARGV
10356 # * decodes them
10357 # * pipes through 'bzip2 -dc'
10358 # * evals the result
10359 # Reverse of string_zip_base64 + eval
10360 # Will be wrapped in ' so single quote is forbidden
10361 # Returns:
10362 # $script = 1-liner for perl -e
10363 my $script = ::spacefree(0,q{
10364 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
10365 eval"@GNU_Parallel";
10366 $chld = $SIG{CHLD};
10367 $SIG{CHLD} = "IGNORE";
10368 # Search for bzip2. Not found => use default path
10369 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
10370 # $in = stdin on $zip, $out = stdout from $zip
10371 # Forget my() to save chars for csh
10372 # my($in, $out,$eval);
10373 open3($in,$out,">&STDERR",$zip,"-dc");
10374 if(my $perlpid = fork) {
10375 close $in;
10376 $eval = join "", <$out>;
10377 close $out;
10378 } else {
10379 close $out;
10380 # Pipe decoded base64 into 'bzip2 -dc'
10381 print $in (decode_base64(join"",@ARGV));
10382 close $in;
10383 exit;
10385 wait;
10386 $SIG{CHLD} = $chld;
10387 eval $eval;
10389 ::debug("base64",$script,"\n");
10390 return $script;
10393 sub base64_wrap($) {
10394 # base64 encode Perl code
10395 # Split it into chunks of < 1000 bytes
10396 # Prepend it with a decoder that eval's it
10397 # Input:
10398 # $eval_string = Perl code to run
10399 # Returns:
10400 # $shell_command = shell command that runs $eval_string
10401 my $eval_string = shift;
10402 return
10403 "perl -e ".
10404 ::Q(base64_zip_eval())." ".
10405 join" ",::shell_quote(string_zip_base64($eval_string));
10408 sub base64_eval($) {
10409 # Script that:
10410 # * reads base64 strings from @ARGV
10411 # * decodes them
10412 # * evals the result
10413 # Reverse of string_base64 + eval
10414 # Will be wrapped in ' so single quote is forbidden.
10415 # Spaces are stripped so spaces cannot be significant.
10416 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
10417 # to make it clear that this is a GNU Parallel command
10418 # when looking at the process table.
10419 # Returns:
10420 # $script = 1-liner for perl -e
10421 my $script = ::spacefree(0,q{
10422 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
10423 eval "@GNU_Parallel";
10424 my $eval = decode_base64(join"",@ARGV);
10425 eval $eval;
10427 ::debug("base64",$script,"\n");
10428 return $script;
10431 sub sshlogin_wrap($) {
10432 # Wrap the command with the commands needed to run remotely
10433 # Input:
10434 # $command = command to run
10435 # Returns:
10436 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
10437 sub monitor_parent_sshd_script {
10438 # This script is to solve the problem of
10439 # * not mixing STDERR and STDOUT
10440 # * terminating with ctrl-c
10441 # If its parent is ssh: all good
10442 # If its parent is init(1): ssh died, so kill children
10443 my $monitor_parent_sshd_script;
10445 if(not $monitor_parent_sshd_script) {
10446 $monitor_parent_sshd_script =
10447 # This will be packed in ', so only use "
10448 ::spacefree
10449 (0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
10450 '$tmpdir = $ENV{"TMPDIR"} || "'.
10451 ::perl_quote_scalar($ENV{'PARALLEL_REMOTE_TMPDIR'}).'";'.
10452 '$nice = '.$opt::nice.';'.
10453 '$termseq = "'.$opt::termseq.'";'.
10456 # Check that $tmpdir is writable
10457 -w $tmpdir ||
10458 die("$tmpdir\040is\040not\040writable.".
10459 "\040Set\040PARALLEL_REMOTE_TMPDIR");
10460 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
10461 do {
10462 $ENV{PARALLEL_TMP} = $tmpdir."/par".
10463 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
10464 } while(-e $ENV{PARALLEL_TMP});
10465 # Set $script to a non-existent file name in $TMPDIR
10466 do {
10467 $script = $tmpdir."/par-job-$ENV{PARALLEL_SEQ}_".
10468 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
10469 } while(-e $script);
10470 # Create a script from the hex code
10471 # that removes itself and runs the commands
10472 open($fh,">",$script) || die;
10473 # \040 = space - but we remove spaces in the script
10474 # ' needed due to rc-shell
10475 print($fh("rm\040\'$script\'\n",$bashfunc.$cmd));
10476 close $fh;
10477 my $parent = getppid;
10478 my $done = 0;
10479 $SIG{CHLD} = sub { $done = 1; };
10480 $pid = fork;
10481 unless($pid) {
10482 # Make own process group to be able to kill HUP it later
10483 eval { setpgrp };
10484 # Set nice value
10485 eval { setpriority(0,0,$nice) };
10486 # Run the script
10487 exec($shell,$script);
10488 die("exec\040failed: $!");
10490 while((not $done) and (getppid == $parent)) {
10491 # Parent pid is not changed, so sshd is alive
10492 # Exponential sleep up to 1 sec
10493 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
10494 select(undef, undef, undef, $s);
10496 if(not $done) {
10497 # sshd is dead: User pressed Ctrl-C
10498 # Kill as per --termseq
10499 my @term_seq = split/,/,$termseq;
10500 if(not @term_seq) {
10501 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
10503 while(@term_seq && kill(0,-$pid)) {
10504 kill(shift @term_seq, -$pid);
10505 select(undef, undef, undef, (shift @term_seq)/1000);
10508 wait;
10509 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
10512 return $monitor_parent_sshd_script;
10515 sub vars_to_export {
10516 # Uses:
10517 # @opt::env
10518 my @vars = ("parallel_bash_environment");
10519 for my $varstring (@opt::env) {
10520 # Split up --env VAR1,VAR2
10521 push @vars, split /,/, $varstring;
10523 for (@vars) {
10524 if(-r $_ and not -d) {
10525 # Read as environment definition bug #44041
10526 # TODO parse this
10527 $Global::envdef = ::slurp_or_exit($_);
10530 if(grep { /^_$/ } @vars) {
10531 local $/ = "\n";
10532 # --env _
10533 # Include all vars that are not in a clean environment
10534 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
10535 my @ignore = <$vars_fh>;
10536 chomp @ignore;
10537 my %ignore;
10538 @ignore{@ignore} = @ignore;
10539 close $vars_fh;
10540 push @vars, grep { not defined $ignore{$_} } keys %ENV;
10541 @vars = grep { not /^_$/ } @vars;
10542 } else {
10543 ::error("Run '$Global::progname --record-env' ".
10544 "in a clean environment first.");
10545 ::wait_and_exit(255);
10548 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
10549 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
10551 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
10552 "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST",
10553 "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS",
10554 "PARALLEL_JOBSLOT", $opt::process_slot_var,
10555 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
10556 # Keep only defined variables
10557 return grep { defined($ENV{$_}) } @vars;
10560 sub env_as_eval {
10561 # Returns:
10562 # $eval = '$ENV{"..."}=...; ...'
10563 my @vars = vars_to_export();
10564 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
10565 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
10566 my @non_functions = (grep { !/PARALLEL_ENV/ }
10567 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
10569 # eval of @envset will set %ENV
10570 my $envset = join"", map {
10571 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
10572 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
10574 # running @bashfunc on the command line, will set the functions
10575 my @bashfunc = map {
10576 my $v=$_;
10577 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
10578 "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions;
10579 # eval $bashfuncset will set $bashfunc
10580 my $bashfuncset;
10581 if(@bashfunc) {
10582 # Functions are not supported for all shells
10583 if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) {
10584 ::warning("Shell functions may not be supported in $Global::shell.");
10586 $bashfuncset =
10587 '@bash_functions=qw('."@bash_functions".");".
10588 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
10589 if($shell=~/csh/) {
10590 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
10591 exec "false";
10594 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
10595 } else {
10596 $bashfuncset = '$bashfunc = "";'
10598 if($ENV{'parallel_bash_environment'}) {
10599 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
10601 ::debug("base64",$envset,$bashfuncset,"\n");
10602 return $csh_friendly,$envset,$bashfuncset;
10605 my $self = shift;
10606 my $command = shift;
10607 # TODO test that *sh -c 'parallel --env' use *sh
10608 if(not defined $self->{'sshlogin_wrap'}{$command}) {
10609 my $sshlogin = $self->sshlogin();
10610 $ENV{'PARALLEL_SEQ'} = $self->seq();
10611 $ENV{$opt::process_slot_var} = -1 +
10612 ($ENV{'PARALLEL_JOBSLOT'} = $self->slot());
10613 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
10614 $ENV{'PARALLEL_SSHHOST'} = $sshlogin->host();
10615 if ($opt::hostgroups) {
10616 $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups();
10617 $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups();
10619 $ENV{'PARALLEL_PID'} = $$;
10620 if($sshlogin->local()) {
10621 if($opt::workdir) {
10622 # Create workdir if needed. Then cd to it.
10623 my $wd = $self->workdir();
10624 if($opt::workdir eq "." or $opt::workdir eq "...") {
10625 # If $wd does not start with '/': Prepend $HOME
10626 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
10628 ::mkdir_or_die($wd);
10629 my $post = "";
10630 if($opt::workdir eq "...") {
10631 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
10634 $command = "cd ".::Q($wd)." || exit 255; " .
10635 $command . $post;;
10637 if(@opt::env) {
10638 # Prepend with environment setter, which sets functions in zsh
10639 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
10640 my $perl_code = $envset.$bashfuncset.
10641 '@ARGV="'.::perl_quote_scalar($command).'";'.
10642 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
10643 if(length $perl_code > 999
10645 not $csh_friendly
10647 $command =~ /\n/) {
10648 # csh does not deal well with > 1000 chars in one word
10649 # csh does not deal well with $ENV with \n
10650 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
10651 } else {
10652 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
10654 } else {
10655 $self->{'sshlogin_wrap'}{$command} = $command;
10657 } else {
10658 my $pwd = "";
10659 if($opt::workdir) {
10660 # Create remote workdir if needed. Then cd to it.
10661 my $wd = ::pQ($self->workdir());
10662 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
10663 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") &&}.
10664 qq{exit 255;};
10666 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
10667 my $cmd = $command;
10668 # q// does not quote \, so we must do that
10669 $cmd =~ s/\\/\\\\/g;
10671 my $remote_command = $sshlogin->hexwrap
10672 ($pwd.$envset.$bashfuncset.'$cmd='."q\0".$cmd."\0;".
10673 monitor_parent_sshd_script());
10674 my ($pre,$post,$cleanup)=("","","");
10675 # --transfer
10676 $pre .= $self->sshtransfer();
10677 # --return
10678 $post .= $self->sshreturn();
10679 # --cleanup
10680 $post .= $self->sshcleanup();
10681 if($post) {
10682 # We need to save the exit status of the job
10683 $post = exitstatuswrapper($post);
10685 $self->{'sshlogin_wrap'}{$command} =
10686 ($pre
10687 . $sshlogin->wrap($remote_command)
10688 . ";"
10689 . $post);
10692 return $self->{'sshlogin_wrap'}{$command};
10695 sub fill_templates($) {
10696 # Replace replacement strings in template(s)
10697 # Returns:
10698 # @templates - File names of replaced templates
10699 my $self = shift;
10701 if(%opt::template) {
10702 my @template_name =
10703 map { $self->{'commandline'}->replace_placeholders([$_],0,0) }
10704 @{$self->{'commandline'}{'template_names'}};
10705 ::debug("tmpl","Names: @template_name\n");
10706 for(my $i = 0; $i <= $#template_name; $i++) {
10707 ::write_or_exit
10708 ($template_name[$i],
10709 $self->{'commandline'}->
10710 replace_placeholders([$self->{'commandline'}
10711 {'template_contents'}[$i]],0,0));
10713 if($opt::cleanup) {
10714 $self->add_rm(@template_name);
10719 sub filter($) {
10720 # Replace replacement strings in filter(s) and evaluate them
10721 # Returns:
10722 # $run - 1=yes, undef=no
10723 my $self = shift;
10724 my $run = 1;
10725 if(@opt::filter) {
10726 for my $eval ($self->{'commandline'}->
10727 replace_placeholders(\@opt::filter,0,0)) {
10728 $run &&= eval $eval;
10730 $self->{'commandline'}{'skip'} ||= not $run;
10732 return $run;
10735 sub transfer($) {
10736 # Files to transfer
10737 # Non-quoted and with {...} substituted
10738 # Returns:
10739 # @transfer - File names of files to transfer
10740 my $self = shift;
10742 my $transfersize = 0;
10743 my @transfer = $self->{'commandline'}->
10744 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
10745 for(@transfer) {
10746 # filesize
10747 if(-e $_) {
10748 $transfersize += (stat($_))[7];
10751 $self->add_transfersize($transfersize);
10752 return @transfer;
10755 sub transfersize($) {
10756 my $self = shift;
10757 return $self->{'transfersize'};
10760 sub add_transfersize($) {
10761 my $self = shift;
10762 my $transfersize = shift;
10763 $self->{'transfersize'} += $transfersize;
10764 $opt::sqlworker and
10765 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
10766 $self->{'transfersize'});
10769 sub sshtransfer($) {
10770 # Returns for each transfer file:
10771 # rsync $file remote:$workdir
10772 my $self = shift;
10773 my @pre;
10774 my $sshlogin = $self->sshlogin();
10775 my $workdir = $self->workdir();
10776 for my $file ($self->transfer()) {
10777 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
10779 return join("",@pre);
10782 sub return($) {
10783 # Files to return
10784 # Non-quoted and with {...} substituted
10785 # Returns:
10786 # @non_quoted_filenames
10787 my $self = shift;
10788 return $self->{'commandline'}->
10789 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
10792 sub returnsize($) {
10793 # This is called after the job has finished
10794 # Returns:
10795 # $number_of_bytes transferred in return
10796 my $self = shift;
10797 for my $file ($self->return()) {
10798 if(-e $file) {
10799 $self->{'returnsize'} += (stat($file))[7];
10802 return $self->{'returnsize'};
10805 sub add_returnsize($) {
10806 my $self = shift;
10807 my $returnsize = shift;
10808 $self->{'returnsize'} += $returnsize;
10809 $opt::sqlworker and
10810 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
10811 $self->{'returnsize'});
10814 sub sshreturn($) {
10815 # Returns for each return-file:
10816 # rsync remote:$workdir/$file .
10817 my $self = shift;
10818 my $sshlogin = $self->sshlogin();
10819 my $pre = "";
10820 for my $file ($self->return()) {
10821 $file =~ s:^\./::g; # Remove ./ if any
10822 my $relpath = ($file !~ m:^/:) ||
10823 ($file =~ m:/\./:); # Is the path relative or /./?
10824 my $cd = "";
10825 my $wd = "";
10826 if($relpath) {
10827 # rsync -avR /foo/./bar/baz.c remote:/tmp/
10828 # == (on old systems)
10829 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
10830 $wd = ::shell_quote_file($self->workdir()."/");
10832 # Only load File::Basename if actually needed
10833 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
10834 # dir/./file means relative to dir, so remove dir on remote
10835 $file =~ m:(.*)/\./:;
10836 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
10837 my $nobasedir = $file;
10838 $nobasedir =~ s:.*/\./::;
10839 $cd = ::shell_quote_file(::dirname($nobasedir));
10840 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
10841 my $basename = ::Q(::shell_quote_file(::basename($file)));
10842 # --return
10843 # mkdir -p /home/tange/dir/subdir/;
10844 # rsync (--protocol 30) -rlDzR
10845 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
10846 # server:file.gz /home/tange/dir/subdir/
10847 $pre .= "mkdir -p $basedir$cd" . " && " .
10848 $sshlogin->rsync(). " $rsync_cd -- ".$sshlogin->host().':'.
10849 $basename . " ".$basedir.$cd.";";
10851 return $pre;
10854 sub sshcleanup($) {
10855 # Return the sshcommand needed to remove the file
10856 # Returns:
10857 # ssh command needed to remove files from sshlogin
10858 my $self = shift;
10859 my $sshlogin = $self->sshlogin();
10860 my $workdir = $self->workdir();
10861 my $cleancmd = "";
10863 for my $file ($self->remote_cleanup()) {
10864 my @subworkdirs = parentdirs_of($file);
10865 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
10867 if(defined $opt::workdir and $opt::workdir eq "...") {
10868 $cleancmd .= $sshlogin->wrap("rm -rf " . ::Q($workdir).';');
10870 return $cleancmd;
10873 sub remote_cleanup($) {
10874 # Returns:
10875 # Files to remove at cleanup
10876 my $self = shift;
10877 if($opt::cleanup) {
10878 my @transfer = $self->transfer();
10879 my @return = $self->return();
10880 return (@transfer,@return);
10881 } else {
10882 return ();
10886 sub exitstatuswrapper(@) {
10887 # Input:
10888 # @shellcode = shell code to execute
10889 # Returns:
10890 # shell script that returns current status after executing @shellcode
10891 if($Global::cshell) {
10892 return ('set _EXIT_status=$status; ' .
10893 join(" ",@_).
10894 'exit $_EXIT_status;');
10895 } elsif($Global::fish) {
10896 return ('export _EXIT_status=$status; ' .
10897 join(" ",@_).
10898 'exit $_EXIT_status;');
10899 } else {
10900 return ('_EXIT_status=$?; ' .
10901 join(" ",@_).
10902 'exit $_EXIT_status;');
10906 sub workdir($) {
10907 # Returns:
10908 # the workdir on a remote machine
10909 my $self = shift;
10910 if(not defined $self->{'workdir'}) {
10911 my $workdir;
10912 if(defined $opt::workdir) {
10913 if($opt::workdir eq ".") {
10914 # . means current dir
10915 my $home = $ENV{'HOME'};
10916 eval 'use Cwd';
10917 my $cwd = cwd();
10918 $workdir = $cwd;
10919 if($home) {
10920 # If homedir exists: remove the homedir from
10921 # workdir if cwd starts with homedir
10922 # E.g. /home/foo/my/dir => my/dir
10923 # E.g. /tmp/my/dir => /tmp/my/dir
10924 my ($home_dev, $home_ino) = (stat($home))[0,1];
10925 my $parent = "";
10926 my @dir_parts = split(m:/:,$cwd);
10927 my $part;
10928 while(defined ($part = shift @dir_parts)) {
10929 $part eq "" and next;
10930 $parent .= "/".$part;
10931 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
10932 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
10933 # dev and ino is the same: We found the homedir.
10934 $workdir = join("/",@dir_parts);
10935 last;
10939 if($workdir eq "") {
10940 $workdir = ".";
10942 } elsif($opt::workdir eq "...") {
10943 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
10944 . "-" . $self->seq();
10945 } else {
10946 $workdir = $self->{'commandline'}->
10947 replace_placeholders([$opt::workdir],0,0);
10948 #$workdir = $opt::workdir;
10949 # Rsync treats /./ special. We dont want that
10950 $workdir =~ s:/\./:/:g; # Remove /./
10951 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
10952 $workdir =~ s:^\./::g; # Remove starting ./ if any
10954 } else {
10955 $workdir = ".";
10957 $self->{'workdir'} = $workdir;
10959 return $self->{'workdir'};
10962 sub parentdirs_of($) {
10963 # Return:
10964 # all parentdirs except . of this dir or file - sorted desc by length
10965 my $d = shift;
10966 my @parents = ();
10967 while($d =~ s:/[^/]+$::) {
10968 if($d ne ".") {
10969 push @parents, $d;
10972 return @parents;
10975 sub start($) {
10976 # Setup STDOUT and STDERR for a job and start it.
10977 # Returns:
10978 # job-object or undef if job not to run
10980 sub open3_setpgrp_internal {
10981 # Run open3+setpgrp followed by the command
10982 # Input:
10983 # $stdin_fh = Filehandle to use as STDIN
10984 # $stdout_fh = Filehandle to use as STDOUT
10985 # $stderr_fh = Filehandle to use as STDERR
10986 # $command = Command to run
10987 # Returns:
10988 # $pid = Process group of job started
10989 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
10990 my $pid;
10991 local (*OUT,*ERR);
10992 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
10993 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
10994 # The eval is needed to catch exception from open3
10995 eval {
10996 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
10997 # Each child gets its own process group to make it safe to killall
10998 eval{ setpgrp(0,0) };
10999 eval{ setpriority(0,0,$opt::nice) };
11000 exec($Global::shell,"-c",$command)
11001 || ::die_bug("open3-$stdin_fh ".substr($command,0,200));
11004 return $pid;
11007 sub open3_setpgrp_external {
11008 # Run open3 on $command wrapped with a perl script doing setpgrp
11009 # Works on systems that do not support open3(,,,"-")
11010 # Input:
11011 # $stdin_fh = Filehandle to use as STDIN
11012 # $stdout_fh = Filehandle to use as STDOUT
11013 # $stderr_fh = Filehandle to use as STDERR
11014 # $command = Command to run
11015 # Returns:
11016 # $pid = Process group of job started
11017 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
11018 local (*OUT,*ERR);
11019 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
11020 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
11022 my $pid;
11023 my @setpgrp_wrap =
11024 ('perl','-e',
11025 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
11026 "exec '$Global::shell', '-c', \@ARGV");
11027 # The eval is needed to catch exception from open3
11028 eval {
11029 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
11030 || ::die_bug("open3-$stdin_fh");
11033 return $pid;
11036 sub redefine_open3_setpgrp {
11037 my $setgprp_cache = shift;
11038 # Select and run open3_setpgrp_internal/open3_setpgrp_external
11039 no warnings 'redefine';
11040 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
11041 # Test to see if open3(x,x,x,"-") is fully supported
11042 # Can an exported bash function be called via open3?
11043 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
11044 'else { exec("bash","-c","testfun && true"); }';
11045 my $bash =
11046 ::shell_quote_scalar_default(
11047 "testfun() { rm $name; }; export -f testfun; ".
11048 "perl -MIPC::Open3 -e ".
11049 ::Q(::Q($script))
11051 my $redefine_eval;
11052 # Redirect STDERR temporarily,
11053 # so errors on MacOS X are ignored.
11054 open my $saveerr, ">&STDERR";
11055 open STDERR, '>', "/dev/null";
11056 # Run the test
11057 ::debug("init",qq{bash -c $bash 2>/dev/null});
11058 qx{ bash -c $bash 2>/dev/null };
11059 open STDERR, ">&", $saveerr;
11061 if(-e $name) {
11062 # Does not support open3(x,x,x,"-")
11063 # or does not have bash:
11064 # Use (slow) external version
11065 unlink($name);
11066 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
11067 ::debug("init","open3_setpgrp_external chosen\n");
11068 } else {
11069 # Supports open3(x,x,x,"-")
11070 # This is 0.5 ms faster to run
11071 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
11072 ::debug("init","open3_setpgrp_internal chosen\n");
11074 if(open(my $fh, ">", $setgprp_cache)) {
11075 print $fh $redefine_eval;
11076 close $fh;
11077 } else {
11078 ::debug("init","Cannot write to $setgprp_cache");
11080 eval $redefine_eval;
11083 sub open3_setpgrp {
11084 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
11085 ::hostname() . "/setpgrp_func";
11086 sub read_cache() {
11087 -e $setgprp_cache || return 0;
11088 local $/ = undef;
11089 open(my $fh, "<", $setgprp_cache) || return 0;
11090 eval <$fh> || return 0;
11091 close $fh;
11092 return 1;
11094 if(not read_cache()) {
11095 redefine_open3_setpgrp($setgprp_cache);
11097 # The sub is now redefined. Call it
11098 return open3_setpgrp(@_);
11101 my $job = shift;
11102 # Get the shell command to be executed (possibly with ssh infront).
11103 my $command = $job->wrapped();
11104 my $pid;
11106 if($Global::interactive or $Global::stderr_verbose) {
11107 $job->interactive_start();
11109 # Must be run after $job->interactive_start():
11110 # $job->interactive_start() may call $job->skip()
11111 if($job->{'commandline'}{'skip'}
11113 not $job->filter()) {
11114 # $job->skip() was called or job filtered
11115 $command = "true";
11117 $job->openoutputfiles();
11118 $job->print_verbose_dryrun();
11119 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
11120 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
11121 $ENV{'PARALLEL_SEQ'} = $job->seq();
11122 $ENV{'PARALLEL_PID'} = $$;
11123 $ENV{$opt::process_slot_var} = -1 +
11124 ($ENV{'PARALLEL_JOBSLOT'} = $job->slot());
11125 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
11126 $job->add_rm($ENV{'PARALLEL_TMP'});
11127 $job->fill_templates();
11128 $ENV{'SSHPASS'} = $job->{'sshlogin'}->{'password'};
11129 ::debug("run", $Global::total_running, " processes . Starting (",
11130 $job->seq(), "): $command\n");
11131 if($opt::pipe) {
11132 my ($stdin_fh) = ::gensym();
11133 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
11134 if($opt::roundrobin and not $opt::keeporder) {
11135 # --keep-order will make sure the order will be reproducible
11136 ::set_fh_non_blocking($stdin_fh);
11138 $job->set_fh(0,"w",$stdin_fh);
11139 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
11140 } elsif(($opt::tty or $opt::open_tty) and -c "/dev/tty" and
11141 open(my $devtty_fh, "<", "/dev/tty")) {
11142 # Give /dev/tty to the command if no one else is using it
11143 # The eval is needed to catch exception from open3
11144 local (*IN,*OUT,*ERR);
11145 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
11146 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
11147 *IN = $devtty_fh;
11148 # The eval is needed to catch exception from open3
11149 my @wrap = ('perl','-e',
11150 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
11151 "exec '$Global::shell', '-c', \@ARGV");
11152 eval {
11153 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
11154 || ::die_bug("open3-/dev/tty");
11157 close $devtty_fh;
11158 $job->set_virgin(0);
11159 } elsif($Global::semaphore) {
11160 # Allow sem to read from stdin
11161 $pid = open3_setpgrp("<&STDIN",$stdout_fh,$stderr_fh,$command);
11162 $job->set_virgin(0);
11163 } else {
11164 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
11165 $job->set_virgin(0);
11167 if($pid) {
11168 # A job was started
11169 $Global::total_running++;
11170 $Global::total_started++;
11171 $job->set_pid($pid);
11172 $job->set_starttime();
11173 $Global::running{$job->pid()} = $job;
11174 if($opt::timeout) {
11175 $Global::timeoutq->insert($job);
11177 $Global::newest_job = $job;
11178 $Global::newest_starttime = ::now();
11179 return $job;
11180 } else {
11181 # No more processes
11182 ::debug("run", "Cannot spawn more jobs.\n");
11183 return undef;
11187 sub interactive_start($) {
11188 my $self = shift;
11189 my $command = $self->wrapped();
11190 if($Global::interactive) {
11191 my $answer;
11192 ::status_no_nl("$command ?...");
11194 my $tty_fh = ::open_or_exit("<","/dev/tty");
11195 $answer = <$tty_fh>;
11196 close $tty_fh;
11197 # Sometime we get an empty string (not even \n)
11198 # Do not know why, so let us just ignore it and try again
11199 } while(length $answer < 1);
11200 if (not ($answer =~ /^\s*y/i)) {
11201 $self->{'commandline'}->skip();
11203 } else {
11204 print $Global::original_stderr "$command\n";
11209 my $tmuxsocket;
11210 my $qsocket;
11212 sub tmux_wrap($) {
11213 # Wrap command with tmux for session pPID
11214 # Input:
11215 # $actual_command = the actual command being run (incl ssh wrap)
11216 my $self = shift;
11217 my $actual_command = shift;
11218 # Temporary file name. Used for fifo to communicate exit val
11219 my $tmpfifo = ::tmpname("tmx");
11220 $self->add_rm($tmpfifo);
11221 if(length($tmpfifo) >=100) {
11222 ::error("tmux does not support sockets with path > 100.");
11223 ::wait_and_exit(255);
11225 if($opt::tmuxpane) {
11226 # Move the command into a pane in window 0
11227 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
11228 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
11229 $actual_command;
11231 my $visual_command = $self->replaced();
11232 my $title = $visual_command;
11233 if($visual_command =~ /\0/) {
11234 ::error("Command line contains NUL. tmux is confused by NUL.");
11235 ::wait_and_exit(255);
11237 # ; causes problems
11238 # ascii 194-245 annoys tmux
11239 $title =~ tr/[\011-\016;\302-\365]/ /s;
11240 $title = ::Q($title);
11242 my $l_act = length($actual_command);
11243 my $l_tit = length($title);
11244 my $l_fifo = length($tmpfifo);
11245 # The line to run contains a 118 chars extra code + the title 2x
11246 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
11248 my $quoted_space75 = ::Q(" ")x75;
11249 while($l_tit < 1000 and
11251 (890 < $l_tot and $l_tot < 1350)
11253 (9250 < $l_tot and $l_tot < 9800)
11254 )) {
11255 # tmux blocks for certain lengths:
11256 # 900 < title + command < 1200
11257 # 9250 < title + command < 9800
11258 # but only if title < 1000, so expand the title with 75 spaces
11259 # The measured lengths are:
11260 # 996 < (title + whole command) < 1127
11261 # 9331 < (title + whole command) < 9636
11262 $title .= $quoted_space75;
11263 $l_tit = length($title);
11264 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
11267 my $tmux;
11268 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11269 if(not $tmuxsocket) {
11270 $tmuxsocket = ::tmpname("tms");
11271 $qsocket = ::Q($tmuxsocket);
11272 ::debug("tmux", "Start: $ENV{'PARALLEL_TMUX'} -S $qsocket attach");
11273 if($opt::fg) {
11274 if(not fork) {
11275 # Run tmux in the foreground
11276 # Wait for the socket to appear
11277 while (not -e $tmuxsocket) { }
11278 `$ENV{'PARALLEL_TMUX'} -S $qsocket attach`;
11279 exit;
11282 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $qsocket attach");
11284 $tmux = "sh -c ".::Q(
11285 $ENV{'PARALLEL_TMUX'}.
11286 " -S $qsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1").";" .
11287 $ENV{'PARALLEL_TMUX'}.
11288 " -S $qsocket new-window -t p$$ -n $title";
11290 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
11291 $Limits::Command::line_max_len, " tot ",
11292 $l_tot, "\n");
11293 return "mkfifo ".::Q($tmpfifo)." && $tmux ".
11294 # Run in tmux
11297 "(".$actual_command.');'.
11298 # The triple print is needed - otherwise the testsuite fails
11299 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].
11300 ::Q($tmpfifo)."&".
11301 "echo $title; echo \007Job finished at: `date`;sleep 10"
11303 # Run outside tmux
11304 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
11305 # If csh the first will be 0h, so use the second as exit value.
11306 # Otherwise just use the first value as exit value.
11307 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; }.
11308 q{/(\d+)h/ and exit($1);exit$c' }.::Q($tmpfifo);
11312 sub is_already_in_results($) {
11313 # Do we already have results for this job?
11314 # Returns:
11315 # $job_already_run = bool whether there is output for this or not
11316 my $job = $_[0];
11317 if($Global::csvsep) {
11318 if($opt::joblog) {
11319 # OK: You can look for job run in joblog
11320 return 0
11321 } else {
11322 ::warning_once(
11323 "--resume --results .csv/.tsv/.json is not supported yet\n");
11324 # TODO read and parse the file
11325 return 0
11328 my $out = $job->{'commandline'}->results_out();
11329 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
11330 return(-e $out."stdout" or -f $out);
11333 sub is_already_in_joblog($) {
11334 my $job = shift;
11335 return vec($Global::job_already_run,$job->seq(),1);
11338 sub set_job_in_joblog($) {
11339 my $job = shift;
11340 vec($Global::job_already_run,$job->seq(),1) = 1;
11343 sub retry() {
11344 # This command should be retried
11345 my $self = shift;
11347 $self->set_endtime(undef);
11348 $self->reset_exitstatus();
11349 $self->set_killreason(undef);
11350 $Global::JobQueue->unget($self);
11351 ::debug("run", "Retry ", $self->seq(), "\n");
11352 return 1;
11355 sub should_be_retried($) {
11356 # Should this job be retried?
11357 # Returns
11358 # 0 - do not retry
11359 # 1 - job queued for retry
11360 my $self = shift;
11361 if($opt::memfree and $self->killreason() eq "mem") {
11362 # Job was killed due to memfree => retry
11363 return $self->retry();
11365 if (not defined $opt::retries) { return 0; }
11366 if(not $self->exitstatus() and not $self->exitsignal()) {
11367 # Completed with success. If there is a recorded failure: forget it
11368 $self->reset_failed_here();
11369 return 0;
11370 } else {
11371 # The job failed. Should it be retried?
11372 $self->add_failed_here();
11373 my $retries = $self->{'commandline'}->
11374 replace_placeholders([$opt::retries],0,0);
11375 # 0 = Inf
11376 if($retries == 0) { $retries = 2**31; }
11377 # Ignore files already unlinked to avoid memory leak
11378 $self->{'unlink'} = [ grep { -e $_ } @{$self->{'unlink'}} ];
11379 map { -e $_ or delete $Global::unlink{$_} } keys %Global::unlink;
11380 if($self->total_failed() == $retries) {
11381 # This has been retried enough
11382 return 0;
11383 } else {
11384 # This command should be retried
11385 return $self->retry();
11391 my (%print_later,$job_seq_to_print);
11393 sub print_earlier_jobs($) {
11394 # Print jobs whose output is postponed due to --keep-order
11395 # Returns: N/A
11396 my $job = shift;
11397 $print_later{$job->seq()} = $job;
11398 $job_seq_to_print ||= 1;
11399 my $returnsize = 0;
11400 ::debug("run", "Looking for: $job_seq_to_print ",
11401 "This: ", $job->seq(), "\n");
11402 for(;vec($Global::job_already_run,$job_seq_to_print,1);
11403 $job_seq_to_print++) {}
11404 while(my $j = $print_later{$job_seq_to_print}) {
11405 $returnsize += $j->print();
11406 if($j->endtime()) {
11407 # Job finished - look at the next
11408 delete $print_later{$job_seq_to_print};
11409 $job_seq_to_print++;
11410 next;
11411 } else {
11412 # Job not finished yet - look at it again next round
11413 last;
11416 return $returnsize;
11420 sub print($) {
11421 # Print the output of the jobs
11422 # Returns: N/A
11423 my $self = shift;
11425 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
11426 if($opt::dryrun) {
11427 # Nothing was printed to this job:
11428 # cleanup tmp files if --files was set
11429 ::rm($self->fh(1,"name"));
11431 if($opt::pipe and $self->virgin() and not $opt::tee) {
11432 # Skip --joblog, --dryrun, --verbose
11433 } else {
11434 if($opt::ungroup) {
11435 # NULL returnsize = 0 returnsize
11436 $self->returnsize() or $self->add_returnsize(0);
11437 if($Global::joblog and defined $self->{'exitstatus'}) {
11438 # Add to joblog when finished
11439 $self->print_joblog();
11440 # Printing is only relevant for grouped/--line-buffer output.
11441 $opt::ungroup and return;
11444 # Check for disk full
11445 ::exit_if_disk_full();
11448 my $returnsize = $self->returnsize();
11449 my @fdno;
11450 if($opt::latestline) {
11451 @fdno = (1);
11452 } else {
11453 @fdno = (sort { $a <=> $b } keys %Global::fh);
11455 for my $fdno (@fdno) {
11456 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
11457 $fdno == 0 and next;
11458 my $out_fh = $Global::fh{$fdno};
11459 my $in_fh = $self->fh($fdno,"r");
11460 if(not $in_fh) {
11461 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
11462 # ::warning("File descriptor $fdno not defined\n");
11464 next;
11466 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
11467 if($Global::linebuffer) {
11468 # Line buffered print out
11469 $self->print_linebuffer($fdno,$in_fh,$out_fh);
11470 } elsif($Global::files) {
11471 $self->print_files($fdno,$in_fh,$out_fh);
11472 } elsif($opt::results) {
11473 $self->print_results($fdno,$in_fh,$out_fh);
11474 } else {
11475 $self->print_normal($fdno,$in_fh,$out_fh);
11477 flush $out_fh;
11479 ::debug("print", "<<joboutput\n");
11480 if(defined $self->{'exitstatus'}
11481 and not ($self->virgin() and $opt::pipe)) {
11482 if($Global::joblog and not $opt::sqlworker) {
11483 # Add to joblog when finished
11484 $self->print_joblog();
11486 if($opt::sqlworker and not $opt::results) {
11487 $Global::sql->output($self);
11489 if($Global::csvsep) {
11490 # Add output to CSV when finished
11491 $self->print_csv();
11493 if($Global::jsonout) {
11494 $self->print_json();
11497 return $returnsize - $self->returnsize();
11501 my %jsonmap;
11503 sub print_json($) {
11504 my $self = shift;
11505 sub jsonquote($) {
11506 my $a = shift;
11507 if(not $jsonmap{"\001"}) {
11508 map { $jsonmap{sprintf("%c",$_)} =
11509 sprintf '\u%04x', $_ } 0..31;
11511 $a =~ s/\\/\\\\/g;
11512 $a =~ s/\"/\\"/g;
11513 $a =~ s/([\000-\037])/$jsonmap{$1}/g;
11514 return $a;
11517 my $cmd;
11518 if($Global::verbose <= 1) {
11519 $cmd = jsonquote($self->replaced());
11520 } else {
11521 # Verbose level > 1: Print the rsync and stuff
11522 $cmd = jsonquote(join " ", @{$self->{'commandline'}});
11524 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
11526 # Memory optimization: Overwrite with the joined output
11527 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
11528 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
11530 # "Seq": 12,
11531 # "Host": "/usr/bin/ssh foo@lo",
11532 # "Starttime": 1608344711.743,
11533 # "JobRuntime": 0.01,
11534 # "Send": 0,
11535 # "Receive": 10,
11536 # "Exitval": 0,
11537 # "Signal": 0,
11538 # "Command": "echo 1",
11539 # "V": [
11540 # "1"
11541 # ],
11542 # "Stdout": "1\n",
11543 # "Stderr": ""
11546 printf($Global::csv_fh
11547 q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ).
11548 q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ).
11549 q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }).
11550 "\n",
11551 $self->seq(),
11552 jsonquote($self->sshlogin()->string()),
11553 $self->starttime(), sprintf("%0.3f",$self->runtime()),
11554 $self->transfersize(), $self->returnsize(),
11555 $self->exitstatus(), $self->exitsignal(), $cmd,
11556 (join ",",
11557 map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref],
11559 jsonquote($self->{'output'}{1}),
11560 jsonquote($self->{'output'}{2})
11566 my $header_printed;
11568 sub print_csv($) {
11569 my $self = shift;
11570 my $cmd;
11571 if($Global::verbose <= 1) {
11572 $cmd = $self->replaced();
11573 } else {
11574 # Verbose level > 1: Print the rsync and stuff
11575 $cmd = join " ", @{$self->{'commandline'}};
11577 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
11579 if(not $header_printed) {
11580 # Variable headers
11581 # Normal => V1..Vn
11582 # --header : => first value from column
11583 my @V;
11584 if($opt::header) {
11585 my $i = 1;
11586 @V = (map { $Global::input_source_header{$i++} }
11587 @$record_ref[1..$#$record_ref]);
11588 } else {
11589 my $V = "V1";
11590 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
11592 print $Global::csv_fh
11593 (map { $$_ }
11594 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
11595 "Send", "Receive", "Exitval", "Signal", "Command",
11597 "Stdout","Stderr"
11598 )),"\n";
11599 $header_printed++;
11601 # Memory optimization: Overwrite with the joined output
11602 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
11603 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
11604 print $Global::csv_fh
11605 (map { $$_ }
11606 combine_ref
11607 ($self->seq(),
11608 $self->sshlogin()->string(),
11609 $self->starttime(), sprintf("%0.3f",$self->runtime()),
11610 $self->transfersize(), $self->returnsize(),
11611 $self->exitstatus(), $self->exitsignal(), \$cmd,
11612 \@$record_ref[1..$#$record_ref],
11613 \$self->{'output'}{1},
11614 \$self->{'output'}{2})),"\n";
11618 sub combine_ref($) {
11619 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
11620 my @part = @_;
11621 my $sep = $Global::csvsep;
11622 my $quot = '"';
11623 my @out = ();
11625 my $must_be_quoted;
11626 for my $column (@part) {
11627 # Memory optimization: Content transferred as reference
11628 if(ref $column ne "SCALAR") {
11629 # Convert all columns to scalar references
11630 my $v = $column;
11631 $column = \$v;
11633 if(not defined $$column) {
11634 $$column = '';
11635 next;
11638 $must_be_quoted = 0;
11640 if($$column =~ s/$quot/$quot$quot/go){
11641 # Replace " => ""
11642 $must_be_quoted ||=1;
11644 if($$column =~ /[\s\Q$sep\E]/o){
11645 # Put quotes around if the column contains ,
11646 $must_be_quoted ||=1;
11649 $Global::use{"bytes"} ||= eval "use bytes; 1;";
11650 if ($$column =~ /\0/) {
11651 # Contains \0 => put quotes around
11652 $must_be_quoted ||=1;
11654 if($must_be_quoted){
11655 push @out, \$sep, \$quot, $column, \$quot;
11656 } else {
11657 push @out, \$sep, $column;
11660 # Remove the first $sep: ,val,"val" => val,"val"
11661 shift @out;
11662 return @out;
11665 sub print_files($) {
11666 # Print the name of the file containing stdout on stdout
11667 # Uses:
11668 # $opt::pipe
11669 # $opt::group = Print when job is done
11670 # $opt::linebuffer = Print ASAP
11671 # Returns: N/A
11672 my $self = shift;
11673 my ($fdno,$in_fh,$out_fh) = @_;
11675 # If the job is dead: close printing fh. Needed for --compress
11676 close $self->fh($fdno,"w");
11677 if($? and $opt::compress) {
11678 ::error($opt::compress_program." failed.");
11679 $self->set_exitstatus(255);
11681 if($opt::compress) {
11682 # Kill the decompressor which will not be needed
11683 CORE::kill "TERM", $self->fh($fdno,"rpid");
11685 close $in_fh;
11687 if($opt::pipe and $self->virgin()) {
11688 # Nothing was printed to this job:
11689 # cleanup unused tmp files because --files was set
11690 for my $fdno (1,2) {
11691 ::rm($self->fh($fdno,"name"));
11692 ::rm($self->fh($fdno,"unlink"));
11694 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
11695 print $out_fh $self->tag(),$self->fh($fdno,"name"), $Global::files_sep;
11696 if($Global::membuffer) {
11697 push @{$self->{'output'}{$fdno}},
11698 $self->tag(), $self->fh($fdno,"name");
11700 $self->add_returnsize(-s $self->fh($fdno,"name"));
11701 # Mark as printed - do not print again
11702 $self->set_fh($fdno,"name",undef);
11707 # Different print types
11708 # (--ll | --ll --bar | --lb | --group | --parset | --sql-worker)
11709 # (--files | --results (.json|.csv|.tsv) )
11710 # --color-failed
11711 # --color
11712 # --keep-order
11713 # --tag
11714 # --bar
11716 my ($up,$eol,$currow,$maxrow);
11717 my ($minvisible,%print_later,%notvisible);
11718 my (%binmodeset,%tab);
11720 sub latestline_init() {
11721 # cursor_up cuu1 = up one line
11722 $up = `sh -c "tput cuu1 </dev/tty" 2>/dev/null`;
11723 chomp($up);
11724 $eol = `sh -c "tput el </dev/tty" 2>/dev/null`;
11725 chomp($eol);
11726 if($eol eq "") { $eol = "\033[K"; }
11727 $currow = 1;
11728 $maxrow = 1;
11729 $minvisible = 1;
11730 for(0..8) {
11731 $tab{$_} = " "x(8-($_%8));
11735 sub mbtrunc($$) {
11736 # Simple mbtrunc to avoid using Text::WideChar::Util
11737 my $str = shift;
11738 my $len = shift;
11739 if(::mbswidth($str) == length($str)) {
11740 $str = substr($str,0,$len);
11741 } else {
11742 # mb chars (ヌー平行) are wider than 1 char on screen
11743 # We need at most $len chars - they may be wide
11744 $str =~ s/(.{$len}).*/$1/;
11745 my $rlen = int((::mbswidth($str) - $len)/2+0.5);
11746 do {
11747 $str =~ s/.{$rlen}$//;
11748 $rlen = int((::mbswidth($str) - $len)/2+0.5);
11749 } while($rlen >= 1);
11751 return $str;
11754 sub print_latest_line($) {
11755 my $self = shift;
11756 my $out_fh = shift;
11757 if(not defined $self->{$out_fh,'latestline'}) { return; }
11758 my $row = $self->row();
11759 # Is row visible?
11760 if(not ($minvisible <= $row
11762 $row < $minvisible + ::terminal_rows() - 1)) {
11763 return;
11765 if(not $binmodeset{$out_fh}++) {
11766 # Enable utf8 if possible
11767 eval q{ binmode $out_fh, "encoding(utf8)"; };
11769 my ($color,$reset_color) = $self->color();
11770 my $termcol = ::terminal_columns();
11771 my $untabify_tag = ::decode_utf8($self->untabtag());
11772 my $untabify_str =
11773 ::untabify(::decode_utf8($self->{$out_fh,'latestline'}));
11774 # -1 to make space for $truncated_str
11775 my $maxtaglen = $termcol - 1;
11776 $untabify_tag = mbtrunc($untabify_tag,$maxtaglen);
11777 my $taglen = ::mbswidth($untabify_tag);
11778 my $maxstrlen = $termcol - $taglen - 1;
11779 $untabify_str = mbtrunc($untabify_str,$maxstrlen);
11780 my $strlen = ::mbswidth($untabify_str);
11781 my $truncated_tag = "";
11782 my $truncated_str = "";
11783 if($termcol - $taglen < 2) {
11784 $truncated_tag = ">";
11785 } else {
11786 if($termcol - $taglen - $strlen <= 2) {
11787 $truncated_str = ">";
11790 $maxrow = ($row > $maxrow) ? $row : $maxrow;
11791 printf($out_fh
11792 ("%s%s%s%s". # up down \r eol
11793 "%s%s". # tag trunc_tag
11794 "%s%s%s%s". # color line trunc reset_color
11795 "%s" # down
11797 "$up"x($currow - $row), "\n"x($row - $currow), "\r", $eol,
11798 $untabify_tag,$truncated_tag,
11799 $color, $untabify_str, $truncated_str, $reset_color,
11800 "\n"x($maxrow - $row + 1));
11801 $currow = $maxrow + 1;
11804 sub print_linebuffer($) {
11805 my $self = shift;
11806 my ($fdno,$in_fh,$out_fh) = @_;
11807 if(defined $self->{'exitstatus'}) {
11808 # If the job is dead: close printing fh. Needed for --compress
11809 close $self->fh($fdno,"w");
11810 if($opt::compress) {
11811 if($?) {
11812 ::error($opt::compress_program." failed.");
11813 $self->set_exitstatus(255);
11815 # Blocked reading in final round
11816 for my $fdno (1,2) { ::set_fh_blocking($self->fh($fdno,'r')); }
11818 if($opt::latestline) { $print_later{$self->row()} = $self; }
11820 if(not $self->virgin()) {
11821 if($Global::files or ($opt::results and not $Global::csvsep)) {
11822 # Print filename
11823 if($fdno == 1 and not $self->fh($fdno,"printed")) {
11824 print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n";
11825 if($Global::membuffer) {
11826 push(@{$self->{'output'}{$fdno}}, $self->tag(),
11827 $self->fh($fdno,"name"));
11829 $self->set_fh($fdno,"printed",1);
11831 # No need for reading $in_fh, as it is from "cat >/dev/null"
11832 } else {
11833 # Read halflines and print full lines
11834 my $outputlength = 0;
11835 my $halfline_ref = $self->{'halfline'}{$fdno};
11836 my ($buf,$i,$rv);
11837 # 1310720 gives 1.2 GB/s
11838 # 131072 gives 0.9 GB/s
11839 # The optimal block size differs
11840 # It has been measured on:
11841 # AMD 6376: 60800 (>70k is also reasonable)
11842 # Intel i7-3632QM: 52-59k, 170-175k
11843 # seq 64 | ppar --_test $1 --lb \
11844 # 'yes {} `seq 1000`|head -c 10000000' >/dev/null
11845 while($rv = sysread($in_fh, $buf, 60800)) {
11846 $outputlength += $rv;
11847 # TODO --recend
11848 # Treat both \n and \r as line end
11849 # Only test for \r if there is no \n
11850 # Test:
11851 # perl -e '$a="x"x1000000;
11852 # $b="$a\r$a\n$a\r$a\n";
11853 # map { print $b,$_ } 1..10'
11854 $i = ((rindex($buf,"\n")+1) || (rindex($buf,"\r")+1));
11855 if($i) {
11856 if($opt::latestline) {
11857 # Keep the latest full line
11858 my $l = join('', @$halfline_ref,
11859 substr($buf,0,$i-1));
11860 # "ab\rb\n" = "bb", but we cannot process that correctly.
11861 # Line may be:
11862 # foo \r bar \n
11863 # foo \r bar \r baz \r
11864 # If so: Remove 'foo \r'
11865 $l =~ s/.*\r//g;
11866 my $j = ((rindex($l,"\n")+1) ||
11867 (rindex($l,"\r")+1));
11868 $self->{$out_fh,'latestline'} = substr($l,$j);
11869 # Remove the processed part
11870 # by keeping the unprocessed part
11871 @$halfline_ref = (substr($buf,$i));
11872 } else {
11873 # One or more complete lines were found
11874 if($Global::color) {
11875 my $print = join("",@$halfline_ref,
11876 substr($buf,0,$i));
11877 chomp($print);
11878 my ($color,$reset_color) = $self->color();
11879 my $colortag = $color.$self->tag();
11880 # \n => reset \n color tag
11881 $print =~ s{([\n\r])(?=.|$)}
11882 {$reset_color$1$colortag}gs;
11883 print($out_fh $colortag, $print,
11884 $reset_color, "\n");
11885 } elsif($opt::tag or defined $opt::tagstring) {
11886 # Replace ^ with $tag within the full line
11887 if($Global::cache_replacement_eval) {
11888 # Replace with the same value for tag
11889 my $tag = $self->tag();
11890 unshift @$halfline_ref, $tag;
11891 # TODO --recend that can be partially in
11892 # @$halfline_ref
11893 substr($buf,0,$i-1) =~
11894 s/([\n\r])(?=.|$)/$1$tag/gs;
11895 } else {
11896 # Replace with freshly computed tag-value
11897 unshift @$halfline_ref, $self->tag();
11898 substr($buf,0,$i-1) =~
11899 s/([\n\r])(?=.|$)/$1.$self->tag()/gse;
11901 # The length changed,
11902 # so find the new ending pos
11903 $i = ::max((rindex($buf,"\n")+1),
11904 (rindex($buf,"\r")+1));
11905 # Print the partial line (halfline)
11906 # and the last half
11907 print $out_fh @$halfline_ref, substr($buf,0,$i);
11908 } else {
11909 # Print the partial line (halfline)
11910 # and the last half
11911 print $out_fh @$halfline_ref, substr($buf,0,$i);
11913 # Buffer in memory for SQL and CSV-output
11914 if($Global::membuffer) {
11915 push(@{$self->{'output'}{$fdno}},
11916 @$halfline_ref, substr($buf,0,$i));
11918 # Remove the printed part by keeping the unprinted
11919 @$halfline_ref = (substr($buf,$i));
11921 } else {
11922 # No newline, so append to the halfline
11923 push @$halfline_ref, $buf;
11926 $self->add_returnsize($outputlength);
11927 if($opt::latestline) { $self->print_latest_line($out_fh); }
11929 if(defined $self->{'exitstatus'}) {
11930 if($Global::files or ($opt::results and not $Global::csvsep)) {
11931 $self->add_returnsize(-s $self->fh($fdno,"name"));
11932 } else {
11933 if($opt::latestline) {
11934 # Force re-computing color if --colorfailed
11935 if($opt::colorfailed) { delete $self->{'color'}; }
11936 if($self->{$out_fh,'latestline'} ne "") {
11937 $self->print_latest_line($out_fh);
11939 if(@{$self->{'halfline'}{$fdno}}) {
11940 my $l = join('', @{$self->{'halfline'}{$fdno}});
11941 if($l ne "") {
11942 $self->{$out_fh,'latestline'} = $l;
11944 } else {
11945 $self->{$out_fh,'latestline'} = undef;
11947 # Print latest line from jobs that are already done
11948 while($print_later{$minvisible}) {
11949 $print_later{$minvisible}->print_latest_line($out_fh);
11950 delete $print_later{$minvisible};
11951 $minvisible++;
11953 # Print latest line from jobs that are on screen now
11954 for(my $row = $minvisible;
11955 $row < $minvisible -1 + ::terminal_rows();
11956 $row++) {
11957 $print_later{$row} and
11958 $print_later{$row}->print_latest_line($out_fh);
11960 } else {
11961 # If the job is dead: print the remaining partial line
11962 # read remaining (already done for $opt::latestline)
11963 my $halfline_ref = $self->{'halfline'}{$fdno};
11964 if(grep /./, @$halfline_ref) {
11965 my $returnsize = 0;
11966 for(@{$self->{'halfline'}{$fdno}}) {
11967 $returnsize += length $_;
11969 $self->add_returnsize($returnsize);
11970 if($opt::tag or defined $opt::tagstring) {
11971 # Prepend $tag the the remaining half line
11972 unshift @$halfline_ref, $self->tag();
11974 # Print the partial line (halfline)
11975 print $out_fh @{$self->{'halfline'}{$fdno}};
11976 # Buffer in memory for SQL and CSV-output
11977 if($Global::membuffer) {
11978 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
11980 @$halfline_ref = ();
11984 if($self->fh($fdno,"rpid") and
11985 CORE::kill 0, $self->fh($fdno,"rpid")) {
11986 # decompress still running
11987 } else {
11988 # decompress done: close fh
11989 close $in_fh;
11990 if($? and $opt::compress) {
11991 ::error($opt::decompress_program." failed.");
11992 $self->set_exitstatus(255);
12000 sub free_ressources() {
12001 my $self = shift;
12002 if(not $opt::ungroup) {
12003 my $fh;
12004 for my $fdno (sort { $a <=> $b } keys %Global::fh) {
12005 $fh = $self->fh($fdno,"w");
12006 $fh and close $fh;
12007 $fh = $self->fh($fdno,"r");
12008 $fh and close $fh;
12013 sub print_parset($) {
12014 # Wrap output with shell script code to set as variables
12015 my $self = shift;
12016 my ($fdno,$in_fh,$out_fh) = @_;
12017 my $outputlength = 0;
12019 ::debug("parset","print $Global::parset");
12020 if($Global::parset eq "assoc") {
12021 # Start: (done in parse_parset())
12022 # eval "`echo 'declare -A myassoc; myassoc=(
12023 # Each: (done here)
12024 # [$'a\tb']=$'a\tb\tc ddd'
12025 # End: (done in wait_and_exit())
12026 # )'`"
12027 print '[',::Q($self->{'commandline'}->
12028 replace_placeholders(["\257<\257>"],0,0)),']=';
12029 } elsif($Global::parset eq "array") {
12030 # Start: (done in parse_parset())
12031 # eval "`echo 'myassoc=(
12032 # Each: (done here)
12033 # $'a\tb\tc ddd'
12034 # End: (done in wait_and_exit())
12035 # )'`"
12036 } elsif($Global::parset eq "var") {
12037 # Start: (done in parse_parset())
12038 # <empty>
12039 # Each: (done here)
12040 # var=$'a\tb\tc ddd'
12041 # End: (done in wait_and_exit())
12042 # <empty>
12043 if(not @Global::parset_vars) {
12044 ::error("Too few named destination variables");
12045 ::wait_and_exit(255);
12047 print shift @Global::parset_vars,"=";
12049 local $/ = "\n";
12050 my $tag = $self->tag();
12051 my @out;
12052 while(<$in_fh>) {
12053 $outputlength += length $_;
12054 # Tag lines with \r, too
12055 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
12056 push @out, $tag,$_;
12058 # Remove last newline
12059 # This often makes it easier to use the output in shell
12060 @out and ${out[$#out]} =~ s/\n$//s;
12061 print ::Q(join("",@out)),"\n";
12062 return $outputlength;
12065 sub print_normal($) {
12066 my $self = shift;
12067 my ($fdno,$in_fh,$out_fh) = @_;
12068 my $buf;
12069 close $self->fh($fdno,"w");
12070 if($? and $opt::compress) {
12071 ::error($opt::compress_program." failed.");
12072 $self->set_exitstatus(255);
12074 if(not $self->virgin()) {
12075 seek $in_fh, 0, 0;
12076 # $in_fh is now ready for reading at position 0
12077 my $outputlength = 0;
12078 my @output;
12080 if($Global::parset and $fdno == 1) {
12081 $outputlength += $self->print_parset($fdno,$in_fh,$out_fh);
12082 } elsif(defined $opt::tag or defined $opt::tagstring
12083 or $Global::color or $opt::colorfailed) {
12084 if($Global::color or $opt::colorfailed) {
12085 my ($color,$reset_color) = $self->color();
12086 my $colortag = $color.$self->tag();
12087 # Read line by line
12088 local $/ = "\n";
12089 while(<$in_fh>) {
12090 $outputlength += length $_;
12091 # Tag lines with \r, too
12092 chomp;
12093 s{([\n\r])(?=.|$)}{$reset_color$1$colortag}gs;
12094 print $out_fh $colortag,$_,$reset_color,"\n";
12096 } else {
12097 my $tag = $self->tag();
12098 my $pretag = 1;
12099 my $s;
12100 while(sysread($in_fh,$buf,32767)) {
12101 $outputlength += length $buf;
12102 $buf =~ s/(?<=[\r\n])(?=.)/$tag/gs;
12103 print $out_fh ($pretag ? $tag : ""),$buf;
12104 if($Global::membuffer) {
12105 push @{$self->{'output'}{$fdno}},
12106 ($pretag ? $tag : ""),$buf;
12108 # Should next print start with a tag?
12109 $s = substr($buf, -1);
12110 # This is faster than ($s eq "\n") || ($s eq "\r")
12111 $pretag = ($s eq "\n") ? 1 : ($s eq "\r");
12114 } else {
12115 # Most efficient way of copying data from $in_fh to $out_fh
12116 # Intel i7-3632QM: 25k-
12117 while(sysread($in_fh,$buf,32767)) {
12118 print $out_fh $buf;
12119 $outputlength += length $buf;
12120 if($Global::membuffer) {
12121 push @{$self->{'output'}{$fdno}}, $buf;
12125 if($fdno == 1) {
12126 $self->add_returnsize($outputlength);
12128 close $in_fh;
12129 if($? and $opt::compress) {
12130 ::error($opt::decompress_program." failed.");
12131 $self->set_exitstatus(255);
12136 sub print_results($) {
12137 my $self = shift;
12138 my ($fdno,$in_fh,$out_fh) = @_;
12139 my $buf;
12140 close $self->fh($fdno,"w");
12141 if($? and $opt::compress) {
12142 ::error($opt::compress_program." failed.");
12143 $self->set_exitstatus(255);
12145 if(not $self->virgin()) {
12146 seek $in_fh, 0, 0;
12147 # $in_fh is now ready for reading at position 0
12148 my $outputlength = 0;
12149 my @output;
12151 if($Global::membuffer) {
12152 # Read data into membuffer
12153 if($opt::tag or $opt::tagstring) {
12154 # Read line by line
12155 local $/ = "\n";
12156 my $tag = $self->tag();
12157 while(<$in_fh>) {
12158 $outputlength += length $_;
12159 # Tag lines with \r, too
12160 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
12161 push @{$self->{'output'}{$fdno}}, $tag, $_;
12163 } else {
12164 # Most efficient way of copying data from $in_fh to $out_fh
12165 while(sysread($in_fh,$buf,60000)) {
12166 $outputlength += length $buf;
12167 push @{$self->{'output'}{$fdno}}, $buf;
12170 } else {
12171 # Not membuffer: No need to read the file
12172 if($opt::compress) {
12173 $outputlength = -1;
12174 } else {
12175 # Determine $outputlength = file length
12176 seek($in_fh, 0, 2) || ::die_bug("cannot seek result");
12177 $outputlength = tell($in_fh);
12180 if($fdno == 1) { $self->add_returnsize($outputlength); }
12181 close $in_fh;
12182 if($? and $opt::compress) {
12183 ::error($opt::decompress_program." failed.");
12184 $self->set_exitstatus(255);
12189 sub print_joblog($) {
12190 my $self = shift;
12191 my $cmd;
12192 if($Global::verbose <= 1) {
12193 $cmd = $self->replaced();
12194 } else {
12195 # Verbose level > 1: Print the rsync and stuff
12196 $cmd = $self->wrapped();
12198 # Newlines make it hard to parse the joblog
12199 $cmd =~ s/\n/\0/g;
12200 print $Global::joblog
12201 join("\t", $self->seq(), $self->sshlogin()->string(),
12202 $self->starttime(), sprintf("%10.3f",$self->runtime()),
12203 $self->transfersize(), $self->returnsize(),
12204 $self->exitstatus(), $self->exitsignal(), $cmd
12205 ). "\n";
12206 flush $Global::joblog;
12207 $self->set_job_in_joblog();
12210 sub tag($) {
12211 my $self = shift;
12212 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
12213 if(defined $opt::tag or defined $opt::tagstring) {
12214 $self->{'tag'} =
12215 ($self->{'commandline'}->
12216 replace_placeholders([$opt::tagstring],0,0)).
12217 "\t";
12218 } else {
12219 # No tag
12220 $self->{'tag'} = "";
12223 return $self->{'tag'};
12226 sub untabtag($) {
12227 # tag with \t replaced with spaces
12228 my $self = shift;
12229 my $tag = $self->tag();
12230 if(not defined $self->{'untab'}{$tag}) {
12231 $self->{'untab'}{$tag} = ::untabify($tag);
12233 return $self->{'untab'}{$tag};
12237 my (@color,$eol,$reset_color,$init);
12239 sub init_color() {
12240 if(not $init) {
12241 $init = 1;
12242 # color combinations that are readable: black/white text
12243 # on colored background, but not white on yellow
12244 my @color_combinations =
12245 # Force each color code to have the same length in chars
12246 # This will make \t work as expected
12247 ((map { [sprintf("%03d",$_),"000"] }
12248 6..7,9..11,13..15,40..51,75..87,113..123,147..159,
12249 171..182,185..231,249..254),
12250 (map { [sprintf("%03d",$_),231] }
12251 1..9,12..13,16..45,52..81,88..114,124..149,
12252 160..178,180,182..184,196..214,232..250));
12253 # reorder list so adjacent colors are dissimilar
12254 # %23 and %7 were found experimentally
12255 my @order = reverse sort {
12256 (($a%23) <=> ($b%23))
12258 (($b%7) <=> ($a%7));
12259 } 0..$#color_combinations;
12260 @order = @order[54 .. $#color_combinations, 0 .. 53];
12261 @color = map {
12262 # TODO Can this be done with `tput` codes?
12263 "\033[48;5;".$_->[0].";38;5;".$_->[1]."m"
12264 } @color_combinations[ @order ];
12266 # clr_eol el = clear to end of line
12267 $eol = `sh -c "tput el </dev/tty" 2>/dev/null`;
12268 chomp($eol);
12269 if($eol eq "") { $eol = "\033[K"; }
12270 # exit_attribute_mode sgr0 = turn off all attributes
12271 $reset_color = `sh -c "tput sgr0 </dev/tty" 2>/dev/null`;
12272 chomp($reset_color);
12273 if($reset_color eq "") { $reset_color = "\033[m"; }
12277 sub color($) {
12278 my $self = shift;
12279 if(not defined $self->{'color'}) {
12280 if($Global::color) {
12281 # Choose a value based on the seq
12282 $self->{'color'} = $color[$self->seq() % ($#color+1)].$eol;
12283 $self->{'reset_color'} = $reset_color;
12284 } else {
12285 $self->{'color'} = "";
12286 $self->{'reset_color'} = "";
12288 if($opt::colorfailed) {
12289 if($self->exitstatus()) {
12290 # White on Red
12291 # Can this be done more generally?
12292 $self->{'color'} =
12293 "\033[48;5;"."196".";38;5;"."231"."m".$eol;
12294 $self->{'reset_color'} = $reset_color;
12298 return ($self->{'color'},$self->{'reset_color'});
12302 sub hostgroups($) {
12303 my $self = shift;
12304 if(not defined $self->{'hostgroups'}) {
12305 $self->{'hostgroups'} =
12306 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
12308 return @{$self->{'hostgroups'}};
12311 sub exitstatus($) {
12312 my $self = shift;
12313 return $self->{'exitstatus'};
12316 sub set_exitstatus($$) {
12317 my $self = shift;
12318 my $exitstatus = shift;
12319 if($exitstatus) {
12320 # Overwrite status if non-zero
12321 $self->{'exitstatus'} = $exitstatus;
12322 } else {
12323 # Set status but do not overwrite
12324 # Status may have been set by --timeout
12325 $self->{'exitstatus'} ||= $exitstatus;
12327 $opt::sqlworker and
12328 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
12329 $exitstatus);
12332 sub reset_exitstatus($) {
12333 my $self = shift;
12334 undef $self->{'exitstatus'};
12337 sub exitsignal($) {
12338 my $self = shift;
12339 return $self->{'exitsignal'};
12342 sub set_exitsignal($$) {
12343 my $self = shift;
12344 my $exitsignal = shift;
12345 $self->{'exitsignal'} = $exitsignal;
12346 $opt::sqlworker and
12347 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
12348 $exitsignal);
12352 my $total_jobs;
12354 sub should_we_halt {
12355 # Should we halt? Immediately? Gracefully?
12356 # Returns: N/A
12357 my $job = shift;
12358 my $limit;
12359 if($Global::semaphore) {
12360 # Emulate Bash's +128 if there is a signal
12361 $Global::halt_exitstatus =
12362 ($job->exitstatus()
12364 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
12366 if($job->exitstatus() or $job->exitsignal()) {
12367 # Job failed
12368 $Global::exitstatus++;
12369 $Global::total_failed++;
12370 if($Global::halt_fail) {
12371 ::status("$Global::progname: This job failed:",
12372 $job->replaced());
12373 $limit = $Global::total_failed;
12375 } elsif($Global::halt_success) {
12376 ::status("$Global::progname: This job succeeded:",
12377 $job->replaced());
12378 $limit = $Global::total_completed - $Global::total_failed;
12380 if($Global::halt_done) {
12381 ::status("$Global::progname: This job finished:",
12382 $job->replaced());
12383 $limit = $Global::total_completed;
12385 if(not defined $limit) {
12386 return ""
12388 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
12389 # --halt % => 1..100 (pct of jobs failed)
12390 if($Global::halt_pct and not $Global::halt_count) {
12391 $total_jobs ||= $Global::JobQueue->total_jobs();
12392 # From the pct compute the number of jobs that must fail/succeed
12393 $Global::halt_count = $total_jobs * $Global::halt_pct;
12395 if($limit >= $Global::halt_count) {
12396 # At least N jobs have failed/succeded/completed
12397 # or at least N% have failed/succeded/completed
12398 # So we should prepare for exit
12399 if($Global::halt_fail or $Global::halt_done) {
12400 # Set exit status
12401 if(not defined $Global::halt_exitstatus) {
12402 if($Global::halt_pct) {
12403 # --halt now,fail=X% or soon,fail=X%
12404 # --halt now,done=X% or soon,done=X%
12405 $Global::halt_exitstatus =
12406 ::ceil($Global::total_failed / $total_jobs * 100);
12407 } elsif($Global::halt_count) {
12408 # --halt now,fail=X or soon,fail=X
12409 # --halt now,done=X or soon,done=X
12410 $Global::halt_exitstatus =
12411 ::min($Global::total_failed,101);
12413 if($Global::halt_count and $Global::halt_count == 1) {
12414 # --halt now,fail=1 or soon,fail=1
12415 # --halt now,done=1 or soon,done=1
12416 # Emulate Bash's +128 if there is a signal
12417 $Global::halt_exitstatus =
12418 ($job->exitstatus()
12420 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
12423 ::debug("halt","Pct: ",$Global::halt_pct,
12424 " count: ",$Global::halt_count,
12425 " status: ",$Global::halt_exitstatus,"\n");
12426 } elsif($Global::halt_success) {
12427 $Global::halt_exitstatus = 0;
12429 if($Global::halt_when eq "soon") {
12430 $Global::start_no_new_jobs ||= 1;
12431 if(scalar(keys %Global::running) > 0) {
12432 # Only warn if there are more jobs running
12433 ::status
12434 ("$Global::progname: Starting no more jobs. ".
12435 "Waiting for ". (keys %Global::running).
12436 " jobs to finish.");
12439 return($Global::halt_when);
12441 return "";
12446 package CommandLine;
12448 sub new($) {
12449 my $class = shift;
12450 my $seq = shift;
12451 my $commandref = shift;
12452 $commandref || die;
12453 my $arg_queue = shift;
12454 my $context_replace = shift;
12455 my $max_number_of_args = shift; # for -N and normal (-n1)
12456 my $transfer_files = shift;
12457 my $return_files = shift;
12458 my $template_names = shift;
12459 my $template_contents = shift;
12460 my $replacecount_ref = shift;
12461 my $len_ref = shift;
12462 my %replacecount = %$replacecount_ref;
12463 my %len = %$len_ref;
12464 for (keys %$replacecount_ref) {
12465 # Total length of this replacement string {} replaced with all args
12466 $len{$_} = 0;
12468 return bless {
12469 'command' => $commandref,
12470 'seq' => $seq,
12471 'len' => \%len,
12472 'arg_list' => [],
12473 'arg_list_flat' => [],
12474 'arg_list_flat_orig' => [undef],
12475 'arg_queue' => $arg_queue,
12476 'max_number_of_args' => $max_number_of_args,
12477 'replacecount' => \%replacecount,
12478 'context_replace' => $context_replace,
12479 'transfer_files' => $transfer_files,
12480 'return_files' => $return_files,
12481 'template_names' => $template_names,
12482 'template_contents' => $template_contents,
12483 'replaced' => undef,
12484 }, ref($class) || $class;
12487 sub flush_cache() {
12488 my $self = shift;
12489 for my $arglist (@{$self->{'arg_list'}}) {
12490 for my $arg (@$arglist) {
12491 $arg->flush_cache();
12494 $self->{'arg_queue'}->flush_cache();
12495 $self->{'replaced'} = undef;
12498 sub seq($) {
12499 my $self = shift;
12500 return $self->{'seq'};
12503 sub set_seq($$) {
12504 my $self = shift;
12505 $self->{'seq'} = shift;
12508 sub slot($) {
12509 # Find the number of a free job slot and return it
12510 # Uses:
12511 # @Global::slots - list with free jobslots
12512 # Returns:
12513 # $jobslot = number of jobslot
12514 my $self = shift;
12515 if(not $self->{'slot'}) {
12516 if(not @Global::slots) {
12517 # $max_slot_number will typically be $Global::max_jobs_running
12518 push @Global::slots, ++$Global::max_slot_number;
12520 $self->{'slot'} = shift @Global::slots;
12522 return $self->{'slot'};
12526 my $already_spread;
12527 my $darwin_max_len;
12529 sub populate($) {
12530 # Add arguments from arg_queue until the number of arguments or
12531 # max line length is reached
12532 # Uses:
12533 # $Global::usable_command_line_length
12534 # $opt::cat
12535 # $opt::fifo
12536 # $Global::JobQueue
12537 # $opt::m
12538 # $opt::X
12539 # $Global::max_jobs_running
12540 # Returns: N/A
12541 my $self = shift;
12542 my $next_arg;
12543 my $max_len = $Global::usable_command_line_length || die;
12544 if($^O eq "darwin") {
12545 # Darwin's limit is affected by:
12546 # * number of environment names (variables+functions)
12547 # * size of environment
12548 # * the length of arguments:
12549 # a one-char argument lowers the limit by 5
12550 # To be safe assume all arguments are one-char
12551 # The max_len is cached between runs, but if the size of
12552 # the environment is different we need to recompute the
12553 # usable max length for this run of GNU Parallel
12554 # See https://unix.stackexchange.com/a/604943/2972
12555 if(not $darwin_max_len) {
12556 my $envc = (keys %ENV);
12557 my $envn = length join"",(keys %ENV);
12558 my $envv = length join"",(values %ENV);
12559 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
12560 ::debug("init",
12561 "length: $darwin_max_len ".
12562 "3+($max_len - $envn - $envv)/5 - $envc*2");
12564 $max_len = $darwin_max_len;
12566 if($opt::cat or $opt::fifo) {
12567 # Get the empty arg added by --pipepart (if any)
12568 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
12569 # $PARALLEL_TMP will point to a tempfile that will be used as {}
12570 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
12571 unget([Arg->new('"$PARALLEL_TMP"')]);
12573 while (not $self->{'arg_queue'}->empty()) {
12574 $next_arg = $self->{'arg_queue'}->get();
12575 if(not defined $next_arg) {
12576 next;
12578 $self->push($next_arg);
12579 if($self->len() >= $max_len) {
12580 # Command length is now > max_length
12581 # If there are arguments: remove the last
12582 # If there are no arguments: Error
12583 # TODO stuff about -x opt_x
12584 if($self->number_of_args() > 1) {
12585 # There is something to work on
12586 $self->{'arg_queue'}->unget($self->pop());
12587 last;
12588 } else {
12589 my $args = join(" ", map { $_->orig() } @$next_arg);
12590 ::error("Command line too long (".
12591 $self->len(). " >= ".
12592 $max_len.
12593 ") at input ".
12594 $self->{'arg_queue'}->arg_number().
12595 ": ".
12596 ((length $args > 50) ?
12597 (substr($args,0,50))."..." :
12598 $args));
12599 $self->{'arg_queue'}->unget($self->pop());
12600 ::wait_and_exit(255);
12604 if(defined $self->{'max_number_of_args'}) {
12605 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
12606 last;
12610 if(($opt::m or $opt::X) and not $already_spread
12611 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
12612 # -m or -X and EOF => Spread the arguments over all jobslots
12613 # (unless they are already spread)
12614 $already_spread ||= 1;
12615 if($self->number_of_args() > 1) {
12616 $self->{'max_number_of_args'} =
12617 ::ceil($self->number_of_args()/$Global::max_jobs_running);
12618 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
12619 $self->{'max_number_of_args'};
12620 $self->{'arg_queue'}->unget($self->pop_all());
12621 while($self->number_of_args() < $self->{'max_number_of_args'}) {
12622 $self->push($self->{'arg_queue'}->get());
12625 $Global::JobQueue->flush_total_jobs();
12628 if($opt::sqlmaster) {
12629 # Insert the V1..Vn for this $seq in SQL table
12630 # instead of generating one
12631 $Global::sql->insert_records($self->seq(), $self->{'command'},
12632 $self->{'arg_list_flat_orig'});
12637 sub push($) {
12638 # Add one or more records as arguments
12639 # Returns: N/A
12640 my $self = shift;
12641 my $record = shift;
12642 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
12643 push @{$self->{'arg_list_flat'}}, @$record;
12644 push @{$self->{'arg_list'}}, $record;
12645 # Make @arg available for {= =}
12646 *Arg::arg = $self->{'arg_list_flat_orig'};
12648 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
12649 my $col;
12650 for my $perlexpr (keys %{$self->{'replacecount'}}) {
12651 if($perlexpr =~ /^(-?\d+)(?:\D.*|)$/) {
12652 # Positional replacement string
12653 # Deal with negative positional replacement string
12654 $col = ($1 < 0) ? $1 : $1-1;
12655 if(defined($record->[$col])) {
12656 $self->{'len'}{$perlexpr} +=
12657 length $record->[$col]->replace($perlexpr,$quote_arg,$self);
12659 } else {
12660 for my $arg (@$record) {
12661 if(defined $arg) {
12662 $self->{'len'}{$perlexpr} +=
12663 length $arg->replace($perlexpr,$quote_arg,$self);
12670 sub pop($) {
12671 # Remove last argument
12672 # Returns:
12673 # the last record
12674 my $self = shift;
12675 my $record = pop @{$self->{'arg_list'}};
12676 # pop off arguments from @$record
12677 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
12678 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
12679 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
12680 for my $perlexpr (keys %{$self->{'replacecount'}}) {
12681 if($perlexpr =~ /^(\d+) /) {
12682 # Positional
12683 defined($record->[$1-1]) or next;
12684 $self->{'len'}{$perlexpr} -=
12685 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
12686 } else {
12687 for my $arg (@$record) {
12688 if(defined $arg) {
12689 $self->{'len'}{$perlexpr} -=
12690 length $arg->replace($perlexpr,$quote_arg,$self);
12695 return $record;
12698 sub pop_all($) {
12699 # Remove all arguments and zeros the length of replacement perlexpr
12700 # Returns:
12701 # all records
12702 my $self = shift;
12703 my @popped = @{$self->{'arg_list'}};
12704 for my $perlexpr (keys %{$self->{'replacecount'}}) {
12705 $self->{'len'}{$perlexpr} = 0;
12707 $self->{'arg_list'} = [];
12708 $self->{'arg_list_flat_orig'} = [undef];
12709 $self->{'arg_list_flat'} = [];
12710 return @popped;
12713 sub number_of_args($) {
12714 # The number of records
12715 # Returns:
12716 # number of records
12717 my $self = shift;
12718 # This is really the number of records
12719 return $#{$self->{'arg_list'}}+1;
12722 sub number_of_recargs($) {
12723 # The number of args in records
12724 # Returns:
12725 # number of args records
12726 my $self = shift;
12727 my $sum = 0;
12728 my $nrec = scalar @{$self->{'arg_list'}};
12729 if($nrec) {
12730 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
12732 return $sum;
12735 sub args_as_string($) {
12736 # Returns:
12737 # all unmodified arguments joined with ' ' (similar to {})
12738 my $self = shift;
12739 return (join " ", map { $_->orig() }
12740 map { @$_ } @{$self->{'arg_list'}});
12743 sub results_out($) {
12744 sub max_file_name_length {
12745 # Figure out the max length of a subdir
12746 # TODO and the max total length
12747 # Ext4 = 255,130816
12748 # Uses:
12749 # $Global::max_file_length is set
12750 # Returns:
12751 # $Global::max_file_length
12752 my $testdir = shift;
12754 my $upper = 100_000_000;
12755 # Dir length of 8 chars is supported everywhere
12756 my $len = 8;
12757 my $dir = "d"x$len;
12758 do {
12759 rmdir($testdir."/".$dir);
12760 $len *= 16;
12761 $dir = "d"x$len;
12762 } while ($len < $upper and mkdir $testdir."/".$dir);
12763 # Then search for the actual max length between $len/16 and $len
12764 my $min = $len/16;
12765 my $max = $len;
12766 while($max-$min > 5) {
12767 # If we are within 5 chars of the exact value:
12768 # it is not worth the extra time to find the exact value
12769 my $test = int(($min+$max)/2);
12770 $dir = "d"x$test;
12771 if(mkdir $testdir."/".$dir) {
12772 rmdir($testdir."/".$dir);
12773 $min = $test;
12774 } else {
12775 $max = $test;
12778 $Global::max_file_length = $min;
12779 return $min;
12782 my $self = shift;
12783 my $out = $self->replace_placeholders([$opt::results],0,0);
12784 if($out eq $opt::results) {
12785 # $opt::results simple string: Append args_as_dirname
12786 my $args_as_dirname = $self->args_as_dirname(0);
12787 # Output in: prefix/name1/val1/name2/val2/stdout
12788 $out = $opt::results."/".$args_as_dirname;
12789 if(-d $out or eval{ File::Path::mkpath($out); }) {
12790 # OK
12791 } else {
12792 # mkpath failed: Argument too long or not quoted
12793 # Set $Global::max_file_length, which will keep the individual
12794 # dir names shorter than the max length
12795 max_file_name_length($opt::results);
12796 # Quote dirnames with +
12797 $args_as_dirname = $self->args_as_dirname(1);
12798 # prefix/name1/val1/name2/val2/
12799 $out = $opt::results."/".$args_as_dirname;
12800 File::Path::mkpath($out);
12802 $out .="/";
12803 } else {
12804 if($out =~ m:/$:s) {
12805 # / = dir
12806 if(-d $out or eval{ File::Path::mkpath($out); }) {
12807 # OK
12808 } else {
12809 ::error("Cannot make dir '$out'.");
12810 ::wait_and_exit(255);
12812 } else {
12813 $out =~ m:(.*)/:s;
12814 File::Path::mkpath($1);
12817 return $out;
12821 my %map;
12822 my %stringmap;
12823 my $sep;
12825 # test: '' . .. a. a.. + ++ 0..255 on fat12 ext4
12826 sub args_as_dirname($) {
12827 # Returns:
12828 # all arguments joined with '/' (similar to {})
12829 # Chars that are not safe on all file systems are quoted.
12830 sub init() {
12831 # ext4: / \t \n \0 \\ \r
12832 # fat: 0..31 " * / : < > ? \ | Maybe also: # [ ] ; = ,
12833 # exfat: 128..255
12834 # Other FS: , [ ] { } ( ) ! ; " ' * ? < > |
12836 # Quote these as:
12837 # + = ++
12838 # \0 = +0
12839 # \t = +t
12840 # \\ = +b (backslash)
12841 # \n = +n
12842 # \r = +r
12843 # / = +z (zlash)
12844 # ? = +y (whY?)
12845 # " = +d (double quote)
12846 # ' = +q (quote)
12847 # * = +a (asterisk)
12848 # < = +l (less than)
12849 # > = +g (greater than)
12850 # : = +k (kolon)
12851 # ! = +x (eXclamation)
12852 # | = +p (pipe)
12853 # # = +h (hash)
12854 # ; = +s (semicolon)
12855 # = = +e (equal)
12856 # , = +c (comma)
12857 # 1..32 128..255 = +XX (hex value)
12858 # [ ] = +e +f
12859 # ( ) = +i +j
12860 # { } = +v +w
12861 # Quote '' as +m (eMpty)
12862 # Quote . as +_
12863 # Quote .. as +__
12864 # (Unused: ou)
12865 %map = qw(
12866 + ++
12867 \0 +0
12868 \t +t
12869 \\ +b
12870 \n +n
12871 \r +r
12872 / +z
12873 ? +y
12874 " +d
12875 ' +q
12876 * +a
12877 < +l
12878 > +g
12879 : +k
12880 ! +x
12881 | +p
12882 # +h
12883 ; +s
12884 = +e
12885 , +c
12886 [ +e
12887 ( +i
12888 { +v
12889 ] +f
12890 ) +j
12891 } +w
12893 # 1..32 128..255 = +XX (hex value)
12894 map { $map{sprintf "%c",$_} = sprintf "+%02x",$_ } 1..32, 128..255;
12895 # Default value = itself
12896 map { $map{sprintf "%c",$_} ||= sprintf "%c",$_ } 0..255;
12897 # Quote '' as +m (eMpty)
12898 $stringmap{""} = "+m";
12899 # Quote . as +_
12900 $stringmap{"."} = "+_";
12901 # Quote .. as +__
12902 $stringmap{".."} = "+__";
12903 # Set dir separator
12904 eval 'use File::Spec; $sep = File::Spec->catfile("", "");';
12905 $sep ||= '/';
12907 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
12908 my $self = shift;
12909 my $quote = shift;
12910 my @res = ();
12911 if(not $sep) { init(); }
12913 for my $rec_ref (@{$self->{'arg_list'}}) {
12914 # If headers are used, sort by them.
12915 # Otherwise keep the order from the command line.
12916 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
12917 for my $n (@header_indexes_sorted) {
12918 CORE::push(@res,
12919 $Global::input_source_header{$n},
12920 $quote ?
12922 grep { $_ ne "\0noarg" } map {
12923 my $s = $_;
12924 # Quote + as ++
12925 $s =~ s/(.)/$map{$1}/gs;
12926 if($Global::max_file_length) {
12927 # Keep each subdir shorter than the longest
12928 # allowed file name
12929 $s = substr($s,0,$Global::max_file_length);
12931 $s; }
12932 $rec_ref->[$n-1]->orig()
12935 grep { $_ ne "\0noarg" } map {
12936 my $s = $_;
12937 # Quote / as +z and + as ++
12938 $s =~ s/($sep|\+)/$map{$1}/gos;
12939 if($Global::max_file_length) {
12940 # Keep each subdir shorter than the longest
12941 # allowed file name
12942 $s = substr($s,0,$Global::max_file_length);
12944 $s; }
12945 $rec_ref->[$n-1]->orig()
12950 return join $sep, map { $stringmap{$_} || $_ } @res;
12954 sub header_indexes_sorted($) {
12955 # Sort headers first by number then by name.
12956 # E.g.: 1a 1b 11a 11b
12957 # Returns:
12958 # Indexes of %Global::input_source_header sorted
12959 my $max_col = shift;
12961 no warnings 'numeric';
12962 for my $col (1 .. $max_col) {
12963 # Make sure the header is defined. If it is not: use column number
12964 if(not defined $Global::input_source_header{$col}) {
12965 $Global::input_source_header{$col} = $col;
12968 my @header_indexes_sorted = sort {
12969 # Sort headers numerically then asciibetically
12970 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
12972 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
12973 } 1 .. $max_col;
12974 return @header_indexes_sorted;
12977 sub len($) {
12978 # Uses:
12979 # @opt::shellquote
12980 # The length of the command line with args substituted
12981 my $self = shift;
12982 my $len = 0;
12983 # Add length of the original command with no args
12984 # Length of command w/ all replacement args removed
12985 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
12986 ::debug("length", "noncontext + command: $len\n");
12987 # MacOS has an overhead of 8 bytes per argument
12988 my $darwin = ($^O eq "darwin") ? 8 : 0;
12989 my $recargs = $self->number_of_recargs();
12990 if($self->{'context_replace'}) {
12991 # Context is duplicated for each arg
12992 $len += $recargs * $self->{'len'}{'context'};
12993 for my $replstring (keys %{$self->{'replacecount'}}) {
12994 # If the replacements string is more than once: mulitply its length
12995 $len += $self->{'len'}{$replstring} *
12996 $self->{'replacecount'}{$replstring};
12997 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
12998 $self->{'replacecount'}{$replstring}, "\n");
13000 # echo 11 22 33 44 55 66 77 88 99 1010
13001 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
13002 # 5 + ctxgrp*arg
13003 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
13004 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
13005 # Add space between context groups
13006 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
13007 if($darwin) {
13008 $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
13010 } else {
13011 # Each replacement string may occur several times
13012 # Add the length for each time
13013 $len += 1*$self->{'len'}{'context'};
13014 ::debug("length", "context+noncontext + command: $len\n");
13015 for my $replstring (keys %{$self->{'replacecount'}}) {
13016 # (space between recargs + length of replacement)
13017 # * number this replacement is used
13018 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
13019 $self->{'replacecount'}{$replstring};
13020 if($darwin) {
13021 $len += ($recargs * $self->{'replacecount'}{$replstring}
13022 * $darwin);
13026 if(defined $Global::parallel_env) {
13027 # If we are using --env, add the prefix for that, too.
13028 $len += length $Global::parallel_env;
13030 if($Global::quoting) {
13031 # Pessimistic length if -q is set
13032 # Worse than worst case: ' => "'" + " => '"'
13033 # TODO can we count the number of expanding chars?
13034 # and count them in arguments, too?
13035 $len *= 3;
13037 if(@opt::shellquote) {
13038 # Pessimistic length if --shellquote is set
13039 # Worse than worst case: ' => "'"
13040 for(@opt::shellquote) {
13041 $len *= 3;
13043 $len *= 5;
13045 if(@opt::sshlogin) {
13046 # Pessimistic length if remote
13047 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
13048 $len = int($len*4/3);
13050 return $len;
13053 sub replaced($) {
13054 # Uses:
13055 # $Global::quote_replace
13056 # $Global::quoting
13057 # Returns:
13058 # $replaced = command with place holders replaced and prepended
13059 my $self = shift;
13060 if(not defined $self->{'replaced'}) {
13061 # Don't quote arguments if the input is the full command line
13062 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
13063 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
13064 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
13065 $self->{'replaced'} = $self->
13066 replace_placeholders($self->{'command'},$Global::quoting,
13067 $quote_arg);
13068 my $len = length $self->{'replaced'};
13069 if ($len != $self->len()) {
13070 ::debug("length", $len, " != ", $self->len(),
13071 " ", $self->{'replaced'}, "\n");
13072 } else {
13073 ::debug("length", $len, " == ", $self->len(),
13074 " ", $self->{'replaced'}, "\n");
13077 return $self->{'replaced'};
13080 sub replace_placeholders($$$$) {
13081 # Replace foo{}bar with fooargbar
13082 # Input:
13083 # $targetref = command as shell words
13084 # $quote = should everything be quoted?
13085 # $quote_arg = should replaced arguments be quoted?
13086 # Uses:
13087 # @Arg::arg = arguments as strings to be use in {= =}
13088 # Returns:
13089 # @target with placeholders replaced
13090 my $self = shift;
13091 my $targetref = shift;
13092 my $quote = shift;
13093 my $quote_arg = shift;
13094 my %replace;
13096 # Token description:
13097 # \0spc = unquoted space
13098 # \0end = last token element
13099 # \0ign = dummy token to be ignored
13100 # \257<...\257> = replacement expression
13101 # " " = quoted space, that splits -X group
13102 # text = normal text - possibly part of -X group
13103 my $spacer = 0;
13104 my @tokens = grep { length $_ > 0 } map {
13105 if(/^\257<|^ $/) {
13106 # \257<...\257> or space
13108 } else {
13109 # Split each space/tab into a token
13110 split /(?=\s)|(?<=\s)/
13113 # Split \257< ... \257> into own token
13114 map { split /(?=\257<)|(?<=\257>)/ }
13115 # Insert "\0spc" between every element
13116 # This space should never be quoted
13117 map { $spacer++ ? ("\0spc",$_) : $_ }
13118 map { $_ eq "" ? "\0empty" : $_ }
13119 @$targetref;
13121 if(not @tokens) {
13122 # @tokens is empty: Return empty array
13123 return @tokens;
13125 ::debug("replace", "Tokens ".join":",@tokens,"\n");
13126 # Make it possible to use $arg[2] in {= =}
13127 *Arg::arg = $self->{'arg_list_flat_orig'};
13128 # Flat list:
13129 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
13130 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
13131 if(not @{$self->{'arg_list_flat'}}) {
13132 @{$self->{'arg_list_flat'}} = Arg->new("");
13134 my $argref = $self->{'arg_list_flat'};
13135 # Number of arguments - used for positional arguments
13136 my $n = $#$argref+1;
13138 # $self is actually a CommandLine-object,
13139 # but it looks nice to be able to say {= $job->slot() =}
13140 my $job = $self;
13141 # @replaced = tokens with \257< \257> replaced
13142 my @replaced;
13143 if($self->{'context_replace'}) {
13144 my @ctxgroup;
13145 for my $t (@tokens,"\0end") {
13146 # \0end = last token was end of tokens.
13147 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
13148 # Context group complete: Replace in it
13149 if(grep { /^\257</ } @ctxgroup) {
13150 # Context group contains a replacement string:
13151 # Copy once per arg
13152 my $space = "\0ign";
13153 for my $arg (@$argref) {
13154 my $normal_replace;
13155 # Push output
13156 # Put unquoted space before each context group
13157 # except the first
13158 CORE::push @replaced, $space, map {
13159 $a = $_;
13160 if($a =~
13161 s{\257<(-?\d+)?(.*)\257>}
13163 if($1) {
13164 # Positional replace
13165 # Find the relevant arg and replace it
13166 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
13167 $argref->[$1 > 0 ? $1-1 : $n+$1]->
13168 replace($2,$quote_arg,$self)
13169 : "");
13170 } else {
13171 # Normal replace
13172 $normal_replace ||= 1;
13173 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
13175 }sgxe) {
13176 # Token is \257<..\257>
13177 } else {
13178 if($Global::escape_string_present) {
13179 # Command line contains \257:
13180 # Unescape it \257\256 => \257
13181 $a =~ s/\257\256/\257/g;
13185 } @ctxgroup;
13186 $normal_replace or last;
13187 $space = "\0spc";
13189 } else {
13190 # Context group has no a replacement string: Copy it once
13191 CORE::push @replaced, map {
13192 $Global::escape_string_present and s/\257\256/\257/g; $_;
13193 } @ctxgroup;
13195 # New context group
13196 @ctxgroup=();
13198 if($t eq "\0spc" or $t eq " ") {
13199 CORE::push @replaced,$t;
13200 } else {
13201 CORE::push @ctxgroup,$t;
13204 } else {
13205 # @group = @token
13206 # Replace in group
13207 # Push output
13208 # repquote = no if {} first on line, no if $quote, yes otherwise
13209 for my $t (@tokens) {
13210 if($t =~ /^\257</) {
13211 my $space = "\0ign";
13212 for my $arg (@$argref) {
13213 my $normal_replace;
13214 $a = $t;
13215 $a =~
13216 s{\257<(-?\d+)?(.*)\257>}
13218 if($1) {
13219 # Positional replace
13220 # Find the relevant arg and replace it
13221 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
13222 # If defined: replace
13223 $argref->[$1 > 0 ? $1-1 : $n+$1]->
13224 replace($2,$quote_arg,$self)
13225 : "");
13226 } else {
13227 # Normal replace
13228 $normal_replace ||= 1;
13229 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
13231 }sgxe;
13232 CORE::push @replaced, $space, $a;
13233 $normal_replace or last;
13234 $space = "\0spc";
13236 } else {
13237 # No replacement
13238 CORE::push @replaced, map {
13239 $Global::escape_string_present and s/\257\256/\257/g; $_;
13240 } $t;
13244 *Arg::arg = [];
13245 ::debug("replace","Replaced: ".join":",@replaced,"\n");
13247 # Put tokens into groups that may be quoted.
13248 my @quotegroup;
13249 my @quoted;
13250 for (map { $_ eq "\0empty" ? "" : $_ }
13251 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
13252 @replaced, "\0end") {
13253 if($_ eq "\0spc" or $_ eq "\0end") {
13254 # \0spc splits quotable groups
13255 if($quote) {
13256 if(@quotegroup) {
13257 CORE::push @quoted, ::Q(join"",@quotegroup);;
13259 } else {
13260 CORE::push @quoted, join"",@quotegroup;
13262 @quotegroup = ();
13263 } else {
13264 CORE::push @quotegroup, $_;
13267 ::debug("replace","Quoted: ".join":",@quoted,"\n");
13268 return wantarray ? @quoted : "@quoted";
13271 sub skip($) {
13272 # Skip this job
13273 my $self = shift;
13274 $self->{'skip'} = 1;
13278 package CommandLineQueue;
13280 sub new($) {
13281 my $class = shift;
13282 my $commandref = shift;
13283 my $read_from = shift;
13284 my $context_replace = shift || 0;
13285 my $max_number_of_args = shift;
13286 my $transfer_files = shift;
13287 my $return_files = shift;
13288 my $template_names = shift;
13289 my $template_contents = shift;
13290 my @unget = ();
13291 my $posrpl;
13292 my ($replacecount_ref, $len_ref);
13293 my @command = @$commandref;
13294 my $seq = 1;
13295 # Replace replacement strings with {= perl expr =}
13296 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
13297 @command = merge_rpl_parts(@command);
13299 # Protect matching inside {= perl expr =}
13300 # by replacing {= and =} with \257< and \257>
13301 # in options that can contain replacement strings:
13302 # @command, --transferfile, --return,
13303 # --tagstring, --workdir, --results
13304 for(@command, @$transfer_files, @$return_files,
13305 @$template_names, @$template_contents,
13306 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries,
13307 @opt::filter) {
13308 # Skip if undefined
13309 defined($_) or next;
13310 # Escape \257 => \257\256
13311 $Global::escape_string_present += s/\257/\257\256/g;
13312 # Needs to match rightmost left parens (Perl defaults to leftmost)
13313 # to deal with: {={==} and {={==}=}
13314 # Replace {= -> \257< and =} -> \257>
13316 # Complex way to do:
13317 # s/{=(.*)=}/\257<$1\257>/g
13318 # which would not work
13319 s[\Q$Global::parensleft\E # Match {=
13320 # Match . unless the next string is {= or =}
13321 # needed to force matching the shortest {= =}
13322 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
13323 \Q$Global::parensright\E ] # Match =}
13324 {\257<$1\257>}gxs;
13325 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
13326 # Replace long --rpl's before short ones, as a short may be a
13327 # substring of a long:
13328 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
13330 # Replace the shorthand string (--rpl)
13331 # with the {= perl expr =}
13333 # Avoid searching for shorthand strings inside existing {= perl expr =}
13335 # Replace $$1 in {= perl expr =} with groupings in shorthand string
13337 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
13338 # echo {/.tar/.gz} ::: UU.tar.gz
13339 my ($prefix,$grp_regexp,$postfix) =
13340 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
13341 ( \(.*\) )? # Group capture regexp - e.g (.*)
13342 ( [^)]* )$ # Postfix - e.g }
13343 /xs;
13344 $grp_regexp ||= '';
13345 my $rplval = $Global::rpl{$rpl};
13346 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
13347 # Don't replace after \257 unless \257>
13348 \Q$prefix\E $grp_regexp \Q$postfix\E}
13350 # The start remains the same
13351 my $unchanged = $1;
13352 # Dummy entry to start at 1.
13353 my @grp = (1);
13354 # $2 = first ()-group in $grp_regexp
13355 # Put $2 in $grp[1], Put $3 in $grp[2]
13356 # so first ()-group in $grp_regexp is $grp[1];
13357 for(my $i = 2; defined $grp[$#grp]; $i++) {
13358 push @grp, eval '$'.$i;
13360 my $rv = $rplval;
13361 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
13362 # in the code to be executed
13363 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
13364 # prepend with $_pAr_gRp1 = perlquote($1),
13365 my $set_args = "";
13366 for(my $i = 1;defined $grp[$i]; $i++) {
13367 $set_args .= "\$_pAr_gRp$i = \"" .
13368 ::perl_quote_scalar($grp[$i]) . "\";";
13370 $unchanged . "\257<" . $set_args . $rv . "\257>"
13371 }gxes) {
13373 # Do the same for the positional replacement strings
13374 $posrpl = $rpl;
13375 if($posrpl =~ s/^\{//) {
13376 # Only do this if the shorthand start with {
13377 $prefix=~s/^\{//;
13378 # Don't replace after \257 unless \257>
13379 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
13380 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
13382 # The start remains the same
13383 my $unchanged = $1;
13384 my $position = $2;
13385 # Dummy entry to start at 1.
13386 my @grp = (1);
13387 # $3 = first ()-group in $grp_regexp
13388 # Put $3 in $grp[1], Put $4 in $grp[2]
13389 # so first ()-group in $grp_regexp is $grp[1];
13390 for(my $i = 3; defined $grp[$#grp]; $i++) {
13391 push @grp, eval '$'.$i;
13393 my $rv = $rplval;
13394 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
13395 # in the code to be executed
13396 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
13397 # prepend with $_pAr_gRp1 = perlquote($1),
13398 my $set_args = "";
13399 for(my $i = 1;defined $grp[$i]; $i++) {
13400 $set_args .= "\$_pAr_gRp$i = \"" .
13401 ::perl_quote_scalar($grp[$i]) . "\";";
13403 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
13404 }gxes) {
13409 # Add {} if no replacement strings in @command
13410 ($replacecount_ref, $len_ref, @command) =
13411 replacement_counts_and_lengths($transfer_files, $return_files,
13412 $template_names, $template_contents,
13413 @command);
13414 if("@command" =~ /^[^ \t\n=]*\257</) {
13415 # Replacement string is (part of) the command (and not just
13416 # argument or variable definition V1={})
13417 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
13418 # Do no quote (Otherwise it will fail if the input contains spaces)
13419 $Global::quote_replace = 0;
13422 if($opt::sqlmaster and $Global::sql->append()) {
13423 $seq = $Global::sql->max_seq() + 1;
13426 return bless {
13427 ('unget' => \@unget,
13428 'command' => \@command,
13429 'replacecount' => $replacecount_ref,
13430 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
13431 'context_replace' => $context_replace,
13432 'len' => $len_ref,
13433 'max_number_of_args' => $max_number_of_args,
13434 'size' => undef,
13435 'transfer_files' => $transfer_files,
13436 'return_files' => $return_files,
13437 'template_names' => $template_names,
13438 'template_contents' => $template_contents,
13439 'seq' => $seq,
13441 }, ref($class) || $class;
13444 sub merge_rpl_parts($) {
13445 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
13446 # Input:
13447 # @in = the @command as given by the user
13448 # Uses:
13449 # $Global::parensleft
13450 # $Global::parensright
13451 # Returns:
13452 # @command with parts merged to keep {= and =} as one
13453 my @in = @_;
13454 my @out;
13455 my $l = quotemeta($Global::parensleft);
13456 my $r = quotemeta($Global::parensright);
13458 while(@in) {
13459 my $s = shift @in;
13460 $_ = $s;
13461 # Remove matching (right most) parens
13462 while(s/(.*)$l.*?$r/$1/os) {}
13463 if(/$l/o) {
13464 # Missing right parens
13465 while(@in) {
13466 $s .= " ".shift @in;
13467 $_ = $s;
13468 while(s/(.*)$l.*?$r/$1/os) {}
13469 if(not /$l/o) {
13470 last;
13474 push @out, $s;
13476 return @out;
13479 sub replacement_counts_and_lengths($$@) {
13480 # Count the number of different replacement strings.
13481 # Find the lengths of context for context groups and non-context
13482 # groups.
13483 # If no {} found in @command: add it to @command
13485 # Input:
13486 # \@transfer_files = array of filenames to transfer
13487 # \@return_files = array of filenames to return
13488 # \@template_names = array of names to copy to
13489 # \@template_contents = array of contents to write
13490 # @command = command template
13491 # Output:
13492 # \%replacecount, \%len, @command
13493 my $transfer_files = shift;
13494 my $return_files = shift;
13495 my $template_names = shift;
13496 my $template_contents = shift;
13497 my @command = @_;
13498 my (%replacecount,%len);
13499 my $sum = 0;
13500 while($sum == 0) {
13501 # Count how many times each replacement string is used
13502 my @cmd = @command;
13503 my $contextlen = 0;
13504 my $noncontextlen = 0;
13505 my $contextgroups = 0;
13506 for my $c (@cmd) {
13507 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
13508 # %replacecount = { "perlexpr" => number of times seen }
13509 # e.g { "s/a/b/" => 2 }
13510 $replacecount{$1}++;
13511 $sum++;
13513 # Measure the length of the context around the {= perl expr =}
13514 # Use that {=...=} has been replaced with \000 above
13515 # So there is no need to deal with \257<
13516 while($c =~ s/ (\S*\000\S*) //xs) {
13517 my $w = $1;
13518 $w =~ tr/\000//d; # Remove all \000's
13519 $contextlen += length($w);
13520 $contextgroups++;
13522 # All {= perl expr =} have been removed: The rest is non-context
13523 $noncontextlen += length $c;
13525 for(@$transfer_files, @$return_files,
13526 @$template_names, @$template_contents,
13527 @opt::filter,
13528 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
13529 # Options that can contain replacement strings
13530 defined($_) or next;
13531 my $t = $_;
13532 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
13533 # %replacecount = { "perlexpr" => number of times seen }
13534 # e.g { "$_++" => 2 }
13535 # But for tagstring we just need to mark it as seen
13536 $replacecount{$1} ||= 1;
13539 if($opt::bar) {
13540 # If the command does not contain {} force it to be computed
13541 # as it is being used by --bar
13542 $replacecount{""} ||= 1;
13545 $len{'context'} = 0+$contextlen;
13546 $len{'noncontext'} = $noncontextlen;
13547 $len{'contextgroups'} = $contextgroups;
13548 $len{'noncontextgroups'} = @cmd-$contextgroups;
13549 ::debug("length", "@command Context: ", $len{'context'},
13550 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
13551 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
13552 if($sum == 0) {
13553 if(not @command) {
13554 # Default command = {}
13555 @command = ("\257<\257>");
13556 } elsif(($opt::pipe or $opt::pipepart)
13557 and not $opt::fifo and not $opt::cat) {
13558 # With --pipe / --pipe-part you can have no replacement
13559 last;
13560 } else {
13561 # Append {} to the command if there are no {...}'s and no {=...=}
13562 push @command, ("\257<\257>");
13566 return(\%replacecount,\%len,@command);
13569 sub get($) {
13570 my $self = shift;
13571 if(@{$self->{'unget'}}) {
13572 my $cmd_line = shift @{$self->{'unget'}};
13573 return ($cmd_line);
13574 } else {
13575 if($opt::sqlworker) {
13576 # Get the sequence number from the SQL table
13577 $self->set_seq($SQL::next_seq);
13578 # Get the command from the SQL table
13579 $self->{'command'} = $SQL::command_ref;
13580 my @command;
13581 # Recompute replace counts based on the read command
13582 ($self->{'replacecount'},
13583 $self->{'len'}, @command) =
13584 replacement_counts_and_lengths($self->{'transfer_files'},
13585 $self->{'return_files'},
13586 $self->{'template_name'},
13587 $self->{'template_contents'},
13588 @$SQL::command_ref);
13589 if("@command" =~ /^[^ \t\n=]*\257</) {
13590 # Replacement string is (part of) the command (and not just
13591 # argument or variable definition V1={})
13592 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
13593 # Do no quote (Otherwise it will fail if the input contains spaces)
13594 $Global::quote_replace = 0;
13598 my $cmd_line = CommandLine->new($self->seq(),
13599 $self->{'command'},
13600 $self->{'arg_queue'},
13601 $self->{'context_replace'},
13602 $self->{'max_number_of_args'},
13603 $self->{'transfer_files'},
13604 $self->{'return_files'},
13605 $self->{'template_names'},
13606 $self->{'template_contents'},
13607 $self->{'replacecount'},
13608 $self->{'len'},
13610 $cmd_line->populate();
13611 ::debug("run","cmd_line->number_of_args ",
13612 $cmd_line->number_of_args(), "\n");
13613 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
13614 if($cmd_line->replaced() eq "") {
13615 # Empty command - pipe requires a command
13616 ::error("--pipe/--pipepart must have a command to pipe into ".
13617 "(e.g. 'cat').");
13618 ::wait_and_exit(255);
13620 } elsif($cmd_line->number_of_args() == 0) {
13621 # We did not get more args - maybe at EOF string?
13622 return undef;
13624 $self->set_seq($self->seq()+1);
13625 return $cmd_line;
13629 sub unget($) {
13630 my $self = shift;
13631 unshift @{$self->{'unget'}}, @_;
13634 sub empty($) {
13635 my $self = shift;
13636 my $empty = (not @{$self->{'unget'}}) &&
13637 $self->{'arg_queue'}->empty();
13638 ::debug("run", "CommandLineQueue->empty $empty");
13639 return $empty;
13642 sub seq($) {
13643 my $self = shift;
13644 return $self->{'seq'};
13647 sub set_seq($$) {
13648 my $self = shift;
13649 $self->{'seq'} = shift;
13652 sub quote_args($) {
13653 my $self = shift;
13654 # If there is not command emulate |bash
13655 return $self->{'command'};
13659 package Limits::Command;
13661 # Maximal command line length (for -m and -X)
13662 sub max_length($) {
13663 # Find the max_length of a command line and cache it
13664 # Returns:
13665 # number of chars on the longest command line allowed
13666 if(not $Limits::Command::line_max_len) {
13667 # Disk cache of max command line length
13668 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
13669 "/linelen";
13670 my $cached_limit;
13671 local $/ = undef;
13672 if(open(my $fh, "<", $len_cache)) {
13673 $cached_limit = <$fh>;
13674 $cached_limit || ::warning("Invalid content in $len_cache");
13675 close $fh;
13677 if(not $cached_limit) {
13678 $cached_limit = real_max_length();
13679 # If $HOME is write protected: Do not fail
13680 my $dir = ::dirname($len_cache);
13681 -d $dir or eval { File::Path::mkpath($dir); };
13682 open(my $fh, ">", $len_cache.$$);
13683 print $fh $cached_limit;
13684 close $fh;
13685 rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
13687 $Limits::Command::line_max_len = tmux_length($cached_limit);
13689 return int($Limits::Command::line_max_len);
13692 sub real_max_length() {
13693 # Find the max_length of a command line
13694 # Returns:
13695 # The maximal command line length with 1 byte arguments
13696 # return find_max(" c");
13697 return find_max("c");
13700 sub find_max($) {
13701 my $string = shift;
13702 # This is slow on Cygwin, so give Cygwin users a warning
13703 if($^O eq "cygwin" or $^O eq "msys") {
13704 ::warning("Finding the maximal command line length. ".
13705 "This may take up to 1 minute.")
13707 # Use an upper bound of 100 MB if the shell allows for infinite
13708 # long lengths
13709 my $upper = 100_000_000;
13710 my $lower;
13711 # 1000 is supported everywhere, so the search can start anywhere 1..999
13712 # 324 makes the search much faster on Cygwin, so let us use that
13713 my $len = 324;
13714 do {
13715 if($len > $upper) { return $len };
13716 $lower = $len;
13717 $len *= 4;
13718 ::debug("init", "Maxlen: $lower<$len<$upper(".($upper-$lower)."): ");
13719 } while (is_acceptable_command_line_length($len,$string));
13720 # Then search for the actual max length between
13721 # last successful length ($len/16) and upper bound
13722 return binary_find_max(int($len/16),$len,$string);
13726 # Prototype forwarding
13727 sub binary_find_max($$$);
13728 sub binary_find_max($$$) {
13729 # Given a lower and upper bound find the max (length or args) of a
13730 # command line
13731 # Returns:
13732 # number of chars on the longest command line allowed
13733 my ($lower, $upper, $string) = (@_);
13734 if($lower == $upper
13735 or $lower == $upper-1
13736 or $lower/$upper > 0.99) {
13737 # $lower is +- 1 or within 1%: Don't search more
13738 return $lower;
13740 # Unevenly split binary search which is faster for Microsoft Windows.
13741 # Guessing too high is cheap. Guessing too low is expensive.
13742 my $split = ($^O eq "cygwin" or $^O eq "msys") ? 0.93 : 0.5;
13743 my $middle = int (($upper-$lower)*$split + $lower);
13744 ::debug("init", "Maxlen: $lower<$middle<$upper(".($upper-$lower)."): ");
13745 if (is_acceptable_command_line_length($middle,$string)) {
13746 return binary_find_max($middle,$upper,$string);
13747 } else {
13748 return binary_find_max($lower,$middle,$string);
13753 my $prg;
13755 sub is_acceptable_command_line_length($$) {
13756 # Test if a command line of this length can run
13757 # in the current environment
13758 # If the string is " x" it tests how many args are allowed
13759 # Returns:
13760 # 0 if the command line length is too long
13761 # 1 otherwise
13762 my $len = shift;
13763 my $string = shift;
13764 if($Global::parallel_env) {
13765 $len += length $Global::parallel_env;
13767 # Force using non-built-in command
13768 $prg ||= ::which("echo");
13769 my $l = length ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string));
13770 if($l < $len/2) {
13771 # The command returned OK, but did not output $len chars
13772 # => this failed (Centos3 does this craziness)
13773 return 0
13775 ::debug("init", "$len=$?\n");
13776 return not $?;
13780 sub tmux_length($) {
13781 # If $opt::tmux set, find the limit for tmux
13782 # tmux 1.8 has a 2kB limit
13783 # tmux 1.9 has a 16kB limit
13784 # tmux 2.0 has a 16kB limit
13785 # tmux 2.1 has a 16kB limit
13786 # tmux 2.2 has a 16kB limit
13787 # Input:
13788 # $len = maximal command line length
13789 # Returns:
13790 # $tmux_len = maximal length runable in tmux
13791 local $/ = "\n";
13792 my $len = shift;
13793 if($opt::tmux) {
13794 $ENV{'PARALLEL_TMUX'} ||= "tmux";
13795 if(not ::which($ENV{'PARALLEL_TMUX'})) {
13796 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
13797 ::wait_and_exit(255);
13799 my @out;
13800 for my $l (1, 2020, 16320, 30000, $len) {
13801 my $tmpfile = ::tmpname("tms");
13802 my $qtmp = ::Q($tmpfile);
13803 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
13804 " -S $qtmp new-session -d -n echo $l".
13805 ("t"x$l). " && echo $l; rm -f $qtmp";
13806 push @out, ::qqx($tmuxcmd);
13807 ::rm($tmpfile);
13809 ::debug("tmux","tmux-out ",@out);
13810 chomp @out;
13811 # The arguments is given 3 times on the command line
13812 # and the tmux wrapping is around 30 chars
13813 # (29 for tmux1.9, 33 for tmux1.8)
13814 my $tmux_len = ::max(@out);
13815 $len = ::min($len,int($tmux_len/4-33));
13816 ::debug("tmux","tmux-length ",$len);
13818 return $len;
13822 package RecordQueue;
13824 sub new($) {
13825 my $class = shift;
13826 my $fhs = shift;
13827 my $colsep = shift;
13828 my @unget = ();
13829 my $arg_sub_queue;
13830 if($opt::sqlworker) {
13831 # Open SQL table
13832 $arg_sub_queue = SQLRecordQueue->new();
13833 } elsif(defined $colsep) {
13834 # Open one file with colsep or CSV
13835 $arg_sub_queue = RecordColQueue->new($fhs);
13836 } else {
13837 # Open one or more files if multiple -a
13838 $arg_sub_queue = MultifileQueue->new($fhs);
13840 return bless {
13841 'unget' => \@unget,
13842 'arg_number' => 0,
13843 'arg_sub_queue' => $arg_sub_queue,
13844 }, ref($class) || $class;
13847 sub get($) {
13848 # Returns:
13849 # reference to array of Arg-objects
13850 my $self = shift;
13851 if(@{$self->{'unget'}}) {
13852 $self->{'arg_number'}++;
13853 # Flush cached computed replacements in Arg-objects
13854 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
13855 my $ret = shift @{$self->{'unget'}};
13856 if($ret) {
13857 map { $_->flush_cache() } @$ret;
13859 return $ret;
13861 my $ret = $self->{'arg_sub_queue'}->get();
13862 if($ret) {
13863 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
13864 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
13865 # to mean no-string
13866 ::warning("A NUL character in the input was replaced with \\0.",
13867 "NUL cannot be passed through in the argument list.",
13868 "Did you mean to use the --null option?");
13869 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
13870 # Replace \0 with \\0
13871 my $a = $_->orig();
13872 $a =~ s/\0/\\0/g;
13873 $_->set_orig($a);
13876 if(defined $Global::max_number_of_args
13877 and $Global::max_number_of_args == 0) {
13878 ::debug("run", "Read 1 but return 0 args\n");
13879 # \0noarg => nothing (not the empty string)
13880 map { $_->set_orig("\0noarg"); } @$ret;
13882 # Flush cached computed replacements in Arg-objects
13883 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
13884 map { $_->flush_cache() } @$ret;
13886 return $ret;
13889 sub unget($) {
13890 my $self = shift;
13891 ::debug("run", "RecordQueue-unget\n");
13892 $self->{'arg_number'} -= @_;
13893 unshift @{$self->{'unget'}}, @_;
13896 sub empty($) {
13897 my $self = shift;
13898 my $empty = (not @{$self->{'unget'}}) &&
13899 $self->{'arg_sub_queue'}->empty();
13900 ::debug("run", "RecordQueue->empty $empty");
13901 return $empty;
13904 sub flush_cache($) {
13905 my $self = shift;
13906 for my $record (@{$self->{'unget'}}) {
13907 for my $arg (@$record) {
13908 $arg->flush_cache();
13911 $self->{'arg_sub_queue'}->flush_cache();
13914 sub arg_number($) {
13915 my $self = shift;
13916 return $self->{'arg_number'};
13920 package RecordColQueue;
13922 sub new($) {
13923 my $class = shift;
13924 my $fhs = shift;
13925 my @unget = ();
13926 my $arg_sub_queue = MultifileQueue->new($fhs);
13927 return bless {
13928 'unget' => \@unget,
13929 'arg_sub_queue' => $arg_sub_queue,
13930 }, ref($class) || $class;
13933 sub get($) {
13934 # Returns:
13935 # reference to array of Arg-objects
13936 my $self = shift;
13937 if(@{$self->{'unget'}}) {
13938 return shift @{$self->{'unget'}};
13940 if($self->{'arg_sub_queue'}->empty()) {
13941 return undef;
13943 my $in_record = $self->{'arg_sub_queue'}->get();
13944 if(defined $in_record) {
13945 my @out_record = ();
13946 for my $arg (@$in_record) {
13947 ::debug("run", "RecordColQueue::arg $arg\n");
13948 my $line = $arg->orig();
13949 ::debug("run", "line='$line'\n");
13950 if($line ne "") {
13951 if($opt::csv) {
13952 # Parse CSV and put it into a record
13953 chomp $line;
13954 if(not $Global::csv->parse($line)) {
13955 die "CSV has unexpected format: ^$line^";
13957 for($Global::csv->fields()) {
13958 push @out_record, Arg->new($_);
13960 } else {
13961 # Split --colsep into record
13962 for my $s (split /$opt::colsep/o, $line, -1) {
13963 push @out_record, Arg->new($s);
13966 } else {
13967 push @out_record, Arg->new("");
13970 return \@out_record;
13971 } else {
13972 return undef;
13976 sub unget($) {
13977 my $self = shift;
13978 ::debug("run", "RecordColQueue-unget '@_'\n");
13979 unshift @{$self->{'unget'}}, @_;
13982 sub empty($) {
13983 my $self = shift;
13984 my $empty = (not @{$self->{'unget'}}) &&
13985 $self->{'arg_sub_queue'}->empty();
13986 ::debug("run", "RecordColQueue->empty $empty");
13987 return $empty;
13990 sub flush_cache($) {
13991 my $self = shift;
13992 for my $arg (@{$self->{'unget'}}) {
13993 $arg->flush_cache();
13995 $self->{'arg_sub_queue'}->flush_cache();
13999 package SQLRecordQueue;
14001 sub new($) {
14002 my $class = shift;
14003 my @unget = ();
14004 return bless {
14005 'unget' => \@unget,
14006 }, ref($class) || $class;
14009 sub get($) {
14010 # Returns:
14011 # reference to array of Arg-objects
14012 my $self = shift;
14013 if(@{$self->{'unget'}}) {
14014 return shift @{$self->{'unget'}};
14016 return $Global::sql->get_record();
14019 sub unget($) {
14020 my $self = shift;
14021 ::debug("run", "SQLRecordQueue-unget '@_'\n");
14022 unshift @{$self->{'unget'}}, @_;
14025 sub empty($) {
14026 my $self = shift;
14027 if(@{$self->{'unget'}}) { return 0; }
14028 my $get = $self->get();
14029 if(defined $get) {
14030 $self->unget($get);
14032 my $empty = not $get;
14033 ::debug("run", "SQLRecordQueue->empty $empty");
14034 return $empty;
14037 sub flush_cache($) {
14038 my $self = shift;
14039 for my $record (@{$self->{'unget'}}) {
14040 for my $arg (@$record) {
14041 $arg->flush_cache();
14047 package MultifileQueue;
14049 @Global::unget_argv=();
14051 sub new($$) {
14052 my $class = shift;
14053 my $fhs = shift;
14054 for my $fh (@$fhs) {
14055 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
14056 ::warning(
14057 "Input is read from the terminal. You are either an expert",
14058 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
14059 "::: or :::: or -a or to pipe data into parallel. If so",
14060 "consider going through the tutorial: man parallel_tutorial",
14061 "Press CTRL-D to exit.");
14064 return bless {
14065 'unget' => \@Global::unget_argv,
14066 'fhs' => $fhs,
14067 'arg_matrix' => undef,
14068 }, ref($class) || $class;
14071 sub get($) {
14072 my $self = shift;
14073 if($opt::link) {
14074 return $self->link_get();
14075 } else {
14076 return $self->nest_get();
14080 sub unget($) {
14081 my $self = shift;
14082 ::debug("run", "MultifileQueue-unget '@_'\n");
14083 unshift @{$self->{'unget'}}, @_;
14086 sub empty($) {
14087 my $self = shift;
14088 my $empty = (not @Global::unget_argv) &&
14089 not @{$self->{'unget'}};
14090 for my $fh (@{$self->{'fhs'}}) {
14091 $empty &&= eof($fh);
14093 ::debug("run", "MultifileQueue->empty $empty ");
14094 return $empty;
14097 sub flush_cache($) {
14098 my $self = shift;
14099 for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) {
14100 for my $arg (@$record) {
14101 $arg->flush_cache();
14106 sub link_get($) {
14107 my $self = shift;
14108 if(@{$self->{'unget'}}) {
14109 return shift @{$self->{'unget'}};
14111 my @record = ();
14112 my $prepend;
14113 my $empty = 1;
14114 for my $i (0..$#{$self->{'fhs'}}) {
14115 my $fh = $self->{'fhs'}[$i];
14116 my $arg = read_arg_from_fh($fh);
14117 if(defined $arg) {
14118 # Record $arg for recycling at end of file
14119 push @{$self->{'arg_matrix'}[$i]}, $arg;
14120 push @record, $arg;
14121 $empty = 0;
14122 } else {
14123 ::debug("run", "EOA ");
14124 # End of file: Recycle arguments
14125 push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]};
14126 # return last @{$args->{'args'}{$fh}};
14127 push @record, @{$self->{'arg_matrix'}[$i]}[-1];
14130 if($empty) {
14131 return undef;
14132 } else {
14133 return \@record;
14137 sub nest_get($) {
14138 my $self = shift;
14139 if(@{$self->{'unget'}}) {
14140 return shift @{$self->{'unget'}};
14142 my @record = ();
14143 my $prepend;
14144 my $empty = 1;
14145 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
14146 if(not $self->{'arg_matrix'}) {
14147 # Initialize @arg_matrix with one arg from each file
14148 # read one line from each file
14149 my @first_arg_set;
14150 my $all_empty = 1;
14151 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
14152 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
14153 if(defined $arg) {
14154 $all_empty = 0;
14156 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
14157 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
14159 if($all_empty) {
14160 # All filehandles were at eof or eof-string
14161 return undef;
14163 return [@first_arg_set];
14166 # Treat the case with one input source special. For multiple
14167 # input sources we need to remember all previously read values to
14168 # generate all combinations. But for one input source we can
14169 # forget the value after first use.
14170 if($no_of_inputsources == 1) {
14171 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
14172 if(defined($arg)) {
14173 return [$arg];
14175 return undef;
14177 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
14178 if(eof($self->{'fhs'}[$fhno])) {
14179 next;
14180 } else {
14181 # read one
14182 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
14183 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
14184 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
14185 $self->{'arg_matrix'}[$fhno][$len] = $arg;
14186 # make all new combinations
14187 my @combarg = ();
14188 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
14189 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
14190 # Is input source --link'ed to the next?
14191 $opt::linkinputsource[$fhn+1]);
14193 # Find only combinations with this new entry
14194 $combarg[2*$fhno] = [$len,$len];
14195 # map combinations
14196 # [ 1, 3, 7 ], [ 2, 4, 1 ]
14197 # =>
14198 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
14199 my @mapped;
14200 for my $c (expand_combinations(@combarg)) {
14201 my @a;
14202 for my $n (0 .. $no_of_inputsources - 1 ) {
14203 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
14205 push @mapped, \@a;
14207 # append the mapped to the ungotten arguments
14208 push @{$self->{'unget'}}, @mapped;
14209 # get the first
14210 if(@mapped) {
14211 return shift @{$self->{'unget'}};
14215 # all are eof or at EOF string; return from the unget queue
14216 return shift @{$self->{'unget'}};
14220 my $cr_count = 0;
14221 my $nl_count = 0;
14222 my $dos_crnl_determined;
14223 sub read_arg_from_fh($) {
14224 # Read one Arg from filehandle
14225 # Returns:
14226 # Arg-object with one read line
14227 # undef if end of file
14228 my $fh = shift;
14229 my $prepend;
14230 my $arg;
14231 my $half_record = 0;
14232 do {{
14233 # This makes 10% faster
14234 if(not defined ($arg = <$fh>)) {
14235 if(defined $prepend) {
14236 return Arg->new($prepend);
14237 } else {
14238 return undef;
14241 if(not $dos_crnl_determined and not defined $opt::d) {
14242 # Warn if input has CR-NL and -d is not set
14243 if($arg =~ /\r$/) {
14244 $cr_count++;
14245 } else {
14246 $nl_count++;
14248 if($cr_count == 3 or $nl_count == 3) {
14249 $dos_crnl_determined = 1;
14250 if($nl_count == 0 and $cr_count == 3) {
14251 ::warning('The first three values end in CR-NL. '.
14252 'Consider using -d "\r\n"');
14256 if($opt::csv) {
14257 # We need to read a full CSV line.
14258 if(($arg =~ y/"/"/) % 2 ) {
14259 # The number of " on the line is uneven:
14260 # If we were in a half_record => we have a full record now
14261 # If we were outside a half_record =>
14262 # we are in a half record now
14263 $half_record = not $half_record;
14265 if($half_record) {
14266 # CSV half-record with quoting:
14267 # col1,"col2 2""x3"" board newline <-this one
14268 # cont",col3
14269 $prepend .= $arg;
14270 redo;
14271 } else {
14272 # Now we have a full CSV record
14275 # Remove delimiter
14276 chomp $arg;
14277 if($Global::end_of_file_string and
14278 $arg eq $Global::end_of_file_string) {
14279 # Ignore the rest of input file
14280 close $fh;
14281 ::debug("run", "EOF-string ($arg) met\n");
14282 if(defined $prepend) {
14283 return Arg->new($prepend);
14284 } else {
14285 return undef;
14288 if(defined $prepend) {
14289 $arg = $prepend.$arg; # For line continuation
14290 undef $prepend;
14292 if($Global::ignore_empty) {
14293 if($arg =~ /^\s*$/) {
14294 redo; # Try the next line
14297 if($Global::max_lines) {
14298 if($arg =~ /\s$/) {
14299 # Trailing space => continued on next line
14300 $prepend = $arg;
14301 redo;
14304 }} while (1 == 0); # Dummy loop {{}} for redo
14305 if(defined $arg) {
14306 return Arg->new($arg);
14307 } else {
14308 ::die_bug("multiread arg undefined");
14313 # Prototype forwarding
14314 sub expand_combinations(@);
14315 sub expand_combinations(@) {
14316 # Input:
14317 # ([xmin,xmax], [ymin,ymax], ...)
14318 # Returns: ([x,y,...],[x,y,...])
14319 # where xmin <= x <= xmax and ymin <= y <= ymax
14320 my $minmax_ref = shift;
14321 my $link = shift; # This is linked to the next input source
14322 my $xmin = $$minmax_ref[0];
14323 my $xmax = $$minmax_ref[1];
14324 my @p;
14325 if(@_) {
14326 my @rest = expand_combinations(@_);
14327 if($link) {
14328 # Linked to next col with --link/:::+/::::+
14329 # TODO BUG does not wrap values if not same number of vals
14330 push(@p, map { [$$_[0], @$_] }
14331 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
14332 } else {
14333 # If there are more columns: Compute those recursively
14334 for(my $x = $xmin; $x <= $xmax; $x++) {
14335 push @p, map { [$x, @$_] } @rest;
14338 } else {
14339 for(my $x = $xmin; $x <= $xmax; $x++) {
14340 push @p, [$x];
14343 return @p;
14347 package Arg;
14349 sub new($) {
14350 my $class = shift;
14351 my $orig = shift;
14352 my @hostgroups;
14353 if($opt::hostgroups) {
14354 if($orig =~ s:@(.+)::) {
14355 # We found hostgroups on the arg
14356 @hostgroups = split(/\+|,/, $1);
14357 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
14358 # This hostgroup is not defined using -S
14359 # Add it
14360 ::warning("Adding hostgroups: @hostgroups");
14361 # Add sshlogin
14362 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
14363 my $sshlogin = SSHLogin->new($_);
14364 my $sshlogin_string = $sshlogin->string();
14365 $Global::host{$sshlogin_string} = $sshlogin;
14366 $Global::hostgroups{$sshlogin_string} = 1;
14369 } else {
14370 # No hostgroup on the arg => any hostgroup
14371 @hostgroups = (keys %Global::hostgroups);
14374 return bless {
14375 'orig' => $orig,
14376 'hostgroups' => \@hostgroups,
14377 }, ref($class) || $class;
14380 sub Q($) {
14381 # Q alias for ::shell_quote_scalar
14382 my $ret = ::Q($_[0]);
14383 no warnings 'redefine';
14384 *Q = \&::Q;
14385 return $ret;
14388 sub pQ($) {
14389 # pQ alias for ::perl_quote_scalar
14390 my $ret = ::pQ($_[0]);
14391 no warnings 'redefine';
14392 *pQ = \&::pQ;
14393 return $ret;
14396 sub hash($) {
14397 $Global::use{"DBI"} ||= eval "use B; 1;";
14398 B::hash(@_);
14401 sub total_jobs() {
14402 return $Global::JobQueue->total_jobs();
14406 my %perleval;
14407 my $job;
14408 sub skip() {
14409 # shorthand for $job->skip();
14410 $job->skip();
14412 sub slot() {
14413 # shorthand for $job->slot();
14414 $job->slot();
14416 sub seq() {
14417 # shorthand for $job->seq();
14418 $job->seq();
14420 sub uq() {
14421 # Do not quote this arg
14422 $Global::unquote_arg = 1;
14424 sub yyyy_mm_dd_hh_mm_ss(@) {
14425 # ISO8601 2038-01-19T03:14:08
14426 ::strftime("%Y-%m-%dT%H:%M:%S", localtime(shift || time()));
14428 sub yyyy_mm_dd_hh_mm(@) {
14429 # ISO8601 2038-01-19T03:14
14430 ::strftime("%Y-%m-%dT%H:%M", localtime(shift || time()));
14432 sub yyyy_mm_dd(@) {
14433 # ISO8601 2038-01-19
14434 ::strftime("%Y-%m-%d", localtime(shift || time()));
14436 sub hh_mm_ss(@) {
14437 # ISO8601 03:14:08
14438 ::strftime("%H:%M:%S", localtime(shift || time()));
14440 sub hh_mm(@) {
14441 # ISO8601 03:14
14442 ::strftime("%H:%M", localtime(shift || time()));
14444 sub yyyymmddhhmmss(@) {
14445 # ISO8601 20380119 + ISO8601 031408
14446 ::strftime("%Y%m%d%H%M%S", localtime(shift || time()));
14448 sub yyyymmddhhmm(@) {
14449 # ISO8601 20380119 + ISO8601 0314
14450 ::strftime("%Y%m%d%H%M", localtime(shift || time()));
14452 sub yyyymmdd(@) {
14453 # ISO8601 20380119
14454 ::strftime("%Y%m%d", localtime(shift || time()));
14456 sub hhmmss(@) {
14457 # ISO8601 031408
14458 ::strftime("%H%M%S", localtime(shift || time()));
14460 sub hhmm(@) {
14461 # ISO8601 0314
14462 ::strftime("%H%M", localtime(shift || time()));
14465 sub replace($$$$) {
14466 # Calculates the corresponding value for a given perl expression
14467 # Returns:
14468 # The calculated string (quoted if asked for)
14469 my $self = shift;
14470 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
14471 my $quote = shift; # should the string be quoted?
14472 # This is actually a CommandLine-object,
14473 # but it looks nice to be able to say {= $job->slot() =}
14474 $job = shift;
14475 # Positional replace treated as normal replace
14476 $perlexpr =~ s/^(-?\d+)? *//;
14477 if(not $Global::cache_replacement_eval
14479 not $self->{'cache'}{$perlexpr}) {
14480 # Only compute the value once
14481 # Use $_ as the variable to change
14482 local $_;
14483 if($Global::trim eq "n") {
14484 $_ = $self->{'orig'};
14485 } else {
14486 # Trim the input
14487 $_ = trim_of($self->{'orig'});
14489 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
14490 if(not $perleval{$perlexpr}) {
14491 # Make an anonymous function of the $perlexpr
14492 # And more importantly: Compile it only once
14493 if($perleval{$perlexpr} =
14494 eval('sub { no strict; no warnings; my $job = shift; '.
14495 $perlexpr.' }')) {
14496 # All is good
14497 } else {
14498 # The eval failed. Maybe $perlexpr is invalid perl?
14499 ::error("Cannot use $perlexpr: $@");
14500 ::wait_and_exit(255);
14503 # Execute the function
14504 $perleval{$perlexpr}->($job);
14505 $self->{'cache'}{$perlexpr} = $_;
14506 if($Global::unquote_arg) {
14507 # uq() was called in perlexpr
14508 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
14509 # Reset for next perlexpr
14510 $Global::unquote_arg = 0;
14513 # Return the value quoted if needed
14514 if($self->{'cache'}{'unquote'}{$perlexpr}) {
14515 return($self->{'cache'}{$perlexpr});
14516 } else {
14517 return($quote ? Q($self->{'cache'}{$perlexpr})
14518 : $self->{'cache'}{$perlexpr});
14523 sub flush_cache($) {
14524 # Flush cache of computed values
14525 my $self = shift;
14526 $self->{'cache'} = undef;
14529 sub orig($) {
14530 my $self = shift;
14531 return $self->{'orig'};
14534 sub set_orig($$) {
14535 my $self = shift;
14536 $self->{'orig'} = shift;
14539 sub trim_of($) {
14540 # Removes white space as specifed by --trim:
14541 # n = nothing
14542 # l = start
14543 # r = end
14544 # lr|rl = both
14545 # Returns:
14546 # string with white space removed as needed
14547 my @strings = map { defined $_ ? $_ : "" } (@_);
14548 my $arg;
14549 if($Global::trim eq "n") {
14550 # skip
14551 } elsif($Global::trim eq "l") {
14552 for my $arg (@strings) { $arg =~ s/^\s+//; }
14553 } elsif($Global::trim eq "r") {
14554 for my $arg (@strings) { $arg =~ s/\s+$//; }
14555 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
14556 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
14557 } else {
14558 ::error("--trim must be one of: r l rl lr.");
14559 ::wait_and_exit(255);
14561 return wantarray ? @strings : "@strings";
14565 package TimeoutQueue;
14567 sub new($) {
14568 my $class = shift;
14569 my $delta_time = shift;
14570 my ($pct);
14571 if($delta_time =~ /(\d+(\.\d+)?)%/) {
14572 # Timeout in percent
14573 $pct = $1/100;
14574 $delta_time = 1_000_000;
14576 $delta_time = ::multiply_time_units($delta_time);
14578 return bless {
14579 'queue' => [],
14580 'delta_time' => $delta_time,
14581 'pct' => $pct,
14582 'remedian_idx' => 0,
14583 'remedian_arr' => [],
14584 'remedian' => undef,
14585 }, ref($class) || $class;
14588 sub delta_time($) {
14589 my $self = shift;
14590 return $self->{'delta_time'};
14593 sub set_delta_time($$) {
14594 my $self = shift;
14595 $self->{'delta_time'} = shift;
14598 sub remedian($) {
14599 my $self = shift;
14600 return $self->{'remedian'};
14603 sub set_remedian($$) {
14604 # Set median of the last 999^3 (=997002999) values using Remedian
14606 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
14607 # robust averaging method for large data sets." Journal of the
14608 # American Statistical Association 85.409 (1990): 97-104.
14609 my $self = shift;
14610 my $val = shift;
14611 my $i = $self->{'remedian_idx'}++;
14612 my $rref = $self->{'remedian_arr'};
14613 $rref->[0][$i%999] = $val;
14614 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
14615 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
14616 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
14619 sub update_median_runtime($) {
14620 # Update delta_time based on runtime of finished job if timeout is
14621 # a percentage
14622 my $self = shift;
14623 my $runtime = shift;
14624 if($self->{'pct'}) {
14625 $self->set_remedian($runtime);
14626 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
14627 ::debug("run", "Timeout: $self->{'delta_time'}s ");
14631 sub process_timeouts($) {
14632 # Check if there was a timeout
14633 my $self = shift;
14634 # $self->{'queue'} is sorted by start time
14635 while (@{$self->{'queue'}}) {
14636 my $job = $self->{'queue'}[0];
14637 if($job->endtime()) {
14638 # Job already finished. No need to timeout the job
14639 # This could be because of --keep-order
14640 shift @{$self->{'queue'}};
14641 } elsif($job->is_timedout($self->{'delta_time'})) {
14642 # Need to shift off queue before kill
14643 # because kill calls usleep that calls process_timeouts
14644 shift @{$self->{'queue'}};
14645 ::warning("This job was killed because it timed out:",
14646 $job->replaced());
14647 $job->kill();
14648 } else {
14649 # Because they are sorted by start time the rest are later
14650 last;
14655 sub insert($) {
14656 my $self = shift;
14657 my $in = shift;
14658 push @{$self->{'queue'}}, $in;
14662 package SQL;
14664 sub new($) {
14665 my $class = shift;
14666 my $dburl = shift;
14667 $Global::use{"DBI"} ||= eval "use DBI; 1;";
14668 # +DBURL = append to this DBURL
14669 my $append = $dburl=~s/^\+//;
14670 my %options = parse_dburl(get_alias($dburl));
14671 my %driveralias = ("sqlite" => "SQLite",
14672 "sqlite3" => "SQLite",
14673 "pg" => "Pg",
14674 "postgres" => "Pg",
14675 "postgresql" => "Pg",
14676 "csv" => "CSV",
14677 "oracle" => "Oracle",
14678 "ora" => "Oracle");
14679 my $driver = $driveralias{$options{'databasedriver'}} ||
14680 $options{'databasedriver'};
14681 my $database = $options{'database'};
14682 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
14683 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
14684 my $dsn = "DBI:$driver:dbname=$database$host$port";
14685 my $userid = $options{'user'};
14686 my $password = $options{'password'};;
14687 if(not grep /$driver/, DBI->available_drivers) {
14688 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
14689 ::wait_and_exit(255);
14691 my $dbh;
14692 if($driver eq "CSV") {
14693 # CSV does not use normal dsn
14694 if(-d $database) {
14695 $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
14696 or die $DBI::errstr;
14697 } else {
14698 ::error("$database is not a directory.");
14699 ::wait_and_exit(255);
14701 } else {
14702 $dbh = DBI->connect($dsn, $userid, $password,
14703 { RaiseError => 1, AutoInactiveDestroy => 1 })
14704 or die $DBI::errstr;
14706 $dbh->{'PrintWarn'} = $Global::debug || 0;
14707 $dbh->{'PrintError'} = $Global::debug || 0;
14708 $dbh->{'RaiseError'} = 1;
14709 $dbh->{'ShowErrorStatement'} = 1;
14710 $dbh->{'HandleError'} = sub {};
14711 if(not defined $options{'table'}) {
14712 ::error("The DBURL ($dburl) must contain a table.");
14713 ::wait_and_exit(255);
14716 return bless {
14717 'dbh' => $dbh,
14718 'driver' => $driver,
14719 'max_number_of_args' => undef,
14720 'table' => $options{'table'},
14721 'append' => $append,
14722 }, ref($class) || $class;
14725 # Prototype forwarding
14726 sub get_alias($);
14727 sub get_alias($) {
14728 my $alias = shift;
14729 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
14730 if ($alias !~ /^:/) {
14731 return $alias;
14734 # Find the alias
14735 my $path;
14736 if (-l $0) {
14737 ($path) = readlink($0) =~ m|^(.*)/|;
14738 } else {
14739 ($path) = $0 =~ m|^(.*)/|;
14742 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
14743 "$path/dburl.aliases", "$path/dburl.aliases.dist");
14744 for (@deprecated) {
14745 if(-r $_) {
14746 ::warning("$_ is deprecated. ".
14747 "Use .sql/aliases instead (read man sql).");
14750 my @urlalias=();
14751 check_permissions("$ENV{HOME}/.sql/aliases");
14752 check_permissions("$ENV{HOME}/.dburl.aliases");
14753 my @search = ("$ENV{HOME}/.sql/aliases",
14754 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
14755 "$path/dburl.aliases", "$path/dburl.aliases.dist");
14756 for my $alias_file (@search) {
14757 # local $/ needed if -0 set
14758 local $/ = "\n";
14759 if(-r $alias_file) {
14760 my $in = ::open_or_exit("<",$alias_file);
14761 push @urlalias, <$in>;
14762 close $in;
14765 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
14766 # If we saw this before: we have an alias loop
14767 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
14768 ::error("$alias_part is a cyclic alias.");
14769 exit -1;
14770 } else {
14771 push @Private::seen_aliases, $alias_part;
14774 my $dburl;
14775 for (@urlalias) {
14776 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
14779 if($dburl) {
14780 return get_alias($dburl.$rest);
14781 } else {
14782 ::error("$alias is not defined in @search");
14783 exit(-1);
14787 sub check_permissions($) {
14788 my $file = shift;
14790 if(-e $file) {
14791 if(not -o $file) {
14792 my $username = (getpwuid($<))[0];
14793 ::warning("$file should be owned by $username: ".
14794 "chown $username $file");
14796 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
14797 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
14798 if($mode & 077) {
14799 my $username = (getpwuid($<))[0];
14800 ::warning("$file should be only be readable by $username: ".
14801 "chmod 600 $file");
14806 sub parse_dburl($) {
14807 my $url = shift;
14808 my %options = ();
14809 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
14811 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
14812 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
14813 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
14815 ([^:@/][^:@]*|) # Username ($2)
14817 :([^@]*) # Password ($3)
14820 ([^:/]*)? # Hostname ($4)
14823 ([^/]*)? # Port ($5)
14827 ([^/?]*)? # Database ($6)
14831 ([^?]*)? # Table ($7)
14835 (.*)? # Query ($8)
14837 $!ix) {
14838 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
14839 $options{user} = ::undef_if_empty(uri_unescape($2));
14840 $options{password} = ::undef_if_empty(uri_unescape($3));
14841 $options{host} = ::undef_if_empty(uri_unescape($4));
14842 $options{port} = ::undef_if_empty(uri_unescape($5));
14843 $options{database} = ::undef_if_empty(uri_unescape($6));
14844 $options{table} = ::undef_if_empty(uri_unescape($7));
14845 $options{query} = ::undef_if_empty(uri_unescape($8));
14846 ::debug("sql", "dburl $url\n");
14847 ::debug("sql", "databasedriver ", $options{databasedriver},
14848 " user ", $options{user},
14849 " password ", $options{password}, " host ", $options{host},
14850 " port ", $options{port}, " database ", $options{database},
14851 " table ", $options{table}, " query ", $options{query}, "\n");
14852 } else {
14853 ::error("$url is not a valid DBURL");
14854 exit 255;
14856 return %options;
14859 sub uri_unescape($) {
14860 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
14861 # to avoid depending on URI::Escape
14862 # This section is (C) Gisle Aas.
14863 # Note from RFC1630: "Sequences which start with a percent sign
14864 # but are not followed by two hexadecimal characters are reserved
14865 # for future extension"
14866 my $str = shift;
14867 if (@_ && wantarray) {
14868 # not executed for the common case of a single argument
14869 my @str = ($str, @_); # need to copy
14870 foreach (@str) {
14871 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
14873 return @str;
14875 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
14876 $str;
14879 sub run($) {
14880 my $self = shift;
14881 my $stmt = shift;
14882 if($self->{'driver'} eq "CSV") {
14883 $stmt=~ s/;$//;
14884 if($stmt eq "BEGIN" or
14885 $stmt eq "COMMIT") {
14886 return undef;
14889 my @retval;
14890 my $dbh = $self->{'dbh'};
14891 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
14892 # Execute with the rest of the args - if any
14893 my $rv;
14894 my $sth;
14895 my $lockretry = 0;
14896 while($lockretry < 10) {
14897 $sth = $dbh->prepare($stmt);
14898 if($sth
14900 eval { $rv = $sth->execute(@_) }) {
14901 last;
14902 } else {
14903 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
14905 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
14906 # This is fine:
14907 # It is just a worker that reported back too late -
14908 # another worker had finished the job first
14909 # and the table was then dropped
14910 $rv = $sth = 0;
14911 last;
14913 if($DBI::errstr =~ /locked/) {
14914 ::debug("sql", "Lock retry: $lockretry");
14915 $lockretry++;
14916 ::usleep(rand()*300);
14917 } elsif(not $sth) {
14918 # Try again
14919 $lockretry++;
14920 } else {
14921 ::error($DBI::errstr);
14922 ::wait_and_exit(255);
14926 if($lockretry >= 10) {
14927 ::die_bug("retry > 10: $DBI::errstr");
14929 if($rv < 0 and $DBI::errstr){
14930 ::error($DBI::errstr);
14931 ::wait_and_exit(255);
14933 return $sth;
14936 sub get($) {
14937 my $self = shift;
14938 my $sth = $self->run(@_);
14939 my @retval;
14940 # If $sth = 0 it means the table was dropped by another process
14941 while($sth) {
14942 my @row = $sth->fetchrow_array();
14943 @row or last;
14944 push @retval, \@row;
14946 return \@retval;
14949 sub table($) {
14950 my $self = shift;
14951 return $self->{'table'};
14954 sub append($) {
14955 my $self = shift;
14956 return $self->{'append'};
14959 sub update($) {
14960 my $self = shift;
14961 my $stmt = shift;
14962 my $table = $self->table();
14963 $self->run("UPDATE $table $stmt",@_);
14966 sub output($) {
14967 my $self = shift;
14968 my $commandline = shift;
14970 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
14971 $commandline->seq(),
14972 join("",@{$commandline->{'output'}{1}}),
14973 join("",@{$commandline->{'output'}{2}}));
14976 sub max_number_of_args($) {
14977 # Maximal number of args for this table
14978 my $self = shift;
14979 if(not $self->{'max_number_of_args'}) {
14980 # Read the number of args from the SQL table
14981 my $table = $self->table();
14982 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
14983 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
14984 Receive Exitval _Signal Command Stdout Stderr);
14985 if(not $v) {
14986 ::error("$table contains no records");
14988 # Count the number of Vx columns
14989 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
14991 return $self->{'max_number_of_args'};
14994 sub set_max_number_of_args($$) {
14995 my $self = shift;
14996 $self->{'max_number_of_args'} = shift;
14999 sub create_table($) {
15000 my $self = shift;
15001 if($self->append()) { return; }
15002 my $max_number_of_args = shift;
15003 $self->set_max_number_of_args($max_number_of_args);
15004 my $table = $self->table();
15005 $self->run(qq(DROP TABLE IF EXISTS $table;));
15006 # BIGINT and TEXT are not supported in these databases or are too small
15007 my %vartype = (
15008 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
15009 "TEXT" => "CLOB", },
15010 "mysql" => { "TEXT" => "BLOB", },
15011 "CSV" => { "BIGINT" => "INT",
15012 "FLOAT" => "REAL", },
15014 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
15015 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
15016 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
15017 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
15018 $self->run(qq{CREATE TABLE $table
15019 (Seq $BIGINT,
15020 Host $TEXT,
15021 Starttime $FLOAT,
15022 JobRuntime $FLOAT,
15023 Send $BIGINT,
15024 Receive $BIGINT,
15025 Exitval $BIGINT,
15026 _Signal $BIGINT,
15027 Command $TEXT,}.
15028 $v_def.
15029 qq{Stdout $TEXT,
15030 Stderr $TEXT);});
15033 sub insert_records($) {
15034 my $self = shift;
15035 my $seq = shift;
15036 my $command_ref = shift;
15037 my $record_ref = shift;
15038 my $table = $self->table();
15039 # For SQL encode the command with \257 space as split points
15040 my $command = join("\257 ",@$command_ref);
15041 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
15042 # Two extra value due to $seq, Exitval, Send
15043 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
15044 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
15045 "VALUES ($v_vals);", $seq, $command, -1000,
15046 0, @$record_ref[1..$#$record_ref]);
15050 sub get_record($) {
15051 my $self = shift;
15052 my @retval;
15053 my $table = $self->table();
15054 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
15055 my $rand = "Reserved-".$$.rand();
15056 my $v;
15057 my $more_pending;
15059 do {
15060 if($self->{'driver'} eq "CSV") {
15061 # Sub SELECT is not supported in CSV
15062 # So to minimize the race condition below select a job at random
15063 my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
15064 "WHERE Exitval = -1000 LIMIT 100;");
15065 $v = [ sort { rand() > 0.5 } @$r ];
15066 } else {
15067 # Avoid race condition where multiple workers get the same job
15068 # by setting Stdout to a unique string
15069 # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
15070 $self->update("SET Stdout = ?,Exitval = ? ".
15071 "WHERE Seq = (".
15072 " SELECT * FROM (".
15073 " SELECT min(Seq) FROM $table WHERE Exitval = -1000".
15074 " ) AS dummy".
15075 ") AND Exitval = -1000;", $rand, -1210);
15076 # If a parallel worker overwrote the unique string this will get nothing
15077 $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
15078 "WHERE Stdout = ?;", $rand);
15080 if($v->[0]) {
15081 my $val_ref = $v->[0];
15082 # Mark record as taken
15083 my $seq = shift @$val_ref;
15084 # Save the sequence number to use when running the job
15085 $SQL::next_seq = $seq;
15086 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
15087 # Command is encoded with '\257 space' as splitting char
15088 my @command = split /\257 /, shift @$val_ref;
15089 $SQL::command_ref = \@command;
15090 for (@$val_ref) {
15091 push @retval, Arg->new($_);
15093 } else {
15094 # If the record was updated by another job in parallel,
15095 # then we may not be done, so see if there are more jobs pending
15096 $more_pending =
15097 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
15099 } while (not $v->[0] and $more_pending->[0]);
15101 if(@retval) {
15102 return \@retval;
15103 } else {
15104 return undef;
15108 sub total_jobs($) {
15109 my $self = shift;
15110 my $table = $self->table();
15111 my $v = $self->get("SELECT count(*) FROM $table;");
15112 if($v->[0]) {
15113 return $v->[0]->[0];
15114 } else {
15115 ::die_bug("SQL::total_jobs");
15119 sub max_seq($) {
15120 my $self = shift;
15121 my $table = $self->table();
15122 my $v = $self->get("SELECT max(Seq) FROM $table;");
15123 if($v->[0]) {
15124 return $v->[0]->[0];
15125 } else {
15126 ::die_bug("SQL::max_seq");
15130 sub finished($) {
15131 # Check if there are any jobs left in the SQL table that do not
15132 # have a "real" exitval
15133 my $self = shift;
15134 if($opt::wait or $Global::start_sqlworker) {
15135 my $table = $self->table();
15136 my $rv = $self->get("select Seq,Exitval from $table ".
15137 "where Exitval <= -1000 limit 1");
15138 return not $rv->[0];
15139 } else {
15140 return 1;
15144 package Semaphore;
15146 # This package provides a counting semaphore
15148 # If a process dies without releasing the semaphore the next process
15149 # that needs that entry will clean up dead semaphores
15151 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
15152 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
15153 # process holding the entry. If the process dies, the entry can be
15154 # taken by another process.
15156 sub new($) {
15157 my $class = shift;
15158 my $id = shift;
15159 my $count = shift;
15160 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
15161 $id = "id-".$id; # To distinguish it from a process id
15162 my $parallel_locks = $Global::cache_dir . "/semaphores";
15163 -d $parallel_locks or ::mkdir_or_die($parallel_locks);
15164 my $lockdir = "$parallel_locks/$id";
15165 my $lockfile = $lockdir.".lock";
15166 if(-d $parallel_locks and -w $parallel_locks
15167 and -r $parallel_locks and -x $parallel_locks) {
15168 # skip
15169 } else {
15170 ::error("Semaphoredir must be writable: '$parallel_locks'");
15171 ::wait_and_exit(255);
15174 if($count < 1) { ::die_bug("semaphore-count: $count"); }
15175 return bless {
15176 'lockfile' => $lockfile,
15177 'lockfh' => Symbol::gensym(),
15178 'lockdir' => $lockdir,
15179 'id' => $id,
15180 'idfile' => $lockdir."/".$id,
15181 'pid' => $$,
15182 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
15183 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
15184 }, ref($class) || $class;
15187 sub remove_dead_locks($) {
15188 my $self = shift;
15189 my $lockdir = $self->{'lockdir'};
15191 for my $d (glob "$lockdir/*") {
15192 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
15193 my ($pid, $host) = ($1, $2);
15194 if($host eq ::hostname()) {
15195 if(kill 0, $pid) {
15196 ::debug("sem", "Alive: $pid $d\n");
15197 } else {
15198 ::debug("sem", "Dead: $d\n");
15199 ::rm($d);
15205 sub acquire($) {
15206 my $self = shift;
15207 my $sleep = 1; # 1 ms
15208 my $start_time = time;
15209 while(1) {
15210 # Can we get a lock?
15211 $self->atomic_link_if_count_less_than() and last;
15212 $self->remove_dead_locks();
15213 # Retry slower and slower up to 1 second
15214 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
15215 # Random to avoid every sleeping job waking up at the same time
15216 ::usleep(rand()*$sleep);
15217 if($opt::semaphoretimeout) {
15218 if($opt::semaphoretimeout > 0
15220 time - $start_time > $opt::semaphoretimeout) {
15221 # Timeout: Take the semaphore anyway
15222 ::warning("Semaphore timed out. Stealing the semaphore.");
15223 if(not -e $self->{'idfile'}) {
15224 open (my $fh, ">", $self->{'idfile'}) or
15225 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
15226 close $fh;
15228 link $self->{'idfile'}, $self->{'pidfile'};
15229 last;
15231 if($opt::semaphoretimeout < 0
15233 time - $start_time > -$opt::semaphoretimeout) {
15234 # Timeout: Exit
15235 ::warning("Semaphore timed out. Exiting.");
15236 exit(1);
15237 last;
15241 ::debug("sem", "acquired $self->{'pid'}\n");
15244 sub release($) {
15245 my $self = shift;
15246 ::rm($self->{'pidfile'});
15247 if($self->nlinks() == 1) {
15248 # This is the last link, so atomic cleanup
15249 $self->lock();
15250 if($self->nlinks() == 1) {
15251 ::rm($self->{'idfile'});
15252 rmdir $self->{'lockdir'};
15254 $self->unlock();
15256 ::debug("run", "released $self->{'pid'}\n");
15259 sub pid_change($) {
15260 # This should do what release()+acquire() would do without having
15261 # to re-acquire the semaphore
15262 my $self = shift;
15264 my $old_pidfile = $self->{'pidfile'};
15265 $self->{'pid'} = $$;
15266 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
15267 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
15268 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
15269 ::rm($old_pidfile);
15272 sub atomic_link_if_count_less_than($) {
15273 # Link $file1 to $file2 if nlinks to $file1 < $count
15274 my $self = shift;
15275 my $retval = 0;
15276 $self->lock();
15277 my $nlinks = $self->nlinks();
15278 ::debug("sem","$nlinks<$self->{'count'} ");
15279 if($nlinks < $self->{'count'}) {
15280 -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
15281 if(not -e $self->{'idfile'}) {
15282 open (my $fh, ">", $self->{'idfile'}) or
15283 ::die_bug("write_idfile: $self->{'idfile'}");
15284 close $fh;
15286 $retval = link $self->{'idfile'}, $self->{'pidfile'};
15287 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
15289 $self->unlock();
15290 ::debug("sem", "atomic $retval");
15291 return $retval;
15294 sub nlinks($) {
15295 my $self = shift;
15296 if(-e $self->{'idfile'}) {
15297 return (stat(_))[3];
15298 } else {
15299 return 0;
15303 sub lock($) {
15304 my $self = shift;
15305 my $sleep = 100; # 100 ms
15306 my $total_sleep = 0;
15307 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
15308 my $locked = 0;
15309 while(not $locked) {
15310 if(tell($self->{'lockfh'}) == -1) {
15311 # File not open
15312 open($self->{'lockfh'}, ">", $self->{'lockfile'})
15313 or ::debug("run", "Cannot open $self->{'lockfile'}");
15315 if($self->{'lockfh'}) {
15316 # File is open
15317 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
15318 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
15319 # The file is locked: No need to retry
15320 $locked = 1;
15321 last;
15322 } else {
15323 if ($! =~ m/Function not implemented/) {
15324 ::warning("flock: $!",
15325 "Will wait for a random while.");
15326 ::usleep(rand(5000));
15327 # File cannot be locked: No need to retry
15328 $locked = 2;
15329 last;
15333 # Locking failed in first round
15334 # Sleep and try again
15335 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
15336 # Random to avoid every sleeping job waking up at the same time
15337 ::usleep(rand()*$sleep);
15338 $total_sleep += $sleep;
15339 if($opt::semaphoretimeout) {
15340 if($opt::semaphoretimeout > 0
15342 $total_sleep/1000 > $opt::semaphoretimeout) {
15343 # Timeout: Take the semaphore anyway
15344 ::warning("Semaphore timed out. Taking the semaphore.");
15345 $locked = 3;
15346 last;
15348 if($opt::semaphoretimeout < 0
15350 $total_sleep/1000 > -$opt::semaphoretimeout) {
15351 # Timeout: Exit
15352 ::warning("Semaphore timed out. Exiting.");
15353 $locked = 4;
15354 last;
15356 } else {
15357 if($total_sleep/1000 > 30) {
15358 ::warning("Semaphore stuck for 30 seconds. ".
15359 "Consider using --semaphoretimeout.");
15363 ::debug("run", "locked $self->{'lockfile'}");
15366 sub unlock($) {
15367 my $self = shift;
15368 ::rm($self->{'lockfile'});
15369 close $self->{'lockfh'};
15370 ::debug("run", "unlocked\n");
15373 # Keep perl -w happy
15375 $opt::x = $Semaphore::timeout = $Semaphore::wait =
15376 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
15377 $Global::max_slot_number = $opt::session;
15379 package main;
15382 sub main() {
15383 unpack_combined_executable();
15384 save_stdin_stdout_stderr();
15385 save_original_signal_handler();
15386 parse_options();
15387 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fh), "\n");
15388 my $number_of_args;
15389 if($Global::max_number_of_args) {
15390 $number_of_args = $Global::max_number_of_args;
15391 } elsif ($opt::X or $opt::m or $opt::xargs) {
15392 $number_of_args = undef;
15393 } else {
15394 $number_of_args = 1;
15397 my @command = @ARGV;
15398 my @input_source_fh;
15399 if($opt::pipepart) {
15400 if($opt::tee) {
15401 @input_source_fh = map { open_or_exit("<",$_) } @opt::a;
15402 # Remove the first: It will be the file piped.
15403 shift @input_source_fh;
15404 if(not @input_source_fh and not $opt::pipe) {
15405 @input_source_fh = (*STDIN);
15407 } else {
15408 # -a is used for data - not for command line args
15409 @input_source_fh = map { open_or_exit("<",$_) } "/dev/null";
15411 } else {
15412 @input_source_fh = map { open_or_exit("<",$_) } @opt::a;
15413 if(not @input_source_fh and not $opt::pipe) {
15414 @input_source_fh = (*STDIN);
15418 if($opt::skip_first_line) {
15419 # Skip the first line for the first file handle
15420 my $fh = $input_source_fh[0];
15421 <$fh>;
15424 set_input_source_header(\@command,\@input_source_fh);
15425 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
15426 # Parallel check all hosts are up. Remove hosts that are down
15427 filter_hosts();
15429 if($opt::sqlmaster and $opt::sqlworker) {
15430 # Start a real --sqlworker in the background later
15431 $Global::start_sqlworker = 1;
15432 $opt::sqlworker = undef;
15435 $Global::start_time = ::now();
15436 if($opt::nonall or $opt::onall) {
15437 onall(\@input_source_fh,@command);
15438 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
15441 $Global::JobQueue = JobQueue->new(
15442 \@command, \@input_source_fh, $Global::ContextReplace,
15443 $number_of_args, \@Global::transfer_files, \@Global::ret_files,
15444 \@Global::template_names, \@Global::template_contents
15447 if($opt::sqlmaster) {
15448 # Create SQL table to hold joblog + output
15449 # Figure out how many arguments are in a job
15450 # (It is affected by --colsep, -N, $number_source_fh)
15451 my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
15452 my $record = $record_queue->get();
15453 my $no_of_values = $number_of_args * (1+$#{$record});
15454 $record_queue->unget($record);
15455 $Global::sql->create_table($no_of_values);
15456 if($opt::sqlworker) {
15457 # Start a real --sqlworker in the background later
15458 $Global::start_sqlworker = 1;
15459 $opt::sqlworker = undef;
15463 if($opt::pipepart) {
15464 pipepart_setup();
15465 } elsif($opt::pipe) {
15466 if($opt::tee) {
15467 pipe_tee_setup();
15468 } elsif($opt::shard or $opt::bin) {
15469 pipe_shard_setup();
15470 } elsif($opt::groupby) {
15471 pipe_group_by_setup();
15475 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
15476 # Count the number of jobs or shuffle all jobs
15477 # before starting any.
15478 # Must be done after ungetting any --pipepart jobs.
15479 $Global::JobQueue->total_jobs();
15481 # Compute $Global::max_jobs_running
15482 # Must be done after ungetting any --pipepart jobs.
15483 max_jobs_running();
15484 init_run_jobs();
15485 my $sem;
15486 if($Global::semaphore) {
15487 $sem = acquire_semaphore();
15489 $SIG{TERM} = $Global::original_sig{TERM};
15490 $SIG{HUP} = \&start_no_new_jobs;
15492 if($opt::progress) {
15493 ::status_no_nl(init_progress());
15495 if($opt::tee or $opt::shard or $opt::bin) {
15496 # All jobs must be running in parallel for --tee/--shard/--bin
15497 while(start_more_jobs()) {}
15498 $Global::start_no_new_jobs = 1;
15499 if(not $Global::JobQueue->empty()) {
15500 if($opt::tee) {
15501 ::error("--tee requires --jobs to be higher. Try --jobs 0.");
15502 } elsif($opt::bin) {
15503 ::error("--bin requires --jobs to be higher than the number of",
15504 "arguments. Increase --jobs.");
15505 } elsif($opt::shard) {
15506 ::error("--shard requires --jobs to be higher than the number of",
15507 "arguments. Increase --jobs.");
15508 } else {
15509 ::die_bug("--bin/--shard/--tee should not get here");
15511 ::wait_and_exit(255);
15513 } elsif($opt::pipe and not $opt::pipepart and not $opt::semaphore) {
15514 # Fill all jobslots
15515 while(start_more_jobs()) {}
15516 spreadstdin();
15517 } else {
15518 # Reap the finished jobs and start more
15519 while(reapers() + start_more_jobs()) {}
15521 ::debug("init", "Start draining\n");
15522 drain_job_queue(@command);
15523 ::debug("init", "Done draining\n");
15524 reapers();
15525 ::debug("init", "Done reaping\n");
15526 if($Global::semaphore) { $sem->release(); }
15527 cleanup();
15528 ::debug("init", "Halt\n");
15529 halt();
15532 main();