3 # Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, see <http://www.gnu.org/licenses/>
17 # or write to the Free Software Foundation, Inc., 51 Franklin St,
18 # Fifth Floor, Boston, MA 02110-1301 USA
20 # open3 used in Job::start
22 # &WNOHANG used in reaper
23 use POSIX
qw(:sys_wait_h setsid ceil :errno_h);
24 # gensym used in Job::start
25 use Symbol
qw(gensym);
26 # tempfile used in Job::start
27 use File
::Temp
qw(tempfile tempdir);
28 # mkpath used in openresultsfile
30 # GetOptions used in get_options_from_array
32 # Used to ensure code quality
36 sub set_input_source_header
($$) {
37 my ($command_ref,$input_source_fh_ref) = @_;
38 if($opt::header
and not $opt::pipe) {
39 # split with colsep or \t
40 # $header force $colsep = \t if undef?
41 my $delimiter = defined $opt::colsep ?
$opt::colsep
: "\t";
43 my $left = "\Q$Global::parensleft\E";
44 my $l = $Global::parensleft
;
46 my $right = "\Q$Global::parensright\E";
47 my $r = $Global::parensright
;
49 for my $fh (@
$input_source_fh_ref) {
52 ::debug
("init", "Delimiter: '$delimiter'");
53 for my $s (split /$delimiter/o, $line) {
54 ::debug
("init", "Colname: '$s'");
55 # Replace {colname} with {2}
56 for(@
$command_ref, @Global::ret_files
,
57 @Global::transfer_files
, $opt::tagstring
,
58 $opt::workdir
, $opt::results
, $opt::retries
) {
61 s
:\
{$s(|/|//|\.|/\
.)\
}:\
{$id$1\
}:g
;
62 # {=header1 ... =} => {=1 ... =}
63 s
:$left $s (.*?
) $right:$l$id$1$r:gx
;
65 $Global::input_source_header
{$id} = $s;
71 for my $fh (@
$input_source_fh_ref) {
72 $Global::input_source_header
{$id} = $id;
78 sub max_jobs_running
() {
79 # Compute $Global::max_jobs_running as the max number of jobs
80 # running on each sshlogin.
82 # $Global::max_jobs_running
83 if(not $Global::max_jobs_running
) {
84 for my $sshlogin (values %Global::host
) {
85 $sshlogin->max_jobs_running();
88 if(not $Global::max_jobs_running
) {
89 ::error
("Cannot run any jobs.");
92 return $Global::max_jobs_running
;
97 # wait for children to complete
99 if($opt::halt
and $Global::halt_when
ne "never") {
100 if(not defined $Global::halt_exitstatus
) {
101 if($Global::halt_pct
) {
102 $Global::halt_exitstatus
=
103 ::ceil
($Global::total_failed
/
104 ($Global::total_started
|| 1) * 100);
105 } elsif($Global::halt_count
) {
106 $Global::halt_exitstatus
=
107 ::min
(undef_as_zero
($Global::total_failed
),101);
110 wait_and_exit
($Global::halt_exitstatus
);
112 wait_and_exit
(min
(undef_as_zero
($Global::exitstatus
),101));
117 sub __PIPE_MODE__
() {}
120 sub pipepart_setup
() {
121 # Compute the blocksize
122 # Generate the commands to extract the blocks
123 # Push the commands on queue
125 # @Global::cat_prepends
128 # Prepend each command with
130 my $cat_string = "< ".Q
($opt::a
[0]);
131 for(1..$Global::JobQueue
->total_jobs()) {
132 push @Global::cat_appends
, $cat_string;
133 push @Global::cat_prepends
, "";
136 if(not $opt::blocksize
) {
137 # --blocksize with 10 jobs per jobslot
138 $opt::blocksize
= -10;
140 if($opt::roundrobin
) {
141 # --blocksize with 1 job per jobslot
142 $opt::blocksize
= -1;
144 if($opt::blocksize
< 0) {
151 $size += size_of_block_dev
($_);
153 ::error
("$_ is neither a file nor a block device");
157 # Run in total $job_slots*(- $blocksize) jobs
158 # Set --blocksize = size / no of proc / (- $blocksize)
159 $Global::dummy_jobs
= 1;
160 $Global::blocksize
= 1 +
161 int($size / max_jobs_running() / -$opt::blocksize
);
163 @Global::cat_prepends
= map { pipe_part_files
($_) } @opt::a
;
164 # Unget the empty arg as many times as there are parts
165 $Global::JobQueue
->{'commandlinequeue'}{'arg_queue'}->unget(
166 map { [Arg
->new("\0noarg")] } @Global::cat_prepends
171 sub pipe_tee_setup
() {
172 # Create temporary fifos
173 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
174 # This will spread the input to fifos
175 # Generate commands that reads from fifo1..N:
176 # cat fifo | user_command
178 # @Global::cat_prepends
180 for(1..$Global::JobQueue
->total_jobs()) {
181 push @fifos, tmpfifo
();
183 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
185 # Let tee inherit our stdin
186 # and redirect stdout to null
187 open STDOUT
, ">","/dev/null";
191 # (rm fifo1; grep 1) < fifo1
192 # (rm fifo2; grep 2) < fifo2
193 # (rm fifo3; grep 3) < fifo3
194 # Remove the tmpfifo as soon as it is open
195 @Global::cat_prepends
= map { "(rm $_;" } @fifos;
196 @Global::cat_appends
= map { ") < $_" } @fifos;
200 sub parcat_script
() {
201 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
203 use POSIX qw(:errno_h);
208 use Fcntl
qw(:DEFAULT :flock);
211 my $q = Thread
::Queue
->new();
212 my $okq = Thread
::Queue
->new();
218 print " parcat file(s)\n";
219 print " cat argfile | parcat\n";
221 # Read arguments from stdin
222 chomp(@ARGV = <STDIN
>);
225 my $files_to_open = 0;
226 # Default: fd = stdout
229 # --rm = remove file when opened
230 /^--rm$/ and do { $opt::rm
= 1; next; };
231 # -1 = output to fd 1, -2 = output to fd 2
232 /^-(\d+)$/ and do { $fd = $1; next; };
233 push @producers, threads
->create("producer", $_, $fd);
238 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
240 my $output_fd = shift;
241 open(my $fh, "<", $file) || do {
242 print STDERR
"parcat: Cannot open $file\n";
245 # Remove file when it has been opened
249 set_fh_non_blocking
($fh);
251 # Pass the fileno to parent
252 $q->enqueue(fileno($fh),$output_fd);
253 # Get an OK that the $fh is opened and we can release the $fh
255 my $ok = $okq->dequeue();
256 if($ok == fileno($fh)) { last; }
257 # Not ours - very unlikely to happen
263 my $s = IO
::Select
->new();
269 open(my $infh, "<&=", $infd) || die;
270 open(my $outfh, ">&=", $outfd) || die;
272 # Tell the producer now opened here and can be released
273 $okq->enqueue($infd);
274 # Initialize the buffer
275 @
{$buffer{$infh}{$outfd}} = ();
276 $Global::fh
{$outfd} = $outfh;
280 # Non-blocking dequeue
283 ($infd,$outfd) = $q->dequeue_nb(2);
284 if(defined($outfd)) {
285 add_file
($infd,$outfd);
287 } while(defined($outfd));
290 sub add_files_block
{
292 my ($infd,$outfd) = $q->dequeue(2);
293 add_file
($infd,$outfd);
298 my (@ready,$infh,$rv,$buf);
300 # Wait until at least one file is opened
302 while($q->pending or keys %buffer) {
304 while(keys %buffer) {
305 @ready = $s->can_read(0.01);
310 # There is only one key, namely the output file descriptor
311 for my $outfd (keys %{$buffer{$infh}}) {
312 $rv = sysread($infh, $buf, 65536);
315 # Would block: Nothing read
318 # Nothing read, but would not block:
321 for(@
{$buffer{$infh}{$outfd}}) {
322 syswrite($Global::fh
{$outfd},$_);
324 delete $buffer{$infh};
325 # Closing the $infh causes it to block
332 # Find \n or \r for full line
333 my $i = (rindex($buf,"\n")+1);
336 for(@
{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
337 syswrite($Global::fh
{$outfd},$_);
339 # @buffer = remaining half line
340 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
342 # Something read, but not a full line
343 push @
{$buffer{$infh}{$outfd}}, $buf;
350 } while($opened < $files_to_open);
356 sub set_fh_non_blocking
{
357 # Set filehandle as non-blocking
359 # $fh = filehandle to be blocking
364 fcntl($fh, &F_GETFL
, $flags) || die $!; # Get the current flags on the filehandle
365 $flags |= &O_NONBLOCK
; # Add non-blocking to the flags
366 fcntl($fh, &F_SETFL
, $flags) || die $!; # Set the flags on the filehandle
369 return ::spacefree(3, $script);
372 sub sharder_script() {
377 # Which columns to shard on (count from 1)
379 # Which columns to shard on (count from 0)
382 # Open fifos for writing, fh{0..$bins}
386 open $fh{$t++}, ">", $_;
387 # open blocks until it is opened by reader
388 # so unlink only happens when it is ready
392 # Split into $col columns (no need to split into more)
393 @F = split $sep, $_, $col+1;
394 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
397 # Close all open fifos
400 return ::spacefree(1, $script);
403 sub pipe_shard_setup() {
404 # Create temporary fifos
405 # Run 'shard
.pl sep col fifo1 fifo2 fifo3
... fifoN
' in the background
406 # This will spread the input to fifos
407 # Generate commands that reads from fifo1..N:
408 # cat fifo | user_command
410 # @Global::cat_prepends
413 # TODO $opt::jobs should be evaluated (100%)
414 # TODO $opt::jobs should be number of total_jobs if there are argugemts
415 my $njobs = $opt::jobs;
416 for my $m (0..$njobs-1) {
417 for my $n (0..$njobs-1) {
418 # sharding to A B C D
419 # parcatting all As together
420 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
423 my $script = sharder_script();
424 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
427 # Let the sharder inherit our stdin
428 # and redirect stdout to null
429 open STDOUT, ">","/dev/null";
430 # The PERL_HASH_SEED must be the same for all sharders
431 # so B::hash will return the same value for any given input
432 $ENV{'PERL_HASH_SEED
'} = $$;
433 exec qw(parallel --block 100k -q --pipe -j), $njobs,
434 qw(--roundrobin -u perl -e), $script, ($opt::colsep
|| ","),
435 $opt::shard
, '{}', (map { (':::+', @
{$_}) } @parcatfifos);
438 # (rm fifo1; grep 1) < fifo1
439 # (rm fifo2; grep 2) < fifo2
440 # (rm fifo3; grep 3) < fifo3
441 my $parcat = Q
(parcat_script
());
443 ::error
("'parcat' must be in path.");
444 ::wait_and_exit
(255);
446 @Global::cat_prepends
= map { "perl -e $parcat @$_ | " } @parcatfifos;
449 sub pipe_part_files
(@
) {
451 # find header and split positions
452 # make commands that 'cat's the partial file
454 # $file = the file to read
456 # @commands that will cat_partial each part
459 if(not -f
$file and not -b
$file) {
460 ::error
("$file is not a seekable file.");
461 ::wait_and_exit
(255);
463 my $header = find_header
(\
$buf,open_or_exit
($file));
465 my @pos = find_split_positions
($file,$Global::blocksize
,length $header);
467 my @cat_prepends = ();
468 for(my $i=0; $i<$#pos; $i++) {
470 cat_partial
($file, 0, length($header), $pos[$i], $pos[$i+1]));
472 return @cat_prepends;
475 sub find_header
($$) {
476 # Compute the header based on $opt::header
478 # $buf_ref = reference to read-in buffer
479 # $fh = filehandle to read from
486 my ($buf_ref, $fh) = @_;
488 # $Global::header may be set in group_by_loop()
489 if($Global::header
) { return $Global::header
}
491 if($opt::header
eq ":") { $opt::header
= "(.*\n)"; }
492 # Number = number of lines
493 $opt::header
=~ s/^(\d+)$/"(.*\n)"x$1/e;
494 while(read($fh,substr($$buf_ref,length $$buf_ref,0),
495 $Global::blocksize
)) {
496 if($$buf_ref =~ s/^($opt::header)//) {
505 sub find_split_positions
($$$) {
506 # Find positions in bigfile where recend is followed by recstart
508 # $file = the file to read
509 # $block = (minimal) --block-size of each chunk
510 # $headerlen = length of header to be skipped
515 # @positions of block start/end
516 my($file, $block, $headerlen) = @_;
519 # $file is a blockdevice
520 $size = size_of_block_dev
($file);
523 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
524 # The optimal dd blocksize for freebsd = 2^15..2^17
525 my $dd_block_size = 131072; # 2^17
527 my ($recstart,$recend) = recstartrecend
();
528 my $recendrecstart = $recend.$recstart;
529 my $fh = ::open_or_exit
($file);
530 push(@pos,$headerlen);
531 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
533 if($recendrecstart eq "") {
534 # records ends anywhere
537 # Seek the the block start
538 seek($fh, $pos, 0) || die;
539 while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
541 # If match /$recend$recstart/ => Record position
542 if($buf =~ m
:^(.*$recend)$recstart:os
) {
543 # Start looking for next record _after_ this match
549 # If match $recend$recstart => Record position
550 # TODO optimize to only look at the appended
551 # $dd_block_size + len $recendrecstart
552 # TODO increase $dd_block_size to optimize for longer records
553 my $i = index64
(\
$buf,$recendrecstart);
555 # Start looking for next record _after_ this match
556 $pos += $i + length($recend);
564 if($pos[$#pos] != $size) {
565 # Last splitpoint was not at end of the file: add it
572 sub cat_partial
($@
) {
573 # Efficient command to copy from byte X to byte Y
575 # $file = the file to read
576 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
578 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
579 my($file, @start_end) = @_;
581 # Convert (start,end) to (start,len)
582 my @start_len = map {
583 if(++$i % 2) { $start = $_; } else { $_-$start }
585 # This can read 7 GB/s using a single core
586 my $script = spacefree
590 sysseek(STDIN,shift,0) || die;
593 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
595 syswrite(STDOUT,$buf);
599 return "<". Q($file) .
600 " perl -e '$script' @start_len |";
603 sub group_by_loop($) {
604 # Generate perl code for group-by loop
605 # Insert a $recsep when the column value changes
606 # The column value can be computed with $perexpr
608 my $groupby = $opt::groupby;
610 if($groupby =~ /^[a-z0-9_]+(\s|$)/i) {
611 # Column name/number (possibly prefix)
612 if($groupby =~ s/^(\d+)\s*//) {
613 # Column number (possibly prefix)
615 } elsif($groupby =~ s/^([a-z0-9_]+)\s*//) {
616 # Column name (possibly prefix)
618 my($read,$char,@line,$header);
619 # A full line, but nothing more (the rest must be read by the child)
621 $read = sysread(STDIN,$char,1);
623 } while($read and $char ne "\n");
624 # $Global::header used to prepend block to each job
625 $Global::header = join "", @line;
626 # Split on --copsep pattern
627 my @headers = split/$opt::colsep/, $Global::header;
629 # Numbered 0..n-1 due to being used by $F[n]
630 @headers{@headers} = (0..$#headers);
631 $col = $headers{$colname};
632 if(not defined $col) {
633 ::error("Column '$colname' $opt::colsep not found in header",keys %headers);
634 ::wait_and_exit(255);
638 # What is left of $groupby is $perlexpr
639 $perlexpr = $groupby;
641 my $loop = ::spacefree(0,'{
644 if(! defined $last) { $last = $_ }
652 $loop =~ s/COLVALUE/\$F[$col]/g;
654 $loop =~ s/COLVALUE/\$_/g;
656 $loop =~ s/PERLEXPR/$perlexpr/g;
657 $loop =~ s/RECSEP/$recsep/g;
661 sub group_by_stdin_filter() {
662 # Record separator with 119 bit random value
665 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
666 $opt::remove_rec_sep = 1;
668 push @filter, "perl";
669 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
670 # This is column number/name
671 # Use -a (auto-split)
673 $opt::colsep ||= "\t";
674 my $sep = $opt::colsep;
677 push @filter, "-F$sep";
680 push @filter, group_by_loop($opt::recstart);
681 ::debug("init", "@filter\n");
682 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
687 # Spawn a job and print the record to it.
693 # $Global::max_number_of_args
695 # $Global::start_no_new_jobs
701 my ($recstart,$recend) = recstartrecend();
702 my $recendrecstart = $recend.$recstart;
703 my $chunk_number = 1;
704 my $one_time_through;
705 my $two_gb = 2**31-1;
706 my $blocksize = $Global::blocksize;
708 my $header = find_header(\$buf,$in);
710 my $anything_written = 0;
711 my $buflen = length $buf;
712 my $readsize = ($buflen < $blocksize) ? $blocksize-$buflen : $blocksize;
713 # If $buf < $blocksize, append so it is $blocksize long after reading.
714 # Otherwise append a full $blocksize
715 if(not read($in,substr($buf,$buflen,0),$readsize)) {
717 $chunk_number != 1 and last;
718 # Force the while-loop once if everything was read by header reading
719 $one_time_through++ and last;
723 $buf =~ s/^\s*\n//gm;
724 if(length $buf == 0) {
728 if($Global::max_lines and not $Global::max_number_of_args) {
729 # Read n-line records
730 my $n_lines = $buf =~ tr/\n/\n/;
731 my $last_newline_pos = rindex64(\$buf,"\n");
732 # Go backwards until there are full n-line records
733 while($n_lines % $Global::max_lines) {
735 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
737 # Chop at $last_newline_pos as that is where n-line record ends
739 write_record_to_pipe($chunk_number++,\$header,\$buf,
740 $recstart,$recend,$last_newline_pos+1);
741 shorten(\$buf,$last_newline_pos+1);
742 } elsif($opt::regexp) {
743 if($Global::max_number_of_args) {
744 # -N => (start..*?end){n}
745 # -L -N => (start..*?end){n*l}
746 my $read_n_lines = -1+
747 $Global::max_number_of_args * ($Global::max_lines || 1);
748 # (?!negative lookahead) is needed to avoid backtracking
749 # See: https://unix.stackexchange.com/questions/439356/
752 # Either recstart or at least one char from start
755 (?:(?!$recend$recstart).)*?
758 # Then n-1 times recstart.*recend
759 (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
761 # Followed by recstart
764 write_record_to_pipe($chunk_number++,\$header,\$buf,
765 $recstart,$recend,length $1);
766 shorten(\$buf,length $1);
770 # Find the last recend-recstart in $buf
771 if($buf =~ /^(.*$recend)$recstart.*?$/os) {
773 write_record_to_pipe($chunk_number++,\$header,\$buf,
774 $recstart,$recend,length $1);
775 shorten(\$buf,length $1);
779 # Read a full CSV record
780 # even number of " + end of line
781 my $last_newline_pos = length $buf;
784 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
786 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
787 and $last_newline_pos >= 0);
788 # Chop at $last_newline_pos as that is where CSV record ends
790 write_record_to_pipe($chunk_number++,\$header,\$buf,
791 $recstart,$recend,$last_newline_pos+1);
792 shorten(\$buf,$last_newline_pos+1);
794 if($Global::max_number_of_args) {
795 # -N => (start..*?end){n}
798 $Global::max_number_of_args * ($Global::max_lines || 1);
799 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
802 $i += length $recend; # find the actual splitting location
804 write_record_to_pipe($chunk_number++,\$header,\$buf,
805 $recstart,$recend,$i);
810 # Find the last recend+recstart in $buf
811 my $i = rindex64(\$buf,$recendrecstart);
813 $i += length $recend; # find the actual splitting location
815 write_record_to_pipe($chunk_number++,\$header,\$buf,
816 $recstart,$recend,$i);
821 if(not $anything_written
823 and not $Global::no_autoexpand_block) {
824 # Nothing was written - maybe the block size < record size?
825 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
826 if($blocksize < $two_gb) {
827 my $old_blocksize = $blocksize;
828 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
829 ::warning("A record was longer than $old_blocksize. " .
830 "Increasing to --blocksize $blocksize.");
834 ::debug("init", "Done reading input\n");
836 # If there is anything left in the buffer write it
837 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
838 $recend, length $buf);
841 $Global::no_more_input = 1;
842 # We need to start no more jobs: At most we need to retry some
843 # of the already running.
844 my @running = values %Global::running;
846 for my $job (@running) {
847 if(defined $job and $job->virgin()) {
848 close $job->fh(0,"w");
851 # Wait for running jobs to be done
853 while($Global::total_running > 0) {
854 $sleep = ::reap_usleep($sleep);
858 $Global::start_no_new_jobs ||= 1;
859 if($opt::roundrobin) {
860 # Flush blocks to roundrobin procs
862 while(%Global::running) {
863 my $something_written = 0;
864 for my $job (values %Global::running) {
865 if($job->block_length()) {
866 $something_written += $job->non_blocking_write();
868 close $job->fh(0,"w");
871 if($something_written) {
872 $sleep = $sleep/2+0.001;
874 $sleep = ::reap_usleep($sleep);
879 sub recstartrecend() {
884 # $recstart,$recend with default values and regexp conversion
885 my($recstart,$recend);
886 if(defined($opt::recstart) and defined($opt::recend)) {
887 # If both --recstart and --recend is given then both must match
888 $recstart = $opt::recstart;
889 $recend = $opt::recend;
890 } elsif(defined($opt::recstart)) {
891 # If --recstart is given it must match start of record
892 $recstart = $opt::recstart;
894 } elsif(defined($opt::recend)) {
895 # If --recend is given then it must match end of record
897 $recend = $opt::recend;
898 if($opt::regexp and $recend eq '') {
899 # --regexp --recend ''
905 # If $recstart/$recend contains '|'
906 # this should only apply to the regexp
907 $recstart = "(?:".$recstart.")";
908 $recend = "(?:".$recend.")";
910 # $recstart/$recend = printf strings (\n)
911 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
912 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
914 return ($recstart,$recend);
918 # See if string is in buffer N times
920 # the position where the Nth copy is found
921 my ($buf_ref, $str, $n) = @_;
924 $i = index64($buf_ref,$str,$i+1);
925 if($i == -1) { last }
934 sub round_robin_write($$$$$) {
936 # $header_ref = ref to $header string
937 # $block_ref = ref to $block to be written
938 # $recstart = record start string
939 # $recend = record end string
940 # $endpos = end position of $block
944 # $something_written = amount of bytes written
945 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
947 my $block_passed = 0;
948 while(not $block_passed) {
949 # Continue flushing existing buffers
950 # until one is empty and a new block is passed
952 # Rotate queue once so new blocks get a fair chance
953 # to be given to another block
954 push @robin_queue, shift @robin_queue;
956 # Make a queue to spread the blocks evenly
957 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
958 values %Global::running);
962 for my $job (@robin_queue) {
963 if($job->block_length() > 0) {
964 $written += $job->non_blocking_write();
966 $job->set_block($header_ref, $buffer_ref,
967 $endpos, $recstart, $recend);
970 $written += $job->non_blocking_write();
975 $sleep = $sleep/1.5+0.001;
977 # Don't sleep if something is written
978 } while($written and not $block_passed);
979 $sleep = ::reap_usleep($sleep);
986 # Do index on strings > 2GB.
987 # index in Perl < v5.22 does not work for > 2GB
989 # as index except STR which must be passed as a reference
994 my $pos = shift || 0;
995 my $block_size = 2**31-1;
996 my $strlen = length($$ref);
997 # No point in doing extra work if we don't need to.
998 if($strlen < $block_size or $] > 5.022) {
999 return index($$ref, $match, $pos);
1002 my $matchlen = length($match);
1005 while($offset < $strlen) {
1007 substr($$ref, $offset, $block_size),
1008 $match, $pos-$offset);
1010 return $ret + $offset;
1012 $offset += ($block_size - $matchlen - 1);
1018 # Do rindex on strings > 2GB.
1019 # rindex in Perl < v5.22 does not work for > 2GB
1021 # as rindex except STR which must be passed as a reference
1027 my $block_size = 2**31-1;
1028 my $strlen = length($$ref);
1029 # Default: search from end
1030 $pos = defined $pos ? $pos : $strlen;
1031 # No point in doing extra work if we don't need to.
1032 if($strlen < $block_size) {
1033 return rindex($$ref, $match, $pos);
1036 my $matchlen = length($match);
1038 my $offset = $pos - $block_size + $matchlen;
1040 # The offset is less than a $block_size
1041 # Set the $offset to 0 and
1042 # Adjust block_size accordingly
1043 $block_size = $block_size + $offset;
1046 while($offset >= 0) {
1048 substr($$ref, $offset, $block_size),
1051 return $ret + $offset;
1053 $offset -= ($block_size - $matchlen - 1);
1059 # Do: substr($buf,0,$i) = "";
1060 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1063 # $i = position to shorten to
1065 my ($buf_ref, $i) = @_;
1066 my $two_gb = 2**31-1;
1067 while($i > $two_gb) {
1068 substr($$buf_ref,0,$two_gb) = "";
1071 substr($$buf_ref,0,$i) = "";
1074 sub write_record_to_pipe($$$$$$) {
1076 # Write record from pos 0 .. $endpos to pipe
1078 # $chunk_number = sequence number - to see if already run
1079 # $header_ref = reference to header string to prepend
1080 # $buffer_ref = reference to record to write
1081 # $recstart = start string of record
1082 # $recend = end string of record
1083 # $endpos = position in $buffer_ref where record ends
1085 # $Global::job_already_run
1087 # @Global::virgin_jobs
1089 # Number of chunks written (0 or 1)
1090 my ($chunk_number, $header_ref, $buffer_ref,
1091 $recstart, $recend, $endpos) = @_;
1092 if($endpos == 0) { return 0; }
1093 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1094 if($opt::roundrobin) {
1095 # Write the block to one of the already running jobs
1096 return round_robin_write($header_ref, $buffer_ref,
1097 $recstart, $recend, $endpos);
1099 # If no virgin found, backoff
1100 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1101 while(not @Global::virgin_jobs) {
1102 ::debug("pipe", "No virgin jobs");
1103 $sleep = ::reap_usleep($sleep);
1104 # Jobs may not be started because of loadavg
1105 # or too little time between each ssh login
1106 # or retrying failed jobs.
1109 my $job = shift @Global::virgin_jobs;
1110 # Job is no longer virgin
1111 $job->set_virgin(0);
1114 # Copy $buffer[0..$endpos] to $job->{'block'}
1116 # Run $job->add_transfersize
1117 $job->set_block($header_ref, $buffer_ref, $endpos,
1118 $recstart, $recend);
1122 $job->write($job->block_ref());
1123 close $job->fh(0,"w");
1127 # We ignore the removed rec_sep which is technically wrong.
1128 $job->add_transfersize($endpos + length $$header_ref);
1132 # Chop of at $endpos as we do not know how many rec_sep will
1134 substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
1136 if($opt::remove_rec_sep) {
1137 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
1139 $job->write($header_ref);
1140 $job->write($buffer_ref);
1141 close $job->fh(0,"w");
1145 close $job->fh(0,"w");
1150 sub __SEM_MODE__() {}
1153 sub acquire_semaphore() {
1154 # Acquires semaphore. If needed: spawns to the background
1158 # The semaphore to be released when jobs is complete
1159 $Global::host{':'} = SSHLogin->new(":");
1160 my $sem = Semaphore->new($Semaphore::name,
1161 $Global::host{':'}->max_jobs_running());
1163 if($Semaphore::fg) {
1169 # If run in the background, the PID will change
1177 sub __PARSE_OPTIONS__() {}
1180 sub options_hash() {
1182 # %hash = the GetOptions config
1184 ("debug|D=s" => \$opt::D,
1185 "xargs" => \$opt::xargs,
1189 "sql=s" => \$opt::retired,
1190 "sqlmaster=s" => \$opt::sqlmaster,
1191 "sqlworker=s" => \$opt::sqlworker,
1192 "sqlandworker=s" => \$opt::sqlandworker,
1193 "joblog|jl=s" => \$opt::joblog,
1194 "results|result|res=s" => \$opt::results,
1195 "resume" => \$opt::resume,
1196 "resume-failed|resumefailed" => \$opt::resume_failed,
1197 "retry-failed|retryfailed" => \$opt::retry_failed,
1198 "silent" => \$opt::silent,
1199 "keep-order|keeporder|k" => \$opt::keeporder,
1200 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1201 "group" => \$opt::group,
1202 "g" => \$opt::retired,
1203 "ungroup|u" => \$opt::ungroup,
1204 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1205 => \$opt::linebuffer,
1206 "tmux" => \$opt::tmux,
1207 "tmuxpane" => \$opt::tmuxpane,
1208 "null|0" => \$opt::null,
1209 "quote|q" => \$opt::q,
1210 # Replacement strings
1211 "parens=s" => \$opt::parens,
1212 "rpl=s" => \@opt::rpl,
1213 "plus" => \$opt::plus,
1215 "extensionreplace|er=s" => \$opt::U,
1216 "U=s" => \$opt::retired,
1217 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1218 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1219 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1220 "seqreplace=s" => \$opt::seqreplace,
1221 "slotreplace=s" => \$opt::slotreplace,
1222 "jobs|j=s" => \$opt::jobs,
1223 "delay=s" => \$opt::delay,
1224 "sshdelay=f" => \$opt::sshdelay,
1225 "load=s" => \$opt::load,
1226 "noswap" => \$opt::noswap,
1227 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1228 "number-of-cpus" => \$opt::number_of_cpus,
1229 "number-of-sockets" => \$opt::number_of_sockets,
1230 "number-of-cores" => \$opt::number_of_cores,
1231 "number-of-threads" => \$opt::number_of_threads,
1232 "use-sockets-instead-of-threads"
1233 => \$opt::use_sockets_instead_of_threads,
1234 "use-cores-instead-of-threads"
1235 => \$opt::use_cores_instead_of_threads,
1236 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1237 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1238 "nice=i" => \$opt::nice,
1239 "tag" => \$opt::tag,
1240 "tagstring|tag-string=s" => \$opt::tagstring,
1241 "onall" => \$opt::onall,
1242 "nonall" => \$opt::nonall,
1243 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1244 "sshlogin|S=s" => \@opt::sshlogin,
1245 "sshloginfile|slf=s" => \@opt::sshloginfile,
1246 "controlmaster|M" => \$opt::controlmaster,
1247 "ssh=s" => \$opt::ssh,
1248 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1249 => \@opt::transfer_files,
1250 "return=s" => \@opt::return,
1251 "trc=s" => \@opt::trc,
1252 "transfer" => \$opt::transfer,
1253 "cleanup" => \$opt::cleanup,
1254 "basefile|bf=s" => \@opt::basefile,
1255 "B=s" => \$opt::retired,
1256 "ctrlc|ctrl-c" => \$opt::retired,
1257 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1258 "workdir|work-dir|wd=s" => \$opt::workdir,
1259 "W=s" => \$opt::retired,
1260 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1261 "tmpdir|tempdir=s" => \$opt::tmpdir,
1262 "use-compress-program|compress-program=s" => \$opt::compress_program,
1263 "use-decompress-program|decompress-program=s"
1264 => \$opt::decompress_program,
1265 "compress" => \$opt::compress,
1266 "tty" => \$opt::tty,
1267 "T" => \$opt::retired,
1268 "H=i" => \$opt::retired,
1269 "dry-run|dryrun|dr" => \$opt::dryrun,
1270 "progress" => \$opt::progress,
1271 "eta" => \$opt::eta,
1272 "bar" => \$opt::bar,
1273 "shuf" => \$opt::shuf,
1274 "arg-sep|argsep=s" => \$opt::arg_sep,
1275 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1276 "trim=s" => \$opt::trim,
1277 "env=s" => \@opt::env,
1278 "recordenv|record-env" => \$opt::record_env,
1279 "session" => \$opt::session,
1280 "plain" => \$opt::plain,
1281 "profile|J=s" => \@opt::profile,
1282 "pipe|spreadstdin" => \$opt::pipe,
1283 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1284 "recstart=s" => \$opt::recstart,
1285 "recend=s" => \$opt::recend,
1286 "regexp|regex" => \$opt::regexp,
1287 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1288 "files|output-as-files|outputasfiles" => \$opt::files,
1289 "block|block-size|blocksize=s" => \$opt::blocksize,
1290 "tollef" => \$opt::tollef,
1291 "gnu" => \$opt::gnu,
1292 "link|xapply" => \$opt::link,
1293 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1294 # Before changing this line, please read
1295 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1296 "bibtex|citation" => \$opt::citation,
1297 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1298 # Termination and retries
1299 "halt-on-error|halt=s" => \$opt::halt,
1300 "limit=s" => \$opt::limit,
1301 "memfree=s" => \$opt::memfree,
1302 "retries=s" => \$opt::retries,
1303 "timeout=s" => \$opt::timeout,
1304 "termseq|term-seq=s" => \$opt::termseq,
1305 # xargs-compatibility - implemented, man, testsuite
1306 "max-procs|P=s" => \$opt::jobs,
1307 "delimiter|d=s" => \$opt::d,
1308 "max-chars|s=i" => \$opt::max_chars,
1309 "arg-file|a=s" => \@opt::a,
1310 "no-run-if-empty|r" => \$opt::r,
1311 "replace|i:s" => \$opt::i,
1312 "E=s" => \$opt::eof,
1313 "eof|e:s" => \$opt::eof,
1314 "max-args|maxargs|n=i" => \$opt::max_args,
1315 "max-replace-args|N=i" => \$opt::max_replace_args,
1316 "colsep|col-sep|C=s" => \$opt::colsep,
1318 "help|h" => \$opt::help,
1320 "max-lines|l:f" => \$opt::max_lines,
1321 "interactive|p" => \$opt::interactive,
1322 "verbose|t" => \$opt::verbose,
1323 "version|V" => \$opt::version,
1324 "minversion|min-version=i" => \$opt::minversion,
1325 "show-limits|showlimits" => \$opt::show_limits,
1326 "exit|x" => \$opt::x,
1328 "semaphore" => \$opt::semaphore,
1329 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1330 "semaphorename|id=s" => \$opt::semaphorename,
1333 "wait" => \$opt::wait,
1334 # Shebang #!/usr/bin/parallel --shebang
1335 "shebang|hashbang" => \$opt::shebang,
1336 "internal-pipe-means-argfiles"
1337 => \$opt::internal_pipe_means_argfiles,
1338 "Y" => \$opt::retired,
1339 "skip-first-line" => \$opt::skip_first_line,
1340 "bug" => \$opt::bug,
1341 "header=s" => \$opt::header,
1342 "cat" => \$opt::cat,
1343 "fifo" => \$opt::fifo,
1344 "pipepart|pipe-part" => \$opt::pipepart,
1345 "tee" => \$opt::tee,
1346 "shard=s" => \$opt::shard,
1347 "groupby|group-by=s" => \$opt::groupby,
1348 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1349 "embed" => \$opt::embed,
1353 sub get_options_from_array($@) {
1354 # Run GetOptions on @array
1356 # $array_ref = ref to @ARGV to parse
1357 # @keep_only = Keep only these options
1361 # true if parsing worked
1362 # false if parsing failed
1363 # @$array_ref is changed
1364 my ($array_ref, @keep_only) = @_;
1365 if(not @$array_ref) {
1366 # Empty array: No need to look more at that
1369 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1370 # supported everywhere
1372 my $this_is_ARGV = (\@::ARGV == $array_ref);
1373 if(not $this_is_ARGV) {
1374 @save_argv = @::ARGV;
1375 @::ARGV = @{$array_ref};
1377 # If @keep_only set: Ignore all values except @keep_only
1378 my %options = options_hash();
1381 @keep{@keep_only} = @keep_only;
1382 for my $k (grep { not $keep{$_} } keys %options) {
1383 # Store the value of the option in @dummy
1384 $options{$k} = \@dummy;
1387 my $retval = GetOptions(%options);
1388 if(not $this_is_ARGV) {
1389 @{$array_ref} = @::ARGV;
1390 @::ARGV = @save_argv;
1395 sub parse_options(@) {
1398 my @argv_before = @ARGV;
1399 @ARGV = read_options();
1401 # Before changing this line, please read
1402 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1403 if(defined $opt::citation) {
1404 citation(\@argv_before,\@ARGV);
1408 if($opt::nokeeporder) { $opt::keeporder = undef; }
1410 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1411 if($opt::bug) { ::die_bug("test-bug"); }
1412 $Global::debug = $opt::D;
1413 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1414 || $ENV{'SHELL'} || "/bin/sh";
1415 if(not -x $Global::shell and not which($Global::shell)) {
1416 ::error("Shell '$Global::shell' not found.");
1419 ::debug("init","Global::shell $Global::shell\n");
1420 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1421 if(defined $opt::X) { $Global::ContextReplace = 1; }
1422 if(defined $opt::silent) { $Global::verbose = 0; }
1423 if(defined $opt::null) { $/ = "\0"; }
1424 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1425 if(defined $opt::tagstring) {
1426 $opt::tagstring = unquote_printf($opt::tagstring);
1428 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1429 if(defined $opt::q) { $Global::quoting = 1; }
1430 if(defined $opt::r) { $Global::ignore_empty = 1; }
1431 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1432 parse_replacement_string_options();
1433 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1434 if(defined $opt::max_args) {
1435 $Global::max_number_of_args = $opt::max_args;
1437 if(defined $opt::timeout) {
1438 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1440 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1441 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1442 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1444 if(defined $opt::help) { usage(); exit(0); }
1445 if(defined $opt::embed) { embed(); exit(0); }
1446 if(defined $opt::sqlandworker) {
1447 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1449 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1450 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1451 if(defined $opt::csv) {
1452 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1453 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1454 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1455 my $sep = $csv_setting->{sep_char};
1456 $Global::csv = Text::CSV->new($csv_setting)
1457 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1459 if(defined $opt::header) {
1460 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1462 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1463 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1464 if(defined $opt::arg_file_sep) {
1465 $Global::arg_file_sep = $opt::arg_file_sep;
1467 if(defined $opt::number_of_sockets) {
1468 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1470 if(defined $opt::number_of_cpus) {
1471 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1473 if(defined $opt::number_of_cores) {
1474 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1476 if(defined $opt::number_of_threads) {
1477 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1479 if(defined $opt::max_line_length_allowed) {
1480 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1482 if(defined $opt::version) { version(); wait_and_exit(0); }
1483 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1484 if(defined $opt::show_limits) { show_limits(); }
1485 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1486 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1487 if(@opt::return) { push @Global::ret_files, @opt::return; }
1488 if($opt::transfer) {
1489 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1491 push @Global::transfer_files, @opt::transfer_files;
1492 if(not defined $opt::recstart and
1493 not defined $opt::recend) { $opt::recend = "\n"; }
1494 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1495 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1496 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1497 $Global::blocksize = 2**31-1;
1499 if($^O eq "cygwin" and
1500 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1501 and $Global::blocksize > 65535) {
1502 warning("--blocksize >= 64K causes problems on Cygwin.");
1504 $opt::memfree = multiply_binary_prefix($opt::memfree);
1505 check_invalid_option_combinations();
1506 if((defined $opt::fifo or defined $opt::cat)
1507 and not $opt::pipepart) {
1510 if(defined $opt::minversion) {
1511 print $Global::version,"\n";
1512 if($Global::version < $opt::minversion) {
1518 if(not defined $opt::delay) {
1519 # Set --delay to --sshdelay if not set
1520 $opt::delay = $opt::sshdelay;
1522 $opt::delay = multiply_time_units($opt::delay);
1523 if($opt::compress_program) {
1525 $opt::decompress_program ||= $opt::compress_program." -dc";
1528 if(defined $opt::results) {
1529 # Is the output a dir or CSV-file?
1530 if($opt::results =~ /\.csv$/i) {
1531 # CSV with , as separator
1532 $Global::csvsep = ",";
1533 $Global::membuffer ||= 1;
1534 } elsif($opt::results =~ /\.tsv$/i) {
1535 # CSV with TAB as separator
1536 $Global::csvsep = "\t";
1537 $Global::membuffer ||= 1;
1540 if($opt::compress) {
1541 my ($compress, $decompress) = find_compression_program();
1542 $opt::compress_program ||= $compress;
1543 $opt::decompress_program ||= $decompress;
1544 if(($opt::results and not $Global::csvsep) or $opt::files) {
1545 # No need for decompressing
1546 $opt::decompress_program = "cat >/dev/null";
1549 if(defined $opt::dryrun) {
1550 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1554 if(defined $opt::nonall) {
1555 # Append a dummy empty argument if there are no arguments
1556 # on the command line to avoid reading from STDIN.
1557 # arg_sep = random 50 char
1558 # \0noarg => nothing (not the empty string)
1559 $Global::arg_sep = join "",
1560 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1561 push @ARGV, $Global::arg_sep, "\0noarg";
1563 if(defined $opt::tee) {
1564 if(not defined $opt::jobs) {
1568 if(defined $opt::tty) {
1569 # Defaults for --tty: -j1 -u
1570 # Can be overridden with -jXXX -g
1571 if(not defined $opt::jobs) {
1574 if(not defined $opt::group) {
1579 push @Global::ret_files, @opt::trc;
1580 if(not @Global::transfer_files) {
1581 # Defaults to --transferfile {}
1582 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1586 if(defined $opt::max_lines) {
1587 if($opt::max_lines eq "-0") {
1588 # -l -0 (swallowed -0)
1589 $opt::max_lines = 1;
1592 } elsif ($opt::max_lines == 0) {
1593 # If not given (or if 0 is given) => 1
1594 $opt::max_lines = 1;
1596 $Global::max_lines = $opt::max_lines;
1597 if(not $opt::pipe) {
1598 # --pipe -L means length of record - not max_number_of_args
1599 $Global::max_number_of_args ||= $Global::max_lines;
1603 # Read more than one arg at a time (-L, -N)
1604 if(defined $opt::L) {
1605 $Global::max_lines = $opt::L;
1606 if(not $opt::pipe) {
1607 # --pipe -L means length of record - not max_number_of_args
1608 $Global::max_number_of_args ||= $Global::max_lines;
1611 if(defined $opt::max_replace_args) {
1612 $Global::max_number_of_args = $opt::max_replace_args;
1613 $Global::ContextReplace = 1;
1615 if((defined $opt::L or defined $opt::max_replace_args)
1617 not ($opt::xargs or $opt::m)) {
1618 $Global::ContextReplace = 1;
1620 if(defined $opt::tag and not defined $opt::tagstring) {
1622 $opt::tagstring = $Global::parensleft.$Global::parensright;
1624 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1625 # Deal with ::: :::+ :::: and ::::+
1626 @ARGV = read_args_from_command_line();
1630 if(defined $opt::eta) { $opt::progress = $opt::eta; }
1631 if(defined $opt::bar) { $opt::progress = $opt::bar; }
1633 # Funding a free software project is hard. GNU Parallel is no
1634 # exception. On top of that it seems the less visible a project
1635 # is, the harder it is to get funding. And the nature of GNU
1636 # Parallel is that it will never be seen by "the guy with the
1637 # checkbook", but only by the people doing the actual work.
1639 # This problem has been covered by others - though no solution has
1641 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1642 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1644 # If you want GNU Parallel to be maintained in the future, and not
1645 # just wither away like so many other free software tools, you
1646 # need to help finance the development.
1648 # The citation notice is a simple way of doing so, as citations
1649 # makes it possible to me to get a job where I can maintain GNU
1650 # Parallel as part of the job.
1652 # This means you can help financing development
1654 # WITHOUT PAYING A SINGLE CENT!
1656 # Before implementing the citation notice it was discussed with
1658 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
1660 # Having to spend 10 seconds on running 'parallel --citation' once
1661 # is no doubt not an ideal solution, but no one has so far come up
1662 # with an ideal solution - neither for funding GNU Parallel nor
1663 # other free software.
1665 # If you believe you have the perfect solution, you should try it
1666 # out, and if it works, you should post it on the email
1667 # list. Ideas that will cost work and which have not been tested
1668 # are, however, unlikely to be prioritized.
1670 # Please note that GPL version 3 gives you the right to fork GNU
1671 # Parallel under a new name, but it does not give you the right to
1672 # distribute modified copies with the citation notice disabled in
1673 # a way where the software can be confused with GNU Parallel. To
1674 # do that you need to be the owner of the GNU Parallel
1675 # trademark. The xt:Commerce case shows this.
1677 # Description of the xt:Commerce case in OLG Duesseldorf
1678 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1679 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1681 # The verdict in German
1682 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1683 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1685 # Other free software limiting derivates by the same name
1686 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
1687 # https://tm.joomla.org/trademark-faq.html
1688 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
1690 # Running 'parallel --citation' one single time takes less than 10
1691 # seconds, and will silence the citation notice for future
1692 # runs. If that is too much trouble for you, why not use one of
1693 # the alternatives instead?
1694 # See a list in: 'man parallel_alternatives'
1696 # If you are an honest person please read the above before
1697 # changing this line.
1702 if($ENV{'PARALLEL_ENV'}) {
1703 # Read environment and set $Global::parallel_env
1704 # Must be done before is_acceptable_command_line_length()
1705 my $penv = $ENV{'PARALLEL_ENV'};
1706 # unset $PARALLEL_ENV: It should not be given to children
1707 # because it takes up a lot of env space
1708 delete $ENV{'PARALLEL_ENV'};
1710 # This is a file/fifo: Replace envvar with content of file
1711 open(my $parallel_env, "<", $penv) ||
1712 ::die_bug("Cannot read parallel_env from $penv");
1713 local $/; # Put <> in slurp mode
1714 $penv = <$parallel_env>;
1715 close $parallel_env;
1717 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
1718 $penv =~ s/\001/\n/g;
1720 ::warning('\0 (NUL) in environment is not supported');
1722 $Global::parallel_env = $penv;
1727 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
1728 # As we do not know the max line length on the remote machine
1729 # long commands generated by xargs may fail
1730 # If $opt::max_replace_args is set, it is probably safe
1731 ::warning("Using -X or -m with --sshlogin may fail.");
1734 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
1737 if($opt::sqlmaster or $opt::sqlworker) {
1738 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
1740 if($opt::sqlworker) { $Global::membuffer ||= 1; }
1743 sub check_invalid_option_combinations() {
1744 if(defined $opt::timeout and
1745 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
1746 ::error("--timeout must be seconds or percentage.");
1749 if(defined $opt::fifo and defined $opt::cat) {
1750 ::error("--fifo cannot be combined with --cat.");
1751 ::wait_and_exit(255);
1753 if(defined $opt::retries and defined $opt::roundrobin) {
1754 ::error("--retries cannot be combined with --roundrobin.");
1755 ::wait_and_exit(255);
1757 if(defined $opt::pipepart and
1758 (defined $opt::L or defined $opt::max_lines
1759 or defined $opt::max_replace_args)) {
1760 ::error("--pipepart is incompatible with --max-replace-args, ".
1761 "--max-lines, and -L.");
1764 if(defined $opt::group and $opt::ungroup) {
1765 ::error("--group cannot be combined with --ungroup.");
1766 ::wait_and_exit(255);
1768 if(defined $opt::group and $opt::linebuffer) {
1769 ::error("--group cannot be combined with --line-buffer.");
1770 ::wait_and_exit(255);
1772 if(defined $opt::ungroup and $opt::linebuffer) {
1773 ::error("--ungroup cannot be combined with --line-buffer.");
1774 ::wait_and_exit(255);
1776 if(defined $opt::tollef and not $opt::gnu) {
1777 ::error("--tollef has been retired.",
1778 "Remove --tollef or use --gnu to override --tollef.");
1779 ::wait_and_exit(255);
1781 if(defined $opt::retired) {
1782 ::error("-g has been retired. Use --group.",
1783 "-B has been retired. Use --bf.",
1784 "-T has been retired. Use --tty.",
1785 "-U has been retired. Use --er.",
1786 "-W has been retired. Use --wd.",
1787 "-Y has been retired. Use --shebang.",
1788 "-H has been retired. Use --halt.",
1789 "--sql has been retired. Use --sqlmaster.",
1790 "--ctrlc has been retired.",
1791 "--noctrlc has been retired.");
1792 ::wait_and_exit(255);
1795 if(not $opt::pipe) {
1798 if($opt::remove_rec_sep) {
1799 ::error("--remove-rec-sep is not compatible with --groupby");
1800 ::wait_and_exit(255);
1802 if($opt::recstart) {
1803 ::error("--recstart is not compatible with --groupby");
1804 ::wait_and_exit(255);
1806 if($opt::recend ne "\n") {
1807 ::error("--recend is not compatible with --groupby");
1808 ::wait_and_exit(255);
1810 if($opt::pipepart) {
1811 # TODO This may be possible to do later
1812 # Finding split points might be a bitch though
1813 ::error("--pipepart is not compatible with --groupby");
1814 ::wait_and_exit(255);
1819 sub init_globals() {
1821 $Global::version = 20190504;
1822 $Global::progname = 'parallel';
1823 $Global::infinity = 2**31;
1825 $Global::verbose = 0;
1826 $Global::quoting = 0;
1827 $Global::total_completed = 0;
1828 # Read only table with default --rpl values
1832 '{#}' => '1 $_=$job->seq()',
1833 '{%}' => '1 $_=$job->slot()',
1836 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
1837 '$_ = dirname($_);'),
1838 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
1839 '{.}' => 's:\.[^/.]+$::',
1844 # = {.}.{+.} = {+/}/{/.}.{+.}
1845 # = {..}.{+..} = {+/}/{/..}.{+..}
1846 # = {...}.{+...} = {+/}/{/...}.{+...}
1847 '{+/}' => 's:/[^/]*$::',
1848 '{+.}' => 's:.*\.::',
1849 '{+..}' => 's:.*\.([^.]*\.):$1:',
1850 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
1851 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
1852 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1853 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1854 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1855 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
1856 # {##} = number of jobs
1857 '{##}' => '$_=total_jobs()',
1859 '{:-([^}]+?)}' => '$_ ||= $$1',
1861 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
1863 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
1865 '{#([^#}][^}]*?)}' => 's/^$$1//;',
1867 '{%([^}]+?)}' => 's/$$1$//;',
1868 # Bash ${a/def/ghi} ${a/def/}
1869 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
1871 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
1873 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
1875 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
1877 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
1879 # Modifiable copy of %Global::replace
1880 %Global::rpl = %Global::replace;
1882 $Global::ignore_empty = 0;
1883 $Global::interactive = 0;
1884 $Global::stderr_verbose = 0;
1885 $Global::default_simultaneous_sshlogins = 9;
1886 $Global::exitstatus = 0;
1887 $Global::arg_sep = ":::";
1888 $Global::arg_file_sep = "::::";
1889 $Global::trim = 'n';
1890 $Global::max_jobs_running = 0;
1891 $Global::job_already_run = '';
1892 $ENV{'TMPDIR'} ||= "/tmp";
1893 $ENV{'OLDPWD'} = $ENV{'PWD'};
1894 if(not $ENV{HOME}) {
1895 # $ENV{HOME} is sometimes not set if called from PHP
1896 ::warning("\$HOME not set. Using /tmp.");
1897 $ENV{HOME} = "/tmp";
1899 # no warnings to allow for undefined $XDG_*
1900 no warnings 'uninitialized';
1901 # $xdg_config_home is needed to make env_parallel.fish stop complaining
1902 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
1903 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
1904 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
1905 # Keep only dirs that exist
1906 @Global::config_dirs =
1908 $ENV{'PARALLEL_HOME'},
1909 (map { "$_/parallel" }
1911 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
1912 $ENV{'HOME'} . "/.parallel");
1913 # Use first dir as config dir
1914 $Global::config_dir = $Global::config_dirs[0] ||
1915 $ENV{'HOME'} . "/.parallel";
1916 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
1917 # Keep only dirs that exist
1918 @Global::cache_dirs =
1920 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
1921 $Global::cache_dir = $Global::cache_dirs[0] ||
1922 $ENV{'HOME'} . "/.parallel";
1926 # $opt::halt flavours
1929 # $Global::halt_when
1930 # $Global::halt_fail
1931 # $Global::halt_success
1933 # $Global::halt_count
1934 if(defined $opt::halt) {
1935 my %halt_expansion = (
1937 "1" => "soon,fail=1",
1938 "2" => "now,fail=1",
1939 "-1" => "soon,success=1",
1940 "-2" => "now,success=1",
1942 # Expand -2,-1,0,1,2 into long form
1943 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
1944 # --halt 5% == --halt soon,fail=5%
1945 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
1946 # Split: soon,fail=5%
1947 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
1948 if(not grep { $when eq $_ } qw(never soon now)) {
1949 ::error
("--halt must have 'never', 'soon', or 'now'.");
1950 ::wait_and_exit
(255);
1952 $Global::halt_when
= $when;
1953 if($when ne "never") {
1954 if($fail_success eq "fail") {
1955 $Global::halt_fail
= 1;
1956 } elsif($fail_success eq "success") {
1957 $Global::halt_success
= 1;
1958 } elsif($fail_success eq "done") {
1959 $Global::halt_done
= 1;
1961 ::error
("--halt $when must be followed by ,success or ,fail.");
1962 ::wait_and_exit
(255);
1964 if($pct_count =~ /^(\d+)%$/) {
1965 $Global::halt_pct
= $1/100;
1966 } elsif($pct_count =~ /^(\d+)$/) {
1967 $Global::halt_count
= $1;
1969 ::error
("--halt $when,$fail_success ".
1970 "must be followed by ,number or ,percent%.");
1971 ::wait_and_exit
(255);
1977 sub parse_replacement_string_options
() {
1981 # $Global::parensleft
1982 # $Global::parensright
1984 # $Global::parensleft
1985 # $Global::parensright
1991 # $opt::basenamereplace
1992 # $opt::dirnamereplace
1995 # $opt::basenameextensionreplace
1998 # Modify %Global::rpl
1999 # Replace $old with $new
2000 my ($old,$new) = @_;
2002 $Global::rpl
{$new} = $Global::rpl
{$old};
2003 delete $Global::rpl
{$old};
2006 my $parens = "{==}";
2007 if(defined $opt::parens
) { $parens = $opt::parens
; }
2008 my $parenslen = 0.5*length $parens;
2009 $Global::parensleft
= substr($parens,0,$parenslen);
2010 $Global::parensright
= substr($parens,$parenslen);
2011 if(defined $opt::plus
) { %Global::rpl
= (%Global::plus
,%Global::rpl
); }
2012 if(defined $opt::I
) { rpl
('{}',$opt::I
); }
2013 if(defined $opt::i
and $opt::i
) { rpl
('{}',$opt::i
); }
2014 if(defined $opt::U
) { rpl
('{.}',$opt::U
); }
2015 if(defined $opt::basenamereplace
) { rpl
('{/}',$opt::basenamereplace
); }
2016 if(defined $opt::dirnamereplace
) { rpl
('{//}',$opt::dirnamereplace
); }
2017 if(defined $opt::seqreplace
) { rpl
('{#}',$opt::seqreplace
); }
2018 if(defined $opt::slotreplace
) { rpl
('{%}',$opt::slotreplace
); }
2019 if(defined $opt::basenameextensionreplace
) {
2020 rpl
('{/.}',$opt::basenameextensionreplace
);
2023 # Create $Global::rpl entries for --rpl options
2024 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2025 my ($shorthand,$long) = split/ /,$_,2;
2026 $Global::rpl
{$shorthand} = $long;
2030 sub parse_semaphore
() {
2031 # Semaphore defaults
2032 # Must be done before computing number of processes and max_line_length
2033 # because when running as a semaphore GNU Parallel does not read args
2036 # $Global::semaphore
2037 # $opt::semaphoretimeout
2038 # $Semaphore::timeout
2039 # $opt::semaphorename
2047 # @Global::unget_argv
2048 # $Global::default_simultaneous_sshlogins
2050 # $Global::interactive
2051 $Global::semaphore
||= ($0 =~ m
:(^|/)sem
$:); # called as 'sem'
2052 if(defined $opt::semaphore
) { $Global::semaphore
= 1; }
2053 if(defined $opt::semaphoretimeout
) { $Global::semaphore
= 1; }
2054 if(defined $opt::semaphorename
) { $Global::semaphore
= 1; }
2055 if(defined $opt::fg
and not $opt::tmux
and not $opt::tmuxpane
) {
2056 $Global::semaphore
= 1;
2058 if(defined $opt::bg
) { $Global::semaphore
= 1; }
2059 if(defined $opt::wait and not $opt::sqlmaster
) {
2060 $Global::semaphore
= 1; @ARGV = "true";
2062 if($Global::semaphore
) {
2064 # A semaphore does not take input from neither stdin nor file
2065 ::error
("A semaphore does not take input from neither stdin nor a file\n");
2066 ::wait_and_exit
(255);
2068 @opt::a
= ("/dev/null");
2069 # Append a dummy empty argument
2070 # \0 => nothing (not the empty string)
2071 push(@Global::unget_argv
, [Arg
->new("\0noarg")]);
2072 $Semaphore::timeout
= $opt::semaphoretimeout
|| 0;
2073 if(defined $opt::semaphorename
) {
2074 $Semaphore::name
= $opt::semaphorename
;
2077 $Semaphore::name
= `tty`;
2078 chomp $Semaphore::name
;
2080 $Semaphore::fg
= $opt::fg
;
2081 $Semaphore::wait = $opt::wait;
2082 $Global::default_simultaneous_sshlogins
= 1;
2083 if(not defined $opt::jobs
) {
2086 if($Global::interactive
and $opt::bg
) {
2087 ::error
("Jobs running in the ".
2088 "background cannot be interactive.");
2089 ::wait_and_exit
(255);
2095 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2097 my $ignore_filename = $Global::config_dir
. "/ignored_vars";
2098 if(open(my $vars_fh, ">", $ignore_filename)) {
2099 print $vars_fh map { $_,"\n" } keys %ENV;
2101 ::error
("Cannot write to $ignore_filename.");
2102 ::wait_and_exit
(255);
2107 # Open joblog as specified by --joblog
2110 # $opt::resume_failed
2113 # $Global::job_already_run
2116 if(($opt::resume
or $opt::resume_failed
)
2118 not ($opt::joblog
or $opt::results
)) {
2119 ::error
("--resume and --resume-failed require --joblog or --results.");
2120 ::wait_and_exit
(255);
2122 if(defined $opt::joblog
and $opt::joblog
=~ s/^\+//) {
2123 # --joblog +filename = append to filename
2130 not $opt::sqlworker
)) {
2131 # Do not log if --sqlworker
2132 if($opt::resume
|| $opt::resume_failed
|| $opt::retry_failed
) {
2133 if(open(my $joblog_fh, "<", $opt::joblog
)) {
2135 # Override $/ with \n because -d might be set
2137 # If there is a header: Open as append later
2138 $append = <$joblog_fh>;
2140 if($opt::retry_failed
) {
2141 # Make a regexp that only matches commands with exit+signal=0
2142 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2143 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2145 while(<$joblog_fh>) {
2146 if(/$joblog_regexp/o) {
2147 # This is 30% faster than set_job_already_run($1);
2148 vec($Global::job_already_run
,($1||0),1) = 1;
2149 $Global::total_completed
++;
2150 $group[$1-1] = "true";
2151 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2152 # Grab out the command
2156 ::error
("Format of '$opt::joblog' is wrong: $_");
2157 ::wait_and_exit
(255);
2161 my ($outfh,$name) = ::tmpfile
(SUFFIX
=> ".arg");
2163 # Put args into argfile
2164 if(grep /\0/, @group) {
2165 # force --null to deal with \n in commandlines
2166 ::warning
("Command lines contain newline. Forcing --null.");
2170 # Replace \0 with '\n' as used in print_joblog()
2171 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2173 exit_if_disk_full
();
2174 # Set filehandle to -a
2177 # Remove $command (so -a is run)
2180 if($opt::resume
|| $opt::resume_failed
) {
2181 if($opt::resume_failed
) {
2182 # Make a regexp that only matches commands with exit+signal=0
2183 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2184 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2186 # Just match the job number
2187 $joblog_regexp='^(\d+)';
2189 while(<$joblog_fh>) {
2190 if(/$joblog_regexp/o) {
2191 # This is 30% faster than set_job_already_run($1);
2192 vec($Global::job_already_run
,($1||0),1) = 1;
2193 $Global::total_completed
++;
2194 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2195 ::error
("Format of '$opt::joblog' is wrong: $_");
2196 ::wait_and_exit
(255);
2202 # $opt::null may be set if the commands contain \n
2203 if($opt::null
) { $/ = "\0"; }
2206 # Do not write to joblog in a dry-run
2207 if(not open($Global::joblog
, ">", "/dev/null")) {
2208 ::error
("Cannot write to --joblog $opt::joblog.");
2209 ::wait_and_exit
(255);
2213 if(not open($Global::joblog
, ">>", $opt::joblog
)) {
2214 ::error
("Cannot append to --joblog $opt::joblog.");
2215 ::wait_and_exit
(255);
2218 if($opt::joblog
eq "-") {
2219 # Use STDOUT as joblog
2220 $Global::joblog
= $Global::fd
{1};
2221 } elsif(not open($Global::joblog
, ">", $opt::joblog
)) {
2222 # Overwrite the joblog
2223 ::error
("Cannot write to --joblog $opt::joblog.");
2224 ::wait_and_exit
(255);
2226 print $Global::joblog
2227 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2228 "Send", "Receive", "Exitval", "Signal", "Command"
2237 if($opt::results
eq "-.csv"
2239 $opt::results
eq "-.tsv") {
2240 # Output as CSV/TSV on stdout
2241 open $Global::csv_fh
, ">&", "STDOUT" or
2242 ::die_bug
("Can't dup STDOUT in csv: $!");
2243 # Do not print any other output to STDOUT
2244 # by forcing all other output to /dev/null
2245 open my $fd, ">", "/dev/null" or
2246 ::die_bug
("Can't >/dev/null in csv: $!");
2247 $Global::fd
{1} = $fd;
2248 $Global::fd
{2} = $fd;
2249 } elsif($Global::csvsep
) {
2250 if(not open($Global::csv_fh
,">",$opt::results
)) {
2251 ::error
("Cannot open results file `$opt::results': ".
2259 sub find_compression_program
() {
2260 # Find a fast compression program
2262 # $compress_program = compress program with options
2263 # $decompress_program = decompress program with options
2265 # Search for these. Sorted by speed on 128 core
2267 # seq 120000000|shuf > 1gb &
2269 # apt install make g++ htop
2270 # wget -O - pi.dk/3 | bash
2271 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2272 # git clone https://github.com/facebook/zstd.git
2273 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2274 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2275 # chmod +x /usr/local/bin/lrz
2277 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2278 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2279 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2280 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2284 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2286 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2287 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2288 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2289 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2290 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2292 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2293 lrz pxz bzip2 lzma xz clzip);
2296 return ("$p -c -1","$p -dc");
2300 return ("cat","cat");
2303 sub read_options
() {
2304 # Read options from command line, profile and $PARALLEL
2306 # $opt::shebang_wrap
2314 # @ARGV_no_opt = @ARGV without --options
2316 # This must be done first as this may exec myself
2317 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2318 $ARGV[0] =~ /^--shebang-?wrap/ or
2319 $ARGV[0] =~ /^--hashbang/)) {
2320 # Program is called from #! line in script
2321 # remove --shebang-wrap if it is set
2322 $opt::shebang_wrap
= ($ARGV[0] =~ s/^--shebang-?wrap *//);
2323 # remove --shebang if it is set
2324 $opt::shebang
= ($ARGV[0] =~ s/^--shebang *//);
2325 # remove --hashbang if it is set
2326 $opt::shebang
.= ($ARGV[0] =~ s/^--hashbang *//);
2328 my $argfile = Q
(pop @ARGV);
2329 # exec myself to split $ARGV[0] into separate fields
2330 exec "$0 --skip-first-line -a $argfile @ARGV";
2332 if($opt::shebang_wrap
) {
2335 if ($^O
eq 'freebsd') {
2336 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2337 my @nooptions = @ARGV;
2338 get_options_from_array
(\
@nooptions);
2339 while($#ARGV > $#nooptions) {
2340 push @options, shift @ARGV;
2342 while(@ARGV and $ARGV[0] ne ":::") {
2343 push @parser, shift @ARGV;
2345 if(@ARGV and $ARGV[0] eq ":::") {
2349 @options = shift @ARGV;
2351 my $script = Q
(shift @ARGV);
2352 # exec myself to split $ARGV[0] into separate fields
2353 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2357 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2358 ::warning
("--shebang and --shebang-wrap must be the first argument.\n");
2361 Getopt
::Long
::Configure
("bundling","require_order");
2362 my @ARGV_copy = @ARGV;
2363 my @ARGV_orig = @ARGV;
2364 # Check if there is a --profile to set @opt::profile
2365 get_options_from_array
(\
@ARGV_copy,"profile|J=s","plain") || die_usage
();
2366 my @ARGV_profile = ();
2368 if(not $opt::plain
) {
2369 # Add options from $PARALLEL_HOME/config and other profiles
2370 my @config_profiles = (
2371 "/etc/parallel/config",
2372 (map { "$_/config" } @Global::config_dirs
),
2373 $ENV{'HOME'}."/.parallelrc");
2374 my @profiles = @config_profiles;
2376 # --profile overrides default profiles
2378 for my $profile (@opt::profile
) {
2379 # Look for the $profile in . and @Global::config_dirs
2380 push @profiles, grep { -r
$_ }
2381 map { "$_/$profile" } ".", @Global::config_dirs
;
2384 for my $profile (@profiles) {
2387 open (my $in_fh, "<", $profile) ||
2388 ::die_bug
("read-profile: $profile");
2392 push @ARGV_profile, shell_words
($_);
2396 if(grep /^$profile$/, @config_profiles) {
2397 # config file is not required to exist
2399 ::error
("$profile not readable.");
2404 # Add options from shell variable $PARALLEL
2405 if($ENV{'PARALLEL'}) {
2406 push @ARGV_env, shell_words
($ENV{'PARALLEL'});
2408 # Add options from env_parallel.csh via $PARALLEL_CSH
2409 if($ENV{'PARALLEL_CSH'}) {
2410 push @ARGV_env, shell_words
($ENV{'PARALLEL_CSH'});
2413 Getopt
::Long
::Configure
("bundling","require_order");
2414 get_options_from_array
(\
@ARGV_profile) || die_usage
();
2415 get_options_from_array
(\
@ARGV_env) || die_usage
();
2416 get_options_from_array
(\
@ARGV) || die_usage
();
2417 # What were the options given on the command line?
2418 # Used to start --sqlworker
2419 my $ai = arrayindex
(\
@ARGV_orig, \
@ARGV);
2420 @Global::options_in_argv
= @ARGV_orig[0..$ai-1];
2421 # Prepend non-options to @ARGV (such as commands like 'nice')
2422 unshift @ARGV, @ARGV_profile, @ARGV_env;
2427 # Similar to Perl's index function, but for arrays
2429 # $arr_ref1 = ref to @array1 to search in
2430 # $arr_ref2 = ref to @array2 to search for
2432 # $pos = position of @array1 in @array2, -1 if not found
2433 my ($arr_ref1,$arr_ref2) = @_;
2434 my $array1_as_string = join "", map { "\0".$_ } @
$arr_ref1;
2435 my $array2_as_string = join "", map { "\0".$_ } @
$arr_ref2;
2436 my $i = index($array1_as_string,$array2_as_string,0);
2437 if($i == -1) { return -1 }
2438 my @before = split /\0/, substr($array1_as_string,0,$i);
2442 sub read_args_from_command_line
() {
2443 # Arguments given on the command line after:
2444 # ::: ($Global::arg_sep)
2445 # :::: ($Global::arg_file_sep)
2446 # :::+ ($Global::arg_sep with --link)
2447 # ::::+ ($Global::arg_file_sep with --link)
2448 # Removes the arguments from @ARGV and:
2449 # - puts filenames into -a
2450 # - puts arguments into files and add the files to -a
2451 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2453 # @::ARGV = command option ::: arg arg arg :::: argfiles
2456 # $Global::arg_file_sep
2457 # $opt::internal_pipe_means_argfiles
2461 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2463 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2464 if($arg eq $Global::arg_sep
2466 $arg eq $Global::arg_sep
."+"
2468 $arg eq $Global::arg_file_sep
2470 $arg eq $Global::arg_file_sep
."+") {
2471 my $group_sep = $arg; # This group of arguments is args or argfiles
2473 while(defined ($arg = shift @ARGV)) {
2474 if($arg eq $Global::arg_sep
2476 $arg eq $Global::arg_sep
."+"
2478 $arg eq $Global::arg_file_sep
2480 $arg eq $Global::arg_file_sep
."+") {
2481 # exit while loop if finding new separator
2484 # If not hitting ::: :::+ :::: or ::::+
2485 # Append it to the group
2489 my $is_linked = ($group_sep =~ /\+$/) ?
1 : 0;
2490 my $is_file = ($group_sep eq $Global::arg_file_sep
2492 $group_sep eq $Global::arg_file_sep
."+");
2495 push @opt::linkinputsource
, map { $is_linked } @group;
2498 push @opt::linkinputsource
, $is_linked;
2501 or ($opt::internal_pipe_means_argfiles
and $opt::pipe)
2503 # Group of file names on the command line.
2504 # Append args into -a
2505 push @opt::a
, @group;
2507 # Group of arguments on the command line.
2508 # Put them into a file.
2510 my ($outfh,$name) = ::tmpfile
(SUFFIX
=> ".arg");
2512 # Put args into argfile
2513 print $outfh map { $_,$/ } @group;
2515 exit_if_disk_full
();
2516 # Append filehandle to -a
2517 push @opt::a
, $outfh;
2520 # $arg is ::: :::+ :::: or ::::+
2521 # so there is another group
2524 # $arg is undef -> @ARGV empty
2528 push @new_argv, $arg;
2530 # Output: @ARGV = command to run with options
2536 unlink keys %Global::unlink;
2537 map { rmdir $_ } keys %Global::unlink;
2538 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
2539 for(keys %Global::sshmaster
) {
2540 # If 'ssh -M's are running: kill them
2546 sub __QUOTING_ARGUMENTS_FOR_SHELL__
() {}
2548 sub shell_quote
(@
) {
2550 # @strings = strings to be quoted
2552 # @shell_quoted_strings = string quoted as needed by the shell
2553 return wantarray ?
(map { Q
($_) } @_) : (join" ",map { Q
($_) } @_);
2556 sub shell_quote_scalar_rc
($) {
2557 # Quote for the rc-shell
2562 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2563 # A string was replaced
2564 # No need to test for "" or \0
2567 } elsif($a eq "\0") {
2574 sub shell_quote_scalar_csh
($) {
2578 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2579 # This is 1% faster than the above
2580 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2582 # quote newline in csh as \\\n
2583 ($a =~ s/[\n]/"\\\n"/go)) {
2584 # A string was replaced
2585 # No need to test for "" or \0
2588 } elsif($a eq "\0") {
2595 sub shell_quote_scalar_default
($) {
2596 # Quote for other shells (Bourne compatibles)
2598 # $string = string to be quoted
2600 # $shell_quoted = string quoted as needed by the shell
2602 if($par =~ /[^-_.+a-z0-9\/]/i
) {
2603 $par =~ s/'/'"'"'/g; # "-quote single quotes
2604 $par = "'$par'"; # '-quote entire string
2605 $par =~ s/^''|''$//g; # Remove unneeded '' at ends
2607 } elsif ($par eq "") {
2615 sub shell_quote_scalar
($) {
2616 # Quote the string so the shell will not expand any special chars
2618 # $string = string to be quoted
2620 # $shell_quoted = string quoted as needed by the shell
2622 # Speed optimization: Choose the correct shell_quote_scalar_*
2623 # and call that directly from now on
2624 no warnings
'redefine';
2625 if($Global::cshell
) {
2627 *shell_quote_scalar
= \
&shell_quote_scalar_csh
;
2628 } elsif($Global::shell
=~ m
:(^|/)rc
$:) {
2630 *shell_quote_scalar
= \
&shell_quote_scalar_rc
;
2633 *shell_quote_scalar
= \
&shell_quote_scalar_default
;
2635 # The sub is now redefined. Call it
2636 return shell_quote_scalar
($_[0]);
2640 # Q alias for ::shell_quote_scalar
2641 my $ret = shell_quote_scalar
($_[0]);
2642 no warnings
'redefine';
2643 *Q
= \
&::shell_quote_scalar
;
2647 sub shell_quote_file
($) {
2648 # Quote the string so shell will not expand any special chars
2649 # and prepend ./ if needed
2651 # $filename = filename to be shell quoted
2653 # $quoted_filename = filename quoted with \ and ./ if needed
2656 if($a =~ m
:^/: or $a =~ m:^\./:) {
2657 # /abs/path or ./rel/path => skip
2659 # rel/path => ./rel/path
2666 sub shell_words
(@
) {
2668 # $string = shell line
2670 # @shell_words = $string split into words as shell would do
2671 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
2672 return Text
::ParseWords
::shellwords
(@_);
2675 sub perl_quote_scalar
($) {
2676 # Quote the string so perl's eval will not expand any special chars
2678 # $string = string to be quoted
2680 # $perl_quoted = string quoted with \ as needed by perl's eval
2683 $a =~ s/[\\\"\$\@]/\\$&/go;
2688 # -w complains about prototype
2690 # pQ alias for ::perl_quote_scalar
2691 my $ret = perl_quote_scalar
($_[0]);
2692 *pQ
= \
&::perl_quote_scalar
;
2696 sub unquote_printf
() {
2697 # Convert \t \n \r \000 \0
2699 # $string = string with \t \n \r \num \0
2701 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
2706 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
2707 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
2712 sub __FILEHANDLES__
() {}
2715 sub save_stdin_stdout_stderr
() {
2716 # Remember the original STDIN, STDOUT and STDERR
2717 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
2720 # $Global::original_stderr
2721 # $Global::original_stdin
2724 # TODO Disabled until we have an open3 that will take n filehandles
2725 # for my $fdno (1..61) {
2726 # # /dev/fd/62 and above are used by bash for <(cmd)
2727 # # Find file descriptors that are already opened (by the shell)
2728 # Only focus on stdout+stderr for now
2729 for my $fdno (1..2) {
2731 # 2-argument-open is used to be compatible with old perl 5.8.0
2732 # bug #43570: Perl 5.8.0 creates 61 files
2733 if(open($fh,">&=$fdno")) {
2734 $Global::fd
{$fdno}=$fh;
2737 open $Global::original_stderr
, ">&", "STDERR" or
2738 ::die_bug
("Can't dup STDERR: $!");
2739 open $Global::status_fd
, ">&", "STDERR" or
2740 ::die_bug
("Can't dup STDERR: $!");
2741 open $Global::original_stdin
, "<&", "STDIN" or
2742 ::die_bug
("Can't dup STDIN: $!");
2745 sub enough_file_handles
() {
2746 # Check that we have enough filehandles available for starting
2752 # 1 if ungrouped (thus not needing extra filehandles)
2753 # 0 if too few filehandles
2754 # 1 if enough filehandles
2755 if(not $opt::ungroup
) {
2757 my $enough_filehandles = 1;
2758 # perl uses 7 filehandles for something?
2759 # open3 uses 2 extra filehandles temporarily
2760 # We need a filehandle for each redirected file descriptor
2761 # (normally just STDOUT and STDERR)
2762 for my $i (1..(7+2+keys %Global::fd
)) {
2763 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
2765 for (values %fh) { close $_; }
2766 return $enough_filehandles;
2768 # Ungrouped does not need extra file handles
2773 sub open_or_exit
($) {
2774 # Open a file name or exit if the file cannot be opened
2776 # $file = filehandle or filename to open
2778 # $Global::original_stdin
2780 # $fh = file handle to read-opened file
2783 return ($Global::original_stdin
|| *STDIN
);
2785 if(ref $file eq "GLOB") {
2786 # This is an open filehandle
2790 if(not open($fh, "<", $file)) {
2791 ::error
("Cannot open input file `$file': No such file or directory.");
2797 sub set_fh_blocking
($) {
2798 # Set filehandle as blocking
2800 # $fh = filehandle to be blocking
2804 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
2806 # Get the current flags on the filehandle
2807 fcntl($fh, &F_GETFL, $flags) || die $!;
2808 # Remove non-blocking from the flags
2809 $flags &= ~&O_NONBLOCK;
2810 # Set the flags on the filehandle
2811 fcntl($fh, &F_SETFL, $flags) || die $!;
2814 sub set_fh_non_blocking($) {
2815 # Set filehandle as non-blocking
2817 # $fh = filehandle to be blocking
2821 $Global::use{"Fcntl
"} ||= eval "use Fcntl
qw(:DEFAULT :flock); 1;";
2823 # Get the current flags on the filehandle
2824 fcntl($fh, &F_GETFL, $flags) || die $!;
2825 # Add non-blocking to the flags
2826 $flags |= &O_NONBLOCK;
2827 # Set the flags on the filehandle
2828 fcntl($fh, &F_SETFL, $flags) || die $!;
2832 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
2835 # Variable structure:
2837 # $Global::running{$pid} = Pointer to Job-object
2838 # @Global::virgin_jobs = Pointer to Job-object that have received no input
2839 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
2840 # $Global::total_running = total number of running jobs
2841 # $Global::total_started = total jobs started
2842 # $Global::max_procs_file = filename if --jobs is given a filename
2843 # $Global::JobQueue = JobQueue object for the queue of jobs
2844 # $Global::timeoutq = queue of times where jobs timeout
2845 # $Global::newest_job = Job object of the most recent job started
2846 # $Global::newest_starttime = timestamp of $Global::newest_job
2848 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
2849 # $Global::start_no_new_jobs = should more jobs be started?
2850 # $Global::original_stderr = file handle for STDERR when the program started
2851 # $Global::total_started = total number of jobs started
2852 # $Global::joblog = filehandle of joblog
2853 # $Global::debug = Is debugging on?
2854 # $Global::exitstatus = status code of GNU Parallel
2855 # $Global::quoting = quote the command to run
2857 sub init_run_jobs() {
2858 # Set Global variables and progress signal handlers
2859 # Do the copying of basefiles
2861 $Global::total_running = 0;
2862 $Global::total_started = 0;
2863 $SIG{USR1} = \&list_running_jobs;
2864 $SIG{USR2} = \&toggle_progress;
2865 if(@opt::basefile) { setup_basefile(); }
2871 my $max_procs_file_last_mod;
2873 sub changed_procs_file {
2874 # If --jobs is a file and it is modfied:
2875 # Force recomputing of max_jobs_running for each $sshlogin
2877 # $Global::max_procs_file
2880 if($Global::max_procs_file) {
2882 my $mtime = (stat($Global::max_procs_file))[9];
2883 $max_procs_file_last_mod ||= 0;
2884 if($mtime > $max_procs_file_last_mod) {
2885 # file changed: Force re-computing max_jobs_running
2886 $max_procs_file_last_mod = $mtime;
2887 for my $sshlogin (values %Global::host) {
2888 $sshlogin->set_max_jobs_running(undef);
2894 sub changed_sshloginfile {
2895 # If --slf is changed:
2900 # @opt::sshloginfile
2903 # $opt::filter_hosts
2905 if(@opt::sshloginfile) {
2906 # Is --sshloginfile changed?
2907 for my $slf (@opt::sshloginfile) {
2908 my $actual_file = expand_slf_shorthand($slf);
2909 my $mtime = (stat($actual_file))[9];
2910 $last_mtime{$actual_file} ||= $mtime;
2911 if($mtime - $last_mtime{$actual_file} > 1) {
2912 ::debug("run
","--sshloginfile
$actual_file changed
. reload
\n");
2913 $last_mtime{$actual_file} = $mtime;
2916 @Global::sshlogin = ();
2917 for (values %Global::host) {
2918 # Don't start new jobs on any host
2919 # except the ones added back later
2920 $_->set_max_jobs_running(0);
2922 # This will set max_jobs_running on the SSHlogins
2923 read_sshloginfile($actual_file);
2925 $opt::filter_hosts and filter_hosts();
2932 sub start_more_jobs {
2933 # Run start_another_job() but only if:
2934 # * not $Global::start_no_new_jobs set
2935 # * not JobQueue is empty
2936 # * not load on server is too high
2937 # * not server swapping
2938 # * not too short time since last remote login
2941 # $Global::start_no_new_jobs
2947 # $Global::newest_starttime
2949 # $jobs_started = number of jobs started
2950 my $jobs_started = 0;
2951 if($Global::start_no_new_jobs) {
2952 return $jobs_started;
2954 if(time - ($last_time||0) > 1) {
2955 # At most do this every second
2957 changed_procs_file();
2958 changed_sshloginfile();
2960 # This will start 1 job on each --sshlogin (if possible)
2961 # thus distribute the jobs on the --sshlogins round robin
2962 for my $sshlogin (values %Global::host) {
2963 if($Global::JobQueue->empty() and not $opt::pipe) {
2964 # No more jobs in the queue
2967 debug("run
", "Running jobs before on
", $sshlogin->string(), ": ",
2968 $sshlogin->jobs_running(), "\n");
2969 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
2972 $opt::delay > ::now() - $Global::newest_starttime) {
2973 # It has been too short since last start
2976 if($opt::load and $sshlogin->loadavg_too_high()) {
2977 # The load is too high or unknown
2980 if($opt::noswap and $sshlogin->swapping()) {
2981 # The server is swapping
2984 if($opt::limit and $sshlogin->limit()) {
2988 if($opt::memfree and $sshlogin->memfree() < $opt::memfree) {
2989 # The server has not enough mem free
2990 ::debug("mem
", "Not starting job
: not enough mem
\n");
2993 if($sshlogin->too_fast_remote_login()) {
2994 # It has been too short since
2997 debug("run
", $sshlogin->string(),
2998 " has
", $sshlogin->jobs_running(),
2999 " out of
", $sshlogin->max_jobs_running(),
3000 " jobs running
. Start another
.\n");
3001 if(start_another_job($sshlogin) == 0) {
3002 # No more jobs to start on this $sshlogin
3003 debug("run
","No jobs started on
",
3004 $sshlogin->string(), "\n");
3007 $sshlogin->inc_jobs_running();
3008 $sshlogin->set_last_login_at(::now());
3011 debug("run
","Running jobs after on
", $sshlogin->string(), ": ",
3012 $sshlogin->jobs_running(), " of
",
3013 $sshlogin->max_jobs_running(), "\n");
3016 return $jobs_started;
3021 my $no_more_file_handles_warned;
3023 sub start_another_job() {
3024 # If there are enough filehandles
3025 # and JobQueue not empty
3026 # and not $job is in joblog
3027 # Then grab a job from Global::JobQueue,
3028 # start it at sshlogin
3029 # mark it as virgin_job
3031 # $sshlogin = the SSHLogin to start the job on
3037 # @Global::virgin_jobs
3039 # 1 if another jobs was started
3041 my $sshlogin = shift;
3042 # Do we have enough file handles to start another job?
3043 if(enough_file_handles()) {
3044 if($Global::JobQueue->empty() and not $opt::pipe) {
3045 # No more commands to run
3046 debug("start
", "Not starting
: JobQueue empty
\n");
3050 # Skip jobs already in job log
3051 # Skip jobs already in results
3053 $job = get_job_with_sshlogin($sshlogin);
3054 if(not defined $job) {
3055 # No command available for that sshlogin
3056 debug("start
", "Not starting
: no jobs available
for ",
3057 $sshlogin->string(), "\n");
3060 if($job->is_already_in_joblog()) {
3063 } while ($job->is_already_in_joblog()
3065 ($opt::results and $opt::resume and $job->is_already_in_results()));
3066 debug("start
", "Command to run on
'", $job->sshlogin()->string(), "': '",
3067 $job->replaced(),"'\n");
3070 if($job->virgin()) {
3071 push(@Global::virgin_jobs,$job);
3073 # Block already set: This is a retry
3075 ::debug("pipe","\n\nWriting
",length ${$job->block_ref()},
3076 " to
", $job->seq(),"\n");
3077 close $job->fh(0,"w
");
3079 $job->write($job->block_ref());
3080 close $job->fh(0,"w
");
3085 debug("start
", "Started as seq
", $job->seq(),
3086 " pid
:", $job->pid(), "\n");
3089 # Not enough processes to run the job.
3090 # Put it back on the queue.
3091 $Global::JobQueue->unget($job);
3092 # Count down the number of jobs to run for this SSHLogin.
3093 my $max = $sshlogin->max_jobs_running();
3094 if($max > 1) { $max--; } else {
3096 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3097 push @arg, map { $_->orig() } @$record;
3099 ::error("No more processes
: cannot run a single job
. Something is wrong at
@arg.");
3100 ::wait_and_exit(255);
3102 $sshlogin->set_max_jobs_running($max);
3103 # Sleep up to 300 ms to give other processes time to die
3104 ::usleep(rand()*300);
3105 ::warning("No more processes
: ".
3106 "Decreasing number of running jobs to
$max.",
3107 "Raising ulimit
-u
or /etc/security
/limits
.conf may help
.");
3112 # No more file handles
3113 $no_more_file_handles_warned++ or
3114 ::warning("No more file handles
. ",
3115 "Raising ulimit
-n
or /etc/security
/limits
.conf may help
.");
3116 debug("start
", "No more file handles
. ");
3122 sub init_progress() {
3126 # list of computers for progress output
3131 my %progress = progress();
3132 return ("\nComputers
/ CPU cores / Max jobs to run
\n",
3133 $progress{'workerlist'});
3136 sub drain_job_queue(@) {
3139 # $Global::total_running
3140 # $Global::max_jobs_running
3144 # $Global::start_no_new_jobs
3147 if($opt::progress) {
3148 ::status_no_nl(init_progress());
3150 my $last_header = "";
3153 while($Global::total_running > 0) {
3154 debug($Global::total_running, "==", scalar
3155 keys %Global::running," slots
: ", $Global::max_jobs_running);
3157 # When using --pipe sometimes file handles are not closed properly
3158 for my $job (values %Global::running) {
3159 close $job->fh(0,"w
");
3162 if($opt::progress) {
3163 my %progress = progress();
3164 if($last_header ne $progress{'header'}) {
3165 ::status("", $progress{'header'});
3166 $last_header = $progress{'header'};
3168 ::status_no_nl("\r",$progress{'status'});
3170 if($Global::total_running < $Global::max_jobs_running
3171 and not $Global::JobQueue->empty()) {
3172 # These jobs may not be started because of loadavg
3173 # or too little time between each ssh login.
3174 if(start_more_jobs() > 0) {
3175 # Exponential back-on if jobs were started
3176 $sleep = $sleep/2+0.001;
3179 # Exponential back-off sleeping
3180 $sleep = ::reap_usleep($sleep);
3182 if(not $Global::JobQueue->empty()) {
3183 # These jobs may not be started:
3184 # * because there the --filter-hosts has removed all
3185 if(not %Global::host) {
3186 ::error("There are
no hosts left to run on
.");
3187 ::wait_and_exit(255);
3189 # * because of loadavg
3190 # * because of too little time between each ssh login.
3191 $sleep = ::reap_usleep($sleep);
3193 if($Global::max_jobs_running == 0) {
3194 ::warning("There are
no job slots available
. Increase
--jobs
.");
3197 while($opt::sqlmaster and not $Global::sql->finished()) {
3199 $sleep = ::reap_usleep($sleep);
3201 if($Global::start_sqlworker) {
3202 # Start an SQL worker as we are now sure there is work to do
3203 $Global::start_sqlworker = 0;
3204 if(my $pid = fork()) {
3205 $Global::unkilled_sqlworker = $pid;
3207 # Replace --sql/--sqlandworker with --sqlworker
3208 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3209 # exec the --sqlworker
3210 exec($0,@ARGV,@command);
3214 } while ($Global::total_running > 0
3216 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3218 $opt::sqlmaster and not $Global::sql->finished());
3219 if($opt::progress) {
3220 my %progress = progress();
3221 ::status("\r".$progress{'status'});
3225 sub toggle_progress() {
3226 # Turn on/off progress view
3230 $opt::progress = not $opt::progress;
3231 if($opt::progress) {
3232 ::status_no_nl(init_progress());
3241 # $Global::total_started
3243 # $workerlist = list of workers
3244 # $header = that will fit on the screen
3245 # $status = message that will fit on the screen
3247 return ("workerlist
" => "", "header
" => "", "status
" => bar());
3250 my ($status,$header)=("","");
3252 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3254 $eta = sprintf("ETA
: %ds Left
: %d AVG
: %.2fs
",
3255 $this_eta, $left, $avgtime);
3257 my $termcols = terminal_columns();
3258 my @workers = sort keys %Global::host;
3259 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3261 my %workerno = map { ($_=>$workerno++) } @workers;
3262 my $workerlist = "";
3263 for my $w (@workers) {
3265 $workerno{$w}.":".$sshlogin{$w} ." / ".
3266 ($Global::host{$w}->ncpus() || "-")." / ".
3267 $Global::host{$w}->max_jobs_running()."\n";
3269 $status = "x
"x($termcols+1);
3270 # Select an output format that will fit on a single line
3271 if(length $status > $termcols) {
3272 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3273 $header = "Computer
:jobs running
/jobs completed/%of started jobs
/Average seconds to complete
";
3277 if($Global::total_started) {
3278 my $completed = ($Global::host{$_}->jobs_completed()||0);
3279 my $running = $Global::host{$_}->jobs_running();
3280 my $time = $completed ? (time-$^T)/($completed) : "0";
3281 sprintf("%s:%d/%d/%d%%/%.1fs
",
3282 $sshlogin{$_}, $running, $completed,
3283 ($running+$completed)*100
3284 / $Global::total_started, $time);
3288 if(length $status > $termcols) {
3289 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3290 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3294 if($Global::total_started) {
3295 my $completed = ($Global::host{$_}->jobs_completed()||0);
3296 my $running = $Global::host{$_}->jobs_running();
3297 my $time = $completed ? (time-$^T)/($completed) : "0";
3298 sprintf("%s:%d/%d/%d%%/%.1fs
",
3299 $workerno{$_}, $running, $completed,
3300 ($running+$completed)*100
3301 / $Global::total_started, $time);
3305 if(length $status > $termcols) {
3306 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3307 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3311 if($Global::total_started) {
3312 sprintf("%s:%d/%d/%d%%",
3314 $Global::host{$_}->jobs_running(),
3315 ($Global::host{$_}->jobs_completed()||0),
3316 ($Global::host{$_}->jobs_running()+
3317 ($Global::host{$_}->jobs_completed()||0))*100
3318 / $Global::total_started)
3323 if(length $status > $termcols) {
3324 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3325 $header = "Computer
:jobs running
/jobs completed/%of started jobs
";
3329 if($Global::total_started) {
3330 sprintf("%s:%d/%d/%d%%",
3332 $Global::host{$_}->jobs_running(),
3333 ($Global::host{$_}->jobs_completed()||0),
3334 ($Global::host{$_}->jobs_running()+
3335 ($Global::host{$_}->jobs_completed()||0))*100
3336 / $Global::total_started)
3341 if(length $status > $termcols) {
3342 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3343 $header = "Computer
:jobs running
/jobs completed
";
3346 { sprintf("%s:%d/%d",
3347 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3348 ($Global::host{$_}->jobs_completed()||0)) }
3351 if(length $status > $termcols) {
3352 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3353 $header = "Computer
:jobs running
/jobs completed
";
3356 { sprintf("%s:%d/%d",
3357 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3358 ($Global::host{$_}->jobs_completed()||0)) }
3361 if(length $status > $termcols) {
3362 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3363 $header = "Computer
:jobs running
/jobs completed
";
3366 { sprintf("%s:%d/%d",
3367 $workerno{$_}, $Global::host{$_}->jobs_running(),
3368 ($Global::host{$_}->jobs_completed()||0)) }
3371 if(length $status > $termcols) {
3372 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3373 $header = "Computer
:jobs completed
";
3378 ($Global::host{$_}->jobs_completed()||0)) }
3381 if(length $status > $termcols) {
3382 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3383 $header = "Computer
:jobs completed
";
3388 ($Global::host{$_}->jobs_completed()||0)) }
3391 return ("workerlist
" => $workerlist, "header
" => $header, "status
" => $status);
3396 my ($first_completed, $smoothed_avg_time, $last_eta);
3399 # Calculate important numbers for ETA
3401 # $total = number of jobs in total
3402 # $completed = number of jobs completed
3403 # $left = number of jobs left
3404 # $pctcomplete = percent of jobs completed
3405 # $avgtime = averaged time
3406 # $eta = smoothed eta
3407 my $completed = $Global::total_completed;
3408 # In rare cases with -X will $completed > total_jobs()
3409 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3410 my $left = $total - $completed;
3411 if(not $completed) {
3412 return($total, $completed, $left, 0, 0, 0);
3414 my $pctcomplete = ::min($completed / $total,100);
3415 $first_completed ||= time;
3416 my $timepassed = (time - $first_completed);
3417 my $avgtime = $timepassed / $completed;
3418 $smoothed_avg_time ||= $avgtime;
3419 # Smooth the eta so it does not jump wildly
3420 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3421 $pctcomplete * $avgtime;
3422 my $eta = int($left * $smoothed_avg_time);
3423 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3424 # Eta jumped less that 10% up: Keep the last eta instead
3429 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3438 # $status = bar with eta, completed jobs, arg and pct
3440 $reset ||= "\033[0m
";
3441 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3443 my $arg = $Global::newest_job ?
3444 $Global::newest_job->{'commandline'}->
3445 replace_placeholders(["\257<\257>"],0,0) : "";
3446 # These chars mess up display in the terminal
3447 $arg =~ tr/[\011-\016\033\302-\365]//d;
3448 my $eta_dhms = ::seconds_to_time_units($eta);
3450 sprintf("%d%% %d:%d=%s %s",
3451 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3452 my $terminal_width = terminal_columns();
3453 my $s = sprintf("%-${terminal_width
}s
",
3454 substr($bar_text." "x$terminal_width,
3455 0,$terminal_width));
3456 my $width = int($terminal_width * $pctcomplete);
3457 substr($s,$width,0) = $reset;
3458 my $zenity = sprintf("%-${terminal_width
}s
",
3459 substr("# $eta sec $arg",
3460 0,$terminal_width));
3461 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3462 "\r" . $rev . $s . $reset;
3468 my ($columns,$last_column_time);
3470 sub terminal_columns
() {
3471 # Get the number of columns of the terminal.
3472 # Only update once per second.
3474 # number of columns of the screen
3475 if(not $columns or $last_column_time < time) {
3476 $last_column_time = time;
3477 $columns = $ENV{'COLUMNS'};
3479 my $stty = ::qqx
("stty -a </dev/tty");
3480 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3481 # MacOSX/IRIX/AIX/Tru64
3482 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3484 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3485 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3486 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3488 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3491 my $resize = ::qqx
("resize");
3492 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3500 # Prototype forwarding
3501 sub get_job_with_sshlogin
($);
3502 sub get_job_with_sshlogin
($) {
3504 # $sshlogin = which host should the job be run on?
3509 # $job = next job object for $sshlogin if any available
3510 my $sshlogin = shift;
3513 if ($opt::hostgroups
) {
3514 my @other_hostgroup_jobs = ();
3516 while($job = $Global::JobQueue
->get()) {
3517 if($sshlogin->in_hostgroups($job->hostgroups())) {
3518 # Found a job to be run on a hostgroup of this
3522 # This job was not in the hostgroups of $sshlogin
3523 push @other_hostgroup_jobs, $job;
3526 $Global::JobQueue
->unget(@other_hostgroup_jobs);
3527 if(not defined $job) {
3532 $job = $Global::JobQueue
->get();
3533 if(not defined $job) {
3535 ::debug
("start", "No more jobs: JobQueue empty\n");
3539 $job->set_sshlogin($sshlogin);
3540 if($opt::retries
and $job->failed_here()) {
3541 # This command with these args failed for this sshlogin
3542 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3543 # Only look at the Global::host that have > 0 jobslots
3544 if($no_of_failed_sshlogins ==
3545 grep { $_->max_jobs_running() > 0 } values %Global::host
3546 and $job->failed_here() == $min_failures) {
3547 # It failed the same or more times on another host:
3548 # run it on this host
3550 # If it failed fewer times on another host:
3551 # Find another job to run
3553 if(not $Global::JobQueue
->empty()) {
3554 # This can potentially recurse for all args
3555 no warnings
'recursion';
3556 $nextjob = get_job_with_sshlogin
($sshlogin);
3558 # Push the command back on the queue
3559 $Global::JobQueue
->unget($job);
3567 sub __REMOTE_SSH__
() {}
3570 sub read_sshloginfiles
(@
) {
3571 # Read a list of --slf's
3573 # @files = files or symbolic file names to read
3576 read_sshloginfile
(expand_slf_shorthand
($s));
3580 sub expand_slf_shorthand
($) {
3581 # Expand --slf shorthand into a read file name
3583 # $file = file or symbolic file name to read
3585 # $file = actual file name to read
3589 } elsif($file eq "..") {
3590 $file = $Global::config_dir
."/sshloginfile";
3591 } elsif($file eq ".") {
3592 $file = "/etc/parallel/sshloginfile";
3593 } elsif(not -r
$file) {
3594 for(@Global::config_dirs
) {
3595 if(not -r
$_."/".$file) {
3596 # Try prepending $PARALLEL_HOME
3597 ::error
("Cannot open $file.");
3598 ::wait_and_exit
(255);
3600 $file = $_."/".$file;
3608 sub read_sshloginfile
($) {
3609 # Read sshloginfile into @Global::sshlogin
3611 # $file = file to read
3619 ::debug
("init","--slf ",$file);
3624 if(not open($in_fh, "<", $file)) {
3626 ::error
("Cannot open $file.");
3627 ::wait_and_exit
(255);
3634 push @Global::sshlogin
, $_;
3641 sub parse_sshlogin
() {
3642 # Parse @Global::sshlogin into %Global::host.
3643 # Keep only hosts that are in one of the given ssh hostgroups.
3646 # $Global::minimal_command_line_length
3655 if(not @Global::sshlogin
) { @Global::sshlogin
= (":"); }
3656 for my $sshlogin (@Global::sshlogin
) {
3657 # Split up -S sshlogin,sshlogin
3658 for my $s (split /,|\n/, $sshlogin) {
3659 if ($s eq ".." or $s eq "-") {
3660 # This may add to @Global::sshlogin - possibly bug
3661 read_sshloginfile
(expand_slf_shorthand
($s));
3668 $Global::minimal_command_line_length
= 100_000_000
;
3669 my @allowed_hostgroups;
3670 for my $ncpu_sshlogin_string (::uniq
(@login)) {
3671 my $sshlogin = SSHLogin
->new($ncpu_sshlogin_string);
3672 my $sshlogin_string = $sshlogin->string();
3673 if($sshlogin_string eq "") {
3674 # This is an ssh group: -S @webservers
3675 push @allowed_hostgroups, $sshlogin->hostgroups();
3678 if($Global::host
{$sshlogin_string}) {
3679 # This sshlogin has already been added:
3680 # It is probably a host that has come back
3681 # Set the max_jobs_running back to the original
3682 debug
("run","Already seen $sshlogin_string\n");
3683 if($sshlogin->{'ncpus'}) {
3684 # If ncpus set by '#/' of the sshlogin, overwrite it:
3685 $Global::host
{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
3687 $Global::host
{$sshlogin_string}->set_max_jobs_running(undef);
3690 $sshlogin->set_maxlength(Limits
::Command
::max_length
());
3692 $Global::minimal_command_line_length
=
3693 ::min
($Global::minimal_command_line_length
, $sshlogin->maxlength());
3694 $Global::host
{$sshlogin_string} = $sshlogin;
3696 if(@allowed_hostgroups) {
3697 # Remove hosts that are not in these groups
3698 while (my ($string, $sshlogin) = each %Global::host
) {
3699 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
3700 delete $Global::host
{$string};
3705 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
3706 if(@Global::transfer_files
or @opt::return or $opt::cleanup
or @opt::basefile
) {
3707 if(not remote_hosts
()) {
3708 # There are no remote hosts
3710 ::warning
("--trc ignored as there are no remote --sshlogin.");
3711 } elsif (defined $opt::transfer
) {
3712 ::warning
("--transfer ignored as there are no remote --sshlogin.");
3713 } elsif (@opt::transfer_files
) {
3714 ::warning
("--transferfile ignored as there are no remote --sshlogin.");
3715 } elsif (@opt::return) {
3716 ::warning
("--return ignored as there are no remote --sshlogin.");
3717 } elsif (defined $opt::cleanup
) {
3718 ::warning
("--cleanup ignored as there are no remote --sshlogin.");
3719 } elsif (@opt::basefile
) {
3720 ::warning
("--basefile ignored as there are no remote --sshlogin.");
3726 sub remote_hosts
() {
3727 # Return sshlogins that are not ':'
3731 # list of sshlogins with ':' removed
3732 return grep !/^:$/, keys %Global::host
;
3735 sub setup_basefile
() {
3736 # Transfer basefiles to each $sshlogin
3737 # This needs to be done before first jobs on $sshlogin is run
3745 for my $sshlogin (values %Global::host
) {
3746 if($sshlogin->string() eq ":") { next }
3747 for my $file (@opt::basefile
) {
3748 if($file !~ m
:^/: and $opt::workdir
eq "...") {
3749 ::error
("Work dir '...' will not work with relative basefiles.");
3750 ::wait_and_exit
(255);
3753 my $dummycmdline = CommandLine
->new(1,["true"],{},0,0,[],[],{},{},{});
3754 my $dummyjob = Job
->new($dummycmdline);
3755 $workdir = $dummyjob->workdir();
3757 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
3760 debug
("init", "basesetup: @cmd\n");
3761 my ($exitstatus,$stdout_ref,$stderr_ref) =
3762 run_parallel
((join "\n",@cmd),"-j0","--retries",5);
3764 my @stdout = @
$stdout_ref;
3765 my @stderr = @
$stderr_ref;
3766 ::error
("Copying of --basefile failed: @stdout@stderr");
3767 ::wait_and_exit
(255);
3771 sub cleanup_basefile
() {
3772 # Remove the basefiles transferred
3780 my $dummycmdline = CommandLine
->new(1,"true",0,0,0,0,0,{},{},{});
3781 my $dummyjob = Job
->new($dummycmdline);
3782 $workdir = $dummyjob->workdir();
3784 for my $sshlogin (values %Global::host
) {
3785 if($sshlogin->string() eq ":") { next }
3786 for my $file (@opt::basefile
) {
3787 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
3790 debug
("init", "basecleanup: @cmd\n");
3791 my ($exitstatus,$stdout_ref,$stderr_ref) =
3792 run_parallel
(join("\n",@cmd),"-j0","--retries",5);
3794 my @stdout = @
$stdout_ref;
3795 my @stderr = @
$stderr_ref;
3796 ::error
("Cleanup of --basefile failed: @stdout@stderr");
3797 ::wait_and_exit
(255);
3801 sub run_parallel
() {
3802 my ($stdin,@args) = @_;
3803 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
3804 print $Global::original_stderr
` $cmd wait` ;
3808 sub _run_parallel
() {
3810 # This should ideally just fork an internal copy
3811 # and not start it through a shell
3813 # $stdin = data to provide on stdin for GNU Parallel
3814 # @args = command line arguments
3816 # $exitstatus = exitcode of GNU Parallel run
3817 # \@stdout = standard output
3818 # \@stderr = standard error
3819 my ($stdin,@args) = @_;
3820 my ($exitstatus,@stdout,@stderr);
3821 my ($stdin_fh,$stdout_fh)=(gensym
(),gensym
());
3822 my ($stderr_fh, $stderrname) = ::tmpfile
(SUFFIX
=> ".par");
3825 my $pid = ::open3
($stdin_fh,$stdout_fh,$stderr_fh,
3826 $0,qw(--plain --shell /bin/sh --will-cite), @args);
3827 if(my $writerpid = fork()) {
3829 @stdout = <$stdout_fh>;
3830 # Now stdout is closed:
3831 # These pids should be dead or die very soon
3832 while(kill 0, $writerpid) { ::usleep
(1); }
3835 # while(kill 0, $pid) { ::usleep(1); }
3838 seek $stderr_fh, 0, 0;
3839 @stderr = <$stderr_fh>;
3845 print $stdin_fh $stdin;
3849 return ($exitstatus,\
@stdout,\
@stderr);
3852 sub filter_hosts
() {
3853 # Remove down --sshlogins from active duty.
3854 # Find ncpus, ncores, maxlen, time-to-login for each host.
3857 # $Global::minimal_command_line_length
3858 # $opt::use_sockets_instead_of_threads
3859 # $opt::use_cores_instead_of_threads
3860 # $opt::use_cpus_instead_of_cores
3863 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
3864 $maxlen_ref, $echo_ref, $down_hosts_ref) =
3865 parse_host_filtering
(parallelized_host_filtering
());
3867 delete @Global::host
{@
$down_hosts_ref};
3868 @
$down_hosts_ref and ::warning
("Removed @$down_hosts_ref.");
3870 $Global::minimal_command_line_length
= 100_000_000
;
3871 while (my ($sshlogin, $obj) = each %Global::host
) {
3872 if($sshlogin eq ":") { next }
3873 $nsockets_ref->{$sshlogin} or
3874 ::die_bug
("nsockets missing: ".$obj->serverlogin());
3875 $ncores_ref->{$sshlogin} or
3876 ::die_bug
("ncores missing: ".$obj->serverlogin());
3877 $nthreads_ref->{$sshlogin} or
3878 ::die_bug
("nthreads missing: ".$obj->serverlogin());
3879 $time_to_login_ref->{$sshlogin} or
3880 ::die_bug
("time_to_login missing: ".$obj->serverlogin());
3881 $maxlen_ref->{$sshlogin} or
3882 ::die_bug
("maxlen missing: ".$obj->serverlogin());
3883 $obj->set_ncpus($nthreads_ref->{$sshlogin});
3884 if($opt::use_cpus_instead_of_cores
) {
3885 $obj->set_ncpus($ncores_ref->{$sshlogin});
3886 } elsif($opt::use_sockets_instead_of_threads
) {
3887 $obj->set_ncpus($nsockets_ref->{$sshlogin});
3888 } elsif($opt::use_cores_instead_of_threads
) {
3889 $obj->set_ncpus($ncores_ref->{$sshlogin});
3891 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
3892 $obj->set_maxlength($maxlen_ref->{$sshlogin});
3893 $Global::minimal_command_line_length
=
3894 ::min
($Global::minimal_command_line_length
,
3895 int($maxlen_ref->{$sshlogin}/2));
3896 ::debug
("init", "Timing from -S:$sshlogin ",
3897 " nsockets:",$nsockets_ref->{$sshlogin},
3898 " ncores:", $ncores_ref->{$sshlogin},
3899 " nthreads:",$nthreads_ref->{$sshlogin},
3900 " time_to_login:", $time_to_login_ref->{$sshlogin},
3901 " maxlen:", $maxlen_ref->{$sshlogin},
3902 " min_max_len:", $Global::minimal_command_line_length
,"\n");
3906 sub parse_host_filtering
() {
3908 # @lines = output from parallelized_host_filtering()
3910 # \%nsockets = number of sockets of {host}
3911 # \%ncores = number of cores of {host}
3912 # \%nthreads = number of hyperthreaded cores of {host}
3913 # \%time_to_login = time_to_login on {host}
3914 # \%maxlen = max command len on {host}
3915 # \%echo = echo received from {host}
3916 # \@down_hosts = list of hosts with no answer
3918 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
3921 ::debug
("init","Read: ",$_);
3923 my @col = split /\t/, $_;
3924 if($col[0] =~ /^parallel: Warning:/) {
3925 # Timed out job: Ignore it
3927 } elsif(defined $col[6]) {
3928 # This is a line from --joblog
3929 # seq host time spent sent received exit signal command
3930 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
3931 if($col[0] eq "Seq" and $col[1] eq "Host" and
3932 $col[2] eq "Starttime") {
3936 # Get server from: eval true server\;
3937 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
3938 ::die_bug
("col8 does not contain host: $col[8]");
3941 $Global::host
{$host} or next;
3942 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
3943 # exit == 255 or exit == timeout (-1): ssh failed/timedout
3944 # exit == 1: lsh failed
3946 ::debug
("init", "--filtered $host\n");
3947 push(@down_hosts, $host);
3948 } elsif($col[6] eq "127") {
3949 # signal == 127: parallel not installed remote
3950 # Set nsockets, ncores, nthreads = 1
3951 ::warning
("Could not figure out ".
3952 "number of cpus on $host. Using 1.");
3953 $nsockets{$host} = 1;
3955 $nthreads{$host} = 1;
3956 $maxlen{$host} = Limits
::Command
::max_length
();
3957 } elsif($col[0] =~ /^\d+$/ and $Global::host
{$host}) {
3958 # Remember how log it took to log in
3959 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
3960 $time_to_login{$host} = ::min
($time_to_login{$host},$col[3]);
3962 ::die_bug
("host check unmatched long jobline: $_");
3964 } elsif($Global::host
{$col[0]}) {
3965 # This output from --number-of-cores, --number-of-cpus,
3966 # --max-line-length-allowed
3969 # maxlen: server 131071
3970 if(/parallel: Warning: Cannot figure out number of/) {
3973 if(not $nsockets{$col[0]}) {
3974 $nsockets{$col[0]} = $col[1];
3975 } elsif(not $ncores{$col[0]}) {
3976 $ncores{$col[0]} = $col[1];
3977 } elsif(not $nthreads{$col[0]}) {
3978 $nthreads{$col[0]} = $col[1];
3979 } elsif(not $maxlen{$col[0]}) {
3980 $maxlen{$col[0]} = $col[1];
3981 } elsif(not $echo{$col[0]}) {
3982 $echo{$col[0]} = $col[1];
3983 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
3985 # perl: warning: Setting locale failed.
3986 # perl: warning: Please check that your locale settings:
3987 # LANGUAGE = (unset),
3989 # LANG = "en_US.UTF-8"
3990 # are supported and installed on your system.
3991 # perl: warning: Falling back to the standard locale ("C").
3993 ::die_bug
("host check too many col0: $_");
3996 ::die_bug
("host check unmatched short jobline ($col[0]): $_");
3999 @down_hosts = uniq
(@down_hosts);
4000 return(\
%nsockets, \
%ncores, \
%nthreads, \
%time_to_login,
4001 \
%maxlen, \
%echo, \
@down_hosts);
4004 sub parallelized_host_filtering
() {
4008 # text entries with:
4010 # * hostname \t number of cores
4011 # * hostname \t number of cpus
4012 # * hostname \t max-line-length-allowed
4013 # * hostname \t empty
4016 # Wrap with ssh and --env
4017 my $sshlogin = shift;
4018 my $command = shift;
4019 my $commandline = CommandLine
->new(1,[$command],{},0,0,[],[],{},{},{});
4020 my $job = Job
->new($commandline);
4021 $job->set_sshlogin($sshlogin);
4023 return($job->{'wrapped'});
4026 my(@sockets, @cores, @threads, @maxline, @echo);
4027 while (my ($host, $sshlogin) = each %Global::host
) {
4028 if($host eq ":") { next }
4029 # The 'true' is used to get the $host out later
4030 push(@sockets, $host."\t"."true $host; ".
4031 sshwrapped
($sshlogin,"parallel --number-of-sockets")."\n\0");
4032 push(@cores, $host."\t"."true $host; ".
4033 sshwrapped
($sshlogin,"parallel --number-of-cores")."\n\0");
4034 push(@threads, $host."\t"."true $host; ".
4035 sshwrapped
($sshlogin,"parallel --number-of-threads")."\n\0");
4036 push(@maxline, $host."\t"."true $host; ".
4037 sshwrapped
($sshlogin,"parallel --max-line-length-allowed")."\n\0");
4038 # 'echo' is used to get the fastest possible ssh login time
4039 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4040 $sshlogin->serverlogin();
4041 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4044 # --timeout 10: Setting up an SSH connection and running a simple
4045 # command should never take > 10 sec.
4046 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4047 # will make it less likely to overload the ssh daemon.
4048 # --retries 3: If the ssh daemon is overloaded, try 3 times
4050 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4051 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4052 $cmd = $Global::shell
." -c ".Q
($cmd);
4053 ::debug
("init", $cmd, "\n");
4057 my ($host_fh,$in,$err);
4058 open3
($in, $host_fh, $err, $cmd) || ::die_bug
("parallel host check: $cmd");
4060 # Give the commands to run to the $cmd
4062 print $in @sockets, @cores, @threads, @maxline, @echo;
4068 # TODO incompatible with '-quoting. Needs to be fixed differently
4070 # # if last char = ' then append next line
4071 # # This may be due to quoting of \n in environment var
4084 # Runs @command on all hosts.
4085 # Uses parallel to run @command on each host.
4086 # --jobs = number of hosts to run on simultaneously.
4087 # For each host a parallel command with the args will be running.
4107 # $opt::arg_file_sep
4111 # $Global::exitstatus
4118 # @command = command to run on all hosts
4122 # $joblog = filename of joblog - undef if none
4124 # $tmpfile = temp file for joblog - undef if none
4126 if(not defined $joblog) {
4129 my ($fh, $tmpfile) = ::tmpfile
(SUFFIX
=> ".log");
4133 my ($input_source_fh_ref,@command) = @_;
4134 if($Global::quoting
) {
4135 @command = shell_quote
(@command);
4138 # Copy all @input_source_fh (-a and :::) into tempfiles
4140 for my $fh (@
$input_source_fh_ref) {
4141 my ($outfh, $name) = ::tmpfile
(SUFFIX
=> ".all", UNLINK
=> not $opt::D
);
4142 print $outfh (<$fh>);
4144 push @argfiles, $name;
4146 if(@opt::basefile
) { setup_basefile
(); }
4147 # for each sshlogin do:
4148 # parallel -S $sshlogin $command :::: @argfiles
4150 # Pass some of the options to the sub-parallels, not all of them as
4151 # -P should only go to the first, and -S should not be copied at all.
4154 ((defined $opt::memfree
) ?
"--memfree ".$opt::memfree
: ""),
4155 ((defined $opt::D
) ?
"-D $opt::D" : ""),
4156 ((defined $opt::group
) ?
"-g" : ""),
4157 ((defined $opt::jobs
) ?
"-P $opt::jobs" : ""),
4158 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
4159 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
4160 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
4161 ((defined $opt::plain
) ?
"--plain" : ""),
4162 ((defined $opt::ungroup
) ?
"-u" : ""),
4163 ((defined $opt::tee
) ?
"--tee" : ""),
4167 ((defined $opt::D
) ?
"-D $opt::D" : ""),
4168 ((defined $opt::arg_file_sep
) ?
"--arg-file-sep ".$opt::arg_file_sep
: ""),
4169 ((defined $opt::arg_sep
) ?
"--arg-sep ".$opt::arg_sep
: ""),
4170 ((defined $opt::colsep
) ?
"--colsep ".shell_quote
($opt::colsep
) : ""),
4171 ((defined $opt::files
) ?
"--files" : ""),
4172 ((defined $opt::group
) ?
"-g" : ""),
4173 ((defined $opt::cleanup
) ?
"--cleanup" : ""),
4174 ((defined $opt::keeporder
) ?
"--keeporder" : ""),
4175 ((defined $opt::linebuffer
) ?
"--linebuffer" : ""),
4176 ((defined $opt::max_chars
) ?
"--max-chars ".$opt::max_chars
: ""),
4177 ((defined $opt::plain
) ?
"--plain" : ""),
4178 ((defined $opt::retries
) ?
"--retries ".$opt::retries
: ""),
4179 ((defined $opt::timeout
) ?
"--timeout ".$opt::timeout
: ""),
4180 ((defined $opt::ungroup
) ?
"-u" : ""),
4181 ((defined $opt::tee
) ?
"--tee" : ""),
4182 ((defined $opt::workdir
) ?
"--wd ".Q
($opt::workdir
) : ""),
4183 (@Global::transfer_files ?
map { "--tf ".Q
($_) }
4184 @Global::transfer_files
: ""),
4185 (@Global::ret_files ?
map { "--return ".Q
($_) }
4186 @Global::ret_files
: ""),
4187 (@opt::env ?
map { "--env ".Q
($_) } @opt::env
: ""),
4188 (map { "-v" } @opt::v
),
4190 ::debug
("init", "| $0 $options\n");
4191 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4192 ::die_bug
("This does not run GNU Parallel: $0 $options");
4194 for my $host (sort keys %Global::host
) {
4195 my $sshlogin = $Global::host
{$host};
4196 my $joblog = tmp_joblog
($opt::joblog
);
4198 push @joblogs, $joblog;
4199 $joblog = "--joblog $joblog";
4201 my $quad = $opt::arg_file_sep
|| "::::";
4202 # If PARALLEL_ENV is set: Pass it on
4203 my $penv=$Global::parallel_env ?
4204 "PARALLEL_ENV=".Q
($Global::parallel_env
) :
4206 ::debug
("init", "$penv $0 $suboptions -j1 $joblog ",
4207 ((defined $opt::tag
) ?
4208 "--tagstring ".Q
($sshlogin->string()) : ""),
4209 " -S ", Q
($sshlogin->string())," ",
4210 join(" ",shell_quote
(@command))," $quad @argfiles\n");
4211 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4212 ((defined $opt::tag
) ?
4213 "--tagstring ".Q
($sshlogin->string()) : ""),
4214 " -S ", Q
($sshlogin->string())," ",
4215 join(" ",shell_quote
(@command))," $quad @argfiles\0";
4218 $Global::exitstatus
= $?
>> 8;
4219 debug
("init", "--onall exitvalue ", $?
);
4220 if(@opt::basefile
and $opt::cleanup
) { cleanup_basefile
(); }
4221 $Global::debug
or unlink(@argfiles);
4223 for my $joblog (@joblogs) {
4225 open(my $fh, "<", $joblog) || ::die_bug
("Cannot open tmp joblog $joblog");
4226 # Skip first line (header);
4228 print $Global::joblog
(<$fh>);
4235 sub __SIGNAL_HANDLING__
() {}
4239 # Send TSTP signal (Ctrl-Z) to all children process groups
4243 signal_children
("TSTP");
4247 # Send SIGPIPE signal to all children process groups
4251 signal_children
("PIPE");
4254 sub signal_children
() {
4255 # Send signal to all children process groups
4256 # and GNU Parallel itself
4261 debug
("run", "Sending $signal ");
4262 kill $signal, map { -$_ } keys %Global::running
;
4263 # Use default signal handler for GNU Parallel itself
4264 $SIG{$signal} = undef;
4268 sub save_original_signal_handler
() {
4269 # Remember the original signal handler
4271 # %Global::original_sig
4274 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
4278 if($opt::tmux
) { ::qqx
("tmux kill-session -t p$$"); }
4281 %Global::original_sig
= %SIG;
4282 $SIG{TERM
} = sub {}; # Dummy until jobs really start
4283 $SIG{ALRM
} = 'IGNORE';
4284 # Allow Ctrl-Z to suspend and `fg` to continue
4285 $SIG{TSTP
} = \
&sigtstp
;
4286 $SIG{PIPE
} = \
&sigpipe
;
4288 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4289 $SIG{TSTP
} = \
&sigtstp
;
4290 # Send continue signal to all children process groups
4291 kill "CONT", map { -$_ } keys %Global::running
;
4295 sub list_running_jobs
() {
4296 # Print running jobs on tty
4300 for my $job (values %Global::running
) {
4301 ::status
("$Global::progname: ".$job->replaced());
4305 sub start_no_new_jobs
() {
4306 # Start no more jobs
4308 # %Global::original_sig
4310 # $Global::start_no_new_jobs
4312 # $SIG{TERM} = $Global::original_sig{TERM};
4313 unlink keys %Global::unlink;
4315 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4316 "$Global::progname: Waiting for these ".(keys %Global::running
).
4317 " jobs to finish. Send SIGTERM to stop now.");
4318 list_running_jobs
();
4319 $Global::start_no_new_jobs
||= 1;
4323 # Run reaper until there are no more left
4325 # @pids_reaped = pids of reaped processes
4328 while($pid = reaper
()) {
4329 push @pids_reaped, $pid;
4331 return @pids_reaped;
4336 # * Set exitstatus, exitsignal, endtime.
4337 # * Free ressources for new job
4338 # * Update median runtime
4340 # * If --halt = now: Kill children
4347 # $Global::total_running
4349 # $stiff = PID of child finished
4351 debug
("run", "Reaper ");
4352 if(($stiff = waitpid(-1, &WNOHANG
)) <= 0) {
4353 # No jobs waiting to be reaped
4357 # $stiff = pid of dead process
4358 my $job = $Global::running
{$stiff};
4360 # '-a <(seq 10)' will give us a pid not in %Global::running
4361 # The same will one of the ssh -M: ignore
4363 delete $Global::running
{$stiff};
4364 $Global::total_running
--;
4365 if($job->{'commandline'}{'skip'}) {
4366 # $job->skip() was called
4367 $job->set_exitstatus(-2);
4368 $job->set_exitsignal(0);
4370 $job->set_exitstatus($?
>> 8);
4371 $job->set_exitsignal($?
& 127);
4374 debug
("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4375 $job->set_endtime(::now
());
4376 my $sshlogin = $job->sshlogin();
4377 $sshlogin->dec_jobs_running();
4378 if($job->should_be_retried()) {
4379 # Free up file handles
4380 $job->free_ressources();
4383 $sshlogin->inc_jobs_completed();
4386 if($opt::timeout
and not $job->exitstatus()) {
4387 # Update average runtime for timeout only for successful jobs
4388 $Global::timeoutq
->update_median_runtime($job->runtime());
4390 if($opt::keeporder
) {
4391 $job->print_earlier_jobs();
4395 if($job->should_we_halt() eq "now") {
4397 ::kill_sleep_seq
($job->pid());
4399 ::wait_and_exit
($Global::halt_exitstatus
);
4404 if($opt::progress
) {
4405 my %progress = progress
();
4406 ::status_no_nl
("\r",$progress{'status'});
4409 debug
("run", "done ");
4418 # Kill all jobs by killing their process groups
4420 # $Global::start_no_new_jobs = we are stopping
4421 # $Global::killall = Flag to not run reaper
4422 $Global::start_no_new_jobs
||= 1;
4423 # Do not reap killed children: Ignore them instead
4424 $Global::killall
||= 1;
4425 kill_sleep_seq
(keys %Global::running
);
4428 sub kill_sleep_seq
(@
) {
4429 # Send jobs TERM,TERM,KILL to processgroups
4431 # @pids = list of pids that are also processgroups
4432 # Convert pids to process groups ($processgroup = -$pid)
4433 my @pgrps = map { -$_ } @_;
4434 my @term_seq = split/,/,$opt::termseq
;
4436 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4439 @pgrps = kill_sleep
(shift @term_seq, shift @term_seq, @pgrps);
4444 # Kill pids with a signal and wait a while for them to die
4446 # $signal = signal to send to @pids
4447 # $sleep_max = number of ms to sleep at most before returning
4448 # @pids = pids to kill (actually process groups)
4450 # $Global::killall = set by killall() to avoid calling reaper
4452 # @pids = pids still alive
4453 my ($signal, $sleep_max, @pids) = @_;
4454 ::debug
("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4455 kill $signal, @pids;
4459 while(@pids and $sleepsum < $sleep_max) {
4460 if($Global::killall
) {
4461 # Killall => don't run reaper
4462 while(waitpid(-1, &WNOHANG
) > 0) {
4463 $sleep = $sleep/2+0.001;
4465 } elsif(reapers
()) {
4466 $sleep = $sleep/2+0.001;
4470 $sleepsum += $sleep;
4471 # Keep only living children
4472 @pids = grep { kill(0, $_) } @pids;
4477 sub wait_and_exit
($) {
4478 # If we do not wait, we sometimes get segfault
4481 unlink keys %Global::unlink;
4483 # Kill all jobs without printing
4486 for (keys %Global::unkilled_children
) {
4487 # Kill any (non-jobs) children (e.g. reserved processes)
4490 delete $Global::unkilled_children
{$_};
4492 if($Global::unkilled_sqlworker
) {
4493 waitpid($Global::unkilled_sqlworker
,0);
4510 "$Global::progname [options] [command [arguments]] < list_of_arguments",
4511 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4512 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4514 "-j n Run n jobs in parallel",
4515 "-k Keep same order",
4516 "-X Multiple arguments with context replace",
4517 "--colsep regexp Split input on regexp for positional replacements",
4518 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4519 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
4520 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4521 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4523 "-S sshlogin Example: foo\@server.example.com",
4524 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
4525 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
4526 "--onall Run the given command with argument on all sshlogins",
4527 "--nonall Run the given command with no arguments on all sshlogins",
4529 "--pipe Split stdin (standard input) to multiple jobs.",
4530 "--recend str Record end separator for --pipe.",
4531 "--recstart str Record start separator for --pipe.",
4533 "See 'man $Global::progname' for details",
4535 "Academic tradition requires you to cite works you base your article on.",
4536 "If you use programs that use GNU Parallel to process data for an article in a",
4537 "scientific publication, please cite:",
4539 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4540 " DOI https://doi.org/10.5281/zenodo.1146014",
4542 # Before changing this line, please read
4543 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4544 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4545 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4550 sub citation_notice
() {
4551 # if --will-cite or --plain: do nothing
4552 # if stderr redirected: do nothing
4553 # if $PARALLEL_HOME/will-cite: do nothing
4554 # else: print citation notice to stderr
4559 not -t
$Global::original_stderr
4561 grep { -e
"$_/will-cite" } @Global::config_dirs
) {
4565 ("Academic tradition requires you to cite works you base your article on.",
4566 "If you use programs that use GNU Parallel to process data for an article in a",
4567 "scientific publication, please cite:",
4569 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4570 " DOI https://doi.org/10.5281/zenodo.1146014",
4572 # Before changing this line, please read
4573 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4574 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4575 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4577 "More about funding GNU Parallel and the citation notice:",
4578 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4580 "To silence this citation notice: run 'parallel --citation' once.",
4583 mkdir $Global::config_dir
;
4584 # Number of times the user has run GNU Parallel without showing
4585 # willingness to cite
4587 if(open (my $fh, "<", $Global::config_dir
.
4588 "/runs-without-willing-to-cite")) {
4593 if(open (my $fh, ">", $Global::config_dir
.
4594 "/runs-without-willing-to-cite")) {
4598 ::status
("Come on: You have run parallel $runs times. Isn't it about time ",
4599 "you run 'parallel --citation' once to silence the citation notice?",
4608 my $fh = $Global::status_fd
|| *STDERR
;
4609 print $fh map { ($_, "\n") } @w;
4613 sub status_no_nl
(@
) {
4615 my $fh = $Global::status_fd
|| *STDERR
;
4622 my $prog = $Global::progname
|| "parallel";
4623 status_no_nl
(map { ($prog, ": Warning: ", $_, "\n"); } @w);
4628 my $prog = $Global::progname
|| "parallel";
4629 status
(map { ($prog.": Error: ". $_); } @w);
4635 ("$Global::progname: This should not happen. You have found a bug.\n",
4636 "Please contact <parallel\@gnu.org> and follow\n",
4637 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
4639 "Include this in the report:\n",
4640 "* The version number: $Global::version\n",
4641 "* The bugid: $bugid\n",
4642 "* The command line being run\n",
4643 "* The files being read (put the files on a webserver if they are big)\n",
4645 "If you get the error on smaller/fewer files, please include those instead.\n");
4646 ::wait_and_exit
(255);
4653 "GNU $Global::progname $Global::version",
4654 "Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.",
4655 "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
4656 "This is free software: you are free to change and redistribute it.",
4657 "GNU $Global::progname comes with no warranty.",
4659 "Web site: http://www.gnu.org/software/${Global::progname}\n",
4660 "When using programs that use GNU Parallel to process data for publication",
4661 "please cite as described in 'parallel --citation'.\n",
4667 my ($all_argv_ref,$argv_options_removed_ref) = @_;
4668 my $all_argv = "@$all_argv_ref";
4669 my $no_opts = "@$argv_options_removed_ref";
4670 $all_argv=~s/--citation//;
4671 if($all_argv ne $no_opts) {
4672 ::warning
("--citation ignores all other options and arguments.");
4677 "Academic tradition requires you to cite works you base your article on.",
4678 "If you use programs that use GNU Parallel to process data for an article in a",
4679 "scientific publication, please cite:",
4681 "\@book{tange_ole_2018_1146014,",
4682 " author = {Tange, Ole},",
4683 " title = {GNU Parallel 2018},",
4684 " publisher = {Ole Tange},",
4687 " ISBN = {9781387509881},",
4688 " doi = {10.5281/zenodo.1146014},",
4689 " url = {https://doi.org/10.5281/zenodo.1146014}",
4692 "(Feel free to use \\nocite{tange_ole_2018_1146014})",
4694 # Before changing this line, please read
4695 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4696 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4697 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4698 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4700 "More about funding GNU Parallel and the citation notice:",
4701 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
4702 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4703 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
4705 "If you send a copy of your published article to tange\@gnu.org, it will be",
4706 "mentioned in the release notes of next version of GNU Parallel.",
4709 while(not grep { -e
"$_/will-cite" } @Global::config_dirs
) {
4710 print "\nType: 'will cite' and press enter.\n> ";
4711 my $input = <STDIN
>;
4712 if(not defined $input) {
4715 if($input =~ /will cite/i) {
4716 mkdir $Global::config_dir
;
4717 if(open (my $fh, ">", $Global::config_dir
."/will-cite")) {
4721 "Thank you for your support: You are the reason why there is funding to",
4722 "continue maintaining GNU Parallel. On behalf of future versions of",
4723 "GNU Parallel, which would not exist without your support:",
4725 " THANK YOU SO MUCH",
4727 "It is really appreciated. The citation notice is now silenced.",
4732 "Thank you for your support. It is much appreciated. The citation",
4733 "cannot permanently be silenced. Use '--will-cite' instead.",
4735 "If you use '--will-cite' in scripts to be run by others you are making",
4736 "it harder for others to see the citation notice. The development of",
4737 "GNU parallel is indirectly financed through citations, so if users",
4738 "do not know they should cite then you are making it harder to finance",
4739 "development. However, if you pay 10000 EUR, you should feel free to",
4740 "use '--will-cite' in scripts.",
4750 print("Maximal size of command: ",Limits
::Command
::real_max_length
(),"\n",
4751 "Maximal used size of command: ",Limits
::Command
::max_length
(),"\n",
4753 "Execution of will continue now, and it will try to read its input\n",
4754 "and run commands; if this is not what you wanted to happen, please\n",
4755 "press CTRL-D or CTRL-C\n");
4759 # Give an embeddable version of GNU Parallel
4760 # Tested with: bash, zsh, ksh, ash, dash, sh
4761 my $randomstring = "cut-here-".join"",
4762 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
4763 if(not -f
$0 or not -r
$0) {
4764 ::error
("--embed only works if parallel is a readable file");
4767 if(open(my $fh, "<", $0)) {
4768 # Read the source from $0
4770 my $user = $ENV{LOGNAME
} || $ENV{USERNAME
} || $ENV{USER
};
4771 my @env_parallel_source = ();
4772 my $shell = $Global::shell
;
4774 for(which
("env_parallel.$shell")) {
4776 # Read the source of env_parallel.shellname
4777 open(my $env_parallel_source_fh, $_) || die;
4778 @env_parallel_source = <$env_parallel_source_fh>;
4779 close $env_parallel_source_fh;
4782 print "#!$Global::shell
4784 # Copyright (C) 2007-2019 $user, Ole Tange and Free Software
4787 # This program is free software; you can redistribute it and/or modify
4788 # it under the terms of the GNU General Public License as published by
4789 # the Free Software Foundation; either version 3 of the License, or
4790 # (at your option) any later version.
4792 # This program is distributed in the hope that it will be useful, but
4793 # WITHOUT ANY WARRANTY; without even the implied warranty of
4794 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4795 # General Public License for more details.
4797 # You should have received a copy of the GNU General Public License
4798 # along with this program; if not, see <http://www.gnu.org/licenses/>
4799 # or write to the Free Software Foundation, Inc., 51 Franklin St,
4800 # Fifth Floor, Boston, MA 02110-1301 USA
4804 # Embedded GNU Parallel created with --embed
4806 # Start GNU Parallel without leaving temporary files
4808 # Not all shells support 'perl <(cat ...)'
4809 # This is a complex way of doing:
4810 # perl <(cat <<'cut-here'
4813 # and also avoiding:
4816 # Make a temporary fifo that perl can read from
4817 _fifo_with_parallel_source
=`perl -e 'use POSIX qw(mkfifo);
4819 $f = "/tmp/parallel-".join"",
4820 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
4824 # Put source code into temporary file
4825 # so it is easy to copy to the fifo
4826 _file_with_parallel_source=`mktemp`;
4828 "cat <<'$randomstring' > \$_file_with_parallel_source\n",
4832 # Copy the source code from the file to the fifo
4833 # and remove the file and fifo ASAP
4834 # 'sh
-c
' is needed to avoid
4836 sh -c "(rm $_file_with_parallel_source; cat >$_fifo_with_parallel_source; rm $_fifo_with_parallel_source) < $_file_with_parallel_source &"
4838 # Read the source from the fifo
4839 perl $_fifo_with_parallel_source "$@"
4842 @env_parallel_source,
4845 # This will call the functions above
4846 parallel -k echo ::: Put your code here
4847 env_parallel --session
4848 env_parallel -k echo ::: Put your code here
4849 parset p,y,c,h -k echo ::: Put your code here
4853 ::error("Cannot open $0");
4856 ::status("Redirect the output to a file and add your changes at the end:",
4857 " $0 --embed > new_script");
4861 sub __GENERIC_COMMON_FUNCTION__() {}
4864 sub mkdir_or_die($) {
4865 # If dir is not executable: die
4867 # The eval is needed to catch exception from mkdir
4868 eval { File::Path::mkpath($dir); };
4870 ::error("Cannot change into non-executable dir $dir: $!");
4871 ::wait_and_exit(255);
4876 # Create tempfile as $TMPDIR/parXXXXX
4878 # $filehandle = opened file handle
4879 # $filename = file name created
4880 my($filehandle,$filename) =
4881 ::tempfile(DIR=>$ENV{'TMPDIR
'}, TEMPLATE => 'parXXXXX
', @_);
4883 return($filehandle,$filename);
4885 # Separate unlink due to NFS dealing badly with File::Temp
4892 # Select a name that does not exist
4893 # Do not create the file as it may be used for creating a socket (by tmux)
4894 # Remember the name in $Global::unlink to avoid hitting the same name twice
4897 if(not -w $ENV{'TMPDIR
'}) {
4898 if(not -e $ENV{'TMPDIR
'}) {
4899 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
4901 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w
$ENV{'TMPDIR'}'");
4903 ::wait_and_exit(255);
4906 $tmpname = $ENV{'TMPDIR
'}."/".$name.
4907 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
4908 } while(-e $tmpname or $Global::unlink{$tmpname}++);
4913 # Find an unused name and mkfifo on it
4914 use POSIX qw(mkfifo);
4915 my $tmpfifo = tmpname
("fif");
4916 mkfifo
($tmpfifo,0600);
4921 # Remove file and remove it from %Global::unlink
4924 delete @Global::unlink{@_};
4928 sub size_of_block_dev
() {
4929 # Like -s but for block devices
4931 # $blockdev = file name of block device
4933 # $size = in bytes, undef if error
4934 my $blockdev = shift;
4935 if(open(my $fh, "<", $blockdev)) {
4936 seek($fh,0,2) || ::die_bug
("cannot seek $blockdev");
4937 my $size = tell($fh);
4941 ::error
("cannot open $blockdev");
4947 # Like qx but with clean environment (except for @keep)
4948 # and STDERR ignored
4949 # This is needed if the environment contains functions
4950 # that /bin/sh does not understand
4951 my $PATH = $ENV{'PATH'};
4953 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
4954 # ssh with Kerberos needs KRB5CCNAME
4955 # tmux needs LC_CTYPE
4956 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE);
4957 @env{@keep} = @ENV{@keep};
4960 if($Global::debug
) {
4961 return qx{ @_ && true
};
4963 return qx{ ( @_ ) 2>/dev/null
};
4968 # Remove duplicates and return unique values
4969 return keys %{{ map { $_ => 1 } @_ }};
4974 # Minimum value of array
4979 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
4980 $min = ($min < $_) ?
$min : $_;
4987 # Maximum value of array
4992 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
4993 $max = ($max > $_) ?
$max : $_;
5000 # Sum of values of array
5005 $_ and do { $sum += $_; }
5010 sub undef_as_zero
($) {
5015 sub undef_as_empty
($) {
5017 return $a ?
$a : "";
5020 sub undef_if_empty
($) {
5021 if(defined($_[0]) and $_[0] eq "") {
5027 sub multiply_binary_prefix
(@
) {
5028 # Evalualte numbers with binary prefix
5029 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5030 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5031 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5032 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5033 # 13G = 13*1024*1024*1024 = 13958643712
5035 # $s = string with prefixes
5037 # $value = int with prefixes multiplied
5043 s/gi/*1024*1024*1024/gi;
5044 s/ti/*1024*1024*1024*1024/gi;
5045 s/pi/*1024*1024*1024*1024*1024/gi;
5046 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5047 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5048 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5049 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5053 s/G/*1024*1024*1024/g;
5054 s/T/*1024*1024*1024*1024/g;
5055 s/P/*1024*1024*1024*1024*1024/g;
5056 s/E/*1024*1024*1024*1024*1024*1024/g;
5057 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5058 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5059 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5063 s/g/*1000*1000*1000/g;
5064 s/t/*1000*1000*1000*1000/g;
5065 s/p/*1000*1000*1000*1000*1000/g;
5066 s/e/*1000*1000*1000*1000*1000*1000/g;
5067 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5068 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5069 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5073 return wantarray ?
@v : $v[0];
5076 sub multiply_time_units
($) {
5077 # Evalualte numbers with time units
5078 # s=1, m=60, h=3600, d=86400
5080 # $s = string time units
5082 # $value = int in seconds
5094 return wantarray ?
@v : $v[0];
5097 sub seconds_to_time_units
() {
5098 # Convert seconds into ??d??h??m??s
5099 # s=1, m=60, h=3600, d=86400
5101 # $s = int in seconds
5103 # $str = string time units
5106 my $d = int($s/86400);
5108 my $h = int($s/3600);
5113 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5115 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5117 $str = sprintf("%dm%02ds",$m,$s);
5119 $str = sprintf("%ds",$s);
5125 my ($disk_full_fh, $b8193, $error_printed);
5126 sub exit_if_disk_full
() {
5127 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5128 # If the disk is full: Exit immediately.
5131 if(not $disk_full_fh) {
5132 $disk_full_fh = ::tmpfile
(SUFFIX
=> ".df");
5135 # Linux does not discover if a disk is full if writing <= 8192
5137 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5138 # ntfs reiserfs tmpfs ubifs vfat xfs
5139 # TODO this should be tested on different OS similar to this:
5142 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5143 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5144 # seq 6900000 > /mnt/loop/i && echo seq OK
5145 # seq 6980868 > /mnt/loop/i
5146 # seq 10000 > /mnt/loop/ii
5148 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5151 print $disk_full_fh $b8193;
5152 if(not $disk_full_fh
5154 tell $disk_full_fh != 8193) {
5155 # On raspbian the disk can be full except for 10 chars.
5156 if(not $error_printed) {
5157 ::error
("Output is incomplete.",
5158 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5159 "Is the disk full?",
5160 "Change \$TMPDIR with --tmpdir or use --compress.");
5163 ::wait_and_exit
(255);
5165 truncate $disk_full_fh, 0;
5166 seek($disk_full_fh, 0, 0) || die;
5171 # Remove comments and spaces
5173 # $spaces = keep 1 space?
5174 # $s = string to remove spaces from
5176 # $s = with spaces removed
5182 } elsif(2 == $spaces) {
5184 $s =~ s/\n\n+/\n/sg;
5185 $s =~ s/[ \t]+/ /mg;
5186 } elsif(3 == $spaces) {
5187 # Keep perl code required space
5188 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5189 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5201 $hostname = `hostname`;
5203 $hostname ||= "nohostname";
5211 # @programs = programs to find the path to
5213 # @full_path = full paths to @programs. Nothing if not found
5215 ::debug
("which", "@_ in $ENV{'PATH'}\n");
5217 push(@which, grep { not -d
$_ and -x
$_ }
5218 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5221 push(@which, grep { not -d
$_ and -x
$_ } $prg);
5224 return wantarray ?
@which : $which[0];
5228 my ($regexp,$shell,%fakename);
5232 # $pid = pid to see if (grand)*parent is a shell
5234 # $shellpath = path to shell - undef if no shell found
5236 ::debug
("init","Parent of $pid\n");
5238 # All shells known to mankind
5240 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5241 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5243 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
5244 ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5245 static-sh tcsh yash zsh -sh -csh -bash),
5246 '-sh (sh)' # sh on FreeBSD
5248 # Can be formatted as:
5249 # [sh] -sh sh busybox sh -sh (sh)
5250 # /bin/sh /sbin/sh /opt/csw/sh
5251 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5252 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5253 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5254 '(-?)('. $shell. '))( *$| [^(])';
5256 # sh disguises itself as -sh (sh) on FreeBSD
5257 "-sh (sh)" => ["sh"],
5258 # csh and tcsh disguise themselves as -sh/-csh
5259 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5260 # but sh also disguise itself as -sh
5261 # (TODO When does that happen?)
5263 "-csh" => ["tcsh", "csh"],
5264 # ash disguises itself as -ash
5265 "-ash" => ["ash", "dash", "sh"],
5266 # dash disguises itself as -dash
5267 "-dash" => ["dash", "ash", "sh"],
5268 # bash disguises itself as -bash
5269 "-bash" => ["bash", "sh"],
5270 # ksh disguises itself as -ksh
5271 "-ksh" => ["ksh", "sh"],
5272 # zsh disguises itself as -zsh
5273 "-zsh" => ["zsh", "sh"],
5276 # if -sh or -csh try readlink /proc/$$/exe
5277 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table
();
5281 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5282 my $shellname = $4 || $8;
5283 my $dash = $3 || $7;
5284 if($shellname eq "sh" and $dash) {
5286 if($shellpath = readlink "/proc/$testpid/exe") {
5287 ::debug
("init","procpath $shellpath\n");
5288 if($shellpath =~ m
:/$shell$:o
) {
5289 ::debug
("init", "proc which ".$shellpath." => ");
5294 ::debug
("init", "which ".$shellname." => ");
5295 $shellpath = (which
($shellname,@
{$fakename{$shellname}}))[0];
5296 ::debug
("init", "shell path $shellpath\n");
5297 $shellpath and last;
5299 if($testpid == $parent_of_ref->{$testpid}) {
5300 # In Solaris zones, the PPID of the zsched process is itself
5303 $testpid = $parent_of_ref->{$testpid};
5310 my %pid_parentpid_cmd;
5314 # %children_of = { pid -> children of pid }
5315 # %parent_of = { pid -> pid of parent }
5316 # %name_of = { pid -> commandname }
5318 if(not %pid_parentpid_cmd) {
5319 # Filter for SysV-style `ps`
5320 my $sysv = q
( ps
-ef
| perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5321 q(s/^.{$s}//; print "@F[1,2] $_"' );
5322 # Minix uses cols 2,3 and can have newlines in the command
5323 # so lines not having numbers in cols 2,3 must be ignored
5324 my $minix = q
( ps
-ef
| perl
-ane
'1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5325 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5327 my $bsd = q
(ps
-o pid
,ppid
,command
-ax
);
5328 %pid_parentpid_cmd =
5335 'dragonfly' => $bsd,
5349 'syllable' => "echo ps not supported",
5352 $pid_parentpid_cmd{$^O
} or ::die_bug
("pid_parentpid_cmd for $^O missing");
5354 my (@pidtable,%parent_of,%children_of,%name_of);
5355 # Table with pid -> children of pid
5356 @pidtable = `$pid_parentpid_cmd{$^O}`;
5359 # must match: 24436 21224 busybox ash
5360 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5361 # must match: 24436 21224 <<empty on system running Viber>>
5362 # or: perl -e 'while($0=" "){}'
5363 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5365 /^\s*(\S+)\s+(\S+)\s+()$/) {
5366 $parent_of{$1} = $2;
5367 push @
{$children_of{$2}}, $1;
5370 ::die_bug
("pidtable format: $_");
5373 return(\
%children_of, \
%parent_of, \
%name_of);
5378 # Returns time since epoch as in seconds with 3 decimals
5382 # $time = time now with millisecond accuracy
5383 if(not $Global::use{"Time::HiRes"}) {
5384 if(eval "use Time::HiRes qw ( time );") {
5385 eval "sub TimeHiRestime { return Time::HiRes::time };";
5387 eval "sub TimeHiRestime { return time() };";
5389 $Global::use{"Time::HiRes"} = 1;
5392 return (int(TimeHiRestime
()*1000))/1000;
5396 # Sleep this many milliseconds.
5398 # $ms = milliseconds to sleep
5400 ::debug
("timing",int($ms),"ms ");
5401 select(undef, undef, undef, $ms/1000);
5404 sub __KILLER_REAPER__
() {}
5407 # Reap dead children.
5408 # If no dead children: Sleep specified amount with exponential backoff
5410 # $ms = milliseconds to sleep
5412 # $ms/2+0.001 if children reaped
5413 # $ms*1.1 if no children reaped
5416 if(not $Global::total_completed
% 100) {
5418 # Force cleaning the timeout queue for every 1000 jobs
5419 # Fixes potential memleak
5420 $Global::timeoutq
->process_timeouts();
5423 # Sleep exponentially shorter (1/2^n) if a job finished
5427 $Global::timeoutq
->process_timeouts();
5430 kill_youngster_if_not_enough_mem
();
5433 kill_youngest_if_over_limit
();
5436 # When a child dies, wake up from sleep (or select(,,,))
5437 $SIG{CHLD
} = sub { kill "ALRM", $$ };
5439 # --compress needs $SIG{CHLD} unset
5440 $SIG{CHLD
} = 'DEFAULT';
5442 exit_if_disk_full
();
5443 if($opt::linebuffer
) {
5444 my $something_printed = 0;
5445 if($opt::keeporder
) {
5446 for my $job (values %Global::running
) {
5447 $something_printed += $job->print_earlier_jobs();
5450 for my $job (values %Global::running
) {
5451 $something_printed += $job->print();
5454 if($something_printed) {
5458 # Sleep exponentially longer (1.1^n) if a job did not finish,
5459 # though at most 1000 ms.
5460 return (($ms < 1000) ?
($ms * 1.1) : ($ms));
5464 sub kill_youngest_if_over_limit
() {
5465 # Check each $sshlogin we are over limit
5466 # If over limit: kill off the youngest child
5467 # Put the child back in the queue.
5473 for my $job (values %Global::running
) {
5474 if(not $jobs_of{$job->sshlogin()}) {
5475 push @sshlogins, $job->sshlogin();
5477 push @
{$jobs_of{$job->sshlogin()}}, $job;
5479 for my $sshlogin (@sshlogins) {
5480 for my $job (sort { $b->seq() <=> $a->seq() } @
{$jobs_of{$sshlogin}}) {
5481 if($sshlogin->limit() == 2) {
5489 sub kill_youngster_if_not_enough_mem
() {
5490 # Check each $sshlogin if there is enough mem.
5491 # If less than 50% enough free mem: kill off the youngest child
5492 # Put the child back in the queue.
5498 for my $job (values %Global::running
) {
5499 if(not $jobs_of{$job->sshlogin()}) {
5500 push @sshlogins, $job->sshlogin();
5502 push @
{$jobs_of{$job->sshlogin()}}, $job;
5504 for my $sshlogin (@sshlogins) {
5505 for my $job (sort { $b->seq() <=> $a->seq() } @
{$jobs_of{$sshlogin}}) {
5506 if($sshlogin->memfree() < $opt::memfree
* 0.5) {
5507 ::debug
("mem","\n",map { $_->seq()." " }
5508 (sort { $b->seq() <=> $a->seq() }
5509 @
{$jobs_of{$sshlogin}}));
5510 ::debug
("mem","\n", $job->seq(), "killed ",
5511 $sshlogin->memfree()," < ",$opt::memfree
* 0.5);
5513 $sshlogin->memfree_recompute();
5518 ::debug
("mem","Free mem OK ",
5519 $sshlogin->memfree()," > ",$opt::memfree
* 0.5);
5524 sub __DEBUGGING__
() {}
5532 $Global::debug
or return;
5533 @_ = grep { defined $_ ?
$_ : "" } @_;
5534 if($Global::debug
eq "all" or $Global::debug
eq $_[0]) {
5535 if($Global::fd
{1}) {
5536 # Original stdout was saved
5537 my $stdout = $Global::fd
{1};
5538 print $stdout @_[1..$#_];
5545 sub my_memory_usage
() {
5547 # memory usage if found
5554 if(-e
"/proc/$pid/stat") {
5555 my $fh = FileHandle
->new("</proc/$pid/stat");
5561 my @procinfo = split(/\s+/,$data);
5563 return undef_as_zero
($procinfo[22]);
5571 # $size = size of object if Devel::Size is installed
5573 my @size_this = (@_);
5574 eval "use Devel::Size qw(size total_size)";
5578 return total_size(@_);
5584 # ascii expression of object if Data::Dump(er) is installed
5585 # error code otherwise
5586 my @dump_this = (@_);
5587 eval "use Data
::Dump
qw(dump);";
5589 # Data::Dump not installed
5590 eval "use Data
::Dumper
;";
5592 my $err = "Neither Data
::Dump nor Data
::Dumper is installed
\n".
5593 "Not dumping output
\n";
5597 return Dumper(@dump_this);
5600 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
5602 eval "sub Data
::Dump
:dump {}";
5603 eval "use Data
::Dump
qw(dump);";
5604 return (Data::Dump::dump(@dump_this));
5621 sub __OBJECT_ORIENTED_PARTS__() {}
5628 my $sshlogin_string = shift;
5631 # SSHLogins can have these formats:
5632 # @grp+grp/ncpu//usr/bin/ssh user@server
5633 # ncpu//usr/bin/ssh user@server
5634 # /usr/bin/ssh user@server
5637 # @grp+grp/user@server
5638 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
5639 # Look for SSHLogin hostgroups
5640 %hostgroups = map { $_ => 1 } split(/\+/, $1);
5642 # An SSHLogin is always in the hostgroup of its "numcpu
/host
"
5643 $hostgroups{$sshlogin_string} = 1;
5644 if ($sshlogin_string =~ s:^(\d+)/::) {
5645 # Override default autodetected ncpus unless missing
5648 my $string = $sshlogin_string;
5649 # An SSHLogin is always in the hostgroup of its $string-name
5650 $hostgroups{$string} = 1;
5651 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
5653 my $no_slash_string = $string;
5654 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
5656 'string' => $string,
5657 'jobs_running' => 0,
5658 'jobs_completed' => 0,
5659 'maxlength' => undef,
5660 'max_jobs_running' => undef,
5661 'orig_max_jobs_running' => undef,
5663 'hostgroups' => \%hostgroups,
5664 'sshcommand' => undef,
5665 'serverlogin' => undef,
5666 'control_path_dir' => undef,
5667 'control_path' => undef,
5668 'time_to_login' => undef,
5669 'last_login_at' => undef,
5670 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
5671 $no_slash_string . "/loadavg
",
5673 'last_loadavg_update' => 0,
5674 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin
/" .
5675 $no_slash_string . "/swap_activity
",
5676 'swap_activity' => undef,
5677 }, ref($class) || $class;
5682 # Remove temporary files if they are created.
5683 ::rm($self->{'loadavg_file'});
5684 ::rm($self->{'swap_activity_file'});
5689 return $self->{'string'};
5692 sub jobs_running($) {
5694 return ($self->{'jobs_running'} || "0");
5697 sub inc_jobs_running($) {
5699 $self->{'jobs_running'}++;
5702 sub dec_jobs_running($) {
5704 $self->{'jobs_running'}--;
5707 sub set_maxlength($$) {
5709 $self->{'maxlength'} = shift;
5714 return $self->{'maxlength'};
5717 sub jobs_completed() {
5719 return $self->{'jobs_completed'};
5722 sub in_hostgroups() {
5724 # @hostgroups = the hostgroups to look for
5726 # true if intersection of @hostgroups and the hostgroups of this
5727 # SSHLogin is non-empty
5729 return grep { defined $self->{'hostgroups'}{$_} } @_;
5734 return keys %{$self->{'hostgroups'}};
5737 sub inc_jobs_completed($) {
5739 $self->{'jobs_completed'}++;
5740 $Global::total_completed++;
5743 sub set_max_jobs_running($$) {
5745 if(defined $self->{'max_jobs_running'}) {
5746 $Global::max_jobs_running -= $self->{'max_jobs_running'};
5748 $self->{'max_jobs_running'} = shift;
5749 if(defined $self->{'max_jobs_running'}) {
5750 # max_jobs_running could be resat if -j is a changed file
5751 $Global::max_jobs_running += $self->{'max_jobs_running'};
5753 # Initialize orig to the first non-zero value that comes around
5754 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
5761 $self->memfree_recompute();
5762 # Return 1 if not defined.
5763 return (not defined $self->{'memfree'} or $self->{'memfree'})
5766 sub memfree_recompute() {
5768 my $script = memfreescript();
5770 # TODO add sshlogin and backgrounding
5771 # Run the script twice if it gives 0 (typically intermittent error)
5772 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
5773 if(not $self->{'memfree'}) {
5774 ::die_bug("Less than
1 byte free
");
5776 #::debug("mem
","New free
:",$self->{'memfree'}," ");
5782 sub memfreescript() {
5784 # shellscript for giving available memory in bytes
5793 q[ print 1024 * qx{ ].
5794 q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ].
5795 q[ { sum += \$2} END { print sum }' ].
5796 q[ /proc/meminfo } ],
5797 # Android uses same code as GNU/Linux
5799 q[ print 1024 * qx{ ].
5800 q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ].
5801 q[ { sum += \$2} END { print sum }' ].
5802 q[ /proc/meminfo } ],
5805 # procs memory page faults cpu
5806 # r b w avm free re at pi po fr de sr in sy cs us sy id
5807 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
5809 q[ print (((reverse `vmstat 1 1`)[0] ].
5810 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
5812 # kthr memory page disk faults cpu
5813 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
5814 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
5815 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
5817 # The second free value is correct
5819 q[ print (((reverse `vmstat 1 2`)[0] ].
5820 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
5822 for(qx{/sbin/sysctl -a}) {
5823 if (/^([^:]+):\s+(.+)\s*$/s) {
5827 print $sysctl->{"hw.pagesize"} *
5828 ($sysctl->{"vm.stats.vm.v_cache_count"}
5829 + $sysctl->{"vm.stats.vm.v_inactive_count"}
5830 + $sysctl->{"vm.stats.vm.v_free_count"});
5832 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
5833 # Pages free: 198061.
5834 # Pages active: 159701.
5835 # Pages inactive: 47378.
5836 # Pages speculative: 29707.
5837 # Pages wired down: 89231.
5838 # "Translation faults": 928901425.
5839 # Pages copy-on-write: 156988239.
5840 # Pages zero filled: 271267894.
5841 # Pages reactivated: 48895.
5844 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
5847 print (($vm =~ /page size of (\d+)/)[0] *
5848 (($vm =~ /Pages free:\s+(\d+)/)[0] +
5849 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
5852 my $perlscript = "";
5853 # Make a perl script that detects the OS ($^O) and runs
5854 # the appropriate command
5855 for my $os (keys %script_of) {
5856 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
5858 $perlscript =~ s/[\t\n ]+/ /g;
5859 $script = "perl -e " . ::Q
($perlscript);
5867 # 0 = Below limit. Start another job.
5868 # 1 = Over limit. Start no jobs.
5869 # 2 = Kill youngest job
5872 if(not defined $self->{'limitscript'}) {
5878 # Do the measurement in the background
5880 LANG
=C iostat
-x
1 2 > $tmp;
5882 perl
-e
'-e $ARGV[0] or exit(1);
5885 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
5886 exit ($max < '$limit')' $io_file;
5894 awk
'/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
5896 if (sum*1024 < '$limit'/2) { exit 2; }
5897 else { exit (sum*1024 < '$limit') }
5906 ps ax
-o
state,command
|
5907 grep -E
'^[DOR].[^[]' |
5909 perl
-ne 'exit ('$limit' < $_)';
5915 my ($cmd,@args) = split /\s+/,$opt::limit
;
5916 if($limitscripts{$cmd}) {
5917 my $tmpfile = ::tmpname
("parlmt");
5918 ++$Global::unlink{$tmpfile};
5919 $self->{'limitscript'} =
5920 ::spacefree
(1, sprintf($limitscripts{$cmd},
5921 ::multiply_binary_prefix
(@args),$tmpfile));
5923 $self->{'limitscript'} = $opt::limit
;
5929 $ENV{'SSHLOGIN'} = $self->string();
5930 system($Global::shell
,"-c",$self->{'limitscript'});
5931 ::debug
("limit","limit `".$self->{'limitscript'}."` result ".($?
>>8)."\n");
5938 my $swapping = $self->swap_activity();
5939 return (not defined $swapping or $swapping)
5942 sub swap_activity
($) {
5943 # If the currently known swap activity is too old:
5944 # Recompute a new one in the background
5946 # last swap activity computed
5948 # Should we update the swap_activity file?
5949 my $update_swap_activity_file = 0;
5950 if(-r
$self->{'swap_activity_file'}) {
5951 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
5952 ::die_bug
("swap_activity_file-r");
5953 my $swap_out = <$swap_fh>;
5955 if($swap_out =~ /^(\d+)$/) {
5956 $self->{'swap_activity'} = $1;
5957 ::debug
("swap", "New swap_activity: ", $self->{'swap_activity'});
5959 ::debug
("swap", "Last update: ", $self->{'last_swap_activity_update'});
5960 if(time - $self->{'last_swap_activity_update'} > 10) {
5961 # last swap activity update was started 10 seconds ago
5962 ::debug
("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
5963 $update_swap_activity_file = 1;
5966 ::debug
("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
5967 $self->{'swap_activity'} = undef;
5968 $update_swap_activity_file = 1;
5970 if($update_swap_activity_file) {
5971 ::debug
("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
5972 $self->{'last_swap_activity_update'} = time;
5973 my $dir = ::dirname
($self->{'swap_activity_file'});
5974 -d
$dir or eval { File
::Path
::mkpath
($dir); };
5976 $swap_activity = swapactivityscript
();
5977 if($self->{'string'} ne ":") {
5978 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
5979 ::Q
($swap_activity);
5981 # Run swap_activity measuring.
5982 # As the command can take long to run if run remote
5983 # save it to a tmp file before moving it to the correct file
5984 my $file = $self->{'swap_activity_file'};
5985 my ($dummy_fh, $tmpfile) = ::tmpfile
(SUFFIX
=> ".swp");
5986 ::debug
("swap", "\n", $swap_activity, "\n");
5987 ::qqx
("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
5989 return $self->{'swap_activity'};
5995 sub swapactivityscript
() {
5997 # shellscript for detecting swap activity
5999 # arguments for vmstat are OS dependant
6000 # swap_in and swap_out are in different columns depending on OS
6006 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6007 # r b swpd free buff cache si so bi bo in cs us sy id wa
6008 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6009 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6010 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6014 # kthr memory page disk faults cpu
6015 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6016 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6017 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6018 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6020 # darwin (macosx): $21*$22
6022 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6023 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6024 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6025 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6026 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6030 # procs faults cpu memory page disk
6031 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6032 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6033 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6034 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6038 # System configuration: lcpu=1 mem=2048MB
6040 # kthr memory page faults cpu
6041 # ----- ----------- ------------------------ ------------ -----------
6042 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6043 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6044 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6045 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6049 # procs memory page disks faults cpu
6050 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6051 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6052 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6053 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6057 # procs memory page disks traps cpu
6058 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6059 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6060 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6061 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6065 # procs memory page disks faults cpu
6066 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6067 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6068 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6069 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6073 # procs memory page disks traps cpu
6074 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6075 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6076 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6077 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6081 # procs memory page faults cpu
6082 # r b w avm free re at pi po fr de sr in sy cs us sy id
6083 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6084 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6085 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6087 # dec_osf (tru64): $11*$12
6089 # Virtual Memory Statistics: (pagesize = 8192)
6090 # procs memory pages intr cpu
6091 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6092 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6093 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6094 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6098 # (pagesize: 4, size: 512288, swap size: 894972)
6099 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6100 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6101 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6102 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6104 # -nto (qnx has no swap)
6108 my $perlscript = "";
6109 # Make a perl script that detects the OS ($^O) and runs
6110 # the appropriate vmstat command
6111 for my $os (keys %vmstat) {
6112 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6113 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6114 $vmstat{$os}[1] . '}"` }';
6116 $script = "perl -e " . ::Q
($perlscript);
6122 sub too_fast_remote_login
($) {
6124 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6125 # sshd normally allows 10 simultaneous logins
6126 # A login takes time_to_login
6127 # So time_to_login/5 should be safe
6128 # If now <= last_login + time_to_login/5: Then it is too soon.
6129 my $too_fast = (::now
() <= $self->{'last_login_at'}
6130 + $self->{'time_to_login'}/5);
6131 ::debug
("run", "Too fast? $too_fast ");
6134 # No logins so far (or time_to_login not computed): it is not too fast
6139 sub last_login_at
($) {
6141 return $self->{'last_login_at'};
6144 sub set_last_login_at
($$) {
6146 $self->{'last_login_at'} = shift;
6149 sub loadavg_too_high
($) {
6151 my $loadavg = $self->loadavg();
6152 if(defined $loadavg) {
6153 ::debug
("load", "Load $loadavg > ",$self->max_loadavg());
6154 return $loadavg >= $self->max_loadavg();
6156 # Unknown load: Assume load is too high
6165 # aix => "ps -ae -o state,command" # state wrong
6166 # bsd => "ps ax -o state,command"
6167 # sysv => "ps -ef -o s -o comm"
6168 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6169 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6170 # awk '{print $2,$1}'
6175 # hpux => ps -el|awk '{print $2,$14,$15}'
6176 # irix => ps -ef -o state -o comm
6178 # minix => ps el|awk '{print \$1,\$11}'
6184 # ultrix => ps -ax | awk '{print $3,$5}'
6185 # unixware => ps -el|awk '{print $2,$14,$15}'
6186 my $ps = ::spacefree
(1,q{
6187 $sysv="ps -ef -o s -o comm";
6188 $sysv2="ps -ef -o state -o comm";
6189 $bsd="ps ax -o state,command";
6190 # Treat threads as processes
6191 $bsd2="ps axH -o state,command";
6192 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6193 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6194 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6195 awk '{print $2,$1}' };
6196 $dummy="echo S COMMAND;echo R dummy";
6198 # TODO Find better code for AIX/Android
6200 'android' => "uptime",
6201 'cygwin' => $cygwin,
6203 'dec_osf' => $sysv2,
6204 'dragonfly' => $bsd,
6210 'minix' => "ps el|awk '{print \$1,\$11}'",
6218 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6223 # The command is too long for csh, so base64_wrap the command
6224 $cmd = Job::base64_wrap($ps);
6232 # If the currently know loadavg is too old:
6233 # Recompute a new one in the background
6234 # The load average is computed as the number of processes waiting for disk
6235 # or CPU right now. So it is the server load this instant and not averaged over
6236 # several minutes. This is needed so GNU Parallel will at most start one job
6237 # that will push the load over the limit.
6240 # $last_loadavg = last load average computed (undef if none)
6242 # Should we update the loadavg file?
6243 my $update_loadavg_file = 0;
6244 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6245 local $/; # $/ = undef => slurp whole file
6246 my $load_out = <$load_fh>;
6248 if($load_out =~ /\S/) {
6249 # Content can be empty if ~/ is on NFS
6250 # due to reading being non-atomic.
6252 # Count lines starting with D,O,R but command does not start with [
6253 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6255 # load is overestimated by 1
6256 $self->{'loadavg'} = $load - 1;
6257 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6258 } elsif ($load_out=~/average: (\d+.\d+)/) {
6259 # AIX does not support instant load average
6260 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6261 $self->{'loadavg'} = $1;
6263 ::die_bug("loadavg_invalid_content: " .
6264 $self->{'loadavg_file'} . "\n$load_out");
6267 $update_loadavg_file = 1;
6269 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6270 $self->{'loadavg'} = undef;
6271 $update_loadavg_file = 1;
6273 if($update_loadavg_file) {
6274 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
6275 $self->{'last_loadavg_update'} = time;
6276 my $dir = ::dirname($self->{'swap_activity_file'});
6277 -d $dir or eval { File::Path::mkpath($dir); };
6278 -w $dir or ::die_bug("Cannot write to $dir");
6280 if($self->{'string'} ne ":") {
6281 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
6284 $cmd .= loadavg_cmd();
6286 # As the command can take long to run if run remote
6287 # save it to a tmp file before moving it to the correct file
6288 ::debug("load", "Update load\n");
6289 my $file = $self->{'loadavg_file'};
6290 # tmpfile on same filesystem as $file
6291 my $tmpfile = $file.$$;
6292 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6294 return $self->{'loadavg'};
6297 sub max_loadavg($) {
6299 # If --load is a file it might be changed
6300 if($Global::max_load_file) {
6301 my $mtime = (stat($Global::max_load_file))[9];
6302 if($mtime > $Global::max_load_file_last_mod) {
6303 $Global::max_load_file_last_mod = $mtime;
6304 for my $sshlogin (values %Global::host) {
6305 $sshlogin->set_max_loadavg(undef);
6309 if(not defined $self->{'max_loadavg'}) {
6310 $self->{'max_loadavg'} =
6311 $self->compute_max_loadavg($opt::load);
6313 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6314 return $self->{'max_loadavg'};
6317 sub set_max_loadavg($$) {
6319 $self->{'max_loadavg'} = shift;
6322 sub compute_max_loadavg($) {
6323 # Parse the max loadaverage that the user asked for using --load
6327 my $loadspec = shift;
6329 if(defined $loadspec) {
6330 if($loadspec =~ /^\+(\d+)$/) {
6334 $self->ncpus() + $j;
6335 } elsif ($loadspec =~ /^-(\d+)$/) {
6339 $self->ncpus() - $j;
6340 } elsif ($loadspec =~ /^(\d+)\%$/) {
6343 $self->ncpus() * $j / 100;
6344 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6346 } elsif (-f $loadspec) {
6347 $Global::max_load_file = $loadspec;
6348 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6349 if(open(my $in_fh, "<", $Global::max_load_file)) {
6350 my $opt_load_file = join("",<$in_fh>);
6352 $load = $self->compute_max_loadavg($opt_load_file);
6354 ::error("Cannot open $loadspec.");
6355 ::wait_and_exit(255);
6358 ::error("Parsing of --load failed.");
6368 sub time_to_login($) {
6370 return $self->{'time_to_login'};
6373 sub set_time_to_login($$) {
6375 $self->{'time_to_login'} = shift;
6378 sub max_jobs_running($) {
6380 if(not defined $self->{'max_jobs_running'}) {
6381 my $nproc = $self->compute_number_of_processes($opt::jobs);
6382 $self->set_max_jobs_running($nproc);
6384 return $self->{'max_jobs_running'};
6387 sub orig_max_jobs_running($) {
6389 return $self->{'orig_max_jobs_running'};
6392 sub compute_number_of_processes($) {
6393 # Number of processes wanted and limited by system resources
6395 # Number of processes
6398 my $wanted_processes = $self->user_requested_processes($opt_P);
6399 if(not defined $wanted_processes) {
6400 $wanted_processes = $Global::default_simultaneous_sshlogins;
6402 ::debug("load", "Wanted procs: $wanted_processes\n");
6404 $self->processes_available_by_system_limit($wanted_processes);
6405 ::debug("load", "Limited to procs: $system_limit\n");
6406 return $system_limit;
6411 my $max_system_proc_reached;
6412 my $more_filehandles;
6415 my $count_jobs_already_read;
6421 sub reserve_filehandles($) {
6422 # Reserves filehandle
6425 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6429 sub reserve_process() {
6430 # Spawn a dummy process
6432 if($child = fork()) {
6433 push @children, $child;
6434 $Global::unkilled_children{$child} = 1;
6435 } elsif(defined $child) {
6437 # The child takes one process slot
6438 # It will be killed later
6439 $SIG{'TERM'} = $Global::original_sig{'TERM'};
6440 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6441 # The exec does not work on Cygwin and QNX
6444 # 'exec sleep' takes less RAM than sleeping in perl
6445 exec 'sleep', 10101;
6450 $max_system_proc_reached = 1;
6454 sub get_args_or_jobs() {
6455 # Get an arg or a job (depending on mode)
6456 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6457 # Skip: No need to get args
6459 } elsif(defined $opt::retries and $count_jobs_already_read) {
6460 # For retries we may need to run all jobs on this sshlogin
6461 # so include the already read jobs for this sshlogin
6462 $count_jobs_already_read--;
6465 if($opt::X or $opt::m) {
6466 # The arguments may have to be re-spread over several jobslots
6467 # So pessimistically only read one arg per jobslot
6468 # instead of a full commandline
6469 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6470 if($Global::JobQueue->empty()) {
6473 $job = $Global::JobQueue->get();
6478 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6483 # If there are no more command lines, then we have a process
6484 # per command line, so no need to go further
6485 if($Global::JobQueue->empty()) {
6488 $job = $Global::JobQueue->get();
6489 # Replacement must happen here due to seq()
6490 $job and $job->replaced();
6499 # Cleanup: Close the files
6500 for (values %fh) { close $_ }
6501 # Cleanup: Kill the children
6502 for my $pid (@children) {
6505 delete $Global::unkilled_children{$pid};
6507 # Cleanup: Unget the command_lines or the @args
6508 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6510 $Global::JobQueue->unget(@jobs);
6514 sub processes_available_by_system_limit($) {
6515 # If the wanted number of processes is bigger than the system limits:
6516 # Limit them to the system limits
6517 # Limits are: File handles, number of input lines, processes,
6518 # and taking > 1 second to spawn 10 extra processes
6520 # Number of processes
6522 my $wanted_processes = shift;
6523 my $system_limit = 0;
6524 my $slow_spawning_warning_printed = 0;
6526 $more_filehandles = 1;
6527 $tmpfhname = "TmpFhNamE";
6529 # perl uses 7 filehandles for something?
6530 # parallel uses 1 for memory_usage
6531 # parallel uses 4 for ?
6532 reserve_filehandles(12);
6533 # Two processes for load avg and ?
6537 # For --retries count also jobs already run
6538 $count_jobs_already_read = $Global::JobQueue->next_seq();
6539 my $wait_time_for_getting_args = 0;
6540 my $start_time = time;
6542 $system_limit >= $wanted_processes and last;
6543 not $more_filehandles and last;
6544 $max_system_proc_reached and last;
6546 my $before_getting_arg = time;
6547 if(!$Global::dummy_jobs) {
6548 get_args_or_jobs() or last;
6550 $wait_time_for_getting_args += time - $before_getting_arg;
6553 # Every simultaneous process uses 2 filehandles to write to
6554 # and 2 filehandles to read from
6555 reserve_filehandles(4);
6557 # System process limit
6560 my $forktime = time - $time - $wait_time_for_getting_args;
6561 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
6563 " (processes so far: ", $system_limit,")\n");
6564 if($system_limit > 10 and
6566 $forktime > $system_limit * 0.01
6567 and not $slow_spawning_warning_printed) {
6568 # It took more than 0.01 second to fork a processes on avg.
6569 # Give the user a warning. He can press Ctrl-C if this
6571 ::warning("Starting $system_limit processes took > $forktime sec.",
6572 "Consider adjusting -j. Press CTRL-C to stop.");
6573 $slow_spawning_warning_printed = 1;
6578 if($system_limit < $wanted_processes) {
6579 # The system_limit is less than the wanted_processes
6580 if($system_limit < 1 and not $Global::JobQueue->empty()) {
6581 ::warning("Cannot spawn any jobs. ".
6582 "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
6583 "or /proc/sys/kernel/pid_max may help.");
6584 ::wait_and_exit(255);
6586 if(not $more_filehandles) {
6587 ::warning("Only enough file handles to run ".
6588 $system_limit. " jobs in parallel.",
6589 "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
6590 "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
6591 "or /proc/sys/fs/file-max may help.");
6593 if($max_system_proc_reached) {
6594 ::warning("Only enough available processes to run ".
6595 $system_limit. " jobs in parallel.",
6596 "Raising ulimit -u or /etc/security/limits.conf ",
6597 "or /proc/sys/kernel/pid_max may help.");
6600 if($] == 5.008008 and $system_limit > 1000) {
6601 # https://savannah.gnu.org/bugs/?36942
6602 $system_limit = 1000;
6604 if($Global::JobQueue->empty()) {
6605 $system_limit ||= 1;
6607 if($self->string() ne ":" and
6608 $system_limit > $Global::default_simultaneous_sshlogins) {
6610 $self->simultaneous_sshlogin_limit($system_limit);
6612 return $system_limit;
6616 sub simultaneous_sshlogin_limit($) {
6617 # Test by logging in wanted number of times simultaneously
6619 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
6621 my $wanted_processes = shift;
6622 if($self->{'time_to_login'}) {
6623 return $wanted_processes;
6626 # Try twice because it guesses wrong sometimes
6627 # Choose the minimal
6629 ::min($self->simultaneous_sshlogin($wanted_processes),
6630 $self->simultaneous_sshlogin($wanted_processes));
6631 if($ssh_limit < $wanted_processes) {
6632 my $serverlogin = $self->serverlogin();
6633 ::warning("ssh to $serverlogin only allows ".
6634 "for $ssh_limit simultaneous logins.",
6635 "You may raise this by changing",
6636 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
6637 "You can also try --sshdelay 0.1",
6638 "Using only ".($ssh_limit-1)." connections ".
6639 "to avoid race conditions.");
6640 # Race condition can cause problem if using all sshs.
6641 if($ssh_limit > 1) { $ssh_limit -= 1; }
6646 sub simultaneous_sshlogin($) {
6647 # Using $sshlogin try to see if we can do $wanted_processes
6648 # simultaneous logins
6649 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
6652 # $wanted_processes = Try for this many logins in parallel
6654 # $ssh_limit = Number of succesful parallel logins
6657 my $wanted_processes = shift;
6658 my $sshcmd = $self->sshcommand();
6659 my $serverlogin = $self->serverlogin();
6660 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
6661 # TODO sh -c wrapper to work for csh
6662 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
6663 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
6664 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
6665 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
6666 ::die_bug("simultaneouslogin");
6667 my $ssh_limit = <$simul_fh>;
6675 $self->{'ncpus'} = shift;
6678 sub user_requested_processes($) {
6679 # Parse the number of processes that the user asked for using -j
6681 # $opt_P = string formatted as for -P
6683 # $processes = the number of processes to run on this sshlogin
6687 if(defined $opt_P) {
6688 if($opt_P =~ /^\+(\d+)$/) {
6692 $self->ncpus() + $j;
6693 } elsif ($opt_P =~ /^-(\d+)$/) {
6697 $self->ncpus() - $j;
6698 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
6702 $self->ncpus() * $j / 100;
6703 } elsif ($opt_P =~ /^(\d+)$/) {
6705 if($processes == 0) {
6706 # -P 0 = infinity (or at least close)
6707 $processes = $Global::infinity;
6709 } elsif (-f $opt_P) {
6710 $Global::max_procs_file = $opt_P;
6711 if(open(my $in_fh, "<", $Global::max_procs_file)) {
6712 my $opt_P_file = join("",<$in_fh>);
6714 $processes = $self->user_requested_processes($opt_P_file);
6716 ::error("Cannot open $opt_P.");
6717 ::wait_and_exit(255);
6720 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
6723 $processes = ::ceil($processes);
6729 # Number of CPU threads
6730 # --use_sockets_instead_of_threads = count socket instead
6731 # --use_cores_instead_of_threads = count physical cores instead
6733 # $ncpus = number of cpu (threads) on this sshlogin
6736 if(not defined $self->{'ncpus'}) {
6737 my $sshcmd = $self->sshcommand();
6738 my $serverlogin = $self->serverlogin();
6739 if($serverlogin eq ":") {
6740 if($opt::use_sockets_instead_of_threads) {
6741 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
6742 } elsif($opt::use_cores_instead_of_threads) {
6743 $self->{'ncpus'} = socket_core_thread()->{'cores'};
6745 $self->{'ncpus'} = socket_core_thread()->{'threads'};
6749 if($opt::use_sockets_instead_of_threads
6751 $opt::use_cpus_instead_of_cores) {
6753 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
6754 } elsif($opt::use_cores_instead_of_threads) {
6756 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
6759 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
6762 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
6763 $self->{'ncpus'} = $ncpu;
6765 ::warning("Could not figure out ".
6766 "number of cpus on $serverlogin ($ncpu). Using 1.");
6767 $self->{'ncpus'} = 1;
6771 return $self->{'ncpus'};
6777 # Number of threads using `nproc`
6778 my $no_of_threads = ::qqx("nproc");
6779 chomp $no_of_threads;
6780 return $no_of_threads;
6783 sub no_of_sockets() {
6784 return socket_core_thread()->{'sockets'};
6788 return socket_core_thread()->{'cores'};
6791 sub no_of_threads() {
6792 return socket_core_thread()->{'threads'};
6795 sub socket_core_thread() {
6798 # 'sockets' => #sockets = number of socket with CPU present
6799 # 'cores' => #cores = number of physical cores
6800 # 'threads' => #threads = number of compute cores (hyperthreading)
6801 # 'active' => #taskset_threads = number of taskset limited cores
6805 if ($^O eq 'linux') {
6806 $cpu = sct_gnu_linux();
6807 } elsif ($^O eq 'android') {
6808 $cpu = sct_android();
6809 } elsif ($^O eq 'freebsd') {
6810 $cpu = sct_freebsd();
6811 } elsif ($^O eq 'netbsd') {
6812 $cpu = sct_netbsd();
6813 } elsif ($^O eq 'openbsd') {
6814 $cpu = sct_openbsd();
6815 } elsif ($^O eq 'gnu') {
6817 } elsif ($^O eq 'darwin') {
6818 $cpu = sct_darwin();
6819 } elsif ($^O eq 'solaris') {
6820 $cpu = sct_solaris();
6821 } elsif ($^O eq 'aix') {
6823 } elsif ($^O eq 'hpux') {
6825 } elsif ($^O eq 'nto') {
6827 } elsif ($^O eq 'svr5') {
6828 $cpu = sct_openserver();
6829 } elsif ($^O eq 'irix') {
6831 } elsif ($^O eq 'dec_osf') {
6834 # Try all methods until we find something that works
6835 $cpu = (sct_gnu_linux()
6852 my $nproc = nproc();
6862 ::warning("Cannot figure out number of cpus. Using 1.");
6870 # Choose minimum of active and actual
6872 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
6873 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
6874 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
6878 sub sct_gnu_linux() {
6880 # { 'sockets' => #sockets
6882 # 'threads' => #threads
6883 # 'active' => #taskset_threads }
6885 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
6886 if($ENV{'PARALLEL_CPUINFO'} or -e "/proc/cpuinfo") {
6887 $cpu->{'sockets'} = 0;
6888 $cpu->{'cores'} = 0;
6889 $cpu->{'threads'} = 0;
6894 if(open(my $in_fh, "<", "/proc/cpuinfo")) {
6895 @cpuinfo = <$in_fh>;
6898 if($ENV{'PARALLEL_CPUINFO'}) {
6899 # Use CPUINFO from environment - used for testing only
6900 @cpuinfo = split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'};
6903 if(/^physical id.*[:](.*)/) {
6905 if(not $phy_seen{$1}++) {
6906 $cpu->{'sockets'}++;
6909 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
6912 /^processor.*[:]/i and $cpu->{'threads'}++;
6914 $cpu->{'sockets'} ||= 1;
6915 $cpu->{'cores'} ||= $cpu->{'threads'};
6917 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
6918 # if 'taskset' is used to limit number of threads
6919 if(open(my $in_fh, "<", "/proc/self/status")) {
6921 if(/^Cpus_allowed:\s*(\S+)/) {
6924 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
6930 if(grep { /\d/ } values %$cpu) {
6939 # { 'sockets' => #sockets
6941 # 'threads' => #threads
6942 # 'active' => #taskset_threads }
6944 return sct_gnu_linux();
6949 # { 'sockets' => #sockets
6951 # 'threads' => #threads
6952 # 'active' => #taskset_threads }
6955 $cpu->{'cores'} = (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
6957 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
6958 $cpu->{'cores'} and chomp $cpu->{'cores'};
6960 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
6962 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
6963 $cpu->{'threads'} and chomp $cpu->{'threads'};
6964 $cpu->{'sockets'} ||= $cpu->{'cores'};
6966 if(grep { /\d/ } values %$cpu) {
6975 # { 'sockets' => #sockets
6977 # 'threads' => #threads
6978 # 'active' => #taskset_threads }
6981 $cpu->{'cores'} = ::qqx("sysctl -n hw.ncpu");
6982 $cpu->{'cores'} and chomp $cpu->{'cores'};
6983 $cpu->{'threads'} = ::qqx("sysctl -n hw.ncpu");
6984 $cpu->{'threads'} and chomp $cpu->{'threads'};
6985 $cpu->{'sockets'} ||= $cpu->{'cores'};
6987 if(grep { /\d/ } values %$cpu) {
6996 # { 'sockets' => #sockets
6998 # 'threads' => #threads
6999 # 'active' => #taskset_threads }
7002 $cpu->{'cores'} = ::qqx('sysctl -n hw.ncpu');
7003 $cpu->{'cores'} and chomp $cpu->{'cores'};
7004 $cpu->{'threads'} = ::qqx('sysctl -n hw.ncpu');
7005 $cpu->{'threads'} and chomp $cpu->{'threads'};
7006 $cpu->{'sockets'} ||= $cpu->{'cores'};
7008 if(grep { /\d/ } values %$cpu) {
7017 # { 'sockets' => #sockets
7019 # 'threads' => #threads
7020 # 'active' => #taskset_threads }
7023 $cpu->{'cores'} = ::qqx("nproc");
7024 $cpu->{'cores'} and chomp $cpu->{'cores'};
7025 $cpu->{'threads'} = ::qqx("nproc");
7026 $cpu->{'threads'} and chomp $cpu->{'threads'};
7028 if(grep { /\d/ } values %$cpu) {
7037 # { 'sockets' => #sockets
7039 # 'threads' => #threads
7040 # 'active' => #taskset_threads }
7044 (::qqx('sysctl -n hw.physicalcpu')
7046 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7047 $cpu->{'cores'} and chomp $cpu->{'cores'};
7049 (::qqx('sysctl -n hw.logicalcpu')
7051 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7052 $cpu->{'threads'} and chomp $cpu->{'threads'};
7053 $cpu->{'sockets'} ||= $cpu->{'cores'};
7055 if(grep { /\d/ } values %$cpu) {
7064 # { 'sockets' => #sockets
7066 # 'threads' => #threads
7067 # 'active' => #taskset_threads }
7070 if(-x "/usr/sbin/psrinfo") {
7071 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7072 if($#psrinfo >= 0) {
7073 $cpu->{'cores'} = $#psrinfo +1;
7076 if(-x "/usr/sbin/prtconf") {
7077 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7078 if($#prtconf >= 0) {
7079 $cpu->{'cores'} = $#prtconf +1;
7082 if(-x "/usr/sbin/prtconf") {
7083 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7084 if($#prtconf >= 0) {
7085 $cpu->{'cores'} = $#prtconf +1;
7088 $cpu->{'cores'} and chomp $cpu->{'cores'};
7090 if(-x "/usr/sbin/psrinfo") {
7091 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7092 if($#psrinfo >= 0) {
7093 $cpu->{'threads'} = $#psrinfo +1;
7096 if(-x "/usr/sbin/prtconf") {
7097 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7098 if($#prtconf >= 0) {
7099 $cpu->{'threads'} = $#prtconf +1;
7102 $cpu->{'threads'} and chomp $cpu->{'threads'};
7104 if(grep { /\d/ } values %$cpu) {
7113 # { 'sockets' => #sockets
7115 # 'threads' => #threads
7116 # 'active' => #taskset_threads }
7119 if(-x "/usr/sbin/lscfg") {
7120 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7121 $cpu->{'cores'} = <$in_fh>;
7122 chomp ($cpu->{'cores'});
7126 if(-x "/usr/bin/vmstat") {
7127 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7129 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7135 if(grep { /\d/ } values %$cpu) {
7136 # BUG It is not not known how to calculate this
7137 $cpu->{'sockets'} = 1;
7146 # { 'sockets' => #sockets
7148 # 'threads' => #threads
7149 # 'active' => #taskset_threads }
7153 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7154 chomp($cpu->{'cores'});
7156 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7158 if(grep { /\d/ } values %$cpu) {
7159 # BUG It is not not known how to calculate this
7160 $cpu->{'sockets'} = 1;
7169 # { 'sockets' => #sockets
7171 # 'threads' => #threads
7172 # 'active' => #taskset_threads }
7175 # BUG: It is not known how to calculate this.
7177 if(grep { /\d/ } values %$cpu) {
7184 sub sct_openserver() {
7186 # { 'sockets' => #sockets
7188 # 'threads' => #threads
7189 # 'active' => #taskset_threads }
7192 if(-x "/usr/sbin/psrinfo") {
7193 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7194 if($#psrinfo >= 0) {
7195 $cpu->{'cores'} = $#psrinfo +1;
7198 if(-x "/usr/sbin/psrinfo") {
7199 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7200 if($#psrinfo >= 0) {
7201 $cpu->{'threads'} = $#psrinfo +1;
7204 $cpu->{'sockets'} ||= $cpu->{'cores'};
7206 if(grep { /\d/ } values %$cpu) {
7215 # { 'sockets' => #sockets
7217 # 'threads' => #threads
7218 # 'active' => #taskset_threads }
7221 $cpu->{'cores'} = ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7222 $cpu->{'cores'} and chomp $cpu->{'cores'};
7224 if(grep { /\d/ } values %$cpu) {
7233 # { 'sockets' => #sockets
7235 # 'threads' => #threads
7236 # 'active' => #taskset_threads }
7239 $cpu->{'cores'} = ::qqx("sizer -pr");
7240 $cpu->{'cores'} and chomp $cpu->{'cores'};
7241 $cpu->{'cores'} ||= 1;
7242 $cpu->{'sockets'} ||= $cpu->{'cores'};
7243 $cpu->{'threads'} ||= $cpu->{'cores'};
7245 if(grep { /\d/ } values %$cpu) {
7254 # $sshcommand = the command (incl options) to run when using ssh
7256 if (not defined $self->{'sshcommand'}) {
7257 $self->sshcommand_of_sshlogin();
7259 return $self->{'sshcommand'};
7262 sub serverlogin($) {
7264 # $sshcommand = the command (incl options) to run when using ssh
7266 if (not defined $self->{'serverlogin'}) {
7267 $self->sshcommand_of_sshlogin();
7269 return $self->{'serverlogin'};
7272 sub sshcommand_of_sshlogin($) {
7273 # Compute ssh command and serverlogin from sshlogin
7274 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
7275 # 'user@server' -> ('ssh','user@server')
7276 # 'myssh user@server' -> ('myssh','user@server')
7277 # 'myssh -l user server' -> ('myssh -l user','server')
7278 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
7280 # $self->{'sshcommand'}
7281 # $self->{'serverlogin'}
7283 my ($sshcmd, $serverlogin);
7284 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
7285 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
7286 if($self->{'string'} =~ /(.+) (\S+)$/) {
7288 $sshcmd = $1; $serverlogin = $2;
7291 if($opt::controlmaster) {
7292 # Use control_path to make ssh faster
7293 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7294 $sshcmd = $opt::ssh." -S ".$control_path;
7295 $serverlogin = $self->{'string'};
7296 if(not $self->{'control_path'}{$control_path}++) {
7297 # Master is not running for this control_path
7301 $Global::sshmaster{$pid} ||= 1;
7303 $SIG{'TERM'} = undef;
7304 # Ignore the 'foo' being printed
7305 open(STDOUT,">","/dev/null");
7306 # STDERR >/dev/null to ignore
7307 open(STDERR,">","/dev/null");
7308 open(STDIN,"<","/dev/null");
7309 # Run a sleep that outputs data, so it will discover
7310 # if the ssh connection closes.
7311 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7312 my @master = ($opt::ssh, "-MTS",
7313 $control_path, $serverlogin, "--", "perl", "-e",
7319 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7323 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7324 # convert user@server to '-l user server'
7325 # because lsh does not support user@server
7326 $sshcmd = $sshcmd." -l ".$1;
7329 $self->{'sshcommand'} = $sshcmd;
7330 $self->{'serverlogin'} = $serverlogin;
7333 sub control_path_dir($) {
7335 # $control_path_dir = dir of control path (for -M)
7337 if(not defined $self->{'control_path_dir'}) {
7338 $self->{'control_path_dir'} =
7339 # Use $ENV{'TMPDIR'} as that is typically not
7341 File::Temp::tempdir($ENV{'TMPDIR'}
7342 . "/control_path_dir-XXXX",
7345 return $self->{'control_path_dir'};
7348 sub rsync_transfer_cmd($) {
7349 # Command to run to transfer a file
7351 # $file = filename of file to transfer
7352 # $workdir = destination dir
7354 # $cmd = rsync command to run to transfer $file ("" if unreadable)
7357 my $workdir = shift;
7359 ::warning($file. " is not readable and will not be transferred.");
7363 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7365 $rsync_destdir = ::shell_quote_file($workdir);
7368 $rsync_destdir = "/";
7370 $file = ::shell_quote_file($file);
7371 my $sshcmd = $self->sshcommand();
7372 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7374 my $serverlogin = $self->serverlogin();
7375 # Make dir if it does not exist
7376 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7377 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7380 sub cleanup_cmd($$$) {
7381 # Command to run to remove the remote file
7383 # $file = filename to remove
7384 # $workdir = destination dir
7386 # $cmd = ssh command to run to remove $file and empty parent dirs
7389 my $workdir = shift;
7392 # foo/bar/./baz/quux => workdir/baz/quux
7393 # /foo/bar/./baz/quux => workdir/baz/quux
7394 $f =~ s:.*/\./:$workdir/:;
7395 } elsif($f =~ m:^[^/]:) {
7396 # foo/bar => workdir/foo/bar
7397 $f = $workdir."/".$f;
7399 my @subdirs = split m:/:, ::dirname($f);
7404 unshift @rmdir, ::shell_quote_file($dir);
7406 my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7407 if(defined $opt::workdir and $opt::workdir eq "...") {
7408 $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7411 $f = ::shell_quote_file($f);
7412 my $sshcmd = $self->sshcommand();
7413 my $serverlogin = $self->serverlogin();
7414 return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7421 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7422 # If the version >= 3.1.0: downgrade to protocol 30
7424 # $rsync = "rsync" or "rsync --protocol 30"
7426 my @out = `rsync --version`;
7428 if(/version (\d+.\d+)(.\d+)?/) {
7430 # Version 3.1.0 or later: Downgrade to protocol 30
7431 $rsync = "rsync --protocol 30";
7437 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7448 my $commandref = shift;
7449 my $read_from = shift;
7450 my $context_replace = shift;
7451 my $max_number_of_args = shift;
7452 my $transfer_files = shift;
7453 my $return_files = shift;
7454 my $commandlinequeue = CommandLineQueue->new
7455 ($commandref, $read_from, $context_replace, $max_number_of_args,
7456 $transfer_files, $return_files);
7460 'commandlinequeue' => $commandlinequeue,
7462 'total_jobs' => undef,
7463 }, ref($class) || $class;
7469 $self->{'this_job_no'}++;
7470 if(@{$self->{'unget'}}) {
7471 return shift @{$self->{'unget'}};
7473 my $commandline = $self->{'commandlinequeue'}->get();
7474 if(defined $commandline) {
7475 return Job->new($commandline);
7477 $self->{'this_job_no'}--;
7485 unshift @{$self->{'unget'}}, @_;
7486 $self->{'this_job_no'} -= @_;
7491 my $empty = (not @{$self->{'unget'}}) &&
7492 $self->{'commandlinequeue'}->empty();
7493 ::debug("run", "JobQueue->empty $empty ");
7499 if(not defined $self->{'total_jobs'}) {
7500 if($opt::pipe and not $opt::tee) {
7501 ::error("--pipe is incompatible with --eta/--bar/--shuf");
7502 ::wait_and_exit(255);
7504 if($opt::sqlworker) {
7505 $self->{'total_jobs'} = $Global::sql->total_jobs();
7509 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
7511 while($record = $record_queue->get()) {
7512 push @arg_records, $record;
7513 if(time - $start > 10) {
7514 ::warning("Reading ".scalar(@arg_records).
7515 " arguments took longer than 10 seconds.");
7516 $opt::eta && ::warning("Consider removing --eta.");
7517 $opt::bar && ::warning("Consider removing --bar.");
7518 $opt::shuf && ::warning("Consider removing --shuf.");
7522 while($record = $record_queue->get()) {
7523 push @arg_records, $record;
7525 if($opt::shuf and @arg_records) {
7526 my $i = @arg_records;
7528 my $j = int rand($i+1);
7529 @arg_records[$i,$j] = @arg_records[$j,$i];
7532 $record_queue->unget(@arg_records);
7533 # $#arg_records = number of args - 1
7534 # We have read one @arg_record for this job (so add 1 more)
7535 my $num_args = $#arg_records + 2;
7536 # This jobs is not started so -1
7537 my $started_jobs = $self->{'this_job_no'} - 1;
7538 my $max_args = ::max($Global::max_number_of_args,1);
7539 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
7541 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
7542 " ($num_args/$max_args + $started_jobs)\n");
7545 return $self->{'total_jobs'};
7548 sub flush_total_jobs($) {
7549 # Unset total_jobs to force recomputing
7551 ::debug("init","flush Total jobs: ");
7552 $self->{'total_jobs'} = undef;
7558 return $self->{'commandlinequeue'}->seq();
7563 return $self->{'commandlinequeue'}->quote_args();
7571 my $commandlineref = shift;
7573 'commandline' => $commandlineref, # CommandLine object
7574 'workdir' => undef, # --workdir
7575 # filehandle for stdin (used for --pipe)
7576 # filename for writing stdout to (used for --files)
7577 # remaining data not sent to stdin (used for --pipe)
7578 # tmpfiles to cleanup when job is done
7580 # amount of data sent via stdin (used for --pipe)
7581 'transfersize' => 0, # size of files using --transfer
7582 'returnsize' => 0, # size of files using --return
7584 # hash of { SSHLogins => number of times the command failed there }
7586 'sshlogin' => undef,
7587 # The commandline wrapped with rsync and ssh
7588 'sshlogin_wrap' => undef,
7589 'exitstatus' => undef,
7590 'exitsignal' => undef,
7591 # Timestamp for timeout if any
7594 # Output used for SQL and CSV-output
7595 'output' => { 1 => [], 2 => [] },
7596 'halfline' => { 1 => [], 2 => [] },
7597 }, ref($class) || $class;
7602 $self->{'commandline'} or ::die_bug("commandline empty");
7603 return $self->{'commandline'}->replaced();
7608 return $self->{'commandline'}->seq();
7613 return $self->{'commandline'}->set_seq(shift);
7618 return $self->{'commandline'}->slot();
7623 push @Global::slots, $self->slot();
7631 # $cattail = perl program for:
7632 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
7635 # cat followed by tail (possibly with rm as soon at the file is opened)
7636 # If $writerpid dead: finish after this round
7640 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
7642 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
7646 while(! -s $comfile) {
7647 # Writer has not opened the buffer file, so we cannot remove it yet
7648 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
7651 # The writer and we have both opened the file, so it is safe to unlink it
7652 unlink $unlink_file;
7655 my $first_round = 1;
7657 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
7658 $flags |= O_NONBLOCK; # Add non-blocking to the flags
7659 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
7664 my $writer_running = kill 0, $writerpid;
7665 $read = sysread(IN,$buf,131072);
7668 # Only start the command if there any input to process
7670 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
7675 my $bytes_written = syswrite(OUT,$buf);
7676 # syswrite may be interrupted by SIGHUP
7677 substr($buf,0,$bytes_written) = "";
7679 # Something printed: Wait less next time
7682 if(eof(IN) and not $writer_running) {
7683 # Writer dead: There will never be sent more to the decompressor
7687 # TODO This could probably be done more efficiently using select(2)
7688 # Nothing read: Wait longer before next read
7689 # Up to 100 milliseconds
7690 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
7696 # Sleep this many milliseconds.
7698 select(undef, undef, undef, $secs/1000);
7701 $cattail =~ s/#.*//mg;
7702 $cattail =~ s/\s+/ /g;
7708 sub openoutputfiles($) {
7709 # Open files for STDOUT and STDERR
7710 # Set file handles in $self->fh
7712 my ($outfhw, $errfhw, $outname, $errname);
7714 if($opt::linebuffer and not
7715 ($opt::keeporder or $opt::files or $opt::results or
7716 $opt::compress or $opt::compress_program or
7717 $opt::decompress_program)) {
7718 # Do not save to files: Use non-blocking pipe
7719 my ($outfhr, $errfhr);
7720 pipe($outfhr, $outfhw) || die;
7721 pipe($errfhr, $errfhw) || die;
7722 $self->set_fh(1,'w',$outfhw);
7723 $self->set_fh(2,'w',$errfhw);
7724 $self->set_fh(1,'r',$outfhr);
7725 $self->set_fh(2,'r',$errfhr);
7726 # Make it possible to read non-blocking from the pipe
7727 for my $fdno (1,2) {
7728 ::set_fh_non_blocking($self->fh($fdno,'r'));
7730 # Return immediately because we do not need setting filenames
7732 } elsif($opt::results and not $Global::csvsep) {
7733 my $out = $self->{'commandline'}->results_out();
7735 if($out eq $opt::results or $out =~ m:/$:) {
7736 # $opt::results = simple string or ending in /
7738 # prefix/name1/val1/name2/val2/seq
7739 $seqname = $out."seq";
7740 # prefix/name1/val1/name2/val2/stdout
7741 $outname = $out."stdout";
7742 # prefix/name1/val1/name2/val2/stderr
7743 $errname = $out."stderr";
7745 # $opt::results = replacement string not ending in /
7748 $errname = "$out.err";
7749 $seqname = "$out.seq";
7752 if(not open($seqfhw, "+>", $seqname)) {
7753 ::error("Cannot write to `$seqname'.");
7754 ::wait_and_exit(255);
7756 print $seqfhw $self->seq();
7758 if(not open($outfhw, "+>", $outname)) {
7759 ::error("Cannot write to `$outname'.");
7760 ::wait_and_exit(255);
7762 if(not open($errfhw, "+>", $errname)) {
7763 ::error("Cannot write to `$errname'.");
7764 ::wait_and_exit(255);
7766 $self->set_fh(1,"unlink","");
7767 $self->set_fh(2,"unlink","");
7768 if($opt::sqlworker) {
7769 # Save the filenames in SQL table
7770 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
7771 "WHERE Seq = ". $self->seq(),
7772 $outname, $errname);
7774 } elsif(not $opt::ungroup) {
7775 # To group we create temporary files for STDOUT and STDERR
7776 # To avoid the cleanup unlink the files immediately (but keep them open)
7778 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
7779 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
7780 # --files => only remove stderr
7781 $self->set_fh(1,"unlink","");
7782 $self->set_fh(2,"unlink",$errname);
7784 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
7785 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
7786 $self->set_fh(1,"unlink",$outname);
7787 $self->set_fh(2,"unlink",$errname);
7791 open($outfhw,">&",$Global::fd{1}) || die;
7792 open($errfhw,">&",$Global::fd{2}) || die;
7793 # File name must be empty as it will otherwise be printed
7796 $self->set_fh(1,"unlink",$outname);
7797 $self->set_fh(2,"unlink",$errname);
7800 $self->set_fh(1,'w',$outfhw);
7801 $self->set_fh(2,'w',$errfhw);
7802 $self->set_fh(1,'name',$outname);
7803 $self->set_fh(2,'name',$errname);
7804 if($opt::compress) {
7805 $self->filter_through_compress();
7806 } elsif(not $opt::ungroup) {
7809 if($opt::linebuffer) {
7810 # Make it possible to read non-blocking from
7812 # Used for --linebuffer with -k, --files, --res, --compress*
7813 for my $fdno (1,2) {
7814 ::set_fh_non_blocking($self->fh($fdno,'r'));
7819 sub print_verbose_dryrun($) {
7820 # If -v set: print command to stdout (possibly buffered)
7821 # This must be done before starting the command
7823 if($Global::verbose or $opt::dryrun) {
7824 my $fh = $self->fh(1,"w");
7825 if($Global::verbose <= 1) {
7826 print $fh $self->replaced(),"\n";
7828 # Verbose level > 1: Print the rsync and stuff
7829 print $fh $self->wrapped(),"\n";
7832 if($opt::sqlworker) {
7833 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
7839 # Files to remove when job is done
7841 push @{$self->{'unlink'}}, @_;
7845 # Files to remove when job is done
7847 return @{$self->{'unlink'}};
7851 # Remove files when job is done
7853 unlink $self->get_rm();
7854 delete @Global::unlink{$self->get_rm()};
7859 # Set reading FD if using --group (--ungroup does not need)
7860 for my $fdno (1,2) {
7861 # Re-open the file for reading
7862 # so fdw can be closed seperately
7863 # and fdr can be seeked seperately (for --line-buffer)
7864 open(my $fdr,"<", $self->fh($fdno,'name')) ||
7865 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
7866 $self->set_fh($fdno,'r',$fdr);
7867 # Unlink if not debugging
7868 $Global::debug or ::rm($self->fh($fdno,"unlink"));
7872 sub empty_input_wrapper($) {
7873 # If no input: exit(0)
7874 # If some input: Pass input as input to command on STDIN
7875 # This avoids starting the command if there is no input.
7877 # $command = command to pipe data to
7879 # $wrapped_command = the wrapped command
7880 my $command = shift;
7883 if(sysread(STDIN, $buf, 1)) {
7884 open($fh, "|-", @ARGV) || die;
7885 syswrite($fh, $buf);
7886 # Align up to 128k block
7887 if($read = sysread(STDIN, $buf, 131071)) {
7888 syswrite($fh, $buf);
7890 while($read = sysread(STDIN, $buf, 131072)) {
7891 syswrite($fh, $buf);
7894 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
7897 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
7900 length $command > 499) {
7901 # csh does not like words longer than 1000 (499 quoted)
7902 # $command = "perl -e '".base64_zip_eval()."' ".
7903 # join" ",string_zip_base64(
7904 # 'exec "'.::perl_quote_scalar($command).'"');
7905 return 'perl -e '.::Q($script)." ".
7906 base64_wrap("exec \"$Global::shell\",'-c',\"".
7907 ::perl_quote_scalar($command).'"');
7909 return 'perl -e '.::Q($script)." ".
7910 $Global::shell." -c ".::Q($command);
7914 sub filter_through_compress($) {
7916 # Send stdout to stdin for $opt::compress_program(1)
7917 # Send stderr to stdin for $opt::compress_program(2)
7918 # cattail get pid: $pid = $self->fh($fdno,'rpid');
7919 my $cattail = cattail();
7921 for my $fdno (1,2) {
7922 # Make a communication file.
7923 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
7925 # Compressor: (echo > $comfile; compress pipe) > output
7926 # When the echo is written to $comfile,
7927 # it is known that output file is opened,
7928 # thus output file can then be removed by the decompressor.
7929 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
7930 empty_input_wrapper($opt::compress_program).") >".
7931 $self->fh($fdno,'name')) || die $?;
7932 $self->set_fh($fdno,'w',$fdw);
7933 $self->set_fh($fdno,'wpid',$wpid);
7934 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
7935 # decompress output > stdout
7936 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
7937 $opt::decompress_program, $wpid,
7938 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
7940 $self->set_fh($fdno,'r',$fdr);
7941 $self->set_fh($fdno,'rpid',$rpid);
7949 my ($self, $fd_no, $key, $fh) = @_;
7950 $self->{'fd'}{$fd_no,$key} = $fh;
7955 my ($self, $fd_no, $key) = @_;
7956 return $self->{'fd'}{$fd_no,$key};
7961 my $remaining_ref = shift;
7962 my $stdin_fh = $self->fh(0,"w");
7964 my $len = length $$remaining_ref;
7965 # syswrite may not write all in one go,
7966 # so make sure everything is written.
7969 # If writing is to a closed pipe:
7970 # Do not call signal handler, but let nothing be written
7971 local $SIG{PIPE} = undef;
7972 while($written = syswrite($stdin_fh,$$remaining_ref)){
7973 substr($$remaining_ref,0,$written) = "";
7977 sub set_block($$$$$$) {
7978 # Copy stdin buffer from $block_ref up to $endpos
7979 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
7980 # Remove $recstart and $recend if needed
7982 # $header_ref = ref to $header to prepend
7983 # $buffer_ref = ref to $buffer containing the block
7984 # $endpos = length of $block to pass on
7985 # $recstart = --recstart regexp
7986 # $recend = --recend regexp
7990 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
7991 $self->{'block'} = ($self->virgin() ? $$header_ref : "").
7992 substr($$buffer_ref,0,$endpos);
7993 if($opt::remove_rec_sep) {
7994 remove_rec_sep(\$self->{'block'},$recstart,$recend);
7996 $self->{'block_length'} = length $self->{'block'};
7997 $self->{'block_pos'} = 0;
7998 $self->add_transfersize($self->{'block_length'});
8003 return \$self->{'block'};
8007 sub block_length($) {
8009 return $self->{'block_length'};
8012 sub remove_rec_sep($) {
8013 my ($block_ref,$recstart,$recend) = @_;
8014 # Remove record separator
8015 $$block_ref =~ s/$recend$recstart//gos;
8016 $$block_ref =~ s/^$recstart//os;
8017 $$block_ref =~ s/$recend$//os;
8020 sub non_blocking_write($) {
8022 my $something_written = 0;
8023 use POSIX qw(:errno_h);
8025 my $in = $self->fh(0,"w");
8026 my $rv = syswrite($in,
8027 substr($self->{'block'},$self->{'block_pos'}));
8028 if (!defined($rv) && $! == EAGAIN
) {
8029 # would block - but would have written
8030 $something_written = 0;
8031 # avoid triggering auto expanding block
8032 $Global::no_autoexpand_block
||= 1;
8033 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8035 # Remove the written part
8036 $self->{'block_pos'} += $rv;
8037 $something_written = $rv;
8039 # successfully wrote everything
8040 # Empty block to free memory
8042 $self->set_block(\
$a,\
$a,0,"","");
8043 $something_written = $rv;
8045 ::debug
("pipe", "Non-block: ", $something_written);
8046 return $something_written;
8052 return $self->{'virgin'};
8055 sub set_virgin
($$) {
8057 $self->{'virgin'} = shift;
8062 return $self->{'pid'};
8067 $self->{'pid'} = shift;
8072 # UNIX-timestamp this job started
8074 return sprintf("%.3f",$self->{'starttime'});
8077 sub set_starttime
($@
) {
8079 my $starttime = shift || ::now
();
8080 $self->{'starttime'} = $starttime;
8082 $Global::sql
->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8088 # Run time in seconds with 3 decimals
8090 return sprintf("%.3f",
8091 int(($self->endtime() - $self->starttime())*1000)/1000);
8096 # UNIX-timestamp this job ended
8097 # 0 if not ended yet
8099 return ($self->{'endtime'} || 0);
8102 sub set_endtime
($$) {
8104 my $endtime = shift;
8105 $self->{'endtime'} = $endtime;
8107 $Global::sql
->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8111 sub is_timedout
($) {
8112 # Is the job timedout?
8114 # $delta_time = time that the job may run
8118 my $delta_time = shift;
8119 return time > $self->{'starttime'} + $delta_time;
8124 $self->set_exitstatus(-1);
8125 ::kill_sleep_seq
($self->pid());
8129 # return number of times failed for this $sshlogin
8133 # Number of times failed for $sshlogin
8135 my $sshlogin = shift;
8136 return $self->{'failed'}{$sshlogin};
8139 sub failed_here
($) {
8140 # return number of times failed for the current $sshlogin
8142 # Number of times failed for this sshlogin
8144 return $self->{'failed'}{$self->sshlogin()};
8148 # increase the number of times failed for this $sshlogin
8150 my $sshlogin = shift;
8151 $self->{'failed'}{$sshlogin}++;
8154 sub add_failed_here
($) {
8155 # increase the number of times failed for the current $sshlogin
8157 $self->{'failed'}{$self->sshlogin()}++;
8160 sub reset_failed
($) {
8161 # increase the number of times failed for this $sshlogin
8163 my $sshlogin = shift;
8164 delete $self->{'failed'}{$sshlogin};
8167 sub reset_failed_here
($) {
8168 # increase the number of times failed for this $sshlogin
8170 delete $self->{'failed'}{$self->sshlogin()};
8175 # the number of sshlogins this command has failed on
8176 # the minimal number of times this command has failed
8179 ::min
(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
8180 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
8181 return ($number_of_sshlogins_failed_on,$min_failures);
8184 sub total_failed
($) {
8186 # $total_failures = the number of times this command has failed
8188 my $total_failures = 0;
8189 for (values %{$self->{'failed'}}) {
8190 $total_failures += $_;
8192 return $total_failures;
8198 sub postpone_exit_and_cleanup
{
8199 # Command to remove files and dirs (given as args) without
8200 # affecting the exit value in $?/$status.
8202 $script = "perl -e '".
8215 "' ".'"$?h" "$status" ';
8225 # Script to create a fifo, run a command on the fifo
8226 # while copying STDIN to the fifo, and finally
8227 # remove the fifo and return the exit code of the command.
8229 # {} == $PARALLEL_TMP for --fifo
8230 # To make it csh compatible a wrapper needs to:
8232 # * spawn $command &
8234 # * waitpid to get the exit code from $command
8235 # * be less than 1000 chars long
8236 $script = "perl -e '".
8240 # mkfifo $PARALLEL_TMP
8241 system "mkfifo", $f;
8242 # spawn $shell -c $command &
8243 $pid = fork || exec $s, "-c", $c;
8244 open($o,">",$f) || die $!;
8245 # cat > $PARALLEL_TMP
8246 while(sysread(STDIN,$buf,131072)){
8250 # waitpid to get the exit code from $command
8262 # Wrap command with:
8268 # * --pipepart (@Global::cat_prepends)
8269 # * --tee (@Global::cat_prepends)
8272 # The ordering of the wrapping is important:
8273 # * --nice/--cat/--fifo should be done on the remote machine
8274 # * --pipepart/--pipe should be done on the local machine inside --tmux
8281 # @Global::cat_prepends
8285 # $self->{'wrapped'} = the command wrapped with the above
8287 if(not defined $self->{'wrapped'}) {
8288 my $command = $self->replaced();
8289 # Bug in Bash and Ksh when running multiline aliases
8290 # This will force them to run correctly, but will fail in
8291 # tcsh so we do not do it.
8292 # $command .= "\n\n";
8293 if(@opt::shellquote) {
8294 # Quote one time for each --shellquote
8296 for(@opt::shellquote) {
8299 # Prepend "echo" (it is written in perl because
8300 # quoting '-e' causes problem in some versions and
8301 # csh's version does something wrong)
8302 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
8304 if($Global::parallel_env) {
8305 # If $PARALLEL_ENV set, put that in front of the command
8306 # Used for env_parallel.*
8307 if($Global::shell =~ /zsh/) {
8308 # The extra 'eval' will make aliases work, too
8309 $command = $Global::parallel_env."\n".
8310 "eval ".::Q($command);
8312 $command = $Global::parallel_env."\n".$command;
8316 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8317 # This is to make it possible to compute $PARALLEL_TMP on
8318 # the fly when running remotely.
8319 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8320 # the command is run.
8322 # Prepend 'cat > $PARALLEL_TMP;'
8323 # Append 'unlink $PARALLEL_TMP without affecting $?'
8325 'cat > $PARALLEL_TMP;'.
8326 $command.";". postpone_exit_and_cleanup().
8328 } elsif($opt::fifo) {
8329 # Prepend fifo-wrapper. In essence:
8332 # # $command must read {}, otherwise this 'cat' will block
8335 # without affecting $?
8336 $command = fifo_wrap(). " ".
8337 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8339 # Wrap with ssh + tranferring of files
8340 $command = $self->sshlogin_wrap($command);
8341 if(@Global::cat_prepends) {
8342 # --pipepart: prepend:
8343 # < /tmp/foo perl -e 'while(@ARGV) {
8344 # sysseek(STDIN,shift,0) || die; $left = shift;
8345 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8346 # $left -= $read; syswrite(STDOUT,$buf);
8350 # --pipepart --tee: prepend:
8353 # --pipe --tee: wrap:
8354 # (rm fifo; ... ) < fifo
8357 # (rm fifo; ... ) < fifo
8358 $command = (shift @Global::cat_prepends). "($command)".
8359 (shift @Global::cat_appends);
8360 } elsif($opt::pipe and not $opt::roundrobin) {
8361 # Wrap with EOF-detector to avoid starting $command if EOF.
8362 $command = empty_input_wrapper($command);
8365 # Wrap command with 'tmux'
8366 $command = $self->tmux_wrap($command);
8370 length $command > 499) {
8371 # csh does not like words longer than 1000 (499 quoted)
8372 # $command = "perl -e '".base64_zip_eval()."' ".
8373 # join" ",string_zip_base64(
8374 # 'exec "'.::perl_quote_scalar($command).'"');
8375 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8376 ::perl_quote_scalar($command).'"');
8378 $self->{'wrapped'} = $command;
8380 return $self->{'wrapped'};
8383 sub set_sshlogin($$) {
8385 my $sshlogin = shift;
8386 $self->{'sshlogin'} = $sshlogin;
8387 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8388 delete $self->{'wrapped'};
8390 if($opt::sqlworker) {
8391 # Identify worker as --sqlworker often runs on different machines
8392 my $host = $sshlogin->string();
8394 $host = ::hostname();
8396 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8402 return $self->{'sshlogin'};
8405 sub string_base64($) {
8406 # Base64 encode strings into 1000 byte blocks.
8407 # 1000 bytes is the largest word size csh supports
8409 # @strings = to be encoded
8411 # @base64 = 1000 byte block
8412 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8413 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8417 sub string_zip_base64($) {
8418 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8420 # 1000 bytes is the largest word size csh supports
8421 # Zipping will make exporting big environments work, too
8423 # @strings = to be encoded
8425 # @base64 = 1000 byte block
8426 my($zipin_fh, $zipout_fh,@base64);
8427 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8430 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8431 # Split base64 encoded into 1000 byte blocks
8432 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8440 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8444 sub base64_zip_eval() {
8446 # * reads base64 strings from @ARGV
8448 # * pipes through 'bzip2 -dc'
8449 # * evals the result
8450 # Reverse of string_zip_base64 + eval
8451 # Will be wrapped in ' so single quote is forbidden
8453 # $script = 1-liner for perl -e
8454 my $script = ::spacefree(0,q{
8455 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8456 eval"@GNU_Parallel";
8458 $SIG{CHLD} = "IGNORE";
8459 # Search for bzip2. Not found => use default path
8460 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
8461 # $in = stdin on $zip, $out = stdout from $zip
8462 # Forget my() to save chars for csh
8463 # my($in, $out,$eval);
8464 open3($in,$out,">&STDERR",$zip,"-dc");
8465 if(my $perlpid = fork) {
8467 $eval = join "", <$out>;
8471 # Pipe decoded base64 into 'bzip2 -dc'
8472 print $in (decode_base64(join"",@ARGV));
8480 ::debug("base64",$script,"\n");
8484 sub base64_wrap($) {
8485 # base64 encode Perl code
8486 # Split it into chunks of < 1000 bytes
8487 # Prepend it with a decoder that eval's it
8489 # $eval_string = Perl code to run
8491 # $shell_command = shell command that runs $eval_string
8492 my $eval_string = shift;
8495 ::Q(base64_zip_eval())." ".
8496 join" ",::shell_quote(string_zip_base64($eval_string));
8499 sub base64_eval($) {
8501 # * reads base64 strings from @ARGV
8503 # * evals the result
8504 # Reverse of string_base64 + eval
8505 # Will be wrapped in ' so single quote is forbidden.
8506 # Spaces are stripped so spaces cannot be significant.
8507 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
8508 # to make it clear that this is a GNU Parallel command
8509 # when looking at the process table.
8511 # $script = 1-liner for perl -e
8512 my $script = ::spacefree(0,q{
8513 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
8514 eval "@GNU_Parallel";
8515 my $eval = decode_base64(join"",@ARGV);
8518 ::debug("base64",$script,"\n");
8522 sub sshlogin_wrap($) {
8523 # Wrap the command with the commands needed to run remotely
8525 # $command = command to run
8527 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
8528 sub monitor_parent_sshd_script {
8529 # This script is to solve the problem of
8530 # * not mixing STDERR and STDOUT
8531 # * terminating with ctrl-c
8532 # If its parent is ssh: all good
8533 # If its parent is init(1): ssh died, so kill children
8534 my $monitor_parent_sshd_script;
8536 if(not $monitor_parent_sshd_script) {
8537 $monitor_parent_sshd_script =
8538 # This will be packed in ', so only use "
8539 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
8540 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
8541 '$nice = '.$opt::nice.';'.
8543 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
8545 $ENV{PARALLEL_TMP} = $tmpdir."/par".
8546 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
8547 } while(-e $ENV{PARALLEL_TMP});
8548 $SIG{CHLD} = sub { $done = 1; };
8551 # Make own process group to be able to kill HUP it later
8553 eval { setpriority(0,0,$nice) };
8554 exec $shell, "-c", ($bashfunc."@ARGV");
8558 # Parent is not init (ppid=1), so sshd is alive
8559 # Exponential sleep up to 1 sec
8560 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
8561 select(undef, undef, undef, $s);
8562 } until ($done || getppid == 1);
8563 # Kill HUP the process group if job not done
8564 kill(SIGHUP, -${pid}) unless $done;
8566 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8569 return $monitor_parent_sshd_script;
8572 sub vars_to_export {
8575 my @vars = ("parallel_bash_environment");
8576 for my $varstring (@opt::env) {
8577 # Split up --env VAR1,VAR2
8578 push @vars, split /,/, $varstring;
8581 if(-r $_ and not -d) {
8582 # Read as environment definition bug #44041
8584 my $fh = ::open_or_exit($_);
8585 $Global::envdef = join("",<$fh>);
8589 if(grep { /^_$/ } @vars) {
8592 # Include all vars that are not in a clean environment
8593 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
8594 my @ignore = <$vars_fh>;
8597 @ignore{@ignore} = @ignore;
8599 push @vars, grep { not defined $ignore{$_} } keys %ENV;
8600 @vars = grep { not /^_$/ } @vars;
8602 ::error("Run '$Global::progname --record-env' ".
8603 "in a clean environment first.");
8604 ::wait_and_exit(255);
8607 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
8608 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
8609 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
8610 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
8611 # Keep only defined variables
8612 return grep { defined($ENV{$_}) } @vars;
8617 # $eval = '$ENV{"..."}=...; ...'
8618 my @vars = vars_to_export();
8619 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
8620 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
8621 my @non_functions = (grep { !/PARALLEL_ENV/ }
8622 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
8624 # eval of @envset will set %ENV
8625 my $envset = join"", map {
8626 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
8627 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
8629 # running @bashfunc on the command line, will set the functions
8630 my @bashfunc = map {
8632 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
8633 "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
8634 # eval $bashfuncset will set $bashfunc
8637 # Functions are not supported for all shells
8638 if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
8639 ::warning("Shell functions may not be supported in $Global::shell.");
8642 '@bash_functions=qw('."@bash_functions".");".
8643 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
8645 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
8649 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
8651 $bashfuncset = '$bashfunc = "";'
8653 if($ENV{'parallel_bash_environment'}) {
8654 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
8656 ::debug("base64",$envset,$bashfuncset,"\n");
8657 return $csh_friendly,$envset,$bashfuncset;
8661 my $command = shift;
8662 # TODO test that *sh -c 'parallel --env' use *sh
8663 if(not defined $self->{'sshlogin_wrap'}{$command}) {
8664 my $sshlogin = $self->sshlogin();
8665 my $serverlogin = $sshlogin->serverlogin();
8666 my $quoted_remote_command;
8667 $ENV{'PARALLEL_SEQ'} = $self->seq();
8668 $ENV{'PARALLEL_PID'} = $$;
8669 if($serverlogin eq ":") {
8671 # Create workdir if needed. Then cd to it.
8672 my $wd = $self->workdir();
8673 if($opt::workdir eq "." or $opt::workdir eq "...") {
8674 # If $wd does not start with '/': Prepend $HOME
8675 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
8677 ::mkdir_or_die($wd);
8679 if($opt::workdir eq "...") {
8680 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
8683 $command = "cd ".::Q($wd)." || exit 255; " .
8687 # Prepend with environment setter, which sets functions in zsh
8688 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
8689 my $perl_code = $envset.$bashfuncset.
8690 '@ARGV="'.::perl_quote_scalar($command).'";'.
8691 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
8692 if(length $perl_code > 999
8697 # csh does not deal well with > 1000 chars in one word
8698 # csh does not deal well with $ENV with \n
8699 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
8701 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
8704 $self->{'sshlogin_wrap'}{$command} = $command;
8709 # Create remote workdir if needed. Then cd to it.
8710 my $wd = ::pQ($self->workdir());
8711 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
8712 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
8714 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
8715 my $remote_command = $pwd.$envset.$bashfuncset.
8716 '@ARGV="'.::perl_quote_scalar($command).'";'.
8717 monitor_parent_sshd_script();
8718 $quoted_remote_command = "perl -e ". ::Q($remote_command);
8719 my $dq_remote_command = ::Q($quoted_remote_command);
8720 if(length $dq_remote_command > 999
8725 # csh does not deal well with > 1000 chars in one word
8726 # csh does not deal well with $ENV with \n
8727 $quoted_remote_command =
8728 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
8729 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
8731 $quoted_remote_command = $dq_remote_command;
8734 my $sshcmd = $sshlogin->sshcommand();
8735 my ($pre,$post,$cleanup)=("","","");
8737 $pre .= $self->sshtransfer();
8739 $post .= $self->sshreturn();
8741 $post .= $self->sshcleanup();
8743 # We need to save the exit status of the job
8744 $post = exitstatuswrapper($post);
8746 $self->{'sshlogin_wrap'}{$command} =
8748 . "$sshcmd $serverlogin -- exec "
8749 . $quoted_remote_command
8754 return $self->{'sshlogin_wrap'}{$command};
8759 # Non-quoted and with {...} substituted
8761 # @transfer - File names of files to transfer
8764 my $transfersize = 0;
8765 my @transfer = $self->{'commandline'}->
8766 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
8770 $transfersize += (stat($_))[7];
8773 $self->add_transfersize($transfersize);
8777 sub transfersize($) {
8779 return $self->{'transfersize'};
8782 sub add_transfersize($) {
8784 my $transfersize = shift;
8785 $self->{'transfersize'} += $transfersize;
8787 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
8788 $self->{'transfersize'});
8791 sub sshtransfer($) {
8792 # Returns for each transfer file:
8793 # rsync $file remote:$workdir
8796 my $sshlogin = $self->sshlogin();
8797 my $workdir = $self->workdir();
8798 for my $file ($self->transfer()) {
8799 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
8801 return join("",@pre);
8806 # Non-quoted and with {...} substituted
8808 # @non_quoted_filenames
8810 return $self->{'commandline'}->
8811 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
8815 # This is called after the job has finished
8817 # $number_of_bytes transferred in return
8819 for my $file ($self->return()) {
8821 $self->{'returnsize'} += (stat($file))[7];
8824 return $self->{'returnsize'};
8827 sub add_returnsize($) {
8829 my $returnsize = shift;
8830 $self->{'returnsize'} += $returnsize;
8832 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
8833 $self->{'returnsize'});
8837 # Returns for each return-file:
8838 # rsync remote:$workdir/$file .
8840 my $sshlogin = $self->sshlogin();
8841 my $sshcmd = $sshlogin->sshcommand();
8842 my $serverlogin = $sshlogin->serverlogin();
8843 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
8845 for my $file ($self->return()) {
8846 $file =~ s:^\./::g; # Remove ./ if any
8847 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
8851 # rsync -avR /foo/./bar/baz.c remote:/tmp/
8852 # == (on old systems)
8853 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
8854 $wd = ::shell_quote_file($self->workdir()."/");
8856 # Only load File::Basename if actually needed
8857 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
8858 # dir/./file means relative to dir, so remove dir on remote
8859 $file =~ m:(.*)/\./:;
8860 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
8861 my $nobasedir = $file;
8862 $nobasedir =~ s:.*/\./::;
8863 $cd = ::shell_quote_file(::dirname($nobasedir));
8864 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
8865 my $basename = ::Q(::shell_quote_file(::basename($file)));
8867 # mkdir -p /home/tange/dir/subdir/;
8868 # rsync (--protocol 30) -rlDzR
8869 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
8870 # server:file.gz /home/tange/dir/subdir/
8871 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
8872 " $rsync_cd $rsync_opts $serverlogin:".
8873 $basename . " ".$basedir.$cd.";";
8879 # Return the sshcommand needed to remove the file
8881 # ssh command needed to remove files from sshlogin
8883 my $sshlogin = $self->sshlogin();
8884 my $sshcmd = $sshlogin->sshcommand();
8885 my $serverlogin = $sshlogin->serverlogin();
8886 my $workdir = $self->workdir();
8889 for my $file ($self->remote_cleanup()) {
8890 my @subworkdirs = parentdirs_of($file);
8891 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
8893 if(defined $opt::workdir and $opt::workdir eq "...") {
8894 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
8899 sub remote_cleanup($) {
8901 # Files to remove at cleanup
8904 my @transfer = $self->transfer();
8905 my @return = $self->return();
8906 return (@transfer,@return);
8912 sub exitstatuswrapper(@) {
8913 if($Global::cshell) {
8914 return ('set _EXIT_status=$status; ' .
8916 'exit $_EXIT_status;');
8918 return ('_EXIT_status=$?; ' .
8920 'exit $_EXIT_status;');
8927 # the workdir on a remote machine
8929 if(not defined $self->{'workdir'}) {
8931 if(defined $opt::workdir) {
8932 if($opt::workdir eq ".") {
8933 # . means current dir
8934 my $home = $ENV{'HOME'};
8939 # If homedir exists: remove the homedir from
8940 # workdir if cwd starts with homedir
8941 # E.g. /home/foo/my/dir => my/dir
8942 # E.g. /tmp/my/dir => /tmp/my/dir
8943 my ($home_dev, $home_ino) = (stat($home))[0,1];
8945 my @dir_parts = split(m:/:,$cwd);
8947 while(defined ($part = shift @dir_parts)) {
8948 $part eq "" and next;
8949 $parent .= "/".$part;
8950 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
8951 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
8952 # dev and ino is the same: We found the homedir.
8953 $workdir = join("/",@dir_parts);
8958 if($workdir eq "") {
8961 } elsif($opt::workdir eq "...") {
8962 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
8963 . "-" . $self->seq();
8965 $workdir = $self->{'commandline'}->
8966 replace_placeholders([$opt::workdir],0,0);
8967 #$workdir = $opt::workdir;
8968 # Rsync treats /./ special. We dont want that
8969 $workdir =~ s:/\./:/:g; # Remove /./
8970 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
8971 $workdir =~ s:^\./::g; # Remove starting ./ if any
8976 $self->{'workdir'} = $workdir;
8978 return $self->{'workdir'};
8981 sub parentdirs_of($) {
8983 # all parentdirs except . of this dir or file - sorted desc by length
8986 while($d =~ s:/[^/]+$::) {
8995 # Setup STDOUT and STDERR for a job and start it.
8997 # job-object or undef if job not to run
8999 sub open3_setpgrp_internal {
9000 # Run open3+setpgrp followed by the command
9002 # $stdin_fh = Filehandle to use as STDIN
9003 # $stdout_fh = Filehandle to use as STDOUT
9004 # $stderr_fh = Filehandle to use as STDERR
9005 # $command = Command to run
9007 # $pid = Process group of job started
9008 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9011 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9012 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9013 # The eval is needed to catch exception from open3
9015 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9016 # Each child gets its own process group to make it safe to killall
9017 eval{ setpgrp(0,0) };
9018 eval{ setpriority(0,0,$opt::nice) };
9019 exec($Global::shell,"-c",$command)
9020 || ::die_bug("open3-$stdin_fh $command");
9026 sub open3_setpgrp_external {
9027 # Run open3 on $command wrapped with a perl script doing setpgrp
9028 # Works on systems that do not support open3(,,,"-")
9030 # $stdin_fh = Filehandle to use as STDIN
9031 # $stdout_fh = Filehandle to use as STDOUT
9032 # $stderr_fh = Filehandle to use as STDERR
9033 # $command = Command to run
9035 # $pid = Process group of job started
9036 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9038 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9039 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9044 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9045 "exec '$Global::shell', '-c', \@ARGV");
9046 # The eval is needed to catch exception from open3
9048 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9049 || ::die_bug("open3-$stdin_fh");
9056 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9057 no warnings 'redefine';
9058 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9059 # Test to see if open3(x,x,x,"-") is fully supported
9060 # Can an exported bash function be called via open3?
9061 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9062 'else { exec("bash","-c","testfun && true"); }';
9064 ::shell_quote_scalar_default(
9065 "testfun() { rm $name; }; export -f testfun; ".
9066 "perl -MIPC::Open3 -e ".
9067 ::shell_quote_scalar_default($script)
9069 # Redirect STDERR temporarily,
9070 # so errors on MacOS X are ignored.
9071 open my $saveerr, ">&STDERR";
9072 open STDERR, '>', "/dev/null";
9074 ::debug("init",qq{bash -c $bash 2>/dev/null});
9075 qx{ bash -c $bash 2>/dev/null };
9076 open STDERR, ">&", $saveerr;
9079 # Does not support open3(x,x,x,"-")
9080 # or does not have bash:
9081 # Use (slow) external version
9083 *open3_setpgrp = \&open3_setpgrp_external;
9084 ::debug("init","open3_setpgrp_external chosen\n");
9086 # Supports open3(x,x,x,"-")
9087 # This is 0.5 ms faster to run
9088 *open3_setpgrp = \&open3_setpgrp_internal;
9089 ::debug("init","open3_setpgrp_internal chosen\n");
9091 # The sub is now redefined. Call it
9092 return open3_setpgrp(@_);
9096 # Get the shell command to be executed (possibly with ssh infront).
9097 my $command = $job->wrapped();
9100 if($Global::interactive or $Global::stderr_verbose) {
9101 $job->interactive_start();
9103 # Must be run after $job->interactive_start():
9104 # $job->interactive_start() may call $job->skip()
9105 if($job->{'commandline'}{'skip'}) {
9106 # $job->skip() was called
9109 $job->openoutputfiles();
9110 $job->print_verbose_dryrun();
9111 # Call slot to store the slot value
9113 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
9114 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
9115 $ENV{'PARALLEL_SEQ'} = $job->seq();
9116 $ENV{'PARALLEL_PID'} = $$;
9117 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
9118 $job->add_rm($ENV{'PARALLEL_TMP'});
9119 ::debug("run", $Global::total_running, " processes . Starting (",
9120 $job->seq(), "): $command\n");
9122 my ($stdin_fh) = ::gensym();
9123 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
9124 if($opt::roundrobin and not $opt::keeporder) {
9125 # --keep-order will make sure the order will be reproducible
9126 ::set_fh_non_blocking($stdin_fh);
9128 $job->set_fh(0,"w",$stdin_fh);
9129 if($opt::tee or $opt::shard) { $job->set_virgin(0); }
9130 } elsif ($opt::tty and -c "/dev/tty" and
9131 open(my $devtty_fh, "<", "/dev/tty")) {
9132 # Give /dev/tty to the command if no one else is using it
9133 # The eval is needed to catch exception from open3
9134 local (*IN,*OUT,*ERR);
9135 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9136 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9138 # The eval is needed to catch exception from open3
9139 my @wrap = ('perl','-e',
9140 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
9141 "exec '$Global::shell', '-c', \@ARGV");
9143 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
9144 || ::die_bug("open3-/dev/tty");
9148 $job->set_virgin(0);
9150 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
9151 $job->set_virgin(0);
9155 $Global::total_running++;
9156 $Global::total_started++;
9157 $job->set_pid($pid);
9158 $job->set_starttime();
9159 $Global::running{$job->pid()} = $job;
9161 $Global::timeoutq->insert($job);
9163 $Global::newest_job = $job;
9164 $Global::newest_starttime = ::now();
9168 ::debug("run", "Cannot spawn more jobs.\n");
9173 sub interactive_start($) {
9175 my $command = $self->wrapped();
9176 if($Global::interactive) {
9178 ::status_no_nl("$command ?...");
9180 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
9181 $answer = <$tty_fh>;
9183 # Sometime we get an empty string (not even \n)
9184 # Do not know why, so let us just ignore it and try again
9185 } while(length $answer < 1);
9186 if (not ($answer =~ /^\s*y/i)) {
9187 $self->{'commandline'}->skip();
9190 print $Global::original_stderr "$command\n";
9198 # Wrap command with tmux for session pPID
9200 # $actual_command = the actual command being run (incl ssh wrap)
9202 my $actual_command = shift;
9203 # Temporary file name. Used for fifo to communicate exit val
9204 my $tmpfifo = ::tmpname("tmx");
9205 $self->add_rm($tmpfifo);
9207 if(length($tmpfifo) >=100) {
9208 ::error("tmux does not support sockets with path > 100.");
9209 ::wait_and_exit(255);
9211 if($opt::tmuxpane) {
9212 # Move the command into a pane in window 0
9213 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
9214 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
9217 my $visual_command = $self->replaced();
9218 my $title = $visual_command;
9219 if($visual_command =~ /\0/) {
9220 ::error("Command line contains NUL. tmux is confused by NUL.");
9221 ::wait_and_exit(255);
9224 # ascii 194-245 annoys tmux
9225 $title =~ tr/[\011-\016;\302-\365]/ /s;
9226 $title = ::Q($title);
9228 my $l_act = length($actual_command);
9229 my $l_tit = length($title);
9230 my $l_fifo = length($tmpfifo);
9231 # The line to run contains a 118 chars extra code + the title 2x
9232 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9234 my $quoted_space75 = ::Q(" ")x75;
9235 while($l_tit < 1000 and
9237 (890 < $l_tot and $l_tot < 1350)
9239 (9250 < $l_tot and $l_tot < 9800)
9241 # tmux blocks for certain lengths:
9242 # 900 < title + command < 1200
9243 # 9250 < title + command < 9800
9244 # but only if title < 1000, so expand the title with 75 spaces
9245 # The measured lengths are:
9246 # 996 < (title + whole command) < 1127
9247 # 9331 < (title + whole command) < 9636
9248 $title .= $quoted_space75;
9249 $l_tit = length($title);
9250 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9254 $ENV{'PARALLEL_TMUX'} ||= "tmux";
9255 if(not $tmuxsocket) {
9256 $tmuxsocket = ::tmpname("tms");
9259 # Run tmux in the foreground
9260 # Wait for the socket to appear
9261 while (not -e $tmuxsocket) { }
9262 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
9266 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
9269 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
9270 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
9272 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
9273 $Limits::Command::line_max_len, " tot ",
9276 return "mkfifo $tmpfifo && $tmux ".
9280 "(".$actual_command.');'.
9281 # The triple print is needed - otherwise the testsuite fails
9282 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
9283 "echo $title; echo \007Job finished at: `date`;sleep 10"
9286 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
9287 # If csh the first will be 0h, so use the second as exit value.
9288 # Otherwise just use the first value as exit value.
9289 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
9293 sub is_already_in_results($) {
9294 # Do we already have results for this job?
9296 # $job_already_run = bool whether there is output for this or not
9298 my $out = $job->{'commandline'}->results_out();
9299 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
9300 return(-e $out."stdout" or -f $out);
9303 sub is_already_in_joblog($) {
9305 return vec($Global::job_already_run,$job->seq(),1);
9308 sub set_job_in_joblog($) {
9310 vec($Global::job_already_run,$job->seq(),1) = 1;
9313 sub should_be_retried($) {
9314 # Should this job be retried?
9317 # 1 - job queued for retry
9319 if (not $opt::retries) {
9322 if(not $self->exitstatus() and not $self->exitsignal()) {
9323 # Completed with success. If there is a recorded failure: forget it
9324 $self->reset_failed_here();
9327 # The job failed. Should it be retried?
9328 $self->add_failed_here();
9329 my $retries = $self->{'commandline'}->
9330 replace_placeholders([$opt::retries],0,0);
9331 if($self->total_failed() == $retries) {
9332 # This has been retried enough
9335 # This command should be retried
9336 $self->set_endtime(undef);
9337 $self->reset_exitstatus();
9338 $Global::JobQueue->unget($self);
9339 ::debug("run", "Retry ", $self->seq(), "\n");
9346 my (%print_later,$job_seq_to_print);
9348 sub print_earlier_jobs($) {
9349 # Print jobs whose output is postponed due to --keep-order
9352 $print_later{$job->seq()} = $job;
9353 $job_seq_to_print ||= 1;
9355 ::debug("run", "Looking for: $job_seq_to_print ",
9356 "This: ", $job->seq(), "\n");
9357 for(;vec($Global::job_already_run,$job_seq_to_print,1);
9358 $job_seq_to_print++) {}
9359 while(my $j = $print_later{$job_seq_to_print}) {
9360 $returnsize += $j->print();
9362 # Job finished - look at the next
9363 delete $print_later{$job_seq_to_print};
9364 $job_seq_to_print++;
9367 # Job not finished yet - look at it again next round
9376 # Print the output of the jobs
9380 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9382 # Nothing was printed to this job:
9383 # cleanup tmp files if --files was set
9384 ::rm($self->fh(1,"name"));
9386 if($opt::pipe and $self->virgin() and not $opt::tee) {
9387 # Skip --joblog, --dryrun, --verbose
9390 # NULL returnsize = 0 returnsize
9391 $self->returnsize() or $self->add_returnsize(0);
9392 if($Global::joblog and defined $self->{'exitstatus'}) {
9393 # Add to joblog when finished
9394 $self->print_joblog();
9395 # Printing is only relevant for grouped/--line-buffer output.
9396 $opt::ungroup and return;
9400 # Check for disk full
9401 ::exit_if_disk_full();
9404 my $returnsize = $self->returnsize();
9405 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9406 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9407 $fdno == 0 and next;
9408 my $out_fd = $Global::fd{$fdno};
9409 my $in_fh = $self->fh($fdno,"r");
9411 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9412 # ::warning("File descriptor $fdno not defined\n");
9416 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
9417 if($opt::linebuffer) {
9418 # Line buffered print out
9419 $self->print_linebuffer($fdno,$in_fh,$out_fd);
9420 } elsif($opt::files) {
9421 $self->print_files($fdno,$in_fh,$out_fd);
9422 } elsif($opt::tag or defined $opt::tagstring) {
9423 $self->print_tag($fdno,$in_fh,$out_fd);
9425 $self->print_normal($fdno,$in_fh,$out_fd);
9429 ::debug("print", "<<joboutput\n");
9430 if(defined $self->{'exitstatus'}
9431 and not ($self->virgin() and $opt::pipe)) {
9432 if($Global::joblog and not $opt::sqlworker) {
9433 # Add to joblog when finished
9434 $self->print_joblog();
9436 if($opt::sqlworker and not $opt::results) {
9437 $Global::sql->output($self);
9439 if($Global::csvsep) {
9440 # Add output to CSV when finished
9444 return $returnsize - $self->returnsize();
9453 if($Global::verbose <= 1) {
9454 $cmd = $self->replaced();
9456 # Verbose level > 1: Print the rsync and stuff
9457 $cmd = join " ", @{$self->{'commandline'}};
9459 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
9461 if(not $header_printed) {
9464 # --header : => first value from column
9468 @V = (map { $Global::input_source_header{$i++} }
9469 @$record_ref[1..$#$record_ref]);
9472 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
9474 print $Global::csv_fh
9476 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
9477 "Send", "Receive", "Exitval", "Signal", "Command",
9483 # Memory optimization: Overwrite with the joined output
9484 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
9485 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
9486 print $Global::csv_fh
9490 $self->sshlogin()->string(),
9491 $self->starttime(), sprintf("%0.3f",$self->runtime()),
9492 $self->transfersize(), $self->returnsize(),
9493 $self->exitstatus(), $self->exitsignal(), \$cmd,
9494 \@$record_ref[1..$#$record_ref],
9495 \$self->{'output'}{1},
9496 \$self->{'output'}{2})),"\n";
9500 sub combine_ref($) {
9501 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
9503 my $sep = $Global::csvsep;
9508 for my $column (@part) {
9509 # Memory optimization: Content transferred as reference
9510 if(ref $column ne "SCALAR") {
9511 # Convert all columns to scalar references
9515 if(not defined $$column) {
9520 $must_be_quoted = 0;
9522 if($$column =~ s/$quot/$quot$quot/go){
9524 $must_be_quoted ||=1;
9526 if($$column =~ /[\s\Q$sep\E]/o){
9527 # Put quotes around if the column contains ,
9528 $must_be_quoted ||=1;
9531 $Global::use{"bytes"} ||= eval "use bytes; 1;";
9532 if ($$column =~ /\0/) {
9533 # Contains \0 => put quotes around
9534 $must_be_quoted ||=1;
9536 if($must_be_quoted){
9537 push @out, \$sep, \$quot, $column, \$quot;
9539 push @out, \$sep, $column;
9547 sub print_files($) {
9548 # Print the name of the file containing stdout on stdout
9551 # $opt::group = Print when job is done
9552 # $opt::linebuffer = Print ASAP
9555 my ($fdno,$in_fh,$out_fd) = @_;
9557 # If the job is dead: close printing fh. Needed for --compress
9558 close $self->fh($fdno,"w");
9559 if($? and $opt::compress) {
9560 ::error($opt::compress_program." failed.");
9561 $self->set_exitstatus(255);
9563 if($opt::compress) {
9564 # Kill the decompressor which will not be needed
9565 CORE::kill "TERM", $self->fh($fdno,"rpid");
9569 if($opt::pipe and $self->virgin()) {
9570 # Nothing was printed to this job:
9571 # cleanup unused tmp files because --files was set
9572 for my $fdno (1,2) {
9573 ::rm($self->fh($fdno,"name"));
9574 ::rm($self->fh($fdno,"unlink"));
9576 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
9577 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9578 if($Global::membuffer) {
9579 push @{$self->{'output'}{$fdno}},
9580 $self->tag(), $self->fh($fdno,"name");
9582 $self->add_returnsize(-s $self->fh($fdno,"name"));
9583 # Mark as printed - do not print again
9584 $self->set_fh($fdno,"name",undef);
9588 sub print_linebuffer($) {
9590 my ($fdno,$in_fh,$out_fd) = @_;
9591 if(defined $self->{'exitstatus'}) {
9592 # If the job is dead: close printing fh. Needed for --compress
9593 close $self->fh($fdno,"w");
9594 if($? and $opt::compress) {
9595 ::error($opt::compress_program." failed.");
9596 $self->set_exitstatus(255);
9598 if($opt::compress) {
9599 # Blocked reading in final round
9600 for my $fdno (1,2) {
9601 ::set_fh_blocking($self->fh($fdno,'r'));
9605 if(not $self->virgin()) {
9606 if($opt::files or ($opt::results and not $Global::csvsep)) {
9608 if($fdno == 1 and not $self->fh($fdno,"printed")) {
9609 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9610 if($Global::membuffer) {
9611 push(@{$self->{'output'}{$fdno}}, $self->tag(),
9612 $self->fh($fdno,"name"));
9614 $self->set_fh($fdno,"printed",1);
9616 # No need for reading $in_fh, as it is from "cat >/dev/null"
9618 # Read halflines and print full lines
9619 my $outputlength = 0;
9620 my $halfline_ref = $self->{'halfline'}{$fdno};
9622 # 1310720 gives 1.2 GB/s
9623 # 131072 gives 0.9 GB/s
9624 while($rv = sysread($in_fh, $buf,1310720)) {
9625 $outputlength += $rv;
9627 # Treat both \n and \r as line end
9628 $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);
9630 # One or more complete lines were found
9631 if($opt::tag or defined $opt::tagstring) {
9632 # Replace ^ with $tag within the full line
9633 my $tag = $self->tag();
9634 # TODO --recend that can be partially in @$halfline_ref
9635 substr($buf,0,$i-1) =~ s/(?<=[\n\r])/$tag/gm;
9636 # The length changed, so find the new ending pos
9637 $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);
9638 unshift @$halfline_ref, $tag;
9640 # Print the partial line (halfline) and the last half
9641 print $out_fd @$halfline_ref, substr($buf,0,$i);
9642 # Buffer in memory for SQL and CSV-output
9643 if($Global::membuffer) {
9644 push(@{$self->{'output'}{$fdno}},
9645 @$halfline_ref, substr($buf,0,$i));
9647 # Remove the printed part by keeping the unprinted part
9648 @$halfline_ref = (substr($buf,$i));
9650 # No newline, so append to the halfline
9651 push @$halfline_ref, $buf;
9654 $self->add_returnsize($outputlength);
9656 if(defined $self->{'exitstatus'}) {
9657 if($opt::files or ($opt::results and not $Global::csvsep)) {
9658 $self->add_returnsize(-s $self->fh($fdno,"name"));
9660 # If the job is dead: print the remaining partial line
9662 my $halfline_ref = $self->{'halfline'}{$fdno};
9663 if(grep /./, @$halfline_ref) {
9665 for(@{$self->{'halfline'}{$fdno}}) {
9666 $returnsize += length $_;
9668 $self->add_returnsize($returnsize);
9669 if($opt::tag or defined $opt::tagstring) {
9670 # Prepend $tag the the remaining half line
9671 unshift @$halfline_ref, $self->tag();
9673 # Print the partial line (halfline)
9674 print $out_fd @{$self->{'halfline'}{$fdno}};
9675 # Buffer in memory for SQL and CSV-output
9676 if($Global::membuffer) {
9677 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
9679 @$halfline_ref = ();
9682 if($self->fh($fdno,"rpid") and
9683 CORE::kill 0, $self->fh($fdno,"rpid")) {
9684 # decompress still running
9686 # decompress done: close fh
9688 if($? and $opt::compress) {
9689 ::error($opt::decompress_program." failed.");
9690 $self->set_exitstatus(255);
9698 return print_normal(@_);
9701 sub free_ressources() {
9703 if(not $opt::ungroup) {
9704 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9705 close $self->fh($fdno,"w");
9706 close $self->fh($fdno,"r");
9711 sub print_normal($) {
9713 my ($fdno,$in_fh,$out_fd) = @_;
9715 close $self->fh($fdno,"w");
9716 if($? and $opt::compress) {
9717 ::error($opt::compress_program." failed.");
9718 $self->set_exitstatus(255);
9720 if(not $self->virgin()) {
9722 # $in_fh is now ready for reading at position 0
9723 my $outputlength = 0;
9726 if($opt::tag or $opt::tagstring) {
9729 my $tag = $self->tag();
9731 print $out_fd $tag,$_;
9732 $outputlength += length $_;
9733 if($Global::membuffer) {
9734 push @{$self->{'output'}{$fdno}}, $tag, $_;
9738 while(sysread($in_fh,$buf,131072)) {
9740 $outputlength += length $buf;
9741 if($Global::membuffer) {
9742 push @{$self->{'output'}{$fdno}}, $buf;
9747 $self->add_returnsize($outputlength);
9750 if($? and $opt::compress) {
9751 ::error($opt::decompress_program." failed.");
9752 $self->set_exitstatus(255);
9757 sub print_joblog($) {
9760 if($Global::verbose <= 1) {
9761 $cmd = $self->replaced();
9763 # Verbose level > 1: Print the rsync and stuff
9764 $cmd = join " ", @{$self->{'commandline'}};
9766 # Newlines make it hard to parse the joblog
9768 print $Global::joblog
9769 join("\t", $self->seq(), $self->sshlogin()->string(),
9770 $self->starttime(), sprintf("%10.3f",$self->runtime()),
9771 $self->transfersize(), $self->returnsize(),
9772 $self->exitstatus(), $self->exitsignal(), $cmd
9774 flush $Global::joblog;
9775 $self->set_job_in_joblog();
9780 if(not defined $self->{'tag'}) {
9781 if($opt::tag or defined $opt::tagstring) {
9782 $self->{'tag'} = $self->{'commandline'}->
9783 replace_placeholders([$opt::tagstring],0,0)."\t";
9785 $self->{'tag'} = "";
9788 return $self->{'tag'};
9793 if(not defined $self->{'hostgroups'}) {
9794 $self->{'hostgroups'} =
9795 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
9797 return @{$self->{'hostgroups'}};
9802 return $self->{'exitstatus'};
9805 sub set_exitstatus($$) {
9807 my $exitstatus = shift;
9809 # Overwrite status if non-zero
9810 $self->{'exitstatus'} = $exitstatus;
9812 # Set status but do not overwrite
9813 # Status may have been set by --timeout
9814 $self->{'exitstatus'} ||= $exitstatus;
9817 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
9821 sub reset_exitstatus($) {
9823 undef $self->{'exitstatus'};
9828 return $self->{'exitsignal'};
9831 sub set_exitsignal($$) {
9833 my $exitsignal = shift;
9834 $self->{'exitsignal'} = $exitsignal;
9836 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
9844 sub should_we_halt {
9845 # Should we halt? Immediately? Gracefully?
9849 if($job->exitstatus() or $job->exitsignal()) {
9851 $Global::exitstatus++;
9852 $Global::total_failed++;
9853 if($Global::halt_fail) {
9854 ::status("$Global::progname: This job failed:",
9856 $limit = $Global::total_failed;
9858 } elsif($Global::halt_success) {
9859 ::status("$Global::progname: This job succeeded:",
9861 $limit = $Global::total_completed - $Global::total_failed;
9863 if($Global::halt_done) {
9864 ::status("$Global::progname: This job finished:",
9866 $limit = $Global::total_completed;
9868 if(not defined $limit) {
9871 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
9872 # --halt % => 1..100 (pct of jobs failed)
9873 if($Global::halt_pct and not $Global::halt_count) {
9874 $total_jobs ||= $Global::JobQueue->total_jobs();
9875 # From the pct compute the number of jobs that must fail/succeed
9876 $Global::halt_count = $total_jobs * $Global::halt_pct;
9878 if($limit >= $Global::halt_count) {
9879 # At least N jobs have failed/succeded/completed
9880 # or at least N% have failed/succeded/completed
9881 # So we should prepare for exit
9882 if($Global::halt_fail or $Global::halt_done) {
9884 if(not defined $Global::halt_exitstatus) {
9885 if($Global::halt_pct) {
9886 # --halt now,fail=X% or soon,fail=X%
9887 # --halt now,done=X% or soon,done=X%
9888 $Global::halt_exitstatus =
9889 ::ceil($Global::total_failed / $total_jobs * 100);
9890 } elsif($Global::halt_count) {
9891 # --halt now,fail=X or soon,fail=X
9892 # --halt now,done=X or soon,done=X
9893 $Global::halt_exitstatus =
9894 ::min($Global::total_failed,101);
9896 if($Global::halt_count and $Global::halt_count == 1) {
9897 # --halt now,fail=1 or soon,fail=1
9898 # --halt now,done=1 or soon,done=1
9899 # Emulate Bash's +128 if there is a signal
9900 $Global::halt_exitstatus =
9903 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
9906 ::debug("halt","Pct: ",$Global::halt_pct,
9907 " count: ",$Global::halt_count,
9908 " status: ",$Global::halt_exitstatus,"\n");
9909 } elsif($Global::halt_success) {
9910 $Global::halt_exitstatus = 0;
9912 if($Global::halt_when eq "soon"
9914 (scalar(keys %Global::running) > 0
9916 $Global::max_jobs_running == 1)) {
9918 ("$Global::progname: Starting no more jobs. ".
9919 "Waiting for ". (keys %Global::running).
9920 " jobs to finish.");
9921 $Global::start_no_new_jobs ||= 1;
9923 return($Global::halt_when);
9930 package CommandLine;
9935 my $commandref = shift;
9937 my $arg_queue = shift;
9938 my $context_replace = shift;
9939 my $max_number_of_args = shift; # for -N and normal (-n1)
9940 my $transfer_files = shift;
9941 my $return_files = shift;
9942 my $replacecount_ref = shift;
9943 my $len_ref = shift;
9944 my %replacecount = %$replacecount_ref;
9945 my %len = %$len_ref;
9946 for (keys %$replacecount_ref) {
9947 # Total length of this replacement string {} replaced with all args
9951 'command' => $commandref,
9955 'arg_list_flat' => [],
9956 'arg_list_flat_orig' => [undef],
9957 'arg_queue' => $arg_queue,
9958 'max_number_of_args' => $max_number_of_args,
9959 'replacecount' => \%replacecount,
9960 'context_replace' => $context_replace,
9961 'transfer_files' => $transfer_files,
9962 'return_files' => $return_files,
9963 'replaced' => undef,
9964 }, ref($class) || $class;
9969 return $self->{'seq'};
9974 $self->{'seq'} = shift;
9978 # Find the number of a free job slot and return it
9980 # @Global::slots - list with free jobslots
9982 # $jobslot = number of jobslot
9984 if(not $self->{'slot'}) {
9985 if(not @Global::slots) {
9986 # $max_slot_number will typically be $Global::max_jobs_running
9987 push @Global::slots, ++$Global::max_slot_number;
9989 $self->{'slot'} = shift @Global::slots;
9991 return $self->{'slot'};
9998 # Add arguments from arg_queue until the number of arguments or
9999 # max line length is reached
10001 # $Global::minimal_command_line_length
10004 # $Global::JobQueue
10007 # $Global::max_jobs_running
10011 my $max_len = $Global::minimal_command_line_length
10012 || Limits::Command::max_length();
10014 if($opt::cat or $opt::fifo) {
10015 # Get the empty arg added by --pipepart (if any)
10016 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
10017 # $PARALLEL_TMP will point to a tempfile that will be used as {}
10018 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
10019 unget([Arg->new('$PARALLEL_TMP')]);
10021 while (not $self->{'arg_queue'}->empty()) {
10022 $next_arg = $self->{'arg_queue'}->get();
10023 if(not defined $next_arg) {
10026 $self->push($next_arg);
10027 if($self->len() >= $max_len) {
10028 # Command length is now > max_length
10029 # If there are arguments: remove the last
10030 # If there are no arguments: Error
10031 # TODO stuff about -x opt_x
10032 if($self->number_of_args() > 1) {
10033 # There is something to work on
10034 $self->{'arg_queue'}->unget($self->pop());
10037 my $args = join(" ", map { $_->orig() } @$next_arg);
10038 ::error("Command line too long (".
10039 $self->len(). " >= ".
10042 $self->{'arg_queue'}->arg_number().
10044 ((length $args > 50) ?
10045 (substr($args,0,50))."..." :
10047 $self->{'arg_queue'}->unget($self->pop());
10048 ::wait_and_exit(255);
10052 if(defined $self->{'max_number_of_args'}) {
10053 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
10058 if(($opt::m or $opt::X) and not $already_spread
10059 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
10060 # -m or -X and EOF => Spread the arguments over all jobslots
10061 # (unless they are already spread)
10062 $already_spread ||= 1;
10063 if($self->number_of_args() > 1) {
10064 $self->{'max_number_of_args'} =
10065 ::ceil($self->number_of_args()/$Global::max_jobs_running);
10066 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
10067 $self->{'max_number_of_args'};
10068 $self->{'arg_queue'}->unget($self->pop_all());
10069 while($self->number_of_args() < $self->{'max_number_of_args'}) {
10070 $self->push($self->{'arg_queue'}->get());
10073 $Global::JobQueue->flush_total_jobs();
10076 if($opt::sqlmaster) {
10077 # Insert the V1..Vn for this $seq in SQL table instead of generating one
10078 $Global::sql->insert_records($self->seq(), $self->{'command'},
10079 $self->{'arg_list_flat_orig'});
10085 # Add one or more records as arguments
10088 my $record = shift;
10089 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
10090 push @{$self->{'arg_list_flat'}}, @$record;
10091 push @{$self->{'arg_list'}}, $record;
10092 # Make @arg available for {= =}
10093 *Arg::arg = $self->{'arg_list_flat_orig'};
10095 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
10096 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10097 if($perlexpr =~ /^(\d+) /) {
10099 defined($record->[$1-1]) or next;
10100 $self->{'len'}{$perlexpr} +=
10101 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10103 for my $arg (@$record) {
10105 $self->{'len'}{$perlexpr} +=
10106 length $arg->replace($perlexpr,$quote_arg,$self);
10114 # Remove last argument
10118 my $record = pop @{$self->{'arg_list'}};
10119 # pop off arguments from @$record
10120 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
10121 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
10122 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
10123 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10124 if($perlexpr =~ /^(\d+) /) {
10126 defined($record->[$1-1]) or next;
10127 $self->{'len'}{$perlexpr} -=
10128 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10130 for my $arg (@$record) {
10132 $self->{'len'}{$perlexpr} -=
10133 length $arg->replace($perlexpr,$quote_arg,$self);
10142 # Remove all arguments and zeros the length of replacement perlexpr
10146 my @popped = @{$self->{'arg_list'}};
10147 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10148 $self->{'len'}{$perlexpr} = 0;
10150 $self->{'arg_list'} = [];
10151 $self->{'arg_list_flat_orig'} = [undef];
10152 $self->{'arg_list_flat'} = [];
10156 sub number_of_args($) {
10157 # The number of records
10159 # number of records
10161 # This is really the number of records
10162 return $#{$self->{'arg_list'}}+1;
10165 sub number_of_recargs($) {
10166 # The number of args in records
10168 # number of args records
10171 my $nrec = scalar @{$self->{'arg_list'}};
10173 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
10178 sub args_as_string($) {
10180 # all unmodified arguments joined with ' ' (similar to {})
10182 return (join " ", map { $_->orig() }
10183 map { @$_ } @{$self->{'arg_list'}});
10186 sub results_out($) {
10187 sub max_file_name_length {
10188 # Figure out the max length of a subdir
10189 # TODO and the max total length
10190 # Ext4 = 255,130816
10192 # $Global::max_file_length is set
10194 # $Global::max_file_length
10195 my $testdir = shift;
10197 my $upper = 100_000_000;
10198 # Dir length of 8 chars is supported everywhere
10200 my $dir = "x"x$len;
10202 rmdir($testdir."/".$dir);
10205 } while ($len < $upper and mkdir $testdir."/".$dir);
10206 # Then search for the actual max length between $len/16 and $len
10209 while($max-$min > 5) {
10210 # If we are within 5 chars of the exact value:
10211 # it is not worth the extra time to find the exact value
10212 my $test = int(($min+$max)/2);
10214 if(mkdir $testdir."/".$dir) {
10215 rmdir($testdir."/".$dir);
10221 $Global::max_file_length = $min;
10226 my $out = $self->replace_placeholders([$opt::results],0,0);
10227 if($out eq $opt::results) {
10228 # $opt::results simple string: Append args_as_dirname
10229 my $args_as_dirname = $self->args_as_dirname();
10230 # Output in: prefix/name1/val1/name2/val2/stdout
10231 $out = $opt::results."/".$args_as_dirname;
10232 if(-d $out or eval{ File::Path::mkpath($out); }) {
10235 # mkpath failed: Argument probably too long.
10236 # Set $Global::max_file_length, which will keep the individual
10237 # dir names shorter than the max length
10238 max_file_name_length($opt::results);
10239 $args_as_dirname = $self->args_as_dirname();
10240 # prefix/name1/val1/name2/val2/
10241 $out = $opt::results."/".$args_as_dirname;
10242 File::Path::mkpath($out);
10246 if($out =~ m:/$:) {
10248 if(-d $out or eval{ File::Path::mkpath($out); }) {
10251 ::error("Cannot make dir '$out'.");
10252 ::wait_and_exit(255);
10256 File::Path::mkpath($1);
10262 sub args_as_dirname($) {
10264 # all unmodified arguments joined with '/' (similar to {})
10265 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10266 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
10270 for my $rec_ref (@{$self->{'arg_list'}}) {
10271 # If headers are used, sort by them.
10272 # Otherwise keep the order from the command line.
10273 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
10274 for my $n (@header_indexes_sorted) {
10276 $Global::input_source_header{$n},
10278 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10283 if($Global::max_file_length) {
10284 # Keep each subdir shorter than the longest
10285 # allowed file name
10286 $s = substr($s,0,$Global::max_file_length);
10289 $rec_ref->[$n-1]->orig());
10292 return join "/", @res;
10295 sub header_indexes_sorted($) {
10296 # Sort headers first by number then by name.
10297 # E.g.: 1a 1b 11a 11b
10299 # Indexes of %Global::input_source_header sorted
10300 my $max_col = shift;
10302 no warnings 'numeric';
10303 for my $col (1 .. $max_col) {
10304 # Make sure the header is defined. If it is not: use column number
10305 if(not defined $Global::input_source_header{$col}) {
10306 $Global::input_source_header{$col} = $col;
10309 my @header_indexes_sorted = sort {
10310 # Sort headers numerically then asciibetically
10311 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
10313 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
10315 return @header_indexes_sorted;
10321 # The length of the command line with args substituted
10324 # Add length of the original command with no args
10325 # Length of command w/ all replacement args removed
10326 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
10327 ::debug("length", "noncontext + command: $len\n");
10328 my $recargs = $self->number_of_recargs();
10329 if($self->{'context_replace'}) {
10330 # Context is duplicated for each arg
10331 $len += $recargs * $self->{'len'}{'context'};
10332 for my $replstring (keys %{$self->{'replacecount'}}) {
10333 # If the replacements string is more than once: mulitply its length
10334 $len += $self->{'len'}{$replstring} *
10335 $self->{'replacecount'}{$replstring};
10336 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
10337 $self->{'replacecount'}{$replstring}, "\n");
10339 # echo 11 22 33 44 55 66 77 88 99 1010
10340 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
10342 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10343 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10344 # Add space between context groups
10345 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10347 # Each replacement string may occur several times
10348 # Add the length for each time
10349 $len += 1*$self->{'len'}{'context'};
10350 ::debug("length", "context+noncontext + command: $len\n");
10351 for my $replstring (keys %{$self->{'replacecount'}}) {
10352 # (space between regargs + length of replacement)
10353 # * number this replacement is used
10354 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10355 $self->{'replacecount'}{$replstring};
10358 if(defined $Global::parallel_env) {
10359 # If we are using --env, add the prefix for that, too.
10360 $len += length $Global::parallel_env;
10362 if($Global::quoting) {
10363 # Pessimistic length if -q is set
10364 # Worse than worst case: ' => "'" + " => '"'
10365 # TODO can we count the number of expanding chars?
10366 # and count them in arguments, too?
10369 if(@opt::shellquote) {
10370 # Pessimistic length if --shellquote is set
10371 # Worse than worst case: ' => "'"
10372 for(@opt::shellquote) {
10377 if(@opt::sshlogin) {
10378 # Pessimistic length if remote
10379 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
10380 $len = int($len*4/3);
10391 # $replaced = command with place holders replaced and prepended
10393 if(not defined $self->{'replaced'}) {
10394 # Don't quote arguments if the input is the full command line
10395 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
10396 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
10397 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
10398 $self->{'replaced'} = $self->
10399 replace_placeholders($self->{'command'},$Global::quoting,
10401 my $len = length $self->{'replaced'};
10402 if ($len != $self->len()) {
10403 ::debug("length", $len, " != ", $self->len(),
10404 " ", $self->{'replaced'}, "\n");
10406 ::debug("length", $len, " == ", $self->len(),
10407 " ", $self->{'replaced'}, "\n");
10410 return $self->{'replaced'};
10413 sub replace_placeholders($$$$) {
10414 # Replace foo{}bar with fooargbar
10416 # $targetref = command as shell words
10417 # $quote = should everything be quoted?
10418 # $quote_arg = should replaced arguments be quoted?
10420 # @Arg::arg = arguments as strings to be use in {= =}
10422 # @target with placeholders replaced
10424 my $targetref = shift;
10426 my $quote_arg = shift;
10429 # Token description:
10430 # \0spc = unquoted space
10431 # \0end = last token element
10432 # \0ign = dummy token to be ignored
10433 # \257<...\257> = replacement expression
10434 # " " = quoted space, that splits -X group
10435 # text = normal text - possibly part of -X group
10437 my @tokens = grep { length $_ > 0 } map {
10439 # \257<...\257> or space
10442 # Split each space/tab into a token
10443 split /(?=\s)|(?<=\s)/
10446 # Split \257< ... \257> into own token
10447 map { split /(?=\257<)|(?<=\257>)/ }
10448 # Insert "\0spc" between every element
10449 # This space should never be quoted
10450 map { $spacer++ ? ("\0spc",$_) : $_ }
10451 map { $_ eq "" ? "\0empty" : $_ }
10455 # @tokens is empty: Return empty array
10458 ::debug("replace", "Tokens ".join":",@tokens,"\n");
10459 # Make it possible to use $arg[2] in {= =}
10460 *Arg::arg = $self->{'arg_list_flat_orig'};
10462 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
10463 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
10464 if(not @{$self->{'arg_list_flat'}}) {
10465 @{$self->{'arg_list_flat'}} = Arg->new("");
10467 my $argref = $self->{'arg_list_flat'};
10468 # Number of arguments - used for positional arguments
10469 my $n = $#$argref+1;
10471 # $self is actually a CommandLine-object,
10472 # but it looks nice to be able to say {= $job->slot() =}
10474 # @replaced = tokens with \257< \257> replaced
10476 if($self->{'context_replace'}) {
10478 for my $t (@tokens,"\0end") {
10479 # \0end = last token was end of tokens.
10480 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
10481 # Context group complete: Replace in it
10482 if(grep { /^\257</ } @ctxgroup) {
10483 # Context group contains a replacement string:
10484 # Copy once per arg
10485 my $space = "\0ign";
10486 for my $arg (@$argref) {
10487 my $normal_replace;
10489 # Put unquoted space before each context group
10491 CORE::push @replaced, $space, map {
10494 s{\257<(-?\d+)?(.*)\257>}
10497 # Positional replace
10498 # Find the relevant arg and replace it
10499 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
10500 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10501 replace($2,$quote_arg,$self)
10505 $normal_replace ||= 1;
10506 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10511 $normal_replace or last;
10515 # Context group has no a replacement string: Copy it once
10516 CORE::push @replaced, @ctxgroup;
10518 # New context group
10521 if($t eq "\0spc" or $t eq " ") {
10522 CORE::push @replaced,$t;
10524 CORE::push @ctxgroup,$t;
10531 # repquote = no if {} first on line, no if $quote, yes otherwise
10532 for my $t (@tokens) {
10533 if($t =~ /^\257</) {
10534 my $space = "\0ign";
10535 for my $arg (@$argref) {
10536 my $normal_replace;
10539 s{\257<(-?\d+)?(.*)\257>}
10542 # Positional replace
10543 # Find the relevant arg and replace it
10544 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
10545 # If defined: replace
10546 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10547 replace($2,$quote_arg,$self)
10551 $normal_replace ||= 1;
10552 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10555 CORE::push @replaced, $space, $a;
10556 $normal_replace or last;
10561 CORE::push @replaced, $t;
10566 ::debug("replace","Replaced: ".join":",@replaced,"\n");
10567 if($Global::escape_string_present) {
10568 # Command line contains \257: Unescape it \257\256 => \257
10569 # If a replacement resulted in \257\256
10570 # it will have been escaped into \\\257\\\\256
10571 # and will not be matched below
10577 # Put tokens into groups that may be quoted.
10580 for (map { $_ eq "\0empty" ? "" : $_ }
10581 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
10582 @replaced, "\0end") {
10583 if($_ eq "\0spc" or $_ eq "\0end") {
10584 # \0spc splits quotable groups
10587 CORE::push @quoted, ::Q(join"",@quotegroup);;
10590 CORE::push @quoted, join"",@quotegroup;
10594 CORE::push @quotegroup, $_;
10597 ::debug("replace","Quoted: ".join":",@quoted,"\n");
10598 return wantarray ? @quoted : "@quoted";
10604 $self->{'skip'} = 1;
10608 package CommandLineQueue;
10612 my $commandref = shift;
10613 my $read_from = shift;
10614 my $context_replace = shift || 0;
10615 my $max_number_of_args = shift;
10616 my $transfer_files = shift;
10617 my $return_files = shift;
10620 my ($replacecount_ref, $len_ref);
10621 my @command = @$commandref;
10623 # Replace replacement strings with {= perl expr =}
10624 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
10625 @command = merge_rpl_parts(@command);
10627 # Protect matching inside {= perl expr =}
10628 # by replacing {= and =} with \257< and \257>
10629 # in options that can contain replacement strings:
10630 # @command, --transferfile, --return,
10631 # --tagstring, --workdir, --results
10632 for(@command, @$transfer_files, @$return_files,
10633 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
10634 # Skip if undefined
10636 # Escape \257 => \257\256
10637 $Global::escape_string_present += s/\257/\257\256/g;
10638 # Needs to match rightmost left parens (Perl defaults to leftmost)
10639 # to deal with: {={==} and {={==}=}
10640 # Replace {= -> \257< and =} -> \257>
10642 # Complex way to do:
10643 # s/{=(.*)=}/\257<$1\257>/g
10644 # which would not work
10645 s[\Q$Global::parensleft\E # Match {=
10646 # Match . unless the next string is {= or =}
10647 # needed to force matching the shortest {= =}
10648 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
10649 \Q$Global::parensright\E ] # Match =}
10651 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
10652 # Replace long --rpl's before short ones, as a short may be a
10653 # substring of a long:
10654 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
10656 # Replace the shorthand string (--rpl)
10657 # with the {= perl expr =}
10659 # Avoid searching for shorthand strings inside existing {= perl expr =}
10661 # Replace $$1 in {= perl expr =} with groupings in shorthand string
10663 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
10664 # echo {/.tar/.gz} ::: UU.tar.gz
10665 my ($prefix,$grp_regexp,$postfix) =
10666 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
10667 ( \(.*\) )? # Group capture regexp - e.g (.*)
10668 ( [^)]* )$ # Postfix - e.g }
10670 $grp_regexp ||= '';
10671 my $rplval = $Global::rpl{$rpl};
10672 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
10673 # Don't replace after \257 unless \257>
10674 \Q$prefix\E $grp_regexp \Q$postfix\E}
10676 # The start remains the same
10677 my $unchanged = $1;
10678 # Dummy entry to start at 1.
10680 # $2 = first ()-group in $grp_regexp
10681 # Put $2 in $grp[1], Put $3 in $grp[2]
10682 # so first ()-group in $grp_regexp is $grp[1];
10683 for(my $i = 2; defined $grp[$#grp]; $i++) {
10684 push @grp, eval '$'.$i;
10687 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
10688 # in the code to be executed
10689 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
10690 # prepend with $_pAr_gRp1 = perlquote($1),
10692 for(my $i = 1;defined $grp[$i]; $i++) {
10693 $set_args .= "\$_pAr_gRp$i = \"" .
10694 ::perl_quote_scalar($grp[$i]) . "\";";
10696 $unchanged . "\257<" . $set_args . $rv . "\257>"
10699 # Do the same for the positional replacement strings
10701 if($posrpl =~ s/^\{//) {
10702 # Only do this if the shorthand start with {
10704 # Don't replace after \257 unless \257>
10705 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
10706 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
10708 # The start remains the same
10709 my $unchanged = $1;
10711 # Dummy entry to start at 1.
10713 # $3 = first ()-group in $grp_regexp
10714 # Put $3 in $grp[1], Put $4 in $grp[2]
10715 # so first ()-group in $grp_regexp is $grp[1];
10716 for(my $i = 3; defined $grp[$#grp]; $i++) {
10717 push @grp, eval '$'.$i;
10720 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
10721 # in the code to be executed
10722 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
10723 # prepend with $_pAr_gRp1 = perlquote($1),
10725 for(my $i = 1;defined $grp[$i]; $i++) {
10726 $set_args .= "\$_pAr_gRp$i = \"" .
10727 ::perl_quote_scalar($grp[$i]) . "\";";
10729 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
10736 # Add {} if no replacement strings in @command
10737 ($replacecount_ref, $len_ref, @command) =
10738 replacement_counts_and_lengths($transfer_files,$return_files,@command);
10739 if("@command" =~ /^[^ \t\n=]*\257</) {
10740 # Replacement string is (part of) the command (and not just
10741 # argument or variable definition V1={})
10742 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
10743 # Do no quote (Otherwise it will fail if the input contains spaces)
10744 $Global::noquote = 1;
10747 if($opt::sqlmaster and $Global::sql->append()) {
10748 $seq = $Global::sql->max_seq() + 1;
10752 'unget' => \@unget,
10753 'command' => \@command,
10754 'replacecount' => $replacecount_ref,
10755 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
10756 'context_replace' => $context_replace,
10758 'max_number_of_args' => $max_number_of_args,
10760 'transfer_files' => $transfer_files,
10761 'return_files' => $return_files,
10763 }, ref($class) || $class;
10766 sub merge_rpl_parts($) {
10767 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
10769 # @in = the @command as given by the user
10771 # $Global::parensleft
10772 # $Global::parensright
10774 # @command with parts merged to keep {= and =} as one
10777 my $l = quotemeta($Global::parensleft);
10778 my $r = quotemeta($Global::parensright);
10783 # Remove matching (right most) parens
10784 while(s/(.*)$l.*?$r/$1/os) {}
10786 # Missing right parens
10788 $s .= " ".shift @in;
10790 while(s/(.*)$l.*?$r/$1/os) {}
10801 sub replacement_counts_and_lengths($$@) {
10802 # Count the number of different replacement strings.
10803 # Find the lengths of context for context groups and non-context
10805 # If no {} found in @command: add it to @command
10808 # \@transfer_files = array of filenames to transfer
10809 # \@return_files = array of filenames to return
10810 # @command = command template
10812 # \%replacecount, \%len, @command
10813 my $transfer_files = shift;
10814 my $return_files = shift;
10816 my (%replacecount,%len);
10819 # Count how many times each replacement string is used
10820 my @cmd = @command;
10821 my $contextlen = 0;
10822 my $noncontextlen = 0;
10823 my $contextgroups = 0;
10825 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
10826 # %replacecount = { "perlexpr" => number of times seen }
10827 # e.g { "s/a/b/" => 2 }
10828 $replacecount{$1}++;
10831 # Measure the length of the context around the {= perl expr =}
10832 # Use that {=...=} has been replaced with \000 above
10833 # So there is no need to deal with \257<
10834 while($c =~ s/ (\S*\000\S*) //xs) {
10836 $w =~ tr/\000//d; # Remove all \000's
10837 $contextlen += length($w);
10840 # All {= perl expr =} have been removed: The rest is non-context
10841 $noncontextlen += length $c;
10843 for(@$transfer_files, @$return_files,
10844 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
10845 # Options that can contain replacement strings
10848 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
10849 # %replacecount = { "perlexpr" => number of times seen }
10850 # e.g { "$_++" => 2 }
10851 # But for tagstring we just need to mark it as seen
10852 $replacecount{$1} ||= 1;
10856 # If the command does not contain {} force it to be computed
10857 # as it is being used by --bar
10858 $replacecount{""} ||= 1;
10861 $len{'context'} = 0+$contextlen;
10862 $len{'noncontext'} = $noncontextlen;
10863 $len{'contextgroups'} = $contextgroups;
10864 $len{'noncontextgroups'} = @cmd-$contextgroups;
10865 ::debug("length", "@command Context: ", $len{'context'},
10866 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
10867 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
10870 # Default command = {}
10871 @command = ("\257<\257>");
10872 } elsif(($opt::pipe or $opt::pipepart)
10873 and not $opt::fifo and not $opt::cat) {
10874 # With --pipe / --pipe-part you can have no replacement
10877 # Append {} to the command if there are no {...}'s and no {=...=}
10878 push @command, ("\257<\257>");
10882 return(\%replacecount,\%len,@command);
10887 if(@{$self->{'unget'}}) {
10888 my $cmd_line = shift @{$self->{'unget'}};
10889 return ($cmd_line);
10891 if($opt::sqlworker) {
10892 # Get the sequence number from the SQL table
10893 $self->set_seq($SQL::next_seq);
10894 # Get the command from the SQL table
10895 $self->{'command'} = $SQL::command_ref;
10897 # Recompute replace counts based on the read command
10898 ($self->{'replacecount'},
10899 $self->{'len'}, @command) =
10900 replacement_counts_and_lengths($self->{'transfer_files'},
10901 $self->{'return_files'},
10902 @$SQL::command_ref);
10903 if("@command" =~ /^[^ \t\n=]*\257</) {
10904 # Replacement string is (part of) the command (and not just
10905 # argument or variable definition V1={})
10906 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
10907 # Do no quote (Otherwise it will fail if the input contains spaces)
10908 $Global::noquote = 1;
10912 my $cmd_line = CommandLine->new($self->seq(),
10913 $self->{'command'},
10914 $self->{'arg_queue'},
10915 $self->{'context_replace'},
10916 $self->{'max_number_of_args'},
10917 $self->{'transfer_files'},
10918 $self->{'return_files'},
10919 $self->{'replacecount'},
10922 $cmd_line->populate();
10923 ::debug("run","cmd_line->number_of_args ",
10924 $cmd_line->number_of_args(), "\n");
10925 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
10926 if($cmd_line->replaced() eq "") {
10927 # Empty command - pipe requires a command
10928 ::error("--pipe/--pipepart must have a command to pipe into ".
10930 ::wait_and_exit(255);
10932 } elsif($cmd_line->number_of_args() == 0) {
10933 # We did not get more args - maybe at EOF string?
10936 $self->set_seq($self->seq()+1);
10943 unshift @{$self->{'unget'}}, @_;
10948 my $empty = (not @{$self->{'unget'}}) &&
10949 $self->{'arg_queue'}->empty();
10950 ::debug("run", "CommandLineQueue->empty $empty");
10956 return $self->{'seq'};
10961 $self->{'seq'} = shift;
10964 sub quote_args($) {
10966 # If there is not command emulate |bash
10967 return $self->{'command'};
10971 package Limits::Command;
10973 # Maximal command line length (for -m and -X)
10974 sub max_length($) {
10975 # Find the max_length of a command line and cache it
10977 # number of chars on the longest command line allowed
10978 if(not $Limits::Command::line_max_len) {
10979 # Disk cache of max command line length
10980 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
10983 if(-e $len_cache) {
10984 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
10985 $cached_limit = <$fh>;
10988 $cached_limit = real_max_length();
10989 # If $HOME is write protected: Do not fail
10990 my $dir = ::dirname($len_cache);
10991 -d $dir or eval { File::Path::mkpath($dir); };
10992 open(my $fh, ">", $len_cache);
10993 print $fh $cached_limit;
10996 $Limits::Command::line_max_len = tmux_length($cached_limit);
10997 if($opt::max_chars) {
10998 if($opt::max_chars <= $cached_limit) {
10999 $Limits::Command::line_max_len = $opt::max_chars;
11001 ::warning("Value for -s option should be < $cached_limit.");
11005 return int($Limits::Command::line_max_len);
11008 sub real_max_length($) {
11009 # Find the max_length of a command line
11011 # The maximal command line length
11012 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
11013 my $upper = 100_000_000;
11014 # 1000 is supported everywhere, so the search can start anywhere 1..999
11015 # 324 makes the search much faster on CygWin, so let us use that
11018 if($len > $upper) { return $len };
11020 } while (is_acceptable_command_line_length($len));
11021 # Then search for the actual max length between 0 and upper bound
11022 return binary_find_max_length(int($len/16),$len);
11025 # Prototype forwarding
11026 sub binary_find_max_length($$);
11027 sub binary_find_max_length($$) {
11028 # Given a lower and upper bound find the max_length of a command line
11030 # number of chars on the longest command line allowed
11031 my ($lower, $upper) = (@_);
11032 if($lower == $upper or $lower == $upper-1) { return $lower; }
11033 my $middle = int (($upper-$lower)/2 + $lower);
11034 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
11035 if (is_acceptable_command_line_length($middle)) {
11036 return binary_find_max_length($middle,$upper);
11038 return binary_find_max_length($lower,$middle);
11042 sub is_acceptable_command_line_length($) {
11043 # Test if a command line of this length can run
11044 # in the current environment
11046 # 0 if the command line length is too long
11049 if($Global::parallel_env) {
11050 $len += length $Global::parallel_env;
11052 ::qqx("true "."x"x$len);
11053 ::debug("init", "$len=$? ");
11057 sub tmux_length($) {
11058 # If $opt::tmux set, find the limit for tmux
11059 # tmux 1.8 has a 2kB limit
11060 # tmux 1.9 has a 16kB limit
11061 # tmux 2.0 has a 16kB limit
11062 # tmux 2.1 has a 16kB limit
11063 # tmux 2.2 has a 16kB limit
11065 # $len = maximal command line length
11067 # $tmux_len = maximal length runable in tmux
11071 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11072 if(not ::which($ENV{'PARALLEL_TMUX'})) {
11073 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
11074 ::wait_and_exit(255);
11077 for my $l (1, 2020, 16320, 100000, $len) {
11078 my $tmpfile = ::tmpname("tms");
11079 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
11080 " -S $tmpfile new-session -d -n echo $l".
11081 ("x"x$l). " && echo $l; rm -f $tmpfile";
11082 push @out, ::qqx($tmuxcmd);
11085 ::debug("tmux","tmux-out ",@out);
11087 # The arguments is given 3 times on the command line
11088 # and the wrapping is around 30 chars
11089 # (29 for tmux1.9, 33 for tmux1.8)
11090 my $tmux_len = ::max(@out);
11091 $len = ::min($len,int($tmux_len/4-33));
11092 ::debug("tmux","tmux-length ",$len);
11098 package RecordQueue;
11103 my $colsep = shift;
11106 if($opt::sqlworker) {
11108 $arg_sub_queue = SQLRecordQueue->new();
11109 } elsif(defined $colsep) {
11110 # Open one file with colsep or CSV
11111 $arg_sub_queue = RecordColQueue->new($fhs);
11113 # Open one or more files if multiple -a
11114 $arg_sub_queue = MultifileQueue->new($fhs);
11117 'unget' => \@unget,
11119 'arg_sub_queue' => $arg_sub_queue,
11120 }, ref($class) || $class;
11125 # reference to array of Arg-objects
11127 if(@{$self->{'unget'}}) {
11128 $self->{'arg_number'}++;
11129 # Flush cached computed replacements in Arg-objects
11130 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11131 my $ret = shift @{$self->{'unget'}};
11133 map { $_->flush_cache() } @$ret;
11137 my $ret = $self->{'arg_sub_queue'}->get();
11139 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
11140 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
11141 # to mean no-string
11142 ::warning("A NUL character in the input was replaced with \\0.",
11143 "NUL cannot be passed through in the argument list.",
11144 "Did you mean to use the --null option?");
11145 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
11146 # Replace \0 with \\0
11147 my $a = $_->orig();
11152 if(defined $Global::max_number_of_args
11153 and $Global::max_number_of_args == 0) {
11154 ::debug("run", "Read 1 but return 0 args\n");
11155 # \0noarg => nothing (not the empty string)
11156 map { $_->set_orig("\0noarg"); } @$ret;
11158 # Flush cached computed replacements in Arg-objects
11159 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11160 map { $_->flush_cache() } @$ret;
11167 ::debug("run", "RecordQueue-unget\n");
11168 $self->{'arg_number'} -= @_;
11169 unshift @{$self->{'unget'}}, @_;
11174 my $empty = (not @{$self->{'unget'}}) &&
11175 $self->{'arg_sub_queue'}->empty();
11176 ::debug("run", "RecordQueue->empty $empty");
11180 sub arg_number($) {
11182 return $self->{'arg_number'};
11186 package RecordColQueue;
11192 my $arg_sub_queue = MultifileQueue->new($fhs);
11194 'unget' => \@unget,
11195 'arg_sub_queue' => $arg_sub_queue,
11196 }, ref($class) || $class;
11201 # reference to array of Arg-objects
11203 if(@{$self->{'unget'}}) {
11204 return shift @{$self->{'unget'}};
11206 my $unget_ref = $self->{'unget'};
11207 if($self->{'arg_sub_queue'}->empty()) {
11210 my $in_record = $self->{'arg_sub_queue'}->get();
11211 if(defined $in_record) {
11212 my @out_record = ();
11213 for my $arg (@$in_record) {
11214 ::debug("run", "RecordColQueue::arg $arg\n");
11215 my $line = $arg->orig();
11216 ::debug("run", "line='$line'\n");
11221 if(not $Global::csv->parse($line)) {
11222 die "CSV has unexpected format: ^$line^";
11224 for($Global::csv->fields()) {
11225 push @out_record, Arg->new($_);
11228 for my $s (split /$opt::colsep/o, $line, -1) {
11229 push @out_record, Arg->new($s);
11233 push @out_record, Arg->new("");
11236 return \@out_record;
11244 ::debug("run", "RecordColQueue-unget '@_'\n");
11245 unshift @{$self->{'unget'}}, @_;
11250 my $empty = (not @{$self->{'unget'}}) &&
11251 $self->{'arg_sub_queue'}->empty();
11252 ::debug("run", "RecordColQueue->empty $empty");
11257 package SQLRecordQueue;
11263 'unget' => \@unget,
11264 }, ref($class) || $class;
11269 # reference to array of Arg-objects
11271 if(@{$self->{'unget'}}) {
11272 return shift @{$self->{'unget'}};
11274 return $Global::sql->get_record();
11279 ::debug("run", "SQLRecordQueue-unget '@_'\n");
11280 unshift @{$self->{'unget'}}, @_;
11285 if(@{$self->{'unget'}}) { return 0; }
11286 my $get = $self->get();
11288 $self->unget($get);
11290 my $empty = not $get;
11291 ::debug("run", "SQLRecordQueue->empty $empty");
11296 package MultifileQueue;
11298 @Global::unget_argv=();
11303 for my $fh (@$fhs) {
11304 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
11305 ::warning("Input is read from the terminal. You are either an expert",
11306 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
11307 "::: or :::: or -a or to pipe data into parallel. If so",
11308 "consider going through the tutorial: man parallel_tutorial",
11309 "Press CTRL-D to exit.");
11313 'unget' => \@Global::unget_argv,
11315 'arg_matrix' => undef,
11316 }, ref($class) || $class;
11322 return $self->link_get();
11324 return $self->nest_get();
11330 ::debug("run", "MultifileQueue-unget '@_'\n");
11331 unshift @{$self->{'unget'}}, @_;
11336 my $empty = (not @Global::unget_argv) &&
11337 not @{$self->{'unget'}};
11338 for my $fh (@{$self->{'fhs'}}) {
11339 $empty &&= eof($fh);
11341 ::debug("run", "MultifileQueue->empty $empty ");
11347 if(@{$self->{'unget'}}) {
11348 return shift @{$self->{'unget'}};
11353 for my $fh (@{$self->{'fhs'}}) {
11354 my $arg = read_arg_from_fh($fh);
11356 # Record $arg for recycling at end of file
11357 push @{$self->{'arg_matrix'}{$fh}}, $arg;
11358 push @record, $arg;
11361 ::debug("run", "EOA ");
11362 # End of file: Recycle arguments
11363 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
11364 # return last @{$args->{'args'}{$fh}};
11365 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
11377 if(@{$self->{'unget'}}) {
11378 return shift @{$self->{'unget'}};
11383 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
11384 if(not $self->{'arg_matrix'}) {
11385 # Initialize @arg_matrix with one arg from each file
11386 # read one line from each file
11389 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
11390 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11394 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
11395 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
11398 # All filehandles were at eof or eof-string
11401 return [@first_arg_set];
11404 # Treat the case with one input source special. For multiple
11405 # input sources we need to remember all previously read values to
11406 # generate all combinations. But for one input source we can
11407 # forget the value after first use.
11408 if($no_of_inputsources == 1) {
11409 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
11410 if(defined($arg)) {
11415 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
11416 if(eof($self->{'fhs'}[$fhno])) {
11420 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11421 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
11422 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
11423 $self->{'arg_matrix'}[$fhno][$len] = $arg;
11424 # make all new combinations
11426 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
11427 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
11428 # Is input source --link'ed to the next?
11429 $opt::linkinputsource[$fhn+1]);
11431 # Find only combinations with this new entry
11432 $combarg[2*$fhno] = [$len,$len];
11434 # [ 1, 3, 7 ], [ 2, 4, 1 ]
11436 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
11438 for my $c (expand_combinations(@combarg)) {
11440 for my $n (0 .. $no_of_inputsources - 1 ) {
11441 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
11445 # append the mapped to the ungotten arguments
11446 push @{$self->{'unget'}}, @mapped;
11449 return shift @{$self->{'unget'}};
11453 # all are eof or at EOF string; return from the unget queue
11454 return shift @{$self->{'unget'}};
11457 sub read_arg_from_fh($) {
11458 # Read one Arg from filehandle
11460 # Arg-object with one read line
11461 # undef if end of file
11465 my $half_record = 0;
11467 # This makes 10% faster
11468 if(not defined ($arg = <$fh>)) {
11469 if(defined $prepend) {
11470 return Arg->new($prepend);
11476 # We need to read a full CSV line.
11477 if(($arg =~ y/"/"/) % 2 ) {
11478 # The number of " on the line is uneven:
11479 # If we were in a half_record => we have a full record now
11480 # If we were ouside a half_record => we are in a half record now
11481 $half_record = not $half_record;
11484 # CSV half-record with quoting:
11485 # col1,"col2 2""x3"" board newline <-this one
11490 # Now we have a full CSV record
11495 if($Global::end_of_file_string and
11496 $arg eq $Global::end_of_file_string) {
11497 # Ignore the rest of input file
11499 ::debug("run", "EOF-string ($arg) met\n");
11500 if(defined $prepend) {
11501 return Arg->new($prepend);
11506 if(defined $prepend) {
11507 $arg = $prepend.$arg; # For line continuation
11510 if($Global::ignore_empty) {
11511 if($arg =~ /^\s*$/) {
11512 redo; # Try the next line
11515 if($Global::max_lines) {
11516 if($arg =~ /\s$/) {
11517 # Trailing space => continued on next line
11522 }} while (1 == 0); # Dummy loop {{}} for redo
11524 return Arg->new($arg);
11526 ::die_bug("multiread arg undefined");
11530 # Prototype forwarding
11531 sub expand_combinations(@);
11532 sub expand_combinations(@) {
11534 # ([xmin,xmax], [ymin,ymax], ...)
11535 # Returns: ([x,y,...],[x,y,...])
11536 # where xmin <= x <= xmax and ymin <= y <= ymax
11537 my $minmax_ref = shift;
11538 my $link = shift; # This is linked to the next input source
11539 my $xmin = $$minmax_ref[0];
11540 my $xmax = $$minmax_ref[1];
11543 my @rest = expand_combinations(@_);
11545 # Linked to next col with --link/:::+/::::+
11546 # TODO BUG does not wrap values if not same number of vals
11547 push(@p, map { [$$_[0], @$_] }
11548 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
11550 # If there are more columns: Compute those recursively
11551 for(my $x = $xmin; $x <= $xmax; $x++) {
11552 push @p, map { [$x, @$_] } @rest;
11556 for(my $x = $xmin; $x <= $xmax; $x++) {
11570 if($opt::hostgroups) {
11571 if($orig =~ s:@(.+)::) {
11572 # We found hostgroups on the arg
11573 @hostgroups = split(/\+/, $1);
11574 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
11575 # This hostgroup is not defined using -S
11577 ::warning("Adding hostgroups: @hostgroups");
11579 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
11580 my $sshlogin = SSHLogin->new($_);
11581 my $sshlogin_string = $sshlogin->string();
11582 $Global::host{$sshlogin_string} = $sshlogin;
11583 $Global::hostgroups{$sshlogin_string} = 1;
11587 # No hostgroup on the arg => any hostgroup
11588 @hostgroups = (keys %Global::hostgroups);
11593 'hostgroups' => \@hostgroups,
11594 }, ref($class) || $class;
11598 # Q alias for ::shell_quote_scalar
11599 my $ret = ::Q($_[0]);
11600 no warnings 'redefine';
11606 # pQ alias for ::perl_quote_scalar
11607 my $ret = ::pQ($_[0]);
11608 no warnings 'redefine';
11614 return $Global::JobQueue->total_jobs();
11621 # shorthand for $job->skip();
11625 # shorthand for $job->slot();
11629 # shorthand for $job->seq();
11633 sub replace($$$$) {
11634 # Calculates the corresponding value for a given perl expression
11636 # The calculated string (quoted if asked for)
11638 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
11639 my $quote = (shift) ? 1 : 0; # should the string be quoted?
11640 # This is actually a CommandLine-object,
11641 # but it looks nice to be able to say {= $job->slot() =}
11643 $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
11644 if(not $self->{'cache'}{$perlexpr}) {
11645 # Only compute the value once
11646 # Use $_ as the variable to change
11648 if($Global::trim eq "n") {
11649 $_ = $self->{'orig'};
11652 $_ = trim_of($self->{'orig'});
11654 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
11655 if(not $perleval{$perlexpr}) {
11656 # Make an anonymous function of the $perlexpr
11657 # And more importantly: Compile it only once
11658 if($perleval{$perlexpr} =
11659 eval('sub { no strict; no warnings; my $job = shift; '.
11663 # The eval failed. Maybe $perlexpr is invalid perl?
11664 ::error("Cannot use $perlexpr: $@");
11665 ::wait_and_exit(255);
11668 # Execute the function
11669 $perleval{$perlexpr}->($job);
11670 $self->{'cache'}{$perlexpr} = $_;
11672 # Return the value quoted if needed
11673 return($quote ? Q($self->{'cache'}{$perlexpr})
11674 : $self->{'cache'}{$perlexpr});
11678 sub flush_cache($) {
11679 # Flush cache of computed values
11681 $self->{'cache'} = undef;
11686 return $self->{'orig'};
11691 $self->{'orig'} = shift;
11695 # Removes white space as specifed by --trim:
11701 # string with white space removed as needed
11702 my @strings = map { defined $_ ? $_ : "" } (@_);
11704 if($Global::trim eq "n") {
11706 } elsif($Global::trim eq "l") {
11707 for my $arg (@strings) { $arg =~ s/^\s+//; }
11708 } elsif($Global::trim eq "r") {
11709 for my $arg (@strings) { $arg =~ s/\s+$//; }
11710 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
11711 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
11713 ::error("--trim must be one of: r l rl lr.");
11714 ::wait_and_exit(255);
11716 return wantarray ? @strings : "@strings";
11720 package TimeoutQueue;
11724 my $delta_time = shift;
11726 if($delta_time =~ /(\d+(\.\d+)?)%/) {
11727 # Timeout in percent
11729 $delta_time = 1_000_000;
11731 $delta_time = ::multiply_time_units($delta_time);
11735 'delta_time' => $delta_time,
11737 'remedian_idx' => 0,
11738 'remedian_arr' => [],
11739 'remedian' => undef,
11740 }, ref($class) || $class;
11743 sub delta_time($) {
11745 return $self->{'delta_time'};
11748 sub set_delta_time($$) {
11750 $self->{'delta_time'} = shift;
11755 return $self->{'remedian'};
11758 sub set_remedian($$) {
11759 # Set median of the last 999^3 (=997002999) values using Remedian
11761 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
11762 # robust averaging method for large data sets." Journal of the
11763 # American Statistical Association 85.409 (1990): 97-104.
11766 my $i = $self->{'remedian_idx'}++;
11767 my $rref = $self->{'remedian_arr'};
11768 $rref->[0][$i%999] = $val;
11769 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
11770 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
11771 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
11774 sub update_median_runtime($) {
11775 # Update delta_time based on runtime of finished job if timeout is
11778 my $runtime = shift;
11779 if($self->{'pct'}) {
11780 $self->set_remedian($runtime);
11781 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
11782 ::debug("run", "Timeout: $self->{'delta_time'}s ");
11786 sub process_timeouts($) {
11787 # Check if there was a timeout
11789 # $self->{'queue'} is sorted by start time
11790 while (@{$self->{'queue'}}) {
11791 my $job = $self->{'queue'}[0];
11792 if($job->endtime()) {
11793 # Job already finished. No need to timeout the job
11794 # This could be because of --keep-order
11795 shift @{$self->{'queue'}};
11796 } elsif($job->is_timedout($self->{'delta_time'})) {
11797 # Need to shift off queue before kill
11798 # because kill calls usleep that calls process_timeouts
11799 shift @{$self->{'queue'}};
11800 ::warning("This job was killed because it timed out:",
11804 # Because they are sorted by start time the rest are later
11813 push @{$self->{'queue'}}, $in;
11822 $Global::use{"DBI"} ||= eval "use DBI; 1;";
11823 # +DBURL = append to this DBURL
11824 my $append = $dburl=~s/^\+//;
11825 my %options = parse_dburl(get_alias($dburl));
11826 my %driveralias = ("sqlite" => "SQLite",
11827 "sqlite3" => "SQLite",
11829 "postgres" => "Pg",
11830 "postgresql" => "Pg",
11832 "oracle" => "Oracle",
11833 "ora" => "Oracle");
11834 my $driver = $driveralias{$options{'databasedriver'}} ||
11835 $options{'databasedriver'};
11836 my $database = $options{'database'};
11837 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
11838 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
11839 my $dsn = "DBI:$driver:dbname=$database$host$port";
11840 my $userid = $options{'user'};
11841 my $password = $options{'password'};;
11842 if(not grep /$driver/, DBI->available_drivers) {
11843 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
11844 ::wait_and_exit(255);
11846 my $dbh = DBI->connect($dsn, $userid, $password,
11847 { RaiseError => 1, AutoInactiveDestroy => 1 })
11848 or die $DBI::errstr;
11850 $dbh->{'PrintWarn'} = $Global::debug || 0;
11851 $dbh->{'PrintError'} = $Global::debug || 0;
11852 $dbh->{'RaiseError'} = 1;
11853 $dbh->{'ShowErrorStatement'} = 1;
11854 $dbh->{'HandleError'} = sub {};
11856 if(not defined $options{'table'}) {
11857 ::error("The DBURL ($dburl) must contain a table.");
11858 ::wait_and_exit(255);
11863 'driver' => $driver,
11864 'max_number_of_args' => undef,
11865 'table' => $options{'table'},
11866 'append' => $append,
11867 }, ref($class) || $class;
11870 # Prototype forwarding
11874 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
11875 if ($alias !~ /^:/) {
11882 ($path) = readlink($0) =~ m|^(.*)/|;
11884 ($path) = $0 =~ m|^(.*)/|;
11887 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
11888 "$path/dburl.aliases", "$path/dburl.aliases.dist");
11889 for (@deprecated) {
11891 ::warning("$_ is deprecated. ".
11892 "Use .sql/aliases instead (read man sql).");
11896 check_permissions("$ENV{HOME}/.sql/aliases");
11897 check_permissions("$ENV{HOME}/.dburl.aliases");
11898 my @search = ("$ENV{HOME}/.sql/aliases",
11899 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
11900 "$path/dburl.aliases", "$path/dburl.aliases.dist");
11901 for my $alias_file (@search) {
11902 # local $/ needed if -0 set
11904 if(-r $alias_file) {
11905 open(my $in, "<", $alias_file) || die;
11906 push @urlalias, <$in>;
11910 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
11911 # If we saw this before: we have an alias loop
11912 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
11913 ::error("$alias_part is a cyclic alias.");
11916 push @Private::seen_aliases, $alias_part;
11921 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
11925 return get_alias($dburl.$rest);
11927 ::error("$alias is not defined in @search");
11932 sub check_permissions($) {
11937 my $username = (getpwuid($<))[0];
11938 ::warning("$file should be owned by $username: ".
11939 "chown $username $file");
11941 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
11942 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
11944 my $username = (getpwuid($<))[0];
11945 ::warning("$file should be only be readable by $username: ".
11946 "chmod 600 $file");
11951 sub parse_dburl($) {
11954 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
11956 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
11957 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
11958 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
11960 ([^:@/][^:@]*|) # Username ($2)
11962 :([^@]*) # Password ($3)
11965 ([^:/]*)? # Hostname ($4)
11968 ([^/]*)? # Port ($5)
11972 ([^/?]*)? # Database ($6)
11976 ([^?]*)? # Table ($7)
11983 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
11984 $options{user} = ::undef_if_empty(uri_unescape($2));
11985 $options{password} = ::undef_if_empty(uri_unescape($3));
11986 $options{host} = ::undef_if_empty(uri_unescape($4));
11987 $options{port} = ::undef_if_empty(uri_unescape($5));
11988 $options{database} = ::undef_if_empty(uri_unescape($6));
11989 $options{table} = ::undef_if_empty(uri_unescape($7));
11990 $options{query} = ::undef_if_empty(uri_unescape($8));
11991 ::debug("sql", "dburl $url\n");
11992 ::debug("sql", "databasedriver ", $options{databasedriver},
11993 " user ", $options{user},
11994 " password ", $options{password}, " host ", $options{host},
11995 " port ", $options{port}, " database ", $options{database},
11996 " table ", $options{table}, " query ", $options{query}, "\n");
11998 ::error("$url is not a valid DBURL");
12004 sub uri_unescape($) {
12005 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
12006 # to avoid depending on URI::Escape
12007 # This section is (C) Gisle Aas.
12008 # Note from RFC1630: "Sequences which start with a percent sign
12009 # but are not followed by two hexadecimal characters are reserved
12010 # for future extension"
12012 if (@_ && wantarray) {
12013 # not executed for the common case of a single argument
12014 my @str = ($str, @_); # need to copy
12016 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
12020 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
12027 if($self->{'driver'} eq "CSV") {
12029 if($stmt eq "BEGIN" or
12030 $stmt eq "COMMIT") {
12035 my $dbh = $self->{'dbh'};
12036 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
12037 # Execute with the rest of the args - if any
12041 while($lockretry < 10) {
12042 $sth = $dbh->prepare($stmt);
12045 eval { $rv = $sth->execute(@_) }) {
12048 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
12050 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
12052 # It is just a worker that reported back too late -
12053 # another worker had finished the job first
12054 # and the table was then dropped
12058 if($DBI::errstr =~ /locked/) {
12059 ::debug("sql", "Lock retry: $lockretry");
12061 ::usleep(rand()*300);
12062 } elsif(not $sth) {
12066 ::error($DBI::errstr);
12067 ::wait_and_exit(255);
12071 if($lockretry >= 10) {
12072 ::die_bug("retry > 10: $DBI::errstr");
12074 if($rv < 0 and $DBI::errstr){
12075 ::error($DBI::errstr);
12076 ::wait_and_exit(255);
12083 my $sth = $self->run(@_);
12085 # If $sth = 0 it means the table was dropped by another process
12087 my @row = $sth->fetchrow_array();
12089 push @retval, \@row;
12096 return $self->{'table'};
12101 return $self->{'append'};
12107 my $table = $self->table();
12108 $self->run("UPDATE $table $stmt",@_);
12113 my $commandline = shift;
12115 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
12116 $commandline->seq(),
12117 join("",@{$commandline->{'output'}{1}}),
12118 join("",@{$commandline->{'output'}{2}}));
12121 sub max_number_of_args($) {
12122 # Maximal number of args for this table
12124 if(not $self->{'max_number_of_args'}) {
12125 # Read the number of args from the SQL table
12126 my $table = $self->table();
12127 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
12128 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
12129 Receive Exitval _Signal Command Stdout Stderr);
12131 ::error
("$table contains no records");
12133 # Count the number of Vx columns
12134 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
12136 return $self->{'max_number_of_args'};
12139 sub set_max_number_of_args
($$) {
12141 $self->{'max_number_of_args'} = shift;
12144 sub create_table
($) {
12146 if($self->append()) { return; }
12147 my $max_number_of_args = shift;
12148 $self->set_max_number_of_args($max_number_of_args);
12149 my $table = $self->table();
12150 $self->run(qq(DROP TABLE IF EXISTS
$table;));
12151 # BIGINT and TEXT are not supported in these databases or are too small
12153 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
12154 "TEXT" => "CLOB", },
12155 "mysql" => { "TEXT" => "LONGTEXT", },
12156 "CSV" => { "BIGINT" => "INT",
12157 "FLOAT" => "REAL", },
12159 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
12160 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
12161 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
12162 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
12163 $self->run(qq{CREATE TABLE
$table
12178 sub insert_records
($) {
12181 my $command_ref = shift;
12182 my $record_ref = shift;
12183 my $table = $self->table();
12184 # For SQL encode the command with \257 space as split points
12185 my $command = join("\257 ",@
$command_ref);
12186 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12187 # Two extra value due to $seq, Exitval, Send
12188 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
12189 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
12190 "VALUES ($v_vals);", $seq, $command, -1000,
12191 0, @
$record_ref[1..$#$record_ref]);
12194 sub get_record
($) {
12197 my $table = $self->table();
12198 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12199 my $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12200 "WHERE Exitval = -1000 ORDER BY Seq LIMIT 1;");
12202 my $val_ref = $v->[0];
12203 # Mark record as taken
12204 my $seq = shift @
$val_ref;
12205 # Save the sequence number to use when running the job
12206 $SQL::next_seq
= $seq;
12207 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
12208 my @command = split /\257 /, shift @
$val_ref;
12209 $SQL::command_ref
= \
@command;
12211 push @retval, Arg
->new($_);
12221 sub total_jobs
($) {
12223 my $table = $self->table();
12224 my $v = $self->get("SELECT count(*) FROM $table;");
12226 return $v->[0]->[0];
12228 ::die_bug
("SQL::total_jobs");
12234 my $table = $self->table();
12235 my $v = $self->get("SELECT max(Seq) FROM $table;");
12237 return $v->[0]->[0];
12239 ::die_bug
("SQL::max_seq");
12244 # Check if there are any jobs left in the SQL table that do not
12245 # have a "real" exitval
12247 if($opt::wait or $Global::start_sqlworker
) {
12248 my $table = $self->table();
12249 my $rv = $self->get("select Seq,Exitval from $table ".
12250 "where Exitval <= -1000 limit 1");
12251 return not $rv->[0];
12259 # This package provides a counting semaphore
12261 # If a process dies without releasing the semaphore the next process
12262 # that needs that entry will clean up dead semaphores
12264 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
12265 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
12266 # process holding the entry. If the process dies, the entry can be
12267 # taken by another process.
12273 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
12274 $id = "id-".$id; # To distinguish it from a process id
12275 my $parallel_locks = $Global::cache_dir
. "/semaphores";
12276 -d
$parallel_locks or ::mkdir_or_die
($parallel_locks);
12277 my $lockdir = "$parallel_locks/$id";
12279 my $lockfile = $lockdir.".lock";
12280 if($count < 1) { ::die_bug
("semaphore-count: $count"); }
12282 'lockfile' => $lockfile,
12283 'lockfh' => Symbol
::gensym
(),
12284 'lockdir' => $lockdir,
12286 'idfile' => $lockdir."/".$id,
12288 'pidfile' => $lockdir."/".$$.'@'.::hostname
(),
12289 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
12290 }, ref($class) || $class;
12293 sub remove_dead_locks
($) {
12295 my $lockdir = $self->{'lockdir'};
12297 for my $d (glob "$lockdir/*") {
12298 $d =~ m
:$lockdir/([0-9]+)\@
([-\
._a
-z0
-9]+)$:o
or next;
12299 my ($pid, $host) = ($1, $2);
12300 if($host eq ::hostname
()) {
12302 ::debug
("sem", "Alive: $pid $d\n");
12304 ::debug
("sem", "Dead: $d\n");
12313 my $sleep = 1; # 1 ms
12314 my $start_time = time;
12316 # Can we get a lock?
12317 $self->atomic_link_if_count_less_than() and last;
12318 $self->remove_dead_locks();
12319 # Retry slower and slower up to 1 second
12320 $sleep = ($sleep < 1000) ?
($sleep * 1.1) : ($sleep);
12321 # Random to avoid every sleeping job waking up at the same time
12322 ::usleep
(rand()*$sleep);
12323 if($opt::semaphoretimeout
) {
12324 if($opt::semaphoretimeout
> 0
12326 time - $start_time > $opt::semaphoretimeout
) {
12327 # Timeout: Take the semaphore anyway
12328 ::warning
("Semaphore timed out. Stealing the semaphore.");
12329 if(not -e
$self->{'idfile'}) {
12330 open (my $fh, ">", $self->{'idfile'}) or
12331 ::die_bug
("timeout_write_idfile: $self->{'idfile'}");
12334 link $self->{'idfile'}, $self->{'pidfile'};
12337 if($opt::semaphoretimeout
< 0
12339 time - $start_time > -$opt::semaphoretimeout
) {
12341 ::warning
("Semaphore timed out. Exiting.");
12347 ::debug
("sem", "acquired $self->{'pid'}\n");
12352 ::rm
($self->{'pidfile'});
12353 if($self->nlinks() == 1) {
12354 # This is the last link, so atomic cleanup
12356 if($self->nlinks() == 1) {
12357 ::rm
($self->{'idfile'});
12358 rmdir $self->{'lockdir'};
12362 ::debug
("run", "released $self->{'pid'}\n");
12365 sub pid_change
($) {
12366 # This should do what release()+acquire() would do without having
12367 # to re-acquire the semaphore
12370 my $old_pidfile = $self->{'pidfile'};
12371 $self->{'pid'} = $$;
12372 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname
();
12373 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
12374 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12375 ::rm
($old_pidfile);
12378 sub atomic_link_if_count_less_than
($) {
12379 # Link $file1 to $file2 if nlinks to $file1 < $count
12383 my $nlinks = $self->nlinks();
12384 ::debug
("sem","$nlinks<$self->{'count'} ");
12385 if($nlinks < $self->{'count'}) {
12386 -d
$self->{'lockdir'} or ::mkdir_or_die
($self->{'lockdir'});
12387 if(not -e
$self->{'idfile'}) {
12388 open (my $fh, ">", $self->{'idfile'}) or
12389 ::die_bug
("write_idfile: $self->{'idfile'}");
12392 $retval = link $self->{'idfile'}, $self->{'pidfile'};
12393 ::debug
("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12396 ::debug
("sem", "atomic $retval");
12402 if(-e
$self->{'idfile'}) {
12403 return (stat(_
))[3];
12411 my $sleep = 100; # 100 ms
12412 my $total_sleep = 0;
12413 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
12415 while(not $locked) {
12416 if(tell($self->{'lockfh'}) == -1) {
12418 open($self->{'lockfh'}, ">", $self->{'lockfile'})
12419 or ::debug("run
", "Cannot
open $self->{'lockfile'}");
12421 if($self->{'lockfh'}) {
12423 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
12424 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
12425 # The file is locked: No need to retry
12429 if ($! =~ m/Function not implemented/) {
12430 ::warning("flock: $!",
12431 "Will
wait for a random
while.");
12432 ::usleep(rand(5000));
12433 # File cannot be locked: No need to retry
12439 # Locking failed in first round
12440 # Sleep and try again
12441 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
12442 # Random to avoid every sleeping job waking up at the same time
12443 ::usleep(rand()*$sleep);
12444 $total_sleep += $sleep;
12445 if($opt::semaphoretimeout) {
12446 if($opt::semaphoretimeout > 0
12448 $total_sleep/1000 > $opt::semaphoretimeout) {
12449 # Timeout: Take the semaphore anyway
12450 ::warning("Semaphore timed out
. Taking the semaphore
.");
12454 if($opt::semaphoretimeout < 0
12456 $total_sleep/1000 > -$opt::semaphoretimeout) {
12458 ::warning("Semaphore timed out
. Exiting
.");
12463 if($total_sleep/1000 > 30) {
12464 ::warning("Semaphore stuck
for 30 seconds
. ".
12465 "Consider using
--semaphoretimeout
.");
12469 ::debug("run
", "locked
$self->{'lockfile'}");
12474 ::rm($self->{'lockfile'});
12475 close $self->{'lockfh'};
12476 ::debug("run
", "unlocked
\n");
12479 # Keep perl -w happy
12481 $opt::x = $Semaphore::timeout = $Semaphore::wait =
12482 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
12483 $Global::max_slot_number = $opt::session;
12488 save_stdin_stdout_stderr();
12489 save_original_signal_handler();
12491 ::debug("init
", "Open file descriptors
: ", join(" ",keys %Global::fd), "\n");
12492 my $number_of_args;
12493 if($Global::max_number_of_args) {
12494 $number_of_args = $Global::max_number_of_args;
12495 } elsif ($opt::X or $opt::m or $opt::xargs) {
12496 $number_of_args = undef;
12498 $number_of_args = 1;
12501 my @command = @ARGV;
12502 my @input_source_fh;
12503 if($opt::pipepart) {
12505 @input_source_fh = map { open_or_exit($_) } @opt::a;
12506 # Remove the first: It will be the file piped.
12507 shift @input_source_fh;
12508 if(not @input_source_fh and not $opt::pipe) {
12509 @input_source_fh = (*STDIN);
12512 # -a is used for data - not for command line args
12513 @input_source_fh = map { open_or_exit($_) } "/dev/null
";
12516 @input_source_fh = map { open_or_exit($_) } @opt::a;
12517 if(not @input_source_fh and not $opt::pipe) {
12518 @input_source_fh = (*STDIN);
12521 if($opt::sqlmaster) {
12522 # Create SQL table to hold joblog + output
12523 $Global::sql->create_table($#input_source_fh+1);
12524 if($opt::sqlworker) {
12525 # Start a real --sqlworker in the background later
12526 $Global::start_sqlworker = 1;
12527 $opt::sqlworker = undef;
12531 if($opt::skip_first_line) {
12532 # Skip the first line for the first file handle
12533 my $fh = $input_source_fh[0];
12537 set_input_source_header(\@command,\@input_source_fh);
12539 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
12540 # Parallel check all hosts are up. Remove hosts that are down
12544 if($opt::nonall or $opt::onall) {
12545 onall(\@input_source_fh,@command);
12546 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
12549 $Global::JobQueue = JobQueue->new(
12550 \@command,\@input_source_fh,$Global::ContextReplace,
12551 $number_of_args,\@Global::transfer_files,\@Global::ret_files);
12553 if($opt::pipepart) {
12555 } elsif($opt::pipe and $opt::tee) {
12557 } elsif($opt::pipe and $opt::shard) {
12558 pipe_shard_setup();
12561 if($opt::groupby) {
12562 group_by_stdin_filter();
12564 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
12565 # Count the number of jobs or shuffle all jobs
12566 # before starting any.
12567 # Must be done after ungetting any --pipepart jobs.
12568 $Global::JobQueue->total_jobs();
12570 # Compute $Global::max_jobs_running
12571 # Must be done after ungetting any --pipepart jobs.
12572 max_jobs_running();
12576 if($Global::semaphore) {
12577 $sem = acquire_semaphore();
12579 $SIG{TERM} = $Global::original_sig{TERM};
12580 $SIG{HUP} = \&start_no_new_jobs;
12582 if($opt::tee or $opt::shard) {
12583 # All jobs must be running in parallel for --tee/--shard
12584 while(start_more_jobs()) {}
12585 $Global::start_no_new_jobs = 1;
12586 if(not $Global::JobQueue->empty()) {
12587 ::error("--tee requres
--jobs to be higher
. Try
--jobs
0.");
12588 ::wait_and_exit(255);
12590 } elsif($opt::pipe and not $opt::pipepart) {
12591 # Fill all jobslots
12592 while(start_more_jobs()) {}
12595 # Reap one - start one
12596 while(reaper() + start_more_jobs()) {}
12598 ::debug("init
", "Start draining
\n");
12599 drain_job_queue(@command);
12600 ::debug("init
", "Done draining
\n");
12602 ::debug("init
", "Done reaping
\n");
12603 if($Global::semaphore) {
12607 ::debug("init
", "Halt
\n");