parallel: --group-by initial version. Documentation missing.
[parallel.git] / src / parallel
bloba51da9114eb138a9d686a4b0e52c7759f2702a9d
1 #!/usr/bin/env perl
3 # Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, see <http://www.gnu.org/licenses/>
17 # or write to the Free Software Foundation, Inc., 51 Franklin St,
18 # Fifth Floor, Boston, MA 02110-1301 USA
20 # open3 used in Job::start
21 use IPC::Open3;
22 # &WNOHANG used in reaper
23 use POSIX qw(:sys_wait_h setsid ceil :errno_h);
24 # gensym used in Job::start
25 use Symbol qw(gensym);
26 # tempfile used in Job::start
27 use File::Temp qw(tempfile tempdir);
28 # mkpath used in openresultsfile
29 use File::Path;
30 # GetOptions used in get_options_from_array
31 use Getopt::Long;
32 # Used to ensure code quality
33 use strict;
34 use File::Basename;
36 sub set_input_source_header($$) {
37 my ($command_ref,$input_source_fh_ref) = @_;
38 if($opt::header and not $opt::pipe) {
39 # split with colsep or \t
40 # $header force $colsep = \t if undef?
41 my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
42 # regexp for {=
43 my $left = "\Q$Global::parensleft\E";
44 my $l = $Global::parensleft;
45 # regexp for =}
46 my $right = "\Q$Global::parensright\E";
47 my $r = $Global::parensright;
48 my $id = 1;
49 for my $fh (@$input_source_fh_ref) {
50 my $line = <$fh>;
51 chomp($line);
52 ::debug("init", "Delimiter: '$delimiter'");
53 for my $s (split /$delimiter/o, $line) {
54 ::debug("init", "Colname: '$s'");
55 # Replace {colname} with {2}
56 for(@$command_ref, @Global::ret_files,
57 @Global::transfer_files, $opt::tagstring,
58 $opt::workdir, $opt::results, $opt::retries) {
59 # Skip if undefined
60 $_ or next;
61 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
62 # {=header1 ... =} => {=1 ... =}
63 s:$left $s (.*?) $right:$l$id$1$r:gx;
65 $Global::input_source_header{$id} = $s;
66 $id++;
69 } else {
70 my $id = 1;
71 for my $fh (@$input_source_fh_ref) {
72 $Global::input_source_header{$id} = $id;
73 $id++;
78 sub max_jobs_running() {
79 # Compute $Global::max_jobs_running as the max number of jobs
80 # running on each sshlogin.
81 # Returns:
82 # $Global::max_jobs_running
83 if(not $Global::max_jobs_running) {
84 for my $sshlogin (values %Global::host) {
85 $sshlogin->max_jobs_running();
88 if(not $Global::max_jobs_running) {
89 ::error("Cannot run any jobs.");
90 wait_and_exit(255);
92 return $Global::max_jobs_running;
95 sub halt() {
96 # Compute exit value,
97 # wait for children to complete
98 # and exit
99 if($opt::halt and $Global::halt_when ne "never") {
100 if(not defined $Global::halt_exitstatus) {
101 if($Global::halt_pct) {
102 $Global::halt_exitstatus =
103 ::ceil($Global::total_failed /
104 ($Global::total_started || 1) * 100);
105 } elsif($Global::halt_count) {
106 $Global::halt_exitstatus =
107 ::min(undef_as_zero($Global::total_failed),101);
110 wait_and_exit($Global::halt_exitstatus);
111 } else {
112 wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
117 sub __PIPE_MODE__() {}
120 sub pipepart_setup() {
121 # Compute the blocksize
122 # Generate the commands to extract the blocks
123 # Push the commands on queue
124 # Changes:
125 # @Global::cat_prepends
126 # $Global::JobQueue
127 if($opt::tee) {
128 # Prepend each command with
129 # < file
130 my $cat_string = "< ".Q($opt::a[0]);
131 for(1..$Global::JobQueue->total_jobs()) {
132 push @Global::cat_appends, $cat_string;
133 push @Global::cat_prepends, "";
135 } else {
136 if(not $opt::blocksize) {
137 # --blocksize with 10 jobs per jobslot
138 $opt::blocksize = -10;
140 if($opt::roundrobin) {
141 # --blocksize with 1 job per jobslot
142 $opt::blocksize = -1;
144 if($opt::blocksize < 0) {
145 my $size = 0;
146 # Compute size of -a
147 for(@opt::a) {
148 if(-f $_) {
149 $size += -s $_;
150 } elsif(-b $_) {
151 $size += size_of_block_dev($_);
152 } else {
153 ::error("$_ is neither a file nor a block device");
154 wait_and_exit(255);
157 # Run in total $job_slots*(- $blocksize) jobs
158 # Set --blocksize = size / no of proc / (- $blocksize)
159 $Global::dummy_jobs = 1;
160 $Global::blocksize = 1 +
161 int($size / max_jobs_running() / -$opt::blocksize);
163 @Global::cat_prepends = map { pipe_part_files($_) } @opt::a;
164 # Unget the empty arg as many times as there are parts
165 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
166 map { [Arg->new("\0noarg")] } @Global::cat_prepends
171 sub pipe_tee_setup() {
172 # Create temporary fifos
173 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
174 # This will spread the input to fifos
175 # Generate commands that reads from fifo1..N:
176 # cat fifo | user_command
177 # Changes:
178 # @Global::cat_prepends
179 my @fifos;
180 for(1..$Global::JobQueue->total_jobs()) {
181 push @fifos, tmpfifo();
183 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
184 if(not fork()){
185 # Let tee inherit our stdin
186 # and redirect stdout to null
187 open STDOUT, ">","/dev/null";
188 exec "tee",@fifos;
190 # For each fifo
191 # (rm fifo1; grep 1) < fifo1
192 # (rm fifo2; grep 2) < fifo2
193 # (rm fifo3; grep 3) < fifo3
194 # Remove the tmpfifo as soon as it is open
195 @Global::cat_prepends = map { "(rm $_;" } @fifos;
196 @Global::cat_appends = map { ") < $_" } @fifos;
200 sub parcat_script() {
201 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
202 my $script = q'{
203 use POSIX qw(:errno_h);
204 use IO::Select;
205 use strict;
206 use threads;
207 use Thread::Queue;
208 use Fcntl qw(:DEFAULT :flock);
210 my $opened :shared;
211 my $q = Thread::Queue->new();
212 my $okq = Thread::Queue->new();
213 my @producers;
215 if(not @ARGV) {
216 if(-t *STDIN) {
217 print "Usage:\n";
218 print " parcat file(s)\n";
219 print " cat argfile | parcat\n";
220 } else {
221 # Read arguments from stdin
222 chomp(@ARGV = <STDIN>);
225 my $files_to_open = 0;
226 # Default: fd = stdout
227 my $fd = 1;
228 for (@ARGV) {
229 # --rm = remove file when opened
230 /^--rm$/ and do { $opt::rm = 1; next; };
231 # -1 = output to fd 1, -2 = output to fd 2
232 /^-(\d+)$/ and do { $fd = $1; next; };
233 push @producers, threads->create("producer", $_, $fd);
234 $files_to_open++;
237 sub producer {
238 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
239 my $file = shift;
240 my $output_fd = shift;
241 open(my $fh, "<", $file) || do {
242 print STDERR "parcat: Cannot open $file\n";
243 exit(1);
245 # Remove file when it has been opened
246 if($opt::rm) {
247 unlink $file;
249 set_fh_non_blocking($fh);
250 $opened++;
251 # Pass the fileno to parent
252 $q->enqueue(fileno($fh),$output_fd);
253 # Get an OK that the $fh is opened and we can release the $fh
254 while(1) {
255 my $ok = $okq->dequeue();
256 if($ok == fileno($fh)) { last; }
257 # Not ours - very unlikely to happen
258 $okq->enqueue($ok);
260 return;
263 my $s = IO::Select->new();
264 my %buffer;
266 sub add_file {
267 my $infd = shift;
268 my $outfd = shift;
269 open(my $infh, "<&=", $infd) || die;
270 open(my $outfh, ">&=", $outfd) || die;
271 $s->add($infh);
272 # Tell the producer now opened here and can be released
273 $okq->enqueue($infd);
274 # Initialize the buffer
275 @{$buffer{$infh}{$outfd}} = ();
276 $Global::fh{$outfd} = $outfh;
279 sub add_files {
280 # Non-blocking dequeue
281 my ($infd,$outfd);
282 do {
283 ($infd,$outfd) = $q->dequeue_nb(2);
284 if(defined($outfd)) {
285 add_file($infd,$outfd);
287 } while(defined($outfd));
290 sub add_files_block {
291 # Blocking dequeue
292 my ($infd,$outfd) = $q->dequeue(2);
293 add_file($infd,$outfd);
297 my $fd;
298 my (@ready,$infh,$rv,$buf);
299 do {
300 # Wait until at least one file is opened
301 add_files_block();
302 while($q->pending or keys %buffer) {
303 add_files();
304 while(keys %buffer) {
305 @ready = $s->can_read(0.01);
306 if(not @ready) {
307 add_files();
309 for $infh (@ready) {
310 # There is only one key, namely the output file descriptor
311 for my $outfd (keys %{$buffer{$infh}}) {
312 $rv = sysread($infh, $buf, 65536);
313 if (!$rv) {
314 if($! == EAGAIN) {
315 # Would block: Nothing read
316 next;
317 } else {
318 # Nothing read, but would not block:
319 # This file is done
320 $s->remove($infh);
321 for(@{$buffer{$infh}{$outfd}}) {
322 syswrite($Global::fh{$outfd},$_);
324 delete $buffer{$infh};
325 # Closing the $infh causes it to block
326 # close $infh;
327 add_files();
328 next;
331 # Something read.
332 # Find \n or \r for full line
333 my $i = (rindex($buf,"\n")+1);
334 if($i) {
335 # Print full line
336 for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
337 syswrite($Global::fh{$outfd},$_);
339 # @buffer = remaining half line
340 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
341 } else {
342 # Something read, but not a full line
343 push @{$buffer{$infh}{$outfd}}, $buf;
345 redo;
350 } while($opened < $files_to_open);
352 for (@producers) {
353 $_->join();
356 sub set_fh_non_blocking {
357 # Set filehandle as non-blocking
358 # Inputs:
359 # $fh = filehandle to be blocking
360 # Returns:
361 # N/A
362 my $fh = shift;
363 my $flags;
364 fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
365 $flags |= &O_NONBLOCK; # Add non-blocking to the flags
366 fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
369 return ::spacefree(3, $script);
372 sub sharder_script() {
373 my $script = q{
374 use B;
375 # Column separator
376 my $sep = shift;
377 # Which columns to shard on (count from 1)
378 my $col = shift;
379 # Which columns to shard on (count from 0)
380 my $col0 = $col - 1;
381 my $bins = @ARGV;
382 # Open fifos for writing, fh{0..$bins}
383 my $t = 0;
384 my %fh;
385 for(@ARGV) {
386 open $fh{$t++}, ">", $_;
387 # open blocks until it is opened by reader
388 # so unlink only happens when it is ready
389 unlink $_;
391 while(<STDIN>) {
392 # Split into $col columns (no need to split into more)
393 @F = split $sep, $_, $col+1;
394 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
395 print $fh $_;
397 # Close all open fifos
398 close values %fh;
400 return ::spacefree(1, $script);
403 sub pipe_shard_setup() {
404 # Create temporary fifos
405 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
406 # This will spread the input to fifos
407 # Generate commands that reads from fifo1..N:
408 # cat fifo | user_command
409 # Changes:
410 # @Global::cat_prepends
411 my @shardfifos;
412 my @parcatfifos;
413 # TODO $opt::jobs should be evaluated (100%)
414 # TODO $opt::jobs should be number of total_jobs if there are argugemts
415 my $njobs = $opt::jobs;
416 for my $m (0..$njobs-1) {
417 for my $n (0..$njobs-1) {
418 # sharding to A B C D
419 # parcatting all As together
420 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
423 my $script = sharder_script();
424 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
426 if(not fork()) {
427 # Let the sharder inherit our stdin
428 # and redirect stdout to null
429 open STDOUT, ">","/dev/null";
430 # The PERL_HASH_SEED must be the same for all sharders
431 # so B::hash will return the same value for any given input
432 $ENV{'PERL_HASH_SEED'} = $$;
433 exec qw(parallel --block 100k -q --pipe -j), $njobs,
434 qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
435 $opt::shard, '{}', (map { (':::+', @{$_}) } @parcatfifos);
437 # For each fifo
438 # (rm fifo1; grep 1) < fifo1
439 # (rm fifo2; grep 2) < fifo2
440 # (rm fifo3; grep 3) < fifo3
441 my $parcat = Q(parcat_script());
442 if(not $parcat) {
443 ::error("'parcat' must be in path.");
444 ::wait_and_exit(255);
446 @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos;
449 sub pipe_part_files(@) {
450 # Given the bigfile
451 # find header and split positions
452 # make commands that 'cat's the partial file
453 # Input:
454 # $file = the file to read
455 # Returns:
456 # @commands that will cat_partial each part
457 my ($file) = @_;
458 my $buf = "";
459 if(not -f $file and not -b $file) {
460 ::error("$file is not a seekable file.");
461 ::wait_and_exit(255);
463 my $header = find_header(\$buf,open_or_exit($file));
464 # find positions
465 my @pos = find_split_positions($file,$Global::blocksize,length $header);
466 # Make @cat_prepends
467 my @cat_prepends = ();
468 for(my $i=0; $i<$#pos; $i++) {
469 push(@cat_prepends,
470 cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]));
472 return @cat_prepends;
475 sub find_header($$) {
476 # Compute the header based on $opt::header
477 # Input:
478 # $buf_ref = reference to read-in buffer
479 # $fh = filehandle to read from
480 # Uses:
481 # $opt::header
482 # $Global::blocksize
483 # $Global::header
484 # Returns:
485 # $header string
486 my ($buf_ref, $fh) = @_;
487 my $header = "";
488 # $Global::header may be set in group_by_loop()
489 if($Global::header) { return $Global::header }
490 if($opt::header) {
491 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
492 # Number = number of lines
493 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
494 while(read($fh,substr($$buf_ref,length $$buf_ref,0),
495 $Global::blocksize)) {
496 if($$buf_ref =~ s/^($opt::header)//) {
497 $header = $1;
498 last;
502 return $header;
505 sub find_split_positions($$$) {
506 # Find positions in bigfile where recend is followed by recstart
507 # Input:
508 # $file = the file to read
509 # $block = (minimal) --block-size of each chunk
510 # $headerlen = length of header to be skipped
511 # Uses:
512 # $opt::recstart
513 # $opt::recend
514 # Returns:
515 # @positions of block start/end
516 my($file, $block, $headerlen) = @_;
517 my $size = -s $file;
518 if(-b $file) {
519 # $file is a blockdevice
520 $size = size_of_block_dev($file);
522 $block = int $block;
523 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
524 # The optimal dd blocksize for freebsd = 2^15..2^17
525 my $dd_block_size = 131072; # 2^17
526 my @pos;
527 my ($recstart,$recend) = recstartrecend();
528 my $recendrecstart = $recend.$recstart;
529 my $fh = ::open_or_exit($file);
530 push(@pos,$headerlen);
531 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
532 my $buf;
533 if($recendrecstart eq "") {
534 # records ends anywhere
535 push(@pos,$pos);
536 } else {
537 # Seek the the block start
538 seek($fh, $pos, 0) || die;
539 while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
540 if($opt::regexp) {
541 # If match /$recend$recstart/ => Record position
542 if($buf =~ m:^(.*$recend)$recstart:os) {
543 # Start looking for next record _after_ this match
544 $pos += length($1);
545 push(@pos,$pos);
546 last;
548 } else {
549 # If match $recend$recstart => Record position
550 # TODO optimize to only look at the appended
551 # $dd_block_size + len $recendrecstart
552 # TODO increase $dd_block_size to optimize for longer records
553 my $i = index64(\$buf,$recendrecstart);
554 if($i != -1) {
555 # Start looking for next record _after_ this match
556 $pos += $i + length($recend);
557 push(@pos,$pos);
558 last;
564 if($pos[$#pos] != $size) {
565 # Last splitpoint was not at end of the file: add it
566 push(@pos,$size);
568 close $fh;
569 return @pos;
572 sub cat_partial($@) {
573 # Efficient command to copy from byte X to byte Y
574 # Input:
575 # $file = the file to read
576 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
577 # Returns:
578 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
579 my($file, @start_end) = @_;
580 my($start, $i);
581 # Convert (start,end) to (start,len)
582 my @start_len = map {
583 if(++$i % 2) { $start = $_; } else { $_-$start }
584 } @start_end;
585 # This can read 7 GB/s using a single core
586 my $script = spacefree
589 while(@ARGV) {
590 sysseek(STDIN,shift,0) || die;
591 $left = shift;
592 while($read =
593 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
594 $left -= $read;
595 syswrite(STDOUT,$buf);
599 return "<". Q($file) .
600 " perl -e '$script' @start_len |";
603 sub group_by_loop($) {
604 # Generate perl code for group-by loop
605 # Insert a $recsep when the column value changes
606 # The column value can be computed with $perexpr
607 my($recsep) = @_;
608 my $groupby = $opt::groupby;
609 my ($col,$perlexpr);
610 if($groupby =~ /^[a-z0-9_]+(\s|$)/i) {
611 # Column name/number (possibly prefix)
612 if($groupby =~ s/^(\d+)\s*//) {
613 # Column number (possibly prefix)
614 $col = $1-1;
615 } elsif($groupby =~ s/^([a-z0-9_]+)\s*//) {
616 # Column name (possibly prefix)
617 my $colname = $1;
618 my($read,$char,@line,$header);
619 # A full line, but nothing more (the rest must be read by the child)
620 do {
621 $read = sysread(STDIN,$char,1);
622 push @line, $char;
623 } while($read and $char ne "\n");
624 # $Global::header used to prepend block to each job
625 $Global::header = join "", @line;
626 # Split on --copsep pattern
627 my @headers = split/$opt::colsep/, $Global::header;
628 my %headers;
629 # Numbered 0..n-1 due to being used by $F[n]
630 @headers{@headers} = (0..$#headers);
631 $col = $headers{$colname};
632 if(not defined $col) {
633 ::error("Column '$colname' $opt::colsep not found in header",keys %headers);
634 ::wait_and_exit(255);
638 # What is left of $groupby is $perlexpr
639 $perlexpr = $groupby;
641 my $loop = ::spacefree(0,'{
642 local $_=COLVALUE;
643 PERLEXPR;
644 if(! defined $last) { $last = $_ }
645 if(($last) ne $_) {
646 print "RECSEP";
647 $last = $_;
649 }');
651 if(defined $col) {
652 $loop =~ s/COLVALUE/\$F[$col]/g;
653 } else {
654 $loop =~ s/COLVALUE/\$_/g;
656 $loop =~ s/PERLEXPR/$perlexpr/g;
657 $loop =~ s/RECSEP/$recsep/g;
658 return $loop;
661 sub group_by_stdin_filter() {
662 # Record separator with 119 bit random value
663 $opt::recend = '';
664 $opt::recstart =
665 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
666 $opt::remove_rec_sep = 1;
667 my @filter;
668 push @filter, "perl";
669 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
670 # This is column number/name
671 # Use -a (auto-split)
672 push @filter, "-a";
673 $opt::colsep ||= "\t";
674 my $sep = $opt::colsep;
675 $sep =~ s/\t/\\t/g;
676 $sep =~ s/\"/\\"/g;
677 push @filter, "-F$sep";
679 push @filter, "-pe";
680 push @filter, group_by_loop($opt::recstart);
681 ::debug("init", "@filter\n");
682 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
685 sub spreadstdin() {
686 # read a record
687 # Spawn a job and print the record to it.
688 # Uses:
689 # $Global::blocksize
690 # STDIN
691 # $opt::r
692 # $Global::max_lines
693 # $Global::max_number_of_args
694 # $opt::regexp
695 # $Global::start_no_new_jobs
696 # $opt::roundrobin
697 # %Global::running
698 # Returns: N/A
700 my $buf = "";
701 my ($recstart,$recend) = recstartrecend();
702 my $recendrecstart = $recend.$recstart;
703 my $chunk_number = 1;
704 my $one_time_through;
705 my $two_gb = 2**31-1;
706 my $blocksize = $Global::blocksize;
707 my $in = *STDIN;
708 my $header = find_header(\$buf,$in);
709 while(1) {
710 my $anything_written = 0;
711 my $buflen = length $buf;
712 my $readsize = ($buflen < $blocksize) ? $blocksize-$buflen : $blocksize;
713 # If $buf < $blocksize, append so it is $blocksize long after reading.
714 # Otherwise append a full $blocksize
715 if(not read($in,substr($buf,$buflen,0),$readsize)) {
716 # End-of-file
717 $chunk_number != 1 and last;
718 # Force the while-loop once if everything was read by header reading
719 $one_time_through++ and last;
721 if($opt::r) {
722 # Remove empty lines
723 $buf =~ s/^\s*\n//gm;
724 if(length $buf == 0) {
725 next;
728 if($Global::max_lines and not $Global::max_number_of_args) {
729 # Read n-line records
730 my $n_lines = $buf =~ tr/\n/\n/;
731 my $last_newline_pos = rindex64(\$buf,"\n");
732 # Go backwards until there are full n-line records
733 while($n_lines % $Global::max_lines) {
734 $n_lines--;
735 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
737 # Chop at $last_newline_pos as that is where n-line record ends
738 $anything_written +=
739 write_record_to_pipe($chunk_number++,\$header,\$buf,
740 $recstart,$recend,$last_newline_pos+1);
741 shorten(\$buf,$last_newline_pos+1);
742 } elsif($opt::regexp) {
743 if($Global::max_number_of_args) {
744 # -N => (start..*?end){n}
745 # -L -N => (start..*?end){n*l}
746 my $read_n_lines = -1+
747 $Global::max_number_of_args * ($Global::max_lines || 1);
748 # (?!negative lookahead) is needed to avoid backtracking
749 # See: https://unix.stackexchange.com/questions/439356/
750 while($buf =~
752 # Either recstart or at least one char from start
753 ^(?: $recstart | .)
754 # followed something
755 (?:(?!$recend$recstart).)*?
756 # and then recend
757 $recend
758 # Then n-1 times recstart.*recend
759 (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
761 # Followed by recstart
762 (?=$recstart)/osx) {
763 $anything_written +=
764 write_record_to_pipe($chunk_number++,\$header,\$buf,
765 $recstart,$recend,length $1);
766 shorten(\$buf,length $1);
768 } else {
769 eof($in) and last;
770 # Find the last recend-recstart in $buf
771 if($buf =~ /^(.*$recend)$recstart.*?$/os) {
772 $anything_written +=
773 write_record_to_pipe($chunk_number++,\$header,\$buf,
774 $recstart,$recend,length $1);
775 shorten(\$buf,length $1);
778 } elsif($opt::csv) {
779 # Read a full CSV record
780 # even number of " + end of line
781 my $last_newline_pos = length $buf;
782 do {
783 # find last EOL
784 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
785 # While uneven "
786 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
787 and $last_newline_pos >= 0);
788 # Chop at $last_newline_pos as that is where CSV record ends
789 $anything_written +=
790 write_record_to_pipe($chunk_number++,\$header,\$buf,
791 $recstart,$recend,$last_newline_pos+1);
792 shorten(\$buf,$last_newline_pos+1);
793 } else {
794 if($Global::max_number_of_args) {
795 # -N => (start..*?end){n}
796 my $i = 0;
797 my $read_n_lines =
798 $Global::max_number_of_args * ($Global::max_lines || 1);
799 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
801 length $buf) {
802 $i += length $recend; # find the actual splitting location
803 $anything_written +=
804 write_record_to_pipe($chunk_number++,\$header,\$buf,
805 $recstart,$recend,$i);
806 shorten(\$buf,$i);
808 } else {
809 eof($in) and last;
810 # Find the last recend+recstart in $buf
811 my $i = rindex64(\$buf,$recendrecstart);
812 if($i != -1) {
813 $i += length $recend; # find the actual splitting location
814 $anything_written +=
815 write_record_to_pipe($chunk_number++,\$header,\$buf,
816 $recstart,$recend,$i);
817 shorten(\$buf,$i);
821 if(not $anything_written
822 and not eof($in)
823 and not $Global::no_autoexpand_block) {
824 # Nothing was written - maybe the block size < record size?
825 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
826 if($blocksize < $two_gb) {
827 my $old_blocksize = $blocksize;
828 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
829 ::warning("A record was longer than $old_blocksize. " .
830 "Increasing to --blocksize $blocksize.");
834 ::debug("init", "Done reading input\n");
836 # If there is anything left in the buffer write it
837 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
838 $recend, length $buf);
840 if($opt::retries) {
841 $Global::no_more_input = 1;
842 # We need to start no more jobs: At most we need to retry some
843 # of the already running.
844 my @running = values %Global::running;
845 # Stop any virgins.
846 for my $job (@running) {
847 if(defined $job and $job->virgin()) {
848 close $job->fh(0,"w");
851 # Wait for running jobs to be done
852 my $sleep =1;
853 while($Global::total_running > 0) {
854 $sleep = ::reap_usleep($sleep);
855 start_more_jobs();
858 $Global::start_no_new_jobs ||= 1;
859 if($opt::roundrobin) {
860 # Flush blocks to roundrobin procs
861 my $sleep = 1;
862 while(%Global::running) {
863 my $something_written = 0;
864 for my $job (values %Global::running) {
865 if($job->block_length()) {
866 $something_written += $job->non_blocking_write();
867 } else {
868 close $job->fh(0,"w");
871 if($something_written) {
872 $sleep = $sleep/2+0.001;
874 $sleep = ::reap_usleep($sleep);
879 sub recstartrecend() {
880 # Uses:
881 # $opt::recstart
882 # $opt::recend
883 # Returns:
884 # $recstart,$recend with default values and regexp conversion
885 my($recstart,$recend);
886 if(defined($opt::recstart) and defined($opt::recend)) {
887 # If both --recstart and --recend is given then both must match
888 $recstart = $opt::recstart;
889 $recend = $opt::recend;
890 } elsif(defined($opt::recstart)) {
891 # If --recstart is given it must match start of record
892 $recstart = $opt::recstart;
893 $recend = "";
894 } elsif(defined($opt::recend)) {
895 # If --recend is given then it must match end of record
896 $recstart = "";
897 $recend = $opt::recend;
898 if($opt::regexp and $recend eq '') {
899 # --regexp --recend ''
900 $recend = '.';
904 if($opt::regexp) {
905 # If $recstart/$recend contains '|'
906 # this should only apply to the regexp
907 $recstart = "(?:".$recstart.")";
908 $recend = "(?:".$recend.")";
909 } else {
910 # $recstart/$recend = printf strings (\n)
911 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
912 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
914 return ($recstart,$recend);
917 sub nindex($$) {
918 # See if string is in buffer N times
919 # Returns:
920 # the position where the Nth copy is found
921 my ($buf_ref, $str, $n) = @_;
922 my $i = 0;
923 for(1..$n) {
924 $i = index64($buf_ref,$str,$i+1);
925 if($i == -1) { last }
927 return $i;
931 my @robin_queue;
932 my $sleep = 1;
934 sub round_robin_write($$$$$) {
935 # Input:
936 # $header_ref = ref to $header string
937 # $block_ref = ref to $block to be written
938 # $recstart = record start string
939 # $recend = record end string
940 # $endpos = end position of $block
941 # Uses:
942 # %Global::running
943 # Returns:
944 # $something_written = amount of bytes written
945 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
946 my $written = 0;
947 my $block_passed = 0;
948 while(not $block_passed) {
949 # Continue flushing existing buffers
950 # until one is empty and a new block is passed
951 if(@robin_queue) {
952 # Rotate queue once so new blocks get a fair chance
953 # to be given to another block
954 push @robin_queue, shift @robin_queue;
955 } else {
956 # Make a queue to spread the blocks evenly
957 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
958 values %Global::running);
960 do {
961 $written = 0;
962 for my $job (@robin_queue) {
963 if($job->block_length() > 0) {
964 $written += $job->non_blocking_write();
965 } else {
966 $job->set_block($header_ref, $buffer_ref,
967 $endpos, $recstart, $recend);
968 $block_passed = 1;
969 $job->set_virgin(0);
970 $written += $job->non_blocking_write();
971 last;
974 if($written) {
975 $sleep = $sleep/1.5+0.001;
977 # Don't sleep if something is written
978 } while($written and not $block_passed);
979 $sleep = ::reap_usleep($sleep);
981 return $written;
985 sub index64($$$) {
986 # Do index on strings > 2GB.
987 # index in Perl < v5.22 does not work for > 2GB
988 # Input:
989 # as index except STR which must be passed as a reference
990 # Output:
991 # as index
992 my $ref = shift;
993 my $match = shift;
994 my $pos = shift || 0;
995 my $block_size = 2**31-1;
996 my $strlen = length($$ref);
997 # No point in doing extra work if we don't need to.
998 if($strlen < $block_size or $] > 5.022) {
999 return index($$ref, $match, $pos);
1002 my $matchlen = length($match);
1003 my $ret;
1004 my $offset = $pos;
1005 while($offset < $strlen) {
1006 $ret = index(
1007 substr($$ref, $offset, $block_size),
1008 $match, $pos-$offset);
1009 if($ret != -1) {
1010 return $ret + $offset;
1012 $offset += ($block_size - $matchlen - 1);
1014 return -1;
1017 sub rindex64($@) {
1018 # Do rindex on strings > 2GB.
1019 # rindex in Perl < v5.22 does not work for > 2GB
1020 # Input:
1021 # as rindex except STR which must be passed as a reference
1022 # Output:
1023 # as rindex
1024 my $ref = shift;
1025 my $match = shift;
1026 my $pos = shift;
1027 my $block_size = 2**31-1;
1028 my $strlen = length($$ref);
1029 # Default: search from end
1030 $pos = defined $pos ? $pos : $strlen;
1031 # No point in doing extra work if we don't need to.
1032 if($strlen < $block_size) {
1033 return rindex($$ref, $match, $pos);
1036 my $matchlen = length($match);
1037 my $ret;
1038 my $offset = $pos - $block_size + $matchlen;
1039 if($offset < 0) {
1040 # The offset is less than a $block_size
1041 # Set the $offset to 0 and
1042 # Adjust block_size accordingly
1043 $block_size = $block_size + $offset;
1044 $offset = 0;
1046 while($offset >= 0) {
1047 $ret = rindex(
1048 substr($$ref, $offset, $block_size),
1049 $match);
1050 if($ret != -1) {
1051 return $ret + $offset;
1053 $offset -= ($block_size - $matchlen - 1);
1055 return -1;
1058 sub shorten($$) {
1059 # Do: substr($buf,0,$i) = "";
1060 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1061 # Input:
1062 # $buf_ref = \$buf
1063 # $i = position to shorten to
1064 # Returns: N/A
1065 my ($buf_ref, $i) = @_;
1066 my $two_gb = 2**31-1;
1067 while($i > $two_gb) {
1068 substr($$buf_ref,0,$two_gb) = "";
1069 $i -= $two_gb;
1071 substr($$buf_ref,0,$i) = "";
1074 sub write_record_to_pipe($$$$$$) {
1075 # Fork then
1076 # Write record from pos 0 .. $endpos to pipe
1077 # Input:
1078 # $chunk_number = sequence number - to see if already run
1079 # $header_ref = reference to header string to prepend
1080 # $buffer_ref = reference to record to write
1081 # $recstart = start string of record
1082 # $recend = end string of record
1083 # $endpos = position in $buffer_ref where record ends
1084 # Uses:
1085 # $Global::job_already_run
1086 # $opt::roundrobin
1087 # @Global::virgin_jobs
1088 # Returns:
1089 # Number of chunks written (0 or 1)
1090 my ($chunk_number, $header_ref, $buffer_ref,
1091 $recstart, $recend, $endpos) = @_;
1092 if($endpos == 0) { return 0; }
1093 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1094 if($opt::roundrobin) {
1095 # Write the block to one of the already running jobs
1096 return round_robin_write($header_ref, $buffer_ref,
1097 $recstart, $recend, $endpos);
1099 # If no virgin found, backoff
1100 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1101 while(not @Global::virgin_jobs) {
1102 ::debug("pipe", "No virgin jobs");
1103 $sleep = ::reap_usleep($sleep);
1104 # Jobs may not be started because of loadavg
1105 # or too little time between each ssh login
1106 # or retrying failed jobs.
1107 start_more_jobs();
1109 my $job = shift @Global::virgin_jobs;
1110 # Job is no longer virgin
1111 $job->set_virgin(0);
1113 if($opt::retries) {
1114 # Copy $buffer[0..$endpos] to $job->{'block'}
1115 # Remove rec_sep
1116 # Run $job->add_transfersize
1117 $job->set_block($header_ref, $buffer_ref, $endpos,
1118 $recstart, $recend);
1119 if(fork()) {
1120 # Skip
1121 } else {
1122 $job->write($job->block_ref());
1123 close $job->fh(0,"w");
1124 exit(0);
1126 } else {
1127 # We ignore the removed rec_sep which is technically wrong.
1128 $job->add_transfersize($endpos + length $$header_ref);
1129 if(fork()) {
1130 # Skip
1131 } else {
1132 # Chop of at $endpos as we do not know how many rec_sep will
1133 # be removed.
1134 substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
1135 # Remove rec_sep
1136 if($opt::remove_rec_sep) {
1137 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
1139 $job->write($header_ref);
1140 $job->write($buffer_ref);
1141 close $job->fh(0,"w");
1142 exit(0);
1145 close $job->fh(0,"w");
1146 return 1;
1150 sub __SEM_MODE__() {}
1153 sub acquire_semaphore() {
1154 # Acquires semaphore. If needed: spawns to the background
1155 # Uses:
1156 # @Global::host
1157 # Returns:
1158 # The semaphore to be released when jobs is complete
1159 $Global::host{':'} = SSHLogin->new(":");
1160 my $sem = Semaphore->new($Semaphore::name,
1161 $Global::host{':'}->max_jobs_running());
1162 $sem->acquire();
1163 if($Semaphore::fg) {
1164 # skip
1165 } else {
1166 if(fork()) {
1167 exit(0);
1168 } else {
1169 # If run in the background, the PID will change
1170 $sem->pid_change();
1173 return $sem;
1177 sub __PARSE_OPTIONS__() {}
1180 sub options_hash() {
1181 # Returns:
1182 # %hash = the GetOptions config
1183 return
1184 ("debug|D=s" => \$opt::D,
1185 "xargs" => \$opt::xargs,
1186 "m" => \$opt::m,
1187 "X" => \$opt::X,
1188 "v" => \@opt::v,
1189 "sql=s" => \$opt::retired,
1190 "sqlmaster=s" => \$opt::sqlmaster,
1191 "sqlworker=s" => \$opt::sqlworker,
1192 "sqlandworker=s" => \$opt::sqlandworker,
1193 "joblog|jl=s" => \$opt::joblog,
1194 "results|result|res=s" => \$opt::results,
1195 "resume" => \$opt::resume,
1196 "resume-failed|resumefailed" => \$opt::resume_failed,
1197 "retry-failed|retryfailed" => \$opt::retry_failed,
1198 "silent" => \$opt::silent,
1199 "keep-order|keeporder|k" => \$opt::keeporder,
1200 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1201 "group" => \$opt::group,
1202 "g" => \$opt::retired,
1203 "ungroup|u" => \$opt::ungroup,
1204 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1205 => \$opt::linebuffer,
1206 "tmux" => \$opt::tmux,
1207 "tmuxpane" => \$opt::tmuxpane,
1208 "null|0" => \$opt::null,
1209 "quote|q" => \$opt::q,
1210 # Replacement strings
1211 "parens=s" => \$opt::parens,
1212 "rpl=s" => \@opt::rpl,
1213 "plus" => \$opt::plus,
1214 "I=s" => \$opt::I,
1215 "extensionreplace|er=s" => \$opt::U,
1216 "U=s" => \$opt::retired,
1217 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1218 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1219 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1220 "seqreplace=s" => \$opt::seqreplace,
1221 "slotreplace=s" => \$opt::slotreplace,
1222 "jobs|j=s" => \$opt::jobs,
1223 "delay=s" => \$opt::delay,
1224 "sshdelay=f" => \$opt::sshdelay,
1225 "load=s" => \$opt::load,
1226 "noswap" => \$opt::noswap,
1227 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1228 "number-of-cpus" => \$opt::number_of_cpus,
1229 "number-of-sockets" => \$opt::number_of_sockets,
1230 "number-of-cores" => \$opt::number_of_cores,
1231 "number-of-threads" => \$opt::number_of_threads,
1232 "use-sockets-instead-of-threads"
1233 => \$opt::use_sockets_instead_of_threads,
1234 "use-cores-instead-of-threads"
1235 => \$opt::use_cores_instead_of_threads,
1236 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1237 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1238 "nice=i" => \$opt::nice,
1239 "tag" => \$opt::tag,
1240 "tagstring|tag-string=s" => \$opt::tagstring,
1241 "onall" => \$opt::onall,
1242 "nonall" => \$opt::nonall,
1243 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1244 "sshlogin|S=s" => \@opt::sshlogin,
1245 "sshloginfile|slf=s" => \@opt::sshloginfile,
1246 "controlmaster|M" => \$opt::controlmaster,
1247 "ssh=s" => \$opt::ssh,
1248 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1249 => \@opt::transfer_files,
1250 "return=s" => \@opt::return,
1251 "trc=s" => \@opt::trc,
1252 "transfer" => \$opt::transfer,
1253 "cleanup" => \$opt::cleanup,
1254 "basefile|bf=s" => \@opt::basefile,
1255 "B=s" => \$opt::retired,
1256 "ctrlc|ctrl-c" => \$opt::retired,
1257 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1258 "workdir|work-dir|wd=s" => \$opt::workdir,
1259 "W=s" => \$opt::retired,
1260 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1261 "tmpdir|tempdir=s" => \$opt::tmpdir,
1262 "use-compress-program|compress-program=s" => \$opt::compress_program,
1263 "use-decompress-program|decompress-program=s"
1264 => \$opt::decompress_program,
1265 "compress" => \$opt::compress,
1266 "tty" => \$opt::tty,
1267 "T" => \$opt::retired,
1268 "H=i" => \$opt::retired,
1269 "dry-run|dryrun|dr" => \$opt::dryrun,
1270 "progress" => \$opt::progress,
1271 "eta" => \$opt::eta,
1272 "bar" => \$opt::bar,
1273 "shuf" => \$opt::shuf,
1274 "arg-sep|argsep=s" => \$opt::arg_sep,
1275 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1276 "trim=s" => \$opt::trim,
1277 "env=s" => \@opt::env,
1278 "recordenv|record-env" => \$opt::record_env,
1279 "session" => \$opt::session,
1280 "plain" => \$opt::plain,
1281 "profile|J=s" => \@opt::profile,
1282 "pipe|spreadstdin" => \$opt::pipe,
1283 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1284 "recstart=s" => \$opt::recstart,
1285 "recend=s" => \$opt::recend,
1286 "regexp|regex" => \$opt::regexp,
1287 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1288 "files|output-as-files|outputasfiles" => \$opt::files,
1289 "block|block-size|blocksize=s" => \$opt::blocksize,
1290 "tollef" => \$opt::tollef,
1291 "gnu" => \$opt::gnu,
1292 "link|xapply" => \$opt::link,
1293 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1294 # Before changing this line, please read
1295 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1296 "bibtex|citation" => \$opt::citation,
1297 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1298 # Termination and retries
1299 "halt-on-error|halt=s" => \$opt::halt,
1300 "limit=s" => \$opt::limit,
1301 "memfree=s" => \$opt::memfree,
1302 "retries=s" => \$opt::retries,
1303 "timeout=s" => \$opt::timeout,
1304 "termseq|term-seq=s" => \$opt::termseq,
1305 # xargs-compatibility - implemented, man, testsuite
1306 "max-procs|P=s" => \$opt::jobs,
1307 "delimiter|d=s" => \$opt::d,
1308 "max-chars|s=i" => \$opt::max_chars,
1309 "arg-file|a=s" => \@opt::a,
1310 "no-run-if-empty|r" => \$opt::r,
1311 "replace|i:s" => \$opt::i,
1312 "E=s" => \$opt::eof,
1313 "eof|e:s" => \$opt::eof,
1314 "max-args|maxargs|n=i" => \$opt::max_args,
1315 "max-replace-args|N=i" => \$opt::max_replace_args,
1316 "colsep|col-sep|C=s" => \$opt::colsep,
1317 "csv"=> \$opt::csv,
1318 "help|h" => \$opt::help,
1319 "L=f" => \$opt::L,
1320 "max-lines|l:f" => \$opt::max_lines,
1321 "interactive|p" => \$opt::interactive,
1322 "verbose|t" => \$opt::verbose,
1323 "version|V" => \$opt::version,
1324 "minversion|min-version=i" => \$opt::minversion,
1325 "show-limits|showlimits" => \$opt::show_limits,
1326 "exit|x" => \$opt::x,
1327 # Semaphore
1328 "semaphore" => \$opt::semaphore,
1329 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1330 "semaphorename|id=s" => \$opt::semaphorename,
1331 "fg" => \$opt::fg,
1332 "bg" => \$opt::bg,
1333 "wait" => \$opt::wait,
1334 # Shebang #!/usr/bin/parallel --shebang
1335 "shebang|hashbang" => \$opt::shebang,
1336 "internal-pipe-means-argfiles"
1337 => \$opt::internal_pipe_means_argfiles,
1338 "Y" => \$opt::retired,
1339 "skip-first-line" => \$opt::skip_first_line,
1340 "bug" => \$opt::bug,
1341 "header=s" => \$opt::header,
1342 "cat" => \$opt::cat,
1343 "fifo" => \$opt::fifo,
1344 "pipepart|pipe-part" => \$opt::pipepart,
1345 "tee" => \$opt::tee,
1346 "shard=s" => \$opt::shard,
1347 "groupby|group-by=s" => \$opt::groupby,
1348 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1349 "embed" => \$opt::embed,
1353 sub get_options_from_array($@) {
1354 # Run GetOptions on @array
1355 # Input:
1356 # $array_ref = ref to @ARGV to parse
1357 # @keep_only = Keep only these options
1358 # Uses:
1359 # @ARGV
1360 # Returns:
1361 # true if parsing worked
1362 # false if parsing failed
1363 # @$array_ref is changed
1364 my ($array_ref, @keep_only) = @_;
1365 if(not @$array_ref) {
1366 # Empty array: No need to look more at that
1367 return 1;
1369 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1370 # supported everywhere
1371 my @save_argv;
1372 my $this_is_ARGV = (\@::ARGV == $array_ref);
1373 if(not $this_is_ARGV) {
1374 @save_argv = @::ARGV;
1375 @::ARGV = @{$array_ref};
1377 # If @keep_only set: Ignore all values except @keep_only
1378 my %options = options_hash();
1379 if(@keep_only) {
1380 my (%keep,@dummy);
1381 @keep{@keep_only} = @keep_only;
1382 for my $k (grep { not $keep{$_} } keys %options) {
1383 # Store the value of the option in @dummy
1384 $options{$k} = \@dummy;
1387 my $retval = GetOptions(%options);
1388 if(not $this_is_ARGV) {
1389 @{$array_ref} = @::ARGV;
1390 @::ARGV = @save_argv;
1392 return $retval;
1395 sub parse_options(@) {
1396 # Returns: N/A
1397 init_globals();
1398 my @argv_before = @ARGV;
1399 @ARGV = read_options();
1401 # Before changing this line, please read
1402 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1403 if(defined $opt::citation) {
1404 citation(\@argv_before,\@ARGV);
1405 wait_and_exit(0);
1407 # no-* overrides *
1408 if($opt::nokeeporder) { $opt::keeporder = undef; }
1410 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1411 if($opt::bug) { ::die_bug("test-bug"); }
1412 $Global::debug = $opt::D;
1413 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1414 || $ENV{'SHELL'} || "/bin/sh";
1415 if(not -x $Global::shell and not which($Global::shell)) {
1416 ::error("Shell '$Global::shell' not found.");
1417 wait_and_exit(255);
1419 ::debug("init","Global::shell $Global::shell\n");
1420 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1421 if(defined $opt::X) { $Global::ContextReplace = 1; }
1422 if(defined $opt::silent) { $Global::verbose = 0; }
1423 if(defined $opt::null) { $/ = "\0"; }
1424 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1425 if(defined $opt::tagstring) {
1426 $opt::tagstring = unquote_printf($opt::tagstring);
1428 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1429 if(defined $opt::q) { $Global::quoting = 1; }
1430 if(defined $opt::r) { $Global::ignore_empty = 1; }
1431 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1432 parse_replacement_string_options();
1433 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1434 if(defined $opt::max_args) {
1435 $Global::max_number_of_args = $opt::max_args;
1437 if(defined $opt::timeout) {
1438 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1440 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1441 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1442 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1443 $opt::nice ||= 0;
1444 if(defined $opt::help) { usage(); exit(0); }
1445 if(defined $opt::embed) { embed(); exit(0); }
1446 if(defined $opt::sqlandworker) {
1447 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1449 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1450 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1451 if(defined $opt::csv) {
1452 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1453 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1454 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1455 my $sep = $csv_setting->{sep_char};
1456 $Global::csv = Text::CSV->new($csv_setting)
1457 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1459 if(defined $opt::header) {
1460 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1462 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1463 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1464 if(defined $opt::arg_file_sep) {
1465 $Global::arg_file_sep = $opt::arg_file_sep;
1467 if(defined $opt::number_of_sockets) {
1468 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1470 if(defined $opt::number_of_cpus) {
1471 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1473 if(defined $opt::number_of_cores) {
1474 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1476 if(defined $opt::number_of_threads) {
1477 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1479 if(defined $opt::max_line_length_allowed) {
1480 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1482 if(defined $opt::version) { version(); wait_and_exit(0); }
1483 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1484 if(defined $opt::show_limits) { show_limits(); }
1485 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1486 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1487 if(@opt::return) { push @Global::ret_files, @opt::return; }
1488 if($opt::transfer) {
1489 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1491 push @Global::transfer_files, @opt::transfer_files;
1492 if(not defined $opt::recstart and
1493 not defined $opt::recend) { $opt::recend = "\n"; }
1494 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1495 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1496 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1497 $Global::blocksize = 2**31-1;
1499 if($^O eq "cygwin" and
1500 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1501 and $Global::blocksize > 65535) {
1502 warning("--blocksize >= 64K causes problems on Cygwin.");
1504 $opt::memfree = multiply_binary_prefix($opt::memfree);
1505 check_invalid_option_combinations();
1506 if((defined $opt::fifo or defined $opt::cat)
1507 and not $opt::pipepart) {
1508 $opt::pipe = 1;
1510 if(defined $opt::minversion) {
1511 print $Global::version,"\n";
1512 if($Global::version < $opt::minversion) {
1513 wait_and_exit(255);
1514 } else {
1515 wait_and_exit(0);
1518 if(not defined $opt::delay) {
1519 # Set --delay to --sshdelay if not set
1520 $opt::delay = $opt::sshdelay;
1522 $opt::delay = multiply_time_units($opt::delay);
1523 if($opt::compress_program) {
1524 $opt::compress = 1;
1525 $opt::decompress_program ||= $opt::compress_program." -dc";
1528 if(defined $opt::results) {
1529 # Is the output a dir or CSV-file?
1530 if($opt::results =~ /\.csv$/i) {
1531 # CSV with , as separator
1532 $Global::csvsep = ",";
1533 $Global::membuffer ||= 1;
1534 } elsif($opt::results =~ /\.tsv$/i) {
1535 # CSV with TAB as separator
1536 $Global::csvsep = "\t";
1537 $Global::membuffer ||= 1;
1540 if($opt::compress) {
1541 my ($compress, $decompress) = find_compression_program();
1542 $opt::compress_program ||= $compress;
1543 $opt::decompress_program ||= $decompress;
1544 if(($opt::results and not $Global::csvsep) or $opt::files) {
1545 # No need for decompressing
1546 $opt::decompress_program = "cat >/dev/null";
1549 if(defined $opt::dryrun) {
1550 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1551 $opt::ungroup = 0;
1552 $opt::group = 1;
1554 if(defined $opt::nonall) {
1555 # Append a dummy empty argument if there are no arguments
1556 # on the command line to avoid reading from STDIN.
1557 # arg_sep = random 50 char
1558 # \0noarg => nothing (not the empty string)
1559 $Global::arg_sep = join "",
1560 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1561 push @ARGV, $Global::arg_sep, "\0noarg";
1563 if(defined $opt::tee) {
1564 if(not defined $opt::jobs) {
1565 $opt::jobs = 0;
1568 if(defined $opt::tty) {
1569 # Defaults for --tty: -j1 -u
1570 # Can be overridden with -jXXX -g
1571 if(not defined $opt::jobs) {
1572 $opt::jobs = 1;
1574 if(not defined $opt::group) {
1575 $opt::ungroup = 1;
1578 if(@opt::trc) {
1579 push @Global::ret_files, @opt::trc;
1580 if(not @Global::transfer_files) {
1581 # Defaults to --transferfile {}
1582 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1584 $opt::cleanup = 1;
1586 if(defined $opt::max_lines) {
1587 if($opt::max_lines eq "-0") {
1588 # -l -0 (swallowed -0)
1589 $opt::max_lines = 1;
1590 $opt::null = 1;
1591 $/ = "\0";
1592 } elsif ($opt::max_lines == 0) {
1593 # If not given (or if 0 is given) => 1
1594 $opt::max_lines = 1;
1596 $Global::max_lines = $opt::max_lines;
1597 if(not $opt::pipe) {
1598 # --pipe -L means length of record - not max_number_of_args
1599 $Global::max_number_of_args ||= $Global::max_lines;
1603 # Read more than one arg at a time (-L, -N)
1604 if(defined $opt::L) {
1605 $Global::max_lines = $opt::L;
1606 if(not $opt::pipe) {
1607 # --pipe -L means length of record - not max_number_of_args
1608 $Global::max_number_of_args ||= $Global::max_lines;
1611 if(defined $opt::max_replace_args) {
1612 $Global::max_number_of_args = $opt::max_replace_args;
1613 $Global::ContextReplace = 1;
1615 if((defined $opt::L or defined $opt::max_replace_args)
1617 not ($opt::xargs or $opt::m)) {
1618 $Global::ContextReplace = 1;
1620 if(defined $opt::tag and not defined $opt::tagstring) {
1621 # Default = {}
1622 $opt::tagstring = $Global::parensleft.$Global::parensright;
1624 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1625 # Deal with ::: :::+ :::: and ::::+
1626 @ARGV = read_args_from_command_line();
1628 parse_semaphore();
1630 if(defined $opt::eta) { $opt::progress = $opt::eta; }
1631 if(defined $opt::bar) { $opt::progress = $opt::bar; }
1633 # Funding a free software project is hard. GNU Parallel is no
1634 # exception. On top of that it seems the less visible a project
1635 # is, the harder it is to get funding. And the nature of GNU
1636 # Parallel is that it will never be seen by "the guy with the
1637 # checkbook", but only by the people doing the actual work.
1639 # This problem has been covered by others - though no solution has
1640 # been found:
1641 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1642 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1644 # If you want GNU Parallel to be maintained in the future, and not
1645 # just wither away like so many other free software tools, you
1646 # need to help finance the development.
1648 # The citation notice is a simple way of doing so, as citations
1649 # makes it possible to me to get a job where I can maintain GNU
1650 # Parallel as part of the job.
1652 # This means you can help financing development
1654 # WITHOUT PAYING A SINGLE CENT!
1656 # Before implementing the citation notice it was discussed with
1657 # the users:
1658 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
1660 # Having to spend 10 seconds on running 'parallel --citation' once
1661 # is no doubt not an ideal solution, but no one has so far come up
1662 # with an ideal solution - neither for funding GNU Parallel nor
1663 # other free software.
1665 # If you believe you have the perfect solution, you should try it
1666 # out, and if it works, you should post it on the email
1667 # list. Ideas that will cost work and which have not been tested
1668 # are, however, unlikely to be prioritized.
1670 # Please note that GPL version 3 gives you the right to fork GNU
1671 # Parallel under a new name, but it does not give you the right to
1672 # distribute modified copies with the citation notice disabled in
1673 # a way where the software can be confused with GNU Parallel. To
1674 # do that you need to be the owner of the GNU Parallel
1675 # trademark. The xt:Commerce case shows this.
1677 # Description of the xt:Commerce case in OLG Duesseldorf
1678 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1679 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1681 # The verdict in German
1682 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1683 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1685 # Other free software limiting derivates by the same name
1686 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
1687 # https://tm.joomla.org/trademark-faq.html
1688 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
1690 # Running 'parallel --citation' one single time takes less than 10
1691 # seconds, and will silence the citation notice for future
1692 # runs. If that is too much trouble for you, why not use one of
1693 # the alternatives instead?
1694 # See a list in: 'man parallel_alternatives'
1696 # If you are an honest person please read the above before
1697 # changing this line.
1698 citation_notice();
1700 parse_halt();
1702 if($ENV{'PARALLEL_ENV'}) {
1703 # Read environment and set $Global::parallel_env
1704 # Must be done before is_acceptable_command_line_length()
1705 my $penv = $ENV{'PARALLEL_ENV'};
1706 # unset $PARALLEL_ENV: It should not be given to children
1707 # because it takes up a lot of env space
1708 delete $ENV{'PARALLEL_ENV'};
1709 if(-e $penv) {
1710 # This is a file/fifo: Replace envvar with content of file
1711 open(my $parallel_env, "<", $penv) ||
1712 ::die_bug("Cannot read parallel_env from $penv");
1713 local $/; # Put <> in slurp mode
1714 $penv = <$parallel_env>;
1715 close $parallel_env;
1717 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
1718 $penv =~ s/\001/\n/g;
1719 if($penv =~ /\0/) {
1720 ::warning('\0 (NUL) in environment is not supported');
1722 $Global::parallel_env = $penv;
1725 parse_sshlogin();
1727 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
1728 # As we do not know the max line length on the remote machine
1729 # long commands generated by xargs may fail
1730 # If $opt::max_replace_args is set, it is probably safe
1731 ::warning("Using -X or -m with --sshlogin may fail.");
1734 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
1735 open_joblog();
1736 open_csv();
1737 if($opt::sqlmaster or $opt::sqlworker) {
1738 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
1740 if($opt::sqlworker) { $Global::membuffer ||= 1; }
1743 sub check_invalid_option_combinations() {
1744 if(defined $opt::timeout and
1745 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
1746 ::error("--timeout must be seconds or percentage.");
1747 wait_and_exit(255);
1749 if(defined $opt::fifo and defined $opt::cat) {
1750 ::error("--fifo cannot be combined with --cat.");
1751 ::wait_and_exit(255);
1753 if(defined $opt::retries and defined $opt::roundrobin) {
1754 ::error("--retries cannot be combined with --roundrobin.");
1755 ::wait_and_exit(255);
1757 if(defined $opt::pipepart and
1758 (defined $opt::L or defined $opt::max_lines
1759 or defined $opt::max_replace_args)) {
1760 ::error("--pipepart is incompatible with --max-replace-args, ".
1761 "--max-lines, and -L.");
1762 wait_and_exit(255);
1764 if(defined $opt::group and $opt::ungroup) {
1765 ::error("--group cannot be combined with --ungroup.");
1766 ::wait_and_exit(255);
1768 if(defined $opt::group and $opt::linebuffer) {
1769 ::error("--group cannot be combined with --line-buffer.");
1770 ::wait_and_exit(255);
1772 if(defined $opt::ungroup and $opt::linebuffer) {
1773 ::error("--ungroup cannot be combined with --line-buffer.");
1774 ::wait_and_exit(255);
1776 if(defined $opt::tollef and not $opt::gnu) {
1777 ::error("--tollef has been retired.",
1778 "Remove --tollef or use --gnu to override --tollef.");
1779 ::wait_and_exit(255);
1781 if(defined $opt::retired) {
1782 ::error("-g has been retired. Use --group.",
1783 "-B has been retired. Use --bf.",
1784 "-T has been retired. Use --tty.",
1785 "-U has been retired. Use --er.",
1786 "-W has been retired. Use --wd.",
1787 "-Y has been retired. Use --shebang.",
1788 "-H has been retired. Use --halt.",
1789 "--sql has been retired. Use --sqlmaster.",
1790 "--ctrlc has been retired.",
1791 "--noctrlc has been retired.");
1792 ::wait_and_exit(255);
1794 if($opt::groupby) {
1795 if(not $opt::pipe) {
1796 $opt::pipe = 1;
1798 if($opt::remove_rec_sep) {
1799 ::error("--remove-rec-sep is not compatible with --groupby");
1800 ::wait_and_exit(255);
1802 if($opt::recstart) {
1803 ::error("--recstart is not compatible with --groupby");
1804 ::wait_and_exit(255);
1806 if($opt::recend ne "\n") {
1807 ::error("--recend is not compatible with --groupby");
1808 ::wait_and_exit(255);
1810 if($opt::pipepart) {
1811 # TODO This may be possible to do later
1812 # Finding split points might be a bitch though
1813 ::error("--pipepart is not compatible with --groupby");
1814 ::wait_and_exit(255);
1819 sub init_globals() {
1820 # Defaults:
1821 $Global::version = 20190504;
1822 $Global::progname = 'parallel';
1823 $Global::infinity = 2**31;
1824 $Global::debug = 0;
1825 $Global::verbose = 0;
1826 $Global::quoting = 0;
1827 $Global::total_completed = 0;
1828 # Read only table with default --rpl values
1829 %Global::replace =
1831 '{}' => '',
1832 '{#}' => '1 $_=$job->seq()',
1833 '{%}' => '1 $_=$job->slot()',
1834 '{/}' => 's:.*/::',
1835 '{//}' =>
1836 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
1837 '$_ = dirname($_);'),
1838 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
1839 '{.}' => 's:\.[^/.]+$::',
1841 %Global::plus =
1843 # {} = {+/}/{/}
1844 # = {.}.{+.} = {+/}/{/.}.{+.}
1845 # = {..}.{+..} = {+/}/{/..}.{+..}
1846 # = {...}.{+...} = {+/}/{/...}.{+...}
1847 '{+/}' => 's:/[^/]*$::',
1848 '{+.}' => 's:.*\.::',
1849 '{+..}' => 's:.*\.([^.]*\.):$1:',
1850 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
1851 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
1852 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1853 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1854 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1855 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
1856 # {##} = number of jobs
1857 '{##}' => '$_=total_jobs()',
1858 # Bash ${a:-myval}
1859 '{:-([^}]+?)}' => '$_ ||= $$1',
1860 # Bash ${a:2}
1861 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
1862 # Bash ${a:2:3}
1863 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
1864 # Bash ${a#bc}
1865 '{#([^#}][^}]*?)}' => 's/^$$1//;',
1866 # Bash ${a%def}
1867 '{%([^}]+?)}' => 's/$$1$//;',
1868 # Bash ${a/def/ghi} ${a/def/}
1869 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
1870 # Bash ${a^a}
1871 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
1872 # Bash ${a^^a}
1873 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
1874 # Bash ${a,A}
1875 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
1876 # Bash ${a,,A}
1877 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
1879 # Modifiable copy of %Global::replace
1880 %Global::rpl = %Global::replace;
1881 $/ = "\n";
1882 $Global::ignore_empty = 0;
1883 $Global::interactive = 0;
1884 $Global::stderr_verbose = 0;
1885 $Global::default_simultaneous_sshlogins = 9;
1886 $Global::exitstatus = 0;
1887 $Global::arg_sep = ":::";
1888 $Global::arg_file_sep = "::::";
1889 $Global::trim = 'n';
1890 $Global::max_jobs_running = 0;
1891 $Global::job_already_run = '';
1892 $ENV{'TMPDIR'} ||= "/tmp";
1893 $ENV{'OLDPWD'} = $ENV{'PWD'};
1894 if(not $ENV{HOME}) {
1895 # $ENV{HOME} is sometimes not set if called from PHP
1896 ::warning("\$HOME not set. Using /tmp.");
1897 $ENV{HOME} = "/tmp";
1899 # no warnings to allow for undefined $XDG_*
1900 no warnings 'uninitialized';
1901 # $xdg_config_home is needed to make env_parallel.fish stop complaining
1902 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
1903 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
1904 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
1905 # Keep only dirs that exist
1906 @Global::config_dirs =
1907 (grep { -d $_ }
1908 $ENV{'PARALLEL_HOME'},
1909 (map { "$_/parallel" }
1910 $xdg_config_home,
1911 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
1912 $ENV{'HOME'} . "/.parallel");
1913 # Use first dir as config dir
1914 $Global::config_dir = $Global::config_dirs[0] ||
1915 $ENV{'HOME'} . "/.parallel";
1916 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
1917 # Keep only dirs that exist
1918 @Global::cache_dirs =
1919 (grep { -d $_ }
1920 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
1921 $Global::cache_dir = $Global::cache_dirs[0] ||
1922 $ENV{'HOME'} . "/.parallel";
1925 sub parse_halt() {
1926 # $opt::halt flavours
1927 # Uses:
1928 # $opt::halt
1929 # $Global::halt_when
1930 # $Global::halt_fail
1931 # $Global::halt_success
1932 # $Global::halt_pct
1933 # $Global::halt_count
1934 if(defined $opt::halt) {
1935 my %halt_expansion = (
1936 "0" => "never",
1937 "1" => "soon,fail=1",
1938 "2" => "now,fail=1",
1939 "-1" => "soon,success=1",
1940 "-2" => "now,success=1",
1942 # Expand -2,-1,0,1,2 into long form
1943 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
1944 # --halt 5% == --halt soon,fail=5%
1945 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
1946 # Split: soon,fail=5%
1947 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
1948 if(not grep { $when eq $_ } qw(never soon now)) {
1949 ::error("--halt must have 'never', 'soon', or 'now'.");
1950 ::wait_and_exit(255);
1952 $Global::halt_when = $when;
1953 if($when ne "never") {
1954 if($fail_success eq "fail") {
1955 $Global::halt_fail = 1;
1956 } elsif($fail_success eq "success") {
1957 $Global::halt_success = 1;
1958 } elsif($fail_success eq "done") {
1959 $Global::halt_done = 1;
1960 } else {
1961 ::error("--halt $when must be followed by ,success or ,fail.");
1962 ::wait_and_exit(255);
1964 if($pct_count =~ /^(\d+)%$/) {
1965 $Global::halt_pct = $1/100;
1966 } elsif($pct_count =~ /^(\d+)$/) {
1967 $Global::halt_count = $1;
1968 } else {
1969 ::error("--halt $when,$fail_success ".
1970 "must be followed by ,number or ,percent%.");
1971 ::wait_and_exit(255);
1977 sub parse_replacement_string_options() {
1978 # Deal with --rpl
1979 # Uses:
1980 # %Global::rpl
1981 # $Global::parensleft
1982 # $Global::parensright
1983 # $opt::parens
1984 # $Global::parensleft
1985 # $Global::parensright
1986 # $opt::plus
1987 # %Global::plus
1988 # $opt::I
1989 # $opt::U
1990 # $opt::i
1991 # $opt::basenamereplace
1992 # $opt::dirnamereplace
1993 # $opt::seqreplace
1994 # $opt::slotreplace
1995 # $opt::basenameextensionreplace
1997 sub rpl($$) {
1998 # Modify %Global::rpl
1999 # Replace $old with $new
2000 my ($old,$new) = @_;
2001 if($old ne $new) {
2002 $Global::rpl{$new} = $Global::rpl{$old};
2003 delete $Global::rpl{$old};
2006 my $parens = "{==}";
2007 if(defined $opt::parens) { $parens = $opt::parens; }
2008 my $parenslen = 0.5*length $parens;
2009 $Global::parensleft = substr($parens,0,$parenslen);
2010 $Global::parensright = substr($parens,$parenslen);
2011 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
2012 if(defined $opt::I) { rpl('{}',$opt::I); }
2013 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
2014 if(defined $opt::U) { rpl('{.}',$opt::U); }
2015 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
2016 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
2017 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
2018 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
2019 if(defined $opt::basenameextensionreplace) {
2020 rpl('{/.}',$opt::basenameextensionreplace);
2022 for(@opt::rpl) {
2023 # Create $Global::rpl entries for --rpl options
2024 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2025 my ($shorthand,$long) = split/ /,$_,2;
2026 $Global::rpl{$shorthand} = $long;
2030 sub parse_semaphore() {
2031 # Semaphore defaults
2032 # Must be done before computing number of processes and max_line_length
2033 # because when running as a semaphore GNU Parallel does not read args
2034 # Uses:
2035 # $opt::semaphore
2036 # $Global::semaphore
2037 # $opt::semaphoretimeout
2038 # $Semaphore::timeout
2039 # $opt::semaphorename
2040 # $Semaphore::name
2041 # $opt::fg
2042 # $Semaphore::fg
2043 # $opt::wait
2044 # $Semaphore::wait
2045 # $opt::bg
2046 # @opt::a
2047 # @Global::unget_argv
2048 # $Global::default_simultaneous_sshlogins
2049 # $opt::jobs
2050 # $Global::interactive
2051 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
2052 if(defined $opt::semaphore) { $Global::semaphore = 1; }
2053 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
2054 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
2055 if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
2056 $Global::semaphore = 1;
2058 if(defined $opt::bg) { $Global::semaphore = 1; }
2059 if(defined $opt::wait and not $opt::sqlmaster) {
2060 $Global::semaphore = 1; @ARGV = "true";
2062 if($Global::semaphore) {
2063 if(@opt::a) {
2064 # A semaphore does not take input from neither stdin nor file
2065 ::error("A semaphore does not take input from neither stdin nor a file\n");
2066 ::wait_and_exit(255);
2068 @opt::a = ("/dev/null");
2069 # Append a dummy empty argument
2070 # \0 => nothing (not the empty string)
2071 push(@Global::unget_argv, [Arg->new("\0noarg")]);
2072 $Semaphore::timeout = $opt::semaphoretimeout || 0;
2073 if(defined $opt::semaphorename) {
2074 $Semaphore::name = $opt::semaphorename;
2075 } else {
2076 local $/ = "\n";
2077 $Semaphore::name = `tty`;
2078 chomp $Semaphore::name;
2080 $Semaphore::fg = $opt::fg;
2081 $Semaphore::wait = $opt::wait;
2082 $Global::default_simultaneous_sshlogins = 1;
2083 if(not defined $opt::jobs) {
2084 $opt::jobs = 1;
2086 if($Global::interactive and $opt::bg) {
2087 ::error("Jobs running in the ".
2088 "background cannot be interactive.");
2089 ::wait_and_exit(255);
2094 sub record_env() {
2095 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2096 # Returns: N/A
2097 my $ignore_filename = $Global::config_dir . "/ignored_vars";
2098 if(open(my $vars_fh, ">", $ignore_filename)) {
2099 print $vars_fh map { $_,"\n" } keys %ENV;
2100 } else {
2101 ::error("Cannot write to $ignore_filename.");
2102 ::wait_and_exit(255);
2106 sub open_joblog() {
2107 # Open joblog as specified by --joblog
2108 # Uses:
2109 # $opt::resume
2110 # $opt::resume_failed
2111 # $opt::joblog
2112 # $opt::results
2113 # $Global::job_already_run
2114 # %Global::fd
2115 my $append = 0;
2116 if(($opt::resume or $opt::resume_failed)
2118 not ($opt::joblog or $opt::results)) {
2119 ::error("--resume and --resume-failed require --joblog or --results.");
2120 ::wait_and_exit(255);
2122 if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
2123 # --joblog +filename = append to filename
2124 $append = 1;
2126 if($opt::joblog
2128 ($opt::sqlmaster
2130 not $opt::sqlworker)) {
2131 # Do not log if --sqlworker
2132 if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
2133 if(open(my $joblog_fh, "<", $opt::joblog)) {
2134 # Read the joblog
2135 # Override $/ with \n because -d might be set
2136 local $/ = "\n";
2137 # If there is a header: Open as append later
2138 $append = <$joblog_fh>;
2139 my $joblog_regexp;
2140 if($opt::retry_failed) {
2141 # Make a regexp that only matches commands with exit+signal=0
2142 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2143 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2144 my @group;
2145 while(<$joblog_fh>) {
2146 if(/$joblog_regexp/o) {
2147 # This is 30% faster than set_job_already_run($1);
2148 vec($Global::job_already_run,($1||0),1) = 1;
2149 $Global::total_completed++;
2150 $group[$1-1] = "true";
2151 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2152 # Grab out the command
2153 $group[$1-1] = $3;
2154 } else {
2155 chomp;
2156 ::error("Format of '$opt::joblog' is wrong: $_");
2157 ::wait_and_exit(255);
2160 if(@group) {
2161 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2162 unlink($name);
2163 # Put args into argfile
2164 if(grep /\0/, @group) {
2165 # force --null to deal with \n in commandlines
2166 ::warning("Command lines contain newline. Forcing --null.");
2167 $opt::null = 1;
2168 $/ = "\0";
2170 # Replace \0 with '\n' as used in print_joblog()
2171 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2172 seek $outfh, 0, 0;
2173 exit_if_disk_full();
2174 # Set filehandle to -a
2175 @opt::a = ($outfh);
2177 # Remove $command (so -a is run)
2178 @ARGV = ();
2180 if($opt::resume || $opt::resume_failed) {
2181 if($opt::resume_failed) {
2182 # Make a regexp that only matches commands with exit+signal=0
2183 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2184 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2185 } else {
2186 # Just match the job number
2187 $joblog_regexp='^(\d+)';
2189 while(<$joblog_fh>) {
2190 if(/$joblog_regexp/o) {
2191 # This is 30% faster than set_job_already_run($1);
2192 vec($Global::job_already_run,($1||0),1) = 1;
2193 $Global::total_completed++;
2194 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2195 ::error("Format of '$opt::joblog' is wrong: $_");
2196 ::wait_and_exit(255);
2200 close $joblog_fh;
2202 # $opt::null may be set if the commands contain \n
2203 if($opt::null) { $/ = "\0"; }
2205 if($opt::dryrun) {
2206 # Do not write to joblog in a dry-run
2207 if(not open($Global::joblog, ">", "/dev/null")) {
2208 ::error("Cannot write to --joblog $opt::joblog.");
2209 ::wait_and_exit(255);
2211 } elsif($append) {
2212 # Append to joblog
2213 if(not open($Global::joblog, ">>", $opt::joblog)) {
2214 ::error("Cannot append to --joblog $opt::joblog.");
2215 ::wait_and_exit(255);
2217 } else {
2218 if($opt::joblog eq "-") {
2219 # Use STDOUT as joblog
2220 $Global::joblog = $Global::fd{1};
2221 } elsif(not open($Global::joblog, ">", $opt::joblog)) {
2222 # Overwrite the joblog
2223 ::error("Cannot write to --joblog $opt::joblog.");
2224 ::wait_and_exit(255);
2226 print $Global::joblog
2227 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2228 "Send", "Receive", "Exitval", "Signal", "Command"
2229 ). "\n";
2234 sub open_csv() {
2235 if($opt::results) {
2236 # Output as CSV/TSV
2237 if($opt::results eq "-.csv"
2239 $opt::results eq "-.tsv") {
2240 # Output as CSV/TSV on stdout
2241 open $Global::csv_fh, ">&", "STDOUT" or
2242 ::die_bug("Can't dup STDOUT in csv: $!");
2243 # Do not print any other output to STDOUT
2244 # by forcing all other output to /dev/null
2245 open my $fd, ">", "/dev/null" or
2246 ::die_bug("Can't >/dev/null in csv: $!");
2247 $Global::fd{1} = $fd;
2248 $Global::fd{2} = $fd;
2249 } elsif($Global::csvsep) {
2250 if(not open($Global::csv_fh,">",$opt::results)) {
2251 ::error("Cannot open results file `$opt::results': ".
2252 "$!.");
2253 wait_and_exit(255);
2259 sub find_compression_program() {
2260 # Find a fast compression program
2261 # Returns:
2262 # $compress_program = compress program with options
2263 # $decompress_program = decompress program with options
2265 # Search for these. Sorted by speed on 128 core
2267 # seq 120000000|shuf > 1gb &
2268 # apt-get update
2269 # apt install make g++ htop
2270 # wget -O - pi.dk/3 | bash
2271 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2272 # git clone https://github.com/facebook/zstd.git
2273 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2274 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2275 # chmod +x /usr/local/bin/lrz
2276 # wait
2277 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2278 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2279 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2280 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2281 # sort -nk4 jl-?
2283 # 1-core:
2284 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2285 # 4-cores:
2286 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2287 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2288 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2289 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2290 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2292 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2293 lrz pxz bzip2 lzma xz clzip);
2294 for my $p (@prg) {
2295 if(which($p)) {
2296 return ("$p -c -1","$p -dc");
2299 # Fall back to cat
2300 return ("cat","cat");
2303 sub read_options() {
2304 # Read options from command line, profile and $PARALLEL
2305 # Uses:
2306 # $opt::shebang_wrap
2307 # $opt::shebang
2308 # @ARGV
2309 # $opt::plain
2310 # @opt::profile
2311 # $ENV{'HOME'}
2312 # $ENV{'PARALLEL'}
2313 # Returns:
2314 # @ARGV_no_opt = @ARGV without --options
2316 # This must be done first as this may exec myself
2317 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2318 $ARGV[0] =~ /^--shebang-?wrap/ or
2319 $ARGV[0] =~ /^--hashbang/)) {
2320 # Program is called from #! line in script
2321 # remove --shebang-wrap if it is set
2322 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
2323 # remove --shebang if it is set
2324 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
2325 # remove --hashbang if it is set
2326 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
2327 if($opt::shebang) {
2328 my $argfile = Q(pop @ARGV);
2329 # exec myself to split $ARGV[0] into separate fields
2330 exec "$0 --skip-first-line -a $argfile @ARGV";
2332 if($opt::shebang_wrap) {
2333 my @options;
2334 my @parser;
2335 if ($^O eq 'freebsd') {
2336 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2337 my @nooptions = @ARGV;
2338 get_options_from_array(\@nooptions);
2339 while($#ARGV > $#nooptions) {
2340 push @options, shift @ARGV;
2342 while(@ARGV and $ARGV[0] ne ":::") {
2343 push @parser, shift @ARGV;
2345 if(@ARGV and $ARGV[0] eq ":::") {
2346 shift @ARGV;
2348 } else {
2349 @options = shift @ARGV;
2351 my $script = Q(shift @ARGV);
2352 # exec myself to split $ARGV[0] into separate fields
2353 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2354 "::: @ARGV";
2357 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2358 ::warning("--shebang and --shebang-wrap must be the first argument.\n");
2361 Getopt::Long::Configure("bundling","require_order");
2362 my @ARGV_copy = @ARGV;
2363 my @ARGV_orig = @ARGV;
2364 # Check if there is a --profile to set @opt::profile
2365 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
2366 my @ARGV_profile = ();
2367 my @ARGV_env = ();
2368 if(not $opt::plain) {
2369 # Add options from $PARALLEL_HOME/config and other profiles
2370 my @config_profiles = (
2371 "/etc/parallel/config",
2372 (map { "$_/config" } @Global::config_dirs),
2373 $ENV{'HOME'}."/.parallelrc");
2374 my @profiles = @config_profiles;
2375 if(@opt::profile) {
2376 # --profile overrides default profiles
2377 @profiles = ();
2378 for my $profile (@opt::profile) {
2379 # Look for the $profile in . and @Global::config_dirs
2380 push @profiles, grep { -r $_ }
2381 map { "$_/$profile" } ".", @Global::config_dirs;
2384 for my $profile (@profiles) {
2385 if(-r $profile) {
2386 local $/ = "\n";
2387 open (my $in_fh, "<", $profile) ||
2388 ::die_bug("read-profile: $profile");
2389 while(<$in_fh>) {
2390 /^\s*\#/ and next;
2391 chomp;
2392 push @ARGV_profile, shell_words($_);
2394 close $in_fh;
2395 } else {
2396 if(grep /^$profile$/, @config_profiles) {
2397 # config file is not required to exist
2398 } else {
2399 ::error("$profile not readable.");
2400 wait_and_exit(255);
2404 # Add options from shell variable $PARALLEL
2405 if($ENV{'PARALLEL'}) {
2406 push @ARGV_env, shell_words($ENV{'PARALLEL'});
2408 # Add options from env_parallel.csh via $PARALLEL_CSH
2409 if($ENV{'PARALLEL_CSH'}) {
2410 push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'});
2413 Getopt::Long::Configure("bundling","require_order");
2414 get_options_from_array(\@ARGV_profile) || die_usage();
2415 get_options_from_array(\@ARGV_env) || die_usage();
2416 get_options_from_array(\@ARGV) || die_usage();
2417 # What were the options given on the command line?
2418 # Used to start --sqlworker
2419 my $ai = arrayindex(\@ARGV_orig, \@ARGV);
2420 @Global::options_in_argv = @ARGV_orig[0..$ai-1];
2421 # Prepend non-options to @ARGV (such as commands like 'nice')
2422 unshift @ARGV, @ARGV_profile, @ARGV_env;
2423 return @ARGV;
2426 sub arrayindex() {
2427 # Similar to Perl's index function, but for arrays
2428 # Input:
2429 # $arr_ref1 = ref to @array1 to search in
2430 # $arr_ref2 = ref to @array2 to search for
2431 # Returns:
2432 # $pos = position of @array1 in @array2, -1 if not found
2433 my ($arr_ref1,$arr_ref2) = @_;
2434 my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
2435 my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
2436 my $i = index($array1_as_string,$array2_as_string,0);
2437 if($i == -1) { return -1 }
2438 my @before = split /\0/, substr($array1_as_string,0,$i);
2439 return $#before;
2442 sub read_args_from_command_line() {
2443 # Arguments given on the command line after:
2444 # ::: ($Global::arg_sep)
2445 # :::: ($Global::arg_file_sep)
2446 # :::+ ($Global::arg_sep with --link)
2447 # ::::+ ($Global::arg_file_sep with --link)
2448 # Removes the arguments from @ARGV and:
2449 # - puts filenames into -a
2450 # - puts arguments into files and add the files to -a
2451 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2452 # Input:
2453 # @::ARGV = command option ::: arg arg arg :::: argfiles
2454 # Uses:
2455 # $Global::arg_sep
2456 # $Global::arg_file_sep
2457 # $opt::internal_pipe_means_argfiles
2458 # $opt::pipe
2459 # @opt::a
2460 # Returns:
2461 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2462 my @new_argv = ();
2463 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2464 if($arg eq $Global::arg_sep
2466 $arg eq $Global::arg_sep."+"
2468 $arg eq $Global::arg_file_sep
2470 $arg eq $Global::arg_file_sep."+") {
2471 my $group_sep = $arg; # This group of arguments is args or argfiles
2472 my @group;
2473 while(defined ($arg = shift @ARGV)) {
2474 if($arg eq $Global::arg_sep
2476 $arg eq $Global::arg_sep."+"
2478 $arg eq $Global::arg_file_sep
2480 $arg eq $Global::arg_file_sep."+") {
2481 # exit while loop if finding new separator
2482 last;
2483 } else {
2484 # If not hitting ::: :::+ :::: or ::::+
2485 # Append it to the group
2486 push @group, $arg;
2489 my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0;
2490 my $is_file = ($group_sep eq $Global::arg_file_sep
2492 $group_sep eq $Global::arg_file_sep."+");
2493 if($is_file) {
2494 # :::: / ::::+
2495 push @opt::linkinputsource, map { $is_linked } @group;
2496 } else {
2497 # ::: / :::+
2498 push @opt::linkinputsource, $is_linked;
2500 if($is_file
2501 or ($opt::internal_pipe_means_argfiles and $opt::pipe)
2503 # Group of file names on the command line.
2504 # Append args into -a
2505 push @opt::a, @group;
2506 } else {
2507 # Group of arguments on the command line.
2508 # Put them into a file.
2509 # Create argfile
2510 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2511 unlink($name);
2512 # Put args into argfile
2513 print $outfh map { $_,$/ } @group;
2514 seek $outfh, 0, 0;
2515 exit_if_disk_full();
2516 # Append filehandle to -a
2517 push @opt::a, $outfh;
2519 if(defined($arg)) {
2520 # $arg is ::: :::+ :::: or ::::+
2521 # so there is another group
2522 redo;
2523 } else {
2524 # $arg is undef -> @ARGV empty
2525 last;
2528 push @new_argv, $arg;
2530 # Output: @ARGV = command to run with options
2531 return @new_argv;
2534 sub cleanup() {
2535 # Returns: N/A
2536 unlink keys %Global::unlink;
2537 map { rmdir $_ } keys %Global::unlink;
2538 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
2539 for(keys %Global::sshmaster) {
2540 # If 'ssh -M's are running: kill them
2541 kill "TERM", $_;
2546 sub __QUOTING_ARGUMENTS_FOR_SHELL__() {}
2548 sub shell_quote(@) {
2549 # Input:
2550 # @strings = strings to be quoted
2551 # Returns:
2552 # @shell_quoted_strings = string quoted as needed by the shell
2553 return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
2556 sub shell_quote_scalar_rc($) {
2557 # Quote for the rc-shell
2558 my $a = $_[0];
2559 if(defined $a) {
2560 if(($a =~ s/'/''/g)
2562 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2563 # A string was replaced
2564 # No need to test for "" or \0
2565 } elsif($a eq "") {
2566 $a = "''";
2567 } elsif($a eq "\0") {
2568 $a = "";
2571 return $a;
2574 sub shell_quote_scalar_csh($) {
2575 # Quote for (t)csh
2576 my $a = $_[0];
2577 if(defined $a) {
2578 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2579 # This is 1% faster than the above
2580 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2582 # quote newline in csh as \\\n
2583 ($a =~ s/[\n]/"\\\n"/go)) {
2584 # A string was replaced
2585 # No need to test for "" or \0
2586 } elsif($a eq "") {
2587 $a = "''";
2588 } elsif($a eq "\0") {
2589 $a = "";
2592 return $a;
2595 sub shell_quote_scalar_default($) {
2596 # Quote for other shells (Bourne compatibles)
2597 # Inputs:
2598 # $string = string to be quoted
2599 # Returns:
2600 # $shell_quoted = string quoted as needed by the shell
2601 my $par = $_[0];
2602 if($par =~ /[^-_.+a-z0-9\/]/i) {
2603 $par =~ s/'/'"'"'/g; # "-quote single quotes
2604 $par = "'$par'"; # '-quote entire string
2605 $par =~ s/^''|''$//g; # Remove unneeded '' at ends
2606 return $par;
2607 } elsif ($par eq "") {
2608 return "''";
2609 } else {
2610 # No quoting needed
2611 return $par;
2615 sub shell_quote_scalar($) {
2616 # Quote the string so the shell will not expand any special chars
2617 # Inputs:
2618 # $string = string to be quoted
2619 # Returns:
2620 # $shell_quoted = string quoted as needed by the shell
2622 # Speed optimization: Choose the correct shell_quote_scalar_*
2623 # and call that directly from now on
2624 no warnings 'redefine';
2625 if($Global::cshell) {
2626 # (t)csh
2627 *shell_quote_scalar = \&shell_quote_scalar_csh;
2628 } elsif($Global::shell =~ m:(^|/)rc$:) {
2629 # rc-shell
2630 *shell_quote_scalar = \&shell_quote_scalar_rc;
2631 } else {
2632 # other shells
2633 *shell_quote_scalar = \&shell_quote_scalar_default;
2635 # The sub is now redefined. Call it
2636 return shell_quote_scalar($_[0]);
2639 sub Q($) {
2640 # Q alias for ::shell_quote_scalar
2641 my $ret = shell_quote_scalar($_[0]);
2642 no warnings 'redefine';
2643 *Q = \&::shell_quote_scalar;
2644 return $ret;
2647 sub shell_quote_file($) {
2648 # Quote the string so shell will not expand any special chars
2649 # and prepend ./ if needed
2650 # Input:
2651 # $filename = filename to be shell quoted
2652 # Returns:
2653 # $quoted_filename = filename quoted with \ and ./ if needed
2654 my $a = shift;
2655 if(defined $a) {
2656 if($a =~ m:^/: or $a =~ m:^\./:) {
2657 # /abs/path or ./rel/path => skip
2658 } else {
2659 # rel/path => ./rel/path
2660 $a = "./".$a;
2663 return Q($a);
2666 sub shell_words(@) {
2667 # Input:
2668 # $string = shell line
2669 # Returns:
2670 # @shell_words = $string split into words as shell would do
2671 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
2672 return Text::ParseWords::shellwords(@_);
2675 sub perl_quote_scalar($) {
2676 # Quote the string so perl's eval will not expand any special chars
2677 # Inputs:
2678 # $string = string to be quoted
2679 # Returns:
2680 # $perl_quoted = string quoted with \ as needed by perl's eval
2681 my $a = $_[0];
2682 if(defined $a) {
2683 $a =~ s/[\\\"\$\@]/\\$&/go;
2685 return $a;
2688 # -w complains about prototype
2689 sub pQ($) {
2690 # pQ alias for ::perl_quote_scalar
2691 my $ret = perl_quote_scalar($_[0]);
2692 *pQ = \&::perl_quote_scalar;
2693 return $ret;
2696 sub unquote_printf() {
2697 # Convert \t \n \r \000 \0
2698 # Inputs:
2699 # $string = string with \t \n \r \num \0
2700 # Returns:
2701 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
2702 $_ = shift;
2703 s/\\t/\t/g;
2704 s/\\n/\n/g;
2705 s/\\r/\r/g;
2706 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
2707 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
2708 return $_;
2712 sub __FILEHANDLES__() {}
2715 sub save_stdin_stdout_stderr() {
2716 # Remember the original STDIN, STDOUT and STDERR
2717 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
2718 # Uses:
2719 # %Global::fd
2720 # $Global::original_stderr
2721 # $Global::original_stdin
2722 # Returns: N/A
2724 # TODO Disabled until we have an open3 that will take n filehandles
2725 # for my $fdno (1..61) {
2726 # # /dev/fd/62 and above are used by bash for <(cmd)
2727 # # Find file descriptors that are already opened (by the shell)
2728 # Only focus on stdout+stderr for now
2729 for my $fdno (1..2) {
2730 my $fh;
2731 # 2-argument-open is used to be compatible with old perl 5.8.0
2732 # bug #43570: Perl 5.8.0 creates 61 files
2733 if(open($fh,">&=$fdno")) {
2734 $Global::fd{$fdno}=$fh;
2737 open $Global::original_stderr, ">&", "STDERR" or
2738 ::die_bug("Can't dup STDERR: $!");
2739 open $Global::status_fd, ">&", "STDERR" or
2740 ::die_bug("Can't dup STDERR: $!");
2741 open $Global::original_stdin, "<&", "STDIN" or
2742 ::die_bug("Can't dup STDIN: $!");
2745 sub enough_file_handles() {
2746 # Check that we have enough filehandles available for starting
2747 # another job
2748 # Uses:
2749 # $opt::ungroup
2750 # %Global::fd
2751 # Returns:
2752 # 1 if ungrouped (thus not needing extra filehandles)
2753 # 0 if too few filehandles
2754 # 1 if enough filehandles
2755 if(not $opt::ungroup) {
2756 my %fh;
2757 my $enough_filehandles = 1;
2758 # perl uses 7 filehandles for something?
2759 # open3 uses 2 extra filehandles temporarily
2760 # We need a filehandle for each redirected file descriptor
2761 # (normally just STDOUT and STDERR)
2762 for my $i (1..(7+2+keys %Global::fd)) {
2763 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
2765 for (values %fh) { close $_; }
2766 return $enough_filehandles;
2767 } else {
2768 # Ungrouped does not need extra file handles
2769 return 1;
2773 sub open_or_exit($) {
2774 # Open a file name or exit if the file cannot be opened
2775 # Inputs:
2776 # $file = filehandle or filename to open
2777 # Uses:
2778 # $Global::original_stdin
2779 # Returns:
2780 # $fh = file handle to read-opened file
2781 my $file = shift;
2782 if($file eq "-") {
2783 return ($Global::original_stdin || *STDIN);
2785 if(ref $file eq "GLOB") {
2786 # This is an open filehandle
2787 return $file;
2789 my $fh = gensym;
2790 if(not open($fh, "<", $file)) {
2791 ::error("Cannot open input file `$file': No such file or directory.");
2792 wait_and_exit(255);
2794 return $fh;
2797 sub set_fh_blocking($) {
2798 # Set filehandle as blocking
2799 # Inputs:
2800 # $fh = filehandle to be blocking
2801 # Returns:
2802 # N/A
2803 my $fh = shift;
2804 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
2805 my $flags;
2806 # Get the current flags on the filehandle
2807 fcntl($fh, &F_GETFL, $flags) || die $!;
2808 # Remove non-blocking from the flags
2809 $flags &= ~&O_NONBLOCK;
2810 # Set the flags on the filehandle
2811 fcntl($fh, &F_SETFL, $flags) || die $!;
2814 sub set_fh_non_blocking($) {
2815 # Set filehandle as non-blocking
2816 # Inputs:
2817 # $fh = filehandle to be blocking
2818 # Returns:
2819 # N/A
2820 my $fh = shift;
2821 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
2822 my $flags;
2823 # Get the current flags on the filehandle
2824 fcntl($fh, &F_GETFL, $flags) || die $!;
2825 # Add non-blocking to the flags
2826 $flags |= &O_NONBLOCK;
2827 # Set the flags on the filehandle
2828 fcntl($fh, &F_SETFL, $flags) || die $!;
2832 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
2835 # Variable structure:
2837 # $Global::running{$pid} = Pointer to Job-object
2838 # @Global::virgin_jobs = Pointer to Job-object that have received no input
2839 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
2840 # $Global::total_running = total number of running jobs
2841 # $Global::total_started = total jobs started
2842 # $Global::max_procs_file = filename if --jobs is given a filename
2843 # $Global::JobQueue = JobQueue object for the queue of jobs
2844 # $Global::timeoutq = queue of times where jobs timeout
2845 # $Global::newest_job = Job object of the most recent job started
2846 # $Global::newest_starttime = timestamp of $Global::newest_job
2847 # @Global::sshlogin
2848 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
2849 # $Global::start_no_new_jobs = should more jobs be started?
2850 # $Global::original_stderr = file handle for STDERR when the program started
2851 # $Global::total_started = total number of jobs started
2852 # $Global::joblog = filehandle of joblog
2853 # $Global::debug = Is debugging on?
2854 # $Global::exitstatus = status code of GNU Parallel
2855 # $Global::quoting = quote the command to run
2857 sub init_run_jobs() {
2858 # Set Global variables and progress signal handlers
2859 # Do the copying of basefiles
2860 # Returns: N/A
2861 $Global::total_running = 0;
2862 $Global::total_started = 0;
2863 $SIG{USR1} = \&list_running_jobs;
2864 $SIG{USR2} = \&toggle_progress;
2865 if(@opt::basefile) { setup_basefile(); }
2869 my $last_time;
2870 my %last_mtime;
2871 my $max_procs_file_last_mod;
2873 sub changed_procs_file {
2874 # If --jobs is a file and it is modfied:
2875 # Force recomputing of max_jobs_running for each $sshlogin
2876 # Uses:
2877 # $Global::max_procs_file
2878 # %Global::host
2879 # Returns: N/A
2880 if($Global::max_procs_file) {
2881 # --jobs filename
2882 my $mtime = (stat($Global::max_procs_file))[9];
2883 $max_procs_file_last_mod ||= 0;
2884 if($mtime > $max_procs_file_last_mod) {
2885 # file changed: Force re-computing max_jobs_running
2886 $max_procs_file_last_mod = $mtime;
2887 for my $sshlogin (values %Global::host) {
2888 $sshlogin->set_max_jobs_running(undef);
2894 sub changed_sshloginfile {
2895 # If --slf is changed:
2896 # reload --slf
2897 # filter_hosts
2898 # setup_basefile
2899 # Uses:
2900 # @opt::sshloginfile
2901 # @Global::sshlogin
2902 # %Global::host
2903 # $opt::filter_hosts
2904 # Returns: N/A
2905 if(@opt::sshloginfile) {
2906 # Is --sshloginfile changed?
2907 for my $slf (@opt::sshloginfile) {
2908 my $actual_file = expand_slf_shorthand($slf);
2909 my $mtime = (stat($actual_file))[9];
2910 $last_mtime{$actual_file} ||= $mtime;
2911 if($mtime - $last_mtime{$actual_file} > 1) {
2912 ::debug("run","--sshloginfile $actual_file changed. reload\n");
2913 $last_mtime{$actual_file} = $mtime;
2914 # Reload $slf
2915 # Empty sshlogins
2916 @Global::sshlogin = ();
2917 for (values %Global::host) {
2918 # Don't start new jobs on any host
2919 # except the ones added back later
2920 $_->set_max_jobs_running(0);
2922 # This will set max_jobs_running on the SSHlogins
2923 read_sshloginfile($actual_file);
2924 parse_sshlogin();
2925 $opt::filter_hosts and filter_hosts();
2926 setup_basefile();
2932 sub start_more_jobs {
2933 # Run start_another_job() but only if:
2934 # * not $Global::start_no_new_jobs set
2935 # * not JobQueue is empty
2936 # * not load on server is too high
2937 # * not server swapping
2938 # * not too short time since last remote login
2939 # Uses:
2940 # %Global::host
2941 # $Global::start_no_new_jobs
2942 # $Global::JobQueue
2943 # $opt::pipe
2944 # $opt::load
2945 # $opt::noswap
2946 # $opt::delay
2947 # $Global::newest_starttime
2948 # Returns:
2949 # $jobs_started = number of jobs started
2950 my $jobs_started = 0;
2951 if($Global::start_no_new_jobs) {
2952 return $jobs_started;
2954 if(time - ($last_time||0) > 1) {
2955 # At most do this every second
2956 $last_time = time;
2957 changed_procs_file();
2958 changed_sshloginfile();
2960 # This will start 1 job on each --sshlogin (if possible)
2961 # thus distribute the jobs on the --sshlogins round robin
2962 for my $sshlogin (values %Global::host) {
2963 if($Global::JobQueue->empty() and not $opt::pipe) {
2964 # No more jobs in the queue
2965 last;
2967 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
2968 $sshlogin->jobs_running(), "\n");
2969 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
2970 if($opt::delay
2972 $opt::delay > ::now() - $Global::newest_starttime) {
2973 # It has been too short since last start
2974 next;
2976 if($opt::load and $sshlogin->loadavg_too_high()) {
2977 # The load is too high or unknown
2978 next;
2980 if($opt::noswap and $sshlogin->swapping()) {
2981 # The server is swapping
2982 next;
2984 if($opt::limit and $sshlogin->limit()) {
2985 # Over limit
2986 next;
2988 if($opt::memfree and $sshlogin->memfree() < $opt::memfree) {
2989 # The server has not enough mem free
2990 ::debug("mem", "Not starting job: not enough mem\n");
2991 next;
2993 if($sshlogin->too_fast_remote_login()) {
2994 # It has been too short since
2995 next;
2997 debug("run", $sshlogin->string(),
2998 " has ", $sshlogin->jobs_running(),
2999 " out of ", $sshlogin->max_jobs_running(),
3000 " jobs running. Start another.\n");
3001 if(start_another_job($sshlogin) == 0) {
3002 # No more jobs to start on this $sshlogin
3003 debug("run","No jobs started on ",
3004 $sshlogin->string(), "\n");
3005 next;
3007 $sshlogin->inc_jobs_running();
3008 $sshlogin->set_last_login_at(::now());
3009 $jobs_started++;
3011 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
3012 $sshlogin->jobs_running(), " of ",
3013 $sshlogin->max_jobs_running(), "\n");
3016 return $jobs_started;
3021 my $no_more_file_handles_warned;
3023 sub start_another_job() {
3024 # If there are enough filehandles
3025 # and JobQueue not empty
3026 # and not $job is in joblog
3027 # Then grab a job from Global::JobQueue,
3028 # start it at sshlogin
3029 # mark it as virgin_job
3030 # Inputs:
3031 # $sshlogin = the SSHLogin to start the job on
3032 # Uses:
3033 # $Global::JobQueue
3034 # $opt::pipe
3035 # $opt::results
3036 # $opt::resume
3037 # @Global::virgin_jobs
3038 # Returns:
3039 # 1 if another jobs was started
3040 # 0 otherwise
3041 my $sshlogin = shift;
3042 # Do we have enough file handles to start another job?
3043 if(enough_file_handles()) {
3044 if($Global::JobQueue->empty() and not $opt::pipe) {
3045 # No more commands to run
3046 debug("start", "Not starting: JobQueue empty\n");
3047 return 0;
3048 } else {
3049 my $job;
3050 # Skip jobs already in job log
3051 # Skip jobs already in results
3052 do {
3053 $job = get_job_with_sshlogin($sshlogin);
3054 if(not defined $job) {
3055 # No command available for that sshlogin
3056 debug("start", "Not starting: no jobs available for ",
3057 $sshlogin->string(), "\n");
3058 return 0;
3060 if($job->is_already_in_joblog()) {
3061 $job->free_slot();
3063 } while ($job->is_already_in_joblog()
3065 ($opt::results and $opt::resume and $job->is_already_in_results()));
3066 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
3067 $job->replaced(),"'\n");
3068 if($job->start()) {
3069 if($opt::pipe) {
3070 if($job->virgin()) {
3071 push(@Global::virgin_jobs,$job);
3072 } else {
3073 # Block already set: This is a retry
3074 if(fork()) {
3075 ::debug("pipe","\n\nWriting ",length ${$job->block_ref()},
3076 " to ", $job->seq(),"\n");
3077 close $job->fh(0,"w");
3078 } else {
3079 $job->write($job->block_ref());
3080 close $job->fh(0,"w");
3081 exit(0);
3085 debug("start", "Started as seq ", $job->seq(),
3086 " pid:", $job->pid(), "\n");
3087 return 1;
3088 } else {
3089 # Not enough processes to run the job.
3090 # Put it back on the queue.
3091 $Global::JobQueue->unget($job);
3092 # Count down the number of jobs to run for this SSHLogin.
3093 my $max = $sshlogin->max_jobs_running();
3094 if($max > 1) { $max--; } else {
3095 my @arg;
3096 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3097 push @arg, map { $_->orig() } @$record;
3099 ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
3100 ::wait_and_exit(255);
3102 $sshlogin->set_max_jobs_running($max);
3103 # Sleep up to 300 ms to give other processes time to die
3104 ::usleep(rand()*300);
3105 ::warning("No more processes: ".
3106 "Decreasing number of running jobs to $max.",
3107 "Raising ulimit -u or /etc/security/limits.conf may help.");
3108 return 0;
3111 } else {
3112 # No more file handles
3113 $no_more_file_handles_warned++ or
3114 ::warning("No more file handles. ",
3115 "Raising ulimit -n or /etc/security/limits.conf may help.");
3116 debug("start", "No more file handles. ");
3117 return 0;
3122 sub init_progress() {
3123 # Uses:
3124 # $opt::bar
3125 # Returns:
3126 # list of computers for progress output
3127 $|=1;
3128 if($opt::bar) {
3129 return("","");
3131 my %progress = progress();
3132 return ("\nComputers / CPU cores / Max jobs to run\n",
3133 $progress{'workerlist'});
3136 sub drain_job_queue(@) {
3137 # Uses:
3138 # $opt::progress
3139 # $Global::total_running
3140 # $Global::max_jobs_running
3141 # %Global::running
3142 # $Global::JobQueue
3143 # %Global::host
3144 # $Global::start_no_new_jobs
3145 # Returns: N/A
3146 my @command = @_;
3147 if($opt::progress) {
3148 ::status_no_nl(init_progress());
3150 my $last_header = "";
3151 my $sleep = 0.2;
3152 do {
3153 while($Global::total_running > 0) {
3154 debug($Global::total_running, "==", scalar
3155 keys %Global::running," slots: ", $Global::max_jobs_running);
3156 if($opt::pipe) {
3157 # When using --pipe sometimes file handles are not closed properly
3158 for my $job (values %Global::running) {
3159 close $job->fh(0,"w");
3162 if($opt::progress) {
3163 my %progress = progress();
3164 if($last_header ne $progress{'header'}) {
3165 ::status("", $progress{'header'});
3166 $last_header = $progress{'header'};
3168 ::status_no_nl("\r",$progress{'status'});
3170 if($Global::total_running < $Global::max_jobs_running
3171 and not $Global::JobQueue->empty()) {
3172 # These jobs may not be started because of loadavg
3173 # or too little time between each ssh login.
3174 if(start_more_jobs() > 0) {
3175 # Exponential back-on if jobs were started
3176 $sleep = $sleep/2+0.001;
3179 # Exponential back-off sleeping
3180 $sleep = ::reap_usleep($sleep);
3182 if(not $Global::JobQueue->empty()) {
3183 # These jobs may not be started:
3184 # * because there the --filter-hosts has removed all
3185 if(not %Global::host) {
3186 ::error("There are no hosts left to run on.");
3187 ::wait_and_exit(255);
3189 # * because of loadavg
3190 # * because of too little time between each ssh login.
3191 $sleep = ::reap_usleep($sleep);
3192 start_more_jobs();
3193 if($Global::max_jobs_running == 0) {
3194 ::warning("There are no job slots available. Increase --jobs.");
3197 while($opt::sqlmaster and not $Global::sql->finished()) {
3198 # SQL master
3199 $sleep = ::reap_usleep($sleep);
3200 start_more_jobs();
3201 if($Global::start_sqlworker) {
3202 # Start an SQL worker as we are now sure there is work to do
3203 $Global::start_sqlworker = 0;
3204 if(my $pid = fork()) {
3205 $Global::unkilled_sqlworker = $pid;
3206 } else {
3207 # Replace --sql/--sqlandworker with --sqlworker
3208 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3209 # exec the --sqlworker
3210 exec($0,@ARGV,@command);
3214 } while ($Global::total_running > 0
3216 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3218 $opt::sqlmaster and not $Global::sql->finished());
3219 if($opt::progress) {
3220 my %progress = progress();
3221 ::status("\r".$progress{'status'});
3225 sub toggle_progress() {
3226 # Turn on/off progress view
3227 # Uses:
3228 # $opt::progress
3229 # Returns: N/A
3230 $opt::progress = not $opt::progress;
3231 if($opt::progress) {
3232 ::status_no_nl(init_progress());
3236 sub progress() {
3237 # Uses:
3238 # $opt::bar
3239 # $opt::eta
3240 # %Global::host
3241 # $Global::total_started
3242 # Returns:
3243 # $workerlist = list of workers
3244 # $header = that will fit on the screen
3245 # $status = message that will fit on the screen
3246 if($opt::bar) {
3247 return ("workerlist" => "", "header" => "", "status" => bar());
3249 my $eta = "";
3250 my ($status,$header)=("","");
3251 if($opt::eta) {
3252 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3253 compute_eta();
3254 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
3255 $this_eta, $left, $avgtime);
3257 my $termcols = terminal_columns();
3258 my @workers = sort keys %Global::host;
3259 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3260 my $workerno = 1;
3261 my %workerno = map { ($_=>$workerno++) } @workers;
3262 my $workerlist = "";
3263 for my $w (@workers) {
3264 $workerlist .=
3265 $workerno{$w}.":".$sshlogin{$w} ." / ".
3266 ($Global::host{$w}->ncpus() || "-")." / ".
3267 $Global::host{$w}->max_jobs_running()."\n";
3269 $status = "x"x($termcols+1);
3270 # Select an output format that will fit on a single line
3271 if(length $status > $termcols) {
3272 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3273 $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
3274 $status = $eta .
3275 join(" ",map
3277 if($Global::total_started) {
3278 my $completed = ($Global::host{$_}->jobs_completed()||0);
3279 my $running = $Global::host{$_}->jobs_running();
3280 my $time = $completed ? (time-$^T)/($completed) : "0";
3281 sprintf("%s:%d/%d/%d%%/%.1fs ",
3282 $sshlogin{$_}, $running, $completed,
3283 ($running+$completed)*100
3284 / $Global::total_started, $time);
3286 } @workers);
3288 if(length $status > $termcols) {
3289 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3290 $header = "Computer:jobs running/jobs completed/%of started jobs";
3291 $status = $eta .
3292 join(" ",map
3294 if($Global::total_started) {
3295 my $completed = ($Global::host{$_}->jobs_completed()||0);
3296 my $running = $Global::host{$_}->jobs_running();
3297 my $time = $completed ? (time-$^T)/($completed) : "0";
3298 sprintf("%s:%d/%d/%d%%/%.1fs ",
3299 $workerno{$_}, $running, $completed,
3300 ($running+$completed)*100
3301 / $Global::total_started, $time);
3303 } @workers);
3305 if(length $status > $termcols) {
3306 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3307 $header = "Computer:jobs running/jobs completed/%of started jobs";
3308 $status = $eta .
3309 join(" ",map
3311 if($Global::total_started) {
3312 sprintf("%s:%d/%d/%d%%",
3313 $sshlogin{$_},
3314 $Global::host{$_}->jobs_running(),
3315 ($Global::host{$_}->jobs_completed()||0),
3316 ($Global::host{$_}->jobs_running()+
3317 ($Global::host{$_}->jobs_completed()||0))*100
3318 / $Global::total_started)
3321 @workers);
3323 if(length $status > $termcols) {
3324 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3325 $header = "Computer:jobs running/jobs completed/%of started jobs";
3326 $status = $eta .
3327 join(" ",map
3329 if($Global::total_started) {
3330 sprintf("%s:%d/%d/%d%%",
3331 $workerno{$_},
3332 $Global::host{$_}->jobs_running(),
3333 ($Global::host{$_}->jobs_completed()||0),
3334 ($Global::host{$_}->jobs_running()+
3335 ($Global::host{$_}->jobs_completed()||0))*100
3336 / $Global::total_started)
3339 @workers);
3341 if(length $status > $termcols) {
3342 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3343 $header = "Computer:jobs running/jobs completed";
3344 $status = $eta .
3345 join(" ",map
3346 { sprintf("%s:%d/%d",
3347 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3348 ($Global::host{$_}->jobs_completed()||0)) }
3349 @workers);
3351 if(length $status > $termcols) {
3352 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3353 $header = "Computer:jobs running/jobs completed";
3354 $status = $eta .
3355 join(" ",map
3356 { sprintf("%s:%d/%d",
3357 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3358 ($Global::host{$_}->jobs_completed()||0)) }
3359 @workers);
3361 if(length $status > $termcols) {
3362 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3363 $header = "Computer:jobs running/jobs completed";
3364 $status = $eta .
3365 join(" ",map
3366 { sprintf("%s:%d/%d",
3367 $workerno{$_}, $Global::host{$_}->jobs_running(),
3368 ($Global::host{$_}->jobs_completed()||0)) }
3369 @workers);
3371 if(length $status > $termcols) {
3372 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3373 $header = "Computer:jobs completed";
3374 $status = $eta .
3375 join(" ",map
3376 { sprintf("%s:%d",
3377 $sshlogin{$_},
3378 ($Global::host{$_}->jobs_completed()||0)) }
3379 @workers);
3381 if(length $status > $termcols) {
3382 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3383 $header = "Computer:jobs completed";
3384 $status = $eta .
3385 join(" ",map
3386 { sprintf("%s:%d",
3387 $workerno{$_},
3388 ($Global::host{$_}->jobs_completed()||0)) }
3389 @workers);
3391 return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
3396 my ($first_completed, $smoothed_avg_time, $last_eta);
3398 sub compute_eta {
3399 # Calculate important numbers for ETA
3400 # Returns:
3401 # $total = number of jobs in total
3402 # $completed = number of jobs completed
3403 # $left = number of jobs left
3404 # $pctcomplete = percent of jobs completed
3405 # $avgtime = averaged time
3406 # $eta = smoothed eta
3407 my $completed = $Global::total_completed;
3408 # In rare cases with -X will $completed > total_jobs()
3409 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3410 my $left = $total - $completed;
3411 if(not $completed) {
3412 return($total, $completed, $left, 0, 0, 0);
3414 my $pctcomplete = ::min($completed / $total,100);
3415 $first_completed ||= time;
3416 my $timepassed = (time - $first_completed);
3417 my $avgtime = $timepassed / $completed;
3418 $smoothed_avg_time ||= $avgtime;
3419 # Smooth the eta so it does not jump wildly
3420 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3421 $pctcomplete * $avgtime;
3422 my $eta = int($left * $smoothed_avg_time);
3423 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3424 # Eta jumped less that 10% up: Keep the last eta instead
3425 $eta = $last_eta;
3426 } else {
3427 $last_eta = $eta;
3429 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3434 my ($rev,$reset);
3436 sub bar() {
3437 # Return:
3438 # $status = bar with eta, completed jobs, arg and pct
3439 $rev ||= "\033[7m";
3440 $reset ||= "\033[0m";
3441 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3442 compute_eta();
3443 my $arg = $Global::newest_job ?
3444 $Global::newest_job->{'commandline'}->
3445 replace_placeholders(["\257<\257>"],0,0) : "";
3446 # These chars mess up display in the terminal
3447 $arg =~ tr/[\011-\016\033\302-\365]//d;
3448 my $eta_dhms = ::seconds_to_time_units($eta);
3449 my $bar_text =
3450 sprintf("%d%% %d:%d=%s %s",
3451 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3452 my $terminal_width = terminal_columns();
3453 my $s = sprintf("%-${terminal_width}s",
3454 substr($bar_text." "x$terminal_width,
3455 0,$terminal_width));
3456 my $width = int($terminal_width * $pctcomplete);
3457 substr($s,$width,0) = $reset;
3458 my $zenity = sprintf("%-${terminal_width}s",
3459 substr("# $eta sec $arg",
3460 0,$terminal_width));
3461 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3462 "\r" . $rev . $s . $reset;
3463 return $s;
3468 my ($columns,$last_column_time);
3470 sub terminal_columns() {
3471 # Get the number of columns of the terminal.
3472 # Only update once per second.
3473 # Returns:
3474 # number of columns of the screen
3475 if(not $columns or $last_column_time < time) {
3476 $last_column_time = time;
3477 $columns = $ENV{'COLUMNS'};
3478 if(not $columns) {
3479 my $stty = ::qqx("stty -a </dev/tty");
3480 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3481 # MacOSX/IRIX/AIX/Tru64
3482 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3483 # GNU/Linux/Solaris
3484 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3485 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3486 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3487 # QNX
3488 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3490 if(not $columns) {
3491 my $resize = ::qqx("resize");
3492 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3494 $columns ||= 80;
3496 return $columns;
3500 # Prototype forwarding
3501 sub get_job_with_sshlogin($);
3502 sub get_job_with_sshlogin($) {
3503 # Input:
3504 # $sshlogin = which host should the job be run on?
3505 # Uses:
3506 # $opt::hostgroups
3507 # $Global::JobQueue
3508 # Returns:
3509 # $job = next job object for $sshlogin if any available
3510 my $sshlogin = shift;
3511 my $job;
3513 if ($opt::hostgroups) {
3514 my @other_hostgroup_jobs = ();
3516 while($job = $Global::JobQueue->get()) {
3517 if($sshlogin->in_hostgroups($job->hostgroups())) {
3518 # Found a job to be run on a hostgroup of this
3519 # $sshlogin
3520 last;
3521 } else {
3522 # This job was not in the hostgroups of $sshlogin
3523 push @other_hostgroup_jobs, $job;
3526 $Global::JobQueue->unget(@other_hostgroup_jobs);
3527 if(not defined $job) {
3528 # No more jobs
3529 return undef;
3531 } else {
3532 $job = $Global::JobQueue->get();
3533 if(not defined $job) {
3534 # No more jobs
3535 ::debug("start", "No more jobs: JobQueue empty\n");
3536 return undef;
3539 $job->set_sshlogin($sshlogin);
3540 if($opt::retries and $job->failed_here()) {
3541 # This command with these args failed for this sshlogin
3542 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3543 # Only look at the Global::host that have > 0 jobslots
3544 if($no_of_failed_sshlogins ==
3545 grep { $_->max_jobs_running() > 0 } values %Global::host
3546 and $job->failed_here() == $min_failures) {
3547 # It failed the same or more times on another host:
3548 # run it on this host
3549 } else {
3550 # If it failed fewer times on another host:
3551 # Find another job to run
3552 my $nextjob;
3553 if(not $Global::JobQueue->empty()) {
3554 # This can potentially recurse for all args
3555 no warnings 'recursion';
3556 $nextjob = get_job_with_sshlogin($sshlogin);
3558 # Push the command back on the queue
3559 $Global::JobQueue->unget($job);
3560 return $nextjob;
3563 return $job;
3567 sub __REMOTE_SSH__() {}
3570 sub read_sshloginfiles(@) {
3571 # Read a list of --slf's
3572 # Input:
3573 # @files = files or symbolic file names to read
3574 # Returns: N/A
3575 for my $s (@_) {
3576 read_sshloginfile(expand_slf_shorthand($s));
3580 sub expand_slf_shorthand($) {
3581 # Expand --slf shorthand into a read file name
3582 # Input:
3583 # $file = file or symbolic file name to read
3584 # Returns:
3585 # $file = actual file name to read
3586 my $file = shift;
3587 if($file eq "-") {
3588 # skip: It is stdin
3589 } elsif($file eq "..") {
3590 $file = $Global::config_dir."/sshloginfile";
3591 } elsif($file eq ".") {
3592 $file = "/etc/parallel/sshloginfile";
3593 } elsif(not -r $file) {
3594 for(@Global::config_dirs) {
3595 if(not -r $_."/".$file) {
3596 # Try prepending $PARALLEL_HOME
3597 ::error("Cannot open $file.");
3598 ::wait_and_exit(255);
3599 } else {
3600 $file = $_."/".$file;
3601 last;
3605 return $file;
3608 sub read_sshloginfile($) {
3609 # Read sshloginfile into @Global::sshlogin
3610 # Input:
3611 # $file = file to read
3612 # Uses:
3613 # @Global::sshlogin
3614 # Returns: N/A
3615 local $/ = "\n";
3616 my $file = shift;
3617 my $close = 1;
3618 my $in_fh;
3619 ::debug("init","--slf ",$file);
3620 if($file eq "-") {
3621 $in_fh = *STDIN;
3622 $close = 0;
3623 } else {
3624 if(not open($in_fh, "<", $file)) {
3625 # Try the filename
3626 ::error("Cannot open $file.");
3627 ::wait_and_exit(255);
3630 while(<$in_fh>) {
3631 chomp;
3632 /^\s*#/ and next;
3633 /^\s*$/ and next;
3634 push @Global::sshlogin, $_;
3636 if($close) {
3637 close $in_fh;
3641 sub parse_sshlogin() {
3642 # Parse @Global::sshlogin into %Global::host.
3643 # Keep only hosts that are in one of the given ssh hostgroups.
3644 # Uses:
3645 # @Global::sshlogin
3646 # $Global::minimal_command_line_length
3647 # %Global::host
3648 # $opt::transfer
3649 # @opt::return
3650 # $opt::cleanup
3651 # @opt::basefile
3652 # @opt::trc
3653 # Returns: N/A
3654 my @login;
3655 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
3656 for my $sshlogin (@Global::sshlogin) {
3657 # Split up -S sshlogin,sshlogin
3658 for my $s (split /,|\n/, $sshlogin) {
3659 if ($s eq ".." or $s eq "-") {
3660 # This may add to @Global::sshlogin - possibly bug
3661 read_sshloginfile(expand_slf_shorthand($s));
3662 } else {
3663 $s =~ s/\s*$//;
3664 push (@login, $s);
3668 $Global::minimal_command_line_length = 100_000_000;
3669 my @allowed_hostgroups;
3670 for my $ncpu_sshlogin_string (::uniq(@login)) {
3671 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
3672 my $sshlogin_string = $sshlogin->string();
3673 if($sshlogin_string eq "") {
3674 # This is an ssh group: -S @webservers
3675 push @allowed_hostgroups, $sshlogin->hostgroups();
3676 next;
3678 if($Global::host{$sshlogin_string}) {
3679 # This sshlogin has already been added:
3680 # It is probably a host that has come back
3681 # Set the max_jobs_running back to the original
3682 debug("run","Already seen $sshlogin_string\n");
3683 if($sshlogin->{'ncpus'}) {
3684 # If ncpus set by '#/' of the sshlogin, overwrite it:
3685 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
3687 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
3688 next;
3690 $sshlogin->set_maxlength(Limits::Command::max_length());
3692 $Global::minimal_command_line_length =
3693 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
3694 $Global::host{$sshlogin_string} = $sshlogin;
3696 if(@allowed_hostgroups) {
3697 # Remove hosts that are not in these groups
3698 while (my ($string, $sshlogin) = each %Global::host) {
3699 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
3700 delete $Global::host{$string};
3705 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
3706 if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) {
3707 if(not remote_hosts()) {
3708 # There are no remote hosts
3709 if(@opt::trc) {
3710 ::warning("--trc ignored as there are no remote --sshlogin.");
3711 } elsif (defined $opt::transfer) {
3712 ::warning("--transfer ignored as there are no remote --sshlogin.");
3713 } elsif (@opt::transfer_files) {
3714 ::warning("--transferfile ignored as there are no remote --sshlogin.");
3715 } elsif (@opt::return) {
3716 ::warning("--return ignored as there are no remote --sshlogin.");
3717 } elsif (defined $opt::cleanup) {
3718 ::warning("--cleanup ignored as there are no remote --sshlogin.");
3719 } elsif (@opt::basefile) {
3720 ::warning("--basefile ignored as there are no remote --sshlogin.");
3726 sub remote_hosts() {
3727 # Return sshlogins that are not ':'
3728 # Uses:
3729 # %Global::host
3730 # Returns:
3731 # list of sshlogins with ':' removed
3732 return grep !/^:$/, keys %Global::host;
3735 sub setup_basefile() {
3736 # Transfer basefiles to each $sshlogin
3737 # This needs to be done before first jobs on $sshlogin is run
3738 # Uses:
3739 # %Global::host
3740 # @opt::basefile
3741 # Returns: N/A
3742 my @cmd;
3743 my $rsync_destdir;
3744 my $workdir;
3745 for my $sshlogin (values %Global::host) {
3746 if($sshlogin->string() eq ":") { next }
3747 for my $file (@opt::basefile) {
3748 if($file !~ m:^/: and $opt::workdir eq "...") {
3749 ::error("Work dir '...' will not work with relative basefiles.");
3750 ::wait_and_exit(255);
3752 if(not $workdir) {
3753 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],{},{},{});
3754 my $dummyjob = Job->new($dummycmdline);
3755 $workdir = $dummyjob->workdir();
3757 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
3760 debug("init", "basesetup: @cmd\n");
3761 my ($exitstatus,$stdout_ref,$stderr_ref) =
3762 run_parallel((join "\n",@cmd),"-j0","--retries",5);
3763 if($exitstatus) {
3764 my @stdout = @$stdout_ref;
3765 my @stderr = @$stderr_ref;
3766 ::error("Copying of --basefile failed: @stdout@stderr");
3767 ::wait_and_exit(255);
3771 sub cleanup_basefile() {
3772 # Remove the basefiles transferred
3773 # Uses:
3774 # %Global::host
3775 # @opt::basefile
3776 # Returns: N/A
3777 my @cmd;
3778 my $workdir;
3779 if(not $workdir) {
3780 my $dummycmdline = CommandLine->new(1,"true",0,0,0,0,0,{},{},{});
3781 my $dummyjob = Job->new($dummycmdline);
3782 $workdir = $dummyjob->workdir();
3784 for my $sshlogin (values %Global::host) {
3785 if($sshlogin->string() eq ":") { next }
3786 for my $file (@opt::basefile) {
3787 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
3790 debug("init", "basecleanup: @cmd\n");
3791 my ($exitstatus,$stdout_ref,$stderr_ref) =
3792 run_parallel(join("\n",@cmd),"-j0","--retries",5);
3793 if($exitstatus) {
3794 my @stdout = @$stdout_ref;
3795 my @stderr = @$stderr_ref;
3796 ::error("Cleanup of --basefile failed: @stdout@stderr");
3797 ::wait_and_exit(255);
3801 sub run_parallel() {
3802 my ($stdin,@args) = @_;
3803 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
3804 print $Global::original_stderr ` $cmd wait` ;
3805 return 0
3808 sub _run_parallel() {
3809 # Run GNU Parallel
3810 # This should ideally just fork an internal copy
3811 # and not start it through a shell
3812 # Input:
3813 # $stdin = data to provide on stdin for GNU Parallel
3814 # @args = command line arguments
3815 # Returns:
3816 # $exitstatus = exitcode of GNU Parallel run
3817 # \@stdout = standard output
3818 # \@stderr = standard error
3819 my ($stdin,@args) = @_;
3820 my ($exitstatus,@stdout,@stderr);
3821 my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
3822 my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
3823 unlink $stderrname;
3825 my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
3826 $0,qw(--plain --shell /bin/sh --will-cite), @args);
3827 if(my $writerpid = fork()) {
3828 close $stdin_fh;
3829 @stdout = <$stdout_fh>;
3830 # Now stdout is closed:
3831 # These pids should be dead or die very soon
3832 while(kill 0, $writerpid) { ::usleep(1); }
3833 die;
3834 # reap $writerpid;
3835 # while(kill 0, $pid) { ::usleep(1); }
3836 # reap $writerpid;
3837 $exitstatus = $?;
3838 seek $stderr_fh, 0, 0;
3839 @stderr = <$stderr_fh>;
3840 close $stdout_fh;
3841 close $stderr_fh;
3842 } else {
3843 close $stdout_fh;
3844 close $stderr_fh;
3845 print $stdin_fh $stdin;
3846 close $stdin_fh;
3847 exit(0);
3849 return ($exitstatus,\@stdout,\@stderr);
3852 sub filter_hosts() {
3853 # Remove down --sshlogins from active duty.
3854 # Find ncpus, ncores, maxlen, time-to-login for each host.
3855 # Uses:
3856 # %Global::host
3857 # $Global::minimal_command_line_length
3858 # $opt::use_sockets_instead_of_threads
3859 # $opt::use_cores_instead_of_threads
3860 # $opt::use_cpus_instead_of_cores
3861 # Returns: N/A
3863 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
3864 $maxlen_ref, $echo_ref, $down_hosts_ref) =
3865 parse_host_filtering(parallelized_host_filtering());
3867 delete @Global::host{@$down_hosts_ref};
3868 @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
3870 $Global::minimal_command_line_length = 100_000_000;
3871 while (my ($sshlogin, $obj) = each %Global::host) {
3872 if($sshlogin eq ":") { next }
3873 $nsockets_ref->{$sshlogin} or
3874 ::die_bug("nsockets missing: ".$obj->serverlogin());
3875 $ncores_ref->{$sshlogin} or
3876 ::die_bug("ncores missing: ".$obj->serverlogin());
3877 $nthreads_ref->{$sshlogin} or
3878 ::die_bug("nthreads missing: ".$obj->serverlogin());
3879 $time_to_login_ref->{$sshlogin} or
3880 ::die_bug("time_to_login missing: ".$obj->serverlogin());
3881 $maxlen_ref->{$sshlogin} or
3882 ::die_bug("maxlen missing: ".$obj->serverlogin());
3883 $obj->set_ncpus($nthreads_ref->{$sshlogin});
3884 if($opt::use_cpus_instead_of_cores) {
3885 $obj->set_ncpus($ncores_ref->{$sshlogin});
3886 } elsif($opt::use_sockets_instead_of_threads) {
3887 $obj->set_ncpus($nsockets_ref->{$sshlogin});
3888 } elsif($opt::use_cores_instead_of_threads) {
3889 $obj->set_ncpus($ncores_ref->{$sshlogin});
3891 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
3892 $obj->set_maxlength($maxlen_ref->{$sshlogin});
3893 $Global::minimal_command_line_length =
3894 ::min($Global::minimal_command_line_length,
3895 int($maxlen_ref->{$sshlogin}/2));
3896 ::debug("init", "Timing from -S:$sshlogin ",
3897 " nsockets:",$nsockets_ref->{$sshlogin},
3898 " ncores:", $ncores_ref->{$sshlogin},
3899 " nthreads:",$nthreads_ref->{$sshlogin},
3900 " time_to_login:", $time_to_login_ref->{$sshlogin},
3901 " maxlen:", $maxlen_ref->{$sshlogin},
3902 " min_max_len:", $Global::minimal_command_line_length,"\n");
3906 sub parse_host_filtering() {
3907 # Input:
3908 # @lines = output from parallelized_host_filtering()
3909 # Returns:
3910 # \%nsockets = number of sockets of {host}
3911 # \%ncores = number of cores of {host}
3912 # \%nthreads = number of hyperthreaded cores of {host}
3913 # \%time_to_login = time_to_login on {host}
3914 # \%maxlen = max command len on {host}
3915 # \%echo = echo received from {host}
3916 # \@down_hosts = list of hosts with no answer
3917 local $/ = "\n";
3918 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
3919 @down_hosts);
3920 for (@_) {
3921 ::debug("init","Read: ",$_);
3922 chomp;
3923 my @col = split /\t/, $_;
3924 if($col[0] =~ /^parallel: Warning:/) {
3925 # Timed out job: Ignore it
3926 next;
3927 } elsif(defined $col[6]) {
3928 # This is a line from --joblog
3929 # seq host time spent sent received exit signal command
3930 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
3931 if($col[0] eq "Seq" and $col[1] eq "Host" and
3932 $col[2] eq "Starttime") {
3933 # Header => skip
3934 next;
3936 # Get server from: eval true server\;
3937 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
3938 ::die_bug("col8 does not contain host: $col[8]");
3939 my $host = $1;
3940 $host =~ tr/\\//d;
3941 $Global::host{$host} or next;
3942 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
3943 # exit == 255 or exit == timeout (-1): ssh failed/timedout
3944 # exit == 1: lsh failed
3945 # Remove sshlogin
3946 ::debug("init", "--filtered $host\n");
3947 push(@down_hosts, $host);
3948 } elsif($col[6] eq "127") {
3949 # signal == 127: parallel not installed remote
3950 # Set nsockets, ncores, nthreads = 1
3951 ::warning("Could not figure out ".
3952 "number of cpus on $host. Using 1.");
3953 $nsockets{$host} = 1;
3954 $ncores{$host} = 1;
3955 $nthreads{$host} = 1;
3956 $maxlen{$host} = Limits::Command::max_length();
3957 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
3958 # Remember how log it took to log in
3959 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
3960 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
3961 } else {
3962 ::die_bug("host check unmatched long jobline: $_");
3964 } elsif($Global::host{$col[0]}) {
3965 # This output from --number-of-cores, --number-of-cpus,
3966 # --max-line-length-allowed
3967 # ncores: server 8
3968 # ncpus: server 2
3969 # maxlen: server 131071
3970 if(/parallel: Warning: Cannot figure out number of/) {
3971 next;
3973 if(not $nsockets{$col[0]}) {
3974 $nsockets{$col[0]} = $col[1];
3975 } elsif(not $ncores{$col[0]}) {
3976 $ncores{$col[0]} = $col[1];
3977 } elsif(not $nthreads{$col[0]}) {
3978 $nthreads{$col[0]} = $col[1];
3979 } elsif(not $maxlen{$col[0]}) {
3980 $maxlen{$col[0]} = $col[1];
3981 } elsif(not $echo{$col[0]}) {
3982 $echo{$col[0]} = $col[1];
3983 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
3984 # Skip these:
3985 # perl: warning: Setting locale failed.
3986 # perl: warning: Please check that your locale settings:
3987 # LANGUAGE = (unset),
3988 # LC_ALL = (unset),
3989 # LANG = "en_US.UTF-8"
3990 # are supported and installed on your system.
3991 # perl: warning: Falling back to the standard locale ("C").
3992 } else {
3993 ::die_bug("host check too many col0: $_");
3995 } else {
3996 ::die_bug("host check unmatched short jobline ($col[0]): $_");
3999 @down_hosts = uniq(@down_hosts);
4000 return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
4001 \%maxlen, \%echo, \@down_hosts);
4004 sub parallelized_host_filtering() {
4005 # Uses:
4006 # %Global::host
4007 # Returns:
4008 # text entries with:
4009 # * joblog line
4010 # * hostname \t number of cores
4011 # * hostname \t number of cpus
4012 # * hostname \t max-line-length-allowed
4013 # * hostname \t empty
4015 sub sshwrapped {
4016 # Wrap with ssh and --env
4017 my $sshlogin = shift;
4018 my $command = shift;
4019 my $commandline = CommandLine->new(1,[$command],{},0,0,[],[],{},{},{});
4020 my $job = Job->new($commandline);
4021 $job->set_sshlogin($sshlogin);
4022 $job->wrapped();
4023 return($job->{'wrapped'});
4026 my(@sockets, @cores, @threads, @maxline, @echo);
4027 while (my ($host, $sshlogin) = each %Global::host) {
4028 if($host eq ":") { next }
4029 # The 'true' is used to get the $host out later
4030 push(@sockets, $host."\t"."true $host; ".
4031 sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0");
4032 push(@cores, $host."\t"."true $host; ".
4033 sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0");
4034 push(@threads, $host."\t"."true $host; ".
4035 sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0");
4036 push(@maxline, $host."\t"."true $host; ".
4037 sshwrapped($sshlogin,"parallel --max-line-length-allowed")."\n\0");
4038 # 'echo' is used to get the fastest possible ssh login time
4039 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4040 $sshlogin->serverlogin();
4041 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4044 # --timeout 10: Setting up an SSH connection and running a simple
4045 # command should never take > 10 sec.
4046 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4047 # will make it less likely to overload the ssh daemon.
4048 # --retries 3: If the ssh daemon is overloaded, try 3 times
4049 my $cmd =
4050 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4051 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4052 $cmd = $Global::shell." -c ".Q($cmd);
4053 ::debug("init", $cmd, "\n");
4054 my @out;
4055 my $prepend = "";
4057 my ($host_fh,$in,$err);
4058 open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
4059 if(not fork()) {
4060 # Give the commands to run to the $cmd
4061 close $host_fh;
4062 print $in @sockets, @cores, @threads, @maxline, @echo;
4063 close $in;
4064 exit();
4066 close $in;
4067 for(<$host_fh>) {
4068 # TODO incompatible with '-quoting. Needs to be fixed differently
4069 #if(/\'$/) {
4070 # # if last char = ' then append next line
4071 # # This may be due to quoting of \n in environment var
4072 # $prepend .= $_;
4073 # next;
4075 $_ = $prepend . $_;
4076 $prepend = "";
4077 push @out, $_;
4079 close $host_fh;
4080 return @out;
4083 sub onall($@) {
4084 # Runs @command on all hosts.
4085 # Uses parallel to run @command on each host.
4086 # --jobs = number of hosts to run on simultaneously.
4087 # For each host a parallel command with the args will be running.
4088 # Uses:
4089 # $Global::quoting
4090 # @opt::basefile
4091 # $opt::jobs
4092 # $opt::linebuffer
4093 # $opt::ungroup
4094 # $opt::group
4095 # $opt::keeporder
4096 # $opt::D
4097 # $opt::plain
4098 # $opt::max_chars
4099 # $opt::linebuffer
4100 # $opt::files
4101 # $opt::colsep
4102 # $opt::timeout
4103 # $opt::plain
4104 # $opt::retries
4105 # $opt::max_chars
4106 # $opt::arg_sep
4107 # $opt::arg_file_sep
4108 # @opt::v
4109 # @opt::env
4110 # %Global::host
4111 # $Global::exitstatus
4112 # $Global::debug
4113 # $Global::joblog
4114 # $opt::joblog
4115 # $opt::tag
4116 # $opt::tee
4117 # Input:
4118 # @command = command to run on all hosts
4119 # Returns: N/A
4120 sub tmp_joblog {
4121 # Input:
4122 # $joblog = filename of joblog - undef if none
4123 # Returns:
4124 # $tmpfile = temp file for joblog - undef if none
4125 my $joblog = shift;
4126 if(not defined $joblog) {
4127 return undef;
4129 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
4130 close $fh;
4131 return $tmpfile;
4133 my ($input_source_fh_ref,@command) = @_;
4134 if($Global::quoting) {
4135 @command = shell_quote(@command);
4138 # Copy all @input_source_fh (-a and :::) into tempfiles
4139 my @argfiles = ();
4140 for my $fh (@$input_source_fh_ref) {
4141 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
4142 print $outfh (<$fh>);
4143 close $outfh;
4144 push @argfiles, $name;
4146 if(@opt::basefile) { setup_basefile(); }
4147 # for each sshlogin do:
4148 # parallel -S $sshlogin $command :::: @argfiles
4150 # Pass some of the options to the sub-parallels, not all of them as
4151 # -P should only go to the first, and -S should not be copied at all.
4152 my $options =
4153 join(" ",
4154 ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""),
4155 ((defined $opt::D) ? "-D $opt::D" : ""),
4156 ((defined $opt::group) ? "-g" : ""),
4157 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
4158 ((defined $opt::keeporder) ? "--keeporder" : ""),
4159 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4160 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4161 ((defined $opt::plain) ? "--plain" : ""),
4162 ((defined $opt::ungroup) ? "-u" : ""),
4163 ((defined $opt::tee) ? "--tee" : ""),
4165 my $suboptions =
4166 join(" ",
4167 ((defined $opt::D) ? "-D $opt::D" : ""),
4168 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
4169 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
4170 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
4171 ((defined $opt::files) ? "--files" : ""),
4172 ((defined $opt::group) ? "-g" : ""),
4173 ((defined $opt::cleanup) ? "--cleanup" : ""),
4174 ((defined $opt::keeporder) ? "--keeporder" : ""),
4175 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4176 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4177 ((defined $opt::plain) ? "--plain" : ""),
4178 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
4179 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
4180 ((defined $opt::ungroup) ? "-u" : ""),
4181 ((defined $opt::tee) ? "--tee" : ""),
4182 ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
4183 (@Global::transfer_files ? map { "--tf ".Q($_) }
4184 @Global::transfer_files : ""),
4185 (@Global::ret_files ? map { "--return ".Q($_) }
4186 @Global::ret_files : ""),
4187 (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
4188 (map { "-v" } @opt::v),
4190 ::debug("init", "| $0 $options\n");
4191 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4192 ::die_bug("This does not run GNU Parallel: $0 $options");
4193 my @joblogs;
4194 for my $host (sort keys %Global::host) {
4195 my $sshlogin = $Global::host{$host};
4196 my $joblog = tmp_joblog($opt::joblog);
4197 if($joblog) {
4198 push @joblogs, $joblog;
4199 $joblog = "--joblog $joblog";
4201 my $quad = $opt::arg_file_sep || "::::";
4202 # If PARALLEL_ENV is set: Pass it on
4203 my $penv=$Global::parallel_env ?
4204 "PARALLEL_ENV=".Q($Global::parallel_env) :
4206 ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
4207 ((defined $opt::tag) ?
4208 "--tagstring ".Q($sshlogin->string()) : ""),
4209 " -S ", Q($sshlogin->string())," ",
4210 join(" ",shell_quote(@command))," $quad @argfiles\n");
4211 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4212 ((defined $opt::tag) ?
4213 "--tagstring ".Q($sshlogin->string()) : ""),
4214 " -S ", Q($sshlogin->string())," ",
4215 join(" ",shell_quote(@command))," $quad @argfiles\0";
4217 close $parallel_fh;
4218 $Global::exitstatus = $? >> 8;
4219 debug("init", "--onall exitvalue ", $?);
4220 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
4221 $Global::debug or unlink(@argfiles);
4222 my %seen;
4223 for my $joblog (@joblogs) {
4224 # Append to $joblog
4225 open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
4226 # Skip first line (header);
4227 <$fh>;
4228 print $Global::joblog (<$fh>);
4229 close $fh;
4230 unlink($joblog);
4235 sub __SIGNAL_HANDLING__() {}
4238 sub sigtstp() {
4239 # Send TSTP signal (Ctrl-Z) to all children process groups
4240 # Uses:
4241 # %SIG
4242 # Returns: N/A
4243 signal_children("TSTP");
4246 sub sigpipe() {
4247 # Send SIGPIPE signal to all children process groups
4248 # Uses:
4249 # %SIG
4250 # Returns: N/A
4251 signal_children("PIPE");
4254 sub signal_children() {
4255 # Send signal to all children process groups
4256 # and GNU Parallel itself
4257 # Uses:
4258 # %SIG
4259 # Returns: N/A
4260 my $signal = shift;
4261 debug("run", "Sending $signal ");
4262 kill $signal, map { -$_ } keys %Global::running;
4263 # Use default signal handler for GNU Parallel itself
4264 $SIG{$signal} = undef;
4265 kill $signal, $$;
4268 sub save_original_signal_handler() {
4269 # Remember the original signal handler
4270 # Uses:
4271 # %Global::original_sig
4272 # Returns: N/A
4273 $SIG{INT} = sub {
4274 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4275 wait_and_exit(255);
4277 $SIG{TERM} = sub {
4278 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4279 wait_and_exit(255);
4281 %Global::original_sig = %SIG;
4282 $SIG{TERM} = sub {}; # Dummy until jobs really start
4283 $SIG{ALRM} = 'IGNORE';
4284 # Allow Ctrl-Z to suspend and `fg` to continue
4285 $SIG{TSTP} = \&sigtstp;
4286 $SIG{PIPE} = \&sigpipe;
4287 $SIG{CONT} = sub {
4288 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4289 $SIG{TSTP} = \&sigtstp;
4290 # Send continue signal to all children process groups
4291 kill "CONT", map { -$_ } keys %Global::running;
4295 sub list_running_jobs() {
4296 # Print running jobs on tty
4297 # Uses:
4298 # %Global::running
4299 # Returns: N/A
4300 for my $job (values %Global::running) {
4301 ::status("$Global::progname: ".$job->replaced());
4305 sub start_no_new_jobs() {
4306 # Start no more jobs
4307 # Uses:
4308 # %Global::original_sig
4309 # %Global::unlink
4310 # $Global::start_no_new_jobs
4311 # Returns: N/A
4312 # $SIG{TERM} = $Global::original_sig{TERM};
4313 unlink keys %Global::unlink;
4314 ::status
4315 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4316 "$Global::progname: Waiting for these ".(keys %Global::running).
4317 " jobs to finish. Send SIGTERM to stop now.");
4318 list_running_jobs();
4319 $Global::start_no_new_jobs ||= 1;
4322 sub reapers() {
4323 # Run reaper until there are no more left
4324 # Returns:
4325 # @pids_reaped = pids of reaped processes
4326 my @pids_reaped;
4327 my $pid;
4328 while($pid = reaper()) {
4329 push @pids_reaped, $pid;
4331 return @pids_reaped;
4334 sub reaper() {
4335 # A job finished:
4336 # * Set exitstatus, exitsignal, endtime.
4337 # * Free ressources for new job
4338 # * Update median runtime
4339 # * Print output
4340 # * If --halt = now: Kill children
4341 # * Print progress
4342 # Uses:
4343 # %Global::running
4344 # $opt::timeout
4345 # $Global::timeoutq
4346 # $opt::keeporder
4347 # $Global::total_running
4348 # Returns:
4349 # $stiff = PID of child finished
4350 my $stiff;
4351 debug("run", "Reaper ");
4352 if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
4353 # No jobs waiting to be reaped
4354 return 0;
4357 # $stiff = pid of dead process
4358 my $job = $Global::running{$stiff};
4360 # '-a <(seq 10)' will give us a pid not in %Global::running
4361 # The same will one of the ssh -M: ignore
4362 $job or return 0;
4363 delete $Global::running{$stiff};
4364 $Global::total_running--;
4365 if($job->{'commandline'}{'skip'}) {
4366 # $job->skip() was called
4367 $job->set_exitstatus(-2);
4368 $job->set_exitsignal(0);
4369 } else {
4370 $job->set_exitstatus($? >> 8);
4371 $job->set_exitsignal($? & 127);
4374 debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4375 $job->set_endtime(::now());
4376 my $sshlogin = $job->sshlogin();
4377 $sshlogin->dec_jobs_running();
4378 if($job->should_be_retried()) {
4379 # Free up file handles
4380 $job->free_ressources();
4381 } else {
4382 # The job is done
4383 $sshlogin->inc_jobs_completed();
4384 # Free the jobslot
4385 $job->free_slot();
4386 if($opt::timeout and not $job->exitstatus()) {
4387 # Update average runtime for timeout only for successful jobs
4388 $Global::timeoutq->update_median_runtime($job->runtime());
4390 if($opt::keeporder) {
4391 $job->print_earlier_jobs();
4392 } else {
4393 $job->print();
4395 if($job->should_we_halt() eq "now") {
4396 # Kill children
4397 ::kill_sleep_seq($job->pid());
4398 ::killall();
4399 ::wait_and_exit($Global::halt_exitstatus);
4402 $job->cleanup();
4404 if($opt::progress) {
4405 my %progress = progress();
4406 ::status_no_nl("\r",$progress{'status'});
4409 debug("run", "done ");
4410 return $stiff;
4414 sub __USAGE__() {}
4417 sub killall() {
4418 # Kill all jobs by killing their process groups
4419 # Uses:
4420 # $Global::start_no_new_jobs = we are stopping
4421 # $Global::killall = Flag to not run reaper
4422 $Global::start_no_new_jobs ||= 1;
4423 # Do not reap killed children: Ignore them instead
4424 $Global::killall ||= 1;
4425 kill_sleep_seq(keys %Global::running);
4428 sub kill_sleep_seq(@) {
4429 # Send jobs TERM,TERM,KILL to processgroups
4430 # Input:
4431 # @pids = list of pids that are also processgroups
4432 # Convert pids to process groups ($processgroup = -$pid)
4433 my @pgrps = map { -$_ } @_;
4434 my @term_seq = split/,/,$opt::termseq;
4435 if(not @term_seq) {
4436 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4438 while(@term_seq) {
4439 @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
4443 sub kill_sleep() {
4444 # Kill pids with a signal and wait a while for them to die
4445 # Input:
4446 # $signal = signal to send to @pids
4447 # $sleep_max = number of ms to sleep at most before returning
4448 # @pids = pids to kill (actually process groups)
4449 # Uses:
4450 # $Global::killall = set by killall() to avoid calling reaper
4451 # Returns:
4452 # @pids = pids still alive
4453 my ($signal, $sleep_max, @pids) = @_;
4454 ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4455 kill $signal, @pids;
4456 my $sleepsum = 0;
4457 my $sleep = 0.001;
4459 while(@pids and $sleepsum < $sleep_max) {
4460 if($Global::killall) {
4461 # Killall => don't run reaper
4462 while(waitpid(-1, &WNOHANG) > 0) {
4463 $sleep = $sleep/2+0.001;
4465 } elsif(reapers()) {
4466 $sleep = $sleep/2+0.001;
4468 $sleep *= 1.1;
4469 ::usleep($sleep);
4470 $sleepsum += $sleep;
4471 # Keep only living children
4472 @pids = grep { kill(0, $_) } @pids;
4474 return @pids;
4477 sub wait_and_exit($) {
4478 # If we do not wait, we sometimes get segfault
4479 # Returns: N/A
4480 my $error = shift;
4481 unlink keys %Global::unlink;
4482 if($error) {
4483 # Kill all jobs without printing
4484 killall();
4486 for (keys %Global::unkilled_children) {
4487 # Kill any (non-jobs) children (e.g. reserved processes)
4488 kill 9, $_;
4489 waitpid($_,0);
4490 delete $Global::unkilled_children{$_};
4492 if($Global::unkilled_sqlworker) {
4493 waitpid($Global::unkilled_sqlworker,0);
4495 exit($error);
4498 sub die_usage() {
4499 # Returns: N/A
4500 usage();
4501 wait_and_exit(255);
4504 sub usage() {
4505 # Returns: N/A
4506 print join
4507 ("\n",
4508 "Usage:",
4510 "$Global::progname [options] [command [arguments]] < list_of_arguments",
4511 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4512 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4514 "-j n Run n jobs in parallel",
4515 "-k Keep same order",
4516 "-X Multiple arguments with context replace",
4517 "--colsep regexp Split input on regexp for positional replacements",
4518 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4519 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
4520 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4521 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4523 "-S sshlogin Example: foo\@server.example.com",
4524 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
4525 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
4526 "--onall Run the given command with argument on all sshlogins",
4527 "--nonall Run the given command with no arguments on all sshlogins",
4529 "--pipe Split stdin (standard input) to multiple jobs.",
4530 "--recend str Record end separator for --pipe.",
4531 "--recstart str Record start separator for --pipe.",
4533 "See 'man $Global::progname' for details",
4535 "Academic tradition requires you to cite works you base your article on.",
4536 "If you use programs that use GNU Parallel to process data for an article in a",
4537 "scientific publication, please cite:",
4539 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4540 " DOI https://doi.org/10.5281/zenodo.1146014",
4542 # Before changing this line, please read
4543 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4544 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4545 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4547 "",);
4550 sub citation_notice() {
4551 # if --will-cite or --plain: do nothing
4552 # if stderr redirected: do nothing
4553 # if $PARALLEL_HOME/will-cite: do nothing
4554 # else: print citation notice to stderr
4555 if($opt::willcite
4557 $opt::plain
4559 not -t $Global::original_stderr
4561 grep { -e "$_/will-cite" } @Global::config_dirs) {
4562 # skip
4563 } else {
4564 ::status
4565 ("Academic tradition requires you to cite works you base your article on.",
4566 "If you use programs that use GNU Parallel to process data for an article in a",
4567 "scientific publication, please cite:",
4569 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4570 " DOI https://doi.org/10.5281/zenodo.1146014",
4572 # Before changing this line, please read
4573 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4574 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4575 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4577 "More about funding GNU Parallel and the citation notice:",
4578 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4580 "To silence this citation notice: run 'parallel --citation' once.",
4583 mkdir $Global::config_dir;
4584 # Number of times the user has run GNU Parallel without showing
4585 # willingness to cite
4586 my $runs = 0;
4587 if(open (my $fh, "<", $Global::config_dir.
4588 "/runs-without-willing-to-cite")) {
4589 $runs = <$fh>;
4590 close $fh;
4592 $runs++;
4593 if(open (my $fh, ">", $Global::config_dir.
4594 "/runs-without-willing-to-cite")) {
4595 print $fh $runs;
4596 close $fh;
4597 if($runs >= 10) {
4598 ::status("Come on: You have run parallel $runs times. Isn't it about time ",
4599 "you run 'parallel --citation' once to silence the citation notice?",
4600 "");
4606 sub status(@) {
4607 my @w = @_;
4608 my $fh = $Global::status_fd || *STDERR;
4609 print $fh map { ($_, "\n") } @w;
4610 flush $fh;
4613 sub status_no_nl(@) {
4614 my @w = @_;
4615 my $fh = $Global::status_fd || *STDERR;
4616 print $fh @w;
4617 flush $fh;
4620 sub warning(@) {
4621 my @w = @_;
4622 my $prog = $Global::progname || "parallel";
4623 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
4626 sub error(@) {
4627 my @w = @_;
4628 my $prog = $Global::progname || "parallel";
4629 status(map { ($prog.": Error: ". $_); } @w);
4632 sub die_bug($) {
4633 my $bugid = shift;
4634 print STDERR
4635 ("$Global::progname: This should not happen. You have found a bug.\n",
4636 "Please contact <parallel\@gnu.org> and follow\n",
4637 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
4638 "\n",
4639 "Include this in the report:\n",
4640 "* The version number: $Global::version\n",
4641 "* The bugid: $bugid\n",
4642 "* The command line being run\n",
4643 "* The files being read (put the files on a webserver if they are big)\n",
4644 "\n",
4645 "If you get the error on smaller/fewer files, please include those instead.\n");
4646 ::wait_and_exit(255);
4649 sub version() {
4650 # Returns: N/A
4651 print join
4652 ("\n",
4653 "GNU $Global::progname $Global::version",
4654 "Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.",
4655 "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
4656 "This is free software: you are free to change and redistribute it.",
4657 "GNU $Global::progname comes with no warranty.",
4659 "Web site: http://www.gnu.org/software/${Global::progname}\n",
4660 "When using programs that use GNU Parallel to process data for publication",
4661 "please cite as described in 'parallel --citation'.\n",
4665 sub citation() {
4666 # Returns: N/A
4667 my ($all_argv_ref,$argv_options_removed_ref) = @_;
4668 my $all_argv = "@$all_argv_ref";
4669 my $no_opts = "@$argv_options_removed_ref";
4670 $all_argv=~s/--citation//;
4671 if($all_argv ne $no_opts) {
4672 ::warning("--citation ignores all other options and arguments.");
4673 ::status("");
4676 ::status(
4677 "Academic tradition requires you to cite works you base your article on.",
4678 "If you use programs that use GNU Parallel to process data for an article in a",
4679 "scientific publication, please cite:",
4681 "\@book{tange_ole_2018_1146014,",
4682 " author = {Tange, Ole},",
4683 " title = {GNU Parallel 2018},",
4684 " publisher = {Ole Tange},",
4685 " month = Mar,",
4686 " year = 2018,",
4687 " ISBN = {9781387509881},",
4688 " doi = {10.5281/zenodo.1146014},",
4689 " url = {https://doi.org/10.5281/zenodo.1146014}",
4690 "}",
4692 "(Feel free to use \\nocite{tange_ole_2018_1146014})",
4694 # Before changing this line, please read
4695 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4696 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
4697 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4698 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4700 "More about funding GNU Parallel and the citation notice:",
4701 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
4702 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4703 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
4705 "If you send a copy of your published article to tange\@gnu.org, it will be",
4706 "mentioned in the release notes of next version of GNU Parallel.",
4709 while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
4710 print "\nType: 'will cite' and press enter.\n> ";
4711 my $input = <STDIN>;
4712 if(not defined $input) {
4713 exit(255);
4715 if($input =~ /will cite/i) {
4716 mkdir $Global::config_dir;
4717 if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
4718 close $fh;
4719 ::status(
4721 "Thank you for your support: You are the reason why there is funding to",
4722 "continue maintaining GNU Parallel. On behalf of future versions of",
4723 "GNU Parallel, which would not exist without your support:",
4725 " THANK YOU SO MUCH",
4727 "It is really appreciated. The citation notice is now silenced.",
4728 "");
4729 } else {
4730 ::status(
4732 "Thank you for your support. It is much appreciated. The citation",
4733 "cannot permanently be silenced. Use '--will-cite' instead.",
4735 "If you use '--will-cite' in scripts to be run by others you are making",
4736 "it harder for others to see the citation notice. The development of",
4737 "GNU parallel is indirectly financed through citations, so if users",
4738 "do not know they should cite then you are making it harder to finance",
4739 "development. However, if you pay 10000 EUR, you should feel free to",
4740 "use '--will-cite' in scripts.",
4741 "");
4742 last;
4748 sub show_limits() {
4749 # Returns: N/A
4750 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
4751 "Maximal used size of command: ",Limits::Command::max_length(),"\n",
4752 "\n",
4753 "Execution of will continue now, and it will try to read its input\n",
4754 "and run commands; if this is not what you wanted to happen, please\n",
4755 "press CTRL-D or CTRL-C\n");
4758 sub embed() {
4759 # Give an embeddable version of GNU Parallel
4760 # Tested with: bash, zsh, ksh, ash, dash, sh
4761 my $randomstring = "cut-here-".join"",
4762 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
4763 if(not -f $0 or not -r $0) {
4764 ::error("--embed only works if parallel is a readable file");
4765 exit(255);
4767 if(open(my $fh, "<", $0)) {
4768 # Read the source from $0
4769 my @source = <$fh>;
4770 my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
4771 my @env_parallel_source = ();
4772 my $shell = $Global::shell;
4773 $shell =~ s:.*/::;
4774 for(which("env_parallel.$shell")) {
4775 -r $_ or next;
4776 # Read the source of env_parallel.shellname
4777 open(my $env_parallel_source_fh, $_) || die;
4778 @env_parallel_source = <$env_parallel_source_fh>;
4779 close $env_parallel_source_fh;
4780 last;
4782 print "#!$Global::shell
4784 # Copyright (C) 2007-2019 $user, Ole Tange and Free Software
4785 # Foundation, Inc.
4787 # This program is free software; you can redistribute it and/or modify
4788 # it under the terms of the GNU General Public License as published by
4789 # the Free Software Foundation; either version 3 of the License, or
4790 # (at your option) any later version.
4792 # This program is distributed in the hope that it will be useful, but
4793 # WITHOUT ANY WARRANTY; without even the implied warranty of
4794 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4795 # General Public License for more details.
4797 # You should have received a copy of the GNU General Public License
4798 # along with this program; if not, see <http://www.gnu.org/licenses/>
4799 # or write to the Free Software Foundation, Inc., 51 Franklin St,
4800 # Fifth Floor, Boston, MA 02110-1301 USA
4803 print q!
4804 # Embedded GNU Parallel created with --embed
4805 parallel() {
4806 # Start GNU Parallel without leaving temporary files
4808 # Not all shells support 'perl <(cat ...)'
4809 # This is a complex way of doing:
4810 # perl <(cat <<'cut-here'
4811 # [...]
4812 # ) "$@"
4813 # and also avoiding:
4814 # [1]+ Done cat
4816 # Make a temporary fifo that perl can read from
4817 _fifo_with_parallel_source=`perl -e 'use POSIX qw(mkfifo);
4818 do {
4819 $f = "/tmp/parallel-".join"",
4820 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
4821 } while(-e $f);
4822 mkfifo($f,0600);
4823 print $f;'`
4824 # Put source code into temporary file
4825 # so it is easy to copy to the fifo
4826 _file_with_parallel_source=`mktemp`;
4828 "cat <<'$randomstring' > \$_file_with_parallel_source\n",
4829 @source,
4830 $randomstring,"\n",
4832 # Copy the source code from the file to the fifo
4833 # and remove the file and fifo ASAP
4834 # 'sh -c' is needed to avoid
4835 # [1]+ Done cat
4836 sh -c "(rm $_file_with_parallel_source; cat >$_fifo_with_parallel_source; rm $_fifo_with_parallel_source) < $_file_with_parallel_source &"
4838 # Read the source from the fifo
4839 perl $_fifo_with_parallel_source "$@"
4842 @env_parallel_source,
4845 # This will call the functions above
4846 parallel -k echo ::: Put your code here
4847 env_parallel --session
4848 env_parallel -k echo ::: Put your code here
4849 parset p,y,c,h -k echo ::: Put your code here
4850 echo $p $y $c $h
4852 } else {
4853 ::error("Cannot open $0");
4854 exit(255);
4856 ::status("Redirect the output to a file and add your changes at the end:",
4857 " $0 --embed > new_script");
4861 sub __GENERIC_COMMON_FUNCTION__() {}
4864 sub mkdir_or_die($) {
4865 # If dir is not executable: die
4866 my $dir = shift;
4867 # The eval is needed to catch exception from mkdir
4868 eval { File::Path::mkpath($dir); };
4869 if(not -x $dir) {
4870 ::error("Cannot change into non-executable dir $dir: $!");
4871 ::wait_and_exit(255);
4875 sub tmpfile(@) {
4876 # Create tempfile as $TMPDIR/parXXXXX
4877 # Returns:
4878 # $filehandle = opened file handle
4879 # $filename = file name created
4880 my($filehandle,$filename) =
4881 ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
4882 if(wantarray) {
4883 return($filehandle,$filename);
4884 } else {
4885 # Separate unlink due to NFS dealing badly with File::Temp
4886 unlink $filename;
4887 return $filehandle;
4891 sub tmpname($) {
4892 # Select a name that does not exist
4893 # Do not create the file as it may be used for creating a socket (by tmux)
4894 # Remember the name in $Global::unlink to avoid hitting the same name twice
4895 my $name = shift;
4896 my($tmpname);
4897 if(not -w $ENV{'TMPDIR'}) {
4898 if(not -e $ENV{'TMPDIR'}) {
4899 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
4900 } else {
4901 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
4903 ::wait_and_exit(255);
4905 do {
4906 $tmpname = $ENV{'TMPDIR'}."/".$name.
4907 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
4908 } while(-e $tmpname or $Global::unlink{$tmpname}++);
4909 return $tmpname;
4912 sub tmpfifo() {
4913 # Find an unused name and mkfifo on it
4914 use POSIX qw(mkfifo);
4915 my $tmpfifo = tmpname("fif");
4916 mkfifo($tmpfifo,0600);
4917 return $tmpfifo;
4920 sub rm(@) {
4921 # Remove file and remove it from %Global::unlink
4922 # Uses:
4923 # %Global::unlink
4924 delete @Global::unlink{@_};
4925 unlink @_;
4928 sub size_of_block_dev() {
4929 # Like -s but for block devices
4930 # Input:
4931 # $blockdev = file name of block device
4932 # Returns:
4933 # $size = in bytes, undef if error
4934 my $blockdev = shift;
4935 if(open(my $fh, "<", $blockdev)) {
4936 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
4937 my $size = tell($fh);
4938 close $fh;
4939 return $size;
4940 } else {
4941 ::error("cannot open $blockdev");
4942 wait_and_exit(255);
4946 sub qqx(@) {
4947 # Like qx but with clean environment (except for @keep)
4948 # and STDERR ignored
4949 # This is needed if the environment contains functions
4950 # that /bin/sh does not understand
4951 my $PATH = $ENV{'PATH'};
4952 my %env;
4953 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
4954 # ssh with Kerberos needs KRB5CCNAME
4955 # tmux needs LC_CTYPE
4956 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE);
4957 @env{@keep} = @ENV{@keep};
4958 local %ENV;
4959 %ENV = %env;
4960 if($Global::debug) {
4961 return qx{ @_ && true };
4962 } else {
4963 return qx{ ( @_ ) 2>/dev/null };
4967 sub uniq(@) {
4968 # Remove duplicates and return unique values
4969 return keys %{{ map { $_ => 1 } @_ }};
4972 sub min(@) {
4973 # Returns:
4974 # Minimum value of array
4975 my $min;
4976 for (@_) {
4977 # Skip undefs
4978 defined $_ or next;
4979 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
4980 $min = ($min < $_) ? $min : $_;
4982 return $min;
4985 sub max(@) {
4986 # Returns:
4987 # Maximum value of array
4988 my $max;
4989 for (@_) {
4990 # Skip undefs
4991 defined $_ or next;
4992 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
4993 $max = ($max > $_) ? $max : $_;
4995 return $max;
4998 sub sum() {
4999 # Returns:
5000 # Sum of values of array
5001 my @args = @_;
5002 my $sum = 0;
5003 for (@args) {
5004 # Skip undefs
5005 $_ and do { $sum += $_; }
5007 return $sum;
5010 sub undef_as_zero($) {
5011 my $a = shift;
5012 return $a ? $a : 0;
5015 sub undef_as_empty($) {
5016 my $a = shift;
5017 return $a ? $a : "";
5020 sub undef_if_empty($) {
5021 if(defined($_[0]) and $_[0] eq "") {
5022 return undef;
5024 return $_[0];
5027 sub multiply_binary_prefix(@) {
5028 # Evalualte numbers with binary prefix
5029 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5030 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5031 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5032 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5033 # 13G = 13*1024*1024*1024 = 13958643712
5034 # Input:
5035 # $s = string with prefixes
5036 # Returns:
5037 # $value = int with prefixes multiplied
5038 my @v = @_;
5039 for(@v) {
5040 defined $_ or next;
5041 s/ki/*1024/gi;
5042 s/mi/*1024*1024/gi;
5043 s/gi/*1024*1024*1024/gi;
5044 s/ti/*1024*1024*1024*1024/gi;
5045 s/pi/*1024*1024*1024*1024*1024/gi;
5046 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5047 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5048 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5049 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5051 s/K/*1024/g;
5052 s/M/*1024*1024/g;
5053 s/G/*1024*1024*1024/g;
5054 s/T/*1024*1024*1024*1024/g;
5055 s/P/*1024*1024*1024*1024*1024/g;
5056 s/E/*1024*1024*1024*1024*1024*1024/g;
5057 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5058 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5059 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5061 s/k/*1000/g;
5062 s/m/*1000*1000/g;
5063 s/g/*1000*1000*1000/g;
5064 s/t/*1000*1000*1000*1000/g;
5065 s/p/*1000*1000*1000*1000*1000/g;
5066 s/e/*1000*1000*1000*1000*1000*1000/g;
5067 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5068 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5069 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5071 $_ = eval $_;
5073 return wantarray ? @v : $v[0];
5076 sub multiply_time_units($) {
5077 # Evalualte numbers with time units
5078 # s=1, m=60, h=3600, d=86400
5079 # Input:
5080 # $s = string time units
5081 # Returns:
5082 # $value = int in seconds
5083 my @v = @_;
5084 for(@v) {
5085 defined $_ or next;
5086 if(/[dhms]/i) {
5087 s/s/*1+/gi;
5088 s/m/*60+/gi;
5089 s/h/*3600+/gi;
5090 s/d/*86400+/gi;
5091 $_ = eval $_."0";
5094 return wantarray ? @v : $v[0];
5097 sub seconds_to_time_units() {
5098 # Convert seconds into ??d??h??m??s
5099 # s=1, m=60, h=3600, d=86400
5100 # Input:
5101 # $s = int in seconds
5102 # Returns:
5103 # $str = string time units
5104 my $s = shift;
5105 my $str;
5106 my $d = int($s/86400);
5107 $s -= $d * 86400;
5108 my $h = int($s/3600);
5109 $s -= $h * 3600;
5110 my $m = int($s/60);
5111 $s -= $m * 60;
5112 if($d) {
5113 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5114 } elsif($h) {
5115 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5116 } elsif($m) {
5117 $str = sprintf("%dm%02ds",$m,$s);
5118 } else {
5119 $str = sprintf("%ds",$s);
5121 return $str;
5125 my ($disk_full_fh, $b8193, $error_printed);
5126 sub exit_if_disk_full() {
5127 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5128 # If the disk is full: Exit immediately.
5129 # Returns:
5130 # N/A
5131 if(not $disk_full_fh) {
5132 $disk_full_fh = ::tmpfile(SUFFIX => ".df");
5133 $b8193 = "x"x8193;
5135 # Linux does not discover if a disk is full if writing <= 8192
5136 # Tested on:
5137 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5138 # ntfs reiserfs tmpfs ubifs vfat xfs
5139 # TODO this should be tested on different OS similar to this:
5141 # doit() {
5142 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5143 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5144 # seq 6900000 > /mnt/loop/i && echo seq OK
5145 # seq 6980868 > /mnt/loop/i
5146 # seq 10000 > /mnt/loop/ii
5147 # sleep 3
5148 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5149 # echo >&2
5151 print $disk_full_fh $b8193;
5152 if(not $disk_full_fh
5154 tell $disk_full_fh != 8193) {
5155 # On raspbian the disk can be full except for 10 chars.
5156 if(not $error_printed) {
5157 ::error("Output is incomplete.",
5158 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5159 "Is the disk full?",
5160 "Change \$TMPDIR with --tmpdir or use --compress.");
5161 $error_printed = 1;
5163 ::wait_and_exit(255);
5165 truncate $disk_full_fh, 0;
5166 seek($disk_full_fh, 0, 0) || die;
5170 sub spacefree($$) {
5171 # Remove comments and spaces
5172 # Inputs:
5173 # $spaces = keep 1 space?
5174 # $s = string to remove spaces from
5175 # Returns:
5176 # $s = with spaces removed
5177 my $spaces = shift;
5178 my $s = shift;
5179 $s =~ s/#.*//mg;
5180 if(1 == $spaces) {
5181 $s =~ s/\s+/ /mg;
5182 } elsif(2 == $spaces) {
5183 # Keep newlines
5184 $s =~ s/\n\n+/\n/sg;
5185 $s =~ s/[ \t]+/ /mg;
5186 } elsif(3 == $spaces) {
5187 # Keep perl code required space
5188 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5189 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5190 } else {
5191 $s =~ s/\s//mg;
5193 return $s;
5197 my $hostname;
5198 sub hostname() {
5199 local $/ = "\n";
5200 if(not $hostname) {
5201 $hostname = `hostname`;
5202 chomp($hostname);
5203 $hostname ||= "nohostname";
5205 return $hostname;
5209 sub which(@) {
5210 # Input:
5211 # @programs = programs to find the path to
5212 # Returns:
5213 # @full_path = full paths to @programs. Nothing if not found
5214 my @which;
5215 ::debug("which", "@_ in $ENV{'PATH'}\n");
5216 for my $prg (@_) {
5217 push(@which, grep { not -d $_ and -x $_ }
5218 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5219 if($prg =~ m:/:) {
5220 # Including path
5221 push(@which, grep { not -d $_ and -x $_ } $prg);
5224 return wantarray ? @which : $which[0];
5228 my ($regexp,$shell,%fakename);
5230 sub parent_shell {
5231 # Input:
5232 # $pid = pid to see if (grand)*parent is a shell
5233 # Returns:
5234 # $shellpath = path to shell - undef if no shell found
5235 my $pid = shift;
5236 ::debug("init","Parent of $pid\n");
5237 if(not $regexp) {
5238 # All shells known to mankind
5240 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5241 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5243 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
5244 ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5245 static-sh tcsh yash zsh -sh -csh -bash),
5246 '-sh (sh)' # sh on FreeBSD
5248 # Can be formatted as:
5249 # [sh] -sh sh busybox sh -sh (sh)
5250 # /bin/sh /sbin/sh /opt/csw/sh
5251 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5252 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5253 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5254 '(-?)('. $shell. '))( *$| [^(])';
5255 %fakename = (
5256 # sh disguises itself as -sh (sh) on FreeBSD
5257 "-sh (sh)" => ["sh"],
5258 # csh and tcsh disguise themselves as -sh/-csh
5259 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5260 # but sh also disguise itself as -sh
5261 # (TODO When does that happen?)
5262 "-sh" => ["sh"],
5263 "-csh" => ["tcsh", "csh"],
5264 # ash disguises itself as -ash
5265 "-ash" => ["ash", "dash", "sh"],
5266 # dash disguises itself as -dash
5267 "-dash" => ["dash", "ash", "sh"],
5268 # bash disguises itself as -bash
5269 "-bash" => ["bash", "sh"],
5270 # ksh disguises itself as -ksh
5271 "-ksh" => ["ksh", "sh"],
5272 # zsh disguises itself as -zsh
5273 "-zsh" => ["zsh", "sh"],
5276 # if -sh or -csh try readlink /proc/$$/exe
5277 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
5278 my $shellpath;
5279 my $testpid = $pid;
5280 while($testpid) {
5281 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5282 my $shellname = $4 || $8;
5283 my $dash = $3 || $7;
5284 if($shellname eq "sh" and $dash) {
5285 # -sh => csh or sh
5286 if($shellpath = readlink "/proc/$testpid/exe") {
5287 ::debug("init","procpath $shellpath\n");
5288 if($shellpath =~ m:/$shell$:o) {
5289 ::debug("init", "proc which ".$shellpath." => ");
5290 return $shellpath;
5294 ::debug("init", "which ".$shellname." => ");
5295 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5296 ::debug("init", "shell path $shellpath\n");
5297 $shellpath and last;
5299 if($testpid == $parent_of_ref->{$testpid}) {
5300 # In Solaris zones, the PPID of the zsched process is itself
5301 last;
5303 $testpid = $parent_of_ref->{$testpid};
5305 return $shellpath;
5310 my %pid_parentpid_cmd;
5312 sub pid_table() {
5313 # Returns:
5314 # %children_of = { pid -> children of pid }
5315 # %parent_of = { pid -> pid of parent }
5316 # %name_of = { pid -> commandname }
5318 if(not %pid_parentpid_cmd) {
5319 # Filter for SysV-style `ps`
5320 my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5321 q(s/^.{$s}//; print "@F[1,2] $_"' );
5322 # Minix uses cols 2,3 and can have newlines in the command
5323 # so lines not having numbers in cols 2,3 must be ignored
5324 my $minix = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5325 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5326 # BSD-style `ps`
5327 my $bsd = q(ps -o pid,ppid,command -ax);
5328 %pid_parentpid_cmd =
5330 'aix' => $sysv,
5331 'android' => $sysv,
5332 'cygwin' => $sysv,
5333 'darwin' => $bsd,
5334 'dec_osf' => $sysv,
5335 'dragonfly' => $bsd,
5336 'freebsd' => $bsd,
5337 'gnu' => $sysv,
5338 'hpux' => $sysv,
5339 'linux' => $sysv,
5340 'mirbsd' => $bsd,
5341 'minix' => $minix,
5342 'msys' => $sysv,
5343 'MSWin32' => $sysv,
5344 'netbsd' => $bsd,
5345 'nto' => $sysv,
5346 'openbsd' => $bsd,
5347 'solaris' => $sysv,
5348 'svr5' => $sysv,
5349 'syllable' => "echo ps not supported",
5352 $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
5354 my (@pidtable,%parent_of,%children_of,%name_of);
5355 # Table with pid -> children of pid
5356 @pidtable = `$pid_parentpid_cmd{$^O}`;
5357 my $p=$$;
5358 for (@pidtable) {
5359 # must match: 24436 21224 busybox ash
5360 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5361 # must match: 24436 21224 <<empty on system running Viber>>
5362 # or: perl -e 'while($0=" "){}'
5363 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5365 /^\s*(\S+)\s+(\S+)\s+()$/) {
5366 $parent_of{$1} = $2;
5367 push @{$children_of{$2}}, $1;
5368 $name_of{$1} = $3;
5369 } else {
5370 ::die_bug("pidtable format: $_");
5373 return(\%children_of, \%parent_of, \%name_of);
5377 sub now() {
5378 # Returns time since epoch as in seconds with 3 decimals
5379 # Uses:
5380 # @Global::use
5381 # Returns:
5382 # $time = time now with millisecond accuracy
5383 if(not $Global::use{"Time::HiRes"}) {
5384 if(eval "use Time::HiRes qw ( time );") {
5385 eval "sub TimeHiRestime { return Time::HiRes::time };";
5386 } else {
5387 eval "sub TimeHiRestime { return time() };";
5389 $Global::use{"Time::HiRes"} = 1;
5392 return (int(TimeHiRestime()*1000))/1000;
5395 sub usleep($) {
5396 # Sleep this many milliseconds.
5397 # Input:
5398 # $ms = milliseconds to sleep
5399 my $ms = shift;
5400 ::debug("timing",int($ms),"ms ");
5401 select(undef, undef, undef, $ms/1000);
5404 sub __KILLER_REAPER__() {}
5406 sub reap_usleep() {
5407 # Reap dead children.
5408 # If no dead children: Sleep specified amount with exponential backoff
5409 # Input:
5410 # $ms = milliseconds to sleep
5411 # Returns:
5412 # $ms/2+0.001 if children reaped
5413 # $ms*1.1 if no children reaped
5414 my $ms = shift;
5415 if(reapers()) {
5416 if(not $Global::total_completed % 100) {
5417 if($opt::timeout) {
5418 # Force cleaning the timeout queue for every 1000 jobs
5419 # Fixes potential memleak
5420 $Global::timeoutq->process_timeouts();
5423 # Sleep exponentially shorter (1/2^n) if a job finished
5424 return $ms/2+0.001;
5425 } else {
5426 if($opt::timeout) {
5427 $Global::timeoutq->process_timeouts();
5429 if($opt::memfree) {
5430 kill_youngster_if_not_enough_mem();
5432 if($opt::limit) {
5433 kill_youngest_if_over_limit();
5435 if($ms > 0.002) {
5436 # When a child dies, wake up from sleep (or select(,,,))
5437 $SIG{CHLD} = sub { kill "ALRM", $$ };
5438 usleep($ms);
5439 # --compress needs $SIG{CHLD} unset
5440 $SIG{CHLD} = 'DEFAULT';
5442 exit_if_disk_full();
5443 if($opt::linebuffer) {
5444 my $something_printed = 0;
5445 if($opt::keeporder) {
5446 for my $job (values %Global::running) {
5447 $something_printed += $job->print_earlier_jobs();
5449 } else {
5450 for my $job (values %Global::running) {
5451 $something_printed += $job->print();
5454 if($something_printed) {
5455 $ms = $ms/2+0.001;
5458 # Sleep exponentially longer (1.1^n) if a job did not finish,
5459 # though at most 1000 ms.
5460 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
5464 sub kill_youngest_if_over_limit() {
5465 # Check each $sshlogin we are over limit
5466 # If over limit: kill off the youngest child
5467 # Put the child back in the queue.
5468 # Uses:
5469 # %Global::running
5470 my %jobs_of;
5471 my @sshlogins;
5473 for my $job (values %Global::running) {
5474 if(not $jobs_of{$job->sshlogin()}) {
5475 push @sshlogins, $job->sshlogin();
5477 push @{$jobs_of{$job->sshlogin()}}, $job;
5479 for my $sshlogin (@sshlogins) {
5480 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5481 if($sshlogin->limit() == 2) {
5482 $job->kill();
5483 last;
5489 sub kill_youngster_if_not_enough_mem() {
5490 # Check each $sshlogin if there is enough mem.
5491 # If less than 50% enough free mem: kill off the youngest child
5492 # Put the child back in the queue.
5493 # Uses:
5494 # %Global::running
5495 my %jobs_of;
5496 my @sshlogins;
5498 for my $job (values %Global::running) {
5499 if(not $jobs_of{$job->sshlogin()}) {
5500 push @sshlogins, $job->sshlogin();
5502 push @{$jobs_of{$job->sshlogin()}}, $job;
5504 for my $sshlogin (@sshlogins) {
5505 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5506 if($sshlogin->memfree() < $opt::memfree * 0.5) {
5507 ::debug("mem","\n",map { $_->seq()." " }
5508 (sort { $b->seq() <=> $a->seq() }
5509 @{$jobs_of{$sshlogin}}));
5510 ::debug("mem","\n", $job->seq(), "killed ",
5511 $sshlogin->memfree()," < ",$opt::memfree * 0.5);
5512 $job->kill();
5513 $sshlogin->memfree_recompute();
5514 } else {
5515 last;
5518 ::debug("mem","Free mem OK ",
5519 $sshlogin->memfree()," > ",$opt::memfree * 0.5);
5524 sub __DEBUGGING__() {}
5527 sub debug(@) {
5528 # Uses:
5529 # $Global::debug
5530 # %Global::fd
5531 # Returns: N/A
5532 $Global::debug or return;
5533 @_ = grep { defined $_ ? $_ : "" } @_;
5534 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
5535 if($Global::fd{1}) {
5536 # Original stdout was saved
5537 my $stdout = $Global::fd{1};
5538 print $stdout @_[1..$#_];
5539 } else {
5540 print @_[1..$#_];
5545 sub my_memory_usage() {
5546 # Returns:
5547 # memory usage if found
5548 # 0 otherwise
5549 use strict;
5550 use FileHandle;
5552 local $/ = "\n";
5553 my $pid = $$;
5554 if(-e "/proc/$pid/stat") {
5555 my $fh = FileHandle->new("</proc/$pid/stat");
5557 my $data = <$fh>;
5558 chomp $data;
5559 $fh->close;
5561 my @procinfo = split(/\s+/,$data);
5563 return undef_as_zero($procinfo[22]);
5564 } else {
5565 return 0;
5569 sub my_size() {
5570 # Returns:
5571 # $size = size of object if Devel::Size is installed
5572 # -1 otherwise
5573 my @size_this = (@_);
5574 eval "use Devel::Size qw(size total_size)";
5575 if ($@) {
5576 return -1;
5577 } else {
5578 return total_size(@_);
5582 sub my_dump(@) {
5583 # Returns:
5584 # ascii expression of object if Data::Dump(er) is installed
5585 # error code otherwise
5586 my @dump_this = (@_);
5587 eval "use Data::Dump qw(dump);";
5588 if ($@) {
5589 # Data::Dump not installed
5590 eval "use Data::Dumper;";
5591 if ($@) {
5592 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
5593 "Not dumping output\n";
5594 ::status($err);
5595 return $err;
5596 } else {
5597 return Dumper(@dump_this);
5599 } else {
5600 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
5601 # it undefined
5602 eval "sub Data::Dump:dump {}";
5603 eval "use Data::Dump qw(dump);";
5604 return (Data::Dump::dump(@dump_this));
5608 sub my_croak(@) {
5609 eval "use Carp; 1";
5610 $Carp::Verbose = 1;
5611 croak(@_);
5614 sub my_carp() {
5615 eval "use Carp; 1";
5616 $Carp::Verbose = 1;
5617 carp(@_);
5621 sub __OBJECT_ORIENTED_PARTS__() {}
5624 package SSHLogin;
5626 sub new($$) {
5627 my $class = shift;
5628 my $sshlogin_string = shift;
5629 my $ncpus;
5630 my %hostgroups;
5631 # SSHLogins can have these formats:
5632 # @grp+grp/ncpu//usr/bin/ssh user@server
5633 # ncpu//usr/bin/ssh user@server
5634 # /usr/bin/ssh user@server
5635 # user@server
5636 # ncpu/user@server
5637 # @grp+grp/user@server
5638 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
5639 # Look for SSHLogin hostgroups
5640 %hostgroups = map { $_ => 1 } split(/\+/, $1);
5642 # An SSHLogin is always in the hostgroup of its "numcpu/host"
5643 $hostgroups{$sshlogin_string} = 1;
5644 if ($sshlogin_string =~ s:^(\d+)/::) {
5645 # Override default autodetected ncpus unless missing
5646 $ncpus = $1;
5648 my $string = $sshlogin_string;
5649 # An SSHLogin is always in the hostgroup of its $string-name
5650 $hostgroups{$string} = 1;
5651 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
5652 my @unget = ();
5653 my $no_slash_string = $string;
5654 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
5655 return bless {
5656 'string' => $string,
5657 'jobs_running' => 0,
5658 'jobs_completed' => 0,
5659 'maxlength' => undef,
5660 'max_jobs_running' => undef,
5661 'orig_max_jobs_running' => undef,
5662 'ncpus' => $ncpus,
5663 'hostgroups' => \%hostgroups,
5664 'sshcommand' => undef,
5665 'serverlogin' => undef,
5666 'control_path_dir' => undef,
5667 'control_path' => undef,
5668 'time_to_login' => undef,
5669 'last_login_at' => undef,
5670 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
5671 $no_slash_string . "/loadavg",
5672 'loadavg' => undef,
5673 'last_loadavg_update' => 0,
5674 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
5675 $no_slash_string . "/swap_activity",
5676 'swap_activity' => undef,
5677 }, ref($class) || $class;
5680 sub DESTROY($) {
5681 my $self = shift;
5682 # Remove temporary files if they are created.
5683 ::rm($self->{'loadavg_file'});
5684 ::rm($self->{'swap_activity_file'});
5687 sub string($) {
5688 my $self = shift;
5689 return $self->{'string'};
5692 sub jobs_running($) {
5693 my $self = shift;
5694 return ($self->{'jobs_running'} || "0");
5697 sub inc_jobs_running($) {
5698 my $self = shift;
5699 $self->{'jobs_running'}++;
5702 sub dec_jobs_running($) {
5703 my $self = shift;
5704 $self->{'jobs_running'}--;
5707 sub set_maxlength($$) {
5708 my $self = shift;
5709 $self->{'maxlength'} = shift;
5712 sub maxlength($) {
5713 my $self = shift;
5714 return $self->{'maxlength'};
5717 sub jobs_completed() {
5718 my $self = shift;
5719 return $self->{'jobs_completed'};
5722 sub in_hostgroups() {
5723 # Input:
5724 # @hostgroups = the hostgroups to look for
5725 # Returns:
5726 # true if intersection of @hostgroups and the hostgroups of this
5727 # SSHLogin is non-empty
5728 my $self = shift;
5729 return grep { defined $self->{'hostgroups'}{$_} } @_;
5732 sub hostgroups() {
5733 my $self = shift;
5734 return keys %{$self->{'hostgroups'}};
5737 sub inc_jobs_completed($) {
5738 my $self = shift;
5739 $self->{'jobs_completed'}++;
5740 $Global::total_completed++;
5743 sub set_max_jobs_running($$) {
5744 my $self = shift;
5745 if(defined $self->{'max_jobs_running'}) {
5746 $Global::max_jobs_running -= $self->{'max_jobs_running'};
5748 $self->{'max_jobs_running'} = shift;
5749 if(defined $self->{'max_jobs_running'}) {
5750 # max_jobs_running could be resat if -j is a changed file
5751 $Global::max_jobs_running += $self->{'max_jobs_running'};
5753 # Initialize orig to the first non-zero value that comes around
5754 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
5757 sub memfree() {
5758 # Returns:
5759 # $memfree in bytes
5760 my $self = shift;
5761 $self->memfree_recompute();
5762 # Return 1 if not defined.
5763 return (not defined $self->{'memfree'} or $self->{'memfree'})
5766 sub memfree_recompute() {
5767 my $self = shift;
5768 my $script = memfreescript();
5770 # TODO add sshlogin and backgrounding
5771 # Run the script twice if it gives 0 (typically intermittent error)
5772 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
5773 if(not $self->{'memfree'}) {
5774 ::die_bug("Less than 1 byte free");
5776 #::debug("mem","New free:",$self->{'memfree'}," ");
5780 my $script;
5782 sub memfreescript() {
5783 # Returns:
5784 # shellscript for giving available memory in bytes
5785 if(not $script) {
5786 my %script_of = (
5787 # /proc/meminfo
5788 # MemFree: 7012 kB
5789 # Buffers: 19876 kB
5790 # Cached: 431192 kB
5791 # SwapCached: 0 kB
5792 "linux" =>
5793 q[ print 1024 * qx{ ].
5794 q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ].
5795 q[ { sum += \$2} END { print sum }' ].
5796 q[ /proc/meminfo } ],
5797 # Android uses same code as GNU/Linux
5798 "android" =>
5799 q[ print 1024 * qx{ ].
5800 q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ].
5801 q[ { sum += \$2} END { print sum }' ].
5802 q[ /proc/meminfo } ],
5804 # $ vmstat 1 1
5805 # procs memory page faults cpu
5806 # r b w avm free re at pi po fr de sr in sy cs us sy id
5807 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
5808 "hpux" =>
5809 q[ print (((reverse `vmstat 1 1`)[0] ].
5810 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
5811 # $ vmstat 1 2
5812 # kthr memory page disk faults cpu
5813 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
5814 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
5815 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
5817 # The second free value is correct
5818 "solaris" =>
5819 q[ print (((reverse `vmstat 1 2`)[0] ].
5820 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
5821 "freebsd" => q{
5822 for(qx{/sbin/sysctl -a}) {
5823 if (/^([^:]+):\s+(.+)\s*$/s) {
5824 $sysctl->{$1} = $2;
5827 print $sysctl->{"hw.pagesize"} *
5828 ($sysctl->{"vm.stats.vm.v_cache_count"}
5829 + $sysctl->{"vm.stats.vm.v_inactive_count"}
5830 + $sysctl->{"vm.stats.vm.v_free_count"});
5832 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
5833 # Pages free: 198061.
5834 # Pages active: 159701.
5835 # Pages inactive: 47378.
5836 # Pages speculative: 29707.
5837 # Pages wired down: 89231.
5838 # "Translation faults": 928901425.
5839 # Pages copy-on-write: 156988239.
5840 # Pages zero filled: 271267894.
5841 # Pages reactivated: 48895.
5842 # Pageins: 1798068.
5843 # Pageouts: 257.
5844 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
5845 'darwin' =>
5846 q[ $vm = `vm_stat`;
5847 print (($vm =~ /page size of (\d+)/)[0] *
5848 (($vm =~ /Pages free:\s+(\d+)/)[0] +
5849 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
5852 my $perlscript = "";
5853 # Make a perl script that detects the OS ($^O) and runs
5854 # the appropriate command
5855 for my $os (keys %script_of) {
5856 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
5858 $perlscript =~ s/[\t\n ]+/ /g;
5859 $script = "perl -e " . ::Q($perlscript);
5861 return $script;
5865 sub limit($) {
5866 # Returns:
5867 # 0 = Below limit. Start another job.
5868 # 1 = Over limit. Start no jobs.
5869 # 2 = Kill youngest job
5870 my $self = shift;
5872 if(not defined $self->{'limitscript'}) {
5873 my %limitscripts =
5874 ("io" => q!
5875 io() {
5876 limit=$1;
5877 io_file=$2;
5878 # Do the measurement in the background
5879 (tmp=$(tempfile);
5880 LANG=C iostat -x 1 2 > $tmp;
5881 mv $tmp $io_file) &
5882 perl -e '-e $ARGV[0] or exit(1);
5883 for(reverse <>) {
5884 /Device/ and last;
5885 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
5886 exit ($max < '$limit')' $io_file;
5888 export -f io;
5889 io %s %s
5891 "mem" => q!
5892 mem() {
5893 limit=$1;
5894 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
5895 END {
5896 if (sum*1024 < '$limit'/2) { exit 2; }
5897 else { exit (sum*1024 < '$limit') }
5898 }' /proc/meminfo;
5900 export -f mem;
5901 mem %s;
5903 "load" => q!
5904 load() {
5905 limit=$1;
5906 ps ax -o state,command |
5907 grep -E '^[DOR].[^[]' |
5908 wc -l |
5909 perl -ne 'exit ('$limit' < $_)';
5911 export -f load;
5912 load %s;
5915 my ($cmd,@args) = split /\s+/,$opt::limit;
5916 if($limitscripts{$cmd}) {
5917 my $tmpfile = ::tmpname("parlmt");
5918 ++$Global::unlink{$tmpfile};
5919 $self->{'limitscript'} =
5920 ::spacefree(1, sprintf($limitscripts{$cmd},
5921 ::multiply_binary_prefix(@args),$tmpfile));
5922 } else {
5923 $self->{'limitscript'} = $opt::limit;
5927 my %env = %ENV;
5928 local %ENV = %env;
5929 $ENV{'SSHLOGIN'} = $self->string();
5930 system($Global::shell,"-c",$self->{'limitscript'});
5931 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
5932 return $?>>8;
5936 sub swapping($) {
5937 my $self = shift;
5938 my $swapping = $self->swap_activity();
5939 return (not defined $swapping or $swapping)
5942 sub swap_activity($) {
5943 # If the currently known swap activity is too old:
5944 # Recompute a new one in the background
5945 # Returns:
5946 # last swap activity computed
5947 my $self = shift;
5948 # Should we update the swap_activity file?
5949 my $update_swap_activity_file = 0;
5950 if(-r $self->{'swap_activity_file'}) {
5951 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
5952 ::die_bug("swap_activity_file-r");
5953 my $swap_out = <$swap_fh>;
5954 close $swap_fh;
5955 if($swap_out =~ /^(\d+)$/) {
5956 $self->{'swap_activity'} = $1;
5957 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
5959 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
5960 if(time - $self->{'last_swap_activity_update'} > 10) {
5961 # last swap activity update was started 10 seconds ago
5962 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
5963 $update_swap_activity_file = 1;
5965 } else {
5966 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
5967 $self->{'swap_activity'} = undef;
5968 $update_swap_activity_file = 1;
5970 if($update_swap_activity_file) {
5971 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
5972 $self->{'last_swap_activity_update'} = time;
5973 my $dir = ::dirname($self->{'swap_activity_file'});
5974 -d $dir or eval { File::Path::mkpath($dir); };
5975 my $swap_activity;
5976 $swap_activity = swapactivityscript();
5977 if($self->{'string'} ne ":") {
5978 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
5979 ::Q($swap_activity);
5981 # Run swap_activity measuring.
5982 # As the command can take long to run if run remote
5983 # save it to a tmp file before moving it to the correct file
5984 my $file = $self->{'swap_activity_file'};
5985 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
5986 ::debug("swap", "\n", $swap_activity, "\n");
5987 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
5989 return $self->{'swap_activity'};
5993 my $script;
5995 sub swapactivityscript() {
5996 # Returns:
5997 # shellscript for detecting swap activity
5999 # arguments for vmstat are OS dependant
6000 # swap_in and swap_out are in different columns depending on OS
6002 if(not $script) {
6003 my %vmstat = (
6004 # linux: $7*$8
6005 # $ vmstat 1 2
6006 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6007 # r b swpd free buff cache si so bi bo in cs us sy id wa
6008 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6009 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6010 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6012 # solaris: $6*$7
6013 # $ vmstat -S 1 2
6014 # kthr memory page disk faults cpu
6015 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6016 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6017 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6018 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6020 # darwin (macosx): $21*$22
6021 # $ vm_stat -c 2 1
6022 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6023 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6024 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6025 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6026 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6028 # ultrix: $12*$13
6029 # $ vmstat -S 1 2
6030 # procs faults cpu memory page disk
6031 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6032 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6033 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6034 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6036 # aix: $6*$7
6037 # $ vmstat 1 2
6038 # System configuration: lcpu=1 mem=2048MB
6040 # kthr memory page faults cpu
6041 # ----- ----------- ------------------------ ------------ -----------
6042 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6043 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6044 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6045 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6047 # freebsd: $8*$9
6048 # $ vmstat -H 1 2
6049 # procs memory page disks faults cpu
6050 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6051 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6052 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6053 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6055 # mirbsd: $8*$9
6056 # $ vmstat 1 2
6057 # procs memory page disks traps cpu
6058 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6059 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6060 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6061 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6063 # netbsd: $7*$8
6064 # $ vmstat 1 2
6065 # procs memory page disks faults cpu
6066 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6067 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6068 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6069 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6071 # openbsd: $8*$9
6072 # $ vmstat 1 2
6073 # procs memory page disks traps cpu
6074 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6075 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6076 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6077 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6079 # hpux: $8*$9
6080 # $ vmstat 1 2
6081 # procs memory page faults cpu
6082 # r b w avm free re at pi po fr de sr in sy cs us sy id
6083 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6084 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6085 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6087 # dec_osf (tru64): $11*$12
6088 # $ vmstat 1 2
6089 # Virtual Memory Statistics: (pagesize = 8192)
6090 # procs memory pages intr cpu
6091 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6092 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6093 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6094 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6096 # gnu (hurd): $7*$8
6097 # $ vmstat -k 1 2
6098 # (pagesize: 4, size: 512288, swap size: 894972)
6099 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6100 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6101 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6102 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6104 # -nto (qnx has no swap)
6105 #-irix
6106 #-svr5 (scosysv)
6108 my $perlscript = "";
6109 # Make a perl script that detects the OS ($^O) and runs
6110 # the appropriate vmstat command
6111 for my $os (keys %vmstat) {
6112 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6113 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6114 $vmstat{$os}[1] . '}"` }';
6116 $script = "perl -e " . ::Q($perlscript);
6118 return $script;
6122 sub too_fast_remote_login($) {
6123 my $self = shift;
6124 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6125 # sshd normally allows 10 simultaneous logins
6126 # A login takes time_to_login
6127 # So time_to_login/5 should be safe
6128 # If now <= last_login + time_to_login/5: Then it is too soon.
6129 my $too_fast = (::now() <= $self->{'last_login_at'}
6130 + $self->{'time_to_login'}/5);
6131 ::debug("run", "Too fast? $too_fast ");
6132 return $too_fast;
6133 } else {
6134 # No logins so far (or time_to_login not computed): it is not too fast
6135 return 0;
6139 sub last_login_at($) {
6140 my $self = shift;
6141 return $self->{'last_login_at'};
6144 sub set_last_login_at($$) {
6145 my $self = shift;
6146 $self->{'last_login_at'} = shift;
6149 sub loadavg_too_high($) {
6150 my $self = shift;
6151 my $loadavg = $self->loadavg();
6152 if(defined $loadavg) {
6153 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6154 return $loadavg >= $self->max_loadavg();
6155 } else {
6156 # Unknown load: Assume load is too high
6157 return 1;
6162 my $cmd;
6163 sub loadavg_cmd() {
6164 if(not $cmd) {
6165 # aix => "ps -ae -o state,command" # state wrong
6166 # bsd => "ps ax -o state,command"
6167 # sysv => "ps -ef -o s -o comm"
6168 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6169 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6170 # awk '{print $2,$1}'
6171 # dec_osf => bsd
6172 # dragonfly => bsd
6173 # freebsd => bsd
6174 # gnu => bsd
6175 # hpux => ps -el|awk '{print $2,$14,$15}'
6176 # irix => ps -ef -o state -o comm
6177 # linux => bsd
6178 # minix => ps el|awk '{print \$1,\$11}'
6179 # mirbsd => bsd
6180 # netbsd => bsd
6181 # openbsd => bsd
6182 # solaris => sysv
6183 # svr5 => sysv
6184 # ultrix => ps -ax | awk '{print $3,$5}'
6185 # unixware => ps -el|awk '{print $2,$14,$15}'
6186 my $ps = ::spacefree(1,q{
6187 $sysv="ps -ef -o s -o comm";
6188 $sysv2="ps -ef -o state -o comm";
6189 $bsd="ps ax -o state,command";
6190 # Treat threads as processes
6191 $bsd2="ps axH -o state,command";
6192 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6193 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6194 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6195 awk '{print $2,$1}' };
6196 $dummy="echo S COMMAND;echo R dummy";
6197 %ps=(
6198 # TODO Find better code for AIX/Android
6199 'aix' => "uptime",
6200 'android' => "uptime",
6201 'cygwin' => $cygwin,
6202 'darwin' => $bsd,
6203 'dec_osf' => $sysv2,
6204 'dragonfly' => $bsd,
6205 'freebsd' => $bsd2,
6206 'gnu' => $bsd,
6207 'hpux' => $psel,
6208 'irix' => $sysv2,
6209 'linux' => $bsd2,
6210 'minix' => "ps el|awk '{print \$1,\$11}'",
6211 'mirbsd' => $bsd,
6212 'msys' => $cygwin,
6213 'netbsd' => $bsd,
6214 'nto' => $dummy,
6215 'openbsd' => $bsd,
6216 'solaris' => $sysv,
6217 'svr5' => $psel,
6218 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6219 'MSWin32' => $sysv,
6221 print `$ps{$^O}`;
6223 # The command is too long for csh, so base64_wrap the command
6224 $cmd = Job::base64_wrap($ps);
6226 return $cmd;
6231 sub loadavg($) {
6232 # If the currently know loadavg is too old:
6233 # Recompute a new one in the background
6234 # The load average is computed as the number of processes waiting for disk
6235 # or CPU right now. So it is the server load this instant and not averaged over
6236 # several minutes. This is needed so GNU Parallel will at most start one job
6237 # that will push the load over the limit.
6239 # Returns:
6240 # $last_loadavg = last load average computed (undef if none)
6241 my $self = shift;
6242 # Should we update the loadavg file?
6243 my $update_loadavg_file = 0;
6244 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6245 local $/; # $/ = undef => slurp whole file
6246 my $load_out = <$load_fh>;
6247 close $load_fh;
6248 if($load_out =~ /\S/) {
6249 # Content can be empty if ~/ is on NFS
6250 # due to reading being non-atomic.
6252 # Count lines starting with D,O,R but command does not start with [
6253 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6254 if($load > 0) {
6255 # load is overestimated by 1
6256 $self->{'loadavg'} = $load - 1;
6257 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6258 } elsif ($load_out=~/average: (\d+.\d+)/) {
6259 # AIX does not support instant load average
6260 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6261 $self->{'loadavg'} = $1;
6262 } else {
6263 ::die_bug("loadavg_invalid_content: " .
6264 $self->{'loadavg_file'} . "\n$load_out");
6267 $update_loadavg_file = 1;
6268 } else {
6269 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6270 $self->{'loadavg'} = undef;
6271 $update_loadavg_file = 1;
6273 if($update_loadavg_file) {
6274 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
6275 $self->{'last_loadavg_update'} = time;
6276 my $dir = ::dirname($self->{'swap_activity_file'});
6277 -d $dir or eval { File::Path::mkpath($dir); };
6278 -w $dir or ::die_bug("Cannot write to $dir");
6279 my $cmd = "";
6280 if($self->{'string'} ne ":") {
6281 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
6282 ::Q(loadavg_cmd());
6283 } else {
6284 $cmd .= loadavg_cmd();
6286 # As the command can take long to run if run remote
6287 # save it to a tmp file before moving it to the correct file
6288 ::debug("load", "Update load\n");
6289 my $file = $self->{'loadavg_file'};
6290 # tmpfile on same filesystem as $file
6291 my $tmpfile = $file.$$;
6292 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6294 return $self->{'loadavg'};
6297 sub max_loadavg($) {
6298 my $self = shift;
6299 # If --load is a file it might be changed
6300 if($Global::max_load_file) {
6301 my $mtime = (stat($Global::max_load_file))[9];
6302 if($mtime > $Global::max_load_file_last_mod) {
6303 $Global::max_load_file_last_mod = $mtime;
6304 for my $sshlogin (values %Global::host) {
6305 $sshlogin->set_max_loadavg(undef);
6309 if(not defined $self->{'max_loadavg'}) {
6310 $self->{'max_loadavg'} =
6311 $self->compute_max_loadavg($opt::load);
6313 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6314 return $self->{'max_loadavg'};
6317 sub set_max_loadavg($$) {
6318 my $self = shift;
6319 $self->{'max_loadavg'} = shift;
6322 sub compute_max_loadavg($) {
6323 # Parse the max loadaverage that the user asked for using --load
6324 # Returns:
6325 # max loadaverage
6326 my $self = shift;
6327 my $loadspec = shift;
6328 my $load;
6329 if(defined $loadspec) {
6330 if($loadspec =~ /^\+(\d+)$/) {
6331 # E.g. --load +2
6332 my $j = $1;
6333 $load =
6334 $self->ncpus() + $j;
6335 } elsif ($loadspec =~ /^-(\d+)$/) {
6336 # E.g. --load -2
6337 my $j = $1;
6338 $load =
6339 $self->ncpus() - $j;
6340 } elsif ($loadspec =~ /^(\d+)\%$/) {
6341 my $j = $1;
6342 $load =
6343 $self->ncpus() * $j / 100;
6344 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6345 $load = $1;
6346 } elsif (-f $loadspec) {
6347 $Global::max_load_file = $loadspec;
6348 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6349 if(open(my $in_fh, "<", $Global::max_load_file)) {
6350 my $opt_load_file = join("",<$in_fh>);
6351 close $in_fh;
6352 $load = $self->compute_max_loadavg($opt_load_file);
6353 } else {
6354 ::error("Cannot open $loadspec.");
6355 ::wait_and_exit(255);
6357 } else {
6358 ::error("Parsing of --load failed.");
6359 ::die_usage();
6361 if($load < 0.01) {
6362 $load = 0.01;
6365 return $load;
6368 sub time_to_login($) {
6369 my $self = shift;
6370 return $self->{'time_to_login'};
6373 sub set_time_to_login($$) {
6374 my $self = shift;
6375 $self->{'time_to_login'} = shift;
6378 sub max_jobs_running($) {
6379 my $self = shift;
6380 if(not defined $self->{'max_jobs_running'}) {
6381 my $nproc = $self->compute_number_of_processes($opt::jobs);
6382 $self->set_max_jobs_running($nproc);
6384 return $self->{'max_jobs_running'};
6387 sub orig_max_jobs_running($) {
6388 my $self = shift;
6389 return $self->{'orig_max_jobs_running'};
6392 sub compute_number_of_processes($) {
6393 # Number of processes wanted and limited by system resources
6394 # Returns:
6395 # Number of processes
6396 my $self = shift;
6397 my $opt_P = shift;
6398 my $wanted_processes = $self->user_requested_processes($opt_P);
6399 if(not defined $wanted_processes) {
6400 $wanted_processes = $Global::default_simultaneous_sshlogins;
6402 ::debug("load", "Wanted procs: $wanted_processes\n");
6403 my $system_limit =
6404 $self->processes_available_by_system_limit($wanted_processes);
6405 ::debug("load", "Limited to procs: $system_limit\n");
6406 return $system_limit;
6410 my @children;
6411 my $max_system_proc_reached;
6412 my $more_filehandles;
6413 my %fh;
6414 my $tmpfhname;
6415 my $count_jobs_already_read;
6416 my @jobs;
6417 my $job;
6418 my @args;
6419 my $arg;
6421 sub reserve_filehandles($) {
6422 # Reserves filehandle
6423 my $n = shift;
6424 for (1..$n) {
6425 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6429 sub reserve_process() {
6430 # Spawn a dummy process
6431 my $child;
6432 if($child = fork()) {
6433 push @children, $child;
6434 $Global::unkilled_children{$child} = 1;
6435 } elsif(defined $child) {
6436 # This is the child
6437 # The child takes one process slot
6438 # It will be killed later
6439 $SIG{'TERM'} = $Global::original_sig{'TERM'};
6440 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6441 # The exec does not work on Cygwin and QNX
6442 sleep 10101010;
6443 } else {
6444 # 'exec sleep' takes less RAM than sleeping in perl
6445 exec 'sleep', 10101;
6447 exit(0);
6448 } else {
6449 # Failed to spawn
6450 $max_system_proc_reached = 1;
6454 sub get_args_or_jobs() {
6455 # Get an arg or a job (depending on mode)
6456 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6457 # Skip: No need to get args
6458 return 1;
6459 } elsif(defined $opt::retries and $count_jobs_already_read) {
6460 # For retries we may need to run all jobs on this sshlogin
6461 # so include the already read jobs for this sshlogin
6462 $count_jobs_already_read--;
6463 return 1;
6464 } else {
6465 if($opt::X or $opt::m) {
6466 # The arguments may have to be re-spread over several jobslots
6467 # So pessimistically only read one arg per jobslot
6468 # instead of a full commandline
6469 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6470 if($Global::JobQueue->empty()) {
6471 return 0;
6472 } else {
6473 $job = $Global::JobQueue->get();
6474 push(@jobs, $job);
6475 return 1;
6477 } else {
6478 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6479 push(@args, $arg);
6480 return 1;
6482 } else {
6483 # If there are no more command lines, then we have a process
6484 # per command line, so no need to go further
6485 if($Global::JobQueue->empty()) {
6486 return 0;
6487 } else {
6488 $job = $Global::JobQueue->get();
6489 # Replacement must happen here due to seq()
6490 $job and $job->replaced();
6491 push(@jobs, $job);
6492 return 1;
6498 sub cleanup() {
6499 # Cleanup: Close the files
6500 for (values %fh) { close $_ }
6501 # Cleanup: Kill the children
6502 for my $pid (@children) {
6503 kill 9, $pid;
6504 waitpid($pid,0);
6505 delete $Global::unkilled_children{$pid};
6507 # Cleanup: Unget the command_lines or the @args
6508 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6509 @args = ();
6510 $Global::JobQueue->unget(@jobs);
6511 @jobs = ();
6514 sub processes_available_by_system_limit($) {
6515 # If the wanted number of processes is bigger than the system limits:
6516 # Limit them to the system limits
6517 # Limits are: File handles, number of input lines, processes,
6518 # and taking > 1 second to spawn 10 extra processes
6519 # Returns:
6520 # Number of processes
6521 my $self = shift;
6522 my $wanted_processes = shift;
6523 my $system_limit = 0;
6524 my $slow_spawning_warning_printed = 0;
6525 my $time = time;
6526 $more_filehandles = 1;
6527 $tmpfhname = "TmpFhNamE";
6529 # perl uses 7 filehandles for something?
6530 # parallel uses 1 for memory_usage
6531 # parallel uses 4 for ?
6532 reserve_filehandles(12);
6533 # Two processes for load avg and ?
6534 reserve_process();
6535 reserve_process();
6537 # For --retries count also jobs already run
6538 $count_jobs_already_read = $Global::JobQueue->next_seq();
6539 my $wait_time_for_getting_args = 0;
6540 my $start_time = time;
6541 while(1) {
6542 $system_limit >= $wanted_processes and last;
6543 not $more_filehandles and last;
6544 $max_system_proc_reached and last;
6546 my $before_getting_arg = time;
6547 if(!$Global::dummy_jobs) {
6548 get_args_or_jobs() or last;
6550 $wait_time_for_getting_args += time - $before_getting_arg;
6551 $system_limit++;
6553 # Every simultaneous process uses 2 filehandles to write to
6554 # and 2 filehandles to read from
6555 reserve_filehandles(4);
6557 # System process limit
6558 reserve_process();
6560 my $forktime = time - $time - $wait_time_for_getting_args;
6561 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
6562 $forktime,
6563 " (processes so far: ", $system_limit,")\n");
6564 if($system_limit > 10 and
6565 $forktime > 1 and
6566 $forktime > $system_limit * 0.01
6567 and not $slow_spawning_warning_printed) {
6568 # It took more than 0.01 second to fork a processes on avg.
6569 # Give the user a warning. He can press Ctrl-C if this
6570 # sucks.
6571 ::warning("Starting $system_limit processes took > $forktime sec.",
6572 "Consider adjusting -j. Press CTRL-C to stop.");
6573 $slow_spawning_warning_printed = 1;
6576 cleanup();
6578 if($system_limit < $wanted_processes) {
6579 # The system_limit is less than the wanted_processes
6580 if($system_limit < 1 and not $Global::JobQueue->empty()) {
6581 ::warning("Cannot spawn any jobs. ".
6582 "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
6583 "or /proc/sys/kernel/pid_max may help.");
6584 ::wait_and_exit(255);
6586 if(not $more_filehandles) {
6587 ::warning("Only enough file handles to run ".
6588 $system_limit. " jobs in parallel.",
6589 "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
6590 "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
6591 "or /proc/sys/fs/file-max may help.");
6593 if($max_system_proc_reached) {
6594 ::warning("Only enough available processes to run ".
6595 $system_limit. " jobs in parallel.",
6596 "Raising ulimit -u or /etc/security/limits.conf ",
6597 "or /proc/sys/kernel/pid_max may help.");
6600 if($] == 5.008008 and $system_limit > 1000) {
6601 # https://savannah.gnu.org/bugs/?36942
6602 $system_limit = 1000;
6604 if($Global::JobQueue->empty()) {
6605 $system_limit ||= 1;
6607 if($self->string() ne ":" and
6608 $system_limit > $Global::default_simultaneous_sshlogins) {
6609 $system_limit =
6610 $self->simultaneous_sshlogin_limit($system_limit);
6612 return $system_limit;
6616 sub simultaneous_sshlogin_limit($) {
6617 # Test by logging in wanted number of times simultaneously
6618 # Returns:
6619 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
6620 my $self = shift;
6621 my $wanted_processes = shift;
6622 if($self->{'time_to_login'}) {
6623 return $wanted_processes;
6626 # Try twice because it guesses wrong sometimes
6627 # Choose the minimal
6628 my $ssh_limit =
6629 ::min($self->simultaneous_sshlogin($wanted_processes),
6630 $self->simultaneous_sshlogin($wanted_processes));
6631 if($ssh_limit < $wanted_processes) {
6632 my $serverlogin = $self->serverlogin();
6633 ::warning("ssh to $serverlogin only allows ".
6634 "for $ssh_limit simultaneous logins.",
6635 "You may raise this by changing",
6636 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
6637 "You can also try --sshdelay 0.1",
6638 "Using only ".($ssh_limit-1)." connections ".
6639 "to avoid race conditions.");
6640 # Race condition can cause problem if using all sshs.
6641 if($ssh_limit > 1) { $ssh_limit -= 1; }
6643 return $ssh_limit;
6646 sub simultaneous_sshlogin($) {
6647 # Using $sshlogin try to see if we can do $wanted_processes
6648 # simultaneous logins
6649 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
6650 # grep simul|wc -l
6651 # Input:
6652 # $wanted_processes = Try for this many logins in parallel
6653 # Returns:
6654 # $ssh_limit = Number of succesful parallel logins
6655 local $/ = "\n";
6656 my $self = shift;
6657 my $wanted_processes = shift;
6658 my $sshcmd = $self->sshcommand();
6659 my $serverlogin = $self->serverlogin();
6660 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
6661 # TODO sh -c wrapper to work for csh
6662 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
6663 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
6664 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
6665 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
6666 ::die_bug("simultaneouslogin");
6667 my $ssh_limit = <$simul_fh>;
6668 close $simul_fh;
6669 chomp $ssh_limit;
6670 return $ssh_limit;
6673 sub set_ncpus($$) {
6674 my $self = shift;
6675 $self->{'ncpus'} = shift;
6678 sub user_requested_processes($) {
6679 # Parse the number of processes that the user asked for using -j
6680 # Input:
6681 # $opt_P = string formatted as for -P
6682 # Returns:
6683 # $processes = the number of processes to run on this sshlogin
6684 my $self = shift;
6685 my $opt_P = shift;
6686 my $processes;
6687 if(defined $opt_P) {
6688 if($opt_P =~ /^\+(\d+)$/) {
6689 # E.g. -P +2
6690 my $j = $1;
6691 $processes =
6692 $self->ncpus() + $j;
6693 } elsif ($opt_P =~ /^-(\d+)$/) {
6694 # E.g. -P -2
6695 my $j = $1;
6696 $processes =
6697 $self->ncpus() - $j;
6698 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
6699 # E.g. -P 10.5%
6700 my $j = $1;
6701 $processes =
6702 $self->ncpus() * $j / 100;
6703 } elsif ($opt_P =~ /^(\d+)$/) {
6704 $processes = $1;
6705 if($processes == 0) {
6706 # -P 0 = infinity (or at least close)
6707 $processes = $Global::infinity;
6709 } elsif (-f $opt_P) {
6710 $Global::max_procs_file = $opt_P;
6711 if(open(my $in_fh, "<", $Global::max_procs_file)) {
6712 my $opt_P_file = join("",<$in_fh>);
6713 close $in_fh;
6714 $processes = $self->user_requested_processes($opt_P_file);
6715 } else {
6716 ::error("Cannot open $opt_P.");
6717 ::wait_and_exit(255);
6719 } else {
6720 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
6721 ::die_usage();
6723 $processes = ::ceil($processes);
6725 return $processes;
6728 sub ncpus($) {
6729 # Number of CPU threads
6730 # --use_sockets_instead_of_threads = count socket instead
6731 # --use_cores_instead_of_threads = count physical cores instead
6732 # Returns:
6733 # $ncpus = number of cpu (threads) on this sshlogin
6734 local $/ = "\n";
6735 my $self = shift;
6736 if(not defined $self->{'ncpus'}) {
6737 my $sshcmd = $self->sshcommand();
6738 my $serverlogin = $self->serverlogin();
6739 if($serverlogin eq ":") {
6740 if($opt::use_sockets_instead_of_threads) {
6741 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
6742 } elsif($opt::use_cores_instead_of_threads) {
6743 $self->{'ncpus'} = socket_core_thread()->{'cores'};
6744 } else {
6745 $self->{'ncpus'} = socket_core_thread()->{'threads'};
6747 } else {
6748 my $ncpu;
6749 if($opt::use_sockets_instead_of_threads
6751 $opt::use_cpus_instead_of_cores) {
6752 $ncpu =
6753 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
6754 } elsif($opt::use_cores_instead_of_threads) {
6755 $ncpu =
6756 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
6757 } else {
6758 $ncpu =
6759 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
6761 chomp $ncpu;
6762 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
6763 $self->{'ncpus'} = $ncpu;
6764 } else {
6765 ::warning("Could not figure out ".
6766 "number of cpus on $serverlogin ($ncpu). Using 1.");
6767 $self->{'ncpus'} = 1;
6771 return $self->{'ncpus'};
6775 sub nproc() {
6776 # Returns:
6777 # Number of threads using `nproc`
6778 my $no_of_threads = ::qqx("nproc");
6779 chomp $no_of_threads;
6780 return $no_of_threads;
6783 sub no_of_sockets() {
6784 return socket_core_thread()->{'sockets'};
6787 sub no_of_cores() {
6788 return socket_core_thread()->{'cores'};
6791 sub no_of_threads() {
6792 return socket_core_thread()->{'threads'};
6795 sub socket_core_thread() {
6796 # Returns:
6798 # 'sockets' => #sockets = number of socket with CPU present
6799 # 'cores' => #cores = number of physical cores
6800 # 'threads' => #threads = number of compute cores (hyperthreading)
6801 # 'active' => #taskset_threads = number of taskset limited cores
6803 my $cpu;
6805 if ($^O eq 'linux') {
6806 $cpu = sct_gnu_linux();
6807 } elsif ($^O eq 'android') {
6808 $cpu = sct_android();
6809 } elsif ($^O eq 'freebsd') {
6810 $cpu = sct_freebsd();
6811 } elsif ($^O eq 'netbsd') {
6812 $cpu = sct_netbsd();
6813 } elsif ($^O eq 'openbsd') {
6814 $cpu = sct_openbsd();
6815 } elsif ($^O eq 'gnu') {
6816 $cpu = sct_hurd();
6817 } elsif ($^O eq 'darwin') {
6818 $cpu = sct_darwin();
6819 } elsif ($^O eq 'solaris') {
6820 $cpu = sct_solaris();
6821 } elsif ($^O eq 'aix') {
6822 $cpu = sct_aix();
6823 } elsif ($^O eq 'hpux') {
6824 $cpu = sct_hpux();
6825 } elsif ($^O eq 'nto') {
6826 $cpu = sct_qnx();
6827 } elsif ($^O eq 'svr5') {
6828 $cpu = sct_openserver();
6829 } elsif ($^O eq 'irix') {
6830 $cpu = sct_irix();
6831 } elsif ($^O eq 'dec_osf') {
6832 $cpu = sct_tru64();
6833 } else {
6834 # Try all methods until we find something that works
6835 $cpu = (sct_gnu_linux()
6836 || sct_android()
6837 || sct_freebsd()
6838 || sct_netbsd()
6839 || sct_openbsd()
6840 || sct_hurd()
6841 || sct_darwin()
6842 || sct_solaris()
6843 || sct_aix()
6844 || sct_hpux()
6845 || sct_qnx()
6846 || sct_openserver()
6847 || sct_irix()
6848 || sct_tru64()
6851 if(not $cpu) {
6852 my $nproc = nproc();
6853 if($nproc) {
6854 $cpu->{'sockets'} =
6855 $cpu->{'cores'} =
6856 $cpu->{'threads'} =
6857 $cpu->{'active'} =
6858 $nproc;
6861 if(not $cpu) {
6862 ::warning("Cannot figure out number of cpus. Using 1.");
6863 $cpu->{'sockets'} =
6864 $cpu->{'cores'} =
6865 $cpu->{'threads'} =
6866 $cpu->{'active'} =
6870 # Choose minimum of active and actual
6871 my $mincpu;
6872 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
6873 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
6874 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
6875 return $mincpu;
6878 sub sct_gnu_linux() {
6879 # Returns:
6880 # { 'sockets' => #sockets
6881 # 'cores' => #cores
6882 # 'threads' => #threads
6883 # 'active' => #taskset_threads }
6884 my $cpu;
6885 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
6886 if($ENV{'PARALLEL_CPUINFO'} or -e "/proc/cpuinfo") {
6887 $cpu->{'sockets'} = 0;
6888 $cpu->{'cores'} = 0;
6889 $cpu->{'threads'} = 0;
6890 my %seen;
6891 my %phy_seen;
6892 my @cpuinfo;
6893 my $physicalid;
6894 if(open(my $in_fh, "<", "/proc/cpuinfo")) {
6895 @cpuinfo = <$in_fh>;
6896 close $in_fh;
6898 if($ENV{'PARALLEL_CPUINFO'}) {
6899 # Use CPUINFO from environment - used for testing only
6900 @cpuinfo = split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'};
6902 for(@cpuinfo) {
6903 if(/^physical id.*[:](.*)/) {
6904 $physicalid=$1;
6905 if(not $phy_seen{$1}++) {
6906 $cpu->{'sockets'}++;
6909 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
6910 $cpu->{'cores'}++;
6912 /^processor.*[:]/i and $cpu->{'threads'}++;
6914 $cpu->{'sockets'} ||= 1;
6915 $cpu->{'cores'} ||= $cpu->{'threads'};
6917 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
6918 # if 'taskset' is used to limit number of threads
6919 if(open(my $in_fh, "<", "/proc/self/status")) {
6920 while(<$in_fh>) {
6921 if(/^Cpus_allowed:\s*(\S+)/) {
6922 my $a = $1;
6923 $a =~ tr/,//d;
6924 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
6927 close $in_fh;
6930 if(grep { /\d/ } values %$cpu) {
6931 return $cpu;
6932 } else {
6933 return undef;
6937 sub sct_android() {
6938 # Returns:
6939 # { 'sockets' => #sockets
6940 # 'cores' => #cores
6941 # 'threads' => #threads
6942 # 'active' => #taskset_threads }
6943 # Use GNU/Linux
6944 return sct_gnu_linux();
6947 sub sct_freebsd() {
6948 # Returns:
6949 # { 'sockets' => #sockets
6950 # 'cores' => #cores
6951 # 'threads' => #threads
6952 # 'active' => #taskset_threads }
6953 local $/ = "\n";
6954 my $cpu;
6955 $cpu->{'cores'} = (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
6957 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
6958 $cpu->{'cores'} and chomp $cpu->{'cores'};
6959 $cpu->{'threads'} =
6960 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
6962 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
6963 $cpu->{'threads'} and chomp $cpu->{'threads'};
6964 $cpu->{'sockets'} ||= $cpu->{'cores'};
6966 if(grep { /\d/ } values %$cpu) {
6967 return $cpu;
6968 } else {
6969 return undef;
6973 sub sct_netbsd() {
6974 # Returns:
6975 # { 'sockets' => #sockets
6976 # 'cores' => #cores
6977 # 'threads' => #threads
6978 # 'active' => #taskset_threads }
6979 local $/ = "\n";
6980 my $cpu;
6981 $cpu->{'cores'} = ::qqx("sysctl -n hw.ncpu");
6982 $cpu->{'cores'} and chomp $cpu->{'cores'};
6983 $cpu->{'threads'} = ::qqx("sysctl -n hw.ncpu");
6984 $cpu->{'threads'} and chomp $cpu->{'threads'};
6985 $cpu->{'sockets'} ||= $cpu->{'cores'};
6987 if(grep { /\d/ } values %$cpu) {
6988 return $cpu;
6989 } else {
6990 return undef;
6994 sub sct_openbsd() {
6995 # Returns:
6996 # { 'sockets' => #sockets
6997 # 'cores' => #cores
6998 # 'threads' => #threads
6999 # 'active' => #taskset_threads }
7000 local $/ = "\n";
7001 my $cpu;
7002 $cpu->{'cores'} = ::qqx('sysctl -n hw.ncpu');
7003 $cpu->{'cores'} and chomp $cpu->{'cores'};
7004 $cpu->{'threads'} = ::qqx('sysctl -n hw.ncpu');
7005 $cpu->{'threads'} and chomp $cpu->{'threads'};
7006 $cpu->{'sockets'} ||= $cpu->{'cores'};
7008 if(grep { /\d/ } values %$cpu) {
7009 return $cpu;
7010 } else {
7011 return undef;
7015 sub sct_hurd() {
7016 # Returns:
7017 # { 'sockets' => #sockets
7018 # 'cores' => #cores
7019 # 'threads' => #threads
7020 # 'active' => #taskset_threads }
7021 local $/ = "\n";
7022 my $cpu;
7023 $cpu->{'cores'} = ::qqx("nproc");
7024 $cpu->{'cores'} and chomp $cpu->{'cores'};
7025 $cpu->{'threads'} = ::qqx("nproc");
7026 $cpu->{'threads'} and chomp $cpu->{'threads'};
7028 if(grep { /\d/ } values %$cpu) {
7029 return $cpu;
7030 } else {
7031 return undef;
7035 sub sct_darwin() {
7036 # Returns:
7037 # { 'sockets' => #sockets
7038 # 'cores' => #cores
7039 # 'threads' => #threads
7040 # 'active' => #taskset_threads }
7041 local $/ = "\n";
7042 my $cpu;
7043 $cpu->{'cores'} =
7044 (::qqx('sysctl -n hw.physicalcpu')
7046 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7047 $cpu->{'cores'} and chomp $cpu->{'cores'};
7048 $cpu->{'threads'} =
7049 (::qqx('sysctl -n hw.logicalcpu')
7051 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7052 $cpu->{'threads'} and chomp $cpu->{'threads'};
7053 $cpu->{'sockets'} ||= $cpu->{'cores'};
7055 if(grep { /\d/ } values %$cpu) {
7056 return $cpu;
7057 } else {
7058 return undef;
7062 sub sct_solaris() {
7063 # Returns:
7064 # { 'sockets' => #sockets
7065 # 'cores' => #cores
7066 # 'threads' => #threads
7067 # 'active' => #taskset_threads }
7068 local $/ = "\n";
7069 my $cpu;
7070 if(-x "/usr/sbin/psrinfo") {
7071 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7072 if($#psrinfo >= 0) {
7073 $cpu->{'cores'} = $#psrinfo +1;
7076 if(-x "/usr/sbin/prtconf") {
7077 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7078 if($#prtconf >= 0) {
7079 $cpu->{'cores'} = $#prtconf +1;
7082 if(-x "/usr/sbin/prtconf") {
7083 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7084 if($#prtconf >= 0) {
7085 $cpu->{'cores'} = $#prtconf +1;
7088 $cpu->{'cores'} and chomp $cpu->{'cores'};
7090 if(-x "/usr/sbin/psrinfo") {
7091 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7092 if($#psrinfo >= 0) {
7093 $cpu->{'threads'} = $#psrinfo +1;
7096 if(-x "/usr/sbin/prtconf") {
7097 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7098 if($#prtconf >= 0) {
7099 $cpu->{'threads'} = $#prtconf +1;
7102 $cpu->{'threads'} and chomp $cpu->{'threads'};
7104 if(grep { /\d/ } values %$cpu) {
7105 return $cpu;
7106 } else {
7107 return undef;
7111 sub sct_aix() {
7112 # Returns:
7113 # { 'sockets' => #sockets
7114 # 'cores' => #cores
7115 # 'threads' => #threads
7116 # 'active' => #taskset_threads }
7117 local $/ = "\n";
7118 my $cpu;
7119 if(-x "/usr/sbin/lscfg") {
7120 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7121 $cpu->{'cores'} = <$in_fh>;
7122 chomp ($cpu->{'cores'});
7123 close $in_fh;
7126 if(-x "/usr/bin/vmstat") {
7127 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7128 while(<$in_fh>) {
7129 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7131 close $in_fh;
7135 if(grep { /\d/ } values %$cpu) {
7136 # BUG It is not not known how to calculate this
7137 $cpu->{'sockets'} = 1;
7138 return $cpu;
7139 } else {
7140 return undef;
7144 sub sct_hpux() {
7145 # Returns:
7146 # { 'sockets' => #sockets
7147 # 'cores' => #cores
7148 # 'threads' => #threads
7149 # 'active' => #taskset_threads }
7150 local $/ = "\n";
7151 my $cpu;
7152 $cpu->{'cores'} =
7153 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7154 chomp($cpu->{'cores'});
7155 $cpu->{'threads'} =
7156 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7158 if(grep { /\d/ } values %$cpu) {
7159 # BUG It is not not known how to calculate this
7160 $cpu->{'sockets'} = 1;
7161 return $cpu;
7162 } else {
7163 return undef;
7167 sub sct_qnx() {
7168 # Returns:
7169 # { 'sockets' => #sockets
7170 # 'cores' => #cores
7171 # 'threads' => #threads
7172 # 'active' => #taskset_threads }
7173 local $/ = "\n";
7174 my $cpu;
7175 # BUG: It is not known how to calculate this.
7177 if(grep { /\d/ } values %$cpu) {
7178 return $cpu;
7179 } else {
7180 return undef;
7184 sub sct_openserver() {
7185 # Returns:
7186 # { 'sockets' => #sockets
7187 # 'cores' => #cores
7188 # 'threads' => #threads
7189 # 'active' => #taskset_threads }
7190 local $/ = "\n";
7191 my $cpu;
7192 if(-x "/usr/sbin/psrinfo") {
7193 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7194 if($#psrinfo >= 0) {
7195 $cpu->{'cores'} = $#psrinfo +1;
7198 if(-x "/usr/sbin/psrinfo") {
7199 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7200 if($#psrinfo >= 0) {
7201 $cpu->{'threads'} = $#psrinfo +1;
7204 $cpu->{'sockets'} ||= $cpu->{'cores'};
7206 if(grep { /\d/ } values %$cpu) {
7207 return $cpu;
7208 } else {
7209 return undef;
7213 sub sct_irix() {
7214 # Returns:
7215 # { 'sockets' => #sockets
7216 # 'cores' => #cores
7217 # 'threads' => #threads
7218 # 'active' => #taskset_threads }
7219 local $/ = "\n";
7220 my $cpu;
7221 $cpu->{'cores'} = ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7222 $cpu->{'cores'} and chomp $cpu->{'cores'};
7224 if(grep { /\d/ } values %$cpu) {
7225 return $cpu;
7226 } else {
7227 return undef;
7231 sub sct_tru64() {
7232 # Returns:
7233 # { 'sockets' => #sockets
7234 # 'cores' => #cores
7235 # 'threads' => #threads
7236 # 'active' => #taskset_threads }
7237 local $/ = "\n";
7238 my $cpu;
7239 $cpu->{'cores'} = ::qqx("sizer -pr");
7240 $cpu->{'cores'} and chomp $cpu->{'cores'};
7241 $cpu->{'cores'} ||= 1;
7242 $cpu->{'sockets'} ||= $cpu->{'cores'};
7243 $cpu->{'threads'} ||= $cpu->{'cores'};
7245 if(grep { /\d/ } values %$cpu) {
7246 return $cpu;
7247 } else {
7248 return undef;
7252 sub sshcommand($) {
7253 # Returns:
7254 # $sshcommand = the command (incl options) to run when using ssh
7255 my $self = shift;
7256 if (not defined $self->{'sshcommand'}) {
7257 $self->sshcommand_of_sshlogin();
7259 return $self->{'sshcommand'};
7262 sub serverlogin($) {
7263 # Returns:
7264 # $sshcommand = the command (incl options) to run when using ssh
7265 my $self = shift;
7266 if (not defined $self->{'serverlogin'}) {
7267 $self->sshcommand_of_sshlogin();
7269 return $self->{'serverlogin'};
7272 sub sshcommand_of_sshlogin($) {
7273 # Compute ssh command and serverlogin from sshlogin
7274 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
7275 # 'user@server' -> ('ssh','user@server')
7276 # 'myssh user@server' -> ('myssh','user@server')
7277 # 'myssh -l user server' -> ('myssh -l user','server')
7278 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
7279 # Sets:
7280 # $self->{'sshcommand'}
7281 # $self->{'serverlogin'}
7282 my $self = shift;
7283 my ($sshcmd, $serverlogin);
7284 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
7285 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
7286 if($self->{'string'} =~ /(.+) (\S+)$/) {
7287 # Own ssh command
7288 $sshcmd = $1; $serverlogin = $2;
7289 } else {
7290 # Normal ssh
7291 if($opt::controlmaster) {
7292 # Use control_path to make ssh faster
7293 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
7294 $sshcmd = $opt::ssh." -S ".$control_path;
7295 $serverlogin = $self->{'string'};
7296 if(not $self->{'control_path'}{$control_path}++) {
7297 # Master is not running for this control_path
7298 # Start it
7299 my $pid = fork();
7300 if($pid) {
7301 $Global::sshmaster{$pid} ||= 1;
7302 } else {
7303 $SIG{'TERM'} = undef;
7304 # Ignore the 'foo' being printed
7305 open(STDOUT,">","/dev/null");
7306 # STDERR >/dev/null to ignore
7307 open(STDERR,">","/dev/null");
7308 open(STDIN,"<","/dev/null");
7309 # Run a sleep that outputs data, so it will discover
7310 # if the ssh connection closes.
7311 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7312 my @master = ($opt::ssh, "-MTS",
7313 $control_path, $serverlogin, "--", "perl", "-e",
7314 $sleep);
7315 exec(@master);
7318 } else {
7319 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7323 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7324 # convert user@server to '-l user server'
7325 # because lsh does not support user@server
7326 $sshcmd = $sshcmd." -l ".$1;
7329 $self->{'sshcommand'} = $sshcmd;
7330 $self->{'serverlogin'} = $serverlogin;
7333 sub control_path_dir($) {
7334 # Returns:
7335 # $control_path_dir = dir of control path (for -M)
7336 my $self = shift;
7337 if(not defined $self->{'control_path_dir'}) {
7338 $self->{'control_path_dir'} =
7339 # Use $ENV{'TMPDIR'} as that is typically not
7340 # NFS mounted
7341 File::Temp::tempdir($ENV{'TMPDIR'}
7342 . "/control_path_dir-XXXX",
7343 CLEANUP => 1);
7345 return $self->{'control_path_dir'};
7348 sub rsync_transfer_cmd($) {
7349 # Command to run to transfer a file
7350 # Input:
7351 # $file = filename of file to transfer
7352 # $workdir = destination dir
7353 # Returns:
7354 # $cmd = rsync command to run to transfer $file ("" if unreadable)
7355 my $self = shift;
7356 my $file = shift;
7357 my $workdir = shift;
7358 if(not -r $file) {
7359 ::warning($file. " is not readable and will not be transferred.");
7360 return "true";
7362 my $rsync_destdir;
7363 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7364 if($relpath) {
7365 $rsync_destdir = ::shell_quote_file($workdir);
7366 } else {
7367 # rsync /foo/bar /
7368 $rsync_destdir = "/";
7370 $file = ::shell_quote_file($file);
7371 my $sshcmd = $self->sshcommand();
7372 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7373 " -e".::Q($sshcmd);
7374 my $serverlogin = $self->serverlogin();
7375 # Make dir if it does not exist
7376 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7377 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7380 sub cleanup_cmd($$$) {
7381 # Command to run to remove the remote file
7382 # Input:
7383 # $file = filename to remove
7384 # $workdir = destination dir
7385 # Returns:
7386 # $cmd = ssh command to run to remove $file and empty parent dirs
7387 my $self = shift;
7388 my $file = shift;
7389 my $workdir = shift;
7390 my $f = $file;
7391 if($f =~ m:/\./:) {
7392 # foo/bar/./baz/quux => workdir/baz/quux
7393 # /foo/bar/./baz/quux => workdir/baz/quux
7394 $f =~ s:.*/\./:$workdir/:;
7395 } elsif($f =~ m:^[^/]:) {
7396 # foo/bar => workdir/foo/bar
7397 $f = $workdir."/".$f;
7399 my @subdirs = split m:/:, ::dirname($f);
7400 my @rmdir;
7401 my $dir = "";
7402 for(@subdirs) {
7403 $dir .= $_."/";
7404 unshift @rmdir, ::shell_quote_file($dir);
7406 my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7407 if(defined $opt::workdir and $opt::workdir eq "...") {
7408 $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7411 $f = ::shell_quote_file($f);
7412 my $sshcmd = $self->sshcommand();
7413 my $serverlogin = $self->serverlogin();
7414 return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7418 my $rsync;
7420 sub rsync {
7421 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7422 # If the version >= 3.1.0: downgrade to protocol 30
7423 # Returns:
7424 # $rsync = "rsync" or "rsync --protocol 30"
7425 if(not $rsync) {
7426 my @out = `rsync --version`;
7427 for (@out) {
7428 if(/version (\d+.\d+)(.\d+)?/) {
7429 if($1 >= 3.1) {
7430 # Version 3.1.0 or later: Downgrade to protocol 30
7431 $rsync = "rsync --protocol 30";
7432 } else {
7433 $rsync = "rsync";
7437 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7439 return $rsync;
7444 package JobQueue;
7446 sub new($) {
7447 my $class = shift;
7448 my $commandref = shift;
7449 my $read_from = shift;
7450 my $context_replace = shift;
7451 my $max_number_of_args = shift;
7452 my $transfer_files = shift;
7453 my $return_files = shift;
7454 my $commandlinequeue = CommandLineQueue->new
7455 ($commandref, $read_from, $context_replace, $max_number_of_args,
7456 $transfer_files, $return_files);
7457 my @unget = ();
7458 return bless {
7459 'unget' => \@unget,
7460 'commandlinequeue' => $commandlinequeue,
7461 'this_job_no' => 0,
7462 'total_jobs' => undef,
7463 }, ref($class) || $class;
7466 sub get($) {
7467 my $self = shift;
7469 $self->{'this_job_no'}++;
7470 if(@{$self->{'unget'}}) {
7471 return shift @{$self->{'unget'}};
7472 } else {
7473 my $commandline = $self->{'commandlinequeue'}->get();
7474 if(defined $commandline) {
7475 return Job->new($commandline);
7476 } else {
7477 $self->{'this_job_no'}--;
7478 return undef;
7483 sub unget($) {
7484 my $self = shift;
7485 unshift @{$self->{'unget'}}, @_;
7486 $self->{'this_job_no'} -= @_;
7489 sub empty($) {
7490 my $self = shift;
7491 my $empty = (not @{$self->{'unget'}}) &&
7492 $self->{'commandlinequeue'}->empty();
7493 ::debug("run", "JobQueue->empty $empty ");
7494 return $empty;
7497 sub total_jobs($) {
7498 my $self = shift;
7499 if(not defined $self->{'total_jobs'}) {
7500 if($opt::pipe and not $opt::tee) {
7501 ::error("--pipe is incompatible with --eta/--bar/--shuf");
7502 ::wait_and_exit(255);
7504 if($opt::sqlworker) {
7505 $self->{'total_jobs'} = $Global::sql->total_jobs();
7506 } else {
7507 my $record;
7508 my @arg_records;
7509 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
7510 my $start = time;
7511 while($record = $record_queue->get()) {
7512 push @arg_records, $record;
7513 if(time - $start > 10) {
7514 ::warning("Reading ".scalar(@arg_records).
7515 " arguments took longer than 10 seconds.");
7516 $opt::eta && ::warning("Consider removing --eta.");
7517 $opt::bar && ::warning("Consider removing --bar.");
7518 $opt::shuf && ::warning("Consider removing --shuf.");
7519 last;
7522 while($record = $record_queue->get()) {
7523 push @arg_records, $record;
7525 if($opt::shuf and @arg_records) {
7526 my $i = @arg_records;
7527 while (--$i) {
7528 my $j = int rand($i+1);
7529 @arg_records[$i,$j] = @arg_records[$j,$i];
7532 $record_queue->unget(@arg_records);
7533 # $#arg_records = number of args - 1
7534 # We have read one @arg_record for this job (so add 1 more)
7535 my $num_args = $#arg_records + 2;
7536 # This jobs is not started so -1
7537 my $started_jobs = $self->{'this_job_no'} - 1;
7538 my $max_args = ::max($Global::max_number_of_args,1);
7539 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
7540 + $started_jobs;
7541 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
7542 " ($num_args/$max_args + $started_jobs)\n");
7545 return $self->{'total_jobs'};
7548 sub flush_total_jobs($) {
7549 # Unset total_jobs to force recomputing
7550 my $self = shift;
7551 ::debug("init","flush Total jobs: ");
7552 $self->{'total_jobs'} = undef;
7555 sub next_seq($) {
7556 my $self = shift;
7558 return $self->{'commandlinequeue'}->seq();
7561 sub quote_args($) {
7562 my $self = shift;
7563 return $self->{'commandlinequeue'}->quote_args();
7567 package Job;
7569 sub new($) {
7570 my $class = shift;
7571 my $commandlineref = shift;
7572 return bless {
7573 'commandline' => $commandlineref, # CommandLine object
7574 'workdir' => undef, # --workdir
7575 # filehandle for stdin (used for --pipe)
7576 # filename for writing stdout to (used for --files)
7577 # remaining data not sent to stdin (used for --pipe)
7578 # tmpfiles to cleanup when job is done
7579 'unlink' => [],
7580 # amount of data sent via stdin (used for --pipe)
7581 'transfersize' => 0, # size of files using --transfer
7582 'returnsize' => 0, # size of files using --return
7583 'pid' => undef,
7584 # hash of { SSHLogins => number of times the command failed there }
7585 'failed' => undef,
7586 'sshlogin' => undef,
7587 # The commandline wrapped with rsync and ssh
7588 'sshlogin_wrap' => undef,
7589 'exitstatus' => undef,
7590 'exitsignal' => undef,
7591 # Timestamp for timeout if any
7592 'timeout' => undef,
7593 'virgin' => 1,
7594 # Output used for SQL and CSV-output
7595 'output' => { 1 => [], 2 => [] },
7596 'halfline' => { 1 => [], 2 => [] },
7597 }, ref($class) || $class;
7600 sub replaced($) {
7601 my $self = shift;
7602 $self->{'commandline'} or ::die_bug("commandline empty");
7603 return $self->{'commandline'}->replaced();
7606 sub seq($) {
7607 my $self = shift;
7608 return $self->{'commandline'}->seq();
7611 sub set_seq($$) {
7612 my $self = shift;
7613 return $self->{'commandline'}->set_seq(shift);
7616 sub slot($) {
7617 my $self = shift;
7618 return $self->{'commandline'}->slot();
7621 sub free_slot($) {
7622 my $self = shift;
7623 push @Global::slots, $self->slot();
7627 my($cattail);
7629 sub cattail() {
7630 # Returns:
7631 # $cattail = perl program for:
7632 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
7633 if(not $cattail) {
7634 $cattail = q{
7635 # cat followed by tail (possibly with rm as soon at the file is opened)
7636 # If $writerpid dead: finish after this round
7637 use Fcntl;
7638 $|=1;
7640 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
7641 if($read_file) {
7642 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
7643 } else {
7644 *IN = *STDIN;
7646 while(! -s $comfile) {
7647 # Writer has not opened the buffer file, so we cannot remove it yet
7648 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
7649 usleep($sleep);
7651 # The writer and we have both opened the file, so it is safe to unlink it
7652 unlink $unlink_file;
7653 unlink $comfile;
7655 my $first_round = 1;
7656 my $flags;
7657 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
7658 $flags |= O_NONBLOCK; # Add non-blocking to the flags
7659 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
7661 while(1) {
7662 # clear EOF
7663 seek(IN,0,1);
7664 my $writer_running = kill 0, $writerpid;
7665 $read = sysread(IN,$buf,131072);
7666 if($read) {
7667 if($first_round) {
7668 # Only start the command if there any input to process
7669 $first_round = 0;
7670 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
7673 # Blocking print
7674 while($buf) {
7675 my $bytes_written = syswrite(OUT,$buf);
7676 # syswrite may be interrupted by SIGHUP
7677 substr($buf,0,$bytes_written) = "";
7679 # Something printed: Wait less next time
7680 $sleep /= 2;
7681 } else {
7682 if(eof(IN) and not $writer_running) {
7683 # Writer dead: There will never be sent more to the decompressor
7684 close OUT;
7685 exit;
7687 # TODO This could probably be done more efficiently using select(2)
7688 # Nothing read: Wait longer before next read
7689 # Up to 100 milliseconds
7690 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
7691 usleep($sleep);
7695 sub usleep {
7696 # Sleep this many milliseconds.
7697 my $secs = shift;
7698 select(undef, undef, undef, $secs/1000);
7701 $cattail =~ s/#.*//mg;
7702 $cattail =~ s/\s+/ /g;
7704 return $cattail;
7708 sub openoutputfiles($) {
7709 # Open files for STDOUT and STDERR
7710 # Set file handles in $self->fh
7711 my $self = shift;
7712 my ($outfhw, $errfhw, $outname, $errname);
7714 if($opt::linebuffer and not
7715 ($opt::keeporder or $opt::files or $opt::results or
7716 $opt::compress or $opt::compress_program or
7717 $opt::decompress_program)) {
7718 # Do not save to files: Use non-blocking pipe
7719 my ($outfhr, $errfhr);
7720 pipe($outfhr, $outfhw) || die;
7721 pipe($errfhr, $errfhw) || die;
7722 $self->set_fh(1,'w',$outfhw);
7723 $self->set_fh(2,'w',$errfhw);
7724 $self->set_fh(1,'r',$outfhr);
7725 $self->set_fh(2,'r',$errfhr);
7726 # Make it possible to read non-blocking from the pipe
7727 for my $fdno (1,2) {
7728 ::set_fh_non_blocking($self->fh($fdno,'r'));
7730 # Return immediately because we do not need setting filenames
7731 return;
7732 } elsif($opt::results and not $Global::csvsep) {
7733 my $out = $self->{'commandline'}->results_out();
7734 my $seqname;
7735 if($out eq $opt::results or $out =~ m:/$:) {
7736 # $opt::results = simple string or ending in /
7737 # => $out is a dir/
7738 # prefix/name1/val1/name2/val2/seq
7739 $seqname = $out."seq";
7740 # prefix/name1/val1/name2/val2/stdout
7741 $outname = $out."stdout";
7742 # prefix/name1/val1/name2/val2/stderr
7743 $errname = $out."stderr";
7744 } else {
7745 # $opt::results = replacement string not ending in /
7746 # => $out is a file
7747 $outname = $out;
7748 $errname = "$out.err";
7749 $seqname = "$out.seq";
7751 my $seqfhw;
7752 if(not open($seqfhw, "+>", $seqname)) {
7753 ::error("Cannot write to `$seqname'.");
7754 ::wait_and_exit(255);
7756 print $seqfhw $self->seq();
7757 close $seqfhw;
7758 if(not open($outfhw, "+>", $outname)) {
7759 ::error("Cannot write to `$outname'.");
7760 ::wait_and_exit(255);
7762 if(not open($errfhw, "+>", $errname)) {
7763 ::error("Cannot write to `$errname'.");
7764 ::wait_and_exit(255);
7766 $self->set_fh(1,"unlink","");
7767 $self->set_fh(2,"unlink","");
7768 if($opt::sqlworker) {
7769 # Save the filenames in SQL table
7770 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
7771 "WHERE Seq = ". $self->seq(),
7772 $outname, $errname);
7774 } elsif(not $opt::ungroup) {
7775 # To group we create temporary files for STDOUT and STDERR
7776 # To avoid the cleanup unlink the files immediately (but keep them open)
7777 if($opt::files) {
7778 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
7779 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
7780 # --files => only remove stderr
7781 $self->set_fh(1,"unlink","");
7782 $self->set_fh(2,"unlink",$errname);
7783 } else {
7784 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
7785 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
7786 $self->set_fh(1,"unlink",$outname);
7787 $self->set_fh(2,"unlink",$errname);
7789 } else {
7790 # --ungroup
7791 open($outfhw,">&",$Global::fd{1}) || die;
7792 open($errfhw,">&",$Global::fd{2}) || die;
7793 # File name must be empty as it will otherwise be printed
7794 $outname = "";
7795 $errname = "";
7796 $self->set_fh(1,"unlink",$outname);
7797 $self->set_fh(2,"unlink",$errname);
7799 # Set writing FD
7800 $self->set_fh(1,'w',$outfhw);
7801 $self->set_fh(2,'w',$errfhw);
7802 $self->set_fh(1,'name',$outname);
7803 $self->set_fh(2,'name',$errname);
7804 if($opt::compress) {
7805 $self->filter_through_compress();
7806 } elsif(not $opt::ungroup) {
7807 $self->grouped();
7809 if($opt::linebuffer) {
7810 # Make it possible to read non-blocking from
7811 # the buffer files
7812 # Used for --linebuffer with -k, --files, --res, --compress*
7813 for my $fdno (1,2) {
7814 ::set_fh_non_blocking($self->fh($fdno,'r'));
7819 sub print_verbose_dryrun($) {
7820 # If -v set: print command to stdout (possibly buffered)
7821 # This must be done before starting the command
7822 my $self = shift;
7823 if($Global::verbose or $opt::dryrun) {
7824 my $fh = $self->fh(1,"w");
7825 if($Global::verbose <= 1) {
7826 print $fh $self->replaced(),"\n";
7827 } else {
7828 # Verbose level > 1: Print the rsync and stuff
7829 print $fh $self->wrapped(),"\n";
7832 if($opt::sqlworker) {
7833 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
7834 $self->replaced());
7838 sub add_rm($) {
7839 # Files to remove when job is done
7840 my $self = shift;
7841 push @{$self->{'unlink'}}, @_;
7844 sub get_rm($) {
7845 # Files to remove when job is done
7846 my $self = shift;
7847 return @{$self->{'unlink'}};
7850 sub cleanup($) {
7851 # Remove files when job is done
7852 my $self = shift;
7853 unlink $self->get_rm();
7854 delete @Global::unlink{$self->get_rm()};
7857 sub grouped($) {
7858 my $self = shift;
7859 # Set reading FD if using --group (--ungroup does not need)
7860 for my $fdno (1,2) {
7861 # Re-open the file for reading
7862 # so fdw can be closed seperately
7863 # and fdr can be seeked seperately (for --line-buffer)
7864 open(my $fdr,"<", $self->fh($fdno,'name')) ||
7865 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
7866 $self->set_fh($fdno,'r',$fdr);
7867 # Unlink if not debugging
7868 $Global::debug or ::rm($self->fh($fdno,"unlink"));
7872 sub empty_input_wrapper($) {
7873 # If no input: exit(0)
7874 # If some input: Pass input as input to command on STDIN
7875 # This avoids starting the command if there is no input.
7876 # Input:
7877 # $command = command to pipe data to
7878 # Returns:
7879 # $wrapped_command = the wrapped command
7880 my $command = shift;
7881 my $script =
7882 ::spacefree(0,q{
7883 if(sysread(STDIN, $buf, 1)) {
7884 open($fh, "|-", @ARGV) || die;
7885 syswrite($fh, $buf);
7886 # Align up to 128k block
7887 if($read = sysread(STDIN, $buf, 131071)) {
7888 syswrite($fh, $buf);
7890 while($read = sysread(STDIN, $buf, 131072)) {
7891 syswrite($fh, $buf);
7893 close $fh;
7894 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
7897 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
7898 if($Global::cshell
7900 length $command > 499) {
7901 # csh does not like words longer than 1000 (499 quoted)
7902 # $command = "perl -e '".base64_zip_eval()."' ".
7903 # join" ",string_zip_base64(
7904 # 'exec "'.::perl_quote_scalar($command).'"');
7905 return 'perl -e '.::Q($script)." ".
7906 base64_wrap("exec \"$Global::shell\",'-c',\"".
7907 ::perl_quote_scalar($command).'"');
7908 } else {
7909 return 'perl -e '.::Q($script)." ".
7910 $Global::shell." -c ".::Q($command);
7914 sub filter_through_compress($) {
7915 my $self = shift;
7916 # Send stdout to stdin for $opt::compress_program(1)
7917 # Send stderr to stdin for $opt::compress_program(2)
7918 # cattail get pid: $pid = $self->fh($fdno,'rpid');
7919 my $cattail = cattail();
7921 for my $fdno (1,2) {
7922 # Make a communication file.
7923 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
7924 close $fh;
7925 # Compressor: (echo > $comfile; compress pipe) > output
7926 # When the echo is written to $comfile,
7927 # it is known that output file is opened,
7928 # thus output file can then be removed by the decompressor.
7929 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
7930 empty_input_wrapper($opt::compress_program).") >".
7931 $self->fh($fdno,'name')) || die $?;
7932 $self->set_fh($fdno,'w',$fdw);
7933 $self->set_fh($fdno,'wpid',$wpid);
7934 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
7935 # decompress output > stdout
7936 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
7937 $opt::decompress_program, $wpid,
7938 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
7939 || die $?;
7940 $self->set_fh($fdno,'r',$fdr);
7941 $self->set_fh($fdno,'rpid',$rpid);
7947 sub set_fh($$$$) {
7948 # Set file handle
7949 my ($self, $fd_no, $key, $fh) = @_;
7950 $self->{'fd'}{$fd_no,$key} = $fh;
7953 sub fh($) {
7954 # Get file handle
7955 my ($self, $fd_no, $key) = @_;
7956 return $self->{'fd'}{$fd_no,$key};
7959 sub write($) {
7960 my $self = shift;
7961 my $remaining_ref = shift;
7962 my $stdin_fh = $self->fh(0,"w");
7964 my $len = length $$remaining_ref;
7965 # syswrite may not write all in one go,
7966 # so make sure everything is written.
7967 my $written;
7969 # If writing is to a closed pipe:
7970 # Do not call signal handler, but let nothing be written
7971 local $SIG{PIPE} = undef;
7972 while($written = syswrite($stdin_fh,$$remaining_ref)){
7973 substr($$remaining_ref,0,$written) = "";
7977 sub set_block($$$$$$) {
7978 # Copy stdin buffer from $block_ref up to $endpos
7979 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
7980 # Remove $recstart and $recend if needed
7981 # Input:
7982 # $header_ref = ref to $header to prepend
7983 # $buffer_ref = ref to $buffer containing the block
7984 # $endpos = length of $block to pass on
7985 # $recstart = --recstart regexp
7986 # $recend = --recend regexp
7987 # Returns:
7988 # N/A
7989 my $self = shift;
7990 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
7991 $self->{'block'} = ($self->virgin() ? $$header_ref : "").
7992 substr($$buffer_ref,0,$endpos);
7993 if($opt::remove_rec_sep) {
7994 remove_rec_sep(\$self->{'block'},$recstart,$recend);
7996 $self->{'block_length'} = length $self->{'block'};
7997 $self->{'block_pos'} = 0;
7998 $self->add_transfersize($self->{'block_length'});
8001 sub block_ref($) {
8002 my $self = shift;
8003 return \$self->{'block'};
8007 sub block_length($) {
8008 my $self = shift;
8009 return $self->{'block_length'};
8012 sub remove_rec_sep($) {
8013 my ($block_ref,$recstart,$recend) = @_;
8014 # Remove record separator
8015 $$block_ref =~ s/$recend$recstart//gos;
8016 $$block_ref =~ s/^$recstart//os;
8017 $$block_ref =~ s/$recend$//os;
8020 sub non_blocking_write($) {
8021 my $self = shift;
8022 my $something_written = 0;
8023 use POSIX qw(:errno_h);
8025 my $in = $self->fh(0,"w");
8026 my $rv = syswrite($in,
8027 substr($self->{'block'},$self->{'block_pos'}));
8028 if (!defined($rv) && $! == EAGAIN) {
8029 # would block - but would have written
8030 $something_written = 0;
8031 # avoid triggering auto expanding block
8032 $Global::no_autoexpand_block ||= 1;
8033 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8034 # incomplete write
8035 # Remove the written part
8036 $self->{'block_pos'} += $rv;
8037 $something_written = $rv;
8038 } else {
8039 # successfully wrote everything
8040 # Empty block to free memory
8041 my $a = "";
8042 $self->set_block(\$a,\$a,0,"","");
8043 $something_written = $rv;
8045 ::debug("pipe", "Non-block: ", $something_written);
8046 return $something_written;
8050 sub virgin($) {
8051 my $self = shift;
8052 return $self->{'virgin'};
8055 sub set_virgin($$) {
8056 my $self = shift;
8057 $self->{'virgin'} = shift;
8060 sub pid($) {
8061 my $self = shift;
8062 return $self->{'pid'};
8065 sub set_pid($$) {
8066 my $self = shift;
8067 $self->{'pid'} = shift;
8070 sub starttime($) {
8071 # Returns:
8072 # UNIX-timestamp this job started
8073 my $self = shift;
8074 return sprintf("%.3f",$self->{'starttime'});
8077 sub set_starttime($@) {
8078 my $self = shift;
8079 my $starttime = shift || ::now();
8080 $self->{'starttime'} = $starttime;
8081 $opt::sqlworker and
8082 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8083 $starttime);
8086 sub runtime($) {
8087 # Returns:
8088 # Run time in seconds with 3 decimals
8089 my $self = shift;
8090 return sprintf("%.3f",
8091 int(($self->endtime() - $self->starttime())*1000)/1000);
8094 sub endtime($) {
8095 # Returns:
8096 # UNIX-timestamp this job ended
8097 # 0 if not ended yet
8098 my $self = shift;
8099 return ($self->{'endtime'} || 0);
8102 sub set_endtime($$) {
8103 my $self = shift;
8104 my $endtime = shift;
8105 $self->{'endtime'} = $endtime;
8106 $opt::sqlworker and
8107 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8108 $self->runtime());
8111 sub is_timedout($) {
8112 # Is the job timedout?
8113 # Input:
8114 # $delta_time = time that the job may run
8115 # Returns:
8116 # True or false
8117 my $self = shift;
8118 my $delta_time = shift;
8119 return time > $self->{'starttime'} + $delta_time;
8122 sub kill($) {
8123 my $self = shift;
8124 $self->set_exitstatus(-1);
8125 ::kill_sleep_seq($self->pid());
8128 sub failed($) {
8129 # return number of times failed for this $sshlogin
8130 # Input:
8131 # $sshlogin
8132 # Returns:
8133 # Number of times failed for $sshlogin
8134 my $self = shift;
8135 my $sshlogin = shift;
8136 return $self->{'failed'}{$sshlogin};
8139 sub failed_here($) {
8140 # return number of times failed for the current $sshlogin
8141 # Returns:
8142 # Number of times failed for this sshlogin
8143 my $self = shift;
8144 return $self->{'failed'}{$self->sshlogin()};
8147 sub add_failed($) {
8148 # increase the number of times failed for this $sshlogin
8149 my $self = shift;
8150 my $sshlogin = shift;
8151 $self->{'failed'}{$sshlogin}++;
8154 sub add_failed_here($) {
8155 # increase the number of times failed for the current $sshlogin
8156 my $self = shift;
8157 $self->{'failed'}{$self->sshlogin()}++;
8160 sub reset_failed($) {
8161 # increase the number of times failed for this $sshlogin
8162 my $self = shift;
8163 my $sshlogin = shift;
8164 delete $self->{'failed'}{$sshlogin};
8167 sub reset_failed_here($) {
8168 # increase the number of times failed for this $sshlogin
8169 my $self = shift;
8170 delete $self->{'failed'}{$self->sshlogin()};
8173 sub min_failed($) {
8174 # Returns:
8175 # the number of sshlogins this command has failed on
8176 # the minimal number of times this command has failed
8177 my $self = shift;
8178 my $min_failures =
8179 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
8180 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
8181 return ($number_of_sshlogins_failed_on,$min_failures);
8184 sub total_failed($) {
8185 # Returns:
8186 # $total_failures = the number of times this command has failed
8187 my $self = shift;
8188 my $total_failures = 0;
8189 for (values %{$self->{'failed'}}) {
8190 $total_failures += $_;
8192 return $total_failures;
8196 my $script;
8198 sub postpone_exit_and_cleanup {
8199 # Command to remove files and dirs (given as args) without
8200 # affecting the exit value in $?/$status.
8201 if(not $script) {
8202 $script = "perl -e '".
8203 ::spacefree(0,q{
8204 $bash=shift;
8205 $csh=shift;
8206 for(@ARGV){
8207 unlink;
8208 rmdir;
8210 if($bash=~s/h//) {
8211 exit $bash;
8213 exit $csh;
8215 "' ".'"$?h" "$status" ';
8217 return $script
8222 my $script;
8224 sub fifo_wrap() {
8225 # Script to create a fifo, run a command on the fifo
8226 # while copying STDIN to the fifo, and finally
8227 # remove the fifo and return the exit code of the command.
8228 if(not $script) {
8229 # {} == $PARALLEL_TMP for --fifo
8230 # To make it csh compatible a wrapper needs to:
8231 # * mkfifo
8232 # * spawn $command &
8233 # * cat > fifo
8234 # * waitpid to get the exit code from $command
8235 # * be less than 1000 chars long
8236 $script = "perl -e '".
8237 (::spacefree
8238 (0, q{
8239 ($s,$c,$f) = @ARGV;
8240 # mkfifo $PARALLEL_TMP
8241 system "mkfifo", $f;
8242 # spawn $shell -c $command &
8243 $pid = fork || exec $s, "-c", $c;
8244 open($o,">",$f) || die $!;
8245 # cat > $PARALLEL_TMP
8246 while(sysread(STDIN,$buf,131072)){
8247 syswrite $o, $buf;
8249 close $o;
8250 # waitpid to get the exit code from $command
8251 waitpid $pid,0;
8252 # Cleanup
8253 unlink $f;
8254 exit $?/256;
8255 }))."'";
8257 return $script;
8261 sub wrapped($) {
8262 # Wrap command with:
8263 # * --shellquote
8264 # * --nice
8265 # * --cat
8266 # * --fifo
8267 # * --sshlogin
8268 # * --pipepart (@Global::cat_prepends)
8269 # * --tee (@Global::cat_prepends)
8270 # * --pipe
8271 # * --tmux
8272 # The ordering of the wrapping is important:
8273 # * --nice/--cat/--fifo should be done on the remote machine
8274 # * --pipepart/--pipe should be done on the local machine inside --tmux
8275 # Uses:
8276 # @opt::shellquote
8277 # $opt::nice
8278 # $Global::shell
8279 # $opt::cat
8280 # $opt::fifo
8281 # @Global::cat_prepends
8282 # $opt::pipe
8283 # $opt::tmux
8284 # Returns:
8285 # $self->{'wrapped'} = the command wrapped with the above
8286 my $self = shift;
8287 if(not defined $self->{'wrapped'}) {
8288 my $command = $self->replaced();
8289 # Bug in Bash and Ksh when running multiline aliases
8290 # This will force them to run correctly, but will fail in
8291 # tcsh so we do not do it.
8292 # $command .= "\n\n";
8293 if(@opt::shellquote) {
8294 # Quote one time for each --shellquote
8295 my $c = $command;
8296 for(@opt::shellquote) {
8297 $c = ::Q($c);
8299 # Prepend "echo" (it is written in perl because
8300 # quoting '-e' causes problem in some versions and
8301 # csh's version does something wrong)
8302 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
8304 if($Global::parallel_env) {
8305 # If $PARALLEL_ENV set, put that in front of the command
8306 # Used for env_parallel.*
8307 if($Global::shell =~ /zsh/) {
8308 # The extra 'eval' will make aliases work, too
8309 $command = $Global::parallel_env."\n".
8310 "eval ".::Q($command);
8311 } else {
8312 $command = $Global::parallel_env."\n".$command;
8315 if($opt::cat) {
8316 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8317 # This is to make it possible to compute $PARALLEL_TMP on
8318 # the fly when running remotely.
8319 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8320 # the command is run.
8322 # Prepend 'cat > $PARALLEL_TMP;'
8323 # Append 'unlink $PARALLEL_TMP without affecting $?'
8324 $command =
8325 'cat > $PARALLEL_TMP;'.
8326 $command.";". postpone_exit_and_cleanup().
8327 '$PARALLEL_TMP';
8328 } elsif($opt::fifo) {
8329 # Prepend fifo-wrapper. In essence:
8330 # mkfifo {}
8331 # ( $command ) &
8332 # # $command must read {}, otherwise this 'cat' will block
8333 # cat > {};
8334 # wait; rm {}
8335 # without affecting $?
8336 $command = fifo_wrap(). " ".
8337 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8339 # Wrap with ssh + tranferring of files
8340 $command = $self->sshlogin_wrap($command);
8341 if(@Global::cat_prepends) {
8342 # --pipepart: prepend:
8343 # < /tmp/foo perl -e 'while(@ARGV) {
8344 # sysseek(STDIN,shift,0) || die; $left = shift;
8345 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8346 # $left -= $read; syswrite(STDOUT,$buf);
8348 # }' 0 0 0 11 |
8350 # --pipepart --tee: prepend:
8351 # < dash-a-file
8353 # --pipe --tee: wrap:
8354 # (rm fifo; ... ) < fifo
8356 # --pipe --shard X:
8357 # (rm fifo; ... ) < fifo
8358 $command = (shift @Global::cat_prepends). "($command)".
8359 (shift @Global::cat_appends);
8360 } elsif($opt::pipe and not $opt::roundrobin) {
8361 # Wrap with EOF-detector to avoid starting $command if EOF.
8362 $command = empty_input_wrapper($command);
8364 if($opt::tmux) {
8365 # Wrap command with 'tmux'
8366 $command = $self->tmux_wrap($command);
8368 if($Global::cshell
8370 length $command > 499) {
8371 # csh does not like words longer than 1000 (499 quoted)
8372 # $command = "perl -e '".base64_zip_eval()."' ".
8373 # join" ",string_zip_base64(
8374 # 'exec "'.::perl_quote_scalar($command).'"');
8375 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8376 ::perl_quote_scalar($command).'"');
8378 $self->{'wrapped'} = $command;
8380 return $self->{'wrapped'};
8383 sub set_sshlogin($$) {
8384 my $self = shift;
8385 my $sshlogin = shift;
8386 $self->{'sshlogin'} = $sshlogin;
8387 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8388 delete $self->{'wrapped'};
8390 if($opt::sqlworker) {
8391 # Identify worker as --sqlworker often runs on different machines
8392 my $host = $sshlogin->string();
8393 if($host eq ":") {
8394 $host = ::hostname();
8396 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8400 sub sshlogin($) {
8401 my $self = shift;
8402 return $self->{'sshlogin'};
8405 sub string_base64($) {
8406 # Base64 encode strings into 1000 byte blocks.
8407 # 1000 bytes is the largest word size csh supports
8408 # Input:
8409 # @strings = to be encoded
8410 # Returns:
8411 # @base64 = 1000 byte block
8412 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8413 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8414 return @base64;
8417 sub string_zip_base64($) {
8418 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8419 # byte blocks.
8420 # 1000 bytes is the largest word size csh supports
8421 # Zipping will make exporting big environments work, too
8422 # Input:
8423 # @strings = to be encoded
8424 # Returns:
8425 # @base64 = 1000 byte block
8426 my($zipin_fh, $zipout_fh,@base64);
8427 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8428 if(fork) {
8429 close $zipin_fh;
8430 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8431 # Split base64 encoded into 1000 byte blocks
8432 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8433 close $zipout_fh;
8434 } else {
8435 close $zipout_fh;
8436 print $zipin_fh @_;
8437 close $zipin_fh;
8438 exit;
8440 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8441 return @base64;
8444 sub base64_zip_eval() {
8445 # Script that:
8446 # * reads base64 strings from @ARGV
8447 # * decodes them
8448 # * pipes through 'bzip2 -dc'
8449 # * evals the result
8450 # Reverse of string_zip_base64 + eval
8451 # Will be wrapped in ' so single quote is forbidden
8452 # Returns:
8453 # $script = 1-liner for perl -e
8454 my $script = ::spacefree(0,q{
8455 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8456 eval"@GNU_Parallel";
8457 $chld = $SIG{CHLD};
8458 $SIG{CHLD} = "IGNORE";
8459 # Search for bzip2. Not found => use default path
8460 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
8461 # $in = stdin on $zip, $out = stdout from $zip
8462 # Forget my() to save chars for csh
8463 # my($in, $out,$eval);
8464 open3($in,$out,">&STDERR",$zip,"-dc");
8465 if(my $perlpid = fork) {
8466 close $in;
8467 $eval = join "", <$out>;
8468 close $out;
8469 } else {
8470 close $out;
8471 # Pipe decoded base64 into 'bzip2 -dc'
8472 print $in (decode_base64(join"",@ARGV));
8473 close $in;
8474 exit;
8476 wait;
8477 $SIG{CHLD} = $chld;
8478 eval $eval;
8480 ::debug("base64",$script,"\n");
8481 return $script;
8484 sub base64_wrap($) {
8485 # base64 encode Perl code
8486 # Split it into chunks of < 1000 bytes
8487 # Prepend it with a decoder that eval's it
8488 # Input:
8489 # $eval_string = Perl code to run
8490 # Returns:
8491 # $shell_command = shell command that runs $eval_string
8492 my $eval_string = shift;
8493 return
8494 "perl -e ".
8495 ::Q(base64_zip_eval())." ".
8496 join" ",::shell_quote(string_zip_base64($eval_string));
8499 sub base64_eval($) {
8500 # Script that:
8501 # * reads base64 strings from @ARGV
8502 # * decodes them
8503 # * evals the result
8504 # Reverse of string_base64 + eval
8505 # Will be wrapped in ' so single quote is forbidden.
8506 # Spaces are stripped so spaces cannot be significant.
8507 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
8508 # to make it clear that this is a GNU Parallel command
8509 # when looking at the process table.
8510 # Returns:
8511 # $script = 1-liner for perl -e
8512 my $script = ::spacefree(0,q{
8513 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
8514 eval "@GNU_Parallel";
8515 my $eval = decode_base64(join"",@ARGV);
8516 eval $eval;
8518 ::debug("base64",$script,"\n");
8519 return $script;
8522 sub sshlogin_wrap($) {
8523 # Wrap the command with the commands needed to run remotely
8524 # Input:
8525 # $command = command to run
8526 # Returns:
8527 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
8528 sub monitor_parent_sshd_script {
8529 # This script is to solve the problem of
8530 # * not mixing STDERR and STDOUT
8531 # * terminating with ctrl-c
8532 # If its parent is ssh: all good
8533 # If its parent is init(1): ssh died, so kill children
8534 my $monitor_parent_sshd_script;
8536 if(not $monitor_parent_sshd_script) {
8537 $monitor_parent_sshd_script =
8538 # This will be packed in ', so only use "
8539 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
8540 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
8541 '$nice = '.$opt::nice.';'.
8543 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
8544 do {
8545 $ENV{PARALLEL_TMP} = $tmpdir."/par".
8546 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
8547 } while(-e $ENV{PARALLEL_TMP});
8548 $SIG{CHLD} = sub { $done = 1; };
8549 $pid = fork;
8550 unless($pid) {
8551 # Make own process group to be able to kill HUP it later
8552 eval { setpgrp };
8553 eval { setpriority(0,0,$nice) };
8554 exec $shell, "-c", ($bashfunc."@ARGV");
8555 die "exec: $!\n";
8557 do {
8558 # Parent is not init (ppid=1), so sshd is alive
8559 # Exponential sleep up to 1 sec
8560 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
8561 select(undef, undef, undef, $s);
8562 } until ($done || getppid == 1);
8563 # Kill HUP the process group if job not done
8564 kill(SIGHUP, -${pid}) unless $done;
8565 wait;
8566 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8569 return $monitor_parent_sshd_script;
8572 sub vars_to_export {
8573 # Uses:
8574 # @opt::env
8575 my @vars = ("parallel_bash_environment");
8576 for my $varstring (@opt::env) {
8577 # Split up --env VAR1,VAR2
8578 push @vars, split /,/, $varstring;
8580 for (@vars) {
8581 if(-r $_ and not -d) {
8582 # Read as environment definition bug #44041
8583 # TODO parse this
8584 my $fh = ::open_or_exit($_);
8585 $Global::envdef = join("",<$fh>);
8586 close $fh;
8589 if(grep { /^_$/ } @vars) {
8590 local $/ = "\n";
8591 # --env _
8592 # Include all vars that are not in a clean environment
8593 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
8594 my @ignore = <$vars_fh>;
8595 chomp @ignore;
8596 my %ignore;
8597 @ignore{@ignore} = @ignore;
8598 close $vars_fh;
8599 push @vars, grep { not defined $ignore{$_} } keys %ENV;
8600 @vars = grep { not /^_$/ } @vars;
8601 } else {
8602 ::error("Run '$Global::progname --record-env' ".
8603 "in a clean environment first.");
8604 ::wait_and_exit(255);
8607 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
8608 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
8609 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
8610 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
8611 # Keep only defined variables
8612 return grep { defined($ENV{$_}) } @vars;
8615 sub env_as_eval {
8616 # Returns:
8617 # $eval = '$ENV{"..."}=...; ...'
8618 my @vars = vars_to_export();
8619 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
8620 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
8621 my @non_functions = (grep { !/PARALLEL_ENV/ }
8622 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
8624 # eval of @envset will set %ENV
8625 my $envset = join"", map {
8626 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
8627 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
8629 # running @bashfunc on the command line, will set the functions
8630 my @bashfunc = map {
8631 my $v=$_;
8632 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
8633 "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
8634 # eval $bashfuncset will set $bashfunc
8635 my $bashfuncset;
8636 if(@bashfunc) {
8637 # Functions are not supported for all shells
8638 if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
8639 ::warning("Shell functions may not be supported in $Global::shell.");
8641 $bashfuncset =
8642 '@bash_functions=qw('."@bash_functions".");".
8643 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
8644 if($shell=~/csh/) {
8645 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
8646 exec "false";
8649 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
8650 } else {
8651 $bashfuncset = '$bashfunc = "";'
8653 if($ENV{'parallel_bash_environment'}) {
8654 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
8656 ::debug("base64",$envset,$bashfuncset,"\n");
8657 return $csh_friendly,$envset,$bashfuncset;
8660 my $self = shift;
8661 my $command = shift;
8662 # TODO test that *sh -c 'parallel --env' use *sh
8663 if(not defined $self->{'sshlogin_wrap'}{$command}) {
8664 my $sshlogin = $self->sshlogin();
8665 my $serverlogin = $sshlogin->serverlogin();
8666 my $quoted_remote_command;
8667 $ENV{'PARALLEL_SEQ'} = $self->seq();
8668 $ENV{'PARALLEL_PID'} = $$;
8669 if($serverlogin eq ":") {
8670 if($opt::workdir) {
8671 # Create workdir if needed. Then cd to it.
8672 my $wd = $self->workdir();
8673 if($opt::workdir eq "." or $opt::workdir eq "...") {
8674 # If $wd does not start with '/': Prepend $HOME
8675 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
8677 ::mkdir_or_die($wd);
8678 my $post = "";
8679 if($opt::workdir eq "...") {
8680 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
8683 $command = "cd ".::Q($wd)." || exit 255; " .
8684 $command . $post;;
8686 if(@opt::env) {
8687 # Prepend with environment setter, which sets functions in zsh
8688 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
8689 my $perl_code = $envset.$bashfuncset.
8690 '@ARGV="'.::perl_quote_scalar($command).'";'.
8691 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
8692 if(length $perl_code > 999
8694 not $csh_friendly
8696 $command =~ /\n/) {
8697 # csh does not deal well with > 1000 chars in one word
8698 # csh does not deal well with $ENV with \n
8699 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
8700 } else {
8701 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
8703 } else {
8704 $self->{'sshlogin_wrap'}{$command} = $command;
8706 } else {
8707 my $pwd = "";
8708 if($opt::workdir) {
8709 # Create remote workdir if needed. Then cd to it.
8710 my $wd = ::pQ($self->workdir());
8711 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
8712 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
8714 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
8715 my $remote_command = $pwd.$envset.$bashfuncset.
8716 '@ARGV="'.::perl_quote_scalar($command).'";'.
8717 monitor_parent_sshd_script();
8718 $quoted_remote_command = "perl -e ". ::Q($remote_command);
8719 my $dq_remote_command = ::Q($quoted_remote_command);
8720 if(length $dq_remote_command > 999
8722 not $csh_friendly
8724 $command =~ /\n/) {
8725 # csh does not deal well with > 1000 chars in one word
8726 # csh does not deal well with $ENV with \n
8727 $quoted_remote_command =
8728 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
8729 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
8730 } else {
8731 $quoted_remote_command = $dq_remote_command;
8734 my $sshcmd = $sshlogin->sshcommand();
8735 my ($pre,$post,$cleanup)=("","","");
8736 # --transfer
8737 $pre .= $self->sshtransfer();
8738 # --return
8739 $post .= $self->sshreturn();
8740 # --cleanup
8741 $post .= $self->sshcleanup();
8742 if($post) {
8743 # We need to save the exit status of the job
8744 $post = exitstatuswrapper($post);
8746 $self->{'sshlogin_wrap'}{$command} =
8747 ($pre
8748 . "$sshcmd $serverlogin -- exec "
8749 . $quoted_remote_command
8750 . ";"
8751 . $post);
8754 return $self->{'sshlogin_wrap'}{$command};
8757 sub transfer($) {
8758 # Files to transfer
8759 # Non-quoted and with {...} substituted
8760 # Returns:
8761 # @transfer - File names of files to transfer
8762 my $self = shift;
8764 my $transfersize = 0;
8765 my @transfer = $self->{'commandline'}->
8766 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
8767 for(@transfer) {
8768 # filesize
8769 if(-e $_) {
8770 $transfersize += (stat($_))[7];
8773 $self->add_transfersize($transfersize);
8774 return @transfer;
8777 sub transfersize($) {
8778 my $self = shift;
8779 return $self->{'transfersize'};
8782 sub add_transfersize($) {
8783 my $self = shift;
8784 my $transfersize = shift;
8785 $self->{'transfersize'} += $transfersize;
8786 $opt::sqlworker and
8787 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
8788 $self->{'transfersize'});
8791 sub sshtransfer($) {
8792 # Returns for each transfer file:
8793 # rsync $file remote:$workdir
8794 my $self = shift;
8795 my @pre;
8796 my $sshlogin = $self->sshlogin();
8797 my $workdir = $self->workdir();
8798 for my $file ($self->transfer()) {
8799 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
8801 return join("",@pre);
8804 sub return($) {
8805 # Files to return
8806 # Non-quoted and with {...} substituted
8807 # Returns:
8808 # @non_quoted_filenames
8809 my $self = shift;
8810 return $self->{'commandline'}->
8811 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
8814 sub returnsize($) {
8815 # This is called after the job has finished
8816 # Returns:
8817 # $number_of_bytes transferred in return
8818 my $self = shift;
8819 for my $file ($self->return()) {
8820 if(-e $file) {
8821 $self->{'returnsize'} += (stat($file))[7];
8824 return $self->{'returnsize'};
8827 sub add_returnsize($) {
8828 my $self = shift;
8829 my $returnsize = shift;
8830 $self->{'returnsize'} += $returnsize;
8831 $opt::sqlworker and
8832 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
8833 $self->{'returnsize'});
8836 sub sshreturn($) {
8837 # Returns for each return-file:
8838 # rsync remote:$workdir/$file .
8839 my $self = shift;
8840 my $sshlogin = $self->sshlogin();
8841 my $sshcmd = $sshlogin->sshcommand();
8842 my $serverlogin = $sshlogin->serverlogin();
8843 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
8844 my $pre = "";
8845 for my $file ($self->return()) {
8846 $file =~ s:^\./::g; # Remove ./ if any
8847 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
8848 my $cd = "";
8849 my $wd = "";
8850 if($relpath) {
8851 # rsync -avR /foo/./bar/baz.c remote:/tmp/
8852 # == (on old systems)
8853 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
8854 $wd = ::shell_quote_file($self->workdir()."/");
8856 # Only load File::Basename if actually needed
8857 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
8858 # dir/./file means relative to dir, so remove dir on remote
8859 $file =~ m:(.*)/\./:;
8860 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
8861 my $nobasedir = $file;
8862 $nobasedir =~ s:.*/\./::;
8863 $cd = ::shell_quote_file(::dirname($nobasedir));
8864 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
8865 my $basename = ::Q(::shell_quote_file(::basename($file)));
8866 # --return
8867 # mkdir -p /home/tange/dir/subdir/;
8868 # rsync (--protocol 30) -rlDzR
8869 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
8870 # server:file.gz /home/tange/dir/subdir/
8871 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
8872 " $rsync_cd $rsync_opts $serverlogin:".
8873 $basename . " ".$basedir.$cd.";";
8875 return $pre;
8878 sub sshcleanup($) {
8879 # Return the sshcommand needed to remove the file
8880 # Returns:
8881 # ssh command needed to remove files from sshlogin
8882 my $self = shift;
8883 my $sshlogin = $self->sshlogin();
8884 my $sshcmd = $sshlogin->sshcommand();
8885 my $serverlogin = $sshlogin->serverlogin();
8886 my $workdir = $self->workdir();
8887 my $cleancmd = "";
8889 for my $file ($self->remote_cleanup()) {
8890 my @subworkdirs = parentdirs_of($file);
8891 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
8893 if(defined $opt::workdir and $opt::workdir eq "...") {
8894 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
8896 return $cleancmd;
8899 sub remote_cleanup($) {
8900 # Returns:
8901 # Files to remove at cleanup
8902 my $self = shift;
8903 if($opt::cleanup) {
8904 my @transfer = $self->transfer();
8905 my @return = $self->return();
8906 return (@transfer,@return);
8907 } else {
8908 return ();
8912 sub exitstatuswrapper(@) {
8913 if($Global::cshell) {
8914 return ('set _EXIT_status=$status; ' .
8915 join(" ",@_).
8916 'exit $_EXIT_status;');
8917 } else {
8918 return ('_EXIT_status=$?; ' .
8919 join(" ",@_).
8920 'exit $_EXIT_status;');
8925 sub workdir($) {
8926 # Returns:
8927 # the workdir on a remote machine
8928 my $self = shift;
8929 if(not defined $self->{'workdir'}) {
8930 my $workdir;
8931 if(defined $opt::workdir) {
8932 if($opt::workdir eq ".") {
8933 # . means current dir
8934 my $home = $ENV{'HOME'};
8935 eval 'use Cwd';
8936 my $cwd = cwd();
8937 $workdir = $cwd;
8938 if($home) {
8939 # If homedir exists: remove the homedir from
8940 # workdir if cwd starts with homedir
8941 # E.g. /home/foo/my/dir => my/dir
8942 # E.g. /tmp/my/dir => /tmp/my/dir
8943 my ($home_dev, $home_ino) = (stat($home))[0,1];
8944 my $parent = "";
8945 my @dir_parts = split(m:/:,$cwd);
8946 my $part;
8947 while(defined ($part = shift @dir_parts)) {
8948 $part eq "" and next;
8949 $parent .= "/".$part;
8950 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
8951 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
8952 # dev and ino is the same: We found the homedir.
8953 $workdir = join("/",@dir_parts);
8954 last;
8958 if($workdir eq "") {
8959 $workdir = ".";
8961 } elsif($opt::workdir eq "...") {
8962 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
8963 . "-" . $self->seq();
8964 } else {
8965 $workdir = $self->{'commandline'}->
8966 replace_placeholders([$opt::workdir],0,0);
8967 #$workdir = $opt::workdir;
8968 # Rsync treats /./ special. We dont want that
8969 $workdir =~ s:/\./:/:g; # Remove /./
8970 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
8971 $workdir =~ s:^\./::g; # Remove starting ./ if any
8973 } else {
8974 $workdir = ".";
8976 $self->{'workdir'} = $workdir;
8978 return $self->{'workdir'};
8981 sub parentdirs_of($) {
8982 # Return:
8983 # all parentdirs except . of this dir or file - sorted desc by length
8984 my $d = shift;
8985 my @parents = ();
8986 while($d =~ s:/[^/]+$::) {
8987 if($d ne ".") {
8988 push @parents, $d;
8991 return @parents;
8994 sub start($) {
8995 # Setup STDOUT and STDERR for a job and start it.
8996 # Returns:
8997 # job-object or undef if job not to run
8999 sub open3_setpgrp_internal {
9000 # Run open3+setpgrp followed by the command
9001 # Input:
9002 # $stdin_fh = Filehandle to use as STDIN
9003 # $stdout_fh = Filehandle to use as STDOUT
9004 # $stderr_fh = Filehandle to use as STDERR
9005 # $command = Command to run
9006 # Returns:
9007 # $pid = Process group of job started
9008 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9009 my $pid;
9010 local (*OUT,*ERR);
9011 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9012 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9013 # The eval is needed to catch exception from open3
9014 eval {
9015 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9016 # Each child gets its own process group to make it safe to killall
9017 eval{ setpgrp(0,0) };
9018 eval{ setpriority(0,0,$opt::nice) };
9019 exec($Global::shell,"-c",$command)
9020 || ::die_bug("open3-$stdin_fh $command");
9023 return $pid;
9026 sub open3_setpgrp_external {
9027 # Run open3 on $command wrapped with a perl script doing setpgrp
9028 # Works on systems that do not support open3(,,,"-")
9029 # Input:
9030 # $stdin_fh = Filehandle to use as STDIN
9031 # $stdout_fh = Filehandle to use as STDOUT
9032 # $stderr_fh = Filehandle to use as STDERR
9033 # $command = Command to run
9034 # Returns:
9035 # $pid = Process group of job started
9036 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9037 local (*OUT,*ERR);
9038 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9039 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9041 my $pid;
9042 my @setpgrp_wrap =
9043 ('perl','-e',
9044 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9045 "exec '$Global::shell', '-c', \@ARGV");
9046 # The eval is needed to catch exception from open3
9047 eval {
9048 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9049 || ::die_bug("open3-$stdin_fh");
9052 return $pid;
9055 sub open3_setpgrp {
9056 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9057 no warnings 'redefine';
9058 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9059 # Test to see if open3(x,x,x,"-") is fully supported
9060 # Can an exported bash function be called via open3?
9061 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9062 'else { exec("bash","-c","testfun && true"); }';
9063 my $bash =
9064 ::shell_quote_scalar_default(
9065 "testfun() { rm $name; }; export -f testfun; ".
9066 "perl -MIPC::Open3 -e ".
9067 ::shell_quote_scalar_default($script)
9069 # Redirect STDERR temporarily,
9070 # so errors on MacOS X are ignored.
9071 open my $saveerr, ">&STDERR";
9072 open STDERR, '>', "/dev/null";
9073 # Run the test
9074 ::debug("init",qq{bash -c $bash 2>/dev/null});
9075 qx{ bash -c $bash 2>/dev/null };
9076 open STDERR, ">&", $saveerr;
9078 if(-e $name) {
9079 # Does not support open3(x,x,x,"-")
9080 # or does not have bash:
9081 # Use (slow) external version
9082 unlink($name);
9083 *open3_setpgrp = \&open3_setpgrp_external;
9084 ::debug("init","open3_setpgrp_external chosen\n");
9085 } else {
9086 # Supports open3(x,x,x,"-")
9087 # This is 0.5 ms faster to run
9088 *open3_setpgrp = \&open3_setpgrp_internal;
9089 ::debug("init","open3_setpgrp_internal chosen\n");
9091 # The sub is now redefined. Call it
9092 return open3_setpgrp(@_);
9095 my $job = shift;
9096 # Get the shell command to be executed (possibly with ssh infront).
9097 my $command = $job->wrapped();
9098 my $pid;
9100 if($Global::interactive or $Global::stderr_verbose) {
9101 $job->interactive_start();
9103 # Must be run after $job->interactive_start():
9104 # $job->interactive_start() may call $job->skip()
9105 if($job->{'commandline'}{'skip'}) {
9106 # $job->skip() was called
9107 $command = "true";
9109 $job->openoutputfiles();
9110 $job->print_verbose_dryrun();
9111 # Call slot to store the slot value
9112 $job->slot();
9113 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
9114 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
9115 $ENV{'PARALLEL_SEQ'} = $job->seq();
9116 $ENV{'PARALLEL_PID'} = $$;
9117 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
9118 $job->add_rm($ENV{'PARALLEL_TMP'});
9119 ::debug("run", $Global::total_running, " processes . Starting (",
9120 $job->seq(), "): $command\n");
9121 if($opt::pipe) {
9122 my ($stdin_fh) = ::gensym();
9123 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
9124 if($opt::roundrobin and not $opt::keeporder) {
9125 # --keep-order will make sure the order will be reproducible
9126 ::set_fh_non_blocking($stdin_fh);
9128 $job->set_fh(0,"w",$stdin_fh);
9129 if($opt::tee or $opt::shard) { $job->set_virgin(0); }
9130 } elsif ($opt::tty and -c "/dev/tty" and
9131 open(my $devtty_fh, "<", "/dev/tty")) {
9132 # Give /dev/tty to the command if no one else is using it
9133 # The eval is needed to catch exception from open3
9134 local (*IN,*OUT,*ERR);
9135 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9136 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9137 *IN = $devtty_fh;
9138 # The eval is needed to catch exception from open3
9139 my @wrap = ('perl','-e',
9140 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
9141 "exec '$Global::shell', '-c', \@ARGV");
9142 eval {
9143 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
9144 || ::die_bug("open3-/dev/tty");
9147 close $devtty_fh;
9148 $job->set_virgin(0);
9149 } else {
9150 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
9151 $job->set_virgin(0);
9153 if($pid) {
9154 # A job was started
9155 $Global::total_running++;
9156 $Global::total_started++;
9157 $job->set_pid($pid);
9158 $job->set_starttime();
9159 $Global::running{$job->pid()} = $job;
9160 if($opt::timeout) {
9161 $Global::timeoutq->insert($job);
9163 $Global::newest_job = $job;
9164 $Global::newest_starttime = ::now();
9165 return $job;
9166 } else {
9167 # No more processes
9168 ::debug("run", "Cannot spawn more jobs.\n");
9169 return undef;
9173 sub interactive_start($) {
9174 my $self = shift;
9175 my $command = $self->wrapped();
9176 if($Global::interactive) {
9177 my $answer;
9178 ::status_no_nl("$command ?...");
9180 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
9181 $answer = <$tty_fh>;
9182 close $tty_fh;
9183 # Sometime we get an empty string (not even \n)
9184 # Do not know why, so let us just ignore it and try again
9185 } while(length $answer < 1);
9186 if (not ($answer =~ /^\s*y/i)) {
9187 $self->{'commandline'}->skip();
9189 } else {
9190 print $Global::original_stderr "$command\n";
9195 my $tmuxsocket;
9197 sub tmux_wrap($) {
9198 # Wrap command with tmux for session pPID
9199 # Input:
9200 # $actual_command = the actual command being run (incl ssh wrap)
9201 my $self = shift;
9202 my $actual_command = shift;
9203 # Temporary file name. Used for fifo to communicate exit val
9204 my $tmpfifo = ::tmpname("tmx");
9205 $self->add_rm($tmpfifo);
9207 if(length($tmpfifo) >=100) {
9208 ::error("tmux does not support sockets with path > 100.");
9209 ::wait_and_exit(255);
9211 if($opt::tmuxpane) {
9212 # Move the command into a pane in window 0
9213 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
9214 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
9215 $actual_command;
9217 my $visual_command = $self->replaced();
9218 my $title = $visual_command;
9219 if($visual_command =~ /\0/) {
9220 ::error("Command line contains NUL. tmux is confused by NUL.");
9221 ::wait_and_exit(255);
9223 # ; causes problems
9224 # ascii 194-245 annoys tmux
9225 $title =~ tr/[\011-\016;\302-\365]/ /s;
9226 $title = ::Q($title);
9228 my $l_act = length($actual_command);
9229 my $l_tit = length($title);
9230 my $l_fifo = length($tmpfifo);
9231 # The line to run contains a 118 chars extra code + the title 2x
9232 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9234 my $quoted_space75 = ::Q(" ")x75;
9235 while($l_tit < 1000 and
9237 (890 < $l_tot and $l_tot < 1350)
9239 (9250 < $l_tot and $l_tot < 9800)
9240 )) {
9241 # tmux blocks for certain lengths:
9242 # 900 < title + command < 1200
9243 # 9250 < title + command < 9800
9244 # but only if title < 1000, so expand the title with 75 spaces
9245 # The measured lengths are:
9246 # 996 < (title + whole command) < 1127
9247 # 9331 < (title + whole command) < 9636
9248 $title .= $quoted_space75;
9249 $l_tit = length($title);
9250 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
9253 my $tmux;
9254 $ENV{'PARALLEL_TMUX'} ||= "tmux";
9255 if(not $tmuxsocket) {
9256 $tmuxsocket = ::tmpname("tms");
9257 if($opt::fg) {
9258 if(not fork) {
9259 # Run tmux in the foreground
9260 # Wait for the socket to appear
9261 while (not -e $tmuxsocket) { }
9262 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
9263 exit;
9266 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
9268 $tmux = "sh -c '".
9269 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
9270 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
9272 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
9273 $Limits::Command::line_max_len, " tot ",
9274 $l_tot, "\n");
9276 return "mkfifo $tmpfifo && $tmux ".
9277 # Run in tmux
9280 "(".$actual_command.');'.
9281 # The triple print is needed - otherwise the testsuite fails
9282 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
9283 "echo $title; echo \007Job finished at: `date`;sleep 10"
9285 # Run outside tmux
9286 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
9287 # If csh the first will be 0h, so use the second as exit value.
9288 # Otherwise just use the first value as exit value.
9289 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
9293 sub is_already_in_results($) {
9294 # Do we already have results for this job?
9295 # Returns:
9296 # $job_already_run = bool whether there is output for this or not
9297 my $job = $_[0];
9298 my $out = $job->{'commandline'}->results_out();
9299 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
9300 return(-e $out."stdout" or -f $out);
9303 sub is_already_in_joblog($) {
9304 my $job = shift;
9305 return vec($Global::job_already_run,$job->seq(),1);
9308 sub set_job_in_joblog($) {
9309 my $job = shift;
9310 vec($Global::job_already_run,$job->seq(),1) = 1;
9313 sub should_be_retried($) {
9314 # Should this job be retried?
9315 # Returns
9316 # 0 - do not retry
9317 # 1 - job queued for retry
9318 my $self = shift;
9319 if (not $opt::retries) {
9320 return 0;
9322 if(not $self->exitstatus() and not $self->exitsignal()) {
9323 # Completed with success. If there is a recorded failure: forget it
9324 $self->reset_failed_here();
9325 return 0;
9326 } else {
9327 # The job failed. Should it be retried?
9328 $self->add_failed_here();
9329 my $retries = $self->{'commandline'}->
9330 replace_placeholders([$opt::retries],0,0);
9331 if($self->total_failed() == $retries) {
9332 # This has been retried enough
9333 return 0;
9334 } else {
9335 # This command should be retried
9336 $self->set_endtime(undef);
9337 $self->reset_exitstatus();
9338 $Global::JobQueue->unget($self);
9339 ::debug("run", "Retry ", $self->seq(), "\n");
9340 return 1;
9346 my (%print_later,$job_seq_to_print);
9348 sub print_earlier_jobs($) {
9349 # Print jobs whose output is postponed due to --keep-order
9350 # Returns: N/A
9351 my $job = shift;
9352 $print_later{$job->seq()} = $job;
9353 $job_seq_to_print ||= 1;
9354 my $returnsize = 0;
9355 ::debug("run", "Looking for: $job_seq_to_print ",
9356 "This: ", $job->seq(), "\n");
9357 for(;vec($Global::job_already_run,$job_seq_to_print,1);
9358 $job_seq_to_print++) {}
9359 while(my $j = $print_later{$job_seq_to_print}) {
9360 $returnsize += $j->print();
9361 if($j->endtime()) {
9362 # Job finished - look at the next
9363 delete $print_later{$job_seq_to_print};
9364 $job_seq_to_print++;
9365 next;
9366 } else {
9367 # Job not finished yet - look at it again next round
9368 last;
9371 return $returnsize;
9375 sub print($) {
9376 # Print the output of the jobs
9377 # Returns: N/A
9379 my $self = shift;
9380 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9381 if($opt::dryrun) {
9382 # Nothing was printed to this job:
9383 # cleanup tmp files if --files was set
9384 ::rm($self->fh(1,"name"));
9386 if($opt::pipe and $self->virgin() and not $opt::tee) {
9387 # Skip --joblog, --dryrun, --verbose
9388 } else {
9389 if($opt::ungroup) {
9390 # NULL returnsize = 0 returnsize
9391 $self->returnsize() or $self->add_returnsize(0);
9392 if($Global::joblog and defined $self->{'exitstatus'}) {
9393 # Add to joblog when finished
9394 $self->print_joblog();
9395 # Printing is only relevant for grouped/--line-buffer output.
9396 $opt::ungroup and return;
9400 # Check for disk full
9401 ::exit_if_disk_full();
9404 my $returnsize = $self->returnsize();
9405 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9406 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9407 $fdno == 0 and next;
9408 my $out_fd = $Global::fd{$fdno};
9409 my $in_fh = $self->fh($fdno,"r");
9410 if(not $in_fh) {
9411 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9412 # ::warning("File descriptor $fdno not defined\n");
9414 next;
9416 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
9417 if($opt::linebuffer) {
9418 # Line buffered print out
9419 $self->print_linebuffer($fdno,$in_fh,$out_fd);
9420 } elsif($opt::files) {
9421 $self->print_files($fdno,$in_fh,$out_fd);
9422 } elsif($opt::tag or defined $opt::tagstring) {
9423 $self->print_tag($fdno,$in_fh,$out_fd);
9424 } else {
9425 $self->print_normal($fdno,$in_fh,$out_fd);
9427 flush $out_fd;
9429 ::debug("print", "<<joboutput\n");
9430 if(defined $self->{'exitstatus'}
9431 and not ($self->virgin() and $opt::pipe)) {
9432 if($Global::joblog and not $opt::sqlworker) {
9433 # Add to joblog when finished
9434 $self->print_joblog();
9436 if($opt::sqlworker and not $opt::results) {
9437 $Global::sql->output($self);
9439 if($Global::csvsep) {
9440 # Add output to CSV when finished
9441 $self->print_csv();
9444 return $returnsize - $self->returnsize();
9448 my $header_printed;
9450 sub print_csv($) {
9451 my $self = shift;
9452 my $cmd;
9453 if($Global::verbose <= 1) {
9454 $cmd = $self->replaced();
9455 } else {
9456 # Verbose level > 1: Print the rsync and stuff
9457 $cmd = join " ", @{$self->{'commandline'}};
9459 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
9461 if(not $header_printed) {
9462 # Variable headers
9463 # Normal => V1..Vn
9464 # --header : => first value from column
9465 my @V;
9466 if($opt::header) {
9467 my $i = 1;
9468 @V = (map { $Global::input_source_header{$i++} }
9469 @$record_ref[1..$#$record_ref]);
9470 } else {
9471 my $V = "V1";
9472 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
9474 print $Global::csv_fh
9475 (map { $$_ }
9476 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
9477 "Send", "Receive", "Exitval", "Signal", "Command",
9479 "Stdout","Stderr"
9480 )),"\n";
9481 $header_printed++;
9483 # Memory optimization: Overwrite with the joined output
9484 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
9485 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
9486 print $Global::csv_fh
9487 (map { $$_ }
9488 combine_ref
9489 ($self->seq(),
9490 $self->sshlogin()->string(),
9491 $self->starttime(), sprintf("%0.3f",$self->runtime()),
9492 $self->transfersize(), $self->returnsize(),
9493 $self->exitstatus(), $self->exitsignal(), \$cmd,
9494 \@$record_ref[1..$#$record_ref],
9495 \$self->{'output'}{1},
9496 \$self->{'output'}{2})),"\n";
9500 sub combine_ref($) {
9501 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
9502 my @part = @_;
9503 my $sep = $Global::csvsep;
9504 my $quot = '"';
9505 my @out = ();
9507 my $must_be_quoted;
9508 for my $column (@part) {
9509 # Memory optimization: Content transferred as reference
9510 if(ref $column ne "SCALAR") {
9511 # Convert all columns to scalar references
9512 my $v = $column;
9513 $column = \$v;
9515 if(not defined $$column) {
9516 $$column = '';
9517 next;
9520 $must_be_quoted = 0;
9522 if($$column =~ s/$quot/$quot$quot/go){
9523 # Replace " => ""
9524 $must_be_quoted ||=1;
9526 if($$column =~ /[\s\Q$sep\E]/o){
9527 # Put quotes around if the column contains ,
9528 $must_be_quoted ||=1;
9531 $Global::use{"bytes"} ||= eval "use bytes; 1;";
9532 if ($$column =~ /\0/) {
9533 # Contains \0 => put quotes around
9534 $must_be_quoted ||=1;
9536 if($must_be_quoted){
9537 push @out, \$sep, \$quot, $column, \$quot;
9538 } else {
9539 push @out, \$sep, $column;
9542 # Pop off a $sep
9543 shift @out;
9544 return @out;
9547 sub print_files($) {
9548 # Print the name of the file containing stdout on stdout
9549 # Uses:
9550 # $opt::pipe
9551 # $opt::group = Print when job is done
9552 # $opt::linebuffer = Print ASAP
9553 # Returns: N/A
9554 my $self = shift;
9555 my ($fdno,$in_fh,$out_fd) = @_;
9557 # If the job is dead: close printing fh. Needed for --compress
9558 close $self->fh($fdno,"w");
9559 if($? and $opt::compress) {
9560 ::error($opt::compress_program." failed.");
9561 $self->set_exitstatus(255);
9563 if($opt::compress) {
9564 # Kill the decompressor which will not be needed
9565 CORE::kill "TERM", $self->fh($fdno,"rpid");
9567 close $in_fh;
9569 if($opt::pipe and $self->virgin()) {
9570 # Nothing was printed to this job:
9571 # cleanup unused tmp files because --files was set
9572 for my $fdno (1,2) {
9573 ::rm($self->fh($fdno,"name"));
9574 ::rm($self->fh($fdno,"unlink"));
9576 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
9577 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9578 if($Global::membuffer) {
9579 push @{$self->{'output'}{$fdno}},
9580 $self->tag(), $self->fh($fdno,"name");
9582 $self->add_returnsize(-s $self->fh($fdno,"name"));
9583 # Mark as printed - do not print again
9584 $self->set_fh($fdno,"name",undef);
9588 sub print_linebuffer($) {
9589 my $self = shift;
9590 my ($fdno,$in_fh,$out_fd) = @_;
9591 if(defined $self->{'exitstatus'}) {
9592 # If the job is dead: close printing fh. Needed for --compress
9593 close $self->fh($fdno,"w");
9594 if($? and $opt::compress) {
9595 ::error($opt::compress_program." failed.");
9596 $self->set_exitstatus(255);
9598 if($opt::compress) {
9599 # Blocked reading in final round
9600 for my $fdno (1,2) {
9601 ::set_fh_blocking($self->fh($fdno,'r'));
9605 if(not $self->virgin()) {
9606 if($opt::files or ($opt::results and not $Global::csvsep)) {
9607 # Print filename
9608 if($fdno == 1 and not $self->fh($fdno,"printed")) {
9609 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9610 if($Global::membuffer) {
9611 push(@{$self->{'output'}{$fdno}}, $self->tag(),
9612 $self->fh($fdno,"name"));
9614 $self->set_fh($fdno,"printed",1);
9616 # No need for reading $in_fh, as it is from "cat >/dev/null"
9617 } else {
9618 # Read halflines and print full lines
9619 my $outputlength = 0;
9620 my $halfline_ref = $self->{'halfline'}{$fdno};
9621 my ($buf,$i,$rv);
9622 # 1310720 gives 1.2 GB/s
9623 # 131072 gives 0.9 GB/s
9624 while($rv = sysread($in_fh, $buf,1310720)) {
9625 $outputlength += $rv;
9626 # TODO --recend
9627 # Treat both \n and \r as line end
9628 $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);
9629 if($i) {
9630 # One or more complete lines were found
9631 if($opt::tag or defined $opt::tagstring) {
9632 # Replace ^ with $tag within the full line
9633 my $tag = $self->tag();
9634 # TODO --recend that can be partially in @$halfline_ref
9635 substr($buf,0,$i-1) =~ s/(?<=[\n\r])/$tag/gm;
9636 # The length changed, so find the new ending pos
9637 $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);
9638 unshift @$halfline_ref, $tag;
9640 # Print the partial line (halfline) and the last half
9641 print $out_fd @$halfline_ref, substr($buf,0,$i);
9642 # Buffer in memory for SQL and CSV-output
9643 if($Global::membuffer) {
9644 push(@{$self->{'output'}{$fdno}},
9645 @$halfline_ref, substr($buf,0,$i));
9647 # Remove the printed part by keeping the unprinted part
9648 @$halfline_ref = (substr($buf,$i));
9649 } else {
9650 # No newline, so append to the halfline
9651 push @$halfline_ref, $buf;
9654 $self->add_returnsize($outputlength);
9656 if(defined $self->{'exitstatus'}) {
9657 if($opt::files or ($opt::results and not $Global::csvsep)) {
9658 $self->add_returnsize(-s $self->fh($fdno,"name"));
9659 } else {
9660 # If the job is dead: print the remaining partial line
9661 # read remaining
9662 my $halfline_ref = $self->{'halfline'}{$fdno};
9663 if(grep /./, @$halfline_ref) {
9664 my $returnsize = 0;
9665 for(@{$self->{'halfline'}{$fdno}}) {
9666 $returnsize += length $_;
9668 $self->add_returnsize($returnsize);
9669 if($opt::tag or defined $opt::tagstring) {
9670 # Prepend $tag the the remaining half line
9671 unshift @$halfline_ref, $self->tag();
9673 # Print the partial line (halfline)
9674 print $out_fd @{$self->{'halfline'}{$fdno}};
9675 # Buffer in memory for SQL and CSV-output
9676 if($Global::membuffer) {
9677 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
9679 @$halfline_ref = ();
9682 if($self->fh($fdno,"rpid") and
9683 CORE::kill 0, $self->fh($fdno,"rpid")) {
9684 # decompress still running
9685 } else {
9686 # decompress done: close fh
9687 close $in_fh;
9688 if($? and $opt::compress) {
9689 ::error($opt::decompress_program." failed.");
9690 $self->set_exitstatus(255);
9697 sub print_tag(@) {
9698 return print_normal(@_);
9701 sub free_ressources() {
9702 my $self = shift;
9703 if(not $opt::ungroup) {
9704 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9705 close $self->fh($fdno,"w");
9706 close $self->fh($fdno,"r");
9711 sub print_normal($) {
9712 my $self = shift;
9713 my ($fdno,$in_fh,$out_fd) = @_;
9714 my $buf;
9715 close $self->fh($fdno,"w");
9716 if($? and $opt::compress) {
9717 ::error($opt::compress_program." failed.");
9718 $self->set_exitstatus(255);
9720 if(not $self->virgin()) {
9721 seek $in_fh, 0, 0;
9722 # $in_fh is now ready for reading at position 0
9723 my $outputlength = 0;
9724 my @output;
9726 if($opt::tag or $opt::tagstring) {
9727 # Read line by line
9728 local $/ = "\n";
9729 my $tag = $self->tag();
9730 while(<$in_fh>) {
9731 print $out_fd $tag,$_;
9732 $outputlength += length $_;
9733 if($Global::membuffer) {
9734 push @{$self->{'output'}{$fdno}}, $tag, $_;
9737 } else {
9738 while(sysread($in_fh,$buf,131072)) {
9739 print $out_fd $buf;
9740 $outputlength += length $buf;
9741 if($Global::membuffer) {
9742 push @{$self->{'output'}{$fdno}}, $buf;
9746 if($fdno == 1) {
9747 $self->add_returnsize($outputlength);
9749 close $in_fh;
9750 if($? and $opt::compress) {
9751 ::error($opt::decompress_program." failed.");
9752 $self->set_exitstatus(255);
9757 sub print_joblog($) {
9758 my $self = shift;
9759 my $cmd;
9760 if($Global::verbose <= 1) {
9761 $cmd = $self->replaced();
9762 } else {
9763 # Verbose level > 1: Print the rsync and stuff
9764 $cmd = join " ", @{$self->{'commandline'}};
9766 # Newlines make it hard to parse the joblog
9767 $cmd =~ s/\n/\0/g;
9768 print $Global::joblog
9769 join("\t", $self->seq(), $self->sshlogin()->string(),
9770 $self->starttime(), sprintf("%10.3f",$self->runtime()),
9771 $self->transfersize(), $self->returnsize(),
9772 $self->exitstatus(), $self->exitsignal(), $cmd
9773 ). "\n";
9774 flush $Global::joblog;
9775 $self->set_job_in_joblog();
9778 sub tag($) {
9779 my $self = shift;
9780 if(not defined $self->{'tag'}) {
9781 if($opt::tag or defined $opt::tagstring) {
9782 $self->{'tag'} = $self->{'commandline'}->
9783 replace_placeholders([$opt::tagstring],0,0)."\t";
9784 } else {
9785 $self->{'tag'} = "";
9788 return $self->{'tag'};
9791 sub hostgroups($) {
9792 my $self = shift;
9793 if(not defined $self->{'hostgroups'}) {
9794 $self->{'hostgroups'} =
9795 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
9797 return @{$self->{'hostgroups'}};
9800 sub exitstatus($) {
9801 my $self = shift;
9802 return $self->{'exitstatus'};
9805 sub set_exitstatus($$) {
9806 my $self = shift;
9807 my $exitstatus = shift;
9808 if($exitstatus) {
9809 # Overwrite status if non-zero
9810 $self->{'exitstatus'} = $exitstatus;
9811 } else {
9812 # Set status but do not overwrite
9813 # Status may have been set by --timeout
9814 $self->{'exitstatus'} ||= $exitstatus;
9816 $opt::sqlworker and
9817 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
9818 $exitstatus);
9821 sub reset_exitstatus($) {
9822 my $self = shift;
9823 undef $self->{'exitstatus'};
9826 sub exitsignal($) {
9827 my $self = shift;
9828 return $self->{'exitsignal'};
9831 sub set_exitsignal($$) {
9832 my $self = shift;
9833 my $exitsignal = shift;
9834 $self->{'exitsignal'} = $exitsignal;
9835 $opt::sqlworker and
9836 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
9837 $exitsignal);
9841 my $status_printed;
9842 my $total_jobs;
9844 sub should_we_halt {
9845 # Should we halt? Immediately? Gracefully?
9846 # Returns: N/A
9847 my $job = shift;
9848 my $limit;
9849 if($job->exitstatus() or $job->exitsignal()) {
9850 # Job failed
9851 $Global::exitstatus++;
9852 $Global::total_failed++;
9853 if($Global::halt_fail) {
9854 ::status("$Global::progname: This job failed:",
9855 $job->replaced());
9856 $limit = $Global::total_failed;
9858 } elsif($Global::halt_success) {
9859 ::status("$Global::progname: This job succeeded:",
9860 $job->replaced());
9861 $limit = $Global::total_completed - $Global::total_failed;
9863 if($Global::halt_done) {
9864 ::status("$Global::progname: This job finished:",
9865 $job->replaced());
9866 $limit = $Global::total_completed;
9868 if(not defined $limit) {
9869 return ""
9871 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
9872 # --halt % => 1..100 (pct of jobs failed)
9873 if($Global::halt_pct and not $Global::halt_count) {
9874 $total_jobs ||= $Global::JobQueue->total_jobs();
9875 # From the pct compute the number of jobs that must fail/succeed
9876 $Global::halt_count = $total_jobs * $Global::halt_pct;
9878 if($limit >= $Global::halt_count) {
9879 # At least N jobs have failed/succeded/completed
9880 # or at least N% have failed/succeded/completed
9881 # So we should prepare for exit
9882 if($Global::halt_fail or $Global::halt_done) {
9883 # Set exit status
9884 if(not defined $Global::halt_exitstatus) {
9885 if($Global::halt_pct) {
9886 # --halt now,fail=X% or soon,fail=X%
9887 # --halt now,done=X% or soon,done=X%
9888 $Global::halt_exitstatus =
9889 ::ceil($Global::total_failed / $total_jobs * 100);
9890 } elsif($Global::halt_count) {
9891 # --halt now,fail=X or soon,fail=X
9892 # --halt now,done=X or soon,done=X
9893 $Global::halt_exitstatus =
9894 ::min($Global::total_failed,101);
9896 if($Global::halt_count and $Global::halt_count == 1) {
9897 # --halt now,fail=1 or soon,fail=1
9898 # --halt now,done=1 or soon,done=1
9899 # Emulate Bash's +128 if there is a signal
9900 $Global::halt_exitstatus =
9901 ($job->exitstatus()
9903 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
9906 ::debug("halt","Pct: ",$Global::halt_pct,
9907 " count: ",$Global::halt_count,
9908 " status: ",$Global::halt_exitstatus,"\n");
9909 } elsif($Global::halt_success) {
9910 $Global::halt_exitstatus = 0;
9912 if($Global::halt_when eq "soon"
9914 (scalar(keys %Global::running) > 0
9916 $Global::max_jobs_running == 1)) {
9917 ::status
9918 ("$Global::progname: Starting no more jobs. ".
9919 "Waiting for ". (keys %Global::running).
9920 " jobs to finish.");
9921 $Global::start_no_new_jobs ||= 1;
9923 return($Global::halt_when);
9925 return "";
9930 package CommandLine;
9932 sub new($) {
9933 my $class = shift;
9934 my $seq = shift;
9935 my $commandref = shift;
9936 $commandref || die;
9937 my $arg_queue = shift;
9938 my $context_replace = shift;
9939 my $max_number_of_args = shift; # for -N and normal (-n1)
9940 my $transfer_files = shift;
9941 my $return_files = shift;
9942 my $replacecount_ref = shift;
9943 my $len_ref = shift;
9944 my %replacecount = %$replacecount_ref;
9945 my %len = %$len_ref;
9946 for (keys %$replacecount_ref) {
9947 # Total length of this replacement string {} replaced with all args
9948 $len{$_} = 0;
9950 return bless {
9951 'command' => $commandref,
9952 'seq' => $seq,
9953 'len' => \%len,
9954 'arg_list' => [],
9955 'arg_list_flat' => [],
9956 'arg_list_flat_orig' => [undef],
9957 'arg_queue' => $arg_queue,
9958 'max_number_of_args' => $max_number_of_args,
9959 'replacecount' => \%replacecount,
9960 'context_replace' => $context_replace,
9961 'transfer_files' => $transfer_files,
9962 'return_files' => $return_files,
9963 'replaced' => undef,
9964 }, ref($class) || $class;
9967 sub seq($) {
9968 my $self = shift;
9969 return $self->{'seq'};
9972 sub set_seq($$) {
9973 my $self = shift;
9974 $self->{'seq'} = shift;
9977 sub slot($) {
9978 # Find the number of a free job slot and return it
9979 # Uses:
9980 # @Global::slots - list with free jobslots
9981 # Returns:
9982 # $jobslot = number of jobslot
9983 my $self = shift;
9984 if(not $self->{'slot'}) {
9985 if(not @Global::slots) {
9986 # $max_slot_number will typically be $Global::max_jobs_running
9987 push @Global::slots, ++$Global::max_slot_number;
9989 $self->{'slot'} = shift @Global::slots;
9991 return $self->{'slot'};
9995 my $already_spread;
9997 sub populate($) {
9998 # Add arguments from arg_queue until the number of arguments or
9999 # max line length is reached
10000 # Uses:
10001 # $Global::minimal_command_line_length
10002 # $opt::cat
10003 # $opt::fifo
10004 # $Global::JobQueue
10005 # $opt::m
10006 # $opt::X
10007 # $Global::max_jobs_running
10008 # Returns: N/A
10009 my $self = shift;
10010 my $next_arg;
10011 my $max_len = $Global::minimal_command_line_length
10012 || Limits::Command::max_length();
10014 if($opt::cat or $opt::fifo) {
10015 # Get the empty arg added by --pipepart (if any)
10016 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
10017 # $PARALLEL_TMP will point to a tempfile that will be used as {}
10018 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
10019 unget([Arg->new('$PARALLEL_TMP')]);
10021 while (not $self->{'arg_queue'}->empty()) {
10022 $next_arg = $self->{'arg_queue'}->get();
10023 if(not defined $next_arg) {
10024 next;
10026 $self->push($next_arg);
10027 if($self->len() >= $max_len) {
10028 # Command length is now > max_length
10029 # If there are arguments: remove the last
10030 # If there are no arguments: Error
10031 # TODO stuff about -x opt_x
10032 if($self->number_of_args() > 1) {
10033 # There is something to work on
10034 $self->{'arg_queue'}->unget($self->pop());
10035 last;
10036 } else {
10037 my $args = join(" ", map { $_->orig() } @$next_arg);
10038 ::error("Command line too long (".
10039 $self->len(). " >= ".
10040 $max_len.
10041 ") at input ".
10042 $self->{'arg_queue'}->arg_number().
10043 ": ".
10044 ((length $args > 50) ?
10045 (substr($args,0,50))."..." :
10046 $args));
10047 $self->{'arg_queue'}->unget($self->pop());
10048 ::wait_and_exit(255);
10052 if(defined $self->{'max_number_of_args'}) {
10053 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
10054 last;
10058 if(($opt::m or $opt::X) and not $already_spread
10059 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
10060 # -m or -X and EOF => Spread the arguments over all jobslots
10061 # (unless they are already spread)
10062 $already_spread ||= 1;
10063 if($self->number_of_args() > 1) {
10064 $self->{'max_number_of_args'} =
10065 ::ceil($self->number_of_args()/$Global::max_jobs_running);
10066 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
10067 $self->{'max_number_of_args'};
10068 $self->{'arg_queue'}->unget($self->pop_all());
10069 while($self->number_of_args() < $self->{'max_number_of_args'}) {
10070 $self->push($self->{'arg_queue'}->get());
10073 $Global::JobQueue->flush_total_jobs();
10076 if($opt::sqlmaster) {
10077 # Insert the V1..Vn for this $seq in SQL table instead of generating one
10078 $Global::sql->insert_records($self->seq(), $self->{'command'},
10079 $self->{'arg_list_flat_orig'});
10084 sub push($) {
10085 # Add one or more records as arguments
10086 # Returns: N/A
10087 my $self = shift;
10088 my $record = shift;
10089 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
10090 push @{$self->{'arg_list_flat'}}, @$record;
10091 push @{$self->{'arg_list'}}, $record;
10092 # Make @arg available for {= =}
10093 *Arg::arg = $self->{'arg_list_flat_orig'};
10095 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
10096 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10097 if($perlexpr =~ /^(\d+) /) {
10098 # Positional
10099 defined($record->[$1-1]) or next;
10100 $self->{'len'}{$perlexpr} +=
10101 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10102 } else {
10103 for my $arg (@$record) {
10104 if(defined $arg) {
10105 $self->{'len'}{$perlexpr} +=
10106 length $arg->replace($perlexpr,$quote_arg,$self);
10113 sub pop($) {
10114 # Remove last argument
10115 # Returns:
10116 # the last record
10117 my $self = shift;
10118 my $record = pop @{$self->{'arg_list'}};
10119 # pop off arguments from @$record
10120 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
10121 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
10122 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
10123 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10124 if($perlexpr =~ /^(\d+) /) {
10125 # Positional
10126 defined($record->[$1-1]) or next;
10127 $self->{'len'}{$perlexpr} -=
10128 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
10129 } else {
10130 for my $arg (@$record) {
10131 if(defined $arg) {
10132 $self->{'len'}{$perlexpr} -=
10133 length $arg->replace($perlexpr,$quote_arg,$self);
10138 return $record;
10141 sub pop_all($) {
10142 # Remove all arguments and zeros the length of replacement perlexpr
10143 # Returns:
10144 # all records
10145 my $self = shift;
10146 my @popped = @{$self->{'arg_list'}};
10147 for my $perlexpr (keys %{$self->{'replacecount'}}) {
10148 $self->{'len'}{$perlexpr} = 0;
10150 $self->{'arg_list'} = [];
10151 $self->{'arg_list_flat_orig'} = [undef];
10152 $self->{'arg_list_flat'} = [];
10153 return @popped;
10156 sub number_of_args($) {
10157 # The number of records
10158 # Returns:
10159 # number of records
10160 my $self = shift;
10161 # This is really the number of records
10162 return $#{$self->{'arg_list'}}+1;
10165 sub number_of_recargs($) {
10166 # The number of args in records
10167 # Returns:
10168 # number of args records
10169 my $self = shift;
10170 my $sum = 0;
10171 my $nrec = scalar @{$self->{'arg_list'}};
10172 if($nrec) {
10173 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
10175 return $sum;
10178 sub args_as_string($) {
10179 # Returns:
10180 # all unmodified arguments joined with ' ' (similar to {})
10181 my $self = shift;
10182 return (join " ", map { $_->orig() }
10183 map { @$_ } @{$self->{'arg_list'}});
10186 sub results_out($) {
10187 sub max_file_name_length {
10188 # Figure out the max length of a subdir
10189 # TODO and the max total length
10190 # Ext4 = 255,130816
10191 # Uses:
10192 # $Global::max_file_length is set
10193 # Returns:
10194 # $Global::max_file_length
10195 my $testdir = shift;
10197 my $upper = 100_000_000;
10198 # Dir length of 8 chars is supported everywhere
10199 my $len = 8;
10200 my $dir = "x"x$len;
10201 do {
10202 rmdir($testdir."/".$dir);
10203 $len *= 16;
10204 $dir = "x"x$len;
10205 } while ($len < $upper and mkdir $testdir."/".$dir);
10206 # Then search for the actual max length between $len/16 and $len
10207 my $min = $len/16;
10208 my $max = $len;
10209 while($max-$min > 5) {
10210 # If we are within 5 chars of the exact value:
10211 # it is not worth the extra time to find the exact value
10212 my $test = int(($min+$max)/2);
10213 $dir = "x"x$test;
10214 if(mkdir $testdir."/".$dir) {
10215 rmdir($testdir."/".$dir);
10216 $min = $test;
10217 } else {
10218 $max = $test;
10221 $Global::max_file_length = $min;
10222 return $min;
10225 my $self = shift;
10226 my $out = $self->replace_placeholders([$opt::results],0,0);
10227 if($out eq $opt::results) {
10228 # $opt::results simple string: Append args_as_dirname
10229 my $args_as_dirname = $self->args_as_dirname();
10230 # Output in: prefix/name1/val1/name2/val2/stdout
10231 $out = $opt::results."/".$args_as_dirname;
10232 if(-d $out or eval{ File::Path::mkpath($out); }) {
10233 # OK
10234 } else {
10235 # mkpath failed: Argument probably too long.
10236 # Set $Global::max_file_length, which will keep the individual
10237 # dir names shorter than the max length
10238 max_file_name_length($opt::results);
10239 $args_as_dirname = $self->args_as_dirname();
10240 # prefix/name1/val1/name2/val2/
10241 $out = $opt::results."/".$args_as_dirname;
10242 File::Path::mkpath($out);
10244 $out .="/";
10245 } else {
10246 if($out =~ m:/$:) {
10247 # / = dir
10248 if(-d $out or eval{ File::Path::mkpath($out); }) {
10249 # OK
10250 } else {
10251 ::error("Cannot make dir '$out'.");
10252 ::wait_and_exit(255);
10254 } else {
10255 $out =~ m:(.*)/:;
10256 File::Path::mkpath($1);
10259 return $out;
10262 sub args_as_dirname($) {
10263 # Returns:
10264 # all unmodified arguments joined with '/' (similar to {})
10265 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10266 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
10267 my $self = shift;
10268 my @res = ();
10270 for my $rec_ref (@{$self->{'arg_list'}}) {
10271 # If headers are used, sort by them.
10272 # Otherwise keep the order from the command line.
10273 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
10274 for my $n (@header_indexes_sorted) {
10275 CORE::push(@res,
10276 $Global::input_source_header{$n},
10277 map { my $s = $_;
10278 # \t \0 \\ and / are quoted as: \t \0 \\ \_
10279 $s =~ s/\\/\\\\/g;
10280 $s =~ s/\t/\\t/g;
10281 $s =~ s/\0/\\0/g;
10282 $s =~ s:/:\\_:g;
10283 if($Global::max_file_length) {
10284 # Keep each subdir shorter than the longest
10285 # allowed file name
10286 $s = substr($s,0,$Global::max_file_length);
10288 $s; }
10289 $rec_ref->[$n-1]->orig());
10292 return join "/", @res;
10295 sub header_indexes_sorted($) {
10296 # Sort headers first by number then by name.
10297 # E.g.: 1a 1b 11a 11b
10298 # Returns:
10299 # Indexes of %Global::input_source_header sorted
10300 my $max_col = shift;
10302 no warnings 'numeric';
10303 for my $col (1 .. $max_col) {
10304 # Make sure the header is defined. If it is not: use column number
10305 if(not defined $Global::input_source_header{$col}) {
10306 $Global::input_source_header{$col} = $col;
10309 my @header_indexes_sorted = sort {
10310 # Sort headers numerically then asciibetically
10311 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
10313 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
10314 } 1 .. $max_col;
10315 return @header_indexes_sorted;
10318 sub len($) {
10319 # Uses:
10320 # @opt::shellquote
10321 # The length of the command line with args substituted
10322 my $self = shift;
10323 my $len = 0;
10324 # Add length of the original command with no args
10325 # Length of command w/ all replacement args removed
10326 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
10327 ::debug("length", "noncontext + command: $len\n");
10328 my $recargs = $self->number_of_recargs();
10329 if($self->{'context_replace'}) {
10330 # Context is duplicated for each arg
10331 $len += $recargs * $self->{'len'}{'context'};
10332 for my $replstring (keys %{$self->{'replacecount'}}) {
10333 # If the replacements string is more than once: mulitply its length
10334 $len += $self->{'len'}{$replstring} *
10335 $self->{'replacecount'}{$replstring};
10336 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
10337 $self->{'replacecount'}{$replstring}, "\n");
10339 # echo 11 22 33 44 55 66 77 88 99 1010
10340 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
10341 # 5 + ctxgrp*arg
10342 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10343 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10344 # Add space between context groups
10345 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10346 } else {
10347 # Each replacement string may occur several times
10348 # Add the length for each time
10349 $len += 1*$self->{'len'}{'context'};
10350 ::debug("length", "context+noncontext + command: $len\n");
10351 for my $replstring (keys %{$self->{'replacecount'}}) {
10352 # (space between regargs + length of replacement)
10353 # * number this replacement is used
10354 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10355 $self->{'replacecount'}{$replstring};
10358 if(defined $Global::parallel_env) {
10359 # If we are using --env, add the prefix for that, too.
10360 $len += length $Global::parallel_env;
10362 if($Global::quoting) {
10363 # Pessimistic length if -q is set
10364 # Worse than worst case: ' => "'" + " => '"'
10365 # TODO can we count the number of expanding chars?
10366 # and count them in arguments, too?
10367 $len *= 3;
10369 if(@opt::shellquote) {
10370 # Pessimistic length if --shellquote is set
10371 # Worse than worst case: ' => "'"
10372 for(@opt::shellquote) {
10373 $len *= 3;
10375 $len *= 5;
10377 if(@opt::sshlogin) {
10378 # Pessimistic length if remote
10379 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
10380 $len = int($len*4/3);
10383 return $len;
10386 sub replaced($) {
10387 # Uses:
10388 # $Global::noquote
10389 # $Global::quoting
10390 # Returns:
10391 # $replaced = command with place holders replaced and prepended
10392 my $self = shift;
10393 if(not defined $self->{'replaced'}) {
10394 # Don't quote arguments if the input is the full command line
10395 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
10396 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
10397 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
10398 $self->{'replaced'} = $self->
10399 replace_placeholders($self->{'command'},$Global::quoting,
10400 $quote_arg);
10401 my $len = length $self->{'replaced'};
10402 if ($len != $self->len()) {
10403 ::debug("length", $len, " != ", $self->len(),
10404 " ", $self->{'replaced'}, "\n");
10405 } else {
10406 ::debug("length", $len, " == ", $self->len(),
10407 " ", $self->{'replaced'}, "\n");
10410 return $self->{'replaced'};
10413 sub replace_placeholders($$$$) {
10414 # Replace foo{}bar with fooargbar
10415 # Input:
10416 # $targetref = command as shell words
10417 # $quote = should everything be quoted?
10418 # $quote_arg = should replaced arguments be quoted?
10419 # Uses:
10420 # @Arg::arg = arguments as strings to be use in {= =}
10421 # Returns:
10422 # @target with placeholders replaced
10423 my $self = shift;
10424 my $targetref = shift;
10425 my $quote = shift;
10426 my $quote_arg = shift;
10427 my %replace;
10429 # Token description:
10430 # \0spc = unquoted space
10431 # \0end = last token element
10432 # \0ign = dummy token to be ignored
10433 # \257<...\257> = replacement expression
10434 # " " = quoted space, that splits -X group
10435 # text = normal text - possibly part of -X group
10436 my $spacer = 0;
10437 my @tokens = grep { length $_ > 0 } map {
10438 if(/^\257<|^ $/) {
10439 # \257<...\257> or space
10441 } else {
10442 # Split each space/tab into a token
10443 split /(?=\s)|(?<=\s)/
10446 # Split \257< ... \257> into own token
10447 map { split /(?=\257<)|(?<=\257>)/ }
10448 # Insert "\0spc" between every element
10449 # This space should never be quoted
10450 map { $spacer++ ? ("\0spc",$_) : $_ }
10451 map { $_ eq "" ? "\0empty" : $_ }
10452 @$targetref;
10454 if(not @tokens) {
10455 # @tokens is empty: Return empty array
10456 return @tokens;
10458 ::debug("replace", "Tokens ".join":",@tokens,"\n");
10459 # Make it possible to use $arg[2] in {= =}
10460 *Arg::arg = $self->{'arg_list_flat_orig'};
10461 # Flat list:
10462 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
10463 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
10464 if(not @{$self->{'arg_list_flat'}}) {
10465 @{$self->{'arg_list_flat'}} = Arg->new("");
10467 my $argref = $self->{'arg_list_flat'};
10468 # Number of arguments - used for positional arguments
10469 my $n = $#$argref+1;
10471 # $self is actually a CommandLine-object,
10472 # but it looks nice to be able to say {= $job->slot() =}
10473 my $job = $self;
10474 # @replaced = tokens with \257< \257> replaced
10475 my @replaced;
10476 if($self->{'context_replace'}) {
10477 my @ctxgroup;
10478 for my $t (@tokens,"\0end") {
10479 # \0end = last token was end of tokens.
10480 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
10481 # Context group complete: Replace in it
10482 if(grep { /^\257</ } @ctxgroup) {
10483 # Context group contains a replacement string:
10484 # Copy once per arg
10485 my $space = "\0ign";
10486 for my $arg (@$argref) {
10487 my $normal_replace;
10488 # Push output
10489 # Put unquoted space before each context group
10490 # except the first
10491 CORE::push @replaced, $space, map {
10492 $a = $_;
10493 $a =~
10494 s{\257<(-?\d+)?(.*)\257>}
10496 if($1) {
10497 # Positional replace
10498 # Find the relevant arg and replace it
10499 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
10500 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10501 replace($2,$quote_arg,$self)
10502 : "");
10503 } else {
10504 # Normal replace
10505 $normal_replace ||= 1;
10506 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10508 }sgxe;
10510 } @ctxgroup;
10511 $normal_replace or last;
10512 $space = "\0spc";
10514 } else {
10515 # Context group has no a replacement string: Copy it once
10516 CORE::push @replaced, @ctxgroup;
10518 # New context group
10519 @ctxgroup=();
10521 if($t eq "\0spc" or $t eq " ") {
10522 CORE::push @replaced,$t;
10523 } else {
10524 CORE::push @ctxgroup,$t;
10527 } else {
10528 # @group = @token
10529 # Replace in group
10530 # Push output
10531 # repquote = no if {} first on line, no if $quote, yes otherwise
10532 for my $t (@tokens) {
10533 if($t =~ /^\257</) {
10534 my $space = "\0ign";
10535 for my $arg (@$argref) {
10536 my $normal_replace;
10537 $a = $t;
10538 $a =~
10539 s{\257<(-?\d+)?(.*)\257>}
10541 if($1) {
10542 # Positional replace
10543 # Find the relevant arg and replace it
10544 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
10545 # If defined: replace
10546 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10547 replace($2,$quote_arg,$self)
10548 : "");
10549 } else {
10550 # Normal replace
10551 $normal_replace ||= 1;
10552 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10554 }sgxe;
10555 CORE::push @replaced, $space, $a;
10556 $normal_replace or last;
10557 $space = "\0spc";
10559 } else {
10560 # No replacement
10561 CORE::push @replaced, $t;
10565 *Arg::arg = [];
10566 ::debug("replace","Replaced: ".join":",@replaced,"\n");
10567 if($Global::escape_string_present) {
10568 # Command line contains \257: Unescape it \257\256 => \257
10569 # If a replacement resulted in \257\256
10570 # it will have been escaped into \\\257\\\\256
10571 # and will not be matched below
10572 for(@replaced) {
10573 s/\257\256/\257/g;
10577 # Put tokens into groups that may be quoted.
10578 my @quotegroup;
10579 my @quoted;
10580 for (map { $_ eq "\0empty" ? "" : $_ }
10581 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
10582 @replaced, "\0end") {
10583 if($_ eq "\0spc" or $_ eq "\0end") {
10584 # \0spc splits quotable groups
10585 if($quote) {
10586 if(@quotegroup) {
10587 CORE::push @quoted, ::Q(join"",@quotegroup);;
10589 } else {
10590 CORE::push @quoted, join"",@quotegroup;
10592 @quotegroup = ();
10593 } else {
10594 CORE::push @quotegroup, $_;
10597 ::debug("replace","Quoted: ".join":",@quoted,"\n");
10598 return wantarray ? @quoted : "@quoted";
10601 sub skip($) {
10602 # Skip this job
10603 my $self = shift;
10604 $self->{'skip'} = 1;
10608 package CommandLineQueue;
10610 sub new($) {
10611 my $class = shift;
10612 my $commandref = shift;
10613 my $read_from = shift;
10614 my $context_replace = shift || 0;
10615 my $max_number_of_args = shift;
10616 my $transfer_files = shift;
10617 my $return_files = shift;
10618 my @unget = ();
10619 my $posrpl;
10620 my ($replacecount_ref, $len_ref);
10621 my @command = @$commandref;
10622 my $seq = 1;
10623 # Replace replacement strings with {= perl expr =}
10624 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
10625 @command = merge_rpl_parts(@command);
10627 # Protect matching inside {= perl expr =}
10628 # by replacing {= and =} with \257< and \257>
10629 # in options that can contain replacement strings:
10630 # @command, --transferfile, --return,
10631 # --tagstring, --workdir, --results
10632 for(@command, @$transfer_files, @$return_files,
10633 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
10634 # Skip if undefined
10635 $_ or next;
10636 # Escape \257 => \257\256
10637 $Global::escape_string_present += s/\257/\257\256/g;
10638 # Needs to match rightmost left parens (Perl defaults to leftmost)
10639 # to deal with: {={==} and {={==}=}
10640 # Replace {= -> \257< and =} -> \257>
10642 # Complex way to do:
10643 # s/{=(.*)=}/\257<$1\257>/g
10644 # which would not work
10645 s[\Q$Global::parensleft\E # Match {=
10646 # Match . unless the next string is {= or =}
10647 # needed to force matching the shortest {= =}
10648 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
10649 \Q$Global::parensright\E ] # Match =}
10650 {\257<$1\257>}gxs;
10651 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
10652 # Replace long --rpl's before short ones, as a short may be a
10653 # substring of a long:
10654 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
10656 # Replace the shorthand string (--rpl)
10657 # with the {= perl expr =}
10659 # Avoid searching for shorthand strings inside existing {= perl expr =}
10661 # Replace $$1 in {= perl expr =} with groupings in shorthand string
10663 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
10664 # echo {/.tar/.gz} ::: UU.tar.gz
10665 my ($prefix,$grp_regexp,$postfix) =
10666 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
10667 ( \(.*\) )? # Group capture regexp - e.g (.*)
10668 ( [^)]* )$ # Postfix - e.g }
10669 /xs;
10670 $grp_regexp ||= '';
10671 my $rplval = $Global::rpl{$rpl};
10672 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
10673 # Don't replace after \257 unless \257>
10674 \Q$prefix\E $grp_regexp \Q$postfix\E}
10676 # The start remains the same
10677 my $unchanged = $1;
10678 # Dummy entry to start at 1.
10679 my @grp = (1);
10680 # $2 = first ()-group in $grp_regexp
10681 # Put $2 in $grp[1], Put $3 in $grp[2]
10682 # so first ()-group in $grp_regexp is $grp[1];
10683 for(my $i = 2; defined $grp[$#grp]; $i++) {
10684 push @grp, eval '$'.$i;
10686 my $rv = $rplval;
10687 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
10688 # in the code to be executed
10689 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
10690 # prepend with $_pAr_gRp1 = perlquote($1),
10691 my $set_args = "";
10692 for(my $i = 1;defined $grp[$i]; $i++) {
10693 $set_args .= "\$_pAr_gRp$i = \"" .
10694 ::perl_quote_scalar($grp[$i]) . "\";";
10696 $unchanged . "\257<" . $set_args . $rv . "\257>"
10697 }gxes) {
10699 # Do the same for the positional replacement strings
10700 $posrpl = $rpl;
10701 if($posrpl =~ s/^\{//) {
10702 # Only do this if the shorthand start with {
10703 $prefix=~s/^\{//;
10704 # Don't replace after \257 unless \257>
10705 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
10706 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
10708 # The start remains the same
10709 my $unchanged = $1;
10710 my $position = $2;
10711 # Dummy entry to start at 1.
10712 my @grp = (1);
10713 # $3 = first ()-group in $grp_regexp
10714 # Put $3 in $grp[1], Put $4 in $grp[2]
10715 # so first ()-group in $grp_regexp is $grp[1];
10716 for(my $i = 3; defined $grp[$#grp]; $i++) {
10717 push @grp, eval '$'.$i;
10719 my $rv = $rplval;
10720 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
10721 # in the code to be executed
10722 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
10723 # prepend with $_pAr_gRp1 = perlquote($1),
10724 my $set_args = "";
10725 for(my $i = 1;defined $grp[$i]; $i++) {
10726 $set_args .= "\$_pAr_gRp$i = \"" .
10727 ::perl_quote_scalar($grp[$i]) . "\";";
10729 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
10730 }gxes) {
10736 # Add {} if no replacement strings in @command
10737 ($replacecount_ref, $len_ref, @command) =
10738 replacement_counts_and_lengths($transfer_files,$return_files,@command);
10739 if("@command" =~ /^[^ \t\n=]*\257</) {
10740 # Replacement string is (part of) the command (and not just
10741 # argument or variable definition V1={})
10742 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
10743 # Do no quote (Otherwise it will fail if the input contains spaces)
10744 $Global::noquote = 1;
10747 if($opt::sqlmaster and $Global::sql->append()) {
10748 $seq = $Global::sql->max_seq() + 1;
10751 return bless {
10752 'unget' => \@unget,
10753 'command' => \@command,
10754 'replacecount' => $replacecount_ref,
10755 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
10756 'context_replace' => $context_replace,
10757 'len' => $len_ref,
10758 'max_number_of_args' => $max_number_of_args,
10759 'size' => undef,
10760 'transfer_files' => $transfer_files,
10761 'return_files' => $return_files,
10762 'seq' => $seq,
10763 }, ref($class) || $class;
10766 sub merge_rpl_parts($) {
10767 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
10768 # Input:
10769 # @in = the @command as given by the user
10770 # Uses:
10771 # $Global::parensleft
10772 # $Global::parensright
10773 # Returns:
10774 # @command with parts merged to keep {= and =} as one
10775 my @in = @_;
10776 my @out;
10777 my $l = quotemeta($Global::parensleft);
10778 my $r = quotemeta($Global::parensright);
10780 while(@in) {
10781 my $s = shift @in;
10782 $_ = $s;
10783 # Remove matching (right most) parens
10784 while(s/(.*)$l.*?$r/$1/os) {}
10785 if(/$l/o) {
10786 # Missing right parens
10787 while(@in) {
10788 $s .= " ".shift @in;
10789 $_ = $s;
10790 while(s/(.*)$l.*?$r/$1/os) {}
10791 if(not /$l/o) {
10792 last;
10796 push @out, $s;
10798 return @out;
10801 sub replacement_counts_and_lengths($$@) {
10802 # Count the number of different replacement strings.
10803 # Find the lengths of context for context groups and non-context
10804 # groups.
10805 # If no {} found in @command: add it to @command
10807 # Input:
10808 # \@transfer_files = array of filenames to transfer
10809 # \@return_files = array of filenames to return
10810 # @command = command template
10811 # Output:
10812 # \%replacecount, \%len, @command
10813 my $transfer_files = shift;
10814 my $return_files = shift;
10815 my @command = @_;
10816 my (%replacecount,%len);
10817 my $sum = 0;
10818 while($sum == 0) {
10819 # Count how many times each replacement string is used
10820 my @cmd = @command;
10821 my $contextlen = 0;
10822 my $noncontextlen = 0;
10823 my $contextgroups = 0;
10824 for my $c (@cmd) {
10825 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
10826 # %replacecount = { "perlexpr" => number of times seen }
10827 # e.g { "s/a/b/" => 2 }
10828 $replacecount{$1}++;
10829 $sum++;
10831 # Measure the length of the context around the {= perl expr =}
10832 # Use that {=...=} has been replaced with \000 above
10833 # So there is no need to deal with \257<
10834 while($c =~ s/ (\S*\000\S*) //xs) {
10835 my $w = $1;
10836 $w =~ tr/\000//d; # Remove all \000's
10837 $contextlen += length($w);
10838 $contextgroups++;
10840 # All {= perl expr =} have been removed: The rest is non-context
10841 $noncontextlen += length $c;
10843 for(@$transfer_files, @$return_files,
10844 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
10845 # Options that can contain replacement strings
10846 $_ or next;
10847 my $t = $_;
10848 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
10849 # %replacecount = { "perlexpr" => number of times seen }
10850 # e.g { "$_++" => 2 }
10851 # But for tagstring we just need to mark it as seen
10852 $replacecount{$1} ||= 1;
10855 if($opt::bar) {
10856 # If the command does not contain {} force it to be computed
10857 # as it is being used by --bar
10858 $replacecount{""} ||= 1;
10861 $len{'context'} = 0+$contextlen;
10862 $len{'noncontext'} = $noncontextlen;
10863 $len{'contextgroups'} = $contextgroups;
10864 $len{'noncontextgroups'} = @cmd-$contextgroups;
10865 ::debug("length", "@command Context: ", $len{'context'},
10866 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
10867 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
10868 if($sum == 0) {
10869 if(not @command) {
10870 # Default command = {}
10871 @command = ("\257<\257>");
10872 } elsif(($opt::pipe or $opt::pipepart)
10873 and not $opt::fifo and not $opt::cat) {
10874 # With --pipe / --pipe-part you can have no replacement
10875 last;
10876 } else {
10877 # Append {} to the command if there are no {...}'s and no {=...=}
10878 push @command, ("\257<\257>");
10882 return(\%replacecount,\%len,@command);
10885 sub get($) {
10886 my $self = shift;
10887 if(@{$self->{'unget'}}) {
10888 my $cmd_line = shift @{$self->{'unget'}};
10889 return ($cmd_line);
10890 } else {
10891 if($opt::sqlworker) {
10892 # Get the sequence number from the SQL table
10893 $self->set_seq($SQL::next_seq);
10894 # Get the command from the SQL table
10895 $self->{'command'} = $SQL::command_ref;
10896 my @command;
10897 # Recompute replace counts based on the read command
10898 ($self->{'replacecount'},
10899 $self->{'len'}, @command) =
10900 replacement_counts_and_lengths($self->{'transfer_files'},
10901 $self->{'return_files'},
10902 @$SQL::command_ref);
10903 if("@command" =~ /^[^ \t\n=]*\257</) {
10904 # Replacement string is (part of) the command (and not just
10905 # argument or variable definition V1={})
10906 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
10907 # Do no quote (Otherwise it will fail if the input contains spaces)
10908 $Global::noquote = 1;
10912 my $cmd_line = CommandLine->new($self->seq(),
10913 $self->{'command'},
10914 $self->{'arg_queue'},
10915 $self->{'context_replace'},
10916 $self->{'max_number_of_args'},
10917 $self->{'transfer_files'},
10918 $self->{'return_files'},
10919 $self->{'replacecount'},
10920 $self->{'len'},
10922 $cmd_line->populate();
10923 ::debug("run","cmd_line->number_of_args ",
10924 $cmd_line->number_of_args(), "\n");
10925 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
10926 if($cmd_line->replaced() eq "") {
10927 # Empty command - pipe requires a command
10928 ::error("--pipe/--pipepart must have a command to pipe into ".
10929 "(e.g. 'cat').");
10930 ::wait_and_exit(255);
10932 } elsif($cmd_line->number_of_args() == 0) {
10933 # We did not get more args - maybe at EOF string?
10934 return undef;
10936 $self->set_seq($self->seq()+1);
10937 return $cmd_line;
10941 sub unget($) {
10942 my $self = shift;
10943 unshift @{$self->{'unget'}}, @_;
10946 sub empty($) {
10947 my $self = shift;
10948 my $empty = (not @{$self->{'unget'}}) &&
10949 $self->{'arg_queue'}->empty();
10950 ::debug("run", "CommandLineQueue->empty $empty");
10951 return $empty;
10954 sub seq($) {
10955 my $self = shift;
10956 return $self->{'seq'};
10959 sub set_seq($$) {
10960 my $self = shift;
10961 $self->{'seq'} = shift;
10964 sub quote_args($) {
10965 my $self = shift;
10966 # If there is not command emulate |bash
10967 return $self->{'command'};
10971 package Limits::Command;
10973 # Maximal command line length (for -m and -X)
10974 sub max_length($) {
10975 # Find the max_length of a command line and cache it
10976 # Returns:
10977 # number of chars on the longest command line allowed
10978 if(not $Limits::Command::line_max_len) {
10979 # Disk cache of max command line length
10980 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
10981 "/linelen";
10982 my $cached_limit;
10983 if(-e $len_cache) {
10984 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
10985 $cached_limit = <$fh>;
10986 close $fh;
10987 } else {
10988 $cached_limit = real_max_length();
10989 # If $HOME is write protected: Do not fail
10990 my $dir = ::dirname($len_cache);
10991 -d $dir or eval { File::Path::mkpath($dir); };
10992 open(my $fh, ">", $len_cache);
10993 print $fh $cached_limit;
10994 close $fh;
10996 $Limits::Command::line_max_len = tmux_length($cached_limit);
10997 if($opt::max_chars) {
10998 if($opt::max_chars <= $cached_limit) {
10999 $Limits::Command::line_max_len = $opt::max_chars;
11000 } else {
11001 ::warning("Value for -s option should be < $cached_limit.");
11005 return int($Limits::Command::line_max_len);
11008 sub real_max_length($) {
11009 # Find the max_length of a command line
11010 # Returns:
11011 # The maximal command line length
11012 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
11013 my $upper = 100_000_000;
11014 # 1000 is supported everywhere, so the search can start anywhere 1..999
11015 # 324 makes the search much faster on CygWin, so let us use that
11016 my $len = 324;
11017 do {
11018 if($len > $upper) { return $len };
11019 $len *= 16;
11020 } while (is_acceptable_command_line_length($len));
11021 # Then search for the actual max length between 0 and upper bound
11022 return binary_find_max_length(int($len/16),$len);
11025 # Prototype forwarding
11026 sub binary_find_max_length($$);
11027 sub binary_find_max_length($$) {
11028 # Given a lower and upper bound find the max_length of a command line
11029 # Returns:
11030 # number of chars on the longest command line allowed
11031 my ($lower, $upper) = (@_);
11032 if($lower == $upper or $lower == $upper-1) { return $lower; }
11033 my $middle = int (($upper-$lower)/2 + $lower);
11034 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
11035 if (is_acceptable_command_line_length($middle)) {
11036 return binary_find_max_length($middle,$upper);
11037 } else {
11038 return binary_find_max_length($lower,$middle);
11042 sub is_acceptable_command_line_length($) {
11043 # Test if a command line of this length can run
11044 # in the current environment
11045 # Returns:
11046 # 0 if the command line length is too long
11047 # 1 otherwise
11048 my $len = shift;
11049 if($Global::parallel_env) {
11050 $len += length $Global::parallel_env;
11052 ::qqx("true "."x"x$len);
11053 ::debug("init", "$len=$? ");
11054 return not $?;
11057 sub tmux_length($) {
11058 # If $opt::tmux set, find the limit for tmux
11059 # tmux 1.8 has a 2kB limit
11060 # tmux 1.9 has a 16kB limit
11061 # tmux 2.0 has a 16kB limit
11062 # tmux 2.1 has a 16kB limit
11063 # tmux 2.2 has a 16kB limit
11064 # Input:
11065 # $len = maximal command line length
11066 # Returns:
11067 # $tmux_len = maximal length runable in tmux
11068 local $/ = "\n";
11069 my $len = shift;
11070 if($opt::tmux) {
11071 $ENV{'PARALLEL_TMUX'} ||= "tmux";
11072 if(not ::which($ENV{'PARALLEL_TMUX'})) {
11073 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
11074 ::wait_and_exit(255);
11076 my @out;
11077 for my $l (1, 2020, 16320, 100000, $len) {
11078 my $tmpfile = ::tmpname("tms");
11079 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
11080 " -S $tmpfile new-session -d -n echo $l".
11081 ("x"x$l). " && echo $l; rm -f $tmpfile";
11082 push @out, ::qqx($tmuxcmd);
11083 ::rm($tmpfile);
11085 ::debug("tmux","tmux-out ",@out);
11086 chomp @out;
11087 # The arguments is given 3 times on the command line
11088 # and the wrapping is around 30 chars
11089 # (29 for tmux1.9, 33 for tmux1.8)
11090 my $tmux_len = ::max(@out);
11091 $len = ::min($len,int($tmux_len/4-33));
11092 ::debug("tmux","tmux-length ",$len);
11094 return $len;
11098 package RecordQueue;
11100 sub new($) {
11101 my $class = shift;
11102 my $fhs = shift;
11103 my $colsep = shift;
11104 my @unget = ();
11105 my $arg_sub_queue;
11106 if($opt::sqlworker) {
11107 # Open SQL table
11108 $arg_sub_queue = SQLRecordQueue->new();
11109 } elsif(defined $colsep) {
11110 # Open one file with colsep or CSV
11111 $arg_sub_queue = RecordColQueue->new($fhs);
11112 } else {
11113 # Open one or more files if multiple -a
11114 $arg_sub_queue = MultifileQueue->new($fhs);
11116 return bless {
11117 'unget' => \@unget,
11118 'arg_number' => 0,
11119 'arg_sub_queue' => $arg_sub_queue,
11120 }, ref($class) || $class;
11123 sub get($) {
11124 # Returns:
11125 # reference to array of Arg-objects
11126 my $self = shift;
11127 if(@{$self->{'unget'}}) {
11128 $self->{'arg_number'}++;
11129 # Flush cached computed replacements in Arg-objects
11130 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11131 my $ret = shift @{$self->{'unget'}};
11132 if($ret) {
11133 map { $_->flush_cache() } @$ret;
11135 return $ret;
11137 my $ret = $self->{'arg_sub_queue'}->get();
11138 if($ret) {
11139 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
11140 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
11141 # to mean no-string
11142 ::warning("A NUL character in the input was replaced with \\0.",
11143 "NUL cannot be passed through in the argument list.",
11144 "Did you mean to use the --null option?");
11145 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
11146 # Replace \0 with \\0
11147 my $a = $_->orig();
11148 $a =~ s/\0/\\0/g;
11149 $_->set_orig($a);
11152 if(defined $Global::max_number_of_args
11153 and $Global::max_number_of_args == 0) {
11154 ::debug("run", "Read 1 but return 0 args\n");
11155 # \0noarg => nothing (not the empty string)
11156 map { $_->set_orig("\0noarg"); } @$ret;
11158 # Flush cached computed replacements in Arg-objects
11159 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
11160 map { $_->flush_cache() } @$ret;
11162 return $ret;
11165 sub unget($) {
11166 my $self = shift;
11167 ::debug("run", "RecordQueue-unget\n");
11168 $self->{'arg_number'} -= @_;
11169 unshift @{$self->{'unget'}}, @_;
11172 sub empty($) {
11173 my $self = shift;
11174 my $empty = (not @{$self->{'unget'}}) &&
11175 $self->{'arg_sub_queue'}->empty();
11176 ::debug("run", "RecordQueue->empty $empty");
11177 return $empty;
11180 sub arg_number($) {
11181 my $self = shift;
11182 return $self->{'arg_number'};
11186 package RecordColQueue;
11188 sub new($) {
11189 my $class = shift;
11190 my $fhs = shift;
11191 my @unget = ();
11192 my $arg_sub_queue = MultifileQueue->new($fhs);
11193 return bless {
11194 'unget' => \@unget,
11195 'arg_sub_queue' => $arg_sub_queue,
11196 }, ref($class) || $class;
11199 sub get($) {
11200 # Returns:
11201 # reference to array of Arg-objects
11202 my $self = shift;
11203 if(@{$self->{'unget'}}) {
11204 return shift @{$self->{'unget'}};
11206 my $unget_ref = $self->{'unget'};
11207 if($self->{'arg_sub_queue'}->empty()) {
11208 return undef;
11210 my $in_record = $self->{'arg_sub_queue'}->get();
11211 if(defined $in_record) {
11212 my @out_record = ();
11213 for my $arg (@$in_record) {
11214 ::debug("run", "RecordColQueue::arg $arg\n");
11215 my $line = $arg->orig();
11216 ::debug("run", "line='$line'\n");
11217 if($line ne "") {
11218 if($opt::csv) {
11219 # Parse CSV
11220 chomp $line;
11221 if(not $Global::csv->parse($line)) {
11222 die "CSV has unexpected format: ^$line^";
11224 for($Global::csv->fields()) {
11225 push @out_record, Arg->new($_);
11227 } else {
11228 for my $s (split /$opt::colsep/o, $line, -1) {
11229 push @out_record, Arg->new($s);
11232 } else {
11233 push @out_record, Arg->new("");
11236 return \@out_record;
11237 } else {
11238 return undef;
11242 sub unget($) {
11243 my $self = shift;
11244 ::debug("run", "RecordColQueue-unget '@_'\n");
11245 unshift @{$self->{'unget'}}, @_;
11248 sub empty($) {
11249 my $self = shift;
11250 my $empty = (not @{$self->{'unget'}}) &&
11251 $self->{'arg_sub_queue'}->empty();
11252 ::debug("run", "RecordColQueue->empty $empty");
11253 return $empty;
11257 package SQLRecordQueue;
11259 sub new($) {
11260 my $class = shift;
11261 my @unget = ();
11262 return bless {
11263 'unget' => \@unget,
11264 }, ref($class) || $class;
11267 sub get($) {
11268 # Returns:
11269 # reference to array of Arg-objects
11270 my $self = shift;
11271 if(@{$self->{'unget'}}) {
11272 return shift @{$self->{'unget'}};
11274 return $Global::sql->get_record();
11277 sub unget($) {
11278 my $self = shift;
11279 ::debug("run", "SQLRecordQueue-unget '@_'\n");
11280 unshift @{$self->{'unget'}}, @_;
11283 sub empty($) {
11284 my $self = shift;
11285 if(@{$self->{'unget'}}) { return 0; }
11286 my $get = $self->get();
11287 if(defined $get) {
11288 $self->unget($get);
11290 my $empty = not $get;
11291 ::debug("run", "SQLRecordQueue->empty $empty");
11292 return $empty;
11296 package MultifileQueue;
11298 @Global::unget_argv=();
11300 sub new($$) {
11301 my $class = shift;
11302 my $fhs = shift;
11303 for my $fh (@$fhs) {
11304 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
11305 ::warning("Input is read from the terminal. You are either an expert",
11306 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
11307 "::: or :::: or -a or to pipe data into parallel. If so",
11308 "consider going through the tutorial: man parallel_tutorial",
11309 "Press CTRL-D to exit.");
11312 return bless {
11313 'unget' => \@Global::unget_argv,
11314 'fhs' => $fhs,
11315 'arg_matrix' => undef,
11316 }, ref($class) || $class;
11319 sub get($) {
11320 my $self = shift;
11321 if($opt::link) {
11322 return $self->link_get();
11323 } else {
11324 return $self->nest_get();
11328 sub unget($) {
11329 my $self = shift;
11330 ::debug("run", "MultifileQueue-unget '@_'\n");
11331 unshift @{$self->{'unget'}}, @_;
11334 sub empty($) {
11335 my $self = shift;
11336 my $empty = (not @Global::unget_argv) &&
11337 not @{$self->{'unget'}};
11338 for my $fh (@{$self->{'fhs'}}) {
11339 $empty &&= eof($fh);
11341 ::debug("run", "MultifileQueue->empty $empty ");
11342 return $empty;
11345 sub link_get($) {
11346 my $self = shift;
11347 if(@{$self->{'unget'}}) {
11348 return shift @{$self->{'unget'}};
11350 my @record = ();
11351 my $prepend;
11352 my $empty = 1;
11353 for my $fh (@{$self->{'fhs'}}) {
11354 my $arg = read_arg_from_fh($fh);
11355 if(defined $arg) {
11356 # Record $arg for recycling at end of file
11357 push @{$self->{'arg_matrix'}{$fh}}, $arg;
11358 push @record, $arg;
11359 $empty = 0;
11360 } else {
11361 ::debug("run", "EOA ");
11362 # End of file: Recycle arguments
11363 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
11364 # return last @{$args->{'args'}{$fh}};
11365 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
11368 if($empty) {
11369 return undef;
11370 } else {
11371 return \@record;
11375 sub nest_get($) {
11376 my $self = shift;
11377 if(@{$self->{'unget'}}) {
11378 return shift @{$self->{'unget'}};
11380 my @record = ();
11381 my $prepend;
11382 my $empty = 1;
11383 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
11384 if(not $self->{'arg_matrix'}) {
11385 # Initialize @arg_matrix with one arg from each file
11386 # read one line from each file
11387 my @first_arg_set;
11388 my $all_empty = 1;
11389 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
11390 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11391 if(defined $arg) {
11392 $all_empty = 0;
11394 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
11395 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
11397 if($all_empty) {
11398 # All filehandles were at eof or eof-string
11399 return undef;
11401 return [@first_arg_set];
11404 # Treat the case with one input source special. For multiple
11405 # input sources we need to remember all previously read values to
11406 # generate all combinations. But for one input source we can
11407 # forget the value after first use.
11408 if($no_of_inputsources == 1) {
11409 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
11410 if(defined($arg)) {
11411 return [$arg];
11413 return undef;
11415 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
11416 if(eof($self->{'fhs'}[$fhno])) {
11417 next;
11418 } else {
11419 # read one
11420 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11421 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
11422 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
11423 $self->{'arg_matrix'}[$fhno][$len] = $arg;
11424 # make all new combinations
11425 my @combarg = ();
11426 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
11427 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
11428 # Is input source --link'ed to the next?
11429 $opt::linkinputsource[$fhn+1]);
11431 # Find only combinations with this new entry
11432 $combarg[2*$fhno] = [$len,$len];
11433 # map combinations
11434 # [ 1, 3, 7 ], [ 2, 4, 1 ]
11435 # =>
11436 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
11437 my @mapped;
11438 for my $c (expand_combinations(@combarg)) {
11439 my @a;
11440 for my $n (0 .. $no_of_inputsources - 1 ) {
11441 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
11443 push @mapped, \@a;
11445 # append the mapped to the ungotten arguments
11446 push @{$self->{'unget'}}, @mapped;
11447 # get the first
11448 if(@mapped) {
11449 return shift @{$self->{'unget'}};
11453 # all are eof or at EOF string; return from the unget queue
11454 return shift @{$self->{'unget'}};
11457 sub read_arg_from_fh($) {
11458 # Read one Arg from filehandle
11459 # Returns:
11460 # Arg-object with one read line
11461 # undef if end of file
11462 my $fh = shift;
11463 my $prepend;
11464 my $arg;
11465 my $half_record = 0;
11466 do {{
11467 # This makes 10% faster
11468 if(not defined ($arg = <$fh>)) {
11469 if(defined $prepend) {
11470 return Arg->new($prepend);
11471 } else {
11472 return undef;
11475 if($opt::csv) {
11476 # We need to read a full CSV line.
11477 if(($arg =~ y/"/"/) % 2 ) {
11478 # The number of " on the line is uneven:
11479 # If we were in a half_record => we have a full record now
11480 # If we were ouside a half_record => we are in a half record now
11481 $half_record = not $half_record;
11483 if($half_record) {
11484 # CSV half-record with quoting:
11485 # col1,"col2 2""x3"" board newline <-this one
11486 # cont",col3
11487 $prepend .= $arg;
11488 redo;
11489 } else {
11490 # Now we have a full CSV record
11493 # Remove delimiter
11494 chomp $arg;
11495 if($Global::end_of_file_string and
11496 $arg eq $Global::end_of_file_string) {
11497 # Ignore the rest of input file
11498 close $fh;
11499 ::debug("run", "EOF-string ($arg) met\n");
11500 if(defined $prepend) {
11501 return Arg->new($prepend);
11502 } else {
11503 return undef;
11506 if(defined $prepend) {
11507 $arg = $prepend.$arg; # For line continuation
11508 undef $prepend;
11510 if($Global::ignore_empty) {
11511 if($arg =~ /^\s*$/) {
11512 redo; # Try the next line
11515 if($Global::max_lines) {
11516 if($arg =~ /\s$/) {
11517 # Trailing space => continued on next line
11518 $prepend = $arg;
11519 redo;
11522 }} while (1 == 0); # Dummy loop {{}} for redo
11523 if(defined $arg) {
11524 return Arg->new($arg);
11525 } else {
11526 ::die_bug("multiread arg undefined");
11530 # Prototype forwarding
11531 sub expand_combinations(@);
11532 sub expand_combinations(@) {
11533 # Input:
11534 # ([xmin,xmax], [ymin,ymax], ...)
11535 # Returns: ([x,y,...],[x,y,...])
11536 # where xmin <= x <= xmax and ymin <= y <= ymax
11537 my $minmax_ref = shift;
11538 my $link = shift; # This is linked to the next input source
11539 my $xmin = $$minmax_ref[0];
11540 my $xmax = $$minmax_ref[1];
11541 my @p;
11542 if(@_) {
11543 my @rest = expand_combinations(@_);
11544 if($link) {
11545 # Linked to next col with --link/:::+/::::+
11546 # TODO BUG does not wrap values if not same number of vals
11547 push(@p, map { [$$_[0], @$_] }
11548 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
11549 } else {
11550 # If there are more columns: Compute those recursively
11551 for(my $x = $xmin; $x <= $xmax; $x++) {
11552 push @p, map { [$x, @$_] } @rest;
11555 } else {
11556 for(my $x = $xmin; $x <= $xmax; $x++) {
11557 push @p, [$x];
11560 return @p;
11564 package Arg;
11566 sub new($) {
11567 my $class = shift;
11568 my $orig = shift;
11569 my @hostgroups;
11570 if($opt::hostgroups) {
11571 if($orig =~ s:@(.+)::) {
11572 # We found hostgroups on the arg
11573 @hostgroups = split(/\+/, $1);
11574 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
11575 # This hostgroup is not defined using -S
11576 # Add it
11577 ::warning("Adding hostgroups: @hostgroups");
11578 # Add sshlogin
11579 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
11580 my $sshlogin = SSHLogin->new($_);
11581 my $sshlogin_string = $sshlogin->string();
11582 $Global::host{$sshlogin_string} = $sshlogin;
11583 $Global::hostgroups{$sshlogin_string} = 1;
11586 } else {
11587 # No hostgroup on the arg => any hostgroup
11588 @hostgroups = (keys %Global::hostgroups);
11591 return bless {
11592 'orig' => $orig,
11593 'hostgroups' => \@hostgroups,
11594 }, ref($class) || $class;
11597 sub Q($) {
11598 # Q alias for ::shell_quote_scalar
11599 my $ret = ::Q($_[0]);
11600 no warnings 'redefine';
11601 *Q = \&::Q;
11602 return $ret;
11605 sub pQ($) {
11606 # pQ alias for ::perl_quote_scalar
11607 my $ret = ::pQ($_[0]);
11608 no warnings 'redefine';
11609 *pQ = \&::pQ;
11610 return $ret;
11613 sub total_jobs() {
11614 return $Global::JobQueue->total_jobs();
11618 my %perleval;
11619 my $job;
11620 sub skip() {
11621 # shorthand for $job->skip();
11622 $job->skip();
11624 sub slot() {
11625 # shorthand for $job->slot();
11626 $job->slot();
11628 sub seq() {
11629 # shorthand for $job->seq();
11630 $job->seq();
11633 sub replace($$$$) {
11634 # Calculates the corresponding value for a given perl expression
11635 # Returns:
11636 # The calculated string (quoted if asked for)
11637 my $self = shift;
11638 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
11639 my $quote = (shift) ? 1 : 0; # should the string be quoted?
11640 # This is actually a CommandLine-object,
11641 # but it looks nice to be able to say {= $job->slot() =}
11642 $job = shift;
11643 $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
11644 if(not $self->{'cache'}{$perlexpr}) {
11645 # Only compute the value once
11646 # Use $_ as the variable to change
11647 local $_;
11648 if($Global::trim eq "n") {
11649 $_ = $self->{'orig'};
11650 } else {
11651 # Trim the input
11652 $_ = trim_of($self->{'orig'});
11654 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
11655 if(not $perleval{$perlexpr}) {
11656 # Make an anonymous function of the $perlexpr
11657 # And more importantly: Compile it only once
11658 if($perleval{$perlexpr} =
11659 eval('sub { no strict; no warnings; my $job = shift; '.
11660 $perlexpr.' }')) {
11661 # All is good
11662 } else {
11663 # The eval failed. Maybe $perlexpr is invalid perl?
11664 ::error("Cannot use $perlexpr: $@");
11665 ::wait_and_exit(255);
11668 # Execute the function
11669 $perleval{$perlexpr}->($job);
11670 $self->{'cache'}{$perlexpr} = $_;
11672 # Return the value quoted if needed
11673 return($quote ? Q($self->{'cache'}{$perlexpr})
11674 : $self->{'cache'}{$perlexpr});
11678 sub flush_cache($) {
11679 # Flush cache of computed values
11680 my $self = shift;
11681 $self->{'cache'} = undef;
11684 sub orig($) {
11685 my $self = shift;
11686 return $self->{'orig'};
11689 sub set_orig($$) {
11690 my $self = shift;
11691 $self->{'orig'} = shift;
11694 sub trim_of($) {
11695 # Removes white space as specifed by --trim:
11696 # n = nothing
11697 # l = start
11698 # r = end
11699 # lr|rl = both
11700 # Returns:
11701 # string with white space removed as needed
11702 my @strings = map { defined $_ ? $_ : "" } (@_);
11703 my $arg;
11704 if($Global::trim eq "n") {
11705 # skip
11706 } elsif($Global::trim eq "l") {
11707 for my $arg (@strings) { $arg =~ s/^\s+//; }
11708 } elsif($Global::trim eq "r") {
11709 for my $arg (@strings) { $arg =~ s/\s+$//; }
11710 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
11711 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
11712 } else {
11713 ::error("--trim must be one of: r l rl lr.");
11714 ::wait_and_exit(255);
11716 return wantarray ? @strings : "@strings";
11720 package TimeoutQueue;
11722 sub new($) {
11723 my $class = shift;
11724 my $delta_time = shift;
11725 my ($pct);
11726 if($delta_time =~ /(\d+(\.\d+)?)%/) {
11727 # Timeout in percent
11728 $pct = $1/100;
11729 $delta_time = 1_000_000;
11731 $delta_time = ::multiply_time_units($delta_time);
11733 return bless {
11734 'queue' => [],
11735 'delta_time' => $delta_time,
11736 'pct' => $pct,
11737 'remedian_idx' => 0,
11738 'remedian_arr' => [],
11739 'remedian' => undef,
11740 }, ref($class) || $class;
11743 sub delta_time($) {
11744 my $self = shift;
11745 return $self->{'delta_time'};
11748 sub set_delta_time($$) {
11749 my $self = shift;
11750 $self->{'delta_time'} = shift;
11753 sub remedian($) {
11754 my $self = shift;
11755 return $self->{'remedian'};
11758 sub set_remedian($$) {
11759 # Set median of the last 999^3 (=997002999) values using Remedian
11761 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
11762 # robust averaging method for large data sets." Journal of the
11763 # American Statistical Association 85.409 (1990): 97-104.
11764 my $self = shift;
11765 my $val = shift;
11766 my $i = $self->{'remedian_idx'}++;
11767 my $rref = $self->{'remedian_arr'};
11768 $rref->[0][$i%999] = $val;
11769 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
11770 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
11771 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
11774 sub update_median_runtime($) {
11775 # Update delta_time based on runtime of finished job if timeout is
11776 # a percentage
11777 my $self = shift;
11778 my $runtime = shift;
11779 if($self->{'pct'}) {
11780 $self->set_remedian($runtime);
11781 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
11782 ::debug("run", "Timeout: $self->{'delta_time'}s ");
11786 sub process_timeouts($) {
11787 # Check if there was a timeout
11788 my $self = shift;
11789 # $self->{'queue'} is sorted by start time
11790 while (@{$self->{'queue'}}) {
11791 my $job = $self->{'queue'}[0];
11792 if($job->endtime()) {
11793 # Job already finished. No need to timeout the job
11794 # This could be because of --keep-order
11795 shift @{$self->{'queue'}};
11796 } elsif($job->is_timedout($self->{'delta_time'})) {
11797 # Need to shift off queue before kill
11798 # because kill calls usleep that calls process_timeouts
11799 shift @{$self->{'queue'}};
11800 ::warning("This job was killed because it timed out:",
11801 $job->replaced());
11802 $job->kill();
11803 } else {
11804 # Because they are sorted by start time the rest are later
11805 last;
11810 sub insert($) {
11811 my $self = shift;
11812 my $in = shift;
11813 push @{$self->{'queue'}}, $in;
11817 package SQL;
11819 sub new($) {
11820 my $class = shift;
11821 my $dburl = shift;
11822 $Global::use{"DBI"} ||= eval "use DBI; 1;";
11823 # +DBURL = append to this DBURL
11824 my $append = $dburl=~s/^\+//;
11825 my %options = parse_dburl(get_alias($dburl));
11826 my %driveralias = ("sqlite" => "SQLite",
11827 "sqlite3" => "SQLite",
11828 "pg" => "Pg",
11829 "postgres" => "Pg",
11830 "postgresql" => "Pg",
11831 "csv" => "CSV",
11832 "oracle" => "Oracle",
11833 "ora" => "Oracle");
11834 my $driver = $driveralias{$options{'databasedriver'}} ||
11835 $options{'databasedriver'};
11836 my $database = $options{'database'};
11837 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
11838 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
11839 my $dsn = "DBI:$driver:dbname=$database$host$port";
11840 my $userid = $options{'user'};
11841 my $password = $options{'password'};;
11842 if(not grep /$driver/, DBI->available_drivers) {
11843 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
11844 ::wait_and_exit(255);
11846 my $dbh = DBI->connect($dsn, $userid, $password,
11847 { RaiseError => 1, AutoInactiveDestroy => 1 })
11848 or die $DBI::errstr;
11850 $dbh->{'PrintWarn'} = $Global::debug || 0;
11851 $dbh->{'PrintError'} = $Global::debug || 0;
11852 $dbh->{'RaiseError'} = 1;
11853 $dbh->{'ShowErrorStatement'} = 1;
11854 $dbh->{'HandleError'} = sub {};
11856 if(not defined $options{'table'}) {
11857 ::error("The DBURL ($dburl) must contain a table.");
11858 ::wait_and_exit(255);
11861 return bless {
11862 'dbh' => $dbh,
11863 'driver' => $driver,
11864 'max_number_of_args' => undef,
11865 'table' => $options{'table'},
11866 'append' => $append,
11867 }, ref($class) || $class;
11870 # Prototype forwarding
11871 sub get_alias($);
11872 sub get_alias($) {
11873 my $alias = shift;
11874 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
11875 if ($alias !~ /^:/) {
11876 return $alias;
11879 # Find the alias
11880 my $path;
11881 if (-l $0) {
11882 ($path) = readlink($0) =~ m|^(.*)/|;
11883 } else {
11884 ($path) = $0 =~ m|^(.*)/|;
11887 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
11888 "$path/dburl.aliases", "$path/dburl.aliases.dist");
11889 for (@deprecated) {
11890 if(-r $_) {
11891 ::warning("$_ is deprecated. ".
11892 "Use .sql/aliases instead (read man sql).");
11895 my @urlalias=();
11896 check_permissions("$ENV{HOME}/.sql/aliases");
11897 check_permissions("$ENV{HOME}/.dburl.aliases");
11898 my @search = ("$ENV{HOME}/.sql/aliases",
11899 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
11900 "$path/dburl.aliases", "$path/dburl.aliases.dist");
11901 for my $alias_file (@search) {
11902 # local $/ needed if -0 set
11903 local $/ = "\n";
11904 if(-r $alias_file) {
11905 open(my $in, "<", $alias_file) || die;
11906 push @urlalias, <$in>;
11907 close $in;
11910 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
11911 # If we saw this before: we have an alias loop
11912 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
11913 ::error("$alias_part is a cyclic alias.");
11914 exit -1;
11915 } else {
11916 push @Private::seen_aliases, $alias_part;
11919 my $dburl;
11920 for (@urlalias) {
11921 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
11924 if($dburl) {
11925 return get_alias($dburl.$rest);
11926 } else {
11927 ::error("$alias is not defined in @search");
11928 exit(-1);
11932 sub check_permissions($) {
11933 my $file = shift;
11935 if(-e $file) {
11936 if(not -o $file) {
11937 my $username = (getpwuid($<))[0];
11938 ::warning("$file should be owned by $username: ".
11939 "chown $username $file");
11941 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
11942 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
11943 if($mode & 077) {
11944 my $username = (getpwuid($<))[0];
11945 ::warning("$file should be only be readable by $username: ".
11946 "chmod 600 $file");
11951 sub parse_dburl($) {
11952 my $url = shift;
11953 my %options = ();
11954 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
11956 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
11957 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
11958 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
11960 ([^:@/][^:@]*|) # Username ($2)
11962 :([^@]*) # Password ($3)
11965 ([^:/]*)? # Hostname ($4)
11968 ([^/]*)? # Port ($5)
11972 ([^/?]*)? # Database ($6)
11976 ([^?]*)? # Table ($7)
11980 (.*)? # Query ($8)
11982 $!ix) {
11983 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
11984 $options{user} = ::undef_if_empty(uri_unescape($2));
11985 $options{password} = ::undef_if_empty(uri_unescape($3));
11986 $options{host} = ::undef_if_empty(uri_unescape($4));
11987 $options{port} = ::undef_if_empty(uri_unescape($5));
11988 $options{database} = ::undef_if_empty(uri_unescape($6));
11989 $options{table} = ::undef_if_empty(uri_unescape($7));
11990 $options{query} = ::undef_if_empty(uri_unescape($8));
11991 ::debug("sql", "dburl $url\n");
11992 ::debug("sql", "databasedriver ", $options{databasedriver},
11993 " user ", $options{user},
11994 " password ", $options{password}, " host ", $options{host},
11995 " port ", $options{port}, " database ", $options{database},
11996 " table ", $options{table}, " query ", $options{query}, "\n");
11997 } else {
11998 ::error("$url is not a valid DBURL");
11999 exit 255;
12001 return %options;
12004 sub uri_unescape($) {
12005 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
12006 # to avoid depending on URI::Escape
12007 # This section is (C) Gisle Aas.
12008 # Note from RFC1630: "Sequences which start with a percent sign
12009 # but are not followed by two hexadecimal characters are reserved
12010 # for future extension"
12011 my $str = shift;
12012 if (@_ && wantarray) {
12013 # not executed for the common case of a single argument
12014 my @str = ($str, @_); # need to copy
12015 foreach (@str) {
12016 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
12018 return @str;
12020 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
12021 $str;
12024 sub run($) {
12025 my $self = shift;
12026 my $stmt = shift;
12027 if($self->{'driver'} eq "CSV") {
12028 $stmt=~ s/;$//;
12029 if($stmt eq "BEGIN" or
12030 $stmt eq "COMMIT") {
12031 return undef;
12034 my @retval;
12035 my $dbh = $self->{'dbh'};
12036 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
12037 # Execute with the rest of the args - if any
12038 my $rv;
12039 my $sth;
12040 my $lockretry = 0;
12041 while($lockretry < 10) {
12042 $sth = $dbh->prepare($stmt);
12043 if($sth
12045 eval { $rv = $sth->execute(@_) }) {
12046 last;
12047 } else {
12048 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
12050 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
12051 # This is fine:
12052 # It is just a worker that reported back too late -
12053 # another worker had finished the job first
12054 # and the table was then dropped
12055 $rv = $sth = 0;
12056 last;
12058 if($DBI::errstr =~ /locked/) {
12059 ::debug("sql", "Lock retry: $lockretry");
12060 $lockretry++;
12061 ::usleep(rand()*300);
12062 } elsif(not $sth) {
12063 # Try again
12064 $lockretry++;
12065 } else {
12066 ::error($DBI::errstr);
12067 ::wait_and_exit(255);
12071 if($lockretry >= 10) {
12072 ::die_bug("retry > 10: $DBI::errstr");
12074 if($rv < 0 and $DBI::errstr){
12075 ::error($DBI::errstr);
12076 ::wait_and_exit(255);
12078 return $sth;
12081 sub get($) {
12082 my $self = shift;
12083 my $sth = $self->run(@_);
12084 my @retval;
12085 # If $sth = 0 it means the table was dropped by another process
12086 while($sth) {
12087 my @row = $sth->fetchrow_array();
12088 @row or last;
12089 push @retval, \@row;
12091 return \@retval;
12094 sub table($) {
12095 my $self = shift;
12096 return $self->{'table'};
12099 sub append($) {
12100 my $self = shift;
12101 return $self->{'append'};
12104 sub update($) {
12105 my $self = shift;
12106 my $stmt = shift;
12107 my $table = $self->table();
12108 $self->run("UPDATE $table $stmt",@_);
12111 sub output($) {
12112 my $self = shift;
12113 my $commandline = shift;
12115 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
12116 $commandline->seq(),
12117 join("",@{$commandline->{'output'}{1}}),
12118 join("",@{$commandline->{'output'}{2}}));
12121 sub max_number_of_args($) {
12122 # Maximal number of args for this table
12123 my $self = shift;
12124 if(not $self->{'max_number_of_args'}) {
12125 # Read the number of args from the SQL table
12126 my $table = $self->table();
12127 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
12128 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
12129 Receive Exitval _Signal Command Stdout Stderr);
12130 if(not $v) {
12131 ::error("$table contains no records");
12133 # Count the number of Vx columns
12134 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
12136 return $self->{'max_number_of_args'};
12139 sub set_max_number_of_args($$) {
12140 my $self = shift;
12141 $self->{'max_number_of_args'} = shift;
12144 sub create_table($) {
12145 my $self = shift;
12146 if($self->append()) { return; }
12147 my $max_number_of_args = shift;
12148 $self->set_max_number_of_args($max_number_of_args);
12149 my $table = $self->table();
12150 $self->run(qq(DROP TABLE IF EXISTS $table;));
12151 # BIGINT and TEXT are not supported in these databases or are too small
12152 my %vartype = (
12153 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
12154 "TEXT" => "CLOB", },
12155 "mysql" => { "TEXT" => "LONGTEXT", },
12156 "CSV" => { "BIGINT" => "INT",
12157 "FLOAT" => "REAL", },
12159 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
12160 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
12161 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
12162 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
12163 $self->run(qq{CREATE TABLE $table
12164 (Seq $BIGINT,
12165 Host $TEXT,
12166 Starttime $FLOAT,
12167 JobRuntime $FLOAT,
12168 Send $BIGINT,
12169 Receive $BIGINT,
12170 Exitval $BIGINT,
12171 _Signal $BIGINT,
12172 Command $TEXT,}.
12173 $v_def.
12174 qq{Stdout $TEXT,
12175 Stderr $TEXT);});
12178 sub insert_records($) {
12179 my $self = shift;
12180 my $seq = shift;
12181 my $command_ref = shift;
12182 my $record_ref = shift;
12183 my $table = $self->table();
12184 # For SQL encode the command with \257 space as split points
12185 my $command = join("\257 ",@$command_ref);
12186 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12187 # Two extra value due to $seq, Exitval, Send
12188 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
12189 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
12190 "VALUES ($v_vals);", $seq, $command, -1000,
12191 0, @$record_ref[1..$#$record_ref]);
12194 sub get_record($) {
12195 my $self = shift;
12196 my @retval;
12197 my $table = $self->table();
12198 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
12199 my $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
12200 "WHERE Exitval = -1000 ORDER BY Seq LIMIT 1;");
12201 if($v->[0]) {
12202 my $val_ref = $v->[0];
12203 # Mark record as taken
12204 my $seq = shift @$val_ref;
12205 # Save the sequence number to use when running the job
12206 $SQL::next_seq = $seq;
12207 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
12208 my @command = split /\257 /, shift @$val_ref;
12209 $SQL::command_ref = \@command;
12210 for (@$val_ref) {
12211 push @retval, Arg->new($_);
12214 if(@retval) {
12215 return \@retval;
12216 } else {
12217 return undef;
12221 sub total_jobs($) {
12222 my $self = shift;
12223 my $table = $self->table();
12224 my $v = $self->get("SELECT count(*) FROM $table;");
12225 if($v->[0]) {
12226 return $v->[0]->[0];
12227 } else {
12228 ::die_bug("SQL::total_jobs");
12232 sub max_seq($) {
12233 my $self = shift;
12234 my $table = $self->table();
12235 my $v = $self->get("SELECT max(Seq) FROM $table;");
12236 if($v->[0]) {
12237 return $v->[0]->[0];
12238 } else {
12239 ::die_bug("SQL::max_seq");
12243 sub finished($) {
12244 # Check if there are any jobs left in the SQL table that do not
12245 # have a "real" exitval
12246 my $self = shift;
12247 if($opt::wait or $Global::start_sqlworker) {
12248 my $table = $self->table();
12249 my $rv = $self->get("select Seq,Exitval from $table ".
12250 "where Exitval <= -1000 limit 1");
12251 return not $rv->[0];
12252 } else {
12253 return 1;
12257 package Semaphore;
12259 # This package provides a counting semaphore
12261 # If a process dies without releasing the semaphore the next process
12262 # that needs that entry will clean up dead semaphores
12264 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
12265 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
12266 # process holding the entry. If the process dies, the entry can be
12267 # taken by another process.
12269 sub new($) {
12270 my $class = shift;
12271 my $id = shift;
12272 my $count = shift;
12273 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
12274 $id = "id-".$id; # To distinguish it from a process id
12275 my $parallel_locks = $Global::cache_dir . "/semaphores";
12276 -d $parallel_locks or ::mkdir_or_die($parallel_locks);
12277 my $lockdir = "$parallel_locks/$id";
12279 my $lockfile = $lockdir.".lock";
12280 if($count < 1) { ::die_bug("semaphore-count: $count"); }
12281 return bless {
12282 'lockfile' => $lockfile,
12283 'lockfh' => Symbol::gensym(),
12284 'lockdir' => $lockdir,
12285 'id' => $id,
12286 'idfile' => $lockdir."/".$id,
12287 'pid' => $$,
12288 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
12289 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
12290 }, ref($class) || $class;
12293 sub remove_dead_locks($) {
12294 my $self = shift;
12295 my $lockdir = $self->{'lockdir'};
12297 for my $d (glob "$lockdir/*") {
12298 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
12299 my ($pid, $host) = ($1, $2);
12300 if($host eq ::hostname()) {
12301 if(kill 0, $pid) {
12302 ::debug("sem", "Alive: $pid $d\n");
12303 } else {
12304 ::debug("sem", "Dead: $d\n");
12305 ::rm($d);
12311 sub acquire($) {
12312 my $self = shift;
12313 my $sleep = 1; # 1 ms
12314 my $start_time = time;
12315 while(1) {
12316 # Can we get a lock?
12317 $self->atomic_link_if_count_less_than() and last;
12318 $self->remove_dead_locks();
12319 # Retry slower and slower up to 1 second
12320 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
12321 # Random to avoid every sleeping job waking up at the same time
12322 ::usleep(rand()*$sleep);
12323 if($opt::semaphoretimeout) {
12324 if($opt::semaphoretimeout > 0
12326 time - $start_time > $opt::semaphoretimeout) {
12327 # Timeout: Take the semaphore anyway
12328 ::warning("Semaphore timed out. Stealing the semaphore.");
12329 if(not -e $self->{'idfile'}) {
12330 open (my $fh, ">", $self->{'idfile'}) or
12331 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
12332 close $fh;
12334 link $self->{'idfile'}, $self->{'pidfile'};
12335 last;
12337 if($opt::semaphoretimeout < 0
12339 time - $start_time > -$opt::semaphoretimeout) {
12340 # Timeout: Exit
12341 ::warning("Semaphore timed out. Exiting.");
12342 exit(1);
12343 last;
12347 ::debug("sem", "acquired $self->{'pid'}\n");
12350 sub release($) {
12351 my $self = shift;
12352 ::rm($self->{'pidfile'});
12353 if($self->nlinks() == 1) {
12354 # This is the last link, so atomic cleanup
12355 $self->lock();
12356 if($self->nlinks() == 1) {
12357 ::rm($self->{'idfile'});
12358 rmdir $self->{'lockdir'};
12360 $self->unlock();
12362 ::debug("run", "released $self->{'pid'}\n");
12365 sub pid_change($) {
12366 # This should do what release()+acquire() would do without having
12367 # to re-acquire the semaphore
12368 my $self = shift;
12370 my $old_pidfile = $self->{'pidfile'};
12371 $self->{'pid'} = $$;
12372 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
12373 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
12374 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12375 ::rm($old_pidfile);
12378 sub atomic_link_if_count_less_than($) {
12379 # Link $file1 to $file2 if nlinks to $file1 < $count
12380 my $self = shift;
12381 my $retval = 0;
12382 $self->lock();
12383 my $nlinks = $self->nlinks();
12384 ::debug("sem","$nlinks<$self->{'count'} ");
12385 if($nlinks < $self->{'count'}) {
12386 -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
12387 if(not -e $self->{'idfile'}) {
12388 open (my $fh, ">", $self->{'idfile'}) or
12389 ::die_bug("write_idfile: $self->{'idfile'}");
12390 close $fh;
12392 $retval = link $self->{'idfile'}, $self->{'pidfile'};
12393 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12395 $self->unlock();
12396 ::debug("sem", "atomic $retval");
12397 return $retval;
12400 sub nlinks($) {
12401 my $self = shift;
12402 if(-e $self->{'idfile'}) {
12403 return (stat(_))[3];
12404 } else {
12405 return 0;
12409 sub lock($) {
12410 my $self = shift;
12411 my $sleep = 100; # 100 ms
12412 my $total_sleep = 0;
12413 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
12414 my $locked = 0;
12415 while(not $locked) {
12416 if(tell($self->{'lockfh'}) == -1) {
12417 # File not open
12418 open($self->{'lockfh'}, ">", $self->{'lockfile'})
12419 or ::debug("run", "Cannot open $self->{'lockfile'}");
12421 if($self->{'lockfh'}) {
12422 # File is open
12423 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
12424 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
12425 # The file is locked: No need to retry
12426 $locked = 1;
12427 last;
12428 } else {
12429 if ($! =~ m/Function not implemented/) {
12430 ::warning("flock: $!",
12431 "Will wait for a random while.");
12432 ::usleep(rand(5000));
12433 # File cannot be locked: No need to retry
12434 $locked = 2;
12435 last;
12439 # Locking failed in first round
12440 # Sleep and try again
12441 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
12442 # Random to avoid every sleeping job waking up at the same time
12443 ::usleep(rand()*$sleep);
12444 $total_sleep += $sleep;
12445 if($opt::semaphoretimeout) {
12446 if($opt::semaphoretimeout > 0
12448 $total_sleep/1000 > $opt::semaphoretimeout) {
12449 # Timeout: Take the semaphore anyway
12450 ::warning("Semaphore timed out. Taking the semaphore.");
12451 $locked = 3;
12452 last;
12454 if($opt::semaphoretimeout < 0
12456 $total_sleep/1000 > -$opt::semaphoretimeout) {
12457 # Timeout: Exit
12458 ::warning("Semaphore timed out. Exiting.");
12459 $locked = 4;
12460 last;
12462 } else {
12463 if($total_sleep/1000 > 30) {
12464 ::warning("Semaphore stuck for 30 seconds. ".
12465 "Consider using --semaphoretimeout.");
12469 ::debug("run", "locked $self->{'lockfile'}");
12472 sub unlock($) {
12473 my $self = shift;
12474 ::rm($self->{'lockfile'});
12475 close $self->{'lockfh'};
12476 ::debug("run", "unlocked\n");
12479 # Keep perl -w happy
12481 $opt::x = $Semaphore::timeout = $Semaphore::wait =
12482 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
12483 $Global::max_slot_number = $opt::session;
12485 package main;
12487 sub main() {
12488 save_stdin_stdout_stderr();
12489 save_original_signal_handler();
12490 parse_options();
12491 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
12492 my $number_of_args;
12493 if($Global::max_number_of_args) {
12494 $number_of_args = $Global::max_number_of_args;
12495 } elsif ($opt::X or $opt::m or $opt::xargs) {
12496 $number_of_args = undef;
12497 } else {
12498 $number_of_args = 1;
12501 my @command = @ARGV;
12502 my @input_source_fh;
12503 if($opt::pipepart) {
12504 if($opt::tee) {
12505 @input_source_fh = map { open_or_exit($_) } @opt::a;
12506 # Remove the first: It will be the file piped.
12507 shift @input_source_fh;
12508 if(not @input_source_fh and not $opt::pipe) {
12509 @input_source_fh = (*STDIN);
12511 } else {
12512 # -a is used for data - not for command line args
12513 @input_source_fh = map { open_or_exit($_) } "/dev/null";
12515 } else {
12516 @input_source_fh = map { open_or_exit($_) } @opt::a;
12517 if(not @input_source_fh and not $opt::pipe) {
12518 @input_source_fh = (*STDIN);
12521 if($opt::sqlmaster) {
12522 # Create SQL table to hold joblog + output
12523 $Global::sql->create_table($#input_source_fh+1);
12524 if($opt::sqlworker) {
12525 # Start a real --sqlworker in the background later
12526 $Global::start_sqlworker = 1;
12527 $opt::sqlworker = undef;
12531 if($opt::skip_first_line) {
12532 # Skip the first line for the first file handle
12533 my $fh = $input_source_fh[0];
12534 <$fh>;
12537 set_input_source_header(\@command,\@input_source_fh);
12539 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
12540 # Parallel check all hosts are up. Remove hosts that are down
12541 filter_hosts();
12544 if($opt::nonall or $opt::onall) {
12545 onall(\@input_source_fh,@command);
12546 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
12549 $Global::JobQueue = JobQueue->new(
12550 \@command,\@input_source_fh,$Global::ContextReplace,
12551 $number_of_args,\@Global::transfer_files,\@Global::ret_files);
12553 if($opt::pipepart) {
12554 pipepart_setup();
12555 } elsif($opt::pipe and $opt::tee) {
12556 pipe_tee_setup();
12557 } elsif($opt::pipe and $opt::shard) {
12558 pipe_shard_setup();
12561 if($opt::groupby) {
12562 group_by_stdin_filter();
12564 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
12565 # Count the number of jobs or shuffle all jobs
12566 # before starting any.
12567 # Must be done after ungetting any --pipepart jobs.
12568 $Global::JobQueue->total_jobs();
12570 # Compute $Global::max_jobs_running
12571 # Must be done after ungetting any --pipepart jobs.
12572 max_jobs_running();
12574 init_run_jobs();
12575 my $sem;
12576 if($Global::semaphore) {
12577 $sem = acquire_semaphore();
12579 $SIG{TERM} = $Global::original_sig{TERM};
12580 $SIG{HUP} = \&start_no_new_jobs;
12582 if($opt::tee or $opt::shard) {
12583 # All jobs must be running in parallel for --tee/--shard
12584 while(start_more_jobs()) {}
12585 $Global::start_no_new_jobs = 1;
12586 if(not $Global::JobQueue->empty()) {
12587 ::error("--tee requres --jobs to be higher. Try --jobs 0.");
12588 ::wait_and_exit(255);
12590 } elsif($opt::pipe and not $opt::pipepart) {
12591 # Fill all jobslots
12592 while(start_more_jobs()) {}
12593 spreadstdin();
12594 } else {
12595 # Reap one - start one
12596 while(reaper() + start_more_jobs()) {}
12598 ::debug("init", "Start draining\n");
12599 drain_job_queue(@command);
12600 ::debug("init", "Done draining\n");
12601 reapers();
12602 ::debug("init", "Done reaping\n");
12603 if($Global::semaphore) {
12604 $sem->release();
12606 cleanup();
12607 ::debug("init", "Halt\n");
12608 halt();
12611 main();