Released as 20210922 ('Vindelev')
[parallel.git] / src / parallel
blob976861495ea08af5362999eca22b30fb389def49
1 #!/usr/bin/env perl
3 # Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <https://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # SPDX-FileCopyrightText: 2021 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc.
22 # SPDX-License-Identifier: GPL-3.0-or-later
24 # open3 used in Job::start
25 use IPC::Open3;
26 use POSIX;
27 # gensym used in Job::start
28 use Symbol qw(gensym);
29 # tempfile used in Job::start
30 use File::Temp qw(tempfile tempdir);
31 # mkpath used in openresultsfile
32 use File::Path;
33 # GetOptions used in get_options_from_array
34 use Getopt::Long;
35 # Used to ensure code quality
36 use strict;
37 use File::Basename;
39 sub set_input_source_header($$) {
40 my ($command_ref,$input_source_fh_ref) = @_;
41 if($opt::header and not $opt::pipe) {
42 # split with colsep or \t
43 # $header force $colsep = \t if undef?
44 my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
45 # regexp for {=
46 my $left = "\Q$Global::parensleft\E";
47 my $l = $Global::parensleft;
48 # regexp for =}
49 my $right = "\Q$Global::parensright\E";
50 my $r = $Global::parensright;
51 my $id = 1;
52 for my $fh (@$input_source_fh_ref) {
53 my $line = <$fh>;
54 chomp($line);
55 $line =~ s/\r$//;
56 ::debug("init", "Delimiter: '$delimiter'");
57 for my $s (split /$delimiter/o, $line) {
58 ::debug("init", "Colname: '$s'");
59 # Replace {colname} with {2}
60 for(@$command_ref, @Global::ret_files,
61 @Global::transfer_files, $opt::tagstring,
62 $opt::workdir, $opt::results, $opt::retries,
63 @Global::template_contents, @Global::template_names,
64 @opt::filter) {
65 # Skip if undefined
66 $_ or next;
67 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
68 # {=header1 ... =} => {=1 ... =}
69 s:$left $s (.*?) $right:$l$id$1$r:gx;
71 $Global::input_source_header{$id} = $s;
72 $id++;
75 } else {
76 my $id = 1;
77 for my $fh (@$input_source_fh_ref) {
78 $Global::input_source_header{$id} = $id;
79 $id++;
84 sub max_jobs_running() {
85 # Compute $Global::max_jobs_running as the max number of jobs
86 # running on each sshlogin.
87 # Returns:
88 # $Global::max_jobs_running
89 if(not $Global::max_jobs_running) {
90 for my $sshlogin (values %Global::host) {
91 $sshlogin->max_jobs_running();
94 if(not $Global::max_jobs_running) {
95 ::error("Cannot run any jobs.");
96 wait_and_exit(255);
98 return $Global::max_jobs_running;
101 sub halt() {
102 # Compute exit value,
103 # wait for children to complete
104 # and exit
105 if($opt::halt and $Global::halt_when ne "never") {
106 if(not defined $Global::halt_exitstatus) {
107 if($Global::halt_pct) {
108 $Global::halt_exitstatus =
109 ::ceil($Global::total_failed /
110 ($Global::total_started || 1) * 100);
111 } elsif($Global::halt_count) {
112 $Global::halt_exitstatus =
113 ::min(undef_as_zero($Global::total_failed),101);
116 wait_and_exit($Global::halt_exitstatus);
117 } else {
118 wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
123 sub __PIPE_MODE__() {}
126 sub pipepart_setup() {
127 # Compute the blocksize
128 # Generate the commands to extract the blocks
129 # Push the commands on queue
130 # Changes:
131 # @Global::cat_prepends
132 # $Global::JobQueue
133 if($opt::tee) {
134 # Prepend each command with
135 # < file
136 my $cat_string = "< ".Q($opt::a[0]);
137 for(1..$Global::JobQueue->total_jobs()) {
138 push @Global::cat_appends, $cat_string;
139 push @Global::cat_prepends, "";
141 } else {
142 if(not $opt::blocksize) {
143 # --blocksize with 10 jobs per jobslot
144 $opt::blocksize = -10;
146 if($opt::roundrobin) {
147 # --blocksize with 1 job per jobslot
148 $opt::blocksize = -1;
150 if($opt::blocksize < 0) {
151 my $size = 0;
152 # Compute size of -a
153 for(@opt::a) {
154 if(-f $_) {
155 $size += -s $_;
156 } elsif(-b $_) {
157 $size += size_of_block_dev($_);
158 } elsif(-e $_) {
159 ::error("$_ is neither a file nor a block device");
160 wait_and_exit(255);
161 } else {
162 ::error("File not found: $_");
163 wait_and_exit(255);
166 # Run in total $job_slots*(- $blocksize) jobs
167 # Set --blocksize = size / no of proc / (- $blocksize)
168 $Global::dummy_jobs = 1;
169 $Global::blocksize = 1 +
170 int($size / max_jobs_running() /
171 -multiply_binary_prefix($opt::blocksize));
173 @Global::cat_prepends = map { pipe_part_files($_) } @opt::a;
174 # Unget the empty arg as many times as there are parts
175 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
176 map { [Arg->new("\0noarg")] } @Global::cat_prepends
181 sub pipe_tee_setup() {
182 # Create temporary fifos
183 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
184 # This will spread the input to fifos
185 # Generate commands that reads from fifo1..N:
186 # cat fifo | user_command
187 # Changes:
188 # @Global::cat_prepends
189 my @fifos;
190 for(1..$Global::JobQueue->total_jobs()) {
191 push @fifos, tmpfifo();
193 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
194 if(not fork()){
195 # Test if tee supports --output-error=warn-nopipe
196 `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`;
197 my $opt = $? ? "" : "--output-error=warn-nopipe";
198 ::debug("init","tee $opt");
199 # Let tee inherit our stdin
200 # and redirect stdout to null
201 open STDOUT, ">","/dev/null";
202 if($opt) {
203 exec "tee", $opt, @fifos;
204 } else {
205 exec "tee", @fifos;
208 # For each fifo
209 # (rm fifo1; grep 1) < fifo1
210 # (rm fifo2; grep 2) < fifo2
211 # (rm fifo3; grep 3) < fifo3
212 # Remove the tmpfifo as soon as it is open
213 @Global::cat_prepends = map { "(rm $_;" } @fifos;
214 @Global::cat_appends = map { ") < $_" } @fifos;
218 sub parcat_script() {
219 # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
220 my $script = q'{
221 use POSIX qw(:errno_h);
222 use IO::Select;
223 use strict;
224 use threads;
225 use Thread::Queue;
226 use Fcntl qw(:DEFAULT :flock);
228 my $opened :shared;
229 my $q = Thread::Queue->new();
230 my $okq = Thread::Queue->new();
231 my @producers;
233 if(not @ARGV) {
234 if(-t *STDIN) {
235 print "Usage:\n";
236 print " parcat file(s)\n";
237 print " cat argfile | parcat\n";
238 } else {
239 # Read arguments from stdin
240 chomp(@ARGV = <STDIN>);
243 my $files_to_open = 0;
244 # Default: fd = stdout
245 my $fd = 1;
246 for (@ARGV) {
247 # --rm = remove file when opened
248 /^--rm$/ and do { $opt::rm = 1; next; };
249 # -1 = output to fd 1, -2 = output to fd 2
250 /^-(\d+)$/ and do { $fd = $1; next; };
251 push @producers, threads->create("producer", $_, $fd);
252 $files_to_open++;
255 sub producer {
256 # Open a file/fifo, set non blocking, enqueue fileno of the file handle
257 my $file = shift;
258 my $output_fd = shift;
259 open(my $fh, "<", $file) || do {
260 print STDERR "parcat: Cannot open $file\n";
261 exit(1);
263 # Remove file when it has been opened
264 if($opt::rm) {
265 unlink $file;
267 set_fh_non_blocking($fh);
268 $opened++;
269 # Pass the fileno to parent
270 $q->enqueue(fileno($fh),$output_fd);
271 # Get an OK that the $fh is opened and we can release the $fh
272 while(1) {
273 my $ok = $okq->dequeue();
274 if($ok == fileno($fh)) { last; }
275 # Not ours - very unlikely to happen
276 $okq->enqueue($ok);
278 return;
281 my $s = IO::Select->new();
282 my %buffer;
284 sub add_file {
285 my $infd = shift;
286 my $outfd = shift;
287 open(my $infh, "<&=", $infd) || die;
288 open(my $outfh, ">&=", $outfd) || die;
289 $s->add($infh);
290 # Tell the producer now opened here and can be released
291 $okq->enqueue($infd);
292 # Initialize the buffer
293 @{$buffer{$infh}{$outfd}} = ();
294 $Global::fh{$outfd} = $outfh;
297 sub add_files {
298 # Non-blocking dequeue
299 my ($infd,$outfd);
300 do {
301 ($infd,$outfd) = $q->dequeue_nb(2);
302 if(defined($outfd)) {
303 add_file($infd,$outfd);
305 } while(defined($outfd));
308 sub add_files_block {
309 # Blocking dequeue
310 my ($infd,$outfd) = $q->dequeue(2);
311 add_file($infd,$outfd);
315 my $fd;
316 my (@ready,$infh,$rv,$buf);
317 do {
318 # Wait until at least one file is opened
319 add_files_block();
320 while($q->pending or keys %buffer) {
321 add_files();
322 while(keys %buffer) {
323 @ready = $s->can_read(0.01);
324 if(not @ready) {
325 add_files();
327 for $infh (@ready) {
328 # There is only one key, namely the output file descriptor
329 for my $outfd (keys %{$buffer{$infh}}) {
330 # TODO test if 65536 is optimal (2^17 is used elsewhere)
331 $rv = sysread($infh, $buf, 65536);
332 if (!$rv) {
333 if($! == EAGAIN) {
334 # Would block: Nothing read
335 next;
336 } else {
337 # Nothing read, but would not block:
338 # This file is done
339 $s->remove($infh);
340 for(@{$buffer{$infh}{$outfd}}) {
341 syswrite($Global::fh{$outfd},$_);
343 delete $buffer{$infh};
344 # Closing the $infh causes it to block
345 # close $infh;
346 add_files();
347 next;
350 # Something read.
351 # Find \n or \r for full line
352 my $i = (rindex($buf,"\n")+1);
353 if($i) {
354 # Print full line
355 for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
356 syswrite($Global::fh{$outfd},$_);
358 # @buffer = remaining half line
359 $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
360 } else {
361 # Something read, but not a full line
362 push @{$buffer{$infh}{$outfd}}, $buf;
364 redo;
369 } while($opened < $files_to_open);
371 for (@producers) {
372 $_->join();
375 sub set_fh_non_blocking {
376 # Set filehandle as non-blocking
377 # Inputs:
378 # $fh = filehandle to be blocking
379 # Returns:
380 # N/A
381 my $fh = shift;
382 my $flags;
383 fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
384 $flags |= &O_NONBLOCK; # Add non-blocking to the flags
385 fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
388 return ::spacefree(3, $script);
391 sub sharder_script() {
392 my $script = q{
393 use B;
394 # Column separator
395 my $sep = shift;
396 # Which columns to shard on (count from 1)
397 my $col = shift;
398 # Which columns to shard on (count from 0)
399 my $col0 = $col - 1;
400 # Perl expression
401 my $perlexpr = shift;
402 my $bins = @ARGV;
403 # Open fifos for writing, fh{0..$bins}
404 my $t = 0;
405 my %fh;
406 for(@ARGV) {
407 open $fh{$t++}, ">", $_;
408 # open blocks until it is opened by reader
409 # so unlink only happens when it is ready
410 unlink $_;
412 if($perlexpr) {
413 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
414 while(<STDIN>) {
415 # Split into $col columns (no need to split into more)
416 @F = split $sep, $_, $col+1;
418 local $_ = $F[$col0];
419 &$subref();
420 $fh = $fh{ hex(B::hash($_))%$bins };
422 print $fh $_;
424 } else {
425 while(<STDIN>) {
426 # Split into $col columns (no need to split into more)
427 @F = split $sep, $_, $col+1;
428 $fh = $fh{ hex(B::hash($F[$col0]))%$bins };
429 print $fh $_;
432 # Close all open fifos
433 close values %fh;
435 return ::spacefree(1, $script);
438 sub binner_script() {
439 my $script = q{
440 use B;
441 # Column separator
442 my $sep = shift;
443 # Which columns to shard on (count from 1)
444 my $col = shift;
445 # Which columns to shard on (count from 0)
446 my $col0 = $col - 1;
447 # Perl expression
448 my $perlexpr = shift;
449 my $bins = @ARGV;
450 # Open fifos for writing, fh{0..$bins}
451 my $t = 0;
452 my %fh;
453 # Let the last output fifo be the 0'th
454 open $fh{$t++}, ">", pop @ARGV;
455 for(@ARGV) {
456 open $fh{$t++}, ">", $_;
457 # open blocks until it is opened by reader
458 # so unlink only happens when it is ready
459 unlink $_;
461 if($perlexpr) {
462 my $subref = eval("sub { no strict; no warnings; $perlexpr }");
463 while(<STDIN>) {
464 # Split into $col columns (no need to split into more)
465 @F = split $sep, $_, $col+1;
467 local $_ = $F[$col0];
468 &$subref();
469 $fh = $fh{ $_%$bins };
471 print $fh $_;
473 } else {
474 while(<STDIN>) {
475 # Split into $col columns (no need to split into more)
476 @F = split $sep, $_, $col+1;
477 $fh = $fh{ $F[$col0]%$bins };
478 print $fh $_;
481 # Close all open fifos
482 close values %fh;
484 return ::spacefree(1, $script);
487 sub pipe_shard_setup() {
488 # Create temporary fifos
489 # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background
490 # This will spread the input to fifos
491 # Generate commands that reads from fifo1..N:
492 # cat fifo | user_command
493 # Changes:
494 # @Global::cat_prepends
495 my @shardfifos;
496 my @parcatfifos;
497 # TODO $opt::jobs should be evaluated (100%)
498 # TODO $opt::jobs should be number of total_jobs if there are argugemts
499 my $njobs = $opt::jobs;
500 for my $m (0..$njobs-1) {
501 for my $n (0..$njobs-1) {
502 # sharding to A B C D
503 # parcatting all As together
504 $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo();
507 my $shardbin = ($opt::shard || $opt::bin);
508 my $script;
509 if($opt::bin) {
510 $script = binner_script();
511 } else {
512 $script = sharder_script();
515 # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN
517 if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
518 # Group by column name
519 # (Yes, this will also wrongly match a perlexpr like: chop)
520 my($read,$char,@line);
521 # A full line, but nothing more (the rest must be read by the child)
522 # $Global::header used to prepend block to each job
523 do {
524 $read = sysread(STDIN,$char,1);
525 push @line, $char;
526 } while($read and $char ne "\n");
527 $Global::header = join "", @line;
529 my ($col, $perlexpr, $subref) =
530 column_perlexpr($shardbin, $Global::header, $opt::colsep);
531 if(not fork()) {
532 # Let the sharder inherit our stdin
533 # and redirect stdout to null
534 open STDOUT, ">","/dev/null";
535 # The PERL_HASH_SEED must be the same for all sharders
536 # so B::hash will return the same value for any given input
537 $ENV{'PERL_HASH_SEED'} = $$;
538 exec qw(parallel --block 100k -q --pipe -j), $njobs,
539 qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
540 $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos);
542 # For each fifo
543 # (rm fifo1; grep 1) < fifo1
544 # (rm fifo2; grep 2) < fifo2
545 # (rm fifo3; grep 3) < fifo3
546 my $parcat = Q(parcat_script());
547 if(not $parcat) {
548 ::error("'parcat' must be in path.");
549 ::wait_and_exit(255);
551 @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos;
554 sub pipe_part_files(@) {
555 # Given the bigfile
556 # find header and split positions
557 # make commands that 'cat's the partial file
558 # Input:
559 # $file = the file to read
560 # Returns:
561 # @commands that will cat_partial each part
562 my ($file) = @_;
563 my $buf = "";
564 if(not -f $file and not -b $file) {
565 ::error("--pipepart only works on seekable files, not streams/pipes.",
566 "$file is not a seekable file.");
567 ::wait_and_exit(255);
569 my $header = find_header(\$buf,open_or_exit($file));
570 # find positions
571 my @pos = find_split_positions($file,int($Global::blocksize),$header);
572 # Make @cat_prepends
573 my @cat_prepends = ();
574 for(my $i=0; $i<$#pos; $i++) {
575 push(@cat_prepends,
576 cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]));
578 return @cat_prepends;
581 sub find_header($$) {
582 # Compute the header based on $opt::header
583 # Input:
584 # $buf_ref = reference to read-in buffer
585 # $fh = filehandle to read from
586 # Uses:
587 # $opt::header
588 # $Global::blocksize
589 # $Global::header
590 # Returns:
591 # $header string
592 my ($buf_ref, $fh) = @_;
593 my $header = "";
594 # $Global::header may be set in group_by_loop()
595 if($Global::header) { return $Global::header }
596 if($opt::header) {
597 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
598 # Number = number of lines
599 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
600 while(sysread($fh,$$buf_ref,int($Global::blocksize),length $$buf_ref)) {
601 if($$buf_ref =~ s/^($opt::header)//) {
602 $header = $1;
603 last;
607 return $header;
610 sub find_split_positions($$$) {
611 # Find positions in bigfile where recend is followed by recstart
612 # Input:
613 # $file = the file to read
614 # $block = (minimal) --block-size of each chunk
615 # $header = header to be skipped
616 # Uses:
617 # $opt::recstart
618 # $opt::recend
619 # Returns:
620 # @positions of block start/end
621 my($file, $block, $header) = @_;
622 my $headerlen = length $header;
623 my $size = -s $file;
624 if(-b $file) {
625 # $file is a blockdevice
626 $size = size_of_block_dev($file);
628 $block = int $block;
629 if($opt::groupby) {
630 return split_positions_for_group_by($file,$size,$block,$header);
632 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
633 # The optimal dd blocksize for freebsd = 2^15..2^17
634 # The optimal dd blocksize for ubuntu (AMD6376) = 2^16
635 my $dd_block_size = 131072; # 2^17
636 my @pos;
637 my ($recstart,$recend) = recstartrecend();
638 my $recendrecstart = $recend.$recstart;
639 my $fh = ::open_or_exit($file);
640 push(@pos,$headerlen);
641 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
642 my $buf;
643 if($recendrecstart eq "") {
644 # records ends anywhere
645 push(@pos,$pos);
646 } else {
647 # Seek the the block start
648 if(not sysseek($fh, $pos, 0)) {
649 ::error("Cannot seek to $pos in $file");
650 edit(255);
652 while(sysread($fh,$buf,$dd_block_size,length $buf)) {
653 if($opt::regexp) {
654 # If match /$recend$recstart/ => Record position
655 if($buf =~ m:^(.*$recend)$recstart:os) {
656 # Start looking for next record _after_ this match
657 $pos += length($1);
658 push(@pos,$pos);
659 last;
661 } else {
662 # If match $recend$recstart => Record position
663 # TODO optimize to only look at the appended
664 # $dd_block_size + len $recendrecstart
665 # TODO increase $dd_block_size to optimize for longer records
666 my $i = index64(\$buf,$recendrecstart);
667 if($i != -1) {
668 # Start looking for next record _after_ this match
669 $pos += $i + length($recend);
670 push(@pos,$pos);
671 last;
677 if($pos[$#pos] != $size) {
678 # Last splitpoint was not at end of the file: add $size as the last
679 push @pos, $size;
681 close $fh;
682 return @pos;
685 sub split_positions_for_group_by($$$$) {
686 my($fh);
687 sub value_at($) {
688 my $pos = shift;
689 if($pos != 0) {
690 seek($fh, $pos-1, 0) || die;
691 # Read half line
692 <$fh>;
694 # Read full line
695 my $linepos = tell($fh);
696 $_ = <$fh>;
697 if(defined $_) {
698 # Not end of file
699 my @F;
700 if(defined $group_by::col) {
701 $opt::colsep ||= "\t";
702 @F = split /$opt::colsep/, $_;
703 $_ = $F[$group_by::col];
705 eval $group_by::perlexpr;
707 return ($_,$linepos);
710 sub binary_search_end($$$) {
711 my ($s,$spos,$epos) = @_;
712 # value_at($spos) == $s
713 # value_at($epos) != $s
714 my $posdif = $epos - $spos;
715 my ($v,$vpos);
716 while($posdif) {
717 ($v,$vpos) = value_at($spos+$posdif);
718 if($v eq $s) {
719 $spos = $vpos;
720 $posdif = $epos - $spos;
721 } else {
722 $epos = $vpos;
724 $posdif = int($posdif/2);
726 return($v,$vpos);
729 sub binary_search_start($$$) {
730 my ($s,$spos,$epos) = @_;
731 # value_at($spos) != $s
732 # value_at($epos) == $s
733 my $posdif = $epos - $spos;
734 my ($v,$vpos);
735 while($posdif) {
736 ($v,$vpos) = value_at($spos+$posdif);
737 if($v eq $s) {
738 $epos = $vpos;
739 } else {
740 $spos = $vpos;
741 $posdif = $epos - $spos;
743 $posdif = int($posdif/2);
745 return($v,$vpos);
748 my ($file,$size,$block,$header) = @_;
749 my ($a,$b,$c,$apos,$bpos,$cpos);
750 my @pos;
751 $fh = open_or_exit($file);
752 # Set $Global::group_by_column $Global::group_by_perlexpr
753 group_by_loop($fh,$opt::recsep);
754 # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos
755 $apos = length $header;
756 for(($a,$apos) = value_at($apos); $apos < $size;) {
757 push @pos, $apos;
758 $bpos = $apos + $block;
759 ($b,$bpos) = value_at($bpos);
760 if(eof($fh)) {
761 push @pos, $size; last;
763 $cpos = $bpos + $block;
764 ($c,$cpos) = value_at($cpos);
765 if($a eq $b) {
766 while($b eq $c) {
767 # Move bpos, cpos a block forward until $a == $b != $c
768 $bpos = $cpos;
769 $cpos += $block;
770 ($c,$cpos) = value_at($cpos);
771 if($cpos >= $size) {
772 $cpos = $size;
773 last;
776 # $a == $b != $c
777 # Binary search for $b ending between ($bpos,$cpos)
778 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
779 } else {
780 if($b eq $c) {
781 # $a != $b == $c
782 # Binary search for $b starting between ($apos,$bpos)
783 ($b,$bpos) = binary_search_start($b,$apos,$bpos);
784 } else {
785 # $a != $b != $c
786 # Binary search for $b ending between ($bpos,$cpos)
787 ($b,$bpos) = binary_search_end($b,$bpos,$cpos);
790 ($a,$apos) = ($b,$bpos);
792 if($pos[$#pos] != $size) {
793 # Last splitpoint was not at end of the file: add it
794 push @pos, $size;
796 return @pos;
799 sub cat_partial($@) {
800 # Efficient command to copy from byte X to byte Y
801 # Input:
802 # $file = the file to read
803 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
804 # Returns:
805 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
806 my($file, @start_end) = @_;
807 my($start, $i);
808 # Convert (start,end) to (start,len)
809 my @start_len = map {
810 if(++$i % 2) { $start = $_; } else { $_-$start }
811 } @start_end;
812 # This can read 7 GB/s using a single core
813 my $script = spacefree
816 while(@ARGV) {
817 sysseek(STDIN,shift,0) || die;
818 $left = shift;
819 while($read =
820 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
821 $left -= $read;
822 syswrite(STDOUT,$buf);
826 return "<". Q($file) .
827 " perl -e '$script' @start_len |";
830 sub column_perlexpr($$$) {
831 # Compute the column number (if any), perlexpression from combined
832 # string (such as --shard key, --groupby key, {=n perlexpr=}
833 # Input:
834 # $column_perlexpr = string with column and perl expression
835 # $header = header from input file (if column is column name)
836 # $colsep = column separator regexp
837 # Returns:
838 # $col = column number
839 # $perlexpr = perl expression
840 # $subref = compiled perl expression as sub reference
841 my ($column_perlexpr, $header, $colsep) = @_;
842 my ($col, $perlexpr, $subref);
843 if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) {
844 # Column name/number (possibly prefix)
845 if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) {
846 # Column number (possibly prefix)
847 $col = $1;
848 } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) {
849 # Column name (possibly prefix)
850 my $colname = $1;
851 # Split on --copsep pattern
852 my @headers = split /$colsep/, $header;
853 my %headers;
854 @headers{@headers} = (1..($#headers+1));
855 $col = $headers{$colname};
856 if(not defined $col) {
857 ::error("Column '$colname' $colsep not found in header",keys %headers);
858 ::wait_and_exit(255);
862 # What is left of $column_perlexpr is $perlexpr (possibly empty)
863 $perlexpr = $column_perlexpr;
864 $subref = eval("sub { no strict; no warnings; $perlexpr }");
865 return($col, $perlexpr, $subref);
868 sub group_by_loop($$) {
869 # Generate perl code for group-by loop
870 # Insert a $recsep when the column value changes
871 # The column value can be computed with $perlexpr
872 my($fh,$recsep) = @_;
873 my $groupby = $opt::groupby;
874 if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
875 # Group by column name
876 # (Yes, this will also wrongly match a perlexpr like: chop)
877 my($read,$char,@line);
878 # Read a full line, but nothing more
879 # (the rest must be read by the child)
880 # $Global::header used to prepend block to each job
881 do {
882 $read = sysread($fh,$char,1);
883 push @line, $char;
884 } while($read and $char ne "\n");
885 $Global::header = join "", @line;
887 $opt::colsep ||= "\t";
888 ($group_by::col, $group_by::perlexpr, $group_by::subref) =
889 column_perlexpr($groupby, $Global::header, $opt::colsep);
890 # Numbered 0..n-1 due to being used by $F[n]
891 if($group_by::col) { $group_by::col--; }
893 my $loop = ::spacefree(0,q{
894 BEGIN{ $last = "RECSEP"; }
896 local $_=COLVALUE;
897 PERLEXPR;
898 if(($last) ne $_) {
899 print "RECSEP";
900 $last = $_;
904 if(defined $group_by::col) {
905 $loop =~ s/COLVALUE/\$F[$group_by::col]/g;
906 } else {
907 $loop =~ s/COLVALUE/\$_/g;
909 $loop =~ s/PERLEXPR/$group_by::perlexpr/g;
910 $loop =~ s/RECSEP/$recsep/g;
911 return $loop;
914 sub group_by_stdin_filter() {
915 # Record separator with 119 bit random value
916 $opt::recend = '';
917 $opt::recstart =
918 join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
919 $opt::remove_rec_sep = 1;
920 my @filter;
921 push @filter, "perl";
922 if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) {
923 # This is column number/name
924 # Use -a (auto-split)
925 push @filter, "-a";
926 $opt::colsep ||= "\t";
927 my $sep = $opt::colsep;
928 $sep =~ s/\t/\\t/g;
929 $sep =~ s/\"/\\"/g;
930 # man perlrun: -Fpattern [...] You can't use literal whitespace
931 $sep =~ s/ /\\040{1}/g;
932 push @filter, "-F$sep";
934 push @filter, "-pe";
935 push @filter, group_by_loop(*STDIN,$opt::recstart);
936 ::debug("init", "@filter\n");
937 open(STDIN, '-|', @filter) || die ("Cannot start @filter");
938 if(which("mbuffer")) {
939 # You get a speed up of 30% by going through mbuffer
940 open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") ||
941 die ("Cannot start mbuffer");
945 sub spreadstdin() {
946 # read a record
947 # Spawn a job and print the record to it.
948 # Uses:
949 # $Global::blocksize
950 # STDIN
951 # $opt::r
952 # $Global::max_lines
953 # $Global::max_number_of_args
954 # $opt::regexp
955 # $Global::start_no_new_jobs
956 # $opt::roundrobin
957 # %Global::running
958 # Returns: N/A
960 my $buf = "";
961 my ($recstart,$recend) = recstartrecend();
962 my $recendrecstart = $recend.$recstart;
963 my $chunk_number = 1;
964 my $one_time_through;
965 my $two_gb = 2**31-1;
966 my $blocksize = int($Global::blocksize);
967 my $in = *STDIN;
968 my $timeout = $Global::blocktimeout;
970 my $header = find_header(\$buf,$in);
971 my $anything_written;
972 my $eof;
973 my $garbage_read;
975 sub read_block() {
976 # Read a --blocksize from STDIN
977 # possibly interrupted by --blocktimeout
978 # Add up to the next full block
979 my $readsize = $blocksize - (length $buf) % $blocksize;
980 my ($nread,$alarm);
981 eval {
982 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
983 # --blocktimeout (or 0 if not set)
984 alarm $timeout;
985 if($] >= 5.026) {
986 do {
987 $nread = sysread $in, $buf, $readsize, length $buf;
988 $readsize -= $nread;
989 } while($readsize and $nread);
990 } else {
991 # Less efficient reading, but 32-bit sysread compatible
992 do {
993 $nread = sysread($in,substr($buf,length $buf,0),$readsize,0);
994 $readsize -= $nread;
995 } while($readsize and $nread);
997 alarm 0;
999 if ($@) {
1000 die unless $@ eq "alarm\n"; # propagate unexpected errors
1001 $alarm = 1;
1002 } else {
1003 $alarm = 0;
1005 $eof = not ($nread or $alarm);
1008 sub pass_n_line_records() {
1009 # Pass records of N lines
1010 my $n_lines = $buf =~ tr/\n/\n/;
1011 my $last_newline_pos = rindex64(\$buf,"\n");
1012 # Go backwards until there are full n-line records
1013 while($n_lines % $Global::max_lines) {
1014 $n_lines--;
1015 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1017 # Chop at $last_newline_pos as that is where n-line record ends
1018 $anything_written +=
1019 write_record_to_pipe($chunk_number++,\$header,\$buf,
1020 $recstart,$recend,$last_newline_pos+1);
1021 shorten(\$buf,$last_newline_pos+1);
1024 sub pass_n_regexps() {
1025 # Pass records of N regexps
1026 # -N => (start..*?end){n}
1027 # -L -N => (start..*?end){n*l}
1028 if(not $garbage_read) {
1029 $garbage_read = 1;
1030 if($buf !~ /^$recstart/o) {
1031 # Buf does not start with $recstart => There is garbage.
1032 # Make a single record of the garbage
1033 if($buf =~
1034 /(?s:^)(
1035 (?:(?:(?!$recend$recstart)(?s:.))*?$recend)
1037 # Followed by recstart
1038 (?=$recstart)/mox and length $1 > 0) {
1039 $anything_written +=
1040 write_record_to_pipe($chunk_number++,\$header,\$buf,
1041 $recstart,$recend,length $1);
1042 shorten(\$buf,length $1);
1047 my $n_records =
1048 $Global::max_number_of_args * ($Global::max_lines || 1);
1049 # (?!negative lookahead) is needed to avoid backtracking
1050 # See: https://unix.stackexchange.com/questions/439356/
1051 # (?s:.) = (.|[\n]) but faster
1052 while($buf =~
1053 /(?s:^)(
1054 # n more times recstart.*recend
1055 (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records}
1057 # Followed by recstart
1058 (?=$recstart)/mox and length $1 > 0) {
1059 $anything_written +=
1060 write_record_to_pipe($chunk_number++,\$header,\$buf,
1061 $recstart,$recend,length $1);
1062 shorten(\$buf,length $1);
1066 sub pass_regexp() {
1067 # Find the last recend-recstart in $buf
1068 $eof and return;
1069 # (?s:.) = (.|[\n]) but faster
1070 if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) {
1071 $anything_written +=
1072 write_record_to_pipe($chunk_number++,\$header,\$buf,
1073 $recstart,$recend,length $1);
1074 shorten(\$buf,length $1);
1078 sub pass_csv_record() {
1079 # Pass CVS record
1080 # We define a CSV record as an even number of " + end of line
1081 # This works if you use " as quoting character
1082 my $last_newline_pos = length $buf;
1083 # Go backwards from the last \n and search for a position
1084 # where there is an even number of "
1085 do {
1086 # find last EOL
1087 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
1088 # While uneven "
1089 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
1090 and $last_newline_pos >= 0);
1091 # Chop at $last_newline_pos as that is where CSV record ends
1092 $anything_written +=
1093 write_record_to_pipe($chunk_number++,\$header,\$buf,
1094 $recstart,$recend,$last_newline_pos+1);
1095 shorten(\$buf,$last_newline_pos+1);
1098 sub pass_n() {
1099 # Pass n records of --recend/--recstart
1100 # -N => (start..*?end){n}
1101 my $i = 0;
1102 my $read_n_lines =
1103 $Global::max_number_of_args * ($Global::max_lines || 1);
1104 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
1106 length $buf) {
1107 $i += length $recend; # find the actual splitting location
1108 $anything_written +=
1109 write_record_to_pipe($chunk_number++,\$header,\$buf,
1110 $recstart,$recend,$i);
1111 shorten(\$buf,$i);
1115 sub pass() {
1116 # Pass records of --recend/--recstart
1117 # Split record at fixed string
1118 # Find the last recend+recstart in $buf
1119 $eof and return;
1120 my $i = rindex64(\$buf,$recendrecstart);
1121 if($i != -1) {
1122 $i += length $recend; # find the actual splitting location
1123 $anything_written +=
1124 write_record_to_pipe($chunk_number++,\$header,\$buf,
1125 $recstart,$recend,$i);
1126 shorten(\$buf,$i);
1130 sub increase_blocksize_maybe() {
1131 if(not $anything_written
1132 and not $opt::blocktimeout
1133 and not $Global::no_autoexpand_block) {
1134 # Nothing was written - maybe the block size < record size?
1135 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
1136 if($blocksize < $two_gb) {
1137 my $old_blocksize = $blocksize;
1138 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
1139 ::warning("A record was longer than $old_blocksize. " .
1140 "Increasing to --blocksize $blocksize.");
1145 while(1) {
1146 $anything_written = 0;
1147 read_block();
1148 if($opt::r) {
1149 # Remove empty lines
1150 $buf =~ s/^\s*\n//gm;
1151 if(length $buf == 0) {
1152 if($eof) {
1153 last;
1154 } else {
1155 next;
1159 if($Global::max_lines and not $Global::max_number_of_args) {
1160 # Pass n-line records
1161 pass_n_line_records();
1162 } elsif($opt::csv) {
1163 # Pass a full CSV record
1164 pass_csv_record();
1165 } elsif($opt::regexp) {
1166 # Split record at regexp
1167 if($Global::max_number_of_args) {
1168 pass_n_regexps();
1169 } else {
1170 pass_regexp();
1172 } else {
1173 # Pass normal --recend/--recstart record
1174 if($Global::max_number_of_args) {
1175 pass_n();
1176 } else {
1177 pass();
1180 $eof and last;
1181 increase_blocksize_maybe();
1182 ::debug("init", "Round\n");
1184 ::debug("init", "Done reading input\n");
1186 # If there is anything left in the buffer write it
1187 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
1188 $recend, length $buf);
1190 if($opt::retries) {
1191 $Global::no_more_input = 1;
1192 # We need to start no more jobs: At most we need to retry some
1193 # of the already running.
1194 my @running = values %Global::running;
1195 # Stop any virgins.
1196 for my $job (@running) {
1197 if(defined $job and $job->virgin()) {
1198 close $job->fh(0,"w");
1201 # Wait for running jobs to be done
1202 my $sleep = 1;
1203 while($Global::total_running > 0) {
1204 $sleep = ::reap_usleep($sleep);
1205 start_more_jobs();
1208 $Global::start_no_new_jobs ||= 1;
1209 if($opt::roundrobin) {
1210 # Flush blocks to roundrobin procs
1211 my $sleep = 1;
1212 while(%Global::running) {
1213 my $something_written = 0;
1214 for my $job (values %Global::running) {
1215 if($job->block_length()) {
1216 $something_written += $job->non_blocking_write();
1217 } else {
1218 close $job->fh(0,"w");
1221 if($something_written) {
1222 $sleep = $sleep/2+0.001;
1224 $sleep = ::reap_usleep($sleep);
1229 sub recstartrecend() {
1230 # Uses:
1231 # $opt::recstart
1232 # $opt::recend
1233 # Returns:
1234 # $recstart,$recend with default values and regexp conversion
1235 my($recstart,$recend);
1236 if(defined($opt::recstart) and defined($opt::recend)) {
1237 # If both --recstart and --recend is given then both must match
1238 $recstart = $opt::recstart;
1239 $recend = $opt::recend;
1240 } elsif(defined($opt::recstart)) {
1241 # If --recstart is given it must match start of record
1242 $recstart = $opt::recstart;
1243 $recend = "";
1244 } elsif(defined($opt::recend)) {
1245 # If --recend is given then it must match end of record
1246 $recstart = "";
1247 $recend = $opt::recend;
1248 if($opt::regexp and $recend eq '') {
1249 # --regexp --recend ''
1250 $recend = '(?s:.)';
1254 if($opt::regexp) {
1255 # Do not allow /x comments - to avoid having to quote space
1256 $recstart = "(?-x:".$recstart.")";
1257 $recend = "(?-x:".$recend.")";
1258 # If $recstart/$recend contains '|'
1259 # the | should only apply to the regexp
1260 $recstart = "(?:".$recstart.")";
1261 $recend = "(?:".$recend.")";
1262 } else {
1263 # $recstart/$recend = printf strings (\n)
1264 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1265 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
1267 return ($recstart,$recend);
1270 sub nindex($$) {
1271 # See if string is in buffer N times
1272 # Returns:
1273 # the position where the Nth copy is found
1274 my ($buf_ref, $str, $n) = @_;
1275 my $i = 0;
1276 for(1..$n) {
1277 $i = index64($buf_ref,$str,$i+1);
1278 if($i == -1) { last }
1280 return $i;
1284 my @robin_queue;
1285 my $sleep = 1;
1287 sub round_robin_write($$$$$) {
1288 # Input:
1289 # $header_ref = ref to $header string
1290 # $block_ref = ref to $block to be written
1291 # $recstart = record start string
1292 # $recend = record end string
1293 # $endpos = end position of $block
1294 # Uses:
1295 # %Global::running
1296 # Returns:
1297 # $something_written = amount of bytes written
1298 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
1299 my $written = 0;
1300 my $block_passed = 0;
1301 while(not $block_passed) {
1302 # Continue flushing existing buffers
1303 # until one is empty and a new block is passed
1304 if(@robin_queue) {
1305 # Rotate queue once so new blocks get a fair chance
1306 # to be given to another slot
1307 push @robin_queue, shift @robin_queue;
1308 } else {
1309 # Make a queue to spread the blocks evenly
1310 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
1311 values %Global::running);
1313 do {
1314 $written = 0;
1315 for my $job (@robin_queue) {
1316 if($job->block_length() > 0) {
1317 $written += $job->non_blocking_write();
1318 } else {
1319 $job->set_block($header_ref, $buffer_ref,
1320 $endpos, $recstart, $recend);
1321 $block_passed = 1;
1322 $written += $job->non_blocking_write();
1323 last;
1326 if($written) {
1327 $sleep = $sleep/1.5+0.001;
1329 # Don't sleep if something is written
1330 } while($written and not $block_passed);
1331 $sleep = ::reap_usleep($sleep);
1333 return $written;
1337 sub index64($$$) {
1338 # Do index on strings > 2GB.
1339 # index in Perl < v5.22 does not work for > 2GB
1340 # Input:
1341 # as index except STR which must be passed as a reference
1342 # Output:
1343 # as index
1344 my $ref = shift;
1345 my $match = shift;
1346 my $pos = shift || 0;
1347 my $block_size = 2**31-1;
1348 my $strlen = length($$ref);
1349 # No point in doing extra work if we don't need to.
1350 if($strlen < $block_size or $] > 5.022) {
1351 return index($$ref, $match, $pos);
1354 my $matchlen = length($match);
1355 my $ret;
1356 my $offset = $pos;
1357 while($offset < $strlen) {
1358 $ret = index(
1359 substr($$ref, $offset, $block_size),
1360 $match, $pos-$offset);
1361 if($ret != -1) {
1362 return $ret + $offset;
1364 $offset += ($block_size - $matchlen - 1);
1366 return -1;
1369 sub rindex64($@) {
1370 # Do rindex on strings > 2GB.
1371 # rindex in Perl < v5.22 does not work for > 2GB
1372 # Input:
1373 # as rindex except STR which must be passed as a reference
1374 # Output:
1375 # as rindex
1376 my $ref = shift;
1377 my $match = shift;
1378 my $pos = shift;
1379 my $block_size = 2**31-1;
1380 my $strlen = length($$ref);
1381 # Default: search from end
1382 $pos = defined $pos ? $pos : $strlen;
1383 # No point in doing extra work if we don't need to.
1384 if($strlen < $block_size or $] > 5.022) {
1385 return rindex($$ref, $match, $pos);
1388 my $matchlen = length($match);
1389 my $ret;
1390 my $offset = $pos - $block_size + $matchlen;
1391 if($offset < 0) {
1392 # The offset is less than a $block_size
1393 # Set the $offset to 0 and
1394 # Adjust block_size accordingly
1395 $block_size = $block_size + $offset;
1396 $offset = 0;
1398 while($offset >= 0) {
1399 $ret = rindex(
1400 substr($$ref, $offset, $block_size),
1401 $match);
1402 if($ret != -1) {
1403 return $ret + $offset;
1405 $offset -= ($block_size - $matchlen - 1);
1407 return -1;
1410 sub shorten($$) {
1411 # Do: substr($buf,0,$i) = "";
1412 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
1413 # Input:
1414 # $buf_ref = \$buf
1415 # $i = position to shorten to
1416 # Returns: N/A
1417 my ($buf_ref, $i) = @_;
1418 my $two_gb = 2**31-1;
1419 while($i > $two_gb) {
1420 substr($$buf_ref,0,$two_gb) = "";
1421 $i -= $two_gb;
1423 substr($$buf_ref,0,$i) = "";
1426 sub write_record_to_pipe($$$$$$) {
1427 # Fork then
1428 # Write record from pos 0 .. $endpos to pipe
1429 # Input:
1430 # $chunk_number = sequence number - to see if already run
1431 # $header_ref = reference to header string to prepend
1432 # $buffer_ref = reference to record to write
1433 # $recstart = start string of record
1434 # $recend = end string of record
1435 # $endpos = position in $buffer_ref where record ends
1436 # Uses:
1437 # $Global::job_already_run
1438 # $opt::roundrobin
1439 # @Global::virgin_jobs
1440 # Returns:
1441 # Number of chunks written (0 or 1)
1442 my ($chunk_number, $header_ref, $buffer_ref,
1443 $recstart, $recend, $endpos) = @_;
1444 if($endpos == 0) { return 0; }
1445 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
1446 if($opt::roundrobin) {
1447 # Write the block to one of the already running jobs
1448 return round_robin_write($header_ref, $buffer_ref,
1449 $recstart, $recend, $endpos);
1451 # If no virgin found, backoff
1452 my $sleep = 0.0001; # 0.01 ms - better performance on highend
1453 while(not @Global::virgin_jobs) {
1454 ::debug("pipe", "No virgin jobs");
1455 $sleep = ::reap_usleep($sleep);
1456 # Jobs may not be started because of loadavg
1457 # or too little time between each ssh login
1458 # or retrying failed jobs.
1459 start_more_jobs();
1461 my $job = shift @Global::virgin_jobs;
1462 $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend);
1463 $job->write_block();
1464 return 1;
1468 sub __SEM_MODE__() {}
1471 sub acquire_semaphore() {
1472 # Acquires semaphore. If needed: spawns to the background
1473 # Uses:
1474 # @Global::host
1475 # Returns:
1476 # The semaphore to be released when jobs is complete
1477 $Global::host{':'} = SSHLogin->new(":");
1478 my $sem = Semaphore->new($Semaphore::name,
1479 $Global::host{':'}->max_jobs_running());
1480 $sem->acquire();
1481 if($Semaphore::fg) {
1482 # skip
1483 } else {
1484 if(fork()) {
1485 exit(0);
1486 } else {
1487 # If run in the background, the PID will change
1488 $sem->pid_change();
1491 return $sem;
1495 sub __PARSE_OPTIONS__() {}
1498 sub options_hash() {
1499 # Returns:
1500 # %hash = the GetOptions config
1501 return
1502 ("debug|D=s" => \$opt::D,
1503 "xargs" => \$opt::xargs,
1504 "m" => \$opt::m,
1505 "X" => \$opt::X,
1506 "v" => \@opt::v,
1507 "sql=s" => \$opt::retired,
1508 "sqlmaster=s" => \$opt::sqlmaster,
1509 "sqlworker=s" => \$opt::sqlworker,
1510 "sqlandworker=s" => \$opt::sqlandworker,
1511 "joblog|jl=s" => \$opt::joblog,
1512 "results|result|res=s" => \$opt::results,
1513 "resume" => \$opt::resume,
1514 "resume-failed|resumefailed" => \$opt::resume_failed,
1515 "retry-failed|retryfailed" => \$opt::retry_failed,
1516 "silent" => \$opt::silent,
1517 "keep-order|keeporder|k" => \$opt::keeporder,
1518 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
1519 "group" => \$opt::group,
1520 "g" => \$opt::retired,
1521 "ungroup|u" => \$opt::ungroup,
1522 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
1523 => \$opt::linebuffer,
1524 "tmux" => \$opt::tmux,
1525 "tmuxpane" => \$opt::tmuxpane,
1526 "null|0" => \$opt::null,
1527 "quote|q" => \$opt::quote,
1528 # Replacement strings
1529 "parens=s" => \$opt::parens,
1530 "rpl=s" => \@opt::rpl,
1531 "plus" => \$opt::plus,
1532 "I=s" => \$opt::I,
1533 "extensionreplace|er=s" => \$opt::U,
1534 "U=s" => \$opt::retired,
1535 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1536 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1537 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1538 "seqreplace=s" => \$opt::seqreplace,
1539 "slotreplace=s" => \$opt::slotreplace,
1540 "jobs|j=s" => \$opt::jobs,
1541 "delay=s" => \$opt::delay,
1542 "sshdelay=f" => \$opt::sshdelay,
1543 "load=s" => \$opt::load,
1544 "noswap" => \$opt::noswap,
1545 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1546 "number-of-cpus" => \$opt::number_of_cpus,
1547 "number-of-sockets" => \$opt::number_of_sockets,
1548 "number-of-cores" => \$opt::number_of_cores,
1549 "number-of-threads" => \$opt::number_of_threads,
1550 "use-sockets-instead-of-threads"
1551 => \$opt::use_sockets_instead_of_threads,
1552 "use-cores-instead-of-threads"
1553 => \$opt::use_cores_instead_of_threads,
1554 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1555 "shellquote|shell_quote|shell-quote" => \@opt::shellquote,
1556 "nice=i" => \$opt::nice,
1557 "tag" => \$opt::tag,
1558 "tagstring|tag-string=s" => \$opt::tagstring,
1559 "ctag" => \$opt::ctag,
1560 "ctagstring|ctag-string=s" => \$opt::ctagstring,
1561 "onall" => \$opt::onall,
1562 "nonall" => \$opt::nonall,
1563 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1564 "sshlogin|S=s" => \@opt::sshlogin,
1565 "sshloginfile|slf=s" => \@opt::sshloginfile,
1566 "controlmaster|M" => \$opt::controlmaster,
1567 "ssh=s" => \$opt::ssh,
1568 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1569 => \@opt::transfer_files,
1570 "return=s" => \@opt::return,
1571 "trc=s" => \@opt::trc,
1572 "transfer" => \$opt::transfer,
1573 "cleanup" => \$opt::cleanup,
1574 "basefile|bf=s" => \@opt::basefile,
1575 "template|tmpl=s" => \%opt::template,
1576 "B=s" => \$opt::retired,
1577 "ctrlc|ctrl-c" => \$opt::retired,
1578 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1579 "workdir|work-dir|wd=s" => \$opt::workdir,
1580 "W=s" => \$opt::retired,
1581 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1582 "tmpdir|tempdir=s" => \$opt::tmpdir,
1583 "use-compress-program|compress-program=s" => \$opt::compress_program,
1584 "use-decompress-program|decompress-program=s"
1585 => \$opt::decompress_program,
1586 "compress" => \$opt::compress,
1587 "tty" => \$opt::tty,
1588 "T" => \$opt::retired,
1589 "H=i" => \$opt::retired,
1590 "dry-run|dryrun|dr" => \$opt::dryrun,
1591 "progress" => \$opt::progress,
1592 "eta" => \$opt::eta,
1593 "bar" => \$opt::bar,
1594 "shuf" => \$opt::shuf,
1595 "arg-sep|argsep=s" => \$opt::arg_sep,
1596 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1597 "trim=s" => \$opt::trim,
1598 "env=s" => \@opt::env,
1599 "recordenv|record-env" => \$opt::record_env,
1600 "session" => \$opt::session,
1601 "plain" => \$opt::plain,
1602 "profile|J=s" => \@opt::profile,
1603 "tollef" => \$opt::tollef,
1604 "gnu" => \$opt::gnu,
1605 "link|xapply" => \$opt::link,
1606 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1607 # Before changing these lines, please read
1608 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1609 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1610 # You accept to be put in a public hall of shame by removing
1611 # the lines.
1612 "bibtex|citation" => \$opt::citation,
1613 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1614 # Termination and retries
1615 "halt-on-error|halt=s" => \$opt::halt,
1616 "limit=s" => \$opt::limit,
1617 "memfree=s" => \$opt::memfree,
1618 "memsuspend=s" => \$opt::memsuspend,
1619 "retries=s" => \$opt::retries,
1620 "timeout=s" => \$opt::timeout,
1621 "termseq|term-seq=s" => \$opt::termseq,
1622 # xargs-compatibility - implemented, man, testsuite
1623 "max-procs|P=s" => \$opt::jobs,
1624 "delimiter|d=s" => \$opt::d,
1625 "max-chars|s=s" => \$opt::max_chars,
1626 "arg-file|a=s" => \@opt::a,
1627 "no-run-if-empty|r" => \$opt::r,
1628 "replace|i:s" => \$opt::i,
1629 "E=s" => \$opt::eof,
1630 "eof|e:s" => \$opt::eof,
1631 "max-args|maxargs|n=s" => \$opt::max_args,
1632 "max-replace-args|N=s" => \$opt::max_replace_args,
1633 "colsep|col-sep|C=s" => \$opt::colsep,
1634 "csv"=> \$opt::csv,
1635 "help|h" => \$opt::help,
1636 "L=s" => \$opt::L,
1637 "max-lines|l:f" => \$opt::max_lines,
1638 "interactive|p" => \$opt::interactive,
1639 "verbose|t" => \$opt::verbose,
1640 "version|V" => \$opt::version,
1641 "minversion|min-version=i" => \$opt::minversion,
1642 "show-limits|showlimits" => \$opt::show_limits,
1643 "exit|x" => \$opt::x,
1644 # Semaphore
1645 "semaphore" => \$opt::semaphore,
1646 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1647 "semaphorename|id=s" => \$opt::semaphorename,
1648 "fg" => \$opt::fg,
1649 "bg" => \$opt::bg,
1650 "wait" => \$opt::wait,
1651 # Shebang #!/usr/bin/parallel --shebang
1652 "shebang|hashbang" => \$opt::shebang,
1653 "internal-pipe-means-argfiles"
1654 => \$opt::internal_pipe_means_argfiles,
1655 "Y" => \$opt::retired,
1656 "skip-first-line" => \$opt::skip_first_line,
1657 "bug" => \$opt::bug,
1658 # --pipe
1659 "pipe|spreadstdin" => \$opt::pipe,
1660 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1661 "recstart=s" => \$opt::recstart,
1662 "recend=s" => \$opt::recend,
1663 "regexp|regex" => \$opt::regexp,
1664 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1665 "files|output-as-files|outputasfiles" => \$opt::files,
1666 "block|block-size|blocksize=s" => \$opt::blocksize,
1667 "blocktimeout|block-timeout|bt=s" => \$opt::blocktimeout,
1668 "header=s" => \$opt::header,
1669 "cat" => \$opt::cat,
1670 "fifo" => \$opt::fifo,
1671 "pipepart|pipe-part" => \$opt::pipepart,
1672 "tee" => \$opt::tee,
1673 "shard=s" => \$opt::shard,
1674 "bin=s" => \$opt::bin,
1675 "groupby|group-by=s" => \$opt::groupby,
1677 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1678 "embed" => \$opt::embed,
1679 "filter=s" => \@opt::filter,
1680 "parset=s" => \$opt::parset,
1684 sub get_options_from_array($@) {
1685 # Run GetOptions on @array
1686 # Input:
1687 # $array_ref = ref to @ARGV to parse
1688 # @keep_only = Keep only these options
1689 # Uses:
1690 # @ARGV
1691 # Returns:
1692 # true if parsing worked
1693 # false if parsing failed
1694 # @$array_ref is changed
1695 my ($array_ref, @keep_only) = @_;
1696 if(not @$array_ref) {
1697 # Empty array: No need to look more at that
1698 return 1;
1700 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1701 # supported everywhere
1702 my @save_argv;
1703 my $this_is_ARGV = (\@::ARGV == $array_ref);
1704 if(not $this_is_ARGV) {
1705 @save_argv = @::ARGV;
1706 @::ARGV = @{$array_ref};
1708 # If @keep_only set: Ignore all values except @keep_only
1709 my %options = options_hash();
1710 if(@keep_only) {
1711 my (%keep,@dummy);
1712 @keep{@keep_only} = @keep_only;
1713 for my $k (grep { not $keep{$_} } keys %options) {
1714 # Store the value of the option in @dummy
1715 $options{$k} = \@dummy;
1718 my $retval = GetOptions(%options);
1719 if(not $this_is_ARGV) {
1720 @{$array_ref} = @::ARGV;
1721 @::ARGV = @save_argv;
1723 return $retval;
1726 sub parse_parset() {
1727 $Global::progname = "parset";
1728 @Global::parset_vars = split /[ ,]/, $opt::parset;
1729 my $var_or_assoc = shift @Global::parset_vars;
1730 # Legal names: var _v2ar arrayentry[2]
1731 my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ }
1732 @Global::parset_vars);
1733 if(@illegal) {
1734 ::error
1735 ("@illegal is an invalid variable name.",
1736 "Variable names must be letter followed by letters or digits.",
1737 "Usage:",
1738 " parset varname GNU Parallel options and command");
1739 wait_and_exit(255);
1741 if($var_or_assoc eq "assoc") {
1742 my $var = shift @Global::parset_vars;
1743 print "$var=(";
1744 $Global::parset = "assoc";
1745 $Global::parset_endstring=")\n";
1746 } elsif($var_or_assoc eq "var") {
1747 if($#Global::parset_vars > 0) {
1748 $Global::parset = "var";
1749 } else {
1750 my $var = shift @Global::parset_vars;
1751 print "$var=(";
1752 $Global::parset = "array";
1753 $Global::parset_endstring=")\n";
1755 } else {
1756 ::die_bug("parset: unknown '$opt::parset'");
1760 sub parse_options(@) {
1761 # Returns: N/A
1762 init_globals();
1763 my @argv_before = @ARGV;
1764 @ARGV = read_options();
1766 # Before changing these line, please read
1767 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1768 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
1769 # You accept to be added to a public hall of shame by
1770 # removing the lines.
1771 if(defined $opt::citation) {
1772 citation(\@argv_before,\@ARGV);
1773 wait_and_exit(0);
1775 # no-* overrides *
1776 if($opt::nokeeporder) { $opt::keeporder = undef; }
1778 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1779 if($opt::bug) { ::die_bug("test-bug"); }
1780 $Global::debug = $opt::D;
1781 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1782 || $ENV{'SHELL'} || "/bin/sh";
1783 if(not -x $Global::shell and not which($Global::shell)) {
1784 ::error("Shell '$Global::shell' not found.");
1785 wait_and_exit(255);
1787 ::debug("init","Global::shell $Global::shell\n");
1788 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1789 if(defined $opt::parset) { parse_parset(); }
1790 if(defined $opt::X) { $Global::ContextReplace = 1; }
1791 if(defined $opt::silent) { $Global::verbose = 0; }
1792 if(defined $opt::null) { $/ = "\0"; }
1793 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1794 parse_replacement_string_options();
1795 $opt::tag ||= $opt::ctag;
1796 $opt::tagstring ||= $opt::ctagstring;
1797 if(defined $opt::ctag or defined $opt::ctagstring) {
1798 $Global::color = 1;
1800 if(defined $opt::tag and not defined $opt::tagstring) {
1801 # Default = {}
1802 $opt::tagstring = $Global::parensleft.$Global::parensright;
1804 if(defined $opt::tagstring) {
1805 $opt::tagstring = unquote_printf($opt::tagstring);
1806 if($opt::tagstring =~ /\Q$Global::parensleft\E.*\Q$Global::parensright\E/
1808 $opt::linebuffer) {
1809 # --tagstring contains {= =} and --linebuffer =>
1810 # recompute replacement string for each use (do not cache)
1811 $Global::cache_replacement_eval = 0;
1814 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1815 if(defined $opt::quote) { $Global::quoting = 1; }
1816 if(defined $opt::r) { $Global::ignore_empty = 1; }
1817 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1818 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1819 if(defined $opt::max_args) {
1820 $opt::max_args = multiply_binary_prefix($opt::max_args);
1821 $Global::max_number_of_args = $opt::max_args;
1823 if(defined $opt::blocktimeout) {
1824 $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
1825 if($Global::blocktimeout < 1) {
1826 ::error("--block-timeout must be at least 1");
1827 wait_and_exit(255);
1830 if(defined $opt::timeout) {
1831 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1833 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1834 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1835 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1836 # Default: Same nice level as GNU Parallel is started at
1837 $opt::nice ||= eval { getpriority(0,0) } || 0;
1838 if(defined $opt::help) { usage(); exit(0); }
1839 if(defined $opt::embed) { embed(); exit(0); }
1840 if(defined $opt::sqlandworker) {
1841 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1843 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1844 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1845 if(defined $opt::csv) {
1846 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1847 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1848 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1849 my $sep = $csv_setting->{sep_char};
1850 $Global::csv = Text::CSV->new($csv_setting)
1851 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1853 if(defined $opt::header) {
1854 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1856 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1857 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1858 if(defined $opt::arg_file_sep) {
1859 $Global::arg_file_sep = $opt::arg_file_sep;
1861 if(defined $opt::number_of_sockets) {
1862 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1864 if(defined $opt::number_of_cpus) {
1865 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1867 if(defined $opt::number_of_cores) {
1868 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1870 if(defined $opt::number_of_threads) {
1871 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1873 if(defined $opt::max_line_length_allowed) {
1874 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1876 if(defined $opt::max_chars) {
1877 $opt::max_chars = multiply_binary_prefix($opt::max_chars);
1879 if(defined $opt::version) { version(); wait_and_exit(0); }
1880 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1881 if(defined $opt::show_limits) { show_limits(); }
1882 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1883 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1884 if(@opt::return) { push @Global::ret_files, @opt::return; }
1885 if($opt::transfer) {
1886 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1888 push @Global::transfer_files, @opt::transfer_files;
1889 if(%opt::template) {
1890 while (my ($source, $template_name) = each %opt::template) {
1891 if(open(my $tmpl, "<", $source)) {
1892 local $/; # $/ = undef => slurp whole file
1893 my $content = <$tmpl>;
1894 push @Global::template_names, $template_name;
1895 push @Global::template_contents, $content;
1896 ::debug("tmpl","Name: $template_name\n$content\n");
1897 } else {
1898 ::error("Cannot open '$source'.");
1899 wait_and_exit(255);
1903 if(not defined $opt::recstart and
1904 not defined $opt::recend) { $opt::recend = "\n"; }
1905 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1906 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1907 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1908 $Global::blocksize = 2**31-1;
1910 if($^O eq "cygwin" and
1911 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1912 and $Global::blocksize > 65535) {
1913 warning("--blocksize >= 64K causes problems on Cygwin.");
1915 $opt::memfree = multiply_binary_prefix($opt::memfree);
1916 $opt::memsuspend = multiply_binary_prefix($opt::memsuspend);
1917 $Global::memlimit = $opt::memsuspend + $opt::memfree;
1918 check_invalid_option_combinations();
1919 if((defined $opt::fifo or defined $opt::cat)
1920 and not $opt::pipepart) {
1921 $opt::pipe = 1;
1923 if(defined $opt::minversion) {
1924 print $Global::version,"\n";
1925 if($Global::version < $opt::minversion) {
1926 wait_and_exit(255);
1927 } else {
1928 wait_and_exit(0);
1931 if(not defined $opt::delay) {
1932 # Set --delay to --sshdelay if not set
1933 $opt::delay = $opt::sshdelay;
1935 $Global::sshdelayauto = $opt::sshdelay =~ s/auto$//;
1936 $opt::sshdelay = multiply_time_units($opt::sshdelay);
1937 $Global::delayauto = $opt::delay =~ s/auto$//;
1938 $opt::delay = multiply_time_units($opt::delay);
1939 if($opt::compress_program) {
1940 $opt::compress = 1;
1941 $opt::decompress_program ||= $opt::compress_program." -dc";
1944 if(defined $opt::results) {
1945 # Is the output a dir or CSV-file?
1946 if($opt::results =~ /\.csv$/i) {
1947 # CSV with , as separator
1948 $Global::csvsep = ",";
1949 $Global::membuffer ||= 1;
1950 } elsif($opt::results =~ /\.tsv$/i) {
1951 # CSV with TAB as separator
1952 $Global::csvsep = "\t";
1953 $Global::membuffer ||= 1;
1954 } elsif($opt::results =~ /\.json$/i) {
1955 # JSON output
1956 $Global::jsonout ||= 1;
1957 $Global::membuffer ||= 1;
1960 if($opt::compress) {
1961 my ($compress, $decompress) = find_compression_program();
1962 $opt::compress_program ||= $compress;
1963 $opt::decompress_program ||= $decompress;
1964 if(($opt::results and not $Global::csvsep) or $opt::files) {
1965 # No need for decompressing
1966 $opt::decompress_program = "cat >/dev/null";
1969 if(defined $opt::dryrun) {
1970 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1971 $opt::ungroup = 0;
1972 $opt::group = 1;
1974 if(defined $opt::nonall) {
1975 # Append a dummy empty argument if there are no arguments
1976 # on the command line to avoid reading from STDIN.
1977 # arg_sep = random 50 char
1978 # \0noarg => nothing (not the empty string)
1979 $Global::arg_sep = join "",
1980 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1981 push @ARGV, $Global::arg_sep, "\0noarg";
1983 if(defined $opt::tee) {
1984 if(not defined $opt::jobs) {
1985 $opt::jobs = 0;
1988 if(defined $opt::tty) {
1989 # Defaults for --tty: -j1 -u
1990 # Can be overridden with -jXXX -g
1991 if(not defined $opt::jobs) {
1992 $opt::jobs = 1;
1994 if(not defined $opt::group) {
1995 $opt::ungroup = 1;
1998 if(@opt::trc) {
1999 push @Global::ret_files, @opt::trc;
2000 if(not @Global::transfer_files) {
2001 # Defaults to --transferfile {}
2002 push @Global::transfer_files, $opt::i || $opt::I || "{}";
2004 $opt::cleanup = 1;
2006 if(defined $opt::max_lines) {
2007 if($opt::max_lines eq "-0") {
2008 # -l -0 (swallowed -0)
2009 $opt::max_lines = 1;
2010 $opt::null = 1;
2011 $/ = "\0";
2012 } else {
2013 $opt::max_lines = multiply_binary_prefix($opt::max_lines);
2014 if ($opt::max_lines == 0) {
2015 # If not given (or if 0 is given) => 1
2016 $opt::max_lines = 1;
2020 $Global::max_lines = $opt::max_lines;
2021 if(not $opt::pipe) {
2022 # --pipe -L means length of record - not max_number_of_args
2023 $Global::max_number_of_args ||= $Global::max_lines;
2027 # Read more than one arg at a time (-L, -N)
2028 if(defined $opt::L) {
2029 $opt::L = multiply_binary_prefix($opt::L);
2030 $Global::max_lines = $opt::L;
2031 if(not $opt::pipe) {
2032 # --pipe -L means length of record - not max_number_of_args
2033 $Global::max_number_of_args ||= $Global::max_lines;
2036 if(defined $opt::max_replace_args) {
2037 $opt::max_replace_args = multiply_binary_prefix($opt::max_replace_args);
2038 $Global::max_number_of_args = $opt::max_replace_args;
2039 $Global::ContextReplace = 1;
2041 if((defined $opt::L or defined $opt::max_replace_args)
2043 not ($opt::xargs or $opt::m)) {
2044 $Global::ContextReplace = 1;
2046 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
2047 # Deal with ::: :::+ :::: and ::::+
2048 @ARGV = read_args_from_command_line();
2050 parse_semaphore();
2052 if(defined $opt::eta) { $opt::progress = $opt::eta; }
2053 if(defined $opt::bar) { $opt::progress = $opt::bar; }
2055 # Funding a free software project is hard. GNU Parallel is no
2056 # exception. On top of that it seems the less visible a project
2057 # is, the harder it is to get funding. And the nature of GNU
2058 # Parallel is that it will never be seen by "the guy with the
2059 # checkbook", but only by the people doing the actual work.
2061 # This problem has been covered by others - though no solution has
2062 # been found:
2063 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
2064 # https://blog.licensezero.com/2019/08/24/process-of-elimination.html
2065 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
2067 # The FAQ tells you why the citation notice exists:
2068 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
2070 # If you want GNU Parallel to be maintained in the future, and not
2071 # just wither away like so many other free software tools, you
2072 # need to help finance the development.
2074 # The citation notice is a simple way of doing so, as citations
2075 # makes it possible to me to get a job where I can maintain GNU
2076 # Parallel as part of the job.
2078 # This means you can help financing development
2080 # WITHOUT PAYING A SINGLE CENT!
2082 # Before implementing the citation notice it was discussed with
2083 # the users:
2084 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
2086 # Having to spend 10 seconds on running 'parallel --citation' once
2087 # is no doubt not an ideal solution, but no one has so far come up
2088 # with an ideal solution - neither for funding GNU Parallel nor
2089 # other free software.
2091 # If you believe you have the perfect solution, you should try it
2092 # out, and if it works, you should post it on the email
2093 # list. Ideas that will cost work and which have not been tested
2094 # are, however, unlikely to be prioritized.
2096 # Please note that GPL version 3 gives you the right to fork GNU
2097 # Parallel under a new name, but it does not give you the right to
2098 # distribute modified copies with the citation notice disabled in
2099 # a way where the software can be confused with GNU Parallel. To
2100 # do that you need to be the owner of the GNU Parallel
2101 # trademark. The xt:Commerce case shows this.
2103 # Description of the xt:Commerce case in OLG Duesseldorf
2104 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2105 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
2107 # The verdict in German
2108 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2109 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
2111 # Other free software limiting derivates by the same name:
2112 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
2113 # https://tm.joomla.org/trademark-faq.html
2114 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
2116 # Running 'parallel --citation' one single time takes less than 10
2117 # seconds, and will silence the citation notice for future
2118 # runs. If that is too much trouble for you, why not use one of
2119 # the alternatives instead?
2120 # See a list in: 'man parallel_alternatives'
2122 # If you want GNU Parallel to be maintained in the future you
2123 # should keep this line.
2124 citation_notice();
2125 # _YOU_ will be harming free software by removing the notice. You
2126 # accept to be added to a public hall of shame by removing the
2127 # line. This is because _YOU_ actively make it harder to justify
2128 # spending time developing GNU Parallel.
2130 parse_halt();
2132 if($ENV{'PARALLEL_ENV'}) {
2133 # Read environment and set $Global::parallel_env
2134 # Must be done before is_acceptable_command_line_length()
2135 my $penv = $ENV{'PARALLEL_ENV'};
2136 # unset $PARALLEL_ENV: It should not be given to children
2137 # because it takes up a lot of env space
2138 delete $ENV{'PARALLEL_ENV'};
2139 if(-e $penv) {
2140 # This is a file/fifo: Replace envvar with content of file
2141 open(my $parallel_env, "<", $penv) ||
2142 ::die_bug("Cannot read parallel_env from $penv");
2143 local $/; # Put <> in slurp mode
2144 $penv = <$parallel_env>;
2145 close $parallel_env;
2147 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
2148 $penv =~ s/\001/\n/g;
2149 if($penv =~ /\0/) {
2150 ::warning('\0 (NUL) in environment is not supported');
2152 $Global::parallel_env = $penv;
2155 parse_sshlogin();
2157 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
2158 # As we do not know the max line length on the remote machine
2159 # long commands generated by xargs may fail
2160 # If $opt::max_replace_args is set, it is probably safe
2161 ::warning("Using -X or -m with --sshlogin may fail.");
2164 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
2165 open_joblog();
2166 open_json_csv();
2167 if($opt::sqlmaster or $opt::sqlworker) {
2168 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
2170 if($opt::sqlworker) { $Global::membuffer ||= 1; }
2171 # The sqlmaster groups the arguments, so the should just read one
2172 if($opt::sqlworker and not $opt::sqlmaster) { $Global::max_number_of_args = 1; }
2175 sub check_invalid_option_combinations() {
2176 if(defined $opt::timeout and
2177 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
2178 ::error("--timeout must be seconds or percentage.");
2179 wait_and_exit(255);
2181 if(defined $opt::fifo and defined $opt::cat) {
2182 ::error("--fifo cannot be combined with --cat.");
2183 ::wait_and_exit(255);
2185 if(defined $opt::retries and defined $opt::roundrobin) {
2186 ::error("--retries cannot be combined with --roundrobin.");
2187 ::wait_and_exit(255);
2189 if(defined $opt::pipepart and
2190 (defined $opt::L or defined $opt::max_lines
2191 or defined $opt::max_replace_args)) {
2192 ::error("--pipepart is incompatible with --max-replace-args, ".
2193 "--max-lines, and -L.");
2194 wait_and_exit(255);
2196 if(defined $opt::group and $opt::ungroup) {
2197 ::error("--group cannot be combined with --ungroup.");
2198 ::wait_and_exit(255);
2200 if(defined $opt::group and $opt::linebuffer) {
2201 ::error("--group cannot be combined with --line-buffer.");
2202 ::wait_and_exit(255);
2204 if(defined $opt::ungroup and $opt::linebuffer) {
2205 ::error("--ungroup cannot be combined with --line-buffer.");
2206 ::wait_and_exit(255);
2208 if(defined $opt::tollef and not $opt::gnu) {
2209 ::error("--tollef has been retired.",
2210 "Remove --tollef or use --gnu to override --tollef.");
2211 ::wait_and_exit(255);
2213 if(defined $opt::retired) {
2214 ::error("-g has been retired. Use --group.",
2215 "-B has been retired. Use --bf.",
2216 "-T has been retired. Use --tty.",
2217 "-U has been retired. Use --er.",
2218 "-W has been retired. Use --wd.",
2219 "-Y has been retired. Use --shebang.",
2220 "-H has been retired. Use --halt.",
2221 "--sql has been retired. Use --sqlmaster.",
2222 "--ctrlc has been retired.",
2223 "--noctrlc has been retired.");
2224 ::wait_and_exit(255);
2226 if($opt::groupby) {
2227 if(not $opt::pipe and not $opt::pipepart) {
2228 $opt::pipe = 1;
2230 if($opt::remove_rec_sep) {
2231 ::error("--remove-rec-sep is not compatible with --groupby");
2232 ::wait_and_exit(255);
2234 if($opt::recstart) {
2235 ::error("--recstart is not compatible with --groupby");
2236 ::wait_and_exit(255);
2238 if($opt::recend ne "\n") {
2239 ::error("--recend is not compatible with --groupby");
2240 ::wait_and_exit(255);
2245 sub init_globals() {
2246 # Defaults:
2247 $Global::version = 20210922;
2248 $Global::progname = 'parallel';
2249 $::name = "GNU Parallel";
2250 $Global::infinity = 2**31;
2251 $Global::debug = 0;
2252 $Global::verbose = 0;
2253 # Don't quote every part of the command line
2254 $Global::quoting = 0;
2255 # Quote replacement strings
2256 $Global::quote_replace = 1;
2257 $Global::total_completed = 0;
2258 $Global::cache_replacement_eval = 1;
2259 # Read only table with default --rpl values
2260 %Global::replace =
2262 '{}' => '',
2263 '{#}' => '1 $_=$job->seq()',
2264 '{%}' => '1 $_=$job->slot()',
2265 '{/}' => 's:.*/::',
2266 '{//}' =>
2267 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
2268 '$_ = dirname($_);'),
2269 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
2270 '{.}' => 's:\.[^/.]+$::',
2272 %Global::plus =
2274 # {} = {+/}/{/}
2275 # = {.}.{+.} = {+/}/{/.}.{+.}
2276 # = {..}.{+..} = {+/}/{/..}.{+..}
2277 # = {...}.{+...} = {+/}/{/...}.{+...}
2278 '{+/}' => 's:/[^/]*$::',
2279 '{+.}' => 's:.*\.::',
2280 '{+..}' => 's:.*\.([^.]*\.):$1:',
2281 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
2282 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
2283 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2284 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2285 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
2286 # n choose k = Binomial coefficient
2287 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
2288 # {##} = number of jobs
2289 '{##}' => '1 $_=total_jobs()',
2290 # {0%} = 0-padded jobslot
2291 '{0%}' => '1 $f=1+int((log($Global::max_jobs_running||1)/log(10))); $_=sprintf("%0${f}d",slot())',
2292 # {0%} = 0-padded seq
2293 '{0#}' => '1 $f=1+int((log(total_jobs())/log(10))); $_=sprintf("%0${f}d",seq())',
2295 ## Bash inspired replacement strings
2296 # Bash ${a:-myval}
2297 '{:-([^}]+?)}' => '$_ ||= $$1',
2298 # Bash ${a:2}
2299 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
2300 # Bash ${a:2:3}
2301 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
2302 # echo {#z.*z.} ::: z.z.z.foo => z.foo
2303 # echo {##z.*z.} ::: z.z.z.foo => foo
2304 # Bash ${a#bc}
2305 '{#([^#}][^}]*?)}' =>
2306 '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;',
2307 # Bash ${a##bc}
2308 '{##([^#}][^}]*?)}' => 's/^$$1//;',
2309 # echo {%.z.*z} ::: foo.z.z.z => foo.z
2310 # echo {%%.z.*z} ::: foo.z.z.z => foo
2311 # Bash ${a%def}
2312 '{%([^}]+?)}' =>
2313 '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;',
2314 # Bash ${a%%def}
2315 '{%%([^}]+?)}' => 's/$$1$//;',
2316 # Bash ${a/def/ghi} ${a/def/}
2317 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
2318 # Bash ${a^a}
2319 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
2320 # Bash ${a^^a}
2321 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
2322 # Bash ${a,A}
2323 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
2324 # Bash ${a,,A}
2325 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
2327 # {slot} = $PARALLEL_JOBSLOT
2328 '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()',
2329 # {host} = ssh host
2330 '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()',
2331 # {sshlogin} = sshlogin
2332 '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()',
2333 # {hgrp} = hostgroups of the host
2334 '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()',
2335 # {agrp} = hostgroups of the argument
2336 '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()',
2338 # Modifiable copy of %Global::replace
2339 %Global::rpl = %Global::replace;
2340 $/ = "\n";
2341 $Global::ignore_empty = 0;
2342 $Global::interactive = 0;
2343 $Global::stderr_verbose = 0;
2344 $Global::default_simultaneous_sshlogins = 9;
2345 $Global::exitstatus = 0;
2346 $Global::arg_sep = ":::";
2347 $Global::arg_file_sep = "::::";
2348 $Global::trim = 'n';
2349 $Global::max_jobs_running = 0;
2350 $Global::job_already_run = '';
2351 $ENV{'TMPDIR'} ||= "/tmp";
2352 $ENV{'OLDPWD'} = $ENV{'PWD'};
2353 if(not $ENV{HOME}) {
2354 # $ENV{HOME} is sometimes not set if called from PHP
2355 ::warning("\$HOME not set. Using /tmp.");
2356 $ENV{HOME} = "/tmp";
2358 # no warnings to allow for undefined $XDG_*
2359 no warnings 'uninitialized';
2360 # $xdg_config_home is needed to make env_parallel.fish stop complaining
2361 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
2362 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
2363 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
2364 # Keep only dirs that exist
2365 @Global::config_dirs =
2366 (grep { -d $_ }
2367 $ENV{'PARALLEL_HOME'},
2368 (map { "$_/parallel" }
2369 $xdg_config_home,
2370 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
2371 $ENV{'HOME'} . "/.parallel");
2372 # Use first dir as config dir
2373 $Global::config_dir = $Global::config_dirs[0] ||
2374 $ENV{'HOME'} . "/.parallel";
2375 if($ENV{'PARALLEL_HOME'} =~ /./ and not -d $ENV{'PARALLEL_HOME'}) {
2376 ::warning("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist.");
2377 ::warning("Using $Global::config_dir");
2379 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
2380 # Keep only dirs that exist
2381 @Global::cache_dirs =
2382 (grep { -d $_ }
2383 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
2384 $Global::cache_dir = $Global::cache_dirs[0] ||
2385 $ENV{'HOME'} . "/.parallel";
2388 sub parse_halt() {
2389 # $opt::halt flavours
2390 # Uses:
2391 # $opt::halt
2392 # $Global::halt_when
2393 # $Global::halt_fail
2394 # $Global::halt_success
2395 # $Global::halt_pct
2396 # $Global::halt_count
2397 if(defined $opt::halt) {
2398 my %halt_expansion = (
2399 "0" => "never",
2400 "1" => "soon,fail=1",
2401 "2" => "now,fail=1",
2402 "-1" => "soon,success=1",
2403 "-2" => "now,success=1",
2405 # Expand -2,-1,0,1,2 into long form
2406 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
2407 # --halt 5% == --halt soon,fail=5%
2408 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
2409 # Split: soon,fail=5%
2410 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
2411 if(not grep { $when eq $_ } qw(never soon now)) {
2412 ::error("--halt must have 'never', 'soon', or 'now'.");
2413 ::wait_and_exit(255);
2415 $Global::halt_when = $when;
2416 if($when ne "never") {
2417 if($fail_success eq "fail") {
2418 $Global::halt_fail = 1;
2419 } elsif($fail_success eq "success") {
2420 $Global::halt_success = 1;
2421 } elsif($fail_success eq "done") {
2422 $Global::halt_done = 1;
2423 } else {
2424 ::error("--halt $when must be followed by ,success or ,fail.");
2425 ::wait_and_exit(255);
2427 if($pct_count =~ /^(\d+)%$/) {
2428 $Global::halt_pct = $1/100;
2429 } elsif($pct_count =~ /^(\d+)$/) {
2430 $Global::halt_count = $1;
2431 } else {
2432 ::error("--halt $when,$fail_success ".
2433 "must be followed by ,number or ,percent%.");
2434 ::wait_and_exit(255);
2440 sub parse_replacement_string_options() {
2441 # Deal with --rpl
2442 # Uses:
2443 # %Global::rpl
2444 # $Global::parensleft
2445 # $Global::parensright
2446 # $opt::parens
2447 # $Global::parensleft
2448 # $Global::parensright
2449 # $opt::plus
2450 # %Global::plus
2451 # $opt::I
2452 # $opt::U
2453 # $opt::i
2454 # $opt::basenamereplace
2455 # $opt::dirnamereplace
2456 # $opt::seqreplace
2457 # $opt::slotreplace
2458 # $opt::basenameextensionreplace
2460 sub rpl($$) {
2461 # Modify %Global::rpl
2462 # Replace $old with $new
2463 my ($old,$new) = @_;
2464 if($old ne $new) {
2465 $Global::rpl{$new} = $Global::rpl{$old};
2466 delete $Global::rpl{$old};
2469 my $parens = "{==}";
2470 if(defined $opt::parens) { $parens = $opt::parens; }
2471 my $parenslen = 0.5*length $parens;
2472 $Global::parensleft = substr($parens,0,$parenslen);
2473 $Global::parensright = substr($parens,$parenslen);
2474 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
2475 if(defined $opt::I) { rpl('{}',$opt::I); }
2476 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
2477 if(defined $opt::U) { rpl('{.}',$opt::U); }
2478 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
2479 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
2480 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
2481 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
2482 if(defined $opt::basenameextensionreplace) {
2483 rpl('{/.}',$opt::basenameextensionreplace);
2485 for(@opt::rpl) {
2486 # Create $Global::rpl entries for --rpl options
2487 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
2488 my ($shorthand,$long) = split/\s/,$_,2;
2489 $Global::rpl{$shorthand} = $long;
2493 sub parse_semaphore() {
2494 # Semaphore defaults
2495 # Must be done before computing number of processes and max_line_length
2496 # because when running as a semaphore GNU Parallel does not read args
2497 # Uses:
2498 # $opt::semaphore
2499 # $Global::semaphore
2500 # $opt::semaphoretimeout
2501 # $Semaphore::timeout
2502 # $opt::semaphorename
2503 # $Semaphore::name
2504 # $opt::fg
2505 # $Semaphore::fg
2506 # $opt::wait
2507 # $Semaphore::wait
2508 # $opt::bg
2509 # @opt::a
2510 # @Global::unget_argv
2511 # $Global::default_simultaneous_sshlogins
2512 # $opt::jobs
2513 # $Global::interactive
2514 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
2515 if(defined $opt::semaphore) { $Global::semaphore = 1; }
2516 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
2517 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
2518 if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
2519 $Global::semaphore = 1;
2521 if(defined $opt::bg) { $Global::semaphore = 1; }
2522 if(defined $opt::wait and not $opt::sqlmaster) {
2523 $Global::semaphore = 1; @ARGV = "true";
2525 if($Global::semaphore) {
2526 if(@opt::a) {
2527 # A semaphore does not take input from neither stdin nor file
2528 ::error("A semaphore does not take input from neither stdin nor a file\n");
2529 ::wait_and_exit(255);
2531 @opt::a = ("/dev/null");
2532 # Append a dummy empty argument
2533 # \0 => nothing (not the empty string)
2534 push(@Global::unget_argv, [Arg->new("\0noarg")]);
2535 $Semaphore::timeout = $opt::semaphoretimeout || 0;
2536 if(defined $opt::semaphorename) {
2537 $Semaphore::name = $opt::semaphorename;
2538 } else {
2539 local $/ = "\n";
2540 $Semaphore::name = `tty`;
2541 chomp $Semaphore::name;
2543 $Semaphore::fg = $opt::fg;
2544 $Semaphore::wait = $opt::wait;
2545 $Global::default_simultaneous_sshlogins = 1;
2546 if(not defined $opt::jobs) {
2547 $opt::jobs = 1;
2549 if($Global::interactive and $opt::bg) {
2550 ::error("Jobs running in the ".
2551 "background cannot be interactive.");
2552 ::wait_and_exit(255);
2557 sub record_env() {
2558 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
2559 # Returns: N/A
2560 my $ignore_filename = $Global::config_dir . "/ignored_vars";
2561 if(open(my $vars_fh, ">", $ignore_filename)) {
2562 print $vars_fh map { $_,"\n" } keys %ENV;
2563 } else {
2564 ::error("Cannot write to $ignore_filename.");
2565 ::wait_and_exit(255);
2569 sub open_joblog() {
2570 # Open joblog as specified by --joblog
2571 # Uses:
2572 # $opt::resume
2573 # $opt::resume_failed
2574 # $opt::joblog
2575 # $opt::results
2576 # $Global::job_already_run
2577 # %Global::fh
2578 my $append = 0;
2579 if(($opt::resume or $opt::resume_failed)
2581 not ($opt::joblog or $opt::results)) {
2582 ::error("--resume and --resume-failed require --joblog or --results.");
2583 ::wait_and_exit(255);
2585 if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
2586 # --joblog +filename = append to filename
2587 $append = 1;
2589 if($opt::joblog
2591 ($opt::sqlmaster
2593 not $opt::sqlworker)) {
2594 # Do not log if --sqlworker
2595 if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
2596 if(open(my $joblog_fh, "<", $opt::joblog)) {
2597 # Read the joblog
2598 # Override $/ with \n because -d might be set
2599 local $/ = "\n";
2600 # If there is a header: Open as append later
2601 $append = <$joblog_fh>;
2602 my $joblog_regexp;
2603 if($opt::retry_failed) {
2604 # Make a regexp that only matches commands with exit+signal=0
2605 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2606 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2607 my @group;
2608 while(<$joblog_fh>) {
2609 if(/$joblog_regexp/o) {
2610 # This is 30% faster than set_job_already_run($1);
2611 vec($Global::job_already_run,($1||0),1) = 1;
2612 $Global::total_completed++;
2613 $group[$1-1] = "true";
2614 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
2615 # Grab out the command
2616 $group[$1-1] = $3;
2617 } else {
2618 chomp;
2619 ::error("Format of '$opt::joblog' is wrong: $_");
2620 ::wait_and_exit(255);
2623 if(@group) {
2624 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2625 unlink($name);
2626 # Put args into argfile
2627 if(grep /\0/, @group) {
2628 # force --null to deal with \n in commandlines
2629 ::warning("Command lines contain newline. Forcing --null.");
2630 $opt::null = 1;
2631 $/ = "\0";
2633 # Replace \0 with '\n' as used in print_joblog()
2634 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
2635 seek $outfh, 0, 0;
2636 exit_if_disk_full();
2637 # Set filehandle to -a
2638 @opt::a = ($outfh);
2640 # Remove $command (so -a is run)
2641 @ARGV = ();
2643 if($opt::resume || $opt::resume_failed) {
2644 if($opt::resume_failed) {
2645 # Make a regexp that only matches commands with exit+signal=0
2646 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
2647 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
2648 } else {
2649 # Just match the job number
2650 $joblog_regexp='^(\d+)';
2652 while(<$joblog_fh>) {
2653 if(/$joblog_regexp/o) {
2654 # This is 30% faster than set_job_already_run($1);
2655 vec($Global::job_already_run,($1||0),1) = 1;
2656 $Global::total_completed++;
2657 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
2658 ::error("Format of '$opt::joblog' is wrong: $_");
2659 ::wait_and_exit(255);
2663 close $joblog_fh;
2665 # $opt::null may be set if the commands contain \n
2666 if($opt::null) { $/ = "\0"; }
2668 if($opt::dryrun) {
2669 # Do not write to joblog in a dry-run
2670 if(not open($Global::joblog, ">", "/dev/null")) {
2671 ::error("Cannot write to --joblog $opt::joblog.");
2672 ::wait_and_exit(255);
2674 } elsif($append) {
2675 # Append to joblog
2676 if(not open($Global::joblog, ">>", $opt::joblog)) {
2677 ::error("Cannot append to --joblog $opt::joblog.");
2678 ::wait_and_exit(255);
2680 } else {
2681 if($opt::joblog eq "-") {
2682 # Use STDOUT as joblog
2683 $Global::joblog = $Global::fh{1};
2684 } elsif(not open($Global::joblog, ">", $opt::joblog)) {
2685 # Overwrite the joblog
2686 ::error("Cannot write to --joblog $opt::joblog.");
2687 ::wait_and_exit(255);
2689 print $Global::joblog
2690 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
2691 "Send", "Receive", "Exitval", "Signal", "Command"
2692 ). "\n";
2697 sub open_json_csv() {
2698 if($opt::results) {
2699 # Output as JSON/CSV/TSV
2700 if($opt::results eq "-.csv"
2702 $opt::results eq "-.tsv"
2704 $opt::results eq "-.json") {
2705 # Output as JSON/CSV/TSV on stdout
2706 open $Global::csv_fh, ">&", "STDOUT" or
2707 ::die_bug("Can't dup STDOUT in csv: $!");
2708 # Do not print any other output to STDOUT
2709 # by forcing all other output to /dev/null
2710 open my $fd, ">", "/dev/null" or
2711 ::die_bug("Can't >/dev/null in csv: $!");
2712 $Global::fh{1} = $fd;
2713 $Global::fh{2} = $fd;
2714 } elsif($Global::csvsep) {
2715 if(not open($Global::csv_fh,">",$opt::results)) {
2716 ::error("Cannot open results file `$opt::results': ".
2717 "$!.");
2718 wait_and_exit(255);
2724 sub find_compression_program() {
2725 # Find a fast compression program
2726 # Returns:
2727 # $compress_program = compress program with options
2728 # $decompress_program = decompress program with options
2730 # Search for these. Sorted by speed on 128 core
2732 # seq 120000000|shuf > 1gb &
2733 # apt-get update
2734 # apt install make g++ htop
2735 # wget -O - pi.dk/3 | bash
2736 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2737 # git clone https://github.com/facebook/zstd.git
2738 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2739 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2740 # chmod +x /usr/local/bin/lrz
2741 # wait
2742 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2743 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2744 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2745 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2746 # sort -nk4 jl-?
2748 # 1-core:
2749 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2750 # 4-cores:
2751 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2752 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2753 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2754 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2755 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2757 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2758 lrz pxz bzip2 lzma xz clzip);
2759 for my $p (@prg) {
2760 if(which($p)) {
2761 return ("$p -c -1","$p -dc");
2764 # Fall back to cat
2765 return ("cat","cat");
2768 sub read_options() {
2769 # Read options from command line, profile and $PARALLEL
2770 # Uses:
2771 # $opt::shebang_wrap
2772 # $opt::shebang
2773 # @ARGV
2774 # $opt::plain
2775 # @opt::profile
2776 # $ENV{'HOME'}
2777 # $ENV{'PARALLEL'}
2778 # Returns:
2779 # @ARGV_no_opt = @ARGV without --options
2781 # This must be done first as this may exec myself
2782 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2783 $ARGV[0] =~ /^--shebang-?wrap/ or
2784 $ARGV[0] =~ /^--hashbang/)) {
2785 # Program is called from #! line in script
2786 # remove --shebang-wrap if it is set
2787 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
2788 # remove --shebang if it is set
2789 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
2790 # remove --hashbang if it is set
2791 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
2792 if($opt::shebang) {
2793 my $argfile = Q(pop @ARGV);
2794 # exec myself to split $ARGV[0] into separate fields
2795 exec "$0 --skip-first-line -a $argfile @ARGV";
2797 if($opt::shebang_wrap) {
2798 my @options;
2799 my @parser;
2800 if ($^O eq 'freebsd') {
2801 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2802 my @nooptions = @ARGV;
2803 get_options_from_array(\@nooptions);
2804 while($#ARGV > $#nooptions) {
2805 push @options, shift @ARGV;
2807 while(@ARGV and $ARGV[0] ne ":::") {
2808 push @parser, shift @ARGV;
2810 if(@ARGV and $ARGV[0] eq ":::") {
2811 shift @ARGV;
2813 } else {
2814 @options = shift @ARGV;
2816 my $script = Q(shift @ARGV);
2817 # exec myself to split $ARGV[0] into separate fields
2818 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2819 "::: @ARGV";
2822 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2823 ::warning("--shebang and --shebang-wrap must be the first argument.\n");
2826 Getopt::Long::Configure("bundling","require_order");
2827 my @ARGV_copy = @ARGV;
2828 my @ARGV_orig = @ARGV;
2829 # Check if there is a --profile to set @opt::profile
2830 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
2831 my @ARGV_profile = ();
2832 my @ARGV_env = ();
2833 if(not $opt::plain) {
2834 # Add options from $PARALLEL_HOME/config and other profiles
2835 my @config_profiles = (
2836 "/etc/parallel/config",
2837 (map { "$_/config" } @Global::config_dirs),
2838 $ENV{'HOME'}."/.parallelrc");
2839 my @profiles = @config_profiles;
2840 if(@opt::profile) {
2841 # --profile overrides default profiles
2842 @profiles = ();
2843 for my $profile (@opt::profile) {
2844 if($profile =~ m:^\./|^/:) {
2845 # Look for ./profile in .
2846 # Look for /profile in /
2847 push @profiles, grep { -r $_ } $profile;
2848 } else {
2849 # Look for the $profile in @Global::config_dirs
2850 push @profiles, grep { -r $_ }
2851 map { "$_/$profile" } @Global::config_dirs;
2855 for my $profile (@profiles) {
2856 if(-r $profile) {
2857 ::debug("init","Read $profile\n");
2858 local $/ = "\n";
2859 open (my $in_fh, "<", $profile) ||
2860 ::die_bug("read-profile: $profile");
2861 while(<$in_fh>) {
2862 /^\s*\#/ and next;
2863 chomp;
2864 push @ARGV_profile, shell_words($_);
2866 close $in_fh;
2867 } else {
2868 if(grep /^\Q$profile\E$/, @config_profiles) {
2869 # config file is not required to exist
2870 } else {
2871 ::error("$profile not readable.");
2872 wait_and_exit(255);
2876 # Add options from shell variable $PARALLEL
2877 if($ENV{'PARALLEL'}) {
2878 push @ARGV_env, shell_words($ENV{'PARALLEL'});
2880 # Add options from env_parallel.csh via $PARALLEL_CSH
2881 if($ENV{'PARALLEL_CSH'}) {
2882 push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'});
2885 Getopt::Long::Configure("bundling","require_order");
2886 get_options_from_array(\@ARGV_profile) || die_usage();
2887 get_options_from_array(\@ARGV_env) || die_usage();
2888 get_options_from_array(\@ARGV) || die_usage();
2889 # What were the options given on the command line?
2890 # Used to start --sqlworker
2891 my $ai = arrayindex(\@ARGV_orig, \@ARGV);
2892 @Global::options_in_argv = @ARGV_orig[0..$ai-1];
2893 # Prepend non-options to @ARGV (such as commands like 'nice')
2894 unshift @ARGV, @ARGV_profile, @ARGV_env;
2895 return @ARGV;
2898 sub arrayindex() {
2899 # Similar to Perl's index function, but for arrays
2900 # Input:
2901 # $arr_ref1 = ref to @array1 to search in
2902 # $arr_ref2 = ref to @array2 to search for
2903 # Returns:
2904 # $pos = position of @array1 in @array2, -1 if not found
2905 my ($arr_ref1,$arr_ref2) = @_;
2906 my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
2907 my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
2908 my $i = index($array1_as_string,$array2_as_string,0);
2909 if($i == -1) { return -1 }
2910 my @before = split /\0/, substr($array1_as_string,0,$i);
2911 return $#before;
2914 sub read_args_from_command_line() {
2915 # Arguments given on the command line after:
2916 # ::: ($Global::arg_sep)
2917 # :::: ($Global::arg_file_sep)
2918 # :::+ ($Global::arg_sep with --link)
2919 # ::::+ ($Global::arg_file_sep with --link)
2920 # Removes the arguments from @ARGV and:
2921 # - puts filenames into -a
2922 # - puts arguments into files and add the files to -a
2923 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2924 # Input:
2925 # @::ARGV = command option ::: arg arg arg :::: argfiles
2926 # Uses:
2927 # $Global::arg_sep
2928 # $Global::arg_file_sep
2929 # $opt::internal_pipe_means_argfiles
2930 # $opt::pipe
2931 # @opt::a
2932 # Returns:
2933 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2934 my @new_argv = ();
2935 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2936 if($arg eq $Global::arg_sep
2938 $arg eq $Global::arg_sep."+"
2940 $arg eq $Global::arg_file_sep
2942 $arg eq $Global::arg_file_sep."+") {
2943 my $group_sep = $arg; # This group of arguments is args or argfiles
2944 my @group;
2945 while(defined ($arg = shift @ARGV)) {
2946 if($arg eq $Global::arg_sep
2948 $arg eq $Global::arg_sep."+"
2950 $arg eq $Global::arg_file_sep
2952 $arg eq $Global::arg_file_sep."+") {
2953 # exit while loop if finding new separator
2954 last;
2955 } else {
2956 # If not hitting ::: :::+ :::: or ::::+
2957 # Append it to the group
2958 push @group, $arg;
2961 my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0;
2962 my $is_file = ($group_sep eq $Global::arg_file_sep
2964 $group_sep eq $Global::arg_file_sep."+");
2965 if($is_file) {
2966 # :::: / ::::+
2967 push @opt::linkinputsource, map { $is_linked } @group;
2968 } else {
2969 # ::: / :::+
2970 push @opt::linkinputsource, $is_linked;
2972 if($is_file
2973 or ($opt::internal_pipe_means_argfiles and $opt::pipe)
2975 # Group of file names on the command line.
2976 # Append args into -a
2977 push @opt::a, @group;
2978 } else {
2979 # Group of arguments on the command line.
2980 # Put them into a file.
2981 # Create argfile
2982 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2983 unlink($name);
2984 # Put args into argfile
2985 print $outfh map { $_,$/ } @group;
2986 seek $outfh, 0, 0;
2987 exit_if_disk_full();
2988 # Append filehandle to -a
2989 push @opt::a, $outfh;
2991 if(defined($arg)) {
2992 # $arg is ::: :::+ :::: or ::::+
2993 # so there is another group
2994 redo;
2995 } else {
2996 # $arg is undef -> @ARGV empty
2997 last;
3000 push @new_argv, $arg;
3002 # Output: @ARGV = command to run with options
3003 return @new_argv;
3006 sub cleanup() {
3007 # Returns: N/A
3008 unlink keys %Global::unlink;
3009 map { rmdir $_ } keys %Global::unlink;
3010 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
3011 for(keys %Global::sshmaster) {
3012 # If 'ssh -M's are running: kill them
3013 kill "TERM", $_;
3018 sub __QUOTING_ARGUMENTS_FOR_SHELL__() {}
3020 sub shell_quote(@) {
3021 # Input:
3022 # @strings = strings to be quoted
3023 # Returns:
3024 # @shell_quoted_strings = string quoted as needed by the shell
3025 return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
3028 sub shell_quote_scalar_rc($) {
3029 # Quote for the rc-shell
3030 my $a = $_[0];
3031 if(defined $a) {
3032 if(($a =~ s/'/''/g)
3034 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
3035 # A string was replaced
3036 # No need to test for "" or \0
3037 } elsif($a eq "") {
3038 $a = "''";
3039 } elsif($a eq "\0") {
3040 $a = "";
3043 return $a;
3046 sub shell_quote_scalar_csh($) {
3047 # Quote for (t)csh
3048 my $a = $_[0];
3049 if(defined $a) {
3050 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
3051 # This is 1% faster than the above
3052 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
3054 # quote newline in csh as \\\n
3055 ($a =~ s/[\n]/"\\\n"/go)) {
3056 # A string was replaced
3057 # No need to test for "" or \0
3058 } elsif($a eq "") {
3059 $a = "''";
3060 } elsif($a eq "\0") {
3061 $a = "";
3064 return $a;
3067 sub shell_quote_scalar_default($) {
3068 # Quote for other shells (Bourne compatibles)
3069 # Inputs:
3070 # $string = string to be quoted
3071 # Returns:
3072 # $shell_quoted = string quoted as needed by the shell
3073 my $s = $_[0];
3074 if($s =~ /[^-_.+a-z0-9\/]/i) {
3075 $s =~ s/'/'"'"'/g; # "-quote single quotes
3076 $s = "'$s'"; # '-quote entire string
3077 $s =~ s/^''//; # Remove unneeded '' at ends
3078 $s =~ s/''$//; # (faster than s/^''|''$//g)
3079 return $s;
3080 } elsif ($s eq "") {
3081 return "''";
3082 } else {
3083 # No quoting needed
3084 return $s;
3088 sub shell_quote_scalar($) {
3089 # Quote the string so the shell will not expand any special chars
3090 # Inputs:
3091 # $string = string to be quoted
3092 # Returns:
3093 # $shell_quoted = string quoted as needed by the shell
3095 # Speed optimization: Choose the correct shell_quote_scalar_*
3096 # and call that directly from now on
3097 no warnings 'redefine';
3098 if($Global::cshell) {
3099 # (t)csh
3100 *shell_quote_scalar = \&shell_quote_scalar_csh;
3101 } elsif($Global::shell =~ m:(^|/)rc$:) {
3102 # rc-shell
3103 *shell_quote_scalar = \&shell_quote_scalar_rc;
3104 } else {
3105 # other shells
3106 *shell_quote_scalar = \&shell_quote_scalar_default;
3108 # The sub is now redefined. Call it
3109 return shell_quote_scalar($_[0]);
3112 sub Q($) {
3113 # Q alias for ::shell_quote_scalar
3114 my $ret = shell_quote_scalar($_[0]);
3115 no warnings 'redefine';
3116 *Q = \&::shell_quote_scalar;
3117 return $ret;
3120 sub shell_quote_file($) {
3121 # Quote the string so shell will not expand any special chars
3122 # and prepend ./ if needed
3123 # Input:
3124 # $filename = filename to be shell quoted
3125 # Returns:
3126 # $quoted_filename = filename quoted with \ and ./ if needed
3127 my $a = shift;
3128 if(defined $a) {
3129 if($a =~ m:^/: or $a =~ m:^\./:) {
3130 # /abs/path or ./rel/path => skip
3131 } else {
3132 # rel/path => ./rel/path
3133 $a = "./".$a;
3136 return Q($a);
3139 sub shell_words(@) {
3140 # Input:
3141 # $string = shell line
3142 # Returns:
3143 # @shell_words = $string split into words as shell would do
3144 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
3145 return Text::ParseWords::shellwords(@_);
3148 sub perl_quote_scalar($) {
3149 # Quote the string so perl's eval will not expand any special chars
3150 # Inputs:
3151 # $string = string to be quoted
3152 # Returns:
3153 # $perl_quoted = string quoted with \ as needed by perl's eval
3154 my $a = $_[0];
3155 if(defined $a) {
3156 $a =~ s/[\\\"\$\@]/\\$&/go;
3158 return $a;
3161 # -w complains about prototype
3162 sub pQ($) {
3163 # pQ alias for ::perl_quote_scalar
3164 my $ret = perl_quote_scalar($_[0]);
3165 *pQ = \&::perl_quote_scalar;
3166 return $ret;
3169 sub unquote_printf() {
3170 # Convert \t \n \r \000 \0
3171 # Inputs:
3172 # $string = string with \t \n \r \num \0
3173 # Returns:
3174 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
3175 $_ = shift;
3176 s/\\t/\t/g;
3177 s/\\n/\n/g;
3178 s/\\r/\r/g;
3179 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
3180 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
3181 return $_;
3185 sub __FILEHANDLES__() {}
3188 sub save_stdin_stdout_stderr() {
3189 # Remember the original STDIN, STDOUT and STDERR
3190 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
3191 # Uses:
3192 # %Global::fh
3193 # $Global::original_stderr
3194 # $Global::original_stdin
3195 # Returns: N/A
3197 # TODO Disabled until we have an open3 that will take n filehandles
3198 # for my $fdno (1..61) {
3199 # # /dev/fd/62 and above are used by bash for <(cmd)
3200 # # Find file descriptors that are already opened (by the shell)
3201 # Only focus on stdout+stderr for now
3202 for my $fdno (1..2) {
3203 my $fh;
3204 # 2-argument-open is used to be compatible with old perl 5.8.0
3205 # bug #43570: Perl 5.8.0 creates 61 files
3206 if(open($fh,">&=$fdno")) {
3207 $Global::fh{$fdno}=$fh;
3210 open $Global::original_stderr, ">&", "STDERR" or
3211 ::die_bug("Can't dup STDERR: $!");
3212 open $Global::status_fd, ">&", "STDERR" or
3213 ::die_bug("Can't dup STDERR: $!");
3214 open $Global::original_stdin, "<&", "STDIN" or
3215 ::die_bug("Can't dup STDIN: $!");
3218 sub enough_file_handles() {
3219 # Check that we have enough filehandles available for starting
3220 # another job
3221 # Uses:
3222 # $opt::ungroup
3223 # %Global::fh
3224 # Returns:
3225 # 1 if ungrouped (thus not needing extra filehandles)
3226 # 0 if too few filehandles
3227 # 1 if enough filehandles
3228 if(not $opt::ungroup) {
3229 my %fh;
3230 my $enough_filehandles = 1;
3231 # perl uses 7 filehandles for something?
3232 # open3 uses 2 extra filehandles temporarily
3233 # We need a filehandle for each redirected file descriptor
3234 # (normally just STDOUT and STDERR)
3235 for my $i (1..(7+2+keys %Global::fh)) {
3236 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
3238 for (values %fh) { close $_; }
3239 return $enough_filehandles;
3240 } else {
3241 # Ungrouped does not need extra file handles
3242 return 1;
3246 sub open_or_exit($) {
3247 # Open a file name or exit if the file cannot be opened
3248 # Inputs:
3249 # $file = filehandle or filename to open
3250 # Uses:
3251 # $Global::original_stdin
3252 # Returns:
3253 # $fh = file handle to read-opened file
3254 my $file = shift;
3255 if($file eq "-") {
3256 return ($Global::original_stdin || *STDIN);
3258 if(ref $file eq "GLOB") {
3259 # This is an open filehandle
3260 return $file;
3262 my $fh = gensym;
3263 if(not open($fh, "<", $file)) {
3264 ::error("Cannot open input file `$file': No such file or directory.");
3265 wait_and_exit(255);
3267 return $fh;
3270 sub set_fh_blocking($) {
3271 # Set filehandle as blocking
3272 # Inputs:
3273 # $fh = filehandle to be blocking
3274 # Returns:
3275 # N/A
3276 my $fh = shift;
3277 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3278 my $flags;
3279 # Get the current flags on the filehandle
3280 fcntl($fh, &F_GETFL, $flags) || die $!;
3281 # Remove non-blocking from the flags
3282 $flags &= ~&O_NONBLOCK;
3283 # Set the flags on the filehandle
3284 fcntl($fh, &F_SETFL, $flags) || die $!;
3287 sub set_fh_non_blocking($) {
3288 # Set filehandle as non-blocking
3289 # Inputs:
3290 # $fh = filehandle to be blocking
3291 # Returns:
3292 # N/A
3293 my $fh = shift;
3294 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
3295 my $flags;
3296 # Get the current flags on the filehandle
3297 fcntl($fh, &F_GETFL, $flags) || die $!;
3298 # Add non-blocking to the flags
3299 $flags |= &O_NONBLOCK;
3300 # Set the flags on the filehandle
3301 fcntl($fh, &F_SETFL, $flags) || die $!;
3305 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {}
3308 # Variable structure:
3310 # $Global::running{$pid} = Pointer to Job-object
3311 # @Global::virgin_jobs = Pointer to Job-object that have received no input
3312 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
3313 # $Global::total_running = total number of running jobs
3314 # $Global::total_started = total jobs started
3315 # $Global::max_procs_file = filename if --jobs is given a filename
3316 # $Global::JobQueue = JobQueue object for the queue of jobs
3317 # $Global::timeoutq = queue of times where jobs timeout
3318 # $Global::newest_job = Job object of the most recent job started
3319 # $Global::newest_starttime = timestamp of $Global::newest_job
3320 # @Global::sshlogin
3321 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
3322 # $Global::start_no_new_jobs = should more jobs be started?
3323 # $Global::original_stderr = file handle for STDERR when the program started
3324 # $Global::total_started = total number of jobs started
3325 # $Global::joblog = filehandle of joblog
3326 # $Global::debug = Is debugging on?
3327 # $Global::exitstatus = status code of GNU Parallel
3328 # $Global::quoting = quote the command to run
3330 sub init_run_jobs() {
3331 # Set Global variables and progress signal handlers
3332 # Do the copying of basefiles
3333 # Returns: N/A
3334 $Global::total_running = 0;
3335 $Global::total_started = 0;
3336 $SIG{USR1} = \&list_running_jobs;
3337 $SIG{USR2} = \&toggle_progress;
3338 if(@opt::basefile) { setup_basefile(); }
3342 my $last_time;
3343 my %last_mtime;
3344 my $max_procs_file_last_mod;
3346 sub changed_procs_file {
3347 # If --jobs is a file and it is modfied:
3348 # Force recomputing of max_jobs_running for each $sshlogin
3349 # Uses:
3350 # $Global::max_procs_file
3351 # %Global::host
3352 # Returns: N/A
3353 if($Global::max_procs_file) {
3354 # --jobs filename
3355 my $mtime = (stat($Global::max_procs_file))[9];
3356 $max_procs_file_last_mod ||= 0;
3357 if($mtime > $max_procs_file_last_mod) {
3358 # file changed: Force re-computing max_jobs_running
3359 $max_procs_file_last_mod = $mtime;
3360 for my $sshlogin (values %Global::host) {
3361 $sshlogin->set_max_jobs_running(undef);
3367 sub changed_sshloginfile {
3368 # If --slf is changed:
3369 # reload --slf
3370 # filter_hosts
3371 # setup_basefile
3372 # Uses:
3373 # @opt::sshloginfile
3374 # @Global::sshlogin
3375 # %Global::host
3376 # $opt::filter_hosts
3377 # Returns: N/A
3378 if(@opt::sshloginfile) {
3379 # Is --sshloginfile changed?
3380 for my $slf (@opt::sshloginfile) {
3381 my $actual_file = expand_slf_shorthand($slf);
3382 my $mtime = (stat($actual_file))[9];
3383 $last_mtime{$actual_file} ||= $mtime;
3384 if($mtime - $last_mtime{$actual_file} > 1) {
3385 ::debug("run","--sshloginfile $actual_file changed. reload\n");
3386 $last_mtime{$actual_file} = $mtime;
3387 # Reload $slf
3388 # Empty sshlogins
3389 @Global::sshlogin = ();
3390 for (values %Global::host) {
3391 # Don't start new jobs on any host
3392 # except the ones added back later
3393 $_->set_max_jobs_running(0);
3395 # This will set max_jobs_running on the SSHlogins
3396 read_sshloginfile($actual_file);
3397 parse_sshlogin();
3398 $opt::filter_hosts and filter_hosts();
3399 setup_basefile();
3405 sub start_more_jobs {
3406 # Run start_another_job() but only if:
3407 # * not $Global::start_no_new_jobs set
3408 # * not JobQueue is empty
3409 # * not load on server is too high
3410 # * not server swapping
3411 # * not too short time since last remote login
3412 # Uses:
3413 # %Global::host
3414 # $Global::start_no_new_jobs
3415 # $Global::JobQueue
3416 # $opt::pipe
3417 # $opt::load
3418 # $opt::noswap
3419 # $opt::delay
3420 # $Global::newest_starttime
3421 # Returns:
3422 # $jobs_started = number of jobs started
3423 my $jobs_started = 0;
3424 if($Global::start_no_new_jobs) {
3425 return $jobs_started;
3427 if(time - ($last_time||0) > 1) {
3428 # At most do this every second
3429 $last_time = time;
3430 changed_procs_file();
3431 changed_sshloginfile();
3433 # This will start 1 job on each --sshlogin (if possible)
3434 # thus distribute the jobs on the --sshlogins round robin
3435 for my $sshlogin (values %Global::host) {
3436 if($Global::JobQueue->empty() and not $opt::pipe) {
3437 # No more jobs in the queue
3438 last;
3440 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
3441 $sshlogin->jobs_running(), "\n");
3442 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
3443 if($opt::delay
3445 $opt::delay - 0.008 > ::now() - $Global::newest_starttime) {
3446 # It has been too short since last start
3447 next;
3449 if($opt::load and $sshlogin->loadavg_too_high()) {
3450 # The load is too high or unknown
3451 next;
3453 if($opt::noswap and $sshlogin->swapping()) {
3454 # The server is swapping
3455 next;
3457 if($opt::limit and $sshlogin->limit()) {
3458 # Over limit
3459 next;
3461 if(($opt::memfree or $opt::memsuspend)
3463 $sshlogin->memfree() < $Global::memlimit) {
3464 # The server has not enough mem free
3465 ::debug("mem", "Not starting job: not enough mem\n");
3466 next;
3468 if($sshlogin->too_fast_remote_login()) {
3469 # It has been too short since
3470 next;
3472 debug("run", $sshlogin->string(),
3473 " has ", $sshlogin->jobs_running(),
3474 " out of ", $sshlogin->max_jobs_running(),
3475 " jobs running. Start another.\n");
3476 if(start_another_job($sshlogin) == 0) {
3477 # No more jobs to start on this $sshlogin
3478 debug("run","No jobs started on ",
3479 $sshlogin->string(), "\n");
3480 next;
3482 $sshlogin->inc_jobs_running();
3483 $sshlogin->set_last_login_at(::now());
3484 $jobs_started++;
3486 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
3487 $sshlogin->jobs_running(), " of ",
3488 $sshlogin->max_jobs_running(), "\n");
3491 return $jobs_started;
3496 my $no_more_file_handles_warned;
3498 sub start_another_job() {
3499 # If there are enough filehandles
3500 # and JobQueue not empty
3501 # and not $job is in joblog
3502 # Then grab a job from Global::JobQueue,
3503 # start it at sshlogin
3504 # mark it as virgin_job
3505 # Inputs:
3506 # $sshlogin = the SSHLogin to start the job on
3507 # Uses:
3508 # $Global::JobQueue
3509 # $opt::pipe
3510 # $opt::results
3511 # $opt::resume
3512 # @Global::virgin_jobs
3513 # Returns:
3514 # 1 if another jobs was started
3515 # 0 otherwise
3516 my $sshlogin = shift;
3517 # Do we have enough file handles to start another job?
3518 if(enough_file_handles()) {
3519 if($Global::JobQueue->empty() and not $opt::pipe) {
3520 # No more commands to run
3521 debug("start", "Not starting: JobQueue empty\n");
3522 return 0;
3523 } else {
3524 my $job;
3525 # Skip jobs already in job log
3526 # Skip jobs already in results
3527 do {
3528 $job = get_job_with_sshlogin($sshlogin);
3529 if(not defined $job) {
3530 # No command available for that sshlogin
3531 debug("start", "Not starting: no jobs available for ",
3532 $sshlogin->string(), "\n");
3533 return 0;
3535 if($job->is_already_in_joblog()) {
3536 $job->free_slot();
3538 } while ($job->is_already_in_joblog()
3540 ($opt::results and $opt::resume and $job->is_already_in_results()));
3541 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
3542 $job->replaced(),"'\n");
3543 if($job->start()) {
3544 if($opt::pipe) {
3545 if($job->virgin()) {
3546 push(@Global::virgin_jobs,$job);
3547 } else {
3548 # Block already set: This is a retry
3549 $job->write_block();
3552 debug("start", "Started as seq ", $job->seq(),
3553 " pid:", $job->pid(), "\n");
3554 return 1;
3555 } else {
3556 # Not enough processes to run the job.
3557 # Put it back on the queue.
3558 $Global::JobQueue->unget($job);
3559 # Count down the number of jobs to run for this SSHLogin.
3560 my $max = $sshlogin->max_jobs_running();
3561 if($max > 1) { $max--; } else {
3562 my @arg;
3563 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
3564 push @arg, map { $_->orig() } @$record;
3566 ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
3567 ::wait_and_exit(255);
3569 $sshlogin->set_max_jobs_running($max);
3570 # Sleep up to 300 ms to give other processes time to die
3571 ::usleep(rand()*300);
3572 ::warning("No more processes: ".
3573 "Decreasing number of running jobs to $max.",
3574 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
3575 "or increasing 'nproc' in /etc/security/limits.conf",
3576 "or increasing /proc/sys/kernel/pid_max");
3577 return 0;
3580 } else {
3581 # No more file handles
3582 $no_more_file_handles_warned++ or
3583 ::warning("No more file handles. ",
3584 "Try running 'parallel -j0 -N 100 --pipe parallel -j0'",
3585 "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)",
3586 "or increasing 'nofile' in /etc/security/limits.conf",
3587 "or increasing /proc/sys/fs/file-max");
3588 debug("start", "No more file handles. ");
3589 return 0;
3594 sub init_progress() {
3595 # Uses:
3596 # $opt::bar
3597 # Returns:
3598 # list of computers for progress output
3599 $|=1;
3600 if($opt::bar) {
3601 return("","");
3603 my %progress = progress();
3604 return ("\nComputers / CPU cores / Max jobs to run\n",
3605 $progress{'workerlist'});
3608 sub drain_job_queue(@) {
3609 # Uses:
3610 # $opt::progress
3611 # $Global::total_running
3612 # $Global::max_jobs_running
3613 # %Global::running
3614 # $Global::JobQueue
3615 # %Global::host
3616 # $Global::start_no_new_jobs
3617 # Returns: N/A
3618 my @command = @_;
3619 if($opt::progress) {
3620 ::status_no_nl(init_progress());
3622 my $last_header = "";
3623 my $sleep = 0.2;
3624 do {
3625 while($Global::total_running > 0) {
3626 debug("init",$Global::total_running, "==", scalar
3627 keys %Global::running," slots: ", $Global::max_jobs_running);
3628 if($opt::pipe) {
3629 # When using --pipe sometimes file handles are not
3630 # closed properly
3631 for my $job (values %Global::running) {
3632 close $job->fh(0,"w");
3635 if($opt::progress) {
3636 my %progress = progress();
3637 if($last_header ne $progress{'header'}) {
3638 ::status("", $progress{'header'});
3639 $last_header = $progress{'header'};
3641 ::status_no_nl("\r",$progress{'status'});
3643 if($Global::total_running < $Global::max_jobs_running
3644 and not $Global::JobQueue->empty()) {
3645 # These jobs may not be started because of loadavg
3646 # or too little time between each ssh login.
3647 if(start_more_jobs() > 0) {
3648 # Exponential back-on if jobs were started
3649 $sleep = $sleep/2+0.001;
3652 # Exponential back-off sleeping
3653 $sleep = ::reap_usleep($sleep);
3655 if(not $Global::JobQueue->empty()) {
3656 # These jobs may not be started:
3657 # * because there the --filter-hosts has removed all
3658 if(not %Global::host) {
3659 ::error("There are no hosts left to run on.");
3660 ::wait_and_exit(255);
3662 # * because of loadavg
3663 # * because of too little time between each ssh login.
3664 $sleep = ::reap_usleep($sleep);
3665 start_more_jobs();
3666 if($Global::max_jobs_running == 0) {
3667 ::warning("There are no job slots available. Increase --jobs.");
3670 while($opt::sqlmaster and not $Global::sql->finished()) {
3671 # SQL master
3672 $sleep = ::reap_usleep($sleep);
3673 start_more_jobs();
3674 if($Global::start_sqlworker) {
3675 # Start an SQL worker as we are now sure there is work to do
3676 $Global::start_sqlworker = 0;
3677 if(my $pid = fork()) {
3678 $Global::unkilled_sqlworker = $pid;
3679 } else {
3680 # Replace --sql/--sqlandworker with --sqlworker
3681 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
3682 # exec the --sqlworker
3683 exec($0,@ARGV,@command);
3687 } while ($Global::total_running > 0
3689 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
3691 $opt::sqlmaster and not $Global::sql->finished());
3692 if($opt::progress) {
3693 my %progress = progress();
3694 ::status("\r".$progress{'status'});
3698 sub toggle_progress() {
3699 # Turn on/off progress view
3700 # Uses:
3701 # $opt::progress
3702 # Returns: N/A
3703 $opt::progress = not $opt::progress;
3704 if($opt::progress) {
3705 ::status_no_nl(init_progress());
3709 sub progress() {
3710 # Uses:
3711 # $opt::bar
3712 # $opt::eta
3713 # %Global::host
3714 # $Global::total_started
3715 # Returns:
3716 # $workerlist = list of workers
3717 # $header = that will fit on the screen
3718 # $status = message that will fit on the screen
3719 if($opt::bar) {
3720 return ("workerlist" => "", "header" => "", "status" => bar());
3722 my $eta = "";
3723 my ($status,$header)=("","");
3724 if($opt::eta) {
3725 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
3726 compute_eta();
3727 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
3728 $this_eta, $left, $avgtime);
3730 my $termcols = terminal_columns();
3731 my @workers = sort keys %Global::host;
3732 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3733 my $workerno = 1;
3734 my %workerno = map { ($_=>$workerno++) } @workers;
3735 my $workerlist = "";
3736 for my $w (@workers) {
3737 $workerlist .=
3738 $workerno{$w}.":".$sshlogin{$w} ." / ".
3739 ($Global::host{$w}->ncpus() || "-")." / ".
3740 $Global::host{$w}->max_jobs_running()."\n";
3742 $status = "x"x($termcols+1);
3743 # Select an output format that will fit on a single line
3744 if(length $status > $termcols) {
3745 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3746 $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
3747 $status = $eta .
3748 join(" ",map
3750 if($Global::total_started) {
3751 my $completed = ($Global::host{$_}->jobs_completed()||0);
3752 my $running = $Global::host{$_}->jobs_running();
3753 my $time = $completed ? (time-$^T)/($completed) : "0";
3754 sprintf("%s:%d/%d/%d%%/%.1fs ",
3755 $sshlogin{$_}, $running, $completed,
3756 ($running+$completed)*100
3757 / $Global::total_started, $time);
3759 } @workers);
3761 if(length $status > $termcols) {
3762 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3763 $header = "Computer:jobs running/jobs completed/%of started jobs";
3764 $status = $eta .
3765 join(" ",map
3767 if($Global::total_started) {
3768 my $completed = ($Global::host{$_}->jobs_completed()||0);
3769 my $running = $Global::host{$_}->jobs_running();
3770 my $time = $completed ? (time-$^T)/($completed) : "0";
3771 sprintf("%s:%d/%d/%d%%/%.1fs ",
3772 $workerno{$_}, $running, $completed,
3773 ($running+$completed)*100
3774 / $Global::total_started, $time);
3776 } @workers);
3778 if(length $status > $termcols) {
3779 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3780 $header = "Computer:jobs running/jobs completed/%of started jobs";
3781 $status = $eta .
3782 join(" ",map
3784 if($Global::total_started) {
3785 sprintf("%s:%d/%d/%d%%",
3786 $sshlogin{$_},
3787 $Global::host{$_}->jobs_running(),
3788 ($Global::host{$_}->jobs_completed()||0),
3789 ($Global::host{$_}->jobs_running()+
3790 ($Global::host{$_}->jobs_completed()||0))*100
3791 / $Global::total_started)
3794 @workers);
3796 if(length $status > $termcols) {
3797 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3798 $header = "Computer:jobs running/jobs completed/%of started jobs";
3799 $status = $eta .
3800 join(" ",map
3802 if($Global::total_started) {
3803 sprintf("%s:%d/%d/%d%%",
3804 $workerno{$_},
3805 $Global::host{$_}->jobs_running(),
3806 ($Global::host{$_}->jobs_completed()||0),
3807 ($Global::host{$_}->jobs_running()+
3808 ($Global::host{$_}->jobs_completed()||0))*100
3809 / $Global::total_started)
3812 @workers);
3814 if(length $status > $termcols) {
3815 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3816 $header = "Computer:jobs running/jobs completed";
3817 $status = $eta .
3818 join(" ",map
3819 { sprintf("%s:%d/%d",
3820 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3821 ($Global::host{$_}->jobs_completed()||0)) }
3822 @workers);
3824 if(length $status > $termcols) {
3825 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3826 $header = "Computer:jobs running/jobs completed";
3827 $status = $eta .
3828 join(" ",map
3829 { sprintf("%s:%d/%d",
3830 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3831 ($Global::host{$_}->jobs_completed()||0)) }
3832 @workers);
3834 if(length $status > $termcols) {
3835 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3836 $header = "Computer:jobs running/jobs completed";
3837 $status = $eta .
3838 join(" ",map
3839 { sprintf("%s:%d/%d",
3840 $workerno{$_}, $Global::host{$_}->jobs_running(),
3841 ($Global::host{$_}->jobs_completed()||0)) }
3842 @workers);
3844 if(length $status > $termcols) {
3845 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3846 $header = "Computer:jobs completed";
3847 $status = $eta .
3848 join(" ",map
3849 { sprintf("%s:%d",
3850 $sshlogin{$_},
3851 ($Global::host{$_}->jobs_completed()||0)) }
3852 @workers);
3854 if(length $status > $termcols) {
3855 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3856 $header = "Computer:jobs completed";
3857 $status = $eta .
3858 join(" ",map
3859 { sprintf("%s:%d",
3860 $workerno{$_},
3861 ($Global::host{$_}->jobs_completed()||0)) }
3862 @workers);
3864 return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
3869 my ($first_completed, $smoothed_avg_time, $last_eta);
3871 sub compute_eta {
3872 # Calculate important numbers for ETA
3873 # Returns:
3874 # $total = number of jobs in total
3875 # $completed = number of jobs completed
3876 # $left = number of jobs left
3877 # $pctcomplete = percent of jobs completed
3878 # $avgtime = averaged time
3879 # $eta = smoothed eta
3880 my $completed = $Global::total_completed;
3881 # In rare cases with -X will $completed > total_jobs()
3882 my $total = ::max($Global::JobQueue->total_jobs(),$completed);
3883 my $left = $total - $completed;
3884 if(not $completed) {
3885 return($total, $completed, $left, 0, 0, 0);
3887 my $pctcomplete = ::min($completed / $total,100);
3888 $first_completed ||= time;
3889 my $timepassed = (time - $first_completed);
3890 my $avgtime = $timepassed / $completed;
3891 $smoothed_avg_time ||= $avgtime;
3892 # Smooth the eta so it does not jump wildly
3893 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3894 $pctcomplete * $avgtime;
3895 my $eta = int($left * $smoothed_avg_time);
3896 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3897 # Eta jumped less that 10% up: Keep the last eta instead
3898 $eta = $last_eta;
3899 } else {
3900 $last_eta = $eta;
3902 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3907 my ($rev,$reset);
3909 sub bar() {
3910 # Return:
3911 # $status = bar with eta, completed jobs, arg and pct
3912 $rev ||= "\033[7m";
3913 $reset ||= "\033[0m";
3914 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3915 compute_eta();
3916 my $arg = $Global::newest_job ?
3917 $Global::newest_job->{'commandline'}->
3918 replace_placeholders(["\257<\257>"],0,0) : "";
3919 # These chars mess up display in the terminal in US-ASCII
3920 # and in some combinations as UTF8 (e.g. ঐ ও ঔ ক 𐅪 𐅫 𐅬)
3921 $arg =~ tr/[\011-\016\033\302-\365]//d;
3922 my $eta_dhms = ::seconds_to_time_units($eta);
3923 my $bar_text =
3924 sprintf("%d%% %d:%d=%s %s",
3925 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3926 my $terminal_width = terminal_columns();
3927 my $s = sprintf("%-${terminal_width}s",
3928 substr($bar_text." "x$terminal_width,
3929 0,$terminal_width));
3930 my $width = int($terminal_width * $pctcomplete);
3931 substr($s,$width,0) = $reset;
3932 my $zenity = sprintf("%-${terminal_width}s",
3933 substr("# $eta sec $arg",
3934 0,$terminal_width));
3935 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3936 "\r" . $rev . $s . $reset;
3937 return $s;
3942 my ($columns,$last_column_time);
3944 sub terminal_columns() {
3945 # Get the number of columns of the terminal.
3946 # Only update once per second.
3947 # Returns:
3948 # number of columns of the screen
3949 if(not $columns or $last_column_time < time) {
3950 $last_column_time = time;
3951 $columns = $ENV{'COLUMNS'};
3952 if(not $columns) {
3953 # && true is to force spawning a shell and not just exec'ing
3954 my $stty = qx{stty -a </dev/tty 2>/dev/null && true};
3955 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3956 # MacOSX/IRIX/AIX/Tru64
3957 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3958 # GNU/Linux/Solaris
3959 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3960 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3961 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3962 # QNX
3963 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3965 if(not $columns) {
3966 # && true is to force spawning a shell and not just exec'ing
3967 my $resize = qx{resize 2>/dev/null && true};
3968 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3970 $columns ||= 80;
3972 return $columns;
3976 # Prototype forwarding
3977 sub get_job_with_sshlogin($);
3978 sub get_job_with_sshlogin($) {
3979 # Input:
3980 # $sshlogin = which host should the job be run on?
3981 # Uses:
3982 # $opt::hostgroups
3983 # $Global::JobQueue
3984 # Returns:
3985 # $job = next job object for $sshlogin if any available
3986 my $sshlogin = shift;
3987 my $job;
3989 if ($opt::hostgroups) {
3990 my @other_hostgroup_jobs = ();
3992 while($job = $Global::JobQueue->get()) {
3993 if($sshlogin->in_hostgroups($job->hostgroups())) {
3994 # Found a job to be run on a hostgroup of this
3995 # $sshlogin
3996 last;
3997 } else {
3998 # This job was not in the hostgroups of $sshlogin
3999 push @other_hostgroup_jobs, $job;
4002 $Global::JobQueue->unget(@other_hostgroup_jobs);
4003 if(not defined $job) {
4004 # No more jobs
4005 return undef;
4007 } else {
4008 $job = $Global::JobQueue->get();
4009 if(not defined $job) {
4010 # No more jobs
4011 ::debug("start", "No more jobs: JobQueue empty\n");
4012 return undef;
4015 if(not $job->suspended()) {
4016 $job->set_sshlogin($sshlogin);
4018 if($opt::retries and $job->failed_here()) {
4019 # This command with these args failed for this sshlogin
4020 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
4021 # Only look at the Global::host that have > 0 jobslots
4022 if($no_of_failed_sshlogins ==
4023 grep { $_->max_jobs_running() > 0 } values %Global::host
4024 and $job->failed_here() == $min_failures) {
4025 # It failed the same or more times on another host:
4026 # run it on this host
4027 } else {
4028 # If it failed fewer times on another host:
4029 # Find another job to run
4030 my $nextjob;
4031 if(not $Global::JobQueue->empty()) {
4032 # This can potentially recurse for all args
4033 no warnings 'recursion';
4034 $nextjob = get_job_with_sshlogin($sshlogin);
4036 # Push the command back on the queue
4037 $Global::JobQueue->unget($job);
4038 return $nextjob;
4041 return $job;
4045 sub __REMOTE_SSH__() {}
4048 sub read_sshloginfiles(@) {
4049 # Read a list of --slf's
4050 # Input:
4051 # @files = files or symbolic file names to read
4052 # Returns: N/A
4053 for my $s (@_) {
4054 read_sshloginfile(expand_slf_shorthand($s));
4058 sub expand_slf_shorthand($) {
4059 # Expand --slf shorthand into a read file name
4060 # Input:
4061 # $file = file or symbolic file name to read
4062 # Returns:
4063 # $file = actual file name to read
4064 my $file = shift;
4065 if($file eq "-") {
4066 # skip: It is stdin
4067 } elsif($file eq "..") {
4068 $file = $Global::config_dir."/sshloginfile";
4069 } elsif($file eq ".") {
4070 $file = "/etc/parallel/sshloginfile";
4071 } elsif(not -r $file) {
4072 for(@Global::config_dirs) {
4073 if(not -r $_."/".$file) {
4074 # Try prepending $PARALLEL_HOME
4075 ::error("Cannot open $file.");
4076 ::wait_and_exit(255);
4077 } else {
4078 $file = $_."/".$file;
4079 last;
4083 return $file;
4086 sub read_sshloginfile($) {
4087 # Read sshloginfile into @Global::sshlogin
4088 # Input:
4089 # $file = file to read
4090 # Uses:
4091 # @Global::sshlogin
4092 # Returns: N/A
4093 local $/ = "\n";
4094 my $file = shift;
4095 my $close = 1;
4096 my $in_fh;
4097 ::debug("init","--slf ",$file);
4098 if($file eq "-") {
4099 $in_fh = *STDIN;
4100 $close = 0;
4101 } else {
4102 if(not open($in_fh, "<", $file)) {
4103 # Try the filename
4104 ::error("Cannot open $file.");
4105 ::wait_and_exit(255);
4108 while(<$in_fh>) {
4109 chomp;
4110 /^\s*#/ and next;
4111 /^\s*$/ and next;
4112 push @Global::sshlogin, $_;
4114 if($close) {
4115 close $in_fh;
4119 sub parse_sshlogin() {
4120 # Parse @Global::sshlogin into %Global::host.
4121 # Keep only hosts that are in one of the given ssh hostgroups.
4122 # Uses:
4123 # @Global::sshlogin
4124 # $Global::minimal_command_line_length
4125 # %Global::host
4126 # $opt::transfer
4127 # @opt::return
4128 # $opt::cleanup
4129 # @opt::basefile
4130 # @opt::trc
4131 # Returns: N/A
4132 my @login;
4133 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
4134 for my $sshlogin (@Global::sshlogin) {
4135 # Split up -S sshlogin,sshlogin
4136 for my $s (split /,|\n/, $sshlogin) {
4137 if ($s eq ".." or $s eq "-") {
4138 # This may add to @Global::sshlogin - possibly bug
4139 read_sshloginfile(expand_slf_shorthand($s));
4140 } else {
4141 $s =~ s/\s*$//;
4142 push (@login, $s);
4146 $Global::minimal_command_line_length = 100_000_000;
4147 my @allowed_hostgroups;
4148 for my $ncpu_sshlogin_string (::uniq(@login)) {
4149 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
4150 my $sshlogin_string = $sshlogin->string();
4151 if($sshlogin_string eq "") {
4152 # This is an ssh group: -S @webservers
4153 push @allowed_hostgroups, $sshlogin->hostgroups();
4154 next;
4156 if($Global::host{$sshlogin_string}) {
4157 # This sshlogin has already been added:
4158 # It is probably a host that has come back
4159 # Set the max_jobs_running back to the original
4160 debug("run","Already seen $sshlogin_string\n");
4161 if($sshlogin->{'ncpus'}) {
4162 # If ncpus set by '#/' of the sshlogin, overwrite it:
4163 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
4165 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
4166 next;
4168 $sshlogin->set_maxlength(Limits::Command::max_length());
4170 $Global::minimal_command_line_length =
4171 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
4172 $Global::host{$sshlogin_string} = $sshlogin;
4174 if(@allowed_hostgroups) {
4175 # Remove hosts that are not in these groups
4176 while (my ($string, $sshlogin) = each %Global::host) {
4177 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
4178 delete $Global::host{$string};
4183 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
4184 if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) {
4185 if(not remote_hosts()) {
4186 # There are no remote hosts
4187 if(@opt::trc) {
4188 ::warning("--trc ignored as there are no remote --sshlogin.");
4189 } elsif (defined $opt::transfer) {
4190 ::warning("--transfer ignored as there are no remote --sshlogin.");
4191 } elsif (@opt::transfer_files) {
4192 ::warning("--transferfile ignored as there are no remote --sshlogin.");
4193 } elsif (@opt::return) {
4194 ::warning("--return ignored as there are no remote --sshlogin.");
4195 } elsif (defined $opt::cleanup and not %opt::template) {
4196 ::warning("--cleanup ignored as there are no remote --sshlogin.");
4197 } elsif (@opt::basefile) {
4198 ::warning("--basefile ignored as there are no remote --sshlogin.");
4204 sub remote_hosts() {
4205 # Return sshlogins that are not ':'
4206 # Uses:
4207 # %Global::host
4208 # Returns:
4209 # list of sshlogins with ':' removed
4210 return grep !/^:$/, keys %Global::host;
4213 sub setup_basefile() {
4214 # Transfer basefiles to each $sshlogin
4215 # This needs to be done before first jobs on $sshlogin is run
4216 # Uses:
4217 # %Global::host
4218 # @opt::basefile
4219 # Returns: N/A
4220 my @cmd;
4221 my $rsync_destdir;
4222 my $workdir;
4223 for my $sshlogin (values %Global::host) {
4224 if($sshlogin->string() eq ":") { next }
4225 for my $file (@opt::basefile) {
4226 if($file !~ m:^/: and $opt::workdir eq "...") {
4227 ::error("Work dir '...' will not work with relative basefiles.");
4228 ::wait_and_exit(255);
4230 if(not $workdir) {
4231 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{});
4232 my $dummyjob = Job->new($dummycmdline);
4233 $workdir = $dummyjob->workdir();
4235 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
4238 debug("init", "basesetup: @cmd\n");
4239 my ($exitstatus,$stdout_ref,$stderr_ref) =
4240 run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5);
4241 if($exitstatus) {
4242 my @stdout = @$stdout_ref;
4243 my @stderr = @$stderr_ref;
4244 ::error("Copying of --basefile failed: @stdout@stderr");
4245 ::wait_and_exit(255);
4249 sub cleanup_basefile() {
4250 # Remove the basefiles transferred
4251 # Uses:
4252 # %Global::host
4253 # @opt::basefile
4254 # Returns: N/A
4255 my @cmd;
4256 my $workdir;
4257 if(not $workdir) {
4258 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{});
4259 my $dummyjob = Job->new($dummycmdline);
4260 $workdir = $dummyjob->workdir();
4262 for my $sshlogin (values %Global::host) {
4263 if($sshlogin->string() eq ":") { next }
4264 for my $file (@opt::basefile) {
4265 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
4268 debug("init", "basecleanup: @cmd\n");
4269 my ($exitstatus,$stdout_ref,$stderr_ref) =
4270 run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5);
4271 if($exitstatus) {
4272 my @stdout = @$stdout_ref;
4273 my @stderr = @$stderr_ref;
4274 ::error("Cleanup of --basefile failed: @stdout@stderr");
4275 ::wait_and_exit(255);
4279 sub run_gnu_parallel() {
4280 my ($stdin,@args) = @_;
4281 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
4282 print $Global::original_stderr ` $cmd wait` ;
4283 return 0
4286 sub _run_gnu_parallel() {
4287 # Run GNU Parallel
4288 # This should ideally just fork an internal copy
4289 # and not start it through a shell
4290 # Input:
4291 # $stdin = data to provide on stdin for GNU Parallel
4292 # @args = command line arguments
4293 # Returns:
4294 # $exitstatus = exitcode of GNU Parallel run
4295 # \@stdout = standard output
4296 # \@stderr = standard error
4297 my ($stdin,@args) = @_;
4298 my ($exitstatus,@stdout,@stderr);
4299 my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
4300 my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
4301 unlink $stderrname;
4303 my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
4304 $0,qw(--plain --shell /bin/sh --will-cite), @args);
4305 if(my $writerpid = fork()) {
4306 close $stdin_fh;
4307 @stdout = <$stdout_fh>;
4308 # Now stdout is closed:
4309 # These pids should be dead or die very soon
4310 while(kill 0, $writerpid) { ::usleep(1); }
4311 die;
4312 # reap $writerpid;
4313 # while(kill 0, $pid) { ::usleep(1); }
4314 # reap $writerpid;
4315 $exitstatus = $?;
4316 seek $stderr_fh, 0, 0;
4317 @stderr = <$stderr_fh>;
4318 close $stdout_fh;
4319 close $stderr_fh;
4320 } else {
4321 close $stdout_fh;
4322 close $stderr_fh;
4323 print $stdin_fh $stdin;
4324 close $stdin_fh;
4325 exit(0);
4327 return ($exitstatus,\@stdout,\@stderr);
4330 sub filter_hosts() {
4331 # Remove down --sshlogins from active duty.
4332 # Find ncpus, ncores, maxlen, time-to-login for each host.
4333 # Uses:
4334 # %Global::host
4335 # $Global::minimal_command_line_length
4336 # $opt::use_sockets_instead_of_threads
4337 # $opt::use_cores_instead_of_threads
4338 # $opt::use_cpus_instead_of_cores
4339 # Returns: N/A
4341 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
4342 $maxlen_ref, $echo_ref, $down_hosts_ref) =
4343 parse_host_filtering(parallelized_host_filtering());
4345 delete @Global::host{@$down_hosts_ref};
4346 @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
4348 $Global::minimal_command_line_length = 100_000_000;
4349 while (my ($sshlogin, $obj) = each %Global::host) {
4350 if($sshlogin eq ":") { next }
4351 $nsockets_ref->{$sshlogin} or
4352 ::die_bug("nsockets missing: ".$obj->serverlogin());
4353 $ncores_ref->{$sshlogin} or
4354 ::die_bug("ncores missing: ".$obj->serverlogin());
4355 $nthreads_ref->{$sshlogin} or
4356 ::die_bug("nthreads missing: ".$obj->serverlogin());
4357 $time_to_login_ref->{$sshlogin} or
4358 ::die_bug("time_to_login missing: ".$obj->serverlogin());
4359 $maxlen_ref->{$sshlogin} or
4360 ::die_bug("maxlen missing: ".$obj->serverlogin());
4361 $obj->set_ncpus($nthreads_ref->{$sshlogin});
4362 if($opt::use_cpus_instead_of_cores) {
4363 $obj->set_ncpus($ncores_ref->{$sshlogin});
4364 } elsif($opt::use_sockets_instead_of_threads) {
4365 $obj->set_ncpus($nsockets_ref->{$sshlogin});
4366 } elsif($opt::use_cores_instead_of_threads) {
4367 $obj->set_ncpus($ncores_ref->{$sshlogin});
4369 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
4370 $obj->set_maxlength($maxlen_ref->{$sshlogin});
4371 $Global::minimal_command_line_length =
4372 ::min($Global::minimal_command_line_length,
4373 int($maxlen_ref->{$sshlogin}/2));
4374 ::debug("init", "Timing from -S:$sshlogin ",
4375 " nsockets:",$nsockets_ref->{$sshlogin},
4376 " ncores:", $ncores_ref->{$sshlogin},
4377 " nthreads:",$nthreads_ref->{$sshlogin},
4378 " time_to_login:", $time_to_login_ref->{$sshlogin},
4379 " maxlen:", $maxlen_ref->{$sshlogin},
4380 " min_max_len:", $Global::minimal_command_line_length,"\n");
4384 sub parse_host_filtering() {
4385 # Input:
4386 # @lines = output from parallelized_host_filtering()
4387 # Returns:
4388 # \%nsockets = number of sockets of {host}
4389 # \%ncores = number of cores of {host}
4390 # \%nthreads = number of hyperthreaded cores of {host}
4391 # \%time_to_login = time_to_login on {host}
4392 # \%maxlen = max command len on {host}
4393 # \%echo = echo received from {host}
4394 # \@down_hosts = list of hosts with no answer
4395 local $/ = "\n";
4396 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
4397 @down_hosts);
4398 for (@_) {
4399 ::debug("init","Read: ",$_);
4400 chomp;
4401 my @col = split /\t/, $_;
4402 if($col[0] =~ /^parallel: Warning:/) {
4403 # Timed out job: Ignore it
4404 next;
4405 } elsif(defined $col[6]) {
4406 # This is a line from --joblog
4407 # seq host time spent sent received exit signal command
4408 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
4409 if($col[0] eq "Seq" and $col[1] eq "Host" and
4410 $col[2] eq "Starttime") {
4411 # Header => skip
4412 next;
4414 # Get server from: eval true server\;
4415 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
4416 ::die_bug("col8 does not contain host: $col[8]");
4417 my $host = $1;
4418 $host =~ tr/\\//d;
4419 $Global::host{$host} or next;
4420 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
4421 # exit == 255 or exit == timeout (-1): ssh failed/timedout
4422 # exit == 1: lsh failed
4423 # Remove sshlogin
4424 ::debug("init", "--filtered $host\n");
4425 push(@down_hosts, $host);
4426 } elsif($col[6] eq "127") {
4427 # signal == 127: parallel not installed remote
4428 # Set nsockets, ncores, nthreads = 1
4429 ::warning("Could not figure out ".
4430 "number of cpus on $host. Using 1.");
4431 $nsockets{$host} = 1;
4432 $ncores{$host} = 1;
4433 $nthreads{$host} = 1;
4434 $maxlen{$host} = Limits::Command::max_length();
4435 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
4436 # Remember how log it took to log in
4437 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
4438 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
4439 } else {
4440 ::die_bug("host check unmatched long jobline: $_");
4442 } elsif($Global::host{$col[0]}) {
4443 # This output from --number-of-cores, --number-of-cpus,
4444 # --max-line-length-allowed
4445 # ncores: server 8
4446 # ncpus: server 2
4447 # maxlen: server 131071
4448 if(/parallel: Warning: Cannot figure out number of/) {
4449 next;
4451 if(not $nsockets{$col[0]}) {
4452 $nsockets{$col[0]} = $col[1];
4453 } elsif(not $ncores{$col[0]}) {
4454 $ncores{$col[0]} = $col[1];
4455 } elsif(not $nthreads{$col[0]}) {
4456 $nthreads{$col[0]} = $col[1];
4457 } elsif(not $maxlen{$col[0]}) {
4458 $maxlen{$col[0]} = $col[1];
4459 } elsif(not $echo{$col[0]}) {
4460 $echo{$col[0]} = $col[1];
4461 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from/) {
4462 # Skip these:
4463 # perl: warning: Setting locale failed.
4464 # perl: warning: Please check that your locale settings:
4465 # LANGUAGE = (unset),
4466 # LC_ALL = (unset),
4467 # LANG = "en_US.UTF-8"
4468 # are supported and installed on your system.
4469 # perl: warning: Falling back to the standard locale ("C").
4470 # Disconnected from 127.0.0.1 port 22
4471 } else {
4472 ::die_bug("host check too many col0: $_");
4474 } else {
4475 ::die_bug("host check unmatched short jobline ($col[0]): $_");
4478 @down_hosts = uniq(@down_hosts);
4479 return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
4480 \%maxlen, \%echo, \@down_hosts);
4483 sub parallelized_host_filtering() {
4484 # Uses:
4485 # %Global::host
4486 # Returns:
4487 # text entries with:
4488 # * joblog line
4489 # * hostname \t number of cores
4490 # * hostname \t number of cpus
4491 # * hostname \t max-line-length-allowed
4492 # * hostname \t empty
4494 sub sshwrapped {
4495 # Wrap with ssh and --env
4496 # Return $default_value if command fails
4497 my $sshlogin = shift;
4498 my $command = shift;
4499 my $default_value = shift;
4500 # wrapper that returns $default_value if the command fails:
4501 # bug #57886: Errors when using different version on remote
4502 # perl -e '$a=`$command`; print $? ? "$default_value" : $a'
4503 my $wcmd = q(perl -e '$a=`).$command.q(`;).
4504 q(print $? ? ").::pQ($default_value).q(" : $a');
4505 my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],[],[],{},{});
4506 my $job = Job->new($commandline);
4507 $job->set_sshlogin($sshlogin);
4508 $job->wrapped();
4509 return($job->{'wrapped'});
4512 my(@sockets, @cores, @threads, @maxline, @echo);
4513 while (my ($host, $sshlogin) = each %Global::host) {
4514 if($host eq ":") { next }
4515 # The 'true' is used to get the $host out later
4516 push(@sockets, $host."\t"."true $host; ".
4517 sshwrapped($sshlogin,"parallel --number-of-sockets",0)."\n\0");
4518 push(@cores, $host."\t"."true $host; ".
4519 sshwrapped($sshlogin,"parallel --number-of-cores",0)."\n\0");
4520 push(@threads, $host."\t"."true $host; ".
4521 sshwrapped($sshlogin,"parallel --number-of-threads",0)."\n\0");
4522 push(@maxline, $host."\t"."true $host; ".
4523 sshwrapped($sshlogin,"parallel --max-line-length-allowed",0)."\n\0");
4524 # 'echo' is used to get the fastest possible ssh login time
4525 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
4526 $sshlogin->serverlogin();
4527 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
4529 # --timeout 10: Setting up an SSH connection and running a simple
4530 # command should never take > 10 sec.
4531 # --delay 0.1: If multiple sshlogins use the same proxy the delay
4532 # will make it less likely to overload the ssh daemon.
4533 # --retries 3: If the ssh daemon is overloaded, try 3 times
4534 my $cmd =
4535 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
4536 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
4537 $cmd = $Global::shell." -c ".Q($cmd);
4538 ::debug("init", $cmd, "\n");
4539 my @out;
4540 my $prepend = "";
4542 my ($host_fh,$in,$err);
4543 open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
4544 ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo);
4546 if(not fork()) {
4547 # Give the commands to run to the $cmd
4548 close $host_fh;
4549 print $in @sockets, @cores, @threads, @maxline, @echo;
4550 close $in;
4551 exit();
4553 close $in;
4554 for(<$host_fh>) {
4555 # TODO incompatible with '-quoting. Needs to be fixed differently
4556 #if(/\'$/) {
4557 # # if last char = ' then append next line
4558 # # This may be due to quoting of \n in environment var
4559 # $prepend .= $_;
4560 # next;
4562 $_ = $prepend . $_;
4563 $prepend = "";
4564 push @out, $_;
4566 close $host_fh;
4567 return @out;
4570 sub onall($@) {
4571 # Runs @command on all hosts.
4572 # Uses parallel to run @command on each host.
4573 # --jobs = number of hosts to run on simultaneously.
4574 # For each host a parallel command with the args will be running.
4575 # Uses:
4576 # $Global::quoting
4577 # @opt::basefile
4578 # $opt::jobs
4579 # $opt::linebuffer
4580 # $opt::ungroup
4581 # $opt::group
4582 # $opt::keeporder
4583 # $opt::D
4584 # $opt::plain
4585 # $opt::max_chars
4586 # $opt::linebuffer
4587 # $opt::files
4588 # $opt::colsep
4589 # $opt::timeout
4590 # $opt::plain
4591 # $opt::retries
4592 # $opt::max_chars
4593 # $opt::arg_sep
4594 # $opt::arg_file_sep
4595 # @opt::v
4596 # @opt::env
4597 # %Global::host
4598 # $Global::exitstatus
4599 # $Global::debug
4600 # $Global::joblog
4601 # $opt::joblog
4602 # $opt::tag
4603 # $opt::tee
4604 # Input:
4605 # @command = command to run on all hosts
4606 # Returns: N/A
4607 sub tmp_joblog {
4608 # Input:
4609 # $joblog = filename of joblog - undef if none
4610 # Returns:
4611 # $tmpfile = temp file for joblog - undef if none
4612 my $joblog = shift;
4613 if(not defined $joblog) {
4614 return undef;
4616 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
4617 close $fh;
4618 return $tmpfile;
4620 my ($input_source_fh_ref,@command) = @_;
4621 if($Global::quoting) {
4622 @command = shell_quote(@command);
4625 # Copy all @input_source_fh (-a and :::) into tempfiles
4626 my @argfiles = ();
4627 for my $fh (@$input_source_fh_ref) {
4628 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
4629 print $outfh (<$fh>);
4630 close $outfh;
4631 push @argfiles, $name;
4633 if(@opt::basefile) { setup_basefile(); }
4634 # for each sshlogin do:
4635 # parallel -S $sshlogin $command :::: @argfiles
4637 # Pass some of the options to the sub-parallels, not all of them as
4638 # -P should only go to the first, and -S should not be copied at all.
4639 my $options =
4640 join(" ",
4641 ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
4642 ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""),
4643 ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""),
4644 ((defined $opt::D) ? "-D $opt::D" : ""),
4645 ((defined $opt::group) ? "-g" : ""),
4646 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
4647 ((defined $opt::keeporder) ? "--keeporder" : ""),
4648 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4649 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4650 ((defined $opt::plain) ? "--plain" : ""),
4651 ((defined $opt::ungroup) ? "-u" : ""),
4652 ((defined $opt::tee) ? "--tee" : ""),
4654 my $suboptions =
4655 join(" ",
4656 ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""),
4657 ((defined $opt::D) ? "-D $opt::D" : ""),
4658 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
4659 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
4660 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
4661 ((defined $opt::files) ? "--files" : ""),
4662 ((defined $opt::group) ? "-g" : ""),
4663 ((defined $opt::cleanup) ? "--cleanup" : ""),
4664 ((defined $opt::keeporder) ? "--keeporder" : ""),
4665 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
4666 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
4667 ((defined $opt::plain) ? "--plain" : ""),
4668 ((defined $opt::plus) ? "--plus" : ""),
4669 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
4670 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
4671 ((defined $opt::ungroup) ? "-u" : ""),
4672 ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""),
4673 ((defined $opt::tee) ? "--tee" : ""),
4674 ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
4675 (@Global::transfer_files ? map { "--tf ".Q($_) }
4676 @Global::transfer_files : ""),
4677 (@Global::ret_files ? map { "--return ".Q($_) }
4678 @Global::ret_files : ""),
4679 (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
4680 (map { "-v" } @opt::v),
4682 ::debug("init", "| $0 $options\n");
4683 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
4684 ::die_bug("This does not run GNU Parallel: $0 $options");
4685 my @joblogs;
4686 for my $host (sort keys %Global::host) {
4687 my $sshlogin = $Global::host{$host};
4688 my $joblog = tmp_joblog($opt::joblog);
4689 if($joblog) {
4690 push @joblogs, $joblog;
4691 $joblog = "--joblog $joblog";
4693 my $quad = $opt::arg_file_sep || "::::";
4694 # If PARALLEL_ENV is set: Pass it on
4695 my $penv=$Global::parallel_env ?
4696 "PARALLEL_ENV=".Q($Global::parallel_env) :
4698 ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
4699 ((defined $opt::tag) ?
4700 "--tagstring ".Q($sshlogin->string()) : ""),
4701 " -S ", Q($sshlogin->string())," ",
4702 join(" ",shell_quote(@command))," $quad @argfiles\n");
4703 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
4704 ((defined $opt::tag) ?
4705 "--tagstring ".Q($sshlogin->string()) : ""),
4706 " -S ", Q($sshlogin->string())," ",
4707 join(" ",shell_quote(@command))," $quad @argfiles\0";
4709 close $parallel_fh;
4710 $Global::exitstatus = $? >> 8;
4711 debug("init", "--onall exitvalue ", $?);
4712 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
4713 $Global::debug or unlink(@argfiles);
4714 my %seen;
4715 for my $joblog (@joblogs) {
4716 # Append to $joblog
4717 open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
4718 # Skip first line (header);
4719 <$fh>;
4720 print $Global::joblog (<$fh>);
4721 close $fh;
4722 unlink($joblog);
4727 sub __SIGNAL_HANDLING__() {}
4730 sub sigtstp() {
4731 # Send TSTP signal (Ctrl-Z) to all children process groups
4732 # Uses:
4733 # %SIG
4734 # Returns: N/A
4735 signal_children("TSTP");
4738 sub sigpipe() {
4739 # Send SIGPIPE signal to all children process groups
4740 # Uses:
4741 # %SIG
4742 # Returns: N/A
4743 signal_children("PIPE");
4746 sub signal_children() {
4747 # Send signal to all children process groups
4748 # and GNU Parallel itself
4749 # Uses:
4750 # %SIG
4751 # Returns: N/A
4752 my $signal = shift;
4753 debug("run", "Sending $signal ");
4754 kill $signal, map { -$_ } keys %Global::running;
4755 # Use default signal handler for GNU Parallel itself
4756 $SIG{$signal} = undef;
4757 kill $signal, $$;
4760 sub save_original_signal_handler() {
4761 # Remember the original signal handler
4762 # Uses:
4763 # %Global::original_sig
4764 # Returns: N/A
4765 $SIG{INT} = sub {
4766 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4767 wait_and_exit(255);
4769 $SIG{TERM} = sub {
4770 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4771 wait_and_exit(255);
4773 %Global::original_sig = %SIG;
4774 $SIG{TERM} = sub {}; # Dummy until jobs really start
4775 $SIG{ALRM} = 'IGNORE';
4776 # Allow Ctrl-Z to suspend and `fg` to continue
4777 $SIG{TSTP} = \&sigtstp;
4778 $SIG{PIPE} = \&sigpipe;
4779 $SIG{CONT} = sub {
4780 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4781 $SIG{TSTP} = \&sigtstp;
4782 for my $job (values %Global::running) {
4783 if($job->suspended()) {
4784 # Force jobs to suspend, if they are marked as suspended.
4785 # --memsupspend can suspend a job that will be resumed
4786 # if the user presses CTRL-Z followed by `fg`.
4787 $job->suspend();
4788 } else {
4789 # Resume the rest of the jobs
4790 $job->resume();
4796 sub list_running_jobs() {
4797 # Print running jobs on tty
4798 # Uses:
4799 # %Global::running
4800 # Returns: N/A
4801 for my $job (values %Global::running) {
4802 ::status("$Global::progname: ".$job->replaced());
4806 sub start_no_new_jobs() {
4807 # Start no more jobs
4808 # Uses:
4809 # %Global::original_sig
4810 # %Global::unlink
4811 # $Global::start_no_new_jobs
4812 # Returns: N/A
4813 unlink keys %Global::unlink;
4814 ::status
4815 ("$Global::progname: SIGHUP received. No new jobs will be started.",
4816 "$Global::progname: Waiting for these ".(keys %Global::running).
4817 " jobs to finish. Send SIGTERM to stop now.");
4818 list_running_jobs();
4819 $Global::start_no_new_jobs ||= 1;
4822 sub reapers() {
4823 # Run reaper until there are no more left
4824 # Returns:
4825 # @pids_reaped = pids of reaped processes
4826 my @pids_reaped;
4827 my $pid;
4828 while($pid = reaper()) {
4829 push @pids_reaped, $pid;
4831 return @pids_reaped;
4834 sub reaper() {
4835 # A job finished:
4836 # * Set exitstatus, exitsignal, endtime.
4837 # * Free ressources for new job
4838 # * Update median runtime
4839 # * Print output
4840 # * If --halt = now: Kill children
4841 # * Print progress
4842 # Uses:
4843 # %Global::running
4844 # $opt::timeout
4845 # $Global::timeoutq
4846 # $opt::keeporder
4847 # $Global::total_running
4848 # Returns:
4849 # $stiff = PID of child finished
4850 my $stiff;
4851 debug("run", "Reaper ");
4852 if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
4853 # No jobs waiting to be reaped
4854 return 0;
4857 # $stiff = pid of dead process
4858 my $job = $Global::running{$stiff};
4860 # '-a <(seq 10)' will give us a pid not in %Global::running
4861 # The same will one of the ssh -M: ignore
4862 $job or return 0;
4863 delete $Global::running{$stiff};
4864 $Global::total_running--;
4865 if($job->{'commandline'}{'skip'}) {
4866 # $job->skip() was called
4867 $job->set_exitstatus(-2);
4868 $job->set_exitsignal(0);
4869 } else {
4870 $job->set_exitstatus($? >> 8);
4871 $job->set_exitsignal($? & 127);
4874 debug("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")");
4875 if($Global::delayauto or $Global::sshdelayauto) {
4876 if($job->exitstatus()) {
4877 # Job failed: Increase delay (if $opt::(ssh)delay set)
4878 $opt::delay &&= $opt::delay * 2;
4879 $opt::sshdelay &&= $opt::sshdelay * 2;
4880 } else {
4881 # Job succeeded: Decrease delay (if $opt::(ssh)delay set)
4882 $opt::delay &&= $opt::delay * 0.9;
4883 $opt::sshdelay &&= $opt::sshdelay * 0.9;
4885 debug("run", "delay:$opt::delay ssh:$opt::sshdelay ");
4887 $job->set_endtime(::now());
4888 my $sshlogin = $job->sshlogin();
4889 $sshlogin->dec_jobs_running();
4890 if($job->should_be_retried()) {
4891 # Free up file handles
4892 $job->free_ressources();
4893 } else {
4894 # The job is done
4895 $sshlogin->inc_jobs_completed();
4896 # Free the jobslot
4897 $job->free_slot();
4898 if($opt::timeout and not $job->exitstatus()) {
4899 # Update average runtime for timeout only for successful jobs
4900 $Global::timeoutq->update_median_runtime($job->runtime());
4902 if($opt::keeporder) {
4903 $job->print_earlier_jobs();
4904 } else {
4905 $job->print();
4907 if($job->should_we_halt() eq "now") {
4908 # Kill children
4909 ::kill_sleep_seq($job->pid());
4910 ::killall();
4911 ::wait_and_exit($Global::halt_exitstatus);
4914 $job->cleanup();
4916 if($opt::progress) {
4917 my %progress = progress();
4918 ::status_no_nl("\r",$progress{'status'});
4921 debug("run", "jobdone \n");
4922 return $stiff;
4926 sub __USAGE__() {}
4929 sub killall() {
4930 # Kill all jobs by killing their process groups
4931 # Uses:
4932 # $Global::start_no_new_jobs = we are stopping
4933 # $Global::killall = Flag to not run reaper
4934 $Global::start_no_new_jobs ||= 1;
4935 # Do not reap killed children: Ignore them instead
4936 $Global::killall ||= 1;
4937 kill_sleep_seq(keys %Global::running);
4940 sub kill_sleep_seq(@) {
4941 # Send jobs TERM,TERM,KILL to processgroups
4942 # Input:
4943 # @pids = list of pids that are also processgroups
4944 # Convert pids to process groups ($processgroup = -$pid)
4945 my @pgrps = map { -$_ } @_;
4946 my @term_seq = split/,/,$opt::termseq;
4947 if(not @term_seq) {
4948 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4950 while(@term_seq) {
4951 @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
4955 sub kill_sleep() {
4956 # Kill pids with a signal and wait a while for them to die
4957 # Input:
4958 # $signal = signal to send to @pids
4959 # $sleep_max = number of ms to sleep at most before returning
4960 # @pids = pids to kill (actually process groups)
4961 # Uses:
4962 # $Global::killall = set by killall() to avoid calling reaper
4963 # Returns:
4964 # @pids = pids still alive
4965 my ($signal, $sleep_max, @pids) = @_;
4966 ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4967 kill $signal, @pids;
4968 my $sleepsum = 0;
4969 my $sleep = 0.001;
4971 while(@pids and $sleepsum < $sleep_max) {
4972 if($Global::killall) {
4973 # Killall => don't run reaper
4974 while(waitpid(-1, &WNOHANG) > 0) {
4975 $sleep = $sleep/2+0.001;
4977 } elsif(reapers()) {
4978 $sleep = $sleep/2+0.001;
4980 $sleep *= 1.1;
4981 ::usleep($sleep);
4982 $sleepsum += $sleep;
4983 # Keep only living children
4984 @pids = grep { kill(0, $_) } @pids;
4986 return @pids;
4989 sub wait_and_exit($) {
4990 # If we do not wait, we sometimes get segfault
4991 # Returns: N/A
4992 my $error = shift;
4993 unlink keys %Global::unlink;
4994 if($error) {
4995 # Kill all jobs without printing
4996 killall();
4998 for (keys %Global::unkilled_children) {
4999 # Kill any (non-jobs) children (e.g. reserved processes)
5000 kill 9, $_;
5001 waitpid($_,0);
5002 delete $Global::unkilled_children{$_};
5004 if($Global::unkilled_sqlworker) {
5005 waitpid($Global::unkilled_sqlworker,0);
5007 # Avoid: Warning: unable to close filehandle properly: No space
5008 # left on device during global destruction.
5009 $SIG{__WARN__} = sub {};
5010 if($opt::parset) {
5011 # Make the shell script return $error
5012 print "$Global::parset_endstring\nreturn $error";
5014 exit($error);
5017 sub die_usage() {
5018 # Returns: N/A
5019 usage();
5020 wait_and_exit(255);
5023 sub usage() {
5024 # Returns: N/A
5025 print join
5026 ("\n",
5027 "Usage:",
5029 "$Global::progname [options] [command [arguments]] < list_of_arguments",
5030 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
5031 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
5033 "-j n Run n jobs in parallel",
5034 "-k Keep same order",
5035 "-X Multiple arguments with context replace",
5036 "--colsep regexp Split input on regexp for positional replacements",
5037 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
5038 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
5039 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
5040 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
5042 "-S sshlogin Example: foo\@server.example.com",
5043 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
5044 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
5045 "--onall Run the given command with argument on all sshlogins",
5046 "--nonall Run the given command with no arguments on all sshlogins",
5048 "--pipe Split stdin (standard input) to multiple jobs.",
5049 "--recend str Record end separator for --pipe.",
5050 "--recstart str Record start separator for --pipe.",
5052 "GNU Parallel can do much more. See 'man $Global::progname' for details",
5054 "Academic tradition requires you to cite works you base your article on.",
5055 "If you use programs that use GNU Parallel to process data for an article in a",
5056 "scientific publication, please cite:",
5058 " Tange, O. (2021, September 22). GNU Parallel 20210922 ('Vindelev').",
5059 " Zenodo. https://doi.org/10.5281/zenodo.5523272",
5061 # Before changing these lines, please read
5062 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
5063 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5064 # You accept to be put in a public hall of shame by removing
5065 # the lines.
5066 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5067 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5069 "",);
5072 sub citation_notice() {
5073 # if --will-cite or --plain: do nothing
5074 # if stderr redirected: do nothing
5075 # if $PARALLEL_HOME/will-cite: do nothing
5076 # else: print citation notice to stderr
5077 if($opt::willcite
5079 $opt::plain
5081 not -t $Global::original_stderr
5083 grep { -e "$_/will-cite" } @Global::config_dirs) {
5084 # skip
5085 } else {
5086 ::status
5087 ("Academic tradition requires you to cite works you base your article on.",
5088 "If you use programs that use GNU Parallel to process data for an article in a",
5089 "scientific publication, please cite:",
5091 " Tange, O. (2021, September 22). GNU Parallel 20210922 ('Vindelev').",
5092 " Zenodo. https://doi.org/10.5281/zenodo.5523272",
5094 # Before changing these line, please read
5095 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5096 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5097 # You accept to be put in a public hall of shame by
5098 # removing the lines.
5099 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5100 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5102 "More about funding GNU Parallel and the citation notice:",
5103 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5105 "To silence this citation notice: run 'parallel --citation' once.",
5108 mkdir $Global::config_dir;
5109 # Number of times the user has run GNU Parallel without showing
5110 # willingness to cite
5111 my $runs = 0;
5112 if(open (my $fh, "<", $Global::config_dir.
5113 "/runs-without-willing-to-cite")) {
5114 $runs = <$fh>;
5115 close $fh;
5117 $runs++;
5118 if(open (my $fh, ">", $Global::config_dir.
5119 "/runs-without-willing-to-cite")) {
5120 print $fh $runs;
5121 close $fh;
5122 if($runs >= 10) {
5123 ::status("Come on: You have run parallel $runs times. Isn't it about time ",
5124 "you run 'parallel --citation' once to silence the citation notice?",
5125 "");
5131 sub status(@) {
5132 my @w = @_;
5133 my $fh = $Global::status_fd || *STDERR;
5134 print $fh map { ($_, "\n") } @w;
5135 flush $fh;
5138 sub status_no_nl(@) {
5139 my @w = @_;
5140 my $fh = $Global::status_fd || *STDERR;
5141 print $fh @w;
5142 flush $fh;
5145 sub warning(@) {
5146 my @w = @_;
5147 my $prog = $Global::progname || "parallel";
5148 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5152 my %warnings;
5153 sub warning_once(@) {
5154 my @w = @_;
5155 my $prog = $Global::progname || "parallel";
5156 $warnings{@w}++ or
5157 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
5161 sub error(@) {
5162 my @w = @_;
5163 my $prog = $Global::progname || "parallel";
5164 status(map { ($prog.": Error: ". $_); } @w);
5167 sub die_bug($) {
5168 my $bugid = shift;
5169 print STDERR
5170 ("$Global::progname: This should not happen. You have found a bug. ",
5171 "Please follow\n",
5172 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
5173 "\n",
5174 "Include this in the report:\n",
5175 "* The version number: $Global::version\n",
5176 "* The bugid: $bugid\n",
5177 "* The command line being run\n",
5178 "* The files being read (put the files on a webserver if they are big)\n",
5179 "\n",
5180 "If you get the error on smaller/fewer files, please include those instead.\n");
5181 ::wait_and_exit(255);
5184 sub version() {
5185 # Returns: N/A
5186 print join
5187 ("\n",
5188 "GNU $Global::progname $Global::version",
5189 "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software",
5190 "Foundation, Inc.",
5191 "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>",
5192 "This is free software: you are free to change and redistribute it.",
5193 "GNU $Global::progname comes with no warranty.",
5195 "Web site: https://www.gnu.org/software/${Global::progname}\n",
5196 "When using programs that use GNU Parallel to process data for publication",
5197 "please cite as described in 'parallel --citation'.\n",
5201 sub citation() {
5202 # Returns: N/A
5203 my ($all_argv_ref,$argv_options_removed_ref) = @_;
5204 my $all_argv = "@$all_argv_ref";
5205 my $no_opts = "@$argv_options_removed_ref";
5206 $all_argv=~s/--citation//;
5207 if($all_argv ne $no_opts) {
5208 ::warning("--citation ignores all other options and arguments.");
5209 ::status("");
5212 ::status(
5213 "Academic tradition requires you to cite works you base your article on.",
5214 "If you use programs that use GNU Parallel to process data for an article in a",
5215 "scientific publication, please cite:",
5217 "\@software{tange_2021_5523272,",
5218 " author = {Tange, Ole},",
5219 " title = {GNU Parallel 20210922 ('Vindelev')},",
5220 " month = Sep,",
5221 " year = 2021,",
5222 " note = {{GNU Parallel is a general parallelizer to run",
5223 " multiple serial command line programs in parallel",
5224 " without changing them.}},",
5225 " publisher = {Zenodo},",
5226 " doi = {10.5281/zenodo.5523272},",
5227 " url = {https://doi.org/10.5281/zenodo.5523272}",
5228 "}",
5230 "(Feel free to use \\nocite{tange_2021_5523272})",
5232 # Before changing these lines, please read
5233 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice and
5234 # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt
5235 # You accept to be put in a public hall of shame by removing
5236 # the lines.
5237 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
5238 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
5240 "More about funding GNU Parallel and the citation notice:",
5241 "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html",
5242 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
5243 "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt",
5245 "If you send a copy of your published article to tange\@gnu.org, it will be",
5246 "mentioned in the release notes of next version of GNU Parallel.",
5249 while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
5250 print "\nType: 'will cite' and press enter.\n> ";
5251 my $input = <STDIN>;
5252 if(not defined $input) {
5253 exit(255);
5255 if($input =~ /will cite/i) {
5256 mkdir $Global::config_dir;
5257 if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
5258 close $fh;
5259 ::status(
5261 "Thank you for your support: You are the reason why there is funding to",
5262 "continue maintaining GNU Parallel. On behalf of future versions of",
5263 "GNU Parallel, which would not exist without your support:",
5265 " THANK YOU SO MUCH",
5267 "It is really appreciated. The citation notice is now silenced.",
5268 "");
5269 } else {
5270 ::status(
5272 "Thank you for your support. It is much appreciated. The citation",
5273 "cannot permanently be silenced. Use '--will-cite' instead.",
5275 "If you use '--will-cite' in scripts to be run by others you are making",
5276 "it harder for others to see the citation notice. The development of",
5277 "GNU Parallel is indirectly financed through citations, so if users",
5278 "do not know they should cite then you are making it harder to finance",
5279 "development. However, if you pay 10000 EUR, you should feel free to",
5280 "use '--will-cite' in scripts.",
5281 "");
5282 last;
5288 sub show_limits() {
5289 # Returns: N/A
5290 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
5291 "Maximal used size of command: ",Limits::Command::max_length(),"\n",
5292 "\n",
5293 "Execution of will continue now, and it will try to read its input\n",
5294 "and run commands; if this is not what you wanted to happen, please\n",
5295 "press CTRL-D or CTRL-C\n");
5298 sub embed() {
5299 # Give an embeddable version of GNU Parallel
5300 # Tested with: bash, zsh, ksh, ash, dash, sh
5301 my $randomstring = "cut-here-".join"",
5302 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
5303 if(not -f $0 or not -r $0) {
5304 ::error("--embed only works if parallel is a readable file");
5305 exit(255);
5307 if(open(my $fh, "<", $0)) {
5308 # Read the source from $0
5309 my @source = <$fh>;
5310 my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
5311 my @env_parallel_source = ();
5312 my $shell = $Global::shell;
5313 $shell =~ s:.*/::;
5314 for(which("env_parallel.$shell")) {
5315 -r $_ or next;
5316 # Read the source of env_parallel.shellname
5317 open(my $env_parallel_source_fh, $_) || die;
5318 @env_parallel_source = <$env_parallel_source_fh>;
5319 close $env_parallel_source_fh;
5320 last;
5322 print "#!$Global::shell
5324 # Copyright (C) 2007-2021 $user, Ole Tange, http://ole.tange.dk
5325 # and Free Software Foundation, Inc.
5327 # This program is free software; you can redistribute it and/or modify
5328 # it under the terms of the GNU General Public License as published by
5329 # the Free Software Foundation; either version 3 of the License, or
5330 # (at your option) any later version.
5332 # This program is distributed in the hope that it will be useful, but
5333 # WITHOUT ANY WARRANTY; without even the implied warranty of
5334 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5335 # General Public License for more details.
5337 # You should have received a copy of the GNU General Public License
5338 # along with this program; if not, see <https://www.gnu.org/licenses/>
5339 # or write to the Free Software Foundation, Inc., 51 Franklin St,
5340 # Fifth Floor, Boston, MA 02110-1301 USA
5343 print q!
5344 # Embedded GNU Parallel created with --embed
5345 parallel() {
5346 # Start GNU Parallel without leaving temporary files
5348 # Not all shells support 'perl <(cat ...)'
5349 # This is a complex way of doing:
5350 # perl <(cat <<'cut-here'
5351 # [...]
5352 # ) "$@"
5353 # and also avoiding:
5354 # [1]+ Done cat
5356 # Make a temporary fifo that perl can read from
5357 _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo);
5358 do {
5359 $f = "/tmp/parallel-".join"",
5360 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5361 } while(-e $f);
5362 mkfifo($f,0600);
5363 print $f;'`
5364 # Put source code into temporary file
5365 # so it is easy to copy to the fifo
5366 _file_with_GNU_Parallel_source=`mktemp`;
5368 "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n",
5369 @source,
5370 $randomstring,"\n",
5372 # Copy the source code from the file to the fifo
5373 # and remove the file and fifo ASAP
5374 # 'sh -c' is needed to avoid
5375 # [1]+ Done cat
5376 sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &"
5378 # Read the source from the fifo
5379 perl $_fifo_with_GNU_Parallel_source "$@"
5382 @env_parallel_source,
5385 # This will call the functions above
5386 parallel -k echo ::: Put your code here
5387 env_parallel --session
5388 env_parallel -k echo ::: Put your code here
5389 parset p,y,c,h -k echo ::: Put your code here
5390 echo $p $y $c $h
5391 echo You can also activate GNU Parallel for interactive use by:
5392 echo . "$0"
5394 } else {
5395 ::error("Cannot open $0");
5396 exit(255);
5398 ::status("Redirect the output to a file and add your changes at the end:",
5399 " $0 --embed > new_script");
5403 sub __GENERIC_COMMON_FUNCTION__() {}
5406 sub mkdir_or_die($) {
5407 # If dir is not executable: die
5408 my $dir = shift;
5409 # The eval is needed to catch exception from mkdir
5410 eval { File::Path::mkpath($dir); };
5411 if(not -x $dir) {
5412 ::error("Cannot change into non-executable dir $dir: $!");
5413 ::wait_and_exit(255);
5417 sub tmpfile(@) {
5418 # Create tempfile as $TMPDIR/parXXXXX
5419 # Returns:
5420 # $filehandle = opened file handle
5421 # $filename = file name created
5422 my($filehandle,$filename) =
5423 ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
5424 if(wantarray) {
5425 return($filehandle,$filename);
5426 } else {
5427 # Separate unlink due to NFS dealing badly with File::Temp
5428 unlink $filename;
5429 return $filehandle;
5433 sub tmpname($) {
5434 # Select a name that does not exist
5435 # Do not create the file as it may be used for creating a socket (by tmux)
5436 # Remember the name in $Global::unlink to avoid hitting the same name twice
5437 my $name = shift;
5438 my($tmpname);
5439 if(not -w $ENV{'TMPDIR'}) {
5440 if(not -e $ENV{'TMPDIR'}) {
5441 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
5442 } else {
5443 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
5445 ::wait_and_exit(255);
5447 do {
5448 $tmpname = $ENV{'TMPDIR'}."/".$name.
5449 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
5450 } while(-e $tmpname or $Global::unlink{$tmpname}++);
5451 return $tmpname;
5454 sub tmpfifo() {
5455 # Find an unused name and mkfifo on it
5456 my $tmpfifo = tmpname("fif");
5457 mkfifo($tmpfifo,0600);
5458 return $tmpfifo;
5461 sub rm(@) {
5462 # Remove file and remove it from %Global::unlink
5463 # Uses:
5464 # %Global::unlink
5465 delete @Global::unlink{@_};
5466 unlink @_;
5469 sub size_of_block_dev() {
5470 # Like -s but for block devices
5471 # Input:
5472 # $blockdev = file name of block device
5473 # Returns:
5474 # $size = in bytes, undef if error
5475 my $blockdev = shift;
5476 if(open(my $fh, "<", $blockdev)) {
5477 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
5478 my $size = tell($fh);
5479 close $fh;
5480 return $size;
5481 } else {
5482 ::error("cannot open $blockdev");
5483 wait_and_exit(255);
5487 sub qqx(@) {
5488 # Like qx but with clean environment (except for @keep)
5489 # and STDERR ignored
5490 # This is needed if the environment contains functions
5491 # that /bin/sh does not understand
5492 my %env;
5493 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
5494 # ssh with Kerberos needs KRB5CCNAME
5495 # tmux needs LC_CTYPE
5496 # lsh needs HOME LOGNAME
5497 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE HOME LOGNAME);
5498 @env{@keep} = @ENV{@keep};
5499 local %ENV;
5500 %ENV = %env;
5501 if($Global::debug) {
5502 # && true is to force spawning a shell and not just exec'ing
5503 return qx{ @_ && true };
5504 } else {
5505 # CygWin does not respect 2>/dev/null
5506 # so we do that by hand
5507 # This trick does not work:
5508 # https://stackoverflow.com/q/13833088/363028
5509 # local *STDERR;
5510 # open(STDERR, ">", "/dev/null");
5511 open(local *CHILD_STDIN, '<', '/dev/null') or die $!;
5512 open(local *CHILD_STDERR, '>', '/dev/null') or die $!;
5513 my $out;
5514 # eval is needed if open3 fails (e.g. command line too long)
5515 eval {
5516 my $pid = open3(
5517 '<&CHILD_STDIN',
5518 $out,
5519 '>&CHILD_STDERR',
5520 # && true is to force spawning a shell and not just exec'ing
5521 "@_ && true");
5522 my @arr = <$out>;
5523 close $out;
5524 # Make sure $? is set
5525 waitpid($pid, 0);
5526 return wantarray ? @arr : join "",@arr;
5527 } or do {
5528 # If eval fails, force $?=false
5529 `false`;
5534 sub uniq(@) {
5535 # Remove duplicates and return unique values
5536 return keys %{{ map { $_ => 1 } @_ }};
5539 sub min(@) {
5540 # Returns:
5541 # Minimum value of array
5542 my $min;
5543 for (@_) {
5544 # Skip undefs
5545 defined $_ or next;
5546 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
5547 $min = ($min < $_) ? $min : $_;
5549 return $min;
5552 sub max(@) {
5553 # Returns:
5554 # Maximum value of array
5555 my $max;
5556 for (@_) {
5557 # Skip undefs
5558 defined $_ or next;
5559 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
5560 $max = ($max > $_) ? $max : $_;
5562 return $max;
5565 sub sum(@) {
5566 # Returns:
5567 # Sum of values of array
5568 my @args = @_;
5569 my $sum = 0;
5570 for (@args) {
5571 # Skip undefs
5572 $_ and do { $sum += $_; }
5574 return $sum;
5577 sub undef_as_zero($) {
5578 my $a = shift;
5579 return $a ? $a : 0;
5582 sub undef_as_empty($) {
5583 my $a = shift;
5584 return $a ? $a : "";
5587 sub undef_if_empty($) {
5588 if(defined($_[0]) and $_[0] eq "") {
5589 return undef;
5591 return $_[0];
5594 sub multiply_binary_prefix(@) {
5595 # Evalualte numbers with binary prefix
5596 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
5597 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
5598 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
5599 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
5600 # 13G = 13*1024*1024*1024 = 13958643712
5601 # Input:
5602 # $s = string with prefixes
5603 # Returns:
5604 # $value = int with prefixes multiplied
5605 my @v = @_;
5606 for(@v) {
5607 defined $_ or next;
5608 s/ki/*1024/gi;
5609 s/mi/*1024*1024/gi;
5610 s/gi/*1024*1024*1024/gi;
5611 s/ti/*1024*1024*1024*1024/gi;
5612 s/pi/*1024*1024*1024*1024*1024/gi;
5613 s/ei/*1024*1024*1024*1024*1024*1024/gi;
5614 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
5615 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5616 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
5618 s/K/*1024/g;
5619 s/M/*1024*1024/g;
5620 s/G/*1024*1024*1024/g;
5621 s/T/*1024*1024*1024*1024/g;
5622 s/P/*1024*1024*1024*1024*1024/g;
5623 s/E/*1024*1024*1024*1024*1024*1024/g;
5624 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
5625 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
5626 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
5628 s/k/*1000/g;
5629 s/m/*1000*1000/g;
5630 s/g/*1000*1000*1000/g;
5631 s/t/*1000*1000*1000*1000/g;
5632 s/p/*1000*1000*1000*1000*1000/g;
5633 s/e/*1000*1000*1000*1000*1000*1000/g;
5634 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
5635 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
5636 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
5638 $_ = eval $_;
5640 return wantarray ? @v : $v[0];
5643 sub multiply_time_units($) {
5644 # Evalualte numbers with time units
5645 # s=1, m=60, h=3600, d=86400
5646 # Input:
5647 # $s = string time units
5648 # Returns:
5649 # $value = int in seconds
5650 my @v = @_;
5651 for(@v) {
5652 defined $_ or next;
5653 if(/[dhms]/i) {
5654 s/s/*1+/gi;
5655 s/m/*60+/gi;
5656 s/h/*3600+/gi;
5657 s/d/*86400+/gi;
5658 $_ = eval $_."0";
5661 return wantarray ? @v : $v[0];
5664 sub seconds_to_time_units() {
5665 # Convert seconds into ??d??h??m??s
5666 # s=1, m=60, h=3600, d=86400
5667 # Input:
5668 # $s = int in seconds
5669 # Returns:
5670 # $str = string time units
5671 my $s = shift;
5672 my $str;
5673 my $d = int($s/86400);
5674 $s -= $d * 86400;
5675 my $h = int($s/3600);
5676 $s -= $h * 3600;
5677 my $m = int($s/60);
5678 $s -= $m * 60;
5679 if($d) {
5680 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
5681 } elsif($h) {
5682 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
5683 } elsif($m) {
5684 $str = sprintf("%dm%02ds",$m,$s);
5685 } else {
5686 $str = sprintf("%ds",$s);
5688 return $str;
5692 my ($disk_full_fh, $b8193, $error_printed);
5693 sub exit_if_disk_full() {
5694 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
5695 # If the disk is full: Exit immediately.
5696 # Returns:
5697 # N/A
5698 if(not $disk_full_fh) {
5699 $disk_full_fh = ::tmpfile(SUFFIX => ".df");
5700 $b8193 = "b"x8193;
5702 # Linux does not discover if a disk is full if writing <= 8192
5703 # Tested on:
5704 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
5705 # ntfs reiserfs tmpfs ubifs vfat xfs
5706 # TODO this should be tested on different OS similar to this:
5708 # doit() {
5709 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
5710 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
5711 # seq 6900000 > /mnt/loop/i && echo seq OK
5712 # seq 6980868 > /mnt/loop/i
5713 # seq 10000 > /mnt/loop/ii
5714 # sleep 3
5715 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
5716 # echo >&2
5718 print $disk_full_fh $b8193;
5719 if(not $disk_full_fh
5721 tell $disk_full_fh != 8193) {
5722 # On raspbian the disk can be full except for 10 chars.
5723 if(not $error_printed) {
5724 ::error("Output is incomplete.",
5725 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
5726 "Is the disk full?",
5727 "Change \$TMPDIR with --tmpdir or use --compress.");
5728 $error_printed = 1;
5730 ::wait_and_exit(255);
5732 truncate $disk_full_fh, 0;
5733 seek($disk_full_fh, 0, 0) || die;
5737 sub spacefree($$) {
5738 # Remove comments and spaces
5739 # Inputs:
5740 # $spaces = keep 1 space?
5741 # $s = string to remove spaces from
5742 # Returns:
5743 # $s = with spaces removed
5744 my $spaces = shift;
5745 my $s = shift;
5746 $s =~ s/#.*//mg;
5747 if(1 == $spaces) {
5748 $s =~ s/\s+/ /mg;
5749 } elsif(2 == $spaces) {
5750 # Keep newlines
5751 $s =~ s/\n\n+/\n/sg;
5752 $s =~ s/[ \t]+/ /mg;
5753 } elsif(3 == $spaces) {
5754 # Keep perl code required space
5755 $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
5756 $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
5757 } else {
5758 $s =~ s/\s//mg;
5760 return $s;
5764 my $hostname;
5765 sub hostname() {
5766 local $/ = "\n";
5767 if(not $hostname) {
5768 $hostname = `hostname`;
5769 chomp($hostname);
5770 $hostname ||= "nohostname";
5772 return $hostname;
5776 sub which(@) {
5777 # Input:
5778 # @programs = programs to find the path to
5779 # Returns:
5780 # @full_path = full paths to @programs. Nothing if not found
5781 my @which;
5782 for my $prg (@_) {
5783 push(@which, grep { not -d $_ and -x $_ }
5784 map { $_."/".$prg } split(":",$ENV{'PATH'}));
5785 if($prg =~ m:/:) {
5786 # Test if program with full path exists
5787 push(@which, grep { not -d $_ and -x $_ } $prg);
5790 ::debug("which", "$which[0] in $ENV{'PATH'}\n");
5791 return wantarray ? @which : $which[0];
5795 my ($regexp,$shell,%fakename);
5797 sub parent_shell {
5798 # Input:
5799 # $pid = pid to see if (grand)*parent is a shell
5800 # Returns:
5801 # $shellpath = path to shell - undef if no shell found
5802 my $pid = shift;
5803 ::debug("init","Parent of $pid\n");
5804 if(not $regexp) {
5805 # All shells known to mankind
5807 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
5808 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
5810 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ksh
5811 ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
5812 static-sh tcsh yash zsh -sh -csh -bash),
5813 '-sh (sh)' # sh on FreeBSD
5815 # Can be formatted as:
5816 # [sh] -sh sh busybox sh -sh (sh)
5817 # /bin/sh /sbin/sh /opt/csw/sh
5818 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
5819 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
5820 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
5821 '(-?)('. $shell. '))( *$| [^(])';
5822 %fakename = (
5823 # sh disguises itself as -sh (sh) on FreeBSD
5824 "-sh (sh)" => ["sh"],
5825 # csh and tcsh disguise themselves as -sh/-csh
5826 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
5827 # but sh also disguises itself as -sh
5828 # (TODO When does that happen?)
5829 "-sh" => ["sh"],
5830 "-csh" => ["tcsh", "csh"],
5831 # ash disguises itself as -ash
5832 "-ash" => ["ash", "dash", "sh"],
5833 # dash disguises itself as -dash
5834 "-dash" => ["dash", "ash", "sh"],
5835 # bash disguises itself as -bash
5836 "-bash" => ["bash", "sh"],
5837 # ksh disguises itself as -ksh
5838 "-ksh" => ["ksh", "sh"],
5839 # zsh disguises itself as -zsh
5840 "-zsh" => ["zsh", "sh"],
5843 if($^O eq "linux") {
5844 # Optimized for GNU/Linux
5845 my $testpid = $pid;
5846 my $shellpath;
5847 my $shellline;
5848 while($testpid) {
5849 if(open(my $fd, "<", "/proc/$testpid/cmdline")) {
5850 local $/="\0";
5851 chomp($shellline = <$fd>);
5852 if($shellline =~ /$regexp/o) {
5853 my $shellname = $4 || $8;
5854 my $dash = $3 || $7;
5855 if($shellname eq "sh" and $dash) {
5856 # -sh => csh or sh
5857 if($shellpath = readlink "/proc/$testpid/exe") {
5858 ::debug("init","procpath $shellpath\n");
5859 if($shellpath =~ m:/$shell$:o) {
5860 ::debug("init",
5861 "proc which ".$shellpath." => ");
5862 return $shellpath;
5866 ::debug("init", "which ".$shellname." => ");
5867 $shellpath = (which($shellname,
5868 @{$fakename{$shellname}}))[0];
5869 ::debug("init", "shell path $shellpath\n");
5870 return $shellpath;
5873 # Get parent pid
5874 if(open(my $fd, "<", "/proc/$testpid/stat")) {
5875 my $line = <$fd>;
5876 close $fd;
5877 # Parent pid is field 4
5878 $testpid = (split /\s+/, $line)[3];
5879 } else {
5880 # Something is wrong: fall back to old method
5881 last;
5885 # if -sh or -csh try readlink /proc/$$/exe
5886 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
5887 my $shellpath;
5888 my $testpid = $pid;
5889 while($testpid) {
5890 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5891 my $shellname = $4 || $8;
5892 my $dash = $3 || $7;
5893 if($shellname eq "sh" and $dash) {
5894 # -sh => csh or sh
5895 if($shellpath = readlink "/proc/$testpid/exe") {
5896 ::debug("init","procpath $shellpath\n");
5897 if($shellpath =~ m:/$shell$:o) {
5898 ::debug("init", "proc which ".$shellpath." => ");
5899 return $shellpath;
5903 ::debug("init", "which ".$shellname." => ");
5904 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5905 ::debug("init", "shell path $shellpath\n");
5906 $shellpath and last;
5908 if($testpid == $parent_of_ref->{$testpid}) {
5909 # In Solaris zones, the PPID of the zsched process is itself
5910 last;
5912 $testpid = $parent_of_ref->{$testpid};
5914 return $shellpath;
5919 my %pid_parentpid_cmd;
5921 sub pid_table() {
5922 # Returns:
5923 # %children_of = { pid -> children of pid }
5924 # %parent_of = { pid -> pid of parent }
5925 # %name_of = { pid -> commandname }
5927 if(not %pid_parentpid_cmd) {
5928 # Filter for SysV-style `ps`
5929 my $sysv = q( ps -ef |).
5930 q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5931 q(s/^.{$s}//; print "@F[1,2] $_"' );
5932 # Minix uses cols 2,3 and can have newlines in the command
5933 # so lines not having numbers in cols 2,3 must be ignored
5934 my $minix = q( ps -ef |).
5935 q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5936 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5937 # BSD-style `ps`
5938 my $bsd = q(ps -o pid,ppid,command -ax);
5939 %pid_parentpid_cmd =
5941 'aix' => $sysv,
5942 'android' => $sysv,
5943 'cygwin' => $sysv,
5944 'darwin' => $bsd,
5945 'dec_osf' => $sysv,
5946 'dragonfly' => $bsd,
5947 'freebsd' => $bsd,
5948 'gnu' => $sysv,
5949 'hpux' => $sysv,
5950 'linux' => $sysv,
5951 'mirbsd' => $bsd,
5952 'minix' => $minix,
5953 'msys' => $sysv,
5954 'MSWin32' => $sysv,
5955 'netbsd' => $bsd,
5956 'nto' => $sysv,
5957 'openbsd' => $bsd,
5958 'solaris' => $sysv,
5959 'svr5' => $sysv,
5960 'syllable' => "echo ps not supported",
5963 $pid_parentpid_cmd{$^O} or
5964 ::die_bug("pid_parentpid_cmd for $^O missing");
5966 my (@pidtable,%parent_of,%children_of,%name_of);
5967 # Table with pid -> children of pid
5968 @pidtable = `$pid_parentpid_cmd{$^O}`;
5969 my $p=$$;
5970 for (@pidtable) {
5971 # must match: 24436 21224 busybox ash
5972 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5973 # must match: 24436 21224 <<empty on system running Viber>>
5974 # or: perl -e 'while($0=" "){}'
5975 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5977 /^\s*(\S+)\s+(\S+)\s+()$/) {
5978 $parent_of{$1} = $2;
5979 push @{$children_of{$2}}, $1;
5980 $name_of{$1} = $3;
5981 } else {
5982 ::die_bug("pidtable format: $_");
5985 return(\%children_of, \%parent_of, \%name_of);
5989 sub now() {
5990 # Returns time since epoch as in seconds with 3 decimals
5991 # Uses:
5992 # @Global::use
5993 # Returns:
5994 # $time = time now with millisecond accuracy
5995 if(not $Global::use{"Time::HiRes"}) {
5996 if(eval "use Time::HiRes qw ( time );") {
5997 eval "sub TimeHiRestime { return Time::HiRes::time };";
5998 } else {
5999 eval "sub TimeHiRestime { return time() };";
6001 $Global::use{"Time::HiRes"} = 1;
6004 return (int(TimeHiRestime()*1000))/1000;
6007 sub usleep($) {
6008 # Sleep this many milliseconds.
6009 # Input:
6010 # $ms = milliseconds to sleep
6011 my $ms = shift;
6012 ::debug("timing",int($ms),"ms ");
6013 select(undef, undef, undef, $ms/1000);
6016 sub make_regexp_ungreedy {
6017 my $regexp = shift;
6019 my $class_state = 0;
6020 my $escape_state = 0;
6021 my $found = 0;
6022 my $ungreedy = "";
6023 my $c;
6025 for $c (split (//, $regexp)) {
6026 if ($found) {
6027 if($c ne "?") { $ungreedy .= "?"; }
6028 $found = 0;
6030 $ungreedy .= $c;
6032 if ($escape_state) { $escape_state = 0; next; }
6033 if ($c eq "\\") { $escape_state = 1; next; }
6034 if ($c eq '[') { $class_state = 1; next; }
6035 if ($class_state) {
6036 if($c eq ']') { $class_state = 0; }
6037 next;
6039 # Quantifiers: + * {...}
6040 if ($c =~ /[*}+]/) { $found = 1; }
6042 if($found) { $ungreedy .= '?'; }
6043 return $ungreedy;
6047 sub __KILLER_REAPER__() {}
6049 sub reap_usleep() {
6050 # Reap dead children.
6051 # If no dead children: Sleep specified amount with exponential backoff
6052 # Input:
6053 # $ms = milliseconds to sleep
6054 # Returns:
6055 # $ms/2+0.001 if children reaped
6056 # $ms*1.1 if no children reaped
6057 my $ms = shift;
6058 if(reapers()) {
6059 if(not $Global::total_completed % 100) {
6060 if($opt::timeout) {
6061 # Force cleaning the timeout queue for every 100 jobs
6062 # Fixes potential memleak
6063 $Global::timeoutq->process_timeouts();
6066 # Sleep exponentially shorter (1/2^n) if a job finished
6067 return $ms/2+0.001;
6068 } else {
6069 if($opt::timeout) {
6070 $Global::timeoutq->process_timeouts();
6072 if($opt::memfree) {
6073 kill_youngster_if_not_enough_mem($opt::memfree*0.5);
6075 if($opt::memsuspend) {
6076 suspend_young_if_not_enough_mem($opt::memsuspend);
6078 if($opt::limit) {
6079 kill_youngest_if_over_limit();
6081 exit_if_disk_full();
6082 if($opt::linebuffer) {
6083 my $something_printed = 0;
6084 if($opt::keeporder) {
6085 for my $job (values %Global::running) {
6086 $something_printed += $job->print_earlier_jobs();
6088 } else {
6089 for my $job (values %Global::running) {
6090 $something_printed += $job->print();
6093 if($something_printed) {
6094 $ms = $ms/2+0.001;
6097 if($ms > 0.002) {
6098 # When a child dies, wake up from sleep (or select(,,,))
6099 $SIG{CHLD} = sub { kill "ALRM", $$ };
6100 if($opt::delay) {
6101 # The 0.004s is approximately the time it takes for one round
6102 my $next_earliest_start =
6103 $Global::newest_starttime + $opt::delay - 0.004;
6104 my $remaining_ms = 1000 * ($next_earliest_start - ::now());
6105 # The next job can only start at $next_earliest_start
6106 # so sleep until then (but sleep at least $ms)
6107 usleep(::max($ms,$remaining_ms));
6108 } else {
6109 usleep($ms);
6111 # --compress needs $SIG{CHLD} unset
6112 $SIG{CHLD} = 'DEFAULT';
6114 # Sleep exponentially longer (1.1^n) if a job did not finish,
6115 # though at most 1000 ms.
6116 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
6120 sub kill_youngest_if_over_limit() {
6121 # Check each $sshlogin we are over limit
6122 # If over limit: kill off the youngest child
6123 # Put the child back in the queue.
6124 # Uses:
6125 # %Global::running
6126 my %jobs_of;
6127 my @sshlogins;
6129 for my $job (values %Global::running) {
6130 if(not $jobs_of{$job->sshlogin()}) {
6131 push @sshlogins, $job->sshlogin();
6133 push @{$jobs_of{$job->sshlogin()}}, $job;
6135 for my $sshlogin (@sshlogins) {
6136 for my $job (sort { $b->seq() <=> $a->seq() }
6137 @{$jobs_of{$sshlogin}}) {
6138 if($sshlogin->limit() == 2) {
6139 $job->kill();
6140 last;
6146 sub suspend_young_if_not_enough_mem() {
6147 # Check each $sshlogin if there is enough mem.
6148 # If less than $limit free mem: suspend some of the young children
6149 # Else: Resume all jobs
6150 # Uses:
6151 # %Global::running
6152 my $limit = shift;
6153 my %jobs_of;
6154 my @sshlogins;
6156 for my $job (values %Global::running) {
6157 if(not $jobs_of{$job->sshlogin()}) {
6158 push @sshlogins, $job->sshlogin();
6160 push @{$jobs_of{$job->sshlogin()}}, $job;
6162 for my $sshlogin (@sshlogins) {
6163 my $free = $sshlogin->memfree();
6164 if($free < 2*$limit) {
6165 # Suspend all jobs (resume some of them later)
6166 map { $_->suspended() or $_->suspend(); } @{$jobs_of{$sshlogin}};
6167 my @jobs = (sort { $b->seq() <=> $a->seq() }
6168 @{$jobs_of{$sshlogin}});
6169 # how many should be running?
6170 # limit*1 => 1;
6171 # limit*1.5 => 2;
6172 # limit*1.75 => 4;
6173 # free < limit*(2-1/2^n);
6174 # =>
6175 # 1/(2-free/limit) < 2^n;
6176 my $run = int(1/(2-$free/$limit));
6177 $run = ::min($run,$#jobs);
6178 # Resume the oldest running
6179 for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) {
6180 ::debug("mem","\nResume ",$run+1, " jobs. Seq ",
6181 $job->seq(), " resumed ",
6182 $sshlogin->memfree()," < ",2*$limit);
6183 $job->resume();
6185 } else {
6186 for my $job (@{$jobs_of{$sshlogin}}) {
6187 if($job->suspended()) {
6188 $job->resume();
6189 ::debug("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1,
6190 " jobs. Seq ", $job->seq(), " resumed ",
6191 $sshlogin->memfree()," > ",2*$limit);
6192 last;
6199 sub kill_youngster_if_not_enough_mem() {
6200 # Check each $sshlogin if there is enough mem.
6201 # If less than 50% enough free mem: kill off the youngest child
6202 # Put the child back in the queue.
6203 # Uses:
6204 # %Global::running
6205 my $limit = shift;
6206 my %jobs_of;
6207 my @sshlogins;
6209 for my $job (values %Global::running) {
6210 if(not $jobs_of{$job->sshlogin()}) {
6211 push @sshlogins, $job->sshlogin();
6213 push @{$jobs_of{$job->sshlogin()}}, $job;
6215 for my $sshlogin (@sshlogins) {
6216 for my $job (sort { $b->seq() <=> $a->seq() }
6217 @{$jobs_of{$sshlogin}}) {
6218 if($sshlogin->memfree() < $limit) {
6219 ::debug("mem","\n",map { $_->seq()." " }
6220 (sort { $b->seq() <=> $a->seq() }
6221 @{$jobs_of{$sshlogin}}));
6222 ::debug("mem","\n", $job->seq(), "killed ",
6223 $sshlogin->memfree()," < ",$limit);
6224 $job->kill();
6225 $sshlogin->memfree_recompute();
6226 } else {
6227 last;
6230 ::debug("mem","Free mem OK? ",
6231 $sshlogin->memfree()," > ",$limit);
6236 sub __DEBUGGING__() {}
6239 sub debug(@) {
6240 # Uses:
6241 # $Global::debug
6242 # %Global::fh
6243 # Returns: N/A
6244 $Global::debug or return;
6245 @_ = grep { defined $_ ? $_ : "" } @_;
6246 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
6247 if($Global::fh{2}) {
6248 # Original stderr was saved
6249 my $stderr = $Global::fh{2};
6250 print $stderr @_[1..$#_];
6251 } else {
6252 print STDERR @_[1..$#_];
6257 sub my_memory_usage() {
6258 # Returns:
6259 # memory usage if found
6260 # 0 otherwise
6261 use strict;
6262 use FileHandle;
6264 local $/ = "\n";
6265 my $pid = $$;
6266 if(-e "/proc/$pid/stat") {
6267 my $fh = FileHandle->new("</proc/$pid/stat");
6269 my $data = <$fh>;
6270 chomp $data;
6271 $fh->close;
6273 my @procinfo = split(/\s+/,$data);
6275 return undef_as_zero($procinfo[22]);
6276 } else {
6277 return 0;
6281 sub my_size() {
6282 # Returns:
6283 # $size = size of object if Devel::Size is installed
6284 # -1 otherwise
6285 my @size_this = (@_);
6286 eval "use Devel::Size qw(size total_size)";
6287 if ($@) {
6288 return -1;
6289 } else {
6290 return total_size(@_);
6294 sub my_dump(@) {
6295 # Returns:
6296 # ascii expression of object if Data::Dump(er) is installed
6297 # error code otherwise
6298 my @dump_this = (@_);
6299 eval "use Data::Dump qw(dump);";
6300 if ($@) {
6301 # Data::Dump not installed
6302 eval "use Data::Dumper;";
6303 if ($@) {
6304 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
6305 "Not dumping output\n";
6306 ::status($err);
6307 return $err;
6308 } else {
6309 return Dumper(@dump_this);
6311 } else {
6312 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
6313 # it undefined
6314 eval "sub Data::Dump:dump {}";
6315 eval "use Data::Dump qw(dump);";
6316 return (Data::Dump::dump(@dump_this));
6320 sub my_croak(@) {
6321 eval "use Carp; 1";
6322 $Carp::Verbose = 1;
6323 croak(@_);
6326 sub my_carp() {
6327 eval "use Carp; 1";
6328 $Carp::Verbose = 1;
6329 carp(@_);
6333 sub __OBJECT_ORIENTED_PARTS__() {}
6336 package SSHLogin;
6338 sub new($$) {
6339 my $class = shift;
6340 my $sshlogin_string = shift;
6341 my $ncpus;
6342 my %hostgroups;
6343 # SSHLogins can have these formats:
6344 # @grp+grp/ncpu//usr/bin/ssh user@server
6345 # ncpu//usr/bin/ssh user@server
6346 # /usr/bin/ssh user@server
6347 # user@server
6348 # ncpu/user@server
6349 # @grp+grp/user@server
6350 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
6351 # Look for SSHLogin hostgroups
6352 %hostgroups = map { $_ => 1 } split(/\+/, $1);
6354 # An SSHLogin is always in the hostgroup of its "numcpu/host"
6355 $hostgroups{$sshlogin_string} = 1;
6356 if ($sshlogin_string =~ s:^(\d+)/::) {
6357 # Override default autodetected ncpus unless missing
6358 $ncpus = $1;
6360 my $string = $sshlogin_string;
6361 # An SSHLogin is always in the hostgroup of its $string-name
6362 $hostgroups{$string} = 1;
6363 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
6364 my @unget = ();
6365 my $no_slash_string = $string;
6366 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
6367 return bless {
6368 'string' => $string,
6369 'jobs_running' => 0,
6370 'jobs_completed' => 0,
6371 'maxlength' => undef,
6372 'max_jobs_running' => undef,
6373 'orig_max_jobs_running' => undef,
6374 'ncpus' => $ncpus,
6375 'hostgroups' => \%hostgroups,
6376 'sshcommand' => undef,
6377 'serverlogin' => undef,
6378 'control_path_dir' => undef,
6379 'control_path' => undef,
6380 'time_to_login' => undef,
6381 'last_login_at' => undef,
6382 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6383 $no_slash_string . "/loadavg",
6384 'loadavg' => undef,
6385 'last_loadavg_update' => 0,
6386 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
6387 $no_slash_string . "/swap_activity",
6388 'swap_activity' => undef,
6389 }, ref($class) || $class;
6392 sub DESTROY($) {
6393 my $self = shift;
6394 # Remove temporary files if they are created.
6395 ::rm($self->{'loadavg_file'});
6396 ::rm($self->{'swap_activity_file'});
6399 sub string($) {
6400 my $self = shift;
6401 return $self->{'string'};
6404 sub jobs_running($) {
6405 my $self = shift;
6406 return ($self->{'jobs_running'} || "0");
6409 sub inc_jobs_running($) {
6410 my $self = shift;
6411 $self->{'jobs_running'}++;
6414 sub dec_jobs_running($) {
6415 my $self = shift;
6416 $self->{'jobs_running'}--;
6419 sub set_maxlength($$) {
6420 my $self = shift;
6421 $self->{'maxlength'} = shift;
6424 sub maxlength($) {
6425 my $self = shift;
6426 return $self->{'maxlength'};
6429 sub jobs_completed() {
6430 my $self = shift;
6431 return $self->{'jobs_completed'};
6434 sub in_hostgroups() {
6435 # Input:
6436 # @hostgroups = the hostgroups to look for
6437 # Returns:
6438 # true if intersection of @hostgroups and the hostgroups of this
6439 # SSHLogin is non-empty
6440 my $self = shift;
6441 return grep { defined $self->{'hostgroups'}{$_} } @_;
6444 sub hostgroups() {
6445 my $self = shift;
6446 return keys %{$self->{'hostgroups'}};
6449 sub inc_jobs_completed($) {
6450 my $self = shift;
6451 $self->{'jobs_completed'}++;
6452 $Global::total_completed++;
6455 sub set_max_jobs_running($$) {
6456 my $self = shift;
6457 if(defined $self->{'max_jobs_running'}) {
6458 $Global::max_jobs_running -= $self->{'max_jobs_running'};
6460 $self->{'max_jobs_running'} = shift;
6462 if(defined $self->{'max_jobs_running'}) {
6463 # max_jobs_running could be resat if -j is a changed file
6464 $Global::max_jobs_running += $self->{'max_jobs_running'};
6466 # Initialize orig to the first non-zero value that comes around
6467 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
6470 sub memfree() {
6471 # Returns:
6472 # $memfree in bytes
6473 my $self = shift;
6474 $self->memfree_recompute();
6475 # Return 1 if not defined.
6476 return (not defined $self->{'memfree'} or $self->{'memfree'})
6479 sub memfree_recompute() {
6480 my $self = shift;
6481 my $script = memfreescript();
6483 # TODO add sshlogin and backgrounding
6484 # Run the script twice if it gives 0 (typically intermittent error)
6485 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
6486 if(not $self->{'memfree'}) {
6487 ::die_bug("Less than 1 byte memory free");
6489 #::debug("mem","New free:",$self->{'memfree'}," ");
6493 my $script;
6495 sub memfreescript() {
6496 # Returns:
6497 # shellscript for giving available memory in bytes
6498 if(not $script) {
6499 my %script_of = (
6500 # /proc/meminfo
6501 # MemFree: 7012 kB
6502 # Buffers: 19876 kB
6503 # Cached: 431192 kB
6504 # SwapCached: 0 kB
6505 "linux" => (
6507 print 1024 * qx{
6508 awk '/^((Swap)?Cached|MemFree|Buffers):/
6509 { sum += \$2} END { print sum }'
6510 /proc/meminfo }
6512 # Android uses same code as GNU/Linux
6513 "android" => (
6515 print 1024 * qx{
6516 awk '/^((Swap)?Cached|MemFree|Buffers):/
6517 { sum += \$2} END { print sum }'
6518 /proc/meminfo }
6520 # $ vmstat 1 1
6521 # procs memory page faults cpu
6522 # r b w avm free re at pi po fr de sr in sy cs us sy id
6523 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
6524 "hpux" => (
6526 print (((reverse `vmstat 1 1`)[0]
6527 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6529 # $ vmstat 1 2
6530 # kthr memory page disk faults cpu
6531 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
6532 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
6533 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
6535 # The second free value is correct
6536 "solaris" => (
6538 print (((reverse `vmstat 1 2`)[0]
6539 =~ /(?:\d+\D+){4}(\d+)/)[0]*1024)
6541 # hw.pagesize: 4096
6542 # vm.stats.vm.v_cache_count: 0
6543 # vm.stats.vm.v_inactive_count: 79574
6544 # vm.stats.vm.v_free_count: 4507
6545 "freebsd" => (
6547 for(qx{/sbin/sysctl -a}) {
6548 if (/^([^:]+):\s+(.+)\s*$/s) {
6549 $sysctl->{$1} = $2;
6552 print $sysctl->{"hw.pagesize"} *
6553 ($sysctl->{"vm.stats.vm.v_cache_count"}
6554 + $sysctl->{"vm.stats.vm.v_inactive_count"}
6555 + $sysctl->{"vm.stats.vm.v_free_count"});
6557 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6558 # Pages free: 198061.
6559 # Pages active: 159701.
6560 # Pages inactive: 47378.
6561 # Pages speculative: 29707.
6562 # Pages wired down: 89231.
6563 # "Translation faults": 928901425.
6564 # Pages copy-on-write: 156988239.
6565 # Pages zero filled: 271267894.
6566 # Pages reactivated: 48895.
6567 # Pageins: 1798068.
6568 # Pageouts: 257.
6569 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
6570 'darwin' => (
6572 $vm = `vm_stat`;
6573 print (($vm =~ /page size of (\d+)/)[0] *
6574 (($vm =~ /Pages free:\s+(\d+)/)[0] +
6575 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
6578 my $perlscript = "";
6579 # Make a perl script that detects the OS ($^O) and runs
6580 # the appropriate command
6581 for my $os (keys %script_of) {
6582 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
6584 $script = "perl -e " . ::Q(::spacefree(1,$perlscript));
6586 return $script;
6590 sub limit($) {
6591 # Returns:
6592 # 0 = Below limit. Start another job.
6593 # 1 = Over limit. Start no jobs.
6594 # 2 = Kill youngest job
6595 my $self = shift;
6597 if(not defined $self->{'limitscript'}) {
6598 my %limitscripts =
6599 ("io" => q!
6600 io() {
6601 limit=$1;
6602 io_file=$2;
6603 # Do the measurement in the background
6604 ((tmp=$(tempfile);
6605 LANG=C iostat -x 1 2 > $tmp;
6606 mv $tmp $io_file) </dev/null >/dev/null & );
6607 perl -e '-e $ARGV[0] or exit(1);
6608 for(reverse <>) {
6609 /Device/ and last;
6610 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
6611 exit ('$limit' < $max)' $io_file;
6613 io %s %s
6615 "mem" => q!
6616 mem() {
6617 limit=$1;
6618 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
6619 END {
6620 if (sum*1024 < '$limit'/2) { exit 2; }
6621 else { exit (sum*1024 < '$limit') }
6622 }' /proc/meminfo;
6624 export -f mem;
6625 mem %s;
6627 "load" => q!
6628 load() {
6629 limit=$1;
6630 ps ax -o state,command |
6631 grep -E '^[DOR].[^[]' |
6632 wc -l |
6633 perl -ne 'exit ('$limit' < $_)';
6635 export -f load;
6636 load %s;
6639 my ($cmd,@args) = split /\s+/,$opt::limit;
6640 if($limitscripts{$cmd}) {
6641 my $tmpfile = ::tmpname("parlmt");
6642 ++$Global::unlink{$tmpfile};
6643 $self->{'limitscript'} =
6644 ::spacefree(1, sprintf($limitscripts{$cmd},
6645 ::multiply_binary_prefix(@args),$tmpfile));
6646 } else {
6647 $self->{'limitscript'} = $opt::limit;
6651 my %env = %ENV;
6652 local %ENV = %env;
6653 $ENV{'SSHLOGIN'} = $self->string();
6654 system($Global::shell,"-c",$self->{'limitscript'});
6655 #::qqx($self->{'limitscript'});
6656 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
6657 return $?>>8;
6661 sub swapping($) {
6662 my $self = shift;
6663 my $swapping = $self->swap_activity();
6664 return (not defined $swapping or $swapping)
6667 sub swap_activity($) {
6668 # If the currently known swap activity is too old:
6669 # Recompute a new one in the background
6670 # Returns:
6671 # last swap activity computed
6672 my $self = shift;
6673 # Should we update the swap_activity file?
6674 my $update_swap_activity_file = 0;
6675 if(-r $self->{'swap_activity_file'}) {
6676 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
6677 ::die_bug("swap_activity_file-r");
6678 my $swap_out = <$swap_fh>;
6679 close $swap_fh;
6680 if($swap_out =~ /^(\d+)$/) {
6681 $self->{'swap_activity'} = $1;
6682 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
6684 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
6685 if(time - $self->{'last_swap_activity_update'} > 10) {
6686 # last swap activity update was started 10 seconds ago
6687 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
6688 $update_swap_activity_file = 1;
6690 } else {
6691 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
6692 $self->{'swap_activity'} = undef;
6693 $update_swap_activity_file = 1;
6695 if($update_swap_activity_file) {
6696 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
6697 $self->{'last_swap_activity_update'} = time;
6698 my $dir = ::dirname($self->{'swap_activity_file'});
6699 -d $dir or eval { File::Path::mkpath($dir); };
6700 my $swap_activity;
6701 $swap_activity = swapactivityscript();
6702 if($self->{'string'} ne ":") {
6703 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
6704 ::Q($swap_activity);
6706 # Run swap_activity measuring.
6707 # As the command can take long to run if run remote
6708 # save it to a tmp file before moving it to the correct file
6709 my $file = $self->{'swap_activity_file'};
6710 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
6711 ::debug("swap", "\n", $swap_activity, "\n");
6712 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
6714 return $self->{'swap_activity'};
6718 my $script;
6720 sub swapactivityscript() {
6721 # Returns:
6722 # shellscript for detecting swap activity
6724 # arguments for vmstat are OS dependant
6725 # swap_in and swap_out are in different columns depending on OS
6727 if(not $script) {
6728 my %vmstat = (
6729 # linux: $7*$8
6730 # $ vmstat 1 2
6731 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
6732 # r b swpd free buff cache si so bi bo in cs us sy id wa
6733 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
6734 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
6735 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6737 # solaris: $6*$7
6738 # $ vmstat -S 1 2
6739 # kthr memory page disk faults cpu
6740 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
6741 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
6742 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
6743 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
6745 # darwin (macosx): $21*$22
6746 # $ vm_stat -c 2 1
6747 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
6748 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
6749 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
6750 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
6751 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
6753 # ultrix: $12*$13
6754 # $ vmstat -S 1 2
6755 # procs faults cpu memory page disk
6756 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
6757 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
6758 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
6759 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
6761 # aix: $6*$7
6762 # $ vmstat 1 2
6763 # System configuration: lcpu=1 mem=2048MB
6765 # kthr memory page faults cpu
6766 # ----- ----------- ------------------------ ------------ -----------
6767 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
6768 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
6769 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
6770 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
6772 # freebsd: $8*$9
6773 # $ vmstat -H 1 2
6774 # procs memory page disks faults cpu
6775 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
6776 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
6777 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
6778 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
6780 # mirbsd: $8*$9
6781 # $ vmstat 1 2
6782 # procs memory page disks traps cpu
6783 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
6784 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
6785 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
6786 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6788 # netbsd: $7*$8
6789 # $ vmstat 1 2
6790 # procs memory page disks faults cpu
6791 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
6792 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
6793 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
6794 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
6796 # openbsd: $8*$9
6797 # $ vmstat 1 2
6798 # procs memory page disks traps cpu
6799 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
6800 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
6801 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
6802 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6804 # hpux: $8*$9
6805 # $ vmstat 1 2
6806 # procs memory page faults cpu
6807 # r b w avm free re at pi po fr de sr in sy cs us sy id
6808 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
6809 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
6810 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
6812 # dec_osf (tru64): $11*$12
6813 # $ vmstat 1 2
6814 # Virtual Memory Statistics: (pagesize = 8192)
6815 # procs memory pages intr cpu
6816 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
6817 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
6818 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
6819 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
6821 # gnu (hurd): $7*$8
6822 # $ vmstat -k 1 2
6823 # (pagesize: 4, size: 512288, swap size: 894972)
6824 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
6825 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
6826 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
6827 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
6829 # -nto (qnx has no swap)
6830 #-irix
6831 #-svr5 (scosysv)
6833 my $perlscript = "";
6834 # Make a perl script that detects the OS ($^O) and runs
6835 # the appropriate vmstat command
6836 for my $os (keys %vmstat) {
6837 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
6838 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
6839 $vmstat{$os}[1] . '}"` }';
6841 $script = "perl -e " . ::Q($perlscript);
6843 return $script;
6847 sub too_fast_remote_login($) {
6848 my $self = shift;
6849 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
6850 # sshd normally allows 10 simultaneous logins
6851 # A login takes time_to_login
6852 # So time_to_login/5 should be safe
6853 # If now <= last_login + time_to_login/5: Then it is too soon.
6854 my $too_fast = (::now() <= $self->{'last_login_at'}
6855 + $self->{'time_to_login'}/5);
6856 ::debug("run", "Too fast? $too_fast ");
6857 return $too_fast;
6858 } else {
6859 # No logins so far (or time_to_login not computed): it is not too fast
6860 return 0;
6864 sub last_login_at($) {
6865 my $self = shift;
6866 return $self->{'last_login_at'};
6869 sub set_last_login_at($$) {
6870 my $self = shift;
6871 $self->{'last_login_at'} = shift;
6874 sub loadavg_too_high($) {
6875 my $self = shift;
6876 my $loadavg = $self->loadavg();
6877 if(defined $loadavg) {
6878 ::debug("load", "Load $loadavg > ",$self->max_loadavg());
6879 return $loadavg >= $self->max_loadavg();
6880 } else {
6881 # Unknown load: Assume load is too high
6882 return 1;
6887 my $cmd;
6888 sub loadavg_cmd() {
6889 if(not $cmd) {
6890 # aix => "ps -ae -o state,command" # state wrong
6891 # bsd => "ps ax -o state,command"
6892 # sysv => "ps -ef -o s -o comm"
6893 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
6894 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6895 # awk '{print $2,$1}'
6896 # dec_osf => bsd
6897 # dragonfly => bsd
6898 # freebsd => bsd
6899 # gnu => bsd
6900 # hpux => ps -el|awk '{print $2,$14,$15}'
6901 # irix => ps -ef -o state -o comm
6902 # linux => bsd
6903 # minix => ps el|awk '{print \$1,\$11}'
6904 # mirbsd => bsd
6905 # netbsd => bsd
6906 # openbsd => bsd
6907 # solaris => sysv
6908 # svr5 => sysv
6909 # ultrix => ps -ax | awk '{print $3,$5}'
6910 # unixware => ps -el|awk '{print $2,$14,$15}'
6911 my $ps = ::spacefree(1,q{
6912 $sysv="ps -ef -o s -o comm";
6913 $sysv2="ps -ef -o state -o comm";
6914 $bsd="ps ax -o state,command";
6915 # Treat threads as processes
6916 $bsd2="ps axH -o state,command";
6917 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
6918 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
6919 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
6920 awk '{print $2,$1}' };
6921 $dummy="echo S COMMAND;echo R dummy";
6922 %ps=(
6923 # TODO Find better code for AIX/Android
6924 'aix' => "uptime",
6925 'android' => "uptime",
6926 'cygwin' => $cygwin,
6927 'darwin' => $bsd,
6928 'dec_osf' => $sysv2,
6929 'dragonfly' => $bsd,
6930 'freebsd' => $bsd2,
6931 'gnu' => $bsd,
6932 'hpux' => $psel,
6933 'irix' => $sysv2,
6934 'linux' => $bsd2,
6935 'minix' => "ps el|awk '{print \$1,\$11}'",
6936 'mirbsd' => $bsd,
6937 'msys' => $cygwin,
6938 'netbsd' => $bsd,
6939 'nto' => $dummy,
6940 'openbsd' => $bsd,
6941 'solaris' => $sysv,
6942 'svr5' => $psel,
6943 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
6944 'MSWin32' => $sysv,
6946 print `$ps{$^O}`;
6948 # The command is too long for csh, so base64_wrap the command
6949 $cmd = Job::base64_wrap($ps);
6951 return $cmd;
6956 sub loadavg($) {
6957 # If the currently know loadavg is too old:
6958 # Recompute a new one in the background
6959 # The load average is computed as the number of processes waiting for disk
6960 # or CPU right now. So it is the server load this instant and not averaged over
6961 # several minutes. This is needed so GNU Parallel will at most start one job
6962 # that will push the load over the limit.
6964 # Returns:
6965 # $last_loadavg = last load average computed (undef if none)
6966 my $self = shift;
6967 # Should we update the loadavg file?
6968 my $update_loadavg_file = 0;
6969 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
6970 local $/; # $/ = undef => slurp whole file
6971 my $load_out = <$load_fh>;
6972 close $load_fh;
6973 if($load_out =~ /\S/) {
6974 # Content can be empty if ~/ is on NFS
6975 # due to reading being non-atomic.
6977 # Count lines starting with D,O,R but command does not start with [
6978 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
6979 if($load > 0) {
6980 # load is overestimated by 1
6981 $self->{'loadavg'} = $load - 1;
6982 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
6983 } elsif ($load_out=~/average: (\d+.\d+)/) {
6984 # AIX does not support instant load average
6985 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
6986 $self->{'loadavg'} = $1;
6987 } else {
6988 ::die_bug("loadavg_invalid_content: " .
6989 $self->{'loadavg_file'} . "\n$load_out");
6992 $update_loadavg_file = 1;
6993 } else {
6994 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
6995 $self->{'loadavg'} = undef;
6996 $update_loadavg_file = 1;
6998 if($update_loadavg_file) {
6999 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
7000 $self->{'last_loadavg_update'} = time;
7001 my $dir = ::dirname($self->{'swap_activity_file'});
7002 -d $dir or eval { File::Path::mkpath($dir); };
7003 -w $dir or ::die_bug("Cannot write to $dir");
7004 my $cmd = "";
7005 if($self->{'string'} ne ":") {
7006 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
7007 ::Q(loadavg_cmd());
7008 } else {
7009 $cmd .= loadavg_cmd();
7011 # As the command can take long to run if run remote
7012 # save it to a tmp file before moving it to the correct file
7013 ::debug("load", "Update load\n");
7014 my $file = $self->{'loadavg_file'};
7015 # tmpfile on same filesystem as $file
7016 my $tmpfile = $file.$$;
7017 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
7019 return $self->{'loadavg'};
7022 sub max_loadavg($) {
7023 my $self = shift;
7024 # If --load is a file it might be changed
7025 if($Global::max_load_file) {
7026 my $mtime = (stat($Global::max_load_file))[9];
7027 if($mtime > $Global::max_load_file_last_mod) {
7028 $Global::max_load_file_last_mod = $mtime;
7029 for my $sshlogin (values %Global::host) {
7030 $sshlogin->set_max_loadavg(undef);
7034 if(not defined $self->{'max_loadavg'}) {
7035 $self->{'max_loadavg'} =
7036 $self->compute_max_loadavg($opt::load);
7038 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
7039 return $self->{'max_loadavg'};
7042 sub set_max_loadavg($$) {
7043 my $self = shift;
7044 $self->{'max_loadavg'} = shift;
7047 sub compute_max_loadavg($) {
7048 # Parse the max loadaverage that the user asked for using --load
7049 # Returns:
7050 # max loadaverage
7051 my $self = shift;
7052 my $loadspec = shift;
7053 my $load;
7054 if(defined $loadspec) {
7055 if($loadspec =~ /^\+(\d+)$/) {
7056 # E.g. --load +2
7057 my $j = $1;
7058 $load =
7059 $self->ncpus() + $j;
7060 } elsif ($loadspec =~ /^-(\d+)$/) {
7061 # E.g. --load -2
7062 my $j = $1;
7063 $load =
7064 $self->ncpus() - $j;
7065 } elsif ($loadspec =~ /^(\d+)\%$/) {
7066 my $j = $1;
7067 $load =
7068 $self->ncpus() * $j / 100;
7069 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
7070 $load = $1;
7071 } elsif (-f $loadspec) {
7072 $Global::max_load_file = $loadspec;
7073 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
7074 if(open(my $in_fh, "<", $Global::max_load_file)) {
7075 my $opt_load_file = join("",<$in_fh>);
7076 close $in_fh;
7077 $load = $self->compute_max_loadavg($opt_load_file);
7078 } else {
7079 ::error("Cannot open $loadspec.");
7080 ::wait_and_exit(255);
7082 } else {
7083 ::error("Parsing of --load failed.");
7084 ::die_usage();
7086 if($load < 0.01) {
7087 $load = 0.01;
7090 return $load;
7093 sub time_to_login($) {
7094 my $self = shift;
7095 return $self->{'time_to_login'};
7098 sub set_time_to_login($$) {
7099 my $self = shift;
7100 $self->{'time_to_login'} = shift;
7103 sub max_jobs_running($) {
7104 my $self = shift;
7105 if(not defined $self->{'max_jobs_running'}) {
7106 my $nproc = $self->compute_number_of_processes($opt::jobs);
7107 $self->set_max_jobs_running($nproc);
7109 return $self->{'max_jobs_running'};
7112 sub orig_max_jobs_running($) {
7113 my $self = shift;
7114 return $self->{'orig_max_jobs_running'};
7117 sub compute_number_of_processes($) {
7118 # Number of processes wanted and limited by system resources
7119 # Returns:
7120 # Number of processes
7121 my $self = shift;
7122 my $opt_P = shift;
7123 my $wanted_processes = $self->user_requested_processes($opt_P);
7124 if(not defined $wanted_processes) {
7125 $wanted_processes = $Global::default_simultaneous_sshlogins;
7127 ::debug("load", "Wanted procs: $wanted_processes\n");
7128 my $system_limit =
7129 $self->processes_available_by_system_limit($wanted_processes);
7130 ::debug("load", "Limited to procs: $system_limit\n");
7131 return $system_limit;
7135 my @children;
7136 my $max_system_proc_reached;
7137 my $more_filehandles;
7138 my %fh;
7139 my $tmpfhname;
7140 my $count_jobs_already_read;
7141 my @jobs;
7142 my $job;
7143 my @args;
7144 my $arg;
7146 sub reserve_filehandles($) {
7147 # Reserves filehandle
7148 my $n = shift;
7149 for (1..$n) {
7150 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
7154 sub reserve_process() {
7155 # Spawn a dummy process
7156 my $child;
7157 if($child = fork()) {
7158 push @children, $child;
7159 $Global::unkilled_children{$child} = 1;
7160 } elsif(defined $child) {
7161 # This is the child
7162 # The child takes one process slot
7163 # It will be killed later
7164 $SIG{'TERM'} = $Global::original_sig{'TERM'};
7165 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
7166 # The exec does not work on Cygwin and QNX
7167 sleep 10101010;
7168 } else {
7169 # 'exec sleep' takes less RAM than sleeping in perl
7170 exec 'sleep', 10101;
7172 exit(0);
7173 } else {
7174 # Failed to spawn
7175 $max_system_proc_reached = 1;
7179 sub get_args_or_jobs() {
7180 # Get an arg or a job (depending on mode)
7181 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
7182 # Skip: No need to get args
7183 return 1;
7184 } elsif(defined $opt::retries and $count_jobs_already_read) {
7185 # For retries we may need to run all jobs on this sshlogin
7186 # so include the already read jobs for this sshlogin
7187 $count_jobs_already_read--;
7188 return 1;
7189 } else {
7190 if($opt::X or $opt::m) {
7191 # The arguments may have to be re-spread over several jobslots
7192 # So pessimistically only read one arg per jobslot
7193 # instead of a full commandline
7194 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
7195 if($Global::JobQueue->empty()) {
7196 return 0;
7197 } else {
7198 $job = $Global::JobQueue->get();
7199 push(@jobs, $job);
7200 return 1;
7202 } else {
7203 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
7204 push(@args, $arg);
7205 return 1;
7207 } else {
7208 # If there are no more command lines, then we have a process
7209 # per command line, so no need to go further
7210 if($Global::JobQueue->empty()) {
7211 return 0;
7212 } else {
7213 $job = $Global::JobQueue->get();
7214 # Replacement must happen here due to seq()
7215 $job and $job->replaced();
7216 push(@jobs, $job);
7217 return 1;
7223 sub cleanup() {
7224 # Cleanup: Close the files
7225 for (values %fh) { close $_ }
7226 # Cleanup: Kill the children
7227 for my $pid (@children) {
7228 kill 9, $pid;
7229 waitpid($pid,0);
7230 delete $Global::unkilled_children{$pid};
7232 # Cleanup: Unget the command_lines or the @args
7233 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args);
7234 @args = ();
7235 $Global::JobQueue->unget(@jobs);
7236 @jobs = ();
7239 sub processes_available_by_system_limit($) {
7240 # If the wanted number of processes is bigger than the system limits:
7241 # Limit them to the system limits
7242 # Limits are: File handles, number of input lines, processes,
7243 # and taking > 1 second to spawn 10 extra processes
7244 # Returns:
7245 # Number of processes
7246 my $self = shift;
7247 my $wanted_processes = shift;
7248 my $system_limit = 0;
7249 my $slow_spawning_warning_printed = 0;
7250 my $time = time;
7251 $more_filehandles = 1;
7252 $tmpfhname = "TmpFhNamE";
7254 # perl uses 7 filehandles for something?
7255 # parallel uses 1 for memory_usage
7256 # parallel uses 4 for ?
7257 reserve_filehandles(12);
7258 # Two processes for load avg and ?
7259 reserve_process();
7260 reserve_process();
7262 # For --retries count also jobs already run
7263 $count_jobs_already_read = $Global::JobQueue->next_seq();
7264 my $wait_time_for_getting_args = 0;
7265 my $start_time = time;
7266 while(1) {
7267 $system_limit >= $wanted_processes and last;
7268 not $more_filehandles and last;
7269 $max_system_proc_reached and last;
7271 my $before_getting_arg = time;
7272 if(!$Global::dummy_jobs) {
7273 get_args_or_jobs() or last;
7275 $wait_time_for_getting_args += time - $before_getting_arg;
7276 $system_limit++;
7278 # Every simultaneous process uses 2 filehandles to write to
7279 # and 2 filehandles to read from
7280 reserve_filehandles(4);
7282 # System process limit
7283 reserve_process();
7285 my $forktime = time - $time - $wait_time_for_getting_args;
7286 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
7287 $forktime,
7288 " (processes so far: ", $system_limit,")\n");
7289 if($system_limit > 10 and
7290 $forktime > 1 and
7291 $forktime > $system_limit * 0.01) {
7292 # It took more than 0.01 second to fork a processes on avg.
7293 # Give the user a warning. He can press Ctrl-C if this
7294 # sucks.
7295 ::warning_once(
7296 "Starting $system_limit processes took > $forktime sec.",
7297 "Consider adjusting -j. Press CTRL-C to stop.");
7300 cleanup();
7302 if($system_limit < $wanted_processes) {
7303 # The system_limit is less than the wanted_processes
7304 if($system_limit < 1 and not $Global::JobQueue->empty()) {
7305 ::warning("Cannot spawn any jobs.",
7306 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
7307 "or increasing 'nproc' in /etc/security/limits.conf",
7308 "or increasing /proc/sys/kernel/pid_max");
7309 ::wait_and_exit(255);
7311 if(not $more_filehandles) {
7312 ::warning("Only enough file handles to run ".
7313 $system_limit. " jobs in parallel.",
7314 "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'",
7315 "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)",
7316 "or increasing 'nofile' in /etc/security/limits.conf",
7317 "or increasing /proc/sys/fs/file-max");
7319 if($max_system_proc_reached) {
7320 ::warning("Only enough available processes to run ".
7321 $system_limit. " jobs in parallel.",
7322 "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)",
7323 "or increasing 'nproc' in /etc/security/limits.conf",
7324 "or increasing /proc/sys/kernel/pid_max");
7327 if($] == 5.008008 and $system_limit > 1000) {
7328 # https://savannah.gnu.org/bugs/?36942
7329 $system_limit = 1000;
7331 if($Global::JobQueue->empty()) {
7332 $system_limit ||= 1;
7334 if($self->string() ne ":" and
7335 $system_limit > $Global::default_simultaneous_sshlogins) {
7336 $system_limit =
7337 $self->simultaneous_sshlogin_limit($system_limit);
7339 return $system_limit;
7343 sub simultaneous_sshlogin_limit($) {
7344 # Test by logging in wanted number of times simultaneously
7345 # Returns:
7346 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
7347 my $self = shift;
7348 my $wanted_processes = shift;
7349 if($self->{'time_to_login'}) {
7350 return $wanted_processes;
7353 # Try twice because it guesses wrong sometimes
7354 # Choose the minimal
7355 my $ssh_limit =
7356 ::min($self->simultaneous_sshlogin($wanted_processes),
7357 $self->simultaneous_sshlogin($wanted_processes));
7358 if($ssh_limit < $wanted_processes) {
7359 my $serverlogin = $self->serverlogin();
7360 ::warning("ssh to $serverlogin only allows ".
7361 "for $ssh_limit simultaneous logins.",
7362 "You may raise this by changing",
7363 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
7364 "You can also try --sshdelay 0.1",
7365 "Using only ".($ssh_limit-1)." connections ".
7366 "to avoid race conditions.");
7367 # Race condition can cause problem if using all sshs.
7368 if($ssh_limit > 1) { $ssh_limit -= 1; }
7370 return $ssh_limit;
7373 sub simultaneous_sshlogin($) {
7374 # Using $sshlogin try to see if we can do $wanted_processes
7375 # simultaneous logins
7376 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
7377 # grep simul|wc -l
7378 # Input:
7379 # $wanted_processes = Try for this many logins in parallel
7380 # Returns:
7381 # $ssh_limit = Number of succesful parallel logins
7382 local $/ = "\n";
7383 my $self = shift;
7384 my $wanted_processes = shift;
7385 my $sshcmd = $self->sshcommand();
7386 my $serverlogin = $self->serverlogin();
7387 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
7388 # TODO sh -c wrapper to work for csh
7389 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
7390 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
7391 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
7392 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
7393 ::die_bug("simultaneouslogin");
7394 my $ssh_limit = <$simul_fh>;
7395 close $simul_fh;
7396 chomp $ssh_limit;
7397 return $ssh_limit;
7400 sub set_ncpus($$) {
7401 my $self = shift;
7402 $self->{'ncpus'} = shift;
7405 sub user_requested_processes($) {
7406 # Parse the number of processes that the user asked for using -j
7407 # Input:
7408 # $opt_P = string formatted as for -P
7409 # Returns:
7410 # $processes = the number of processes to run on this sshlogin
7411 my $self = shift;
7412 my $opt_P = shift;
7413 my $processes;
7414 if(defined $opt_P) {
7415 if($opt_P =~ /^\+(\d+)$/) {
7416 # E.g. -P +2
7417 my $j = $1;
7418 $processes =
7419 $self->ncpus() + $j;
7420 } elsif ($opt_P =~ /^-(\d+)$/) {
7421 # E.g. -P -2
7422 my $j = $1;
7423 $processes =
7424 $self->ncpus() - $j;
7425 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
7426 # E.g. -P 10.5%
7427 my $j = $1;
7428 $processes =
7429 $self->ncpus() * $j / 100;
7430 } elsif ($opt_P =~ /^(\d+)$/) {
7431 $processes = $1;
7432 if($processes == 0) {
7433 # -P 0 = infinity (or at least close)
7434 $processes = $Global::infinity;
7436 } elsif (-f $opt_P) {
7437 $Global::max_procs_file = $opt_P;
7438 if(open(my $in_fh, "<", $Global::max_procs_file)) {
7439 my $opt_P_file = join("",<$in_fh>);
7440 close $in_fh;
7441 $processes = $self->user_requested_processes($opt_P_file);
7442 } else {
7443 ::error("Cannot open $opt_P.");
7444 ::wait_and_exit(255);
7446 } else {
7447 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
7448 ::die_usage();
7450 $processes = ::ceil($processes);
7452 return $processes;
7455 sub ncpus($) {
7456 # Number of CPU threads
7457 # --use_sockets_instead_of_threads = count socket instead
7458 # --use_cores_instead_of_threads = count physical cores instead
7459 # Returns:
7460 # $ncpus = number of cpu (threads) on this sshlogin
7461 local $/ = "\n";
7462 my $self = shift;
7463 if(not defined $self->{'ncpus'}) {
7464 my $sshcmd = $self->sshcommand();
7465 my $serverlogin = $self->serverlogin();
7466 if($serverlogin eq ":") {
7467 if($opt::use_sockets_instead_of_threads) {
7468 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
7469 } elsif($opt::use_cores_instead_of_threads) {
7470 $self->{'ncpus'} = socket_core_thread()->{'cores'};
7471 } else {
7472 $self->{'ncpus'} = socket_core_thread()->{'threads'};
7474 } else {
7475 my $ncpu;
7476 ::debug("init","echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7477 if($opt::use_sockets_instead_of_threads
7479 $opt::use_cpus_instead_of_cores) {
7480 $ncpu =
7481 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
7482 } elsif($opt::use_cores_instead_of_threads) {
7483 $ncpu =
7484 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
7485 } else {
7486 $ncpu =
7487 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
7489 chomp $ncpu;
7490 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
7491 $self->{'ncpus'} = $ncpu;
7492 } else {
7493 ::warning("Could not figure out ".
7494 "number of cpus on $serverlogin ($ncpu). Using 1.");
7495 $self->{'ncpus'} = 1;
7499 return $self->{'ncpus'};
7503 sub nproc() {
7504 # Returns:
7505 # Number of threads using `nproc`
7506 my $no_of_threads = ::qqx("nproc");
7507 chomp $no_of_threads;
7508 return $no_of_threads;
7511 sub no_of_sockets() {
7512 return socket_core_thread()->{'sockets'};
7515 sub no_of_cores() {
7516 return socket_core_thread()->{'cores'};
7519 sub no_of_threads() {
7520 return socket_core_thread()->{'threads'};
7523 sub socket_core_thread() {
7524 # Returns:
7526 # 'sockets' => #sockets = number of socket with CPU present
7527 # 'cores' => #cores = number of physical cores
7528 # 'threads' => #threads = number of compute cores (hyperthreading)
7529 # 'active' => #taskset_threads = number of taskset limited cores
7531 my $cpu;
7532 my $cached_cpuspec = $Global::cache_dir . "/tmp/sshlogin/" .
7533 ::hostname() . "/cpuspec";
7534 if(-e $cached_cpuspec and -M $cached_cpuspec < 1) {
7535 # Reading cached copy instead of /proc/cpuinfo is 17 ms faster
7536 local $/ = "\n";
7537 if(open(my $in_fh, "<", $cached_cpuspec)) {
7538 ::debug("init","Read $cached_cpuspec\n");
7539 $cpu->{'sockets'} = int(<$in_fh>);
7540 $cpu->{'cores'} = int(<$in_fh>);
7541 $cpu->{'threads'} = int(<$in_fh>);
7542 close $in_fh;
7545 if ($^O eq 'linux') {
7546 $cpu = sct_gnu_linux($cpu);
7547 } elsif ($^O eq 'android') {
7548 $cpu = sct_android($cpu);
7549 } elsif ($^O eq 'freebsd') {
7550 $cpu = sct_freebsd($cpu);
7551 } elsif ($^O eq 'netbsd') {
7552 $cpu = sct_netbsd($cpu);
7553 } elsif ($^O eq 'openbsd') {
7554 $cpu = sct_openbsd($cpu);
7555 } elsif ($^O eq 'gnu') {
7556 $cpu = sct_hurd($cpu);
7557 } elsif ($^O eq 'darwin') {
7558 $cpu = sct_darwin($cpu);
7559 } elsif ($^O eq 'solaris') {
7560 $cpu = sct_solaris($cpu);
7561 } elsif ($^O eq 'aix') {
7562 $cpu = sct_aix($cpu);
7563 } elsif ($^O eq 'hpux') {
7564 $cpu = sct_hpux($cpu);
7565 } elsif ($^O eq 'nto') {
7566 $cpu = sct_qnx($cpu);
7567 } elsif ($^O eq 'svr5') {
7568 $cpu = sct_openserver($cpu);
7569 } elsif ($^O eq 'irix') {
7570 $cpu = sct_irix($cpu);
7571 } elsif ($^O eq 'dec_osf') {
7572 $cpu = sct_tru64($cpu);
7573 } else {
7574 # Try all methods until we find something that works
7575 $cpu = (sct_gnu_linux($cpu)
7576 || sct_android($cpu)
7577 || sct_freebsd($cpu)
7578 || sct_netbsd($cpu)
7579 || sct_openbsd($cpu)
7580 || sct_hurd($cpu)
7581 || sct_darwin($cpu)
7582 || sct_solaris($cpu)
7583 || sct_aix($cpu)
7584 || sct_hpux($cpu)
7585 || sct_qnx($cpu)
7586 || sct_openserver($cpu)
7587 || sct_irix($cpu)
7588 || sct_tru64($cpu)
7591 if(not grep { $_ > 0 } values %$cpu) {
7592 $cpu = undef;
7594 # Write cached copy instead of /proc/cpuinfo is 17 ms faster
7595 if($cpu and open(my $out_fh, ">", $cached_cpuspec)) {
7596 print $out_fh (map { chomp; "$_\n" }
7597 $cpu->{'sockets'},
7598 $cpu->{'cores'},
7599 $cpu->{'threads'});
7600 close $out_fh;
7602 if(not $cpu) {
7603 my $nproc = nproc();
7604 if($nproc) {
7605 $cpu->{'sockets'} =
7606 $cpu->{'cores'} =
7607 $cpu->{'threads'} =
7608 $cpu->{'active'} =
7609 $nproc;
7612 if(not $cpu) {
7613 ::warning("Cannot figure out number of cpus. Using 1.");
7614 $cpu->{'sockets'} =
7615 $cpu->{'cores'} =
7616 $cpu->{'threads'} =
7617 $cpu->{'active'} =
7620 $cpu->{'sockets'} ||= 1;
7621 $cpu->{'threads'} ||= $cpu->{'cores'};
7622 $cpu->{'active'} ||= $cpu->{'threads'};
7623 chomp($cpu->{'sockets'},
7624 $cpu->{'cores'},
7625 $cpu->{'threads'},
7626 $cpu->{'active'});
7627 # Choose minimum of active and actual
7628 my $mincpu;
7629 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
7630 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
7631 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
7632 return $mincpu;
7635 sub sct_gnu_linux($) {
7636 # Returns:
7637 # { 'sockets' => #sockets
7638 # 'cores' => #cores
7639 # 'threads' => #threads
7640 # 'active' => #taskset_threads }
7641 my $cpu = shift;
7643 sub read_topology($) {
7644 my $prefix = shift;
7645 my %sibiling;
7646 my %socket;
7647 my $thread;
7648 for($thread = 0;
7649 -r "$prefix/cpu$thread/topology/physical_package_id";
7650 $thread++) {
7651 open(my $fh,"<",
7652 "$prefix/cpu$thread/topology/physical_package_id")
7653 || die;
7654 $socket{<$fh>}++;
7655 close $fh;
7657 for($thread = 0;
7658 -r "$prefix/cpu$thread/topology/thread_siblings";
7659 $thread++) {
7660 open(my $fh,"<",
7661 "$prefix/cpu$thread/topology/thread_siblings")
7662 || die;
7663 $sibiling{<$fh>}++;
7664 close $fh;
7666 $cpu->{'sockets'} = keys %socket;
7667 $cpu->{'cores'} = keys %sibiling;
7668 $cpu->{'threads'} = $thread;
7671 sub read_cpuinfo(@) {
7672 my @cpuinfo = @_;
7673 $cpu->{'sockets'} = 0;
7674 $cpu->{'cores'} = 0;
7675 $cpu->{'threads'} = 0;
7676 my %seen;
7677 my %phy_seen;
7678 my $physicalid;
7679 for(@cpuinfo) {
7680 # physical id : 0
7681 if(/^physical id.*[:](.*)/) {
7682 $physicalid = $1;
7683 if(not $phy_seen{$1}++) {
7684 $cpu->{'sockets'}++;
7687 # core id : 3
7688 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
7689 $cpu->{'cores'}++;
7691 # processor : 2
7692 /^processor.*[:]\s*\d/i and $cpu->{'threads'}++;
7694 $cpu->{'cores'} ||= $cpu->{'threads'};
7695 $cpu->{'cpus'} ||= $cpu->{'threads'};
7696 $cpu->{'sockets'} ||= 1;
7699 sub read_lscpu(@) {
7700 my @lscpu = @_;
7701 my $threads_per_core;
7702 my $cores_per_socket;
7703 for(@lscpu) {
7704 /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1;
7705 /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1;
7706 /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1;
7707 /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2;
7709 if($threads_per_core and $cpu->{'threads'}) {
7710 $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core;
7712 $cpu->{'cpus'} ||= $cpu->{'threads'};
7715 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
7716 my @cpuinfo;
7717 my @lscpu;
7718 if($ENV{'PARALLEL_CPUINFO'}) {
7719 # Use CPUINFO from environment - used for testing only
7720 read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'});
7721 } elsif($ENV{'PARALLEL_LSCPU'}) {
7722 # Use LSCPU from environment - used for testing only
7723 read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'});
7724 } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") {
7725 # Use CPUPREFIX from environment - used for testing only
7726 read_topology($ENV{'PARALLEL_CPUPREFIX'});
7727 } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) {
7728 # Skip /proc/cpuinfo - already set
7729 } else {
7730 # Not debugging: Look at this computer
7731 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7733 open(my $in_fh, "-|", "lscpu")) {
7734 # Parse output from lscpu
7735 read_lscpu(<$in_fh>);
7736 close $in_fh;
7738 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7740 -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") {
7741 read_topology("/sys/devices/system/cpu");
7743 if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'})
7745 open(my $in_fh, "<", "/proc/cpuinfo")) {
7746 # Read /proc/cpuinfo
7747 read_cpuinfo(<$in_fh>);
7748 close $in_fh;
7751 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
7752 # if 'taskset' is used to limit number of threads
7753 if(open(my $in_fh, "<", "/proc/self/status")) {
7754 while(<$in_fh>) {
7755 if(/^Cpus_allowed:\s*(\S+)/) {
7756 my $a = $1;
7757 $a =~ tr/,//d;
7758 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
7761 close $in_fh;
7764 return $cpu;
7767 sub sct_android($) {
7768 # Returns:
7769 # { 'sockets' => #sockets
7770 # 'cores' => #cores
7771 # 'threads' => #threads
7772 # 'active' => #taskset_threads }
7773 # Use GNU/Linux
7774 return sct_gnu_linux($_[0]);
7777 sub sct_freebsd($) {
7778 # Returns:
7779 # { 'sockets' => #sockets
7780 # 'cores' => #cores
7781 # 'threads' => #threads
7782 # 'active' => #taskset_threads }
7783 local $/ = "\n";
7784 my $cpu = shift;
7785 $cpu->{'cores'} ||=
7786 (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
7788 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
7789 $cpu->{'threads'} ||=
7790 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
7792 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
7793 return $cpu;
7796 sub sct_netbsd($) {
7797 # Returns:
7798 # { 'sockets' => #sockets
7799 # 'cores' => #cores
7800 # 'threads' => #threads
7801 # 'active' => #taskset_threads }
7802 local $/ = "\n";
7803 my $cpu = shift;
7804 $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu");
7805 return $cpu;
7808 sub sct_openbsd($) {
7809 # Returns:
7810 # { 'sockets' => #sockets
7811 # 'cores' => #cores
7812 # 'threads' => #threads
7813 # 'active' => #taskset_threads }
7814 local $/ = "\n";
7815 my $cpu = shift;
7816 $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu');
7817 return $cpu;
7820 sub sct_hurd($) {
7821 # Returns:
7822 # { 'sockets' => #sockets
7823 # 'cores' => #cores
7824 # 'threads' => #threads
7825 # 'active' => #taskset_threads }
7826 local $/ = "\n";
7827 my $cpu = shift;
7828 $cpu->{'cores'} ||= ::qqx("nproc");
7829 return $cpu;
7832 sub sct_darwin($) {
7833 # Returns:
7834 # { 'sockets' => #sockets
7835 # 'cores' => #cores
7836 # 'threads' => #threads
7837 # 'active' => #taskset_threads }
7838 local $/ = "\n";
7839 my $cpu = shift;
7840 $cpu->{'cores'} ||=
7841 (::qqx('sysctl -n hw.physicalcpu')
7843 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
7844 $cpu->{'threads'} ||=
7845 (::qqx('sysctl -n hw.logicalcpu')
7847 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
7848 return $cpu;
7851 sub sct_solaris($) {
7852 # Returns:
7853 # { 'sockets' => #sockets
7854 # 'cores' => #cores
7855 # 'threads' => #threads
7856 # 'active' => #taskset_threads }
7857 local $/ = "\n";
7858 my $cpu = shift;
7859 if(not $cpu->{'cores'}) {
7860 if(-x "/usr/bin/kstat") {
7861 my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id");
7862 if($#chip_id >= 0) {
7863 $cpu->{'sockets'} ||= $#chip_id +1;
7865 my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq");
7866 if($#core_id >= 0) {
7867 $cpu->{'cores'} ||= $#core_id +1;
7870 if(-x "/usr/sbin/psrinfo") {
7871 my @psrinfo = ::qqx("/usr/sbin/psrinfo -p");
7872 if($#psrinfo >= 0) {
7873 $cpu->{'sockets'} ||= $psrinfo[0];
7876 if(-x "/usr/sbin/prtconf") {
7877 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
7878 if($#prtconf >= 0) {
7879 $cpu->{'cores'} ||= $#prtconf +1;
7883 return $cpu;
7886 sub sct_aix($) {
7887 # Returns:
7888 # { 'sockets' => #sockets
7889 # 'cores' => #cores
7890 # 'threads' => #threads
7891 # 'active' => #taskset_threads }
7892 local $/ = "\n";
7893 my $cpu = shift;
7894 if(not $cpu->{'cores'}) {
7895 if(-x "/usr/sbin/lscfg") {
7896 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
7897 $cpu->{'cores'} = <$in_fh>;
7898 close $in_fh;
7902 if(not $cpu->{'threads'}) {
7903 if(-x "/usr/bin/vmstat") {
7904 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
7905 while(<$in_fh>) {
7906 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
7908 close $in_fh;
7912 return $cpu;
7915 sub sct_hpux($) {
7916 # Returns:
7917 # { 'sockets' => #sockets
7918 # 'cores' => #cores
7919 # 'threads' => #threads
7920 # 'active' => #taskset_threads }
7921 local $/ = "\n";
7922 my $cpu = shift;
7923 $cpu->{'cores'} ||=
7924 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
7925 $cpu->{'threads'} ||=
7926 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
7927 return $cpu;
7930 sub sct_qnx($) {
7931 # Returns:
7932 # { 'sockets' => #sockets
7933 # 'cores' => #cores
7934 # 'threads' => #threads
7935 # 'active' => #taskset_threads }
7936 local $/ = "\n";
7937 my $cpu = shift;
7938 # BUG: It is not known how to calculate this.
7940 return $cpu;
7943 sub sct_openserver($) {
7944 # Returns:
7945 # { 'sockets' => #sockets
7946 # 'cores' => #cores
7947 # 'threads' => #threads
7948 # 'active' => #taskset_threads }
7949 local $/ = "\n";
7950 my $cpu = shift;
7951 if(not $cpu->{'cores'}) {
7952 if(-x "/usr/sbin/psrinfo") {
7953 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
7954 if($#psrinfo >= 0) {
7955 $cpu->{'cores'} = $#psrinfo +1;
7959 $cpu->{'sockets'} ||= $cpu->{'cores'};
7960 return $cpu;
7963 sub sct_irix($) {
7964 # Returns:
7965 # { 'sockets' => #sockets
7966 # 'cores' => #cores
7967 # 'threads' => #threads
7968 # 'active' => #taskset_threads }
7969 local $/ = "\n";
7970 my $cpu = shift;
7971 $cpu->{'cores'} ||=
7972 ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
7973 return $cpu;
7976 sub sct_tru64($) {
7977 # Returns:
7978 # { 'sockets' => #sockets
7979 # 'cores' => #cores
7980 # 'threads' => #threads
7981 # 'active' => #taskset_threads }
7982 local $/ = "\n";
7983 my $cpu = shift;
7984 $cpu->{'cores'} ||= ::qqx("sizer -pr");
7985 $cpu->{'sockets'} ||= $cpu->{'cores'};
7986 $cpu->{'threads'} ||= $cpu->{'cores'};
7988 return $cpu;
7991 sub sshcommand($) {
7992 # Returns:
7993 # $sshcommand = the command (incl options) to run when using ssh
7994 my $self = shift;
7995 if (not defined $self->{'sshcommand'}) {
7996 $self->sshcommand_of_sshlogin();
7998 return $self->{'sshcommand'};
8001 sub serverlogin($) {
8002 # Returns:
8003 # $sshcommand = the command (incl options) to run when using ssh
8004 my $self = shift;
8005 if (not defined $self->{'serverlogin'}) {
8006 $self->sshcommand_of_sshlogin();
8008 return $self->{'serverlogin'};
8011 sub sshcommand_of_sshlogin($) {
8012 # Compute ssh command and serverlogin from sshlogin
8013 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
8014 # 'user@server' -> ('ssh','user@server')
8015 # 'myssh user@server' -> ('myssh','user@server')
8016 # 'myssh -l user server' -> ('myssh -l user','server')
8017 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
8018 # Sets:
8019 # $self->{'sshcommand'}
8020 # $self->{'serverlogin'}
8021 my $self = shift;
8022 my ($sshcmd, $serverlogin);
8023 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
8024 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
8025 if($self->{'string'} =~ /(.+) (\S+)$/) {
8026 # Own ssh command
8027 $sshcmd = $1; $serverlogin = $2;
8028 } else {
8029 # Normal ssh
8030 if($opt::controlmaster) {
8031 # Use control_path to make ssh faster
8032 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
8033 $sshcmd = $opt::ssh." -S ".$control_path;
8034 $serverlogin = $self->{'string'};
8035 if(not $self->{'control_path'}{$control_path}++) {
8036 # Master is not running for this control_path
8037 # Start it
8038 my $pid = fork();
8039 if($pid) {
8040 $Global::sshmaster{$pid} ||= 1;
8041 } else {
8042 $SIG{'TERM'} = undef;
8043 # Ignore the 'foo' being printed
8044 open(STDOUT,">","/dev/null");
8045 # STDERR >/dev/null to ignore
8046 open(STDERR,">","/dev/null");
8047 open(STDIN,"<","/dev/null");
8048 # Run a sleep that outputs data, so it will discover
8049 # if the ssh connection closes.
8050 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
8051 my @master = ($opt::ssh, "-MTS",
8052 $control_path, $serverlogin, "--", "perl", "-e",
8053 $sleep);
8054 exec(@master);
8057 } else {
8058 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
8062 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
8063 # convert user@server to '-l user server'
8064 # because lsh does not support user@server
8065 $sshcmd = $sshcmd." -l ".$1;
8068 $self->{'sshcommand'} = $sshcmd;
8069 $self->{'serverlogin'} = $serverlogin;
8072 sub control_path_dir($) {
8073 # Returns:
8074 # $control_path_dir = dir of control path (for -M)
8075 my $self = shift;
8076 if(not defined $self->{'control_path_dir'}) {
8077 $self->{'control_path_dir'} =
8078 # Use $ENV{'TMPDIR'} as that is typically not
8079 # NFS mounted
8080 File::Temp::tempdir($ENV{'TMPDIR'}
8081 . "/control_path_dir-XXXX",
8082 CLEANUP => 1);
8084 return $self->{'control_path_dir'};
8087 sub rsync_transfer_cmd($) {
8088 # Command to run to transfer a file
8089 # Input:
8090 # $file = filename of file to transfer
8091 # $workdir = destination dir
8092 # Returns:
8093 # $cmd = rsync command to run to transfer $file ("" if unreadable)
8094 my $self = shift;
8095 my $file = shift;
8096 my $workdir = shift;
8097 if(not -r $file) {
8098 ::warning($file. " is not readable and will not be transferred.");
8099 return "true";
8101 my $rsync_destdir;
8102 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
8103 if($relpath) {
8104 $rsync_destdir = ::shell_quote_file($workdir);
8105 } else {
8106 # rsync /foo/bar /
8107 $rsync_destdir = "/";
8109 $file = ::shell_quote_file($file);
8110 my $sshcmd = $self->sshcommand();
8111 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
8112 " -e".::Q($sshcmd);
8113 my $serverlogin = $self->serverlogin();
8114 # Make dir if it does not exist
8115 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
8116 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
8119 sub cleanup_cmd($$$) {
8120 # Command to run to remove the remote file
8121 # Input:
8122 # $file = filename to remove
8123 # $workdir = destination dir
8124 # Returns:
8125 # $cmd = ssh command to run to remove $file and empty parent dirs
8126 my $self = shift;
8127 my $file = shift;
8128 my $workdir = shift;
8129 my $f = $file;
8130 if($f =~ m:/\./:) {
8131 # foo/bar/./baz/quux => workdir/baz/quux
8132 # /foo/bar/./baz/quux => workdir/baz/quux
8133 $f =~ s:.*/\./:$workdir/:;
8134 } elsif($f =~ m:^[^/]:) {
8135 # foo/bar => workdir/foo/bar
8136 $f = $workdir."/".$f;
8138 my @subdirs = split m:/:, ::dirname($f);
8139 my @rmdir;
8140 my $dir = "";
8141 for(@subdirs) {
8142 $dir .= $_."/";
8143 unshift @rmdir, ::shell_quote_file($dir);
8145 my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : "";
8146 if(defined $opt::workdir and $opt::workdir eq "...") {
8147 $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
8149 my $rmf = "sh -c ".
8150 ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir);
8151 my $sshcmd = $self->sshcommand();
8152 my $serverlogin = $self->serverlogin();
8153 return "$sshcmd $serverlogin -- ".::Q("$rmf");
8157 my $rsync;
8159 sub rsync {
8160 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
8161 # If the version >= 3.1.0: downgrade to protocol 30
8162 # Returns:
8163 # $rsync = "rsync" or "rsync --protocol 30"
8164 if(not $rsync) {
8165 my @out = `rsync --version`;
8166 for (@out) {
8167 # rsync version 3.1.3 protocol version 31
8168 # rsync version v3.2.3 protocol version 31
8169 if(/version v?(\d+.\d+)(.\d+)?/) {
8170 if($1 >= 3.1) {
8171 # Version 3.1.0 or later: Downgrade to protocol 30
8172 $rsync = "rsync --protocol 30";
8173 } else {
8174 $rsync = "rsync";
8178 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
8180 return $rsync;
8185 package JobQueue;
8187 sub new($) {
8188 my $class = shift;
8189 my $commandref = shift;
8190 my $read_from = shift;
8191 my $context_replace = shift;
8192 my $max_number_of_args = shift;
8193 my $transfer_files = shift;
8194 my $return_files = shift;
8195 my $template_names = shift;
8196 my $template_contents = shift;
8197 my $commandlinequeue = CommandLineQueue->new
8198 ($commandref, $read_from, $context_replace, $max_number_of_args,
8199 $transfer_files, $return_files, $template_names, $template_contents);
8200 my @unget = ();
8201 return bless {
8202 'unget' => \@unget,
8203 'commandlinequeue' => $commandlinequeue,
8204 'this_job_no' => 0,
8205 'total_jobs' => undef,
8206 }, ref($class) || $class;
8209 sub get($) {
8210 my $self = shift;
8212 $self->{'this_job_no'}++;
8213 if(@{$self->{'unget'}}) {
8214 my $job = shift @{$self->{'unget'}};
8215 # {%} may have changed, so flush computed values
8216 $job && $job->flush_cache();
8217 return $job;
8218 } else {
8219 my $commandline = $self->{'commandlinequeue'}->get();
8220 if(defined $commandline) {
8221 return Job->new($commandline);
8222 } else {
8223 $self->{'this_job_no'}--;
8224 return undef;
8229 sub unget($) {
8230 my $self = shift;
8231 unshift @{$self->{'unget'}}, @_;
8232 $self->{'this_job_no'} -= @_;
8235 sub empty($) {
8236 my $self = shift;
8237 my $empty = (not @{$self->{'unget'}}) &&
8238 $self->{'commandlinequeue'}->empty();
8239 ::debug("run", "JobQueue->empty $empty ");
8240 return $empty;
8243 sub total_jobs($) {
8244 my $self = shift;
8245 if(not defined $self->{'total_jobs'}) {
8246 if($opt::pipe and not $opt::tee) {
8247 ::error("--pipe is incompatible with --eta/--bar/--shuf");
8248 ::wait_and_exit(255);
8250 if($opt::sqlworker) {
8251 $self->{'total_jobs'} = $Global::sql->total_jobs();
8252 } else {
8253 my $record;
8254 my @arg_records;
8255 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
8256 my $start = time;
8257 while($record = $record_queue->get()) {
8258 push @arg_records, $record;
8259 if(time - $start > 10) {
8260 ::warning("Reading ".scalar(@arg_records).
8261 " arguments took longer than 10 seconds.");
8262 $opt::eta && ::warning("Consider removing --eta.");
8263 $opt::bar && ::warning("Consider removing --bar.");
8264 $opt::shuf && ::warning("Consider removing --shuf.");
8265 last;
8268 while($record = $record_queue->get()) {
8269 push @arg_records, $record;
8271 if($opt::shuf and @arg_records) {
8272 my $i = @arg_records;
8273 while (--$i) {
8274 my $j = int rand($i+1);
8275 @arg_records[$i,$j] = @arg_records[$j,$i];
8278 $record_queue->unget(@arg_records);
8279 # $#arg_records = number of args - 1
8280 # We have read one @arg_record for this job (so add 1 more)
8281 my $num_args = $#arg_records + 2;
8282 # This jobs is not started so -1
8283 my $started_jobs = $self->{'this_job_no'} - 1;
8284 my $max_args = ::max($Global::max_number_of_args,1);
8285 $self->{'total_jobs'} = ::ceil($num_args / $max_args)
8286 + $started_jobs;
8287 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
8288 " ($num_args/$max_args + $started_jobs)\n");
8291 return $self->{'total_jobs'};
8294 sub flush_total_jobs($) {
8295 # Unset total_jobs to force recomputing
8296 my $self = shift;
8297 ::debug("init","flush Total jobs: ");
8298 $self->{'total_jobs'} = undef;
8301 sub next_seq($) {
8302 my $self = shift;
8304 return $self->{'commandlinequeue'}->seq();
8307 sub quote_args($) {
8308 my $self = shift;
8309 return $self->{'commandlinequeue'}->quote_args();
8313 package Job;
8315 sub new($) {
8316 my $class = shift;
8317 my $commandlineref = shift;
8318 return bless {
8319 'commandline' => $commandlineref, # CommandLine object
8320 'workdir' => undef, # --workdir
8321 # filehandle for stdin (used for --pipe)
8322 # filename for writing stdout to (used for --files)
8323 # remaining data not sent to stdin (used for --pipe)
8324 # tmpfiles to cleanup when job is done
8325 'unlink' => [],
8326 # amount of data sent via stdin (used for --pipe)
8327 'transfersize' => 0, # size of files using --transfer
8328 'returnsize' => 0, # size of files using --return
8329 'pid' => undef,
8330 # hash of { SSHLogins => number of times the command failed there }
8331 'failed' => undef,
8332 'sshlogin' => undef,
8333 # The commandline wrapped with rsync and ssh
8334 'sshlogin_wrap' => undef,
8335 'exitstatus' => undef,
8336 'exitsignal' => undef,
8337 # Timestamp for timeout if any
8338 'timeout' => undef,
8339 'virgin' => 1,
8340 # Output used for SQL and CSV-output
8341 'output' => { 1 => [], 2 => [] },
8342 'halfline' => { 1 => [], 2 => [] },
8343 }, ref($class) || $class;
8346 sub flush_cache($) {
8347 my $self = shift;
8348 $self->{'commandline'}->flush_cache();
8351 sub replaced($) {
8352 my $self = shift;
8353 $self->{'commandline'} or ::die_bug("commandline empty");
8354 return $self->{'commandline'}->replaced();
8357 sub seq($) {
8358 my $self = shift;
8359 return $self->{'commandline'}->seq();
8362 sub set_seq($$) {
8363 my $self = shift;
8364 return $self->{'commandline'}->set_seq(shift);
8367 sub slot($) {
8368 my $self = shift;
8369 return $self->{'commandline'}->slot();
8372 sub free_slot($) {
8373 my $self = shift;
8374 push @Global::slots, $self->slot();
8378 my($cattail);
8380 sub cattail() {
8381 # Returns:
8382 # $cattail = perl program for:
8383 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
8384 if(not $cattail) {
8385 $cattail = q{
8386 # cat followed by tail (possibly with rm as soon at the file is opened)
8387 # If $writerpid dead: finish after this round
8388 use Fcntl;
8389 $|=1;
8391 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
8392 if($read_file) {
8393 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
8394 } else {
8395 *IN = *STDIN;
8397 while(! -s $comfile) {
8398 # Writer has not opened the buffer file, so we cannot remove it yet
8399 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
8400 usleep($sleep);
8402 # The writer and we have both opened the file, so it is safe to unlink it
8403 unlink $unlink_file;
8404 unlink $comfile;
8406 my $first_round = 1;
8407 my $flags;
8408 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
8409 $flags |= O_NONBLOCK; # Add non-blocking to the flags
8410 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
8412 while(1) {
8413 # clear EOF
8414 seek(IN,0,1);
8415 my $writer_running = kill 0, $writerpid;
8416 $read = sysread(IN,$buf,131072);
8417 if($read) {
8418 if($first_round) {
8419 # Only start the command if there any input to process
8420 $first_round = 0;
8421 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
8424 # Blocking print
8425 while($buf) {
8426 my $bytes_written = syswrite(OUT,$buf);
8427 # syswrite may be interrupted by SIGHUP
8428 substr($buf,0,$bytes_written) = "";
8430 # Something printed: Wait less next time
8431 $sleep /= 2;
8432 } else {
8433 if(eof(IN) and not $writer_running) {
8434 # Writer dead: There will never be sent more to the decompressor
8435 close OUT;
8436 exit;
8438 # TODO This could probably be done more efficiently using select(2)
8439 # Nothing read: Wait longer before next read
8440 # Up to 100 milliseconds
8441 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
8442 usleep($sleep);
8446 sub usleep {
8447 # Sleep this many milliseconds.
8448 my $secs = shift;
8449 select(undef, undef, undef, $secs/1000);
8452 $cattail =~ s/#.*//mg;
8453 $cattail =~ s/\s+/ /g;
8455 return $cattail;
8459 sub openoutputfiles($) {
8460 # Open files for STDOUT and STDERR
8461 # Set file handles in $self->fh
8462 my $self = shift;
8463 my ($outfhw, $errfhw, $outname, $errname);
8465 if($opt::linebuffer and not
8466 ($opt::keeporder or $opt::files or $opt::results or
8467 $opt::compress or $opt::compress_program or
8468 $opt::decompress_program)) {
8469 # Do not save to files: Use non-blocking pipe
8470 my ($outfhr, $errfhr);
8471 pipe($outfhr, $outfhw) || die;
8472 pipe($errfhr, $errfhw) || die;
8473 $self->set_fh(1,'w',$outfhw);
8474 $self->set_fh(2,'w',$errfhw);
8475 $self->set_fh(1,'r',$outfhr);
8476 $self->set_fh(2,'r',$errfhr);
8477 # Make it possible to read non-blocking from the pipe
8478 for my $fdno (1,2) {
8479 ::set_fh_non_blocking($self->fh($fdno,'r'));
8481 # Return immediately because we do not need setting filenames
8482 return;
8483 } elsif($opt::results and not $Global::csvsep) {
8484 # If --results, but not --results *.csv/*.tsv
8485 my $out = $self->{'commandline'}->results_out();
8486 my $seqname;
8487 if($out eq $opt::results or $out =~ m:/$:) {
8488 # $opt::results = simple string or ending in /
8489 # => $out is a dir/
8490 # prefix/name1/val1/name2/val2/seq
8491 $seqname = $out."seq";
8492 # prefix/name1/val1/name2/val2/stdout
8493 $outname = $out."stdout";
8494 # prefix/name1/val1/name2/val2/stderr
8495 $errname = $out."stderr";
8496 } else {
8497 # $opt::results = replacement string not ending in /
8498 # => $out is a file
8499 $outname = $out;
8500 $errname = "$out.err";
8501 $seqname = "$out.seq";
8503 my $seqfhw;
8504 if(not open($seqfhw, "+>", $seqname)) {
8505 ::error("Cannot write to `$seqname'.");
8506 ::wait_and_exit(255);
8508 print $seqfhw $self->seq();
8509 close $seqfhw;
8510 if(not open($outfhw, "+>", $outname)) {
8511 ::error("Cannot write to `$outname'.");
8512 ::wait_and_exit(255);
8514 if(not open($errfhw, "+>", $errname)) {
8515 ::error("Cannot write to `$errname'.");
8516 ::wait_and_exit(255);
8518 $self->set_fh(1,"unlink","");
8519 $self->set_fh(2,"unlink","");
8520 if($opt::sqlworker) {
8521 # Save the filenames in SQL table
8522 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
8523 "WHERE Seq = ". $self->seq(),
8524 $outname, $errname);
8526 } elsif(not $opt::ungroup) {
8527 # To group we create temporary files for STDOUT and STDERR
8528 # To avoid the cleanup unlink the files immediately (but keep them open)
8529 if($opt::files) {
8530 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8531 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8532 # --files => only remove stderr
8533 $self->set_fh(1,"unlink","");
8534 $self->set_fh(2,"unlink",$errname);
8535 } else {
8536 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
8537 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
8538 $self->set_fh(1,"unlink",$outname);
8539 $self->set_fh(2,"unlink",$errname);
8541 } else {
8542 # --ungroup
8543 open($outfhw,">&",$Global::fh{1}) || die;
8544 open($errfhw,">&",$Global::fh{2}) || die;
8545 # File name must be empty as it will otherwise be printed
8546 $outname = "";
8547 $errname = "";
8548 $self->set_fh(1,"unlink",$outname);
8549 $self->set_fh(2,"unlink",$errname);
8551 # Set writing FD
8552 $self->set_fh(1,'w',$outfhw);
8553 $self->set_fh(2,'w',$errfhw);
8554 $self->set_fh(1,'name',$outname);
8555 $self->set_fh(2,'name',$errname);
8556 if($opt::compress) {
8557 $self->filter_through_compress();
8558 } elsif(not $opt::ungroup) {
8559 $self->grouped();
8561 if($opt::linebuffer) {
8562 # Make it possible to read non-blocking from
8563 # the buffer files
8564 # Used for --linebuffer with -k, --files, --res, --compress*
8565 for my $fdno (1,2) {
8566 ::set_fh_non_blocking($self->fh($fdno,'r'));
8571 sub print_verbose_dryrun($) {
8572 # If -v set: print command to stdout (possibly buffered)
8573 # This must be done before starting the command
8574 my $self = shift;
8575 if($Global::verbose or $opt::dryrun) {
8576 my $fh = $self->fh(1,"w");
8577 if($Global::verbose <= 1) {
8578 print $fh $self->replaced(),"\n";
8579 } else {
8580 # Verbose level > 1: Print the rsync and stuff
8581 print $fh $self->wrapped(),"\n";
8584 if($opt::sqlworker) {
8585 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
8586 $self->replaced());
8590 sub add_rm($) {
8591 # Files to remove when job is done
8592 my $self = shift;
8593 push @{$self->{'unlink'}}, @_;
8596 sub get_rm($) {
8597 # Files to remove when job is done
8598 my $self = shift;
8599 return @{$self->{'unlink'}};
8602 sub cleanup($) {
8603 # Remove files when job is done
8604 my $self = shift;
8605 unlink $self->get_rm();
8606 delete @Global::unlink{$self->get_rm()};
8609 sub grouped($) {
8610 my $self = shift;
8611 # Set reading FD if using --group (--ungroup does not need)
8612 for my $fdno (1,2) {
8613 # Re-open the file for reading
8614 # so fdw can be closed seperately
8615 # and fdr can be seeked seperately (for --line-buffer)
8616 open(my $fdr,"<", $self->fh($fdno,'name')) ||
8617 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
8618 $self->set_fh($fdno,'r',$fdr);
8619 # Unlink if not debugging
8620 $Global::debug or ::rm($self->fh($fdno,"unlink"));
8624 sub empty_input_wrapper($) {
8625 # If no input: exit(0)
8626 # If some input: Pass input as input to command on STDIN
8627 # This avoids starting the command if there is no input.
8628 # Input:
8629 # $command = command to pipe data to
8630 # Returns:
8631 # $wrapped_command = the wrapped command
8632 my $command = shift;
8633 my $script =
8634 ::spacefree(0,q{
8635 if(sysread(STDIN, $buf, 1)) {
8636 open($fh, "|-", @ARGV) || die;
8637 syswrite($fh, $buf);
8638 # Align up to 128k block
8639 if($read = sysread(STDIN, $buf, 131071)) {
8640 syswrite($fh, $buf);
8642 while($read = sysread(STDIN, $buf, 131072)) {
8643 syswrite($fh, $buf);
8645 close $fh;
8646 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8649 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
8650 if($Global::cshell
8652 length $command > 499) {
8653 # csh does not like words longer than 1000 (499 quoted)
8654 # $command = "perl -e '".base64_zip_eval()."' ".
8655 # join" ",string_zip_base64(
8656 # 'exec "'.::perl_quote_scalar($command).'"');
8657 return 'perl -e '.::Q($script)." ".
8658 base64_wrap("exec \"$Global::shell\",'-c',\"".
8659 ::perl_quote_scalar($command).'"');
8660 } else {
8661 return 'perl -e '.::Q($script)." ".
8662 $Global::shell." -c ".::Q($command);
8666 sub filter_through_compress($) {
8667 my $self = shift;
8668 # Send stdout to stdin for $opt::compress_program(1)
8669 # Send stderr to stdin for $opt::compress_program(2)
8670 # cattail get pid: $pid = $self->fh($fdno,'rpid');
8671 my $cattail = cattail();
8673 for my $fdno (1,2) {
8674 # Make a communication file.
8675 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
8676 close $fh;
8677 # Compressor: (echo > $comfile; compress pipe) > output
8678 # When the echo is written to $comfile,
8679 # it is known that output file is opened,
8680 # thus output file can then be removed by the decompressor.
8681 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
8682 empty_input_wrapper($opt::compress_program).") >".
8683 ::Q($self->fh($fdno,'name'))) || die $?;
8684 $self->set_fh($fdno,'w',$fdw);
8685 $self->set_fh($fdno,'wpid',$wpid);
8686 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
8687 # decompress output > stdout
8688 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
8689 $opt::decompress_program, $wpid,
8690 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
8691 || die $?;
8692 $self->set_fh($fdno,'r',$fdr);
8693 $self->set_fh($fdno,'rpid',$rpid);
8697 sub set_fh($$$$) {
8698 # Set file handle
8699 my ($self, $fd_no, $key, $fh) = @_;
8700 $self->{'fd'}{$fd_no,$key} = $fh;
8703 sub fh($) {
8704 # Get file handle
8705 my ($self, $fd_no, $key) = @_;
8706 return $self->{'fd'}{$fd_no,$key};
8709 sub write_block($) {
8710 my $self = shift;
8711 my $stdin_fh = $self->fh(0,"w");
8712 if(fork()) {
8713 # Close in parent
8714 close $stdin_fh;
8715 } else {
8716 # If writing is to a closed pipe:
8717 # Do not call signal handler, but let nothing be written
8718 local $SIG{PIPE} = undef;
8720 for my $part (
8721 grep { defined $_ }
8722 $self->{'header'},$self->{'block'}) {
8723 # syswrite may not write all in one go,
8724 # so make sure everything is written.
8725 my $written;
8726 while($written = syswrite($stdin_fh,$$part)) {
8727 substr($$part,0,$written) = "";
8730 close $stdin_fh;
8731 exit(0);
8735 sub write($) {
8736 my $self = shift;
8737 my $remaining_ref = shift;
8738 my $stdin_fh = $self->fh(0,"w");
8740 my $len = length $$remaining_ref;
8741 # syswrite may not write all in one go,
8742 # so make sure everything is written.
8743 my $written;
8745 # If writing is to a closed pipe:
8746 # Do not call signal handler, but let nothing be written
8747 local $SIG{PIPE} = undef;
8748 while($written = syswrite($stdin_fh,$$remaining_ref)){
8749 substr($$remaining_ref,0,$written) = "";
8753 sub set_block($$$$$$) {
8754 # Copy stdin buffer from $block_ref up to $endpos
8755 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
8756 # Remove $recstart and $recend if needed
8757 # Input:
8758 # $header_ref = ref to $header to prepend
8759 # $buffer_ref = ref to $buffer containing the block
8760 # $endpos = length of $block to pass on
8761 # $recstart = --recstart regexp
8762 # $recend = --recend regexp
8763 # Returns:
8764 # N/A
8765 my $self = shift;
8766 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
8767 $self->{'header'} = $header_ref;
8768 if($opt::roundrobin or $opt::remove_rec_sep or $opt::retries) {
8769 my $a = "";
8770 if(($opt::roundrobin or $opt::retries) and $self->virgin()) {
8771 $a .= $$header_ref;
8773 # Job is no longer virgin
8774 $self->set_virgin(0);
8775 # Make a full copy because $buffer will change
8776 $a .= substr($$buffer_ref,0,$endpos);
8777 $self->{'block'} = \$a;
8778 if($opt::remove_rec_sep) {
8779 remove_rec_sep($self->{'block'},$recstart,$recend);
8781 $self->{'block_length'} = length ${$self->{'block'}};
8782 } else {
8783 $self->set_virgin(0);
8784 for(substr($$buffer_ref,0,$endpos)) {
8785 $self->{'block'} = \$_;
8787 $self->{'block_length'} = $endpos + length ${$self->{'header'}};
8789 $self->{'block_pos'} = 0;
8790 $self->add_transfersize($self->{'block_length'});
8793 sub block_ref($) {
8794 my $self = shift;
8795 return $self->{'block'};
8798 sub block_length($) {
8799 my $self = shift;
8800 return $self->{'block_length'};
8803 sub remove_rec_sep($) {
8804 # Remove --recstart and --recend from $block
8805 # Input:
8806 # $block_ref = reference to $block to be modified
8807 # $recstart = --recstart
8808 # $recend = --recend
8809 # Uses:
8810 # $opt::regexp = Are --recstart/--recend regexp?
8811 # Returns:
8812 # N/A
8813 my ($block_ref,$recstart,$recend) = @_;
8814 # Remove record separator
8815 if($opt::regexp) {
8816 $$block_ref =~ s/$recend$recstart//gom;
8817 $$block_ref =~ s/^$recstart//os;
8818 $$block_ref =~ s/$recend$//os;
8819 } else {
8820 $$block_ref =~ s/\Q$recend$recstart\E//gom;
8821 $$block_ref =~ s/^\Q$recstart\E//os;
8822 $$block_ref =~ s/\Q$recend\E$//os;
8826 sub non_blocking_write($) {
8827 my $self = shift;
8828 my $something_written = 0;
8830 my $in = $self->fh(0,"w");
8831 my $rv = syswrite($in,
8832 substr(${$self->{'block'}},$self->{'block_pos'}));
8833 if (!defined($rv) && $! == ::EAGAIN()) {
8834 # would block - but would have written
8835 $something_written = 0;
8836 # avoid triggering auto expanding block size
8837 $Global::no_autoexpand_block ||= 1;
8838 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
8839 # incomplete write
8840 # Remove the written part
8841 $self->{'block_pos'} += $rv;
8842 $something_written = $rv;
8843 } else {
8844 # successfully wrote everything
8845 # Empty block to free memory
8846 my $a = "";
8847 $self->set_block(\$a,\$a,0,"","");
8848 $something_written = $rv;
8850 ::debug("pipe", "Non-block: ", $something_written);
8851 return $something_written;
8855 sub virgin($) {
8856 my $self = shift;
8857 return $self->{'virgin'};
8860 sub set_virgin($$) {
8861 my $self = shift;
8862 $self->{'virgin'} = shift;
8865 sub pid($) {
8866 my $self = shift;
8867 return $self->{'pid'};
8870 sub set_pid($$) {
8871 my $self = shift;
8872 $self->{'pid'} = shift;
8875 sub starttime($) {
8876 # Returns:
8877 # UNIX-timestamp this job started
8878 my $self = shift;
8879 return sprintf("%.3f",$self->{'starttime'});
8882 sub set_starttime($@) {
8883 my $self = shift;
8884 my $starttime = shift || ::now();
8885 $self->{'starttime'} = $starttime;
8886 $opt::sqlworker and
8887 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
8888 $starttime);
8891 sub runtime($) {
8892 # Returns:
8893 # Run time in seconds with 3 decimals
8894 my $self = shift;
8895 return sprintf("%.3f",
8896 int(($self->endtime() - $self->starttime())*1000)/1000);
8899 sub endtime($) {
8900 # Returns:
8901 # UNIX-timestamp this job ended
8902 # 0 if not ended yet
8903 my $self = shift;
8904 return ($self->{'endtime'} || 0);
8907 sub set_endtime($$) {
8908 my $self = shift;
8909 my $endtime = shift;
8910 $self->{'endtime'} = $endtime;
8911 $opt::sqlworker and
8912 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
8913 $self->runtime());
8916 sub is_timedout($) {
8917 # Is the job timedout?
8918 # Input:
8919 # $delta_time = time that the job may run
8920 # Returns:
8921 # True or false
8922 my $self = shift;
8923 my $delta_time = shift;
8924 return time > $self->{'starttime'} + $delta_time;
8927 sub kill($) {
8928 my $self = shift;
8929 $self->set_exitstatus(-1);
8930 ::kill_sleep_seq($self->pid());
8933 sub suspend($) {
8934 my $self = shift;
8935 my @pgrps = map { -$_ } $self->pid();
8936 kill "STOP", @pgrps;
8937 $self->set_suspended(1);
8940 sub set_suspended($$) {
8941 my $self = shift;
8942 $self->{'suspended'} = shift;
8945 sub suspended($) {
8946 my $self = shift;
8947 return $self->{'suspended'};
8950 sub resume($) {
8951 my $self = shift;
8952 my @pgrps = map { -$_ } $self->pid();
8953 kill "CONT", @pgrps;
8954 $self->set_suspended(0);
8957 sub failed($) {
8958 # return number of times failed for this $sshlogin
8959 # Input:
8960 # $sshlogin
8961 # Returns:
8962 # Number of times failed for $sshlogin
8963 my $self = shift;
8964 my $sshlogin = shift;
8965 return $self->{'failed'}{$sshlogin};
8968 sub failed_here($) {
8969 # return number of times failed for the current $sshlogin
8970 # Returns:
8971 # Number of times failed for this sshlogin
8972 my $self = shift;
8973 return $self->{'failed'}{$self->sshlogin()};
8976 sub add_failed($) {
8977 # increase the number of times failed for this $sshlogin
8978 my $self = shift;
8979 my $sshlogin = shift;
8980 $self->{'failed'}{$sshlogin}++;
8983 sub add_failed_here($) {
8984 # increase the number of times failed for the current $sshlogin
8985 my $self = shift;
8986 $self->{'failed'}{$self->sshlogin()}++;
8989 sub reset_failed($) {
8990 # increase the number of times failed for this $sshlogin
8991 my $self = shift;
8992 my $sshlogin = shift;
8993 delete $self->{'failed'}{$sshlogin};
8996 sub reset_failed_here($) {
8997 # increase the number of times failed for this $sshlogin
8998 my $self = shift;
8999 delete $self->{'failed'}{$self->sshlogin()};
9002 sub min_failed($) {
9003 # Returns:
9004 # the number of sshlogins this command has failed on
9005 # the minimal number of times this command has failed
9006 my $self = shift;
9007 my $min_failures =
9008 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
9009 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
9010 return ($number_of_sshlogins_failed_on,$min_failures);
9013 sub total_failed($) {
9014 # Returns:
9015 # $total_failures = the number of times this command has failed
9016 my $self = shift;
9017 my $total_failures = 0;
9018 for (values %{$self->{'failed'}}) {
9019 $total_failures += $_;
9021 return $total_failures;
9025 my $script;
9027 sub postpone_exit_and_cleanup {
9028 # Command to remove files and dirs (given as args) without
9029 # affecting the exit value in $?/$status.
9030 if(not $script) {
9031 $script = "perl -e '".
9032 ::spacefree(0,q{
9033 $bash=shift;
9034 $csh=shift;
9035 for(@ARGV){
9036 unlink;
9037 rmdir;
9039 if($bash=~s/(\d+)h/$1/) {
9040 exit $bash;
9042 exit $csh;
9044 # `echo \$?h` is needed to make fish not complain
9045 "' ".'"`echo \\\\\\\\\$?h`" "$status" ';
9047 return $script
9052 my $script;
9054 sub fifo_wrap() {
9055 # Script to create a fifo, run a command on the fifo
9056 # while copying STDIN to the fifo, and finally
9057 # remove the fifo and return the exit code of the command.
9058 if(not $script) {
9059 # {} == $PARALLEL_TMP for --fifo
9060 # To make it csh compatible a wrapper needs to:
9061 # * mkfifo
9062 # * spawn $command &
9063 # * cat > fifo
9064 # * waitpid to get the exit code from $command
9065 # * be less than 1000 chars long
9066 $script = "perl -e '".
9067 (::spacefree
9068 (0, q{
9069 ($s,$c,$f) = @ARGV;
9070 # mkfifo $PARALLEL_TMP
9071 system "mkfifo", $f;
9072 # spawn $shell -c $command &
9073 $pid = fork || exec $s, "-c", $c;
9074 open($o,">",$f) || die $!;
9075 # cat > $PARALLEL_TMP
9076 while(sysread(STDIN,$buf,131072)){
9077 syswrite $o, $buf;
9079 close $o;
9080 # waitpid to get the exit code from $command
9081 waitpid $pid,0;
9082 # Cleanup
9083 unlink $f;
9084 exit $?/256;
9085 }))."'";
9087 return $script;
9091 sub wrapped($) {
9092 # Wrap command with:
9093 # * --shellquote
9094 # * --nice
9095 # * --cat
9096 # * --fifo
9097 # * --sshlogin
9098 # * --pipepart (@Global::cat_prepends)
9099 # * --tee (@Global::cat_prepends)
9100 # * --pipe
9101 # * --tmux
9102 # The ordering of the wrapping is important:
9103 # * --nice/--cat/--fifo should be done on the remote machine
9104 # * --pipepart/--pipe should be done on the local machine inside --tmux
9105 # Uses:
9106 # @opt::shellquote
9107 # $opt::nice
9108 # $Global::shell
9109 # $opt::cat
9110 # $opt::fifo
9111 # @Global::cat_prepends
9112 # $opt::pipe
9113 # $opt::tmux
9114 # Returns:
9115 # $self->{'wrapped'} = the command wrapped with the above
9116 my $self = shift;
9117 if(not defined $self->{'wrapped'}) {
9118 my $command = $self->replaced();
9119 # Bug in Bash and Ksh when running multiline aliases
9120 # This will force them to run correctly, but will fail in
9121 # tcsh so we do not do it.
9122 # $command .= "\n\n";
9123 if(@opt::shellquote) {
9124 # Quote one time for each --shellquote
9125 my $c = $command;
9126 for(@opt::shellquote) {
9127 $c = ::Q($c);
9129 # Prepend "echo" (it is written in perl because
9130 # quoting '-e' causes problem in some versions and
9131 # csh's version does something wrong)
9132 $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c);
9134 if($Global::parallel_env) {
9135 # If $PARALLEL_ENV set, put that in front of the command
9136 # Used for env_parallel.*
9137 if($Global::shell =~ /zsh/) {
9138 # The extra 'eval' will make aliases work, too
9139 $command = $Global::parallel_env."\n".
9140 "eval ".::Q($command);
9141 } else {
9142 $command = $Global::parallel_env."\n".$command;
9145 if($opt::cat) {
9146 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
9147 # This is to make it possible to compute $PARALLEL_TMP on
9148 # the fly when running remotely.
9149 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
9150 # the command is run.
9152 # Prepend 'cat > $PARALLEL_TMP;'
9153 # Append 'unlink $PARALLEL_TMP without affecting $?'
9154 $command =
9155 'cat > $PARALLEL_TMP;'.
9156 $command.";". postpone_exit_and_cleanup().
9157 '$PARALLEL_TMP';
9158 } elsif($opt::fifo) {
9159 # Prepend fifo-wrapper. In essence:
9160 # mkfifo {}
9161 # ( $command ) &
9162 # # $command must read {}, otherwise this 'cat' will block
9163 # cat > {};
9164 # wait; rm {}
9165 # without affecting $?
9166 $command = fifo_wrap(). " ".
9167 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
9169 # Wrap with ssh + tranferring of files
9170 $command = $self->sshlogin_wrap($command);
9171 if(@Global::cat_prepends) {
9172 # --pipepart: prepend:
9173 # < /tmp/foo perl -e 'while(@ARGV) {
9174 # sysseek(STDIN,shift,0) || die; $left = shift;
9175 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
9176 # $left -= $read; syswrite(STDOUT,$buf);
9178 # }' 0 0 0 11 |
9180 # --pipepart --tee: prepend:
9181 # < dash-a-file
9183 # --pipe --tee: wrap:
9184 # (rm fifo; ... ) < fifo
9186 # --pipe --shard X:
9187 # (rm fifo; ... ) < fifo
9188 $command = (shift @Global::cat_prepends). "($command)".
9189 (shift @Global::cat_appends);
9190 } elsif($opt::pipe and not $opt::roundrobin) {
9191 # Wrap with EOF-detector to avoid starting $command if EOF.
9192 $command = empty_input_wrapper($command);
9194 if($opt::tmux) {
9195 # Wrap command with 'tmux'
9196 $command = $self->tmux_wrap($command);
9198 if($Global::cshell
9200 length $command > 499) {
9201 # csh does not like words longer than 1000 (499 quoted)
9202 # $command = "perl -e '".base64_zip_eval()."' ".
9203 # join" ",string_zip_base64(
9204 # 'exec "'.::perl_quote_scalar($command).'"');
9205 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
9206 ::perl_quote_scalar($command).'"');
9208 $self->{'wrapped'} = $command;
9210 return $self->{'wrapped'};
9213 sub set_sshlogin($$) {
9214 my $self = shift;
9215 my $sshlogin = shift;
9216 $self->{'sshlogin'} = $sshlogin;
9217 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
9218 delete $self->{'wrapped'};
9220 if($opt::sqlworker) {
9221 # Identify worker as --sqlworker often runs on different machines
9222 my $host = $sshlogin->string();
9223 if($host eq ":") {
9224 $host = ::hostname();
9226 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
9230 sub sshlogin($) {
9231 my $self = shift;
9232 return $self->{'sshlogin'};
9235 sub string_base64($) {
9236 # Base64 encode strings into 1000 byte blocks.
9237 # 1000 bytes is the largest word size csh supports
9238 # Input:
9239 # @strings = to be encoded
9240 # Returns:
9241 # @base64 = 1000 byte block
9242 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
9243 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
9244 return @base64;
9247 sub string_zip_base64($) {
9248 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
9249 # byte blocks.
9250 # 1000 bytes is the largest word size csh supports
9251 # Zipping will make exporting big environments work, too
9252 # Input:
9253 # @strings = to be encoded
9254 # Returns:
9255 # @base64 = 1000 byte block
9256 my($zipin_fh, $zipout_fh,@base64);
9257 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
9258 if(fork) {
9259 close $zipin_fh;
9260 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
9261 # Split base64 encoded into 1000 byte blocks
9262 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
9263 close $zipout_fh;
9264 } else {
9265 close $zipout_fh;
9266 print $zipin_fh @_;
9267 close $zipin_fh;
9268 exit;
9270 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
9271 return @base64;
9274 sub base64_zip_eval() {
9275 # Script that:
9276 # * reads base64 strings from @ARGV
9277 # * decodes them
9278 # * pipes through 'bzip2 -dc'
9279 # * evals the result
9280 # Reverse of string_zip_base64 + eval
9281 # Will be wrapped in ' so single quote is forbidden
9282 # Returns:
9283 # $script = 1-liner for perl -e
9284 my $script = ::spacefree(0,q{
9285 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
9286 eval"@GNU_Parallel";
9287 $chld = $SIG{CHLD};
9288 $SIG{CHLD} = "IGNORE";
9289 # Search for bzip2. Not found => use default path
9290 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
9291 # $in = stdin on $zip, $out = stdout from $zip
9292 # Forget my() to save chars for csh
9293 # my($in, $out,$eval);
9294 open3($in,$out,">&STDERR",$zip,"-dc");
9295 if(my $perlpid = fork) {
9296 close $in;
9297 $eval = join "", <$out>;
9298 close $out;
9299 } else {
9300 close $out;
9301 # Pipe decoded base64 into 'bzip2 -dc'
9302 print $in (decode_base64(join"",@ARGV));
9303 close $in;
9304 exit;
9306 wait;
9307 $SIG{CHLD} = $chld;
9308 eval $eval;
9310 ::debug("base64",$script,"\n");
9311 return $script;
9314 sub base64_wrap($) {
9315 # base64 encode Perl code
9316 # Split it into chunks of < 1000 bytes
9317 # Prepend it with a decoder that eval's it
9318 # Input:
9319 # $eval_string = Perl code to run
9320 # Returns:
9321 # $shell_command = shell command that runs $eval_string
9322 my $eval_string = shift;
9323 return
9324 "perl -e ".
9325 ::Q(base64_zip_eval())." ".
9326 join" ",::shell_quote(string_zip_base64($eval_string));
9329 sub base64_eval($) {
9330 # Script that:
9331 # * reads base64 strings from @ARGV
9332 # * decodes them
9333 # * evals the result
9334 # Reverse of string_base64 + eval
9335 # Will be wrapped in ' so single quote is forbidden.
9336 # Spaces are stripped so spaces cannot be significant.
9337 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
9338 # to make it clear that this is a GNU Parallel command
9339 # when looking at the process table.
9340 # Returns:
9341 # $script = 1-liner for perl -e
9342 my $script = ::spacefree(0,q{
9343 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
9344 eval "@GNU_Parallel";
9345 my $eval = decode_base64(join"",@ARGV);
9346 eval $eval;
9348 ::debug("base64",$script,"\n");
9349 return $script;
9352 sub sshlogin_wrap($) {
9353 # Wrap the command with the commands needed to run remotely
9354 # Input:
9355 # $command = command to run
9356 # Returns:
9357 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
9358 sub monitor_parent_sshd_script {
9359 # This script is to solve the problem of
9360 # * not mixing STDERR and STDOUT
9361 # * terminating with ctrl-c
9362 # If its parent is ssh: all good
9363 # If its parent is init(1): ssh died, so kill children
9364 my $monitor_parent_sshd_script;
9366 if(not $monitor_parent_sshd_script) {
9367 $monitor_parent_sshd_script =
9368 # This will be packed in ', so only use "
9369 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
9370 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
9371 '$nice = '.$opt::nice.';'.
9372 '$termseq = "'.$opt::termseq.'";'.
9374 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
9375 do {
9376 $ENV{PARALLEL_TMP} = $tmpdir."/par".
9377 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
9378 } while(-e $ENV{PARALLEL_TMP});
9379 $SIG{CHLD} = sub { $done = 1; };
9380 $pid = fork;
9381 unless($pid) {
9382 # Make own process group to be able to kill HUP it later
9383 eval { setpgrp };
9384 eval { setpriority(0,0,$nice) };
9385 exec $shell, "-c", ($bashfunc."@ARGV");
9386 die "exec: $!\n";
9388 my $parent = getppid;
9389 do {
9390 # Parent pid is not changed, so sshd is alive
9391 # Exponential sleep up to 1 sec
9392 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
9393 select(undef, undef, undef, $s);
9394 } until ($done || getppid != $parent);
9395 if(not $done) {
9396 # Kill as per --termseq
9397 my @term_seq = split/,/,$termseq;
9398 if(not @term_seq) {
9399 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
9401 while(@term_seq && kill(0,-$pid)) {
9402 kill(shift @term_seq, -$pid);
9403 select(undef, undef, undef, (shift @term_seq)/1000);
9406 wait;
9407 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
9410 return $monitor_parent_sshd_script;
9413 sub vars_to_export {
9414 # Uses:
9415 # @opt::env
9416 my @vars = ("parallel_bash_environment");
9417 for my $varstring (@opt::env) {
9418 # Split up --env VAR1,VAR2
9419 push @vars, split /,/, $varstring;
9421 for (@vars) {
9422 if(-r $_ and not -d) {
9423 # Read as environment definition bug #44041
9424 # TODO parse this
9425 my $fh = ::open_or_exit($_);
9426 $Global::envdef = join("",<$fh>);
9427 close $fh;
9430 if(grep { /^_$/ } @vars) {
9431 local $/ = "\n";
9432 # --env _
9433 # Include all vars that are not in a clean environment
9434 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
9435 my @ignore = <$vars_fh>;
9436 chomp @ignore;
9437 my %ignore;
9438 @ignore{@ignore} = @ignore;
9439 close $vars_fh;
9440 push @vars, grep { not defined $ignore{$_} } keys %ENV;
9441 @vars = grep { not /^_$/ } @vars;
9442 } else {
9443 ::error("Run '$Global::progname --record-env' ".
9444 "in a clean environment first.");
9445 ::wait_and_exit(255);
9448 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
9449 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
9451 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
9452 "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST",
9453 "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS",
9454 "PARALLEL_JOBSLOT", map { ("BASH_FUNC_$_()",
9455 "BASH_FUNC_$_%%") } @vars);
9456 # Keep only defined variables
9457 return grep { defined($ENV{$_}) } @vars;
9460 sub env_as_eval {
9461 # Returns:
9462 # $eval = '$ENV{"..."}=...; ...'
9463 my @vars = vars_to_export();
9464 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
9465 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
9466 my @non_functions = (grep { !/PARALLEL_ENV/ }
9467 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
9469 # eval of @envset will set %ENV
9470 my $envset = join"", map {
9471 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
9472 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
9474 # running @bashfunc on the command line, will set the functions
9475 my @bashfunc = map {
9476 my $v=$_;
9477 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
9478 "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions;
9479 # eval $bashfuncset will set $bashfunc
9480 my $bashfuncset;
9481 if(@bashfunc) {
9482 # Functions are not supported for all shells
9483 if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) {
9484 ::warning("Shell functions may not be supported in $Global::shell.");
9486 $bashfuncset =
9487 '@bash_functions=qw('."@bash_functions".");".
9488 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
9489 if($shell=~/csh/) {
9490 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
9491 exec "false";
9494 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
9495 } else {
9496 $bashfuncset = '$bashfunc = "";'
9498 if($ENV{'parallel_bash_environment'}) {
9499 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
9501 ::debug("base64",$envset,$bashfuncset,"\n");
9502 return $csh_friendly,$envset,$bashfuncset;
9505 my $self = shift;
9506 my $command = shift;
9507 # TODO test that *sh -c 'parallel --env' use *sh
9508 if(not defined $self->{'sshlogin_wrap'}{$command}) {
9509 my $sshlogin = $self->sshlogin();
9510 my $serverlogin = $sshlogin->serverlogin();
9511 my $quoted_remote_command;
9512 $ENV{'PARALLEL_SEQ'} = $self->seq();
9513 $ENV{'PARALLEL_JOBSLOT'} = $self->slot();
9514 $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string();
9515 $ENV{'PARALLEL_SSHHOST'} = $sshlogin->serverlogin();
9516 if ($opt::hostgroups) {
9517 $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups();
9518 $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups();
9520 $ENV{'PARALLEL_PID'} = $$;
9521 if($serverlogin eq ":") {
9522 if($opt::workdir) {
9523 # Create workdir if needed. Then cd to it.
9524 my $wd = $self->workdir();
9525 if($opt::workdir eq "." or $opt::workdir eq "...") {
9526 # If $wd does not start with '/': Prepend $HOME
9527 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
9529 ::mkdir_or_die($wd);
9530 my $post = "";
9531 if($opt::workdir eq "...") {
9532 $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";");
9535 $command = "cd ".::Q($wd)." || exit 255; " .
9536 $command . $post;;
9538 if(@opt::env) {
9539 # Prepend with environment setter, which sets functions in zsh
9540 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9541 my $perl_code = $envset.$bashfuncset.
9542 '@ARGV="'.::perl_quote_scalar($command).'";'.
9543 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
9544 if(length $perl_code > 999
9546 not $csh_friendly
9548 $command =~ /\n/) {
9549 # csh does not deal well with > 1000 chars in one word
9550 # csh does not deal well with $ENV with \n
9551 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
9552 } else {
9553 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
9555 } else {
9556 $self->{'sshlogin_wrap'}{$command} = $command;
9558 } else {
9559 my $pwd = "";
9560 if($opt::workdir) {
9561 # Create remote workdir if needed. Then cd to it.
9562 my $wd = ::pQ($self->workdir());
9563 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
9564 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
9566 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
9567 my $remote_command = $pwd.$envset.$bashfuncset.
9568 '@ARGV="'.::perl_quote_scalar($command).'";'.
9569 monitor_parent_sshd_script();
9570 $quoted_remote_command = "perl -e ". ::Q($remote_command);
9571 my $dq_remote_command = ::Q($quoted_remote_command);
9572 if(length $dq_remote_command > 999
9574 not $csh_friendly
9576 $command =~ /\n/) {
9577 # csh does not deal well with > 1000 chars in one word
9578 # csh does not deal well with $ENV with \n
9579 $quoted_remote_command =
9580 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
9581 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
9582 } else {
9583 $quoted_remote_command = $dq_remote_command;
9586 my $sshcmd = $sshlogin->sshcommand();
9587 my ($pre,$post,$cleanup)=("","","");
9588 # --transfer
9589 $pre .= $self->sshtransfer();
9590 # --return
9591 $post .= $self->sshreturn();
9592 # --cleanup
9593 $post .= $self->sshcleanup();
9594 if($post) {
9595 # We need to save the exit status of the job
9596 $post = exitstatuswrapper($post);
9598 $self->{'sshlogin_wrap'}{$command} =
9599 ($pre
9600 . "$sshcmd $serverlogin -- exec "
9601 . $quoted_remote_command
9602 . ";"
9603 . $post);
9606 return $self->{'sshlogin_wrap'}{$command};
9609 sub fill_templates($) {
9610 # Replace replacement strings in template(s)
9611 # Returns:
9612 # @templates - File names of replaced templates
9613 my $self = shift;
9615 if(%opt::template) {
9616 my @template_name =
9617 map { $self->{'commandline'}->replace_placeholders([$_],0,0) }
9618 @{$self->{'commandline'}{'template_names'}};
9619 ::debug("tmpl","Names: @template_name\n");
9620 for(my $i = 0; $i <= $#template_name; $i++) {
9621 open(my $fh, ">", $template_name[$i]) || die;
9622 print $fh $self->{'commandline'}->
9623 replace_placeholders([$self->{'commandline'}{'template_contents'}[$i]],0,0);
9624 close $fh;
9626 if($opt::cleanup) {
9627 $self->add_rm(@template_name);
9632 sub filter($) {
9633 # Replace replacement strings in filter(s) and evaluate them
9634 # Returns:
9635 # $run - 1=yes, undef=no
9636 my $self = shift;
9637 my $run = 1;
9638 if(@opt::filter) {
9639 for my $eval ($self->{'commandline'}->
9640 replace_placeholders(\@opt::filter,0,0)) {
9641 $run &&= eval $eval;
9643 $self->{'commandline'}{'skip'} ||= not $run;
9645 return $run;
9648 sub transfer($) {
9649 # Files to transfer
9650 # Non-quoted and with {...} substituted
9651 # Returns:
9652 # @transfer - File names of files to transfer
9653 my $self = shift;
9655 my $transfersize = 0;
9656 my @transfer = $self->{'commandline'}->
9657 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
9658 for(@transfer) {
9659 # filesize
9660 if(-e $_) {
9661 $transfersize += (stat($_))[7];
9664 $self->add_transfersize($transfersize);
9665 return @transfer;
9668 sub transfersize($) {
9669 my $self = shift;
9670 return $self->{'transfersize'};
9673 sub add_transfersize($) {
9674 my $self = shift;
9675 my $transfersize = shift;
9676 $self->{'transfersize'} += $transfersize;
9677 $opt::sqlworker and
9678 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
9679 $self->{'transfersize'});
9682 sub sshtransfer($) {
9683 # Returns for each transfer file:
9684 # rsync $file remote:$workdir
9685 my $self = shift;
9686 my @pre;
9687 my $sshlogin = $self->sshlogin();
9688 my $workdir = $self->workdir();
9689 for my $file ($self->transfer()) {
9690 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
9692 return join("",@pre);
9695 sub return($) {
9696 # Files to return
9697 # Non-quoted and with {...} substituted
9698 # Returns:
9699 # @non_quoted_filenames
9700 my $self = shift;
9701 return $self->{'commandline'}->
9702 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
9705 sub returnsize($) {
9706 # This is called after the job has finished
9707 # Returns:
9708 # $number_of_bytes transferred in return
9709 my $self = shift;
9710 for my $file ($self->return()) {
9711 if(-e $file) {
9712 $self->{'returnsize'} += (stat($file))[7];
9715 return $self->{'returnsize'};
9718 sub add_returnsize($) {
9719 my $self = shift;
9720 my $returnsize = shift;
9721 $self->{'returnsize'} += $returnsize;
9722 $opt::sqlworker and
9723 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
9724 $self->{'returnsize'});
9727 sub sshreturn($) {
9728 # Returns for each return-file:
9729 # rsync remote:$workdir/$file .
9730 my $self = shift;
9731 my $sshlogin = $self->sshlogin();
9732 my $sshcmd = $sshlogin->sshcommand();
9733 my $serverlogin = $sshlogin->serverlogin();
9734 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
9735 my $pre = "";
9736 for my $file ($self->return()) {
9737 $file =~ s:^\./::g; # Remove ./ if any
9738 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
9739 my $cd = "";
9740 my $wd = "";
9741 if($relpath) {
9742 # rsync -avR /foo/./bar/baz.c remote:/tmp/
9743 # == (on old systems)
9744 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
9745 $wd = ::shell_quote_file($self->workdir()."/");
9747 # Only load File::Basename if actually needed
9748 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
9749 # dir/./file means relative to dir, so remove dir on remote
9750 $file =~ m:(.*)/\./:;
9751 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
9752 my $nobasedir = $file;
9753 $nobasedir =~ s:.*/\./::;
9754 $cd = ::shell_quote_file(::dirname($nobasedir));
9755 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
9756 my $basename = ::Q(::shell_quote_file(::basename($file)));
9757 # --return
9758 # mkdir -p /home/tange/dir/subdir/;
9759 # rsync (--protocol 30) -rlDzR
9760 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
9761 # server:file.gz /home/tange/dir/subdir/
9762 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
9763 " $rsync_cd $rsync_opts $serverlogin:".
9764 $basename . " ".$basedir.$cd.";";
9766 return $pre;
9769 sub sshcleanup($) {
9770 # Return the sshcommand needed to remove the file
9771 # Returns:
9772 # ssh command needed to remove files from sshlogin
9773 my $self = shift;
9774 my $sshlogin = $self->sshlogin();
9775 my $sshcmd = $sshlogin->sshcommand();
9776 my $serverlogin = $sshlogin->serverlogin();
9777 my $workdir = $self->workdir();
9778 my $cleancmd = "";
9780 for my $file ($self->remote_cleanup()) {
9781 my @subworkdirs = parentdirs_of($file);
9782 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
9784 if(defined $opt::workdir and $opt::workdir eq "...") {
9785 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
9787 return $cleancmd;
9790 sub remote_cleanup($) {
9791 # Returns:
9792 # Files to remove at cleanup
9793 my $self = shift;
9794 if($opt::cleanup) {
9795 my @transfer = $self->transfer();
9796 my @return = $self->return();
9797 return (@transfer,@return);
9798 } else {
9799 return ();
9803 sub exitstatuswrapper(@) {
9804 # Input:
9805 # @shellcode = shell code to execute
9806 # Returns:
9807 # shell script that returns current status after executing @shellcode
9808 if($Global::cshell) {
9809 return ('set _EXIT_status=$status; ' .
9810 join(" ",@_).
9811 'exit $_EXIT_status;');
9812 } else {
9813 return ('_EXIT_status=$?; ' .
9814 join(" ",@_).
9815 'exit $_EXIT_status;');
9819 sub workdir($) {
9820 # Returns:
9821 # the workdir on a remote machine
9822 my $self = shift;
9823 if(not defined $self->{'workdir'}) {
9824 my $workdir;
9825 if(defined $opt::workdir) {
9826 if($opt::workdir eq ".") {
9827 # . means current dir
9828 my $home = $ENV{'HOME'};
9829 eval 'use Cwd';
9830 my $cwd = cwd();
9831 $workdir = $cwd;
9832 if($home) {
9833 # If homedir exists: remove the homedir from
9834 # workdir if cwd starts with homedir
9835 # E.g. /home/foo/my/dir => my/dir
9836 # E.g. /tmp/my/dir => /tmp/my/dir
9837 my ($home_dev, $home_ino) = (stat($home))[0,1];
9838 my $parent = "";
9839 my @dir_parts = split(m:/:,$cwd);
9840 my $part;
9841 while(defined ($part = shift @dir_parts)) {
9842 $part eq "" and next;
9843 $parent .= "/".$part;
9844 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
9845 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
9846 # dev and ino is the same: We found the homedir.
9847 $workdir = join("/",@dir_parts);
9848 last;
9852 if($workdir eq "") {
9853 $workdir = ".";
9855 } elsif($opt::workdir eq "...") {
9856 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
9857 . "-" . $self->seq();
9858 } else {
9859 $workdir = $self->{'commandline'}->
9860 replace_placeholders([$opt::workdir],0,0);
9861 #$workdir = $opt::workdir;
9862 # Rsync treats /./ special. We dont want that
9863 $workdir =~ s:/\./:/:g; # Remove /./
9864 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
9865 $workdir =~ s:^\./::g; # Remove starting ./ if any
9867 } else {
9868 $workdir = ".";
9870 $self->{'workdir'} = $workdir;
9872 return $self->{'workdir'};
9875 sub parentdirs_of($) {
9876 # Return:
9877 # all parentdirs except . of this dir or file - sorted desc by length
9878 my $d = shift;
9879 my @parents = ();
9880 while($d =~ s:/[^/]+$::) {
9881 if($d ne ".") {
9882 push @parents, $d;
9885 return @parents;
9888 sub start($) {
9889 # Setup STDOUT and STDERR for a job and start it.
9890 # Returns:
9891 # job-object or undef if job not to run
9893 sub open3_setpgrp_internal {
9894 # Run open3+setpgrp followed by the command
9895 # Input:
9896 # $stdin_fh = Filehandle to use as STDIN
9897 # $stdout_fh = Filehandle to use as STDOUT
9898 # $stderr_fh = Filehandle to use as STDERR
9899 # $command = Command to run
9900 # Returns:
9901 # $pid = Process group of job started
9902 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9903 my $pid;
9904 local (*OUT,*ERR);
9905 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9906 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9907 # The eval is needed to catch exception from open3
9908 eval {
9909 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
9910 # Each child gets its own process group to make it safe to killall
9911 eval{ setpgrp(0,0) };
9912 eval{ setpriority(0,0,$opt::nice) };
9913 exec($Global::shell,"-c",$command)
9914 || ::die_bug("open3-$stdin_fh $command");
9917 return $pid;
9920 sub open3_setpgrp_external {
9921 # Run open3 on $command wrapped with a perl script doing setpgrp
9922 # Works on systems that do not support open3(,,,"-")
9923 # Input:
9924 # $stdin_fh = Filehandle to use as STDIN
9925 # $stdout_fh = Filehandle to use as STDOUT
9926 # $stderr_fh = Filehandle to use as STDERR
9927 # $command = Command to run
9928 # Returns:
9929 # $pid = Process group of job started
9930 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
9931 local (*OUT,*ERR);
9932 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
9933 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
9935 my $pid;
9936 my @setpgrp_wrap =
9937 ('perl','-e',
9938 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
9939 "exec '$Global::shell', '-c', \@ARGV");
9940 # The eval is needed to catch exception from open3
9941 eval {
9942 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
9943 || ::die_bug("open3-$stdin_fh");
9946 return $pid;
9949 sub redefine_open3_setpgrp {
9950 my $setgprp_cache = shift;
9951 # Select and run open3_setpgrp_internal/open3_setpgrp_external
9952 no warnings 'redefine';
9953 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
9954 # Test to see if open3(x,x,x,"-") is fully supported
9955 # Can an exported bash function be called via open3?
9956 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
9957 'else { exec("bash","-c","testfun && true"); }';
9958 my $bash =
9959 ::shell_quote_scalar_default(
9960 "testfun() { rm $name; }; export -f testfun; ".
9961 "perl -MIPC::Open3 -e ".
9962 ::shell_quote_scalar_default($script)
9964 my $redefine_eval;
9965 # Redirect STDERR temporarily,
9966 # so errors on MacOS X are ignored.
9967 open my $saveerr, ">&STDERR";
9968 open STDERR, '>', "/dev/null";
9969 # Run the test
9970 ::debug("init",qq{bash -c $bash 2>/dev/null});
9971 qx{ bash -c $bash 2>/dev/null };
9972 open STDERR, ">&", $saveerr;
9974 if(-e $name) {
9975 # Does not support open3(x,x,x,"-")
9976 # or does not have bash:
9977 # Use (slow) external version
9978 unlink($name);
9979 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external';
9980 ::debug("init","open3_setpgrp_external chosen\n");
9981 } else {
9982 # Supports open3(x,x,x,"-")
9983 # This is 0.5 ms faster to run
9984 $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal';
9985 ::debug("init","open3_setpgrp_internal chosen\n");
9987 if(open(my $fh, ">", $setgprp_cache)) {
9988 print $fh $redefine_eval;
9989 close $fh;
9990 } else {
9991 ::debug("init","Cannot write to $setgprp_cache");
9993 eval $redefine_eval;
9996 sub open3_setpgrp {
9997 my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" .
9998 ::hostname() . "/setpgrp_func";
9999 sub read_cache() {
10000 -e $setgprp_cache || return 0;
10001 local $/ = undef;
10002 open(my $fh, "<", $setgprp_cache) || return 0;
10003 eval <$fh> || return 0;
10004 close $fh;
10005 return 1;
10007 if(not read_cache()) {
10008 redefine_open3_setpgrp($setgprp_cache);
10010 # The sub is now redefined. Call it
10011 return open3_setpgrp(@_);
10014 my $job = shift;
10015 # Get the shell command to be executed (possibly with ssh infront).
10016 my $command = $job->wrapped();
10017 my $pid;
10019 if($Global::interactive or $Global::stderr_verbose) {
10020 $job->interactive_start();
10022 # Must be run after $job->interactive_start():
10023 # $job->interactive_start() may call $job->skip()
10024 if($job->{'commandline'}{'skip'}
10026 not $job->filter()) {
10027 # $job->skip() was called or job filtered
10028 $command = "true";
10030 $job->openoutputfiles();
10031 $job->print_verbose_dryrun();
10032 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
10033 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
10034 $ENV{'PARALLEL_SEQ'} = $job->seq();
10035 $ENV{'PARALLEL_PID'} = $$;
10036 $ENV{'PARALLEL_JOBSLOT'} = $job->slot();
10037 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
10038 $job->add_rm($ENV{'PARALLEL_TMP'});
10039 $job->fill_templates();
10040 ::debug("run", $Global::total_running, " processes . Starting (",
10041 $job->seq(), "): $command\n");
10042 if($opt::pipe) {
10043 my ($stdin_fh) = ::gensym();
10044 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
10045 if($opt::roundrobin and not $opt::keeporder) {
10046 # --keep-order will make sure the order will be reproducible
10047 ::set_fh_non_blocking($stdin_fh);
10049 $job->set_fh(0,"w",$stdin_fh);
10050 if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); }
10051 } elsif ($opt::tty and -c "/dev/tty" and
10052 open(my $devtty_fh, "<", "/dev/tty")) {
10053 # Give /dev/tty to the command if no one else is using it
10054 # The eval is needed to catch exception from open3
10055 local (*IN,*OUT,*ERR);
10056 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
10057 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
10058 *IN = $devtty_fh;
10059 # The eval is needed to catch exception from open3
10060 my @wrap = ('perl','-e',
10061 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
10062 "exec '$Global::shell', '-c', \@ARGV");
10063 eval {
10064 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
10065 || ::die_bug("open3-/dev/tty");
10068 close $devtty_fh;
10069 $job->set_virgin(0);
10070 } else {
10071 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
10072 $job->set_virgin(0);
10074 if($pid) {
10075 # A job was started
10076 $Global::total_running++;
10077 $Global::total_started++;
10078 $job->set_pid($pid);
10079 $job->set_starttime();
10080 $Global::running{$job->pid()} = $job;
10081 if($opt::timeout) {
10082 $Global::timeoutq->insert($job);
10084 $Global::newest_job = $job;
10085 $Global::newest_starttime = ::now();
10086 return $job;
10087 } else {
10088 # No more processes
10089 ::debug("run", "Cannot spawn more jobs.\n");
10090 return undef;
10094 sub interactive_start($) {
10095 my $self = shift;
10096 my $command = $self->wrapped();
10097 if($Global::interactive) {
10098 my $answer;
10099 ::status_no_nl("$command ?...");
10101 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
10102 $answer = <$tty_fh>;
10103 close $tty_fh;
10104 # Sometime we get an empty string (not even \n)
10105 # Do not know why, so let us just ignore it and try again
10106 } while(length $answer < 1);
10107 if (not ($answer =~ /^\s*y/i)) {
10108 $self->{'commandline'}->skip();
10110 } else {
10111 print $Global::original_stderr "$command\n";
10116 my $tmuxsocket;
10118 sub tmux_wrap($) {
10119 # Wrap command with tmux for session pPID
10120 # Input:
10121 # $actual_command = the actual command being run (incl ssh wrap)
10122 my $self = shift;
10123 my $actual_command = shift;
10124 # Temporary file name. Used for fifo to communicate exit val
10125 my $tmpfifo = ::tmpname("tmx");
10126 $self->add_rm($tmpfifo);
10128 if(length($tmpfifo) >=100) {
10129 ::error("tmux does not support sockets with path > 100.");
10130 ::wait_and_exit(255);
10132 if($opt::tmuxpane) {
10133 # Move the command into a pane in window 0
10134 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
10135 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
10136 $actual_command;
10138 my $visual_command = $self->replaced();
10139 my $title = $visual_command;
10140 if($visual_command =~ /\0/) {
10141 ::error("Command line contains NUL. tmux is confused by NUL.");
10142 ::wait_and_exit(255);
10144 # ; causes problems
10145 # ascii 194-245 annoys tmux
10146 $title =~ tr/[\011-\016;\302-\365]/ /s;
10147 $title = ::Q($title);
10149 my $l_act = length($actual_command);
10150 my $l_tit = length($title);
10151 my $l_fifo = length($tmpfifo);
10152 # The line to run contains a 118 chars extra code + the title 2x
10153 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
10155 my $quoted_space75 = ::Q(" ")x75;
10156 while($l_tit < 1000 and
10158 (890 < $l_tot and $l_tot < 1350)
10160 (9250 < $l_tot and $l_tot < 9800)
10161 )) {
10162 # tmux blocks for certain lengths:
10163 # 900 < title + command < 1200
10164 # 9250 < title + command < 9800
10165 # but only if title < 1000, so expand the title with 75 spaces
10166 # The measured lengths are:
10167 # 996 < (title + whole command) < 1127
10168 # 9331 < (title + whole command) < 9636
10169 $title .= $quoted_space75;
10170 $l_tit = length($title);
10171 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
10174 my $tmux;
10175 $ENV{'PARALLEL_TMUX'} ||= "tmux";
10176 if(not $tmuxsocket) {
10177 $tmuxsocket = ::tmpname("tms");
10178 if($opt::fg) {
10179 if(not fork) {
10180 # Run tmux in the foreground
10181 # Wait for the socket to appear
10182 while (not -e $tmuxsocket) { }
10183 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
10184 exit;
10187 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
10189 $tmux = "sh -c '".
10190 $ENV{'PARALLEL_TMUX'}.
10191 " -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
10192 $ENV{'PARALLEL_TMUX'}.
10193 " -S $tmuxsocket new-window -t p$$ -n $title";
10195 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
10196 $Limits::Command::line_max_len, " tot ",
10197 $l_tot, "\n");
10199 return "mkfifo $tmpfifo && $tmux ".
10200 # Run in tmux
10203 "(".$actual_command.');'.
10204 # The triple print is needed - otherwise the testsuite fails
10205 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].
10206 $tmpfifo."&".
10207 "echo $title; echo \007Job finished at: `date`;sleep 10"
10209 # Run outside tmux
10210 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
10211 # If csh the first will be 0h, so use the second as exit value.
10212 # Otherwise just use the first value as exit value.
10213 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
10217 sub is_already_in_results($) {
10218 # Do we already have results for this job?
10219 # Returns:
10220 # $job_already_run = bool whether there is output for this or not
10221 my $job = $_[0];
10222 if($Global::csvsep) {
10223 if($opt::joblog) {
10224 # OK: You can look for job run in joblog
10225 return 0
10226 } else {
10227 ::warning_once(
10228 "--resume --results .csv/.tsv/.json is not supported yet\n");
10229 # TODO read and parse the file
10230 return 0
10233 my $out = $job->{'commandline'}->results_out();
10234 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
10235 return(-e $out."stdout" or -f $out);
10238 sub is_already_in_joblog($) {
10239 my $job = shift;
10240 return vec($Global::job_already_run,$job->seq(),1);
10243 sub set_job_in_joblog($) {
10244 my $job = shift;
10245 vec($Global::job_already_run,$job->seq(),1) = 1;
10248 sub should_be_retried($) {
10249 # Should this job be retried?
10250 # Returns
10251 # 0 - do not retry
10252 # 1 - job queued for retry
10253 my $self = shift;
10254 if (not $opt::retries) {
10255 return 0;
10257 if(not $self->exitstatus() and not $self->exitsignal()) {
10258 # Completed with success. If there is a recorded failure: forget it
10259 $self->reset_failed_here();
10260 return 0;
10261 } else {
10262 # The job failed. Should it be retried?
10263 $self->add_failed_here();
10264 my $retries = $self->{'commandline'}->
10265 replace_placeholders([$opt::retries],0,0);
10266 if($self->total_failed() == $retries) {
10267 # This has been retried enough
10268 return 0;
10269 } else {
10270 # This command should be retried
10271 $self->set_endtime(undef);
10272 $self->reset_exitstatus();
10273 $Global::JobQueue->unget($self);
10274 ::debug("run", "Retry ", $self->seq(), "\n");
10275 return 1;
10281 my (%print_later,$job_seq_to_print);
10283 sub print_earlier_jobs($) {
10284 # Print jobs whose output is postponed due to --keep-order
10285 # Returns: N/A
10286 my $job = shift;
10287 $print_later{$job->seq()} = $job;
10288 $job_seq_to_print ||= 1;
10289 my $returnsize = 0;
10290 ::debug("run", "Looking for: $job_seq_to_print ",
10291 "This: ", $job->seq(), "\n");
10292 for(;vec($Global::job_already_run,$job_seq_to_print,1);
10293 $job_seq_to_print++) {}
10294 while(my $j = $print_later{$job_seq_to_print}) {
10295 $returnsize += $j->print();
10296 if($j->endtime()) {
10297 # Job finished - look at the next
10298 delete $print_later{$job_seq_to_print};
10299 $job_seq_to_print++;
10300 next;
10301 } else {
10302 # Job not finished yet - look at it again next round
10303 last;
10306 return $returnsize;
10310 sub print($) {
10311 # Print the output of the jobs
10312 # Returns: N/A
10313 my $self = shift;
10315 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
10316 if($opt::dryrun) {
10317 # Nothing was printed to this job:
10318 # cleanup tmp files if --files was set
10319 ::rm($self->fh(1,"name"));
10321 if($opt::pipe and $self->virgin() and not $opt::tee) {
10322 # Skip --joblog, --dryrun, --verbose
10323 } else {
10324 if($opt::ungroup) {
10325 # NULL returnsize = 0 returnsize
10326 $self->returnsize() or $self->add_returnsize(0);
10327 if($Global::joblog and defined $self->{'exitstatus'}) {
10328 # Add to joblog when finished
10329 $self->print_joblog();
10330 # Printing is only relevant for grouped/--line-buffer output.
10331 $opt::ungroup and return;
10334 # Check for disk full
10335 ::exit_if_disk_full();
10338 my $returnsize = $self->returnsize();
10339 for my $fdno (sort { $a <=> $b } keys %Global::fh) {
10340 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
10341 $fdno == 0 and next;
10342 my $out_fh = $Global::fh{$fdno};
10343 my $in_fh = $self->fh($fdno,"r");
10344 if(not $in_fh) {
10345 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
10346 # ::warning("File descriptor $fdno not defined\n");
10348 next;
10350 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
10351 if($opt::linebuffer) {
10352 # Line buffered print out
10353 $self->print_linebuffer($fdno,$in_fh,$out_fh);
10354 } elsif($opt::files) {
10355 $self->print_files($fdno,$in_fh,$out_fh);
10356 } elsif($opt::results) {
10357 $self->print_results($fdno,$in_fh,$out_fh);
10358 } else {
10359 $self->print_normal($fdno,$in_fh,$out_fh);
10361 flush $out_fh;
10363 ::debug("print", "<<joboutput\n");
10364 if(defined $self->{'exitstatus'}
10365 and not ($self->virgin() and $opt::pipe)) {
10366 if($Global::joblog and not $opt::sqlworker) {
10367 # Add to joblog when finished
10368 $self->print_joblog();
10370 if($opt::sqlworker and not $opt::results) {
10371 $Global::sql->output($self);
10373 if($Global::csvsep) {
10374 # Add output to CSV when finished
10375 $self->print_csv();
10377 if($Global::jsonout) {
10378 $self->print_json();
10381 return $returnsize - $self->returnsize();
10385 my %jsonmap;
10387 sub print_json($) {
10388 my $self = shift;
10389 sub jsonquote($) {
10390 my $a = shift;
10391 if(not $jsonmap{"\001"}) {
10392 map { $jsonmap{sprintf("%c",$_)} =
10393 sprintf '\u%04x', $_ } 0..31;
10395 $a =~ s/\\/\\\\/g;
10396 $a =~ s/\"/\\"/g;
10397 $a =~ s/([\000-\037])/$jsonmap{$1}/g;
10398 return $a;
10401 my $cmd;
10402 if($Global::verbose <= 1) {
10403 $cmd = jsonquote($self->replaced());
10404 } else {
10405 # Verbose level > 1: Print the rsync and stuff
10406 $cmd = jsonquote(join " ", @{$self->{'commandline'}});
10408 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10410 # Memory optimization: Overwrite with the joined output
10411 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10412 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10414 # "Seq": 12,
10415 # "Host": "/usr/bin/ssh foo@lo",
10416 # "Starttime": 1608344711.743,
10417 # "JobRuntime": 0.01,
10418 # "Send": 0,
10419 # "Receive": 10,
10420 # "Exitval": 0,
10421 # "Signal": 0,
10422 # "Command": "echo 1",
10423 # "V": [
10424 # "1"
10425 # ],
10426 # "Stdout": "1\n",
10427 # "Stderr": ""
10430 printf($Global::csv_fh
10431 q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ).
10432 q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ).
10433 q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }).
10434 "\n",
10435 $self->seq(),
10436 jsonquote($self->sshlogin()->string()),
10437 $self->starttime(), sprintf("%0.3f",$self->runtime()),
10438 $self->transfersize(), $self->returnsize(),
10439 $self->exitstatus(), $self->exitsignal(), $cmd,
10440 # \@$record_ref[1..$#$record_ref],
10441 (join ",",
10442 map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref],
10444 jsonquote($self->{'output'}{1}),
10445 jsonquote($self->{'output'}{2})
10451 my $header_printed;
10453 sub print_csv($) {
10454 my $self = shift;
10455 my $cmd;
10456 if($Global::verbose <= 1) {
10457 $cmd = $self->replaced();
10458 } else {
10459 # Verbose level > 1: Print the rsync and stuff
10460 $cmd = join " ", @{$self->{'commandline'}};
10462 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
10464 if(not $header_printed) {
10465 # Variable headers
10466 # Normal => V1..Vn
10467 # --header : => first value from column
10468 my @V;
10469 if($opt::header) {
10470 my $i = 1;
10471 @V = (map { $Global::input_source_header{$i++} }
10472 @$record_ref[1..$#$record_ref]);
10473 } else {
10474 my $V = "V1";
10475 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
10477 print $Global::csv_fh
10478 (map { $$_ }
10479 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
10480 "Send", "Receive", "Exitval", "Signal", "Command",
10482 "Stdout","Stderr"
10483 )),"\n";
10484 $header_printed++;
10486 # Memory optimization: Overwrite with the joined output
10487 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
10488 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
10489 print $Global::csv_fh
10490 (map { $$_ }
10491 combine_ref
10492 ($self->seq(),
10493 $self->sshlogin()->string(),
10494 $self->starttime(), sprintf("%0.3f",$self->runtime()),
10495 $self->transfersize(), $self->returnsize(),
10496 $self->exitstatus(), $self->exitsignal(), \$cmd,
10497 \@$record_ref[1..$#$record_ref],
10498 \$self->{'output'}{1},
10499 \$self->{'output'}{2})),"\n";
10503 sub combine_ref($) {
10504 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
10505 my @part = @_;
10506 my $sep = $Global::csvsep;
10507 my $quot = '"';
10508 my @out = ();
10510 my $must_be_quoted;
10511 for my $column (@part) {
10512 # Memory optimization: Content transferred as reference
10513 if(ref $column ne "SCALAR") {
10514 # Convert all columns to scalar references
10515 my $v = $column;
10516 $column = \$v;
10518 if(not defined $$column) {
10519 $$column = '';
10520 next;
10523 $must_be_quoted = 0;
10525 if($$column =~ s/$quot/$quot$quot/go){
10526 # Replace " => ""
10527 $must_be_quoted ||=1;
10529 if($$column =~ /[\s\Q$sep\E]/o){
10530 # Put quotes around if the column contains ,
10531 $must_be_quoted ||=1;
10534 $Global::use{"bytes"} ||= eval "use bytes; 1;";
10535 if ($$column =~ /\0/) {
10536 # Contains \0 => put quotes around
10537 $must_be_quoted ||=1;
10539 if($must_be_quoted){
10540 push @out, \$sep, \$quot, $column, \$quot;
10541 } else {
10542 push @out, \$sep, $column;
10545 # Pop off a $sep
10546 shift @out;
10547 return @out;
10550 sub print_files($) {
10551 # Print the name of the file containing stdout on stdout
10552 # Uses:
10553 # $opt::pipe
10554 # $opt::group = Print when job is done
10555 # $opt::linebuffer = Print ASAP
10556 # Returns: N/A
10557 my $self = shift;
10558 my ($fdno,$in_fh,$out_fh) = @_;
10560 # If the job is dead: close printing fh. Needed for --compress
10561 close $self->fh($fdno,"w");
10562 if($? and $opt::compress) {
10563 ::error($opt::compress_program." failed.");
10564 $self->set_exitstatus(255);
10566 if($opt::compress) {
10567 # Kill the decompressor which will not be needed
10568 CORE::kill "TERM", $self->fh($fdno,"rpid");
10570 close $in_fh;
10572 if($opt::pipe and $self->virgin()) {
10573 # Nothing was printed to this job:
10574 # cleanup unused tmp files because --files was set
10575 for my $fdno (1,2) {
10576 ::rm($self->fh($fdno,"name"));
10577 ::rm($self->fh($fdno,"unlink"));
10579 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
10580 print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n";
10581 if($Global::membuffer) {
10582 push @{$self->{'output'}{$fdno}},
10583 $self->tag(), $self->fh($fdno,"name");
10585 $self->add_returnsize(-s $self->fh($fdno,"name"));
10586 # Mark as printed - do not print again
10587 $self->set_fh($fdno,"name",undef);
10591 sub print_linebuffer($) {
10592 my $self = shift;
10593 my ($fdno,$in_fh,$out_fh) = @_;
10594 if(defined $self->{'exitstatus'}) {
10595 # If the job is dead: close printing fh. Needed for --compress
10596 close $self->fh($fdno,"w");
10597 if($? and $opt::compress) {
10598 ::error($opt::compress_program." failed.");
10599 $self->set_exitstatus(255);
10601 if($opt::compress) {
10602 # Blocked reading in final round
10603 for my $fdno (1,2) {
10604 ::set_fh_blocking($self->fh($fdno,'r'));
10608 if(not $self->virgin()) {
10609 if($opt::files or ($opt::results and not $Global::csvsep)) {
10610 # Print filename
10611 if($fdno == 1 and not $self->fh($fdno,"printed")) {
10612 print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n";
10613 if($Global::membuffer) {
10614 push(@{$self->{'output'}{$fdno}}, $self->tag(),
10615 $self->fh($fdno,"name"));
10617 $self->set_fh($fdno,"printed",1);
10619 # No need for reading $in_fh, as it is from "cat >/dev/null"
10620 } else {
10621 # Read halflines and print full lines
10622 my $outputlength = 0;
10623 my $halfline_ref = $self->{'halfline'}{$fdno};
10624 my ($buf,$i,$rv);
10625 # 1310720 gives 1.2 GB/s
10626 # 131072 gives 0.9 GB/s
10627 while($rv = sysread($in_fh, $buf,1310720)) {
10628 $outputlength += $rv;
10629 # TODO --recend
10630 # Treat both \n and \r as line end
10631 $i = ::max((rindex($buf,"\n")+1), (rindex($buf,"\r")+1));
10632 if($i) {
10633 # One or more complete lines were found
10634 if($opt::tag or defined $opt::tagstring) {
10635 # Replace ^ with $tag within the full line
10636 if($Global::cache_replacement_eval) {
10637 # Replace with the same value for tag
10638 my $tag = $self->tag();
10639 unshift @$halfline_ref, $tag;
10640 # TODO --recend that can be partially in @$halfline_ref
10641 substr($buf,0,$i-1) =~
10642 s/(?<=[\n\r])(?=.|$)/$tag/gs;
10643 # The length changed, so find the new ending pos
10644 $i = ::max((rindex($buf,"\n")+1),
10645 (rindex($buf,"\r")+1));
10646 } else {
10647 # Replace with freshly computed value of tag
10648 unshift @$halfline_ref, $self->tag();
10649 substr($buf,0,$i-1) =~
10650 s/(?<=[\n\r])(?=.|$)/$self->tag()/gse;
10651 # The length changed, so find the new ending pos
10652 $i = ::max((rindex($buf,"\n")+1),
10653 (rindex($buf,"\r")+1));
10656 # Print the partial line (halfline) and the last half
10657 print $out_fh @$halfline_ref, substr($buf,0,$i);
10658 # Buffer in memory for SQL and CSV-output
10659 if($Global::membuffer) {
10660 push(@{$self->{'output'}{$fdno}},
10661 @$halfline_ref, substr($buf,0,$i));
10663 # Remove the printed part by keeping the unprinted part
10664 @$halfline_ref = (substr($buf,$i));
10665 } else {
10666 # No newline, so append to the halfline
10667 push @$halfline_ref, $buf;
10670 $self->add_returnsize($outputlength);
10672 if(defined $self->{'exitstatus'}) {
10673 if($opt::files or ($opt::results and not $Global::csvsep)) {
10674 $self->add_returnsize(-s $self->fh($fdno,"name"));
10675 } else {
10676 # If the job is dead: print the remaining partial line
10677 # read remaining
10678 my $halfline_ref = $self->{'halfline'}{$fdno};
10679 if(grep /./, @$halfline_ref) {
10680 my $returnsize = 0;
10681 for(@{$self->{'halfline'}{$fdno}}) {
10682 $returnsize += length $_;
10684 $self->add_returnsize($returnsize);
10685 if($opt::tag or defined $opt::tagstring) {
10686 # Prepend $tag the the remaining half line
10687 unshift @$halfline_ref, $self->tag();
10689 # Print the partial line (halfline)
10690 print $out_fh @{$self->{'halfline'}{$fdno}};
10691 # Buffer in memory for SQL and CSV-output
10692 if($Global::membuffer) {
10693 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
10695 @$halfline_ref = ();
10698 if($self->fh($fdno,"rpid") and
10699 CORE::kill 0, $self->fh($fdno,"rpid")) {
10700 # decompress still running
10701 } else {
10702 # decompress done: close fh
10703 close $in_fh;
10704 if($? and $opt::compress) {
10705 ::error($opt::decompress_program." failed.");
10706 $self->set_exitstatus(255);
10713 sub free_ressources() {
10714 my $self = shift;
10715 if(not $opt::ungroup) {
10716 my $fh;
10717 for my $fdno (sort { $a <=> $b } keys %Global::fh) {
10718 $fh = $self->fh($fdno,"w");
10719 $fh and close $fh;
10720 $fh = $self->fh($fdno,"r");
10721 $fh and close $fh;
10726 sub print_parset($) {
10727 # Wrap output with shell script code to set as variables
10728 my $self = shift;
10729 my ($fdno,$in_fh,$out_fh) = @_;
10730 my $outputlength = 0;
10732 ::debug("parset","print $Global::parset");
10733 if($Global::parset eq "assoc") {
10734 # eval "`echo 'declare -A myassoc; myassoc=(
10735 # Each:
10736 # [$'a\tb']=$'a\tb\tc ddd'
10737 # End:
10738 # )'`"
10739 print '[',::Q($self->{'commandline'}->
10740 replace_placeholders(["\257<\257>"],0,0)),']=';
10741 } elsif($Global::parset eq "array") {
10742 # eval "`echo 'myassoc=(
10743 # Each:
10744 # $'a\tb\tc ddd'
10745 # End:
10746 # )'`"
10747 } elsif($Global::parset eq "var") {
10748 # var=$'a\tb\tc ddd'
10749 if(not @Global::parset_vars) {
10750 ::error("Too few named destination variables");
10751 ::wait_and_exit(255);
10753 print shift @Global::parset_vars,"=";
10755 local $/ = "\n";
10756 my $tag = $self->tag();
10757 my @out;
10758 while(<$in_fh>) {
10759 $outputlength += length $_;
10760 # Tag lines with \r, too
10761 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10762 push @out, $tag,$_;
10764 # Remove last newline
10765 # This often makes it easier to use the output in shell
10766 @out and ${out[$#out]} =~ s/\n$//s;
10767 print ::Q(join("",@out)),"\n";
10768 return $outputlength;
10771 sub print_normal($) {
10772 my $self = shift;
10773 my ($fdno,$in_fh,$out_fh) = @_;
10774 my $buf;
10775 close $self->fh($fdno,"w");
10776 if($? and $opt::compress) {
10777 ::error($opt::compress_program." failed.");
10778 $self->set_exitstatus(255);
10780 if(not $self->virgin()) {
10781 seek $in_fh, 0, 0;
10782 # $in_fh is now ready for reading at position 0
10783 my $outputlength = 0;
10784 my @output;
10786 if($Global::parset and $fdno == 1) {
10787 $outputlength += $self->print_parset($fdno,$in_fh,$out_fh);
10788 } elsif($opt::tag or $opt::tagstring) {
10789 # Read line by line
10790 local $/ = "\n";
10791 my $tag = $self->tag();
10792 while(<$in_fh>) {
10793 $outputlength += length $_;
10794 # Tag lines with \r, too
10795 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10796 print $out_fh $tag,$_;
10797 if($Global::membuffer) {
10798 push @{$self->{'output'}{$fdno}}, $tag, $_;
10801 } else {
10802 # Most efficient way of copying data from $in_fh to $out_fh
10803 while(sysread($in_fh,$buf,131072)) {
10804 print $out_fh $buf;
10805 $outputlength += length $buf;
10806 if($Global::membuffer) {
10807 push @{$self->{'output'}{$fdno}}, $buf;
10811 if($fdno == 1) {
10812 $self->add_returnsize($outputlength);
10814 close $in_fh;
10815 if($? and $opt::compress) {
10816 ::error($opt::decompress_program." failed.");
10817 $self->set_exitstatus(255);
10822 sub print_results($) {
10823 my $self = shift;
10824 my ($fdno,$in_fh,$out_fh) = @_;
10825 my $buf;
10826 close $self->fh($fdno,"w");
10827 if($? and $opt::compress) {
10828 ::error($opt::compress_program." failed.");
10829 $self->set_exitstatus(255);
10831 if(not $self->virgin()) {
10832 seek $in_fh, 0, 0;
10833 # $in_fh is now ready for reading at position 0
10834 my $outputlength = 0;
10835 my @output;
10837 if($Global::membuffer) {
10838 # Read data into membuffer
10839 if($opt::tag or $opt::tagstring) {
10840 # Read line by line
10841 local $/ = "\n";
10842 my $tag = $self->tag();
10843 while(<$in_fh>) {
10844 $outputlength += length $_;
10845 # Tag lines with \r, too
10846 $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs;
10847 push @{$self->{'output'}{$fdno}}, $tag, $_;
10849 } else {
10850 # Most efficient way of copying data from $in_fh to $out_fh
10851 while(sysread($in_fh,$buf,131072)) {
10852 $outputlength += length $buf;
10853 push @{$self->{'output'}{$fdno}}, $buf;
10856 } else {
10857 # Not membuffer: No need to read the file
10858 if($opt::compress) {
10859 $outputlength = -1;
10860 } else {
10861 # Determine $outputlength = file length
10862 seek($in_fh, 0, 2) || ::die_bug("cannot seek result");
10863 $outputlength = tell($in_fh);
10866 if($fdno == 1) {
10867 $self->add_returnsize($outputlength);
10869 close $in_fh;
10870 if($? and $opt::compress) {
10871 ::error($opt::decompress_program." failed.");
10872 $self->set_exitstatus(255);
10877 sub print_joblog($) {
10878 my $self = shift;
10879 my $cmd;
10880 if($Global::verbose <= 1) {
10881 $cmd = $self->replaced();
10882 } else {
10883 # Verbose level > 1: Print the rsync and stuff
10884 $cmd = $self->wrapped();
10886 # Newlines make it hard to parse the joblog
10887 $cmd =~ s/\n/\0/g;
10888 print $Global::joblog
10889 join("\t", $self->seq(), $self->sshlogin()->string(),
10890 $self->starttime(), sprintf("%10.3f",$self->runtime()),
10891 $self->transfersize(), $self->returnsize(),
10892 $self->exitstatus(), $self->exitsignal(), $cmd
10893 ). "\n";
10894 flush $Global::joblog;
10895 $self->set_job_in_joblog();
10899 my @color;
10901 sub tag($) {
10902 sub init_color() {
10903 # color combinations that are readable: black/white text
10904 # on colored background, but not white on yellow
10905 @color =
10906 # Force each color code to have the same length in chars
10907 # This will make \t work as expected
10908 ((map { [sprintf("%03d",$_),"000"] }
10909 6..7,9..11,13..15,40..51,75..87,113..123,147..159,
10910 171..231,249..254),
10911 (map { [sprintf("%03d",$_),231] }
10912 1..9,12..13,16..45,52..81,88..116,124..151,153,
10913 160..180,182..185,187..189,196..214,232..252,
10914 255..254));
10915 # reorder list so adjacent colors are dissimilar
10916 # %7 and %17 were found experimentally
10917 @color = @color[
10918 sort { ($b%7 <=> $a%7) or ($a%17 <=> $b%17) } 0..$#color
10921 my $self = shift;
10922 if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) {
10923 if($opt::tag or defined $opt::tagstring) {
10924 if($Global::color) {
10925 if(not @color) { init_color() }
10926 # Choose a value based on the seq
10927 my $col = @color[$self->seq() % ($#color+1)];
10928 $self->{'tag'} = "\033[48;5;".$col->[0].
10929 ";38;5;".$col->[1]."m".
10930 ($self->{'commandline'}->
10931 replace_placeholders([$opt::tagstring],0,0)).
10932 "\033[00m\t";
10933 } else {
10934 $self->{'tag'} = $self->{'commandline'}->
10935 replace_placeholders([$opt::tagstring],0,0)."\t";
10937 } else {
10938 $self->{'tag'} = "";
10941 return $self->{'tag'};
10945 sub hostgroups($) {
10946 my $self = shift;
10947 if(not defined $self->{'hostgroups'}) {
10948 $self->{'hostgroups'} =
10949 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
10951 return @{$self->{'hostgroups'}};
10954 sub exitstatus($) {
10955 my $self = shift;
10956 return $self->{'exitstatus'};
10959 sub set_exitstatus($$) {
10960 my $self = shift;
10961 my $exitstatus = shift;
10962 if($exitstatus) {
10963 # Overwrite status if non-zero
10964 $self->{'exitstatus'} = $exitstatus;
10965 } else {
10966 # Set status but do not overwrite
10967 # Status may have been set by --timeout
10968 $self->{'exitstatus'} ||= $exitstatus;
10970 $opt::sqlworker and
10971 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
10972 $exitstatus);
10975 sub reset_exitstatus($) {
10976 my $self = shift;
10977 undef $self->{'exitstatus'};
10980 sub exitsignal($) {
10981 my $self = shift;
10982 return $self->{'exitsignal'};
10985 sub set_exitsignal($$) {
10986 my $self = shift;
10987 my $exitsignal = shift;
10988 $self->{'exitsignal'} = $exitsignal;
10989 $opt::sqlworker and
10990 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
10991 $exitsignal);
10995 my $total_jobs;
10997 sub should_we_halt {
10998 # Should we halt? Immediately? Gracefully?
10999 # Returns: N/A
11000 my $job = shift;
11001 my $limit;
11002 if($job->exitstatus() or $job->exitsignal()) {
11003 # Job failed
11004 $Global::exitstatus++;
11005 $Global::total_failed++;
11006 if($Global::halt_fail) {
11007 ::status("$Global::progname: This job failed:",
11008 $job->replaced());
11009 $limit = $Global::total_failed;
11011 } elsif($Global::halt_success) {
11012 ::status("$Global::progname: This job succeeded:",
11013 $job->replaced());
11014 $limit = $Global::total_completed - $Global::total_failed;
11016 if($Global::halt_done) {
11017 ::status("$Global::progname: This job finished:",
11018 $job->replaced());
11019 $limit = $Global::total_completed;
11021 if(not defined $limit) {
11022 return ""
11024 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
11025 # --halt % => 1..100 (pct of jobs failed)
11026 if($Global::halt_pct and not $Global::halt_count) {
11027 $total_jobs ||= $Global::JobQueue->total_jobs();
11028 # From the pct compute the number of jobs that must fail/succeed
11029 $Global::halt_count = $total_jobs * $Global::halt_pct;
11031 if($limit >= $Global::halt_count) {
11032 # At least N jobs have failed/succeded/completed
11033 # or at least N% have failed/succeded/completed
11034 # So we should prepare for exit
11035 if($Global::halt_fail or $Global::halt_done) {
11036 # Set exit status
11037 if(not defined $Global::halt_exitstatus) {
11038 if($Global::halt_pct) {
11039 # --halt now,fail=X% or soon,fail=X%
11040 # --halt now,done=X% or soon,done=X%
11041 $Global::halt_exitstatus =
11042 ::ceil($Global::total_failed / $total_jobs * 100);
11043 } elsif($Global::halt_count) {
11044 # --halt now,fail=X or soon,fail=X
11045 # --halt now,done=X or soon,done=X
11046 $Global::halt_exitstatus =
11047 ::min($Global::total_failed,101);
11049 if($Global::halt_count and $Global::halt_count == 1) {
11050 # --halt now,fail=1 or soon,fail=1
11051 # --halt now,done=1 or soon,done=1
11052 # Emulate Bash's +128 if there is a signal
11053 $Global::halt_exitstatus =
11054 ($job->exitstatus()
11056 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
11059 ::debug("halt","Pct: ",$Global::halt_pct,
11060 " count: ",$Global::halt_count,
11061 " status: ",$Global::halt_exitstatus,"\n");
11062 } elsif($Global::halt_success) {
11063 $Global::halt_exitstatus = 0;
11065 if($Global::halt_when eq "soon") {
11066 $Global::start_no_new_jobs ||= 1;
11067 if(scalar(keys %Global::running) > 0) {
11068 # Only warn if there are more jobs running
11069 ::status
11070 ("$Global::progname: Starting no more jobs. ".
11071 "Waiting for ". (keys %Global::running).
11072 " jobs to finish.");
11075 return($Global::halt_when);
11077 return "";
11082 package CommandLine;
11084 sub new($) {
11085 my $class = shift;
11086 my $seq = shift;
11087 my $commandref = shift;
11088 $commandref || die;
11089 my $arg_queue = shift;
11090 my $context_replace = shift;
11091 my $max_number_of_args = shift; # for -N and normal (-n1)
11092 my $transfer_files = shift;
11093 my $return_files = shift;
11094 my $template_names = shift;
11095 my $template_contents = shift;
11096 my $replacecount_ref = shift;
11097 my $len_ref = shift;
11098 my %replacecount = %$replacecount_ref;
11099 my %len = %$len_ref;
11100 for (keys %$replacecount_ref) {
11101 # Total length of this replacement string {} replaced with all args
11102 $len{$_} = 0;
11104 return bless {
11105 'command' => $commandref,
11106 'seq' => $seq,
11107 'len' => \%len,
11108 'arg_list' => [],
11109 'arg_list_flat' => [],
11110 'arg_list_flat_orig' => [undef],
11111 'arg_queue' => $arg_queue,
11112 'max_number_of_args' => $max_number_of_args,
11113 'replacecount' => \%replacecount,
11114 'context_replace' => $context_replace,
11115 'transfer_files' => $transfer_files,
11116 'return_files' => $return_files,
11117 'template_names' => $template_names,
11118 'template_contents' => $template_contents,
11119 'replaced' => undef,
11120 }, ref($class) || $class;
11123 sub flush_cache() {
11124 my $self = shift;
11125 for my $arglist (@{$self->{'arg_list'}}) {
11126 for my $arg (@$arglist) {
11127 $arg->flush_cache();
11130 $self->{'arg_queue'}->flush_cache();
11131 $self->{'replaced'} = undef;
11134 sub seq($) {
11135 my $self = shift;
11136 return $self->{'seq'};
11139 sub set_seq($$) {
11140 my $self = shift;
11141 $self->{'seq'} = shift;
11144 sub slot($) {
11145 # Find the number of a free job slot and return it
11146 # Uses:
11147 # @Global::slots - list with free jobslots
11148 # Returns:
11149 # $jobslot = number of jobslot
11150 my $self = shift;
11151 if(not $self->{'slot'}) {
11152 if(not @Global::slots) {
11153 # $max_slot_number will typically be $Global::max_jobs_running
11154 push @Global::slots, ++$Global::max_slot_number;
11156 $self->{'slot'} = shift @Global::slots;
11158 return $self->{'slot'};
11162 my $already_spread;
11163 my $darwin_max_len;
11165 sub populate($) {
11166 # Add arguments from arg_queue until the number of arguments or
11167 # max line length is reached
11168 # Uses:
11169 # $Global::minimal_command_line_length
11170 # $opt::cat
11171 # $opt::fifo
11172 # $Global::JobQueue
11173 # $opt::m
11174 # $opt::X
11175 # $Global::max_jobs_running
11176 # Returns: N/A
11177 my $self = shift;
11178 my $next_arg;
11179 my $max_len = $Global::minimal_command_line_length
11180 || Limits::Command::max_length();
11181 if($^O eq "darwin") {
11182 # Darwin's limit is affected by:
11183 # * number of environment names (variables+functions)
11184 # * size of environment
11185 # * the length of arguments:
11186 # a one-char argument lowers the limit by 5
11187 # To be safe assume all arguments are one-char
11188 # The max_len is cached between runs, but if the size of
11189 # the environment is different we need to recompute the
11190 # usable max length for this run of GNU Parallel
11191 # See https://unix.stackexchange.com/a/604943/2972
11192 if(not $darwin_max_len) {
11193 my $envc = (keys %ENV);
11194 my $envn = length join"",(keys %ENV);
11195 my $envv = length join"",(values %ENV);
11196 $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10;
11197 ::debug("init",
11198 "length: $darwin_max_len ".
11199 "3+($max_len - $envn - $envv)/5 - $envc*2");
11201 $max_len = $darwin_max_len;
11203 if($opt::cat or $opt::fifo) {
11204 # Get the empty arg added by --pipepart (if any)
11205 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
11206 # $PARALLEL_TMP will point to a tempfile that will be used as {}
11207 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
11208 unget([Arg->new('$PARALLEL_TMP')]);
11210 while (not $self->{'arg_queue'}->empty()) {
11211 $next_arg = $self->{'arg_queue'}->get();
11212 if(not defined $next_arg) {
11213 next;
11215 $self->push($next_arg);
11216 if($self->len() >= $max_len) {
11217 # Command length is now > max_length
11218 # If there are arguments: remove the last
11219 # If there are no arguments: Error
11220 # TODO stuff about -x opt_x
11221 if($self->number_of_args() > 1) {
11222 # There is something to work on
11223 $self->{'arg_queue'}->unget($self->pop());
11224 last;
11225 } else {
11226 my $args = join(" ", map { $_->orig() } @$next_arg);
11227 ::error("Command line too long (".
11228 $self->len(). " >= ".
11229 $max_len.
11230 ") at input ".
11231 $self->{'arg_queue'}->arg_number().
11232 ": ".
11233 ((length $args > 50) ?
11234 (substr($args,0,50))."..." :
11235 $args));
11236 $self->{'arg_queue'}->unget($self->pop());
11237 ::wait_and_exit(255);
11241 if(defined $self->{'max_number_of_args'}) {
11242 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
11243 last;
11247 if(($opt::m or $opt::X) and not $already_spread
11248 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
11249 # -m or -X and EOF => Spread the arguments over all jobslots
11250 # (unless they are already spread)
11251 $already_spread ||= 1;
11252 if($self->number_of_args() > 1) {
11253 $self->{'max_number_of_args'} =
11254 ::ceil($self->number_of_args()/$Global::max_jobs_running);
11255 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
11256 $self->{'max_number_of_args'};
11257 $self->{'arg_queue'}->unget($self->pop_all());
11258 while($self->number_of_args() < $self->{'max_number_of_args'}) {
11259 $self->push($self->{'arg_queue'}->get());
11262 $Global::JobQueue->flush_total_jobs();
11265 if($opt::sqlmaster) {
11266 # Insert the V1..Vn for this $seq in SQL table instead of generating one
11267 $Global::sql->insert_records($self->seq(), $self->{'command'},
11268 $self->{'arg_list_flat_orig'});
11273 sub push($) {
11274 # Add one or more records as arguments
11275 # Returns: N/A
11276 my $self = shift;
11277 my $record = shift;
11278 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
11279 push @{$self->{'arg_list_flat'}}, @$record;
11280 push @{$self->{'arg_list'}}, $record;
11281 # Make @arg available for {= =}
11282 *Arg::arg = $self->{'arg_list_flat_orig'};
11284 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11285 for my $perlexpr (keys %{$self->{'replacecount'}}) {
11286 if($perlexpr =~ /^(\d+) /) {
11287 # Positional
11288 defined($record->[$1-1]) or next;
11289 $self->{'len'}{$perlexpr} +=
11290 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
11291 } else {
11292 for my $arg (@$record) {
11293 if(defined $arg) {
11294 $self->{'len'}{$perlexpr} +=
11295 length $arg->replace($perlexpr,$quote_arg,$self);
11302 sub pop($) {
11303 # Remove last argument
11304 # Returns:
11305 # the last record
11306 my $self = shift;
11307 my $record = pop @{$self->{'arg_list'}};
11308 # pop off arguments from @$record
11309 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
11310 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
11311 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11312 for my $perlexpr (keys %{$self->{'replacecount'}}) {
11313 if($perlexpr =~ /^(\d+) /) {
11314 # Positional
11315 defined($record->[$1-1]) or next;
11316 $self->{'len'}{$perlexpr} -=
11317 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
11318 } else {
11319 for my $arg (@$record) {
11320 if(defined $arg) {
11321 $self->{'len'}{$perlexpr} -=
11322 length $arg->replace($perlexpr,$quote_arg,$self);
11327 return $record;
11330 sub pop_all($) {
11331 # Remove all arguments and zeros the length of replacement perlexpr
11332 # Returns:
11333 # all records
11334 my $self = shift;
11335 my @popped = @{$self->{'arg_list'}};
11336 for my $perlexpr (keys %{$self->{'replacecount'}}) {
11337 $self->{'len'}{$perlexpr} = 0;
11339 $self->{'arg_list'} = [];
11340 $self->{'arg_list_flat_orig'} = [undef];
11341 $self->{'arg_list_flat'} = [];
11342 return @popped;
11345 sub number_of_args($) {
11346 # The number of records
11347 # Returns:
11348 # number of records
11349 my $self = shift;
11350 # This is really the number of records
11351 return $#{$self->{'arg_list'}}+1;
11354 sub number_of_recargs($) {
11355 # The number of args in records
11356 # Returns:
11357 # number of args records
11358 my $self = shift;
11359 my $sum = 0;
11360 my $nrec = scalar @{$self->{'arg_list'}};
11361 if($nrec) {
11362 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
11364 return $sum;
11367 sub args_as_string($) {
11368 # Returns:
11369 # all unmodified arguments joined with ' ' (similar to {})
11370 my $self = shift;
11371 return (join " ", map { $_->orig() }
11372 map { @$_ } @{$self->{'arg_list'}});
11375 sub results_out($) {
11376 sub max_file_name_length {
11377 # Figure out the max length of a subdir
11378 # TODO and the max total length
11379 # Ext4 = 255,130816
11380 # Uses:
11381 # $Global::max_file_length is set
11382 # Returns:
11383 # $Global::max_file_length
11384 my $testdir = shift;
11386 my $upper = 100_000_000;
11387 # Dir length of 8 chars is supported everywhere
11388 my $len = 8;
11389 my $dir = "x"x$len;
11390 do {
11391 rmdir($testdir."/".$dir);
11392 $len *= 16;
11393 $dir = "x"x$len;
11394 } while ($len < $upper and mkdir $testdir."/".$dir);
11395 # Then search for the actual max length between $len/16 and $len
11396 my $min = $len/16;
11397 my $max = $len;
11398 while($max-$min > 5) {
11399 # If we are within 5 chars of the exact value:
11400 # it is not worth the extra time to find the exact value
11401 my $test = int(($min+$max)/2);
11402 $dir = "x"x$test;
11403 if(mkdir $testdir."/".$dir) {
11404 rmdir($testdir."/".$dir);
11405 $min = $test;
11406 } else {
11407 $max = $test;
11410 $Global::max_file_length = $min;
11411 return $min;
11414 my $self = shift;
11415 my $out = $self->replace_placeholders([$opt::results],0,0);
11416 if($out eq $opt::results) {
11417 # $opt::results simple string: Append args_as_dirname
11418 my $args_as_dirname = $self->args_as_dirname();
11419 # Output in: prefix/name1/val1/name2/val2/stdout
11420 $out = $opt::results."/".$args_as_dirname;
11421 if(-d $out or eval{ File::Path::mkpath($out); }) {
11422 # OK
11423 } else {
11424 # mkpath failed: Argument probably too long.
11425 # Set $Global::max_file_length, which will keep the individual
11426 # dir names shorter than the max length
11427 max_file_name_length($opt::results);
11428 $args_as_dirname = $self->args_as_dirname();
11429 # prefix/name1/val1/name2/val2/
11430 $out = $opt::results."/".$args_as_dirname;
11431 File::Path::mkpath($out);
11433 $out .="/";
11434 } else {
11435 if($out =~ m:/$:) {
11436 # / = dir
11437 if(-d $out or eval{ File::Path::mkpath($out); }) {
11438 # OK
11439 } else {
11440 ::error("Cannot make dir '$out'.");
11441 ::wait_and_exit(255);
11443 } else {
11444 $out =~ m:(.*)/:;
11445 File::Path::mkpath($1);
11448 return $out;
11451 sub args_as_dirname($) {
11452 # Returns:
11453 # all unmodified arguments joined with '/' (similar to {})
11454 # \t \0 \\ and / are quoted as: \t \0 \\ \_
11455 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
11456 my $self = shift;
11457 my @res = ();
11459 for my $rec_ref (@{$self->{'arg_list'}}) {
11460 # If headers are used, sort by them.
11461 # Otherwise keep the order from the command line.
11462 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
11463 for my $n (@header_indexes_sorted) {
11464 CORE::push(@res,
11465 $Global::input_source_header{$n},
11466 map { my $s = $_;
11467 # \t \0 \\ and / are quoted as: \t \0 \\ \_
11468 $s =~ s/\\/\\\\/g;
11469 $s =~ s/\t/\\t/g;
11470 $s =~ s/\0/\\0/g;
11471 $s =~ s:/:\\_:g;
11472 if($Global::max_file_length) {
11473 # Keep each subdir shorter than the longest
11474 # allowed file name
11475 $s = substr($s,0,$Global::max_file_length);
11477 $s; }
11478 $rec_ref->[$n-1]->orig());
11481 return join "/", @res;
11484 sub header_indexes_sorted($) {
11485 # Sort headers first by number then by name.
11486 # E.g.: 1a 1b 11a 11b
11487 # Returns:
11488 # Indexes of %Global::input_source_header sorted
11489 my $max_col = shift;
11491 no warnings 'numeric';
11492 for my $col (1 .. $max_col) {
11493 # Make sure the header is defined. If it is not: use column number
11494 if(not defined $Global::input_source_header{$col}) {
11495 $Global::input_source_header{$col} = $col;
11498 my @header_indexes_sorted = sort {
11499 # Sort headers numerically then asciibetically
11500 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
11502 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
11503 } 1 .. $max_col;
11504 return @header_indexes_sorted;
11507 sub len($) {
11508 # Uses:
11509 # @opt::shellquote
11510 # The length of the command line with args substituted
11511 my $self = shift;
11512 my $len = 0;
11513 # Add length of the original command with no args
11514 # Length of command w/ all replacement args removed
11515 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
11516 ::debug("length", "noncontext + command: $len\n");
11517 # MacOS has an overhead of 8 bytes per argument
11518 my $darwin = ($^O eq "darwin") ? 8 : 0;
11519 my $recargs = $self->number_of_recargs();
11520 if($self->{'context_replace'}) {
11521 # Context is duplicated for each arg
11522 $len += $recargs * $self->{'len'}{'context'};
11523 for my $replstring (keys %{$self->{'replacecount'}}) {
11524 # If the replacements string is more than once: mulitply its length
11525 $len += $self->{'len'}{$replstring} *
11526 $self->{'replacecount'}{$replstring};
11527 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
11528 $self->{'replacecount'}{$replstring}, "\n");
11530 # echo 11 22 33 44 55 66 77 88 99 1010
11531 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
11532 # 5 + ctxgrp*arg
11533 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
11534 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
11535 # Add space between context groups
11536 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
11537 if($darwin) {
11538 $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin;
11540 } else {
11541 # Each replacement string may occur several times
11542 # Add the length for each time
11543 $len += 1*$self->{'len'}{'context'};
11544 ::debug("length", "context+noncontext + command: $len\n");
11545 for my $replstring (keys %{$self->{'replacecount'}}) {
11546 # (space between recargs + length of replacement)
11547 # * number this replacement is used
11548 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
11549 $self->{'replacecount'}{$replstring};
11550 if($darwin) {
11551 $len += ($recargs * $self->{'replacecount'}{$replstring}
11552 * $darwin);
11556 if(defined $Global::parallel_env) {
11557 # If we are using --env, add the prefix for that, too.
11558 $len += length $Global::parallel_env;
11560 if($Global::quoting) {
11561 # Pessimistic length if -q is set
11562 # Worse than worst case: ' => "'" + " => '"'
11563 # TODO can we count the number of expanding chars?
11564 # and count them in arguments, too?
11565 $len *= 3;
11567 if(@opt::shellquote) {
11568 # Pessimistic length if --shellquote is set
11569 # Worse than worst case: ' => "'"
11570 for(@opt::shellquote) {
11571 $len *= 3;
11573 $len *= 5;
11575 if(@opt::sshlogin) {
11576 # Pessimistic length if remote
11577 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
11578 $len = int($len*4/3);
11581 return $len;
11584 sub replaced($) {
11585 # Uses:
11586 # $Global::quote_replace
11587 # $Global::quoting
11588 # Returns:
11589 # $replaced = command with place holders replaced and prepended
11590 my $self = shift;
11591 if(not defined $self->{'replaced'}) {
11592 # Don't quote arguments if the input is the full command line
11593 my $quote_arg = ($Global::quote_replace and not $Global::quoting);
11594 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
11595 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
11596 $self->{'replaced'} = $self->
11597 replace_placeholders($self->{'command'},$Global::quoting,
11598 $quote_arg);
11599 my $len = length $self->{'replaced'};
11600 if ($len != $self->len()) {
11601 ::debug("length", $len, " != ", $self->len(),
11602 " ", $self->{'replaced'}, "\n");
11603 } else {
11604 ::debug("length", $len, " == ", $self->len(),
11605 " ", $self->{'replaced'}, "\n");
11608 return $self->{'replaced'};
11611 sub replace_placeholders($$$$) {
11612 # Replace foo{}bar with fooargbar
11613 # Input:
11614 # $targetref = command as shell words
11615 # $quote = should everything be quoted?
11616 # $quote_arg = should replaced arguments be quoted?
11617 # Uses:
11618 # @Arg::arg = arguments as strings to be use in {= =}
11619 # Returns:
11620 # @target with placeholders replaced
11621 my $self = shift;
11622 my $targetref = shift;
11623 my $quote = shift;
11624 my $quote_arg = shift;
11625 my %replace;
11627 # Token description:
11628 # \0spc = unquoted space
11629 # \0end = last token element
11630 # \0ign = dummy token to be ignored
11631 # \257<...\257> = replacement expression
11632 # " " = quoted space, that splits -X group
11633 # text = normal text - possibly part of -X group
11634 my $spacer = 0;
11635 my @tokens = grep { length $_ > 0 } map {
11636 if(/^\257<|^ $/) {
11637 # \257<...\257> or space
11639 } else {
11640 # Split each space/tab into a token
11641 split /(?=\s)|(?<=\s)/
11644 # Split \257< ... \257> into own token
11645 map { split /(?=\257<)|(?<=\257>)/ }
11646 # Insert "\0spc" between every element
11647 # This space should never be quoted
11648 map { $spacer++ ? ("\0spc",$_) : $_ }
11649 map { $_ eq "" ? "\0empty" : $_ }
11650 @$targetref;
11652 if(not @tokens) {
11653 # @tokens is empty: Return empty array
11654 return @tokens;
11656 ::debug("replace", "Tokens ".join":",@tokens,"\n");
11657 # Make it possible to use $arg[2] in {= =}
11658 *Arg::arg = $self->{'arg_list_flat_orig'};
11659 # Flat list:
11660 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
11661 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
11662 if(not @{$self->{'arg_list_flat'}}) {
11663 @{$self->{'arg_list_flat'}} = Arg->new("");
11665 my $argref = $self->{'arg_list_flat'};
11666 # Number of arguments - used for positional arguments
11667 my $n = $#$argref+1;
11669 # $self is actually a CommandLine-object,
11670 # but it looks nice to be able to say {= $job->slot() =}
11671 my $job = $self;
11672 # @replaced = tokens with \257< \257> replaced
11673 my @replaced;
11674 if($self->{'context_replace'}) {
11675 my @ctxgroup;
11676 for my $t (@tokens,"\0end") {
11677 # \0end = last token was end of tokens.
11678 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
11679 # Context group complete: Replace in it
11680 if(grep { /^\257</ } @ctxgroup) {
11681 # Context group contains a replacement string:
11682 # Copy once per arg
11683 my $space = "\0ign";
11684 for my $arg (@$argref) {
11685 my $normal_replace;
11686 # Push output
11687 # Put unquoted space before each context group
11688 # except the first
11689 CORE::push @replaced, $space, map {
11690 $a = $_;
11691 if($a =~
11692 s{\257<(-?\d+)?(.*)\257>}
11694 if($1) {
11695 # Positional replace
11696 # Find the relevant arg and replace it
11697 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
11698 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11699 replace($2,$quote_arg,$self)
11700 : "");
11701 } else {
11702 # Normal replace
11703 $normal_replace ||= 1;
11704 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11706 }sgxe) {
11707 # Token is \257<..\257>
11708 } else {
11709 if($Global::escape_string_present) {
11710 # Command line contains \257:
11711 # Unescape it \257\256 => \257
11712 $a =~ s/\257\256/\257/g;
11716 } @ctxgroup;
11717 $normal_replace or last;
11718 $space = "\0spc";
11720 } else {
11721 # Context group has no a replacement string: Copy it once
11722 CORE::push @replaced, map {
11723 $Global::escape_string_present and s/\257\256/\257/g; $_;
11724 } @ctxgroup;
11726 # New context group
11727 @ctxgroup=();
11729 if($t eq "\0spc" or $t eq " ") {
11730 CORE::push @replaced,$t;
11731 } else {
11732 CORE::push @ctxgroup,$t;
11735 } else {
11736 # @group = @token
11737 # Replace in group
11738 # Push output
11739 # repquote = no if {} first on line, no if $quote, yes otherwise
11740 for my $t (@tokens) {
11741 if($t =~ /^\257</) {
11742 my $space = "\0ign";
11743 for my $arg (@$argref) {
11744 my $normal_replace;
11745 $a = $t;
11746 $a =~
11747 s{\257<(-?\d+)?(.*)\257>}
11749 if($1) {
11750 # Positional replace
11751 # Find the relevant arg and replace it
11752 ($argref->[$1 > 0 ? $1-1 : $n+$1] ?
11753 # If defined: replace
11754 $argref->[$1 > 0 ? $1-1 : $n+$1]->
11755 replace($2,$quote_arg,$self)
11756 : "");
11757 } else {
11758 # Normal replace
11759 $normal_replace ||= 1;
11760 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
11762 }sgxe;
11763 CORE::push @replaced, $space, $a;
11764 $normal_replace or last;
11765 $space = "\0spc";
11767 } else {
11768 # No replacement
11769 CORE::push @replaced, map {
11770 $Global::escape_string_present and s/\257\256/\257/g; $_;
11771 } $t;
11775 *Arg::arg = [];
11776 ::debug("replace","Replaced: ".join":",@replaced,"\n");
11778 # Put tokens into groups that may be quoted.
11779 my @quotegroup;
11780 my @quoted;
11781 for (map { $_ eq "\0empty" ? "" : $_ }
11782 grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" }
11783 @replaced, "\0end") {
11784 if($_ eq "\0spc" or $_ eq "\0end") {
11785 # \0spc splits quotable groups
11786 if($quote) {
11787 if(@quotegroup) {
11788 CORE::push @quoted, ::Q(join"",@quotegroup);;
11790 } else {
11791 CORE::push @quoted, join"",@quotegroup;
11793 @quotegroup = ();
11794 } else {
11795 CORE::push @quotegroup, $_;
11798 ::debug("replace","Quoted: ".join":",@quoted,"\n");
11799 return wantarray ? @quoted : "@quoted";
11802 sub skip($) {
11803 # Skip this job
11804 my $self = shift;
11805 $self->{'skip'} = 1;
11809 package CommandLineQueue;
11811 sub new($) {
11812 my $class = shift;
11813 my $commandref = shift;
11814 my $read_from = shift;
11815 my $context_replace = shift || 0;
11816 my $max_number_of_args = shift;
11817 my $transfer_files = shift;
11818 my $return_files = shift;
11819 my $template_names = shift;
11820 my $template_contents = shift;
11821 my @unget = ();
11822 my $posrpl;
11823 my ($replacecount_ref, $len_ref);
11824 my @command = @$commandref;
11825 my $seq = 1;
11826 # Replace replacement strings with {= perl expr =}
11827 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11828 @command = merge_rpl_parts(@command);
11830 # Protect matching inside {= perl expr =}
11831 # by replacing {= and =} with \257< and \257>
11832 # in options that can contain replacement strings:
11833 # @command, --transferfile, --return,
11834 # --tagstring, --workdir, --results
11835 for(@command, @$transfer_files, @$return_files,
11836 @$template_names, @$template_contents,
11837 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries,
11838 @opt::filter) {
11839 # Skip if undefined
11840 $_ or next;
11841 # Escape \257 => \257\256
11842 $Global::escape_string_present += s/\257/\257\256/g;
11843 # Needs to match rightmost left parens (Perl defaults to leftmost)
11844 # to deal with: {={==} and {={==}=}
11845 # Replace {= -> \257< and =} -> \257>
11847 # Complex way to do:
11848 # s/{=(.*)=}/\257<$1\257>/g
11849 # which would not work
11850 s[\Q$Global::parensleft\E # Match {=
11851 # Match . unless the next string is {= or =}
11852 # needed to force matching the shortest {= =}
11853 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
11854 \Q$Global::parensright\E ] # Match =}
11855 {\257<$1\257>}gxs;
11856 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
11857 # Replace long --rpl's before short ones, as a short may be a
11858 # substring of a long:
11859 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
11861 # Replace the shorthand string (--rpl)
11862 # with the {= perl expr =}
11864 # Avoid searching for shorthand strings inside existing {= perl expr =}
11866 # Replace $$1 in {= perl expr =} with groupings in shorthand string
11868 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
11869 # echo {/.tar/.gz} ::: UU.tar.gz
11870 my ($prefix,$grp_regexp,$postfix) =
11871 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
11872 ( \(.*\) )? # Group capture regexp - e.g (.*)
11873 ( [^)]* )$ # Postfix - e.g }
11874 /xs;
11875 $grp_regexp ||= '';
11876 my $rplval = $Global::rpl{$rpl};
11877 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11878 # Don't replace after \257 unless \257>
11879 \Q$prefix\E $grp_regexp \Q$postfix\E}
11881 # The start remains the same
11882 my $unchanged = $1;
11883 # Dummy entry to start at 1.
11884 my @grp = (1);
11885 # $2 = first ()-group in $grp_regexp
11886 # Put $2 in $grp[1], Put $3 in $grp[2]
11887 # so first ()-group in $grp_regexp is $grp[1];
11888 for(my $i = 2; defined $grp[$#grp]; $i++) {
11889 push @grp, eval '$'.$i;
11891 my $rv = $rplval;
11892 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11893 # in the code to be executed
11894 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11895 # prepend with $_pAr_gRp1 = perlquote($1),
11896 my $set_args = "";
11897 for(my $i = 1;defined $grp[$i]; $i++) {
11898 $set_args .= "\$_pAr_gRp$i = \"" .
11899 ::perl_quote_scalar($grp[$i]) . "\";";
11901 $unchanged . "\257<" . $set_args . $rv . "\257>"
11902 }gxes) {
11904 # Do the same for the positional replacement strings
11905 $posrpl = $rpl;
11906 if($posrpl =~ s/^\{//) {
11907 # Only do this if the shorthand start with {
11908 $prefix=~s/^\{//;
11909 # Don't replace after \257 unless \257>
11910 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
11911 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
11913 # The start remains the same
11914 my $unchanged = $1;
11915 my $position = $2;
11916 # Dummy entry to start at 1.
11917 my @grp = (1);
11918 # $3 = first ()-group in $grp_regexp
11919 # Put $3 in $grp[1], Put $4 in $grp[2]
11920 # so first ()-group in $grp_regexp is $grp[1];
11921 for(my $i = 3; defined $grp[$#grp]; $i++) {
11922 push @grp, eval '$'.$i;
11924 my $rv = $rplval;
11925 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
11926 # in the code to be executed
11927 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
11928 # prepend with $_pAr_gRp1 = perlquote($1),
11929 my $set_args = "";
11930 for(my $i = 1;defined $grp[$i]; $i++) {
11931 $set_args .= "\$_pAr_gRp$i = \"" .
11932 ::perl_quote_scalar($grp[$i]) . "\";";
11934 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
11935 }gxes) {
11940 # Add {} if no replacement strings in @command
11941 ($replacecount_ref, $len_ref, @command) =
11942 replacement_counts_and_lengths($transfer_files, $return_files,
11943 $template_names, $template_contents,
11944 @command);
11945 if("@command" =~ /^[^ \t\n=]*\257</) {
11946 # Replacement string is (part of) the command (and not just
11947 # argument or variable definition V1={})
11948 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
11949 # Do no quote (Otherwise it will fail if the input contains spaces)
11950 $Global::quote_replace = 0;
11953 if($opt::sqlmaster and $Global::sql->append()) {
11954 $seq = $Global::sql->max_seq() + 1;
11957 return bless {
11958 ('unget' => \@unget,
11959 'command' => \@command,
11960 'replacecount' => $replacecount_ref,
11961 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
11962 'context_replace' => $context_replace,
11963 'len' => $len_ref,
11964 'max_number_of_args' => $max_number_of_args,
11965 'size' => undef,
11966 'transfer_files' => $transfer_files,
11967 'return_files' => $return_files,
11968 'template_names' => $template_names,
11969 'template_contents' => $template_contents,
11970 'seq' => $seq,
11972 }, ref($class) || $class;
11975 sub merge_rpl_parts($) {
11976 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
11977 # Input:
11978 # @in = the @command as given by the user
11979 # Uses:
11980 # $Global::parensleft
11981 # $Global::parensright
11982 # Returns:
11983 # @command with parts merged to keep {= and =} as one
11984 my @in = @_;
11985 my @out;
11986 my $l = quotemeta($Global::parensleft);
11987 my $r = quotemeta($Global::parensright);
11989 while(@in) {
11990 my $s = shift @in;
11991 $_ = $s;
11992 # Remove matching (right most) parens
11993 while(s/(.*)$l.*?$r/$1/os) {}
11994 if(/$l/o) {
11995 # Missing right parens
11996 while(@in) {
11997 $s .= " ".shift @in;
11998 $_ = $s;
11999 while(s/(.*)$l.*?$r/$1/os) {}
12000 if(not /$l/o) {
12001 last;
12005 push @out, $s;
12007 return @out;
12010 sub replacement_counts_and_lengths($$@) {
12011 # Count the number of different replacement strings.
12012 # Find the lengths of context for context groups and non-context
12013 # groups.
12014 # If no {} found in @command: add it to @command
12016 # Input:
12017 # \@transfer_files = array of filenames to transfer
12018 # \@return_files = array of filenames to return
12019 # \@template_names = array of names to copy to
12020 # \@template_contents = array of contents to write
12021 # @command = command template
12022 # Output:
12023 # \%replacecount, \%len, @command
12024 my $transfer_files = shift;
12025 my $return_files = shift;
12026 my $template_names = shift;
12027 my $template_contents = shift;
12028 my @command = @_;
12029 my (%replacecount,%len);
12030 my $sum = 0;
12031 while($sum == 0) {
12032 # Count how many times each replacement string is used
12033 my @cmd = @command;
12034 my $contextlen = 0;
12035 my $noncontextlen = 0;
12036 my $contextgroups = 0;
12037 for my $c (@cmd) {
12038 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
12039 # %replacecount = { "perlexpr" => number of times seen }
12040 # e.g { "s/a/b/" => 2 }
12041 $replacecount{$1}++;
12042 $sum++;
12044 # Measure the length of the context around the {= perl expr =}
12045 # Use that {=...=} has been replaced with \000 above
12046 # So there is no need to deal with \257<
12047 while($c =~ s/ (\S*\000\S*) //xs) {
12048 my $w = $1;
12049 $w =~ tr/\000//d; # Remove all \000's
12050 $contextlen += length($w);
12051 $contextgroups++;
12053 # All {= perl expr =} have been removed: The rest is non-context
12054 $noncontextlen += length $c;
12056 for(@$transfer_files, @$return_files,
12057 @$template_names, @$template_contents,
12058 @opt::filter,
12059 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
12060 # Options that can contain replacement strings
12061 $_ or next;
12062 my $t = $_;
12063 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
12064 # %replacecount = { "perlexpr" => number of times seen }
12065 # e.g { "$_++" => 2 }
12066 # But for tagstring we just need to mark it as seen
12067 $replacecount{$1} ||= 1;
12070 if($opt::bar) {
12071 # If the command does not contain {} force it to be computed
12072 # as it is being used by --bar
12073 $replacecount{""} ||= 1;
12076 $len{'context'} = 0+$contextlen;
12077 $len{'noncontext'} = $noncontextlen;
12078 $len{'contextgroups'} = $contextgroups;
12079 $len{'noncontextgroups'} = @cmd-$contextgroups;
12080 ::debug("length", "@command Context: ", $len{'context'},
12081 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
12082 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
12083 if($sum == 0) {
12084 if(not @command) {
12085 # Default command = {}
12086 @command = ("\257<\257>");
12087 } elsif(($opt::pipe or $opt::pipepart)
12088 and not $opt::fifo and not $opt::cat) {
12089 # With --pipe / --pipe-part you can have no replacement
12090 last;
12091 } else {
12092 # Append {} to the command if there are no {...}'s and no {=...=}
12093 push @command, ("\257<\257>");
12097 return(\%replacecount,\%len,@command);
12100 sub get($) {
12101 my $self = shift;
12102 if(@{$self->{'unget'}}) {
12103 my $cmd_line = shift @{$self->{'unget'}};
12104 return ($cmd_line);
12105 } else {
12106 if($opt::sqlworker) {
12107 # Get the sequence number from the SQL table
12108 $self->set_seq($SQL::next_seq);
12109 # Get the command from the SQL table
12110 $self->{'command'} = $SQL::command_ref;
12111 my @command;
12112 # Recompute replace counts based on the read command
12113 ($self->{'replacecount'},
12114 $self->{'len'}, @command) =
12115 replacement_counts_and_lengths($self->{'transfer_files'},
12116 $self->{'return_files'},
12117 $self->{'template_name'},
12118 $self->{'template_contents'},
12119 @$SQL::command_ref);
12120 if("@command" =~ /^[^ \t\n=]*\257</) {
12121 # Replacement string is (part of) the command (and not just
12122 # argument or variable definition V1={})
12123 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
12124 # Do no quote (Otherwise it will fail if the input contains spaces)
12125 $Global::quote_replace = 0;
12129 my $cmd_line = CommandLine->new($self->seq(),
12130 $self->{'command'},
12131 $self->{'arg_queue'},
12132 $self->{'context_replace'},
12133 $self->{'max_number_of_args'},
12134 $self->{'transfer_files'},
12135 $self->{'return_files'},
12136 $self->{'template_names'},
12137 $self->{'template_contents'},
12138 $self->{'replacecount'},
12139 $self->{'len'},
12141 $cmd_line->populate();
12142 ::debug("run","cmd_line->number_of_args ",
12143 $cmd_line->number_of_args(), "\n");
12144 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
12145 if($cmd_line->replaced() eq "") {
12146 # Empty command - pipe requires a command
12147 ::error("--pipe/--pipepart must have a command to pipe into ".
12148 "(e.g. 'cat').");
12149 ::wait_and_exit(255);
12151 } elsif($cmd_line->number_of_args() == 0) {
12152 # We did not get more args - maybe at EOF string?
12153 return undef;
12155 $self->set_seq($self->seq()+1);
12156 return $cmd_line;
12160 sub unget($) {
12161 my $self = shift;
12162 unshift @{$self->{'unget'}}, @_;
12165 sub empty($) {
12166 my $self = shift;
12167 my $empty = (not @{$self->{'unget'}}) &&
12168 $self->{'arg_queue'}->empty();
12169 ::debug("run", "CommandLineQueue->empty $empty");
12170 return $empty;
12173 sub seq($) {
12174 my $self = shift;
12175 return $self->{'seq'};
12178 sub set_seq($$) {
12179 my $self = shift;
12180 $self->{'seq'} = shift;
12183 sub quote_args($) {
12184 my $self = shift;
12185 # If there is not command emulate |bash
12186 return $self->{'command'};
12190 package Limits::Command;
12192 # Maximal command line length (for -m and -X)
12193 sub max_length($) {
12194 # Find the max_length of a command line and cache it
12195 # Returns:
12196 # number of chars on the longest command line allowed
12197 if(not $Limits::Command::line_max_len) {
12198 # Disk cache of max command line length
12199 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
12200 "/linelen";
12201 my $cached_limit;
12202 if(open(my $fh, "<", $len_cache)) {
12203 $cached_limit = <$fh>;
12204 $cached_limit || ::die_bug("Cannot read $len_cache");
12205 close $fh;
12207 if(not $cached_limit) {
12208 $cached_limit = real_max_length();
12209 # If $HOME is write protected: Do not fail
12210 my $dir = ::dirname($len_cache);
12211 -d $dir or eval { File::Path::mkpath($dir); };
12212 open(my $fh, ">", $len_cache.$$);
12213 print $fh $cached_limit;
12214 close $fh;
12215 rename $len_cache.$$, $len_cache || ::die_bug("rename cache file");
12217 $Limits::Command::line_max_len = tmux_length($cached_limit);
12218 if($opt::max_chars) {
12219 if($opt::max_chars <= $cached_limit) {
12220 $Limits::Command::line_max_len = $opt::max_chars;
12221 } else {
12222 ::warning("Value for -s option should be < $cached_limit.");
12226 return int($Limits::Command::line_max_len);
12229 sub real_max_length() {
12230 # Find the max_length of a command line
12231 # Returns:
12232 # The maximal command line length with 1 byte arguments
12233 # return find_max(" x");
12234 return find_max("x");
12237 sub find_max($) {
12238 my $string = shift;
12239 # This is slow on Cygwin, so give Cygwin users a warning
12240 if($^O eq "cygwin") {
12241 ::warning("Finding the maximal command line length. ".
12242 "This may take up to 30 seconds.")
12244 # Use an upper bound of 100 MB if the shell allows for infinite long lengths
12245 my $upper = 100_000_000;
12246 # 1000 is supported everywhere, so the search can start anywhere 1..999
12247 # 324 makes the search much faster on Cygwin, so let us use that
12248 my $len = 324;
12249 do {
12250 if($len > $upper) { return $len };
12251 $len *= 16;
12252 } while (is_acceptable_command_line_length($len,$string));
12253 # Then search for the actual max length between
12254 # last successful length ($len/16) and upper bound
12255 return binary_find_max(int($len/16),$len,$string);
12258 # Prototype forwarding
12259 sub binary_find_max($$$);
12260 sub binary_find_max($$$) {
12261 # Given a lower and upper bound find the max (length or args) of a command line
12262 # Returns:
12263 # number of chars on the longest command line allowed
12264 my ($lower, $upper, $string) = (@_);
12265 if($lower == $upper or $lower == $upper-1) { return $lower; }
12266 my $middle = int (($upper-$lower)/2 + $lower);
12267 ::debug("init", "Maxlen: $lower<$middle<$upper: ");
12268 if (is_acceptable_command_line_length($middle,$string)) {
12269 return binary_find_max($middle,$upper,$string);
12270 } else {
12271 return binary_find_max($lower,$middle,$string);
12276 my $prg;
12278 sub is_acceptable_command_line_length($$) {
12279 # Test if a command line of this length can run
12280 # in the current environment
12281 # If the string is " x" it tests how many args are allowed
12282 # Returns:
12283 # 0 if the command line length is too long
12284 # 1 otherwise
12285 my $len = shift;
12286 my $string = shift;
12287 if($Global::parallel_env) {
12288 $len += length $Global::parallel_env;
12290 # Force using non-built-in command
12291 $prg ||= ::which("echo");
12292 ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string));
12293 ::debug("init", "$len=$? ");
12294 return not $?;
12298 sub tmux_length($) {
12299 # If $opt::tmux set, find the limit for tmux
12300 # tmux 1.8 has a 2kB limit
12301 # tmux 1.9 has a 16kB limit
12302 # tmux 2.0 has a 16kB limit
12303 # tmux 2.1 has a 16kB limit
12304 # tmux 2.2 has a 16kB limit
12305 # Input:
12306 # $len = maximal command line length
12307 # Returns:
12308 # $tmux_len = maximal length runable in tmux
12309 local $/ = "\n";
12310 my $len = shift;
12311 if($opt::tmux) {
12312 $ENV{'PARALLEL_TMUX'} ||= "tmux";
12313 if(not ::which($ENV{'PARALLEL_TMUX'})) {
12314 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
12315 ::wait_and_exit(255);
12317 my @out;
12318 for my $l (1, 2020, 16320, 100000, $len) {
12319 my $tmpfile = ::tmpname("tms");
12320 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
12321 " -S $tmpfile new-session -d -n echo $l".
12322 ("x"x$l). " && echo $l; rm -f $tmpfile";
12323 push @out, ::qqx($tmuxcmd);
12324 ::rm($tmpfile);
12326 ::debug("tmux","tmux-out ",@out);
12327 chomp @out;
12328 # The arguments is given 3 times on the command line
12329 # and the wrapping is around 30 chars
12330 # (29 for tmux1.9, 33 for tmux1.8)
12331 my $tmux_len = ::max(@out);
12332 $len = ::min($len,int($tmux_len/4-33));
12333 ::debug("tmux","tmux-length ",$len);
12335 return $len;
12339 package RecordQueue;
12341 sub new($) {
12342 my $class = shift;
12343 my $fhs = shift;
12344 my $colsep = shift;
12345 my @unget = ();
12346 my $arg_sub_queue;
12347 if($opt::sqlworker) {
12348 # Open SQL table
12349 $arg_sub_queue = SQLRecordQueue->new();
12350 } elsif(defined $colsep) {
12351 # Open one file with colsep or CSV
12352 $arg_sub_queue = RecordColQueue->new($fhs);
12353 } else {
12354 # Open one or more files if multiple -a
12355 $arg_sub_queue = MultifileQueue->new($fhs);
12357 return bless {
12358 'unget' => \@unget,
12359 'arg_number' => 0,
12360 'arg_sub_queue' => $arg_sub_queue,
12361 }, ref($class) || $class;
12364 sub get($) {
12365 # Returns:
12366 # reference to array of Arg-objects
12367 my $self = shift;
12368 if(@{$self->{'unget'}}) {
12369 $self->{'arg_number'}++;
12370 # Flush cached computed replacements in Arg-objects
12371 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
12372 my $ret = shift @{$self->{'unget'}};
12373 if($ret) {
12374 map { $_->flush_cache() } @$ret;
12376 return $ret;
12378 my $ret = $self->{'arg_sub_queue'}->get();
12379 if($ret) {
12380 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
12381 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
12382 # to mean no-string
12383 ::warning("A NUL character in the input was replaced with \\0.",
12384 "NUL cannot be passed through in the argument list.",
12385 "Did you mean to use the --null option?");
12386 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
12387 # Replace \0 with \\0
12388 my $a = $_->orig();
12389 $a =~ s/\0/\\0/g;
12390 $_->set_orig($a);
12393 if(defined $Global::max_number_of_args
12394 and $Global::max_number_of_args == 0) {
12395 ::debug("run", "Read 1 but return 0 args\n");
12396 # \0noarg => nothing (not the empty string)
12397 map { $_->set_orig("\0noarg"); } @$ret;
12399 # Flush cached computed replacements in Arg-objects
12400 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
12401 map { $_->flush_cache() } @$ret;
12403 return $ret;
12406 sub unget($) {
12407 my $self = shift;
12408 ::debug("run", "RecordQueue-unget\n");
12409 $self->{'arg_number'} -= @_;
12410 unshift @{$self->{'unget'}}, @_;
12413 sub empty($) {
12414 my $self = shift;
12415 my $empty = (not @{$self->{'unget'}}) &&
12416 $self->{'arg_sub_queue'}->empty();
12417 ::debug("run", "RecordQueue->empty $empty");
12418 return $empty;
12421 sub flush_cache($) {
12422 my $self = shift;
12423 for my $record (@{$self->{'unget'}}) {
12424 for my $arg (@$record) {
12425 $arg->flush_cache();
12428 $self->{'arg_sub_queue'}->flush_cache();
12431 sub arg_number($) {
12432 my $self = shift;
12433 return $self->{'arg_number'};
12437 package RecordColQueue;
12439 sub new($) {
12440 my $class = shift;
12441 my $fhs = shift;
12442 my @unget = ();
12443 my $arg_sub_queue = MultifileQueue->new($fhs);
12444 return bless {
12445 'unget' => \@unget,
12446 'arg_sub_queue' => $arg_sub_queue,
12447 }, ref($class) || $class;
12450 sub get($) {
12451 # Returns:
12452 # reference to array of Arg-objects
12453 my $self = shift;
12454 if(@{$self->{'unget'}}) {
12455 return shift @{$self->{'unget'}};
12457 if($self->{'arg_sub_queue'}->empty()) {
12458 return undef;
12460 my $in_record = $self->{'arg_sub_queue'}->get();
12461 if(defined $in_record) {
12462 my @out_record = ();
12463 for my $arg (@$in_record) {
12464 ::debug("run", "RecordColQueue::arg $arg\n");
12465 my $line = $arg->orig();
12466 ::debug("run", "line='$line'\n");
12467 if($line ne "") {
12468 if($opt::csv) {
12469 # Parse CSV and put it into a record
12470 chomp $line;
12471 if(not $Global::csv->parse($line)) {
12472 die "CSV has unexpected format: ^$line^";
12474 for($Global::csv->fields()) {
12475 push @out_record, Arg->new($_);
12477 } else {
12478 # Split --colsep into record
12479 for my $s (split /$opt::colsep/o, $line, -1) {
12480 push @out_record, Arg->new($s);
12483 } else {
12484 push @out_record, Arg->new("");
12487 return \@out_record;
12488 } else {
12489 return undef;
12493 sub unget($) {
12494 my $self = shift;
12495 ::debug("run", "RecordColQueue-unget '@_'\n");
12496 unshift @{$self->{'unget'}}, @_;
12499 sub empty($) {
12500 my $self = shift;
12501 my $empty = (not @{$self->{'unget'}}) &&
12502 $self->{'arg_sub_queue'}->empty();
12503 ::debug("run", "RecordColQueue->empty $empty");
12504 return $empty;
12507 sub flush_cache($) {
12508 my $self = shift;
12509 for my $arg (@{$self->{'unget'}}) {
12510 $arg->flush_cache();
12512 $self->{'arg_sub_queue'}->flush_cache();
12516 package SQLRecordQueue;
12518 sub new($) {
12519 my $class = shift;
12520 my @unget = ();
12521 return bless {
12522 'unget' => \@unget,
12523 }, ref($class) || $class;
12526 sub get($) {
12527 # Returns:
12528 # reference to array of Arg-objects
12529 my $self = shift;
12530 if(@{$self->{'unget'}}) {
12531 return shift @{$self->{'unget'}};
12533 return $Global::sql->get_record();
12536 sub unget($) {
12537 my $self = shift;
12538 ::debug("run", "SQLRecordQueue-unget '@_'\n");
12539 unshift @{$self->{'unget'}}, @_;
12542 sub empty($) {
12543 my $self = shift;
12544 if(@{$self->{'unget'}}) { return 0; }
12545 my $get = $self->get();
12546 if(defined $get) {
12547 $self->unget($get);
12549 my $empty = not $get;
12550 ::debug("run", "SQLRecordQueue->empty $empty");
12551 return $empty;
12554 sub flush_cache($) {
12555 my $self = shift;
12556 for my $record (@{$self->{'unget'}}) {
12557 for my $arg (@$record) {
12558 $arg->flush_cache();
12564 package MultifileQueue;
12566 @Global::unget_argv=();
12568 sub new($$) {
12569 my $class = shift;
12570 my $fhs = shift;
12571 for my $fh (@$fhs) {
12572 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
12573 ::warning(
12574 "Input is read from the terminal. You are either an expert",
12575 "(in which case: YOU ARE AWESOME!) or maybe you forgot",
12576 "::: or :::: or -a or to pipe data into parallel. If so",
12577 "consider going through the tutorial: man parallel_tutorial",
12578 "Press CTRL-D to exit.");
12581 return bless {
12582 'unget' => \@Global::unget_argv,
12583 'fhs' => $fhs,
12584 'arg_matrix' => undef,
12585 }, ref($class) || $class;
12588 sub get($) {
12589 my $self = shift;
12590 if($opt::link) {
12591 return $self->link_get();
12592 } else {
12593 return $self->nest_get();
12597 sub unget($) {
12598 my $self = shift;
12599 ::debug("run", "MultifileQueue-unget '@_'\n");
12600 unshift @{$self->{'unget'}}, @_;
12603 sub empty($) {
12604 my $self = shift;
12605 my $empty = (not @Global::unget_argv) &&
12606 not @{$self->{'unget'}};
12607 for my $fh (@{$self->{'fhs'}}) {
12608 $empty &&= eof($fh);
12610 ::debug("run", "MultifileQueue->empty $empty ");
12611 return $empty;
12614 sub flush_cache($) {
12615 my $self = shift;
12616 for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) {
12617 for my $arg (@$record) {
12618 $arg->flush_cache();
12623 sub link_get($) {
12624 my $self = shift;
12625 if(@{$self->{'unget'}}) {
12626 return shift @{$self->{'unget'}};
12628 my @record = ();
12629 my $prepend;
12630 my $empty = 1;
12631 for my $i (0..$#{$self->{'fhs'}}) {
12632 my $fh = $self->{'fhs'}[$i];
12633 my $arg = read_arg_from_fh($fh);
12634 if(defined $arg) {
12635 # Record $arg for recycling at end of file
12636 push @{$self->{'arg_matrix'}[$i]}, $arg;
12637 push @record, $arg;
12638 $empty = 0;
12639 } else {
12640 ::debug("run", "EOA ");
12641 # End of file: Recycle arguments
12642 push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]};
12643 # return last @{$args->{'args'}{$fh}};
12644 push @record, @{$self->{'arg_matrix'}[$i]}[-1];
12647 if($empty) {
12648 return undef;
12649 } else {
12650 return \@record;
12654 sub nest_get($) {
12655 my $self = shift;
12656 if(@{$self->{'unget'}}) {
12657 return shift @{$self->{'unget'}};
12659 my @record = ();
12660 my $prepend;
12661 my $empty = 1;
12662 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
12663 if(not $self->{'arg_matrix'}) {
12664 # Initialize @arg_matrix with one arg from each file
12665 # read one line from each file
12666 my @first_arg_set;
12667 my $all_empty = 1;
12668 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
12669 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12670 if(defined $arg) {
12671 $all_empty = 0;
12673 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
12674 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
12676 if($all_empty) {
12677 # All filehandles were at eof or eof-string
12678 return undef;
12680 return [@first_arg_set];
12683 # Treat the case with one input source special. For multiple
12684 # input sources we need to remember all previously read values to
12685 # generate all combinations. But for one input source we can
12686 # forget the value after first use.
12687 if($no_of_inputsources == 1) {
12688 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
12689 if(defined($arg)) {
12690 return [$arg];
12692 return undef;
12694 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
12695 if(eof($self->{'fhs'}[$fhno])) {
12696 next;
12697 } else {
12698 # read one
12699 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
12700 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
12701 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
12702 $self->{'arg_matrix'}[$fhno][$len] = $arg;
12703 # make all new combinations
12704 my @combarg = ();
12705 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
12706 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
12707 # Is input source --link'ed to the next?
12708 $opt::linkinputsource[$fhn+1]);
12710 # Find only combinations with this new entry
12711 $combarg[2*$fhno] = [$len,$len];
12712 # map combinations
12713 # [ 1, 3, 7 ], [ 2, 4, 1 ]
12714 # =>
12715 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
12716 my @mapped;
12717 for my $c (expand_combinations(@combarg)) {
12718 my @a;
12719 for my $n (0 .. $no_of_inputsources - 1 ) {
12720 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
12722 push @mapped, \@a;
12724 # append the mapped to the ungotten arguments
12725 push @{$self->{'unget'}}, @mapped;
12726 # get the first
12727 if(@mapped) {
12728 return shift @{$self->{'unget'}};
12732 # all are eof or at EOF string; return from the unget queue
12733 return shift @{$self->{'unget'}};
12737 my $cr_count = 0;
12738 my $nl_count = 0;
12739 my $dos_crnl_determined;
12740 sub read_arg_from_fh($) {
12741 # Read one Arg from filehandle
12742 # Returns:
12743 # Arg-object with one read line
12744 # undef if end of file
12745 my $fh = shift;
12746 my $prepend;
12747 my $arg;
12748 my $half_record = 0;
12749 do {{
12750 # This makes 10% faster
12751 if(not defined ($arg = <$fh>)) {
12752 if(defined $prepend) {
12753 return Arg->new($prepend);
12754 } else {
12755 return undef;
12758 if(not $dos_crnl_determined and not $opt::d) {
12759 # Warn if input has CR-NL and -d is not set
12760 if($arg =~ /\r$/) {
12761 $cr_count++;
12762 } else {
12763 $nl_count++;
12765 if($cr_count == 3 or $nl_count == 3) {
12766 $dos_crnl_determined = 1;
12767 if($nl_count == 0 and $cr_count == 3) {
12768 ::warning('The first three values end in CR-NL. '.
12769 'Consider using -d "\r\n"');
12773 if($opt::csv) {
12774 # We need to read a full CSV line.
12775 if(($arg =~ y/"/"/) % 2 ) {
12776 # The number of " on the line is uneven:
12777 # If we were in a half_record => we have a full record now
12778 # If we were outside a half_record =>
12779 # we are in a half record now
12780 $half_record = not $half_record;
12782 if($half_record) {
12783 # CSV half-record with quoting:
12784 # col1,"col2 2""x3"" board newline <-this one
12785 # cont",col3
12786 $prepend .= $arg;
12787 redo;
12788 } else {
12789 # Now we have a full CSV record
12792 # Remove delimiter
12793 chomp $arg;
12794 if($Global::end_of_file_string and
12795 $arg eq $Global::end_of_file_string) {
12796 # Ignore the rest of input file
12797 close $fh;
12798 ::debug("run", "EOF-string ($arg) met\n");
12799 if(defined $prepend) {
12800 return Arg->new($prepend);
12801 } else {
12802 return undef;
12805 if(defined $prepend) {
12806 $arg = $prepend.$arg; # For line continuation
12807 undef $prepend;
12809 if($Global::ignore_empty) {
12810 if($arg =~ /^\s*$/) {
12811 redo; # Try the next line
12814 if($Global::max_lines) {
12815 if($arg =~ /\s$/) {
12816 # Trailing space => continued on next line
12817 $prepend = $arg;
12818 redo;
12821 }} while (1 == 0); # Dummy loop {{}} for redo
12822 if(defined $arg) {
12823 return Arg->new($arg);
12824 } else {
12825 ::die_bug("multiread arg undefined");
12830 # Prototype forwarding
12831 sub expand_combinations(@);
12832 sub expand_combinations(@) {
12833 # Input:
12834 # ([xmin,xmax], [ymin,ymax], ...)
12835 # Returns: ([x,y,...],[x,y,...])
12836 # where xmin <= x <= xmax and ymin <= y <= ymax
12837 my $minmax_ref = shift;
12838 my $link = shift; # This is linked to the next input source
12839 my $xmin = $$minmax_ref[0];
12840 my $xmax = $$minmax_ref[1];
12841 my @p;
12842 if(@_) {
12843 my @rest = expand_combinations(@_);
12844 if($link) {
12845 # Linked to next col with --link/:::+/::::+
12846 # TODO BUG does not wrap values if not same number of vals
12847 push(@p, map { [$$_[0], @$_] }
12848 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
12849 } else {
12850 # If there are more columns: Compute those recursively
12851 for(my $x = $xmin; $x <= $xmax; $x++) {
12852 push @p, map { [$x, @$_] } @rest;
12855 } else {
12856 for(my $x = $xmin; $x <= $xmax; $x++) {
12857 push @p, [$x];
12860 return @p;
12864 package Arg;
12866 sub new($) {
12867 my $class = shift;
12868 my $orig = shift;
12869 my @hostgroups;
12870 if($opt::hostgroups) {
12871 if($orig =~ s:@(.+)::) {
12872 # We found hostgroups on the arg
12873 @hostgroups = split(/\+/, $1);
12874 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
12875 # This hostgroup is not defined using -S
12876 # Add it
12877 ::warning("Adding hostgroups: @hostgroups");
12878 # Add sshlogin
12879 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
12880 my $sshlogin = SSHLogin->new($_);
12881 my $sshlogin_string = $sshlogin->string();
12882 $Global::host{$sshlogin_string} = $sshlogin;
12883 $Global::hostgroups{$sshlogin_string} = 1;
12886 } else {
12887 # No hostgroup on the arg => any hostgroup
12888 @hostgroups = (keys %Global::hostgroups);
12891 return bless {
12892 'orig' => $orig,
12893 'hostgroups' => \@hostgroups,
12894 }, ref($class) || $class;
12897 sub Q($) {
12898 # Q alias for ::shell_quote_scalar
12899 my $ret = ::Q($_[0]);
12900 no warnings 'redefine';
12901 *Q = \&::Q;
12902 return $ret;
12905 sub pQ($) {
12906 # pQ alias for ::perl_quote_scalar
12907 my $ret = ::pQ($_[0]);
12908 no warnings 'redefine';
12909 *pQ = \&::pQ;
12910 return $ret;
12913 sub hash($) {
12914 $Global::use{"DBI"} ||= eval "use B; 1;";
12915 B::hash(@_);
12918 sub total_jobs() {
12919 return $Global::JobQueue->total_jobs();
12923 my %perleval;
12924 my $job;
12925 sub skip() {
12926 # shorthand for $job->skip();
12927 $job->skip();
12929 sub slot() {
12930 # shorthand for $job->slot();
12931 $job->slot();
12933 sub seq() {
12934 # shorthand for $job->seq();
12935 $job->seq();
12937 sub uq() {
12938 # Do not quote this arg
12939 $Global::unquote_arg = 1;
12941 sub yyyy_mm_dd_hh_mm_ss() {
12942 # ISO8601 2038-01-19T03:14:08
12943 ::strftime("%Y-%m-%dT%H:%M:%S", localtime(time()));
12945 sub yyyy_mm_dd_hh_mm() {
12946 # ISO8601 2038-01-19T03:14
12947 ::strftime("%Y-%m-%dT%H:%M", localtime(time()));
12949 sub yyyy_mm_dd() {
12950 # ISO8601 2038-01-19
12951 ::strftime("%Y-%m-%d", localtime(time()));
12953 sub yyyymmddhhmmss() {
12954 # ISO8601 20380119031408
12955 ::strftime("%Y%m%d%H%M%S", localtime(time()));
12957 sub yyyymmddhhmm() {
12958 # ISO8601 203801190314
12959 ::strftime("%Y%m%d%H%M", localtime(time()));
12961 sub yyyymmdd() {
12962 # ISO8601 20380119
12963 ::strftime("%Y%m%d", localtime(time()));
12966 sub replace($$$$) {
12967 # Calculates the corresponding value for a given perl expression
12968 # Returns:
12969 # The calculated string (quoted if asked for)
12970 my $self = shift;
12971 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
12972 my $quote = shift; # should the string be quoted?
12973 # This is actually a CommandLine-object,
12974 # but it looks nice to be able to say {= $job->slot() =}
12975 $job = shift;
12976 # Positional replace treated as normal replace
12977 $perlexpr =~ s/^(-?\d+)? *//;
12978 if(not $Global::cache_replacement_eval
12980 not $self->{'cache'}{$perlexpr}) {
12981 # Only compute the value once
12982 # Use $_ as the variable to change
12983 local $_;
12984 if($Global::trim eq "n") {
12985 $_ = $self->{'orig'};
12986 } else {
12987 # Trim the input
12988 $_ = trim_of($self->{'orig'});
12990 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
12991 if(not $perleval{$perlexpr}) {
12992 # Make an anonymous function of the $perlexpr
12993 # And more importantly: Compile it only once
12994 if($perleval{$perlexpr} =
12995 eval('sub { no strict; no warnings; my $job = shift; '.
12996 $perlexpr.' }')) {
12997 # All is good
12998 } else {
12999 # The eval failed. Maybe $perlexpr is invalid perl?
13000 ::error("Cannot use $perlexpr: $@");
13001 ::wait_and_exit(255);
13004 # Execute the function
13005 $perleval{$perlexpr}->($job);
13006 $self->{'cache'}{$perlexpr} = $_;
13007 if($Global::unquote_arg) {
13008 # uq() was called in perlexpr
13009 $self->{'cache'}{'unquote'}{$perlexpr} = 1;
13010 # Reset for next perlexpr
13011 $Global::unquote_arg = 0;
13014 # Return the value quoted if needed
13015 if($self->{'cache'}{'unquote'}{$perlexpr}) {
13016 return($self->{'cache'}{$perlexpr});
13017 } else {
13018 return($quote ? Q($self->{'cache'}{$perlexpr})
13019 : $self->{'cache'}{$perlexpr});
13024 sub flush_cache($) {
13025 # Flush cache of computed values
13026 my $self = shift;
13027 $self->{'cache'} = undef;
13030 sub orig($) {
13031 my $self = shift;
13032 return $self->{'orig'};
13035 sub set_orig($$) {
13036 my $self = shift;
13037 $self->{'orig'} = shift;
13040 sub trim_of($) {
13041 # Removes white space as specifed by --trim:
13042 # n = nothing
13043 # l = start
13044 # r = end
13045 # lr|rl = both
13046 # Returns:
13047 # string with white space removed as needed
13048 my @strings = map { defined $_ ? $_ : "" } (@_);
13049 my $arg;
13050 if($Global::trim eq "n") {
13051 # skip
13052 } elsif($Global::trim eq "l") {
13053 for my $arg (@strings) { $arg =~ s/^\s+//; }
13054 } elsif($Global::trim eq "r") {
13055 for my $arg (@strings) { $arg =~ s/\s+$//; }
13056 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
13057 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
13058 } else {
13059 ::error("--trim must be one of: r l rl lr.");
13060 ::wait_and_exit(255);
13062 return wantarray ? @strings : "@strings";
13066 package TimeoutQueue;
13068 sub new($) {
13069 my $class = shift;
13070 my $delta_time = shift;
13071 my ($pct);
13072 if($delta_time =~ /(\d+(\.\d+)?)%/) {
13073 # Timeout in percent
13074 $pct = $1/100;
13075 $delta_time = 1_000_000;
13077 $delta_time = ::multiply_time_units($delta_time);
13079 return bless {
13080 'queue' => [],
13081 'delta_time' => $delta_time,
13082 'pct' => $pct,
13083 'remedian_idx' => 0,
13084 'remedian_arr' => [],
13085 'remedian' => undef,
13086 }, ref($class) || $class;
13089 sub delta_time($) {
13090 my $self = shift;
13091 return $self->{'delta_time'};
13094 sub set_delta_time($$) {
13095 my $self = shift;
13096 $self->{'delta_time'} = shift;
13099 sub remedian($) {
13100 my $self = shift;
13101 return $self->{'remedian'};
13104 sub set_remedian($$) {
13105 # Set median of the last 999^3 (=997002999) values using Remedian
13107 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
13108 # robust averaging method for large data sets." Journal of the
13109 # American Statistical Association 85.409 (1990): 97-104.
13110 my $self = shift;
13111 my $val = shift;
13112 my $i = $self->{'remedian_idx'}++;
13113 my $rref = $self->{'remedian_arr'};
13114 $rref->[0][$i%999] = $val;
13115 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
13116 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
13117 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
13120 sub update_median_runtime($) {
13121 # Update delta_time based on runtime of finished job if timeout is
13122 # a percentage
13123 my $self = shift;
13124 my $runtime = shift;
13125 if($self->{'pct'}) {
13126 $self->set_remedian($runtime);
13127 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
13128 ::debug("run", "Timeout: $self->{'delta_time'}s ");
13132 sub process_timeouts($) {
13133 # Check if there was a timeout
13134 my $self = shift;
13135 # $self->{'queue'} is sorted by start time
13136 while (@{$self->{'queue'}}) {
13137 my $job = $self->{'queue'}[0];
13138 if($job->endtime()) {
13139 # Job already finished. No need to timeout the job
13140 # This could be because of --keep-order
13141 shift @{$self->{'queue'}};
13142 } elsif($job->is_timedout($self->{'delta_time'})) {
13143 # Need to shift off queue before kill
13144 # because kill calls usleep that calls process_timeouts
13145 shift @{$self->{'queue'}};
13146 ::warning("This job was killed because it timed out:",
13147 $job->replaced());
13148 $job->kill();
13149 } else {
13150 # Because they are sorted by start time the rest are later
13151 last;
13156 sub insert($) {
13157 my $self = shift;
13158 my $in = shift;
13159 push @{$self->{'queue'}}, $in;
13163 package SQL;
13165 sub new($) {
13166 my $class = shift;
13167 my $dburl = shift;
13168 $Global::use{"DBI"} ||= eval "use DBI; 1;";
13169 # +DBURL = append to this DBURL
13170 my $append = $dburl=~s/^\+//;
13171 my %options = parse_dburl(get_alias($dburl));
13172 my %driveralias = ("sqlite" => "SQLite",
13173 "sqlite3" => "SQLite",
13174 "pg" => "Pg",
13175 "postgres" => "Pg",
13176 "postgresql" => "Pg",
13177 "csv" => "CSV",
13178 "oracle" => "Oracle",
13179 "ora" => "Oracle");
13180 my $driver = $driveralias{$options{'databasedriver'}} ||
13181 $options{'databasedriver'};
13182 my $database = $options{'database'};
13183 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
13184 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
13185 my $dsn = "DBI:$driver:dbname=$database$host$port";
13186 my $userid = $options{'user'};
13187 my $password = $options{'password'};;
13188 if(not grep /$driver/, DBI->available_drivers) {
13189 ::error("$driver not supported. Are you missing a perl DBD::$driver module?");
13190 ::wait_and_exit(255);
13192 my $dbh;
13193 if($driver eq "CSV") {
13194 # CSV does not use normal dsn
13195 if(-d $database) {
13196 $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", })
13197 or die $DBI::errstr;
13198 } else {
13199 ::error("$database is not a directory.");
13200 ::wait_and_exit(255);
13202 } else {
13203 $dbh = DBI->connect($dsn, $userid, $password,
13204 { RaiseError => 1, AutoInactiveDestroy => 1 })
13205 or die $DBI::errstr;
13207 $dbh->{'PrintWarn'} = $Global::debug || 0;
13208 $dbh->{'PrintError'} = $Global::debug || 0;
13209 $dbh->{'RaiseError'} = 1;
13210 $dbh->{'ShowErrorStatement'} = 1;
13211 $dbh->{'HandleError'} = sub {};
13212 if(not defined $options{'table'}) {
13213 ::error("The DBURL ($dburl) must contain a table.");
13214 ::wait_and_exit(255);
13217 return bless {
13218 'dbh' => $dbh,
13219 'driver' => $driver,
13220 'max_number_of_args' => undef,
13221 'table' => $options{'table'},
13222 'append' => $append,
13223 }, ref($class) || $class;
13226 # Prototype forwarding
13227 sub get_alias($);
13228 sub get_alias($) {
13229 my $alias = shift;
13230 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
13231 if ($alias !~ /^:/) {
13232 return $alias;
13235 # Find the alias
13236 my $path;
13237 if (-l $0) {
13238 ($path) = readlink($0) =~ m|^(.*)/|;
13239 } else {
13240 ($path) = $0 =~ m|^(.*)/|;
13243 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
13244 "$path/dburl.aliases", "$path/dburl.aliases.dist");
13245 for (@deprecated) {
13246 if(-r $_) {
13247 ::warning("$_ is deprecated. ".
13248 "Use .sql/aliases instead (read man sql).");
13251 my @urlalias=();
13252 check_permissions("$ENV{HOME}/.sql/aliases");
13253 check_permissions("$ENV{HOME}/.dburl.aliases");
13254 my @search = ("$ENV{HOME}/.sql/aliases",
13255 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
13256 "$path/dburl.aliases", "$path/dburl.aliases.dist");
13257 for my $alias_file (@search) {
13258 # local $/ needed if -0 set
13259 local $/ = "\n";
13260 if(-r $alias_file) {
13261 open(my $in, "<", $alias_file) || die;
13262 push @urlalias, <$in>;
13263 close $in;
13266 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
13267 # If we saw this before: we have an alias loop
13268 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
13269 ::error("$alias_part is a cyclic alias.");
13270 exit -1;
13271 } else {
13272 push @Private::seen_aliases, $alias_part;
13275 my $dburl;
13276 for (@urlalias) {
13277 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
13280 if($dburl) {
13281 return get_alias($dburl.$rest);
13282 } else {
13283 ::error("$alias is not defined in @search");
13284 exit(-1);
13288 sub check_permissions($) {
13289 my $file = shift;
13291 if(-e $file) {
13292 if(not -o $file) {
13293 my $username = (getpwuid($<))[0];
13294 ::warning("$file should be owned by $username: ".
13295 "chown $username $file");
13297 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
13298 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
13299 if($mode & 077) {
13300 my $username = (getpwuid($<))[0];
13301 ::warning("$file should be only be readable by $username: ".
13302 "chmod 600 $file");
13307 sub parse_dburl($) {
13308 my $url = shift;
13309 my %options = ();
13310 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
13312 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
13313 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
13314 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
13316 ([^:@/][^:@]*|) # Username ($2)
13318 :([^@]*) # Password ($3)
13321 ([^:/]*)? # Hostname ($4)
13324 ([^/]*)? # Port ($5)
13328 ([^/?]*)? # Database ($6)
13332 ([^?]*)? # Table ($7)
13336 (.*)? # Query ($8)
13338 $!ix) {
13339 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
13340 $options{user} = ::undef_if_empty(uri_unescape($2));
13341 $options{password} = ::undef_if_empty(uri_unescape($3));
13342 $options{host} = ::undef_if_empty(uri_unescape($4));
13343 $options{port} = ::undef_if_empty(uri_unescape($5));
13344 $options{database} = ::undef_if_empty(uri_unescape($6));
13345 $options{table} = ::undef_if_empty(uri_unescape($7));
13346 $options{query} = ::undef_if_empty(uri_unescape($8));
13347 ::debug("sql", "dburl $url\n");
13348 ::debug("sql", "databasedriver ", $options{databasedriver},
13349 " user ", $options{user},
13350 " password ", $options{password}, " host ", $options{host},
13351 " port ", $options{port}, " database ", $options{database},
13352 " table ", $options{table}, " query ", $options{query}, "\n");
13353 } else {
13354 ::error("$url is not a valid DBURL");
13355 exit 255;
13357 return %options;
13360 sub uri_unescape($) {
13361 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
13362 # to avoid depending on URI::Escape
13363 # This section is (C) Gisle Aas.
13364 # Note from RFC1630: "Sequences which start with a percent sign
13365 # but are not followed by two hexadecimal characters are reserved
13366 # for future extension"
13367 my $str = shift;
13368 if (@_ && wantarray) {
13369 # not executed for the common case of a single argument
13370 my @str = ($str, @_); # need to copy
13371 foreach (@str) {
13372 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
13374 return @str;
13376 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
13377 $str;
13380 sub run($) {
13381 my $self = shift;
13382 my $stmt = shift;
13383 if($self->{'driver'} eq "CSV") {
13384 $stmt=~ s/;$//;
13385 if($stmt eq "BEGIN" or
13386 $stmt eq "COMMIT") {
13387 return undef;
13390 my @retval;
13391 my $dbh = $self->{'dbh'};
13392 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
13393 # Execute with the rest of the args - if any
13394 my $rv;
13395 my $sth;
13396 my $lockretry = 0;
13397 while($lockretry < 10) {
13398 $sth = $dbh->prepare($stmt);
13399 if($sth
13401 eval { $rv = $sth->execute(@_) }) {
13402 last;
13403 } else {
13404 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
13406 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
13407 # This is fine:
13408 # It is just a worker that reported back too late -
13409 # another worker had finished the job first
13410 # and the table was then dropped
13411 $rv = $sth = 0;
13412 last;
13414 if($DBI::errstr =~ /locked/) {
13415 ::debug("sql", "Lock retry: $lockretry");
13416 $lockretry++;
13417 ::usleep(rand()*300);
13418 } elsif(not $sth) {
13419 # Try again
13420 $lockretry++;
13421 } else {
13422 ::error($DBI::errstr);
13423 ::wait_and_exit(255);
13427 if($lockretry >= 10) {
13428 ::die_bug("retry > 10: $DBI::errstr");
13430 if($rv < 0 and $DBI::errstr){
13431 ::error($DBI::errstr);
13432 ::wait_and_exit(255);
13434 return $sth;
13437 sub get($) {
13438 my $self = shift;
13439 my $sth = $self->run(@_);
13440 my @retval;
13441 # If $sth = 0 it means the table was dropped by another process
13442 while($sth) {
13443 my @row = $sth->fetchrow_array();
13444 @row or last;
13445 push @retval, \@row;
13447 return \@retval;
13450 sub table($) {
13451 my $self = shift;
13452 return $self->{'table'};
13455 sub append($) {
13456 my $self = shift;
13457 return $self->{'append'};
13460 sub update($) {
13461 my $self = shift;
13462 my $stmt = shift;
13463 my $table = $self->table();
13464 $self->run("UPDATE $table $stmt",@_);
13467 sub output($) {
13468 my $self = shift;
13469 my $commandline = shift;
13471 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
13472 $commandline->seq(),
13473 join("",@{$commandline->{'output'}{1}}),
13474 join("",@{$commandline->{'output'}{2}}));
13477 sub max_number_of_args($) {
13478 # Maximal number of args for this table
13479 my $self = shift;
13480 if(not $self->{'max_number_of_args'}) {
13481 # Read the number of args from the SQL table
13482 my $table = $self->table();
13483 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
13484 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
13485 Receive Exitval _Signal Command Stdout Stderr);
13486 if(not $v) {
13487 ::error("$table contains no records");
13489 # Count the number of Vx columns
13490 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
13492 return $self->{'max_number_of_args'};
13495 sub set_max_number_of_args($$) {
13496 my $self = shift;
13497 $self->{'max_number_of_args'} = shift;
13500 sub create_table($) {
13501 my $self = shift;
13502 if($self->append()) { return; }
13503 my $max_number_of_args = shift;
13504 $self->set_max_number_of_args($max_number_of_args);
13505 my $table = $self->table();
13506 $self->run(qq(DROP TABLE IF EXISTS $table;));
13507 # BIGINT and TEXT are not supported in these databases or are too small
13508 my %vartype = (
13509 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
13510 "TEXT" => "CLOB", },
13511 "mysql" => { "TEXT" => "BLOB", },
13512 "CSV" => { "BIGINT" => "INT",
13513 "FLOAT" => "REAL", },
13515 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
13516 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
13517 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
13518 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
13519 $self->run(qq{CREATE TABLE $table
13520 (Seq $BIGINT,
13521 Host $TEXT,
13522 Starttime $FLOAT,
13523 JobRuntime $FLOAT,
13524 Send $BIGINT,
13525 Receive $BIGINT,
13526 Exitval $BIGINT,
13527 _Signal $BIGINT,
13528 Command $TEXT,}.
13529 $v_def.
13530 qq{Stdout $TEXT,
13531 Stderr $TEXT);});
13534 sub insert_records($) {
13535 my $self = shift;
13536 my $seq = shift;
13537 my $command_ref = shift;
13538 my $record_ref = shift;
13539 my $table = $self->table();
13540 # For SQL encode the command with \257 space as split points
13541 my $command = join("\257 ",@$command_ref);
13542 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
13543 # Two extra value due to $seq, Exitval, Send
13544 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
13545 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
13546 "VALUES ($v_vals);", $seq, $command, -1000,
13547 0, @$record_ref[1..$#$record_ref]);
13551 sub get_record($) {
13552 my $self = shift;
13553 my @retval;
13554 my $table = $self->table();
13555 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
13556 my $rand = "Reserved-".$$.rand();
13557 my $v;
13558 my $more_pending;
13560 do {
13561 if($self->{'driver'} eq "CSV") {
13562 # Sub SELECT is not supported in CSV
13563 # So to minimize the race condition below select a job at random
13564 my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ".
13565 "WHERE Exitval = -1000 LIMIT 100;");
13566 $v = [ sort { rand() > 0.5 } @$r ];
13567 } else {
13568 # Avoid race condition where multiple workers get the same job
13569 # by setting Stdout to a unique string
13570 # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL
13571 $self->update("SET Stdout = ?,Exitval = ? ".
13572 "WHERE Seq = (".
13573 " SELECT * FROM (".
13574 " SELECT min(Seq) FROM $table WHERE Exitval = -1000".
13575 " ) AS dummy".
13576 ") AND Exitval = -1000;", $rand, -1210);
13577 # If a parallel worker overwrote the unique string this will get nothing
13578 $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
13579 "WHERE Stdout = ?;", $rand);
13581 if($v->[0]) {
13582 my $val_ref = $v->[0];
13583 # Mark record as taken
13584 my $seq = shift @$val_ref;
13585 # Save the sequence number to use when running the job
13586 $SQL::next_seq = $seq;
13587 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
13588 # Command is encoded with '\257 space' as splitting char
13589 my @command = split /\257 /, shift @$val_ref;
13590 $SQL::command_ref = \@command;
13591 for (@$val_ref) {
13592 push @retval, Arg->new($_);
13594 } else {
13595 # If the record was updated by another job in parallel,
13596 # then we may not be done, so see if there are more jobs pending
13597 $more_pending =
13598 $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210);
13600 } while (not $v->[0] and $more_pending->[0]);
13602 if(@retval) {
13603 return \@retval;
13604 } else {
13605 return undef;
13609 sub total_jobs($) {
13610 my $self = shift;
13611 my $table = $self->table();
13612 my $v = $self->get("SELECT count(*) FROM $table;");
13613 if($v->[0]) {
13614 return $v->[0]->[0];
13615 } else {
13616 ::die_bug("SQL::total_jobs");
13620 sub max_seq($) {
13621 my $self = shift;
13622 my $table = $self->table();
13623 my $v = $self->get("SELECT max(Seq) FROM $table;");
13624 if($v->[0]) {
13625 return $v->[0]->[0];
13626 } else {
13627 ::die_bug("SQL::max_seq");
13631 sub finished($) {
13632 # Check if there are any jobs left in the SQL table that do not
13633 # have a "real" exitval
13634 my $self = shift;
13635 if($opt::wait or $Global::start_sqlworker) {
13636 my $table = $self->table();
13637 my $rv = $self->get("select Seq,Exitval from $table ".
13638 "where Exitval <= -1000 limit 1");
13639 return not $rv->[0];
13640 } else {
13641 return 1;
13645 package Semaphore;
13647 # This package provides a counting semaphore
13649 # If a process dies without releasing the semaphore the next process
13650 # that needs that entry will clean up dead semaphores
13652 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
13653 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
13654 # process holding the entry. If the process dies, the entry can be
13655 # taken by another process.
13657 sub new($) {
13658 my $class = shift;
13659 my $id = shift;
13660 my $count = shift;
13661 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
13662 $id = "id-".$id; # To distinguish it from a process id
13663 my $parallel_locks = $Global::cache_dir . "/semaphores";
13664 -d $parallel_locks or ::mkdir_or_die($parallel_locks);
13665 my $lockdir = "$parallel_locks/$id";
13666 my $lockfile = $lockdir.".lock";
13667 if(-d $parallel_locks and -w $parallel_locks
13668 and -r $parallel_locks and -x $parallel_locks) {
13669 # skip
13670 } else {
13671 ::error("Semaphoredir must be writable: '$parallel_locks'");
13672 ::wait_and_exit(255);
13675 if($count < 1) { ::die_bug("semaphore-count: $count"); }
13676 return bless {
13677 'lockfile' => $lockfile,
13678 'lockfh' => Symbol::gensym(),
13679 'lockdir' => $lockdir,
13680 'id' => $id,
13681 'idfile' => $lockdir."/".$id,
13682 'pid' => $$,
13683 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
13684 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
13685 }, ref($class) || $class;
13688 sub remove_dead_locks($) {
13689 my $self = shift;
13690 my $lockdir = $self->{'lockdir'};
13692 for my $d (glob "$lockdir/*") {
13693 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
13694 my ($pid, $host) = ($1, $2);
13695 if($host eq ::hostname()) {
13696 if(kill 0, $pid) {
13697 ::debug("sem", "Alive: $pid $d\n");
13698 } else {
13699 ::debug("sem", "Dead: $d\n");
13700 ::rm($d);
13706 sub acquire($) {
13707 my $self = shift;
13708 my $sleep = 1; # 1 ms
13709 my $start_time = time;
13710 while(1) {
13711 # Can we get a lock?
13712 $self->atomic_link_if_count_less_than() and last;
13713 $self->remove_dead_locks();
13714 # Retry slower and slower up to 1 second
13715 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13716 # Random to avoid every sleeping job waking up at the same time
13717 ::usleep(rand()*$sleep);
13718 if($opt::semaphoretimeout) {
13719 if($opt::semaphoretimeout > 0
13721 time - $start_time > $opt::semaphoretimeout) {
13722 # Timeout: Take the semaphore anyway
13723 ::warning("Semaphore timed out. Stealing the semaphore.");
13724 if(not -e $self->{'idfile'}) {
13725 open (my $fh, ">", $self->{'idfile'}) or
13726 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
13727 close $fh;
13729 link $self->{'idfile'}, $self->{'pidfile'};
13730 last;
13732 if($opt::semaphoretimeout < 0
13734 time - $start_time > -$opt::semaphoretimeout) {
13735 # Timeout: Exit
13736 ::warning("Semaphore timed out. Exiting.");
13737 exit(1);
13738 last;
13742 ::debug("sem", "acquired $self->{'pid'}\n");
13745 sub release($) {
13746 my $self = shift;
13747 ::rm($self->{'pidfile'});
13748 if($self->nlinks() == 1) {
13749 # This is the last link, so atomic cleanup
13750 $self->lock();
13751 if($self->nlinks() == 1) {
13752 ::rm($self->{'idfile'});
13753 rmdir $self->{'lockdir'};
13755 $self->unlock();
13757 ::debug("run", "released $self->{'pid'}\n");
13760 sub pid_change($) {
13761 # This should do what release()+acquire() would do without having
13762 # to re-acquire the semaphore
13763 my $self = shift;
13765 my $old_pidfile = $self->{'pidfile'};
13766 $self->{'pid'} = $$;
13767 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
13768 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
13769 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13770 ::rm($old_pidfile);
13773 sub atomic_link_if_count_less_than($) {
13774 # Link $file1 to $file2 if nlinks to $file1 < $count
13775 my $self = shift;
13776 my $retval = 0;
13777 $self->lock();
13778 my $nlinks = $self->nlinks();
13779 ::debug("sem","$nlinks<$self->{'count'} ");
13780 if($nlinks < $self->{'count'}) {
13781 -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
13782 if(not -e $self->{'idfile'}) {
13783 open (my $fh, ">", $self->{'idfile'}) or
13784 ::die_bug("write_idfile: $self->{'idfile'}");
13785 close $fh;
13787 $retval = link $self->{'idfile'}, $self->{'pidfile'};
13788 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
13790 $self->unlock();
13791 ::debug("sem", "atomic $retval");
13792 return $retval;
13795 sub nlinks($) {
13796 my $self = shift;
13797 if(-e $self->{'idfile'}) {
13798 return (stat(_))[3];
13799 } else {
13800 return 0;
13804 sub lock($) {
13805 my $self = shift;
13806 my $sleep = 100; # 100 ms
13807 my $total_sleep = 0;
13808 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
13809 my $locked = 0;
13810 while(not $locked) {
13811 if(tell($self->{'lockfh'}) == -1) {
13812 # File not open
13813 open($self->{'lockfh'}, ">", $self->{'lockfile'})
13814 or ::debug("run", "Cannot open $self->{'lockfile'}");
13816 if($self->{'lockfh'}) {
13817 # File is open
13818 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
13819 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
13820 # The file is locked: No need to retry
13821 $locked = 1;
13822 last;
13823 } else {
13824 if ($! =~ m/Function not implemented/) {
13825 ::warning("flock: $!",
13826 "Will wait for a random while.");
13827 ::usleep(rand(5000));
13828 # File cannot be locked: No need to retry
13829 $locked = 2;
13830 last;
13834 # Locking failed in first round
13835 # Sleep and try again
13836 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
13837 # Random to avoid every sleeping job waking up at the same time
13838 ::usleep(rand()*$sleep);
13839 $total_sleep += $sleep;
13840 if($opt::semaphoretimeout) {
13841 if($opt::semaphoretimeout > 0
13843 $total_sleep/1000 > $opt::semaphoretimeout) {
13844 # Timeout: Take the semaphore anyway
13845 ::warning("Semaphore timed out. Taking the semaphore.");
13846 $locked = 3;
13847 last;
13849 if($opt::semaphoretimeout < 0
13851 $total_sleep/1000 > -$opt::semaphoretimeout) {
13852 # Timeout: Exit
13853 ::warning("Semaphore timed out. Exiting.");
13854 $locked = 4;
13855 last;
13857 } else {
13858 if($total_sleep/1000 > 30) {
13859 ::warning("Semaphore stuck for 30 seconds. ".
13860 "Consider using --semaphoretimeout.");
13864 ::debug("run", "locked $self->{'lockfile'}");
13867 sub unlock($) {
13868 my $self = shift;
13869 ::rm($self->{'lockfile'});
13870 close $self->{'lockfh'};
13871 ::debug("run", "unlocked\n");
13874 # Keep perl -w happy
13876 $opt::x = $Semaphore::timeout = $Semaphore::wait =
13877 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
13878 $Global::max_slot_number = $opt::session;
13880 package main;
13882 sub main() {
13883 save_stdin_stdout_stderr();
13884 save_original_signal_handler();
13885 parse_options();
13886 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fh), "\n");
13887 my $number_of_args;
13888 if($Global::max_number_of_args) {
13889 $number_of_args = $Global::max_number_of_args;
13890 } elsif ($opt::X or $opt::m or $opt::xargs) {
13891 $number_of_args = undef;
13892 } else {
13893 $number_of_args = 1;
13896 my @command = @ARGV;
13897 my @input_source_fh;
13898 if($opt::pipepart) {
13899 if($opt::tee) {
13900 @input_source_fh = map { open_or_exit($_) } @opt::a;
13901 # Remove the first: It will be the file piped.
13902 shift @input_source_fh;
13903 if(not @input_source_fh and not $opt::pipe) {
13904 @input_source_fh = (*STDIN);
13906 } else {
13907 # -a is used for data - not for command line args
13908 @input_source_fh = map { open_or_exit($_) } "/dev/null";
13910 } else {
13911 @input_source_fh = map { open_or_exit($_) } @opt::a;
13912 if(not @input_source_fh and not $opt::pipe) {
13913 @input_source_fh = (*STDIN);
13917 if($opt::skip_first_line) {
13918 # Skip the first line for the first file handle
13919 my $fh = $input_source_fh[0];
13920 <$fh>;
13923 set_input_source_header(\@command,\@input_source_fh);
13924 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
13925 # Parallel check all hosts are up. Remove hosts that are down
13926 filter_hosts();
13930 if($opt::sqlmaster and $opt::sqlworker) {
13931 # Start a real --sqlworker in the background later
13932 $Global::start_sqlworker = 1;
13933 $opt::sqlworker = undef;
13936 if($opt::nonall or $opt::onall) {
13937 onall(\@input_source_fh,@command);
13938 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
13941 $Global::JobQueue = JobQueue->new(
13942 \@command, \@input_source_fh, $Global::ContextReplace,
13943 $number_of_args, \@Global::transfer_files, \@Global::ret_files,
13944 \@Global::template_names, \@Global::template_contents
13947 if($opt::sqlmaster) {
13948 # Create SQL table to hold joblog + output
13949 # Figure out how many arguments are in a job
13950 # (It is affected by --colsep, -N, $number_source_fh)
13951 my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'};
13952 my $record = $record_queue->get();
13953 my $no_of_values = $number_of_args * (1+$#{$record});
13954 $record_queue->unget($record);
13955 $Global::sql->create_table($no_of_values);
13956 if($opt::sqlworker) {
13957 # Start a real --sqlworker in the background later
13958 $Global::start_sqlworker = 1;
13959 $opt::sqlworker = undef;
13963 if($opt::pipepart) {
13964 pipepart_setup();
13965 } elsif($opt::pipe and $opt::tee) {
13966 pipe_tee_setup();
13967 } elsif($opt::pipe and $opt::shard or $opt::bin) {
13968 pipe_shard_setup();
13971 if(not $opt::pipepart and $opt::groupby) {
13972 group_by_stdin_filter();
13974 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
13975 # Count the number of jobs or shuffle all jobs
13976 # before starting any.
13977 # Must be done after ungetting any --pipepart jobs.
13978 $Global::JobQueue->total_jobs();
13980 # Compute $Global::max_jobs_running
13981 # Must be done after ungetting any --pipepart jobs.
13982 max_jobs_running();
13984 init_run_jobs();
13985 my $sem;
13986 if($Global::semaphore) {
13987 $sem = acquire_semaphore();
13989 $SIG{TERM} = $Global::original_sig{TERM};
13990 $SIG{HUP} = \&start_no_new_jobs;
13992 if($opt::tee or $opt::shard or $opt::bin) {
13993 # All jobs must be running in parallel for --tee/--shard/--bin
13994 while(start_more_jobs()) {}
13995 $Global::start_no_new_jobs = 1;
13996 if(not $Global::JobQueue->empty()) {
13997 if($opt::tee) {
13998 ::error("--tee requires --jobs to be higher. Try --jobs 0.");
13999 } elsif($opt::bin) {
14000 ::error("--bin requires --jobs to be higher than the number of",
14001 "arguments. Increase --jobs.");
14002 } elsif($opt::shard) {
14003 ::error("--shard requires --jobs to be higher than the number of",
14004 "arguments. Increase --jobs.");
14005 } else {
14006 ::die_bug("--bin/--shard/--tee should not get here");
14008 ::wait_and_exit(255);
14010 } elsif($opt::pipe and not $opt::pipepart) {
14011 # Fill all jobslots
14012 while(start_more_jobs()) {}
14013 spreadstdin();
14014 } else {
14015 # Reap the finished jobs and start more
14016 while(reapers() + start_more_jobs()) {}
14018 ::debug("init", "Start draining\n");
14019 drain_job_queue(@command);
14020 ::debug("init", "Done draining\n");
14021 reapers();
14022 ::debug("init", "Done reaping\n");
14023 if($Global::semaphore) {
14024 $sem->release();
14026 cleanup();
14027 ::debug("init", "Halt\n");
14028 halt();
14031 main();