testsuite: Make some tests more stable.
[parallel.git] / src / parallel
blob9cab6a5e005ad26d890acc965a4405c70d46caec
1 #!/usr/bin/env perl
3 # Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, see <http://www.gnu.org/licenses/>
17 # or write to the Free Software Foundation, Inc., 51 Franklin St,
18 # Fifth Floor, Boston, MA 02110-1301 USA
20 # open3 used in Job::start
21 use IPC::Open3;
22 # &WNOHANG used in reaper
23 use POSIX qw(:sys_wait_h setsid ceil :errno_h);
24 # gensym used in Job::start
25 use Symbol qw(gensym);
26 # tempfile used in Job::start
27 use File::Temp qw(tempfile tempdir);
28 # mkpath used in openresultsfile
29 use File::Path;
30 # GetOptions used in get_options_from_array
31 use Getopt::Long;
32 # Used to ensure code quality
33 use strict;
34 use File::Basename;
36 save_stdin_stdout_stderr();
37 save_original_signal_handler();
38 parse_options();
39 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
40 my $number_of_args;
41 if($Global::max_number_of_args) {
42 $number_of_args = $Global::max_number_of_args;
43 } elsif ($opt::X or $opt::m or $opt::xargs) {
44 $number_of_args = undef;
45 } else {
46 $number_of_args = 1;
49 my @command = @ARGV;
50 my @input_source_fh;
51 if($opt::pipepart) {
52 if($opt::tee) {
53 @input_source_fh = map { open_or_exit($_) } @opt::a;
54 # Remove the first: It will be the file piped.
55 shift @input_source_fh;
56 if(not @input_source_fh and not $opt::pipe) {
57 @input_source_fh = (*STDIN);
59 } else {
60 # -a is used for data - not for command line args
61 @input_source_fh = map { open_or_exit($_) } "/dev/null";
63 } else {
64 @input_source_fh = map { open_or_exit($_) } @opt::a;
65 if(not @input_source_fh and not $opt::pipe) {
66 @input_source_fh = (*STDIN);
69 if($opt::sqlmaster) {
70 # Create SQL table to hold joblog + output
71 $Global::sql->create_table($#input_source_fh+1);
72 if($opt::sqlworker) {
73 # Start a real --sqlworker in the background later
74 $Global::start_sqlworker = 1;
75 $opt::sqlworker = undef;
79 if($opt::skip_first_line) {
80 # Skip the first line for the first file handle
81 my $fh = $input_source_fh[0];
82 <$fh>;
85 set_input_source_header();
87 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
88 # Parallel check all hosts are up. Remove hosts that are down
89 filter_hosts();
92 if($opt::nonall or $opt::onall) {
93 onall(\@input_source_fh,@command);
94 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
97 $Global::JobQueue = JobQueue->new(
98 \@command,\@input_source_fh,$Global::ContextReplace,
99 $number_of_args,\@Global::transfer_files,\@Global::ret_files);
101 if($opt::pipepart) {
102 pipepart_setup();
103 } elsif($opt::pipe and $opt::tee) {
104 pipe_tee_setup();
107 if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) {
108 # Count the number of jobs or shuffle all jobs
109 # before starting any.
110 # Must be done after ungetting any --pipepart jobs.
111 $Global::JobQueue->total_jobs();
113 # Compute $Global::max_jobs_running
114 # Must be done after ungetting any --pipepart jobs.
115 max_jobs_running();
117 init_run_jobs();
118 my $sem;
119 if($Global::semaphore) {
120 $sem = acquire_semaphore();
122 $SIG{TERM} = \&start_no_new_jobs;
124 if($opt::tee) {
125 # All jobs must be running in parallel for --tee
126 while(start_more_jobs()) {}
127 $Global::start_no_new_jobs = 1;
128 if(not $Global::JobQueue->empty()) {
129 ::error("--tee requres --jobs to be higher. Try --jobs 0.");
130 ::wait_and_exit(255);
132 } elsif($opt::pipe and not $opt::pipepart) {
133 # Fill all jobslots
134 while(start_more_jobs()) {}
135 spreadstdin();
136 } else {
137 # Reap one - start one
138 while(reaper() + start_more_jobs()) {}
140 ::debug("init", "Start draining\n");
141 drain_job_queue();
142 ::debug("init", "Done draining\n");
143 reapers();
144 ::debug("init", "Done reaping\n");
145 if($Global::semaphore) {
146 $sem->release();
148 cleanup();
149 ::debug("init", "Halt\n");
150 halt();
152 sub set_input_source_header {
153 if($opt::header and not $opt::pipe) {
154 # split with colsep or \t
155 # $header force $colsep = \t if undef?
156 my $delimiter = defined $opt::colsep ? $opt::colsep : "\t";
157 # regexp for {=
158 my $left = "\Q$Global::parensleft\E";
159 my $l = $Global::parensleft;
160 # regexp for =}
161 my $right = "\Q$Global::parensright\E";
162 my $r = $Global::parensright;
163 my $id = 1;
164 for my $fh (@input_source_fh) {
165 my $line = <$fh>;
166 chomp($line);
167 ::debug("init", "Delimiter: '$delimiter'");
168 for my $s (split /$delimiter/o, $line) {
169 ::debug("init", "Colname: '$s'");
170 # Replace {colname} with {2}
171 for(@command,@Global::ret_files,@Global::transfer_files,
172 $opt::tagstring, $opt::workdir, $opt::results,
173 $opt::retries) {
174 # Skip if undefined
175 $_ or next;
176 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
177 # {=header1 ... =} => {=1 ... =}
178 s:$left $s (.*?) $right:$l$id$1$r:gx;
180 $Global::input_source_header{$id} = $s;
181 $id++;
184 } else {
185 my $id = 1;
186 for my $fh (@input_source_fh) {
187 $Global::input_source_header{$id} = $id;
188 $id++;
193 sub max_jobs_running {
194 # Compute $Global::max_jobs_running as the max number of jobs
195 # running on each sshlogin.
196 # Returns:
197 # $Global::max_jobs_running
198 if(not $Global::max_jobs_running) {
199 for my $sshlogin (values %Global::host) {
200 $sshlogin->max_jobs_running();
203 if(not $Global::max_jobs_running) {
204 ::error("Cannot run any jobs.");
205 wait_and_exit(255);
207 return $Global::max_jobs_running;
210 sub halt {
211 # Compute exit value,
212 # wait for children to complete
213 # and exit
214 if($opt::halt and $Global::halt_when ne "never") {
215 if(not defined $Global::halt_exitstatus) {
216 if($Global::halt_pct) {
217 $Global::halt_exitstatus =
218 ::ceil($Global::total_failed /
219 $Global::total_started * 100);
220 } elsif($Global::halt_count) {
221 $Global::halt_exitstatus =
222 ::min(undef_as_zero($Global::total_failed),101);
225 wait_and_exit($Global::halt_exitstatus);
226 } else {
227 wait_and_exit(min(undef_as_zero($Global::exitstatus),101));
231 sub __PIPE_MODE__ {}
233 sub pipepart_setup {
234 # Compute the blocksize
235 # Generate the commands to extract the blocks
236 # Push the commands on queue
237 # Changes:
238 # @Global::cat_prepends
239 # $Global::JobQueue
240 if($opt::tee) {
241 # Prepend each command with
242 # < file
243 my $cat_string = "< ".Q($opt::a[0]);
244 for(1..$Global::JobQueue->total_jobs()) {
245 push @Global::cat_appends, $cat_string;
246 push @Global::cat_prepends, "";
248 } else {
249 if(not $opt::blocksize) {
250 # --blocksize with 10 jobs per jobslot
251 $opt::blocksize = -10;
253 if($opt::roundrobin) {
254 # --blocksize with 1 job per jobslot
255 $opt::blocksize = -1;
257 if($opt::blocksize < 0) {
258 my $size = 0;
259 # Compute size of -a
260 for(@opt::a) {
261 if(-f $_) {
262 $size += -s $_;
263 } elsif(-b $_) {
264 $size += size_of_block_dev($_);
265 } else {
266 ::error("$_ is neither a file nor a block device");
267 wait_and_exit(255);
270 # Run in total $job_slots*(- $blocksize) jobs
271 # Set --blocksize = size / no of proc / (- $blocksize)
272 $Global::dummy_jobs = 1;
273 $Global::blocksize = 1 +
274 int($size / max_jobs_running() / -$opt::blocksize);
276 @Global::cat_prepends = map { pipe_part_files($_) } @opt::a;
277 # Unget the empty arg as many times as there are parts
278 $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(
279 map { [Arg->new("\0noarg")] } @Global::cat_prepends
284 sub pipe_tee_setup {
285 # Create temporary fifos
286 # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background
287 # This will spread the input to fifos
288 # Generate commands that reads from fifo1..N:
289 # cat fifo | user_command
290 # Changes:
291 # @Global::cat_prepends
292 my @fifos;
293 for(1..$Global::JobQueue->total_jobs()) {
294 push @fifos, tmpfifo();
296 # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null
297 if(not fork()){
298 # Let tee inherit our stdin
299 # and redirect stdout to null
300 open STDOUT, ">","/dev/null";
301 exec "tee",@fifos;
303 # For each fifo
304 # (rm fifo1; grep 1) < fifo1
305 # (rm fifo2; grep 2) < fifo2
306 # (rm fifo3; grep 3) < fifo3
307 # Remove the tmpfifo as soon as it is open
308 @Global::cat_prepends = map { "(rm $_;" } @fifos;
309 @Global::cat_appends = map { ") < $_" } @fifos;
312 sub pipe_part_files {
313 # Given the bigfile
314 # find header and split positions
315 # make commands that 'cat's the partial file
316 # Input:
317 # $file = the file to read
318 # Returns:
319 # @commands that will cat_partial each part
320 my ($file) = @_;
321 my $buf = "";
322 if(not -f $file and not -b $file) {
323 ::error("$file is not a seekable file.");
324 ::wait_and_exit(255);
326 my $header = find_header(\$buf,open_or_exit($file));
327 # find positions
328 my @pos = find_split_positions($file,$Global::blocksize,length $header);
329 # Make @cat_prepends
330 my @cat_prepends = ();
331 for(my $i=0; $i<$#pos; $i++) {
332 push(@cat_prepends,
333 cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]));
335 return @cat_prepends;
338 sub find_header {
339 # Compute the header based on $opt::header
340 # Input:
341 # $buf_ref = reference to read-in buffer
342 # $fh = filehandle to read from
343 # Uses:
344 # $opt::header
345 # $Global::blocksize
346 # Returns:
347 # $header string
348 my ($buf_ref, $fh) = @_;
349 my $header = "";
350 if($opt::header) {
351 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
352 # Number = number of lines
353 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
354 while(read($fh,substr($$buf_ref,length $$buf_ref,0),
355 $Global::blocksize)) {
356 if($$buf_ref=~s/^($opt::header)//) {
357 $header = $1;
358 last;
362 return $header;
365 sub find_split_positions {
366 # Find positions in bigfile where recend is followed by recstart
367 # Input:
368 # $file = the file to read
369 # $block = (minimal) --block-size of each chunk
370 # $headerlen = length of header to be skipped
371 # Uses:
372 # $opt::recstart
373 # $opt::recend
374 # Returns:
375 # @positions of block start/end
376 my($file, $block, $headerlen) = @_;
377 my $size = -s $file;
378 if(-b $file) {
379 # $file is a blockdevice
380 $size = size_of_block_dev($file);
382 $block = int $block;
383 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
384 # The optimal dd blocksize for freebsd = 2^15..2^17
385 my $dd_block_size = 131072; # 2^17
386 my @pos;
387 my ($recstart,$recend) = recstartrecend();
388 my $recendrecstart = $recend.$recstart;
389 my $fh = ::open_or_exit($file);
390 push(@pos,$headerlen);
391 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
392 my $buf;
393 if($recendrecstart eq "") {
394 # records ends anywhere
395 push(@pos,$pos);
396 } else {
397 # Seek the the block start
398 seek($fh, $pos, 0) || die;
399 while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
400 if($opt::regexp) {
401 # If match /$recend$recstart/ => Record position
402 if($buf =~ /^(.*$recend)$recstart/os) {
403 # Start looking for next record _after_ this match
404 $pos += length($1);
405 push(@pos,$pos);
406 last;
408 } else {
409 # If match $recend$recstart => Record position
410 # TODO optimize to only look at the appended
411 # $dd_block_size + len $recendrecstart
412 # TODO increase $dd_block_size to optimize for longer records
413 my $i = index64(\$buf,$recendrecstart);
414 if($i != -1) {
415 # Start looking for next record _after_ this match
416 $pos += $i + length($recend);
417 push(@pos,$pos);
418 last;
424 if($pos[$#pos] != $size) {
425 # Last splitpoint was not at end of the file: add it
426 push(@pos,$size);
428 close $fh;
429 return @pos;
432 sub cat_partial {
433 # Efficient command to copy from byte X to byte Y
434 # Input:
435 # $file = the file to read
436 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
437 # Returns:
438 # Efficient command to copy $start..$end, $start2..$end2, ... to stdout
439 my($file, @start_end) = @_;
440 my($start, $i);
441 # Convert (start,end) to (start,len)
442 my @start_len = map {
443 if(++$i % 2) { $start = $_; } else { $_-$start }
444 } @start_end;
445 # This can read 7 GB/s using a single core
446 my $script = spacefree
449 while(@ARGV) {
450 sysseek(STDIN,shift,0) || die;
451 $left = shift;
452 while($read =
453 sysread(STDIN,$buf, $left > 131072 ? 131072 : $left)){
454 $left -= $read;
455 syswrite(STDOUT,$buf);
459 return "<". Q($file) .
460 " perl -e '$script' @start_len |";
463 sub spreadstdin {
464 # read a record
465 # Spawn a job and print the record to it.
466 # Uses:
467 # $Global::blocksize
468 # STDIN
469 # $opt::r
470 # $Global::max_lines
471 # $Global::max_number_of_args
472 # $opt::regexp
473 # $Global::start_no_new_jobs
474 # $opt::roundrobin
475 # %Global::running
476 # Returns: N/A
478 my $buf = "";
479 my ($recstart,$recend) = recstartrecend();
480 my $recendrecstart = $recend.$recstart;
481 my $chunk_number = 1;
482 my $one_time_through;
483 my $two_gb = 2**31-1;
484 my $blocksize = $Global::blocksize;
485 my $in = *STDIN;
486 my $header = find_header(\$buf,$in);
487 while(1) {
488 my $anything_written = 0;
489 my $buflen = length $buf;
490 my $readsize = ($buflen < $blocksize) ? $blocksize-$buflen : $blocksize;
491 # If $buf < $blocksize, append so it is $blocksize long after reading.
492 # Otherwise append a full $blocksize
493 if(not read($in,substr($buf,$buflen,0),$readsize)) {
494 # End-of-file
495 $chunk_number != 1 and last;
496 # Force the while-loop once if everything was read by header reading
497 $one_time_through++ and last;
499 if($opt::r) {
500 # Remove empty lines
501 $buf =~ s/^\s*\n//gm;
502 if(length $buf == 0) {
503 next;
506 if($Global::max_lines and not $Global::max_number_of_args) {
507 # Read n-line records
508 my $n_lines = $buf =~ tr/\n/\n/;
509 my $last_newline_pos = rindex64(\$buf,"\n");
510 # Go backwards until there are full n-line records
511 while($n_lines % $Global::max_lines) {
512 $n_lines--;
513 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
515 # Chop at $last_newline_pos as that is where n-line record ends
516 $anything_written +=
517 write_record_to_pipe($chunk_number++,\$header,\$buf,
518 $recstart,$recend,$last_newline_pos+1);
519 shorten(\$buf,$last_newline_pos+1);
520 } elsif($opt::regexp) {
521 if($Global::max_number_of_args) {
522 # -N => (start..*?end){n}
523 # -L -N => (start..*?end){n*l}
524 my $read_n_lines = -1+
525 $Global::max_number_of_args * ($Global::max_lines || 1);
526 # (?!negative lookahead) is needed to avoid backtracking
527 # See: https://unix.stackexchange.com/questions/439356/
528 while($buf =~
530 # Either recstart or at least one char from start
531 ^(?: $recstart | .)
532 # followed something
533 (?:(?!$recend$recstart).)*?
534 # and then recend
535 $recend
536 # Then n-1 times recstart.*recend
537 (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
539 # Followed by recstart
540 (?=$recstart)/osx) {
541 $anything_written +=
542 write_record_to_pipe($chunk_number++,\$header,\$buf,
543 $recstart,$recend,length $1);
544 shorten(\$buf,length $1);
546 } else {
547 eof($in) and last;
548 # Find the last recend-recstart in $buf
549 if($buf =~ /^(.*$recend)$recstart.*?$/os) {
550 $anything_written +=
551 write_record_to_pipe($chunk_number++,\$header,\$buf,
552 $recstart,$recend,length $1);
553 shorten(\$buf,length $1);
556 } elsif($opt::csv) {
557 # Read a full CSV record
558 # even number of " + end of line
559 my $last_newline_pos = length $buf;
560 do {
561 # find last EOL
562 $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
563 # While uneven "
564 } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2
565 and $last_newline_pos >= 0);
566 # Chop at $last_newline_pos as that is where CSV record ends
567 $anything_written +=
568 write_record_to_pipe($chunk_number++,\$header,\$buf,
569 $recstart,$recend,$last_newline_pos+1);
570 shorten(\$buf,$last_newline_pos+1);
571 } else {
572 if($Global::max_number_of_args) {
573 # -N => (start..*?end){n}
574 my $i = 0;
575 my $read_n_lines =
576 $Global::max_number_of_args * ($Global::max_lines || 1);
577 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
579 length $buf) {
580 $i += length $recend; # find the actual splitting location
581 $anything_written +=
582 write_record_to_pipe($chunk_number++,\$header,\$buf,
583 $recstart,$recend,$i);
584 shorten(\$buf,$i);
586 } else {
587 eof($in) and last;
588 # Find the last recend+recstart in $buf
589 my $i = rindex64(\$buf,$recendrecstart);
590 if($i != -1) {
591 $i += length $recend; # find the actual splitting location
592 $anything_written +=
593 write_record_to_pipe($chunk_number++,\$header,\$buf,
594 $recstart,$recend,$i);
595 shorten(\$buf,$i);
599 if(not $anything_written
600 and not eof($in)
601 and not $Global::no_autoexpand_block) {
602 # Nothing was written - maybe the block size < record size?
603 # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
604 if($blocksize < $two_gb) {
605 my $old_blocksize = $blocksize;
606 $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
607 ::warning("A record was longer than $old_blocksize. " .
608 "Increasing to --blocksize $blocksize.");
612 ::debug("init", "Done reading input\n");
614 # If there is anything left in the buffer write it
615 write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart,
616 $recend, length $buf);
618 if($opt::retries) {
619 $Global::no_more_input = 1;
620 # We need to start no more jobs: At most we need to retry some
621 # of the already running.
622 my @running = values %Global::running;
623 # Stop any virgins.
624 for my $job (@running) {
625 if(defined $job and $job->virgin()) {
626 close $job->fh(0,"w");
629 # Wait for running jobs to be done
630 my $sleep =1;
631 while($Global::total_running > 0) {
632 $sleep = ::reap_usleep($sleep);
633 start_more_jobs();
636 $Global::start_no_new_jobs ||= 1;
637 if($opt::roundrobin) {
638 # Flush blocks to roundrobin procs
639 my $sleep = 1;
640 while(%Global::running) {
641 my $something_written = 0;
642 for my $job (values %Global::running) {
643 if($job->block_length()) {
644 $something_written += $job->non_blocking_write();
645 } else {
646 close $job->fh(0,"w");
649 if($something_written) {
650 $sleep = $sleep/2+0.001;
652 $sleep = ::reap_usleep($sleep);
657 sub recstartrecend {
658 # Uses:
659 # $opt::recstart
660 # $opt::recend
661 # Returns:
662 # $recstart,$recend with default values and regexp conversion
663 my($recstart,$recend);
664 if(defined($opt::recstart) and defined($opt::recend)) {
665 # If both --recstart and --recend is given then both must match
666 $recstart = $opt::recstart;
667 $recend = $opt::recend;
668 } elsif(defined($opt::recstart)) {
669 # If --recstart is given it must match start of record
670 $recstart = $opt::recstart;
671 $recend = "";
672 } elsif(defined($opt::recend)) {
673 # If --recend is given then it must match end of record
674 $recstart = "";
675 $recend = $opt::recend;
676 if($opt::regexp and $recend eq '') {
677 # --regexp --recend ''
678 $recend = '.';
682 if($opt::regexp) {
683 # If $recstart/$recend contains '|'
684 # this should only apply to the regexp
685 $recstart = "(?:".$recstart.")";
686 $recend = "(?:".$recend.")";
687 } else {
688 # $recstart/$recend = printf strings (\n)
689 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
690 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
692 return ($recstart,$recend);
695 sub nindex {
696 # See if string is in buffer N times
697 # Returns:
698 # the position where the Nth copy is found
699 my ($buf_ref, $str, $n) = @_;
700 my $i = 0;
701 for(1..$n) {
702 $i = index64($buf_ref,$str,$i+1);
703 if($i == -1) { last }
705 return $i;
709 my @robin_queue;
710 my $sleep = 1;
712 sub round_robin_write {
713 # Input:
714 # $header_ref = ref to $header string
715 # $block_ref = ref to $block to be written
716 # $recstart = record start string
717 # $recend = record end string
718 # $endpos = end position of $block
719 # Uses:
720 # %Global::running
721 # Returns:
722 # $something_written = amount of bytes written
723 my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_;
724 my $written = 0;
725 my $block_passed = 0;
726 while(not $block_passed) {
727 # Continue flushing existing buffers
728 # until one is empty and a new block is passed
729 if(@robin_queue) {
730 # Rotate queue once so new blocks get a fair chance
731 # to be given to another block
732 push @robin_queue, shift @robin_queue;
733 } else {
734 # Make a queue to spread the blocks evenly
735 push @robin_queue, (sort { $a->seq() <=> $b->seq() }
736 values %Global::running);
738 if($opt::keeporder) {
739 for my $job (@robin_queue) {
740 if($job->block_length() > 0) {
741 $written += $job->non_blocking_write();
742 } else {
743 $job->set_block($header_ref, $buffer_ref,
744 $endpos, $recstart, $recend);
745 $block_passed = 1;
746 $job->set_virgin(0);
747 $written += $job->non_blocking_write();
748 last;
751 } else {
752 do {
753 $written = 0;
754 for my $job (@robin_queue) {
755 if($job->block_length() > 0) {
756 $written += $job->non_blocking_write();
757 } else {
758 $job->set_block($header_ref, $buffer_ref,
759 $endpos, $recstart, $recend);
760 $block_passed = 1;
761 $job->set_virgin(0);
762 $written += $job->non_blocking_write();
763 last;
766 if($written) {
767 $sleep = $sleep/1.5+0.001;
769 } while($written and not $block_passed);
771 $sleep = ::reap_usleep($sleep);
773 return $written;
777 sub index64 {
778 # Do index on strings > 2GB.
779 # index in Perl < v5.22 does not work for > 2GB
780 # Input:
781 # as index except STR which must be passed as a reference
782 # Output:
783 # as index
784 my $ref = shift;
785 my $match = shift;
786 my $pos = shift || 0;
787 my $block_size = 2**31-1;
788 my $strlen = length($$ref);
789 # No point in doing extra work if we don't need to.
790 if($strlen < $block_size or $] > 5.022) {
791 return index($$ref, $match, $pos);
794 my $matchlen = length($match);
795 my $ret;
796 my $offset = $pos;
797 while($offset < $strlen) {
798 $ret = index(
799 substr($$ref, $offset, $block_size),
800 $match, $pos-$offset);
801 if($ret != -1) {
802 return $ret + $offset;
804 $offset += ($block_size - $matchlen - 1);
806 return -1;
809 sub rindex64 {
810 # Do rindex on strings > 2GB.
811 # rindex in Perl < v5.22 does not work for > 2GB
812 # Input:
813 # as rindex except STR which must be passed as a reference
814 # Output:
815 # as rindex
816 my $ref = shift;
817 my $match = shift;
818 my $pos = shift;
819 my $block_size = 2**31-1;
820 my $strlen = length($$ref);
821 # Default: search from end
822 $pos = defined $pos ? $pos : $strlen;
823 # No point in doing extra work if we don't need to.
824 if($strlen < $block_size) {
825 return rindex($$ref, $match, $pos);
828 my $matchlen = length($match);
829 my $ret;
830 my $offset = $pos - $block_size + $matchlen;
831 if($offset < 0) {
832 # The offset is less than a $block_size
833 # Set the $offset to 0 and
834 # Adjust block_size accordingly
835 $block_size = $block_size + $offset;
836 $offset = 0;
838 while($offset >= 0) {
839 $ret = rindex(
840 substr($$ref, $offset, $block_size),
841 $match);
842 if($ret != -1) {
843 return $ret + $offset;
845 $offset -= ($block_size - $matchlen - 1);
847 return -1;
850 sub shorten {
851 # Do: substr($buf,0,$i) = "";
852 # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
853 # Input:
854 # $buf_ref = \$buf
855 # $i = position to shorten to
856 # Returns: N/A
857 my ($buf_ref, $i) = @_;
858 my $two_gb = 2**31-1;
859 while($i > $two_gb) {
860 substr($$buf_ref,0,$two_gb) = "";
861 $i -= $two_gb;
863 substr($$buf_ref,0,$i) = "";
866 sub write_record_to_pipe {
867 # Fork then
868 # Write record from pos 0 .. $endpos to pipe
869 # Input:
870 # $chunk_number = sequence number - to see if already run
871 # $header_ref = reference to header string to prepend
872 # $buffer_ref = reference to record to write
873 # $recstart = start string of record
874 # $recend = end string of record
875 # $endpos = position in $buffer_ref where record ends
876 # Uses:
877 # $Global::job_already_run
878 # $opt::roundrobin
879 # @Global::virgin_jobs
880 # Returns:
881 # Number of chunks written (0 or 1)
882 my ($chunk_number, $header_ref, $buffer_ref,
883 $recstart, $recend, $endpos) = @_;
884 if($endpos == 0) { return 0; }
885 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
886 if($opt::roundrobin) {
887 # Write the block to one of the already running jobs
888 return round_robin_write($header_ref, $buffer_ref,
889 $recstart, $recend, $endpos);
891 # If no virgin found, backoff
892 my $sleep = 0.0001; # 0.01 ms - better performance on highend
893 while(not @Global::virgin_jobs) {
894 ::debug("pipe", "No virgin jobs");
895 $sleep = ::reap_usleep($sleep);
896 # Jobs may not be started because of loadavg
897 # or too little time between each ssh login
898 # or retrying failed jobs.
899 start_more_jobs();
901 my $job = shift @Global::virgin_jobs;
902 # Job is no longer virgin
903 $job->set_virgin(0);
905 if($opt::retries) {
906 # Copy $buffer[0..$endpos] to $job->{'block'}
907 # Remove rec_sep
908 # Run $job->add_transfersize
909 $job->set_block($header_ref, $buffer_ref, $endpos,
910 $recstart, $recend);
911 if(fork()) {
912 # Skip
913 } else {
914 $job->write($job->block_ref());
915 close $job->fh(0,"w");
916 exit(0);
918 } else {
919 # We ignore the removed rec_sep which is technically wrong.
920 $job->add_transfersize($endpos + length $$header_ref);
921 if(fork()) {
922 # Skip
923 } else {
924 # Chop of at $endpos as we do not know how many rec_sep will
925 # be removed.
926 substr($$buffer_ref,$endpos,length $$buffer_ref) = "";
927 # Remove rec_sep
928 if($opt::remove_rec_sep) {
929 Job::remove_rec_sep($buffer_ref, $recstart, $recend);
931 $job->write($header_ref);
932 $job->write($buffer_ref);
933 close $job->fh(0,"w");
934 exit(0);
937 close $job->fh(0,"w");
938 return 1;
942 sub __SEM_MODE__ {}
945 sub acquire_semaphore {
946 # Acquires semaphore. If needed: spawns to the background
947 # Uses:
948 # @Global::host
949 # Returns:
950 # The semaphore to be released when jobs is complete
951 $Global::host{':'} = SSHLogin->new(":");
952 my $sem = Semaphore->new($Semaphore::name,
953 $Global::host{':'}->max_jobs_running());
954 $sem->acquire();
955 if($Semaphore::fg) {
956 # skip
957 } else {
958 if(fork()) {
959 exit(0);
960 } else {
961 # If run in the background, the PID will change
962 $sem->pid_change();
965 return $sem;
969 sub __PARSE_OPTIONS__ {}
972 sub options_hash {
973 # Returns:
974 # %hash = the GetOptions config
975 return
976 ("debug|D=s" => \$opt::D,
977 "xargs" => \$opt::xargs,
978 "m" => \$opt::m,
979 "X" => \$opt::X,
980 "v" => \@opt::v,
981 "sql=s" => \$opt::retired,
982 "sqlmaster=s" => \$opt::sqlmaster,
983 "sqlworker=s" => \$opt::sqlworker,
984 "sqlandworker=s" => \$opt::sqlandworker,
985 "joblog|jl=s" => \$opt::joblog,
986 "results|result|res=s" => \$opt::results,
987 "resume" => \$opt::resume,
988 "resume-failed|resumefailed" => \$opt::resume_failed,
989 "retry-failed|retryfailed" => \$opt::retry_failed,
990 "silent" => \$opt::silent,
991 "keep-order|keeporder|k" => \$opt::keeporder,
992 "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
993 "group" => \$opt::group,
994 "g" => \$opt::retired,
995 "ungroup|u" => \$opt::ungroup,
996 "linebuffer|linebuffered|line-buffer|line-buffered|lb"
997 => \$opt::linebuffer,
998 "tmux" => \$opt::tmux,
999 "tmuxpane" => \$opt::tmuxpane,
1000 "null|0" => \$opt::null,
1001 "quote|q" => \$opt::q,
1002 # Replacement strings
1003 "parens=s" => \$opt::parens,
1004 "rpl=s" => \@opt::rpl,
1005 "plus" => \$opt::plus,
1006 "I=s" => \$opt::I,
1007 "extensionreplace|er=s" => \$opt::U,
1008 "U=s" => \$opt::retired,
1009 "basenamereplace|bnr=s" => \$opt::basenamereplace,
1010 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
1011 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
1012 "seqreplace=s" => \$opt::seqreplace,
1013 "slotreplace=s" => \$opt::slotreplace,
1014 "jobs|j=s" => \$opt::jobs,
1015 "delay=s" => \$opt::delay,
1016 "sshdelay=f" => \$opt::sshdelay,
1017 "load=s" => \$opt::load,
1018 "noswap" => \$opt::noswap,
1019 "max-line-length-allowed" => \$opt::max_line_length_allowed,
1020 "number-of-cpus" => \$opt::number_of_cpus,
1021 "number-of-sockets" => \$opt::number_of_sockets,
1022 "number-of-cores" => \$opt::number_of_cores,
1023 "number-of-threads" => \$opt::number_of_threads,
1024 "use-sockets-instead-of-threads"
1025 => \$opt::use_sockets_instead_of_threads,
1026 "use-cores-instead-of-threads"
1027 => \$opt::use_cores_instead_of_threads,
1028 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
1029 "shellquote|shell_quote|shell-quote" => \$opt::shellquote,
1030 "nice=i" => \$opt::nice,
1031 "tag" => \$opt::tag,
1032 "tagstring|tag-string=s" => \$opt::tagstring,
1033 "onall" => \$opt::onall,
1034 "nonall" => \$opt::nonall,
1035 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
1036 "sshlogin|S=s" => \@opt::sshlogin,
1037 "sshloginfile|slf=s" => \@opt::sshloginfile,
1038 "controlmaster|M" => \$opt::controlmaster,
1039 "ssh=s" => \$opt::ssh,
1040 "transfer-file|transferfile|transfer-files|transferfiles|tf=s"
1041 => \@opt::transfer_files,
1042 "return=s" => \@opt::return,
1043 "trc=s" => \@opt::trc,
1044 "transfer" => \$opt::transfer,
1045 "cleanup" => \$opt::cleanup,
1046 "basefile|bf=s" => \@opt::basefile,
1047 "B=s" => \$opt::retired,
1048 "ctrlc|ctrl-c" => \$opt::retired,
1049 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired,
1050 "workdir|work-dir|wd=s" => \$opt::workdir,
1051 "W=s" => \$opt::retired,
1052 "rsync-opts|rsyncopts=s" => \$opt::rsync_opts,
1053 "tmpdir|tempdir=s" => \$opt::tmpdir,
1054 "use-compress-program|compress-program=s" => \$opt::compress_program,
1055 "use-decompress-program|decompress-program=s"
1056 => \$opt::decompress_program,
1057 "compress" => \$opt::compress,
1058 "tty" => \$opt::tty,
1059 "T" => \$opt::retired,
1060 "H=i" => \$opt::retired,
1061 "dry-run|dryrun|dr" => \$opt::dryrun,
1062 "progress" => \$opt::progress,
1063 "eta" => \$opt::eta,
1064 "bar" => \$opt::bar,
1065 "shuf" => \$opt::shuf,
1066 "arg-sep|argsep=s" => \$opt::arg_sep,
1067 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
1068 "trim=s" => \$opt::trim,
1069 "env=s" => \@opt::env,
1070 "recordenv|record-env" => \$opt::record_env,
1071 "session" => \$opt::session,
1072 "plain" => \$opt::plain,
1073 "profile|J=s" => \@opt::profile,
1074 "pipe|spreadstdin" => \$opt::pipe,
1075 "robin|round-robin|roundrobin" => \$opt::roundrobin,
1076 "recstart=s" => \$opt::recstart,
1077 "recend=s" => \$opt::recend,
1078 "regexp|regex" => \$opt::regexp,
1079 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
1080 "files|output-as-files|outputasfiles" => \$opt::files,
1081 "block|block-size|blocksize=s" => \$opt::blocksize,
1082 "tollef" => \$opt::tollef,
1083 "gnu" => \$opt::gnu,
1084 "link|xapply" => \$opt::link,
1085 "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource,
1086 # Before changing this line, please read
1087 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1088 "bibtex|citation" => \$opt::citation,
1089 "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
1090 # Termination and retries
1091 "halt-on-error|halt=s" => \$opt::halt,
1092 "limit=s" => \$opt::limit,
1093 "memfree=s" => \$opt::memfree,
1094 "retries=s" => \$opt::retries,
1095 "timeout=s" => \$opt::timeout,
1096 "termseq|term-seq=s" => \$opt::termseq,
1097 # xargs-compatibility - implemented, man, testsuite
1098 "max-procs|P=s" => \$opt::jobs,
1099 "delimiter|d=s" => \$opt::d,
1100 "max-chars|s=i" => \$opt::max_chars,
1101 "arg-file|a=s" => \@opt::a,
1102 "no-run-if-empty|r" => \$opt::r,
1103 "replace|i:s" => \$opt::i,
1104 "E=s" => \$opt::eof,
1105 "eof|e:s" => \$opt::eof,
1106 "max-args|maxargs|n=i" => \$opt::max_args,
1107 "max-replace-args|N=i" => \$opt::max_replace_args,
1108 "colsep|col-sep|C=s" => \$opt::colsep,
1109 "csv"=> \$opt::csv,
1110 "help|h" => \$opt::help,
1111 "L=f" => \$opt::L,
1112 "max-lines|l:f" => \$opt::max_lines,
1113 "interactive|p" => \$opt::interactive,
1114 "verbose|t" => \$opt::verbose,
1115 "version|V" => \$opt::version,
1116 "minversion|min-version=i" => \$opt::minversion,
1117 "show-limits|showlimits" => \$opt::show_limits,
1118 "exit|x" => \$opt::x,
1119 # Semaphore
1120 "semaphore" => \$opt::semaphore,
1121 "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
1122 "semaphorename|id=s" => \$opt::semaphorename,
1123 "fg" => \$opt::fg,
1124 "bg" => \$opt::bg,
1125 "wait" => \$opt::wait,
1126 # Shebang #!/usr/bin/parallel --shebang
1127 "shebang|hashbang" => \$opt::shebang,
1128 "internal-pipe-means-argfiles"
1129 => \$opt::internal_pipe_means_argfiles,
1130 "Y" => \$opt::retired,
1131 "skip-first-line" => \$opt::skip_first_line,
1132 "bug" => \$opt::bug,
1133 "header=s" => \$opt::header,
1134 "cat" => \$opt::cat,
1135 "fifo" => \$opt::fifo,
1136 "pipepart|pipe-part" => \$opt::pipepart,
1137 "tee" => \$opt::tee,
1138 "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups,
1139 "embed" => \$opt::embed,
1143 sub get_options_from_array {
1144 # Run GetOptions on @array
1145 # Input:
1146 # $array_ref = ref to @ARGV to parse
1147 # @keep_only = Keep only these options
1148 # Uses:
1149 # @ARGV
1150 # Returns:
1151 # true if parsing worked
1152 # false if parsing failed
1153 # @$array_ref is changed
1154 my ($array_ref, @keep_only) = @_;
1155 if(not @$array_ref) {
1156 # Empty array: No need to look more at that
1157 return 1;
1159 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
1160 # supported everywhere
1161 my @save_argv;
1162 my $this_is_ARGV = (\@::ARGV == $array_ref);
1163 if(not $this_is_ARGV) {
1164 @save_argv = @::ARGV;
1165 @::ARGV = @{$array_ref};
1167 # If @keep_only set: Ignore all values except @keep_only
1168 my %options = options_hash();
1169 if(@keep_only) {
1170 my (%keep,@dummy);
1171 @keep{@keep_only} = @keep_only;
1172 for my $k (grep { not $keep{$_} } keys %options) {
1173 # Store the value of the option in @dummy
1174 $options{$k} = \@dummy;
1177 my $retval = GetOptions(%options);
1178 if(not $this_is_ARGV) {
1179 @{$array_ref} = @::ARGV;
1180 @::ARGV = @save_argv;
1182 return $retval;
1185 sub parse_options {
1186 # Returns: N/A
1187 init_globals();
1188 my @argv_before = @ARGV;
1189 @ARGV = read_options();
1191 # Before changing this line, please read
1192 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
1193 if(defined $opt::citation) {
1194 citation(\@argv_before,\@ARGV);
1195 wait_and_exit(0);
1197 # no-* overrides *
1198 if($opt::nokeeporder) { $opt::keeporder = undef; }
1200 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
1201 if($opt::bug) { ::die_bug("test-bug"); }
1202 $Global::debug = $opt::D;
1203 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$)
1204 || $ENV{'SHELL'} || "/bin/sh";
1205 if(not -x $Global::shell and not which($Global::shell)) {
1206 ::error("Shell '$Global::shell' not found.");
1207 wait_and_exit(255);
1209 ::debug("init","Global::shell $Global::shell\n");
1210 $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:;
1211 if(defined $opt::X) { $Global::ContextReplace = 1; }
1212 if(defined $opt::silent) { $Global::verbose = 0; }
1213 if(defined $opt::null) { $/ = "\0"; }
1214 if(defined $opt::d) { $/ = unquote_printf($opt::d) }
1215 if(defined $opt::tagstring) {
1216 $opt::tagstring = unquote_printf($opt::tagstring);
1218 if(defined $opt::interactive) { $Global::interactive = $opt::interactive; }
1219 if(defined $opt::q) { $Global::quoting = 1; }
1220 if(defined $opt::r) { $Global::ignore_empty = 1; }
1221 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
1222 parse_replacement_string_options();
1223 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
1224 if(defined $opt::max_args) {
1225 $Global::max_number_of_args = $opt::max_args;
1227 if(defined $opt::timeout) {
1228 $Global::timeoutq = TimeoutQueue->new($opt::timeout);
1230 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
1231 $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts ||
1232 $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR';
1233 $opt::nice ||= 0;
1234 if(defined $opt::help) { usage(); exit(0); }
1235 if(defined $opt::embed) { embed(); exit(0); }
1236 if(defined $opt::sqlandworker) {
1237 $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker;
1239 if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
1240 if(defined $opt::colsep) { $Global::trim = 'lr'; }
1241 if(defined $opt::csv) {
1242 $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
1243 $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
1244 my $csv_setting = { binary => 1, sep_char => $opt::colsep };
1245 my $sep = $csv_setting->{sep_char};
1246 $Global::csv = Text::CSV->new($csv_setting)
1247 or die "Cannot use CSV: ".Text::CSV->error_diag ();
1249 if(defined $opt::header) {
1250 $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
1252 if(defined $opt::trim) { $Global::trim = $opt::trim; }
1253 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
1254 if(defined $opt::arg_file_sep) {
1255 $Global::arg_file_sep = $opt::arg_file_sep;
1257 if(defined $opt::number_of_sockets) {
1258 print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0);
1260 if(defined $opt::number_of_cpus) {
1261 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1263 if(defined $opt::number_of_cores) {
1264 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
1266 if(defined $opt::number_of_threads) {
1267 print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0);
1269 if(defined $opt::max_line_length_allowed) {
1270 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
1272 if(defined $opt::version) { version(); wait_and_exit(0); }
1273 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
1274 if(defined $opt::show_limits) { show_limits(); }
1275 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
1276 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
1277 if(@opt::return) { push @Global::ret_files, @opt::return; }
1278 if($opt::transfer) {
1279 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1281 push @Global::transfer_files, @opt::transfer_files;
1282 if(not defined $opt::recstart and
1283 not defined $opt::recend) { $opt::recend = "\n"; }
1284 $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
1285 if($Global::blocksize > 2**31-1 and not $opt::pipepart) {
1286 warning("--blocksize >= 2G causes problems. Using 2G-1.");
1287 $Global::blocksize = 2**31-1;
1289 if($^O eq "cygwin" and
1290 ($opt::pipe or $opt::pipepart or $opt::roundrobin)
1291 and $Global::blocksize > 65535) {
1292 warning("--blocksize >= 64K causes problems on Cygwin.");
1294 $opt::memfree = multiply_binary_prefix($opt::memfree);
1295 check_invalid_option_combinations();
1296 if((defined $opt::fifo or defined $opt::cat)
1297 and not $opt::pipepart) {
1298 $opt::pipe = 1;
1300 if(defined $opt::minversion) {
1301 print $Global::version,"\n";
1302 if($Global::version < $opt::minversion) {
1303 wait_and_exit(255);
1304 } else {
1305 wait_and_exit(0);
1308 if(not defined $opt::delay) {
1309 # Set --delay to --sshdelay if not set
1310 $opt::delay = $opt::sshdelay;
1312 $opt::delay = multiply_time_units($opt::delay);
1313 if($opt::compress_program) {
1314 $opt::compress = 1;
1315 $opt::decompress_program ||= $opt::compress_program." -dc";
1318 if(defined $opt::results) {
1319 # Is the output a dir or CSV-file?
1320 if($opt::results =~ /\.csv$/i) {
1321 # CSV with , as separator
1322 $Global::csvsep = ",";
1323 $Global::membuffer ||= 1;
1324 } elsif($opt::results =~ /\.tsv$/i) {
1325 # CSV with TAB as separator
1326 $Global::csvsep = "\t";
1327 $Global::membuffer ||= 1;
1330 if($opt::compress) {
1331 my ($compress, $decompress) = find_compression_program();
1332 $opt::compress_program ||= $compress;
1333 $opt::decompress_program ||= $decompress;
1334 if(($opt::results and not $Global::csvsep) or $opt::files) {
1335 # No need for decompressing
1336 $opt::decompress_program = "cat >/dev/null";
1339 if(defined $opt::dryrun) {
1340 # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks
1341 $opt::ungroup = 0;
1342 $opt::group = 1;
1344 if(defined $opt::nonall) {
1345 # Append a dummy empty argument if there are no arguments
1346 # on the command line to avoid reading from STDIN.
1347 # arg_sep = random 50 char
1348 # \0noarg => nothing (not the empty string)
1349 $Global::arg_sep = join "",
1350 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50);
1351 push @ARGV, $Global::arg_sep, "\0noarg";
1353 if(defined $opt::tee) {
1354 if(not defined $opt::jobs) {
1355 $opt::jobs = 0;
1358 if(defined $opt::tty) {
1359 # Defaults for --tty: -j1 -u
1360 # Can be overridden with -jXXX -g
1361 if(not defined $opt::jobs) {
1362 $opt::jobs = 1;
1364 if(not defined $opt::group) {
1365 $opt::ungroup = 1;
1368 if(@opt::trc) {
1369 push @Global::ret_files, @opt::trc;
1370 if(not @Global::transfer_files) {
1371 # Defaults to --transferfile {}
1372 push @Global::transfer_files, $opt::i || $opt::I || "{}";
1374 $opt::cleanup = 1;
1376 if(defined $opt::max_lines) {
1377 if($opt::max_lines eq "-0") {
1378 # -l -0 (swallowed -0)
1379 $opt::max_lines = 1;
1380 $opt::null = 1;
1381 $/ = "\0";
1382 } elsif ($opt::max_lines == 0) {
1383 # If not given (or if 0 is given) => 1
1384 $opt::max_lines = 1;
1386 $Global::max_lines = $opt::max_lines;
1387 if(not $opt::pipe) {
1388 # --pipe -L means length of record - not max_number_of_args
1389 $Global::max_number_of_args ||= $Global::max_lines;
1393 # Read more than one arg at a time (-L, -N)
1394 if(defined $opt::L) {
1395 $Global::max_lines = $opt::L;
1396 if(not $opt::pipe) {
1397 # --pipe -L means length of record - not max_number_of_args
1398 $Global::max_number_of_args ||= $Global::max_lines;
1401 if(defined $opt::max_replace_args) {
1402 $Global::max_number_of_args = $opt::max_replace_args;
1403 $Global::ContextReplace = 1;
1405 if((defined $opt::L or defined $opt::max_replace_args)
1407 not ($opt::xargs or $opt::m)) {
1408 $Global::ContextReplace = 1;
1410 if(defined $opt::tag and not defined $opt::tagstring) {
1411 # Default = {}
1412 $opt::tagstring = $Global::parensleft.$Global::parensright;
1414 if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) {
1415 # Deal with ::: :::+ :::: and ::::+
1416 @ARGV = read_args_from_command_line();
1418 parse_semaphore();
1420 if(defined $opt::eta) { $opt::progress = $opt::eta; }
1421 if(defined $opt::bar) { $opt::progress = $opt::bar; }
1423 # Funding a free software project is hard. GNU Parallel is no
1424 # exception. On top of that it seems the less visible a project
1425 # is, the harder it is to get funding. And the nature of GNU
1426 # Parallel is that it will never be seen by "the guy with the
1427 # checkbook", but only by the people doing the actual work.
1429 # This problem has been covered by others - though no solution has
1430 # been found:
1431 # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
1432 # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
1434 # Before implementing the citation notice it was discussed with
1435 # the users:
1436 # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
1438 # Having to spend 10 seconds on running 'parallel --citation' once
1439 # is no doubt not an ideal solution, but no one has so far come up
1440 # with an ideal solution - neither for funding GNU Parallel nor
1441 # other free software.
1443 # If you believe you have the perfect solution, you should try it
1444 # out, and if it works, you should post it on the email
1445 # list. Ideas that will cost work and which have not been tested
1446 # are, however, unlikely to be prioritized.
1448 # Please note that GPL version 3 gives you the right to fork GNU
1449 # Parallel under a new name, but it does not give you the right to
1450 # distribute modified copies with the citation notice disabled
1451 # under the name GNU Parallel. To do that you need to be the owner
1452 # of the GNU Parallel trademark. The xt:Commerce case shows this.
1454 # Description of the xt:Commerce case in OLG Duesseldorf
1455 # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1456 # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
1458 # The verdict in German
1459 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1460 # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
1462 # Other free software limiting derivates by the same name
1463 # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
1464 # https://tm.joomla.org/trademark-faq.html
1465 # https://www.mozilla.org/en-US/foundation/trademarks/faq/
1467 # Running 'parallel --citation' one single time takes less than 10
1468 # seconds, and will silence the citation notice for future
1469 # runs. If that is too much trouble for you, why not use one of
1470 # the alternatives instead?
1471 # See a list in: 'man parallel_alternatives'
1473 # Please read the above before changing this line.
1474 citation_notice();
1476 parse_halt();
1478 if($ENV{'PARALLEL_ENV'}) {
1479 # Read environment and set $Global::parallel_env
1480 # Must be done before is_acceptable_command_line_length()
1481 my $penv = $ENV{'PARALLEL_ENV'};
1482 # unset $PARALLEL_ENV: It should not be given to children
1483 # because it takes up a lot of env space
1484 delete $ENV{'PARALLEL_ENV'};
1485 if(-e $penv) {
1486 # This is a file/fifo: Replace envvar with content of file
1487 open(my $parallel_env, "<", $penv) ||
1488 ::die_bug("Cannot read parallel_env from $penv");
1489 local $/; # Put <> in slurp mode
1490 $penv = <$parallel_env>;
1491 close $parallel_env;
1493 # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV
1494 $penv =~ s/\001/\n/g;
1495 if($penv =~ /\0/) {
1496 ::warning('\0 (NUL) in environment is not supported');
1498 $Global::parallel_env = $penv;
1501 parse_sshlogin();
1503 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
1504 # As we do not know the max line length on the remote machine
1505 # long commands generated by xargs may fail
1506 # If $opt::max_replace_args is set, it is probably safe
1507 ::warning("Using -X or -m with --sshlogin may fail.");
1510 if(not defined $opt::jobs) { $opt::jobs = "100%"; }
1511 open_joblog();
1512 open_csv();
1513 if($opt::sqlmaster or $opt::sqlworker) {
1514 $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker);
1516 if($opt::sqlworker) { $Global::membuffer ||= 1; }
1519 sub check_invalid_option_combinations {
1520 if(defined $opt::timeout and
1521 $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) {
1522 ::error("--timeout must be seconds or percentage.");
1523 wait_and_exit(255);
1525 if(defined $opt::fifo and defined $opt::cat) {
1526 ::error("--fifo cannot be combined with --cat.");
1527 ::wait_and_exit(255);
1529 if(defined $opt::retries and defined $opt::roundrobin) {
1530 ::error("--retries cannot be combined with --roundrobin.");
1531 ::wait_and_exit(255);
1533 if(defined $opt::pipepart and
1534 (defined $opt::L or defined $opt::max_lines
1535 or defined $opt::max_replace_args)) {
1536 ::error("--pipepart is incompatible with --max-replace-args, ".
1537 "--max-lines, and -L.");
1538 wait_and_exit(255);
1540 if(defined $opt::group and $opt::ungroup) {
1541 ::error("--group cannot be combined with --ungroup.");
1542 ::wait_and_exit(255);
1544 if(defined $opt::group and $opt::linebuffer) {
1545 ::error("--group cannot be combined with --line-buffer.");
1546 ::wait_and_exit(255);
1548 if(defined $opt::ungroup and $opt::linebuffer) {
1549 ::error("--ungroup cannot be combined with --line-buffer.");
1550 ::wait_and_exit(255);
1552 if(defined $opt::tollef and not $opt::gnu) {
1553 ::error("--tollef has been retired.",
1554 "Remove --tollef or use --gnu to override --tollef.");
1555 ::wait_and_exit(255);
1557 if(defined $opt::retired) {
1558 ::error("-g has been retired. Use --group.",
1559 "-B has been retired. Use --bf.",
1560 "-T has been retired. Use --tty.",
1561 "-U has been retired. Use --er.",
1562 "-W has been retired. Use --wd.",
1563 "-Y has been retired. Use --shebang.",
1564 "-H has been retired. Use --halt.",
1565 "--sql has been retired. Use --sqlmaster.",
1566 "--ctrlc has been retired.",
1567 "--noctrlc has been retired.");
1568 ::wait_and_exit(255);
1572 sub init_globals {
1573 # Defaults:
1574 $Global::version = 20181223;
1575 $Global::progname = 'parallel';
1576 $Global::infinity = 2**31;
1577 $Global::debug = 0;
1578 $Global::verbose = 0;
1579 $Global::quoting = 0;
1580 $Global::total_completed = 0;
1581 # Read only table with default --rpl values
1582 %Global::replace =
1584 '{}' => '',
1585 '{#}' => '1 $_=$job->seq()',
1586 '{%}' => '1 $_=$job->slot()',
1587 '{/}' => 's:.*/::',
1588 '{//}' =>
1589 ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '.
1590 '$_ = dirname($_);'),
1591 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
1592 '{.}' => 's:\.[^/.]+$::',
1594 %Global::plus =
1596 # {} = {+/}/{/}
1597 # = {.}.{+.} = {+/}/{/.}.{+.}
1598 # = {..}.{+..} = {+/}/{/..}.{+..}
1599 # = {...}.{+...} = {+/}/{/...}.{+...}
1600 '{+/}' => 's:/[^/]*$::',
1601 '{+.}' => 's:.*\.::',
1602 '{+..}' => 's:.*\.([^.]*\.):$1:',
1603 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
1604 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
1605 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1606 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1607 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
1608 '{choose_k}' => 'for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }',
1609 # {##} = number of jobs
1610 '{##}' => '$_=total_jobs()',
1611 # Bash ${a:-myval}
1612 '{:-([^}]+?)}' => '$_ ||= $$1',
1613 # Bash ${a:2}
1614 '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
1615 # Bash ${a:2:3}
1616 '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
1617 # Bash ${a#bc}
1618 '{#([^#}][^}]*?)}' => 's/^$$1//;',
1619 # Bash ${a%def}
1620 '{%([^}]+?)}' => 's/$$1$//;',
1621 # Bash ${a/def/ghi} ${a/def/}
1622 '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;',
1623 # Bash ${a^a}
1624 '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;',
1625 # Bash ${a^^a}
1626 '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;',
1627 # Bash ${a,A}
1628 '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;',
1629 # Bash ${a,,A}
1630 '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;',
1632 # Modifiable copy of %Global::replace
1633 %Global::rpl = %Global::replace;
1634 $/ = "\n";
1635 $Global::ignore_empty = 0;
1636 $Global::interactive = 0;
1637 $Global::stderr_verbose = 0;
1638 $Global::default_simultaneous_sshlogins = 9;
1639 $Global::exitstatus = 0;
1640 $Global::arg_sep = ":::";
1641 $Global::arg_file_sep = "::::";
1642 $Global::trim = 'n';
1643 $Global::max_jobs_running = 0;
1644 $Global::job_already_run = '';
1645 $ENV{'TMPDIR'} ||= "/tmp";
1646 if(not $ENV{HOME}) {
1647 # $ENV{HOME} is sometimes not set if called from PHP
1648 ::warning("\$HOME not set. Using /tmp.");
1649 $ENV{HOME} = "/tmp";
1651 # no warnings to allow for undefined $XDG_*
1652 no warnings 'uninitialized';
1653 # $xdg_config_home is needed to make env_parallel.fish stop complaining
1654 my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'};
1655 # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel,
1656 # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel
1657 # Keep only dirs that exist
1658 @Global::config_dirs =
1659 (grep { -d $_ }
1660 $ENV{'PARALLEL_HOME'},
1661 (map { "$_/parallel" }
1662 $xdg_config_home,
1663 split /:/, $ENV{'XDG_CONFIG_DIRS'}),
1664 $ENV{'HOME'} . "/.parallel");
1665 # Use first dir as config dir
1666 $Global::config_dir = $Global::config_dirs[0] ||
1667 $ENV{'HOME'} . "/.parallel";
1668 # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel,
1669 # Keep only dirs that exist
1670 @Global::cache_dirs =
1671 (grep { -d $_ }
1672 $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel");
1673 $Global::cache_dir = $Global::cache_dirs[0] ||
1674 $ENV{'HOME'} . "/.parallel";
1677 sub parse_halt {
1678 # $opt::halt flavours
1679 # Uses:
1680 # $opt::halt
1681 # $Global::halt_when
1682 # $Global::halt_fail
1683 # $Global::halt_success
1684 # $Global::halt_pct
1685 # $Global::halt_count
1686 if(defined $opt::halt) {
1687 my %halt_expansion = (
1688 "0" => "never",
1689 "1" => "soon,fail=1",
1690 "2" => "now,fail=1",
1691 "-1" => "soon,success=1",
1692 "-2" => "now,success=1",
1694 # Expand -2,-1,0,1,2 into long form
1695 $opt::halt = $halt_expansion{$opt::halt} || $opt::halt;
1696 # --halt 5% == --halt soon,fail=5%
1697 $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/;
1698 # Split: soon,fail=5%
1699 my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt;
1700 if(not grep { $when eq $_ } qw(never soon now)) {
1701 ::error("--halt must have 'never', 'soon', or 'now'.");
1702 ::wait_and_exit(255);
1704 $Global::halt_when = $when;
1705 if($when ne "never") {
1706 if($fail_success eq "fail") {
1707 $Global::halt_fail = 1;
1708 } elsif($fail_success eq "success") {
1709 $Global::halt_success = 1;
1710 } elsif($fail_success eq "done") {
1711 $Global::halt_done = 1;
1712 } else {
1713 ::error("--halt $when must be followed by ,success or ,fail.");
1714 ::wait_and_exit(255);
1716 if($pct_count =~ /^(\d+)%$/) {
1717 $Global::halt_pct = $1/100;
1718 } elsif($pct_count =~ /^(\d+)$/) {
1719 $Global::halt_count = $1;
1720 } else {
1721 ::error("--halt $when,$fail_success ".
1722 "must be followed by ,number or ,percent%.");
1723 ::wait_and_exit(255);
1729 sub parse_replacement_string_options {
1730 # Deal with --rpl
1731 # Uses:
1732 # %Global::rpl
1733 # $Global::parensleft
1734 # $Global::parensright
1735 # $opt::parens
1736 # $Global::parensleft
1737 # $Global::parensright
1738 # $opt::plus
1739 # %Global::plus
1740 # $opt::I
1741 # $opt::U
1742 # $opt::i
1743 # $opt::basenamereplace
1744 # $opt::dirnamereplace
1745 # $opt::seqreplace
1746 # $opt::slotreplace
1747 # $opt::basenameextensionreplace
1749 sub rpl {
1750 # Modify %Global::rpl
1751 # Replace $old with $new
1752 my ($old,$new) = @_;
1753 if($old ne $new) {
1754 $Global::rpl{$new} = $Global::rpl{$old};
1755 delete $Global::rpl{$old};
1758 my $parens = "{==}";
1759 if(defined $opt::parens) { $parens = $opt::parens; }
1760 my $parenslen = 0.5*length $parens;
1761 $Global::parensleft = substr($parens,0,$parenslen);
1762 $Global::parensright = substr($parens,$parenslen);
1763 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
1764 if(defined $opt::I) { rpl('{}',$opt::I); }
1765 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
1766 if(defined $opt::U) { rpl('{.}',$opt::U); }
1767 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
1768 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
1769 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
1770 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
1771 if(defined $opt::basenameextensionreplace) {
1772 rpl('{/.}',$opt::basenameextensionreplace);
1774 for(@opt::rpl) {
1775 # Create $Global::rpl entries for --rpl options
1776 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
1777 my ($shorthand,$long) = split/ /,$_,2;
1778 $Global::rpl{$shorthand} = $long;
1782 sub parse_semaphore {
1783 # Semaphore defaults
1784 # Must be done before computing number of processes and max_line_length
1785 # because when running as a semaphore GNU Parallel does not read args
1786 # Uses:
1787 # $opt::semaphore
1788 # $Global::semaphore
1789 # $opt::semaphoretimeout
1790 # $Semaphore::timeout
1791 # $opt::semaphorename
1792 # $Semaphore::name
1793 # $opt::fg
1794 # $Semaphore::fg
1795 # $opt::wait
1796 # $Semaphore::wait
1797 # $opt::bg
1798 # @opt::a
1799 # @Global::unget_argv
1800 # $Global::default_simultaneous_sshlogins
1801 # $opt::jobs
1802 # $Global::interactive
1803 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
1804 if(defined $opt::semaphore) { $Global::semaphore = 1; }
1805 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
1806 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
1807 if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) {
1808 $Global::semaphore = 1;
1810 if(defined $opt::bg) { $Global::semaphore = 1; }
1811 if(defined $opt::wait and not $opt::sqlmaster) {
1812 $Global::semaphore = 1; @ARGV = "true";
1814 if($Global::semaphore) {
1815 if(@opt::a) {
1816 # A semaphore does not take input from neither stdin nor file
1817 ::error("A semaphore does not take input from neither stdin nor a file\n");
1818 ::wait_and_exit(255);
1820 @opt::a = ("/dev/null");
1821 # Append a dummy empty argument
1822 # \0 => nothing (not the empty string)
1823 push(@Global::unget_argv, [Arg->new("\0noarg")]);
1824 $Semaphore::timeout = $opt::semaphoretimeout || 0;
1825 if(defined $opt::semaphorename) {
1826 $Semaphore::name = $opt::semaphorename;
1827 } else {
1828 local $/ = "\n";
1829 $Semaphore::name = `tty`;
1830 chomp $Semaphore::name;
1832 $Semaphore::fg = $opt::fg;
1833 $Semaphore::wait = $opt::wait;
1834 $Global::default_simultaneous_sshlogins = 1;
1835 if(not defined $opt::jobs) {
1836 $opt::jobs = 1;
1838 if($Global::interactive and $opt::bg) {
1839 ::error("Jobs running in the ".
1840 "background cannot be interactive.");
1841 ::wait_and_exit(255);
1846 sub record_env {
1847 # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars
1848 # Returns: N/A
1849 my $ignore_filename = $Global::config_dir . "/ignored_vars";
1850 if(open(my $vars_fh, ">", $ignore_filename)) {
1851 print $vars_fh map { $_,"\n" } keys %ENV;
1852 } else {
1853 ::error("Cannot write to $ignore_filename.");
1854 ::wait_and_exit(255);
1858 sub open_joblog {
1859 # Open joblog as specified by --joblog
1860 # Uses:
1861 # $opt::resume
1862 # $opt::resume_failed
1863 # $opt::joblog
1864 # $opt::results
1865 # $Global::job_already_run
1866 # %Global::fd
1867 my $append = 0;
1868 if(($opt::resume or $opt::resume_failed)
1870 not ($opt::joblog or $opt::results)) {
1871 ::error("--resume and --resume-failed require --joblog or --results.");
1872 ::wait_and_exit(255);
1874 if(defined $opt::joblog and $opt::joblog =~ s/^\+//) {
1875 # --joblog +filename = append to filename
1876 $append = 1;
1878 if($opt::joblog
1880 ($opt::sqlmaster
1882 not $opt::sqlworker)) {
1883 # Do not log if --sqlworker
1884 if($opt::resume || $opt::resume_failed || $opt::retry_failed) {
1885 if(open(my $joblog_fh, "<", $opt::joblog)) {
1886 # Read the joblog
1887 # If there is a header: Open as append later
1888 $append = <$joblog_fh>;
1889 my $joblog_regexp;
1890 if($opt::retry_failed) {
1891 # Make a regexp that only matches commands with exit+signal=0
1892 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
1893 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
1894 my @group;
1896 local $/ = "\n";
1897 while(<$joblog_fh>) {
1898 if(/$joblog_regexp/o) {
1899 # This is 30% faster than set_job_already_run($1);
1900 vec($Global::job_already_run,($1||0),1) = 1;
1901 $Global::total_completed++;
1902 $group[$1-1] = "true";
1903 } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) {
1904 # Grab out the command
1905 $group[$1-1] = $3;
1906 } else {
1907 chomp;
1908 ::error("Format of '$opt::joblog' is wrong: $_");
1909 ::wait_and_exit(255);
1913 if(@group) {
1914 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
1915 unlink($name);
1916 # Put args into argfile
1917 if(grep /\0/, @group) {
1918 # force --null to deal with \n in commandlines
1919 ::warning("Command lines contain newline. Forcing --null.");
1920 $opt::null = 1;
1921 $/ = "\0";
1923 # Replace \0 with '\n' as used in print_joblog()
1924 print $outfh map { s/\0/\n/g; $_,$/ } map { $_ } @group;
1925 seek $outfh, 0, 0;
1926 exit_if_disk_full();
1927 # Set filehandle to -a
1928 @opt::a = ($outfh);
1930 # Remove $command (so -a is run)
1931 @ARGV = ();
1933 if($opt::resume || $opt::resume_failed) {
1934 if($opt::resume_failed) {
1935 # Make a regexp that only matches commands with exit+signal=0
1936 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
1937 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
1938 } else {
1939 # Just match the job number
1940 $joblog_regexp='^(\d+)';
1942 while(<$joblog_fh>) {
1943 if(/$joblog_regexp/o) {
1944 # This is 30% faster than set_job_already_run($1);
1945 vec($Global::job_already_run,($1||0),1) = 1;
1946 $Global::total_completed++;
1947 } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
1948 ::error("Format of '$opt::joblog' is wrong: $_");
1949 ::wait_and_exit(255);
1953 close $joblog_fh;
1956 if($opt::dryrun) {
1957 # Do not write to joblog in a dry-run
1958 if(not open($Global::joblog, ">", "/dev/null")) {
1959 ::error("Cannot write to --joblog $opt::joblog.");
1960 ::wait_and_exit(255);
1962 } elsif($append) {
1963 # Append to joblog
1964 if(not open($Global::joblog, ">>", $opt::joblog)) {
1965 ::error("Cannot append to --joblog $opt::joblog.");
1966 ::wait_and_exit(255);
1968 } else {
1969 if($opt::joblog eq "-") {
1970 # Use STDOUT as joblog
1971 $Global::joblog = $Global::fd{1};
1972 } elsif(not open($Global::joblog, ">", $opt::joblog)) {
1973 # Overwrite the joblog
1974 ::error("Cannot write to --joblog $opt::joblog.");
1975 ::wait_and_exit(255);
1977 print $Global::joblog
1978 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
1979 "Send", "Receive", "Exitval", "Signal", "Command"
1980 ). "\n";
1985 sub open_csv {
1986 if($opt::results) {
1987 # Output as CSV/TSV
1988 if($opt::results eq "-.csv"
1990 $opt::results eq "-.tsv") {
1991 # Output as CSV/TSV on stdout
1992 open $Global::csv_fh, ">&", "STDOUT" or
1993 ::die_bug("Can't dup STDOUT in csv: $!");
1994 # Do not print any other output to STDOUT
1995 # by forcing all other output to /dev/null
1996 open my $fd, ">", "/dev/null" or
1997 ::die_bug("Can't >/dev/null in csv: $!");
1998 $Global::fd{1} = $fd;
1999 $Global::fd{2} = $fd;
2000 } elsif($Global::csvsep) {
2001 if(not open($Global::csv_fh,">",$opt::results)) {
2002 ::error("Cannot open results file `$opt::results': ".
2003 "$!.");
2004 wait_and_exit(255);
2010 sub find_compression_program {
2011 # Find a fast compression program
2012 # Returns:
2013 # $compress_program = compress program with options
2014 # $decompress_program = decompress program with options
2016 # Search for these. Sorted by speed on 128 core
2018 # seq 120000000|shuf > 1gb &
2019 # apt-get update
2020 # apt install make g++ htop
2021 # wget -O - pi.dk/3 | bash
2022 # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
2023 # git clone https://github.com/facebook/zstd.git
2024 # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
2025 # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
2026 # chmod +x /usr/local/bin/lrz
2027 # wait
2028 # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
2029 # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
2030 # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
2031 # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
2032 # sort -nk4 jl-?
2034 # 1-core:
2035 # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
2036 # 4-cores:
2037 # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
2038 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
2039 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
2040 # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
2041 # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
2043 my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
2044 lrz pxz bzip2 lzma xz clzip);
2045 for my $p (@prg) {
2046 if(which($p)) {
2047 return ("$p -c -1","$p -dc");
2050 # Fall back to cat
2051 return ("cat","cat");
2054 sub read_options {
2055 # Read options from command line, profile and $PARALLEL
2056 # Uses:
2057 # $opt::shebang_wrap
2058 # $opt::shebang
2059 # @ARGV
2060 # $opt::plain
2061 # @opt::profile
2062 # $ENV{'HOME'}
2063 # $ENV{'PARALLEL'}
2064 # Returns:
2065 # @ARGV_no_opt = @ARGV without --options
2067 # This must be done first as this may exec myself
2068 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
2069 $ARGV[0] =~ /^--shebang-?wrap/ or
2070 $ARGV[0] =~ /^--hashbang/)) {
2071 # Program is called from #! line in script
2072 # remove --shebang-wrap if it is set
2073 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
2074 # remove --shebang if it is set
2075 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
2076 # remove --hashbang if it is set
2077 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
2078 if($opt::shebang) {
2079 my $argfile = Q(pop @ARGV);
2080 # exec myself to split $ARGV[0] into separate fields
2081 exec "$0 --skip-first-line -a $argfile @ARGV";
2083 if($opt::shebang_wrap) {
2084 my @options;
2085 my @parser;
2086 if ($^O eq 'freebsd') {
2087 # FreeBSD's #! puts different values in @ARGV than Linux' does.
2088 my @nooptions = @ARGV;
2089 get_options_from_array(\@nooptions);
2090 while($#ARGV > $#nooptions) {
2091 push @options, shift @ARGV;
2093 while(@ARGV and $ARGV[0] ne ":::") {
2094 push @parser, shift @ARGV;
2096 if(@ARGV and $ARGV[0] eq ":::") {
2097 shift @ARGV;
2099 } else {
2100 @options = shift @ARGV;
2102 my $script = Q(shift @ARGV);
2103 # exec myself to split $ARGV[0] into separate fields
2104 exec "$0 --internal-pipe-means-argfiles @options @parser $script ".
2105 "::: @ARGV";
2108 if($ARGV[0] =~ / --shebang(-?wrap)? /) {
2109 ::warning("--shebang and --shebang-wrap must be the first argument.\n");
2112 Getopt::Long::Configure("bundling","require_order");
2113 my @ARGV_copy = @ARGV;
2114 my @ARGV_orig = @ARGV;
2115 # Check if there is a --profile to set @opt::profile
2116 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
2117 my @ARGV_profile = ();
2118 my @ARGV_env = ();
2119 if(not $opt::plain) {
2120 # Add options from $PARALLEL_HOME/config and other profiles
2121 my @config_profiles = (
2122 "/etc/parallel/config",
2123 (map { "$_/config" } @Global::config_dirs),
2124 $ENV{'HOME'}."/.parallelrc");
2125 my @profiles = @config_profiles;
2126 if(@opt::profile) {
2127 # --profile overrides default profiles
2128 @profiles = ();
2129 for my $profile (@opt::profile) {
2130 # Look for the $profile in . and @Global::config_dirs
2131 push @profiles, grep { -r $_ }
2132 map { "$_/$profile" } ".", @Global::config_dirs;
2135 for my $profile (@profiles) {
2136 if(-r $profile) {
2137 local $/ = "\n";
2138 open (my $in_fh, "<", $profile) ||
2139 ::die_bug("read-profile: $profile");
2140 while(<$in_fh>) {
2141 /^\s*\#/ and next;
2142 chomp;
2143 push @ARGV_profile, shell_words($_);
2145 close $in_fh;
2146 } else {
2147 if(grep /^$profile$/, @config_profiles) {
2148 # config file is not required to exist
2149 } else {
2150 ::error("$profile not readable.");
2151 wait_and_exit(255);
2155 # Add options from shell variable $PARALLEL
2156 if($ENV{'PARALLEL'}) {
2157 @ARGV_env = shell_words($ENV{'PARALLEL'});
2160 Getopt::Long::Configure("bundling","require_order");
2161 get_options_from_array(\@ARGV_profile) || die_usage();
2162 get_options_from_array(\@ARGV_env) || die_usage();
2163 get_options_from_array(\@ARGV) || die_usage();
2164 # What were the options given on the command line?
2165 # Used to start --sqlworker
2166 my $ai = arrayindex(\@ARGV_orig, \@ARGV);
2167 @Global::options_in_argv = @ARGV_orig[0..$ai-1];
2168 # Prepend non-options to @ARGV (such as commands like 'nice')
2169 unshift @ARGV, @ARGV_profile, @ARGV_env;
2170 return @ARGV;
2173 sub arrayindex {
2174 # Similar to Perl's index function, but for arrays
2175 # Input:
2176 # $arr_ref1 = ref to @array1 to search in
2177 # $arr_ref2 = ref to @array2 to search for
2178 # Returns:
2179 # $pos = position of @array1 in @array2, -1 if not found
2180 my ($arr_ref1,$arr_ref2) = @_;
2181 my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1;
2182 my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2;
2183 my $i = index($array1_as_string,$array2_as_string,0);
2184 if($i == -1) { return -1 }
2185 my @before = split /\0/, substr($array1_as_string,0,$i);
2186 return $#before;
2189 sub read_args_from_command_line {
2190 # Arguments given on the command line after:
2191 # ::: ($Global::arg_sep)
2192 # :::: ($Global::arg_file_sep)
2193 # :::+ ($Global::arg_sep with --link)
2194 # ::::+ ($Global::arg_file_sep with --link)
2195 # Removes the arguments from @ARGV and:
2196 # - puts filenames into -a
2197 # - puts arguments into files and add the files to -a
2198 # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+
2199 # Input:
2200 # @::ARGV = command option ::: arg arg arg :::: argfiles
2201 # Uses:
2202 # $Global::arg_sep
2203 # $Global::arg_file_sep
2204 # $opt::internal_pipe_means_argfiles
2205 # $opt::pipe
2206 # @opt::a
2207 # Returns:
2208 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
2209 my @new_argv = ();
2210 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
2211 if($arg eq $Global::arg_sep
2213 $arg eq $Global::arg_sep."+"
2215 $arg eq $Global::arg_file_sep
2217 $arg eq $Global::arg_file_sep."+") {
2218 my $group_sep = $arg; # This group of arguments is args or argfiles
2219 my @group;
2220 while(defined ($arg = shift @ARGV)) {
2221 if($arg eq $Global::arg_sep
2223 $arg eq $Global::arg_sep."+"
2225 $arg eq $Global::arg_file_sep
2227 $arg eq $Global::arg_file_sep."+") {
2228 # exit while loop if finding new separator
2229 last;
2230 } else {
2231 # If not hitting ::: :::+ :::: or ::::+
2232 # Append it to the group
2233 push @group, $arg;
2236 my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0;
2237 my $is_file = ($group_sep eq $Global::arg_file_sep
2239 $group_sep eq $Global::arg_file_sep."+");
2240 if($is_file) {
2241 # :::: / ::::+
2242 push @opt::linkinputsource, map { $is_linked } @group;
2243 } else {
2244 # ::: / :::+
2245 push @opt::linkinputsource, $is_linked;
2247 if($is_file
2248 or ($opt::internal_pipe_means_argfiles and $opt::pipe)
2250 # Group of file names on the command line.
2251 # Append args into -a
2252 push @opt::a, @group;
2253 } else {
2254 # Group of arguments on the command line.
2255 # Put them into a file.
2256 # Create argfile
2257 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
2258 unlink($name);
2259 # Put args into argfile
2260 print $outfh map { $_,$/ } @group;
2261 seek $outfh, 0, 0;
2262 exit_if_disk_full();
2263 # Append filehandle to -a
2264 push @opt::a, $outfh;
2266 if(defined($arg)) {
2267 # $arg is ::: :::+ :::: or ::::+
2268 # so there is another group
2269 redo;
2270 } else {
2271 # $arg is undef -> @ARGV empty
2272 last;
2275 push @new_argv, $arg;
2277 # Output: @ARGV = command to run with options
2278 return @new_argv;
2281 sub cleanup {
2282 # Returns: N/A
2283 unlink keys %Global::unlink;
2284 map { rmdir $_ } keys %Global::unlink;
2285 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
2286 for(keys %Global::sshmaster) {
2287 # If 'ssh -M's are running: kill them
2288 kill "TERM", $_;
2293 sub __QUOTING_ARGUMENTS_FOR_SHELL__ {}
2296 sub shell_quote {
2297 # Input:
2298 # @strings = strings to be quoted
2299 # Returns:
2300 # @shell_quoted_strings = string quoted as needed by the shell
2301 return wantarray ?
2302 (map { Q($_) } @_)
2303 : (join" ",map { Q($_) } @_);
2306 sub shell_quote_scalar_rc {
2307 # Quote for the rc-shell
2308 my $a = $_[0];
2309 if(defined $a) {
2310 if(($a =~ s/'/''/g)
2312 ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
2313 # A string was replaced
2314 # No need to test for "" or \0
2315 } elsif($a eq "") {
2316 $a = "''";
2317 } elsif($a eq "\0") {
2318 $a = "";
2321 return $a;
2324 sub shell_quote_scalar_csh {
2325 # Quote for (t)csh
2326 my $a = $_[0];
2327 if(defined $a) {
2328 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
2329 # This is 1% faster than the above
2330 if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
2332 # quote newline in csh as \\\n
2333 ($a =~ s/[\n]/"\\\n"/go)) {
2334 # A string was replaced
2335 # No need to test for "" or \0
2336 } elsif($a eq "") {
2337 $a = "''";
2338 } elsif($a eq "\0") {
2339 $a = "";
2342 return $a;
2345 sub shell_quote_scalar_default {
2346 # Quote for other shells (Bourne compatibles)
2347 # Inputs:
2348 # $string = string to be quoted
2349 # Returns:
2350 # $shell_quoted = string quoted as needed by the shell
2351 my $par = $_[0];
2352 if($par =~ /[^-_.+a-z0-9\/]/i) {
2353 $par =~ s/'/'"'"'/g; # "-quote single quotes
2354 $par = "'$par'"; # '-quote entire string
2355 $par =~ s/^''|''$//g; # Remove unneeded '' at ends
2356 return $par;
2357 } elsif ($par eq "") {
2358 return "''";
2359 } else {
2360 # No quoting needed
2361 return $par;
2365 sub shell_quote_scalar {
2366 # Quote the string so the shell will not expand any special chars
2367 # Inputs:
2368 # $string = string to be quoted
2369 # Returns:
2370 # $shell_quoted = string quoted as needed by the shell
2372 # Speed optimization: Choose the correct shell_quote_scalar_*
2373 # and call that directly from now on
2374 no warnings 'redefine';
2375 if($Global::cshell) {
2376 # (t)csh
2377 *shell_quote_scalar = \&shell_quote_scalar_csh;
2378 } elsif($Global::shell =~ m:(^|/)rc$:) {
2379 # rc-shell
2380 *shell_quote_scalar = \&shell_quote_scalar_rc;
2381 } else {
2382 # other shells
2383 *shell_quote_scalar = \&shell_quote_scalar_default;
2385 # The sub is now redefined. Call it
2386 return shell_quote_scalar(@_);
2389 sub Q {
2390 # Q alias for ::shell_quote_scalar
2391 no warnings 'redefine';
2392 *Q = \&::shell_quote_scalar;
2393 return Q(@_);
2396 sub shell_quote_file {
2397 # Quote the string so shell will not expand any special chars
2398 # and prepend ./ if needed
2399 # Input:
2400 # $filename = filename to be shell quoted
2401 # Returns:
2402 # $quoted_filename = filename quoted with \ and ./ if needed
2403 my $a = shift;
2404 if(defined $a) {
2405 if($a =~ m:^/: or $a =~ m:^\./:) {
2406 # /abs/path or ./rel/path => skip
2407 } else {
2408 # rel/path => ./rel/path
2409 $a = "./".$a;
2412 return Q($a);
2415 sub shell_words {
2416 # Input:
2417 # $string = shell line
2418 # Returns:
2419 # @shell_words = $string split into words as shell would do
2420 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
2421 return Text::ParseWords::shellwords(@_);
2424 sub perl_quote_scalar {
2425 # Quote the string so perl's eval will not expand any special chars
2426 # Inputs:
2427 # $string = string to be quoted
2428 # Returns:
2429 # $perl_quoted = string quoted with \ as needed by perl's eval
2430 my $a = $_[0];
2431 if(defined $a) {
2432 $a =~ s/[\\\"\$\@]/\\$&/go;
2434 return $a;
2437 sub pQ {
2438 # pQ alias for ::perl_quote_scalar
2439 *pQ = \&::perl_quote_scalar;
2440 return pQ(@_);
2443 sub unquote_printf {
2444 # Convert \t \n \r \000 \0
2445 # Inputs:
2446 # $string = string with \t \n \r \num \0
2447 # Returns:
2448 # $replaced = string with TAB NEWLINE CR <ascii-num> NUL
2449 $_ = shift;
2450 s/\\t/\t/g;
2451 s/\\n/\n/g;
2452 s/\\r/\r/g;
2453 s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
2454 s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
2455 return $_;
2459 sub __FILEHANDLES__ {}
2462 sub save_stdin_stdout_stderr {
2463 # Remember the original STDIN, STDOUT and STDERR
2464 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
2465 # Uses:
2466 # %Global::fd
2467 # $Global::original_stderr
2468 # $Global::original_stdin
2469 # Returns: N/A
2471 # TODO Disabled until we have an open3 that will take n filehandles
2472 # for my $fdno (1..61) {
2473 # # /dev/fd/62 and above are used by bash for <(cmd)
2474 # # Find file descriptors that are already opened (by the shell)
2475 # Only focus on stdout+stderr for now
2476 for my $fdno (1..2) {
2477 my $fh;
2478 # 2-argument-open is used to be compatible with old perl 5.8.0
2479 # bug #43570: Perl 5.8.0 creates 61 files
2480 if(open($fh,">&=$fdno")) {
2481 $Global::fd{$fdno}=$fh;
2484 open $Global::original_stderr, ">&", "STDERR" or
2485 ::die_bug("Can't dup STDERR: $!");
2486 open $Global::status_fd, ">&", "STDERR" or
2487 ::die_bug("Can't dup STDERR: $!");
2488 open $Global::original_stdin, "<&", "STDIN" or
2489 ::die_bug("Can't dup STDIN: $!");
2492 sub enough_file_handles {
2493 # Check that we have enough filehandles available for starting
2494 # another job
2495 # Uses:
2496 # $opt::ungroup
2497 # %Global::fd
2498 # Returns:
2499 # 1 if ungrouped (thus not needing extra filehandles)
2500 # 0 if too few filehandles
2501 # 1 if enough filehandles
2502 if(not $opt::ungroup) {
2503 my %fh;
2504 my $enough_filehandles = 1;
2505 # perl uses 7 filehandles for something?
2506 # open3 uses 2 extra filehandles temporarily
2507 # We need a filehandle for each redirected file descriptor
2508 # (normally just STDOUT and STDERR)
2509 for my $i (1..(7+2+keys %Global::fd)) {
2510 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
2512 for (values %fh) { close $_; }
2513 return $enough_filehandles;
2514 } else {
2515 # Ungrouped does not need extra file handles
2516 return 1;
2520 sub open_or_exit {
2521 # Open a file name or exit if the file cannot be opened
2522 # Inputs:
2523 # $file = filehandle or filename to open
2524 # Uses:
2525 # $Global::original_stdin
2526 # Returns:
2527 # $fh = file handle to read-opened file
2528 my $file = shift;
2529 if($file eq "-") {
2530 return ($Global::original_stdin || *STDIN);
2532 if(ref $file eq "GLOB") {
2533 # This is an open filehandle
2534 return $file;
2536 my $fh = gensym;
2537 if(not open($fh, "<", $file)) {
2538 ::error("Cannot open input file `$file': No such file or directory.");
2539 wait_and_exit(255);
2541 return $fh;
2544 sub set_fh_blocking {
2545 # Set filehandle as blocking
2546 # Inputs:
2547 # $fh = filehandle to be blocking
2548 # Returns:
2549 # N/A
2550 my $fh = shift;
2551 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
2552 my $flags;
2553 # Get the current flags on the filehandle
2554 fcntl($fh, &F_GETFL, $flags) || die $!;
2555 # Remove non-blocking from the flags
2556 $flags &= ~&O_NONBLOCK;
2557 # Set the flags on the filehandle
2558 fcntl($fh, &F_SETFL, $flags) || die $!;
2561 sub set_fh_non_blocking {
2562 # Set filehandle as non-blocking
2563 # Inputs:
2564 # $fh = filehandle to be blocking
2565 # Returns:
2566 # N/A
2567 my $fh = shift;
2568 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
2569 my $flags;
2570 # Get the current flags on the filehandle
2571 fcntl($fh, &F_GETFL, $flags) || die $!;
2572 # Add non-blocking to the flags
2573 $flags |= &O_NONBLOCK;
2574 # Set the flags on the filehandle
2575 fcntl($fh, &F_SETFL, $flags) || die $!;
2579 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {}
2582 # Variable structure:
2584 # $Global::running{$pid} = Pointer to Job-object
2585 # @Global::virgin_jobs = Pointer to Job-object that have received no input
2586 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
2587 # $Global::total_running = total number of running jobs
2588 # $Global::total_started = total jobs started
2589 # $Global::max_procs_file = filename if --jobs is given a filename
2590 # $Global::JobQueue = JobQueue object for the queue of jobs
2591 # $Global::timeoutq = queue of times where jobs timeout
2592 # $Global::newest_job = Job object of the most recent job started
2593 # $Global::newest_starttime = timestamp of $Global::newest_job
2594 # @Global::sshlogin
2595 # $Global::minimal_command_line_length = minimum length supported by all sshlogins
2596 # $Global::start_no_new_jobs = should more jobs be started?
2597 # $Global::original_stderr = file handle for STDERR when the program started
2598 # $Global::total_started = total number of jobs started
2599 # $Global::joblog = filehandle of joblog
2600 # $Global::debug = Is debugging on?
2601 # $Global::exitstatus = status code of GNU Parallel
2602 # $Global::quoting = quote the command to run
2604 sub init_run_jobs {
2605 # Set Global variables and progress signal handlers
2606 # Do the copying of basefiles
2607 # Returns: N/A
2608 $Global::total_running = 0;
2609 $Global::total_started = 0;
2610 $SIG{USR1} = \&list_running_jobs;
2611 $SIG{USR2} = \&toggle_progress;
2612 if(@opt::basefile) { setup_basefile(); }
2616 my $last_time;
2617 my %last_mtime;
2618 my $max_procs_file_last_mod;
2620 sub changed_procs_file {
2621 # If --jobs is a file and it is modfied:
2622 # Force recomputing of max_jobs_running for each $sshlogin
2623 # Uses:
2624 # $Global::max_procs_file
2625 # %Global::host
2626 # Returns: N/A
2627 if($Global::max_procs_file) {
2628 # --jobs filename
2629 my $mtime = (stat($Global::max_procs_file))[9];
2630 $max_procs_file_last_mod ||= 0;
2631 if($mtime > $max_procs_file_last_mod) {
2632 # file changed: Force re-computing max_jobs_running
2633 $max_procs_file_last_mod = $mtime;
2634 for my $sshlogin (values %Global::host) {
2635 $sshlogin->set_max_jobs_running(undef);
2641 sub changed_sshloginfile {
2642 # If --slf is changed:
2643 # reload --slf
2644 # filter_hosts
2645 # setup_basefile
2646 # Uses:
2647 # @opt::sshloginfile
2648 # @Global::sshlogin
2649 # %Global::host
2650 # $opt::filter_hosts
2651 # Returns: N/A
2652 if(@opt::sshloginfile) {
2653 # Is --sshloginfile changed?
2654 for my $slf (@opt::sshloginfile) {
2655 my $actual_file = expand_slf_shorthand($slf);
2656 my $mtime = (stat($actual_file))[9];
2657 $last_mtime{$actual_file} ||= $mtime;
2658 if($mtime - $last_mtime{$actual_file} > 1) {
2659 ::debug("run","--sshloginfile $actual_file changed. reload\n");
2660 $last_mtime{$actual_file} = $mtime;
2661 # Reload $slf
2662 # Empty sshlogins
2663 @Global::sshlogin = ();
2664 for (values %Global::host) {
2665 # Don't start new jobs on any host
2666 # except the ones added back later
2667 $_->set_max_jobs_running(0);
2669 # This will set max_jobs_running on the SSHlogins
2670 read_sshloginfile($actual_file);
2671 parse_sshlogin();
2672 $opt::filter_hosts and filter_hosts();
2673 setup_basefile();
2679 sub start_more_jobs {
2680 # Run start_another_job() but only if:
2681 # * not $Global::start_no_new_jobs set
2682 # * not JobQueue is empty
2683 # * not load on server is too high
2684 # * not server swapping
2685 # * not too short time since last remote login
2686 # Uses:
2687 # %Global::host
2688 # $Global::start_no_new_jobs
2689 # $Global::JobQueue
2690 # $opt::pipe
2691 # $opt::load
2692 # $opt::noswap
2693 # $opt::delay
2694 # $Global::newest_starttime
2695 # Returns:
2696 # $jobs_started = number of jobs started
2697 my $jobs_started = 0;
2698 if($Global::start_no_new_jobs) {
2699 return $jobs_started;
2701 if(time - ($last_time||0) > 1) {
2702 # At most do this every second
2703 $last_time = time;
2704 changed_procs_file();
2705 changed_sshloginfile();
2707 # This will start 1 job on each --sshlogin (if possible)
2708 # thus distribute the jobs on the --sshlogins round robin
2709 for my $sshlogin (values %Global::host) {
2710 if($Global::JobQueue->empty() and not $opt::pipe) {
2711 # No more jobs in the queue
2712 last;
2714 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
2715 $sshlogin->jobs_running(), "\n");
2716 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
2717 if($opt::delay
2719 $opt::delay > ::now() - $Global::newest_starttime) {
2720 # It has been too short since last start
2721 next;
2723 if($opt::load and $sshlogin->loadavg_too_high()) {
2724 # The load is too high or unknown
2725 next;
2727 if($opt::noswap and $sshlogin->swapping()) {
2728 # The server is swapping
2729 next;
2731 if($opt::limit and $sshlogin->limit()) {
2732 # Over limit
2733 next;
2735 if($opt::memfree and $sshlogin->memfree() < $opt::memfree) {
2736 # The server has not enough mem free
2737 ::debug("mem", "Not starting job: not enough mem\n");
2738 next;
2740 if($sshlogin->too_fast_remote_login()) {
2741 # It has been too short since
2742 next;
2744 debug("run", $sshlogin->string(),
2745 " has ", $sshlogin->jobs_running(),
2746 " out of ", $sshlogin->max_jobs_running(),
2747 " jobs running. Start another.\n");
2748 if(start_another_job($sshlogin) == 0) {
2749 # No more jobs to start on this $sshlogin
2750 debug("run","No jobs started on ",
2751 $sshlogin->string(), "\n");
2752 next;
2754 $sshlogin->inc_jobs_running();
2755 $sshlogin->set_last_login_at(::now());
2756 $jobs_started++;
2758 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
2759 $sshlogin->jobs_running(), " of ",
2760 $sshlogin->max_jobs_running(), "\n");
2763 return $jobs_started;
2768 my $no_more_file_handles_warned;
2770 sub start_another_job {
2771 # If there are enough filehandles
2772 # and JobQueue not empty
2773 # and not $job is in joblog
2774 # Then grab a job from Global::JobQueue,
2775 # start it at sshlogin
2776 # mark it as virgin_job
2777 # Inputs:
2778 # $sshlogin = the SSHLogin to start the job on
2779 # Uses:
2780 # $Global::JobQueue
2781 # $opt::pipe
2782 # $opt::results
2783 # $opt::resume
2784 # @Global::virgin_jobs
2785 # Returns:
2786 # 1 if another jobs was started
2787 # 0 otherwise
2788 my $sshlogin = shift;
2789 # Do we have enough file handles to start another job?
2790 if(enough_file_handles()) {
2791 if($Global::JobQueue->empty() and not $opt::pipe) {
2792 # No more commands to run
2793 debug("start", "Not starting: JobQueue empty\n");
2794 return 0;
2795 } else {
2796 my $job;
2797 # Skip jobs already in job log
2798 # Skip jobs already in results
2799 do {
2800 $job = get_job_with_sshlogin($sshlogin);
2801 if(not defined $job) {
2802 # No command available for that sshlogin
2803 debug("start", "Not starting: no jobs available for ",
2804 $sshlogin->string(), "\n");
2805 return 0;
2807 if($job->is_already_in_joblog()) {
2808 $job->free_slot();
2810 } while ($job->is_already_in_joblog()
2812 ($opt::results and $opt::resume and $job->is_already_in_results()));
2813 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
2814 $job->replaced(),"'\n");
2815 if($job->start()) {
2816 if($opt::pipe) {
2817 if($job->virgin()) {
2818 push(@Global::virgin_jobs,$job);
2819 } else {
2820 # Block already set: This is a retry
2821 if(fork()) {
2822 ::debug("pipe","\n\nWriting ",length ${$job->block_ref()},
2823 " to ", $job->seq(),"\n");
2824 close $job->fh(0,"w");
2825 } else {
2826 $job->write($job->block_ref());
2827 close $job->fh(0,"w");
2828 exit(0);
2832 debug("start", "Started as seq ", $job->seq(),
2833 " pid:", $job->pid(), "\n");
2834 return 1;
2835 } else {
2836 # Not enough processes to run the job.
2837 # Put it back on the queue.
2838 $Global::JobQueue->unget($job);
2839 # Count down the number of jobs to run for this SSHLogin.
2840 my $max = $sshlogin->max_jobs_running();
2841 if($max > 1) { $max--; } else {
2842 my @arg;
2843 for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
2844 push @arg, map { $_->orig() } @$record;
2846 ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
2847 ::wait_and_exit(255);
2849 $sshlogin->set_max_jobs_running($max);
2850 # Sleep up to 300 ms to give other processes time to die
2851 ::usleep(rand()*300);
2852 ::warning("No more processes: ".
2853 "Decreasing number of running jobs to $max.",
2854 "Raising ulimit -u or /etc/security/limits.conf may help.");
2855 return 0;
2858 } else {
2859 # No more file handles
2860 $no_more_file_handles_warned++ or
2861 ::warning("No more file handles. ",
2862 "Raising ulimit -n or /etc/security/limits.conf may help.");
2863 debug("start", "No more file handles. ");
2864 return 0;
2869 sub init_progress {
2870 # Uses:
2871 # $opt::bar
2872 # Returns:
2873 # list of computers for progress output
2874 $|=1;
2875 if($opt::bar) {
2876 return("","");
2878 my %progress = progress();
2879 return ("\nComputers / CPU cores / Max jobs to run\n",
2880 $progress{'workerlist'});
2883 sub drain_job_queue {
2884 # Uses:
2885 # $opt::progress
2886 # $Global::total_running
2887 # $Global::max_jobs_running
2888 # %Global::running
2889 # $Global::JobQueue
2890 # %Global::host
2891 # $Global::start_no_new_jobs
2892 # Returns: N/A
2893 if($opt::progress) {
2894 ::status_no_nl(init_progress());
2896 my $last_header = "";
2897 my $sleep = 0.2;
2898 do {
2899 while($Global::total_running > 0) {
2900 debug($Global::total_running, "==", scalar
2901 keys %Global::running," slots: ", $Global::max_jobs_running);
2902 if($opt::pipe) {
2903 # When using --pipe sometimes file handles are not closed properly
2904 for my $job (values %Global::running) {
2905 close $job->fh(0,"w");
2908 if($opt::progress) {
2909 my %progress = progress();
2910 if($last_header ne $progress{'header'}) {
2911 ::status("", $progress{'header'});
2912 $last_header = $progress{'header'};
2914 ::status_no_nl("\r",$progress{'status'});
2916 if($Global::total_running < $Global::max_jobs_running
2917 and not $Global::JobQueue->empty()) {
2918 # These jobs may not be started because of loadavg
2919 # or too little time between each ssh login.
2920 if(start_more_jobs() > 0) {
2921 # Exponential back-on if jobs were started
2922 $sleep = $sleep/2+0.001;
2925 # Exponential back-off sleeping
2926 $sleep = ::reap_usleep($sleep);
2928 if(not $Global::JobQueue->empty()) {
2929 # These jobs may not be started:
2930 # * because there the --filter-hosts has removed all
2931 if(not %Global::host) {
2932 ::error("There are no hosts left to run on.");
2933 ::wait_and_exit(255);
2935 # * because of loadavg
2936 # * because of too little time between each ssh login.
2937 $sleep = ::reap_usleep($sleep);
2938 start_more_jobs();
2939 if($Global::max_jobs_running == 0) {
2940 ::warning("There are no job slots available. Increase --jobs.");
2943 while($opt::sqlmaster and not $Global::sql->finished()) {
2944 # SQL master
2945 $sleep = ::reap_usleep($sleep);
2946 start_more_jobs();
2947 if($Global::start_sqlworker) {
2948 # Start an SQL worker as we are now sure there is work to do
2949 $Global::start_sqlworker = 0;
2950 if(my $pid = fork()) {
2951 $Global::unkilled_sqlworker = $pid;
2952 } else {
2953 # Replace --sql/--sqlandworker with --sqlworker
2954 my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv;
2955 # exec the --sqlworker
2956 exec($0,@ARGV,@command);
2960 } while ($Global::total_running > 0
2962 not $Global::start_no_new_jobs and not $Global::JobQueue->empty()
2964 $opt::sqlmaster and not $Global::sql->finished());
2965 if($opt::progress) {
2966 my %progress = progress();
2967 ::status("\r".$progress{'status'});
2971 sub toggle_progress {
2972 # Turn on/off progress view
2973 # Uses:
2974 # $opt::progress
2975 # Returns: N/A
2976 $opt::progress = not $opt::progress;
2977 if($opt::progress) {
2978 ::status_no_nl(init_progress());
2982 sub progress {
2983 # Uses:
2984 # $opt::bar
2985 # $opt::eta
2986 # %Global::host
2987 # $Global::total_started
2988 # Returns:
2989 # $workerlist = list of workers
2990 # $header = that will fit on the screen
2991 # $status = message that will fit on the screen
2992 if($opt::bar) {
2993 return ("workerlist" => "", "header" => "", "status" => bar());
2995 my $eta = "";
2996 my ($status,$header)=("","");
2997 if($opt::eta) {
2998 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
2999 compute_eta();
3000 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
3001 $this_eta, $left, $avgtime);
3003 my $termcols = terminal_columns();
3004 my @workers = sort keys %Global::host;
3005 my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers;
3006 my $workerno = 1;
3007 my %workerno = map { ($_=>$workerno++) } @workers;
3008 my $workerlist = "";
3009 for my $w (@workers) {
3010 $workerlist .=
3011 $workerno{$w}.":".$sshlogin{$w} ." / ".
3012 ($Global::host{$w}->ncpus() || "-")." / ".
3013 $Global::host{$w}->max_jobs_running()."\n";
3015 $status = "x"x($termcols+1);
3016 # Select an output format that will fit on a single line
3017 if(length $status > $termcols) {
3018 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
3019 $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
3020 $status = $eta .
3021 join(" ",map
3023 if($Global::total_started) {
3024 my $completed = ($Global::host{$_}->jobs_completed()||0);
3025 my $running = $Global::host{$_}->jobs_running();
3026 my $time = $completed ? (time-$^T)/($completed) : "0";
3027 sprintf("%s:%d/%d/%d%%/%.1fs ",
3028 $sshlogin{$_}, $running, $completed,
3029 ($running+$completed)*100
3030 / $Global::total_started, $time);
3032 } @workers);
3034 if(length $status > $termcols) {
3035 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
3036 $header = "Computer:jobs running/jobs completed/%of started jobs";
3037 $status = $eta .
3038 join(" ",map
3040 if($Global::total_started) {
3041 my $completed = ($Global::host{$_}->jobs_completed()||0);
3042 my $running = $Global::host{$_}->jobs_running();
3043 my $time = $completed ? (time-$^T)/($completed) : "0";
3044 sprintf("%s:%d/%d/%d%%/%.1fs ",
3045 $workerno{$_}, $running, $completed,
3046 ($running+$completed)*100
3047 / $Global::total_started, $time);
3049 } @workers);
3051 if(length $status > $termcols) {
3052 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
3053 $header = "Computer:jobs running/jobs completed/%of started jobs";
3054 $status = $eta .
3055 join(" ",map
3057 if($Global::total_started) {
3058 sprintf("%s:%d/%d/%d%%",
3059 $sshlogin{$_},
3060 $Global::host{$_}->jobs_running(),
3061 ($Global::host{$_}->jobs_completed()||0),
3062 ($Global::host{$_}->jobs_running()+
3063 ($Global::host{$_}->jobs_completed()||0))*100
3064 / $Global::total_started)
3067 @workers);
3069 if(length $status > $termcols) {
3070 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
3071 $header = "Computer:jobs running/jobs completed/%of started jobs";
3072 $status = $eta .
3073 join(" ",map
3075 if($Global::total_started) {
3076 sprintf("%s:%d/%d/%d%%",
3077 $workerno{$_},
3078 $Global::host{$_}->jobs_running(),
3079 ($Global::host{$_}->jobs_completed()||0),
3080 ($Global::host{$_}->jobs_running()+
3081 ($Global::host{$_}->jobs_completed()||0))*100
3082 / $Global::total_started)
3085 @workers);
3087 if(length $status > $termcols) {
3088 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
3089 $header = "Computer:jobs running/jobs completed";
3090 $status = $eta .
3091 join(" ",map
3092 { sprintf("%s:%d/%d",
3093 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3094 ($Global::host{$_}->jobs_completed()||0)) }
3095 @workers);
3097 if(length $status > $termcols) {
3098 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
3099 $header = "Computer:jobs running/jobs completed";
3100 $status = $eta .
3101 join(" ",map
3102 { sprintf("%s:%d/%d",
3103 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
3104 ($Global::host{$_}->jobs_completed()||0)) }
3105 @workers);
3107 if(length $status > $termcols) {
3108 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
3109 $header = "Computer:jobs running/jobs completed";
3110 $status = $eta .
3111 join(" ",map
3112 { sprintf("%s:%d/%d",
3113 $workerno{$_}, $Global::host{$_}->jobs_running(),
3114 ($Global::host{$_}->jobs_completed()||0)) }
3115 @workers);
3117 if(length $status > $termcols) {
3118 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
3119 $header = "Computer:jobs completed";
3120 $status = $eta .
3121 join(" ",map
3122 { sprintf("%s:%d",
3123 $sshlogin{$_},
3124 ($Global::host{$_}->jobs_completed()||0)) }
3125 @workers);
3127 if(length $status > $termcols) {
3128 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
3129 $header = "Computer:jobs completed";
3130 $status = $eta .
3131 join(" ",map
3132 { sprintf("%s:%d",
3133 $workerno{$_},
3134 ($Global::host{$_}->jobs_completed()||0)) }
3135 @workers);
3137 return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
3142 my ($total, $first_completed, $smoothed_avg_time, $last_eta);
3144 sub compute_eta {
3145 # Calculate important numbers for ETA
3146 # Returns:
3147 # $total = number of jobs in total
3148 # $completed = number of jobs completed
3149 # $left = number of jobs left
3150 # $pctcomplete = percent of jobs completed
3151 # $avgtime = averaged time
3152 # $eta = smoothed eta
3153 $total = $Global::JobQueue->total_jobs();
3154 my $completed = $Global::total_completed;
3155 my $left = $total - $completed;
3156 if(not $completed) {
3157 return($total, $completed, $left, 0, 0, 0);
3159 my $pctcomplete = $completed / $total;
3160 $first_completed ||= time;
3161 my $timepassed = (time - $first_completed);
3162 my $avgtime = $timepassed / $completed;
3163 $smoothed_avg_time ||= $avgtime;
3164 # Smooth the eta so it does not jump wildly
3165 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
3166 $pctcomplete * $avgtime;
3167 my $eta = int($left * $smoothed_avg_time);
3168 if($eta*0.90 < $last_eta and $last_eta < $eta) {
3169 # Eta jumped less that 10% up: Keep the last eta instead
3170 $eta = $last_eta;
3171 } else {
3172 $last_eta = $eta;
3174 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
3179 my ($rev,$reset);
3181 sub bar {
3182 # Return:
3183 # $status = bar with eta, completed jobs, arg and pct
3184 $rev ||= "\033[7m";
3185 $reset ||= "\033[0m";
3186 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
3187 compute_eta();
3188 my $arg = $Global::newest_job ?
3189 $Global::newest_job->{'commandline'}->
3190 replace_placeholders(["\257<\257>"],0,0) : "";
3191 # These chars mess up display in the terminal
3192 $arg =~ tr/[\011-\016\033\302-\365]//d;
3193 my $eta_dhms = ::seconds_to_time_units($eta);
3194 my $bar_text =
3195 sprintf("%d%% %d:%d=%s %s",
3196 $pctcomplete*100, $completed, $left, $eta_dhms, $arg);
3197 my $terminal_width = terminal_columns();
3198 my $s = sprintf("%-${terminal_width}s",
3199 substr($bar_text." "x$terminal_width,
3200 0,$terminal_width));
3201 my $width = int($terminal_width * $pctcomplete);
3202 substr($s,$width,0) = $reset;
3203 my $zenity = sprintf("%-${terminal_width}s",
3204 substr("# $eta sec $arg",
3205 0,$terminal_width));
3206 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
3207 "\r" . $rev . $s . $reset;
3208 return $s;
3213 my ($columns,$last_column_time);
3215 sub terminal_columns {
3216 # Get the number of columns of the terminal.
3217 # Only update once per second.
3218 # Returns:
3219 # number of columns of the screen
3220 if(not $columns or $last_column_time < time) {
3221 $last_column_time = time;
3222 $columns = $ENV{'COLUMNS'};
3223 if(not $columns) {
3224 my $stty = ::qqx("stty -a </dev/tty");
3225 # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS
3226 # MacOSX/IRIX/AIX/Tru64
3227 $stty =~ /(\d+) columns/ and do { $columns = $1; };
3228 # GNU/Linux/Solaris
3229 $stty =~ /columns (\d+)/ and do { $columns = $1; };
3230 # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana
3231 $stty =~ /columns = (\d+)/ and do { $columns = $1; };
3232 # QNX
3233 $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; };
3235 if(not $columns) {
3236 my $resize = ::qqx("resize");
3237 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
3239 $columns ||= 80;
3241 return $columns;
3245 sub get_job_with_sshlogin {
3246 # Input:
3247 # $sshlogin = which host should the job be run on?
3248 # Uses:
3249 # $opt::hostgroups
3250 # $Global::JobQueue
3251 # Returns:
3252 # $job = next job object for $sshlogin if any available
3253 my $sshlogin = shift;
3254 my $job;
3256 if ($opt::hostgroups) {
3257 my @other_hostgroup_jobs = ();
3259 while($job = $Global::JobQueue->get()) {
3260 if($sshlogin->in_hostgroups($job->hostgroups())) {
3261 # Found a job to be run on a hostgroup of this
3262 # $sshlogin
3263 last;
3264 } else {
3265 # This job was not in the hostgroups of $sshlogin
3266 push @other_hostgroup_jobs, $job;
3269 $Global::JobQueue->unget(@other_hostgroup_jobs);
3270 if(not defined $job) {
3271 # No more jobs
3272 return undef;
3274 } else {
3275 $job = $Global::JobQueue->get();
3276 if(not defined $job) {
3277 # No more jobs
3278 ::debug("start", "No more jobs: JobQueue empty\n");
3279 return undef;
3282 $job->set_sshlogin($sshlogin);
3283 if($opt::retries and $job->failed_here()) {
3284 # This command with these args failed for this sshlogin
3285 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
3286 # Only look at the Global::host that have > 0 jobslots
3287 if($no_of_failed_sshlogins ==
3288 grep { $_->max_jobs_running() > 0 } values %Global::host
3289 and $job->failed_here() == $min_failures) {
3290 # It failed the same or more times on another host:
3291 # run it on this host
3292 } else {
3293 # If it failed fewer times on another host:
3294 # Find another job to run
3295 my $nextjob;
3296 if(not $Global::JobQueue->empty()) {
3297 # This can potentially recurse for all args
3298 no warnings 'recursion';
3299 $nextjob = get_job_with_sshlogin($sshlogin);
3301 # Push the command back on the queue
3302 $Global::JobQueue->unget($job);
3303 return $nextjob;
3306 return $job;
3310 sub __REMOTE_SSH__ {}
3313 sub read_sshloginfiles {
3314 # Read a list of --slf's
3315 # Input:
3316 # @files = files or symbolic file names to read
3317 # Returns: N/A
3318 for my $s (@_) {
3319 read_sshloginfile(expand_slf_shorthand($s));
3323 sub expand_slf_shorthand {
3324 # Expand --slf shorthand into a read file name
3325 # Input:
3326 # $file = file or symbolic file name to read
3327 # Returns:
3328 # $file = actual file name to read
3329 my $file = shift;
3330 if($file eq "-") {
3331 # skip: It is stdin
3332 } elsif($file eq "..") {
3333 $file = $Global::config_dir."/sshloginfile";
3334 } elsif($file eq ".") {
3335 $file = "/etc/parallel/sshloginfile";
3336 } elsif(not -r $file) {
3337 for(@Global::config_dirs) {
3338 if(not -r $_."/".$file) {
3339 # Try prepending $PARALLEL_HOME
3340 ::error("Cannot open $file.");
3341 ::wait_and_exit(255);
3342 } else {
3343 $file = $_."/".$file;
3344 last;
3348 return $file;
3351 sub read_sshloginfile {
3352 # Read sshloginfile into @Global::sshlogin
3353 # Input:
3354 # $file = file to read
3355 # Uses:
3356 # @Global::sshlogin
3357 # Returns: N/A
3358 local $/ = "\n";
3359 my $file = shift;
3360 my $close = 1;
3361 my $in_fh;
3362 ::debug("init","--slf ",$file);
3363 if($file eq "-") {
3364 $in_fh = *STDIN;
3365 $close = 0;
3366 } else {
3367 if(not open($in_fh, "<", $file)) {
3368 # Try the filename
3369 ::error("Cannot open $file.");
3370 ::wait_and_exit(255);
3373 while(<$in_fh>) {
3374 chomp;
3375 /^\s*#/ and next;
3376 /^\s*$/ and next;
3377 push @Global::sshlogin, $_;
3379 if($close) {
3380 close $in_fh;
3384 sub parse_sshlogin {
3385 # Parse @Global::sshlogin into %Global::host.
3386 # Keep only hosts that are in one of the given ssh hostgroups.
3387 # Uses:
3388 # @Global::sshlogin
3389 # $Global::minimal_command_line_length
3390 # %Global::host
3391 # $opt::transfer
3392 # @opt::return
3393 # $opt::cleanup
3394 # @opt::basefile
3395 # @opt::trc
3396 # Returns: N/A
3397 my @login;
3398 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
3399 for my $sshlogin (@Global::sshlogin) {
3400 # Split up -S sshlogin,sshlogin
3401 for my $s (split /,|\n/, $sshlogin) {
3402 if ($s eq ".." or $s eq "-") {
3403 # This may add to @Global::sshlogin - possibly bug
3404 read_sshloginfile(expand_slf_shorthand($s));
3405 } else {
3406 $s =~ s/\s*$//;
3407 push (@login, $s);
3411 $Global::minimal_command_line_length = 8_000_000;
3412 my @allowed_hostgroups;
3413 for my $ncpu_sshlogin_string (::uniq(@login)) {
3414 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
3415 my $sshlogin_string = $sshlogin->string();
3416 if($sshlogin_string eq "") {
3417 # This is an ssh group: -S @webservers
3418 push @allowed_hostgroups, $sshlogin->hostgroups();
3419 next;
3421 if($Global::host{$sshlogin_string}) {
3422 # This sshlogin has already been added:
3423 # It is probably a host that has come back
3424 # Set the max_jobs_running back to the original
3425 debug("run","Already seen $sshlogin_string\n");
3426 if($sshlogin->{'ncpus'}) {
3427 # If ncpus set by '#/' of the sshlogin, overwrite it:
3428 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
3430 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
3431 next;
3433 $sshlogin->set_maxlength(Limits::Command::max_length());
3435 $Global::minimal_command_line_length =
3436 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
3437 $Global::host{$sshlogin_string} = $sshlogin;
3439 if(@allowed_hostgroups) {
3440 # Remove hosts that are not in these groups
3441 while (my ($string, $sshlogin) = each %Global::host) {
3442 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
3443 delete $Global::host{$string};
3448 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
3449 if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) {
3450 if(not remote_hosts()) {
3451 # There are no remote hosts
3452 if(@opt::trc) {
3453 ::warning("--trc ignored as there are no remote --sshlogin.");
3454 } elsif (defined $opt::transfer) {
3455 ::warning("--transfer ignored as there are no remote --sshlogin.");
3456 } elsif (@opt::transfer_files) {
3457 ::warning("--transferfile ignored as there are no remote --sshlogin.");
3458 } elsif (@opt::return) {
3459 ::warning("--return ignored as there are no remote --sshlogin.");
3460 } elsif (defined $opt::cleanup) {
3461 ::warning("--cleanup ignored as there are no remote --sshlogin.");
3462 } elsif (@opt::basefile) {
3463 ::warning("--basefile ignored as there are no remote --sshlogin.");
3469 sub remote_hosts {
3470 # Return sshlogins that are not ':'
3471 # Uses:
3472 # %Global::host
3473 # Returns:
3474 # list of sshlogins with ':' removed
3475 return grep !/^:$/, keys %Global::host;
3478 sub setup_basefile {
3479 # Transfer basefiles to each $sshlogin
3480 # This needs to be done before first jobs on $sshlogin is run
3481 # Uses:
3482 # %Global::host
3483 # @opt::basefile
3484 # Returns: N/A
3485 my @cmd;
3486 my $rsync_destdir;
3487 my $workdir;
3488 for my $sshlogin (values %Global::host) {
3489 if($sshlogin->string() eq ":") { next }
3490 for my $file (@opt::basefile) {
3491 if($file !~ m:^/: and $opt::workdir eq "...") {
3492 ::error("Work dir '...' will not work with relative basefiles.");
3493 ::wait_and_exit(255);
3495 if(not $workdir) {
3496 my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],{},{},{});
3497 my $dummyjob = Job->new($dummycmdline);
3498 $workdir = $dummyjob->workdir();
3500 push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir);
3503 debug("init", "basesetup: @cmd\n");
3504 my ($exitstatus,$stdout_ref,$stderr_ref) =
3505 run_parallel((join "\n",@cmd),"-j0","--retries",5);
3506 if($exitstatus) {
3507 my @stdout = @$stdout_ref;
3508 my @stderr = @$stderr_ref;
3509 ::error("Copying of --basefile failed: @stdout@stderr");
3510 ::wait_and_exit(255);
3514 sub cleanup_basefile {
3515 # Remove the basefiles transferred
3516 # Uses:
3517 # %Global::host
3518 # @opt::basefile
3519 # Returns: N/A
3520 my @cmd;
3521 my $workdir;
3522 if(not $workdir) {
3523 my $dummycmdline = CommandLine->new(1,"true",0,0,0,0,0,{},{},{});
3524 my $dummyjob = Job->new($dummycmdline);
3525 $workdir = $dummyjob->workdir();
3527 for my $sshlogin (values %Global::host) {
3528 if($sshlogin->string() eq ":") { next }
3529 for my $file (@opt::basefile) {
3530 push @cmd, $sshlogin->cleanup_cmd($file,$workdir);
3533 debug("init", "basecleanup: @cmd\n");
3534 my ($exitstatus,$stdout_ref,$stderr_ref) =
3535 run_parallel(join("\n",@cmd),"-j0","--retries",5);
3536 if($exitstatus) {
3537 my @stdout = @$stdout_ref;
3538 my @stderr = @$stderr_ref;
3539 ::error("Cleanup of --basefile failed: @stdout@stderr");
3540 ::wait_and_exit(255);
3544 sub run_parallel {
3545 my ($stdin,@args) = @_;
3546 my $cmd = join "",map { " $_ & " } split /\n/, $stdin;
3547 print $Global::original_stderr ` $cmd wait` ;
3548 return 0
3551 sub _run_parallel {
3552 # Run GNU Parallel
3553 # This should ideally just fork an internal copy
3554 # and not start it through a shell
3555 # Input:
3556 # $stdin = data to provide on stdin for GNU Parallel
3557 # @args = command line arguments
3558 # Returns:
3559 # $exitstatus = exitcode of GNU Parallel run
3560 # \@stdout = standard output
3561 # \@stderr = standard error
3562 my ($stdin,@args) = @_;
3563 my ($exitstatus,@stdout,@stderr);
3564 my ($stdin_fh,$stdout_fh)=(gensym(),gensym());
3565 my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par");
3566 unlink $stderrname;
3568 my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh,
3569 $0,qw(--plain --shell /bin/sh --will-cite), @args);
3570 if(my $writerpid = fork()) {
3571 close $stdin_fh;
3572 @stdout = <$stdout_fh>;
3573 # Now stdout is closed:
3574 # These pids should be dead or die very soon
3575 while(kill 0, $writerpid) { ::usleep(1); }
3576 die;
3577 # reap $writerpid;
3578 # while(kill 0, $pid) { ::usleep(1); }
3579 # reap $writerpid;
3580 $exitstatus = $?;
3581 seek $stderr_fh, 0, 0;
3582 @stderr = <$stderr_fh>;
3583 close $stdout_fh;
3584 close $stderr_fh;
3585 } else {
3586 close $stdout_fh;
3587 close $stderr_fh;
3588 print $stdin_fh $stdin;
3589 close $stdin_fh;
3590 exit(0);
3592 return ($exitstatus,\@stdout,\@stderr);
3595 sub filter_hosts {
3596 # Remove down --sshlogins from active duty.
3597 # Find ncpus, ncores, maxlen, time-to-login for each host.
3598 # Uses:
3599 # %Global::host
3600 # $Global::minimal_command_line_length
3601 # $opt::use_sockets_instead_of_threads
3602 # $opt::use_cores_instead_of_threads
3603 # $opt::use_cpus_instead_of_cores
3604 # Returns: N/A
3606 my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref,
3607 $maxlen_ref, $echo_ref, $down_hosts_ref) =
3608 parse_host_filtering(parallelized_host_filtering());
3610 delete @Global::host{@$down_hosts_ref};
3611 @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
3613 $Global::minimal_command_line_length = 8_000_000;
3614 while (my ($sshlogin, $obj) = each %Global::host) {
3615 if($sshlogin eq ":") { next }
3616 $nsockets_ref->{$sshlogin} or
3617 ::die_bug("nsockets missing: ".$obj->serverlogin());
3618 $ncores_ref->{$sshlogin} or
3619 ::die_bug("ncores missing: ".$obj->serverlogin());
3620 $nthreads_ref->{$sshlogin} or
3621 ::die_bug("nthreads missing: ".$obj->serverlogin());
3622 $time_to_login_ref->{$sshlogin} or
3623 ::die_bug("time_to_login missing: ".$obj->serverlogin());
3624 $maxlen_ref->{$sshlogin} or
3625 ::die_bug("maxlen missing: ".$obj->serverlogin());
3626 $obj->set_ncpus($nthreads_ref->{$sshlogin});
3627 if($opt::use_cpus_instead_of_cores) {
3628 $obj->set_ncpus($ncores_ref->{$sshlogin});
3629 } elsif($opt::use_sockets_instead_of_threads) {
3630 $obj->set_ncpus($nsockets_ref->{$sshlogin});
3631 } elsif($opt::use_cores_instead_of_threads) {
3632 $obj->set_ncpus($ncores_ref->{$sshlogin});
3634 $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
3635 $obj->set_maxlength($maxlen_ref->{$sshlogin});
3636 $Global::minimal_command_line_length =
3637 ::min($Global::minimal_command_line_length,
3638 int($maxlen_ref->{$sshlogin}/2));
3639 ::debug("init", "Timing from -S:$sshlogin ",
3640 " nsockets:",$nsockets_ref->{$sshlogin},
3641 " ncores:", $ncores_ref->{$sshlogin},
3642 " nthreads:",$nthreads_ref->{$sshlogin},
3643 " time_to_login:", $time_to_login_ref->{$sshlogin},
3644 " maxlen:", $maxlen_ref->{$sshlogin},
3645 " min_max_len:", $Global::minimal_command_line_length,"\n");
3649 sub parse_host_filtering {
3650 # Input:
3651 # @lines = output from parallelized_host_filtering()
3652 # Returns:
3653 # \%nsockets = number of sockets of {host}
3654 # \%ncores = number of cores of {host}
3655 # \%nthreads = number of hyperthreaded cores of {host}
3656 # \%time_to_login = time_to_login on {host}
3657 # \%maxlen = max command len on {host}
3658 # \%echo = echo received from {host}
3659 # \@down_hosts = list of hosts with no answer
3660 local $/ = "\n";
3661 my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo,
3662 @down_hosts);
3663 for (@_) {
3664 ::debug("init","Read: ",$_);
3665 chomp;
3666 my @col = split /\t/, $_;
3667 if($col[0] =~ /^parallel: Warning:/) {
3668 # Timed out job: Ignore it
3669 next;
3670 } elsif(defined $col[6]) {
3671 # This is a line from --joblog
3672 # seq host time spent sent received exit signal command
3673 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
3674 if($col[0] eq "Seq" and $col[1] eq "Host" and
3675 $col[2] eq "Starttime") {
3676 # Header => skip
3677 next;
3679 # Get server from: eval true server\;
3680 $col[8] =~ /eval .?true.?\s([^\;]+);/ or
3681 ::die_bug("col8 does not contain host: $col[8]");
3682 my $host = $1;
3683 $host =~ tr/\\//d;
3684 $Global::host{$host} or next;
3685 if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
3686 # exit == 255 or exit == timeout (-1): ssh failed/timedout
3687 # exit == 1: lsh failed
3688 # Remove sshlogin
3689 ::debug("init", "--filtered $host\n");
3690 push(@down_hosts, $host);
3691 } elsif($col[6] eq "127") {
3692 # signal == 127: parallel not installed remote
3693 # Set nsockets, ncores, nthreads = 1
3694 ::warning("Could not figure out ".
3695 "number of cpus on $host. Using 1.");
3696 $nsockets{$host} = 1;
3697 $ncores{$host} = 1;
3698 $nthreads{$host} = 1;
3699 $maxlen{$host} = Limits::Command::max_length();
3700 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
3701 # Remember how log it took to log in
3702 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
3703 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
3704 } else {
3705 ::die_bug("host check unmatched long jobline: $_");
3707 } elsif($Global::host{$col[0]}) {
3708 # This output from --number-of-cores, --number-of-cpus,
3709 # --max-line-length-allowed
3710 # ncores: server 8
3711 # ncpus: server 2
3712 # maxlen: server 131071
3713 if(/parallel: Warning: Cannot figure out number of/) {
3714 next;
3716 if(not $nsockets{$col[0]}) {
3717 $nsockets{$col[0]} = $col[1];
3718 } elsif(not $ncores{$col[0]}) {
3719 $ncores{$col[0]} = $col[1];
3720 } elsif(not $nthreads{$col[0]}) {
3721 $nthreads{$col[0]} = $col[1];
3722 } elsif(not $maxlen{$col[0]}) {
3723 $maxlen{$col[0]} = $col[1];
3724 } elsif(not $echo{$col[0]}) {
3725 $echo{$col[0]} = $col[1];
3726 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
3727 # Skip these:
3728 # perl: warning: Setting locale failed.
3729 # perl: warning: Please check that your locale settings:
3730 # LANGUAGE = (unset),
3731 # LC_ALL = (unset),
3732 # LANG = "en_US.UTF-8"
3733 # are supported and installed on your system.
3734 # perl: warning: Falling back to the standard locale ("C").
3735 } else {
3736 ::die_bug("host check too many col0: $_");
3738 } else {
3739 ::die_bug("host check unmatched short jobline ($col[0]): $_");
3742 @down_hosts = uniq(@down_hosts);
3743 return(\%nsockets, \%ncores, \%nthreads, \%time_to_login,
3744 \%maxlen, \%echo, \@down_hosts);
3747 sub parallelized_host_filtering {
3748 # Uses:
3749 # %Global::host
3750 # Returns:
3751 # text entries with:
3752 # * joblog line
3753 # * hostname \t number of cores
3754 # * hostname \t number of cpus
3755 # * hostname \t max-line-length-allowed
3756 # * hostname \t empty
3758 sub sshwrapped {
3759 # Wrap with ssh and --env
3760 my $sshlogin = shift;
3761 my $command = shift;
3762 my $commandline = CommandLine->new(1,[$command],{},0,0,[],[],{},{},{});
3763 my $job = Job->new($commandline);
3764 $job->set_sshlogin($sshlogin);
3765 $job->wrapped();
3766 return($job->{'wrapped'});
3769 my(@sockets, @cores, @threads, @maxline, @echo);
3770 while (my ($host, $sshlogin) = each %Global::host) {
3771 if($host eq ":") { next }
3772 # The 'true' is used to get the $host out later
3773 push(@sockets, $host."\t"."true $host; ".
3774 sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0");
3775 push(@cores, $host."\t"."true $host; ".
3776 sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0");
3777 push(@threads, $host."\t"."true $host; ".
3778 sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0");
3779 push(@maxline, $host."\t"."true $host; ".
3780 sshwrapped($sshlogin,"parallel --max-line-length-allowed")."\n\0");
3781 # 'echo' is used to get the fastest possible ssh login time
3782 my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
3783 $sshlogin->serverlogin();
3784 push(@echo, $host."\t".$sshcmd." -- echo\n\0");
3787 # --timeout 10: Setting up an SSH connection and running a simple
3788 # command should never take > 10 sec.
3789 # --delay 0.1: If multiple sshlogins use the same proxy the delay
3790 # will make it less likely to overload the ssh daemon.
3791 # --retries 3: If the ssh daemon is overloaded, try 3 times
3792 my $cmd =
3793 "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
3794 "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
3795 $cmd = $Global::shell." -c ".Q($cmd);
3796 ::debug("init", $cmd, "\n");
3797 my @out;
3798 my $prepend = "";
3800 my ($host_fh,$in,$err);
3801 open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd");
3802 if(not fork()) {
3803 # Give the commands to run to the $cmd
3804 close $host_fh;
3805 print $in @sockets, @cores, @threads, @maxline, @echo;
3806 close $in;
3807 exit();
3809 close $in;
3810 for(<$host_fh>) {
3811 # TODO incompatible with '-quoting. Needs to be fixed differently
3812 #if(/\'$/) {
3813 # # if last char = ' then append next line
3814 # # This may be due to quoting of \n in environment var
3815 # $prepend .= $_;
3816 # next;
3818 $_ = $prepend . $_;
3819 $prepend = "";
3820 push @out, $_;
3822 close $host_fh;
3823 return @out;
3826 sub onall {
3827 # Runs @command on all hosts.
3828 # Uses parallel to run @command on each host.
3829 # --jobs = number of hosts to run on simultaneously.
3830 # For each host a parallel command with the args will be running.
3831 # Uses:
3832 # $Global::quoting
3833 # @opt::basefile
3834 # $opt::jobs
3835 # $opt::linebuffer
3836 # $opt::ungroup
3837 # $opt::group
3838 # $opt::keeporder
3839 # $opt::D
3840 # $opt::plain
3841 # $opt::max_chars
3842 # $opt::linebuffer
3843 # $opt::files
3844 # $opt::colsep
3845 # $opt::timeout
3846 # $opt::plain
3847 # $opt::retries
3848 # $opt::max_chars
3849 # $opt::arg_sep
3850 # $opt::arg_file_sep
3851 # @opt::v
3852 # @opt::env
3853 # %Global::host
3854 # $Global::exitstatus
3855 # $Global::debug
3856 # $Global::joblog
3857 # $opt::joblog
3858 # $opt::tag
3859 # $opt::tee
3860 # Input:
3861 # @command = command to run on all hosts
3862 # Returns: N/A
3863 sub tmp_joblog {
3864 # Input:
3865 # $joblog = filename of joblog - undef if none
3866 # Returns:
3867 # $tmpfile = temp file for joblog - undef if none
3868 my $joblog = shift;
3869 if(not defined $joblog) {
3870 return undef;
3872 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
3873 close $fh;
3874 return $tmpfile;
3876 my ($input_source_fh_ref,@command) = @_;
3877 if($Global::quoting) {
3878 @command = shell_quote(@command);
3881 # Copy all @input_source_fh (-a and :::) into tempfiles
3882 my @argfiles = ();
3883 for my $fh (@$input_source_fh_ref) {
3884 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D);
3885 print $outfh (<$fh>);
3886 close $outfh;
3887 push @argfiles, $name;
3889 if(@opt::basefile) { setup_basefile(); }
3890 # for each sshlogin do:
3891 # parallel -S $sshlogin $command :::: @argfiles
3893 # Pass some of the options to the sub-parallels, not all of them as
3894 # -P should only go to the first, and -S should not be copied at all.
3895 my $options =
3896 join(" ",
3897 ((defined $opt::D) ? "-D $opt::D" : ""),
3898 ((defined $opt::group) ? "-g" : ""),
3899 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
3900 ((defined $opt::keeporder) ? "--keeporder" : ""),
3901 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
3902 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
3903 ((defined $opt::plain) ? "--plain" : ""),
3904 ((defined $opt::ungroup) ? "-u" : ""),
3905 ((defined $opt::tee) ? "--tee" : ""),
3907 my $suboptions =
3908 join(" ",
3909 ((defined $opt::D) ? "-D $opt::D" : ""),
3910 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
3911 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
3912 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
3913 ((defined $opt::files) ? "--files" : ""),
3914 ((defined $opt::group) ? "-g" : ""),
3915 ((defined $opt::cleanup) ? "--cleanup" : ""),
3916 ((defined $opt::keeporder) ? "--keeporder" : ""),
3917 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
3918 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
3919 ((defined $opt::plain) ? "--plain" : ""),
3920 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
3921 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
3922 ((defined $opt::ungroup) ? "-u" : ""),
3923 ((defined $opt::tee) ? "--tee" : ""),
3924 ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""),
3925 (@Global::transfer_files ? map { "--tf ".Q($_) }
3926 @Global::transfer_files : ""),
3927 (@Global::ret_files ? map { "--return ".Q($_) }
3928 @Global::ret_files : ""),
3929 (@opt::env ? map { "--env ".Q($_) } @opt::env : ""),
3930 (map { "-v" } @opt::v),
3932 ::debug("init", "| $0 $options\n");
3933 open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") ||
3934 ::die_bug("This does not run GNU Parallel: $0 $options");
3935 my @joblogs;
3936 for my $host (sort keys %Global::host) {
3937 my $sshlogin = $Global::host{$host};
3938 my $joblog = tmp_joblog($opt::joblog);
3939 if($joblog) {
3940 push @joblogs, $joblog;
3941 $joblog = "--joblog $joblog";
3943 my $quad = $opt::arg_file_sep || "::::";
3944 # If PARALLEL_ENV is set: Pass it on
3945 my $penv=$Global::parallel_env ?
3946 "PARALLEL_ENV=".Q($Global::parallel_env) :
3948 ::debug("init", "$penv $0 $suboptions -j1 $joblog ",
3949 ((defined $opt::tag) ?
3950 "--tagstring ".Q($sshlogin->string()) : ""),
3951 " -S ", Q($sshlogin->string())," ",
3952 join(" ",shell_quote(@command))," $quad @argfiles\n");
3953 print $parallel_fh "$penv $0 $suboptions -j1 $joblog ",
3954 ((defined $opt::tag) ?
3955 "--tagstring ".Q($sshlogin->string()) : ""),
3956 " -S ", Q($sshlogin->string())," ",
3957 join(" ",shell_quote(@command))," $quad @argfiles\0";
3959 close $parallel_fh;
3960 $Global::exitstatus = $? >> 8;
3961 debug("init", "--onall exitvalue ", $?);
3962 if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); }
3963 $Global::debug or unlink(@argfiles);
3964 my %seen;
3965 for my $joblog (@joblogs) {
3966 # Append to $joblog
3967 open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
3968 # Skip first line (header);
3969 <$fh>;
3970 print $Global::joblog (<$fh>);
3971 close $fh;
3972 unlink($joblog);
3977 sub __SIGNAL_HANDLING__ {}
3980 sub sigtstp {
3981 # Send TSTP signal (Ctrl-Z) to all children process groups
3982 # Uses:
3983 # %SIG
3984 # Returns: N/A
3985 signal_children("TSTP");
3988 sub sigpipe {
3989 # Send SIGPIPE signal to all children process groups
3990 # Uses:
3991 # %SIG
3992 # Returns: N/A
3993 signal_children("PIPE");
3996 sub signal_children {
3997 # Send signal to all children process groups
3998 # and GNU Parallel itself
3999 # Uses:
4000 # %SIG
4001 # Returns: N/A
4002 my $signal = shift;
4003 debug("run", "Sending $signal ");
4004 kill $signal, map { -$_ } keys %Global::running;
4005 # Use default signal handler for GNU Parallel itself
4006 $SIG{$signal} = undef;
4007 kill $signal, $$;
4010 sub save_original_signal_handler {
4011 # Remember the original signal handler
4012 # Uses:
4013 # %Global::original_sig
4014 # Returns: N/A
4015 $SIG{INT} = sub {
4016 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4017 wait_and_exit(255);
4019 $SIG{TERM} = sub {
4020 if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); }
4021 wait_and_exit(255);
4023 %Global::original_sig = %SIG;
4024 $SIG{TERM} = sub {}; # Dummy until jobs really start
4025 $SIG{ALRM} = 'IGNORE';
4026 # Allow Ctrl-Z to suspend and `fg` to continue
4027 $SIG{TSTP} = \&sigtstp;
4028 $SIG{PIPE} = \&sigpipe;
4029 $SIG{CONT} = sub {
4030 # Set $SIG{TSTP} again (it is undef'ed in sigtstp() )
4031 $SIG{TSTP} = \&sigtstp;
4032 # Send continue signal to all children process groups
4033 kill "CONT", map { -$_ } keys %Global::running;
4037 sub list_running_jobs {
4038 # Print running jobs on tty
4039 # Uses:
4040 # %Global::running
4041 # Returns: N/A
4042 for my $job (values %Global::running) {
4043 ::status("$Global::progname: ".$job->replaced());
4047 sub start_no_new_jobs {
4048 # Start no more jobs
4049 # Uses:
4050 # %Global::original_sig
4051 # %Global::unlink
4052 # $Global::start_no_new_jobs
4053 # Returns: N/A
4054 $SIG{TERM} = $Global::original_sig{TERM};
4055 unlink keys %Global::unlink;
4056 ::status
4057 ("$Global::progname: SIGTERM received. No new jobs will be started.",
4058 "$Global::progname: Waiting for these ".(keys %Global::running).
4059 " jobs to finish. Send SIGTERM again to stop now.");
4060 list_running_jobs();
4061 $Global::start_no_new_jobs ||= 1;
4064 sub reapers {
4065 # Run reaper until there are no more left
4066 # Returns:
4067 # @pids_reaped = pids of reaped processes
4068 my @pids_reaped;
4069 my $pid;
4070 while($pid = reaper()) {
4071 push @pids_reaped, $pid;
4073 return @pids_reaped;
4076 sub reaper {
4077 # A job finished:
4078 # * Set exitstatus, exitsignal, endtime.
4079 # * Free ressources for new job
4080 # * Update median runtime
4081 # * Print output
4082 # * If --halt = now: Kill children
4083 # * Print progress
4084 # Uses:
4085 # %Global::running
4086 # $opt::timeout
4087 # $Global::timeoutq
4088 # $opt::keeporder
4089 # $Global::total_running
4090 # Returns:
4091 # $stiff = PID of child finished
4092 my $stiff;
4093 debug("run", "Reaper ");
4094 if(($stiff = waitpid(-1, &WNOHANG)) <= 0) {
4095 # No jobs waiting to be reaped
4096 return 0;
4099 # $stiff = pid of dead process
4100 my $job = $Global::running{$stiff};
4102 # '-a <(seq 10)' will give us a pid not in %Global::running
4103 # The same will one of the ssh -M: ignore
4104 $job or return 0;
4105 delete $Global::running{$stiff};
4106 $Global::total_running--;
4107 if($job->{'commandline'}{'skip'}) {
4108 # $job->skip() was called
4109 $job->set_exitstatus(-2);
4110 $job->set_exitsignal(0);
4111 } else {
4112 $job->set_exitstatus($? >> 8);
4113 $job->set_exitsignal($? & 127);
4116 debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
4117 $job->set_endtime(::now());
4118 my $sshlogin = $job->sshlogin();
4119 $sshlogin->dec_jobs_running();
4120 if($job->should_be_retried()) {
4121 # Free up file handles
4122 $job->free_ressources();
4123 } else {
4124 # The job is done
4125 $sshlogin->inc_jobs_completed();
4126 # Free the jobslot
4127 $job->free_slot();
4128 if($opt::timeout and not $job->exitstatus()) {
4129 # Update average runtime for timeout only for successful jobs
4130 $Global::timeoutq->update_median_runtime($job->runtime());
4132 if($opt::keeporder) {
4133 $job->print_earlier_jobs();
4134 } else {
4135 $job->print();
4137 if($job->should_we_halt() eq "now") {
4138 # Kill children
4139 ::kill_sleep_seq($job->pid());
4140 ::killall();
4141 ::wait_and_exit($Global::halt_exitstatus);
4144 $job->cleanup();
4146 if($opt::progress) {
4147 my %progress = progress();
4148 ::status_no_nl("\r",$progress{'status'});
4151 debug("run", "done ");
4152 return $stiff;
4156 sub __USAGE__ {}
4159 sub killall {
4160 # Kill all jobs by killing their process groups
4161 # Uses:
4162 # $Global::start_no_new_jobs = we are stopping
4163 # $Global::killall = Flag to not run reaper
4164 $Global::start_no_new_jobs ||= 1;
4165 # Do not reap killed children: Ignore them instead
4166 $Global::killall ||= 1;
4167 kill_sleep_seq(keys %Global::running);
4170 sub kill_sleep_seq {
4171 # Send jobs TERM,TERM,KILL to processgroups
4172 # Input:
4173 # @pids = list of pids that are also processgroups
4174 # Convert pids to process groups ($processgroup = -$pid)
4175 my @pgrps = map { -$_ } @_;
4176 my @term_seq = split/,/,$opt::termseq;
4177 if(not @term_seq) {
4178 @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
4180 while(@term_seq) {
4181 @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps);
4185 sub kill_sleep {
4186 # Kill pids with a signal and wait a while for them to die
4187 # Input:
4188 # $signal = signal to send to @pids
4189 # $sleep_max = number of ms to sleep at most before returning
4190 # @pids = pids to kill (actually process groups)
4191 # Uses:
4192 # $Global::killall = set by killall() to avoid calling reaper
4193 # Returns:
4194 # @pids = pids still alive
4195 my ($signal, $sleep_max, @pids) = @_;
4196 ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n");
4197 kill $signal, @pids;
4198 my $sleepsum = 0;
4199 my $sleep = 0.001;
4201 while(@pids and $sleepsum < $sleep_max) {
4202 if($Global::killall) {
4203 # Killall => don't run reaper
4204 while(waitpid(-1, &WNOHANG) > 0) {
4205 $sleep = $sleep/2+0.001;
4207 } elsif(reapers()) {
4208 $sleep = $sleep/2+0.001;
4210 $sleep *= 1.1;
4211 ::usleep($sleep);
4212 $sleepsum += $sleep;
4213 # Keep only living children
4214 @pids = grep { kill(0, $_) } @pids;
4216 return @pids;
4219 sub wait_and_exit {
4220 # If we do not wait, we sometimes get segfault
4221 # Returns: N/A
4222 my $error = shift;
4223 unlink keys %Global::unlink;
4224 if($error) {
4225 # Kill all jobs without printing
4226 killall();
4228 for (keys %Global::unkilled_children) {
4229 # Kill any (non-jobs) children (e.g. reserved processes)
4230 kill 9, $_;
4231 waitpid($_,0);
4232 delete $Global::unkilled_children{$_};
4234 if($Global::unkilled_sqlworker) {
4235 waitpid($Global::unkilled_sqlworker,0);
4237 exit($error);
4240 sub die_usage {
4241 # Returns: N/A
4242 usage();
4243 wait_and_exit(255);
4246 sub usage {
4247 # Returns: N/A
4248 print join
4249 ("\n",
4250 "Usage:",
4252 "$Global::progname [options] [command [arguments]] < list_of_arguments",
4253 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
4254 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
4256 "-j n Run n jobs in parallel",
4257 "-k Keep same order",
4258 "-X Multiple arguments with context replace",
4259 "--colsep regexp Split input on regexp for positional replacements",
4260 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
4261 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
4262 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
4263 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
4265 "-S sshlogin Example: foo\@server.example.com",
4266 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
4267 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
4268 "--onall Run the given command with argument on all sshlogins",
4269 "--nonall Run the given command with no arguments on all sshlogins",
4271 "--pipe Split stdin (standard input) to multiple jobs.",
4272 "--recend str Record end separator for --pipe.",
4273 "--recstart str Record start separator for --pipe.",
4275 "See 'man $Global::progname' for details",
4277 "Academic tradition requires you to cite works you base your article on.",
4278 "If you use programs that use GNU Parallel to process data for an article in a",
4279 "scientific publication, please cite:",
4281 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4282 " DOI https://doi.org/10.5281/zenodo.1146014",
4284 # Before changing this line, please read
4285 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4286 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4287 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4289 "",);
4292 sub citation_notice {
4293 # if --will-cite or --plain: do nothing
4294 # if stderr redirected: do nothing
4295 # if $PARALLEL_HOME/will-cite: do nothing
4296 # else: print citation notice to stderr
4297 if($opt::willcite
4299 $opt::plain
4301 not -t $Global::original_stderr
4303 grep { -e "$_/will-cite" } @Global::config_dirs) {
4304 # skip
4305 } else {
4306 ::status
4307 ("Academic tradition requires you to cite works you base your article on.",
4308 "If you use programs that use GNU Parallel to process data for an article in a",
4309 "scientific publication, please cite:",
4311 " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
4312 " DOI https://doi.org/10.5281/zenodo.1146014",
4314 # Before changing this line, please read
4315 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4316 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4317 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4319 "More about funding GNU Parallel and the citation notice:",
4320 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4322 "To silence this citation notice: run 'parallel --citation' once.",
4325 mkdir $Global::config_dir;
4326 # Number of times the user has run GNU Parallel without showing
4327 # willingness to cite
4328 my $runs = 0;
4329 if(open (my $fh, "<", $Global::config_dir.
4330 "/runs-without-willing-to-cite")) {
4331 $runs = <$fh>;
4332 close $fh;
4334 $runs++;
4335 if(open (my $fh, ">", $Global::config_dir.
4336 "/runs-without-willing-to-cite")) {
4337 print $fh $runs;
4338 close $fh;
4339 if($runs >= 10) {
4340 ::status("Come on: You have run parallel $runs times. Isn't it about time ",
4341 "you run 'parallel --citation' once to silence the citation notice?",
4342 "");
4348 sub status {
4349 my @w = @_;
4350 my $fh = $Global::status_fd || *STDERR;
4351 print $fh map { ($_, "\n") } @w;
4352 flush $fh;
4355 sub status_no_nl {
4356 my @w = @_;
4357 my $fh = $Global::status_fd || *STDERR;
4358 print $fh @w;
4359 flush $fh;
4362 sub warning {
4363 my @w = @_;
4364 my $prog = $Global::progname || "parallel";
4365 status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
4368 sub error {
4369 my @w = @_;
4370 my $prog = $Global::progname || "parallel";
4371 status(map { ($prog.": Error: ". $_); } @w);
4374 sub die_bug {
4375 my $bugid = shift;
4376 print STDERR
4377 ("$Global::progname: This should not happen. You have found a bug.\n",
4378 "Please contact <parallel\@gnu.org> and follow\n",
4379 "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n",
4380 "\n",
4381 "Include this in the report:\n",
4382 "* The version number: $Global::version\n",
4383 "* The bugid: $bugid\n",
4384 "* The command line being run\n",
4385 "* The files being read (put the files on a webserver if they are big)\n",
4386 "\n",
4387 "If you get the error on smaller/fewer files, please include those instead.\n");
4388 ::wait_and_exit(255);
4391 sub version {
4392 # Returns: N/A
4393 print join
4394 ("\n",
4395 "GNU $Global::progname $Global::version",
4396 "Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.",
4397 "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
4398 "This is free software: you are free to change and redistribute it.",
4399 "GNU $Global::progname comes with no warranty.",
4401 "Web site: http://www.gnu.org/software/${Global::progname}\n",
4402 "When using programs that use GNU Parallel to process data for publication",
4403 "please cite as described in 'parallel --citation'.\n",
4407 sub citation {
4408 # Returns: N/A
4409 my ($all_argv_ref,$argv_options_removed_ref) = @_;
4410 my $all_argv = "@$all_argv_ref";
4411 my $no_opts = "@$argv_options_removed_ref";
4412 $all_argv=~s/--citation//;
4413 if($all_argv ne $no_opts) {
4414 ::warning("--citation ignores all other options and arguments.");
4415 ::status("");
4418 ::status(
4419 "Academic tradition requires you to cite works you base your article on.",
4420 "If you use programs that use GNU Parallel to process data for an article in a",
4421 "scientific publication, please cite:",
4423 "\@book{tange_ole_2018_1146014,",
4424 " author = {Tange, Ole},",
4425 " title = {GNU Parallel 2018},",
4426 " publisher = {Ole Tange},",
4427 " month = Mar,",
4428 " year = 2018,",
4429 " ISBN = {9781387509881},",
4430 " doi = {10.5281/zenodo.1146014},",
4431 " url = {https://doi.org/10.5281/zenodo.1146014}",
4432 "}",
4434 "(Feel free to use \\nocite{tange_ole_2018_1146014})",
4436 # Before changing this line, please read
4437 # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice
4438 "This helps funding further development; AND IT WON'T COST YOU A CENT.",
4439 "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
4441 "More about funding GNU Parallel and the citation notice:",
4442 "https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice",
4444 "If you send a copy of your published article to tange\@gnu.org, it will be",
4445 "mentioned in the release notes of next version of GNU Parallel.",
4448 while(not grep { -e "$_/will-cite" } @Global::config_dirs) {
4449 print "\nType: 'will cite' and press enter.\n> ";
4450 my $input = <STDIN>;
4451 if(not defined $input) {
4452 exit(255);
4454 if($input =~ /will cite/i) {
4455 mkdir $Global::config_dir;
4456 if(open (my $fh, ">", $Global::config_dir."/will-cite")) {
4457 close $fh;
4458 ::status(
4460 "Thank you for your support. It is much appreciated. The citation",
4461 "notice is now silenced.",
4462 "");
4463 } else {
4464 ::status(
4466 "Thank you for your support. It is much appreciated. The citation",
4467 "cannot permanently be silenced. Use '--will-cite' instead.",
4469 "If you use '--will-cite' in scripts to be run by others you are making",
4470 "it harder for others to see the citation notice. The development of",
4471 "GNU parallel is indirectly financed through citations, so if users",
4472 "do not know they should cite then you are making it harder to finance",
4473 "development. However, if you pay 10000 EUR, you should feel free to",
4474 "use '--will-cite' in scripts.",
4475 "");
4476 last;
4482 sub show_limits {
4483 # Returns: N/A
4484 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
4485 "Maximal used size of command: ",Limits::Command::max_length(),"\n",
4486 "\n",
4487 "Execution of will continue now, and it will try to read its input\n",
4488 "and run commands; if this is not what you wanted to happen, please\n",
4489 "press CTRL-D or CTRL-C\n");
4492 sub embed {
4493 # Give an embeddable version of GNU Parallel
4494 # Tested with: bash, zsh, ksh, ash, dash, sh
4495 my $randomstring = "cut-here-".join"",
4496 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
4497 if(not -f $0 or not -r $0) {
4498 ::error("--embed only works if parallel is a readable file");
4499 exit(255);
4501 if(open(my $fh, "<", $0)) {
4502 # Read the source from $0
4503 my @source = <$fh>;
4504 my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER};
4505 my @env_parallel_source = ();
4506 my $shell = $Global::shell;
4507 $shell =~ s:.*/::;
4508 for(which("env_parallel.$shell")) {
4509 -r $_ or next;
4510 # Read the source of env_parallel.shellname
4511 open(my $env_parallel_source_fh, $_) || die;
4512 @env_parallel_source = <$env_parallel_source_fh>;
4513 close $env_parallel_source_fh;
4514 last;
4516 print "#!$Global::shell
4518 # Copyright (C) 2007-2019 $user, Ole Tange and Free Software
4519 # Foundation, Inc.
4521 # This program is free software; you can redistribute it and/or modify
4522 # it under the terms of the GNU General Public License as published by
4523 # the Free Software Foundation; either version 3 of the License, or
4524 # (at your option) any later version.
4526 # This program is distributed in the hope that it will be useful, but
4527 # WITHOUT ANY WARRANTY; without even the implied warranty of
4528 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4529 # General Public License for more details.
4531 # You should have received a copy of the GNU General Public License
4532 # along with this program; if not, see <http://www.gnu.org/licenses/>
4533 # or write to the Free Software Foundation, Inc., 51 Franklin St,
4534 # Fifth Floor, Boston, MA 02110-1301 USA
4537 print q!
4538 # Embedded GNU Parallel created with --embed
4539 parallel() {
4540 # Start GNU Parallel without leaving temporary files
4542 # Not all shells support 'perl <(cat ...)'
4543 # This is a complex way of doing:
4544 # perl <(cat <<'cut-here'
4545 # [...]
4546 # ) "$@"
4547 # and also avoiding:
4548 # [1]+ Done cat
4550 # Make a temporary fifo that perl can read from
4551 _fifo_with_parallel_source=`perl -e 'use POSIX qw(mkfifo);
4552 do {
4553 $f = "/tmp/parallel-".join"",
4554 map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
4555 } while(-e $f);
4556 mkfifo($f,0600);
4557 print $f;'`
4558 # Put source code into temporary file
4559 # so it is easy to copy to the fifo
4560 _file_with_parallel_source=`mktemp`;
4562 "cat <<'$randomstring' > \$_file_with_parallel_source\n",
4563 @source,
4564 $randomstring,"\n",
4566 # Copy the source code from the file to the fifo
4567 # and remove the file and fifo ASAP
4568 # 'sh -c' is needed to avoid
4569 # [1]+ Done cat
4570 sh -c "(rm $_file_with_parallel_source; cat >$_fifo_with_parallel_source; rm $_fifo_with_parallel_source) < $_file_with_parallel_source &"
4572 # Read the source from the fifo
4573 perl $_fifo_with_parallel_source "$@"
4576 @env_parallel_source,
4579 # This will call the functions above
4580 parallel -k echo ::: Put your code here
4581 env_parallel --session
4582 env_parallel -k echo ::: Put your code here
4583 parset p,y,c,h -k echo ::: Put your code here
4584 echo $p $y $c $h
4586 } else {
4587 ::error("Cannot open $0");
4588 exit(255);
4590 ::status("Redirect the output to a file and add your changes at the end:",
4591 " $0 --embed > new_script");
4594 sub __GENERIC_COMMON_FUNCTION__ {}
4597 sub mkdir_or_die {
4598 # If dir is not executable: die
4599 my $dir = shift;
4600 # The eval is needed to catch exception from mkdir
4601 eval { File::Path::mkpath($dir); };
4602 if(not -x $dir) {
4603 ::error("Cannot change into non-executable dir $dir: $!");
4604 ::wait_and_exit(255);
4608 sub tmpfile {
4609 # Create tempfile as $TMPDIR/parXXXXX
4610 # Returns:
4611 # $filehandle = opened file handle
4612 # $filename = file name created
4613 my($filehandle,$filename) =
4614 ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
4615 if(wantarray) {
4616 return($filehandle,$filename);
4617 } else {
4618 # Separate unlink due to NFS dealing badly with File::Temp
4619 unlink $filename;
4620 return $filehandle;
4624 sub tmpname {
4625 # Select a name that does not exist
4626 # Do not create the file as it may be used for creating a socket (by tmux)
4627 # Remember the name in $Global::unlink to avoid hitting the same name twice
4628 my $name = shift;
4629 my($tmpname);
4630 if(not -w $ENV{'TMPDIR'}) {
4631 if(not -e $ENV{'TMPDIR'}) {
4632 ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
4633 } else {
4634 ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
4636 ::wait_and_exit(255);
4638 do {
4639 $tmpname = $ENV{'TMPDIR'}."/".$name.
4640 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
4641 } while(-e $tmpname or $Global::unlink{$tmpname}++);
4642 return $tmpname;
4645 sub tmpfifo {
4646 # Find an unused name and mkfifo on it
4647 use POSIX qw(mkfifo);
4648 my $tmpfifo = tmpname("fif",@_);
4649 mkfifo($tmpfifo,0600);
4650 return $tmpfifo;
4653 sub rm {
4654 # Remove file and remove it from %Global::unlink
4655 # Uses:
4656 # %Global::unlink
4657 delete @Global::unlink{@_};
4658 unlink @_;
4661 sub size_of_block_dev {
4662 # Like -s but for block devices
4663 # Input:
4664 # $blockdev = file name of block device
4665 # Returns:
4666 # $size = in bytes, undef if error
4667 my $blockdev = shift;
4668 if(open(my $fh, "<", $blockdev)) {
4669 seek($fh,0,2) || ::die_bug("cannot seek $blockdev");
4670 my $size = tell($fh);
4671 close $fh;
4672 return $size;
4673 } else {
4674 ::error("cannot open $blockdev");
4675 wait_and_exit(255);
4679 sub qqx {
4680 # Like qx but with clean environment (except for @keep)
4681 # and STDERR ignored
4682 # This is needed if the environment contains functions
4683 # that /bin/sh does not understand
4684 my $PATH = $ENV{'PATH'};
4685 my %env;
4686 # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID
4687 # ssh with Kerberos needs KRB5CCNAME
4688 # tmux needs LC_CTYPE
4689 my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE);
4690 @env{@keep} = @ENV{@keep};
4691 local %ENV;
4692 %ENV = %env;
4693 if($Global::debug) {
4694 return qx{ @_ && true };
4695 } else {
4696 return qx{ ( @_ ) 2>/dev/null };
4700 sub uniq {
4701 # Remove duplicates and return unique values
4702 return keys %{{ map { $_ => 1 } @_ }};
4705 sub min {
4706 # Returns:
4707 # Minimum value of array
4708 my $min;
4709 for (@_) {
4710 # Skip undefs
4711 defined $_ or next;
4712 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
4713 $min = ($min < $_) ? $min : $_;
4715 return $min;
4718 sub max {
4719 # Returns:
4720 # Maximum value of array
4721 my $max;
4722 for (@_) {
4723 # Skip undefs
4724 defined $_ or next;
4725 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
4726 $max = ($max > $_) ? $max : $_;
4728 return $max;
4731 sub sum {
4732 # Returns:
4733 # Sum of values of array
4734 my @args = @_;
4735 my $sum = 0;
4736 for (@args) {
4737 # Skip undefs
4738 $_ and do { $sum += $_; }
4740 return $sum;
4743 sub undef_as_zero {
4744 my $a = shift;
4745 return $a ? $a : 0;
4748 sub undef_as_empty {
4749 my $a = shift;
4750 return $a ? $a : "";
4753 sub undef_if_empty {
4754 if(defined($_[0]) and $_[0] eq "") {
4755 return undef;
4757 return $_[0];
4760 sub multiply_binary_prefix {
4761 # Evalualte numbers with binary prefix
4762 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
4763 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
4764 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
4765 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
4766 # 13G = 13*1024*1024*1024 = 13958643712
4767 # Input:
4768 # $s = string with prefixes
4769 # Returns:
4770 # $value = int with prefixes multiplied
4771 my @v = @_;
4772 for(@v) {
4773 defined $_ or next;
4774 s/ki/*1024/gi;
4775 s/mi/*1024*1024/gi;
4776 s/gi/*1024*1024*1024/gi;
4777 s/ti/*1024*1024*1024*1024/gi;
4778 s/pi/*1024*1024*1024*1024*1024/gi;
4779 s/ei/*1024*1024*1024*1024*1024*1024/gi;
4780 s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
4781 s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
4782 s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
4784 s/K/*1024/g;
4785 s/M/*1024*1024/g;
4786 s/G/*1024*1024*1024/g;
4787 s/T/*1024*1024*1024*1024/g;
4788 s/P/*1024*1024*1024*1024*1024/g;
4789 s/E/*1024*1024*1024*1024*1024*1024/g;
4790 s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
4791 s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
4792 s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
4794 s/k/*1000/g;
4795 s/m/*1000*1000/g;
4796 s/g/*1000*1000*1000/g;
4797 s/t/*1000*1000*1000*1000/g;
4798 s/p/*1000*1000*1000*1000*1000/g;
4799 s/e/*1000*1000*1000*1000*1000*1000/g;
4800 s/z/*1000*1000*1000*1000*1000*1000*1000/g;
4801 s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
4802 s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
4804 $_ = eval $_;
4806 return wantarray ? @v : $v[0];
4809 sub multiply_time_units {
4810 # Evalualte numbers with time units
4811 # s=1, m=60, h=3600, d=86400
4812 # Input:
4813 # $s = string time units
4814 # Returns:
4815 # $value = int in seconds
4816 my @v = @_;
4817 for(@v) {
4818 defined $_ or next;
4819 if(/[dhms]/i) {
4820 s/s/*1+/gi;
4821 s/m/*60+/gi;
4822 s/h/*3600+/gi;
4823 s/d/*86400+/gi;
4824 $_ = eval $_."0";
4827 return wantarray ? @v : $v[0];
4830 sub seconds_to_time_units {
4831 # Convert seconds into ??d??h??m??s
4832 # s=1, m=60, h=3600, d=86400
4833 # Input:
4834 # $s = int in seconds
4835 # Returns:
4836 # $str = string time units
4837 my $s = shift;
4838 my $str;
4839 my $d = int($s/86400);
4840 $s -= $d * 86400;
4841 my $h = int($s/3600);
4842 $s -= $h * 3600;
4843 my $m = int($s/60);
4844 $s -= $m * 60;
4845 if($d) {
4846 $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s);
4847 } elsif($h) {
4848 $str = sprintf("%dh%02dm%02ds",$h,$m,$s);
4849 } elsif($m) {
4850 $str = sprintf("%dm%02ds",$m,$s);
4851 } else {
4852 $str = sprintf("%ds",$s);
4854 return $str;
4858 my ($disk_full_fh, $b8193, $error_printed);
4859 sub exit_if_disk_full {
4860 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
4861 # If the disk is full: Exit immediately.
4862 # Returns:
4863 # N/A
4864 if(not $disk_full_fh) {
4865 $disk_full_fh = ::tmpfile(SUFFIX => ".df");
4866 $b8193 = "x"x8193;
4868 # Linux does not discover if a disk is full if writing <= 8192
4869 # Tested on:
4870 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
4871 # ntfs reiserfs tmpfs ubifs vfat xfs
4872 # TODO this should be tested on different OS similar to this:
4874 # doit() {
4875 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
4876 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
4877 # seq 6900000 > /mnt/loop/i && echo seq OK
4878 # seq 6980868 > /mnt/loop/i
4879 # seq 10000 > /mnt/loop/ii
4880 # sleep 3
4881 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
4882 # echo >&2
4884 print $disk_full_fh $b8193;
4885 if(not $disk_full_fh
4887 tell $disk_full_fh != 8193) {
4888 # On raspbian the disk can be full except for 10 chars.
4889 if(not $error_printed) {
4890 ::error("Output is incomplete.",
4891 "Cannot append to buffer file in $ENV{'TMPDIR'}.",
4892 "Is the disk full?",
4893 "Change \$TMPDIR with --tmpdir or use --compress.");
4894 $error_printed = 1;
4896 ::wait_and_exit(255);
4898 truncate $disk_full_fh, 0;
4899 seek($disk_full_fh, 0, 0) || die;
4903 sub spacefree {
4904 # Remove comments and spaces
4905 # Inputs:
4906 # $spaces = keep 1 space?
4907 # $s = string to remove spaces from
4908 # Returns:
4909 # $s = with spaces removed
4910 my $spaces = shift;
4911 my $s = shift;
4912 $s =~ s/#.*//mg;
4913 if(1 == $spaces) {
4914 $s =~ s/\s+/ /mg;
4915 } elsif(2 == $spaces) {
4916 # Keep newlines
4917 $s =~ s/\n\n+/\n/sg;
4918 $s =~ s/[ \t]+/ /mg;
4919 } else {
4920 $s =~ s/\s//mg;
4922 return $s;
4926 my $hostname;
4927 sub hostname {
4928 local $/ = "\n";
4929 if(not $hostname) {
4930 $hostname = `hostname`;
4931 chomp($hostname);
4932 $hostname ||= "nohostname";
4934 return $hostname;
4938 sub which {
4939 # Input:
4940 # @programs = programs to find the path to
4941 # Returns:
4942 # @full_path = full paths to @programs. Nothing if not found
4943 my @which;
4944 ::debug("which", "@_ in $ENV{'PATH'}\n");
4945 for my $prg (@_) {
4946 push(@which, grep { not -d $_ and -x $_ }
4947 map { $_."/".$prg } split(":",$ENV{'PATH'}));
4948 if($prg =~ m:/:) {
4949 # Including path
4950 push(@which, grep { not -d $_ and -x $_ } $prg);
4953 return @which;
4957 my ($regexp,$shell,%fakename);
4959 sub parent_shell {
4960 # Input:
4961 # $pid = pid to see if (grand)*parent is a shell
4962 # Returns:
4963 # $shellpath = path to shell - undef if no shell found
4964 my $pid = shift;
4965 ::debug("init","Parent of $pid\n");
4966 if(not $regexp) {
4967 # All shells known to mankind
4969 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
4970 # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
4972 my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh
4973 ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
4974 static-sh tcsh yash zsh -sh -csh -bash),
4975 '-sh (sh)' # sh on FreeBSD
4977 # Can be formatted as:
4978 # [sh] -sh sh busybox sh -sh (sh)
4979 # /bin/sh /sbin/sh /opt/csw/sh
4980 # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh
4981 $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
4982 $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'.
4983 '(-?)('. $shell. '))( *$| [^(])';
4984 %fakename = (
4985 # sh disguises itself as -sh (sh) on FreeBSD
4986 "-sh (sh)" => ["sh"],
4987 # csh and tcsh disguise themselves as -sh/-csh
4988 # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh
4989 # but sh also disguise itself as -sh
4990 # (When?)
4991 "-sh" => ["sh"],
4992 "-csh" => ["tcsh", "csh"],
4993 # ash disguises itself as -ash
4994 "-ash" => ["ash", "dash", "sh"],
4995 # dash disguises itself as -dash
4996 "-dash" => ["dash", "ash", "sh"],
4997 # bash disguises itself as -bash
4998 "-bash" => ["bash", "sh"],
4999 # ksh disguises itself as -ash
5000 "-ksh" => ["ksh", "sh"],
5001 # zsh disguises itself as -zsh
5002 "-zsh" => ["zsh", "sh"],
5005 # if -sh or -csh try readlink /proc/$$/exe
5006 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
5007 my $shellpath;
5008 my $testpid = $pid;
5009 while($testpid) {
5010 if($name_of_ref->{$testpid} =~ /$regexp/o) {
5011 my $shellname = $4 || $8;
5012 my $dash = $3 || $7;
5013 if($shellname eq "sh" and $dash) {
5014 # -sh => csh or sh
5015 if($shellpath = readlink "/proc/$testpid/exe") {
5016 ::debug("init","procpath $shellpath\n");
5017 if($shellpath =~ m:/$shell$:o) {
5018 ::debug("init", "proc which ".$shellpath." => ");
5019 return $shellpath;
5023 ::debug("init", "which ".$shellname." => ");
5024 $shellpath = (which($shellname,@{$fakename{$shellname}}))[0];
5025 ::debug("init", "shell path $shellpath\n");
5026 $shellpath and last;
5028 if($testpid == $parent_of_ref->{$testpid}) {
5029 # In Solaris zones, the PPID of the zsched process is itself
5030 last;
5032 $testpid = $parent_of_ref->{$testpid};
5034 return $shellpath;
5039 my %pid_parentpid_cmd;
5041 sub pid_table {
5042 # Returns:
5043 # %children_of = { pid -> children of pid }
5044 # %parent_of = { pid -> pid of parent }
5045 # %name_of = { pid -> commandname }
5047 if(not %pid_parentpid_cmd) {
5048 # Filter for SysV-style `ps`
5049 my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5050 q(s/^.{$s}//; print "@F[1,2] $_"' );
5051 # Minix uses cols 2,3 and can have newlines in the command
5052 # so lines not having numbers in cols 2,3 must be ignored
5053 my $minix = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
5054 q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' );
5055 # BSD-style `ps`
5056 my $bsd = q(ps -o pid,ppid,command -ax);
5057 %pid_parentpid_cmd =
5059 'aix' => $sysv,
5060 'android' => $sysv,
5061 'cygwin' => $sysv,
5062 'darwin' => $bsd,
5063 'dec_osf' => $sysv,
5064 'dragonfly' => $bsd,
5065 'freebsd' => $bsd,
5066 'gnu' => $sysv,
5067 'hpux' => $sysv,
5068 'linux' => $sysv,
5069 'mirbsd' => $bsd,
5070 'minix' => $minix,
5071 'msys' => $sysv,
5072 'MSWin32' => $sysv,
5073 'netbsd' => $bsd,
5074 'nto' => $sysv,
5075 'openbsd' => $bsd,
5076 'solaris' => $sysv,
5077 'svr5' => $sysv,
5078 'syllable' => "echo ps not supported",
5081 $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
5083 my (@pidtable,%parent_of,%children_of,%name_of);
5084 # Table with pid -> children of pid
5085 @pidtable = `$pid_parentpid_cmd{$^O}`;
5086 my $p=$$;
5087 for (@pidtable) {
5088 # must match: 24436 21224 busybox ash
5089 # must match: 24436 21224 <<empty on MacOSX running cubase>>
5090 # must match: 24436 21224 <<empty on system running Viber>>
5091 # or: perl -e 'while($0=" "){}'
5092 if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
5094 /^\s*(\S+)\s+(\S+)\s+()$/) {
5095 $parent_of{$1} = $2;
5096 push @{$children_of{$2}}, $1;
5097 $name_of{$1} = $3;
5098 } else {
5099 ::die_bug("pidtable format: $_");
5102 return(\%children_of, \%parent_of, \%name_of);
5106 sub now {
5107 # Returns time since epoch as in seconds with 3 decimals
5108 # Uses:
5109 # @Global::use
5110 # Returns:
5111 # $time = time now with millisecond accuracy
5112 if(not $Global::use{"Time::HiRes"}) {
5113 if(eval "use Time::HiRes qw ( time );") {
5114 eval "sub TimeHiRestime { return Time::HiRes::time };";
5115 } else {
5116 eval "sub TimeHiRestime { return time() };";
5118 $Global::use{"Time::HiRes"} = 1;
5121 return (int(TimeHiRestime()*1000))/1000;
5124 sub usleep {
5125 # Sleep this many milliseconds.
5126 # Input:
5127 # $ms = milliseconds to sleep
5128 my $ms = shift;
5129 ::debug("timing",int($ms),"ms ");
5130 select(undef, undef, undef, $ms/1000);
5133 sub reap_usleep {
5134 # Reap dead children.
5135 # If no dead children: Sleep specified amount with exponential backoff
5136 # Input:
5137 # $ms = milliseconds to sleep
5138 # Returns:
5139 # $ms/2+0.001 if children reaped
5140 # $ms*1.1 if no children reaped
5141 my $ms = shift;
5142 if(reapers()) {
5143 if(not $Global::total_completed % 100) {
5144 if($opt::timeout) {
5145 # Force cleaning the timeout queue for every 1000 jobs
5146 # Fixes potential memleak
5147 $Global::timeoutq->process_timeouts();
5150 # Sleep exponentially shorter (1/2^n) if a job finished
5151 return $ms/2+0.001;
5152 } else {
5153 if($opt::timeout) {
5154 $Global::timeoutq->process_timeouts();
5156 if($opt::memfree) {
5157 kill_youngster_if_not_enough_mem();
5159 if($opt::limit) {
5160 kill_youngest_if_over_limit();
5162 if($ms > 0.002) {
5163 # When a child dies, wake up from sleep (or select(,,,))
5164 $SIG{CHLD} = sub { kill "ALRM", $$ };
5165 usleep($ms);
5166 # --compress needs $SIG{CHLD} unset
5167 $SIG{CHLD} = 'DEFAULT';
5169 exit_if_disk_full();
5170 if($opt::linebuffer) {
5171 my $something_printed = 0;
5172 if($opt::keeporder) {
5173 for my $job (values %Global::running) {
5174 $something_printed += $job->print_earlier_jobs();
5176 } else {
5177 for my $job (values %Global::running) {
5178 $something_printed += $job->print();
5181 if($something_printed) {
5182 $ms = $ms/2+0.001;
5185 # Sleep exponentially longer (1.1^n) if a job did not finish,
5186 # though at most 1000 ms.
5187 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
5191 sub kill_youngest_if_over_limit {
5192 # Check each $sshlogin we are over limit
5193 # If over limit: kill off the youngest child
5194 # Put the child back in the queue.
5195 # Uses:
5196 # %Global::running
5197 my %jobs_of;
5198 my @sshlogins;
5200 for my $job (values %Global::running) {
5201 if(not $jobs_of{$job->sshlogin()}) {
5202 push @sshlogins, $job->sshlogin();
5204 push @{$jobs_of{$job->sshlogin()}}, $job;
5206 for my $sshlogin (@sshlogins) {
5207 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5208 if($sshlogin->limit() == 2) {
5209 $job->kill();
5210 last;
5216 sub kill_youngster_if_not_enough_mem {
5217 # Check each $sshlogin if there is enough mem.
5218 # If less than 50% enough free mem: kill off the youngest child
5219 # Put the child back in the queue.
5220 # Uses:
5221 # %Global::running
5222 my %jobs_of;
5223 my @sshlogins;
5225 for my $job (values %Global::running) {
5226 if(not $jobs_of{$job->sshlogin()}) {
5227 push @sshlogins, $job->sshlogin();
5229 push @{$jobs_of{$job->sshlogin()}}, $job;
5231 for my $sshlogin (@sshlogins) {
5232 for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
5233 if($sshlogin->memfree() < $opt::memfree * 0.5) {
5234 ::debug("mem","\n",map { $_->seq()." " }
5235 (sort { $b->seq() <=> $a->seq() }
5236 @{$jobs_of{$sshlogin}}));
5237 ::debug("mem","\n", $job->seq(), "killed ",
5238 $sshlogin->memfree()," < ",$opt::memfree * 0.5);
5239 $job->kill();
5240 $sshlogin->memfree_recompute();
5241 } else {
5242 last;
5245 ::debug("mem","Free mem OK ",
5246 $sshlogin->memfree()," > ",$opt::memfree * 0.5);
5251 sub __DEBUGGING__ {}
5254 sub debug {
5255 # Uses:
5256 # $Global::debug
5257 # %Global::fd
5258 # Returns: N/A
5259 $Global::debug or return;
5260 @_ = grep { defined $_ ? $_ : "" } @_;
5261 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
5262 if($Global::fd{1}) {
5263 # Original stdout was saved
5264 my $stdout = $Global::fd{1};
5265 print $stdout @_[1..$#_];
5266 } else {
5267 print @_[1..$#_];
5272 sub my_memory_usage {
5273 # Returns:
5274 # memory usage if found
5275 # 0 otherwise
5276 use strict;
5277 use FileHandle;
5279 local $/ = "\n";
5280 my $pid = $$;
5281 if(-e "/proc/$pid/stat") {
5282 my $fh = FileHandle->new("</proc/$pid/stat");
5284 my $data = <$fh>;
5285 chomp $data;
5286 $fh->close;
5288 my @procinfo = split(/\s+/,$data);
5290 return undef_as_zero($procinfo[22]);
5291 } else {
5292 return 0;
5296 sub my_size {
5297 # Returns:
5298 # $size = size of object if Devel::Size is installed
5299 # -1 otherwise
5300 my @size_this = (@_);
5301 eval "use Devel::Size qw(size total_size)";
5302 if ($@) {
5303 return -1;
5304 } else {
5305 return total_size(@_);
5309 sub my_dump {
5310 # Returns:
5311 # ascii expression of object if Data::Dump(er) is installed
5312 # error code otherwise
5313 my @dump_this = (@_);
5314 eval "use Data::Dump qw(dump);";
5315 if ($@) {
5316 # Data::Dump not installed
5317 eval "use Data::Dumper;";
5318 if ($@) {
5319 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
5320 "Not dumping output\n";
5321 ::status($err);
5322 return $err;
5323 } else {
5324 return Dumper(@dump_this);
5326 } else {
5327 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
5328 # it undefined
5329 eval "sub Data::Dump:dump {}";
5330 eval "use Data::Dump qw(dump);";
5331 return (Data::Dump::dump(@dump_this));
5335 sub my_croak {
5336 eval "use Carp; 1";
5337 $Carp::Verbose = 1;
5338 croak(@_);
5341 sub my_carp {
5342 eval "use Carp; 1";
5343 $Carp::Verbose = 1;
5344 carp(@_);
5348 sub __OBJECT_ORIENTED_PARTS__ {}
5351 package SSHLogin;
5353 sub new {
5354 my $class = shift;
5355 my $sshlogin_string = shift;
5356 my $ncpus;
5357 my %hostgroups;
5358 # SSHLogins can have these formats:
5359 # @grp+grp/ncpu//usr/bin/ssh user@server
5360 # ncpu//usr/bin/ssh user@server
5361 # /usr/bin/ssh user@server
5362 # user@server
5363 # ncpu/user@server
5364 # @grp+grp/user@server
5365 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
5366 # Look for SSHLogin hostgroups
5367 %hostgroups = map { $_ => 1 } split(/\+/, $1);
5369 # An SSHLogin is always in the hostgroup of its "numcpu/host"
5370 $hostgroups{$sshlogin_string} = 1;
5371 if ($sshlogin_string =~ s:^(\d+)/::) {
5372 # Override default autodetected ncpus unless missing
5373 $ncpus = $1;
5375 my $string = $sshlogin_string;
5376 # An SSHLogin is always in the hostgroup of its $string-name
5377 $hostgroups{$string} = 1;
5378 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
5379 my @unget = ();
5380 my $no_slash_string = $string;
5381 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
5382 return bless {
5383 'string' => $string,
5384 'jobs_running' => 0,
5385 'jobs_completed' => 0,
5386 'maxlength' => undef,
5387 'max_jobs_running' => undef,
5388 'orig_max_jobs_running' => undef,
5389 'ncpus' => $ncpus,
5390 'hostgroups' => \%hostgroups,
5391 'sshcommand' => undef,
5392 'serverlogin' => undef,
5393 'control_path_dir' => undef,
5394 'control_path' => undef,
5395 'time_to_login' => undef,
5396 'last_login_at' => undef,
5397 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" .
5398 $no_slash_string . "/loadavg",
5399 'loadavg' => undef,
5400 'last_loadavg_update' => 0,
5401 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" .
5402 $no_slash_string . "/swap_activity",
5403 'swap_activity' => undef,
5404 }, ref($class) || $class;
5407 sub DESTROY {
5408 my $self = shift;
5409 # Remove temporary files if they are created.
5410 ::rm($self->{'loadavg_file'});
5411 ::rm($self->{'swap_activity_file'});
5414 sub string {
5415 my $self = shift;
5416 return $self->{'string'};
5419 sub jobs_running {
5420 my $self = shift;
5421 return ($self->{'jobs_running'} || "0");
5424 sub inc_jobs_running {
5425 my $self = shift;
5426 $self->{'jobs_running'}++;
5429 sub dec_jobs_running {
5430 my $self = shift;
5431 $self->{'jobs_running'}--;
5434 sub set_maxlength {
5435 my $self = shift;
5436 $self->{'maxlength'} = shift;
5439 sub maxlength {
5440 my $self = shift;
5441 return $self->{'maxlength'};
5444 sub jobs_completed {
5445 my $self = shift;
5446 return $self->{'jobs_completed'};
5449 sub in_hostgroups {
5450 # Input:
5451 # @hostgroups = the hostgroups to look for
5452 # Returns:
5453 # true if intersection of @hostgroups and the hostgroups of this
5454 # SSHLogin is non-empty
5455 my $self = shift;
5456 return grep { defined $self->{'hostgroups'}{$_} } @_;
5459 sub hostgroups {
5460 my $self = shift;
5461 return keys %{$self->{'hostgroups'}};
5464 sub inc_jobs_completed {
5465 my $self = shift;
5466 $self->{'jobs_completed'}++;
5467 $Global::total_completed++;
5470 sub set_max_jobs_running {
5471 my $self = shift;
5472 if(defined $self->{'max_jobs_running'}) {
5473 $Global::max_jobs_running -= $self->{'max_jobs_running'};
5475 $self->{'max_jobs_running'} = shift;
5476 if(defined $self->{'max_jobs_running'}) {
5477 # max_jobs_running could be resat if -j is a changed file
5478 $Global::max_jobs_running += $self->{'max_jobs_running'};
5480 # Initialize orig to the first non-zero value that comes around
5481 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
5484 sub memfree {
5485 # Returns:
5486 # $memfree in bytes
5487 my $self = shift;
5488 $self->memfree_recompute();
5489 return (not defined $self->{'memfree'} or $self->{'memfree'})
5492 sub memfree_recompute {
5493 my $self = shift;
5494 my $script = memfreescript();
5496 # TODO add sshlogin and backgrounding
5497 # Run the script twice if it gives 0 (typically intermittent error)
5498 $self->{'memfree'} = ::qqx($script) || ::qqx($script);
5499 if(not $self->{'memfree'}) {
5500 ::die_bug("Less than 1 byte free");
5502 #::debug("mem","New free:",$self->{'memfree'}," ");
5506 my $script;
5508 sub memfreescript {
5509 # Returns:
5510 # shellscript for giving available memory in bytes
5511 if(not $script) {
5512 my %script_of = (
5513 # /proc/meminfo
5514 # MemFree: 7012 kB
5515 # Buffers: 19876 kB
5516 # Cached: 431192 kB
5517 # SwapCached: 0 kB
5518 "linux" =>
5519 q[ print 1024 * qx{ ].
5520 q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ].
5521 q[ { sum += \$2} END { print sum }' ].
5522 q[ /proc/meminfo } ],
5523 # $ vmstat 1 1
5524 # procs memory page faults cpu
5525 # r b w avm free re at pi po fr de sr in sy cs us sy id
5526 # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
5527 "hpux" =>
5528 q[ print (((reverse `vmstat 1 1`)[0] ].
5529 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
5530 # $ vmstat 1 2
5531 # kthr memory page disk faults cpu
5532 # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
5533 # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
5534 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
5536 # The second free value is correct
5537 "solaris" =>
5538 q[ print (((reverse `vmstat 1 2`)[0] ].
5539 q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ],
5540 "freebsd" => q{
5541 for(qx{/sbin/sysctl -a}) {
5542 if (/^([^:]+):\s+(.+)\s*$/s) {
5543 $sysctl->{$1} = $2;
5546 print $sysctl->{"hw.pagesize"} *
5547 ($sysctl->{"vm.stats.vm.v_cache_count"}
5548 + $sysctl->{"vm.stats.vm.v_inactive_count"}
5549 + $sysctl->{"vm.stats.vm.v_free_count"});
5551 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
5552 # Pages free: 198061.
5553 # Pages active: 159701.
5554 # Pages inactive: 47378.
5555 # Pages speculative: 29707.
5556 # Pages wired down: 89231.
5557 # "Translation faults": 928901425.
5558 # Pages copy-on-write: 156988239.
5559 # Pages zero filled: 271267894.
5560 # Pages reactivated: 48895.
5561 # Pageins: 1798068.
5562 # Pageouts: 257.
5563 # Object cache: 6603 hits of 1713223 lookups (0% hit rate)
5564 'darwin' =>
5565 q[ $vm = `vm_stat`;
5566 print (($vm =~ /page size of (\d+)/)[0] *
5567 (($vm =~ /Pages free:\s+(\d+)/)[0] +
5568 ($vm =~ /Pages inactive:\s+(\d+)/)[0]));
5571 my $perlscript = "";
5572 # Make a perl script that detects the OS ($^O) and runs
5573 # the appropriate command
5574 for my $os (keys %script_of) {
5575 $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
5577 $perlscript =~ s/[\t\n ]+/ /g;
5578 $script = "perl -e " . ::Q($perlscript);
5580 return $script;
5584 sub limit {
5585 # Returns:
5586 # 0 = Below limit. Start another job.
5587 # 1 = Over limit. Start no jobs.
5588 # 2 = Kill youngest job
5589 my $self = shift;
5591 if(not defined $self->{'limitscript'}) {
5592 my %limitscripts =
5593 ("io" => q!
5594 io() {
5595 limit=$1;
5596 io_file=$2;
5597 # Do the measurement in the background
5598 (tmp=$(tempfile);
5599 LANG=C iostat -x 1 2 > $tmp;
5600 mv $tmp $io_file) &
5601 perl -e '-e $ARGV[0] or exit(1);
5602 for(reverse <>) {
5603 /Device:/ and last;
5604 /(\S+)$/ and $max = $max > $1 ? $max : $1; }
5605 exit ($max < '$limit')' $io_file;
5607 export -f io;
5608 io %s %s
5610 "mem" => q!
5611 mem() {
5612 limit=$1;
5613 awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2}
5614 END {
5615 if (sum*1024 < '$limit'/2) { exit 2; }
5616 else { exit (sum*1024 < '$limit') }
5617 }' /proc/meminfo;
5619 export -f mem;
5620 mem %s;
5622 "load" => q!
5623 load() {
5624 limit=$1;
5625 ps ax -o state,command |
5626 grep -E '^[DOR].[^[]' |
5627 wc -l |
5628 perl -ne 'exit ('$limit' < $_)';
5630 export -f load;
5631 load %s;
5634 my ($cmd,@args) = split /\s+/,$opt::limit;
5635 if($limitscripts{$cmd}) {
5636 my $tmpfile = ::tmpname("parlmt");
5637 ++$Global::unlink{$tmpfile};
5638 $self->{'limitscript'} =
5639 ::spacefree(1, sprintf($limitscripts{$cmd},
5640 ::multiply_binary_prefix(@args),$tmpfile));
5641 } else {
5642 $self->{'limitscript'} = $opt::limit;
5646 my %env = %ENV;
5647 local %ENV = %env;
5648 $ENV{'SSHLOGIN'} = $self->string();
5649 system($Global::shell,"-c",$self->{'limitscript'});
5650 ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
5651 return $?>>8;
5655 sub swapping {
5656 my $self = shift;
5657 my $swapping = $self->swap_activity();
5658 return (not defined $swapping or $swapping)
5661 sub swap_activity {
5662 # If the currently known swap activity is too old:
5663 # Recompute a new one in the background
5664 # Returns:
5665 # last swap activity computed
5666 my $self = shift;
5667 # Should we update the swap_activity file?
5668 my $update_swap_activity_file = 0;
5669 if(-r $self->{'swap_activity_file'}) {
5670 open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
5671 ::die_bug("swap_activity_file-r");
5672 my $swap_out = <$swap_fh>;
5673 close $swap_fh;
5674 if($swap_out =~ /^(\d+)$/) {
5675 $self->{'swap_activity'} = $1;
5676 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
5678 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
5679 if(time - $self->{'last_swap_activity_update'} > 10) {
5680 # last swap activity update was started 10 seconds ago
5681 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
5682 $update_swap_activity_file = 1;
5684 } else {
5685 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
5686 $self->{'swap_activity'} = undef;
5687 $update_swap_activity_file = 1;
5689 if($update_swap_activity_file) {
5690 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
5691 $self->{'last_swap_activity_update'} = time;
5692 my $dir = ::dirname($self->{'swap_activity_file'});
5693 -d $dir or eval { File::Path::mkpath($dir); };
5694 my $swap_activity;
5695 $swap_activity = swapactivityscript();
5696 if($self->{'string'} ne ":") {
5697 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
5698 ::Q($swap_activity);
5700 # Run swap_activity measuring.
5701 # As the command can take long to run if run remote
5702 # save it to a tmp file before moving it to the correct file
5703 my $file = $self->{'swap_activity_file'};
5704 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
5705 ::debug("swap", "\n", $swap_activity, "\n");
5706 ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)");
5708 return $self->{'swap_activity'};
5712 my $script;
5714 sub swapactivityscript {
5715 # Returns:
5716 # shellscript for detecting swap activity
5718 # arguments for vmstat are OS dependant
5719 # swap_in and swap_out are in different columns depending on OS
5721 if(not $script) {
5722 my %vmstat = (
5723 # linux: $7*$8
5724 # $ vmstat 1 2
5725 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
5726 # r b swpd free buff cache si so bi bo in cs us sy id wa
5727 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
5728 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
5729 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
5731 # solaris: $6*$7
5732 # $ vmstat -S 1 2
5733 # kthr memory page disk faults cpu
5734 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
5735 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
5736 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
5737 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
5739 # darwin (macosx): $21*$22
5740 # $ vm_stat -c 2 1
5741 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
5742 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
5743 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
5744 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
5745 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
5747 # ultrix: $12*$13
5748 # $ vmstat -S 1 2
5749 # procs faults cpu memory page disk
5750 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
5751 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
5752 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
5753 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
5755 # aix: $6*$7
5756 # $ vmstat 1 2
5757 # System configuration: lcpu=1 mem=2048MB
5759 # kthr memory page faults cpu
5760 # ----- ----------- ------------------------ ------------ -----------
5761 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
5762 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
5763 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
5764 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
5766 # freebsd: $8*$9
5767 # $ vmstat -H 1 2
5768 # procs memory page disks faults cpu
5769 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
5770 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
5771 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
5772 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
5774 # mirbsd: $8*$9
5775 # $ vmstat 1 2
5776 # procs memory page disks traps cpu
5777 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
5778 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
5779 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
5780 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
5782 # netbsd: $7*$8
5783 # $ vmstat 1 2
5784 # procs memory page disks faults cpu
5785 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
5786 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
5787 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
5788 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
5790 # openbsd: $8*$9
5791 # $ vmstat 1 2
5792 # procs memory page disks traps cpu
5793 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
5794 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
5795 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
5796 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
5798 # hpux: $8*$9
5799 # $ vmstat 1 2
5800 # procs memory page faults cpu
5801 # r b w avm free re at pi po fr de sr in sy cs us sy id
5802 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
5803 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
5804 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
5806 # dec_osf (tru64): $11*$12
5807 # $ vmstat 1 2
5808 # Virtual Memory Statistics: (pagesize = 8192)
5809 # procs memory pages intr cpu
5810 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
5811 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
5812 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
5813 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
5815 # gnu (hurd): $7*$8
5816 # $ vmstat -k 1 2
5817 # (pagesize: 4, size: 512288, swap size: 894972)
5818 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
5819 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
5820 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
5821 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
5823 # -nto (qnx has no swap)
5824 #-irix
5825 #-svr5 (scosysv)
5827 my $perlscript = "";
5828 # Make a perl script that detects the OS ($^O) and runs
5829 # the appropriate vmstat command
5830 for my $os (keys %vmstat) {
5831 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
5832 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
5833 $vmstat{$os}[1] . '}"` }';
5835 $script = "perl -e " . ::Q($perlscript);
5837 return $script;
5841 sub too_fast_remote_login {
5842 my $self = shift;
5843 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
5844 # sshd normally allows 10 simultaneous logins
5845 # A login takes time_to_login
5846 # So time_to_login/5 should be safe
5847 # If now <= last_login + time_to_login/5: Then it is too soon.
5848 my $too_fast = (::now() <= $self->{'last_login_at'}
5849 + $self->{'time_to_login'}/5);
5850 ::debug("run", "Too fast? $too_fast ");
5851 return $too_fast;
5852 } else {
5853 # No logins so far (or time_to_login not computed): it is not too fast
5854 return 0;
5858 sub last_login_at {
5859 my $self = shift;
5860 return $self->{'last_login_at'};
5863 sub set_last_login_at {
5864 my $self = shift;
5865 $self->{'last_login_at'} = shift;
5868 sub loadavg_too_high {
5869 my $self = shift;
5870 my $loadavg = $self->loadavg();
5871 return (not defined $loadavg or
5872 $loadavg > $self->max_loadavg());
5876 my $cmd;
5877 sub loadavg_cmd {
5878 if(not $cmd) {
5879 # aix => "ps -ae -o state,command" # state wrong
5880 # bsd => "ps ax -o state,command"
5881 # sysv => "ps -ef -o s -o comm"
5882 # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
5883 # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
5884 # awk '{print $2,$1}'
5885 # dec_osf => bsd
5886 # dragonfly => bsd
5887 # freebsd => bsd
5888 # gnu => bsd
5889 # hpux => ps -el|awk '{print $2,$14,$15}'
5890 # irix => ps -ef -o state -o comm
5891 # linux => bsd
5892 # minix => ps el|awk '{print \$1,\$11}'
5893 # mirbsd => bsd
5894 # netbsd => bsd
5895 # openbsd => bsd
5896 # solaris => sysv
5897 # svr5 => sysv
5898 # ultrix => ps -ax | awk '{print $3,$5}'
5899 # unixware => ps -el|awk '{print $2,$14,$15}'
5900 my $ps = ::spacefree(1,q{
5901 $sysv="ps -ef -o s -o comm";
5902 $sysv2="ps -ef -o state -o comm";
5903 $bsd="ps ax -o state,command";
5904 # Treat threads as processes
5905 $bsd2="ps axH -o state,command";
5906 $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
5907 $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
5908 /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
5909 awk '{print $2,$1}' };
5910 $dummy="echo S COMMAND;echo R dummy";
5911 %ps=(
5912 # TODO Find better code for AIX
5913 'aix' => "uptime",
5914 'cygwin' => $cygwin,
5915 'darwin' => $bsd,
5916 'dec_osf' => $sysv2,
5917 'dragonfly' => $bsd,
5918 'freebsd' => $bsd2,
5919 'gnu' => $bsd,
5920 'hpux' => $psel,
5921 'irix' => $sysv2,
5922 'linux' => $bsd2,
5923 'minix' => "ps el|awk '{print \$1,\$11}'",
5924 'mirbsd' => $bsd,
5925 'msys' => $cygwin,
5926 'MSWin32' => $sysv,
5927 'netbsd' => $bsd,
5928 'nto' => $dummy,
5929 'openbsd' => $bsd,
5930 'solaris' => $sysv,
5931 'svr5' => $psel,
5932 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
5934 print `$ps{$^O}`;
5936 # The command is too long for csh, so base64_wrap the command
5937 $cmd = Job::base64_wrap($ps);
5939 return $cmd;
5944 sub loadavg {
5945 # If the currently know loadavg is too old:
5946 # Recompute a new one in the background
5947 # The load average is computed as the number of processes waiting for disk
5948 # or CPU right now. So it is the server load this instant and not averaged over
5949 # several minutes. This is needed so GNU Parallel will at most start one job
5950 # that will push the load over the limit.
5952 # Returns:
5953 # $last_loadavg = last load average computed (undef if none)
5954 my $self = shift;
5955 # Should we update the loadavg file?
5956 my $update_loadavg_file = 0;
5957 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
5958 local $/; # $/ = undef => slurp whole file
5959 my $load_out = <$load_fh>;
5960 close $load_fh;
5961 if($load_out =~ /\S/) {
5962 # Content can be empty if ~/ is on NFS
5963 # due to reading being non-atomic.
5965 # Count lines starting with D,O,R but command does not start with [
5966 my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
5967 if($load > 0) {
5968 # load is overestimated by 1
5969 $self->{'loadavg'} = $load - 1;
5970 ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
5971 } elsif ($load_out=~/average: (\d+.\d+)/) {
5972 # AIX does not support instant load average
5973 # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
5974 $self->{'loadavg'} = $1;
5975 } else {
5976 ::die_bug("loadavg_invalid_content: " .
5977 $self->{'loadavg_file'} . "\n$load_out");
5980 $update_loadavg_file = 1;
5981 } else {
5982 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
5983 $self->{'loadavg'} = undef;
5984 $update_loadavg_file = 1;
5986 if($update_loadavg_file) {
5987 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
5988 $self->{'last_loadavg_update'} = time;
5989 my $dir = ::dirname($self->{'swap_activity_file'});
5990 -d $dir or eval { File::Path::mkpath($dir); };
5991 -w $dir or ::die_bug("Cannot write to $dir");
5992 my $cmd = "";
5993 if($self->{'string'} ne ":") {
5994 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
5995 ::Q(loadavg_cmd());
5996 } else {
5997 $cmd .= loadavg_cmd();
5999 # As the command can take long to run if run remote
6000 # save it to a tmp file before moving it to the correct file
6001 ::debug("load", "Cmd: ", $cmd,"\n");
6002 my $file = $self->{'loadavg_file'};
6003 # tmpfile on same filesystem as $file
6004 my $tmpfile = $file.$$;
6005 ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )");
6007 return $self->{'loadavg'};
6010 sub max_loadavg {
6011 my $self = shift;
6012 # If --load is a file it might be changed
6013 if($Global::max_load_file) {
6014 my $mtime = (stat($Global::max_load_file))[9];
6015 if($mtime > $Global::max_load_file_last_mod) {
6016 $Global::max_load_file_last_mod = $mtime;
6017 for my $sshlogin (values %Global::host) {
6018 $sshlogin->set_max_loadavg(undef);
6022 if(not defined $self->{'max_loadavg'}) {
6023 $self->{'max_loadavg'} =
6024 $self->compute_max_loadavg($opt::load);
6026 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
6027 return $self->{'max_loadavg'};
6030 sub set_max_loadavg {
6031 my $self = shift;
6032 $self->{'max_loadavg'} = shift;
6035 sub compute_max_loadavg {
6036 # Parse the max loadaverage that the user asked for using --load
6037 # Returns:
6038 # max loadaverage
6039 my $self = shift;
6040 my $loadspec = shift;
6041 my $load;
6042 if(defined $loadspec) {
6043 if($loadspec =~ /^\+(\d+)$/) {
6044 # E.g. --load +2
6045 my $j = $1;
6046 $load =
6047 $self->ncpus() + $j;
6048 } elsif ($loadspec =~ /^-(\d+)$/) {
6049 # E.g. --load -2
6050 my $j = $1;
6051 $load =
6052 $self->ncpus() - $j;
6053 } elsif ($loadspec =~ /^(\d+)\%$/) {
6054 my $j = $1;
6055 $load =
6056 $self->ncpus() * $j / 100;
6057 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
6058 $load = $1;
6059 } elsif (-f $loadspec) {
6060 $Global::max_load_file = $loadspec;
6061 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
6062 if(open(my $in_fh, "<", $Global::max_load_file)) {
6063 my $opt_load_file = join("",<$in_fh>);
6064 close $in_fh;
6065 $load = $self->compute_max_loadavg($opt_load_file);
6066 } else {
6067 ::error("Cannot open $loadspec.");
6068 ::wait_and_exit(255);
6070 } else {
6071 ::error("Parsing of --load failed.");
6072 ::die_usage();
6074 if($load < 0.01) {
6075 $load = 0.01;
6078 return $load;
6081 sub time_to_login {
6082 my $self = shift;
6083 return $self->{'time_to_login'};
6086 sub set_time_to_login {
6087 my $self = shift;
6088 $self->{'time_to_login'} = shift;
6091 sub max_jobs_running {
6092 my $self = shift;
6093 if(not defined $self->{'max_jobs_running'}) {
6094 my $nproc = $self->compute_number_of_processes($opt::jobs);
6095 $self->set_max_jobs_running($nproc);
6097 return $self->{'max_jobs_running'};
6100 sub orig_max_jobs_running {
6101 my $self = shift;
6102 return $self->{'orig_max_jobs_running'};
6105 sub compute_number_of_processes {
6106 # Number of processes wanted and limited by system resources
6107 # Returns:
6108 # Number of processes
6109 my $self = shift;
6110 my $opt_P = shift;
6111 my $wanted_processes = $self->user_requested_processes($opt_P);
6112 if(not defined $wanted_processes) {
6113 $wanted_processes = $Global::default_simultaneous_sshlogins;
6115 ::debug("load", "Wanted procs: $wanted_processes\n");
6116 my $system_limit =
6117 $self->processes_available_by_system_limit($wanted_processes);
6118 ::debug("load", "Limited to procs: $system_limit\n");
6119 return $system_limit;
6123 my @children;
6124 my $max_system_proc_reached;
6125 my $more_filehandles;
6126 my %fh;
6127 my $tmpfhname;
6128 my $count_jobs_already_read;
6129 my @jobs;
6130 my $job;
6131 my @args;
6132 my $arg;
6134 sub reserve_filehandles {
6135 # Reserves filehandle
6136 my $n = shift;
6137 for (1..$n) {
6138 $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
6142 sub reserve_process {
6143 # Spawn a dummy process
6144 my $child;
6145 if($child = fork()) {
6146 push @children, $child;
6147 $Global::unkilled_children{$child} = 1;
6148 } elsif(defined $child) {
6149 # This is the child
6150 # The child takes one process slot
6151 # It will be killed later
6152 $SIG{'TERM'} = $Global::original_sig{'TERM'};
6153 if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") {
6154 # The exec does not work on Cygwin and QNX
6155 sleep 10101010;
6156 } else {
6157 # 'exec sleep' takes less RAM than sleeping in perl
6158 exec 'sleep', 10101;
6160 exit(0);
6161 } else {
6162 # Failed to spawn
6163 $max_system_proc_reached = 1;
6167 sub get_args_or_jobs {
6168 # Get an arg or a job (depending on mode)
6169 if($Global::semaphore or ($opt::pipe and not $opt::tee)) {
6170 # Skip: No need to get args
6171 return 1;
6172 } elsif(defined $opt::retries and $count_jobs_already_read) {
6173 # For retries we may need to run all jobs on this sshlogin
6174 # so include the already read jobs for this sshlogin
6175 $count_jobs_already_read--;
6176 return 1;
6177 } else {
6178 if($opt::X or $opt::m) {
6179 # The arguments may have to be re-spread over several jobslots
6180 # So pessimistically only read one arg per jobslot
6181 # instead of a full commandline
6182 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
6183 if($Global::JobQueue->empty()) {
6184 return 0;
6185 } else {
6186 $job = $Global::JobQueue->get();
6187 push(@jobs, $job);
6188 return 1;
6190 } else {
6191 $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
6192 push(@args, $arg);
6193 return 1;
6195 } else {
6196 # If there are no more command lines, then we have a process
6197 # per command line, so no need to go further
6198 if($Global::JobQueue->empty()) {
6199 return 0;
6200 } else {
6201 $job = $Global::JobQueue->get();
6202 # Replacement must happen here due to seq()
6203 $job and $job->replaced();
6204 push(@jobs, $job);
6205 return 1;
6211 sub cleanup {
6212 # Cleanup: Close the files
6213 for (values %fh) { close $_ }
6214 # Cleanup: Kill the children
6215 for my $pid (@children) {
6216 kill 9, $pid;
6217 waitpid($pid,0);
6218 delete $Global::unkilled_children{$pid};
6220 # Cleanup: Unget the command_lines or the @args
6221 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
6222 @args = ();
6223 $Global::JobQueue->unget(@jobs);
6224 @jobs = ();
6227 sub processes_available_by_system_limit {
6228 # If the wanted number of processes is bigger than the system limits:
6229 # Limit them to the system limits
6230 # Limits are: File handles, number of input lines, processes,
6231 # and taking > 1 second to spawn 10 extra processes
6232 # Returns:
6233 # Number of processes
6234 my $self = shift;
6235 my $wanted_processes = shift;
6236 my $system_limit = 0;
6237 my $slow_spawning_warning_printed = 0;
6238 my $time = time;
6239 $more_filehandles = 1;
6240 $tmpfhname = "TmpFhNamE";
6242 # perl uses 7 filehandles for something?
6243 # parallel uses 1 for memory_usage
6244 # parallel uses 4 for ?
6245 reserve_filehandles(12);
6246 # Two processes for load avg and ?
6247 reserve_process();
6248 reserve_process();
6250 # For --retries count also jobs already run
6251 $count_jobs_already_read = $Global::JobQueue->next_seq();
6252 my $wait_time_for_getting_args = 0;
6253 my $start_time = time;
6254 while(1) {
6255 $system_limit >= $wanted_processes and last;
6256 not $more_filehandles and last;
6257 $max_system_proc_reached and last;
6259 my $before_getting_arg = time;
6260 if(!$Global::dummy_jobs) {
6261 get_args_or_jobs() or last;
6263 $wait_time_for_getting_args += time - $before_getting_arg;
6264 $system_limit++;
6266 # Every simultaneous process uses 2 filehandles to write to
6267 # and 2 filehandles to read from
6268 reserve_filehandles(4);
6270 # System process limit
6271 reserve_process();
6273 my $forktime = time - $time - $wait_time_for_getting_args;
6274 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
6275 $forktime,
6276 " (processes so far: ", $system_limit,")\n");
6277 if($system_limit > 10 and
6278 $forktime > 1 and
6279 $forktime > $system_limit * 0.01
6280 and not $slow_spawning_warning_printed) {
6281 # It took more than 0.01 second to fork a processes on avg.
6282 # Give the user a warning. He can press Ctrl-C if this
6283 # sucks.
6284 ::warning("Starting $system_limit processes took > $forktime sec.",
6285 "Consider adjusting -j. Press CTRL-C to stop.");
6286 $slow_spawning_warning_printed = 1;
6289 cleanup();
6291 if($system_limit < $wanted_processes) {
6292 # The system_limit is less than the wanted_processes
6293 if($system_limit < 1 and not $Global::JobQueue->empty()) {
6294 ::warning("Cannot spawn any jobs. ".
6295 "Raising ulimit -u or 'nproc' in /etc/security/limits.conf",
6296 "or /proc/sys/kernel/pid_max may help.");
6297 ::wait_and_exit(255);
6299 if(not $more_filehandles) {
6300 ::warning("Only enough file handles to run ".
6301 $system_limit. " jobs in parallel.",
6302 "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or",
6303 "raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf",
6304 "or /proc/sys/fs/file-max may help.");
6306 if($max_system_proc_reached) {
6307 ::warning("Only enough available processes to run ".
6308 $system_limit. " jobs in parallel.",
6309 "Raising ulimit -u or /etc/security/limits.conf ",
6310 "or /proc/sys/kernel/pid_max may help.");
6313 if($] == 5.008008 and $system_limit > 1000) {
6314 # https://savannah.gnu.org/bugs/?36942
6315 $system_limit = 1000;
6317 if($Global::JobQueue->empty()) {
6318 $system_limit ||= 1;
6320 if($self->string() ne ":" and
6321 $system_limit > $Global::default_simultaneous_sshlogins) {
6322 $system_limit =
6323 $self->simultaneous_sshlogin_limit($system_limit);
6325 return $system_limit;
6329 sub simultaneous_sshlogin_limit {
6330 # Test by logging in wanted number of times simultaneously
6331 # Returns:
6332 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
6333 my $self = shift;
6334 my $wanted_processes = shift;
6335 if($self->{'time_to_login'}) {
6336 return $wanted_processes;
6339 # Try twice because it guesses wrong sometimes
6340 # Choose the minimal
6341 my $ssh_limit =
6342 ::min($self->simultaneous_sshlogin($wanted_processes),
6343 $self->simultaneous_sshlogin($wanted_processes));
6344 if($ssh_limit < $wanted_processes) {
6345 my $serverlogin = $self->serverlogin();
6346 ::warning("ssh to $serverlogin only allows ".
6347 "for $ssh_limit simultaneous logins.",
6348 "You may raise this by changing",
6349 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.",
6350 "You can also try --sshdelay 0.1",
6351 "Using only ".($ssh_limit-1)." connections ".
6352 "to avoid race conditions.");
6353 # Race condition can cause problem if using all sshs.
6354 if($ssh_limit > 1) { $ssh_limit -= 1; }
6356 return $ssh_limit;
6359 sub simultaneous_sshlogin {
6360 # Using $sshlogin try to see if we can do $wanted_processes
6361 # simultaneous logins
6362 # (ssh host echo simul-login & ssh host echo simul-login & ...) |
6363 # grep simul|wc -l
6364 # Input:
6365 # $wanted_processes = Try for this many logins in parallel
6366 # Returns:
6367 # $ssh_limit = Number of succesful parallel logins
6368 local $/ = "\n";
6369 my $self = shift;
6370 my $wanted_processes = shift;
6371 my $sshcmd = $self->sshcommand();
6372 my $serverlogin = $self->serverlogin();
6373 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
6374 # TODO sh -c wrapper to work for csh
6375 my $cmd = ("$sshdelay$sshcmd $serverlogin -- ".
6376 "echo simultaneouslogin </dev/null 2>&1 &")x$wanted_processes;
6377 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
6378 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
6379 ::die_bug("simultaneouslogin");
6380 my $ssh_limit = <$simul_fh>;
6381 close $simul_fh;
6382 chomp $ssh_limit;
6383 return $ssh_limit;
6386 sub set_ncpus {
6387 my $self = shift;
6388 $self->{'ncpus'} = shift;
6391 sub user_requested_processes {
6392 # Parse the number of processes that the user asked for using -j
6393 # Input:
6394 # $opt_P = string formatted as for -P
6395 # Returns:
6396 # $processes = the number of processes to run on this sshlogin
6397 my $self = shift;
6398 my $opt_P = shift;
6399 my $processes;
6400 if(defined $opt_P) {
6401 if($opt_P =~ /^\+(\d+)$/) {
6402 # E.g. -P +2
6403 my $j = $1;
6404 $processes =
6405 $self->ncpus() + $j;
6406 } elsif ($opt_P =~ /^-(\d+)$/) {
6407 # E.g. -P -2
6408 my $j = $1;
6409 $processes =
6410 $self->ncpus() - $j;
6411 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
6412 # E.g. -P 10.5%
6413 my $j = $1;
6414 $processes =
6415 $self->ncpus() * $j / 100;
6416 } elsif ($opt_P =~ /^(\d+)$/) {
6417 $processes = $1;
6418 if($processes == 0) {
6419 # -P 0 = infinity (or at least close)
6420 $processes = $Global::infinity;
6422 } elsif (-f $opt_P) {
6423 $Global::max_procs_file = $opt_P;
6424 if(open(my $in_fh, "<", $Global::max_procs_file)) {
6425 my $opt_P_file = join("",<$in_fh>);
6426 close $in_fh;
6427 $processes = $self->user_requested_processes($opt_P_file);
6428 } else {
6429 ::error("Cannot open $opt_P.");
6430 ::wait_and_exit(255);
6432 } else {
6433 ::error("Parsing of --jobs/-j/--max-procs/-P failed.");
6434 ::die_usage();
6436 $processes = ::ceil($processes);
6438 return $processes;
6441 sub ncpus {
6442 # Number of CPU threads
6443 # --use_sockets_instead_of_threads = count socket instead
6444 # --use_cores_instead_of_threads = count physical cores instead
6445 # Returns:
6446 # $ncpus = number of cpu (threads) on this sshlogin
6447 local $/ = "\n";
6448 my $self = shift;
6449 if(not defined $self->{'ncpus'}) {
6450 my $sshcmd = $self->sshcommand();
6451 my $serverlogin = $self->serverlogin();
6452 if($serverlogin eq ":") {
6453 if($opt::use_sockets_instead_of_threads) {
6454 $self->{'ncpus'} = socket_core_thread()->{'sockets'};
6455 } elsif($opt::use_cores_instead_of_threads) {
6456 $self->{'ncpus'} = socket_core_thread()->{'cores'};
6457 } else {
6458 $self->{'ncpus'} = socket_core_thread()->{'threads'};
6460 } else {
6461 my $ncpu;
6462 if($opt::use_sockets_instead_of_threads
6464 $opt::use_cpus_instead_of_cores) {
6465 $ncpu =
6466 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-sockets");
6467 } elsif($opt::use_cores_instead_of_threads) {
6468 $ncpu =
6469 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores");
6470 } else {
6471 $ncpu =
6472 ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-threads");
6474 chomp $ncpu;
6475 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
6476 $self->{'ncpus'} = $ncpu;
6477 } else {
6478 ::warning("Could not figure out ".
6479 "number of cpus on $serverlogin ($ncpu). Using 1.");
6480 $self->{'ncpus'} = 1;
6484 return $self->{'ncpus'};
6488 sub nproc {
6489 # Returns:
6490 # Number of threads using `nproc`
6491 my $no_of_threads = ::qqx("nproc");
6492 chomp $no_of_threads;
6493 return $no_of_threads;
6496 sub no_of_sockets() {
6497 return socket_core_thread()->{'sockets'};
6500 sub no_of_cores() {
6501 return socket_core_thread()->{'cores'};
6504 sub no_of_threads() {
6505 return socket_core_thread()->{'threads'};
6508 sub socket_core_thread() {
6509 # Returns:
6511 # 'sockets' => #sockets = number of socket with CPU present
6512 # 'cores' => #cores = number of physical cores
6513 # 'threads' => #threads = number of compute cores (hyperthreading)
6514 # 'active' => #taskset_threads = number of taskset limited cores
6516 my $cpu;
6518 if ($^O eq 'linux') {
6519 $cpu = sct_gnu_linux();
6520 } elsif ($^O eq 'freebsd') {
6521 $cpu = sct_freebsd();
6522 } elsif ($^O eq 'netbsd') {
6523 $cpu = sct_netbsd();
6524 } elsif ($^O eq 'openbsd') {
6525 $cpu = sct_openbsd();
6526 } elsif ($^O eq 'gnu') {
6527 $cpu = sct_hurd();
6528 } elsif ($^O eq 'darwin') {
6529 $cpu = sct_darwin();
6530 } elsif ($^O eq 'solaris') {
6531 $cpu = sct_solaris();
6532 } elsif ($^O eq 'aix') {
6533 $cpu = sct_aix();
6534 } elsif ($^O eq 'hpux') {
6535 $cpu = sct_hpux();
6536 } elsif ($^O eq 'nto') {
6537 $cpu = sct_qnx();
6538 } elsif ($^O eq 'svr5') {
6539 $cpu = sct_openserver();
6540 } elsif ($^O eq 'irix') {
6541 $cpu = sct_irix();
6542 } elsif ($^O eq 'dec_osf') {
6543 $cpu = sct_tru64();
6544 } else {
6545 # Try all methods until you find something that works
6546 $cpu = (sct_gnu_linux()
6547 || sct_freebsd()
6548 || sct_netbsd()
6549 || sct_openbsd()
6550 || sct_hurd()
6551 || sct_darwin()
6552 || sct_solaris()
6553 || sct_aix()
6554 || sct_hpux()
6555 || sct_qnx()
6556 || sct_openserver()
6557 || sct_irix()
6558 || sct_tru64()
6561 if(not $cpu) {
6562 my $nproc = nproc();
6563 if($nproc) {
6564 $cpu->{'sockets'} =
6565 $cpu->{'cores'} =
6566 $cpu->{'threads'} =
6567 $cpu->{'active'} =
6568 $nproc;
6571 if(not $cpu) {
6572 ::warning("Cannot figure out number of cpus. Using 1.");
6573 $cpu->{'sockets'} =
6574 $cpu->{'cores'} =
6575 $cpu->{'threads'} =
6576 $cpu->{'active'} =
6580 # Choose minimum of active and actual
6581 my $mincpu;
6582 $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'});
6583 $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'});
6584 $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'});
6585 return $mincpu;
6588 sub sct_gnu_linux() {
6589 # Returns:
6590 # { 'sockets' => #sockets
6591 # 'cores' => #cores
6592 # 'threads' => #threads
6593 # 'active' => #taskset_threads }
6594 my $cpu;
6595 local $/ = "\n"; # If delimiter is set, then $/ will be wrong
6596 if($ENV{'PARALLEL_CPUINFO'} or -e "/proc/cpuinfo") {
6597 $cpu->{'sockets'} = 0;
6598 $cpu->{'cores'} = 0;
6599 $cpu->{'threads'} = 0;
6600 my %seen;
6601 my %phy_seen;
6602 my @cpuinfo;
6603 my $physicalid;
6604 if(open(my $in_fh, "<", "/proc/cpuinfo")) {
6605 @cpuinfo = <$in_fh>;
6606 close $in_fh;
6608 if($ENV{'PARALLEL_CPUINFO'}) {
6609 # Use CPUINFO from environment - used for testing only
6610 @cpuinfo = split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'};
6612 for(@cpuinfo) {
6613 if(/^physical id.*[:](.*)/) {
6614 $physicalid=$1;
6615 if(not $phy_seen{$1}++) {
6616 $cpu->{'sockets'}++;
6619 if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) {
6620 $cpu->{'cores'}++;
6622 /^processor.*[:]/i and $cpu->{'threads'}++;
6624 $cpu->{'sockets'} ||= 1;
6625 $cpu->{'cores'} ||= $cpu->{'threads'};
6627 if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) {
6628 # if 'taskset' is used to limit number of threads
6629 if(open(my $in_fh, "<", "/proc/self/status")) {
6630 while(<$in_fh>) {
6631 if(/^Cpus_allowed:\s*(\S+)/) {
6632 my $a = $1;
6633 $a =~ tr/,//d;
6634 $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a));
6637 close $in_fh;
6640 if(grep { /\d/ } values %$cpu) {
6641 return $cpu;
6642 } else {
6643 return undef;
6647 sub sct_freebsd() {
6648 # Returns:
6649 # { 'sockets' => #sockets
6650 # 'cores' => #cores
6651 # 'threads' => #threads
6652 # 'active' => #taskset_threads }
6653 local $/ = "\n";
6654 my $cpu;
6655 $cpu->{'cores'} = (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })
6657 ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }));
6658 $cpu->{'cores'} and chomp $cpu->{'cores'};
6659 $cpu->{'threads'} =
6660 (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })
6662 ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }));
6663 $cpu->{'threads'} and chomp $cpu->{'threads'};
6664 $cpu->{'sockets'} ||= $cpu->{'cores'};
6666 if(grep { /\d/ } values %$cpu) {
6667 return $cpu;
6668 } else {
6669 return undef;
6673 sub sct_netbsd() {
6674 # Returns:
6675 # { 'sockets' => #sockets
6676 # 'cores' => #cores
6677 # 'threads' => #threads
6678 # 'active' => #taskset_threads }
6679 local $/ = "\n";
6680 my $cpu;
6681 $cpu->{'cores'} = ::qqx("sysctl -n hw.ncpu");
6682 $cpu->{'cores'} and chomp $cpu->{'cores'};
6683 $cpu->{'threads'} = ::qqx("sysctl -n hw.ncpu");
6684 $cpu->{'threads'} and chomp $cpu->{'threads'};
6685 $cpu->{'sockets'} ||= $cpu->{'cores'};
6687 if(grep { /\d/ } values %$cpu) {
6688 return $cpu;
6689 } else {
6690 return undef;
6694 sub sct_openbsd() {
6695 # Returns:
6696 # { 'sockets' => #sockets
6697 # 'cores' => #cores
6698 # 'threads' => #threads
6699 # 'active' => #taskset_threads }
6700 local $/ = "\n";
6701 my $cpu;
6702 $cpu->{'cores'} = ::qqx('sysctl -n hw.ncpu');
6703 $cpu->{'cores'} and chomp $cpu->{'cores'};
6704 $cpu->{'threads'} = ::qqx('sysctl -n hw.ncpu');
6705 $cpu->{'threads'} and chomp $cpu->{'threads'};
6706 $cpu->{'sockets'} ||= $cpu->{'cores'};
6708 if(grep { /\d/ } values %$cpu) {
6709 return $cpu;
6710 } else {
6711 return undef;
6715 sub sct_hurd() {
6716 # Returns:
6717 # { 'sockets' => #sockets
6718 # 'cores' => #cores
6719 # 'threads' => #threads
6720 # 'active' => #taskset_threads }
6721 local $/ = "\n";
6722 my $cpu;
6723 $cpu->{'cores'} = ::qqx("nproc");
6724 $cpu->{'cores'} and chomp $cpu->{'cores'};
6725 $cpu->{'threads'} = ::qqx("nproc");
6726 $cpu->{'threads'} and chomp $cpu->{'threads'};
6728 if(grep { /\d/ } values %$cpu) {
6729 return $cpu;
6730 } else {
6731 return undef;
6735 sub sct_darwin() {
6736 # Returns:
6737 # { 'sockets' => #sockets
6738 # 'cores' => #cores
6739 # 'threads' => #threads
6740 # 'active' => #taskset_threads }
6741 local $/ = "\n";
6742 my $cpu;
6743 $cpu->{'cores'} =
6744 (::qqx('sysctl -n hw.physicalcpu')
6746 ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }));
6747 $cpu->{'cores'} and chomp $cpu->{'cores'};
6748 $cpu->{'threads'} =
6749 (::qqx('sysctl -n hw.logicalcpu')
6751 ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }));
6752 $cpu->{'threads'} and chomp $cpu->{'threads'};
6753 $cpu->{'sockets'} ||= $cpu->{'cores'};
6755 if(grep { /\d/ } values %$cpu) {
6756 return $cpu;
6757 } else {
6758 return undef;
6762 sub sct_solaris() {
6763 # Returns:
6764 # { 'sockets' => #sockets
6765 # 'cores' => #cores
6766 # 'threads' => #threads
6767 # 'active' => #taskset_threads }
6768 local $/ = "\n";
6769 my $cpu;
6770 if(-x "/usr/sbin/psrinfo") {
6771 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
6772 if($#psrinfo >= 0) {
6773 $cpu->{'cores'} = $#psrinfo +1;
6776 if(-x "/usr/sbin/prtconf") {
6777 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
6778 if($#prtconf >= 0) {
6779 $cpu->{'cores'} = $#prtconf +1;
6782 if(-x "/usr/sbin/prtconf") {
6783 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
6784 if($#prtconf >= 0) {
6785 $cpu->{'cores'} = $#prtconf +1;
6788 $cpu->{'cores'} and chomp $cpu->{'cores'};
6790 if(-x "/usr/sbin/psrinfo") {
6791 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
6792 if($#psrinfo >= 0) {
6793 $cpu->{'threads'} = $#psrinfo +1;
6796 if(-x "/usr/sbin/prtconf") {
6797 my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance");
6798 if($#prtconf >= 0) {
6799 $cpu->{'threads'} = $#prtconf +1;
6802 $cpu->{'threads'} and chomp $cpu->{'threads'};
6804 if(grep { /\d/ } values %$cpu) {
6805 return $cpu;
6806 } else {
6807 return undef;
6811 sub sct_aix() {
6812 # Returns:
6813 # { 'sockets' => #sockets
6814 # 'cores' => #cores
6815 # 'threads' => #threads
6816 # 'active' => #taskset_threads }
6817 local $/ = "\n";
6818 my $cpu;
6819 if(-x "/usr/sbin/lscfg") {
6820 if(open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) {
6821 $cpu->{'cores'} = <$in_fh>;
6822 chomp ($cpu->{'cores'});
6823 close $in_fh;
6826 if(-x "/usr/bin/vmstat") {
6827 if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) {
6828 while(<$in_fh>) {
6829 /lcpu=([0-9]*) / and $cpu->{'threads'} = $1;
6831 close $in_fh;
6835 if(grep { /\d/ } values %$cpu) {
6836 # BUG It is not not known how to calculate this
6837 $cpu->{'sockets'} = 1;
6838 return $cpu;
6839 } else {
6840 return undef;
6844 sub sct_hpux() {
6845 # Returns:
6846 # { 'sockets' => #sockets
6847 # 'cores' => #cores
6848 # 'threads' => #threads
6849 # 'active' => #taskset_threads }
6850 local $/ = "\n";
6851 my $cpu;
6852 $cpu->{'cores'} =
6853 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'});
6854 chomp($cpu->{'cores'});
6855 $cpu->{'threads'} =
6856 ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'});
6858 if(grep { /\d/ } values %$cpu) {
6859 # BUG It is not not known how to calculate this
6860 $cpu->{'sockets'} = 1;
6861 return $cpu;
6862 } else {
6863 return undef;
6867 sub sct_qnx() {
6868 # Returns:
6869 # { 'sockets' => #sockets
6870 # 'cores' => #cores
6871 # 'threads' => #threads
6872 # 'active' => #taskset_threads }
6873 local $/ = "\n";
6874 my $cpu;
6875 # BUG: It is not known how to calculate this.
6877 if(grep { /\d/ } values %$cpu) {
6878 return $cpu;
6879 } else {
6880 return undef;
6884 sub sct_openserver() {
6885 # Returns:
6886 # { 'sockets' => #sockets
6887 # 'cores' => #cores
6888 # 'threads' => #threads
6889 # 'active' => #taskset_threads }
6890 local $/ = "\n";
6891 my $cpu;
6892 if(-x "/usr/sbin/psrinfo") {
6893 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
6894 if($#psrinfo >= 0) {
6895 $cpu->{'cores'} = $#psrinfo +1;
6898 if(-x "/usr/sbin/psrinfo") {
6899 my @psrinfo = ::qqx("/usr/sbin/psrinfo");
6900 if($#psrinfo >= 0) {
6901 $cpu->{'threads'} = $#psrinfo +1;
6904 $cpu->{'sockets'} ||= $cpu->{'cores'};
6906 if(grep { /\d/ } values %$cpu) {
6907 return $cpu;
6908 } else {
6909 return undef;
6913 sub sct_irix() {
6914 # Returns:
6915 # { 'sockets' => #sockets
6916 # 'cores' => #cores
6917 # 'threads' => #threads
6918 # 'active' => #taskset_threads }
6919 local $/ = "\n";
6920 my $cpu;
6921 $cpu->{'cores'} = ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' });
6922 $cpu->{'cores'} and chomp $cpu->{'cores'};
6924 if(grep { /\d/ } values %$cpu) {
6925 return $cpu;
6926 } else {
6927 return undef;
6931 sub sct_tru64() {
6932 # Returns:
6933 # { 'sockets' => #sockets
6934 # 'cores' => #cores
6935 # 'threads' => #threads
6936 # 'active' => #taskset_threads }
6937 local $/ = "\n";
6938 my $cpu;
6939 $cpu->{'cores'} = ::qqx("sizer -pr");
6940 $cpu->{'cores'} and chomp $cpu->{'cores'};
6941 $cpu->{'cores'} ||= 1;
6942 $cpu->{'sockets'} ||= $cpu->{'cores'};
6943 $cpu->{'threads'} ||= $cpu->{'cores'};
6945 if(grep { /\d/ } values %$cpu) {
6946 return $cpu;
6947 } else {
6948 return undef;
6952 sub sshcommand {
6953 # Returns:
6954 # $sshcommand = the command (incl options) to run when using ssh
6955 my $self = shift;
6956 if (not defined $self->{'sshcommand'}) {
6957 $self->sshcommand_of_sshlogin();
6959 return $self->{'sshcommand'};
6962 sub serverlogin {
6963 # Returns:
6964 # $sshcommand = the command (incl options) to run when using ssh
6965 my $self = shift;
6966 if (not defined $self->{'serverlogin'}) {
6967 $self->sshcommand_of_sshlogin();
6969 return $self->{'serverlogin'};
6972 sub sshcommand_of_sshlogin {
6973 # Compute ssh command and serverlogin from sshlogin
6974 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
6975 # 'user@server' -> ('ssh','user@server')
6976 # 'myssh user@server' -> ('myssh','user@server')
6977 # 'myssh -l user server' -> ('myssh -l user','server')
6978 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
6979 # Sets:
6980 # $self->{'sshcommand'}
6981 # $self->{'serverlogin'}
6982 my $self = shift;
6983 my ($sshcmd, $serverlogin);
6984 # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh'
6985 $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh";
6986 if($self->{'string'} =~ /(.+) (\S+)$/) {
6987 # Own ssh command
6988 $sshcmd = $1; $serverlogin = $2;
6989 } else {
6990 # Normal ssh
6991 if($opt::controlmaster) {
6992 # Use control_path to make ssh faster
6993 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
6994 $sshcmd = $opt::ssh." -S ".$control_path;
6995 $serverlogin = $self->{'string'};
6996 if(not $self->{'control_path'}{$control_path}++) {
6997 # Master is not running for this control_path
6998 # Start it
6999 my $pid = fork();
7000 if($pid) {
7001 $Global::sshmaster{$pid} ||= 1;
7002 } else {
7003 $SIG{'TERM'} = undef;
7004 # Ignore the 'foo' being printed
7005 open(STDOUT,">","/dev/null");
7006 # STDERR >/dev/null to ignore
7007 open(STDERR,">","/dev/null");
7008 open(STDIN,"<","/dev/null");
7009 # Run a sleep that outputs data, so it will discover
7010 # if the ssh connection closes.
7011 my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}');
7012 my @master = ($opt::ssh, "-MTS",
7013 $control_path, $serverlogin, "--", "perl", "-e",
7014 $sleep);
7015 exec(@master);
7018 } else {
7019 $sshcmd = $opt::ssh; $serverlogin = $self->{'string'};
7023 if($serverlogin =~ s/(\S+)\@(\S+)/$2/) {
7024 # convert user@server to '-l user server'
7025 # because lsh does not support user@server
7026 $sshcmd = $sshcmd." -l ".$1;
7029 $self->{'sshcommand'} = $sshcmd;
7030 $self->{'serverlogin'} = $serverlogin;
7033 sub control_path_dir {
7034 # Returns:
7035 # $control_path_dir = dir of control path (for -M)
7036 my $self = shift;
7037 if(not defined $self->{'control_path_dir'}) {
7038 $self->{'control_path_dir'} =
7039 # Use $ENV{'TMPDIR'} as that is typically not
7040 # NFS mounted
7041 File::Temp::tempdir($ENV{'TMPDIR'}
7042 . "/control_path_dir-XXXX",
7043 CLEANUP => 1);
7045 return $self->{'control_path_dir'};
7048 sub rsync_transfer_cmd {
7049 # Command to run to transfer a file
7050 # Input:
7051 # $file = filename of file to transfer
7052 # $workdir = destination dir
7053 # Returns:
7054 # $cmd = rsync command to run to transfer $file ("" if unreadable)
7055 my $self = shift;
7056 my $file = shift;
7057 my $workdir = shift;
7058 if(not -r $file) {
7059 ::warning($file. " is not readable and will not be transferred.");
7060 return "true";
7062 my $rsync_destdir;
7063 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
7064 if($relpath) {
7065 $rsync_destdir = ::shell_quote_file($workdir);
7066 } else {
7067 # rsync /foo/bar /
7068 $rsync_destdir = "/";
7070 $file = ::shell_quote_file($file);
7071 my $sshcmd = $self->sshcommand();
7072 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}.
7073 " -e".::Q($sshcmd);
7074 my $serverlogin = $self->serverlogin();
7075 # Make dir if it does not exist
7076 return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " .
7077 rsync()." $rsync_opts $file $serverlogin:$rsync_destdir";
7080 sub cleanup_cmd {
7081 # Command to run to remove the remote file
7082 # Input:
7083 # $file = filename to remove
7084 # $workdir = destination dir
7085 # Returns:
7086 # $cmd = ssh command to run to remove $file and empty parent dirs
7087 my $self = shift;
7088 my $file = shift;
7089 my $workdir = shift;
7090 my $f = $file;
7091 if($f =~ m:/\./:) {
7092 # foo/bar/./baz/quux => workdir/baz/quux
7093 # /foo/bar/./baz/quux => workdir/baz/quux
7094 $f =~ s:.*/\./:$workdir/:;
7095 } elsif($f =~ m:^[^/]:) {
7096 # foo/bar => workdir/foo/bar
7097 $f = $workdir."/".$f;
7099 my @subdirs = split m:/:, ::dirname($f);
7100 my @rmdir;
7101 my $dir = "";
7102 for(@subdirs) {
7103 $dir .= $_."/";
7104 unshift @rmdir, ::shell_quote_file($dir);
7106 my $rmdir = @rmdir ? "sh -c ".::Q("rmdir @rmdir 2>/dev/null;") : "";
7107 if(defined $opt::workdir and $opt::workdir eq "...") {
7108 $rmdir .= ::Q("rm -rf " . ::shell_quote_file($workdir).';');
7111 $f = ::shell_quote_file($f);
7112 my $sshcmd = $self->sshcommand();
7113 my $serverlogin = $self->serverlogin();
7114 return "$sshcmd $serverlogin -- ".::Q("rm -f $f; $rmdir");
7118 my $rsync;
7120 sub rsync {
7121 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
7122 # If the version >= 3.1.0: downgrade to protocol 30
7123 # Returns:
7124 # $rsync = "rsync" or "rsync --protocol 30"
7125 if(not $rsync) {
7126 my @out = `rsync --version`;
7127 for (@out) {
7128 if(/version (\d+.\d+)(.\d+)?/) {
7129 if($1 >= 3.1) {
7130 # Version 3.1.0 or later: Downgrade to protocol 30
7131 $rsync = "rsync --protocol 30";
7132 } else {
7133 $rsync = "rsync";
7137 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
7139 return $rsync;
7144 package JobQueue;
7146 sub new {
7147 my $class = shift;
7148 my $commandref = shift;
7149 my $read_from = shift;
7150 my $context_replace = shift;
7151 my $max_number_of_args = shift;
7152 my $transfer_files = shift;
7153 my $return_files = shift;
7154 my $commandlinequeue = CommandLineQueue->new
7155 ($commandref, $read_from, $context_replace, $max_number_of_args,
7156 $transfer_files, $return_files);
7157 my @unget = ();
7158 return bless {
7159 'unget' => \@unget,
7160 'commandlinequeue' => $commandlinequeue,
7161 'this_job_no' => 0,
7162 'total_jobs' => undef,
7163 }, ref($class) || $class;
7166 sub get {
7167 my $self = shift;
7169 $self->{'this_job_no'}++;
7170 if(@{$self->{'unget'}}) {
7171 return shift @{$self->{'unget'}};
7172 } else {
7173 my $commandline = $self->{'commandlinequeue'}->get();
7174 if(defined $commandline) {
7175 return Job->new($commandline);
7176 } else {
7177 $self->{'this_job_no'}--;
7178 return undef;
7183 sub unget {
7184 my $self = shift;
7185 unshift @{$self->{'unget'}}, @_;
7186 $self->{'this_job_no'} -= @_;
7189 sub empty {
7190 my $self = shift;
7191 my $empty = (not @{$self->{'unget'}}) &&
7192 $self->{'commandlinequeue'}->empty();
7193 ::debug("run", "JobQueue->empty $empty ");
7194 return $empty;
7197 sub total_jobs {
7198 my $self = shift;
7199 if(not defined $self->{'total_jobs'}) {
7200 if($opt::pipe and not $opt::tee) {
7201 ::error("--pipe is incompatible with --eta/--bar/--shuf");
7202 ::wait_and_exit(255);
7204 if($opt::sqlworker) {
7205 $self->{'total_jobs'} = $Global::sql->total_jobs();
7206 } else {
7207 my $record;
7208 my @arg_records;
7209 my $record_queue = $self->{'commandlinequeue'}{'arg_queue'};
7210 my $start = time;
7211 while($record = $record_queue->get()) {
7212 push @arg_records, $record;
7213 if(time - $start > 10) {
7214 ::warning("Reading ".scalar(@arg_records).
7215 " arguments took longer than 10 seconds.");
7216 $opt::eta && ::warning("Consider removing --eta.");
7217 $opt::bar && ::warning("Consider removing --bar.");
7218 $opt::shuf && ::warning("Consider removing --shuf.");
7219 last;
7222 while($record = $record_queue->get()) {
7223 push @arg_records, $record;
7225 if($opt::shuf and @arg_records) {
7226 my $i = @arg_records;
7227 while (--$i) {
7228 my $j = int rand($i+1);
7229 @arg_records[$i,$j] = @arg_records[$j,$i];
7232 $record_queue->unget(@arg_records);
7233 $self->{'total_jobs'} =
7234 ::ceil((1+$#arg_records+$self->{'this_job_no'})
7235 / ::max($Global::max_number_of_args,1));
7236 ::debug("init","Total jobs: ".$self->{'total_jobs'}.
7237 " (".(1+$#arg_records)."+".$self->{'this_job_no'}.")\n");
7240 return $self->{'total_jobs'};
7243 sub flush_total_jobs {
7244 # Unset total_jobs to force recomputing
7245 my $self = shift;
7246 $self->{'total_jobs'} = undef;
7249 sub next_seq {
7250 my $self = shift;
7252 return $self->{'commandlinequeue'}->seq();
7255 sub quote_args {
7256 my $self = shift;
7257 return $self->{'commandlinequeue'}->quote_args();
7261 package Job;
7263 sub new {
7264 my $class = shift;
7265 my $commandlineref = shift;
7266 return bless {
7267 'commandline' => $commandlineref, # CommandLine object
7268 'workdir' => undef, # --workdir
7269 # filehandle for stdin (used for --pipe)
7270 # filename for writing stdout to (used for --files)
7271 # remaining data not sent to stdin (used for --pipe)
7272 # tmpfiles to cleanup when job is done
7273 'unlink' => [],
7274 # amount of data sent via stdin (used for --pipe)
7275 'transfersize' => 0, # size of files using --transfer
7276 'returnsize' => 0, # size of files using --return
7277 'pid' => undef,
7278 # hash of { SSHLogins => number of times the command failed there }
7279 'failed' => undef,
7280 'sshlogin' => undef,
7281 # The commandline wrapped with rsync and ssh
7282 'sshlogin_wrap' => undef,
7283 'exitstatus' => undef,
7284 'exitsignal' => undef,
7285 # Timestamp for timeout if any
7286 'timeout' => undef,
7287 'virgin' => 1,
7288 # Output used for SQL and CSV-output
7289 'output' => { 1 => [], 2 => [] },
7290 'halfline' => { 1 => [], 2 => [] },
7291 }, ref($class) || $class;
7294 sub replaced {
7295 my $self = shift;
7296 $self->{'commandline'} or ::die_bug("commandline empty");
7297 return $self->{'commandline'}->replaced();
7300 sub seq {
7301 my $self = shift;
7302 return $self->{'commandline'}->seq();
7305 sub set_seq {
7306 my $self = shift;
7307 return $self->{'commandline'}->set_seq(shift);
7310 sub slot {
7311 my $self = shift;
7312 return $self->{'commandline'}->slot();
7315 sub free_slot {
7316 my $self = shift;
7317 push @Global::slots, $self->slot();
7321 my($cattail);
7323 sub cattail {
7324 # Returns:
7325 # $cattail = perl program for:
7326 # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
7327 if(not $cattail) {
7328 $cattail = q{
7329 # cat followed by tail (possibly with rm as soon at the file is opened)
7330 # If $writerpid dead: finish after this round
7331 use Fcntl;
7332 $|=1;
7334 my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
7335 if($read_file) {
7336 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
7337 } else {
7338 *IN = *STDIN;
7340 while(! -s $comfile) {
7341 # Writer has not opened the buffer file, so we cannot remove it yet
7342 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
7343 usleep($sleep);
7345 # The writer and we have both opened the file, so it is safe to unlink it
7346 unlink $unlink_file;
7347 unlink $comfile;
7349 my $first_round = 1;
7350 my $flags;
7351 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
7352 $flags |= O_NONBLOCK; # Add non-blocking to the flags
7353 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
7355 while(1) {
7356 # clear EOF
7357 seek(IN,0,1);
7358 my $writer_running = kill 0, $writerpid;
7359 $read = sysread(IN,$buf,131072);
7360 if($read) {
7361 if($first_round) {
7362 # Only start the command if there any input to process
7363 $first_round = 0;
7364 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
7367 # Blocking print
7368 while($buf) {
7369 my $bytes_written = syswrite(OUT,$buf);
7370 # syswrite may be interrupted by SIGHUP
7371 substr($buf,0,$bytes_written) = "";
7373 # Something printed: Wait less next time
7374 $sleep /= 2;
7375 } else {
7376 if(eof(IN) and not $writer_running) {
7377 # Writer dead: There will never be sent more to the decompressor
7378 close OUT;
7379 exit;
7381 # TODO This could probably be done more efficiently using select(2)
7382 # Nothing read: Wait longer before next read
7383 # Up to 100 milliseconds
7384 $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
7385 usleep($sleep);
7389 sub usleep {
7390 # Sleep this many milliseconds.
7391 my $secs = shift;
7392 select(undef, undef, undef, $secs/1000);
7395 $cattail =~ s/#.*//mg;
7396 $cattail =~ s/\s+/ /g;
7398 return $cattail;
7402 sub openoutputfiles {
7403 # Open files for STDOUT and STDERR
7404 # Set file handles in $self->fh
7405 my $self = shift;
7406 my ($outfhw, $errfhw, $outname, $errname);
7408 if($opt::linebuffer and not
7409 ($opt::keeporder or $opt::files or $opt::results or
7410 $opt::compress or $opt::compress_program or
7411 $opt::decompress_program)) {
7412 # Do not save to files: Use non-blocking pipe
7413 my ($outfhr, $errfhr);
7414 pipe($outfhr, $outfhw) || die;
7415 pipe($errfhr, $errfhw) || die;
7416 $self->set_fh(1,'w',$outfhw);
7417 $self->set_fh(2,'w',$errfhw);
7418 $self->set_fh(1,'r',$outfhr);
7419 $self->set_fh(2,'r',$errfhr);
7420 # Make it possible to read non-blocking from the pipe
7421 for my $fdno (1,2) {
7422 ::set_fh_non_blocking($self->fh($fdno,'r'));
7424 # Return immediately because we do not need setting filenames
7425 return;
7426 } elsif($opt::results and not $Global::csvsep) {
7427 my $out = $self->{'commandline'}->results_out();
7428 my $seqname;
7429 if($out eq $opt::results or $out =~ m:/$:) {
7430 # $opt::results = simple string or ending in /
7431 # => $out is a dir/
7432 # prefix/name1/val1/name2/val2/seq
7433 $seqname = $out."seq";
7434 # prefix/name1/val1/name2/val2/stdout
7435 $outname = $out."stdout";
7436 # prefix/name1/val1/name2/val2/stderr
7437 $errname = $out."stderr";
7438 } else {
7439 # $opt::results = replacement string not ending in /
7440 # => $out is a file
7441 $outname = $out;
7442 $errname = "$out.err";
7443 $seqname = "$out.seq";
7445 my $seqfhw;
7446 if(not open($seqfhw, "+>", $seqname)) {
7447 ::error("Cannot write to `$seqname'.");
7448 ::wait_and_exit(255);
7450 print $seqfhw $self->seq();
7451 close $seqfhw;
7452 if(not open($outfhw, "+>", $outname)) {
7453 ::error("Cannot write to `$outname'.");
7454 ::wait_and_exit(255);
7456 if(not open($errfhw, "+>", $errname)) {
7457 ::error("Cannot write to `$errname'.");
7458 ::wait_and_exit(255);
7460 $self->set_fh(1,"unlink","");
7461 $self->set_fh(2,"unlink","");
7462 if($opt::sqlworker) {
7463 # Save the filenames in SQL table
7464 $Global::sql->update("SET Stdout = ?, Stderr = ? ".
7465 "WHERE Seq = ". $self->seq(),
7466 $outname, $errname);
7468 } elsif(not $opt::ungroup) {
7469 # To group we create temporary files for STDOUT and STDERR
7470 # To avoid the cleanup unlink the files immediately (but keep them open)
7471 if($opt::files) {
7472 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
7473 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
7474 # --files => only remove stderr
7475 $self->set_fh(1,"unlink","");
7476 $self->set_fh(2,"unlink",$errname);
7477 } else {
7478 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
7479 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
7480 $self->set_fh(1,"unlink",$outname);
7481 $self->set_fh(2,"unlink",$errname);
7483 } else {
7484 # --ungroup
7485 open($outfhw,">&",$Global::fd{1}) || die;
7486 open($errfhw,">&",$Global::fd{2}) || die;
7487 # File name must be empty as it will otherwise be printed
7488 $outname = "";
7489 $errname = "";
7490 $self->set_fh(1,"unlink",$outname);
7491 $self->set_fh(2,"unlink",$errname);
7493 # Set writing FD
7494 $self->set_fh(1,'w',$outfhw);
7495 $self->set_fh(2,'w',$errfhw);
7496 $self->set_fh(1,'name',$outname);
7497 $self->set_fh(2,'name',$errname);
7498 if($opt::compress) {
7499 $self->filter_through_compress();
7500 } elsif(not $opt::ungroup) {
7501 $self->grouped();
7503 if($opt::linebuffer) {
7504 # Make it possible to read non-blocking from
7505 # the buffer files
7506 # Used for --linebuffer with -k, --files, --res, --compress*
7507 for my $fdno (1,2) {
7508 ::set_fh_non_blocking($self->fh($fdno,'r'));
7513 sub print_verbose_dryrun {
7514 # If -v set: print command to stdout (possibly buffered)
7515 # This must be done before starting the command
7516 my $self = shift;
7517 if($Global::verbose or $opt::dryrun) {
7518 my $fh = $self->fh(1,"w");
7519 if($Global::verbose <= 1) {
7520 print $fh $self->replaced(),"\n";
7521 } else {
7522 # Verbose level > 1: Print the rsync and stuff
7523 print $fh $self->wrapped(),"\n";
7526 if($opt::sqlworker) {
7527 $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(),
7528 $self->replaced());
7532 sub add_rm {
7533 # Files to remove when job is done
7534 my $self = shift;
7535 push @{$self->{'unlink'}}, @_;
7538 sub get_rm {
7539 # Files to remove when job is done
7540 my $self = shift;
7541 return @{$self->{'unlink'}};
7544 sub cleanup {
7545 # Remove files when job is done
7546 my $self = shift;
7547 unlink $self->get_rm();
7548 delete @Global::unlink{$self->get_rm()};
7551 sub grouped {
7552 my $self = shift;
7553 # Set reading FD if using --group (--ungroup does not need)
7554 for my $fdno (1,2) {
7555 # Re-open the file for reading
7556 # so fdw can be closed seperately
7557 # and fdr can be seeked seperately (for --line-buffer)
7558 open(my $fdr,"<", $self->fh($fdno,'name')) ||
7559 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
7560 $self->set_fh($fdno,'r',$fdr);
7561 # Unlink if not debugging
7562 $Global::debug or ::rm($self->fh($fdno,"unlink"));
7566 sub empty_input_wrapper {
7567 # If no input: exit(0)
7568 # If some input: Pass input as input to command on STDIN
7569 # This avoids starting the command if there is no input.
7570 # Input:
7571 # $command = command to pipe data to
7572 # Returns:
7573 # $wrapped_command = the wrapped command
7574 my $command = shift;
7575 my $script =
7576 ::spacefree(0,q{
7577 if(sysread(STDIN, $buf, 1)) {
7578 open($fh, "|-", @ARGV) || die;
7579 syswrite($fh, $buf);
7580 # Align up to 128k block
7581 if($read = sysread(STDIN, $buf, 131071)) {
7582 syswrite($fh, $buf);
7584 while($read = sysread(STDIN, $buf, 131072)) {
7585 syswrite($fh, $buf);
7587 close $fh;
7588 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
7591 ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n");
7592 if($Global::cshell
7594 length $command > 499) {
7595 # csh does not like words longer than 1000 (499 quoted)
7596 # $command = "perl -e '".base64_zip_eval()."' ".
7597 # join" ",string_zip_base64(
7598 # 'exec "'.::perl_quote_scalar($command).'"');
7599 return 'perl -e '.::Q($script)." ".
7600 base64_wrap("exec \"$Global::shell\",'-c',\"".
7601 ::perl_quote_scalar($command).'"');
7602 } else {
7603 return 'perl -e '.::Q($script)." ".
7604 $Global::shell." -c ".::Q($command);
7608 sub filter_through_compress {
7609 my $self = shift;
7610 # Send stdout to stdin for $opt::compress_program(1)
7611 # Send stderr to stdin for $opt::compress_program(2)
7612 # cattail get pid: $pid = $self->fh($fdno,'rpid');
7613 my $cattail = cattail();
7615 for my $fdno (1,2) {
7616 # Make a communication file.
7617 my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
7618 close $fh;
7619 # Compressor: (echo > $comfile; compress pipe) > output
7620 # When the echo is written to $comfile,
7621 # it is known that output file is opened,
7622 # thus output file can then be removed by the decompressor.
7623 my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".
7624 empty_input_wrapper($opt::compress_program).") >".
7625 $self->fh($fdno,'name')) || die $?;
7626 $self->set_fh($fdno,'w',$fdw);
7627 $self->set_fh($fdno,'wpid',$wpid);
7628 # Decompressor: open output; -s $comfile > 0: rm $comfile output;
7629 # decompress output > stdout
7630 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
7631 $opt::decompress_program, $wpid,
7632 $self->fh($fdno,'name'),$self->fh($fdno,'unlink'))
7633 || die $?;
7634 $self->set_fh($fdno,'r',$fdr);
7635 $self->set_fh($fdno,'rpid',$rpid);
7641 sub set_fh {
7642 # Set file handle
7643 my ($self, $fd_no, $key, $fh) = @_;
7644 $self->{'fd'}{$fd_no,$key} = $fh;
7647 sub fh {
7648 # Get file handle
7649 my ($self, $fd_no, $key) = @_;
7650 return $self->{'fd'}{$fd_no,$key};
7653 sub write {
7654 my $self = shift;
7655 my $remaining_ref = shift;
7656 my $stdin_fh = $self->fh(0,"w");
7658 my $len = length $$remaining_ref;
7659 # syswrite may not write all in one go,
7660 # so make sure everything is written.
7661 my $written;
7663 # If writing is to a closed pipe:
7664 # Do not call signal handler, but let nothing be written
7665 local $SIG{PIPE} = undef;
7666 while($written = syswrite($stdin_fh,$$remaining_ref)){
7667 substr($$remaining_ref,0,$written) = "";
7671 sub set_block {
7672 # Copy stdin buffer from $block_ref up to $endpos
7673 # Prepend with $header_ref if virgin (i.e. not --roundrobin)
7674 # Remove $recstart and $recend if needed
7675 # Input:
7676 # $header_ref = ref to $header to prepend
7677 # $buffer_ref = ref to $buffer containing the block
7678 # $endpos = length of $block to pass on
7679 # $recstart = --recstart regexp
7680 # $recend = --recend regexp
7681 # Returns:
7682 # N/A
7683 my $self = shift;
7684 my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_;
7685 $self->{'block'} = ($self->virgin() ? $$header_ref : "").
7686 substr($$buffer_ref,0,$endpos);
7687 if($opt::remove_rec_sep) {
7688 remove_rec_sep(\$self->{'block'},$recstart,$recend);
7690 $self->{'block_length'} = length $self->{'block'};
7691 $self->{'block_pos'} = 0;
7692 $self->add_transfersize($self->{'block_length'});
7695 sub block_ref {
7696 my $self = shift;
7697 return \$self->{'block'};
7701 sub block_length {
7702 my $self = shift;
7703 return $self->{'block_length'};
7706 sub remove_rec_sep {
7707 my ($block_ref,$recstart,$recend) = @_;
7708 # Remove record separator
7709 $$block_ref =~ s/$recend$recstart//gos;
7710 $$block_ref =~ s/^$recstart//os;
7711 $$block_ref =~ s/$recend$//os;
7714 sub non_blocking_write {
7715 my $self = shift;
7716 my $something_written = 0;
7717 use POSIX qw(:errno_h);
7719 my $in = $self->fh(0,"w");
7720 my $rv = syswrite($in,
7721 substr($self->{'block'},$self->{'block_pos'}));
7722 if (!defined($rv) && $! == EAGAIN) {
7723 # would block - but would have written
7724 $something_written = 0;
7725 # avoid triggering auto expanding block
7726 $Global::no_autoexpand_block ||= 1;
7727 } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) {
7728 # incomplete write
7729 # Remove the written part
7730 $self->{'block_pos'} += $rv;
7731 $something_written = $rv;
7732 } else {
7733 # successfully wrote everything
7734 # Empty block to free memory
7735 my $a = "";
7736 $self->set_block(\$a,\$a,0,"","");
7737 $something_written = $rv;
7739 ::debug("pipe", "Non-block: ", $something_written);
7740 return $something_written;
7744 sub virgin {
7745 my $self = shift;
7746 return $self->{'virgin'};
7749 sub set_virgin {
7750 my $self = shift;
7751 $self->{'virgin'} = shift;
7754 sub pid {
7755 my $self = shift;
7756 return $self->{'pid'};
7759 sub set_pid {
7760 my $self = shift;
7761 $self->{'pid'} = shift;
7764 sub starttime {
7765 # Returns:
7766 # UNIX-timestamp this job started
7767 my $self = shift;
7768 return sprintf("%.3f",$self->{'starttime'});
7771 sub set_starttime {
7772 my $self = shift;
7773 my $starttime = shift || ::now();
7774 $self->{'starttime'} = $starttime;
7775 $opt::sqlworker and
7776 $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(),
7777 $starttime);
7780 sub runtime {
7781 # Returns:
7782 # Run time in seconds with 3 decimals
7783 my $self = shift;
7784 return sprintf("%.3f",
7785 int(($self->endtime() - $self->starttime())*1000)/1000);
7788 sub endtime {
7789 # Returns:
7790 # UNIX-timestamp this job ended
7791 # 0 if not ended yet
7792 my $self = shift;
7793 return ($self->{'endtime'} || 0);
7796 sub set_endtime {
7797 my $self = shift;
7798 my $endtime = shift;
7799 $self->{'endtime'} = $endtime;
7800 $opt::sqlworker and
7801 $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(),
7802 $self->runtime());
7805 sub is_timedout {
7806 # Is the job timedout?
7807 # Input:
7808 # $delta_time = time that the job may run
7809 # Returns:
7810 # True or false
7811 my $self = shift;
7812 my $delta_time = shift;
7813 return time > $self->{'starttime'} + $delta_time;
7816 sub kill {
7817 my $self = shift;
7818 $self->set_exitstatus(-1);
7819 ::kill_sleep_seq($self->pid());
7822 sub failed {
7823 # return number of times failed for this $sshlogin
7824 # Input:
7825 # $sshlogin
7826 # Returns:
7827 # Number of times failed for $sshlogin
7828 my $self = shift;
7829 my $sshlogin = shift;
7830 return $self->{'failed'}{$sshlogin};
7833 sub failed_here {
7834 # return number of times failed for the current $sshlogin
7835 # Returns:
7836 # Number of times failed for this sshlogin
7837 my $self = shift;
7838 return $self->{'failed'}{$self->sshlogin()};
7841 sub add_failed {
7842 # increase the number of times failed for this $sshlogin
7843 my $self = shift;
7844 my $sshlogin = shift;
7845 $self->{'failed'}{$sshlogin}++;
7848 sub add_failed_here {
7849 # increase the number of times failed for the current $sshlogin
7850 my $self = shift;
7851 $self->{'failed'}{$self->sshlogin()}++;
7854 sub reset_failed {
7855 # increase the number of times failed for this $sshlogin
7856 my $self = shift;
7857 my $sshlogin = shift;
7858 delete $self->{'failed'}{$sshlogin};
7861 sub reset_failed_here {
7862 # increase the number of times failed for this $sshlogin
7863 my $self = shift;
7864 delete $self->{'failed'}{$self->sshlogin()};
7867 sub min_failed {
7868 # Returns:
7869 # the number of sshlogins this command has failed on
7870 # the minimal number of times this command has failed
7871 my $self = shift;
7872 my $min_failures =
7873 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
7874 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
7875 return ($number_of_sshlogins_failed_on,$min_failures);
7878 sub total_failed {
7879 # Returns:
7880 # $total_failures = the number of times this command has failed
7881 my $self = shift;
7882 my $total_failures = 0;
7883 for (values %{$self->{'failed'}}) {
7884 $total_failures += $_;
7886 return $total_failures;
7890 my $script;
7892 sub postpone_exit_and_cleanup {
7893 # Command to remove files and dirs (given as args) without
7894 # affecting the exit value in $?/$status.
7895 if(not $script) {
7896 $script = "perl -e '".
7897 ::spacefree(0,q{
7898 $bash=shift;
7899 $csh=shift;
7900 for(@ARGV){
7901 unlink;
7902 rmdir;
7904 if($bash=~s/h//) {
7905 exit $bash;
7907 exit $csh;
7909 "' ".'"$?h" "$status" ';
7911 return $script
7916 my $script;
7918 sub fifo_wrap {
7919 # Script to create a fifo, run a command on the fifo
7920 # while copying STDIN to the fifo, and finally
7921 # remove the fifo and return the exit code of the command.
7922 if(not $script) {
7923 # {} == $PARALLEL_TMP for --fifo
7924 # To make it csh compatible a wrapper needs to:
7925 # * mkfifo
7926 # * spawn $command &
7927 # * cat > fifo
7928 # * waitpid to get the exit code from $command
7929 # * be less than 1000 chars long
7930 $script = "perl -e '".
7931 (::spacefree
7932 (0, q{
7933 ($s,$c,$f) = @ARGV;
7934 # mkfifo $PARALLEL_TMP
7935 system "mkfifo", $f;
7936 # spawn $shell -c $command &
7937 $pid = fork || exec $s, "-c", $c;
7938 open($o,">",$f) || die $!;
7939 # cat > $PARALLEL_TMP
7940 while(sysread(STDIN,$buf,131072)){
7941 syswrite $o, $buf;
7943 close $o;
7944 # waitpid to get the exit code from $command
7945 waitpid $pid,0;
7946 # Cleanup
7947 unlink $f;
7948 exit $?/256;
7949 }))."'";
7951 return $script;
7955 sub wrapped {
7956 # Wrap command with:
7957 # * --shellquote
7958 # * --nice
7959 # * --cat
7960 # * --fifo
7961 # * --sshlogin
7962 # * --pipepart (@Global::cat_prepends)
7963 # * --tee (@Global::cat_prepends)
7964 # * --pipe
7965 # * --tmux
7966 # The ordering of the wrapping is important:
7967 # * --nice/--cat/--fifo should be done on the remote machine
7968 # * --pipepart/--pipe should be done on the local machine inside --tmux
7969 # Uses:
7970 # $opt::shellquote
7971 # $opt::nice
7972 # $Global::shell
7973 # $opt::cat
7974 # $opt::fifo
7975 # @Global::cat_prepends
7976 # $opt::pipe
7977 # $opt::tmux
7978 # Returns:
7979 # $self->{'wrapped'} = the command wrapped with the above
7980 my $self = shift;
7981 if(not defined $self->{'wrapped'}) {
7982 my $command = $self->replaced();
7983 # Bug in Bash and Ksh when running multiline aliases
7984 # This will force them to run correctly, but will fail in
7985 # tcsh so we do not do it.
7986 # $command .= "\n\n";
7987 if($opt::shellquote) {
7988 # Prepend /bin/echo (echo no-/bin is wrong in csh)
7989 # and quote twice
7990 $command = "/bin/echo " .
7991 ::Q(::Q($command));
7993 if($Global::parallel_env) {
7994 # If $PARALLEL_ENV set, put that in front of the command
7995 # Used for env_parallel.*
7996 if($Global::shell =~ /zsh/) {
7997 # The extra 'eval' will make aliases work, too
7998 $command = $Global::parallel_env."\n".
7999 "eval ".::Q($command);
8000 } else {
8001 $command = $Global::parallel_env."\n".$command;
8004 if($opt::cat) {
8005 # In '--cat' and '--fifo' {} == $PARALLEL_TMP.
8006 # This is to make it possible to compute $PARALLEL_TMP on
8007 # the fly when running remotely.
8008 # $ENV{PARALLEL_TMP} is set in the remote wrapper before
8009 # the command is run.
8011 # Prepend 'cat > $PARALLEL_TMP;'
8012 # Append 'unlink $PARALLEL_TMP without affecting $?'
8013 $command =
8014 'cat > $PARALLEL_TMP;'.
8015 $command.";". postpone_exit_and_cleanup().
8016 '$PARALLEL_TMP';
8017 } elsif($opt::fifo) {
8018 # Prepend fifo-wrapper. In essence:
8019 # mkfifo {}
8020 # ( $command ) &
8021 # # $command must read {}, otherwise this 'cat' will block
8022 # cat > {};
8023 # wait; rm {}
8024 # without affecting $?
8025 $command = fifo_wrap(). " ".
8026 $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';';
8028 # Wrap with ssh + tranferring of files
8029 $command = $self->sshlogin_wrap($command);
8030 if(@Global::cat_prepends) {
8031 # --pipepart: prepend:
8032 # < /tmp/foo perl -e 'while(@ARGV) {
8033 # sysseek(STDIN,shift,0) || die; $left = shift;
8034 # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){
8035 # $left -= $read; syswrite(STDOUT,$buf);
8037 # }' 0 0 0 11 |
8039 # --pipepart --tee: prepend:
8040 # < dash-a-file
8042 # --pipe --tee: wrap:
8043 # (rm fifo; ... ) < fifo
8044 $command = (shift @Global::cat_prepends). "($command)".
8045 (shift @Global::cat_appends);
8046 } elsif($opt::pipe) {
8047 # Wrap with EOF-detector to avoid starting $command if EOF.
8048 $command = empty_input_wrapper($command);
8050 if($opt::tmux) {
8051 # Wrap command with 'tmux'
8052 $command = $self->tmux_wrap($command);
8054 if($Global::cshell
8056 length $command > 499) {
8057 # csh does not like words longer than 1000 (499 quoted)
8058 # $command = "perl -e '".base64_zip_eval()."' ".
8059 # join" ",string_zip_base64(
8060 # 'exec "'.::perl_quote_scalar($command).'"');
8061 $command = base64_wrap("exec \"$Global::shell\",'-c',\"".
8062 ::perl_quote_scalar($command).'"');
8064 $self->{'wrapped'} = $command;
8066 return $self->{'wrapped'};
8069 sub set_sshlogin {
8070 my $self = shift;
8071 my $sshlogin = shift;
8072 $self->{'sshlogin'} = $sshlogin;
8073 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
8074 delete $self->{'wrapped'};
8076 if($opt::sqlworker) {
8077 # Identify worker as --sqlworker often runs on different machines
8078 my $host = $sshlogin->string();
8079 if($host eq ":") {
8080 $host = ::hostname();
8082 $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host);
8086 sub sshlogin {
8087 my $self = shift;
8088 return $self->{'sshlogin'};
8091 sub string_base64 {
8092 # Base64 encode strings into 1000 byte blocks.
8093 # 1000 bytes is the largest word size csh supports
8094 # Input:
8095 # @strings = to be encoded
8096 # Returns:
8097 # @base64 = 1000 byte block
8098 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8099 my @base64 = unpack("(A1000)*",encode_base64((join"",@_),""));
8100 return @base64;
8103 sub string_zip_base64 {
8104 # Pipe string through 'bzip2 -9' and base64 encode it into 1000
8105 # byte blocks.
8106 # 1000 bytes is the largest word size csh supports
8107 # Zipping will make exporting big environments work, too
8108 # Input:
8109 # @strings = to be encoded
8110 # Returns:
8111 # @base64 = 1000 byte block
8112 my($zipin_fh, $zipout_fh,@base64);
8113 ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
8114 if(fork) {
8115 close $zipin_fh;
8116 $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
8117 # Split base64 encoded into 1000 byte blocks
8118 @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
8119 close $zipout_fh;
8120 } else {
8121 close $zipout_fh;
8122 print $zipin_fh @_;
8123 close $zipin_fh;
8124 exit;
8126 ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n");
8127 return @base64;
8130 sub base64_zip_eval {
8131 # Script that:
8132 # * reads base64 strings from @ARGV
8133 # * decodes them
8134 # * pipes through 'bzip2 -dc'
8135 # * evals the result
8136 # Reverse of string_zip_base64 + eval
8137 # Will be wrapped in ' so single quote is forbidden
8138 # Returns:
8139 # $script = 1-liner for perl -e
8140 my $script = ::spacefree(0,q{
8141 @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64";
8142 eval"@GNU_Parallel";
8143 $chld = $SIG{CHLD};
8144 $SIG{CHLD} = "IGNORE";
8145 # Search for bzip2. Not found => use default path
8146 my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
8147 # $in = stdin on $zip, $out = stdout from $zip
8148 # Forget my() to save chars for csh
8149 # my($in, $out,$eval);
8150 open3($in,$out,">&STDERR",$zip,"-dc");
8151 if(my $perlpid = fork) {
8152 close $in;
8153 $eval = join "", <$out>;
8154 close $out;
8155 } else {
8156 close $out;
8157 # Pipe decoded base64 into 'bzip2 -dc'
8158 print $in (decode_base64(join"",@ARGV));
8159 close $in;
8160 exit;
8162 wait;
8163 $SIG{CHLD} = $chld;
8164 eval $eval;
8166 ::debug("base64",$script,"\n");
8167 return $script;
8170 sub base64_wrap {
8171 # base64 encode Perl code
8172 # Split it into chunks of < 1000 bytes
8173 # Prepend it with a decoder that eval's it
8174 # Input:
8175 # $eval_string = Perl code to run
8176 # Returns:
8177 # $shell_command = shell command that runs $eval_string
8178 my $eval_string = shift;
8179 return
8180 "perl -e ".
8181 ::Q(base64_zip_eval())." ".
8182 join" ",::shell_quote(string_zip_base64($eval_string));
8185 sub base64_eval {
8186 # Script that:
8187 # * reads base64 strings from @ARGV
8188 # * decodes them
8189 # * evals the result
8190 # Reverse of string_base64 + eval
8191 # Will be wrapped in ' so single quote is forbidden.
8192 # Spaces are stripped so spaces cannot be significant.
8193 # The funny 'use IPC::Open3'-syntax is to avoid spaces and
8194 # to make it clear that this is a GNU Parallel command
8195 # when looking at the process table.
8196 # Returns:
8197 # $script = 1-liner for perl -e
8198 my $script = ::spacefree(0,q{
8199 @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
8200 eval "@GNU_Parallel";
8201 my $eval = decode_base64(join"",@ARGV);
8202 eval $eval;
8204 ::debug("base64",$script,"\n");
8205 return $script;
8208 sub sshlogin_wrap {
8209 # Wrap the command with the commands needed to run remotely
8210 # Input:
8211 # $command = command to run
8212 # Returns:
8213 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
8214 sub monitor_parent_sshd_script {
8215 # This script is to solve the problem of
8216 # * not mixing STDERR and STDOUT
8217 # * terminating with ctrl-c
8218 # If its parent is ssh: all good
8219 # If its parent is init(1): ssh died, so kill children
8220 my $monitor_parent_sshd_script;
8222 if(not $monitor_parent_sshd_script) {
8223 $monitor_parent_sshd_script =
8224 # This will be packed in ', so only use "
8225 ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
8226 '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
8227 '$nice = '.$opt::nice.';'.
8229 # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
8230 do {
8231 $ENV{PARALLEL_TMP} = $tmpdir."/par".
8232 join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
8233 } while(-e $ENV{PARALLEL_TMP});
8234 $SIG{CHLD} = sub { $done = 1; };
8235 $pid = fork;
8236 unless($pid) {
8237 # Make own process group to be able to kill HUP it later
8238 eval { setpgrp };
8239 eval { setpriority(0,0,$nice) };
8240 exec $shell, "-c", ($bashfunc."@ARGV");
8241 die "exec: $!\n";
8243 do {
8244 # Parent is not init (ppid=1), so sshd is alive
8245 # Exponential sleep up to 1 sec
8246 $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
8247 select(undef, undef, undef, $s);
8248 } until ($done || getppid == 1);
8249 # Kill HUP the process group if job not done
8250 kill(SIGHUP, -${pid}) unless $done;
8251 wait;
8252 exit ($?&127 ? 128+($?&127) : 1+$?>>8)
8255 return $monitor_parent_sshd_script;
8258 sub vars_to_export {
8259 # Uses:
8260 # @opt::env
8261 my @vars = ("parallel_bash_environment");
8262 for my $varstring (@opt::env) {
8263 # Split up --env VAR1,VAR2
8264 push @vars, split /,/, $varstring;
8266 for (@vars) {
8267 if(-r $_ and not -d) {
8268 # Read as environment definition bug #44041
8269 # TODO parse this
8270 my $fh = ::open_or_exit($_);
8271 $Global::envdef = join("",<$fh>);
8272 close $fh;
8275 if(grep { /^_$/ } @vars) {
8276 local $/ = "\n";
8277 # --env _
8278 # Include all vars that are not in a clean environment
8279 if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) {
8280 my @ignore = <$vars_fh>;
8281 chomp @ignore;
8282 my %ignore;
8283 @ignore{@ignore} = @ignore;
8284 close $vars_fh;
8285 push @vars, grep { not defined $ignore{$_} } keys %ENV;
8286 @vars = grep { not /^_$/ } @vars;
8287 } else {
8288 ::error("Run '$Global::progname --record-env' ".
8289 "in a clean environment first.");
8290 ::wait_and_exit(255);
8293 # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
8294 # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%%
8295 push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
8296 map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
8297 # Keep only defined variables
8298 return grep { defined($ENV{$_}) } @vars;
8301 sub env_as_eval {
8302 # Returns:
8303 # $eval = '$ENV{"..."}=...; ...'
8304 my @vars = vars_to_export();
8305 my $csh_friendly = not grep { /\n/ } @ENV{@vars};
8306 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
8307 my @non_functions = (grep { !/PARALLEL_ENV/ }
8308 grep { substr($ENV{$_},0,4) ne "() {" } @vars);
8310 # eval of @envset will set %ENV
8311 my $envset = join"", map {
8312 '$ENV{"'.::perl_quote_scalar($_).'"}="'.
8313 ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
8315 # running @bashfunc on the command line, will set the functions
8316 my @bashfunc = map {
8317 my $v=$_;
8318 s/BASH_FUNC_(.*)(\(\)|%%)/$1/;
8319 "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
8320 # eval $bashfuncset will set $bashfunc
8321 my $bashfuncset;
8322 if(@bashfunc) {
8323 # Functions are not supported for all shells
8324 if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) {
8325 ::warning("Shell functions may not be supported in $Global::shell.");
8327 $bashfuncset =
8328 '@bash_functions=qw('."@bash_functions".");".
8329 ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{
8330 if($shell=~/csh/) {
8331 print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
8332 exec "false";
8335 "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
8336 } else {
8337 $bashfuncset = '$bashfunc = "";'
8339 if($ENV{"parallel_bash_environment"}) {
8340 $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
8342 ::debug("base64",$envset,$bashfuncset,"\n");
8343 return $csh_friendly,$envset,$bashfuncset;
8346 my $self = shift;
8347 my $command = shift;
8348 # TODO test that *sh -c 'parallel --env' use *sh
8349 if(not defined $self->{'sshlogin_wrap'}{$command}) {
8350 my $sshlogin = $self->sshlogin();
8351 my $serverlogin = $sshlogin->serverlogin();
8352 my $quoted_remote_command;
8353 $ENV{'PARALLEL_SEQ'} = $self->seq();
8354 $ENV{'PARALLEL_PID'} = $$;
8355 if($serverlogin eq ":") {
8356 if($opt::workdir) {
8357 # Create workdir if needed. Then cd to it.
8358 my $wd = $self->workdir();
8359 if($opt::workdir eq "." or $opt::workdir eq "...") {
8360 # If $wd does not start with '/': Prepend $HOME
8361 $wd =~ s:^([^/]):$ENV{'HOME'}/$1:;
8363 ::mkdir_or_die($wd);
8364 $command = "cd ".::Q($wd)." || exit 255; ".$command;
8366 if(@opt::env) {
8367 # Prepend with environment setter, which sets functions in zsh
8368 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
8369 my $perl_code = $envset.$bashfuncset.
8370 '@ARGV="'.::perl_quote_scalar($command).'";'.
8371 "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
8372 if(length $perl_code > 999
8374 not $csh_friendly
8376 $command =~ /\n/) {
8377 # csh does not deal well with > 1000 chars in one word
8378 # csh does not deal well with $ENV with \n
8379 $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code);
8380 } else {
8381 $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code);
8383 } else {
8384 $self->{'sshlogin_wrap'}{$command} = $command;
8386 } else {
8387 my $pwd = "";
8388 if($opt::workdir) {
8389 # Create remote workdir if needed. Then cd to it.
8390 my $wd = ::pQ($self->workdir());
8391 $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
8392 qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
8394 my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
8395 my $remote_command = $pwd.$envset.$bashfuncset.
8396 '@ARGV="'.::perl_quote_scalar($command).'";'.
8397 monitor_parent_sshd_script();
8398 $quoted_remote_command = "perl -e ". ::Q($remote_command);
8399 my $dq_remote_command = ::Q($quoted_remote_command);
8400 if(length $dq_remote_command > 999
8402 not $csh_friendly
8404 $command =~ /\n/) {
8405 # csh does not deal well with > 1000 chars in one word
8406 # csh does not deal well with $ENV with \n
8407 $quoted_remote_command =
8408 "perl -e ". ::Q(::Q(base64_zip_eval()))." ".
8409 join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command)));
8410 } else {
8411 $quoted_remote_command = $dq_remote_command;
8414 my $sshcmd = $sshlogin->sshcommand();
8415 my ($pre,$post,$cleanup)=("","","");
8416 # --transfer
8417 $pre .= $self->sshtransfer();
8418 # --return
8419 $post .= $self->sshreturn();
8420 # --cleanup
8421 $post .= $self->sshcleanup();
8422 if($post) {
8423 # We need to save the exit status of the job
8424 $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;';
8426 $self->{'sshlogin_wrap'}{$command} =
8427 ($pre
8428 . "$sshcmd $serverlogin -- exec "
8429 . $quoted_remote_command
8430 . ";"
8431 . $post);
8434 return $self->{'sshlogin_wrap'}{$command};
8437 sub transfer {
8438 # Files to transfer
8439 # Non-quoted and with {...} substituted
8440 # Returns:
8441 # @transfer - File names of files to transfer
8442 my $self = shift;
8444 my $transfersize = 0;
8445 my @transfer = $self->{'commandline'}->
8446 replace_placeholders($self->{'commandline'}{'transfer_files'},0,0);
8447 for(@transfer) {
8448 # filesize
8449 if(-e $_) {
8450 $transfersize += (stat($_))[7];
8453 $self->add_transfersize($transfersize);
8454 return @transfer;
8457 sub transfersize {
8458 my $self = shift;
8459 return $self->{'transfersize'};
8462 sub add_transfersize {
8463 my $self = shift;
8464 my $transfersize = shift;
8465 $self->{'transfersize'} += $transfersize;
8466 $opt::sqlworker and
8467 $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(),
8468 $self->{'transfersize'});
8471 sub sshtransfer {
8472 # Returns for each transfer file:
8473 # rsync $file remote:$workdir
8474 my $self = shift;
8475 my @pre;
8476 my $sshlogin = $self->sshlogin();
8477 my $workdir = $self->workdir();
8478 for my $file ($self->transfer()) {
8479 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
8481 return join("",@pre);
8484 sub return {
8485 # Files to return
8486 # Non-quoted and with {...} substituted
8487 # Returns:
8488 # @non_quoted_filenames
8489 my $self = shift;
8490 return $self->{'commandline'}->
8491 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
8494 sub returnsize {
8495 # This is called after the job has finished
8496 # Returns:
8497 # $number_of_bytes transferred in return
8498 my $self = shift;
8499 for my $file ($self->return()) {
8500 if(-e $file) {
8501 $self->{'returnsize'} += (stat($file))[7];
8504 return $self->{'returnsize'};
8507 sub add_returnsize {
8508 my $self = shift;
8509 my $returnsize = shift;
8510 $self->{'returnsize'} += $returnsize;
8511 $opt::sqlworker and
8512 $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(),
8513 $self->{'returnsize'});
8516 sub sshreturn {
8517 # Returns for each return-file:
8518 # rsync remote:$workdir/$file .
8519 my $self = shift;
8520 my $sshlogin = $self->sshlogin();
8521 my $sshcmd = $sshlogin->sshcommand();
8522 my $serverlogin = $sshlogin->serverlogin();
8523 my $rsync_opts = $ENV{'PARALLEL_RSYNC_OPTS'}. " -e". ::Q($sshcmd);
8524 my $pre = "";
8525 for my $file ($self->return()) {
8526 $file =~ s:^\./::g; # Remove ./ if any
8527 my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./?
8528 my $cd = "";
8529 my $wd = "";
8530 if($relpath) {
8531 # rsync -avR /foo/./bar/baz.c remote:/tmp/
8532 # == (on old systems)
8533 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
8534 $wd = ::shell_quote_file($self->workdir()."/");
8536 # Only load File::Basename if actually needed
8537 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
8538 # dir/./file means relative to dir, so remove dir on remote
8539 $file =~ m:(.*)/\./:;
8540 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
8541 my $nobasedir = $file;
8542 $nobasedir =~ s:.*/\./::;
8543 $cd = ::shell_quote_file(::dirname($nobasedir));
8544 my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync");
8545 my $basename = ::Q(::shell_quote_file(::basename($file)));
8546 # --return
8547 # mkdir -p /home/tange/dir/subdir/;
8548 # rsync (--protocol 30) -rlDzR
8549 # --rsync-path="cd /home/tange/dir/subdir/; rsync"
8550 # server:file.gz /home/tange/dir/subdir/
8551 $pre .= "mkdir -p $basedir$cd && ". $sshlogin->rsync().
8552 " $rsync_cd $rsync_opts $serverlogin:".
8553 $basename . " ".$basedir.$cd.";";
8555 return $pre;
8558 sub sshcleanup {
8559 # Return the sshcommand needed to remove the file
8560 # Returns:
8561 # ssh command needed to remove files from sshlogin
8562 my $self = shift;
8563 my $sshlogin = $self->sshlogin();
8564 my $sshcmd = $sshlogin->sshcommand();
8565 my $serverlogin = $sshlogin->serverlogin();
8566 my $workdir = $self->workdir();
8567 my $cleancmd = "";
8569 for my $file ($self->remote_cleanup()) {
8570 my @subworkdirs = parentdirs_of($file);
8571 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
8573 if(defined $opt::workdir and $opt::workdir eq "...") {
8574 $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::Q($workdir).';';
8576 return $cleancmd;
8579 sub remote_cleanup {
8580 # Returns:
8581 # Files to remove at cleanup
8582 my $self = shift;
8583 if($opt::cleanup) {
8584 my @transfer = $self->transfer();
8585 my @return = $self->return();
8586 return (@transfer,@return);
8587 } else {
8588 return ();
8592 sub workdir {
8593 # Returns:
8594 # the workdir on a remote machine
8595 my $self = shift;
8596 if(not defined $self->{'workdir'}) {
8597 my $workdir;
8598 if(defined $opt::workdir) {
8599 if($opt::workdir eq ".") {
8600 # . means current dir
8601 my $home = $ENV{'HOME'};
8602 eval 'use Cwd';
8603 my $cwd = cwd();
8604 $workdir = $cwd;
8605 if($home) {
8606 # If homedir exists: remove the homedir from
8607 # workdir if cwd starts with homedir
8608 # E.g. /home/foo/my/dir => my/dir
8609 # E.g. /tmp/my/dir => /tmp/my/dir
8610 my ($home_dev, $home_ino) = (stat($home))[0,1];
8611 my $parent = "";
8612 my @dir_parts = split(m:/:,$cwd);
8613 my $part;
8614 while(defined ($part = shift @dir_parts)) {
8615 $part eq "" and next;
8616 $parent .= "/".$part;
8617 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
8618 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
8619 # dev and ino is the same: We found the homedir.
8620 $workdir = join("/",@dir_parts);
8621 last;
8625 if($workdir eq "") {
8626 $workdir = ".";
8628 } elsif($opt::workdir eq "...") {
8629 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
8630 . "-" . $self->seq();
8631 } else {
8632 $workdir = $self->{'commandline'}->
8633 replace_placeholders([$opt::workdir],0,0);
8634 #$workdir = $opt::workdir;
8635 # Rsync treats /./ special. We dont want that
8636 $workdir =~ s:/\./:/:g; # Remove /./
8637 $workdir =~ s:(.)/+$:$1:; # Remove ending / if any
8638 $workdir =~ s:^\./::g; # Remove starting ./ if any
8640 } else {
8641 $workdir = ".";
8643 $self->{'workdir'} = $workdir;
8645 return $self->{'workdir'};
8648 sub parentdirs_of {
8649 # Return:
8650 # all parentdirs except . of this dir or file - sorted desc by length
8651 my $d = shift;
8652 my @parents = ();
8653 while($d =~ s:/[^/]+$::) {
8654 if($d ne ".") {
8655 push @parents, $d;
8658 return @parents;
8661 sub start {
8662 # Setup STDOUT and STDERR for a job and start it.
8663 # Returns:
8664 # job-object or undef if job not to run
8666 sub open3_setpgrp_internal {
8667 # Run open3+setpgrp followed by the command
8668 # Input:
8669 # $stdin_fh = Filehandle to use as STDIN
8670 # $stdout_fh = Filehandle to use as STDOUT
8671 # $stderr_fh = Filehandle to use as STDERR
8672 # $command = Command to run
8673 # Returns:
8674 # $pid = Process group of job started
8675 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
8676 my $pid;
8677 local (*OUT,*ERR);
8678 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
8679 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
8680 # The eval is needed to catch exception from open3
8681 eval {
8682 if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) {
8683 # Each child gets its own process group to make it safe to killall
8684 eval{ setpgrp(0,0) };
8685 eval{ setpriority(0,0,$opt::nice) };
8686 exec($Global::shell,"-c",$command)
8687 || ::die_bug("open3-$stdin_fh $command");
8690 return $pid;
8693 sub open3_setpgrp_external {
8694 # Run open3 on $command wrapped with a perl script doing setpgrp
8695 # Works on systems that do not support open3(,,,"-")
8696 # Input:
8697 # $stdin_fh = Filehandle to use as STDIN
8698 # $stdout_fh = Filehandle to use as STDOUT
8699 # $stderr_fh = Filehandle to use as STDERR
8700 # $command = Command to run
8701 # Returns:
8702 # $pid = Process group of job started
8703 my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_;
8704 local (*OUT,*ERR);
8705 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
8706 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
8708 my $pid;
8709 my @setpgrp_wrap =
8710 ('perl','-e',
8711 "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;".
8712 "exec '$Global::shell', '-c', \@ARGV");
8713 # The eval is needed to catch exception from open3
8714 eval {
8715 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command)
8716 || ::die_bug("open3-$stdin_fh");
8719 return $pid;
8722 sub open3_setpgrp {
8723 # Select and run open3_setpgrp_internal/open3_setpgrp_external
8724 no warnings 'redefine';
8725 my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst");
8726 # Test to see if open3(x,x,x,"-") is fully supported
8727 # Can an exported bash function be called via open3?
8728 my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '.
8729 'else { exec("bash","-c","testfun && true"); }';
8730 my $bash =
8731 ::shell_quote_scalar_default(
8732 "testfun() { rm $name; }; export -f testfun; ".
8733 "perl -MIPC::Open3 -e ".
8734 ::shell_quote_scalar_default($script)
8736 # Redirect STDERR temporarily,
8737 # so errors on MacOS X are ignored.
8738 open my $saveerr, ">&STDERR";
8739 open STDERR, '>', "/dev/null";
8740 # Run the test
8741 ::debug("init",qq{bash -c $bash 2>/dev/null});
8742 qx{ bash -c $bash 2>/dev/null };
8743 open STDERR, ">&", $saveerr;
8745 if(-e $name) {
8746 # Does not support open3(x,x,x,"-")
8747 # or does not have bash:
8748 # Use (slow) external version
8749 unlink($name);
8750 *open3_setpgrp = \&open3_setpgrp_external;
8751 ::debug("init","open3_setpgrp_external chosen\n");
8752 } else {
8753 # Supports open3(x,x,x,"-")
8754 # This is 0.5 ms faster to run
8755 *open3_setpgrp = \&open3_setpgrp_internal;
8756 ::debug("init","open3_setpgrp_internal chosen\n");
8758 # The sub is now redefined. Call it
8759 return open3_setpgrp(@_);
8762 my $job = shift;
8763 # Get the shell command to be executed (possibly with ssh infront).
8764 my $command = $job->wrapped();
8765 my $pid;
8767 if($Global::interactive or $Global::stderr_verbose) {
8768 $job->interactive_start();
8770 # Must be run after $job->interactive_start():
8771 # $job->interactive_start() may call $job->skip()
8772 if($job->{'commandline'}{'skip'}) {
8773 # $job->skip() was called
8774 $command = "true";
8776 $job->openoutputfiles();
8777 $job->print_verbose_dryrun();
8778 # Call slot to store the slot value
8779 $job->slot();
8780 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
8781 if($opt::dryrun or $opt::sqlmaster) { $command = "true"; }
8782 $ENV{'PARALLEL_SEQ'} = $job->seq();
8783 $ENV{'PARALLEL_PID'} = $$;
8784 $ENV{'PARALLEL_TMP'} = ::tmpname("par");
8785 $job->add_rm($ENV{'PARALLEL_TMP'});
8786 ::debug("run", $Global::total_running, " processes . Starting (",
8787 $job->seq(), "): $command\n");
8788 if($opt::pipe) {
8789 my ($stdin_fh) = ::gensym();
8790 $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command);
8791 if($opt::roundrobin and not $opt::keeporder) {
8792 # --keep-order will make sure the order will be reproducible
8793 ::set_fh_non_blocking($stdin_fh);
8795 $job->set_fh(0,"w",$stdin_fh);
8796 if($opt::tee) { $job->set_virgin(0); }
8797 } elsif ($opt::tty and -c "/dev/tty" and
8798 open(my $devtty_fh, "<", "/dev/tty")) {
8799 # Give /dev/tty to the command if no one else is using it
8800 # The eval is needed to catch exception from open3
8801 local (*IN,*OUT,*ERR);
8802 open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!");
8803 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");
8804 *IN = $devtty_fh;
8805 # The eval is needed to catch exception from open3
8806 my @wrap = ('perl','-e',
8807 "eval\{setpriority\(0,0,$opt::nice\)\}\;".
8808 "exec '$Global::shell', '-c', \@ARGV");
8809 eval {
8810 $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command)
8811 || ::die_bug("open3-/dev/tty");
8814 close $devtty_fh;
8815 $job->set_virgin(0);
8816 } else {
8817 $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command);
8818 $job->set_virgin(0);
8820 if($pid) {
8821 # A job was started
8822 $Global::total_running++;
8823 $Global::total_started++;
8824 $job->set_pid($pid);
8825 $job->set_starttime();
8826 $Global::running{$job->pid()} = $job;
8827 if($opt::timeout) {
8828 $Global::timeoutq->insert($job);
8830 $Global::newest_job = $job;
8831 $Global::newest_starttime = ::now();
8832 return $job;
8833 } else {
8834 # No more processes
8835 ::debug("run", "Cannot spawn more jobs.\n");
8836 return undef;
8840 sub interactive_start {
8841 my $self = shift;
8842 my $command = $self->wrapped();
8843 if($Global::interactive) {
8844 ::status_no_nl("$command ?...");
8845 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
8846 my $answer = <$tty_fh>;
8847 close $tty_fh;
8848 my $run_yes = ($answer =~ /^\s*y/i);
8849 if (not $run_yes) {
8850 $self->{'commandline'}->skip();
8852 } else {
8853 print $Global::original_stderr "$command\n";
8858 my $tmuxsocket;
8860 sub tmux_wrap {
8861 # Wrap command with tmux for session pPID
8862 # Input:
8863 # $actual_command = the actual command being run (incl ssh wrap)
8864 my $self = shift;
8865 my $actual_command = shift;
8866 # Temporary file name. Used for fifo to communicate exit val
8867 my $tmpfifo = ::tmpname("tmx");
8868 $self->add_rm($tmpfifo);
8870 if(length($tmpfifo) >=100) {
8871 ::error("tmux does not support sockets with path > 100.");
8872 ::wait_and_exit(255);
8874 if($opt::tmuxpane) {
8875 # Move the command into a pane in window 0
8876 $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '.
8877 $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '.
8878 $actual_command;
8880 my $visual_command = $self->replaced();
8881 my $title = $visual_command;
8882 if($visual_command =~ /\0/) {
8883 ::error("Command line contains NUL. tmux is confused by NUL.");
8884 ::wait_and_exit(255);
8886 # ; causes problems
8887 # ascii 194-245 annoys tmux
8888 $title =~ tr/[\011-\016;\302-\365]/ /s;
8889 $title = ::Q($title);
8891 my $l_act = length($actual_command);
8892 my $l_tit = length($title);
8893 my $l_fifo = length($tmpfifo);
8894 # The line to run contains a 118 chars extra code + the title 2x
8895 my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
8897 my $quoted_space75 = ::Q(" ")x75;
8898 while($l_tit < 1000 and
8900 (890 < $l_tot and $l_tot < 1350)
8902 (9250 < $l_tot and $l_tot < 9800)
8903 )) {
8904 # tmux blocks for certain lengths:
8905 # 900 < title + command < 1200
8906 # 9250 < title + command < 9800
8907 # but only if title < 1000, so expand the title with 75 spaces
8908 # The measured lengths are:
8909 # 996 < (title + whole command) < 1127
8910 # 9331 < (title + whole command) < 9636
8911 $title .= $quoted_space75;
8912 $l_tit = length($title);
8913 $l_tot = 2 * $l_tit + $l_act + $l_fifo;
8916 my $tmux;
8917 $ENV{'PARALLEL_TMUX'} ||= "tmux";
8918 if(not $tmuxsocket) {
8919 $tmuxsocket = ::tmpname("tms");
8920 if($opt::fg) {
8921 if(not fork) {
8922 # Run tmux in the foreground
8923 # Wait for the socket to appear
8924 while (not -e $tmuxsocket) { }
8925 `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`;
8926 exit;
8929 ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach");
8931 $tmux = "sh -c '".
8932 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" .
8933 $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
8935 ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
8936 $Limits::Command::line_max_len, " tot ",
8937 $l_tot, "\n");
8939 return "mkfifo $tmpfifo && $tmux ".
8940 # Run in tmux
8943 "(".$actual_command.');'.
8944 # The triple print is needed - otherwise the testsuite fails
8945 q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
8946 "echo $title; echo \007Job finished at: `date`;sleep 10"
8948 # Run outside tmux
8949 # Read a / separated line: 0h/2 for csh, 2/0 for bash.
8950 # If csh the first will be 0h, so use the second as exit value.
8951 # Otherwise just use the first value as exit value.
8952 q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
8956 sub is_already_in_results {
8957 # Do we already have results for this job?
8958 # Returns:
8959 # $job_already_run = bool whether there is output for this or not
8960 my $job = $_[0];
8961 my $out = $job->{'commandline'}->results_out();
8962 ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n");
8963 return(-e $out."stdout" or -f $out);
8966 sub is_already_in_joblog {
8967 my $job = shift;
8968 return vec($Global::job_already_run,$job->seq(),1);
8971 sub set_job_in_joblog {
8972 my $job = shift;
8973 vec($Global::job_already_run,$job->seq(),1) = 1;
8976 sub should_be_retried {
8977 # Should this job be retried?
8978 # Returns
8979 # 0 - do not retry
8980 # 1 - job queued for retry
8981 my $self = shift;
8982 if (not $opt::retries) {
8983 return 0;
8985 if(not $self->exitstatus() and not $self->exitsignal()) {
8986 # Completed with success. If there is a recorded failure: forget it
8987 $self->reset_failed_here();
8988 return 0;
8989 } else {
8990 # The job failed. Should it be retried?
8991 $self->add_failed_here();
8992 my $retries = $self->{'commandline'}->
8993 replace_placeholders([$opt::retries],0,0);
8994 if($self->total_failed() == $retries) {
8995 # This has been retried enough
8996 return 0;
8997 } else {
8998 # This command should be retried
8999 $self->set_endtime(undef);
9000 $self->reset_exitstatus();
9001 $Global::JobQueue->unget($self);
9002 ::debug("run", "Retry ", $self->seq(), "\n");
9003 return 1;
9009 my (%print_later,$job_seq_to_print);
9011 sub print_earlier_jobs {
9012 # Print jobs whose output is postponed due to --keep-order
9013 # Returns: N/A
9014 my $job = shift;
9015 $print_later{$job->seq()} = $job;
9016 $job_seq_to_print ||= 1;
9017 my $returnsize = 0;
9018 ::debug("run", "Looking for: $job_seq_to_print ",
9019 "This: ", $job->seq(), "\n");
9020 for(;vec($Global::job_already_run,$job_seq_to_print,1);
9021 $job_seq_to_print++) {}
9022 while(my $j = $print_later{$job_seq_to_print}) {
9023 $returnsize += $j->print();
9024 if($j->endtime()) {
9025 # Job finished - look at the next
9026 delete $print_later{$job_seq_to_print};
9027 $job_seq_to_print++;
9028 next;
9029 } else {
9030 # Job not finished yet - look at it again next round
9031 last;
9034 return $returnsize;
9038 sub print {
9039 # Print the output of the jobs
9040 # Returns: N/A
9042 my $self = shift;
9043 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
9044 if($opt::dryrun) {
9045 # Nothing was printed to this job:
9046 # cleanup tmp files if --files was set
9047 ::rm($self->fh(1,"name"));
9049 if($opt::pipe and $self->virgin() and not $opt::tee) {
9050 # Skip --joblog, --dryrun, --verbose
9051 } else {
9052 if($opt::ungroup) {
9053 # NULL returnsize = 0 returnsize
9054 $self->returnsize() or $self->add_returnsize(0);
9055 if($Global::joblog and defined $self->{'exitstatus'}) {
9056 # Add to joblog when finished
9057 $self->print_joblog();
9058 # Printing is only relevant for grouped/--line-buffer output.
9059 $opt::ungroup and return;
9063 # Check for disk full
9064 ::exit_if_disk_full();
9067 my $returnsize = $self->returnsize();
9068 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9069 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
9070 $fdno == 0 and next;
9071 my $out_fd = $Global::fd{$fdno};
9072 my $in_fh = $self->fh($fdno,"r");
9073 if(not $in_fh) {
9074 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
9075 # ::warning("File descriptor $fdno not defined\n");
9077 next;
9079 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
9080 if($opt::linebuffer) {
9081 # Line buffered print out
9082 $self->print_linebuffer($fdno,$in_fh,$out_fd);
9083 } elsif($opt::files) {
9084 $self->print_files($fdno,$in_fh,$out_fd);
9085 } elsif($opt::tag or defined $opt::tagstring) {
9086 $self->print_tag($fdno,$in_fh,$out_fd);
9087 } else {
9088 $self->print_normal($fdno,$in_fh,$out_fd);
9090 flush $out_fd;
9092 ::debug("print", "<<joboutput @command\n");
9093 if(defined $self->{'exitstatus'}
9094 and not ($self->virgin() and $opt::pipe)) {
9095 if($Global::joblog and not $opt::sqlworker) {
9096 # Add to joblog when finished
9097 $self->print_joblog();
9099 if($opt::sqlworker and not $opt::results) {
9100 $Global::sql->output($self);
9102 if($Global::csvsep) {
9103 # Add output to CSV when finished
9104 $self->print_csv();
9107 return $returnsize - $self->returnsize();
9111 my $header_printed;
9113 sub print_csv {
9114 my $self = shift;
9115 my $cmd;
9116 if($Global::verbose <= 1) {
9117 $cmd = $self->replaced();
9118 } else {
9119 # Verbose level > 1: Print the rsync and stuff
9120 $cmd = "@command";
9122 my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'};
9124 if(not $header_printed) {
9125 # Variable headers
9126 # Normal => V1..Vn
9127 # --header : => first value from column
9128 my @V;
9129 if($opt::header) {
9130 my $i = 1;
9131 @V = (map { $Global::input_source_header{$i++} }
9132 @$record_ref[1..$#$record_ref]);
9133 } else {
9134 my $V = "V1";
9135 @V = (map { $V++ } @$record_ref[1..$#$record_ref]);
9137 print $Global::csv_fh
9138 (map { $$_ }
9139 combine_ref("Seq", "Host", "Starttime", "JobRuntime",
9140 "Send", "Receive", "Exitval", "Signal", "Command",
9142 "Stdout","Stderr"
9143 )),"\n";
9144 $header_printed++;
9146 # Memory optimization: Overwrite with the joined output
9147 $self->{'output'}{1} = join("", @{$self->{'output'}{1}});
9148 $self->{'output'}{2} = join("", @{$self->{'output'}{2}});
9149 print $Global::csv_fh
9150 (map { $$_ }
9151 combine_ref
9152 ($self->seq(),
9153 $self->sshlogin()->string(),
9154 $self->starttime(), sprintf("%0.3f",$self->runtime()),
9155 $self->transfersize(), $self->returnsize(),
9156 $self->exitstatus(), $self->exitsignal(), \$cmd,
9157 \@$record_ref[1..$#$record_ref],
9158 \$self->{'output'}{1},
9159 \$self->{'output'}{2})),"\n";
9163 sub combine_ref {
9164 # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
9165 my @part = @_;
9166 my $sep = $Global::csvsep;
9167 my $quot = '"';
9168 my @out = ();
9170 my $must_be_quoted;
9171 for my $column (@part) {
9172 # Memory optimization: Content transferred as reference
9173 if(ref $column ne "SCALAR") {
9174 # Convert all columns to scalar references
9175 my $v = $column;
9176 $column = \$v;
9178 if(not defined $$column) {
9179 $$column = '';
9180 next;
9183 $must_be_quoted = 0;
9185 if($$column =~ s/$quot/$quot$quot/go){
9186 # Replace " => ""
9187 $must_be_quoted ||=1;
9189 if($$column =~ /[\s\Q$sep\E]/o){
9190 # Put quotes around if the column contains ,
9191 $must_be_quoted ||=1;
9194 $Global::use{"bytes"} ||= eval "use bytes; 1;";
9195 if ($$column =~ /\0/) {
9196 # Contains \0 => put quotes around
9197 $must_be_quoted ||=1;
9199 if($must_be_quoted){
9200 push @out, \$sep, \$quot, $column, \$quot;
9201 } else {
9202 push @out, \$sep, $column;
9205 # Pop off a $sep
9206 shift @out;
9207 return @out;
9210 sub print_files {
9211 # Print the name of the file containing stdout on stdout
9212 # Uses:
9213 # $opt::pipe
9214 # $opt::group = Print when job is done
9215 # $opt::linebuffer = Print ASAP
9216 # Returns: N/A
9217 my $self = shift;
9218 my ($fdno,$in_fh,$out_fd) = @_;
9220 # If the job is dead: close printing fh. Needed for --compress
9221 close $self->fh($fdno,"w");
9222 if($? and $opt::compress) {
9223 ::error($opt::compress_program." failed.");
9224 $self->set_exitstatus(255);
9226 if($opt::compress) {
9227 # Kill the decompressor which will not be needed
9228 CORE::kill "TERM", $self->fh($fdno,"rpid");
9230 close $in_fh;
9232 if($opt::pipe and $self->virgin()) {
9233 # Nothing was printed to this job:
9234 # cleanup unused tmp files because --files was set
9235 for my $fdno (1,2) {
9236 ::rm($self->fh($fdno,"name"));
9237 ::rm($self->fh($fdno,"unlink"));
9239 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
9240 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9241 if($Global::membuffer) {
9242 push @{$self->{'output'}{$fdno}},
9243 $self->tag(), $self->fh($fdno,"name");
9245 $self->add_returnsize(-s $self->fh($fdno,"name"));
9246 # Mark as printed - do not print again
9247 $self->set_fh($fdno,"name",undef);
9251 sub print_linebuffer {
9252 my $self = shift;
9253 my ($fdno,$in_fh,$out_fd) = @_;
9254 if(defined $self->{'exitstatus'}) {
9255 # If the job is dead: close printing fh. Needed for --compress
9256 close $self->fh($fdno,"w");
9257 if($? and $opt::compress) {
9258 ::error($opt::compress_program." failed.");
9259 $self->set_exitstatus(255);
9261 if($opt::compress) {
9262 # Blocked reading in final round
9263 for my $fdno (1,2) {
9264 ::set_fh_blocking($self->fh($fdno,'r'));
9268 if(not $self->virgin()) {
9269 if($opt::files or ($opt::results and not $Global::csvsep)) {
9270 # Print filename
9271 if($fdno == 1 and not $self->fh($fdno,"printed")) {
9272 print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
9273 if($Global::membuffer) {
9274 push(@{$self->{'output'}{$fdno}}, $self->tag(),
9275 $self->fh($fdno,"name"));
9277 $self->set_fh($fdno,"printed",1);
9279 # No need for reading $in_fh, as it is from "cat >/dev/null"
9280 } else {
9281 # Read halflines and print full lines
9282 my $outputlength = 0;
9283 my $halfline_ref = $self->{'halfline'}{$fdno};
9284 my ($buf,$i,$rv);
9285 while($rv = sysread($in_fh, $buf, 131072)) {
9286 $outputlength += $rv;
9287 # TODO --recend
9288 # Treat both \n and \r as line end
9289 $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);
9290 if($i) {
9291 # One or more complete lines were found
9292 if($opt::tag or defined $opt::tagstring) {
9293 # Replace ^ with $tag within the full line
9294 my $tag = $self->tag();
9295 # TODO --recend
9296 substr($buf,0,$i-1) =~ s/(?<=[\n\r])/$tag/gm;
9297 # The length changed, so find the new ending pos
9298 $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);
9299 unshift @$halfline_ref, $tag;
9301 # Print the partial line (halfline) and the last half
9302 print $out_fd @$halfline_ref, substr($buf,0,$i);
9303 # Buffer in memory for SQL and CSV-output
9304 if($Global::membuffer) {
9305 push(@{$self->{'output'}{$fdno}},
9306 @$halfline_ref, substr($buf,0,$i));
9308 # Remove the printed part by keeping the unprinted part
9309 @$halfline_ref = (substr($buf,$i));
9310 } else {
9311 # No newline, so append to the halfline
9312 push @$halfline_ref, $buf;
9315 $self->add_returnsize($outputlength);
9317 if(defined $self->{'exitstatus'}) {
9318 if($opt::files or ($opt::results and not $Global::csvsep)) {
9319 $self->add_returnsize(-s $self->fh($fdno,"name"));
9320 } else {
9321 # If the job is dead: print the remaining partial line
9322 # read remaining
9323 my $halfline_ref = $self->{'halfline'}{$fdno};
9324 if(grep /./, @$halfline_ref) {
9325 $self->add_returnsize(length join("",@$halfline_ref));
9326 if($opt::tag or defined $opt::tagstring) {
9327 # Prepend $tag the the remaining half line
9328 unshift @$halfline_ref, $self->tag();
9330 # Print the partial line (halfline)
9331 print $out_fd @{$self->{'halfline'}{$fdno}};
9332 # Buffer in memory for SQL and CSV-output
9333 if($Global::membuffer) {
9334 push(@{$self->{'output'}{$fdno}}, @$halfline_ref);
9336 @$halfline_ref = ();
9339 if($self->fh($fdno,"rpid") and
9340 CORE::kill 0, $self->fh($fdno,"rpid")) {
9341 # decompress still running
9342 } else {
9343 # decompress done: close fh
9344 close $in_fh;
9345 if($? and $opt::compress) {
9346 ::error($opt::decompress_program." failed.");
9347 $self->set_exitstatus(255);
9354 sub print_tag {
9355 return print_normal(@_);
9358 sub free_ressources() {
9359 my $self = shift;
9360 if(not $opt::ungroup) {
9361 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
9362 close $self->fh($fdno,"w");
9363 close $self->fh($fdno,"r");
9368 sub print_normal {
9369 my $self = shift;
9370 my ($fdno,$in_fh,$out_fd) = @_;
9371 my $buf;
9372 close $self->fh($fdno,"w");
9373 if($? and $opt::compress) {
9374 ::error($opt::compress_program." failed.");
9375 $self->set_exitstatus(255);
9377 if(not $self->virgin()) {
9378 seek $in_fh, 0, 0;
9379 # $in_fh is now ready for reading at position 0
9380 my $outputlength = 0;
9381 my @output;
9383 if($opt::tag or $opt::tagstring) {
9384 # Read line by line
9385 local $/ = "\n";
9386 my $tag = $self->tag();
9387 while(<$in_fh>) {
9388 print $out_fd $tag,$_;
9389 $outputlength += length $_;
9390 if($Global::membuffer) {
9391 push @{$self->{'output'}{$fdno}}, $tag, $_;
9394 } else {
9395 while(sysread($in_fh,$buf,131072)) {
9396 print $out_fd $buf;
9397 $outputlength += length $buf;
9398 if($Global::membuffer) {
9399 push @{$self->{'output'}{$fdno}}, $buf;
9403 if($fdno == 1) {
9404 $self->add_returnsize($outputlength);
9406 close $in_fh;
9407 if($? and $opt::compress) {
9408 ::error($opt::decompress_program." failed.");
9409 $self->set_exitstatus(255);
9414 sub print_joblog {
9415 my $self = shift;
9416 my $cmd;
9417 if($Global::verbose <= 1) {
9418 $cmd = $self->replaced();
9419 } else {
9420 # Verbose level > 1: Print the rsync and stuff
9421 $cmd = "@command";
9423 # Newlines make it hard to parse the joblog
9424 $cmd =~ s/\n/\0/g;
9425 print $Global::joblog
9426 join("\t", $self->seq(), $self->sshlogin()->string(),
9427 $self->starttime(), sprintf("%10.3f",$self->runtime()),
9428 $self->transfersize(), $self->returnsize(),
9429 $self->exitstatus(), $self->exitsignal(), $cmd
9430 ). "\n";
9431 flush $Global::joblog;
9432 $self->set_job_in_joblog();
9435 sub tag {
9436 my $self = shift;
9437 if(not defined $self->{'tag'}) {
9438 if($opt::tag or defined $opt::tagstring) {
9439 $self->{'tag'} = $self->{'commandline'}->
9440 replace_placeholders([$opt::tagstring],0,0)."\t";
9441 } else {
9442 $self->{'tag'} = "";
9445 return $self->{'tag'};
9448 sub hostgroups {
9449 my $self = shift;
9450 if(not defined $self->{'hostgroups'}) {
9451 $self->{'hostgroups'} =
9452 $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
9454 return @{$self->{'hostgroups'}};
9457 sub exitstatus {
9458 my $self = shift;
9459 return $self->{'exitstatus'};
9462 sub set_exitstatus {
9463 my $self = shift;
9464 my $exitstatus = shift;
9465 if($exitstatus) {
9466 # Overwrite status if non-zero
9467 $self->{'exitstatus'} = $exitstatus;
9468 } else {
9469 # Set status but do not overwrite
9470 # Status may have been set by --timeout
9471 $self->{'exitstatus'} ||= $exitstatus;
9473 $opt::sqlworker and
9474 $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(),
9475 $exitstatus);
9478 sub reset_exitstatus {
9479 my $self = shift;
9480 undef $self->{'exitstatus'};
9483 sub exitsignal {
9484 my $self = shift;
9485 return $self->{'exitsignal'};
9488 sub set_exitsignal {
9489 my $self = shift;
9490 my $exitsignal = shift;
9491 $self->{'exitsignal'} = $exitsignal;
9492 $opt::sqlworker and
9493 $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(),
9494 $exitsignal);
9498 my $status_printed;
9499 my $total_jobs;
9501 sub should_we_halt {
9502 # Should we halt? Immediately? Gracefully?
9503 # Returns: N/A
9504 my $job = shift;
9505 my $limit;
9506 if($job->exitstatus() or $job->exitsignal()) {
9507 # Job failed
9508 $Global::exitstatus++;
9509 $Global::total_failed++;
9510 if($Global::halt_fail) {
9511 ::status("$Global::progname: This job failed:",
9512 $job->replaced());
9513 $limit = $Global::total_failed;
9515 } elsif($Global::halt_success) {
9516 ::status("$Global::progname: This job succeeded:",
9517 $job->replaced());
9518 $limit = $Global::total_completed - $Global::total_failed;
9520 if($Global::halt_done) {
9521 ::status("$Global::progname: This job finished:",
9522 $job->replaced());
9523 $limit = $Global::total_completed;
9525 if(not defined $limit) {
9526 return ""
9528 # --halt # => 1..100 (number of jobs failed, 101 means > 100)
9529 # --halt % => 1..100 (pct of jobs failed)
9530 if($Global::halt_pct and not $Global::halt_count) {
9531 $total_jobs ||= $Global::JobQueue->total_jobs();
9532 # From the pct compute the number of jobs that must fail/succeed
9533 $Global::halt_count = $total_jobs * $Global::halt_pct;
9535 if($limit >= $Global::halt_count) {
9536 # At least N jobs have failed/succeded/completed
9537 # or at least N% have failed/succeded/completed
9538 # So we should prepare for exit
9539 if($Global::halt_fail or $Global::halt_done) {
9540 # Set exit status
9541 if(not defined $Global::halt_exitstatus) {
9542 if($Global::halt_pct) {
9543 # --halt now,fail=X% or soon,fail=X%
9544 # --halt now,done=X% or soon,done=X%
9545 $Global::halt_exitstatus =
9546 ::ceil($Global::total_failed / $total_jobs * 100);
9547 } elsif($Global::halt_count) {
9548 # --halt now,fail=X or soon,fail=X
9549 # --halt now,done=X or soon,done=X
9550 $Global::halt_exitstatus =
9551 ::min($Global::total_failed,101);
9553 if($Global::halt_count and $Global::halt_count == 1) {
9554 # --halt now,fail=1 or soon,fail=1
9555 # --halt now,done=1 or soon,done=1
9556 # Emulate Bash's +128 if there is a signal
9557 $Global::halt_exitstatus =
9558 ($job->exitstatus()
9560 $job->exitsignal() ? $job->exitsignal() + 128 : 0);
9563 ::debug("halt","Pct: ",$Global::halt_pct,
9564 " count: ",$Global::halt_count,
9565 " status: ",$Global::halt_exitstatus,"\n");
9566 } elsif($Global::halt_success) {
9567 $Global::halt_exitstatus = 0;
9569 if($Global::halt_when eq "soon"
9571 (scalar(keys %Global::running) > 0
9573 $Global::max_jobs_running == 1)) {
9574 ::status
9575 ("$Global::progname: Starting no more jobs. ".
9576 "Waiting for ". (keys %Global::running).
9577 " jobs to finish.");
9578 $Global::start_no_new_jobs ||= 1;
9580 return($Global::halt_when);
9582 return "";
9587 package CommandLine;
9589 sub new {
9590 my $class = shift;
9591 my $seq = shift;
9592 my $commandref = shift;
9593 $commandref || die;
9594 my $arg_queue = shift;
9595 my $context_replace = shift;
9596 my $max_number_of_args = shift; # for -N and normal (-n1)
9597 my $transfer_files = shift;
9598 my $return_files = shift;
9599 my $replacecount_ref = shift;
9600 my $len_ref = shift;
9601 my %replacecount = %$replacecount_ref;
9602 my %len = %$len_ref;
9603 for (keys %$replacecount_ref) {
9604 # Total length of this replacement string {} replaced with all args
9605 $len{$_} = 0;
9607 return bless {
9608 'command' => $commandref,
9609 'seq' => $seq,
9610 'len' => \%len,
9611 'arg_list' => [],
9612 'arg_list_flat' => [],
9613 'arg_list_flat_orig' => [undef],
9614 'arg_queue' => $arg_queue,
9615 'max_number_of_args' => $max_number_of_args,
9616 'replacecount' => \%replacecount,
9617 'context_replace' => $context_replace,
9618 'transfer_files' => $transfer_files,
9619 'return_files' => $return_files,
9620 'replaced' => undef,
9621 }, ref($class) || $class;
9624 sub seq {
9625 my $self = shift;
9626 return $self->{'seq'};
9629 sub set_seq {
9630 my $self = shift;
9631 $self->{'seq'} = shift;
9634 sub slot {
9635 # Find the number of a free job slot and return it
9636 # Uses:
9637 # @Global::slots - list with free jobslots
9638 # Returns:
9639 # $jobslot = number of jobslot
9640 my $self = shift;
9641 if(not $self->{'slot'}) {
9642 if(not @Global::slots) {
9643 # $max_slot_number will typically be $Global::max_jobs_running
9644 push @Global::slots, ++$Global::max_slot_number;
9646 $self->{'slot'} = shift @Global::slots;
9648 return $self->{'slot'};
9652 my $already_spread;
9654 sub populate {
9655 # Add arguments from arg_queue until the number of arguments or
9656 # max line length is reached
9657 # Uses:
9658 # $Global::minimal_command_line_length
9659 # $opt::cat
9660 # $opt::fifo
9661 # $Global::JobQueue
9662 # $opt::m
9663 # $opt::X
9664 # $Global::max_jobs_running
9665 # Returns: N/A
9666 my $self = shift;
9667 my $next_arg;
9668 my $max_len = $Global::minimal_command_line_length
9669 || Limits::Command::max_length();
9671 if($opt::cat or $opt::fifo) {
9672 # Get the empty arg added by --pipepart (if any)
9673 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
9674 # $PARALLEL_TMP will point to a tempfile that will be used as {}
9675 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
9676 unget([Arg->new('$PARALLEL_TMP')]);
9678 while (not $self->{'arg_queue'}->empty()) {
9679 $next_arg = $self->{'arg_queue'}->get();
9680 if(not defined $next_arg) {
9681 next;
9683 $self->push($next_arg);
9684 if($self->len() >= $max_len) {
9685 # Command length is now > max_length
9686 # If there are arguments: remove the last
9687 # If there are no arguments: Error
9688 # TODO stuff about -x opt_x
9689 if($self->number_of_args() > 1) {
9690 # There is something to work on
9691 $self->{'arg_queue'}->unget($self->pop());
9692 last;
9693 } else {
9694 my $args = join(" ", map { $_->orig() } @$next_arg);
9695 ::error("Command line too long (".
9696 $self->len(). " >= ".
9697 $max_len.
9698 ") at input ".
9699 $self->{'arg_queue'}->arg_number().
9700 ": ".
9701 ((length $args > 50) ?
9702 (substr($args,0,50))."..." :
9703 $args));
9704 $self->{'arg_queue'}->unget($self->pop());
9705 ::wait_and_exit(255);
9709 if(defined $self->{'max_number_of_args'}) {
9710 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
9711 last;
9715 if(($opt::m or $opt::X) and not $already_spread
9716 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
9717 # -m or -X and EOF => Spread the arguments over all jobslots
9718 # (unless they are already spread)
9719 $already_spread ||= 1;
9720 if($self->number_of_args() > 1) {
9721 $self->{'max_number_of_args'} =
9722 ::ceil($self->number_of_args()/$Global::max_jobs_running);
9723 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
9724 $self->{'max_number_of_args'};
9725 $self->{'arg_queue'}->unget($self->pop_all());
9726 while($self->number_of_args() < $self->{'max_number_of_args'}) {
9727 $self->push($self->{'arg_queue'}->get());
9730 $Global::JobQueue->flush_total_jobs();
9733 if($opt::sqlmaster) {
9734 # Insert the V1..Vn for this $seq in SQL table instead of generating one
9735 $Global::sql->insert_records($self->seq(), $self->{'command'},
9736 $self->{'arg_list_flat_orig'});
9741 sub push {
9742 # Add one or more records as arguments
9743 # Returns: N/A
9744 my $self = shift;
9745 my $record = shift;
9746 push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record;
9747 push @{$self->{'arg_list_flat'}}, @$record;
9748 push @{$self->{'arg_list'}}, $record;
9749 # Make @arg available for {= =}
9750 *Arg::arg = $self->{'arg_list_flat_orig'};
9752 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
9753 for my $perlexpr (keys %{$self->{'replacecount'}}) {
9754 if($perlexpr =~ /^(\d+) /) {
9755 # Positional
9756 defined($record->[$1-1]) or next;
9757 $self->{'len'}{$perlexpr} +=
9758 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
9759 } else {
9760 for my $arg (@$record) {
9761 if(defined $arg) {
9762 $self->{'len'}{$perlexpr} +=
9763 length $arg->replace($perlexpr,$quote_arg,$self);
9770 sub pop {
9771 # Remove last argument
9772 # Returns:
9773 # the last record
9774 my $self = shift;
9775 my $record = pop @{$self->{'arg_list'}};
9776 # pop off arguments from @$record
9777 splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1;
9778 splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1;
9779 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
9780 for my $perlexpr (keys %{$self->{'replacecount'}}) {
9781 if($perlexpr =~ /^(\d+) /) {
9782 # Positional
9783 defined($record->[$1-1]) or next;
9784 $self->{'len'}{$perlexpr} -=
9785 length $record->[$1-1]->replace($perlexpr,$quote_arg,$self);
9786 } else {
9787 for my $arg (@$record) {
9788 if(defined $arg) {
9789 $self->{'len'}{$perlexpr} -=
9790 length $arg->replace($perlexpr,$quote_arg,$self);
9795 return $record;
9798 sub pop_all {
9799 # Remove all arguments and zeros the length of replacement perlexpr
9800 # Returns:
9801 # all records
9802 my $self = shift;
9803 my @popped = @{$self->{'arg_list'}};
9804 for my $perlexpr (keys %{$self->{'replacecount'}}) {
9805 $self->{'len'}{$perlexpr} = 0;
9807 $self->{'arg_list'} = [];
9808 $self->{'arg_list_flat_orig'} = [undef];
9809 $self->{'arg_list_flat'} = [];
9810 return @popped;
9813 sub number_of_args {
9814 # The number of records
9815 # Returns:
9816 # number of records
9817 my $self = shift;
9818 # This is really the number of records
9819 return $#{$self->{'arg_list'}}+1;
9822 sub number_of_recargs {
9823 # The number of args in records
9824 # Returns:
9825 # number of args records
9826 my $self = shift;
9827 my $sum = 0;
9828 my $nrec = scalar @{$self->{'arg_list'}};
9829 if($nrec) {
9830 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
9832 return $sum;
9835 sub args_as_string {
9836 # Returns:
9837 # all unmodified arguments joined with ' ' (similar to {})
9838 my $self = shift;
9839 return (join " ", map { $_->orig() }
9840 map { @$_ } @{$self->{'arg_list'}});
9843 sub results_out {
9844 sub max_file_name_length {
9845 # Figure out the max length of a subdir
9846 # TODO and the max total length
9847 # Ext4 = 255,130816
9848 # Uses:
9849 # $Global::max_file_length is set
9850 # Returns:
9851 # $Global::max_file_length
9852 my $testdir = shift;
9854 my $upper = 8_000_000;
9855 # Dir length of 8 chars is supported everywhere
9856 my $len = 8;
9857 my $dir = "x"x$len;
9858 do {
9859 rmdir($testdir."/".$dir);
9860 $len *= 16;
9861 $dir = "x"x$len;
9862 } while ($len < $upper and mkdir $testdir."/".$dir);
9863 # Then search for the actual max length between $len/16 and $len
9864 my $min = $len/16;
9865 my $max = $len;
9866 while($max-$min > 5) {
9867 # If we are within 5 chars of the exact value:
9868 # it is not worth the extra time to find the exact value
9869 my $test = int(($min+$max)/2);
9870 $dir = "x"x$test;
9871 if(mkdir $testdir."/".$dir) {
9872 rmdir($testdir."/".$dir);
9873 $min = $test;
9874 } else {
9875 $max = $test;
9878 $Global::max_file_length = $min;
9879 return $min;
9882 my $self = shift;
9883 my $out = $self->replace_placeholders([$opt::results],0,0);
9884 if($out eq $opt::results) {
9885 # $opt::results simple string: Append args_as_dirname
9886 my $args_as_dirname = $self->args_as_dirname();
9887 # Output in: prefix/name1/val1/name2/val2/stdout
9888 $out = $opt::results."/".$args_as_dirname;
9889 if(-d $out or eval{ File::Path::mkpath($out); }) {
9890 # OK
9891 } else {
9892 # mkpath failed: Argument probably too long.
9893 # Set $Global::max_file_length, which will keep the individual
9894 # dir names shorter than the max length
9895 max_file_name_length($opt::results);
9896 $args_as_dirname = $self->args_as_dirname();
9897 # prefix/name1/val1/name2/val2/
9898 $out = $opt::results."/".$args_as_dirname;
9899 File::Path::mkpath($out);
9901 $out .="/";
9902 } else {
9903 if($out =~ m:/$:) {
9904 # / = dir
9905 if(-d $out or eval{ File::Path::mkpath($out); }) {
9906 # OK
9907 } else {
9908 ::error("Cannot make dir '$out'.");
9909 ::wait_and_exit(255);
9911 } else {
9912 $out =~ m:(.*)/:;
9913 File::Path::mkpath($1);
9916 return $out;
9919 sub args_as_dirname {
9920 # Returns:
9921 # all unmodified arguments joined with '/' (similar to {})
9922 # \t \0 \\ and / are quoted as: \t \0 \\ \_
9923 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
9924 my $self = shift;
9925 my @res = ();
9927 for my $rec_ref (@{$self->{'arg_list'}}) {
9928 # If headers are used, sort by them.
9929 # Otherwise keep the order from the command line.
9930 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
9931 for my $n (@header_indexes_sorted) {
9932 CORE::push(@res,
9933 $Global::input_source_header{$n},
9934 map { my $s = $_;
9935 # \t \0 \\ and / are quoted as: \t \0 \\ \_
9936 $s =~ s/\\/\\\\/g;
9937 $s =~ s/\t/\\t/g;
9938 $s =~ s/\0/\\0/g;
9939 $s =~ s:/:\\_:g;
9940 if($Global::max_file_length) {
9941 # Keep each subdir shorter than the longest
9942 # allowed file name
9943 $s = substr($s,0,$Global::max_file_length);
9945 $s; }
9946 $rec_ref->[$n-1]->orig());
9949 return join "/", @res;
9952 sub header_indexes_sorted {
9953 # Sort headers first by number then by name.
9954 # E.g.: 1a 1b 11a 11b
9955 # Returns:
9956 # Indexes of %Global::input_source_header sorted
9957 my $max_col = shift;
9959 no warnings 'numeric';
9960 for my $col (1 .. $max_col) {
9961 # Make sure the header is defined. If it is not: use column number
9962 if(not defined $Global::input_source_header{$col}) {
9963 $Global::input_source_header{$col} = $col;
9966 my @header_indexes_sorted = sort {
9967 # Sort headers numerically then asciibetically
9968 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
9970 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
9971 } 1 .. $max_col;
9972 return @header_indexes_sorted;
9975 sub len {
9976 # Uses:
9977 # $opt::shellquote
9978 # The length of the command line with args substituted
9979 my $self = shift;
9980 my $len = 0;
9981 # Add length of the original command with no args
9982 # Length of command w/ all replacement args removed
9983 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
9984 ::debug("length", "noncontext + command: $len\n");
9985 my $recargs = $self->number_of_recargs();
9986 if($self->{'context_replace'}) {
9987 # Context is duplicated for each arg
9988 $len += $recargs * $self->{'len'}{'context'};
9989 for my $replstring (keys %{$self->{'replacecount'}}) {
9990 # If the replacements string is more than once: mulitply its length
9991 $len += $self->{'len'}{$replstring} *
9992 $self->{'replacecount'}{$replstring};
9993 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
9994 $self->{'replacecount'}{$replstring}, "\n");
9996 # echo 11 22 33 44 55 66 77 88 99 1010
9997 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
9998 # 5 + ctxgrp*arg
9999 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
10000 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
10001 # Add space between context groups
10002 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
10003 } else {
10004 # Each replacement string may occur several times
10005 # Add the length for each time
10006 $len += 1*$self->{'len'}{'context'};
10007 ::debug("length", "context+noncontext + command: $len\n");
10008 for my $replstring (keys %{$self->{'replacecount'}}) {
10009 # (space between regargs + length of replacement)
10010 # * number this replacement is used
10011 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
10012 $self->{'replacecount'}{$replstring};
10015 if(defined $Global::parallel_env) {
10016 # If we are using --env, add the prefix for that, too.
10017 $len += length $Global::parallel_env;
10019 if($Global::quoting) {
10020 # Pessimistic length if -q is set
10021 # Worse than worst case: every char needs to be quoted with \
10022 $len *= 2;
10024 if($opt::shellquote) {
10025 # Pessimistic length if --shellquote is set
10026 # Worse than worst case: every char needs to be quoted with \ twice
10027 $len *= 4;
10029 if(@opt::sshlogin) {
10030 # Pessimistic length if remote
10031 # Worst case is BASE64 encoding 3 bytes -> 4 bytes
10032 $len = int($len*4/3);
10035 return $len;
10038 sub replaced {
10039 # Uses:
10040 # $Global::noquote
10041 # $Global::quoting
10042 # Returns:
10043 # $replaced = command with place holders replaced and prepended
10044 my $self = shift;
10045 if(not defined $self->{'replaced'}) {
10046 # Don't quote arguments if the input is the full command line
10047 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
10048 # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
10049 $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
10050 $self->{'replaced'} = $self->
10051 replace_placeholders($self->{'command'},$Global::quoting,
10052 $quote_arg);
10053 my $len = length $self->{'replaced'};
10054 if ($len != $self->len()) {
10055 ::debug("length", $len, " != ", $self->len(),
10056 " ", $self->{'replaced'}, "\n");
10057 } else {
10058 ::debug("length", $len, " == ", $self->len(),
10059 " ", $self->{'replaced'}, "\n");
10062 return $self->{'replaced'};
10065 sub replace_placeholders {
10066 # Replace foo{}bar with fooargbar
10067 # Input:
10068 # $targetref = command as shell words
10069 # $quote = should everything be quoted?
10070 # $quote_arg = should replaced arguments be quoted?
10071 # Uses:
10072 # @Arg::arg = arguments as strings to be use in {= =}
10073 # Returns:
10074 # @target with placeholders replaced
10075 my $self = shift;
10076 my $targetref = shift;
10077 my $quote = shift;
10078 my $quote_arg = shift;
10079 my %replace;
10081 # Token description:
10082 # \0spc = unquoted space
10083 # \0end = last token element
10084 # \0ign = dummy token to be ignored
10085 # \257<...\257> = replacement expression
10086 # " " = quoted space, that splits -X group
10087 # text = normal text - possibly part of -X group
10088 my $spacer = 0;
10089 my @tokens = grep { length $_ > 0 } map {
10090 if(/^\257<|^ $/) {
10091 # \257<...\257> or space
10093 } else {
10094 # Split each space/tab into a token
10095 split /(?=\s)|(?<=\s)/
10098 # Split \257< ... \257> into own token
10099 map { split /(?=\257<)|(?<=\257>)/ }
10100 # Insert "\0spc" between every element
10101 # This space should never be quoted
10102 map { $spacer++ ? ("\0spc",$_) : $_ }
10103 map { $_ eq "" ? "\0empty" : $_ }
10104 @$targetref;
10106 if(not @tokens) {
10107 # @tokens is empty: Return empty array
10108 return @tokens;
10110 ::debug("replace", "Tokens ".join":",@tokens,"\n");
10111 # Make it possible to use $arg[2] in {= =}
10112 *Arg::arg = $self->{'arg_list_flat_orig'};
10113 # Flat list:
10114 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
10115 # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ]
10116 if(not @{$self->{'arg_list_flat'}}) {
10117 @{$self->{'arg_list_flat'}} = Arg->new("");
10119 my $argref = $self->{'arg_list_flat'};
10120 # Number of arguments - used for positional arguments
10121 my $n = $#$argref+1;
10123 # $self is actually a CommandLine-object,
10124 # but it looks nice to be able to say {= $job->slot() =}
10125 my $job = $self;
10126 # @replaced = tokens with \257< \257> replaced
10127 my @replaced;
10128 if($self->{'context_replace'}) {
10129 my @ctxgroup;
10130 for my $t (@tokens,"\0end") {
10131 # \0end = last token was end of tokens.
10132 if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") {
10133 # Context group complete: Replace in it
10134 if(grep { /^\257</ } @ctxgroup) {
10135 # Context group contains a replacement string:
10136 # Copy once per arg
10137 my $space = "\0ign";
10138 for my $arg (@$argref) {
10139 my $normal_replace;
10140 # Push output
10141 # Put unquoted space before each context group
10142 # except the first
10143 CORE::push @replaced, $space, map {
10144 $a = $_;
10145 $a =~
10146 s{\257<(-?\d+)?(.*)\257>}
10148 if($1) {
10149 # Positional replace
10150 # Find the relevant arg and replace it
10151 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
10152 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10153 replace($2,$quote_arg,$self)
10154 : "");
10155 } else {
10156 # Normal replace
10157 $normal_replace ||= 1;
10158 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10160 }sgxe;
10162 } @ctxgroup;
10163 $normal_replace or last;
10164 $space = "\0spc";
10166 } else {
10167 # Context group has no a replacement string: Copy it once
10168 CORE::push @replaced, @ctxgroup;
10170 # New context group
10171 @ctxgroup=();
10173 if($t eq "\0spc" or $t eq " ") {
10174 CORE::push @replaced,$t;
10175 } else {
10176 CORE::push @ctxgroup,$t;
10179 } else {
10180 # @group = @token
10181 # Replace in group
10182 # Push output
10183 # repquote = no if {} first on line, no if $quote, yes otherwise
10184 for my $t (@tokens) {
10185 if($t =~ /^\257</) {
10186 my $space = "\0ign";
10187 for my $arg (@$argref) {
10188 my $normal_replace;
10189 $a = $t;
10190 $a =~
10191 s{\257<(-?\d+)?(.*)\257>}
10193 if($1) {
10194 # Positional replace
10195 # Find the relevant arg and replace it
10196 ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
10197 $argref->[$1 > 0 ? $1-1 : $n+$1]->
10198 replace($2,$quote_arg,$self)
10199 : "");
10200 } else {
10201 # Normal replace
10202 $normal_replace ||= 1;
10203 ($arg ? $arg->replace($2,$quote_arg,$self) : "");
10205 }sgxe;
10206 CORE::push @replaced, $space, $a;
10207 $normal_replace or last;
10208 $space = "\0spc";
10210 } else {
10211 # No replacement
10212 CORE::push @replaced, $t;
10216 *Arg::arg = [];
10217 ::debug("replace","Replaced: ".join":",@replaced,"\n");
10218 if($Global::escape_string_present) {
10219 # Command line contains \257: Unescape it \257\256 => \257
10220 # If a replacement resulted in \257\256
10221 # it will have been escaped into \\\257\\\\256
10222 # and will not be matched below
10223 for(@replaced) {
10224 s/\257\256/\257/g;
10228 # Put tokens into groups that may be quoted.
10229 my @quotegroup;
10230 my @quoted;
10231 for (map { $_ eq "\0empty" ? "" : $_ }
10232 grep { $_ ne "\0ign" }
10233 grep { $_ !~ /\0noarg/ }
10234 @replaced, "\0end") {
10235 if($_ eq "\0spc" or $_ eq "\0end") {
10236 # \0spc splits quotable groups
10237 if($quote) {
10238 CORE::push @quoted, ::Q(join"",@quotegroup);
10239 } else {
10240 CORE::push @quoted, join"",@quotegroup;
10242 @quotegroup = ();
10243 } else {
10244 CORE::push @quotegroup, $_;
10247 ::debug("replace","Quoted: ".join":",@quoted,"\n");
10248 return wantarray ? @quoted : "@quoted";
10251 sub skip {
10252 # Skip this job
10253 my $self = shift;
10254 $self->{'skip'} = 1;
10258 package CommandLineQueue;
10260 sub new {
10261 my $class = shift;
10262 my $commandref = shift;
10263 my $read_from = shift;
10264 my $context_replace = shift || 0;
10265 my $max_number_of_args = shift;
10266 my $transfer_files = shift;
10267 my $return_files = shift;
10268 my @unget = ();
10269 my $posrpl;
10270 my ($replacecount_ref, $len_ref);
10271 my @command = @$commandref;
10272 my $seq = 1;
10273 # Replace replacement strings with {= perl expr =}
10274 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
10275 @command = merge_rpl_parts(@command);
10277 # Protect matching inside {= perl expr =}
10278 # by replacing {= and =} with \257< and \257>
10279 # in options that can contain replacement strings:
10280 # @command, --transferfile, --return,
10281 # --tagstring, --workdir, --results
10282 for(@command, @$transfer_files, @$return_files,
10283 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
10284 # Skip if undefined
10285 $_ or next;
10286 # Escape \257 => \257\256
10287 $Global::escape_string_present += s/\257/\257\256/g;
10288 # Needs to match rightmost left parens (Perl defaults to leftmost)
10289 # to deal with: {={==} and {={==}=}
10290 # Replace {= -> \257< and =} -> \257>
10292 # Complex way to do:
10293 # s/{=(.*)=}/\257<$1\257>/g
10294 # which would not work
10295 s[\Q$Global::parensleft\E # Match {=
10296 # Match . unless the next string is {= or =}
10297 # needed to force matching the shortest {= =}
10298 ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?)
10299 \Q$Global::parensright\E ] # Match =}
10300 {\257<$1\257>}gxs;
10301 for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
10302 # Replace long --rpl's before short ones, as a short may be a
10303 # substring of a long:
10304 # --rpl '% s/a/b/' --rpl '%% s/b/a/'
10306 # Replace the shorthand string (--rpl)
10307 # with the {= perl expr =}
10309 # Avoid searching for shorthand strings inside existing {= perl expr =}
10311 # Replace $$1 in {= perl expr =} with groupings in shorthand string
10313 # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;'
10314 # echo {/.tar/.gz} ::: UU.tar.gz
10315 my ($prefix,$grp_regexp,$postfix) =
10316 $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
10317 ( \(.*\) )? # Group capture regexp - e.g (.*)
10318 ( [^)]* )$ # Postfix - e.g }
10319 /xs;
10320 $grp_regexp ||= '';
10321 my $rplval = $Global::rpl{$rpl};
10322 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
10323 # Don't replace after \257 unless \257>
10324 \Q$prefix\E $grp_regexp \Q$postfix\E}
10326 # The start remains the same
10327 my $unchanged = $1;
10328 # Dummy entry to start at 1.
10329 my @grp = (1);
10330 # $2 = first ()-group in $grp_regexp
10331 # Put $2 in $grp[1], Put $3 in $grp[2]
10332 # so first ()-group in $grp_regexp is $grp[1];
10333 for(my $i = 2; defined $grp[$#grp]; $i++) {
10334 push @grp, eval '$'.$i;
10336 my $rv = $rplval;
10337 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
10338 # in the code to be executed
10339 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
10340 # prepend with $_pAr_gRp1 = perlquote($1),
10341 my $set_args = "";
10342 for(my $i = 1;defined $grp[$i]; $i++) {
10343 $set_args .= "\$_pAr_gRp$i = \"" .
10344 ::perl_quote_scalar($grp[$i]) . "\";";
10346 $unchanged . "\257<" . $set_args . $rv . "\257>"
10347 }gxes) {
10349 # Do the same for the positional replacement strings
10350 $posrpl = $rpl;
10351 if($posrpl =~ s/^\{//) {
10352 # Only do this if the shorthand start with {
10353 $prefix=~s/^\{//;
10354 # Don't replace after \257 unless \257>
10355 while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? )
10356 \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E}
10358 # The start remains the same
10359 my $unchanged = $1;
10360 my $position = $2;
10361 # Dummy entry to start at 1.
10362 my @grp = (1);
10363 # $3 = first ()-group in $grp_regexp
10364 # Put $3 in $grp[1], Put $4 in $grp[2]
10365 # so first ()-group in $grp_regexp is $grp[1];
10366 for(my $i = 3; defined $grp[$#grp]; $i++) {
10367 push @grp, eval '$'.$i;
10369 my $rv = $rplval;
10370 # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2
10371 # in the code to be executed
10372 $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx;
10373 # prepend with $_pAr_gRp1 = perlquote($1),
10374 my $set_args = "";
10375 for(my $i = 1;defined $grp[$i]; $i++) {
10376 $set_args .= "\$_pAr_gRp$i = \"" .
10377 ::perl_quote_scalar($grp[$i]) . "\";";
10379 $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
10380 }gxes) {
10386 # Add {} if no replacement strings in @command
10387 ($replacecount_ref, $len_ref, @command) =
10388 replacement_counts_and_lengths($transfer_files,$return_files,@command);
10389 if("@command" =~ /^[^ \t\n=]*\257</) {
10390 # Replacement string is (part of) the command (and not just
10391 # argument or variable definition V1={})
10392 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
10393 # Do no quote (Otherwise it will fail if the input contains spaces)
10394 $Global::noquote = 1;
10397 if($opt::sqlmaster and $Global::sql->append()) {
10398 $seq = $Global::sql->max_seq() + 1;
10401 return bless {
10402 'unget' => \@unget,
10403 'command' => \@command,
10404 'replacecount' => $replacecount_ref,
10405 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
10406 'context_replace' => $context_replace,
10407 'len' => $len_ref,
10408 'max_number_of_args' => $max_number_of_args,
10409 'size' => undef,
10410 'transfer_files' => $transfer_files,
10411 'return_files' => $return_files,
10412 'seq' => $seq,
10413 }, ref($class) || $class;
10416 sub merge_rpl_parts {
10417 # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
10418 # Input:
10419 # @in = the @command as given by the user
10420 # Uses:
10421 # $Global::parensleft
10422 # $Global::parensright
10423 # Returns:
10424 # @command with parts merged to keep {= and =} as one
10425 my @in = @_;
10426 my @out;
10427 my $l = quotemeta($Global::parensleft);
10428 my $r = quotemeta($Global::parensright);
10430 while(@in) {
10431 my $s = shift @in;
10432 $_ = $s;
10433 # Remove matching (right most) parens
10434 while(s/(.*)$l.*?$r/$1/os) {}
10435 if(/$l/o) {
10436 # Missing right parens
10437 while(@in) {
10438 $s .= " ".shift @in;
10439 $_ = $s;
10440 while(s/(.*)$l.*?$r/$1/os) {}
10441 if(not /$l/o) {
10442 last;
10446 push @out, $s;
10448 return @out;
10451 sub replacement_counts_and_lengths {
10452 # Count the number of different replacement strings.
10453 # Find the lengths of context for context groups and non-context
10454 # groups.
10455 # If no {} found in @command: add it to @command
10457 # Input:
10458 # \@transfer_files = array of filenames to transfer
10459 # \@return_files = array of filenames to return
10460 # @command = command template
10461 # Output:
10462 # \%replacecount, \%len, @command
10463 my $transfer_files = shift;
10464 my $return_files = shift;
10465 my @command = @_;
10466 my (%replacecount,%len);
10467 my $sum = 0;
10468 while($sum == 0) {
10469 # Count how many times each replacement string is used
10470 my @cmd = @command;
10471 my $contextlen = 0;
10472 my $noncontextlen = 0;
10473 my $contextgroups = 0;
10474 for my $c (@cmd) {
10475 while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) {
10476 # %replacecount = { "perlexpr" => number of times seen }
10477 # e.g { "s/a/b/" => 2 }
10478 $replacecount{$1}++;
10479 $sum++;
10481 # Measure the length of the context around the {= perl expr =}
10482 # Use that {=...=} has been replaced with \000 above
10483 # So there is no need to deal with \257<
10484 while($c =~ s/ (\S*\000\S*) //xs) {
10485 my $w = $1;
10486 $w =~ tr/\000//d; # Remove all \000's
10487 $contextlen += length($w);
10488 $contextgroups++;
10490 # All {= perl expr =} have been removed: The rest is non-context
10491 $noncontextlen += length $c;
10493 for(@$transfer_files, @$return_files,
10494 $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) {
10495 # Options that can contain replacement strings
10496 $_ or next;
10497 my $t = $_;
10498 while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) {
10499 # %replacecount = { "perlexpr" => number of times seen }
10500 # e.g { "$_++" => 2 }
10501 # But for tagstring we just need to mark it as seen
10502 $replacecount{$1} ||= 1;
10505 if($opt::bar) {
10506 # If the command does not contain {} force it to be computed
10507 # as it is being used by --bar
10508 $replacecount{""} ||= 1;
10511 $len{'context'} = 0+$contextlen;
10512 $len{'noncontext'} = $noncontextlen;
10513 $len{'contextgroups'} = $contextgroups;
10514 $len{'noncontextgroups'} = @cmd-$contextgroups;
10515 ::debug("length", "@command Context: ", $len{'context'},
10516 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
10517 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
10518 if($sum == 0) {
10519 if(not @command) {
10520 # Default command = {}
10521 @command = ("\257<\257>");
10522 } elsif(($opt::pipe or $opt::pipepart)
10523 and not $opt::fifo and not $opt::cat) {
10524 # With --pipe / --pipe-part you can have no replacement
10525 last;
10526 } else {
10527 # Append {} to the command if there are no {...}'s and no {=...=}
10528 push @command, ("\257<\257>");
10532 return(\%replacecount,\%len,@command);
10535 sub get {
10536 my $self = shift;
10537 if(@{$self->{'unget'}}) {
10538 my $cmd_line = shift @{$self->{'unget'}};
10539 return ($cmd_line);
10540 } else {
10541 if($opt::sqlworker) {
10542 # Get the sequence number from the SQL table
10543 $self->set_seq($SQL::next_seq);
10544 # Get the command from the SQL table
10545 $self->{'command'} = $SQL::command_ref;
10546 my @command;
10547 # Recompute replace counts based on the read command
10548 ($self->{'replacecount'},
10549 $self->{'len'}, @command) =
10550 replacement_counts_and_lengths($self->{'transfer_files'},
10551 $self->{'return_files'},
10552 @$SQL::command_ref);
10553 if("@command" =~ /^[^ \t\n=]*\257</) {
10554 # Replacement string is (part of) the command (and not just
10555 # argument or variable definition V1={})
10556 # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
10557 # Do no quote (Otherwise it will fail if the input contains spaces)
10558 $Global::noquote = 1;
10562 my $cmd_line = CommandLine->new($self->seq(),
10563 $self->{'command'},
10564 $self->{'arg_queue'},
10565 $self->{'context_replace'},
10566 $self->{'max_number_of_args'},
10567 $self->{'transfer_files'},
10568 $self->{'return_files'},
10569 $self->{'replacecount'},
10570 $self->{'len'},
10572 $cmd_line->populate();
10573 ::debug("init","cmd_line->number_of_args ",
10574 $cmd_line->number_of_args(), "\n");
10575 if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) {
10576 if($cmd_line->replaced() eq "") {
10577 # Empty command - pipe requires a command
10578 ::error("--pipe/--pipepart must have a command to pipe into ".
10579 "(e.g. 'cat').");
10580 ::wait_and_exit(255);
10582 } elsif($cmd_line->number_of_args() == 0) {
10583 # We did not get more args - maybe at EOF string?
10584 return undef;
10586 $self->set_seq($self->seq()+1);
10587 return $cmd_line;
10591 sub unget {
10592 my $self = shift;
10593 unshift @{$self->{'unget'}}, @_;
10596 sub empty {
10597 my $self = shift;
10598 my $empty = (not @{$self->{'unget'}}) &&
10599 $self->{'arg_queue'}->empty();
10600 ::debug("run", "CommandLineQueue->empty $empty");
10601 return $empty;
10604 sub seq {
10605 my $self = shift;
10606 return $self->{'seq'};
10609 sub set_seq {
10610 my $self = shift;
10611 $self->{'seq'} = shift;
10614 sub quote_args {
10615 my $self = shift;
10616 # If there is not command emulate |bash
10617 return $self->{'command'};
10621 package Limits::Command;
10623 # Maximal command line length (for -m and -X)
10624 sub max_length {
10625 # Find the max_length of a command line and cache it
10626 # Returns:
10627 # number of chars on the longest command line allowed
10628 if(not $Limits::Command::line_max_len) {
10629 # Disk cache of max command line length
10630 my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() .
10631 "/linelen";
10632 my $cached_limit;
10633 if(-e $len_cache) {
10634 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
10635 $cached_limit = <$fh>;
10636 close $fh;
10637 } else {
10638 $cached_limit = real_max_length();
10639 # If $HOME is write protected: Do not fail
10640 my $dir = ::dirname($len_cache);
10641 -d $dir or eval { File::Path::mkpath($dir); };
10642 open(my $fh, ">", $len_cache);
10643 print $fh $cached_limit;
10644 close $fh;
10646 $Limits::Command::line_max_len = tmux_length($cached_limit);
10647 if($opt::max_chars) {
10648 if($opt::max_chars <= $cached_limit) {
10649 $Limits::Command::line_max_len = $opt::max_chars;
10650 } else {
10651 ::warning("Value for -s option should be < $cached_limit.");
10655 return int($Limits::Command::line_max_len);
10658 sub real_max_length {
10659 # Find the max_length of a command line
10660 # Returns:
10661 # The maximal command line length
10662 # Use an upper bound of 8 MB if the shell allows for infinite long lengths
10663 my $upper = 8_000_000;
10664 my $len = 8;
10665 do {
10666 if($len > $upper) { return $len };
10667 $len *= 16;
10668 } while (is_acceptable_command_line_length($len));
10669 # Then search for the actual max length between 0 and upper bound
10670 return binary_find_max_length(int($len/16),$len);
10673 sub binary_find_max_length {
10674 # Given a lower and upper bound find the max_length of a command line
10675 # Returns:
10676 # number of chars on the longest command line allowed
10677 my ($lower, $upper) = (@_);
10678 if($lower == $upper or $lower == $upper-1) { return $lower; }
10679 my $middle = int (($upper-$lower)/2 + $lower);
10680 ::debug("init", "Maxlen: $lower,$upper,$middle : ");
10681 if (is_acceptable_command_line_length($middle)) {
10682 return binary_find_max_length($middle,$upper);
10683 } else {
10684 return binary_find_max_length($lower,$middle);
10688 sub is_acceptable_command_line_length {
10689 # Test if a command line of this length can run
10690 # in the current environment
10691 # Returns:
10692 # 0 if the command line length is too long
10693 # 1 otherwise
10694 my $len = shift;
10695 if($Global::parallel_env) {
10696 $len += length $Global::parallel_env;
10698 ::qqx("true "."x"x$len);
10699 ::debug("init", "$len=$? ");
10700 return not $?;
10703 sub tmux_length {
10704 # If $opt::tmux set, find the limit for tmux
10705 # tmux 1.8 has a 2kB limit
10706 # tmux 1.9 has a 16kB limit
10707 # tmux 2.0 has a 16kB limit
10708 # tmux 2.1 has a 16kB limit
10709 # tmux 2.2 has a 16kB limit
10710 # Input:
10711 # $len = maximal command line length
10712 # Returns:
10713 # $tmux_len = maximal length runable in tmux
10714 local $/ = "\n";
10715 my $len = shift;
10716 if($opt::tmux) {
10717 $ENV{'PARALLEL_TMUX'} ||= "tmux";
10718 if(not ::which($ENV{'PARALLEL_TMUX'})) {
10719 ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH.");
10720 ::wait_and_exit(255);
10722 my @out;
10723 for my $l (1, 2020, 16320, 100000, $len) {
10724 my $tmpfile = ::tmpname("tms");
10725 my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
10726 " -S $tmpfile new-session -d -n echo $l".
10727 ("x"x$l). " && echo $l; rm -f $tmpfile";
10728 push @out, ::qqx($tmuxcmd);
10729 ::rm($tmpfile);
10731 ::debug("tmux","tmux-out ",@out);
10732 chomp @out;
10733 # The arguments is given 3 times on the command line
10734 # and the wrapping is around 30 chars
10735 # (29 for tmux1.9, 33 for tmux1.8)
10736 my $tmux_len = ::max(@out);
10737 $len = ::min($len,int($tmux_len/4-33));
10738 ::debug("tmux","tmux-length ",$len);
10740 return $len;
10744 package RecordQueue;
10746 sub new {
10747 my $class = shift;
10748 my $fhs = shift;
10749 my $colsep = shift;
10750 my @unget = ();
10751 my $arg_sub_queue;
10752 if($opt::sqlworker) {
10753 # Open SQL table
10754 $arg_sub_queue = SQLRecordQueue->new();
10755 } elsif(defined $colsep) {
10756 # Open one file with colsep or CSV
10757 $arg_sub_queue = RecordColQueue->new($fhs);
10758 } else {
10759 # Open one or more files if multiple -a
10760 $arg_sub_queue = MultifileQueue->new($fhs);
10762 return bless {
10763 'unget' => \@unget,
10764 'arg_number' => 0,
10765 'arg_sub_queue' => $arg_sub_queue,
10766 }, ref($class) || $class;
10769 sub get {
10770 # Returns:
10771 # reference to array of Arg-objects
10772 my $self = shift;
10773 if(@{$self->{'unget'}}) {
10774 $self->{'arg_number'}++;
10775 # Flush cached computed replacements in Arg-objects
10776 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
10777 my $ret = shift @{$self->{'unget'}};
10778 if($ret) {
10779 map { $_->flush_cache() } @$ret;
10781 return $ret;
10783 my $ret = $self->{'arg_sub_queue'}->get();
10784 if($ret) {
10785 if(grep { index($_->orig(),"\0") > 0 } @$ret) {
10786 # Allow for \0 in position 0 because GNU Parallel uses "\0noarg"
10787 # to mean no-string
10788 ::warning("A NUL character in the input was replaced with \\0.",
10789 "NUL cannot be passed through in the argument list.",
10790 "Did you mean to use the --null option?");
10791 for(grep { index($_->orig(),"\0") > 0 } @$ret) {
10792 # Replace \0 with \\0
10793 my $a = $_->orig();
10794 $a =~ s/\0/\\0/g;
10795 $_->set_orig($a);
10798 if(defined $Global::max_number_of_args
10799 and $Global::max_number_of_args == 0) {
10800 ::debug("run", "Read 1 but return 0 args\n");
10801 # \0noarg => nothing (not the empty string)
10802 map { $_->set_orig("\0noarg"); } @$ret;
10804 # Flush cached computed replacements in Arg-objects
10805 # To fix: parallel --bar echo {%} ::: a b c ::: d e f
10806 map { $_->flush_cache() } @$ret;
10808 return $ret;
10811 sub unget {
10812 my $self = shift;
10813 ::debug("run", "RecordQueue-unget '@_'\n");
10814 $self->{'arg_number'} -= @_;
10815 unshift @{$self->{'unget'}}, @_;
10818 sub empty {
10819 my $self = shift;
10820 my $empty = (not @{$self->{'unget'}}) &&
10821 $self->{'arg_sub_queue'}->empty();
10822 ::debug("run", "RecordQueue->empty $empty");
10823 return $empty;
10826 sub arg_number {
10827 my $self = shift;
10828 return $self->{'arg_number'};
10832 package RecordColQueue;
10834 sub new {
10835 my $class = shift;
10836 my $fhs = shift;
10837 my @unget = ();
10838 my $arg_sub_queue = MultifileQueue->new($fhs);
10839 return bless {
10840 'unget' => \@unget,
10841 'arg_sub_queue' => $arg_sub_queue,
10842 }, ref($class) || $class;
10845 sub get {
10846 # Returns:
10847 # reference to array of Arg-objects
10848 my $self = shift;
10849 if(@{$self->{'unget'}}) {
10850 return shift @{$self->{'unget'}};
10852 my $unget_ref = $self->{'unget'};
10853 if($self->{'arg_sub_queue'}->empty()) {
10854 return undef;
10856 my $in_record = $self->{'arg_sub_queue'}->get();
10857 if(defined $in_record) {
10858 my @out_record = ();
10859 for my $arg (@$in_record) {
10860 ::debug("run", "RecordColQueue::arg $arg\n");
10861 my $line = $arg->orig();
10862 ::debug("run", "line='$line'\n");
10863 if($line ne "") {
10864 if($opt::csv) {
10865 # Parse CSV
10866 chomp $line;
10867 if(not $Global::csv->parse($line)) {
10868 die "CSV has unexpected format: ^$line^";
10870 for($Global::csv->fields()) {
10871 push @out_record, Arg->new($_);
10873 } else {
10874 for my $s (split /$opt::colsep/o, $line, -1) {
10875 push @out_record, Arg->new($s);
10878 } else {
10879 push @out_record, Arg->new("");
10882 return \@out_record;
10883 } else {
10884 return undef;
10888 sub unget {
10889 my $self = shift;
10890 ::debug("run", "RecordColQueue-unget '@_'\n");
10891 unshift @{$self->{'unget'}}, @_;
10894 sub empty {
10895 my $self = shift;
10896 my $empty = (not @{$self->{'unget'}}) &&
10897 $self->{'arg_sub_queue'}->empty();
10898 ::debug("run", "RecordColQueue->empty $empty");
10899 return $empty;
10903 package SQLRecordQueue;
10905 sub new {
10906 my $class = shift;
10907 my @unget = ();
10908 return bless {
10909 'unget' => \@unget,
10910 }, ref($class) || $class;
10913 sub get {
10914 # Returns:
10915 # reference to array of Arg-objects
10916 my $self = shift;
10917 if(@{$self->{'unget'}}) {
10918 return shift @{$self->{'unget'}};
10920 return $Global::sql->get_record();
10923 sub unget {
10924 my $self = shift;
10925 ::debug("run", "SQLRecordQueue-unget '@_'\n");
10926 unshift @{$self->{'unget'}}, @_;
10929 sub empty {
10930 my $self = shift;
10931 if(@{$self->{'unget'}}) { return 0; }
10932 my $get = $self->get();
10933 if(defined $get) {
10934 $self->unget($get);
10936 my $empty = not $get;
10937 ::debug("run", "SQLRecordQueue->empty $empty");
10938 return $empty;
10942 package MultifileQueue;
10944 @Global::unget_argv=();
10946 sub new {
10947 my $class = shift;
10948 my $fhs = shift;
10949 for my $fh (@$fhs) {
10950 if(-t $fh and -t ($Global::status_fd || *STDERR)) {
10951 ::warning("Input is read from the terminal. You either know what you",
10952 "are doing (in which case: YOU ARE AWESOME!) or you forgot",
10953 "::: or :::: or to pipe data into parallel. If so",
10954 "consider going through the tutorial: man parallel_tutorial",
10955 "Press CTRL-D to exit.");
10958 return bless {
10959 'unget' => \@Global::unget_argv,
10960 'fhs' => $fhs,
10961 'arg_matrix' => undef,
10962 }, ref($class) || $class;
10965 sub get {
10966 my $self = shift;
10967 if($opt::link) {
10968 return $self->link_get();
10969 } else {
10970 return $self->nest_get();
10974 sub unget {
10975 my $self = shift;
10976 ::debug("run", "MultifileQueue-unget '@_'\n");
10977 unshift @{$self->{'unget'}}, @_;
10980 sub empty {
10981 my $self = shift;
10982 my $empty = (not @Global::unget_argv) &&
10983 not @{$self->{'unget'}};
10984 for my $fh (@{$self->{'fhs'}}) {
10985 $empty &&= eof($fh);
10987 ::debug("run", "MultifileQueue->empty $empty ");
10988 return $empty;
10991 sub link_get {
10992 my $self = shift;
10993 if(@{$self->{'unget'}}) {
10994 return shift @{$self->{'unget'}};
10996 my @record = ();
10997 my $prepend;
10998 my $empty = 1;
10999 for my $fh (@{$self->{'fhs'}}) {
11000 my $arg = read_arg_from_fh($fh);
11001 if(defined $arg) {
11002 # Record $arg for recycling at end of file
11003 push @{$self->{'arg_matrix'}{$fh}}, $arg;
11004 push @record, $arg;
11005 $empty = 0;
11006 } else {
11007 ::debug("run", "EOA ");
11008 # End of file: Recycle arguments
11009 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
11010 # return last @{$args->{'args'}{$fh}};
11011 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
11014 if($empty) {
11015 return undef;
11016 } else {
11017 return \@record;
11021 sub nest_get {
11022 my $self = shift;
11023 if(@{$self->{'unget'}}) {
11024 return shift @{$self->{'unget'}};
11026 my @record = ();
11027 my $prepend;
11028 my $empty = 1;
11029 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
11030 if(not $self->{'arg_matrix'}) {
11031 # Initialize @arg_matrix with one arg from each file
11032 # read one line from each file
11033 my @first_arg_set;
11034 my $all_empty = 1;
11035 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
11036 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11037 if(defined $arg) {
11038 $all_empty = 0;
11040 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
11041 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
11043 if($all_empty) {
11044 # All filehandles were at eof or eof-string
11045 return undef;
11047 return [@first_arg_set];
11050 # Treat the case with one input source special. For multiple
11051 # input sources we need to remember all previously read values to
11052 # generate all combinations. But for one input source we can
11053 # forget the value after first use.
11054 if($no_of_inputsources == 1) {
11055 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
11056 if(defined($arg)) {
11057 return [$arg];
11059 return undef;
11061 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
11062 if(eof($self->{'fhs'}[$fhno])) {
11063 next;
11064 } else {
11065 # read one
11066 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
11067 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
11068 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
11069 $self->{'arg_matrix'}[$fhno][$len] = $arg;
11070 # make all new combinations
11071 my @combarg = ();
11072 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
11073 push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}],
11074 # Is input source --link'ed to the next?
11075 $opt::linkinputsource[$fhn+1]);
11077 # Find only combinations with this new entry
11078 $combarg[2*$fhno] = [$len,$len];
11079 # map combinations
11080 # [ 1, 3, 7 ], [ 2, 4, 1 ]
11081 # =>
11082 # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ]
11083 my @mapped;
11084 for my $c (expand_combinations(@combarg)) {
11085 my @a;
11086 for my $n (0 .. $no_of_inputsources - 1 ) {
11087 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
11089 push @mapped, \@a;
11091 # append the mapped to the ungotten arguments
11092 push @{$self->{'unget'}}, @mapped;
11093 # get the first
11094 if(@mapped) {
11095 return shift @{$self->{'unget'}};
11099 # all are eof or at EOF string; return from the unget queue
11100 return shift @{$self->{'unget'}};
11103 sub read_arg_from_fh {
11104 # Read one Arg from filehandle
11105 # Returns:
11106 # Arg-object with one read line
11107 # undef if end of file
11108 my $fh = shift;
11109 my $prepend;
11110 my $arg;
11111 my $half_record = 0;
11112 do {{
11113 # This makes 10% faster
11114 if(not defined ($arg = <$fh>)) {
11115 if(defined $prepend) {
11116 return Arg->new($prepend);
11117 } else {
11118 return undef;
11121 if($opt::csv) {
11122 # We need to read a full CSV line.
11123 if(($arg =~ y/"/"/) % 2 ) {
11124 # The number of " on the line is uneven:
11125 # If we were in a half_record => we have a full record now
11126 # If we were ouside a half_record => we are in a half record now
11127 $half_record = not $half_record;
11129 if($half_record) {
11130 # CSV half-record with quoting:
11131 # col1,"col2 2""x3"" board newline <-this one
11132 # cont",col3
11133 $prepend .= $arg;
11134 redo;
11135 } else {
11136 # Now we have a full CSV record
11139 # Remove delimiter
11140 chomp $arg;
11141 if($Global::end_of_file_string and
11142 $arg eq $Global::end_of_file_string) {
11143 # Ignore the rest of input file
11144 close $fh;
11145 ::debug("run", "EOF-string ($arg) met\n");
11146 if(defined $prepend) {
11147 return Arg->new($prepend);
11148 } else {
11149 return undef;
11152 if(defined $prepend) {
11153 $arg = $prepend.$arg; # For line continuation
11154 undef $prepend;
11156 if($Global::ignore_empty) {
11157 if($arg =~ /^\s*$/) {
11158 redo; # Try the next line
11161 if($Global::max_lines) {
11162 if($arg =~ /\s$/) {
11163 # Trailing space => continued on next line
11164 $prepend = $arg;
11165 redo;
11168 }} while (1 == 0); # Dummy loop {{}} for redo
11169 if(defined $arg) {
11170 return Arg->new($arg);
11171 } else {
11172 ::die_bug("multiread arg undefined");
11176 sub expand_combinations {
11177 # Input:
11178 # ([xmin,xmax], [ymin,ymax], ...)
11179 # Returns: ([x,y,...],[x,y,...])
11180 # where xmin <= x <= xmax and ymin <= y <= ymax
11181 my $minmax_ref = shift;
11182 my $link = shift; # This is linked to the next input source
11183 my $xmin = $$minmax_ref[0];
11184 my $xmax = $$minmax_ref[1];
11185 my @p;
11186 if(@_) {
11187 my @rest = expand_combinations(@_);
11188 if($link) {
11189 # Linked to next col with --link/:::+/::::+
11190 # TODO BUG does not wrap values if not same number of vals
11191 push(@p, map { [$$_[0], @$_] }
11192 grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest);
11193 } else {
11194 # If there are more columns: Compute those recursively
11195 for(my $x = $xmin; $x <= $xmax; $x++) {
11196 push @p, map { [$x, @$_] } @rest;
11199 } else {
11200 for(my $x = $xmin; $x <= $xmax; $x++) {
11201 push @p, [$x];
11204 return @p;
11208 package Arg;
11210 sub new {
11211 my $class = shift;
11212 my $orig = shift;
11213 my @hostgroups;
11214 if($opt::hostgroups) {
11215 if($orig =~ s:@(.+)::) {
11216 # We found hostgroups on the arg
11217 @hostgroups = split(/\+/, $1);
11218 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
11219 # This hostgroup is not defined using -S
11220 # Add it
11221 ::warning("Adding hostgroups: @hostgroups");
11222 # Add sshlogin
11223 for(grep { not defined $Global::hostgroups{$_} } @hostgroups) {
11224 my $sshlogin = SSHLogin->new($_);
11225 my $sshlogin_string = $sshlogin->string();
11226 $Global::host{$sshlogin_string} = $sshlogin;
11227 $Global::hostgroups{$sshlogin_string} = 1;
11230 } else {
11231 # No hostgroup on the arg => any hostgroup
11232 @hostgroups = (keys %Global::hostgroups);
11235 return bless {
11236 'orig' => $orig,
11237 'hostgroups' => \@hostgroups,
11238 }, ref($class) || $class;
11241 sub Q {
11242 # Q alias for ::shell_quote_scalar
11243 no warnings 'redefine';
11244 *Q = \&::shell_quote_scalar;
11245 return Q(@_);
11248 sub pQ {
11249 # pQ alias for ::perl_quote_scalar
11250 *pQ = \&::perl_quote_scalar;
11251 return pQ(@_);
11254 sub total_jobs {
11255 return $Global::JobQueue->total_jobs();
11259 my %perleval;
11260 my $job;
11261 sub skip {
11262 # shorthand for $job->skip();
11263 $job->skip();
11265 sub slot {
11266 # shorthand for $job->slot();
11267 $job->slot();
11269 sub seq {
11270 # shorthand for $job->seq();
11271 $job->seq();
11274 sub replace {
11275 # Calculates the corresponding value for a given perl expression
11276 # Returns:
11277 # The calculated string (quoted if asked for)
11278 my $self = shift;
11279 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
11280 my $quote = (shift) ? 1 : 0; # should the string be quoted?
11281 # This is actually a CommandLine-object,
11282 # but it looks nice to be able to say {= $job->slot() =}
11283 $job = shift;
11284 $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace
11285 if(not $self->{'cache'}{$perlexpr}) {
11286 # Only compute the value once
11287 # Use $_ as the variable to change
11288 local $_;
11289 if($Global::trim eq "n") {
11290 $_ = $self->{'orig'};
11291 } else {
11292 # Trim the input
11293 $_ = trim_of($self->{'orig'});
11295 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
11296 if(not $perleval{$perlexpr}) {
11297 # Make an anonymous function of the $perlexpr
11298 # And more importantly: Compile it only once
11299 if($perleval{$perlexpr} =
11300 eval('sub { no strict; no warnings; my $job = shift; '.
11301 $perlexpr.' }')) {
11302 # All is good
11303 } else {
11304 # The eval failed. Maybe $perlexpr is invalid perl?
11305 ::error("Cannot use $perlexpr: $@");
11306 ::wait_and_exit(255);
11309 # Execute the function
11310 $perleval{$perlexpr}->($job);
11311 $self->{'cache'}{$perlexpr} = $_;
11313 # Return the value quoted if needed
11314 return($quote ? Q($self->{'cache'}{$perlexpr})
11315 : $self->{'cache'}{$perlexpr});
11319 sub flush_cache {
11320 # Flush cache of computed values
11321 my $self = shift;
11322 $self->{'cache'} = undef;
11325 sub orig {
11326 my $self = shift;
11327 return $self->{'orig'};
11330 sub set_orig {
11331 my $self = shift;
11332 $self->{'orig'} = shift;
11335 sub trim_of {
11336 # Removes white space as specifed by --trim:
11337 # n = nothing
11338 # l = start
11339 # r = end
11340 # lr|rl = both
11341 # Returns:
11342 # string with white space removed as needed
11343 my @strings = map { defined $_ ? $_ : "" } (@_);
11344 my $arg;
11345 if($Global::trim eq "n") {
11346 # skip
11347 } elsif($Global::trim eq "l") {
11348 for my $arg (@strings) { $arg =~ s/^\s+//; }
11349 } elsif($Global::trim eq "r") {
11350 for my $arg (@strings) { $arg =~ s/\s+$//; }
11351 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
11352 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
11353 } else {
11354 ::error("--trim must be one of: r l rl lr.");
11355 ::wait_and_exit(255);
11357 return wantarray ? @strings : "@strings";
11361 package TimeoutQueue;
11363 sub new {
11364 my $class = shift;
11365 my $delta_time = shift;
11366 my ($pct);
11367 if($delta_time =~ /(\d+(\.\d+)?)%/) {
11368 # Timeout in percent
11369 $pct = $1/100;
11370 $delta_time = 1_000_000;
11372 $delta_time = ::multiply_time_units($delta_time);
11374 return bless {
11375 'queue' => [],
11376 'delta_time' => $delta_time,
11377 'pct' => $pct,
11378 'remedian_idx' => 0,
11379 'remedian_arr' => [],
11380 'remedian' => undef,
11381 }, ref($class) || $class;
11384 sub delta_time {
11385 my $self = shift;
11386 return $self->{'delta_time'};
11389 sub set_delta_time {
11390 my $self = shift;
11391 $self->{'delta_time'} = shift;
11394 sub remedian {
11395 my $self = shift;
11396 return $self->{'remedian'};
11399 sub set_remedian {
11400 # Set median of the last 999^3 (=997002999) values using Remedian
11402 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
11403 # robust averaging method for large data sets." Journal of the
11404 # American Statistical Association 85.409 (1990): 97-104.
11405 my $self = shift;
11406 my $val = shift;
11407 my $i = $self->{'remedian_idx'}++;
11408 my $rref = $self->{'remedian_arr'};
11409 $rref->[0][$i%999] = $val;
11410 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
11411 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
11412 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
11415 sub update_median_runtime {
11416 # Update delta_time based on runtime of finished job if timeout is
11417 # a percentage
11418 my $self = shift;
11419 my $runtime = shift;
11420 if($self->{'pct'}) {
11421 $self->set_remedian($runtime);
11422 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
11423 ::debug("run", "Timeout: $self->{'delta_time'}s ");
11427 sub process_timeouts {
11428 # Check if there was a timeout
11429 my $self = shift;
11430 # $self->{'queue'} is sorted by start time
11431 while (@{$self->{'queue'}}) {
11432 my $job = $self->{'queue'}[0];
11433 if($job->endtime()) {
11434 # Job already finished. No need to timeout the job
11435 # This could be because of --keep-order
11436 shift @{$self->{'queue'}};
11437 } elsif($job->is_timedout($self->{'delta_time'})) {
11438 # Need to shift off queue before kill
11439 # because kill calls usleep that calls process_timeouts
11440 shift @{$self->{'queue'}};
11441 ::warning("This job was killed because it timed out:",
11442 $job->replaced());
11443 $job->kill();
11444 } else {
11445 # Because they are sorted by start time the rest are later
11446 last;
11451 sub insert {
11452 my $self = shift;
11453 my $in = shift;
11454 push @{$self->{'queue'}}, $in;
11458 package SQL;
11460 sub new {
11461 my $class = shift;
11462 my $dburl = shift;
11463 $Global::use{"DBI"} ||= eval "use DBI; 1;";
11464 # +DBURL = append to this DBURL
11465 my $append = $dburl=~s/^\+//;
11466 my %options = parse_dburl(get_alias($dburl));
11467 my %driveralias = ("sqlite" => "SQLite",
11468 "sqlite3" => "SQLite",
11469 "pg" => "Pg",
11470 "postgres" => "Pg",
11471 "postgresql" => "Pg",
11472 "csv" => "CSV",
11473 "oracle" => "Oracle",
11474 "ora" => "Oracle");
11475 my $driver = $driveralias{$options{'databasedriver'}} ||
11476 $options{'databasedriver'};
11477 my $database = $options{'database'};
11478 my $host = $options{'host'} ? ";host=".$options{'host'} : "";
11479 my $port = $options{'port'} ? ";port=".$options{'port'} : "";
11480 my $dsn = "DBI:$driver:dbname=$database$host$port";
11481 my $userid = $options{'user'};
11482 my $password = $options{'password'};;
11483 my $dbh = DBI->connect($dsn, $userid, $password,
11484 { RaiseError => 1, AutoInactiveDestroy => 1 })
11485 or die $DBI::errstr;
11486 $dbh->{'PrintWarn'} = $Global::debug || 0;
11487 $dbh->{'PrintError'} = $Global::debug || 0;
11488 $dbh->{'RaiseError'} = 1;
11489 $dbh->{'ShowErrorStatement'} = 1;
11490 $dbh->{'HandleError'} = sub {};
11492 if(not defined $options{'table'}) {
11493 ::error("The DBURL ($dburl) must contain a table.");
11494 ::wait_and_exit(255);
11497 return bless {
11498 'dbh' => $dbh,
11499 'driver' => $driver,
11500 'max_number_of_args' => undef,
11501 'table' => $options{'table'},
11502 'append' => $append,
11503 }, ref($class) || $class;
11506 sub get_alias {
11507 my $alias = shift;
11508 $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql:
11509 if ($alias !~ /^:/) {
11510 return $alias;
11513 # Find the alias
11514 my $path;
11515 if (-l $0) {
11516 ($path) = readlink($0) =~ m|^(.*)/|;
11517 } else {
11518 ($path) = $0 =~ m|^(.*)/|;
11521 my @deprecated = ("$ENV{HOME}/.dburl.aliases",
11522 "$path/dburl.aliases", "$path/dburl.aliases.dist");
11523 for (@deprecated) {
11524 if(-r $_) {
11525 ::warning("$_ is deprecated. ".
11526 "Use .sql/aliases instead (read man sql).");
11529 my @urlalias=();
11530 check_permissions("$ENV{HOME}/.sql/aliases");
11531 check_permissions("$ENV{HOME}/.dburl.aliases");
11532 my @search = ("$ENV{HOME}/.sql/aliases",
11533 "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases",
11534 "$path/dburl.aliases", "$path/dburl.aliases.dist");
11535 for my $alias_file (@search) {
11536 # local $/ needed if -0 set
11537 local $/ = "\n";
11538 if(-r $alias_file) {
11539 open(my $in, "<", $alias_file) || die;
11540 push @urlalias, <$in>;
11541 close $in;
11544 my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/;
11545 # If we saw this before: we have an alias loop
11546 if(grep {$_ eq $alias_part } @Private::seen_aliases) {
11547 ::error("$alias_part is a cyclic alias.");
11548 exit -1;
11549 } else {
11550 push @Private::seen_aliases, $alias_part;
11553 my $dburl;
11554 for (@urlalias) {
11555 /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; }
11558 if($dburl) {
11559 return get_alias($dburl.$rest);
11560 } else {
11561 ::error("$alias is not defined in @search");
11562 exit(-1);
11566 sub check_permissions {
11567 my $file = shift;
11569 if(-e $file) {
11570 if(not -o $file) {
11571 my $username = (getpwuid($<))[0];
11572 ::warning("$file should be owned by $username: ".
11573 "chown $username $file");
11575 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
11576 $atime,$mtime,$ctime,$blksize,$blocks) = stat($file);
11577 if($mode & 077) {
11578 my $username = (getpwuid($<))[0];
11579 ::warning("$file should be only be readable by $username: ".
11580 "chmod 600 $file");
11585 sub parse_dburl {
11586 my $url = shift;
11587 my %options = ();
11588 # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]]
11590 if($url=~m!^(?:sql:)? # You can prefix with 'sql:'
11591 ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)|
11592 (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1)
11594 ([^:@/][^:@]*|) # Username ($2)
11596 :([^@]*) # Password ($3)
11599 ([^:/]*)? # Hostname ($4)
11602 ([^/]*)? # Port ($5)
11606 ([^/?]*)? # Database ($6)
11610 ([^?]*)? # Table ($7)
11614 (.*)? # Query ($8)
11616 $!ix) {
11617 $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1)));
11618 $options{user} = ::undef_if_empty(uri_unescape($2));
11619 $options{password} = ::undef_if_empty(uri_unescape($3));
11620 $options{host} = ::undef_if_empty(uri_unescape($4));
11621 $options{port} = ::undef_if_empty(uri_unescape($5));
11622 $options{database} = ::undef_if_empty(uri_unescape($6));
11623 $options{table} = ::undef_if_empty(uri_unescape($7));
11624 $options{query} = ::undef_if_empty(uri_unescape($8));
11625 ::debug("sql", "dburl $url\n");
11626 ::debug("sql", "databasedriver ", $options{databasedriver},
11627 " user ", $options{user},
11628 " password ", $options{password}, " host ", $options{host},
11629 " port ", $options{port}, " database ", $options{database},
11630 " table ", $options{table}, " query ", $options{query}, "\n");
11631 } else {
11632 ::error("$url is not a valid DBURL");
11633 exit 255;
11635 return %options;
11638 sub uri_unescape {
11639 # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm
11640 # to avoid depending on URI::Escape
11641 # This section is (C) Gisle Aas.
11642 # Note from RFC1630: "Sequences which start with a percent sign
11643 # but are not followed by two hexadecimal characters are reserved
11644 # for future extension"
11645 my $str = shift;
11646 if (@_ && wantarray) {
11647 # not executed for the common case of a single argument
11648 my @str = ($str, @_); # need to copy
11649 foreach (@str) {
11650 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
11652 return @str;
11654 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
11655 $str;
11658 sub run {
11659 my $self = shift;
11660 my $stmt = shift;
11661 if($self->{'driver'} eq "CSV") {
11662 $stmt=~ s/;$//;
11663 if($stmt eq "BEGIN" or
11664 $stmt eq "COMMIT") {
11665 return undef;
11668 my @retval;
11669 my $dbh = $self->{'dbh'};
11670 ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n");
11671 # Execute with the rest of the args - if any
11672 my $rv;
11673 my $sth;
11674 my $lockretry = 0;
11675 while($lockretry < 10) {
11676 $sth = $dbh->prepare($stmt);
11677 if($sth
11679 eval { $rv = $sth->execute(@_) }) {
11680 last;
11681 } else {
11682 if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
11684 $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
11685 # This is fine:
11686 # It is just a worker that reported back too late -
11687 # another worker had finished the job first
11688 # and the table was then dropped
11689 $rv = $sth = 0;
11690 last;
11692 if($DBI::errstr =~ /locked/) {
11693 ::debug("sql", "Lock retry: $lockretry");
11694 $lockretry++;
11695 ::usleep(rand()*300);
11696 } elsif(not $sth) {
11697 # Try again
11698 $lockretry++;
11699 } else {
11700 ::error($DBI::errstr);
11701 ::wait_and_exit(255);
11705 if($lockretry >= 10) {
11706 ::die_bug("retry > 10: $DBI::errstr");
11708 if($rv < 0 and $DBI::errstr){
11709 ::error($DBI::errstr);
11710 ::wait_and_exit(255);
11712 return $sth;
11715 sub get {
11716 my $self = shift;
11717 my $sth = $self->run(@_);
11718 my @retval;
11719 # If $sth = 0 it means the table was dropped by another process
11720 while($sth) {
11721 my @row = $sth->fetchrow_array();
11722 @row or last;
11723 push @retval, \@row;
11725 return \@retval;
11728 sub table {
11729 my $self = shift;
11730 return $self->{'table'};
11733 sub append {
11734 my $self = shift;
11735 return $self->{'append'};
11738 sub update {
11739 my $self = shift;
11740 my $stmt = shift;
11741 my $table = $self->table();
11742 $self->run("UPDATE $table $stmt",@_);
11745 sub output {
11746 my $self = shift;
11747 my $commandline = shift;
11749 $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ".
11750 $commandline->seq(),
11751 join("",@{$commandline->{'output'}{1}}),
11752 join("",@{$commandline->{'output'}{2}}));
11755 sub max_number_of_args {
11756 # Maximal number of args for this table
11757 my $self = shift;
11758 if(not $self->{'max_number_of_args'}) {
11759 # Read the number of args from the SQL table
11760 my $table = $self->table();
11761 my $v = $self->get("SELECT * FROM $table LIMIT 1;");
11762 my @reserved_columns = qw(Seq Host Starttime JobRuntime Send
11763 Receive Exitval _Signal Command Stdout Stderr);
11764 if(not $v) {
11765 ::error("$table contains no records");
11767 # Count the number of Vx columns
11768 $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns;
11770 return $self->{'max_number_of_args'};
11773 sub set_max_number_of_args {
11774 my $self = shift;
11775 $self->{'max_number_of_args'} = shift;
11778 sub create_table {
11779 my $self = shift;
11780 if($self->append()) { return; }
11781 my $max_number_of_args = shift;
11782 $self->set_max_number_of_args($max_number_of_args);
11783 my $table = $self->table();
11784 $self->run(qq(DROP TABLE IF EXISTS $table;));
11785 # BIGINT and TEXT are not supported in these databases or are too small
11786 my %vartype = (
11787 "Oracle" => { "BIGINT" => "NUMBER(19,0)",
11788 "TEXT" => "CLOB", },
11789 "mysql" => { "TEXT" => "LONGTEXT", },
11790 "CSV" => { "BIGINT" => "INT",
11791 "FLOAT" => "REAL", },
11793 my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT";
11794 my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT";
11795 my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)";
11796 my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args());
11797 $self->run(qq{CREATE TABLE $table
11798 (Seq $BIGINT,
11799 Host $TEXT,
11800 Starttime $FLOAT,
11801 JobRuntime $FLOAT,
11802 Send $BIGINT,
11803 Receive $BIGINT,
11804 Exitval $BIGINT,
11805 _Signal $BIGINT,
11806 Command $TEXT,}.
11807 $v_def.
11808 qq{Stdout $TEXT,
11809 Stderr $TEXT);});
11812 sub insert_records {
11813 my $self = shift;
11814 my $seq = shift;
11815 my $command_ref = shift;
11816 my $record_ref = shift;
11817 my $table = $self->table();
11818 # For SQL encode the command with \257 space as split points
11819 my $command = join("\257 ",@$command_ref);
11820 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
11821 # Two extra value due to $seq, Exitval, Send
11822 my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4);
11823 $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ".
11824 "VALUES ($v_vals);", $seq, $command, -1000,
11825 0, @$record_ref[1..$#$record_ref]);
11828 sub get_record {
11829 my $self = shift;
11830 my @retval;
11831 my $table = $self->table();
11832 my @v_cols = map { ", V$_" } (1..$self->max_number_of_args());
11833 my $v = $self->get("SELECT Seq, Command @v_cols FROM $table ".
11834 "WHERE Exitval = -1000 ORDER BY Seq LIMIT 1;");
11835 if($v->[0]) {
11836 my $val_ref = $v->[0];
11837 # Mark record as taken
11838 my $seq = shift @$val_ref;
11839 # Save the sequence number to use when running the job
11840 $SQL::next_seq = $seq;
11841 $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220);
11842 my @command = split /\257 /, shift @$val_ref;
11843 $SQL::command_ref = \@command;
11844 for (@$val_ref) {
11845 push @retval, Arg->new($_);
11848 if(@retval) {
11849 return \@retval;
11850 } else {
11851 return undef;
11855 sub total_jobs {
11856 my $self = shift;
11857 my $table = $self->table();
11858 my $v = $self->get("SELECT count(*) FROM $table;");
11859 if($v->[0]) {
11860 return $v->[0]->[0];
11861 } else {
11862 ::die_bug("SQL::total_jobs");
11866 sub max_seq {
11867 my $self = shift;
11868 my $table = $self->table();
11869 my $v = $self->get("SELECT max(Seq) FROM $table;");
11870 if($v->[0]) {
11871 return $v->[0]->[0];
11872 } else {
11873 ::die_bug("SQL::max_seq");
11877 sub finished {
11878 # Check if there are any jobs left in the SQL table that do not
11879 # have a "real" exitval
11880 my $self = shift;
11881 if($opt::wait or $Global::start_sqlworker) {
11882 my $table = $self->table();
11883 my $rv = $self->get("select Seq,Exitval from $table ".
11884 "where Exitval <= -1000 limit 1");
11885 return not $rv->[0];
11886 } else {
11887 return 1;
11891 package Semaphore;
11893 # This package provides a counting semaphore
11895 # If a process dies without releasing the semaphore the next process
11896 # that needs that entry will clean up dead semaphores
11898 # The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each
11899 # file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the
11900 # process holding the entry. If the process dies, the entry can be
11901 # taken by another process.
11903 sub new {
11904 my $class = shift;
11905 my $id = shift;
11906 my $count = shift;
11907 $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
11908 $id = "id-".$id; # To distinguish it from a process id
11909 my $parallel_locks = $Global::cache_dir . "/semaphores";
11910 -d $parallel_locks or ::mkdir_or_die($parallel_locks);
11911 my $lockdir = "$parallel_locks/$id";
11913 my $lockfile = $lockdir.".lock";
11914 if($count < 1) { ::die_bug("semaphore-count: $count"); }
11915 return bless {
11916 'lockfile' => $lockfile,
11917 'lockfh' => Symbol::gensym(),
11918 'lockdir' => $lockdir,
11919 'id' => $id,
11920 'idfile' => $lockdir."/".$id,
11921 'pid' => $$,
11922 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
11923 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
11924 }, ref($class) || $class;
11927 sub remove_dead_locks {
11928 my $self = shift;
11929 my $lockdir = $self->{'lockdir'};
11931 for my $d (glob "$lockdir/*") {
11932 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
11933 my ($pid, $host) = ($1, $2);
11934 if($host eq ::hostname()) {
11935 if(kill 0, $pid) {
11936 ::debug("sem", "Alive: $pid $d\n");
11937 } else {
11938 ::debug("sem", "Dead: $d\n");
11939 ::rm($d);
11945 sub acquire {
11946 my $self = shift;
11947 my $sleep = 1; # 1 ms
11948 my $start_time = time;
11949 while(1) {
11950 # Can we get a lock?
11951 $self->atomic_link_if_count_less_than() and last;
11952 $self->remove_dead_locks();
11953 # Retry slower and slower up to 1 second
11954 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
11955 # Random to avoid every sleeping job waking up at the same time
11956 ::usleep(rand()*$sleep);
11957 if($opt::semaphoretimeout) {
11958 if($opt::semaphoretimeout > 0
11960 time - $start_time > $opt::semaphoretimeout) {
11961 # Timeout: Take the semaphore anyway
11962 ::warning("Semaphore timed out. Stealing the semaphore.");
11963 if(not -e $self->{'idfile'}) {
11964 open (my $fh, ">", $self->{'idfile'}) or
11965 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
11966 close $fh;
11968 link $self->{'idfile'}, $self->{'pidfile'};
11969 last;
11971 if($opt::semaphoretimeout < 0
11973 time - $start_time > -$opt::semaphoretimeout) {
11974 # Timeout: Exit
11975 ::warning("Semaphore timed out. Exiting.");
11976 exit(1);
11977 last;
11981 ::debug("sem", "acquired $self->{'pid'}\n");
11984 sub release {
11985 my $self = shift;
11986 ::rm($self->{'pidfile'});
11987 if($self->nlinks() == 1) {
11988 # This is the last link, so atomic cleanup
11989 $self->lock();
11990 if($self->nlinks() == 1) {
11991 ::rm($self->{'idfile'});
11992 rmdir $self->{'lockdir'};
11994 $self->unlock();
11996 ::debug("run", "released $self->{'pid'}\n");
11999 sub pid_change {
12000 # This should do what release()+acquire() would do without having
12001 # to re-acquire the semaphore
12002 my $self = shift;
12004 my $old_pidfile = $self->{'pidfile'};
12005 $self->{'pid'} = $$;
12006 $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
12007 my $retval = link $self->{'idfile'}, $self->{'pidfile'};
12008 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12009 ::rm($old_pidfile);
12012 sub atomic_link_if_count_less_than {
12013 # Link $file1 to $file2 if nlinks to $file1 < $count
12014 my $self = shift;
12015 my $retval = 0;
12016 $self->lock();
12017 my $nlinks = $self->nlinks();
12018 ::debug("sem","$nlinks<$self->{'count'} ");
12019 if($nlinks < $self->{'count'}) {
12020 -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'});
12021 if(not -e $self->{'idfile'}) {
12022 open (my $fh, ">", $self->{'idfile'}) or
12023 ::die_bug("write_idfile: $self->{'idfile'}");
12024 close $fh;
12026 $retval = link $self->{'idfile'}, $self->{'pidfile'};
12027 ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
12029 $self->unlock();
12030 ::debug("sem", "atomic $retval");
12031 return $retval;
12034 sub nlinks {
12035 my $self = shift;
12036 if(-e $self->{'idfile'}) {
12037 return (stat(_))[3];
12038 } else {
12039 return 0;
12043 sub lock {
12044 my $self = shift;
12045 my $sleep = 100; # 100 ms
12046 my $total_sleep = 0;
12047 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
12048 my $locked = 0;
12049 while(not $locked) {
12050 if(tell($self->{'lockfh'}) == -1) {
12051 # File not open
12052 open($self->{'lockfh'}, ">", $self->{'lockfile'})
12053 or ::debug("run", "Cannot open $self->{'lockfile'}");
12055 if($self->{'lockfh'}) {
12056 # File is open
12057 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
12058 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
12059 # The file is locked: No need to retry
12060 $locked = 1;
12061 last;
12062 } else {
12063 if ($! =~ m/Function not implemented/) {
12064 ::warning("flock: $!",
12065 "Will wait for a random while.");
12066 ::usleep(rand(5000));
12067 # File cannot be locked: No need to retry
12068 $locked = 2;
12069 last;
12073 # Locking failed in first round
12074 # Sleep and try again
12075 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
12076 # Random to avoid every sleeping job waking up at the same time
12077 ::usleep(rand()*$sleep);
12078 $total_sleep += $sleep;
12079 if($opt::semaphoretimeout) {
12080 if($opt::semaphoretimeout > 0
12082 $total_sleep/1000 > $opt::semaphoretimeout) {
12083 # Timeout: Take the semaphore anyway
12084 ::warning("Semaphore timed out. Taking the semaphore.");
12085 $locked = 3;
12086 last;
12088 if($opt::semaphoretimeout < 0
12090 $total_sleep/1000 > -$opt::semaphoretimeout) {
12091 # Timeout: Exit
12092 ::warning("Semaphore timed out. Exiting.");
12093 $locked = 4;
12094 last;
12096 } else {
12097 if($total_sleep/1000 > 30) {
12098 ::warning("Semaphore stuck for 30 seconds. ".
12099 "Consider using --semaphoretimeout.");
12103 ::debug("run", "locked $self->{'lockfile'}");
12106 sub unlock {
12107 my $self = shift;
12108 ::rm($self->{'lockfile'});
12109 close $self->{'lockfh'};
12110 ::debug("run", "unlocked\n");
12113 # Keep perl -w happy
12115 $opt::x = $Semaphore::timeout = $Semaphore::wait =
12116 $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg =
12117 $Global::max_slot_number = $opt::session;