t/chainlint.pl

   1 #!/usr/bin/env perl
   2 #
   3 # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com>
   4 #
   5 # This tool scans shell scripts for test definitions and checks those tests for
   6 # problems, such as broken &&-chains, which might hide bugs in the tests
   7 # themselves or in behaviors being exercised by the tests.
   8 #
   9 # Input arguments are pathnames of shell scripts containing test definitions,
  10 # or globs referencing a collection of scripts. For each problem discovered,
  11 # the pathname of the script containing the test is printed along with the test
  12 # name and the test body with a `?!FOO?!` annotation at the location of each
  13 # detected problem, where "FOO" is a tag such as "AMP" which indicates a broken
  14 # &&-chain. Returns zero if no problems are discovered, otherwise non-zero.
  15
  16 use warnings;
  17 use strict;
  18 use Config;
  19 use File::Glob;
  20 use Getopt::Long;
  21
  22 my $jobs = -1;
  23 my $show_stats;
  24 my $emit_all;
  25
  26 # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3
  27 # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although
  28 # similar to lexical analyzers for other languages, this one differs in a few
  29 # substantial ways due to quirks of the shell command language.
  30 #
  31 # For instance, in many languages, newline is just whitespace like space or
  32 # TAB, but in shell a newline is a command separator, thus a distinct lexical
  33 # token. A newline is significant and returned as a distinct token even at the
  34 # end of a shell comment.
  35 #
  36 # In other languages, `1+2` would typically be scanned as three tokens
  37 # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar
# `1 + 2`, which embeds whitespace, is scanned as three tokens in shell, as well.
  39 # In shell, several characters with special meaning lose that meaning when not
  40 # surrounded by whitespace. For instance, the negation operator `!` is special
  41 # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is
  42 # just a plain character in the longer token "foo!uucp". In many other
  43 # languages, `"string"/foo:'string'` might be scanned as five tokens ("string",
  44 # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token.
  45 #
  46 # The lexical analyzer for the shell command language is also somewhat unusual
  47 # in that it recursively invokes the parser to handle the body of `$(...)`
  48 # expressions which can contain arbitrary shell code. Such expressions may be
  49 # encountered both inside and outside of double-quoted strings.
  50 #
  51 # The lexical analyzer is responsible for consuming shell here-doc bodies which
  52 # extend from the line following a `<<TAG` operator until a line consisting
  53 # solely of `TAG`. Here-doc consumption begins when a newline is encountered.
  54 # It is legal for multiple here-doc `<<TAG` operators to be present on a single
  55 # line, in which case their bodies must be present one following the next, and
  56 # are consumed in the (left-to-right) order the `<<TAG` operators appear on the
  57 # line. A special complication is that the bodies of all here-docs must be
  58 # consumed when the newline is encountered even if the parse context depth has
  59 # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs
  60 # "A" and "B" must be consumed even though "A" was introduced outside the
  61 # recursive parse context in which "B" was introduced and in which the newline
  62 # is encountered.
  63 package Lexer;
  64
  65 sub new {
  66         my ($class, $parser, $s) = @_;
  67         bless {
  68                 parser => $parser,
  69                 buff => $s,
  70                 heretags => []
  71         } => $class;
  72 }
  73
  74 sub scan_heredoc_tag {
  75         my $self = shift @_;
  76         ${$self->{buff}} =~ /\G(-?)/gc;
  77         my $indented = $1;
  78         my $tag = $self->scan_token();
  79         $tag =~ s/['"\\]//g;
  80         push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag");
  81         return "<<$indented$tag";
  82 }
  83
  84 sub scan_op {
  85         my ($self, $c) = @_;
  86         my $b = $self->{buff};
  87         return $c unless $$b =~ /\G(.)/sgc;
  88         my $cc = $c . $1;
  89         return scan_heredoc_tag($self) if $cc eq '<<';
  90         return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/;
  91         pos($$b)--;
  92         return $c;
  93 }
  94
  95 sub scan_sqstring {
  96         my $self = shift @_;
  97         ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc;
  98         return "'" . $1;
  99 }
 100
 101 sub scan_dqstring {
 102         my $self = shift @_;
 103         my $b = $self->{buff};
 104         my $s = '"';
 105         while (1) {
 106                 # slurp up non-special characters
 107                 $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc;
 108                 # handle special characters
 109                 last unless $$b =~ /\G(.)/sgc;
 110                 my $c = $1;
 111                 $s .= '"', last if $c eq '"';
 112                 $s .= '$' . $self->scan_dollar(), next if $c eq '$';
 113                 if ($c eq '\\') {
 114                         $s .= '\\', last unless $$b =~ /\G(.)/sgc;
 115                         $c = $1;
 116                         next if $c eq "\n"; # line splice
 117                         # backslash escapes only $, `, ", \ in dq-string
 118                         $s .= '\\' unless $c =~ /^[\$`"\\]$/;
 119                         $s .= $c;
 120                         next;
 121                 }
 122                 die("internal error scanning dq-string '$c'\n");
 123         }
 124         return $s;
 125 }
 126
 127 sub scan_balanced {
 128         my ($self, $c1, $c2) = @_;
 129         my $b = $self->{buff};
 130         my $depth = 1;
 131         my $s = $c1;
 132         while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
 133                 $s .= $1;
 134                 $depth++, next if $s =~ /\Q$c1\E$/;
 135                 $depth--;
 136                 last if $depth == 0;
 137         }
 138         return $s;
 139 }
 140
 141 sub scan_subst {
 142         my $self = shift @_;
 143         my @tokens = $self->{parser}->parse(qr/^\)$/);
 144         $self->{parser}->next_token(); # closing ")"
 145         return @tokens;
 146 }
 147
 148 sub scan_dollar {
 149         my $self = shift @_;
 150         my $b = $self->{buff};
 151         return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
 152         return '(' . join(' ', $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
 153         return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
 154         return $1 if $$b =~ /\G(\w+)/gc; # $var
 155         return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
 156         return '';
 157 }
 158
 159 sub swallow_heredocs {
 160         my $self = shift @_;
 161         my $b = $self->{buff};
 162         my $tags = $self->{heretags};
 163         while (my $tag = shift @$tags) {
 164                 my $indent = $tag =~ s/^\t// ? '\\s*' : '';
 165                 $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc;
 166         }
 167 }
 168
 169 sub scan_token {
 170         my $self = shift @_;
 171         my $b = $self->{buff};
 172         my $token = '';
 173 RESTART:
 174         $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
 175         return "\n" if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
 176         while (1) {
 177                 # slurp up non-special characters
 178                 $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
 179                 # handle special characters
 180                 last unless $$b =~ /\G(.)/sgc;
 181                 my $c = $1;
 182                 last if $c =~ /^[ \t]$/; # whitespace ends token
 183                 pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
 184                 $token .= $self->scan_sqstring(), next if $c eq "'";
 185                 $token .= $self->scan_dqstring(), next if $c eq '"';
 186                 $token .= $c . $self->scan_dollar(), next if $c eq '$';
 187                 $self->swallow_heredocs(), $token = $c, last if $c eq "\n";
 188                 $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/;
 189                 $token = $c, last if $c =~ /^[(){}]$/;
 190                 if ($c eq '\\') {
 191                         $token .= '\\', last unless $$b =~ /\G(.)/sgc;
 192                         $c = $1;
 193                         next if $c eq "\n" && length($token); # line splice
 194                         goto RESTART if $c eq "\n"; # line splice
 195                         $token .= '\\' . $c;
 196                         next;
 197                 }
 198                 die("internal error scanning character '$c'\n");
 199         }
 200         return length($token) ? $token : undef;
 201 }
 202
 203 # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
 204 # is a recursive descent parser very roughly modeled after section 2.10 "Shell
 205 # Grammar" of POSIX chapter 2 "Shell Command Language".
 206 package ShellParser;
 207
 208 sub new {
 209         my ($class, $s) = @_;
 210         my $self = bless {
 211                 buff => [],
 212                 stop => [],
 213                 output => []
 214         } => $class;
 215         $self->{lexer} = Lexer->new($self, $s);
 216         return $self;
 217 }
 218
 219 sub next_token {
 220         my $self = shift @_;
 221         return pop(@{$self->{buff}}) if @{$self->{buff}};
 222         return $self->{lexer}->scan_token();
 223 }
 224
 225 sub untoken {
 226         my $self = shift @_;
 227         push(@{$self->{buff}}, @_);
 228 }
 229
 230 sub peek {
 231         my $self = shift @_;
 232         my $token = $self->next_token();
 233         return undef unless defined($token);
 234         $self->untoken($token);
 235         return $token;
 236 }
 237
 238 sub stop_at {
 239         my ($self, $token) = @_;
 240         return 1 unless defined($token);
 241         my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
 242         return defined($stop) && $token =~ $stop;
 243 }
 244
 245 sub expect {
 246         my ($self, $expect) = @_;
 247         my $token = $self->next_token();
 248         return $token if defined($token) && $token eq $expect;
 249         push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "<end-of-input>") . "'\n");
 250         $self->untoken($token) if defined($token);
 251         return ();
 252 }
 253
 254 sub optional_newlines {
 255         my $self = shift @_;
 256         my @tokens;
 257         while (my $token = $self->peek()) {
 258                 last unless $token eq "\n";
 259                 push(@tokens, $self->next_token());
 260         }
 261         return @tokens;
 262 }
 263
 264 sub parse_group {
 265         my $self = shift @_;
 266         return ($self->parse(qr/^}$/),
 267                 $self->expect('}'));
 268 }
 269
 270 sub parse_subshell {
 271         my $self = shift @_;
 272         return ($self->parse(qr/^\)$/),
 273                 $self->expect(')'));
 274 }
 275
 276 sub parse_case_pattern {
 277         my $self = shift @_;
 278         my @tokens;
 279         while (defined(my $token = $self->next_token())) {
 280                 push(@tokens, $token);
 281                 last if $token eq ')';
 282         }
 283         return @tokens;
 284 }
 285
 286 sub parse_case {
 287         my $self = shift @_;
 288         my @tokens;
 289         push(@tokens,
 290              $self->next_token(), # subject
 291              $self->optional_newlines(),
 292              $self->expect('in'),
 293              $self->optional_newlines());
 294         while (1) {
 295                 my $token = $self->peek();
 296                 last unless defined($token) && $token ne 'esac';
 297                 push(@tokens,
 298                      $self->parse_case_pattern(),
 299                      $self->optional_newlines(),
 300                      $self->parse(qr/^(?:;;|esac)$/)); # item body
 301                 $token = $self->peek();
 302                 last unless defined($token) && $token ne 'esac';
 303                 push(@tokens,
 304                      $self->expect(';;'),
 305                      $self->optional_newlines());
 306         }
 307         push(@tokens, $self->expect('esac'));
 308         return @tokens;
 309 }
 310
 311 sub parse_for {
 312         my $self = shift @_;
 313         my @tokens;
 314         push(@tokens,
 315              $self->next_token(), # variable
 316              $self->optional_newlines());
 317         my $token = $self->peek();
 318         if (defined($token) && $token eq 'in') {
 319                 push(@tokens,
 320                      $self->expect('in'),
 321                      $self->optional_newlines());
 322         }
 323         push(@tokens,
 324              $self->parse(qr/^do$/), # items
 325              $self->expect('do'),
 326              $self->optional_newlines(),
 327              $self->parse_loop_body(),
 328              $self->expect('done'));
 329         return @tokens;
 330 }
 331
 332 sub parse_if {
 333         my $self = shift @_;
 334         my @tokens;
 335         while (1) {
 336                 push(@tokens,
 337                      $self->parse(qr/^then$/), # if/elif condition
 338                      $self->expect('then'),
 339                      $self->optional_newlines(),
 340                      $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
 341                 my $token = $self->peek();
 342                 last unless defined($token) && $token eq 'elif';
 343                 push(@tokens, $self->expect('elif'));
 344         }
 345         my $token = $self->peek();
 346         if (defined($token) && $token eq 'else') {
 347                 push(@tokens,
 348                      $self->expect('else'),
 349                      $self->optional_newlines(),
 350                      $self->parse(qr/^fi$/)); # else body
 351         }
 352         push(@tokens, $self->expect('fi'));
 353         return @tokens;
 354 }
 355
 356 sub parse_loop_body {
 357         my $self = shift @_;
 358         return $self->parse(qr/^done$/);
 359 }
 360
 361 sub parse_loop {
 362         my $self = shift @_;
 363         return ($self->parse(qr/^do$/), # condition
 364                 $self->expect('do'),
 365                 $self->optional_newlines(),
 366                 $self->parse_loop_body(),
 367                 $self->expect('done'));
 368 }
 369
 370 sub parse_func {
 371         my $self = shift @_;
 372         return ($self->expect('('),
 373                 $self->expect(')'),
 374                 $self->optional_newlines(),
 375                 $self->parse_cmd()); # body
 376 }
 377
 378 sub parse_bash_array_assignment {
 379         my $self = shift @_;
 380         my @tokens = $self->expect('(');
 381         while (defined(my $token = $self->next_token())) {
 382                 push(@tokens, $token);
 383                 last if $token eq ')';
 384         }
 385         return @tokens;
 386 }
 387
 388 my %compound = (
 389         '{' => \&parse_group,
 390         '(' => \&parse_subshell,
 391         'case' => \&parse_case,
 392         'for' => \&parse_for,
 393         'if' => \&parse_if,
 394         'until' => \&parse_loop,
 395         'while' => \&parse_loop);
 396
 397 sub parse_cmd {
 398         my $self = shift @_;
 399         my $cmd = $self->next_token();
 400         return () unless defined($cmd);
 401         return $cmd if $cmd eq "\n";
 402
 403         my $token;
 404         my @tokens = $cmd;
 405         if ($cmd eq '!') {
 406                 push(@tokens, $self->parse_cmd());
 407                 return @tokens;
 408         } elsif (my $f = $compound{$cmd}) {
 409                 push(@tokens, $self->$f());
 410         } elsif (defined($token = $self->peek()) && $token eq '(') {
 411                 if ($cmd !~ /\w=$/) {
 412                         push(@tokens, $self->parse_func());
 413                         return @tokens;
 414                 }
 415                 $tokens[-1] .= join(' ', $self->parse_bash_array_assignment());
 416         }
 417
 418         while (defined(my $token = $self->next_token())) {
 419                 $self->untoken($token), last if $self->stop_at($token);
 420                 push(@tokens, $token);
 421                 last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/;
 422         }
 423         push(@tokens, $self->next_token()) if $tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n";
 424         return @tokens;
 425 }
 426
 427 sub accumulate {
 428         my ($self, $tokens, $cmd) = @_;
 429         push(@$tokens, @$cmd);
 430 }
 431
 432 sub parse {
 433         my ($self, $stop) = @_;
 434         push(@{$self->{stop}}, $stop);
 435         goto DONE if $self->stop_at($self->peek());
 436         my @tokens;
 437         while (my @cmd = $self->parse_cmd()) {
 438                 $self->accumulate(\@tokens, \@cmd);
 439                 last if $self->stop_at($self->peek());
 440         }
 441 DONE:
 442         pop(@{$self->{stop}});
 443         return @tokens;
 444 }
 445
 446 # TestParser is a subclass of ShellParser which, beyond parsing shell script
 447 # code, is also imbued with semantic knowledge of test construction, and checks
 448 # tests for common problems (such as broken &&-chains) which might hide bugs in
 449 # the tests themselves or in behaviors being exercised by the tests. As such,
 450 # TestParser is only called upon to parse test bodies, not the top-level
 451 # scripts in which the tests are defined.
 452 package TestParser;
 453
 454 use base 'ShellParser';
 455
 456 sub find_non_nl {
 457         my $tokens = shift @_;
 458         my $n = shift @_;
 459         $n = $#$tokens if !defined($n);
 460         $n-- while $n >= 0 && $$tokens[$n] eq "\n";
 461         return $n;
 462 }
 463
 464 sub ends_with {
 465         my ($tokens, $needles) = @_;
 466         my $n = find_non_nl($tokens);
 467         for my $needle (reverse(@$needles)) {
 468                 return undef if $n < 0;
 469                 $n = find_non_nl($tokens, $n), next if $needle eq "\n";
 470                 return undef if $$tokens[$n] !~ $needle;
 471                 $n--;
 472         }
 473         return 1;
 474 }
 475
 476 sub match_ending {
 477         my ($tokens, $endings) = @_;
 478         for my $needles (@$endings) {
 479                 next if @$tokens < scalar(grep {$_ ne "\n"} @$needles);
 480                 return 1 if ends_with($tokens, $needles);
 481         }
 482         return undef;
 483 }
 484
 485 sub parse_loop_body {
 486         my $self = shift @_;
 487         my @tokens = $self->SUPER::parse_loop_body(@_);
 488         # did loop signal failure via "|| return" or "|| exit"?
 489         return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens);
 490         # did loop upstream of a pipe signal failure via "|| echo 'impossible
 491         # text'" as the final command in the loop body?
 492         return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
 493         # flag missing "return/exit" handling explicit failure in loop body
 494         my $n = find_non_nl(\@tokens);
 495         splice(@tokens, $n + 1, 0, '?!LOOP?!');
 496         return @tokens;
 497 }
 498
 499 my @safe_endings = (
 500         [qr/^(?:&&|\|\||\||&)$/],
 501         [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
 502         [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
 503         [qr/^(?:exit|return|continue)$/],
 504         [qr/^(?:exit|return|continue)$/, qr/^;$/]);
 505
 506 sub accumulate {
 507         my ($self, $tokens, $cmd) = @_;
 508         goto DONE unless @$tokens;
 509         goto DONE if @$cmd == 1 && $$cmd[0] eq "\n";
 510
 511         # did previous command end with "&&", "|", "|| return" or similar?
 512         goto DONE if match_ending($tokens, \@safe_endings);
 513
 514         # if this command handles "$?" specially, then okay for previous
 515         # command to be missing "&&"
 516         for my $token (@$cmd) {
 517                 goto DONE if $token =~ /\$\?/;
 518         }
 519
 520         # if this command is "false", "return 1", or "exit 1" (which signal
 521         # failure explicitly), then okay for all preceding commands to be
 522         # missing "&&"
 523         if ($$cmd[0] =~ /^(?:false|return|exit)$/) {
 524                 @$tokens = grep(!/^\?!AMP\?!$/, @$tokens);
 525                 goto DONE;
 526         }
 527
 528         # flag missing "&&" at end of previous command
 529         my $n = find_non_nl($tokens);
 530         splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0;
 531
 532 DONE:
 533         $self->SUPER::accumulate($tokens, $cmd);
 534 }
 535
 536 # ScriptParser is a subclass of ShellParser which identifies individual test
 537 # definitions within test scripts, and passes each test body through TestParser
# to identify possible problems. ScriptParser detects test definitions not only
 539 # at the top-level of test scripts but also within compound commands such as
 540 # loops and function definitions.
 541 package ScriptParser;
 542
 543 use base 'ShellParser';
 544
 545 sub new {
 546         my $class = shift @_;
 547         my $self = $class->SUPER::new(@_);
 548         $self->{ntests} = 0;
 549         return $self;
 550 }
 551
 552 # extract the raw content of a token, which may be a single string or a
 553 # composition of multiple strings and non-string character runs; for instance,
 554 # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
 555 sub unwrap {
 556         my $token = @_ ? shift @_ : $_;
 557         # simple case: 'sqstring' or "dqstring"
 558         return $token if $token =~ s/^'([^']*)'$/$1/;
 559         return $token if $token =~ s/^"([^"]*)"$/$1/;
 560
 561         # composite case
 562         my ($s, $q, $escaped);
 563         while (1) {
 564                 # slurp up non-special characters
 565                 $s .= $1 if $token =~ /\G([^\\'"]*)/gc;
 566                 # handle special characters
 567                 last unless $token =~ /\G(.)/sgc;
 568                 my $c = $1;
 569                 $q = undef, next if defined($q) && $c eq $q;
 570                 $q = $c, next if !defined($q) && $c =~ /^['"]$/;
 571                 if ($c eq '\\') {
 572                         last unless $token =~ /\G(.)/sgc;
 573                         $c = $1;
 574                         $s .= '\\' if $c eq "\n"; # preserve line splice
 575                 }
 576                 $s .= $c;
 577         }
 578         return $s
 579 }
 580
 581 sub check_test {
 582         my $self = shift @_;
 583         my ($title, $body) = map(unwrap, @_);
 584         $self->{ntests}++;
 585         my $parser = TestParser->new(\$body);
 586         my @tokens = $parser->parse();
 587         return unless $emit_all || grep(/\?![^?]+\?!/, @tokens);
 588         my $c = main::fd_colors(1);
 589         my $checked = join(' ', @tokens);
 590         $checked =~ s/^\n//;
 591         $checked =~ s/^ //mg;
 592         $checked =~ s/ $//mg;
 593         $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg;
 594         $checked .= "\n" unless $checked =~ /\n$/;
 595         push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
 596 }
 597
 598 sub parse_cmd {
 599         my $self = shift @_;
 600         my @tokens = $self->SUPER::parse_cmd();
 601         return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/;
 602         my $n = $#tokens;
 603         $n-- while $n >= 0 && $tokens[$n] =~ /^(?:[;&\n|]|&&|\|\|)$/;
 604         $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body
 605         $self->check_test($tokens[2], $tokens[3]) if $n > 2;  # prereq title body
 606         return @tokens;
 607 }
 608
 609 # main contains high-level functionality for processing command-line switches,
 610 # feeding input test scripts to ScriptParser, and reporting results.
 611 package main;
 612
 613 my $getnow = sub { return time(); };
 614 my $interval = sub { return time() - shift; };
 615 if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
 616         $getnow = sub { return [Time::HiRes::gettimeofday()]; };
 617         $interval = sub { return Time::HiRes::tv_interval(shift); };
 618 }
 619
 620 # Restore TERM if test framework set it to "dumb" so 'tput' will work; do this
 621 # outside of get_colors() since under 'ithreads' all threads use %ENV of main
 622 # thread and ignore %ENV changes in subthreads.
 623 $ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM};
 624
 625 my @NOCOLORS = (bold => '', rev => '', reset => '', blue => '', green => '', red => '');
 626 my %COLORS = ();
 627 sub get_colors {
 628         return \%COLORS if %COLORS;
 629         if (exists($ENV{NO_COLOR}) ||
 630             system("tput sgr0 >/dev/null 2>&1") != 0 ||
 631             system("tput bold >/dev/null 2>&1") != 0 ||
 632             system("tput rev  >/dev/null 2>&1") != 0 ||
 633             system("tput setaf 1 >/dev/null 2>&1") != 0) {
 634                 %COLORS = @NOCOLORS;
 635                 return \%COLORS;
 636         }
 637         %COLORS = (bold  => `tput bold`,
 638                    rev   => `tput rev`,
 639                    reset => `tput sgr0`,
 640                    blue  => `tput setaf 4`,
 641                    green => `tput setaf 2`,
 642                    red   => `tput setaf 1`);
 643         chomp(%COLORS);
 644         return \%COLORS;
 645 }
 646
 647 my %FD_COLORS = ();
 648 sub fd_colors {
 649         my $fd = shift;
 650         return $FD_COLORS{$fd} if exists($FD_COLORS{$fd});
 651         $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS};
 652         return $FD_COLORS{$fd};
 653 }
 654
 655 sub ncores {
 656         # Windows
 657         return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS});
 658         # Linux / MSYS2 / Cygwin / WSL
 659         do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor\s*:/, <>)); } if -r '/proc/cpuinfo';
 660         # macOS & BSD
 661         return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/;
 662         return 1;
 663 }
 664
 665 sub show_stats {
 666         my ($start_time, $stats) = @_;
 667         my $walltime = $interval->($start_time);
 668         my ($usertime) = times();
 669         my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
 670         my $c = fd_colors(2);
 671         print(STDERR $c->{green});
 672         for (@$stats) {
 673                 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
 674                 print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
 675                 $total_workers++;
 676                 $total_scripts += $nscripts;
 677                 $total_tests += $ntests;
 678                 $total_errs += $nerrs;
 679         }
 680         printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
 681 }
 682
 683 sub check_script {
 684         my ($id, $next_script, $emit) = @_;
 685         my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
 686         while (my $path = $next_script->()) {
 687                 $nscripts++;
 688                 my $fh;
 689                 unless (open($fh, "<", $path)) {
 690                         $emit->("?!ERR?! $path: $!\n");
 691                         next;
 692                 }
 693                 my $s = do { local $/; <$fh> };
 694                 close($fh);
 695                 my $parser = ScriptParser->new(\$s);
 696                 1 while $parser->parse_cmd();
 697                 if (@{$parser->{output}}) {
 698                         my $c = fd_colors(1);
 699                         my $s = join('', @{$parser->{output}});
 700                         $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s);
 701                         $nerrs += () = $s =~ /\?![^?]+\?!/g;
 702                 }
 703                 $ntests += $parser->{ntests};
 704         }
 705         return [$id, $nscripts, $ntests, $nerrs];
 706 }
 707
 708 sub exit_code {
 709         my $stats = shift @_;
 710         for (@$stats) {
 711                 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
 712                 return 1 if $nerrs;
 713         }
 714         return 0;
 715 }
 716
 717 Getopt::Long::Configure(qw{bundling});
 718 GetOptions(
 719         "emit-all!" => \$emit_all,
 720         "jobs|j=i" => \$jobs,
 721         "stats|show-stats!" => \$show_stats) or die("option error\n");
 722 $jobs = ncores() if $jobs < 1;
 723
 724 my $start_time = $getnow->();
 725 my @stats;
 726
 727 my @scripts;
 728 push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV);
 729 unless (@scripts) {
 730         show_stats($start_time, \@stats) if $show_stats;
 731         exit;
 732 }
 733
 734 unless ($Config{useithreads} && eval {
 735         require threads; threads->import();
 736         require Thread::Queue; Thread::Queue->import();
 737         1;
 738         }) {
 739         push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
 740         show_stats($start_time, \@stats) if $show_stats;
 741         exit(exit_code(\@stats));
 742 }
 743
 744 my $script_queue = Thread::Queue->new();
 745 my $output_queue = Thread::Queue->new();
 746
 747 sub next_script { return $script_queue->dequeue(); }
 748 sub emit { $output_queue->enqueue(@_); }
 749
 750 sub monitor {
 751         while (my $s = $output_queue->dequeue()) {
 752                 print($s);
 753         }
 754 }
 755
 756 my $mon = threads->create({'context' => 'void'}, \&monitor);
 757 threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs;
 758
 759 $script_queue->enqueue(@scripts);
 760 $script_queue->end();
 761
 762 for (threads->list()) {
 763         push(@stats, $_->join()) unless $_ == $mon;
 764 }
 765
 766 $output_queue->end();
 767 $mon->join();
 768
 769 show_stats($start_time, \@stats) if $show_stats;
 770 exit(exit_code(\@stats));