t/chainlint.pl

   1 #!/usr/bin/env perl
   2 #
   3 # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com>
   4 #
   5 # This tool scans shell scripts for test definitions and checks those tests for
   6 # problems, such as broken &&-chains, which might hide bugs in the tests
   7 # themselves or in behaviors being exercised by the tests.
   8 #
   9 # Input arguments are pathnames of shell scripts containing test definitions,
  10 # or globs referencing a collection of scripts. For each problem discovered,
  11 # the pathname of the script containing the test is printed along with the test
  12 # name and the test body with a `?!FOO?!` annotation at the location of each
  13 # detected problem, where "FOO" is a tag such as "AMP" which indicates a broken
  14 # &&-chain. Returns zero if no problems are discovered, otherwise non-zero.
  15
  16 use warnings;
  17 use strict;
  18 use Config;
  19 use File::Glob;
  20 use Getopt::Long;
  21
  22 my $jobs = -1;
  23 my $show_stats;
  24 my $emit_all;
  25
  26 # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3
  27 # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although
  28 # similar to lexical analyzers for other languages, this one differs in a few
  29 # substantial ways due to quirks of the shell command language.
  30 #
  31 # For instance, in many languages, newline is just whitespace like space or
  32 # TAB, but in shell a newline is a command separator, thus a distinct lexical
  33 # token. A newline is significant and returned as a distinct token even at the
  34 # end of a shell comment.
  35 #
  36 # In other languages, `1+2` would typically be scanned as three tokens
  37 # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar
  38 # `1 + 2`, which embeds whitespace, is scanned as three tokens in shell, as well.
  39 # In shell, several characters with special meaning lose that meaning when not
  40 # surrounded by whitespace. For instance, the negation operator `!` is special
  41 # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is
  42 # just a plain character in the longer token "foo!uucp". In many other
  43 # languages, `"string"/foo:'string'` might be scanned as five tokens ("string",
  44 # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token.
  45 #
  46 # The lexical analyzer for the shell command language is also somewhat unusual
  47 # in that it recursively invokes the parser to handle the body of `$(...)`
  48 # expressions which can contain arbitrary shell code. Such expressions may be
  49 # encountered both inside and outside of double-quoted strings.
  50 #
  51 # The lexical analyzer is responsible for consuming shell here-doc bodies which
  52 # extend from the line following a `<<TAG` operator until a line consisting
  53 # solely of `TAG`. Here-doc consumption begins when a newline is encountered.
  54 # It is legal for multiple here-doc `<<TAG` operators to be present on a single
  55 # line, in which case their bodies must be present one following the next, and
  56 # are consumed in the (left-to-right) order the `<<TAG` operators appear on the
  57 # line. A special complication is that the bodies of all here-docs must be
  58 # consumed when the newline is encountered even if the parse context depth has
  59 # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs
  60 # "A" and "B" must be consumed even though "A" was introduced outside the
  61 # recursive parse context in which "B" was introduced and in which the newline
  62 # is encountered.
  63 package Lexer;
  64
  65 sub new {
  66         my ($class, $parser, $s) = @_;
  67         bless {
  68                 parser => $parser,
  69                 buff => $s,
  70                 lineno => 1,
  71                 heretags => []
  72         } => $class;
  73 }
  74
  75 sub scan_heredoc_tag {
  76         my $self = shift @_;
  77         ${$self->{buff}} =~ /\G(-?)/gc;
  78         my $indented = $1;
  79         my $token = $self->scan_token();
  80         return "<<$indented" unless $token;
  81         my $tag = $token->[0];
  82         $tag =~ s/['"\\]//g;
  83         push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag");
  84         return "<<$indented$tag";
  85 }
  86
  87 sub scan_op {
  88         my ($self, $c) = @_;
  89         my $b = $self->{buff};
  90         return $c unless $$b =~ /\G(.)/sgc;
  91         my $cc = $c . $1;
  92         return scan_heredoc_tag($self) if $cc eq '<<';
  93         return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/;
  94         pos($$b)--;
  95         return $c;
  96 }
  97
  98 sub scan_sqstring {
  99         my $self = shift @_;
 100         ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc;
 101         my $s = $1;
 102         $self->{lineno} += () = $s =~ /\n/sg;
 103         return "'" . $s;
 104 }
 105
 106 sub scan_dqstring {
 107         my $self = shift @_;
 108         my $b = $self->{buff};
 109         my $s = '"';
 110         while (1) {
 111                 # slurp up non-special characters
 112                 $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc;
 113                 # handle special characters
 114                 last unless $$b =~ /\G(.)/sgc;
 115                 my $c = $1;
 116                 $s .= '"', last if $c eq '"';
 117                 $s .= '$' . $self->scan_dollar(), next if $c eq '$';
 118                 if ($c eq '\\') {
 119                         $s .= '\\', last unless $$b =~ /\G(.)/sgc;
 120                         $c = $1;
 121                         $self->{lineno}++, next if $c eq "\n"; # line splice
 122                         # backslash escapes only $, `, ", \ in dq-string
 123                         $s .= '\\' unless $c =~ /^[\$`"\\]$/;
 124                         $s .= $c;
 125                         next;
 126                 }
 127                 die("internal error scanning dq-string '$c'\n");
 128         }
 129         $self->{lineno} += () = $s =~ /\n/sg;
 130         return $s;
 131 }
 132
 133 sub scan_balanced {
 134         my ($self, $c1, $c2) = @_;
 135         my $b = $self->{buff};
 136         my $depth = 1;
 137         my $s = $c1;
 138         while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
 139                 $s .= $1;
 140                 $depth++, next if $s =~ /\Q$c1\E$/;
 141                 $depth--;
 142                 last if $depth == 0;
 143         }
 144         $self->{lineno} += () = $s =~ /\n/sg;
 145         return $s;
 146 }
 147
 148 sub scan_subst {
 149         my $self = shift @_;
 150         my @tokens = $self->{parser}->parse(qr/^\)$/);
 151         $self->{parser}->next_token(); # closing ")"
 152         return @tokens;
 153 }
 154
 155 sub scan_dollar {
 156         my $self = shift @_;
 157         my $b = $self->{buff};
 158         return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
 159         return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
 160         return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
 161         return $1 if $$b =~ /\G(\w+)/gc; # $var
 162         return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
 163         return '';
 164 }
 165
 166 sub swallow_heredocs {
 167         my $self = shift @_;
 168         my $b = $self->{buff};
 169         my $tags = $self->{heretags};
 170         while (my $tag = shift @$tags) {
 171                 my $start = pos($$b);
 172                 my $indent = $tag =~ s/^\t// ? '\\s*' : '';
 173                 $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc;
 174                 my $body = substr($$b, $start, pos($$b) - $start);
 175                 $self->{lineno} += () = $body =~ /\n/sg;
 176         }
 177 }
 178
 179 sub scan_token {
 180         my $self = shift @_;
 181         my $b = $self->{buff};
 182         my $token = '';
 183         my ($start, $startln);
 184 RESTART:
 185         $startln = $self->{lineno};
 186         $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
 187         $start = pos($$b) || 0;
 188         $self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
 189         while (1) {
 190                 # slurp up non-special characters
 191                 $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
 192                 # handle special characters
 193                 last unless $$b =~ /\G(.)/sgc;
 194                 my $c = $1;
 195                 pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token
 196                 pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
 197                 $token .= $self->scan_sqstring(), next if $c eq "'";
 198                 $token .= $self->scan_dqstring(), next if $c eq '"';
 199                 $token .= $c . $self->scan_dollar(), next if $c eq '$';
 200                 $self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n";
 201                 $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/;
 202                 $token = $c, last if $c =~ /^[(){}]$/;
 203                 if ($c eq '\\') {
 204                         $token .= '\\', last unless $$b =~ /\G(.)/sgc;
 205                         $c = $1;
 206                         $self->{lineno}++, next if $c eq "\n" && length($token); # line splice
 207                         $self->{lineno}++, goto RESTART if $c eq "\n"; # line splice
 208                         $token .= '\\' . $c;
 209                         next;
 210                 }
 211                 die("internal error scanning character '$c'\n");
 212         }
 213         return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef;
 214 }
 215
 216 # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
 217 # is a recursive descent parser very roughly modeled after section 2.10 "Shell
 218 # Grammar" of POSIX chapter 2 "Shell Command Language".
 219 package ShellParser;
 220
 221 sub new {
 222         my ($class, $s) = @_;
 223         my $self = bless {
 224                 buff => [],
 225                 stop => [],
 226                 output => []
 227         } => $class;
 228         $self->{lexer} = Lexer->new($self, $s);
 229         return $self;
 230 }
 231
 232 sub next_token {
 233         my $self = shift @_;
 234         return pop(@{$self->{buff}}) if @{$self->{buff}};
 235         return $self->{lexer}->scan_token();
 236 }
 237
 238 sub untoken {
 239         my $self = shift @_;
 240         push(@{$self->{buff}}, @_);
 241 }
 242
 243 sub peek {
 244         my $self = shift @_;
 245         my $token = $self->next_token();
 246         return undef unless defined($token);
 247         $self->untoken($token);
 248         return $token;
 249 }
 250
 251 sub stop_at {
 252         my ($self, $token) = @_;
 253         return 1 unless defined($token);
 254         my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
 255         return defined($stop) && $token->[0] =~ $stop;
 256 }
 257
 258 sub expect {
 259         my ($self, $expect) = @_;
 260         my $token = $self->next_token();
 261         return $token if defined($token) && $token->[0] eq $expect;
 262         push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n");
 263         $self->untoken($token) if defined($token);
 264         return ();
 265 }
 266
 267 sub optional_newlines {
 268         my $self = shift @_;
 269         my @tokens;
 270         while (my $token = $self->peek()) {
 271                 last unless $token->[0] eq "\n";
 272                 push(@tokens, $self->next_token());
 273         }
 274         return @tokens;
 275 }
 276
 277 sub parse_group {
 278         my $self = shift @_;
 279         return ($self->parse(qr/^}$/),
 280                 $self->expect('}'));
 281 }
 282
 283 sub parse_subshell {
 284         my $self = shift @_;
 285         return ($self->parse(qr/^\)$/),
 286                 $self->expect(')'));
 287 }
 288
 289 sub parse_case_pattern {
 290         my $self = shift @_;
 291         my @tokens;
 292         while (defined(my $token = $self->next_token())) {
 293                 push(@tokens, $token);
 294                 last if $token->[0] eq ')';
 295         }
 296         return @tokens;
 297 }
 298
 299 sub parse_case {
 300         my $self = shift @_;
 301         my @tokens;
 302         push(@tokens,
 303              $self->next_token(), # subject
 304              $self->optional_newlines(),
 305              $self->expect('in'),
 306              $self->optional_newlines());
 307         while (1) {
 308                 my $token = $self->peek();
 309                 last unless defined($token) && $token->[0] ne 'esac';
 310                 push(@tokens,
 311                      $self->parse_case_pattern(),
 312                      $self->optional_newlines(),
 313                      $self->parse(qr/^(?:;;|esac)$/)); # item body
 314                 $token = $self->peek();
 315                 last unless defined($token) && $token->[0] ne 'esac';
 316                 push(@tokens,
 317                      $self->expect(';;'),
 318                      $self->optional_newlines());
 319         }
 320         push(@tokens, $self->expect('esac'));
 321         return @tokens;
 322 }
 323
 324 sub parse_for {
 325         my $self = shift @_;
 326         my @tokens;
 327         push(@tokens,
 328              $self->next_token(), # variable
 329              $self->optional_newlines());
 330         my $token = $self->peek();
 331         if (defined($token) && $token->[0] eq 'in') {
 332                 push(@tokens,
 333                      $self->expect('in'),
 334                      $self->optional_newlines());
 335         }
 336         push(@tokens,
 337              $self->parse(qr/^do$/), # items
 338              $self->expect('do'),
 339              $self->optional_newlines(),
 340              $self->parse_loop_body(),
 341              $self->expect('done'));
 342         return @tokens;
 343 }
 344
 345 sub parse_if {
 346         my $self = shift @_;
 347         my @tokens;
 348         while (1) {
 349                 push(@tokens,
 350                      $self->parse(qr/^then$/), # if/elif condition
 351                      $self->expect('then'),
 352                      $self->optional_newlines(),
 353                      $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
 354                 my $token = $self->peek();
 355                 last unless defined($token) && $token->[0] eq 'elif';
 356                 push(@tokens, $self->expect('elif'));
 357         }
 358         my $token = $self->peek();
 359         if (defined($token) && $token->[0] eq 'else') {
 360                 push(@tokens,
 361                      $self->expect('else'),
 362                      $self->optional_newlines(),
 363                      $self->parse(qr/^fi$/)); # else body
 364         }
 365         push(@tokens, $self->expect('fi'));
 366         return @tokens;
 367 }
 368
 369 sub parse_loop_body {
 370         my $self = shift @_;
 371         return $self->parse(qr/^done$/);
 372 }
 373
 374 sub parse_loop {
 375         my $self = shift @_;
 376         return ($self->parse(qr/^do$/), # condition
 377                 $self->expect('do'),
 378                 $self->optional_newlines(),
 379                 $self->parse_loop_body(),
 380                 $self->expect('done'));
 381 }
 382
 383 sub parse_func {
 384         my $self = shift @_;
 385         return ($self->expect('('),
 386                 $self->expect(')'),
 387                 $self->optional_newlines(),
 388                 $self->parse_cmd()); # body
 389 }
 390
 391 sub parse_bash_array_assignment {
 392         my $self = shift @_;
 393         my @tokens = $self->expect('(');
 394         while (defined(my $token = $self->next_token())) {
 395                 push(@tokens, $token);
 396                 last if $token->[0] eq ')';
 397         }
 398         return @tokens;
 399 }
 400
 401 my %compound = (
 402         '{' => \&parse_group,
 403         '(' => \&parse_subshell,
 404         'case' => \&parse_case,
 405         'for' => \&parse_for,
 406         'if' => \&parse_if,
 407         'until' => \&parse_loop,
 408         'while' => \&parse_loop);
 409
 410 sub parse_cmd {
 411         my $self = shift @_;
 412         my $cmd = $self->next_token();
 413         return () unless defined($cmd);
 414         return $cmd if $cmd->[0] eq "\n";
 415
 416         my $token;
 417         my @tokens = $cmd;
 418         if ($cmd->[0] eq '!') {
 419                 push(@tokens, $self->parse_cmd());
 420                 return @tokens;
 421         } elsif (my $f = $compound{$cmd->[0]}) {
 422                 push(@tokens, $self->$f());
 423         } elsif (defined($token = $self->peek()) && $token->[0] eq '(') {
 424                 if ($cmd->[0] !~ /\w=$/) {
 425                         push(@tokens, $self->parse_func());
 426                         return @tokens;
 427                 }
 428                 my @array = $self->parse_bash_array_assignment();
 429                 $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array);
 430                 $tokens[-1]->[2] = $array[$#array][2] if @array;
 431         }
 432
 433         while (defined(my $token = $self->next_token())) {
 434                 $self->untoken($token), last if $self->stop_at($token);
 435                 push(@tokens, $token);
 436                 last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
 437         }
 438         push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n";
 439         return @tokens;
 440 }
 441
 442 sub accumulate {
 443         my ($self, $tokens, $cmd) = @_;
 444         push(@$tokens, @$cmd);
 445 }
 446
 447 sub parse {
 448         my ($self, $stop) = @_;
 449         push(@{$self->{stop}}, $stop);
 450         goto DONE if $self->stop_at($self->peek());
 451         my @tokens;
 452         while (my @cmd = $self->parse_cmd()) {
 453                 $self->accumulate(\@tokens, \@cmd);
 454                 last if $self->stop_at($self->peek());
 455         }
 456 DONE:
 457         pop(@{$self->{stop}});
 458         return @tokens;
 459 }
 460
 461 # TestParser is a subclass of ShellParser which, beyond parsing shell script
 462 # code, is also imbued with semantic knowledge of test construction, and checks
 463 # tests for common problems (such as broken &&-chains) which might hide bugs in
 464 # the tests themselves or in behaviors being exercised by the tests. As such,
 465 # TestParser is only called upon to parse test bodies, not the top-level
 466 # scripts in which the tests are defined.
 467 package TestParser;
 468
 469 use base 'ShellParser';
 470
 471 sub new {
 472         my $class = shift @_;
 473         my $self = $class->SUPER::new(@_);
 474         $self->{problems} = [];
 475         return $self;
 476 }
 477
 478 sub find_non_nl {
 479         my $tokens = shift @_;
 480         my $n = shift @_;
 481         $n = $#$tokens if !defined($n);
 482         $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n";
 483         return $n;
 484 }
 485
 486 sub ends_with {
 487         my ($tokens, $needles) = @_;
 488         my $n = find_non_nl($tokens);
 489         for my $needle (reverse(@$needles)) {
 490                 return undef if $n < 0;
 491                 $n = find_non_nl($tokens, $n), next if $needle eq "\n";
 492                 return undef if $$tokens[$n]->[0] !~ $needle;
 493                 $n--;
 494         }
 495         return 1;
 496 }
 497
 498 sub match_ending {
 499         my ($tokens, $endings) = @_;
 500         for my $needles (@$endings) {
 501                 next if @$tokens < scalar(grep {$_ ne "\n"} @$needles);
 502                 return 1 if ends_with($tokens, $needles);
 503         }
 504         return undef;
 505 }
 506
 507 sub parse_loop_body {
 508         my $self = shift @_;
 509         my @tokens = $self->SUPER::parse_loop_body(@_);
 510         # did loop signal failure via "|| return" or "|| exit"?
 511         return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens;
 512         # did loop upstream of a pipe signal failure via "|| echo 'impossible
 513         # text'" as the final command in the loop body?
 514         return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
 515         # flag missing "return/exit" handling explicit failure in loop body
 516         my $n = find_non_nl(\@tokens);
 517         push(@{$self->{problems}}, ['LOOP', $tokens[$n]]);
 518         return @tokens;
 519 }
 520
 521 my @safe_endings = (
 522         [qr/^(?:&&|\|\||\||&)$/],
 523         [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
 524         [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
 525         [qr/^(?:exit|return|continue)$/],
 526         [qr/^(?:exit|return|continue)$/, qr/^;$/]);
 527
 528 sub accumulate {
 529         my ($self, $tokens, $cmd) = @_;
 530         my $problems = $self->{problems};
 531
 532         # no previous command to check for missing "&&"
 533         goto DONE unless @$tokens;
 534
 535         # new command is empty line; can't yet check if previous is missing "&&"
 536         goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n";
 537
 538         # did previous command end with "&&", "|", "|| return" or similar?
 539         goto DONE if match_ending($tokens, \@safe_endings);
 540
 541         # if this command handles "$?" specially, then okay for previous
 542         # command to be missing "&&"
 543         for my $token (@$cmd) {
 544                 goto DONE if $token->[0] =~ /\$\?/;
 545         }
 546
 547         # if this command is "false", "return 1", or "exit 1" (which signal
 548         # failure explicitly), then okay for all preceding commands to be
 549         # missing "&&"
 550         if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) {
 551                 @$problems = grep {$_->[0] ne 'AMP'} @$problems;
 552                 goto DONE;
 553         }
 554
 555         # flag missing "&&" at end of previous command
 556         my $n = find_non_nl($tokens);
 557         push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0;
 558
 559 DONE:
 560         $self->SUPER::accumulate($tokens, $cmd);
 561 }
 562
 563 # ScriptParser is a subclass of ShellParser which identifies individual test
 564 # definitions within test scripts, and passes each test body through TestParser
 565 # to identify possible problems. ScriptParser detects test definitions not only
 566 # at the top-level of test scripts but also within compound commands such as
 567 # loops and function definitions.
 568 package ScriptParser;
 569
 570 use base 'ShellParser';
 571
 572 sub new {
 573         my $class = shift @_;
 574         my $self = $class->SUPER::new(@_);
 575         $self->{ntests} = 0;
 576         return $self;
 577 }
 578
 579 # extract the raw content of a token, which may be a single string or a
 580 # composition of multiple strings and non-string character runs; for instance,
 581 # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
 582 sub unwrap {
 583         my $token = (@_ ? shift @_ : $_)->[0];
 584         # simple case: 'sqstring' or "dqstring"
 585         return $token if $token =~ s/^'([^']*)'$/$1/;
 586         return $token if $token =~ s/^"([^"]*)"$/$1/;
 587
 588         # composite case
 589         my ($s, $q, $escaped);
 590         while (1) {
 591                 # slurp up non-special characters
 592                 $s .= $1 if $token =~ /\G([^\\'"]*)/gc;
 593                 # handle special characters
 594                 last unless $token =~ /\G(.)/sgc;
 595                 my $c = $1;
 596                 $q = undef, next if defined($q) && $c eq $q;
 597                 $q = $c, next if !defined($q) && $c =~ /^['"]$/;
 598                 if ($c eq '\\') {
 599                         last unless $token =~ /\G(.)/sgc;
 600                         $c = $1;
 601                         $s .= '\\' if $c eq "\n"; # preserve line splice
 602                 }
 603                 $s .= $c;
 604         }
 605         return $s
 606 }
 607
 608 sub check_test {
 609         my $self = shift @_;
 610         my ($title, $body) = map(unwrap, @_);
 611         $self->{ntests}++;
 612         my $parser = TestParser->new(\$body);
 613         my @tokens = $parser->parse();
 614         my $problems = $parser->{problems};
 615         return unless $emit_all || @$problems;
 616         my $c = main::fd_colors(1);
 617         my $lineno = $_[1]->[3];
 618         my $start = 0;
 619         my $checked = '';
 620         for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) {
 621                 my ($label, $token) = @$_;
 622                 my $pos = $token->[2];
 623                 $checked .= substr($body, $start, $pos - $start) . " ?!$label?! ";
 624                 $start = $pos;
 625         }
 626         $checked .= substr($body, $start);
 627         $checked =~ s/^/$lineno++ . ' '/mge;
 628         $checked =~ s/^\d+ \n//;
 629         $checked =~ s/(\s) \?!/$1?!/mg;
 630         $checked =~ s/\?! (\s)/?!$1/mg;
 631         $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg;
 632         $checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg;
 633         $checked .= "\n" unless $checked =~ /\n$/;
 634         push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
 635 }
 636
 637 sub parse_cmd {
 638         my $self = shift @_;
 639         my @tokens = $self->SUPER::parse_cmd();
 640         return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/;
 641         my $n = $#tokens;
 642         $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
 643         $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body
 644         $self->check_test($tokens[2], $tokens[3]) if $n > 2;  # prereq title body
 645         return @tokens;
 646 }
 647
 648 # main contains high-level functionality for processing command-line switches,
 649 # feeding input test scripts to ScriptParser, and reporting results.
 650 package main;
 651
 652 my $getnow = sub { return time(); };
 653 my $interval = sub { return time() - shift; };
 654 if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
 655         $getnow = sub { return [Time::HiRes::gettimeofday()]; };
 656         $interval = sub { return Time::HiRes::tv_interval(shift); };
 657 }
 658
 659 # Restore TERM if test framework set it to "dumb" so 'tput' will work; do this
 660 # outside of get_colors() since under 'ithreads' all threads use %ENV of main
 661 # thread and ignore %ENV changes in subthreads.
 662 $ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM};
 663
 664 my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => '');
 665 my %COLORS = ();
 666 sub get_colors {
 667         return \%COLORS if %COLORS;
 668         if (exists($ENV{NO_COLOR})) {
 669                 %COLORS = @NOCOLORS;
 670                 return \%COLORS;
 671         }
 672         if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) {
 673                 %COLORS = (bold  => "\e[1m",
 674                            rev   => "\e[7m",
 675                            dim   => "\e[2m",
 676                            reset => "\e[0m",
 677                            blue  => "\e[34m",
 678                            green => "\e[32m",
 679                            red   => "\e[31m");
 680                 return \%COLORS;
 681         }
 682         if (system("tput sgr0 >/dev/null 2>&1") == 0 &&
 683             system("tput bold >/dev/null 2>&1") == 0 &&
 684             system("tput rev  >/dev/null 2>&1") == 0 &&
 685             system("tput dim  >/dev/null 2>&1") == 0 &&
 686             system("tput setaf 1 >/dev/null 2>&1") == 0) {
 687                 %COLORS = (bold  => `tput bold`,
 688                            rev   => `tput rev`,
 689                            dim   => `tput dim`,
 690                            reset => `tput sgr0`,
 691                            blue  => `tput setaf 4`,
 692                            green => `tput setaf 2`,
 693                            red   => `tput setaf 1`);
 694                 return \%COLORS;
 695         }
 696         %COLORS = @NOCOLORS;
 697         return \%COLORS;
 698 }
 699
 700 my %FD_COLORS = ();
 701 sub fd_colors {
 702         my $fd = shift;
 703         return $FD_COLORS{$fd} if exists($FD_COLORS{$fd});
 704         $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS};
 705         return $FD_COLORS{$fd};
 706 }
 707
 708 sub ncores {
 709         # Windows
 710         return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS});
 711         # Linux / MSYS2 / Cygwin / WSL
 712         do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor[\s\d]*:/, <>)); } if -r '/proc/cpuinfo';
 713         # macOS & BSD
 714         return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/;
 715         return 1;
 716 }
 717
 718 sub show_stats {
 719         my ($start_time, $stats) = @_;
 720         my $walltime = $interval->($start_time);
 721         my ($usertime) = times();
 722         my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
 723         my $c = fd_colors(2);
 724         print(STDERR $c->{green});
 725         for (@$stats) {
 726                 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
 727                 print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
 728                 $total_workers++;
 729                 $total_scripts += $nscripts;
 730                 $total_tests += $ntests;
 731                 $total_errs += $nerrs;
 732         }
 733         printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
 734 }
 735
 736 sub check_script {
 737         my ($id, $next_script, $emit) = @_;
 738         my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
 739         while (my $path = $next_script->()) {
 740                 $nscripts++;
 741                 my $fh;
 742                 unless (open($fh, "<", $path)) {
 743                         $emit->("?!ERR?! $path: $!\n");
 744                         next;
 745                 }
 746                 my $s = do { local $/; <$fh> };
 747                 close($fh);
 748                 my $parser = ScriptParser->new(\$s);
 749                 1 while $parser->parse_cmd();
 750                 if (@{$parser->{output}}) {
 751                         my $c = fd_colors(1);
 752                         my $s = join('', @{$parser->{output}});
 753                         $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s);
 754                         $nerrs += () = $s =~ /\?![^?]+\?!/g;
 755                 }
 756                 $ntests += $parser->{ntests};
 757         }
 758         return [$id, $nscripts, $ntests, $nerrs];
 759 }
 760
 761 sub exit_code {
 762         my $stats = shift @_;
 763         for (@$stats) {
 764                 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
 765                 return 1 if $nerrs;
 766         }
 767         return 0;
 768 }
 769
 770 Getopt::Long::Configure(qw{bundling});
 771 GetOptions(
 772         "emit-all!" => \$emit_all,
 773         "jobs|j=i" => \$jobs,
 774         "stats|show-stats!" => \$show_stats) or die("option error\n");
 775 $jobs = ncores() if $jobs < 1;
 776
 777 my $start_time = $getnow->();
 778 my @stats;
 779
 780 my @scripts;
 781 push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV);
 782 unless (@scripts) {
 783         show_stats($start_time, \@stats) if $show_stats;
 784         exit;
 785 }
 786
 787 unless ($Config{useithreads} && eval {
 788         require threads; threads->import();
 789         require Thread::Queue; Thread::Queue->import();
 790         1;
 791         }) {
 792         push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
 793         show_stats($start_time, \@stats) if $show_stats;
 794         exit(exit_code(\@stats));
 795 }
 796
 797 my $script_queue = Thread::Queue->new();
 798 my $output_queue = Thread::Queue->new();
 799
 800 sub next_script { return $script_queue->dequeue(); }
 801 sub emit { $output_queue->enqueue(@_); }
 802
 803 sub monitor {
 804         while (my $s = $output_queue->dequeue()) {
 805                 print($s);
 806         }
 807 }
 808
 809 my $mon = threads->create({'context' => 'void'}, \&monitor);
 810 threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs;
 811
 812 $script_queue->enqueue(@scripts);
 813 $script_queue->end();
 814
 815 for (threads->list()) {
 816         push(@stats, $_->join()) unless $_ == $mon;
 817 }
 818
 819 $output_queue->end();
 820 $mon->join();
 821
 822 show_stats($start_time, \@stats) if $show_stats;
 823 exit(exit_code(\@stats));