src/wc.c

   1 /* wc - print the number of lines, words, and bytes in files
   2    Copyright (C) 1985-2023 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
  18    and David MacKenzie, djm@gnu.ai.mit.edu. */
  19
  20 #include <config.h>
  21
  22 #include <stdckdint.h>
  23 #include <stdio.h>
  24 #include <getopt.h>
  25 #include <sys/types.h>
  26 #include <wchar.h>
  27 #include <wctype.h>
  28
  29 #include "system.h"
  30 #include "assure.h"
  31 #include "argmatch.h"
  32 #include "argv-iter.h"
  33 #include "fadvise.h"
  34 #include "mbchar.h"
  35 #include "physmem.h"
  36 #include "readtokens0.h"
  37 #include "safe-read.h"
  38 #include "stat-size.h"
  39 #include "xbinary-io.h"
  40
  41 #if !defined iswspace && !HAVE_ISWSPACE
     /* Fallback used only when no iswspace macro is defined and
        HAVE_ISWSPACE is false: WC counts as a space if it is unchanged by
        to_uchar and isspace accepts the resulting byte.  */
  42 # define iswspace(wc) \
  43     ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
  44 #endif
  45
  46 /* The official name of this program (e.g., no 'g' prefix).  */
  47 #define PROGRAM_NAME "wc"
  48
     /* Author list; main passes it to case_GETOPT_VERSION_CHAR.  */
  49 #define AUTHORS \
  50   proper_name ("Paul Rubin"), \
  51   proper_name ("David MacKenzie")
  52
  53 /* Size of atomic reads. */
     /* This is the byte count requested from each safe_read call; the read
        buffers in this file are declared with BUFFER_SIZE + 1 bytes.  */
  54 #define BUFFER_SIZE (16 * 1024)
  55
  56 #ifdef USE_AVX2_WC_LINECOUNT
  57 /* From wc_avx2.c */
     /* Line counter with the same signature as wc_lines; wc selects it
        instead of wc_lines when avx2_supported returns true.  */
  58 extern bool
  59 wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
  60                uintmax_t *bytes_out);
  61 #endif
  62
  /* True if --debug was given.  */
  static bool debug;

  /* Cumulative number of lines, words, chars and bytes in all files so far.
     max_line_length is the maximum over all files processed so far.  */
  static uintmax_t total_lines;
  static uintmax_t total_words;
  static uintmax_t total_chars;
  static uintmax_t total_bytes;

  /* Set to true when adding a file's count to the corresponding total
     overflowed.  These are flags, not counters, hence bool.  */
  static bool total_lines_overflow;
  static bool total_words_overflow;
  static bool total_chars_overflow;
  static bool total_bytes_overflow;

  static uintmax_t max_line_length;

  /* Which counts to print. */
  static bool print_lines, print_words, print_chars, print_bytes;
  static bool print_linelength;

  /* The print width of each count.  */
  static int number_width;

  /* True if we have ever read the standard input. */
  static bool have_read_stdin;

  /* Used to determine if file size can be determined without reading.  */
  static size_t page_size;

  /* Enable to _not_ treat non breaking space as a word separator.  */
  static bool posixly_correct;
  92
  93 /* The result of calling fstat or stat on a file descriptor or file.  */
  94 struct fstatus
  95 {
  96   /* If positive, fstat or stat has not been called yet.  Otherwise,
  97      this is the value returned from fstat or stat.  */
       /* get_input_fstatus stores 1 here to defer the call; wc then calls
          fstat itself if it needs the file's size.  */
  98   int failed;
  99
 100   /* If FAILED is zero, this is the file's status.  */
 101   struct stat st;
 102 };
 103
 104 /* For long options that have no equivalent short option, use a
 105    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 106 enum
 107 {
 108   DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,  /* --debug */
 109   FILES0_FROM_OPTION,                   /* --files0-from=F */
 110   TOTAL_OPTION,                         /* --total=WHEN */
 111 };
 112
     /* Long options passed to getopt_long in main.  The last field of each
        entry is the value main's option switch receives: the equivalent
        short option character, or one of the pseudo option values above.  */
 113 static struct option const longopts[] =
 114 {
 115   {"bytes", no_argument, nullptr, 'c'},
 116   {"chars", no_argument, nullptr, 'm'},
 117   {"lines", no_argument, nullptr, 'l'},
 118   {"words", no_argument, nullptr, 'w'},
 119   {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
 120   {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
 121   {"max-line-length", no_argument, nullptr, 'L'},
 122   {"total", required_argument, nullptr, TOTAL_OPTION},
 123   {GETOPT_HELP_OPTION_DECL},
 124   {GETOPT_VERSION_OPTION_DECL},
 125   {nullptr, 0, nullptr, 0}
 126 };
 127
     /* When to print the line of total counts (--total=WHEN).  */
 128 enum total_type
 129   {
 130     total_auto,         /* 0: default or --total=auto */
 131     total_always,       /* 1: --total=always */
 132     total_only,         /* 2: --total=only */
 133     total_never         /* 3: --total=never */
 134   };
     /* Argument spellings accepted by --total, listed in the same order as
        the values in total_types below.  */
 135 static char const *const total_args[] =
 136 {
 137   "auto", "always", "only", "never", nullptr
 138 };
 139 static enum total_type const total_types[] =
 140 {
 141   total_auto, total_always, total_only, total_never
 142 };
 143 ARGMATCH_VERIFY (total_args, total_types);
     /* The mode in effect; main overwrites it when --total is given.  */
 144 static enum total_type total_mode = total_auto;
 145
 #ifdef USE_AVX2_WC_LINECOUNT
 /* Return true if __builtin_cpu_supports reports AVX2 on this CPU.
    When --debug is in effect, also issue a diagnostic saying which way
    the check went.  */
 static bool
 avx2_supported (void)
 {
   int probe = __builtin_cpu_supports ("avx2");
   bool usable = 0 < probe;

   if (debug)
     {
       if (usable)
         error (0, 0, _("using avx2 hardware support"));
       else
         error (0, 0, _("avx2 support not detected"));
     }

   return usable;
 }
 #endif
 160
 161 void
 162 usage (int status)
 163 {
 164   if (status != EXIT_SUCCESS)
 165     emit_try_help ();
 166   else
 167     {
 168       printf (_("\
 169 Usage: %s [OPTION]... [FILE]...\n\
 170   or:  %s [OPTION]... --files0-from=F\n\
 171 "),
 172               program_name, program_name);
 173       fputs (_("\
 174 Print newline, word, and byte counts for each FILE, and a total line if\n\
 175 more than one FILE is specified.  A word is a non-zero-length sequence of\n\
 176 printable characters delimited by white space.\n\
 177 "), stdout);
 178
 179       emit_stdin_note ();
 180
 181       fputs (_("\
 182 \n\
 183 The options below may be used to select which counts are printed, always in\n\
 184 the following order: newline, word, character, byte, maximum line length.\n\
 185   -c, --bytes            print the byte counts\n\
 186   -m, --chars            print the character counts\n\
 187   -l, --lines            print the newline counts\n\
 188 "), stdout);
 189       fputs (_("\
 190       --files0-from=F    read input from the files specified by\n\
 191                            NUL-terminated names in file F;\n\
 192                            If F is - then read names from standard input\n\
 193   -L, --max-line-length  print the maximum display width\n\
 194   -w, --words            print the word counts\n\
 195 "), stdout);
 196       fputs (_("\
 197       --total=WHEN       when to print a line with total counts;\n\
 198                            WHEN can be: auto, always, only, never\n\
 199 "), stdout);
 200       fputs (HELP_OPTION_DESCRIPTION, stdout);
 201       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 202       emit_ancillary_info (PROGRAM_NAME);
 203     }
 204   exit (status);
 205 }
 206
 207 /* Return non zero if a non breaking space.  */
 208 ATTRIBUTE_PURE
 209 static int
 210 iswnbspace (wint_t wc)
 211 {
 212   return ! posixly_correct
 213          && (wc == 0x00A0 || wc == 0x2007
 214              || wc == 0x202F || wc == 0x2060);
 215 }
 216
 /* Single-byte counterpart of iswnbspace: convert C with btowc and
    return what iswnbspace says about the result.  */
 static int
 isnbspace (int c)
 {
   wint_t widened = btowc (c);
   return iswnbspace (widened);
 }
 222
 223 /* FILE is the name of the file (or null for standard input)
 224    associated with the specified counters.  */
 225 static void
 226 write_counts (uintmax_t lines,
 227               uintmax_t words,
 228               uintmax_t chars,
 229               uintmax_t bytes,
 230               uintmax_t linelength,
 231               char const *file)
 232 {
 233   static char const format_sp_int[] = " %*s";
 234   char const *format_int = format_sp_int + 1;
 235   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
 236
 237   if (print_lines)
 238     {
 239       printf (format_int, number_width, umaxtostr (lines, buf));
 240       format_int = format_sp_int;
 241     }
 242   if (print_words)
 243     {
 244       printf (format_int, number_width, umaxtostr (words, buf));
 245       format_int = format_sp_int;
 246     }
 247   if (print_chars)
 248     {
 249       printf (format_int, number_width, umaxtostr (chars, buf));
 250       format_int = format_sp_int;
 251     }
 252   if (print_bytes)
 253     {
 254       printf (format_int, number_width, umaxtostr (bytes, buf));
 255       format_int = format_sp_int;
 256     }
 257   if (print_linelength)
 258     {
 259       printf (format_int, number_width, umaxtostr (linelength, buf));
 260     }
 261   if (file)
 262     printf (" %s", strchr (file, '\n') ? quotef (file) : file);
 263   putchar ('\n');
 264 }
 265
 266 static bool
 267 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
 268 {
 269   size_t bytes_read;
 270   uintmax_t lines, bytes;
 271   char buf[BUFFER_SIZE + 1];
 272   bool long_lines = false;
 273
 274   if (!lines_out || !bytes_out)
 275     {
 276       return false;
 277     }
 278
 279   lines = bytes = 0;
 280
 281   while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
 282     {
 283
 284       if (bytes_read == SAFE_READ_ERROR)
 285         {
 286           error (0, errno, "%s", quotef (file));
 287           return false;
 288         }
 289
 290       bytes += bytes_read;
 291
 292       char *p = buf;
 293       char *end = buf + bytes_read;
 294       uintmax_t plines = lines;
 295
 296       if (! long_lines)
 297         {
 298           /* Avoid function call overhead for shorter lines.  */
 299           while (p != end)
 300             lines += *p++ == '\n';
 301         }
 302       else
 303         {
 304           /* rawmemchr is more efficient with longer lines.  */
 305           *end = '\n';
 306           while ((p = rawmemchr (p, '\n')) < end)
 307             {
 308               ++p;
 309               ++lines;
 310             }
 311         }
 312
 313       /* If the average line length in the block is >= 15, then use
 314           memchr for the next block, where system specific optimizations
 315           may outweigh function call overhead.
 316           FIXME: This line length was determined in 2015, on both
 317           x86_64 and ppc64, but it's worth re-evaluating in future with
 318           newer compilers, CPUs, or memchr() implementations etc.  */
 319       if (lines - plines <= bytes_read / 15)
 320         long_lines = true;
 321       else
 322         long_lines = false;
 323     }
 324
 325   *bytes_out = bytes;
 326   *lines_out = lines;
 327
 328   return true;
 329 }
 330
 331 /* Count words.  FILE_X is the name of the file (or null for standard
 332    input) that is open on descriptor FD.  *FSTATUS is its status.
 333    CURRENT_POS is the current file offset if known, negative if unknown.
 334    Return true if successful.  */
 335 static bool
 336 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 337 {
 338   bool ok = true;
 339   char buf[BUFFER_SIZE + 1];
 340   size_t bytes_read;
 341   uintmax_t lines, words, chars, bytes, linelength;
 342   bool count_bytes, count_chars, count_complicated;
 343   char const *file = file_x ? file_x : _("standard input");
 344
 345   lines = words = chars = bytes = linelength = 0;
 346
 347   /* If in the current locale, chars are equivalent to bytes, we prefer
 348      counting bytes, because that's easier.  */
 349 #if MB_LEN_MAX > 1
 350   if (MB_CUR_MAX > 1)
 351     {
 352       count_bytes = print_bytes;
 353       count_chars = print_chars;
 354     }
 355   else
 356 #endif
 357     {
 358       count_bytes = print_bytes || print_chars;
 359       count_chars = false;
 360     }
 361   count_complicated = print_words || print_linelength;
 362
 363   /* Advise the kernel of our access pattern only if we will read().  */
 364   if (!count_bytes || count_chars || print_lines || count_complicated)
 365     fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
 366
 367   /* When counting only bytes, save some line- and word-counting
 368      overhead.  If FD is a 'regular' Unix file, using lseek is enough
 369      to get its 'size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
 370      bytes at a time until EOF.  Note that the 'size' (number of bytes)
 371      that wc reports is smaller than stats.st_size when the file is not
 372      positioned at its beginning.  That's why the lseek calls below are
 373      necessary.  For example the command
 374      '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
 375      should make wc report '0' bytes.  */
 376
 377   if (count_bytes && !count_chars && !print_lines && !count_complicated)
 378     {
 379       bool skip_read = false;
 380
 381       if (0 < fstatus->failed)
 382         fstatus->failed = fstat (fd, &fstatus->st);
 383
 384       /* For sized files, seek to one st_blksize before EOF rather than to EOF.
 385          This works better for files in proc-like file systems where
 386          the size is only approximate.  */
 387       if (! fstatus->failed && usable_st_size (&fstatus->st)
 388           && 0 <= fstatus->st.st_size)
 389         {
 390           off_t end_pos = fstatus->st.st_size;
 391           if (current_pos < 0)
 392             current_pos = lseek (fd, 0, SEEK_CUR);
 393
 394           if (end_pos % page_size)
 395             {
 396               /* We only need special handling of /proc and /sys files etc.
 397                  when they're a multiple of PAGE_SIZE.  In the common case
 398                  for files with st_size not a multiple of PAGE_SIZE,
 399                  it's more efficient and accurate to use st_size.
 400
 401                  Be careful here.  The current position may actually be
 402                  beyond the end of the file.  As in the example above.  */
 403
 404               bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
 405               if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
 406                 skip_read = true;
 407               else
 408                 bytes = 0;
 409             }
 410           else
 411             {
 412               off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
 413               if (0 <= current_pos && current_pos < hi_pos
 414                   && 0 <= lseek (fd, hi_pos, SEEK_CUR))
 415                 bytes = hi_pos - current_pos;
 416             }
 417         }
 418
 419       if (! skip_read)
 420         {
 421           fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
 422           while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
 423             {
 424               if (bytes_read == SAFE_READ_ERROR)
 425                 {
 426                   error (0, errno, "%s", quotef (file));
 427                   ok = false;
 428                   break;
 429                 }
 430               bytes += bytes_read;
 431             }
 432         }
 433     }
 434   else if (!count_chars && !count_complicated)
 435     {
 436 #ifdef USE_AVX2_WC_LINECOUNT
 437       static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
 438       if (!wc_lines_p)
 439         wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
 440 #else
 441       bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
 442         = wc_lines;
 443 #endif
 444
 445       /* Use a separate loop when counting only lines or lines and bytes --
 446          but not chars or words.  */
 447       ok = wc_lines_p (file, fd, &lines, &bytes);
 448     }
 449 #if MB_LEN_MAX > 1
 450 # define SUPPORT_OLD_MBRTOWC 1
 451   else if (MB_CUR_MAX > 1)
 452     {
 453       bool in_word = false;
 454       uintmax_t linepos = 0;
 455       mbstate_t state = { 0, };
 456       bool in_shift = false;
 457 # if SUPPORT_OLD_MBRTOWC
 458       /* Back-up the state before each multibyte character conversion and
 459          move the last incomplete character of the buffer to the front
 460          of the buffer.  This is needed because we don't know whether
 461          the 'mbrtowc' function updates the state when it returns -2, --
 462          this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
 463          ANSI C, glibc-2.1 and Solaris 5.7 behaviour.  We don't have an
 464          autoconf test for this, yet.  */
 465       size_t prev = 0; /* number of bytes carried over from previous round */
 466 # else
 467       const size_t prev = 0;
 468 # endif
 469
 470       while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
 471         {
 472           char const *p;
 473 # if SUPPORT_OLD_MBRTOWC
 474           mbstate_t backup_state;
 475 # endif
 476           if (bytes_read == SAFE_READ_ERROR)
 477             {
 478               error (0, errno, "%s", quotef (file));
 479               ok = false;
 480               break;
 481             }
 482
 483           bytes += bytes_read;
 484           p = buf;
 485           bytes_read += prev;
 486           do
 487             {
 488               wchar_t wide_char;
 489               size_t n;
 490               bool wide = true;
 491
 492               if (!in_shift && is_basic (*p))
 493                 {
 494                   /* Handle most ASCII characters quickly, without calling
 495                      mbrtowc().  */
 496                   n = 1;
 497                   wide_char = *p;
 498                   wide = false;
 499                 }
 500               else
 501                 {
 502                   in_shift = true;
 503 # if SUPPORT_OLD_MBRTOWC
 504                   backup_state = state;
 505 # endif
 506                   n = mbrtowc (&wide_char, p, bytes_read, &state);
 507                   if (n == (size_t) -2)
 508                     {
 509 # if SUPPORT_OLD_MBRTOWC
 510                       state = backup_state;
 511 # endif
 512                       break;
 513                     }
 514                   if (n == (size_t) -1)
 515                     {
 516                       /* Remember that we read a byte, but don't complain
 517                          about the error.  Because of the decoding error,
 518                          this is a considered to be byte but not a
 519                          character (that is, chars is not incremented).  */
 520                       p++;
 521                       bytes_read--;
 522                       continue;
 523                     }
 524                   if (mbsinit (&state))
 525                     in_shift = false;
 526                   if (n == 0)
 527                     {
 528                       wide_char = 0;
 529                       n = 1;
 530                     }
 531                 }
 532
 533               switch (wide_char)
 534                 {
 535                 case '\n':
 536                   lines++;
 537                   FALLTHROUGH;
 538                 case '\r':
 539                 case '\f':
 540                   if (linepos > linelength)
 541                     linelength = linepos;
 542                   linepos = 0;
 543                   goto mb_word_separator;
 544                 case '\t':
 545                   linepos += 8 - (linepos % 8);
 546                   goto mb_word_separator;
 547                 case ' ':
 548                   linepos++;
 549                   FALLTHROUGH;
 550                 case '\v':
 551                 mb_word_separator:
 552                   words += in_word;
 553                   in_word = false;
 554                   break;
 555                 default:
 556                   if (wide && iswprint (wide_char))
 557                     {
 558                       /* wcwidth can be expensive on OSX for example,
 559                          so avoid if uneeded.  */
 560                       if (print_linelength)
 561                         {
 562                           int width = wcwidth (wide_char);
 563                           if (width > 0)
 564                             linepos += width;
 565                         }
 566                       if (iswspace (wide_char) || iswnbspace (wide_char))
 567                         goto mb_word_separator;
 568                       in_word = true;
 569                     }
 570                   else if (!wide && isprint (to_uchar (*p)))
 571                     {
 572                       linepos++;
 573                       if (isspace (to_uchar (*p)))
 574                         goto mb_word_separator;
 575                       in_word = true;
 576                     }
 577                   break;
 578                 }
 579
 580               p += n;
 581               bytes_read -= n;
 582               chars++;
 583             }
 584           while (bytes_read > 0);
 585
 586 # if SUPPORT_OLD_MBRTOWC
 587           if (bytes_read > 0)
 588             {
 589               if (bytes_read == BUFFER_SIZE)
 590                 {
 591                   /* Encountered a very long redundant shift sequence.  */
 592                   p++;
 593                   bytes_read--;
 594                 }
 595               memmove (buf, p, bytes_read);
 596             }
 597           prev = bytes_read;
 598 # endif
 599         }
 600       if (linepos > linelength)
 601         linelength = linepos;
 602       words += in_word;
 603     }
 604 #endif
 605   else
 606     {
 607       bool in_word = false;
 608       uintmax_t linepos = 0;
 609
 610       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
 611         {
 612           char const *p = buf;
 613           if (bytes_read == SAFE_READ_ERROR)
 614             {
 615               error (0, errno, "%s", quotef (file));
 616               ok = false;
 617               break;
 618             }
 619
 620           bytes += bytes_read;
 621           do
 622             {
 623               switch (*p++)
 624                 {
 625                 case '\n':
 626                   lines++;
 627                   FALLTHROUGH;
 628                 case '\r':
 629                 case '\f':
 630                   if (linepos > linelength)
 631                     linelength = linepos;
 632                   linepos = 0;
 633                   goto word_separator;
 634                 case '\t':
 635                   linepos += 8 - (linepos % 8);
 636                   goto word_separator;
 637                 case ' ':
 638                   linepos++;
 639                   FALLTHROUGH;
 640                 case '\v':
 641                 word_separator:
 642                   words += in_word;
 643                   in_word = false;
 644                   break;
 645                 default:
 646                   if (isprint (to_uchar (p[-1])))
 647                     {
 648                       linepos++;
 649                       if (isspace (to_uchar (p[-1]))
 650                           || isnbspace (to_uchar (p[-1])))
 651                         goto word_separator;
 652                       in_word = true;
 653                     }
 654                   break;
 655                 }
 656             }
 657           while (--bytes_read);
 658         }
 659       if (linepos > linelength)
 660         linelength = linepos;
 661       words += in_word;
 662     }
 663
 664   if (count_chars < print_chars)
 665     chars = bytes;
 666
 667   if (total_mode != total_only)
 668     write_counts (lines, words, chars, bytes, linelength, file_x);
 669
 670   if (ckd_add (&total_lines, total_lines, lines))
 671     total_lines_overflow = true;
 672   if (ckd_add (&total_words, total_words, words))
 673     total_words_overflow = true;
 674   if (ckd_add (&total_chars, total_chars, chars))
 675     total_chars_overflow = true;
 676   if (ckd_add (&total_bytes, total_bytes, bytes))
 677     total_bytes_overflow = true;
 678
 679   if (linelength > max_line_length)
 680     max_line_length = linelength;
 681
 682   return ok;
 683 }
 684
 685 static bool
 686 wc_file (char const *file, struct fstatus *fstatus)
 687 {
 688   if (! file || STREQ (file, "-"))
 689     {
 690       have_read_stdin = true;
 691       xset_binary_mode (STDIN_FILENO, O_BINARY);
 692       return wc (STDIN_FILENO, file, fstatus, -1);
 693     }
 694   else
 695     {
 696       int fd = open (file, O_RDONLY | O_BINARY);
 697       if (fd == -1)
 698         {
 699           error (0, errno, "%s", quotef (file));
 700           return false;
 701         }
 702       else
 703         {
 704           bool ok = wc (fd, file, fstatus, 0);
 705           if (close (fd) != 0)
 706             {
 707               error (0, errno, "%s", quotef (file));
 708               return false;
 709             }
 710           return ok;
 711         }
 712     }
 713 }
 714
 715 /* Return the file status for the NFILES files addressed by FILE.
 716    Optimize the case where only one number is printed, for just one
 717    file; in that case we can use a print width of 1, so we don't need
 718    to stat the file.  Handle the case of (nfiles == 0) in the same way;
 719    that happens when we don't know how long the list of file names will be.  */
 720
 721 static struct fstatus *
 722 get_input_fstatus (size_t nfiles, char *const *file)
 723 {
 724   struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
 725
 726   if (nfiles == 0
 727       || (nfiles == 1
 728           && ((print_lines + print_words + print_chars
 729                + print_bytes + print_linelength)
 730               == 1)))
 731     fstatus[0].failed = 1;
 732   else
 733     {
 734       for (size_t i = 0; i < nfiles; i++)
 735         fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
 736                              ? fstat (STDIN_FILENO, &fstatus[i].st)
 737                              : stat (file[i], &fstatus[i].st));
 738     }
 739
 740   return fstatus;
 741 }
 742
 743 /* Return a print width suitable for the NFILES files whose status is
 744    recorded in FSTATUS.  Optimize the same special case that
 745    get_input_fstatus optimizes.  */
 746
 747 ATTRIBUTE_PURE
 748 static int
 749 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
 750 {
 751   int width = 1;
 752
 753   if (0 < nfiles && fstatus[0].failed <= 0)
 754     {
 755       int minimum_width = 1;
 756       uintmax_t regular_total = 0;
 757
 758       for (size_t i = 0; i < nfiles; i++)
 759         if (! fstatus[i].failed)
 760           {
 761             if (S_ISREG (fstatus[i].st.st_mode))
 762               regular_total += fstatus[i].st.st_size;
 763             else
 764               minimum_width = 7;
 765           }
 766
 767       for (; 10 <= regular_total; regular_total /= 10)
 768         width++;
 769       if (width < minimum_width)
 770         width = minimum_width;
 771     }
 772
 773   return width;
 774 }
 775
 776
     /* Parse the options, build the list of input files (command-line
        operands, names read via --files0-from, or standard input when
        there are none), count each one with wc_file, and print the total
        line when the --total mode calls for it.  Return EXIT_SUCCESS only
        if no step reported a failure.  */
 777 int
 778 main (int argc, char **argv)
 779 {
 780   bool ok;
 781   int optc;
 782   size_t nfiles;
 783   char **files;
 784   char *files_from = nullptr;
 785   struct fstatus *fstatus;
 786   struct Tokens tok;
 787
 788   initialize_main (&argc, &argv);
 789   set_program_name (argv[0]);
 790   setlocale (LC_ALL, "");
 791   bindtextdomain (PACKAGE, LOCALEDIR);
 792   textdomain (PACKAGE);
 793
 794   atexit (close_stdout);
 795
 796   page_size = getpagesize ();
 797   /* Line buffer stdout to ensure lines are written atomically and immediately
 798      so that processes running in parallel do not intersperse their output.  */
 799   setvbuf (stdout, nullptr, _IOLBF, 0);
 800
 801   posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
 802
 803   print_lines = print_words = print_chars = print_bytes = false;
 804   print_linelength = false;
 805   total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
 806
 807   while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
 808     switch (optc)
 809       {
 810       case 'c':
 811         print_bytes = true;
 812         break;
 813
 814       case 'm':
 815         print_chars = true;
 816         break;
 817
 818       case 'l':
 819         print_lines = true;
 820         break;
 821
 822       case 'w':
 823         print_words = true;
 824         break;
 825
 826       case 'L':
 827         print_linelength = true;
 828         break;
 829
 830       case DEBUG_PROGRAM_OPTION:
 831         debug = true;
 832         break;
 833
 834       case FILES0_FROM_OPTION:
 835         files_from = optarg;
 836         break;
 837
 838       case TOTAL_OPTION:
 839         total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
 840         break;
 841
 842       case_GETOPT_HELP_CHAR;
 843
 844       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
 845
 846       default:
 847         usage (EXIT_FAILURE);
 848       }
 849
       /* With no count option given, print lines, words and bytes.  */
 850   if (! (print_lines || print_words || print_chars || print_bytes
 851          || print_linelength))
 852     print_lines = print_words = print_bytes = true;
 853
       /* READ_TOKENS records whether TOK holds the file names, so that it
          is freed later only in that case.  */
 854   bool read_tokens = false;
 855   struct argv_iterator *ai;
 856   if (files_from)
 857     {
 858       FILE *stream;
 859
 860       /* When using --files0-from=F, you may not specify any files
 861          on the command-line.  */
 862       if (optind < argc)
 863         {
 864           error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
 865           fprintf (stderr, "%s\n",
 866                    _("file operands cannot be combined with --files0-from"));
 867           usage (EXIT_FAILURE);
 868         }
 869
 870       if (STREQ (files_from, "-"))
 871         stream = stdin;
 872       else
 873         {
 874           stream = fopen (files_from, "r");
 875           if (stream == nullptr)
 876             error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
 877                    quoteaf (files_from));
 878         }
 879
 880       /* Read the file list into RAM if we can detect its size and that
 881          size is reasonable.  Otherwise, we'll read a name at a time.  */
 882       struct stat st;
 883       if (fstat (fileno (stream), &st) == 0
 884           && S_ISREG (st.st_mode)
 885           && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
 886         {
 887           read_tokens = true;
 888           readtokens0_init (&tok);
 889           if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
 890             error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
 891                    quoteaf (files_from));
 892           files = tok.tok;
 893           nfiles = tok.n_tok;
 894           ai = argv_iter_init_argv (files);
 895         }
 896       else
 897         {
               /* The number of names is unknown here; NFILES == 0 makes
                  the code below use a single fstatus slot.  */
 898           files = nullptr;
 899           nfiles = 0;
 900           ai = argv_iter_init_stream (stream);
 901         }
 902     }
 903   else
 904     {
           /* Without operands, use a one-entry list holding a null name;
              wc_file treats a null name as standard input.  */
 905       static char *stdin_only[] = { nullptr };
 906       files = (optind < argc ? argv + optind : stdin_only);
 907       nfiles = (optind < argc ? argc - optind : 1);
 908       ai = argv_iter_init_argv (files);
 909     }
 910
 911   if (!ai)
 912     xalloc_die ();
 913
 914   fstatus = get_input_fstatus (nfiles, files);
 915   if (total_mode == total_only)
 916     number_width = 1;  /* No extra padding, since no alignment requirement.  */
 917   else
 918     number_width = compute_number_width (nfiles, fstatus);
 919
 920   ok = true;
       /* Process each file name in turn; I indexes FSTATUS when the number
          of files is known.  */
 921   for (int i = 0; /* */; i++)
 922     {
 923       bool skip_file = false;
 924       enum argv_iter_err ai_err;
 925       char *file_name = argv_iter (ai, &ai_err);
 926       if (!file_name)
 927         {
 928           switch (ai_err)
 929             {
 930             case AI_ERR_EOF:
 931               goto argv_iter_done;
 932             case AI_ERR_READ:
 933               error (0, errno, _("%s: read error"),
 934                      quotef (files_from));
 935               ok = false;
 936               goto argv_iter_done;
 937             case AI_ERR_MEM:
 938               xalloc_die ();
 939             default:
 940               affirm (!"unexpected error code from argv_iter");
 941             }
 942         }
 943       if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
 944         {
 945           /* Give a better diagnostic in an unusual case:
 946              printf - | wc --files0-from=- */
 947           error (0, 0, _("when reading file names from stdin, "
 948                          "no file name of %s allowed"),
 949                  quoteaf (file_name));
 950           skip_file = true;
 951         }
 952
 953       if (!file_name[0])
 954         {
 955           /* Diagnose a zero-length file name.  When it's one
 956              among many, knowing the record number may help.
 957              FIXME: currently print the record number only with
 958              --files0-from=FILE.  Maybe do it for argv, too?  */
 959           if (files_from == nullptr)
 960             error (0, 0, "%s", _("invalid zero-length file name"));
 961           else
 962             {
 963               /* Using the standard 'filename:line-number:' prefix here is
 964                  not totally appropriate, since NUL is the separator, not NL,
 965                  but it might be better than nothing.  */
 966               unsigned long int file_number = argv_iter_n_args (ai);
 967               error (0, 0, "%s:%lu: %s", quotef (files_from),
 968                      file_number, _("invalid zero-length file name"));
 969             }
 970           skip_file = true;
 971         }
 972
           /* A skipped name counts as a failure.  */
 973       if (skip_file)
 974         ok = false;
 975       else
 976         ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
 977
           /* With an unknown number of files the single fstatus slot is
              reused, so mark it as not statted yet before the next file.  */
 978       if (! nfiles)
 979         fstatus[0].failed = 1;
 980     }
 981  argv_iter_done:
 982
 983   /* No arguments on the command line is fine.  That means read from stdin.
 984      However, no arguments on the --files0-from input stream means
 985      don't read anything.  */
 986   if (ok && !files_from && argv_iter_n_args (ai) == 0)
 987     ok &= wc_file (nullptr, &fstatus[0]);
 988
 989   if (read_tokens)
 990     readtokens0_free (&tok);
 991
       /* Print the total line unless --total=never; in auto mode print it
          only when more than one file name was processed.  */
 992   if (total_mode != total_never
 993       && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
 994     {
           /* A total that overflowed is reported as UINTMAX_MAX along
              with a diagnostic, and makes the exit status a failure.  */
 995       if (total_lines_overflow)
 996         {
 997           total_lines = UINTMAX_MAX;
 998           error (0, EOVERFLOW, _("total lines"));
 999           ok = false;
1000         }
1001       if (total_words_overflow)
1002         {
1003           total_words = UINTMAX_MAX;
1004           error (0, EOVERFLOW, _("total words"));
1005           ok = false;
1006         }
1007       if (total_chars_overflow)
1008         {
1009           total_chars = UINTMAX_MAX;
1010           error (0, EOVERFLOW, _("total characters"));
1011           ok = false;
1012         }
1013       if (total_bytes_overflow)
1014         {
1015           total_bytes = UINTMAX_MAX;
1016           error (0, EOVERFLOW, _("total bytes"));
1017           ok = false;
1018         }
1019
           /* With --total=only the line carries no "total" label.  */
1020       write_counts (total_lines, total_words, total_chars, total_bytes,
1021                     max_line_length,
1022                     total_mode != total_only ? _("total") : nullptr);
1023     }
1024
1025   argv_iter_free (ai);
1026
1027   free (fstatus);
1028
1029   if (have_read_stdin && close (STDIN_FILENO) != 0)
1030     error (EXIT_FAILURE, errno, "-");
1031
1032   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
1033 }