src/wc.c

   1 /* wc - print the number of lines, words, and bytes in files
   2    Copyright (C) 1985-2023 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
  18    and David MacKenzie, djm@gnu.ai.mit.edu. */
  19
  20 #include <config.h>
  21
  22 #include <stdio.h>
  23 #include <assert.h>
  24 #include <getopt.h>
  25 #include <sys/types.h>
  26 #include <wchar.h>
  27 #include <wctype.h>
  28
  29 #include "system.h"
  30 #include "argmatch.h"
  31 #include "argv-iter.h"
  32 #include "die.h"
  33 #include "error.h"
  34 #include "fadvise.h"
  35 #include "mbchar.h"
  36 #include "physmem.h"
  37 #include "readtokens0.h"
  38 #include "safe-read.h"
  39 #include "stat-size.h"
  40 #include "xbinary-io.h"
  41
/* Fallback used only when iswspace is neither already a macro nor
   reported available by configure (HAVE_ISWSPACE): WC counts as white
   space only if it is unchanged by to_uchar and isspace accepts the
   resulting byte.  */
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
    ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif
  46
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

/* Author list passed to case_GETOPT_VERSION_CHAR in main.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Size of atomic reads: the byte count requested from safe_read on each
   call.  The read buffers in this file are declared one byte larger;
   wc_lines uses that extra byte for a sentinel newline.  */
#define BUFFER_SIZE (16 * 1024)
  56
/* Portable line and byte counter; defined later in this file.  */
static bool
wc_lines (char const *file, int fd, uintmax_t *lines_out,
          uintmax_t *bytes_out);
#ifdef USE_AVX2_WC_LINECOUNT
/* From wc_avx2.c */
extern bool
wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
               uintmax_t *bytes_out);
#endif
/* The line counter that wc actually calls.  It starts out as wc_lines;
   wc switches it to wc_lines_avx2 when avx2_supported returns true.  */
static bool
(*wc_lines_p) (char const *file, int fd, uintmax_t *lines_out,
                uintmax_t *bytes_out) = wc_lines;
  69
/* True when --debug was given; avx2_supported then reports whether
   AVX2 support was detected.  */
static bool debug;
  71
  72 /* Cumulative number of lines, words, chars and bytes in all files so far.
  73    max_line_length is the maximum over all files processed so far.  */
  74 static uintmax_t total_lines;
  75 static uintmax_t total_words;
  76 static uintmax_t total_chars;
  77 static uintmax_t total_bytes;
  78 static uintmax_t total_lines_overflow;
  79 static uintmax_t total_words_overflow;
  80 static uintmax_t total_chars_overflow;
  81 static uintmax_t total_bytes_overflow;
  82 static uintmax_t max_line_length;
  83
/* Which counts to print.  main sets these from -l, -w, -m and -c; when
   no selection option at all is given it enables lines, words and
   bytes.  */
static bool print_lines, print_words, print_chars, print_bytes;
/* True when -L (--max-line-length) was given.  */
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input. */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.
   Holds the value of getpagesize; wc trusts st_size directly only when
   it is not a multiple of this.  */
static size_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.
   main sets this when POSIXLY_CORRECT is present in the environment.  */
static bool posixly_correct;
  99
/* The result of calling fstat or stat on a file descriptor or file.
   get_input_fstatus fills these in up front, or stores 1 in FAILED to
   defer the call; wc then calls fstat itself if it needs the size.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};
 110
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These are the values getopt_long returns for --debug, --files0-from
   and --total respectively (see longopts).  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};
 119
/* Long option table for getopt_long.  Each entry maps a long name to
   the short option character (or pseudo option code) that the switch
   in main handles.  Only --files0-from and --total take an argument.  */
static struct option const longopts[] =
{
  {"bytes", no_argument, NULL, 'c'},
  {"chars", no_argument, NULL, 'm'},
  {"lines", no_argument, NULL, 'l'},
  {"words", no_argument, NULL, 'w'},
  {"debug", no_argument, NULL, DEBUG_PROGRAM_OPTION},
  {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
  {"max-line-length", no_argument, NULL, 'L'},
  {"total", required_argument, NULL, TOTAL_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
};
 134
/* When to print the line of total counts, as selected by --total=WHEN.  */
enum total_type
  {
    total_auto,         /* 0: default or --total=auto */
    total_always,       /* 1: --total=always */
    total_only,         /* 2: --total=only */
    total_never         /* 3: --total=never */
  };
/* Accepted WHEN spellings, in the same order as total_types below so
   that XARGMATCH can map one onto the other.  */
static char const *const total_args[] =
{
  "auto", "always", "only", "never", NULL
};
static enum total_type const total_types[] =
{
  total_auto, total_always, total_only, total_never
};
ARGMATCH_VERIFY (total_args, total_types);
/* The mode in effect; main overwrites it when --total is given.  */
static enum total_type total_mode = total_auto;
 152
 153 #ifdef USE_AVX2_WC_LINECOUNT
 154 static bool
 155 avx2_supported (void)
 156 {
 157   bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");
 158
 159   if (debug)
 160     error (0, 0, (avx_enabled
 161                   ? _("using avx2 hardware support")
 162                   : _("avx2 support not detected")));
 163
 164   return avx_enabled;
 165 }
 166 #endif
 167
 168 void
 169 usage (int status)
 170 {
 171   if (status != EXIT_SUCCESS)
 172     emit_try_help ();
 173   else
 174     {
 175       printf (_("\
 176 Usage: %s [OPTION]... [FILE]...\n\
 177   or:  %s [OPTION]... --files0-from=F\n\
 178 "),
 179               program_name, program_name);
 180       fputs (_("\
 181 Print newline, word, and byte counts for each FILE, and a total line if\n\
 182 more than one FILE is specified.  A word is a non-zero-length sequence of\n\
 183 printable characters delimited by white space.\n\
 184 "), stdout);
 185
 186       emit_stdin_note ();
 187
 188       fputs (_("\
 189 \n\
 190 The options below may be used to select which counts are printed, always in\n\
 191 the following order: newline, word, character, byte, maximum line length.\n\
 192   -c, --bytes            print the byte counts\n\
 193   -m, --chars            print the character counts\n\
 194   -l, --lines            print the newline counts\n\
 195 "), stdout);
 196       fputs (_("\
 197       --files0-from=F    read input from the files specified by\n\
 198                            NUL-terminated names in file F;\n\
 199                            If F is - then read names from standard input\n\
 200   -L, --max-line-length  print the maximum display width\n\
 201   -w, --words            print the word counts\n\
 202 "), stdout);
 203       fputs (_("\
 204       --total=WHEN       when to print a line with total counts;\n\
 205                            WHEN can be: auto, always, only, never\n\
 206 "), stdout);
 207       fputs (HELP_OPTION_DESCRIPTION, stdout);
 208       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 209       emit_ancillary_info (PROGRAM_NAME);
 210     }
 211   exit (status);
 212 }
 213
 214 /* Return non zero if a non breaking space.  */
 215 ATTRIBUTE_PURE
 216 static int
 217 iswnbspace (wint_t wc)
 218 {
 219   return ! posixly_correct
 220          && (wc == 0x00A0 || wc == 0x2007
 221              || wc == 0x202F || wc == 0x2060);
 222 }
 223
 224 static int
 225 isnbspace (int c)
 226 {
 227   return iswnbspace (btowc (c));
 228 }
 229
 230 /* FILE is the name of the file (or NULL for standard input)
 231    associated with the specified counters.  */
 232 static void
 233 write_counts (uintmax_t lines,
 234               uintmax_t words,
 235               uintmax_t chars,
 236               uintmax_t bytes,
 237               uintmax_t linelength,
 238               char const *file)
 239 {
 240   static char const format_sp_int[] = " %*s";
 241   char const *format_int = format_sp_int + 1;
 242   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
 243
 244   if (print_lines)
 245     {
 246       printf (format_int, number_width, umaxtostr (lines, buf));
 247       format_int = format_sp_int;
 248     }
 249   if (print_words)
 250     {
 251       printf (format_int, number_width, umaxtostr (words, buf));
 252       format_int = format_sp_int;
 253     }
 254   if (print_chars)
 255     {
 256       printf (format_int, number_width, umaxtostr (chars, buf));
 257       format_int = format_sp_int;
 258     }
 259   if (print_bytes)
 260     {
 261       printf (format_int, number_width, umaxtostr (bytes, buf));
 262       format_int = format_sp_int;
 263     }
 264   if (print_linelength)
 265     {
 266       printf (format_int, number_width, umaxtostr (linelength, buf));
 267     }
 268   if (file)
 269     printf (" %s", strchr (file, '\n') ? quotef (file) : file);
 270   putchar ('\n');
 271 }
 272
 273 static bool
 274 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
 275 {
 276   size_t bytes_read;
 277   uintmax_t lines, bytes;
 278   char buf[BUFFER_SIZE + 1];
 279   bool long_lines = false;
 280
 281   if (!lines_out || !bytes_out)
 282     {
 283       return false;
 284     }
 285
 286   lines = bytes = 0;
 287
 288   while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
 289     {
 290
 291       if (bytes_read == SAFE_READ_ERROR)
 292         {
 293           error (0, errno, "%s", quotef (file));
 294           return false;
 295         }
 296
 297       bytes += bytes_read;
 298
 299       char *p = buf;
 300       char *end = buf + bytes_read;
 301       uintmax_t plines = lines;
 302
 303       if (! long_lines)
 304         {
 305           /* Avoid function call overhead for shorter lines.  */
 306           while (p != end)
 307             lines += *p++ == '\n';
 308         }
 309       else
 310         {
 311           /* rawmemchr is more efficient with longer lines.  */
 312           *end = '\n';
 313           while ((p = rawmemchr (p, '\n')) < end)
 314             {
 315               ++p;
 316               ++lines;
 317             }
 318         }
 319
 320       /* If the average line length in the block is >= 15, then use
 321           memchr for the next block, where system specific optimizations
 322           may outweigh function call overhead.
 323           FIXME: This line length was determined in 2015, on both
 324           x86_64 and ppc64, but it's worth re-evaluating in future with
 325           newer compilers, CPUs, or memchr() implementations etc.  */
 326       if (lines - plines <= bytes_read / 15)
 327         long_lines = true;
 328       else
 329         long_lines = false;
 330     }
 331
 332   *bytes_out = bytes;
 333   *lines_out = lines;
 334
 335   return true;
 336 }
 337
 338 /* Count words.  FILE_X is the name of the file (or NULL for standard
 339    input) that is open on descriptor FD.  *FSTATUS is its status.
 340    CURRENT_POS is the current file offset if known, negative if unknown.
 341    Return true if successful.  */
 342 static bool
 343 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 344 {
 345   bool ok = true;
 346   char buf[BUFFER_SIZE + 1];
 347   size_t bytes_read;
 348   uintmax_t lines, words, chars, bytes, linelength;
 349   bool count_bytes, count_chars, count_complicated;
 350   char const *file = file_x ? file_x : _("standard input");
 351
 352   lines = words = chars = bytes = linelength = 0;
 353
 354   /* If in the current locale, chars are equivalent to bytes, we prefer
 355      counting bytes, because that's easier.  */
 356 #if MB_LEN_MAX > 1
 357   if (MB_CUR_MAX > 1)
 358     {
 359       count_bytes = print_bytes;
 360       count_chars = print_chars;
 361     }
 362   else
 363 #endif
 364     {
 365       count_bytes = print_bytes || print_chars;
 366       count_chars = false;
 367     }
 368   count_complicated = print_words || print_linelength;
 369
 370   /* Advise the kernel of our access pattern only if we will read().  */
 371   if (!count_bytes || count_chars || print_lines || count_complicated)
 372     fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
 373
 374   /* When counting only bytes, save some line- and word-counting
 375      overhead.  If FD is a 'regular' Unix file, using lseek is enough
 376      to get its 'size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
 377      bytes at a time until EOF.  Note that the 'size' (number of bytes)
 378      that wc reports is smaller than stats.st_size when the file is not
 379      positioned at its beginning.  That's why the lseek calls below are
 380      necessary.  For example the command
 381      '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
 382      should make wc report '0' bytes.  */
 383
 384   if (count_bytes && !count_chars && !print_lines && !count_complicated)
 385     {
 386       bool skip_read = false;
 387
 388       if (0 < fstatus->failed)
 389         fstatus->failed = fstat (fd, &fstatus->st);
 390
 391       /* For sized files, seek to one st_blksize before EOF rather than to EOF.
 392          This works better for files in proc-like file systems where
 393          the size is only approximate.  */
 394       if (! fstatus->failed && usable_st_size (&fstatus->st)
 395           && 0 <= fstatus->st.st_size)
 396         {
 397           off_t end_pos = fstatus->st.st_size;
 398           if (current_pos < 0)
 399             current_pos = lseek (fd, 0, SEEK_CUR);
 400
 401           if (end_pos % page_size)
 402             {
 403               /* We only need special handling of /proc and /sys files etc.
 404                  when they're a multiple of PAGE_SIZE.  In the common case
 405                  for files with st_size not a multiple of PAGE_SIZE,
 406                  it's more efficient and accurate to use st_size.
 407
 408                  Be careful here.  The current position may actually be
 409                  beyond the end of the file.  As in the example above.  */
 410
 411               bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
 412               if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
 413                 skip_read = true;
 414               else
 415                 bytes = 0;
 416             }
 417           else
 418             {
 419               off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
 420               if (0 <= current_pos && current_pos < hi_pos
 421                   && 0 <= lseek (fd, hi_pos, SEEK_CUR))
 422                 bytes = hi_pos - current_pos;
 423             }
 424         }
 425
 426       if (! skip_read)
 427         {
 428           fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
 429           while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
 430             {
 431               if (bytes_read == SAFE_READ_ERROR)
 432                 {
 433                   error (0, errno, "%s", quotef (file));
 434                   ok = false;
 435                   break;
 436                 }
 437               bytes += bytes_read;
 438             }
 439         }
 440     }
 441   else if (!count_chars && !count_complicated)
 442     {
 443 #ifdef USE_AVX2_WC_LINECOUNT
 444       if (avx2_supported ())
 445         wc_lines_p = wc_lines_avx2;
 446 #endif
 447
 448       /* Use a separate loop when counting only lines or lines and bytes --
 449          but not chars or words.  */
 450       ok = wc_lines_p (file, fd, &lines, &bytes);
 451     }
 452 #if MB_LEN_MAX > 1
 453 # define SUPPORT_OLD_MBRTOWC 1
 454   else if (MB_CUR_MAX > 1)
 455     {
 456       bool in_word = false;
 457       uintmax_t linepos = 0;
 458       mbstate_t state = { 0, };
 459       bool in_shift = false;
 460 # if SUPPORT_OLD_MBRTOWC
 461       /* Back-up the state before each multibyte character conversion and
 462          move the last incomplete character of the buffer to the front
 463          of the buffer.  This is needed because we don't know whether
 464          the 'mbrtowc' function updates the state when it returns -2, --
 465          this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
 466          ANSI C, glibc-2.1 and Solaris 5.7 behaviour.  We don't have an
 467          autoconf test for this, yet.  */
 468       size_t prev = 0; /* number of bytes carried over from previous round */
 469 # else
 470       const size_t prev = 0;
 471 # endif
 472
 473       while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
 474         {
 475           char const *p;
 476 # if SUPPORT_OLD_MBRTOWC
 477           mbstate_t backup_state;
 478 # endif
 479           if (bytes_read == SAFE_READ_ERROR)
 480             {
 481               error (0, errno, "%s", quotef (file));
 482               ok = false;
 483               break;
 484             }
 485
 486           bytes += bytes_read;
 487           p = buf;
 488           bytes_read += prev;
 489           do
 490             {
 491               wchar_t wide_char;
 492               size_t n;
 493               bool wide = true;
 494
 495               if (!in_shift && is_basic (*p))
 496                 {
 497                   /* Handle most ASCII characters quickly, without calling
 498                      mbrtowc().  */
 499                   n = 1;
 500                   wide_char = *p;
 501                   wide = false;
 502                 }
 503               else
 504                 {
 505                   in_shift = true;
 506 # if SUPPORT_OLD_MBRTOWC
 507                   backup_state = state;
 508 # endif
 509                   n = mbrtowc (&wide_char, p, bytes_read, &state);
 510                   if (n == (size_t) -2)
 511                     {
 512 # if SUPPORT_OLD_MBRTOWC
 513                       state = backup_state;
 514 # endif
 515                       break;
 516                     }
 517                   if (n == (size_t) -1)
 518                     {
 519                       /* Remember that we read a byte, but don't complain
 520                          about the error.  Because of the decoding error,
 521                          this is a considered to be byte but not a
 522                          character (that is, chars is not incremented).  */
 523                       p++;
 524                       bytes_read--;
 525                       continue;
 526                     }
 527                   if (mbsinit (&state))
 528                     in_shift = false;
 529                   if (n == 0)
 530                     {
 531                       wide_char = 0;
 532                       n = 1;
 533                     }
 534                 }
 535
 536               switch (wide_char)
 537                 {
 538                 case '\n':
 539                   lines++;
 540                   FALLTHROUGH;
 541                 case '\r':
 542                 case '\f':
 543                   if (linepos > linelength)
 544                     linelength = linepos;
 545                   linepos = 0;
 546                   goto mb_word_separator;
 547                 case '\t':
 548                   linepos += 8 - (linepos % 8);
 549                   goto mb_word_separator;
 550                 case ' ':
 551                   linepos++;
 552                   FALLTHROUGH;
 553                 case '\v':
 554                 mb_word_separator:
 555                   words += in_word;
 556                   in_word = false;
 557                   break;
 558                 default:
 559                   if (wide && iswprint (wide_char))
 560                     {
 561                       /* wcwidth can be expensive on OSX for example,
 562                          so avoid if uneeded.  */
 563                       if (print_linelength)
 564                         {
 565                           int width = wcwidth (wide_char);
 566                           if (width > 0)
 567                             linepos += width;
 568                         }
 569                       if (iswspace (wide_char) || iswnbspace (wide_char))
 570                         goto mb_word_separator;
 571                       in_word = true;
 572                     }
 573                   else if (!wide && isprint (to_uchar (*p)))
 574                     {
 575                       linepos++;
 576                       if (isspace (to_uchar (*p)))
 577                         goto mb_word_separator;
 578                       in_word = true;
 579                     }
 580                   break;
 581                 }
 582
 583               p += n;
 584               bytes_read -= n;
 585               chars++;
 586             }
 587           while (bytes_read > 0);
 588
 589 # if SUPPORT_OLD_MBRTOWC
 590           if (bytes_read > 0)
 591             {
 592               if (bytes_read == BUFFER_SIZE)
 593                 {
 594                   /* Encountered a very long redundant shift sequence.  */
 595                   p++;
 596                   bytes_read--;
 597                 }
 598               memmove (buf, p, bytes_read);
 599             }
 600           prev = bytes_read;
 601 # endif
 602         }
 603       if (linepos > linelength)
 604         linelength = linepos;
 605       words += in_word;
 606     }
 607 #endif
 608   else
 609     {
 610       bool in_word = false;
 611       uintmax_t linepos = 0;
 612
 613       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
 614         {
 615           char const *p = buf;
 616           if (bytes_read == SAFE_READ_ERROR)
 617             {
 618               error (0, errno, "%s", quotef (file));
 619               ok = false;
 620               break;
 621             }
 622
 623           bytes += bytes_read;
 624           do
 625             {
 626               switch (*p++)
 627                 {
 628                 case '\n':
 629                   lines++;
 630                   FALLTHROUGH;
 631                 case '\r':
 632                 case '\f':
 633                   if (linepos > linelength)
 634                     linelength = linepos;
 635                   linepos = 0;
 636                   goto word_separator;
 637                 case '\t':
 638                   linepos += 8 - (linepos % 8);
 639                   goto word_separator;
 640                 case ' ':
 641                   linepos++;
 642                   FALLTHROUGH;
 643                 case '\v':
 644                 word_separator:
 645                   words += in_word;
 646                   in_word = false;
 647                   break;
 648                 default:
 649                   if (isprint (to_uchar (p[-1])))
 650                     {
 651                       linepos++;
 652                       if (isspace (to_uchar (p[-1]))
 653                           || isnbspace (to_uchar (p[-1])))
 654                         goto word_separator;
 655                       in_word = true;
 656                     }
 657                   break;
 658                 }
 659             }
 660           while (--bytes_read);
 661         }
 662       if (linepos > linelength)
 663         linelength = linepos;
 664       words += in_word;
 665     }
 666
 667   if (count_chars < print_chars)
 668     chars = bytes;
 669
 670   if (total_mode != total_only)
 671     write_counts (lines, words, chars, bytes, linelength, file_x);
 672
 673   if (INT_ADD_WRAPV (total_lines, lines, &total_lines))
 674     total_lines_overflow = true;
 675   if (INT_ADD_WRAPV (total_words, words, &total_words))
 676     total_words_overflow = true;
 677   if (INT_ADD_WRAPV (total_chars, chars, &total_chars))
 678     total_chars_overflow = true;
 679   if (INT_ADD_WRAPV (total_bytes, bytes, &total_bytes))
 680     total_bytes_overflow = true;
 681
 682   if (linelength > max_line_length)
 683     max_line_length = linelength;
 684
 685   return ok;
 686 }
 687
 688 static bool
 689 wc_file (char const *file, struct fstatus *fstatus)
 690 {
 691   if (! file || STREQ (file, "-"))
 692     {
 693       have_read_stdin = true;
 694       xset_binary_mode (STDIN_FILENO, O_BINARY);
 695       return wc (STDIN_FILENO, file, fstatus, -1);
 696     }
 697   else
 698     {
 699       int fd = open (file, O_RDONLY | O_BINARY);
 700       if (fd == -1)
 701         {
 702           error (0, errno, "%s", quotef (file));
 703           return false;
 704         }
 705       else
 706         {
 707           bool ok = wc (fd, file, fstatus, 0);
 708           if (close (fd) != 0)
 709             {
 710               error (0, errno, "%s", quotef (file));
 711               return false;
 712             }
 713           return ok;
 714         }
 715     }
 716 }
 717
 718 /* Return the file status for the NFILES files addressed by FILE.
 719    Optimize the case where only one number is printed, for just one
 720    file; in that case we can use a print width of 1, so we don't need
 721    to stat the file.  Handle the case of (nfiles == 0) in the same way;
 722    that happens when we don't know how long the list of file names will be.  */
 723
 724 static struct fstatus *
 725 get_input_fstatus (size_t nfiles, char *const *file)
 726 {
 727   struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
 728
 729   if (nfiles == 0
 730       || (nfiles == 1
 731           && ((print_lines + print_words + print_chars
 732                + print_bytes + print_linelength)
 733               == 1)))
 734     fstatus[0].failed = 1;
 735   else
 736     {
 737       for (size_t i = 0; i < nfiles; i++)
 738         fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
 739                              ? fstat (STDIN_FILENO, &fstatus[i].st)
 740                              : stat (file[i], &fstatus[i].st));
 741     }
 742
 743   return fstatus;
 744 }
 745
 746 /* Return a print width suitable for the NFILES files whose status is
 747    recorded in FSTATUS.  Optimize the same special case that
 748    get_input_fstatus optimizes.  */
 749
 750 ATTRIBUTE_PURE
 751 static int
 752 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
 753 {
 754   int width = 1;
 755
 756   if (0 < nfiles && fstatus[0].failed <= 0)
 757     {
 758       int minimum_width = 1;
 759       uintmax_t regular_total = 0;
 760
 761       for (size_t i = 0; i < nfiles; i++)
 762         if (! fstatus[i].failed)
 763           {
 764             if (S_ISREG (fstatus[i].st.st_mode))
 765               regular_total += fstatus[i].st.st_size;
 766             else
 767               minimum_width = 7;
 768           }
 769
 770       for (; 10 <= regular_total; regular_total /= 10)
 771         width++;
 772       if (width < minimum_width)
 773         width = minimum_width;
 774     }
 775
 776   return width;
 777 }
 778
 779
 780 int
 781 main (int argc, char **argv)
 782 {
 783   bool ok;
 784   int optc;
 785   size_t nfiles;
 786   char **files;
 787   char *files_from = NULL;
 788   struct fstatus *fstatus;
 789   struct Tokens tok;
 790
 791   initialize_main (&argc, &argv);
 792   set_program_name (argv[0]);
 793   setlocale (LC_ALL, "");
 794   bindtextdomain (PACKAGE, LOCALEDIR);
 795   textdomain (PACKAGE);
 796
 797   atexit (close_stdout);
 798
 799   page_size = getpagesize ();
 800   /* Line buffer stdout to ensure lines are written atomically and immediately
 801      so that processes running in parallel do not intersperse their output.  */
 802   setvbuf (stdout, NULL, _IOLBF, 0);
 803
 804   posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
 805
 806   print_lines = print_words = print_chars = print_bytes = false;
 807   print_linelength = false;
 808   total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
 809
 810   while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
 811     switch (optc)
 812       {
 813       case 'c':
 814         print_bytes = true;
 815         break;
 816
 817       case 'm':
 818         print_chars = true;
 819         break;
 820
 821       case 'l':
 822         print_lines = true;
 823         break;
 824
 825       case 'w':
 826         print_words = true;
 827         break;
 828
 829       case 'L':
 830         print_linelength = true;
 831         break;
 832
 833       case DEBUG_PROGRAM_OPTION:
 834         debug = true;
 835         break;
 836
 837       case FILES0_FROM_OPTION:
 838         files_from = optarg;
 839         break;
 840
 841       case TOTAL_OPTION:
 842         total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
 843         break;
 844
 845       case_GETOPT_HELP_CHAR;
 846
 847       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
 848
 849       default:
 850         usage (EXIT_FAILURE);
 851       }
 852
 853   if (! (print_lines || print_words || print_chars || print_bytes
 854          || print_linelength))
 855     print_lines = print_words = print_bytes = true;
 856
 857   bool read_tokens = false;
 858   struct argv_iterator *ai;
 859   if (files_from)
 860     {
 861       FILE *stream;
 862
 863       /* When using --files0-from=F, you may not specify any files
 864          on the command-line.  */
 865       if (optind < argc)
 866         {
 867           error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
 868           fprintf (stderr, "%s\n",
 869                    _("file operands cannot be combined with --files0-from"));
 870           usage (EXIT_FAILURE);
 871         }
 872
 873       if (STREQ (files_from, "-"))
 874         stream = stdin;
 875       else
 876         {
 877           stream = fopen (files_from, "r");
 878           if (stream == NULL)
 879             die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
 880                  quoteaf (files_from));
 881         }
 882
 883       /* Read the file list into RAM if we can detect its size and that
 884          size is reasonable.  Otherwise, we'll read a name at a time.  */
 885       struct stat st;
 886       if (fstat (fileno (stream), &st) == 0
 887           && S_ISREG (st.st_mode)
 888           && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
 889         {
 890           read_tokens = true;
 891           readtokens0_init (&tok);
 892           if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
 893             die (EXIT_FAILURE, 0, _("cannot read file names from %s"),
 894                  quoteaf (files_from));
 895           files = tok.tok;
 896           nfiles = tok.n_tok;
 897           ai = argv_iter_init_argv (files);
 898         }
 899       else
 900         {
 901           files = NULL;
 902           nfiles = 0;
 903           ai = argv_iter_init_stream (stream);
 904         }
 905     }
 906   else
 907     {
 908       static char *stdin_only[] = { NULL };
 909       files = (optind < argc ? argv + optind : stdin_only);
 910       nfiles = (optind < argc ? argc - optind : 1);
 911       ai = argv_iter_init_argv (files);
 912     }
 913
 914   if (!ai)
 915     xalloc_die ();
 916
 917   fstatus = get_input_fstatus (nfiles, files);
 918   if (total_mode == total_only)
 919     number_width = 1;  /* No extra padding, since no alignment requirement.  */
 920   else
 921     number_width = compute_number_width (nfiles, fstatus);
 922
 923   ok = true;
 924   for (int i = 0; /* */; i++)
 925     {
 926       bool skip_file = false;
 927       enum argv_iter_err ai_err;
 928       char *file_name = argv_iter (ai, &ai_err);
 929       if (!file_name)
 930         {
 931           switch (ai_err)
 932             {
 933             case AI_ERR_EOF:
 934               goto argv_iter_done;
 935             case AI_ERR_READ:
 936               error (0, errno, _("%s: read error"),
 937                      quotef (files_from));
 938               ok = false;
 939               goto argv_iter_done;
 940             case AI_ERR_MEM:
 941               xalloc_die ();
 942             default:
 943               assert (!"unexpected error code from argv_iter");
 944             }
 945         }
 946       if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
 947         {
 948           /* Give a better diagnostic in an unusual case:
 949              printf - | wc --files0-from=- */
 950           error (0, 0, _("when reading file names from stdin, "
 951                          "no file name of %s allowed"),
 952                  quoteaf (file_name));
 953           skip_file = true;
 954         }
 955
 956       if (!file_name[0])
 957         {
 958           /* Diagnose a zero-length file name.  When it's one
 959              among many, knowing the record number may help.
 960              FIXME: currently print the record number only with
 961              --files0-from=FILE.  Maybe do it for argv, too?  */
 962           if (files_from == NULL)
 963             error (0, 0, "%s", _("invalid zero-length file name"));
 964           else
 965             {
 966               /* Using the standard 'filename:line-number:' prefix here is
 967                  not totally appropriate, since NUL is the separator, not NL,
 968                  but it might be better than nothing.  */
 969               unsigned long int file_number = argv_iter_n_args (ai);
 970               error (0, 0, "%s:%lu: %s", quotef (files_from),
 971                      file_number, _("invalid zero-length file name"));
 972             }
 973           skip_file = true;
 974         }
 975
 976       if (skip_file)
 977         ok = false;
 978       else
 979         ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
 980
 981       if (! nfiles)
 982         fstatus[0].failed = 1;
 983     }
 984  argv_iter_done:
 985
 986   /* No arguments on the command line is fine.  That means read from stdin.
 987      However, no arguments on the --files0-from input stream is an error
 988      means don't read anything.  */
 989   if (ok && !files_from && argv_iter_n_args (ai) == 0)
 990     ok &= wc_file (NULL, &fstatus[0]);
 991
 992   if (read_tokens)
 993     readtokens0_free (&tok);
 994
 995   if (total_mode != total_never
 996       && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
 997     {
 998       if (total_lines_overflow)
 999         {
1000           total_lines = UINTMAX_MAX;
1001           error (0, EOVERFLOW, _("total lines"));
1002           ok = false;
1003         }
1004       if (total_words_overflow)
1005         {
1006           total_words = UINTMAX_MAX;
1007           error (0, EOVERFLOW, _("total words"));
1008           ok = false;
1009         }
1010       if (total_chars_overflow)
1011         {
1012           total_chars = UINTMAX_MAX;
1013           error (0, EOVERFLOW, _("total characters"));
1014           ok = false;
1015         }
1016       if (total_bytes_overflow)
1017         {
1018           total_bytes = UINTMAX_MAX;
1019           error (0, EOVERFLOW, _("total bytes"));
1020           ok = false;
1021         }
1022
1023       write_counts (total_lines, total_words, total_chars, total_bytes,
1024                     max_line_length,
1025                     total_mode != total_only ? _("total") : NULL);
1026     }
1027
1028   argv_iter_free (ai);
1029
1030   free (fstatus);
1031
1032   if (have_read_stdin && close (STDIN_FILENO) != 0)
1033     die (EXIT_FAILURE, errno, "-");
1034
1035   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
1036 }