wc: fix word count bug
[coreutils.git] / src / wc.c
blob4db3a770d23f7da79bc64eb51392b61695149030
/* wc - print the number of lines, words, and bytes in files
   Copyright (C) 1985-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Paul Rubin, phr@ocf.berkeley.edu
   and David MacKenzie, djm@gnu.ai.mit.edu. */
#include <config.h>

#include <stdckdint.h>
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include <uchar.h>

#include "system.h"
#include "assure.h"
#include "argmatch.h"
#include "argv-iter.h"
#include "fadvise.h"
#include "physmem.h"
#include "readtokens0.h"
#include "safe-read.h"
#include "stat-size.h"
#include "xbinary-io.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Size of atomic reads.  */
#define BUFFER_SIZE (16 * 1024)

/* When nonzero, the multibyte counting loop saves the conversion state
   before each conversion and carries an incomplete trailing character
   over to the next read.  */
#define SUPPORT_OLD_MBRTOWC 1

#ifdef USE_AVX2_WC_LINECOUNT
/* From wc_avx2.c */
extern bool
wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
               uintmax_t *bytes_out);
#endif
/* True if --debug was given.  */
static bool debug;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
/* Set when the corresponding total above overflowed while being
   accumulated.  These are only ever assigned true and tested, so they
   are flags rather than counters.  */
static bool total_lines_overflow;
static bool total_words_overflow;
static bool total_chars_overflow;
static bool total_bytes_overflow;
static uintmax_t max_line_length;

/* Which counts to print.  */
static bool print_lines, print_words, print_chars, print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.  */
static size_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.  */
static bool posixly_correct;
/* The result of calling fstat or stat on a file descriptor or file.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};
108 static struct option const longopts[] =
110 {"bytes", no_argument, nullptr, 'c'},
111 {"chars", no_argument, nullptr, 'm'},
112 {"lines", no_argument, nullptr, 'l'},
113 {"words", no_argument, nullptr, 'w'},
114 {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
115 {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
116 {"max-line-length", no_argument, nullptr, 'L'},
117 {"total", required_argument, nullptr, TOTAL_OPTION},
118 {GETOPT_HELP_OPTION_DECL},
119 {GETOPT_VERSION_OPTION_DECL},
120 {nullptr, 0, nullptr, 0}
123 enum total_type
125 total_auto, /* 0: default or --total=auto */
126 total_always, /* 1: --total=always */
127 total_only, /* 2: --total=only */
128 total_never /* 3: --total=never */
130 static char const *const total_args[] =
132 "auto", "always", "only", "never", nullptr
134 static enum total_type const total_types[] =
136 total_auto, total_always, total_only, total_never
138 ARGMATCH_VERIFY (total_args, total_types);
139 static enum total_type total_mode = total_auto;
#ifdef USE_AVX2_WC_LINECOUNT
/* Return true if __builtin_cpu_supports reports "avx2".
   When DEBUG is set, also emit a diagnostic saying which case applies.  */
static bool
avx2_supported (void)
{
  bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");

  if (debug)
    error (0, 0, (avx_enabled
                  ? _("using avx2 hardware support")
                  : _("avx2 support not detected")));

  return avx_enabled;
}
#endif
156 void
157 usage (int status)
159 if (status != EXIT_SUCCESS)
160 emit_try_help ();
161 else
163 printf (_("\
164 Usage: %s [OPTION]... [FILE]...\n\
165 or: %s [OPTION]... --files0-from=F\n\
167 program_name, program_name);
168 fputs (_("\
169 Print newline, word, and byte counts for each FILE, and a total line if\n\
170 more than one FILE is specified. A word is a nonempty sequence of non white\n\
171 space delimited by white space characters or by start or end of input.\n\
172 "), stdout);
174 emit_stdin_note ();
176 fputs (_("\
178 The options below may be used to select which counts are printed, always in\n\
179 the following order: newline, word, character, byte, maximum line length.\n\
180 -c, --bytes print the byte counts\n\
181 -m, --chars print the character counts\n\
182 -l, --lines print the newline counts\n\
183 "), stdout);
184 fputs (_("\
185 --files0-from=F read input from the files specified by\n\
186 NUL-terminated names in file F;\n\
187 If F is - then read names from standard input\n\
188 -L, --max-line-length print the maximum display width\n\
189 -w, --words print the word counts\n\
190 "), stdout);
191 fputs (_("\
192 --total=WHEN when to print a line with total counts;\n\
193 WHEN can be: auto, always, only, never\n\
194 "), stdout);
195 fputs (HELP_OPTION_DESCRIPTION, stdout);
196 fputs (VERSION_OPTION_DESCRIPTION, stdout);
197 emit_ancillary_info (PROGRAM_NAME);
199 exit (status);
202 /* Return non zero if a non breaking space. */
203 ATTRIBUTE_PURE
204 static int
205 iswnbspace (wint_t wc)
207 return ! posixly_correct
208 && (wc == 0x00A0 || wc == 0x2007
209 || wc == 0x202F || wc == 0x2060);
/* Single-byte counterpart of iswnbspace: convert C with btoc32 and
   test the result.  */
static int
isnbspace (int c)
{
  return iswnbspace (btoc32 (c));
}
218 /* FILE is the name of the file (or null for standard input)
219 associated with the specified counters. */
220 static void
221 write_counts (uintmax_t lines,
222 uintmax_t words,
223 uintmax_t chars,
224 uintmax_t bytes,
225 uintmax_t linelength,
226 char const *file)
228 static char const format_sp_int[] = " %*s";
229 char const *format_int = format_sp_int + 1;
230 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
232 if (print_lines)
234 printf (format_int, number_width, umaxtostr (lines, buf));
235 format_int = format_sp_int;
237 if (print_words)
239 printf (format_int, number_width, umaxtostr (words, buf));
240 format_int = format_sp_int;
242 if (print_chars)
244 printf (format_int, number_width, umaxtostr (chars, buf));
245 format_int = format_sp_int;
247 if (print_bytes)
249 printf (format_int, number_width, umaxtostr (bytes, buf));
250 format_int = format_sp_int;
252 if (print_linelength)
254 printf (format_int, number_width, umaxtostr (linelength, buf));
256 if (file)
257 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
258 putchar ('\n');
261 static bool
262 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
264 size_t bytes_read;
265 uintmax_t lines, bytes;
266 char buf[BUFFER_SIZE + 1];
267 bool long_lines = false;
269 if (!lines_out || !bytes_out)
271 return false;
274 lines = bytes = 0;
276 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
279 if (bytes_read == SAFE_READ_ERROR)
281 error (0, errno, "%s", quotef (file));
282 return false;
285 bytes += bytes_read;
287 char *p = buf;
288 char *end = buf + bytes_read;
289 uintmax_t plines = lines;
291 if (! long_lines)
293 /* Avoid function call overhead for shorter lines. */
294 while (p != end)
295 lines += *p++ == '\n';
297 else
299 /* rawmemchr is more efficient with longer lines. */
300 *end = '\n';
301 while ((p = rawmemchr (p, '\n')) < end)
303 ++p;
304 ++lines;
308 /* If the average line length in the block is >= 15, then use
309 memchr for the next block, where system specific optimizations
310 may outweigh function call overhead.
311 FIXME: This line length was determined in 2015, on both
312 x86_64 and ppc64, but it's worth re-evaluating in future with
313 newer compilers, CPUs, or memchr() implementations etc. */
314 if (lines - plines <= bytes_read / 15)
315 long_lines = true;
316 else
317 long_lines = false;
320 *bytes_out = bytes;
321 *lines_out = lines;
323 return true;
326 /* Count words. FILE_X is the name of the file (or null for standard
327 input) that is open on descriptor FD. *FSTATUS is its status.
328 CURRENT_POS is the current file offset if known, negative if unknown.
329 Return true if successful. */
330 static bool
331 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
333 bool ok = true;
334 char buf[BUFFER_SIZE + 1];
335 size_t bytes_read;
336 uintmax_t lines, words, chars, bytes, linelength;
337 bool count_bytes, count_chars, count_complicated;
338 char const *file = file_x ? file_x : _("standard input");
340 lines = words = chars = bytes = linelength = 0;
342 /* If in the current locale, chars are equivalent to bytes, we prefer
343 counting bytes, because that's easier. */
344 if (MB_CUR_MAX > 1)
346 count_bytes = print_bytes;
347 count_chars = print_chars;
349 else
351 count_bytes = print_bytes || print_chars;
352 count_chars = false;
354 count_complicated = print_words || print_linelength;
356 /* Advise the kernel of our access pattern only if we will read(). */
357 if (!count_bytes || count_chars || print_lines || count_complicated)
358 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
360 /* When counting only bytes, save some line- and word-counting
361 overhead. If FD is a 'regular' Unix file, using lseek is enough
362 to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
363 bytes at a time until EOF. Note that the 'size' (number of bytes)
364 that wc reports is smaller than stats.st_size when the file is not
365 positioned at its beginning. That's why the lseek calls below are
366 necessary. For example the command
367 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
368 should make wc report '0' bytes. */
370 if (count_bytes && !count_chars && !print_lines && !count_complicated)
372 bool skip_read = false;
374 if (0 < fstatus->failed)
375 fstatus->failed = fstat (fd, &fstatus->st);
377 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
378 This works better for files in proc-like file systems where
379 the size is only approximate. */
380 if (! fstatus->failed && usable_st_size (&fstatus->st)
381 && 0 <= fstatus->st.st_size)
383 off_t end_pos = fstatus->st.st_size;
384 if (current_pos < 0)
385 current_pos = lseek (fd, 0, SEEK_CUR);
387 if (end_pos % page_size)
389 /* We only need special handling of /proc and /sys files etc.
390 when they're a multiple of PAGE_SIZE. In the common case
391 for files with st_size not a multiple of PAGE_SIZE,
392 it's more efficient and accurate to use st_size.
394 Be careful here. The current position may actually be
395 beyond the end of the file. As in the example above. */
397 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
398 if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
399 skip_read = true;
400 else
401 bytes = 0;
403 else
405 off_t hi_pos = (end_pos
406 - end_pos % (STP_BLKSIZE (&fstatus->st) + 1));
407 if (0 <= current_pos && current_pos < hi_pos
408 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
409 bytes = hi_pos - current_pos;
413 if (! skip_read)
415 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
416 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
418 if (bytes_read == SAFE_READ_ERROR)
420 error (0, errno, "%s", quotef (file));
421 ok = false;
422 break;
424 bytes += bytes_read;
428 else if (!count_chars && !count_complicated)
430 #ifdef USE_AVX2_WC_LINECOUNT
431 static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
432 if (!wc_lines_p)
433 wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
434 #else
435 bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
436 = wc_lines;
437 #endif
439 /* Use a separate loop when counting only lines or lines and bytes --
440 but not chars or words. */
441 ok = wc_lines_p (file, fd, &lines, &bytes);
443 else if (MB_CUR_MAX > 1)
445 bool in_word = false;
446 uintmax_t linepos = 0;
447 mbstate_t state; mbszero (&state);
448 bool in_shift = false;
449 #if SUPPORT_OLD_MBRTOWC
450 /* Back-up the state before each multibyte character conversion and
451 move the last incomplete character of the buffer to the front
452 of the buffer. This is needed because we don't know whether
453 the 'mbrtowc' function updates the state when it returns -2, --
454 this is the ISO C 99 and glibc-2.2 behavior - or not - amended
455 ANSI C, glibc-2.1 and Solaris 5.7 behavior. We don't have an
456 autoconf test for this, yet. */
457 size_t prev = 0; /* number of bytes carried over from previous round */
458 #else
459 const size_t prev = 0;
460 #endif
462 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
464 char const *p;
465 #if SUPPORT_OLD_MBRTOWC
466 mbstate_t backup_state;
467 #endif
468 if (bytes_read == SAFE_READ_ERROR)
470 error (0, errno, "%s", quotef (file));
471 ok = false;
472 break;
475 bytes += bytes_read;
476 p = buf;
477 bytes_read += prev;
480 char32_t wide_char;
481 size_t n;
482 bool single_byte_ascii = !in_shift && 0 <= *p && *p < 0x80;
484 if (single_byte_ascii)
486 /* Handle most ASCII characters quickly, without calling
487 mbrtowc(). */
488 n = 1;
489 wide_char = *p;
491 else
493 in_shift = true;
494 #if SUPPORT_OLD_MBRTOWC
495 backup_state = state;
496 #endif
497 n = mbrtoc32 (&wide_char, p, bytes_read, &state);
498 if (n == (size_t) -2)
500 #if SUPPORT_OLD_MBRTOWC
501 state = backup_state;
502 #endif
503 break;
505 if (n == (size_t) -1)
507 /* Remember that we read a byte, but don't complain
508 about the error. Because of the decoding error,
509 this is a considered to be byte but not a
510 character (that is, chars is not incremented). */
511 p++;
512 bytes_read--;
513 mbszero (&state);
514 in_shift = false;
515 continue;
517 if (mbsinit (&state))
518 in_shift = false;
519 if (n == 0)
521 wide_char = 0;
522 n = 1;
526 switch (wide_char)
528 case '\n':
529 lines++;
530 FALLTHROUGH;
531 case '\r':
532 case '\f':
533 if (linepos > linelength)
534 linelength = linepos;
535 linepos = 0;
536 goto mb_word_separator;
537 case '\t':
538 linepos += 8 - (linepos % 8);
539 goto mb_word_separator;
540 case ' ':
541 linepos++;
542 FALLTHROUGH;
543 case '\v':
544 mb_word_separator:
545 in_word = false;
546 break;
547 default:
548 /* c32width can be expensive on macOS for example,
549 so avoid if not needed. */
550 if (print_linelength)
552 if (single_byte_ascii)
553 linepos += !!isprint (wide_char);
554 else
556 int width = c32width (wide_char);
557 if (width > 0)
558 linepos += width;
561 if (single_byte_ascii ? isspace (wide_char)
562 : c32isspace (wide_char) || iswnbspace (wide_char))
563 goto mb_word_separator;
565 /* Count words by counting word starts, i.e., each
566 white space character (or the start of input)
567 followed by non white space.
569 POSIX says a word is "a non-zero-length string of
570 characters delimited by white space". This is certainly
571 wrong in some sense, as the string can be delimited
572 by start or end of input, and it is not clear
573 what it means when the input contains encoding errors.
574 Although GNU wc ignores encoding errors when determining
575 word boundaries, this behavior is not documented or
576 portable and should not be relied upon. */
577 words += !in_word;
578 in_word = true;
579 break;
582 p += n;
583 bytes_read -= n;
584 chars++;
586 while (bytes_read > 0);
588 #if SUPPORT_OLD_MBRTOWC
589 if (bytes_read > 0)
591 if (bytes_read == BUFFER_SIZE)
593 /* Encountered a very long redundant shift sequence. */
594 p++;
595 bytes_read--;
597 memmove (buf, p, bytes_read);
599 prev = bytes_read;
600 #endif
602 if (linepos > linelength)
603 linelength = linepos;
605 else
607 bool in_word = false;
608 uintmax_t linepos = 0;
610 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
612 char const *p = buf;
613 if (bytes_read == SAFE_READ_ERROR)
615 error (0, errno, "%s", quotef (file));
616 ok = false;
617 break;
620 bytes += bytes_read;
623 unsigned char c = *p++;
624 switch (c)
626 case '\n':
627 lines++;
628 FALLTHROUGH;
629 case '\r':
630 case '\f':
631 if (linepos > linelength)
632 linelength = linepos;
633 linepos = 0;
634 goto word_separator;
635 case '\t':
636 linepos += 8 - (linepos % 8);
637 goto word_separator;
638 case ' ':
639 linepos++;
640 FALLTHROUGH;
641 case '\v':
642 word_separator:
643 in_word = false;
644 break;
645 default:
646 linepos += !!isprint (c);
647 if (isspace (c) || isnbspace (c))
648 goto word_separator;
649 words += !in_word;
650 in_word = true;
651 break;
654 while (--bytes_read);
656 if (linepos > linelength)
657 linelength = linepos;
660 if (count_chars < print_chars)
661 chars = bytes;
663 if (total_mode != total_only)
664 write_counts (lines, words, chars, bytes, linelength, file_x);
666 if (ckd_add (&total_lines, total_lines, lines))
667 total_lines_overflow = true;
668 if (ckd_add (&total_words, total_words, words))
669 total_words_overflow = true;
670 if (ckd_add (&total_chars, total_chars, chars))
671 total_chars_overflow = true;
672 if (ckd_add (&total_bytes, total_bytes, bytes))
673 total_bytes_overflow = true;
675 if (linelength > max_line_length)
676 max_line_length = linelength;
678 return ok;
681 static bool
682 wc_file (char const *file, struct fstatus *fstatus)
684 if (! file || STREQ (file, "-"))
686 have_read_stdin = true;
687 xset_binary_mode (STDIN_FILENO, O_BINARY);
688 return wc (STDIN_FILENO, file, fstatus, -1);
690 else
692 int fd = open (file, O_RDONLY | O_BINARY);
693 if (fd == -1)
695 error (0, errno, "%s", quotef (file));
696 return false;
698 else
700 bool ok = wc (fd, file, fstatus, 0);
701 if (close (fd) != 0)
703 error (0, errno, "%s", quotef (file));
704 return false;
706 return ok;
711 /* Return the file status for the NFILES files addressed by FILE.
712 Optimize the case where only one number is printed, for just one
713 file; in that case we can use a print width of 1, so we don't need
714 to stat the file. Handle the case of (nfiles == 0) in the same way;
715 that happens when we don't know how long the list of file names will be. */
717 static struct fstatus *
718 get_input_fstatus (size_t nfiles, char *const *file)
720 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
722 if (nfiles == 0
723 || (nfiles == 1
724 && ((print_lines + print_words + print_chars
725 + print_bytes + print_linelength)
726 == 1)))
727 fstatus[0].failed = 1;
728 else
730 for (size_t i = 0; i < nfiles; i++)
731 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
732 ? fstat (STDIN_FILENO, &fstatus[i].st)
733 : stat (file[i], &fstatus[i].st));
736 return fstatus;
739 /* Return a print width suitable for the NFILES files whose status is
740 recorded in FSTATUS. Optimize the same special case that
741 get_input_fstatus optimizes. */
743 ATTRIBUTE_PURE
744 static int
745 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
747 int width = 1;
749 if (0 < nfiles && fstatus[0].failed <= 0)
751 int minimum_width = 1;
752 uintmax_t regular_total = 0;
754 for (size_t i = 0; i < nfiles; i++)
755 if (! fstatus[i].failed)
757 if (S_ISREG (fstatus[i].st.st_mode))
758 regular_total += fstatus[i].st.st_size;
759 else
760 minimum_width = 7;
763 for (; 10 <= regular_total; regular_total /= 10)
764 width++;
765 if (width < minimum_width)
766 width = minimum_width;
769 return width;
774 main (int argc, char **argv)
776 bool ok;
777 int optc;
778 size_t nfiles;
779 char **files;
780 char *files_from = nullptr;
781 struct fstatus *fstatus;
782 struct Tokens tok;
784 initialize_main (&argc, &argv);
785 set_program_name (argv[0]);
786 setlocale (LC_ALL, "");
787 bindtextdomain (PACKAGE, LOCALEDIR);
788 textdomain (PACKAGE);
790 atexit (close_stdout);
792 page_size = getpagesize ();
793 /* Line buffer stdout to ensure lines are written atomically and immediately
794 so that processes running in parallel do not intersperse their output. */
795 setvbuf (stdout, nullptr, _IOLBF, 0);
797 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
799 print_lines = print_words = print_chars = print_bytes = false;
800 print_linelength = false;
801 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
803 while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
804 switch (optc)
806 case 'c':
807 print_bytes = true;
808 break;
810 case 'm':
811 print_chars = true;
812 break;
814 case 'l':
815 print_lines = true;
816 break;
818 case 'w':
819 print_words = true;
820 break;
822 case 'L':
823 print_linelength = true;
824 break;
826 case DEBUG_PROGRAM_OPTION:
827 debug = true;
828 break;
830 case FILES0_FROM_OPTION:
831 files_from = optarg;
832 break;
834 case TOTAL_OPTION:
835 total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
836 break;
838 case_GETOPT_HELP_CHAR;
840 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
842 default:
843 usage (EXIT_FAILURE);
846 if (! (print_lines || print_words || print_chars || print_bytes
847 || print_linelength))
848 print_lines = print_words = print_bytes = true;
850 bool read_tokens = false;
851 struct argv_iterator *ai;
852 if (files_from)
854 FILE *stream;
856 /* When using --files0-from=F, you may not specify any files
857 on the command-line. */
858 if (optind < argc)
860 error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
861 fprintf (stderr, "%s\n",
862 _("file operands cannot be combined with --files0-from"));
863 usage (EXIT_FAILURE);
866 if (STREQ (files_from, "-"))
867 stream = stdin;
868 else
870 stream = fopen (files_from, "r");
871 if (stream == nullptr)
872 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
873 quoteaf (files_from));
876 /* Read the file list into RAM if we can detect its size and that
877 size is reasonable. Otherwise, we'll read a name at a time. */
878 struct stat st;
879 if (fstat (fileno (stream), &st) == 0
880 && S_ISREG (st.st_mode)
881 && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
883 read_tokens = true;
884 readtokens0_init (&tok);
885 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
886 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
887 quoteaf (files_from));
888 files = tok.tok;
889 nfiles = tok.n_tok;
890 ai = argv_iter_init_argv (files);
892 else
894 files = nullptr;
895 nfiles = 0;
896 ai = argv_iter_init_stream (stream);
899 else
901 static char *stdin_only[] = { nullptr };
902 files = (optind < argc ? argv + optind : stdin_only);
903 nfiles = (optind < argc ? argc - optind : 1);
904 ai = argv_iter_init_argv (files);
907 if (!ai)
908 xalloc_die ();
910 fstatus = get_input_fstatus (nfiles, files);
911 if (total_mode == total_only)
912 number_width = 1; /* No extra padding, since no alignment requirement. */
913 else
914 number_width = compute_number_width (nfiles, fstatus);
916 ok = true;
917 for (int i = 0; /* */; i++)
919 bool skip_file = false;
920 enum argv_iter_err ai_err;
921 char *file_name = argv_iter (ai, &ai_err);
922 if (!file_name)
924 switch (ai_err)
926 case AI_ERR_EOF:
927 goto argv_iter_done;
928 case AI_ERR_READ:
929 error (0, errno, _("%s: read error"),
930 quotef (files_from));
931 ok = false;
932 goto argv_iter_done;
933 case AI_ERR_MEM:
934 xalloc_die ();
935 default:
936 affirm (!"unexpected error code from argv_iter");
939 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
941 /* Give a better diagnostic in an unusual case:
942 printf - | wc --files0-from=- */
943 error (0, 0, _("when reading file names from stdin, "
944 "no file name of %s allowed"),
945 quoteaf (file_name));
946 skip_file = true;
949 if (!file_name[0])
951 /* Diagnose a zero-length file name. When it's one
952 among many, knowing the record number may help.
953 FIXME: currently print the record number only with
954 --files0-from=FILE. Maybe do it for argv, too? */
955 if (files_from == nullptr)
956 error (0, 0, "%s", _("invalid zero-length file name"));
957 else
959 /* Using the standard 'filename:line-number:' prefix here is
960 not totally appropriate, since NUL is the separator, not NL,
961 but it might be better than nothing. */
962 unsigned long int file_number = argv_iter_n_args (ai);
963 error (0, 0, "%s:%lu: %s", quotef (files_from),
964 file_number, _("invalid zero-length file name"));
966 skip_file = true;
969 if (skip_file)
970 ok = false;
971 else
972 ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
974 if (! nfiles)
975 fstatus[0].failed = 1;
977 argv_iter_done:
979 /* No arguments on the command line is fine. That means read from stdin.
980 However, no arguments on the --files0-from input stream is an error
981 means don't read anything. */
982 if (ok && !files_from && argv_iter_n_args (ai) == 0)
983 ok &= wc_file (nullptr, &fstatus[0]);
985 if (read_tokens)
986 readtokens0_free (&tok);
988 if (total_mode != total_never
989 && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
991 if (total_lines_overflow)
993 total_lines = UINTMAX_MAX;
994 error (0, EOVERFLOW, _("total lines"));
995 ok = false;
997 if (total_words_overflow)
999 total_words = UINTMAX_MAX;
1000 error (0, EOVERFLOW, _("total words"));
1001 ok = false;
1003 if (total_chars_overflow)
1005 total_chars = UINTMAX_MAX;
1006 error (0, EOVERFLOW, _("total characters"));
1007 ok = false;
1009 if (total_bytes_overflow)
1011 total_bytes = UINTMAX_MAX;
1012 error (0, EOVERFLOW, _("total bytes"));
1013 ok = false;
1016 write_counts (total_lines, total_words, total_chars, total_bytes,
1017 max_line_length,
1018 total_mode != total_only ? _("total") : nullptr);
1021 argv_iter_free (ai);
1023 free (fstatus);
1025 if (have_read_stdin && close (STDIN_FILENO) != 0)
1026 error (EXIT_FAILURE, errno, "-");
1028 return ok ? EXIT_SUCCESS : EXIT_FAILURE;