build: update gnulib submodule to latest
[coreutils.git] / src / wc.c
blobb1d82074d36fc2037e4390a6f81cf9fd0a420e59
/* wc - print the number of lines, words, and bytes in files
   Copyright (C) 1985-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Paul Rubin, phr@ocf.berkeley.edu
   and David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <stdckdint.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <wchar.h>
27 #include <wctype.h>
29 #include "system.h"
30 #include "assure.h"
31 #include "argmatch.h"
32 #include "argv-iter.h"
33 #include "fadvise.h"
34 #include "mbchar.h"
35 #include "physmem.h"
36 #include "readtokens0.h"
37 #include "safe-read.h"
38 #include "stat-size.h"
39 #include "xbinary-io.h"
/* Fallback for platforms lacking iswspace: a wide character counts as
   white space only if it fits in an unsigned char and isspace accepts
   it.  */
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
    ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Size of atomic reads.  */
#define BUFFER_SIZE (16 * 1024)
#ifdef USE_AVX2_WC_LINECOUNT
/* From wc_avx2.c */
extern bool
wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
               uintmax_t *bytes_out);
#endif

/* True if --debug was given.  */
static bool debug;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
/* Set once the corresponding total above has overflowed.  These are
   pure flags (only ever assigned true and tested), hence bool.  */
static bool total_lines_overflow;
static bool total_words_overflow;
static bool total_chars_overflow;
static bool total_bytes_overflow;
static uintmax_t max_line_length;

/* Which counts to print.  */
static bool print_lines, print_words, print_chars, print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.  */
static size_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.  */
static bool posixly_correct;
/* The result of calling fstat or stat on a file descriptor or file.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};
113 static struct option const longopts[] =
115 {"bytes", no_argument, nullptr, 'c'},
116 {"chars", no_argument, nullptr, 'm'},
117 {"lines", no_argument, nullptr, 'l'},
118 {"words", no_argument, nullptr, 'w'},
119 {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
120 {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
121 {"max-line-length", no_argument, nullptr, 'L'},
122 {"total", required_argument, nullptr, TOTAL_OPTION},
123 {GETOPT_HELP_OPTION_DECL},
124 {GETOPT_VERSION_OPTION_DECL},
125 {nullptr, 0, nullptr, 0}
/* When to print the line of total counts (the --total=WHEN argument).  */
enum total_type
  {
    total_auto,         /* 0: default or --total=auto */
    total_always,       /* 1: --total=always */
    total_only,         /* 2: --total=only */
    total_never         /* 3: --total=never */
  };
135 static char const *const total_args[] =
137 "auto", "always", "only", "never", nullptr
139 static enum total_type const total_types[] =
141 total_auto, total_always, total_only, total_never
143 ARGMATCH_VERIFY (total_args, total_types);
144 static enum total_type total_mode = total_auto;
#ifdef USE_AVX2_WC_LINECOUNT
/* Return true if __builtin_cpu_supports reports AVX2 on the running
   CPU.  With --debug, also report the outcome via error ().  */
static bool
avx2_supported (void)
{
  bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");

  if (debug)
    error (0, 0, (avx_enabled
                  ? _("using avx2 hardware support")
                  : _("avx2 support not detected")));

  return avx_enabled;
}
#endif
161 void
162 usage (int status)
164 if (status != EXIT_SUCCESS)
165 emit_try_help ();
166 else
168 printf (_("\
169 Usage: %s [OPTION]... [FILE]...\n\
170 or: %s [OPTION]... --files0-from=F\n\
172 program_name, program_name);
173 fputs (_("\
174 Print newline, word, and byte counts for each FILE, and a total line if\n\
175 more than one FILE is specified. A word is a non-zero-length sequence of\n\
176 printable characters delimited by white space.\n\
177 "), stdout);
179 emit_stdin_note ();
181 fputs (_("\
183 The options below may be used to select which counts are printed, always in\n\
184 the following order: newline, word, character, byte, maximum line length.\n\
185 -c, --bytes print the byte counts\n\
186 -m, --chars print the character counts\n\
187 -l, --lines print the newline counts\n\
188 "), stdout);
189 fputs (_("\
190 --files0-from=F read input from the files specified by\n\
191 NUL-terminated names in file F;\n\
192 If F is - then read names from standard input\n\
193 -L, --max-line-length print the maximum display width\n\
194 -w, --words print the word counts\n\
195 "), stdout);
196 fputs (_("\
197 --total=WHEN when to print a line with total counts;\n\
198 WHEN can be: auto, always, only, never\n\
199 "), stdout);
200 fputs (HELP_OPTION_DESCRIPTION, stdout);
201 fputs (VERSION_OPTION_DESCRIPTION, stdout);
202 emit_ancillary_info (PROGRAM_NAME);
204 exit (status);
207 /* Return non zero if a non breaking space. */
208 ATTRIBUTE_PURE
209 static int
210 iswnbspace (wint_t wc)
212 return ! posixly_correct
213 && (wc == 0x00A0 || wc == 0x2007
214 || wc == 0x202F || wc == 0x2060);
/* Single-byte counterpart of iswnbspace: convert the byte C to a wide
   character with btowc and test that.  */
static int
isnbspace (int c)
{
  return iswnbspace (btowc (c));
}
223 /* FILE is the name of the file (or null for standard input)
224 associated with the specified counters. */
225 static void
226 write_counts (uintmax_t lines,
227 uintmax_t words,
228 uintmax_t chars,
229 uintmax_t bytes,
230 uintmax_t linelength,
231 char const *file)
233 static char const format_sp_int[] = " %*s";
234 char const *format_int = format_sp_int + 1;
235 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
237 if (print_lines)
239 printf (format_int, number_width, umaxtostr (lines, buf));
240 format_int = format_sp_int;
242 if (print_words)
244 printf (format_int, number_width, umaxtostr (words, buf));
245 format_int = format_sp_int;
247 if (print_chars)
249 printf (format_int, number_width, umaxtostr (chars, buf));
250 format_int = format_sp_int;
252 if (print_bytes)
254 printf (format_int, number_width, umaxtostr (bytes, buf));
255 format_int = format_sp_int;
257 if (print_linelength)
259 printf (format_int, number_width, umaxtostr (linelength, buf));
261 if (file)
262 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
263 putchar ('\n');
266 static bool
267 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
269 size_t bytes_read;
270 uintmax_t lines, bytes;
271 char buf[BUFFER_SIZE + 1];
272 bool long_lines = false;
274 if (!lines_out || !bytes_out)
276 return false;
279 lines = bytes = 0;
281 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
284 if (bytes_read == SAFE_READ_ERROR)
286 error (0, errno, "%s", quotef (file));
287 return false;
290 bytes += bytes_read;
292 char *p = buf;
293 char *end = buf + bytes_read;
294 uintmax_t plines = lines;
296 if (! long_lines)
298 /* Avoid function call overhead for shorter lines. */
299 while (p != end)
300 lines += *p++ == '\n';
302 else
304 /* rawmemchr is more efficient with longer lines. */
305 *end = '\n';
306 while ((p = rawmemchr (p, '\n')) < end)
308 ++p;
309 ++lines;
313 /* If the average line length in the block is >= 15, then use
314 memchr for the next block, where system specific optimizations
315 may outweigh function call overhead.
316 FIXME: This line length was determined in 2015, on both
317 x86_64 and ppc64, but it's worth re-evaluating in future with
318 newer compilers, CPUs, or memchr() implementations etc. */
319 if (lines - plines <= bytes_read / 15)
320 long_lines = true;
321 else
322 long_lines = false;
325 *bytes_out = bytes;
326 *lines_out = lines;
328 return true;
331 /* Count words. FILE_X is the name of the file (or null for standard
332 input) that is open on descriptor FD. *FSTATUS is its status.
333 CURRENT_POS is the current file offset if known, negative if unknown.
334 Return true if successful. */
335 static bool
336 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
338 bool ok = true;
339 char buf[BUFFER_SIZE + 1];
340 size_t bytes_read;
341 uintmax_t lines, words, chars, bytes, linelength;
342 bool count_bytes, count_chars, count_complicated;
343 char const *file = file_x ? file_x : _("standard input");
345 lines = words = chars = bytes = linelength = 0;
347 /* If in the current locale, chars are equivalent to bytes, we prefer
348 counting bytes, because that's easier. */
349 #if MB_LEN_MAX > 1
350 if (MB_CUR_MAX > 1)
352 count_bytes = print_bytes;
353 count_chars = print_chars;
355 else
356 #endif
358 count_bytes = print_bytes || print_chars;
359 count_chars = false;
361 count_complicated = print_words || print_linelength;
363 /* Advise the kernel of our access pattern only if we will read(). */
364 if (!count_bytes || count_chars || print_lines || count_complicated)
365 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
367 /* When counting only bytes, save some line- and word-counting
368 overhead. If FD is a 'regular' Unix file, using lseek is enough
369 to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
370 bytes at a time until EOF. Note that the 'size' (number of bytes)
371 that wc reports is smaller than stats.st_size when the file is not
372 positioned at its beginning. That's why the lseek calls below are
373 necessary. For example the command
374 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
375 should make wc report '0' bytes. */
377 if (count_bytes && !count_chars && !print_lines && !count_complicated)
379 bool skip_read = false;
381 if (0 < fstatus->failed)
382 fstatus->failed = fstat (fd, &fstatus->st);
384 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
385 This works better for files in proc-like file systems where
386 the size is only approximate. */
387 if (! fstatus->failed && usable_st_size (&fstatus->st)
388 && 0 <= fstatus->st.st_size)
390 off_t end_pos = fstatus->st.st_size;
391 if (current_pos < 0)
392 current_pos = lseek (fd, 0, SEEK_CUR);
394 if (end_pos % page_size)
396 /* We only need special handling of /proc and /sys files etc.
397 when they're a multiple of PAGE_SIZE. In the common case
398 for files with st_size not a multiple of PAGE_SIZE,
399 it's more efficient and accurate to use st_size.
401 Be careful here. The current position may actually be
402 beyond the end of the file. As in the example above. */
404 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
405 if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
406 skip_read = true;
407 else
408 bytes = 0;
410 else
412 off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
413 if (0 <= current_pos && current_pos < hi_pos
414 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
415 bytes = hi_pos - current_pos;
419 if (! skip_read)
421 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
422 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
424 if (bytes_read == SAFE_READ_ERROR)
426 error (0, errno, "%s", quotef (file));
427 ok = false;
428 break;
430 bytes += bytes_read;
434 else if (!count_chars && !count_complicated)
436 #ifdef USE_AVX2_WC_LINECOUNT
437 static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
438 if (!wc_lines_p)
439 wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
440 #else
441 bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
442 = wc_lines;
443 #endif
445 /* Use a separate loop when counting only lines or lines and bytes --
446 but not chars or words. */
447 ok = wc_lines_p (file, fd, &lines, &bytes);
449 #if MB_LEN_MAX > 1
450 # define SUPPORT_OLD_MBRTOWC 1
451 else if (MB_CUR_MAX > 1)
453 bool in_word = false;
454 uintmax_t linepos = 0;
455 mbstate_t state = { 0, };
456 bool in_shift = false;
457 # if SUPPORT_OLD_MBRTOWC
458 /* Back-up the state before each multibyte character conversion and
459 move the last incomplete character of the buffer to the front
460 of the buffer. This is needed because we don't know whether
461 the 'mbrtowc' function updates the state when it returns -2, --
462 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
463 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
464 autoconf test for this, yet. */
465 size_t prev = 0; /* number of bytes carried over from previous round */
466 # else
467 const size_t prev = 0;
468 # endif
470 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
472 char const *p;
473 # if SUPPORT_OLD_MBRTOWC
474 mbstate_t backup_state;
475 # endif
476 if (bytes_read == SAFE_READ_ERROR)
478 error (0, errno, "%s", quotef (file));
479 ok = false;
480 break;
483 bytes += bytes_read;
484 p = buf;
485 bytes_read += prev;
488 wchar_t wide_char;
489 size_t n;
490 bool wide = true;
492 if (!in_shift && is_basic (*p))
494 /* Handle most ASCII characters quickly, without calling
495 mbrtowc(). */
496 n = 1;
497 wide_char = *p;
498 wide = false;
500 else
502 in_shift = true;
503 # if SUPPORT_OLD_MBRTOWC
504 backup_state = state;
505 # endif
506 n = mbrtowc (&wide_char, p, bytes_read, &state);
507 if (n == (size_t) -2)
509 # if SUPPORT_OLD_MBRTOWC
510 state = backup_state;
511 # endif
512 break;
514 if (n == (size_t) -1)
516 /* Remember that we read a byte, but don't complain
517 about the error. Because of the decoding error,
518 this is a considered to be byte but not a
519 character (that is, chars is not incremented). */
520 p++;
521 bytes_read--;
522 continue;
524 if (mbsinit (&state))
525 in_shift = false;
526 if (n == 0)
528 wide_char = 0;
529 n = 1;
533 switch (wide_char)
535 case '\n':
536 lines++;
537 FALLTHROUGH;
538 case '\r':
539 case '\f':
540 if (linepos > linelength)
541 linelength = linepos;
542 linepos = 0;
543 goto mb_word_separator;
544 case '\t':
545 linepos += 8 - (linepos % 8);
546 goto mb_word_separator;
547 case ' ':
548 linepos++;
549 FALLTHROUGH;
550 case '\v':
551 mb_word_separator:
552 words += in_word;
553 in_word = false;
554 break;
555 default:
556 if (wide && iswprint (wide_char))
558 /* wcwidth can be expensive on OSX for example,
559 so avoid if uneeded. */
560 if (print_linelength)
562 int width = wcwidth (wide_char);
563 if (width > 0)
564 linepos += width;
566 if (iswspace (wide_char) || iswnbspace (wide_char))
567 goto mb_word_separator;
568 in_word = true;
570 else if (!wide && isprint (to_uchar (*p)))
572 linepos++;
573 if (isspace (to_uchar (*p)))
574 goto mb_word_separator;
575 in_word = true;
577 break;
580 p += n;
581 bytes_read -= n;
582 chars++;
584 while (bytes_read > 0);
586 # if SUPPORT_OLD_MBRTOWC
587 if (bytes_read > 0)
589 if (bytes_read == BUFFER_SIZE)
591 /* Encountered a very long redundant shift sequence. */
592 p++;
593 bytes_read--;
595 memmove (buf, p, bytes_read);
597 prev = bytes_read;
598 # endif
600 if (linepos > linelength)
601 linelength = linepos;
602 words += in_word;
604 #endif
605 else
607 bool in_word = false;
608 uintmax_t linepos = 0;
610 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
612 char const *p = buf;
613 if (bytes_read == SAFE_READ_ERROR)
615 error (0, errno, "%s", quotef (file));
616 ok = false;
617 break;
620 bytes += bytes_read;
623 switch (*p++)
625 case '\n':
626 lines++;
627 FALLTHROUGH;
628 case '\r':
629 case '\f':
630 if (linepos > linelength)
631 linelength = linepos;
632 linepos = 0;
633 goto word_separator;
634 case '\t':
635 linepos += 8 - (linepos % 8);
636 goto word_separator;
637 case ' ':
638 linepos++;
639 FALLTHROUGH;
640 case '\v':
641 word_separator:
642 words += in_word;
643 in_word = false;
644 break;
645 default:
646 if (isprint (to_uchar (p[-1])))
648 linepos++;
649 if (isspace (to_uchar (p[-1]))
650 || isnbspace (to_uchar (p[-1])))
651 goto word_separator;
652 in_word = true;
654 break;
657 while (--bytes_read);
659 if (linepos > linelength)
660 linelength = linepos;
661 words += in_word;
664 if (count_chars < print_chars)
665 chars = bytes;
667 if (total_mode != total_only)
668 write_counts (lines, words, chars, bytes, linelength, file_x);
670 if (ckd_add (&total_lines, total_lines, lines))
671 total_lines_overflow = true;
672 if (ckd_add (&total_words, total_words, words))
673 total_words_overflow = true;
674 if (ckd_add (&total_chars, total_chars, chars))
675 total_chars_overflow = true;
676 if (ckd_add (&total_bytes, total_bytes, bytes))
677 total_bytes_overflow = true;
679 if (linelength > max_line_length)
680 max_line_length = linelength;
682 return ok;
685 static bool
686 wc_file (char const *file, struct fstatus *fstatus)
688 if (! file || STREQ (file, "-"))
690 have_read_stdin = true;
691 xset_binary_mode (STDIN_FILENO, O_BINARY);
692 return wc (STDIN_FILENO, file, fstatus, -1);
694 else
696 int fd = open (file, O_RDONLY | O_BINARY);
697 if (fd == -1)
699 error (0, errno, "%s", quotef (file));
700 return false;
702 else
704 bool ok = wc (fd, file, fstatus, 0);
705 if (close (fd) != 0)
707 error (0, errno, "%s", quotef (file));
708 return false;
710 return ok;
715 /* Return the file status for the NFILES files addressed by FILE.
716 Optimize the case where only one number is printed, for just one
717 file; in that case we can use a print width of 1, so we don't need
718 to stat the file. Handle the case of (nfiles == 0) in the same way;
719 that happens when we don't know how long the list of file names will be. */
721 static struct fstatus *
722 get_input_fstatus (size_t nfiles, char *const *file)
724 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
726 if (nfiles == 0
727 || (nfiles == 1
728 && ((print_lines + print_words + print_chars
729 + print_bytes + print_linelength)
730 == 1)))
731 fstatus[0].failed = 1;
732 else
734 for (size_t i = 0; i < nfiles; i++)
735 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
736 ? fstat (STDIN_FILENO, &fstatus[i].st)
737 : stat (file[i], &fstatus[i].st));
740 return fstatus;
743 /* Return a print width suitable for the NFILES files whose status is
744 recorded in FSTATUS. Optimize the same special case that
745 get_input_fstatus optimizes. */
747 ATTRIBUTE_PURE
748 static int
749 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
751 int width = 1;
753 if (0 < nfiles && fstatus[0].failed <= 0)
755 int minimum_width = 1;
756 uintmax_t regular_total = 0;
758 for (size_t i = 0; i < nfiles; i++)
759 if (! fstatus[i].failed)
761 if (S_ISREG (fstatus[i].st.st_mode))
762 regular_total += fstatus[i].st.st_size;
763 else
764 minimum_width = 7;
767 for (; 10 <= regular_total; regular_total /= 10)
768 width++;
769 if (width < minimum_width)
770 width = minimum_width;
773 return width;
778 main (int argc, char **argv)
780 bool ok;
781 int optc;
782 size_t nfiles;
783 char **files;
784 char *files_from = nullptr;
785 struct fstatus *fstatus;
786 struct Tokens tok;
788 initialize_main (&argc, &argv);
789 set_program_name (argv[0]);
790 setlocale (LC_ALL, "");
791 bindtextdomain (PACKAGE, LOCALEDIR);
792 textdomain (PACKAGE);
794 atexit (close_stdout);
796 page_size = getpagesize ();
797 /* Line buffer stdout to ensure lines are written atomically and immediately
798 so that processes running in parallel do not intersperse their output. */
799 setvbuf (stdout, nullptr, _IOLBF, 0);
801 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
803 print_lines = print_words = print_chars = print_bytes = false;
804 print_linelength = false;
805 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
807 while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
808 switch (optc)
810 case 'c':
811 print_bytes = true;
812 break;
814 case 'm':
815 print_chars = true;
816 break;
818 case 'l':
819 print_lines = true;
820 break;
822 case 'w':
823 print_words = true;
824 break;
826 case 'L':
827 print_linelength = true;
828 break;
830 case DEBUG_PROGRAM_OPTION:
831 debug = true;
832 break;
834 case FILES0_FROM_OPTION:
835 files_from = optarg;
836 break;
838 case TOTAL_OPTION:
839 total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
840 break;
842 case_GETOPT_HELP_CHAR;
844 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
846 default:
847 usage (EXIT_FAILURE);
850 if (! (print_lines || print_words || print_chars || print_bytes
851 || print_linelength))
852 print_lines = print_words = print_bytes = true;
854 bool read_tokens = false;
855 struct argv_iterator *ai;
856 if (files_from)
858 FILE *stream;
860 /* When using --files0-from=F, you may not specify any files
861 on the command-line. */
862 if (optind < argc)
864 error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
865 fprintf (stderr, "%s\n",
866 _("file operands cannot be combined with --files0-from"));
867 usage (EXIT_FAILURE);
870 if (STREQ (files_from, "-"))
871 stream = stdin;
872 else
874 stream = fopen (files_from, "r");
875 if (stream == nullptr)
876 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
877 quoteaf (files_from));
880 /* Read the file list into RAM if we can detect its size and that
881 size is reasonable. Otherwise, we'll read a name at a time. */
882 struct stat st;
883 if (fstat (fileno (stream), &st) == 0
884 && S_ISREG (st.st_mode)
885 && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
887 read_tokens = true;
888 readtokens0_init (&tok);
889 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
890 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
891 quoteaf (files_from));
892 files = tok.tok;
893 nfiles = tok.n_tok;
894 ai = argv_iter_init_argv (files);
896 else
898 files = nullptr;
899 nfiles = 0;
900 ai = argv_iter_init_stream (stream);
903 else
905 static char *stdin_only[] = { nullptr };
906 files = (optind < argc ? argv + optind : stdin_only);
907 nfiles = (optind < argc ? argc - optind : 1);
908 ai = argv_iter_init_argv (files);
911 if (!ai)
912 xalloc_die ();
914 fstatus = get_input_fstatus (nfiles, files);
915 if (total_mode == total_only)
916 number_width = 1; /* No extra padding, since no alignment requirement. */
917 else
918 number_width = compute_number_width (nfiles, fstatus);
920 ok = true;
921 for (int i = 0; /* */; i++)
923 bool skip_file = false;
924 enum argv_iter_err ai_err;
925 char *file_name = argv_iter (ai, &ai_err);
926 if (!file_name)
928 switch (ai_err)
930 case AI_ERR_EOF:
931 goto argv_iter_done;
932 case AI_ERR_READ:
933 error (0, errno, _("%s: read error"),
934 quotef (files_from));
935 ok = false;
936 goto argv_iter_done;
937 case AI_ERR_MEM:
938 xalloc_die ();
939 default:
940 affirm (!"unexpected error code from argv_iter");
943 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
945 /* Give a better diagnostic in an unusual case:
946 printf - | wc --files0-from=- */
947 error (0, 0, _("when reading file names from stdin, "
948 "no file name of %s allowed"),
949 quoteaf (file_name));
950 skip_file = true;
953 if (!file_name[0])
955 /* Diagnose a zero-length file name. When it's one
956 among many, knowing the record number may help.
957 FIXME: currently print the record number only with
958 --files0-from=FILE. Maybe do it for argv, too? */
959 if (files_from == nullptr)
960 error (0, 0, "%s", _("invalid zero-length file name"));
961 else
963 /* Using the standard 'filename:line-number:' prefix here is
964 not totally appropriate, since NUL is the separator, not NL,
965 but it might be better than nothing. */
966 unsigned long int file_number = argv_iter_n_args (ai);
967 error (0, 0, "%s:%lu: %s", quotef (files_from),
968 file_number, _("invalid zero-length file name"));
970 skip_file = true;
973 if (skip_file)
974 ok = false;
975 else
976 ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
978 if (! nfiles)
979 fstatus[0].failed = 1;
981 argv_iter_done:
983 /* No arguments on the command line is fine. That means read from stdin.
984 However, no arguments on the --files0-from input stream is an error
985 means don't read anything. */
986 if (ok && !files_from && argv_iter_n_args (ai) == 0)
987 ok &= wc_file (nullptr, &fstatus[0]);
989 if (read_tokens)
990 readtokens0_free (&tok);
992 if (total_mode != total_never
993 && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
995 if (total_lines_overflow)
997 total_lines = UINTMAX_MAX;
998 error (0, EOVERFLOW, _("total lines"));
999 ok = false;
1001 if (total_words_overflow)
1003 total_words = UINTMAX_MAX;
1004 error (0, EOVERFLOW, _("total words"));
1005 ok = false;
1007 if (total_chars_overflow)
1009 total_chars = UINTMAX_MAX;
1010 error (0, EOVERFLOW, _("total characters"));
1011 ok = false;
1013 if (total_bytes_overflow)
1015 total_bytes = UINTMAX_MAX;
1016 error (0, EOVERFLOW, _("total bytes"));
1017 ok = false;
1020 write_counts (total_lines, total_words, total_chars, total_bytes,
1021 max_line_length,
1022 total_mode != total_only ? _("total") : nullptr);
1025 argv_iter_free (ai);
1027 free (fstatus);
1029 if (have_read_stdin && close (STDIN_FILENO) != 0)
1030 error (EXIT_FAILURE, errno, "-");
1032 return ok ? EXIT_SUCCESS : EXIT_FAILURE;