maint: use mbszero
[coreutils.git] / src / wc.c
blob1753acfc1649aa3ed7d857c2b694d410b9ef14f9
1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 1985-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18 and David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <stdckdint.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <wchar.h>
27 #include <wctype.h>
29 #include "system.h"
30 #include "assure.h"
31 #include "argmatch.h"
32 #include "argv-iter.h"
33 #include "fadvise.h"
34 #include "physmem.h"
35 #include "readtokens0.h"
36 #include "safe-read.h"
37 #include "stat-size.h"
38 #include "xbinary-io.h"
/* Fallback used when the platform supplies no iswspace: a wide character
   is treated as white space only when it equals its own to_uchar
   conversion and isspace accepts that value.  */
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
    ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

/* Author list handed to the --version machinery.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Number of bytes requested from each read.  */
#define BUFFER_SIZE (16 * 1024)
#ifdef USE_AVX2_WC_LINECOUNT
/* Line counter implemented in wc_avx2.c.  Its parameters mirror those of
   wc_lines so that either can be called through one function pointer.  */
extern bool wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
                           uintmax_t *bytes_out);
#endif
/* True if --debug was given.  */
static bool debug;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;

/* Set to true once adding to the corresponding total above has
   overflowed.  These are only ever assigned 'true' and tested as
   conditions, so they are flags, not counters.  */
static bool total_lines_overflow;
static bool total_words_overflow;
static bool total_chars_overflow;
static bool total_bytes_overflow;

static uintmax_t max_line_length;

/* Which counts to print.  */
static bool print_lines, print_words, print_chars, print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.  */
static size_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.  */
static bool posixly_correct;
/* Outcome of an fstat or stat call on one input, kept together with the
   status data that call produced.  */
struct fstatus
{
  /* Positive while fstat or stat has not been called yet.  Afterwards
     it holds the value that fstat or stat returned.  */
  int failed;

  /* The file's status; meaningful only when FAILED is zero.  */
  struct stat st;
};
103 /* For long options that have no equivalent short option, use a
104 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
105 enum
107 DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
108 FILES0_FROM_OPTION,
109 TOTAL_OPTION,
112 static struct option const longopts[] =
114 {"bytes", no_argument, nullptr, 'c'},
115 {"chars", no_argument, nullptr, 'm'},
116 {"lines", no_argument, nullptr, 'l'},
117 {"words", no_argument, nullptr, 'w'},
118 {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
119 {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
120 {"max-line-length", no_argument, nullptr, 'L'},
121 {"total", required_argument, nullptr, TOTAL_OPTION},
122 {GETOPT_HELP_OPTION_DECL},
123 {GETOPT_VERSION_OPTION_DECL},
124 {nullptr, 0, nullptr, 0}
127 enum total_type
129 total_auto, /* 0: default or --total=auto */
130 total_always, /* 1: --total=always */
131 total_only, /* 2: --total=only */
132 total_never /* 3: --total=never */
134 static char const *const total_args[] =
136 "auto", "always", "only", "never", nullptr
138 static enum total_type const total_types[] =
140 total_auto, total_always, total_only, total_never
142 ARGMATCH_VERIFY (total_args, total_types);
143 static enum total_type total_mode = total_auto;
#ifdef USE_AVX2_WC_LINECOUNT
/* Return true if __builtin_cpu_supports reports AVX2 on the running CPU.
   When --debug is in effect, also say which case was detected.  */
static bool
avx2_supported (void)
{
  bool have_avx2 = 0 < __builtin_cpu_supports ("avx2");

  if (debug)
    {
      if (have_avx2)
        error (0, 0, _("using avx2 hardware support"));
      else
        error (0, 0, _("avx2 support not detected"));
    }

  return have_avx2;
}
#endif
160 void
161 usage (int status)
163 if (status != EXIT_SUCCESS)
164 emit_try_help ();
165 else
167 printf (_("\
168 Usage: %s [OPTION]... [FILE]...\n\
169 or: %s [OPTION]... --files0-from=F\n\
171 program_name, program_name);
172 fputs (_("\
173 Print newline, word, and byte counts for each FILE, and a total line if\n\
174 more than one FILE is specified. A word is a non-zero-length sequence of\n\
175 printable characters delimited by white space.\n\
176 "), stdout);
178 emit_stdin_note ();
180 fputs (_("\
182 The options below may be used to select which counts are printed, always in\n\
183 the following order: newline, word, character, byte, maximum line length.\n\
184 -c, --bytes print the byte counts\n\
185 -m, --chars print the character counts\n\
186 -l, --lines print the newline counts\n\
187 "), stdout);
188 fputs (_("\
189 --files0-from=F read input from the files specified by\n\
190 NUL-terminated names in file F;\n\
191 If F is - then read names from standard input\n\
192 -L, --max-line-length print the maximum display width\n\
193 -w, --words print the word counts\n\
194 "), stdout);
195 fputs (_("\
196 --total=WHEN when to print a line with total counts;\n\
197 WHEN can be: auto, always, only, never\n\
198 "), stdout);
199 fputs (HELP_OPTION_DESCRIPTION, stdout);
200 fputs (VERSION_OPTION_DESCRIPTION, stdout);
201 emit_ancillary_info (PROGRAM_NAME);
203 exit (status);
206 /* Return non zero if a non breaking space. */
207 ATTRIBUTE_PURE
208 static int
209 iswnbspace (wint_t wc)
211 return ! posixly_correct
212 && (wc == 0x00A0 || wc == 0x2007
213 || wc == 0x202F || wc == 0x2060);
/* Single-byte counterpart of iswnbspace: convert C with btowc and
   classify the resulting wide character.  */
static int
isnbspace (int c)
{
  wint_t wide = btowc (c);
  return iswnbspace (wide);
}
222 /* FILE is the name of the file (or null for standard input)
223 associated with the specified counters. */
224 static void
225 write_counts (uintmax_t lines,
226 uintmax_t words,
227 uintmax_t chars,
228 uintmax_t bytes,
229 uintmax_t linelength,
230 char const *file)
232 static char const format_sp_int[] = " %*s";
233 char const *format_int = format_sp_int + 1;
234 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
236 if (print_lines)
238 printf (format_int, number_width, umaxtostr (lines, buf));
239 format_int = format_sp_int;
241 if (print_words)
243 printf (format_int, number_width, umaxtostr (words, buf));
244 format_int = format_sp_int;
246 if (print_chars)
248 printf (format_int, number_width, umaxtostr (chars, buf));
249 format_int = format_sp_int;
251 if (print_bytes)
253 printf (format_int, number_width, umaxtostr (bytes, buf));
254 format_int = format_sp_int;
256 if (print_linelength)
258 printf (format_int, number_width, umaxtostr (linelength, buf));
260 if (file)
261 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
262 putchar ('\n');
265 static bool
266 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
268 size_t bytes_read;
269 uintmax_t lines, bytes;
270 char buf[BUFFER_SIZE + 1];
271 bool long_lines = false;
273 if (!lines_out || !bytes_out)
275 return false;
278 lines = bytes = 0;
280 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
283 if (bytes_read == SAFE_READ_ERROR)
285 error (0, errno, "%s", quotef (file));
286 return false;
289 bytes += bytes_read;
291 char *p = buf;
292 char *end = buf + bytes_read;
293 uintmax_t plines = lines;
295 if (! long_lines)
297 /* Avoid function call overhead for shorter lines. */
298 while (p != end)
299 lines += *p++ == '\n';
301 else
303 /* rawmemchr is more efficient with longer lines. */
304 *end = '\n';
305 while ((p = rawmemchr (p, '\n')) < end)
307 ++p;
308 ++lines;
312 /* If the average line length in the block is >= 15, then use
313 memchr for the next block, where system specific optimizations
314 may outweigh function call overhead.
315 FIXME: This line length was determined in 2015, on both
316 x86_64 and ppc64, but it's worth re-evaluating in future with
317 newer compilers, CPUs, or memchr() implementations etc. */
318 if (lines - plines <= bytes_read / 15)
319 long_lines = true;
320 else
321 long_lines = false;
324 *bytes_out = bytes;
325 *lines_out = lines;
327 return true;
330 /* Count words. FILE_X is the name of the file (or null for standard
331 input) that is open on descriptor FD. *FSTATUS is its status.
332 CURRENT_POS is the current file offset if known, negative if unknown.
333 Return true if successful. */
334 static bool
335 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
337 bool ok = true;
338 char buf[BUFFER_SIZE + 1];
339 size_t bytes_read;
340 uintmax_t lines, words, chars, bytes, linelength;
341 bool count_bytes, count_chars, count_complicated;
342 char const *file = file_x ? file_x : _("standard input");
344 lines = words = chars = bytes = linelength = 0;
346 /* If in the current locale, chars are equivalent to bytes, we prefer
347 counting bytes, because that's easier. */
348 #if MB_LEN_MAX > 1
349 if (MB_CUR_MAX > 1)
351 count_bytes = print_bytes;
352 count_chars = print_chars;
354 else
355 #endif
357 count_bytes = print_bytes || print_chars;
358 count_chars = false;
360 count_complicated = print_words || print_linelength;
362 /* Advise the kernel of our access pattern only if we will read(). */
363 if (!count_bytes || count_chars || print_lines || count_complicated)
364 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
366 /* When counting only bytes, save some line- and word-counting
367 overhead. If FD is a 'regular' Unix file, using lseek is enough
368 to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
369 bytes at a time until EOF. Note that the 'size' (number of bytes)
370 that wc reports is smaller than stats.st_size when the file is not
371 positioned at its beginning. That's why the lseek calls below are
372 necessary. For example the command
373 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
374 should make wc report '0' bytes. */
376 if (count_bytes && !count_chars && !print_lines && !count_complicated)
378 bool skip_read = false;
380 if (0 < fstatus->failed)
381 fstatus->failed = fstat (fd, &fstatus->st);
383 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
384 This works better for files in proc-like file systems where
385 the size is only approximate. */
386 if (! fstatus->failed && usable_st_size (&fstatus->st)
387 && 0 <= fstatus->st.st_size)
389 off_t end_pos = fstatus->st.st_size;
390 if (current_pos < 0)
391 current_pos = lseek (fd, 0, SEEK_CUR);
393 if (end_pos % page_size)
395 /* We only need special handling of /proc and /sys files etc.
396 when they're a multiple of PAGE_SIZE. In the common case
397 for files with st_size not a multiple of PAGE_SIZE,
398 it's more efficient and accurate to use st_size.
400 Be careful here. The current position may actually be
401 beyond the end of the file. As in the example above. */
403 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
404 if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
405 skip_read = true;
406 else
407 bytes = 0;
409 else
411 off_t hi_pos = (end_pos
412 - end_pos % (STP_BLKSIZE (&fstatus->st) + 1));
413 if (0 <= current_pos && current_pos < hi_pos
414 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
415 bytes = hi_pos - current_pos;
419 if (! skip_read)
421 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
422 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
424 if (bytes_read == SAFE_READ_ERROR)
426 error (0, errno, "%s", quotef (file));
427 ok = false;
428 break;
430 bytes += bytes_read;
434 else if (!count_chars && !count_complicated)
436 #ifdef USE_AVX2_WC_LINECOUNT
437 static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
438 if (!wc_lines_p)
439 wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
440 #else
441 bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
442 = wc_lines;
443 #endif
445 /* Use a separate loop when counting only lines or lines and bytes --
446 but not chars or words. */
447 ok = wc_lines_p (file, fd, &lines, &bytes);
449 #if MB_LEN_MAX > 1
450 # define SUPPORT_OLD_MBRTOWC 1
451 else if (MB_CUR_MAX > 1)
453 bool in_word = false;
454 uintmax_t linepos = 0;
455 mbstate_t state; mbszero (&state);
456 bool in_shift = false;
457 # if SUPPORT_OLD_MBRTOWC
458 /* Back-up the state before each multibyte character conversion and
459 move the last incomplete character of the buffer to the front
460 of the buffer. This is needed because we don't know whether
461 the 'mbrtowc' function updates the state when it returns -2, --
462 this is the ISO C 99 and glibc-2.2 behavior - or not - amended
463 ANSI C, glibc-2.1 and Solaris 5.7 behavior. We don't have an
464 autoconf test for this, yet. */
465 size_t prev = 0; /* number of bytes carried over from previous round */
466 # else
467 const size_t prev = 0;
468 # endif
470 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
472 char const *p;
473 # if SUPPORT_OLD_MBRTOWC
474 mbstate_t backup_state;
475 # endif
476 if (bytes_read == SAFE_READ_ERROR)
478 error (0, errno, "%s", quotef (file));
479 ok = false;
480 break;
483 bytes += bytes_read;
484 p = buf;
485 bytes_read += prev;
488 wchar_t wide_char;
489 size_t n;
490 bool wide = true;
492 if (!in_shift && 0 <= *p && *p < 0x80)
494 /* Handle most ASCII characters quickly, without calling
495 mbrtowc(). */
496 n = 1;
497 wide_char = *p;
498 wide = false;
500 else
502 in_shift = true;
503 # if SUPPORT_OLD_MBRTOWC
504 backup_state = state;
505 # endif
506 n = mbrtowc (&wide_char, p, bytes_read, &state);
507 if (n == (size_t) -2)
509 # if SUPPORT_OLD_MBRTOWC
510 state = backup_state;
511 # endif
512 break;
514 if (n == (size_t) -1)
516 /* Remember that we read a byte, but don't complain
517 about the error. Because of the decoding error,
518 this is a considered to be byte but not a
519 character (that is, chars is not incremented). */
520 p++;
521 bytes_read--;
522 continue;
524 if (mbsinit (&state))
525 in_shift = false;
526 if (n == 0)
528 wide_char = 0;
529 n = 1;
533 switch (wide_char)
535 case '\n':
536 lines++;
537 FALLTHROUGH;
538 case '\r':
539 case '\f':
540 if (linepos > linelength)
541 linelength = linepos;
542 linepos = 0;
543 goto mb_word_separator;
544 case '\t':
545 linepos += 8 - (linepos % 8);
546 goto mb_word_separator;
547 case ' ':
548 linepos++;
549 FALLTHROUGH;
550 case '\v':
551 mb_word_separator:
552 words += in_word;
553 in_word = false;
554 break;
555 default:
556 if (wide && iswprint (wide_char))
558 /* wcwidth can be expensive on OSX for example,
559 so avoid if not needed. */
560 if (print_linelength)
562 int width = wcwidth (wide_char);
563 if (width > 0)
564 linepos += width;
566 if (iswspace (wide_char) || iswnbspace (wide_char))
567 goto mb_word_separator;
568 in_word = true;
570 else if (!wide && isprint (to_uchar (*p)))
572 linepos++;
573 if (isspace (to_uchar (*p)))
574 goto mb_word_separator;
575 in_word = true;
577 break;
580 p += n;
581 bytes_read -= n;
582 chars++;
584 while (bytes_read > 0);
586 # if SUPPORT_OLD_MBRTOWC
587 if (bytes_read > 0)
589 if (bytes_read == BUFFER_SIZE)
591 /* Encountered a very long redundant shift sequence. */
592 p++;
593 bytes_read--;
595 memmove (buf, p, bytes_read);
597 prev = bytes_read;
598 # endif
600 if (linepos > linelength)
601 linelength = linepos;
602 words += in_word;
604 #endif
605 else
607 bool in_word = false;
608 uintmax_t linepos = 0;
610 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
612 char const *p = buf;
613 if (bytes_read == SAFE_READ_ERROR)
615 error (0, errno, "%s", quotef (file));
616 ok = false;
617 break;
620 bytes += bytes_read;
623 switch (*p++)
625 case '\n':
626 lines++;
627 FALLTHROUGH;
628 case '\r':
629 case '\f':
630 if (linepos > linelength)
631 linelength = linepos;
632 linepos = 0;
633 goto word_separator;
634 case '\t':
635 linepos += 8 - (linepos % 8);
636 goto word_separator;
637 case ' ':
638 linepos++;
639 FALLTHROUGH;
640 case '\v':
641 word_separator:
642 words += in_word;
643 in_word = false;
644 break;
645 default:
646 if (isprint (to_uchar (p[-1])))
648 linepos++;
649 if (isspace (to_uchar (p[-1]))
650 || isnbspace (to_uchar (p[-1])))
651 goto word_separator;
652 in_word = true;
654 break;
657 while (--bytes_read);
659 if (linepos > linelength)
660 linelength = linepos;
661 words += in_word;
664 if (count_chars < print_chars)
665 chars = bytes;
667 if (total_mode != total_only)
668 write_counts (lines, words, chars, bytes, linelength, file_x);
670 if (ckd_add (&total_lines, total_lines, lines))
671 total_lines_overflow = true;
672 if (ckd_add (&total_words, total_words, words))
673 total_words_overflow = true;
674 if (ckd_add (&total_chars, total_chars, chars))
675 total_chars_overflow = true;
676 if (ckd_add (&total_bytes, total_bytes, bytes))
677 total_bytes_overflow = true;
679 if (linelength > max_line_length)
680 max_line_length = linelength;
682 return ok;
685 static bool
686 wc_file (char const *file, struct fstatus *fstatus)
688 if (! file || STREQ (file, "-"))
690 have_read_stdin = true;
691 xset_binary_mode (STDIN_FILENO, O_BINARY);
692 return wc (STDIN_FILENO, file, fstatus, -1);
694 else
696 int fd = open (file, O_RDONLY | O_BINARY);
697 if (fd == -1)
699 error (0, errno, "%s", quotef (file));
700 return false;
702 else
704 bool ok = wc (fd, file, fstatus, 0);
705 if (close (fd) != 0)
707 error (0, errno, "%s", quotef (file));
708 return false;
710 return ok;
715 /* Return the file status for the NFILES files addressed by FILE.
716 Optimize the case where only one number is printed, for just one
717 file; in that case we can use a print width of 1, so we don't need
718 to stat the file. Handle the case of (nfiles == 0) in the same way;
719 that happens when we don't know how long the list of file names will be. */
721 static struct fstatus *
722 get_input_fstatus (size_t nfiles, char *const *file)
724 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
726 if (nfiles == 0
727 || (nfiles == 1
728 && ((print_lines + print_words + print_chars
729 + print_bytes + print_linelength)
730 == 1)))
731 fstatus[0].failed = 1;
732 else
734 for (size_t i = 0; i < nfiles; i++)
735 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
736 ? fstat (STDIN_FILENO, &fstatus[i].st)
737 : stat (file[i], &fstatus[i].st));
740 return fstatus;
743 /* Return a print width suitable for the NFILES files whose status is
744 recorded in FSTATUS. Optimize the same special case that
745 get_input_fstatus optimizes. */
747 ATTRIBUTE_PURE
748 static int
749 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
751 int width = 1;
753 if (0 < nfiles && fstatus[0].failed <= 0)
755 int minimum_width = 1;
756 uintmax_t regular_total = 0;
758 for (size_t i = 0; i < nfiles; i++)
759 if (! fstatus[i].failed)
761 if (S_ISREG (fstatus[i].st.st_mode))
762 regular_total += fstatus[i].st.st_size;
763 else
764 minimum_width = 7;
767 for (; 10 <= regular_total; regular_total /= 10)
768 width++;
769 if (width < minimum_width)
770 width = minimum_width;
773 return width;
778 main (int argc, char **argv)
780 bool ok;
781 int optc;
782 size_t nfiles;
783 char **files;
784 char *files_from = nullptr;
785 struct fstatus *fstatus;
786 struct Tokens tok;
788 initialize_main (&argc, &argv);
789 set_program_name (argv[0]);
790 setlocale (LC_ALL, "");
791 bindtextdomain (PACKAGE, LOCALEDIR);
792 textdomain (PACKAGE);
794 atexit (close_stdout);
796 page_size = getpagesize ();
797 /* Line buffer stdout to ensure lines are written atomically and immediately
798 so that processes running in parallel do not intersperse their output. */
799 setvbuf (stdout, nullptr, _IOLBF, 0);
801 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
803 print_lines = print_words = print_chars = print_bytes = false;
804 print_linelength = false;
805 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
807 while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
808 switch (optc)
810 case 'c':
811 print_bytes = true;
812 break;
814 case 'm':
815 print_chars = true;
816 break;
818 case 'l':
819 print_lines = true;
820 break;
822 case 'w':
823 print_words = true;
824 break;
826 case 'L':
827 print_linelength = true;
828 break;
830 case DEBUG_PROGRAM_OPTION:
831 debug = true;
832 break;
834 case FILES0_FROM_OPTION:
835 files_from = optarg;
836 break;
838 case TOTAL_OPTION:
839 total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
840 break;
842 case_GETOPT_HELP_CHAR;
844 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
846 default:
847 usage (EXIT_FAILURE);
850 if (! (print_lines || print_words || print_chars || print_bytes
851 || print_linelength))
852 print_lines = print_words = print_bytes = true;
854 bool read_tokens = false;
855 struct argv_iterator *ai;
856 if (files_from)
858 FILE *stream;
860 /* When using --files0-from=F, you may not specify any files
861 on the command-line. */
862 if (optind < argc)
864 error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
865 fprintf (stderr, "%s\n",
866 _("file operands cannot be combined with --files0-from"));
867 usage (EXIT_FAILURE);
870 if (STREQ (files_from, "-"))
871 stream = stdin;
872 else
874 stream = fopen (files_from, "r");
875 if (stream == nullptr)
876 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
877 quoteaf (files_from));
880 /* Read the file list into RAM if we can detect its size and that
881 size is reasonable. Otherwise, we'll read a name at a time. */
882 struct stat st;
883 if (fstat (fileno (stream), &st) == 0
884 && S_ISREG (st.st_mode)
885 && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
887 read_tokens = true;
888 readtokens0_init (&tok);
889 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
890 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
891 quoteaf (files_from));
892 files = tok.tok;
893 nfiles = tok.n_tok;
894 ai = argv_iter_init_argv (files);
896 else
898 files = nullptr;
899 nfiles = 0;
900 ai = argv_iter_init_stream (stream);
903 else
905 static char *stdin_only[] = { nullptr };
906 files = (optind < argc ? argv + optind : stdin_only);
907 nfiles = (optind < argc ? argc - optind : 1);
908 ai = argv_iter_init_argv (files);
911 if (!ai)
912 xalloc_die ();
914 fstatus = get_input_fstatus (nfiles, files);
915 if (total_mode == total_only)
916 number_width = 1; /* No extra padding, since no alignment requirement. */
917 else
918 number_width = compute_number_width (nfiles, fstatus);
920 ok = true;
921 for (int i = 0; /* */; i++)
923 bool skip_file = false;
924 enum argv_iter_err ai_err;
925 char *file_name = argv_iter (ai, &ai_err);
926 if (!file_name)
928 switch (ai_err)
930 case AI_ERR_EOF:
931 goto argv_iter_done;
932 case AI_ERR_READ:
933 error (0, errno, _("%s: read error"),
934 quotef (files_from));
935 ok = false;
936 goto argv_iter_done;
937 case AI_ERR_MEM:
938 xalloc_die ();
939 default:
940 affirm (!"unexpected error code from argv_iter");
943 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
945 /* Give a better diagnostic in an unusual case:
946 printf - | wc --files0-from=- */
947 error (0, 0, _("when reading file names from stdin, "
948 "no file name of %s allowed"),
949 quoteaf (file_name));
950 skip_file = true;
953 if (!file_name[0])
955 /* Diagnose a zero-length file name. When it's one
956 among many, knowing the record number may help.
957 FIXME: currently print the record number only with
958 --files0-from=FILE. Maybe do it for argv, too? */
959 if (files_from == nullptr)
960 error (0, 0, "%s", _("invalid zero-length file name"));
961 else
963 /* Using the standard 'filename:line-number:' prefix here is
964 not totally appropriate, since NUL is the separator, not NL,
965 but it might be better than nothing. */
966 unsigned long int file_number = argv_iter_n_args (ai);
967 error (0, 0, "%s:%lu: %s", quotef (files_from),
968 file_number, _("invalid zero-length file name"));
970 skip_file = true;
973 if (skip_file)
974 ok = false;
975 else
976 ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
978 if (! nfiles)
979 fstatus[0].failed = 1;
981 argv_iter_done:
983 /* No arguments on the command line is fine. That means read from stdin.
984 However, no arguments on the --files0-from input stream is an error
985 means don't read anything. */
986 if (ok && !files_from && argv_iter_n_args (ai) == 0)
987 ok &= wc_file (nullptr, &fstatus[0]);
989 if (read_tokens)
990 readtokens0_free (&tok);
992 if (total_mode != total_never
993 && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
995 if (total_lines_overflow)
997 total_lines = UINTMAX_MAX;
998 error (0, EOVERFLOW, _("total lines"));
999 ok = false;
1001 if (total_words_overflow)
1003 total_words = UINTMAX_MAX;
1004 error (0, EOVERFLOW, _("total words"));
1005 ok = false;
1007 if (total_chars_overflow)
1009 total_chars = UINTMAX_MAX;
1010 error (0, EOVERFLOW, _("total characters"));
1011 ok = false;
1013 if (total_bytes_overflow)
1015 total_bytes = UINTMAX_MAX;
1016 error (0, EOVERFLOW, _("total bytes"));
1017 ok = false;
1020 write_counts (total_lines, total_words, total_chars, total_bytes,
1021 max_line_length,
1022 total_mode != total_only ? _("total") : nullptr);
1025 argv_iter_free (ai);
1027 free (fstatus);
1029 if (have_read_stdin && close (STDIN_FILENO) != 0)
1030 error (EXIT_FAILURE, errno, "-");
1032 return ok ? EXIT_SUCCESS : EXIT_FAILURE;