/* Source: coreutils.git, src/wc.c
   (blob ebe83af4dfe587e84e263e667ba6a3dac4f2576b),
   as of the commit "cksum,wc: don’t include <cpuid.h>".  */

/* wc - print the number of lines, words, and bytes in files
   Copyright (C) 1985-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Paul Rubin, phr@ocf.berkeley.edu
   and David MacKenzie, djm@gnu.ai.mit.edu. */

#include <config.h>

#include <stdio.h>
#include <assert.h>
#include <getopt.h>
#include <sys/types.h>
#include <wchar.h>
#include <wctype.h>

#include "system.h"
#include "argmatch.h"
#include "argv-iter.h"
#include "die.h"
#include "error.h"
#include "fadvise.h"
#include "mbchar.h"
#include "physmem.h"
#include "readtokens0.h"
#include "safe-read.h"
#include "stat-size.h"
#include "xbinary-io.h"

#if !defined iswspace && !HAVE_ISWSPACE
/* Fallback used when the platform lacks iswspace: a wide character is
   treated as white space only if it is unchanged by to_uchar and
   isspace accepts that value.  */
# define iswspace(wc) \
    ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Size of atomic reads.  */
#define BUFFER_SIZE (16 * 1024)

/* Plain C line/byte counter, defined later in this file.  */
static bool
wc_lines (char const *file, int fd, uintmax_t *lines_out,
          uintmax_t *bytes_out);
#ifdef USE_AVX2_WC_LINECOUNT
/* From wc_avx2.c */
extern bool
wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
               uintmax_t *bytes_out);
#endif
/* The line counter actually called by wc; it starts out as the plain C
   implementation and may be switched to wc_lines_avx2 at run time.  */
static bool
(*wc_lines_p) (char const *file, int fd, uintmax_t *lines_out,
                uintmax_t *bytes_out) = wc_lines;

/* True if --debug was given.  */
static bool debug;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
/* Set to true once the corresponding cumulative total has wrapped.
   These are flags, not counts, hence bool.  */
static bool total_lines_overflow;
static bool total_words_overflow;
static bool total_chars_overflow;
static bool total_bytes_overflow;
static uintmax_t max_line_length;

/* Which counts to print.  */
static bool print_lines, print_words, print_chars, print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.  */
static size_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.  */
static bool posixly_correct;

/* The result of calling fstat or stat on a file descriptor or file.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};

/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};

120 static struct option const longopts[] =
122 {"bytes", no_argument, NULL, 'c'},
123 {"chars", no_argument, NULL, 'm'},
124 {"lines", no_argument, NULL, 'l'},
125 {"words", no_argument, NULL, 'w'},
126 {"debug", no_argument, NULL, DEBUG_PROGRAM_OPTION},
127 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
128 {"max-line-length", no_argument, NULL, 'L'},
129 {"total", required_argument, NULL, TOTAL_OPTION},
130 {GETOPT_HELP_OPTION_DECL},
131 {GETOPT_VERSION_OPTION_DECL},
132 {NULL, 0, NULL, 0}
/* When to print the line of total counts (--total=WHEN).  */
enum total_type
{
  total_auto,         /* 0: default or --total=auto */
  total_always,       /* 1: --total=always */
  total_only,         /* 2: --total=only */
  total_never         /* 3: --total=never */
};
142 static char const *const total_args[] =
144 "auto", "always", "only", "never", NULL
146 static enum total_type const total_types[] =
148 total_auto, total_always, total_only, total_never
150 ARGMATCH_VERIFY (total_args, total_types);
151 static enum total_type total_mode = total_auto;
#ifdef USE_AVX2_WC_LINECOUNT
/* Return true if __builtin_cpu_supports reports AVX2 for the running
   CPU.  When --debug is in effect, also report the outcome through
   error ().  */
static bool
avx2_supported (void)
{
  bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");

  /* Pass the translated text as an argument rather than as the format,
     like the other diagnostics in this file, so a stray '%' in a
     translation cannot be interpreted as a directive.  */
  if (debug)
    error (0, 0, "%s", (avx_enabled
                        ? _("using avx2 hardware support")
                        : _("avx2 support not detected")));

  return avx_enabled;
}
#endif

168 void
169 usage (int status)
171 if (status != EXIT_SUCCESS)
172 emit_try_help ();
173 else
175 printf (_("\
176 Usage: %s [OPTION]... [FILE]...\n\
177 or: %s [OPTION]... --files0-from=F\n\
179 program_name, program_name);
180 fputs (_("\
181 Print newline, word, and byte counts for each FILE, and a total line if\n\
182 more than one FILE is specified. A word is a non-zero-length sequence of\n\
183 printable characters delimited by white space.\n\
184 "), stdout);
186 emit_stdin_note ();
188 fputs (_("\
190 The options below may be used to select which counts are printed, always in\n\
191 the following order: newline, word, character, byte, maximum line length.\n\
192 -c, --bytes print the byte counts\n\
193 -m, --chars print the character counts\n\
194 -l, --lines print the newline counts\n\
195 "), stdout);
196 fputs (_("\
197 --files0-from=F read input from the files specified by\n\
198 NUL-terminated names in file F;\n\
199 If F is - then read names from standard input\n\
200 -L, --max-line-length print the maximum display width\n\
201 -w, --words print the word counts\n\
202 "), stdout);
203 fputs (_("\
204 --total=WHEN when to print a line with total counts;\n\
205 WHEN can be: auto, always, only, never\n\
206 "), stdout);
207 fputs (HELP_OPTION_DESCRIPTION, stdout);
208 fputs (VERSION_OPTION_DESCRIPTION, stdout);
209 emit_ancillary_info (PROGRAM_NAME);
211 exit (status);
214 /* Return non zero if a non breaking space. */
215 ATTRIBUTE_PURE
216 static int
217 iswnbspace (wint_t wc)
219 return ! posixly_correct
220 && (wc == 0x00A0 || wc == 0x2007
221 || wc == 0x202F || wc == 0x2060);
/* Single-byte counterpart of iswnbspace: convert the byte C to a wide
   character with btowc and classify the result.  */
static int
isnbspace (int c)
{
  return iswnbspace (btowc (c));
}

230 /* FILE is the name of the file (or NULL for standard input)
231 associated with the specified counters. */
232 static void
233 write_counts (uintmax_t lines,
234 uintmax_t words,
235 uintmax_t chars,
236 uintmax_t bytes,
237 uintmax_t linelength,
238 char const *file)
240 static char const format_sp_int[] = " %*s";
241 char const *format_int = format_sp_int + 1;
242 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
244 if (print_lines)
246 printf (format_int, number_width, umaxtostr (lines, buf));
247 format_int = format_sp_int;
249 if (print_words)
251 printf (format_int, number_width, umaxtostr (words, buf));
252 format_int = format_sp_int;
254 if (print_chars)
256 printf (format_int, number_width, umaxtostr (chars, buf));
257 format_int = format_sp_int;
259 if (print_bytes)
261 printf (format_int, number_width, umaxtostr (bytes, buf));
262 format_int = format_sp_int;
264 if (print_linelength)
266 printf (format_int, number_width, umaxtostr (linelength, buf));
268 if (file)
269 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
270 putchar ('\n');
273 static bool
274 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
276 size_t bytes_read;
277 uintmax_t lines, bytes;
278 char buf[BUFFER_SIZE + 1];
279 bool long_lines = false;
281 if (!lines_out || !bytes_out)
283 return false;
286 lines = bytes = 0;
288 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
291 if (bytes_read == SAFE_READ_ERROR)
293 error (0, errno, "%s", quotef (file));
294 return false;
297 bytes += bytes_read;
299 char *p = buf;
300 char *end = buf + bytes_read;
301 uintmax_t plines = lines;
303 if (! long_lines)
305 /* Avoid function call overhead for shorter lines. */
306 while (p != end)
307 lines += *p++ == '\n';
309 else
311 /* rawmemchr is more efficient with longer lines. */
312 *end = '\n';
313 while ((p = rawmemchr (p, '\n')) < end)
315 ++p;
316 ++lines;
320 /* If the average line length in the block is >= 15, then use
321 memchr for the next block, where system specific optimizations
322 may outweigh function call overhead.
323 FIXME: This line length was determined in 2015, on both
324 x86_64 and ppc64, but it's worth re-evaluating in future with
325 newer compilers, CPUs, or memchr() implementations etc. */
326 if (lines - plines <= bytes_read / 15)
327 long_lines = true;
328 else
329 long_lines = false;
332 *bytes_out = bytes;
333 *lines_out = lines;
335 return true;
338 /* Count words. FILE_X is the name of the file (or NULL for standard
339 input) that is open on descriptor FD. *FSTATUS is its status.
340 CURRENT_POS is the current file offset if known, negative if unknown.
341 Return true if successful. */
342 static bool
343 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
345 bool ok = true;
346 char buf[BUFFER_SIZE + 1];
347 size_t bytes_read;
348 uintmax_t lines, words, chars, bytes, linelength;
349 bool count_bytes, count_chars, count_complicated;
350 char const *file = file_x ? file_x : _("standard input");
352 lines = words = chars = bytes = linelength = 0;
354 /* If in the current locale, chars are equivalent to bytes, we prefer
355 counting bytes, because that's easier. */
356 #if MB_LEN_MAX > 1
357 if (MB_CUR_MAX > 1)
359 count_bytes = print_bytes;
360 count_chars = print_chars;
362 else
363 #endif
365 count_bytes = print_bytes || print_chars;
366 count_chars = false;
368 count_complicated = print_words || print_linelength;
370 /* Advise the kernel of our access pattern only if we will read(). */
371 if (!count_bytes || count_chars || print_lines || count_complicated)
372 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
374 /* When counting only bytes, save some line- and word-counting
375 overhead. If FD is a 'regular' Unix file, using lseek is enough
376 to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
377 bytes at a time until EOF. Note that the 'size' (number of bytes)
378 that wc reports is smaller than stats.st_size when the file is not
379 positioned at its beginning. That's why the lseek calls below are
380 necessary. For example the command
381 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
382 should make wc report '0' bytes. */
384 if (count_bytes && !count_chars && !print_lines && !count_complicated)
386 bool skip_read = false;
388 if (0 < fstatus->failed)
389 fstatus->failed = fstat (fd, &fstatus->st);
391 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
392 This works better for files in proc-like file systems where
393 the size is only approximate. */
394 if (! fstatus->failed && usable_st_size (&fstatus->st)
395 && 0 <= fstatus->st.st_size)
397 off_t end_pos = fstatus->st.st_size;
398 if (current_pos < 0)
399 current_pos = lseek (fd, 0, SEEK_CUR);
401 if (end_pos % page_size)
403 /* We only need special handling of /proc and /sys files etc.
404 when they're a multiple of PAGE_SIZE. In the common case
405 for files with st_size not a multiple of PAGE_SIZE,
406 it's more efficient and accurate to use st_size.
408 Be careful here. The current position may actually be
409 beyond the end of the file. As in the example above. */
411 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
412 if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
413 skip_read = true;
414 else
415 bytes = 0;
417 else
419 off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
420 if (0 <= current_pos && current_pos < hi_pos
421 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
422 bytes = hi_pos - current_pos;
426 if (! skip_read)
428 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
429 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
431 if (bytes_read == SAFE_READ_ERROR)
433 error (0, errno, "%s", quotef (file));
434 ok = false;
435 break;
437 bytes += bytes_read;
441 else if (!count_chars && !count_complicated)
443 #ifdef USE_AVX2_WC_LINECOUNT
444 if (avx2_supported ())
445 wc_lines_p = wc_lines_avx2;
446 #endif
448 /* Use a separate loop when counting only lines or lines and bytes --
449 but not chars or words. */
450 ok = wc_lines_p (file, fd, &lines, &bytes);
452 #if MB_LEN_MAX > 1
453 # define SUPPORT_OLD_MBRTOWC 1
454 else if (MB_CUR_MAX > 1)
456 bool in_word = false;
457 uintmax_t linepos = 0;
458 mbstate_t state = { 0, };
459 bool in_shift = false;
460 # if SUPPORT_OLD_MBRTOWC
461 /* Back-up the state before each multibyte character conversion and
462 move the last incomplete character of the buffer to the front
463 of the buffer. This is needed because we don't know whether
464 the 'mbrtowc' function updates the state when it returns -2, --
465 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
466 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
467 autoconf test for this, yet. */
468 size_t prev = 0; /* number of bytes carried over from previous round */
469 # else
470 const size_t prev = 0;
471 # endif
473 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
475 char const *p;
476 # if SUPPORT_OLD_MBRTOWC
477 mbstate_t backup_state;
478 # endif
479 if (bytes_read == SAFE_READ_ERROR)
481 error (0, errno, "%s", quotef (file));
482 ok = false;
483 break;
486 bytes += bytes_read;
487 p = buf;
488 bytes_read += prev;
491 wchar_t wide_char;
492 size_t n;
493 bool wide = true;
495 if (!in_shift && is_basic (*p))
497 /* Handle most ASCII characters quickly, without calling
498 mbrtowc(). */
499 n = 1;
500 wide_char = *p;
501 wide = false;
503 else
505 in_shift = true;
506 # if SUPPORT_OLD_MBRTOWC
507 backup_state = state;
508 # endif
509 n = mbrtowc (&wide_char, p, bytes_read, &state);
510 if (n == (size_t) -2)
512 # if SUPPORT_OLD_MBRTOWC
513 state = backup_state;
514 # endif
515 break;
517 if (n == (size_t) -1)
519 /* Remember that we read a byte, but don't complain
520 about the error. Because of the decoding error,
521 this is a considered to be byte but not a
522 character (that is, chars is not incremented). */
523 p++;
524 bytes_read--;
525 continue;
527 if (mbsinit (&state))
528 in_shift = false;
529 if (n == 0)
531 wide_char = 0;
532 n = 1;
536 switch (wide_char)
538 case '\n':
539 lines++;
540 FALLTHROUGH;
541 case '\r':
542 case '\f':
543 if (linepos > linelength)
544 linelength = linepos;
545 linepos = 0;
546 goto mb_word_separator;
547 case '\t':
548 linepos += 8 - (linepos % 8);
549 goto mb_word_separator;
550 case ' ':
551 linepos++;
552 FALLTHROUGH;
553 case '\v':
554 mb_word_separator:
555 words += in_word;
556 in_word = false;
557 break;
558 default:
559 if (wide && iswprint (wide_char))
561 /* wcwidth can be expensive on OSX for example,
562 so avoid if uneeded. */
563 if (print_linelength)
565 int width = wcwidth (wide_char);
566 if (width > 0)
567 linepos += width;
569 if (iswspace (wide_char) || iswnbspace (wide_char))
570 goto mb_word_separator;
571 in_word = true;
573 else if (!wide && isprint (to_uchar (*p)))
575 linepos++;
576 if (isspace (to_uchar (*p)))
577 goto mb_word_separator;
578 in_word = true;
580 break;
583 p += n;
584 bytes_read -= n;
585 chars++;
587 while (bytes_read > 0);
589 # if SUPPORT_OLD_MBRTOWC
590 if (bytes_read > 0)
592 if (bytes_read == BUFFER_SIZE)
594 /* Encountered a very long redundant shift sequence. */
595 p++;
596 bytes_read--;
598 memmove (buf, p, bytes_read);
600 prev = bytes_read;
601 # endif
603 if (linepos > linelength)
604 linelength = linepos;
605 words += in_word;
607 #endif
608 else
610 bool in_word = false;
611 uintmax_t linepos = 0;
613 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
615 char const *p = buf;
616 if (bytes_read == SAFE_READ_ERROR)
618 error (0, errno, "%s", quotef (file));
619 ok = false;
620 break;
623 bytes += bytes_read;
626 switch (*p++)
628 case '\n':
629 lines++;
630 FALLTHROUGH;
631 case '\r':
632 case '\f':
633 if (linepos > linelength)
634 linelength = linepos;
635 linepos = 0;
636 goto word_separator;
637 case '\t':
638 linepos += 8 - (linepos % 8);
639 goto word_separator;
640 case ' ':
641 linepos++;
642 FALLTHROUGH;
643 case '\v':
644 word_separator:
645 words += in_word;
646 in_word = false;
647 break;
648 default:
649 if (isprint (to_uchar (p[-1])))
651 linepos++;
652 if (isspace (to_uchar (p[-1]))
653 || isnbspace (to_uchar (p[-1])))
654 goto word_separator;
655 in_word = true;
657 break;
660 while (--bytes_read);
662 if (linepos > linelength)
663 linelength = linepos;
664 words += in_word;
667 if (count_chars < print_chars)
668 chars = bytes;
670 if (total_mode != total_only)
671 write_counts (lines, words, chars, bytes, linelength, file_x);
673 if (INT_ADD_WRAPV (total_lines, lines, &total_lines))
674 total_lines_overflow = true;
675 if (INT_ADD_WRAPV (total_words, words, &total_words))
676 total_words_overflow = true;
677 if (INT_ADD_WRAPV (total_chars, chars, &total_chars))
678 total_chars_overflow = true;
679 if (INT_ADD_WRAPV (total_bytes, bytes, &total_bytes))
680 total_bytes_overflow = true;
682 if (linelength > max_line_length)
683 max_line_length = linelength;
685 return ok;
688 static bool
689 wc_file (char const *file, struct fstatus *fstatus)
691 if (! file || STREQ (file, "-"))
693 have_read_stdin = true;
694 xset_binary_mode (STDIN_FILENO, O_BINARY);
695 return wc (STDIN_FILENO, file, fstatus, -1);
697 else
699 int fd = open (file, O_RDONLY | O_BINARY);
700 if (fd == -1)
702 error (0, errno, "%s", quotef (file));
703 return false;
705 else
707 bool ok = wc (fd, file, fstatus, 0);
708 if (close (fd) != 0)
710 error (0, errno, "%s", quotef (file));
711 return false;
713 return ok;
718 /* Return the file status for the NFILES files addressed by FILE.
719 Optimize the case where only one number is printed, for just one
720 file; in that case we can use a print width of 1, so we don't need
721 to stat the file. Handle the case of (nfiles == 0) in the same way;
722 that happens when we don't know how long the list of file names will be. */
724 static struct fstatus *
725 get_input_fstatus (size_t nfiles, char *const *file)
727 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
729 if (nfiles == 0
730 || (nfiles == 1
731 && ((print_lines + print_words + print_chars
732 + print_bytes + print_linelength)
733 == 1)))
734 fstatus[0].failed = 1;
735 else
737 for (size_t i = 0; i < nfiles; i++)
738 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
739 ? fstat (STDIN_FILENO, &fstatus[i].st)
740 : stat (file[i], &fstatus[i].st));
743 return fstatus;
746 /* Return a print width suitable for the NFILES files whose status is
747 recorded in FSTATUS. Optimize the same special case that
748 get_input_fstatus optimizes. */
750 ATTRIBUTE_PURE
751 static int
752 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
754 int width = 1;
756 if (0 < nfiles && fstatus[0].failed <= 0)
758 int minimum_width = 1;
759 uintmax_t regular_total = 0;
761 for (size_t i = 0; i < nfiles; i++)
762 if (! fstatus[i].failed)
764 if (S_ISREG (fstatus[i].st.st_mode))
765 regular_total += fstatus[i].st.st_size;
766 else
767 minimum_width = 7;
770 for (; 10 <= regular_total; regular_total /= 10)
771 width++;
772 if (width < minimum_width)
773 width = minimum_width;
776 return width;
781 main (int argc, char **argv)
783 bool ok;
784 int optc;
785 size_t nfiles;
786 char **files;
787 char *files_from = NULL;
788 struct fstatus *fstatus;
789 struct Tokens tok;
791 initialize_main (&argc, &argv);
792 set_program_name (argv[0]);
793 setlocale (LC_ALL, "");
794 bindtextdomain (PACKAGE, LOCALEDIR);
795 textdomain (PACKAGE);
797 atexit (close_stdout);
799 page_size = getpagesize ();
800 /* Line buffer stdout to ensure lines are written atomically and immediately
801 so that processes running in parallel do not intersperse their output. */
802 setvbuf (stdout, NULL, _IOLBF, 0);
804 posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
806 print_lines = print_words = print_chars = print_bytes = false;
807 print_linelength = false;
808 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
810 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
811 switch (optc)
813 case 'c':
814 print_bytes = true;
815 break;
817 case 'm':
818 print_chars = true;
819 break;
821 case 'l':
822 print_lines = true;
823 break;
825 case 'w':
826 print_words = true;
827 break;
829 case 'L':
830 print_linelength = true;
831 break;
833 case DEBUG_PROGRAM_OPTION:
834 debug = true;
835 break;
837 case FILES0_FROM_OPTION:
838 files_from = optarg;
839 break;
841 case TOTAL_OPTION:
842 total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
843 break;
845 case_GETOPT_HELP_CHAR;
847 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
849 default:
850 usage (EXIT_FAILURE);
853 if (! (print_lines || print_words || print_chars || print_bytes
854 || print_linelength))
855 print_lines = print_words = print_bytes = true;
857 bool read_tokens = false;
858 struct argv_iterator *ai;
859 if (files_from)
861 FILE *stream;
863 /* When using --files0-from=F, you may not specify any files
864 on the command-line. */
865 if (optind < argc)
867 error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
868 fprintf (stderr, "%s\n",
869 _("file operands cannot be combined with --files0-from"));
870 usage (EXIT_FAILURE);
873 if (STREQ (files_from, "-"))
874 stream = stdin;
875 else
877 stream = fopen (files_from, "r");
878 if (stream == NULL)
879 die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
880 quoteaf (files_from));
883 /* Read the file list into RAM if we can detect its size and that
884 size is reasonable. Otherwise, we'll read a name at a time. */
885 struct stat st;
886 if (fstat (fileno (stream), &st) == 0
887 && S_ISREG (st.st_mode)
888 && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
890 read_tokens = true;
891 readtokens0_init (&tok);
892 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
893 die (EXIT_FAILURE, 0, _("cannot read file names from %s"),
894 quoteaf (files_from));
895 files = tok.tok;
896 nfiles = tok.n_tok;
897 ai = argv_iter_init_argv (files);
899 else
901 files = NULL;
902 nfiles = 0;
903 ai = argv_iter_init_stream (stream);
906 else
908 static char *stdin_only[] = { NULL };
909 files = (optind < argc ? argv + optind : stdin_only);
910 nfiles = (optind < argc ? argc - optind : 1);
911 ai = argv_iter_init_argv (files);
914 if (!ai)
915 xalloc_die ();
917 fstatus = get_input_fstatus (nfiles, files);
918 if (total_mode == total_only)
919 number_width = 1; /* No extra padding, since no alignment requirement. */
920 else
921 number_width = compute_number_width (nfiles, fstatus);
923 ok = true;
924 for (int i = 0; /* */; i++)
926 bool skip_file = false;
927 enum argv_iter_err ai_err;
928 char *file_name = argv_iter (ai, &ai_err);
929 if (!file_name)
931 switch (ai_err)
933 case AI_ERR_EOF:
934 goto argv_iter_done;
935 case AI_ERR_READ:
936 error (0, errno, _("%s: read error"),
937 quotef (files_from));
938 ok = false;
939 goto argv_iter_done;
940 case AI_ERR_MEM:
941 xalloc_die ();
942 default:
943 assert (!"unexpected error code from argv_iter");
946 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
948 /* Give a better diagnostic in an unusual case:
949 printf - | wc --files0-from=- */
950 error (0, 0, _("when reading file names from stdin, "
951 "no file name of %s allowed"),
952 quoteaf (file_name));
953 skip_file = true;
956 if (!file_name[0])
958 /* Diagnose a zero-length file name. When it's one
959 among many, knowing the record number may help.
960 FIXME: currently print the record number only with
961 --files0-from=FILE. Maybe do it for argv, too? */
962 if (files_from == NULL)
963 error (0, 0, "%s", _("invalid zero-length file name"));
964 else
966 /* Using the standard 'filename:line-number:' prefix here is
967 not totally appropriate, since NUL is the separator, not NL,
968 but it might be better than nothing. */
969 unsigned long int file_number = argv_iter_n_args (ai);
970 error (0, 0, "%s:%lu: %s", quotef (files_from),
971 file_number, _("invalid zero-length file name"));
973 skip_file = true;
976 if (skip_file)
977 ok = false;
978 else
979 ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
981 if (! nfiles)
982 fstatus[0].failed = 1;
984 argv_iter_done:
986 /* No arguments on the command line is fine. That means read from stdin.
987 However, no arguments on the --files0-from input stream is an error
988 means don't read anything. */
989 if (ok && !files_from && argv_iter_n_args (ai) == 0)
990 ok &= wc_file (NULL, &fstatus[0]);
992 if (read_tokens)
993 readtokens0_free (&tok);
995 if (total_mode != total_never
996 && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
998 if (total_lines_overflow)
1000 total_lines = UINTMAX_MAX;
1001 error (0, EOVERFLOW, _("total lines"));
1002 ok = false;
1004 if (total_words_overflow)
1006 total_words = UINTMAX_MAX;
1007 error (0, EOVERFLOW, _("total words"));
1008 ok = false;
1010 if (total_chars_overflow)
1012 total_chars = UINTMAX_MAX;
1013 error (0, EOVERFLOW, _("total characters"));
1014 ok = false;
1016 if (total_bytes_overflow)
1018 total_bytes = UINTMAX_MAX;
1019 error (0, EOVERFLOW, _("total bytes"));
1020 ok = false;
1023 write_counts (total_lines, total_words, total_chars, total_bytes,
1024 max_line_length,
1025 total_mode != total_only ? _("total") : NULL);
1028 argv_iter_free (ai);
1030 free (fstatus);
1032 if (have_read_stdin && close (STDIN_FILENO) != 0)
1033 die (EXIT_FAILURE, errno, "-");
1035 return ok ? EXIT_SUCCESS : EXIT_FAILURE;