maint: prefer C23-style nullptr
[coreutils.git] / src / wc.c
blob9f345aa7275a2f3980414b3c2e829235f26b315f
1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 1985-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18 and David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <stdio.h>
23 #include <assert.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <wchar.h>
27 #include <wctype.h>
29 #include "system.h"
30 #include "argmatch.h"
31 #include "argv-iter.h"
32 #include "die.h"
33 #include "error.h"
34 #include "fadvise.h"
35 #include "mbchar.h"
36 #include "physmem.h"
37 #include "readtokens0.h"
38 #include "safe-read.h"
39 #include "stat-size.h"
40 #include "xbinary-io.h"
/* Fallback when the platform provides no iswspace: treat a wide
   character as white space only if it survives conversion to
   unsigned char and isspace accepts that byte.  */
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
    ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

/* Author list handed to the --version handler in main.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Number of bytes requested from each read.  The read buffers are
   declared one byte larger so a sentinel can be stored after the data.  */
#define BUFFER_SIZE (16 * 1024)

#ifdef USE_AVX2_WC_LINECOUNT
/* Line counter implemented in wc_avx2.c; same signature as wc_lines.  */
extern bool
wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
               uintmax_t *bytes_out);
#endif
/* True if --debug was given; enables diagnostics such as the
   report on avx2 support.  */
static bool debug;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
static uintmax_t max_line_length;

/* Set to true once adding a file's count to the corresponding total
   has wrapped around.  These are plain flags: they are only ever
   assigned true and tested for truth, so they are typed bool like
   the other flags in this file.  */
static bool total_lines_overflow;
static bool total_words_overflow;
static bool total_chars_overflow;
static bool total_bytes_overflow;

/* Which counts to print.  */
static bool print_lines, print_words, print_chars, print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.  */
static size_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.  */
static bool posixly_correct;
/* Outcome of an fstat or stat call on one input, together with the
   status data when the call succeeded.  */
struct fstatus
{
  /* Positive: fstat/stat has not been attempted yet.
     Otherwise: the value that fstat or stat returned.  */
  int failed;

  /* The file's status; meaningful only when FAILED is zero.  */
  struct stat st;
};
105 /* For long options that have no equivalent short option, use a
106 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
107 enum
109 DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
110 FILES0_FROM_OPTION,
111 TOTAL_OPTION,
114 static struct option const longopts[] =
116 {"bytes", no_argument, nullptr, 'c'},
117 {"chars", no_argument, nullptr, 'm'},
118 {"lines", no_argument, nullptr, 'l'},
119 {"words", no_argument, nullptr, 'w'},
120 {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
121 {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
122 {"max-line-length", no_argument, nullptr, 'L'},
123 {"total", required_argument, nullptr, TOTAL_OPTION},
124 {GETOPT_HELP_OPTION_DECL},
125 {GETOPT_VERSION_OPTION_DECL},
126 {nullptr, 0, nullptr, 0}
129 enum total_type
131 total_auto, /* 0: default or --total=auto */
132 total_always, /* 1: --total=always */
133 total_only, /* 2: --total=only */
134 total_never /* 3: --total=never */
136 static char const *const total_args[] =
138 "auto", "always", "only", "never", nullptr
140 static enum total_type const total_types[] =
142 total_auto, total_always, total_only, total_never
144 ARGMATCH_VERIFY (total_args, total_types);
145 static enum total_type total_mode = total_auto;
#ifdef USE_AVX2_WC_LINECOUNT
/* Return true if __builtin_cpu_supports reports avx2 support.
   When --debug is in effect, also report the outcome via error.  */
static bool
avx2_supported (void)
{
  bool const have_avx2 = __builtin_cpu_supports ("avx2") > 0;

  if (debug)
    {
      char const *note = (have_avx2
                          ? _("using avx2 hardware support")
                          : _("avx2 support not detected"));
      error (0, 0, note);
    }

  return have_avx2;
}
#endif
/* Print usage information and exit with STATUS.
   If STATUS is not EXIT_SUCCESS, only emit the "try --help" hint via
   emit_try_help; otherwise write the full help text to stdout.
   This function does not return: it always ends by calling exit.  */
162 void
163 usage (int status)
165 if (status != EXIT_SUCCESS)
166 emit_try_help ();
167 else
169 printf (_("\
170 Usage: %s [OPTION]... [FILE]...\n\
171 or: %s [OPTION]... --files0-from=F\n\
173 program_name, program_name);
174 fputs (_("\
175 Print newline, word, and byte counts for each FILE, and a total line if\n\
176 more than one FILE is specified. A word is a non-zero-length sequence of\n\
177 printable characters delimited by white space.\n\
178 "), stdout);
180 emit_stdin_note ();
182 fputs (_("\
184 The options below may be used to select which counts are printed, always in\n\
185 the following order: newline, word, character, byte, maximum line length.\n\
186 -c, --bytes print the byte counts\n\
187 -m, --chars print the character counts\n\
188 -l, --lines print the newline counts\n\
189 "), stdout);
190 fputs (_("\
191 --files0-from=F read input from the files specified by\n\
192 NUL-terminated names in file F;\n\
193 If F is - then read names from standard input\n\
194 -L, --max-line-length print the maximum display width\n\
195 -w, --words print the word counts\n\
196 "), stdout);
197 fputs (_("\
198 --total=WHEN when to print a line with total counts;\n\
199 WHEN can be: auto, always, only, never\n\
200 "), stdout);
201 fputs (HELP_OPTION_DESCRIPTION, stdout);
202 fputs (VERSION_OPTION_DESCRIPTION, stdout);
203 emit_ancillary_info (PROGRAM_NAME);
/* Reached on both the success and the failure path.  */
205 exit (status);
208 /* Return non zero if a non breaking space. */
209 ATTRIBUTE_PURE
210 static int
211 iswnbspace (wint_t wc)
213 return ! posixly_correct
214 && (wc == 0x00A0 || wc == 0x2007
215 || wc == 0x202F || wc == 0x2060);
/* Single-byte counterpart of iswnbspace: convert C with btowc and
   return what iswnbspace says about the resulting wide character.  */
static int
isnbspace (int c)
{
  wint_t wide = btowc (c);
  return iswnbspace (wide);
}
224 /* FILE is the name of the file (or null for standard input)
225 associated with the specified counters. */
226 static void
227 write_counts (uintmax_t lines,
228 uintmax_t words,
229 uintmax_t chars,
230 uintmax_t bytes,
231 uintmax_t linelength,
232 char const *file)
234 static char const format_sp_int[] = " %*s";
235 char const *format_int = format_sp_int + 1;
236 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
238 if (print_lines)
240 printf (format_int, number_width, umaxtostr (lines, buf));
241 format_int = format_sp_int;
243 if (print_words)
245 printf (format_int, number_width, umaxtostr (words, buf));
246 format_int = format_sp_int;
248 if (print_chars)
250 printf (format_int, number_width, umaxtostr (chars, buf));
251 format_int = format_sp_int;
253 if (print_bytes)
255 printf (format_int, number_width, umaxtostr (bytes, buf));
256 format_int = format_sp_int;
258 if (print_linelength)
260 printf (format_int, number_width, umaxtostr (linelength, buf));
262 if (file)
263 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
264 putchar ('\n');
267 static bool
268 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
270 size_t bytes_read;
271 uintmax_t lines, bytes;
272 char buf[BUFFER_SIZE + 1];
273 bool long_lines = false;
275 if (!lines_out || !bytes_out)
277 return false;
280 lines = bytes = 0;
282 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
285 if (bytes_read == SAFE_READ_ERROR)
287 error (0, errno, "%s", quotef (file));
288 return false;
291 bytes += bytes_read;
293 char *p = buf;
294 char *end = buf + bytes_read;
295 uintmax_t plines = lines;
297 if (! long_lines)
299 /* Avoid function call overhead for shorter lines. */
300 while (p != end)
301 lines += *p++ == '\n';
303 else
305 /* rawmemchr is more efficient with longer lines. */
306 *end = '\n';
307 while ((p = rawmemchr (p, '\n')) < end)
309 ++p;
310 ++lines;
314 /* If the average line length in the block is >= 15, then use
315 memchr for the next block, where system specific optimizations
316 may outweigh function call overhead.
317 FIXME: This line length was determined in 2015, on both
318 x86_64 and ppc64, but it's worth re-evaluating in future with
319 newer compilers, CPUs, or memchr() implementations etc. */
320 if (lines - plines <= bytes_read / 15)
321 long_lines = true;
322 else
323 long_lines = false;
326 *bytes_out = bytes;
327 *lines_out = lines;
329 return true;
332 /* Count words. FILE_X is the name of the file (or null for standard
333 input) that is open on descriptor FD. *FSTATUS is its status.
334 CURRENT_POS is the current file offset if known, negative if unknown.
335 Return true if successful. */
336 static bool
337 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
339 bool ok = true;
340 char buf[BUFFER_SIZE + 1];
341 size_t bytes_read;
342 uintmax_t lines, words, chars, bytes, linelength;
343 bool count_bytes, count_chars, count_complicated;
344 char const *file = file_x ? file_x : _("standard input");
346 lines = words = chars = bytes = linelength = 0;
348 /* If in the current locale, chars are equivalent to bytes, we prefer
349 counting bytes, because that's easier. */
350 #if MB_LEN_MAX > 1
351 if (MB_CUR_MAX > 1)
353 count_bytes = print_bytes;
354 count_chars = print_chars;
356 else
357 #endif
359 count_bytes = print_bytes || print_chars;
360 count_chars = false;
362 count_complicated = print_words || print_linelength;
364 /* Advise the kernel of our access pattern only if we will read(). */
365 if (!count_bytes || count_chars || print_lines || count_complicated)
366 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
368 /* When counting only bytes, save some line- and word-counting
369 overhead. If FD is a 'regular' Unix file, using lseek is enough
370 to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
371 bytes at a time until EOF. Note that the 'size' (number of bytes)
372 that wc reports is smaller than stats.st_size when the file is not
373 positioned at its beginning. That's why the lseek calls below are
374 necessary. For example the command
375 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
376 should make wc report '0' bytes. */
378 if (count_bytes && !count_chars && !print_lines && !count_complicated)
380 bool skip_read = false;
382 if (0 < fstatus->failed)
383 fstatus->failed = fstat (fd, &fstatus->st);
385 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
386 This works better for files in proc-like file systems where
387 the size is only approximate. */
388 if (! fstatus->failed && usable_st_size (&fstatus->st)
389 && 0 <= fstatus->st.st_size)
391 off_t end_pos = fstatus->st.st_size;
392 if (current_pos < 0)
393 current_pos = lseek (fd, 0, SEEK_CUR);
395 if (end_pos % page_size)
397 /* We only need special handling of /proc and /sys files etc.
398 when they're a multiple of PAGE_SIZE. In the common case
399 for files with st_size not a multiple of PAGE_SIZE,
400 it's more efficient and accurate to use st_size.
402 Be careful here. The current position may actually be
403 beyond the end of the file. As in the example above. */
405 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
406 if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
407 skip_read = true;
408 else
409 bytes = 0;
411 else
413 off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
414 if (0 <= current_pos && current_pos < hi_pos
415 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
416 bytes = hi_pos - current_pos;
420 if (! skip_read)
422 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
423 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
425 if (bytes_read == SAFE_READ_ERROR)
427 error (0, errno, "%s", quotef (file));
428 ok = false;
429 break;
431 bytes += bytes_read;
435 else if (!count_chars && !count_complicated)
437 #ifdef USE_AVX2_WC_LINECOUNT
438 static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
439 if (!wc_lines_p)
440 wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
441 #else
442 bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
443 = wc_lines;
444 #endif
446 /* Use a separate loop when counting only lines or lines and bytes --
447 but not chars or words. */
448 ok = wc_lines_p (file, fd, &lines, &bytes);
450 #if MB_LEN_MAX > 1
451 # define SUPPORT_OLD_MBRTOWC 1
452 else if (MB_CUR_MAX > 1)
454 bool in_word = false;
455 uintmax_t linepos = 0;
456 mbstate_t state = { 0, };
457 bool in_shift = false;
458 # if SUPPORT_OLD_MBRTOWC
459 /* Back-up the state before each multibyte character conversion and
460 move the last incomplete character of the buffer to the front
461 of the buffer. This is needed because we don't know whether
462 the 'mbrtowc' function updates the state when it returns -2, --
463 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
464 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
465 autoconf test for this, yet. */
466 size_t prev = 0; /* number of bytes carried over from previous round */
467 # else
468 const size_t prev = 0;
469 # endif
471 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
473 char const *p;
474 # if SUPPORT_OLD_MBRTOWC
475 mbstate_t backup_state;
476 # endif
477 if (bytes_read == SAFE_READ_ERROR)
479 error (0, errno, "%s", quotef (file));
480 ok = false;
481 break;
484 bytes += bytes_read;
485 p = buf;
486 bytes_read += prev;
489 wchar_t wide_char;
490 size_t n;
491 bool wide = true;
493 if (!in_shift && is_basic (*p))
495 /* Handle most ASCII characters quickly, without calling
496 mbrtowc(). */
497 n = 1;
498 wide_char = *p;
499 wide = false;
501 else
503 in_shift = true;
504 # if SUPPORT_OLD_MBRTOWC
505 backup_state = state;
506 # endif
507 n = mbrtowc (&wide_char, p, bytes_read, &state);
508 if (n == (size_t) -2)
510 # if SUPPORT_OLD_MBRTOWC
511 state = backup_state;
512 # endif
513 break;
515 if (n == (size_t) -1)
517 /* Remember that we read a byte, but don't complain
518 about the error. Because of the decoding error,
519 this is a considered to be byte but not a
520 character (that is, chars is not incremented). */
521 p++;
522 bytes_read--;
523 continue;
525 if (mbsinit (&state))
526 in_shift = false;
527 if (n == 0)
529 wide_char = 0;
530 n = 1;
534 switch (wide_char)
536 case '\n':
537 lines++;
538 FALLTHROUGH;
539 case '\r':
540 case '\f':
541 if (linepos > linelength)
542 linelength = linepos;
543 linepos = 0;
544 goto mb_word_separator;
545 case '\t':
546 linepos += 8 - (linepos % 8);
547 goto mb_word_separator;
548 case ' ':
549 linepos++;
550 FALLTHROUGH;
551 case '\v':
552 mb_word_separator:
553 words += in_word;
554 in_word = false;
555 break;
556 default:
557 if (wide && iswprint (wide_char))
559 /* wcwidth can be expensive on OSX for example,
560 so avoid if uneeded. */
561 if (print_linelength)
563 int width = wcwidth (wide_char);
564 if (width > 0)
565 linepos += width;
567 if (iswspace (wide_char) || iswnbspace (wide_char))
568 goto mb_word_separator;
569 in_word = true;
571 else if (!wide && isprint (to_uchar (*p)))
573 linepos++;
574 if (isspace (to_uchar (*p)))
575 goto mb_word_separator;
576 in_word = true;
578 break;
581 p += n;
582 bytes_read -= n;
583 chars++;
585 while (bytes_read > 0);
587 # if SUPPORT_OLD_MBRTOWC
588 if (bytes_read > 0)
590 if (bytes_read == BUFFER_SIZE)
592 /* Encountered a very long redundant shift sequence. */
593 p++;
594 bytes_read--;
596 memmove (buf, p, bytes_read);
598 prev = bytes_read;
599 # endif
601 if (linepos > linelength)
602 linelength = linepos;
603 words += in_word;
605 #endif
606 else
608 bool in_word = false;
609 uintmax_t linepos = 0;
611 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
613 char const *p = buf;
614 if (bytes_read == SAFE_READ_ERROR)
616 error (0, errno, "%s", quotef (file));
617 ok = false;
618 break;
621 bytes += bytes_read;
624 switch (*p++)
626 case '\n':
627 lines++;
628 FALLTHROUGH;
629 case '\r':
630 case '\f':
631 if (linepos > linelength)
632 linelength = linepos;
633 linepos = 0;
634 goto word_separator;
635 case '\t':
636 linepos += 8 - (linepos % 8);
637 goto word_separator;
638 case ' ':
639 linepos++;
640 FALLTHROUGH;
641 case '\v':
642 word_separator:
643 words += in_word;
644 in_word = false;
645 break;
646 default:
647 if (isprint (to_uchar (p[-1])))
649 linepos++;
650 if (isspace (to_uchar (p[-1]))
651 || isnbspace (to_uchar (p[-1])))
652 goto word_separator;
653 in_word = true;
655 break;
658 while (--bytes_read);
660 if (linepos > linelength)
661 linelength = linepos;
662 words += in_word;
665 if (count_chars < print_chars)
666 chars = bytes;
668 if (total_mode != total_only)
669 write_counts (lines, words, chars, bytes, linelength, file_x);
671 if (INT_ADD_WRAPV (total_lines, lines, &total_lines))
672 total_lines_overflow = true;
673 if (INT_ADD_WRAPV (total_words, words, &total_words))
674 total_words_overflow = true;
675 if (INT_ADD_WRAPV (total_chars, chars, &total_chars))
676 total_chars_overflow = true;
677 if (INT_ADD_WRAPV (total_bytes, bytes, &total_bytes))
678 total_bytes_overflow = true;
680 if (linelength > max_line_length)
681 max_line_length = linelength;
683 return ok;
686 static bool
687 wc_file (char const *file, struct fstatus *fstatus)
689 if (! file || STREQ (file, "-"))
691 have_read_stdin = true;
692 xset_binary_mode (STDIN_FILENO, O_BINARY);
693 return wc (STDIN_FILENO, file, fstatus, -1);
695 else
697 int fd = open (file, O_RDONLY | O_BINARY);
698 if (fd == -1)
700 error (0, errno, "%s", quotef (file));
701 return false;
703 else
705 bool ok = wc (fd, file, fstatus, 0);
706 if (close (fd) != 0)
708 error (0, errno, "%s", quotef (file));
709 return false;
711 return ok;
716 /* Return the file status for the NFILES files addressed by FILE.
717 Optimize the case where only one number is printed, for just one
718 file; in that case we can use a print width of 1, so we don't need
719 to stat the file. Handle the case of (nfiles == 0) in the same way;
720 that happens when we don't know how long the list of file names will be. */
722 static struct fstatus *
723 get_input_fstatus (size_t nfiles, char *const *file)
725 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
727 if (nfiles == 0
728 || (nfiles == 1
729 && ((print_lines + print_words + print_chars
730 + print_bytes + print_linelength)
731 == 1)))
732 fstatus[0].failed = 1;
733 else
735 for (size_t i = 0; i < nfiles; i++)
736 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
737 ? fstat (STDIN_FILENO, &fstatus[i].st)
738 : stat (file[i], &fstatus[i].st));
741 return fstatus;
744 /* Return a print width suitable for the NFILES files whose status is
745 recorded in FSTATUS. Optimize the same special case that
746 get_input_fstatus optimizes. */
748 ATTRIBUTE_PURE
749 static int
750 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
752 int width = 1;
754 if (0 < nfiles && fstatus[0].failed <= 0)
756 int minimum_width = 1;
757 uintmax_t regular_total = 0;
759 for (size_t i = 0; i < nfiles; i++)
760 if (! fstatus[i].failed)
762 if (S_ISREG (fstatus[i].st.st_mode))
763 regular_total += fstatus[i].st.st_size;
764 else
765 minimum_width = 7;
768 for (; 10 <= regular_total; regular_total /= 10)
769 width++;
770 if (width < minimum_width)
771 width = minimum_width;
774 return width;
779 main (int argc, char **argv)
781 bool ok;
782 int optc;
783 size_t nfiles;
784 char **files;
785 char *files_from = nullptr;
786 struct fstatus *fstatus;
787 struct Tokens tok;
789 initialize_main (&argc, &argv);
790 set_program_name (argv[0]);
791 setlocale (LC_ALL, "");
792 bindtextdomain (PACKAGE, LOCALEDIR);
793 textdomain (PACKAGE);
795 atexit (close_stdout);
797 page_size = getpagesize ();
798 /* Line buffer stdout to ensure lines are written atomically and immediately
799 so that processes running in parallel do not intersperse their output. */
800 setvbuf (stdout, nullptr, _IOLBF, 0);
802 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
804 print_lines = print_words = print_chars = print_bytes = false;
805 print_linelength = false;
806 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
808 while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
809 switch (optc)
811 case 'c':
812 print_bytes = true;
813 break;
815 case 'm':
816 print_chars = true;
817 break;
819 case 'l':
820 print_lines = true;
821 break;
823 case 'w':
824 print_words = true;
825 break;
827 case 'L':
828 print_linelength = true;
829 break;
831 case DEBUG_PROGRAM_OPTION:
832 debug = true;
833 break;
835 case FILES0_FROM_OPTION:
836 files_from = optarg;
837 break;
839 case TOTAL_OPTION:
840 total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
841 break;
843 case_GETOPT_HELP_CHAR;
845 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
847 default:
848 usage (EXIT_FAILURE);
851 if (! (print_lines || print_words || print_chars || print_bytes
852 || print_linelength))
853 print_lines = print_words = print_bytes = true;
855 bool read_tokens = false;
856 struct argv_iterator *ai;
857 if (files_from)
859 FILE *stream;
861 /* When using --files0-from=F, you may not specify any files
862 on the command-line. */
863 if (optind < argc)
865 error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
866 fprintf (stderr, "%s\n",
867 _("file operands cannot be combined with --files0-from"));
868 usage (EXIT_FAILURE);
871 if (STREQ (files_from, "-"))
872 stream = stdin;
873 else
875 stream = fopen (files_from, "r");
876 if (stream == nullptr)
877 die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
878 quoteaf (files_from));
881 /* Read the file list into RAM if we can detect its size and that
882 size is reasonable. Otherwise, we'll read a name at a time. */
883 struct stat st;
884 if (fstat (fileno (stream), &st) == 0
885 && S_ISREG (st.st_mode)
886 && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
888 read_tokens = true;
889 readtokens0_init (&tok);
890 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
891 die (EXIT_FAILURE, 0, _("cannot read file names from %s"),
892 quoteaf (files_from));
893 files = tok.tok;
894 nfiles = tok.n_tok;
895 ai = argv_iter_init_argv (files);
897 else
899 files = nullptr;
900 nfiles = 0;
901 ai = argv_iter_init_stream (stream);
904 else
906 static char *stdin_only[] = { nullptr };
907 files = (optind < argc ? argv + optind : stdin_only);
908 nfiles = (optind < argc ? argc - optind : 1);
909 ai = argv_iter_init_argv (files);
912 if (!ai)
913 xalloc_die ();
915 fstatus = get_input_fstatus (nfiles, files);
916 if (total_mode == total_only)
917 number_width = 1; /* No extra padding, since no alignment requirement. */
918 else
919 number_width = compute_number_width (nfiles, fstatus);
921 ok = true;
922 for (int i = 0; /* */; i++)
924 bool skip_file = false;
925 enum argv_iter_err ai_err;
926 char *file_name = argv_iter (ai, &ai_err);
927 if (!file_name)
929 switch (ai_err)
931 case AI_ERR_EOF:
932 goto argv_iter_done;
933 case AI_ERR_READ:
934 error (0, errno, _("%s: read error"),
935 quotef (files_from));
936 ok = false;
937 goto argv_iter_done;
938 case AI_ERR_MEM:
939 xalloc_die ();
940 default:
941 assert (!"unexpected error code from argv_iter");
944 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
946 /* Give a better diagnostic in an unusual case:
947 printf - | wc --files0-from=- */
948 error (0, 0, _("when reading file names from stdin, "
949 "no file name of %s allowed"),
950 quoteaf (file_name));
951 skip_file = true;
954 if (!file_name[0])
956 /* Diagnose a zero-length file name. When it's one
957 among many, knowing the record number may help.
958 FIXME: currently print the record number only with
959 --files0-from=FILE. Maybe do it for argv, too? */
960 if (files_from == nullptr)
961 error (0, 0, "%s", _("invalid zero-length file name"));
962 else
964 /* Using the standard 'filename:line-number:' prefix here is
965 not totally appropriate, since NUL is the separator, not NL,
966 but it might be better than nothing. */
967 unsigned long int file_number = argv_iter_n_args (ai);
968 error (0, 0, "%s:%lu: %s", quotef (files_from),
969 file_number, _("invalid zero-length file name"));
971 skip_file = true;
974 if (skip_file)
975 ok = false;
976 else
977 ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
979 if (! nfiles)
980 fstatus[0].failed = 1;
982 argv_iter_done:
984 /* No arguments on the command line is fine. That means read from stdin.
985 However, no arguments on the --files0-from input stream is an error
986 means don't read anything. */
987 if (ok && !files_from && argv_iter_n_args (ai) == 0)
988 ok &= wc_file (nullptr, &fstatus[0]);
990 if (read_tokens)
991 readtokens0_free (&tok);
993 if (total_mode != total_never
994 && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
996 if (total_lines_overflow)
998 total_lines = UINTMAX_MAX;
999 error (0, EOVERFLOW, _("total lines"));
1000 ok = false;
1002 if (total_words_overflow)
1004 total_words = UINTMAX_MAX;
1005 error (0, EOVERFLOW, _("total words"));
1006 ok = false;
1008 if (total_chars_overflow)
1010 total_chars = UINTMAX_MAX;
1011 error (0, EOVERFLOW, _("total characters"));
1012 ok = false;
1014 if (total_bytes_overflow)
1016 total_bytes = UINTMAX_MAX;
1017 error (0, EOVERFLOW, _("total bytes"));
1018 ok = false;
1021 write_counts (total_lines, total_words, total_chars, total_bytes,
1022 max_line_length,
1023 total_mode != total_only ? _("total") : nullptr);
1026 argv_iter_free (ai);
1028 free (fstatus);
1030 if (have_read_stdin && close (STDIN_FILENO) != 0)
1031 die (EXIT_FAILURE, errno, "-");
1033 return ok ? EXIT_SUCCESS : EXIT_FAILURE;