split: port ‘split -n N /dev/null’ better to macOS
[coreutils.git] / src / wc.c
blob5f3ef6eee55c53e780cb4f84d400faf1a4483c07
1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 1985-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18 and David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <stdio.h>
23 #include <assert.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <wchar.h>
27 #include <wctype.h>
29 #include "system.h"
30 #include "argmatch.h"
31 #include "argv-iter.h"
32 #include "die.h"
33 #include "error.h"
34 #include "fadvise.h"
35 #include "mbchar.h"
36 #include "physmem.h"
37 #include "readtokens0.h"
38 #include "safe-read.h"
39 #include "stat-size.h"
40 #include "xbinary-io.h"
41 #ifdef USE_AVX2_WC_LINECOUNT
42 # include <cpuid.h>
43 #endif
/* Fallback for hosts without iswspace: treat WC as white space only
   when it fits in an unsigned char and isspace accepts that byte.  */
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
    ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

/* Author list handed to the --version handler.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Number of bytes requested from each safe_read call.  */
#define BUFFER_SIZE (16 * 1024)
/* Portable line counter, defined later in this file.  */
static bool wc_lines (char const *file, int fd,
                      uintmax_t *lines_out, uintmax_t *bytes_out);

#ifdef USE_AVX2_WC_LINECOUNT
/* AVX2 line counter, from wc_avx2.c.  */
extern bool wc_lines_avx2 (char const *file, int fd,
                           uintmax_t *lines_out, uintmax_t *bytes_out);
#endif

/* The line counter that wc calls; it starts out as the portable one.  */
static bool (*wc_lines_p) (char const *file, int fd,
                           uintmax_t *lines_out, uintmax_t *bytes_out)
  = wc_lines;
/* True if --debug was given.  */
static bool debug;

/* Running sums of the lines, words, chars and bytes counted in all
   files processed so far.  */
static uintmax_t total_lines, total_words, total_chars, total_bytes;

/* The largest line length seen in any file processed so far.  */
static uintmax_t max_line_length;

/* Which counts to print.  */
static bool print_lines;
static bool print_words;
static bool print_chars;
static bool print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.  */
static size_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.  */
static bool posixly_correct;
/* Outcome of calling fstat or stat on one input.  */
struct fstatus
{
  /* Positive while neither fstat nor stat has been called yet;
     afterwards, the value that call returned.  */
  int failed;

  /* The file's status; meaningful only when FAILED is zero.  */
  struct stat st;
};
/* Long options with no short equivalent get pseudo short-option codes
   that cannot collide with a character: they start at CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION
};
119 static struct option const longopts[] =
121 {"bytes", no_argument, NULL, 'c'},
122 {"chars", no_argument, NULL, 'm'},
123 {"lines", no_argument, NULL, 'l'},
124 {"words", no_argument, NULL, 'w'},
125 {"debug", no_argument, NULL, DEBUG_PROGRAM_OPTION},
126 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
127 {"max-line-length", no_argument, NULL, 'L'},
128 {"total", required_argument, NULL, TOTAL_OPTION},
129 {GETOPT_HELP_OPTION_DECL},
130 {GETOPT_VERSION_OPTION_DECL},
131 {NULL, 0, NULL, 0}
/* When to print the line of total counts.  */
enum total_type
{
  total_auto,    /* 0: default or --total=auto */
  total_always,  /* 1: --total=always */
  total_only,    /* 2: --total=only */
  total_never    /* 3: --total=never */
};

/* Accepted --total arguments, and the mode each one selects.  The two
   tables are parallel; ARGMATCH_VERIFY checks them against each other.  */
static char const *const total_args[] =
{
  "auto",
  "always",
  "only",
  "never",
  NULL
};
static enum total_type const total_types[] =
{
  total_auto,
  total_always,
  total_only,
  total_never
};
ARGMATCH_VERIFY (total_args, total_types);

/* The mode in effect; --total=WHEN overrides the default.  */
static enum total_type total_mode = total_auto;
#ifdef USE_AVX2_WC_LINECOUNT
/* Return true if AVX2 instructions can be used on this host.

   Ask the compiler's runtime CPU detection rather than decoding CPUID
   by hand: the OSXSAVE bit only says that XSAVE is enabled, not that
   the operating system has enabled YMM state, so testing it together
   with the AVX2 feature bit can select AVX2 code on a system where
   executing it raises SIGILL.  __builtin_cpu_supports also accounts
   for the XCR0 state.

   When --debug is in effect, report the outcome on stderr.  */
static bool
avx2_supported (void)
{
  bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");

  if (debug)
    error (0, 0, "%s",
           (avx_enabled
            ? _("using avx2 hardware support")
            : _("avx2 support not detected")));

  return avx_enabled;
}
#endif
205 void
206 usage (int status)
208 if (status != EXIT_SUCCESS)
209 emit_try_help ();
210 else
212 printf (_("\
213 Usage: %s [OPTION]... [FILE]...\n\
214 or: %s [OPTION]... --files0-from=F\n\
216 program_name, program_name);
217 fputs (_("\
218 Print newline, word, and byte counts for each FILE, and a total line if\n\
219 more than one FILE is specified. A word is a non-zero-length sequence of\n\
220 printable characters delimited by white space.\n\
221 "), stdout);
223 emit_stdin_note ();
225 fputs (_("\
227 The options below may be used to select which counts are printed, always in\n\
228 the following order: newline, word, character, byte, maximum line length.\n\
229 -c, --bytes print the byte counts\n\
230 -m, --chars print the character counts\n\
231 -l, --lines print the newline counts\n\
232 "), stdout);
233 fputs (_("\
234 --files0-from=F read input from the files specified by\n\
235 NUL-terminated names in file F;\n\
236 If F is - then read names from standard input\n\
237 -L, --max-line-length print the maximum display width\n\
238 -w, --words print the word counts\n\
239 "), stdout);
240 fputs (_("\
241 --total=WHEN when to print a line with total counts;\n\
242 WHEN can be: auto, always, only, never\n\
243 "), stdout);
244 fputs (HELP_OPTION_DESCRIPTION, stdout);
245 fputs (VERSION_OPTION_DESCRIPTION, stdout);
246 emit_ancillary_info (PROGRAM_NAME);
248 exit (status);
251 /* Return non zero if a non breaking space. */
252 ATTRIBUTE_PURE
253 static int
254 iswnbspace (wint_t wc)
256 return ! posixly_correct
257 && (wc == 0x00A0 || wc == 0x2007
258 || wc == 0x202F || wc == 0x2060);
/* Single-byte counterpart of iswnbspace: widen C with btowc and
   classify the result.  */
static int
isnbspace (int c)
{
  wint_t widened = btowc (c);
  return iswnbspace (widened);
}
267 /* FILE is the name of the file (or NULL for standard input)
268 associated with the specified counters. */
269 static void
270 write_counts (uintmax_t lines,
271 uintmax_t words,
272 uintmax_t chars,
273 uintmax_t bytes,
274 uintmax_t linelength,
275 char const *file)
277 static char const format_sp_int[] = " %*s";
278 char const *format_int = format_sp_int + 1;
279 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
281 if (print_lines)
283 printf (format_int, number_width, umaxtostr (lines, buf));
284 format_int = format_sp_int;
286 if (print_words)
288 printf (format_int, number_width, umaxtostr (words, buf));
289 format_int = format_sp_int;
291 if (print_chars)
293 printf (format_int, number_width, umaxtostr (chars, buf));
294 format_int = format_sp_int;
296 if (print_bytes)
298 printf (format_int, number_width, umaxtostr (bytes, buf));
299 format_int = format_sp_int;
301 if (print_linelength)
303 printf (format_int, number_width, umaxtostr (linelength, buf));
305 if (file)
306 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
307 putchar ('\n');
310 static bool
311 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
313 size_t bytes_read;
314 uintmax_t lines, bytes;
315 char buf[BUFFER_SIZE + 1];
316 bool long_lines = false;
318 if (!lines_out || !bytes_out)
320 return false;
323 lines = bytes = 0;
325 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
328 if (bytes_read == SAFE_READ_ERROR)
330 error (0, errno, "%s", quotef (file));
331 return false;
334 bytes += bytes_read;
336 char *p = buf;
337 char *end = buf + bytes_read;
338 uintmax_t plines = lines;
340 if (! long_lines)
342 /* Avoid function call overhead for shorter lines. */
343 while (p != end)
344 lines += *p++ == '\n';
346 else
348 /* rawmemchr is more efficient with longer lines. */
349 *end = '\n';
350 while ((p = rawmemchr (p, '\n')) < end)
352 ++p;
353 ++lines;
357 /* If the average line length in the block is >= 15, then use
358 memchr for the next block, where system specific optimizations
359 may outweigh function call overhead.
360 FIXME: This line length was determined in 2015, on both
361 x86_64 and ppc64, but it's worth re-evaluating in future with
362 newer compilers, CPUs, or memchr() implementations etc. */
363 if (lines - plines <= bytes_read / 15)
364 long_lines = true;
365 else
366 long_lines = false;
369 *bytes_out = bytes;
370 *lines_out = lines;
372 return true;
375 /* Count words. FILE_X is the name of the file (or NULL for standard
376 input) that is open on descriptor FD. *FSTATUS is its status.
377 CURRENT_POS is the current file offset if known, negative if unknown.
378 Return true if successful. */
379 static bool
380 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
382 bool ok = true;
383 char buf[BUFFER_SIZE + 1];
384 size_t bytes_read;
385 uintmax_t lines, words, chars, bytes, linelength;
386 bool count_bytes, count_chars, count_complicated;
387 char const *file = file_x ? file_x : _("standard input");
389 lines = words = chars = bytes = linelength = 0;
391 /* If in the current locale, chars are equivalent to bytes, we prefer
392 counting bytes, because that's easier. */
393 #if MB_LEN_MAX > 1
394 if (MB_CUR_MAX > 1)
396 count_bytes = print_bytes;
397 count_chars = print_chars;
399 else
400 #endif
402 count_bytes = print_bytes || print_chars;
403 count_chars = false;
405 count_complicated = print_words || print_linelength;
407 /* Advise the kernel of our access pattern only if we will read(). */
408 if (!count_bytes || count_chars || print_lines || count_complicated)
409 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
411 /* When counting only bytes, save some line- and word-counting
412 overhead. If FD is a 'regular' Unix file, using lseek is enough
413 to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
414 bytes at a time until EOF. Note that the 'size' (number of bytes)
415 that wc reports is smaller than stats.st_size when the file is not
416 positioned at its beginning. That's why the lseek calls below are
417 necessary. For example the command
418 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
419 should make wc report '0' bytes. */
421 if (count_bytes && !count_chars && !print_lines && !count_complicated)
423 bool skip_read = false;
425 if (0 < fstatus->failed)
426 fstatus->failed = fstat (fd, &fstatus->st);
428 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
429 This works better for files in proc-like file systems where
430 the size is only approximate. */
431 if (! fstatus->failed && usable_st_size (&fstatus->st)
432 && 0 <= fstatus->st.st_size)
434 off_t end_pos = fstatus->st.st_size;
435 if (current_pos < 0)
436 current_pos = lseek (fd, 0, SEEK_CUR);
438 if (end_pos % page_size)
440 /* We only need special handling of /proc and /sys files etc.
441 when they're a multiple of PAGE_SIZE. In the common case
442 for files with st_size not a multiple of PAGE_SIZE,
443 it's more efficient and accurate to use st_size.
445 Be careful here. The current position may actually be
446 beyond the end of the file. As in the example above. */
448 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
449 skip_read = true;
451 else
453 off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
454 if (0 <= current_pos && current_pos < hi_pos
455 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
456 bytes = hi_pos - current_pos;
460 if (! skip_read)
462 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
463 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
465 if (bytes_read == SAFE_READ_ERROR)
467 error (0, errno, "%s", quotef (file));
468 ok = false;
469 break;
471 bytes += bytes_read;
475 else if (!count_chars && !count_complicated)
477 #ifdef USE_AVX2_WC_LINECOUNT
478 if (avx2_supported ())
479 wc_lines_p = wc_lines_avx2;
480 #endif
482 /* Use a separate loop when counting only lines or lines and bytes --
483 but not chars or words. */
484 ok = wc_lines_p (file, fd, &lines, &bytes);
486 #if MB_LEN_MAX > 1
487 # define SUPPORT_OLD_MBRTOWC 1
488 else if (MB_CUR_MAX > 1)
490 bool in_word = false;
491 uintmax_t linepos = 0;
492 mbstate_t state = { 0, };
493 bool in_shift = false;
494 # if SUPPORT_OLD_MBRTOWC
495 /* Back-up the state before each multibyte character conversion and
496 move the last incomplete character of the buffer to the front
497 of the buffer. This is needed because we don't know whether
498 the 'mbrtowc' function updates the state when it returns -2, --
499 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
500 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
501 autoconf test for this, yet. */
502 size_t prev = 0; /* number of bytes carried over from previous round */
503 # else
504 const size_t prev = 0;
505 # endif
507 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
509 char const *p;
510 # if SUPPORT_OLD_MBRTOWC
511 mbstate_t backup_state;
512 # endif
513 if (bytes_read == SAFE_READ_ERROR)
515 error (0, errno, "%s", quotef (file));
516 ok = false;
517 break;
520 bytes += bytes_read;
521 p = buf;
522 bytes_read += prev;
525 wchar_t wide_char;
526 size_t n;
527 bool wide = true;
529 if (!in_shift && is_basic (*p))
531 /* Handle most ASCII characters quickly, without calling
532 mbrtowc(). */
533 n = 1;
534 wide_char = *p;
535 wide = false;
537 else
539 in_shift = true;
540 # if SUPPORT_OLD_MBRTOWC
541 backup_state = state;
542 # endif
543 n = mbrtowc (&wide_char, p, bytes_read, &state);
544 if (n == (size_t) -2)
546 # if SUPPORT_OLD_MBRTOWC
547 state = backup_state;
548 # endif
549 break;
551 if (n == (size_t) -1)
553 /* Remember that we read a byte, but don't complain
554 about the error. Because of the decoding error,
555 this is a considered to be byte but not a
556 character (that is, chars is not incremented). */
557 p++;
558 bytes_read--;
559 continue;
561 if (mbsinit (&state))
562 in_shift = false;
563 if (n == 0)
565 wide_char = 0;
566 n = 1;
570 switch (wide_char)
572 case '\n':
573 lines++;
574 FALLTHROUGH;
575 case '\r':
576 case '\f':
577 if (linepos > linelength)
578 linelength = linepos;
579 linepos = 0;
580 goto mb_word_separator;
581 case '\t':
582 linepos += 8 - (linepos % 8);
583 goto mb_word_separator;
584 case ' ':
585 linepos++;
586 FALLTHROUGH;
587 case '\v':
588 mb_word_separator:
589 words += in_word;
590 in_word = false;
591 break;
592 default:
593 if (wide && iswprint (wide_char))
595 /* wcwidth can be expensive on OSX for example,
596 so avoid if uneeded. */
597 if (print_linelength)
599 int width = wcwidth (wide_char);
600 if (width > 0)
601 linepos += width;
603 if (iswspace (wide_char) || iswnbspace (wide_char))
604 goto mb_word_separator;
605 in_word = true;
607 else if (!wide && isprint (to_uchar (*p)))
609 linepos++;
610 if (isspace (to_uchar (*p)))
611 goto mb_word_separator;
612 in_word = true;
614 break;
617 p += n;
618 bytes_read -= n;
619 chars++;
621 while (bytes_read > 0);
623 # if SUPPORT_OLD_MBRTOWC
624 if (bytes_read > 0)
626 if (bytes_read == BUFFER_SIZE)
628 /* Encountered a very long redundant shift sequence. */
629 p++;
630 bytes_read--;
632 memmove (buf, p, bytes_read);
634 prev = bytes_read;
635 # endif
637 if (linepos > linelength)
638 linelength = linepos;
639 words += in_word;
641 #endif
642 else
644 bool in_word = false;
645 uintmax_t linepos = 0;
647 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
649 char const *p = buf;
650 if (bytes_read == SAFE_READ_ERROR)
652 error (0, errno, "%s", quotef (file));
653 ok = false;
654 break;
657 bytes += bytes_read;
660 switch (*p++)
662 case '\n':
663 lines++;
664 FALLTHROUGH;
665 case '\r':
666 case '\f':
667 if (linepos > linelength)
668 linelength = linepos;
669 linepos = 0;
670 goto word_separator;
671 case '\t':
672 linepos += 8 - (linepos % 8);
673 goto word_separator;
674 case ' ':
675 linepos++;
676 FALLTHROUGH;
677 case '\v':
678 word_separator:
679 words += in_word;
680 in_word = false;
681 break;
682 default:
683 if (isprint (to_uchar (p[-1])))
685 linepos++;
686 if (isspace (to_uchar (p[-1]))
687 || isnbspace (to_uchar (p[-1])))
688 goto word_separator;
689 in_word = true;
691 break;
694 while (--bytes_read);
696 if (linepos > linelength)
697 linelength = linepos;
698 words += in_word;
701 if (count_chars < print_chars)
702 chars = bytes;
704 if (total_mode != total_only)
705 write_counts (lines, words, chars, bytes, linelength, file_x);
706 total_lines += lines;
707 total_words += words;
708 total_chars += chars;
709 total_bytes += bytes;
710 if (linelength > max_line_length)
711 max_line_length = linelength;
713 return ok;
716 static bool
717 wc_file (char const *file, struct fstatus *fstatus)
719 if (! file || STREQ (file, "-"))
721 have_read_stdin = true;
722 xset_binary_mode (STDIN_FILENO, O_BINARY);
723 return wc (STDIN_FILENO, file, fstatus, -1);
725 else
727 int fd = open (file, O_RDONLY | O_BINARY);
728 if (fd == -1)
730 error (0, errno, "%s", quotef (file));
731 return false;
733 else
735 bool ok = wc (fd, file, fstatus, 0);
736 if (close (fd) != 0)
738 error (0, errno, "%s", quotef (file));
739 return false;
741 return ok;
746 /* Return the file status for the NFILES files addressed by FILE.
747 Optimize the case where only one number is printed, for just one
748 file; in that case we can use a print width of 1, so we don't need
749 to stat the file. Handle the case of (nfiles == 0) in the same way;
750 that happens when we don't know how long the list of file names will be. */
752 static struct fstatus *
753 get_input_fstatus (size_t nfiles, char *const *file)
755 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
757 if (nfiles == 0
758 || (nfiles == 1
759 && ((print_lines + print_words + print_chars
760 + print_bytes + print_linelength)
761 == 1)))
762 fstatus[0].failed = 1;
763 else
765 for (size_t i = 0; i < nfiles; i++)
766 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
767 ? fstat (STDIN_FILENO, &fstatus[i].st)
768 : stat (file[i], &fstatus[i].st));
771 return fstatus;
774 /* Return a print width suitable for the NFILES files whose status is
775 recorded in FSTATUS. Optimize the same special case that
776 get_input_fstatus optimizes. */
778 ATTRIBUTE_PURE
779 static int
780 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
782 int width = 1;
784 if (0 < nfiles && fstatus[0].failed <= 0)
786 int minimum_width = 1;
787 uintmax_t regular_total = 0;
789 for (size_t i = 0; i < nfiles; i++)
790 if (! fstatus[i].failed)
792 if (S_ISREG (fstatus[i].st.st_mode))
793 regular_total += fstatus[i].st.st_size;
794 else
795 minimum_width = 7;
798 for (; 10 <= regular_total; regular_total /= 10)
799 width++;
800 if (width < minimum_width)
801 width = minimum_width;
804 return width;
809 main (int argc, char **argv)
811 bool ok;
812 int optc;
813 size_t nfiles;
814 char **files;
815 char *files_from = NULL;
816 struct fstatus *fstatus;
817 struct Tokens tok;
819 initialize_main (&argc, &argv);
820 set_program_name (argv[0]);
821 setlocale (LC_ALL, "");
822 bindtextdomain (PACKAGE, LOCALEDIR);
823 textdomain (PACKAGE);
825 atexit (close_stdout);
827 page_size = getpagesize ();
828 /* Line buffer stdout to ensure lines are written atomically and immediately
829 so that processes running in parallel do not intersperse their output. */
830 setvbuf (stdout, NULL, _IOLBF, 0);
832 posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
834 print_lines = print_words = print_chars = print_bytes = false;
835 print_linelength = false;
836 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
838 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
839 switch (optc)
841 case 'c':
842 print_bytes = true;
843 break;
845 case 'm':
846 print_chars = true;
847 break;
849 case 'l':
850 print_lines = true;
851 break;
853 case 'w':
854 print_words = true;
855 break;
857 case 'L':
858 print_linelength = true;
859 break;
861 case DEBUG_PROGRAM_OPTION:
862 debug = true;
863 break;
865 case FILES0_FROM_OPTION:
866 files_from = optarg;
867 break;
869 case TOTAL_OPTION:
870 total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
871 break;
873 case_GETOPT_HELP_CHAR;
875 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
877 default:
878 usage (EXIT_FAILURE);
881 if (! (print_lines || print_words || print_chars || print_bytes
882 || print_linelength))
883 print_lines = print_words = print_bytes = true;
885 bool read_tokens = false;
886 struct argv_iterator *ai;
887 if (files_from)
889 FILE *stream;
891 /* When using --files0-from=F, you may not specify any files
892 on the command-line. */
893 if (optind < argc)
895 error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
896 fprintf (stderr, "%s\n",
897 _("file operands cannot be combined with --files0-from"));
898 usage (EXIT_FAILURE);
901 if (STREQ (files_from, "-"))
902 stream = stdin;
903 else
905 stream = fopen (files_from, "r");
906 if (stream == NULL)
907 die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
908 quoteaf (files_from));
911 /* Read the file list into RAM if we can detect its size and that
912 size is reasonable. Otherwise, we'll read a name at a time. */
913 struct stat st;
914 if (fstat (fileno (stream), &st) == 0
915 && S_ISREG (st.st_mode)
916 && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
918 read_tokens = true;
919 readtokens0_init (&tok);
920 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
921 die (EXIT_FAILURE, 0, _("cannot read file names from %s"),
922 quoteaf (files_from));
923 files = tok.tok;
924 nfiles = tok.n_tok;
925 ai = argv_iter_init_argv (files);
927 else
929 files = NULL;
930 nfiles = 0;
931 ai = argv_iter_init_stream (stream);
934 else
936 static char *stdin_only[] = { NULL };
937 files = (optind < argc ? argv + optind : stdin_only);
938 nfiles = (optind < argc ? argc - optind : 1);
939 ai = argv_iter_init_argv (files);
942 if (!ai)
943 xalloc_die ();
945 fstatus = get_input_fstatus (nfiles, files);
946 if (total_mode == total_only)
947 number_width = 1; /* No extra padding, since no alignment requirement. */
948 else
949 number_width = compute_number_width (nfiles, fstatus);
951 ok = true;
952 for (int i = 0; /* */; i++)
954 bool skip_file = false;
955 enum argv_iter_err ai_err;
956 char *file_name = argv_iter (ai, &ai_err);
957 if (!file_name)
959 switch (ai_err)
961 case AI_ERR_EOF:
962 goto argv_iter_done;
963 case AI_ERR_READ:
964 error (0, errno, _("%s: read error"),
965 quotef (files_from));
966 ok = false;
967 goto argv_iter_done;
968 case AI_ERR_MEM:
969 xalloc_die ();
970 default:
971 assert (!"unexpected error code from argv_iter");
974 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
976 /* Give a better diagnostic in an unusual case:
977 printf - | wc --files0-from=- */
978 error (0, 0, _("when reading file names from stdin, "
979 "no file name of %s allowed"),
980 quoteaf (file_name));
981 skip_file = true;
984 if (!file_name[0])
986 /* Diagnose a zero-length file name. When it's one
987 among many, knowing the record number may help.
988 FIXME: currently print the record number only with
989 --files0-from=FILE. Maybe do it for argv, too? */
990 if (files_from == NULL)
991 error (0, 0, "%s", _("invalid zero-length file name"));
992 else
994 /* Using the standard 'filename:line-number:' prefix here is
995 not totally appropriate, since NUL is the separator, not NL,
996 but it might be better than nothing. */
997 unsigned long int file_number = argv_iter_n_args (ai);
998 error (0, 0, "%s:%lu: %s", quotef (files_from),
999 file_number, _("invalid zero-length file name"));
1001 skip_file = true;
1004 if (skip_file)
1005 ok = false;
1006 else
1007 ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
1009 if (! nfiles)
1010 fstatus[0].failed = 1;
1012 argv_iter_done:
1014 /* No arguments on the command line is fine. That means read from stdin.
1015 However, no arguments on the --files0-from input stream is an error
1016 means don't read anything. */
1017 if (ok && !files_from && argv_iter_n_args (ai) == 0)
1018 ok &= wc_file (NULL, &fstatus[0]);
1020 if (read_tokens)
1021 readtokens0_free (&tok);
1023 if (total_mode != total_never
1024 && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
1025 write_counts (total_lines, total_words, total_chars, total_bytes,
1026 max_line_length,
1027 total_mode != total_only ? _("total") : NULL);
1029 argv_iter_free (ai);
1031 free (fstatus);
1033 if (have_read_stdin && close (STDIN_FILENO) != 0)
1034 die (EXIT_FAILURE, errno, "-");
1036 return ok ? EXIT_SUCCESS : EXIT_FAILURE;