1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 1985, 1991, 1995-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18 and David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
30 #include "argv-iter.h"
37 #include "readtokens0.h"
38 #include "safe-read.h"
41 #if !defined iswspace && !HAVE_ISWSPACE
42 # define iswspace(wc) \
43 ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
46 /* The official name of this program (e.g., no `g' prefix). */
47 #define PROGRAM_NAME "wc"
50 proper_name ("Paul Rubin"), \
51 proper_name ("David MacKenzie")
53 /* Size of atomic reads. */
54 #define BUFFER_SIZE (16 * 1024)
56 /* Cumulative number of lines, words, chars and bytes in all files so far.
57 max_line_length is the maximum over all files processed so far. */
58 static uintmax_t total_lines
;
59 static uintmax_t total_words
;
60 static uintmax_t total_chars
;
61 static uintmax_t total_bytes
;
62 static uintmax_t max_line_length
;
64 /* Which counts to print. */
65 static bool print_lines
, print_words
, print_chars
, print_bytes
;
66 static bool print_linelength
;
68 /* The print width of each count. */
69 static int number_width
;
71 /* True if we have ever read the standard input. */
72 static bool have_read_stdin
;
74 /* The result of calling fstat or stat on a file descriptor or file. */
77 /* If positive, fstat or stat has not been called yet. Otherwise,
78 this is the value returned from fstat or stat. */
81 /* If FAILED is zero, this is the file's status. */
85 /* For long options that have no equivalent short option, use a
86 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
89 FILES0_FROM_OPTION
= CHAR_MAX
+ 1
92 static struct option
const longopts
[] =
94 {"bytes", no_argument
, NULL
, 'c'},
95 {"chars", no_argument
, NULL
, 'm'},
96 {"lines", no_argument
, NULL
, 'l'},
97 {"words", no_argument
, NULL
, 'w'},
98 {"files0-from", required_argument
, NULL
, FILES0_FROM_OPTION
},
99 {"max-line-length", no_argument
, NULL
, 'L'},
100 {GETOPT_HELP_OPTION_DECL
},
101 {GETOPT_VERSION_OPTION_DECL
},
108 if (status
!= EXIT_SUCCESS
)
109 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
114 Usage: %s [OPTION]... [FILE]...\n\
115 or: %s [OPTION]... --files0-from=F\n\
117 program_name
, program_name
);
119 Print newline, word, and byte counts for each FILE, and a total line if\n\
120 more than one FILE is specified. With no FILE, or when FILE is -,\n\
121 read standard input. A word is a non-zero-length sequence of characters\n\
122 delimited by white space.\n\
123 -c, --bytes print the byte counts\n\
124 -m, --chars print the character counts\n\
125 -l, --lines print the newline counts\n\
128 --files0-from=F read input from the files specified by\n\
129 NUL-terminated names in file F;\n\
130 If F is - then read names from standard input\n\
131 -L, --max-line-length print the length of the longest line\n\
132 -w, --words print the word counts\n\
134 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
135 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
136 emit_ancillary_info ();
141 /* FILE is the name of the file (or NULL for standard input)
142 associated with the specified counters. */
144 write_counts (uintmax_t lines
,
148 uintmax_t linelength
,
151 static char const format_sp_int
[] = " %*s";
152 char const *format_int
= format_sp_int
+ 1;
153 char buf
[INT_BUFSIZE_BOUND (uintmax_t)];
157 printf (format_int
, number_width
, umaxtostr (lines
, buf
));
158 format_int
= format_sp_int
;
162 printf (format_int
, number_width
, umaxtostr (words
, buf
));
163 format_int
= format_sp_int
;
167 printf (format_int
, number_width
, umaxtostr (chars
, buf
));
168 format_int
= format_sp_int
;
172 printf (format_int
, number_width
, umaxtostr (bytes
, buf
));
173 format_int
= format_sp_int
;
175 if (print_linelength
)
177 printf (format_int
, number_width
, umaxtostr (linelength
, buf
));
180 printf (" %s", file
);
184 /* Count words. FILE_X is the name of the file (or NULL for standard
185 input) that is open on descriptor FD. *FSTATUS is its status.
186 Return true if successful. */
188 wc (int fd
, char const *file_x
, struct fstatus
*fstatus
)
191 char buf
[BUFFER_SIZE
+ 1];
193 uintmax_t lines
, words
, chars
, bytes
, linelength
;
194 bool count_bytes
, count_chars
, count_complicated
;
195 char const *file
= file_x
? file_x
: _("standard input");
197 lines
= words
= chars
= bytes
= linelength
= 0;
199 /* If in the current locale, chars are equivalent to bytes, we prefer
200 counting bytes, because that's easier. */
204 count_bytes
= print_bytes
;
205 count_chars
= print_chars
;
210 count_bytes
= print_bytes
|| print_chars
;
213 count_complicated
= print_words
|| print_linelength
;
215 /* Advise the kernel of our access pattern only if we will read(). */
216 if (!count_bytes
|| count_chars
|| print_lines
|| count_complicated
)
217 fdadvise (fd
, 0, 0, FADVISE_SEQUENTIAL
);
219 /* When counting only bytes, save some line- and word-counting
220 overhead. If FD is a `regular' Unix file, using lseek is enough
221 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
222 bytes at a time until EOF. Note that the `size' (number of bytes)
223 that wc reports is smaller than stats.st_size when the file is not
224 positioned at its beginning. That's why the lseek calls below are
225 necessary. For example the command
226 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
227 should make wc report `0' bytes. */
229 if (count_bytes
&& !count_chars
&& !print_lines
&& !count_complicated
)
231 off_t current_pos
, end_pos
;
233 if (0 < fstatus
->failed
)
234 fstatus
->failed
= fstat (fd
, &fstatus
->st
);
236 if (! fstatus
->failed
&& S_ISREG (fstatus
->st
.st_mode
)
237 && (current_pos
= lseek (fd
, (off_t
) 0, SEEK_CUR
)) != -1
238 && (end_pos
= lseek (fd
, (off_t
) 0, SEEK_END
)) != -1)
240 /* Be careful here. The current position may actually be
241 beyond the end of the file. As in the example above. */
242 bytes
= end_pos
< current_pos
? 0 : end_pos
- current_pos
;
246 fdadvise (fd
, 0, 0, FADVISE_SEQUENTIAL
);
247 while ((bytes_read
= safe_read (fd
, buf
, BUFFER_SIZE
)) > 0)
249 if (bytes_read
== SAFE_READ_ERROR
)
251 error (0, errno
, "%s", file
);
259 else if (!count_chars
&& !count_complicated
)
261 /* Use a separate loop when counting only lines or lines and bytes --
262 but not chars or words. */
263 while ((bytes_read
= safe_read (fd
, buf
, BUFFER_SIZE
)) > 0)
267 if (bytes_read
== SAFE_READ_ERROR
)
269 error (0, errno
, "%s", file
);
274 while ((p
= memchr (p
, '\n', (buf
+ bytes_read
) - p
)))
283 # define SUPPORT_OLD_MBRTOWC 1
284 else if (MB_CUR_MAX
> 1)
286 bool in_word
= false;
287 uintmax_t linepos
= 0;
288 DECLARE_ZEROED_AGGREGATE (mbstate_t, state
);
289 bool in_shift
= false;
290 # if SUPPORT_OLD_MBRTOWC
291 /* Back-up the state before each multibyte character conversion and
292 move the last incomplete character of the buffer to the front
293 of the buffer. This is needed because we don't know whether
294 the `mbrtowc' function updates the state when it returns -2
295 (the ISO C 99 and glibc-2.2 behaviour) or does not (the amended
296 ANSI C, glibc-2.1 and Solaris 5.7 behaviour). We don't have an
297 autoconf test for this, yet. */
298 size_t prev
= 0; /* number of bytes carried over from previous round */
300 const size_t prev
= 0;
303 while ((bytes_read
= safe_read (fd
, buf
+ prev
, BUFFER_SIZE
- prev
)) > 0)
306 # if SUPPORT_OLD_MBRTOWC
307 mbstate_t backup_state
;
309 if (bytes_read
== SAFE_READ_ERROR
)
311 error (0, errno
, "%s", file
);
324 if (!in_shift
&& is_basic (*p
))
326 /* Handle most ASCII characters quickly, without calling
334 # if SUPPORT_OLD_MBRTOWC
335 backup_state
= state
;
337 n
= mbrtowc (&wide_char
, p
, bytes_read
, &state
);
338 if (n
== (size_t) -2)
340 # if SUPPORT_OLD_MBRTOWC
341 state
= backup_state
;
345 if (n
== (size_t) -1)
347 /* Remember that we read a byte, but don't complain
348 about the error. Because of the decoding error,
349 this is considered to be a byte but not a
350 character (that is, chars is not incremented). */
355 if (mbsinit (&state
))
373 if (linepos
> linelength
)
374 linelength
= linepos
;
376 goto mb_word_separator
;
378 linepos
+= 8 - (linepos
% 8);
379 goto mb_word_separator
;
389 if (iswprint (wide_char
))
391 int width
= wcwidth (wide_char
);
394 if (iswspace (wide_char
))
395 goto mb_word_separator
;
401 while (bytes_read
> 0);
403 # if SUPPORT_OLD_MBRTOWC
406 if (bytes_read
== BUFFER_SIZE
)
408 /* Encountered a very long redundant shift sequence. */
412 memmove (buf
, p
, bytes_read
);
417 if (linepos
> linelength
)
418 linelength
= linepos
;
424 bool in_word
= false;
425 uintmax_t linepos
= 0;
427 while ((bytes_read
= safe_read (fd
, buf
, BUFFER_SIZE
)) > 0)
430 if (bytes_read
== SAFE_READ_ERROR
)
432 error (0, errno
, "%s", file
);
447 if (linepos
> linelength
)
448 linelength
= linepos
;
452 linepos
+= 8 - (linepos
% 8);
463 if (isprint (to_uchar (p
[-1])))
466 if (isspace (to_uchar (p
[-1])))
473 while (--bytes_read
);
475 if (linepos
> linelength
)
476 linelength
= linepos
;
480 if (count_chars
< print_chars
)
483 write_counts (lines
, words
, chars
, bytes
, linelength
, file_x
);
484 total_lines
+= lines
;
485 total_words
+= words
;
486 total_chars
+= chars
;
487 total_bytes
+= bytes
;
488 if (linelength
> max_line_length
)
489 max_line_length
= linelength
;
495 wc_file (char const *file
, struct fstatus
*fstatus
)
497 if (! file
|| STREQ (file
, "-"))
499 have_read_stdin
= true;
500 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
501 xfreopen (NULL
, "rb", stdin
);
502 return wc (STDIN_FILENO
, file
, fstatus
);
506 int fd
= open (file
, O_RDONLY
| O_BINARY
);
509 error (0, errno
, "%s", file
);
514 bool ok
= wc (fd
, file
, fstatus
);
517 error (0, errno
, "%s", file
);
525 /* Return the file status for the NFILES files addressed by FILE.
526 Optimize the case where only one number is printed, for just one
527 file; in that case we can use a print width of 1, so we don't need
528 to stat the file. Handle the case of (nfiles == 0) in the same way;
529 that happens when we don't know how long the list of file names will be. */
531 static struct fstatus
*
532 get_input_fstatus (int nfiles
, char *const *file
)
534 struct fstatus
*fstatus
= xnmalloc (nfiles
? nfiles
: 1, sizeof *fstatus
);
538 && ((print_lines
+ print_words
+ print_chars
539 + print_bytes
+ print_linelength
)
541 fstatus
[0].failed
= 1;
546 for (i
= 0; i
< nfiles
; i
++)
547 fstatus
[i
].failed
= (! file
[i
] || STREQ (file
[i
], "-")
548 ? fstat (STDIN_FILENO
, &fstatus
[i
].st
)
549 : stat (file
[i
], &fstatus
[i
].st
));
555 /* Return a print width suitable for the NFILES files whose status is
556 recorded in FSTATUS. Optimize the same special case that
557 get_input_fstatus optimizes. */
560 compute_number_width (int nfiles
, struct fstatus
const *fstatus
)
564 if (0 < nfiles
&& fstatus
[0].failed
<= 0)
566 int minimum_width
= 1;
567 uintmax_t regular_total
= 0;
570 for (i
= 0; i
< nfiles
; i
++)
571 if (! fstatus
[i
].failed
)
573 if (S_ISREG (fstatus
[i
].st
.st_mode
))
574 regular_total
+= fstatus
[i
].st
.st_size
;
579 for (; 10 <= regular_total
; regular_total
/= 10)
581 if (width
< minimum_width
)
582 width
= minimum_width
;
590 main (int argc
, char **argv
)
596 char *files_from
= NULL
;
597 struct fstatus
*fstatus
;
600 initialize_main (&argc
, &argv
);
601 set_program_name (argv
[0]);
602 setlocale (LC_ALL
, "");
603 bindtextdomain (PACKAGE
, LOCALEDIR
);
604 textdomain (PACKAGE
);
606 atexit (close_stdout
);
608 /* Line buffer stdout to ensure lines are written atomically and immediately
609 so that processes running in parallel do not intersperse their output. */
610 setvbuf (stdout
, NULL
, _IOLBF
, 0);
612 print_lines
= print_words
= print_chars
= print_bytes
= false;
613 print_linelength
= false;
614 total_lines
= total_words
= total_chars
= total_bytes
= max_line_length
= 0;
616 while ((optc
= getopt_long (argc
, argv
, "clLmw", longopts
, NULL
)) != -1)
636 print_linelength
= true;
639 case FILES0_FROM_OPTION
:
643 case_GETOPT_HELP_CHAR
;
645 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
648 usage (EXIT_FAILURE
);
651 if (! (print_lines
|| print_words
|| print_chars
|| print_bytes
652 || print_linelength
))
653 print_lines
= print_words
= print_bytes
= true;
655 bool read_tokens
= false;
656 struct argv_iterator
*ai
;
661 /* When using --files0-from=F, you may not specify any files
662 on the command-line. */
665 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
666 fprintf (stderr
, "%s\n",
667 _("file operands cannot be combined with --files0-from"));
668 usage (EXIT_FAILURE
);
671 if (STREQ (files_from
, "-"))
675 stream
= fopen (files_from
, "r");
677 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
681 /* Read the file list into RAM if we can detect its size and that
682 size is reasonable. Otherwise, we'll read a name at a time. */
684 if (fstat (fileno (stream
), &st
) == 0
685 && S_ISREG (st
.st_mode
)
686 && st
.st_size
<= MIN (10 * 1024 * 1024, physmem_available () / 2))
689 readtokens0_init (&tok
);
690 if (! readtokens0 (stream
, &tok
) || fclose (stream
) != 0)
691 error (EXIT_FAILURE
, 0, _("cannot read file names from %s"),
695 ai
= argv_iter_init_argv (files
);
701 ai
= argv_iter_init_stream (stream
);
706 static char *stdin_only
[] = { NULL
};
707 files
= (optind
< argc
? argv
+ optind
: stdin_only
);
708 nfiles
= (optind
< argc
? argc
- optind
: 1);
709 ai
= argv_iter_init_argv (files
);
715 fstatus
= get_input_fstatus (nfiles
, files
);
716 number_width
= compute_number_width (nfiles
, fstatus
);
720 for (i
= 0; /* */; i
++)
722 bool skip_file
= false;
723 enum argv_iter_err ai_err
;
724 char *file_name
= argv_iter (ai
, &ai_err
);
732 error (0, errno
, _("%s: read error"),
733 quotearg_colon (files_from
));
739 assert (!"unexpected error code from argv_iter");
742 if (files_from
&& STREQ (files_from
, "-") && STREQ (file_name
, "-"))
744 /* Give a better diagnostic in an unusual case:
745 printf - | wc --files0-from=- */
746 error (0, 0, _("when reading file names from stdin, "
747 "no file name of %s allowed"),
754 /* Diagnose a zero-length file name. When it's one
755 among many, knowing the record number may help.
756 FIXME: currently print the record number only with
757 --files0-from=FILE. Maybe do it for argv, too? */
758 if (files_from
== NULL
)
759 error (0, 0, "%s", _("invalid zero-length file name"));
762 /* Using the standard `filename:line-number:' prefix here is
763 not totally appropriate, since NUL is the separator, not NL,
764 but it might be better than nothing. */
765 unsigned long int file_number
= argv_iter_n_args (ai
);
766 error (0, 0, "%s:%lu: %s", quotearg_colon (files_from
),
767 file_number
, _("invalid zero-length file name"));
775 ok
&= wc_file (file_name
, &fstatus
[nfiles
? i
: 0]);
779 /* No arguments on the command line is fine. That means read from stdin.
780 However, no arguments on the --files0-from input stream
781 means don't read anything. */
782 if (ok
&& !files_from
&& argv_iter_n_args (ai
) == 0)
783 ok
&= wc_file (NULL
, &fstatus
[0]);
786 readtokens0_free (&tok
);
788 if (1 < argv_iter_n_args (ai
))
789 write_counts (total_lines
, total_words
, total_chars
, total_bytes
,
790 max_line_length
, _("total"));
796 if (have_read_stdin
&& close (STDIN_FILENO
) != 0)
797 error (EXIT_FAILURE
, errno
, "-");
799 exit (ok
? EXIT_SUCCESS
: EXIT_FAILURE
);