1 /* $NetBSD: wc.c,v 1.31 2008/07/21 14:19:28 lukem Exp $ */
4 * Copyright (c) 1980, 1987, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
35 The Regents of the University of California. All rights reserved.");
40 static char sccsid
[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
42 __RCSID("$NetBSD: wc.c,v 1.31 2008/07/21 14:19:28 lukem Exp $");
46 /* wc line, word and char count */
48 #include <sys/param.h>
65 typedef u_long wc_count_t
;
66 # define WCFMT " %7lu"
67 # define WCCAST unsigned long
69 typedef u_quad_t wc_count_t
;
70 # define WCFMT " %7llu"
71 # define WCCAST unsigned long long
74 static wc_count_t tlinect
, twordct
, tcharct
;
75 static int doline
, doword
, dobyte
, dochar
;
/*
 * Forward declarations.
 *
 * cnt()          - opens the named file read-only, counts it, and hands
 *                  the per-file line/word/char counts to print_counts().
 * print_counts() - prints the supplied counts using WCFMT, followed by
 *                  the name string.
 * usage()        - writes the usage message to stderr.
 * do_mb()        - multibyte conversion helper built on mbrtowc(); warns
 *                  about invalid byte sequences, naming the file.
 */
78 static void cnt(const char *);
79 static void print_counts(wc_count_t
, wc_count_t
, wc_count_t
, const char *);
80 static void usage(void);
81 static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *,
82 size_t *, const char *);
83 int main(int, char *[]);
86 main(int argc
, char *argv
[])
90 setlocale(LC_ALL
, "");
92 while ((ch
= getopt(argc
, argv
, "lwcm")) != -1)
115 /* Wc's flags are on by default. */
116 if (doline
+ doword
+ dobyte
+ dochar
== 0)
117 doline
= doword
= dobyte
= 1;
122 int dototal
= (argc
> 1);
129 print_counts(tlinect
, twordct
, tcharct
, "total");
136 do_mb(wchar_t *wc
, const char *p
, size_t len
, mbstate_t *st
,
137 size_t *retcnt
, const char *file
)
143 r
= mbrtowc(wc
, p
, len
, st
);
144 if (r
== (size_t)-1) {
145 warnx("%s: invalid byte sequence", file
);
148 /* XXX skip 1 byte */
151 memset(st
, 0, sizeof(*st
));
153 } else if (r
== (size_t)-2)
170 cnt(const char *file
)
172 u_char buf
[MAXBSIZE
];
173 wchar_t wbuf
[MAXBSIZE
];
175 wc_count_t charct
, linect
, wordct
;
179 const char *name
; /* filename or <stdin> */
181 int fd
, gotsp
, len
= 0;
183 linect
= wordct
= charct
= 0;
185 if ((fd
= open(file
, O_RDONLY
, 0)) < 0) {
196 if (dochar
|| doword
)
197 memset(&st
, 0, sizeof(st
));
201 * line counting is split out because it's a lot
202 * faster to get lines than to get words, since
203 * the word count requires some logic.
205 if (doline
|| dochar
) {
206 while ((len
= read(fd
, buf
, MAXBSIZE
)) > 0) {
210 r
= do_mb(0, (char *)buf
, (size_t)len
,
216 for (C
= buf
; len
--; ++C
)
223 * if all we need is the number of characters and
224 * it's a directory or a regular or linked file, just
225 * stat the puppy. We avoid testing for it not being
226 * a special device in case someone adds a new type
230 if (fstat(fd
, &sb
)) {
234 if (S_ISREG(sb
.st_mode
) ||
235 S_ISLNK(sb
.st_mode
) ||
236 S_ISDIR(sb
.st_mode
)) {
240 read(fd
, buf
, MAXBSIZE
)) > 0)
246 /* do it the hard way... */
248 while ((len
= read(fd
, buf
, MAXBSIZE
)) > 0) {
251 r
= do_mb(wbuf
, (char *)buf
, (size_t)len
, &st
, &wlen
,
257 for (WC
= wbuf
; wlen
--; ++WC
) {
265 * This line implements the POSIX
266 * spec, i.e. a word is a "maximal
267 * string of characters delimited by
268 * whitespace." Notice nothing was
269 * said about a character being
270 * printing or non-printing.
285 if (dochar
&& r
== (size_t)-2) {
286 warnx("%s: incomplete multibyte character", name
);
290 print_counts(linect
, wordct
, charct
, file
);
293 * don't bother checking doline, doword, or dobyte --- speeds
307 print_counts(wc_count_t lines
, wc_count_t words
, wc_count_t chars
,
312 printf(WCFMT
, (WCCAST
)lines
);
314 printf(WCFMT
, (WCCAST
)words
);
315 if (dobyte
|| dochar
)
316 printf(WCFMT
, (WCCAST
)chars
);
319 printf(" %s\n", name
);
328 (void)fprintf(stderr
, "usage: wc [-c | -m] [-lw] [file ...]\n");