Merge illumos-gate
[unleashed.git] / bin / wc / wc.c
blobc2f235cbf798352d816f9803dc79937e22e037d0
1 /* $OpenBSD: wc.c,v 1.22 2018/04/26 12:42:51 guenther Exp $ */
3 /*
4 * Copyright (c) 1980, 1987, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #include <sys/param.h> /* MAXBSIZE */
33 #include <sys/stat.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <locale.h>
39 #include <ctype.h>
40 #include <err.h>
41 #include <unistd.h>
42 #include <wchar.h>
43 #include <wctype.h>
/* Running totals over every input processed; printed on the "total" line. */
int64_t tlinect;
int64_t twordct;
int64_t tcharct;

/* Which counts to report; each is non-zero when requested. */
int doline;
int doword;
int dochar;
/* Non-zero when -m was given and the locale's MB_CUR_MAX exceeds 1. */
int multibyte;

/* Exit status of the program; set to 1 once any input fails. */
int rval;

extern char *__progname;

static void cnt(char *file);
static void format_and_print(int64_t v);
static void print_counts(int64_t lines, int64_t words, int64_t chars,
    char *name);
54 int
55 main(int argc, char *argv[])
57 int ch;
59 setlocale(LC_CTYPE, "");
61 if (pledge("stdio rpath", NULL) == -1)
62 err(1, "pledge");
64 while ((ch = getopt(argc, argv, "lwcm")) != -1)
65 switch(ch) {
66 case 'l':
67 doline = 1;
68 break;
69 case 'w':
70 doword = 1;
71 break;
72 case 'm':
73 if (MB_CUR_MAX > 1)
74 multibyte = 1;
75 /* FALLTHROUGH */
76 case 'c':
77 dochar = 1;
78 break;
79 case '?':
80 default:
81 fprintf(stderr,
82 "usage: %s [-c | -m] [-lw] [file ...]\n",
83 __progname);
84 return 1;
86 argv += optind;
87 argc -= optind;
90 * wc is unusual in that its flags are on by default, so,
91 * if you don't get any arguments, you have to turn them
92 * all on.
94 if (!doline && !doword && !dochar)
95 doline = doword = dochar = 1;
97 if (!*argv) {
98 cnt(NULL);
99 } else {
100 int dototal = (argc > 1);
102 do {
103 cnt(*argv);
104 } while(*++argv);
106 if (dototal)
107 print_counts(tlinect, twordct, tcharct, "total");
110 return rval;
113 static void
114 cnt(char *file)
116 static char *buf;
117 static size_t bufsz;
119 FILE *stream;
120 char *C;
121 wchar_t wc;
122 short gotsp;
123 ssize_t len;
124 int64_t linect, wordct, charct;
125 struct stat sbuf;
126 int fd;
128 linect = wordct = charct = 0;
129 stream = NULL;
130 if (file) {
131 if ((fd = open(file, O_RDONLY, 0)) < 0) {
132 warn("%s", file);
133 rval = 1;
134 return;
136 } else {
137 fd = STDIN_FILENO;
140 if (!doword && !multibyte) {
141 if (bufsz < MAXBSIZE &&
142 (buf = realloc(buf, MAXBSIZE)) == NULL)
143 err(1, NULL);
145 * Line counting is split out because it's a lot
146 * faster to get lines than to get words, since
147 * the word count requires some logic.
149 if (doline) {
150 while ((len = read(fd, buf, MAXBSIZE)) > 0) {
151 charct += len;
152 for (C = buf; len--; ++C)
153 if (*C == '\n')
154 ++linect;
156 if (len == -1) {
157 warn("%s", file);
158 rval = 1;
162 * If all we need is the number of characters and
163 * it's a directory or a regular or linked file, just
164 * stat the puppy. We avoid testing for it not being
165 * a special device in case someone adds a new type
166 * of inode.
168 else if (dochar) {
169 mode_t ifmt;
171 if (fstat(fd, &sbuf)) {
172 warn("%s", file);
173 rval = 1;
174 } else {
175 ifmt = sbuf.st_mode & S_IFMT;
176 if (ifmt == S_IFREG || ifmt == S_IFLNK
177 || ifmt == S_IFDIR) {
178 charct = sbuf.st_size;
179 } else {
180 while ((len = read(fd, buf, MAXBSIZE)) > 0)
181 charct += len;
182 if (len == -1) {
183 warn("%s", file);
184 rval = 1;
189 } else {
190 if (file == NULL)
191 stream = stdin;
192 else if ((stream = fdopen(fd, "r")) == NULL) {
193 warn("%s", file);
194 close(fd);
195 rval = 1;
196 return;
200 * Do it the hard way.
201 * According to POSIX, a word is a "maximal string of
202 * characters delimited by whitespace." Nothing is said
203 * about a character being printing or non-printing.
205 gotsp = 1;
206 while ((len = getline(&buf, &bufsz, stream)) > 0) {
207 if (multibyte) {
208 for (C = buf; *C != '\0'; C += len) {
209 ++charct;
210 len = mbtowc(&wc, C, MB_CUR_MAX);
211 if (len == -1) {
212 mbtowc(NULL, NULL,
213 MB_CUR_MAX);
214 len = 1;
215 wc = L' ';
217 if (iswspace(wc)) {
218 gotsp = 1;
219 if (wc == L'\n')
220 ++linect;
221 } else if (gotsp) {
222 gotsp = 0;
223 ++wordct;
226 } else {
227 charct += len;
228 for (C = buf; *C != '\0'; ++C) {
229 if (isspace((unsigned char)*C)) {
230 gotsp = 1;
231 if (*C == '\n')
232 ++linect;
233 } else if (gotsp) {
234 gotsp = 0;
235 ++wordct;
240 if (ferror(stream)) {
241 warn("%s", file);
242 rval = 1;
246 print_counts(linect, wordct, charct, file);
249 * Don't bother checking doline, doword, or dochar -- speeds
250 * up the common case
252 tlinect += linect;
253 twordct += wordct;
254 tcharct += charct;
256 if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
257 warn("%s", file);
258 rval = 1;
/*
 * Print one count on standard output: a single space followed by the
 * value right-aligned in a field at least seven characters wide.
 */
static void
format_and_print(int64_t v)
{
	/*
	 * int64_t is not long long on every ABI (it is long on LP64
	 * systems), so convert explicitly to match the %lld conversion.
	 */
	printf(" %7lld", (long long)v);
}
268 static void
269 print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
271 if (doline)
272 format_and_print(lines);
273 if (doword)
274 format_and_print(words);
275 if (dochar)
276 format_and_print(chars);
278 if (name)
279 printf(" %s\n", name);
280 else
281 printf("\n");