comm: add -Wall
[unleashed.git] / usr / src / cmd / fmt / fmt.c
blobb50bc8157e1d0d94f7fd1e6cecac37c3fcfe65ea
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <ctype.h>
33 #include <wctype.h>
34 #include <widec.h>
35 #include <dlfcn.h>
36 #include <locale.h>
37 #include <sys/param.h>
38 #include <string.h>
/*
 * fmt -- format the concatenation of input files or standard input
 * onto standard output.  Designed for use with Mail ~|
 *
 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ]
 * Author: Kurt Shoens (UCB) 12/7/78
 */
#define	NOSTR	((wchar_t *)0)	/* null wide-string pointer, typed for lint */
#define	MAXLINES	100	/* most mail header lines buffered for checking */

/* Output-line state shared by pack(), leadin() and oflush(). */
wchar_t outbuf[BUFSIZ];	/* image of the output line under construction */
wchar_t *outp;		/* next free slot in outbuf; NOSTR when line is empty */
int filler;		/* number of leading blanks leadin() put in outbuf */
char sobuf[BUFSIZ];	/* stdio buffer that main() attaches to stdout */

int pfx;		/* leading blank count of the current input line */
int width = 72;		/* output width that must not be exceeded */
int nojoin = 0;		/* nonzero: only split long lines, never join them */
int errs = 0;		/* number of errors so far; main() returns it */

/* Crown margin handling. */
enum crown_type {
	c_none,
	c_reset,
	c_head,
	c_lead,
	c_fixup,
	c_body
};
enum crown_type crown_state;	/* current crown margin state */
int crown_head;			/* the header offset */
int crown_body;			/* the body offset */

/* Currently-known initial strings of mail header lines; NULL-terminated. */
wchar_t *headnames[] = {
	L"Apparently-To",
	L"Bcc",
	L"bcc",
	L"Cc",
	L"cc",
	L"Confirmed-By",
	L"Content",
	L"content-length",
	L"From",
	L"Date",
	L"id",
	L"Message-I",
	L"MIME-Version",
	L"Precedence",
	L"Return-Path",
	L"Received",
	L"Reply-To",
	L"Status",
	L"Subject",
	L"To",
	L"X-IMAP",
	L"X-Lines",
	L"X-Sender",
	L"X-Sun",
	L"X-Status",
	L"X-UID",
	NULL
};

/* States of the mail-header detection machinery. */
enum hdr_type {
	off,		/* mail header processing is off */
	not_in_hdr,	/* not currently processing a mail header */
	in_hdr,		/* filling hdrbuf with potential header lines */
	flush_hdr,	/* flush hdrbuf; not a header, no special processing */
	do_hdr		/* process hdrbuf as a mail header */
};

/* current state of hdrbuf */
enum hdr_type hdr_state = not_in_hdr;

wchar_t *hdrbuf[MAXLINES];	/* saved lines that may form a mail header */
int h_lines;			/* index of the next free hdrbuf slot */
87 void (*(split))(wchar_t []);
88 extern int scrwidth(wchar_t);
89 extern boolean_t is_headline(const char *);
92 static void fill_hdrbuf(wchar_t []);
93 static void header_chk(void);
94 static void process_hdrbuf(void);
95 static void leadin(void);
96 static void tabulate(wchar_t []);
97 static void oflush(void);
98 static void pack(wchar_t []);
99 static void msplit(wchar_t []);
100 static void csplit(wchar_t []);
101 static void _wckind_init(void);
102 static void prefix(wchar_t []);
103 static void fmt(FILE *);
104 static int setopt(char *);
105 int _wckind(wchar_t);
108 * Drive the whole formatter by managing input files. Also,
109 * cause initialization of the output stuff and flush it out
110 * at the end.
114 main(int argc, char **argv)
116 FILE *fi;
117 char *cp;
118 int nofile;
119 char *locale;
121 outp = NOSTR;
122 setbuf(stdout, sobuf);
123 setlocale(LC_ALL, "");
124 locale = setlocale(LC_CTYPE, "");
125 if (strcmp(locale, "C") == 0) {
126 split = csplit;
127 } else {
128 split = msplit;
129 _wckind_init();
131 if (argc < 2) {
132 single:
133 fmt(stdin);
134 oflush();
135 exit(0);
137 nofile = 1;
138 while (--argc) {
139 cp = *++argv;
140 if (setopt(cp))
141 continue;
142 nofile = 0;
143 if ((fi = fopen(cp, "r")) == NULL) {
144 perror(cp);
145 errs++;
146 continue;
148 fmt(fi);
149 fclose(fi);
151 if (nofile)
152 goto single;
153 oflush();
154 fclose(stdout);
155 return (errs);
159 * Read up characters from the passed input file, forming lines,
160 * doing ^H processing, expanding tabs, stripping trailing blanks,
161 * and sending each line down for analysis.
164 static void
165 fmt(FILE *fi)
167 wchar_t linebuf[BUFSIZ], canonb[BUFSIZ];
168 wchar_t *cp, *cp2;
169 int col;
170 wchar_t c;
171 char cbuf[BUFSIZ]; /* stores wchar_t string as char string */
173 c = getwc(fi);
174 while (c != EOF) {
176 * Collect a line, doing ^H processing.
177 * Leave tabs for now.
180 cp = linebuf;
181 while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
182 if (c == L'\b') {
183 if (cp > linebuf)
184 cp--;
185 c = getwc(fi);
186 continue;
188 if (!(iswprint(c)) && c != L'\t') {
189 c = getwc(fi);
190 continue;
192 *cp++ = c;
193 c = getwc(fi);
195 *cp = L'\0';
198 * Toss anything remaining on the input line.
201 while (c != L'\n' && c != EOF)
202 c = getwc(fi);
204 * Expand tabs on the way to canonb.
207 col = 0;
208 cp = linebuf;
209 cp2 = canonb;
210 while (c = *cp++) {
211 if (c != L'\t') {
212 col += scrwidth(c);
213 if (cp2-canonb < BUFSIZ-1)
214 *cp2++ = c;
215 continue;
217 do {
218 if (cp2-canonb < BUFSIZ-1)
219 *cp2++ = L' ';
220 col++;
221 } while ((col & 07) != 0);
225 * Swipe trailing blanks from the line.
228 for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--) {
230 *++cp2 = '\0';
232 /* special processing to look for mail header lines */
233 switch (hdr_state) {
234 case off:
235 prefix(canonb);
236 /* FALLTHROUGH */
237 case not_in_hdr:
238 /* look for an initial mail header line */
239 /* skip initial blanks */
240 for (cp = canonb; *cp == L' '; cp++) {
243 * Need to convert string from wchar_t to char,
244 * since this is what is_headline() expects. Since we
245 * only want to make sure cp points to a "From" line
246 * of the email, we don't have to alloc
247 * BUFSIZ * MB_LEN_MAX to cbuf.
249 wcstombs(cbuf, cp, (BUFSIZ - 1));
250 if (is_headline(cbuf) == B_TRUE) {
251 hdr_state = in_hdr;
252 fill_hdrbuf(canonb);
253 } else {
254 /* no mail header line; process normally */
255 prefix(canonb);
257 break;
258 case in_hdr:
259 /* already saw 1st mail header line; look for more */
260 if (canonb[0] == L'\0') {
262 * blank line means end of mail header;
263 * verify current mail header buffer
264 * then process it accordingly
266 header_chk();
267 process_hdrbuf();
268 /* now process the current blank line */
269 prefix(canonb);
270 } else
272 * not a blank line--save this line as
273 * a potential mail header line
275 fill_hdrbuf(canonb);
276 break;
278 if (c != EOF)
279 c = getwc(fi);
282 * end of this file--make sure we process the stuff in
283 * hdrbuf before we're finished
285 if (hdr_state == in_hdr) {
286 header_chk();
287 process_hdrbuf();
292 * Take a line devoid of tabs and other garbage and determine its
293 * blank prefix. If the indent changes, call for a linebreak.
294 * If the input line is blank, echo the blank line on the output.
295 * Finally, if the line minus the prefix is a mail header, try to keep
296 * it on a line by itself.
299 static void
300 prefix(wchar_t line[])
302 wchar_t *cp;
303 int np;
304 int nosplit = 0; /* flag set if line should not be split */
306 if (line[0] == L'\0') {
307 oflush();
308 putchar('\n');
309 if (crown_state != c_none)
310 crown_state = c_reset;
311 return;
313 for (cp = line; *cp == L' '; cp++) {
315 np = cp - line;
318 * The following horrible expression attempts to avoid linebreaks
319 * when the indent changes due to a paragraph.
322 if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8))
323 oflush();
325 * if this is a mail header line, don't split it; flush previous
326 * line, if any, so we don't join this line to it
328 if (hdr_state == do_hdr) {
329 nosplit = 1;
330 oflush();
332 /* flush previous line so we don't join this one to it */
333 if (nojoin)
334 oflush();
335 /* nroff-type lines starting with '.' are not split nor joined */
336 if (!nosplit && (nosplit = (*cp == L'.')))
337 oflush();
338 pfx = np;
339 switch (crown_state) {
340 case c_reset:
341 crown_head = pfx;
342 crown_state = c_head;
343 break;
344 case c_lead:
345 crown_body = pfx;
346 crown_state = c_body;
347 break;
348 case c_fixup:
349 crown_body = pfx;
350 crown_state = c_body;
351 if (outp) {
352 wchar_t s[BUFSIZ];
354 *outp = L'\0';
355 wscpy(s, &outbuf[crown_head]);
356 outp = NOSTR;
357 split(s);
359 break;
361 if (nosplit) {
362 /* put whole input line onto outbuf and print it out */
363 pack(cp);
364 oflush();
365 } else
367 * split puts current line onto outbuf, but splits it
368 * at word boundaries, if it exceeds desired length
370 split(cp);
371 if (nojoin)
373 * flush current line so next lines, if any,
374 * won't join to this one
376 oflush();
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * The word buffer is sized and bounds-checked so that a word as long
 * as a full input line (BUFSIZ-1 characters, the limit fmt() applies)
 * plus the two trailing blanks and the terminator always fits; longer
 * runs are handed to pack() in pieces rather than overrunning it.
 */
static void
csplit(wchar_t line[])
{
	static const wchar_t *srchlist = L".:!?";
	wchar_t word[BUFSIZ + 4];
	/* collecting stops here: room is left for an escape pair */
	wchar_t *const word_limit = word + BUFSIZ;
	/* blanks stop here: room is left for the terminator */
	wchar_t *const blank_limit = word + BUFSIZ + 3;
	wchar_t *cp, *cp2;

	cp = line;
	while (*cp) {
		cp2 = word;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.
		 */
		while (*cp && !(iswspace(*cp)) && cp2 < word_limit) {
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 */
		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}
		while (iswspace(*cp) && cp2 < blank_limit)
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
/*
 * Multibyte-locale word splitter.  Works like csplit(), but a word
 * also ends where the character kind reported by _wckind() changes
 * and at least one of the two characters is outside code set 0.
 *
 * The word buffer is sized and bounds-checked so that a word as long
 * as a full input line (BUFSIZ-1 characters, the limit fmt() applies)
 * plus the two trailing blanks and the terminator always fits; longer
 * runs are handed to pack() in pieces rather than overrunning it.
 */
static void
msplit(wchar_t line[])
{
	static const wchar_t *srchlist = L".:!?";
	wchar_t word[BUFSIZ + 4];
	/* collecting stops here: room is left for an escape pair */
	wchar_t *const word_limit = word + BUFSIZ;
	/* blanks stop here: room is left for the terminator */
	wchar_t *const blank_limit = word + BUFSIZ + 3;
	wchar_t *cp, *cp2, prev;

	cp = line;
	while (*cp) {
		cp2 = word;
		prev = *cp;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.
		 */
		while (*cp && cp2 < word_limit) {
			if (iswspace(*cp))
				break;
			if (_wckind(*cp) != _wckind(prev)) {
				if (wcsetno(*cp) != 0 || wcsetno(prev) != 0)
					break;
			}
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			prev = *cp;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 */
		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}
		while (iswspace(*cp) && cp2 < blank_limit)
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
472 * Output section.
473 * Build up line images from the words passed in. Prefix
474 * each line with correct number of blanks. The buffer "outbuf"
475 * contains the current partial line image, including prefixed blanks.
476 * "outp" points to the next available space therein. When outp is NOSTR,
477 * there ain't nothing in there yet. At the bottom of this whole mess,
478 * leading tabs are reinserted.
482 * Pack a word onto the output line. If this is the beginning of
483 * the line, push on the appropriately-sized string of blanks first.
484 * If the word won't fit on the current line, flush and begin a new
485 * line. If the word is too long to fit all by itself on a line,
486 * just give it its own and hope for the best.
489 static void
490 pack(wchar_t word[])
492 wchar_t *cp;
493 int s, t;
495 if (outp == NOSTR)
496 leadin();
497 t = wscol(word);
498 *outp = L'\0';
499 s = wscol(outbuf);
500 if (t+s <= width) {
501 for (cp = word; *cp; *outp++ = *cp++) {
503 return;
505 if (s > filler) {
506 oflush();
507 leadin();
509 for (cp = word; *cp; *outp++ = *cp++) {
514 * If there is anything on the current output line, send it on
515 * its way. Set outp to NOSTR to indicate the absence of the current
516 * line prefix.
519 static void
520 oflush(void)
522 if (outp == NOSTR)
523 return;
524 *outp = L'\0';
525 tabulate(outbuf);
526 outp = NOSTR;
/*
 * Write the passed line to standard output: trailing blanks are
 * removed (the buffer is modified in place), each full group of eight
 * leading blanks becomes a tab, and a newline is appended.
 */
static void
tabulate(wchar_t line[])
{
	wchar_t *text;
	int len, lead, tabs, blanks;

	/* Toss trailing blanks in the output line */
	len = (int)wslen(line);
	while (len > 0 && line[len - 1] == L' ')
		len--;
	line[len] = L'\0';

	/* Count the leading blank space and tabulate */
	for (lead = 0; line[lead] == L' '; lead++)
		;
	for (tabs = lead / 8; tabs > 0; tabs--)
		putc('\t', stdout);
	for (blanks = lead % 8; blanks > 0; blanks--)
		putc(' ', stdout);

	for (text = line + lead; *text != L'\0'; text++)
		putwc(*text, stdout);
	putc('\n', stdout);
}
566 * Initialize the output line with the appropriate number of
567 * leading blanks.
570 static void
571 leadin(void)
573 int b;
574 wchar_t *cp;
575 int l;
577 switch (crown_state) {
578 case c_head:
579 l = crown_head;
580 crown_state = c_lead;
581 break;
583 case c_lead:
584 case c_fixup:
585 l = crown_head;
586 crown_state = c_fixup;
587 break;
589 case c_body:
590 l = crown_body;
591 break;
593 default:
594 l = pfx;
595 break;
597 filler = l;
598 for (b = 0, cp = outbuf; b < l; b++)
599 *cp++ = L' ';
600 outp = cp;
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the corresponding
 * character at the start of s2, 0 otherwise.  An s2 that ends before
 * s1 does is not a match; an empty s1 is a prefix of anything.
 */
static int
ispref(const wchar_t *s1, const wchar_t *s2)
{
	while (*s1 != L'\0') {
		/* also fails at s2's terminator, so s2 is never overrun */
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
618 * Set an input option
621 static int
622 setopt(char *cp)
624 static int ws = 0;
626 if (*cp == '-') {
627 if (cp[1] == 'c' && cp[2] == '\0') {
628 crown_state = c_reset;
629 return (1);
631 if (cp[1] == 's' && cp[2] == '\0') {
632 nojoin = 1;
633 return (1);
635 if (cp[1] == 'w' && cp[2] == '\0') {
636 ws++;
637 return (1);
639 width = atoi(cp+1);
640 } else if (ws) {
641 width = atoi(cp);
642 ws = 0;
643 } else
644 return (0);
645 if (width <= 0 || width >= BUFSIZ-2) {
646 fprintf(stderr, "fmt: bad width: %d\n", width);
647 exit(1);
649 return (1);
/* Per-locale shared object that may supply a character classifier. */
#define	LIB_WDRESOLVE	"/usr/lib/locale/%s/LC_CTYPE/wdresolve.so"
/* Symbol looked up in that object. */
#define	WCHKIND		"_wdchkind_"

static int _wckind_c_locale(wchar_t);

/* classifier behind _wckind(); the built-in one until a locale's is loaded */
static int (*__wckind)(wchar_t) = _wckind_c_locale;
/* handle of the currently loaded wdresolve.so, or NULL */
static void *dlhandle = NULL;

/*
 * Select the character classifier for the current LC_CTYPE locale.
 *
 * Any previously loaded object is closed first.  The locale's
 * wdresolve.so is then loaded and its WCHKIND symbol installed.  The
 * built-in C-locale classifier is kept when the locale is "C" or
 * cannot be queried, when the path does not fit, or when the object or
 * symbol is unavailable.
 */
static void
_wckind_init(void)
{
	char *locale;
	char path[MAXPATHLEN + 1];
	int (*sym)(wchar_t);
	int len;

	if (dlhandle != NULL) {
		(void) dlclose(dlhandle);
		dlhandle = NULL;
	}
	__wckind = _wckind_c_locale;

	locale = setlocale(LC_CTYPE, NULL);
	if (locale == NULL || strcmp(locale, "C") == 0)
		return;

	/* locale names are not bounded here; never overrun path */
	len = snprintf(path, sizeof (path), LIB_WDRESOLVE, locale);
	if (len < 0 || (size_t)len >= sizeof (path))
		return;

	dlhandle = dlopen(path, RTLD_LAZY);
	if (dlhandle == NULL)
		return;

	sym = (int (*)(wchar_t))dlsym(dlhandle, WCHKIND);
	if (sym != NULL) {
		__wckind = sym;
		return;
	}
	(void) dlclose(dlhandle);
	dlhandle = NULL;
}
/*
 * Classify a wide character with whichever classifier
 * _wckind_init() installed (the C-locale one by default).
 */
int
_wckind(wchar_t wc)
{
	int kind;

	kind = __wckind(wc);
	return (kind);
}
/*
 * Built-in character classifier.  ASCII alphanumerics and '_' are
 * kind 0, every other ASCII character is kind 1, and a non-ASCII
 * character is its code set number plus one.
 *
 * DEPEND_ON_ANSIC: L notion for the character is new in
 * ANSI-C, k&r compiler won't work.
 */
static int
_wckind_c_locale(wchar_t wc)
{
	if (!iswascii(wc))
		return (wcsetno(wc) + 1);
	if (iswalnum(wc) || wc == L'_')
		return (0);
	return (1);
}
718 * header_chk -
719 * Called when done looking for a set mail header lines.
720 * Either a blank line was seen, or EOF was reached.
722 * Verifies if current hdrbuf of potential mail header lines
723 * is really a mail header. A mail header must be at least 2
724 * lines and more than half of them must start with one of the
725 * known mail header strings in headnames.
727 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid
728 * mail header. Otherwise, it sets hdr_state to flush_hdr.
730 * h_lines = hdrbuf index for next line to be saved;
731 * also indicates current # of lines in potential header
733 static void
734 header_chk(void)
736 wchar_t *cp; /* ptr to current char of line */
737 wchar_t **hp; /* ptr to current char of a valid */
738 /* mail header string */
739 int l; /* index */
741 * number of lines in hdrbuf that look
742 * like mail header lines (start with
743 * a known mail header prefix)
745 int hdrcount = 0;
746 /* header must have at least 2 lines (h_lines > 1) */
747 if (h_lines < 2) {
748 hdr_state = flush_hdr;
749 return;
752 * go through each line in hdrbuf and see how many
753 * look like mail header lines
755 for (l = 0; l < h_lines; l++) {
756 /* skip initial blanks */
757 for (cp = hdrbuf[l]; *cp == L' '; cp++) {
759 for (hp = &headnames[0]; *hp != (wchar_t *)0; hp++)
760 if (ispref(*hp, cp)) {
761 hdrcount++;
762 break;
766 * if over half match, we'll assume this is a header;
767 * set hdr_state to indicate whether to treat
768 * these lines as mail header (do_hdr) or not (flush_hdr)
770 if (hdrcount > h_lines / 2)
771 hdr_state = do_hdr;
772 else
773 hdr_state = flush_hdr;
777 * fill_hdrbuf -
778 * Save given input line into next element of hdrbuf,
779 * as a potential mail header line, to be processed later
780 * once we decide whether or not the contents of hdrbuf is
781 * really a mail header, via header_chk().
783 * Does not allow hdrbuf to exceed MAXLINES lines.
784 * Dynamically allocates space for each line. If we are unable
785 * to allocate space for the current string, stop special mail
786 * header preservation at this point and continue formatting
787 * without it.
789 static void
790 fill_hdrbuf(wchar_t line[])
792 wchar_t *cp; /* pointer to characters in input line */
793 int i; /* index into characters a hdrbuf line */
795 if (h_lines >= MAXLINES) {
797 * if we run over MAXLINES potential mail header
798 * lines, stop checking--this is most likely NOT a
799 * mail header; flush out the hdrbuf, then process
800 * the current 'line' normally.
802 hdr_state = flush_hdr;
803 process_hdrbuf();
804 prefix(line);
805 return;
807 hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) *
808 (wslen(line) + 1));
809 if (hdrbuf[h_lines] == NULL) {
810 perror("malloc");
811 fprintf(stderr, "fmt: unable to do mail header preservation\n");
812 errs++;
814 * Can't process mail header; flush current contents
815 * of mail header and continue with no more mail
816 * header processing
818 if (h_lines == 0)
819 /* hdrbuf is empty; process this line normally */
820 prefix(line);
821 else {
822 hdr_state = flush_hdr;
823 for (i = 0; i < h_lines; i++) {
824 prefix(hdrbuf[i]);
825 free(hdrbuf[i]);
827 h_lines = 0;
829 hdr_state = off;
830 return;
832 /* save this line as a potential mail header line */
833 for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++) {
835 h_lines++;
839 * process_hdrbuf -
840 * Outputs the lines currently stored in hdrbuf, according
841 * to the current hdr_state value, assumed to be either do_hdr
842 * or flush_hdr.
843 * This should be called after doing a header_chk() to verify
844 * the hdrbuf and set the hdr_state flag.
846 static void
847 process_hdrbuf(void)
849 int i;
851 for (i = 0; i < h_lines; i++) {
852 prefix(hdrbuf[i]);
853 free(hdrbuf[i]);
855 hdr_state = not_in_hdr;
856 h_lines = 0;