4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
37 #include <sys/param.h>
41 * fmt -- format the concatenation of input files or standard input
42 * onto standard output. Designed for use with Mail ~|
44 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ]
45 * Author: Kurt Shoens (UCB) 12/7/78
48 #define NOSTR ((wchar_t *)0) /* Null string pointer for lint */
49 #define MAXLINES 100 /* maximum mail header lines to verify */
51 wchar_t outbuf
[BUFSIZ
]; /* Sandbagged output line image */
52 wchar_t *outp
; /* Pointer in above */
53 int filler
; /* Filler amount in outbuf */
54 char sobuf
[BUFSIZ
]; /* Global buffer */
56 int pfx
; /* Current leading blank count */
57 int width
= 72; /* Width that we will not exceed */
58 int nojoin
= 0; /* split lines only, don't join short ones */
59 int errs
= 0; /* Current number of errors */
61 enum crown_type
{c_none
, c_reset
, c_head
, c_lead
, c_fixup
, c_body
};
62 enum crown_type crown_state
; /* Crown margin state */
63 int crown_head
; /* The header offset */
64 int crown_body
; /* The body offset */
65 /* currently-known initial strings found in mail headers */
66 wchar_t *headnames
[] = {
67 L
"Apparently-To", L
"Bcc", L
"bcc", L
"Cc", L
"cc", L
"Confirmed-By",
68 L
"Content", L
"content-length", L
"From", L
"Date", L
"id",
69 L
"Message-I", L
"MIME-Version", L
"Precedence", L
"Return-Path",
70 L
"Received", L
"Reply-To", L
"Status", L
"Subject", L
"To", L
"X-IMAP",
71 L
"X-Lines", L
"X-Sender", L
"X-Sun", L
"X-Status", L
"X-UID",
75 off
, /* mail header processing is off */
76 not_in_hdr
, /* not currently processing a mail header */
77 in_hdr
, /* currently filling hdrbuf with potential hdr lines */
78 flush_hdr
, /* flush hdrbuf; not a header, no special processing */
79 do_hdr
/* process hdrbuf as a mail header */
81 /* current state of hdrbuf */
82 enum hdr_type hdr_state
= not_in_hdr
;
84 wchar_t *hdrbuf
[MAXLINES
]; /* buffer to hold potential mail header lines */
85 int h_lines
; /* index into lines of hdrbuf */
87 void (*(split
))(wchar_t []);
88 extern int scrwidth(wchar_t);
89 extern boolean_t
is_headline(const char *);
92 static void fill_hdrbuf(wchar_t []);
93 static void header_chk(void);
94 static void process_hdrbuf(void);
95 static void leadin(void);
96 static void tabulate(wchar_t []);
97 static void oflush(void);
98 static void pack(wchar_t []);
99 static void msplit(wchar_t []);
100 static void csplit(wchar_t []);
101 static void _wckind_init(void);
102 static void prefix(wchar_t []);
103 static void fmt(FILE *);
104 static int setopt(char *);
105 int _wckind(wchar_t);
108 * Drive the whole formatter by managing input files. Also,
109 * cause initialization of the output stuff and flush it out
114 main(int argc
, char **argv
)
122 setbuf(stdout
, sobuf
);
123 setlocale(LC_ALL
, "");
124 locale
= setlocale(LC_CTYPE
, "");
125 if (strcmp(locale
, "C") == 0) {
143 if ((fi
= fopen(cp
, "r")) == NULL
) {
159 * Read up characters from the passed input file, forming lines,
160 * doing ^H processing, expanding tabs, stripping trailing blanks,
161 * and sending each line down for analysis.
167 wchar_t linebuf
[BUFSIZ
], canonb
[BUFSIZ
];
171 char cbuf
[BUFSIZ
]; /* stores wchar_t string as char string */
176 * Collect a line, doing ^H processing.
177 * Leave tabs for now.
181 while (c
!= L
'\n' && c
!= EOF
&& cp
-linebuf
< BUFSIZ
-1) {
188 if (!(iswprint(c
)) && c
!= L
'\t') {
198 * Toss anything remaining on the input line.
201 while (c
!= L
'\n' && c
!= EOF
)
204 * Expand tabs on the way to canonb.
213 if (cp2
-canonb
< BUFSIZ
-1)
218 if (cp2
-canonb
< BUFSIZ
-1)
221 } while ((col
& 07) != 0);
225 * Swipe trailing blanks from the line.
228 for (cp2
--; cp2
>= canonb
&& *cp2
== L
' '; cp2
--) {
232 /* special processing to look for mail header lines */
238 /* look for an initial mail header line */
239 /* skip initial blanks */
240 for (cp
= canonb
; *cp
== L
' '; cp
++) {
243 * Need to convert string from wchar_t to char,
244 * since this is what is_headline() expects. Since we
245 * only want to make sure cp points to a "From" line
246 * of the email, we don't have to alloc
247 * BUFSIZ * MB_LEN_MAX to cbuf.
249 wcstombs(cbuf
, cp
, (BUFSIZ
- 1));
250 if (is_headline(cbuf
) == B_TRUE
) {
254 /* no mail header line; process normally */
259 /* already saw 1st mail header line; look for more */
260 if (canonb
[0] == L
'\0') {
262 * blank line means end of mail header;
263 * verify current mail header buffer
264 * then process it accordingly
268 /* now process the current blank line */
272 * not a blank line--save this line as
273 * a potential mail header line
282 * end of this file--make sure we process the stuff in
283 * hdrbuf before we're finished
285 if (hdr_state
== in_hdr
) {
292 * Take a line devoid of tabs and other garbage and determine its
293 * blank prefix. If the indent changes, call for a linebreak.
294 * If the input line is blank, echo the blank line on the output.
295 * Finally, if the line minus the prefix is a mail header, try to keep
296 * it on a line by itself.
300 prefix(wchar_t line
[])
304 int nosplit
= 0; /* flag set if line should not be split */
306 if (line
[0] == L
'\0') {
309 if (crown_state
!= c_none
)
310 crown_state
= c_reset
;
313 for (cp
= line
; *cp
== L
' '; cp
++) {
318 * The following horrible expression attempts to avoid linebreaks
319 * when the indent changes due to a paragraph.
322 if (crown_state
== c_none
&& np
!= pfx
&& (np
> pfx
|| abs(pfx
-np
) > 8))
325 * if this is a mail header line, don't split it; flush previous
326 * line, if any, so we don't join this line to it
328 if (hdr_state
== do_hdr
) {
332 /* flush previous line so we don't join this one to it */
335 /* nroff-type lines starting with '.' are neither split nor joined */
336 if (!nosplit
&& (nosplit
= (*cp
== L
'.')))
339 switch (crown_state
) {
342 crown_state
= c_head
;
346 crown_state
= c_body
;
350 crown_state
= c_body
;
355 wscpy(s
, &outbuf
[crown_head
]);
362 /* put whole input line onto outbuf and print it out */
367 * split puts current line onto outbuf, but splits it
368 * at word boundaries, if it exceeds desired length
373 * flush current line so next lines, if any,
374 * won't join to this one
380 * Split up the passed line into output "words" which are
381 * maximal strings of non-blanks with the blank separation
382 * attached at the end. Pass these words along to the output
387 csplit(wchar_t line
[])
390 wchar_t word
[BUFSIZ
];
391 static const wchar_t *srchlist
= (const wchar_t *) L
".:!?";
398 * Collect a 'word,' allowing it to contain escaped
402 while (*cp
&& !(iswspace(*cp
))) {
403 if (*cp
== '\\' && iswspace(cp
[1]))
409 * Guarantee a space at end of line.
410 * Two spaces after end of sentence punctuation.
415 if (wschr(srchlist
, cp
[-1]) != NULL
)
418 while (iswspace(*cp
))
426 msplit(wchar_t line
[])
428 wchar_t *cp
, *cp2
, prev
;
429 wchar_t word
[BUFSIZ
];
430 static const wchar_t *srchlist
= (const wchar_t *) L
".:!?";
438 * Collect a 'word,' allowing it to contain escaped
445 if (_wckind(*cp
) != _wckind(prev
))
446 if (wcsetno(*cp
) != 0 || wcsetno(prev
) != 0)
448 if (*cp
== '\\' && iswspace(cp
[1]))
455 * Guarantee a space at end of line.
456 * Two spaces after end of sentence punctuation.
461 if (wschr(srchlist
, cp
[-1]) != NULL
)
464 while (iswspace(*cp
))
473 * Build up line images from the words passed in. Prefix
474 * each line with correct number of blanks. The buffer "outbuf"
475 * contains the current partial line image, including prefixed blanks.
476 * "outp" points to the next available space therein. When outp is NOSTR,
477 * there is nothing in there yet. At the bottom of this whole mess,
478 * leading tabs are reinserted.
482 * Pack a word onto the output line. If this is the beginning of
483 * the line, push on the appropriately-sized string of blanks first.
484 * If the word won't fit on the current line, flush and begin a new
485 * line. If the word is too long to fit all by itself on a line,
486 * just give it its own and hope for the best.
501 for (cp
= word
; *cp
; *outp
++ = *cp
++) {
509 for (cp
= word
; *cp
; *outp
++ = *cp
++) {
514 * If there is anything on the current output line, send it on
515 * its way. Set outp to NOSTR to indicate the absence of the current
530 * Take the passed line buffer, insert leading tabs where possible, and
531 * output on standard output (finally).
535 tabulate(wchar_t line
[])
541 /* Toss trailing blanks in the output line */
542 cp
= line
+ wslen(line
) - 1;
543 while (cp
>= line
&& *cp
== L
' ')
546 /* Count the leading blank space and tabulate */
547 for (cp
= line
; *cp
== L
' '; cp
++) {
561 putwc(*cp
++, stdout
);
566 * Initialize the output line with the appropriate number of
577 switch (crown_state
) {
580 crown_state
= c_lead
;
586 crown_state
= c_fixup
;
598 for (b
= 0, cp
= outbuf
; b
< l
; b
++)
604 * Is s1 a prefix of s2??
608 ispref(wchar_t *s1
, wchar_t *s2
)
611 while (*s1
!= L
'\0' && *s2
!= L
'\0')
618 * Set an input option
627 if (cp
[1] == 'c' && cp
[2] == '\0') {
628 crown_state
= c_reset
;
631 if (cp
[1] == 's' && cp
[2] == '\0') {
635 if (cp
[1] == 'w' && cp
[2] == '\0') {
645 if (width
<= 0 || width
>= BUFSIZ
-2) {
646 fprintf(stderr
, "fmt: bad width: %d\n", width
);
653 #define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so"
654 #define WCHKIND "_wdchkind_"
656 static int _wckind_c_locale(wchar_t);
658 static int (*__wckind
)(wchar_t) = _wckind_c_locale
;
659 static void *dlhandle
= NULL
;
666 char path
[MAXPATHLEN
+ 1];
669 if (dlhandle
!= NULL
) {
670 (void) dlclose(dlhandle
);
674 locale
= setlocale(LC_CTYPE
, NULL
);
675 if (strcmp(locale
, "C") == 0)
678 (void) sprintf(path
, LIB_WDRESOLVE
, locale
);
680 if ((dlhandle
= dlopen(path
, RTLD_LAZY
)) != NULL
) {
681 __wckind
= (int (*)(wchar_t))dlsym(dlhandle
, WCHKIND
);
682 if (__wckind
!= NULL
)
684 (void) dlclose(dlhandle
);
689 __wckind
= _wckind_c_locale
;
696 return (*__wckind
) (wc
);
701 _wckind_c_locale(wchar_t wc
)
706 * DEPEND_ON_ANSIC: the L notation for wide characters is new in
707 * ANSI C; a K&R compiler won't work.
710 ret
= (iswalnum(wc
) || wc
== L
'_') ? 0 : 1;
712 ret
= wcsetno(wc
) + 1;
719 * Called when done looking for a set of mail header lines.
720 * Either a blank line was seen, or EOF was reached.
722 * Verifies if current hdrbuf of potential mail header lines
723 * is really a mail header. A mail header must be at least 2
724 * lines and more than half of them must start with one of the
725 * known mail header strings in headnames.
727 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid
728 * mail header. Otherwise, it sets hdr_state to flush_hdr.
730 * h_lines = hdrbuf index for next line to be saved;
731 * also indicates current # of lines in potential header
736 wchar_t *cp
; /* ptr to current char of line */
737 wchar_t **hp
; /* ptr to current entry in headnames, */
738 /* the table of known mail header strings */
741 * number of lines in hdrbuf that look
742 * like mail header lines (start with
743 * a known mail header prefix)
746 /* header must have at least 2 lines (h_lines > 1) */
748 hdr_state
= flush_hdr
;
752 * go through each line in hdrbuf and see how many
753 * look like mail header lines
755 for (l
= 0; l
< h_lines
; l
++) {
756 /* skip initial blanks */
757 for (cp
= hdrbuf
[l
]; *cp
== L
' '; cp
++) {
759 for (hp
= &headnames
[0]; *hp
!= (wchar_t *)0; hp
++)
760 if (ispref(*hp
, cp
)) {
766 * if over half match, we'll assume this is a header;
767 * set hdr_state to indicate whether to treat
768 * these lines as mail header (do_hdr) or not (flush_hdr)
770 if (hdrcount
> h_lines
/ 2)
773 hdr_state
= flush_hdr
;
778 * Save given input line into next element of hdrbuf,
779 * as a potential mail header line, to be processed later
780 * once we decide whether or not the contents of hdrbuf is
781 * really a mail header, via header_chk().
783 * Does not allow hdrbuf to exceed MAXLINES lines.
784 * Dynamically allocates space for each line. If we are unable
785 * to allocate space for the current string, stop special mail
786 * header preservation at this point and continue formatting
790 fill_hdrbuf(wchar_t line
[])
792 wchar_t *cp
; /* pointer to characters in input line */
793 int i
; /* index into characters of a hdrbuf line */
795 if (h_lines
>= MAXLINES
) {
797 * if we run over MAXLINES potential mail header
798 * lines, stop checking--this is most likely NOT a
799 * mail header; flush out the hdrbuf, then process
800 * the current 'line' normally.
802 hdr_state
= flush_hdr
;
807 hdrbuf
[h_lines
] = (wchar_t *)malloc(sizeof (wchar_t) *
809 if (hdrbuf
[h_lines
] == NULL
) {
811 fprintf(stderr
, "fmt: unable to do mail header preservation\n");
814 * Can't process mail header; flush current contents
815 * of mail header and continue with no more mail
819 /* hdrbuf is empty; process this line normally */
822 hdr_state
= flush_hdr
;
823 for (i
= 0; i
< h_lines
; i
++) {
832 /* save this line as a potential mail header line */
833 for (i
= 0, cp
= line
; (hdrbuf
[h_lines
][i
] = *cp
) != L
'\0'; i
++, cp
++) {
840 * Outputs the lines currently stored in hdrbuf, according
841 * to the current hdr_state value, assumed to be either do_hdr
843 * This should be called after doing a header_chk() to verify
844 * the hdrbuf and set the hdr_state flag.
851 for (i
= 0; i
< h_lines
; i
++) {
855 hdr_state
= not_in_hdr
;