Merge commit '6e270ca825f06ec0e2d77db462b83074ec585f2f'
[unleashed.git] / usr / src / cmd / deroff / deroff.c
bloba9322f5c8d8916bc49102528a1ef5dfadf4eebd4
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
30 #pragma ident "%Z%%M% %I% %E% SMI"
32 #include <assert.h>
33 #include <errno.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <locale.h>
38 #include <sys/varargs.h>
41 * Deroff command -- strip troff, eqn, and Tbl sequences from a file.
42 * Takes three flag arguments. -w causes output of one word per line
43 * rather than in the original format.
44 * -mm (or -ms) causes the corresponding macros to be interpreted
45 * so that just sentences are output;
46 * -ml also gets rid of lists.
47 * -i causes deroff to ignore .so and .nx commands.
48 * Deroff follows .so and .nx commands, removes contents of macro
49 * definitions, equations (both .EQ ... .EN and $...$),
50 * Tbl command sequences, and Troff backslash constructions.
52 * All input is through the C macro; the most recently read character
53 * is in c.
/*
 * Input macros.  Each one reads from 'infile' and leaves the character it
 * read in the global 'c'.
 *
 * C		read one character; at end of file continue via eof(); when
 *		the character equals 'ldelim' and no .so file is active
 *		(filesp == files) the in-line equation is consumed by skeqn().
 * C1		like C but without the equation-delimiter check.
 * SKIP		loop header that reads up to and including the next newline;
 *		the caller supplies the (normally empty) body, e.g. "SKIP;".
 *		Note that it always reads at least one character.
 * SKIP_TO_COM	read through two newlines, then keep reading until a '.'
 *		that directly follows a newline and is itself followed by a
 *		character not greater than 'Z'.  On exit 'c' holds that
 *		following character.  Wrapped in do { } while (0) so that it
 *		behaves as a single statement wherever "SKIP_TO_COM;" is used.
 */
#define	C	((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
#define	C1	((c = getc(infile)) == EOF ? eof() : c)
#define	SKIP	while (C != '\n')
#define	SKIP_TO_COM	do { \
				SKIP; SKIP; pc = c; \
				while ((C != '.') || (pc != '\n') || \
				    (C > 'Z')) { \
					pc = c; \
				} \
			} while (0)
/* Boolean values used for the flag variables. */
#define	YES	1
#define	NO	0

/* Values of 'mac': which macro package -m selected. */
#define	MS	0
#define	MM	1

/* Values passed as the 'cnst' argument of regline()/putmac(). */
#define	ONE	1
#define	TWO	2

/*
 * "No delimiter" value for ldelim/rdelim.  Parenthesized so the negative
 * constant cannot combine with a neighbouring operator when expanded.
 */
#define	NOCHAR	(-2)

/* Character classes stored in chars[]. */
#define	SPECIAL	0
#define	APOS	1
#define	DIGIT	2
#define	LETTER	3

/* Initial size of the line buffer; regline() doubles it when needed. */
#define	MAXLINESZ	512
81 static int wordflag = NO;
82 static int msflag = NO;
83 static int iflag = NO;
84 static int mac = MM;
85 static int disp = 0;
86 static int inmacro = NO;
87 static int intable = NO;
88 static int lindx;
89 static size_t linesize = MAXLINESZ;
91 static char chars[128]; /* SPECIAL, APOS, DIGIT, or LETTER */
93 static char *line = NULL;
95 static char c;
96 static int pc;
97 static int ldelim = NOCHAR;
98 static int rdelim = NOCHAR;
100 static int argc;
101 static char **argv;
103 extern int optind;
104 extern char *optarg;
105 static char fname[50];
106 static FILE *files[15];
107 static FILE **filesp;
108 static FILE *infile;
110 static void backsl(void);
111 static void comline(void);
112 static char *copys(char *);
113 static int eof(void);
114 static void eqn(void);
115 static void fatal(const char *, ...);
116 static void fatal_msg(char *);
117 static void getfname(void);
118 static void macro(void);
119 static FILE *opn(char *);
120 static void putmac(char *, int);
121 static void putwords(int);
122 static void regline(int, int);
123 static void sce(void);
124 static int skeqn(void);
125 static void sdis(char, char);
126 static void stbl(void);
127 static void tbl(void);
128 static void usage(void);
129 static void work(void) __NORETURN;
132 main(int ac, char **av)
134 int i;
135 int errflg = 0;
136 int optchar;
138 (void) setlocale(LC_ALL, "");
139 #if !defined(TEXT_DOMAIN)
140 #define TEXT_DOMAIN "SYS_TEST"
141 #endif
142 (void) textdomain(TEXT_DOMAIN);
143 argc = ac;
144 argv = av;
145 while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
146 switch (optchar) {
147 case 'w':
148 wordflag = YES;
149 break;
150 case 'm':
151 msflag = YES;
152 if (*optarg == 'm')
153 mac = MM;
154 else if (*optarg == 's')
155 mac = MS;
156 else if (*optarg == 'l')
157 disp = 1;
158 else
159 errflg++;
160 break;
161 case 'i':
162 iflag = YES;
163 break;
164 case '?':
165 errflg++;
168 if (errflg) {
169 usage();
170 return (1);
172 if (optind == argc)
173 infile = stdin;
174 else
175 infile = opn(argv[optind++]);
176 files[0] = infile;
177 filesp = &files[0];
179 for (i = 'a'; i <= 'z'; ++i)
180 chars[i] = LETTER;
181 for (i = 'A'; i <= 'Z'; ++i)
182 chars[i] = LETTER;
183 for (i = '0'; i <= '9'; ++i)
184 chars[i] = DIGIT;
185 chars['\''] = APOS;
186 chars['&'] = APOS;
187 work();
188 /* NOTREACHED */
192 static int
193 skeqn(void)
195 while ((c = getc(infile)) != rdelim) {
196 if (c == EOF) {
197 c = eof();
198 } else if (c == '"') {
199 while ((c = getc(infile)) != '"') {
200 if (c == EOF) {
201 c = eof();
202 } else if (c == '\\') {
203 if ((c = getc(infile)) == EOF) {
204 c = eof();
210 if (msflag) {
211 return (c = 'x');
213 return (c = ' ');
/*
 * Open the file named 'p' for reading and return its stream.  If the file
 * cannot be opened the failure is reported through fatal(), which exits.
 * Callers must pass a non-null 'p'.
 */
static FILE *
opn(char *p)
{
	FILE *stream;

	assert(p != NULL);
	stream = fopen(p, "r");
	if (stream == NULL) {
		fatal(gettext("Cannot open file %s: %s\n"), p,
		    strerror(errno));
	}

	return (stream);
}
232 static int
233 eof(void)
235 if (infile != stdin)
236 (void) fclose(infile);
237 if (filesp > files) {
238 infile = *--filesp;
239 } else if (optind < argc) {
240 infile = opn(argv[optind++]);
241 } else {
242 exit(0);
245 return (C);
250 static void
251 getfname(void)
253 char *p;
254 struct chain {
255 struct chain *nextp;
256 char *datap;
258 struct chain *q;
259 static struct chain *namechain = NULL;
261 while (C == ' ')
264 for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
265 (c != '\\'); ++p) {
266 (void) C;
268 *p = '\0';
269 while (c != '\n') {
270 (void) C;
273 /* see if this name has already been used */
274 for (q = namechain; q; q = q->nextp)
275 if (strcmp(fname, q->datap) != 0) {
276 fname[0] = '\0';
277 return;
280 q = (struct chain *)calloc(1, sizeof (*namechain));
281 q->nextp = namechain;
282 q->datap = copys(fname);
283 namechain = q;
/*
 * Print "deroff: " followed by a printf-style message on stderr and exit
 * with status 1.
 *
 * Functions calling fatal() should ensure 'format' and
 * arguments are non-null.
 */
static void
fatal(const char *format, ...)
{
	va_list alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	/* every va_start() must be paired with va_end() */
	va_end(alist);
	exit(1);
}
/*
 * Report the fixed message 's' on stderr, prefixed with the program name,
 * and exit with status 1.  Callers must pass a non-null 's'.
 */
static void
fatal_msg(char *s)
{
	const char *fmt;

	assert(s != NULL);
	fmt = gettext("deroff: %s\n");
	(void) fprintf(stderr, fmt, s);
	exit(1);
}
/* Write the one-line usage summary to stderr. */
static void
usage(void)
{
	const char *msg = gettext(
	    "usage: deroff [ -w ] [ -m (m s l) ] [ -i ] [ file ] ... \n");

	(void) fputs(msg, stderr);
}
320 static void
321 work(void)
324 for (;;) {
325 if ((C == '.') || (c == '\''))
326 comline();
327 else
328 regline(NO, TWO);
333 static void
334 regline(int macline, int cnst)
337 if (line == NULL) {
338 if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
339 fatal_msg(gettext("Cannot allocate memory"));
343 lindx = 0;
344 line[lindx] = c;
345 for (;;) {
346 if (c == '\\') {
347 line[lindx] = ' ';
348 backsl();
349 if (c == '%') { /* no blank for hyphenation char */
350 lindx--;
353 if (c == '\n') {
354 break;
357 * We're just about to add another character to the line
358 * buffer so ensure we don't overrun it.
360 if (++lindx >= linesize - 1) {
361 linesize = linesize * 2;
362 if ((line = reallocarray(line, linesize,
363 sizeof (char))) == NULL) {
364 fatal_msg(gettext("Cannot allocate memory"));
367 if (intable && (c == 'T')) {
368 line[lindx] = C;
369 if ((c == '{') || (c == '}')) {
370 line[lindx - 1] = ' ';
371 line[lindx] = C;
373 } else {
374 line[lindx] = C;
378 line[lindx] = '\0';
380 if (line[0] != '\0') {
381 if (wordflag) {
382 putwords(macline);
383 } else if (macline) {
384 putmac(line, cnst);
385 } else {
386 (void) puts(line);
394 static void
395 putmac(char *s, int cnst)
397 char *t;
399 while (*s) {
400 while ((*s == ' ') || (*s == '\t')) {
401 (void) putchar(*s++);
403 for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
405 if (*s == '\"')
406 s++;
407 if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
408 (chars[s[1]] == LETTER)) {
409 while (s < t) {
410 if (*s == '\"')
411 s++;
412 else
413 (void) putchar(*s++);
415 } else {
416 s = t;
419 (void) putchar('\n');
424 static void
425 putwords(int macline) /* break into words for -w option */
427 char *p, *p1;
428 int i, nlet;
430 for (p1 = line; ; ) {
431 /* skip initial specials ampersands and apostrophes */
432 while (chars[*p1] < DIGIT) {
433 if (*p1++ == '\0')
434 return;
436 nlet = 0;
437 for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
438 if (i == LETTER)
439 ++nlet;
442 if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
443 (macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
444 (chars[p1[1]] == LETTER))) {
445 /* delete trailing ampersands and apostrophes */
446 while ((p[-1] == '\'') || (p[-1] == '&')) {
447 --p;
449 while (p1 < p) {
450 (void) putchar(*p1++);
452 (void) putchar('\n');
453 } else {
454 p1 = p;
461 static void
462 comline(void)
464 int c1, c2;
466 com:
467 while ((C == ' ') || (c == '\t'))
469 comx:
470 if ((c1 = c) == '\n')
471 return;
472 c2 = C;
473 if ((c1 == '.') && (c2 != '.'))
474 inmacro = NO;
475 if (c2 == '\n')
476 return;
478 if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
479 eqn();
480 } else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
481 (c2 == '&')) && (filesp == files)) {
482 if (msflag) {
483 stbl();
484 } else {
485 tbl();
487 } else if ((c1 == 'T') && (c2 == 'E')) {
488 intable = NO;
489 } else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
490 macro();
491 } else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
492 macro();
493 } else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
494 macro();
495 } else if ((c1 == 's') && (c2 == 'o')) {
496 if (iflag) {
497 SKIP;
498 } else {
499 getfname();
500 if (fname[0]) {
501 infile = *++filesp = opn(fname);
504 } else if ((c1 == 'n') && (c2 == 'x')) {
505 if (iflag) {
506 SKIP;
507 } else {
508 getfname();
509 if (fname[0] == '\0') {
510 exit(0);
512 if (infile != stdin) {
513 (void) fclose(infile);
515 infile = *filesp = opn(fname);
517 } else if ((c1 == 'h') && (c2 == 'w')) {
518 SKIP;
519 } else if (msflag && (c1 == 'T') && (c2 == 'L')) {
520 SKIP_TO_COM;
521 goto comx;
522 } else if (msflag && (c1 == 'N') && (c2 == 'R')) {
523 SKIP;
524 } else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
525 if (mac == MM) {
526 SKIP;
527 } else {
528 SKIP_TO_COM;
529 goto comx;
531 } else if (msflag && (c1 == 'F') && (c2 == 'S')) {
532 SKIP_TO_COM;
533 goto comx;
534 } else if (msflag && (c1 == 'S') && (c2 == 'H')) {
535 SKIP_TO_COM;
536 goto comx;
537 } else if (msflag && (c1 == 'N') && (c2 == 'H')) {
538 SKIP_TO_COM;
539 goto comx;
540 } else if (msflag && (c1 == 'O') && (c2 == 'K')) {
541 SKIP_TO_COM;
542 goto comx;
543 } else if (msflag && (c1 == 'N') && (c2 == 'D')) {
544 SKIP;
545 } else if (msflag && (mac == MM) && (c1 == 'H') &&
546 ((c2 == ' ') || (c2 == 'U'))) {
547 SKIP;
548 } else if (msflag && (mac == MM) && (c2 == 'L')) {
549 if (disp || (c1 == 'R')) {
550 sdis('L', 'E');
551 } else {
552 SKIP;
553 (void) putchar('.');
555 } else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
556 (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
557 sdis(c1, 'E'); /* removed RS-RE */
558 } else if (msflag && (c1 == 'K' && c2 == 'F')) {
559 sdis(c1, 'E');
560 } else if (msflag && (c1 == 'n') && (c2 == 'f')) {
561 sdis('f', 'i');
562 } else if (msflag && (c1 == 'c') && (c2 == 'e')) {
563 sce();
564 } else {
565 if ((c1 == '.') && (c2 == '.')) {
566 while (C == '.')
569 ++inmacro;
570 if ((c1 <= 'Z') && msflag) {
571 regline(YES, ONE);
572 } else {
573 regline(YES, TWO);
575 --inmacro;
/*
 * Handle the start of a macro definition (.de, .ig or .am).
 *
 * With -m the whole definition is discarded here: lines are skipped until
 * one begins with two '.' characters followed by a character that is not
 * '.', and the remainder of that line is skipped as well.
 *
 * Without -m only the request line itself is skipped and inmacro is set;
 * comline() clears it again.
 */
static void
macro(void)
{
	if (msflag) {
		/* look for .. */
		do {
			SKIP;
			/*
			 * Each C below reads a further character only when
			 * the test before it was false, so the loop ends
			 * after reading '.', '.', then a non-'.' character.
			 */
		} while ((C != '.') || (C != '.') || (C == '.'));
		if (c != '\n') {
			SKIP;
		}
		return;
	}
	SKIP;
	inmacro = YES;
}
601 static void
602 sdis(char a1, char a2)
604 int c1, c2;
605 int eqnf;
606 int notdone = 1;
607 eqnf = 1;
608 SKIP;
609 while (notdone) {
610 while (C != '.')
611 SKIP;
612 if ((c1 = C) == '\n')
613 continue;
614 if ((c2 = C) == '\n')
615 continue;
616 if ((c1 == a1) && (c2 == a2)) {
617 SKIP;
618 if (eqnf)
619 (void) putchar('.');
620 (void) putchar('\n');
621 return;
622 } else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
623 eqn();
624 eqnf = 0;
625 } else {
626 SKIP;
631 static void
632 tbl(void)
634 while (C != '.')
636 SKIP;
637 intable = YES;
/*
 * Start of a table when -m is in effect: discard the table.
 *
 * Input is read up to the next '.', then SKIP_TO_COM moves on to the next
 * request whose name starts with a character not greater than 'Z'.
 * Unless that request is "TE", reading continues until a '.' that follows
 * a newline is itself followed by 'T' and 'E'.
 */
static void
stbl(void)
{
	while (C != '.')
		;
	SKIP_TO_COM;
	/* 'c' is the first character of the request name found above */
	if ((c != 'T') || (C != 'E')) {
		SKIP;
		pc = c;
		/*
		 * The tests are evaluated left to right and each C reads a
		 * further character only when the tests before it were
		 * false.
		 */
		while ((C != '.') || (pc != '\n') ||
		    (C != 'T') || (C != 'E')) {
			pc = c;
		}
	}
}
/*
 * Skip an equation introduced by .EQ, up to and including the .EN line.
 *
 * Input is read with C1, so in-line equation delimiters are not acted on
 * here.  A line starting with "delim" sets ldelim/rdelim from the two
 * characters that follow, or resets them to NOCHAR for "off" or when a
 * newline comes first.
 *
 * With -m, and if no "del..." line was seen (dflg still set), the equation
 * is replaced on output by "x ", followed by ". " when the last character
 * of the most recently skipped line was a '.'.
 */
static void
eqn(void)
{
	int c1, c2;
	int dflg;
	int last;

	last = 0;
	dflg = 1;
	SKIP;

	for (;;) {
		if ((C1 == '.') || (c == '\'')) {
			while ((C1 == ' ') || (c == '\t'))
				;
			if ((c == 'E') && (C1 == 'N')) {
				SKIP;
				if (msflag && dflg) {
					(void) putchar('x');
					(void) putchar(' ');
					if (last) {
						(void) putchar('.');
						(void) putchar(' ');
					}
				}
				return;
			}
		} else if (c == 'd') {	/* look for delim */
			if ((C1 == 'e') && (C1 == 'l')) {
				if ((C1 == 'i') && (C1 == 'm')) {
					while (C1 == ' ')
						;
					/*
					 * c2 is assigned only when c1 is
					 * not a newline; it is not used
					 * otherwise.
					 */
					if (((c1 = c) == '\n') ||
					    ((c2 = C1) == '\n') ||
					    ((c1 == 'o') && (c2 == 'f') &&
					    (C1 == 'f'))) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
				dflg = 0;
			}
		}

		/* skip the rest of the line, noting whether it ends in '.' */
		if (c != '\n') {
			while (C1 != '\n') {
				if (c == '.') {
					last = 1;
				} else {
					last = 0;
				}
			}
		}
	}
}
/*
 * Skip over a complete backslash construction.  The backslash itself has
 * already been read; on return 'c' holds the last character consumed.
 *
 * For size changes (\s) lindx is decremented, so the caller's line index
 * is moved back by one.
 */
static void
backsl(void)
{
	int bdelim;

sw:	switch (C) {
	case '"':
		/* comment: discard the rest of the line */
		SKIP;
		return;
	case 's':
		if (C == '\\') {
			backsl();
		} else {
			while ((C >= '0') && (c <= '9'))
				;
			/* push back the character that ended the digits */
			(void) ungetc(c, infile);
			c = '0';
		}
		lindx--;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one-character name unless it is introduced by '(' */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two-character name */
		if (C != '\n') {
			(void) C;
		}
		return;

	case '$':
		(void) C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/*
		 * The next character is the delimiter; skip to its second
		 * occurrence or to the end of the line, handling nested
		 * backslash constructions on the way.
		 */
		if ((bdelim = C) == '\n')
			return;
		while ((C != '\n') && (c != bdelim))
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* within a macro, interpret what follows the "\\" */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */
	default:
		return;
	}
}
/*
 * Return a newly allocated copy of the string 's'.  Allocation failure is
 * fatal, so the result is never NULL.  Nothing in this file frees the
 * copy.
 */
static char *
copys(char *s)
{
	size_t len = strlen(s) + 1;	/* includes the terminating NUL */
	char *t;

	/* no narrowing cast: the length stays a size_t */
	if ((t = calloc(len, sizeof (*t))) == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	(void) memcpy(t, s, len);
	return (t);
}
794 static void
795 sce(void)
797 char *ap;
798 int n, i;
799 char a[10];
801 for (ap = a; C != '\n'; ap++) {
802 *ap = c;
803 if (ap == &a[9]) {
804 SKIP;
805 ap = a;
806 break;
809 if (ap != a) {
810 n = atoi(a);
811 } else {
812 n = 1;
814 for (i = 0; i < n; ) {
815 if (C == '.') {
816 if (C == 'c') {
817 if (C == 'e') {
818 while (C == ' ')
820 if (c == '0') {
821 break;
822 } else {
823 SKIP;
825 } else {
826 SKIP;
828 } else {
829 SKIP;
831 } else {
832 SKIP;
833 i++;