Merge branch 'master' of ssh://swildner@crater.dragonflybsd.org/repository/git/dragonfly
[dragonfly.git] / usr.bin / checknr / checknr.c
blob2fce0512576ebb839dbe592f3dd13e5485284366
1 /*
2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
33 * @(#) Copyright (c) 1980, 1993 The Regents of the University of California. All rights reserved.
34 * @(#)checknr.c 8.1 (Berkeley) 6/6/93
36 * $DragonFly: src/usr.bin/checknr/checknr.c,v 1.13 2008/11/11 01:02:40 pavalos Exp $
#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacities of the fixed-size tables used throughout this file. */
#define	MAXSTK	100	/* Stack size */
#define	MAXBR	100	/* Max number of bracket pairs known */
#define	MAXCMDS	500	/* Max number of commands known */

/* Forward declarations of the file-local routines. */
static void	addcmd(char *myline);
static void	addmac(const char *mac);
static int	binsrch(const char *mac, int *slot_ptr);
static void	checkknown(const char *mac);
static void	chkcmd(const char *mac);
static void	complain(int i);
static int	eq(const char *s1, const char *s2);
static void	nomatch(const char *mac);
static void	pe(int mylineno);
static void	process(FILE *f);
static void	prop(int i);
static void	usage(void);
/*
 * The stack on which we remember what we've seen so far.
 * Every opener (macro or in-line escape) gets an entry when it is seen
 * and loses it again when its closer turns up; entries still present at
 * end of input are reported as unmatched.
 */
struct stkstr {
	int	opno;	/* number of opening bracket (index into br[]) */
	int	pl;	/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro */
	int	parm;	/* parm to size, font, etc */
	int	lno;	/* line number the thing came in on */
};

struct stkstr	stk[MAXSTK];
int		stktop;		/* index of the top entry, -1 when empty */
/*
 * The kinds of opening and closing brackets.
 * The table is scanned from the front, so the first entry naming a macro
 * wins; unused trailing slots have an empty opbr and can be filled in
 * from the command line with -a.
 */
struct brstr {
	char	opbr[3];	/* name of the opening macro */
	char	clbr[3];	/* name of the closing macro */
};

/* Fixed slots shared with the in-line \s and \f escapes. */
#define	SZ	0
#define	FT	1

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* SZ: also \s */
	{ "ft", "ft" },	/* FT: also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" }
};
131 * All commands known to nroff, plus macro packages.
132 * Used so we can complain about unrecognized commands.
134 char knowncmds[MAXCMDS][3] = {
135 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
136 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
137 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
138 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
139 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
140 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
141 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
142 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
143 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
144 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
145 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
146 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
147 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
148 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
149 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
150 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
151 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
152 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
153 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
154 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
155 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
156 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
157 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
158 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
159 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
160 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
161 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
162 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
163 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
164 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
165 "yr"
/* State shared by the routines below. */
const char	*cfilename;	/* name of current file */
char		line[256];	/* the current line */
int		lineno;		/* current line number in input file */
int		nfiles;		/* number of files to process */
int		ncmds;		/* size of knowncmds */
int		fflag;		/* -f: ignore \f */
int		sflag;		/* -s: ignore \s */
177 * checknr: check an nroff/troff input file for matching macro calls.
178 * we also attempt to match size and font changes, but only the embedded
179 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
180 * later but for now think of these restrictions as contributions to
181 * structured typesetting.
184 main(int argc, char **argv)
186 FILE *f;
187 int i;
188 char *cp;
189 char b1[4];
191 /* Figure out how many known commands there are */
192 ncmds = 0;
193 while (ncmds < MAXCMDS && knowncmds[ncmds][0] != '\0')
194 ncmds++;
195 while (argc > 1 && argv[1][0] == '-') {
196 switch(argv[1][1]) {
198 /* -a: add pairs of macros */
199 case 'a':
200 if ((strlen(argv[1]) - 2) % 6 != 0)
201 usage();
202 /* look for empty macro slots */
203 i = 0;
204 while (i < MAXBR && br[i].opbr[0] != '\0')
205 i++;
206 if (i >= MAXBR) {
207 errx(1, "Only %d known macro-pairs allowed",
208 MAXBR);
210 for (cp = argv[1] + 3; cp[-1]; cp += 6) {
211 strncpy(br[i].opbr, cp, 2);
212 strncpy(br[i].clbr, cp + 3, 2);
214 * known pairs are also known cmds
216 addmac(br[i].opbr);
217 addmac(br[i].clbr);
218 i++;
220 break;
222 /* -c: add known commands */
223 case 'c':
224 i = strlen(argv[1]) - 2;
225 if (i % 3 != 0)
226 usage();
227 for (cp = argv[1] + 3; cp[-1]; cp += 3) {
228 if (cp[2] && cp[2] != '.')
229 usage();
230 strncpy(b1, cp, 2);
231 b1[2] = '\0';
232 addmac(b1);
234 break;
236 /* -f: ignore font changes */
237 case 'f':
238 fflag = 1;
239 break;
241 /* -s: ignore size changes */
242 case 's':
243 sflag = 1;
244 break;
245 default:
246 usage();
248 argc--; argv++;
251 nfiles = argc - 1;
253 if (nfiles > 0) {
254 for (i = 1; i < argc; i++) {
255 cfilename = argv[i];
256 f = fopen(cfilename, "r");
257 if (f == NULL)
258 warn("%s", cfilename);
259 else {
260 process(f);
261 fclose(f);
264 } else {
265 cfilename = "stdin";
266 process(stdin);
268 exit(0);
/* Write the command synopsis to stderr and terminate with status 1. */
static void
usage(void)
{
	fputs("usage: checknr [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "file\n", stderr);
	exit(1);
}
280 static void
281 process(FILE *f)
283 int i, n;
284 char mac[5]; /* The current macro or nroff command */
285 int pl;
287 stktop = -1;
288 for (lineno = 1; fgets(line, sizeof(line), f); lineno++) {
289 if (line[0] == '.') {
291 * find and isolate the macro/command name.
293 strncpy(mac, line + 1, 4);
294 if (isspace(mac[0])) {
295 pe(lineno);
296 printf("Empty command\n");
297 } else if (isspace(mac[1])) {
298 mac[1] = 0;
299 } else if (isspace(mac[2])) {
300 mac[2] = 0;
301 } else if (mac[0] != '\\' || mac[1] != '\"') {
302 pe(lineno);
303 printf("Command too long\n");
307 * Is it a known command?
309 checkknown(mac);
312 * Should we add it?
314 if (eq(mac, "de"))
315 addcmd(line);
317 chkcmd(mac);
321 * At this point we process the line looking
322 * for \s and \f.
324 for (i = 0; line[i]; i++) {
325 if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
326 if (!sflag && line[++i] == 's') {
327 pl = line[++i];
328 if (isdigit(pl)) {
329 n = pl - '0';
330 pl = ' ';
331 } else
332 n = 0;
333 while (isdigit(line[++i]))
334 n = 10 * n + line[i] - '0';
335 i--;
336 if (n == 0) {
337 if (stk[stktop].opno == SZ) {
338 stktop--;
339 } else {
340 pe(lineno);
341 printf("unmatched \\s0\n");
343 } else {
344 stk[++stktop].opno = SZ;
345 stk[stktop].pl = pl;
346 stk[stktop].parm = n;
347 stk[stktop].lno = lineno;
349 } else if (!fflag && line[i] == 'f') {
350 n = line[++i];
351 if (n == 'P') {
352 if (stk[stktop].opno == FT) {
353 stktop--;
354 } else {
355 pe(lineno);
356 printf("unmatched \\fP\n");
358 } else {
359 stk[++stktop].opno = FT;
360 stk[stktop].pl = 1;
361 stk[stktop].parm = n;
362 stk[stktop].lno = lineno;
369 * We've hit the end and look at all this stuff that hasn't been
370 * matched yet! Complain, complain.
372 for (i = stktop; i >= 0; i--) {
373 complain(i);
377 static void
378 complain(int i)
380 pe(stk[i].lno);
381 printf("Unmatched ");
382 prop(i);
383 printf("\n");
386 static void
387 prop(int i)
389 if (stk[i].pl == 0)
390 printf(".%s", br[stk[i].opno].opbr);
391 else switch(stk[i].opno) {
392 case SZ:
393 printf("\\s%c%d", stk[i].pl, stk[i].parm);
394 break;
395 case FT:
396 printf("\\f%c", stk[i].parm);
397 break;
398 default:
399 printf("Bug: stk[%d].opno = %d = .%s, .%s",
400 i, stk[i].opno, br[stk[i].opno].opbr,
401 br[stk[i].opno].clbr);
405 static void
406 chkcmd(const char *mac)
408 int i;
411 * Check to see if it matches top of stack.
413 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
414 stktop--; /* OK. Pop & forget */
415 else {
416 /* No. Maybe it's an opener */
417 for (i = 0; br[i].opbr[0] != '\0'; i++) {
418 if (eq(mac, br[i].opbr)) {
419 /* Found. Push it. */
420 stktop++;
421 stk[stktop].opno = i;
422 stk[stktop].pl = 0;
423 stk[stktop].parm = 0;
424 stk[stktop].lno = lineno;
425 break;
428 * Maybe it's an unmatched closer.
429 * NOTE: this depends on the fact
430 * that none of the closers can be
431 * openers too.
433 if (eq(mac, br[i].clbr)) {
434 nomatch(mac);
435 break;
441 static void
442 nomatch(const char *mac)
444 int i, j;
447 * Look for a match further down on stack
448 * If we find one, it suggests that the stuff in
449 * between is supposed to match itself.
451 for (j = stktop; j >= 0; j--) {
452 if (eq(mac, br[stk[j].opno].clbr)) {
453 /* Found. Make a good diagnostic. */
454 if (j == stktop - 2) {
456 * Check for special case \fx..\fR and don't
457 * complain.
459 if (stk[j + 1].opno == FT &&
460 stk[j + 1].parm != 'R' &&
461 stk[j + 2].opno == FT &&
462 stk[j + 2].parm == 'R') {
463 stktop = j - 1;
464 return;
467 * We have two unmatched frobs. Chances are
468 * they were intended to match, so we mention
469 * them together.
471 pe(stk[j + 1].lno);
472 prop(j + 1);
473 printf(" does not match %d: ", stk[j + 2].lno);
474 prop(j + 2);
475 printf("\n");
476 } else {
477 for (i = j + 1; i <= stktop; i++) {
478 complain(i);
481 stktop = j - 1;
482 return;
485 /* Didn't find one. Throw this away. */
486 pe(lineno);
487 printf("Unmatched .%s\n", mac);
/* eq: return 1 when the two strings are identical, 0 otherwise */
static int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
497 /* print the first part of an error message, given the line number */
498 static void
499 pe(int mylineno)
501 if (nfiles > 1)
502 printf("%s: ", cfilename);
503 printf("%d: ", mylineno);
506 static void
507 checkknown(const char *mac)
509 if (eq(mac, "."))
510 return;
511 if (binsrch(mac, NULL) >= 0)
512 return;
513 if (mac[0] == '\\' && mac[1] == '"') /* comments */
514 return;
516 pe(lineno);
517 printf("Unknown command: .%s\n", mac);
521 * We have a .de xx line in "line". Add xx to the list of known commands.
523 static void
524 addcmd(char *myline)
526 char *mac;
528 /* grab the macro being defined */
529 mac = myline + 4;
530 while (isspace(*mac))
531 mac++;
532 if (*mac == 0) {
533 pe(lineno);
534 printf("illegal define: %s\n", myline);
535 return;
537 mac[2] = 0;
538 if (isspace(mac[1]) || mac[1] == '\\')
539 mac[1] = 0;
540 addmac(mac);
544 * Add mac to the list. We should really have some kind of tree
545 * structure here, but the loop below is reasonably fast.
547 static void
548 addmac(const char *mac)
550 int i, slot;
552 if (ncmds >= MAXCMDS) {
553 errx(1, "Only %d known commands allowed", MAXCMDS);
556 /* Don't try to add it if it's already in the table. */
557 if (binsrch(mac, &slot) >= 0) {
558 #ifdef DEBUG
559 printf("binsrch(%s) -> already in table\n", mac);
560 #endif /* DEBUG */
561 return;
563 #ifdef DEBUG
564 printf("binsrch(%s) -> %d\n", mac, slot);
565 #endif
566 for (i = ncmds - 1; i >= slot; i--) {
567 strncpy(knowncmds[i + 1], knowncmds[i], 2);
569 strncpy(knowncmds[slot], mac, 2);
570 ncmds++;
571 #ifdef DEBUG
572 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
573 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
574 knowncmds[slot+2], ncmds);
575 #endif
579 * Do a binary search in knowncmds for mac.
580 * If found, return the index. If not, return -1.
581 * Also, if not found, and if slot_ptr is not NULL,
582 * set *slot_ptr to where it should have been.
584 static int
585 binsrch(const char *mac, int *slot_ptr)
587 const char *p; /* pointer to current cmd in list */
588 int d; /* difference if any */
589 int mid; /* mid point in binary search */
590 int top, bot; /* boundaries of bin search, inclusive */
592 top = ncmds - 1;
593 bot = 0;
594 while (top >= bot) {
595 mid = (top + bot) / 2;
596 p = knowncmds[mid];
597 d = p[0] - mac[0];
598 if (d == 0)
599 d = p[1] - mac[1];
600 if (d == 0)
601 return mid;
602 if (d < 0)
603 bot = mid + 1;
604 else
605 top = mid - 1;
607 if (slot_ptr != NULL)
608 *slot_ptr = bot; /* place it would have gone */
609 return -1;