640 number_to_scaled_string is duplicated in several commands
[unleashed.git] / usr / src / cmd / checknr / checknr.c
blobfca907e2e87e958eddcf6c9a3a656cf27482d8e0
1 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2 /* All Rights Reserved */
5 /*
6 * Copyright (c) 1980 Regents of the University of California.
7 * All rights reserved. The Berkeley software License Agreement
8 * specifies the terms and conditions for redistribution.
9 */
12 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
13 * Use is subject to license terms.
16 #pragma ident "%Z%%M% %I% %E% SMI"
19 * checknr: check an nroff/troff input file for matching macro calls.
20 * we also attempt to match size and font changes, but only the embedded
21 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
22 * later but for now think of these restrictions as contributions to
23 * structured typesetting.
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <locale.h>
#define	MAXSTK	100	/* Stack size (entries allocated at startup) */
static int maxstk;	/* entries currently allocated for stk */
#define	MAXBR	100	/* Max number of bracket pairs known */
#define	MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * One entry is pushed per opening macro, \s size change or \f font
 * change, and popped again when the matching closer shows up.
 */
static struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} *stk;
static int stktop;	/* index of the top entry; -1 when empty */
/*
 * The kinds of opening and closing brackets.
 * Each entry pairs the macro that opens a construct with the macro that
 * closes it.  The table ends at the first entry whose opbr is null; the
 * -a option appends further pairs at that position.
 */
static struct brstr {
	char *opbr;	/* opening macro name */
	char *clbr;	/* closing macro name */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define	SZ	0
	{ "sz", "sz" },	/* also \s */
#define	FT	1
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end-of-table marker */
	{ 0, 0 }
};
107 * All commands known to nroff, plus macro packages.
108 * Used so we can complain about unrecognized commands.
110 static char *knowncmds[MAXCMDS] = {
111 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
112 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
113 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
114 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
115 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
116 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
117 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
118 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
119 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
120 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
121 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
122 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
123 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
124 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
125 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
126 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
127 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
128 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
129 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
130 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
131 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
132 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
133 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
134 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
135 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
136 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
137 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
138 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
139 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
140 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
141 "yr", 0
static int lineno;		/* current line number in input file */
static char line[256];		/* the current line */
static char *cfilename;		/* name of current file */
static int nfiles;		/* number of files to process */
static int fflag;		/* -f: ignore \f */
static int sflag;		/* -s: ignore \s */
static int ncmds;		/* size of knowncmds */
static int slot;		/* slot in knowncmds found by binsrch */

/*
 * Forward declarations.  Functions taking no arguments are declared with
 * (void) so that they are real prototypes and calls are type-checked.
 */
static void growstk(void);
static void usage(void);
static void process(FILE *f);
static void complain(int i);
static void prop(int i);
static void chkcmd(char *line, char *mac);
static void nomatch(char *mac);
static int eq(char *s1, char *s2);
static void pe(int lineno);
static void checkknown(char *mac);
static void addcmd(char *line);
static void addmac(char *mac);
static int binsrch(char *mac);
167 static void
168 growstk()
170 stktop++;
171 if (stktop >= maxstk) {
172 maxstk *= 2;
173 stk = (struct stkstr *)realloc(stk,
174 sizeof (struct stkstr) * maxstk);
179 main(argc, argv)
180 int argc;
181 char **argv;
183 FILE *f;
184 int i;
185 char *cp;
186 char b1[4];
188 (void) setlocale(LC_ALL, "");
189 #if !defined(TEXT_DOMAIN)
190 #define TEXT_DOMAIN "SYS_TEST"
191 #endif
192 (void) textdomain(TEXT_DOMAIN);
193 stk = (struct stkstr *)calloc(sizeof (struct stkstr), 100);
194 maxstk = 100;
195 /* Figure out how many known commands there are */
196 while (knowncmds[ncmds])
197 ncmds++;
198 while (argc > 1 && argv[1][0] == '-') {
199 switch (argv[1][1]) {
201 /* -a: add pairs of macros */
202 case 'a':
203 i = strlen(argv[1]) - 2;
204 if (i % 6 != 0)
205 usage();
206 /* look for empty macro slots */
207 for (i = 0; br[i].opbr; i++)
209 for (cp = argv[1]+3; cp[-1]; cp += 6) {
210 br[i].opbr = malloc(3);
211 (void) strncpy(br[i].opbr, cp, 2);
212 br[i].clbr = malloc(3);
213 (void) strncpy(br[i].clbr, cp+3, 2);
214 /* knows pairs are also known cmds */
215 addmac(br[i].opbr);
216 addmac(br[i].clbr);
217 i++;
219 break;
221 /* -c: add known commands */
222 case 'c':
223 i = strlen(argv[1]) - 2;
224 if (i % 3 != 0)
225 usage();
226 for (cp = argv[1]+3; cp[-1]; cp += 3) {
227 if (cp[2] && cp[2] != '.')
228 usage();
229 (void) strncpy(b1, cp, 2);
230 addmac(b1);
232 break;
234 /* -f: ignore font changes */
235 case 'f':
236 fflag = 1;
237 break;
239 /* -s: ignore size changes */
240 case 's':
241 sflag = 1;
242 break;
243 default:
244 usage();
246 argc--; argv++;
249 nfiles = argc - 1;
251 if (nfiles > 0) {
252 for (i = 1; i < argc; i++) {
253 cfilename = argv[i];
254 f = fopen(cfilename, "r");
255 if (f == NULL) {
256 perror(cfilename);
257 exit(1);
259 else
260 process(f);
262 } else {
263 cfilename = "stdin";
264 process(stdin);
266 return (0);
/*
 * Print the command synopsis on standard output and exit with status 1.
 * Does not return.
 */
static void
usage(void)
{
	(void) printf(gettext("Usage: "
	    "checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "[ filename .. ]\n"));
	exit(1);
}
277 static void
278 process(FILE *f)
280 int i, n;
281 char mac[5]; /* The current macro or nroff command */
282 int pl;
284 stktop = -1;
285 for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
286 if (line[0] == '.') {
288 * find and isolate the macro/command name.
290 (void) strncpy(mac, line+1, 4);
291 if (isspace(mac[0])) {
292 pe(lineno);
293 (void) printf(gettext("Empty command\n"));
294 } else if (isspace(mac[1])) {
295 mac[1] = 0;
296 } else if (isspace(mac[2])) {
297 mac[2] = 0;
298 } else if (mac[0] != '\\' || mac[1] != '\"') {
299 pe(lineno);
300 (void) printf(gettext("Command too long\n"));
304 * Is it a known command?
306 checkknown(mac);
309 * Should we add it?
311 if (eq(mac, "de"))
312 addcmd(line);
314 chkcmd(line, mac);
318 * At this point we process the line looking
319 * for \s and \f.
321 for (i = 0; line[i]; i++)
322 if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
323 if (!sflag && line[++i] == 's') {
324 pl = line[++i];
325 if (isdigit(pl)) {
326 n = pl - '0';
327 pl = ' ';
328 } else
329 n = 0;
330 while (isdigit(line[++i]))
331 n = 10 * n + line[i] - '0';
332 i--;
333 if (n == 0) {
334 if (stk[stktop].opno == SZ) {
335 stktop--;
336 } else {
337 pe(lineno);
338 (void) printf(
339 gettext("unmatched \\s0\n"));
341 } else {
342 growstk();
343 stk[stktop].opno = SZ;
344 stk[stktop].pl = pl;
345 stk[stktop].parm = n;
346 stk[stktop].lno = lineno;
348 } else if (!fflag && line[i] == 'f') {
349 n = line[++i];
350 if (n == 'P') {
351 if (stk[stktop].opno == FT) {
352 stktop--;
353 } else {
354 pe(lineno);
355 (void) printf(
356 gettext("unmatched \\fP\n"));
358 } else {
359 growstk();
360 stk[stktop].opno = FT;
361 stk[stktop].pl = 1;
362 stk[stktop].parm = n;
363 stk[stktop].lno = lineno;
369 * We've hit the end and look at all this stuff that hasn't been
370 * matched yet! Complain, complain.
372 for (i = stktop; i >= 0; i--) {
373 complain(i);
377 static void
378 complain(int i)
380 pe(stk[i].lno);
381 (void) printf(gettext("Unmatched "));
382 prop(i);
383 (void) printf("\n");
386 static void
387 prop(int i)
389 if (stk[i].pl == 0)
390 (void) printf(".%s", br[stk[i].opno].opbr);
391 else switch (stk[i].opno) {
392 case SZ:
393 (void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
394 break;
395 case FT:
396 (void) printf("\\f%c", stk[i].parm);
397 break;
398 default:
399 (void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
400 i, stk[i].opno, br[stk[i].opno].opbr,
401 br[stk[i].opno].clbr);
405 /* ARGSUSED */
406 static void
407 chkcmd(char *line, char *mac)
409 int i;
412 * Check to see if it matches top of stack.
414 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
415 stktop--; /* OK. Pop & forget */
416 else {
417 /* No. Maybe it's an opener */
418 for (i = 0; br[i].opbr; i++) {
419 if (eq(mac, br[i].opbr)) {
420 /* Found. Push it. */
421 growstk();
422 stk[stktop].opno = i;
423 stk[stktop].pl = 0;
424 stk[stktop].parm = 0;
425 stk[stktop].lno = lineno;
426 break;
429 * Maybe it's an unmatched closer.
430 * NOTE: this depends on the fact
431 * that none of the closers can be
432 * openers too.
434 if (eq(mac, br[i].clbr)) {
435 nomatch(mac);
436 break;
442 static void
443 nomatch(char *mac)
445 int i, j;
448 * Look for a match further down on stack
449 * If we find one, it suggests that the stuff in
450 * between is supposed to match itself.
452 for (j = stktop; j >= 0; j--)
453 if (eq(mac, br[stk[j].opno].clbr)) {
454 /* Found. Make a good diagnostic. */
455 if (j == stktop-2) {
457 * Check for special case \fx..\fR and don't
458 * complain.
460 if (stk[j+1].opno == FT &&
461 stk[j+1].parm != 'R' &&
462 stk[j+2].opno == FT &&
463 stk[j+2].parm == 'R') {
464 stktop = j -1;
465 return;
468 * We have two unmatched frobs. Chances are
469 * they were intended to match, so we mention
470 * them together.
472 pe(stk[j+1].lno);
473 prop(j+1);
474 (void) printf(gettext(" does not match %d: "),
475 stk[j+2].lno);
476 prop(j+2);
477 (void) printf("\n");
478 } else for (i = j+1; i <= stktop; i++) {
479 complain(i);
481 stktop = j-1;
482 return;
484 /* Didn't find one. Throw this away. */
485 pe(lineno);
486 (void) printf(gettext("Unmatched .%s\n"), mac);
/* eq: return 1 when the two strings have identical contents, else 0 */
static int
eq(char *s1, char *s2)
{
	return (!strcmp(s1, s2));
}
496 /* print the first part of an error message, given the line number */
497 static void
498 pe(int lineno)
500 if (nfiles > 1)
501 (void) printf("%s: ", cfilename);
502 (void) printf("%d: ", lineno);
505 static void
506 checkknown(char *mac)
509 if (eq(mac, "."))
510 return;
511 if (binsrch(mac) >= 0)
512 return;
513 if (mac[0] == '\\' && mac[1] == '"') /* comments */
514 return;
516 pe(lineno);
517 (void) printf(gettext("Unknown command: .%s\n"), mac);
521 * We have a .de xx line in "line". Add xx to the list of known commands.
523 static void
524 addcmd(char *line)
526 char *mac;
528 /* grab the macro being defined */
529 mac = line+4;
530 while (isspace(*mac))
531 mac++;
532 if (*mac == 0) {
533 pe(lineno);
534 (void) printf(gettext("illegal define: %s\n"), line);
535 return;
537 mac[2] = 0;
538 if (isspace(mac[1]) || mac[1] == '\\')
539 mac[1] = 0;
540 if (ncmds >= MAXCMDS) {
541 (void) printf(gettext("Only %d known commands allowed\n"),
542 MAXCMDS);
543 exit(1);
545 addmac(mac);
549 * Add mac to the list. We should really have some kind of tree
550 * structure here but this is a quick-and-dirty job and I just don't
551 * have time to mess with it. (I wonder if this will come back to haunt
552 * me someday?) Anyway, I claim that .de is fairly rare in user
553 * nroff programs, and the loop below is pretty fast.
555 static void
556 addmac(char *mac)
558 char **src, **dest, **loc;
560 if (binsrch(mac) >= 0) { /* it's OK to redefine something */
561 #ifdef DEBUG
562 (void) printf("binsrch(%s) -> already in table\n", mac);
563 #endif
564 return;
566 /* binsrch sets slot as a side effect */
567 #ifdef DEBUG
568 printf("binsrch(%s) -> %d\n", mac, slot);
569 #endif
570 loc = &knowncmds[slot];
571 src = &knowncmds[ncmds-1];
572 dest = src+1;
573 while (dest > loc)
574 *dest-- = *src--;
575 *loc = malloc(3);
576 (void) strcpy(*loc, mac);
577 ncmds++;
578 #ifdef DEBUG
579 (void) printf("after: %s %s %s %s %s, %d cmds\n",
580 knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
581 knowncmds[slot+1], knowncmds[slot+2], ncmds);
582 #endif
586 * Do a binary search in knowncmds for mac.
587 * If found, return the index. If not, return -1.
589 static int
590 binsrch(char *mac)
592 char *p; /* pointer to current cmd in list */
593 int d; /* difference if any */
594 int mid; /* mid point in binary search */
595 int top, bot; /* boundaries of bin search, inclusive */
597 top = ncmds-1;
598 bot = 0;
599 while (top >= bot) {
600 mid = (top+bot)/2;
601 p = knowncmds[mid];
602 d = p[0] - mac[0];
603 if (d == 0)
604 d = p[1] - mac[1];
605 if (d == 0)
606 return (mid);
607 if (d < 0)
608 bot = mid + 1;
609 else
610 top = mid - 1;
612 slot = bot; /* place it would have gone */
613 return (-1);