libm: Fix misleading indent.
[dragonfly.git] / contrib / mdocml / roff.c
blob4903ee8c247b7b7e323458c0ff582cf30dcda864
1 /* $Id: roff.c,v 1.224 2014/08/01 17:27:44 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
/*
 * Tokens for every request and macro handled in this file.
 * The order must match the initialisers of the roffs[] table, which is
 * indexed by these values.  ROFF_USERDEF is the slot used for
 * user-defined macros; ROFF_MAX is the number of slots and doubles as
 * the "not found" return value of the lookup functions.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
/*
 * An incredibly-simple string buffer:
 * a character pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
90 * A key-value roffstr pair as part of a singly-linked list.
92 struct roffkv {
93 struct roffstr key;
94 struct roffstr val;
95 struct roffkv *next; /* next in list */
99 * A single number register as part of a singly-linked list.
101 struct roffreg {
102 struct roffstr key;
103 int val;
104 struct roffreg *next;
/*
 * The roff parser context.
 * One object of this type is created by roff_alloc(), emptied again
 * by roff_reset(), and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	struct roffnode	*last;		/* leaf of stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* current equation being parsed */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack; -1 if empty */
	char		 control;	/* control character */
};
128 struct roffnode {
129 enum rofft tok; /* type of node */
130 struct roffnode *parent; /* up one in stack */
131 int line; /* parse line */
132 int col; /* parse col */
133 char *name; /* node name, e.g. macro name */
134 char *end; /* end-rules: custom token */
135 int endspan; /* end-rules: next-line or infty */
136 int rule; /* current evaluation rule */
/*
 * Common parameter list shared by all request handlers,
 * and the function pointer type for such handlers.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
150 struct roffmac {
151 const char *name; /* macro name */
152 roffproc proc; /* process new macro */
153 roffproc text; /* process as child text of macro */
154 roffproc sub; /* process as child of macro */
155 int flags;
156 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
157 struct roffmac *next;
/*
 * One predefined string: the input name and what replaces it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Builds one struct predef initialiser, including the trailing comma. */
#define	PREDEF(name_, str_) \
	{ (name_), (str_) },
168 static enum rofft roffhash_find(const char *, size_t);
169 static void roffhash_init(void);
170 static void roffnode_cleanscope(struct roff *);
171 static void roffnode_pop(struct roff *);
172 static void roffnode_push(struct roff *, enum rofft,
173 const char *, int, int);
174 static enum rofferr roff_block(ROFF_ARGS);
175 static enum rofferr roff_block_text(ROFF_ARGS);
176 static enum rofferr roff_block_sub(ROFF_ARGS);
177 static enum rofferr roff_cblock(ROFF_ARGS);
178 static enum rofferr roff_cc(ROFF_ARGS);
179 static void roff_ccond(struct roff *, int, int);
180 static enum rofferr roff_cond(ROFF_ARGS);
181 static enum rofferr roff_cond_text(ROFF_ARGS);
182 static enum rofferr roff_cond_sub(ROFF_ARGS);
183 static enum rofferr roff_ds(ROFF_ARGS);
184 static int roff_evalcond(const char *, int *);
185 static int roff_evalnum(const char *, int *, int *, int);
186 static int roff_evalpar(const char *, int *, int *);
187 static int roff_evalstrcond(const char *, int *);
188 static void roff_free1(struct roff *);
189 static void roff_freereg(struct roffreg *);
190 static void roff_freestr(struct roffkv *);
191 static size_t roff_getname(struct roff *, char **, int, int);
192 static int roff_getnum(const char *, int *, int *);
193 static int roff_getop(const char *, int *, char *);
194 static int roff_getregn(const struct roff *,
195 const char *, size_t);
196 static int roff_getregro(const char *name);
197 static const char *roff_getstrn(const struct roff *,
198 const char *, size_t);
199 static enum rofferr roff_it(ROFF_ARGS);
200 static enum rofferr roff_line_ignore(ROFF_ARGS);
201 static enum rofferr roff_nr(ROFF_ARGS);
202 static void roff_openeqn(struct roff *, const char *,
203 int, int, const char *);
204 static enum rofft roff_parse(struct roff *, char *, int *,
205 int, int);
206 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
207 static enum rofferr roff_res(struct roff *,
208 char **, size_t *, int, int);
209 static enum rofferr roff_rm(ROFF_ARGS);
210 static enum rofferr roff_rr(ROFF_ARGS);
211 static void roff_setstr(struct roff *,
212 const char *, const char *, int);
213 static void roff_setstrn(struct roffkv **, const char *,
214 size_t, const char *, size_t, int);
215 static enum rofferr roff_so(ROFF_ARGS);
216 static enum rofferr roff_tr(ROFF_ARGS);
217 static enum rofferr roff_Dd(ROFF_ARGS);
218 static enum rofferr roff_TH(ROFF_ARGS);
219 static enum rofferr roff_TE(ROFF_ARGS);
220 static enum rofferr roff_TS(ROFF_ARGS);
221 static enum rofferr roff_EQ(ROFF_ARGS);
222 static enum rofferr roff_EN(ROFF_ARGS);
223 static enum rofferr roff_T_(ROFF_ARGS);
224 static enum rofferr roff_userdef(ROFF_ARGS);
/*
 * The request hash table has one bucket per printable,
 * non-blank ASCII character; see roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
234 static struct roffmac roffs[ROFF_MAX] = {
235 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
236 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
239 { "as", roff_ds, NULL, NULL, 0, NULL },
240 { "cc", roff_cc, NULL, NULL, 0, NULL },
241 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
242 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
243 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
244 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
245 { "ds", roff_ds, NULL, NULL, 0, NULL },
246 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
247 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
249 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
250 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
251 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
252 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
253 { "it", roff_it, NULL, NULL, 0, NULL },
254 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
255 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "nr", roff_nr, NULL, NULL, 0, NULL },
257 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
258 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
259 { "rm", roff_rm, NULL, NULL, 0, NULL },
260 { "rr", roff_rr, NULL, NULL, 0, NULL },
261 { "so", roff_so, NULL, NULL, 0, NULL },
262 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
263 { "tr", roff_tr, NULL, NULL, 0, NULL },
264 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
265 { "TH", roff_TH, NULL, NULL, 0, NULL },
266 { "TS", roff_TS, NULL, NULL, 0, NULL },
267 { "TE", roff_TE, NULL, NULL, 0, NULL },
268 { "T&", roff_T_, NULL, NULL, 0, NULL },
269 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
270 { "EN", roff_EN, NULL, NULL, 0, NULL },
271 { ".", roff_cblock, NULL, NULL, 0, NULL },
272 { NULL, roff_userdef, NULL, NULL, 0, NULL },
/*
 * NULL-terminated list of mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
308 /* Array of injected predefined strings. */
309 #define PREDEFS_MAX 38
310 static const struct predef predefs[PREDEFS_MAX] = {
311 #include "predefs.in"
/*
 * Map the first character of a name to its hash bucket;
 * see roffhash_find().  The argument is parenthesised such that
 * pointer expressions like "s + 1" index correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)

/* State of the input line trap. */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
321 static void
322 roffhash_init(void)
324 struct roffmac *n;
325 int buc, i;
327 for (i = 0; i < (int)ROFF_USERDEF; i++) {
328 assert(roffs[i].name[0] >= ASCII_LO);
329 assert(roffs[i].name[0] <= ASCII_HI);
331 buc = ROFF_HASH(roffs[i].name);
333 if (NULL != (n = hash[buc])) {
334 for ( ; n->next; n = n->next)
335 /* Do nothing. */ ;
336 n->next = &roffs[i];
337 } else
338 hash[buc] = &roffs[i];
343 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
344 * the nil-terminated string name could be found.
346 static enum rofft
347 roffhash_find(const char *p, size_t s)
349 int buc;
350 struct roffmac *n;
353 * libroff has an extremely simple hashtable, for the time
354 * being, which simply keys on the first character, which must
355 * be printable, then walks a chain. It works well enough until
356 * optimised.
359 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
360 return(ROFF_MAX);
362 buc = ROFF_HASH(p);
364 if (NULL == (n = hash[buc]))
365 return(ROFF_MAX);
366 for ( ; n; n = n->next)
367 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
368 return((enum rofft)(n - roffs));
370 return(ROFF_MAX);
374 * Pop the current node off of the stack of roff instructions currently
375 * pending.
377 static void
378 roffnode_pop(struct roff *r)
380 struct roffnode *p;
382 assert(r->last);
383 p = r->last;
385 r->last = r->last->parent;
386 free(p->name);
387 free(p->end);
388 free(p);
392 * Push a roff node onto the instruction stack. This must later be
393 * removed with roffnode_pop().
395 static void
396 roffnode_push(struct roff *r, enum rofft tok, const char *name,
397 int line, int col)
399 struct roffnode *p;
401 p = mandoc_calloc(1, sizeof(struct roffnode));
402 p->tok = tok;
403 if (name)
404 p->name = mandoc_strdup(name);
405 p->parent = r->last;
406 p->line = line;
407 p->col = col;
408 p->rule = p->parent ? p->parent->rule : 0;
410 r->last = p;
413 static void
414 roff_free1(struct roff *r)
416 struct tbl_node *tbl;
417 struct eqn_node *e;
418 int i;
420 while (NULL != (tbl = r->first_tbl)) {
421 r->first_tbl = tbl->next;
422 tbl_free(tbl);
424 r->first_tbl = r->last_tbl = r->tbl = NULL;
426 while (NULL != (e = r->first_eqn)) {
427 r->first_eqn = e->next;
428 eqn_free(e);
430 r->first_eqn = r->last_eqn = r->eqn = NULL;
432 while (r->last)
433 roffnode_pop(r);
435 free (r->rstack);
436 r->rstack = NULL;
437 r->rstacksz = 0;
438 r->rstackpos = -1;
440 roff_freereg(r->regtab);
441 r->regtab = NULL;
443 roff_freestr(r->strtab);
444 roff_freestr(r->xmbtab);
445 r->strtab = r->xmbtab = NULL;
447 if (r->xtab)
448 for (i = 0; i < 128; i++)
449 free(r->xtab[i].p);
450 free(r->xtab);
451 r->xtab = NULL;
454 void
455 roff_reset(struct roff *r)
458 roff_free1(r);
459 r->control = 0;
/*
 * Destroy a parser context: release everything it owns,
 * then the context object itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
470 struct roff *
471 roff_alloc(struct mparse *parse, int options)
473 struct roff *r;
475 r = mandoc_calloc(1, sizeof(struct roff));
476 r->parse = parse;
477 r->options = options;
478 r->rstackpos = -1;
480 roffhash_init();
482 return(r);
486 * In the current line, expand escape sequences that tend to get
487 * used in numerical expressions and conditional requests.
488 * Also check the syntax of the remaining escape sequences.
490 static enum rofferr
491 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
493 char ubuf[24]; /* buffer to print the number */
494 const char *start; /* start of the string to process */
495 char *stesc; /* start of an escape sequence ('\\') */
496 const char *stnam; /* start of the name, after "[(*" */
497 const char *cp; /* end of the name, e.g. before ']' */
498 const char *res; /* the string to be substituted */
499 char *nbuf; /* new buffer to copy bufp to */
500 size_t maxl; /* expected length of the escape name */
501 size_t naml; /* actual length of the escape name */
502 int expand_count; /* to avoid infinite loops */
503 int npos; /* position in numeric expression */
504 int arg_complete; /* argument not interrupted by eol */
505 char term; /* character terminating the escape */
507 expand_count = 0;
508 start = *bufp + pos;
509 stesc = strchr(start, '\0') - 1;
510 while (stesc-- > start) {
512 /* Search backwards for the next backslash. */
514 if ('\\' != *stesc)
515 continue;
517 /* If it is escaped, skip it. */
519 for (cp = stesc - 1; cp >= start; cp--)
520 if ('\\' != *cp)
521 break;
523 if (0 == (stesc - cp) % 2) {
524 stesc = (char *)cp;
525 continue;
528 /* Decide whether to expand or to check only. */
530 term = '\0';
531 cp = stesc + 1;
532 switch (*cp) {
533 case '*':
534 res = NULL;
535 break;
536 case 'B':
537 /* FALLTHROUGH */
538 case 'w':
539 term = cp[1];
540 /* FALLTHROUGH */
541 case 'n':
542 res = ubuf;
543 break;
544 default:
545 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
546 mandoc_vmsg(MANDOCERR_ESC_BAD,
547 r->parse, ln, (int)(stesc - *bufp),
548 "%.*s", (int)(cp - stesc), stesc);
549 continue;
552 if (EXPAND_LIMIT < ++expand_count) {
553 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
554 ln, (int)(stesc - *bufp), NULL);
555 return(ROFF_IGN);
559 * The third character decides the length
560 * of the name of the string or register.
561 * Save a pointer to the name.
564 if ('\0' == term) {
565 switch (*++cp) {
566 case '\0':
567 maxl = 0;
568 break;
569 case '(':
570 cp++;
571 maxl = 2;
572 break;
573 case '[':
574 cp++;
575 term = ']';
576 maxl = 0;
577 break;
578 default:
579 maxl = 1;
580 break;
582 } else {
583 cp += 2;
584 maxl = 0;
586 stnam = cp;
588 /* Advance to the end of the name. */
590 arg_complete = 1;
591 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
592 if ('\0' == *cp) {
593 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
594 ln, (int)(stesc - *bufp), stesc);
595 arg_complete = 0;
596 break;
598 if (0 == maxl && *cp == term) {
599 cp++;
600 break;
605 * Retrieve the replacement string; if it is
606 * undefined, resume searching for escapes.
609 switch (stesc[1]) {
610 case '*':
611 if (arg_complete)
612 res = roff_getstrn(r, stnam, naml);
613 break;
614 case 'B':
615 npos = 0;
616 ubuf[0] = arg_complete &&
617 roff_evalnum(stnam, &npos, NULL, 0) &&
618 stnam + npos + 1 == cp ? '1' : '0';
619 ubuf[1] = '\0';
620 break;
621 case 'n':
622 if (arg_complete)
623 (void)snprintf(ubuf, sizeof(ubuf), "%d",
624 roff_getregn(r, stnam, naml));
625 else
626 ubuf[0] = '\0';
627 break;
628 case 'w':
629 /* use even incomplete args */
630 (void)snprintf(ubuf, sizeof(ubuf), "%d",
631 24 * (int)naml);
632 break;
635 if (NULL == res) {
636 mandoc_vmsg(MANDOCERR_STR_UNDEF,
637 r->parse, ln, (int)(stesc - *bufp),
638 "%.*s", (int)naml, stnam);
639 res = "";
642 /* Replace the escape sequence by the string. */
644 *stesc = '\0';
645 *szp = mandoc_asprintf(&nbuf, "%s%s%s",
646 *bufp, res, cp) + 1;
648 /* Prepare for the next replacement. */
650 start = nbuf + pos;
651 stesc = nbuf + (stesc - *bufp) + strlen(res);
652 free(*bufp);
653 *bufp = nbuf;
655 return(ROFF_CONT);
659 * Process text streams:
660 * Convert all breakable hyphens into ASCII_HYPH.
661 * Decrement and spring input line trap.
663 static enum rofferr
664 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
666 size_t sz;
667 const char *start;
668 char *p;
669 int isz;
670 enum mandoc_esc esc;
672 start = p = *bufp + pos;
674 while ('\0' != *p) {
675 sz = strcspn(p, "-\\");
676 p += sz;
678 if ('\0' == *p)
679 break;
681 if ('\\' == *p) {
682 /* Skip over escapes. */
683 p++;
684 esc = mandoc_escape((const char **)&p, NULL, NULL);
685 if (ESCAPE_ERROR == esc)
686 break;
687 continue;
688 } else if (p == start) {
689 p++;
690 continue;
693 if (isalpha((unsigned char)p[-1]) &&
694 isalpha((unsigned char)p[1]))
695 *p = ASCII_HYPH;
696 p++;
699 /* Spring the input line trap. */
700 if (1 == roffit_lines) {
701 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
702 free(*bufp);
703 *bufp = p;
704 *szp = isz + 1;
705 *offs = 0;
706 free(roffit_macro);
707 roffit_lines = 0;
708 return(ROFF_REPARSE);
709 } else if (1 < roffit_lines)
710 --roffit_lines;
711 return(ROFF_CONT);
714 enum rofferr
715 roff_parseln(struct roff *r, int ln, char **bufp,
716 size_t *szp, int pos, int *offs)
718 enum rofft t;
719 enum rofferr e;
720 int ppos, ctl;
723 * Run the reserved-word filter only if we have some reserved
724 * words to fill in.
727 e = roff_res(r, bufp, szp, ln, pos);
728 if (ROFF_IGN == e)
729 return(e);
730 assert(ROFF_CONT == e);
732 ppos = pos;
733 ctl = roff_getcontrol(r, *bufp, &pos);
736 * First, if a scope is open and we're not a macro, pass the
737 * text through the macro's filter. If a scope isn't open and
738 * we're not a macro, just let it through.
739 * Finally, if there's an equation scope open, divert it into it
740 * no matter our state.
743 if (r->last && ! ctl) {
744 t = r->last->tok;
745 assert(roffs[t].text);
746 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
747 assert(ROFF_IGN == e || ROFF_CONT == e);
748 if (ROFF_CONT != e)
749 return(e);
751 if (r->eqn)
752 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
753 if ( ! ctl) {
754 if (r->tbl)
755 return(tbl_read(r->tbl, ln, *bufp, pos));
756 return(roff_parsetext(bufp, szp, pos, offs));
760 * If a scope is open, go to the child handler for that macro,
761 * as it may want to preprocess before doing anything with it.
762 * Don't do so if an equation is open.
765 if (r->last) {
766 t = r->last->tok;
767 assert(roffs[t].sub);
768 return((*roffs[t].sub)(r, t, bufp, szp,
769 ln, ppos, pos, offs));
773 * Lastly, as we've no scope open, try to look up and execute
774 * the new macro. If no macro is found, simply return and let
775 * the compilers handle it.
778 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
779 return(ROFF_CONT);
781 assert(roffs[t].proc);
782 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
785 void
786 roff_endparse(struct roff *r)
789 if (r->last)
790 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
791 r->last->line, r->last->col,
792 roffs[r->last->tok].name);
794 if (r->eqn) {
795 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
796 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
797 eqn_end(&r->eqn);
800 if (r->tbl) {
801 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
802 r->tbl->line, r->tbl->pos, "TS");
803 tbl_end(&r->tbl);
808 * Parse a roff node's type from the input buffer. This must be in the
809 * form of ".foo xxx" in the usual way.
811 static enum rofft
812 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
814 char *cp;
815 const char *mac;
816 size_t maclen;
817 enum rofft t;
819 cp = buf + *pos;
821 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
822 return(ROFF_MAX);
824 mac = cp;
825 maclen = roff_getname(r, &cp, ln, ppos);
827 t = (r->current_string = roff_getstrn(r, mac, maclen))
828 ? ROFF_USERDEF : roffhash_find(mac, maclen);
830 if (ROFF_MAX != t)
831 *pos = cp - buf;
833 return(t);
836 static enum rofferr
837 roff_cblock(ROFF_ARGS)
841 * A block-close `..' should only be invoked as a child of an
842 * ignore macro, otherwise raise a warning and just ignore it.
845 if (NULL == r->last) {
846 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
847 ln, ppos, "..");
848 return(ROFF_IGN);
851 switch (r->last->tok) {
852 case ROFF_am:
853 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
854 /* FALLTHROUGH */
855 case ROFF_ami:
856 /* FALLTHROUGH */
857 case ROFF_de:
858 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
859 /* FALLTHROUGH */
860 case ROFF_dei:
861 /* FALLTHROUGH */
862 case ROFF_ig:
863 break;
864 default:
865 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
866 ln, ppos, "..");
867 return(ROFF_IGN);
870 if ((*bufp)[pos])
871 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
872 ".. %s", *bufp + pos);
874 roffnode_pop(r);
875 roffnode_cleanscope(r);
876 return(ROFF_IGN);
880 static void
881 roffnode_cleanscope(struct roff *r)
884 while (r->last) {
885 if (--r->last->endspan != 0)
886 break;
887 roffnode_pop(r);
891 static void
892 roff_ccond(struct roff *r, int ln, int ppos)
895 if (NULL == r->last) {
896 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
897 ln, ppos, "\\}");
898 return;
901 switch (r->last->tok) {
902 case ROFF_el:
903 /* FALLTHROUGH */
904 case ROFF_ie:
905 /* FALLTHROUGH */
906 case ROFF_if:
907 break;
908 default:
909 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
910 ln, ppos, "\\}");
911 return;
914 if (r->last->endspan > -1) {
915 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
916 ln, ppos, "\\}");
917 return;
920 roffnode_pop(r);
921 roffnode_cleanscope(r);
922 return;
925 static enum rofferr
926 roff_block(ROFF_ARGS)
928 const char *name;
929 char *iname, *cp;
930 size_t namesz;
932 /* Ignore groff compatibility mode for now. */
934 if (ROFF_de1 == tok)
935 tok = ROFF_de;
936 else if (ROFF_am1 == tok)
937 tok = ROFF_am;
939 /* Parse the macro name argument. */
941 cp = *bufp + pos;
942 if (ROFF_ig == tok) {
943 iname = NULL;
944 namesz = 0;
945 } else {
946 iname = cp;
947 namesz = roff_getname(r, &cp, ln, ppos);
948 iname[namesz] = '\0';
951 /* Resolve the macro name argument if it is indirect. */
953 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
954 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
955 mandoc_vmsg(MANDOCERR_STR_UNDEF,
956 r->parse, ln, (int)(iname - *bufp),
957 "%.*s", (int)namesz, iname);
958 namesz = 0;
959 } else
960 namesz = strlen(name);
961 } else
962 name = iname;
964 if (0 == namesz && ROFF_ig != tok) {
965 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
966 ln, ppos, roffs[tok].name);
967 return(ROFF_IGN);
970 roffnode_push(r, tok, name, ln, ppos);
973 * At the beginning of a `de' macro, clear the existing string
974 * with the same name, if there is one. New content will be
975 * appended from roff_block_text() in multiline mode.
978 if (ROFF_de == tok || ROFF_dei == tok)
979 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
981 if ('\0' == *cp)
982 return(ROFF_IGN);
984 /* Get the custom end marker. */
986 iname = cp;
987 namesz = roff_getname(r, &cp, ln, ppos);
989 /* Resolve the end marker if it is indirect. */
991 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
992 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
993 mandoc_vmsg(MANDOCERR_STR_UNDEF,
994 r->parse, ln, (int)(iname - *bufp),
995 "%.*s", (int)namesz, iname);
996 namesz = 0;
997 } else
998 namesz = strlen(name);
999 } else
1000 name = iname;
1002 if (namesz)
1003 r->last->end = mandoc_strndup(name, namesz);
1005 if ('\0' != *cp)
1006 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1007 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1009 return(ROFF_IGN);
1012 static enum rofferr
1013 roff_block_sub(ROFF_ARGS)
1015 enum rofft t;
1016 int i, j;
1019 * First check whether a custom macro exists at this level. If
1020 * it does, then check against it. This is some of groff's
1021 * stranger behaviours. If we encountered a custom end-scope
1022 * tag and that tag also happens to be a "real" macro, then we
1023 * need to try interpreting it again as a real macro. If it's
1024 * not, then return ignore. Else continue.
1027 if (r->last->end) {
1028 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1029 if ((*bufp)[i] != r->last->end[j])
1030 break;
1032 if ('\0' == r->last->end[j] &&
1033 ('\0' == (*bufp)[i] ||
1034 ' ' == (*bufp)[i] ||
1035 '\t' == (*bufp)[i])) {
1036 roffnode_pop(r);
1037 roffnode_cleanscope(r);
1039 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1040 i++;
1042 pos = i;
1043 if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
1044 return(ROFF_RERUN);
1045 return(ROFF_IGN);
1050 * If we have no custom end-query or lookup failed, then try
1051 * pulling it out of the hashtable.
1054 t = roff_parse(r, *bufp, &pos, ln, ppos);
1056 if (ROFF_cblock != t) {
1057 if (ROFF_ig != tok)
1058 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1059 return(ROFF_IGN);
1062 assert(roffs[t].proc);
1063 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1066 static enum rofferr
1067 roff_block_text(ROFF_ARGS)
1070 if (ROFF_ig != tok)
1071 roff_setstr(r, r->last->name, *bufp + pos, 2);
1073 return(ROFF_IGN);
1076 static enum rofferr
1077 roff_cond_sub(ROFF_ARGS)
1079 enum rofft t;
1080 char *ep;
1081 int rr;
1083 rr = r->last->rule;
1084 roffnode_cleanscope(r);
1085 t = roff_parse(r, *bufp, &pos, ln, ppos);
1088 * Fully handle known macros when they are structurally
1089 * required or when the conditional evaluated to true.
1092 if ((ROFF_MAX != t) &&
1093 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1094 assert(roffs[t].proc);
1095 return((*roffs[t].proc)(r, t, bufp, szp,
1096 ln, ppos, pos, offs));
1100 * If `\}' occurs on a macro line without a preceding macro,
1101 * drop the line completely.
1104 ep = *bufp + pos;
1105 if ('\\' == ep[0] && '}' == ep[1])
1106 rr = 0;
1108 /* Always check for the closing delimiter `\}'. */
1110 while (NULL != (ep = strchr(ep, '\\'))) {
1111 if ('}' == *(++ep)) {
1112 *ep = '&';
1113 roff_ccond(r, ln, ep - *bufp - 1);
1115 ++ep;
1117 return(rr ? ROFF_CONT : ROFF_IGN);
1120 static enum rofferr
1121 roff_cond_text(ROFF_ARGS)
1123 char *ep;
1124 int rr;
1126 rr = r->last->rule;
1127 roffnode_cleanscope(r);
1129 ep = *bufp + pos;
1130 while (NULL != (ep = strchr(ep, '\\'))) {
1131 if ('}' == *(++ep)) {
1132 *ep = '&';
1133 roff_ccond(r, ln, ep - *bufp - 1);
1135 ++ep;
1137 return(rr ? ROFF_CONT : ROFF_IGN);
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return success (1) and advance the
 * parse point, else return failure (0) and leave the parse point
 * unchanged.
 *
 * v:   the string to parse
 * pos: in/out, index into v of the parse point
 * res: out, the number parsed, or 0 on failure; may be NULL
 *
 * Overflows are not diagnosed.  The value is accumulated in unsigned
 * arithmetic, which wraps around in a well-defined way, rather than
 * in signed arithmetic, where overflow is undefined behaviour.  The
 * final conversion of an out-of-range value to int is
 * implementation-defined.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, neg, p;

	if (res == NULL)
		res = &myres;

	p = *pos;
	neg = v[p] == '-';
	if (neg)
		p++;

	acc = 0;
	for ( ; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');

	/* No digit at all: fail without moving the parse point. */
	if (p == *pos + neg) {
		*res = 0;
		return 0;
	}

	if (neg)
		acc = 0U - acc;

	*res = (int)acc;
	*pos = p;
	return 1;
}
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 *
 * v:   the line containing the condition
 * pos: in/out, index into v; on entry, the position of the delimiter
 *
 * Returns 1 if the two strings are identical, 0 otherwise.
 * The cursor is never advanced beyond the terminating NUL.
 * If there is nothing at all at the cursor, the comparison fails
 * and the cursor is not moved.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, s1 + 1 would be outside the string. */
	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (s3 == NULL)		/* found no middle delimiter */
		goto out;

	while (*++s3 != '\0') {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (s3 == NULL)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')	/* step over the final delimiter only */
		s3++;
	*pos = (int)(s3 - v);
	return match;
}
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition.
 *
 * The conditions n and o are treated as true;
 * c, d, e, r, and t are treated as false.
 * A numerical condition is true if the number is positive.
 * Anything else is treated as a string comparison.
 * The cursor *pos is advanced past the condition.
 * Returns 1 for true and 0 for false, after applying the negation.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, number;

	wanttrue = v[*pos] != '!';
	if (!wanttrue)
		(*pos)++;

	switch (v[*pos]) {
	case 'n':
		/* FALLTHROUGH */
	case 'o':
		(*pos)++;
		return wanttrue;
	case 'c':
		/* FALLTHROUGH */
	case 'd':
		/* FALLTHROUGH */
	case 'e':
		/* FALLTHROUGH */
	case 'r':
		/* FALLTHROUGH */
	case 't':
		(*pos)++;
		return !wanttrue;
	default:
		break;
	}

	if (roff_evalnum(v, pos, &number, 0))
		return (number > 0) == wanttrue;

	return roff_evalstrcond(v, pos) == wanttrue;
}
1256 static enum rofferr
1257 roff_line_ignore(ROFF_ARGS)
1260 return(ROFF_IGN);
1263 static enum rofferr
1264 roff_cond(ROFF_ARGS)
1267 roffnode_push(r, tok, NULL, ln, ppos);
1270 * An `.el' has no conditional body: it will consume the value
1271 * of the current rstack entry set in prior `ie' calls or
1272 * defaults to DENY.
1274 * If we're not an `el', however, then evaluate the conditional.
1277 r->last->rule = ROFF_el == tok ?
1278 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1279 roff_evalcond(*bufp, &pos);
1282 * An if-else will put the NEGATION of the current evaluated
1283 * conditional into the stack of rules.
1286 if (ROFF_ie == tok) {
1287 if (r->rstackpos + 1 == r->rstacksz) {
1288 r->rstacksz += 16;
1289 r->rstack = mandoc_reallocarray(r->rstack,
1290 r->rstacksz, sizeof(int));
1292 r->rstack[++r->rstackpos] = !r->last->rule;
1295 /* If the parent has false as its rule, then so do we. */
1297 if (r->last->parent && !r->last->parent->rule)
1298 r->last->rule = 0;
1301 * Determine scope.
1302 * If there is nothing on the line after the conditional,
1303 * not even whitespace, use next-line scope.
1306 if ('\0' == (*bufp)[pos]) {
1307 r->last->endspan = 2;
1308 goto out;
1311 while (' ' == (*bufp)[pos])
1312 pos++;
1314 /* An opening brace requests multiline scope. */
1316 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1317 r->last->endspan = -1;
1318 pos += 2;
1319 goto out;
1323 * Anything else following the conditional causes
1324 * single-line scope. Warn if the scope contains
1325 * nothing but trailing whitespace.
1328 if ('\0' == (*bufp)[pos])
1329 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1330 ln, ppos, roffs[tok].name);
1332 r->last->endspan = 1;
1334 out:
1335 *offs = pos;
1336 return(ROFF_RERUN);
1339 static enum rofferr
1340 roff_ds(ROFF_ARGS)
1342 char *string;
1343 const char *name;
1344 size_t namesz;
1347 * The first word is the name of the string.
1348 * If it is empty or terminated by an escape sequence,
1349 * abort the `ds' request without defining anything.
1352 name = string = *bufp + pos;
1353 if ('\0' == *name)
1354 return(ROFF_IGN);
1356 namesz = roff_getname(r, &string, ln, pos);
1357 if ('\\' == name[namesz])
1358 return(ROFF_IGN);
1360 /* Read past the initial double-quote, if any. */
1361 if ('"' == *string)
1362 string++;
1364 /* The rest is the value. */
1365 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1366 ROFF_as == tok);
1367 return(ROFF_IGN);
/*
 * Parse one operator of one or two characters at v[*pos].
 * On success, store a single-character code for it in *res, advance
 * *pos past the operator and return that code.
 * On failure, return 0 and leave *pos unchanged; *res then holds the
 * unrecognized character.
 * Two-character operators map to: "<=" 'l', "<>" '!', "<?" 'i',
 * ">=" 'g', ">?" 'a', "==" '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 width;

	*res = v[*pos];
	width = 1;

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		width = 2;
		if (v[*pos + 1] == '=')
			*res = 'l';
		else if (v[*pos + 1] == '>')
			*res = '!';
		else if (v[*pos + 1] == '?')
			*res = 'i';
		else
			width = 1;
		break;
	case '>':
		width = 2;
		if (v[*pos + 1] == '=')
			*res = 'g';
		else if (v[*pos + 1] == '?')
			*res = 'a';
		else
			width = 1;
		break;
	case '=':
		/* "==" is accepted as a synonym for "=". */
		if (v[*pos + 1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
/*
 * Evaluate one operand at v[*pos]: either a parenthesized numeric
 * expression or a single number as parsed by roff_getnum().
 * Returns non-zero on success.
 */
static int
roff_evalpar(const char *v, int *pos, int *res)
{

	/* No parenthesis: it has to be a plain number. */
	if (v[*pos] != '(')
		return roff_getnum(v, pos, res);

	(*pos)++;
	if (!roff_evalnum(v, pos, res, 1))
		return 0;

	/*
	 * A missing closing parenthesis is an error when validating
	 * (res == NULL) but is tolerated when evaluating.
	 */
	if (v[*pos] != ')')
		return res != NULL;

	(*pos)++;
	return 1;
}
/*
 * Evaluate a complete numeric expression starting at v[*pos].
 * Operands are combined strictly left to right; there is no concept
 * of operator precedence.
 *
 * v:         the input text.
 * pos:       in/out parse position; if NULL, parsing starts at 0
 *            and the end position is not reported.
 * res:       receives the result; if NULL, the expression is only
 *            validated and nothing is computed.
 * skipwhite: if non-zero, whitespace around operands and operators
 *            is skipped.
 *
 * Returns 1 if an expression was parsed, 0 otherwise.
 * Division or modulo by zero yields 0 instead of invoking
 * undefined behaviour.
 */
static int
roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
{
	int		 mypos, operand2;
	char		 operator;

	if (NULL == pos) {
		mypos = 0;
		pos = &mypos;
	}

	if (skipwhite)
		while (isspace((unsigned char)v[*pos]))
			(*pos)++;

	/* The expression must begin with an operand. */
	if ( ! roff_evalpar(v, pos, res))
		return 0;

	while (1) {
		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* No further operator: the expression ends here. */
		if ( ! roff_getop(v, pos, &operator))
			break;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* An operator without a right operand is an error. */
		if ( ! roff_evalpar(v, pos, &operand2))
			return 0;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* Validation mode: parse only, compute nothing. */
		if (NULL == res)
			continue;

		switch (operator) {
		case '+':
			*res += operand2;
			break;
		case '-':
			*res -= operand2;
			break;
		case '*':
			*res *= operand2;
			break;
		case '/':
			/* The divisor comes from the document. */
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res /= operand2;
			break;
		case '%':
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res %= operand2;
			break;
		case '<':
			*res = *res < operand2;
			break;
		case '>':
			*res = *res > operand2;
			break;
		case 'l':
			*res = *res <= operand2;
			break;
		case 'g':
			*res = *res >= operand2;
			break;
		case '=':
			*res = *res == operand2;
			break;
		case '!':
			*res = *res != operand2;
			break;
		case '&':
			*res = *res && operand2;
			break;
		case ':':
			*res = *res || operand2;
			break;
		case 'i':
			/* Minimum of both operands. */
			if (operand2 < *res)
				*res = operand2;
			break;
		case 'a':
			/* Maximum of both operands. */
			if (operand2 > *res)
				*res = operand2;
			break;
		default:
			/* roff_getop() returns no other codes. */
			abort();
		}
	}
	return 1;
}
1568 void
1569 roff_setreg(struct roff *r, const char *name, int val, char sign)
1571 struct roffreg *reg;
1573 /* Search for an existing register with the same name. */
1574 reg = r->regtab;
1576 while (reg && strcmp(name, reg->key.p))
1577 reg = reg->next;
1579 if (NULL == reg) {
1580 /* Create a new register. */
1581 reg = mandoc_malloc(sizeof(struct roffreg));
1582 reg->key.p = mandoc_strdup(name);
1583 reg->key.sz = strlen(name);
1584 reg->val = 0;
1585 reg->next = r->regtab;
1586 r->regtab = reg;
1589 if ('+' == sign)
1590 reg->val += val;
1591 else if ('-' == sign)
1592 reg->val -= val;
1593 else
1594 reg->val = val;
/*
 * Look up a predefined read-only number register by the first
 * character of name.
 * Returns its value, or -1 if the register is not predefined.
 * Should a predefined register with the value -1 ever be needed,
 * a different "not found" marker will have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	int	 val;

	switch (*name) {
	case 'g':	/* Groff compatibility mode is always on. */
	case 'T':	/* Some output device is always defined. */
		val = 1;
		break;
	case 'A':	/* ASCII approximation mode is always off. */
	case 'j':	/* Always adjust left margin only. */
		val = 0;
		break;
	case 'H':	/* Fixed horizontal resolution. */
		val = 24;
		break;
	case 'V':	/* Fixed vertical resolution. */
		val = 40;
		break;
	default:
		val = -1;
		break;
	}
	return val;
}
1626 roff_getreg(const struct roff *r, const char *name)
1628 struct roffreg *reg;
1629 int val;
1631 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1632 val = roff_getregro(name + 1);
1633 if (-1 != val)
1634 return (val);
1637 for (reg = r->regtab; reg; reg = reg->next)
1638 if (0 == strcmp(name, reg->key.p))
1639 return(reg->val);
1641 return(0);
1644 static int
1645 roff_getregn(const struct roff *r, const char *name, size_t len)
1647 struct roffreg *reg;
1648 int val;
1650 if ('.' == name[0] && 2 == len) {
1651 val = roff_getregro(name + 1);
1652 if (-1 != val)
1653 return (val);
1656 for (reg = r->regtab; reg; reg = reg->next)
1657 if (len == reg->key.sz &&
1658 0 == strncmp(name, reg->key.p, len))
1659 return(reg->val);
1661 return(0);
1664 static void
1665 roff_freereg(struct roffreg *reg)
1667 struct roffreg *old_reg;
1669 while (NULL != reg) {
1670 free(reg->key.p);
1671 old_reg = reg;
1672 reg = reg->next;
1673 free(old_reg);
1677 static enum rofferr
1678 roff_nr(ROFF_ARGS)
1680 char *key, *val;
1681 size_t keysz;
1682 int iv;
1683 char sign;
1685 key = val = *bufp + pos;
1686 if ('\0' == *key)
1687 return(ROFF_IGN);
1689 keysz = roff_getname(r, &val, ln, pos);
1690 if ('\\' == key[keysz])
1691 return(ROFF_IGN);
1692 key[keysz] = '\0';
1694 sign = *val;
1695 if ('+' == sign || '-' == sign)
1696 val++;
1698 if (roff_evalnum(val, NULL, &iv, 0))
1699 roff_setreg(r, key, iv, sign);
1701 return(ROFF_IGN);
1704 static enum rofferr
1705 roff_rr(ROFF_ARGS)
1707 struct roffreg *reg, **prev;
1708 char *name, *cp;
1709 size_t namesz;
1711 name = cp = *bufp + pos;
1712 if ('\0' == *name)
1713 return(ROFF_IGN);
1714 namesz = roff_getname(r, &cp, ln, pos);
1715 name[namesz] = '\0';
1717 prev = &r->regtab;
1718 while (1) {
1719 reg = *prev;
1720 if (NULL == reg || !strcmp(name, reg->key.p))
1721 break;
1722 prev = &reg->next;
1724 if (NULL != reg) {
1725 *prev = reg->next;
1726 free(reg->key.p);
1727 free(reg);
1729 return(ROFF_IGN);
1732 static enum rofferr
1733 roff_rm(ROFF_ARGS)
1735 const char *name;
1736 char *cp;
1737 size_t namesz;
1739 cp = *bufp + pos;
1740 while ('\0' != *cp) {
1741 name = cp;
1742 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1743 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1744 if ('\\' == name[namesz])
1745 break;
1747 return(ROFF_IGN);
1750 static enum rofferr
1751 roff_it(ROFF_ARGS)
1753 char *cp;
1754 size_t len;
1755 int iv;
1757 /* Parse the number of lines. */
1758 cp = *bufp + pos;
1759 len = strcspn(cp, " \t");
1760 cp[len] = '\0';
1761 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1762 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1763 ln, ppos, *bufp + 1);
1764 return(ROFF_IGN);
1766 cp += len + 1;
1768 /* Arm the input line trap. */
1769 roffit_lines = iv;
1770 roffit_macro = mandoc_strdup(cp);
1771 return(ROFF_IGN);
1774 static enum rofferr
1775 roff_Dd(ROFF_ARGS)
1777 const char *const *cp;
1779 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1780 for (cp = __mdoc_reserved; *cp; cp++)
1781 roff_setstr(r, *cp, NULL, 0);
1783 return(ROFF_CONT);
1786 static enum rofferr
1787 roff_TH(ROFF_ARGS)
1789 const char *const *cp;
1791 if (0 == (MPARSE_QUICK & r->options))
1792 for (cp = __man_reserved; *cp; cp++)
1793 roff_setstr(r, *cp, NULL, 0);
1795 return(ROFF_CONT);
1798 static enum rofferr
1799 roff_TE(ROFF_ARGS)
1802 if (NULL == r->tbl)
1803 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1804 ln, ppos, "TE");
1805 else
1806 tbl_end(&r->tbl);
1808 return(ROFF_IGN);
1811 static enum rofferr
1812 roff_T_(ROFF_ARGS)
1815 if (NULL == r->tbl)
1816 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1817 ln, ppos, "T&");
1818 else
1819 tbl_restart(ppos, ln, r->tbl);
1821 return(ROFF_IGN);
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any; return 1 if eqn_end() reported
 * ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return 0;
	return eqn_end(&r->eqn) == ROFF_EQN;
}
#endif
1833 static void
1834 roff_openeqn(struct roff *r, const char *name, int line,
1835 int offs, const char *buf)
1837 struct eqn_node *e;
1838 int poff;
1840 assert(NULL == r->eqn);
1841 e = eqn_alloc(name, offs, line, r->parse);
1843 if (r->last_eqn)
1844 r->last_eqn->next = e;
1845 else
1846 r->first_eqn = r->last_eqn = e;
1848 r->eqn = r->last_eqn = e;
1850 if (buf) {
1851 poff = 0;
1852 eqn_read(&r->eqn, line, buf, offs, &poff);
1856 static enum rofferr
1857 roff_EQ(ROFF_ARGS)
1860 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1861 return(ROFF_IGN);
1864 static enum rofferr
1865 roff_EN(ROFF_ARGS)
1868 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1869 return(ROFF_IGN);
1872 static enum rofferr
1873 roff_TS(ROFF_ARGS)
1875 struct tbl_node *tbl;
1877 if (r->tbl) {
1878 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
1879 ln, ppos, "TS breaks TS");
1880 tbl_end(&r->tbl);
1883 tbl = tbl_alloc(ppos, ln, r->parse);
1885 if (r->last_tbl)
1886 r->last_tbl->next = tbl;
1887 else
1888 r->first_tbl = r->last_tbl = tbl;
1890 r->tbl = r->last_tbl = tbl;
1891 return(ROFF_IGN);
1894 static enum rofferr
1895 roff_cc(ROFF_ARGS)
1897 const char *p;
1899 p = *bufp + pos;
1901 if ('\0' == *p || '.' == (r->control = *p++))
1902 r->control = 0;
1904 if ('\0' != *p)
1905 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1907 return(ROFF_IGN);
1910 static enum rofferr
1911 roff_tr(ROFF_ARGS)
1913 const char *p, *first, *second;
1914 size_t fsz, ssz;
1915 enum mandoc_esc esc;
1917 p = *bufp + pos;
1919 if ('\0' == *p) {
1920 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1921 return(ROFF_IGN);
1924 while ('\0' != *p) {
1925 fsz = ssz = 1;
1927 first = p++;
1928 if ('\\' == *first) {
1929 esc = mandoc_escape(&p, NULL, NULL);
1930 if (ESCAPE_ERROR == esc) {
1931 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1932 ln, (int)(p - *bufp), first);
1933 return(ROFF_IGN);
1935 fsz = (size_t)(p - first);
1938 second = p++;
1939 if ('\\' == *second) {
1940 esc = mandoc_escape(&p, NULL, NULL);
1941 if (ESCAPE_ERROR == esc) {
1942 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1943 ln, (int)(p - *bufp), second);
1944 return(ROFF_IGN);
1946 ssz = (size_t)(p - second);
1947 } else if ('\0' == *second) {
1948 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1949 ln, (int)(p - *bufp), NULL);
1950 second = " ";
1951 p--;
1954 if (fsz > 1) {
1955 roff_setstrn(&r->xmbtab, first, fsz,
1956 second, ssz, 0);
1957 continue;
1960 if (NULL == r->xtab)
1961 r->xtab = mandoc_calloc(128,
1962 sizeof(struct roffstr));
1964 free(r->xtab[(int)*first].p);
1965 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1966 r->xtab[(int)*first].sz = ssz;
1969 return(ROFF_IGN);
1972 static enum rofferr
1973 roff_so(ROFF_ARGS)
1975 char *name;
1977 name = *bufp + pos;
1978 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
1981 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1982 * opening anything that's not in our cwd or anything beneath
1983 * it. Thus, explicitly disallow traversing up the file-system
1984 * or using absolute paths.
1987 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1988 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
1989 ".so %s", name);
1990 return(ROFF_ERR);
1993 *offs = pos;
1994 return(ROFF_SO);
1997 static enum rofferr
1998 roff_userdef(ROFF_ARGS)
2000 const char *arg[9];
2001 char *cp, *n1, *n2;
2002 int i;
2005 * Collect pointers to macro argument strings
2006 * and NUL-terminate them.
2008 cp = *bufp + pos;
2009 for (i = 0; i < 9; i++)
2010 arg[i] = '\0' == *cp ? "" :
2011 mandoc_getarg(r->parse, &cp, ln, &pos);
2014 * Expand macro arguments.
2016 *szp = 0;
2017 n1 = cp = mandoc_strdup(r->current_string);
2018 while (NULL != (cp = strstr(cp, "\\$"))) {
2019 i = cp[2] - '1';
2020 if (0 > i || 8 < i) {
2021 /* Not an argument invocation. */
2022 cp += 2;
2023 continue;
2025 *cp = '\0';
2026 *szp = mandoc_asprintf(&n2, "%s%s%s",
2027 n1, arg[i], cp + 3) + 1;
2028 cp = n2 + (cp - n1);
2029 free(n1);
2030 n1 = n2;
2034 * Replace the macro invocation
2035 * by the expanded macro.
2037 free(*bufp);
2038 *bufp = n1;
2039 if (0 == *szp)
2040 *szp = strlen(*bufp) + 1;
2042 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2043 ROFF_REPARSE : ROFF_APPEND);
2046 static size_t
2047 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2049 char *name, *cp;
2050 size_t namesz;
2052 name = *cpp;
2053 if ('\0' == *name)
2054 return(0);
2056 /* Read until end of name and terminate it with NUL. */
2057 for (cp = name; 1; cp++) {
2058 if ('\0' == *cp || ' ' == *cp) {
2059 namesz = cp - name;
2060 break;
2062 if ('\\' != *cp)
2063 continue;
2064 namesz = cp - name;
2065 if ('{' == cp[1] || '}' == cp[1])
2066 break;
2067 cp++;
2068 if ('\\' == *cp)
2069 continue;
2070 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2071 "%.*s", (int)(cp - name + 1), name);
2072 mandoc_escape((const char **)&cp, NULL, NULL);
2073 break;
2076 /* Read past spaces. */
2077 while (' ' == *cp)
2078 cp++;
2080 *cpp = cp;
2081 return(namesz);
2085 * Store *string into the user-defined string called *name.
2086 * To clear an existing entry, call with (*r, *name, NULL, 0).
2087 * append == 0: replace mode
2088 * append == 1: single-line append mode
2089 * append == 2: multiline append mode, append '\n' after each call
2091 static void
2092 roff_setstr(struct roff *r, const char *name, const char *string,
2093 int append)
2096 roff_setstrn(&r->strtab, name, strlen(name), string,
2097 string ? strlen(string) : 0, append);
2100 static void
2101 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2102 const char *string, size_t stringsz, int append)
2104 struct roffkv *n;
2105 char *c;
2106 int i;
2107 size_t oldch, newch;
2109 /* Search for an existing string with the same name. */
2110 n = *r;
2112 while (n && (namesz != n->key.sz ||
2113 strncmp(n->key.p, name, namesz)))
2114 n = n->next;
2116 if (NULL == n) {
2117 /* Create a new string table entry. */
2118 n = mandoc_malloc(sizeof(struct roffkv));
2119 n->key.p = mandoc_strndup(name, namesz);
2120 n->key.sz = namesz;
2121 n->val.p = NULL;
2122 n->val.sz = 0;
2123 n->next = *r;
2124 *r = n;
2125 } else if (0 == append) {
2126 free(n->val.p);
2127 n->val.p = NULL;
2128 n->val.sz = 0;
2131 if (NULL == string)
2132 return;
2135 * One additional byte for the '\n' in multiline mode,
2136 * and one for the terminating '\0'.
2138 newch = stringsz + (1 < append ? 2u : 1u);
2140 if (NULL == n->val.p) {
2141 n->val.p = mandoc_malloc(newch);
2142 *n->val.p = '\0';
2143 oldch = 0;
2144 } else {
2145 oldch = n->val.sz;
2146 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2149 /* Skip existing content in the destination buffer. */
2150 c = n->val.p + (int)oldch;
2152 /* Append new content to the destination buffer. */
2153 i = 0;
2154 while (i < (int)stringsz) {
2156 * Rudimentary roff copy mode:
2157 * Handle escaped backslashes.
2159 if ('\\' == string[i] && '\\' == string[i + 1])
2160 i++;
2161 *c++ = string[i++];
2164 /* Append terminating bytes. */
2165 if (1 < append)
2166 *c++ = '\n';
2168 *c = '\0';
2169 n->val.sz = (int)(c - n->val.p);
2172 static const char *
2173 roff_getstrn(const struct roff *r, const char *name, size_t len)
2175 const struct roffkv *n;
2176 int i;
2178 for (n = r->strtab; n; n = n->next)
2179 if (0 == strncmp(name, n->key.p, len) &&
2180 '\0' == n->key.p[(int)len])
2181 return(n->val.p);
2183 for (i = 0; i < PREDEFS_MAX; i++)
2184 if (0 == strncmp(name, predefs[i].name, len) &&
2185 '\0' == predefs[i].name[(int)len])
2186 return(predefs[i].str);
2188 return(NULL);
2191 static void
2192 roff_freestr(struct roffkv *r)
2194 struct roffkv *n, *nn;
2196 for (n = r; n; n = nn) {
2197 free(n->key.p);
2198 free(n->val.p);
2199 nn = n->next;
2200 free(n);
2204 const struct tbl_span *
2205 roff_span(const struct roff *r)
2208 return(r->tbl ? tbl_span(r->tbl) : NULL);
2211 const struct eqn *
2212 roff_eqn(const struct roff *r)
2215 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2219 * Duplicate an input string, making the appropriate character
2220 * conversations (as stipulated by `tr') along the way.
2221 * Returns a heap-allocated string with all the replacements made.
2223 char *
2224 roff_strdup(const struct roff *r, const char *p)
2226 const struct roffkv *cp;
2227 char *res;
2228 const char *pp;
2229 size_t ssz, sz;
2230 enum mandoc_esc esc;
2232 if (NULL == r->xmbtab && NULL == r->xtab)
2233 return(mandoc_strdup(p));
2234 else if ('\0' == *p)
2235 return(mandoc_strdup(""));
2238 * Step through each character looking for term matches
2239 * (remember that a `tr' can be invoked with an escape, which is
2240 * a glyph but the escape is multi-character).
2241 * We only do this if the character hash has been initialised
2242 * and the string is >0 length.
2245 res = NULL;
2246 ssz = 0;
2248 while ('\0' != *p) {
2249 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2250 sz = r->xtab[(int)*p].sz;
2251 res = mandoc_realloc(res, ssz + sz + 1);
2252 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2253 ssz += sz;
2254 p++;
2255 continue;
2256 } else if ('\\' != *p) {
2257 res = mandoc_realloc(res, ssz + 2);
2258 res[ssz++] = *p++;
2259 continue;
2262 /* Search for term matches. */
2263 for (cp = r->xmbtab; cp; cp = cp->next)
2264 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2265 break;
2267 if (NULL != cp) {
2269 * A match has been found.
2270 * Append the match to the array and move
2271 * forward by its keysize.
2273 res = mandoc_realloc(res,
2274 ssz + cp->val.sz + 1);
2275 memcpy(res + ssz, cp->val.p, cp->val.sz);
2276 ssz += cp->val.sz;
2277 p += (int)cp->key.sz;
2278 continue;
2282 * Handle escapes carefully: we need to copy
2283 * over just the escape itself, or else we might
2284 * do replacements within the escape itself.
2285 * Make sure to pass along the bogus string.
2287 pp = p++;
2288 esc = mandoc_escape(&p, NULL, NULL);
2289 if (ESCAPE_ERROR == esc) {
2290 sz = strlen(pp);
2291 res = mandoc_realloc(res, ssz + sz + 1);
2292 memcpy(res + ssz, pp, sz);
2293 break;
2296 * We bail out on bad escapes.
2297 * No need to warn: we already did so when
2298 * roff_res() was called.
2300 sz = (int)(p - pp);
2301 res = mandoc_realloc(res, ssz + sz + 1);
2302 memcpy(res + ssz, pp, sz);
2303 ssz += sz;
2306 res[(int)ssz] = '\0';
2307 return(res);
2311 * Find out whether a line is a macro line or not.
2312 * If it is, adjust the current position and return one; if it isn't,
2313 * return zero and don't change the current position.
2314 * If the control character has been set with `.cc', then let that grain
2315 * precedence.
2316 * This is slighly contrary to groff, where using the non-breaking
2317 * control character when `cc' has been invoked will cause the
2318 * non-breaking macro contents to be printed verbatim.
2321 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2323 int pos;
2325 pos = *ppos;
2327 if (0 != r->control && cp[pos] == r->control)
2328 pos++;
2329 else if (0 != r->control)
2330 return(0);
2331 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2332 pos += 2;
2333 else if ('.' == cp[pos] || '\'' == cp[pos])
2334 pos++;
2335 else
2336 return(0);
2338 while (' ' == cp[pos] || '\t' == cp[pos])
2339 pos++;
2341 *ppos = pos;
2342 return(1);