mandoc(1): Update to 1.9.13.
[dragonfly.git] / usr.bin / mandoc / mdoc.c
blob05ea36c1eb8cacb4f9c34835c162eb280a6985fc
1 /* $Id: mdoc.c,v 1.113 2009/10/30 05:58:38 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
26 #include "libmdoc.h"
27 #include "libmandoc.h"
29 const char *const __mdoc_merrnames[MERRMAX] = {
30 "trailing whitespace", /* ETAILWS */
31 "unexpected quoted parameter", /* EQUOTPARM */
32 "unterminated quoted parameter", /* EQUOTTERM */
33 "argument parameter suggested", /* EARGVAL */
34 "macro disallowed in prologue", /* EBODYPROL */
35 "macro disallowed in body", /* EPROLBODY */
36 "text disallowed in prologue", /* ETEXTPROL */
37 "blank line disallowed", /* ENOBLANK */
38 "text parameter too long", /* ETOOLONG */
39 "invalid escape sequence", /* EESCAPE */
40 "invalid character", /* EPRINT */
41 "document has no body", /* ENODAT */
42 "document has no prologue", /* ENOPROLOGUE */
43 "expected line arguments", /* ELINE */
44 "invalid AT&T argument", /* EATT */
45 "default name not yet set", /* ENAME */
46 "missing list type", /* ELISTTYPE */
47 "missing display type", /* EDISPTYPE */
48 "too many display types", /* EMULTIDISP */
49 "too many list types", /* EMULTILIST */
50 "NAME section must be first", /* ESECNAME */
51 "badly-formed NAME section", /* ENAMESECINC */
52 "argument repeated", /* EARGREP */
53 "expected boolean parameter", /* EBOOL */
54 "inconsistent column syntax", /* ECOLMIS */
55 "nested display invalid", /* ENESTDISP */
56 "width argument missing", /* EMISSWIDTH */
57 "invalid section for this manual section", /* EWRONGMSEC */
58 "section out of conventional order", /* ESECOOO */
59 "section repeated", /* ESECREP */
60 "invalid standard argument", /* EBADSTAND */
61 "multi-line arguments discouraged", /* ENOMULTILINE */
62 "multi-line arguments suggested", /* EMULTILINE */
63 "line arguments discouraged", /* ENOLINE */
64 "prologue macro out of conventional order", /* EPROLOOO */
65 "prologue macro repeated", /* EPROLREP */
66 "invalid manual section", /* EBADMSEC */
67 "invalid section", /* EBADSEC */
68 "invalid font mode", /* EFONT */
69 "invalid date syntax", /* EBADDATE */
70 "invalid number format", /* ENUMFMT */
71 "superfluous width argument", /* ENOWIDTH */
72 "system: utsname error", /* EUTSNAME */
73 "obsolete macro", /* EOBS */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "no description found for library", /* ELIB */
80 "bad child for parent context", /* EBADCHILD */
81 "list arguments preceding type", /* ENOTYPE */
84 const char *const __mdoc_macronames[MDOC_MAX] = {
85 "Ap", "Dd", "Dt", "Os",
86 "Sh", "Ss", "Pp", "D1",
87 "Dl", "Bd", "Ed", "Bl",
88 "El", "It", "Ad", "An",
89 "Ar", "Cd", "Cm", "Dv",
90 "Er", "Ev", "Ex", "Fa",
91 "Fd", "Fl", "Fn", "Ft",
92 "Ic", "In", "Li", "Nd",
93 "Nm", "Op", "Ot", "Pa",
94 "Rv", "St", "Va", "Vt",
95 /* LINTED */
96 "Xr", "\%A", "\%B", "\%D",
97 /* LINTED */
98 "\%I", "\%J", "\%N", "\%O",
99 /* LINTED */
100 "\%P", "\%R", "\%T", "\%V",
101 "Ac", "Ao", "Aq", "At",
102 "Bc", "Bf", "Bo", "Bq",
103 "Bsx", "Bx", "Db", "Dc",
104 "Do", "Dq", "Ec", "Ef",
105 "Em", "Eo", "Fx", "Ms",
106 "No", "Ns", "Nx", "Ox",
107 "Pc", "Pf", "Po", "Pq",
108 "Qc", "Ql", "Qo", "Qq",
109 "Re", "Rs", "Sc", "So",
110 "Sq", "Sm", "Sx", "Sy",
111 "Tn", "Ux", "Xc", "Xo",
112 "Fo", "Fc", "Oo", "Oc",
113 "Bk", "Ek", "Bt", "Hf",
114 "Fr", "Ud", "Lb", "Lp",
115 "Lk", "Mt", "Brq", "Bro",
116 /* LINTED */
117 "Brc", "\%C", "Es", "En",
118 /* LINTED */
119 "Dx", "\%Q", "br", "sp",
120 /* LINTED */
121 "\%U"
124 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
125 "split", "nosplit", "ragged",
126 "unfilled", "literal", "file",
127 "offset", "bullet", "dash",
128 "hyphen", "item", "enum",
129 "tag", "diag", "hang",
130 "ohang", "inset", "column",
131 "width", "compact", "std",
132 "filled", "words", "emphasis",
133 "symbolic", "nested", "centered"
136 const char * const *mdoc_macronames = __mdoc_macronames;
137 const char * const *mdoc_argnames = __mdoc_argnames;
/* Forward declarations of the file-local helpers defined below. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);

/* Declared by hand on Linux only. */
#ifdef __linux__
extern	size_t		  strlcpy(char *dst, const char *src, size_t dstsize);
#endif
156 const struct mdoc_node *
157 mdoc_node(const struct mdoc *m)
160 return(MDOC_HALT & m->flags ? NULL : m->first);
164 const struct mdoc_meta *
165 mdoc_meta(const struct mdoc *m)
168 return(MDOC_HALT & m->flags ? NULL : &m->meta);
173 * Frees volatile resources (parse tree, meta-data, fields).
175 static void
176 mdoc_free1(struct mdoc *mdoc)
179 if (mdoc->first)
180 mdoc_node_freelist(mdoc->first);
181 if (mdoc->meta.title)
182 free(mdoc->meta.title);
183 if (mdoc->meta.os)
184 free(mdoc->meta.os);
185 if (mdoc->meta.name)
186 free(mdoc->meta.name);
187 if (mdoc->meta.arch)
188 free(mdoc->meta.arch);
189 if (mdoc->meta.vol)
190 free(mdoc->meta.vol);
195 * Allocate all volatile resources (parse tree, meta-data, fields).
197 static void
198 mdoc_alloc1(struct mdoc *mdoc)
201 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
202 mdoc->flags = 0;
203 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
204 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
205 mdoc->first = mdoc->last;
206 mdoc->last->type = MDOC_ROOT;
207 mdoc->next = MDOC_NEXT_CHILD;
/*
 * Discard the current parse state and start over: volatile resources
 * are released with mdoc_free1() and immediately re-created with
 * mdoc_alloc1().  Data that lives across parses (callbacks, user data,
 * parse flags) is left untouched.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	/* Tear down the old tree and meta-data ... */
	mdoc_free1(mdoc);
	/* ... then build an empty tree ready for the next document. */
	mdoc_alloc1(mdoc);
}
/*
 * Destroy a parser: release the volatile resources and then the
 * parser structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	/* Parse tree and meta-data strings first ... */
	mdoc_free1(mdoc);
	/* ... then the containing structure. */
	free(mdoc);
}
240 * Allocate volatile and non-volatile parse resources.
242 struct mdoc *
243 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
245 struct mdoc *p;
247 p = mandoc_calloc(1, sizeof(struct mdoc));
249 if (cb)
250 memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
252 p->data = data;
253 p->pflags = pflags;
255 mdoc_hash_init();
256 mdoc_alloc1(p);
257 return(p);
262 * Climb back up the parse tree, validating open scopes. Mostly calls
263 * through to macro_end() in macro.c.
266 mdoc_endparse(struct mdoc *m)
269 if (MDOC_HALT & m->flags)
270 return(0);
271 else if (mdoc_macroend(m))
272 return(1);
273 m->flags |= MDOC_HALT;
274 return(0);
279 * Main parse routine. Parses a single line -- really just hands off to
280 * the macro (parsemacro()) or text parser (parsetext()).
283 mdoc_parseln(struct mdoc *m, int ln, char *buf)
286 if (MDOC_HALT & m->flags)
287 return(0);
289 return('.' == *buf ? parsemacro(m, ln, buf) :
290 parsetext(m, ln, buf));
295 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
296 const char *fmt, ...)
298 char buf[256];
299 va_list ap;
301 if (NULL == mdoc->cb.mdoc_err)
302 return(0);
304 va_start(ap, fmt);
305 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
306 va_end(ap);
308 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
313 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
315 char buf[256];
316 va_list ap;
318 if (NULL == mdoc->cb.mdoc_warn)
319 return(0);
321 va_start(ap, fmt);
322 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
323 va_end(ap);
325 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
330 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
332 const char *p;
334 p = __mdoc_merrnames[(int)type];
335 assert(p);
337 if (iserr)
338 return(mdoc_verr(m, line, pos, p));
340 return(mdoc_vwarn(m, line, pos, p));
345 mdoc_macro(struct mdoc *m, int tok,
346 int ln, int pp, int *pos, char *buf)
349 * If we're in the prologue, deny "body" macros. Similarly, if
350 * we're in the body, deny prologue calls.
352 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
353 MDOC_PBODY & m->flags)
354 return(mdoc_perr(m, ln, pp, EPROLBODY));
355 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
356 ! (MDOC_PBODY & m->flags))
357 return(mdoc_perr(m, ln, pp, EBODYPROL));
359 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
363 static int
364 node_append(struct mdoc *mdoc, struct mdoc_node *p)
367 assert(mdoc->last);
368 assert(mdoc->first);
369 assert(MDOC_ROOT != p->type);
371 switch (mdoc->next) {
372 case (MDOC_NEXT_SIBLING):
373 mdoc->last->next = p;
374 p->prev = mdoc->last;
375 p->parent = mdoc->last->parent;
376 break;
377 case (MDOC_NEXT_CHILD):
378 mdoc->last->child = p;
379 p->parent = mdoc->last;
380 break;
381 default:
382 abort();
383 /* NOTREACHED */
386 p->parent->nchild++;
388 if ( ! mdoc_valid_pre(mdoc, p))
389 return(0);
390 if ( ! mdoc_action_pre(mdoc, p))
391 return(0);
393 switch (p->type) {
394 case (MDOC_HEAD):
395 assert(MDOC_BLOCK == p->parent->type);
396 p->parent->head = p;
397 break;
398 case (MDOC_TAIL):
399 assert(MDOC_BLOCK == p->parent->type);
400 p->parent->tail = p;
401 break;
402 case (MDOC_BODY):
403 assert(MDOC_BLOCK == p->parent->type);
404 p->parent->body = p;
405 break;
406 default:
407 break;
410 mdoc->last = p;
412 switch (p->type) {
413 case (MDOC_TEXT):
414 if ( ! mdoc_valid_post(mdoc))
415 return(0);
416 if ( ! mdoc_action_post(mdoc))
417 return(0);
418 break;
419 default:
420 break;
423 return(1);
427 static struct mdoc_node *
428 node_alloc(struct mdoc *m, int line,
429 int pos, int tok, enum mdoc_type type)
431 struct mdoc_node *p;
433 p = mandoc_calloc(1, sizeof(struct mdoc_node));
434 p->sec = m->lastsec;
435 p->line = line;
436 p->pos = pos;
437 p->tok = tok;
438 if (MDOC_TEXT != (p->type = type))
439 assert(p->tok >= 0);
441 return(p);
446 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
448 struct mdoc_node *p;
450 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
451 if ( ! node_append(m, p))
452 return(0);
453 m->next = MDOC_NEXT_CHILD;
454 return(1);
459 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
461 struct mdoc_node *p;
463 assert(m->first);
464 assert(m->last);
466 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
467 if ( ! node_append(m, p))
468 return(0);
469 m->next = MDOC_NEXT_CHILD;
470 return(1);
475 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
477 struct mdoc_node *p;
479 p = node_alloc(m, line, pos, tok, MDOC_BODY);
480 if ( ! node_append(m, p))
481 return(0);
482 m->next = MDOC_NEXT_CHILD;
483 return(1);
488 mdoc_block_alloc(struct mdoc *m, int line, int pos,
489 int tok, struct mdoc_arg *args)
491 struct mdoc_node *p;
493 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
494 p->args = args;
495 if (p->args)
496 (args->refcnt)++;
497 if ( ! node_append(m, p))
498 return(0);
499 m->next = MDOC_NEXT_CHILD;
500 return(1);
505 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
506 int tok, struct mdoc_arg *args)
508 struct mdoc_node *p;
510 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
511 p->args = args;
512 if (p->args)
513 (args->refcnt)++;
514 if ( ! node_append(m, p))
515 return(0);
516 m->next = MDOC_NEXT_CHILD;
517 return(1);
521 static int
522 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
524 struct mdoc_node *n;
525 size_t sv;
527 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
528 n->string = mandoc_malloc(len + 1);
529 sv = strlcpy(n->string, p, len + 1);
531 /* Prohibit truncation. */
532 assert(sv < len + 1);
534 if ( ! node_append(m, n))
535 return(0);
536 m->next = MDOC_NEXT_SIBLING;
537 return(1);
/*
 * Append the whole NUL-terminated string `p' to the tree as a single
 * text node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return pstring(m, line, pos, p, len);
}
549 void
550 mdoc_node_free(struct mdoc_node *p)
553 if (p->parent)
554 p->parent->nchild--;
555 if (p->string)
556 free(p->string);
557 if (p->args)
558 mdoc_argv_free(p->args);
559 free(p);
563 void
564 mdoc_node_freelist(struct mdoc_node *p)
567 if (p->child)
568 mdoc_node_freelist(p->child);
569 if (p->next)
570 mdoc_node_freelist(p->next);
572 assert(0 == p->nchild);
573 mdoc_node_free(p);
578 * Parse free-form text, that is, a line that does not begin with the
579 * control character.
581 static int
582 parsetext(struct mdoc *m, int line, char *buf)
584 int i, j;
586 if (SEC_NONE == m->lastnamed)
587 return(mdoc_perr(m, line, 0, ETEXTPROL));
590 * If in literal mode, then pass the buffer directly to the
591 * back-end, as it should be preserved as a single term.
594 if (MDOC_LITERAL & m->flags)
595 return(mdoc_word_alloc(m, line, 0, buf));
597 /* Disallow blank/white-space lines in non-literal mode. */
599 for (i = 0; ' ' == buf[i]; i++)
600 /* Skip leading whitespace. */ ;
601 if (0 == buf[i])
602 return(mdoc_perr(m, line, 0, ENOBLANK));
605 * Break apart a free-form line into tokens. Spaces are
606 * stripped out of the input.
609 for (j = i; buf[i]; i++) {
610 if (' ' != buf[i])
611 continue;
613 /* Escaped whitespace. */
614 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
615 continue;
617 buf[i++] = 0;
618 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
619 return(0);
621 for ( ; ' ' == buf[i]; i++)
622 /* Skip trailing whitespace. */ ;
624 j = i;
625 if (0 == buf[i])
626 break;
629 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
630 return(0);
632 m->next = MDOC_NEXT_SIBLING;
633 return(1);
638 static int
639 macrowarn(struct mdoc *m, int ln, const char *buf)
641 if ( ! (MDOC_IGN_MACRO & m->pflags))
642 return(mdoc_verr(m, ln, 0,
643 "unknown macro: %s%s",
644 buf, strlen(buf) > 3 ? "..." : ""));
645 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
646 buf, strlen(buf) > 3 ? "..." : ""));
651 * Parse a macro line, that is, a line beginning with the control
652 * character.
655 parsemacro(struct mdoc *m, int ln, char *buf)
657 int i, j, c;
658 char mac[5];
660 /* Empty lines are ignored. */
662 if (0 == buf[1])
663 return(1);
665 i = 1;
667 /* Accept whitespace after the initial control char. */
669 if (' ' == buf[i]) {
670 i++;
671 while (buf[i] && ' ' == buf[i])
672 i++;
673 if (0 == buf[i])
674 return(1);
677 /* Copy the first word into a nil-terminated buffer. */
679 for (j = 0; j < 4; j++, i++) {
680 if (0 == (mac[j] = buf[i]))
681 break;
682 else if (' ' == buf[i])
683 break;
685 /* Check for invalid characters. */
687 if (isgraph((u_char)buf[i]))
688 continue;
689 return(mdoc_perr(m, ln, i, EPRINT));
692 mac[j] = 0;
694 if (j == 4 || j < 2) {
695 if ( ! macrowarn(m, ln, mac))
696 goto err;
697 return(1);
700 if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
701 if ( ! macrowarn(m, ln, mac))
702 goto err;
703 return(1);
706 /* The macro is sane. Jump to the next word. */
708 while (buf[i] && ' ' == buf[i])
709 i++;
712 * Begin recursive parse sequence. Since we're at the start of
713 * the line, we don't need to do callable/parseable checks.
715 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
716 goto err;
718 return(1);
720 err: /* Error out. */
722 m->flags |= MDOC_HALT;
723 return(0);