mandoc(1): Update to 1.9.15.
[dragonfly.git] / usr.bin / mandoc / mdoc.c
blobeeae77db8e4ba17bddf9a96dbfa233b7120c3333
1 /* $Id: mdoc.c,v 1.116 2010/01/07 10:24:43 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
26 #include "libmdoc.h"
27 #include "libmandoc.h"
29 const char *const __mdoc_merrnames[MERRMAX] = {
30 "trailing whitespace", /* ETAILWS */
31 "unexpected quoted parameter", /* EQUOTPARM */
32 "unterminated quoted parameter", /* EQUOTTERM */
33 "argument parameter suggested", /* EARGVAL */
34 "macro disallowed in prologue", /* EBODYPROL */
35 "macro disallowed in body", /* EPROLBODY */
36 "text disallowed in prologue", /* ETEXTPROL */
37 "blank line disallowed", /* ENOBLANK */
38 "text parameter too long", /* ETOOLONG */
39 "invalid escape sequence", /* EESCAPE */
40 "invalid character", /* EPRINT */
41 "document has no body", /* ENODAT */
42 "document has no prologue", /* ENOPROLOGUE */
43 "expected line arguments", /* ELINE */
44 "invalid AT&T argument", /* EATT */
45 "default name not yet set", /* ENAME */
46 "missing list type", /* ELISTTYPE */
47 "missing display type", /* EDISPTYPE */
48 "too many display types", /* EMULTIDISP */
49 "too many list types", /* EMULTILIST */
50 "NAME section must be first", /* ESECNAME */
51 "badly-formed NAME section", /* ENAMESECINC */
52 "argument repeated", /* EARGREP */
53 "expected boolean parameter", /* EBOOL */
54 "inconsistent column syntax", /* ECOLMIS */
55 "nested display invalid", /* ENESTDISP */
56 "width argument missing", /* EMISSWIDTH */
57 "invalid section for this manual section", /* EWRONGMSEC */
58 "section out of conventional order", /* ESECOOO */
59 "section repeated", /* ESECREP */
60 "invalid standard argument", /* EBADSTAND */
61 "multi-line arguments discouraged", /* ENOMULTILINE */
62 "multi-line arguments suggested", /* EMULTILINE */
63 "line arguments discouraged", /* ENOLINE */
64 "prologue macro out of conventional order", /* EPROLOOO */
65 "prologue macro repeated", /* EPROLREP */
66 "invalid manual section", /* EBADMSEC */
67 "invalid section", /* EBADSEC */
68 "invalid font mode", /* EFONT */
69 "invalid date syntax", /* EBADDATE */
70 "invalid number format", /* ENUMFMT */
71 "superfluous width argument", /* ENOWIDTH */
72 "system: utsname error", /* EUTSNAME */
73 "obsolete macro", /* EOBS */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "no description found for library", /* ELIB */
80 "bad child for parent context", /* EBADCHILD */
81 "list arguments preceding type", /* ENOTYPE */
84 const char *const __mdoc_macronames[MDOC_MAX] = {
85 "Ap", "Dd", "Dt", "Os",
86 "Sh", "Ss", "Pp", "D1",
87 "Dl", "Bd", "Ed", "Bl",
88 "El", "It", "Ad", "An",
89 "Ar", "Cd", "Cm", "Dv",
90 "Er", "Ev", "Ex", "Fa",
91 "Fd", "Fl", "Fn", "Ft",
92 "Ic", "In", "Li", "Nd",
93 "Nm", "Op", "Ot", "Pa",
94 "Rv", "St", "Va", "Vt",
95 /* LINTED */
96 "Xr", "%A", "%B", "%D",
97 /* LINTED */
98 "%I", "%J", "%N", "%O",
99 /* LINTED */
100 "%P", "%R", "%T", "%V",
101 "Ac", "Ao", "Aq", "At",
102 "Bc", "Bf", "Bo", "Bq",
103 "Bsx", "Bx", "Db", "Dc",
104 "Do", "Dq", "Ec", "Ef",
105 "Em", "Eo", "Fx", "Ms",
106 "No", "Ns", "Nx", "Ox",
107 "Pc", "Pf", "Po", "Pq",
108 "Qc", "Ql", "Qo", "Qq",
109 "Re", "Rs", "Sc", "So",
110 "Sq", "Sm", "Sx", "Sy",
111 "Tn", "Ux", "Xc", "Xo",
112 "Fo", "Fc", "Oo", "Oc",
113 "Bk", "Ek", "Bt", "Hf",
114 "Fr", "Ud", "Lb", "Lp",
115 "Lk", "Mt", "Brq", "Bro",
116 /* LINTED */
117 "Brc", "%C", "Es", "En",
118 /* LINTED */
119 "Dx", "%Q", "br", "sp",
120 /* LINTED */
121 "%U"
124 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
125 "split", "nosplit", "ragged",
126 "unfilled", "literal", "file",
127 "offset", "bullet", "dash",
128 "hyphen", "item", "enum",
129 "tag", "diag", "hang",
130 "ohang", "inset", "column",
131 "width", "compact", "std",
132 "filled", "words", "emphasis",
133 "symbolic", "nested", "centered"
136 const char * const *mdoc_macronames = __mdoc_macronames;
137 const char * const *mdoc_argnames = __mdoc_argnames;
/* Forward declarations of the file-local helpers defined below. */
static void		 mdoc_free1(struct mdoc *mdoc);
static void		 mdoc_alloc1(struct mdoc *mdoc);
static struct mdoc_node	*node_alloc(struct mdoc *m, int line, int pos,
				int tok, enum mdoc_type type);
static int		 node_append(struct mdoc *mdoc, struct mdoc_node *p);
static int		 parsetext(struct mdoc *m, int line, char *buf);
static int		 parsemacro(struct mdoc *m, int ln, char *buf);
static int		 macrowarn(struct mdoc *m, int ln, const char *buf);
static int		 pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);
151 const struct mdoc_node *
152 mdoc_node(const struct mdoc *m)
155 return(MDOC_HALT & m->flags ? NULL : m->first);
159 const struct mdoc_meta *
160 mdoc_meta(const struct mdoc *m)
163 return(MDOC_HALT & m->flags ? NULL : &m->meta);
168 * Frees volatile resources (parse tree, meta-data, fields).
170 static void
171 mdoc_free1(struct mdoc *mdoc)
174 if (mdoc->first)
175 mdoc_node_freelist(mdoc->first);
176 if (mdoc->meta.title)
177 free(mdoc->meta.title);
178 if (mdoc->meta.os)
179 free(mdoc->meta.os);
180 if (mdoc->meta.name)
181 free(mdoc->meta.name);
182 if (mdoc->meta.arch)
183 free(mdoc->meta.arch);
184 if (mdoc->meta.vol)
185 free(mdoc->meta.vol);
190 * Allocate all volatile resources (parse tree, meta-data, fields).
192 static void
193 mdoc_alloc1(struct mdoc *mdoc)
196 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
197 mdoc->flags = 0;
198 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
199 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
200 mdoc->first = mdoc->last;
201 mdoc->last->type = MDOC_ROOT;
202 mdoc->next = MDOC_NEXT_CHILD;
/*
 * Discard the current parse (mdoc_free1()) and re-initialise the
 * volatile state (mdoc_alloc1()) so the same parser object can be fed
 * a new document.  Fields that neither helper touches, such as the
 * callbacks, user data and parse flags, are left as they were.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);
	mdoc_alloc1(mdoc);
}
/*
 * Destroy a parser: release the volatile parse state and then the
 * parser object itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);
	free(mdoc);
}
235 * Allocate volatile and non-volatile parse resources.
237 struct mdoc *
238 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
240 struct mdoc *p;
242 p = mandoc_calloc(1, sizeof(struct mdoc));
244 if (cb)
245 memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
247 p->data = data;
248 p->pflags = pflags;
250 mdoc_hash_init();
251 mdoc_alloc1(p);
252 return(p);
257 * Climb back up the parse tree, validating open scopes. Mostly calls
258 * through to macro_end() in macro.c.
261 mdoc_endparse(struct mdoc *m)
264 if (MDOC_HALT & m->flags)
265 return(0);
266 else if (mdoc_macroend(m))
267 return(1);
268 m->flags |= MDOC_HALT;
269 return(0);
274 * Main parse routine. Parses a single line -- really just hands off to
275 * the macro (parsemacro()) or text parser (parsetext()).
278 mdoc_parseln(struct mdoc *m, int ln, char *buf)
281 if (MDOC_HALT & m->flags)
282 return(0);
284 return('.' == *buf ? parsemacro(m, ln, buf) :
285 parsetext(m, ln, buf));
290 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
291 const char *fmt, ...)
293 char buf[256];
294 va_list ap;
296 if (NULL == mdoc->cb.mdoc_err)
297 return(0);
299 va_start(ap, fmt);
300 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
301 va_end(ap);
303 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
308 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
310 char buf[256];
311 va_list ap;
313 if (NULL == mdoc->cb.mdoc_warn)
314 return(0);
316 va_start(ap, fmt);
317 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
318 va_end(ap);
320 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
325 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
327 const char *p;
329 p = __mdoc_merrnames[(int)type];
330 assert(p);
332 if (iserr)
333 return(mdoc_verr(m, line, pos, p));
335 return(mdoc_vwarn(m, line, pos, p));
340 mdoc_macro(struct mdoc *m, int tok,
341 int ln, int pp, int *pos, char *buf)
344 * If we're in the prologue, deny "body" macros. Similarly, if
345 * we're in the body, deny prologue calls.
347 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
348 MDOC_PBODY & m->flags)
349 return(mdoc_perr(m, ln, pp, EPROLBODY));
350 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
351 ! (MDOC_PBODY & m->flags))
352 return(mdoc_perr(m, ln, pp, EBODYPROL));
354 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
358 static int
359 node_append(struct mdoc *mdoc, struct mdoc_node *p)
362 assert(mdoc->last);
363 assert(mdoc->first);
364 assert(MDOC_ROOT != p->type);
366 switch (mdoc->next) {
367 case (MDOC_NEXT_SIBLING):
368 mdoc->last->next = p;
369 p->prev = mdoc->last;
370 p->parent = mdoc->last->parent;
371 break;
372 case (MDOC_NEXT_CHILD):
373 mdoc->last->child = p;
374 p->parent = mdoc->last;
375 break;
376 default:
377 abort();
378 /* NOTREACHED */
381 p->parent->nchild++;
383 if ( ! mdoc_valid_pre(mdoc, p))
384 return(0);
385 if ( ! mdoc_action_pre(mdoc, p))
386 return(0);
388 switch (p->type) {
389 case (MDOC_HEAD):
390 assert(MDOC_BLOCK == p->parent->type);
391 p->parent->head = p;
392 break;
393 case (MDOC_TAIL):
394 assert(MDOC_BLOCK == p->parent->type);
395 p->parent->tail = p;
396 break;
397 case (MDOC_BODY):
398 assert(MDOC_BLOCK == p->parent->type);
399 p->parent->body = p;
400 break;
401 default:
402 break;
405 mdoc->last = p;
407 switch (p->type) {
408 case (MDOC_TEXT):
409 if ( ! mdoc_valid_post(mdoc))
410 return(0);
411 if ( ! mdoc_action_post(mdoc))
412 return(0);
413 break;
414 default:
415 break;
418 return(1);
422 static struct mdoc_node *
423 node_alloc(struct mdoc *m, int line,
424 int pos, int tok, enum mdoc_type type)
426 struct mdoc_node *p;
428 p = mandoc_calloc(1, sizeof(struct mdoc_node));
429 p->sec = m->lastsec;
430 p->line = line;
431 p->pos = pos;
432 p->tok = tok;
433 if (MDOC_TEXT != (p->type = type))
434 assert(p->tok >= 0);
436 return(p);
441 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
443 struct mdoc_node *p;
445 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
446 if ( ! node_append(m, p))
447 return(0);
448 m->next = MDOC_NEXT_CHILD;
449 return(1);
454 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
456 struct mdoc_node *p;
458 assert(m->first);
459 assert(m->last);
461 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
462 if ( ! node_append(m, p))
463 return(0);
464 m->next = MDOC_NEXT_CHILD;
465 return(1);
470 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
472 struct mdoc_node *p;
474 p = node_alloc(m, line, pos, tok, MDOC_BODY);
475 if ( ! node_append(m, p))
476 return(0);
477 m->next = MDOC_NEXT_CHILD;
478 return(1);
483 mdoc_block_alloc(struct mdoc *m, int line, int pos,
484 int tok, struct mdoc_arg *args)
486 struct mdoc_node *p;
488 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
489 p->args = args;
490 if (p->args)
491 (args->refcnt)++;
492 if ( ! node_append(m, p))
493 return(0);
494 m->next = MDOC_NEXT_CHILD;
495 return(1);
500 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
501 int tok, struct mdoc_arg *args)
503 struct mdoc_node *p;
505 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
506 p->args = args;
507 if (p->args)
508 (args->refcnt)++;
509 if ( ! node_append(m, p))
510 return(0);
511 m->next = MDOC_NEXT_CHILD;
512 return(1);
516 static int
517 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
519 struct mdoc_node *n;
520 size_t sv;
522 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
523 n->string = mandoc_malloc(len + 1);
524 sv = strlcpy(n->string, p, len + 1);
526 /* Prohibit truncation. */
527 assert(sv < len + 1);
529 if ( ! node_append(m, n))
530 return(0);
531 m->next = MDOC_NEXT_SIBLING;
532 return(1);
/*
 * Append the whole NUL-terminated string `p' to the tree as a text
 * node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	const size_t	 len = strlen(p);

	return pstring(m, line, pos, p, len);
}
544 void
545 mdoc_node_free(struct mdoc_node *p)
548 if (p->parent)
549 p->parent->nchild--;
550 if (p->string)
551 free(p->string);
552 if (p->args)
553 mdoc_argv_free(p->args);
554 free(p);
558 void
559 mdoc_node_freelist(struct mdoc_node *p)
562 if (p->child)
563 mdoc_node_freelist(p->child);
564 if (p->next)
565 mdoc_node_freelist(p->next);
567 assert(0 == p->nchild);
568 mdoc_node_free(p);
573 * Parse free-form text, that is, a line that does not begin with the
574 * control character.
576 static int
577 parsetext(struct mdoc *m, int line, char *buf)
579 int i, j;
580 char sv;
582 if (SEC_NONE == m->lastnamed)
583 return(mdoc_perr(m, line, 0, ETEXTPROL));
586 * If in literal mode, then pass the buffer directly to the
587 * back-end, as it should be preserved as a single term.
590 if (MDOC_LITERAL & m->flags)
591 return(mdoc_word_alloc(m, line, 0, buf));
593 /* Disallow blank/white-space lines in non-literal mode. */
595 for (i = 0; ' ' == buf[i]; i++)
596 /* Skip leading whitespace. */ ;
598 if ('\0' == buf[i])
599 return(mdoc_perr(m, line, 0, ENOBLANK));
602 * Break apart a free-form line into tokens. Spaces are
603 * stripped out of the input.
606 for (j = i; buf[i]; i++) {
607 if (' ' != buf[i])
608 continue;
610 /* Escaped whitespace. */
611 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
612 continue;
614 sv = buf[i];
615 buf[i++] = '\0';
617 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
618 return(0);
620 /* Trailing whitespace? Check at overwritten byte. */
622 if (' ' == sv && '\0' == buf[i])
623 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
624 return(0);
626 for ( ; ' ' == buf[i]; i++)
627 /* Skip trailing whitespace. */ ;
629 j = i;
631 /* Trailing whitespace? */
633 if (' ' == buf[i - 1] && '\0' == buf[i])
634 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
635 return(0);
637 if ('\0' == buf[i])
638 break;
641 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
642 return(0);
644 m->next = MDOC_NEXT_SIBLING;
645 return(1);
650 static int
651 macrowarn(struct mdoc *m, int ln, const char *buf)
653 if ( ! (MDOC_IGN_MACRO & m->pflags))
654 return(mdoc_verr(m, ln, 0,
655 "unknown macro: %s%s",
656 buf, strlen(buf) > 3 ? "..." : ""));
657 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
658 buf, strlen(buf) > 3 ? "..." : ""));
663 * Parse a macro line, that is, a line beginning with the control
664 * character.
667 parsemacro(struct mdoc *m, int ln, char *buf)
669 int i, j, c;
670 char mac[5];
672 /* Empty lines are ignored. */
674 if ('\0' == buf[1])
675 return(1);
677 i = 1;
679 /* Accept whitespace after the initial control char. */
681 if (' ' == buf[i]) {
682 i++;
683 while (buf[i] && ' ' == buf[i])
684 i++;
685 if ('\0' == buf[i])
686 return(1);
689 /* Copy the first word into a nil-terminated buffer. */
691 for (j = 0; j < 4; j++, i++) {
692 if ('\0' == (mac[j] = buf[i]))
693 break;
694 else if (' ' == buf[i])
695 break;
697 /* Check for invalid characters. */
699 if (isgraph((u_char)buf[i]))
700 continue;
701 return(mdoc_perr(m, ln, i, EPRINT));
704 mac[j] = 0;
706 if (j == 4 || j < 2) {
707 if ( ! macrowarn(m, ln, mac))
708 goto err;
709 return(1);
712 if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
713 if ( ! macrowarn(m, ln, mac))
714 goto err;
715 return(1);
718 /* The macro is sane. Jump to the next word. */
720 while (buf[i] && ' ' == buf[i])
721 i++;
723 /* Trailing whitespace? */
725 if ('\0' == buf[i] && ' ' == buf[i - 1])
726 if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS))
727 goto err;
730 * Begin recursive parse sequence. Since we're at the start of
731 * the line, we don't need to do callable/parseable checks.
733 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
734 goto err;
736 return(1);
738 err: /* Error out. */
740 m->flags |= MDOC_HALT;
741 return(0);