6356 Update mdocml to 1.13.3
[unleashed.git] / usr / src / cmd / mandoc / read.c
blob471d415019f5ee2176bf2240121da6a1041b2b13
1 /* $Id: read.c,v 1.131 2015/03/11 13:05:20 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 #include "config.h"
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
45 #define REPARSE_LIMIT 1000
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
/* Forward declarations of the file-local helpers. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_UNSUPP,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
91 "generic warning",
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "NAME section without name",
113 "NAME section without description",
114 "description not at the end of NAME",
115 "bad NAME section content",
116 "missing description line, using \"\"",
117 "sections out of conventional order",
118 "duplicate section title",
119 "unexpected section",
120 "unusual Xr order",
121 "unusual Xr punctuation",
122 "AUTHORS section without An macro",
124 /* related to macros and nesting */
125 "obsolete macro",
126 "macro neither callable nor escaped",
127 "skipping paragraph macro",
128 "moving paragraph macro out of list",
129 "skipping no-space macro",
130 "blocks badly nested",
131 "nested displays are not portable",
132 "moving content out of list",
133 ".Vt block has child macro",
134 "fill mode already enabled, skipping",
135 "fill mode already disabled, skipping",
136 "line scope broken",
138 /* related to missing macro arguments */
139 "skipping empty request",
140 "conditional request controls empty scope",
141 "skipping empty macro",
142 "empty block",
143 "empty argument, using 0n",
144 "missing display type, using -ragged",
145 "list type is not the first argument",
146 "missing -width in -tag list, using 8n",
147 "missing utility name, using \"\"",
148 "missing function name, using \"\"",
149 "empty head in list item",
150 "empty list item",
151 "missing font type, using \\fR",
152 "unknown font type, using \\fR",
153 "nothing follows prefix",
154 "empty reference block",
155 "missing -std argument, adding it",
156 "missing option string, using \"\"",
157 "missing resource identifier, using \"\"",
158 "missing eqn box, using \"\"",
160 /* related to bad macro arguments */
161 "unterminated quoted argument",
162 "duplicate argument",
163 "skipping duplicate argument",
164 "skipping duplicate display type",
165 "skipping duplicate list type",
166 "skipping -width argument",
167 "wrong number of cells",
168 "unknown AT&T UNIX version",
169 "comma in function argument",
170 "parenthesis in function name",
171 "invalid content in Rs block",
172 "invalid Boolean argument",
173 "unknown font, skipping request",
174 "odd number of characters in request",
176 /* related to plain text */
177 "blank line in fill mode, using .sp",
178 "tab in filled text",
179 "whitespace at end of input line",
180 "bad comment style",
181 "invalid escape sequence",
182 "undefined string, using \"\"",
184 /* related to tables */
185 "tbl line starts with span",
186 "tbl column starts with span",
187 "skipping vertical bar in tbl layout",
189 "generic error",
191 /* related to tables */
192 "non-alphabetic character in tbl options",
193 "skipping unknown tbl option",
194 "missing tbl option argument",
195 "wrong tbl option argument size",
196 "empty tbl layout",
197 "invalid character in tbl layout",
198 "unmatched parenthesis in tbl layout",
199 "tbl without any data cells",
200 "ignoring data in spanned tbl cell",
201 "ignoring extra tbl data cells",
202 "data block open at end of tbl",
204 /* related to document structure and macros */
205 NULL,
206 "input stack limit exceeded, infinite loop?",
207 "skipping bad character",
208 "skipping unknown macro",
209 "skipping insecure request",
210 "skipping item outside list",
211 "skipping column outside column list",
212 "skipping end of block that is not open",
213 "fewer RS blocks open, skipping",
214 "inserting missing end of block",
215 "appending missing end of block",
217 /* related to request and macro arguments */
218 "escaped character not allowed in a name",
219 "NOT IMPLEMENTED: Bd -file",
220 "missing list type, using -item",
221 "missing manual name, using \"\"",
222 "uname(3) system call failed, using UNKNOWN",
223 "unknown standard specifier",
224 "skipping request without numeric argument",
225 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
226 ".so request failed",
227 "skipping all arguments",
228 "skipping excess arguments",
229 "divide by zero",
231 "unsupported feature",
232 "input too large",
233 "unsupported control character",
234 "unsupported roff request",
235 "eqn delim option in tbl",
236 "unsupported tbl layout modifier",
237 "ignoring macro in table",
240 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
241 "SUCCESS",
242 "RESERVED",
243 "WARNING",
244 "ERROR",
245 "UNSUPP",
246 "BADARG",
247 "SYSERR"
251 static void
252 resize_buf(struct buf *buf, size_t initial)
255 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
256 buf->buf = mandoc_realloc(buf->buf, buf->sz);
259 static void
260 choose_parser(struct mparse *curp)
262 char *cp, *ep;
263 int format;
266 * If neither command line arguments -mdoc or -man select
267 * a parser nor the roff parser found a .Dd or .TH macro
268 * yet, look ahead in the main input buffer.
271 if ((format = roff_getformat(curp->roff)) == 0) {
272 cp = curp->primary->buf;
273 ep = cp + curp->primary->sz;
274 while (cp < ep) {
275 if (*cp == '.' || *cp == '\'') {
276 cp++;
277 if (cp[0] == 'D' && cp[1] == 'd') {
278 format = MPARSE_MDOC;
279 break;
281 if (cp[0] == 'T' && cp[1] == 'H') {
282 format = MPARSE_MAN;
283 break;
286 cp = memchr(cp, '\n', ep - cp);
287 if (cp == NULL)
288 break;
289 cp++;
293 if (format == MPARSE_MDOC) {
294 if (NULL == curp->pmdoc)
295 curp->pmdoc = mdoc_alloc(
296 curp->roff, curp, curp->defos,
297 MPARSE_QUICK & curp->options ? 1 : 0);
298 assert(curp->pmdoc);
299 curp->mdoc = curp->pmdoc;
300 return;
303 /* Fall back to man(7) as a last resort. */
305 if (NULL == curp->pman)
306 curp->pman = man_alloc(
307 curp->roff, curp, curp->defos,
308 MPARSE_QUICK & curp->options ? 1 : 0);
309 assert(curp->pman);
310 curp->man = curp->pman;
314 * Main parse routine for a buffer.
315 * It assumes encoding and line numbering are already set up.
316 * It can recurse directly (for invocations of user-defined
317 * macros, inline equations, and input line traps)
318 * and indirectly (for .so file inclusion).
320 static void
321 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
323 const struct tbl_span *span;
324 struct buf ln;
325 const char *save_file;
326 char *cp;
327 size_t pos; /* byte number in the ln buffer */
328 enum rofferr rr;
329 int of;
330 int lnn; /* line number in the real file */
331 int fd;
332 pid_t save_child;
333 unsigned char c;
335 memset(&ln, 0, sizeof(ln));
337 lnn = curp->line;
338 pos = 0;
340 while (i < blk.sz) {
341 if (0 == pos && '\0' == blk.buf[i])
342 break;
344 if (start) {
345 curp->line = lnn;
346 curp->reparse_count = 0;
348 if (lnn < 3 &&
349 curp->filenc & MPARSE_UTF8 &&
350 curp->filenc & MPARSE_LATIN1)
351 curp->filenc = preconv_cue(&blk, i);
354 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
357 * When finding an unescaped newline character,
358 * leave the character loop to process the line.
359 * Skip a preceding carriage return, if any.
362 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
363 '\n' == blk.buf[i + 1])
364 ++i;
365 if ('\n' == blk.buf[i]) {
366 ++i;
367 ++lnn;
368 break;
372 * Make sure we have space for the worst
373 * case of 11 bytes: "\\[u10ffff]\0"
376 if (pos + 11 > ln.sz)
377 resize_buf(&ln, 256);
380 * Encode 8-bit input.
383 c = blk.buf[i];
384 if (c & 0x80) {
385 if ( ! (curp->filenc && preconv_encode(
386 &blk, &i, &ln, &pos, &curp->filenc))) {
387 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
388 curp->line, pos, "0x%x", c);
389 ln.buf[pos++] = '?';
390 i++;
392 continue;
396 * Exclude control characters.
399 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
400 mandoc_vmsg(c == 0x00 || c == 0x04 ||
401 c > 0x0a ? MANDOCERR_CHAR_BAD :
402 MANDOCERR_CHAR_UNSUPP,
403 curp, curp->line, pos, "0x%x", c);
404 i++;
405 if (c != '\r')
406 ln.buf[pos++] = '?';
407 continue;
410 /* Trailing backslash = a plain char. */
412 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
413 ln.buf[pos++] = blk.buf[i++];
414 continue;
418 * Found escape and at least one other character.
419 * When it's a newline character, skip it.
420 * When there is a carriage return in between,
421 * skip that one as well.
424 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
425 '\n' == blk.buf[i + 2])
426 ++i;
427 if ('\n' == blk.buf[i + 1]) {
428 i += 2;
429 ++lnn;
430 continue;
433 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
434 i += 2;
435 /* Comment, skip to end of line */
436 for (; i < blk.sz; ++i) {
437 if ('\n' == blk.buf[i]) {
438 ++i;
439 ++lnn;
440 break;
444 /* Backout trailing whitespaces */
445 for (; pos > 0; --pos) {
446 if (ln.buf[pos - 1] != ' ')
447 break;
448 if (pos > 2 && ln.buf[pos - 2] == '\\')
449 break;
451 break;
454 /* Catch escaped bogus characters. */
456 c = (unsigned char) blk.buf[i+1];
458 if ( ! (isascii(c) &&
459 (isgraph(c) || isblank(c)))) {
460 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
461 curp->line, pos, "0x%x", c);
462 i += 2;
463 ln.buf[pos++] = '?';
464 continue;
467 /* Some other escape sequence, copy & cont. */
469 ln.buf[pos++] = blk.buf[i++];
470 ln.buf[pos++] = blk.buf[i++];
473 if (pos >= ln.sz)
474 resize_buf(&ln, 256);
476 ln.buf[pos] = '\0';
479 * A significant amount of complexity is contained by
480 * the roff preprocessor. It's line-oriented but can be
481 * expressed on one line, so we need at times to
482 * readjust our starting point and re-run it. The roff
483 * preprocessor can also readjust the buffers with new
484 * data, so we pass them in wholesale.
487 of = 0;
490 * Maintain a lookaside buffer of all parsed lines. We
491 * only do this if mparse_keep() has been invoked (the
492 * buffer may be accessed with mparse_getkeep()).
495 if (curp->secondary) {
496 curp->secondary->buf = mandoc_realloc(
497 curp->secondary->buf,
498 curp->secondary->sz + pos + 2);
499 memcpy(curp->secondary->buf +
500 curp->secondary->sz,
501 ln.buf, pos);
502 curp->secondary->sz += pos;
503 curp->secondary->buf
504 [curp->secondary->sz] = '\n';
505 curp->secondary->sz++;
506 curp->secondary->buf
507 [curp->secondary->sz] = '\0';
509 rerun:
510 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
512 switch (rr) {
513 case ROFF_REPARSE:
514 if (REPARSE_LIMIT >= ++curp->reparse_count)
515 mparse_buf_r(curp, ln, of, 0);
516 else
517 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
518 curp->line, pos, NULL);
519 pos = 0;
520 continue;
521 case ROFF_APPEND:
522 pos = strlen(ln.buf);
523 continue;
524 case ROFF_RERUN:
525 goto rerun;
526 case ROFF_IGN:
527 pos = 0;
528 continue;
529 case ROFF_SO:
530 if ( ! (curp->options & MPARSE_SO) &&
531 (i >= blk.sz || blk.buf[i] == '\0')) {
532 curp->sodest = mandoc_strdup(ln.buf + of);
533 free(ln.buf);
534 return;
537 * We remove `so' clauses from our lookaside
538 * buffer because we're going to descend into
539 * the file recursively.
541 if (curp->secondary)
542 curp->secondary->sz -= pos + 1;
543 save_file = curp->file;
544 save_child = curp->child;
545 if (mparse_open(curp, &fd, ln.buf + of) ==
546 MANDOCLEVEL_OK) {
547 mparse_readfd(curp, fd, ln.buf + of);
548 curp->file = save_file;
549 } else {
550 curp->file = save_file;
551 mandoc_vmsg(MANDOCERR_SO_FAIL,
552 curp, curp->line, pos,
553 ".so %s", ln.buf + of);
554 ln.sz = mandoc_asprintf(&cp,
555 ".sp\nSee the file %s.\n.sp",
556 ln.buf + of);
557 free(ln.buf);
558 ln.buf = cp;
559 of = 0;
560 mparse_buf_r(curp, ln, of, 0);
562 curp->child = save_child;
563 pos = 0;
564 continue;
565 default:
566 break;
570 * If input parsers have not been allocated, do so now.
571 * We keep these instanced between parsers, but set them
572 * locally per parse routine since we can use different
573 * parsers with each one.
576 if ( ! (curp->man || curp->mdoc))
577 choose_parser(curp);
580 * Lastly, push down into the parsers themselves.
581 * If libroff returns ROFF_TBL, then add it to the
582 * currently open parse. Since we only get here if
583 * there does exist data (see tbl_data.c), we're
584 * guaranteed that something's been allocated.
585 * Do the same for ROFF_EQN.
588 if (rr == ROFF_TBL) {
589 while ((span = roff_span(curp->roff)) != NULL)
590 if (curp->man == NULL)
591 mdoc_addspan(curp->mdoc, span);
592 else
593 man_addspan(curp->man, span);
594 } else if (rr == ROFF_EQN) {
595 if (curp->man == NULL)
596 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
597 else
598 man_addeqn(curp->man, roff_eqn(curp->roff));
599 } else if ((curp->man == NULL ?
600 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
601 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
602 break;
604 /* Temporary buffers typically are not full. */
606 if (0 == start && '\0' == blk.buf[i])
607 break;
609 /* Start the next input line. */
611 pos = 0;
614 free(ln.buf);
617 static int
618 read_whole_file(struct mparse *curp, const char *file, int fd,
619 struct buf *fb, int *with_mmap)
621 size_t off;
622 ssize_t ssz;
624 #if HAVE_MMAP
625 struct stat st;
626 if (-1 == fstat(fd, &st)) {
627 perror(file);
628 exit((int)MANDOCLEVEL_SYSERR);
632 * If we're a regular file, try just reading in the whole entry
633 * via mmap(). This is faster than reading it into blocks, and
634 * since each file is only a few bytes to begin with, I'm not
635 * concerned that this is going to tank any machines.
638 if (S_ISREG(st.st_mode)) {
639 if (st.st_size > 0x7fffffff) {
640 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
641 return(0);
643 *with_mmap = 1;
644 fb->sz = (size_t)st.st_size;
645 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
646 if (fb->buf != MAP_FAILED)
647 return(1);
649 #endif
652 * If this isn't a regular file (like, say, stdin), then we must
653 * go the old way and just read things in bit by bit.
656 *with_mmap = 0;
657 off = 0;
658 fb->sz = 0;
659 fb->buf = NULL;
660 for (;;) {
661 if (off == fb->sz) {
662 if (fb->sz == (1U << 31)) {
663 mandoc_msg(MANDOCERR_TOOLARGE, curp,
664 0, 0, NULL);
665 break;
667 resize_buf(fb, 65536);
669 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
670 if (ssz == 0) {
671 fb->sz = off;
672 return(1);
674 if (ssz == -1) {
675 perror(file);
676 exit((int)MANDOCLEVEL_SYSERR);
678 off += (size_t)ssz;
681 free(fb->buf);
682 fb->buf = NULL;
683 return(0);
686 static void
687 mparse_end(struct mparse *curp)
690 if (curp->mdoc == NULL &&
691 curp->man == NULL &&
692 curp->sodest == NULL) {
693 if (curp->options & MPARSE_MDOC)
694 curp->mdoc = curp->pmdoc;
695 else {
696 if (curp->pman == NULL)
697 curp->pman = man_alloc(
698 curp->roff, curp, curp->defos,
699 curp->options & MPARSE_QUICK ? 1 : 0);
700 curp->man = curp->pman;
703 if (curp->mdoc)
704 mdoc_endparse(curp->mdoc);
705 if (curp->man)
706 man_endparse(curp->man);
707 roff_endparse(curp->roff);
710 static void
711 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
713 struct buf *svprimary;
714 const char *svfile;
715 size_t offset;
716 static int recursion_depth;
718 if (64 < recursion_depth) {
719 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
720 return;
723 /* Line number is per-file. */
724 svfile = curp->file;
725 curp->file = file;
726 svprimary = curp->primary;
727 curp->primary = &blk;
728 curp->line = 1;
729 recursion_depth++;
731 /* Skip an UTF-8 byte order mark. */
732 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
733 (unsigned char)blk.buf[0] == 0xef &&
734 (unsigned char)blk.buf[1] == 0xbb &&
735 (unsigned char)blk.buf[2] == 0xbf) {
736 offset = 3;
737 curp->filenc &= ~MPARSE_LATIN1;
738 } else
739 offset = 0;
741 mparse_buf_r(curp, blk, offset, 1);
743 if (--recursion_depth == 0)
744 mparse_end(curp);
746 curp->primary = svprimary;
747 curp->file = svfile;
750 enum mandoclevel
751 mparse_readmem(struct mparse *curp, void *buf, size_t len,
752 const char *file)
754 struct buf blk;
756 blk.buf = buf;
757 blk.sz = len;
759 mparse_parse_buffer(curp, blk, file);
760 return(curp->file_status);
764 * Read the whole file into memory and call the parsers.
765 * Called recursively when an .so request is encountered.
767 enum mandoclevel
768 mparse_readfd(struct mparse *curp, int fd, const char *file)
770 struct buf blk;
771 int with_mmap;
772 int save_filenc;
774 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
775 save_filenc = curp->filenc;
776 curp->filenc = curp->options &
777 (MPARSE_UTF8 | MPARSE_LATIN1);
778 mparse_parse_buffer(curp, blk, file);
779 curp->filenc = save_filenc;
780 #if HAVE_MMAP
781 if (with_mmap)
782 munmap(blk.buf, blk.sz);
783 else
784 #endif
785 free(blk.buf);
788 if (fd != STDIN_FILENO && close(fd) == -1)
789 perror(file);
791 mparse_wait(curp);
792 return(curp->file_status);
795 enum mandoclevel
796 mparse_open(struct mparse *curp, int *fd, const char *file)
798 int pfd[2];
799 int save_errno;
800 char *cp;
802 curp->file = file;
804 /* Unless zipped, try to just open the file. */
806 if ((cp = strrchr(file, '.')) == NULL ||
807 strcmp(cp + 1, "gz")) {
808 curp->child = 0;
809 if ((*fd = open(file, O_RDONLY)) != -1)
810 return(MANDOCLEVEL_OK);
812 /* Open failed; try to append ".gz". */
814 mandoc_asprintf(&cp, "%s.gz", file);
815 file = cp;
816 } else
817 cp = NULL;
819 /* Before forking, make sure the file can be read. */
821 save_errno = errno;
822 if (access(file, R_OK) == -1) {
823 if (cp != NULL)
824 errno = save_errno;
825 free(cp);
826 *fd = -1;
827 curp->child = 0;
828 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
829 return(MANDOCLEVEL_ERROR);
832 /* Run gunzip(1). */
834 if (pipe(pfd) == -1) {
835 perror("pipe");
836 exit((int)MANDOCLEVEL_SYSERR);
839 switch (curp->child = fork()) {
840 case -1:
841 perror("fork");
842 exit((int)MANDOCLEVEL_SYSERR);
843 case 0:
844 close(pfd[0]);
845 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
846 perror("dup");
847 exit((int)MANDOCLEVEL_SYSERR);
849 execlp("gunzip", "gunzip", "-c", file, NULL);
850 perror("exec");
851 exit((int)MANDOCLEVEL_SYSERR);
852 default:
853 close(pfd[1]);
854 *fd = pfd[0];
855 return(MANDOCLEVEL_OK);
859 enum mandoclevel
860 mparse_wait(struct mparse *curp)
862 int status;
864 if (curp->child == 0)
865 return(MANDOCLEVEL_OK);
867 if (waitpid(curp->child, &status, 0) == -1) {
868 perror("wait");
869 exit((int)MANDOCLEVEL_SYSERR);
871 curp->child = 0;
872 if (WIFSIGNALED(status)) {
873 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
874 "gunzip died from signal %d", WTERMSIG(status));
875 return(MANDOCLEVEL_ERROR);
877 if (WEXITSTATUS(status)) {
878 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
879 "gunzip failed with code %d", WEXITSTATUS(status));
880 return(MANDOCLEVEL_ERROR);
882 return(MANDOCLEVEL_OK);
885 struct mparse *
886 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
887 const struct mchars *mchars, const char *defos)
889 struct mparse *curp;
891 curp = mandoc_calloc(1, sizeof(struct mparse));
893 curp->options = options;
894 curp->wlevel = wlevel;
895 curp->mmsg = mmsg;
896 curp->defos = defos;
898 curp->mchars = mchars;
899 curp->roff = roff_alloc(curp, curp->mchars, options);
900 if (curp->options & MPARSE_MDOC)
901 curp->pmdoc = mdoc_alloc(
902 curp->roff, curp, curp->defos,
903 curp->options & MPARSE_QUICK ? 1 : 0);
904 if (curp->options & MPARSE_MAN)
905 curp->pman = man_alloc(
906 curp->roff, curp, curp->defos,
907 curp->options & MPARSE_QUICK ? 1 : 0);
909 return(curp);
912 void
913 mparse_reset(struct mparse *curp)
916 roff_reset(curp->roff);
918 if (curp->mdoc)
919 mdoc_reset(curp->mdoc);
920 if (curp->man)
921 man_reset(curp->man);
922 if (curp->secondary)
923 curp->secondary->sz = 0;
925 curp->file_status = MANDOCLEVEL_OK;
926 curp->mdoc = NULL;
927 curp->man = NULL;
929 free(curp->sodest);
930 curp->sodest = NULL;
933 void
934 mparse_free(struct mparse *curp)
937 if (curp->pmdoc)
938 mdoc_free(curp->pmdoc);
939 if (curp->pman)
940 man_free(curp->pman);
941 if (curp->roff)
942 roff_free(curp->roff);
943 if (curp->secondary)
944 free(curp->secondary->buf);
946 free(curp->secondary);
947 free(curp->sodest);
948 free(curp);
951 void
952 mparse_result(struct mparse *curp,
953 struct mdoc **mdoc, struct man **man, char **sodest)
956 if (sodest && NULL != (*sodest = curp->sodest)) {
957 *mdoc = NULL;
958 *man = NULL;
959 return;
961 if (mdoc)
962 *mdoc = curp->mdoc;
963 if (man)
964 *man = curp->man;
967 void
968 mandoc_vmsg(enum mandocerr t, struct mparse *m,
969 int ln, int pos, const char *fmt, ...)
971 char buf[256];
972 va_list ap;
974 va_start(ap, fmt);
975 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
976 va_end(ap);
978 mandoc_msg(t, m, ln, pos, buf);
981 void
982 mandoc_msg(enum mandocerr er, struct mparse *m,
983 int ln, int col, const char *msg)
985 enum mandoclevel level;
987 level = MANDOCLEVEL_UNSUPP;
988 while (er < mandoclimits[level])
989 level--;
991 if (level < m->wlevel && er != MANDOCERR_FILE)
992 return;
994 if (m->mmsg)
995 (*m->mmsg)(er, level, m->file, ln, col, msg);
997 if (m->file_status < level)
998 m->file_status = level;
1001 const char *
1002 mparse_strerror(enum mandocerr er)
1005 return(mandocerrs[er]);
1008 const char *
1009 mparse_strlevel(enum mandoclevel lvl)
1011 return(mandoclevels[lvl]);
1014 void
1015 mparse_keep(struct mparse *p)
1018 assert(NULL == p->secondary);
1019 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1022 const char *
1023 mparse_getkeep(const struct mparse *p)
1026 assert(p->secondary);
1027 return(p->secondary->sz ? p->secondary->buf : NULL);