mandoc: update to 1.14.2
[unleashed.git] / bin / mandoc / mdoc_markdown.c
blob0b0f184821e5bf5990f1eb1a6f9d6a274f41764e
1 /* $Id: mdoc_markdown.c,v 1.23 2017/06/14 01:31:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdio.h>
22 #include <string.h>
24 #include "mandoc_aux.h"
25 #include "mandoc.h"
26 #include "roff.h"
27 #include "mdoc.h"
28 #include "main.h"
/*
 * Formatting actions for one mdoc(7) macro.
 * md_node() consults cond first; pre and post only run when cond
 * is NULL or returns non-zero.
 */
struct	md_act {
	/* Optional gate deciding whether pre/post apply to this node. */
	int		(*cond)(struct roff_node *n);
	/* Run before the children; its result decides whether to descend. */
	int		(*pre)(struct roff_node *n);
	/* Run after the children. */
	void		(*post)(struct roff_node *n);
	const char	 *prefix; /* pre-node string constant */
	const char	 *suffix; /* post-node string constant */
};
/* Tree traversal and low-level output helpers. */
static	void	 md_nodelist(struct roff_node *n);
static	void	 md_node(struct roff_node *n);
static	const char *md_stack(char c);
static	void	 md_preword(void);
static	void	 md_rawword(const char *s);
static	void	 md_word(const char *s);
static	void	 md_named(const char *s);
static	void	 md_char(unsigned char c);
static	void	 md_uri(const char *s);

/* Node type conditions. */
static	int	 md_cond_head(struct roff_node *n);
static	int	 md_cond_body(struct roff_node *n);

/* Pre-node handlers. */
static	int	 md_pre_raw(struct roff_node *n);
static	int	 md_pre_word(struct roff_node *n);
static	int	 md_pre_skip(struct roff_node *n);
static	void	 md_pre_syn(struct roff_node *n);
static	int	 md_pre_An(struct roff_node *n);
static	int	 md_pre_Ap(struct roff_node *n);
static	int	 md_pre_Bd(struct roff_node *n);
static	int	 md_pre_Bk(struct roff_node *n);
static	int	 md_pre_Bl(struct roff_node *n);
static	int	 md_pre_D1(struct roff_node *n);
static	int	 md_pre_Dl(struct roff_node *n);
static	int	 md_pre_En(struct roff_node *n);
static	int	 md_pre_Eo(struct roff_node *n);
static	int	 md_pre_Fa(struct roff_node *n);
static	int	 md_pre_Fd(struct roff_node *n);
static	int	 md_pre_Fn(struct roff_node *n);
static	int	 md_pre_Fo(struct roff_node *n);
static	int	 md_pre_In(struct roff_node *n);
static	int	 md_pre_It(struct roff_node *n);
static	int	 md_pre_Lk(struct roff_node *n);
static	int	 md_pre_Mt(struct roff_node *n);
static	int	 md_pre_Nd(struct roff_node *n);
static	int	 md_pre_Nm(struct roff_node *n);
static	int	 md_pre_No(struct roff_node *n);
static	int	 md_pre_Ns(struct roff_node *n);
static	int	 md_pre_Pp(struct roff_node *n);
static	int	 md_pre_Rs(struct roff_node *n);
static	int	 md_pre_Sh(struct roff_node *n);
static	int	 md_pre_Sm(struct roff_node *n);
static	int	 md_pre_Vt(struct roff_node *n);
static	int	 md_pre_Xr(struct roff_node *n);
static	int	 md_pre__T(struct roff_node *n);
static	int	 md_pre_br(struct roff_node *n);

/* Post-node handlers. */
static	void	 md_post_raw(struct roff_node *n);
static	void	 md_post_word(struct roff_node *n);
static	void	 md_post_pc(struct roff_node *n);
static	void	 md_post_Bk(struct roff_node *n);
static	void	 md_post_Bl(struct roff_node *n);
static	void	 md_post_D1(struct roff_node *n);
static	void	 md_post_En(struct roff_node *n);
static	void	 md_post_Eo(struct roff_node *n);
static	void	 md_post_Fa(struct roff_node *n);
static	void	 md_post_Fd(struct roff_node *n);
static	void	 md_post_Fl(struct roff_node *n);
static	void	 md_post_Fn(struct roff_node *n);
static	void	 md_post_Fo(struct roff_node *n);
static	void	 md_post_In(struct roff_node *n);
static	void	 md_post_It(struct roff_node *n);
static	void	 md_post_Lb(struct roff_node *n);
static	void	 md_post_Nm(struct roff_node *n);
static	void	 md_post_Pf(struct roff_node *n);
static	void	 md_post_Vt(struct roff_node *n);
static	void	 md_post__T(struct roff_node *n);
106 static const struct md_act __md_acts[MDOC_MAX - MDOC_Dd] = {
107 { NULL, NULL, NULL, NULL, NULL }, /* Dd */
108 { NULL, NULL, NULL, NULL, NULL }, /* Dt */
109 { NULL, NULL, NULL, NULL, NULL }, /* Os */
110 { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Sh */
111 { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Ss */
112 { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Pp */
113 { md_cond_body, md_pre_D1, md_post_D1, NULL, NULL }, /* D1 */
114 { md_cond_body, md_pre_Dl, md_post_D1, NULL, NULL }, /* Dl */
115 { md_cond_body, md_pre_Bd, md_post_D1, NULL, NULL }, /* Bd */
116 { NULL, NULL, NULL, NULL, NULL }, /* Ed */
117 { md_cond_body, md_pre_Bl, md_post_Bl, NULL, NULL }, /* Bl */
118 { NULL, NULL, NULL, NULL, NULL }, /* El */
119 { NULL, md_pre_It, md_post_It, NULL, NULL }, /* It */
120 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ad */
121 { NULL, md_pre_An, NULL, NULL, NULL }, /* An */
122 { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */
123 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ar */
124 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cd */
125 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cm */
126 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Dv */
127 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Er */
128 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Ev */
129 { NULL, NULL, NULL, NULL, NULL }, /* Ex */
130 { NULL, md_pre_Fa, md_post_Fa, NULL, NULL }, /* Fa */
131 { NULL, md_pre_Fd, md_post_Fd, "**", "**" }, /* Fd */
132 { NULL, md_pre_raw, md_post_Fl, "**-", "**" }, /* Fl */
133 { NULL, md_pre_Fn, md_post_Fn, NULL, NULL }, /* Fn */
134 { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ft */
135 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ic */
136 { NULL, md_pre_In, md_post_In, NULL, NULL }, /* In */
137 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Li */
138 { md_cond_head, md_pre_Nd, NULL, NULL, NULL }, /* Nd */
139 { NULL, md_pre_Nm, md_post_Nm, "**", "**" }, /* Nm */
140 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Op */
141 { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ot */
142 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Pa */
143 { NULL, NULL, NULL, NULL, NULL }, /* Rv */
144 { NULL, NULL, NULL, NULL, NULL }, /* St */
145 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Va */
146 { NULL, md_pre_Vt, md_post_Vt, "*", "*" }, /* Vt */
147 { NULL, md_pre_Xr, NULL, NULL, NULL }, /* Xr */
148 { NULL, NULL, md_post_pc, NULL, NULL }, /* %A */
149 { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %B */
150 { NULL, NULL, md_post_pc, NULL, NULL }, /* %D */
151 { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %I */
152 { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %J */
153 { NULL, NULL, md_post_pc, NULL, NULL }, /* %N */
154 { NULL, NULL, md_post_pc, NULL, NULL }, /* %O */
155 { NULL, NULL, md_post_pc, NULL, NULL }, /* %P */
156 { NULL, NULL, md_post_pc, NULL, NULL }, /* %R */
157 { NULL, md_pre__T, md_post__T, NULL, NULL }, /* %T */
158 { NULL, NULL, md_post_pc, NULL, NULL }, /* %V */
159 { NULL, NULL, NULL, NULL, NULL }, /* Ac */
160 { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Ao */
161 { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Aq */
162 { NULL, NULL, NULL, NULL, NULL }, /* At */
163 { NULL, NULL, NULL, NULL, NULL }, /* Bc */
164 { NULL, NULL, NULL, NULL, NULL }, /* Bf XXX not implemented */
165 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bo */
166 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bq */
167 { NULL, NULL, NULL, NULL, NULL }, /* Bsx */
168 { NULL, NULL, NULL, NULL, NULL }, /* Bx */
169 { NULL, NULL, NULL, NULL, NULL }, /* Db */
170 { NULL, NULL, NULL, NULL, NULL }, /* Dc */
171 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Do */
172 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Dq */
173 { NULL, NULL, NULL, NULL, NULL }, /* Ec */
174 { NULL, NULL, NULL, NULL, NULL }, /* Ef */
175 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Em */
176 { md_cond_body, md_pre_Eo, md_post_Eo, NULL, NULL }, /* Eo */
177 { NULL, NULL, NULL, NULL, NULL }, /* Fx */
178 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ms */
179 { NULL, md_pre_No, NULL, NULL, NULL }, /* No */
180 { NULL, md_pre_Ns, NULL, NULL, NULL }, /* Ns */
181 { NULL, NULL, NULL, NULL, NULL }, /* Nx */
182 { NULL, NULL, NULL, NULL, NULL }, /* Ox */
183 { NULL, NULL, NULL, NULL, NULL }, /* Pc */
184 { NULL, NULL, md_post_Pf, NULL, NULL }, /* Pf */
185 { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Po */
186 { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Pq */
187 { NULL, NULL, NULL, NULL, NULL }, /* Qc */
188 { md_cond_body, md_pre_raw, md_post_raw, "'`", "`'" }, /* Ql */
189 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qo */
190 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qq */
191 { NULL, NULL, NULL, NULL, NULL }, /* Re */
192 { md_cond_body, md_pre_Rs, NULL, NULL, NULL }, /* Rs */
193 { NULL, NULL, NULL, NULL, NULL }, /* Sc */
194 { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* So */
195 { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* Sq */
196 { NULL, md_pre_Sm, NULL, NULL, NULL }, /* Sm */
197 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Sx */
198 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Sy */
199 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Tn */
200 { NULL, NULL, NULL, NULL, NULL }, /* Ux */
201 { NULL, NULL, NULL, NULL, NULL }, /* Xc */
202 { NULL, NULL, NULL, NULL, NULL }, /* Xo */
203 { NULL, md_pre_Fo, md_post_Fo, "**", "**" }, /* Fo */
204 { NULL, NULL, NULL, NULL, NULL }, /* Fc */
205 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Oo */
206 { NULL, NULL, NULL, NULL, NULL }, /* Oc */
207 { NULL, md_pre_Bk, md_post_Bk, NULL, NULL }, /* Bk */
208 { NULL, NULL, NULL, NULL, NULL }, /* Ek */
209 { NULL, NULL, NULL, NULL, NULL }, /* Bt */
210 { NULL, NULL, NULL, NULL, NULL }, /* Hf */
211 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Fr */
212 { NULL, NULL, NULL, NULL, NULL }, /* Ud */
213 { NULL, NULL, md_post_Lb, NULL, NULL }, /* Lb */
214 { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Lp */
215 { NULL, md_pre_Lk, NULL, NULL, NULL }, /* Lk */
216 { NULL, md_pre_Mt, NULL, NULL, NULL }, /* Mt */
217 { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Brq */
218 { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Bro */
219 { NULL, NULL, NULL, NULL, NULL }, /* Brc */
220 { NULL, NULL, md_post_pc, NULL, NULL }, /* %C */
221 { NULL, md_pre_skip, NULL, NULL, NULL }, /* Es */
222 { md_cond_body, md_pre_En, md_post_En, NULL, NULL }, /* En */
223 { NULL, NULL, NULL, NULL, NULL }, /* Dx */
224 { NULL, NULL, md_post_pc, NULL, NULL }, /* %Q */
225 { NULL, md_pre_Lk, md_post_pc, NULL, NULL }, /* %U */
226 { NULL, NULL, NULL, NULL, NULL }, /* Ta */
228 static const struct md_act *const md_acts = __md_acts - MDOC_Dd;
/* Pending spacing and mode state for the markdown writer. */
static	int	 outflags;
#define	MD_spc		 (1 << 0)  /* Blank character before next word. */
#define	MD_spc_force	 (1 << 1)  /* Even before trailing punctuation. */
#define	MD_nonl		 (1 << 2)  /* Prevent linebreak in markdown code. */
#define	MD_nl		 (1 << 3)  /* Break markdown code line. */
#define	MD_br		 (1 << 4)  /* Insert an output line break. */
#define	MD_sp		 (1 << 5)  /* Insert a paragraph break. */
#define	MD_Sm		 (1 << 6)  /* Horizontal spacing mode. */
#define	MD_Bk		 (1 << 7)  /* Word keep mode. */
#define	MD_An_split	 (1 << 8)  /* Author mode is "split". */
#define	MD_An_nosplit	 (1 << 9)  /* Author mode is "nosplit". */
static	int	 escflags; /* Escape in generated markdown code: */
#define	ESC_BOL	 (1 << 0)  /* "#*+-" near the beginning of a line. */
#define	ESC_NUM	 (1 << 1)  /* "." after a leading number. */
#define	ESC_HYP	 (1 << 2)  /* "(" immediately after "]". */
#define	ESC_SQU	 (1 << 4)  /* "]" when "[" is open. */
#define	ESC_FON	 (1 << 5)  /* "*" immediately after unrelated "*". */
#define	ESC_EOL	 (1 << 6)  /* " " at the end of a line. */

/* Nesting depth counters for code blocks, blockquotes and lists. */
static	int	 code_blocks, quote_blocks, list_blocks;
/* Output units written since outcount was last reset to zero. */
static	int	 outcount;
253 void
254 markdown_mdoc(void *arg, const struct roff_man *mdoc)
256 outflags = MD_Sm;
257 md_word(mdoc->meta.title);
258 if (mdoc->meta.msec != NULL) {
259 outflags &= ~MD_spc;
260 md_word("(");
261 md_word(mdoc->meta.msec);
262 md_word(")");
264 md_word("-");
265 md_word(mdoc->meta.vol);
266 if (mdoc->meta.arch != NULL) {
267 md_word("(");
268 md_word(mdoc->meta.arch);
269 md_word(")");
271 outflags |= MD_sp;
273 md_nodelist(mdoc->first->child);
275 outflags |= MD_sp;
276 md_word(mdoc->meta.os);
277 md_word("-");
278 md_word(mdoc->meta.date);
279 putchar('\n');
282 static void
283 md_nodelist(struct roff_node *n)
285 while (n != NULL) {
286 md_node(n);
287 n = n->next;
291 static void
292 md_node(struct roff_node *n)
294 const struct md_act *act;
295 int cond, process_children;
297 if (n->flags & NODE_NOPRT)
298 return;
300 if (outflags & MD_nonl)
301 outflags &= ~(MD_nl | MD_sp);
302 else if (outflags & MD_spc && n->flags & NODE_LINE)
303 outflags |= MD_nl;
305 act = NULL;
306 cond = 0;
307 process_children = 1;
308 n->flags &= ~NODE_ENDED;
310 if (n->type == ROFFT_TEXT) {
311 if (n->flags & NODE_DELIMC)
312 outflags &= ~(MD_spc | MD_spc_force);
313 else if (outflags & MD_Sm)
314 outflags |= MD_spc_force;
315 md_word(n->string);
316 if (n->flags & NODE_DELIMO)
317 outflags &= ~(MD_spc | MD_spc_force);
318 else if (outflags & MD_Sm)
319 outflags |= MD_spc;
320 } else if (n->tok < ROFF_MAX) {
321 switch (n->tok) {
322 case ROFF_br:
323 process_children = md_pre_br(n);
324 break;
325 case ROFF_sp:
326 process_children = md_pre_Pp(n);
327 break;
328 default:
329 process_children = 0;
330 break;
332 } else {
333 assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX);
334 act = md_acts + n->tok;
335 cond = act->cond == NULL || (*act->cond)(n);
336 if (cond && act->pre != NULL &&
337 (n->end == ENDBODY_NOT || n->child != NULL))
338 process_children = (*act->pre)(n);
341 if (process_children && n->child != NULL)
342 md_nodelist(n->child);
344 if (n->flags & NODE_ENDED)
345 return;
347 if (cond && act->post != NULL)
348 (*act->post)(n);
350 if (n->end != ENDBODY_NOT)
351 n->body->flags |= NODE_ENDED;
/*
 * Maintain the string of line prefix characters for the currently
 * open blocks.  Passing '\0' only queries the string, (char)-1
 * removes the last character, and any other character is appended.
 * Returns the current string, or "" if nothing was ever pushed.
 */
static const char *
md_stack(char c)
{
	static char	*buf;	/* NUL-terminated prefix string */
	static size_t	 cap;	/* allocated size of buf */
	static size_t	 len;	/* number of characters in buf */

	if (c == (char)-1) {
		/* Pop. */
		assert(len);
		buf[--len] = '\0';
	} else if (c != '\0') {
		/* Push; keep room for the character and the NUL. */
		if (len + 1 >= cap) {
			cap += 8;
			buf = mandoc_realloc(buf, cap);
		}
		buf[len] = c;
		buf[++len] = '\0';
	}
	return buf == NULL ? "" : buf;
}
381 * Handle vertical and horizontal spacing.
383 static void
384 md_preword(void)
386 const char *cp;
389 * If a list block is nested inside a code block or a blockquote,
390 * blank lines for paragraph breaks no longer work; instead,
391 * they terminate the list. Work around this markdown issue
392 * by using mere line breaks instead.
395 if (list_blocks && outflags & MD_sp) {
396 outflags &= ~MD_sp;
397 outflags |= MD_br;
401 * End the old line if requested.
402 * Escape whitespace at the end of the markdown line
403 * such that it won't look like an output line break.
406 if (outflags & MD_sp)
407 putchar('\n');
408 else if (outflags & MD_br) {
409 putchar(' ');
410 putchar(' ');
411 } else if (outflags & MD_nl && escflags & ESC_EOL)
412 md_named("zwnj");
414 /* Start a new line if necessary. */
416 if (outflags & (MD_nl | MD_br | MD_sp)) {
417 putchar('\n');
418 for (cp = md_stack('\0'); *cp != '\0'; cp++) {
419 putchar(*cp);
420 if (*cp == '>')
421 putchar(' ');
423 outflags &= ~(MD_nl | MD_br | MD_sp);
424 escflags = ESC_BOL;
425 outcount = 0;
427 /* Handle horizontal spacing. */
429 } else if (outflags & MD_spc) {
430 if (outflags & MD_Bk)
431 fputs("&nbsp;", stdout);
432 else
433 putchar(' ');
434 escflags &= ~ESC_FON;
435 outcount++;
438 outflags &= ~(MD_spc_force | MD_nonl);
439 if (outflags & MD_Sm)
440 outflags |= MD_spc;
441 else
442 outflags &= ~MD_spc;
446 * Print markdown syntax elements.
447 * Can also be used for constant strings when neither escaping
448 * nor delimiter handling is required.
450 static void
451 md_rawword(const char *s)
453 md_preword();
455 if (*s == '\0')
456 return;
458 if (escflags & ESC_FON) {
459 escflags &= ~ESC_FON;
460 if (*s == '*' && !code_blocks)
461 fputs("&zwnj;", stdout);
464 while (*s != '\0') {
465 switch(*s) {
466 case '*':
467 if (s[1] == '\0')
468 escflags |= ESC_FON;
469 break;
470 case '[':
471 escflags |= ESC_SQU;
472 break;
473 case ']':
474 escflags |= ESC_HYP;
475 escflags &= ~ESC_SQU;
476 break;
477 default:
478 break;
480 md_char(*s++);
482 if (s[-1] == ' ')
483 escflags |= ESC_EOL;
484 else
485 escflags &= ~ESC_EOL;
489 * Print text and mdoc(7) syntax elements.
491 static void
492 md_word(const char *s)
494 const char *seq, *prevfont, *currfont, *nextfont;
495 char c;
496 int bs, sz, uc, breakline;
498 /* No spacing before closing delimiters. */
499 if (s[0] != '\0' && s[1] == '\0' &&
500 strchr("!),.:;?]", s[0]) != NULL &&
501 (outflags & MD_spc_force) == 0)
502 outflags &= ~MD_spc;
504 md_preword();
506 if (*s == '\0')
507 return;
509 /* No spacing after opening delimiters. */
510 if ((s[0] == '(' || s[0] == '[') && s[1] == '\0')
511 outflags &= ~MD_spc;
513 breakline = 0;
514 prevfont = currfont = "";
515 while ((c = *s++) != '\0') {
516 bs = 0;
517 switch(c) {
518 case ASCII_NBRSP:
519 if (code_blocks)
520 c = ' ';
521 else {
522 md_named("nbsp");
523 c = '\0';
525 break;
526 case ASCII_HYPH:
527 bs = escflags & ESC_BOL && !code_blocks;
528 c = '-';
529 break;
530 case ASCII_BREAK:
531 continue;
532 case '#':
533 case '+':
534 case '-':
535 bs = escflags & ESC_BOL && !code_blocks;
536 break;
537 case '(':
538 bs = escflags & ESC_HYP && !code_blocks;
539 break;
540 case ')':
541 bs = escflags & ESC_NUM && !code_blocks;
542 break;
543 case '*':
544 case '[':
545 case '_':
546 case '`':
547 bs = !code_blocks;
548 break;
549 case '.':
550 bs = escflags & ESC_NUM && !code_blocks;
551 break;
552 case '<':
553 if (code_blocks == 0) {
554 md_named("lt");
555 c = '\0';
557 break;
558 case '=':
559 if (escflags & ESC_BOL && !code_blocks) {
560 md_named("equals");
561 c = '\0';
563 break;
564 case '>':
565 if (code_blocks == 0) {
566 md_named("gt");
567 c = '\0';
569 break;
570 case '\\':
571 uc = 0;
572 nextfont = NULL;
573 switch (mandoc_escape(&s, &seq, &sz)) {
574 case ESCAPE_UNICODE:
575 uc = mchars_num2uc(seq + 1, sz - 1);
576 break;
577 case ESCAPE_NUMBERED:
578 uc = mchars_num2char(seq, sz);
579 break;
580 case ESCAPE_SPECIAL:
581 uc = mchars_spec2cp(seq, sz);
582 break;
583 case ESCAPE_FONTBOLD:
584 nextfont = "**";
585 break;
586 case ESCAPE_FONTITALIC:
587 nextfont = "*";
588 break;
589 case ESCAPE_FONTBI:
590 nextfont = "***";
591 break;
592 case ESCAPE_FONT:
593 case ESCAPE_FONTROMAN:
594 nextfont = "";
595 break;
596 case ESCAPE_FONTPREV:
597 nextfont = prevfont;
598 break;
599 case ESCAPE_BREAK:
600 breakline = 1;
601 break;
602 case ESCAPE_NOSPACE:
603 case ESCAPE_SKIPCHAR:
604 case ESCAPE_OVERSTRIKE:
605 /* XXX not implemented */
606 /* FALLTHROUGH */
607 case ESCAPE_ERROR:
608 default:
609 break;
611 if (nextfont != NULL && !code_blocks) {
612 if (*currfont != '\0') {
613 outflags &= ~MD_spc;
614 md_rawword(currfont);
616 prevfont = currfont;
617 currfont = nextfont;
618 if (*currfont != '\0') {
619 outflags &= ~MD_spc;
620 md_rawword(currfont);
623 if (uc) {
624 if ((uc < 0x20 && uc != 0x09) ||
625 (uc > 0x7E && uc < 0xA0))
626 uc = 0xFFFD;
627 if (code_blocks) {
628 seq = mchars_uc2str(uc);
629 fputs(seq, stdout);
630 outcount += strlen(seq);
631 } else {
632 printf("&#%d;", uc);
633 outcount++;
635 escflags &= ~ESC_FON;
637 c = '\0';
638 break;
639 case ']':
640 bs = escflags & ESC_SQU && !code_blocks;
641 escflags |= ESC_HYP;
642 break;
643 default:
644 break;
646 if (bs)
647 putchar('\\');
648 md_char(c);
649 if (breakline &&
650 (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) {
651 printf(" \n");
652 breakline = 0;
653 while (*s == ' ' || *s == ASCII_NBRSP)
654 s++;
657 if (*currfont != '\0') {
658 outflags &= ~MD_spc;
659 md_rawword(currfont);
660 } else if (s[-2] == ' ')
661 escflags |= ESC_EOL;
662 else
663 escflags &= ~ESC_EOL;
667 * Print a single HTML named character reference.
669 static void
670 md_named(const char *s)
672 printf("&%s;", s);
673 escflags &= ~(ESC_FON | ESC_EOL);
674 outcount++;
678 * Print a single raw character and maintain certain escape flags.
680 static void
681 md_char(unsigned char c)
683 if (c != '\0') {
684 putchar(c);
685 if (c == '*')
686 escflags |= ESC_FON;
687 else
688 escflags &= ~ESC_FON;
689 outcount++;
691 if (c != ']')
692 escflags &= ~ESC_HYP;
693 if (c == ' ' || c == '\t' || c == '>')
694 return;
695 if (isdigit(c) == 0)
696 escflags &= ~ESC_NUM;
697 else if (escflags & ESC_BOL)
698 escflags |= ESC_NUM;
699 escflags &= ~ESC_BOL;
702 static int
703 md_cond_head(struct roff_node *n)
705 return n->type == ROFFT_HEAD;
708 static int
709 md_cond_body(struct roff_node *n)
711 return n->type == ROFFT_BODY;
714 static int
715 md_pre_raw(struct roff_node *n)
717 const char *prefix;
719 if ((prefix = md_acts[n->tok].prefix) != NULL) {
720 md_rawword(prefix);
721 outflags &= ~MD_spc;
722 if (*prefix == '`')
723 code_blocks++;
725 return 1;
728 static void
729 md_post_raw(struct roff_node *n)
731 const char *suffix;
733 if ((suffix = md_acts[n->tok].suffix) != NULL) {
734 outflags &= ~(MD_spc | MD_nl);
735 md_rawword(suffix);
736 if (*suffix == '`')
737 code_blocks--;
741 static int
742 md_pre_word(struct roff_node *n)
744 const char *prefix;
746 if ((prefix = md_acts[n->tok].prefix) != NULL) {
747 md_word(prefix);
748 outflags &= ~MD_spc;
750 return 1;
753 static void
754 md_post_word(struct roff_node *n)
756 const char *suffix;
758 if ((suffix = md_acts[n->tok].suffix) != NULL) {
759 outflags &= ~(MD_spc | MD_nl);
760 md_word(suffix);
764 static void
765 md_post_pc(struct roff_node *n)
767 md_post_raw(n);
768 if (n->parent->tok != MDOC_Rs)
769 return;
770 if (n->next != NULL) {
771 md_word(",");
772 if (n->prev != NULL &&
773 n->prev->tok == n->tok &&
774 n->next->tok == n->tok)
775 md_word("and");
776 } else {
777 md_word(".");
778 outflags |= MD_nl;
/*
 * Print nothing and do not format the children.
 */
static int
md_pre_skip(struct roff_node *n)
{
	return (0);
}
788 static void
789 md_pre_syn(struct roff_node *n)
791 if (n->prev == NULL || ! (n->flags & NODE_SYNPRETTY))
792 return;
794 if (n->prev->tok == n->tok &&
795 n->tok != MDOC_Ft &&
796 n->tok != MDOC_Fo &&
797 n->tok != MDOC_Fn) {
798 outflags |= MD_br;
799 return;
802 switch (n->prev->tok) {
803 case MDOC_Fd:
804 case MDOC_Fn:
805 case MDOC_Fo:
806 case MDOC_In:
807 case MDOC_Vt:
808 outflags |= MD_sp;
809 break;
810 case MDOC_Ft:
811 if (n->tok != MDOC_Fn && n->tok != MDOC_Fo) {
812 outflags |= MD_sp;
813 break;
815 /* FALLTHROUGH */
816 default:
817 outflags |= MD_br;
818 break;
822 static int
823 md_pre_An(struct roff_node *n)
825 switch (n->norm->An.auth) {
826 case AUTH_split:
827 outflags &= ~MD_An_nosplit;
828 outflags |= MD_An_split;
829 return 0;
830 case AUTH_nosplit:
831 outflags &= ~MD_An_split;
832 outflags |= MD_An_nosplit;
833 return 0;
834 default:
835 if (outflags & MD_An_split)
836 outflags |= MD_br;
837 else if (n->sec == SEC_AUTHORS &&
838 ! (outflags & MD_An_nosplit))
839 outflags |= MD_An_split;
840 return 1;
844 static int
845 md_pre_Ap(struct roff_node *n)
847 outflags &= ~MD_spc;
848 md_word("'");
849 outflags &= ~MD_spc;
850 return 0;
853 static int
854 md_pre_Bd(struct roff_node *n)
856 switch (n->norm->Bd.type) {
857 case DISP_unfilled:
858 case DISP_literal:
859 return md_pre_Dl(n);
860 default:
861 return md_pre_D1(n);
865 static int
866 md_pre_Bk(struct roff_node *n)
868 switch (n->type) {
869 case ROFFT_BLOCK:
870 return 1;
871 case ROFFT_BODY:
872 outflags |= MD_Bk;
873 return 1;
874 default:
875 return 0;
879 static void
880 md_post_Bk(struct roff_node *n)
882 if (n->type == ROFFT_BODY)
883 outflags &= ~MD_Bk;
886 static int
887 md_pre_Bl(struct roff_node *n)
889 n->norm->Bl.count = 0;
890 if (n->norm->Bl.type == LIST_column)
891 md_pre_Dl(n);
892 outflags |= MD_sp;
893 return 1;
896 static void
897 md_post_Bl(struct roff_node *n)
899 n->norm->Bl.count = 0;
900 if (n->norm->Bl.type == LIST_column)
901 md_post_D1(n);
902 outflags |= MD_sp;
905 static int
906 md_pre_D1(struct roff_node *n)
909 * Markdown blockquote syntax does not work inside code blocks.
910 * The best we can do is fall back to another nested code block.
912 if (code_blocks) {
913 md_stack('\t');
914 code_blocks++;
915 } else {
916 md_stack('>');
917 quote_blocks++;
919 outflags |= MD_sp;
920 return 1;
923 static void
924 md_post_D1(struct roff_node *n)
926 md_stack((char)-1);
927 if (code_blocks)
928 code_blocks--;
929 else
930 quote_blocks--;
931 outflags |= MD_sp;
934 static int
935 md_pre_Dl(struct roff_node *n)
938 * Markdown code block syntax does not work inside blockquotes.
939 * The best we can do is fall back to another nested blockquote.
941 if (quote_blocks) {
942 md_stack('>');
943 quote_blocks++;
944 } else {
945 md_stack('\t');
946 code_blocks++;
948 outflags |= MD_sp;
949 return 1;
952 static int
953 md_pre_En(struct roff_node *n)
955 if (n->norm->Es == NULL ||
956 n->norm->Es->child == NULL)
957 return 1;
959 md_word(n->norm->Es->child->string);
960 outflags &= ~MD_spc;
961 return 1;
964 static void
965 md_post_En(struct roff_node *n)
967 if (n->norm->Es == NULL ||
968 n->norm->Es->child == NULL ||
969 n->norm->Es->child->next == NULL)
970 return;
972 outflags &= ~MD_spc;
973 md_word(n->norm->Es->child->next->string);
976 static int
977 md_pre_Eo(struct roff_node *n)
979 if (n->end == ENDBODY_NOT &&
980 n->parent->head->child == NULL &&
981 n->child != NULL &&
982 n->child->end != ENDBODY_NOT)
983 md_preword();
984 else if (n->end != ENDBODY_NOT ? n->child != NULL :
985 n->parent->head->child != NULL && (n->child != NULL ||
986 (n->parent->tail != NULL && n->parent->tail->child != NULL)))
987 outflags &= ~(MD_spc | MD_nl);
988 return 1;
991 static void
992 md_post_Eo(struct roff_node *n)
994 if (n->end != ENDBODY_NOT) {
995 outflags |= MD_spc;
996 return;
999 if (n->child == NULL && n->parent->head->child == NULL)
1000 return;
1002 if (n->parent->tail != NULL && n->parent->tail->child != NULL)
1003 outflags &= ~MD_spc;
1004 else
1005 outflags |= MD_spc;
1008 static int
1009 md_pre_Fa(struct roff_node *n)
1011 int am_Fa;
1013 am_Fa = n->tok == MDOC_Fa;
1015 if (am_Fa)
1016 n = n->child;
1018 while (n != NULL) {
1019 md_rawword("*");
1020 outflags &= ~MD_spc;
1021 md_node(n);
1022 outflags &= ~MD_spc;
1023 md_rawword("*");
1024 if ((n = n->next) != NULL)
1025 md_word(",");
1027 return 0;
1030 static void
1031 md_post_Fa(struct roff_node *n)
1033 if (n->next != NULL && n->next->tok == MDOC_Fa)
1034 md_word(",");
/*
 * Apply synopsis spacing, then print the macro's raw prefix.
 */
static int
md_pre_Fd(struct roff_node *n)
{
	md_pre_syn(n);
	(void)md_pre_raw(n);
	return 1;
}
1045 static void
1046 md_post_Fd(struct roff_node *n)
1048 md_post_raw(n);
1049 outflags |= MD_br;
1052 static void
1053 md_post_Fl(struct roff_node *n)
1055 md_post_raw(n);
1056 if (n->child == NULL && n->next != NULL &&
1057 n->next->type != ROFFT_TEXT && !(n->next->flags & NODE_LINE))
1058 outflags &= ~MD_spc;
1061 static int
1062 md_pre_Fn(struct roff_node *n)
1064 md_pre_syn(n);
1066 if ((n = n->child) == NULL)
1067 return 0;
1069 md_rawword("**");
1070 outflags &= ~MD_spc;
1071 md_node(n);
1072 outflags &= ~MD_spc;
1073 md_rawword("**");
1074 outflags &= ~MD_spc;
1075 md_word("(");
1077 if ((n = n->next) != NULL)
1078 md_pre_Fa(n);
1079 return 0;
1082 static void
1083 md_post_Fn(struct roff_node *n)
1085 md_word(")");
1086 if (n->flags & NODE_SYNPRETTY) {
1087 md_word(";");
1088 outflags |= MD_sp;
1092 static int
1093 md_pre_Fo(struct roff_node *n)
1095 switch (n->type) {
1096 case ROFFT_BLOCK:
1097 md_pre_syn(n);
1098 break;
1099 case ROFFT_HEAD:
1100 if (n->child == NULL)
1101 return 0;
1102 md_pre_raw(n);
1103 break;
1104 case ROFFT_BODY:
1105 outflags &= ~(MD_spc | MD_nl);
1106 md_word("(");
1107 break;
1108 default:
1109 break;
1111 return 1;
1114 static void
1115 md_post_Fo(struct roff_node *n)
1117 switch (n->type) {
1118 case ROFFT_HEAD:
1119 if (n->child != NULL)
1120 md_post_raw(n);
1121 break;
1122 case ROFFT_BODY:
1123 md_post_Fn(n);
1124 break;
1125 default:
1126 break;
1130 static int
1131 md_pre_In(struct roff_node *n)
1133 if (n->flags & NODE_SYNPRETTY) {
1134 md_pre_syn(n);
1135 md_rawword("**");
1136 outflags &= ~MD_spc;
1137 md_word("#include <");
1138 } else {
1139 md_word("<");
1140 outflags &= ~MD_spc;
1141 md_rawword("*");
1143 outflags &= ~MD_spc;
1144 return 1;
1147 static void
1148 md_post_In(struct roff_node *n)
1150 if (n->flags & NODE_SYNPRETTY) {
1151 outflags &= ~MD_spc;
1152 md_rawword(">**");
1153 outflags |= MD_nl;
1154 } else {
1155 outflags &= ~MD_spc;
1156 md_rawword("*>");
1160 static int
1161 md_pre_It(struct roff_node *n)
1163 struct roff_node *bln;
1165 switch (n->type) {
1166 case ROFFT_BLOCK:
1167 return 1;
1169 case ROFFT_HEAD:
1170 bln = n->parent->parent;
1171 if (bln->norm->Bl.comp == 0 &&
1172 bln->norm->Bl.type != LIST_column)
1173 outflags |= MD_sp;
1174 outflags |= MD_nl;
1176 switch (bln->norm->Bl.type) {
1177 case LIST_item:
1178 outflags |= MD_br;
1179 return 0;
1180 case LIST_inset:
1181 case LIST_diag:
1182 case LIST_ohang:
1183 outflags |= MD_br;
1184 return 1;
1185 case LIST_tag:
1186 case LIST_hang:
1187 outflags |= MD_sp;
1188 return 1;
1189 case LIST_bullet:
1190 md_rawword("*\t");
1191 break;
1192 case LIST_dash:
1193 case LIST_hyphen:
1194 md_rawword("-\t");
1195 break;
1196 case LIST_enum:
1197 md_preword();
1198 if (bln->norm->Bl.count < 99)
1199 bln->norm->Bl.count++;
1200 printf("%d.\t", bln->norm->Bl.count);
1201 escflags &= ~ESC_FON;
1202 break;
1203 case LIST_column:
1204 outflags |= MD_br;
1205 return 0;
1206 default:
1207 return 0;
1209 outflags &= ~MD_spc;
1210 outflags |= MD_nonl;
1211 outcount = 0;
1212 md_stack('\t');
1213 if (code_blocks || quote_blocks)
1214 list_blocks++;
1215 return 0;
1217 case ROFFT_BODY:
1218 bln = n->parent->parent;
1219 switch (bln->norm->Bl.type) {
1220 case LIST_ohang:
1221 outflags |= MD_br;
1222 break;
1223 case LIST_tag:
1224 case LIST_hang:
1225 md_pre_D1(n);
1226 break;
1227 default:
1228 break;
1230 return 1;
1232 default:
1233 return 0;
1237 static void
1238 md_post_It(struct roff_node *n)
1240 struct roff_node *bln;
1241 int i, nc;
1243 if (n->type != ROFFT_BODY)
1244 return;
1246 bln = n->parent->parent;
1247 switch (bln->norm->Bl.type) {
1248 case LIST_bullet:
1249 case LIST_dash:
1250 case LIST_hyphen:
1251 case LIST_enum:
1252 md_stack((char)-1);
1253 if (code_blocks || quote_blocks)
1254 list_blocks--;
1255 break;
1256 case LIST_tag:
1257 case LIST_hang:
1258 md_post_D1(n);
1259 break;
1261 case LIST_column:
1262 if (n->next == NULL)
1263 break;
1265 /* Calculate the array index of the current column. */
1267 i = 0;
1268 while ((n = n->prev) != NULL && n->type != ROFFT_HEAD)
1269 i++;
1272 * If a width was specified for this column,
1273 * subtract what printed, and
1274 * add the same spacing as in mdoc_term.c.
1277 nc = bln->norm->Bl.ncols;
1278 i = i < nc ? strlen(bln->norm->Bl.cols[i]) - outcount +
1279 (nc < 5 ? 4 : nc == 5 ? 3 : 1) : 1;
1280 if (i < 1)
1281 i = 1;
1282 while (i-- > 0)
1283 putchar(' ');
1285 outflags &= ~MD_spc;
1286 escflags &= ~ESC_FON;
1287 outcount = 0;
1288 break;
1290 default:
1291 break;
1295 static void
1296 md_post_Lb(struct roff_node *n)
1298 if (n->sec == SEC_LIBRARY)
1299 outflags |= MD_br;
1302 static void
1303 md_uri(const char *s)
1305 while (*s != '\0') {
1306 if (strchr("%()<>", *s) != NULL) {
1307 printf("%%%2.2hhX", *s);
1308 outcount += 3;
1309 } else {
1310 putchar(*s);
1311 outcount++;
1313 s++;
1317 static int
1318 md_pre_Lk(struct roff_node *n)
1320 const struct roff_node *link, *descr, *punct;
1322 if ((link = n->child) == NULL)
1323 return 0;
1325 /* Find beginning of trailing punctuation. */
1326 punct = n->last;
1327 while (punct != link && punct->flags & NODE_DELIMC)
1328 punct = punct->prev;
1329 punct = punct->next;
1331 /* Link text. */
1332 descr = link->next;
1333 if (descr == punct)
1334 descr = link; /* no text */
1335 md_rawword("[");
1336 outflags &= ~MD_spc;
1337 do {
1338 md_word(descr->string);
1339 descr = descr->next;
1340 } while (descr != punct);
1341 outflags &= ~MD_spc;
1343 /* Link target. */
1344 md_rawword("](");
1345 md_uri(link->string);
1346 outflags &= ~MD_spc;
1347 md_rawword(")");
1349 /* Trailing punctuation. */
1350 while (punct != NULL) {
1351 md_word(punct->string);
1352 punct = punct->next;
1354 return 0;
1357 static int
1358 md_pre_Mt(struct roff_node *n)
1360 const struct roff_node *nch;
1362 md_rawword("[");
1363 outflags &= ~MD_spc;
1364 for (nch = n->child; nch != NULL; nch = nch->next)
1365 md_word(nch->string);
1366 outflags &= ~MD_spc;
1367 md_rawword("](mailto:");
1368 for (nch = n->child; nch != NULL; nch = nch->next) {
1369 md_uri(nch->string);
1370 if (nch->next != NULL) {
1371 putchar(' ');
1372 outcount++;
1375 outflags &= ~MD_spc;
1376 md_rawword(")");
1377 return 0;
1380 static int
1381 md_pre_Nd(struct roff_node *n)
1383 outflags &= ~MD_nl;
1384 outflags |= MD_spc;
1385 md_word("-");
1386 return 1;
1389 static int
1390 md_pre_Nm(struct roff_node *n)
1392 switch (n->type) {
1393 case ROFFT_BLOCK:
1394 outflags |= MD_Bk;
1395 md_pre_syn(n);
1396 break;
1397 case ROFFT_HEAD:
1398 case ROFFT_ELEM:
1399 md_pre_raw(n);
1400 break;
1401 default:
1402 break;
1404 return 1;
1407 static void
1408 md_post_Nm(struct roff_node *n)
1410 switch (n->type) {
1411 case ROFFT_BLOCK:
1412 outflags &= ~MD_Bk;
1413 break;
1414 case ROFFT_HEAD:
1415 case ROFFT_ELEM:
1416 md_post_raw(n);
1417 break;
1418 default:
1419 break;
1423 static int
1424 md_pre_No(struct roff_node *n)
1426 outflags |= MD_spc_force;
1427 return 1;
1430 static int
1431 md_pre_Ns(struct roff_node *n)
1433 outflags &= ~MD_spc;
1434 return 0;
1437 static void
1438 md_post_Pf(struct roff_node *n)
1440 if (n->next != NULL && (n->next->flags & NODE_LINE) == 0)
1441 outflags &= ~MD_spc;
1444 static int
1445 md_pre_Pp(struct roff_node *n)
1447 outflags |= MD_sp;
1448 return 0;
1451 static int
1452 md_pre_Rs(struct roff_node *n)
1454 if (n->sec == SEC_SEE_ALSO)
1455 outflags |= MD_sp;
1456 return 1;
1459 static int
1460 md_pre_Sh(struct roff_node *n)
1462 switch (n->type) {
1463 case ROFFT_BLOCK:
1464 if (n->sec == SEC_AUTHORS)
1465 outflags &= ~(MD_An_split | MD_An_nosplit);
1466 break;
1467 case ROFFT_HEAD:
1468 outflags |= MD_sp;
1469 md_rawword(n->tok == MDOC_Sh ? "#" : "##");
1470 break;
1471 case ROFFT_BODY:
1472 outflags |= MD_sp;
1473 break;
1474 default:
1475 break;
1477 return 1;
1480 static int
1481 md_pre_Sm(struct roff_node *n)
1483 if (n->child == NULL)
1484 outflags ^= MD_Sm;
1485 else if (strcmp("on", n->child->string) == 0)
1486 outflags |= MD_Sm;
1487 else
1488 outflags &= ~MD_Sm;
1490 if (outflags & MD_Sm)
1491 outflags |= MD_spc;
1493 return 0;
1496 static int
1497 md_pre_Vt(struct roff_node *n)
1499 switch (n->type) {
1500 case ROFFT_BLOCK:
1501 md_pre_syn(n);
1502 return 1;
1503 case ROFFT_BODY:
1504 case ROFFT_ELEM:
1505 md_pre_raw(n);
1506 return 1;
1507 default:
1508 return 0;
1512 static void
1513 md_post_Vt(struct roff_node *n)
1515 switch (n->type) {
1516 case ROFFT_BODY:
1517 case ROFFT_ELEM:
1518 md_post_raw(n);
1519 break;
1520 default:
1521 break;
1525 static int
1526 md_pre_Xr(struct roff_node *n)
1528 n = n->child;
1529 if (n == NULL)
1530 return 0;
1531 md_node(n);
1532 n = n->next;
1533 if (n == NULL)
1534 return 0;
1535 outflags &= ~MD_spc;
1536 md_word("(");
1537 md_node(n);
1538 md_word(")");
1539 return 0;
1542 static int
1543 md_pre__T(struct roff_node *n)
1545 if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T)
1546 md_word("\"");
1547 else
1548 md_rawword("*");
1549 outflags &= ~MD_spc;
1550 return 1;
1553 static void
1554 md_post__T(struct roff_node *n)
1556 outflags &= ~MD_spc;
1557 if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T)
1558 md_word("\"");
1559 else
1560 md_rawword("*");
1561 md_post_pc(n);
1564 static int
1565 md_pre_br(struct roff_node *n)
1567 outflags |= MD_br;
1568 return 0;