Unleashed v1.4
[unleashed.git] / bin / mandoc / mdoc_markdown.c
blobe9a931218cc4f8ce303705d8435a6e7aec013806
1 /* $Id: mdoc_markdown.c,v 1.30 2018/12/30 00:49:55 schwarze Exp $ */
2 /*
3 * Copyright (c) 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
25 #include "mandoc_aux.h"
26 #include "mandoc.h"
27 #include "roff.h"
28 #include "mdoc.h"
29 #include "main.h"
/*
 * Per-macro formatting action.
 * Entries are filled in positionally by the md_acts table,
 * so the member order must not change.
 */
struct	md_act {
	/* Optional predicate; when NULL the action always applies. */
	int		(*cond)(struct roff_node *n);
	/* Optional pre-order handler; non-zero means "descend into children". */
	int		(*pre)(struct roff_node *n);
	/* Optional post-order handler, run after the children. */
	void		(*post)(struct roff_node *n);
	const char	 *prefix; /* pre-node string constant */
	const char	 *suffix; /* post-node string constant */
};
/* Tree traversal and low-level output helpers. */
static	void	 md_nodelist(struct roff_node *);
static	void	 md_node(struct roff_node *);
static	const char *md_stack(char c);
static	void	 md_preword(void);
static	void	 md_rawword(const char *);
static	void	 md_word(const char *);
static	void	 md_named(const char *);
static	void	 md_char(unsigned char);
static	void	 md_uri(const char *);

/* Predicates selecting which node type an action applies to. */
static	int	 md_cond_head(struct roff_node *);
static	int	 md_cond_body(struct roff_node *);

/* Pre-order handlers. */
static	int	 md_pre_abort(struct roff_node *);
static	int	 md_pre_raw(struct roff_node *);
static	int	 md_pre_word(struct roff_node *);
static	int	 md_pre_skip(struct roff_node *);
static	void	 md_pre_syn(struct roff_node *);
static	int	 md_pre_An(struct roff_node *);
static	int	 md_pre_Ap(struct roff_node *);
static	int	 md_pre_Bd(struct roff_node *);
static	int	 md_pre_Bk(struct roff_node *);
static	int	 md_pre_Bl(struct roff_node *);
static	int	 md_pre_D1(struct roff_node *);
static	int	 md_pre_Dl(struct roff_node *);
static	int	 md_pre_En(struct roff_node *);
static	int	 md_pre_Eo(struct roff_node *);
static	int	 md_pre_Fa(struct roff_node *);
static	int	 md_pre_Fd(struct roff_node *);
static	int	 md_pre_Fn(struct roff_node *);
static	int	 md_pre_Fo(struct roff_node *);
static	int	 md_pre_In(struct roff_node *);
static	int	 md_pre_It(struct roff_node *);
static	int	 md_pre_Lk(struct roff_node *);
static	int	 md_pre_Mt(struct roff_node *);
static	int	 md_pre_Nd(struct roff_node *);
static	int	 md_pre_Nm(struct roff_node *);
static	int	 md_pre_No(struct roff_node *);
static	int	 md_pre_Ns(struct roff_node *);
static	int	 md_pre_Pp(struct roff_node *);
static	int	 md_pre_Rs(struct roff_node *);
static	int	 md_pre_Sh(struct roff_node *);
static	int	 md_pre_Sm(struct roff_node *);
static	int	 md_pre_Vt(struct roff_node *);
static	int	 md_pre_Xr(struct roff_node *);
static	int	 md_pre__T(struct roff_node *);
static	int	 md_pre_br(struct roff_node *);

/* Post-order handlers. */
static	void	 md_post_raw(struct roff_node *);
static	void	 md_post_word(struct roff_node *);
static	void	 md_post_pc(struct roff_node *);
static	void	 md_post_Bk(struct roff_node *);
static	void	 md_post_Bl(struct roff_node *);
static	void	 md_post_D1(struct roff_node *);
static	void	 md_post_En(struct roff_node *);
static	void	 md_post_Eo(struct roff_node *);
static	void	 md_post_Fa(struct roff_node *);
static	void	 md_post_Fd(struct roff_node *);
static	void	 md_post_Fl(struct roff_node *);
static	void	 md_post_Fn(struct roff_node *);
static	void	 md_post_Fo(struct roff_node *);
static	void	 md_post_In(struct roff_node *);
static	void	 md_post_It(struct roff_node *);
static	void	 md_post_Lb(struct roff_node *);
static	void	 md_post_Nm(struct roff_node *);
static	void	 md_post_Pf(struct roff_node *);
static	void	 md_post_Vt(struct roff_node *);
static	void	 md_post__T(struct roff_node *);
108 static const struct md_act md_acts[MDOC_MAX - MDOC_Dd] = {
109 { NULL, NULL, NULL, NULL, NULL }, /* Dd */
110 { NULL, NULL, NULL, NULL, NULL }, /* Dt */
111 { NULL, NULL, NULL, NULL, NULL }, /* Os */
112 { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Sh */
113 { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Ss */
114 { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Pp */
115 { md_cond_body, md_pre_D1, md_post_D1, NULL, NULL }, /* D1 */
116 { md_cond_body, md_pre_Dl, md_post_D1, NULL, NULL }, /* Dl */
117 { md_cond_body, md_pre_Bd, md_post_D1, NULL, NULL }, /* Bd */
118 { NULL, NULL, NULL, NULL, NULL }, /* Ed */
119 { md_cond_body, md_pre_Bl, md_post_Bl, NULL, NULL }, /* Bl */
120 { NULL, NULL, NULL, NULL, NULL }, /* El */
121 { NULL, md_pre_It, md_post_It, NULL, NULL }, /* It */
122 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ad */
123 { NULL, md_pre_An, NULL, NULL, NULL }, /* An */
124 { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */
125 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ar */
126 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cd */
127 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cm */
128 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Dv */
129 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Er */
130 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Ev */
131 { NULL, NULL, NULL, NULL, NULL }, /* Ex */
132 { NULL, md_pre_Fa, md_post_Fa, NULL, NULL }, /* Fa */
133 { NULL, md_pre_Fd, md_post_Fd, "**", "**" }, /* Fd */
134 { NULL, md_pre_raw, md_post_Fl, "**-", "**" }, /* Fl */
135 { NULL, md_pre_Fn, md_post_Fn, NULL, NULL }, /* Fn */
136 { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ft */
137 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ic */
138 { NULL, md_pre_In, md_post_In, NULL, NULL }, /* In */
139 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Li */
140 { md_cond_head, md_pre_Nd, NULL, NULL, NULL }, /* Nd */
141 { NULL, md_pre_Nm, md_post_Nm, "**", "**" }, /* Nm */
142 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Op */
143 { NULL, md_pre_abort, NULL, NULL, NULL }, /* Ot */
144 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Pa */
145 { NULL, NULL, NULL, NULL, NULL }, /* Rv */
146 { NULL, NULL, NULL, NULL, NULL }, /* St */
147 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Va */
148 { NULL, md_pre_Vt, md_post_Vt, "*", "*" }, /* Vt */
149 { NULL, md_pre_Xr, NULL, NULL, NULL }, /* Xr */
150 { NULL, NULL, md_post_pc, NULL, NULL }, /* %A */
151 { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %B */
152 { NULL, NULL, md_post_pc, NULL, NULL }, /* %D */
153 { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %I */
154 { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %J */
155 { NULL, NULL, md_post_pc, NULL, NULL }, /* %N */
156 { NULL, NULL, md_post_pc, NULL, NULL }, /* %O */
157 { NULL, NULL, md_post_pc, NULL, NULL }, /* %P */
158 { NULL, NULL, md_post_pc, NULL, NULL }, /* %R */
159 { NULL, md_pre__T, md_post__T, NULL, NULL }, /* %T */
160 { NULL, NULL, md_post_pc, NULL, NULL }, /* %V */
161 { NULL, NULL, NULL, NULL, NULL }, /* Ac */
162 { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Ao */
163 { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Aq */
164 { NULL, NULL, NULL, NULL, NULL }, /* At */
165 { NULL, NULL, NULL, NULL, NULL }, /* Bc */
166 { NULL, NULL, NULL, NULL, NULL }, /* Bf XXX not implemented */
167 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bo */
168 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bq */
169 { NULL, NULL, NULL, NULL, NULL }, /* Bsx */
170 { NULL, NULL, NULL, NULL, NULL }, /* Bx */
171 { NULL, NULL, NULL, NULL, NULL }, /* Db */
172 { NULL, NULL, NULL, NULL, NULL }, /* Dc */
173 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Do */
174 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Dq */
175 { NULL, NULL, NULL, NULL, NULL }, /* Ec */
176 { NULL, NULL, NULL, NULL, NULL }, /* Ef */
177 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Em */
178 { md_cond_body, md_pre_Eo, md_post_Eo, NULL, NULL }, /* Eo */
179 { NULL, NULL, NULL, NULL, NULL }, /* Fx */
180 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ms */
181 { NULL, md_pre_No, NULL, NULL, NULL }, /* No */
182 { NULL, md_pre_Ns, NULL, NULL, NULL }, /* Ns */
183 { NULL, NULL, NULL, NULL, NULL }, /* Nx */
184 { NULL, NULL, NULL, NULL, NULL }, /* Ox */
185 { NULL, NULL, NULL, NULL, NULL }, /* Pc */
186 { NULL, NULL, md_post_Pf, NULL, NULL }, /* Pf */
187 { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Po */
188 { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Pq */
189 { NULL, NULL, NULL, NULL, NULL }, /* Qc */
190 { md_cond_body, md_pre_raw, md_post_raw, "'`", "`'" }, /* Ql */
191 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qo */
192 { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qq */
193 { NULL, NULL, NULL, NULL, NULL }, /* Re */
194 { md_cond_body, md_pre_Rs, NULL, NULL, NULL }, /* Rs */
195 { NULL, NULL, NULL, NULL, NULL }, /* Sc */
196 { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* So */
197 { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* Sq */
198 { NULL, md_pre_Sm, NULL, NULL, NULL }, /* Sm */
199 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Sx */
200 { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Sy */
201 { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Tn */
202 { NULL, NULL, NULL, NULL, NULL }, /* Ux */
203 { NULL, NULL, NULL, NULL, NULL }, /* Xc */
204 { NULL, NULL, NULL, NULL, NULL }, /* Xo */
205 { NULL, md_pre_Fo, md_post_Fo, "**", "**" }, /* Fo */
206 { NULL, NULL, NULL, NULL, NULL }, /* Fc */
207 { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Oo */
208 { NULL, NULL, NULL, NULL, NULL }, /* Oc */
209 { NULL, md_pre_Bk, md_post_Bk, NULL, NULL }, /* Bk */
210 { NULL, NULL, NULL, NULL, NULL }, /* Ek */
211 { NULL, NULL, NULL, NULL, NULL }, /* Bt */
212 { NULL, NULL, NULL, NULL, NULL }, /* Hf */
213 { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Fr */
214 { NULL, NULL, NULL, NULL, NULL }, /* Ud */
215 { NULL, NULL, md_post_Lb, NULL, NULL }, /* Lb */
216 { NULL, md_pre_abort, NULL, NULL, NULL }, /* Lp */
217 { NULL, md_pre_Lk, NULL, NULL, NULL }, /* Lk */
218 { NULL, md_pre_Mt, NULL, NULL, NULL }, /* Mt */
219 { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Brq */
220 { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Bro */
221 { NULL, NULL, NULL, NULL, NULL }, /* Brc */
222 { NULL, NULL, md_post_pc, NULL, NULL }, /* %C */
223 { NULL, md_pre_skip, NULL, NULL, NULL }, /* Es */
224 { md_cond_body, md_pre_En, md_post_En, NULL, NULL }, /* En */
225 { NULL, NULL, NULL, NULL, NULL }, /* Dx */
226 { NULL, NULL, md_post_pc, NULL, NULL }, /* %Q */
227 { NULL, md_pre_Lk, md_post_pc, NULL, NULL }, /* %U */
228 { NULL, NULL, NULL, NULL, NULL }, /* Ta */
static	const struct md_act *md_act(enum roff_tok);

/* Spacing and line-break requests pending for the next word. */
static	int	 outflags;
#define	MD_spc		 (1 << 0)  /* Blank character before next word. */
#define	MD_spc_force	 (1 << 1)  /* Even before trailing punctuation. */
#define	MD_nonl		 (1 << 2)  /* Prevent linebreak in markdown code. */
#define	MD_nl		 (1 << 3)  /* Break markdown code line. */
#define	MD_br		 (1 << 4)  /* Insert an output line break. */
#define	MD_sp		 (1 << 5)  /* Insert a paragraph break. */
#define	MD_Sm		 (1 << 6)  /* Horizontal spacing mode. */
#define	MD_Bk		 (1 << 7)  /* Word keep mode. */
#define	MD_An_split	 (1 << 8)  /* Author mode is "split". */
#define	MD_An_nosplit	 (1 << 9)  /* Author mode is "nosplit". */

/* Note that bit 3 is not assigned to any ESC_* flag. */
static	int	 escflags; /* Escape in generated markdown code: */
#define	ESC_BOL	 (1 << 0)  /* "#*+-" near the beginning of a line. */
#define	ESC_NUM	 (1 << 1)  /* "." after a leading number. */
#define	ESC_HYP	 (1 << 2)  /* "(" immediately after "]". */
#define	ESC_SQU	 (1 << 4)  /* "]" when "[" is open. */
#define	ESC_FON	 (1 << 5)  /* "*" immediately after unrelated "*". */
#define	ESC_EOL	 (1 << 6)  /* " " at the end of a line. */

/* Nesting counters for open code blocks, blockquotes and lists. */
static	int	 code_blocks, quote_blocks, list_blocks;
/* Output character count; reset at line starts and column boundaries. */
static	int	 outcount;
256 static const struct md_act *
257 md_act(enum roff_tok tok)
259 assert(tok >= MDOC_Dd && tok <= MDOC_MAX);
260 return md_acts + (tok - MDOC_Dd);
263 void
264 markdown_mdoc(void *arg, const struct roff_meta *mdoc)
266 outflags = MD_Sm;
267 md_word(mdoc->title);
268 if (mdoc->msec != NULL) {
269 outflags &= ~MD_spc;
270 md_word("(");
271 md_word(mdoc->msec);
272 md_word(")");
274 md_word("-");
275 md_word(mdoc->vol);
276 if (mdoc->arch != NULL) {
277 md_word("(");
278 md_word(mdoc->arch);
279 md_word(")");
281 outflags |= MD_sp;
283 md_nodelist(mdoc->first->child);
285 outflags |= MD_sp;
286 md_word(mdoc->os);
287 md_word("-");
288 md_word(mdoc->date);
289 putchar('\n');
292 static void
293 md_nodelist(struct roff_node *n)
295 while (n != NULL) {
296 md_node(n);
297 n = n->next;
301 static void
302 md_node(struct roff_node *n)
304 const struct md_act *act;
305 int cond, process_children;
307 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
308 return;
310 if (outflags & MD_nonl)
311 outflags &= ~(MD_nl | MD_sp);
312 else if (outflags & MD_spc && n->flags & NODE_LINE)
313 outflags |= MD_nl;
315 act = NULL;
316 cond = 0;
317 process_children = 1;
318 n->flags &= ~NODE_ENDED;
320 if (n->type == ROFFT_TEXT) {
321 if (n->flags & NODE_DELIMC)
322 outflags &= ~(MD_spc | MD_spc_force);
323 else if (outflags & MD_Sm)
324 outflags |= MD_spc_force;
325 md_word(n->string);
326 if (n->flags & NODE_DELIMO)
327 outflags &= ~(MD_spc | MD_spc_force);
328 else if (outflags & MD_Sm)
329 outflags |= MD_spc;
330 } else if (n->tok < ROFF_MAX) {
331 switch (n->tok) {
332 case ROFF_br:
333 process_children = md_pre_br(n);
334 break;
335 case ROFF_sp:
336 process_children = md_pre_Pp(n);
337 break;
338 default:
339 process_children = 0;
340 break;
342 } else {
343 act = md_act(n->tok);
344 cond = act->cond == NULL || (*act->cond)(n);
345 if (cond && act->pre != NULL &&
346 (n->end == ENDBODY_NOT || n->child != NULL))
347 process_children = (*act->pre)(n);
350 if (process_children && n->child != NULL)
351 md_nodelist(n->child);
353 if (n->flags & NODE_ENDED)
354 return;
356 if (cond && act->post != NULL)
357 (*act->post)(n);
359 if (n->end != ENDBODY_NOT)
360 n->body->flags |= NODE_ENDED;
/*
 * Maintain the string of line prefix characters that is replayed
 * at the start of every output line.
 *   c == '\0'      query only
 *   c == (char)-1  pop the most recent character
 *   otherwise      push c
 * Returns the current prefix string, or "" if nothing was ever pushed.
 */
static const char *
md_stack(char c)
{
	static char	*prefix;	/* NUL-terminated prefix buffer */
	static size_t	 cap;		/* allocated size of the buffer */
	static size_t	 depth;		/* number of characters in use */

	if (c == (char)-1) {
		assert(depth);
		depth--;
		prefix[depth] = '\0';
	} else if (c != '\0') {
		/* Keep room for the new character and the terminator. */
		if (depth + 1 >= cap) {
			cap += 8;
			prefix = mandoc_realloc(prefix, cap);
		}
		prefix[depth] = c;
		depth++;
		prefix[depth] = '\0';
	}
	return prefix == NULL ? "" : prefix;
}
390 * Handle vertical and horizontal spacing.
392 static void
393 md_preword(void)
395 const char *cp;
398 * If a list block is nested inside a code block or a blockquote,
399 * blank lines for paragraph breaks no longer work; instead,
400 * they terminate the list. Work around this markdown issue
401 * by using mere line breaks instead.
404 if (list_blocks && outflags & MD_sp) {
405 outflags &= ~MD_sp;
406 outflags |= MD_br;
410 * End the old line if requested.
411 * Escape whitespace at the end of the markdown line
412 * such that it won't look like an output line break.
415 if (outflags & MD_sp)
416 putchar('\n');
417 else if (outflags & MD_br) {
418 putchar(' ');
419 putchar(' ');
420 } else if (outflags & MD_nl && escflags & ESC_EOL)
421 md_named("zwnj");
423 /* Start a new line if necessary. */
425 if (outflags & (MD_nl | MD_br | MD_sp)) {
426 putchar('\n');
427 for (cp = md_stack('\0'); *cp != '\0'; cp++) {
428 putchar(*cp);
429 if (*cp == '>')
430 putchar(' ');
432 outflags &= ~(MD_nl | MD_br | MD_sp);
433 escflags = ESC_BOL;
434 outcount = 0;
436 /* Handle horizontal spacing. */
438 } else if (outflags & MD_spc) {
439 if (outflags & MD_Bk)
440 fputs("&nbsp;", stdout);
441 else
442 putchar(' ');
443 escflags &= ~ESC_FON;
444 outcount++;
447 outflags &= ~(MD_spc_force | MD_nonl);
448 if (outflags & MD_Sm)
449 outflags |= MD_spc;
450 else
451 outflags &= ~MD_spc;
455 * Print markdown syntax elements.
456 * Can also be used for constant strings when neither escaping
457 * nor delimiter handling is required.
459 static void
460 md_rawword(const char *s)
462 md_preword();
464 if (*s == '\0')
465 return;
467 if (escflags & ESC_FON) {
468 escflags &= ~ESC_FON;
469 if (*s == '*' && !code_blocks)
470 fputs("&zwnj;", stdout);
473 while (*s != '\0') {
474 switch(*s) {
475 case '*':
476 if (s[1] == '\0')
477 escflags |= ESC_FON;
478 break;
479 case '[':
480 escflags |= ESC_SQU;
481 break;
482 case ']':
483 escflags |= ESC_HYP;
484 escflags &= ~ESC_SQU;
485 break;
486 default:
487 break;
489 md_char(*s++);
491 if (s[-1] == ' ')
492 escflags |= ESC_EOL;
493 else
494 escflags &= ~ESC_EOL;
498 * Print text and mdoc(7) syntax elements.
500 static void
501 md_word(const char *s)
503 const char *seq, *prevfont, *currfont, *nextfont;
504 char c;
505 int bs, sz, uc, breakline;
507 /* No spacing before closing delimiters. */
508 if (s[0] != '\0' && s[1] == '\0' &&
509 strchr("!),.:;?]", s[0]) != NULL &&
510 (outflags & MD_spc_force) == 0)
511 outflags &= ~MD_spc;
513 md_preword();
515 if (*s == '\0')
516 return;
518 /* No spacing after opening delimiters. */
519 if ((s[0] == '(' || s[0] == '[') && s[1] == '\0')
520 outflags &= ~MD_spc;
522 breakline = 0;
523 prevfont = currfont = "";
524 while ((c = *s++) != '\0') {
525 bs = 0;
526 switch(c) {
527 case ASCII_NBRSP:
528 if (code_blocks)
529 c = ' ';
530 else {
531 md_named("nbsp");
532 c = '\0';
534 break;
535 case ASCII_HYPH:
536 bs = escflags & ESC_BOL && !code_blocks;
537 c = '-';
538 break;
539 case ASCII_BREAK:
540 continue;
541 case '#':
542 case '+':
543 case '-':
544 bs = escflags & ESC_BOL && !code_blocks;
545 break;
546 case '(':
547 bs = escflags & ESC_HYP && !code_blocks;
548 break;
549 case ')':
550 bs = escflags & ESC_NUM && !code_blocks;
551 break;
552 case '*':
553 case '[':
554 case '_':
555 case '`':
556 bs = !code_blocks;
557 break;
558 case '.':
559 bs = escflags & ESC_NUM && !code_blocks;
560 break;
561 case '<':
562 if (code_blocks == 0) {
563 md_named("lt");
564 c = '\0';
566 break;
567 case '=':
568 if (escflags & ESC_BOL && !code_blocks) {
569 md_named("equals");
570 c = '\0';
572 break;
573 case '>':
574 if (code_blocks == 0) {
575 md_named("gt");
576 c = '\0';
578 break;
579 case '\\':
580 uc = 0;
581 nextfont = NULL;
582 switch (mandoc_escape(&s, &seq, &sz)) {
583 case ESCAPE_UNICODE:
584 uc = mchars_num2uc(seq + 1, sz - 1);
585 break;
586 case ESCAPE_NUMBERED:
587 uc = mchars_num2char(seq, sz);
588 break;
589 case ESCAPE_SPECIAL:
590 uc = mchars_spec2cp(seq, sz);
591 break;
592 case ESCAPE_UNDEF:
593 uc = *seq;
594 break;
595 case ESCAPE_DEVICE:
596 md_rawword("markdown");
597 continue;
598 case ESCAPE_FONTBOLD:
599 nextfont = "**";
600 break;
601 case ESCAPE_FONTITALIC:
602 nextfont = "*";
603 break;
604 case ESCAPE_FONTBI:
605 nextfont = "***";
606 break;
607 case ESCAPE_FONT:
608 case ESCAPE_FONTCW:
609 case ESCAPE_FONTROMAN:
610 nextfont = "";
611 break;
612 case ESCAPE_FONTPREV:
613 nextfont = prevfont;
614 break;
615 case ESCAPE_BREAK:
616 breakline = 1;
617 break;
618 case ESCAPE_NOSPACE:
619 case ESCAPE_SKIPCHAR:
620 case ESCAPE_OVERSTRIKE:
621 /* XXX not implemented */
622 /* FALLTHROUGH */
623 case ESCAPE_ERROR:
624 default:
625 break;
627 if (nextfont != NULL && !code_blocks) {
628 if (*currfont != '\0') {
629 outflags &= ~MD_spc;
630 md_rawword(currfont);
632 prevfont = currfont;
633 currfont = nextfont;
634 if (*currfont != '\0') {
635 outflags &= ~MD_spc;
636 md_rawword(currfont);
639 if (uc) {
640 if ((uc < 0x20 && uc != 0x09) ||
641 (uc > 0x7E && uc < 0xA0))
642 uc = 0xFFFD;
643 if (code_blocks) {
644 seq = mchars_uc2str(uc);
645 fputs(seq, stdout);
646 outcount += strlen(seq);
647 } else {
648 printf("&#%d;", uc);
649 outcount++;
651 escflags &= ~ESC_FON;
653 c = '\0';
654 break;
655 case ']':
656 bs = escflags & ESC_SQU && !code_blocks;
657 escflags |= ESC_HYP;
658 break;
659 default:
660 break;
662 if (bs)
663 putchar('\\');
664 md_char(c);
665 if (breakline &&
666 (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) {
667 printf(" \n");
668 breakline = 0;
669 while (*s == ' ' || *s == ASCII_NBRSP)
670 s++;
673 if (*currfont != '\0') {
674 outflags &= ~MD_spc;
675 md_rawword(currfont);
676 } else if (s[-2] == ' ')
677 escflags |= ESC_EOL;
678 else
679 escflags &= ~ESC_EOL;
683 * Print a single HTML named character reference.
685 static void
686 md_named(const char *s)
688 printf("&%s;", s);
689 escflags &= ~(ESC_FON | ESC_EOL);
690 outcount++;
694 * Print a single raw character and maintain certain escape flags.
696 static void
697 md_char(unsigned char c)
699 if (c != '\0') {
700 putchar(c);
701 if (c == '*')
702 escflags |= ESC_FON;
703 else
704 escflags &= ~ESC_FON;
705 outcount++;
707 if (c != ']')
708 escflags &= ~ESC_HYP;
709 if (c == ' ' || c == '\t' || c == '>')
710 return;
711 if (isdigit(c) == 0)
712 escflags &= ~ESC_NUM;
713 else if (escflags & ESC_BOL)
714 escflags |= ESC_NUM;
715 escflags &= ~ESC_BOL;
718 static int
719 md_cond_head(struct roff_node *n)
721 return n->type == ROFFT_HEAD;
724 static int
725 md_cond_body(struct roff_node *n)
727 return n->type == ROFFT_BODY;
/*
 * Handler for macros that this formatter never expects to see
 * (the table uses it for Ot and Lp): terminate the program.
 */
static int
md_pre_abort(struct roff_node *n)
{
	abort();
	/* NOTREACHED */
}
736 static int
737 md_pre_raw(struct roff_node *n)
739 const char *prefix;
741 if ((prefix = md_act(n->tok)->prefix) != NULL) {
742 md_rawword(prefix);
743 outflags &= ~MD_spc;
744 if (*prefix == '`')
745 code_blocks++;
747 return 1;
750 static void
751 md_post_raw(struct roff_node *n)
753 const char *suffix;
755 if ((suffix = md_act(n->tok)->suffix) != NULL) {
756 outflags &= ~(MD_spc | MD_nl);
757 md_rawword(suffix);
758 if (*suffix == '`')
759 code_blocks--;
763 static int
764 md_pre_word(struct roff_node *n)
766 const char *prefix;
768 if ((prefix = md_act(n->tok)->prefix) != NULL) {
769 md_word(prefix);
770 outflags &= ~MD_spc;
772 return 1;
775 static void
776 md_post_word(struct roff_node *n)
778 const char *suffix;
780 if ((suffix = md_act(n->tok)->suffix) != NULL) {
781 outflags &= ~(MD_spc | MD_nl);
782 md_word(suffix);
786 static void
787 md_post_pc(struct roff_node *n)
789 md_post_raw(n);
790 if (n->parent->tok != MDOC_Rs)
791 return;
792 if (n->next != NULL) {
793 md_word(",");
794 if (n->prev != NULL &&
795 n->prev->tok == n->tok &&
796 n->next->tok == n->tok)
797 md_word("and");
798 } else {
799 md_word(".");
800 outflags |= MD_nl;
/*
 * Print nothing and do not descend into the children.
 */
static int
md_pre_skip(struct roff_node *n)
{
	return 0;
}
810 static void
811 md_pre_syn(struct roff_node *n)
813 if (n->prev == NULL || ! (n->flags & NODE_SYNPRETTY))
814 return;
816 if (n->prev->tok == n->tok &&
817 n->tok != MDOC_Ft &&
818 n->tok != MDOC_Fo &&
819 n->tok != MDOC_Fn) {
820 outflags |= MD_br;
821 return;
824 switch (n->prev->tok) {
825 case MDOC_Fd:
826 case MDOC_Fn:
827 case MDOC_Fo:
828 case MDOC_In:
829 case MDOC_Vt:
830 outflags |= MD_sp;
831 break;
832 case MDOC_Ft:
833 if (n->tok != MDOC_Fn && n->tok != MDOC_Fo) {
834 outflags |= MD_sp;
835 break;
837 /* FALLTHROUGH */
838 default:
839 outflags |= MD_br;
840 break;
844 static int
845 md_pre_An(struct roff_node *n)
847 switch (n->norm->An.auth) {
848 case AUTH_split:
849 outflags &= ~MD_An_nosplit;
850 outflags |= MD_An_split;
851 return 0;
852 case AUTH_nosplit:
853 outflags &= ~MD_An_split;
854 outflags |= MD_An_nosplit;
855 return 0;
856 default:
857 if (outflags & MD_An_split)
858 outflags |= MD_br;
859 else if (n->sec == SEC_AUTHORS &&
860 ! (outflags & MD_An_nosplit))
861 outflags |= MD_An_split;
862 return 1;
866 static int
867 md_pre_Ap(struct roff_node *n)
869 outflags &= ~MD_spc;
870 md_word("'");
871 outflags &= ~MD_spc;
872 return 0;
875 static int
876 md_pre_Bd(struct roff_node *n)
878 switch (n->norm->Bd.type) {
879 case DISP_unfilled:
880 case DISP_literal:
881 return md_pre_Dl(n);
882 default:
883 return md_pre_D1(n);
887 static int
888 md_pre_Bk(struct roff_node *n)
890 switch (n->type) {
891 case ROFFT_BLOCK:
892 return 1;
893 case ROFFT_BODY:
894 outflags |= MD_Bk;
895 return 1;
896 default:
897 return 0;
901 static void
902 md_post_Bk(struct roff_node *n)
904 if (n->type == ROFFT_BODY)
905 outflags &= ~MD_Bk;
908 static int
909 md_pre_Bl(struct roff_node *n)
911 n->norm->Bl.count = 0;
912 if (n->norm->Bl.type == LIST_column)
913 md_pre_Dl(n);
914 outflags |= MD_sp;
915 return 1;
918 static void
919 md_post_Bl(struct roff_node *n)
921 n->norm->Bl.count = 0;
922 if (n->norm->Bl.type == LIST_column)
923 md_post_D1(n);
924 outflags |= MD_sp;
927 static int
928 md_pre_D1(struct roff_node *n)
931 * Markdown blockquote syntax does not work inside code blocks.
932 * The best we can do is fall back to another nested code block.
934 if (code_blocks) {
935 md_stack('\t');
936 code_blocks++;
937 } else {
938 md_stack('>');
939 quote_blocks++;
941 outflags |= MD_sp;
942 return 1;
945 static void
946 md_post_D1(struct roff_node *n)
948 md_stack((char)-1);
949 if (code_blocks)
950 code_blocks--;
951 else
952 quote_blocks--;
953 outflags |= MD_sp;
956 static int
957 md_pre_Dl(struct roff_node *n)
960 * Markdown code block syntax does not work inside blockquotes.
961 * The best we can do is fall back to another nested blockquote.
963 if (quote_blocks) {
964 md_stack('>');
965 quote_blocks++;
966 } else {
967 md_stack('\t');
968 code_blocks++;
970 outflags |= MD_sp;
971 return 1;
974 static int
975 md_pre_En(struct roff_node *n)
977 if (n->norm->Es == NULL ||
978 n->norm->Es->child == NULL)
979 return 1;
981 md_word(n->norm->Es->child->string);
982 outflags &= ~MD_spc;
983 return 1;
986 static void
987 md_post_En(struct roff_node *n)
989 if (n->norm->Es == NULL ||
990 n->norm->Es->child == NULL ||
991 n->norm->Es->child->next == NULL)
992 return;
994 outflags &= ~MD_spc;
995 md_word(n->norm->Es->child->next->string);
998 static int
999 md_pre_Eo(struct roff_node *n)
1001 if (n->end == ENDBODY_NOT &&
1002 n->parent->head->child == NULL &&
1003 n->child != NULL &&
1004 n->child->end != ENDBODY_NOT)
1005 md_preword();
1006 else if (n->end != ENDBODY_NOT ? n->child != NULL :
1007 n->parent->head->child != NULL && (n->child != NULL ||
1008 (n->parent->tail != NULL && n->parent->tail->child != NULL)))
1009 outflags &= ~(MD_spc | MD_nl);
1010 return 1;
1013 static void
1014 md_post_Eo(struct roff_node *n)
1016 if (n->end != ENDBODY_NOT) {
1017 outflags |= MD_spc;
1018 return;
1021 if (n->child == NULL && n->parent->head->child == NULL)
1022 return;
1024 if (n->parent->tail != NULL && n->parent->tail->child != NULL)
1025 outflags &= ~MD_spc;
1026 else
1027 outflags |= MD_spc;
1030 static int
1031 md_pre_Fa(struct roff_node *n)
1033 int am_Fa;
1035 am_Fa = n->tok == MDOC_Fa;
1037 if (am_Fa)
1038 n = n->child;
1040 while (n != NULL) {
1041 md_rawword("*");
1042 outflags &= ~MD_spc;
1043 md_node(n);
1044 outflags &= ~MD_spc;
1045 md_rawword("*");
1046 if ((n = n->next) != NULL)
1047 md_word(",");
1049 return 0;
1052 static void
1053 md_post_Fa(struct roff_node *n)
1055 if (n->next != NULL && n->next->tok == MDOC_Fa)
1056 md_word(",");
/*
 * Apply synopsis spacing, then print the raw prefix of the macro.
 * The table uses this handler for both Fd and Ft.
 */
static int
md_pre_Fd(struct roff_node *n)
{
	md_pre_syn(n);
	(void)md_pre_raw(n);	/* always returns 1 */
	return 1;
}
1067 static void
1068 md_post_Fd(struct roff_node *n)
1070 md_post_raw(n);
1071 outflags |= MD_br;
1074 static void
1075 md_post_Fl(struct roff_node *n)
1077 md_post_raw(n);
1078 if (n->child == NULL && n->next != NULL &&
1079 n->next->type != ROFFT_TEXT && !(n->next->flags & NODE_LINE))
1080 outflags &= ~MD_spc;
1083 static int
1084 md_pre_Fn(struct roff_node *n)
1086 md_pre_syn(n);
1088 if ((n = n->child) == NULL)
1089 return 0;
1091 md_rawword("**");
1092 outflags &= ~MD_spc;
1093 md_node(n);
1094 outflags &= ~MD_spc;
1095 md_rawword("**");
1096 outflags &= ~MD_spc;
1097 md_word("(");
1099 if ((n = n->next) != NULL)
1100 md_pre_Fa(n);
1101 return 0;
1104 static void
1105 md_post_Fn(struct roff_node *n)
1107 md_word(")");
1108 if (n->flags & NODE_SYNPRETTY) {
1109 md_word(";");
1110 outflags |= MD_sp;
1114 static int
1115 md_pre_Fo(struct roff_node *n)
1117 switch (n->type) {
1118 case ROFFT_BLOCK:
1119 md_pre_syn(n);
1120 break;
1121 case ROFFT_HEAD:
1122 if (n->child == NULL)
1123 return 0;
1124 md_pre_raw(n);
1125 break;
1126 case ROFFT_BODY:
1127 outflags &= ~(MD_spc | MD_nl);
1128 md_word("(");
1129 break;
1130 default:
1131 break;
1133 return 1;
1136 static void
1137 md_post_Fo(struct roff_node *n)
1139 switch (n->type) {
1140 case ROFFT_HEAD:
1141 if (n->child != NULL)
1142 md_post_raw(n);
1143 break;
1144 case ROFFT_BODY:
1145 md_post_Fn(n);
1146 break;
1147 default:
1148 break;
1152 static int
1153 md_pre_In(struct roff_node *n)
1155 if (n->flags & NODE_SYNPRETTY) {
1156 md_pre_syn(n);
1157 md_rawword("**");
1158 outflags &= ~MD_spc;
1159 md_word("#include <");
1160 } else {
1161 md_word("<");
1162 outflags &= ~MD_spc;
1163 md_rawword("*");
1165 outflags &= ~MD_spc;
1166 return 1;
1169 static void
1170 md_post_In(struct roff_node *n)
1172 if (n->flags & NODE_SYNPRETTY) {
1173 outflags &= ~MD_spc;
1174 md_rawword(">**");
1175 outflags |= MD_nl;
1176 } else {
1177 outflags &= ~MD_spc;
1178 md_rawword("*>");
1182 static int
1183 md_pre_It(struct roff_node *n)
1185 struct roff_node *bln;
1187 switch (n->type) {
1188 case ROFFT_BLOCK:
1189 return 1;
1191 case ROFFT_HEAD:
1192 bln = n->parent->parent;
1193 if (bln->norm->Bl.comp == 0 &&
1194 bln->norm->Bl.type != LIST_column)
1195 outflags |= MD_sp;
1196 outflags |= MD_nl;
1198 switch (bln->norm->Bl.type) {
1199 case LIST_item:
1200 outflags |= MD_br;
1201 return 0;
1202 case LIST_inset:
1203 case LIST_diag:
1204 case LIST_ohang:
1205 outflags |= MD_br;
1206 return 1;
1207 case LIST_tag:
1208 case LIST_hang:
1209 outflags |= MD_sp;
1210 return 1;
1211 case LIST_bullet:
1212 md_rawword("*\t");
1213 break;
1214 case LIST_dash:
1215 case LIST_hyphen:
1216 md_rawword("-\t");
1217 break;
1218 case LIST_enum:
1219 md_preword();
1220 if (bln->norm->Bl.count < 99)
1221 bln->norm->Bl.count++;
1222 printf("%d.\t", bln->norm->Bl.count);
1223 escflags &= ~ESC_FON;
1224 break;
1225 case LIST_column:
1226 outflags |= MD_br;
1227 return 0;
1228 default:
1229 return 0;
1231 outflags &= ~MD_spc;
1232 outflags |= MD_nonl;
1233 outcount = 0;
1234 md_stack('\t');
1235 if (code_blocks || quote_blocks)
1236 list_blocks++;
1237 return 0;
1239 case ROFFT_BODY:
1240 bln = n->parent->parent;
1241 switch (bln->norm->Bl.type) {
1242 case LIST_ohang:
1243 outflags |= MD_br;
1244 break;
1245 case LIST_tag:
1246 case LIST_hang:
1247 md_pre_D1(n);
1248 break;
1249 default:
1250 break;
1252 return 1;
1254 default:
1255 return 0;
1259 static void
1260 md_post_It(struct roff_node *n)
1262 struct roff_node *bln;
1263 int i, nc;
1265 if (n->type != ROFFT_BODY)
1266 return;
1268 bln = n->parent->parent;
1269 switch (bln->norm->Bl.type) {
1270 case LIST_bullet:
1271 case LIST_dash:
1272 case LIST_hyphen:
1273 case LIST_enum:
1274 md_stack((char)-1);
1275 if (code_blocks || quote_blocks)
1276 list_blocks--;
1277 break;
1278 case LIST_tag:
1279 case LIST_hang:
1280 md_post_D1(n);
1281 break;
1283 case LIST_column:
1284 if (n->next == NULL)
1285 break;
1287 /* Calculate the array index of the current column. */
1289 i = 0;
1290 while ((n = n->prev) != NULL && n->type != ROFFT_HEAD)
1291 i++;
1294 * If a width was specified for this column,
1295 * subtract what printed, and
1296 * add the same spacing as in mdoc_term.c.
1299 nc = bln->norm->Bl.ncols;
1300 i = i < nc ? strlen(bln->norm->Bl.cols[i]) - outcount +
1301 (nc < 5 ? 4 : nc == 5 ? 3 : 1) : 1;
1302 if (i < 1)
1303 i = 1;
1304 while (i-- > 0)
1305 putchar(' ');
1307 outflags &= ~MD_spc;
1308 escflags &= ~ESC_FON;
1309 outcount = 0;
1310 break;
1312 default:
1313 break;
1317 static void
1318 md_post_Lb(struct roff_node *n)
1320 if (n->sec == SEC_LIBRARY)
1321 outflags |= MD_br;
1324 static void
1325 md_uri(const char *s)
1327 while (*s != '\0') {
1328 if (strchr("%()<>", *s) != NULL) {
1329 printf("%%%2.2hhX", *s);
1330 outcount += 3;
1331 } else {
1332 putchar(*s);
1333 outcount++;
1335 s++;
1339 static int
1340 md_pre_Lk(struct roff_node *n)
1342 const struct roff_node *link, *descr, *punct;
1344 if ((link = n->child) == NULL)
1345 return 0;
1347 /* Find beginning of trailing punctuation. */
1348 punct = n->last;
1349 while (punct != link && punct->flags & NODE_DELIMC)
1350 punct = punct->prev;
1351 punct = punct->next;
1353 /* Link text. */
1354 descr = link->next;
1355 if (descr == punct)
1356 descr = link; /* no text */
1357 md_rawword("[");
1358 outflags &= ~MD_spc;
1359 do {
1360 md_word(descr->string);
1361 descr = descr->next;
1362 } while (descr != punct);
1363 outflags &= ~MD_spc;
1365 /* Link target. */
1366 md_rawword("](");
1367 md_uri(link->string);
1368 outflags &= ~MD_spc;
1369 md_rawword(")");
1371 /* Trailing punctuation. */
1372 while (punct != NULL) {
1373 md_word(punct->string);
1374 punct = punct->next;
1376 return 0;
1379 static int
1380 md_pre_Mt(struct roff_node *n)
1382 const struct roff_node *nch;
1384 md_rawword("[");
1385 outflags &= ~MD_spc;
1386 for (nch = n->child; nch != NULL; nch = nch->next)
1387 md_word(nch->string);
1388 outflags &= ~MD_spc;
1389 md_rawword("](mailto:");
1390 for (nch = n->child; nch != NULL; nch = nch->next) {
1391 md_uri(nch->string);
1392 if (nch->next != NULL) {
1393 putchar(' ');
1394 outcount++;
1397 outflags &= ~MD_spc;
1398 md_rawword(")");
1399 return 0;
1402 static int
1403 md_pre_Nd(struct roff_node *n)
1405 outflags &= ~MD_nl;
1406 outflags |= MD_spc;
1407 md_word("-");
1408 return 1;
1411 static int
1412 md_pre_Nm(struct roff_node *n)
1414 switch (n->type) {
1415 case ROFFT_BLOCK:
1416 outflags |= MD_Bk;
1417 md_pre_syn(n);
1418 break;
1419 case ROFFT_HEAD:
1420 case ROFFT_ELEM:
1421 md_pre_raw(n);
1422 break;
1423 default:
1424 break;
1426 return 1;
1429 static void
1430 md_post_Nm(struct roff_node *n)
1432 switch (n->type) {
1433 case ROFFT_BLOCK:
1434 outflags &= ~MD_Bk;
1435 break;
1436 case ROFFT_HEAD:
1437 case ROFFT_ELEM:
1438 md_post_raw(n);
1439 break;
1440 default:
1441 break;
1445 static int
1446 md_pre_No(struct roff_node *n)
1448 outflags |= MD_spc_force;
1449 return 1;
1452 static int
1453 md_pre_Ns(struct roff_node *n)
1455 outflags &= ~MD_spc;
1456 return 0;
1459 static void
1460 md_post_Pf(struct roff_node *n)
1462 if (n->next != NULL && (n->next->flags & NODE_LINE) == 0)
1463 outflags &= ~MD_spc;
1466 static int
1467 md_pre_Pp(struct roff_node *n)
1469 outflags |= MD_sp;
1470 return 0;
1473 static int
1474 md_pre_Rs(struct roff_node *n)
1476 if (n->sec == SEC_SEE_ALSO)
1477 outflags |= MD_sp;
1478 return 1;
1481 static int
1482 md_pre_Sh(struct roff_node *n)
1484 switch (n->type) {
1485 case ROFFT_BLOCK:
1486 if (n->sec == SEC_AUTHORS)
1487 outflags &= ~(MD_An_split | MD_An_nosplit);
1488 break;
1489 case ROFFT_HEAD:
1490 outflags |= MD_sp;
1491 md_rawword(n->tok == MDOC_Sh ? "#" : "##");
1492 break;
1493 case ROFFT_BODY:
1494 outflags |= MD_sp;
1495 break;
1496 default:
1497 break;
1499 return 1;
1502 static int
1503 md_pre_Sm(struct roff_node *n)
1505 if (n->child == NULL)
1506 outflags ^= MD_Sm;
1507 else if (strcmp("on", n->child->string) == 0)
1508 outflags |= MD_Sm;
1509 else
1510 outflags &= ~MD_Sm;
1512 if (outflags & MD_Sm)
1513 outflags |= MD_spc;
1515 return 0;
1518 static int
1519 md_pre_Vt(struct roff_node *n)
1521 switch (n->type) {
1522 case ROFFT_BLOCK:
1523 md_pre_syn(n);
1524 return 1;
1525 case ROFFT_BODY:
1526 case ROFFT_ELEM:
1527 md_pre_raw(n);
1528 return 1;
1529 default:
1530 return 0;
1534 static void
1535 md_post_Vt(struct roff_node *n)
1537 switch (n->type) {
1538 case ROFFT_BODY:
1539 case ROFFT_ELEM:
1540 md_post_raw(n);
1541 break;
1542 default:
1543 break;
1547 static int
1548 md_pre_Xr(struct roff_node *n)
1550 n = n->child;
1551 if (n == NULL)
1552 return 0;
1553 md_node(n);
1554 n = n->next;
1555 if (n == NULL)
1556 return 0;
1557 outflags &= ~MD_spc;
1558 md_word("(");
1559 md_node(n);
1560 md_word(")");
1561 return 0;
1564 static int
1565 md_pre__T(struct roff_node *n)
1567 if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T)
1568 md_word("\"");
1569 else
1570 md_rawword("*");
1571 outflags &= ~MD_spc;
1572 return 1;
1575 static void
1576 md_post__T(struct roff_node *n)
1578 outflags &= ~MD_spc;
1579 if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T)
1580 md_word("\"");
1581 else
1582 md_rawword("*");
1583 md_post_pc(n);
1586 static int
1587 md_pre_br(struct roff_node *n)
1589 outflags |= MD_br;
1590 return 0;