9718 update mandoc to 1.14.4
[unleashed.git] / usr / src / cmd / mandoc / roff.c
blob86e145e366856020eb8c6b6963d825a6e05e74f6
1 /* $Id: roff.c,v 1.329 2018/08/01 15:40:17 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #include "config.h"
20 #include <sys/types.h>
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit, so several types can be or'ed
 * together into one mask; ROFFDEF_ANY is the union of the four
 * "defined" types, while ROFFDEF_UNDEF is a separate bit that
 * is not part of ROFFDEF_ANY.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
51 /* --- data types --------------------------------------------------------- */
/*
 * An incredibly-simple string buffer.
 * The length is cached in sz so that users need not call
 * strlen(3) on p again.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps three such lists: strtab, rentab, and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key;	/* register name */
	int		 val;	/* current value */
	int		 step;	/* NOTE(review): presumably the auto-increment
				 * step of the register - confirm in roff_nr() */
	struct roffreg	*next;	/* next in list */
};
/*
 * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the request hash table;
 * roffhash_alloc() allocates each one with enough trailing space
 * for the name and passes offsetof(struct roffreq, name) to the
 * hash table as the key offset.
 */
struct	roffreq {
	enum roff_tok	 tok;	/* token ID the name maps to */
	char		 name[];	/* nil-terminated name; must stay last */
};
/*
 * State of the roff parser.
 * Allocated by roff_alloc(), returned to its initial state by
 * roff_reset(), and released by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; -1 when empty */
	int		 format; /* current file in mdoc or man format */
	int		 argc; /* number of args of the last macro */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
/*
 * One entry on the stack of pending roff request blocks.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(); the name and end strings are owned by the node
 * and freed together with it.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
/*
 * Parameter list shared by all request handler functions.
 * It is spelled as a macro so that the prototypes and the
 * definitions of the handlers cannot drift apart.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
/*
 * Handlers and flags for one request; element type of the roffs[]
 * table.  That table initializes the members positionally, so the
 * member order here must not change.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
/*
 * One predefined string; element type of the predefs[] table.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one positional struct predef initializer, including
 * the trailing comma.
 * NOTE(review): presumably this is the macro invoked by the lines
 * of "predefs.in" - confirm against that file.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
152 /* --- function prototypes ------------------------------------------------ */
/*
 * Forward declarations of all file-local functions.
 * Those declared with ROFF_ARGS are request handlers matching the
 * roffproc type and are referenced from the roffs[] table.
 */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
static	enum rofferr	 roff_als(ROFF_ARGS);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_br(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_ec(ROFF_ARGS);
static	enum rofferr	 roff_eo(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	enum rofferr	 roff_renamed(ROFF_ARGS);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rn(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
228 /* --- constant data ------------------------------------------------------ */
/* Option bits for the numeric evaluation helpers; may be or'ed together. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
/*
 * Table of request and macro names, indexed by token.
 * roffhash_alloc() reads it through roff_name[tok] and skips NULL
 * entries, so a NULL marks a token that has no name to look up.
 * The position of every string is significant: do not insert,
 * delete, or reorder entries without making the same change to
 * the token enumeration.
 * NOTE(review): the grouping comments below are inferred from the
 * NULL separators and from the comments in roffs[] - confirm
 * against the definition of enum roff_tok.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* Requests; the NULL presumably corresponds to ROFF_MAX. */
	"br", "ce", "ft", "ll",
	"mc", "po", "rj", "sp",
	"ta", "ti", NULL,
	"ab", "ad", "af", "aln",
	"als", "am", "am1", "ami",
	"ami1", "as", "as1", "asciify",
	"backtrace", "bd", "bleedat", "blm",
	"box", "boxa", "bp", "BP",
	"break", "breakchar", "brnl", "brp",
	"brpnl", "c2", "cc",
	"cf", "cflags", "ch", "char",
	"chop", "class", "close", "CL",
	"color", "composite", "continue", "cp",
	"cropat", "cs", "cu", "da",
	"dch", "Dd", "de", "de1",
	"defcolor", "dei", "dei1", "device",
	"devicem", "di", "do", "ds",
	"ds1", "dwh", "dt", "ec",
	"ecr", "ecs", "el", "em",
	"EN", "eo", "EP", "EQ",
	"errprint", "ev", "evc", "ex",
	"fallback", "fam", "fc", "fchar",
	"fcolor", "fdeferlig", "feature", "fkern",
	"fl", "flig", "fp", "fps",
	"fschar", "fspacewidth", "fspecial", "ftr",
	"fzoom", "gcolor", "hc", "hcode",
	"hidechar", "hla", "hlm", "hpf",
	"hpfa", "hpfcode", "hw", "hy",
	"hylang", "hylen", "hym", "hypp",
	"hys", "ie", "if", "ig",
	"index", "it", "itc", "IX",
	"kern", "kernafter", "kernbefore", "kernpair",
	"lc", "lc_ctype", "lds", "length",
	"letadj", "lf", "lg", "lhang",
	"linetabs", "lnr", "lnrf", "lpfx",
	"ls", "lsm", "lt",
	"mediasize", "minss", "mk", "mso",
	"na", "ne", "nh", "nhychar",
	"nm", "nn", "nop", "nr",
	"nrf", "nroff", "ns", "nx",
	"open", "opena", "os", "output",
	"padj", "papersize", "pc", "pev",
	"pi", "PI", "pl", "pm",
	"pn", "pnr", "ps",
	"psbb", "pshape", "pso", "ptr",
	"pvs", "rchar", "rd", "recursionlimit",
	"return", "rfschar", "rhang",
	"rm", "rn", "rnn", "rr",
	"rs", "rt", "schar", "sentchar",
	"shc", "shift", "sizes", "so",
	"spacewidth", "special", "spreadwarn", "ss",
	"sty", "substring", "sv", "sy",
	"T&", "tc", "TE",
	"TH", "tkf", "tl",
	"tm", "tm1", "tmc", "tr",
	"track", "transchar", "trf", "trimat",
	"trin", "trnt", "troff", "TS",
	"uf", "ul", "unformat", "unwatch",
	"unwatchn", "vpt", "vs", "warn",
	"warnscale", "watch", "watchlength", "watchn",
	"wh", "while", "write", "writec",
	"writem", "xflag", ".", NULL,
	NULL, "text",
	/* Presumably the mdoc(7) macros, closed by a NULL. */
	"Dd", "Dt", "Os", "Sh",
	"Ss", "Pp", "D1", "Dl",
	"Bd", "Ed", "Bl", "El",
	"It", "Ad", "An", "Ap",
	"Ar", "Cd", "Cm", "Dv",
	"Er", "Ev", "Ex", "Fa",
	"Fd", "Fl", "Fn", "Ft",
	"Ic", "In", "Li", "Nd",
	"Nm", "Op", "Ot", "Pa",
	"Rv", "St", "Va", "Vt",
	"Xr", "%A", "%B", "%D",
	"%I", "%J", "%N", "%O",
	"%P", "%R", "%T", "%V",
	"Ac", "Ao", "Aq", "At",
	"Bc", "Bf", "Bo", "Bq",
	"Bsx", "Bx", "Db", "Dc",
	"Do", "Dq", "Ec", "Ef",
	"Em", "Eo", "Fx", "Ms",
	"No", "Ns", "Nx", "Ox",
	"Pc", "Pf", "Po", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Sc", "So",
	"Sq", "Sm", "Sx", "Sy",
	"Tn", "Ux", "Xc", "Xo",
	"Fo", "Fc", "Oo", "Oc",
	"Bk", "Ek", "Bt", "Hf",
	"Fr", "Ud", "Lb", "Lp",
	"Lk", "Mt", "Brq", "Bro",
	"Brc", "%C", "Es", "En",
	"Dx", "%Q", "%U", "Ta",
	NULL,
	/* Presumably the man(7) macros, closed by a NULL. */
	"TH", "SH", "SS", "TP",
	"LP", "PP", "P", "IP",
	"HP", "SM", "SB", "BI",
	"IB", "BR", "RB", "R",
	"B", "I", "IR", "RI",
	"nf", "fi",
	"RE", "RS", "DT", "UC",
	"PD", "AT", "in",
	"OP", "EX", "EE", "UR",
	"UE", "MT", "ME", NULL
};
/* Read-only view of the table above, used for all lookups by token. */
const char *const *roff_name = __roff_name;
/*
 * Handler table for roff requests, with TOKEN_NONE elements.
 * Each element is a positional struct roffmac initializer:
 * { proc, text, sub, flags }.  The trailing comments name the
 * request each element belongs to and follow the same order as
 * the leading part of __roff_name[]; the order must not change.
 * NOTE(review): presumably indexed by enum roff_tok, with the two
 * uncommented elements at the end serving renamed and user-defined
 * macros - confirm against the token enumeration.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_br, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_br, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_unsupp, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_unsupp, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_unsupp, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_unsupp, NULL, NULL, 0 },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },
	{ roff_userdef, NULL, NULL, 0 }
};
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in"; PREDEFS_MAX is a
 * hard-coded element count.
 * NOTE(review): presumably PREDEFS_MAX has to be kept equal to the
 * number of entries in predefs.in by hand - confirm.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
/*
 * File-scope parser state that is not part of struct roff.
 * All four variables are cleared by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
598 /* --- request table ------------------------------------------------------ */
600 struct ohash *
601 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
603 struct ohash *htab;
604 struct roffreq *req;
605 enum roff_tok tok;
606 size_t sz;
607 unsigned int slot;
609 htab = mandoc_malloc(sizeof(*htab));
610 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
612 for (tok = mintok; tok < maxtok; tok++) {
613 if (roff_name[tok] == NULL)
614 continue;
615 sz = strlen(roff_name[tok]);
616 req = mandoc_malloc(sizeof(*req) + sz + 1);
617 req->tok = tok;
618 memcpy(req->name, roff_name[tok], sz + 1);
619 slot = ohash_qlookup(htab, req->name);
620 ohash_insert(htab, slot, req);
622 return htab;
/*
 * Release a table created by roffhash_alloc(): first every entry,
 * then the table's internal storage, then the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
640 enum roff_tok
641 roffhash_find(struct ohash *htab, const char *name, size_t sz)
643 struct roffreq *req;
644 const char *end;
646 if (sz) {
647 end = name + sz;
648 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
649 } else
650 req = ohash_find(htab, ohash_qlookup(htab, name));
651 return req == NULL ? TOKEN_NONE : req->tok;
654 /* --- stack of request blocks -------------------------------------------- */
657 * Pop the current node off of the stack of roff instructions currently
658 * pending.
660 static void
661 roffnode_pop(struct roff *r)
663 struct roffnode *p;
665 assert(r->last);
666 p = r->last;
668 r->last = r->last->parent;
669 free(p->name);
670 free(p->end);
671 free(p);
675 * Push a roff node onto the instruction stack. This must later be
676 * removed with roffnode_pop().
678 static void
679 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
680 int line, int col)
682 struct roffnode *p;
684 p = mandoc_calloc(1, sizeof(struct roffnode));
685 p->tok = tok;
686 if (name)
687 p->name = mandoc_strdup(name);
688 p->parent = r->last;
689 p->line = line;
690 p->col = col;
691 p->rule = p->parent ? p->parent->rule : 0;
693 r->last = p;
696 /* --- roff parser state data management ---------------------------------- */
698 static void
699 roff_free1(struct roff *r)
701 struct tbl_node *tbl;
702 int i;
704 while (NULL != (tbl = r->first_tbl)) {
705 r->first_tbl = tbl->next;
706 tbl_free(tbl);
708 r->first_tbl = r->last_tbl = r->tbl = NULL;
710 if (r->last_eqn != NULL)
711 eqn_free(r->last_eqn);
712 r->last_eqn = r->eqn = NULL;
714 while (r->last)
715 roffnode_pop(r);
717 free (r->rstack);
718 r->rstack = NULL;
719 r->rstacksz = 0;
720 r->rstackpos = -1;
722 roff_freereg(r->regtab);
723 r->regtab = NULL;
725 roff_freestr(r->strtab);
726 roff_freestr(r->rentab);
727 roff_freestr(r->xmbtab);
728 r->strtab = r->rentab = r->xmbtab = NULL;
730 if (r->xtab)
731 for (i = 0; i < 128; i++)
732 free(r->xtab[i].p);
733 free(r->xtab);
734 r->xtab = NULL;
737 void
738 roff_reset(struct roff *r)
740 roff_free1(r);
741 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
742 r->control = '\0';
743 r->escape = '\\';
744 roffce_lines = 0;
745 roffce_node = NULL;
746 roffit_lines = 0;
747 roffit_macro = NULL;
750 void
751 roff_free(struct roff *r)
753 roff_free1(r);
754 roffhash_free(r->reqtab);
755 free(r);
758 struct roff *
759 roff_alloc(struct mparse *parse, int options)
761 struct roff *r;
763 r = mandoc_calloc(1, sizeof(struct roff));
764 r->parse = parse;
765 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
766 r->options = options;
767 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
768 r->rstackpos = -1;
769 r->escape = '\\';
770 return r;
773 /* --- syntax tree state data management ---------------------------------- */
775 static void
776 roff_man_free1(struct roff_man *man)
779 if (man->first != NULL)
780 roff_node_delete(man, man->first);
781 free(man->meta.msec);
782 free(man->meta.vol);
783 free(man->meta.os);
784 free(man->meta.arch);
785 free(man->meta.title);
786 free(man->meta.name);
787 free(man->meta.date);
790 static void
791 roff_man_alloc1(struct roff_man *man)
794 memset(&man->meta, 0, sizeof(man->meta));
795 man->first = mandoc_calloc(1, sizeof(*man->first));
796 man->first->type = ROFFT_ROOT;
797 man->last = man->first;
798 man->last_es = NULL;
799 man->flags = 0;
800 man->macroset = MACROSET_NONE;
801 man->lastsec = man->lastnamed = SEC_NONE;
802 man->next = ROFF_NEXT_CHILD;
/*
 * Throw away the current document and start over with an empty
 * tree, keeping the object itself and its long-lived settings.
 */
void
roff_man_reset(struct roff_man *man)
{

	roff_man_free1(man);	/* drop the old tree and meta data */
	roff_man_alloc1(man);	/* set up a new, empty document */
}
/*
 * Destroy a syntax tree object created by roff_man_alloc(),
 * including its document data.
 */
void
roff_man_free(struct roff_man *man)
{

	roff_man_free1(man);	/* tree and meta data */
	free(man);		/* the object itself */
}
821 struct roff_man *
822 roff_man_alloc(struct roff *roff, struct mparse *parse,
823 const char *os_s, int quick)
825 struct roff_man *man;
827 man = mandoc_calloc(1, sizeof(*man));
828 man->parse = parse;
829 man->roff = roff;
830 man->os_s = os_s;
831 man->quick = quick;
832 roff_man_alloc1(man);
833 roff->man = man;
834 return man;
837 /* --- syntax tree handling ----------------------------------------------- */
839 struct roff_node *
840 roff_node_alloc(struct roff_man *man, int line, int pos,
841 enum roff_type type, int tok)
843 struct roff_node *n;
845 n = mandoc_calloc(1, sizeof(*n));
846 n->line = line;
847 n->pos = pos;
848 n->tok = tok;
849 n->type = type;
850 n->sec = man->lastsec;
852 if (man->flags & MDOC_SYNOPSIS)
853 n->flags |= NODE_SYNPRETTY;
854 else
855 n->flags &= ~NODE_SYNPRETTY;
856 if (man->flags & MDOC_NEWLINE)
857 n->flags |= NODE_LINE;
858 man->flags &= ~MDOC_NEWLINE;
860 return n;
863 void
864 roff_node_append(struct roff_man *man, struct roff_node *n)
867 switch (man->next) {
868 case ROFF_NEXT_SIBLING:
869 if (man->last->next != NULL) {
870 n->next = man->last->next;
871 man->last->next->prev = n;
872 } else
873 man->last->parent->last = n;
874 man->last->next = n;
875 n->prev = man->last;
876 n->parent = man->last->parent;
877 break;
878 case ROFF_NEXT_CHILD:
879 if (man->last->child != NULL) {
880 n->next = man->last->child;
881 man->last->child->prev = n;
882 } else
883 man->last->last = n;
884 man->last->child = n;
885 n->parent = man->last;
886 break;
887 default:
888 abort();
890 man->last = n;
892 switch (n->type) {
893 case ROFFT_HEAD:
894 n->parent->head = n;
895 break;
896 case ROFFT_BODY:
897 if (n->end != ENDBODY_NOT)
898 return;
899 n->parent->body = n;
900 break;
901 case ROFFT_TAIL:
902 n->parent->tail = n;
903 break;
904 default:
905 return;
909 * Copy over the normalised-data pointer of our parent. Not
910 * everybody has one, but copying a null pointer is fine.
913 n->norm = n->parent->norm;
914 assert(n->parent->type == ROFFT_BLOCK);
917 void
918 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
920 struct roff_node *n;
922 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
923 n->string = roff_strdup(man->roff, word);
924 roff_node_append(man, n);
925 n->flags |= NODE_VALID | NODE_ENDED;
926 man->next = ROFF_NEXT_SIBLING;
929 void
930 roff_word_append(struct roff_man *man, const char *word)
932 struct roff_node *n;
933 char *addstr, *newstr;
935 n = man->last;
936 addstr = roff_strdup(man->roff, word);
937 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
938 free(addstr);
939 free(n->string);
940 n->string = newstr;
941 man->next = ROFF_NEXT_SIBLING;
944 void
945 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
947 struct roff_node *n;
949 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
950 roff_node_append(man, n);
951 man->next = ROFF_NEXT_CHILD;
954 struct roff_node *
955 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
957 struct roff_node *n;
959 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
960 roff_node_append(man, n);
961 man->next = ROFF_NEXT_CHILD;
962 return n;
965 struct roff_node *
966 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
968 struct roff_node *n;
970 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
971 roff_node_append(man, n);
972 man->next = ROFF_NEXT_CHILD;
973 return n;
976 struct roff_node *
977 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
979 struct roff_node *n;
981 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
982 roff_node_append(man, n);
983 man->next = ROFF_NEXT_CHILD;
984 return n;
987 static void
988 roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
990 struct roff_node *n;
991 const struct tbl_span *span;
993 if (man->macroset == MACROSET_MAN)
994 man_breakscope(man, ROFF_TS);
995 while ((span = tbl_span(tbl)) != NULL) {
996 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
997 n->span = span;
998 roff_node_append(man, n);
999 n->flags |= NODE_VALID | NODE_ENDED;
1000 man->next = ROFF_NEXT_SIBLING;
1004 void
1005 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1008 /* Adjust siblings. */
1010 if (n->prev)
1011 n->prev->next = n->next;
1012 if (n->next)
1013 n->next->prev = n->prev;
1015 /* Adjust parent. */
1017 if (n->parent != NULL) {
1018 if (n->parent->child == n)
1019 n->parent->child = n->next;
1020 if (n->parent->last == n)
1021 n->parent->last = n->prev;
1024 /* Adjust parse point. */
1026 if (man == NULL)
1027 return;
1028 if (man->last == n) {
1029 if (n->prev == NULL) {
1030 man->last = n->parent;
1031 man->next = ROFF_NEXT_CHILD;
1032 } else {
1033 man->last = n->prev;
1034 man->next = ROFF_NEXT_SIBLING;
1037 if (man->first == n)
1038 man->first = NULL;
1041 void
1042 roff_node_free(struct roff_node *n)
1045 if (n->args != NULL)
1046 mdoc_argv_free(n->args);
1047 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1048 free(n->norm);
1049 if (n->eqn != NULL)
1050 eqn_box_free(n->eqn);
1051 free(n->string);
1052 free(n);
1055 void
1056 roff_node_delete(struct roff_man *man, struct roff_node *n)
1059 while (n->child != NULL)
1060 roff_node_delete(man, n->child);
1061 roff_node_unlink(man, n);
1062 roff_node_free(n);
1065 void
1066 deroff(char **dest, const struct roff_node *n)
1068 char *cp;
1069 size_t sz;
1071 if (n->type != ROFFT_TEXT) {
1072 for (n = n->child; n != NULL; n = n->next)
1073 deroff(dest, n);
1074 return;
1077 /* Skip leading whitespace. */
1079 for (cp = n->string; *cp != '\0'; cp++) {
1080 if (cp[0] == '\\' && cp[1] != '\0' &&
1081 strchr(" %&0^|~", cp[1]) != NULL)
1082 cp++;
1083 else if ( ! isspace((unsigned char)*cp))
1084 break;
1087 /* Skip trailing backslash. */
1089 sz = strlen(cp);
1090 if (sz > 0 && cp[sz - 1] == '\\')
1091 sz--;
1093 /* Skip trailing whitespace. */
1095 for (; sz; sz--)
1096 if ( ! isspace((unsigned char)cp[sz-1]))
1097 break;
1099 /* Skip empty strings. */
1101 if (sz == 0)
1102 return;
1104 if (*dest == NULL) {
1105 *dest = mandoc_strndup(cp, sz);
1106 return;
1109 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1110 free(*dest);
1111 *dest = cp;
1114 /* --- main functions of the roff parser ---------------------------------- */
/*
 * roff_res(): pre-process the text in buf->buf starting at offset pos.
 *
 * Pass 1 scans forward for a comment, that is, the escape character
 * followed by a double quote or a hash.  If one is found, RCS ids are
 * recorded, trailing blanks are reported, the comment is saved as a
 * ROFFT_COMMENT node while r->format is still 0, and the line is cut
 * off in front of the comment.
 *
 * Pass 2 walks backwards from the end of the line.  The escapes
 * star, B, n and w are replaced by their values; all other escape
 * sequences are only checked for syntax.
 *
 * buf->buf may be freed and replaced by a newly allocated string,
 * in which case buf->sz is updated as well.
 *
 * Returns ROFF_CONT on completion, ROFF_APPEND if the line ends in a
 * lone escape character and no newline was seen, and ROFF_IGN if the
 * expansion count exceeds EXPAND_LIMIT or the result would grow
 * beyond SHRT_MAX bytes.
 */
1117 * In the current line, expand escape sequences that tend to get
1118 * used in numerical expressions and conditional requests.
1119 * Also check the syntax of the remaining escape sequences.
1121 static enum rofferr
1122 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1124 char ubuf[24]; /* buffer to print the number */
1125 struct roff_node *n; /* used for header comments */
1126 const char *start; /* start of the string to process */
1127 char *stesc; /* start of an escape sequence ('\\') */
1128 char *ep; /* end of comment string */
1129 const char *stnam; /* start of the name, after "[(*" */
1130 const char *cp; /* end of the name, e.g. before ']' */
1131 const char *res; /* the string to be substituted */
1132 char *nbuf; /* new buffer to copy buf->buf to */
1133 size_t maxl; /* expected length of the escape name */
1134 size_t naml; /* actual length of the escape name */
1135 enum mandoc_esc esc; /* type of the escape sequence */
1136 int inaml; /* length returned from mandoc_escape() */
1137 int expand_count; /* to avoid infinite loops */
1138 int npos; /* position in numeric expression */
1139 int arg_complete; /* argument not interrupted by eol */
1140 int done; /* no more input available */
1141 int deftype; /* type of definition to paste */
1142 int rcsid; /* kind of RCS id seen */
1143 char sign; /* increment number register */
1144 char term; /* character terminating the escape */
1146 /* Search forward for comments. */
1148 done = 0;
1149 start = buf->buf + pos;
1150 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1151 if (stesc[0] != r->escape || stesc[1] == '\0')
1152 continue;
1153 stesc++;
1154 if (*stesc != '"' && *stesc != '#')
1155 continue;
1157 /* Comment found, look for RCS id. */
1159 rcsid = 0;
1160 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1161 rcsid = 1 << MANDOC_OS_OPENBSD;
1162 cp += 8;
1163 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1164 rcsid = 1 << MANDOC_OS_NETBSD;
1165 cp += 7;
/* Accept the id only if the keyword ends here and a later dollar sign exists. */
1167 if (cp != NULL &&
1168 isalnum((unsigned char)*cp) == 0 &&
1169 strchr(cp, '$') != NULL) {
1170 if (r->man->meta.rcsids & rcsid)
1171 mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1172 ln, stesc + 1 - buf->buf, stesc + 1);
1173 r->man->meta.rcsids |= rcsid;
1176 /* Handle trailing whitespace. */
/* ep is set to the last byte of the line; stesc steps back onto the escape character. */
1178 ep = strchr(stesc--, '\0') - 1;
1179 if (*ep == '\n') {
1180 done = 1;
1181 ep--;
1183 if (*ep == ' ' || *ep == '\t')
1184 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1185 ln, ep - buf->buf, NULL);
1188 * Save comments preceding the title macro
1189 * in the syntax tree.
1192 if (r->format == 0) {
1193 while (*ep == ' ' || *ep == '\t')
1194 ep--;
1195 ep[1] = '\0';
1196 n = roff_node_alloc(r->man,
1197 ln, stesc + 1 - buf->buf,
1198 ROFFT_COMMENT, TOKEN_NONE);
/* stesc + 2 skips the escape character and the comment introducer. */
1199 n->string = mandoc_strdup(stesc + 2);
1200 roff_node_append(r->man, n);
1201 n->flags |= NODE_VALID | NODE_ENDED;
1202 r->man->next = ROFF_NEXT_SIBLING;
1205 /* Discard comments. */
/* Blanks directly in front of the comment are cut off together with it. */
1207 while (stesc > start && stesc[-1] == ' ')
1208 stesc--;
1209 *stesc = '\0';
1210 break;
/* Here stesc points to the terminating NUL; an empty text needs no further work. */
1212 if (stesc == start)
1213 return ROFF_CONT;
1214 stesc--;
1216 /* Notice the end of the input. */
1218 if (*stesc == '\n') {
1219 *stesc-- = '\0';
1220 done = 1;
1223 expand_count = 0;
1224 while (stesc >= start) {
1226 /* Search backwards for the next backslash. */
/*
 * A backslash that is not the current escape character is rewritten
 * as backslash-e in a newly allocated buffer; start and stesc are
 * rebased onto that buffer before the old one is freed.
 */
1228 if (*stesc != r->escape) {
1229 if (*stesc == '\\') {
1230 *stesc = '\0';
1231 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1232 buf->buf, stesc + 1) + 1;
1233 start = nbuf + pos;
1234 stesc = nbuf + (stesc - buf->buf);
1235 free(buf->buf);
1236 buf->buf = nbuf;
1238 stesc--;
1239 continue;
1242 /* If it is escaped, skip it. */
/* cp ends up on the nearest preceding byte that is not an escape character. */
1244 for (cp = stesc - 1; cp >= start; cp--)
1245 if (*cp != r->escape)
1246 break;
/*
 * An even run length means every escape character of the run is
 * itself escaped: the run is rewritten to backslashes and skipped.
 * For an odd run, the last one starts a sequence and is normalised
 * to a backslash, unless it is the last byte of the line; then it
 * is removed and, if more input may follow, ROFF_APPEND is returned.
 */
1248 if ((stesc - cp) % 2 == 0) {
1249 while (stesc > cp)
1250 *stesc-- = '\\';
1251 continue;
1252 } else if (stesc[1] != '\0') {
1253 *stesc = '\\';
1254 } else {
1255 *stesc-- = '\0';
1256 if (done)
1257 continue;
1258 else
1259 return ROFF_APPEND;
1262 /* Decide whether to expand or to check only. */
1264 term = '\0';
1265 cp = stesc + 1;
1266 switch (*cp) {
1267 case '*':
1268 res = NULL;
1269 break;
1270 case 'B':
1271 case 'w':
1272 term = cp[1];
1273 /* FALLTHROUGH */
1274 case 'n':
1275 sign = cp[1];
1276 if (sign == '+' || sign == '-')
1277 cp++;
1278 res = ubuf;
1279 break;
1280 default:
1281 esc = mandoc_escape(&cp, &stnam, &inaml);
1282 if (esc == ESCAPE_ERROR ||
1283 (esc == ESCAPE_SPECIAL &&
1284 mchars_spec2cp(stnam, inaml) < 0))
1285 mandoc_vmsg(MANDOCERR_ESC_BAD,
1286 r->parse, ln, (int)(stesc - buf->buf),
1287 "%.*s", (int)(cp - stesc), stesc);
1288 stesc--;
1289 continue;
/* Each expansion counts against EXPAND_LIMIT to stop recursive definitions. */
1292 if (EXPAND_LIMIT < ++expand_count) {
1293 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1294 ln, (int)(stesc - buf->buf), NULL);
1295 return ROFF_IGN;
1299 * The third character decides the length
1300 * of the name of the string or register.
1301 * Save a pointer to the name.
/* maxl == 0 means the name runs up to the terminator term instead of a fixed length. */
1304 if (term == '\0') {
1305 switch (*++cp) {
1306 case '\0':
1307 maxl = 0;
1308 break;
1309 case '(':
1310 cp++;
1311 maxl = 2;
1312 break;
1313 case '[':
1314 cp++;
1315 term = ']';
1316 maxl = 0;
1317 break;
1318 default:
1319 maxl = 1;
1320 break;
1322 } else {
1323 cp += 2;
1324 maxl = 0;
1326 stnam = cp;
1328 /* Advance to the end of the name. */
1330 naml = 0;
1331 arg_complete = 1;
1332 while (maxl == 0 || naml < maxl) {
1333 if (*cp == '\0') {
1334 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1335 ln, (int)(stesc - buf->buf), stesc);
1336 arg_complete = 0;
1337 break;
1339 if (maxl == 0 && *cp == term) {
1340 cp++;
1341 break;
/* Only inside the w escape are nested escape sequences parsed; selected kinds count as one unit. */
1343 if (*cp++ != '\\' || stesc[1] != 'w') {
1344 naml++;
1345 continue;
1347 switch (mandoc_escape(&cp, NULL, NULL)) {
1348 case ESCAPE_SPECIAL:
1349 case ESCAPE_UNICODE:
1350 case ESCAPE_NUMBERED:
1351 case ESCAPE_OVERSTRIKE:
1352 naml++;
1353 break;
1354 default:
1355 break;
1360 * Retrieve the replacement string; if it is
1361 * undefined, resume searching for escapes.
1364 switch (stesc[1]) {
1365 case '*':
1366 if (arg_complete) {
1367 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1368 res = roff_getstrn(r, stnam, naml, &deftype);
1370 break;
1371 case 'B':
/* Yields 1 only if the whole delimited argument is a valid numeric expression. */
1372 npos = 0;
1373 ubuf[0] = arg_complete &&
1374 roff_evalnum(r, ln, stnam, &npos,
1375 NULL, ROFFNUM_SCALE) &&
1376 stnam + npos + 1 == cp ? '1' : '0';
1377 ubuf[1] = '\0';
1378 break;
1379 case 'n':
1380 if (arg_complete)
1381 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1382 roff_getregn(r, stnam, naml, sign));
1383 else
1384 ubuf[0] = '\0';
1385 break;
1386 case 'w':
1387 /* use even incomplete args */
1388 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1389 24 * (int)naml);
1390 break;
/* res is still NULL only for an undefined or incomplete star escape. */
1393 if (res == NULL) {
1394 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1395 r->parse, ln, (int)(stesc - buf->buf),
1396 "%.*s", (int)naml, stnam);
1397 res = "";
1398 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1399 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1400 ln, (int)(stesc - buf->buf), NULL);
1401 return ROFF_IGN;
1404 /* Replace the escape sequence by the string. */
1406 *stesc = '\0';
1407 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1408 buf->buf, res, cp) + 1;
1410 /* Prepare for the next replacement. */
/* stesc is moved to the end of the inserted text, so that text is scanned again. */
1412 start = nbuf + pos;
1413 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1414 free(buf->buf);
1415 buf->buf = nbuf;
1417 return ROFF_CONT;
1421 * Process text streams.
1423 static enum rofferr
1424 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1426 size_t sz;
1427 const char *start;
1428 char *p;
1429 int isz;
1430 enum mandoc_esc esc;
1432 /* Spring the input line trap. */
1434 if (roffit_lines == 1) {
1435 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1436 free(buf->buf);
1437 buf->buf = p;
1438 buf->sz = isz + 1;
1439 *offs = 0;
1440 free(roffit_macro);
1441 roffit_lines = 0;
1442 return ROFF_REPARSE;
1443 } else if (roffit_lines > 1)
1444 --roffit_lines;
1446 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1447 if (roffce_lines < 1) {
1448 r->man->last = roffce_node;
1449 r->man->next = ROFF_NEXT_SIBLING;
1450 roffce_lines = 0;
1451 roffce_node = NULL;
1452 } else
1453 roffce_lines--;
1456 /* Convert all breakable hyphens into ASCII_HYPH. */
1458 start = p = buf->buf + pos;
1460 while (*p != '\0') {
1461 sz = strcspn(p, "-\\");
1462 p += sz;
1464 if (*p == '\0')
1465 break;
1467 if (*p == '\\') {
1468 /* Skip over escapes. */
1469 p++;
1470 esc = mandoc_escape((const char **)&p, NULL, NULL);
1471 if (esc == ESCAPE_ERROR)
1472 break;
1473 while (*p == '-')
1474 p++;
1475 continue;
1476 } else if (p == start) {
1477 p++;
1478 continue;
1481 if (isalpha((unsigned char)p[-1]) &&
1482 isalpha((unsigned char)p[1]))
1483 *p = ASCII_HYPH;
1484 p++;
1486 return ROFF_CONT;
1489 enum rofferr
1490 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1492 enum roff_tok t;
1493 enum rofferr e;
1494 int pos; /* parse point */
1495 int spos; /* saved parse point for messages */
1496 int ppos; /* original offset in buf->buf */
1497 int ctl; /* macro line (boolean) */
1499 ppos = pos = *offs;
1501 /* Handle in-line equation delimiters. */
1503 if (r->tbl == NULL &&
1504 r->last_eqn != NULL && r->last_eqn->delim &&
1505 (r->eqn == NULL || r->eqn_inline)) {
1506 e = roff_eqndelim(r, buf, pos);
1507 if (e == ROFF_REPARSE)
1508 return e;
1509 assert(e == ROFF_CONT);
1512 /* Expand some escape sequences. */
1514 e = roff_res(r, buf, ln, pos);
1515 if (e == ROFF_IGN || e == ROFF_APPEND)
1516 return e;
1517 assert(e == ROFF_CONT);
1519 ctl = roff_getcontrol(r, buf->buf, &pos);
1522 * First, if a scope is open and we're not a macro, pass the
1523 * text through the macro's filter.
1524 * Equations process all content themselves.
1525 * Tables process almost all content themselves, but we want
1526 * to warn about macros before passing it there.
1529 if (r->last != NULL && ! ctl) {
1530 t = r->last->tok;
1531 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1532 if (e == ROFF_IGN)
1533 return e;
1534 assert(e == ROFF_CONT);
1536 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1537 eqn_read(r->eqn, buf->buf + ppos);
1538 return ROFF_IGN;
1540 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1541 tbl_read(r->tbl, ln, buf->buf, ppos);
1542 roff_addtbl(r->man, r->tbl);
1543 return ROFF_IGN;
1545 if ( ! ctl)
1546 return roff_parsetext(r, buf, pos, offs);
1548 /* Skip empty request lines. */
1550 if (buf->buf[pos] == '"') {
1551 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1552 ln, pos, NULL);
1553 return ROFF_IGN;
1554 } else if (buf->buf[pos] == '\0')
1555 return ROFF_IGN;
1558 * If a scope is open, go to the child handler for that macro,
1559 * as it may want to preprocess before doing anything with it.
1560 * Don't do so if an equation is open.
1563 if (r->last) {
1564 t = r->last->tok;
1565 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1568 /* No scope is open. This is a new request or macro. */
1570 spos = pos;
1571 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1573 /* Tables ignore most macros. */
1575 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1576 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1577 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1578 ln, pos, buf->buf + spos);
1579 if (t != TOKEN_NONE)
1580 return ROFF_IGN;
1581 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1582 pos++;
1583 while (buf->buf[pos] == ' ')
1584 pos++;
1585 tbl_read(r->tbl, ln, buf->buf, pos);
1586 roff_addtbl(r->man, r->tbl);
1587 return ROFF_IGN;
1590 /* For now, let high level macros abort .ce mode. */
1592 if (ctl && roffce_node != NULL &&
1593 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1594 t == ROFF_TH || t == ROFF_TS)) {
1595 r->man->last = roffce_node;
1596 r->man->next = ROFF_NEXT_SIBLING;
1597 roffce_lines = 0;
1598 roffce_node = NULL;
1602 * This is neither a roff request nor a user-defined macro.
1603 * Let the standard macro set parsers handle it.
1606 if (t == TOKEN_NONE)
1607 return ROFF_CONT;
1609 /* Execute a roff request or a user defined macro. */
1611 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1614 void
1615 roff_endparse(struct roff *r)
1617 if (r->last != NULL)
1618 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1619 r->last->line, r->last->col,
1620 roff_name[r->last->tok]);
1622 if (r->eqn != NULL) {
1623 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1624 r->eqn->node->line, r->eqn->node->pos, "EQ");
1625 eqn_parse(r->eqn);
1626 r->eqn = NULL;
1629 if (r->tbl != NULL) {
1630 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1631 r->tbl->line, r->tbl->pos, "TS");
1632 tbl_end(r->tbl);
1633 r->tbl = NULL;
1638 * Parse a roff node's type from the input buffer. This must be in the
1639 * form of ".foo xxx" in the usual way.
1641 static enum roff_tok
1642 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1644 char *cp;
1645 const char *mac;
1646 size_t maclen;
1647 int deftype;
1648 enum roff_tok t;
1650 cp = buf + *pos;
1652 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1653 return TOKEN_NONE;
1655 mac = cp;
1656 maclen = roff_getname(r, &cp, ln, ppos);
1658 deftype = ROFFDEF_USER | ROFFDEF_REN;
1659 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1660 switch (deftype) {
1661 case ROFFDEF_USER:
1662 t = ROFF_USERDEF;
1663 break;
1664 case ROFFDEF_REN:
1665 t = ROFF_RENAMED;
1666 break;
1667 default:
1668 t = roffhash_find(r->reqtab, mac, maclen);
1669 break;
1671 if (t != TOKEN_NONE)
1672 *pos = cp - buf;
1673 else if (deftype == ROFFDEF_UNDEF) {
1674 /* Using an undefined macro defines it to be empty. */
1675 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1676 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1678 return t;
1681 /* --- handling of request blocks ----------------------------------------- */
1683 static enum rofferr
1684 roff_cblock(ROFF_ARGS)
1688 * A block-close `..' should only be invoked as a child of an
1689 * ignore macro, otherwise raise a warning and just ignore it.
1692 if (r->last == NULL) {
1693 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1694 ln, ppos, "..");
1695 return ROFF_IGN;
1698 switch (r->last->tok) {
1699 case ROFF_am:
1700 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1701 case ROFF_ami:
1702 case ROFF_de:
1703 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1704 case ROFF_dei:
1705 case ROFF_ig:
1706 break;
1707 default:
1708 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1709 ln, ppos, "..");
1710 return ROFF_IGN;
1713 if (buf->buf[pos] != '\0')
1714 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1715 ".. %s", buf->buf + pos);
1717 roffnode_pop(r);
1718 roffnode_cleanscope(r);
1719 return ROFF_IGN;
1723 static void
1724 roffnode_cleanscope(struct roff *r)
1727 while (r->last) {
1728 if (--r->last->endspan != 0)
1729 break;
1730 roffnode_pop(r);
1734 static void
1735 roff_ccond(struct roff *r, int ln, int ppos)
1738 if (NULL == r->last) {
1739 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1740 ln, ppos, "\\}");
1741 return;
1744 switch (r->last->tok) {
1745 case ROFF_el:
1746 case ROFF_ie:
1747 case ROFF_if:
1748 break;
1749 default:
1750 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1751 ln, ppos, "\\}");
1752 return;
1755 if (r->last->endspan > -1) {
1756 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1757 ln, ppos, "\\}");
1758 return;
1761 roffnode_pop(r);
1762 roffnode_cleanscope(r);
1763 return;
1766 static enum rofferr
1767 roff_block(ROFF_ARGS)
1769 const char *name, *value;
1770 char *call, *cp, *iname, *rname;
1771 size_t csz, namesz, rsz;
1772 int deftype;
1774 /* Ignore groff compatibility mode for now. */
1776 if (tok == ROFF_de1)
1777 tok = ROFF_de;
1778 else if (tok == ROFF_dei1)
1779 tok = ROFF_dei;
1780 else if (tok == ROFF_am1)
1781 tok = ROFF_am;
1782 else if (tok == ROFF_ami1)
1783 tok = ROFF_ami;
1785 /* Parse the macro name argument. */
1787 cp = buf->buf + pos;
1788 if (tok == ROFF_ig) {
1789 iname = NULL;
1790 namesz = 0;
1791 } else {
1792 iname = cp;
1793 namesz = roff_getname(r, &cp, ln, ppos);
1794 iname[namesz] = '\0';
1797 /* Resolve the macro name argument if it is indirect. */
1799 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1800 deftype = ROFFDEF_USER;
1801 name = roff_getstrn(r, iname, namesz, &deftype);
1802 if (name == NULL) {
1803 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1804 r->parse, ln, (int)(iname - buf->buf),
1805 "%.*s", (int)namesz, iname);
1806 namesz = 0;
1807 } else
1808 namesz = strlen(name);
1809 } else
1810 name = iname;
1812 if (namesz == 0 && tok != ROFF_ig) {
1813 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1814 ln, ppos, roff_name[tok]);
1815 return ROFF_IGN;
1818 roffnode_push(r, tok, name, ln, ppos);
1821 * At the beginning of a `de' macro, clear the existing string
1822 * with the same name, if there is one. New content will be
1823 * appended from roff_block_text() in multiline mode.
1826 if (tok == ROFF_de || tok == ROFF_dei) {
1827 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1828 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1829 } else if (tok == ROFF_am || tok == ROFF_ami) {
1830 deftype = ROFFDEF_ANY;
1831 value = roff_getstrn(r, iname, namesz, &deftype);
1832 switch (deftype) { /* Before appending, ... */
1833 case ROFFDEF_PRE: /* copy predefined to user-defined. */
1834 roff_setstrn(&r->strtab, name, namesz,
1835 value, strlen(value), 0);
1836 break;
1837 case ROFFDEF_REN: /* call original standard macro. */
1838 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1839 (int)strlen(value), value);
1840 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1841 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1842 free(call);
1843 break;
1844 case ROFFDEF_STD: /* rename and call standard macro. */
1845 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1846 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1847 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1848 (int)rsz, rname);
1849 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1850 free(call);
1851 free(rname);
1852 break;
1853 default:
1854 break;
1858 if (*cp == '\0')
1859 return ROFF_IGN;
1861 /* Get the custom end marker. */
1863 iname = cp;
1864 namesz = roff_getname(r, &cp, ln, ppos);
1866 /* Resolve the end marker if it is indirect. */
1868 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1869 deftype = ROFFDEF_USER;
1870 name = roff_getstrn(r, iname, namesz, &deftype);
1871 if (name == NULL) {
1872 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1873 r->parse, ln, (int)(iname - buf->buf),
1874 "%.*s", (int)namesz, iname);
1875 namesz = 0;
1876 } else
1877 namesz = strlen(name);
1878 } else
1879 name = iname;
1881 if (namesz)
1882 r->last->end = mandoc_strndup(name, namesz);
1884 if (*cp != '\0')
1885 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1886 ln, pos, ".%s ... %s", roff_name[tok], cp);
1888 return ROFF_IGN;
1891 static enum rofferr
1892 roff_block_sub(ROFF_ARGS)
1894 enum roff_tok t;
1895 int i, j;
1898 * First check whether a custom macro exists at this level. If
1899 * it does, then check against it. This is some of groff's
1900 * stranger behaviours. If we encountered a custom end-scope
1901 * tag and that tag also happens to be a "real" macro, then we
1902 * need to try interpreting it again as a real macro. If it's
1903 * not, then return ignore. Else continue.
1906 if (r->last->end) {
1907 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1908 if (buf->buf[i] != r->last->end[j])
1909 break;
1911 if (r->last->end[j] == '\0' &&
1912 (buf->buf[i] == '\0' ||
1913 buf->buf[i] == ' ' ||
1914 buf->buf[i] == '\t')) {
1915 roffnode_pop(r);
1916 roffnode_cleanscope(r);
1918 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1919 i++;
1921 pos = i;
1922 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1923 TOKEN_NONE)
1924 return ROFF_RERUN;
1925 return ROFF_IGN;
1930 * If we have no custom end-query or lookup failed, then try
1931 * pulling it out of the hashtable.
1934 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1936 if (t != ROFF_cblock) {
1937 if (tok != ROFF_ig)
1938 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1939 return ROFF_IGN;
1942 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1945 static enum rofferr
1946 roff_block_text(ROFF_ARGS)
1949 if (tok != ROFF_ig)
1950 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1952 return ROFF_IGN;
1955 static enum rofferr
1956 roff_cond_sub(ROFF_ARGS)
1958 enum roff_tok t;
1959 char *ep;
1960 int rr;
1962 rr = r->last->rule;
1963 roffnode_cleanscope(r);
1966 * If `\}' occurs on a macro line without a preceding macro,
1967 * drop the line completely.
1970 ep = buf->buf + pos;
1971 if (ep[0] == '\\' && ep[1] == '}')
1972 rr = 0;
1974 /* Always check for the closing delimiter `\}'. */
1976 while ((ep = strchr(ep, '\\')) != NULL) {
1977 switch (ep[1]) {
1978 case '}':
1979 memmove(ep, ep + 2, strlen(ep + 2) + 1);
1980 roff_ccond(r, ln, ep - buf->buf);
1981 break;
1982 case '\0':
1983 ++ep;
1984 break;
1985 default:
1986 ep += 2;
1987 break;
1992 * Fully handle known macros when they are structurally
1993 * required or when the conditional evaluated to true.
1996 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1997 return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
1998 ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
1999 ? ROFF_CONT : ROFF_IGN;
2002 static enum rofferr
2003 roff_cond_text(ROFF_ARGS)
2005 char *ep;
2006 int rr;
2008 rr = r->last->rule;
2009 roffnode_cleanscope(r);
2011 ep = buf->buf + pos;
2012 while ((ep = strchr(ep, '\\')) != NULL) {
2013 if (*(++ep) == '}') {
2014 *ep = '&';
2015 roff_ccond(r, ln, ep - buf->buf - 1);
2017 if (*ep != '\0')
2018 ++ep;
2020 return rr ? ROFF_CONT : ROFF_IGN;
2023 /* --- handling of numeric and conditional expressions -------------------- */
2026 * Parse a single signed integer number. Stop at the first non-digit.
2027 * If there is at least one digit, return success and advance the
2028 * parse point, else return failure and let the parse point unchanged.
2029 * Ignore overflows, treat them just like the C language.
2031 static int
2032 roff_getnum(const char *v, int *pos, int *res, int flags)
2034 int myres, scaled, n, p;
2036 if (NULL == res)
2037 res = &myres;
2039 p = *pos;
2040 n = v[p] == '-';
2041 if (n || v[p] == '+')
2042 p++;
2044 if (flags & ROFFNUM_WHITE)
2045 while (isspace((unsigned char)v[p]))
2046 p++;
2048 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2049 *res = 10 * *res + v[p] - '0';
2050 if (p == *pos + n)
2051 return 0;
2053 if (n)
2054 *res = -*res;
2056 /* Each number may be followed by one optional scaling unit. */
2058 switch (v[p]) {
2059 case 'f':
2060 scaled = *res * 65536;
2061 break;
2062 case 'i':
2063 scaled = *res * 240;
2064 break;
2065 case 'c':
2066 scaled = *res * 240 / 2.54;
2067 break;
2068 case 'v':
2069 case 'P':
2070 scaled = *res * 40;
2071 break;
2072 case 'm':
2073 case 'n':
2074 scaled = *res * 24;
2075 break;
2076 case 'p':
2077 scaled = *res * 10 / 3;
2078 break;
2079 case 'u':
2080 scaled = *res;
2081 break;
2082 case 'M':
2083 scaled = *res * 6 / 25;
2084 break;
2085 default:
2086 scaled = *res;
2087 p--;
2088 break;
2090 if (flags & ROFFNUM_SCALE)
2091 *res = scaled;
2093 *pos = p + 1;
2094 return 1;
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If there is not even a delimiter because the cursor already is
 * at the end of the line, fail and leave the cursor where it is.
 * Returns 1 for a match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, s1 + 1 would point beyond the string. */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (s3 == NULL)		/* found no middle delimiter */
		goto out;

	while (*++s3 != '\0') {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (s3 == NULL)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2141 * Evaluate an optionally negated single character, numerical,
2142 * or string condition.
2144 static int
2145 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2147 char *cp, *name;
2148 size_t sz;
2149 int deftype, number, savepos, istrue, wanttrue;
2151 if ('!' == v[*pos]) {
2152 wanttrue = 0;
2153 (*pos)++;
2154 } else
2155 wanttrue = 1;
2157 switch (v[*pos]) {
2158 case '\0':
2159 return 0;
2160 case 'n':
2161 case 'o':
2162 (*pos)++;
2163 return wanttrue;
2164 case 'c':
2165 case 'e':
2166 case 't':
2167 case 'v':
2168 (*pos)++;
2169 return !wanttrue;
2170 case 'd':
2171 case 'r':
2172 cp = v + *pos + 1;
2173 while (*cp == ' ')
2174 cp++;
2175 name = cp;
2176 sz = roff_getname(r, &cp, ln, cp - v);
2177 if (sz == 0)
2178 istrue = 0;
2179 else if (v[*pos] == 'r')
2180 istrue = roff_hasregn(r, name, sz);
2181 else {
2182 deftype = ROFFDEF_ANY;
2183 roff_getstrn(r, name, sz, &deftype);
2184 istrue = !!deftype;
2186 *pos = cp - v;
2187 return istrue == wanttrue;
2188 default:
2189 break;
2192 savepos = *pos;
2193 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2194 return (number > 0) == wanttrue;
2195 else if (*pos == savepos)
2196 return roff_evalstrcond(v, pos) == wanttrue;
2197 else
2198 return 0;
2201 static enum rofferr
2202 roff_line_ignore(ROFF_ARGS)
2205 return ROFF_IGN;
2208 static enum rofferr
2209 roff_insec(ROFF_ARGS)
2212 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2213 ln, ppos, roff_name[tok]);
2214 return ROFF_IGN;
2217 static enum rofferr
2218 roff_unsupp(ROFF_ARGS)
2221 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2222 ln, ppos, roff_name[tok]);
2223 return ROFF_IGN;
2226 static enum rofferr
2227 roff_cond(ROFF_ARGS)
2230 roffnode_push(r, tok, NULL, ln, ppos);
2233 * An `.el' has no conditional body: it will consume the value
2234 * of the current rstack entry set in prior `ie' calls or
2235 * defaults to DENY.
2237 * If we're not an `el', however, then evaluate the conditional.
2240 r->last->rule = tok == ROFF_el ?
2241 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2242 roff_evalcond(r, ln, buf->buf, &pos);
2245 * An if-else will put the NEGATION of the current evaluated
2246 * conditional into the stack of rules.
2249 if (tok == ROFF_ie) {
2250 if (r->rstackpos + 1 == r->rstacksz) {
2251 r->rstacksz += 16;
2252 r->rstack = mandoc_reallocarray(r->rstack,
2253 r->rstacksz, sizeof(int));
2255 r->rstack[++r->rstackpos] = !r->last->rule;
2258 /* If the parent has false as its rule, then so do we. */
2260 if (r->last->parent && !r->last->parent->rule)
2261 r->last->rule = 0;
2264 * Determine scope.
2265 * If there is nothing on the line after the conditional,
2266 * not even whitespace, use next-line scope.
2269 if (buf->buf[pos] == '\0') {
2270 r->last->endspan = 2;
2271 goto out;
2274 while (buf->buf[pos] == ' ')
2275 pos++;
2277 /* An opening brace requests multiline scope. */
2279 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2280 r->last->endspan = -1;
2281 pos += 2;
2282 while (buf->buf[pos] == ' ')
2283 pos++;
2284 goto out;
2288 * Anything else following the conditional causes
2289 * single-line scope. Warn if the scope contains
2290 * nothing but trailing whitespace.
2293 if (buf->buf[pos] == '\0')
2294 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2295 ln, ppos, roff_name[tok]);
2297 r->last->endspan = 1;
2299 out:
2300 *offs = pos;
2301 return ROFF_RERUN;
2304 static enum rofferr
2305 roff_ds(ROFF_ARGS)
2307 char *string;
2308 const char *name;
2309 size_t namesz;
2311 /* Ignore groff compatibility mode for now. */
2313 if (tok == ROFF_ds1)
2314 tok = ROFF_ds;
2315 else if (tok == ROFF_as1)
2316 tok = ROFF_as;
2319 * The first word is the name of the string.
2320 * If it is empty or terminated by an escape sequence,
2321 * abort the `ds' request without defining anything.
2324 name = string = buf->buf + pos;
2325 if (*name == '\0')
2326 return ROFF_IGN;
2328 namesz = roff_getname(r, &string, ln, pos);
2329 if (name[namesz] == '\\')
2330 return ROFF_IGN;
2332 /* Read past the initial double-quote, if any. */
2333 if (*string == '"')
2334 string++;
2336 /* The rest is the value. */
2337 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2338 ROFF_as == tok);
2339 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2340 return ROFF_IGN;
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, return it and advance the parse
 * point, else return 0 and leave the parse point unchanged.
 * The character at the parse point is always stored in *res first.
 * Two-character operators are returned as a single letter:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a';
 * "==" is returned as '=' like a single equals sign.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 width;

	first = v[*pos];
	*res = first;
	width = 1;

	switch (first) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
	case '>':
	case '=':
		/* The second character is only inspected for these. */
		second = v[*pos + 1];
		if (first == '<' && second == '=') {
			*res = 'l';
			width = 2;
		} else if (first == '<' && second == '>') {
			*res = '!';
			width = 2;
		} else if (first == '<' && second == '?') {
			*res = 'i';
			width = 2;
		} else if (first == '>' && second == '=') {
			*res = 'g';
			width = 2;
		} else if (first == '>' && second == '?') {
			*res = 'a';
			width = 2;
		} else if (first == '=' && second == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2408 * Evaluate either a parenthesized numeric expression
2409 * or a single signed integer number.
2411 static int
2412 roff_evalpar(struct roff *r, int ln,
2413 const char *v, int *pos, int *res, int flags)
2416 if ('(' != v[*pos])
2417 return roff_getnum(v, pos, res, flags);
2419 (*pos)++;
2420 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2421 return 0;
2424 * Omission of the closing parenthesis
2425 * is an error in validation mode,
2426 * but ignored in evaluation mode.
2429 if (')' == v[*pos])
2430 (*pos)++;
2431 else if (NULL == res)
2432 return 0;
2434 return 1;
2438 * Evaluate a complete numeric expression.
2439 * Proceed left to right, there is no concept of precedence.
2441 static int
2442 roff_evalnum(struct roff *r, int ln, const char *v,
2443 int *pos, int *res, int flags)
2445 int mypos, operand2;
2446 char operator;
2448 if (NULL == pos) {
2449 mypos = 0;
2450 pos = &mypos;
2453 if (flags & ROFFNUM_WHITE)
2454 while (isspace((unsigned char)v[*pos]))
2455 (*pos)++;
2457 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2458 return 0;
2460 while (1) {
2461 if (flags & ROFFNUM_WHITE)
2462 while (isspace((unsigned char)v[*pos]))
2463 (*pos)++;
2465 if ( ! roff_getop(v, pos, &operator))
2466 break;
2468 if (flags & ROFFNUM_WHITE)
2469 while (isspace((unsigned char)v[*pos]))
2470 (*pos)++;
2472 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2473 return 0;
2475 if (flags & ROFFNUM_WHITE)
2476 while (isspace((unsigned char)v[*pos]))
2477 (*pos)++;
2479 if (NULL == res)
2480 continue;
2482 switch (operator) {
2483 case '+':
2484 *res += operand2;
2485 break;
2486 case '-':
2487 *res -= operand2;
2488 break;
2489 case '*':
2490 *res *= operand2;
2491 break;
2492 case '/':
2493 if (operand2 == 0) {
2494 mandoc_msg(MANDOCERR_DIVZERO,
2495 r->parse, ln, *pos, v);
2496 *res = 0;
2497 break;
2499 *res /= operand2;
2500 break;
2501 case '%':
2502 if (operand2 == 0) {
2503 mandoc_msg(MANDOCERR_DIVZERO,
2504 r->parse, ln, *pos, v);
2505 *res = 0;
2506 break;
2508 *res %= operand2;
2509 break;
2510 case '<':
2511 *res = *res < operand2;
2512 break;
2513 case '>':
2514 *res = *res > operand2;
2515 break;
2516 case 'l':
2517 *res = *res <= operand2;
2518 break;
2519 case 'g':
2520 *res = *res >= operand2;
2521 break;
2522 case '=':
2523 *res = *res == operand2;
2524 break;
2525 case '!':
2526 *res = *res != operand2;
2527 break;
2528 case '&':
2529 *res = *res && operand2;
2530 break;
2531 case ':':
2532 *res = *res || operand2;
2533 break;
2534 case 'i':
2535 if (operand2 < *res)
2536 *res = operand2;
2537 break;
2538 case 'a':
2539 if (operand2 > *res)
2540 *res = operand2;
2541 break;
2542 default:
2543 abort();
2546 return 1;
2549 /* --- register management ------------------------------------------------ */
/*
 * Set, increment or decrement the number register called name,
 * a NUL-terminated string.  The step is left untouched: INT_MIN
 * tells roff_setregn() not to change it.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 len = strlen(name);

	roff_setregn(r, name, len, val, sign, INT_MIN);
}
2557 static void
2558 roff_setregn(struct roff *r, const char *name, size_t len,
2559 int val, char sign, int step)
2561 struct roffreg *reg;
2563 /* Search for an existing register with the same name. */
2564 reg = r->regtab;
2566 while (reg != NULL && (reg->key.sz != len ||
2567 strncmp(reg->key.p, name, len) != 0))
2568 reg = reg->next;
2570 if (NULL == reg) {
2571 /* Create a new register. */
2572 reg = mandoc_malloc(sizeof(struct roffreg));
2573 reg->key.p = mandoc_strndup(name, len);
2574 reg->key.sz = len;
2575 reg->val = 0;
2576 reg->step = 0;
2577 reg->next = r->regtab;
2578 r->regtab = reg;
2581 if ('+' == sign)
2582 reg->val += val;
2583 else if ('-' == sign)
2584 reg->val -= val;
2585 else
2586 reg->val = val;
2587 if (step != INT_MIN)
2588 reg->step = step;
2592 * Handle some predefined read-only number registers.
2593 * For now, return -1 if the requested register is not predefined;
2594 * in case a predefined read-only register having the value -1
2595 * were to turn up, another special value would have to be chosen.
2597 static int
2598 roff_getregro(const struct roff *r, const char *name)
2601 switch (*name) {
2602 case '$': /* Number of arguments of the last macro evaluated. */
2603 return r->argc;
2604 case 'A': /* ASCII approximation mode is always off. */
2605 return 0;
2606 case 'g': /* Groff compatibility mode is always on. */
2607 return 1;
2608 case 'H': /* Fixed horizontal resolution. */
2609 return 24;
2610 case 'j': /* Always adjust left margin only. */
2611 return 0;
2612 case 'T': /* Some output device is always defined. */
2613 return 1;
2614 case 'V': /* Fixed vertical resolution. */
2615 return 40;
2616 default:
2617 return -1;
/*
 * Return the value of the number register called name,
 * a NUL-terminated string, without auto-incrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 len = strlen(name);

	return roff_getregn(r, name, len, '\0');
}
2627 static int
2628 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2630 struct roffreg *reg;
2631 int val;
2633 if ('.' == name[0] && 2 == len) {
2634 val = roff_getregro(r, name + 1);
2635 if (-1 != val)
2636 return val;
2639 for (reg = r->regtab; reg; reg = reg->next) {
2640 if (len == reg->key.sz &&
2641 0 == strncmp(name, reg->key.p, len)) {
2642 switch (sign) {
2643 case '+':
2644 reg->val += reg->step;
2645 break;
2646 case '-':
2647 reg->val -= reg->step;
2648 break;
2649 default:
2650 break;
2652 return reg->val;
2656 roff_setregn(r, name, len, 0, '\0', INT_MIN);
2657 return 0;
2660 static int
2661 roff_hasregn(const struct roff *r, const char *name, size_t len)
2663 struct roffreg *reg;
2664 int val;
2666 if ('.' == name[0] && 2 == len) {
2667 val = roff_getregro(r, name + 1);
2668 if (-1 != val)
2669 return 1;
2672 for (reg = r->regtab; reg; reg = reg->next)
2673 if (len == reg->key.sz &&
2674 0 == strncmp(name, reg->key.p, len))
2675 return 1;
2677 return 0;
2680 static void
2681 roff_freereg(struct roffreg *reg)
2683 struct roffreg *old_reg;
2685 while (NULL != reg) {
2686 free(reg->key.p);
2687 old_reg = reg;
2688 reg = reg->next;
2689 free(old_reg);
2693 static enum rofferr
2694 roff_nr(ROFF_ARGS)
2696 char *key, *val, *step;
2697 size_t keysz;
2698 int iv, is, len;
2699 char sign;
2701 key = val = buf->buf + pos;
2702 if (*key == '\0')
2703 return ROFF_IGN;
2705 keysz = roff_getname(r, &val, ln, pos);
2706 if (key[keysz] == '\\')
2707 return ROFF_IGN;
2709 sign = *val;
2710 if (sign == '+' || sign == '-')
2711 val++;
2713 len = 0;
2714 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
2715 return ROFF_IGN;
2717 step = val + len;
2718 while (isspace((unsigned char)*step))
2719 step++;
2720 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
2721 is = INT_MIN;
2723 roff_setregn(r, key, keysz, iv, sign, is);
2724 return ROFF_IGN;
2727 static enum rofferr
2728 roff_rr(ROFF_ARGS)
2730 struct roffreg *reg, **prev;
2731 char *name, *cp;
2732 size_t namesz;
2734 name = cp = buf->buf + pos;
2735 if (*name == '\0')
2736 return ROFF_IGN;
2737 namesz = roff_getname(r, &cp, ln, pos);
2738 name[namesz] = '\0';
2740 prev = &r->regtab;
2741 while (1) {
2742 reg = *prev;
2743 if (reg == NULL || !strcmp(name, reg->key.p))
2744 break;
2745 prev = &reg->next;
2747 if (reg != NULL) {
2748 *prev = reg->next;
2749 free(reg->key.p);
2750 free(reg);
2752 return ROFF_IGN;
2755 /* --- handler functions for roff requests -------------------------------- */
2757 static enum rofferr
2758 roff_rm(ROFF_ARGS)
2760 const char *name;
2761 char *cp;
2762 size_t namesz;
2764 cp = buf->buf + pos;
2765 while (*cp != '\0') {
2766 name = cp;
2767 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2768 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2769 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2770 if (name[namesz] == '\\')
2771 break;
2773 return ROFF_IGN;
2776 static enum rofferr
2777 roff_it(ROFF_ARGS)
2779 int iv;
2781 /* Parse the number of lines. */
2783 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2784 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2785 ln, ppos, buf->buf + 1);
2786 return ROFF_IGN;
2789 while (isspace((unsigned char)buf->buf[pos]))
2790 pos++;
2793 * Arm the input line trap.
2794 * Special-casing "an-trap" is an ugly workaround to cope
2795 * with DocBook stupidly fiddling with man(7) internals.
2798 roffit_lines = iv;
2799 roffit_macro = mandoc_strdup(iv != 1 ||
2800 strcmp(buf->buf + pos, "an-trap") ?
2801 buf->buf + pos : "br");
2802 return ROFF_IGN;
2805 static enum rofferr
2806 roff_Dd(ROFF_ARGS)
2808 int mask;
2809 enum roff_tok t, te;
2811 switch (tok) {
2812 case ROFF_Dd:
2813 tok = MDOC_Dd;
2814 te = MDOC_MAX;
2815 if (r->format == 0)
2816 r->format = MPARSE_MDOC;
2817 mask = MPARSE_MDOC | MPARSE_QUICK;
2818 break;
2819 case ROFF_TH:
2820 tok = MAN_TH;
2821 te = MAN_MAX;
2822 if (r->format == 0)
2823 r->format = MPARSE_MAN;
2824 mask = MPARSE_QUICK;
2825 break;
2826 default:
2827 abort();
2829 if ((r->options & mask) == 0)
2830 for (t = tok; t < te; t++)
2831 roff_setstr(r, roff_name[t], NULL, 0);
2832 return ROFF_CONT;
2835 static enum rofferr
2836 roff_TE(ROFF_ARGS)
2838 if (r->tbl == NULL) {
2839 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2840 ln, ppos, "TE");
2841 return ROFF_IGN;
2843 if (tbl_end(r->tbl) == 0) {
2844 r->tbl = NULL;
2845 free(buf->buf);
2846 buf->buf = mandoc_strdup(".sp");
2847 buf->sz = 4;
2848 *offs = 0;
2849 return ROFF_REPARSE;
2851 r->tbl = NULL;
2852 return ROFF_IGN;
2855 static enum rofferr
2856 roff_T_(ROFF_ARGS)
2859 if (NULL == r->tbl)
2860 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2861 ln, ppos, "T&");
2862 else
2863 tbl_restart(ln, ppos, r->tbl);
2865 return ROFF_IGN;
2869 * Handle in-line equation delimiters.
2871 static enum rofferr
2872 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2874 char *cp1, *cp2;
2875 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2878 * Outside equations, look for an opening delimiter.
2879 * If we are inside an equation, we already know it is
2880 * in-line, or this function wouldn't have been called;
2881 * so look for a closing delimiter.
2884 cp1 = buf->buf + pos;
2885 cp2 = strchr(cp1, r->eqn == NULL ?
2886 r->last_eqn->odelim : r->last_eqn->cdelim);
2887 if (cp2 == NULL)
2888 return ROFF_CONT;
2890 *cp2++ = '\0';
2891 bef_pr = bef_nl = aft_nl = aft_pr = "";
2893 /* Handle preceding text, protecting whitespace. */
2895 if (*buf->buf != '\0') {
2896 if (r->eqn == NULL)
2897 bef_pr = "\\&";
2898 bef_nl = "\n";
2902 * Prepare replacing the delimiter with an equation macro
2903 * and drop leading white space from the equation.
2906 if (r->eqn == NULL) {
2907 while (*cp2 == ' ')
2908 cp2++;
2909 mac = ".EQ";
2910 } else
2911 mac = ".EN";
2913 /* Handle following text, protecting whitespace. */
2915 if (*cp2 != '\0') {
2916 aft_nl = "\n";
2917 if (r->eqn != NULL)
2918 aft_pr = "\\&";
2921 /* Do the actual replacement. */
2923 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2924 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2925 free(buf->buf);
2926 buf->buf = cp1;
2928 /* Toggle the in-line state of the eqn subsystem. */
2930 r->eqn_inline = r->eqn == NULL;
2931 return ROFF_REPARSE;
2934 static enum rofferr
2935 roff_EQ(ROFF_ARGS)
2937 struct roff_node *n;
2939 if (r->man->macroset == MACROSET_MAN)
2940 man_breakscope(r->man, ROFF_EQ);
2941 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2942 if (ln > r->man->last->line)
2943 n->flags |= NODE_LINE;
2944 n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2945 n->eqn->expectargs = UINT_MAX;
2946 roff_node_append(r->man, n);
2947 r->man->next = ROFF_NEXT_SIBLING;
2949 assert(r->eqn == NULL);
2950 if (r->last_eqn == NULL)
2951 r->last_eqn = eqn_alloc(r->parse);
2952 else
2953 eqn_reset(r->last_eqn);
2954 r->eqn = r->last_eqn;
2955 r->eqn->node = n;
2957 if (buf->buf[pos] != '\0')
2958 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2959 ".EQ %s", buf->buf + pos);
2961 return ROFF_IGN;
2964 static enum rofferr
2965 roff_EN(ROFF_ARGS)
2967 if (r->eqn != NULL) {
2968 eqn_parse(r->eqn);
2969 r->eqn = NULL;
2970 } else
2971 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2972 if (buf->buf[pos] != '\0')
2973 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2974 "EN %s", buf->buf + pos);
2975 return ROFF_IGN;
2978 static enum rofferr
2979 roff_TS(ROFF_ARGS)
2981 if (r->tbl != NULL) {
2982 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2983 ln, ppos, "TS breaks TS");
2984 tbl_end(r->tbl);
2986 r->tbl = tbl_alloc(ppos, ln, r->parse);
2987 if (r->last_tbl)
2988 r->last_tbl->next = r->tbl;
2989 else
2990 r->first_tbl = r->tbl;
2991 r->last_tbl = r->tbl;
2992 return ROFF_IGN;
2995 static enum rofferr
2996 roff_onearg(ROFF_ARGS)
2998 struct roff_node *n;
2999 char *cp;
3000 int npos;
3002 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3003 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3004 tok == ROFF_ti))
3005 man_breakscope(r->man, tok);
3007 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3008 r->man->last = roffce_node;
3009 r->man->next = ROFF_NEXT_SIBLING;
3012 roff_elem_alloc(r->man, ln, ppos, tok);
3013 n = r->man->last;
3015 cp = buf->buf + pos;
3016 if (*cp != '\0') {
3017 while (*cp != '\0' && *cp != ' ')
3018 cp++;
3019 while (*cp == ' ')
3020 *cp++ = '\0';
3021 if (*cp != '\0')
3022 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
3023 r->parse, ln, cp - buf->buf,
3024 "%s ... %s", roff_name[tok], cp);
3025 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3028 if (tok == ROFF_ce || tok == ROFF_rj) {
3029 if (r->man->last->type == ROFFT_ELEM) {
3030 roff_word_alloc(r->man, ln, pos, "1");
3031 r->man->last->flags |= NODE_NOSRC;
3033 npos = 0;
3034 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3035 &roffce_lines, 0) == 0) {
3036 mandoc_vmsg(MANDOCERR_CE_NONUM,
3037 r->parse, ln, pos, "ce %s", buf->buf + pos);
3038 roffce_lines = 1;
3040 if (roffce_lines < 1) {
3041 r->man->last = r->man->last->parent;
3042 roffce_node = NULL;
3043 roffce_lines = 0;
3044 } else
3045 roffce_node = r->man->last->parent;
3046 } else {
3047 n->flags |= NODE_VALID | NODE_ENDED;
3048 r->man->last = n;
3050 n->flags |= NODE_LINE;
3051 r->man->next = ROFF_NEXT_SIBLING;
3052 return ROFF_IGN;
3055 static enum rofferr
3056 roff_manyarg(ROFF_ARGS)
3058 struct roff_node *n;
3059 char *sp, *ep;
3061 roff_elem_alloc(r->man, ln, ppos, tok);
3062 n = r->man->last;
3064 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3065 while (*ep != '\0' && *ep != ' ')
3066 ep++;
3067 while (*ep == ' ')
3068 *ep++ = '\0';
3069 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3072 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3073 r->man->last = n;
3074 r->man->next = ROFF_NEXT_SIBLING;
3075 return ROFF_IGN;
3078 static enum rofferr
3079 roff_als(ROFF_ARGS)
3081 char *oldn, *newn, *end, *value;
3082 size_t oldsz, newsz, valsz;
3084 newn = oldn = buf->buf + pos;
3085 if (*newn == '\0')
3086 return ROFF_IGN;
3088 newsz = roff_getname(r, &oldn, ln, pos);
3089 if (newn[newsz] == '\\' || *oldn == '\0')
3090 return ROFF_IGN;
3092 end = oldn;
3093 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3094 if (oldsz == 0)
3095 return ROFF_IGN;
3097 valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3098 (int)oldsz, oldn);
3099 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3100 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3101 free(value);
3102 return ROFF_IGN;
3105 static enum rofferr
3106 roff_br(ROFF_ARGS)
3108 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3109 man_breakscope(r->man, ROFF_br);
3110 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3111 if (buf->buf[pos] != '\0')
3112 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3113 "%s %s", roff_name[tok], buf->buf + pos);
3114 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3115 r->man->next = ROFF_NEXT_SIBLING;
3116 return ROFF_IGN;
3119 static enum rofferr
3120 roff_cc(ROFF_ARGS)
3122 const char *p;
3124 p = buf->buf + pos;
3126 if (*p == '\0' || (r->control = *p++) == '.')
3127 r->control = '\0';
3129 if (*p != '\0')
3130 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3131 ln, p - buf->buf, "cc ... %s", p);
3133 return ROFF_IGN;
3136 static enum rofferr
3137 roff_ec(ROFF_ARGS)
3139 const char *p;
3141 p = buf->buf + pos;
3142 if (*p == '\0')
3143 r->escape = '\\';
3144 else {
3145 r->escape = *p;
3146 if (*++p != '\0')
3147 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3148 ln, p - buf->buf, "ec ... %s", p);
3150 return ROFF_IGN;
3153 static enum rofferr
3154 roff_eo(ROFF_ARGS)
3156 r->escape = '\0';
3157 if (buf->buf[pos] != '\0')
3158 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3159 ln, pos, "eo %s", buf->buf + pos);
3160 return ROFF_IGN;
3163 static enum rofferr
3164 roff_tr(ROFF_ARGS)
3166 const char *p, *first, *second;
3167 size_t fsz, ssz;
3168 enum mandoc_esc esc;
3170 p = buf->buf + pos;
3172 if (*p == '\0') {
3173 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3174 return ROFF_IGN;
3177 while (*p != '\0') {
3178 fsz = ssz = 1;
3180 first = p++;
3181 if (*first == '\\') {
3182 esc = mandoc_escape(&p, NULL, NULL);
3183 if (esc == ESCAPE_ERROR) {
3184 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3185 ln, (int)(p - buf->buf), first);
3186 return ROFF_IGN;
3188 fsz = (size_t)(p - first);
3191 second = p++;
3192 if (*second == '\\') {
3193 esc = mandoc_escape(&p, NULL, NULL);
3194 if (esc == ESCAPE_ERROR) {
3195 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3196 ln, (int)(p - buf->buf), second);
3197 return ROFF_IGN;
3199 ssz = (size_t)(p - second);
3200 } else if (*second == '\0') {
3201 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3202 ln, first - buf->buf, "tr %s", first);
3203 second = " ";
3204 p--;
3207 if (fsz > 1) {
3208 roff_setstrn(&r->xmbtab, first, fsz,
3209 second, ssz, 0);
3210 continue;
3213 if (r->xtab == NULL)
3214 r->xtab = mandoc_calloc(128,
3215 sizeof(struct roffstr));
3217 free(r->xtab[(int)*first].p);
3218 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3219 r->xtab[(int)*first].sz = ssz;
3222 return ROFF_IGN;
3225 static enum rofferr
3226 roff_rn(ROFF_ARGS)
3228 const char *value;
3229 char *oldn, *newn, *end;
3230 size_t oldsz, newsz;
3231 int deftype;
3233 oldn = newn = buf->buf + pos;
3234 if (*oldn == '\0')
3235 return ROFF_IGN;
3237 oldsz = roff_getname(r, &newn, ln, pos);
3238 if (oldn[oldsz] == '\\' || *newn == '\0')
3239 return ROFF_IGN;
3241 end = newn;
3242 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3243 if (newsz == 0)
3244 return ROFF_IGN;
3246 deftype = ROFFDEF_ANY;
3247 value = roff_getstrn(r, oldn, oldsz, &deftype);
3248 switch (deftype) {
3249 case ROFFDEF_USER:
3250 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3251 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3252 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3253 break;
3254 case ROFFDEF_PRE:
3255 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3256 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3257 break;
3258 case ROFFDEF_REN:
3259 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3260 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3261 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3262 break;
3263 case ROFFDEF_STD:
3264 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3265 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3266 break;
3267 default:
3268 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3269 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3270 break;
3272 return ROFF_IGN;
3275 static enum rofferr
3276 roff_so(ROFF_ARGS)
3278 char *name, *cp;
3280 name = buf->buf + pos;
3281 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3284 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3285 * opening anything that's not in our cwd or anything beneath
3286 * it. Thus, explicitly disallow traversing up the file-system
3287 * or using absolute paths.
3290 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3291 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3292 ".so %s", name);
3293 buf->sz = mandoc_asprintf(&cp,
3294 ".sp\nSee the file %s.\n.sp", name) + 1;
3295 free(buf->buf);
3296 buf->buf = cp;
3297 *offs = 0;
3298 return ROFF_REPARSE;
3301 *offs = pos;
3302 return ROFF_SO;
3305 /* --- user defined strings and macros ------------------------------------ */
3307 static enum rofferr
3308 roff_userdef(ROFF_ARGS)
3310 const char *arg[16], *ap;
3311 char *cp, *n1, *n2;
3312 int expand_count, i, ib, ie;
3313 size_t asz, rsz;
3316 * Collect pointers to macro argument strings
3317 * and NUL-terminate them.
3320 r->argc = 0;
3321 cp = buf->buf + pos;
3322 for (i = 0; i < 16; i++) {
3323 if (*cp == '\0')
3324 arg[i] = "";
3325 else {
3326 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3327 r->argc = i + 1;
3332 * Expand macro arguments.
3335 buf->sz = strlen(r->current_string) + 1;
3336 n1 = n2 = cp = mandoc_malloc(buf->sz);
3337 memcpy(n1, r->current_string, buf->sz);
3338 expand_count = 0;
3339 while (*cp != '\0') {
3341 /* Scan ahead for the next argument invocation. */
3343 if (*cp++ != '\\')
3344 continue;
3345 if (*cp++ != '$')
3346 continue;
3347 if (*cp == '*') { /* \\$* inserts all arguments */
3348 ib = 0;
3349 ie = r->argc - 1;
3350 } else { /* \\$1 .. \\$9 insert one argument */
3351 ib = ie = *cp - '1';
3352 if (ib < 0 || ib > 8)
3353 continue;
3355 cp -= 2;
3358 * Prevent infinite recursion.
3361 if (cp >= n2)
3362 expand_count = 1;
3363 else if (++expand_count > EXPAND_LIMIT) {
3364 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3365 ln, (int)(cp - n1), NULL);
3366 free(buf->buf);
3367 buf->buf = n1;
3368 *offs = 0;
3369 return ROFF_IGN;
3373 * Determine the size of the expanded argument,
3374 * taking escaping of quotes into account.
3377 asz = ie > ib ? ie - ib : 0; /* for blanks */
3378 for (i = ib; i <= ie; i++) {
3379 for (ap = arg[i]; *ap != '\0'; ap++) {
3380 asz++;
3381 if (*ap == '"')
3382 asz += 3;
3385 if (asz != 3) {
3388 * Determine the size of the rest of the
3389 * unexpanded macro, including the NUL.
3392 rsz = buf->sz - (cp - n1) - 3;
3395 * When shrinking, move before
3396 * releasing the storage.
3399 if (asz < 3)
3400 memmove(cp + asz, cp + 3, rsz);
3403 * Resize the storage for the macro
3404 * and readjust the parse pointer.
3407 buf->sz += asz - 3;
3408 n2 = mandoc_realloc(n1, buf->sz);
3409 cp = n2 + (cp - n1);
3410 n1 = n2;
3413 * When growing, make room
3414 * for the expanded argument.
3417 if (asz > 3)
3418 memmove(cp + asz, cp + 3, rsz);
3421 /* Copy the expanded argument, escaping quotes. */
3423 n2 = cp;
3424 for (i = ib; i <= ie; i++) {
3425 for (ap = arg[i]; *ap != '\0'; ap++) {
3426 if (*ap == '"') {
3427 memcpy(n2, "\\(dq", 4);
3428 n2 += 4;
3429 } else
3430 *n2++ = *ap;
3432 if (i < ie)
3433 *n2++ = ' ';
3438 * Replace the macro invocation
3439 * by the expanded macro.
3442 free(buf->buf);
3443 buf->buf = n1;
3444 *offs = 0;
3446 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3447 ROFF_REPARSE : ROFF_APPEND;
3451 * Calling a high-level macro that was renamed with .rn.
3452 * r->current_string has already been set up by roff_parse().
3454 static enum rofferr
3455 roff_renamed(ROFF_ARGS)
3457 char *nbuf;
3459 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3460 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3461 free(buf->buf);
3462 buf->buf = nbuf;
3463 *offs = 0;
3464 return ROFF_CONT;
3467 static size_t
3468 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3470 char *name, *cp;
3471 size_t namesz;
3473 name = *cpp;
3474 if ('\0' == *name)
3475 return 0;
3477 /* Read until end of name and terminate it with NUL. */
3478 for (cp = name; 1; cp++) {
3479 if ('\0' == *cp || ' ' == *cp) {
3480 namesz = cp - name;
3481 break;
3483 if ('\\' != *cp)
3484 continue;
3485 namesz = cp - name;
3486 if ('{' == cp[1] || '}' == cp[1])
3487 break;
3488 cp++;
3489 if ('\\' == *cp)
3490 continue;
3491 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3492 "%.*s", (int)(cp - name + 1), name);
3493 mandoc_escape((const char **)&cp, NULL, NULL);
3494 break;
3497 /* Read past spaces. */
3498 while (' ' == *cp)
3499 cp++;
3501 *cpp = cp;
3502 return namesz;
3506 * Store *string into the user-defined string called *name.
3507 * To clear an existing entry, call with (*r, *name, NULL, 0).
3508 * append == 0: replace mode
3509 * append == 1: single-line append mode
3510 * append == 2: multiline append mode, append '\n' after each call
3512 static void
3513 roff_setstr(struct roff *r, const char *name, const char *string,
3514 int append)
3516 size_t namesz;
3518 namesz = strlen(name);
3519 roff_setstrn(&r->strtab, name, namesz, string,
3520 string ? strlen(string) : 0, append);
3521 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3524 static void
3525 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3526 const char *string, size_t stringsz, int append)
3528 struct roffkv *n;
3529 char *c;
3530 int i;
3531 size_t oldch, newch;
3533 /* Search for an existing string with the same name. */
3534 n = *r;
3536 while (n && (namesz != n->key.sz ||
3537 strncmp(n->key.p, name, namesz)))
3538 n = n->next;
3540 if (NULL == n) {
3541 /* Create a new string table entry. */
3542 n = mandoc_malloc(sizeof(struct roffkv));
3543 n->key.p = mandoc_strndup(name, namesz);
3544 n->key.sz = namesz;
3545 n->val.p = NULL;
3546 n->val.sz = 0;
3547 n->next = *r;
3548 *r = n;
3549 } else if (0 == append) {
3550 free(n->val.p);
3551 n->val.p = NULL;
3552 n->val.sz = 0;
3555 if (NULL == string)
3556 return;
3559 * One additional byte for the '\n' in multiline mode,
3560 * and one for the terminating '\0'.
3562 newch = stringsz + (1 < append ? 2u : 1u);
3564 if (NULL == n->val.p) {
3565 n->val.p = mandoc_malloc(newch);
3566 *n->val.p = '\0';
3567 oldch = 0;
3568 } else {
3569 oldch = n->val.sz;
3570 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3573 /* Skip existing content in the destination buffer. */
3574 c = n->val.p + (int)oldch;
3576 /* Append new content to the destination buffer. */
3577 i = 0;
3578 while (i < (int)stringsz) {
3580 * Rudimentary roff copy mode:
3581 * Handle escaped backslashes.
3583 if ('\\' == string[i] && '\\' == string[i + 1])
3584 i++;
3585 *c++ = string[i++];
3588 /* Append terminating bytes. */
3589 if (1 < append)
3590 *c++ = '\n';
3592 *c = '\0';
3593 n->val.sz = (int)(c - n->val.p);
3596 static const char *
3597 roff_getstrn(struct roff *r, const char *name, size_t len,
3598 int *deftype)
3600 const struct roffkv *n;
3601 int found, i;
3602 enum roff_tok tok;
3604 found = 0;
3605 for (n = r->strtab; n != NULL; n = n->next) {
3606 if (strncmp(name, n->key.p, len) != 0 ||
3607 n->key.p[len] != '\0' || n->val.p == NULL)
3608 continue;
3609 if (*deftype & ROFFDEF_USER) {
3610 *deftype = ROFFDEF_USER;
3611 return n->val.p;
3612 } else {
3613 found = 1;
3614 break;
3617 for (n = r->rentab; n != NULL; n = n->next) {
3618 if (strncmp(name, n->key.p, len) != 0 ||
3619 n->key.p[len] != '\0' || n->val.p == NULL)
3620 continue;
3621 if (*deftype & ROFFDEF_REN) {
3622 *deftype = ROFFDEF_REN;
3623 return n->val.p;
3624 } else {
3625 found = 1;
3626 break;
3629 for (i = 0; i < PREDEFS_MAX; i++) {
3630 if (strncmp(name, predefs[i].name, len) != 0 ||
3631 predefs[i].name[len] != '\0')
3632 continue;
3633 if (*deftype & ROFFDEF_PRE) {
3634 *deftype = ROFFDEF_PRE;
3635 return predefs[i].str;
3636 } else {
3637 found = 1;
3638 break;
3641 if (r->man->macroset != MACROSET_MAN) {
3642 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3643 if (strncmp(name, roff_name[tok], len) != 0 ||
3644 roff_name[tok][len] != '\0')
3645 continue;
3646 if (*deftype & ROFFDEF_STD) {
3647 *deftype = ROFFDEF_STD;
3648 return NULL;
3649 } else {
3650 found = 1;
3651 break;
3655 if (r->man->macroset != MACROSET_MDOC) {
3656 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3657 if (strncmp(name, roff_name[tok], len) != 0 ||
3658 roff_name[tok][len] != '\0')
3659 continue;
3660 if (*deftype & ROFFDEF_STD) {
3661 *deftype = ROFFDEF_STD;
3662 return NULL;
3663 } else {
3664 found = 1;
3665 break;
3670 if (found == 0 && *deftype != ROFFDEF_ANY) {
3671 if (*deftype & ROFFDEF_REN) {
3673 * This might still be a request,
3674 * so do not treat it as undefined yet.
3676 *deftype = ROFFDEF_UNDEF;
3677 return NULL;
3680 /* Using an undefined string defines it to be empty. */
3682 roff_setstrn(&r->strtab, name, len, "", 0, 0);
3683 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
3686 *deftype = 0;
3687 return NULL;
3690 static void
3691 roff_freestr(struct roffkv *r)
3693 struct roffkv *n, *nn;
3695 for (n = r; n; n = nn) {
3696 free(n->key.p);
3697 free(n->val.p);
3698 nn = n->next;
3699 free(n);
3703 /* --- accessors and utility functions ------------------------------------ */
3706 * Duplicate an input string, making the appropriate character
3707 * conversations (as stipulated by `tr') along the way.
3708 * Returns a heap-allocated string with all the replacements made.
3710 char *
3711 roff_strdup(const struct roff *r, const char *p)
3713 const struct roffkv *cp;
3714 char *res;
3715 const char *pp;
3716 size_t ssz, sz;
3717 enum mandoc_esc esc;
3719 if (NULL == r->xmbtab && NULL == r->xtab)
3720 return mandoc_strdup(p);
3721 else if ('\0' == *p)
3722 return mandoc_strdup("");
3725 * Step through each character looking for term matches
3726 * (remember that a `tr' can be invoked with an escape, which is
3727 * a glyph but the escape is multi-character).
3728 * We only do this if the character hash has been initialised
3729 * and the string is >0 length.
3732 res = NULL;
3733 ssz = 0;
3735 while ('\0' != *p) {
3736 assert((unsigned int)*p < 128);
3737 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3738 sz = r->xtab[(int)*p].sz;
3739 res = mandoc_realloc(res, ssz + sz + 1);
3740 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3741 ssz += sz;
3742 p++;
3743 continue;
3744 } else if ('\\' != *p) {
3745 res = mandoc_realloc(res, ssz + 2);
3746 res[ssz++] = *p++;
3747 continue;
3750 /* Search for term matches. */
3751 for (cp = r->xmbtab; cp; cp = cp->next)
3752 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3753 break;
3755 if (NULL != cp) {
3757 * A match has been found.
3758 * Append the match to the array and move
3759 * forward by its keysize.
3761 res = mandoc_realloc(res,
3762 ssz + cp->val.sz + 1);
3763 memcpy(res + ssz, cp->val.p, cp->val.sz);
3764 ssz += cp->val.sz;
3765 p += (int)cp->key.sz;
3766 continue;
3770 * Handle escapes carefully: we need to copy
3771 * over just the escape itself, or else we might
3772 * do replacements within the escape itself.
3773 * Make sure to pass along the bogus string.
3775 pp = p++;
3776 esc = mandoc_escape(&p, NULL, NULL);
3777 if (ESCAPE_ERROR == esc) {
3778 sz = strlen(pp);
3779 res = mandoc_realloc(res, ssz + sz + 1);
3780 memcpy(res + ssz, pp, sz);
3781 break;
3784 * We bail out on bad escapes.
3785 * No need to warn: we already did so when
3786 * roff_res() was called.
3788 sz = (int)(p - pp);
3789 res = mandoc_realloc(res, ssz + sz + 1);
3790 memcpy(res + ssz, pp, sz);
3791 ssz += sz;
3794 res[(int)ssz] = '\0';
3795 return res;
3799 roff_getformat(const struct roff *r)
3802 return r->format;
3806 * Find out whether a line is a macro line or not.
3807 * If it is, adjust the current position and return one; if it isn't,
3808 * return zero and don't change the current position.
3809 * If the control character has been set with `.cc', then let that grain
3810 * precedence.
3811 * This is slighly contrary to groff, where using the non-breaking
3812 * control character when `cc' has been invoked will cause the
3813 * non-breaking macro contents to be printed verbatim.
3816 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3818 int pos;
3820 pos = *ppos;
3822 if (r->control != '\0' && cp[pos] == r->control)
3823 pos++;
3824 else if (r->control != '\0')
3825 return 0;
3826 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3827 pos += 2;
3828 else if ('.' == cp[pos] || '\'' == cp[pos])
3829 pos++;
3830 else
3831 return 0;
3833 while (' ' == cp[pos] || '\t' == cp[pos])
3834 pos++;
3836 *ppos = pos;
3837 return 1;