Restrict -Werror to gcc41.
[dragonfly.git] / usr.bin / mandoc / mdoc_argv.c
blobfd302086be7cf05d0f5df87df2bd4f02b2627161
1 /* $Id: mdoc_argv.c,v 1.18 2009/10/27 21:40:07 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
25 #include "libmdoc.h"
28 * Routines to parse arguments of macros. Arguments follow the syntax
29 * of `-arg [val [valN...]]'. Arguments come in all types: quoted
30 * arguments, multiple arguments per value, no-value arguments, etc.
32 * There's no limit to the number of arguments that may be allocated.
/*
 * How many values an argument key takes.  One of these is stored per
 * key in mdoc_argvflags[] and dispatched on in argv().
 */
enum {
	ARGV_NONE	= (1 << 0),	/* key takes no value */
	ARGV_SINGLE	= (1 << 1),	/* one mandatory value */
	ARGV_MULTI	= (1 << 2),	/* values up to next `-' or eoln */
	ARGV_OPT_SINGLE	= (1 << 3)	/* one optional value */
};

/* Number of slots argv_multi() adds whenever its value array grows. */
enum {
	MULTI_STEP	= 5
};

static	int	  argv_a2arg(int tok, const char *p);
static	int	  args(struct mdoc *m, int line, int *pos,
			char *buf, int fl, char **v);
static	int	  argv(struct mdoc *mdoc, int line,
			struct mdoc_argv *v, int *pos, char *buf);
static	int	  argv_single(struct mdoc *m, int line,
			struct mdoc_argv *v, int *pos, char *buf);
static	int	  argv_opt_single(struct mdoc *m, int line,
			struct mdoc_argv *v, int *pos, char *buf);
static	int	  argv_multi(struct mdoc *m, int line,
			struct mdoc_argv *v, int *pos, char *buf);
54 /* Per-argument flags. */
56 static int mdoc_argvflags[MDOC_ARG_MAX] = {
57 ARGV_NONE, /* MDOC_Split */
58 ARGV_NONE, /* MDOC_Nosplit */
59 ARGV_NONE, /* MDOC_Ragged */
60 ARGV_NONE, /* MDOC_Unfilled */
61 ARGV_NONE, /* MDOC_Literal */
62 ARGV_SINGLE, /* MDOC_File */
63 ARGV_OPT_SINGLE, /* MDOC_Offset */
64 ARGV_NONE, /* MDOC_Bullet */
65 ARGV_NONE, /* MDOC_Dash */
66 ARGV_NONE, /* MDOC_Hyphen */
67 ARGV_NONE, /* MDOC_Item */
68 ARGV_NONE, /* MDOC_Enum */
69 ARGV_NONE, /* MDOC_Tag */
70 ARGV_NONE, /* MDOC_Diag */
71 ARGV_NONE, /* MDOC_Hang */
72 ARGV_NONE, /* MDOC_Ohang */
73 ARGV_NONE, /* MDOC_Inset */
74 ARGV_MULTI, /* MDOC_Column */
75 ARGV_SINGLE, /* MDOC_Width */
76 ARGV_NONE, /* MDOC_Compact */
77 ARGV_NONE, /* MDOC_Std */
78 ARGV_NONE, /* MDOC_Filled */
79 ARGV_NONE, /* MDOC_Words */
80 ARGV_NONE, /* MDOC_Emphasis */
81 ARGV_NONE, /* MDOC_Symbolic */
82 ARGV_NONE /* MDOC_Symbolic */
85 static int mdoc_argflags[MDOC_MAX] = {
86 0, /* Ap */
87 0, /* Dd */
88 0, /* Dt */
89 0, /* Os */
90 0, /* Sh */
91 0, /* Ss */
92 ARGS_DELIM, /* Pp */
93 ARGS_DELIM, /* D1 */
94 ARGS_DELIM, /* Dl */
95 0, /* Bd */
96 0, /* Ed */
97 0, /* Bl */
98 0, /* El */
99 0, /* It */
100 ARGS_DELIM, /* Ad */
101 ARGS_DELIM, /* An */
102 ARGS_DELIM, /* Ar */
103 0, /* Cd */
104 ARGS_DELIM, /* Cm */
105 ARGS_DELIM, /* Dv */
106 ARGS_DELIM, /* Er */
107 ARGS_DELIM, /* Ev */
108 0, /* Ex */
109 ARGS_DELIM, /* Fa */
110 0, /* Fd */
111 ARGS_DELIM, /* Fl */
112 ARGS_DELIM, /* Fn */
113 ARGS_DELIM, /* Ft */
114 ARGS_DELIM, /* Ic */
115 0, /* In */
116 ARGS_DELIM, /* Li */
117 0, /* Nd */
118 ARGS_DELIM, /* Nm */
119 ARGS_DELIM, /* Op */
120 0, /* Ot */
121 ARGS_DELIM, /* Pa */
122 0, /* Rv */
123 ARGS_DELIM, /* St */
124 ARGS_DELIM, /* Va */
125 ARGS_DELIM, /* Vt */
126 ARGS_DELIM, /* Xr */
127 0, /* %A */
128 0, /* %B */
129 0, /* %D */
130 0, /* %I */
131 0, /* %J */
132 0, /* %N */
133 0, /* %O */
134 0, /* %P */
135 0, /* %R */
136 0, /* %T */
137 0, /* %V */
138 ARGS_DELIM, /* Ac */
139 0, /* Ao */
140 ARGS_DELIM, /* Aq */
141 ARGS_DELIM, /* At */
142 ARGS_DELIM, /* Bc */
143 0, /* Bf */
144 0, /* Bo */
145 ARGS_DELIM, /* Bq */
146 ARGS_DELIM, /* Bsx */
147 ARGS_DELIM, /* Bx */
148 0, /* Db */
149 ARGS_DELIM, /* Dc */
150 0, /* Do */
151 ARGS_DELIM, /* Dq */
152 ARGS_DELIM, /* Ec */
153 0, /* Ef */
154 ARGS_DELIM, /* Em */
155 0, /* Eo */
156 ARGS_DELIM, /* Fx */
157 ARGS_DELIM, /* Ms */
158 ARGS_DELIM, /* No */
159 ARGS_DELIM, /* Ns */
160 ARGS_DELIM, /* Nx */
161 ARGS_DELIM, /* Ox */
162 ARGS_DELIM, /* Pc */
163 ARGS_DELIM, /* Pf */
164 0, /* Po */
165 ARGS_DELIM, /* Pq */
166 ARGS_DELIM, /* Qc */
167 ARGS_DELIM, /* Ql */
168 0, /* Qo */
169 ARGS_DELIM, /* Qq */
170 0, /* Re */
171 0, /* Rs */
172 ARGS_DELIM, /* Sc */
173 0, /* So */
174 ARGS_DELIM, /* Sq */
175 0, /* Sm */
176 ARGS_DELIM, /* Sx */
177 ARGS_DELIM, /* Sy */
178 ARGS_DELIM, /* Tn */
179 ARGS_DELIM, /* Ux */
180 ARGS_DELIM, /* Xc */
181 0, /* Xo */
182 0, /* Fo */
183 0, /* Fc */
184 0, /* Oo */
185 ARGS_DELIM, /* Oc */
186 0, /* Bk */
187 0, /* Ek */
188 0, /* Bt */
189 0, /* Hf */
190 0, /* Fr */
191 0, /* Ud */
192 0, /* Lb */
193 ARGS_DELIM, /* Lp */
194 ARGS_DELIM, /* Lk */
195 ARGS_DELIM, /* Mt */
196 ARGS_DELIM, /* Brq */
197 0, /* Bro */
198 ARGS_DELIM, /* Brc */
199 0, /* %C */
200 0, /* Es */
201 0, /* En */
202 0, /* Dx */
203 0, /* %Q */
204 0, /* br */
205 0, /* sp */
206 0, /* %U */
211 * Parse an argument from line text. This comes in the form of -key
212 * [value0...], which may either have a single mandatory value, at least
213 * one mandatory value, an optional single value, or no value.
216 mdoc_argv(struct mdoc *m, int line, int tok,
217 struct mdoc_arg **v, int *pos, char *buf)
219 char *p, sv;
220 struct mdoc_argv tmp;
221 struct mdoc_arg *arg;
223 if (0 == buf[*pos])
224 return(ARGV_EOLN);
226 assert(' ' != buf[*pos]);
228 /* Parse through to the first unescaped space. */
230 p = &buf[++(*pos)];
232 assert(*pos > 0);
234 /* LINTED */
235 while (buf[*pos]) {
236 if (' ' == buf[*pos])
237 if ('\\' != buf[*pos - 1])
238 break;
239 (*pos)++;
242 /* XXX - save zeroed byte, if not an argument. */
244 sv = 0;
245 if (buf[*pos]) {
246 sv = buf[*pos];
247 buf[(*pos)++] = 0;
250 (void)memset(&tmp, 0, sizeof(struct mdoc_argv));
251 tmp.line = line;
252 tmp.pos = *pos;
254 /* See if our token accepts the argument. */
256 if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) {
257 /* XXX - restore saved zeroed byte. */
258 if (sv)
259 buf[*pos - 1] = sv;
260 return(ARGV_WORD);
263 while (buf[*pos] && ' ' == buf[*pos])
264 (*pos)++;
266 if ( ! argv(m, line, &tmp, pos, buf))
267 return(ARGV_ERROR);
269 if (NULL == (arg = *v)) {
270 *v = calloc(1, sizeof(struct mdoc_arg));
271 if (NULL == *v) {
272 (void)mdoc_nerr(m, m->last, EMALLOC);
273 return(ARGV_ERROR);
275 arg = *v;
278 arg->argc++;
279 arg->argv = realloc(arg->argv, arg->argc *
280 sizeof(struct mdoc_argv));
282 if (NULL == arg->argv) {
283 (void)mdoc_nerr(m, m->last, EMALLOC);
284 return(ARGV_ERROR);
287 (void)memcpy(&arg->argv[(int)arg->argc - 1],
288 &tmp, sizeof(struct mdoc_argv));
290 return(ARGV_ARG);
294 void
295 mdoc_argv_free(struct mdoc_arg *p)
297 int i, j;
299 if (NULL == p)
300 return;
302 if (p->refcnt) {
303 --(p->refcnt);
304 if (p->refcnt)
305 return;
307 assert(p->argc);
309 /* LINTED */
310 for (i = 0; i < (int)p->argc; i++) {
311 if (0 == p->argv[i].sz)
312 continue;
313 if (NULL == p->argv[i].value)
314 continue;
316 /* LINTED */
317 for (j = 0; j < (int)p->argv[i].sz; j++)
318 if (p->argv[i].value[j])
319 free(p->argv[i].value[j]);
321 free(p->argv[i].value);
324 free(p->argv);
325 free(p);
/*
 * Parse the next term with caller-supplied ARGS_* flags rather than
 * the per-macro defaults.  Thin public wrapper around args(); the
 * return value is passed through unchanged.
 */
int
mdoc_zargs(struct mdoc *m, int line, int *pos,
		char *buf, int flags, char **v)
{
	int		 rc;

	rc = args(m, line, pos, buf, flags, v);
	return(rc);
}
339 mdoc_args(struct mdoc *m, int line,
340 int *pos, char *buf, int tok, char **v)
342 int fl, c, i;
343 struct mdoc_node *n;
345 fl = (0 == tok) ? 0 : mdoc_argflags[tok];
347 if (MDOC_It != tok)
348 return(args(m, line, pos, buf, fl, v));
351 * The `It' macro is a special case, as it acquires parameters from its
352 * parent `Bl' context, specifically, we're concerned with -column.
355 for (n = m->last; n; n = n->parent)
356 if (MDOC_BLOCK == n->type && MDOC_Bl == n->tok)
357 break;
359 assert(n);
360 c = (int)(n->args ? n->args->argc : 0);
361 assert(c > 0);
363 /* LINTED */
364 for (i = 0; i < c; i++) {
365 if (MDOC_Column != n->args->argv[i].arg)
366 continue;
367 fl |= ARGS_TABSEP;
368 fl &= ~ARGS_DELIM;
369 break;
372 return(args(m, line, pos, buf, fl, v));
376 static int
377 args(struct mdoc *m, int line, int *pos,
378 char *buf, int fl, char **v)
380 int i;
381 char *p, *pp;
384 * Parse out the terms (like `val' in `.Xx -arg val' or simply
385 * `.Xx val'), which can have all sorts of properties:
387 * ARGS_DELIM: use special handling if encountering trailing
388 * delimiters in the form of [[::delim::][ ]+]+.
390 * ARGS_NOWARN: don't post warnings. This is only used when
391 * re-parsing delimiters, as the warnings have already been
392 * posted.
394 * ARGS_TABSEP: use special handling for tab/`Ta' separated
395 * phrases like in `Bl -column'.
398 assert(*pos);
399 assert(' ' != buf[*pos]);
401 if (0 == buf[*pos])
402 return(ARGS_EOLN);
405 * If the first character is a delimiter and we're to look for
406 * delimited strings, then pass down the buffer seeing if it
407 * follows the pattern of [[::delim::][ ]+]+.
410 if ((fl & ARGS_DELIM) && mdoc_iscdelim(buf[*pos])) {
411 for (i = *pos; buf[i]; ) {
412 if ( ! mdoc_iscdelim(buf[i]))
413 break;
414 i++;
415 if (0 == buf[i] || ' ' != buf[i])
416 break;
417 i++;
418 while (buf[i] && ' ' == buf[i])
419 i++;
422 if (0 == buf[i]) {
423 *v = &buf[*pos];
424 if (' ' != buf[i - 1])
425 return(ARGS_PUNCT);
426 if (ARGS_NOWARN & fl)
427 return(ARGS_PUNCT);
428 if ( ! mdoc_pwarn(m, line, *pos, ETAILWS))
429 return(ARGS_ERROR);
430 return(ARGS_PUNCT);
434 *v = &buf[*pos];
437 * First handle TABSEP items, restricted to `Bl -column'. This
438 * ignores conventional token parsing and instead uses tabs or
439 * `Ta' macros to separate phrases. Phrases are parsed again
440 * for arguments at a later phase.
443 if (ARGS_TABSEP & fl) {
444 /* Scan ahead to tab (can't be escaped). */
445 p = strchr(*v, '\t');
447 /* Scan ahead to unescaped `Ta'. */
448 for (pp = *v; ; pp++) {
449 if (NULL == (pp = strstr(pp, "Ta")))
450 break;
451 if (pp > *v && ' ' != *(pp - 1))
452 continue;
453 if (' ' == *(pp + 2) || 0 == *(pp + 2))
454 break;
458 * Adjust new-buffer position to be beyond delimiter
459 * mark (e.g., Ta -> end + 2).
461 if (p && pp) {
462 *pos += pp < p ? 2 : 1;
463 p = pp < p ? pp : p;
464 } else if (p && ! pp) {
465 *pos += 1;
466 } else if (pp && ! p) {
467 p = pp;
468 *pos += 2;
469 } else
470 p = strchr(*v, 0);
472 /* Whitespace check for eoln case... */
473 if (0 == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl))
474 if ( ! mdoc_pwarn(m, line, *pos, ETAILWS))
475 return(ARGS_ERROR);
477 *pos += (int)(p - *v);
479 /* Strip delimiter's preceding whitespace. */
480 pp = p - 1;
481 while (pp > *v && ' ' == *pp) {
482 if (pp > *v && '\\' == *(pp - 1))
483 break;
484 pp--;
486 *(pp + 1) = 0;
488 /* Strip delimiter's proceeding whitespace. */
489 for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++)
490 /* Skip ahead. */ ;
492 return(ARGS_PHRASE);
496 * Process a quoted literal. A quote begins with a double-quote
497 * and ends with a double-quote NOT preceded by a double-quote.
498 * Whitespace is NOT involved in literal termination.
501 if ('\"' == buf[*pos]) {
502 *v = &buf[++(*pos)];
504 for ( ; buf[*pos]; (*pos)++) {
505 if ('\"' != buf[*pos])
506 continue;
507 if ('\"' != buf[*pos + 1])
508 break;
509 (*pos)++;
512 if (0 == buf[*pos]) {
513 if (ARGS_NOWARN & fl)
514 return(ARGS_QWORD);
515 if ( ! mdoc_pwarn(m, line, *pos, EQUOTTERM))
516 return(ARGS_ERROR);
517 return(ARGS_QWORD);
520 buf[(*pos)++] = 0;
522 if (0 == buf[*pos])
523 return(ARGS_QWORD);
525 while (' ' == buf[*pos])
526 (*pos)++;
528 if (0 == buf[*pos] && ! (ARGS_NOWARN & fl))
529 if ( ! mdoc_pwarn(m, line, *pos, ETAILWS))
530 return(ARGS_ERROR);
532 return(ARGS_QWORD);
536 * A non-quoted term progresses until either the end of line or
537 * a non-escaped whitespace.
540 for ( ; buf[*pos]; (*pos)++)
541 if (' ' == buf[*pos] && '\\' != buf[*pos - 1])
542 break;
544 if (0 == buf[*pos])
545 return(ARGS_WORD);
547 buf[(*pos)++] = 0;
549 while (' ' == buf[*pos])
550 (*pos)++;
552 if (0 == buf[*pos] && ! (ARGS_NOWARN & fl))
553 if ( ! mdoc_pwarn(m, line, *pos, ETAILWS))
554 return(ARGS_ERROR);
556 return(ARGS_WORD);
560 static int
561 argv_a2arg(int tok, const char *p)
565 * Parse an argument identifier from its text. XXX - this
566 * should really be table-driven to clarify the code.
568 * If you add an argument to the list, make sure that you
569 * register it here with its one or more macros!
572 switch (tok) {
573 case (MDOC_An):
574 if (0 == strcmp(p, "split"))
575 return(MDOC_Split);
576 else if (0 == strcmp(p, "nosplit"))
577 return(MDOC_Nosplit);
578 break;
580 case (MDOC_Bd):
581 if (0 == strcmp(p, "ragged"))
582 return(MDOC_Ragged);
583 else if (0 == strcmp(p, "unfilled"))
584 return(MDOC_Unfilled);
585 else if (0 == strcmp(p, "filled"))
586 return(MDOC_Filled);
587 else if (0 == strcmp(p, "literal"))
588 return(MDOC_Literal);
589 else if (0 == strcmp(p, "file"))
590 return(MDOC_File);
591 else if (0 == strcmp(p, "offset"))
592 return(MDOC_Offset);
593 else if (0 == strcmp(p, "compact"))
594 return(MDOC_Compact);
595 else if (0 == strcmp(p, "centered"))
596 return(MDOC_Centred);
597 break;
599 case (MDOC_Bf):
600 if (0 == strcmp(p, "emphasis"))
601 return(MDOC_Emphasis);
602 else if (0 == strcmp(p, "literal"))
603 return(MDOC_Literal);
604 else if (0 == strcmp(p, "symbolic"))
605 return(MDOC_Symbolic);
606 break;
608 case (MDOC_Bk):
609 if (0 == strcmp(p, "words"))
610 return(MDOC_Words);
611 break;
613 case (MDOC_Bl):
614 if (0 == strcmp(p, "bullet"))
615 return(MDOC_Bullet);
616 else if (0 == strcmp(p, "dash"))
617 return(MDOC_Dash);
618 else if (0 == strcmp(p, "hyphen"))
619 return(MDOC_Hyphen);
620 else if (0 == strcmp(p, "item"))
621 return(MDOC_Item);
622 else if (0 == strcmp(p, "enum"))
623 return(MDOC_Enum);
624 else if (0 == strcmp(p, "tag"))
625 return(MDOC_Tag);
626 else if (0 == strcmp(p, "diag"))
627 return(MDOC_Diag);
628 else if (0 == strcmp(p, "hang"))
629 return(MDOC_Hang);
630 else if (0 == strcmp(p, "ohang"))
631 return(MDOC_Ohang);
632 else if (0 == strcmp(p, "inset"))
633 return(MDOC_Inset);
634 else if (0 == strcmp(p, "column"))
635 return(MDOC_Column);
636 else if (0 == strcmp(p, "width"))
637 return(MDOC_Width);
638 else if (0 == strcmp(p, "offset"))
639 return(MDOC_Offset);
640 else if (0 == strcmp(p, "compact"))
641 return(MDOC_Compact);
642 else if (0 == strcmp(p, "nested"))
643 return(MDOC_Nested);
644 break;
646 case (MDOC_Rv):
647 /* FALLTHROUGH */
648 case (MDOC_Ex):
649 if (0 == strcmp(p, "std"))
650 return(MDOC_Std);
651 break;
652 default:
653 break;
656 return(MDOC_ARG_MAX);
660 static int
661 argv_multi(struct mdoc *m, int line,
662 struct mdoc_argv *v, int *pos, char *buf)
664 int c;
665 char *p;
667 for (v->sz = 0; ; v->sz++) {
668 if ('-' == buf[*pos])
669 break;
670 c = args(m, line, pos, buf, 0, &p);
671 if (ARGS_ERROR == c)
672 return(0);
673 else if (ARGS_EOLN == c)
674 break;
676 if (0 == v->sz % MULTI_STEP) {
677 v->value = realloc(v->value,
678 (v->sz + MULTI_STEP) * sizeof(char *));
679 if (NULL == v->value) {
680 (void)mdoc_nerr(m, m->last, EMALLOC);
681 return(ARGV_ERROR);
684 if (NULL == (v->value[(int)v->sz] = strdup(p)))
685 return(mdoc_nerr(m, m->last, EMALLOC));
688 return(1);
692 static int
693 argv_opt_single(struct mdoc *m, int line,
694 struct mdoc_argv *v, int *pos, char *buf)
696 int c;
697 char *p;
699 if ('-' == buf[*pos])
700 return(1);
702 c = args(m, line, pos, buf, 0, &p);
703 if (ARGS_ERROR == c)
704 return(0);
705 if (ARGS_EOLN == c)
706 return(1);
708 v->sz = 1;
709 if (NULL == (v->value = calloc(1, sizeof(char *))))
710 return(mdoc_nerr(m, m->last, EMALLOC));
711 if (NULL == (v->value[0] = strdup(p)))
712 return(mdoc_nerr(m, m->last, EMALLOC));
714 return(1);
719 * Parse a single, mandatory value from the stream.
721 static int
722 argv_single(struct mdoc *m, int line,
723 struct mdoc_argv *v, int *pos, char *buf)
725 int c, ppos;
726 char *p;
728 ppos = *pos;
730 c = args(m, line, pos, buf, 0, &p);
731 if (ARGS_ERROR == c)
732 return(0);
733 if (ARGS_EOLN == c)
734 return(mdoc_perr(m, line, ppos, EARGVAL));
736 v->sz = 1;
737 if (NULL == (v->value = calloc(1, sizeof(char *))))
738 return(mdoc_nerr(m, m->last, EMALLOC));
739 if (NULL == (v->value[0] = strdup(p)))
740 return(mdoc_nerr(m, m->last, EMALLOC));
742 return(1);
747 * Determine rules for parsing arguments. Arguments can either accept
748 * no parameters, an optional single parameter, one parameter, or
749 * multiple parameters.
751 static int
752 argv(struct mdoc *mdoc, int line,
753 struct mdoc_argv *v, int *pos, char *buf)
756 v->sz = 0;
757 v->value = NULL;
759 switch (mdoc_argvflags[v->arg]) {
760 case (ARGV_SINGLE):
761 return(argv_single(mdoc, line, v, pos, buf));
762 case (ARGV_MULTI):
763 return(argv_multi(mdoc, line, v, pos, buf));
764 case (ARGV_OPT_SINGLE):
765 return(argv_opt_single(mdoc, line, v, pos, buf));
766 default:
767 /* ARGV_NONE */
768 break;
771 return(1);