Make dnl diagnostic print macro name.
[m4/ericb.git] / src / builtin.c
blob88649dac625fa8f95e5f042ec7940e1f289fb293
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2000, 2004, 2006, 2007
4 Free Software Foundation, Inc.
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 /* Code for all builtin macros, initialization of symbol table, and
23 expansion of user defined macros. */
25 #include "m4.h"
27 extern FILE *popen ();
29 #include "regex.h"
31 #if HAVE_SYS_WAIT_H
32 # include <sys/wait.h>
33 #endif
/* Text of macro argument I, or the empty string when fewer than I + 1
   arguments are present.  Expects `argc' and `argv' to be in scope at
   the point of use; note that I is evaluated more than once.  */
#define ARG(i) ((i) < argc ? TOKEN_DATA_TEXT (argv[i]) : "")
37 /* Initialization of builtin and predefined macros. The table
38 "builtin_tab" is both used for initialization, and by the "builtin"
39 builtin. */
41 #define DECLARE(name) \
42 static void name (struct obstack *, int, token_data **)
44 DECLARE (m4___file__);
45 DECLARE (m4___line__);
46 DECLARE (m4___program__);
47 DECLARE (m4_builtin);
48 DECLARE (m4_changecom);
49 DECLARE (m4_changequote);
50 #ifdef ENABLE_CHANGEWORD
51 DECLARE (m4_changeword);
52 #endif
53 DECLARE (m4_debugmode);
54 DECLARE (m4_debugfile);
55 DECLARE (m4_decr);
56 DECLARE (m4_define);
57 DECLARE (m4_defn);
58 DECLARE (m4_divert);
59 DECLARE (m4_divnum);
60 DECLARE (m4_dnl);
61 DECLARE (m4_dumpdef);
62 DECLARE (m4_errprint);
63 DECLARE (m4_esyscmd);
64 DECLARE (m4_eval);
65 DECLARE (m4_format);
66 DECLARE (m4_ifdef);
67 DECLARE (m4_ifelse);
68 DECLARE (m4_include);
69 DECLARE (m4_incr);
70 DECLARE (m4_index);
71 DECLARE (m4_indir);
72 DECLARE (m4_len);
73 DECLARE (m4_m4exit);
74 DECLARE (m4_m4wrap);
75 DECLARE (m4_maketemp);
76 DECLARE (m4_mkstemp);
77 DECLARE (m4_patsubst);
78 DECLARE (m4_popdef);
79 DECLARE (m4_pushdef);
80 DECLARE (m4_regexp);
81 DECLARE (m4_shift);
82 DECLARE (m4_sinclude);
83 DECLARE (m4_substr);
84 DECLARE (m4_syscmd);
85 DECLARE (m4_sysval);
86 DECLARE (m4_traceoff);
87 DECLARE (m4_traceon);
88 DECLARE (m4_translit);
89 DECLARE (m4_undefine);
90 DECLARE (m4_undivert);
92 #undef DECLARE
94 static builtin
95 builtin_tab[] =
98 /* name GNUext macros blind function */
100 { "__file__", true, false, false, m4___file__ },
101 { "__line__", true, false, false, m4___line__ },
102 { "__program__", true, false, false, m4___program__ },
103 { "builtin", true, true, true, m4_builtin },
104 { "changecom", false, false, false, m4_changecom },
105 { "changequote", false, false, false, m4_changequote },
106 #ifdef ENABLE_CHANGEWORD
107 { "changeword", true, false, true, m4_changeword },
108 #endif
109 { "debugmode", true, false, false, m4_debugmode },
110 { "debugfile", true, false, false, m4_debugfile },
111 { "decr", false, false, true, m4_decr },
112 { "define", false, true, true, m4_define },
113 { "defn", false, false, true, m4_defn },
114 { "divert", false, false, false, m4_divert },
115 { "divnum", false, false, false, m4_divnum },
116 { "dnl", false, false, false, m4_dnl },
117 { "dumpdef", false, false, false, m4_dumpdef },
118 { "errprint", false, false, true, m4_errprint },
119 { "esyscmd", true, false, true, m4_esyscmd },
120 { "eval", false, false, true, m4_eval },
121 { "format", true, false, true, m4_format },
122 { "ifdef", false, false, true, m4_ifdef },
123 { "ifelse", false, false, true, m4_ifelse },
124 { "include", false, false, true, m4_include },
125 { "incr", false, false, true, m4_incr },
126 { "index", false, false, true, m4_index },
127 { "indir", true, true, true, m4_indir },
128 { "len", false, false, true, m4_len },
129 { "m4exit", false, false, false, m4_m4exit },
130 { "m4wrap", false, false, true, m4_m4wrap },
131 { "maketemp", false, false, true, m4_maketemp },
132 { "mkstemp", false, false, true, m4_mkstemp },
133 { "patsubst", true, false, true, m4_patsubst },
134 { "popdef", false, false, true, m4_popdef },
135 { "pushdef", false, true, true, m4_pushdef },
136 { "regexp", true, false, true, m4_regexp },
137 { "shift", false, false, true, m4_shift },
138 { "sinclude", false, false, true, m4_sinclude },
139 { "substr", false, false, true, m4_substr },
140 { "syscmd", false, false, true, m4_syscmd },
141 { "sysval", false, false, false, m4_sysval },
142 { "traceoff", false, false, false, m4_traceoff },
143 { "traceon", false, false, false, m4_traceon },
144 { "translit", false, false, true, m4_translit },
145 { "undefine", false, false, true, m4_undefine },
146 { "undivert", false, false, false, m4_undivert },
148 { 0, false, false, false, 0 },
150 /* placeholder is intentionally stuck after the table end delimiter,
151 so that we can easily find it, while not treating it as a real
152 builtin. */
153 { "placeholder", true, false, false, m4_placeholder },
156 static predefined const
157 predefined_tab[] =
159 #if UNIX
160 { "unix", "__unix__", "" },
161 #elif W32_NATIVE
162 { "windows", "__windows__", "" },
163 #elif OS2
164 { "os2", "__os2__", "" },
165 #else
166 # warning Platform macro not provided
167 #endif
168 { NULL, "__gnu__", "" },
170 { NULL, NULL, NULL },
173 /*----------------------------------------.
174 | Find the builtin, which lives on ADDR. |
175 `----------------------------------------*/
177 const builtin *
178 find_builtin_by_addr (builtin_func *func)
180 const builtin *bp;
182 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
183 if (bp->func == func)
184 return bp;
185 if (func == m4_placeholder)
186 return bp + 1;
187 return NULL;
190 /*----------------------------------------------------------.
191 | Find the builtin, which has NAME. On failure, return the |
192 | placeholder builtin. |
193 `----------------------------------------------------------*/
195 const builtin *
196 find_builtin_by_name (const char *name)
198 const builtin *bp;
200 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
201 if (strcmp (bp->name, name) == 0)
202 return bp;
203 return bp + 1;
206 /*-------------------------------------------------------------------------.
207 | Install a builtin macro with name NAME, bound to the C function given in |
208 | BP. MODE is SYMBOL_INSERT or SYMBOL_PUSHDEF. TRACED defines whether |
209 | NAME is to be traced. |
210 `-------------------------------------------------------------------------*/
212 void
213 define_builtin (const char *name, const builtin *bp, symbol_lookup mode)
215 symbol *sym;
217 sym = lookup_symbol (name, mode);
218 SYMBOL_TYPE (sym) = TOKEN_FUNC;
219 SYMBOL_MACRO_ARGS (sym) = bp->groks_macro_args;
220 SYMBOL_BLIND_NO_ARGS (sym) = bp->blind_if_no_args;
221 SYMBOL_FUNC (sym) = bp->func;
224 /* Storage for the compiled regular expression of
225 --warn-macro-sequence. */
226 static struct re_pattern_buffer macro_sequence_buf;
228 /* Storage for the matches of --warn-macro-sequence. */
229 static struct re_registers macro_sequence_regs;
231 /* True if --warn-macro-sequence is in effect. */
232 static bool macro_sequence_inuse;
234 /* Maybe this is worth making runtime tunable. Too small, and nothing
235 gets cached because the working set of active regex is larger than
236 the cache, and we are always swapping out entries. Too large, and
237 the time spent searching the cache for a match overtakes the time
238 saved by caching. For now, this size proved reasonable for the
239 typical working set of Autoconf 2.62. */
240 #define REGEX_CACHE_SIZE 16
242 /* Structure for caching compiled regex. */
243 struct m4_regex {
244 unsigned count; /* usage counter */
245 size_t len; /* length of string */
246 char *str; /* copy of compiled string */
247 struct re_pattern_buffer *buf; /* compiled regex, allocated */
248 struct re_registers regs; /* match registers, reused */
250 typedef struct m4_regex m4_regex;
252 /* Storage for the cache of regular expressions. */
253 static m4_regex regex_cache[REGEX_CACHE_SIZE];
255 #ifdef DEBUG_REGEX
256 extern FILE *trace_file;
257 #endif /* DEBUG_REGEX */
259 /*------------------------------------------------------------------.
260 | Compile STR, with length LEN, into a regex. On success, set BUF |
261 | and REGS to the compiled regex. Compilation is cached, so do not |
262 | free the results here; rather, use free_regex at the end of the |
263 | program. Return NULL on success, or an error message. |
264 `------------------------------------------------------------------*/
265 static const char *
266 compile_pattern (const char *str, size_t len, struct re_pattern_buffer **buf,
267 struct re_registers **regs)
269 int i;
270 m4_regex *victim;
271 unsigned victim_count;
272 struct re_pattern_buffer *new_buf;
273 struct re_registers *new_regs;
274 const char *msg;
276 /* First, check if STR is already cached. If so, increase its use
277 count and return it. */
278 for (i = 0; i < REGEX_CACHE_SIZE; i++)
279 if (len == regex_cache[i].len && regex_cache[i].str
280 && memcmp (str, regex_cache[i].str, len) == 0)
282 *buf = regex_cache[i].buf;
283 *regs = &regex_cache[i].regs;
284 regex_cache[i].count++;
285 #ifdef DEBUG_REGEX
286 if (trace_file)
287 xfprintf (trace_file, "cached:{%s}\n", str);
288 #endif /* DEBUG_REGEX */
289 return NULL;
292 /* Next, check if STR can be compiled. */
293 new_buf = xzalloc (sizeof *new_buf);
294 msg = re_compile_pattern (str, len, new_buf);
295 #ifdef DEBUG_REGEX
296 if (trace_file)
297 xfprintf (trace_file, "compile:{%s}\n", str);
298 #endif /* DEBUG_REGEX */
299 if (msg)
301 regfree (new_buf);
302 free (new_buf);
303 return msg;
306 /* Now, find a victim slot. Decrease the count of all entries, then
307 prime the count of the victim slot at REGEX_CACHE_SIZE. This
308 way, frequently used entries and newly created entries are least
309 likely to be victims next time we have a cache miss. */
310 victim = regex_cache;
311 victim_count = victim->count;
312 if (victim_count)
313 victim->count--;
314 for (i = 1; i < REGEX_CACHE_SIZE; i++)
316 if (regex_cache[i].count < victim_count)
318 victim_count = regex_cache[i].count;
319 victim = &regex_cache[i];
321 if (regex_cache[i].count)
322 regex_cache[i].count--;
324 victim->count = REGEX_CACHE_SIZE;
325 victim->len = len;
326 if (victim->str)
328 #ifdef DEBUG_REGEX
329 if (trace_file)
330 xfprintf (trace_file, "flush:{%s}\n", victim->str);
331 #endif /* DEBUG_REGEX */
332 free (victim->str);
333 regfree (victim->buf);
334 free (victim->buf);
336 victim->str = xstrdup (str);
337 victim->buf = new_buf;
338 new_regs = &victim->regs;
339 re_set_registers (new_buf, new_regs, new_regs->num_regs,
340 new_regs->start, new_regs->end);
341 *buf = new_buf;
342 *regs = new_regs;
343 return NULL;
346 /*----------------------------------------.
347 | Clean up regular expression variables. |
348 `----------------------------------------*/
350 static void
351 free_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
353 regfree (buf);
354 free (regs->start);
355 free (regs->end);
358 /*-----------------------------------------------------------------.
359 | Set the regular expression of --warn-macro-sequence that will be |
360 | checked during define and pushdef. Exit on failure. |
361 `-----------------------------------------------------------------*/
362 void
363 set_macro_sequence (const char *regexp)
365 const char *msg;
367 if (!regexp)
368 regexp = DEFAULT_MACRO_SEQUENCE;
369 else if (regexp[0] == '\0')
371 macro_sequence_inuse = false;
372 return;
375 msg = re_compile_pattern (regexp, strlen (regexp), &macro_sequence_buf);
376 if (msg != NULL)
378 M4ERROR ((EXIT_FAILURE, 0,
379 "--warn-macro-sequence: bad regular expression `%s': %s",
380 regexp, msg));
382 re_set_registers (&macro_sequence_buf, &macro_sequence_regs,
383 macro_sequence_regs.num_regs,
384 macro_sequence_regs.start, macro_sequence_regs.end);
385 macro_sequence_inuse = true;
388 /*------------------------------------------------------.
389 | Free dynamic memory utilized by regular expressions. |
390 `------------------------------------------------------*/
391 void
392 free_regex (void)
394 int i;
395 free_pattern_buffer (&macro_sequence_buf, &macro_sequence_regs);
396 for (i = 0; i < REGEX_CACHE_SIZE; i++)
397 if (regex_cache[i].str)
399 free (regex_cache[i].str);
400 free_pattern_buffer (regex_cache[i].buf, &regex_cache[i].regs);
401 free (regex_cache[i].buf);
405 /*-------------------------------------------------------------------------.
406 | Define a predefined or user-defined macro, with name NAME, and expansion |
407 | TEXT. MODE destinguishes between the "define" and the "pushdef" case. |
408 | It is also used from main (). |
409 `-------------------------------------------------------------------------*/
411 void
412 define_user_macro (const char *name, const char *text, symbol_lookup mode)
414 symbol *s;
415 char *defn = xstrdup (text ? text : "");
417 s = lookup_symbol (name, mode);
418 if (SYMBOL_TYPE (s) == TOKEN_TEXT)
419 free (SYMBOL_TEXT (s));
421 SYMBOL_TYPE (s) = TOKEN_TEXT;
422 SYMBOL_TEXT (s) = defn;
424 /* Implement --warn-macro-sequence. */
425 if (macro_sequence_inuse && text)
427 regoff_t offset = 0;
428 size_t len = strlen (defn);
430 while ((offset = re_search (&macro_sequence_buf, defn, len, offset,
431 len - offset, &macro_sequence_regs)) >= 0)
433 /* Skip empty matches. */
434 if (macro_sequence_regs.start[0] == macro_sequence_regs.end[0])
435 offset++;
436 else
438 char tmp;
439 offset = macro_sequence_regs.end[0];
440 tmp = defn[offset];
441 defn[offset] = '\0';
442 M4ERROR ((warning_status, 0,
443 "Warning: definition of `%s' contains sequence `%s'",
444 name, defn + macro_sequence_regs.start[0]));
445 defn[offset] = tmp;
448 if (offset == -2)
449 M4ERROR ((warning_status, 0,
450 "error checking --warn-macro-sequence for macro `%s'",
451 name));
455 /*-----------------------------------------------.
456 | Initialize all builtin and predefined macros. |
457 `-----------------------------------------------*/
459 void
460 builtin_init (void)
462 const builtin *bp;
463 const predefined *pp;
464 char *string;
466 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
467 if (!no_gnu_extensions || !bp->gnu_extension)
469 if (prefix_all_builtins)
471 string = (char *) xmalloc (strlen (bp->name) + 4);
472 strcpy (string, "m4_");
473 strcat (string, bp->name);
474 define_builtin (string, bp, SYMBOL_INSERT);
475 free (string);
477 else
478 define_builtin (bp->name, bp, SYMBOL_INSERT);
481 for (pp = &predefined_tab[0]; pp->func != NULL; pp++)
482 if (no_gnu_extensions)
484 if (pp->unix_name != NULL)
485 define_user_macro (pp->unix_name, pp->func, SYMBOL_INSERT);
487 else
489 if (pp->gnu_name != NULL)
490 define_user_macro (pp->gnu_name, pp->func, SYMBOL_INSERT);
494 /*------------------------------------------------------------------------.
495 | Give friendly warnings if a builtin macro is passed an inappropriate |
496 | number of arguments. NAME is macro name for messages, ARGC is actual |
497 | number of arguments, MIN is the minimum number of acceptable arguments, |
498 | negative if not applicable, MAX is the maximum number, negative if not |
499 | applicable. |
500 `------------------------------------------------------------------------*/
502 static bool
503 bad_argc (token_data *name, int argc, int min, int max)
505 bool isbad = false;
507 if (min > 0 && argc < min)
509 if (!suppress_warnings)
510 M4ERROR ((warning_status, 0,
511 "Warning: too few arguments to builtin `%s'",
512 TOKEN_DATA_TEXT (name)));
513 isbad = true;
515 else if (max > 0 && argc > max && !suppress_warnings)
516 M4ERROR ((warning_status, 0,
517 "Warning: excess arguments to builtin `%s' ignored",
518 TOKEN_DATA_TEXT (name)));
520 return isbad;
523 /*--------------------------------------------------------------------------.
524 | The function numeric_arg () converts ARG to an int pointed to by VALUEP. |
525 | If the conversion fails, print error message for macro MACRO. Return |
526 | true iff conversion succeeds. |
527 `--------------------------------------------------------------------------*/
529 static bool
530 numeric_arg (token_data *macro, const char *arg, int *valuep)
532 char *endp;
534 if (*arg == '\0')
536 *valuep = 0;
537 M4ERROR ((warning_status, 0,
538 "empty string treated as 0 in builtin `%s'",
539 TOKEN_DATA_TEXT (macro)));
541 else
543 errno = 0;
544 *valuep = strtol (arg, &endp, 10);
545 if (*endp != '\0')
547 M4ERROR ((warning_status, 0,
548 "non-numeric argument to builtin `%s'",
549 TOKEN_DATA_TEXT (macro)));
550 return false;
552 if (isspace (to_uchar (*arg)))
553 M4ERROR ((warning_status, 0,
554 "leading whitespace ignored in builtin `%s'",
555 TOKEN_DATA_TEXT (macro)));
556 else if (errno == ERANGE)
557 M4ERROR ((warning_status, 0,
558 "numeric overflow detected in builtin `%s'",
559 TOKEN_DATA_TEXT (macro)));
561 return true;
/* Digits for number to ascii conversions.  */
static char const digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";

/* Convert VALUE to a signed ASCII representation in radix RADIX.  The
   result points into a static buffer that is overwritten by the next
   call.  RADIX is not validated here.  */
const char *
ntoa (int32_t value, int radix)
{
  static char str[256];
  char *cursor = str + sizeof str - 1;
  bool const negative = value < 0;
  /* Negating in unsigned arithmetic keeps the most negative value
     well defined.  */
  uint32_t magnitude = negative ? -(uint32_t) value : (uint32_t) value;

  /* Build the text backwards, starting from the terminator.  */
  *cursor = '\0';
  for (;;)
    {
      *--cursor = digits[magnitude % radix];
      magnitude /= radix;
      if (magnitude == 0)
        break;
    }

  if (negative)
    *--cursor = '-';
  return cursor;
}
/* Append the decimal text of VAL to the obstack OBS.  Used by macros
   that expand to numbers.  */
static void
shipout_int (struct obstack *obs, int val)
{
  const char *text = ntoa ((int32_t) val, 10);

  obstack_grow (obs, text, strlen (text));
}
619 /*----------------------------------------------------------------------.
620 | Print ARGC arguments from the table ARGV to obstack OBS, separated by |
621 | SEP, and quoted by the current quotes, if QUOTED is true. |
622 `----------------------------------------------------------------------*/
624 static void
625 dump_args (struct obstack *obs, int argc, token_data **argv,
626 const char *sep, bool quoted)
628 int i;
629 size_t len = strlen (sep);
631 for (i = 1; i < argc; i++)
633 if (i > 1)
634 obstack_grow (obs, sep, len);
635 if (quoted)
636 obstack_grow (obs, lquote.string, lquote.length);
637 obstack_grow (obs, ARG (i), strlen (ARG (i)));
638 if (quoted)
639 obstack_grow (obs, rquote.string, rquote.length);
/* The rest of this file is code for builtins and expansion of user
   defined macros.  All the functions for builtins have a prototype as:

	void m4_MACRONAME (struct obstack *obs, int argc, token_data **argv);

   The functions are expected to leave their expansion on the obstack OBS,
   as an unfinished object.  ARGV is a table of ARGC pointers to the
   individual arguments to the macro.  Please note that in general
   argv[argc] != NULL.  */
/* The first section contains macros for defining, undefining, examining,
   changing, ... other macros.  */
656 /*-------------------------------------------------------------------------.
657 | The function define_macro is common for the builtins "define", |
658 | "undefine", "pushdef" and "popdef". ARGC and ARGV is as for the caller, |
659 | and MODE argument determines how the macro name is entered into the |
660 | symbol table. |
661 `-------------------------------------------------------------------------*/
663 static void
664 define_macro (int argc, token_data **argv, symbol_lookup mode)
666 const builtin *bp;
668 if (bad_argc (argv[0], argc, 2, 3))
669 return;
671 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
673 M4ERROR ((warning_status, 0,
674 "Warning: %s: invalid macro name ignored", ARG (0)));
675 return;
678 if (argc == 2)
680 define_user_macro (ARG (1), "", mode);
681 return;
684 switch (TOKEN_DATA_TYPE (argv[2]))
686 case TOKEN_TEXT:
687 define_user_macro (ARG (1), ARG (2), mode);
688 break;
690 case TOKEN_FUNC:
691 bp = find_builtin_by_addr (TOKEN_DATA_FUNC (argv[2]));
692 if (bp == NULL)
693 return;
694 else
695 define_builtin (ARG (1), bp, mode);
696 break;
698 default:
699 assert (!"define_macro");
700 abort ();
704 static void
705 m4_define (struct obstack *obs, int argc, token_data **argv)
707 define_macro (argc, argv, SYMBOL_INSERT);
710 static void
711 m4_undefine (struct obstack *obs, int argc, token_data **argv)
713 int i;
714 if (bad_argc (argv[0], argc, 2, -1))
715 return;
716 for (i = 1; i < argc; i++)
717 lookup_symbol (ARG (i), SYMBOL_DELETE);
720 static void
721 m4_pushdef (struct obstack *obs, int argc, token_data **argv)
723 define_macro (argc, argv, SYMBOL_PUSHDEF);
726 static void
727 m4_popdef (struct obstack *obs, int argc, token_data **argv)
729 int i;
730 if (bad_argc (argv[0], argc, 2, -1))
731 return;
732 for (i = 1; i < argc; i++)
733 lookup_symbol (ARG (i), SYMBOL_POPDEF);
736 /*---------------------.
737 | Conditionals of m4. |
738 `---------------------*/
740 static void
741 m4_ifdef (struct obstack *obs, int argc, token_data **argv)
743 symbol *s;
744 const char *result;
746 if (bad_argc (argv[0], argc, 3, 4))
747 return;
748 s = lookup_symbol (ARG (1), SYMBOL_LOOKUP);
750 if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
751 result = ARG (2);
752 else if (argc >= 4)
753 result = ARG (3);
754 else
755 result = NULL;
757 if (result != NULL)
758 obstack_grow (obs, result, strlen (result));
761 static void
762 m4_ifelse (struct obstack *obs, int argc, token_data **argv)
764 const char *result;
765 token_data *argv0;
767 if (argc == 2)
768 return;
770 if (bad_argc (argv[0], argc, 4, -1))
771 return;
772 else
773 /* Diagnose excess arguments if 5, 8, 11, etc., actual arguments. */
774 bad_argc (argv[0], (argc + 2) % 3, -1, 1);
776 argv0 = argv[0];
777 argv++;
778 argc--;
780 result = NULL;
781 while (result == NULL)
783 if (strcmp (ARG (0), ARG (1)) == 0)
784 result = ARG (2);
786 else
787 switch (argc)
789 case 3:
790 return;
792 case 4:
793 case 5:
794 result = ARG (3);
795 break;
797 default:
798 argc -= 3;
799 argv += 3;
802 obstack_grow (obs, result, strlen (result));
805 /*---------------------------------------------------------------------.
806 | The function dump_symbol () is for use by "dumpdef". It builds up a |
807 | table of all defined, un-shadowed, symbols. |
808 `---------------------------------------------------------------------*/
810 /* The structure dump_symbol_data is used to pass the information needed
811 from call to call to dump_symbol. */
813 struct dump_symbol_data
815 struct obstack *obs; /* obstack for table */
816 symbol **base; /* base of table */
817 int size; /* size of table */
820 static void
821 dump_symbol (symbol *sym, void *arg)
823 struct dump_symbol_data *data = (struct dump_symbol_data *) arg;
824 if (!SYMBOL_SHADOWED (sym) && SYMBOL_TYPE (sym) != TOKEN_VOID)
826 obstack_blank (data->obs, sizeof (symbol *));
827 data->base = (symbol **) obstack_base (data->obs);
828 data->base[data->size++] = sym;
832 /*------------------------------------------------------------------------.
833 | qsort comparison routine, for sorting the table made in m4_dumpdef (). |
834 `------------------------------------------------------------------------*/
836 static int
837 dumpdef_cmp (const void *s1, const void *s2)
839 return strcmp (SYMBOL_NAME (* (symbol *const *) s1),
840 SYMBOL_NAME (* (symbol *const *) s2));
843 /*-------------------------------------------------------------------------.
844 | Implementation of "dumpdef" itself. It builds up a table of pointers to |
845 | symbols, sorts it and prints the sorted table. |
846 `-------------------------------------------------------------------------*/
848 static void
849 m4_dumpdef (struct obstack *obs, int argc, token_data **argv)
851 symbol *s;
852 int i;
853 struct dump_symbol_data data;
854 const builtin *bp;
856 data.obs = obs;
857 data.base = (symbol **) obstack_base (obs);
858 data.size = 0;
860 if (argc == 1)
862 hack_all_symbols (dump_symbol, &data);
864 else
866 for (i = 1; i < argc; i++)
868 s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
869 if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
870 dump_symbol (s, &data);
871 else
872 M4ERROR ((warning_status, 0,
873 "undefined macro `%s'", ARG (i)));
877 /* Make table of symbols invisible to expand_macro (). */
879 obstack_finish (obs);
881 qsort (data.base, data.size, sizeof (symbol *), dumpdef_cmp);
883 for (; data.size > 0; --data.size, data.base++)
885 DEBUG_PRINT1 ("%s:\t", SYMBOL_NAME (data.base[0]));
887 switch (SYMBOL_TYPE (data.base[0]))
889 case TOKEN_TEXT:
890 if (debug_level & DEBUG_TRACE_QUOTE)
891 DEBUG_PRINT3 ("%s%s%s\n",
892 lquote.string, SYMBOL_TEXT (data.base[0]), rquote.string);
893 else
894 DEBUG_PRINT1 ("%s\n", SYMBOL_TEXT (data.base[0]));
895 break;
897 case TOKEN_FUNC:
898 bp = find_builtin_by_addr (SYMBOL_FUNC (data.base[0]));
899 if (bp == NULL)
901 assert (!"m4_dumpdef");
902 abort ();
904 DEBUG_PRINT1 ("<%s>\n", bp->name);
905 break;
907 default:
908 assert (!"m4_dumpdef");
909 abort ();
910 break;
915 /*---------------------------------------------------------------------.
916 | The builtin "builtin" allows calls to builtin macros, even if their |
917 | definition has been overridden or shadowed. It is thus possible to |
918 | redefine builtins, and still access their original definition. This |
919 | macro is not available in compatibility mode. |
920 `---------------------------------------------------------------------*/
922 static void
923 m4_builtin (struct obstack *obs, int argc, token_data **argv)
925 const builtin *bp;
926 const char *name;
928 if (bad_argc (argv[0], argc, 2, -1))
929 return;
930 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
932 M4ERROR ((warning_status, 0,
933 "Warning: %s: invalid macro name ignored", ARG (0)));
934 return;
937 name = ARG (1);
938 bp = find_builtin_by_name (name);
939 if (bp->func == m4_placeholder)
940 M4ERROR ((warning_status, 0,
941 "undefined builtin `%s'", name));
942 else
944 int i;
945 if (!bp->groks_macro_args)
946 for (i = 2; i < argc; i++)
947 if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
949 TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
950 TOKEN_DATA_TEXT (argv[i]) = (char *) "";
952 bp->func (obs, argc - 1, argv + 1);
956 /*------------------------------------------------------------------------.
957 | The builtin "indir" allows indirect calls to macros, even if their name |
958 | is not a proper macro name. It is thus possible to define macros with |
959 | ill-formed names for internal use in larger macro packages. This macro |
960 | is not available in compatibility mode. |
961 `------------------------------------------------------------------------*/
963 static void
964 m4_indir (struct obstack *obs, int argc, token_data **argv)
966 symbol *s;
967 const char *name;
969 if (bad_argc (argv[0], argc, 2, -1))
970 return;
971 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
973 M4ERROR ((warning_status, 0,
974 "Warning: %s: invalid macro name ignored", ARG (0)));
975 return;
978 name = ARG (1);
979 s = lookup_symbol (name, SYMBOL_LOOKUP);
980 if (s == NULL || SYMBOL_TYPE (s) == TOKEN_VOID)
981 M4ERROR ((warning_status, 0,
982 "undefined macro `%s'", name));
983 else
985 int i;
986 if (!SYMBOL_MACRO_ARGS (s))
987 for (i = 2; i < argc; i++)
988 if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
990 TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
991 TOKEN_DATA_TEXT (argv[i]) = (char *) "";
993 call_macro (s, argc - 1, argv + 1, obs);
997 /*-------------------------------------------------------------------------.
998 | The macro "defn" returns the quoted definition of the macro named by the |
999 | first argument. If the macro is builtin, it will push a special |
1000 | macro-definition token on the input stack. |
1001 `-------------------------------------------------------------------------*/
1003 static void
1004 m4_defn (struct obstack *obs, int argc, token_data **argv)
1006 symbol *s;
1007 builtin_func *b;
1008 int i;
1010 if (bad_argc (argv[0], argc, 2, -1))
1011 return;
1013 for (i = 1; i < argc; i++)
1015 s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
1016 if (s == NULL)
1017 continue;
1019 switch (SYMBOL_TYPE (s))
1021 case TOKEN_TEXT:
1022 obstack_grow (obs, lquote.string, lquote.length);
1023 obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s)));
1024 obstack_grow (obs, rquote.string, rquote.length);
1025 break;
1027 case TOKEN_FUNC:
1028 b = SYMBOL_FUNC (s);
1029 if (b == m4_placeholder)
1030 M4ERROR ((warning_status, 0, "\
1031 builtin `%s' requested by frozen file is not supported", ARG (i)));
1032 else if (argc != 2)
1033 M4ERROR ((warning_status, 0,
1034 "Warning: cannot concatenate builtin `%s'",
1035 ARG (i)));
1036 else
1037 push_macro (b);
1038 break;
1040 default:
1041 assert (!"m4_defn");
1042 abort ();
1047 /*------------------------------------------------------------------------.
1048 | This section contains macros to handle the builtins "syscmd", "esyscmd" |
1049 | and "sysval". "esyscmd" is GNU specific. |
1050 `------------------------------------------------------------------------*/
/* Helper macros that split a process status into the two pieces that
   "sysval" combines: the exit value, and the terminating signal
   shifted left by eight bits.  */
#if UNIX || defined WEXITSTATUS
# define M4SYSVAL_EXITBITS(status)                       \
   (WIFEXITED (status) ? WEXITSTATUS (status) : 0)
# define M4SYSVAL_TERMSIGBITS(status)                    \
   (WIFSIGNALED (status) ? WTERMSIG (status) << 8 : 0)

#else /* !UNIX && !defined WEXITSTATUS */
/* Platforms such as mingw do not support the notion of reporting
   which signal terminated a process.  Furthermore if WEXITSTATUS was
   not provided, then the exit value is in the low eight bits.  The
   argument is parenthesized so that an expression argument keeps its
   grouping inside whatever expression the macro is used in.  */
# define M4SYSVAL_EXITBITS(status) (status)
# define M4SYSVAL_TERMSIGBITS(status) 0
#endif /* !UNIX && !defined WEXITSTATUS */

/* Fallback definitions if <stdlib.h> or <sys/wait.h> are inadequate.  */
#ifndef WEXITSTATUS
# define WEXITSTATUS(status) (((status) >> 8) & 0xff)
#endif
#ifndef WTERMSIG
# define WTERMSIG(status) ((status) & 0x7f)
#endif
#ifndef WIFSIGNALED
# define WIFSIGNALED(status) (WTERMSIG (status) != 0)
#endif
#ifndef WIFEXITED
# define WIFEXITED(status) (WTERMSIG (status) == 0)
#endif
1081 /* Exit code from last "syscmd" command. */
1082 static int sysval;
1084 static void
1085 m4_syscmd (struct obstack *obs, int argc, token_data **argv)
1087 if (bad_argc (argv[0], argc, 2, 2))
1089 /* The empty command is successful. */
1090 sysval = 0;
1091 return;
1094 debug_flush_files ();
1095 sysval = system (ARG (1));
1096 #if FUNC_SYSTEM_BROKEN
1097 /* OS/2 has a buggy system() that returns exit status in the lowest eight
1098 bits, although pclose() and WEXITSTATUS are defined to return exit
1099 status in the next eight bits. This approach can't detect signals, but
1100 at least syscmd(`ls') still works when stdout is a terminal. An
1101 alternate approach is popen/insert_file/pclose, but that makes stdout
1102 a pipe, which can change how some child processes behave. */
1103 if (sysval != -1)
1104 sysval <<= 8;
1105 #endif /* FUNC_SYSTEM_BROKEN */
1108 static void
1109 m4_esyscmd (struct obstack *obs, int argc, token_data **argv)
1111 FILE *pin;
1112 int ch;
1114 if (bad_argc (argv[0], argc, 2, 2))
1116 /* The empty command is successful. */
1117 sysval = 0;
1118 return;
1121 debug_flush_files ();
1122 errno = 0;
1123 pin = popen (ARG (1), "r");
1124 if (pin == NULL)
1126 M4ERROR ((warning_status, errno,
1127 "cannot open pipe to command `%s'", ARG (1)));
1128 sysval = -1;
1130 else
1132 while ((ch = getc (pin)) != EOF)
1133 obstack_1grow (obs, (char) ch);
1134 sysval = pclose (pin);
1138 static void
1139 m4_sysval (struct obstack *obs, int argc, token_data **argv)
1141 shipout_int (obs, (sysval == -1 ? 127
1142 : (M4SYSVAL_EXITBITS (sysval)
1143 | M4SYSVAL_TERMSIGBITS (sysval))));
1146 /*-------------------------------------------------------------------------.
1147 | This section contains the top level code for the "eval" builtin. The |
1148 | actual work is done in the function evaluate (), which lives in eval.c. |
1149 `-------------------------------------------------------------------------*/
1151 static void
1152 m4_eval (struct obstack *obs, int argc, token_data **argv)
1154 int32_t value = 0;
1155 int radix = 10;
1156 int min = 1;
1157 const char *s;
1159 if (bad_argc (argv[0], argc, 2, 4))
1160 return;
1162 if (*ARG (2) && !numeric_arg (argv[0], ARG (2), &radix))
1163 return;
1165 if (radix < 1 || radix > (int) strlen (digits))
1167 M4ERROR ((warning_status, 0,
1168 "radix %d in builtin `%s' out of range",
1169 radix, ARG (0)));
1170 return;
1173 if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &min))
1174 return;
1175 if (min < 0)
1177 M4ERROR ((warning_status, 0,
1178 "negative width to builtin `%s'", ARG (0)));
1179 return;
1182 if (!*ARG (1))
1183 M4ERROR ((warning_status, 0,
1184 "empty string treated as 0 in builtin `%s'", ARG (0)));
1185 else if (evaluate (ARG (1), &value))
1186 return;
1188 if (radix == 1)
1190 if (value < 0)
1192 obstack_1grow (obs, '-');
1193 value = -value;
1195 /* This assumes 2's-complement for correctly handling INT_MIN. */
1196 while (min-- - value > 0)
1197 obstack_1grow (obs, '0');
1198 while (value-- != 0)
1199 obstack_1grow (obs, '1');
1200 obstack_1grow (obs, '\0');
1201 return;
1204 s = ntoa (value, radix);
1206 if (*s == '-')
1208 obstack_1grow (obs, '-');
1209 s++;
1211 for (min -= strlen (s); --min >= 0;)
1212 obstack_1grow (obs, '0');
1214 obstack_grow (obs, s, strlen (s));
1217 static void
1218 m4_incr (struct obstack *obs, int argc, token_data **argv)
1220 int value;
1222 if (bad_argc (argv[0], argc, 2, 2))
1223 return;
1225 if (!numeric_arg (argv[0], ARG (1), &value))
1226 return;
1228 shipout_int (obs, value + 1);
1231 static void
1232 m4_decr (struct obstack *obs, int argc, token_data **argv)
1234 int value;
1236 if (bad_argc (argv[0], argc, 2, 2))
1237 return;
1239 if (!numeric_arg (argv[0], ARG (1), &value))
1240 return;
1242 shipout_int (obs, value - 1);
1245 /* This section contains the macros "divert", "undivert" and "divnum" for
1246 handling diversion. The utility functions used lives in output.c. */
1248 /*-----------------------------------------------------------------------.
1249 | Divert further output to the diversion given by ARGV[1]. Out of range |
1250 | means discard further output. |
1251 `-----------------------------------------------------------------------*/
1253 static void
1254 m4_divert (struct obstack *obs, int argc, token_data **argv)
1256 int i = 0;
1258 if (bad_argc (argv[0], argc, 1, 2))
1259 return;
1261 if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &i))
1262 return;
1264 make_diversion (i);
1267 /*-----------------------------------------------------.
1268 | Expand to the current diversion number, -1 if none. |
1269 `-----------------------------------------------------*/
1271 static void
1272 m4_divnum (struct obstack *obs, int argc, token_data **argv)
1274 if (bad_argc (argv[0], argc, 1, 1))
1275 return;
1276 shipout_int (obs, current_diversion);
1279 /*-----------------------------------------------------------------------.
1280 | Bring back the diversion given by the argument list. If none is |
1281 | specified, bring back all diversions. GNU specific is the option of |
1282 | undiverting named files, by passing a non-numeric argument to undivert |
1283 | (). |
1284 `-----------------------------------------------------------------------*/
1286 static void
1287 m4_undivert (struct obstack *obs, int argc, token_data **argv)
1289 int i, file;
1290 FILE *fp;
1291 char *endp;
1293 if (argc == 1)
1294 undivert_all ();
1295 else
1296 for (i = 1; i < argc; i++)
1298 file = strtol (ARG (i), &endp, 10);
1299 if (*endp == '\0' && !isspace (to_uchar (*ARG (i))))
1300 insert_diversion (file);
1301 else if (no_gnu_extensions)
1302 M4ERROR ((warning_status, 0,
1303 "non-numeric argument to builtin `%s'", ARG (0)));
1304 else
1306 fp = m4_path_search (ARG (i), NULL);
1307 if (fp != NULL)
1309 insert_file (fp);
1310 if (fclose (fp) == EOF)
1311 M4ERROR ((warning_status, errno,
1312 "error undiverting `%s'", ARG (i)));
1314 else
1315 M4ERROR ((warning_status, errno,
1316 "cannot undivert `%s'", ARG (i)));
1321 /* This section contains various macros, which does not fall into any
1322 specific group. These are "dnl", "shift", "changequote", "changecom"
1323 and "changeword". */
1325 /*------------------------------------------------------------------------.
1326 | Delete all subsequent whitespace from input. The function skip_line () |
1327 | lives in input.c. |
1328 `------------------------------------------------------------------------*/
1330 static void
1331 m4_dnl (struct obstack *obs, int argc, token_data **argv)
1333 if (bad_argc (argv[0], argc, 1, 1))
1334 return;
1336 skip_line (ARG (0));
1339 /*-------------------------------------------------------------------------.
1340 | Shift all argument one to the left, discarding the first argument. Each |
1341 | output argument is quoted with the current quotes. |
1342 `-------------------------------------------------------------------------*/
1344 static void
1345 m4_shift (struct obstack *obs, int argc, token_data **argv)
1347 if (bad_argc (argv[0], argc, 2, -1))
1348 return;
1349 dump_args (obs, argc - 1, argv + 1, ",", true);
1352 /*--------------------------------------------------------------------------.
1353 | Change the current quotes. The function set_quotes () lives in input.c. |
1354 `--------------------------------------------------------------------------*/
1356 static void
1357 m4_changequote (struct obstack *obs, int argc, token_data **argv)
1359 if (bad_argc (argv[0], argc, 1, 3))
1360 return;
1362 /* Explicit NULL distinguishes between empty and missing argument. */
1363 set_quotes ((argc >= 2) ? ARG (1) : NULL,
1364 (argc >= 3) ? ARG (2) : NULL);
1367 /*--------------------------------------------------------------------.
1368 | Change the current comment delimiters. The function set_comment () |
1369 | lives in input.c. |
1370 `--------------------------------------------------------------------*/
1372 static void
1373 m4_changecom (struct obstack *obs, int argc, token_data **argv)
1375 if (bad_argc (argv[0], argc, 1, 3))
1376 return;
1378 /* Explicit NULL distinguishes between empty and missing argument. */
1379 set_comment ((argc >= 2) ? ARG (1) : NULL,
1380 (argc >= 3) ? ARG (2) : NULL);
#ifdef ENABLE_CHANGEWORD

/* Change the regular expression used for breaking the input into
   words.  The function set_word_regexp () lives in input.c.  Only
   compiled when ENABLE_CHANGEWORD is defined.  */
static void
m4_changeword (struct obstack *obs, int argc, token_data **argv)
{
  if (!bad_argc (argv[0], argc, 2, 2))
    set_word_regexp (ARG (1));
}

#endif /* ENABLE_CHANGEWORD */
1401 /* This section contains macros for inclusion of other files -- "include"
1402 and "sinclude". This differs from bringing back diversions, in that
1403 the input is scanned before being copied to the output. */
1405 /*-------------------------------------------------------------------------.
1406 | Generic include function. Include the file given by the first argument, |
1407 | if it exists. Complain about inaccesible files iff SILENT is false. |
1408 `-------------------------------------------------------------------------*/
1410 static void
1411 include (int argc, token_data **argv, bool silent)
1413 FILE *fp;
1414 char *name;
1416 if (bad_argc (argv[0], argc, 2, 2))
1417 return;
1419 fp = m4_path_search (ARG (1), &name);
1420 if (fp == NULL)
1422 if (!silent)
1424 M4ERROR ((warning_status, errno, "cannot open `%s'", ARG (1)));
1425 retcode = EXIT_FAILURE;
1427 return;
1430 push_file (fp, name, true);
1431 free (name);
1434 /*------------------------------------------------.
1435 | Include a file, complaining in case of errors. |
1436 `------------------------------------------------*/
1438 static void
1439 m4_include (struct obstack *obs, int argc, token_data **argv)
1441 include (argc, argv, false);
1444 /*----------------------------------.
1445 | Include a file, ignoring errors. |
1446 `----------------------------------*/
1448 static void
1449 m4_sinclude (struct obstack *obs, int argc, token_data **argv)
1451 include (argc, argv, true);
/* More miscellaneous builtins -- "maketemp", "errprint", "__file__",
   "__line__", and "__program__".  The last three are GNU specific.  */

/* Use NAME as a template for a temporary file name.  Add trailing
   'X' to NAME if necessary, securely create the file with mkstemp (),
   and place the new file name on OBS.  On failure an error is
   reported and the partially built name is discarded from OBS.  */
static void
mkstemp_helper (struct obstack *obs, const char *name)
{
  int name_len = strlen (name);
  int trailing = 0;
  int missing;
  int fd;

  obstack_grow (obs, name, name_len);

  /* Guarantee that there are six trailing 'X' characters, even if the
     user forgot to supply them: count those already present (at most
     six), then append the shortfall.  */
  while (trailing < 6 && trailing < name_len
	 && name[name_len - 1 - trailing] == 'X')
    trailing++;
  for (missing = 6 - trailing; missing > 0; missing--)
    obstack_1grow (obs, 'X');
  obstack_1grow (obs, '\0');

  /* mkstemp () rewrites the template in place inside the obstack.  */
  errno = 0;
  fd = mkstemp ((char *) obstack_base (obs));
  if (fd >= 0)
    close (fd);
  else
    {
      M4ERROR ((0, errno, "cannot create tempfile `%s'", name));
      obstack_free (obs, obstack_finish (obs));
    }
}
1492 static void
1493 m4_maketemp (struct obstack *obs, int argc, token_data **argv)
1495 if (bad_argc (argv[0], argc, 2, 2))
1496 return;
1497 if (no_gnu_extensions)
1499 /* POSIX states "any trailing 'X' characters [are] replaced with
1500 the current process ID as a string", without referencing the
1501 file system. Horribly insecure, but we have to do it when we
1502 are in traditional mode.
1504 For reference, Solaris m4 does:
1505 maketemp() -> `'
1506 maketemp(X) -> `X'
1507 maketemp(XX) -> `Xn', where n is last digit of pid
1508 maketemp(XXXXXXXX) -> `X00nnnnn', where nnnnn is 16-bit pid
1510 const char *str = ARG (1);
1511 int len = strlen (str);
1512 int i;
1513 int len2;
1515 M4ERROR ((warning_status, 0, "recommend using mkstemp instead"));
1516 for (i = len; i > 1; i--)
1517 if (str[i - 1] != 'X')
1518 break;
1519 obstack_grow (obs, str, i);
1520 str = ntoa ((int32_t) getpid (), 10);
1521 len2 = strlen (str);
1522 if (len2 > len - i)
1523 obstack_grow0 (obs, str + len2 - (len - i), len - i);
1524 else
1526 while (i++ < len - len2)
1527 obstack_1grow (obs, '0');
1528 obstack_grow0 (obs, str, len2);
1531 else
1532 mkstemp_helper (obs, ARG (1));
1535 static void
1536 m4_mkstemp (struct obstack *obs, int argc, token_data **argv)
1538 if (bad_argc (argv[0], argc, 2, 2))
1539 return;
1540 mkstemp_helper (obs, ARG (1));
1543 /*----------------------------------------.
1544 | Print all arguments on standard error. |
1545 `----------------------------------------*/
1547 static void
1548 m4_errprint (struct obstack *obs, int argc, token_data **argv)
1550 if (bad_argc (argv[0], argc, 2, -1))
1551 return;
1552 dump_args (obs, argc, argv, " ", false);
1553 obstack_1grow (obs, '\0');
1554 debug_flush_files ();
1555 xfprintf (stderr, "%s", (char *) obstack_finish (obs));
1556 fflush (stderr);
1559 static void
1560 m4___file__ (struct obstack *obs, int argc, token_data **argv)
1562 if (bad_argc (argv[0], argc, 1, 1))
1563 return;
1564 obstack_grow (obs, lquote.string, lquote.length);
1565 obstack_grow (obs, current_file, strlen (current_file));
1566 obstack_grow (obs, rquote.string, rquote.length);
1569 static void
1570 m4___line__ (struct obstack *obs, int argc, token_data **argv)
1572 if (bad_argc (argv[0], argc, 1, 1))
1573 return;
1574 shipout_int (obs, current_line);
1577 static void
1578 m4___program__ (struct obstack *obs, int argc, token_data **argv)
1580 if (bad_argc (argv[0], argc, 1, 1))
1581 return;
1582 obstack_grow (obs, lquote.string, lquote.length);
1583 obstack_grow (obs, program_name, strlen (program_name));
1584 obstack_grow (obs, rquote.string, rquote.length);
1587 /* This section contains various macros for exiting, saving input until
1588 EOF is seen, and tracing macro calls. That is: "m4exit", "m4wrap",
1589 "traceon" and "traceoff". */
1591 /*-------------------------------------------------------------------------.
1592 | Exit immediately, with exitcode specified by the first argument, 0 if no |
1593 | arguments are present. |
1594 `-------------------------------------------------------------------------*/
1596 static void
1597 m4_m4exit (struct obstack *obs, int argc, token_data **argv)
1599 int exit_code = EXIT_SUCCESS;
1601 /* Warn on bad arguments, but still exit. */
1602 bad_argc (argv[0], argc, 1, 2);
1603 if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &exit_code))
1604 exit_code = EXIT_FAILURE;
1605 if (exit_code < 0 || exit_code > 255)
1607 M4ERROR ((warning_status, 0,
1608 "exit status out of range: `%d'", exit_code));
1609 exit_code = EXIT_FAILURE;
1611 /* Change debug stream back to stderr, to force flushing debug stream and
1612 detect any errors it might have encountered. */
1613 debug_set_output (NULL);
1614 debug_flush_files ();
1615 if (exit_code == EXIT_SUCCESS && retcode != EXIT_SUCCESS)
1616 exit_code = retcode;
1617 /* Propagate non-zero status to atexit handlers. */
1618 if (exit_code != EXIT_SUCCESS)
1619 exit_failure = exit_code;
1620 exit (exit_code);
1623 /*-------------------------------------------------------------------------.
1624 | Save the argument text until EOF has been seen, allowing for user |
1625 | specified cleanup action. GNU version saves all arguments, the standard |
1626 | version only the first. |
1627 `-------------------------------------------------------------------------*/
1629 static void
1630 m4_m4wrap (struct obstack *obs, int argc, token_data **argv)
1632 if (bad_argc (argv[0], argc, 2, -1))
1633 return;
1634 if (no_gnu_extensions)
1635 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1636 else
1637 dump_args (obs, argc, argv, " ", false);
1638 obstack_1grow (obs, '\0');
1639 push_wrapup ((char *) obstack_finish (obs));
1642 /* Enable tracing of all specified macros, or all, if none is specified.
1643 Tracing is disabled by default, when a macro is defined. This can be
1644 overridden by the "t" debug flag. */
1646 /*-----------------------------------------------------------------------.
1647 | Set_trace () is used by "traceon" and "traceoff" to enable and disable |
1648 | tracing of a macro. It disables tracing if DATA is NULL, otherwise it |
1649 | enable tracing. |
1650 `-----------------------------------------------------------------------*/
1652 static void
1653 set_trace (symbol *sym, void *data)
1655 SYMBOL_TRACED (sym) = data != NULL;
1656 /* Remove placeholder from table if macro is undefined and untraced. */
1657 if (SYMBOL_TYPE (sym) == TOKEN_VOID && data == NULL)
1658 lookup_symbol (SYMBOL_NAME (sym), SYMBOL_POPDEF);
1661 static void
1662 m4_traceon (struct obstack *obs, int argc, token_data **argv)
1664 symbol *s;
1665 int i;
1667 if (argc == 1)
1668 hack_all_symbols (set_trace, obs);
1669 else
1670 for (i = 1; i < argc; i++)
1672 s = lookup_symbol (ARG (i), SYMBOL_INSERT);
1673 set_trace (s, obs);
1677 /*------------------------------------------------------------------------.
1678 | Disable tracing of all specified macros, or all, if none is specified. |
1679 `------------------------------------------------------------------------*/
1681 static void
1682 m4_traceoff (struct obstack *obs, int argc, token_data **argv)
1684 symbol *s;
1685 int i;
1687 if (argc == 1)
1688 hack_all_symbols (set_trace, NULL);
1689 else
1690 for (i = 1; i < argc; i++)
1692 s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
1693 if (s != NULL)
1694 set_trace (s, NULL);
1698 /*----------------------------------------------------------------------.
1699 | On-the-fly control of the format of the tracing output. It takes one |
1700 | argument, which is a character string like given to the -d option, or |
1701 | none in which case the debug_level is zeroed. |
1702 `----------------------------------------------------------------------*/
1704 static void
1705 m4_debugmode (struct obstack *obs, int argc, token_data **argv)
1707 int new_debug_level;
1708 int change_flag;
1710 if (bad_argc (argv[0], argc, 1, 2))
1711 return;
1713 if (argc == 1)
1714 debug_level = 0;
1715 else
1717 if (ARG (1)[0] == '+' || ARG (1)[0] == '-')
1719 change_flag = ARG (1)[0];
1720 new_debug_level = debug_decode (ARG (1) + 1);
1722 else
1724 change_flag = 0;
1725 new_debug_level = debug_decode (ARG (1));
1728 if (new_debug_level < 0)
1729 M4ERROR ((warning_status, 0,
1730 "Debugmode: bad debug flags: `%s'", ARG (1)));
1731 else
1733 switch (change_flag)
1735 case 0:
1736 debug_level = new_debug_level;
1737 break;
1739 case '+':
1740 debug_level |= new_debug_level;
1741 break;
1743 case '-':
1744 debug_level &= ~new_debug_level;
1745 break;
1751 /*-------------------------------------------------------------------------.
1752 | Specify the destination of the debugging output. With one argument, the |
1753 | argument is taken as a file name, with no arguments, revert to stderr. |
1754 `-------------------------------------------------------------------------*/
1756 static void
1757 m4_debugfile (struct obstack *obs, int argc, token_data **argv)
1759 if (bad_argc (argv[0], argc, 1, 2))
1760 return;
1762 if (argc == 1)
1763 debug_set_output (NULL);
1764 else if (!debug_set_output (ARG (1)))
1765 M4ERROR ((warning_status, errno,
1766 "cannot set error file: `%s'", ARG (1)));
1769 /* This section contains text processing macros: "len", "index",
1770 "substr", "translit", "format", "regexp" and "patsubst". The last
1771 three are GNU specific. */
1773 /*---------------------------------------------.
1774 | Expand to the length of the first argument. |
1775 `---------------------------------------------*/
1777 static void
1778 m4_len (struct obstack *obs, int argc, token_data **argv)
1780 if (bad_argc (argv[0], argc, 2, 2))
1781 return;
1782 shipout_int (obs, strlen (ARG (1)));
1785 /*-------------------------------------------------------------------------.
1786 | The macro expands to the first index of the second argument in the first |
1787 | argument. |
1788 `-------------------------------------------------------------------------*/
1790 static void
1791 m4_index (struct obstack *obs, int argc, token_data **argv)
1793 const char *haystack;
1794 const char *needle;
1795 const char *result = NULL;
1796 int retval = -1;
1798 if (bad_argc (argv[0], argc, 3, 3))
1800 /* builtin(`index') is blank, but index(`abc') is 0. */
1801 if (argc == 2)
1802 shipout_int (obs, 0);
1803 return;
1806 haystack = ARG (1);
1807 needle = ARG (2);
1809 /* Optimize searching for the empty string (always 0) and one byte
1810 (strchr tends to be more efficient than strstr). */
1811 if (!needle[0])
1812 retval = 0;
1813 else if (!needle[1])
1814 result = strchr (haystack, *needle);
1815 else
1816 result = strstr (haystack, needle);
1817 if (result)
1818 retval = result - haystack;
1820 shipout_int (obs, retval);
1823 /*-------------------------------------------------------------------------.
1824 | The macro "substr" extracts substrings from the first argument, starting |
1825 | from the index given by the second argument, extending for a length |
1826 | given by the third argument. If the third argument is missing, the |
1827 | substring extends to the end of the first argument. |
1828 `-------------------------------------------------------------------------*/
1830 static void
1831 m4_substr (struct obstack *obs, int argc, token_data **argv)
1833 int start = 0;
1834 int length, avail;
1836 if (bad_argc (argv[0], argc, 3, 4))
1838 /* builtin(`substr') is blank, but substr(`abc') is abc. */
1839 if (argc == 2)
1840 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1841 return;
1844 length = avail = strlen (ARG (1));
1845 if (!numeric_arg (argv[0], ARG (2), &start))
1846 return;
1848 if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &length))
1849 return;
1851 if (start < 0 || length <= 0 || start >= avail)
1852 return;
1854 if (start + length > avail)
1855 length = avail - start;
1856 obstack_grow (obs, ARG (1) + start, length);
/* For "translit", ranges are allowed in the second and third
   argument.  They are expanded in the following function, and the
   expanded strings, without any ranges left, are used to translate
   the characters of the first argument.  A single - (dash) can be
   included in the strings by being the first or the last character
   in the string.  If the first character in a range is after the
   first in the character set, the range is made backwards, thus 9-0
   is the string 9876543210.

   The expansion of S is built on OBS as a NUL-terminated string,
   which is finished and returned.  */
static const char *
expand_ranges (const char *s, struct obstack *obs)
{
  unsigned char prev = '\0';	/* character preceding *S, NUL at start */
  unsigned char last;		/* upper end of the range being expanded */
  int c;

  while (*s != '\0')
    {
      if (*s != '-' || prev == '\0')
	obstack_1grow (obs, *s);
      else
	{
	  last = to_uchar (*++s);
	  if (last == '\0')
	    {
	      /* trailing dash */
	      obstack_1grow (obs, '-');
	      break;
	    }

	  /* PREV itself was already emitted on the previous round, so
	     start one step past it and run through LAST inclusive.  */
	  if (prev <= last)
	    for (c = prev + 1; c <= last; c++)
	      obstack_1grow (obs, c);
	  else
	    for (c = prev - 1; c >= last; c--)
	      obstack_1grow (obs, c);
	}
      prev = to_uchar (*s++);
    }

  obstack_1grow (obs, '\0');
  return (char *) obstack_finish (obs);
}
1904 /*----------------------------------------------------------------------.
1905 | The macro "translit" translates all characters in the first argument, |
1906 | which are present in the second argument, into the corresponding |
1907 | character from the third argument. If the third argument is shorter |
1908 | than the second, the extra characters in the second argument, are |
1909 | deleted from the first (pueh). |
1910 `----------------------------------------------------------------------*/
1912 static void
1913 m4_translit (struct obstack *obs, int argc, token_data **argv)
1915 const char *data;
1916 const char *from;
1917 const char *to;
1918 char map[256] = {0};
1919 char found[256] = {0};
1920 unsigned char ch;
1922 if (bad_argc (argv[0], argc, 3, 4))
1924 /* builtin(`translit') is blank, but translit(`abc') is abc. */
1925 if (argc == 2)
1926 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1927 return;
1930 from = ARG (2);
1931 if (strchr (from, '-') != NULL)
1933 from = expand_ranges (from, obs);
1934 if (from == NULL)
1935 return;
1938 to = ARG (3);
1939 if (strchr (to, '-') != NULL)
1941 to = expand_ranges (to, obs);
1942 if (to == NULL)
1943 return;
1946 /* Calling strchr(from) for each character in data is quadratic,
1947 since both strings can be arbitrarily long. Instead, create a
1948 from-to mapping in one pass of from, then use that map in one
1949 pass of data, for linear behavior. Traditional behavior is that
1950 only the first instance of a character in from is consulted,
1951 hence the found map. */
1952 for ( ; (ch = *from) != '\0'; from++)
1954 if (!found[ch])
1956 found[ch] = 1;
1957 map[ch] = *to;
1959 if (*to != '\0')
1960 to++;
1963 for (data = ARG (1); (ch = *data) != '\0'; data++)
1965 if (!found[ch])
1966 obstack_1grow (obs, ch);
1967 else if (map[ch])
1968 obstack_1grow (obs, map[ch]);
1972 /*--------------------------------------------------------------.
1973 | Frontend for *printf like formatting. The function format () |
1974 | lives in the file format.c. |
1975 `--------------------------------------------------------------*/
1977 static void
1978 m4_format (struct obstack *obs, int argc, token_data **argv)
1980 if (bad_argc (argv[0], argc, 2, -1))
1981 return;
1982 format (obs, argc - 1, argv + 1);
1985 /*-------------------------------------------------------------------------.
1986 | Function to perform substitution by regular expressions. Used by the |
1987 | builtins regexp and patsubst. The changed text is placed on the |
1988 | obstack. The substitution is REPL, with \& substituted by this part of |
1989 | VICTIM matched by the last whole regular expression, taken from REGS[0], |
1990 | and \N substituted by the text matched by the Nth parenthesized |
1991 | sub-expression, taken from REGS[N]. |
1992 `-------------------------------------------------------------------------*/
1994 static int substitute_warned = 0;
1996 static void
1997 substitute (struct obstack *obs, const char *victim, const char *repl,
1998 struct re_registers *regs)
2000 int ch;
2002 for (;;)
2004 while ((ch = *repl++) != '\\')
2006 if (ch == '\0')
2007 return;
2008 obstack_1grow (obs, ch);
2011 switch ((ch = *repl++))
2013 case '0':
2014 if (!substitute_warned)
2016 M4ERROR ((warning_status, 0, "\
2017 Warning: \\0 will disappear, use \\& instead in replacements"));
2018 substitute_warned = 1;
2020 /* Fall through. */
2022 case '&':
2023 if (regs)
2024 obstack_grow (obs, victim + regs->start[0],
2025 regs->end[0] - regs->start[0]);
2026 break;
2028 case '1': case '2': case '3': case '4': case '5': case '6':
2029 case '7': case '8': case '9':
2030 ch -= '0';
2031 if (!regs || regs->num_regs - 1 <= ch)
2032 M4ERROR ((warning_status, 0,
2033 "Warning: sub-expression %d not present", ch));
2034 else if (regs->end[ch] > 0)
2035 obstack_grow (obs, victim + regs->start[ch],
2036 regs->end[ch] - regs->start[ch]);
2037 break;
2039 case '\0':
2040 M4ERROR ((warning_status, 0,
2041 "Warning: trailing \\ ignored in replacement"));
2042 return;
2044 default:
2045 obstack_1grow (obs, ch);
2046 break;
2051 /*------------------------------------------.
2052 | Initialize regular expression variables. |
2053 `------------------------------------------*/
2055 void
2056 init_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
2058 buf->translate = NULL;
2059 buf->fastmap = NULL;
2060 buf->buffer = NULL;
2061 buf->allocated = 0;
2062 if (regs)
2064 regs->start = NULL;
2065 regs->end = NULL;
2069 /*------------------------------------------------------------------.
2070 | Regular expression version of index. Given two arguments, expand |
2071 | to the index of the first match of the second argument (a regexp) |
2072 | in the first. Expand to -1 if there is no match. Given a third |
2073 | argument, a match is substituted according to this argument. |
2074 `------------------------------------------------------------------*/
2076 static void
2077 m4_regexp (struct obstack *obs, int argc, token_data **argv)
2079 const char *victim; /* first argument */
2080 const char *regexp; /* regular expression */
2081 const char *repl; /* replacement string */
2083 struct re_pattern_buffer *buf;/* compiled regular expression */
2084 struct re_registers *regs; /* for subexpression matches */
2085 const char *msg; /* error message from re_compile_pattern */
2086 int startpos; /* start position of match */
2087 int length; /* length of first argument */
2089 if (bad_argc (argv[0], argc, 3, 4))
2091 /* builtin(`regexp') is blank, but regexp(`abc') is 0. */
2092 if (argc == 2)
2093 shipout_int (obs, 0);
2094 return;
2097 victim = ARG (1);
2098 regexp = ARG (2);
2099 repl = ARG (3);
2101 if (!*regexp)
2103 /* The empty regex matches everything! */
2104 if (argc == 3)
2105 shipout_int (obs, 0);
2106 else
2107 substitute (obs, victim, repl, NULL);
2108 return;
2111 #ifdef DEBUG_REGEX
2112 if (trace_file)
2113 xfprintf (trace_file, "r:{%s}:%s%s%s\n", regexp,
2114 argc == 3 ? "" : "{", repl, argc == 3 ? "" : "}");
2115 #endif /* DEBUG_REGEX */
2117 msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
2118 if (msg != NULL)
2120 M4ERROR ((warning_status, 0,
2121 "bad regular expression: `%s': %s", regexp, msg));
2122 return;
2125 length = strlen (victim);
2126 /* Avoid overhead of allocating regs if we won't use it. */
2127 startpos = re_search (buf, victim, length, 0, length,
2128 argc == 3 ? NULL : regs);
2130 if (startpos == -2)
2131 M4ERROR ((warning_status, 0,
2132 "error matching regular expression `%s'", regexp));
2133 else if (argc == 3)
2134 shipout_int (obs, startpos);
2135 else if (startpos >= 0)
2136 substitute (obs, victim, repl, regs);
2139 /*------------------------------------------------------------------.
2140 | Substitute all matches of a regexp occurring in a string. Each |
2141 | match of the second argument (a regexp) in the first argument is |
2142 | changed to the third argument, with \& substituted by the matched |
2143 | text, and \N substituted by the text matched by the Nth |
2144 | parenthesized sub-expression. |
2145 `------------------------------------------------------------------*/
2147 static void
2148 m4_patsubst (struct obstack *obs, int argc, token_data **argv)
2150 const char *victim; /* first argument */
2151 const char *regexp; /* regular expression */
2152 const char *repl;
2154 struct re_pattern_buffer *buf;/* compiled regular expression */
2155 struct re_registers *regs; /* for subexpression matches */
2156 const char *msg; /* error message from re_compile_pattern */
2157 int matchpos; /* start position of match */
2158 int offset; /* current match offset */
2159 int length; /* length of first argument */
2161 if (bad_argc (argv[0], argc, 3, 4))
2163 /* builtin(`patsubst') is blank, but patsubst(`abc') is abc. */
2164 if (argc == 2)
2165 obstack_grow (obs, ARG (1), strlen (ARG (1)));
2166 return;
2169 victim = ARG (1);
2170 regexp = ARG (2);
2171 repl = ARG (3);
2173 /* The empty regex matches everywhere, but if there is no
2174 replacement, we need not waste time with it. */
2175 if (!*regexp && !*repl)
2177 obstack_grow (obs, victim, strlen (victim));
2178 return;
2181 #ifdef DEBUG_REGEX
2182 if (trace_file)
2183 xfprintf (trace_file, "p:{%s}:{%s}\n", regexp, repl);
2184 #endif /* DEBUG_REGEX */
2186 msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
2187 if (msg != NULL)
2189 M4ERROR ((warning_status, 0,
2190 "bad regular expression `%s': %s", regexp, msg));
2191 return;
2194 length = strlen (victim);
2196 offset = 0;
2197 matchpos = 0;
2198 while (offset <= length)
2200 matchpos = re_search (buf, victim, length,
2201 offset, length - offset, regs);
2202 if (matchpos < 0)
2205 /* Match failed -- either error or there is no match in the
2206 rest of the string, in which case the rest of the string is
2207 copied verbatim. */
2209 if (matchpos == -2)
2210 M4ERROR ((warning_status, 0,
2211 "error matching regular expression `%s'", regexp));
2212 else if (offset < length)
2213 obstack_grow (obs, victim + offset, length - offset);
2214 break;
2217 /* Copy the part of the string that was skipped by re_search (). */
2219 if (matchpos > offset)
2220 obstack_grow (obs, victim + offset, matchpos - offset);
2222 /* Handle the part of the string that was covered by the match. */
2224 substitute (obs, victim, repl, regs);
2226 /* Update the offset to the end of the match. If the regexp
2227 matched a null string, advance offset one more, to avoid
2228 infinite loops. */
2230 offset = regs->end[0];
2231 if (regs->start[0] == regs->end[0])
2232 obstack_1grow (obs, victim[offset++]);
2234 obstack_1grow (obs, '\0');
2237 /* Finally, a placeholder builtin. This builtin is not installed by
2238 default, but when reading back frozen files, this is associated
2239 with any builtin we don't recognize (for example, if the frozen
2240 file was created with a changeword capable m4, but is then loaded
2241 by a different m4 that does not support changeword). This way, we
2242 can keep 'm4 -R' quiet in the common case that the user did not
2243 know or care about the builtin when the frozen file was created,
2244 while still flagging it as a potential error if an attempt is made
2245 to actually use the builtin. */
2247 /*--------------------------------------------------------------------.
2248 | Issue a warning that this macro is a placeholder for an unsupported |
2249 | builtin that was requested while reloading a frozen file. |
2250 `--------------------------------------------------------------------*/
2252 void
2253 m4_placeholder (struct obstack *obs, int argc, token_data **argv)
2255 M4ERROR ((warning_status, 0, "\
2256 builtin `%s' requested by frozen file is not supported", ARG (0)));
2259 /*-------------------------------------------------------------------------.
2260 | This function handles all expansion of user defined and predefined |
2261 | macros. It is called with an obstack OBS, where the macros expansion |
2262 | will be placed, as an unfinished object. SYM points to the macro |
2263 | definition, giving the expansion text. ARGC and ARGV are the arguments, |
2264 | as usual. |
2265 `-------------------------------------------------------------------------*/
2267 void
2268 expand_user_macro (struct obstack *obs, symbol *sym,
2269 int argc, token_data **argv)
2271 const char *text;
2272 int i;
2274 for (text = SYMBOL_TEXT (sym); *text != '\0';)
2276 if (*text != '$')
2278 obstack_1grow (obs, *text);
2279 text++;
2280 continue;
2282 text++;
2283 switch (*text)
2285 case '0': case '1': case '2': case '3': case '4':
2286 case '5': case '6': case '7': case '8': case '9':
2287 if (no_gnu_extensions)
2289 i = *text++ - '0';
2291 else
2293 for (i = 0; isdigit (to_uchar (*text)); text++)
2294 i = i*10 + (*text - '0');
2296 if (i < argc)
2297 obstack_grow (obs, ARG (i), strlen (ARG (i)));
2298 break;
2300 case '#': /* number of arguments */
2301 shipout_int (obs, argc - 1);
2302 text++;
2303 break;
2305 case '*': /* all arguments */
2306 case '@': /* ... same, but quoted */
2307 dump_args (obs, argc, argv, ",", *text == '@');
2308 text++;
2309 break;
2311 default:
2312 obstack_1grow (obs, '$');
2313 break;