Fix 'make distcheck'.
[m4/ericb.git] / src / builtin.c
blob0b7f5c137b279d21b80981efae0c063a9c2d3538
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2000, 2004, 2006, 2007
4 Free Software Foundation, Inc.
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 /* Code for all builtin macros, initialization of symbol table, and
23 expansion of user defined macros. */
25 #include "m4.h"
27 extern FILE *popen ();
29 #include "regex.h"
31 #if HAVE_SYS_WAIT_H
32 # include <sys/wait.h>
33 #endif
/* Text of argument I of the macro call being processed, or the empty
   string when fewer than I + 1 arguments were supplied.  The expansion
   refers to the caller's ARGC and ARGV, so it is only usable where both
   names are in scope.  */
35 #define ARG(i) (argc > (i) ? TOKEN_DATA_TEXT (argv[i]) : "")
37 /* Initialization of builtin and predefined macros. The table
38 "builtin_tab" is both used for initialization, and by the "builtin"
39 builtin. */
/* Forward-declare the static handler NAME with the signature shared by
   every builtin: an obstack pointer, an argument count and a vector of
   token_data pointers.  */
41 #define DECLARE(name) \
42 static void name (struct obstack *, int, token_data **)
44 DECLARE (m4___file__);
45 DECLARE (m4___line__);
46 DECLARE (m4___program__);
47 DECLARE (m4_builtin);
48 DECLARE (m4_changecom);
49 DECLARE (m4_changequote);
50 #ifdef ENABLE_CHANGEWORD
51 DECLARE (m4_changeword);
52 #endif
53 DECLARE (m4_debugmode);
54 DECLARE (m4_debugfile);
55 DECLARE (m4_decr);
56 DECLARE (m4_define);
57 DECLARE (m4_defn);
58 DECLARE (m4_divert);
59 DECLARE (m4_divnum);
60 DECLARE (m4_dnl);
61 DECLARE (m4_dumpdef);
62 DECLARE (m4_errprint);
63 DECLARE (m4_esyscmd);
64 DECLARE (m4_eval);
65 DECLARE (m4_format);
66 DECLARE (m4_ifdef);
67 DECLARE (m4_ifelse);
68 DECLARE (m4_include);
69 DECLARE (m4_incr);
70 DECLARE (m4_index);
71 DECLARE (m4_indir);
72 DECLARE (m4_len);
73 DECLARE (m4_m4exit);
74 DECLARE (m4_m4wrap);
75 DECLARE (m4_maketemp);
76 DECLARE (m4_mkstemp);
77 DECLARE (m4_patsubst);
78 DECLARE (m4_popdef);
79 DECLARE (m4_pushdef);
80 DECLARE (m4_regexp);
81 DECLARE (m4_shift);
82 DECLARE (m4_sinclude);
83 DECLARE (m4_substr);
84 DECLARE (m4_syscmd);
85 DECLARE (m4_sysval);
86 DECLARE (m4_traceoff);
87 DECLARE (m4_traceon);
88 DECLARE (m4_translit);
89 DECLARE (m4_undefine);
90 DECLARE (m4_undivert);
92 #undef DECLARE
/* Table of all builtins.  Per the column header below, each row holds the
   macro name, whether it is a GNU extension (builtin_init skips such rows
   when no_gnu_extensions is set), two flags that define_builtin copies
   onto the symbol (groks_macro_args and blind_if_no_args), and the handler
   function.  The row with a null name ends every scan of the table.  */
94 static builtin
95 builtin_tab[] =
98 /* name GNUext macros blind function */
100 { "__file__", true, false, false, m4___file__ },
101 { "__line__", true, false, false, m4___line__ },
102 { "__program__", true, false, false, m4___program__ },
103 { "builtin", true, true, true, m4_builtin },
104 { "changecom", false, false, false, m4_changecom },
105 { "changequote", false, false, false, m4_changequote },
106 #ifdef ENABLE_CHANGEWORD
107 { "changeword", true, false, true, m4_changeword },
108 #endif
109 { "debugmode", true, false, false, m4_debugmode },
110 { "debugfile", true, false, false, m4_debugfile },
111 { "decr", false, false, true, m4_decr },
112 { "define", false, true, true, m4_define },
113 { "defn", false, false, true, m4_defn },
114 { "divert", false, false, false, m4_divert },
115 { "divnum", false, false, false, m4_divnum },
116 { "dnl", false, false, false, m4_dnl },
117 { "dumpdef", false, false, false, m4_dumpdef },
118 { "errprint", false, false, true, m4_errprint },
119 { "esyscmd", true, false, true, m4_esyscmd },
120 { "eval", false, false, true, m4_eval },
121 { "format", true, false, true, m4_format },
122 { "ifdef", false, false, true, m4_ifdef },
123 { "ifelse", false, false, true, m4_ifelse },
124 { "include", false, false, true, m4_include },
125 { "incr", false, false, true, m4_incr },
126 { "index", false, false, true, m4_index },
127 { "indir", true, true, true, m4_indir },
128 { "len", false, false, true, m4_len },
129 { "m4exit", false, false, false, m4_m4exit },
130 { "m4wrap", false, false, true, m4_m4wrap },
131 { "maketemp", false, false, true, m4_maketemp },
132 { "mkstemp", false, false, true, m4_mkstemp },
133 { "patsubst", true, false, true, m4_patsubst },
134 { "popdef", false, false, true, m4_popdef },
135 { "pushdef", false, true, true, m4_pushdef },
136 { "regexp", true, false, true, m4_regexp },
137 { "shift", false, false, true, m4_shift },
138 { "sinclude", false, false, true, m4_sinclude },
139 { "substr", false, false, true, m4_substr },
140 { "syscmd", false, false, true, m4_syscmd },
141 { "sysval", false, false, false, m4_sysval },
142 { "traceoff", false, false, false, m4_traceoff },
143 { "traceon", false, false, false, m4_traceon },
144 { "translit", false, false, true, m4_translit },
145 { "undefine", false, false, true, m4_undefine },
146 { "undivert", false, false, false, m4_undivert },
148 { 0, false, false, false, 0 },
150 /* placeholder is intentionally stuck after the table end delimiter,
151 so that we can easily find it, while not treating it as a real
152 builtin. */
153 { "placeholder", true, false, false, m4_placeholder },
/* Predefined text macros.  builtin_init walks this table until it meets
   an entry whose func member is null; for each entry it defines unix_name
   when no_gnu_extensions is set, otherwise gnu_name, skipping whichever
   name is null, with func as the expansion text.  The rows appear to be
   laid out as { unix_name, gnu_name, func } -- confirm against the
   declaration of predefined.  */
156 static predefined const
157 predefined_tab[] =
159 #if UNIX
160 { "unix", "__unix__", "" },
161 #elif W32_NATIVE
162 { "windows", "__windows__", "" },
163 #elif OS2
164 { "os2", "__os2__", "" },
165 #else
166 # warning Platform macro not provided
167 #endif
168 { NULL, "__gnu__", "" },
170 { NULL, NULL, NULL },
173 /*----------------------------------------.
174 | Find the builtin, which lives on ADDR. |
175 `----------------------------------------*/
177 const builtin *
178 find_builtin_by_addr (builtin_func *func)
180 const builtin *bp;
182 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
183 if (bp->func == func)
184 return bp;
185 if (func == m4_placeholder)
186 return bp + 1;
187 return NULL;
190 /*----------------------------------------------------------.
191 | Find the builtin, which has NAME. On failure, return the |
192 | placeholder builtin. |
193 `----------------------------------------------------------*/
195 const builtin *
196 find_builtin_by_name (const char *name)
198 const builtin *bp;
200 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
201 if (strcmp (bp->name, name) == 0)
202 return bp;
203 return bp + 1;
206 /*-------------------------------------------------------------------------.
207 | Install a builtin macro with name NAME, bound to the C function given in |
208 | BP. MODE is SYMBOL_INSERT or SYMBOL_PUSHDEF. TRACED defines whether |
209 | NAME is to be traced. |
210 `-------------------------------------------------------------------------*/
212 void
213 define_builtin (const char *name, const builtin *bp, symbol_lookup mode)
215 symbol *sym;
217 sym = lookup_symbol (name, mode);
218 SYMBOL_TYPE (sym) = TOKEN_FUNC;
219 SYMBOL_MACRO_ARGS (sym) = bp->groks_macro_args;
220 SYMBOL_BLIND_NO_ARGS (sym) = bp->blind_if_no_args;
221 SYMBOL_FUNC (sym) = bp->func;
224 /* Storage for the compiled regular expression of
225 --warn-macro-sequence. */
226 static struct re_pattern_buffer macro_sequence_buf;
228 /* Storage for the matches of --warn-macro-sequence. */
229 static struct re_registers macro_sequence_regs;
231 /* True if --warn-macro-sequence is in effect. */
232 static bool macro_sequence_inuse;
234 /* Maybe this is worth making runtime tunable. Too small, and nothing
235 gets cached because the working set of active regex is larger than
236 the cache, and we are always swapping out entries. Too large, and
237 the time spent searching the cache for a match overtakes the time
238 saved by caching. For now, this size proved reasonable for the
239 typical working set of Autoconf 2.62. */
240 #define REGEX_CACHE_SIZE 16
/* One slot of regex_cache.  A slot is in use when STR is non-null.
   compile_pattern compares a request against LEN and STR, increments
   COUNT on a hit, and on a miss reuses the slot with the smallest COUNT,
   keeping its REGS for the newly compiled pattern.  */
242 /* Structure for caching compiled regex. */
243 struct m4_regex {
244 unsigned count; /* usage counter */
245 size_t len; /* length of string */
246 char *str; /* copy of compiled string */
247 struct re_pattern_buffer *buf; /* compiled regex, allocated */
248 struct re_registers regs; /* match registers, reused */
250 typedef struct m4_regex m4_regex;
252 /* Storage for the cache of regular expressions. */
253 static m4_regex regex_cache[REGEX_CACHE_SIZE];
255 #ifdef DEBUG_REGEX
256 extern FILE *trace_file;
257 #endif /* DEBUG_REGEX */
259 /*------------------------------------------------------------------.
260 | Compile STR, with length LEN, into a regex. On success, set BUF |
261 | and REGS to the compiled regex. Compilation is cached, so do not |
262 | free the results here; rather, use free_regex at the end of the |
263 | program. Return NULL on success, or an error message. |
264 `------------------------------------------------------------------*/
265 static const char *
266 compile_pattern (const char *str, size_t len, struct re_pattern_buffer **buf,
267 struct re_registers **regs)
269 int i;
270 m4_regex *victim;
271 unsigned victim_count;
272 struct re_pattern_buffer *new_buf;
273 struct re_registers *new_regs;
274 const char *msg;
276 /* First, check if STR is already cached. If so, increase its use
277 count and return it. */
278 for (i = 0; i < REGEX_CACHE_SIZE; i++)
279 if (len == regex_cache[i].len && regex_cache[i].str
280 && memcmp (str, regex_cache[i].str, len) == 0)
282 *buf = regex_cache[i].buf;
283 *regs = &regex_cache[i].regs;
284 regex_cache[i].count++;
285 #ifdef DEBUG_REGEX
286 if (trace_file)
287 xfprintf (trace_file, "cached:{%s}\n", str);
288 #endif /* DEBUG_REGEX */
289 return NULL;
292 /* Next, check if STR can be compiled. */
293 new_buf = xzalloc (sizeof *new_buf);
294 msg = re_compile_pattern (str, len, new_buf);
295 #ifdef DEBUG_REGEX
296 if (trace_file)
297 xfprintf (trace_file, "compile:{%s}\n", str);
298 #endif /* DEBUG_REGEX */
299 if (msg)
301 regfree (new_buf);
302 free (new_buf);
303 return msg;
306 /* Now, find a victim slot. Decrease the count of all entries, then
307 prime the count of the victim slot at REGEX_CACHE_SIZE. This
308 way, frequently used entries and newly created entries are least
309 likely to be victims next time we have a cache miss. */
310 victim = regex_cache;
311 victim_count = victim->count;
312 if (victim_count)
313 victim->count--;
314 for (i = 1; i < REGEX_CACHE_SIZE; i++)
316 if (regex_cache[i].count < victim_count)
318 victim_count = regex_cache[i].count;
319 victim = &regex_cache[i];
321 if (regex_cache[i].count)
322 regex_cache[i].count--;
324 victim->count = REGEX_CACHE_SIZE;
325 victim->len = len;
326 if (victim->str)
328 #ifdef DEBUG_REGEX
329 if (trace_file)
330 xfprintf (trace_file, "flush:{%s}\n", victim->str);
331 #endif /* DEBUG_REGEX */
332 free (victim->str);
333 regfree (victim->buf);
334 free (victim->buf);
336 victim->str = xstrdup (str);
337 victim->buf = new_buf;
338 new_regs = &victim->regs;
339 re_set_registers (new_buf, new_regs, new_regs->num_regs,
340 new_regs->start, new_regs->end);
341 *buf = new_buf;
342 *regs = new_regs;
343 return NULL;
346 /*----------------------------------------.
347 | Clean up regular expression variables. |
348 `----------------------------------------*/
350 static void
351 free_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
353 regfree (buf);
354 free (regs->start);
355 free (regs->end);
358 /*-----------------------------------------------------------------.
359 | Set the regular expression of --warn-macro-sequence that will be |
360 | checked during define and pushdef. Exit on failure. |
361 `-----------------------------------------------------------------*/
362 void
363 set_macro_sequence (const char *regexp)
365 const char *msg;
367 if (! regexp)
368 regexp = DEFAULT_MACRO_SEQUENCE;
369 else if (regexp[0] == '\0')
371 macro_sequence_inuse = false;
372 return;
375 msg = re_compile_pattern (regexp, strlen (regexp), &macro_sequence_buf);
376 if (msg != NULL)
378 M4ERROR ((EXIT_FAILURE, 0,
379 "--warn-macro-sequence: bad regular expression `%s': %s",
380 regexp, msg));
382 re_set_registers (&macro_sequence_buf, &macro_sequence_regs,
383 macro_sequence_regs.num_regs,
384 macro_sequence_regs.start, macro_sequence_regs.end);
385 macro_sequence_inuse = true;
388 /*------------------------------------------------------.
389 | Free dynamic memory utilized by regular expressions. |
390 `------------------------------------------------------*/
391 void
392 free_regex (void)
394 int i;
395 free_pattern_buffer (&macro_sequence_buf, &macro_sequence_regs);
396 for (i = 0; i < REGEX_CACHE_SIZE; i++)
397 if (regex_cache[i].str)
399 free (regex_cache[i].str);
400 free_pattern_buffer (regex_cache[i].buf, &regex_cache[i].regs);
401 free (regex_cache[i].buf);
405 /*-------------------------------------------------------------------------.
406 | Define a predefined or user-defined macro, with name NAME, and expansion |
407 | TEXT. MODE destinguishes between the "define" and the "pushdef" case. |
408 | It is also used from main (). |
409 `-------------------------------------------------------------------------*/
411 void
412 define_user_macro (const char *name, const char *text, symbol_lookup mode)
414 symbol *s;
415 char *defn = xstrdup (text ? text : "");
417 s = lookup_symbol (name, mode);
418 if (SYMBOL_TYPE (s) == TOKEN_TEXT)
419 free (SYMBOL_TEXT (s));
421 SYMBOL_TYPE (s) = TOKEN_TEXT;
422 SYMBOL_TEXT (s) = defn;
424 /* Implement --warn-macro-sequence. */
425 if (macro_sequence_inuse && text)
427 regoff_t offset = 0;
428 size_t len = strlen (defn);
430 while ((offset = re_search (&macro_sequence_buf, defn, len, offset,
431 len - offset, &macro_sequence_regs)) >= 0)
433 /* Skip empty matches. */
434 if (macro_sequence_regs.start[0] == macro_sequence_regs.end[0])
435 offset++;
436 else
438 char tmp;
439 offset = macro_sequence_regs.end[0];
440 tmp = defn[offset];
441 defn[offset] = '\0';
442 M4ERROR ((warning_status, 0,
443 "Warning: definition of `%s' contains sequence `%s'",
444 name, defn + macro_sequence_regs.start[0]));
445 defn[offset] = tmp;
448 if (offset == -2)
449 M4ERROR ((warning_status, 0,
450 "error checking --warn-macro-sequence for macro `%s'",
451 name));
455 /*-----------------------------------------------.
456 | Initialize all builtin and predefined macros. |
457 `-----------------------------------------------*/
459 void
460 builtin_init (void)
462 const builtin *bp;
463 const predefined *pp;
464 char *string;
466 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
467 if (!no_gnu_extensions || !bp->gnu_extension)
469 if (prefix_all_builtins)
471 string = (char *) xmalloc (strlen (bp->name) + 4);
472 strcpy (string, "m4_");
473 strcat (string, bp->name);
474 define_builtin (string, bp, SYMBOL_INSERT);
475 free (string);
477 else
478 define_builtin (bp->name, bp, SYMBOL_INSERT);
481 for (pp = &predefined_tab[0]; pp->func != NULL; pp++)
482 if (no_gnu_extensions)
484 if (pp->unix_name != NULL)
485 define_user_macro (pp->unix_name, pp->func, SYMBOL_INSERT);
487 else
489 if (pp->gnu_name != NULL)
490 define_user_macro (pp->gnu_name, pp->func, SYMBOL_INSERT);
494 /*------------------------------------------------------------------------.
495 | Give friendly warnings if a builtin macro is passed an inappropriate |
496 | number of arguments. NAME is macro name for messages, ARGC is actual |
497 | number of arguments, MIN is the minimum number of acceptable arguments, |
498 | negative if not applicable, MAX is the maximum number, negative if not |
499 | applicable. |
500 `------------------------------------------------------------------------*/
502 static bool
503 bad_argc (token_data *name, int argc, int min, int max)
505 bool isbad = false;
507 if (min > 0 && argc < min)
509 if (!suppress_warnings)
510 M4ERROR ((warning_status, 0,
511 "Warning: too few arguments to builtin `%s'",
512 TOKEN_DATA_TEXT (name)));
513 isbad = true;
515 else if (max > 0 && argc > max && !suppress_warnings)
516 M4ERROR ((warning_status, 0,
517 "Warning: excess arguments to builtin `%s' ignored",
518 TOKEN_DATA_TEXT (name)));
520 return isbad;
523 /*--------------------------------------------------------------------------.
524 | The function numeric_arg () converts ARG to an int pointed to by VALUEP. |
525 | If the conversion fails, print error message for macro MACRO. Return |
526 | true iff conversion succeeds. |
527 `--------------------------------------------------------------------------*/
529 static bool
530 numeric_arg (token_data *macro, const char *arg, int *valuep)
532 char *endp;
534 if (*arg == '\0')
536 *valuep = 0;
537 M4ERROR ((warning_status, 0,
538 "empty string treated as 0 in builtin `%s'",
539 TOKEN_DATA_TEXT (macro)));
541 else
543 errno = 0;
544 *valuep = strtol (arg, &endp, 10);
545 if (*endp != '\0')
547 M4ERROR ((warning_status, 0,
548 "non-numeric argument to builtin `%s'",
549 TOKEN_DATA_TEXT (macro)));
550 return false;
552 if (isspace (to_uchar (*arg)))
553 M4ERROR ((warning_status, 0,
554 "leading whitespace ignored in builtin `%s'",
555 TOKEN_DATA_TEXT (macro)));
556 else if (errno == ERANGE)
557 M4ERROR ((warning_status, 0,
558 "numeric overflow detected in builtin `%s'",
559 TOKEN_DATA_TEXT (macro)));
561 return true;
/* Digits for number to ascii conversions.  */
static char const digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";

/* Convert VALUE to its signed ASCII representation in radix RADIX.  The
   result points into a static buffer that the next call overwrites.
   RADIX is not validated here: it must be at least 2 and no larger than
   the number of characters in DIGITS.  */
const char *
ntoa (int32_t value, int radix)
{
  static char str[256];
  const bool negative = value < 0;
  /* Negate in unsigned arithmetic so the most negative value is safe.  */
  uint32_t magnitude = negative ? -(uint32_t) value : (uint32_t) value;
  char *cursor = str + sizeof str - 1;

  /* Build the text backwards from the end of the buffer.  */
  *cursor = '\0';
  for (;;)
    {
      *--cursor = digits[magnitude % radix];
      magnitude /= radix;
      if (magnitude == 0)
        break;
    }

  if (negative)
    *--cursor = '-';
  return cursor;
}
/* Append the decimal text of VAL to the obstack OBS.  Used by macros
   that expand to a number.  */

static void
shipout_int (struct obstack *obs, int val)
{
  const char *text = ntoa ((int32_t) val, 10);
  size_t text_len = strlen (text);

  obstack_grow (obs, text, text_len);
}
619 /*----------------------------------------------------------------------.
620 | Print ARGC arguments from the table ARGV to obstack OBS, separated by |
621 | SEP, and quoted by the current quotes, if QUOTED is true. |
622 `----------------------------------------------------------------------*/
624 static void
625 dump_args (struct obstack *obs, int argc, token_data **argv,
626 const char *sep, bool quoted)
628 int i;
629 size_t len = strlen (sep);
631 for (i = 1; i < argc; i++)
633 if (i > 1)
634 obstack_grow (obs, sep, len);
635 if (quoted)
636 obstack_grow (obs, lquote.string, lquote.length);
637 obstack_grow (obs, TOKEN_DATA_TEXT (argv[i]),
638 strlen (TOKEN_DATA_TEXT (argv[i])));
639 if (quoted)
640 obstack_grow (obs, rquote.string, rquote.length);
644 /* The rest of this file is code for builtins and expansion of user
645 defined macros. All the functions for builtins have a prototype as:
647 void m4_MACRONAME (struct obstack *obs, int argc, token_data **argv);
649 The functions are expected to leave their expansion on the obstack OBS,
650 as an unfinished object. ARGV is a table of ARGC pointers to the
651 individual arguments to the macro. Please note that in general
652 argv[argc] != NULL. */
654 /* The first section contains macros for defining, undefining, examining,
655 changing, ... other macros. */
657 /*-------------------------------------------------------------------------.
658 | The function define_macro is common for the builtins "define", |
659 | "undefine", "pushdef" and "popdef". ARGC and ARGV is as for the caller, |
660 | and MODE argument determines how the macro name is entered into the |
661 | symbol table. |
662 `-------------------------------------------------------------------------*/
664 static void
665 define_macro (int argc, token_data **argv, symbol_lookup mode)
667 const builtin *bp;
669 if (bad_argc (argv[0], argc, 2, 3))
670 return;
672 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
674 M4ERROR ((warning_status, 0,
675 "Warning: %s: invalid macro name ignored", ARG (0)));
676 return;
679 if (argc == 2)
681 define_user_macro (ARG (1), "", mode);
682 return;
685 switch (TOKEN_DATA_TYPE (argv[2]))
687 case TOKEN_TEXT:
688 define_user_macro (ARG (1), ARG (2), mode);
689 break;
691 case TOKEN_FUNC:
692 bp = find_builtin_by_addr (TOKEN_DATA_FUNC (argv[2]));
693 if (bp == NULL)
694 return;
695 else
696 define_builtin (ARG (1), bp, mode);
697 break;
699 default:
700 M4ERROR ((warning_status, 0,
701 "INTERNAL ERROR: bad token data type in define_macro ()"));
702 abort ();
706 static void
707 m4_define (struct obstack *obs, int argc, token_data **argv)
709 define_macro (argc, argv, SYMBOL_INSERT);
712 static void
713 m4_undefine (struct obstack *obs, int argc, token_data **argv)
715 int i;
716 if (bad_argc (argv[0], argc, 2, -1))
717 return;
718 for (i = 1; i < argc; i++)
719 lookup_symbol (ARG (i), SYMBOL_DELETE);
722 static void
723 m4_pushdef (struct obstack *obs, int argc, token_data **argv)
725 define_macro (argc, argv, SYMBOL_PUSHDEF);
728 static void
729 m4_popdef (struct obstack *obs, int argc, token_data **argv)
731 int i;
732 if (bad_argc (argv[0], argc, 2, -1))
733 return;
734 for (i = 1; i < argc; i++)
735 lookup_symbol (ARG (i), SYMBOL_POPDEF);
738 /*---------------------.
739 | Conditionals of m4. |
740 `---------------------*/
742 static void
743 m4_ifdef (struct obstack *obs, int argc, token_data **argv)
745 symbol *s;
746 const char *result;
748 if (bad_argc (argv[0], argc, 3, 4))
749 return;
750 s = lookup_symbol (ARG (1), SYMBOL_LOOKUP);
752 if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
753 result = ARG (2);
754 else if (argc >= 4)
755 result = ARG (3);
756 else
757 result = NULL;
759 if (result != NULL)
760 obstack_grow (obs, result, strlen (result));
763 static void
764 m4_ifelse (struct obstack *obs, int argc, token_data **argv)
766 const char *result;
767 token_data *argv0;
769 if (argc == 2)
770 return;
772 if (bad_argc (argv[0], argc, 4, -1))
773 return;
774 else
775 /* Diagnose excess arguments if 5, 8, 11, etc., actual arguments. */
776 bad_argc (argv[0], (argc + 2) % 3, -1, 1);
778 argv0 = argv[0];
779 argv++;
780 argc--;
782 result = NULL;
783 while (result == NULL)
785 if (strcmp (ARG (0), ARG (1)) == 0)
786 result = ARG (2);
788 else
789 switch (argc)
791 case 3:
792 return;
794 case 4:
795 case 5:
796 result = ARG (3);
797 break;
799 default:
800 argc -= 3;
801 argv += 3;
804 obstack_grow (obs, result, strlen (result));
807 /*---------------------------------------------------------------------.
808 | The function dump_symbol () is for use by "dumpdef". It builds up a |
809 | table of all defined, un-shadowed, symbols. |
810 `---------------------------------------------------------------------*/
/* State shared between m4_dumpdef and dump_symbol.  dump_symbol grows
   the object under construction on OBS by one pointer per accepted
   symbol, refreshes BASE from the obstack after each growth, and counts
   the stored pointers in SIZE.  */
812 /* The structure dump_symbol_data is used to pass the information needed
813 from call to call to dump_symbol. */
815 struct dump_symbol_data
817 struct obstack *obs; /* obstack for table */
818 symbol **base; /* base of table */
819 int size; /* size of table */
822 static void
823 dump_symbol (symbol *sym, void *arg)
825 struct dump_symbol_data *data = (struct dump_symbol_data *) arg;
826 if (!SYMBOL_SHADOWED (sym) && SYMBOL_TYPE (sym) != TOKEN_VOID)
828 obstack_blank (data->obs, sizeof (symbol *));
829 data->base = (symbol **) obstack_base (data->obs);
830 data->base[data->size++] = sym;
834 /*------------------------------------------------------------------------.
835 | qsort comparison routine, for sorting the table made in m4_dumpdef (). |
836 `------------------------------------------------------------------------*/
838 static int
839 dumpdef_cmp (const void *s1, const void *s2)
841 return strcmp (SYMBOL_NAME (* (symbol *const *) s1),
842 SYMBOL_NAME (* (symbol *const *) s2));
845 /*-------------------------------------------------------------------------.
846 | Implementation of "dumpdef" itself. It builds up a table of pointers to |
847 | symbols, sorts it and prints the sorted table. |
848 `-------------------------------------------------------------------------*/
850 static void
851 m4_dumpdef (struct obstack *obs, int argc, token_data **argv)
853 symbol *s;
854 int i;
855 struct dump_symbol_data data;
856 const builtin *bp;
858 data.obs = obs;
859 data.base = (symbol **) obstack_base (obs);
860 data.size = 0;
862 if (argc == 1)
864 hack_all_symbols (dump_symbol, &data);
866 else
868 for (i = 1; i < argc; i++)
870 s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_LOOKUP);
871 if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
872 dump_symbol (s, &data);
873 else
874 M4ERROR ((warning_status, 0,
875 "undefined macro `%s'", TOKEN_DATA_TEXT (argv[i])));
879 /* Make table of symbols invisible to expand_macro (). */
881 obstack_finish (obs);
883 qsort (data.base, data.size, sizeof (symbol *), dumpdef_cmp);
885 for (; data.size > 0; --data.size, data.base++)
887 DEBUG_PRINT1 ("%s:\t", SYMBOL_NAME (data.base[0]));
889 switch (SYMBOL_TYPE (data.base[0]))
891 case TOKEN_TEXT:
892 if (debug_level & DEBUG_TRACE_QUOTE)
893 DEBUG_PRINT3 ("%s%s%s\n",
894 lquote.string, SYMBOL_TEXT (data.base[0]), rquote.string);
895 else
896 DEBUG_PRINT1 ("%s\n", SYMBOL_TEXT (data.base[0]));
897 break;
899 case TOKEN_FUNC:
900 bp = find_builtin_by_addr (SYMBOL_FUNC (data.base[0]));
901 if (bp == NULL)
903 M4ERROR ((warning_status, 0, "\
904 INTERNAL ERROR: builtin not found in builtin table"));
905 abort ();
907 DEBUG_PRINT1 ("<%s>\n", bp->name);
908 break;
910 default:
911 M4ERROR ((warning_status, 0,
912 "INTERNAL ERROR: bad token data type in m4_dumpdef ()"));
913 abort ();
914 break;
919 /*---------------------------------------------------------------------.
920 | The builtin "builtin" allows calls to builtin macros, even if their |
921 | definition has been overridden or shadowed. It is thus possible to |
922 | redefine builtins, and still access their original definition. This |
923 | macro is not available in compatibility mode. |
924 `---------------------------------------------------------------------*/
926 static void
927 m4_builtin (struct obstack *obs, int argc, token_data **argv)
929 const builtin *bp;
930 const char *name;
932 if (bad_argc (argv[0], argc, 2, -1))
933 return;
934 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
936 M4ERROR ((warning_status, 0,
937 "Warning: %s: invalid macro name ignored", ARG (0)));
938 return;
941 name = ARG (1);
942 bp = find_builtin_by_name (name);
943 if (bp->func == m4_placeholder)
944 M4ERROR ((warning_status, 0,
945 "undefined builtin `%s'", name));
946 else
948 int i;
949 if (! bp->groks_macro_args)
950 for (i = 2; i < argc; i++)
951 if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
953 TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
954 TOKEN_DATA_TEXT (argv[i]) = (char *) "";
956 bp->func (obs, argc - 1, argv + 1);
960 /*------------------------------------------------------------------------.
961 | The builtin "indir" allows indirect calls to macros, even if their name |
962 | is not a proper macro name. It is thus possible to define macros with |
963 | ill-formed names for internal use in larger macro packages. This macro |
964 | is not available in compatibility mode. |
965 `------------------------------------------------------------------------*/
967 static void
968 m4_indir (struct obstack *obs, int argc, token_data **argv)
970 symbol *s;
971 const char *name;
973 if (bad_argc (argv[0], argc, 2, -1))
974 return;
975 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
977 M4ERROR ((warning_status, 0,
978 "Warning: %s: invalid macro name ignored", ARG (0)));
979 return;
982 name = ARG (1);
983 s = lookup_symbol (name, SYMBOL_LOOKUP);
984 if (s == NULL || SYMBOL_TYPE (s) == TOKEN_VOID)
985 M4ERROR ((warning_status, 0,
986 "undefined macro `%s'", name));
987 else
989 int i;
990 if (! SYMBOL_MACRO_ARGS (s))
991 for (i = 2; i < argc; i++)
992 if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
994 TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
995 TOKEN_DATA_TEXT (argv[i]) = (char *) "";
997 call_macro (s, argc - 1, argv + 1, obs);
1001 /*-------------------------------------------------------------------------.
1002 | The macro "defn" returns the quoted definition of the macro named by the |
1003 | first argument. If the macro is builtin, it will push a special |
1004 | macro-definition token on the input stack. |
1005 `-------------------------------------------------------------------------*/
1007 static void
1008 m4_defn (struct obstack *obs, int argc, token_data **argv)
1010 symbol *s;
1011 builtin_func *b;
1012 int i;
1014 if (bad_argc (argv[0], argc, 2, -1))
1015 return;
1017 for (i = 1; i < argc; i++)
1019 s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
1020 if (s == NULL)
1021 continue;
1023 switch (SYMBOL_TYPE (s))
1025 case TOKEN_TEXT:
1026 obstack_grow (obs, lquote.string, lquote.length);
1027 obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s)));
1028 obstack_grow (obs, rquote.string, rquote.length);
1029 break;
1031 case TOKEN_FUNC:
1032 b = SYMBOL_FUNC (s);
1033 if (b == m4_placeholder)
1034 M4ERROR ((warning_status, 0, "\
1035 builtin `%s' requested by frozen file is not supported", ARG (i)));
1036 else if (argc != 2)
1037 M4ERROR ((warning_status, 0,
1038 "Warning: cannot concatenate builtin `%s'",
1039 ARG (i)));
1040 else
1041 push_macro (b);
1042 break;
1044 default:
1045 M4ERROR ((warning_status, 0,
1046 "INTERNAL ERROR: bad symbol type in m4_defn ()"));
1047 abort ();
1052 /*------------------------------------------------------------------------.
1053 | This section contains macros to handle the builtins "syscmd", "esyscmd" |
1054 | and "sysval". "esyscmd" is GNU specific. |
1055 `------------------------------------------------------------------------*/
/* Helper macros for readability.  M4SYSVAL_EXITBITS extracts the exit
   value of a wait status and M4SYSVAL_TERMSIGBITS the terminating
   signal shifted left by eight bits; m4_sysval ORs the two together.  */
#if UNIX || defined WEXITSTATUS
# define M4SYSVAL_EXITBITS(status) \
   (WIFEXITED (status) ? WEXITSTATUS (status) : 0)
# define M4SYSVAL_TERMSIGBITS(status) \
   (WIFSIGNALED (status) ? WTERMSIG (status) << 8 : 0)

#else /* ! UNIX && ! defined WEXITSTATUS */
/* Platforms such as mingw do not support the notion of reporting
   which signal terminated a process.  Furthermore if WEXITSTATUS was
   not provided, then the exit value is in the low eight bits.  The
   argument is parenthesized so that an expression argument cannot
   regroup with the operators around the expansion.  */
# define M4SYSVAL_EXITBITS(status) (status)
# define M4SYSVAL_TERMSIGBITS(status) 0
#endif /* ! UNIX && ! defined WEXITSTATUS */

/* Fallback definitions if <stdlib.h> or <sys/wait.h> are inadequate.  */
#ifndef WEXITSTATUS
# define WEXITSTATUS(status) (((status) >> 8) & 0xff)
#endif
#ifndef WTERMSIG
# define WTERMSIG(status) ((status) & 0x7f)
#endif
#ifndef WIFSIGNALED
# define WIFSIGNALED(status) (WTERMSIG (status) != 0)
#endif
#ifndef WIFEXITED
# define WIFEXITED(status) (WTERMSIG (status) == 0)
#endif
/* Raw status of the last command run by "syscmd" or "esyscmd": the
   return value of system () or pclose (), -1 when the pipe could not be
   opened, or 0 when the command argument was missing.  m4_sysval
   decodes it for the "sysval" builtin.  */
1086 /* Exit code from last "syscmd" command. */
1087 static int sysval;
1089 static void
1090 m4_syscmd (struct obstack *obs, int argc, token_data **argv)
1092 if (bad_argc (argv[0], argc, 2, 2))
1094 /* The empty command is successful. */
1095 sysval = 0;
1096 return;
1099 debug_flush_files ();
1100 sysval = system (ARG (1));
1101 #if FUNC_SYSTEM_BROKEN
1102 /* OS/2 has a buggy system() that returns exit status in the lowest eight
1103 bits, although pclose() and WEXITSTATUS are defined to return exit
1104 status in the next eight bits. This approach can't detect signals, but
1105 at least syscmd(`ls') still works when stdout is a terminal. An
1106 alternate approach is popen/insert_file/pclose, but that makes stdout
1107 a pipe, which can change how some child processes behave. */
1108 if (sysval != -1)
1109 sysval <<= 8;
1110 #endif /* FUNC_SYSTEM_BROKEN */
1113 static void
1114 m4_esyscmd (struct obstack *obs, int argc, token_data **argv)
1116 FILE *pin;
1117 int ch;
1119 if (bad_argc (argv[0], argc, 2, 2))
1121 /* The empty command is successful. */
1122 sysval = 0;
1123 return;
1126 debug_flush_files ();
1127 errno = 0;
1128 pin = popen (ARG (1), "r");
1129 if (pin == NULL)
1131 M4ERROR ((warning_status, errno,
1132 "cannot open pipe to command `%s'", ARG (1)));
1133 sysval = -1;
1135 else
1137 while ((ch = getc (pin)) != EOF)
1138 obstack_1grow (obs, (char) ch);
1139 sysval = pclose (pin);
1143 static void
1144 m4_sysval (struct obstack *obs, int argc, token_data **argv)
1146 shipout_int (obs, (sysval == -1 ? 127
1147 : (M4SYSVAL_EXITBITS (sysval)
1148 | M4SYSVAL_TERMSIGBITS (sysval))));
1151 /*-------------------------------------------------------------------------.
1152 | This section contains the top level code for the "eval" builtin. The |
1153 | actual work is done in the function evaluate (), which lives in eval.c. |
1154 `-------------------------------------------------------------------------*/
1156 static void
1157 m4_eval (struct obstack *obs, int argc, token_data **argv)
1159 int32_t value = 0;
1160 int radix = 10;
1161 int min = 1;
1162 const char *s;
1164 if (bad_argc (argv[0], argc, 2, 4))
1165 return;
1167 if (*ARG (2) && !numeric_arg (argv[0], ARG (2), &radix))
1168 return;
1170 if (radix < 1 || radix > (int) strlen (digits))
1172 M4ERROR ((warning_status, 0,
1173 "radix %d in builtin `%s' out of range",
1174 radix, ARG (0)));
1175 return;
1178 if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &min))
1179 return;
1180 if (min < 0)
1182 M4ERROR ((warning_status, 0,
1183 "negative width to builtin `%s'", ARG (0)));
1184 return;
1187 if (!*ARG (1))
1188 M4ERROR ((warning_status, 0,
1189 "empty string treated as 0 in builtin `%s'", ARG (0)));
1190 else if (evaluate (ARG (1), &value))
1191 return;
1193 if (radix == 1)
1195 if (value < 0)
1197 obstack_1grow (obs, '-');
1198 value = -value;
1200 /* This assumes 2's-complement for correctly handling INT_MIN. */
1201 while (min-- - value > 0)
1202 obstack_1grow (obs, '0');
1203 while (value-- != 0)
1204 obstack_1grow (obs, '1');
1205 obstack_1grow (obs, '\0');
1206 return;
1209 s = ntoa (value, radix);
1211 if (*s == '-')
1213 obstack_1grow (obs, '-');
1214 s++;
1216 for (min -= strlen (s); --min >= 0;)
1217 obstack_1grow (obs, '0');
1219 obstack_grow (obs, s, strlen (s));
1222 static void
1223 m4_incr (struct obstack *obs, int argc, token_data **argv)
1225 int value;
1227 if (bad_argc (argv[0], argc, 2, 2))
1228 return;
1230 if (!numeric_arg (argv[0], ARG (1), &value))
1231 return;
1233 shipout_int (obs, value + 1);
1236 static void
1237 m4_decr (struct obstack *obs, int argc, token_data **argv)
1239 int value;
1241 if (bad_argc (argv[0], argc, 2, 2))
1242 return;
1244 if (!numeric_arg (argv[0], ARG (1), &value))
1245 return;
1247 shipout_int (obs, value - 1);
1250 /* This section contains the macros "divert", "undivert" and "divnum" for
1251 handling diversion. The utility functions used live in output.c. */
1253 /*-----------------------------------------------------------------------.
1254 | Divert further output to the diversion given by ARGV[1]. Out of range |
1255 | means discard further output. |
1256 `-----------------------------------------------------------------------*/
1258 static void
1259 m4_divert (struct obstack *obs, int argc, token_data **argv)
1261 int i = 0;
1263 if (bad_argc (argv[0], argc, 1, 2))
1264 return;
1266 if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &i))
1267 return;
1269 make_diversion (i);
1272 /*-----------------------------------------------------.
1273 | Expand to the current diversion number, -1 if none. |
1274 `-----------------------------------------------------*/
1276 static void
1277 m4_divnum (struct obstack *obs, int argc, token_data **argv)
1279 if (bad_argc (argv[0], argc, 1, 1))
1280 return;
1281 shipout_int (obs, current_diversion);
1284 /*-----------------------------------------------------------------------.
1285 | Bring back the diversion given by the argument list. If none is |
1286 | specified, bring back all diversions. GNU specific is the option of |
1287 | undiverting named files, by passing a non-numeric argument to undivert |
1288 | (). |
1289 `-----------------------------------------------------------------------*/
1291 static void
1292 m4_undivert (struct obstack *obs, int argc, token_data **argv)
1294 int i, file;
1295 FILE *fp;
1296 char *endp;
1298 if (argc == 1)
1299 undivert_all ();
1300 else
1301 for (i = 1; i < argc; i++)
1303 file = strtol (ARG (i), &endp, 10);
1304 if (*endp == '\0' && !isspace (to_uchar (*ARG (i))))
1305 insert_diversion (file);
1306 else if (no_gnu_extensions)
1307 M4ERROR ((warning_status, 0,
1308 "non-numeric argument to builtin `%s'", ARG (0)));
1309 else
1311 fp = m4_path_search (ARG (i), NULL);
1312 if (fp != NULL)
1314 insert_file (fp);
1315 if (fclose (fp) == EOF)
1316 M4ERROR ((warning_status, errno,
1317 "error undiverting `%s'", ARG (i)));
1319 else
1320 M4ERROR ((warning_status, errno,
1321 "cannot undivert `%s'", ARG (i)));
1326 /* This section contains various macros, which do not fall into any
1327 specific group. These are "dnl", "shift", "changequote", "changecom"
1328 and "changeword". */
1330 /*------------------------------------------------------------------------.
1331 | Discard the rest of the current input line. The function skip_line () |
1332 | lives in input.c. |
1333 `------------------------------------------------------------------------*/
1335 static void
1336 m4_dnl (struct obstack *obs, int argc, token_data **argv)
1338 if (bad_argc (argv[0], argc, 1, 1))
1339 return;
1341 skip_line ();
1344 /*-------------------------------------------------------------------------.
1345 | Shift all arguments one to the left, discarding the first argument. Each |
1346 | output argument is quoted with the current quotes. |
1347 `-------------------------------------------------------------------------*/
1349 static void
1350 m4_shift (struct obstack *obs, int argc, token_data **argv)
1352 if (bad_argc (argv[0], argc, 2, -1))
1353 return;
1354 dump_args (obs, argc - 1, argv + 1, ",", true);
1357 /*--------------------------------------------------------------------------.
1358 | Change the current quotes. The function set_quotes () lives in input.c. |
1359 `--------------------------------------------------------------------------*/
1361 static void
1362 m4_changequote (struct obstack *obs, int argc, token_data **argv)
1364 if (bad_argc (argv[0], argc, 1, 3))
1365 return;
1367 /* Explicit NULL distinguishes between empty and missing argument. */
1368 set_quotes ((argc >= 2) ? TOKEN_DATA_TEXT (argv[1]) : NULL,
1369 (argc >= 3) ? TOKEN_DATA_TEXT (argv[2]) : NULL);
1372 /*--------------------------------------------------------------------.
1373 | Change the current comment delimiters. The function set_comment () |
1374 | lives in input.c. |
1375 `--------------------------------------------------------------------*/
1377 static void
1378 m4_changecom (struct obstack *obs, int argc, token_data **argv)
1380 if (bad_argc (argv[0], argc, 1, 3))
1381 return;
1383 /* Explicit NULL distinguishes between empty and missing argument. */
1384 set_comment ((argc >= 2) ? TOKEN_DATA_TEXT (argv[1]) : NULL,
1385 (argc >= 3) ? TOKEN_DATA_TEXT (argv[2]) : NULL);
1388 #ifdef ENABLE_CHANGEWORD
1390 /*-----------------------------------------------------------------------.
1391 | Change the regular expression used for breaking the input into words. |
1392 | The function set_word_regexp () lives in input.c. |
1393 `-----------------------------------------------------------------------*/
1395 static void
1396 m4_changeword (struct obstack *obs, int argc, token_data **argv)
1398 if (bad_argc (argv[0], argc, 2, 2))
1399 return;
1401 set_word_regexp (TOKEN_DATA_TEXT (argv[1]));
1404 #endif /* ENABLE_CHANGEWORD */
1406 /* This section contains macros for inclusion of other files -- "include"
1407 and "sinclude". This differs from bringing back diversions, in that
1408 the input is scanned before being copied to the output. */
1410 /*-------------------------------------------------------------------------.
1411 | Generic include function. Include the file given by the first argument, |
1412 | if it exists. Complain about inaccessible files iff SILENT is false. |
1413 `-------------------------------------------------------------------------*/
1415 static void
1416 include (int argc, token_data **argv, bool silent)
1418 FILE *fp;
1419 char *name;
1421 if (bad_argc (argv[0], argc, 2, 2))
1422 return;
1424 fp = m4_path_search (ARG (1), &name);
1425 if (fp == NULL)
1427 if (!silent)
1429 M4ERROR ((warning_status, errno, "cannot open `%s'", ARG (1)));
1430 retcode = EXIT_FAILURE;
1432 return;
1435 push_file (fp, name, true);
1436 free (name);
1439 /*------------------------------------------------.
1440 | Include a file, complaining in case of errors. |
1441 `------------------------------------------------*/
1443 static void
1444 m4_include (struct obstack *obs, int argc, token_data **argv)
1446 include (argc, argv, false);
1449 /*----------------------------------.
1450 | Include a file, ignoring errors. |
1451 `----------------------------------*/
1453 static void
1454 m4_sinclude (struct obstack *obs, int argc, token_data **argv)
1456 include (argc, argv, true);
1459 /* More miscellaneous builtins -- "maketemp", "errprint", "__file__",
1460 "__line__", and "__program__". The last three are GNU specific. */
1462 /*------------------------------------------------------------------.
1463 | Use the first argument as a template for a temporary file name. |
1464 `------------------------------------------------------------------*/
/* Copy NAME onto OBS, append as many 'X' characters as needed for the
   template to end in six of them, and create the file with mkstemp ().
   On success the resulting file name is left on OBS; on failure an
   error is reported and the growing object is discarded.  */
static void
mkstemp_helper (struct obstack *obs, const char *name)
{
  int name_len = strlen (name);
  int trailing = 0;             /* Trailing 'X's already in NAME, max 6.  */
  int fd;

  obstack_grow (obs, name, name_len);

  /* Guarantee that there are six trailing 'X' characters, even if the
     user forgot to supply them.  */
  while (trailing < 6 && trailing < name_len
         && name[name_len - 1 - trailing] == 'X')
    trailing++;
  while (trailing < 6)
    {
      obstack_1grow (obs, 'X');
      trailing++;
    }
  obstack_1grow (obs, '\0');

  errno = 0;
  fd = mkstemp ((char *) obstack_base (obs));
  if (fd >= 0)
    {
      close (fd);
      return;
    }

  M4ERROR ((0, errno, "cannot create tempfile `%s'", name));
  obstack_free (obs, obstack_finish (obs));
}
1497 static void
1498 m4_maketemp (struct obstack *obs, int argc, token_data **argv)
1500 if (bad_argc (argv[0], argc, 2, 2))
1501 return;
1502 if (no_gnu_extensions)
1504 /* POSIX states "any trailing 'X' characters [are] replaced with
1505 the current process ID as a string", without referencing the
1506 file system. Horribly insecure, but we have to do it when we
1507 are in traditional mode.
1509 For reference, Solaris m4 does:
1510 maketemp() -> `'
1511 maketemp(X) -> `X'
1512 maketemp(XX) -> `Xn', where n is last digit of pid
1513 maketemp(XXXXXXXX) -> `X00nnnnn', where nnnnn is 16-bit pid
1515 const char *str = ARG (1);
1516 int len = strlen (str);
1517 int i;
1518 int len2;
1520 M4ERROR ((warning_status, 0, "recommend using mkstemp instead"));
1521 for (i = len; i > 1; i--)
1522 if (str[i - 1] != 'X')
1523 break;
1524 obstack_grow (obs, str, i);
1525 str = ntoa ((int32_t) getpid (), 10);
1526 len2 = strlen (str);
1527 if (len2 > len - i)
1528 obstack_grow0 (obs, str + len2 - (len - i), len - i);
1529 else
1531 while (i++ < len - len2)
1532 obstack_1grow (obs, '0');
1533 obstack_grow0 (obs, str, len2);
1536 else
1537 mkstemp_helper (obs, ARG (1));
1540 static void
1541 m4_mkstemp (struct obstack *obs, int argc, token_data **argv)
1543 if (bad_argc (argv[0], argc, 2, 2))
1544 return;
1545 mkstemp_helper (obs, ARG (1));
1548 /*----------------------------------------.
1549 | Print all arguments on standard error. |
1550 `----------------------------------------*/
1552 static void
1553 m4_errprint (struct obstack *obs, int argc, token_data **argv)
1555 if (bad_argc (argv[0], argc, 2, -1))
1556 return;
1557 dump_args (obs, argc, argv, " ", false);
1558 obstack_1grow (obs, '\0');
1559 debug_flush_files ();
1560 xfprintf (stderr, "%s", (char *) obstack_finish (obs));
1561 fflush (stderr);
1564 static void
1565 m4___file__ (struct obstack *obs, int argc, token_data **argv)
1567 if (bad_argc (argv[0], argc, 1, 1))
1568 return;
1569 obstack_grow (obs, lquote.string, lquote.length);
1570 obstack_grow (obs, current_file, strlen (current_file));
1571 obstack_grow (obs, rquote.string, rquote.length);
1574 static void
1575 m4___line__ (struct obstack *obs, int argc, token_data **argv)
1577 if (bad_argc (argv[0], argc, 1, 1))
1578 return;
1579 shipout_int (obs, current_line);
1582 static void
1583 m4___program__ (struct obstack *obs, int argc, token_data **argv)
1585 if (bad_argc (argv[0], argc, 1, 1))
1586 return;
1587 obstack_grow (obs, lquote.string, lquote.length);
1588 obstack_grow (obs, program_name, strlen (program_name));
1589 obstack_grow (obs, rquote.string, rquote.length);
1592 /* This section contains various macros for exiting, saving input until
1593 EOF is seen, and tracing macro calls. That is: "m4exit", "m4wrap",
1594 "traceon" and "traceoff". */
1596 /*-------------------------------------------------------------------------.
1597 | Exit immediately, with exitcode specified by the first argument, 0 if no |
1598 | arguments are present. |
1599 `-------------------------------------------------------------------------*/
1601 static void
1602 m4_m4exit (struct obstack *obs, int argc, token_data **argv)
1604 int exit_code = EXIT_SUCCESS;
1606 /* Warn on bad arguments, but still exit. */
1607 bad_argc (argv[0], argc, 1, 2);
1608 if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &exit_code))
1609 exit_code = EXIT_FAILURE;
1610 if (exit_code < 0 || exit_code > 255)
1612 M4ERROR ((warning_status, 0,
1613 "exit status out of range: `%d'", exit_code));
1614 exit_code = EXIT_FAILURE;
1616 /* Change debug stream back to stderr, to force flushing debug stream and
1617 detect any errors it might have encountered. */
1618 debug_set_output (NULL);
1619 debug_flush_files ();
1620 if (exit_code == EXIT_SUCCESS && retcode != EXIT_SUCCESS)
1621 exit_code = retcode;
1622 /* Propagate non-zero status to atexit handlers. */
1623 if (exit_code != EXIT_SUCCESS)
1624 exit_failure = exit_code;
1625 exit (exit_code);
1628 /*-------------------------------------------------------------------------.
1629 | Save the argument text until EOF has been seen, allowing for user |
1630 | specified cleanup action. GNU version saves all arguments, the standard |
1631 | version only the first. |
1632 `-------------------------------------------------------------------------*/
1634 static void
1635 m4_m4wrap (struct obstack *obs, int argc, token_data **argv)
1637 if (bad_argc (argv[0], argc, 2, -1))
1638 return;
1639 if (no_gnu_extensions)
1640 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1641 else
1642 dump_args (obs, argc, argv, " ", false);
1643 obstack_1grow (obs, '\0');
1644 push_wrapup ((char *) obstack_finish (obs));
1647 /* Enable tracing of all specified macros, or all, if none is specified.
1648 Tracing is disabled by default, when a macro is defined. This can be
1649 overridden by the "t" debug flag. */
1651 /*-----------------------------------------------------------------------.
1652 | Set_trace () is used by "traceon" and "traceoff" to enable and disable |
1653 | tracing of a macro. It disables tracing if DATA is NULL, otherwise it |
1654 | enables tracing. |
1655 `-----------------------------------------------------------------------*/
1657 static void
1658 set_trace (symbol *sym, void *data)
1660 SYMBOL_TRACED (sym) = data != NULL;
1661 /* Remove placeholder from table if macro is undefined and untraced. */
1662 if (SYMBOL_TYPE (sym) == TOKEN_VOID && data == NULL)
1663 lookup_symbol (SYMBOL_NAME (sym), SYMBOL_POPDEF);
1666 static void
1667 m4_traceon (struct obstack *obs, int argc, token_data **argv)
1669 symbol *s;
1670 int i;
1672 if (argc == 1)
1673 hack_all_symbols (set_trace, obs);
1674 else
1675 for (i = 1; i < argc; i++)
1677 s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_INSERT);
1678 set_trace (s, obs);
1682 /*------------------------------------------------------------------------.
1683 | Disable tracing of all specified macros, or all, if none is specified. |
1684 `------------------------------------------------------------------------*/
1686 static void
1687 m4_traceoff (struct obstack *obs, int argc, token_data **argv)
1689 symbol *s;
1690 int i;
1692 if (argc == 1)
1693 hack_all_symbols (set_trace, NULL);
1694 else
1695 for (i = 1; i < argc; i++)
1697 s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_LOOKUP);
1698 if (s != NULL)
1699 set_trace (s, NULL);
1703 /*----------------------------------------------------------------------.
1704 | On-the-fly control of the format of the tracing output. It takes one |
1705 | argument, which is a character string like given to the -d option, or |
1706 | none in which case the debug_level is zeroed. |
1707 `----------------------------------------------------------------------*/
1709 static void
1710 m4_debugmode (struct obstack *obs, int argc, token_data **argv)
1712 int new_debug_level;
1713 int change_flag;
1715 if (bad_argc (argv[0], argc, 1, 2))
1716 return;
1718 if (argc == 1)
1719 debug_level = 0;
1720 else
1722 if (ARG (1)[0] == '+' || ARG (1)[0] == '-')
1724 change_flag = ARG (1)[0];
1725 new_debug_level = debug_decode (ARG (1) + 1);
1727 else
1729 change_flag = 0;
1730 new_debug_level = debug_decode (ARG (1));
1733 if (new_debug_level < 0)
1734 M4ERROR ((warning_status, 0,
1735 "Debugmode: bad debug flags: `%s'", ARG (1)));
1736 else
1738 switch (change_flag)
1740 case 0:
1741 debug_level = new_debug_level;
1742 break;
1744 case '+':
1745 debug_level |= new_debug_level;
1746 break;
1748 case '-':
1749 debug_level &= ~new_debug_level;
1750 break;
1756 /*-------------------------------------------------------------------------.
1757 | Specify the destination of the debugging output. With one argument, the |
1758 | argument is taken as a file name, with no arguments, revert to stderr. |
1759 `-------------------------------------------------------------------------*/
1761 static void
1762 m4_debugfile (struct obstack *obs, int argc, token_data **argv)
1764 if (bad_argc (argv[0], argc, 1, 2))
1765 return;
1767 if (argc == 1)
1768 debug_set_output (NULL);
1769 else if (!debug_set_output (ARG (1)))
1770 M4ERROR ((warning_status, errno,
1771 "cannot set error file: `%s'", ARG (1)));
1774 /* This section contains text processing macros: "len", "index",
1775 "substr", "translit", "format", "regexp" and "patsubst". The last
1776 three are GNU specific. */
1778 /*---------------------------------------------.
1779 | Expand to the length of the first argument. |
1780 `---------------------------------------------*/
1782 static void
1783 m4_len (struct obstack *obs, int argc, token_data **argv)
1785 if (bad_argc (argv[0], argc, 2, 2))
1786 return;
1787 shipout_int (obs, strlen (ARG (1)));
1790 /*-------------------------------------------------------------------------.
1791 | The macro expands to the first index of the second argument in the first |
1792 | argument. |
1793 `-------------------------------------------------------------------------*/
1795 static void
1796 m4_index (struct obstack *obs, int argc, token_data **argv)
1798 const char *haystack;
1799 const char *needle;
1800 const char *result = NULL;
1801 int retval = -1;
1803 if (bad_argc (argv[0], argc, 3, 3))
1805 /* builtin(`index') is blank, but index(`abc') is 0. */
1806 if (argc == 2)
1807 shipout_int (obs, 0);
1808 return;
1811 haystack = ARG (1);
1812 needle = ARG (2);
1814 /* Optimize searching for the empty string (always 0) and one byte
1815 (strchr tends to be more efficient than strstr). */
1816 if (!needle[0])
1817 retval = 0;
1818 else if (!needle[1])
1819 result = strchr (haystack, *needle);
1820 else
1821 result = strstr (haystack, needle);
1822 if (result)
1823 retval = result - haystack;
1825 shipout_int (obs, retval);
1828 /*-------------------------------------------------------------------------.
1829 | The macro "substr" extracts substrings from the first argument, starting |
1830 | from the index given by the second argument, extending for a length |
1831 | given by the third argument. If the third argument is missing, the |
1832 | substring extends to the end of the first argument. |
1833 `-------------------------------------------------------------------------*/
1835 static void
1836 m4_substr (struct obstack *obs, int argc, token_data **argv)
1838 int start = 0;
1839 int length, avail;
1841 if (bad_argc (argv[0], argc, 3, 4))
1843 /* builtin(`substr') is blank, but substr(`abc') is abc. */
1844 if (argc == 2)
1845 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1846 return;
1849 length = avail = strlen (ARG (1));
1850 if (!numeric_arg (argv[0], ARG (2), &start))
1851 return;
1853 if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &length))
1854 return;
1856 if (start < 0 || length <= 0 || start >= avail)
1857 return;
1859 if (start + length > avail)
1860 length = avail - start;
1861 obstack_grow (obs, ARG (1) + start, length);
1864 /*------------------------------------------------------------------------.
1865 | For "translit", ranges are allowed in the second and third argument. |
1866 | They are expanded in the following function, and the expanded strings, |
1867 | without any ranges left, are used to translate the characters of the |
1868 | first argument. A single - (dash) can be included in the strings by |
1869 | being the first or the last character in the string. If the first |
1870 | character in a range is after the first in the character set, the range |
1871 | is made backwards, thus 9-0 is the string 9876543210. |
1872 `------------------------------------------------------------------------*/
/* Copy S onto OBS, replacing every `a-z' style range by the characters
   it denotes (descending when the first bound is the larger one), and
   return the finished, NUL-terminated object.  A dash at the very
   start or end of S is copied literally.  */
static const char *
expand_ranges (const char *s, struct obstack *obs)
{
  unsigned char prev = '\0';    /* Byte preceding *S; NUL at the start.  */
  unsigned char bound;

  while (*s != '\0')
    {
      if (*s != '-' || prev == '\0')
        {
          obstack_1grow (obs, *s);
        }
      else
        {
          bound = to_uchar (*++s);
          if (bound == '\0')
            {
              /* trailing dash */
              obstack_1grow (obs, '-');
              break;
            }

          /* PREV itself was already emitted; output the rest of the
             range up to and including BOUND.  */
          if (prev <= bound)
            {
              while (prev++ < bound)
                obstack_1grow (obs, prev);
            }
          else
            {
              while (--prev >= bound)
                obstack_1grow (obs, prev);
            }
        }
      prev = to_uchar (*s++);
    }

  obstack_1grow (obs, '\0');
  return (char *) obstack_finish (obs);
}
1909 /*----------------------------------------------------------------------.
1910 | The macro "translit" translates all characters in the first argument, |
1911 | which are present in the second argument, into the corresponding |
1912 | character from the third argument. If the third argument is shorter |
1913 | than the second, the extra characters in the second argument, are |
1914 | deleted from the first (pueh). |
1915 `----------------------------------------------------------------------*/
1917 static void
1918 m4_translit (struct obstack *obs, int argc, token_data **argv)
1920 const char *data;
1921 const char *from;
1922 const char *to;
1923 char map[256] = {0};
1924 char found[256] = {0};
1925 unsigned char ch;
1927 if (bad_argc (argv[0], argc, 3, 4))
1929 /* builtin(`translit') is blank, but translit(`abc') is abc. */
1930 if (argc == 2)
1931 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1932 return;
1935 from = ARG (2);
1936 if (strchr (from, '-') != NULL)
1938 from = expand_ranges (from, obs);
1939 if (from == NULL)
1940 return;
1943 to = ARG (3);
1944 if (strchr (to, '-') != NULL)
1946 to = expand_ranges (to, obs);
1947 if (to == NULL)
1948 return;
1951 /* Calling strchr(from) for each character in data is quadratic,
1952 since both strings can be arbitrarily long. Instead, create a
1953 from-to mapping in one pass of from, then use that map in one
1954 pass of data, for linear behavior. Traditional behavior is that
1955 only the first instance of a character in from is consulted,
1956 hence the found map. */
1957 for ( ; (ch = *from) != '\0'; from++)
1959 if (! found[ch])
1961 found[ch] = 1;
1962 map[ch] = *to;
1964 if (*to != '\0')
1965 to++;
1968 for (data = ARG (1); (ch = *data) != '\0'; data++)
1970 if (! found[ch])
1971 obstack_1grow (obs, ch);
1972 else if (map[ch])
1973 obstack_1grow (obs, map[ch]);
1977 /*--------------------------------------------------------------.
1978 | Frontend for *printf like formatting. The function format () |
1979 | lives in the file format.c. |
1980 `--------------------------------------------------------------*/
1982 static void
1983 m4_format (struct obstack *obs, int argc, token_data **argv)
1985 if (bad_argc (argv[0], argc, 2, -1))
1986 return;
1987 format (obs, argc - 1, argv + 1);
1990 /*-------------------------------------------------------------------------.
1991 | Function to perform substitution by regular expressions. Used by the |
1992 | builtins regexp and patsubst. The changed text is placed on the |
1993 | obstack. The substitution is REPL, with \& substituted by this part of |
1994 | VICTIM matched by the last whole regular expression, taken from REGS[0], |
1995 | and \N substituted by the text matched by the Nth parenthesized |
1996 | sub-expression, taken from REGS[N]. |
1997 `-------------------------------------------------------------------------*/
1999 static int substitute_warned = 0;
2001 static void
2002 substitute (struct obstack *obs, const char *victim, const char *repl,
2003 struct re_registers *regs)
2005 int ch;
2007 for (;;)
2009 while ((ch = *repl++) != '\\')
2011 if (ch == '\0')
2012 return;
2013 obstack_1grow (obs, ch);
2016 switch ((ch = *repl++))
2018 case '0':
2019 if (!substitute_warned)
2021 M4ERROR ((warning_status, 0, "\
2022 Warning: \\0 will disappear, use \\& instead in replacements"));
2023 substitute_warned = 1;
2025 /* Fall through. */
2027 case '&':
2028 if (regs)
2029 obstack_grow (obs, victim + regs->start[0],
2030 regs->end[0] - regs->start[0]);
2031 break;
2033 case '1': case '2': case '3': case '4': case '5': case '6':
2034 case '7': case '8': case '9':
2035 ch -= '0';
2036 if (!regs || regs->num_regs - 1 <= ch)
2037 M4ERROR ((warning_status, 0,
2038 "Warning: sub-expression %d not present", ch));
2039 else if (regs->end[ch] > 0)
2040 obstack_grow (obs, victim + regs->start[ch],
2041 regs->end[ch] - regs->start[ch]);
2042 break;
2044 case '\0':
2045 M4ERROR ((warning_status, 0,
2046 "Warning: trailing \\ ignored in replacement"));
2047 return;
2049 default:
2050 obstack_1grow (obs, ch);
2051 break;
2056 /*------------------------------------------.
2057 | Initialize regular expression variables. |
2058 `------------------------------------------*/
2060 void
2061 init_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
2063 buf->translate = NULL;
2064 buf->fastmap = NULL;
2065 buf->buffer = NULL;
2066 buf->allocated = 0;
2067 if (regs)
2069 regs->start = NULL;
2070 regs->end = NULL;
2074 /*------------------------------------------------------------------.
2075 | Regular expression version of index. Given two arguments, expand |
2076 | to the index of the first match of the second argument (a regexp) |
2077 | in the first. Expand to -1 if there is no match. Given a third |
2078 | argument, a match is substituted according to this argument. |
2079 `------------------------------------------------------------------*/
2081 static void
2082 m4_regexp (struct obstack *obs, int argc, token_data **argv)
2084 const char *victim; /* first argument */
2085 const char *regexp; /* regular expression */
2086 const char *repl; /* replacement string */
2088 struct re_pattern_buffer *buf;/* compiled regular expression */
2089 struct re_registers *regs; /* for subexpression matches */
2090 const char *msg; /* error message from re_compile_pattern */
2091 int startpos; /* start position of match */
2092 int length; /* length of first argument */
2094 if (bad_argc (argv[0], argc, 3, 4))
2096 /* builtin(`regexp') is blank, but regexp(`abc') is 0. */
2097 if (argc == 2)
2098 shipout_int (obs, 0);
2099 return;
2102 victim = ARG (1);
2103 regexp = ARG (2);
2104 repl = ARG (3);
2106 if (!*regexp)
2108 /* The empty regex matches everything! */
2109 if (argc == 3)
2110 shipout_int (obs, 0);
2111 else
2112 substitute (obs, victim, repl, NULL);
2113 return;
2116 #ifdef DEBUG_REGEX
2117 if (trace_file)
2118 xfprintf (trace_file, "r:{%s}:%s%s%s\n", regexp,
2119 argc == 3 ? "" : "{", repl, argc == 3 ? "" : "}");
2120 #endif /* DEBUG_REGEX */
2122 msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
2123 if (msg != NULL)
2125 M4ERROR ((warning_status, 0,
2126 "bad regular expression: `%s': %s", regexp, msg));
2127 return;
2130 length = strlen (victim);
2131 /* Avoid overhead of allocating regs if we won't use it. */
2132 startpos = re_search (buf, victim, length, 0, length,
2133 argc == 3 ? NULL : regs);
2135 if (startpos == -2)
2136 M4ERROR ((warning_status, 0,
2137 "error matching regular expression `%s'", regexp));
2138 else if (argc == 3)
2139 shipout_int (obs, startpos);
2140 else if (startpos >= 0)
2141 substitute (obs, victim, repl, regs);
2144 /*------------------------------------------------------------------.
2145 | Substitute all matches of a regexp occurring in a string. Each |
2146 | match of the second argument (a regexp) in the first argument is |
2147 | changed to the third argument, with \& substituted by the matched |
2148 | text, and \N substituted by the text matched by the Nth |
2149 | parenthesized sub-expression. |
2150 `------------------------------------------------------------------*/
2152 static void
2153 m4_patsubst (struct obstack *obs, int argc, token_data **argv)
2155 const char *victim; /* first argument */
2156 const char *regexp; /* regular expression */
2157 const char *repl;
2159 struct re_pattern_buffer *buf;/* compiled regular expression */
2160 struct re_registers *regs; /* for subexpression matches */
2161 const char *msg; /* error message from re_compile_pattern */
2162 int matchpos; /* start position of match */
2163 int offset; /* current match offset */
2164 int length; /* length of first argument */
2166 if (bad_argc (argv[0], argc, 3, 4))
2168 /* builtin(`patsubst') is blank, but patsubst(`abc') is abc. */
2169 if (argc == 2)
2170 obstack_grow (obs, ARG (1), strlen (ARG (1)));
2171 return;
2174 victim = ARG (1);
2175 regexp = ARG (2);
2176 repl = ARG (3);
2178 /* The empty regex matches everywhere, but if there is no
2179 replacement, we need not waste time with it. */
2180 if (!*regexp && !*repl)
2182 obstack_grow (obs, victim, strlen (victim));
2183 return;
2186 #ifdef DEBUG_REGEX
2187 if (trace_file)
2188 xfprintf (trace_file, "p:{%s}:{%s}\n", regexp, repl);
2189 #endif /* DEBUG_REGEX */
2191 msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
2192 if (msg != NULL)
2194 M4ERROR ((warning_status, 0,
2195 "bad regular expression `%s': %s", regexp, msg));
2196 return;
2199 length = strlen (victim);
2201 offset = 0;
2202 matchpos = 0;
2203 while (offset <= length)
2205 matchpos = re_search (buf, victim, length,
2206 offset, length - offset, regs);
2207 if (matchpos < 0)
2210 /* Match failed -- either error or there is no match in the
2211 rest of the string, in which case the rest of the string is
2212 copied verbatim. */
2214 if (matchpos == -2)
2215 M4ERROR ((warning_status, 0,
2216 "error matching regular expression `%s'", regexp));
2217 else if (offset < length)
2218 obstack_grow (obs, victim + offset, length - offset);
2219 break;
2222 /* Copy the part of the string that was skipped by re_search (). */
2224 if (matchpos > offset)
2225 obstack_grow (obs, victim + offset, matchpos - offset);
2227 /* Handle the part of the string that was covered by the match. */
2229 substitute (obs, victim, repl, regs);
2231 /* Update the offset to the end of the match. If the regexp
2232 matched a null string, advance offset one more, to avoid
2233 infinite loops. */
2235 offset = regs->end[0];
2236 if (regs->start[0] == regs->end[0])
2237 obstack_1grow (obs, victim[offset++]);
2239 obstack_1grow (obs, '\0');
2242 /* Finally, a placeholder builtin. This builtin is not installed by
2243 default, but when reading back frozen files, this is associated
2244 with any builtin we don't recognize (for example, if the frozen
2245 file was created with a changeword capable m4, but is then loaded
2246 by a different m4 that does not support changeword). This way, we
2247 can keep 'm4 -R' quiet in the common case that the user did not
2248 know or care about the builtin when the frozen file was created,
2249 while still flagging it as a potential error if an attempt is made
2250 to actually use the builtin. */
2252 /*--------------------------------------------------------------------.
2253 | Issue a warning that this macro is a placeholder for an unsupported |
2254 | builtin that was requested while reloading a frozen file. |
2255 `--------------------------------------------------------------------*/
2257 void
2258 m4_placeholder (struct obstack *obs, int argc, token_data **argv)
2260 M4ERROR ((warning_status, 0, "\
2261 builtin `%s' requested by frozen file is not supported", ARG (0)));
2264 /*-------------------------------------------------------------------------.
2265 | This function handles all expansion of user defined and predefined |
2266 | macros. It is called with an obstack OBS, where the macros expansion |
2267 | will be placed, as an unfinished object. SYM points to the macro |
2268 | definition, giving the expansion text. ARGC and ARGV are the arguments, |
2269 | as usual. |
2270 `-------------------------------------------------------------------------*/
2272 void
2273 expand_user_macro (struct obstack *obs, symbol *sym,
2274 int argc, token_data **argv)
2276 const char *text;
2277 int i;
2279 for (text = SYMBOL_TEXT (sym); *text != '\0';)
2281 if (*text != '$')
2283 obstack_1grow (obs, *text);
2284 text++;
2285 continue;
2287 text++;
2288 switch (*text)
2290 case '0': case '1': case '2': case '3': case '4':
2291 case '5': case '6': case '7': case '8': case '9':
2292 if (no_gnu_extensions)
2294 i = *text++ - '0';
2296 else
2298 for (i = 0; isdigit (to_uchar (*text)); text++)
2299 i = i*10 + (*text - '0');
2301 if (i < argc)
2302 obstack_grow (obs, TOKEN_DATA_TEXT (argv[i]),
2303 strlen (TOKEN_DATA_TEXT (argv[i])));
2304 break;
2306 case '#': /* number of arguments */
2307 shipout_int (obs, argc - 1);
2308 text++;
2309 break;
2311 case '*': /* all arguments */
2312 case '@': /* ... same, but quoted */
2313 dump_args (obs, argc, argv, ",", *text == '@');
2314 text++;
2315 break;
2317 default:
2318 obstack_1grow (obs, '$');
2319 break;