* doc/m4.texinfo: Pull in various improvements from head.
[m4.git] / src / builtin.c
blobe195fe02caea9147850083083dcfe57e7ca7cd49
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2000, 2004, 2006, 2007
4 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA
22 /* Code for all builtin macros, initialization of symbol table, and
23 expansion of user defined macros. */
25 #include "m4.h"
27 extern FILE *popen ();
29 #include "regex.h"
30 #include "strstr.h"
32 #if HAVE_SYS_WAIT_H
33 # include <sys/wait.h>
34 #endif
/* ARG (I) yields the text of argument I of the macro call being
   processed, or the empty string when ARGC shows that argument I was
   not supplied.  It expands to references to variables named argc and
   argv, so it can only be used where both are in scope.  */
36 #define ARG(i) (argc > (i) ? TOKEN_DATA_TEXT (argv[i]) : "")
38 /* Initialization of builtin and predefined macros. The table
39 "builtin_tab" is both used for initialization, and by the "builtin"
40 builtin. */
/* DECLARE (NAME) expands to the forward declaration of the static
   handler NAME with the common (obstack, argc, argv) signature.  The
   list below declares every handler that builtin_tab refers to before
   its definition appears.  */
42 #define DECLARE(name) \
43 static void name (struct obstack *, int, token_data **)
45 DECLARE (m4___file__);
46 DECLARE (m4___line__);
47 DECLARE (m4___program__);
48 DECLARE (m4_builtin);
49 DECLARE (m4_changecom);
50 DECLARE (m4_changequote);
51 #ifdef ENABLE_CHANGEWORD
52 DECLARE (m4_changeword);
53 #endif
54 DECLARE (m4_debugmode);
55 DECLARE (m4_debugfile);
56 DECLARE (m4_decr);
57 DECLARE (m4_define);
58 DECLARE (m4_defn);
59 DECLARE (m4_divert);
60 DECLARE (m4_divnum);
61 DECLARE (m4_dnl);
62 DECLARE (m4_dumpdef);
63 DECLARE (m4_errprint);
64 DECLARE (m4_esyscmd);
65 DECLARE (m4_eval);
66 DECLARE (m4_format);
67 DECLARE (m4_ifdef);
68 DECLARE (m4_ifelse);
69 DECLARE (m4_include);
70 DECLARE (m4_incr);
71 DECLARE (m4_index);
72 DECLARE (m4_indir);
73 DECLARE (m4_len);
74 DECLARE (m4_m4exit);
75 DECLARE (m4_m4wrap);
76 DECLARE (m4_maketemp);
77 DECLARE (m4_mkstemp);
78 DECLARE (m4_patsubst);
79 DECLARE (m4_popdef);
80 DECLARE (m4_pushdef);
81 DECLARE (m4_regexp);
82 DECLARE (m4_shift);
83 DECLARE (m4_sinclude);
84 DECLARE (m4_substr);
85 DECLARE (m4_syscmd);
86 DECLARE (m4_sysval);
87 DECLARE (m4_traceoff);
88 DECLARE (m4_traceon);
89 DECLARE (m4_translit);
90 DECLARE (m4_undefine);
91 DECLARE (m4_undivert);
/* The helper macro is not needed past this point.  */
93 #undef DECLARE
/* Table of every builtin macro.  builtin_init () walks it to install
   the builtins, and find_builtin_by_addr () / find_builtin_by_name ()
   scan it; all three stop at the entry whose name is null.
   NOTE(review): the column labels below presumably correspond to the
   fields name, gnu_extension, groks_macro_args, blind_if_no_args and
   func that the rest of this file reads -- confirm against the struct
   declaration in m4.h.  */
95 static builtin
96 builtin_tab[] =
99 /* name GNUext macros blind function */
101 { "__file__", true, false, false, m4___file__ },
102 { "__line__", true, false, false, m4___line__ },
103 { "__program__", true, false, false, m4___program__ },
104 { "builtin", true, true, true, m4_builtin },
105 { "changecom", false, false, false, m4_changecom },
106 { "changequote", false, false, false, m4_changequote },
107 #ifdef ENABLE_CHANGEWORD
108 { "changeword", true, false, true, m4_changeword },
109 #endif
110 { "debugmode", true, false, false, m4_debugmode },
111 { "debugfile", true, false, false, m4_debugfile },
112 { "decr", false, false, true, m4_decr },
113 { "define", false, true, true, m4_define },
114 { "defn", false, false, true, m4_defn },
115 { "divert", false, false, false, m4_divert },
116 { "divnum", false, false, false, m4_divnum },
117 { "dnl", false, false, false, m4_dnl },
118 { "dumpdef", false, false, false, m4_dumpdef },
119 { "errprint", false, false, true, m4_errprint },
120 { "esyscmd", true, false, true, m4_esyscmd },
121 { "eval", false, false, true, m4_eval },
122 { "format", true, false, true, m4_format },
123 { "ifdef", false, false, true, m4_ifdef },
124 { "ifelse", false, false, true, m4_ifelse },
125 { "include", false, false, true, m4_include },
126 { "incr", false, false, true, m4_incr },
127 { "index", false, false, true, m4_index },
128 { "indir", true, true, true, m4_indir },
129 { "len", false, false, true, m4_len },
130 { "m4exit", false, false, false, m4_m4exit },
131 { "m4wrap", false, false, true, m4_m4wrap },
132 { "maketemp", false, false, true, m4_maketemp },
133 { "mkstemp", false, false, true, m4_mkstemp },
134 { "patsubst", true, false, true, m4_patsubst },
135 { "popdef", false, false, true, m4_popdef },
136 { "pushdef", false, true, true, m4_pushdef },
137 { "regexp", true, false, true, m4_regexp },
138 { "shift", false, false, true, m4_shift },
139 { "sinclude", false, false, true, m4_sinclude },
140 { "substr", false, false, true, m4_substr },
141 { "syscmd", false, false, true, m4_syscmd },
142 { "sysval", false, false, false, m4_sysval },
143 { "traceoff", false, false, false, m4_traceoff },
144 { "traceon", false, false, false, m4_traceon },
145 { "translit", false, false, true, m4_translit },
146 { "undefine", false, false, true, m4_undefine },
147 { "undivert", false, false, false, m4_undivert },
/* End delimiter: the null name stops every scan of the table.  */
149 { 0, false, false, false, 0 },
151 /* placeholder is intentionally stuck after the table end delimiter,
152 so that we can easily find it, while not treating it as a real
153 builtin. */
/* The lookup functions reach this entry as "end delimiter + 1".  */
154 { "placeholder", true, false, false, m4_placeholder },
/* Table of predefined text macros, walked by builtin_init () until an
   entry whose func member is null.  builtin_init () defines the
   unix_name of each entry when GNU extensions are disabled and the
   gnu_name otherwise, skipping null names, with func as the expansion.
   NOTE(review): the initializers presumably list unix_name, gnu_name
   and func in that order -- confirm against the struct in m4.h.  */
157 static predefined const
158 predefined_tab[] =
/* At most one platform macro is provided, chosen at compile time.  */
160 #if UNIX
161 { "unix", "__unix__", "" },
162 #elif W32_NATIVE
163 { "windows", "__windows__", "" },
164 #elif OS2
165 { "os2", "__os2__", "" },
166 #else
167 # warning Platform macro not provided
168 #endif
169 { NULL, "__gnu__", "" },
/* End delimiter.  */
171 { NULL, NULL, NULL },
174 /*----------------------------------------.
175 | Find the builtin, which lives on ADDR. |
176 `----------------------------------------*/
178 const builtin *
179 find_builtin_by_addr (builtin_func *func)
181 const builtin *bp;
183 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
184 if (bp->func == func)
185 return bp;
186 if (func == m4_placeholder)
187 return bp + 1;
188 return NULL;
191 /*----------------------------------------------------------.
192 | Find the builtin, which has NAME. On failure, return the |
193 | placeholder builtin. |
194 `----------------------------------------------------------*/
196 const builtin *
197 find_builtin_by_name (const char *name)
199 const builtin *bp;
201 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
202 if (strcmp (bp->name, name) == 0)
203 return bp;
204 return bp + 1;
207 /*-------------------------------------------------------------------------.
208 | Install a builtin macro with name NAME, bound to the C function given in |
209 | BP. MODE is SYMBOL_INSERT or SYMBOL_PUSHDEF. TRACED defines whether |
210 | NAME is to be traced. |
211 `-------------------------------------------------------------------------*/
213 void
214 define_builtin (const char *name, const builtin *bp, symbol_lookup mode)
216 symbol *sym;
218 sym = lookup_symbol (name, mode);
219 SYMBOL_TYPE (sym) = TOKEN_FUNC;
220 SYMBOL_MACRO_ARGS (sym) = bp->groks_macro_args;
221 SYMBOL_BLIND_NO_ARGS (sym) = bp->blind_if_no_args;
222 SYMBOL_FUNC (sym) = bp->func;
225 /*-------------------------------------------------------------------------.
226 | Define a predefined or user-defined macro, with name NAME, and expansion |
227 | TEXT. MODE destinguishes between the "define" and the "pushdef" case. |
228 | It is also used from main (). |
229 `-------------------------------------------------------------------------*/
231 void
232 define_user_macro (const char *name, const char *text, symbol_lookup mode)
234 symbol *s;
236 s = lookup_symbol (name, mode);
237 if (SYMBOL_TYPE (s) == TOKEN_TEXT)
238 free (SYMBOL_TEXT (s));
240 SYMBOL_TYPE (s) = TOKEN_TEXT;
241 SYMBOL_TEXT (s) = xstrdup (text ? text : "");
244 /*-----------------------------------------------.
245 | Initialize all builtin and predefined macros. |
246 `-----------------------------------------------*/
248 void
249 builtin_init (void)
251 const builtin *bp;
252 const predefined *pp;
253 char *string;
255 for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
256 if (!no_gnu_extensions || !bp->gnu_extension)
258 if (prefix_all_builtins)
260 string = (char *) xmalloc (strlen (bp->name) + 4);
261 strcpy (string, "m4_");
262 strcat (string, bp->name);
263 define_builtin (string, bp, SYMBOL_INSERT);
264 free (string);
266 else
267 define_builtin (bp->name, bp, SYMBOL_INSERT);
270 for (pp = &predefined_tab[0]; pp->func != NULL; pp++)
271 if (no_gnu_extensions)
273 if (pp->unix_name != NULL)
274 define_user_macro (pp->unix_name, pp->func, SYMBOL_INSERT);
276 else
278 if (pp->gnu_name != NULL)
279 define_user_macro (pp->gnu_name, pp->func, SYMBOL_INSERT);
283 /*------------------------------------------------------------------------.
284 | Give friendly warnings if a builtin macro is passed an inappropriate |
285 | number of arguments. NAME is macro name for messages, ARGC is actual |
286 | number of arguments, MIN is the minimum number of acceptable arguments, |
287 | negative if not applicable, MAX is the maximum number, negative if not |
288 | applicable. |
289 `------------------------------------------------------------------------*/
291 static bool
292 bad_argc (token_data *name, int argc, int min, int max)
294 bool isbad = false;
296 if (min > 0 && argc < min)
298 if (!suppress_warnings)
299 M4ERROR ((warning_status, 0,
300 "Warning: too few arguments to builtin `%s'",
301 TOKEN_DATA_TEXT (name)));
302 isbad = true;
304 else if (max > 0 && argc > max && !suppress_warnings)
305 M4ERROR ((warning_status, 0,
306 "Warning: excess arguments to builtin `%s' ignored",
307 TOKEN_DATA_TEXT (name)));
309 return isbad;
312 /*--------------------------------------------------------------------------.
313 | The function numeric_arg () converts ARG to an int pointed to by VALUEP. |
314 | If the conversion fails, print error message for macro MACRO. Return |
315 | true iff conversion succeeds. |
316 `--------------------------------------------------------------------------*/
318 static bool
319 numeric_arg (token_data *macro, const char *arg, int *valuep)
321 char *endp;
323 if (*arg == '\0')
325 *valuep = 0;
326 M4ERROR ((warning_status, 0,
327 "empty string treated as 0 in builtin `%s'",
328 TOKEN_DATA_TEXT (macro)));
330 else
332 errno = 0;
333 *valuep = strtol (arg, &endp, 10);
334 if (*endp != '\0')
336 M4ERROR ((warning_status, 0,
337 "non-numeric argument to builtin `%s'",
338 TOKEN_DATA_TEXT (macro)));
339 return false;
341 if (isspace (to_uchar (*arg)))
342 M4ERROR ((warning_status, 0,
343 "leading whitespace ignored in builtin `%s'",
344 TOKEN_DATA_TEXT (macro)));
345 else if (errno == ERANGE)
346 M4ERROR ((warning_status, 0,
347 "numeric overflow detected in builtin `%s'",
348 TOKEN_DATA_TEXT (macro)));
350 return true;
/* Digits for number to ascii conversions; its length is the largest
   radix that can be represented.  */
static char const digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";

/* Convert VALUE to its signed textual representation in radix RADIX,
   using the characters of DIGITS.  The result lives in a static
   buffer, so it is overwritten by the next call.  RADIX must be at
   least 2: the digit loop divides by it until the value reaches 0.  */

static const char *
ntoa (int32_t value, int radix)
{
  static char buffer[256];
  char *cursor = buffer + sizeof buffer - 1;
  const bool is_negative = value < 0;
  /* Negate in unsigned arithmetic so the most negative value is
     handled without overflow.  */
  uint32_t magnitude = is_negative ? -(uint32_t) value : (uint32_t) value;

  /* The text is built backwards, starting from the terminator.  */
  *cursor = '\0';
  do
    {
      *--cursor = digits[magnitude % radix];
      magnitude /= radix;
    }
  while (magnitude != 0);

  if (is_negative)
    *--cursor = '-';
  return cursor;
}
/* Append the decimal representation of VAL to the obstack OBS.  Used
   by macros that expand to numbers.  */

static void
shipout_int (struct obstack *obs, int val)
{
  const char *text = ntoa ((int32_t) val, 10);
  size_t length = strlen (text);

  obstack_grow (obs, text, length);
}
408 /*----------------------------------------------------------------------.
409 | Print ARGC arguments from the table ARGV to obstack OBS, separated by |
410 | SEP, and quoted by the current quotes, if QUOTED is true. |
411 `----------------------------------------------------------------------*/
413 static void
414 dump_args (struct obstack *obs, int argc, token_data **argv,
415 const char *sep, bool quoted)
417 int i;
418 size_t len = strlen (sep);
420 for (i = 1; i < argc; i++)
422 if (i > 1)
423 obstack_grow (obs, sep, len);
424 if (quoted)
425 obstack_grow (obs, lquote.string, lquote.length);
426 obstack_grow (obs, TOKEN_DATA_TEXT (argv[i]),
427 strlen (TOKEN_DATA_TEXT (argv[i])));
428 if (quoted)
429 obstack_grow (obs, rquote.string, rquote.length);
/* The rest of this file is code for builtins and expansion of user
   defined macros.  All the functions for builtins have a prototype as:

        void m4_MACRONAME (struct obstack *obs, int argc, token_data **argv);

   The functions are expected to leave their expansion on the obstack OBS,
   as an unfinished object.  ARGV is a table of ARGC pointers to the
   individual arguments to the macro.  Please note that in general
   argv[argc] != NULL.  */

/* The first section contains macros for defining, undefining, examining,
   changing, ... other macros.  */
446 /*-------------------------------------------------------------------------.
447 | The function define_macro is common for the builtins "define", |
448 | "undefine", "pushdef" and "popdef". ARGC and ARGV is as for the caller, |
449 | and MODE argument determines how the macro name is entered into the |
450 | symbol table. |
451 `-------------------------------------------------------------------------*/
453 static void
454 define_macro (int argc, token_data **argv, symbol_lookup mode)
456 const builtin *bp;
458 if (bad_argc (argv[0], argc, 2, 3))
459 return;
461 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
463 M4ERROR ((warning_status, 0,
464 "Warning: %s: invalid macro name ignored", ARG (0)));
465 return;
468 if (argc == 2)
470 define_user_macro (ARG (1), "", mode);
471 return;
474 switch (TOKEN_DATA_TYPE (argv[2]))
476 case TOKEN_TEXT:
477 define_user_macro (ARG (1), ARG (2), mode);
478 break;
480 case TOKEN_FUNC:
481 bp = find_builtin_by_addr (TOKEN_DATA_FUNC (argv[2]));
482 if (bp == NULL)
483 return;
484 else
485 define_builtin (ARG (1), bp, mode);
486 break;
488 default:
489 M4ERROR ((warning_status, 0,
490 "INTERNAL ERROR: bad token data type in define_macro ()"));
491 abort ();
495 static void
496 m4_define (struct obstack *obs, int argc, token_data **argv)
498 define_macro (argc, argv, SYMBOL_INSERT);
501 static void
502 m4_undefine (struct obstack *obs, int argc, token_data **argv)
504 int i;
505 if (bad_argc (argv[0], argc, 2, -1))
506 return;
507 for (i = 1; i < argc; i++)
508 lookup_symbol (ARG (i), SYMBOL_DELETE);
511 static void
512 m4_pushdef (struct obstack *obs, int argc, token_data **argv)
514 define_macro (argc, argv, SYMBOL_PUSHDEF);
517 static void
518 m4_popdef (struct obstack *obs, int argc, token_data **argv)
520 int i;
521 if (bad_argc (argv[0], argc, 2, -1))
522 return;
523 for (i = 1; i < argc; i++)
524 lookup_symbol (ARG (i), SYMBOL_POPDEF);
527 /*---------------------.
528 | Conditionals of m4. |
529 `---------------------*/
531 static void
532 m4_ifdef (struct obstack *obs, int argc, token_data **argv)
534 symbol *s;
535 const char *result;
537 if (bad_argc (argv[0], argc, 3, 4))
538 return;
539 s = lookup_symbol (ARG (1), SYMBOL_LOOKUP);
541 if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
542 result = ARG (2);
543 else if (argc >= 4)
544 result = ARG (3);
545 else
546 result = NULL;
548 if (result != NULL)
549 obstack_grow (obs, result, strlen (result));
552 static void
553 m4_ifelse (struct obstack *obs, int argc, token_data **argv)
555 const char *result;
556 token_data *argv0;
558 if (argc == 2)
559 return;
561 if (bad_argc (argv[0], argc, 4, -1))
562 return;
563 else
564 /* Diagnose excess arguments if 5, 8, 11, etc., actual arguments. */
565 bad_argc (argv[0], (argc + 2) % 3, -1, 1);
567 argv0 = argv[0];
568 argv++;
569 argc--;
571 result = NULL;
572 while (result == NULL)
574 if (strcmp (ARG (0), ARG (1)) == 0)
575 result = ARG (2);
577 else
578 switch (argc)
580 case 3:
581 return;
583 case 4:
584 case 5:
585 result = ARG (3);
586 break;
588 default:
589 argc -= 3;
590 argv += 3;
593 obstack_grow (obs, result, strlen (result));
596 /*---------------------------------------------------------------------.
597 | The function dump_symbol () is for use by "dumpdef". It builds up a |
598 | table of all defined, un-shadowed, symbols. |
599 `---------------------------------------------------------------------*/
601 /* The structure dump_symbol_data is used to pass the information needed
602 from call to call to dump_symbol. */
/* Accumulator filled by dump_symbol (): a growing array of symbol
   pointers kept on an obstack.  */
604 struct dump_symbol_data
606 struct obstack *obs; /* obstack on which the table is grown */
607 symbol **base; /* base of table; re-read from the obstack after each growth */
608 int size; /* number of symbols stored in the table so far */
611 static void
612 dump_symbol (symbol *sym, void *arg)
614 struct dump_symbol_data *data = (struct dump_symbol_data *) arg;
615 if (!SYMBOL_SHADOWED (sym) && SYMBOL_TYPE (sym) != TOKEN_VOID)
617 obstack_blank (data->obs, sizeof (symbol *));
618 data->base = (symbol **) obstack_base (data->obs);
619 data->base[data->size++] = sym;
623 /*------------------------------------------------------------------------.
624 | qsort comparison routine, for sorting the table made in m4_dumpdef (). |
625 `------------------------------------------------------------------------*/
627 static int
628 dumpdef_cmp (const void *s1, const void *s2)
630 return strcmp (SYMBOL_NAME (* (symbol *const *) s1),
631 SYMBOL_NAME (* (symbol *const *) s2));
634 /*-------------------------------------------------------------------------.
635 | Implementation of "dumpdef" itself. It builds up a table of pointers to |
636 | symbols, sorts it and prints the sorted table. |
637 `-------------------------------------------------------------------------*/
639 static void
640 m4_dumpdef (struct obstack *obs, int argc, token_data **argv)
642 symbol *s;
643 int i;
644 struct dump_symbol_data data;
645 const builtin *bp;
647 data.obs = obs;
648 data.base = (symbol **) obstack_base (obs);
649 data.size = 0;
651 if (argc == 1)
653 hack_all_symbols (dump_symbol, &data);
655 else
657 for (i = 1; i < argc; i++)
659 s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_LOOKUP);
660 if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
661 dump_symbol (s, &data);
662 else
663 M4ERROR ((warning_status, 0,
664 "undefined macro `%s'", TOKEN_DATA_TEXT (argv[i])));
668 /* Make table of symbols invisible to expand_macro (). */
670 obstack_finish (obs);
672 qsort (data.base, data.size, sizeof (symbol *), dumpdef_cmp);
674 for (; data.size > 0; --data.size, data.base++)
676 DEBUG_PRINT1 ("%s:\t", SYMBOL_NAME (data.base[0]));
678 switch (SYMBOL_TYPE (data.base[0]))
680 case TOKEN_TEXT:
681 if (debug_level & DEBUG_TRACE_QUOTE)
682 DEBUG_PRINT3 ("%s%s%s\n",
683 lquote.string, SYMBOL_TEXT (data.base[0]), rquote.string);
684 else
685 DEBUG_PRINT1 ("%s\n", SYMBOL_TEXT (data.base[0]));
686 break;
688 case TOKEN_FUNC:
689 bp = find_builtin_by_addr (SYMBOL_FUNC (data.base[0]));
690 if (bp == NULL)
692 M4ERROR ((warning_status, 0, "\
693 INTERNAL ERROR: builtin not found in builtin table"));
694 abort ();
696 DEBUG_PRINT1 ("<%s>\n", bp->name);
697 break;
699 default:
700 M4ERROR ((warning_status, 0,
701 "INTERNAL ERROR: bad token data type in m4_dumpdef ()"));
702 abort ();
703 break;
708 /*---------------------------------------------------------------------.
709 | The builtin "builtin" allows calls to builtin macros, even if their |
710 | definition has been overridden or shadowed. It is thus possible to |
711 | redefine builtins, and still access their original definition. This |
712 | macro is not available in compatibility mode. |
713 `---------------------------------------------------------------------*/
715 static void
716 m4_builtin (struct obstack *obs, int argc, token_data **argv)
718 const builtin *bp;
719 const char *name;
721 if (bad_argc (argv[0], argc, 2, -1))
722 return;
723 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
725 M4ERROR ((warning_status, 0,
726 "Warning: %s: invalid macro name ignored", ARG (0)));
727 return;
730 name = ARG (1);
731 bp = find_builtin_by_name (name);
732 if (bp->func == m4_placeholder)
733 M4ERROR ((warning_status, 0,
734 "undefined builtin `%s'", name));
735 else
737 int i;
738 if (! bp->groks_macro_args)
739 for (i = 2; i < argc; i++)
740 if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
742 TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
743 TOKEN_DATA_TEXT (argv[i]) = (char *) "";
745 bp->func (obs, argc - 1, argv + 1);
749 /*------------------------------------------------------------------------.
750 | The builtin "indir" allows indirect calls to macros, even if their name |
751 | is not a proper macro name. It is thus possible to define macros with |
752 | ill-formed names for internal use in larger macro packages. This macro |
753 | is not available in compatibility mode. |
754 `------------------------------------------------------------------------*/
756 static void
757 m4_indir (struct obstack *obs, int argc, token_data **argv)
759 symbol *s;
760 const char *name;
762 if (bad_argc (argv[0], argc, 2, -1))
763 return;
764 if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
766 M4ERROR ((warning_status, 0,
767 "Warning: %s: invalid macro name ignored", ARG (0)));
768 return;
771 name = ARG (1);
772 s = lookup_symbol (name, SYMBOL_LOOKUP);
773 if (s == NULL || SYMBOL_TYPE (s) == TOKEN_VOID)
774 M4ERROR ((warning_status, 0,
775 "undefined macro `%s'", name));
776 else
778 int i;
779 if (! SYMBOL_MACRO_ARGS (s))
780 for (i = 2; i < argc; i++)
781 if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
783 TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
784 TOKEN_DATA_TEXT (argv[i]) = (char *) "";
786 call_macro (s, argc - 1, argv + 1, obs);
790 /*-------------------------------------------------------------------------.
791 | The macro "defn" returns the quoted definition of the macro named by the |
792 | first argument. If the macro is builtin, it will push a special |
793 | macro-definition token on the input stack. |
794 `-------------------------------------------------------------------------*/
796 static void
797 m4_defn (struct obstack *obs, int argc, token_data **argv)
799 symbol *s;
800 builtin_func *b;
802 if (bad_argc (argv[0], argc, 2, 2))
803 return;
805 s = lookup_symbol (ARG (1), SYMBOL_LOOKUP);
806 if (s == NULL)
807 return;
809 switch (SYMBOL_TYPE (s))
811 case TOKEN_TEXT:
812 obstack_grow (obs, lquote.string, lquote.length);
813 obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s)));
814 obstack_grow (obs, rquote.string, rquote.length);
815 break;
817 case TOKEN_FUNC:
818 b = SYMBOL_FUNC (s);
819 if (b == m4_placeholder)
820 M4ERROR ((warning_status, 0, "\
821 builtin `%s' requested by frozen file is not supported", ARG (1)));
822 else
823 push_macro (b);
824 break;
826 case TOKEN_VOID:
827 break;
829 default:
830 M4ERROR ((warning_status, 0,
831 "INTERNAL ERROR: bad symbol type in m4_defn ()"));
832 abort ();
836 /*------------------------------------------------------------------------.
837 | This section contains macros to handle the builtins "syscmd", "esyscmd" |
838 | and "sysval". "esyscmd" is GNU specific. |
839 `------------------------------------------------------------------------*/
841 /* Helper macros for readability. */
/* m4_sysval () ORs the two M4SYSVAL_* results together to form the
   value of "sysval": the exit-status part, and the terminating-signal
   part shifted left by eight bits.  */
842 #if UNIX || defined WEXITSTATUS
843 # define M4SYSVAL_EXITBITS(status) \
844 (WIFEXITED (status) ? WEXITSTATUS (status) : 0)
845 # define M4SYSVAL_TERMSIGBITS(status) \
846 (WIFSIGNALED (status) ? WTERMSIG (status) << 8 : 0)
848 #else /* ! UNIX && ! defined WEXITSTATUS */
849 /* Platforms such as mingw do not support the notion of reporting
850 which signal terminated a process. Furthermore if WEXITSTATUS was
851 not provided, then the exit value is in the low eight bits. */
852 # define M4SYSVAL_EXITBITS(status) status
853 # define M4SYSVAL_TERMSIGBITS(status) 0
854 #endif /* ! UNIX && ! defined WEXITSTATUS */
856 /* Fallback definitions if <stdlib.h> or <sys/wait.h> are inadequate. */
/* These are defined after the M4SYSVAL_* macros on purpose: the test
   "defined WEXITSTATUS" above must see only what the system headers
   provided.  */
857 #ifndef WEXITSTATUS
858 # define WEXITSTATUS(status) (((status) >> 8) & 0xff)
859 #endif
860 #ifndef WTERMSIG
861 # define WTERMSIG(status) ((status) & 0x7f)
862 #endif
863 #ifndef WIFSIGNALED
864 # define WIFSIGNALED(status) (WTERMSIG (status) != 0)
865 #endif
866 #ifndef WIFEXITED
867 # define WIFEXITED(status) (WTERMSIG (status) == 0)
868 #endif
870 /* Exit code from last "syscmd" command. */
/* Also written by m4_esyscmd (), which stores -1 when the pipe cannot
   be opened; m4_sysval () reports -1 as 127.  */
871 static int sysval;
873 static void
874 m4_syscmd (struct obstack *obs, int argc, token_data **argv)
876 if (bad_argc (argv[0], argc, 2, 2))
878 /* The empty command is successful. */
879 sysval = 0;
880 return;
883 debug_flush_files ();
884 sysval = system (ARG (1));
885 #if FUNC_SYSTEM_BROKEN
886 /* OS/2 has a buggy system() that returns exit status in the lowest eight
887 bits, although pclose() and WEXITSTATUS are defined to return exit
888 status in the next eight bits. This approach can't detect signals, but
889 at least syscmd(`ls') still works when stdout is a terminal. An
890 alternate approach is popen/insert_file/pclose, but that makes stdout
891 a pipe, which can change how some child processes behave. */
892 if (sysval != -1)
893 sysval <<= 8;
894 #endif /* FUNC_SYSTEM_BROKEN */
897 static void
898 m4_esyscmd (struct obstack *obs, int argc, token_data **argv)
900 FILE *pin;
901 int ch;
903 if (bad_argc (argv[0], argc, 2, 2))
905 /* The empty command is successful. */
906 sysval = 0;
907 return;
910 debug_flush_files ();
911 errno = 0;
912 pin = popen (ARG (1), "r");
913 if (pin == NULL)
915 M4ERROR ((warning_status, errno,
916 "cannot open pipe to command `%s'", ARG (1)));
917 sysval = -1;
919 else
921 while ((ch = getc (pin)) != EOF)
922 obstack_1grow (obs, (char) ch);
923 sysval = pclose (pin);
927 static void
928 m4_sysval (struct obstack *obs, int argc, token_data **argv)
930 shipout_int (obs, (sysval == -1 ? 127
931 : (M4SYSVAL_EXITBITS (sysval)
932 | M4SYSVAL_TERMSIGBITS (sysval))));
935 /*-------------------------------------------------------------------------.
936 | This section contains the top level code for the "eval" builtin. The |
937 | actual work is done in the function evaluate (), which lives in eval.c. |
938 `-------------------------------------------------------------------------*/
940 static void
941 m4_eval (struct obstack *obs, int argc, token_data **argv)
943 int32_t value = 0;
944 int radix = 10;
945 int min = 1;
946 const char *s;
948 if (bad_argc (argv[0], argc, 2, 4))
949 return;
951 if (*ARG (2) && !numeric_arg (argv[0], ARG (2), &radix))
952 return;
954 if (radix < 1 || radix > (int) strlen (digits))
956 M4ERROR ((warning_status, 0,
957 "radix %d in builtin `%s' out of range",
958 radix, ARG (0)));
959 return;
962 if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &min))
963 return;
964 if (min < 0)
966 M4ERROR ((warning_status, 0,
967 "negative width to builtin `%s'", ARG (0)));
968 return;
971 if (!*ARG (1))
972 M4ERROR ((warning_status, 0,
973 "empty string treated as 0 in builtin `%s'", ARG (0)));
974 else if (evaluate (ARG (1), &value))
975 return;
977 if (radix == 1)
979 if (value < 0)
981 obstack_1grow (obs, '-');
982 value = -value;
984 /* This assumes 2's-complement for correctly handling INT_MIN. */
985 while (min-- - value > 0)
986 obstack_1grow (obs, '0');
987 while (value-- != 0)
988 obstack_1grow (obs, '1');
989 obstack_1grow (obs, '\0');
990 return;
993 s = ntoa (value, radix);
995 if (*s == '-')
997 obstack_1grow (obs, '-');
998 s++;
1000 for (min -= strlen (s); --min >= 0;)
1001 obstack_1grow (obs, '0');
1003 obstack_grow (obs, s, strlen (s));
1006 static void
1007 m4_incr (struct obstack *obs, int argc, token_data **argv)
1009 int value;
1011 if (bad_argc (argv[0], argc, 2, 2))
1012 return;
1014 if (!numeric_arg (argv[0], ARG (1), &value))
1015 return;
1017 shipout_int (obs, value + 1);
1020 static void
1021 m4_decr (struct obstack *obs, int argc, token_data **argv)
1023 int value;
1025 if (bad_argc (argv[0], argc, 2, 2))
1026 return;
1028 if (!numeric_arg (argv[0], ARG (1), &value))
1029 return;
1031 shipout_int (obs, value - 1);
/* This section contains the macros "divert", "undivert" and "divnum" for
   handling diversion.  The utility functions used live in output.c.  */
1037 /*-----------------------------------------------------------------------.
1038 | Divert further output to the diversion given by ARGV[1]. Out of range |
1039 | means discard further output. |
1040 `-----------------------------------------------------------------------*/
1042 static void
1043 m4_divert (struct obstack *obs, int argc, token_data **argv)
1045 int i = 0;
1047 if (bad_argc (argv[0], argc, 1, 2))
1048 return;
1050 if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &i))
1051 return;
1053 make_diversion (i);
1056 /*-----------------------------------------------------.
1057 | Expand to the current diversion number, -1 if none. |
1058 `-----------------------------------------------------*/
1060 static void
1061 m4_divnum (struct obstack *obs, int argc, token_data **argv)
1063 if (bad_argc (argv[0], argc, 1, 1))
1064 return;
1065 shipout_int (obs, current_diversion);
1068 /*-----------------------------------------------------------------------.
1069 | Bring back the diversion given by the argument list. If none is |
1070 | specified, bring back all diversions. GNU specific is the option of |
1071 | undiverting named files, by passing a non-numeric argument to undivert |
1072 | (). |
1073 `-----------------------------------------------------------------------*/
1075 static void
1076 m4_undivert (struct obstack *obs, int argc, token_data **argv)
1078 int i, file;
1079 FILE *fp;
1080 char *endp;
1082 if (argc == 1)
1083 undivert_all ();
1084 else
1085 for (i = 1; i < argc; i++)
1087 file = strtol (ARG (i), &endp, 10);
1088 if (*endp == '\0' && !isspace (to_uchar (*ARG (i))))
1089 insert_diversion (file);
1090 else if (no_gnu_extensions)
1091 M4ERROR ((warning_status, 0,
1092 "non-numeric argument to builtin `%s'", ARG (0)));
1093 else
1095 fp = m4_path_search (ARG (i), NULL);
1096 if (fp != NULL)
1098 insert_file (fp);
1099 if (fclose (fp) == EOF)
1100 M4ERROR ((warning_status, errno,
1101 "error undiverting `%s'", ARG (i)));
1103 else
1104 M4ERROR ((warning_status, errno,
1105 "cannot undivert `%s'", ARG (i)));
/* This section contains various macros, which do not fall into any
   specific group.  These are "dnl", "shift", "changequote", "changecom"
   and "changeword".  */
1114 /*------------------------------------------------------------------------.
1115 | Delete all subsequent whitespace from input. The function skip_line () |
1116 | lives in input.c. |
1117 `------------------------------------------------------------------------*/
1119 static void
1120 m4_dnl (struct obstack *obs, int argc, token_data **argv)
1122 if (bad_argc (argv[0], argc, 1, 1))
1123 return;
1125 skip_line ();
1128 /*-------------------------------------------------------------------------.
1129 | Shift all argument one to the left, discarding the first argument. Each |
1130 | output argument is quoted with the current quotes. |
1131 `-------------------------------------------------------------------------*/
1133 static void
1134 m4_shift (struct obstack *obs, int argc, token_data **argv)
1136 if (bad_argc (argv[0], argc, 2, -1))
1137 return;
1138 dump_args (obs, argc - 1, argv + 1, ",", true);
1141 /*--------------------------------------------------------------------------.
1142 | Change the current quotes. The function set_quotes () lives in input.c. |
1143 `--------------------------------------------------------------------------*/
1145 static void
1146 m4_changequote (struct obstack *obs, int argc, token_data **argv)
1148 if (bad_argc (argv[0], argc, 1, 3))
1149 return;
1151 /* Explicit NULL distinguishes between empty and missing argument. */
1152 set_quotes ((argc >= 2) ? TOKEN_DATA_TEXT (argv[1]) : NULL,
1153 (argc >= 3) ? TOKEN_DATA_TEXT (argv[2]) : NULL);
1156 /*--------------------------------------------------------------------.
1157 | Change the current comment delimiters. The function set_comment () |
1158 | lives in input.c. |
1159 `--------------------------------------------------------------------*/
1161 static void
1162 m4_changecom (struct obstack *obs, int argc, token_data **argv)
1164 if (bad_argc (argv[0], argc, 1, 3))
1165 return;
1167 /* Explicit NULL distinguishes between empty and missing argument. */
1168 set_comment ((argc >= 2) ? TOKEN_DATA_TEXT (argv[1]) : NULL,
1169 (argc >= 3) ? TOKEN_DATA_TEXT (argv[2]) : NULL);
#ifdef ENABLE_CHANGEWORD

/* Builtin "changeword".  Pass the single required argument to
   set_word_regexp (), which lives in input.c, to change the regular
   expression used for breaking the input into words.  */

static void
m4_changeword (struct obstack *obs, int argc, token_data **argv)
{
  if (!bad_argc (argv[0], argc, 2, 2))
    set_word_regexp (TOKEN_DATA_TEXT (argv[1]));
}

#endif /* ENABLE_CHANGEWORD */
1190 /* This section contains macros for inclusion of other files -- "include"
1191 and "sinclude". This differs from bringing back diversions, in that
1192 the input is scanned before being copied to the output. */
1194 /*-------------------------------------------------------------------------.
1195 | Generic include function. Include the file given by the first argument, |
1196 | if it exists. Complain about inaccesible files iff SILENT is false. |
1197 `-------------------------------------------------------------------------*/
1199 static void
1200 include (int argc, token_data **argv, bool silent)
1202 FILE *fp;
1203 char *name;
1205 if (bad_argc (argv[0], argc, 2, 2))
1206 return;
1208 fp = m4_path_search (ARG (1), &name);
1209 if (fp == NULL)
1211 if (!silent)
1213 M4ERROR ((warning_status, errno, "cannot open `%s'", ARG (1)));
1214 retcode = EXIT_FAILURE;
1216 return;
1219 push_file (fp, name, true);
1220 free (name);
1223 /*------------------------------------------------.
1224 | Include a file, complaining in case of errors. |
1225 `------------------------------------------------*/
1227 static void
1228 m4_include (struct obstack *obs, int argc, token_data **argv)
1230 include (argc, argv, false);
1233 /*----------------------------------.
1234 | Include a file, ignoring errors. |
1235 `----------------------------------*/
1237 static void
1238 m4_sinclude (struct obstack *obs, int argc, token_data **argv)
1240 include (argc, argv, true);
1243 /* More miscellaneous builtins -- "maketemp", "errprint", "__file__",
1244 "__line__", and "__program__". The last three are GNU specific. */
/*------------------------------------------------------------------.
| Use the first argument as a template for a temporary file name.  |
`------------------------------------------------------------------*/
/* Copy NAME onto OBS, append as many 'X' characters as are needed
   for the text to end in six of them, and call mkstemp () on the
   result to create the file securely.  On success the descriptor is
   closed and the NUL-terminated file name is left growing on OBS.
   On failure an error is reported and the text is removed from OBS
   again.  */
static void
mkstemp_helper (struct obstack *obs, const char *name)
{
  int name_len = strlen (name);
  int trailing_x = 0;
  int fd;

  obstack_grow (obs, name, name_len);

  /* Count the 'X' characters NAME already ends with (at most six),
     then supply the ones the user forgot.  */
  while (trailing_x < 6 && trailing_x < name_len
         && name[name_len - 1 - trailing_x] == 'X')
    trailing_x++;
  while (trailing_x < 6)
    {
      obstack_1grow (obs, 'X');
      trailing_x++;
    }
  obstack_1grow (obs, '\0');

  errno = 0;
  fd = mkstemp ((char *) obstack_base (obs));
  if (fd >= 0)
    {
      close (fd);
      return;
    }

  M4ERROR ((0, errno, "cannot create tempfile `%s'", name));
  obstack_free (obs, obstack_finish (obs));
}
1281 static void
1282 m4_maketemp (struct obstack *obs, int argc, token_data **argv)
1284 if (bad_argc (argv[0], argc, 2, 2))
1285 return;
1286 if (no_gnu_extensions)
1288 /* POSIX states "any trailing 'X' characters [are] replaced with
1289 the current process ID as a string", without referencing the
1290 file system. Horribly insecure, but we have to do it when we
1291 are in traditional mode.
1293 For reference, Solaris m4 does:
1294 maketemp() -> `'
1295 maketemp(X) -> `X'
1296 maketemp(XX) -> `Xn', where n is last digit of pid
1297 maketemp(XXXXXXXX) -> `X00nnnnn', where nnnnn is 16-bit pid
1299 const char *str = ARG (1);
1300 int len = strlen (str);
1301 int i;
1302 int len2;
1304 M4ERROR ((warning_status, 0, "recommend using mkstemp instead"));
1305 for (i = len; i > 1; i--)
1306 if (str[i - 1] != 'X')
1307 break;
1308 obstack_grow (obs, str, i);
1309 str = ntoa ((int32_t) getpid (), 10);
1310 len2 = strlen (str);
1311 if (len2 > len - i)
1312 obstack_grow0 (obs, str + len2 - (len - i), len - i);
1313 else
1315 while (i++ < len - len2)
1316 obstack_1grow (obs, '0');
1317 obstack_grow0 (obs, str, len2);
1320 else
1321 mkstemp_helper (obs, ARG (1));
1324 static void
1325 m4_mkstemp (struct obstack *obs, int argc, token_data **argv)
1327 if (bad_argc (argv[0], argc, 2, 2))
1328 return;
1329 mkstemp_helper (obs, ARG (1));
1332 /*----------------------------------------.
1333 | Print all arguments on standard error. |
1334 `----------------------------------------*/
1336 static void
1337 m4_errprint (struct obstack *obs, int argc, token_data **argv)
1339 if (bad_argc (argv[0], argc, 2, -1))
1340 return;
1341 dump_args (obs, argc, argv, " ", false);
1342 obstack_1grow (obs, '\0');
1343 debug_flush_files ();
1344 fprintf (stderr, "%s", (char *) obstack_finish (obs));
1345 fflush (stderr);
1348 static void
1349 m4___file__ (struct obstack *obs, int argc, token_data **argv)
1351 if (bad_argc (argv[0], argc, 1, 1))
1352 return;
1353 obstack_grow (obs, lquote.string, lquote.length);
1354 obstack_grow (obs, current_file, strlen (current_file));
1355 obstack_grow (obs, rquote.string, rquote.length);
1358 static void
1359 m4___line__ (struct obstack *obs, int argc, token_data **argv)
1361 if (bad_argc (argv[0], argc, 1, 1))
1362 return;
1363 shipout_int (obs, current_line);
1366 static void
1367 m4___program__ (struct obstack *obs, int argc, token_data **argv)
1369 if (bad_argc (argv[0], argc, 1, 1))
1370 return;
1371 obstack_grow (obs, lquote.string, lquote.length);
1372 obstack_grow (obs, program_name, strlen (program_name));
1373 obstack_grow (obs, rquote.string, rquote.length);
1376 /* This section contains various macros for exiting, saving input until
1377 EOF is seen, and tracing macro calls. That is: "m4exit", "m4wrap",
1378 "traceon" and "traceoff". */
1380 /*-------------------------------------------------------------------------.
1381 | Exit immediately, with exitcode specified by the first argument, 0 if no |
1382 | arguments are present. |
1383 `-------------------------------------------------------------------------*/
1385 static void
1386 m4_m4exit (struct obstack *obs, int argc, token_data **argv)
1388 int exit_code = EXIT_SUCCESS;
1390 /* Warn on bad arguments, but still exit. */
1391 bad_argc (argv[0], argc, 1, 2);
1392 if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &exit_code))
1393 exit_code = EXIT_FAILURE;
1394 if (exit_code < 0 || exit_code > 255)
1396 M4ERROR ((warning_status, 0,
1397 "exit status out of range: `%d'", exit_code));
1398 exit_code = EXIT_FAILURE;
1400 /* Change debug stream back to stderr, to force flushing debug stream and
1401 detect any errors it might have encountered. */
1402 debug_set_output (NULL);
1403 debug_flush_files ();
1404 if (exit_code == EXIT_SUCCESS && retcode != EXIT_SUCCESS)
1405 exit_code = retcode;
1406 /* Propagate non-zero status to atexit handlers. */
1407 if (exit_code != EXIT_SUCCESS)
1408 exit_failure = exit_code;
1409 exit (exit_code);
1412 /*-------------------------------------------------------------------------.
1413 | Save the argument text until EOF has been seen, allowing for user |
1414 | specified cleanup action. GNU version saves all arguments, the standard |
1415 | version only the first. |
1416 `-------------------------------------------------------------------------*/
1418 static void
1419 m4_m4wrap (struct obstack *obs, int argc, token_data **argv)
1421 if (bad_argc (argv[0], argc, 2, -1))
1422 return;
1423 if (no_gnu_extensions)
1424 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1425 else
1426 dump_args (obs, argc, argv, " ", false);
1427 obstack_1grow (obs, '\0');
1428 push_wrapup ((char *) obstack_finish (obs));
1431 /* Enable tracing of all specified macros, or all, if none is specified.
1432 Tracing is disabled by default, when a macro is defined. This can be
1433 overridden by the "t" debug flag. */
1435 /*-----------------------------------------------------------------------.
1436 | Set_trace () is used by "traceon" and "traceoff" to enable and disable |
1437 | tracing of a macro. It disables tracing if DATA is NULL, otherwise it |
1438 | enable tracing. |
1439 `-----------------------------------------------------------------------*/
1441 static void
1442 set_trace (symbol *sym, void *data)
1444 SYMBOL_TRACED (sym) = data != NULL;
1445 /* Remove placeholder from table if macro is undefined and untraced. */
1446 if (SYMBOL_TYPE (sym) == TOKEN_VOID && data == NULL)
1447 lookup_symbol (SYMBOL_NAME (sym), SYMBOL_POPDEF);
1450 static void
1451 m4_traceon (struct obstack *obs, int argc, token_data **argv)
1453 symbol *s;
1454 int i;
1456 if (argc == 1)
1457 hack_all_symbols (set_trace, obs);
1458 else
1459 for (i = 1; i < argc; i++)
1461 s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_INSERT);
1462 set_trace (s, obs);
1466 /*------------------------------------------------------------------------.
1467 | Disable tracing of all specified macros, or all, if none is specified. |
1468 `------------------------------------------------------------------------*/
1470 static void
1471 m4_traceoff (struct obstack *obs, int argc, token_data **argv)
1473 symbol *s;
1474 int i;
1476 if (argc == 1)
1477 hack_all_symbols (set_trace, NULL);
1478 else
1479 for (i = 1; i < argc; i++)
1481 s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_LOOKUP);
1482 if (s != NULL)
1483 set_trace (s, NULL);
1487 /*----------------------------------------------------------------------.
1488 | On-the-fly control of the format of the tracing output. It takes one |
1489 | argument, which is a character string like given to the -d option, or |
1490 | none in which case the debug_level is zeroed. |
1491 `----------------------------------------------------------------------*/
1493 static void
1494 m4_debugmode (struct obstack *obs, int argc, token_data **argv)
1496 int new_debug_level;
1497 int change_flag;
1499 if (bad_argc (argv[0], argc, 1, 2))
1500 return;
1502 if (argc == 1)
1503 debug_level = 0;
1504 else
1506 if (ARG (1)[0] == '+' || ARG (1)[0] == '-')
1508 change_flag = ARG (1)[0];
1509 new_debug_level = debug_decode (ARG (1) + 1);
1511 else
1513 change_flag = 0;
1514 new_debug_level = debug_decode (ARG (1));
1517 if (new_debug_level < 0)
1518 M4ERROR ((warning_status, 0,
1519 "Debugmode: bad debug flags: `%s'", ARG (1)));
1520 else
1522 switch (change_flag)
1524 case 0:
1525 debug_level = new_debug_level;
1526 break;
1528 case '+':
1529 debug_level |= new_debug_level;
1530 break;
1532 case '-':
1533 debug_level &= ~new_debug_level;
1534 break;
1540 /*-------------------------------------------------------------------------.
1541 | Specify the destination of the debugging output. With one argument, the |
1542 | argument is taken as a file name, with no arguments, revert to stderr. |
1543 `-------------------------------------------------------------------------*/
1545 static void
1546 m4_debugfile (struct obstack *obs, int argc, token_data **argv)
1548 if (bad_argc (argv[0], argc, 1, 2))
1549 return;
1551 if (argc == 1)
1552 debug_set_output (NULL);
1553 else if (!debug_set_output (ARG (1)))
1554 M4ERROR ((warning_status, errno,
1555 "cannot set error file: `%s'", ARG (1)));
1558 /* This section contains text processing macros: "len", "index",
1559 "substr", "translit", "format", "regexp" and "patsubst". The last
1560 three are GNU specific. */
1562 /*---------------------------------------------.
1563 | Expand to the length of the first argument. |
1564 `---------------------------------------------*/
1566 static void
1567 m4_len (struct obstack *obs, int argc, token_data **argv)
1569 if (bad_argc (argv[0], argc, 2, 2))
1570 return;
1571 shipout_int (obs, strlen (ARG (1)));
1574 /*-------------------------------------------------------------------------.
1575 | The macro expands to the first index of the second argument in the first |
1576 | argument. |
1577 `-------------------------------------------------------------------------*/
1579 static void
1580 m4_index (struct obstack *obs, int argc, token_data **argv)
1582 const char *haystack;
1583 const char *result;
1584 int retval;
1586 if (bad_argc (argv[0], argc, 3, 3))
1588 /* builtin(`index') is blank, but index(`abc') is 0. */
1589 if (argc == 2)
1590 shipout_int (obs, 0);
1591 return;
1594 haystack = ARG (1);
1595 result = strstr (haystack, ARG (2));
1596 retval = result ? result - haystack : -1;
1598 shipout_int (obs, retval);
1601 /*-------------------------------------------------------------------------.
1602 | The macro "substr" extracts substrings from the first argument, starting |
1603 | from the index given by the second argument, extending for a length |
1604 | given by the third argument. If the third argument is missing, the |
1605 | substring extends to the end of the first argument. |
1606 `-------------------------------------------------------------------------*/
1608 static void
1609 m4_substr (struct obstack *obs, int argc, token_data **argv)
1611 int start = 0;
1612 int length, avail;
1614 if (bad_argc (argv[0], argc, 3, 4))
1616 /* builtin(`substr') is blank, but substr(`abc') is abc. */
1617 if (argc == 2)
1618 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1619 return;
1622 length = avail = strlen (ARG (1));
1623 if (!numeric_arg (argv[0], ARG (2), &start))
1624 return;
1626 if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &length))
1627 return;
1629 if (start < 0 || length <= 0 || start >= avail)
1630 return;
1632 if (start + length > avail)
1633 length = avail - start;
1634 obstack_grow (obs, ARG (1) + start, length);
/* Expand the character ranges that "translit" allows in its second
   and third arguments.  S is copied onto OBS with every range "a-b"
   replaced by the characters it denotes, and the finished,
   NUL-terminated string is returned.  A dash that is the first or
   the last character of S stands for itself.  When the first
   character of a range sorts after the second, the range runs
   backwards, thus 9-0 is the string 9876543210.  */

static const char *
expand_ranges (const char *s, struct obstack *obs)
{
  unsigned char prev = '\0';    /* character before *S; NUL at the start */

  while (*s != '\0')
    {
      if (*s != '-' || prev == '\0')
        obstack_1grow (obs, *s);
      else
        {
          unsigned char last = to_uchar (*++s);
          int c;

          if (last == '\0')
            {
              /* trailing dash */
              obstack_1grow (obs, '-');
              break;
            }

          /* PREV itself was already output on the previous round, so
             emit only the characters after it, up to and including
             LAST.  */
          if (prev <= last)
            for (c = prev + 1; c <= last; c++)
              obstack_1grow (obs, c);
          else
            for (c = prev - 1; c >= last; c--)
              obstack_1grow (obs, c);
        }
      prev = to_uchar (*s++);
    }

  obstack_1grow (obs, '\0');
  return (char *) obstack_finish (obs);
}
1682 /*----------------------------------------------------------------------.
1683 | The macro "translit" translates all characters in the first argument, |
1684 | which are present in the second argument, into the corresponding |
1685 | character from the third argument. If the third argument is shorter |
1686 | than the second, the extra characters in the second argument, are |
1687 | deleted from the first (pueh). |
1688 `----------------------------------------------------------------------*/
1690 static void
1691 m4_translit (struct obstack *obs, int argc, token_data **argv)
1693 const char *data;
1694 const char *from;
1695 const char *to;
1696 char map[256] = {0};
1697 char found[256] = {0};
1698 unsigned char ch;
1700 if (bad_argc (argv[0], argc, 3, 4))
1702 /* builtin(`translit') is blank, but translit(`abc') is abc. */
1703 if (argc == 2)
1704 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1705 return;
1708 from = ARG (2);
1709 if (strchr (from, '-') != NULL)
1711 from = expand_ranges (from, obs);
1712 if (from == NULL)
1713 return;
1716 to = ARG (3);
1717 if (strchr (to, '-') != NULL)
1719 to = expand_ranges (to, obs);
1720 if (to == NULL)
1721 return;
1724 /* Calling strchr(from) for each character in data is quadratic,
1725 since both strings can be arbitrarily long. Instead, create a
1726 from-to mapping in one pass of from, then use that map in one
1727 pass of data, for linear behavior. Traditional behavior is that
1728 only the first instance of a character in from is consulted,
1729 hence the found map. */
1730 for ( ; (ch = *from) != '\0'; from++)
1732 if (! found[ch])
1734 found[ch] = 1;
1735 map[ch] = *to;
1737 if (*to != '\0')
1738 to++;
1741 for (data = ARG (1); (ch = *data) != '\0'; data++)
1743 if (! found[ch])
1744 obstack_1grow (obs, ch);
1745 else if (map[ch])
1746 obstack_1grow (obs, map[ch]);
1750 /*----------------------------------------------------------------------.
1751 | Frontend for printf like formatting. The function format () lives in |
1752 | the file format.c. |
1753 `----------------------------------------------------------------------*/
1755 static void
1756 m4_format (struct obstack *obs, int argc, token_data **argv)
1758 if (bad_argc (argv[0], argc, 2, -1))
1759 return;
1760 format (obs, argc - 1, argv + 1);
1763 /*-------------------------------------------------------------------------.
1764 | Function to perform substitution by regular expressions. Used by the |
1765 | builtins regexp and patsubst. The changed text is placed on the |
1766 | obstack. The substitution is REPL, with \& substituted by this part of |
1767 | VICTIM matched by the last whole regular expression, taken from REGS[0], |
1768 | and \N substituted by the text matched by the Nth parenthesized |
1769 | sub-expression, taken from REGS[N]. |
1770 `-------------------------------------------------------------------------*/
1772 static int substitute_warned = 0;
1774 static void
1775 substitute (struct obstack *obs, const char *victim, const char *repl,
1776 struct re_registers *regs)
1778 int ch;
1780 for (;;)
1782 while ((ch = *repl++) != '\\')
1784 if (ch == '\0')
1785 return;
1786 obstack_1grow (obs, ch);
1789 switch ((ch = *repl++))
1791 case '0':
1792 if (!substitute_warned)
1794 M4ERROR ((warning_status, 0, "\
1795 Warning: \\0 will disappear, use \\& instead in replacements"));
1796 substitute_warned = 1;
1798 /* Fall through. */
1800 case '&':
1801 obstack_grow (obs, victim + regs->start[0],
1802 regs->end[0] - regs->start[0]);
1803 break;
1805 case '1': case '2': case '3': case '4': case '5': case '6':
1806 case '7': case '8': case '9':
1807 ch -= '0';
1808 if (regs->num_regs - 1 <= ch)
1809 M4ERROR ((warning_status, 0, "\
1810 Warning: sub-expression %d not present", ch));
1811 else if (regs->end[ch] > 0)
1812 obstack_grow (obs, victim + regs->start[ch],
1813 regs->end[ch] - regs->start[ch]);
1814 break;
1816 case '\0':
1817 M4ERROR ((warning_status, 0, "\
1818 Warning: trailing \\ ignored in replacement"));
1819 return;
1821 default:
1822 obstack_1grow (obs, ch);
1823 break;
1828 /*------------------------------------------.
1829 | Initialize regular expression variables. |
1830 `------------------------------------------*/
1832 static void
1833 init_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
1835 buf->translate = NULL;
1836 buf->fastmap = NULL;
1837 buf->buffer = NULL;
1838 buf->allocated = 0;
1839 regs->start = NULL;
1840 regs->end = NULL;
1843 /*----------------------------------------.
1844 | Clean up regular expression variables. |
1845 `----------------------------------------*/
1847 static void
1848 free_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
1850 regfree (buf);
1851 free (regs->start);
1852 free (regs->end);
1855 /*--------------------------------------------------------------------------.
1856 | Regular expression version of index. Given two arguments, expand to the |
1857 | index of the first match of the second argument (a regexp) in the first. |
1858 | Expand to -1 if here is no match. Given a third argument, is changes |
1859 | the expansion to this argument. |
1860 `--------------------------------------------------------------------------*/
1862 static void
1863 m4_regexp (struct obstack *obs, int argc, token_data **argv)
1865 const char *victim; /* first argument */
1866 const char *regexp; /* regular expression */
1867 const char *repl; /* replacement string */
1869 struct re_pattern_buffer buf; /* compiled regular expression */
1870 struct re_registers regs; /* for subexpression matches */
1871 const char *msg; /* error message from re_compile_pattern */
1872 int startpos; /* start position of match */
1873 int length; /* length of first argument */
1875 if (bad_argc (argv[0], argc, 3, 4))
1877 /* builtin(`regexp') is blank, but regexp(`abc') is 0. */
1878 if (argc == 2)
1879 shipout_int (obs, 0);
1880 return;
1883 victim = TOKEN_DATA_TEXT (argv[1]);
1884 regexp = TOKEN_DATA_TEXT (argv[2]);
1886 init_pattern_buffer (&buf, &regs);
1887 msg = re_compile_pattern (regexp, strlen (regexp), &buf);
1889 if (msg != NULL)
1891 M4ERROR ((warning_status, 0,
1892 "bad regular expression: `%s': %s", regexp, msg));
1893 free_pattern_buffer (&buf, &regs);
1894 return;
1897 length = strlen (victim);
1898 /* Avoid overhead of allocating regs if we won't use it. */
1899 startpos = re_search (&buf, victim, length, 0, length,
1900 argc == 3 ? NULL : &regs);
1902 if (startpos == -2)
1903 M4ERROR ((warning_status, 0,
1904 "error matching regular expression `%s'", regexp));
1905 else if (argc == 3)
1906 shipout_int (obs, startpos);
1907 else if (startpos >= 0)
1909 repl = TOKEN_DATA_TEXT (argv[3]);
1910 substitute (obs, victim, repl, &regs);
1913 free_pattern_buffer (&buf, &regs);
1916 /*--------------------------------------------------------------------------.
1917 | Substitute all matches of a regexp occuring in a string. Each match of |
1918 | the second argument (a regexp) in the first argument is changed to the |
1919 | third argument, with \& substituted by the matched text, and \N |
1920 | substituted by the text matched by the Nth parenthesized sub-expression. |
1921 `--------------------------------------------------------------------------*/
1923 static void
1924 m4_patsubst (struct obstack *obs, int argc, token_data **argv)
1926 const char *victim; /* first argument */
1927 const char *regexp; /* regular expression */
1929 struct re_pattern_buffer buf; /* compiled regular expression */
1930 struct re_registers regs; /* for subexpression matches */
1931 const char *msg; /* error message from re_compile_pattern */
1932 int matchpos; /* start position of match */
1933 int offset; /* current match offset */
1934 int length; /* length of first argument */
1936 if (bad_argc (argv[0], argc, 3, 4))
1938 /* builtin(`patsubst') is blank, but patsubst(`abc') is abc. */
1939 if (argc == 2)
1940 obstack_grow (obs, ARG (1), strlen (ARG (1)));
1941 return;
1944 regexp = TOKEN_DATA_TEXT (argv[2]);
1946 init_pattern_buffer (&buf, &regs);
1947 msg = re_compile_pattern (regexp, strlen (regexp), &buf);
1949 if (msg != NULL)
1951 M4ERROR ((warning_status, 0,
1952 "bad regular expression `%s': %s", regexp, msg));
1953 free (buf.buffer);
1954 return;
1957 victim = TOKEN_DATA_TEXT (argv[1]);
1958 length = strlen (victim);
1960 offset = 0;
1961 matchpos = 0;
1962 while (offset <= length)
1964 matchpos = re_search (&buf, victim, length,
1965 offset, length - offset, &regs);
1966 if (matchpos < 0)
1969 /* Match failed -- either error or there is no match in the
1970 rest of the string, in which case the rest of the string is
1971 copied verbatim. */
1973 if (matchpos == -2)
1974 M4ERROR ((warning_status, 0,
1975 "error matching regular expression `%s'", regexp));
1976 else if (offset < length)
1977 obstack_grow (obs, victim + offset, length - offset);
1978 break;
1981 /* Copy the part of the string that was skipped by re_search (). */
1983 if (matchpos > offset)
1984 obstack_grow (obs, victim + offset, matchpos - offset);
1986 /* Handle the part of the string that was covered by the match. */
1988 substitute (obs, victim, ARG (3), &regs);
1990 /* Update the offset to the end of the match. If the regexp
1991 matched a null string, advance offset one more, to avoid
1992 infinite loops. */
1994 offset = regs.end[0];
1995 if (regs.start[0] == regs.end[0])
1996 obstack_1grow (obs, victim[offset++]);
1998 obstack_1grow (obs, '\0');
2000 free_pattern_buffer (&buf, &regs);
2003 /* Finally, a placeholder builtin. This builtin is not installed by
2004 default, but when reading back frozen files, this is associated
2005 with any builtin we don't recognize (for example, if the frozen
2006 file was created with a changeword capable m4, but is then loaded
2007 by a different m4 that does not support changeword). This way, we
2008 can keep 'm4 -R' quiet in the common case that the user did not
2009 know or care about the builtin when the frozen file was created,
2010 while still flagging it as a potential error if an attempt is made
2011 to actually use the builtin. */
2013 /*--------------------------------------------------------------------.
2014 | Issue a warning that this macro is a placeholder for an unsupported |
2015 | builtin that was requested while reloading a frozen file. |
2016 `--------------------------------------------------------------------*/
2018 void
2019 m4_placeholder (struct obstack *obs, int argc, token_data **argv)
2021 M4ERROR ((warning_status, 0, "\
2022 builtin `%s' requested by frozen file is not supported", ARG (0)));
2025 /*-------------------------------------------------------------------------.
2026 | This function handles all expansion of user defined and predefined |
2027 | macros. It is called with an obstack OBS, where the macros expansion |
2028 | will be placed, as an unfinished object. SYM points to the macro |
2029 | definition, giving the expansion text. ARGC and ARGV are the arguments, |
2030 | as usual. |
2031 `-------------------------------------------------------------------------*/
2033 void
2034 expand_user_macro (struct obstack *obs, symbol *sym,
2035 int argc, token_data **argv)
2037 const char *text;
2038 int i;
2040 for (text = SYMBOL_TEXT (sym); *text != '\0';)
2042 if (*text != '$')
2044 obstack_1grow (obs, *text);
2045 text++;
2046 continue;
2048 text++;
2049 switch (*text)
2051 case '0': case '1': case '2': case '3': case '4':
2052 case '5': case '6': case '7': case '8': case '9':
2053 if (no_gnu_extensions)
2055 i = *text++ - '0';
2057 else
2059 for (i = 0; isdigit (to_uchar (*text)); text++)
2060 i = i*10 + (*text - '0');
2062 if (i < argc)
2063 obstack_grow (obs, TOKEN_DATA_TEXT (argv[i]),
2064 strlen (TOKEN_DATA_TEXT (argv[i])));
2065 break;
2067 case '#': /* number of arguments */
2068 shipout_int (obs, argc - 1);
2069 text++;
2070 break;
2072 case '*': /* all arguments */
2073 case '@': /* ... same, but quoted */
2074 dump_args (obs, argc, argv, ",", *text == '@');
2075 text++;
2076 break;
2078 default:
2079 obstack_1grow (obs, '$');
2080 break;