Stage 7: add chained token support to input parser.
[m4/ericb.git] / src / m4.h
blob111f1678069a8ac00006064b1867aa77d5a080dd
/* GNU m4 -- A simple macro processor

   Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.

   This file is part of GNU M4.

   GNU M4 is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   GNU M4 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
22 /* We use <config.h> instead of "config.h" so that a compilation
23 using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
24 (which it would do because it found this file in $srcdir). */
26 #include <config.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <limits.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <string.h>
35 #include <sys/types.h>
37 #include "binary-io.h"
38 #include "clean-temp.h"
39 #include "cloexec.h"
40 #include "close-stream.h"
41 #include "closein.h"
42 #include "error.h"
43 #include "exitfail.h"
44 #include "obstack.h"
45 #include "stdio--.h"
46 #include "stdlib--.h"
47 #include "unistd--.h"
48 #include "verror.h"
49 #include "xalloc.h"
50 #include "xprintf.h"
51 #include "xvasprintf.h"
/* Canonicalize UNIX recognition macros.  UNIX is defined to 1 when
   any of the compiler- or platform-provided macros below is set.  */
#if defined unix || defined __unix || defined __unix__ \
  || defined _POSIX_VERSION || defined _POSIX2_VERSION \
  || defined __NetBSD__ || defined __OpenBSD__ \
  || defined __APPLE__ || defined __APPLE_CC__
# define UNIX 1
#endif

/* Canonicalize Windows recognition macros.  Cygwin is deliberately
   excluded by the !defined __CYGWIN__ test.  */
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
# define W32_NATIVE 1
#endif

/* Canonicalize OS/2 recognition macro.  */
#ifdef __EMX__
# define OS2 1
#endif

/* Used for version mismatch, when -R detects a frozen file it can't parse.  */
#define EXIT_MISMATCH 63

/* M4 1.4.x is not yet internationalized.  But when it is, this can be
   redefined as gettext().  For now it expands to its argument.  */
#define _(STRING) STRING
/* Various declarations.  */

/* A string carried together with its length.  */
struct string
  {
    char *string;		/* characters of the string */
    size_t length;		/* length of the string */
  };
typedef struct string STRING;
/* Memory allocation.  These two names are what the obstack macros
   call to obtain and release chunks.  */
#define obstack_chunk_alloc	xmalloc
#define obstack_chunk_free	free

/* glibc's obstack left out the ability to suspend and resume growth
   of an object on the stack.  Reopen OBJECT (previously returned by
   obstack_alloc or obstack_finish) with SIZE for additional growth,
   freeing all objects that occur later in the stack.

   Note that OBS and OBJECT are each expanded twice, so arguments
   must be free of side effects.  */
#define obstack_regrow(OBS, OBJECT, SIZE)                \
  (obstack_free (OBS, (char *) (OBJECT) + (SIZE)),       \
   (OBS)->object_base = (char *) (OBJECT))
/* These must come first.  They are forward declarations only, so that
   later declarations in this header can mention the types by name
   before (or without) seeing their definitions.  */
typedef struct input_block	input_block;
typedef struct token_data	token_data;
typedef struct macro_arguments	macro_arguments;

/* Signature shared by every builtin macro implementation.  */
typedef void builtin_func (struct obstack *, int, macro_arguments *);
/* Gnulib's stdbool doesn't work with bool bitfields.  For nicer
   debugging, use bool when we know it works, but use the more
   portable unsigned int elsewhere.  */
#if __GNUC__ > 2
typedef bool bool_bitfield;
#else
typedef unsigned int bool_bitfield;
#endif /* !__GNUC__ */
/* Take advantage of GNU C compiler source level optimization hints,
   using portable macros.  On compilers older than GCC 2.7, or not
   claiming GCC compatibility, the attributes expand to nothing.  */
#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 6)
# define M4_GNUC_ATTRIBUTE(args)	__attribute__ (args)
#else
# define M4_GNUC_ATTRIBUTE(args)
#endif  /* __GNUC__ */

/* Mark a declaration as intentionally unused.  */
#define M4_GNUC_UNUSED		M4_GNUC_ATTRIBUTE ((__unused__))

/* Mark a function as printf-like: FMT is the 1-based index of the
   format string parameter, ARG that of the first variadic argument.  */
#define M4_GNUC_PRINTF(fmt, arg)			\
  M4_GNUC_ATTRIBUTE ((__format__ (__printf__, fmt, arg)))
/* File: m4.c  --- global definitions.  */

/* Option flags.  Each variable is annotated with the command line
   option associated with it.  */
extern int sync_output;			/* -s */
extern int debug_level;			/* -d */
extern size_t hash_table_size;		/* -H */
extern int no_gnu_extensions;		/* -G */
extern int prefix_all_builtins;		/* -P */
extern int max_debug_argument_length;	/* -l */
extern int suppress_warnings;		/* -Q */
extern int warning_status;		/* -E */
extern int nesting_limit;		/* -L */
#ifdef ENABLE_CHANGEWORD
extern const char *user_word_regexp;	/* -W */
#endif
142 /* Error handling. */
143 extern int retcode;
144 extern const char *program_name;
146 void m4_error (int, int, const char *, const char *, ...)
147 M4_GNUC_PRINTF (4, 5);
148 void m4_error_at_line (int, int, const char *, int, const char *,
149 const char *, ...) M4_GNUC_PRINTF (6, 7);
150 void m4_warn (int, const char *, const char *, ...) M4_GNUC_PRINTF (3, 4);
151 void m4_warn_at_line (int, const char *, int, const char *,
152 const char *, ...) M4_GNUC_PRINTF (5, 6);
154 #ifdef USE_STACKOVF
155 void setup_stackovf_trap (char *const *, char *const *,
156 void (*handler) (void));
157 #endif
/* File: debug.c  --- debugging and tracing function.  */

extern FILE *debug;

/* The value of debug_level is a bitmask of the following.  Each flag
   occupies its own bit, so flags may be combined with bitwise OR.  */

/* a: show arglist in trace output */
#define DEBUG_TRACE_ARGS 1
/* e: show expansion in trace output */
#define DEBUG_TRACE_EXPANSION 2
/* q: quote args and expansion in trace output */
#define DEBUG_TRACE_QUOTE 4
/* t: trace all macros -- overrides trace{on,off} */
#define DEBUG_TRACE_ALL 8
/* l: add line numbers to trace output */
#define DEBUG_TRACE_LINE 16
/* f: add file name to trace output */
#define DEBUG_TRACE_FILE 32
/* p: trace path search of include files */
#define DEBUG_TRACE_PATH 64
/* c: show macro call before args collection */
#define DEBUG_TRACE_CALL 128
/* i: trace changes of input files */
#define DEBUG_TRACE_INPUT 256
/* x: add call id to trace output */
#define DEBUG_TRACE_CALLID 512

/* V: very verbose --  print everything.  This is the union of all
   ten flag bits above.  */
#define DEBUG_TRACE_VERBOSE 1023
/* default flags -- equiv: aeq */
#define DEBUG_TRACE_DEFAULT 7
/* Format one argument onto the debug stream with xfprintf, doing
   nothing when debug is NULL.  Wrapped in do/while (0) so that the
   macro behaves as a single statement.  */
#define DEBUG_PRINT1(Fmt, Arg1) \
  do								\
    {								\
      if (debug != NULL)					\
	xfprintf (debug, Fmt, Arg1);				\
    }								\
  while (0)

/* Likewise, with three arguments.  */
#define DEBUG_PRINT3(Fmt, Arg1, Arg2, Arg3) \
  do								\
    {								\
      if (debug != NULL)					\
	xfprintf (debug, Fmt, Arg1, Arg2, Arg3);		\
    }								\
  while (0)
/* Emit a complete message line on the debug stream: first the output
   of debug_message_prefix, then the formatted text, then a newline.
   Nothing is emitted when debug is NULL.  */
#define DEBUG_MESSAGE(Fmt) \
  do								\
    {								\
      if (debug != NULL)					\
	{							\
	  debug_message_prefix ();				\
	  xfprintf (debug, Fmt);				\
	  putc ('\n', debug);					\
	}							\
    }								\
  while (0)

/* Likewise, with one format argument.  */
#define DEBUG_MESSAGE1(Fmt, Arg1) \
  do								\
    {								\
      if (debug != NULL)					\
	{							\
	  debug_message_prefix ();				\
	  xfprintf (debug, Fmt, Arg1);				\
	  putc ('\n', debug);					\
	}							\
    }								\
  while (0)

/* Likewise, with two format arguments.  */
#define DEBUG_MESSAGE2(Fmt, Arg1, Arg2) \
  do								\
    {								\
      if (debug != NULL)					\
	{							\
	  debug_message_prefix ();				\
	  xfprintf (debug, Fmt, Arg1, Arg2);			\
	  putc ('\n', debug);					\
	}							\
    }								\
  while (0)
243 void debug_init (void);
244 int debug_decode (const char *);
245 void debug_flush_files (void);
246 bool debug_set_output (const char *, const char *);
247 void debug_message_prefix (void);
249 void trace_prepre (const char *, int);
250 void trace_pre (const char *, int, macro_arguments *);
251 void trace_post (const char *, int, macro_arguments *,
252 const input_block *);
254 bool obstack_print (struct obstack *, const char *, size_t, int *);
/* File: input.c  --- lexical definitions.  */

typedef struct token_chain token_chain;

/* Various different token types.  Avoid overlap with token_data_type,
   since the shared prefix of the enumerators is a bit confusing.
   Numbering therefore starts at 4, just past the four values of
   enum token_data_type.  Each comment names the token_data_type that
   accompanies the token.  */
enum token_type
{
  TOKEN_EOF = 4,/* End of file, TOKEN_VOID.  */
  TOKEN_STRING,	/* Quoted string or comment, TOKEN_TEXT or TOKEN_COMP.  */
  TOKEN_WORD,	/* An identifier, TOKEN_TEXT.  */
  TOKEN_OPEN,	/* Active character `(', TOKEN_TEXT.  */
  TOKEN_COMMA,	/* Active character `,', TOKEN_TEXT.  */
  TOKEN_CLOSE,	/* Active character `)', TOKEN_TEXT.  */
  TOKEN_SIMPLE,	/* Any other single character, TOKEN_TEXT.  */
  TOKEN_MACDEF	/* A macro's definition (see "defn"), TOKEN_FUNC.  */
};
/* The data for a token, a macro argument, and a macro definition.
   The enumerator selects which member of the union inside struct
   token_data is meaningful.  */
enum token_data_type
{
  TOKEN_VOID,	/* Token still being constructed, u is invalid.  */
  TOKEN_TEXT,	/* Straight text, u.u_t is valid.  */
  TOKEN_FUNC,	/* Builtin function definition, u.func is valid.  */
  TOKEN_COMP	/* Composite argument, u.chain is valid.  */
};
283 /* Composite tokens are built of a linked list of chains. */
284 struct token_chain
286 token_chain *next; /* Pointer to next link of chain. */
287 const char *str; /* NUL-terminated string if text, else NULL. */
288 size_t len; /* Length of str, else 0. */
289 macro_arguments *argv;/* Reference to earlier $@. */
290 unsigned int index; /* Argument index within argv. */
291 bool flatten; /* True to treat builtins as text. */
294 /* The content of a token or macro argument. */
295 struct token_data
297 enum token_data_type type;
298 union
300 struct
302 /* We don't support NUL in text, yet. So len is just a
303 cache for now. But it will be essential if we ever DO
304 support NUL. */
305 size_t len;
306 char *text; /* The contents of the token. */
307 /* The value of quote_age when this token was scanned. If
308 this token is later encountered in the context of
309 scanning a quoted string, and quote_age has not changed,
310 then rescanning this string is provably unnecessary. If
311 zero, then this string potentially contains content that
312 might change the parse on rescan. Ignored for 0 len. */
313 unsigned int quote_age;
314 #ifdef ENABLE_CHANGEWORD
315 /* If changeword is in effect, and contains a () group, then
316 this contains the entire token, while text contains the
317 portion that matched the () group to form a macro name.
318 Otherwise, this field is unused. */
319 const char *original_text;
320 #endif
322 u_t;
323 builtin_func *func;
325 /* Composite text: a linked list of straight text and $@
326 placeholders. */
327 token_chain *chain;
/* Accessors for the members of a token_data, given a pointer Td to
   it.  Each expands to an lvalue, so it may be read or assigned.
   The LEN, TEXT, QUOTE_AGE and ORIG_TEXT accessors reach into the
   u_t member, and FUNC into the func member, of the union.  */
#define TOKEN_DATA_TYPE(Td)		((Td)->type)
#define TOKEN_DATA_LEN(Td)		((Td)->u.u_t.len)
#define TOKEN_DATA_TEXT(Td)		((Td)->u.u_t.text)
#define TOKEN_DATA_QUOTE_AGE(Td)	((Td)->u.u_t.quote_age)
#ifdef ENABLE_CHANGEWORD
# define TOKEN_DATA_ORIG_TEXT(Td)	((Td)->u.u_t.original_text)
#endif
#define TOKEN_DATA_FUNC(Td)		((Td)->u.func)
341 typedef enum token_type token_type;
342 typedef enum token_data_type token_data_type;
344 void input_init (void);
345 token_type peek_token (void);
346 token_type next_token (token_data *, int *, const char *);
347 void skip_line (const char *);
349 /* push back input */
350 void push_file (FILE *, const char *, bool);
351 void push_macro (builtin_func *);
352 struct obstack *push_string_init (void);
353 void push_token (token_data *, int);
354 const input_block *push_string_finish (void);
355 void push_wrapup (const char *);
356 bool pop_wrapup (void);
357 void input_print (struct obstack *, const input_block *);
359 /* current input file, and line */
360 extern const char *current_file;
361 extern int current_line;
363 /* left and right quote, begin and end comment */
364 extern STRING bcomm, ecomm;
365 extern STRING lquote, rquote;
/* Default quote and comment delimiters: quotes are ` and ', and a
   comment runs from # to the end of the line.  */
#define DEF_LQUOTE "`"
#define DEF_RQUOTE "\'"
#define DEF_BCOMM "#"
#define DEF_ECOMM "\n"
/* Entry points exported by input.c for changing and querying the
   quote, comment and word syntax.  set_word_regexp exists only when
   changeword support is compiled in.  */
void set_quotes (const char *, const char *);
void set_comment (const char *, const char *);
#ifdef ENABLE_CHANGEWORD
void set_word_regexp (const char *, const char *);
#endif
unsigned int quote_age (void);
bool safe_quotes (void);
/* File: output.c --- output functions.  */
extern int current_diversion;
extern int output_current_line;

void output_init (void);
void output_exit (void);
void output_text (const char *, int);
void shipout_text (struct obstack *, const char *, int, int);
void make_diversion (int);
void insert_diversion (int);
void insert_file (FILE *);
void freeze_diversions (FILE *);
/* File symtab.c  --- symbol table definitions.  */

/* Operation modes for lookup_symbol ().  */
enum symbol_lookup
{
  SYMBOL_LOOKUP,
  SYMBOL_INSERT,
  SYMBOL_DELETE,
  SYMBOL_PUSHDEF,
  SYMBOL_POPDEF
};
405 /* Symbol table entry. */
406 struct symbol
408 struct symbol *next;
409 bool_bitfield traced : 1;
410 bool_bitfield shadowed : 1;
411 bool_bitfield macro_args : 1;
412 bool_bitfield blind_no_args : 1;
413 bool_bitfield deleted : 1;
414 int pending_expansions;
416 char *name;
417 token_data data; /* Type should be only TOKEN_TEXT or TOKEN_FUNC. */
/* Accessors for the members of a symbol, given a pointer S to it.
   The last three forward to the TOKEN_DATA_* accessors applied to
   the embedded data member.  */
#define SYMBOL_NEXT(S)		((S)->next)
#define SYMBOL_TRACED(S)	((S)->traced)
#define SYMBOL_SHADOWED(S)	((S)->shadowed)
#define SYMBOL_MACRO_ARGS(S)	((S)->macro_args)
#define SYMBOL_BLIND_NO_ARGS(S)	((S)->blind_no_args)
#define SYMBOL_DELETED(S)	((S)->deleted)
#define SYMBOL_PENDING_EXPANSIONS(S) ((S)->pending_expansions)
#define SYMBOL_NAME(S)		((S)->name)
#define SYMBOL_TYPE(S)		(TOKEN_DATA_TYPE (&(S)->data))
#define SYMBOL_TEXT(S)		(TOKEN_DATA_TEXT (&(S)->data))
#define SYMBOL_FUNC(S)		(TOKEN_DATA_FUNC (&(S)->data))
typedef enum symbol_lookup symbol_lookup;
typedef struct symbol symbol;

/* Callback type accepted by hack_all_symbols.  */
typedef void hack_symbol (symbol *, void *);

#define HASHMAX 509		/* default, overridden by -Hsize */

extern symbol **symtab;

void free_symbol (symbol *sym);
void symtab_init (void);
symbol *lookup_symbol (const char *, symbol_lookup);
void hack_all_symbols (hack_symbol *, void *);
445 /* File: macro.c --- macro expansion. */
447 extern int expansion_level;
449 void expand_input (void);
450 void call_macro (symbol *, int, macro_arguments *, struct obstack *);
452 unsigned int arg_argc (macro_arguments *);
453 token_data_type arg_type (macro_arguments *, unsigned int);
454 const char *arg_text (macro_arguments *, unsigned int);
455 bool arg_equal (macro_arguments *, unsigned int, unsigned int);
456 bool arg_empty (macro_arguments *, unsigned int);
457 size_t arg_len (macro_arguments *, unsigned int);
458 builtin_func *arg_func (macro_arguments *, unsigned int);
459 macro_arguments *make_argv_ref (macro_arguments *, const char *, size_t,
460 bool, bool);
461 void push_arg (struct obstack *, macro_arguments *, unsigned int);
462 void push_args (struct obstack *, macro_arguments *, bool, bool);
465 /* File: builtin.c --- builtins. */
467 struct builtin
469 const char *name;
470 bool_bitfield gnu_extension : 1;
471 bool_bitfield groks_macro_args : 1;
472 bool_bitfield blind_if_no_args : 1;
473 builtin_func *func;
/* Description of one predefined text macro.  All three members are
   strings.  */
struct predefined
{
  const char *unix_name;
  const char *gnu_name;
  const char *func;
};

typedef struct builtin builtin;
typedef struct predefined predefined;

/* Forward declarations, so that init_pattern_buffer can be declared
   without pulling in the regex header here.  */
struct re_pattern_buffer;
struct re_registers;
/* The default sequence detects multi-digit parameters (obsolete after
   1.4.x), and any use of extended arguments with the default ${}
   syntax (new in 2.0).  The backslashes are doubled because this is
   a C string literal holding a regular expression.  */
#define DEFAULT_MACRO_SEQUENCE "\\$\\({[^}]*}\\|[0-9][0-9]+\\)"
493 void builtin_init (void);
494 void define_builtin (const char *, const builtin *, symbol_lookup);
495 void set_macro_sequence (const char *);
496 void free_regex (void);
497 void define_user_macro (const char *, size_t, const char *, symbol_lookup);
498 void undivert_all (void);
499 void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *);
500 void m4_placeholder (struct obstack *, int, macro_arguments *);
501 void init_pattern_buffer (struct re_pattern_buffer *, struct re_registers *);
502 const char *ntoa (int32_t, int);
504 const builtin *find_builtin_by_addr (builtin_func *);
505 const builtin *find_builtin_by_name (const char *);
507 /* File: path.c --- path search for include files. */
509 void include_init (void);
510 void include_env_init (void);
511 void add_include_directory (const char *);
512 FILE *m4_path_search (const char *, char **);
514 /* File: eval.c --- expression evaluation. */
516 bool evaluate (const char *, const char *, int32_t *);
518 /* File: format.c --- printf like formatting. */
520 void format (struct obstack *, int, macro_arguments *);
522 /* File: freeze.c --- frozen state files. */
524 void produce_frozen_state (const char *);
525 void reload_frozen_state (const char *);
/* Debugging the memory allocator.  */

#ifdef WITH_DMALLOC
# define DMALLOC_FUNC_CHECK
# include <dmalloc.h>
#endif

/* Other debug stuff.  Defining DEBUG switches on every per-module
   DEBUG_* macro listed here at once.  */

#ifdef DEBUG
# define DEBUG_INCL   1
# define DEBUG_INPUT  1
# define DEBUG_MACRO  1
# define DEBUG_OUTPUT 1
# define DEBUG_REGEX  1
# define DEBUG_STKOVF 1
# define DEBUG_SYM    1
#endif
/* Convert a possibly-signed character to an unsigned character.  This is
   a bit safer than casting to unsigned char, since it catches some type
   errors that the cast doesn't.  When inline functions are unavailable
   (HAVE_INLINE is not set), fall back to a plain cast.  */
#if HAVE_INLINE
static inline unsigned char to_uchar (char ch) { return ch; }
#else
# define to_uchar(C) ((unsigned char) (C))
#endif