Stage 5: add notion of quote age.
[m4/ericb.git] / src / m4.h
blob d7b6e0889d4e2207bca9977cf20dd8d23778cbab
/* GNU m4 -- A simple macro processor

   Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.

   This file is part of GNU M4.

   GNU M4 is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   GNU M4 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
22 /* We use <config.h> instead of "config.h" so that a compilation
23 using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
24 (which it would do because it found this file in $srcdir). */
26 #include <config.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <stdbool.h>
32 #include <stdint.h>
33 #include <string.h>
34 #include <sys/types.h>
36 #include "binary-io.h"
37 #include "clean-temp.h"
38 #include "cloexec.h"
39 #include "close-stream.h"
40 #include "closein.h"
41 #include "error.h"
42 #include "exitfail.h"
43 #include "obstack.h"
44 #include "stdio--.h"
45 #include "stdlib--.h"
46 #include "unistd--.h"
47 #include "verror.h"
48 #include "xalloc.h"
49 #include "xprintf.h"
50 #include "xvasprintf.h"
/* Canonicalize UNIX recognition macros: UNIX is defined to 1 when any
   of the compiler- or system-provided macros tested below is defined.  */
#if defined unix || defined __unix || defined __unix__ \
  || defined _POSIX_VERSION || defined _POSIX2_VERSION \
  || defined __NetBSD__ || defined __OpenBSD__ \
  || defined __APPLE__ || defined __APPLE_CC__
# define UNIX 1
#endif

/* Canonicalize Windows recognition macros: W32_NATIVE is defined to 1
   only when _WIN32 or __WIN32__ is defined and __CYGWIN__ is not.  */
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
# define W32_NATIVE 1
#endif

/* Canonicalize OS/2 recognition macro: OS2 is defined to 1 when
   __EMX__ is defined.  */
#ifdef __EMX__
# define OS2 1
#endif
/* Used for version mismatch, when -R detects a frozen file it can't
   parse.  */
#define EXIT_MISMATCH 63

/* M4 1.4.x is not yet internationalized.  But when it is, this can be
   redefined as gettext().  For now the macro expands to its argument
   unchanged.  */
#define _(STRING) STRING
/* Various declarations.  */

/* A character buffer paired with its length.  */
struct string
  {
    char *string;               /* characters of the string */
    size_t length;              /* length of the string */
  };
typedef struct string STRING;
/* Memory allocation.  Obstack chunks are obtained with xmalloc and
   released with free.  */
#define obstack_chunk_alloc     xmalloc
#define obstack_chunk_free      free

/* glibc's obstack left out the ability to suspend and resume growth
   of an object on the stack.  Reopen OBJECT (previously returned by
   obstack_alloc or obstack_finish) with SIZE for additional growth,
   freeing all objects that occur later in the stack.

   The expansion is a comma expression whose value is the new
   object_base.  OBS and OBJECT each appear twice in the expansion, so
   do not pass arguments with side effects.  */
#define obstack_regrow(OBS, OBJECT, SIZE)                \
  (obstack_free (OBS, (char *) (OBJECT) + (SIZE)),      \
   (OBS)->object_base = (char *) (OBJECT))
/* These must come first: the declarations that follow in this header
   refer to these names before the structures are defined.  */
typedef struct token_data token_data;
typedef struct macro_arguments macro_arguments;

/* Function type shared by all builtin implementations: an obstack, an
   int, and the macro argument collection.
   NOTE(review): the int is presumably the argument count -- confirm
   against the callers in builtin.c.  */
typedef void builtin_func (struct obstack *, int, macro_arguments *);
/* Gnulib's stdbool doesn't work with bool bitfields.  For nicer
   debugging, use bool when we know it works, but use the more
   portable unsigned int elsewhere.  "Known to work" here means a
   compiler reporting __GNUC__ greater than 2.  */
#if __GNUC__ > 2
typedef bool bool_bitfield;
#else
typedef unsigned int bool_bitfield;
#endif /* __GNUC__ > 2 */
/* Take advantage of GNU C compiler source level optimization hints,
   using portable macros.  M4_GNUC_ATTRIBUTE expands to an
   __attribute__ specifier for GCC newer than 2.6 and to nothing
   otherwise.  ARGS must carry its own parentheses, e.g.
   M4_GNUC_ATTRIBUTE ((__unused__)).  */
#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 6)
# define M4_GNUC_ATTRIBUTE(args)        __attribute__ (args)
#else
# define M4_GNUC_ATTRIBUTE(args)
#endif /* __GNUC__ */

/* Mark an entity as possibly unused.  */
#define M4_GNUC_UNUSED  M4_GNUC_ATTRIBUTE ((__unused__))

/* Mark a function as printf-like: FMT is the 1-based index of the
   format string parameter, ARG that of the first variadic argument.  */
#define M4_GNUC_PRINTF(fmt, arg)                        \
  M4_GNUC_ATTRIBUTE ((__format__ (__printf__, fmt, arg)))
124 /* File: m4.c --- global definitions. */
126 /* Option flags. */
127 extern int sync_output; /* -s */
128 extern int debug_level; /* -d */
129 extern size_t hash_table_size; /* -H */
130 extern int no_gnu_extensions; /* -G */
131 extern int prefix_all_builtins; /* -P */
132 extern int max_debug_argument_length; /* -l */
133 extern int suppress_warnings; /* -Q */
134 extern int warning_status; /* -E */
135 extern int nesting_limit; /* -L */
136 #ifdef ENABLE_CHANGEWORD
137 extern const char *user_word_regexp; /* -W */
138 #endif
140 /* Error handling. */
141 extern int retcode;
142 extern const char *program_name;
144 void m4_error (int, int, const char *, const char *, ...)
145 M4_GNUC_PRINTF (4, 5);
146 void m4_error_at_line (int, int, const char *, int, const char *,
147 const char *, ...) M4_GNUC_PRINTF (6, 7);
148 void m4_warn (int, const char *, const char *, ...) M4_GNUC_PRINTF (3, 4);
149 void m4_warn_at_line (int, const char *, int, const char *,
150 const char *, ...) M4_GNUC_PRINTF (5, 6);
152 #ifdef USE_STACKOVF
153 void setup_stackovf_trap (char *const *, char *const *,
154 void (*handler) (void));
155 #endif
/* File: debug.c  --- debugging and tracing function.  */

/* Stream used by the DEBUG_* output macros; they print nothing while
   it is NULL.  */
extern FILE *debug;

/* The value of debug_level is a bitmask of the following.  The letter
   in each comment is the corresponding debug flag character.  */

/* a: show arglist in trace output */
#define DEBUG_TRACE_ARGS 1
/* e: show expansion in trace output */
#define DEBUG_TRACE_EXPANSION 2
/* q: quote args and expansion in trace output */
#define DEBUG_TRACE_QUOTE 4
/* t: trace all macros -- overrides trace{on,off} */
#define DEBUG_TRACE_ALL 8
/* l: add line numbers to trace output */
#define DEBUG_TRACE_LINE 16
/* f: add file name to trace output */
#define DEBUG_TRACE_FILE 32
/* p: trace path search of include files */
#define DEBUG_TRACE_PATH 64
/* c: show macro call before args collection */
#define DEBUG_TRACE_CALL 128
/* i: trace changes of input files */
#define DEBUG_TRACE_INPUT 256
/* x: add call id to trace output */
#define DEBUG_TRACE_CALLID 512

/* V: very verbose --  print everything (every bit above, 1 | ... | 512) */
#define DEBUG_TRACE_VERBOSE 1023
/* default flags -- equiv: aeq (1 | 2 | 4) */
#define DEBUG_TRACE_DEFAULT 7
/* Print one formatted value to the debug stream.  Nothing happens
   while debug is NULL.  Wrapped in do/while (0) so the macro behaves
   as a single statement.  */
#define DEBUG_PRINT1(Fmt, Arg1)                 \
  do                                            \
    {                                           \
      if (debug != NULL)                        \
        xfprintf (debug, Fmt, Arg1);            \
    }                                           \
  while (0)

/* Like DEBUG_PRINT1, but with three values to format.  */
#define DEBUG_PRINT3(Fmt, Arg1, Arg2, Arg3)             \
  do                                                    \
    {                                                   \
      if (debug != NULL)                                \
        xfprintf (debug, Fmt, Arg1, Arg2, Arg3);        \
    }                                                   \
  while (0)
/* Emit one complete debug message: the prefix produced by
   debug_message_prefix, then the text given by Fmt, then a newline.
   Nothing happens while debug is NULL.  Fmt is handed to xfprintf as
   the format, so it must be a trusted format string that needs no
   arguments.  */
#define DEBUG_MESSAGE(Fmt)                      \
  do                                            \
    {                                           \
      if (debug != NULL)                        \
        {                                       \
          debug_message_prefix ();              \
          xfprintf (debug, Fmt);                \
          putc ('\n', debug);                   \
        }                                       \
    }                                           \
  while (0)

/* Like DEBUG_MESSAGE, with one value to format.  */
#define DEBUG_MESSAGE1(Fmt, Arg1)               \
  do                                            \
    {                                           \
      if (debug != NULL)                        \
        {                                       \
          debug_message_prefix ();              \
          xfprintf (debug, Fmt, Arg1);          \
          putc ('\n', debug);                   \
        }                                       \
    }                                           \
  while (0)

/* Like DEBUG_MESSAGE, with two values to format.  */
#define DEBUG_MESSAGE2(Fmt, Arg1, Arg2)         \
  do                                            \
    {                                           \
      if (debug != NULL)                        \
        {                                       \
          debug_message_prefix ();              \
          xfprintf (debug, Fmt, Arg1, Arg2);    \
          putc ('\n', debug);                   \
        }                                       \
    }                                           \
  while (0)
241 void debug_init (void);
242 int debug_decode (const char *);
243 void debug_flush_files (void);
244 bool debug_set_output (const char *, const char *);
245 void debug_message_prefix (void);
247 void trace_prepre (const char *, int);
248 void trace_pre (const char *, int, int, macro_arguments *);
249 void trace_post (const char *, int, int, macro_arguments *, const char *);
/* File: input.c  --- lexical definitions.  */

typedef struct token_chain token_chain;

/* Various different token types.  Avoid overlap with token_data_type,
   since the shared prefix of the enumerators is a bit confusing; that
   is why numbering starts at 4.  Each comment ends with the
   token_data_type value(s) that accompany the token.  */
enum token_type
  {
    TOKEN_EOF = 4,/* End of file, TOKEN_VOID.  */
    TOKEN_STRING, /* Quoted string or comment, TOKEN_TEXT or TOKEN_COMP.  */
    TOKEN_WORD,   /* An identifier, TOKEN_TEXT.  */
    TOKEN_OPEN,   /* Active character `(', TOKEN_TEXT.  */
    TOKEN_COMMA,  /* Active character `,', TOKEN_TEXT.  */
    TOKEN_CLOSE,  /* Active character `)', TOKEN_TEXT.  */
    TOKEN_SIMPLE, /* Any other single character, TOKEN_TEXT.  */
    TOKEN_MACDEF  /* A macro's definition (see "defn"), TOKEN_FUNC.  */
  };
/* The data for a token, a macro argument, and a macro definition.
   The value says which part of the union in struct token_data is
   meaningful.  */
enum token_data_type
  {
    TOKEN_VOID, /* Token still being constructed, u is invalid.  */
    TOKEN_TEXT, /* Straight text, u.u_t is valid.  */
    TOKEN_FUNC, /* Builtin function definition, u.func is valid.  */
    TOKEN_COMP  /* Composite argument, u.chain is valid.  */
  };
278 /* Composite tokens are built of a linked list of chains. */
279 struct token_chain
281 token_chain *next; /* Pointer to next link of chain. */
282 char *str; /* NUL-terminated string if text, else NULL. */
283 size_t len; /* Length of str, else 0. */
284 macro_arguments *argv;/* Reference to earlier $@. */
285 unsigned int index; /* Argument index within argv. */
286 bool flatten; /* True to treat builtins as text. */
289 /* The content of a token or macro argument. */
290 struct token_data
292 enum token_data_type type;
293 union
295 struct
297 /* We don't support NUL in text, yet. So len is just a
298 cache for now. But it will be essential if we ever DO
299 support NUL. */
300 size_t len;
301 char *text;
302 /* The value of quote_age when this token was scanned. If
303 this token is later encountered in the context of
304 scanning a quoted string, and quote_age has not changed,
305 then rescanning this string is provably unnecessary. If
306 zero, then this string potentially contains content that
307 might change the parse on rescan. Ignored for 0 len. */
308 unsigned int quote_age;
309 #ifdef ENABLE_CHANGEWORD
310 char *original_text;
311 #endif
313 u_t;
314 builtin_func *func;
316 /* Composite text: a linked list of straight text and $@
317 placeholders. */
318 token_chain *chain;
/* Accessors for struct token_data.  Td is a pointer to the token
   data; every macro yields an lvalue.  The LEN, TEXT, QUOTE_AGE and
   ORIG_TEXT accessors reach into the text member u.u_t; FUNC reads
   u.func.  */
#define TOKEN_DATA_TYPE(Td)             ((Td)->type)
#define TOKEN_DATA_LEN(Td)              ((Td)->u.u_t.len)
#define TOKEN_DATA_TEXT(Td)             ((Td)->u.u_t.text)
#define TOKEN_DATA_QUOTE_AGE(Td)        ((Td)->u.u_t.quote_age)
#ifdef ENABLE_CHANGEWORD
# define TOKEN_DATA_ORIG_TEXT(Td)       ((Td)->u.u_t.original_text)
#endif
#define TOKEN_DATA_FUNC(Td)             ((Td)->u.func)
332 typedef enum token_type token_type;
333 typedef enum token_data_type token_data_type;
335 void input_init (void);
336 token_type peek_token (void);
337 token_type next_token (token_data *, int *, const char *);
338 void skip_line (const char *);
340 /* push back input */
341 void push_file (FILE *, const char *, bool);
342 void push_macro (builtin_func *);
343 struct obstack *push_string_init (void);
344 const char *push_string_finish (void);
345 void push_wrapup (const char *);
346 bool pop_wrapup (void);
348 /* current input file, and line */
349 extern const char *current_file;
350 extern int current_line;
352 /* left and right quote, begin and end comment */
353 extern STRING bcomm, ecomm;
354 extern STRING lquote, rquote;
/* Default delimiters: quotes are ` and ', a comment runs from # to
   the end of the line.  */
#define DEF_LQUOTE "`"
#define DEF_RQUOTE "\'"
#define DEF_BCOMM "#"
#define DEF_ECOMM "\n"
/* Change the quote or comment delimiters (input.c).  */
void set_quotes (const char *, const char *);
void set_comment (const char *, const char *);
#ifdef ENABLE_CHANGEWORD
void set_word_regexp (const char *, const char *);
#endif

/* Current quote age; compare with the quote_age stored in a text
   token to decide whether rescanning that token can be skipped.  */
unsigned int quote_age (void);
/* NOTE(review): exact meaning of "safe" is defined in input.c --
   confirm there before relying on it.  */
bool safe_quotes (void);
/* File: output.c --- output functions.  */
extern int current_diversion;
extern int output_current_line;

/* Output and diversion handling (output.c).  */
void output_init (void);
void output_exit (void);
void output_text (const char *, int);
void shipout_text (struct obstack *, const char *, int, int);
void make_diversion (int);
void insert_diversion (int);
void insert_file (FILE *);
void freeze_diversions (FILE *);
/* File: symtab.c  --- symbol table definitions.  */

/* Operation modes for lookup_symbol ().  */
enum symbol_lookup
  {
    SYMBOL_LOOKUP,
    SYMBOL_INSERT,
    SYMBOL_DELETE,
    SYMBOL_PUSHDEF,
    SYMBOL_POPDEF
  };
394 /* Symbol table entry. */
395 struct symbol
397 struct symbol *next;
398 bool_bitfield traced : 1;
399 bool_bitfield shadowed : 1;
400 bool_bitfield macro_args : 1;
401 bool_bitfield blind_no_args : 1;
402 bool_bitfield deleted : 1;
403 int pending_expansions;
405 char *name;
406 token_data data; /* Type should be only TOKEN_TEXT or TOKEN_FUNC. */
/* Accessors for struct symbol.  S is a pointer to the symbol.  The
   first eight name a field directly; TYPE, TEXT and FUNC forward to
   the TOKEN_DATA_* accessors on the embedded data member.  */
#define SYMBOL_NEXT(S)          ((S)->next)
#define SYMBOL_TRACED(S)        ((S)->traced)
#define SYMBOL_SHADOWED(S)      ((S)->shadowed)
#define SYMBOL_MACRO_ARGS(S)    ((S)->macro_args)
#define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args)
#define SYMBOL_DELETED(S)       ((S)->deleted)
#define SYMBOL_PENDING_EXPANSIONS(S) ((S)->pending_expansions)
#define SYMBOL_NAME(S)          ((S)->name)
#define SYMBOL_TYPE(S)          (TOKEN_DATA_TYPE (&(S)->data))
#define SYMBOL_TEXT(S)          (TOKEN_DATA_TEXT (&(S)->data))
#define SYMBOL_FUNC(S)          (TOKEN_DATA_FUNC (&(S)->data))
typedef enum symbol_lookup symbol_lookup;
typedef struct symbol symbol;

/* Callback type taken by hack_all_symbols: it receives a symbol and
   the opaque pointer passed to hack_all_symbols.  */
typedef void hack_symbol (symbol *, void *);

#define HASHMAX 509             /* default, overridden by -Hsize */

extern symbol **symtab;

/* Symbol table operations (symtab.c).  */
void free_symbol (symbol *sym);
void symtab_init (void);
symbol *lookup_symbol (const char *, symbol_lookup);
void hack_all_symbols (hack_symbol *, void *);
434 /* File: macro.c --- macro expansion. */
436 extern int expansion_level;
438 void expand_input (void);
439 void call_macro (symbol *, int, macro_arguments *, struct obstack *);
441 unsigned int arg_argc (macro_arguments *);
442 token_data_type arg_type (macro_arguments *, unsigned int);
443 const char *arg_text (macro_arguments *, unsigned int);
444 bool arg_equal (macro_arguments *, unsigned int, unsigned int);
445 bool arg_empty (macro_arguments *, unsigned int);
446 size_t arg_len (macro_arguments *, unsigned int);
447 builtin_func *arg_func (macro_arguments *, unsigned int);
448 macro_arguments *make_argv_ref (macro_arguments *, const char *, size_t,
449 bool, bool);
452 /* File: builtin.c --- builtins. */
454 struct builtin
456 const char *name;
457 bool_bitfield gnu_extension : 1;
458 bool_bitfield groks_macro_args : 1;
459 bool_bitfield blind_if_no_args : 1;
460 builtin_func *func;
/* A predefined macro entry made of three strings.  Note that FUNC
   here is a string, not a function pointer.
   NOTE(review): presumably the macro's name in traditional and GNU
   mode plus its definition text -- confirm against builtin.c.  */
struct predefined
  {
    const char *unix_name;
    const char *gnu_name;
    const char *func;
  };
typedef struct builtin builtin;
typedef struct predefined predefined;

/* Forward declarations, so that init_pattern_buffer can be declared
   without pulling in the regex header.  */
struct re_pattern_buffer;
struct re_registers;

/* The default sequence detects multi-digit parameters (obsolete after
   1.4.x), and any use of extended arguments with the default ${}
   syntax (new in 2.0).  */
#define DEFAULT_MACRO_SEQUENCE "\\$\\({[^}]*}\\|[0-9][0-9]+\\)"
480 void builtin_init (void);
481 void define_builtin (const char *, const builtin *, symbol_lookup);
482 void set_macro_sequence (const char *);
483 void free_regex (void);
484 void define_user_macro (const char *, size_t, const char *, symbol_lookup);
485 void undivert_all (void);
486 void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *);
487 void m4_placeholder (struct obstack *, int, macro_arguments *);
488 void init_pattern_buffer (struct re_pattern_buffer *, struct re_registers *);
489 const char *ntoa (int32_t, int);
491 const builtin *find_builtin_by_addr (builtin_func *);
492 const builtin *find_builtin_by_name (const char *);
/* File: path.c  --- path search for include files.  */

void include_init (void);
void include_env_init (void);
void add_include_directory (const char *);
FILE *m4_path_search (const char *, char **);

/* File: eval.c  --- expression evaluation.  */

/* The int32_t pointer is the only non-const parameter.
   NOTE(review): presumably it receives the result -- confirm the
   parameter roles and the meaning of the bool in eval.c.  */
bool evaluate (const char *, const char *, int32_t *);
505 /* File: format.c --- printf like formatting. */
507 void format (struct obstack *, int, macro_arguments *);
509 /* File: freeze.c --- frozen state files. */
511 void produce_frozen_state (const char *);
512 void reload_frozen_state (const char *);
/* Debugging the memory allocator.  Only active when the build
   defines WITH_DMALLOC.  */

#ifdef WITH_DMALLOC
# define DMALLOC_FUNC_CHECK
# include <dmalloc.h>
#endif

/* Other debug stuff.  Defining DEBUG turns on every per-module debug
   switch listed below.  */

#ifdef DEBUG
# define DEBUG_INCL   1
# define DEBUG_INPUT  1
# define DEBUG_MACRO  1
# define DEBUG_OUTPUT 1
# define DEBUG_REGEX  1
# define DEBUG_STKOVF 1
# define DEBUG_SYM    1
#endif
/* Convert a possibly-signed character to an unsigned character.  This is
   a bit safer than casting to unsigned char, since it catches some type
   errors that the cast doesn't.  When HAVE_INLINE is not set, fall back
   to a plain cast macro, which offers no such type checking.  */
#if HAVE_INLINE
static inline unsigned char
to_uchar (char ch)
{
  return ch;
}
#else
# define to_uchar(C) ((unsigned char) (C))
#endif