Stage 10: avoid extra copying of strings and comments.
[m4/ericb.git] / src / m4.h
blobea3947ffd39a035b83b1fc03f3e6641a86436c13
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 /* We use <config.h> instead of "config.h" so that a compilation
23 using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
24 (which it would do because it found this file in $srcdir). */
26 #include <config.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <limits.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <string.h>
35 #include <sys/types.h>
37 #include "binary-io.h"
38 #include "clean-temp.h"
39 #include "cloexec.h"
40 #include "close-stream.h"
41 #include "closein.h"
42 #include "error.h"
43 #include "exitfail.h"
44 #include "intprops.h"
45 #include "obstack.h"
46 #include "stdio--.h"
47 #include "stdlib--.h"
48 #include "unistd--.h"
49 #include "vasnprintf.h"
50 #include "verror.h"
51 #include "xalloc.h"
52 #include "xprintf.h"
53 #include "xvasprintf.h"
55 /* Canonicalize UNIX recognition macros: define UNIX to 1 when any of
   the Unix, POSIX, NetBSD, OpenBSD, or Apple predefined macros tested
   below is defined. */
56 #if defined unix || defined __unix || defined __unix__ \
57 || defined _POSIX_VERSION || defined _POSIX2_VERSION \
58 || defined __NetBSD__ || defined __OpenBSD__ \
59 || defined __APPLE__ || defined __APPLE_CC__
60 # define UNIX 1
61 #endif
63 /* Canonicalize Windows recognition macros: define W32_NATIVE to 1 when
   _WIN32 or __WIN32__ is defined and __CYGWIN__ is not. */
64 #if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
65 # define W32_NATIVE 1
66 #endif
68 /* Canonicalize OS/2 recognition macro: define OS2 to 1 when __EMX__
   is defined. */
69 #ifdef __EMX__
70 # define OS2 1
71 #endif
73 /* Used for version mismatch, when -R detects a frozen file it can't parse. */
74 #define EXIT_MISMATCH 63
76 /* M4 1.4.x is not yet internationalized, so _() currently expands to
77 its argument unchanged. When it is, this can be redefined as gettext(). */
78 #define _(STRING) STRING
80 /* Various declarations. */
/* A character pointer paired with an explicit length; STRING is the
   typedef name for struct string. */
82 struct string
84 char *string; /* characters of the string */
85 size_t length; /* length of the string */
87 typedef struct string STRING;
89 /* Memory allocation: obstack_chunk_alloc maps to xmalloc and
   obstack_chunk_free maps to free. */
90 #define obstack_chunk_alloc xmalloc
91 #define obstack_chunk_free free
93 /* These must come first: forward typedefs for struct types that the
   declarations later in this header refer to by their short names. */
94 typedef struct input_block input_block;
95 typedef struct token_data token_data;
96 typedef struct macro_arguments macro_arguments;
/* Function type used for the func members of struct builtin and
   struct token_data: takes an obstack, an int, and a macro_arguments
   pointer, and returns nothing. */
97 typedef void builtin_func (struct obstack *, int, macro_arguments *);
99 /* Gnulib's stdbool doesn't work with bool bitfields. For nicer
100 debugging, use bool when we know it works, but use the more
101 portable unsigned int elsewhere. bool_bitfield is the type of the
   one-bit flag members declared later in this header. */
102 #if __GNUC__ > 2
103 typedef bool bool_bitfield;
104 #else
105 typedef unsigned int bool_bitfield;
106 #endif /* __GNUC__ > 2 */
108 /* Take advantage of GNU C compiler source level optimization hints,
109 using portable macros. M4_GNUC_ATTRIBUTE expands to __attribute__
   when the compiler reports a GCC version newer than 2.6, and to
   nothing otherwise. */
110 #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 6)
111 # define M4_GNUC_ATTRIBUTE(args) __attribute__ (args)
112 #else
113 # define M4_GNUC_ATTRIBUTE(args)
114 #endif /* GCC newer than 2.6 */
/* Applies the __unused__ attribute. */
116 #define M4_GNUC_UNUSED M4_GNUC_ATTRIBUTE ((__unused__))
/* Applies the printf __format__ attribute; fmt and arg are forwarded
   unchanged as its two index arguments. */
117 #define M4_GNUC_PRINTF(fmt, arg) \
118 M4_GNUC_ATTRIBUTE ((__format__ (__printf__, fmt, arg)))
120 /* File: m4.c --- global definitions. */
122 /* Option flags. The comment after each variable names the
   command-line option associated with it. */
123 extern int sync_output; /* -s */
124 extern int debug_level; /* -d */
125 extern size_t hash_table_size; /* -H */
126 extern int no_gnu_extensions; /* -G */
127 extern int prefix_all_builtins; /* -P */
128 extern int max_debug_argument_length; /* -l */
129 extern int suppress_warnings; /* -Q */
130 extern int warning_status; /* -E */
131 extern int nesting_limit; /* -L */
/* Declared only when ENABLE_CHANGEWORD is defined. */
132 #ifdef ENABLE_CHANGEWORD
133 extern const char *user_word_regexp; /* -W */
134 #endif
136 /* Error handling. */
137 extern int retcode;
138 extern const char *program_name;
/* Variadic diagnostic functions. In each M4_GNUC_PRINTF (F, A)
   annotation, F is the position of the last const char * parameter
   and A is the position of the variadic arguments that follow it. */
140 void m4_error (int, int, const char *, const char *, ...)
141 M4_GNUC_PRINTF (4, 5);
142 void m4_error_at_line (int, int, const char *, int, const char *,
143 const char *, ...) M4_GNUC_PRINTF (6, 7);
144 void m4_warn (int, const char *, const char *, ...) M4_GNUC_PRINTF (3, 4);
145 void m4_warn_at_line (int, const char *, int, const char *,
146 const char *, ...) M4_GNUC_PRINTF (5, 6);
/* Declared only when USE_STACKOVF is defined. The third parameter is
   a pointer to a handler function taking and returning nothing. */
148 #ifdef USE_STACKOVF
149 void setup_stackovf_trap (char *const *, char *const *,
150 void (*handler) (void));
151 #endif
153 /* File: debug.c --- debugging and tracing functions. */
/* Stream written to by the DEBUG_PRINT* and DEBUG_MESSAGE* macros,
   which do nothing while it is NULL. */
155 extern FILE *debug;
157 /* The value of debug_level is a bitmask of the following. */
159 /* a: show arglist in trace output */
160 #define DEBUG_TRACE_ARGS 1
161 /* e: show expansion in trace output */
162 #define DEBUG_TRACE_EXPANSION 2
163 /* q: quote args and expansion in trace output */
164 #define DEBUG_TRACE_QUOTE 4
165 /* t: trace all macros -- overrides trace{on,off} */
166 #define DEBUG_TRACE_ALL 8
167 /* l: add line numbers to trace output */
168 #define DEBUG_TRACE_LINE 16
169 /* f: add file name to trace output */
170 #define DEBUG_TRACE_FILE 32
171 /* p: trace path search of include files */
172 #define DEBUG_TRACE_PATH 64
173 /* c: show macro call before args collection */
174 #define DEBUG_TRACE_CALL 128
175 /* i: trace changes of input files */
176 #define DEBUG_TRACE_INPUT 256
177 /* x: add call id to trace output */
178 #define DEBUG_TRACE_CALLID 512
180 /* V: very verbose -- print everything (all ten bits above set) */
181 #define DEBUG_TRACE_VERBOSE 1023
182 /* default flags -- equiv: aeq (the ARGS, EXPANSION and QUOTE bits) */
183 #define DEBUG_TRACE_DEFAULT 7
/* Pass Fmt and Arg1 to xfprintf on the debug stream; does nothing
   when debug is NULL. */
185 #define DEBUG_PRINT1(Fmt, Arg1) \
186 do \
188 if (debug != NULL) \
189 xfprintf (debug, Fmt, Arg1); \
191 while (0)
/* Same as DEBUG_PRINT1, but forwards three arguments after Fmt. */
193 #define DEBUG_PRINT3(Fmt, Arg1, Arg2, Arg3) \
194 do \
196 if (debug != NULL) \
197 xfprintf (debug, Fmt, Arg1, Arg2, Arg3); \
199 while (0)
/* When debug is non-NULL: call debug_message_prefix, pass Fmt to
   xfprintf on the debug stream, then write a newline. Fmt is used as
   the format string with no further arguments. */
201 #define DEBUG_MESSAGE(Fmt) \
202 do \
204 if (debug != NULL) \
206 debug_message_prefix (); \
207 xfprintf (debug, Fmt); \
208 putc ('\n', debug); \
211 while (0)
/* Same as DEBUG_MESSAGE, but forwards one argument after Fmt. */
213 #define DEBUG_MESSAGE1(Fmt, Arg1) \
214 do \
216 if (debug != NULL) \
218 debug_message_prefix (); \
219 xfprintf (debug, Fmt, Arg1); \
220 putc ('\n', debug); \
223 while (0)
/* Same as DEBUG_MESSAGE, but forwards two arguments after Fmt. */
225 #define DEBUG_MESSAGE2(Fmt, Arg1, Arg2) \
226 do \
228 if (debug != NULL) \
230 debug_message_prefix (); \
231 xfprintf (debug, Fmt, Arg1, Arg2); \
232 putc ('\n', debug); \
235 while (0)
/* Functions declared for debug.c. debug_message_prefix is the
   function the DEBUG_MESSAGE* macros call before printing. */
237 void debug_init (void);
238 int debug_decode (const char *);
239 void debug_flush_files (void);
240 bool debug_set_output (const char *, const char *);
241 void debug_message_prefix (void);
/* Trace functions. Each takes a string and an int; trace_pre and
   trace_post also take the macro arguments, and trace_post takes an
   input_block pointer as well. */
243 void trace_prepre (const char *, int);
244 void trace_pre (const char *, int, macro_arguments *);
245 void trace_post (const char *, int, macro_arguments *,
246 const input_block *);
/* NOTE(review): the meaning of the int * parameter and of the bool
   result is not visible in this header -- see debug.c. */
248 bool obstack_print (struct obstack *, const char *, size_t, int *);
250 /* File: input.c --- lexical definitions. */
252 typedef struct token_chain token_chain;
254 /* Various different token types. Avoid overlap with token_data_type,
255 since the shared prefix of the enumerators is a bit confusing:
   numbering starts at 4, just past the four token_data_type
   enumerators. The comment on each enumerator also names the
   token_data_type value(s) that go with it. */
256 enum token_type
258 TOKEN_EOF = 4,/* End of file, TOKEN_VOID. */
259 TOKEN_STRING, /* Quoted string or comment, TOKEN_TEXT or TOKEN_COMP. */
260 TOKEN_WORD, /* An identifier, TOKEN_TEXT. */
261 TOKEN_OPEN, /* Active character `(', TOKEN_TEXT. */
262 TOKEN_COMMA, /* Active character `,', TOKEN_TEXT. */
263 TOKEN_CLOSE, /* Active character `)', TOKEN_TEXT. */
264 TOKEN_SIMPLE, /* Any other single character, TOKEN_TEXT. */
265 TOKEN_MACDEF /* A macro's definition (see "defn"), TOKEN_FUNC. */
268 /* The data for a token, a macro argument, and a macro definition.
   This is the type tag stored in struct token_data; each enumerator's
   comment names the union member that is valid for it. */
269 enum token_data_type
271 TOKEN_VOID, /* Token still being constructed, u is invalid. */
272 TOKEN_TEXT, /* Straight text, u.u_t is valid. */
273 TOKEN_FUNC, /* Builtin function definition, u.func is valid. */
274 TOKEN_COMP /* Composite argument, u.chain is valid. */
277 /* Composite tokens are built of a linked list of chains. Per the
   field comments, a link carries text in str and len, or has str set
   to NULL and len set to 0. */
278 struct token_chain
280 token_chain *next; /* Pointer to next link of chain. */
281 const char *str; /* NUL-terminated string if text, else NULL. */
282 size_t len; /* Length of str, else 0. */
283 int level; /* Expansion level of link content, or -1. */
284 macro_arguments *argv;/* Reference to earlier $@. */
285 unsigned int index; /* Argument index within argv. */
286 bool flatten; /* True to treat builtins as text. */
289 /* The content of a token or macro argument: a type tag followed by
   a union whose valid member is selected by that tag (see the comments
   on enum token_data_type). */
290 struct token_data
292 enum token_data_type type;
293 union
295 struct
297 /* We don't support NUL in text, yet. So len is just a
298 cache for now. But it will be essential if we ever DO
299 support NUL. */
300 size_t len;
301 char *text; /* The contents of the token. */
302 /* The value of quote_age when this token was scanned. If
303 this token is later encountered in the context of
304 scanning a quoted string, and quote_age has not changed,
305 then rescanning this string is provably unnecessary. If
306 zero, then this string potentially contains content that
307 might change the parse on rescan. Ignored for 0 len. */
308 unsigned int quote_age;
309 #ifdef ENABLE_CHANGEWORD
310 /* If changeword is in effect, and contains a () group, then
311 this contains the entire token, while text contains the
312 portion that matched the () group to form a macro name.
313 Otherwise, this field is unused. */
314 const char *original_text;
315 #endif
317 u_t;
/* Builtin function pointer; the member that is valid for TOKEN_FUNC. */
318 builtin_func *func;
320 /* Composite text: a linked list of straight text and $@
321 placeholders. The member that is valid for TOKEN_COMP. */
322 token_chain *chain;
/* Field accessors; Td is a pointer to a token_data. The LEN, TEXT,
   QUOTE_AGE and ORIG_TEXT accessors reach into the u.u_t member, and
   TOKEN_DATA_ORIG_TEXT exists only when ENABLE_CHANGEWORD is defined. */
327 #define TOKEN_DATA_TYPE(Td) ((Td)->type)
328 #define TOKEN_DATA_LEN(Td) ((Td)->u.u_t.len)
329 #define TOKEN_DATA_TEXT(Td) ((Td)->u.u_t.text)
330 #define TOKEN_DATA_QUOTE_AGE(Td) ((Td)->u.u_t.quote_age)
331 #ifdef ENABLE_CHANGEWORD
332 # define TOKEN_DATA_ORIG_TEXT(Td) ((Td)->u.u_t.original_text)
333 #endif
334 #define TOKEN_DATA_FUNC(Td) ((Td)->u.func)
/* Short names for the two enum types. */
336 typedef enum token_type token_type;
337 typedef enum token_data_type token_data_type;
/* Initialization and token reading; peek_token and next_token both
   return a token_type. */
339 void input_init (void);
340 token_type peek_token (void);
341 token_type next_token (token_data *, int *, struct obstack *, const char *);
342 void skip_line (const char *);
344 /* Push back input. push_string_init returns an obstack and
   push_string_finish returns an input_block pointer. */
345 void push_file (FILE *, const char *, bool);
346 void push_macro (builtin_func *);
347 struct obstack *push_string_init (void);
348 bool push_token (token_data *, int);
349 const input_block *push_string_finish (void);
350 void push_wrapup (const char *);
351 bool pop_wrapup (void);
352 void input_print (struct obstack *, const input_block *);
354 /* current input file, and line */
355 extern const char *current_file;
356 extern int current_line;
358 /* left and right quote, begin and end comment */
359 extern STRING bcomm, ecomm;
360 extern STRING lquote, rquote;
/* Default delimiter strings: a backquote and an apostrophe for quotes,
   a hash sign and a newline for comments. */
362 #define DEF_LQUOTE "`"
363 #define DEF_RQUOTE "\'"
364 #define DEF_BCOMM "#"
365 #define DEF_ECOMM "\n"
/* Setters each take two strings; set_word_regexp is declared only
   when ENABLE_CHANGEWORD is defined. */
367 void set_quotes (const char *, const char *);
368 void set_comment (const char *, const char *);
369 #ifdef ENABLE_CHANGEWORD
370 void set_word_regexp (const char *, const char *);
371 #endif
372 unsigned int quote_age (void);
373 bool safe_quotes (void);
375 /* File: output.c --- output functions. */
/* Global output state declared for other files. */
376 extern int current_diversion;
377 extern int output_current_line;
/* Output functions. NOTE(review): the int parameters of
   make_diversion and insert_diversion are presumably diversion
   numbers -- confirm in output.c. */
379 void output_init (void);
380 void output_exit (void);
381 void output_text (const char *, int);
382 void shipout_text (struct obstack *, const char *, int, int);
383 void make_diversion (int);
384 void insert_diversion (int);
385 void insert_file (FILE *);
386 void freeze_diversions (FILE *);
388 /* File: symtab.c --- symbol table definitions. */
390 /* Operation modes for lookup_symbol (), passed as its second
   argument. */
391 enum symbol_lookup
393 SYMBOL_LOOKUP,
394 SYMBOL_INSERT,
395 SYMBOL_DELETE,
396 SYMBOL_PUSHDEF,
397 SYMBOL_POPDEF
400 /* Symbol table entry. Each field has a matching SYMBOL_* accessor
   macro, with SYMBOL_TYPE, SYMBOL_TEXT and SYMBOL_FUNC reading from
   the data member. */
401 struct symbol
403 struct symbol *next;
/* One-bit flags. */
404 bool_bitfield traced : 1;
405 bool_bitfield shadowed : 1;
406 bool_bitfield macro_args : 1;
407 bool_bitfield blind_no_args : 1;
408 bool_bitfield deleted : 1;
409 int pending_expansions;
411 char *name;
412 token_data data; /* Type should be only TOKEN_TEXT or TOKEN_FUNC. */
/* Field accessors; S is a pointer to a symbol. */
415 #define SYMBOL_NEXT(S) ((S)->next)
416 #define SYMBOL_TRACED(S) ((S)->traced)
417 #define SYMBOL_SHADOWED(S) ((S)->shadowed)
418 #define SYMBOL_MACRO_ARGS(S) ((S)->macro_args)
419 #define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args)
420 #define SYMBOL_DELETED(S) ((S)->deleted)
421 #define SYMBOL_PENDING_EXPANSIONS(S) ((S)->pending_expansions)
422 #define SYMBOL_NAME(S) ((S)->name)
/* These three apply the TOKEN_DATA_* accessors to the data member. */
423 #define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data))
424 #define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
425 #define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data))
427 typedef enum symbol_lookup symbol_lookup;
428 typedef struct symbol symbol;
/* Callback type accepted by hack_all_symbols: receives a symbol and
   a void pointer. */
429 typedef void hack_symbol (symbol *, void *);
431 #define HASHMAX 509 /* default, overridden by -Hsize */
433 extern symbol **symtab;
/* Symbol table functions. lookup_symbol takes a string and one of the
   symbol_lookup modes; hack_all_symbols takes a hack_symbol callback
   and a void pointer. */
435 void free_symbol (symbol *sym);
436 void symtab_init (void);
437 symbol *lookup_symbol (const char *, symbol_lookup);
438 void hack_all_symbols (hack_symbol *, void *);
440 /* File: macro.c --- macro expansion. */
442 extern int expansion_level;
444 void expand_input (void);
445 void call_macro (symbol *, int, macro_arguments *, struct obstack *);
/* Accessors on a macro_arguments object, which is only forward
   declared in this header. NOTE(review): the unsigned int parameters
   presumably select an argument by index -- confirm in macro.c. */
447 unsigned int arg_argc (macro_arguments *);
448 token_data_type arg_type (macro_arguments *, unsigned int);
449 const char *arg_text (macro_arguments *, unsigned int);
450 bool arg_equal (macro_arguments *, unsigned int, unsigned int);
451 bool arg_empty (macro_arguments *, unsigned int);
452 size_t arg_len (macro_arguments *, unsigned int);
453 builtin_func *arg_func (macro_arguments *, unsigned int);
454 struct obstack *arg_scratch (void);
455 macro_arguments *make_argv_ref (macro_arguments *, const char *, size_t,
456 bool, bool);
457 void push_arg (struct obstack *, macro_arguments *, unsigned int);
458 void push_args (struct obstack *, macro_arguments *, bool, bool);
459 size_t adjust_refcount (int, bool);
462 /* File: builtin.c --- builtins. */
/* Describes one builtin: its name, three one-bit flags, and a pointer
   to a builtin_func. */
464 struct builtin
466 const char *name;
467 bool_bitfield gnu_extension : 1;
468 bool_bitfield groks_macro_args : 1;
469 bool_bitfield blind_if_no_args : 1;
470 builtin_func *func;
/* Three strings. NOTE(review): presumably two alternative names for a
   predefined macro plus its definition text -- confirm in builtin.c. */
473 struct predefined
475 const char *unix_name;
476 const char *gnu_name;
477 const char *func;
480 typedef struct builtin builtin;
481 typedef struct predefined predefined;
/* Incomplete struct declarations, used by the init_pattern_buffer
   prototype. */
482 struct re_pattern_buffer;
483 struct re_registers;
485 /* The default sequence detects multi-digit parameters (obsolete after
486 1.4.x), and any use of extended arguments with the default ${}
487 syntax (new in 2.0). As written, the pattern is a dollar sign
   followed by either a brace-enclosed group or two or more digits. */
488 #define DEFAULT_MACRO_SEQUENCE "\\$\\({[^}]*}\\|[0-9][0-9]+\\)"
/* Functions declared for builtin.c. expand_user_macro and
   m4_placeholder take an obstack, an int and the macro arguments;
   expand_user_macro also takes a symbol. */
490 void builtin_init (void);
491 bool bad_argc (const char *, int, unsigned int, unsigned int);
492 void define_builtin (const char *, const builtin *, symbol_lookup);
493 void set_macro_sequence (const char *);
494 void free_regex (void);
495 void define_user_macro (const char *, size_t, const char *, symbol_lookup);
496 void undivert_all (void);
497 void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *);
498 void m4_placeholder (struct obstack *, int, macro_arguments *);
499 void init_pattern_buffer (struct re_pattern_buffer *, struct re_registers *);
500 const char *ntoa (int32_t, int);
/* Look up a builtin by its function pointer or by a name string; both
   return a pointer to a const builtin. */
502 const builtin *find_builtin_by_addr (builtin_func *);
503 const builtin *find_builtin_by_name (const char *);
505 /* File: path.c --- path search for include files. */
507 void include_init (void);
508 void include_env_init (void);
509 void add_include_directory (const char *);
/* Returns a FILE pointer. NOTE(review): the char ** argument is
   presumably an output for the name that was found -- confirm in
   path.c. */
510 FILE *m4_path_search (const char *, char **);
512 /* File: eval.c --- expression evaluation. */
/* Takes two strings and a pointer to an int32_t; returns bool. */
514 bool evaluate (const char *, const char *, int32_t *);
516 /* File: format.c --- printf like formatting. */
/* Has the same parameter list as builtin_func. */
518 void format (struct obstack *, int, macro_arguments *);
520 /* File: freeze.c --- frozen state files. */
522 void produce_frozen_state (const char *);
523 void reload_frozen_state (const char *);
525 /* Debugging the memory allocator: when WITH_DMALLOC is defined,
   define DMALLOC_FUNC_CHECK and include <dmalloc.h>. */
527 #ifdef WITH_DMALLOC
528 # define DMALLOC_FUNC_CHECK
529 # include <dmalloc.h>
530 #endif
532 /* Other debug stuff: defining DEBUG defines each of the seven
   DEBUG_* switches below to 1. */
534 #ifdef DEBUG
535 # define DEBUG_INCL 1
536 # define DEBUG_INPUT 1
537 # define DEBUG_MACRO 1
538 # define DEBUG_OUTPUT 1
539 # define DEBUG_REGEX 1
540 # define DEBUG_STKOVF 1
541 # define DEBUG_SYM 1
542 #endif
544 /* Convert a possibly-signed character to an unsigned character. This is
545 a bit safer than casting to unsigned char, since it catches some type
546 errors that the cast doesn't. When HAVE_INLINE is true this is an
   inline function taking a char; otherwise it falls back to a macro
   that is a plain cast to unsigned char. */
547 #if HAVE_INLINE
548 static inline unsigned char to_uchar (char ch) { return ch; }
549 #else
550 # define to_uchar(C) ((unsigned char) (C))
551 #endif