1 /* GNU m4 -- A simple macro processor
2 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008,
3 2009 Free Software Foundation, Inc.
5 This file is part of GNU M4.
7 GNU M4 is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU M4 is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 /* Handling of different input sources, and lexical analysis. */
25 #include "m4private.h"
28 #include "freadseek.h"
30 /* Define this to see runtime debug info. Implied by DEBUG. */
31 /*#define DEBUG_INPUT */
/* Maximum number of bytes where it is more efficient to inline the
   reference as a string than it is to track reference bookkeeping for
   those bytes.  Text no longer than this is copied straight onto the
   input obstack by m4__push_symbol () instead of being linked.  */
#define INPUT_INLINE_THRESHOLD 16
/* Unread input can be either files that should be read (from the
   command line or by include/sinclude), strings which should be
   rescanned (normal macro expansion text), or quoted builtin
   definitions (as returned by the builtin "defn").  Unread input is
   organized in a stack, implemented with an obstack.  Each input
   source is described by a "struct m4_input_block".  The obstack is
   "input_stack".  The top of the input stack is "isp".

   Each input_block has an associated struct input_funcs, which is a
   vtable that defines polymorphic functions for peeking, reading,
   unget, cleanup, and printing in trace output.  Getting a single
   character at a time is inefficient, so there are also functions for
   accessing the readahead buffer and consuming bulk input.  All input
   is done through the function pointers of the input_funcs on the
   given input_block, and all characters are unsigned, to distinguish
   ordinary bytes from stdio EOF and from the special sentinel
   characters.  When an input_block is exhausted, its reader returns
   CHAR_RETRY, which causes the input_block to be popped from the
   input_stack.

   The macro "m4wrap" places the text to be saved on another input
   stack, on the obstack "wrapup_stack", whose top is "wsp".  When EOF
   is seen on normal input (e.g., when "current_input" is empty), input
   is switched over to "wrapup_stack", and the original
   "current_input" is freed.  A new stack is allocated for
   "wrapup_stack", which will accept any text produced by calls to
   "m4wrap" from within the wrapped text.  This process of shuffling
   "wrapup_stack" to "current_input" can continue indefinitely, even
   generating infinite loops (e.g. "define(`f',`m4wrap(`f')')f"),
   without memory leaks.  Adding wrapped data is done through
   m4__push_wrapup_init/m4__push_wrapup_finish().

   Pushing new input on the input stack is done by m4_push_file(), the
   conceptual m4_push_string(), and m4_push_builtin() (for builtin
   definitions).  As an optimization, since most macro expansions
   result in strings, m4_push_string() is split in two parts,
   push_string_init(), which returns a pointer to the obstack for
   growing the output, and push_string_finish(), which returns a
   pointer to the finished input_block.  Thus, instead of creating a
   new input block for every character pushed, macro expansion need
   only add text to the top of the obstack.  However, it is not safe
   to alter the input stack while a string is being constructed.  This
   means the input engine is in one of two states: consuming input, or
   collecting a macro's expansion.  The input_block *next is used to
   manage the coordination between the different push routines.

   Normally, input sources behave in LIFO order, resembling a stack.
   But thanks to the defn and m4wrap macros, when collecting the
   expansion of a macro, it is possible that we must intermix multiple
   input blocks in FIFO order.  Therefore, when collecting an
   expansion, a meta-input block is formed which will visit its
   children in FIFO order, without losing data when the obstack is
   cleared in LIFO order.

   The current file and line number are stored in the context, for use
   by the error handling functions in utility.c.  When collecting a
   macro's expansion, these variables can be temporarily inconsistent
   in order to provide better error message locations, but they must
   be restored before further parsing takes place.  Each input block
   maintains its own notion of the current file and line, so swapping
   between input blocks must update the context accordingly.  */
100 typedef struct m4_input_block m4_input_block
;
102 static int file_peek (m4_input_block
*, m4
*, bool);
103 static int file_read (m4_input_block
*, m4
*, bool, bool,
105 static void file_unget (m4_input_block
*, int);
106 static bool file_clean (m4_input_block
*, m4
*, bool);
107 static void file_print (m4_input_block
*, m4
*, m4_obstack
*,
109 static const char * file_buffer (m4_input_block
*, m4
*, size_t *,
111 static void file_consume (m4_input_block
*, m4
*, size_t);
112 static int string_peek (m4_input_block
*, m4
*, bool);
113 static int string_read (m4_input_block
*, m4
*, bool, bool,
115 static void string_unget (m4_input_block
*, int);
116 static void string_print (m4_input_block
*, m4
*, m4_obstack
*,
118 static const char * string_buffer (m4_input_block
*, m4
*, size_t *,
120 static void string_consume (m4_input_block
*, m4
*, size_t);
121 static int composite_peek (m4_input_block
*, m4
*, bool);
122 static int composite_read (m4_input_block
*, m4
*, bool, bool,
124 static void composite_unget (m4_input_block
*, int);
125 static bool composite_clean (m4_input_block
*, m4
*, bool);
126 static void composite_print (m4_input_block
*, m4
*, m4_obstack
*,
128 static const char * composite_buffer (m4_input_block
*, m4
*, size_t *,
130 static void composite_consume (m4_input_block
*, m4
*, size_t);
131 static int eof_peek (m4_input_block
*, m4
*, bool);
132 static int eof_read (m4_input_block
*, m4
*, bool, bool,
134 static void eof_unget (m4_input_block
*, int);
135 static const char * eof_buffer (m4_input_block
*, m4
*, size_t *,
138 static void init_builtin_token (m4
*, m4_obstack
*,
140 static void append_quote_token (m4
*, m4_obstack
*,
142 static bool match_input (m4
*, const char *, size_t, bool);
143 static int next_char (m4
*, bool, bool, bool);
144 static int peek_char (m4
*, bool);
145 static bool pop_input (m4
*, bool);
146 static void unget_input (int);
147 static const char * next_buffer (m4
*, size_t *, bool);
148 static void consume_buffer (m4
*, size_t);
149 static bool consume_syntax (m4
*, m4_obstack
*, unsigned int);
152 # include "quotearg.h"
154 static int m4_print_token (m4
*, const char *, m4__token_type
,
158 /* Vtable of callbacks for each input method. */
161 /* Peek at input, return an unsigned char, CHAR_BUILTIN if it is a
162 builtin, or CHAR_RETRY if none available. If ALLOW_ARGV, then
163 CHAR_ARGV may be returned. */
164 int (*peek_func
) (m4_input_block
*, m4
*, bool);
166 /* Read input, return an unsigned char, CHAR_BUILTIN if it is a
167 builtin, or CHAR_RETRY if none available. If ALLOW_QUOTE, then
168 CHAR_QUOTE may be returned. If ALLOW_ARGV, then CHAR_ARGV may be
169 returned. If ALLOW_UNGET, then ensure that the next unget_func
170 will work with the returned character. */
171 int (*read_func
) (m4_input_block
*, m4
*, bool allow_quote
,
172 bool allow_argv
, bool allow_unget
);
174 /* Unread a single unsigned character or CHAR_BUILTIN, must be the
175 same character previously read by read_func. */
176 void (*unget_func
) (m4_input_block
*, int);
178 /* Optional function to perform cleanup at end of input. If
179 CLEANUP, it is safe to perform non-recoverable cleanup actions.
180 Return true only if no cleanup remains to be done. */
181 bool (*clean_func
) (m4_input_block
*, m4
*, bool cleanup
);
183 /* Add a representation of the input block to the obstack, for use
184 in trace expansion output. */
185 void (*print_func
) (m4_input_block
*, m4
*, m4_obstack
*, int);
187 /* Return a pointer to the current readahead buffer, and set LEN to
188 the length of the result. If ALLOW_QUOTE, do not return a buffer
189 for a quoted string. If there is data, but the result of
190 next_char() would not fit in a char (for example, CHAR_EOF or
191 CHAR_QUOTE) or there is no readahead data available, return NULL,
192 and the caller must use next_char(). If there is no more data,
193 return buffer_retry. The buffer is only valid until the next
194 consume_buffer() or next_char(). */
195 const char *(*buffer_func
) (m4_input_block
*, m4
*, size_t *, bool);
197 /* Optional function to consume data from a readahead buffer
198 previously obtained through buffer_func. */
199 void (*consume_func
) (m4_input_block
*, m4
*, size_t);
202 /* A block of input to be scanned. */
203 struct m4_input_block
205 m4_input_block
*prev
; /* Previous input_block on the input stack. */
206 struct input_funcs
*funcs
; /* Virtual functions of this input_block. */
207 const char *file
; /* File where this input is from. */
208 int line
; /* Line where this input is from. */
214 char *str
; /* String value. */
215 size_t len
; /* Remaining length. */
217 u_s
; /* See string_funcs. */
220 FILE *fp
; /* Input file handle. */
221 bool_bitfield end
: 1; /* True iff peek returned EOF. */
222 bool_bitfield close
: 1; /* True to close file on pop. */
223 bool_bitfield line_start
: 1; /* Saved start_of_input_line state. */
225 u_f
; /* See file_funcs. */
228 m4__symbol_chain
*chain
; /* Current link in chain. */
229 m4__symbol_chain
*end
; /* Last link in chain. */
231 u_c
; /* See composite_funcs. */
237 /* Obstack for storing individual tokens. */
238 static m4_obstack token_stack
;
240 /* Obstack for storing input file names. */
241 static m4_obstack file_names
;
243 /* Wrapup input stack. */
244 static m4_obstack
*wrapup_stack
;
246 /* Current stack, from input or wrapup. */
247 static m4_obstack
*current_input
;
249 /* Bottom of token_stack, for obstack_free. */
250 static void *token_bottom
;
252 /* Pointer to top of current_input, never NULL. */
253 static m4_input_block
*isp
;
255 /* Pointer to top of wrapup_stack, never NULL. */
256 static m4_input_block
*wsp
;
258 /* Auxiliary for handling split m4_push_string (), NULL when not
259 pushing text for rescanning. */
260 static m4_input_block
*next
;
262 /* Flag for next_char () to increment current_line. */
263 static bool start_of_input_line
;
265 /* Flag for next_char () to recognize change in input block. */
266 static bool input_change
;
268 /* Vtable for handling input from files. */
269 static struct input_funcs file_funcs
= {
270 file_peek
, file_read
, file_unget
, file_clean
, file_print
, file_buffer
,
274 /* Vtable for handling input from strings. */
275 static struct input_funcs string_funcs
= {
276 string_peek
, string_read
, string_unget
, NULL
, string_print
, string_buffer
,
280 /* Vtable for handling input from composite chains. */
281 static struct input_funcs composite_funcs
= {
282 composite_peek
, composite_read
, composite_unget
, composite_clean
,
283 composite_print
, composite_buffer
, composite_consume
286 /* Vtable for recognizing end of input. */
287 static struct input_funcs eof_funcs
= {
288 eof_peek
, eof_read
, eof_unget
, NULL
, NULL
, eof_buffer
, NULL
291 /* Marker at end of an input stack. */
292 static m4_input_block input_eof
= { NULL
, &eof_funcs
, "", 0 };
294 /* Marker for buffer_func when current block has no more data. */
295 static const char buffer_retry
[1];
298 /* Input files, from command line or [s]include. */
300 file_peek (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
,
301 bool allow_argv M4_GNUC_UNUSED
)
305 ch
= me
->u
.u_f
.end
? EOF
: getc (me
->u
.u_f
.fp
);
308 me
->u
.u_f
.end
= true;
312 ungetc (ch
, me
->u
.u_f
.fp
);
317 file_read (m4_input_block
*me
, m4
*context
, bool allow_quote M4_GNUC_UNUSED
,
318 bool allow_argv M4_GNUC_UNUSED
, bool allow_unget M4_GNUC_UNUSED
)
322 if (start_of_input_line
)
324 start_of_input_line
= false;
325 m4_set_current_line (context
, ++me
->line
);
328 /* If stdin is a terminal, calling getc after peek_char already
329 called it would make the user have to hit ^D twice to quit. */
330 ch
= me
->u
.u_f
.end
? EOF
: getc (me
->u
.u_f
.fp
);
333 me
->u
.u_f
.end
= true;
338 start_of_input_line
= true;
343 file_unget (m4_input_block
*me
, int ch
)
345 assert (ch
< CHAR_EOF
);
346 if (ungetc (ch
, me
->u
.u_f
.fp
) < 0)
348 assert (!"INTERNAL ERROR: failed ungetc!");
349 abort (); /* ungetc should not be called without a previous read. */
351 me
->u
.u_f
.end
= false;
353 start_of_input_line
= false;
357 file_clean (m4_input_block
*me
, m4
*context
, bool cleanup
)
361 if (me
->prev
!= &input_eof
)
362 m4_debug_message (context
, M4_DEBUG_TRACE_INPUT
,
363 _("input reverted to %s, line %d"),
364 me
->prev
->file
, me
->prev
->line
);
366 m4_debug_message (context
, M4_DEBUG_TRACE_INPUT
, _("input exhausted"));
368 if (ferror (me
->u
.u_f
.fp
))
370 m4_error (context
, 0, 0, NULL
, _("error reading %s"),
371 quotearg_style (locale_quoting_style
, me
->file
));
373 fclose (me
->u
.u_f
.fp
);
375 else if (me
->u
.u_f
.close
&& fclose (me
->u
.u_f
.fp
) == EOF
)
376 m4_error (context
, 0, errno
, NULL
, _("error reading %s"),
377 quotearg_style (locale_quoting_style
, me
->file
));
378 start_of_input_line
= me
->u
.u_f
.line_start
;
379 m4_set_output_line (context
, -1);
384 file_print (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
, m4_obstack
*obs
,
385 int debug_level M4_GNUC_UNUSED
)
387 const char *text
= me
->file
;
388 assert (obstack_object_size (current_input
) == 0);
389 obstack_grow (obs
, "<file: ", strlen ("<file: "));
390 obstack_grow (obs
, text
, strlen (text
));
391 obstack_1grow (obs
, '>');
395 file_buffer (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
, size_t *len
,
396 bool allow_quote M4_GNUC_UNUSED
)
398 if (start_of_input_line
)
400 start_of_input_line
= false;
401 m4_set_current_line (context
, ++me
->line
);
405 return freadptr (isp
->u
.u_f
.fp
, len
);
409 file_consume (m4_input_block
*me
, m4
*context
, size_t len
)
414 assert (!start_of_input_line
);
415 buf
= freadptr (me
->u
.u_f
.fp
, &buf_len
);
416 assert (buf
&& len
<= buf_len
);
418 while ((p
= memchr (buf
+ buf_len
, '\n', len
- buf_len
)))
420 if (p
== buf
+ len
- 1)
421 start_of_input_line
= true;
423 m4_set_current_line (context
, ++me
->line
);
424 buf_len
= p
- buf
+ 1;
426 if (freadseek (isp
->u
.u_f
.fp
, len
) != 0)
430 /* m4_push_file () pushes an input file FP with name TITLE on the
431 input stack, saving the current file name and line number. If next
432 is non-NULL, this push invalidates a call to m4_push_string_init (),
433 whose storage is consequently released. If CLOSE, then close FP at
436 file_read () manages line numbers for error messages, so they do not
437 get wrong due to lookahead. The token consisting of a newline
438 alone is taken as belonging to the line it ends, and the current
439 line number is not incremented until the next character is read. */
441 m4_push_file (m4
*context
, FILE *fp
, const char *title
, bool close_file
)
447 obstack_free (current_input
, next
);
451 m4_debug_message (context
, M4_DEBUG_TRACE_INPUT
, _("input read from %s"),
452 quotearg_style (locale_quoting_style
, title
));
454 i
= (m4_input_block
*) obstack_alloc (current_input
, sizeof *i
);
455 i
->funcs
= &file_funcs
;
456 /* Save title on a separate obstack, so that wrapped text can refer
457 to it even after the file is popped. */
458 i
->file
= obstack_copy0 (&file_names
, title
, strlen (title
));
462 i
->u
.u_f
.end
= false;
463 i
->u
.u_f
.close
= close_file
;
464 i
->u
.u_f
.line_start
= start_of_input_line
;
466 m4_set_output_line (context
, -1);
474 /* Handle string expansion text. */
476 string_peek (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
,
477 bool allow_argv M4_GNUC_UNUSED
)
479 return me
->u
.u_s
.len
? to_uchar (*me
->u
.u_s
.str
) : CHAR_RETRY
;
483 string_read (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
,
484 bool allow_quote M4_GNUC_UNUSED
, bool allow_argv M4_GNUC_UNUSED
,
485 bool allow_unget M4_GNUC_UNUSED
)
490 return to_uchar (*me
->u
.u_s
.str
++);
494 string_unget (m4_input_block
*me
, int ch
)
496 assert (ch
< CHAR_EOF
&& to_uchar (me
->u
.u_s
.str
[-1]) == ch
);
502 string_print (m4_input_block
*me
, m4
*context
, m4_obstack
*obs
,
505 bool quote
= (debug_level
& M4_DEBUG_TRACE_QUOTE
) != 0;
506 size_t arg_length
= m4_get_max_debug_arg_length_opt (context
);
508 assert (!me
->u
.u_s
.len
);
509 m4_shipout_string_trunc (obs
, (char *) obstack_base (current_input
),
510 obstack_object_size (current_input
),
511 quote
? m4_get_syntax_quotes (M4SYNTAX
) : NULL
,
516 string_buffer (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
, size_t *len
,
517 bool allow_quote M4_GNUC_UNUSED
)
521 *len
= me
->u
.u_s
.len
;
522 return me
->u
.u_s
.str
;
526 string_consume (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
, size_t len
)
528 assert (len
<= me
->u
.u_s
.len
);
529 me
->u
.u_s
.len
-= len
;
530 me
->u
.u_s
.str
+= len
;
533 /* First half of m4_push_string (). The pointer next points to the
534 new input_block. FILE and LINE describe the location where the
535 macro starts that is generating the expansion (even if the location
536 has advanced in the meantime). Return the obstack that will
537 collect the expansion text. */
539 m4_push_string_init (m4
*context
, const char *file
, int line
)
541 /* Free any memory occupied by completely parsed input. */
543 while (pop_input (context
, false));
545 /* Reserve the next location on the obstack. */
546 next
= (m4_input_block
*) obstack_alloc (current_input
, sizeof *next
);
547 next
->funcs
= &string_funcs
;
552 return current_input
;
555 /* This function allows gathering input from multiple locations,
556 rather than copying everything consecutively onto the input stack.
557 Must be called between push_string_init and push_string_finish.
559 Convert the current input block into a chain if it is not one
560 already, and add the contents of VALUE as a new link in the chain.
561 LEVEL describes the current expansion level, or SIZE_MAX if VALUE
562 is composite, its contents reside entirely on the current_input
563 stack, and VALUE lives in temporary storage. If VALUE is a simple
564 string, then it belongs to the current macro expansion. If VALUE
565 is composite, then each text link has a level of SIZE_MAX if it
566 belongs to the current macro expansion, otherwise it is a
567 back-reference where level tracks which stack it came from. The
568 resulting input block chain contains links with a level of SIZE_MAX
569 if the text belongs to the input stack, otherwise the level where
570 the back-reference comes from.
572 Return true only if a reference was created to the contents of
573 VALUE, in which case, LEVEL is less than SIZE_MAX and the lifetime
574 of VALUE and its contents must last as long as the input engine can
575 parse references from it. INUSE determines whether composite
576 symbols should favor creating back-references or copying text. */
578 m4__push_symbol (m4
*context
, m4_symbol_value
*value
, size_t level
, bool inuse
)
580 m4__symbol_chain
*src_chain
= NULL
;
581 m4__symbol_chain
*chain
;
585 /* Speed consideration - for short enough symbols, the speed and
586 memory overhead of parsing another INPUT_CHAIN link outweighs the
587 time to inline the symbol text. But don't copy text if it
588 already lives on the obstack. */
589 if (m4_is_symbol_value_text (value
))
591 assert (level
< SIZE_MAX
);
592 if (m4_get_symbol_value_len (value
) <= INPUT_INLINE_THRESHOLD
)
594 obstack_grow (current_input
, m4_get_symbol_value_text (value
),
595 m4_get_symbol_value_len (value
));
599 else if (m4_is_symbol_value_func (value
))
601 if (next
->funcs
== &string_funcs
)
603 next
->funcs
= &composite_funcs
;
604 next
->u
.u_c
.chain
= next
->u
.u_c
.end
= NULL
;
606 m4__append_builtin (current_input
, value
->u
.builtin
, &next
->u
.u_c
.chain
,
612 /* For composite values, if argv is already in use, creating
613 additional references for long text segments is more
614 efficient in time. But if argv is not yet in use, and we
615 have a composite value, then the value must already contain a
616 back-reference, and memory usage is more efficient if we can
617 avoid using the current expand_macro, even if it means larger
619 assert (value
->type
== M4_SYMBOL_COMP
);
620 src_chain
= value
->u
.u_c
.chain
;
621 while (level
< SIZE_MAX
&& src_chain
&& src_chain
->type
== M4__CHAIN_STR
622 && (src_chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
623 || (!inuse
&& src_chain
->u
.u_s
.level
== SIZE_MAX
)))
625 obstack_grow (current_input
, src_chain
->u
.u_s
.str
,
626 src_chain
->u
.u_s
.len
);
627 src_chain
= src_chain
->next
;
633 if (next
->funcs
== &string_funcs
)
635 next
->funcs
= &composite_funcs
;
636 next
->u
.u_c
.chain
= next
->u
.u_c
.end
= NULL
;
638 m4__make_text_link (current_input
, &next
->u
.u_c
.chain
, &next
->u
.u_c
.end
);
639 if (m4_is_symbol_value_text (value
))
641 chain
= (m4__symbol_chain
*) obstack_alloc (current_input
,
644 next
->u
.u_c
.end
->next
= chain
;
646 next
->u
.u_c
.chain
= chain
;
647 next
->u
.u_c
.end
= chain
;
649 chain
->type
= M4__CHAIN_STR
;
650 chain
->quote_age
= m4_get_symbol_value_quote_age (value
);
651 chain
->u
.u_s
.str
= m4_get_symbol_value_text (value
);
652 chain
->u
.u_s
.len
= m4_get_symbol_value_len (value
);
653 chain
->u
.u_s
.level
= level
;
654 m4__adjust_refcount (context
, level
, true);
659 if (src_chain
->type
== M4__CHAIN_FUNC
)
661 m4__append_builtin (current_input
, src_chain
->u
.builtin
,
662 &next
->u
.u_c
.chain
, &next
->u
.u_c
.end
);
663 src_chain
= src_chain
->next
;
666 if (level
== SIZE_MAX
)
668 /* Nothing to copy, since link already lives on obstack. */
669 assert (src_chain
->type
!= M4__CHAIN_STR
670 || src_chain
->u
.u_s
.level
== SIZE_MAX
);
675 /* Allow inlining the final link with subsequent text. */
676 if (!src_chain
->next
&& src_chain
->type
== M4__CHAIN_STR
677 && (src_chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
678 || (!inuse
&& src_chain
->u
.u_s
.level
== SIZE_MAX
)))
680 obstack_grow (current_input
, src_chain
->u
.u_s
.str
,
681 src_chain
->u
.u_s
.len
);
684 /* We must clone each link in the chain, since next_char
685 destructively modifies the chain it is parsing. */
686 chain
= (m4__symbol_chain
*) obstack_copy (current_input
, src_chain
,
689 if (chain
->type
== M4__CHAIN_STR
&& chain
->u
.u_s
.level
== SIZE_MAX
)
691 if (chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
|| !inuse
)
692 chain
->u
.u_s
.str
= (char *) obstack_copy (current_input
,
697 chain
->u
.u_s
.level
= level
;
703 next
->u
.u_c
.end
->next
= chain
;
705 next
->u
.u_c
.chain
= chain
;
706 next
->u
.u_c
.end
= chain
;
707 if (chain
->type
== M4__CHAIN_ARGV
)
709 assert (!chain
->u
.u_a
.comma
&& !chain
->u
.u_a
.skip_last
);
710 inuse
|= m4__arg_adjust_refcount (context
, chain
->u
.u_a
.argv
, true);
712 else if (chain
->type
== M4__CHAIN_STR
&& chain
->u
.u_s
.level
< SIZE_MAX
)
713 m4__adjust_refcount (context
, chain
->u
.u_s
.level
, true);
714 src_chain
= src_chain
->next
;
719 /* Last half of m4_push_string (). If next is now NULL, a call to
720 m4_push_file () has pushed a different input block to the top of
721 the stack. Otherwise, all unfinished text on the obstack returned
722 from push_string_init is collected into the input stack. If the
723 new object is empty, we do not push it. */
725 m4_push_string_finish (void)
727 size_t len
= obstack_object_size (current_input
);
735 if (len
|| next
->funcs
== &composite_funcs
)
737 if (next
->funcs
== &string_funcs
)
739 next
->u
.u_s
.str
= (char *) obstack_finish (current_input
);
740 next
->u
.u_s
.len
= len
;
743 m4__make_text_link (current_input
, &next
->u
.u_c
.chain
,
750 obstack_free (current_input
, next
);
755 /* A composite block contains multiple sub-blocks which are processed
756 in FIFO order, even though the obstack allocates memory in LIFO
759 composite_peek (m4_input_block
*me
, m4
*context
, bool allow_argv
)
761 m4__symbol_chain
*chain
= me
->u
.u_c
.chain
;
769 if (chain
->u
.u_s
.len
)
770 return to_uchar (chain
->u
.u_s
.str
[0]);
773 if (chain
->u
.builtin
)
777 argc
= m4_arg_argc (chain
->u
.u_a
.argv
);
778 if (chain
->u
.u_a
.index
== argc
)
780 if (chain
->u
.u_a
.comma
)
781 return ','; /* FIXME - support M4_SYNTAX_COMMA. */
782 /* Only return a reference in the quoting is correct and the
783 reference has more than one argument left. */
784 if (allow_argv
&& chain
->quote_age
== m4__quote_age (M4SYNTAX
)
785 && chain
->u
.u_a
.quotes
&& chain
->u
.u_a
.index
+ 1 < argc
)
787 /* Rather than directly parse argv here, we push another
788 input block containing the next unparsed argument from
790 m4_push_string_init (context
, me
->file
, me
->line
);
791 m4__push_arg_quote (context
, current_input
, chain
->u
.u_a
.argv
,
793 m4__quote_cache (M4SYNTAX
, NULL
,
795 chain
->u
.u_a
.quotes
));
796 chain
->u
.u_a
.index
++;
797 chain
->u
.u_a
.comma
= true;
798 m4_push_string_finish ();
799 return peek_char (context
, allow_argv
);
803 assert (!"composite_peek");
812 composite_read (m4_input_block
*me
, m4
*context
, bool allow_quote
,
813 bool allow_argv
, bool allow_unget
)
815 m4__symbol_chain
*chain
= me
->u
.u_c
.chain
;
819 if (allow_quote
&& chain
->quote_age
== m4__quote_age (M4SYNTAX
))
824 if (chain
->u
.u_s
.len
)
826 /* Partial consumption invalidates quote age. */
827 chain
->quote_age
= 0;
829 return to_uchar (*chain
->u
.u_s
.str
++);
831 if (chain
->u
.u_s
.level
< SIZE_MAX
)
832 m4__adjust_refcount (context
, chain
->u
.u_s
.level
, false);
835 if (chain
->u
.builtin
)
839 argc
= m4_arg_argc (chain
->u
.u_a
.argv
);
840 if (chain
->u
.u_a
.index
== argc
)
842 m4__arg_adjust_refcount (context
, chain
->u
.u_a
.argv
, false);
845 if (chain
->u
.u_a
.comma
)
847 chain
->u
.u_a
.comma
= false;
848 return ','; /* FIXME - support M4_SYNTAX_COMMA. */
850 /* Only return a reference in the quoting is correct and the
851 reference has more than one argument left. */
852 if (allow_argv
&& chain
->quote_age
== m4__quote_age (M4SYNTAX
)
853 && chain
->u
.u_a
.quotes
&& chain
->u
.u_a
.index
+ 1 < argc
)
855 /* Rather than directly parse argv here, we push another
856 input block containing the next unparsed argument from
858 m4_push_string_init (context
, me
->file
, me
->line
);
859 m4__push_arg_quote (context
, current_input
, chain
->u
.u_a
.argv
,
861 m4__quote_cache (M4SYNTAX
, NULL
,
863 chain
->u
.u_a
.quotes
));
864 chain
->u
.u_a
.index
++;
865 chain
->u
.u_a
.comma
= true;
866 m4_push_string_finish ();
867 return next_char (context
, allow_quote
, allow_argv
, allow_unget
);
869 me
->file
= chain
->u
.u_l
.file
;
870 me
->line
= chain
->u
.u_l
.line
;
872 me
->u
.u_c
.chain
= chain
->next
;
873 return next_char (context
, allow_quote
, allow_argv
, allow_unget
);
875 assert (!"composite_read");
878 me
->u
.u_c
.chain
= chain
= chain
->next
;
884 composite_unget (m4_input_block
*me
, int ch
)
886 m4__symbol_chain
*chain
= me
->u
.u_c
.chain
;
890 assert (ch
< CHAR_EOF
&& to_uchar (chain
->u
.u_s
.str
[-1]) == ch
);
895 assert (ch
== CHAR_BUILTIN
&& chain
->u
.builtin
);
898 /* FIXME - support M4_SYNTAX_COMMA. */
899 assert (ch
== ',' && !chain
->u
.u_a
.comma
);
900 chain
->u
.u_a
.comma
= true;
903 assert (!"composite_unget");
909 composite_clean (m4_input_block
*me
, m4
*context
, bool cleanup
)
911 m4__symbol_chain
*chain
= me
->u
.u_c
.chain
;
912 assert (!chain
|| !cleanup
);
918 if (chain
->u
.u_s
.len
)
923 if (chain
->u
.u_s
.level
< SIZE_MAX
)
924 m4__adjust_refcount (context
, chain
->u
.u_s
.level
, false);
927 if (chain
->u
.builtin
)
931 if (chain
->u
.u_a
.index
< m4_arg_argc (chain
->u
.u_a
.argv
))
936 m4__arg_adjust_refcount (context
, chain
->u
.u_a
.argv
, false);
941 assert (!"composite_clean");
944 me
->u
.u_c
.chain
= chain
= chain
->next
;
950 composite_print (m4_input_block
*me
, m4
*context
, m4_obstack
*obs
,
953 bool quote
= (debug_level
& M4_DEBUG_TRACE_QUOTE
) != 0;
954 size_t maxlen
= m4_get_max_debug_arg_length_opt (context
);
955 m4__symbol_chain
*chain
= me
->u
.u_c
.chain
;
956 const m4_string_pair
*quotes
= m4_get_syntax_quotes (M4SYNTAX
);
957 bool module
= (debug_level
& M4_DEBUG_TRACE_MODULE
) != 0;
959 size_t len
= obstack_object_size (current_input
);
962 m4_shipout_string (context
, obs
, quotes
->str1
, quotes
->len1
, false);
963 while (chain
&& !done
)
968 if (m4_shipout_string_trunc (obs
, chain
->u
.u_s
.str
,
969 chain
->u
.u_s
.len
, NULL
, &maxlen
))
973 m4__builtin_print (obs
, chain
->u
.builtin
, false, NULL
, NULL
, module
);
976 assert (!chain
->u
.u_a
.comma
);
977 if (m4__arg_print (context
, obs
, chain
->u
.u_a
.argv
,
979 m4__quote_cache (M4SYNTAX
, NULL
, chain
->quote_age
,
980 chain
->u
.u_a
.quotes
),
981 chain
->u
.u_a
.flatten
, NULL
, NULL
, &maxlen
, false,
986 assert (!"composite_print");
992 m4_shipout_string_trunc (obs
, (char *) obstack_base (current_input
), len
,
995 m4_shipout_string (context
, obs
, quotes
->str2
, quotes
->len2
, false);
999 composite_buffer (m4_input_block
*me
, m4
*context
, size_t *len
,
1002 m4__symbol_chain
*chain
= me
->u
.u_c
.chain
;
1005 if (allow_quote
&& chain
->quote_age
== m4__quote_age (M4SYNTAX
))
1006 return NULL
; /* CHAR_QUOTE doesn't fit in buffer. */
1007 switch (chain
->type
)
1010 if (chain
->u
.u_s
.len
)
1012 *len
= chain
->u
.u_s
.len
;
1013 return chain
->u
.u_s
.str
;
1015 if (chain
->u
.u_s
.level
< SIZE_MAX
)
1016 m4__adjust_refcount (context
, chain
->u
.u_s
.level
, false);
1018 case M4__CHAIN_FUNC
:
1019 if (chain
->u
.builtin
)
1020 return NULL
; /* CHAR_BUILTIN doesn't fit in buffer. */
1022 case M4__CHAIN_ARGV
:
1023 if (chain
->u
.u_a
.index
== m4_arg_argc (chain
->u
.u_a
.argv
))
1025 m4__arg_adjust_refcount (context
, chain
->u
.u_a
.argv
, false);
1028 return NULL
; /* No buffer to provide. */
1030 me
->file
= chain
->u
.u_l
.file
;
1031 me
->line
= chain
->u
.u_l
.line
;
1032 input_change
= true;
1033 me
->u
.u_c
.chain
= chain
->next
;
1034 return next_buffer (context
, len
, allow_quote
);
1036 assert (!"composite_buffer");
1039 me
->u
.u_c
.chain
= chain
= chain
->next
;
1041 return buffer_retry
;
1045 composite_consume (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
, size_t len
)
1047 m4__symbol_chain
*chain
= me
->u
.u_c
.chain
;
1048 assert (chain
&& chain
->type
== M4__CHAIN_STR
&& len
<= chain
->u
.u_s
.len
);
1049 /* Partial consumption invalidates quote age. */
1050 chain
->quote_age
= 0;
1051 chain
->u
.u_s
.len
-= len
;
1052 chain
->u
.u_s
.str
+= len
;
1055 /* Given an obstack OBS, capture any unfinished text as a link in the
1056 chain that starts at *START and ends at *END. START may be NULL if
1057 *END is non-NULL. */
1059 m4__make_text_link (m4_obstack
*obs
, m4__symbol_chain
**start
,
1060 m4__symbol_chain
**end
)
1062 m4__symbol_chain
*chain
;
1063 size_t len
= obstack_object_size (obs
);
1065 assert (end
&& (start
|| *end
));
1068 char *str
= (char *) obstack_finish (obs
);
1069 chain
= (m4__symbol_chain
*) obstack_alloc (obs
, sizeof *chain
);
1071 (*end
)->next
= chain
;
1076 chain
->type
= M4__CHAIN_STR
;
1077 chain
->quote_age
= 0;
1078 chain
->u
.u_s
.str
= str
;
1079 chain
->u
.u_s
.len
= len
;
1080 chain
->u
.u_s
.level
= SIZE_MAX
;
1084 /* Given an obstack OBS, capture any unfinished text as a link, then
1085 append the builtin FUNC as the next link in the chain that starts
1086 at *START and ends at *END. START may be NULL if *END is
1089 m4__append_builtin (m4_obstack
*obs
, const m4__builtin
*func
,
1090 m4__symbol_chain
**start
, m4__symbol_chain
**end
)
1092 m4__symbol_chain
*chain
;
1095 m4__make_text_link (obs
, start
, end
);
1096 chain
= (m4__symbol_chain
*) obstack_alloc (obs
, sizeof *chain
);
1098 (*end
)->next
= chain
;
1103 chain
->type
= M4__CHAIN_FUNC
;
1104 chain
->quote_age
= 0;
1105 chain
->u
.builtin
= func
;
1108 /* Push TOKEN, which contains a builtin's definition, onto the obstack
1109 OBS, which is either input stack or the wrapup stack. */
1111 m4_push_builtin (m4
*context
, m4_obstack
*obs
, m4_symbol_value
*token
)
1113 m4_input_block
*i
= (obs
== current_input
? next
: wsp
);
1115 if (i
->funcs
== &string_funcs
)
1117 i
->funcs
= &composite_funcs
;
1118 i
->u
.u_c
.chain
= i
->u
.u_c
.end
= NULL
;
1121 assert (i
->funcs
== &composite_funcs
);
1122 m4__append_builtin (obs
, token
->u
.builtin
, &i
->u
.u_c
.chain
, &i
->u
.u_c
.end
);
1126 /* End of input optimization. By providing these dummy callback
1127 functions, we guarantee that the input stack is never NULL, and
1128 thus make fewer execution branches. */
1130 eof_peek (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
,
1131 bool allow_argv M4_GNUC_UNUSED
)
1133 assert (me
== &input_eof
);
1138 eof_read (m4_input_block
*me
, m4
*context M4_GNUC_UNUSED
,
1139 bool allow_quote M4_GNUC_UNUSED
, bool allow_argv M4_GNUC_UNUSED
,
1140 bool allow_unget M4_GNUC_UNUSED
)
1142 assert (me
== &input_eof
);
1147 eof_unget (m4_input_block
*me M4_GNUC_UNUSED
, int ch
)
1149 assert (ch
== CHAR_EOF
);
1153 eof_buffer (m4_input_block
*me M4_GNUC_UNUSED
, m4
*context M4_GNUC_UNUSED
,
1154 size_t *len M4_GNUC_UNUSED
, bool allow_unget M4_GNUC_UNUSED
)
1160 /* When tracing, print a summary of the contents of the input block
1161 created by push_string_init/push_string_finish to OBS. Use
1162 DEBUG_LEVEL to determine whether to add quotes or module
1165 m4_input_print (m4
*context
, m4_obstack
*obs
, int debug_level
)
1167 m4_input_block
*block
= next
? next
: isp
;
1168 assert (context
&& obs
&& (debug_level
& M4_DEBUG_TRACE_EXPANSION
));
1169 assert (block
->funcs
->print_func
);
1170 block
->funcs
->print_func (block
, context
, obs
, debug_level
);
1173 /* Return an obstack ready for direct expansion of wrapup text, and
1174 set *END to the location that should be updated if any builtin
1175 tokens are wrapped. Store the location of CALLER with the wrapped
1176 text. This should be followed by m4__push_wrapup_finish (). */
1178 m4__push_wrapup_init (m4
*context
, const m4_call_info
*caller
,
1179 m4__symbol_chain
***end
)
1182 m4__symbol_chain
*chain
;
1184 assert (obstack_object_size (wrapup_stack
) == 0);
1185 if (wsp
!= &input_eof
)
1188 assert (i
->funcs
== &composite_funcs
&& i
->u
.u_c
.end
1189 && i
->u
.u_c
.end
->type
!= M4__CHAIN_LOC
);
1193 i
= (m4_input_block
*) obstack_alloc (wrapup_stack
, sizeof *i
);
1195 i
->funcs
= &composite_funcs
;
1196 i
->file
= caller
->file
;
1197 i
->line
= caller
->line
;
1198 i
->u
.u_c
.chain
= i
->u
.u_c
.end
= NULL
;
1201 chain
= (m4__symbol_chain
*) obstack_alloc (wrapup_stack
, sizeof *chain
);
1203 i
->u
.u_c
.end
->next
= chain
;
1205 i
->u
.u_c
.chain
= chain
;
1206 i
->u
.u_c
.end
= chain
;
1208 chain
->type
= M4__CHAIN_LOC
;
1209 chain
->quote_age
= 0;
1210 chain
->u
.u_l
.file
= caller
->file
;
1211 chain
->u
.u_l
.line
= caller
->line
;
1212 *end
= &i
->u
.u_c
.end
;
1213 return wrapup_stack
;
1216 /* After pushing wrapup text, this completes the bookkeeping. */
1218 m4__push_wrapup_finish (void)
1220 m4__make_text_link (wrapup_stack
, &wsp
->u
.u_c
.chain
, &wsp
->u
.u_c
.end
);
1221 assert (wsp
->u
.u_c
.end
->type
!= M4__CHAIN_LOC
);
1225 /* The function pop_input () pops one level of input sources. If
1226 CLEANUP, the current_file and current_line are restored as needed.
1227 The return value is false if cleanup is still required, or if the
1228 current input source is not at the end. */
1230 pop_input (m4
*context
, bool cleanup
)
1232 m4_input_block
*tmp
= isp
->prev
;
1235 if (isp
->funcs
->clean_func
1236 ? !isp
->funcs
->clean_func (isp
, context
, cleanup
)
1237 : (isp
->funcs
->peek_func (isp
, context
, true) != CHAR_RETRY
))
1240 obstack_free (current_input
, isp
);
1241 m4__quote_uncache (M4SYNTAX
);
1242 next
= NULL
; /* might be set in m4_push_string_init () */
1245 input_change
= true;
1249 /* To switch input over to the wrapup stack, main () calls pop_wrapup.
1250 Since wrapup text can install new wrapup text, pop_wrapup ()
1251 returns true if there is more wrapped text to parse. */
1253 m4_pop_wrapup (m4
*context
)
1255 static size_t level
= 0;
1258 obstack_free (current_input
, NULL
);
1259 free (current_input
);
1261 if (wsp
== &input_eof
)
1263 obstack_free (wrapup_stack
, NULL
);
1264 m4_set_current_file (context
, NULL
);
1265 m4_set_current_line (context
, 0);
1266 m4_debug_message (context
, M4_DEBUG_TRACE_INPUT
,
1267 _("input from m4wrap exhausted"));
1268 current_input
= NULL
;
1269 DELETE (wrapup_stack
);
1273 m4_debug_message (context
, M4_DEBUG_TRACE_INPUT
,
1274 _("input from m4wrap recursion level %zu"), ++level
);
1276 current_input
= wrapup_stack
;
1277 wrapup_stack
= (m4_obstack
*) xmalloc (sizeof *wrapup_stack
);
1278 obstack_init (wrapup_stack
);
1282 input_change
= true;
1287 /* Populate TOKEN with the builtin token at the top of the input
1288 stack, then consume the input. If OBS, TOKEN will be converted to
1289 a composite token using storage from OBS as necessary; otherwise,
1290 if TOKEN is NULL, the builtin token is discarded. */
1292 init_builtin_token (m4
*context
, m4_obstack
*obs
, m4_symbol_value
*token
)
1294 m4__symbol_chain
*chain
;
1295 assert (isp
->funcs
== &composite_funcs
);
1296 chain
= isp
->u
.u_c
.chain
;
1297 assert (!chain
->quote_age
&& chain
->type
== M4__CHAIN_FUNC
1298 && chain
->u
.builtin
);
1302 if (token
->type
== M4_SYMBOL_VOID
)
1304 token
->type
= M4_SYMBOL_COMP
;
1305 token
->u
.u_c
.chain
= token
->u
.u_c
.end
= NULL
;
1306 token
->u
.u_c
.wrapper
= false;
1307 token
->u
.u_c
.has_func
= false;
1309 assert (token
->type
== M4_SYMBOL_COMP
);
1310 m4__append_builtin (obs
, chain
->u
.builtin
, &token
->u
.u_c
.chain
,
1315 assert (token
->type
== M4_SYMBOL_VOID
);
1316 m4__set_symbol_value_builtin (token
, chain
->u
.builtin
);
1318 chain
->u
.builtin
= NULL
;
1321 /* When a QUOTE token is seen, convert VALUE to a composite (if it is
1322 not one already), consisting of any unfinished text on OBS, as well
1323 as the quoted token from the top of the input stack. Use OBS for
1324 any additional allocations needed to store the token chain. */
1326 append_quote_token (m4
*context
, m4_obstack
*obs
, m4_symbol_value
*value
)
1328 m4__symbol_chain
*src_chain
= isp
->u
.u_c
.chain
;
1329 m4__symbol_chain
*chain
;
1330 assert (isp
->funcs
== &composite_funcs
&& obs
&& m4__quote_age (M4SYNTAX
));
1331 isp
->u
.u_c
.chain
= src_chain
->next
;
1333 /* Speed consideration - for short enough symbols, the speed and
1334 memory overhead of parsing another INPUT_CHAIN link outweighs the
1335 time to inline the symbol text. */
1336 if (src_chain
->type
== M4__CHAIN_STR
1337 && src_chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
)
1339 assert (src_chain
->u
.u_s
.level
<= SIZE_MAX
);
1340 obstack_grow (obs
, src_chain
->u
.u_s
.str
, src_chain
->u
.u_s
.len
);
1341 m4__adjust_refcount (context
, src_chain
->u
.u_s
.level
, false);
1345 if (value
->type
== M4_SYMBOL_VOID
)
1347 value
->type
= M4_SYMBOL_COMP
;
1348 value
->u
.u_c
.chain
= value
->u
.u_c
.end
= NULL
;
1349 value
->u
.u_c
.wrapper
= value
->u
.u_c
.has_func
= false;
1351 assert (value
->type
== M4_SYMBOL_COMP
);
1352 m4__make_text_link (obs
, &value
->u
.u_c
.chain
, &value
->u
.u_c
.end
);
1353 chain
= (m4__symbol_chain
*) obstack_copy (obs
, src_chain
, sizeof *chain
);
1354 if (value
->u
.u_c
.end
)
1355 value
->u
.u_c
.end
->next
= chain
;
1357 value
->u
.u_c
.chain
= chain
;
1358 value
->u
.u_c
.end
= chain
;
1359 if (chain
->type
== M4__CHAIN_ARGV
&& chain
->u
.u_a
.has_func
)
1360 value
->u
.u_c
.has_func
= true;
1364 /* When an ARGV token is seen, convert VALUE to point to it via a
1365 composite chain. Use OBS for any additional allocations
1368 init_argv_symbol (m4
*context
, m4_obstack
*obs
, m4_symbol_value
*value
)
1370 m4__symbol_chain
*src_chain
;
1371 m4__symbol_chain
*chain
;
1373 const m4_string_pair
*comments
= m4_get_syntax_comments (M4SYNTAX
);
1375 assert (value
->type
== M4_SYMBOL_VOID
&& isp
->funcs
== &composite_funcs
1376 && isp
->u
.u_c
.chain
->type
== M4__CHAIN_ARGV
1377 && obs
&& obstack_object_size (obs
) == 0);
1379 src_chain
= isp
->u
.u_c
.chain
;
1380 isp
->u
.u_c
.chain
= src_chain
->next
;
1381 value
->type
= M4_SYMBOL_COMP
;
1382 /* Clone the link, since the input will be discarded soon. */
1383 chain
= (m4__symbol_chain
*) obstack_copy (obs
, src_chain
, sizeof *chain
);
1384 value
->u
.u_c
.chain
= value
->u
.u_c
.end
= chain
;
1385 value
->u
.u_c
.wrapper
= true;
1386 value
->u
.u_c
.has_func
= chain
->u
.u_a
.has_func
;
1389 /* If the next character is not ',' or ')', then unlink the last
1390 argument from argv and schedule it for reparsing. This way,
1391 expand_argument never has to deal with concatenation of argv with
1392 arbitrary text. Note that the implementation of safe_quotes
1393 ensures peek_input won't return CHAR_ARGV if the user is perverse
1394 enough to mix comment delimiters with argument separators:
1396 define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c))
1399 Therefore, we do not have to worry about calling MATCH, and thus
1400 do not have to worry about pop_input being called and
1401 invalidating the argv reference.
1403 When the $@ ref is used unchanged, we completely bypass the
1404 decrement of the argv refcount in next_char, since the ref is
1405 still live via the current collect_arguments. However, when the
1406 last element of the $@ ref is reparsed, we must increase the argv
1407 refcount here, to compensate for the fact that it will be
1408 decreased once the final element is parsed. */
1409 assert (!comments
->len1
1410 || (!m4_has_syntax (M4SYNTAX
, *comments
->str1
,
1411 M4_SYNTAX_COMMA
| M4_SYNTAX_CLOSE
)
1412 && *comments
->str1
!= *src_chain
->u
.u_a
.quotes
->str1
));
1413 ch
= peek_char (context
, true);
1414 if (!m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_COMMA
| M4_SYNTAX_CLOSE
))
1416 isp
->u
.u_c
.chain
= src_chain
;
1417 src_chain
->u
.u_a
.index
= m4_arg_argc (chain
->u
.u_a
.argv
) - 1;
1418 src_chain
->u
.u_a
.comma
= true;
1419 chain
->u
.u_a
.skip_last
= true;
1420 m4__arg_adjust_refcount (context
, chain
->u
.u_a
.argv
, true);
1425 /* Low level input is done a character at a time. The function
1426 next_char () is used to read and advance the input to the next
1427 character. If ALLOW_QUOTE, and the current input matches the
1428 current quote age, return CHAR_QUOTE and leave consumption of data
1429 for append_quote_token; otherwise, if ALLOW_ARGV, and the current
1430 input matches an argv reference with the correct quoting, return
1431 CHAR_ARGV and leave consumption of data for init_argv_symbol. If
1432 ALLOW_UNGET, then pop input to avoid returning CHAR_RETRY, and
1433 ensure that unget_input can safely be called next. */
1435 next_char (m4
*context
, bool allow_quote
, bool allow_argv
, bool allow_unget
)
1443 m4_set_current_file (context
, isp
->file
);
1444 m4_set_current_line (context
, isp
->line
);
1445 input_change
= false;
1448 assert (isp
->funcs
->read_func
);
1449 while (((ch
= isp
->funcs
->read_func (isp
, context
, allow_quote
,
1450 allow_argv
, allow_unget
))
1454 /* if (!IS_IGNORE (ch)) */
1458 /* End of input source --- pop one level. */
1459 pop_input (context
, true);
1463 /* The function peek_char () is used to look at the next character in
1464 the input stream. At any given time, it reads from the input_block
1465 on the top of the current input stack. If ALLOW_ARGV, then return
1466 CHAR_ARGV if an entire $@ reference is available for use. */
1468 peek_char (m4
*context
, bool allow_argv
)
1471 m4_input_block
*block
= isp
;
1475 assert (block
->funcs
->peek_func
);
1476 ch
= block
->funcs
->peek_func (block
, context
, allow_argv
);
1477 if (ch
!= CHAR_RETRY
)
1479 /* if (IS_IGNORE (ch)) */
1480 /* return next_char (context, false, true, false); */
1484 block
= block
->prev
;
1488 /* The function unget_input () puts back a character on the input
1489 stack, using an existing input_block if possible. This is not safe
1490 to call except immediately after next_char(context, aq, aa, true). */
1492 unget_input (int ch
)
1494 assert (isp
->funcs
->unget_func
!= NULL
);
1495 isp
->funcs
->unget_func (isp
, ch
);
1498 /* Return a pointer to the available bytes of the current input block,
1499 and set *LEN to the length of the result. If ALLOW_QUOTE, do not
1500 return a buffer for a quoted string. If the result does not fit in
1501 a char (for example, CHAR_EOF or CHAR_QUOTE), or if there is no
1502 readahead data available, return NULL, and the caller must fall
1503 back to next_char(). The buffer is only valid until the next
1504 consume_buffer() or next_char(). */
1506 next_buffer (m4
*context
, size_t *len
, bool allow_quote
)
1514 m4_set_current_file (context
, isp
->file
);
1515 m4_set_current_line (context
, isp
->line
);
1516 input_change
= false;
1519 assert (isp
->funcs
->buffer_func
);
1520 buf
= isp
->funcs
->buffer_func (isp
, context
, len
, allow_quote
);
1521 if (buf
!= buffer_retry
)
1523 /* End of input source --- pop one level. */
1524 pop_input (context
, true);
1528 /* Consume LEN bytes from the current input block, as though by LEN
1529 calls to next_char(). LEN must be less than or equal to the
1530 previous length returned by a successful call to next_buffer(). */
1532 consume_buffer (m4
*context
, size_t len
)
1534 assert (isp
&& !input_change
);
1537 assert (isp
->funcs
->consume_func
);
1538 isp
->funcs
->consume_func (isp
, context
, len
);
1542 /* skip_line () simply discards all immediately following characters,
1543 up to the first newline. It is only used from m4_dnl (). Report
1544 errors on behalf of CALLER. */
1546 m4_skip_line (m4
*context
, const m4_call_info
*caller
)
1553 const char *buffer
= next_buffer (context
, &len
, false);
1556 const char *p
= (char *) memchr (buffer
, '\n', len
);
1559 consume_buffer (context
, p
- buffer
+ 1);
1563 consume_buffer (context
, len
);
1567 ch
= next_char (context
, false, false, false);
1568 if (ch
== CHAR_EOF
|| ch
== '\n')
1573 m4_warn (context
, 0, caller
, _("end of file treated as newline"));
1577 /* If the string S of length LEN matches the next characters of the
1578 input stream, return true. If CONSUME, the first byte has already
1579 been matched. If a match is found and CONSUME is true, the input
1580 is discarded; otherwise any characters read are pushed back again.
1581 The function is used only when multicharacter quotes or comment
1582 delimiters are used.
1584 All strings herein should be unsigned. Otherwise sign-extension
1585 of individual chars might break quotes with 8-bit chars in it.
1587 FIXME - when matching multiquotes that cross file boundaries, we do
1588 not properly restore the current input file and line when we
1589 restore unconsumed characters. */
1591 match_input (m4
*context
, const char *s
, size_t len
, bool consume
)
1593 int n
; /* number of characters matched */
1594 int ch
; /* input character */
1597 bool result
= false;
1605 /* Try a buffer match first. */
1607 t
= next_buffer (context
, &buf_len
, false);
1608 if (t
&& len
<= buf_len
&& memcmp (s
, t
, len
) == 0)
1611 consume_buffer (context
, len
);
1614 /* Fall back on byte matching. */
1615 ch
= peek_char (context
, false);
1616 if (ch
!= to_uchar (*s
))
1622 next_char (context
, false, false, false);
1623 return true; /* short match */
1626 next_char (context
, false, false, false);
1627 for (n
= 1, t
= s
++; (ch
= peek_char (context
, false)) == to_uchar (*s
++); )
1629 next_char (context
, false, false, false);
1631 if (--len
== 1) /* long match */
1640 /* Failed or shouldn't consume, push back input. */
1641 st
= m4_push_string_init (context
, m4_get_current_file (context
),
1642 m4_get_current_line (context
));
1643 obstack_grow (st
, t
, n
);
1644 m4_push_string_finish ();
/* Check whether the current input matches a delimiter, which either
   belongs to syntax category CAT or matches the string S of length
   LEN.  The first character is handled inline for speed, and S[LEN]
   must be safe to dereference (it is faster to do character
   comparison prior to length checks).  This improves efficiency for
   the common case of single character quotes and comment delimiters,
   while being safe for disabled delimiters (LEN of zero yields false)
   as well as longer delimiters (handed to match_input).  If CONSUME,
   then CH is the result of next_char, and a successful match will
   discard the matched string.  Otherwise, CH is the result of
   peek_char, and the input stream is effectively unchanged.  */
#define MATCH(C, ch, cat, s, len, consume)                              \
  (m4_has_syntax (m4_get_syntax_table (C), ch, cat)                     \
   || (to_uchar ((s)[0]) == (ch)                                        \
       && ((len) >> 1 ? match_input (C, (s), (len), consume) : (len))))
1664 /* While the current input character has the given SYNTAX, append it
1665 to OBS. Take care not to pop input source unless the next source
1666 would continue the chain. Return true if the chain ended with
1669 consume_syntax (m4
*context
, m4_obstack
*obs
, unsigned int syntax
)
1672 bool allow
= m4__safe_quotes (M4SYNTAX
);
1676 /* Start with a buffer search. */
1678 const char *buffer
= next_buffer (context
, &len
, allow
);
1681 const char *p
= buffer
;
1682 while (len
&& m4_has_syntax (M4SYNTAX
, *p
, syntax
))
1687 obstack_grow (obs
, buffer
, p
- buffer
);
1688 consume_buffer (context
, p
- buffer
);
1692 /* Fall back to byte-wise search. It is safe to call next_char
1693 without first checking peek_char, except at input source
1694 boundaries, which we detect by CHAR_RETRY. */
1695 ch
= next_char (context
, allow
, allow
, true);
1696 if (ch
< CHAR_EOF
&& m4_has_syntax (M4SYNTAX
, ch
, syntax
))
1698 obstack_1grow (obs
, ch
);
1701 if (ch
== CHAR_RETRY
|| ch
== CHAR_QUOTE
|| ch
== CHAR_ARGV
)
1703 ch
= peek_char (context
, false);
1704 /* We exploit the fact that CHAR_EOF, CHAR_BUILTIN,
1705 CHAR_QUOTE, and CHAR_ARGV do not satisfy any syntax
1707 if (m4_has_syntax (M4SYNTAX
, ch
, syntax
))
1709 assert (ch
< CHAR_EOF
);
1710 obstack_1grow (obs
, ch
);
1711 next_char (context
, false, false, false);
1714 return ch
== CHAR_EOF
;
1722 /* Initialize input stacks. */
1724 m4_input_init (m4
*context
)
1726 obstack_init (&file_names
);
1727 m4_set_current_file (context
, NULL
);
1728 m4_set_current_line (context
, 0);
1730 current_input
= (m4_obstack
*) xmalloc (sizeof *current_input
);
1731 obstack_init (current_input
);
1732 wrapup_stack
= (m4_obstack
*) xmalloc (sizeof *wrapup_stack
);
1733 obstack_init (wrapup_stack
);
1735 /* Allocate an object in the current chunk, so that obstack_free
1736 will always work even if the first token parsed spills to a new
1738 obstack_init (&token_stack
);
1739 token_bottom
= obstack_finish (&token_stack
);
1745 start_of_input_line
= false;
1748 /* Free memory used by the input engine. */
1750 m4_input_exit (void)
1752 assert (!current_input
&& isp
== &input_eof
);
1753 assert (!wrapup_stack
&& wsp
== &input_eof
);
1754 obstack_free (&file_names
, NULL
);
1755 obstack_free (&token_stack
, NULL
);
1759 /* Parse and return a single token from the input stream, constructed
1760 into TOKEN. See m4__token_type for the valid return types, along
1761 with a description of what TOKEN will contain. If LINE is not
1762 NULL, set *LINE to the line number where the token starts. If OBS,
1763 expand safe tokens (strings and comments) directly into OBS rather
1764 than in a temporary staging area. If ALLOW_ARGV, OBS must be
1765 non-NULL, and an entire series of arguments can be returned if a $@
1766 reference is encountered. Report errors (unterminated comments or
1767 strings) on behalf of CALLER, if non-NULL.
1769 If OBS is NULL or the token expansion is unknown, the token text is
1770 collected on the obstack token_stack, which never contains more
1771 than one token text at a time. The storage pointed to by the
1772 fields in TOKEN is therefore subject to change the next time
1773 m4__next_token () is called. */
1775 m4__next_token (m4
*context
, m4_symbol_value
*token
, int *line
,
1776 m4_obstack
*obs
, bool allow_argv
, const m4_call_info
*caller
)
1780 m4__token_type type
;
1781 const char *file
= NULL
;
1783 /* The obstack where token data is stored. Generally token_stack,
1784 for tokens where argument collection might not use the literal
1785 token. But for comments and strings, we can output directly into
1786 the argument collection obstack OBS, if provided. */
1787 m4_obstack
*obs_safe
= &token_stack
;
1789 assert (next
== NULL
);
1790 memset (token
, '\0', sizeof *token
);
1792 obstack_free (&token_stack
, token_bottom
);
1794 /* Must consume an input character. */
1795 ch
= next_char (context
, false, allow_argv
&& m4__quote_age (M4SYNTAX
),
1799 *line
= m4_get_current_line (context
);
1800 file
= m4_get_current_file (context
);
1802 if (ch
== CHAR_EOF
) /* EOF */
1805 xfprintf (stderr
, "next_token -> EOF\n");
1807 return M4_TOKEN_EOF
;
1810 if (ch
== CHAR_BUILTIN
) /* BUILTIN TOKEN */
1812 init_builtin_token (context
, obs
, token
);
1814 m4_print_token (context
, "next_token", M4_TOKEN_MACDEF
, token
);
1816 return M4_TOKEN_MACDEF
;
1818 if (ch
== CHAR_ARGV
)
1820 init_argv_symbol (context
, obs
, token
);
1822 m4_print_token (context
, "next_token", M4_TOKEN_ARGV
, token
);
1824 return M4_TOKEN_ARGV
;
1827 if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_ESCAPE
))
1828 { /* ESCAPED WORD */
1829 obstack_1grow (&token_stack
, ch
);
1830 if ((ch
= next_char (context
, false, false, false)) < CHAR_EOF
)
1832 obstack_1grow (&token_stack
, ch
);
1833 if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_ALPHA
))
1834 consume_syntax (context
, &token_stack
,
1835 M4_SYNTAX_ALPHA
| M4_SYNTAX_NUM
);
1836 type
= M4_TOKEN_WORD
;
1839 type
= M4_TOKEN_SIMPLE
; /* escape before eof */
1841 else if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_ALPHA
))
1843 type
= (m4_is_syntax_macro_escaped (M4SYNTAX
)
1844 ? M4_TOKEN_STRING
: M4_TOKEN_WORD
);
1845 if (type
== M4_TOKEN_STRING
&& obs
)
1847 obstack_1grow (obs_safe
, ch
);
1848 consume_syntax (context
, obs_safe
, M4_SYNTAX_ALPHA
| M4_SYNTAX_NUM
);
1850 else if (MATCH (context
, ch
, M4_SYNTAX_LQUOTE
,
1851 context
->syntax
->quote
.str1
,
1852 context
->syntax
->quote
.len1
, true))
1853 { /* QUOTED STRING */
1857 type
= M4_TOKEN_STRING
;
1860 ch
= next_char (context
, obs
&& m4__quote_age (M4SYNTAX
), false,
1867 m4_set_current_file (context
, file
);
1868 m4_set_current_line (context
, *line
);
1870 m4_error (context
, EXIT_FAILURE
, 0, caller
,
1871 _("end of file in string"));
1873 if (ch
== CHAR_BUILTIN
)
1874 init_builtin_token (context
, obs
, obs
? token
: NULL
);
1875 else if (ch
== CHAR_QUOTE
)
1876 append_quote_token (context
, obs
, token
);
1877 else if (MATCH (context
, ch
, M4_SYNTAX_RQUOTE
,
1878 context
->syntax
->quote
.str2
,
1879 context
->syntax
->quote
.len2
, true))
1881 if (--quote_level
== 0)
1883 if (1 < context
->syntax
->quote
.len2
)
1884 obstack_grow (obs_safe
, context
->syntax
->quote
.str2
,
1885 context
->syntax
->quote
.len2
);
1887 obstack_1grow (obs_safe
, ch
);
1889 else if (MATCH (context
, ch
, M4_SYNTAX_LQUOTE
,
1890 context
->syntax
->quote
.str1
,
1891 context
->syntax
->quote
.len1
, true))
1894 if (1 < context
->syntax
->quote
.len1
)
1895 obstack_grow (obs_safe
, context
->syntax
->quote
.str1
,
1896 context
->syntax
->quote
.len1
);
1898 obstack_1grow (obs_safe
, ch
);
1901 obstack_1grow (obs_safe
, ch
);
1904 else if (MATCH (context
, ch
, M4_SYNTAX_BCOMM
,
1905 context
->syntax
->comm
.str1
,
1906 context
->syntax
->comm
.len1
, true))
1908 if (obs
&& !m4_get_discard_comments_opt (context
))
1910 if (1 < context
->syntax
->comm
.len1
)
1911 obstack_grow (obs_safe
, context
->syntax
->comm
.str1
,
1912 context
->syntax
->comm
.len1
);
1914 obstack_1grow (obs_safe
, ch
);
1917 ch
= next_char (context
, false, false, false);
1923 m4_set_current_file (context
, file
);
1924 m4_set_current_line (context
, *line
);
1926 m4_error (context
, EXIT_FAILURE
, 0, caller
,
1927 _("end of file in comment"));
1929 if (ch
== CHAR_BUILTIN
)
1931 init_builtin_token (context
, NULL
, NULL
);
1934 if (MATCH (context
, ch
, M4_SYNTAX_ECOMM
,
1935 context
->syntax
->comm
.str2
,
1936 context
->syntax
->comm
.len2
, true))
1938 if (1 < context
->syntax
->comm
.len2
)
1939 obstack_grow (obs_safe
, context
->syntax
->comm
.str2
,
1940 context
->syntax
->comm
.len2
);
1942 obstack_1grow (obs_safe
, ch
);
1945 assert (ch
< CHAR_EOF
);
1946 obstack_1grow (obs_safe
, ch
);
1948 type
= (m4_get_discard_comments_opt (context
)
1949 ? M4_TOKEN_NONE
: M4_TOKEN_COMMENT
);
1951 else if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_ACTIVE
))
1952 { /* ACTIVE CHARACTER */
1953 obstack_1grow (&token_stack
, ch
);
1954 type
= M4_TOKEN_WORD
;
1956 else if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_OPEN
))
1957 { /* OPEN PARENTHESIS */
1958 obstack_1grow (&token_stack
, ch
);
1959 type
= M4_TOKEN_OPEN
;
1961 else if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_COMMA
))
1963 obstack_1grow (&token_stack
, ch
);
1964 type
= M4_TOKEN_COMMA
;
1966 else if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_CLOSE
))
1967 { /* CLOSE PARENTHESIS */
1968 obstack_1grow (&token_stack
, ch
);
1969 type
= M4_TOKEN_CLOSE
;
1972 { /* EVERYTHING ELSE */
1973 assert (ch
< CHAR_EOF
);
1974 obstack_1grow (&token_stack
, ch
);
1975 if (m4_has_syntax (M4SYNTAX
, ch
,
1976 (M4_SYNTAX_OTHER
| M4_SYNTAX_NUM
| M4_SYNTAX_DOLLAR
1977 | M4_SYNTAX_LBRACE
| M4_SYNTAX_RBRACE
)))
1982 obstack_1grow (obs
, ch
);
1984 if (m4__safe_quotes (M4SYNTAX
))
1985 consume_syntax (context
, obs_safe
,
1986 (M4_SYNTAX_OTHER
| M4_SYNTAX_NUM
1987 | M4_SYNTAX_DOLLAR
| M4_SYNTAX_LBRACE
1988 | M4_SYNTAX_RBRACE
));
1989 type
= M4_TOKEN_STRING
;
1991 else if (m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_SPACE
))
1993 /* Coalescing newlines when interactive or when synclines
1994 are enabled is wrong. */
1995 if (!m4_get_interactive_opt (context
)
1996 && !m4_get_syncoutput_opt (context
)
1997 && m4__safe_quotes (M4SYNTAX
))
1998 consume_syntax (context
, &token_stack
, M4_SYNTAX_SPACE
);
1999 type
= M4_TOKEN_SPACE
;
2002 type
= M4_TOKEN_SIMPLE
;
2004 } while (type
== M4_TOKEN_NONE
);
2006 if (token
->type
== M4_SYMBOL_VOID
)
2008 if (obs_safe
!= obs
)
2010 len
= obstack_object_size (&token_stack
);
2011 obstack_1grow (&token_stack
, '\0');
2013 m4_set_symbol_value_text (token
, obstack_finish (&token_stack
), len
,
2014 m4__quote_age (M4SYNTAX
));
2017 assert (type
== M4_TOKEN_STRING
|| type
== M4_TOKEN_COMMENT
);
2020 assert (token
->type
== M4_SYMBOL_COMP
2021 && (type
== M4_TOKEN_STRING
|| type
== M4_TOKEN_COMMENT
));
2022 VALUE_MAX_ARGS (token
) = -1;
2025 if (token
->type
== M4_SYMBOL_VOID
)
2027 len
= obstack_object_size (&token_stack
);
2028 obstack_1grow (&token_stack
, '\0');
2030 m4_set_symbol_value_text (token
, obstack_finish (&token_stack
), len
,
2031 m4__quote_age (M4SYNTAX
));
2034 m4_print_token (context
, "next_token", type
, token
);
2040 /* Peek at the next token in the input stream to see if it is an open
2041 parenthesis. It is possible that what is peeked at may change as a
2042 result of changequote (or friends). This honors multi-character
2043 comments and quotes, just as next_token does. */
2045 m4__next_token_is_open (m4
*context
)
2047 int ch
= peek_char (context
, false);
2049 if (ch
== CHAR_EOF
|| ch
== CHAR_BUILTIN
2050 || m4_has_syntax (M4SYNTAX
, ch
, (M4_SYNTAX_BCOMM
| M4_SYNTAX_ESCAPE
2051 | M4_SYNTAX_ALPHA
| M4_SYNTAX_LQUOTE
2052 | M4_SYNTAX_ACTIVE
))
2053 || (MATCH (context
, ch
, M4_SYNTAX_BCOMM
, context
->syntax
->comm
.str1
,
2054 context
->syntax
->comm
.len1
, false))
2055 || (MATCH (context
, ch
, M4_SYNTAX_LQUOTE
, context
->syntax
->quote
.str1
,
2056 context
->syntax
->quote
.len1
, false)))
2058 return m4_has_syntax (M4SYNTAX
, ch
, M4_SYNTAX_OPEN
);
#ifdef DEBUG_INPUT

/* Debugging aid: write a one-line description of TOKEN, of kind TYPE,
   to stderr, prefixed by S (or by "m4input" if S is NULL).  The token
   value is printed for every kind except EOF and NONE.  Aborts on an
   unknown TYPE.  Always returns 0.  */
int
m4_print_token (m4 *context, const char *s, m4__token_type type,
                m4_symbol_value *token)
{
  const char *label;
  m4_obstack obs;
  size_t len;

  if (!s)
    s = "m4input";
  xfprintf (stderr, "%s: ", s);

  switch (type)
    {                           /* TOKSW */
    case M4_TOKEN_EOF:
      label = "eof";
      token = NULL;             /* no value to show */
      break;
    case M4_TOKEN_NONE:
      label = "none";
      token = NULL;             /* no value to show */
      break;
    case M4_TOKEN_STRING:
      label = "string\t";
      break;
    case M4_TOKEN_COMMENT:
      label = "comment\t";
      break;
    case M4_TOKEN_SPACE:
      label = "space\t";
      break;
    case M4_TOKEN_WORD:
      label = "word\t";
      break;
    case M4_TOKEN_OPEN:
      label = "open\t";
      break;
    case M4_TOKEN_COMMA:
      label = "comma\t";
      break;
    case M4_TOKEN_CLOSE:
      label = "close\t";
      break;
    case M4_TOKEN_SIMPLE:
      label = "simple\t";
      break;
    case M4_TOKEN_MACDEF:
      label = "builtin\t";
      break;
    case M4_TOKEN_ARGV:
      label = "argv\t";
      break;
    default:
      abort ();
    }
  fputs (label, stderr);

  if (!token)
    fputc ('\n', stderr);
  else
    {
      /* Render the value on a scratch obstack, then quote it.  */
      obstack_init (&obs);
      m4__symbol_value_print (context, token, &obs, NULL, false, NULL, NULL,
                              NULL);
      len = obstack_object_size (&obs);
      xfprintf (stderr, "%s\n", quotearg_style_mem (c_maybe_quoting_style,
                                                    obstack_finish (&obs),
                                                    len));
      obstack_free (&obs, NULL);
    }
  return 0;
}
#endif /* DEBUG_INPUT */