maint: summarize highlights of 1.4.18 release
[m4/ericb.git] / m4 / input.c
blobedce12e40b445949772743bfc9d624be83561ea9
1 /* GNU m4 -- A simple macro processor
2 Copyright (C) 1989-1994, 2006-2010, 2013-2014, 2017 Free Software
3 Foundation, Inc.
5 This file is part of GNU M4.
7 GNU M4 is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU M4 is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 /* Handling of different input sources, and lexical analysis. */
23 #include <config.h>
25 #include "m4private.h"
27 #include "freadptr.h"
28 #include "freadseek.h"
29 #include "memchr2.h"
31 /* Define this to see runtime debug info. Implied by DEBUG. */
32 /*#define DEBUG_INPUT */
34 /* Maximum number of bytes where it is more efficient to inline the
35 reference as a string than it is to track reference bookkeeping for
36 those bytes. */
37 #define INPUT_INLINE_THRESHOLD 16
40 Unread input can be either files that should be read (from the
41 command line or by include/sinclude), strings which should be
42 rescanned (normal macro expansion text), or quoted builtin
43 definitions (as returned by the builtin "defn"). Unread input is
44 organized in a stack, implemented with an obstack. Each input
45 source is described by a "struct m4_input_block". The obstack is
46 "input_stack". The top of the input stack is "isp".
48 Each input_block has an associated struct input_funcs, which is a
49 vtable that defines polymorphic functions for peeking, reading,
50 unget, cleanup, and printing in trace output. Getting a single
51 character at a time is inefficient, so there are also functions for
52 accessing the readahead buffer and consuming bulk input. All input
53 is done through the function pointers of the input_funcs on the
54 given input_block, and all characters are unsigned, to distinguish
55 them from stdio EOF and from special sentinel characters. When an
56 input_block is exhausted, its reader returns CHAR_RETRY which
57 causes the input_block to be popped from the input_stack.
59 The macro "m4wrap" places the text to be saved on another input
60 stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
61 is seen on normal input (eg, when "current_input" is empty), input
62 is switched over to "wrapup_stack", and the original
63 "current_input" is freed. A new stack is allocated for
64 "wrapup_stack", which will accept any text produced by calls to
65 "m4wrap" from within the wrapped text. This process of shuffling
66 "wrapup_stack" to "current_input" can continue indefinitely, even
67 generating infinite loops (e.g. "define(`f',`m4wrap(`f')')f"),
68 without memory leaks. Adding wrapped data is done through
69 m4__push_wrapup_init/m4__push_wrapup_finish().
71 Pushing new input on the input stack is done by m4_push_file(), the
72 conceptual m4_push_string(), and m4_push_builtin() (for builtin
73 definitions). As an optimization, since most macro expansions
74 result in strings, m4_push_string() is split in two parts,
75 push_string_init(), which returns a pointer to the obstack for
76 growing the output, and push_string_finish(), which returns a
77 pointer to the finished input_block. Thus, instead of creating a
78 new input block for every character pushed, macro expansion need
79 only add text to the top of the obstack. However, it is not safe
80 to alter the input stack while a string is being constructed. This
81 means the input engine is in one of two states: consuming input, or
82 collecting a macro's expansion. The input_block *next is used to
83 manage the coordination between the different push routines.
85 Normally, input sources behave in LIFO order, resembling a stack.
86 But thanks to the defn and m4wrap macros, when collecting the
87 expansion of a macro, it is possible that we must intermix multiple
88 input blocks in FIFO order. Therefore, when collecting an
89 expansion, a meta-input block is formed which will visit its
90 children in FIFO order, without losing data when the obstack is
91 cleared in LIFO order.
93 The current file and line number are stored in the context, for use
94 by the error handling functions in utility.c. When collecting a
95 macro's expansion, these variables can be temporarily inconsistent
96 in order to provide better error message locations, but they must
97 be restored before further parsing takes place. Each input block
98 maintains its own notion of the current file and line, so swapping
99 between input blocks must update the context accordingly. */
101 typedef struct m4_input_block m4_input_block;
103 static int file_peek (m4_input_block *, m4 *, bool);
104 static int file_read (m4_input_block *, m4 *, bool, bool,
105 bool);
106 static void file_unget (m4_input_block *, int);
107 static bool file_clean (m4_input_block *, m4 *, bool);
108 static void file_print (m4_input_block *, m4 *, m4_obstack *,
109 int);
110 static const char * file_buffer (m4_input_block *, m4 *, size_t *,
111 bool);
112 static void file_consume (m4_input_block *, m4 *, size_t);
113 static int string_peek (m4_input_block *, m4 *, bool);
114 static int string_read (m4_input_block *, m4 *, bool, bool,
115 bool);
116 static void string_unget (m4_input_block *, int);
117 static void string_print (m4_input_block *, m4 *, m4_obstack *,
118 int);
119 static const char * string_buffer (m4_input_block *, m4 *, size_t *,
120 bool);
121 static void string_consume (m4_input_block *, m4 *, size_t);
122 static int composite_peek (m4_input_block *, m4 *, bool);
123 static int composite_read (m4_input_block *, m4 *, bool, bool,
124 bool);
125 static void composite_unget (m4_input_block *, int);
126 static bool composite_clean (m4_input_block *, m4 *, bool);
127 static void composite_print (m4_input_block *, m4 *, m4_obstack *,
128 int);
129 static const char * composite_buffer (m4_input_block *, m4 *, size_t *,
130 bool);
131 static void composite_consume (m4_input_block *, m4 *, size_t);
132 static int eof_peek (m4_input_block *, m4 *, bool);
133 static int eof_read (m4_input_block *, m4 *, bool, bool,
134 bool);
135 static void eof_unget (m4_input_block *, int);
136 static const char * eof_buffer (m4_input_block *, m4 *, size_t *,
137 bool);
139 static void init_builtin_token (m4 *, m4_obstack *,
140 m4_symbol_value *);
141 static void append_quote_token (m4 *, m4_obstack *,
142 m4_symbol_value *);
143 static bool match_input (m4 *, const char *, size_t, bool);
144 static int next_char (m4 *, bool, bool, bool);
145 static int peek_char (m4 *, bool);
146 static bool pop_input (m4 *, bool);
147 static void unget_input (int);
148 static const char * next_buffer (m4 *, size_t *, bool);
149 static void consume_buffer (m4 *, size_t);
150 static bool consume_syntax (m4 *, m4_obstack *, unsigned int);
152 #ifdef DEBUG_INPUT
153 # include "quotearg.h"
155 static int m4_print_token (m4 *, const char *, m4__token_type,
156 m4_symbol_value *);
157 #endif
159 /* Vtable of callbacks for each input method. */
160 struct input_funcs
162 /* Peek at input, return an unsigned char, CHAR_BUILTIN if it is a
163 builtin, or CHAR_RETRY if none available. If ALLOW_ARGV, then
164 CHAR_ARGV may be returned. */
165 int (*peek_func) (m4_input_block *, m4 *, bool);
167 /* Read input, return an unsigned char, CHAR_BUILTIN if it is a
168 builtin, or CHAR_RETRY if none available. If ALLOW_QUOTE, then
169 CHAR_QUOTE may be returned. If ALLOW_ARGV, then CHAR_ARGV may be
170 returned. If ALLOW_UNGET, then ensure that the next unget_func
171 will work with the returned character. */
172 int (*read_func) (m4_input_block *, m4 *, bool allow_quote,
173 bool allow_argv, bool allow_unget);
175 /* Unread a single unsigned character or CHAR_BUILTIN, must be the
176 same character previously read by read_func. */
177 void (*unget_func) (m4_input_block *, int);
179 /* Optional function to perform cleanup at end of input. If
180 CLEANUP, it is safe to perform non-recoverable cleanup actions.
181 Return true only if no cleanup remains to be done. */
182 bool (*clean_func) (m4_input_block *, m4 *, bool cleanup);
184 /* Add a representation of the input block to the obstack, for use
185 in trace expansion output. */
186 void (*print_func) (m4_input_block *, m4 *, m4_obstack *, int);
188 /* Return a pointer to the current readahead buffer, and set LEN to
189 the length of the result. If ALLOW_QUOTE, do not return a buffer
190 for a quoted string. If there is data, but the result of
191 next_char() would not fit in a char (for example, CHAR_EOF or
192 CHAR_QUOTE) or there is no readahead data available, return NULL,
193 and the caller must use next_char(). If there is no more data,
194 return buffer_retry. The buffer is only valid until the next
195 consume_buffer() or next_char(). */
196 const char *(*buffer_func) (m4_input_block *, m4 *, size_t *, bool);
198 /* Optional function to consume data from a readahead buffer
199 previously obtained through buffer_func. */
200 void (*consume_func) (m4_input_block *, m4 *, size_t);
203 /* A block of input to be scanned. */
204 struct m4_input_block
206 m4_input_block *prev; /* Previous input_block on the input stack. */
207 struct input_funcs *funcs; /* Virtual functions of this input_block. */
208 const char *file; /* File where this input is from. */
209 int line; /* Line where this input is from. */
211 union
213 struct
215 char *str; /* String value. */
216 size_t len; /* Remaining length. */
218 u_s; /* See string_funcs. */
219 struct
221 FILE *fp; /* Input file handle. */
222 bool_bitfield end : 1; /* True iff peek returned EOF. */
223 bool_bitfield close : 1; /* True to close file on pop. */
224 bool_bitfield line_start : 1; /* Saved start_of_input_line state. */
226 u_f; /* See file_funcs. */
227 struct
229 m4__symbol_chain *chain; /* Current link in chain. */
230 m4__symbol_chain *end; /* Last link in chain. */
232 u_c; /* See composite_funcs. */
238 /* Obstack for storing individual tokens. */
239 static m4_obstack token_stack;
241 /* Obstack for storing input file names. */
242 static m4_obstack file_names;
244 /* Wrapup input stack. */
245 static m4_obstack *wrapup_stack;
247 /* Current stack, from input or wrapup. */
248 static m4_obstack *current_input;
250 /* Bottom of token_stack, for obstack_free. */
251 static void *token_bottom;
253 /* Pointer to top of current_input, never NULL. */
254 static m4_input_block *isp;
256 /* Pointer to top of wrapup_stack, never NULL. */
257 static m4_input_block *wsp;
259 /* Auxiliary for handling split m4_push_string (), NULL when not
260 pushing text for rescanning. */
261 static m4_input_block *next;
263 /* Flag for next_char () to increment current_line. */
264 static bool start_of_input_line;
266 /* Flag for next_char () to recognize change in input block. */
267 static bool input_change;
269 /* Vtable for handling input from files. */
270 static struct input_funcs file_funcs = {
271 file_peek, file_read, file_unget, file_clean, file_print, file_buffer,
272 file_consume
275 /* Vtable for handling input from strings. */
276 static struct input_funcs string_funcs = {
277 string_peek, string_read, string_unget, NULL, string_print, string_buffer,
278 string_consume
281 /* Vtable for handling input from composite chains. */
282 static struct input_funcs composite_funcs = {
283 composite_peek, composite_read, composite_unget, composite_clean,
284 composite_print, composite_buffer, composite_consume
287 /* Vtable for recognizing end of input. */
288 static struct input_funcs eof_funcs = {
289 eof_peek, eof_read, eof_unget, NULL, NULL, eof_buffer, NULL
292 /* Marker at end of an input stack. */
293 static m4_input_block input_eof = { NULL, &eof_funcs, "", 0 };
295 /* Marker for buffer_func when current block has no more data. */
296 static const char buffer_retry[1];
299 /* Input files, from command line or [s]include. */
300 static int
301 file_peek (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
302 bool allow_argv M4_GNUC_UNUSED)
304 int ch;
306 ch = me->u.u_f.end ? EOF : getc (me->u.u_f.fp);
307 if (ch == EOF)
309 me->u.u_f.end = true;
310 return CHAR_RETRY;
313 ungetc (ch, me->u.u_f.fp);
314 return ch;
317 static int
318 file_read (m4_input_block *me, m4 *context, bool allow_quote M4_GNUC_UNUSED,
319 bool allow_argv M4_GNUC_UNUSED, bool allow_unget M4_GNUC_UNUSED)
321 int ch;
323 if (start_of_input_line)
325 start_of_input_line = false;
326 m4_set_current_line (context, ++me->line);
329 /* If stdin is a terminal, calling getc after peek_char already
330 called it would make the user have to hit ^D twice to quit. */
331 ch = me->u.u_f.end ? EOF : getc (me->u.u_f.fp);
332 if (ch == EOF)
334 me->u.u_f.end = true;
335 return CHAR_RETRY;
338 if (ch == '\n')
339 start_of_input_line = true;
340 return ch;
343 static void
344 file_unget (m4_input_block *me, int ch)
346 assert (ch < CHAR_EOF);
347 if (ungetc (ch, me->u.u_f.fp) < 0)
349 assert (!"INTERNAL ERROR: failed ungetc!");
350 abort (); /* ungetc should not be called without a previous read. */
352 me->u.u_f.end = false;
353 if (ch == '\n')
354 start_of_input_line = false;
357 static bool
358 file_clean (m4_input_block *me, m4 *context, bool cleanup)
360 if (!cleanup)
361 return false;
362 if (me->prev != &input_eof)
363 m4_debug_message (context, M4_DEBUG_TRACE_INPUT,
364 _("input reverted to %s, line %d"),
365 me->prev->file, me->prev->line);
366 else
367 m4_debug_message (context, M4_DEBUG_TRACE_INPUT, _("input exhausted"));
369 if (ferror (me->u.u_f.fp))
371 m4_error (context, 0, 0, NULL, _("error reading %s"),
372 quotearg_style (locale_quoting_style, me->file));
373 if (me->u.u_f.close)
374 fclose (me->u.u_f.fp);
376 else if (me->u.u_f.close && fclose (me->u.u_f.fp) == EOF)
377 m4_error (context, 0, errno, NULL, _("error reading %s"),
378 quotearg_style (locale_quoting_style, me->file));
379 start_of_input_line = me->u.u_f.line_start;
380 m4_set_output_line (context, -1);
381 return true;
384 static void
385 file_print (m4_input_block *me, m4 *context M4_GNUC_UNUSED, m4_obstack *obs,
386 int debug_level M4_GNUC_UNUSED)
388 const char *text = me->file;
389 assert (obstack_object_size (current_input) == 0);
390 obstack_grow (obs, "<file: ", strlen ("<file: "));
391 obstack_grow (obs, text, strlen (text));
392 obstack_1grow (obs, '>');
395 static const char *
396 file_buffer (m4_input_block *me, m4 *context M4_GNUC_UNUSED, size_t *len,
397 bool allow_quote M4_GNUC_UNUSED)
399 if (start_of_input_line)
401 start_of_input_line = false;
402 m4_set_current_line (context, ++me->line);
404 if (me->u.u_f.end)
405 return buffer_retry;
406 return freadptr (isp->u.u_f.fp, len);
409 static void
410 file_consume (m4_input_block *me, m4 *context, size_t len)
412 const char *buf;
413 const char *p;
414 size_t buf_len;
415 assert (!start_of_input_line);
416 buf = freadptr (me->u.u_f.fp, &buf_len);
417 assert (buf && len <= buf_len);
418 buf_len = 0;
419 while ((p = (char *) memchr (buf + buf_len, '\n', len - buf_len)))
421 if (p == buf + len - 1)
422 start_of_input_line = true;
423 else
424 m4_set_current_line (context, ++me->line);
425 buf_len = p - buf + 1;
427 if (freadseek (isp->u.u_f.fp, len) != 0)
428 assert (false);
431 /* m4_push_file () pushes an input file FP with name TITLE on the
432 input stack, saving the current file name and line number. If next
433 is non-NULL, this push invalidates a call to m4_push_string_init (),
434 whose storage is consequently released. If CLOSE, then close FP at
435 end of file.
437 file_read () manages line numbers for error messages, so they do not
438 get wrong due to lookahead. The token consisting of a newline
439 alone is taken as belonging to the line it ends, and the current
440 line number is not incremented until the next character is read. */
441 void
442 m4_push_file (m4 *context, FILE *fp, const char *title, bool close_file)
444 m4_input_block *i;
446 if (next != NULL)
448 obstack_free (current_input, next);
449 next = NULL;
452 m4_debug_message (context, M4_DEBUG_TRACE_INPUT, _("input read from %s"),
453 quotearg_style (locale_quoting_style, title));
455 i = (m4_input_block *) obstack_alloc (current_input, sizeof *i);
456 i->funcs = &file_funcs;
457 /* Save title on a separate obstack, so that wrapped text can refer
458 to it even after the file is popped. */
459 i->file = obstack_copy0 (&file_names, title, strlen (title));
460 i->line = 1;
462 i->u.u_f.fp = fp;
463 i->u.u_f.end = false;
464 i->u.u_f.close = close_file;
465 i->u.u_f.line_start = start_of_input_line;
467 m4_set_output_line (context, -1);
469 i->prev = isp;
470 isp = i;
471 input_change = true;
475 /* Handle string expansion text. */
476 static int
477 string_peek (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
478 bool allow_argv M4_GNUC_UNUSED)
480 return me->u.u_s.len ? to_uchar (*me->u.u_s.str) : CHAR_RETRY;
483 static int
484 string_read (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
485 bool allow_quote M4_GNUC_UNUSED, bool allow_argv M4_GNUC_UNUSED,
486 bool allow_unget M4_GNUC_UNUSED)
488 if (!me->u.u_s.len)
489 return CHAR_RETRY;
490 me->u.u_s.len--;
491 return to_uchar (*me->u.u_s.str++);
494 static void
495 string_unget (m4_input_block *me, int ch)
497 assert (ch < CHAR_EOF && to_uchar (me->u.u_s.str[-1]) == ch);
498 me->u.u_s.str--;
499 me->u.u_s.len++;
502 static void
503 string_print (m4_input_block *me, m4 *context, m4_obstack *obs,
504 int debug_level)
506 bool quote = (debug_level & M4_DEBUG_TRACE_QUOTE) != 0;
507 size_t arg_length = m4_get_max_debug_arg_length_opt (context);
509 assert (!me->u.u_s.len);
510 m4_shipout_string_trunc (obs, (char *) obstack_base (current_input),
511 obstack_object_size (current_input),
512 quote ? m4_get_syntax_quotes (M4SYNTAX) : NULL,
513 &arg_length);
516 static const char *
517 string_buffer (m4_input_block *me, m4 *context M4_GNUC_UNUSED, size_t *len,
518 bool allow_quote M4_GNUC_UNUSED)
520 if (!me->u.u_s.len)
521 return buffer_retry;
522 *len = me->u.u_s.len;
523 return me->u.u_s.str;
526 static void
527 string_consume (m4_input_block *me, m4 *context M4_GNUC_UNUSED, size_t len)
529 assert (len <= me->u.u_s.len);
530 me->u.u_s.len -= len;
531 me->u.u_s.str += len;
534 /* First half of m4_push_string (). The pointer next points to the
535 new input_block. FILE and LINE describe the location where the
536 macro starts that is generating the expansion (even if the location
537 has advanced in the meantime). Return the obstack that will
538 collect the expansion text. */
539 m4_obstack *
540 m4_push_string_init (m4 *context, const char *file, int line)
542 /* Free any memory occupied by completely parsed input. */
543 assert (!next);
544 while (pop_input (context, false));
546 /* Reserve the next location on the obstack. */
547 next = (m4_input_block *) obstack_alloc (current_input, sizeof *next);
548 next->funcs = &string_funcs;
549 next->file = file;
550 next->line = line;
551 next->u.u_s.len = 0;
553 return current_input;
556 /* This function allows gathering input from multiple locations,
557 rather than copying everything consecutively onto the input stack.
558 Must be called between push_string_init and push_string_finish.
560 Convert the current input block into a chain if it is not one
561 already, and add the contents of VALUE as a new link in the chain.
562 LEVEL describes the current expansion level, or SIZE_MAX if VALUE
563 is composite, its contents reside entirely on the current_input
564 stack, and VALUE lives in temporary storage. If VALUE is a simple
565 string, then it belongs to the current macro expansion. If VALUE
566 is composite, then each text link has a level of SIZE_MAX if it
567 belongs to the current macro expansion, otherwise it is a
568 back-reference where level tracks which stack it came from. The
569 resulting input block chain contains links with a level of SIZE_MAX
570 if the text belongs to the input stack, otherwise the level where
571 the back-reference comes from.
573 Return true only if a reference was created to the contents of
574 VALUE, in which case, LEVEL is less than SIZE_MAX and the lifetime
575 of VALUE and its contents must last as long as the input engine can
576 parse references from it. INUSE determines whether composite
577 symbols should favor creating back-references or copying text. */
578 bool
579 m4__push_symbol (m4 *context, m4_symbol_value *value, size_t level, bool inuse)
581 m4__symbol_chain *src_chain = NULL;
582 m4__symbol_chain *chain;
584 assert (next);
586 /* Speed consideration - for short enough symbols, the speed and
587 memory overhead of parsing another INPUT_CHAIN link outweighs the
588 time to inline the symbol text. But don't copy text if it
589 already lives on the obstack. */
590 if (m4_is_symbol_value_text (value))
592 assert (level < SIZE_MAX);
593 if (m4_get_symbol_value_len (value) <= INPUT_INLINE_THRESHOLD)
595 obstack_grow (current_input, m4_get_symbol_value_text (value),
596 m4_get_symbol_value_len (value));
597 return false;
600 else if (m4_is_symbol_value_func (value))
602 if (next->funcs == &string_funcs)
604 next->funcs = &composite_funcs;
605 next->u.u_c.chain = next->u.u_c.end = NULL;
607 m4__append_builtin (current_input, value->u.builtin, &next->u.u_c.chain,
608 &next->u.u_c.end);
609 return false;
611 else
613 /* For composite values, if argv is already in use, creating
614 additional references for long text segments is more
615 efficient in time. But if argv is not yet in use, and we
616 have a composite value, then the value must already contain a
617 back-reference, and memory usage is more efficient if we can
618 avoid using the current expand_macro, even if it means larger
619 copies. */
620 assert (value->type == M4_SYMBOL_COMP);
621 src_chain = value->u.u_c.chain;
622 while (level < SIZE_MAX && src_chain && src_chain->type == M4__CHAIN_STR
623 && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
624 || (!inuse && src_chain->u.u_s.level == SIZE_MAX)))
626 obstack_grow (current_input, src_chain->u.u_s.str,
627 src_chain->u.u_s.len);
628 src_chain = src_chain->next;
630 if (!src_chain)
631 return false;
634 if (next->funcs == &string_funcs)
636 next->funcs = &composite_funcs;
637 next->u.u_c.chain = next->u.u_c.end = NULL;
639 m4__make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
640 if (m4_is_symbol_value_text (value))
642 chain = (m4__symbol_chain *) obstack_alloc (current_input,
643 sizeof *chain);
644 if (next->u.u_c.end)
645 next->u.u_c.end->next = chain;
646 else
647 next->u.u_c.chain = chain;
648 next->u.u_c.end = chain;
649 chain->next = NULL;
650 chain->type = M4__CHAIN_STR;
651 chain->quote_age = m4_get_symbol_value_quote_age (value);
652 chain->u.u_s.str = m4_get_symbol_value_text (value);
653 chain->u.u_s.len = m4_get_symbol_value_len (value);
654 chain->u.u_s.level = level;
655 m4__adjust_refcount (context, level, true);
656 inuse = true;
658 while (src_chain)
660 if (src_chain->type == M4__CHAIN_FUNC)
662 m4__append_builtin (current_input, src_chain->u.builtin,
663 &next->u.u_c.chain, &next->u.u_c.end);
664 src_chain = src_chain->next;
665 continue;
667 if (level == SIZE_MAX)
669 /* Nothing to copy, since link already lives on obstack. */
670 assert (src_chain->type != M4__CHAIN_STR
671 || src_chain->u.u_s.level == SIZE_MAX);
672 chain = src_chain;
674 else
676 /* Allow inlining the final link with subsequent text. */
677 if (!src_chain->next && src_chain->type == M4__CHAIN_STR
678 && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
679 || (!inuse && src_chain->u.u_s.level == SIZE_MAX)))
681 obstack_grow (current_input, src_chain->u.u_s.str,
682 src_chain->u.u_s.len);
683 break;
685 /* We must clone each link in the chain, since next_char
686 destructively modifies the chain it is parsing. */
687 chain = (m4__symbol_chain *) obstack_copy (current_input, src_chain,
688 sizeof *chain);
689 chain->next = NULL;
690 if (chain->type == M4__CHAIN_STR && chain->u.u_s.level == SIZE_MAX)
692 if (chain->u.u_s.len <= INPUT_INLINE_THRESHOLD || !inuse)
693 chain->u.u_s.str = (char *) obstack_copy (current_input,
694 chain->u.u_s.str,
695 chain->u.u_s.len);
696 else
698 chain->u.u_s.level = level;
699 inuse = true;
703 if (next->u.u_c.end)
704 next->u.u_c.end->next = chain;
705 else
706 next->u.u_c.chain = chain;
707 next->u.u_c.end = chain;
708 if (chain->type == M4__CHAIN_ARGV)
710 assert (!chain->u.u_a.comma && !chain->u.u_a.skip_last);
711 inuse |= m4__arg_adjust_refcount (context, chain->u.u_a.argv, true);
713 else if (chain->type == M4__CHAIN_STR && chain->u.u_s.level < SIZE_MAX)
714 m4__adjust_refcount (context, chain->u.u_s.level, true);
715 src_chain = src_chain->next;
717 return inuse;
720 /* Last half of m4_push_string (). If next is now NULL, a call to
721 m4_push_file () has pushed a different input block to the top of
722 the stack. Otherwise, all unfinished text on the obstack returned
723 from push_string_init is collected into the input stack. If the
724 new object is empty, we do not push it. */
725 void
726 m4_push_string_finish (void)
728 size_t len = obstack_object_size (current_input);
730 if (next == NULL)
732 assert (!len);
733 return;
736 if (len || next->funcs == &composite_funcs)
738 if (next->funcs == &string_funcs)
740 next->u.u_s.str = (char *) obstack_finish (current_input);
741 next->u.u_s.len = len;
743 else
744 m4__make_text_link (current_input, &next->u.u_c.chain,
745 &next->u.u_c.end);
746 next->prev = isp;
747 isp = next;
748 input_change = true;
750 else
751 obstack_free (current_input, next);
752 next = NULL;
756 /* A composite block contains multiple sub-blocks which are processed
757 in FIFO order, even though the obstack allocates memory in LIFO
758 order. */
759 static int
760 composite_peek (m4_input_block *me, m4 *context, bool allow_argv)
762 m4__symbol_chain *chain = me->u.u_c.chain;
763 size_t argc;
765 while (chain)
767 switch (chain->type)
769 case M4__CHAIN_STR:
770 if (chain->u.u_s.len)
771 return to_uchar (chain->u.u_s.str[0]);
772 break;
773 case M4__CHAIN_FUNC:
774 if (chain->u.builtin)
775 return CHAR_BUILTIN;
776 break;
777 case M4__CHAIN_ARGV:
778 argc = m4_arg_argc (chain->u.u_a.argv);
779 if (chain->u.u_a.index == argc)
780 break;
781 if (chain->u.u_a.comma)
782 return ','; /* FIXME - support M4_SYNTAX_COMMA. */
783 /* Only return a reference in the quoting is correct and the
784 reference has more than one argument left. */
785 if (allow_argv && chain->quote_age == m4__quote_age (M4SYNTAX)
786 && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
787 return CHAR_ARGV;
788 /* Rather than directly parse argv here, we push another
789 input block containing the next unparsed argument from
790 argv. */
791 m4_push_string_init (context, me->file, me->line);
792 m4__push_arg_quote (context, current_input, chain->u.u_a.argv,
793 chain->u.u_a.index,
794 m4__quote_cache (M4SYNTAX, NULL,
795 chain->quote_age,
796 chain->u.u_a.quotes));
797 chain->u.u_a.index++;
798 chain->u.u_a.comma = true;
799 m4_push_string_finish ();
800 return peek_char (context, allow_argv);
801 case M4__CHAIN_LOC:
802 break;
803 default:
804 assert (!"composite_peek");
805 abort ();
807 chain = chain->next;
809 return CHAR_RETRY;
812 static int
813 composite_read (m4_input_block *me, m4 *context, bool allow_quote,
814 bool allow_argv, bool allow_unget)
816 m4__symbol_chain *chain = me->u.u_c.chain;
817 size_t argc;
818 while (chain)
820 if (allow_quote && chain->quote_age == m4__quote_age (M4SYNTAX))
821 return CHAR_QUOTE;
822 switch (chain->type)
824 case M4__CHAIN_STR:
825 if (chain->u.u_s.len)
827 /* Partial consumption invalidates quote age. */
828 chain->quote_age = 0;
829 chain->u.u_s.len--;
830 return to_uchar (*chain->u.u_s.str++);
832 if (chain->u.u_s.level < SIZE_MAX)
833 m4__adjust_refcount (context, chain->u.u_s.level, false);
834 break;
835 case M4__CHAIN_FUNC:
836 if (chain->u.builtin)
837 return CHAR_BUILTIN;
838 break;
839 case M4__CHAIN_ARGV:
840 argc = m4_arg_argc (chain->u.u_a.argv);
841 if (chain->u.u_a.index == argc)
843 m4__arg_adjust_refcount (context, chain->u.u_a.argv, false);
844 break;
846 if (chain->u.u_a.comma)
848 chain->u.u_a.comma = false;
849 return ','; /* FIXME - support M4_SYNTAX_COMMA. */
851 /* Only return a reference in the quoting is correct and the
852 reference has more than one argument left. */
853 if (allow_argv && chain->quote_age == m4__quote_age (M4SYNTAX)
854 && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
855 return CHAR_ARGV;
856 /* Rather than directly parse argv here, we push another
857 input block containing the next unparsed argument from
858 argv. */
859 m4_push_string_init (context, me->file, me->line);
860 m4__push_arg_quote (context, current_input, chain->u.u_a.argv,
861 chain->u.u_a.index,
862 m4__quote_cache (M4SYNTAX, NULL,
863 chain->quote_age,
864 chain->u.u_a.quotes));
865 chain->u.u_a.index++;
866 chain->u.u_a.comma = true;
867 m4_push_string_finish ();
868 return next_char (context, allow_quote, allow_argv, allow_unget);
869 case M4__CHAIN_LOC:
870 me->file = chain->u.u_l.file;
871 me->line = chain->u.u_l.line;
872 input_change = true;
873 me->u.u_c.chain = chain->next;
874 return next_char (context, allow_quote, allow_argv, allow_unget);
875 default:
876 assert (!"composite_read");
877 abort ();
879 me->u.u_c.chain = chain = chain->next;
881 return CHAR_RETRY;
884 static void
885 composite_unget (m4_input_block *me, int ch)
887 m4__symbol_chain *chain = me->u.u_c.chain;
888 switch (chain->type)
890 case M4__CHAIN_STR:
891 assert (ch < CHAR_EOF && to_uchar (chain->u.u_s.str[-1]) == ch);
892 chain->u.u_s.str--;
893 chain->u.u_s.len++;
894 break;
895 case M4__CHAIN_FUNC:
896 assert (ch == CHAR_BUILTIN && chain->u.builtin);
897 break;
898 case M4__CHAIN_ARGV:
899 /* FIXME - support M4_SYNTAX_COMMA. */
900 assert (ch == ',' && !chain->u.u_a.comma);
901 chain->u.u_a.comma = true;
902 break;
903 default:
904 assert (!"composite_unget");
905 abort ();
909 static bool
910 composite_clean (m4_input_block *me, m4 *context, bool cleanup)
912 m4__symbol_chain *chain = me->u.u_c.chain;
913 assert (!chain || !cleanup);
914 while (chain)
916 switch (chain->type)
918 case M4__CHAIN_STR:
919 if (chain->u.u_s.len)
921 assert (!cleanup);
922 return false;
924 if (chain->u.u_s.level < SIZE_MAX)
925 m4__adjust_refcount (context, chain->u.u_s.level, false);
926 break;
927 case M4__CHAIN_FUNC:
928 if (chain->u.builtin)
929 return false;
930 break;
931 case M4__CHAIN_ARGV:
932 if (chain->u.u_a.index < m4_arg_argc (chain->u.u_a.argv))
934 assert (!cleanup);
935 return false;
937 m4__arg_adjust_refcount (context, chain->u.u_a.argv, false);
938 break;
939 case M4__CHAIN_LOC:
940 return false;
941 default:
942 assert (!"composite_clean");
943 abort ();
945 me->u.u_c.chain = chain = chain->next;
947 return true;
950 static void
951 composite_print (m4_input_block *me, m4 *context, m4_obstack *obs,
952 int debug_level)
954 bool quote = (debug_level & M4_DEBUG_TRACE_QUOTE) != 0;
955 size_t maxlen = m4_get_max_debug_arg_length_opt (context);
956 m4__symbol_chain *chain = me->u.u_c.chain;
957 const m4_string_pair *quotes = m4_get_syntax_quotes (M4SYNTAX);
958 bool module = (debug_level & M4_DEBUG_TRACE_MODULE) != 0;
959 bool done = false;
960 size_t len = obstack_object_size (current_input);
962 if (quote)
963 m4_shipout_string (context, obs, quotes->str1, quotes->len1, false);
964 while (chain && !done)
966 switch (chain->type)
968 case M4__CHAIN_STR:
969 if (m4_shipout_string_trunc (obs, chain->u.u_s.str,
970 chain->u.u_s.len, NULL, &maxlen))
971 done = true;
972 break;
973 case M4__CHAIN_FUNC:
974 m4__builtin_print (obs, chain->u.builtin, false, NULL, NULL, module);
975 break;
976 case M4__CHAIN_ARGV:
977 assert (!chain->u.u_a.comma);
978 if (m4__arg_print (context, obs, chain->u.u_a.argv,
979 chain->u.u_a.index,
980 m4__quote_cache (M4SYNTAX, NULL, chain->quote_age,
981 chain->u.u_a.quotes),
982 chain->u.u_a.flatten, NULL, NULL, &maxlen, false,
983 module))
984 done = true;
985 break;
986 default:
987 assert (!"composite_print");
988 abort ();
990 chain = chain->next;
992 if (len)
993 m4_shipout_string_trunc (obs, (char *) obstack_base (current_input), len,
994 NULL, &maxlen);
995 if (quote)
996 m4_shipout_string (context, obs, quotes->str2, quotes->len2, false);
999 static const char *
1000 composite_buffer (m4_input_block *me, m4 *context, size_t *len,
1001 bool allow_quote)
1003 m4__symbol_chain *chain = me->u.u_c.chain;
1004 while (chain)
1006 if (allow_quote && chain->quote_age == m4__quote_age (M4SYNTAX))
1007 return NULL; /* CHAR_QUOTE doesn't fit in buffer. */
1008 switch (chain->type)
1010 case M4__CHAIN_STR:
1011 if (chain->u.u_s.len)
1013 *len = chain->u.u_s.len;
1014 return chain->u.u_s.str;
1016 if (chain->u.u_s.level < SIZE_MAX)
1017 m4__adjust_refcount (context, chain->u.u_s.level, false);
1018 break;
1019 case M4__CHAIN_FUNC:
1020 if (chain->u.builtin)
1021 return NULL; /* CHAR_BUILTIN doesn't fit in buffer. */
1022 break;
1023 case M4__CHAIN_ARGV:
1024 if (chain->u.u_a.index == m4_arg_argc (chain->u.u_a.argv))
1026 m4__arg_adjust_refcount (context, chain->u.u_a.argv, false);
1027 break;
1029 return NULL; /* No buffer to provide. */
1030 case M4__CHAIN_LOC:
1031 me->file = chain->u.u_l.file;
1032 me->line = chain->u.u_l.line;
1033 input_change = true;
1034 me->u.u_c.chain = chain->next;
1035 return next_buffer (context, len, allow_quote);
1036 default:
1037 assert (!"composite_buffer");
1038 abort ();
1040 me->u.u_c.chain = chain = chain->next;
1042 return buffer_retry;
1045 static void
1046 composite_consume (m4_input_block *me, m4 *context M4_GNUC_UNUSED, size_t len)
1048 m4__symbol_chain *chain = me->u.u_c.chain;
1049 assert (chain && chain->type == M4__CHAIN_STR && len <= chain->u.u_s.len);
1050 /* Partial consumption invalidates quote age. */
1051 chain->quote_age = 0;
1052 chain->u.u_s.len -= len;
1053 chain->u.u_s.str += len;
1056 /* Given an obstack OBS, capture any unfinished text as a link in the
1057 chain that starts at *START and ends at *END. START may be NULL if
1058 *END is non-NULL. */
1059 void
1060 m4__make_text_link (m4_obstack *obs, m4__symbol_chain **start,
1061 m4__symbol_chain **end)
1063 m4__symbol_chain *chain;
1064 size_t len = obstack_object_size (obs);
1066 assert (end && (start || *end));
1067 if (len)
1069 char *str = (char *) obstack_finish (obs);
1070 chain = (m4__symbol_chain *) obstack_alloc (obs, sizeof *chain);
1071 if (*end)
1072 (*end)->next = chain;
1073 else
1074 *start = chain;
1075 *end = chain;
1076 chain->next = NULL;
1077 chain->type = M4__CHAIN_STR;
1078 chain->quote_age = 0;
1079 chain->u.u_s.str = str;
1080 chain->u.u_s.len = len;
1081 chain->u.u_s.level = SIZE_MAX;
1085 /* Given an obstack OBS, capture any unfinished text as a link, then
1086 append the builtin FUNC as the next link in the chain that starts
1087 at *START and ends at *END. START may be NULL if *END is
1088 non-NULL. */
1089 void
1090 m4__append_builtin (m4_obstack *obs, const m4__builtin *func,
1091 m4__symbol_chain **start, m4__symbol_chain **end)
1093 m4__symbol_chain *chain;
1095 assert (func);
1096 m4__make_text_link (obs, start, end);
1097 chain = (m4__symbol_chain *) obstack_alloc (obs, sizeof *chain);
1098 if (*end)
1099 (*end)->next = chain;
1100 else
1101 *start = chain;
1102 *end = chain;
1103 chain->next = NULL;
1104 chain->type = M4__CHAIN_FUNC;
1105 chain->quote_age = 0;
1106 chain->u.builtin = func;
1109 /* Push TOKEN, which contains a builtin's definition, onto the obstack
1110 OBS, which is either input stack or the wrapup stack. */
1111 void
1112 m4_push_builtin (m4 *context, m4_obstack *obs, m4_symbol_value *token)
1114 m4_input_block *i = (obs == current_input ? next : wsp);
1115 assert (i);
1116 if (i->funcs == &string_funcs)
1118 i->funcs = &composite_funcs;
1119 i->u.u_c.chain = i->u.u_c.end = NULL;
1121 else
1122 assert (i->funcs == &composite_funcs);
1123 m4__append_builtin (obs, token->u.builtin, &i->u.u_c.chain, &i->u.u_c.end);
1127 /* End of input optimization. By providing these dummy callback
1128 functions, we guarantee that the input stack is never NULL, and
1129 thus make fewer execution branches. */
1130 static int
1131 eof_peek (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
1132 bool allow_argv M4_GNUC_UNUSED)
1134 assert (me == &input_eof);
1135 return CHAR_EOF;
1138 static int
1139 eof_read (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
1140 bool allow_quote M4_GNUC_UNUSED, bool allow_argv M4_GNUC_UNUSED,
1141 bool allow_unget M4_GNUC_UNUSED)
1143 assert (me == &input_eof);
1144 return CHAR_EOF;
1147 static void
1148 eof_unget (m4_input_block *me M4_GNUC_UNUSED, int ch)
1150 assert (ch == CHAR_EOF);
1153 static const char *
1154 eof_buffer (m4_input_block *me M4_GNUC_UNUSED, m4 *context M4_GNUC_UNUSED,
1155 size_t *len M4_GNUC_UNUSED, bool allow_unget M4_GNUC_UNUSED)
1157 return NULL;
1161 /* When tracing, print a summary of the contents of the input block
1162 created by push_string_init/push_string_finish to OBS. Use
1163 DEBUG_LEVEL to determine whether to add quotes or module
1164 designations. */
1165 void
1166 m4_input_print (m4 *context, m4_obstack *obs, int debug_level)
1168 m4_input_block *block = next ? next : isp;
1169 assert (context && obs && (debug_level & M4_DEBUG_TRACE_EXPANSION));
1170 assert (block->funcs->print_func);
1171 block->funcs->print_func (block, context, obs, debug_level);
1174 /* Return an obstack ready for direct expansion of wrapup text, and
1175 set *END to the location that should be updated if any builtin
1176 tokens are wrapped. Store the location of CALLER with the wrapped
1177 text. This should be followed by m4__push_wrapup_finish (). */
1178 m4_obstack *
1179 m4__push_wrapup_init (m4 *context, const m4_call_info *caller,
1180 m4__symbol_chain ***end)
1182 m4_input_block *i;
1183 m4__symbol_chain *chain;
1185 assert (obstack_object_size (wrapup_stack) == 0);
1186 if (wsp != &input_eof)
1188 i = wsp;
1189 assert (i->funcs == &composite_funcs && i->u.u_c.end
1190 && i->u.u_c.end->type != M4__CHAIN_LOC);
1192 else
1194 i = (m4_input_block *) obstack_alloc (wrapup_stack, sizeof *i);
1195 i->prev = wsp;
1196 i->funcs = &composite_funcs;
1197 i->file = caller->file;
1198 i->line = caller->line;
1199 i->u.u_c.chain = i->u.u_c.end = NULL;
1200 wsp = i;
1202 chain = (m4__symbol_chain *) obstack_alloc (wrapup_stack, sizeof *chain);
1203 if (i->u.u_c.end)
1204 i->u.u_c.end->next = chain;
1205 else
1206 i->u.u_c.chain = chain;
1207 i->u.u_c.end = chain;
1208 chain->next = NULL;
1209 chain->type = M4__CHAIN_LOC;
1210 chain->quote_age = 0;
1211 chain->u.u_l.file = caller->file;
1212 chain->u.u_l.line = caller->line;
1213 *end = &i->u.u_c.end;
1214 return wrapup_stack;
1217 /* After pushing wrapup text, this completes the bookkeeping. */
1218 void
1219 m4__push_wrapup_finish (void)
1221 m4__make_text_link (wrapup_stack, &wsp->u.u_c.chain, &wsp->u.u_c.end);
1222 assert (wsp->u.u_c.end->type != M4__CHAIN_LOC);
1226 /* The function pop_input () pops one level of input sources. If
1227 CLEANUP, the current_file and current_line are restored as needed.
1228 The return value is false if cleanup is still required, or if the
1229 current input source is not at the end. */
1230 static bool
1231 pop_input (m4 *context, bool cleanup)
1233 m4_input_block *tmp = isp->prev;
1235 assert (isp);
1236 if (isp->funcs->clean_func
1237 ? !isp->funcs->clean_func (isp, context, cleanup)
1238 : (isp->funcs->peek_func (isp, context, true) != CHAR_RETRY))
1239 return false;
1241 obstack_free (current_input, isp);
1242 m4__quote_uncache (M4SYNTAX);
1243 next = NULL; /* might be set in m4_push_string_init () */
1245 isp = tmp;
1246 input_change = true;
1247 return true;
1250 /* To switch input over to the wrapup stack, main calls pop_wrapup.
1251 Since wrapup text can install new wrapup text, pop_wrapup ()
1252 returns true if there is more wrapped text to parse. */
1253 bool
1254 m4_pop_wrapup (m4 *context)
1256 static size_t level = 0;
1258 next = NULL;
1259 obstack_free (current_input, NULL);
1260 free (current_input);
1262 if (wsp == &input_eof)
1264 obstack_free (wrapup_stack, NULL);
1265 m4_set_current_file (context, NULL);
1266 m4_set_current_line (context, 0);
1267 m4_debug_message (context, M4_DEBUG_TRACE_INPUT,
1268 _("input from m4wrap exhausted"));
1269 current_input = NULL;
1270 DELETE (wrapup_stack);
1271 return false;
1274 m4_debug_message (context, M4_DEBUG_TRACE_INPUT,
1275 _("input from m4wrap recursion level %zu"), ++level);
1277 current_input = wrapup_stack;
1278 wrapup_stack = (m4_obstack *) xmalloc (sizeof *wrapup_stack);
1279 obstack_init (wrapup_stack);
1281 isp = wsp;
1282 wsp = &input_eof;
1283 input_change = true;
1285 return true;
1288 /* Populate TOKEN with the builtin token at the top of the input
1289 stack, then consume the input. If OBS, TOKEN will be converted to
1290 a composite token using storage from OBS as necessary; otherwise,
1291 if TOKEN is NULL, the builtin token is discarded. */
1292 static void
1293 init_builtin_token (m4 *context, m4_obstack *obs, m4_symbol_value *token)
1295 m4__symbol_chain *chain;
1296 assert (isp->funcs == &composite_funcs);
1297 chain = isp->u.u_c.chain;
1298 assert (!chain->quote_age && chain->type == M4__CHAIN_FUNC
1299 && chain->u.builtin);
1300 if (obs)
1302 assert (token);
1303 if (token->type == M4_SYMBOL_VOID)
1305 token->type = M4_SYMBOL_COMP;
1306 token->u.u_c.chain = token->u.u_c.end = NULL;
1307 token->u.u_c.wrapper = false;
1308 token->u.u_c.has_func = false;
1310 assert (token->type == M4_SYMBOL_COMP);
1311 m4__append_builtin (obs, chain->u.builtin, &token->u.u_c.chain,
1312 &token->u.u_c.end);
1314 else if (token)
1316 assert (token->type == M4_SYMBOL_VOID);
1317 m4__set_symbol_value_builtin (token, chain->u.builtin);
1319 chain->u.builtin = NULL;
1322 /* When a QUOTE token is seen, convert VALUE to a composite (if it is
1323 not one already), consisting of any unfinished text on OBS, as well
1324 as the quoted token from the top of the input stack. Use OBS for
1325 any additional allocations needed to store the token chain. */
1326 static void
1327 append_quote_token (m4 *context, m4_obstack *obs, m4_symbol_value *value)
1329 m4__symbol_chain *src_chain = isp->u.u_c.chain;
1330 m4__symbol_chain *chain;
1331 assert (isp->funcs == &composite_funcs && obs && m4__quote_age (M4SYNTAX));
1332 isp->u.u_c.chain = src_chain->next;
1334 /* Speed consideration - for short enough symbols, the speed and
1335 memory overhead of parsing another INPUT_CHAIN link outweighs the
1336 time to inline the symbol text. */
1337 if (src_chain->type == M4__CHAIN_STR
1338 && src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
1340 assert (src_chain->u.u_s.level <= SIZE_MAX);
1341 obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
1342 m4__adjust_refcount (context, src_chain->u.u_s.level, false);
1343 return;
1346 if (value->type == M4_SYMBOL_VOID)
1348 value->type = M4_SYMBOL_COMP;
1349 value->u.u_c.chain = value->u.u_c.end = NULL;
1350 value->u.u_c.wrapper = value->u.u_c.has_func = false;
1352 assert (value->type == M4_SYMBOL_COMP);
1353 m4__make_text_link (obs, &value->u.u_c.chain, &value->u.u_c.end);
1354 chain = (m4__symbol_chain *) obstack_copy (obs, src_chain, sizeof *chain);
1355 if (value->u.u_c.end)
1356 value->u.u_c.end->next = chain;
1357 else
1358 value->u.u_c.chain = chain;
1359 value->u.u_c.end = chain;
1360 if (chain->type == M4__CHAIN_ARGV && chain->u.u_a.has_func)
1361 value->u.u_c.has_func = true;
1362 chain->next = NULL;
1365 /* When an ARGV token is seen, convert VALUE to point to it via a
1366 composite chain. Use OBS for any additional allocations
1367 needed. */
/* VALUE must still be void, OBS must hold no unfinished object, and
   the head of the current composite input block must be an ARGV link
   (all checked by the first assertion below).  */
1368 static void
1369 init_argv_symbol (m4 *context, m4_obstack *obs, m4_symbol_value *value)
1371 m4__symbol_chain *src_chain;
1372 m4__symbol_chain *chain;
1373 int ch;
1374 const m4_string_pair *comments = m4_get_syntax_comments (M4SYNTAX);
1376 assert (value->type == M4_SYMBOL_VOID && isp->funcs == &composite_funcs
1377 && isp->u.u_c.chain->type == M4__CHAIN_ARGV
1378 && obs && obstack_object_size (obs) == 0);
/* Unhook the ARGV link from the input block; it is hooked back in
   below if the last argument turns out to need reparsing.  */
1380 src_chain = isp->u.u_c.chain;
1381 isp->u.u_c.chain = src_chain->next;
1382 value->type = M4_SYMBOL_COMP;
1383 /* Clone the link, since the input will be discarded soon. */
1384 chain = (m4__symbol_chain *) obstack_copy (obs, src_chain, sizeof *chain);
1385 value->u.u_c.chain = value->u.u_c.end = chain;
1386 value->u.u_c.wrapper = true;
1387 value->u.u_c.has_func = chain->u.u_a.has_func;
1388 chain->next = NULL;
1390 /* If the next character is not ',' or ')', then unlink the last
1391 argument from argv and schedule it for reparsing. This way,
1392 expand_argument never has to deal with concatenation of argv with
1393 arbitrary text. Note that the implementation of safe_quotes
1394 ensures peek_input won't return CHAR_ARGV if the user is perverse
1395 enough to mix comment delimiters with argument separators:
1397 define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c))
1398 => 2 (not 3)
1400 Therefore, we do not have to worry about calling MATCH, and thus
1401 do not have to worry about pop_input being called and
1402 invalidating the argv reference.
1404 When the $@ ref is used unchanged, we completely bypass the
1405 decrement of the argv refcount in next_char, since the ref is
1406 still live via the current collect_arguments. However, when the
1407 last element of the $@ ref is reparsed, we must increase the argv
1408 refcount here, to compensate for the fact that it will be
1409 decreased once the final element is parsed. */
1410 assert (!comments->len1
1411 || (!m4_has_syntax (M4SYNTAX, *comments->str1,
1412 M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE)
1413 && *comments->str1 != *src_chain->u.u_a.quotes->str1));
/* Look at, but do not consume, whatever follows the $@ reference.  */
1414 ch = peek_char (context, true);
1415 if (!m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE))
/* Put the source link back, positioned at its final argument with a
   pending comma, and mark the clone so that it skips that last
   argument.  */
1417 isp->u.u_c.chain = src_chain;
1418 src_chain->u.u_a.index = m4_arg_argc (chain->u.u_a.argv) - 1;
1419 src_chain->u.u_a.comma = true;
1420 chain->u.u_a.skip_last = true;
1421 m4__arg_adjust_refcount (context, chain->u.u_a.argv, true);
1426 /* Low level input is done a character at a time. The function
1427 next_char () is used to read and advance the input to the next
1428 character. If ALLOW_QUOTE, and the current input matches the
1429 current quote age, return CHAR_QUOTE and leave consumption of data
1430 for append_quote_token; otherwise, if ALLOW_ARGV, and the current
1431 input matches an argv reference with the correct quoting, return
1432 CHAR_ARGV and leave consumption of data for init_argv_symbol. If
1433 ALLOW_UNGET, then pop input to avoid returning CHAR_RETRY, and
1434 ensure that unget_input can safely be called next. */
1435 static int
1436 next_char (m4 *context, bool allow_quote, bool allow_argv, bool allow_unget)
1438 int ch;
1440 while (1)
1442 if (input_change)
1444 m4_set_current_file (context, isp->file);
1445 m4_set_current_line (context, isp->line);
1446 input_change = false;
1449 assert (isp->funcs->read_func);
1450 while (((ch = isp->funcs->read_func (isp, context, allow_quote,
1451 allow_argv, allow_unget))
1452 != CHAR_RETRY)
1453 || allow_unget)
1455 /* if (!IS_IGNORE (ch)) */
1456 return ch;
1459 /* End of input source --- pop one level. */
1460 pop_input (context, true);
1464 /* The function peek_char () is used to look at the next character in
1465 the input stream. At any given time, it reads from the input_block
1466 on the top of the current input stack. If ALLOW_ARGV, then return
1467 CHAR_ARGV if an entire $@ reference is available for use. */
1468 static int
1469 peek_char (m4 *context, bool allow_argv)
1471 int ch;
1472 m4_input_block *block = isp;
1474 while (1)
1476 assert (block->funcs->peek_func);
1477 ch = block->funcs->peek_func (block, context, allow_argv);
1478 if (ch != CHAR_RETRY)
1480 /* if (IS_IGNORE (ch)) */
1481 /* return next_char (context, false, true, false); */
1482 return ch;
1485 block = block->prev;
1489 /* The function unget_input () puts back a character on the input
1490 stack, using an existing input_block if possible. This is not safe
1491 to call except immediately after next_char(context, aq, aa, true). */
1492 static void
1493 unget_input (int ch)
1495 assert (isp->funcs->unget_func != NULL);
1496 isp->funcs->unget_func (isp, ch);
1499 /* Return a pointer to the available bytes of the current input block,
1500 and set *LEN to the length of the result. If ALLOW_QUOTE, do not
1501 return a buffer for a quoted string. If the result does not fit in
1502 a char (for example, CHAR_EOF or CHAR_QUOTE), or if there is no
1503 readahead data available, return NULL, and the caller must fall
1504 back to next_char(). The buffer is only valid until the next
1505 consume_buffer() or next_char(). */
1506 static const char *
1507 next_buffer (m4 *context, size_t *len, bool allow_quote)
1509 const char *buf;
1510 while (1)
1512 assert (isp);
1513 if (input_change)
1515 m4_set_current_file (context, isp->file);
1516 m4_set_current_line (context, isp->line);
1517 input_change = false;
1520 assert (isp->funcs->buffer_func);
1521 buf = isp->funcs->buffer_func (isp, context, len, allow_quote);
1522 if (buf != buffer_retry)
1523 return buf;
1524 /* End of input source --- pop one level. */
1525 pop_input (context, true);
1529 /* Consume LEN bytes from the current input block, as though by LEN
1530 calls to next_char(). LEN must be less than or equal to the
1531 previous length returned by a successful call to next_buffer(). */
1532 static void
1533 consume_buffer (m4 *context, size_t len)
1535 assert (isp && !input_change);
1536 if (len)
1538 assert (isp->funcs->consume_func);
1539 isp->funcs->consume_func (isp, context, len);
1543 /* skip_line () simply discards all immediately following characters,
1544 up to the first newline. It is only used from m4_dnl (). Report
1545 errors on behalf of CALLER. */
1546 void
1547 m4_skip_line (m4 *context, const m4_call_info *caller)
1549 int ch;
1551 while (1)
1553 size_t len;
1554 const char *buffer = next_buffer (context, &len, false);
1555 if (buffer)
1557 const char *p = (char *) memchr (buffer, '\n', len);
1558 if (p)
1560 consume_buffer (context, p - buffer + 1);
1561 ch = '\n';
1562 break;
1564 consume_buffer (context, len);
1566 else
1568 ch = next_char (context, false, false, false);
1569 if (ch == CHAR_EOF || ch == '\n')
1570 break;
1573 if (ch == CHAR_EOF)
1574 m4_warn (context, 0, caller, _("end of file treated as newline"));
1578 /* If the string S of length LEN matches the next characters of the
1579 input stream, return true. If CONSUME, the first byte has already
1580 been matched. If a match is found and CONSUME is true, the input
1581 is discarded; otherwise any characters read are pushed back again.
1582 The function is used only when multicharacter quotes or comment
1583 delimiters are used.
1585 All strings herein should be unsigned. Otherwise sign-extension
1586 of individual chars might break quotes with 8-bit chars in it.
1588 FIXME - when matching multiquotes that cross file boundaries, we do
1589 not properly restore the current input file and line when we
1590 restore unconsumed characters. */
1591 static bool
1592 match_input (m4 *context, const char *s, size_t len, bool consume)
1594 int n; /* number of characters matched */
1595 int ch; /* input character */
1596 const char *t;
1597 m4_obstack *st;
1598 bool result = false;
1599 size_t buf_len;
/* With CONSUME, the first byte of S was already matched, so only the
   remainder of S is compared below.  */
1601 if (consume)
1603 s++;
1604 len--;
1606 /* Try a buffer match first. */
1607 assert (len);
1608 t = next_buffer (context, &buf_len, false);
1609 if (t && len <= buf_len && memcmp (s, t, len) == 0)
1611 if (consume)
1612 consume_buffer (context, len);
1613 return true;
1615 /* Fall back on byte matching. */
1616 ch = peek_char (context, false);
1617 if (ch != to_uchar (*s))
1618 return false;
1620 if (len == 1)
1622 if (consume)
1623 next_char (context, false, false, false);
1624 return true; /* short match */
/* More than one byte is left to compare.  Consume the byte just
   matched, then keep consuming while each following input byte equals
   the next byte of S.  T records where the consumed text starts
   within S and N counts the bytes consumed, so that exactly those
   bytes can be pushed back afterwards.  */
1627 next_char (context, false, false, false);
1628 for (n = 1, t = s++; peek_char (context, false) == to_uchar (*s++); )
1630 next_char (context, false, false, false);
1631 n++;
1632 if (--len == 1) /* long match */
1634 if (consume)
1635 return true;
1636 result = true;
1637 break;
1641 /* Failed or shouldn't consume, push back input. */
1642 st = m4_push_string_init (context, m4_get_current_file (context),
1643 m4_get_current_line (context));
1644 obstack_grow (st, t, n);
1645 m4_push_string_finish ();
1646 return result;
1649 /* Check whether the current input matches a delimiter, which either
1650 belongs to syntax category CAT or matches the string S of length
1651 LEN. The first character is handled inline for speed, and S[LEN]
1652 must be safe to dereference (it is faster to do character
1653 comparison prior to length checks). This improves efficiency for
1654 the common case of single character quotes and comment delimiters,
1655 while being safe for disabled delimiters as well as longer
1656 delimiters. If CONSUME, then CH is the result of next_char, and a
1657 successful match will discard the matched string. Otherwise, CH is
1658 the result of peek_char, and the input stream is effectively
1659 unchanged. */
/* `(len) >> 1' is nonzero exactly when LEN is at least 2, so
   match_input is only called for multi-byte delimiters.  For shorter
   ones the result is LEN itself: 0 (no match) for an empty delimiter,
   1 (match) for a single byte, which was already compared against CH.
   Note that CH, S and LEN may each be evaluated more than once.  */
1660 #define MATCH(C, ch, cat, s, len, consume) \
1661 (m4_has_syntax (m4_get_syntax_table (C), ch, cat) \
1662 || (to_uchar ((s)[0]) == (ch) \
1663 && ((len) >> 1 ? match_input (C, s, len, consume) : (len))))
1665 /* While the current input character has the given SYNTAX, append it
1666 to OBS. Take care not to pop input source unless the next source
1667 would continue the chain. Return true if the chain ended with
1668 CHAR_EOF. */
1669 static bool
1670 consume_syntax (m4 *context, m4_obstack *obs, unsigned int syntax)
1672 int ch;
/* ALLOW is passed on as the allow_quote argument of next_buffer and
   as both the allow_quote and allow_argv arguments of next_char.  */
1673 bool allow = m4__safe_quotes (M4SYNTAX);
1674 assert (syntax);
1675 while (1)
1677 /* Start with a buffer search. */
1678 size_t len;
1679 const char *buffer = next_buffer (context, &len, allow);
1680 if (buffer)
1682 const char *p = buffer;
/* Find the longest prefix of the buffer whose bytes all have SYNTAX;
   LEN is left holding the number of bytes after that prefix.  */
1683 while (len && m4_has_syntax (M4SYNTAX, *p, syntax))
1685 len--;
1686 p++;
1688 obstack_grow (obs, buffer, p - buffer);
1689 consume_buffer (context, p - buffer);
/* Bytes left over mean that the run ended inside this buffer.  */
1690 if (len)
1691 return false;
1693 /* Fall back to byte-wise search. It is safe to call next_char
1694 without first checking peek_char, except at input source
1695 boundaries, which we detect by CHAR_RETRY. */
1696 ch = next_char (context, allow, allow, true);
1697 if (ch < CHAR_EOF && m4_has_syntax (M4SYNTAX, ch, syntax))
1699 obstack_1grow (obs, ch);
1700 continue;
/* These three results carry no byte to append.  Peek at what comes
   next to decide whether the run continues.  */
1702 if (ch == CHAR_RETRY || ch == CHAR_QUOTE || ch == CHAR_ARGV)
1704 ch = peek_char (context, false);
1705 /* We exploit the fact that CHAR_EOF, CHAR_BUILTIN,
1706 CHAR_QUOTE, and CHAR_ARGV do not satisfy any syntax
1707 categories. */
1708 if (m4_has_syntax (M4SYNTAX, ch, syntax))
1710 assert (ch < CHAR_EOF);
1711 obstack_1grow (obs, ch);
1712 next_char (context, false, false, false);
1713 continue;
1715 return ch == CHAR_EOF;
/* Anything else ends the run; hand the value read back to the
   input.  */
1717 unget_input (ch);
1718 return false;
1723 /* Initialize input stacks. */
1724 void
1725 m4_input_init (m4 *context)
1727 obstack_init (&file_names);
1728 m4_set_current_file (context, NULL);
1729 m4_set_current_line (context, 0);
1731 current_input = (m4_obstack *) xmalloc (sizeof *current_input);
1732 obstack_init (current_input);
1733 wrapup_stack = (m4_obstack *) xmalloc (sizeof *wrapup_stack);
1734 obstack_init (wrapup_stack);
1736 /* Allocate an object in the current chunk, so that obstack_free
1737 will always work even if the first token parsed spills to a new
1738 chunk. */
1739 obstack_init (&token_stack);
1740 token_bottom = obstack_finish (&token_stack);
1742 isp = &input_eof;
1743 wsp = &input_eof;
1744 next = NULL;
1746 start_of_input_line = false;
1749 /* Free memory used by the input engine. */
1750 void
1751 m4_input_exit (void)
1753 assert (!current_input && isp == &input_eof);
1754 assert (!wrapup_stack && wsp == &input_eof);
1755 obstack_free (&file_names, NULL);
1756 obstack_free (&token_stack, NULL);
1760 /* Parse and return a single token from the input stream, constructed
1761 into TOKEN. See m4__token_type for the valid return types, along
1762 with a description of what TOKEN will contain. If LINE is not
1763 NULL, set *LINE to the line number where the token starts. If OBS,
1764 expand safe tokens (strings and comments) directly into OBS rather
1765 than in a temporary staging area. If ALLOW_ARGV, OBS must be
1766 non-NULL, and an entire series of arguments can be returned if a $@
1767 reference is encountered. Report errors (unterminated comments or
1768 strings) on behalf of CALLER, if non-NULL.
1770 If OBS is NULL or the token expansion is unknown, the token text is
1771 collected on the obstack token_stack, which never contains more
1772 than one token text at a time. The storage pointed to by the
1773 fields in TOKEN is therefore subject to change the next time
1774 m4__next_token () is called. */
1775 m4__token_type
1776 m4__next_token (m4 *context, m4_symbol_value *token, int *line,
1777 m4_obstack *obs, bool allow_argv, const m4_call_info *caller)
1779 int ch;
1780 int quote_level;
1781 m4__token_type type;
1782 const char *file = NULL;
1783 size_t len;
1784 /* The obstack where token data is stored. Generally token_stack,
1785 for tokens where argument collection might not use the literal
1786 token. But for comments and strings, we can output directly into
1787 the argument collection obstack OBS, if provided. */
1788 m4_obstack *obs_safe = &token_stack;
1790 assert (next == NULL);
1791 memset (token, '\0', sizeof *token);
1792 do {
1793 obstack_free (&token_stack, token_bottom);
1795 /* Must consume an input character. */
1796 ch = next_char (context, false, allow_argv && m4__quote_age (M4SYNTAX),
1797 false);
1798 if (line)
1800 *line = m4_get_current_line (context);
1801 file = m4_get_current_file (context);
1803 if (ch == CHAR_EOF) /* EOF */
1805 #ifdef DEBUG_INPUT
1806 xfprintf (stderr, "next_token -> EOF\n");
1807 #endif
1808 return M4_TOKEN_EOF;
1811 if (ch == CHAR_BUILTIN) /* BUILTIN TOKEN */
1813 init_builtin_token (context, obs, token);
1814 #ifdef DEBUG_INPUT
1815 m4_print_token (context, "next_token", M4_TOKEN_MACDEF, token);
1816 #endif
1817 return M4_TOKEN_MACDEF;
1819 if (ch == CHAR_ARGV)
1821 init_argv_symbol (context, obs, token);
1822 #ifdef DEBUG_INPUT
1823 m4_print_token (context, "next_token", M4_TOKEN_ARGV, token);
1824 #endif
1825 return M4_TOKEN_ARGV;
1828 if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ESCAPE))
1829 { /* ESCAPED WORD */
1830 obstack_1grow (&token_stack, ch);
1831 if ((ch = next_char (context, false, false, false)) < CHAR_EOF)
1833 obstack_1grow (&token_stack, ch);
1834 if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ALPHA))
1835 consume_syntax (context, &token_stack,
1836 M4_SYNTAX_ALPHA | M4_SYNTAX_NUM);
1837 type = M4_TOKEN_WORD;
1839 else
1840 type = M4_TOKEN_SIMPLE; /* escape before eof */
1842 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ALPHA))
1844 type = (m4_is_syntax_macro_escaped (M4SYNTAX)
1845 ? M4_TOKEN_STRING : M4_TOKEN_WORD);
1846 if (type == M4_TOKEN_STRING && obs)
1847 obs_safe = obs;
1848 obstack_1grow (obs_safe, ch);
1849 consume_syntax (context, obs_safe, M4_SYNTAX_ALPHA | M4_SYNTAX_NUM);
1851 else if (MATCH (context, ch, M4_SYNTAX_LQUOTE,
1852 context->syntax->quote.str1,
1853 context->syntax->quote.len1, true))
1854 { /* QUOTED STRING */
1855 if (obs)
1856 obs_safe = obs;
1857 quote_level = 1;
1858 type = M4_TOKEN_STRING;
1859 while (1)
1861 /* Start with buffer search for either potential delimiter. */
1862 size_t len;
1863 const char *buffer = next_buffer (context, &len,
1864 obs && m4__quote_age (M4SYNTAX));
1865 if (buffer)
1867 const char *p = buffer;
1868 if (m4_is_syntax_single_quotes (M4SYNTAX))
1871 p = (char *) memchr2 (p, *context->syntax->quote.str1,
1872 *context->syntax->quote.str2,
1873 buffer + len - p);
1875 while (p && m4__quote_age (M4SYNTAX)
1876 && (*p++ == *context->syntax->quote.str2
1877 ? --quote_level : ++quote_level));
1878 else
1880 size_t remaining = len;
1881 assert (context->syntax->quote.len1 == 1
1882 && context->syntax->quote.len2 == 1);
1883 while (remaining && !m4_has_syntax (M4SYNTAX, *p,
1884 (M4_SYNTAX_LQUOTE
1885 | M4_SYNTAX_RQUOTE)))
1887 p++;
1888 remaining--;
1890 if (!remaining)
1891 p = NULL;
1893 if (p)
1895 if (m4__quote_age (M4SYNTAX))
1897 assert (!quote_level
1898 && context->syntax->quote.len1 == 1
1899 && context->syntax->quote.len2 == 1);
1900 obstack_grow (obs_safe, buffer, p - buffer - 1);
1901 consume_buffer (context, p - buffer);
1902 break;
1904 obstack_grow (obs_safe, buffer, p - buffer);
1905 ch = to_uchar (*p);
1906 consume_buffer (context, p - buffer + 1);
1908 else
1910 obstack_grow (obs_safe, buffer, len);
1911 consume_buffer (context, len);
1912 continue;
1915 /* Fall back to byte-wise search. */
1916 else
1917 ch = next_char (context, obs && m4__quote_age (M4SYNTAX), false,
1918 false);
1919 if (ch == CHAR_EOF)
1921 if (!caller)
1923 assert (line);
1924 m4_set_current_file (context, file);
1925 m4_set_current_line (context, *line);
1927 m4_error (context, EXIT_FAILURE, 0, caller,
1928 _("end of file in string"));
1930 if (ch == CHAR_BUILTIN)
1931 init_builtin_token (context, obs, obs ? token : NULL);
1932 else if (ch == CHAR_QUOTE)
1933 append_quote_token (context, obs, token);
1934 else if (MATCH (context, ch, M4_SYNTAX_RQUOTE,
1935 context->syntax->quote.str2,
1936 context->syntax->quote.len2, true))
1938 if (--quote_level == 0)
1939 break;
1940 if (1 < context->syntax->quote.len2)
1941 obstack_grow (obs_safe, context->syntax->quote.str2,
1942 context->syntax->quote.len2);
1943 else
1944 obstack_1grow (obs_safe, ch);
1946 else if (MATCH (context, ch, M4_SYNTAX_LQUOTE,
1947 context->syntax->quote.str1,
1948 context->syntax->quote.len1, true))
1950 quote_level++;
1951 if (1 < context->syntax->quote.len1)
1952 obstack_grow (obs_safe, context->syntax->quote.str1,
1953 context->syntax->quote.len1);
1954 else
1955 obstack_1grow (obs_safe, ch);
1957 else
1958 obstack_1grow (obs_safe, ch);
1961 else if (MATCH (context, ch, M4_SYNTAX_BCOMM,
1962 context->syntax->comm.str1,
1963 context->syntax->comm.len1, true))
1964 { /* COMMENT */
1965 if (obs && !m4_get_discard_comments_opt (context))
1966 obs_safe = obs;
1967 if (1 < context->syntax->comm.len1)
1968 obstack_grow (obs_safe, context->syntax->comm.str1,
1969 context->syntax->comm.len1);
1970 else
1971 obstack_1grow (obs_safe, ch);
1972 while (1)
1974 /* Start with buffer search for potential end delimiter. */
1975 size_t len;
1976 const char *buffer = next_buffer (context, &len, false);
1977 if (buffer)
1979 const char *p;
1980 if (m4_is_syntax_single_comments (M4SYNTAX))
1981 p = (char *) memchr (buffer, *context->syntax->comm.str2,
1982 len);
1983 else
1985 size_t remaining = len;
1986 assert (context->syntax->comm.len2 == 1);
1987 p = buffer;
1988 while (remaining
1989 && !m4_has_syntax (M4SYNTAX, *p, M4_SYNTAX_ECOMM))
1991 p++;
1992 remaining--;
1994 if (!remaining)
1995 p = NULL;
1997 if (p)
1999 obstack_grow (obs_safe, buffer, p - buffer);
2000 ch = to_uchar (*p);
2001 consume_buffer (context, p - buffer + 1);
2003 else
2005 obstack_grow (obs_safe, buffer, len);
2006 consume_buffer (context, len);
2007 continue;
2010 /* Fall back to byte-wise search. */
2011 else
2012 ch = next_char (context, false, false, false);
2013 if (ch == CHAR_EOF)
2015 if (!caller)
2017 assert (line);
2018 m4_set_current_file (context, file);
2019 m4_set_current_line (context, *line);
2021 m4_error (context, EXIT_FAILURE, 0, caller,
2022 _("end of file in comment"));
2024 if (ch == CHAR_BUILTIN)
2026 init_builtin_token (context, NULL, NULL);
2027 continue;
2029 if (MATCH (context, ch, M4_SYNTAX_ECOMM,
2030 context->syntax->comm.str2,
2031 context->syntax->comm.len2, true))
2033 if (1 < context->syntax->comm.len2)
2034 obstack_grow (obs_safe, context->syntax->comm.str2,
2035 context->syntax->comm.len2);
2036 else
2037 obstack_1grow (obs_safe, ch);
2038 break;
2040 assert (ch < CHAR_EOF);
2041 obstack_1grow (obs_safe, ch);
2043 type = (m4_get_discard_comments_opt (context)
2044 ? M4_TOKEN_NONE : M4_TOKEN_COMMENT);
2046 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
2047 { /* ACTIVE CHARACTER */
2048 obstack_1grow (&token_stack, ch);
2049 type = M4_TOKEN_WORD;
2051 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN))
2052 { /* OPEN PARENTHESIS */
2053 obstack_1grow (&token_stack, ch);
2054 type = M4_TOKEN_OPEN;
2056 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA))
2057 { /* COMMA */
2058 obstack_1grow (&token_stack, ch);
2059 type = M4_TOKEN_COMMA;
2061 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_CLOSE))
2062 { /* CLOSE PARENTHESIS */
2063 obstack_1grow (&token_stack, ch);
2064 type = M4_TOKEN_CLOSE;
2066 else
2067 { /* EVERYTHING ELSE */
2068 assert (ch < CHAR_EOF);
2069 obstack_1grow (&token_stack, ch);
2070 if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OTHER | M4_SYNTAX_NUM))
2072 if (obs)
2074 obs_safe = obs;
2075 obstack_1grow (obs, ch);
2077 if (m4__safe_quotes (M4SYNTAX))
2078 consume_syntax (context, obs_safe,
2079 M4_SYNTAX_OTHER | M4_SYNTAX_NUM);
2080 type = M4_TOKEN_STRING;
2082 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
2084 /* Coalescing newlines when interactive or when synclines
2085 are enabled is wrong. */
2086 if (!m4_get_interactive_opt (context)
2087 && !m4_get_syncoutput_opt (context)
2088 && m4__safe_quotes (M4SYNTAX))
2089 consume_syntax (context, &token_stack, M4_SYNTAX_SPACE);
2090 type = M4_TOKEN_SPACE;
2092 else
2093 type = M4_TOKEN_SIMPLE;
2095 } while (type == M4_TOKEN_NONE);
2097 if (token->type == M4_SYMBOL_VOID)
2099 if (obs_safe != obs)
2101 len = obstack_object_size (&token_stack);
2102 obstack_1grow (&token_stack, '\0');
2104 m4_set_symbol_value_text (token, obstack_finish (&token_stack), len,
2105 m4__quote_age (M4SYNTAX));
2107 else
2108 assert (type == M4_TOKEN_STRING || type == M4_TOKEN_COMMENT);
2110 else
2111 assert (token->type == M4_SYMBOL_COMP
2112 && (type == M4_TOKEN_STRING || type == M4_TOKEN_COMMENT));
2113 VALUE_MAX_ARGS (token) = -1;
2115 #ifdef DEBUG_INPUT
2116 if (token->type == M4_SYMBOL_VOID)
2118 len = obstack_object_size (&token_stack);
2119 obstack_1grow (&token_stack, '\0');
2121 m4_set_symbol_value_text (token, obstack_finish (&token_stack), len,
2122 m4__quote_age (M4SYNTAX));
2125 m4_print_token (context, "next_token", type, token);
2126 #endif
2128 return type;
2131 /* Peek at the next token in the input stream to see if it is an open
2132 parenthesis. It is possible that what is peeked at may change as a
2133 result of changequote (or friends). This honors multi-character
2134 comments and quotes, just as next_token does. */
2135 bool
2136 m4__next_token_is_open (m4 *context)
2138 int ch = peek_char (context, false);
2140 if (ch == CHAR_EOF || ch == CHAR_BUILTIN
2141 || m4_has_syntax (M4SYNTAX, ch, (M4_SYNTAX_BCOMM | M4_SYNTAX_ESCAPE
2142 | M4_SYNTAX_ALPHA | M4_SYNTAX_LQUOTE
2143 | M4_SYNTAX_ACTIVE))
2144 || (MATCH (context, ch, M4_SYNTAX_BCOMM, context->syntax->comm.str1,
2145 context->syntax->comm.len1, false))
2146 || (MATCH (context, ch, M4_SYNTAX_LQUOTE, context->syntax->quote.str1,
2147 context->syntax->quote.len1, false)))
2148 return false;
2149 return m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN);
2153 #ifdef DEBUG_INPUT
2156 m4_print_token (m4 *context, const char *s, m4__token_type type,
2157 m4_symbol_value *token)
2159 m4_obstack obs;
2160 size_t len;
2162 if (!s)
2163 s = "m4input";
2164 xfprintf (stderr, "%s: ", s);
2165 switch (type)
2166 { /* TOKSW */
2167 case M4_TOKEN_EOF:
2168 fputs ("eof", stderr);
2169 token = NULL;
2170 break;
2171 case M4_TOKEN_NONE:
2172 fputs ("none", stderr);
2173 token = NULL;
2174 break;
2175 case M4_TOKEN_STRING:
2176 fputs ("string\t", stderr);
2177 break;
2178 case M4_TOKEN_COMMENT:
2179 fputs ("comment\t", stderr);
2180 break;
2181 case M4_TOKEN_SPACE:
2182 fputs ("space\t", stderr);
2183 break;
2184 case M4_TOKEN_WORD:
2185 fputs ("word\t", stderr);
2186 break;
2187 case M4_TOKEN_OPEN:
2188 fputs ("open\t", stderr);
2189 break;
2190 case M4_TOKEN_COMMA:
2191 fputs ("comma\t", stderr);
2192 break;
2193 case M4_TOKEN_CLOSE:
2194 fputs ("close\t", stderr);
2195 break;
2196 case M4_TOKEN_SIMPLE:
2197 fputs ("simple\t", stderr);
2198 break;
2199 case M4_TOKEN_MACDEF:
2200 fputs ("builtin\t", stderr);
2201 break;
2202 case M4_TOKEN_ARGV:
2203 fputs ("argv\t", stderr);
2204 break;
2205 default:
2206 abort ();
2208 if (token)
2210 obstack_init (&obs);
2211 m4__symbol_value_print (context, token, &obs, NULL, false, NULL, NULL,
2212 true);
2213 len = obstack_object_size (&obs);
2214 xfprintf (stderr, "%s\n", quotearg_style_mem (c_maybe_quoting_style,
2215 obstack_finish (&obs),
2216 len));
2217 obstack_free (&obs, NULL);
2219 else
2220 fputc ('\n', stderr);
2221 return 0;
2223 #endif /* DEBUG_INPUT */