Typos in source code comments.
[m4.git] / m4 / input.c
blob1a5fbc58a7c0ef739ddda46ebb674d5c5732d179
1 /* GNU m4 -- A simple macro processor
2 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008
3 Free Software Foundation, Inc.
5 This file is part of GNU M4.
7 GNU M4 is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU M4 is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Handling of different input sources, and lexical analysis. */
23 #include <config.h>
25 #include "m4private.h"
27 /* Define this to see runtime debug info. Implied by DEBUG. */
28 /*#define DEBUG_INPUT */
30 /* Maximum number of bytes where it is more efficient to inline the
31 reference as a string than it is to track reference bookkeeping for
32 those bytes. */
33 #define INPUT_INLINE_THRESHOLD 16
36 /* Unread input can be either files that should be read (from the
37 command line or by include/sinclude), strings which should be
38 rescanned (normal macro expansion text), or quoted builtin
39 definitions (as returned by the builtin "defn"). Unread input is
40 organized in a stack, implemented with an obstack. Each input
41 source is described by a "struct m4_input_block". The obstack is
42 "input_stack". The top of the input stack is "isp".
44 Each input_block has an associated struct input_funcs, which is a
45 vtable that defines polymorphic functions for peeking, reading,
46 unget, cleanup, and printing in trace output. All input is done
47 through the function pointers of the input_funcs on the given
48 input_block, and all characters are unsigned, to distinguish them
49 from stdio EOF and from special sentinel characters. When an
50 input_block is exhausted, its reader returns CHAR_RETRY which
51 causes the input_block to be popped from the input_stack.
53 The macro "m4wrap" places the text to be saved on another input
54 stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
55 is seen on normal input (e.g., when "current_input" is empty), input
56 is switched over to "wrapup_stack", and the original
57 "current_input" is freed. A new stack is allocated for
58 "wrapup_stack", which will accept any text produced by calls to
59 "m4wrap" from within the wrapped text. This process of shuffling
60 "wrapup_stack" to "current_input" can continue indefinitely, even
61 generating infinite loops (e.g. "define(`f',`m4wrap(`f')')f"),
62 without memory leaks. Adding wrapped data is done through
63 m4__push_wrapup_init/m4__push_wrapup_finish().
65 Pushing new input on the input stack is done by m4_push_file(), the
66 conceptual m4_push_string(), and m4_push_builtin() (for builtin
67 definitions). As an optimization, since most macro expansions
68 result in strings, m4_push_string() is split in two parts,
69 push_string_init(), which returns a pointer to the obstack for
70 growing the output, and push_string_finish(), which returns a
71 pointer to the finished input_block. Thus, instead of creating a
72 new input block for every character pushed, macro expansion need
73 only add text to the top of the obstack. However, it is not safe
74 to alter the input stack while a string is being constructed. This
75 means the input engine is in one of two states: consuming input, or
76 collecting a macro's expansion. The input_block *next is used to
77 manage the coordination between the different push routines.
79 Normally, input sources behave in LIFO order, resembling a stack.
80 But thanks to the defn and m4wrap macros, when collecting the
81 expansion of a macro, it is possible that we must intermix multiple
82 input blocks in FIFO order. Therefore, when collecting an
83 expansion, a meta-input block is formed which will visit its
84 children in FIFO order, without losing data when the obstack is
85 cleared in LIFO order.
87 The current file and line number are stored in the context, for use
88 by the error handling functions in utility.c. When collecting a
89 macro's expansion, these variables can be temporarily inconsistent
90 in order to provide better error message locations, but they must
91 be restored before further parsing takes place. Each input block
92 maintains its own notion of the current file and line, so swapping
93 between input blocks must update the context accordingly. */
95 typedef struct m4_input_block m4_input_block;
97 static int file_peek (m4_input_block *, m4 *, bool);
98 static int file_read (m4_input_block *, m4 *, bool, bool,
99 bool);
100 static void file_unget (m4_input_block *, int);
101 static bool file_clean (m4_input_block *, m4 *, bool);
102 static void file_print (m4_input_block *, m4 *, m4_obstack *,
103 int);
104 static int string_peek (m4_input_block *, m4 *, bool);
105 static int string_read (m4_input_block *, m4 *, bool, bool,
106 bool);
107 static void string_unget (m4_input_block *, int);
108 static void string_print (m4_input_block *, m4 *, m4_obstack *,
109 int);
110 static int composite_peek (m4_input_block *, m4 *, bool);
111 static int composite_read (m4_input_block *, m4 *, bool, bool,
112 bool);
113 static void composite_unget (m4_input_block *, int);
114 static bool composite_clean (m4_input_block *, m4 *, bool);
115 static void composite_print (m4_input_block *, m4 *, m4_obstack *,
116 int);
117 static int eof_peek (m4_input_block *, m4 *, bool);
118 static int eof_read (m4_input_block *, m4 *, bool, bool,
119 bool);
120 static void eof_unget (m4_input_block *, int);
122 static void init_builtin_token (m4 *, m4_obstack *,
123 m4_symbol_value *);
124 static void append_quote_token (m4 *, m4_obstack *,
125 m4_symbol_value *);
126 static bool match_input (m4 *, const char *, size_t, bool);
127 static int next_char (m4 *, bool, bool, bool);
128 static int peek_char (m4 *, bool);
129 static bool pop_input (m4 *, bool);
130 static void unget_input (int);
131 static bool consume_syntax (m4 *, m4_obstack *, unsigned int);
133 #ifdef DEBUG_INPUT
134 # include "quotearg.h"
136 static int m4_print_token (m4 *, const char *, m4__token_type,
137 m4_symbol_value *);
138 #endif
140 /* Vtable of callbacks for each input method. */
141 struct input_funcs
143 /* Peek at input, return an unsigned char, CHAR_BUILTIN if it is a
144 builtin, or CHAR_RETRY if none available. If ALLOW_ARGV, then
145 CHAR_ARGV may be returned. */
146 int (*peek_func) (m4_input_block *, m4 *, bool);
148 /* Read input, return an unsigned char, CHAR_BUILTIN if it is a
149 builtin, or CHAR_RETRY if none available. If ALLOW_QUOTE, then
150 CHAR_QUOTE may be returned. If ALLOW_ARGV, then CHAR_ARGV may be
151 returned. If ALLOW_UNGET, then ensure that the next unget_func
152 will work with the returned character. */
153 int (*read_func) (m4_input_block *, m4 *, bool allow_quote,
154 bool allow_argv, bool allow_unget);
156 /* Unread a single unsigned character or CHAR_BUILTIN, must be the
157 same character previously read by read_func. */
158 void (*unget_func) (m4_input_block *, int);
160 /* Optional function to perform cleanup at end of input. If
161 CLEANUP, it is safe to perform non-recoverable cleanup actions.
162 Return true only if no cleanup remains to be done. */
163 bool (*clean_func) (m4_input_block *, m4 *, bool cleanup);
165 /* Add a representation of the input block to the obstack, for use
166 in trace expansion output. */
167 void (*print_func) (m4_input_block *, m4 *, m4_obstack *, int);
170 /* A block of input to be scanned. */
171 struct m4_input_block
173 m4_input_block *prev; /* Previous input_block on the input stack. */
174 struct input_funcs *funcs; /* Virtual functions of this input_block. */
175 const char *file; /* File where this input is from. */
176 int line; /* Line where this input is from. */
178 union
180 struct
182 char *str; /* String value. */
183 size_t len; /* Remaining length. */
185 u_s; /* See string_funcs. */
186 struct
188 FILE *fp; /* Input file handle. */
189 bool_bitfield end : 1; /* True iff peek returned EOF. */
190 bool_bitfield close : 1; /* True to close file on pop. */
191 bool_bitfield line_start : 1; /* Saved start_of_input_line state. */
193 u_f; /* See file_funcs. */
194 struct
196 m4__symbol_chain *chain; /* Current link in chain. */
197 m4__symbol_chain *end; /* Last link in chain. */
199 u_c; /* See composite_funcs. */
205 /* Obstack for storing individual tokens. */
206 static m4_obstack token_stack;
208 /* Obstack for storing input file names. */
209 static m4_obstack file_names;
211 /* Wrapup input stack. */
212 static m4_obstack *wrapup_stack;
214 /* Current stack, from input or wrapup. */
215 static m4_obstack *current_input;
217 /* Bottom of token_stack, for obstack_free. */
218 static void *token_bottom;
220 /* Pointer to top of current_input, never NULL. */
221 static m4_input_block *isp;
223 /* Pointer to top of wrapup_stack, never NULL. */
224 static m4_input_block *wsp;
226 /* Auxiliary for handling split m4_push_string (), NULL when not
227 pushing text for rescanning. */
228 static m4_input_block *next;
230 /* Flag for next_char () to increment current_line. */
231 static bool start_of_input_line;
233 /* Flag for next_char () to recognize change in input block. */
234 static bool input_change;
236 /* Vtable for handling input from files. */
237 static struct input_funcs file_funcs = {
238 file_peek, file_read, file_unget, file_clean, file_print
241 /* Vtable for handling input from strings. */
242 static struct input_funcs string_funcs = {
243 string_peek, string_read, string_unget, NULL, string_print
246 /* Vtable for handling input from composite chains. */
247 static struct input_funcs composite_funcs = {
248 composite_peek, composite_read, composite_unget, composite_clean,
249 composite_print
252 /* Vtable for recognizing end of input. */
253 static struct input_funcs eof_funcs = {
254 eof_peek, eof_read, eof_unget, NULL, NULL
257 /* Marker at end of an input stack. */
258 static m4_input_block input_eof = { NULL, &eof_funcs, "", 0 };
261 /* Input files, from command line or [s]include. */
262 static int
263 file_peek (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
264 bool allow_argv M4_GNUC_UNUSED)
266 int ch;
268 ch = me->u.u_f.end ? EOF : getc (me->u.u_f.fp);
269 if (ch == EOF)
271 me->u.u_f.end = true;
272 return CHAR_RETRY;
275 ungetc (ch, me->u.u_f.fp);
276 return ch;
279 static int
280 file_read (m4_input_block *me, m4 *context, bool allow_quote M4_GNUC_UNUSED,
281 bool allow_argv M4_GNUC_UNUSED, bool allow_unget M4_GNUC_UNUSED)
283 int ch;
285 if (start_of_input_line)
287 start_of_input_line = false;
288 m4_set_current_line (context, ++me->line);
291 /* If stdin is a terminal, calling getc after peek_char already
292 called it would make the user have to hit ^D twice to quit. */
293 ch = me->u.u_f.end ? EOF : getc (me->u.u_f.fp);
294 if (ch == EOF)
296 me->u.u_f.end = true;
297 return CHAR_RETRY;
300 if (ch == '\n')
301 start_of_input_line = true;
302 return ch;
305 static void
306 file_unget (m4_input_block *me, int ch)
308 assert (ch < CHAR_EOF);
309 if (ungetc (ch, me->u.u_f.fp) < 0)
311 assert (!"INTERNAL ERROR: failed ungetc!");
312 abort (); /* ungetc should not be called without a previous read. */
314 me->u.u_f.end = false;
315 if (ch == '\n')
316 start_of_input_line = false;
319 static bool
320 file_clean (m4_input_block *me, m4 *context, bool cleanup)
322 if (!cleanup)
323 return false;
324 if (me->prev != &input_eof)
325 m4_debug_message (context, M4_DEBUG_TRACE_INPUT,
326 _("input reverted to %s, line %d"),
327 me->prev->file, me->prev->line);
328 else
329 m4_debug_message (context, M4_DEBUG_TRACE_INPUT, _("input exhausted"));
331 if (ferror (me->u.u_f.fp))
333 m4_error (context, 0, 0, NULL, _("error reading file `%s'"), me->file);
334 if (me->u.u_f.close)
335 fclose (me->u.u_f.fp);
337 else if (me->u.u_f.close && fclose (me->u.u_f.fp) == EOF)
338 m4_error (context, 0, errno, NULL, _("error reading file `%s'"), me->file);
339 start_of_input_line = me->u.u_f.line_start;
340 m4_set_output_line (context, -1);
341 return true;
344 static void
345 file_print (m4_input_block *me, m4 *context M4_GNUC_UNUSED, m4_obstack *obs,
346 int debug_level M4_GNUC_UNUSED)
348 const char *text = me->file;
349 assert (obstack_object_size (current_input) == 0);
350 obstack_grow (obs, "<file: ", strlen ("<file: "));
351 obstack_grow (obs, text, strlen (text));
352 obstack_1grow (obs, '>');
355 /* m4_push_file () pushes an input file FP with name TITLE on the
356 input stack, saving the current file name and line number. If next
357 is non-NULL, this push invalidates a call to m4_push_string_init (),
358 whose storage is consequently released. If CLOSE, then close FP at
359 end of file.
361 file_read () manages line numbers for error messages, so they do not
362 get wrong due to lookahead. The token consisting of a newline
363 alone is taken as belonging to the line it ends, and the current
364 line number is not incremented until the next character is read. */
365 void
366 m4_push_file (m4 *context, FILE *fp, const char *title, bool close_file)
368 m4_input_block *i;
370 if (next != NULL)
372 obstack_free (current_input, next);
373 next = NULL;
376 m4_debug_message (context, M4_DEBUG_TRACE_INPUT,
377 _("input read from %s"), title);
379 i = (m4_input_block *) obstack_alloc (current_input, sizeof *i);
380 i->funcs = &file_funcs;
381 /* Save title on a separate obstack, so that wrapped text can refer
382 to it even after the file is popped. */
383 i->file = obstack_copy0 (&file_names, title, strlen (title));
384 i->line = 1;
386 i->u.u_f.fp = fp;
387 i->u.u_f.end = false;
388 i->u.u_f.close = close_file;
389 i->u.u_f.line_start = start_of_input_line;
391 m4_set_output_line (context, -1);
393 i->prev = isp;
394 isp = i;
395 input_change = true;
399 /* Handle string expansion text. */
400 static int
401 string_peek (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
402 bool allow_argv M4_GNUC_UNUSED)
404 return me->u.u_s.len ? to_uchar (*me->u.u_s.str) : CHAR_RETRY;
407 static int
408 string_read (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
409 bool allow_quote M4_GNUC_UNUSED, bool allow_argv M4_GNUC_UNUSED,
410 bool allow_unget M4_GNUC_UNUSED)
412 if (!me->u.u_s.len)
413 return CHAR_RETRY;
414 me->u.u_s.len--;
415 return to_uchar (*me->u.u_s.str++);
418 static void
419 string_unget (m4_input_block *me, int ch)
421 assert (ch < CHAR_EOF && to_uchar (me->u.u_s.str[-1]) == ch);
422 me->u.u_s.str--;
423 me->u.u_s.len++;
426 static void
427 string_print (m4_input_block *me, m4 *context, m4_obstack *obs,
428 int debug_level)
430 bool quote = (debug_level & M4_DEBUG_TRACE_QUOTE) != 0;
431 size_t arg_length = m4_get_max_debug_arg_length_opt (context);
433 assert (!me->u.u_s.len);
434 m4_shipout_string_trunc (obs, (char *) obstack_base (current_input),
435 obstack_object_size (current_input),
436 quote ? m4_get_syntax_quotes (M4SYNTAX) : NULL,
437 &arg_length);
440 /* First half of m4_push_string (). The pointer next points to the
441 new input_block. FILE and LINE describe the location where the
442 macro starts that is generating the expansion (even if the location
443 has advanced in the meantime). Return the obstack that will
444 collect the expansion text. */
445 m4_obstack *
446 m4_push_string_init (m4 *context, const char *file, int line)
448 /* Free any memory occupied by completely parsed input. */
449 assert (!next);
450 while (pop_input (context, false));
452 /* Reserve the next location on the obstack. */
453 next = (m4_input_block *) obstack_alloc (current_input, sizeof *next);
454 next->funcs = &string_funcs;
455 next->file = file;
456 next->line = line;
457 next->u.u_s.len = 0;
459 return current_input;
462 /* This function allows gathering input from multiple locations,
463 rather than copying everything consecutively onto the input stack.
464 Must be called between push_string_init and push_string_finish.
466 Convert the current input block into a chain if it is not one
467 already, and add the contents of VALUE as a new link in the chain.
468 LEVEL describes the current expansion level, or SIZE_MAX if VALUE
469 is composite, its contents reside entirely on the current_input
470 stack, and VALUE lives in temporary storage. If VALUE is a simple
471 string, then it belongs to the current macro expansion. If VALUE
472 is composite, then each text link has a level of SIZE_MAX if it
473 belongs to the current macro expansion, otherwise it is a
474 back-reference where level tracks which stack it came from. The
475 resulting input block chain contains links with a level of SIZE_MAX
476 if the text belongs to the input stack, otherwise the level where
477 the back-reference comes from.
479 Return true only if a reference was created to the contents of
480 VALUE, in which case, LEVEL is less than SIZE_MAX and the lifetime
481 of VALUE and its contents must last as long as the input engine can
482 parse references from it. INUSE determines whether composite
483 symbols should favor creating back-references or copying text. */
484 bool
485 m4__push_symbol (m4 *context, m4_symbol_value *value, size_t level, bool inuse)
487 m4__symbol_chain *src_chain = NULL;
488 m4__symbol_chain *chain;
490 assert (next);
492 /* Speed consideration - for short enough symbols, the speed and
493 memory overhead of parsing another INPUT_CHAIN link outweighs the
494 time to inline the symbol text. But don't copy text if it
495 already lives on the obstack. */
496 if (m4_is_symbol_value_text (value))
498 assert (level < SIZE_MAX);
499 if (m4_get_symbol_value_len (value) <= INPUT_INLINE_THRESHOLD)
501 obstack_grow (current_input, m4_get_symbol_value_text (value),
502 m4_get_symbol_value_len (value));
503 return false;
506 else if (m4_is_symbol_value_func (value))
508 if (next->funcs == &string_funcs)
510 next->funcs = &composite_funcs;
511 next->u.u_c.chain = next->u.u_c.end = NULL;
513 m4__append_builtin (current_input, value->u.builtin, &next->u.u_c.chain,
514 &next->u.u_c.end);
515 return false;
517 else
519 /* For composite values, if argv is already in use, creating
520 additional references for long text segments is more
521 efficient in time. But if argv is not yet in use, and we
522 have a composite value, then the value must already contain a
523 back-reference, and memory usage is more efficient if we can
524 avoid using the current expand_macro, even if it means larger
525 copies. */
526 assert (value->type == M4_SYMBOL_COMP);
527 src_chain = value->u.u_c.chain;
528 while (level < SIZE_MAX && src_chain && src_chain->type == M4__CHAIN_STR
529 && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
530 || (!inuse && src_chain->u.u_s.level == SIZE_MAX)))
532 obstack_grow (current_input, src_chain->u.u_s.str,
533 src_chain->u.u_s.len);
534 src_chain = src_chain->next;
536 if (!src_chain)
537 return false;
540 if (next->funcs == &string_funcs)
542 next->funcs = &composite_funcs;
543 next->u.u_c.chain = next->u.u_c.end = NULL;
545 m4__make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
546 if (m4_is_symbol_value_text (value))
548 chain = (m4__symbol_chain *) obstack_alloc (current_input,
549 sizeof *chain);
550 if (next->u.u_c.end)
551 next->u.u_c.end->next = chain;
552 else
553 next->u.u_c.chain = chain;
554 next->u.u_c.end = chain;
555 chain->next = NULL;
556 chain->type = M4__CHAIN_STR;
557 chain->quote_age = m4_get_symbol_value_quote_age (value);
558 chain->u.u_s.str = m4_get_symbol_value_text (value);
559 chain->u.u_s.len = m4_get_symbol_value_len (value);
560 chain->u.u_s.level = level;
561 m4__adjust_refcount (context, level, true);
562 inuse = true;
564 while (src_chain)
566 if (src_chain->type == M4__CHAIN_FUNC)
568 m4__append_builtin (current_input, src_chain->u.builtin,
569 &next->u.u_c.chain, &next->u.u_c.end);
570 src_chain = src_chain->next;
571 continue;
573 if (level == SIZE_MAX)
575 /* Nothing to copy, since link already lives on obstack. */
576 assert (src_chain->type != M4__CHAIN_STR
577 || src_chain->u.u_s.level == SIZE_MAX);
578 chain = src_chain;
580 else
582 /* Allow inlining the final link with subsequent text. */
583 if (!src_chain->next && src_chain->type == M4__CHAIN_STR
584 && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
585 || (!inuse && src_chain->u.u_s.level == SIZE_MAX)))
587 obstack_grow (current_input, src_chain->u.u_s.str,
588 src_chain->u.u_s.len);
589 break;
591 /* We must clone each link in the chain, since next_char
592 destructively modifies the chain it is parsing. */
593 chain = (m4__symbol_chain *) obstack_copy (current_input, src_chain,
594 sizeof *chain);
595 chain->next = NULL;
596 if (chain->type == M4__CHAIN_STR && chain->u.u_s.level == SIZE_MAX)
598 if (chain->u.u_s.len <= INPUT_INLINE_THRESHOLD || !inuse)
599 chain->u.u_s.str = (char *) obstack_copy (current_input,
600 chain->u.u_s.str,
601 chain->u.u_s.len);
602 else
604 chain->u.u_s.level = level;
605 inuse = true;
609 if (next->u.u_c.end)
610 next->u.u_c.end->next = chain;
611 else
612 next->u.u_c.chain = chain;
613 next->u.u_c.end = chain;
614 if (chain->type == M4__CHAIN_ARGV)
616 assert (!chain->u.u_a.comma && !chain->u.u_a.skip_last);
617 inuse |= m4__arg_adjust_refcount (context, chain->u.u_a.argv, true);
619 else if (chain->type == M4__CHAIN_STR && chain->u.u_s.level < SIZE_MAX)
620 m4__adjust_refcount (context, chain->u.u_s.level, true);
621 src_chain = src_chain->next;
623 return inuse;
626 /* Last half of m4_push_string (). If next is now NULL, a call to
627 m4_push_file () has pushed a different input block to the top of
628 the stack. Otherwise, all unfinished text on the obstack returned
629 from push_string_init is collected into the input stack. If the
630 new object is empty, we do not push it. */
631 void
632 m4_push_string_finish (void)
634 size_t len = obstack_object_size (current_input);
636 if (next == NULL)
638 assert (!len);
639 return;
642 if (len || next->funcs == &composite_funcs)
644 if (next->funcs == &string_funcs)
646 next->u.u_s.str = (char *) obstack_finish (current_input);
647 next->u.u_s.len = len;
649 else
650 m4__make_text_link (current_input, &next->u.u_c.chain,
651 &next->u.u_c.end);
652 next->prev = isp;
653 isp = next;
654 input_change = true;
656 else
657 obstack_free (current_input, next);
658 next = NULL;
662 /* A composite block contains multiple sub-blocks which are processed
663 in FIFO order, even though the obstack allocates memory in LIFO
664 order. */
665 static int
666 composite_peek (m4_input_block *me, m4 *context, bool allow_argv)
668 m4__symbol_chain *chain = me->u.u_c.chain;
669 size_t argc;
671 while (chain)
673 switch (chain->type)
675 case M4__CHAIN_STR:
676 if (chain->u.u_s.len)
677 return to_uchar (chain->u.u_s.str[0]);
678 break;
679 case M4__CHAIN_FUNC:
680 if (chain->u.builtin)
681 return CHAR_BUILTIN;
682 break;
683 case M4__CHAIN_ARGV:
684 argc = m4_arg_argc (chain->u.u_a.argv);
685 if (chain->u.u_a.index == argc)
686 break;
687 if (chain->u.u_a.comma)
688 return ','; /* FIXME - support M4_SYNTAX_COMMA. */
689 /* Only return a reference in the quoting is correct and the
690 reference has more than one argument left. */
691 if (allow_argv && chain->quote_age == m4__quote_age (M4SYNTAX)
692 && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
693 return CHAR_ARGV;
694 /* Rather than directly parse argv here, we push another
695 input block containing the next unparsed argument from
696 argv. */
697 m4_push_string_init (context, me->file, me->line);
698 m4__push_arg_quote (context, current_input, chain->u.u_a.argv,
699 chain->u.u_a.index,
700 m4__quote_cache (M4SYNTAX, NULL,
701 chain->quote_age,
702 chain->u.u_a.quotes));
703 chain->u.u_a.index++;
704 chain->u.u_a.comma = true;
705 m4_push_string_finish ();
706 return peek_char (context, allow_argv);
707 case M4__CHAIN_LOC:
708 break;
709 default:
710 assert (!"composite_peek");
711 abort ();
713 chain = chain->next;
715 return CHAR_RETRY;
718 static int
719 composite_read (m4_input_block *me, m4 *context, bool allow_quote,
720 bool allow_argv, bool allow_unget)
722 m4__symbol_chain *chain = me->u.u_c.chain;
723 size_t argc;
724 while (chain)
726 if (allow_quote && chain->quote_age == m4__quote_age (M4SYNTAX))
727 return CHAR_QUOTE;
728 switch (chain->type)
730 case M4__CHAIN_STR:
731 if (chain->u.u_s.len)
733 /* Partial consumption invalidates quote age. */
734 chain->quote_age = 0;
735 chain->u.u_s.len--;
736 return to_uchar (*chain->u.u_s.str++);
738 if (chain->u.u_s.level < SIZE_MAX)
739 m4__adjust_refcount (context, chain->u.u_s.level, false);
740 break;
741 case M4__CHAIN_FUNC:
742 if (chain->u.builtin)
743 return CHAR_BUILTIN;
744 break;
745 case M4__CHAIN_ARGV:
746 argc = m4_arg_argc (chain->u.u_a.argv);
747 if (chain->u.u_a.index == argc)
749 m4__arg_adjust_refcount (context, chain->u.u_a.argv, false);
750 break;
752 if (chain->u.u_a.comma)
754 chain->u.u_a.comma = false;
755 return ','; /* FIXME - support M4_SYNTAX_COMMA. */
757 /* Only return a reference in the quoting is correct and the
758 reference has more than one argument left. */
759 if (allow_argv && chain->quote_age == m4__quote_age (M4SYNTAX)
760 && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
761 return CHAR_ARGV;
762 /* Rather than directly parse argv here, we push another
763 input block containing the next unparsed argument from
764 argv. */
765 m4_push_string_init (context, me->file, me->line);
766 m4__push_arg_quote (context, current_input, chain->u.u_a.argv,
767 chain->u.u_a.index,
768 m4__quote_cache (M4SYNTAX, NULL,
769 chain->quote_age,
770 chain->u.u_a.quotes));
771 chain->u.u_a.index++;
772 chain->u.u_a.comma = true;
773 m4_push_string_finish ();
774 return next_char (context, allow_quote, allow_argv, allow_unget);
775 case M4__CHAIN_LOC:
776 me->file = chain->u.u_l.file;
777 me->line = chain->u.u_l.line;
778 input_change = true;
779 me->u.u_c.chain = chain->next;
780 return next_char (context, allow_quote, allow_argv, allow_unget);
781 default:
782 assert (!"composite_read");
783 abort ();
785 me->u.u_c.chain = chain = chain->next;
787 return CHAR_RETRY;
790 static void
791 composite_unget (m4_input_block *me, int ch)
793 m4__symbol_chain *chain = me->u.u_c.chain;
794 switch (chain->type)
796 case M4__CHAIN_STR:
797 assert (ch < CHAR_EOF && to_uchar (chain->u.u_s.str[-1]) == ch);
798 chain->u.u_s.str--;
799 chain->u.u_s.len++;
800 break;
801 case M4__CHAIN_FUNC:
802 assert (ch == CHAR_BUILTIN && chain->u.builtin);
803 break;
804 case M4__CHAIN_ARGV:
805 /* FIXME - support M4_SYNTAX_COMMA. */
806 assert (ch == ',' && !chain->u.u_a.comma);
807 chain->u.u_a.comma = true;
808 break;
809 default:
810 assert (!"composite_unget");
811 abort ();
815 static bool
816 composite_clean (m4_input_block *me, m4 *context, bool cleanup)
818 m4__symbol_chain *chain = me->u.u_c.chain;
819 assert (!chain || !cleanup);
820 while (chain)
822 switch (chain->type)
824 case M4__CHAIN_STR:
825 if (chain->u.u_s.len)
827 assert (!cleanup);
828 return false;
830 if (chain->u.u_s.level < SIZE_MAX)
831 m4__adjust_refcount (context, chain->u.u_s.level, false);
832 break;
833 case M4__CHAIN_FUNC:
834 if (chain->u.builtin)
835 return false;
836 break;
837 case M4__CHAIN_ARGV:
838 if (chain->u.u_a.index < m4_arg_argc (chain->u.u_a.argv))
840 assert (!cleanup);
841 return false;
843 m4__arg_adjust_refcount (context, chain->u.u_a.argv, false);
844 break;
845 case M4__CHAIN_LOC:
846 return false;
847 default:
848 assert (!"composite_clean");
849 abort ();
851 me->u.u_c.chain = chain = chain->next;
853 return true;
856 static void
857 composite_print (m4_input_block *me, m4 *context, m4_obstack *obs,
858 int debug_level)
860 bool quote = (debug_level & M4_DEBUG_TRACE_QUOTE) != 0;
861 size_t maxlen = m4_get_max_debug_arg_length_opt (context);
862 m4__symbol_chain *chain = me->u.u_c.chain;
863 const m4_string_pair *quotes = m4_get_syntax_quotes (M4SYNTAX);
864 bool module = (debug_level & M4_DEBUG_TRACE_MODULE) != 0;
865 bool done = false;
866 size_t len = obstack_object_size (current_input);
868 if (quote)
869 m4_shipout_string (context, obs, quotes->str1, quotes->len1, false);
870 while (chain && !done)
872 switch (chain->type)
874 case M4__CHAIN_STR:
875 if (m4_shipout_string_trunc (obs, chain->u.u_s.str,
876 chain->u.u_s.len, NULL, &maxlen))
877 done = true;
878 break;
879 case M4__CHAIN_FUNC:
880 m4__builtin_print (obs, chain->u.builtin, false, NULL, NULL, module);
881 break;
882 case M4__CHAIN_ARGV:
883 assert (!chain->u.u_a.comma);
884 if (m4__arg_print (context, obs, chain->u.u_a.argv,
885 chain->u.u_a.index,
886 m4__quote_cache (M4SYNTAX, NULL, chain->quote_age,
887 chain->u.u_a.quotes),
888 chain->u.u_a.flatten, NULL, NULL, &maxlen, false,
889 module))
890 done = true;
891 break;
892 default:
893 assert (!"composite_print");
894 abort ();
896 chain = chain->next;
898 if (len)
899 m4_shipout_string_trunc (obs, (char *) obstack_base (current_input), len,
900 NULL, &maxlen);
901 if (quote)
902 m4_shipout_string (context, obs, quotes->str2, quotes->len2, false);
905 /* Given an obstack OBS, capture any unfinished text as a link in the
906 chain that starts at *START and ends at *END. START may be NULL if
907 *END is non-NULL. */
908 void
909 m4__make_text_link (m4_obstack *obs, m4__symbol_chain **start,
910 m4__symbol_chain **end)
912 m4__symbol_chain *chain;
913 size_t len = obstack_object_size (obs);
915 assert (end && (start || *end));
916 if (len)
918 char *str = (char *) obstack_finish (obs);
919 chain = (m4__symbol_chain *) obstack_alloc (obs, sizeof *chain);
920 if (*end)
921 (*end)->next = chain;
922 else
923 *start = chain;
924 *end = chain;
925 chain->next = NULL;
926 chain->type = M4__CHAIN_STR;
927 chain->quote_age = 0;
928 chain->u.u_s.str = str;
929 chain->u.u_s.len = len;
930 chain->u.u_s.level = SIZE_MAX;
934 /* Given an obstack OBS, capture any unfinished text as a link, then
935 append the builtin FUNC as the next link in the chain that starts
936 at *START and ends at *END. START may be NULL if *END is
937 non-NULL. */
938 void
939 m4__append_builtin (m4_obstack *obs, const m4__builtin *func,
940 m4__symbol_chain **start, m4__symbol_chain **end)
942 m4__symbol_chain *chain;
944 assert (func);
945 m4__make_text_link (obs, start, end);
946 chain = (m4__symbol_chain *) obstack_alloc (obs, sizeof *chain);
947 if (*end)
948 (*end)->next = chain;
949 else
950 *start = chain;
951 *end = chain;
952 chain->next = NULL;
953 chain->type = M4__CHAIN_FUNC;
954 chain->quote_age = 0;
955 chain->u.builtin = func;
958 /* Push TOKEN, which contains a builtin's definition, onto the obstack
959 OBS, which is either input stack or the wrapup stack. */
960 void
961 m4_push_builtin (m4 *context, m4_obstack *obs, m4_symbol_value *token)
963 m4_input_block *i = (obs == current_input ? next : wsp);
964 assert (i);
965 if (i->funcs == &string_funcs)
967 i->funcs = &composite_funcs;
968 i->u.u_c.chain = i->u.u_c.end = NULL;
970 else
971 assert (i->funcs == &composite_funcs);
972 m4__append_builtin (obs, token->u.builtin, &i->u.u_c.chain, &i->u.u_c.end);
976 /* End of input optimization. By providing these dummy callback
977 functions, we guarantee that the input stack is never NULL, and
978 thus make fewer execution branches. */
979 static int
980 eof_peek (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
981 bool allow_argv M4_GNUC_UNUSED)
983 assert (me == &input_eof);
984 return CHAR_EOF;
987 static int
988 eof_read (m4_input_block *me, m4 *context M4_GNUC_UNUSED,
989 bool allow_quote M4_GNUC_UNUSED, bool allow_argv M4_GNUC_UNUSED,
990 bool allow_unget M4_GNUC_UNUSED)
992 assert (me == &input_eof);
993 return CHAR_EOF;
996 static void
997 eof_unget (m4_input_block *me M4_GNUC_UNUSED, int ch)
999 assert (ch == CHAR_EOF);
1003 /* When tracing, print a summary of the contents of the input block
1004 created by push_string_init/push_string_finish to OBS. Use
1005 DEBUG_LEVEL to determine whether to add quotes or module
1006 designations. */
1007 void
1008 m4_input_print (m4 *context, m4_obstack *obs, int debug_level)
1010 m4_input_block *block = next ? next : isp;
1011 assert (context && obs && (debug_level & M4_DEBUG_TRACE_EXPANSION));
1012 assert (block->funcs->print_func);
1013 block->funcs->print_func (block, context, obs, debug_level);
1016 /* Return an obstack ready for direct expansion of wrapup text, and
1017 set *END to the location that should be updated if any builtin
1018 tokens are wrapped. Store the location of CALLER with the wrapped
1019 text. This should be followed by m4__push_wrapup_finish (). */
1020 m4_obstack *
1021 m4__push_wrapup_init (m4 *context, const m4_call_info *caller,
1022 m4__symbol_chain ***end)
1024 m4_input_block *i;
1025 m4__symbol_chain *chain;
1027 assert (obstack_object_size (wrapup_stack) == 0);
1028 if (wsp != &input_eof)
1030 i = wsp;
1031 assert (i->funcs == &composite_funcs && i->u.u_c.end
1032 && i->u.u_c.end->type != M4__CHAIN_LOC);
1034 else
1036 i = (m4_input_block *) obstack_alloc (wrapup_stack, sizeof *i);
1037 i->prev = wsp;
1038 i->funcs = &composite_funcs;
1039 i->file = caller->file;
1040 i->line = caller->line;
1041 i->u.u_c.chain = i->u.u_c.end = NULL;
1042 wsp = i;
1044 chain = (m4__symbol_chain *) obstack_alloc (wrapup_stack, sizeof *chain);
1045 if (i->u.u_c.end)
1046 i->u.u_c.end->next = chain;
1047 else
1048 i->u.u_c.chain = chain;
1049 i->u.u_c.end = chain;
1050 chain->next = NULL;
1051 chain->type = M4__CHAIN_LOC;
1052 chain->quote_age = 0;
1053 chain->u.u_l.file = caller->file;
1054 chain->u.u_l.line = caller->line;
1055 *end = &i->u.u_c.end;
1056 return wrapup_stack;
1059 /* After pushing wrapup text, this completes the bookkeeping. */
1060 void
1061 m4__push_wrapup_finish (void)
1063 m4__make_text_link (wrapup_stack, &wsp->u.u_c.chain, &wsp->u.u_c.end);
1064 assert (wsp->u.u_c.end->type != M4__CHAIN_LOC);
1068 /* The function pop_input () pops one level of input sources. If
1069 CLEANUP, the current_file and current_line are restored as needed.
1070 The return value is false if cleanup is still required, or if the
1071 current input source is not at the end. */
1072 static bool
1073 pop_input (m4 *context, bool cleanup)
1075 m4_input_block *tmp = isp->prev;
1077 assert (isp);
1078 if (isp->funcs->clean_func
1079 ? !isp->funcs->clean_func (isp, context, cleanup)
1080 : (isp->funcs->peek_func (isp, context, true) != CHAR_RETRY))
1081 return false;
1083 obstack_free (current_input, isp);
1084 m4__quote_uncache (M4SYNTAX);
1085 next = NULL; /* might be set in m4_push_string_init () */
1087 isp = tmp;
1088 input_change = true;
1089 return true;
1092 /* To switch input over to the wrapup stack, main () calls pop_wrapup.
1093 Since wrapup text can install new wrapup text, pop_wrapup ()
1094 returns true if there is more wrapped text to parse. */
1095 bool
1096 m4_pop_wrapup (m4 *context)
1098 static size_t level = 0;
1100 next = NULL;
1101 obstack_free (current_input, NULL);
1102 free (current_input);
1104 if (wsp == &input_eof)
1106 obstack_free (wrapup_stack, NULL);
1107 m4_set_current_file (context, NULL);
1108 m4_set_current_line (context, 0);
1109 m4_debug_message (context, M4_DEBUG_TRACE_INPUT,
1110 _("input from m4wrap exhausted"));
1111 current_input = NULL;
1112 DELETE (wrapup_stack);
1113 return false;
1116 m4_debug_message (context, M4_DEBUG_TRACE_INPUT,
1117 _("input from m4wrap recursion level %zu"), ++level);
1119 current_input = wrapup_stack;
1120 wrapup_stack = (m4_obstack *) xmalloc (sizeof *wrapup_stack);
1121 obstack_init (wrapup_stack);
1123 isp = wsp;
1124 wsp = &input_eof;
1125 input_change = true;
1127 return true;
1130 /* Populate TOKEN with the builtin token at the top of the input
1131 stack, then consume the input. If OBS, TOKEN will be converted to
1132 a composite token using storage from OBS as necessary; otherwise,
1133 if TOKEN is NULL, the builtin token is discarded. */
1134 static void
1135 init_builtin_token (m4 *context, m4_obstack *obs, m4_symbol_value *token)
1137 m4__symbol_chain *chain;
1138 assert (isp->funcs == &composite_funcs);
1139 chain = isp->u.u_c.chain;
1140 assert (!chain->quote_age && chain->type == M4__CHAIN_FUNC
1141 && chain->u.builtin);
1142 if (obs)
1144 assert (token);
1145 if (token->type == M4_SYMBOL_VOID)
1147 token->type = M4_SYMBOL_COMP;
1148 token->u.u_c.chain = token->u.u_c.end = NULL;
1149 token->u.u_c.wrapper = false;
1150 token->u.u_c.has_func = false;
1152 assert (token->type == M4_SYMBOL_COMP);
1153 m4__append_builtin (obs, chain->u.builtin, &token->u.u_c.chain,
1154 &token->u.u_c.end);
1156 else if (token)
1158 assert (token->type == M4_SYMBOL_VOID);
1159 m4__set_symbol_value_builtin (token, chain->u.builtin);
1161 chain->u.builtin = NULL;
1164 /* When a QUOTE token is seen, convert VALUE to a composite (if it is
1165 not one already), consisting of any unfinished text on OBS, as well
1166 as the quoted token from the top of the input stack. Use OBS for
1167 any additional allocations needed to store the token chain. */
1168 static void
1169 append_quote_token (m4 *context, m4_obstack *obs, m4_symbol_value *value)
1171 m4__symbol_chain *src_chain = isp->u.u_c.chain;
1172 m4__symbol_chain *chain;
1173 assert (isp->funcs == &composite_funcs && obs && m4__quote_age (M4SYNTAX));
1174 isp->u.u_c.chain = src_chain->next;
1176 /* Speed consideration - for short enough symbols, the speed and
1177 memory overhead of parsing another INPUT_CHAIN link outweighs the
1178 time to inline the symbol text. */
1179 if (src_chain->type == M4__CHAIN_STR
1180 && src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
1182 assert (src_chain->u.u_s.level <= SIZE_MAX);
1183 obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
1184 m4__adjust_refcount (context, src_chain->u.u_s.level, false);
1185 return;
1188 if (value->type == M4_SYMBOL_VOID)
1190 value->type = M4_SYMBOL_COMP;
1191 value->u.u_c.chain = value->u.u_c.end = NULL;
1192 value->u.u_c.wrapper = value->u.u_c.has_func = false;
1194 assert (value->type == M4_SYMBOL_COMP);
1195 m4__make_text_link (obs, &value->u.u_c.chain, &value->u.u_c.end);
1196 chain = (m4__symbol_chain *) obstack_copy (obs, src_chain, sizeof *chain);
1197 if (value->u.u_c.end)
1198 value->u.u_c.end->next = chain;
1199 else
1200 value->u.u_c.chain = chain;
1201 value->u.u_c.end = chain;
1202 if (chain->type == M4__CHAIN_ARGV && chain->u.u_a.has_func)
1203 value->u.u_c.has_func = true;
1204 chain->next = NULL;
1207 /* When an ARGV token is seen, convert VALUE to point to it via a
1208 composite chain. Use OBS for any additional allocations
1209 needed. */
1210 static void
1211 init_argv_symbol (m4 *context, m4_obstack *obs, m4_symbol_value *value)
1213 m4__symbol_chain *src_chain;
1214 m4__symbol_chain *chain;
1215 int ch;
1216 const m4_string_pair *comments = m4_get_syntax_comments (M4SYNTAX);
1218 assert (value->type == M4_SYMBOL_VOID && isp->funcs == &composite_funcs
1219 && isp->u.u_c.chain->type == M4__CHAIN_ARGV
1220 && obs && obstack_object_size (obs) == 0);
1222 src_chain = isp->u.u_c.chain;
1223 isp->u.u_c.chain = src_chain->next;
1224 value->type = M4_SYMBOL_COMP;
1225 /* Clone the link, since the input will be discarded soon. */
1226 chain = (m4__symbol_chain *) obstack_copy (obs, src_chain, sizeof *chain);
1227 value->u.u_c.chain = value->u.u_c.end = chain;
1228 value->u.u_c.wrapper = true;
1229 value->u.u_c.has_func = chain->u.u_a.has_func;
1230 chain->next = NULL;
1232 /* If the next character is not ',' or ')', then unlink the last
1233 argument from argv and schedule it for reparsing. This way,
1234 expand_argument never has to deal with concatenation of argv with
1235 arbitrary text. Note that the implementation of safe_quotes
1236 ensures peek_input won't return CHAR_ARGV if the user is perverse
1237 enough to mix comment delimiters with argument separators:
1239 define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c))
1240 => 2 (not 3)
1242 Therefore, we do not have to worry about calling MATCH, and thus
1243 do not have to worry about pop_input being called and
1244 invalidating the argv reference.
1246 When the $@ ref is used unchanged, we completely bypass the
1247 decrement of the argv refcount in next_char, since the ref is
1248 still live via the current collect_arguments. However, when the
1249 last element of the $@ ref is reparsed, we must increase the argv
1250 refcount here, to compensate for the fact that it will be
1251 decreased once the final element is parsed. */
1252 assert (!comments->len1
1253 || (!m4_has_syntax (M4SYNTAX, *comments->str1,
1254 M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE)
1255 && *comments->str1 != *src_chain->u.u_a.quotes->str1));
1256 ch = peek_char (context, true);
1257 if (!m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE))
1259 isp->u.u_c.chain = src_chain;
1260 src_chain->u.u_a.index = m4_arg_argc (chain->u.u_a.argv) - 1;
1261 src_chain->u.u_a.comma = true;
1262 chain->u.u_a.skip_last = true;
1263 m4__arg_adjust_refcount (context, chain->u.u_a.argv, true);
1268 /* Low level input is done a character at a time. The function
1269 next_char () is used to read and advance the input to the next
1270 character. If ALLOW_QUOTE, and the current input matches the
1271 current quote age, return CHAR_QUOTE and leave consumption of data
1272 for append_quote_token; otherwise, if ALLOW_ARGV, and the current
1273 input matches an argv reference with the correct quoting, return
1274 CHAR_ARGV and leave consumption of data for init_argv_symbol. If
1275 ALLOW_UNGET, then pop input to avoid returning CHAR_RETRY, and
1276 ensure that unget_input can safely be called next. */
1277 static int
1278 next_char (m4 *context, bool allow_quote, bool allow_argv, bool allow_unget)
1280 int ch;
1282 while (1)
1284 if (input_change)
1286 m4_set_current_file (context, isp->file);
1287 m4_set_current_line (context, isp->line);
1290 assert (isp->funcs->read_func);
1291 while (((ch = isp->funcs->read_func (isp, context, allow_quote,
1292 allow_argv, allow_unget))
1293 != CHAR_RETRY)
1294 || allow_unget)
1296 /* if (!IS_IGNORE (ch)) */
1297 return ch;
1300 /* End of input source --- pop one level. */
1301 pop_input (context, true);
1305 /* The function peek_char () is used to look at the next character in
1306 the input stream. At any given time, it reads from the input_block
1307 on the top of the current input stack. If ALLOW_ARGV, then return
1308 CHAR_ARGV if an entire $@ reference is available for use. */
1309 static int
1310 peek_char (m4 *context, bool allow_argv)
1312 int ch;
1313 m4_input_block *block = isp;
1315 while (1)
1317 assert (block->funcs->peek_func);
1318 ch = block->funcs->peek_func (block, context, allow_argv);
1319 if (ch != CHAR_RETRY)
1321 /* if (IS_IGNORE (ch)) */
1322 /* return next_char (context, false, true, false); */
1323 return ch;
1326 block = block->prev;
1330 /* The function unget_input () puts back a character on the input
1331 stack, using an existing input_block if possible. This is not safe
1332 to call except immediately after next_char(context, aq, aa, true). */
1333 static void
1334 unget_input (int ch)
1336 assert (isp->funcs->unget_func != NULL);
1337 isp->funcs->unget_func (isp, ch);
1340 /* skip_line () simply discards all immediately following characters,
1341 up to the first newline. It is only used from m4_dnl (). Report
1342 errors on behalf of CALLER. */
1343 void
1344 m4_skip_line (m4 *context, const m4_call_info *caller)
1346 int ch;
1348 while ((ch = next_char (context, false, false, false)) != CHAR_EOF
1349 && ch != '\n')
1351 if (ch == CHAR_EOF)
1352 m4_warn (context, 0, caller, _("end of file treated as newline"));
1356 /* If the string S of length LEN matches the next characters of the
1357 input stream, return true. If CONSUME, the first byte has already
1358 been matched. If a match is found and CONSUME is true, the input
1359 is discarded; otherwise any characters read are pushed back again.
1360 The function is used only when multicharacter quotes or comment
1361 delimiters are used.
1363 All strings herein should be unsigned. Otherwise sign-extension
1364 of individual chars might break quotes with 8-bit chars in it.
1366 FIXME - when matching multiquotes that cross file boundaries, we do
1367 not properly restore the current input file and line when we
1368 restore unconsumed characters. */
1369 static bool
1370 match_input (m4 *context, const char *s, size_t len, bool consume)
1372 int n; /* number of characters matched */
1373 int ch; /* input character */
1374 const char *t;
1375 m4_obstack *st;
1376 bool result = false;
1378 if (consume)
1380 s++;
1381 len--;
1383 assert (len);
1384 ch = peek_char (context, false);
1385 if (ch != to_uchar (*s))
1386 return false; /* fail */
1388 if (len == 1)
1390 if (consume)
1391 next_char (context, false, false, false);
1392 return true; /* short match */
1395 next_char (context, false, false, false);
1396 for (n = 1, t = s++; (ch = peek_char (context, false)) == to_uchar (*s++); )
1398 next_char (context, false, false, false);
1399 n++;
1400 if (--len == 1) /* long match */
1402 if (consume)
1403 return true;
1404 result = true;
1405 break;
1409 /* Failed or shouldn't consume, push back input. */
1410 st = m4_push_string_init (context, m4_get_current_file (context),
1411 m4_get_current_line (context));
1412 obstack_grow (st, t, n);
1413 m4_push_string_finish ();
1414 return result;
/* The macro MATCH() matches a string S of length LEN against the
   input, given that CH is the current input character.  The first
   character is compared inline for speed, before LEN is looked at,
   so S[LEN] must be safe to dereference (it is faster to do
   character comparison prior to length checks).  For LEN of 0 or 1
   the result is simply LEN, which covers disabled delimiters and the
   common case of single character quotes and comment delimiters;
   only longer delimiters call match_input.  If CONSUME, then CH is
   the result of next_char, and a successful match will discard the
   matched string.  Otherwise, CH is the result of peek_char, and the
   input stream is effectively unchanged.  */
#define MATCH(C, ch, s, len, consume)					\
  (to_uchar ((s)[0]) == (ch)						\
   && ((len) >> 1 ? match_input (C, s, len, consume) : (len)))
1431 /* While the current input character has the given SYNTAX, append it
1432 to OBS. Take care not to pop input source unless the next source
1433 would continue the chain. Return true if the chain ended with
1434 CHAR_EOF. */
1435 static bool
1436 consume_syntax (m4 *context, m4_obstack *obs, unsigned int syntax)
1438 int ch;
1439 bool allow = m4__safe_quotes (M4SYNTAX);
1440 assert (syntax);
1441 while (1)
1443 /* It is safe to call next_char without first checking
1444 peek_char, except at input source boundaries, which we detect
1445 by CHAR_RETRY. We exploit the fact that CHAR_EOF,
1446 CHAR_BUILTIN, CHAR_QUOTE, and CHAR_ARGV do not satisfy any
1447 syntax categories. */
1448 while ((ch = next_char (context, allow, allow, true)) != CHAR_RETRY
1449 && m4_has_syntax (M4SYNTAX, ch, syntax))
1451 assert (ch < CHAR_EOF);
1452 obstack_1grow (obs, ch);
1454 if (ch == CHAR_RETRY || ch == CHAR_QUOTE || ch == CHAR_ARGV)
1456 ch = peek_char (context, false);
1457 if (m4_has_syntax (M4SYNTAX, ch, syntax))
1459 assert (ch < CHAR_EOF);
1460 obstack_1grow (obs, ch);
1461 next_char (context, false, false, false);
1462 continue;
1464 return ch == CHAR_EOF;
1466 unget_input (ch);
1467 return false;
1472 /* Initialize input stacks. */
1473 void
1474 m4_input_init (m4 *context)
1476 obstack_init (&file_names);
1477 m4_set_current_file (context, NULL);
1478 m4_set_current_line (context, 0);
1480 current_input = (m4_obstack *) xmalloc (sizeof *current_input);
1481 obstack_init (current_input);
1482 wrapup_stack = (m4_obstack *) xmalloc (sizeof *wrapup_stack);
1483 obstack_init (wrapup_stack);
1485 /* Allocate an object in the current chunk, so that obstack_free
1486 will always work even if the first token parsed spills to a new
1487 chunk. */
1488 obstack_init (&token_stack);
1489 token_bottom = obstack_finish (&token_stack);
1491 isp = &input_eof;
1492 wsp = &input_eof;
1493 next = NULL;
1495 start_of_input_line = false;
1498 /* Free memory used by the input engine. */
1499 void
1500 m4_input_exit (void)
1502 assert (!current_input && isp == &input_eof);
1503 assert (!wrapup_stack && wsp == &input_eof);
1504 obstack_free (&file_names, NULL);
1505 obstack_free (&token_stack, NULL);
1509 /* Parse and return a single token from the input stream, constructed
1510 into TOKEN. See m4__token_type for the valid return types, along
1511 with a description of what TOKEN will contain. If LINE is not
1512 NULL, set *LINE to the line number where the token starts. If OBS,
1513 expand safe tokens (strings and comments) directly into OBS rather
1514 than in a temporary staging area. If ALLOW_ARGV, OBS must be
1515 non-NULL, and an entire series of arguments can be returned if a $@
1516 reference is encountered. Report errors (unterminated comments or
1517 strings) on behalf of CALLER, if non-NULL.
1519 If OBS is NULL or the token expansion is unknown, the token text is
1520 collected on the obstack token_stack, which never contains more
1521 than one token text at a time. The storage pointed to by the
1522 fields in TOKEN is therefore subject to change the next time
1523 m4__next_token () is called. */
1524 m4__token_type
1525 m4__next_token (m4 *context, m4_symbol_value *token, int *line,
1526 m4_obstack *obs, bool allow_argv, const m4_call_info *caller)
1528 int ch;
1529 int quote_level;
1530 m4__token_type type;
1531 const char *file = NULL;
1532 size_t len;
1533 /* The obstack where token data is stored. Generally token_stack,
1534 for tokens where argument collection might not use the literal
1535 token. But for comments and strings, we can output directly into
1536 the argument collection obstack OBS, if provided. */
1537 m4_obstack *obs_safe = &token_stack;
1539 assert (next == NULL);
1540 memset (token, '\0', sizeof *token);
1541 do {
1542 obstack_free (&token_stack, token_bottom);
1544 /* Must consume an input character. */
1545 ch = next_char (context, false, allow_argv && m4__quote_age (M4SYNTAX),
1546 false);
1547 if (line)
1549 *line = m4_get_current_line (context);
1550 file = m4_get_current_file (context);
1552 if (ch == CHAR_EOF) /* EOF */
1554 #ifdef DEBUG_INPUT
1555 xfprintf (stderr, "next_token -> EOF\n");
1556 #endif
1557 return M4_TOKEN_EOF;
1560 if (ch == CHAR_BUILTIN) /* BUILTIN TOKEN */
1562 init_builtin_token (context, obs, token);
1563 #ifdef DEBUG_INPUT
1564 m4_print_token (context, "next_token", M4_TOKEN_MACDEF, token);
1565 #endif
1566 return M4_TOKEN_MACDEF;
1568 if (ch == CHAR_ARGV)
1570 init_argv_symbol (context, obs, token);
1571 #ifdef DEBUG_INPUT
1572 m4_print_token (context, "next_token", M4_TOKEN_ARGV, token);
1573 #endif
1574 return M4_TOKEN_ARGV;
1577 if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ESCAPE))
1578 { /* ESCAPED WORD */
1579 obstack_1grow (&token_stack, ch);
1580 if ((ch = next_char (context, false, false, false)) < CHAR_EOF)
1582 obstack_1grow (&token_stack, ch);
1583 if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ALPHA))
1584 consume_syntax (context, &token_stack,
1585 M4_SYNTAX_ALPHA | M4_SYNTAX_NUM);
1586 type = M4_TOKEN_WORD;
1588 else
1589 type = M4_TOKEN_SIMPLE; /* escape before eof */
1591 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ALPHA))
1593 type = (m4_is_syntax_macro_escaped (M4SYNTAX)
1594 ? M4_TOKEN_STRING : M4_TOKEN_WORD);
1595 if (type == M4_TOKEN_STRING && obs)
1596 obs_safe = obs;
1597 obstack_1grow (obs_safe, ch);
1598 consume_syntax (context, obs_safe, M4_SYNTAX_ALPHA | M4_SYNTAX_NUM);
1600 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_LQUOTE))
1601 { /* QUOTED STRING, SINGLE QUOTES */
1602 if (obs)
1603 obs_safe = obs;
1604 quote_level = 1;
1605 type = M4_TOKEN_STRING;
1606 while (1)
1608 ch = next_char (context, obs && m4__quote_age (M4SYNTAX), false,
1609 false);
1610 if (ch == CHAR_EOF)
1612 if (!caller)
1614 assert (line);
1615 m4_set_current_file (context, file);
1616 m4_set_current_line (context, *line);
1618 m4_error (context, EXIT_FAILURE, 0, caller,
1619 _("end of file in string"));
1621 if (ch == CHAR_BUILTIN)
1622 init_builtin_token (context, obs, obs ? token : NULL);
1623 else if (ch == CHAR_QUOTE)
1624 append_quote_token (context, obs, token);
1625 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_RQUOTE))
1627 if (--quote_level == 0)
1628 break;
1629 obstack_1grow (obs_safe, ch);
1631 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_LQUOTE))
1633 quote_level++;
1634 obstack_1grow (obs_safe, ch);
1636 else
1637 obstack_1grow (obs_safe, ch);
1640 else if (!m4_is_syntax_single_quotes (M4SYNTAX)
1641 && MATCH (context, ch, context->syntax->quote.str1,
1642 context->syntax->quote.len1, true))
1643 { /* QUOTED STRING, LONGER QUOTES */
1644 if (obs)
1645 obs_safe = obs;
1646 quote_level = 1;
1647 type = M4_TOKEN_STRING;
1648 assert (!m4__quote_age (M4SYNTAX));
1649 while (1)
1651 ch = next_char (context, false, false, false);
1652 if (ch == CHAR_EOF)
1654 if (!caller)
1656 assert (line);
1657 m4_set_current_file (context, file);
1658 m4_set_current_line (context, *line);
1660 m4_error (context, EXIT_FAILURE, 0, caller,
1661 _("end of file in string"));
1663 if (ch == CHAR_BUILTIN)
1664 init_builtin_token (context, obs, obs ? token : NULL);
1665 else if (MATCH (context, ch, context->syntax->quote.str2,
1666 context->syntax->quote.len2, true))
1668 if (--quote_level == 0)
1669 break;
1670 obstack_grow (obs_safe, context->syntax->quote.str2,
1671 context->syntax->quote.len2);
1673 else if (MATCH (context, ch, context->syntax->quote.str1,
1674 context->syntax->quote.len1, true))
1676 quote_level++;
1677 obstack_grow (obs_safe, context->syntax->quote.str1,
1678 context->syntax->quote.len2);
1680 else
1681 obstack_1grow (obs_safe, ch);
1684 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_BCOMM))
1685 { /* COMMENT, SHORT DELIM */
1686 if (obs && !m4_get_discard_comments_opt (context))
1687 obs_safe = obs;
1688 obstack_1grow (obs_safe, ch);
1689 while (1)
1691 ch = next_char (context, false, false, false);
1692 if (ch == CHAR_EOF)
1694 if (!caller)
1696 assert (line);
1697 m4_set_current_file (context, file);
1698 m4_set_current_line (context, *line);
1700 m4_error (context, EXIT_FAILURE, 0, caller,
1701 _("end of file in comment"));
1703 if (ch == CHAR_BUILTIN)
1705 init_builtin_token (context, NULL, NULL);
1706 continue;
1708 if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ECOMM))
1710 obstack_1grow (obs_safe, ch);
1711 break;
1713 assert (ch < CHAR_EOF);
1714 obstack_1grow (obs_safe, ch);
1716 type = (m4_get_discard_comments_opt (context)
1717 ? M4_TOKEN_NONE : M4_TOKEN_COMMENT);
1719 else if (!m4_is_syntax_single_comments (M4SYNTAX)
1720 && MATCH (context, ch, context->syntax->comm.str1,
1721 context->syntax->comm.len1, true))
1722 { /* COMMENT, LONGER DELIM */
1723 if (obs && !m4_get_discard_comments_opt (context))
1724 obs_safe = obs;
1725 obstack_grow (obs_safe, context->syntax->comm.str1,
1726 context->syntax->comm.len1);
1727 while (1)
1729 ch = next_char (context, false, false, false);
1730 if (ch == CHAR_EOF)
1732 if (!caller)
1734 assert (line);
1735 m4_set_current_file (context, file);
1736 m4_set_current_line (context, *line);
1738 m4_error (context, EXIT_FAILURE, 0, caller,
1739 _("end of file in comment"));
1741 if (ch == CHAR_BUILTIN)
1743 init_builtin_token (context, NULL, NULL);
1744 continue;
1746 if (MATCH (context, ch, context->syntax->comm.str2,
1747 context->syntax->comm.len2, true))
1749 obstack_grow (obs_safe, context->syntax->comm.str2,
1750 context->syntax->comm.len2);
1751 break;
1753 assert (ch < CHAR_EOF);
1754 obstack_1grow (obs_safe, ch);
1756 type = (m4_get_discard_comments_opt (context)
1757 ? M4_TOKEN_NONE : M4_TOKEN_COMMENT);
1759 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
1760 { /* ACTIVE CHARACTER */
1761 obstack_1grow (&token_stack, ch);
1762 type = M4_TOKEN_WORD;
1764 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN))
1765 { /* OPEN PARENTHESIS */
1766 obstack_1grow (&token_stack, ch);
1767 type = M4_TOKEN_OPEN;
1769 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA))
1770 { /* COMMA */
1771 obstack_1grow (&token_stack, ch);
1772 type = M4_TOKEN_COMMA;
1774 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_CLOSE))
1775 { /* CLOSE PARENTHESIS */
1776 obstack_1grow (&token_stack, ch);
1777 type = M4_TOKEN_CLOSE;
1779 else if (m4_is_syntax_single_quotes (M4SYNTAX)
1780 && m4_is_syntax_single_comments (M4SYNTAX))
1781 { /* EVERYTHING ELSE (SHORT QUOTES AND COMMENTS) */
1782 assert (ch < CHAR_EOF);
1783 obstack_1grow (&token_stack, ch);
1785 if (m4_has_syntax (M4SYNTAX, ch,
1786 (M4_SYNTAX_OTHER | M4_SYNTAX_NUM | M4_SYNTAX_DOLLAR
1787 | M4_SYNTAX_LBRACE | M4_SYNTAX_RBRACE)))
1789 if (obs)
1791 obs_safe = obs;
1792 obstack_1grow (obs, ch);
1794 consume_syntax (context, obs_safe,
1795 (M4_SYNTAX_OTHER | M4_SYNTAX_NUM
1796 | M4_SYNTAX_DOLLAR | M4_SYNTAX_LBRACE
1797 | M4_SYNTAX_RBRACE));
1798 type = M4_TOKEN_STRING;
1800 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
1802 /* Coalescing newlines when interactive or when synclines
1803 are enabled is wrong. */
1804 if (!m4_get_interactive_opt (context)
1805 && !m4_get_syncoutput_opt (context))
1806 consume_syntax (context, &token_stack, M4_SYNTAX_SPACE);
1807 type = M4_TOKEN_SPACE;
1809 else
1810 type = M4_TOKEN_SIMPLE;
1812 else /* EVERYTHING ELSE (LONG QUOTES OR COMMENTS) */
1814 assert (ch < CHAR_EOF);
1815 obstack_1grow (&token_stack, ch);
1817 if (m4_has_syntax (M4SYNTAX, ch,
1818 (M4_SYNTAX_OTHER | M4_SYNTAX_NUM | M4_SYNTAX_DOLLAR
1819 | M4_SYNTAX_LBRACE | M4_SYNTAX_RBRACE)))
1821 if (obs)
1823 obs_safe = obs;
1824 obstack_1grow (obs, ch);
1826 type = M4_TOKEN_STRING;
1828 else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
1829 type = M4_TOKEN_SPACE;
1830 else
1831 type = M4_TOKEN_SIMPLE;
1833 } while (type == M4_TOKEN_NONE);
1835 if (token->type == M4_SYMBOL_VOID)
1837 if (obs_safe != obs)
1839 len = obstack_object_size (&token_stack);
1840 obstack_1grow (&token_stack, '\0');
1842 m4_set_symbol_value_text (token, obstack_finish (&token_stack), len,
1843 m4__quote_age (M4SYNTAX));
1845 else
1846 assert (type == M4_TOKEN_STRING || type == M4_TOKEN_COMMENT);
1848 else
1849 assert (token->type == M4_SYMBOL_COMP
1850 && (type == M4_TOKEN_STRING || type == M4_TOKEN_COMMENT));
1851 VALUE_MAX_ARGS (token) = -1;
1853 #ifdef DEBUG_INPUT
1854 if (token->type == M4_SYMBOL_VOID)
1856 len = obstack_object_size (&token_stack);
1857 obstack_1grow (&token_stack, '\0');
1859 m4_set_symbol_value_text (token, obstack_finish (&token_stack), len,
1860 m4__quote_age (M4SYNTAX));
1863 m4_print_token (context, "next_token", type, token);
1864 #endif
1866 return type;
1869 /* Peek at the next token in the input stream to see if it is an open
1870 parenthesis. It is possible that what is peeked at may change as a
1871 result of changequote (or friends). This honors multi-character
1872 comments and quotes, just as next_token does. */
1873 bool
1874 m4__next_token_is_open (m4 *context)
1876 int ch = peek_char (context, false);
1878 if (ch == CHAR_EOF || ch == CHAR_BUILTIN
1879 || m4_has_syntax (M4SYNTAX, ch, (M4_SYNTAX_BCOMM | M4_SYNTAX_ESCAPE
1880 | M4_SYNTAX_ALPHA | M4_SYNTAX_LQUOTE
1881 | M4_SYNTAX_ACTIVE))
1882 || (!m4_is_syntax_single_comments (M4SYNTAX)
1883 && MATCH (context, ch, context->syntax->comm.str1,
1884 context->syntax->comm.len1, false))
1885 || (!m4_is_syntax_single_quotes (M4SYNTAX)
1886 && MATCH (context, ch, context->syntax->quote.str1,
1887 context->syntax->quote.len1, false)))
1888 return false;
1889 return m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN);
1893 #ifdef DEBUG_INPUT
1896 m4_print_token (m4 *context, const char *s, m4__token_type type,
1897 m4_symbol_value *token)
1899 m4_obstack obs;
1900 size_t len;
1902 if (!s)
1903 s = "m4input";
1904 xfprintf (stderr, "%s: ", s);
1905 switch (type)
1906 { /* TOKSW */
1907 case M4_TOKEN_EOF:
1908 fputs ("eof", stderr);
1909 token = NULL;
1910 break;
1911 case M4_TOKEN_NONE:
1912 fputs ("none", stderr);
1913 token = NULL;
1914 break;
1915 case M4_TOKEN_STRING:
1916 fputs ("string\t", stderr);
1917 break;
1918 case M4_TOKEN_COMMENT:
1919 fputs ("comment\t", stderr);
1920 break;
1921 case M4_TOKEN_SPACE:
1922 fputs ("space\t", stderr);
1923 break;
1924 case M4_TOKEN_WORD:
1925 fputs ("word\t", stderr);
1926 break;
1927 case M4_TOKEN_OPEN:
1928 fputs ("open\t", stderr);
1929 break;
1930 case M4_TOKEN_COMMA:
1931 fputs ("comma\t", stderr);
1932 break;
1933 case M4_TOKEN_CLOSE:
1934 fputs ("close\t", stderr);
1935 break;
1936 case M4_TOKEN_SIMPLE:
1937 fputs ("simple\t", stderr);
1938 break;
1939 case M4_TOKEN_MACDEF:
1940 fputs ("builtin\t", stderr);
1941 break;
1942 case M4_TOKEN_ARGV:
1943 fputs ("argv\t", stderr);
1944 break;
1945 default:
1946 abort ();
1948 if (token)
1950 obstack_init (&obs);
1951 m4__symbol_value_print (context, token, &obs, NULL, false, NULL, NULL,
1952 true);
1953 len = obstack_object_size (&obs);
1954 xfprintf (stderr, "%s\n", quotearg_style_mem (c_maybe_quoting_style,
1955 obstack_finish (&obs),
1956 len));
1957 obstack_free (&obs, NULL);
1959 else
1960 fputc ('\n', stderr);
1961 return 0;
1963 #endif /* DEBUG_INPUT */