1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 /* Handling of different input sources, and lexical analysis. */
26 /* Unread input can be either files to be read (command line,
27 "include", "sinclude"), strings which should be rescanned (macro
28 expansion text), or quoted macro definitions (as returned by the
29 builtin "defn"). Unread input is organized in a stack, implemented
30 with an obstack. Each input source is described by a "struct
31 input_block". The obstack is "current_input". The top of the
32 input stack is "isp".
34 The macro "m4wrap" places the text to be saved on another input
35 stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
36 is seen on normal input (e.g., when "current_input" is empty), input is
37 switched over to "wrapup_stack", and the original "current_input" is
38 freed. A new stack is allocated for "wrapup_stack", which will
39 accept any text produced by calls to "m4wrap" from within the
40 wrapped text. This process of shuffling "wrapup_stack" to
41 "current_input" can continue indefinitely, even generating infinite
42 loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
44 Pushing new input on the input stack is done by push_file (),
45 push_string (), push_wrapup_init/push_wrapup_finish () (for wrapup
46 text), and push_macro () (for macro definitions). Because macro
47 expansion needs direct access to the current input obstack (for
48 optimization), push_string () is split in two functions,
49 push_string_init (), which returns a pointer to the current input
50 stack, and push_string_finish (), which returns a pointer to the
51 final text. The input_block *next is used to manage the
52 coordination between the different push routines.
54 The current file and line number are stored in two global
55 variables, for use by the error handling functions in m4.c. Macro
56 expansion wants to report the line where a macro name was detected,
57 rather than where it finished collecting arguments. This also
58 applies to text resulting from macro expansions. So each input
59 block maintains its own notion of the current file and line, and
60 swapping between input blocks updates the global variables
61 appropriately. */
63 #ifdef ENABLE_CHANGEWORD
65 #endif /* ENABLE_CHANGEWORD */
67 /* Number of bytes where it is more efficient to inline the reference
68 as a string than it is to track reference bookkeeping for those
69 bytes. */
70 #define INPUT_INLINE_THRESHOLD 16
72 /* Type of an input block. */
75 INPUT_STRING
, /* String resulting from macro expansion. */
76 INPUT_FILE
, /* File from command line or include. */
77 INPUT_MACRO
, /* Builtin resulting from defn. */
78 INPUT_CHAIN
/* FIFO chain of separate strings and $@ refs. */
81 typedef enum input_type input_type
;
83 /* A block of input to be scanned. */
86 input_block
*prev
; /* Previous input_block on the input stack. */
87 input_type type
; /* See enum values. */
88 const char *file
; /* File where this input is from. */
89 int line
; /* Line where this input is from. */
94 char *str
; /* Remaining string value. */
95 size_t len
; /* Remaining length. */
97 u_s
; /* INPUT_STRING */
100 FILE *fp
; /* Input file handle. */
101 bool_bitfield end
: 1; /* True if peek has seen EOF. */
102 bool_bitfield close
: 1; /* True to close file on pop. */
103 bool_bitfield advance
: 1; /* Track previous start_of_input_line. */
105 u_f
; /* INPUT_FILE */
106 builtin_func
*func
; /* INPUT_MACRO */
109 token_chain
*chain
; /* Current link in chain. */
110 token_chain
*end
; /* Last link in chain. */
112 u_c
; /* INPUT_CHAIN */
118 /* Current input file name. */
119 const char *current_file
;
121 /* Current input line number. */
124 /* Obstack for storing individual tokens. */
125 static struct obstack token_stack
;
127 /* Obstack for storing file names. */
128 static struct obstack file_names
;
130 /* Wrapup input stack. */
131 static struct obstack
*wrapup_stack
;
133 /* Current stack, from input or wrapup. */
134 static struct obstack
*current_input
;
136 /* Bottom of token_stack, for obstack_free. */
137 static void *token_bottom
;
139 /* Pointer to top of current_input. */
140 static input_block
*isp
;
142 /* Pointer to top of wrapup_stack. */
143 static input_block
*wsp
;
145 /* Aux. for handling split push_string (). */
146 static input_block
*next
;
148 /* Flag for next_char () to increment current_line. */
149 static bool start_of_input_line
;
151 /* Flag for next_char () to recognize change in input block. */
152 static bool input_change
;
154 #define CHAR_EOF (UCHAR_MAX + 1) /* Return on EOF. */
155 #define CHAR_MACRO (UCHAR_MAX + 2) /* Return for MACRO token. */
156 #define CHAR_QUOTE (UCHAR_MAX + 3) /* Return for quoted string. */
157 #define CHAR_ARGV (UCHAR_MAX + 4) /* Return for $@ reference. */
160 string_pair curr_quote
;
163 string_pair curr_comm
;
165 #ifdef ENABLE_CHANGEWORD
167 # define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
169 /* Current regular expression for detecting words. */
170 static struct re_pattern_buffer word_regexp
;
172 /* True if changeword is not active. */
173 static bool default_word_regexp
;
175 /* Reused memory for detecting matches in word detection. */
176 static struct re_registers regs
;
178 #else /* !ENABLE_CHANGEWORD */
179 # define default_word_regexp true
180 #endif /* !ENABLE_CHANGEWORD */
182 /* Track the current quote age, determined by all significant
183 changequote, changecom, and changeword calls, since any one of
184 these can alter the rescan of a prior parameter in a quoted
185 context. */
186 static unsigned int current_quote_age
;
188 /* Cache a quote pair. See quote_cache. */
189 static string_pair
*cached_quote
;
191 static bool pop_input (bool);
192 static void set_quote_age (void);
195 static const char *token_type_string (token_type
);
196 #endif /* DEBUG_INPUT */
199 /*-------------------------------------------------------------------.
200 | Given an obstack OBS, capture any unfinished text as a link in the |
201 | chain that starts at *START and ends at *END. START may be NULL |
202 | if *END is non-NULL. |
203 `-------------------------------------------------------------------*/
205 make_text_link (struct obstack
*obs
, token_chain
**start
, token_chain
**end
)
208 size_t len
= obstack_object_size (obs
);
210 assert (end
&& (start
|| *end
));
213 char *str
= (char *) obstack_finish (obs
);
214 chain
= (token_chain
*) obstack_alloc (obs
, sizeof *chain
);
216 (*end
)->next
= chain
;
221 chain
->type
= CHAIN_STR
;
222 chain
->quote_age
= 0;
223 chain
->u
.u_s
.str
= str
;
224 chain
->u
.u_s
.len
= len
;
225 chain
->u
.u_s
.level
= -1;
229 /*-------------------------------------------------------------------.
230 | push_file () pushes an input file on the input stack, saving the |
231 | current file name and line number. If next is non-NULL, this push |
232 | invalidates a call to push_string_init (), whose storage is |
233 | consequently released. If CLOSE, then close FP after EOF is |
234 | detected. TITLE is used as the location for text parsed from the |
235 | file (not necessarily the file name). |
236 `-------------------------------------------------------------------*/
239 push_file (FILE *fp
, const char *title
, bool close
)
245 obstack_free (current_input
, next
);
249 if (debug_level
& DEBUG_TRACE_INPUT
)
250 DEBUG_MESSAGE1 ("input read from %s", title
);
252 i
= (input_block
*) obstack_alloc (current_input
, sizeof *i
);
253 i
->type
= INPUT_FILE
;
254 i
->file
= (char *) obstack_copy0 (&file_names
, title
, strlen (title
));
259 i
->u
.u_f
.end
= false;
260 i
->u
.u_f
.close
= close
;
261 i
->u
.u_f
.advance
= start_of_input_line
;
262 output_current_line
= -1;
268 /*-----------------------------------------------------------------.
269 | push_macro () pushes the builtin macro FUNC on the input stack. |
270 | If next is non-NULL, this push invalidates a call to |
271 | push_string_init (), whose storage is consequently released. |
272 `-----------------------------------------------------------------*/
275 push_macro (builtin_func
*func
)
281 obstack_free (current_input
, next
);
286 i
= (input_block
*) obstack_alloc (current_input
, sizeof *i
);
287 i
->type
= INPUT_MACRO
;
288 i
->file
= current_file
;
289 i
->line
= current_line
;
297 /*--------------------------------------------------------------.
298 | First half of push_string (). The return value points to the |
299 | obstack where expansion text should be placed. |
300 `--------------------------------------------------------------*/
303 push_string_init (void)
305 /* Free any memory occupied by completely parsed strings. */
306 assert (next
== NULL
);
307 while (isp
&& pop_input (false));
309 /* Reserve the next location on the obstack. */
310 next
= (input_block
*) obstack_alloc (current_input
, sizeof *next
);
311 next
->type
= INPUT_STRING
;
312 next
->file
= current_file
;
313 next
->line
= current_line
;
315 return current_input
;
318 /*--------------------------------------------------------------------.
319 | This function allows gathering input from multiple locations, |
320 | rather than copying everything consecutively onto the input stack. |
321 | Must be called between push_string_init and push_string_finish. |
323 | If TOKEN contains text, then convert the current input block into |
324 | a chain if it is not one already, and add the contents of TOKEN as |
325 | a new link in the chain. LEVEL describes the current expansion |
326 | level, or -1 if TOKEN is composite, its contents reside entirely |
327 | on the current_input stack, and TOKEN lives in temporary storage. |
328 | If TOKEN is a simple string, then it belongs to the current macro |
329 | expansion. If TOKEN is composite, then each text link has a level |
330 | of -1 if it belongs to the current macro expansion, otherwise it |
331 | is a back-reference where level tracks which stack it came from. |
332 | The resulting input block chain contains links with a level of -1 |
333 | if the text belongs to the input stack, otherwise the level where |
334 | the back-reference comes from. |
336 | Return true only if a reference was created to the contents of |
337 | TOKEN, in which case, LEVEL was non-negative and the lifetime of |
338 | TOKEN and its contents must last as long as the input engine can |
339 | parse references to it. INUSE determines whether composite tokens |
340 | should favor creating back-references or copying text. |
341 `--------------------------------------------------------------------*/
343 push_token (token_data
*token
, int level
, bool inuse
)
345 token_chain
*src_chain
= NULL
;
350 /* Speed consideration - for short enough tokens, the speed and
351 memory overhead of parsing another INPUT_CHAIN link outweighs the
352 time to inline the token text. But don't re-copy text if it
353 already lives on the obstack. */
354 if (TOKEN_DATA_TYPE (token
) == TOKEN_TEXT
)
357 if (TOKEN_DATA_LEN (token
) <= INPUT_INLINE_THRESHOLD
)
359 obstack_grow (current_input
, TOKEN_DATA_TEXT (token
),
360 TOKEN_DATA_LEN (token
));
366 /* For composite tokens, if argv is already in use, creating
367 additional references for long text segments is more
368 efficient in time. But if argv is not yet in use, and we
369 have a composite token, then the token must already contain a
370 back-reference, and memory usage is more efficient if we can
371 avoid using the current expand_macro, even if it means larger
372 copies. */
373 assert (TOKEN_DATA_TYPE (token
) == TOKEN_COMP
);
374 src_chain
= token
->u
.u_c
.chain
;
375 while (level
>= 0 && src_chain
&& src_chain
->type
== CHAIN_STR
376 && (src_chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
377 || (!inuse
&& src_chain
->u
.u_s
.level
== -1)))
379 obstack_grow (current_input
, src_chain
->u
.u_s
.str
,
380 src_chain
->u
.u_s
.len
);
381 src_chain
= src_chain
->next
;
387 if (next
->type
== INPUT_STRING
)
389 next
->type
= INPUT_CHAIN
;
390 next
->u
.u_c
.chain
= next
->u
.u_c
.end
= NULL
;
392 make_text_link (current_input
, &next
->u
.u_c
.chain
, &next
->u
.u_c
.end
);
393 if (TOKEN_DATA_TYPE (token
) == TOKEN_TEXT
)
395 chain
= (token_chain
*) obstack_alloc (current_input
, sizeof *chain
);
397 next
->u
.u_c
.end
->next
= chain
;
399 next
->u
.u_c
.chain
= chain
;
400 next
->u
.u_c
.end
= chain
;
402 chain
->type
= CHAIN_STR
;
403 chain
->quote_age
= TOKEN_DATA_QUOTE_AGE (token
);
404 chain
->u
.u_s
.str
= TOKEN_DATA_TEXT (token
);
405 chain
->u
.u_s
.len
= TOKEN_DATA_LEN (token
);
406 chain
->u
.u_s
.level
= level
;
407 adjust_refcount (level
, true);
414 /* Nothing to copy, since link already lives on obstack. */
415 assert (src_chain
->type
!= CHAIN_STR
416 || src_chain
->u
.u_s
.level
== -1);
421 /* Allow inlining the final link with subsequent text. */
422 if (!src_chain
->next
&& src_chain
->type
== CHAIN_STR
423 && (src_chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
424 || (!inuse
&& src_chain
->u
.u_s
.level
== -1)))
426 obstack_grow (current_input
, src_chain
->u
.u_s
.str
,
427 src_chain
->u
.u_s
.len
);
430 /* We must clone each link in the chain, since next_char
431 destructively modifies the chain it is parsing. */
432 chain
= (token_chain
*) obstack_copy (current_input
, src_chain
,
434 if (chain
->type
== CHAIN_STR
&& chain
->u
.u_s
.level
== -1)
436 if (chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
|| !inuse
)
437 chain
->u
.u_s
.str
= (char *) obstack_copy (current_input
,
442 chain
->u
.u_s
.level
= level
;
448 next
->u
.u_c
.end
->next
= chain
;
450 next
->u
.u_c
.chain
= chain
;
451 next
->u
.u_c
.end
= chain
;
452 if (chain
->type
== CHAIN_ARGV
)
454 assert (!chain
->u
.u_a
.comma
&& !chain
->u
.u_a
.skip_last
);
455 inuse
|= arg_adjust_refcount (chain
->u
.u_a
.argv
, true);
457 else if (chain
->type
== CHAIN_STR
&& chain
->u
.u_s
.level
>= 0)
458 adjust_refcount (chain
->u
.u_s
.level
, true);
459 src_chain
= src_chain
->next
;
464 /*-------------------------------------------------------------------.
465 | Last half of push_string (). If next is now NULL, a call to |
466 | push_file () or push_macro () has invalidated the previous call to |
467 | push_string_init (), so we just give up. If the new object is |
468 | void, we do not push it. The function push_string_finish () |
469 | returns an opaque pointer to the finished object, which can then |
470 | be printed with input_print when tracing is enabled. This pointer |
471 | is only for temporary use, since reading the next token will |
472 | invalidate the object. |
473 `-------------------------------------------------------------------*/
476 push_string_finish (void)
478 input_block
*ret
= NULL
;
479 size_t len
= obstack_object_size (current_input
);
487 if (len
|| next
->type
== INPUT_CHAIN
)
489 if (next
->type
== INPUT_STRING
)
491 next
->u
.u_s
.str
= (char *) obstack_finish (current_input
);
492 next
->u
.u_s
.len
= len
;
495 make_text_link (current_input
, &next
->u
.u_c
.chain
, &next
->u
.u_c
.end
);
502 obstack_free (current_input
, next
);
507 /*--------------------------------------------------------------.
508 | The function push_wrapup_init () returns an obstack ready for |
509 | direct expansion of wrapup text, and should be followed by |
510 | push_wrapup_finish (). |
511 `--------------------------------------------------------------*/
514 push_wrapup_init (void)
517 i
= (input_block
*) obstack_alloc (wrapup_stack
, sizeof *i
);
519 i
->type
= INPUT_STRING
;
520 i
->file
= current_file
;
521 i
->line
= current_line
;
526 /*---------------------------------------------------------------.
527 | After pushing wrapup text, push_wrapup_finish () completes the |
528 | wrapup block, or frees it when no text was collected. |
529 `---------------------------------------------------------------*/
531 push_wrapup_finish (void)
533 input_block
*i
= wsp
;
534 if (obstack_object_size (wrapup_stack
) == 0)
537 obstack_free (wrapup_stack
, i
);
541 i
->u
.u_s
.len
= obstack_object_size (wrapup_stack
);
542 i
->u
.u_s
.str
= (char *) obstack_finish (wrapup_stack
);
547 /*-------------------------------------------------------------------.
548 | The function pop_input () pops one level of input sources. If |
549 | CLEANUP, and the popped input_block is a file, current_file and |
550 | current_line are reset to the saved values before the memory for |
551 | the input_block is released. The return value is false if cleanup |
552 | is still required, or if the current input source is not |
553 | exhausted. |
554 `-------------------------------------------------------------------*/
557 pop_input (bool cleanup
)
559 input_block
*tmp
= isp
->prev
;
565 assert (!cleanup
|| !isp
->u
.u_s
.len
);
576 chain
= isp
->u
.u_c
.chain
;
577 assert (!chain
|| !cleanup
);
583 if (chain
->u
.u_s
.len
)
585 if (chain
->u
.u_s
.level
>= 0)
586 adjust_refcount (chain
->u
.u_s
.level
, false);
589 if (chain
->u
.u_a
.index
< arg_argc (chain
->u
.u_a
.argv
))
591 arg_adjust_refcount (chain
->u
.u_a
.argv
, false);
594 assert (!"pop_input");
597 isp
->u
.u_c
.chain
= chain
= chain
->next
;
604 if (debug_level
& DEBUG_TRACE_INPUT
)
607 DEBUG_MESSAGE2 ("input reverted to %s, line %d",
608 tmp
->file
, tmp
->line
);
610 DEBUG_MESSAGE ("input exhausted");
613 if (ferror (isp
->u
.u_f
.fp
))
615 m4_error (0, 0, NULL
, _("read error"));
616 if (isp
->u
.u_f
.close
)
617 fclose (isp
->u
.u_f
.fp
);
619 else if (isp
->u
.u_f
.close
&& fclose (isp
->u
.u_f
.fp
) == EOF
)
620 m4_error (0, errno
, NULL
, _("error reading file"));
621 start_of_input_line
= isp
->u
.u_f
.advance
;
622 output_current_line
= -1;
626 assert (!"pop_input");
629 obstack_free (current_input
, isp
);
631 next
= NULL
; /* might be set in push_string_init () */
638 /*------------------------------------------------------------------------.
639 | To switch input over to the wrapup stack, main () calls pop_wrapup (). |
640 | Since wrapup text can install new wrapup text, pop_wrapup () returns |
641 | false when there is no wrapup text on the stack, and true otherwise. |
642 `------------------------------------------------------------------------*/
648 obstack_free (current_input
, NULL
);
649 free (current_input
);
653 /* End of the program. Free all memory even though we are about
654 to exit, since it makes leak detection easier. */
655 obstack_free (&token_stack
, NULL
);
656 obstack_free (&file_names
, NULL
);
657 obstack_free (wrapup_stack
, NULL
);
659 #ifdef ENABLE_CHANGEWORD
660 regfree (&word_regexp
);
661 #endif /* ENABLE_CHANGEWORD */
665 current_input
= wrapup_stack
;
666 wrapup_stack
= (struct obstack
*) xmalloc (sizeof *wrapup_stack
);
667 obstack_init (wrapup_stack
);
676 /*--------------------------------------------------------------.
677 | Dump a representation of INPUT to the obstack OBS, for use in |
678 | tracing. |
679 `--------------------------------------------------------------*/
681 input_print (struct obstack
*obs
, const input_block
*input
)
683 int maxlen
= max_debug_argument_length
;
690 shipout_string_trunc (obs
, input
->u
.u_s
.str
, input
->u
.u_s
.len
, &maxlen
);
693 obstack_grow (obs
, "<file: ", strlen ("<file: "));
694 obstack_grow (obs
, input
->file
, strlen (input
->file
));
695 obstack_1grow (obs
, '>');
698 func_print (obs
, find_builtin_by_addr (input
->u
.func
), false, NULL
);
701 chain
= input
->u
.u_c
.chain
;
707 if (shipout_string_trunc (obs
, chain
->u
.u_s
.str
,
708 chain
->u
.u_s
.len
, &maxlen
))
712 assert (!chain
->u
.u_a
.comma
);
713 if (arg_print (obs
, chain
->u
.u_a
.argv
, chain
->u
.u_a
.index
,
714 quote_cache (NULL
, chain
->quote_age
,
715 chain
->u
.u_a
.quotes
),
716 chain
->u
.u_a
.flatten
, NULL
, &maxlen
, false))
720 assert (!"input_print");
727 assert (!"input_print");
733 /*------------------------------------------------------------------.
734 | Low level input is done a character at a time. The function |
735 | peek_input () is used to look at the next character in the input |
736 | stream. At any given time, it reads from the input_block on the |
737 | top of the current input stack. The return value is an unsigned |
738 | char, CHAR_EOF if there is no more input, CHAR_MACRO if a builtin |
739 | token occurs next, or CHAR_ARGV if ALLOW_ARGV and the input is |
740 | visiting an argv reference with the correct quoting. |
741 `------------------------------------------------------------------*/
744 peek_input (bool allow_argv
)
747 input_block
*block
= isp
;
758 if (!block
->u
.u_s
.len
)
760 return to_uchar (block
->u
.u_s
.str
[0]);
763 ch
= getc (block
->u
.u_f
.fp
);
766 ungetc (ch
, block
->u
.u_f
.fp
);
769 block
->u
.u_f
.end
= true;
776 chain
= block
->u
.u_c
.chain
;
783 if (chain
->u
.u_s
.len
)
784 return to_uchar (*chain
->u
.u_s
.str
);
787 argc
= arg_argc (chain
->u
.u_a
.argv
);
788 if (chain
->u
.u_a
.index
== argc
)
790 if (chain
->u
.u_a
.comma
)
792 /* Only return a reference if the quoting is correct
793 and the reference has more than one argument
794 left. */
795 if (allow_argv
&& chain
->quote_age
== current_quote_age
796 && chain
->u
.u_a
.quotes
&& chain
->u
.u_a
.index
+ 1 < argc
)
798 /* Rather than directly parse argv here, we push
799 another input block containing the next unparsed
800 argument from argv. */
802 push_arg_quote (current_input
, chain
->u
.u_a
.argv
,
804 quote_cache (NULL
, chain
->quote_age
,
805 chain
->u
.u_a
.quotes
));
806 chain
->u
.u_a
.index
++;
807 chain
->u
.u_a
.comma
= true;
808 push_string_finish ();
809 return peek_input (allow_argv
);
811 assert (!"peek_input");
819 assert (!"peek_input");
826 /*-------------------------------------------------------------------.
827 | The function next_char () is used to read and advance the input to |
828 | the next character. It also manages line numbers for error |
829 | messages, so they do not get wrong due to lookahead. The token |
830 | consisting of a newline alone is taken as belonging to the line it |
831 | ends, and the current line number is not incremented until the |
832 | next character is read. 99.9% of all calls will read from a |
833 | string, so factor that out into a macro for speed. If |
834 | ALLOW_QUOTE, and the current input matches the current quote age, |
835 | return CHAR_QUOTE and leave consumption of data for |
836 | append_quote_token. |
837 `-------------------------------------------------------------------*/
839 #define next_char(AQ) \
840 (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \
841 ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \
845 next_char_1 (bool allow_quote
)
861 current_file
= isp
->file
;
862 current_line
= isp
->line
;
863 input_change
= false;
872 return to_uchar (*isp
->u
.u_s
.str
++);
875 if (start_of_input_line
)
877 start_of_input_line
= false;
878 current_line
= ++isp
->line
;
881 /* If stdin is a terminal, calling getc after peek_input
882 already called it would make the user have to hit ^D
883 twice. */
884 ch
= isp
->u
.u_f
.end
? EOF
: getc (isp
->u
.u_f
.fp
);
888 start_of_input_line
= true;
894 /* An INPUT_MACRO input source has only one token. */
899 chain
= isp
->u
.u_c
.chain
;
902 if (allow_quote
&& chain
->quote_age
== current_quote_age
)
907 if (chain
->u
.u_s
.len
)
909 /* Partial consumption invalidates quote age. */
910 chain
->quote_age
= 0;
912 return to_uchar (*chain
->u
.u_s
.str
++);
914 if (chain
->u
.u_s
.level
>= 0)
915 adjust_refcount (chain
->u
.u_s
.level
, false);
918 if (chain
->u
.u_a
.index
== arg_argc (chain
->u
.u_a
.argv
))
920 arg_adjust_refcount (chain
->u
.u_a
.argv
, false);
923 if (chain
->u
.u_a
.comma
)
925 chain
->u
.u_a
.comma
= false;
928 /* Rather than directly parse argv here, we push
929 another input block containing the next unparsed
930 argument from argv. */
932 push_arg_quote (current_input
, chain
->u
.u_a
.argv
,
934 quote_cache (NULL
, chain
->quote_age
,
935 chain
->u
.u_a
.quotes
));
936 chain
->u
.u_a
.index
++;
937 chain
->u
.u_a
.comma
= true;
938 push_string_finish ();
939 return next_char_1 (allow_quote
);
941 assert (!"next_char_1");
944 isp
->u
.u_c
.chain
= chain
= chain
->next
;
949 assert (!"next_char_1");
953 /* End of input source --- pop one level. */
958 /*-------------------------------------------------------------------.
959 | skip_line () simply discards all immediately following characters, |
960 | up to the first newline. It is only used from m4_dnl (). Report |
961 | warnings on behalf of NAME. |
962 `-------------------------------------------------------------------*/
965 skip_line (const char *name
)
968 const char *file
= current_file
;
969 int line
= current_line
;
971 while ((ch
= next_char (false)) != CHAR_EOF
&& ch
!= '\n')
974 /* current_file changed to "" if we see CHAR_EOF, use the
975 previous value we stored earlier. */
976 m4_warn_at_line (0, file
, line
, name
,
977 _("end of file treated as newline"));
978 /* On the rare occasion that dnl crosses include file boundaries
979 (either the input file did not end in a newline, or changeword
980 was used), calling next_char can update current_file and
981 current_line, and that update will be undone as we return to
982 expand_macro. This informs next_char to fix things again. */
983 if (file
!= current_file
|| line
!= current_line
)
987 /*-------------------------------------------------------------------.
988 | When a MACRO token is seen, next_token () uses init_macro_token () |
989 | to retrieve the value of the function pointer and store it in TD. |
990 `-------------------------------------------------------------------*/
993 init_macro_token (token_data
*td
)
995 assert (isp
->type
== INPUT_MACRO
);
996 TOKEN_DATA_TYPE (td
) = TOKEN_FUNC
;
997 TOKEN_DATA_FUNC (td
) = isp
->u
.func
;
1000 /*-------------------------------------------------------------------.
1001 | When a QUOTE token is seen, convert TD to a composite (if it is |
1002 | not one already), consisting of any unfinished text on OBS, as |
1003 | well as the quoted token from the top of the input stack. Use OBS |
1004 | for any additional allocations needed to store the token chain. |
1005 `-------------------------------------------------------------------*/
1007 append_quote_token (struct obstack
*obs
, token_data
*td
)
1009 token_chain
*src_chain
= isp
->u
.u_c
.chain
;
1012 assert (isp
->type
== INPUT_CHAIN
&& obs
&& current_quote_age
);
1013 isp
->u
.u_c
.chain
= src_chain
->next
;
1015 /* Speed consideration - for short enough tokens, the speed and
1016 memory overhead of parsing another INPUT_CHAIN link outweighs the
1017 time to inline the token text. */
1018 if (src_chain
->type
== CHAIN_STR
1019 && src_chain
->u
.u_s
.len
<= INPUT_INLINE_THRESHOLD
)
1021 assert (src_chain
->u
.u_s
.level
>= 0);
1022 obstack_grow (obs
, src_chain
->u
.u_s
.str
, src_chain
->u
.u_s
.len
);
1023 adjust_refcount (src_chain
->u
.u_s
.level
, false);
1027 if (TOKEN_DATA_TYPE (td
) == TOKEN_VOID
)
1029 TOKEN_DATA_TYPE (td
) = TOKEN_COMP
;
1030 td
->u
.u_c
.chain
= td
->u
.u_c
.end
= NULL
;
1031 td
->u
.u_c
.wrapper
= td
->u
.u_c
.has_func
= false;
1033 assert (TOKEN_DATA_TYPE (td
) == TOKEN_COMP
);
1034 make_text_link (obs
, &td
->u
.u_c
.chain
, &td
->u
.u_c
.end
);
1035 chain
= (token_chain
*) obstack_copy (obs
, src_chain
, sizeof *chain
);
1037 td
->u
.u_c
.end
->next
= chain
;
1039 td
->u
.u_c
.chain
= chain
;
1040 td
->u
.u_c
.end
= chain
;
1041 if (chain
->type
== CHAIN_ARGV
&& chain
->u
.u_a
.has_func
)
1042 td
->u
.u_c
.has_func
= true;
1047 /*-------------------------------------------------------------------.
1048 | When an ARGV token is seen, convert TD to point to it via a |
1049 | composite token. Use OBS for any additional allocations needed to |
1050 | store the token chain. |
1051 `-------------------------------------------------------------------*/
1053 init_argv_token (struct obstack
*obs
, token_data
*td
)
1055 token_chain
*src_chain
;
1057 int ch
= next_char (true);
1059 assert (ch
== CHAR_QUOTE
&& TOKEN_DATA_TYPE (td
) == TOKEN_VOID
1060 && isp
->type
== INPUT_CHAIN
&& isp
->u
.u_c
.chain
->type
== CHAIN_ARGV
1061 && obs
&& obstack_object_size (obs
) == 0);
1063 src_chain
= isp
->u
.u_c
.chain
;
1064 isp
->u
.u_c
.chain
= src_chain
->next
;
1065 TOKEN_DATA_TYPE (td
) = TOKEN_COMP
;
1066 /* Clone the link, since the input will be discarded soon. */
1067 chain
= (token_chain
*) obstack_copy (obs
, src_chain
, sizeof *chain
);
1068 td
->u
.u_c
.chain
= td
->u
.u_c
.end
= chain
;
1069 td
->u
.u_c
.wrapper
= true;
1070 td
->u
.u_c
.has_func
= chain
->u
.u_a
.has_func
;
1073 /* If the next character is not ',' or ')', then unlink the last
1074 argument from argv and schedule it for reparsing. This way,
1075 expand_argument never has to deal with concatenation of argv with
1076 arbitrary text. Note that the implementation of safe_quotes
1077 ensures peek_input won't return CHAR_ARGV if the user is perverse
1078 enough to mix comment delimiters with argument separators:
1080 define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c))
1083 Therefore, we do not have to worry about calling MATCH, and thus
1084 do not have to worry about pop_input being called and
1085 invalidating the argv reference.
1087 When the $@ ref is used unchanged, we completely bypass the
1088 decrement of the argv refcount in next_char_1, since the ref is
1089 still live via the current collect_arguments. However, when the
1090 last element of the $@ ref is reparsed, we must increase the argv
1091 refcount here, to compensate for the fact that it will be
1092 decreased once the final element is parsed. */
1093 assert (*curr_comm
.str1
!= ',' && *curr_comm
.str1
!= ')'
1094 && *curr_comm
.str1
!= *curr_quote
.str1
);
1095 ch
= peek_input (false);
1096 if (ch
!= ',' && ch
!= ')')
1098 isp
->u
.u_c
.chain
= src_chain
;
1099 src_chain
->u
.u_a
.index
= arg_argc (chain
->u
.u_a
.argv
) - 1;
1100 src_chain
->u
.u_a
.comma
= true;
1101 chain
->u
.u_a
.skip_last
= true;
1102 arg_adjust_refcount (chain
->u
.u_a
.argv
, true);
1107 /*------------------------------------------------------------------.
1108 | This function is for matching a string against a prefix of the |
1109 | input stream. If the string S matches the input and CONSUME is |
1110 | true, the input is discarded; otherwise any characters read are |
1111 | pushed back again. The function is used only when multicharacter |
1112 | quotes or comment delimiters are used. |
1113 `------------------------------------------------------------------*/
1116 match_input (const char *s
, bool consume
)
1118 int n
; /* number of characters matched */
1119 int ch
; /* input character */
1121 bool result
= false;
1123 ch
= peek_input (false);
1124 if (ch
!= to_uchar (*s
))
1125 return false; /* fail */
1131 return true; /* short match */
1135 for (n
= 1, t
= s
++; (ch
= peek_input (false)) == to_uchar (*s
++); )
1139 if (*s
== '\0') /* long match */
1148 /* Failed or shouldn't consume, push back input. */
1149 push_string_init ();
1150 obstack_grow (current_input
, t
, n
);
1151 push_string_finish ();
1155 /*--------------------------------------------------------------------.
1156 | The macro MATCH() is used to match a string S against the input. |
1157 | The first character is handled inline, for speed. Hopefully, this |
1158 | will not hurt efficiency too much when single character quotes and |
1159 | comment delimiters are used. If CONSUME, then CH is the result of |
1160 | next_char, and a successful match will discard the matched string. |
1161 | Otherwise, CH is the result of peek_input, and the input stream is |
1162 | effectively unchanged. |
1163 `--------------------------------------------------------------------*/
1165 #define MATCH(ch, s, consume) \
1166 (to_uchar ((s)[0]) == (ch) \
1168 && ((s)[1] == '\0' || (match_input ((s) + (consume), consume))))
1171 /*----------------------------------------------------------.
1172 | Initialize input stacks, and quote/comment characters.  |
1173 `----------------------------------------------------------*/
1181 current_input
= (struct obstack
*) xmalloc (sizeof *current_input
);
1182 obstack_init (current_input
);
1183 wrapup_stack
= (struct obstack
*) xmalloc (sizeof *wrapup_stack
);
1184 obstack_init (wrapup_stack
);
1186 obstack_init (&file_names
);
1188 /* Allocate an object in the current chunk, so that obstack_free
1189 will always work even if the first token parsed spills to a new
1191 obstack_init (&token_stack
);
1192 token_bottom
= obstack_finish (&token_stack
);
1198 start_of_input_line
= false;
1200 curr_quote
.str1
= xstrdup (DEF_LQUOTE
);
1201 curr_quote
.len1
= strlen (curr_quote
.str1
);
1202 curr_quote
.str2
= xstrdup (DEF_RQUOTE
);
1203 curr_quote
.len2
= strlen (curr_quote
.str2
);
1204 curr_comm
.str1
= xstrdup (DEF_BCOMM
);
1205 curr_comm
.len1
= strlen (curr_comm
.str1
);
1206 curr_comm
.str2
= xstrdup (DEF_ECOMM
);
1207 curr_comm
.len2
= strlen (curr_comm
.str2
);
1209 #ifdef ENABLE_CHANGEWORD
1210 set_word_regexp (NULL
, user_word_regexp
);
1211 #endif /* ENABLE_CHANGEWORD */
1217 /*--------------------------------------------------------------------.
1218 | Set the quote delimiters to LQ and RQ. Used by m4_changequote (). |
1219 | Pass NULL if the argument was not present, to distinguish from an |
1220 | explicit empty string. |
1221 `--------------------------------------------------------------------*/
1224 set_quotes (const char *lq
, const char *rq
)
1226 /* POSIX states that with 0 arguments, the default quotes are used.
1227 POSIX XCU ERN 112 states that behavior is implementation-defined
1228 if there was only one argument, or if there is an empty string in
1229 either position when there are two arguments. We allow an empty
1230 left quote to disable quoting, but a non-empty left quote will
1231 always create a non-empty right quote. See the texinfo for what
1232 some other implementations do. */
1238 else if (!rq
|| (*lq
&& !*rq
))
1241 if (strcmp (curr_quote
.str1
, lq
) == 0 && strcmp (curr_quote
.str2
, rq
) == 0)
1244 free (curr_quote
.str1
);
1245 free (curr_quote
.str2
);
1246 curr_quote
.str1
= xstrdup (lq
);
1247 curr_quote
.len1
= strlen (curr_quote
.str1
);
1248 curr_quote
.str2
= xstrdup (rq
);
1249 curr_quote
.len2
= strlen (curr_quote
.str2
);
1253 /*--------------------------------------------------------------------.
1254 | Set the comment delimiters to BC and EC. Used by m4_changecom (). |
1255 | Pass NULL if the argument was not present, to distinguish from an |
1256 | explicit empty string. |
1257 `--------------------------------------------------------------------*/
1260 set_comment (const char *bc
, const char *ec
)
1262 /* POSIX requires no arguments to disable comments. It requires
1263 empty arguments to be used as-is, but this is counter to
1264 traditional behavior, because a non-null begin and null end makes
1265 it impossible to end a comment. An aardvark has been filed:
1266 http://www.opengroup.org/austin/mailarchives/ag-review/msg02168.html
1267 This implementation assumes the aardvark will be approved. See
1268 the texinfo for what some other implementations do. */
1271 else if (!ec
|| (*bc
&& !*ec
))
1274 if (strcmp (curr_comm
.str1
, bc
) == 0 && strcmp (curr_comm
.str2
, ec
) == 0)
1277 free (curr_comm
.str1
);
1278 free (curr_comm
.str2
);
1279 curr_comm
.str1
= xstrdup (bc
);
1280 curr_comm
.len1
= strlen (curr_comm
.str1
);
1281 curr_comm
.str2
= xstrdup (ec
);
1282 curr_comm
.len2
= strlen (curr_comm
.str2
);
1286 #ifdef ENABLE_CHANGEWORD
1288 /*-------------------------------------------------------------------.
1289 | Set the regular expression for recognizing words to REGEXP, and |
1290 | report errors on behalf of CALLER. If REGEXP is NULL, revert back |
1291 | to the default parsing rules. |
1292 `-------------------------------------------------------------------*/
1295 set_word_regexp (const char *caller
, const char *regexp
)
1298 struct re_pattern_buffer new_word_regexp
;
1300 if (!*regexp
|| !strcmp (regexp
, DEFAULT_WORD_REGEXP
))
1302 default_word_regexp
= true;
1307 /* Dry run to see whether the new expression is compilable. */
1308 init_pattern_buffer (&new_word_regexp
, NULL
);
1309 msg
= re_compile_pattern (regexp
, strlen (regexp
), &new_word_regexp
);
1310 regfree (&new_word_regexp
);
1314 m4_warn (0, caller
, _("bad regular expression `%s': %s"), regexp
, msg
);
1318 /* If compilation worked, retry using the word_regexp struct. We
1319 can't rely on struct assigns working, so redo the compilation.
1320 The fastmap can be reused between compilations, and will be freed
1321 by the final regfree. */
1322 if (!word_regexp
.fastmap
)
1323 word_regexp
.fastmap
= xcharalloc (UCHAR_MAX
+ 1);
1324 msg
= re_compile_pattern (regexp
, strlen (regexp
), &word_regexp
);
1326 re_set_registers (&word_regexp
, &regs
, regs
.num_regs
, regs
.start
, regs
.end
);
1327 if (re_compile_fastmap (&word_regexp
))
1330 default_word_regexp
= false;
1334 #endif /* ENABLE_CHANGEWORD */
1336 /* Call this when changing anything that might impact the quote age,
1337 so that quote_age and safe_quotes will reflect the change. */
1339 set_quote_age (void)
1341 /* Multi-character quotes are inherently unsafe, since concatenation
1342 of individual characters can result in a quote delimiter,
1345 define(echo,``$1'')define(a,A)changequote(<[,]>)echo(<[]]><[>a]>)
1348 Also, unquoted close delimiters are unsafe, consider:
1350 define(echo,``$1'')define(a,A)echo(`a''`a')
1353 Comment delimiters that overlap with quote delimiters or active
1354 characters also present a problem, consider:
1356 define(echo,$*)echo(a,a,a`'define(a,A)changecom(`,',`,'))
1357 => A,a,A (not A,A,A)
1359 And let's not even think about the impact of changeword, since it
1360 will disappear for M4 2.0.
1362 So rather than check every token for an unquoted delimiter, we
1363 merely encode current_quote_age to 0 when things are unsafe, and
1364 non-zero when safe (namely, to the 16-bit value composed of the
1365 single-character start and end quote delimiters). There may be
1366 other situations which are safe even when this algorithm sets the
1367 quote_age to zero, but at least a quote_age of zero always produces
1368 correct results (although it may take more time in doing so). */
1370 /* Heuristic of characters that might impact rescan if they appear in
1371 a quote delimiter. */
1372 #define Letters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
1373 static const char unsafe
[] = Letters
"_0123456789(,) \t\n\r\f\v";
1376 if (curr_quote
.len1
== 1 && curr_quote
.len2
== 1
1377 && strpbrk (curr_quote
.str1
, unsafe
) == NULL
1378 && strpbrk (curr_quote
.str2
, unsafe
) == NULL
1379 && default_word_regexp
&& *curr_quote
.str1
!= *curr_quote
.str2
1380 && *curr_comm
.str1
!= '(' && *curr_comm
.str1
!= ','
1381 && *curr_comm
.str1
!= ')' && *curr_comm
.str1
!= *curr_quote
.str1
)
1382 current_quote_age
= (((*curr_quote
.str1
& 0xff) << 8)
1383 | (*curr_quote
.str2
& 0xff));
1385 current_quote_age
= 0;
1386 cached_quote
= NULL
;
1389 /* Return the current quote age. Each non-trivial changequote alters
1390 this value; the idea is that if quoting hasn't changed, then we can
1391 skip parsing a single argument, quoted or unquoted, within the
1392 context of a quoted string, as well as skip parsing a series of
1393 quoted arguments within the context of argument collection. */
1397 /* This accessor is a function, so that the implementation can
1398 change if needed. See set_quote_age for the current
1400 return current_quote_age
;
1403 /* Return true if the current quote delimiters guarantee that
1404 reparsing the current token in the context of a quoted string will
1405 be safe. This could always return false and behavior would still
1406 be correct, just slower. */
1410 return current_quote_age
!= 0;
1413 /* Interface for caching frequently used quote pairs, using AGE for
1414 optimization. If QUOTES is NULL, don't use quoting. If OBS is
1415 non-NULL, AGE should be the current quote age, and QUOTES should be
1416 &curr_quote; the return value will be a cached quote pair, where
1417 the pointer is valid at least as long as OBS is not reset, but
1418 whose contents are only guaranteed until the next changequote or
1419 quote_cache. Otherwise, OBS is NULL, AGE should be the same as
1420 before, and QUOTES should be a previously returned cache value;
1421 used to refresh the contents of the result. */
1423 quote_cache (struct obstack
*obs
, unsigned int age
, const string_pair
*quotes
)
1425 static char lquote
[2];
1426 static char rquote
[2];
1427 static string_pair simple
= {lquote
, 1, rquote
, 1};
1429 /* Implementation - if AGE is non-zero, then the implementation of
1430 set_quote_age guarantees that we can recreate the return value on
1431 the fly; so we use static storage, and the contents must be used
1432 immediately. If AGE is zero, then we must copy QUOTES onto OBS
1433 (since changequote will invalidate the original), but we might as
1434 well cache that copy (in case the current expansion contains more
1435 than one instance of $@). */
1440 *lquote
= (age
>> 8) & 0xff;
1441 *rquote
= age
& 0xff;
1446 assert (next
&& quotes
== &curr_quote
);
1449 assert (obs
== current_input
&& obstack_object_size (obs
) == 0);
1450 cached_quote
= (string_pair
*) obstack_copy (obs
, quotes
,
1452 cached_quote
->str1
= (char *) obstack_copy0 (obs
, quotes
->str1
,
1454 cached_quote
->str2
= (char *) obstack_copy0 (obs
, quotes
->str2
,
1457 return cached_quote
;
1461 /*--------------------------------------------------------------------.
1462 | Parse a single token from the input stream, set TD to its |
1463 | contents, and return its type. A token is TOKEN_EOF if the |
1464 | input_stack is empty; TOKEN_STRING for a quoted string or comment; |
1465 | TOKEN_WORD for something that is a potential macro name; and |
1466 | TOKEN_SIMPLE for any single character that is not a part of any of |
1467 | the previous types. If LINE is not NULL, set *LINE to the line |
1468 | where the token starts. If OBS is not NULL, expand TOKEN_STRING |
1469 | directly into OBS rather than in token_stack temporary storage |
1470 | area, and TD could be a TOKEN_COMP instead of the usual |
1471 | TOKEN_TEXT. If ALLOW_ARGV, OBS must be non-NULL, and an entire |
1472 | series of arguments can be returned as TOKEN_ARGV when a $@ |
1473 | reference is encountered. Report errors (unterminated comments or |
1474 | strings) on behalf of CALLER, if non-NULL. |
1476 | Next_token () returns the token type, and passes back a pointer to |
1477 | the token data through TD. Non-string token text is collected on |
1478 | the obstack token_stack, which never contains more than one token |
1479 | text at a time. The storage pointed to by the fields in TD is |
1480 | therefore subject to change the next time next_token () is called. |
1481 `--------------------------------------------------------------------*/
1484 next_token (token_data
*td
, int *line
, struct obstack
*obs
, bool allow_argv
,
1490 #ifdef ENABLE_CHANGEWORD
1491 char *orig_text
= NULL
;
1492 #endif /* ENABLE_CHANGEWORD */
1495 /* The obstack where token data is stored. Generally token_stack,
1496 for tokens where argument collection might not use the literal
1497 token. But for comments and strings, we can output directly into
1498 the argument collection obstack obs, if one was provided. */
1499 struct obstack
*obs_td
= &token_stack
;
1501 obstack_free (&token_stack
, token_bottom
);
1505 /* Can't consume character until after CHAR_MACRO is handled. */
1506 TOKEN_DATA_TYPE (td
) = TOKEN_VOID
;
1507 ch
= peek_input (allow_argv
&& current_quote_age
);
1511 xfprintf (stderr
, "next_token -> EOF\n");
1512 #endif /* DEBUG_INPUT */
1516 if (ch
== CHAR_MACRO
)
1518 init_macro_token (td
);
1521 xfprintf (stderr
, "next_token -> MACDEF (%s)\n",
1522 find_builtin_by_addr (TOKEN_DATA_FUNC (td
))->name
);
1523 #endif /* DEBUG_INPUT */
1524 return TOKEN_MACDEF
;
1526 if (ch
== CHAR_ARGV
)
1528 init_argv_token (obs
, td
);
1530 xfprintf (stderr
, "next_token -> ARGV (%d args)\n",
1531 (arg_argc (td
->u
.u_c
.chain
->u
.u_a
.argv
)
1532 - td
->u
.u_c
.chain
->u
.u_a
.index
1533 - (td
->u
.u_c
.chain
->u
.u_a
.skip_last
? 1 : 0)));
1538 next_char (false); /* Consume character we already peeked at. */
1539 file
= current_file
;
1540 *line
= current_line
;
1541 if (MATCH (ch
, curr_comm
.str1
, true))
1545 obstack_grow (obs_td
, curr_comm
.str1
, curr_comm
.len1
);
1546 while ((ch
= next_char (false)) < CHAR_EOF
1547 && !MATCH (ch
, curr_comm
.str2
, true))
1548 obstack_1grow (obs_td
, ch
);
1551 assert (ch
< CHAR_EOF
);
1552 obstack_grow (obs_td
, curr_comm
.str2
, curr_comm
.len2
);
1555 /* Current_file changed to "" if we see CHAR_EOF, use the
1556 previous value we stored earlier. */
1557 m4_error_at_line (EXIT_FAILURE
, 0, file
, *line
, caller
,
1558 _("end of file in comment"));
1560 type
= TOKEN_STRING
;
1562 else if (default_word_regexp
&& (isalpha (ch
) || ch
== '_'))
1564 obstack_1grow (&token_stack
, ch
);
1565 while ((ch
= peek_input (false)) < CHAR_EOF
1566 && (isalnum (ch
) || ch
== '_'))
1568 obstack_1grow (&token_stack
, ch
);
1574 #ifdef ENABLE_CHANGEWORD
1576 else if (!default_word_regexp
&& word_regexp
.fastmap
[ch
])
1578 obstack_1grow (&token_stack
, ch
);
1581 ch
= peek_input (false);
1584 obstack_1grow (&token_stack
, ch
);
1585 if (re_match (&word_regexp
, (char *) obstack_base (&token_stack
),
1586 obstack_object_size (&token_stack
), 0, &regs
)
1587 != obstack_object_size (&token_stack
))
1589 obstack_blank (&token_stack
, -1);
1595 obstack_1grow (&token_stack
, '\0');
1596 orig_text
= (char *) obstack_finish (&token_stack
);
1598 if (regs
.start
[1] != -1)
1599 obstack_grow (&token_stack
, orig_text
+ regs
.start
[1],
1600 regs
.end
[1] - regs
.start
[1]);
1602 obstack_grow (&token_stack
, orig_text
, regs
.end
[0]);
1607 #endif /* ENABLE_CHANGEWORD */
1609 else if (!MATCH (ch
, curr_quote
.str1
, true))
1623 type
= TOKEN_SIMPLE
;
1626 obstack_1grow (&token_stack
, ch
);
1635 ch
= next_char (obs
!= NULL
&& current_quote_age
);
1637 /* Current_file changed to "" if we see CHAR_EOF, use
1638 the previous value we stored earlier. */
1639 m4_error_at_line (EXIT_FAILURE
, 0, file
, *line
, caller
,
1640 _("end of file in string"));
1642 if (ch
== CHAR_QUOTE
)
1643 append_quote_token (obs
, td
);
1644 else if (MATCH (ch
, curr_quote
.str2
, true))
1646 if (--quote_level
== 0)
1648 obstack_grow (obs_td
, curr_quote
.str2
, curr_quote
.len2
);
1650 else if (MATCH (ch
, curr_quote
.str1
, true))
1653 obstack_grow (obs_td
, curr_quote
.str1
, curr_quote
.len1
);
1657 assert (ch
< CHAR_EOF
);
1658 obstack_1grow (obs_td
, ch
);
1661 type
= TOKEN_STRING
;
1664 if (TOKEN_DATA_TYPE (td
) == TOKEN_VOID
)
1666 TOKEN_DATA_TYPE (td
) = TOKEN_TEXT
;
1667 TOKEN_DATA_LEN (td
) = obstack_object_size (obs_td
);
1670 obstack_1grow (obs_td
, '\0');
1671 TOKEN_DATA_TEXT (td
) = (char *) obstack_finish (obs_td
);
1674 TOKEN_DATA_TEXT (td
) = NULL
;
1675 TOKEN_DATA_QUOTE_AGE (td
) = current_quote_age
;
1676 #ifdef ENABLE_CHANGEWORD
1677 if (orig_text
== NULL
)
1678 TOKEN_DATA_ORIG_TEXT (td
) = TOKEN_DATA_TEXT (td
);
1681 TOKEN_DATA_ORIG_TEXT (td
) = orig_text
;
1682 TOKEN_DATA_LEN (td
) = strlen (orig_text
);
1684 #endif /* ENABLE_CHANGEWORD */
1686 xfprintf (stderr
, "next_token -> %s (%s), len %zu\n",
1687 token_type_string (type
), TOKEN_DATA_TEXT (td
),
1688 TOKEN_DATA_LEN (td
));
1689 #endif /* DEBUG_INPUT */
1693 assert (TOKEN_DATA_TYPE (td
) == TOKEN_COMP
&& type
== TOKEN_STRING
);
1699 chain
= td
->u
.u_c
.chain
;
1700 xfprintf (stderr
, "next_token -> %s <chain> (",
1701 token_type_string (type
));
1704 switch (chain
->type
)
1707 xfprintf (stderr
, "%s", chain
->u
.u_s
.str
);
1708 len
+= chain
->u
.u_s
.len
;
1711 xfprintf (stderr
, "{$@}");
1714 assert (!"next_token");
1718 chain
= chain
->next
;
1720 xfprintf (stderr
, "), %d links, len %zu\n",
1723 #endif /* DEBUG_INPUT */
1728 /*-----------------------------------------------.
1729 | Peek at the next token from the input stream. |
1730 `-----------------------------------------------*/
1736 int ch
= peek_input (false);
1742 else if (ch
== CHAR_MACRO
)
1744 result
= TOKEN_MACDEF
;
1746 else if (MATCH (ch
, curr_comm
.str1
, false))
1748 result
= TOKEN_STRING
;
1750 else if ((default_word_regexp
&& (isalpha (ch
) || ch
== '_'))
1751 #ifdef ENABLE_CHANGEWORD
1752 || (!default_word_regexp
&& word_regexp
.fastmap
[ch
])
1753 #endif /* ENABLE_CHANGEWORD */
1756 result
= TOKEN_WORD
;
1758 else if (MATCH (ch
, curr_quote
.str1
, false))
1760 result
= TOKEN_STRING
;
1766 result
= TOKEN_OPEN
;
1769 result
= TOKEN_COMMA
;
1772 result
= TOKEN_CLOSE
;
1775 result
= TOKEN_SIMPLE
;
1779 xfprintf (stderr
, "peek_token -> %s\n", token_type_string (result
));
1780 #endif /* DEBUG_INPUT */
1788 token_type_string (token_type t
)
1814 print_token (const char *s
, token_type t
, token_data
*td
)
1816 xfprintf (stderr
, "%s: ", s
);
1823 xfprintf (stderr
, "char:");
1827 xfprintf (stderr
, "word:");
1831 xfprintf (stderr
, "string:");
1835 xfprintf (stderr
, "macro: %p\n", TOKEN_DATA_FUNC (td
));
1839 xfprintf (stderr
, "eof\n");
1842 xfprintf (stderr
, "\t\"%s\"\n", TOKEN_DATA_TEXT (td
));
1845 static void M4_GNUC_UNUSED
1851 while ((t
= next_token (&td
, NULL
, NULL
, false, "<debug>")) != TOKEN_EOF
)
1852 print_token ("lex", t
, &td
);
1854 #endif /* DEBUG_INPUT */