More error messages tied to macro names.
[m4/ericb.git] / src / input.c
blob3d96ec7a204a23338c9250646c4cfedaff842ba4
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007
4 Free Software Foundation, Inc.
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 /* Handling of different input sources, and lexical analysis. */
24 #include "m4.h"
26 /* Unread input can be either files, that should be read (eg. included
27 files), strings, which should be rescanned (eg. macro expansion text),
28 or quoted macro definitions (as returned by the builtin "defn").
29 Unread input is organised in a stack, implemented with an obstack.
30 Each input source is described by a "struct input_block". The obstack
31 is "current_input". The top of the input stack is "isp".
33 The macro "m4wrap" places the text to be saved on another input
34 stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
35 is seen on normal input (eg, when "current_input" is empty), input is
36 switched over to "wrapup_stack", and the original "current_input" is
37 freed. A new stack is allocated for "wrapup_stack", which will
38 accept any text produced by calls to "m4wrap" from within the
39 wrapped text. This process of shuffling "wrapup_stack" to
40 "current_input" can continue indefinitely, even generating infinite
41 loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
43 Pushing new input on the input stack is done by push_file (),
44 push_string (), push_wrapup () (for wrapup text), and push_macro ()
45 (for macro definitions). Because macro expansion needs direct access
46 to the current input obstack (for optimisation), push_string () is
47 split in two functions, push_string_init (), which returns a pointer
48 to the current input stack, and push_string_finish (), which returns a
49 pointer to the final text. The input_block *next is used to manage
50 the coordination between the different push routines.
52 The current file and line number are stored in two global
53 variables, for use by the error handling functions in m4.c. Macro
54 expansion wants to report the line where a macro name was detected,
55 rather than where it finished collecting arguments. This also
56 applies to text resulting from macro expansions. So each input
57 block maintains its own notion of the current file and line, and
58 swapping between input blocks updates the global variables
59 accordingly. */
61 #ifdef ENABLE_CHANGEWORD
62 #include "regex.h"
63 #endif
/* The kinds of input source that can sit on the input stack.  */
typedef enum input_type
{
  INPUT_STRING,         /* Text to rescan, e.g. a macro expansion.  */
  INPUT_FILE,           /* Stream from the command line or an include.  */
  INPUT_MACRO           /* Builtin function obtained through defn.  */
}
input_type;
74 struct input_block
76 struct input_block *prev; /* previous input_block on the input stack */
77 input_type type; /* see enum values */
78 const char *file; /* file where this input is from */
79 int line; /* line where this input is from */
80 union
82 struct
84 char *string; /* remaining string value */
86 u_s; /* INPUT_STRING */
87 struct
89 FILE *fp; /* input file handle */
90 bool_bitfield end : 1; /* true if peek has seen EOF */
91 bool_bitfield close : 1; /* true if we should close file on pop */
92 bool_bitfield advance : 1; /* track previous start_of_input_line */
94 u_f; /* INPUT_FILE */
95 builtin_func *func; /* pointer to macro's function */
100 typedef struct input_block input_block;
103 /* Current input file name. */
104 const char *current_file;
106 /* Current input line number. */
107 int current_line;
109 /* Obstack for storing individual tokens. */
110 static struct obstack token_stack;
112 /* Obstack for storing file names. */
113 static struct obstack file_names;
115 /* Wrapup input stack. */
116 static struct obstack *wrapup_stack;
118 /* Current stack, from input or wrapup. */
119 static struct obstack *current_input;
121 /* Bottom of token_stack, for obstack_free. */
122 static void *token_bottom;
124 /* Pointer to top of current_input. */
125 static input_block *isp;
127 /* Pointer to top of wrapup_stack. */
128 static input_block *wsp;
130 /* Aux. for handling split push_string (). */
131 static input_block *next;
133 /* Flag for next_char () to increment current_line. */
134 static bool start_of_input_line;
136 /* Flag for next_char () to recognize change in input block. */
137 static bool input_change;
139 #define CHAR_EOF 256 /* character return on EOF */
140 #define CHAR_MACRO 257 /* character return for MACRO token */
142 /* Quote chars. */
143 STRING rquote;
144 STRING lquote;
146 /* Comment chars. */
147 STRING bcomm;
148 STRING ecomm;
150 #ifdef ENABLE_CHANGEWORD
152 # define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
154 static char *word_start;
155 static struct re_pattern_buffer word_regexp;
156 static int default_word_regexp;
157 static struct re_registers regs;
159 #else /* !ENABLE_CHANGEWORD */
160 # define default_word_regexp 1
161 #endif /* !ENABLE_CHANGEWORD */
163 static bool pop_input (bool);
165 #ifdef DEBUG_INPUT
166 static const char *token_type_string (token_type);
167 #endif
170 /*-------------------------------------------------------------------.
171 | push_file () pushes an input file on the input stack, saving the |
172 | current file name and line number. If next is non-NULL, this push |
173 | invalidates a call to push_string_init (), whose storage is |
174 | consequently released. If CLOSE, then close FP after EOF is |
175 | detected. |
176 `-------------------------------------------------------------------*/
178 void
179 push_file (FILE *fp, const char *title, bool close)
181 input_block *i;
183 if (next != NULL)
185 obstack_free (current_input, next);
186 next = NULL;
189 if (debug_level & DEBUG_TRACE_INPUT)
190 DEBUG_MESSAGE1 ("input read from %s", title);
192 i = (input_block *) obstack_alloc (current_input,
193 sizeof (struct input_block));
194 i->type = INPUT_FILE;
195 i->file = (char *) obstack_copy0 (&file_names, title, strlen (title));
196 i->line = 1;
197 input_change = true;
199 i->u.u_f.fp = fp;
200 i->u.u_f.end = false;
201 i->u.u_f.close = close;
202 i->u.u_f.advance = start_of_input_line;
203 output_current_line = -1;
205 i->prev = isp;
206 isp = i;
209 /*---------------------------------------------------------------.
210 | push_macro () pushes a builtin macro's definition on the input |
211 | stack. If next is non-NULL, this push invalidates a call to |
212 | push_string_init (), whose storage is consequently released. |
213 `---------------------------------------------------------------*/
215 void
216 push_macro (builtin_func *func)
218 input_block *i;
220 if (next != NULL)
222 obstack_free (current_input, next);
223 next = NULL;
226 i = (input_block *) obstack_alloc (current_input,
227 sizeof (struct input_block));
228 i->type = INPUT_MACRO;
229 i->file = current_file;
230 i->line = current_line;
231 input_change = true;
233 i->u.func = func;
234 i->prev = isp;
235 isp = i;
238 /*------------------------------------------------------------------.
239 | First half of push_string (). The pointer next points to the new |
240 | input_block. |
241 `------------------------------------------------------------------*/
243 struct obstack *
244 push_string_init (void)
246 /* Free any memory occupied by completely parsed strings. */
247 assert (next == NULL);
248 while (isp && pop_input (false));
250 /* Reserve the next location on the obstack. */
251 next = (input_block *) obstack_alloc (current_input,
252 sizeof (struct input_block));
253 next->type = INPUT_STRING;
254 next->file = current_file;
255 next->line = current_line;
257 return current_input;
260 /*------------------------------------------------------------------------.
261 | Last half of push_string (). If next is now NULL, a call to push_file |
262 | () has invalidated the previous call to push_string_init (), so we just |
263 | give up. If the new object is void, we do not push it. The function |
264 | push_string_finish () returns a pointer to the finished object. This |
265 | pointer is only for temporary use, since reading the next token might |
266 | release the memory used for the object. |
267 `------------------------------------------------------------------------*/
269 const char *
270 push_string_finish (void)
272 const char *ret = NULL;
274 if (next == NULL)
275 return NULL;
277 if (obstack_object_size (current_input) > 0)
279 obstack_1grow (current_input, '\0');
280 next->u.u_s.string = (char *) obstack_finish (current_input);
281 next->prev = isp;
282 isp = next;
283 ret = isp->u.u_s.string; /* for immediate use only */
284 input_change = true;
286 else
287 obstack_free (current_input, next); /* people might leave garbage on it. */
288 next = NULL;
289 return ret;
292 /*------------------------------------------------------------------.
293 | The function push_wrapup () pushes a string on the wrapup stack. |
294 | When the normal input stack gets empty, the wrapup stack will |
295 | become the input stack, and push_string () and push_file () will |
296 | operate on wrapup_stack. Push_wrapup should be done as |
297 | push_string (), but this will suffice, as long as arguments to |
298 | m4_m4wrap () are moderate in size. |
299 `------------------------------------------------------------------*/
301 void
302 push_wrapup (const char *s)
304 input_block *i;
305 i = (input_block *) obstack_alloc (wrapup_stack,
306 sizeof (struct input_block));
307 i->prev = wsp;
308 i->type = INPUT_STRING;
309 i->file = current_file;
310 i->line = current_line;
311 i->u.u_s.string = (char *) obstack_copy0 (wrapup_stack, s, strlen (s));
312 wsp = i;
316 /*-------------------------------------------------------------------.
317 | The function pop_input () pops one level of input sources. If |
318 | CLEANUP, and the popped input_block is a file, current_file and |
319 | current_line are reset to the saved values before the memory for |
320 | the input_block is released. The return value is false if cleanup |
321 | is still required, or if the current input source is not |
322 | exhausted. |
323 `-------------------------------------------------------------------*/
325 static bool
326 pop_input (bool cleanup)
328 input_block *tmp = isp->prev;
330 switch (isp->type)
332 case INPUT_STRING:
333 assert (!cleanup || !*isp->u.u_s.string);
334 if (*isp->u.u_s.string)
335 return false;
336 break;
338 case INPUT_MACRO:
339 if (!cleanup)
340 return false;
341 break;
343 case INPUT_FILE:
344 if (!cleanup)
345 return false;
346 if (debug_level & DEBUG_TRACE_INPUT)
348 if (tmp)
349 DEBUG_MESSAGE2 ("input reverted to %s, line %d",
350 tmp->file, tmp->line);
351 else
352 DEBUG_MESSAGE ("input exhausted");
355 if (ferror (isp->u.u_f.fp))
357 m4_error (0, 0, NULL, _("read error"));
358 if (isp->u.u_f.close)
359 fclose (isp->u.u_f.fp);
361 else if (isp->u.u_f.close && fclose (isp->u.u_f.fp) == EOF)
362 m4_error (0, errno, NULL, _("error reading file"));
363 start_of_input_line = isp->u.u_f.advance;
364 output_current_line = -1;
365 break;
367 default:
368 assert (!"pop_input");
369 abort ();
371 obstack_free (current_input, isp);
372 next = NULL; /* might be set in push_string_init () */
374 isp = tmp;
375 input_change = true;
376 return true;
379 /*------------------------------------------------------------------------.
380 | To switch input over to the wrapup stack, main () calls pop_wrapup (). |
381 | Since wrapup text can install new wrapup text, pop_wrapup () returns |
382 | false when there is no wrapup text on the stack, and true otherwise. |
383 `------------------------------------------------------------------------*/
385 bool
386 pop_wrapup (void)
388 next = NULL;
389 obstack_free (current_input, NULL);
390 free (current_input);
392 if (wsp == NULL)
394 /* End of the program. Free all memory even though we are about
395 to exit, since it makes leak detection easier. */
396 obstack_free (&token_stack, NULL);
397 obstack_free (&file_names, NULL);
398 obstack_free (wrapup_stack, NULL);
399 free (wrapup_stack);
400 return false;
403 current_input = wrapup_stack;
404 wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
405 obstack_init (wrapup_stack);
407 isp = wsp;
408 wsp = NULL;
409 input_change = true;
411 return true;
414 /*-------------------------------------------------------------------.
415 | When a MACRO token is seen, next_token () uses init_macro_token () |
416 | to retrieve the value of the function pointer. |
417 `-------------------------------------------------------------------*/
419 static void
420 init_macro_token (token_data *td)
422 assert (isp->type == INPUT_MACRO);
423 TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
424 TOKEN_DATA_FUNC (td) = isp->u.func;
428 /*------------------------------------------------------------------------.
429 | Low level input is done a character at a time. The function peek_input |
430 | () is used to look at the next character in the input stream. At any |
431 | given time, it reads from the input_block on the top of the current |
432 | input stack. |
433 `------------------------------------------------------------------------*/
435 static int
436 peek_input (void)
438 int ch;
439 input_block *block = isp;
441 while (1)
443 if (block == NULL)
444 return CHAR_EOF;
446 switch (block->type)
448 case INPUT_STRING:
449 ch = to_uchar (block->u.u_s.string[0]);
450 if (ch != '\0')
451 return ch;
452 break;
454 case INPUT_FILE:
455 ch = getc (block->u.u_f.fp);
456 if (ch != EOF)
458 ungetc (ch, block->u.u_f.fp);
459 return ch;
461 block->u.u_f.end = true;
462 break;
464 case INPUT_MACRO:
465 return CHAR_MACRO;
467 default:
468 assert (!"peek_input");
469 abort ();
471 block = block->prev;
475 /*-------------------------------------------------------------------------.
476 | The function next_char () is used to read and advance the input to the |
477 | next character. It also manages line numbers for error messages, so |
478 | they do not get wrong, due to lookahead. The token consisting of a |
479 | newline alone is taken as belonging to the line it ends, and the current |
480 | line number is not incremented until the next character is read. |
481 | 99.9% of all calls will read from a string, so factor that out into a |
482 | macro for speed. |
483 `-------------------------------------------------------------------------*/
485 #define next_char() \
486 (isp && isp->type == INPUT_STRING && isp->u.u_s.string[0] \
487 && !input_change \
488 ? to_uchar (*isp->u.u_s.string++) \
489 : next_char_1 ())
491 static int
492 next_char_1 (void)
494 int ch;
496 while (1)
498 if (isp == NULL)
500 current_file = "";
501 current_line = 0;
502 return CHAR_EOF;
505 if (input_change)
507 current_file = isp->file;
508 current_line = isp->line;
509 input_change = false;
512 switch (isp->type)
514 case INPUT_STRING:
515 ch = to_uchar (*isp->u.u_s.string);
516 if (ch != '\0')
518 isp->u.u_s.string++;
519 return ch;
521 break;
523 case INPUT_FILE:
524 if (start_of_input_line)
526 start_of_input_line = false;
527 current_line = ++isp->line;
530 /* If stdin is a terminal, calling getc after peek_input
531 already called it would make the user have to hit ^D
532 twice to quit. */
533 ch = isp->u.u_f.end ? EOF : getc (isp->u.u_f.fp);
534 if (ch != EOF)
536 if (ch == '\n')
537 start_of_input_line = true;
538 return ch;
540 break;
542 case INPUT_MACRO:
543 /* INPUT_MACRO input sources has only one token */
544 pop_input (true);
545 return CHAR_MACRO;
547 default:
548 assert (!"next_char_1");
549 abort ();
552 /* End of input source --- pop one level. */
553 pop_input (true);
557 /*-------------------------------------------------------------------.
558 | skip_line () simply discards all immediately following characters, |
559 | up to the first newline. It is only used from m4_dnl (). |
560 `-------------------------------------------------------------------*/
562 void
563 skip_line (const char *name)
565 int ch;
566 const char *file = current_file;
567 int line = current_line;
569 while ((ch = next_char ()) != CHAR_EOF && ch != '\n')
571 if (ch == CHAR_EOF)
572 /* current_file changed to "" if we see CHAR_EOF, use the
573 previous value we stored earlier. */
574 m4_warn_at_line (0, file, line, name,
575 _("end of file treated as newline"));
576 /* On the rare occasion that dnl crosses include file boundaries
577 (either the input file did not end in a newline, or changeword
578 was used), calling next_char can update current_file and
579 current_line, and that update will be undone as we return to
580 expand_macro. This informs next_char to fix things again. */
581 if (file != current_file || line != current_line)
582 input_change = true;
586 /*------------------------------------------------------------------.
587 | This function is for matching a string against a prefix of the |
588 | input stream. If the string matches the input and consume is |
589 | true, the input is discarded; otherwise any characters read are |
590 | pushed back again. The function is used only when multicharacter |
591 | quotes or comment delimiters are used. |
592 `------------------------------------------------------------------*/
594 static bool
595 match_input (const char *s, bool consume)
597 int n; /* number of characters matched */
598 int ch; /* input character */
599 const char *t;
600 bool result = false;
602 ch = peek_input ();
603 if (ch != to_uchar (*s))
604 return false; /* fail */
606 if (s[1] == '\0')
608 if (consume)
609 (void) next_char ();
610 return true; /* short match */
613 (void) next_char ();
614 for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
616 (void) next_char ();
617 n++;
618 if (*s == '\0') /* long match */
620 if (consume)
621 return true;
622 result = true;
623 break;
627 /* Failed or shouldn't consume, push back input. */
628 push_string_init ();
629 obstack_grow (current_input, t, n);
630 push_string_finish ();
631 return result;
/* MATCH () tests whether the string S matches the input.  The first
   character is compared inline for speed, so single character quotes
   and comment delimiters never reach match_input ().  If CONSUME, CH
   is the result of next_char and a successful match discards the
   matched string; match_input () is then given S without its first
   character.  Otherwise CH is the result of peek_input and the input
   stream is effectively unchanged.  */
#define MATCH(ch, s, consume)                                           \
  (to_uchar ((s)[0]) == (ch)                                            \
   && (ch) != '\0'                                                      \
   && ((s)[1] == '\0'                                                   \
       || match_input ((s) + (consume), (consume))))
650 /*----------------------------------------------------------.
651 | Inititialise input stacks, and quote/comment characters. |
652 `----------------------------------------------------------*/
654 void
655 input_init (void)
657 current_file = "";
658 current_line = 0;
660 current_input = (struct obstack *) xmalloc (sizeof (struct obstack));
661 obstack_init (current_input);
662 wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
663 obstack_init (wrapup_stack);
665 obstack_init (&file_names);
667 /* Allocate an object in the current chunk, so that obstack_free
668 will always work even if the first token parsed spills to a new
669 chunk. */
670 obstack_init (&token_stack);
671 obstack_alloc (&token_stack, 1);
672 token_bottom = obstack_base (&token_stack);
674 isp = NULL;
675 wsp = NULL;
676 next = NULL;
678 start_of_input_line = false;
680 lquote.string = xstrdup (DEF_LQUOTE);
681 lquote.length = strlen (lquote.string);
682 rquote.string = xstrdup (DEF_RQUOTE);
683 rquote.length = strlen (rquote.string);
684 bcomm.string = xstrdup (DEF_BCOMM);
685 bcomm.length = strlen (bcomm.string);
686 ecomm.string = xstrdup (DEF_ECOMM);
687 ecomm.length = strlen (ecomm.string);
689 #ifdef ENABLE_CHANGEWORD
690 set_word_regexp (NULL, user_word_regexp);
691 #endif
695 /*------------------------------------------------------------------.
696 | Functions for setting quotes and comment delimiters. Used by |
697 | m4_changecom () and m4_changequote (). Pass NULL if the argument |
698 | was not present, to distinguish from an explicit empty string. |
699 `------------------------------------------------------------------*/
701 void
702 set_quotes (const char *lq, const char *rq)
704 free (lquote.string);
705 free (rquote.string);
707 /* POSIX states that with 0 arguments, the default quotes are used.
708 POSIX XCU ERN 112 states that behavior is implementation-defined
709 if there was only one argument, or if there is an empty string in
710 either position when there are two arguments. We allow an empty
711 left quote to disable quoting, but a non-empty left quote will
712 always create a non-empty right quote. See the texinfo for what
713 some other implementations do. */
714 if (!lq)
716 lq = DEF_LQUOTE;
717 rq = DEF_RQUOTE;
719 else if (!rq || (*lq && !*rq))
720 rq = DEF_RQUOTE;
722 lquote.string = xstrdup (lq);
723 lquote.length = strlen (lquote.string);
724 rquote.string = xstrdup (rq);
725 rquote.length = strlen (rquote.string);
728 void
729 set_comment (const char *bc, const char *ec)
731 free (bcomm.string);
732 free (ecomm.string);
734 /* POSIX requires no arguments to disable comments. It requires
735 empty arguments to be used as-is, but this is counter to
736 traditional behavior, because a non-null begin and null end makes
737 it impossible to end a comment. An aardvark has been filed:
738 http://www.opengroup.org/austin/mailarchives/ag-review/msg02168.html
739 This implementation assumes the aardvark will be approved. See
740 the texinfo for what some other implementations do. */
741 if (!bc)
742 bc = ec = "";
743 else if (!ec || (*bc && !*ec))
744 ec = DEF_ECOMM;
746 bcomm.string = xstrdup (bc);
747 bcomm.length = strlen (bcomm.string);
748 ecomm.string = xstrdup (ec);
749 ecomm.length = strlen (ecomm.string);
752 #ifdef ENABLE_CHANGEWORD
754 void
755 set_word_regexp (const char *caller, const char *regexp)
757 int i;
758 char test[2];
759 const char *msg;
760 struct re_pattern_buffer new_word_regexp;
762 if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP))
764 default_word_regexp = true;
765 return;
768 /* Dry run to see whether the new expression is compilable. */
769 init_pattern_buffer (&new_word_regexp, NULL);
770 msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp);
771 regfree (&new_word_regexp);
773 if (msg != NULL)
775 /* FIXME - report on behalf of macro caller. */
776 m4_warn (0, caller, _("bad regular expression `%s': %s"), regexp, msg);
777 return;
780 /* If compilation worked, retry using the word_regexp struct.
781 Can't rely on struct assigns working, so redo the compilation. */
782 regfree (&word_regexp);
783 msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
784 assert (!msg);
785 re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
787 default_word_regexp = false;
789 if (word_start == NULL)
790 word_start = (char *) xmalloc (256);
792 word_start[0] = '\0';
793 test[1] = '\0';
794 for (i = 1; i < 256; i++)
796 test[0] = i;
797 word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
801 #endif /* ENABLE_CHANGEWORD */
804 /*--------------------------------------------------------------------.
805 | Parse and return a single token from the input stream. A token |
806 | can either be TOKEN_EOF, if the input_stack is empty; it can be |
807 | TOKEN_STRING for a quoted string; TOKEN_WORD for something that is |
808 | a potential macro name; and TOKEN_SIMPLE for any single character |
809 | that is not a part of any of the previous types. If LINE is not |
810 | NULL, set *LINE to the line where the token starts. Report errors |
811 | (unterminated comments or strings) on behalf of CALLER, if |
812 | non-NULL. |
814 | Next_token () returns the token type, and passes back a pointer to |
815 | the token data through TD. The token text is collected on the |
816 | obstack token_stack, which never contains more than one token text |
817 | at a time. The storage pointed to by the fields in TD is |
818 | therefore subject to change the next time next_token () is called. |
819 `--------------------------------------------------------------------*/
821 token_type
822 next_token (token_data *td, int *line, const char *caller)
824 int ch;
825 int quote_level;
826 token_type type;
827 #ifdef ENABLE_CHANGEWORD
828 int startpos;
829 char *orig_text = NULL;
830 #endif
831 const char *file;
832 int dummy;
834 obstack_free (&token_stack, token_bottom);
835 if (!line)
836 line = &dummy;
838 /* Can't consume character until after CHAR_MACRO is handled. */
839 ch = peek_input ();
840 if (ch == CHAR_EOF)
842 #ifdef DEBUG_INPUT
843 xfprintf (stderr, "next_token -> EOF\n");
844 #endif
845 next_char ();
846 return TOKEN_EOF;
848 if (ch == CHAR_MACRO)
850 init_macro_token (td);
851 next_char ();
852 #ifdef DEBUG_INPUT
853 xfprintf (stderr, "next_token -> MACDEF (%s)\n",
854 find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
855 #endif
856 return TOKEN_MACDEF;
859 next_char (); /* Consume character we already peeked at. */
860 file = current_file;
861 *line = current_line;
862 if (MATCH (ch, bcomm.string, true))
864 obstack_grow (&token_stack, bcomm.string, bcomm.length);
865 while ((ch = next_char ()) != CHAR_EOF
866 && !MATCH (ch, ecomm.string, true))
867 obstack_1grow (&token_stack, ch);
868 if (ch != CHAR_EOF)
869 obstack_grow (&token_stack, ecomm.string, ecomm.length);
870 else
871 /* current_file changed to "" if we see CHAR_EOF, use the
872 previous value we stored earlier. */
873 m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
874 _("end of file in comment"));
876 type = TOKEN_STRING;
878 else if (default_word_regexp && (isalpha (ch) || ch == '_'))
880 obstack_1grow (&token_stack, ch);
881 while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
883 obstack_1grow (&token_stack, ch);
884 (void) next_char ();
886 type = TOKEN_WORD;
889 #ifdef ENABLE_CHANGEWORD
891 else if (!default_word_regexp && word_start[ch])
893 obstack_1grow (&token_stack, ch);
894 while (1)
896 ch = peek_input ();
897 if (ch == CHAR_EOF)
898 break;
899 obstack_1grow (&token_stack, ch);
900 startpos = re_search (&word_regexp,
901 (char *) obstack_base (&token_stack),
902 obstack_object_size (&token_stack), 0, 0,
903 &regs);
904 if (startpos != 0 ||
905 regs.end [0] != obstack_object_size (&token_stack))
907 *(((char *) obstack_base (&token_stack)
908 + obstack_object_size (&token_stack)) - 1) = '\0';
909 break;
911 next_char ();
914 obstack_1grow (&token_stack, '\0');
915 orig_text = (char *) obstack_finish (&token_stack);
917 if (regs.start[1] != -1)
918 obstack_grow (&token_stack, orig_text + regs.start[1],
919 regs.end[1] - regs.start[1]);
920 else
921 obstack_grow (&token_stack, orig_text, regs.end[0]);
923 type = TOKEN_WORD;
926 #endif /* ENABLE_CHANGEWORD */
928 else if (!MATCH (ch, lquote.string, true))
930 switch (ch)
932 case '(':
933 type = TOKEN_OPEN;
934 break;
935 case ',':
936 type = TOKEN_COMMA;
937 break;
938 case ')':
939 type = TOKEN_CLOSE;
940 break;
941 default:
942 type = TOKEN_SIMPLE;
943 break;
945 obstack_1grow (&token_stack, ch);
947 else
949 quote_level = 1;
950 while (1)
952 ch = next_char ();
953 if (ch == CHAR_EOF)
954 /* current_file changed to "" if we see CHAR_EOF, use
955 the previous value we stored earlier. */
956 m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
957 _("end of file in string"));
959 if (MATCH (ch, rquote.string, true))
961 if (--quote_level == 0)
962 break;
963 obstack_grow (&token_stack, rquote.string, rquote.length);
965 else if (MATCH (ch, lquote.string, true))
967 quote_level++;
968 obstack_grow (&token_stack, lquote.string, lquote.length);
970 else
971 obstack_1grow (&token_stack, ch);
973 type = TOKEN_STRING;
976 obstack_1grow (&token_stack, '\0');
978 TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
979 TOKEN_DATA_TEXT (td) = (char *) obstack_finish (&token_stack);
980 #ifdef ENABLE_CHANGEWORD
981 if (orig_text == NULL)
982 orig_text = TOKEN_DATA_TEXT (td);
983 TOKEN_DATA_ORIG_TEXT (td) = orig_text;
984 #endif
985 #ifdef DEBUG_INPUT
986 xfprintf (stderr, "next_token -> %s (%s)\n",
987 token_type_string (type), TOKEN_DATA_TEXT (td));
988 #endif
989 return type;
992 /*-----------------------------------------------.
993 | Peek at the next token from the input stream. |
994 `-----------------------------------------------*/
996 token_type
997 peek_token (void)
999 token_type result;
1000 int ch = peek_input ();
1002 if (ch == CHAR_EOF)
1004 result = TOKEN_EOF;
1006 else if (ch == CHAR_MACRO)
1008 result = TOKEN_MACDEF;
1010 else if (MATCH (ch, bcomm.string, false))
1012 result = TOKEN_STRING;
1014 else if ((default_word_regexp && (isalpha (ch) || ch == '_'))
1015 #ifdef ENABLE_CHANGEWORD
1016 || (!default_word_regexp && word_start[ch])
1017 #endif /* ENABLE_CHANGEWORD */
1020 result = TOKEN_WORD;
1022 else if (MATCH (ch, lquote.string, false))
1024 result = TOKEN_STRING;
1026 else
1027 switch (ch)
1029 case '(':
1030 result = TOKEN_OPEN;
1031 break;
1032 case ',':
1033 result = TOKEN_COMMA;
1034 break;
1035 case ')':
1036 result = TOKEN_CLOSE;
1037 break;
1038 default:
1039 result = TOKEN_SIMPLE;
1042 #ifdef DEBUG_INPUT
1043 xfprintf (stderr, "peek_token -> %s\n", token_type_string (result));
1044 #endif /* DEBUG_INPUT */
1045 return result;
1049 #ifdef DEBUG_INPUT
1051 static const char *
1052 token_type_string (token_type t)
1054 switch (t)
1055 { /* TOKSW */
1056 case TOKEN_EOF:
1057 return "EOF";
1058 case TOKEN_STRING:
1059 return "STRING";
1060 case TOKEN_WORD:
1061 return "WORD";
1062 case TOKEN_OPEN:
1063 return "OPEN";
1064 case TOKEN_COMMA:
1065 return "COMMA";
1066 case TOKEN_CLOSE:
1067 return "CLOSE";
1068 case TOKEN_SIMPLE:
1069 return "SIMPLE";
1070 case TOKEN_MACDEF:
1071 return "MACDEF";
1072 default:
1073 abort ();
1077 static void
1078 print_token (const char *s, token_type t, token_data *td)
1080 xfprintf (stderr, "%s: ", s);
1081 switch (t)
1082 { /* TOKSW */
1083 case TOKEN_OPEN:
1084 case TOKEN_COMMA:
1085 case TOKEN_CLOSE:
1086 case TOKEN_SIMPLE:
1087 xfprintf (stderr, "char:");
1088 break;
1090 case TOKEN_WORD:
1091 xfprintf (stderr, "word:");
1092 break;
1094 case TOKEN_STRING:
1095 xfprintf (stderr, "string:");
1096 break;
1098 case TOKEN_MACDEF:
1099 xfprintf (stderr, "macro: %p\n", TOKEN_DATA_FUNC (td));
1100 break;
1102 case TOKEN_EOF:
1103 xfprintf (stderr, "eof\n");
1104 break;
1106 xfprintf (stderr, "\t\"%s\"\n", TOKEN_DATA_TEXT (td));
1109 static void M4_GNUC_UNUSED
1110 lex_debug (void)
1112 token_type t;
1113 token_data td;
1115 while ((t = next_token (&td, NULL, "<debug>")) != TOKEN_EOF)
1116 print_token ("lex", t, &td);
1118 #endif