* m4/gnulib-cache.m4: Update to newer gnulib-tool.
[m4/ericb.git] / src / input.c
blobe0223140d97b66becfb6b256dece0a9b5787ebac
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006
4 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA
22 /* Handling of different input sources, and lexical analysis. */
24 #include "m4.h"
26 /* Unread input can be either files, that should be read (eg. included
27 files), strings, which should be rescanned (eg. macro expansion text),
28 or quoted macro definitions (as returned by the builtin "defn").
29 Unread input is organised in a stack, implemented with an obstack.
30 Each input source is described by a "struct input_block". The obstack
31 is "current_input". The top of the input stack is "isp".
33 The macro "m4wrap" places the text to be saved on another input
34 stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
35 is seen on normal input (eg, when "current_input" is empty), input is
36 switched over to "wrapup_stack", and the original "current_input" is
37 freed. A new stack is allocated for "wrapup_stack", which will
38 accept any text produced by calls to "m4wrap" from within the
39 wrapped text. This process of shuffling "wrapup_stack" to
40 "current_input" can continue indefinitely, even generating infinite
41 loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
43 Pushing new input on the input stack is done by push_file (),
44 push_string (), push_wrapup () (for wrapup text), and push_macro ()
45 (for macro definitions). Because macro expansion needs direct access
46 to the current input obstack (for optimisation), push_string () is
47 split into two functions, push_string_init (), which returns a pointer
48 to the current input stack, and push_string_finish (), which returns a
49 pointer to the final text. The input_block *next is used to manage
50 the coordination between the different push routines.
52 The current file and line number are stored in two global variables,
53 for use by the error handling functions in m4.c. Whenever a file
54 input_block is pushed, the current file name and line number is saved
55 in the input_block, and the two variables are reset to match the new
56 input file. */
58 #ifdef ENABLE_CHANGEWORD
59 #include "regex.h"
60 #endif
/* The kinds of source that can sit on an input stack: an open file,
   a string to be rescanned, or a builtin macro definition.  */
typedef enum input_type
{
  INPUT_FILE,
  INPUT_STRING,
  INPUT_MACRO
}
input_type;
71 struct input_block
73 struct input_block *prev; /* previous input_block on the input stack */
74 input_type type; /* INPUT_FILE, INPUT_STRING or INPUT_MACRO */
75 union
77 struct
79 char *string; /* string value */
81 u_s;
82 struct
84 FILE *file; /* input file handle */
85 boolean end; /* true if peek has seen EOF */
86 boolean close; /* true if we should close file on pop */
87 const char *name; /* name of PREVIOUS input file */
88 int lineno; /* current line of previous file */
89 int out_lineno; /* current output line of previous file */
90 boolean advance_line; /* start_of_input_line from next_char () */
92 u_f;
93 builtin_func *func; /* pointer to macro's function */
98 typedef struct input_block input_block;
101 /* Current input file name. */
102 const char *current_file;
104 /* Current input line number. */
105 int current_line;
107 /* Obstack for storing individual tokens. */
108 static struct obstack token_stack;
110 /* Wrapup input stack. */
111 static struct obstack *wrapup_stack;
113 /* Current stack, from input or wrapup. */
114 static struct obstack *current_input;
116 /* Bottom of token_stack, for obstack_free. */
117 static char *token_bottom;
119 /* Pointer to top of current_input. */
120 static input_block *isp;
122 /* Pointer to top of wrapup_stack. */
123 static input_block *wsp;
125 /* Aux. for handling split push_string (). */
126 static input_block *next;
128 /* Flag for next_char () to increment current_line. */
129 static boolean start_of_input_line;
131 #define CHAR_EOF 256 /* character return on EOF */
132 #define CHAR_MACRO 257 /* character return for MACRO token */
134 /* Quote chars. */
135 STRING rquote;
136 STRING lquote;
138 /* Comment chars. */
139 STRING bcomm;
140 STRING ecomm;
142 #ifdef ENABLE_CHANGEWORD
144 # define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
146 static char *word_start;
147 static struct re_pattern_buffer word_regexp;
148 static int default_word_regexp;
149 static struct re_registers regs;
151 #else /* ! ENABLE_CHANGEWORD */
152 # define default_word_regexp 1
153 #endif /* ! ENABLE_CHANGEWORD */
155 #ifdef DEBUG_INPUT
156 static const char *token_type_string (token_type);
157 #endif
160 /*-------------------------------------------------------------------.
161 | push_file () pushes an input file on the input stack, saving the |
162 | current file name and line number. If next is non-NULL, this push |
163 | invalidates a call to push_string_init (), whose storage is |
164 | consequently released. If CLOSE, then close FP after EOF is |
165 | detected. |
166 `-------------------------------------------------------------------*/
168 void
169 push_file (FILE *fp, const char *title, boolean close)
171 input_block *i;
173 if (next != NULL)
175 obstack_free (current_input, next);
176 next = NULL;
179 if (debug_level & DEBUG_TRACE_INPUT)
180 DEBUG_MESSAGE1 ("input read from %s", title);
182 i = (input_block *) obstack_alloc (current_input,
183 sizeof (struct input_block));
184 i->type = INPUT_FILE;
186 i->u.u_f.end = FALSE;
187 i->u.u_f.close = close;
188 i->u.u_f.name = current_file;
189 i->u.u_f.lineno = current_line;
190 i->u.u_f.out_lineno = output_current_line;
191 i->u.u_f.advance_line = start_of_input_line;
192 current_file = obstack_copy0 (current_input, title, strlen (title));
193 current_line = 1;
194 output_current_line = -1;
196 i->u.u_f.file = fp;
197 i->prev = isp;
198 isp = i;
201 /*---------------------------------------------------------------.
202 | push_macro () pushes a builtin macro's definition on the input |
203 | stack. If next is non-NULL, this push invalidates a call to |
204 | push_string_init (), whose storage is consequently released. |
205 `---------------------------------------------------------------*/
207 void
208 push_macro (builtin_func *func)
210 input_block *i;
212 if (next != NULL)
214 obstack_free (current_input, next);
215 next = NULL;
218 i = (input_block *) obstack_alloc (current_input,
219 sizeof (struct input_block));
220 i->type = INPUT_MACRO;
222 i->u.func = func;
223 i->prev = isp;
224 isp = i;
227 /*------------------------------------------------------------------.
228 | First half of push_string (). The pointer next points to the new |
229 | input_block. |
230 `------------------------------------------------------------------*/
232 struct obstack *
233 push_string_init (void)
235 if (next != NULL)
237 M4ERROR ((warning_status, 0,
238 "INTERNAL ERROR: recursive push_string!"));
239 abort ();
242 next = (input_block *) obstack_alloc (current_input,
243 sizeof (struct input_block));
244 next->type = INPUT_STRING;
245 return current_input;
248 /*------------------------------------------------------------------------.
249 | Last half of push_string (). If next is now NULL, a call to push_file |
250 | () has invalidated the previous call to push_string_init (), so we just |
251 | give up. If the new object is void, we do not push it. The function |
252 | push_string_finish () returns a pointer to the finished object. This |
253 | pointer is only for temporary use, since reading the next token might |
254 | release the memory used for the object. |
255 `------------------------------------------------------------------------*/
257 const char *
258 push_string_finish (void)
260 const char *ret = NULL;
262 if (next == NULL)
263 return NULL;
265 if (obstack_object_size (current_input) > 0)
267 obstack_1grow (current_input, '\0');
268 next->u.u_s.string = obstack_finish (current_input);
269 next->prev = isp;
270 isp = next;
271 ret = isp->u.u_s.string; /* for immediate use only */
273 else
274 obstack_free (current_input, next); /* people might leave garbage on it. */
275 next = NULL;
276 return ret;
279 /*--------------------------------------------------------------------------.
280 | The function push_wrapup () pushes a string on the wrapup stack. When |
281 | the normal input stack gets empty, the wrapup stack will become the input |
282 | stack, and push_string () and push_file () will operate on wrapup_stack. |
283 | Push_wrapup should be done as push_string (), but this will suffice, as |
284 | long as arguments to m4_m4wrap () are moderate in size. |
285 `--------------------------------------------------------------------------*/
287 void
288 push_wrapup (const char *s)
290 input_block *i;
291 i = (input_block *) obstack_alloc (wrapup_stack,
292 sizeof (struct input_block));
293 i->prev = wsp;
294 i->type = INPUT_STRING;
295 i->u.u_s.string = obstack_copy0 (wrapup_stack, s, strlen (s));
296 wsp = i;
300 /*-------------------------------------------------------------------------.
301 | The function pop_input () pops one level of input sources. If the |
302 | popped input_block is a file, current_file and current_line are reset to |
303 | the saved values before the memory for the input_block are released. |
304 `-------------------------------------------------------------------------*/
306 static void
307 pop_input (void)
309 input_block *tmp = isp->prev;
311 switch (isp->type)
313 case INPUT_STRING:
314 case INPUT_MACRO:
315 break;
317 case INPUT_FILE:
318 if (debug_level & DEBUG_TRACE_INPUT)
320 if (isp->u.u_f.lineno)
321 DEBUG_MESSAGE2 ("input reverted to %s, line %d",
322 isp->u.u_f.name, isp->u.u_f.lineno);
323 else
324 DEBUG_MESSAGE ("input exhausted");
327 if (ferror (isp->u.u_f.file))
329 M4ERROR ((warning_status, 0, "read error"));
330 fclose (isp->u.u_f.file);
331 retcode = EXIT_FAILURE;
333 else if (isp->u.u_f.close && fclose (isp->u.u_f.file) == EOF)
335 M4ERROR ((warning_status, errno, "error reading file"));
336 retcode = EXIT_FAILURE;
338 current_file = isp->u.u_f.name;
339 current_line = isp->u.u_f.lineno;
340 output_current_line = isp->u.u_f.out_lineno;
341 start_of_input_line = isp->u.u_f.advance_line;
342 if (tmp == NULL)
344 /* We have exhausted the current input stack. However,
345 freeing the obstack now is a bad idea, since if we are in
346 the middle of a quote, comment, dnl, or argument
347 collection, there is still a pointer to the former
348 current_file that we must not invalidate until after the
349 warning message has been issued. Setting next to a
350 non-string is safe in this case, because the only place
351 more input could come from is another push_file or
352 pop_wrapup, both of which then free the input_block. */
353 next = isp;
354 isp = NULL;
355 return;
357 output_current_line = -1;
358 break;
360 default:
361 M4ERROR ((warning_status, 0,
362 "INTERNAL ERROR: input stack botch in pop_input ()"));
363 abort ();
365 obstack_free (current_input, isp);
366 next = NULL; /* might be set in push_string_init () */
368 isp = tmp;
371 /*------------------------------------------------------------------------.
372 | To switch input over to the wrapup stack, main () calls pop_wrapup (). |
373 | Since wrapup text can install new wrapup text, pop_wrapup () returns |
374 | FALSE when there is no wrapup text on the stack, and TRUE otherwise. |
375 `------------------------------------------------------------------------*/
377 boolean
378 pop_wrapup (void)
380 next = NULL;
381 obstack_free (current_input, NULL);
382 free (current_input);
384 if (wsp == NULL)
386 obstack_free (wrapup_stack, NULL);
387 free (wrapup_stack);
388 return FALSE;
391 current_input = wrapup_stack;
392 wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
393 obstack_init (wrapup_stack);
395 isp = wsp;
396 wsp = NULL;
398 return TRUE;
401 /*-------------------------------------------------------------------.
402 | When a MACRO token is seen, next_token () uses init_macro_token () |
403 | to retrieve the value of the function pointer. |
404 `-------------------------------------------------------------------*/
406 static void
407 init_macro_token (token_data *td)
409 if (isp->type != INPUT_MACRO)
411 M4ERROR ((warning_status, 0,
412 "INTERNAL ERROR: bad call to init_macro_token ()"));
413 abort ();
416 TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
417 TOKEN_DATA_FUNC (td) = isp->u.func;
421 /*------------------------------------------------------------------------.
422 | Low level input is done a character at a time. The function peek_input |
423 | () is used to look at the next character in the input stream. At any |
424 | given time, it reads from the input_block on the top of the current |
425 | input stack. |
426 `------------------------------------------------------------------------*/
428 static int
429 peek_input (void)
431 int ch;
433 while (1)
435 if (isp == NULL)
436 return CHAR_EOF;
438 switch (isp->type)
440 case INPUT_STRING:
441 ch = to_uchar (isp->u.u_s.string[0]);
442 if (ch != '\0')
443 return ch;
444 break;
446 case INPUT_FILE:
447 ch = getc (isp->u.u_f.file);
448 if (ch != EOF)
450 ungetc (ch, isp->u.u_f.file);
451 return ch;
453 isp->u.u_f.end = TRUE;
454 break;
456 case INPUT_MACRO:
457 return CHAR_MACRO;
459 default:
460 M4ERROR ((warning_status, 0,
461 "INTERNAL ERROR: input stack botch in peek_input ()"));
462 abort ();
464 /* End of current input source --- pop one level if another
465 level still exists. */
466 if (isp->prev != NULL)
467 pop_input ();
468 else
469 return CHAR_EOF;
473 /*-------------------------------------------------------------------------.
474 | The function next_char () is used to read and advance the input to the |
475 | next character. It also manages line numbers for error messages, so |
476 | they do not get wrong, due to lookahead. The token consisting of a |
477 | newline alone is taken as belonging to the line it ends, and the current |
478 | line number is not incremented until the next character is read. |
479 | 99.9% of all calls will read from a string, so factor that out into a |
480 | macro for speed. |
481 `-------------------------------------------------------------------------*/
483 #define next_char() \
484 (isp && isp->type == INPUT_STRING && isp->u.u_s.string[0] \
485 ? to_uchar (*isp->u.u_s.string++) \
486 : next_char_1 ())
488 static int
489 next_char_1 (void)
491 int ch;
493 if (start_of_input_line)
495 start_of_input_line = FALSE;
496 current_line++;
499 while (1)
501 if (isp == NULL)
502 return CHAR_EOF;
504 switch (isp->type)
506 case INPUT_STRING:
507 ch = to_uchar (*isp->u.u_s.string++);
508 if (ch != '\0')
509 return ch;
510 break;
512 case INPUT_FILE:
513 /* If stdin is a terminal, calling getc after peek_input
514 already called it would make the user have to hit ^D
515 twice to quit. */
516 ch = isp->u.u_f.end ? EOF : getc (isp->u.u_f.file);
517 if (ch != EOF)
519 if (ch == '\n')
520 start_of_input_line = TRUE;
521 return ch;
523 break;
525 case INPUT_MACRO:
526 pop_input (); /* INPUT_MACRO input sources has only one
527 token */
528 return CHAR_MACRO;
530 default:
531 M4ERROR ((warning_status, 0,
532 "INTERNAL ERROR: input stack botch in next_char ()"));
533 abort ();
536 /* End of input source --- pop one level. */
537 pop_input ();
541 /*------------------------------------------------------------------------.
542 | skip_line () simply discards all immediately following characters, up to |
543 | the first newline. It is only used from m4_dnl (). |
544 `------------------------------------------------------------------------*/
546 void
547 skip_line (void)
549 int ch;
550 const char *file = current_file;
551 int line = current_line;
553 while ((ch = next_char ()) != CHAR_EOF && ch != '\n')
555 if (ch == CHAR_EOF)
556 /* current_file changed to "" if we see CHAR_EOF, use the
557 previous value we stored earlier. */
558 M4ERROR_AT_LINE ((warning_status, 0, file, line,
559 "Warning: end of file treated as newline"));
563 /*------------------------------------------------------------------.
564 | This function is for matching a string against a prefix of the |
565 | input stream. If the string matches the input and consume is |
566 | TRUE, the input is discarded; otherwise any characters read are |
567 | pushed back again. The function is used only when multicharacter |
568 | quotes or comment delimiters are used. |
569 `------------------------------------------------------------------*/
571 static boolean
572 match_input (const char *s, boolean consume)
574 int n; /* number of characters matched */
575 int ch; /* input character */
576 const char *t;
577 boolean result = FALSE;
579 ch = peek_input ();
580 if (ch != to_uchar (*s))
581 return FALSE; /* fail */
583 if (s[1] == '\0')
585 if (consume)
586 (void) next_char ();
587 return TRUE; /* short match */
590 (void) next_char ();
591 for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
593 (void) next_char ();
594 n++;
595 if (*s == '\0') /* long match */
597 if (consume)
598 return TRUE;
599 result = TRUE;
600 break;
604 /* Failed or shouldn't consume, push back input. */
606 struct obstack *h = push_string_init ();
608 /* `obstack_grow' may be macro evaluating its arg 1 several times. */
609 obstack_grow (h, t, n);
611 push_string_finish ();
612 return result;
615 /*--------------------------------------------------------------------.
616 | The macro MATCH() is used to match a string S against the input. |
617 | The first character is handled inline, for speed. Hopefully, this |
618 | will not hurt efficiency too much when single character quotes and |
619 | comment delimiters are used. If CONSUME, then CH is the result of |
620 | next_char, and a successful match will discard the matched string. |
621 | Otherwise, CH is the result of peek_char, and the input stream is |
622 | effectively unchanged. |
623 `--------------------------------------------------------------------*/
/* True when the input, starting with CH, matches the string S.  A
   NUL CH never matches; a one character S is decided here, longer
   ones are handed to match_input ().  */
#define MATCH(ch, s, consume)                                   \
  ((ch) == to_uchar ((s)[0])                                    \
   && (ch) != '\0'                                              \
   && ((s)[1] == '\0'                                           \
       || match_input ((s) + (consume), consume)))
631 /*----------------------------------------------------------.
632 | Initialise input stacks, and quote/comment characters. |
633 `----------------------------------------------------------*/
635 void
636 input_init (void)
638 current_file = "";
639 current_line = 0;
641 obstack_init (&token_stack);
643 current_input = (struct obstack *) xmalloc (sizeof (struct obstack));
644 obstack_init (current_input);
645 wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
646 obstack_init (wrapup_stack);
648 obstack_1grow (&token_stack, '\0');
649 token_bottom = obstack_finish (&token_stack);
651 isp = NULL;
652 wsp = NULL;
653 next = NULL;
655 start_of_input_line = FALSE;
657 lquote.string = xstrdup (DEF_LQUOTE);
658 lquote.length = strlen (lquote.string);
659 rquote.string = xstrdup (DEF_RQUOTE);
660 rquote.length = strlen (rquote.string);
661 bcomm.string = xstrdup (DEF_BCOMM);
662 bcomm.length = strlen (bcomm.string);
663 ecomm.string = xstrdup (DEF_ECOMM);
664 ecomm.length = strlen (ecomm.string);
666 #ifdef ENABLE_CHANGEWORD
667 set_word_regexp (user_word_regexp);
668 #endif
672 /*--------------------------------------------------------------.
673 | Functions for setting quotes and comment delimiters. Used by |
674 | m4_changecom () and m4_changequote (). |
675 `--------------------------------------------------------------*/
677 void
678 set_quotes (const char *lq, const char *rq)
680 free (lquote.string);
681 free (rquote.string);
683 lquote.string = xstrdup (lq ? lq : DEF_LQUOTE);
684 lquote.length = strlen (lquote.string);
685 rquote.string = xstrdup (rq ? rq : DEF_RQUOTE);
686 rquote.length = strlen (rquote.string);
689 void
690 set_comment (const char *bc, const char *ec)
692 free (bcomm.string);
693 free (ecomm.string);
695 bcomm.string = xstrdup (bc ? bc : DEF_BCOMM);
696 bcomm.length = strlen (bcomm.string);
697 ecomm.string = xstrdup (ec ? ec : DEF_ECOMM);
698 ecomm.length = strlen (ecomm.string);
701 #ifdef ENABLE_CHANGEWORD
703 static void
704 init_pattern_buffer (struct re_pattern_buffer *buf)
706 buf->translate = NULL;
707 buf->fastmap = NULL;
708 buf->buffer = NULL;
709 buf->allocated = 0;
712 void
713 set_word_regexp (const char *regexp)
715 int i;
716 char test[2];
717 const char *msg;
718 struct re_pattern_buffer new_word_regexp;
720 if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP))
722 default_word_regexp = TRUE;
723 return;
726 /* Dry run to see whether the new expression is compilable. */
727 init_pattern_buffer (&new_word_regexp);
728 msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp);
729 regfree (&new_word_regexp);
731 if (msg != NULL)
733 M4ERROR ((warning_status, 0,
734 "bad regular expression `%s': %s", regexp, msg));
735 return;
738 /* If compilation worked, retry using the word_regexp struct.
739 Can't rely on struct assigns working, so redo the compilation. */
740 regfree (&word_regexp);
741 msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
742 re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
744 if (msg != NULL)
746 M4ERROR ((EXIT_FAILURE, 0,
747 "INTERNAL ERROR: expression recompilation `%s': %s",
748 regexp, msg));
751 default_word_regexp = FALSE;
753 if (word_start == NULL)
754 word_start = xmalloc (256);
756 word_start[0] = '\0';
757 test[1] = '\0';
758 for (i = 1; i < 256; i++)
760 test[0] = i;
761 word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
765 #endif /* ENABLE_CHANGEWORD */
768 /*-------------------------------------------------------------------------.
769 | Parse and return a single token from the input stream. A token can |
770 | either be TOKEN_EOF, if the input_stack is empty; it can be TOKEN_STRING |
771 | for a quoted string; TOKEN_WORD for something that is a potential macro |
772 | name; and TOKEN_SIMPLE for any single character that is not a part of |
773 | any of the previous types. |
775 | Next_token () return the token type, and passes back a pointer to the |
776 | token data through TD. The token text is collected on the obstack |
777 | token_stack, which never contains more than one token text at a time. |
778 | The storage pointed to by the fields in TD is therefore subject to |
779 | change the next time next_token () is called. |
780 `-------------------------------------------------------------------------*/
782 token_type
783 next_token (token_data *td)
785 int ch;
786 int quote_level;
787 token_type type;
788 #ifdef ENABLE_CHANGEWORD
789 int startpos;
790 char *orig_text = 0;
791 #endif
792 const char *file = current_file;
793 int line = current_line;
795 obstack_free (&token_stack, token_bottom);
796 obstack_1grow (&token_stack, '\0');
797 token_bottom = obstack_finish (&token_stack);
799 /* Can't consume character until after CHAR_MACRO is handled. */
800 ch = peek_input ();
801 if (ch == CHAR_EOF)
803 #ifdef DEBUG_INPUT
804 fprintf (stderr, "next_token -> EOF\n");
805 #endif
806 next_char ();
807 return TOKEN_EOF;
809 if (ch == CHAR_MACRO)
811 init_macro_token (td);
812 next_char ();
813 #ifdef DEBUG_INPUT
814 fprintf (stderr, "next_token -> MACDEF (%s)\n",
815 find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
816 #endif
817 return TOKEN_MACDEF;
820 next_char (); /* Consume character we already peeked at. */
821 if (MATCH (ch, bcomm.string, TRUE))
823 obstack_grow (&token_stack, bcomm.string, bcomm.length);
824 while ((ch = next_char ()) != CHAR_EOF
825 && !MATCH (ch, ecomm.string, TRUE))
826 obstack_1grow (&token_stack, ch);
827 if (ch != CHAR_EOF)
828 obstack_grow (&token_stack, ecomm.string, ecomm.length);
829 else
830 /* current_file changed to "" if we see CHAR_EOF, use the
831 previous value we stored earlier. */
832 M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, line,
833 "ERROR: end of file in comment"));
835 type = TOKEN_STRING;
837 else if (default_word_regexp && (isalpha (ch) || ch == '_'))
839 obstack_1grow (&token_stack, ch);
840 while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
842 obstack_1grow (&token_stack, ch);
843 (void) next_char ();
845 type = TOKEN_WORD;
848 #ifdef ENABLE_CHANGEWORD
850 else if (!default_word_regexp && word_start[ch])
852 obstack_1grow (&token_stack, ch);
853 while (1)
855 ch = peek_input ();
856 if (ch == CHAR_EOF)
857 break;
858 obstack_1grow (&token_stack, ch);
859 startpos = re_search (&word_regexp, obstack_base (&token_stack),
860 obstack_object_size (&token_stack), 0, 0,
861 &regs);
862 if (startpos != 0 ||
863 regs.end [0] != obstack_object_size (&token_stack))
865 *(((char *) obstack_base (&token_stack)
866 + obstack_object_size (&token_stack)) - 1) = '\0';
867 break;
869 next_char ();
872 obstack_1grow (&token_stack, '\0');
873 orig_text = obstack_finish (&token_stack);
875 if (regs.start[1] != -1)
876 obstack_grow (&token_stack,orig_text + regs.start[1],
877 regs.end[1] - regs.start[1]);
878 else
879 obstack_grow (&token_stack, orig_text,regs.end[0]);
881 type = TOKEN_WORD;
884 #endif /* ENABLE_CHANGEWORD */
886 else if (!MATCH (ch, lquote.string, TRUE))
888 switch (ch)
890 case '(':
891 type = TOKEN_OPEN;
892 break;
893 case ',':
894 type = TOKEN_COMMA;
895 break;
896 case ')':
897 type = TOKEN_CLOSE;
898 break;
899 default:
900 type = TOKEN_SIMPLE;
901 break;
903 obstack_1grow (&token_stack, ch);
905 else
907 quote_level = 1;
908 while (1)
910 ch = next_char ();
911 if (ch == CHAR_EOF)
912 /* current_file changed to "" if we see CHAR_EOF, use
913 the previous value we stored earlier. */
914 M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, line,
915 "ERROR: end of file in string"));
917 if (MATCH (ch, rquote.string, TRUE))
919 if (--quote_level == 0)
920 break;
921 obstack_grow (&token_stack, rquote.string, rquote.length);
923 else if (MATCH (ch, lquote.string, TRUE))
925 quote_level++;
926 obstack_grow (&token_stack, lquote.string, lquote.length);
928 else
929 obstack_1grow (&token_stack, ch);
931 type = TOKEN_STRING;
934 obstack_1grow (&token_stack, '\0');
936 TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
937 TOKEN_DATA_TEXT (td) = obstack_finish (&token_stack);
938 #ifdef ENABLE_CHANGEWORD
939 if (orig_text == NULL)
940 orig_text = TOKEN_DATA_TEXT (td);
941 TOKEN_DATA_ORIG_TEXT (td) = orig_text;
942 #endif
943 #ifdef DEBUG_INPUT
944 fprintf (stderr, "next_token -> %s (%s)\n",
945 token_type_string (type), TOKEN_DATA_TEXT (td));
946 #endif
947 return type;
950 /*-----------------------------------------------.
951 | Peek at the next token from the input stream. |
952 `-----------------------------------------------*/
954 token_type
955 peek_token (void)
957 int ch = peek_input ();
959 if (ch == CHAR_EOF)
961 #ifdef DEBUG_INPUT
962 fprintf (stderr, "peek_token -> EOF\n");
963 #endif
964 return TOKEN_EOF;
966 if (ch == CHAR_MACRO)
968 #ifdef DEBUG_INPUT
969 fprintf (stderr, "peek_token -> MACDEF\n");
970 #endif
971 return TOKEN_MACDEF;
974 if (MATCH (ch, bcomm.string, FALSE))
976 #ifdef DEBUG_INPUT
977 fprintf (stderr, "peek_token -> COMMENT\n");
978 #endif
979 return TOKEN_STRING;
982 if ((default_word_regexp && (isalpha (ch) || ch == '_'))
983 #ifdef ENABLE_CHANGEWORD
984 || (! default_word_regexp && word_start[ch])
985 #endif /* ENABLE_CHANGEWORD */
988 #ifdef DEBUG_INPUT
989 fprintf (stderr, "peek_token -> WORD\n");
990 #endif
991 return TOKEN_WORD;
994 if (MATCH (ch, lquote.string, FALSE))
996 #ifdef DEBUG_INPUT
997 fprintf (stderr, "peek_token -> QUOTE\n");
998 #endif
999 return TOKEN_STRING;
1002 switch (ch)
1004 case '(':
1005 #ifdef DEBUG_INPUT
1006 fprintf (stderr, "peek_token -> OPEN\n");
1007 #endif
1008 return TOKEN_OPEN;
1009 case ',':
1010 #ifdef DEBUG_INPUT
1011 fprintf (stderr, "peek_token -> COMMA\n");
1012 #endif
1013 return TOKEN_COMMA;
1014 case ')':
1015 #ifdef DEBUG_INPUT
1016 fprintf (stderr, "peek_token -> CLOSE\n");
1017 #endif
1018 return TOKEN_CLOSE;
1019 default:
1020 #ifdef DEBUG_INPUT
1021 fprintf (stderr, "peek_token -> SIMPLE\n");
1022 #endif
1023 return TOKEN_SIMPLE;
1028 #ifdef DEBUG_INPUT
1030 static const char *
1031 token_type_string (token_type t)
1033 switch (t)
1034 { /* TOKSW */
1035 case TOKEN_EOF:
1036 return "EOF";
1037 case TOKEN_STRING:
1038 return "STRING";
1039 case TOKEN_WORD:
1040 return "WORD";
1041 case TOKEN_OPEN:
1042 return "OPEN";
1043 case TOKEN_COMMA:
1044 return "COMMA";
1045 case TOKEN_CLOSE:
1046 return "CLOSE";
1047 case TOKEN_SIMPLE:
1048 return "SIMPLE";
1049 case TOKEN_MACDEF:
1050 return "MACDEF";
1051 default:
1052 abort ();
1056 static void
1057 print_token (const char *s, token_type t, token_data *td)
1059 fprintf (stderr, "%s: ", s);
1060 switch (t)
1061 { /* TOKSW */
1062 case TOKEN_OPEN:
1063 case TOKEN_COMMA:
1064 case TOKEN_CLOSE:
1065 case TOKEN_SIMPLE:
1066 fprintf (stderr, "char:");
1067 break;
1069 case TOKEN_WORD:
1070 fprintf (stderr, "word:");
1071 break;
1073 case TOKEN_STRING:
1074 fprintf (stderr, "string:");
1075 break;
1077 case TOKEN_MACDEF:
1078 fprintf (stderr, "macro: %p\n", TOKEN_DATA_FUNC (td));
1079 break;
1081 case TOKEN_EOF:
1082 fprintf (stderr, "eof\n");
1083 break;
1085 fprintf (stderr, "\t\"%s\"\n", TOKEN_DATA_TEXT (td));
1088 static void M4_GNUC_UNUSED
1089 lex_debug (void)
1091 token_type t;
1092 token_data td;
1094 while ((t = next_token (&td)) != TOKEN_EOF)
1095 print_token ("lex", t, &td);
1097 #endif