* cpplex.c: Update comments.
[official-gcc.git] / gcc / cpplex.c
blobfe337c0a852bf72b4e44a45fbb2c56e4e665c62e
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 /* Cleanups to do:-
27 o -dM and with _cpp_dump_list: too many \n output.
28 o Put a printer object in cpp_reader?
29 o Check line numbers assigned to all errors.
30 o Replace strncmp with memcmp almost everywhere.
31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
32 o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
34 o Distinguish integers, floats, and 'other' pp-numbers.
35 o Store ints and char constants as binary values.
36 o New command-line assertion syntax.
37 o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39 o Comment all functions, and describe macro expansion algorithm.
40 o Move as much out of header files as possible.
41 o Remove single quote pairs `', and some '', from diagnostics.
42 o Correct pastability test for CPP_NAME and CPP_NUMBER. */
46 #include "config.h"
47 #include "system.h"
48 #include "intl.h"
49 #include "cpplib.h"
50 #include "cpphash.h"
51 #include "symcat.h"
53 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
54 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
56 /* Flags for cpp_context. */
57 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
58 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
59 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
60 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
62 typedef struct cpp_context cpp_context;
63 struct cpp_context
65 union
67 const cpp_toklist *list; /* Used for macro contexts only. */
68 const cpp_token **arg; /* Used for arg contexts only. */
69 } u;
71 /* Pushed token to be returned by next call to get_raw_token. */
72 const cpp_token *pushed_token;
74 struct macro_args *args; /* The arguments for a function-like
75 macro. NULL otherwise. */
76 unsigned short posn; /* Current posn, index into u. */
77 unsigned short count; /* No. of tokens in u. */
78 unsigned short level;
79 unsigned char flags;
82 typedef struct macro_args macro_args;
83 struct macro_args
85 unsigned int *ends;
86 const cpp_token **tokens;
87 unsigned int capacity;
88 unsigned int used;
89 unsigned short level;
92 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
93 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
94 macro_args *, unsigned int *));
95 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
96 static void save_token PARAMS ((macro_args *, const cpp_token *));
97 static int pop_context PARAMS ((cpp_reader *));
98 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
99 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
100 static void free_macro_args PARAMS ((macro_args *));
102 #define auto_expand_name_space(list) \
103 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
104 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
105 size_t, FILE *));
106 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
107 unsigned int));
108 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
109 unsigned int));
111 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
112 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
113 unsigned char *));
114 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
115 const unsigned char *));
116 static int skip_block_comment PARAMS ((cpp_reader *));
117 static int skip_line_comment PARAMS ((cpp_reader *));
118 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
119 static void skip_whitespace PARAMS ((cpp_reader *, int));
120 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
121 const U_CHAR *, const U_CHAR *));
122 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
123 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
124 unsigned int));
125 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
126 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
127 const unsigned char *,
128 unsigned int, unsigned int));
129 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
130 static int lex_next PARAMS ((cpp_reader *, int));
131 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
132 const cpp_token *));
134 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
135 static void expand_context_stack PARAMS ((cpp_reader *));
136 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
137 unsigned char *));
138 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
139 const cpp_token *));
140 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
141 cpp_token *));
142 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
143 unsigned int));
144 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
145 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
146 const cpp_token *));
147 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
148 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
149 const cpp_token *));
150 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
151 const cpp_token *, int *));
152 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
153 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
154 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
155 static void release_temp_tokens PARAMS ((cpp_reader *));
156 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
157 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
/* Give TOKEN an empty string value whose text pointer is the first
   unused byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do { \
    (token)->val.str.len = 0; \
    (token)->val.str.text = (list)->namebuf + (list)->name_used; \
  } while (0)

/* Non-zero if C is '+' or '-' and PREVC is 'e' or 'E', or is 'p' or
   'P' while the c89 option is off.  Reads the option through a
   variable named pfile, which must be in scope where this is used.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character just consumed and CUR points at the
   character after it.  If that character is the other member of the
   CR/LF pair ('\r' + '\n' - C), it is consumed as well.  The line
   number, line base and column adjustment are then updated through a
   variable named pfile, which must be in scope where this is used.

   C is parenthesized in the expansion so that an argument such as
   `x ? '\r' : '\n'' is subtracted as a whole.  */

#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
/* Helpers for manipulating the token being built.  All of these
   operate on a variable named cur_token, which must be in scope where
   they are used.  */

/* True if the current token has no PREV_WHITE flag.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
/* Type of the token before the current one.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Set the current token's type and advance past it.  */
#define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
/* Change the type of the token before the current one.  */
#define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
/* Step back one token and set its type.  */
#define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
/* As BACKUP_TOKEN, and also flag that token as a digraph.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)

/* An upper bound on the number of bytes needed to spell a token,
   including preceding whitespace.  */
#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
                               ? (token)->val.str.len \
                               : (TOKEN_SPELL(token) == SPELL_IDENT \
                                  ? (token)->val.node->length \
                                  : 0)))

/* Non-zero if context C is an argument context.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* The context PFILE is currently reading from.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)

/* Set D's flags to S's PREV_WHITE, BOL and PASTE_LEFT bits; if BOL is
   among them, copy S's column and line to D as well.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* f is flags, just consisting of PREV_WHITE | BOL.  Replace those two
   bits of D with F; if F has BOL, copy S's column and line to D.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
      if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
212 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
213 #define I(e, s) {SPELL_IDENT, s},
214 #define S(e, s) {SPELL_STRING, s},
215 #define C(e, s) {SPELL_CHAR, s},
216 #define N(e, s) {SPELL_NONE, s},
218 const struct token_spelling
219 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
221 #undef T
222 #undef I
223 #undef S
224 #undef C
225 #undef N
227 /* For debugging: the internal names of the tokens. */
228 #define T(e, s) U STRINGX(e),
229 #define I(e, s) U STRINGX(e),
230 #define S(e, s) U STRINGX(e),
231 #define C(e, s) U STRINGX(e),
232 #define N(e, s) U STRINGX(e),
234 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
236 #undef T
237 #undef I
238 #undef S
239 #undef C
240 #undef N
242 /* The following table is used by trigraph_ok/trigraph_replace. If we
243 have designated initializers, it can be constant data; otherwise,
244 it is set up at runtime by _cpp_init_input_buffer. */
246 #if (GCC_VERSION >= 2007)
247 #define init_trigraph_map() /* nothing */
248 #define TRIGRAPH_MAP \
249 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
250 #define END };
251 #define s(p, v) [p] = v,
252 #else
253 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
254 static void init_trigraph_map PARAMS ((void)) { \
255 unsigned char *x = trigraph_map;
256 #define END }
257 #define s(p, v) x[p] = v;
258 #endif
260 TRIGRAPH_MAP
261 s('=', '#') s(')', ']') s('!', '|')
262 s('(', '[') s('\'', '^') s('>', '}')
263 s('/', '\\') s('<', '{') s('-', '~')
266 #undef TRIGRAPH_MAP
267 #undef END
268 #undef s
270 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
272 void
273 _cpp_grow_token_buffer (pfile, n)
274 cpp_reader *pfile;
275 long n;
277 long old_written = CPP_WRITTEN (pfile);
278 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
279 pfile->token_buffer = (U_CHAR *)
280 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
281 CPP_SET_WRITTEN (pfile, old_written);
284 /* Deal with the annoying semantics of fwrite. */
285 static void
286 safe_fwrite (pfile, buf, len, fp)
287 cpp_reader *pfile;
288 const U_CHAR *buf;
289 size_t len;
290 FILE *fp;
292 size_t count;
294 while (len)
296 count = fwrite (buf, 1, len, fp);
297 if (count == 0)
298 goto error;
299 len -= count;
300 buf += count;
302 return;
304 error:
305 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
308 /* Notify the compiler proper that the current line number has jumped,
309 or the current file name has changed. */
311 static void
312 output_line_command (pfile, print, line)
313 cpp_reader *pfile;
314 cpp_printer *print;
315 unsigned int line;
317 cpp_buffer *ip = CPP_BUFFER (pfile);
318 enum { same = 0, enter, leave, rname } change;
319 static const char * const codes[] = { "", " 1", " 2", "" };
321 if (line == 0)
322 return;
324 /* End the previous line of text. */
325 if (pfile->need_newline)
326 putc ('\n', print->outf);
327 pfile->need_newline = 0;
329 if (CPP_OPTION (pfile, no_line_commands))
330 return;
332 /* If ip is null, we've been called from cpp_finish, and they just
333 needed the final flush and trailing newline. */
334 if (!ip)
335 return;
337 if (pfile->include_depth == print->last_id)
339 /* Determine whether the current filename has changed, and if so,
340 how. 'nominal_fname' values are unique, so they can be compared
341 by comparing pointers. */
342 if (ip->nominal_fname == print->last_fname)
343 change = same;
344 else
345 change = rname;
347 else
349 if (pfile->include_depth > print->last_id)
350 change = enter;
351 else
352 change = leave;
353 print->last_id = pfile->include_depth;
355 print->last_fname = ip->nominal_fname;
357 /* If the current file has not changed, we can output a few newlines
358 instead if we want to increase the line number by a small amount.
359 We cannot do this if print->lineno is zero, because that means we
360 haven't output any line commands yet. (The very first line
361 command output is a `same_file' command.) */
362 if (change == same && print->lineno > 0
363 && line >= print->lineno && line < print->lineno + 8)
365 while (line > print->lineno)
367 putc ('\n', print->outf);
368 print->lineno++;
370 return;
373 #ifndef NO_IMPLICIT_EXTERN_C
374 if (CPP_OPTION (pfile, cplusplus))
375 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
376 codes[change],
377 ip->inc->sysp ? " 3" : "",
378 (ip->inc->sysp == 2) ? " 4" : "");
379 else
380 #endif
381 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
382 codes[change],
383 ip->inc->sysp ? " 3" : "");
384 print->lineno = line;
387 /* Write the contents of the token_buffer to the output stream, and
388 clear the token_buffer. Also handles generating line commands and
389 keeping track of file transitions. */
391 void
392 cpp_output_tokens (pfile, print, line)
393 cpp_reader *pfile;
394 cpp_printer *print;
395 unsigned int line;
397 if (CPP_WRITTEN (pfile) - print->written)
399 safe_fwrite (pfile, pfile->token_buffer,
400 CPP_WRITTEN (pfile) - print->written, print->outf);
401 pfile->need_newline = 1;
402 if (print->lineno)
403 print->lineno++;
405 CPP_SET_WRITTEN (pfile, print->written);
407 output_line_command (pfile, print, line);
410 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
412 void
413 cpp_scan_buffer_nooutput (pfile)
414 cpp_reader *pfile;
416 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
417 const cpp_token *token;
419 /* In no-output mode, we can ignore everything but directives. */
420 for (;;)
422 token = _cpp_get_token (pfile);
424 if (token->type == CPP_EOF)
426 cpp_pop_buffer (pfile);
427 if (CPP_BUFFER (pfile) == stop)
428 break;
431 if (token->type == CPP_HASH && token->flags & BOL
432 && pfile->token_list.directive)
434 process_directive (pfile, token);
435 continue;
438 _cpp_skip_rest_of_line (pfile);
442 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
443 void
444 cpp_scan_buffer (pfile, print)
445 cpp_reader *pfile;
446 cpp_printer *print;
448 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
449 const cpp_token *token, *prev = 0;
451 for (;;)
453 token = _cpp_get_token (pfile);
454 if (token->type == CPP_EOF)
456 cpp_pop_buffer (pfile);
457 if (CPP_BUFFER (pfile) == stop)
458 return;
460 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
461 prev = 0;
462 continue;
465 if (token->flags & BOL)
467 if (token->type == CPP_HASH && pfile->token_list.directive)
469 process_directive (pfile, token);
470 continue;
473 cpp_output_tokens (pfile, print, pfile->token_list.line);
474 prev = 0;
477 if (token->type != CPP_PLACEMARKER)
478 output_token (pfile, token, prev);
480 prev = token;
484 /* Scan a single line of the input into the token_buffer. */
486 cpp_scan_line (pfile)
487 cpp_reader *pfile;
489 const cpp_token *token, *prev = 0;
491 if (pfile->buffer == NULL)
492 return 0;
496 token = cpp_get_token (pfile);
497 if (token->type == CPP_EOF)
499 cpp_pop_buffer (pfile);
500 break;
503 /* If the last token on a line results from a macro expansion,
504 the check below will fail to stop us from proceeding to the
505 next line - so make sure we stick in a newline, at least. */
506 if (token->flags & BOL)
507 CPP_PUTC (pfile, '\n');
509 output_token (pfile, token, prev);
510 prev = token;
512 while (pfile->cur_context > 0
513 || pfile->contexts[0].posn < pfile->contexts[0].count);
514 return 1;
517 /* Helper routine used by parse_include, which can't see spell_token.
518 Reinterpret the current line as an h-char-sequence (< ... >); we are
519 looking at the first token after the <. */
520 const cpp_token *
521 _cpp_glue_header_name (pfile)
522 cpp_reader *pfile;
524 unsigned int written = CPP_WRITTEN (pfile);
525 const cpp_token *t;
526 cpp_token *hdr;
527 U_CHAR *buf;
528 size_t len;
530 for (;;)
532 t = _cpp_get_token (pfile);
533 if (t->type == CPP_GREATER || t->type == CPP_EOF)
534 break;
536 CPP_RESERVE (pfile, TOKEN_LEN (t));
537 if (t->flags & PREV_WHITE)
538 CPP_PUTC_Q (pfile, ' ');
539 pfile->limit = spell_token (pfile, t, pfile->limit);
542 if (t->type == CPP_EOF)
543 cpp_error (pfile, "missing terminating > character");
545 len = CPP_WRITTEN (pfile) - written;
546 buf = xmalloc (len);
547 memcpy (buf, pfile->token_buffer + written, len);
548 CPP_SET_WRITTEN (pfile, written);
550 hdr = get_temp_token (pfile);
551 hdr->type = CPP_HEADER_NAME;
552 hdr->flags = 0;
553 hdr->val.str.text = buf;
554 hdr->val.str.len = len;
555 return hdr;
558 /* Token-buffer helper functions. */
560 /* Expand a token list's string space. It is *vital* that
561 list->tokens_used is correct, to get pointer fix-up right. */
562 void
563 _cpp_expand_name_space (list, len)
564 cpp_toklist *list;
565 unsigned int len;
567 const U_CHAR *old_namebuf;
569 old_namebuf = list->namebuf;
570 list->name_cap += len;
571 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
573 /* Fix up token text pointers. */
574 if (list->namebuf != old_namebuf)
576 unsigned int i;
578 for (i = 0; i < list->tokens_used; i++)
579 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
580 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
584 /* If there is not enough room for LEN more characters, expand the
585 list by just enough to have room for LEN characters. */
586 void
587 _cpp_reserve_name_space (list, len)
588 cpp_toklist *list;
589 unsigned int len;
591 unsigned int room = list->name_cap - list->name_used;
593 if (room < len)
594 _cpp_expand_name_space (list, len - room);
597 /* Expand the number of tokens in a list. */
598 void
599 _cpp_expand_token_space (list, count)
600 cpp_toklist *list;
601 unsigned int count;
603 unsigned int n;
605 list->tokens_cap += count;
606 n = list->tokens_cap;
607 if (list->flags & LIST_OFFSET)
608 list->tokens--, n++;
609 list->tokens = (cpp_token *)
610 xrealloc (list->tokens, n * sizeof (cpp_token));
611 if (list->flags & LIST_OFFSET)
612 list->tokens++; /* Skip the dummy. */
615 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
616 an extra token in front of the token list, as this allows the lexer
617 to always peek at the previous token without worrying about
618 underflowing the list, and some initial space. Otherwise, no
619 token- or name-space is allocated, and there is no dummy token. */
620 void
621 _cpp_init_toklist (list, flags)
622 cpp_toklist *list;
623 int flags;
625 if (flags == NO_DUMMY_TOKEN)
627 list->tokens_cap = 0;
628 list->tokens = 0;
629 list->name_cap = 0;
630 list->namebuf = 0;
631 list->flags = 0;
633 else
635 /* Initialize token space. Put a dummy token before the start
636 that will fail matches. */
637 list->tokens_cap = 256; /* 4K's worth. */
638 list->tokens = (cpp_token *)
639 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
640 list->tokens[0].type = CPP_EOF;
641 list->tokens++;
643 /* Initialize name space. */
644 list->name_cap = 1024;
645 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
646 list->flags = LIST_OFFSET;
649 _cpp_clear_toklist (list);
652 /* Clear a token list. */
653 void
654 _cpp_clear_toklist (list)
655 cpp_toklist *list;
657 list->tokens_used = 0;
658 list->name_used = 0;
659 list->directive = 0;
660 list->paramc = 0;
661 list->params_len = 0;
662 list->flags &= LIST_OFFSET; /* clear all but that one */
665 /* Free a token list. Does not free the list itself, which may be
666 embedded in a larger structure. */
667 void
668 _cpp_free_toklist (list)
669 const cpp_toklist *list;
671 if (list->flags & LIST_OFFSET)
672 free (list->tokens - 1); /* Backup over dummy token. */
673 else
674 free (list->tokens);
675 free (list->namebuf);
678 /* Compare two tokens. */
680 _cpp_equiv_tokens (a, b)
681 const cpp_token *a, *b;
683 if (a->type == b->type && a->flags == b->flags)
684 switch (token_spellings[a->type].type)
686 default: /* Keep compiler happy. */
687 case SPELL_OPERATOR:
688 return 1;
689 case SPELL_CHAR:
690 case SPELL_NONE:
691 return a->val.aux == b->val.aux; /* arg_no or character. */
692 case SPELL_IDENT:
693 return a->val.node == b->val.node;
694 case SPELL_STRING:
695 return (a->val.str.len == b->val.str.len
696 && !memcmp (a->val.str.text, b->val.str.text,
697 a->val.str.len));
700 return 0;
703 /* Compare two token lists. */
705 _cpp_equiv_toklists (a, b)
706 const cpp_toklist *a, *b;
708 unsigned int i;
710 if (a->tokens_used != b->tokens_used
711 || a->flags != b->flags
712 || a->paramc != b->paramc)
713 return 0;
715 for (i = 0; i < a->tokens_used; i++)
716 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
717 return 0;
718 return 1;
721 /* Utility routine:
723 Compares, the token TOKEN to the NUL-terminated string STRING.
724 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
727 cpp_ideq (token, string)
728 const cpp_token *token;
729 const char *string;
731 if (token->type != CPP_NAME)
732 return 0;
734 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
737 /* Lexing algorithm.
739 The original lexer in cpplib was made up of two passes: a first pass
740 that replaced trigraphs and deleted escaped newlines, and a second
741 pass that tokenized the result of the first pass. Tokenisation was
742 performed by peeking at the next character in the input stream. For
743 example, if the input stream contained "!=", the handler for the !
744 character would peek at the next character, and if it were a '='
745 would skip over it, and return a "!=" token, otherwise it would
746 return just the "!" token.
748 To implement a single-pass lexer, this peeking ahead is unworkable.
749 An arbitrary number of escaped newlines, and trigraphs (in particular
750 ??/ which translates to the escape \), could separate the '!' and '='
751 in the input stream, yet the next token is still a "!=".
753 Suppose instead that we lex by one logical line at a time, producing
754 a token list or stack for each logical line, and when seeing the '!'
755 push a CPP_NOT token on the list. Then if the '!' is part of a
756 longer token ("!=") we know we must see the remainder of the token by
757 the time we reach the end of the logical line. Thus we can have the
758 '=' handler look at the previous token (at the end of the list / top
759 of the stack) and see if it is a "!" token, and if so, instead of
760 pushing a "=" token revise the existing token to be a "!=" token.
762 This works in the presence of escaped newlines, because the '\' would
763 have been pushed on the top of the stack as a CPP_BACKSLASH. The
764 newline ('\n' or '\r') handler looks at the token at the top of the
765 stack to see if it is a CPP_BACKSLASH, and if so discards both.
766 Hence the '=' handler would never see any intervening tokens.
768 To make trigraphs work in this context (trigraphs have the highest
769 precedence and are converted before anything else), the '?' handler does
770 lookahead to see if it is a trigraph, and if so skips the trigraph
771 and pushes the token it represents onto the top of the stack. This
772 also works in the particular case of a CPP_BACKSLASH trigraph.
774 To the preprocessor, whitespace is only significant to the point of
775 knowing whether whitespace precedes a particular token. For example,
776 the '=' handler needs to know whether there was whitespace between it
777 and a "!" token on the top of the stack, to make the token conversion
778 decision correctly. So each token has a PREV_WHITE flag to
779 indicate this - the standard permits consecutive whitespace to be
780 regarded as a single space. The compiler front ends are not
781 interested in whitespace at all; they just require a token stream.
782 Another place where whitespace is significant to the preprocessor is
783 a #define statement - if there is whitespace between the macro name
784 and an initial "(" token the macro is "object-like", otherwise it is
785 a function-like macro that takes arguments.
787 However, all is not rosy. Parsing of identifiers, numbers, comments
788 and strings becomes trickier because of the possibility of raw
789 trigraphs and escaped newlines in the input stream.
791 The trigraphs are three consecutive characters beginning with two
792 question marks. A question mark is not valid as part of a number or
793 identifier, so parsing of a number or identifier terminates normally
794 upon reaching it, returning to the mainloop which handles the
795 trigraph just like it would in any other position. Similarly for the
796 backslash of a backslash-newline combination. So we just need the
797 escaped-newline dropper in the mainloop to check if the token on the
798 top of the stack after dropping the escaped newline is a number or
799 identifier, and if so to continue the processing it as if nothing had
800 happened.
802 For strings, we replace trigraphs whenever we reach a quote or
803 newline, because there might be a backslash trigraph escaping them.
804 We need to be careful that we start trigraph replacing from where we
805 left off previously, because it is possible for a first scan to leave
806 "fake" trigraphs that a second scan would pick up as real (e.g. the
807 sequence "????/\n=" would find a fake ??= trigraph after removing the
808 escaped newline.)
810 For line comments, on reaching a newline we scan the previous
811 character(s) to see if it is escaped, and continue if it is. Block
812 comments ignore everything and just focus on finding the comment
813 termination mark. The only difficult thing, and it is surprisingly
814 tricky, is checking if an asterisk precedes the final slash since
815 they could be separated by escaped newlines. If the preprocessor is
816 invoked with the output comments option, we don't bother removing
817 escaped newlines and replacing trigraphs for output.
819 Finally, numbers can begin with a period, which is pushed initially
820 as a CPP_DOT token in its own right. The digit handler checks if the
821 previous token was a CPP_DOT not separated by whitespace, and if so
822 pops it off the stack and pushes a period into the number's buffer
823 before calling the number parser. */
/* Spellings of the digraphs.  */
static const unsigned char *digraph_spellings [] = {
  U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>"
};
830 /* Call when a trigraph is encountered. It warns if necessary, and
831 returns true if the trigraph should be honoured. END is the third
832 character of a trigraph in the input stream. */
833 static int
834 trigraph_ok (pfile, end)
835 cpp_reader *pfile;
836 const unsigned char *end;
838 int accept = CPP_OPTION (pfile, trigraphs);
840 if (CPP_OPTION (pfile, warn_trigraphs))
842 unsigned int col = end - 1 - pfile->buffer->line_base;
843 if (accept)
844 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
845 "trigraph ??%c converted to %c",
846 (int) *end, (int) trigraph_map[*end]);
847 else
848 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
849 "trigraph ??%c ignored", (int) *end);
851 return accept;
854 /* Scan a string for trigraphs, warning or replacing them inline as
855 appropriate. When parsing a string, we must call this routine
856 before processing a newline character (if trigraphs are enabled),
857 since the newline might be escaped by a preceding backslash
858 trigraph sequence. Returns a pointer to the end of the name after
859 replacement. */
861 static unsigned char *
862 trigraph_replace (pfile, src, limit)
863 cpp_reader *pfile;
864 unsigned char *src;
865 unsigned char *limit;
867 unsigned char *dest;
869 /* Starting with src[1], find two consecutive '?'. The case of no
870 trigraphs is streamlined. */
872 for (src++; src + 1 < limit; src += 2)
874 if (src[0] != '?')
875 continue;
877 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
878 if (src[-1] == '?')
879 src--;
880 else if (src + 2 == limit || src[1] != '?')
881 continue;
883 /* Check if it really is a trigraph. */
884 if (trigraph_map[src[2]] == 0)
885 continue;
887 dest = src;
888 goto trigraph_found;
890 return limit;
892 /* Now we have a trigraph, we need to scan the remaining buffer, and
893 copy-shifting its contents left if replacement is enabled. */
894 for (; src + 2 < limit; dest++, src++)
895 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
897 trigraph_found:
898 src += 2;
899 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
900 *dest = trigraph_map[*src];
903 /* Copy remaining (at most 2) characters. */
904 while (src < limit)
905 *dest++ = *src++;
906 return dest;
909 /* If CUR is a backslash or the end of a trigraphed backslash, return
910 a pointer to its beginning, otherwise NULL. We don't read beyond
911 the buffer start, because there is the start of the comment in the
912 buffer. */
913 static const unsigned char *
914 backslash_start (pfile, cur)
915 cpp_reader *pfile;
916 const unsigned char *cur;
918 if (cur[0] == '\\')
919 return cur;
920 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
921 && trigraph_ok (pfile, cur))
922 return cur - 2;
923 return 0;
926 /* Skip a C-style block comment. This is probably the trickiest
927 handler. We find the end of the comment by seeing if an asterisk
928 is before every '/' we encounter. The nasty complication is that a
929 previous asterisk may be separated by one or more escaped newlines.
930 Returns non-zero if comment terminated by EOF, zero otherwise. */
931 static int
932 skip_block_comment (pfile)
933 cpp_reader *pfile;
935 cpp_buffer *buffer = pfile->buffer;
936 const unsigned char *char_after_star = 0;
937 const unsigned char *cur = buffer->cur;
939 for (; cur < buffer->rlimit; )
941 unsigned char c = *cur++;
943 /* People like decorating comments with '*', so check for
944 '/' instead for efficiency. */
945 if (c == '/')
947 /* Don't view / then * then / as finishing the comment. */
948 if ((cur[-2] == '*' && cur - 1 > buffer->cur)
949 || cur - 1 == char_after_star)
951 buffer->cur = cur;
952 return 0;
955 /* Warn about potential nested comments, but not when
956 the final character inside the comment is a '/'.
957 Don't bother to get it right across escaped newlines. */
958 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
959 && cur[0] == '*' && cur[1] != '/')
961 buffer->cur = cur;
962 cpp_warning (pfile, "'/*' within comment");
965 else if (is_vspace (c))
967 const unsigned char* bslash = backslash_start (pfile, cur - 2);
969 handle_newline (cur, buffer->rlimit, c);
970 /* Work correctly if there is an asterisk before an
971 arbirtrarily long sequence of escaped newlines. */
972 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
973 char_after_star = cur;
974 else
975 char_after_star = 0;
977 else if (c == '\t')
978 adjust_column (pfile, cur - 1);
981 buffer->cur = cur;
982 return 1;
985 /* Skip a C++ line comment. Handles escaped newlines. Returns
986 non-zero if a multiline comment. */
987 static int
988 skip_line_comment (pfile)
989 cpp_reader *pfile;
991 cpp_buffer *buffer = pfile->buffer;
992 register const unsigned char *cur = buffer->cur;
993 int multiline = 0;
995 for (; cur < buffer->rlimit; )
997 unsigned char c = *cur++;
999 if (is_vspace (c))
1001 /* Check for a (trigaph?) backslash escaping the newline. */
1002 if (!backslash_start (pfile, cur - 2))
1003 goto out;
1004 multiline = 1;
1005 handle_newline (cur, buffer->rlimit, c);
1008 cur++;
1010 out:
1011 buffer->cur = cur - 1; /* Leave newline for caller. */
1012 return multiline;
1015 /* TAB points to a \t character. Update col_adjust so we track the
1016 column correctly. */
1017 static void
1018 adjust_column (pfile, tab)
1019 cpp_reader *pfile;
1020 const U_CHAR *tab;
1022 /* Zero-based column. */
1023 unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
1025 /* Round it up to multiple of the tabstop, but subtract 1 since the
1026 tab itself occupies a character position. */
1027 pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
1028 - col % CPP_OPTION (pfile, tabstop)) - 1;
1031 /* Skips whitespace, stopping at next non-whitespace character.
1032 Adjusts pfile->col_adjust to account for tabs. This enables tokens
1033 to be assigned the correct column. */
1034 static void
1035 skip_whitespace (pfile, in_directive)
1036 cpp_reader *pfile;
1037 int in_directive;
1039 cpp_buffer *buffer = pfile->buffer;
1040 unsigned short warned = 0;
1042 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1043 while (buffer->cur < buffer->rlimit)
1045 unsigned char c = *buffer->cur;
1047 if (!is_nvspace (c))
1048 break;
1050 buffer->cur++;
1051 /* Horizontal space always OK. */
1052 if (c == ' ')
1053 continue;
1054 else if (c == '\t')
1055 adjust_column (pfile, buffer->cur - 1);
1056 /* Must be \f \v or \0. */
1057 else if (c == '\0')
1059 if (!warned)
1060 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1061 CPP_BUF_COL (buffer),
1062 "embedded null character ignored");
1063 warned = 1;
1065 else if (in_directive && CPP_PEDANTIC (pfile))
1066 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1067 CPP_BUF_COL (buffer),
1068 "%s in preprocessing directive",
1069 c == '\f' ? "form feed" : "vertical tab");
1073 /* Parse (append) an identifier. */
1074 static const U_CHAR *
1075 parse_name (pfile, tok, cur, rlimit)
1076 cpp_reader *pfile;
1077 cpp_token *tok;
1078 const U_CHAR *cur, *rlimit;
1080 const U_CHAR *name = cur;
1081 unsigned int len;
1083 while (cur < rlimit)
1085 if (! is_idchar (*cur))
1086 break;
1087 /* $ is not a legal identifier character in the standard, but is
1088 commonly accepted as an extension. Don't warn about it in
1089 skipped conditional blocks. */
1090 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1092 CPP_BUFFER (pfile)->cur = cur;
1093 cpp_pedwarn (pfile, "'$' character in identifier");
1095 cur++;
1097 len = cur - name;
1099 if (tok->val.node)
1101 unsigned int oldlen = tok->val.node->length;
1102 U_CHAR *newname = alloca (oldlen + len);
1103 memcpy (newname, tok->val.node->name, oldlen);
1104 memcpy (newname + oldlen, name, len);
1105 len += oldlen;
1106 name = newname;
1109 tok->val.node = cpp_lookup (pfile, name, len);
1110 return cur;
1113 /* Parse (append) a number. */
1114 static void
1115 parse_number (pfile, list, name)
1116 cpp_reader *pfile;
1117 cpp_toklist *list;
1118 cpp_string *name;
1120 const unsigned char *name_limit;
1121 unsigned char *namebuf;
1122 cpp_buffer *buffer = pfile->buffer;
1123 register const unsigned char *cur = buffer->cur;
1125 expanded:
1126 name_limit = list->namebuf + list->name_cap;
1127 namebuf = list->namebuf + list->name_used;
1129 for (; cur < buffer->rlimit && namebuf < name_limit; )
1131 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1133 /* Perhaps we should accept '$' here if we accept it for
1134 identifiers. We know namebuf[-1] is safe, because for c to
1135 be a sign we must have pushed at least one character. */
1136 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1137 goto out;
1139 namebuf++;
1140 cur++;
1143 /* Run out of name space? */
1144 if (cur < buffer->rlimit)
1146 list->name_used = namebuf - list->namebuf;
1147 auto_expand_name_space (list);
1148 goto expanded;
1151 out:
1152 buffer->cur = cur;
1153 name->len = namebuf - name->text;
1154 list->name_used = namebuf - list->namebuf;
1157 /* Places a string terminated by an unescaped TERMINATOR into a
1158 cpp_string, which should be expandable and thus at the top of the
1159 list's stack. Handles embedded trigraphs, if necessary, and
1160 escaped newlines.
1162 Can be used for character constants (terminator = '\''), string
1163 constants ('"') and angled headers ('>'). Multi-line strings are
1164 allowed, except for within directives. */
1166 static void
1167 parse_string (pfile, list, token, terminator)
1168 cpp_reader *pfile;
1169 cpp_toklist *list;
1170 cpp_token *token;
1171 unsigned int terminator;
1173 cpp_buffer *buffer = pfile->buffer;
1174 cpp_string *name = &token->val.str;
1175 register const unsigned char *cur = buffer->cur;
1176 const unsigned char *name_limit;
1177 unsigned char *namebuf;
1178 unsigned int null_count = 0;
1179 unsigned int trigraphed = list->name_used;
1181 expanded:
1182 name_limit = list->namebuf + list->name_cap;
1183 namebuf = list->namebuf + list->name_used;
1185 for (; cur < buffer->rlimit && namebuf < name_limit; )
1187 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
1189 if (c == '\0')
1190 null_count++;
1191 else if (c == terminator || is_vspace (c))
1193 /* Needed for trigraph_replace and multiline string warning. */
1194 buffer->cur = cur;
1196 /* Scan for trigraphs before checking if backslash-escaped. */
1197 if ((CPP_OPTION (pfile, trigraphs)
1198 || CPP_OPTION (pfile, warn_trigraphs))
1199 && namebuf - (list->namebuf + trigraphed) >= 3)
1201 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1202 namebuf);
1203 /* The test above guarantees trigraphed will be positive. */
1204 trigraphed = namebuf - list->namebuf - 2;
1207 namebuf--; /* Drop the newline / terminator from the name. */
1208 if (is_vspace (c))
1210 /* Drop a backslash newline, and continue. */
1211 if (namebuf[-1] == '\\')
1213 handle_newline (cur, buffer->rlimit, c);
1214 namebuf--;
1215 continue;
1218 cur--;
1220 /* In assembly language, silently terminate strings of
1221 either variety at end of line. This is a kludge
1222 around not knowing where comments are. */
1223 if (CPP_OPTION (pfile, lang_asm))
1224 goto out;
1226 /* Character constants and header names may not extend
1227 over multiple lines. In Standard C, neither may
1228 strings. We accept multiline strings as an
1229 extension. (Even in directives - otherwise, glibc's
1230 longlong.h breaks.) */
1231 if (terminator != '"')
1232 goto unterminated;
1234 cur++; /* Move forwards again. */
1236 if (pfile->multiline_string_line == 0)
1238 pfile->multiline_string_line = token->line;
1239 pfile->multiline_string_column = token->col;
1240 if (CPP_PEDANTIC (pfile))
1241 cpp_pedwarn (pfile, "multi-line string constant");
1244 *namebuf++ = '\n';
1245 handle_newline (cur, buffer->rlimit, c);
1247 else
1249 unsigned char *temp;
1251 /* An odd number of consecutive backslashes represents
1252 an escaped terminator. */
1253 temp = namebuf - 1;
1254 while (temp >= name->text && *temp == '\\')
1255 temp--;
1257 if ((namebuf - temp) & 1)
1258 goto out;
1259 namebuf++;
1264 /* Run out of name space? */
1265 if (cur < buffer->rlimit)
1267 list->name_used = namebuf - list->namebuf;
1268 auto_expand_name_space (list);
1269 goto expanded;
1272 /* We may not have trigraph-replaced the input for this code path,
1273 but as the input is in error by being unterminated we don't
1274 bother. Prevent warnings about no newlines at EOF. */
1275 if (is_vspace (cur[-1]))
1276 cur--;
1278 unterminated:
1279 cpp_error (pfile, "missing terminating %c character", (int) terminator);
1281 if (terminator == '\"' && pfile->multiline_string_line != list->line
1282 && pfile->multiline_string_line != 0)
1284 cpp_error_with_line (pfile, pfile->multiline_string_line,
1285 pfile->multiline_string_column,
1286 "possible start of unterminated string literal");
1287 pfile->multiline_string_line = 0;
1290 out:
1291 buffer->cur = cur;
1292 name->len = namebuf - name->text;
1293 list->name_used = namebuf - list->namebuf;
1295 if (null_count > 0)
1296 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1297 : "null character preserved"));
1300 /* The character TYPE helps us distinguish comment types: '*' = C
1301 style, '/' = C++ style. For code simplicity, the stored comment
1302 includes the comment start and any terminator. */
1304 #define COMMENT_START_LEN 2
1305 static void
1306 save_comment (list, token, from, len, type)
1307 cpp_toklist *list;
1308 cpp_token *token;
1309 const unsigned char *from;
1310 unsigned int len;
1311 unsigned int type;
1313 unsigned char *buffer;
1315 len += COMMENT_START_LEN;
1317 if (list->name_used + len > list->name_cap)
1318 _cpp_expand_name_space (list, len);
1320 INIT_TOKEN_STR (list, token);
1321 token->type = CPP_COMMENT;
1322 token->val.str.len = len;
1324 buffer = list->namebuf + list->name_used;
1325 list->name_used += len;
1327 /* Copy the comment. */
1328 if (type == '*')
1330 *buffer++ = '/';
1331 *buffer++ = '*';
1333 else
1335 *buffer++ = type;
1336 *buffer++ = type;
1338 memcpy (buffer, from, len - COMMENT_START_LEN);
1342 * The tokenizer's main loop. Returns a token list, representing a
1343 * logical line in the input file. On EOF after some tokens have
1344 * been processed, we return immediately. Then in next call, or if
1345 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1346 * token is placed in the list.
1348 * Implementation relies almost entirely on lookback, rather than
1349 * looking forwards. This means that tokenization requires just
1350 * a single pass of the file, even in the presence of trigraphs and
1351 * escaped newlines, providing significant performance benefits.
1352 * Trigraph overhead is negligible if they are disabled, and low
1353 * even when enabled.
1356 #define KNOWN_DIRECTIVE() (list->directive != 0)
1357 #define MIGHT_BE_DIRECTIVE() \
1358 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1360 static void
1361 lex_line (pfile, list)
1362 cpp_reader *pfile;
1363 cpp_toklist *list;
1365 cpp_token *cur_token, *token_limit, *first;
1366 cpp_buffer *buffer = pfile->buffer;
1367 const unsigned char *cur = buffer->cur;
1368 unsigned char flags = 0;
1369 unsigned int first_token = list->tokens_used;
1371 if (!(list->flags & LIST_OFFSET))
1372 (abort) ();
1374 list->file = buffer->nominal_fname;
1375 list->line = CPP_BUF_LINE (buffer);
1376 pfile->col_adjust = 0;
1377 pfile->in_lex_line = 1;
1378 if (cur == buffer->buf)
1379 list->flags |= BEG_OF_FILE;
1381 expanded:
1382 token_limit = list->tokens + list->tokens_cap;
1383 cur_token = list->tokens + list->tokens_used;
1385 for (; cur < buffer->rlimit && cur_token < token_limit;)
1387 unsigned char c;
1389 /* Optimize non-vertical whitespace skipping; most tokens are
1390 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1391 c = *cur;
1392 if (is_nvspace (c))
1394 buffer->cur = cur;
1395 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1396 && cur_token > &list->tokens[first_token]));
1397 cur = buffer->cur;
1399 flags = PREV_WHITE;
1400 if (cur == buffer->rlimit)
1401 break;
1402 c = *cur;
1404 cur++;
1406 /* Initialize current token. CPP_EOF will not be fixed up by
1407 expand_name_space. */
1408 list->tokens_used = cur_token - list->tokens + 1;
1409 cur_token->type = CPP_EOF;
1410 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1411 cur_token->line = CPP_BUF_LINE (buffer);
1412 cur_token->flags = flags;
1413 flags = 0;
1415 switch (c)
1417 case '0': case '1': case '2': case '3': case '4':
1418 case '5': case '6': case '7': case '8': case '9':
1420 int prev_dot;
1422 cur--; /* Backup character. */
1423 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1424 if (prev_dot)
1425 cur_token--;
1426 INIT_TOKEN_STR (list, cur_token);
1427 /* Prepend an immediately previous CPP_DOT token. */
1428 if (prev_dot)
1430 if (list->name_cap == list->name_used)
1431 auto_expand_name_space (list);
1433 cur_token->val.str.len = 1;
1434 list->namebuf[list->name_used++] = '.';
1437 continue_number:
1438 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1439 buffer->cur = cur;
1440 parse_number (pfile, list, &cur_token->val.str);
1441 cur = buffer->cur;
1443 /* Check for # 123 form of #line. */
1444 if (MIGHT_BE_DIRECTIVE ())
1445 list->directive = _cpp_check_linemarker (pfile, cur_token,
1446 !(cur_token[-1].flags
1447 & PREV_WHITE));
1448 cur_token++;
1449 break;
1451 letter:
1452 case '_':
1453 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1454 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1455 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1456 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1457 case 'y': case 'z':
1458 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1459 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1460 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1461 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1462 case 'Y': case 'Z':
1463 cur--; /* Backup character. */
1464 cur_token->val.node = 0;
1465 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1467 continue_name:
1468 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1470 if (MIGHT_BE_DIRECTIVE ())
1471 list->directive = _cpp_check_directive (pfile, cur_token,
1472 !(list->tokens[0].flags
1473 & PREV_WHITE));
1474 cur_token++;
1475 break;
1477 case '\'':
1478 case '\"':
1479 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1480 /* Do we have a wide string? */
1481 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1482 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1483 BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1485 do_parse_string:
1486 /* Here c is one of ' " or >. */
1487 INIT_TOKEN_STR (list, cur_token);
1488 buffer->cur = cur;
1489 parse_string (pfile, list, cur_token, c);
1490 cur = buffer->cur;
1491 cur_token++;
1492 break;
1494 case '/':
1495 cur_token->type = CPP_DIV;
1496 if (IMMED_TOKEN ())
1498 if (PREV_TOKEN_TYPE == CPP_DIV)
1500 /* We silently allow C++ comments in system headers,
1501 irrespective of conformance mode, because lots of
1502 broken systems do that and trying to clean it up
1503 in fixincludes is a nightmare. */
1504 if (CPP_IN_SYSTEM_HEADER (pfile))
1505 goto do_line_comment;
1506 else if (CPP_OPTION (pfile, cplusplus_comments))
1508 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1509 && ! buffer->warned_cplusplus_comments)
1511 buffer->cur = cur;
1512 cpp_pedwarn (pfile,
1513 "C++ style comments are not allowed in ISO C89");
1514 cpp_pedwarn (pfile,
1515 "(this will be reported only once per input file)");
1516 buffer->warned_cplusplus_comments = 1;
1518 do_line_comment:
1519 buffer->cur = cur;
1520 #if 0 /* Leave until new lexer in place. */
1521 if (cur[-2] != c)
1522 cpp_warning (pfile,
1523 "comment start split across lines");
1524 #endif
1525 if (skip_line_comment (pfile))
1526 cpp_warning (pfile, "multi-line comment");
1528 /* Back-up to first '-' or '/'. */
1529 cur_token--;
1530 if (!CPP_OPTION (pfile, discard_comments)
1531 && (!KNOWN_DIRECTIVE()
1532 || (list->directive->flags & COMMENTS)))
1533 save_comment (list, cur_token++, cur,
1534 buffer->cur - cur, c);
1535 else
1536 flags = PREV_WHITE;
1538 cur = buffer->cur;
1539 break;
1543 cur_token++;
1544 break;
1546 case '*':
1547 cur_token->type = CPP_MULT;
1548 if (IMMED_TOKEN ())
1550 if (PREV_TOKEN_TYPE == CPP_DIV)
1552 buffer->cur = cur;
1553 #if 0 /* Leave until new lexer in place. */
1554 if (cur[-2] != '/')
1555 cpp_warning (pfile,
1556 "comment start '/*' split across lines");
1557 #endif
1558 if (skip_block_comment (pfile))
1559 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1560 "unterminated comment");
1561 #if 0 /* Leave until new lexer in place. */
1562 else if (buffer->cur[-2] != '*')
1563 cpp_warning (pfile,
1564 "comment end '*/' split across lines");
1565 #endif
1566 /* Back up to opening '/'. */
1567 cur_token--;
1568 if (!CPP_OPTION (pfile, discard_comments)
1569 && (!KNOWN_DIRECTIVE()
1570 || (list->directive->flags & COMMENTS)))
1571 save_comment (list, cur_token++, cur,
1572 buffer->cur - cur, c);
1573 else
1574 flags = PREV_WHITE;
1576 cur = buffer->cur;
1577 break;
1579 else if (CPP_OPTION (pfile, cplusplus))
1581 /* In C++, there are .* and ->* operators. */
1582 if (PREV_TOKEN_TYPE == CPP_DEREF)
1583 BACKUP_TOKEN (CPP_DEREF_STAR);
1584 else if (PREV_TOKEN_TYPE == CPP_DOT)
1585 BACKUP_TOKEN (CPP_DOT_STAR);
1588 cur_token++;
1589 break;
1591 case '\n':
1592 case '\r':
1593 handle_newline (cur, buffer->rlimit, c);
1594 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1596 if (IMMED_TOKEN ())
1598 /* Remove the escaped newline. Then continue to process
1599 any interrupted name or number. */
1600 cur_token--;
1601 /* Backslash-newline may not be immediately followed by
1602 EOF (C99 5.1.1.2). */
1603 if (cur >= buffer->rlimit)
1605 cpp_pedwarn (pfile, "backslash-newline at end of file");
1606 break;
1608 if (IMMED_TOKEN ())
1610 cur_token--;
1611 if (cur_token->type == CPP_NAME)
1612 goto continue_name;
1613 else if (cur_token->type == CPP_NUMBER)
1614 goto continue_number;
1615 cur_token++;
1617 /* Remember whitespace setting. */
1618 flags = cur_token->flags;
1619 break;
1621 else
1623 buffer->cur = cur;
1624 cpp_warning (pfile,
1625 "backslash and newline separated by space");
1628 else if (MIGHT_BE_DIRECTIVE ())
1630 /* "Null directive." C99 6.10.7: A preprocessing
1631 directive of the form # <new-line> has no effect.
1633 But it is still a directive, and therefore disappears
1634 from the output. */
1635 cur_token--;
1636 if (cur_token->flags & PREV_WHITE
1637 && CPP_WTRADITIONAL (pfile))
1638 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1641 /* Skip vertical space until we have at least one token to
1642 return. */
1643 if (cur_token != &list->tokens[first_token])
1644 goto out;
1645 list->line = CPP_BUF_LINE (buffer);
1646 break;
1648 case '-':
1649 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1650 REVISE_TOKEN (CPP_MINUS_MINUS);
1651 else
1652 PUSH_TOKEN (CPP_MINUS);
1653 break;
1655 make_hash:
1656 case '#':
1657 /* The digraph flag checking ensures that ## and %:%:
1658 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1659 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1660 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1661 REVISE_TOKEN (CPP_PASTE);
1662 else
1663 PUSH_TOKEN (CPP_HASH);
1664 break;
1666 case ':':
1667 cur_token->type = CPP_COLON;
1668 if (IMMED_TOKEN ())
1670 if (PREV_TOKEN_TYPE == CPP_COLON
1671 && CPP_OPTION (pfile, cplusplus))
1672 BACKUP_TOKEN (CPP_SCOPE);
1673 else if (CPP_OPTION (pfile, digraphs))
1675 /* Digraph: "<:" is a '[' */
1676 if (PREV_TOKEN_TYPE == CPP_LESS)
1677 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1678 /* Digraph: "%:" is a '#' */
1679 else if (PREV_TOKEN_TYPE == CPP_MOD)
1681 (--cur_token)->flags |= DIGRAPH;
1682 goto make_hash;
1686 cur_token++;
1687 break;
1689 case '&':
1690 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1691 REVISE_TOKEN (CPP_AND_AND);
1692 else
1693 PUSH_TOKEN (CPP_AND);
1694 break;
1696 make_or:
1697 case '|':
1698 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1699 REVISE_TOKEN (CPP_OR_OR);
1700 else
1701 PUSH_TOKEN (CPP_OR);
1702 break;
1704 case '+':
1705 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1706 REVISE_TOKEN (CPP_PLUS_PLUS);
1707 else
1708 PUSH_TOKEN (CPP_PLUS);
1709 break;
1711 case '=':
1712 /* This relies on equidistance of "?=" and "?" tokens. */
1713 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1714 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1715 else
1716 PUSH_TOKEN (CPP_EQ);
1717 break;
1719 case '>':
1720 cur_token->type = CPP_GREATER;
1721 if (IMMED_TOKEN ())
1723 if (PREV_TOKEN_TYPE == CPP_GREATER)
1724 BACKUP_TOKEN (CPP_RSHIFT);
1725 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1726 BACKUP_TOKEN (CPP_DEREF);
1727 else if (CPP_OPTION (pfile, digraphs))
1729 /* Digraph: ":>" is a ']' */
1730 if (PREV_TOKEN_TYPE == CPP_COLON)
1731 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1732 /* Digraph: "%>" is a '}' */
1733 else if (PREV_TOKEN_TYPE == CPP_MOD)
1734 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1737 cur_token++;
1738 break;
1740 case '<':
1741 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1743 REVISE_TOKEN (CPP_LSHIFT);
1744 break;
1746 /* Is this the beginning of a header name? */
1747 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1749 c = '>'; /* Terminator. */
1750 cur_token->type = CPP_HEADER_NAME;
1751 goto do_parse_string;
1753 PUSH_TOKEN (CPP_LESS);
1754 break;
1756 case '%':
1757 /* Digraph: "<%" is a '{' */
1758 cur_token->type = CPP_MOD;
1759 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1760 && CPP_OPTION (pfile, digraphs))
1761 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1762 cur_token++;
1763 break;
1765 case '?':
1766 if (cur + 1 < buffer->rlimit && *cur == '?'
1767 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1769 /* Handle trigraph. */
1770 cur++;
1771 switch (*cur++)
1773 case '(': goto make_open_square;
1774 case ')': goto make_close_square;
1775 case '<': goto make_open_brace;
1776 case '>': goto make_close_brace;
1777 case '=': goto make_hash;
1778 case '!': goto make_or;
1779 case '-': goto make_complement;
1780 case '/': goto make_backslash;
1781 case '\'': goto make_xor;
1784 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1786 /* GNU C++ defines <? and >? operators. */
1787 if (PREV_TOKEN_TYPE == CPP_LESS)
1789 REVISE_TOKEN (CPP_MIN);
1790 break;
1792 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1794 REVISE_TOKEN (CPP_MAX);
1795 break;
1798 PUSH_TOKEN (CPP_QUERY);
1799 break;
1801 case '.':
1802 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1803 && IMMED_TOKEN ()
1804 && !(cur_token[-1].flags & PREV_WHITE))
1806 cur_token -= 2;
1807 PUSH_TOKEN (CPP_ELLIPSIS);
1809 else
1810 PUSH_TOKEN (CPP_DOT);
1811 break;
1813 make_complement:
1814 case '~': PUSH_TOKEN (CPP_COMPL); break;
1815 make_xor:
1816 case '^': PUSH_TOKEN (CPP_XOR); break;
1817 make_open_brace:
1818 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1819 make_close_brace:
1820 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1821 make_open_square:
1822 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1823 make_close_square:
1824 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1825 make_backslash:
1826 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1827 case '!': PUSH_TOKEN (CPP_NOT); break;
1828 case ',': PUSH_TOKEN (CPP_COMMA); break;
1829 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1830 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1831 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1833 case '$':
1834 if (CPP_OPTION (pfile, dollars_in_ident))
1835 goto letter;
1836 /* Fall through */
1837 default:
1838 cur_token->val.aux = c;
1839 PUSH_TOKEN (CPP_OTHER);
1840 break;
1844 /* Run out of token space? */
1845 if (cur_token == token_limit)
1847 list->tokens_used = cur_token - list->tokens;
1848 _cpp_expand_token_space (list, 256);
1849 goto expanded;
1852 cur_token->flags = flags;
1853 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1855 if (cur > buffer->buf && !is_vspace (cur[-1]))
1856 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1857 CPP_BUF_COLUMN (buffer, cur),
1858 "no newline at end of file");
1859 cur_token++->type = CPP_EOF;
1862 out:
1863 /* All tokens are allocated, so the memory location is fixed. */
1864 first = &list->tokens[first_token];
1866 /* Don't complain about the null directive, nor directives in
1867 assembly source: we don't know where the comments are, and # may
1868 introduce assembler pseudo-ops. Don't complain about invalid
1869 directives in skipped conditional groups (6.10 p4). */
1870 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1871 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1873 if (first[1].type == CPP_NAME)
1874 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1875 (int) first[1].val.node->length, first[1].val.node->name);
1876 else
1877 cpp_error (pfile, "invalid preprocessing directive");
1880 /* Put EOF at end of known directives. This covers "directives do
1881 not extend beyond the end of the line (description 6.10 part 2)". */
1882 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1884 pfile->first_directive_token = first;
1885 cur_token++->type = CPP_EOF;
1888 /* Directives, known or not, always start a new line. */
1889 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1890 first->flags |= BOL;
1891 else
1892 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1893 up the invocation of a function-like macro, new line is
1894 considered a normal white-space character. */
1895 first->flags |= PREV_WHITE;
1897 buffer->cur = cur;
1898 list->tokens_used = cur_token - list->tokens;
1899 pfile->in_lex_line = 0;
1902 /* Write the spelling of a token TOKEN, with any appropriate
1903 whitespace before it, to the token_buffer. PREV is the previous
1904 token, which is used to determine if we need to shove in an extra
1905 space in order to avoid accidental token paste. */
1906 static void
1907 output_token (pfile, token, prev)
1908 cpp_reader *pfile;
1909 const cpp_token *token, *prev;
1911 int dummy;
1913 if (token->col && (token->flags & BOL))
1915 /* Supply enough whitespace to put this token in its original
1916 column. Don't bother trying to reconstruct tabs; we can't
1917 get it right in general, and nothing ought to care. (Yes,
1918 some things do care; the fault lies with them.) */
1919 unsigned char *buffer;
1920 unsigned int spaces = token->col - 1;
1922 CPP_RESERVE (pfile, token->col);
1923 buffer = pfile->limit;
1925 while (spaces--)
1926 *buffer++ = ' ';
1927 pfile->limit = buffer;
1929 else if (token->flags & PREV_WHITE)
1930 CPP_PUTC (pfile, ' ');
1931 else if (prev)
1933 /* Check for and prevent accidental token pasting. */
1934 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1935 CPP_PUTC (pfile, ' ');
1936 /* can_paste doesn't catch all the accidental pastes.
1937 Consider a + ++b - if there is not a space between the + and ++, it
1938 will be misparsed as a++ + b. */
1939 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1940 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1941 CPP_PUTC (pfile, ' ');
1944 CPP_RESERVE (pfile, TOKEN_LEN (token));
1945 pfile->limit = spell_token (pfile, token, pfile->limit);
1948 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1949 already contain the enough space to hold the token's spelling.
1950 Returns a pointer to the character after the last character
1951 written. */
1953 static unsigned char *
1954 spell_token (pfile, token, buffer)
1955 cpp_reader *pfile; /* Would be nice to be rid of this... */
1956 const cpp_token *token;
1957 unsigned char *buffer;
1959 switch (token_spellings[token->type].type)
1961 case SPELL_OPERATOR:
1963 const unsigned char *spelling;
1964 unsigned char c;
1966 if (token->flags & DIGRAPH)
1967 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1968 else
1969 spelling = token_spellings[token->type].spelling;
1971 while ((c = *spelling++) != '\0')
1972 *buffer++ = c;
1974 break;
1976 case SPELL_IDENT:
1977 memcpy (buffer, token->val.node->name, token->val.node->length);
1978 buffer += token->val.node->length;
1979 break;
1981 case SPELL_STRING:
1983 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1984 *buffer++ = 'L';
1986 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1987 *buffer++ = '"';
1988 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1989 *buffer++ = '\'';
1991 memcpy (buffer, token->val.str.text, token->val.str.len);
1992 buffer += token->val.str.len;
1994 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1995 *buffer++ = '"';
1996 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1997 *buffer++ = '\'';
1999 break;
2001 case SPELL_CHAR:
2002 *buffer++ = token->val.aux;
2003 break;
2005 case SPELL_NONE:
2006 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
2007 break;
2010 return buffer;
2013 /* Return the spelling of a token known to be an operator.
2014 Does not distinguish digraphs from their counterparts. */
2015 const unsigned char *
2016 _cpp_spell_operator (type)
2017 enum cpp_ttype type;
2019 if (token_spellings[type].type == SPELL_OPERATOR)
2020 return token_spellings[type].spelling;
2021 else
2022 return token_names[type];
2026 /* Macro expansion algorithm.
2028 Macro expansion is implemented by a single-pass algorithm; there are
2029 no rescan passes involved. cpp_get_token expands just enough to be
2030 able to return a token to the caller; a consequence is that when it
2031 returns the preprocessor can be in a state of mid-expansion. The
2032 algorithm does not work by fully expanding a macro invocation into
2033 some kind of token list, and then returning them one by one.
2035 Our expansion state is recorded in a context stack. We start out with
2036 a single context on the stack, let's call it base context. This
2037 consists of the token list returned by lex_line that forms the next
2038 logical line in the source file.
2040 The current level in the context stack is stored in the cur_context
2041 member of the cpp_reader structure. The context it references keeps,
2042 amongst other things, a count of how many tokens form that context and
2043 our position within those tokens.
2045 Fundamentally, calling cpp_get_token will return the next token from
2046 the current context. If we're at the end of the current context, that
2047 context is popped from the stack first, unless it is the base context,
2048 in which case the next logical line is lexed from the source file.
2050 However, before returning the token, if it is a CPP_NAME token
2051 _cpp_get_token checks to see if it is a macro and if it is enabled.
2052 Each time it encounters a macro name, it calls push_macro_context.
2053 This function checks that the macro should be expanded (with
2054 is_macro_enabled), and if so pushes a new macro context on the stack
2055 which becomes the current context. It then loops back to read the
2056 first token of the macro context.
2058 A macro context basically consists of the token list representing the
2059 macro's replacement list, which was saved in the hash table by
2060 save_macro_expansion when its #define statement was parsed. If the
2061 macro is function-like, it also contains the tokens that form the
2062 arguments to the macro. I say more about macro arguments below, but
2063 for now just saying that each argument is a set of pointers to tokens
2064 is enough.
2066 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2067 token. This represents an argument passed to the macro, with the
2068 argument number stored in the token's AUX field. The argument should
2069 be substituted, this is achieved by pushing an "argument context". An
2070 argument context just refers to the tokens forming the argument,
2071 which are obtained directly from the macro context. The STRINGIFY
2072 flag on a CPP_MACRO_ARG token indicates that the argument should be
2073 stringified.
2075 Here's a few simple rules the context stack obeys:-
2077 1) The lex_line token list is always context zero.
2079 2) Context 1, if it exists, must be a macro context.
2081 3) An argument context can only appear above a macro context.
2083 4) A macro context can appear above the base context, another macro
2084 context, or an argument context.
2086 5) These imply that the minimal level of an argument context is 2.
2088 The only tricky thing left is ensuring that macros are enabled and
2089 disabled correctly. The algorithm controls macro expansion by the
2090 level of the context a token is taken from in the context stack. If a
2091 token is taken from a level equal to no_expand_level (a member of
2092 struct cpp_reader), no expansion is performed.
2094 When popping a context off the stack, if no_expand_level equals the
2095 level of the popped context, it is reduced by one to match the new
2096 context level, so that expansion is still disabled. It does not
2097 increase if a context is pushed, though. It starts out life as
2098 UINT_MAX, which has the effect that initially macro expansion is
2099 enabled. I explain how this mechanism works below.
2101 The standard requires:-
2103 1) Arguments to be fully expanded before substitution.
2105 2) Stringified arguments to not be expanded, nor the tokens
2106 immediately surrounding a ## operator.
2108 3) Continual rescanning until there are no more macros left to
2109 replace.
2111 4) Once a macro has been expanded in stage 1) or 3), it cannot be
2112 expanded again during later rescans. This prevents infinite
2113 recursion.
2115 The first thing to observe is that stage 3) is mostly redundant.
2116 Since a macro is disabled once it has been expanded, how can a rescan
2117 find an unexpanded macro name? There are only two cases where this is
2118 possible:-
2120 a) If the macro name results from a token paste operation.
2122 b) If the macro in question is a function-like macro that hasn't
2123 already been expanded because previously there was not the required
2124 '(' token immediately following it. This is only possible when an
2125 argument is substituted, and after substitution the last token of
2126 the argument can bind with a parenthesis appearing in the tokens
2127 following the substitution. Note that if the '(' appears within the
2128 argument, the ')' must too, as expanding macro arguments cannot
2129 "suck in" tokens outside the argument.
2131 So we tackle this as follows. When parsing the macro invocation for
2132 arguments, we record the tokens forming each argument as a list of
2133 pointers to those tokens. We do not expand any tokens that are "raw",
2134 i.e. directly from the macro invocation, but other tokens that come
2135 from (nested) argument substitution are fully expanded.
2137 This is achieved by setting the no_expand_level to that of the macro
2138 invocation. A CPP_MACRO_ARG token never appears in the list of tokens
2139 forming an argument, because parse_args (indirectly) calls
2140 get_raw_token which automatically pushes argument contexts and traces
2141 into them. Since these contexts are at a higher level than the
2142 no_expand_level, they get fully macro expanded.
2144 "Raw" and non-raw tokens are separated in arguments by null pointers,
2145 with the policy that the initial state of an argument is raw. If the
2146 first token is not raw, it should be preceded by a null pointer. When
2147 tracing through the tokens of an argument context, each time
2148 get_raw_token encounters a null pointer, it toggles the flag
2149 CONTEXT_RAW.
2151 This flag, when set, indicates to is_macro_disabled that we are
2152 reading raw tokens which should be macro-expanded. Similarly, if
2153 clear, is_macro_disabled suppresses re-expansion.
2155 It's probably time for an example.
2157 #define hash #
2158 #define str(x) #x
2159 #define xstr(y) str(y hash)
2160 str(hash) // "hash"
2161 xstr(hash) // "# hash"
2163 In the invocation of str, parse_args turns off macro expansion and so
2164 parses the argument as <hash>. This is the only token (pointer)
2165 passed as the argument to str. Since <hash> is raw there is no need
2166 for an initial null pointer. stringify_arg is called from
2167 get_raw_token when tracing through the expansion of str, since the
2168 argument has the STRINGIFY flag set. stringify_arg turns off
2169 macro_expansion by setting the no_expand_level to that of the argument
2170 context. Thus it gets the token <hash> and stringifies it to "hash"
2171 correctly.
2173 Similarly, xstr is passed <hash>. However, when parse_args is parsing
2174 the invocation of str() in xstr's expansion, get_raw_token encounters
2175 a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
2176 an argument context, and enters the tokens of the argument,
2177 i.e. <hash>. This is at a higher context level than parse_args
2178 disabled, and so is_macro_disabled permits expansion of it and a macro
2179 context is pushed on top of the argument context. This contains the
2180 <#> token, and the end result is that <hash> is macro expanded.
2181 However, after popping off the argument context, the <hash> of xstr's
2182 expansion does not get macro expanded because we're back at the
2183 no_expand_level. The end result is that the argument passed to str is
2184 <NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
2185 raw, <#> is not raw, but then <hash> is.
2190 /* Free the storage allocated for macro arguments. */
2191 static void
2192 free_macro_args (args)
2193 macro_args *args;
2195 if (args->tokens)
2196 free ((PTR) args->tokens);
2197 free (args->ends);
2198 free (args);
2201 /* Determines if a macro has been already used (and is therefore
2202 disabled). */
2203 static int
2204 is_macro_disabled (pfile, expansion, token)
2205 cpp_reader *pfile;
2206 const cpp_toklist *expansion;
2207 const cpp_token *token;
2209 cpp_context *context = CURRENT_CONTEXT (pfile);
2211 /* Don't expand anything if this file has already been preprocessed. */
2212 if (CPP_OPTION (pfile, preprocessed))
2213 return 1;
2215 /* Arguments on either side of ## are inserted in place without
2216 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2217 occurs during a later rescan pass. The effect is that we expand
2218 iff we would as part of the macro's expansion list, so we should
2219 drop to the macro's context. */
2220 if (IS_ARG_CONTEXT (context))
2222 if (token->flags & PASTED)
2223 context--;
2224 else if (!(context->flags & CONTEXT_RAW))
2225 return 1;
2226 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2227 context--;
2230 /* Have we already used this macro? */
2231 while (context->level > 0)
2233 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2234 return 1;
2235 /* Raw argument tokens are judged based on the token list they
2236 came from. */
2237 if (context->flags & CONTEXT_RAW)
2238 context = pfile->contexts + context->level;
2239 else
2240 context--;
2243 /* Function-like macros may be disabled if the '(' is not in the
2244 current context. We check this without disrupting the context
2245 stack. */
2246 if (expansion->paramc >= 0)
2248 const cpp_token *next;
2249 unsigned int prev_nme;
2251 context = CURRENT_CONTEXT (pfile);
2252 /* Drop down any contexts we're at the end of: the '(' may
2253 appear in lower macro expansions, or in the rest of the file. */
2254 while (context->posn == context->count && context > pfile->contexts)
2256 context--;
2257 /* If we matched, we are disabled, as we appear in the
2258 expansion of each macro we meet. */
2259 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2260 return 1;
2263 prev_nme = pfile->no_expand_level;
2264 pfile->no_expand_level = context - pfile->contexts;
2265 next = _cpp_get_token (pfile);
2266 restore_macro_expansion (pfile, prev_nme);
2267 if (next->type != CPP_OPEN_PAREN)
2269 _cpp_push_token (pfile, next);
2270 if (CPP_WTRADITIONAL (pfile))
2271 cpp_warning (pfile,
2272 "function macro %.*s must be used with arguments in traditional C",
2273 (int) token->val.node->length, token->val.node->name);
2274 return 1;
2278 return 0;
2281 /* Add a token to the set of tokens forming the arguments to the macro
2282 being parsed in parse_args. */
2283 static void
2284 save_token (args, token)
2285 macro_args *args;
2286 const cpp_token *token;
2288 if (args->used == args->capacity)
2290 args->capacity += args->capacity + 100;
2291 args->tokens = (const cpp_token **)
2292 xrealloc ((PTR) args->tokens,
2293 args->capacity * sizeof (const cpp_token *));
2295 args->tokens[args->used++] = token;
2298 /* Take and save raw tokens until we finish one argument. Empty
2299 arguments are saved as a single CPP_PLACEMARKER token. */
2300 static const cpp_token *
2301 parse_arg (pfile, var_args, paren_context, args, pcount)
2302 cpp_reader *pfile;
2303 int var_args;
2304 unsigned int paren_context;
2305 macro_args *args;
2306 unsigned int *pcount;
2308 const cpp_token *token;
2309 unsigned int paren = 0, count = 0;
2310 int raw, was_raw = 1;
2312 for (count = 0;; count++)
2314 token = _cpp_get_token (pfile);
2316 switch (token->type)
2318 default:
2319 break;
2321 case CPP_OPEN_PAREN:
2322 paren++;
2323 break;
2325 case CPP_CLOSE_PAREN:
2326 if (paren-- != 0)
2327 break;
2328 goto out;
2330 case CPP_COMMA:
2331 /* Commas are not terminators within parantheses or var_args. */
2332 if (paren || var_args)
2333 break;
2334 goto out;
2336 case CPP_EOF: /* Error reported by caller. */
2337 goto out;
2340 raw = pfile->cur_context <= paren_context;
2341 if (raw != was_raw)
2343 was_raw = raw;
2344 save_token (args, 0);
2345 count++;
2347 save_token (args, token);
2350 out:
2351 if (count == 0)
2353 /* Duplicate the placemarker. Then we can set its flags and
2354 position and safely be using more than one. */
2355 save_token (args, duplicate_token (pfile, &placemarker_token));
2356 count++;
2359 *pcount = count;
2360 return token;
/* Non-zero if the argument starting at offset O of arglist A is
   empty: its first entry, or its second entry when the first is a
   null pointer, is a CPP_PLACEMARKER token.  */

#define empty_argument(A, O) \
  (((A)->tokens[O] != 0 ? (A)->tokens[O] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2371 /* Parse the arguments making up a macro invocation. Nested arguments
2372 are automatically macro expanded, but immediate macros are not
2373 expanded; this enables e.g. operator # to work correctly. Returns
2374 non-zero on error. */
2375 static int
2376 parse_args (pfile, hp, args)
2377 cpp_reader *pfile;
2378 cpp_hashnode *hp;
2379 macro_args *args;
2381 const cpp_token *token;
2382 const cpp_toklist *macro;
2383 unsigned int total = 0;
2384 unsigned int paren_context = pfile->cur_context;
2385 int argc = 0;
2387 macro = hp->value.expansion;
2390 unsigned int count;
2392 token = parse_arg (pfile, (argc + 1 == macro->paramc
2393 && (macro->flags & VAR_ARGS)),
2394 paren_context, args, &count);
2395 if (argc < macro->paramc)
2397 total += count;
2398 args->ends[argc] = total;
2400 argc++;
2402 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2404 if (token->type == CPP_EOF)
2406 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2407 hp->length, hp->name);
2408 return 1;
2410 else if (argc < macro->paramc)
2412 /* A rest argument is allowed to not appear in the invocation at all.
2413 e.g. #define debug(format, args...) ...
2414 debug("string");
2415 This is exactly the same as if the rest argument had received no
2416 tokens - debug("string",); This extension is deprecated. */
2418 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2420 /* Duplicate the placemarker. Then we can set its flags and
2421 position and safely be using more than one. */
2422 save_token (args, duplicate_token (pfile, &placemarker_token));
2423 args->ends[argc] = total + 1;
2424 return 0;
2426 else
2428 cpp_error (pfile,
2429 "insufficient arguments in invocation of macro \"%.*s\"",
2430 hp->length, hp->name);
2431 return 1;
2434 /* An empty argument to an empty function-like macro is fine. */
2435 else if (argc > macro->paramc
2436 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2438 cpp_error (pfile,
2439 "too many arguments in invocation of macro \"%.*s\"",
2440 hp->length, hp->name);
2441 return 1;
2444 return 0;
2447 /* Adds backslashes before all backslashes and double quotes appearing
2448 in strings. Non-printable characters are converted to octal. */
2449 static U_CHAR *
2450 quote_string (dest, src, len)
2451 U_CHAR *dest;
2452 const U_CHAR *src;
2453 unsigned int len;
2455 while (len--)
2457 U_CHAR c = *src++;
2459 if (c == '\\' || c == '"')
2461 *dest++ = '\\';
2462 *dest++ = c;
2464 else
2466 if (ISPRINT (c))
2467 *dest++ = c;
2468 else
2470 sprintf ((char *) dest, "\\%03o", c);
2471 dest += 4;
2476 return dest;
2479 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2480 CPP_STRING token containing TEXT in quoted form. */
2481 static cpp_token *
2482 make_string_token (token, text, len)
2483 cpp_token *token;
2484 const U_CHAR *text;
2485 unsigned int len;
2487 U_CHAR *buf;
2489 buf = (U_CHAR *) xmalloc (len * 4);
2490 token->type = CPP_STRING;
2491 token->flags = 0;
2492 token->val.str.text = buf;
2493 token->val.str.len = quote_string (buf, text, len) - buf;
2494 return token;
2497 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2498 evaluating to NUMBER. */
2499 static cpp_token *
2500 alloc_number_token (pfile, number)
2501 cpp_reader *pfile;
2502 int number;
2504 cpp_token *result;
2505 char *buf;
2507 result = get_temp_token (pfile);
2508 buf = xmalloc (20);
2509 sprintf (buf, "%d", number);
2511 result->type = CPP_NUMBER;
2512 result->flags = 0;
2513 result->val.str.text = (U_CHAR *) buf;
2514 result->val.str.len = strlen (buf);
2515 return result;
2518 /* Returns a temporary token from the temporary token store of PFILE. */
2519 static cpp_token *
2520 get_temp_token (pfile)
2521 cpp_reader *pfile;
2523 if (pfile->temp_used == pfile->temp_alloced)
2525 if (pfile->temp_used == pfile->temp_cap)
2527 pfile->temp_cap += pfile->temp_cap + 20;
2528 pfile->temp_tokens = (cpp_token **) xrealloc
2529 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2531 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2532 (sizeof (cpp_token));
2535 return pfile->temp_tokens[pfile->temp_used++];
2538 /* Release (not free) for re-use the temporary tokens of PFILE. */
2539 static void
2540 release_temp_tokens (pfile)
2541 cpp_reader *pfile;
2543 while (pfile->temp_used)
2545 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2547 if (token_spellings[token->type].type == SPELL_STRING)
2549 free ((char *) token->val.str.text);
2550 token->val.str.text = 0;
2555 /* Free all of PFILE's dynamically-allocated temporary tokens. */
2556 void
2557 _cpp_free_temp_tokens (pfile)
2558 cpp_reader *pfile;
2560 if (pfile->temp_tokens)
2562 /* It is possible, though unlikely (looking for '(' of a funlike
2563 macro into EOF), that we haven't released the tokens yet. */
2564 release_temp_tokens (pfile);
2565 while (pfile->temp_alloced)
2566 free (pfile->temp_tokens[--pfile->temp_alloced]);
2567 free (pfile->temp_tokens);
2570 if (pfile->date)
2572 free ((char *) pfile->date->val.str.text);
2573 free (pfile->date);
2574 free ((char *) pfile->time->val.str.text);
2575 free (pfile->time);
2579 /* Copy TOKEN into a temporary token from PFILE's store. */
2580 static cpp_token *
2581 duplicate_token (pfile, token)
2582 cpp_reader *pfile;
2583 const cpp_token *token;
2585 cpp_token *result = get_temp_token (pfile);
2587 *result = *token;
2588 if (token_spellings[token->type].type == SPELL_STRING)
2590 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2591 memcpy (buff, token->val.str.text, token->val.str.len);
2592 result->val.str.text = buff;
2594 return result;
2597 /* Determine whether two tokens can be pasted together, and if so,
2598 what the resulting token is. Returns CPP_EOF if the tokens cannot
2599 be pasted, or the appropriate type for the merged token if they
2600 can. */
2601 static enum cpp_ttype
2602 can_paste (pfile, token1, token2, digraph)
2603 cpp_reader * pfile;
2604 const cpp_token *token1, *token2;
2605 int* digraph;
2607 enum cpp_ttype a = token1->type, b = token2->type;
2608 int cxx = CPP_OPTION (pfile, cplusplus);
2610 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2611 return a + (CPP_EQ_EQ - CPP_EQ);
2613 switch (a)
2615 case CPP_GREATER:
2616 if (b == a) return CPP_RSHIFT;
2617 if (b == CPP_QUERY && cxx) return CPP_MAX;
2618 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2619 break;
2620 case CPP_LESS:
2621 if (b == a) return CPP_LSHIFT;
2622 if (b == CPP_QUERY && cxx) return CPP_MIN;
2623 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2624 if (CPP_OPTION (pfile, digraphs))
2626 if (b == CPP_COLON)
2627 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2628 if (b == CPP_MOD)
2629 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2631 break;
2633 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2634 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2635 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2637 case CPP_MINUS:
2638 if (b == a) return CPP_MINUS_MINUS;
2639 if (b == CPP_GREATER) return CPP_DEREF;
2640 break;
2641 case CPP_COLON:
2642 if (b == a && cxx) return CPP_SCOPE;
2643 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2644 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2645 break;
2647 case CPP_MOD:
2648 if (CPP_OPTION (pfile, digraphs))
2650 if (b == CPP_GREATER)
2651 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2652 if (b == CPP_COLON)
2653 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2655 break;
2656 case CPP_DEREF:
2657 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2658 break;
2659 case CPP_DOT:
2660 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2661 if (b == CPP_NUMBER) return CPP_NUMBER;
2662 break;
2664 case CPP_HASH:
2665 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2666 /* %:%: digraph */
2667 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2668 break;
2670 case CPP_NAME:
2671 if (b == CPP_NAME) return CPP_NAME;
2672 if (b == CPP_NUMBER
2673 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2674 if (b == CPP_CHAR
2675 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2676 if (b == CPP_STRING
2677 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2678 break;
2680 case CPP_NUMBER:
2681 if (b == CPP_NUMBER) return CPP_NUMBER;
2682 if (b == CPP_NAME) return CPP_NUMBER;
2683 if (b == CPP_DOT) return CPP_NUMBER;
2684 /* Numbers cannot have length zero, so this is safe. */
2685 if ((b == CPP_PLUS || b == CPP_MINUS)
2686 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2687 return CPP_NUMBER;
2688 break;
2690 default:
2691 break;
2694 return CPP_EOF;
2697 /* Check if TOKEN is to be ##-pasted with the token after it. */
2698 static const cpp_token *
2699 maybe_paste_with_next (pfile, token)
2700 cpp_reader *pfile;
2701 const cpp_token *token;
2703 cpp_token *pasted;
2704 const cpp_token *second;
2705 cpp_context *context = CURRENT_CONTEXT (pfile);
2707 /* Is this token on the LHS of ## ? */
2709 while ((token->flags & PASTE_LEFT)
2710 || ((context->flags & CONTEXT_PASTEL)
2711 && context->posn == context->count))
2713 /* Suppress macro expansion for next token, but don't conflict
2714 with the other method of suppression. If it is an argument,
2715 macro expansion within the argument will still occur. */
2716 pfile->paste_level = pfile->cur_context;
2717 second = _cpp_get_token (pfile);
2718 pfile->paste_level = 0;
2720 /* Ignore placemarker argument tokens (cannot be from an empty
2721 macro since macros are not expanded). */
2722 if (token->type == CPP_PLACEMARKER)
2723 pasted = duplicate_token (pfile, second);
2724 else if (second->type == CPP_PLACEMARKER)
2726 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2727 /* GCC has special extended semantics for a ## b where b is
2728 a varargs parameter: a disappears if b consists of no
2729 tokens. This extension is deprecated. */
2730 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2731 && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
2732 == (unsigned) mac_context->u.list->paramc))
2734 cpp_warning (pfile, "deprecated GNU ## extension used");
2735 pasted = duplicate_token (pfile, second);
2737 else
2738 pasted = duplicate_token (pfile, token);
2740 else
2742 int digraph = 0;
2743 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2745 if (type == CPP_EOF)
2747 if (CPP_OPTION (pfile, warn_paste))
2748 cpp_warning (pfile,
2749 "pasting would not give a valid preprocessing token");
2750 _cpp_push_token (pfile, second);
2751 return token;
2754 if (type == CPP_NAME || type == CPP_NUMBER)
2756 /* Join spellings. */
2757 U_CHAR *buf, *end;
2759 pasted = get_temp_token (pfile);
2760 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2761 end = spell_token (pfile, token, buf);
2762 end = spell_token (pfile, second, end);
2763 *end = '\0';
2765 if (type == CPP_NAME)
2766 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2767 else
2769 pasted->val.str.text = uxstrdup (buf);
2770 pasted->val.str.len = end - buf;
2773 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2774 pasted = duplicate_token (pfile, second);
2775 else
2777 pasted = get_temp_token (pfile);
2778 pasted->val.integer = 0;
2781 pasted->type = type;
2782 pasted->flags = digraph ? DIGRAPH : 0;
2785 /* The pasted token gets the whitespace flags and position of the
2786 first token, the PASTE_LEFT flag of the second token, plus the
2787 PASTED flag to indicate it is the result of a paste. However, we
2788 want to preserve the DIGRAPH flag. */
2789 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2790 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2791 | (second->flags & PASTE_LEFT) | PASTED);
2792 pasted->col = token->col;
2793 pasted->line = token->line;
2795 /* See if there is another token to be pasted onto the one we just
2796 constructed. */
2797 token = pasted;
2798 context = CURRENT_CONTEXT (pfile);
2799 /* and loop */
2801 return token;
2804 /* Convert a token sequence to a single string token according to the
2805 rules of the ISO C #-operator. */
2806 #define INIT_SIZE 200
2807 static cpp_token *
2808 stringify_arg (pfile, token)
2809 cpp_reader *pfile;
2810 const cpp_token *token;
2812 cpp_token *result;
2813 unsigned char *main_buf;
2814 unsigned int prev_value, backslash_count = 0;
2815 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2817 push_arg_context (pfile, token);
2818 prev_value = prevent_macro_expansion (pfile);
2819 main_buf = (unsigned char *) xmalloc (buf_cap);
2821 result = get_temp_token (pfile);
2822 ASSIGN_FLAGS_AND_POS (result, token);
2824 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2826 int escape;
2827 unsigned char *buf;
2828 unsigned int len = TOKEN_LEN (token);
2830 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2831 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2832 if (escape)
2833 len *= 4 + 1;
2835 if (buf_used + len > buf_cap)
2837 buf_cap = buf_used + len + INIT_SIZE;
2838 main_buf = xrealloc (main_buf, buf_cap);
2841 if (whitespace && (token->flags & PREV_WHITE))
2842 main_buf[buf_used++] = ' ';
2844 if (escape)
2845 buf = (unsigned char *) xmalloc (len);
2846 else
2847 buf = main_buf + buf_used;
2849 len = spell_token (pfile, token, buf) - buf;
2850 if (escape)
2852 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2853 free (buf);
2855 else
2856 buf_used += len;
2858 whitespace = 1;
2859 if (token->type == CPP_BACKSLASH)
2860 backslash_count++;
2861 else
2862 backslash_count = 0;
2865 /* Ignore the final \ of invalid string literals. */
2866 if (backslash_count & 1)
2868 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2869 buf_used--;
2872 result->type = CPP_STRING;
2873 result->val.str.text = main_buf;
2874 result->val.str.len = buf_used;
2875 restore_macro_expansion (pfile, prev_value);
2876 return result;
2879 /* Allocate more room on the context stack of PFILE. */
2880 static void
2881 expand_context_stack (pfile)
2882 cpp_reader *pfile;
2884 pfile->context_cap += pfile->context_cap + 20;
2885 pfile->contexts = (cpp_context *)
2886 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2889 /* Push the context of macro NODE onto the context stack. TOKEN is
2890 the CPP_NAME token invoking the macro. */
2891 static int
2892 push_macro_context (pfile, token)
2893 cpp_reader *pfile;
2894 const cpp_token *token;
2896 unsigned char orig_flags;
2897 macro_args *args;
2898 cpp_context *context;
2899 cpp_hashnode *node = token->val.node;
2901 /* Token's flags may change when parsing args containing a nested
2902 invocation of this macro. */
2903 orig_flags = token->flags & (PREV_WHITE | BOL);
2904 args = 0;
2905 if (node->value.expansion->paramc >= 0)
2907 unsigned int error, prev_nme;
2909 /* Allocate room for the argument contexts, and parse them. */
2910 args = (macro_args *) xmalloc (sizeof (macro_args));
2911 args->ends = (unsigned int *)
2912 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2913 args->tokens = 0;
2914 args->capacity = 0;
2915 args->used = 0;
2916 args->level = pfile->cur_context;
2918 prev_nme = prevent_macro_expansion (pfile);
2919 pfile->args = args;
2920 error = parse_args (pfile, node, args);
2921 pfile->args = 0;
2922 restore_macro_expansion (pfile, prev_nme);
2923 if (error)
2925 free_macro_args (args);
2926 return 1;
2930 /* Now push its context. */
2931 pfile->cur_context++;
2932 if (pfile->cur_context == pfile->context_cap)
2933 expand_context_stack (pfile);
2935 context = CURRENT_CONTEXT (pfile);
2936 context->u.list = node->value.expansion;
2937 context->args = args;
2938 context->posn = 0;
2939 context->count = context->u.list->tokens_used;
2940 context->level = pfile->cur_context;
2941 context->flags = 0;
2942 context->pushed_token = 0;
2944 /* Set the flags of the first token. We know there must
2945 be one, empty macros are a single placemarker token. */
2946 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2948 return 0;
2951 /* Push an argument to the current macro onto the context stack.
2952 TOKEN is the MACRO_ARG token representing the argument expansion. */
2953 static void
2954 push_arg_context (pfile, token)
2955 cpp_reader *pfile;
2956 const cpp_token *token;
2958 cpp_context *context;
2959 macro_args *args;
2961 pfile->cur_context++;
2962 if (pfile->cur_context == pfile->context_cap)
2963 expand_context_stack (pfile);
2965 context = CURRENT_CONTEXT (pfile);
2966 args = context[-1].args;
2968 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2969 context->u.arg = args->tokens + context->count;
2970 context->count = args->ends[token->val.aux] - context->count;
2971 context->args = 0;
2972 context->posn = 0;
2973 context->level = args->level;
2974 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2975 context->pushed_token = 0;
2977 /* Set the flags of the first token. There is one. */
2979 const cpp_token *first = context->u.arg[0];
2980 if (!first)
2981 first = context->u.arg[1];
2983 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2984 token->flags & (PREV_WHITE | BOL));
2987 if (token->flags & PASTE_LEFT)
2988 context->flags |= CONTEXT_PASTEL;
2989 if (pfile->paste_level)
2990 context->flags |= CONTEXT_PASTER;
2993 /* "Unget" a token. It is effectively inserted in the token queue and
2994 will be returned by the next call to get_raw_token. */
2995 void
2996 _cpp_push_token (pfile, token)
2997 cpp_reader *pfile;
2998 const cpp_token *token;
3000 cpp_context *context = CURRENT_CONTEXT (pfile);
3001 if (context->pushed_token)
3002 cpp_ice (pfile, "two tokens pushed in a row");
3003 if (token->type != CPP_EOF)
3004 context->pushed_token = token;
3005 /* Don't push back a directive's CPP_EOF, step back instead. */
3006 else if (pfile->cur_context == 0)
3007 pfile->contexts[0].posn--;
3010 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
3011 introducing the directive. */
3012 static void
3013 process_directive (pfile, token)
3014 cpp_reader *pfile;
3015 const cpp_token *token;
3017 const struct directive *d = pfile->token_list.directive;
3018 int prev_nme = 0;
3020 /* Skip over the directive name. */
3021 if (token[1].type == CPP_NAME)
3022 _cpp_get_raw_token (pfile);
3023 else if (token[1].type != CPP_NUMBER)
3024 cpp_ice (pfile, "directive begins with %s?!",
3025 token_names[token[1].type]);
3027 /* Flush pending tokens at this point, in case the directive produces
3028 output. XXX Directive output won't be visible to a direct caller of
3029 cpp_get_token. */
3030 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
3031 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3033 if (! (d->flags & EXPAND))
3034 prev_nme = prevent_macro_expansion (pfile);
3035 (void) (*d->handler) (pfile);
3036 if (! (d->flags & EXPAND))
3037 restore_macro_expansion (pfile, prev_nme);
3038 _cpp_skip_rest_of_line (pfile);
3041 /* The external interface to return the next token. All macro
3042 expansion and directive processing is handled internally, the
3043 caller only ever sees the output after preprocessing. */
3044 const cpp_token *
3045 cpp_get_token (pfile)
3046 cpp_reader *pfile;
3048 const cpp_token *token;
3049 /* Loop till we hit a non-directive, non-placemarker token. */
3050 for (;;)
3052 token = _cpp_get_token (pfile);
3054 if (token->type == CPP_PLACEMARKER)
3055 continue;
3057 if (token->type == CPP_HASH && token->flags & BOL
3058 && pfile->token_list.directive)
3060 process_directive (pfile, token);
3061 continue;
3064 return token;
3068 /* The internal interface to return the next token. There are two
3069 differences between the internal and external interfaces: the
3070 internal interface may return a PLACEMARKER token, and it does not
3071 process directives. */
3072 const cpp_token *
3073 _cpp_get_token (pfile)
3074 cpp_reader *pfile;
3076 const cpp_token *token;
3077 cpp_hashnode *node;
3079 /* Loop until we hit a non-macro token. */
3080 for (;;)
3082 token = get_raw_token (pfile);
3084 /* Short circuit EOF. */
3085 if (token->type == CPP_EOF)
3086 return token;
3088 /* If we are skipping... */
3089 if (pfile->skipping)
3091 /* we still have to process directives, */
3092 if (pfile->token_list.directive)
3093 return token;
3095 /* but everything else is ignored. */
3096 _cpp_skip_rest_of_line (pfile);
3097 continue;
3100 /* If there's a potential control macro and we get here, then that
3101 #ifndef didn't cover the entire file and its argument shouldn't
3102 be taken as a control macro. */
3103 pfile->potential_control_macro = 0;
3105 /* See if there's a token to paste with this one. */
3106 if (!pfile->paste_level)
3107 token = maybe_paste_with_next (pfile, token);
3109 /* If it isn't a macro, return it now. */
3110 if (token->type != CPP_NAME
3111 || token->val.node->type == T_VOID)
3112 return token;
3114 /* Is macro expansion disabled in general? */
3115 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
3116 return token;
3118 node = token->val.node;
3119 if (node->type != T_MACRO)
3120 return special_symbol (pfile, node, token);
3122 if (is_macro_disabled (pfile, node->value.expansion, token))
3123 return token;
3125 if (pfile->cur_context > CPP_STACK_MAX)
3127 cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3128 return token;
3131 if (push_macro_context (pfile, token))
3132 return token;
3133 /* else loop */
3137 /* Returns the next raw token, i.e. without performing macro
3138 expansion. Argument contexts are automatically entered. */
3139 static const cpp_token *
3140 get_raw_token (pfile)
3141 cpp_reader *pfile;
3143 const cpp_token *result;
3144 cpp_context *context;
3146 for (;;)
3148 context = CURRENT_CONTEXT (pfile);
3149 if (context->pushed_token)
3151 result = context->pushed_token;
3152 context->pushed_token = 0;
3154 else if (context->posn == context->count)
3156 if (pop_context (pfile))
3157 return &eof_token;
3158 continue;
3160 else
3162 if (IS_ARG_CONTEXT (context))
3164 result = context->u.arg[context->posn++];
3165 if (result == 0)
3167 context->flags ^= CONTEXT_RAW;
3168 result = context->u.arg[context->posn++];
3170 return result; /* Cannot be a CPP_MACRO_ARG */
3172 result = &context->u.list->tokens[context->posn++];
3175 if (result->type != CPP_MACRO_ARG)
3176 return result;
3178 if (result->flags & STRINGIFY_ARG)
3179 return stringify_arg (pfile, result);
3181 push_arg_context (pfile, result);
3185 /* Internal interface to get the token without macro expanding. */
3186 const cpp_token *
3187 _cpp_get_raw_token (pfile)
3188 cpp_reader *pfile;
3190 int prev_nme = prevent_macro_expansion (pfile);
3191 const cpp_token *result = _cpp_get_token (pfile);
3192 restore_macro_expansion (pfile, prev_nme);
3193 return result;
3196 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3197 list should be overwritten, or zero if we need to append
3198 (typically, if we are within the arguments to a macro, or looking
3199 for the '(' to start a function-like macro invocation). */
3200 static int
3201 lex_next (pfile, clear)
3202 cpp_reader *pfile;
3203 int clear;
3205 cpp_toklist *list = &pfile->token_list;
3206 const cpp_token *old_list = list->tokens;
3207 unsigned int old_used = list->tokens_used;
3209 if (clear)
3211 /* Release all temporary tokens. */
3212 _cpp_clear_toklist (list);
3213 pfile->contexts[0].posn = 0;
3214 if (pfile->temp_used)
3215 release_temp_tokens (pfile);
3218 lex_line (pfile, list);
3219 pfile->contexts[0].count = list->tokens_used;
3221 if (!clear && pfile->args)
3223 /* Fix up argument token pointers. */
3224 if (old_list != list->tokens)
3226 unsigned int i;
3228 for (i = 0; i < pfile->args->used; i++)
3230 const cpp_token *token = pfile->args->tokens[i];
3231 if (token >= old_list && token < old_list + old_used)
3232 pfile->args->tokens[i] = (const cpp_token *)
3233 ((char *) token + ((char *) list->tokens - (char *) old_list));
3237 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3238 tokens within the list of arguments that would otherwise act as
3239 preprocessing directives, the behavior is undefined.
3241 This implementation will report a hard error and treat the
3242 'sequence of preprocessing tokens' as part of the macro argument,
3243 not a directive.
3245 Note if pfile->args == 0, we're OK since we're only inside a
3246 macro argument after a '('. */
3247 if (list->directive)
3249 cpp_error_with_line (pfile, list->tokens[old_used].line,
3250 list->tokens[old_used].col,
3251 "#%s may not be used inside a macro argument",
3252 list->directive->name);
3253 return 1;
3257 return 0;
3260 /* Pops a context off the context stack. If we're at the bottom, lexes
3261 the next logical line. Returns EOF if we're at the end of the
3262 argument list to the # operator, or if it is illegal to "overflow"
3263 into the rest of the file (e.g. 6.10.3.1.1). */
3264 static int
3265 pop_context (pfile)
3266 cpp_reader *pfile;
3268 cpp_context *context;
3270 if (pfile->cur_context == 0)
3272 /* If we are currently processing a directive, do not advance. 6.10
3273 paragraph 2: A new-line character ends the directive even if it
3274 occurs within what would otherwise be an invocation of a
3275 function-like macro. */
3276 if (pfile->token_list.directive)
3277 return 1;
3279 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3282 /* Argument contexts, when parsing args or handling # operator
3283 return CPP_EOF at the end. */
3284 context = CURRENT_CONTEXT (pfile);
3285 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3286 return 1;
3288 /* Free resources when leaving macro contexts. */
3289 if (context->args)
3290 free_macro_args (context->args);
3292 if (pfile->cur_context == pfile->no_expand_level)
3293 pfile->no_expand_level--;
3294 pfile->cur_context--;
3296 return 0;
3299 /* Turn off macro expansion at the current context level. */
3300 static unsigned int
3301 prevent_macro_expansion (pfile)
3302 cpp_reader *pfile;
3304 unsigned int prev_value = pfile->no_expand_level;
3305 pfile->no_expand_level = pfile->cur_context;
3306 return prev_value;
3309 /* Restore macro expansion to its previous state. */
3310 static void
3311 restore_macro_expansion (pfile, prev_value)
3312 cpp_reader *pfile;
3313 unsigned int prev_value;
3315 pfile->no_expand_level = prev_value;
3318 /* Used by cpperror.c to obtain the correct line and column to report
3319 in a diagnostic. */
3320 unsigned int
3321 _cpp_get_line (pfile, pcol)
3322 cpp_reader *pfile;
3323 unsigned int *pcol;
3325 unsigned int index;
3326 const cpp_token *cur_token;
3328 if (pfile->in_lex_line)
3329 index = pfile->token_list.tokens_used;
3330 else
3331 index = pfile->contexts[0].posn;
3333 cur_token = &pfile->token_list.tokens[index - 1];
3334 if (pcol)
3335 *pcol = cur_token->col;
3336 return cur_token->line;
3339 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3340 static const char * const monthnames[] =
3342 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3343 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3346 /* Handle builtin macros like __FILE__. */
3347 static const cpp_token *
3348 special_symbol (pfile, node, token)
3349 cpp_reader *pfile;
3350 cpp_hashnode *node;
3351 const cpp_token *token;
3353 cpp_token *result;
3354 cpp_buffer *ip;
3356 switch (node->type)
3358 case T_FILE:
3359 case T_BASE_FILE:
3361 const char *file;
3363 ip = CPP_BUFFER (pfile);
3364 if (ip == 0)
3365 file = "";
3366 else
3368 if (node->type == T_BASE_FILE)
3369 while (CPP_PREV_BUFFER (ip) != NULL)
3370 ip = CPP_PREV_BUFFER (ip);
3372 file = ip->nominal_fname;
3374 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3375 strlen (file));
3377 break;
3379 case T_INCLUDE_LEVEL:
3380 /* pfile->include_depth counts the primary source as level 1,
3381 but historically __INCLUDE_DEPTH__ has called the primary
3382 source level 0. */
3383 result = alloc_number_token (pfile, pfile->include_depth - 1);
3384 break;
3386 case T_SPECLINE:
3387 /* If __LINE__ is embedded in a macro, it must expand to the
3388 line of the macro's invocation, not its definition.
3389 Otherwise things like assert() will not work properly. */
3390 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3391 break;
3393 case T_STDC:
3395 int stdc = 1;
3397 #ifdef STDC_0_IN_SYSTEM_HEADERS
3398 if (CPP_IN_SYSTEM_HEADER (pfile)
3399 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3400 stdc = 0;
3401 #endif
3402 result = alloc_number_token (pfile, stdc);
3404 break;
3406 case T_DATE:
3407 case T_TIME:
3408 if (pfile->date == 0)
3410 /* Allocate __DATE__ and __TIME__ from permanent storage,
3411 and save them in pfile so we don't have to do this again.
3412 We don't generate these strings at init time because
3413 time() and localtime() are very slow on some systems. */
3414 time_t tt = time (NULL);
3415 struct tm *tb = localtime (&tt);
3417 pfile->date = make_string_token
3418 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3419 pfile->time = make_string_token
3420 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3422 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3423 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3424 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3425 tb->tm_hour, tb->tm_min, tb->tm_sec);
3427 result = node->type == T_DATE ? pfile->date: pfile->time;
3428 break;
3430 case T_POISON:
3431 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3432 return token;
3434 default:
3435 cpp_ice (pfile, "invalid special hash type");
3436 return token;
3439 ASSIGN_FLAGS_AND_POS (result, token);
3440 return result;
3442 #undef DSC
3444 /* Dump the original user's spelling of argument index ARG_NO to the
3445 macro whose expansion is LIST. */
3446 static void
3447 dump_param_spelling (pfile, list, arg_no)
3448 cpp_reader *pfile;
3449 const cpp_toklist *list;
3450 unsigned int arg_no;
3452 const U_CHAR *param = list->namebuf;
3454 while (arg_no--)
3455 param += ustrlen (param) + 1;
3456 CPP_PUTS (pfile, param, ustrlen (param));
3459 /* Dump a token list to the output. */
3460 void
3461 _cpp_dump_list (pfile, list, token, flush)
3462 cpp_reader *pfile;
3463 const cpp_toklist *list;
3464 const cpp_token *token;
3465 int flush;
3467 const cpp_token *limit = list->tokens + list->tokens_used;
3468 const cpp_token *prev = 0;
3470 /* Avoid the CPP_EOF. */
3471 if (list->directive)
3472 limit--;
3474 while (token < limit)
3476 if (token->type == CPP_MACRO_ARG)
3478 if (token->flags & PREV_WHITE)
3479 CPP_PUTC (pfile, ' ');
3480 if (token->flags & STRINGIFY_ARG)
3481 CPP_PUTC (pfile, '#');
3482 dump_param_spelling (pfile, list, token->val.aux);
3484 else
3485 output_token (pfile, token, prev);
3486 if (token->flags & PASTE_LEFT)
3487 CPP_PUTS (pfile, " ##", 3);
3488 prev = token;
3489 token++;
3492 if (flush && pfile->printer)
3493 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3496 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3497 if it hasn't happened already. */
3499 void
3500 _cpp_init_input_buffer (pfile)
3501 cpp_reader *pfile;
3503 cpp_context *base;
3505 init_trigraph_map ();
3506 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3507 pfile->no_expand_level = UINT_MAX;
3508 pfile->context_cap = 20;
3509 pfile->cur_context = 0;
3511 pfile->contexts = (cpp_context *)
3512 xmalloc (pfile->context_cap * sizeof (cpp_context));
3514 /* Clear the base context. */
3515 base = &pfile->contexts[0];
3516 base->u.list = &pfile->token_list;
3517 base->posn = 0;
3518 base->count = 0;
3519 base->args = 0;
3520 base->level = 0;
3521 base->flags = 0;
3522 base->pushed_token = 0;
3525 /* Moves to the end of the directive line, popping contexts as
3526 necessary. */
3527 void
3528 _cpp_skip_rest_of_line (pfile)
3529 cpp_reader *pfile;
3531 /* Discard all stacked contexts. */
3532 int i;
3533 for (i = pfile->cur_context; i > 0; i--)
3534 if (pfile->contexts[i].args)
3535 free_macro_args (pfile->contexts[i].args);
3537 if (pfile->no_expand_level <= pfile->cur_context)
3538 pfile->no_expand_level = 0;
3539 pfile->cur_context = 0;
3541 /* Clear the base context, and clear the directive pointer so that
3542 get_raw_token will advance to the next line. */
3543 pfile->contexts[0].count = 0;
3544 pfile->contexts[0].posn = 0;
3545 pfile->token_list.directive = 0;
3548 /* Directive handler wrapper used by the command line option
3549 processor. */
3550 void
3551 _cpp_run_directive (pfile, dir, buf, count)
3552 cpp_reader *pfile;
3553 const struct directive *dir;
3554 const char *buf;
3555 size_t count;
3557 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3559 unsigned int prev_lvl = 0;
3561 /* Scan the line now, else prevent_macro_expansion won't work. */
3562 lex_next (pfile, 1);
3563 if (! (dir->flags & EXPAND))
3564 prev_lvl = prevent_macro_expansion (pfile);
3566 (void) (*dir->handler) (pfile);
3568 if (! (dir->flags & EXPAND))
3569 restore_macro_expansion (pfile, prev_lvl);
3571 _cpp_skip_rest_of_line (pfile);
3572 cpp_pop_buffer (pfile);