2000-02-17 Zack Weinberg <zack@wolery.cumb.org>
[official-gcc.git] / gcc / cpplex.c
bloba41e4eea6edffa45d519a2a7b666f9cb451b7462
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 Cleanups to do:-
27 o -dM and with _cpp_dump_list: too many \n output.
28 o Put a printer object in cpp_reader?
29 o Check line numbers assigned to all errors.
30 o Replace strncmp with memcmp almost everywhere.
31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
32 o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
34 o Distinguish integers, floats, and 'other' pp-numbers.
35 o Store ints and char constants as binary values.
36 o New command-line assertion syntax.
37 o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39 o Comment all functions, and describe macro expansion algorithm.
40 o Move as much out of header files as possible.
41 o Remove single quote pairs `', and some '', from diagnostics.
42 o Correct pastability test for CPP_NAME and CPP_NUMBER.
46 #include "config.h"
47 #include "system.h"
48 #include "intl.h"
49 #include "cpplib.h"
50 #include "cpphash.h"
51 #include "symcat.h"
53 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
54 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
56 /* Flags for cpp_context. */
57 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
58 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
59 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
60 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
62 typedef struct cpp_context cpp_context;
63 struct cpp_context
65 union
67 const cpp_toklist *list; /* Used for macro contexts only. */
68 const cpp_token **arg; /* Used for arg contexts only. */
69 } u;
71 /* Pushed token to be returned by next call to get_raw_token. */
72 const cpp_token *pushed_token;
74 struct macro_args *args; /* The arguments for a function-like
75 macro. NULL otherwise. */
76 unsigned short posn; /* Current posn, index into u. */
77 unsigned short count; /* No. of tokens in u. */
78 unsigned short level;
79 unsigned char flags;
82 typedef struct macro_args macro_args;
83 struct macro_args
85 unsigned int *ends;
86 const cpp_token **tokens;
87 unsigned int capacity;
88 unsigned int used;
89 unsigned short level;
92 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
93 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
94 macro_args *, unsigned int *));
95 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
96 static void save_token PARAMS ((macro_args *, const cpp_token *));
97 static int pop_context PARAMS ((cpp_reader *));
98 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
99 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
100 static void free_macro_args PARAMS ((macro_args *));
102 #define auto_expand_name_space(list) \
103 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
104 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
105 size_t, FILE *));
106 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
107 unsigned int));
108 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
109 unsigned int));
111 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
112 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
113 unsigned char *));
114 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
115 const unsigned char *));
116 static int skip_block_comment PARAMS ((cpp_reader *));
117 static int skip_line_comment PARAMS ((cpp_reader *));
118 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
119 static void skip_whitespace PARAMS ((cpp_reader *, int));
120 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
121 const U_CHAR *, const U_CHAR *));
122 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
123 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
124 unsigned int));
125 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
126 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
127 const unsigned char *,
128 unsigned int, unsigned int));
129 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
130 static int lex_next PARAMS ((cpp_reader *, int));
131 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
132 const cpp_token *));
134 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
135 static void expand_context_stack PARAMS ((cpp_reader *));
136 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
137 unsigned char *));
138 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
139 const cpp_token *));
140 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
141 cpp_token *));
142 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
143 unsigned int));
144 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
145 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
146 const cpp_token *));
147 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
148 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
149 const cpp_token *));
150 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
151 const cpp_token *, int *));
152 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
153 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
154 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
155 static void release_temp_tokens PARAMS ((cpp_reader *));
156 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
157 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
/* Start an empty string in TOKEN: its length is zero and its text
   begins at the first unused byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do {(token)->val.str.len = 0; \
      (token)->val.str.text = (list)->namebuf + (list)->name_used; \
  } while (0)

/* Non-zero if C is a '+' or '-' that directly follows an exponent
   letter PREVC: 'e' or 'E' always, 'p' or 'P' only when the c89 option
   is off.  Relies on a variable named pfile being in scope.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character just consumed and CUR points just past
   it.  If the character at CUR is the other member of the CR/LF pair
   ('\r' + '\n' - C), it is consumed as well.  The buffer's line number
   is then incremented, its line base set to CUR, and the column
   adjustment reset.  CUR is modified and evaluated more than once; a
   variable named pfile must be in scope.  C is parenthesized so that
   any expression may be passed for it.  */

#define handle_newline(cur, limit, c) \
 do { \
  if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
    (cur)++; \
  pfile->buffer->lineno++; \
  pfile->buffer->line_base = (cur); \
  pfile->col_adjust = 0; \
 } while (0)
/* Helpers that operate on a token pointer named cur_token, which must
   be in scope where they are used.  */

/* True if the token at cur_token does not have PREV_WHITE set.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
/* Type of the token just before cur_token.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Set the type of the token at cur_token, then advance cur_token.  */
#define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
/* Change the type of the token just before cur_token.  */
#define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
/* Step cur_token back one token and set that token's type.  */
#define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
/* As BACKUP_TOKEN, additionally setting the DIGRAPH flag.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)

/* An upper bound on the number of bytes needed to spell a token,
   including preceding whitespace.  */
#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
                               ? (token)->val.str.len \
                               : (TOKEN_SPELL(token) == SPELL_IDENT \
                                  ? (token)->val.node->length \
                                  : 0)))

/* Non-zero if context C has CONTEXT_ARG set.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Address of the context at index cur_context in PFILE's array.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)

/* Give D the PREV_WHITE, BOL and PASTE_LEFT flags of S (and no others).
   If BOL ends up set, copy S's column and line to D too.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* f is flags, just consisting of PREV_WHITE | BOL.  Those two bits of
   D are replaced by F; if F contains BOL, S's column and line are
   copied to D.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
      if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
212 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
213 #define I(e, s) {SPELL_IDENT, s},
214 #define S(e, s) {SPELL_STRING, s},
215 #define C(e, s) {SPELL_CHAR, s},
216 #define N(e, s) {SPELL_NONE, s},
218 const struct token_spelling
219 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
221 #undef T
222 #undef I
223 #undef S
224 #undef C
225 #undef N
227 /* For debugging: the internal names of the tokens. */
228 #define T(e, s) U STRINGX(e),
229 #define I(e, s) U STRINGX(e),
230 #define S(e, s) U STRINGX(e),
231 #define C(e, s) U STRINGX(e),
232 #define N(e, s) U STRINGX(e),
234 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
236 #undef T
237 #undef I
238 #undef S
239 #undef C
240 #undef N
242 /* The following table is used by trigraph_ok/trigraph_replace. If we
243 have designated initializers, it can be constant data; otherwise,
244 it is set up at runtime by _cpp_init_input_buffer. */
246 #if (GCC_VERSION >= 2007)
247 #define init_trigraph_map() /* nothing */
248 #define TRIGRAPH_MAP \
249 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
250 #define END };
251 #define s(p, v) [p] = v,
252 #else
253 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
254 static void init_trigraph_map PARAMS ((void)) { \
255 unsigned char *x = trigraph_map;
256 #define END }
257 #define s(p, v) x[p] = v;
258 #endif
260 TRIGRAPH_MAP
261 s('=', '#') s(')', ']') s('!', '|')
262 s('(', '[') s('\'', '^') s('>', '}')
263 s('/', '\\') s('<', '{') s('-', '~')
266 #undef TRIGRAPH_MAP
267 #undef END
268 #undef s
270 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
272 void
273 _cpp_grow_token_buffer (pfile, n)
274 cpp_reader *pfile;
275 long n;
277 long old_written = CPP_WRITTEN (pfile);
278 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
279 pfile->token_buffer = (U_CHAR *)
280 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
281 CPP_SET_WRITTEN (pfile, old_written);
284 /* Deal with the annoying semantics of fwrite. */
285 static void
286 safe_fwrite (pfile, buf, len, fp)
287 cpp_reader *pfile;
288 const U_CHAR *buf;
289 size_t len;
290 FILE *fp;
292 size_t count;
294 while (len)
296 count = fwrite (buf, 1, len, fp);
297 if (count == 0)
298 goto error;
299 len -= count;
300 buf += count;
302 return;
304 error:
305 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
308 /* Notify the compiler proper that the current line number has jumped,
309 or the current file name has changed. */
311 static void
312 output_line_command (pfile, print, line)
313 cpp_reader *pfile;
314 cpp_printer *print;
315 unsigned int line;
317 cpp_buffer *ip = CPP_BUFFER (pfile);
318 enum { same = 0, enter, leave, rname } change;
319 static const char * const codes[] = { "", " 1", " 2", "" };
321 if (line == 0)
322 return;
324 /* End the previous line of text. */
325 if (pfile->need_newline)
326 putc ('\n', print->outf);
327 pfile->need_newline = 0;
329 if (CPP_OPTION (pfile, no_line_commands))
330 return;
332 /* If ip is null, we've been called from cpp_finish, and they just
333 needed the final flush and trailing newline. */
334 if (!ip)
335 return;
337 if (pfile->include_depth == print->last_id)
339 /* Determine whether the current filename has changed, and if so,
340 how. 'nominal_fname' values are unique, so they can be compared
341 by comparing pointers. */
342 if (ip->nominal_fname == print->last_fname)
343 change = same;
344 else
345 change = rname;
347 else
349 if (pfile->include_depth > print->last_id)
350 change = enter;
351 else
352 change = leave;
353 print->last_id = pfile->include_depth;
355 print->last_fname = ip->nominal_fname;
357 /* If the current file has not changed, we can output a few newlines
358 instead if we want to increase the line number by a small amount.
359 We cannot do this if print->lineno is zero, because that means we
360 haven't output any line commands yet. (The very first line
361 command output is a `same_file' command.) */
362 if (change == same && print->lineno > 0
363 && line >= print->lineno && line < print->lineno + 8)
365 while (line > print->lineno)
367 putc ('\n', print->outf);
368 print->lineno++;
370 return;
373 #ifndef NO_IMPLICIT_EXTERN_C
374 if (CPP_OPTION (pfile, cplusplus))
375 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
376 codes[change],
377 ip->inc->sysp ? " 3" : "",
378 (ip->inc->sysp == 2) ? " 4" : "");
379 else
380 #endif
381 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
382 codes[change],
383 ip->inc->sysp ? " 3" : "");
384 print->lineno = line;
387 /* Write the contents of the token_buffer to the output stream, and
388 clear the token_buffer. Also handles generating line commands and
389 keeping track of file transitions. */
391 void
392 cpp_output_tokens (pfile, print, line)
393 cpp_reader *pfile;
394 cpp_printer *print;
395 unsigned int line;
397 if (CPP_WRITTEN (pfile) - print->written)
399 safe_fwrite (pfile, pfile->token_buffer,
400 CPP_WRITTEN (pfile) - print->written, print->outf);
401 pfile->need_newline = 1;
402 if (print->lineno)
403 print->lineno++;
405 CPP_SET_WRITTEN (pfile, print->written);
407 output_line_command (pfile, print, line);
410 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
412 void
413 cpp_scan_buffer_nooutput (pfile)
414 cpp_reader *pfile;
416 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
417 const cpp_token *token;
419 /* In no-output mode, we can ignore everything but directives. */
420 for (;;)
422 token = _cpp_get_token (pfile);
424 if (token->type == CPP_EOF)
426 cpp_pop_buffer (pfile);
427 if (CPP_BUFFER (pfile) == stop)
428 break;
431 if (token->type == CPP_HASH && token->flags & BOL
432 && pfile->token_list.directive)
434 process_directive (pfile, token);
435 continue;
438 _cpp_skip_rest_of_line (pfile);
442 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
443 void
444 cpp_scan_buffer (pfile, print)
445 cpp_reader *pfile;
446 cpp_printer *print;
448 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
449 const cpp_token *token, *prev = 0;
451 for (;;)
453 token = _cpp_get_token (pfile);
454 if (token->type == CPP_EOF)
456 cpp_pop_buffer (pfile);
457 if (CPP_BUFFER (pfile) == stop)
458 return;
460 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
461 prev = 0;
462 continue;
465 if (token->flags & BOL)
467 if (token->type == CPP_HASH && pfile->token_list.directive)
469 process_directive (pfile, token);
470 continue;
473 cpp_output_tokens (pfile, print, pfile->token_list.line);
474 prev = 0;
477 if (token->type != CPP_PLACEMARKER)
478 output_token (pfile, token, prev);
480 prev = token;
484 /* Scan a single line of the input into the token_buffer. */
486 cpp_scan_line (pfile)
487 cpp_reader *pfile;
489 const cpp_token *token, *prev = 0;
491 if (pfile->buffer == NULL)
492 return 0;
496 token = cpp_get_token (pfile);
497 if (token->type == CPP_EOF)
499 cpp_pop_buffer (pfile);
500 break;
503 /* If the last token on a line results from a macro expansion,
504 the check below will fail to stop us from proceeding to the
505 next line - so make sure we stick in a newline, at least. */
506 if (token->flags & BOL)
507 CPP_PUTC (pfile, '\n');
509 output_token (pfile, token, prev);
510 prev = token;
512 while (pfile->cur_context > 0
513 || pfile->contexts[0].posn < pfile->contexts[0].count);
514 return 1;
517 /* Helper routine used by parse_include, which can't see spell_token.
518 Reinterpret the current line as an h-char-sequence (< ... >); we are
519 looking at the first token after the <. */
520 const cpp_token *
521 _cpp_glue_header_name (pfile)
522 cpp_reader *pfile;
524 unsigned int written = CPP_WRITTEN (pfile);
525 const cpp_token *t;
526 cpp_token *hdr;
527 U_CHAR *buf;
528 size_t len;
530 for (;;)
532 t = _cpp_get_token (pfile);
533 if (t->type == CPP_GREATER || t->type == CPP_EOF)
534 break;
536 CPP_RESERVE (pfile, TOKEN_LEN (t));
537 if (t->flags & PREV_WHITE)
538 CPP_PUTC_Q (pfile, ' ');
539 pfile->limit = spell_token (pfile, t, pfile->limit);
542 if (t->type == CPP_EOF)
543 cpp_error (pfile, "missing terminating > character");
545 len = CPP_WRITTEN (pfile) - written;
546 buf = xmalloc (len);
547 memcpy (buf, pfile->token_buffer + written, len);
548 CPP_SET_WRITTEN (pfile, written);
550 hdr = get_temp_token (pfile);
551 hdr->type = CPP_HEADER_NAME;
552 hdr->flags = 0;
553 hdr->val.str.text = buf;
554 hdr->val.str.len = len;
555 return hdr;
558 /* Token-buffer helper functions. */
560 /* Expand a token list's string space. It is *vital* that
561 list->tokens_used is correct, to get pointer fix-up right. */
562 void
563 _cpp_expand_name_space (list, len)
564 cpp_toklist *list;
565 unsigned int len;
567 const U_CHAR *old_namebuf;
569 old_namebuf = list->namebuf;
570 list->name_cap += len;
571 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
573 /* Fix up token text pointers. */
574 if (list->namebuf != old_namebuf)
576 unsigned int i;
578 for (i = 0; i < list->tokens_used; i++)
579 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
580 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
584 /* If there is not enough room for LEN more characters, expand the
585 list by just enough to have room for LEN characters. */
586 void
587 _cpp_reserve_name_space (list, len)
588 cpp_toklist *list;
589 unsigned int len;
591 unsigned int room = list->name_cap - list->name_used;
593 if (room < len)
594 _cpp_expand_name_space (list, len - room);
597 /* Expand the number of tokens in a list. */
598 void
599 _cpp_expand_token_space (list, count)
600 cpp_toklist *list;
601 unsigned int count;
603 unsigned int n;
605 list->tokens_cap += count;
606 n = list->tokens_cap;
607 if (list->flags & LIST_OFFSET)
608 list->tokens--, n++;
609 list->tokens = (cpp_token *)
610 xrealloc (list->tokens, n * sizeof (cpp_token));
611 if (list->flags & LIST_OFFSET)
612 list->tokens++; /* Skip the dummy. */
615 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
616 an extra token in front of the token list, as this allows the lexer
617 to always peek at the previous token without worrying about
618 underflowing the list, and some initial space. Otherwise, no
619 token- or name-space is allocated, and there is no dummy token. */
620 void
621 _cpp_init_toklist (list, flags)
622 cpp_toklist *list;
623 int flags;
625 if (flags == NO_DUMMY_TOKEN)
627 list->tokens_cap = 0;
628 list->tokens = 0;
629 list->name_cap = 0;
630 list->namebuf = 0;
631 list->flags = 0;
633 else
635 /* Initialize token space. Put a dummy token before the start
636 that will fail matches. */
637 list->tokens_cap = 256; /* 4K's worth. */
638 list->tokens = (cpp_token *)
639 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
640 list->tokens[0].type = CPP_EOF;
641 list->tokens++;
643 /* Initialize name space. */
644 list->name_cap = 1024;
645 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
646 list->flags = LIST_OFFSET;
649 _cpp_clear_toklist (list);
652 /* Clear a token list. */
653 void
654 _cpp_clear_toklist (list)
655 cpp_toklist *list;
657 list->tokens_used = 0;
658 list->name_used = 0;
659 list->directive = 0;
660 list->paramc = 0;
661 list->params_len = 0;
662 list->flags &= LIST_OFFSET; /* clear all but that one */
665 /* Free a token list. Does not free the list itself, which may be
666 embedded in a larger structure. */
667 void
668 _cpp_free_toklist (list)
669 const cpp_toklist *list;
671 if (list->flags & LIST_OFFSET)
672 free (list->tokens - 1); /* Backup over dummy token. */
673 else
674 free (list->tokens);
675 free (list->namebuf);
678 /* Compare two tokens. */
680 _cpp_equiv_tokens (a, b)
681 const cpp_token *a, *b;
683 if (a->type == b->type && a->flags == b->flags)
684 switch (token_spellings[a->type].type)
686 default: /* Keep compiler happy. */
687 case SPELL_OPERATOR:
688 return 1;
689 case SPELL_CHAR:
690 case SPELL_NONE:
691 return a->val.aux == b->val.aux; /* arg_no or character. */
692 case SPELL_IDENT:
693 return a->val.node == b->val.node;
694 case SPELL_STRING:
695 return (a->val.str.len == b->val.str.len
696 && !memcmp (a->val.str.text, b->val.str.text,
697 a->val.str.len));
700 return 0;
703 /* Compare two token lists. */
705 _cpp_equiv_toklists (a, b)
706 const cpp_toklist *a, *b;
708 unsigned int i;
710 if (a->tokens_used != b->tokens_used
711 || a->flags != b->flags
712 || a->paramc != b->paramc)
713 return 0;
715 for (i = 0; i < a->tokens_used; i++)
716 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
717 return 0;
718 return 1;
721 /* Utility routine:
723 Compares, the token TOKEN to the NUL-terminated string STRING.
724 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
727 cpp_ideq (token, string)
728 const cpp_token *token;
729 const char *string;
731 if (token->type != CPP_NAME)
732 return 0;
734 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
737 /* Lexing algorithm.
739 The original lexer in cpplib was made up of two passes: a first pass
740 that replaced trigraphs and deleted escaped newlines, and a second
741 pass that tokenized the result of the first pass. Tokenisation was
742 performed by peeking at the next character in the input stream. For
743 example, if the input stream contained "!=", the handler for the !
744 character would peek at the next character, and if it were a '='
745 would skip over it, and return a "!=" token, otherwise it would
746 return just the "!" token.
748 To implement a single-pass lexer, this peeking ahead is unworkable.
749 An arbitrary number of escaped newlines, and trigraphs (in particular
750 ??/ which translates to the escape \), could separate the '!' and '='
751 in the input stream, yet the next token is still a "!=".
753 Suppose instead that we lex by one logical line at a time, producing
754 a token list or stack for each logical line, and when seeing the '!'
755 push a CPP_NOT token on the list. Then if the '!' is part of a
756 longer token ("!=") we know we must see the remainder of the token by
757 the time we reach the end of the logical line. Thus we can have the
758 '=' handler look at the previous token (at the end of the list / top
759 of the stack) and see if it is a "!" token, and if so, instead of
760 pushing a "=" token revise the existing token to be a "!=" token.
762 This works in the presence of escaped newlines, because the '\' would
763 have been pushed on the top of the stack as a CPP_BACKSLASH. The
764 newline ('\n' or '\r') handler looks at the token at the top of the
765 stack to see if it is a CPP_BACKSLASH, and if so discards both.
766 Hence the '=' handler would never see any intervening tokens.
768 To make trigraphs work in this context, as in precedence trigraphs
769 are highest and converted before anything else, the '?' handler does
770 lookahead to see if it is a trigraph, and if so skips the trigraph
771 and pushes the token it represents onto the top of the stack. This
772 also works in the particular case of a CPP_BACKSLASH trigraph.
774 To the preprocessor, whitespace is only significant to the point of
775 knowing whether whitespace precedes a particular token. For example,
776 the '=' handler needs to know whether there was whitespace between it
777 and a "!" token on the top of the stack, to make the token conversion
778 decision correctly. So each token has a PREV_WHITE flag to
779 indicate this - the standard permits consecutive whitespace to be
780 regarded as a single space. The compiler front ends are not
781 interested in whitespace at all; they just require a token stream.
782 Another place where whitespace is significant to the preprocessor is
783 a #define statement - if there is whitespace between the macro name
784 and an initial "(" token the macro is "object-like", otherwise it is
785 a function-like macro that takes arguments.
787 However, all is not rosy. Parsing of identifiers, numbers, comments
788 and strings becomes trickier because of the possibility of raw
789 trigraphs and escaped newlines in the input stream.
791 The trigraphs are three consecutive characters beginning with two
792 question marks. A question mark is not valid as part of a number or
793 identifier, so parsing of a number or identifier terminates normally
794 upon reaching it, returning to the mainloop which handles the
795 trigraph just like it would in any other position. Similarly for the
796 backslash of a backslash-newline combination. So we just need the
797 escaped-newline dropper in the mainloop to check if the token on the
798 top of the stack after dropping the escaped newline is a number or
799 identifier, and if so to continue the processing it as if nothing had
800 happened.
802 For strings, we replace trigraphs whenever we reach a quote or
803 newline, because there might be a backslash trigraph escaping them.
804 We need to be careful that we start trigraph replacing from where we
805 left off previously, because it is possible for a first scan to leave
806 "fake" trigraphs that a second scan would pick up as real (e.g. the
807 sequence "????/\n=" would find a fake ??= trigraph after removing the
808 escaped newline.)
810 For line comments, on reaching a newline we scan the previous
811 character(s) to see if it escaped, and continue if it is. Block
812 comments ignore everything and just focus on finding the comment
813 termination mark. The only difficult thing, and it is surprisingly
814 tricky, is checking if an asterisk precedes the final slash since
815 they could be separated by escaped newlines. If the preprocessor is
816 invoked with the output comments option, we don't bother removing
817 escaped newlines and replacing trigraphs for output.
819 Finally, numbers can begin with a period, which is pushed initially
820 as a CPP_DOT token in its own right. The digit handler checks if the
821 previous token was a CPP_DOT not separated by whitespace, and if so
822 pops it off the stack and pushes a period into the number's buffer
823 before calling the number parser.
/* Spellings of the six digraphs, in the order listed.  */
static const unsigned char *digraph_spellings [] = {
  U"%:",
  U"%:%:",
  U"<:",
  U":>",
  U"<%",
  U"%>"
};
830 /* Call when a trigraph is encountered. It warns if necessary, and
831 returns true if the trigraph should be honoured. END is the third
832 character of a trigraph in the input stream. */
833 static int
834 trigraph_ok (pfile, end)
835 cpp_reader *pfile;
836 const unsigned char *end;
838 int accept = CPP_OPTION (pfile, trigraphs);
840 if (CPP_OPTION (pfile, warn_trigraphs))
842 unsigned int col = end - 1 - pfile->buffer->line_base;
843 if (accept)
844 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
845 "trigraph ??%c converted to %c",
846 (int) *end, (int) trigraph_map[*end]);
847 else
848 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
849 "trigraph ??%c ignored", (int) *end);
851 return accept;
854 /* Scan a string for trigraphs, warning or replacing them inline as
855 appropriate. When parsing a string, we must call this routine
856 before processing a newline character (if trigraphs are enabled),
857 since the newline might be escaped by a preceding backslash
858 trigraph sequence. Returns a pointer to the end of the name after
859 replacement. */
861 static unsigned char *
862 trigraph_replace (pfile, src, limit)
863 cpp_reader *pfile;
864 unsigned char *src;
865 unsigned char *limit;
867 unsigned char *dest;
869 /* Starting with src[1], find two consecutive '?'. The case of no
870 trigraphs is streamlined. */
872 for (src++; src + 1 < limit; src += 2)
874 if (src[0] != '?')
875 continue;
877 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
878 if (src[-1] == '?')
879 src--;
880 else if (src + 2 == limit || src[1] != '?')
881 continue;
883 /* Check if it really is a trigraph. */
884 if (trigraph_map[src[2]] == 0)
885 continue;
887 dest = src;
888 goto trigraph_found;
890 return limit;
892 /* Now we have a trigraph, we need to scan the remaining buffer, and
893 copy-shifting its contents left if replacement is enabled. */
894 for (; src + 2 < limit; dest++, src++)
895 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
897 trigraph_found:
898 src += 2;
899 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
900 *dest = trigraph_map[*src];
903 /* Copy remaining (at most 2) characters. */
904 while (src < limit)
905 *dest++ = *src++;
906 return dest;
909 /* If CUR is a backslash or the end of a trigraphed backslash, return
910 a pointer to its beginning, otherwise NULL. We don't read beyond
911 the buffer start, because there is the start of the comment in the
912 buffer. */
913 static const unsigned char *
914 backslash_start (pfile, cur)
915 cpp_reader *pfile;
916 const unsigned char *cur;
918 if (cur[0] == '\\')
919 return cur;
920 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
921 && trigraph_ok (pfile, cur))
922 return cur - 2;
923 return 0;
926 /* Skip a C-style block comment. This is probably the trickiest
927 handler. We find the end of the comment by seeing if an asterisk
928 is before every '/' we encounter. The nasty complication is that a
929 previous asterisk may be separated by one or more escaped newlines.
930 Returns non-zero if comment terminated by EOF, zero otherwise. */
931 static int
932 skip_block_comment (pfile)
933 cpp_reader *pfile;
935 cpp_buffer *buffer = pfile->buffer;
936 const unsigned char *char_after_star = 0;
937 const unsigned char *cur = buffer->cur;
939 for (; cur < buffer->rlimit; )
941 unsigned char c = *cur++;
943 /* People like decorating comments with '*', so check for
944 '/' instead for efficiency. */
945 if (c == '/')
947 /* Don't view / then * then / as finishing the comment. */
948 if ((cur[-2] == '*' && cur - 1 > buffer->cur)
949 || cur - 1 == char_after_star)
951 buffer->cur = cur;
952 return 0;
955 /* Warn about potential nested comments, but not when
956 the final character inside the comment is a '/'.
957 Don't bother to get it right across escaped newlines. */
958 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
959 && cur[0] == '*' && cur[1] != '/')
961 buffer->cur = cur;
962 cpp_warning (pfile, "'/*' within comment");
965 else if (is_vspace (c))
967 const unsigned char* bslash = backslash_start (pfile, cur - 2);
969 handle_newline (cur, buffer->rlimit, c);
970 /* Work correctly if there is an asterisk before an
971 arbirtrarily long sequence of escaped newlines. */
972 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
973 char_after_star = cur;
974 else
975 char_after_star = 0;
977 else if (c == '\t')
978 adjust_column (pfile, cur - 1);
981 buffer->cur = cur;
982 return 1;
985 /* Skip a C++ line comment. Handles escaped newlines. Returns
986 non-zero if a multiline comment. */
987 static int
988 skip_line_comment (pfile)
989 cpp_reader *pfile;
991 cpp_buffer *buffer = pfile->buffer;
992 register const unsigned char *cur = buffer->cur;
993 int multiline = 0;
995 for (; cur < buffer->rlimit; )
997 unsigned char c = *cur++;
999 if (is_vspace (c))
1001 /* Check for a (trigaph?) backslash escaping the newline. */
1002 if (!backslash_start (pfile, cur - 2))
1003 goto out;
1004 multiline = 1;
1005 handle_newline (cur, buffer->rlimit, c);
1008 cur++;
1010 out:
1011 buffer->cur = cur - 1; /* Leave newline for caller. */
1012 return multiline;
1015 /* TAB points to a \t character. Update col_adjust so we track the
1016 column correctly. */
1017 static void
1018 adjust_column (pfile, tab)
1019 cpp_reader *pfile;
1020 const U_CHAR *tab;
1022 /* Zero-based column. */
1023 unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
1025 /* Round it up to multiple of the tabstop, but subtract 1 since the
1026 tab itself occupies a character position. */
1027 pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
1028 - col % CPP_OPTION (pfile, tabstop)) - 1;
1031 /* Skips whitespace, stopping at next non-whitespace character.
1032 Adjusts pfile->col_adjust to account for tabs. This enables tokens
1033 to be assigned the correct column. */
1034 static void
1035 skip_whitespace (pfile, in_directive)
1036 cpp_reader *pfile;
1037 int in_directive;
1039 cpp_buffer *buffer = pfile->buffer;
1040 unsigned short warned = 0;
1042 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1043 while (buffer->cur < buffer->rlimit)
1045 unsigned char c = *buffer->cur;
1047 if (!is_nvspace (c))
1048 break;
1050 buffer->cur++;
1051 /* Horizontal space always OK. */
1052 if (c == ' ')
1053 continue;
1054 else if (c == '\t')
1055 adjust_column (pfile, buffer->cur - 1);
1056 /* Must be \f \v or \0. */
1057 else if (c == '\0')
1059 if (!warned)
1060 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1061 CPP_BUF_COL (buffer),
1062 "embedded null character ignored");
1063 warned = 1;
1065 else if (in_directive && CPP_PEDANTIC (pfile))
1066 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1067 CPP_BUF_COL (buffer),
1068 "%s in preprocessing directive",
1069 c == '\f' ? "form feed" : "vertical tab");
1073 /* Parse (append) an identifier. Calculates the hash value of the
1074 token while parsing, for performance. The algorithm *must* match
1075 cpp_lookup(). */
1076 static const U_CHAR *
1077 parse_name (pfile, tok, cur, rlimit)
1078 cpp_reader *pfile;
1079 cpp_token *tok;
1080 const U_CHAR *cur, *rlimit;
1082 const U_CHAR *name;
1083 unsigned int len;
1084 unsigned int r;
1086 name = cur;
1087 r = 0;
1088 while (cur < rlimit)
1090 if (! is_idchar (*cur))
1091 break;
1092 /* $ is not a legal identifier character in the standard, but is
1093 commonly accepted as an extension. Don't warn about it in
1094 skipped conditional blocks. */
1095 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1097 CPP_BUFFER (pfile)->cur = cur;
1098 cpp_pedwarn (pfile, "'$' character in identifier");
1101 r = HASHSTEP (r, cur);
1102 cur++;
1104 len = cur - name;
1106 if (tok->val.node == 0)
1107 tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
1108 else
1110 unsigned int oldlen = tok->val.node->length;
1111 U_CHAR *newname = alloca (oldlen + len);
1112 memcpy (newname, tok->val.node->name, oldlen);
1113 memcpy (newname + oldlen, name, len);
1114 tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1117 return cur;
1120 /* Parse (append) a number. */
1121 static void
1122 parse_number (pfile, list, name)
1123 cpp_reader *pfile;
1124 cpp_toklist *list;
1125 cpp_string *name;
1127 const unsigned char *name_limit;
1128 unsigned char *namebuf;
1129 cpp_buffer *buffer = pfile->buffer;
1130 register const unsigned char *cur = buffer->cur;
1132 expanded:
1133 name_limit = list->namebuf + list->name_cap;
1134 namebuf = list->namebuf + list->name_used;
1136 for (; cur < buffer->rlimit && namebuf < name_limit; )
1138 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1140 /* Perhaps we should accept '$' here if we accept it for
1141 identifiers. We know namebuf[-1] is safe, because for c to
1142 be a sign we must have pushed at least one character. */
1143 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1144 goto out;
1146 namebuf++;
1147 cur++;
1150 /* Run out of name space? */
1151 if (cur < buffer->rlimit)
1153 list->name_used = namebuf - list->namebuf;
1154 auto_expand_name_space (list);
1155 goto expanded;
1158 out:
1159 buffer->cur = cur;
1160 name->len = namebuf - name->text;
1161 list->name_used = namebuf - list->namebuf;
1164 /* Places a string terminated by an unescaped TERMINATOR into a
1165 cpp_string, which should be expandable and thus at the top of the
1166 list's stack. Handles embedded trigraphs, if necessary, and
1167 escaped newlines.
1169 Can be used for character constants (terminator = '\''), string
1170 constants ('"') and angled headers ('>'). Multi-line strings are
1171 accepted as an extension when the terminator is '"'; the code makes
    no exception for directives. Character constants and header names
    that reach an unescaped newline are reported as unterminated.

    Scanning starts at pfile->buffer->cur. Characters are copied into
    LIST's name buffer, which is expanded when it fills up; the
    terminator itself is not stored. On return buffer->cur,
    TOKEN->val.str.len and LIST->name_used are updated, and a warning
    is issued if any null characters were copied. */
1173 static void
1174 parse_string (pfile, list, token, terminator)
1175 cpp_reader *pfile;
1176 cpp_toklist *list;
1177 cpp_token *token;
1178 unsigned int terminator;
1180 cpp_buffer *buffer = pfile->buffer;
1181 cpp_string *name = &token->val.str;
1182 register const unsigned char *cur = buffer->cur;
1183 const unsigned char *name_limit;
1184 unsigned char *namebuf;
1185 unsigned int null_count = 0;
/* Offset into list->namebuf from which trigraph scanning still has to
   be done; it starts at the beginning of this string. */
1186 unsigned int trigraphed = list->name_used;
/* Re-entered after each name space expansion, so the write pointers
   are recomputed from LIST's current fields. */
1188 expanded:
1189 name_limit = list->namebuf + list->name_cap;
1190 namebuf = list->namebuf + list->name_used;
1192 for (; cur < buffer->rlimit && namebuf < name_limit; )
1194 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
1196 if (c == '\0')
1197 null_count++;
1198 else if (c == terminator || is_vspace (c))
1200 /* Needed for trigraph_replace and multiline string warning. */
1201 buffer->cur = cur;
1203 /* Scan for trigraphs before checking if backslash-escaped. */
1204 if ((CPP_OPTION (pfile, trigraphs)
1205 || CPP_OPTION (pfile, warn_trigraphs))
1206 && namebuf - (list->namebuf + trigraphed) >= 3)
1208 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1209 namebuf);
1210 /* The test above guarantees trigraphed will be positive. */
1211 trigraphed = namebuf - list->namebuf - 2;
1214 namebuf--; /* Drop the newline / terminator from the name. */
1215 if (is_vspace (c))
1217 /* Drop a backslash newline, and continue. */
1218 if (namebuf[-1] == '\\')
1220 handle_newline (cur, buffer->rlimit, c);
1221 namebuf--;
1222 continue;
/* Unescaped newline: step back onto it so that the early exits below
   leave it unconsumed. */
1225 cur--;
1227 /* In assembly language, silently terminate strings of
1228 either variety at end of line. This is a kludge
1229 around not knowing where comments are. */
1230 if (CPP_OPTION (pfile, lang_asm))
1231 goto out;
1233 /* Character constants and header names may not extend
1234 over multiple lines. In Standard C, neither may
1235 strings. We accept multiline strings as an
1236 extension. (Even in directives - otherwise, glibc's
1237 longlong.h breaks.) */
1238 if (terminator != '"')
1239 goto unterminated;
1241 cur++; /* Move forwards again. */
/* Record where the first multi-line string began; the position is
   used by the unterminated-string diagnostic below. */
1243 if (pfile->multiline_string_line == 0)
1245 pfile->multiline_string_line = token->line;
1246 pfile->multiline_string_column = token->col;
1247 if (CPP_PEDANTIC (pfile))
1248 cpp_pedwarn (pfile, "multi-line string constant");
/* The newline is stored in the string as a single '\n', whichever
   vertical space character was read. */
1251 *namebuf++ = '\n';
1252 handle_newline (cur, buffer->rlimit, c);
1254 else
1256 unsigned char *temp;
1258 /* An odd number of consecutive backslashes represents
1259 an escaped terminator. */
1260 temp = namebuf - 1;
1261 while (temp >= name->text && *temp == '\\')
1262 temp--;
1264 if ((namebuf - temp) & 1)
1265 goto out;
/* Escaped terminator: keep it as part of the string by undoing the
   earlier namebuf--. */
1266 namebuf++;
1271 /* Run out of name space? */
1272 if (cur < buffer->rlimit)
1274 list->name_used = namebuf - list->namebuf;
1275 auto_expand_name_space (list);
1276 goto expanded;
1279 /* We may not have trigraph-replaced the input for this code path,
1280 but as the input is in error by being unterminated we don't
1281 bother. Prevent warnings about no newlines at EOF. */
1282 if (is_vspace (cur[-1]))
1283 cur--;
1285 unterminated:
1286 cpp_error (pfile, "missing terminating %c character", (int) terminator);
/* If an earlier multi-line string started on a different line, point
   at it as the likely culprit, then reset the record. */
1288 if (terminator == '\"' && pfile->multiline_string_line != list->line
1289 && pfile->multiline_string_line != 0)
1291 cpp_error_with_line (pfile, pfile->multiline_string_line,
1292 pfile->multiline_string_column,
1293 "possible start of unterminated string literal");
1294 pfile->multiline_string_line = 0;
/* Common exit: publish the scan position and the final string length. */
1297 out:
1298 buffer->cur = cur;
1299 name->len = namebuf - name->text;
1300 list->name_used = namebuf - list->namebuf;
1302 if (null_count > 0)
1303 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1304 : "null character preserved"));
1307 /* The character TYPE helps us distinguish comment types: '*' = C
1308 style, '/' = C++ style. For code simplicity, the stored comment
1309 includes the comment start and any terminator. */
1311 #define COMMENT_START_LEN 2
1312 static void
1313 save_comment (list, token, from, len, type)
1314 cpp_toklist *list;
1315 cpp_token *token;
1316 const unsigned char *from;
1317 unsigned int len;
1318 unsigned int type;
1320 unsigned char *buffer;
1322 len += COMMENT_START_LEN;
1324 if (list->name_used + len > list->name_cap)
1325 _cpp_expand_name_space (list, len);
1327 INIT_TOKEN_STR (list, token);
1328 token->type = CPP_COMMENT;
1329 token->val.str.len = len;
1331 buffer = list->namebuf + list->name_used;
1332 list->name_used += len;
1334 /* Copy the comment. */
1335 if (type == '*')
1337 *buffer++ = '/';
1338 *buffer++ = '*';
1340 else
1342 *buffer++ = type;
1343 *buffer++ = type;
1345 memcpy (buffer, from, len - COMMENT_START_LEN);
1349 * The tokenizer's main loop. Returns a token list, representing a
1350 * logical line in the input file. On EOF after some tokens have
1351 * been processed, we return immediately. Then in next call, or if
1352 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1353 * token is placed in the list.
1355 * Implementation relies almost entirely on lookback, rather than
1356 * looking forwards. This means that tokenization requires just
1357 * a single pass of the file, even in the presence of trigraphs and
1358 * escaped newlines, providing significant performance benefits.
1359 * Trigraph overhead is negligible if they are disabled, and low
1360 * even when enabled.
/* KNOWN_DIRECTIVE: the line being lexed has already been recognised
   as a directive (list->directive was set by one of the checks in
   lex_line).
   MIGHT_BE_DIRECTIVE: the current token is the second token of this
   line and the first one was a '#'. Both macros rely on the local
   variables of lex_line. */
1363 #define KNOWN_DIRECTIVE() (list->directive != 0)
1364 #define MIGHT_BE_DIRECTIVE() \
1365 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
/* Lex one logical line from pfile->buffer, appending its tokens to
   LIST starting at index list->tokens_used. Aborts unless LIST has
   the LIST_OFFSET flag set.

   Tokens are built incrementally: each character is first given a
   provisional token, which is then merged with the previous token
   when the two are adjacent and form a longer operator, number, name
   or comment start. The helper macros PUSH_TOKEN, REVISE_TOKEN,
   BACKUP_TOKEN, BACKUP_DIGRAPH, IMMED_TOKEN and PREV_TOKEN_TYPE are
   defined outside this function (not visible here); presumably they
   commit, retype or step back over tokens and test adjacency --
   TODO confirm against their definitions.

   On return buffer->cur and list->tokens_used are updated and
   pfile->in_lex_line is cleared. */
1367 static void
1368 lex_line (pfile, list)
1369 cpp_reader *pfile;
1370 cpp_toklist *list;
1372 cpp_token *cur_token, *token_limit, *first;
1373 cpp_buffer *buffer = pfile->buffer;
1374 const unsigned char *cur = buffer->cur;
/* Flags to be given to the next token started (e.g. PREV_WHITE after
   whitespace or a discarded comment). */
1375 unsigned char flags = 0;
1376 unsigned int first_token = list->tokens_used;
1378 if (!(list->flags & LIST_OFFSET))
1379 (abort) ();
1381 list->file = buffer->nominal_fname;
1382 list->line = CPP_BUF_LINE (buffer);
1383 pfile->col_adjust = 0;
1384 pfile->in_lex_line = 1;
1385 if (cur == buffer->buf)
1386 list->flags |= BEG_OF_FILE;
/* Re-entered after the token array has been expanded, so the token
   pointers are recomputed from LIST's current fields. */
1388 expanded:
1389 token_limit = list->tokens + list->tokens_cap;
1390 cur_token = list->tokens + list->tokens_used;
1392 for (; cur < buffer->rlimit && cur_token < token_limit;)
1394 unsigned char c;
1396 /* Optimize non-vertical whitespace skipping; most tokens are
1397 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1398 c = *cur;
1399 if (is_nvspace (c))
1401 buffer->cur = cur;
1402 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1403 && cur_token > &list->tokens[first_token]));
1404 cur = buffer->cur;
1406 flags = PREV_WHITE;
1407 if (cur == buffer->rlimit)
1408 break;
1409 c = *cur;
1411 cur++;
1413 /* Initialize current token. CPP_EOF will not be fixed up by
1414 expand_name_space. */
1415 list->tokens_used = cur_token - list->tokens + 1;
1416 cur_token->type = CPP_EOF;
1417 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1418 cur_token->line = CPP_BUF_LINE (buffer);
1419 cur_token->flags = flags;
1420 flags = 0;
1422 switch (c)
1424 case '0': case '1': case '2': case '3': case '4':
1425 case '5': case '6': case '7': case '8': case '9':
1427 int prev_dot;
1429 cur--; /* Backup character. */
1430 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1431 if (prev_dot)
1432 cur_token--;
1433 INIT_TOKEN_STR (list, cur_token);
1434 /* Prepend an immediately previous CPP_DOT token. */
1435 if (prev_dot)
1437 if (list->name_cap == list->name_used)
1438 auto_expand_name_space (list);
1440 cur_token->val.str.len = 1;
1441 list->namebuf[list->name_used++] = '.';
/* Also reached from the newline case below, to resume a number that
   was interrupted by an escaped newline. */
1444 continue_number:
1445 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1446 buffer->cur = cur;
1447 parse_number (pfile, list, &cur_token->val.str);
1448 cur = buffer->cur;
1450 /* Check for # 123 form of #line. */
1451 if (MIGHT_BE_DIRECTIVE ())
1452 list->directive = _cpp_check_linemarker (pfile, cur_token,
1453 !(cur_token[-1].flags
1454 & PREV_WHITE));
1455 cur_token++;
1456 break;
1458 letter:
1459 case '_':
1460 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1461 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1462 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1463 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1464 case 'y': case 'z':
1465 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1466 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1467 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1468 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1469 case 'Y': case 'Z':
1470 cur--; /* Backup character. */
1471 cur_token->val.node = 0;
1472 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
/* Also reached from the newline case below, to resume a name that was
   interrupted by an escaped newline; val.node is then non-null and
   parse_name appends to it. */
1474 continue_name:
1475 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
/* NOTE(review): this tests the flags of list->tokens[0], whereas the
   equivalent linemarker check above tests cur_token[-1], which is the
   '#' token that MIGHT_BE_DIRECTIVE just matched. The two differ
   whenever first_token is non-zero -- confirm which is intended. */
1477 if (MIGHT_BE_DIRECTIVE ())
1478 list->directive = _cpp_check_directive (pfile, cur_token,
1479 !(list->tokens[0].flags
1480 & PREV_WHITE));
1481 cur_token++;
1482 break;
1484 case '\'':
1485 case '\"':
1486 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1487 /* Do we have a wide string? */
1488 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1489 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1490 BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1492 do_parse_string:
1493 /* Here c is one of ' " or >. */
1494 INIT_TOKEN_STR (list, cur_token);
1495 buffer->cur = cur;
1496 parse_string (pfile, list, cur_token, c);
1497 cur = buffer->cur;
1498 cur_token++;
1499 break;
1501 case '/':
1502 cur_token->type = CPP_DIV;
1503 if (IMMED_TOKEN ())
1505 if (PREV_TOKEN_TYPE == CPP_DIV)
1507 /* We silently allow C++ comments in system headers,
1508 irrespective of conformance mode, because lots of
1509 broken systems do that and trying to clean it up
1510 in fixincludes is a nightmare. */
1511 if (CPP_IN_SYSTEM_HEADER (pfile))
1512 goto do_line_comment;
1513 else if (CPP_OPTION (pfile, cplusplus_comments))
1515 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1516 && ! buffer->warned_cplusplus_comments)
1518 buffer->cur = cur;
1519 cpp_pedwarn (pfile,
1520 "C++ style comments are not allowed in ISO C89");
1521 cpp_pedwarn (pfile,
1522 "(this will be reported only once per input file)");
1523 buffer->warned_cplusplus_comments = 1;
1525 do_line_comment:
1526 buffer->cur = cur;
1527 #if 0 /* Leave until new lexer in place. */
1528 if (cur[-2] != c)
1529 cpp_warning (pfile,
1530 "comment start split across lines");
1531 #endif
1532 if (skip_line_comment (pfile))
1533 cpp_warning (pfile, "multi-line comment");
1535 /* Back up to the first '/' of the comment start. */
1536 cur_token--;
/* Keep the comment as a token only when comments are not being
   discarded and, inside a known directive, only if that directive has
   the COMMENTS flag. Otherwise it just counts as whitespace. */
1537 if (!CPP_OPTION (pfile, discard_comments)
1538 && (!KNOWN_DIRECTIVE()
1539 || (list->directive->flags & COMMENTS)))
1540 save_comment (list, cur_token++, cur,
1541 buffer->cur - cur, c);
1542 else
1543 flags = PREV_WHITE;
1545 cur = buffer->cur;
1546 break;
1550 cur_token++;
1551 break;
1553 case '*':
1554 cur_token->type = CPP_MULT;
1555 if (IMMED_TOKEN ())
1557 if (PREV_TOKEN_TYPE == CPP_DIV)
1559 buffer->cur = cur;
1560 #if 0 /* Leave until new lexer in place. */
1561 if (cur[-2] != '/')
1562 cpp_warning (pfile,
1563 "comment start '/*' split across lines");
1564 #endif
1565 if (skip_block_comment (pfile))
1566 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1567 "unterminated comment");
1568 #if 0 /* Leave until new lexer in place. */
1569 else if (buffer->cur[-2] != '*')
1570 cpp_warning (pfile,
1571 "comment end '*/' split across lines");
1572 #endif
1573 /* Back up to opening '/'. */
1574 cur_token--;
1575 if (!CPP_OPTION (pfile, discard_comments)
1576 && (!KNOWN_DIRECTIVE()
1577 || (list->directive->flags & COMMENTS)))
1578 save_comment (list, cur_token++, cur,
1579 buffer->cur - cur, c);
1580 else
1581 flags = PREV_WHITE;
1583 cur = buffer->cur;
1584 break;
1586 else if (CPP_OPTION (pfile, cplusplus))
1588 /* In C++, there are .* and ->* operators. */
1589 if (PREV_TOKEN_TYPE == CPP_DEREF)
1590 BACKUP_TOKEN (CPP_DEREF_STAR);
1591 else if (PREV_TOKEN_TYPE == CPP_DOT)
1592 BACKUP_TOKEN (CPP_DOT_STAR);
1595 cur_token++;
1596 break;
1598 case '\n':
1599 case '\r':
1600 handle_newline (cur, buffer->rlimit, c);
1601 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1603 if (IMMED_TOKEN ())
1605 /* Remove the escaped newline. Then continue to process
1606 any interrupted name or number. */
1607 cur_token--;
1608 /* Backslash-newline may not be immediately followed by
1609 EOF (C99 5.1.1.2). */
1610 if (cur >= buffer->rlimit)
1612 cpp_pedwarn (pfile, "backslash-newline at end of file");
1613 break;
1615 if (IMMED_TOKEN ())
1617 cur_token--;
1618 if (cur_token->type == CPP_NAME)
1619 goto continue_name;
1620 else if (cur_token->type == CPP_NUMBER)
1621 goto continue_number;
1622 cur_token++;
1624 /* Remember whitespace setting. */
1625 flags = cur_token->flags;
1626 break;
1628 else
1630 buffer->cur = cur;
1631 cpp_warning (pfile,
1632 "backslash and newline separated by space");
1635 else if (MIGHT_BE_DIRECTIVE ())
1637 /* "Null directive." C99 6.10.7: A preprocessing
1638 directive of the form # <new-line> has no effect.
1640 But it is still a directive, and therefore disappears
1641 from the output. */
1642 cur_token--;
1643 if (cur_token->flags & PREV_WHITE
1644 && CPP_WTRADITIONAL (pfile))
1645 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1648 /* Skip vertical space until we have at least one token to
1649 return. */
1650 if (cur_token != &list->tokens[first_token])
1651 goto out;
1652 list->line = CPP_BUF_LINE (buffer);
1653 break;
1655 case '-':
1656 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1657 REVISE_TOKEN (CPP_MINUS_MINUS);
1658 else
1659 PUSH_TOKEN (CPP_MINUS);
1660 break;
1662 make_hash:
1663 case '#':
1664 /* The digraph flag checking ensures that ## and %:%:
1665 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1666 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1667 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1668 REVISE_TOKEN (CPP_PASTE);
1669 else
1670 PUSH_TOKEN (CPP_HASH);
1671 break;
1673 case ':':
1674 cur_token->type = CPP_COLON;
1675 if (IMMED_TOKEN ())
1677 if (PREV_TOKEN_TYPE == CPP_COLON
1678 && CPP_OPTION (pfile, cplusplus))
1679 BACKUP_TOKEN (CPP_SCOPE);
1680 else if (CPP_OPTION (pfile, digraphs))
1682 /* Digraph: "<:" is a '[' */
1683 if (PREV_TOKEN_TYPE == CPP_LESS)
1684 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1685 /* Digraph: "%:" is a '#' */
1686 else if (PREV_TOKEN_TYPE == CPP_MOD)
1688 (--cur_token)->flags |= DIGRAPH;
1689 goto make_hash;
1693 cur_token++;
1694 break;
1696 case '&':
1697 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1698 REVISE_TOKEN (CPP_AND_AND);
1699 else
1700 PUSH_TOKEN (CPP_AND);
1701 break;
1703 make_or:
1704 case '|':
1705 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1706 REVISE_TOKEN (CPP_OR_OR);
1707 else
1708 PUSH_TOKEN (CPP_OR);
1709 break;
1711 case '+':
1712 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1713 REVISE_TOKEN (CPP_PLUS_PLUS);
1714 else
1715 PUSH_TOKEN (CPP_PLUS);
1716 break;
1718 case '=':
1719 /* This relies on equidistance of "?=" and "?" tokens. */
1720 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1721 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1722 else
1723 PUSH_TOKEN (CPP_EQ);
1724 break;
1726 case '>':
1727 cur_token->type = CPP_GREATER;
1728 if (IMMED_TOKEN ())
1730 if (PREV_TOKEN_TYPE == CPP_GREATER)
1731 BACKUP_TOKEN (CPP_RSHIFT);
1732 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1733 BACKUP_TOKEN (CPP_DEREF);
1734 else if (CPP_OPTION (pfile, digraphs))
1736 /* Digraph: ":>" is a ']' */
1737 if (PREV_TOKEN_TYPE == CPP_COLON)
1738 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1739 /* Digraph: "%>" is a '}' */
1740 else if (PREV_TOKEN_TYPE == CPP_MOD)
1741 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1744 cur_token++;
1745 break;
1747 case '<':
1748 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1750 REVISE_TOKEN (CPP_LSHIFT);
1751 break;
1753 /* Is this the beginning of a header name? */
1754 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1756 c = '>'; /* Terminator. */
1757 cur_token->type = CPP_HEADER_NAME;
1758 goto do_parse_string;
1760 PUSH_TOKEN (CPP_LESS);
1761 break;
1763 case '%':
1764 /* Digraph: "<%" is a '{' */
1765 cur_token->type = CPP_MOD;
1766 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1767 && CPP_OPTION (pfile, digraphs))
1768 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1769 cur_token++;
1770 break;
1772 case '?':
/* A second '?' followed by a character known to trigraph_map forms a
   trigraph, provided trigraph_ok accepts it; each one is lexed by
   jumping to the case for the character it stands for. */
1773 if (cur + 1 < buffer->rlimit && *cur == '?'
1774 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1776 /* Handle trigraph. */
1777 cur++;
1778 switch (*cur++)
1780 case '(': goto make_open_square;
1781 case ')': goto make_close_square;
1782 case '<': goto make_open_brace;
1783 case '>': goto make_close_brace;
1784 case '=': goto make_hash;
1785 case '!': goto make_or;
1786 case '-': goto make_complement;
1787 case '/': goto make_backslash;
1788 case '\'': goto make_xor;
1791 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1793 /* GNU C++ defines <? and >? operators. */
1794 if (PREV_TOKEN_TYPE == CPP_LESS)
1796 REVISE_TOKEN (CPP_MIN);
1797 break;
1799 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1801 REVISE_TOKEN (CPP_MAX);
1802 break;
1805 PUSH_TOKEN (CPP_QUERY);
1806 break;
1808 case '.':
/* Three adjacent dots with no intervening whitespace become a single
   CPP_ELLIPSIS. */
1809 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1810 && IMMED_TOKEN ()
1811 && !(cur_token[-1].flags & PREV_WHITE))
1813 cur_token -= 2;
1814 PUSH_TOKEN (CPP_ELLIPSIS);
1816 else
1817 PUSH_TOKEN (CPP_DOT);
1818 break;
1820 make_complement:
1821 case '~': PUSH_TOKEN (CPP_COMPL); break;
1822 make_xor:
1823 case '^': PUSH_TOKEN (CPP_XOR); break;
1824 make_open_brace:
1825 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1826 make_close_brace:
1827 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1828 make_open_square:
1829 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1830 make_close_square:
1831 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1832 make_backslash:
1833 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1834 case '!': PUSH_TOKEN (CPP_NOT); break;
1835 case ',': PUSH_TOKEN (CPP_COMMA); break;
1836 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1837 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1838 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1840 case '$':
1841 if (CPP_OPTION (pfile, dollars_in_ident))
1842 goto letter;
1843 /* Fall through */
1844 default:
1845 cur_token->val.aux = c;
1846 PUSH_TOKEN (CPP_OTHER);
1847 break;
1851 /* Run out of token space? */
1852 if (cur_token == token_limit)
1854 list->tokens_used = cur_token - list->tokens;
1855 _cpp_expand_token_space (list, 256);
1856 goto expanded;
/* We get here only when the buffer is exhausted. If nothing was lexed
   on this line, emit a lone CPP_EOF token, complaining first if the
   file does not end in a newline. */
1859 cur_token->flags = flags;
1860 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1862 if (cur > buffer->buf && !is_vspace (cur[-1]))
1863 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1864 CPP_BUF_COLUMN (buffer, cur),
1865 "no newline at end of file");
1866 cur_token++->type = CPP_EOF;
1869 out:
1870 /* All tokens are allocated, so the memory location is fixed. */
1871 first = &list->tokens[first_token];
1873 /* Don't complain about the null directive, nor directives in
1874 assembly source: we don't know where the comments are, and # may
1875 introduce assembler pseudo-ops. Don't complain about invalid
1876 directives in skipped conditional groups (6.10 p4). */
1877 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1878 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1880 if (first[1].type == CPP_NAME)
1881 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1882 (int) first[1].val.node->length, first[1].val.node->name);
1883 else
1884 cpp_error (pfile, "invalid preprocessing directive");
1887 /* Put EOF at end of known directives. This covers "directives do
1888 not extend beyond the end of the line (description 6.10 part 2)". */
1889 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1891 pfile->first_directive_token = first;
1892 cur_token++->type = CPP_EOF;
1895 /* Directives, known or not, always start a new line. */
1896 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1897 first->flags |= BOL;
1898 else
1899 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1900 up the invocation of a function-like macro, new line is
1901 considered a normal white-space character. */
1902 first->flags |= PREV_WHITE;
1904 buffer->cur = cur;
1905 list->tokens_used = cur_token - list->tokens;
1906 pfile->in_lex_line = 0;
1909 /* Write the spelling of a token TOKEN, with any appropriate
1910 whitespace before it, to the token_buffer. PREV is the previous
1911 token, which is used to determine if we need to shove in an extra
1912 space in order to avoid accidental token paste. */
1913 static void
1914 output_token (pfile, token, prev)
1915 cpp_reader *pfile;
1916 const cpp_token *token, *prev;
1918 int dummy;
1920 if (token->col && (token->flags & BOL))
1922 /* Supply enough whitespace to put this token in its original
1923 column. Don't bother trying to reconstruct tabs; we can't
1924 get it right in general, and nothing ought to care. (Yes,
1925 some things do care; the fault lies with them.) */
1926 unsigned char *buffer;
1927 unsigned int spaces = token->col - 1;
1929 CPP_RESERVE (pfile, token->col);
1930 buffer = pfile->limit;
1932 while (spaces--)
1933 *buffer++ = ' ';
1934 pfile->limit = buffer;
1936 else if (token->flags & PREV_WHITE)
1937 CPP_PUTC (pfile, ' ');
1938 else if (prev)
1940 /* Check for and prevent accidental token pasting. */
1941 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1942 CPP_PUTC (pfile, ' ');
1943 /* can_paste doesn't catch all the accidental pastes.
1944 Consider a + ++b - if there is not a space between the + and ++, it
1945 will be misparsed as a++ + b. */
1946 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1947 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1948 CPP_PUTC (pfile, ' ');
1951 CPP_RESERVE (pfile, TOKEN_LEN (token));
1952 pfile->limit = spell_token (pfile, token, pfile->limit);
1955 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1956 already contain the enough space to hold the token's spelling.
1957 Returns a pointer to the character after the last character
1958 written. */
1960 static unsigned char *
1961 spell_token (pfile, token, buffer)
1962 cpp_reader *pfile; /* Would be nice to be rid of this... */
1963 const cpp_token *token;
1964 unsigned char *buffer;
1966 switch (token_spellings[token->type].type)
1968 case SPELL_OPERATOR:
1970 const unsigned char *spelling;
1971 unsigned char c;
1973 if (token->flags & DIGRAPH)
1974 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1975 else
1976 spelling = token_spellings[token->type].spelling;
1978 while ((c = *spelling++) != '\0')
1979 *buffer++ = c;
1981 break;
1983 case SPELL_IDENT:
1984 memcpy (buffer, token->val.node->name, token->val.node->length);
1985 buffer += token->val.node->length;
1986 break;
1988 case SPELL_STRING:
1990 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1991 *buffer++ = 'L';
1993 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1994 *buffer++ = '"';
1995 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1996 *buffer++ = '\'';
1998 memcpy (buffer, token->val.str.text, token->val.str.len);
1999 buffer += token->val.str.len;
2001 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
2002 *buffer++ = '"';
2003 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
2004 *buffer++ = '\'';
2006 break;
2008 case SPELL_CHAR:
2009 *buffer++ = token->val.aux;
2010 break;
2012 case SPELL_NONE:
2013 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
2014 break;
2017 return buffer;
2020 /* Return the spelling of a token known to be an operator.
2021 Does not distinguish digraphs from their counterparts. */
2022 const unsigned char *
2023 _cpp_spell_operator (type)
2024 enum cpp_ttype type;
2026 if (token_spellings[type].type == SPELL_OPERATOR)
2027 return token_spellings[type].spelling;
2028 else
2029 return token_names[type];
2033 /* Macro expansion algorithm.
2035 Macro expansion is implemented by a single-pass algorithm; there are
2036 no rescan passes involved. cpp_get_token expands just enough to be
2037 able to return a token to the caller; a consequence is that when it
2038 returns, the preprocessor can be in a state of mid-expansion. The
2039 algorithm does not work by fully expanding a macro invocation into
2040 some kind of token list, and then returning them one by one.
2042 Our expansion state is recorded in a context stack. We start out with
2043 a single context on the stack, let's call it base context. This
2044 consists of the token list returned by lex_line that forms the next
2045 logical line in the source file.
2047 The current level in the context stack is stored in the cur_context
2048 member of the cpp_reader structure. The context it references keeps,
2049 amongst other things, a count of how many tokens form that context and
2050 our position within those tokens.
2052 Fundamentally, calling cpp_get_token will return the next token from
2053 the current context. If we're at the end of the current context, that
2054 context is popped from the stack first, unless it is the base context,
2055 in which case the next logical line is lexed from the source file.
2057 However, before returning the token, if it is a CPP_NAME token
2058 _cpp_get_token checks to see if it is a macro and if it is enabled.
2059 Each time it encounters a macro name, it calls push_macro_context.
2060 This function checks that the macro should be expanded (with
2061 is_macro_enabled), and if so pushes a new macro context on the stack
2062 which becomes the current context. It then loops back to read the
2063 first token of the macro context.
2065 A macro context basically consists of the token list representing the
2066 macro's replacement list, which was saved in the hash table by
2067 save_macro_expansion when its #define statement was parsed. If the
2068 macro is function-like, it also contains the tokens that form the
2069 arguments to the macro. I say more about macro arguments below, but
2070 for now just saying that each argument is a set of pointers to tokens
2071 is enough.
2073 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2074 token. This represents an argument passed to the macro, with the
2075 argument number stored in the token's AUX field. The argument should
2076 be substituted; this is achieved by pushing an "argument context". An
2077 argument context just refers to the tokens forming the argument,
2078 which are obtained directly from the macro context. The STRINGIFY
2079 flag on a CPP_MACRO_ARG token indicates that the argument should be
2080 stringified.
2082 Here are a few simple rules the context stack obeys:-
2084 1) The lex_line token list is always context zero.
2086 2) Context 1, if it exists, must be a macro context.
2088 3) An argument context can only appear above a macro context.
2090 4) A macro context can appear above the base context, another macro
2091 context, or an argument context.
2093 5) These imply that the minimal level of an argument context is 2.
2095 The only tricky thing left is ensuring that macros are enabled and
2096 disabled correctly. The algorithm controls macro expansion by the
2097 level of the context a token is taken from in the context stack. If a
2098 token is taken from a level equal to no_expand_level (a member of
2099 struct cpp_reader), no expansion is performed.
2101 When popping a context off the stack, if no_expand_level equals the
2102 level of the popped context, it is reduced by one to match the new
2103 context level, so that expansion is still disabled. It does not
2104 increase if a context is pushed, though. It starts out life as
2105 UINT_MAX, which has the effect that initially macro expansion is
2106 enabled. I explain how this mechanism works below.
2108 The standard requires:-
2110 1) Arguments to be fully expanded before substitution.
2112 2) Stringified arguments to not be expanded, nor the tokens
2113 immediately surrounding a ## operator.
2115 3) Continual rescanning until there are no more macros left to
2116 replace.
2118 4) Once a macro has been expanded in stage 1) or 3), it cannot be
2119 expanded again during later rescans. This prevents infinite
2120 recursion.
2122 The first thing to observe is that stage 3) is mostly redundant.
2123 Since a macro is disabled once it has been expanded, how can a rescan
2124 find an unexpanded macro name? There are only two cases where this is
2125 possible:-
2127 a) If the macro name results from a token paste operation.
2129 b) If the macro in question is a function-like macro that hasn't
2130 already been expanded because previously there was not the required
2131 '(' token immediately following it. This is only possible when an
2132 argument is substituted, and after substitution the last token of
2133 the argument can bind with a parenthesis appearing in the tokens
2134 following the substitution. Note that if the '(' appears within the
2135 argument, the ')' must too, as expanding macro arguments cannot
2136 "suck in" tokens outside the argument.
2138 So we tackle this as follows. When parsing the macro invocation for
2139 arguments, we record the tokens forming each argument as a list of
2140 pointers to those tokens. We do not expand any tokens that are "raw",
2141 i.e. directly from the macro invocation, but other tokens that come
2142 from (nested) argument substitution are fully expanded.
2144 This is achieved by setting the no_expand_level to that of the macro
2145 invocation. A CPP_MACRO_ARG token never appears in the list of tokens
2146 forming an argument, because parse_args (indirectly) calls
2147 get_raw_token which automatically pushes argument contexts and traces
2148 into them. Since these contexts are at a higher level than the
2149 no_expand_level, they get fully macro expanded.
2151 "Raw" and non-raw tokens are separated in arguments by null pointers,
2152 with the policy that the initial state of an argument is raw. If the
2153 first token is not raw, it should be preceded by a null pointer. When
2154 tracing through the tokens of an argument context, each time
2155 get_raw_token encounters a null pointer, it toggles the flag
2156 CONTEXT_RAW.
2158 This flag, when set, indicates to is_macro_disabled that we are
2159 reading raw tokens which should be macro-expanded. Similarly, if
2160 clear, is_macro_disabled suppresses re-expansion.
2162 It's probably time for an example.
2164 #define hash #
2165 #define str(x) #x
2166 #define xstr(y) str(y hash)
2167 str(hash) // "hash"
2168 xstr(hash) // "# hash"
2170 In the invocation of str, parse_args turns off macro expansion and so
2171 parses the argument as <hash>. This is the only token (pointer)
2172 passed as the argument to str. Since <hash> is raw there is no need
2173 for an initial null pointer. stringify_arg is called from
2174 get_raw_token when tracing through the expansion of str, since the
2175 argument has the STRINGIFY flag set. stringify_arg turns off
2176 macro expansion by setting the no_expand_level to that of the argument
2177 context. Thus it gets the token <hash> and stringifies it to "hash"
2178 correctly.
2180 Similarly xstr is passed <hash>. However, when parse_args is parsing
2181 the invocation of str() in xstr's expansion, get_raw_token encounters
2182 a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
2183 an argument context, and enters the tokens of the argument,
2184 i.e. <hash>. This is at a higher context level than parse_args
2185 disabled, and so is_macro_disabled permits expansion of it and a macro
2186 context is pushed on top of the argument context. This contains the
2187 <#> token, and the end result is that <hash> is macro expanded.
2188 However, after popping off the argument context, the <hash> of xstr's
2189 expansion does not get macro expanded because we're back at the
2190 no_expand_level. The end result is that the argument passed to str is
2191 <NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
2192 raw, <#> is not raw, but then <hash> is.
2197 /* Free the storage allocated for macro arguments. */
2198 static void
2199 free_macro_args (args)
2200 macro_args *args;
2202 if (args->tokens)
2203 free ((PTR) args->tokens);
2204 free (args->ends);
2205 free (args);
2208 /* Determines if a macro has been already used (and is therefore
2209 disabled). */
2210 static int
2211 is_macro_disabled (pfile, expansion, token)
2212 cpp_reader *pfile;
2213 const cpp_toklist *expansion;
2214 const cpp_token *token;
2216 cpp_context *context = CURRENT_CONTEXT (pfile);
2218 /* Don't expand anything if this file has already been preprocessed. */
2219 if (CPP_OPTION (pfile, preprocessed))
2220 return 1;
2222 /* Arguments on either side of ## are inserted in place without
2223 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2224 occurs during a later rescan pass. The effect is that we expand
2225 iff we would as part of the macro's expansion list, so we should
2226 drop to the macro's context. */
2227 if (IS_ARG_CONTEXT (context))
2229 if (token->flags & PASTED)
2230 context--;
2231 else if (!(context->flags & CONTEXT_RAW))
2232 return 1;
2233 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2234 context--;
2237 /* Have we already used this macro? */
2238 while (context->level > 0)
2240 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2241 return 1;
2242 /* Raw argument tokens are judged based on the token list they
2243 came from. */
2244 if (context->flags & CONTEXT_RAW)
2245 context = pfile->contexts + context->level;
2246 else
2247 context--;
2250 /* Function-like macros may be disabled if the '(' is not in the
2251 current context. We check this without disrupting the context
2252 stack. */
2253 if (expansion->paramc >= 0)
2255 const cpp_token *next;
2256 unsigned int prev_nme;
2258 context = CURRENT_CONTEXT (pfile);
2259 /* Drop down any contexts we're at the end of: the '(' may
2260 appear in lower macro expansions, or in the rest of the file. */
2261 while (context->posn == context->count && context > pfile->contexts)
2263 context--;
2264 /* If we matched, we are disabled, as we appear in the
2265 expansion of each macro we meet. */
2266 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2267 return 1;
2270 prev_nme = pfile->no_expand_level;
2271 pfile->no_expand_level = context - pfile->contexts;
2272 next = _cpp_get_token (pfile);
2273 restore_macro_expansion (pfile, prev_nme);
2274 if (next->type != CPP_OPEN_PAREN)
2276 _cpp_push_token (pfile, next);
2277 if (CPP_WTRADITIONAL (pfile))
2278 cpp_warning (pfile,
2279 "function macro %.*s must be used with arguments in traditional C",
2280 (int) token->val.node->length, token->val.node->name);
2281 return 1;
2285 return 0;
2288 /* Add a token to the set of tokens forming the arguments to the macro
2289 being parsed in parse_args. */
2290 static void
2291 save_token (args, token)
2292 macro_args *args;
2293 const cpp_token *token;
2295 if (args->used == args->capacity)
2297 args->capacity += args->capacity + 100;
2298 args->tokens = (const cpp_token **)
2299 xrealloc ((PTR) args->tokens,
2300 args->capacity * sizeof (const cpp_token *));
2302 args->tokens[args->used++] = token;
2305 /* Take and save raw tokens until we finish one argument. Empty
2306 arguments are saved as a single CPP_PLACEMARKER token. */
2307 static const cpp_token *
2308 parse_arg (pfile, var_args, paren_context, args, pcount)
2309 cpp_reader *pfile;
2310 int var_args;
2311 unsigned int paren_context;
2312 macro_args *args;
2313 unsigned int *pcount;
2315 const cpp_token *token;
2316 unsigned int paren = 0, count = 0;
2317 int raw, was_raw = 1;
2319 for (count = 0;; count++)
2321 token = _cpp_get_token (pfile);
2323 switch (token->type)
2325 default:
2326 break;
2328 case CPP_OPEN_PAREN:
2329 paren++;
2330 break;
2332 case CPP_CLOSE_PAREN:
2333 if (paren-- != 0)
2334 break;
2335 goto out;
2337 case CPP_COMMA:
2338 /* Commas are not terminators within parantheses or var_args. */
2339 if (paren || var_args)
2340 break;
2341 goto out;
2343 case CPP_EOF: /* Error reported by caller. */
2344 goto out;
2347 raw = pfile->cur_context <= paren_context;
2348 if (raw != was_raw)
2350 was_raw = raw;
2351 save_token (args, 0);
2352 count++;
2354 save_token (args, token);
2357 out:
2358 if (count == 0)
2360 /* Duplicate the placemarker. Then we can set its flags and
2361 position and safely be using more than one. */
2362 save_token (args, duplicate_token (pfile, &placemarker_token));
2363 count++;
2366 *pcount = count;
2367 return token;
/* True if the argument starting at offset O of arglist A is empty:
   its first entry, or the entry after it when the first is a null
   pointer, is a PLACEMARKER token.  O is evaluated more than once.  */

#define empty_argument(A, O) \
  (((A)->tokens[O] != 0 ? (A)->tokens[O] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2378 /* Parse the arguments making up a macro invocation. Nested arguments
2379 are automatically macro expanded, but immediate macros are not
2380 expanded; this enables e.g. operator # to work correctly. Returns
2381 non-zero on error. */
2382 static int
2383 parse_args (pfile, hp, args)
2384 cpp_reader *pfile;
2385 cpp_hashnode *hp;
2386 macro_args *args;
2388 const cpp_token *token;
2389 const cpp_toklist *macro;
2390 unsigned int total = 0;
2391 unsigned int paren_context = pfile->cur_context;
2392 int argc = 0;
2394 macro = hp->value.expansion;
2397 unsigned int count;
2399 token = parse_arg (pfile, (argc + 1 == macro->paramc
2400 && (macro->flags & VAR_ARGS)),
2401 paren_context, args, &count);
2402 if (argc < macro->paramc)
2404 total += count;
2405 args->ends[argc] = total;
2407 argc++;
2409 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2411 if (token->type == CPP_EOF)
2413 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2414 hp->length, hp->name);
2415 return 1;
2417 else if (argc < macro->paramc)
2419 /* A rest argument is allowed to not appear in the invocation at all.
2420 e.g. #define debug(format, args...) ...
2421 debug("string");
2422 This is exactly the same as if the rest argument had received no
2423 tokens - debug("string",); This extension is deprecated. */
2425 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2427 /* Duplicate the placemarker. Then we can set its flags and
2428 position and safely be using more than one. */
2429 save_token (args, duplicate_token (pfile, &placemarker_token));
2430 args->ends[argc] = total + 1;
2431 return 0;
2433 else
2435 cpp_error (pfile,
2436 "insufficient arguments in invocation of macro \"%.*s\"",
2437 hp->length, hp->name);
2438 return 1;
2441 /* An empty argument to an empty function-like macro is fine. */
2442 else if (argc > macro->paramc
2443 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2445 cpp_error (pfile,
2446 "too many arguments in invocation of macro \"%.*s\"",
2447 hp->length, hp->name);
2448 return 1;
2451 return 0;
2454 /* Adds backslashes before all backslashes and double quotes appearing
2455 in strings. Non-printable characters are converted to octal. */
2456 static U_CHAR *
2457 quote_string (dest, src, len)
2458 U_CHAR *dest;
2459 const U_CHAR *src;
2460 unsigned int len;
2462 while (len--)
2464 U_CHAR c = *src++;
2466 if (c == '\\' || c == '"')
2468 *dest++ = '\\';
2469 *dest++ = c;
2471 else
2473 if (ISPRINT (c))
2474 *dest++ = c;
2475 else
2477 sprintf ((char *) dest, "\\%03o", c);
2478 dest += 4;
2483 return dest;
2486 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2487 CPP_STRING token containing TEXT in quoted form. */
2488 static cpp_token *
2489 make_string_token (token, text, len)
2490 cpp_token *token;
2491 const U_CHAR *text;
2492 unsigned int len;
2494 U_CHAR *buf;
2496 buf = (U_CHAR *) xmalloc (len * 4);
2497 token->type = CPP_STRING;
2498 token->flags = 0;
2499 token->val.str.text = buf;
2500 token->val.str.len = quote_string (buf, text, len) - buf;
2501 return token;
2504 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2505 evaluating to NUMBER. */
2506 static cpp_token *
2507 alloc_number_token (pfile, number)
2508 cpp_reader *pfile;
2509 int number;
2511 cpp_token *result;
2512 char *buf;
2514 result = get_temp_token (pfile);
2515 buf = xmalloc (20);
2516 sprintf (buf, "%d", number);
2518 result->type = CPP_NUMBER;
2519 result->flags = 0;
2520 result->val.str.text = (U_CHAR *) buf;
2521 result->val.str.len = strlen (buf);
2522 return result;
2525 /* Returns a temporary token from the temporary token store of PFILE. */
2526 static cpp_token *
2527 get_temp_token (pfile)
2528 cpp_reader *pfile;
2530 if (pfile->temp_used == pfile->temp_alloced)
2532 if (pfile->temp_used == pfile->temp_cap)
2534 pfile->temp_cap += pfile->temp_cap + 20;
2535 pfile->temp_tokens = (cpp_token **) xrealloc
2536 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2538 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2539 (sizeof (cpp_token));
2542 return pfile->temp_tokens[pfile->temp_used++];
2545 /* Release (not free) for re-use the temporary tokens of PFILE. */
2546 static void
2547 release_temp_tokens (pfile)
2548 cpp_reader *pfile;
2550 while (pfile->temp_used)
2552 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2554 if (token_spellings[token->type].type == SPELL_STRING)
2556 free ((char *) token->val.str.text);
2557 token->val.str.text = 0;
2562 /* Free all of PFILE's dynamically-allocated temporary tokens. */
2563 void
2564 _cpp_free_temp_tokens (pfile)
2565 cpp_reader *pfile;
2567 if (pfile->temp_tokens)
2569 /* It is possible, though unlikely (looking for '(' of a funlike
2570 macro into EOF), that we haven't released the tokens yet. */
2571 release_temp_tokens (pfile);
2572 while (pfile->temp_alloced)
2573 free (pfile->temp_tokens[--pfile->temp_alloced]);
2574 free (pfile->temp_tokens);
2577 if (pfile->date)
2579 free ((char *) pfile->date->val.str.text);
2580 free (pfile->date);
2581 free ((char *) pfile->time->val.str.text);
2582 free (pfile->time);
2586 /* Copy TOKEN into a temporary token from PFILE's store. */
2587 static cpp_token *
2588 duplicate_token (pfile, token)
2589 cpp_reader *pfile;
2590 const cpp_token *token;
2592 cpp_token *result = get_temp_token (pfile);
2594 *result = *token;
2595 if (token_spellings[token->type].type == SPELL_STRING)
2597 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2598 memcpy (buff, token->val.str.text, token->val.str.len);
2599 result->val.str.text = buff;
2601 return result;
2604 /* Determine whether two tokens can be pasted together, and if so,
2605 what the resulting token is. Returns CPP_EOF if the tokens cannot
2606 be pasted, or the appropriate type for the merged token if they
2607 can. */
2608 static enum cpp_ttype
2609 can_paste (pfile, token1, token2, digraph)
2610 cpp_reader * pfile;
2611 const cpp_token *token1, *token2;
2612 int* digraph;
2614 enum cpp_ttype a = token1->type, b = token2->type;
2615 int cxx = CPP_OPTION (pfile, cplusplus);
2617 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2618 return a + (CPP_EQ_EQ - CPP_EQ);
2620 switch (a)
2622 case CPP_GREATER:
2623 if (b == a) return CPP_RSHIFT;
2624 if (b == CPP_QUERY && cxx) return CPP_MAX;
2625 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2626 break;
2627 case CPP_LESS:
2628 if (b == a) return CPP_LSHIFT;
2629 if (b == CPP_QUERY && cxx) return CPP_MIN;
2630 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2631 if (CPP_OPTION (pfile, digraphs))
2633 if (b == CPP_COLON)
2634 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2635 if (b == CPP_MOD)
2636 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2638 break;
2640 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2641 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2642 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2644 case CPP_MINUS:
2645 if (b == a) return CPP_MINUS_MINUS;
2646 if (b == CPP_GREATER) return CPP_DEREF;
2647 break;
2648 case CPP_COLON:
2649 if (b == a && cxx) return CPP_SCOPE;
2650 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2651 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2652 break;
2654 case CPP_MOD:
2655 if (CPP_OPTION (pfile, digraphs))
2657 if (b == CPP_GREATER)
2658 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2659 if (b == CPP_COLON)
2660 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2662 break;
2663 case CPP_DEREF:
2664 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2665 break;
2666 case CPP_DOT:
2667 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2668 if (b == CPP_NUMBER) return CPP_NUMBER;
2669 break;
2671 case CPP_HASH:
2672 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2673 /* %:%: digraph */
2674 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2675 break;
2677 case CPP_NAME:
2678 if (b == CPP_NAME) return CPP_NAME;
2679 if (b == CPP_NUMBER
2680 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2681 if (b == CPP_CHAR
2682 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2683 if (b == CPP_STRING
2684 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2685 break;
2687 case CPP_NUMBER:
2688 if (b == CPP_NUMBER) return CPP_NUMBER;
2689 if (b == CPP_NAME) return CPP_NUMBER;
2690 if (b == CPP_DOT) return CPP_NUMBER;
2691 /* Numbers cannot have length zero, so this is safe. */
2692 if ((b == CPP_PLUS || b == CPP_MINUS)
2693 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2694 return CPP_NUMBER;
2695 break;
2697 default:
2698 break;
2701 return CPP_EOF;
2704 /* Check if TOKEN is to be ##-pasted with the token after it. */
2705 static const cpp_token *
2706 maybe_paste_with_next (pfile, token)
2707 cpp_reader *pfile;
2708 const cpp_token *token;
2710 cpp_token *pasted;
2711 const cpp_token *second;
2712 cpp_context *context = CURRENT_CONTEXT (pfile);
2714 /* Is this token on the LHS of ## ? */
2716 while ((token->flags & PASTE_LEFT)
2717 || ((context->flags & CONTEXT_PASTEL)
2718 && context->posn == context->count))
2720 /* Suppress macro expansion for next token, but don't conflict
2721 with the other method of suppression. If it is an argument,
2722 macro expansion within the argument will still occur. */
2723 pfile->paste_level = pfile->cur_context;
2724 second = _cpp_get_token (pfile);
2725 pfile->paste_level = 0;
2727 /* Ignore placemarker argument tokens (cannot be from an empty
2728 macro since macros are not expanded). */
2729 if (token->type == CPP_PLACEMARKER)
2730 pasted = duplicate_token (pfile, second);
2731 else if (second->type == CPP_PLACEMARKER)
2733 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2734 /* GCC has special extended semantics for a ## b where b is
2735 a varargs parameter: a disappears if b consists of no
2736 tokens. This extension is deprecated. */
2737 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2738 && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
2739 == (unsigned) mac_context->u.list->paramc))
2741 cpp_warning (pfile, "deprecated GNU ## extension used");
2742 pasted = duplicate_token (pfile, second);
2744 else
2745 pasted = duplicate_token (pfile, token);
2747 else
2749 int digraph = 0;
2750 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2752 if (type == CPP_EOF)
2754 if (CPP_OPTION (pfile, warn_paste))
2755 cpp_warning (pfile,
2756 "pasting would not give a valid preprocessing token");
2757 _cpp_push_token (pfile, second);
2758 return token;
2761 if (type == CPP_NAME || type == CPP_NUMBER)
2763 /* Join spellings. */
2764 U_CHAR *buf, *end;
2766 pasted = get_temp_token (pfile);
2767 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2768 end = spell_token (pfile, token, buf);
2769 end = spell_token (pfile, second, end);
2770 *end = '\0';
2772 if (type == CPP_NAME)
2773 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2774 else
2776 pasted->val.str.text = uxstrdup (buf);
2777 pasted->val.str.len = end - buf;
2780 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2781 pasted = duplicate_token (pfile, second);
2782 else
2784 pasted = get_temp_token (pfile);
2785 pasted->val.integer = 0;
2788 pasted->type = type;
2789 pasted->flags = digraph ? DIGRAPH : 0;
2792 /* The pasted token gets the whitespace flags and position of the
2793 first token, the PASTE_LEFT flag of the second token, plus the
2794 PASTED flag to indicate it is the result of a paste. However, we
2795 want to preserve the DIGRAPH flag. */
2796 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2797 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2798 | (second->flags & PASTE_LEFT) | PASTED);
2799 pasted->col = token->col;
2800 pasted->line = token->line;
2802 /* See if there is another token to be pasted onto the one we just
2803 constructed. */
2804 token = pasted;
2805 context = CURRENT_CONTEXT (pfile);
2806 /* and loop */
2808 return token;
2811 /* Convert a token sequence to a single string token according to the
2812 rules of the ISO C #-operator. */
2813 #define INIT_SIZE 200
2814 static cpp_token *
2815 stringify_arg (pfile, token)
2816 cpp_reader *pfile;
2817 const cpp_token *token;
2819 cpp_token *result;
2820 unsigned char *main_buf;
2821 unsigned int prev_value, backslash_count = 0;
2822 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2824 push_arg_context (pfile, token);
2825 prev_value = prevent_macro_expansion (pfile);
2826 main_buf = (unsigned char *) xmalloc (buf_cap);
2828 result = get_temp_token (pfile);
2829 ASSIGN_FLAGS_AND_POS (result, token);
2831 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2833 int escape;
2834 unsigned char *buf;
2835 unsigned int len = TOKEN_LEN (token);
2837 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2838 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2839 if (escape)
2840 len *= 4 + 1;
2842 if (buf_used + len > buf_cap)
2844 buf_cap = buf_used + len + INIT_SIZE;
2845 main_buf = xrealloc (main_buf, buf_cap);
2848 if (whitespace && (token->flags & PREV_WHITE))
2849 main_buf[buf_used++] = ' ';
2851 if (escape)
2852 buf = (unsigned char *) xmalloc (len);
2853 else
2854 buf = main_buf + buf_used;
2856 len = spell_token (pfile, token, buf) - buf;
2857 if (escape)
2859 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2860 free (buf);
2862 else
2863 buf_used += len;
2865 whitespace = 1;
2866 if (token->type == CPP_BACKSLASH)
2867 backslash_count++;
2868 else
2869 backslash_count = 0;
2872 /* Ignore the final \ of invalid string literals. */
2873 if (backslash_count & 1)
2875 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2876 buf_used--;
2879 result->type = CPP_STRING;
2880 result->val.str.text = main_buf;
2881 result->val.str.len = buf_used;
2882 restore_macro_expansion (pfile, prev_value);
2883 return result;
2886 /* Allocate more room on the context stack of PFILE. */
2887 static void
2888 expand_context_stack (pfile)
2889 cpp_reader *pfile;
2891 pfile->context_cap += pfile->context_cap + 20;
2892 pfile->contexts = (cpp_context *)
2893 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2896 /* Push the context of macro NODE onto the context stack. TOKEN is
2897 the CPP_NAME token invoking the macro. */
2898 static int
2899 push_macro_context (pfile, token)
2900 cpp_reader *pfile;
2901 const cpp_token *token;
2903 unsigned char orig_flags;
2904 macro_args *args;
2905 cpp_context *context;
2906 cpp_hashnode *node = token->val.node;
2908 /* Token's flags may change when parsing args containing a nested
2909 invocation of this macro. */
2910 orig_flags = token->flags & (PREV_WHITE | BOL);
2911 args = 0;
2912 if (node->value.expansion->paramc >= 0)
2914 unsigned int error, prev_nme;
2916 /* Allocate room for the argument contexts, and parse them. */
2917 args = (macro_args *) xmalloc (sizeof (macro_args));
2918 args->ends = (unsigned int *)
2919 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2920 args->tokens = 0;
2921 args->capacity = 0;
2922 args->used = 0;
2923 args->level = pfile->cur_context;
2925 prev_nme = prevent_macro_expansion (pfile);
2926 pfile->args = args;
2927 error = parse_args (pfile, node, args);
2928 pfile->args = 0;
2929 restore_macro_expansion (pfile, prev_nme);
2930 if (error)
2932 free_macro_args (args);
2933 return 1;
2937 /* Now push its context. */
2938 pfile->cur_context++;
2939 if (pfile->cur_context == pfile->context_cap)
2940 expand_context_stack (pfile);
2942 context = CURRENT_CONTEXT (pfile);
2943 context->u.list = node->value.expansion;
2944 context->args = args;
2945 context->posn = 0;
2946 context->count = context->u.list->tokens_used;
2947 context->level = pfile->cur_context;
2948 context->flags = 0;
2949 context->pushed_token = 0;
2951 /* Set the flags of the first token. We know there must
2952 be one, empty macros are a single placemarker token. */
2953 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2955 return 0;
2958 /* Push an argument to the current macro onto the context stack.
2959 TOKEN is the MACRO_ARG token representing the argument expansion. */
2960 static void
2961 push_arg_context (pfile, token)
2962 cpp_reader *pfile;
2963 const cpp_token *token;
2965 cpp_context *context;
2966 macro_args *args;
2968 pfile->cur_context++;
2969 if (pfile->cur_context == pfile->context_cap)
2970 expand_context_stack (pfile);
2972 context = CURRENT_CONTEXT (pfile);
2973 args = context[-1].args;
2975 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2976 context->u.arg = args->tokens + context->count;
2977 context->count = args->ends[token->val.aux] - context->count;
2978 context->args = 0;
2979 context->posn = 0;
2980 context->level = args->level;
2981 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2982 context->pushed_token = 0;
2984 /* Set the flags of the first token. There is one. */
2986 const cpp_token *first = context->u.arg[0];
2987 if (!first)
2988 first = context->u.arg[1];
2990 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2991 token->flags & (PREV_WHITE | BOL));
2994 if (token->flags & PASTE_LEFT)
2995 context->flags |= CONTEXT_PASTEL;
2996 if (pfile->paste_level)
2997 context->flags |= CONTEXT_PASTER;
3000 /* "Unget" a token. It is effectively inserted in the token queue and
3001 will be returned by the next call to get_raw_token. */
3002 void
3003 _cpp_push_token (pfile, token)
3004 cpp_reader *pfile;
3005 const cpp_token *token;
3007 cpp_context *context = CURRENT_CONTEXT (pfile);
3008 if (context->pushed_token)
3009 cpp_ice (pfile, "two tokens pushed in a row");
3010 if (token->type != CPP_EOF)
3011 context->pushed_token = token;
3012 /* Don't push back a directive's CPP_EOF, step back instead. */
3013 else if (pfile->cur_context == 0)
3014 pfile->contexts[0].posn--;
3017 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
3018 introducing the directive. */
3019 static void
3020 process_directive (pfile, token)
3021 cpp_reader *pfile;
3022 const cpp_token *token;
3024 const struct directive *d = pfile->token_list.directive;
3025 int prev_nme = 0;
3027 /* Skip over the directive name. */
3028 if (token[1].type == CPP_NAME)
3029 _cpp_get_raw_token (pfile);
3030 else if (token[1].type != CPP_NUMBER)
3031 cpp_ice (pfile, "directive begins with %s?!",
3032 token_names[token[1].type]);
3034 /* Flush pending tokens at this point, in case the directive produces
3035 output. XXX Directive output won't be visible to a direct caller of
3036 cpp_get_token. */
3037 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
3038 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3040 if (! (d->flags & EXPAND))
3041 prev_nme = prevent_macro_expansion (pfile);
3042 (void) (*d->handler) (pfile);
3043 if (! (d->flags & EXPAND))
3044 restore_macro_expansion (pfile, prev_nme);
3045 _cpp_skip_rest_of_line (pfile);
3048 /* The external interface to return the next token. All macro
3049 expansion and directive processing is handled internally, the
3050 caller only ever sees the output after preprocessing. */
3051 const cpp_token *
3052 cpp_get_token (pfile)
3053 cpp_reader *pfile;
3055 const cpp_token *token;
3056 /* Loop till we hit a non-directive, non-placemarker token. */
3057 for (;;)
3059 token = _cpp_get_token (pfile);
3061 if (token->type == CPP_PLACEMARKER)
3062 continue;
3064 if (token->type == CPP_HASH && token->flags & BOL
3065 && pfile->token_list.directive)
3067 process_directive (pfile, token);
3068 continue;
3071 return token;
3075 /* The internal interface to return the next token. There are two
3076 differences between the internal and external interfaces: the
3077 internal interface may return a PLACEMARKER token, and it does not
3078 process directives. */
3079 const cpp_token *
3080 _cpp_get_token (pfile)
3081 cpp_reader *pfile;
3083 const cpp_token *token;
3084 cpp_hashnode *node;
3086 /* Loop until we hit a non-macro token. */
3087 for (;;)
3089 token = get_raw_token (pfile);
3091 /* Short circuit EOF. */
3092 if (token->type == CPP_EOF)
3093 return token;
3095 /* If we are skipping... */
3096 if (pfile->skipping)
3098 /* we still have to process directives, */
3099 if (pfile->token_list.directive)
3100 return token;
3102 /* but everything else is ignored. */
3103 _cpp_skip_rest_of_line (pfile);
3104 continue;
3107 /* If there's a potential control macro and we get here, then that
3108 #ifndef didn't cover the entire file and its argument shouldn't
3109 be taken as a control macro. */
3110 pfile->potential_control_macro = 0;
3112 /* See if there's a token to paste with this one. */
3113 if (!pfile->paste_level)
3114 token = maybe_paste_with_next (pfile, token);
3116 /* If it isn't a macro, return it now. */
3117 if (token->type != CPP_NAME
3118 || token->val.node->type == T_VOID)
3119 return token;
3121 /* Is macro expansion disabled in general? */
3122 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
3123 return token;
3125 node = token->val.node;
3126 if (node->type != T_MACRO)
3127 return special_symbol (pfile, node, token);
3129 if (is_macro_disabled (pfile, node->value.expansion, token))
3130 return token;
3132 if (pfile->cur_context > CPP_STACK_MAX)
3134 cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3135 return token;
3138 if (push_macro_context (pfile, token))
3139 return token;
3140 /* else loop */
3144 /* Returns the next raw token, i.e. without performing macro
3145 expansion. Argument contexts are automatically entered. */
3146 static const cpp_token *
3147 get_raw_token (pfile)
3148 cpp_reader *pfile;
3150 const cpp_token *result;
3151 cpp_context *context;
3153 for (;;)
3155 context = CURRENT_CONTEXT (pfile);
3156 if (context->pushed_token)
3158 result = context->pushed_token;
3159 context->pushed_token = 0;
3161 else if (context->posn == context->count)
3163 if (pop_context (pfile))
3164 return &eof_token;
3165 continue;
3167 else
3169 if (IS_ARG_CONTEXT (context))
3171 result = context->u.arg[context->posn++];
3172 if (result == 0)
3174 context->flags ^= CONTEXT_RAW;
3175 result = context->u.arg[context->posn++];
3177 return result; /* Cannot be a CPP_MACRO_ARG */
3179 result = &context->u.list->tokens[context->posn++];
3182 if (result->type != CPP_MACRO_ARG)
3183 return result;
3185 if (result->flags & STRINGIFY_ARG)
3186 return stringify_arg (pfile, result);
3188 push_arg_context (pfile, result);
3192 /* Internal interface to get the token without macro expanding. */
3193 const cpp_token *
3194 _cpp_get_raw_token (pfile)
3195 cpp_reader *pfile;
3197 int prev_nme = prevent_macro_expansion (pfile);
3198 const cpp_token *result = _cpp_get_token (pfile);
3199 restore_macro_expansion (pfile, prev_nme);
3200 return result;
3203 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3204 list should be overwritten, or zero if we need to append
3205 (typically, if we are within the arguments to a macro, or looking
3206 for the '(' to start a function-like macro invocation). */
3207 static int
3208 lex_next (pfile, clear)
3209 cpp_reader *pfile;
3210 int clear;
3212 cpp_toklist *list = &pfile->token_list;
3213 const cpp_token *old_list = list->tokens;
3214 unsigned int old_used = list->tokens_used;
3216 if (clear)
3218 /* Release all temporary tokens. */
3219 _cpp_clear_toklist (list);
3220 pfile->contexts[0].posn = 0;
3221 if (pfile->temp_used)
3222 release_temp_tokens (pfile);
3225 lex_line (pfile, list);
3226 pfile->contexts[0].count = list->tokens_used;
3228 if (!clear && pfile->args)
3230 /* Fix up argument token pointers. */
3231 if (old_list != list->tokens)
3233 unsigned int i;
3235 for (i = 0; i < pfile->args->used; i++)
3237 const cpp_token *token = pfile->args->tokens[i];
3238 if (token >= old_list && token < old_list + old_used)
3239 pfile->args->tokens[i] = (const cpp_token *)
3240 ((char *) token + ((char *) list->tokens - (char *) old_list));
3244 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3245 tokens within the list of arguments that would otherwise act as
3246 preprocessing directives, the behavior is undefined.
3248 This implementation will report a hard error and treat the
3249 'sequence of preprocessing tokens' as part of the macro argument,
3250 not a directive.
3252 Note if pfile->args == 0, we're OK since we're only inside a
3253 macro argument after a '('. */
3254 if (list->directive)
3256 cpp_error_with_line (pfile, list->tokens[old_used].line,
3257 list->tokens[old_used].col,
3258 "#%s may not be used inside a macro argument",
3259 list->directive->name);
3260 return 1;
3264 return 0;
3267 /* Pops a context off the context stack. If we're at the bottom, lexes
3268 the next logical line. Returns EOF if we're at the end of the
3269 argument list to the # operator, or if it is illegal to "overflow"
3270 into the rest of the file (e.g. 6.10.3.1.1). */
3271 static int
3272 pop_context (pfile)
3273 cpp_reader *pfile;
3275 cpp_context *context;
3277 if (pfile->cur_context == 0)
3279 /* If we are currently processing a directive, do not advance. 6.10
3280 paragraph 2: A new-line character ends the directive even if it
3281 occurs within what would otherwise be an invocation of a
3282 function-like macro. */
3283 if (pfile->token_list.directive)
3284 return 1;
3286 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3289 /* Argument contexts, when parsing args or handling # operator
3290 return CPP_EOF at the end. */
3291 context = CURRENT_CONTEXT (pfile);
3292 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3293 return 1;
3295 /* Free resources when leaving macro contexts. */
3296 if (context->args)
3297 free_macro_args (context->args);
3299 if (pfile->cur_context == pfile->no_expand_level)
3300 pfile->no_expand_level--;
3301 pfile->cur_context--;
3303 return 0;
3306 /* Turn off macro expansion at the current context level. */
3307 static unsigned int
3308 prevent_macro_expansion (pfile)
3309 cpp_reader *pfile;
3311 unsigned int prev_value = pfile->no_expand_level;
3312 pfile->no_expand_level = pfile->cur_context;
3313 return prev_value;
3316 /* Restore macro expansion to its previous state. */
3317 static void
3318 restore_macro_expansion (pfile, prev_value)
3319 cpp_reader *pfile;
3320 unsigned int prev_value;
3322 pfile->no_expand_level = prev_value;
3325 /* Used by cpperror.c to obtain the correct line and column to report
3326 in a diagnostic. */
3327 unsigned int
3328 _cpp_get_line (pfile, pcol)
3329 cpp_reader *pfile;
3330 unsigned int *pcol;
3332 unsigned int index;
3333 const cpp_token *cur_token;
3335 if (pfile->in_lex_line)
3336 index = pfile->token_list.tokens_used;
3337 else
3338 index = pfile->contexts[0].posn;
3340 cur_token = &pfile->token_list.tokens[index - 1];
3341 if (pcol)
3342 *pcol = cur_token->col;
3343 return cur_token->line;
3346 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3347 static const char * const monthnames[] =
3349 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3350 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3353 /* Handle builtin macros like __FILE__. */
3354 static const cpp_token *
3355 special_symbol (pfile, node, token)
3356 cpp_reader *pfile;
3357 cpp_hashnode *node;
3358 const cpp_token *token;
3360 cpp_token *result;
3361 cpp_buffer *ip;
3363 switch (node->type)
3365 case T_FILE:
3366 case T_BASE_FILE:
3368 const char *file;
3370 ip = CPP_BUFFER (pfile);
3371 if (ip == 0)
3372 file = "";
3373 else
3375 if (node->type == T_BASE_FILE)
3376 while (CPP_PREV_BUFFER (ip) != NULL)
3377 ip = CPP_PREV_BUFFER (ip);
3379 file = ip->nominal_fname;
3381 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3382 strlen (file));
3384 break;
3386 case T_INCLUDE_LEVEL:
3387 /* pfile->include_depth counts the primary source as level 1,
3388 but historically __INCLUDE_DEPTH__ has called the primary
3389 source level 0. */
3390 result = alloc_number_token (pfile, pfile->include_depth - 1);
3391 break;
3393 case T_SPECLINE:
3394 /* If __LINE__ is embedded in a macro, it must expand to the
3395 line of the macro's invocation, not its definition.
3396 Otherwise things like assert() will not work properly. */
3397 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3398 break;
3400 case T_STDC:
3402 int stdc = 1;
3404 #ifdef STDC_0_IN_SYSTEM_HEADERS
3405 if (CPP_IN_SYSTEM_HEADER (pfile)
3406 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3407 stdc = 0;
3408 #endif
3409 result = alloc_number_token (pfile, stdc);
3411 break;
3413 case T_DATE:
3414 case T_TIME:
3415 if (pfile->date == 0)
3417 /* Allocate __DATE__ and __TIME__ from permanent storage,
3418 and save them in pfile so we don't have to do this again.
3419 We don't generate these strings at init time because
3420 time() and localtime() are very slow on some systems. */
3421 time_t tt = time (NULL);
3422 struct tm *tb = localtime (&tt);
3424 pfile->date = make_string_token
3425 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3426 pfile->time = make_string_token
3427 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3429 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3430 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3431 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3432 tb->tm_hour, tb->tm_min, tb->tm_sec);
3434 result = node->type == T_DATE ? pfile->date: pfile->time;
3435 break;
3437 case T_POISON:
3438 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3439 return token;
3441 default:
3442 cpp_ice (pfile, "invalid special hash type");
3443 return token;
3446 ASSIGN_FLAGS_AND_POS (result, token);
3447 return result;
3449 #undef DSC
3451 /* Dump the original user's spelling of argument index ARG_NO to the
3452 macro whose expansion is LIST. */
3453 static void
3454 dump_param_spelling (pfile, list, arg_no)
3455 cpp_reader *pfile;
3456 const cpp_toklist *list;
3457 unsigned int arg_no;
3459 const U_CHAR *param = list->namebuf;
3461 while (arg_no--)
3462 param += ustrlen (param) + 1;
3463 CPP_PUTS (pfile, param, ustrlen (param));
3466 /* Dump a token list to the output. */
3467 void
3468 _cpp_dump_list (pfile, list, token, flush)
3469 cpp_reader *pfile;
3470 const cpp_toklist *list;
3471 const cpp_token *token;
3472 int flush;
3474 const cpp_token *limit = list->tokens + list->tokens_used;
3475 const cpp_token *prev = 0;
3477 /* Avoid the CPP_EOF. */
3478 if (list->directive)
3479 limit--;
3481 while (token < limit)
3483 if (token->type == CPP_MACRO_ARG)
3485 if (token->flags & PREV_WHITE)
3486 CPP_PUTC (pfile, ' ');
3487 if (token->flags & STRINGIFY_ARG)
3488 CPP_PUTC (pfile, '#');
3489 dump_param_spelling (pfile, list, token->val.aux);
3491 else
3492 output_token (pfile, token, prev);
3493 if (token->flags & PASTE_LEFT)
3494 CPP_PUTS (pfile, " ##", 3);
3495 prev = token;
3496 token++;
3499 if (flush && pfile->printer)
3500 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3503 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3504 if it hasn't happened already. */
3506 void
3507 _cpp_init_input_buffer (pfile)
3508 cpp_reader *pfile;
3510 cpp_context *base;
3512 init_trigraph_map ();
3513 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3514 pfile->no_expand_level = UINT_MAX;
3515 pfile->context_cap = 20;
3516 pfile->cur_context = 0;
3518 pfile->contexts = (cpp_context *)
3519 xmalloc (pfile->context_cap * sizeof (cpp_context));
3521 /* Clear the base context. */
3522 base = &pfile->contexts[0];
3523 base->u.list = &pfile->token_list;
3524 base->posn = 0;
3525 base->count = 0;
3526 base->args = 0;
3527 base->level = 0;
3528 base->flags = 0;
3529 base->pushed_token = 0;
3532 /* Moves to the end of the directive line, popping contexts as
3533 necessary. */
3534 void
3535 _cpp_skip_rest_of_line (pfile)
3536 cpp_reader *pfile;
3538 /* Discard all stacked contexts. */
3539 int i;
3540 for (i = pfile->cur_context; i > 0; i--)
3541 if (pfile->contexts[i].args)
3542 free_macro_args (pfile->contexts[i].args);
3544 if (pfile->no_expand_level <= pfile->cur_context)
3545 pfile->no_expand_level = 0;
3546 pfile->cur_context = 0;
3548 /* Clear the base context, and clear the directive pointer so that
3549 get_raw_token will advance to the next line. */
3550 pfile->contexts[0].count = 0;
3551 pfile->contexts[0].posn = 0;
3552 pfile->token_list.directive = 0;
3555 /* Directive handler wrapper used by the command line option
3556 processor. */
3557 void
3558 _cpp_run_directive (pfile, dir, buf, count)
3559 cpp_reader *pfile;
3560 const struct directive *dir;
3561 const char *buf;
3562 size_t count;
3564 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3566 unsigned int prev_lvl = 0;
3568 /* Scan the line now, else prevent_macro_expansion won't work. */
3569 lex_next (pfile, 1);
3570 if (! (dir->flags & EXPAND))
3571 prev_lvl = prevent_macro_expansion (pfile);
3573 (void) (*dir->handler) (pfile);
3575 if (! (dir->flags & EXPAND))
3576 restore_macro_expansion (pfile, prev_lvl);
3578 _cpp_skip_rest_of_line (pfile);
3579 cpp_pop_buffer (pfile);