Regenerate after this patch:
[official-gcc.git] / gcc / cpplex.c
blob5b600961f42a1d5e261dfc39171d6ae67609dc64
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 /* Cleanups to do:-
27 o -dM and with _cpp_dump_list: too many \n output.
28 o Put a printer object in cpp_reader?
29 o Check line numbers assigned to all errors.
30 o Replace strncmp with memcmp almost everywhere.
31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
32 o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
34 o Distinguish integers, floats, and 'other' pp-numbers.
35 o Store ints and char constants as binary values.
36 o New command-line assertion syntax.
37 o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39 o Comment all functions, and describe macro expansion algorithm.
40 o Move as much out of header files as possible.
41 o Remove single quote pairs `', and some '', from diagnostics.
42 o Correct pastability test for CPP_NAME and CPP_NUMBER. */
46 #include "config.h"
47 #include "system.h"
48 #include "intl.h"
49 #include "cpplib.h"
50 #include "cpphash.h"
51 #include "symcat.h"
53 #define auto_expand_name_space(list) \
54 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
55 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
56 size_t, FILE *));
57 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
58 unsigned int));
59 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
60 unsigned int));
62 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
63 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
64 unsigned char *));
65 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
66 const unsigned char *));
67 static int skip_block_comment PARAMS ((cpp_reader *));
68 static int skip_line_comment PARAMS ((cpp_reader *));
69 static void skip_whitespace PARAMS ((cpp_reader *, int));
70 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
71 const U_CHAR *, const U_CHAR *));
72 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
73 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
74 unsigned int));
75 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
76 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
77 const unsigned char *,
78 unsigned int, unsigned int));
79 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
80 static int lex_next PARAMS ((cpp_reader *, int));
81 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
82 const cpp_token *));
84 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
85 static void expand_context_stack PARAMS ((cpp_reader *));
86 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
87 unsigned char *));
88 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
89 const cpp_token *));
90 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
91 cpp_token *));
92 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
93 unsigned int));
94 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
95 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
96 const cpp_token *));
97 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
98 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
99 const cpp_token *));
100 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
101 const cpp_token *, int *));
102 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
103 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
104 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
105 static void release_temp_tokens PARAMS ((cpp_reader *));
106 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
107 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
/* Start an empty string for TOKEN: zero length, with its text pointing
   at the first unused byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do {(token)->val.str.len = 0; \
      (token)->val.str.text = (list)->namebuf + (list)->name_used; \
  } while (0)

/* True if C is a '+' or '-' that may continue a number whose previous
   character was PREVC: after 'e' or 'E' always, and after 'p' or 'P'
   unless the c89 option is set.  Relies on a variable named pfile
   being in scope at the point of use.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character just consumed and CUR points at the character after it.
   If that character is the other member of the CR/LF pair it is
   consumed too.  Then the current buffer's line number is bumped, its
   line base is set to CUR and the column adjustment is reset.  Assumes
   next character, if any, is in buffer.  Relies on a variable named
   pfile being in scope.

   C is parenthesized in the expansion so that an expression argument
   (for instance a conditional expression) is evaluated as a unit.  */
#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
/* Helpers for manipulating the token stack; they all operate on a
   variable named cur_token that must be in scope at the point of use.
   cur_token points at the next free token, so cur_token[-1] is the
   most recently pushed one.  */

/* True if the current token is not flagged as preceded by whitespace.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
/* Type of the most recently pushed token.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Give the current token type TTYPE and advance past it.  */
#define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))
/* Change the type of the most recently pushed token to TTYPE.  */
#define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))
/* Step back one token and give it type TTYPE.  */
#define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))
/* As BACKUP_TOKEN, and also mark the token as spelled with a digraph.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN (ttype); cur_token->flags |= DIGRAPH;} while (0)
/* TOKEN_SPELL gives the spelling class recorded for TOKEN's type in
   token_spellings.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].type)

/* An upper bound on the number of bytes needed to spell a token,
   including preceding whitespace: five bytes, plus the string length
   for SPELL_STRING tokens or the identifier length for SPELL_IDENT
   tokens.  */
#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
                               ? (token)->val.str.len \
                               : (TOKEN_SPELL(token) == SPELL_IDENT \
                                  ? (token)->val.node->length \
                                  : 0)))
149 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
150 #define I(e, s) {SPELL_IDENT, s},
151 #define S(e, s) {SPELL_STRING, s},
152 #define C(e, s) {SPELL_CHAR, s},
153 #define N(e, s) {SPELL_NONE, s},
155 const struct token_spelling
156 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
158 #undef T
159 #undef I
160 #undef S
161 #undef C
162 #undef N
164 /* For debugging: the internal names of the tokens. */
165 #define T(e, s) U STRINGX(e),
166 #define I(e, s) U STRINGX(e),
167 #define S(e, s) U STRINGX(e),
168 #define C(e, s) U STRINGX(e),
169 #define N(e, s) U STRINGX(e),
171 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
173 #undef T
174 #undef I
175 #undef S
176 #undef C
177 #undef N
179 /* The following table is used by trigraph_ok/trigraph_replace. If we
180 have designated initializers, it can be constant data; otherwise,
181 it is set up at runtime by _cpp_init_input_buffer. */
183 #if (GCC_VERSION >= 2007)
184 #define init_trigraph_map() /* nothing */
185 #define TRIGRAPH_MAP \
186 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
187 #define END };
188 #define s(p, v) [p] = v,
189 #else
190 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
191 static void init_trigraph_map PARAMS ((void)) { \
192 unsigned char *x = trigraph_map;
193 #define END }
194 #define s(p, v) x[p] = v;
195 #endif
197 TRIGRAPH_MAP
198 s('=', '#') s(')', ']') s('!', '|')
199 s('(', '[') s('\'', '^') s('>', '}')
200 s('/', '\\') s('<', '{') s('-', '~')
203 #undef TRIGRAPH_MAP
204 #undef END
205 #undef s
207 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
209 void
210 _cpp_grow_token_buffer (pfile, n)
211 cpp_reader *pfile;
212 long n;
214 long old_written = CPP_WRITTEN (pfile);
215 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
216 pfile->token_buffer = (U_CHAR *)
217 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
218 CPP_SET_WRITTEN (pfile, old_written);
221 /* Deal with the annoying semantics of fwrite. */
222 static void
223 safe_fwrite (pfile, buf, len, fp)
224 cpp_reader *pfile;
225 const U_CHAR *buf;
226 size_t len;
227 FILE *fp;
229 size_t count;
231 while (len)
233 count = fwrite (buf, 1, len, fp);
234 if (count == 0)
235 goto error;
236 len -= count;
237 buf += count;
239 return;
241 error:
242 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
245 /* Notify the compiler proper that the current line number has jumped,
246 or the current file name has changed. */
248 static void
249 output_line_command (pfile, print, line)
250 cpp_reader *pfile;
251 cpp_printer *print;
252 unsigned int line;
254 cpp_buffer *ip = CPP_BUFFER (pfile);
255 enum { same = 0, enter, leave, rname } change;
256 static const char * const codes[] = { "", " 1", " 2", "" };
258 if (line == 0)
259 return;
261 /* End the previous line of text. */
262 if (pfile->need_newline)
263 putc ('\n', print->outf);
264 pfile->need_newline = 0;
266 if (CPP_OPTION (pfile, no_line_commands))
267 return;
269 /* If ip is null, we've been called from cpp_finish, and they just
270 needed the final flush and trailing newline. */
271 if (!ip)
272 return;
274 if (pfile->include_depth == print->last_id)
276 /* Determine whether the current filename has changed, and if so,
277 how. 'nominal_fname' values are unique, so they can be compared
278 by comparing pointers. */
279 if (ip->nominal_fname == print->last_fname)
280 change = same;
281 else
282 change = rname;
284 else
286 if (pfile->include_depth > print->last_id)
287 change = enter;
288 else
289 change = leave;
290 print->last_id = pfile->include_depth;
292 print->last_fname = ip->nominal_fname;
294 /* If the current file has not changed, we can output a few newlines
295 instead if we want to increase the line number by a small amount.
296 We cannot do this if print->lineno is zero, because that means we
297 haven't output any line commands yet. (The very first line
298 command output is a `same_file' command.) */
299 if (change == same && print->lineno > 0
300 && line >= print->lineno && line < print->lineno + 8)
302 while (line > print->lineno)
304 putc ('\n', print->outf);
305 print->lineno++;
307 return;
310 #ifndef NO_IMPLICIT_EXTERN_C
311 if (CPP_OPTION (pfile, cplusplus))
312 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
313 codes[change],
314 ip->inc->sysp ? " 3" : "",
315 (ip->inc->sysp == 2) ? " 4" : "");
316 else
317 #endif
318 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
319 codes[change],
320 ip->inc->sysp ? " 3" : "");
321 print->lineno = line;
324 /* Write the contents of the token_buffer to the output stream, and
325 clear the token_buffer. Also handles generating line commands and
326 keeping track of file transitions. */
328 void
329 cpp_output_tokens (pfile, print, line)
330 cpp_reader *pfile;
331 cpp_printer *print;
332 unsigned int line;
334 if (CPP_WRITTEN (pfile) - print->written)
336 safe_fwrite (pfile, pfile->token_buffer,
337 CPP_WRITTEN (pfile) - print->written, print->outf);
338 pfile->need_newline = 1;
339 if (print->lineno)
340 print->lineno++;
342 CPP_SET_WRITTEN (pfile, print->written);
344 output_line_command (pfile, print, line);
347 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
349 void
350 cpp_scan_buffer_nooutput (pfile)
351 cpp_reader *pfile;
353 unsigned int old_written = CPP_WRITTEN (pfile);
354 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
356 for (;;)
358 /* In no-output mode, we can ignore everything but directives. */
359 const cpp_token *token = cpp_get_token (pfile);
360 if (token->type == CPP_EOF)
362 cpp_pop_buffer (pfile);
363 if (CPP_BUFFER (pfile) == stop)
364 break;
366 _cpp_skip_rest_of_line (pfile);
368 CPP_SET_WRITTEN (pfile, old_written);
371 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
373 void
374 cpp_scan_buffer (pfile, print)
375 cpp_reader *pfile;
376 cpp_printer *print;
378 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
379 const cpp_token *token, *prev = 0;
381 for (;;)
383 token = cpp_get_token (pfile);
384 if (token->type == CPP_EOF)
386 cpp_pop_buffer (pfile);
387 if (CPP_BUFFER (pfile) == stop)
388 return;
389 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
390 prev = 0;
391 continue;
394 if (token->flags & BOL)
396 cpp_output_tokens (pfile, print, pfile->token_list.line);
397 prev = 0;
400 output_token (pfile, token, prev);
401 prev = token;
405 /* Helper routine used by parse_include, which can't see spell_token.
406 Reinterpret the current line as an h-char-sequence (< ... >); we are
407 looking at the first token after the <. */
408 const cpp_token *
409 _cpp_glue_header_name (pfile)
410 cpp_reader *pfile;
412 unsigned int written = CPP_WRITTEN (pfile);
413 const cpp_token *t;
414 cpp_token *hdr;
415 U_CHAR *buf;
416 size_t len;
418 for (;;)
420 t = cpp_get_token (pfile);
421 if (t->type == CPP_GREATER || t->type == CPP_EOF)
422 break;
424 CPP_RESERVE (pfile, TOKEN_LEN (t));
425 if (t->flags & PREV_WHITE)
426 CPP_PUTC_Q (pfile, ' ');
427 pfile->limit = spell_token (pfile, t, pfile->limit);
430 if (t->type == CPP_EOF)
431 cpp_error (pfile, "missing terminating > character");
433 len = CPP_WRITTEN (pfile) - written;
434 buf = xmalloc (len);
435 memcpy (buf, pfile->token_buffer + written, len);
436 CPP_SET_WRITTEN (pfile, written);
438 hdr = get_temp_token (pfile);
439 hdr->type = CPP_HEADER_NAME;
440 hdr->flags = 0;
441 hdr->val.str.text = buf;
442 hdr->val.str.len = len;
443 return hdr;
446 /* Token-buffer helper functions. */
448 /* Expand a token list's string space. It is *vital* that
449 list->tokens_used is correct, to get pointer fix-up right. */
450 void
451 _cpp_expand_name_space (list, len)
452 cpp_toklist *list;
453 unsigned int len;
455 const U_CHAR *old_namebuf;
457 old_namebuf = list->namebuf;
458 list->name_cap += len;
459 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
461 /* Fix up token text pointers. */
462 if (list->namebuf != old_namebuf)
464 unsigned int i;
466 for (i = 0; i < list->tokens_used; i++)
467 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
468 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
472 /* If there is not enough room for LEN more characters, expand the
473 list by just enough to have room for LEN characters. */
474 void
475 _cpp_reserve_name_space (list, len)
476 cpp_toklist *list;
477 unsigned int len;
479 unsigned int room = list->name_cap - list->name_used;
481 if (room < len)
482 _cpp_expand_name_space (list, len - room);
485 /* Expand the number of tokens in a list. */
486 void
487 _cpp_expand_token_space (list, count)
488 cpp_toklist *list;
489 unsigned int count;
491 unsigned int n;
493 list->tokens_cap += count;
494 n = list->tokens_cap;
495 if (list->flags & LIST_OFFSET)
496 list->tokens--, n++;
497 list->tokens = (cpp_token *)
498 xrealloc (list->tokens, n * sizeof (cpp_token));
499 if (list->flags & LIST_OFFSET)
500 list->tokens++; /* Skip the dummy. */
503 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
504 an extra token in front of the token list, as this allows the lexer
505 to always peek at the previous token without worrying about
506 underflowing the list, and some initial space. Otherwise, no
507 token- or name-space is allocated, and there is no dummy token. */
508 void
509 _cpp_init_toklist (list, flags)
510 cpp_toklist *list;
511 int flags;
513 if (flags == NO_DUMMY_TOKEN)
515 list->tokens_cap = 0;
516 list->tokens = 0;
517 list->name_cap = 0;
518 list->namebuf = 0;
519 list->flags = 0;
521 else
523 /* Initialize token space. Put a dummy token before the start
524 that will fail matches. */
525 list->tokens_cap = 256; /* 4K's worth. */
526 list->tokens = (cpp_token *)
527 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
528 list->tokens[0].type = CPP_EOF;
529 list->tokens++;
531 /* Initialize name space. */
532 list->name_cap = 1024;
533 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
534 list->flags = LIST_OFFSET;
537 _cpp_clear_toklist (list);
540 /* Clear a token list. */
541 void
542 _cpp_clear_toklist (list)
543 cpp_toklist *list;
545 list->tokens_used = 0;
546 list->name_used = 0;
547 list->directive = 0;
548 list->paramc = 0;
549 list->params_len = 0;
550 list->flags &= LIST_OFFSET; /* clear all but that one */
553 /* Free a token list. Does not free the list itself, which may be
554 embedded in a larger structure. */
555 void
556 _cpp_free_toklist (list)
557 const cpp_toklist *list;
559 if (list->flags & LIST_OFFSET)
560 free (list->tokens - 1); /* Backup over dummy token. */
561 else
562 free (list->tokens);
563 free (list->namebuf);
566 /* Compare two tokens. */
568 _cpp_equiv_tokens (a, b)
569 const cpp_token *a, *b;
571 if (a->type == b->type && a->flags == b->flags)
572 switch (token_spellings[a->type].type)
574 default: /* Keep compiler happy. */
575 case SPELL_OPERATOR:
576 return 1;
577 case SPELL_CHAR:
578 case SPELL_NONE:
579 return a->val.aux == b->val.aux; /* arg_no or character. */
580 case SPELL_IDENT:
581 return a->val.node == b->val.node;
582 case SPELL_STRING:
583 return (a->val.str.len == b->val.str.len
584 && !memcmp (a->val.str.text, b->val.str.text,
585 a->val.str.len));
588 return 0;
591 /* Compare two token lists. */
593 _cpp_equiv_toklists (a, b)
594 const cpp_toklist *a, *b;
596 unsigned int i;
598 if (a->tokens_used != b->tokens_used
599 || a->flags != b->flags
600 || a->paramc != b->paramc)
601 return 0;
603 for (i = 0; i < a->tokens_used; i++)
604 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
605 return 0;
606 return 1;
609 /* Utility routine:
611 Compares, the token TOKEN to the NUL-terminated string STRING.
612 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
615 cpp_ideq (token, string)
616 const cpp_token *token;
617 const char *string;
619 if (token->type != CPP_NAME)
620 return 0;
622 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
625 /* Lexing algorithm.
627 The original lexer in cpplib was made up of two passes: a first pass
628 that replaced trigraphs and deleted escaped newlines, and a second
629 pass that tokenized the result of the first pass. Tokenisation was
630 performed by peeking at the next character in the input stream. For
631 example, if the input stream contained "!=", the handler for the !
632 character would peek at the next character, and if it were a '='
633 would skip over it, and return a "!=" token, otherwise it would
634 return just the "!" token.
636 To implement a single-pass lexer, this peeking ahead is unworkable.
637 An arbitrary number of escaped newlines, and trigraphs (in particular
638 ??/ which translates to the escape \), could separate the '!' and '='
639 in the input stream, yet the next token is still a "!=".
641 Suppose instead that we lex by one logical line at a time, producing
642 a token list or stack for each logical line, and when seeing the '!'
643 push a CPP_NOT token on the list. Then if the '!' is part of a
644 longer token ("!=") we know we must see the remainder of the token by
645 the time we reach the end of the logical line. Thus we can have the
646 '=' handler look at the previous token (at the end of the list / top
647 of the stack) and see if it is a "!" token, and if so, instead of
648 pushing a "=" token revise the existing token to be a "!=" token.
650 This works in the presence of escaped newlines, because the '\' would
651 have been pushed on the top of the stack as a CPP_BACKSLASH. The
652 newline ('\n' or '\r') handler looks at the token at the top of the
653 stack to see if it is a CPP_BACKSLASH, and if so discards both.
654 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
655 the '=' handler would never see any intervening escaped newlines.
657 To make trigraphs work in this context, as in precedence trigraphs
658 are highest and converted before anything else, the '?' handler does
659 lookahead to see if it is a trigraph, and if so skips the trigraph
660 and pushes the token it represents onto the top of the stack. This
661 also works in the particular case of a CPP_BACKSLASH trigraph.
663 To the preprocessor, whitespace is only significant to the point of
664 knowing whether whitespace precedes a particular token. For example,
665 the '=' handler needs to know whether there was whitespace between it
666 and a "!" token on the top of the stack, to make the token conversion
667 decision correctly. So each token has a PREV_WHITE flag to
668 indicate this - the standard permits consecutive whitespace to be
669 regarded as a single space. The compiler front ends are not
670 interested in whitespace at all; they just require a token stream.
671 Another place where whitespace is significant to the preprocessor is
672 a #define statement - if there is whitespace between the macro name
673 and an initial "(" token the macro is "object-like", otherwise it is
674 a function-like macro that takes arguments.
676 However, all is not rosy. Parsing of identifiers, numbers, comments
677 and strings becomes trickier because of the possibility of raw
678 trigraphs and escaped newlines in the input stream.
680 The trigraphs are three consecutive characters beginning with two
681 question marks. A question mark is not valid as part of a number or
682 identifier, so parsing of a number or identifier terminates normally
683 upon reaching it, returning to the mainloop which handles the
684 trigraph just like it would in any other position. Similarly for the
685 backslash of a backslash-newline combination. So we just need the
686 escaped-newline dropper in the mainloop to check if the token on the
687 top of the stack after dropping the escaped newline is a number or
688 identifier, and if so to continue the processing it as if nothing had
689 happened.
691 For strings, we replace trigraphs whenever we reach a quote or
692 newline, because there might be a backslash trigraph escaping them.
693 We need to be careful that we start trigraph replacing from where we
694 left off previously, because it is possible for a first scan to leave
695 "fake" trigraphs that a second scan would pick up as real (e.g. the
696 sequence "????/\n=" would find a fake ??= trigraph after removing the
697 escaped newline.)
699 For line comments, on reaching a newline we scan the previous
700 character(s) to see if it escaped, and continue if it is. Block
701 comments ignore everything and just focus on finding the comment
702 termination mark. The only difficult thing, and it is surprisingly
703 tricky, is checking if an asterisk precedes the final slash since
704 they could be separated by escaped newlines. If the preprocessor is
705 invoked with the output comments option, we don't bother removing
706 escaped newlines and replacing trigraphs for output.
708 Finally, numbers can begin with a period, which is pushed initially
709 as a CPP_DOT token in its own right. The digit handler checks if the
710 previous token was a CPP_DOT not separated by whitespace, and if so
711 pops it off the stack and pushes a period into the number's buffer
712 before calling the number parser. */
716 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
717 U":>", U"<%", U"%>"};
719 /* Call when a trigraph is encountered. It warns if necessary, and
720 returns true if the trigraph should be honoured. END is the third
721 character of a trigraph in the input stream. */
722 static int
723 trigraph_ok (pfile, end)
724 cpp_reader *pfile;
725 const unsigned char *end;
727 int accept = CPP_OPTION (pfile, trigraphs);
729 if (CPP_OPTION (pfile, warn_trigraphs))
731 unsigned int col = end - 1 - pfile->buffer->line_base;
732 if (accept)
733 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
734 "trigraph ??%c converted to %c",
735 (int) *end, (int) trigraph_map[*end]);
736 else
737 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
738 "trigraph ??%c ignored", (int) *end);
740 return accept;
743 /* Scan a string for trigraphs, warning or replacing them inline as
744 appropriate. When parsing a string, we must call this routine
745 before processing a newline character (if trigraphs are enabled),
746 since the newline might be escaped by a preceding backslash
747 trigraph sequence. Returns a pointer to the end of the name after
748 replacement. */
750 static unsigned char *
751 trigraph_replace (pfile, src, limit)
752 cpp_reader *pfile;
753 unsigned char *src;
754 unsigned char *limit;
756 unsigned char *dest;
758 /* Starting with src[1], find two consecutive '?'. The case of no
759 trigraphs is streamlined. */
761 for (src++; src + 1 < limit; src += 2)
763 if (src[0] != '?')
764 continue;
766 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
767 if (src[-1] == '?')
768 src--;
769 else if (src + 2 == limit || src[1] != '?')
770 continue;
772 /* Check if it really is a trigraph. */
773 if (trigraph_map[src[2]] == 0)
774 continue;
776 dest = src;
777 goto trigraph_found;
779 return limit;
781 /* Now we have a trigraph, we need to scan the remaining buffer, and
782 copy-shifting its contents left if replacement is enabled. */
783 for (; src + 2 < limit; dest++, src++)
784 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
786 trigraph_found:
787 src += 2;
788 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
789 *dest = trigraph_map[*src];
792 /* Copy remaining (at most 2) characters. */
793 while (src < limit)
794 *dest++ = *src++;
795 return dest;
798 /* If CUR is a backslash or the end of a trigraphed backslash, return
799 a pointer to its beginning, otherwise NULL. We don't read beyond
800 the buffer start, because there is the start of the comment in the
801 buffer. */
802 static const unsigned char *
803 backslash_start (pfile, cur)
804 cpp_reader *pfile;
805 const unsigned char *cur;
807 if (cur[0] == '\\')
808 return cur;
809 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
810 && trigraph_ok (pfile, cur))
811 return cur - 2;
812 return 0;
815 /* Skip a C-style block comment. This is probably the trickiest
816 handler. We find the end of the comment by seeing if an asterisk
817 is before every '/' we encounter. The nasty complication is that a
818 previous asterisk may be separated by one or more escaped newlines.
819 Returns non-zero if comment terminated by EOF, zero otherwise. */
820 static int
821 skip_block_comment (pfile)
822 cpp_reader *pfile;
824 cpp_buffer *buffer = pfile->buffer;
825 const unsigned char *char_after_star = 0;
826 register const unsigned char *cur = buffer->cur;
827 int seen_eof = 0;
829 /* Inner loop would think the comment has ended if the first comment
830 character is a '/'. Avoid this and keep the inner loop clean by
831 skipping such a character. */
832 if (cur < buffer->rlimit && cur[0] == '/')
833 cur++;
835 for (; cur < buffer->rlimit; )
837 unsigned char c = *cur++;
839 /* People like decorating comments with '*', so check for
840 '/' instead for efficiency. */
841 if (c == '/')
843 if (cur[-2] == '*' || cur - 1 == char_after_star)
844 goto out;
846 /* Warn about potential nested comments, but not when
847 the final character inside the comment is a '/'.
848 Don't bother to get it right across escaped newlines. */
849 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
850 && cur[0] == '*' && cur[1] != '/')
852 buffer->cur = cur;
853 cpp_warning (pfile, "'/*' within comment");
856 else if (is_vspace (c))
858 const unsigned char* bslash = backslash_start (pfile, cur - 2);
860 handle_newline (cur, buffer->rlimit, c);
861 /* Work correctly if there is an asterisk before an
862 arbirtrarily long sequence of escaped newlines. */
863 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
864 char_after_star = cur;
865 else
866 char_after_star = 0;
869 seen_eof = 1;
871 out:
872 buffer->cur = cur;
873 return seen_eof;
876 /* Skip a C++ or Chill line comment. Handles escaped newlines.
877 Returns non-zero if a multiline comment. */
878 static int
879 skip_line_comment (pfile)
880 cpp_reader *pfile;
882 cpp_buffer *buffer = pfile->buffer;
883 register const unsigned char *cur = buffer->cur;
884 int multiline = 0;
886 for (; cur < buffer->rlimit; )
888 unsigned char c = *cur++;
890 if (is_vspace (c))
892 /* Check for a (trigaph?) backslash escaping the newline. */
893 if (!backslash_start (pfile, cur - 2))
894 goto out;
895 multiline = 1;
896 handle_newline (cur, buffer->rlimit, c);
899 cur++;
901 out:
902 buffer->cur = cur - 1; /* Leave newline for caller. */
903 return multiline;
906 /* Skips whitespace, stopping at next non-whitespace character.
907 Adjusts pfile->col_adjust to account for tabs. This enables tokens
908 to be assigned the correct column. */
909 static void
910 skip_whitespace (pfile, in_directive)
911 cpp_reader *pfile;
912 int in_directive;
914 cpp_buffer *buffer = pfile->buffer;
915 unsigned short warned = 0;
917 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
918 while (buffer->cur < buffer->rlimit)
920 unsigned char c = *buffer->cur;
922 if (!is_nvspace (c))
923 break;
925 buffer->cur++;
926 /* Horizontal space always OK. */
927 if (c == ' ')
928 continue;
929 else if (c == '\t')
930 pfile->col_adjust += CPP_OPTION (pfile, tabstop) - 1
931 - (CPP_BUF_COL (buffer) - 1) % CPP_OPTION(pfile, tabstop);
932 /* Must be \f \v or \0. */
933 else if (c == '\0')
935 if (!warned)
936 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
937 CPP_BUF_COL (buffer),
938 "embedded null character ignored");
939 warned = 1;
941 else if (in_directive && CPP_PEDANTIC (pfile))
942 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
943 CPP_BUF_COL (buffer),
944 "%s in preprocessing directive",
945 c == '\f' ? "form feed" : "vertical tab");
949 /* Parse (append) an identifier. */
950 static inline const U_CHAR *
951 parse_name (pfile, tok, cur, rlimit)
952 cpp_reader *pfile;
953 cpp_token *tok;
954 const U_CHAR *cur, *rlimit;
956 const U_CHAR *name = cur;
957 unsigned int len;
959 while (cur < rlimit)
961 if (! is_idchar (*cur))
962 break;
963 /* $ is not a legal identifier character in the standard, but is
964 commonly accepted as an extension. Don't warn about it in
965 skipped conditional blocks. */
966 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
968 CPP_BUFFER (pfile)->cur = cur;
969 cpp_pedwarn (pfile, "'$' character in identifier");
971 cur++;
973 len = cur - name;
975 if (tok->val.node)
977 unsigned int oldlen = tok->val.node->length;
978 U_CHAR *newname = alloca (oldlen + len);
979 memcpy (newname, tok->val.node->name, oldlen);
980 memcpy (newname + oldlen, name, len);
981 len += oldlen;
982 name = newname;
985 tok->val.node = cpp_lookup (pfile, name, len);
986 return cur;
989 /* Parse (append) a number. */
990 static void
991 parse_number (pfile, list, name)
992 cpp_reader *pfile;
993 cpp_toklist *list;
994 cpp_string *name;
996 const unsigned char *name_limit;
997 unsigned char *namebuf;
998 cpp_buffer *buffer = pfile->buffer;
999 register const unsigned char *cur = buffer->cur;
1001 expanded:
1002 name_limit = list->namebuf + list->name_cap;
1003 namebuf = list->namebuf + list->name_used;
1005 for (; cur < buffer->rlimit && namebuf < name_limit; )
1007 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1009 /* Perhaps we should accept '$' here if we accept it for
1010 identifiers. We know namebuf[-1] is safe, because for c to
1011 be a sign we must have pushed at least one character. */
1012 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1013 goto out;
1015 namebuf++;
1016 cur++;
1019 /* Run out of name space? */
1020 if (cur < buffer->rlimit)
1022 list->name_used = namebuf - list->namebuf;
1023 auto_expand_name_space (list);
1024 goto expanded;
1027 out:
1028 buffer->cur = cur;
1029 name->len = namebuf - name->text;
1030 list->name_used = namebuf - list->namebuf;
1033 /* Places a string terminated by an unescaped TERMINATOR into a
1034 cpp_string, which should be expandable and thus at the top of the
1035 list's stack. Handles embedded trigraphs, if necessary, and
1036 escaped newlines.
1038 Can be used for character constants (terminator = '\''), string
1039 constants ('"') and angled headers ('>'). Multi-line strings are
1040 allowed, except for within directives. */
1042 static void
1043 parse_string (pfile, list, token, terminator)
1044 cpp_reader *pfile;
1045 cpp_toklist *list;
1046 cpp_token *token;
1047 unsigned int terminator;
1049 cpp_buffer *buffer = pfile->buffer;
1050 cpp_string *name = &token->val.str;
1051 register const unsigned char *cur = buffer->cur;
1052 const unsigned char *name_limit;
1053 unsigned char *namebuf;
1054 unsigned int null_count = 0;
1055 unsigned int trigraphed = list->name_used;
1057 expanded:
1058 name_limit = list->namebuf + list->name_cap;
1059 namebuf = list->namebuf + list->name_used;
1061 for (; cur < buffer->rlimit && namebuf < name_limit; )
1063 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
1065 if (c == '\0')
1066 null_count++;
1067 else if (c == terminator || is_vspace (c))
1069 /* Needed for trigraph_replace and multiline string warning. */
1070 buffer->cur = cur;
1072 /* Scan for trigraphs before checking if backslash-escaped. */
1073 if ((CPP_OPTION (pfile, trigraphs)
1074 || CPP_OPTION (pfile, warn_trigraphs))
1075 && namebuf - (list->namebuf + trigraphed) >= 3)
1077 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1078 namebuf);
1079 /* The test above guarantees trigraphed will be positive. */
1080 trigraphed = namebuf - list->namebuf - 2;
1083 namebuf--; /* Drop the newline / terminator from the name. */
1084 if (is_vspace (c))
1086 /* Drop a backslash newline, and continue. */
1087 if (namebuf[-1] == '\\')
1089 handle_newline (cur, buffer->rlimit, c);
1090 namebuf--;
1091 continue;
1094 cur--;
1096 /* In Fortran and assembly language, silently terminate
1097 strings of either variety at end of line. This is a
1098 kludge around not knowing where comments are in these
1099 languages. */
1100 if (CPP_OPTION (pfile, lang_fortran)
1101 || CPP_OPTION (pfile, lang_asm))
1102 goto out;
1104 /* Character constants, headers and asserts may not
1105 extend over multiple lines. In Standard C, neither
1106 may strings. We accept multiline strings as an
1107 extension. (Even in directives - otherwise, glibc's
1108 longlong.h breaks.) */
1109 if (terminator != '"')
1110 goto unterminated;
1112 cur++; /* Move forwards again. */
1114 if (pfile->multiline_string_line == 0)
1116 pfile->multiline_string_line = token->line;
1117 pfile->multiline_string_column = token->col;
1118 if (CPP_PEDANTIC (pfile))
1119 cpp_pedwarn (pfile, "multi-line string constant");
1122 *namebuf++ = '\n';
1123 handle_newline (cur, buffer->rlimit, c);
1125 else
1127 unsigned char *temp;
1129 /* An odd number of consecutive backslashes represents
1130 an escaped terminator. */
1131 temp = namebuf - 1;
1132 while (temp >= name->text && *temp == '\\')
1133 temp--;
1135 if ((namebuf - temp) & 1)
1136 goto out;
1137 namebuf++;
1142 /* Run out of name space? */
1143 if (cur < buffer->rlimit)
1145 list->name_used = namebuf - list->namebuf;
1146 auto_expand_name_space (list);
1147 goto expanded;
1150 /* We may not have trigraph-replaced the input for this code path,
1151 but as the input is in error by being unterminated we don't
1152 bother. Prevent warnings about no newlines at EOF. */
1153 if (is_vspace (cur[-1]))
1154 cur--;
1156 unterminated:
1157 cpp_error (pfile, "missing terminating %c character", (int) terminator);
1159 if (terminator == '\"' && pfile->multiline_string_line != list->line
1160 && pfile->multiline_string_line != 0)
1162 cpp_error_with_line (pfile, pfile->multiline_string_line,
1163 pfile->multiline_string_column,
1164 "possible start of unterminated string literal");
1165 pfile->multiline_string_line = 0;
1168 out:
1169 buffer->cur = cur;
1170 name->len = namebuf - name->text;
1171 list->name_used = namebuf - list->namebuf;
1173 if (null_count > 0)
1174 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1175 : "null character preserved"));
1178 /* The character TYPE helps us distinguish comment types: '*' = C
1179 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1180 the stored comment includes the comment start and any terminator. */
1182 #define COMMENT_START_LEN 2
1183 static void
1184 save_comment (list, token, from, len, type)
1185 cpp_toklist *list;
1186 cpp_token *token;
1187 const unsigned char *from;
1188 unsigned int len;
1189 unsigned int type;
1191 unsigned char *buffer;
1193 len += COMMENT_START_LEN;
1195 if (list->name_used + len > list->name_cap)
1196 _cpp_expand_name_space (list, len);
1198 INIT_TOKEN_STR (list, token);
1199 token->type = CPP_COMMENT;
1200 token->val.str.len = len;
1202 buffer = list->namebuf + list->name_used;
1203 list->name_used += len;
1205 /* Copy the comment. */
1206 if (type == '*')
1208 *buffer++ = '/';
1209 *buffer++ = '*';
1211 else
1213 *buffer++ = type;
1214 *buffer++ = type;
1216 memcpy (buffer, from, len - COMMENT_START_LEN);
1220 * The tokenizer's main loop. Returns a token list, representing a
1221 * logical line in the input file. On EOF after some tokens have
1222 * been processed, we return immediately. Then in next call, or if
1223 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1224 * token is placed in the list.
1226 * Implementation relies almost entirely on lookback, rather than
1227 * looking forwards. This means that tokenization requires just
1228 * a single pass of the file, even in the presence of trigraphs and
1229 * escaped newlines, providing significant performance benefits.
1230 * Trigraph overhead is negligible if they are disabled, and low
1231 * even when enabled.
/* KNOWN_DIRECTIVE is true once list->directive has been set for the
   line being lexed (see the _cpp_check_directive and
   _cpp_check_linemarker calls below).  MIGHT_BE_DIRECTIVE is true
   when cur_token is the second token lexed by this call and the
   first one was a CPP_HASH.  Both rely on lex_line's locals.  */
1234 #define KNOWN_DIRECTIVE() (list->directive != 0)
1235 #define MIGHT_BE_DIRECTIVE() \
1236 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
/* Lex one logical line from PFILE's current buffer, appending tokens
   to LIST starting at index list->tokens_used.  Records the file name
   and line number in LIST, updates buffer->cur and list->tokens_used,
   and sets pfile->in_lex_line to 1 for the duration of the call.
   Most multi-character tokens are formed by looking back at the
   previously lexed token (PREV_TOKEN_TYPE / IMMED_TOKEN) rather than
   by looking ahead in the input.  */
1238 static void
1239 lex_line (pfile, list)
1240 cpp_reader *pfile;
1241 cpp_toklist *list;
1243 cpp_token *cur_token, *token_limit, *first;
1244 cpp_buffer *buffer = pfile->buffer;
1245 const unsigned char *cur = buffer->cur;
1246 unsigned char flags = 0;
1247 unsigned int first_token = list->tokens_used;
/* NOTE(review): the code below reads cur_token[-1] even for the first
   token; presumably LIST_OFFSET guarantees a valid slot before
   list->tokens[0] - confirm against the list allocation code.  */
1249 if (!(list->flags & LIST_OFFSET))
1250 (abort) ();
1252 list->file = buffer->nominal_fname;
1253 list->line = CPP_BUF_LINE (buffer);
1254 pfile->col_adjust = 0;
1255 pfile->in_lex_line = 1;
1256 if (cur == buffer->buf)
1257 list->flags |= BEG_OF_FILE;
/* Re-entered after the token array has been enlarged, so that the
   token pointers are recomputed from list->tokens.  */
1259 expanded:
1260 token_limit = list->tokens + list->tokens_cap;
1261 cur_token = list->tokens + list->tokens_used;
1263 for (; cur < buffer->rlimit && cur_token < token_limit;)
1265 unsigned char c;
1267 /* Optimize non-vertical whitespace skipping; most tokens are
1268 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1269 c = *cur;
1270 if (is_nvspace (c))
1272 buffer->cur = cur;
/* The second argument is non-zero when this line started with '#' and
   at least one token has already been lexed.  */
1273 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1274 && cur_token > &list->tokens[first_token]));
1275 cur = buffer->cur;
1277 flags = PREV_WHITE;
1278 if (cur == buffer->rlimit)
1279 break;
1280 c = *cur;
1282 cur++;
1284 /* Initialize current token. CPP_EOF will not be fixed up by
1285 expand_name_space. */
1286 list->tokens_used = cur_token - list->tokens + 1;
1287 cur_token->type = CPP_EOF;
1288 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1289 cur_token->line = CPP_BUF_LINE (buffer);
1290 cur_token->flags = flags;
1291 flags = 0;
1293 switch (c)
1295 case '0': case '1': case '2': case '3': case '4':
1296 case '5': case '6': case '7': case '8': case '9':
1298 int prev_dot;
1300 cur--; /* Backup character. */
1301 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1302 if (prev_dot)
1303 cur_token--;
1304 INIT_TOKEN_STR (list, cur_token);
1305 /* Prepend an immediately previous CPP_DOT token. */
1306 if (prev_dot)
1308 if (list->name_cap == list->name_used)
1309 auto_expand_name_space (list);
1311 cur_token->val.str.len = 1;
1312 list->namebuf[list->name_used++] = '.';
/* Also the re-entry point for a number interrupted by a
   backslash-newline (see the newline case below).  */
1315 continue_number:
1316 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1317 buffer->cur = cur;
1318 parse_number (pfile, list, &cur_token->val.str);
1319 cur = buffer->cur;
1321 /* Check for # 123 form of #line. */
1322 if (MIGHT_BE_DIRECTIVE ())
1323 list->directive = _cpp_check_linemarker (pfile, cur_token,
1324 !(cur_token[-1].flags
1325 & PREV_WHITE));
1326 cur_token++;
1327 break;
1329 letter:
1330 case '_':
1331 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1332 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1333 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1334 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1335 case 'y': case 'z':
1336 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1337 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1338 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1339 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1340 case 'Y': case 'Z':
1341 cur--; /* Backup character. */
1342 cur_token->val.node = 0;
1343 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
/* Also the re-entry point for a name interrupted by a
   backslash-newline; parse_name appends to the existing node.  */
1345 continue_name:
1346 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
/* NOTE(review): this passes list->tokens[0].flags whereas the
   linemarker check above passes cur_token[-1].flags; the two differ
   when first_token != 0 - confirm which is intended.  */
1348 if (MIGHT_BE_DIRECTIVE ())
1349 list->directive = _cpp_check_directive (pfile, cur_token,
1350 !(list->tokens[0].flags
1351 & PREV_WHITE));
1352 cur_token++;
1353 break;
1355 case '\'':
1356 /* Character constants are not recognized when processing Fortran,
1357 or if -traditional. */
1358 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1359 goto other;
1361 /* Fall through. */
1362 case '\"':
1363 /* Traditionally, escaped strings are not strings. */
1364 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1365 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1366 goto other;
1368 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1369 /* Do we have a wide string? */
1370 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1371 && cur_token[-1].val.node == pfile->spec_nodes->n_L
1372 && !CPP_TRADITIONAL (pfile))
/* Reuse the slot of the preceding 'L' name token for the wide
   character / string token.  */
1374 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1377 do_parse_string:
1378 /* Here c is one of ' " or >. */
1379 INIT_TOKEN_STR (list, cur_token);
1380 buffer->cur = cur;
1381 parse_string (pfile, list, cur_token, c);
1382 cur = buffer->cur;
1383 cur_token++;
1384 break;
1386 case '/':
1387 cur_token->type = CPP_DIV;
1388 if (IMMED_TOKEN ())
1390 if (PREV_TOKEN_TYPE == CPP_DIV)
1392 /* We silently allow C++ comments in system headers,
1393 irrespective of conformance mode, because lots of
1394 broken systems do that and trying to clean it up
1395 in fixincludes is a nightmare. */
1396 if (CPP_IN_SYSTEM_HEADER (pfile))
1397 goto do_line_comment;
1398 else if (CPP_OPTION (pfile, cplusplus_comments))
1400 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1401 && ! buffer->warned_cplusplus_comments)
1403 buffer->cur = cur;
1404 cpp_pedwarn (pfile,
1405 "C++ style comments are not allowed in ISO C89");
1406 cpp_pedwarn (pfile,
1407 "(this will be reported only once per input file)");
1408 buffer->warned_cplusplus_comments = 1;
/* Shared with the Chill "--" comment case, which jumps here.  */
1410 do_line_comment:
1411 buffer->cur = cur;
1412 #if 0 /* Leave until new lexer in place. */
1413 if (cur[-2] != c)
1414 cpp_warning (pfile,
1415 "comment start split across lines");
1416 #endif
1417 if (skip_line_comment (pfile))
1418 cpp_warning (pfile, "multi-line comment");
1420 /* Back-up to first '-' or '/'. */
1421 cur_token--;
/* Either keep the comment as a token, or (unless traditional) make
   the next token carry PREV_WHITE in its place.  */
1422 if (!CPP_OPTION (pfile, discard_comments)
1423 && (!KNOWN_DIRECTIVE()
1424 || (list->directive->flags & COMMENTS)))
1425 save_comment (list, cur_token++, cur,
1426 buffer->cur - cur, c);
1427 else if (!CPP_OPTION (pfile, traditional))
1428 flags = PREV_WHITE;
1430 cur = buffer->cur;
1431 break;
1435 cur_token++;
1436 break;
1438 case '*':
1439 cur_token->type = CPP_MULT;
1440 if (IMMED_TOKEN ())
1442 if (PREV_TOKEN_TYPE == CPP_DIV)
1444 buffer->cur = cur;
1445 #if 0 /* Leave until new lexer in place. */
1446 if (cur[-2] != '/')
1447 cpp_warning (pfile,
1448 "comment start '/*' split across lines");
1449 #endif
1450 if (skip_block_comment (pfile))
1451 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1452 "unterminated comment");
1453 #if 0 /* Leave until new lexer in place. */
1454 else if (buffer->cur[-2] != '*')
1455 cpp_warning (pfile,
1456 "comment end '*/' split across lines");
1457 #endif
1458 /* Back up to opening '/'. */
1459 cur_token--;
1460 if (!CPP_OPTION (pfile, discard_comments)
1461 && (!KNOWN_DIRECTIVE()
1462 || (list->directive->flags & COMMENTS)))
1463 save_comment (list, cur_token++, cur,
1464 buffer->cur - cur, c);
1465 else if (!CPP_OPTION (pfile, traditional))
1466 flags = PREV_WHITE;
1468 cur = buffer->cur;
1469 break;
1471 else if (CPP_OPTION (pfile, cplusplus))
1473 /* In C++, there are .* and ->* operators. */
1474 if (PREV_TOKEN_TYPE == CPP_DEREF)
1475 BACKUP_TOKEN (CPP_DEREF_STAR);
1476 else if (PREV_TOKEN_TYPE == CPP_DOT)
1477 BACKUP_TOKEN (CPP_DOT_STAR);
1480 cur_token++;
1481 break;
1483 case '\n':
1484 case '\r':
1485 handle_newline (cur, buffer->rlimit, c);
1486 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1488 if (IMMED_TOKEN ())
1490 /* Remove the escaped newline. Then continue to process
1491 any interrupted name or number. */
1492 cur_token--;
1493 /* Backslash-newline may not be immediately followed by
1494 EOF (C99 5.1.1.2). */
1495 if (cur >= buffer->rlimit)
1497 cpp_pedwarn (pfile, "backslash-newline at end of file");
1498 break;
1500 if (IMMED_TOKEN ())
1502 cur_token--;
1503 if (cur_token->type == CPP_NAME)
1504 goto continue_name;
1505 else if (cur_token->type == CPP_NUMBER)
1506 goto continue_number;
1507 cur_token++;
1509 /* Remember whitespace setting. */
1510 flags = cur_token->flags;
1511 break;
1513 else
1515 buffer->cur = cur;
1516 cpp_warning (pfile,
1517 "backslash and newline separated by space");
1520 else if (MIGHT_BE_DIRECTIVE ())
1522 /* "Null directive." C99 6.10.7: A preprocessing
1523 directive of the form # <new-line> has no effect.
1525 But it is still a directive, and therefore disappears
1526 from the output. */
1527 cur_token--;
1528 if (cur_token->flags & PREV_WHITE)
1530 if (CPP_WTRADITIONAL (pfile))
1531 cpp_warning (pfile,
1532 "K+R C ignores #\\n with the # indented");
1533 if (CPP_TRADITIONAL (pfile))
1534 cur_token++;
1538 /* Skip vertical space until we have at least one token to
1539 return. */
1540 if (cur_token != &list->tokens[first_token])
1541 goto out;
1542 list->line = CPP_BUF_LINE (buffer);
1543 break;
1545 case '-':
1546 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1548 if (CPP_OPTION (pfile, chill))
1549 goto do_line_comment;
1550 REVISE_TOKEN (CPP_MINUS_MINUS);
1552 else
1553 PUSH_TOKEN (CPP_MINUS);
1554 break;
1556 make_hash:
1557 case '#':
1558 /* The digraph flag checking ensures that ## and %:%:
1559 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1560 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1561 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1562 REVISE_TOKEN (CPP_PASTE);
1563 else
1564 PUSH_TOKEN (CPP_HASH);
1565 break;
1567 case ':':
1568 cur_token->type = CPP_COLON;
1569 if (IMMED_TOKEN ())
1571 if (PREV_TOKEN_TYPE == CPP_COLON
1572 && CPP_OPTION (pfile, cplusplus))
1573 BACKUP_TOKEN (CPP_SCOPE);
1574 else if (CPP_OPTION (pfile, digraphs))
1576 /* Digraph: "<:" is a '[' */
1577 if (PREV_TOKEN_TYPE == CPP_LESS)
1578 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1579 /* Digraph: "%:" is a '#' */
1580 else if (PREV_TOKEN_TYPE == CPP_MOD)
1582 (--cur_token)->flags |= DIGRAPH;
1583 goto make_hash;
1587 cur_token++;
1588 break;
1590 case '&':
1591 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1592 REVISE_TOKEN (CPP_AND_AND);
1593 else
1594 PUSH_TOKEN (CPP_AND);
1595 break;
1597 make_or:
1598 case '|':
1599 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1600 REVISE_TOKEN (CPP_OR_OR);
1601 else
1602 PUSH_TOKEN (CPP_OR);
1603 break;
1605 case '+':
1606 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1607 REVISE_TOKEN (CPP_PLUS_PLUS);
1608 else
1609 PUSH_TOKEN (CPP_PLUS);
1610 break;
1612 case '=':
1613 /* This relies on equidistance of "?=" and "?" tokens. */
1614 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1615 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1616 else
1617 PUSH_TOKEN (CPP_EQ);
1618 break;
1620 case '>':
1621 cur_token->type = CPP_GREATER;
1622 if (IMMED_TOKEN ())
1624 if (PREV_TOKEN_TYPE == CPP_GREATER)
1625 BACKUP_TOKEN (CPP_RSHIFT);
1626 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1627 BACKUP_TOKEN (CPP_DEREF);
1628 else if (CPP_OPTION (pfile, digraphs))
1630 /* Digraph: ":>" is a ']' */
1631 if (PREV_TOKEN_TYPE == CPP_COLON)
1632 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1633 /* Digraph: "%>" is a '}' */
1634 else if (PREV_TOKEN_TYPE == CPP_MOD)
1635 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1638 cur_token++;
1639 break;
1641 case '<':
1642 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1644 REVISE_TOKEN (CPP_LSHIFT);
1645 break;
1647 /* Is this the beginning of a header name? */
1648 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1650 c = '>'; /* Terminator. */
1651 cur_token->type = CPP_HEADER_NAME;
1652 goto do_parse_string;
1654 PUSH_TOKEN (CPP_LESS);
1655 break;
1657 case '%':
1658 /* Digraph: "<%" is a '{' */
1659 cur_token->type = CPP_MOD;
1660 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1661 && CPP_OPTION (pfile, digraphs))
1662 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1663 cur_token++;
1664 break;
1666 case '?':
/* The one place that looks ahead: a second '?' followed by a valid
   trigraph character is turned directly into the replacement
   token.  */
1667 if (cur + 1 < buffer->rlimit && *cur == '?'
1668 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1670 /* Handle trigraph. */
1671 cur++;
1672 switch (*cur++)
1674 case '(': goto make_open_square;
1675 case ')': goto make_close_square;
1676 case '<': goto make_open_brace;
1677 case '>': goto make_close_brace;
1678 case '=': goto make_hash;
1679 case '!': goto make_or;
1680 case '-': goto make_complement;
1681 case '/': goto make_backslash;
1682 case '\'': goto make_xor;
1685 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1687 /* GNU C++ defines <? and >? operators. */
1688 if (PREV_TOKEN_TYPE == CPP_LESS)
1690 REVISE_TOKEN (CPP_MIN);
1691 break;
1693 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1695 REVISE_TOKEN (CPP_MAX);
1696 break;
1699 PUSH_TOKEN (CPP_QUERY);
1700 break;
1702 case '.':
1703 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1704 && IMMED_TOKEN ()
1705 && !(cur_token[-1].flags & PREV_WHITE))
1707 cur_token -= 2;
1708 PUSH_TOKEN (CPP_ELLIPSIS);
1710 else
1711 PUSH_TOKEN (CPP_DOT);
1712 break;
1714 make_complement:
1715 case '~': PUSH_TOKEN (CPP_COMPL); break;
1716 make_xor:
1717 case '^': PUSH_TOKEN (CPP_XOR); break;
1718 make_open_brace:
1719 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1720 make_close_brace:
1721 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1722 make_open_square:
1723 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1724 make_close_square:
1725 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1726 make_backslash:
1727 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1728 case '!': PUSH_TOKEN (CPP_NOT); break;
1729 case ',': PUSH_TOKEN (CPP_COMMA); break;
1730 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1731 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1732 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1734 case '$':
1735 if (CPP_OPTION (pfile, dollars_in_ident))
1736 goto letter;
1737 /* Fall through */
1738 other:
1739 default:
/* Anything unrecognised becomes a CPP_OTHER token carrying the raw
   character in val.aux.  */
1740 cur_token->val.aux = c;
1741 PUSH_TOKEN (CPP_OTHER);
1742 break;
1746 /* Run out of token space? */
1747 if (cur_token == token_limit)
1749 list->tokens_used = cur_token - list->tokens;
1750 _cpp_expand_token_space (list, 256);
1751 goto expanded;
/* Reached only when the input buffer is exhausted.  If no token was
   lexed by this call, emit a single CPP_EOF (after initialization
   is complete).  */
1754 cur_token->flags = flags;
1755 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1757 if (cur > buffer->buf && !is_vspace (cur[-1]))
1758 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1759 CPP_BUF_COLUMN (buffer, cur),
1760 "no newline at end of file");
1761 cur_token++->type = CPP_EOF;
1764 out:
1765 /* All tokens are allocated, so the memory location is fixed. */
1766 first = &list->tokens[first_token];
1768 /* Don't complain about the null directive, nor directives in
1769 assembly source: we don't know where the comments are, and # may
1770 introduce assembler pseudo-ops. Don't complain about invalid
1771 directives in skipped conditional groups (6.10 p4). */
1772 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1773 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1775 if (first[1].type == CPP_NAME)
1776 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1777 (int) first[1].val.node->length, first[1].val.node->name);
1778 else
1779 cpp_error (pfile, "invalid preprocessing directive");
1782 /* Put EOF at end of known directives. This covers "directives do
1783 not extend beyond the end of the line (description 6.10 part 2)". */
1784 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1786 pfile->first_directive_token = first;
1787 cur_token++->type = CPP_EOF;
1790 /* Directives, known or not, always start a new line. */
1791 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1792 first->flags |= BOL;
1793 else
1794 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1795 up the invocation of a function-like macro, new line is
1796 considered a normal white-space character. */
1797 first->flags |= PREV_WHITE;
/* Publish the final buffer position and token count, and leave
   lexing mode.  */
1799 buffer->cur = cur;
1800 list->tokens_used = cur_token - list->tokens;
1801 pfile->in_lex_line = 0;
1804 /* Write the spelling of a token TOKEN, with any appropriate
1805 whitespace before it, to the token_buffer. PREV is the previous
1806 token, which is used to determine if we need to shove in an extra
1807 space in order to avoid accidental token paste. */
1808 static void
1809 output_token (pfile, token, prev)
1810 cpp_reader *pfile;
1811 const cpp_token *token, *prev;
1813 int dummy;
1815 if (token->col && (token->flags & BOL))
1817 /* Supply enough whitespace to put this token in its original
1818 column. Don't bother trying to reconstruct tabs; we can't
1819 get it right in general, and nothing ought to care. (Yes,
1820 some things do care; the fault lies with them.) */
1821 unsigned char *buffer;
1822 unsigned int spaces = token->col - 1;
1824 CPP_RESERVE (pfile, token->col);
1825 buffer = pfile->limit;
1827 while (spaces--)
1828 *buffer++ = ' ';
1829 pfile->limit = buffer;
1831 else if (token->flags & PREV_WHITE)
1832 CPP_PUTC (pfile, ' ');
1833 /* Check for and prevent accidental token pasting, in ANSI mode. */
1835 else if (!CPP_TRADITIONAL (pfile) && prev)
1837 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1838 CPP_PUTC (pfile, ' ');
1839 /* can_paste catches most of the accidental paste cases, but not all.
1840 Consider a + ++b - if there is not a space between the + and ++, it
1841 will be misparsed as a++ + b. */
1842 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1843 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1844 CPP_PUTC (pfile, ' ');
1847 CPP_RESERVE (pfile, TOKEN_LEN (token));
1848 pfile->limit = spell_token (pfile, token, pfile->limit);
1851 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1852 already contain the enough space to hold the token's spelling.
1853 Returns a pointer to the character after the last character
1854 written. */
1856 static unsigned char *
1857 spell_token (pfile, token, buffer)
1858 cpp_reader *pfile; /* Would be nice to be rid of this... */
1859 const cpp_token *token;
1860 unsigned char *buffer;
1862 switch (token_spellings[token->type].type)
1864 case SPELL_OPERATOR:
1866 const unsigned char *spelling;
1867 unsigned char c;
1869 if (token->flags & DIGRAPH)
1870 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1871 else
1872 spelling = token_spellings[token->type].spelling;
1874 while ((c = *spelling++) != '\0')
1875 *buffer++ = c;
1877 break;
1879 case SPELL_IDENT:
1880 memcpy (buffer, token->val.node->name, token->val.node->length);
1881 buffer += token->val.node->length;
1882 break;
1884 case SPELL_STRING:
1886 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1887 *buffer++ = 'L';
1889 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1890 *buffer++ = '"';
1891 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1892 *buffer++ = '\'';
1894 memcpy (buffer, token->val.str.text, token->val.str.len);
1895 buffer += token->val.str.len;
1897 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1898 *buffer++ = '"';
1899 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1900 *buffer++ = '\'';
1902 break;
1904 case SPELL_CHAR:
1905 *buffer++ = token->val.aux;
1906 break;
1908 case SPELL_NONE:
1909 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1910 break;
1913 return buffer;
1916 /* Return the spelling of a token known to be an operator.
1917 Does not distinguish digraphs from their counterparts. */
1918 const unsigned char *
1919 _cpp_spell_operator (type)
1920 enum cpp_ttype type;
1922 if (token_spellings[type].type == SPELL_OPERATOR)
1923 return token_spellings[type].spelling;
1924 else
1925 return token_names[type];
1929 /* Macro expansion algorithm. TODO. */
/* Constant template tokens.  parse_arg saves a duplicate of
   placemarker_token to represent an empty macro argument.  */
1931 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1932 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
/* IS_ARG_CONTEXT tests the CONTEXT_ARG bit of context C.
   CURRENT_CONTEXT yields the entry of PFILE's context array selected
   by pfile->cur_context.  */
1934 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1935 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1937 /* Flags for cpp_context. */
1938 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
1939 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
1940 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
1941 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
/* Set the flags of token D to the PREV_WHITE, BOL and PASTE_LEFT bits
   of token S; if BOL is among them, copy S's column and line too.  */
1943 #define ASSIGN_FLAGS_AND_POS(d, s) \
1944 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1945 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1946 } while (0)
/* Replace the PREV_WHITE and BOL bits of token D with those in F,
   leaving D's other flags alone; if F contains BOL, copy the column
   and line of token S too.  */
1948 /* f is flags, just consisting of PREV_WHITE | BOL. */
1949 #define MODIFY_FLAGS_AND_POS(d, s, f) \
1950 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1951 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1952 } while (0)
/* One entry of the reader's context array (pfile->contexts).  An
   entry describes either a macro expansion or a macro argument; the
   CONTEXT_ARG bit of FLAGS says which, and hence which member of U is
   in use.  */
1954 typedef struct cpp_context cpp_context;
1955 struct cpp_context
1957 union
1959 const cpp_toklist *list; /* Used for macro contexts only. */
1960 const cpp_token **arg; /* Used for arg contexts only. */
1961 } u;
1963 /* Pushed token to be returned by next call to cpp_get_token. */
1964 const cpp_token *pushed_token;
1966 struct macro_args *args; /* 0 for arguments and object-like macros. */
1967 unsigned short posn; /* Current posn, index into u. */
1968 unsigned short count; /* No. of tokens in u. */
/* is_macro_disabled walks contexts while LEVEL is positive and, for
   CONTEXT_RAW contexts, uses LEVEL as an index into pfile->contexts.
   NOTE(review): presumably the nesting depth at which the tokens
   originated - confirm where it is assigned.  */
1969 unsigned short level;
/* Combination of the CONTEXT_* bits.  */
1970 unsigned char flags;
/* The tokens collected as arguments of a function-like macro
   invocation.  TOKENS is a growable array of token pointers managed
   by save_token: USED entries are filled and CAPACITY are allocated.
   A null entry is stored by parse_arg wherever the raw / non-raw
   status of an argument's tokens changes.  */
1973 typedef struct macro_args macro_args;
1974 struct macro_args
/* NOTE(review): presumably the end offset of each argument within
   TOKENS - only its release in free_macro_args is visible here.  */
1976 unsigned int *ends;
1977 const cpp_token **tokens;
1978 unsigned int capacity;
1979 unsigned int used;
1980 unsigned short level;
1983 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
1984 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
1985 macro_args *, unsigned int *));
1986 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
1987 static void save_token PARAMS ((macro_args *, const cpp_token *));
1988 static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
1989 const cpp_token *));
1990 static int do_pop_context PARAMS ((cpp_reader *));
1991 static const cpp_token *pop_context PARAMS ((cpp_reader *));
1992 static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
1993 cpp_hashnode *,
1994 const cpp_token *));
1995 static void free_macro_args PARAMS ((macro_args *));
1997 /* Free the storage allocated for macro arguments. */
1998 static void
1999 free_macro_args (args)
2000 macro_args *args;
2002 if (args->tokens)
2003 free (args->tokens);
2004 free (args->ends);
2005 free (args);
2008 /* Determines if a macro has been already used (and is therefore
2009 disabled). */
2010 static int
2011 is_macro_disabled (pfile, expansion, token)
2012 cpp_reader *pfile;
2013 const cpp_toklist *expansion;
2014 const cpp_token *token;
2016 cpp_context *context = CURRENT_CONTEXT (pfile);
2018 /* Don't expand anything if this file has already been preprocessed. */
2019 if (CPP_OPTION (pfile, preprocessed))
2020 return 1;
2022 /* Arguments on either side of ## are inserted in place without
2023 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2024 occurs during a later rescan pass. The effect is that we expand
2025 iff we would as part of the macro's expansion list, so we should
2026 drop to the macro's context. */
2027 if (IS_ARG_CONTEXT (context))
2029 if (token->flags & PASTED)
2030 context--;
2031 else if (!(context->flags & CONTEXT_RAW))
2032 return 1;
2033 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2034 context--;
2037 /* Have we already used this macro? */
2038 while (context->level > 0)
2040 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2041 return 1;
2042 /* Raw argument tokens are judged based on the token list they
2043 came from. */
2044 if (context->flags & CONTEXT_RAW)
2045 context = pfile->contexts + context->level;
2046 else
2047 context--;
2050 /* Function-like macros may be disabled if the '(' is not in the
2051 current context. We check this without disrupting the context
2052 stack. */
2053 if (expansion->paramc >= 0)
2055 const cpp_token *next;
2056 unsigned int prev_nme;
2058 context = CURRENT_CONTEXT (pfile);
2059 /* Drop down any contexts we're at the end of: the '(' may
2060 appear in lower macro expansions, or in the rest of the file. */
2061 while (context->posn == context->count && context > pfile->contexts)
2063 context--;
2064 /* If we matched, we are disabled, as we appear in the
2065 expansion of each macro we meet. */
2066 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2067 return 1;
2070 prev_nme = pfile->no_expand_level;
2071 pfile->no_expand_level = context - pfile->contexts;
2072 next = cpp_get_token (pfile);
2073 restore_macro_expansion (pfile, prev_nme);
2074 if (next->type != CPP_OPEN_PAREN)
2076 _cpp_push_token (pfile, next);
2077 if (CPP_OPTION (pfile, warn_traditional))
2078 cpp_warning (pfile,
2079 "function macro %.*s must be used with arguments in traditional C",
2080 (int) token->val.node->length, token->val.node->name);
2081 return 1;
2085 return 0;
2088 /* Add a token to the set of tokens forming the arguments to the macro
2089 being parsed in parse_args. */
2090 static void
2091 save_token (args, token)
2092 macro_args *args;
2093 const cpp_token *token;
2095 if (args->used == args->capacity)
2097 args->capacity += args->capacity + 100;
2098 args->tokens = (const cpp_token **)
2099 xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2101 args->tokens[args->used++] = token;
2104 /* Take and save raw tokens until we finish one argument. Empty
2105 arguments are saved as a single CPP_PLACEMARKER token. */
2106 static const cpp_token *
2107 parse_arg (pfile, var_args, paren_context, args, pcount)
2108 cpp_reader *pfile;
2109 int var_args;
2110 unsigned int paren_context;
2111 macro_args *args;
2112 unsigned int *pcount;
2114 const cpp_token *token;
2115 unsigned int paren = 0, count = 0;
2116 int raw, was_raw = 1;
2118 for (count = 0;; count++)
2120 token = cpp_get_token (pfile);
2122 switch (token->type)
2124 default:
2125 break;
2127 case CPP_OPEN_PAREN:
2128 paren++;
2129 break;
2131 case CPP_CLOSE_PAREN:
2132 if (paren-- != 0)
2133 break;
2134 goto out;
2136 case CPP_COMMA:
2137 /* Commas are not terminators within parantheses or var_args. */
2138 if (paren || var_args)
2139 break;
2140 goto out;
2142 case CPP_EOF: /* Error reported by caller. */
2143 goto out;
2146 raw = pfile->cur_context <= paren_context;
2147 if (raw != was_raw)
2149 was_raw = raw;
2150 save_token (args, 0);
2151 count++;
2153 save_token (args, token);
2156 out:
2157 if (count == 0)
2159 /* Duplicate the placemarker. Then we can set its flags and
2160 position and safely be using more than one. */
2161 save_token (args, duplicate_token (pfile, &placemarker_token));
2162 count++;
2165 *pcount = count;
2166 return token;
/* Evaluates to true if the argument that begins at offset O of
   argument list A is empty: its first token - looking past a single
   leading null pointer, if there is one - is a CPP_PLACEMARKER.
   Both macro arguments are evaluated more than once.  */

#define empty_argument(A, O) \
  (((A)->tokens[O] ? (A)->tokens[O] : (A)->tokens[(O)+1])->type \
   == CPP_PLACEMARKER)
2177 /* Parse the arguments making up a macro invocation. Nested arguments
2178 are automatically macro expanded, but immediate macros are not
2179 expanded; this enables e.g. operator # to work correctly. Returns
2180 non-zero on error. */
2181 static int
2182 parse_args (pfile, hp, args)
2183 cpp_reader *pfile;
2184 cpp_hashnode *hp;
2185 macro_args *args;
2187 const cpp_token *token;
2188 const cpp_toklist *macro;
2189 unsigned int total = 0;
2190 unsigned int paren_context = pfile->cur_context;
2191 int argc = 0;
2193 macro = hp->value.expansion;
2196 unsigned int count;
2198 token = parse_arg (pfile, (argc + 1 == macro->paramc
2199 && (macro->flags & VAR_ARGS)),
2200 paren_context, args, &count);
2201 if (argc < macro->paramc)
2203 total += count;
2204 args->ends[argc] = total;
2206 argc++;
2208 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2210 if (token->type == CPP_EOF)
2212 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2213 hp->length, hp->name);
2214 return 1;
2216 else if (argc < macro->paramc)
2218 /* A rest argument is allowed to not appear in the invocation at all.
2219 e.g. #define debug(format, args...) ...
2220 debug("string");
2221 This is exactly the same as if the rest argument had received no
2222 tokens - debug("string",); This extension is deprecated. */
2224 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2226 /* Duplicate the placemarker. Then we can set its flags and
2227 position and safely be using more than one. */
2228 save_token (args, duplicate_token (pfile, &placemarker_token));
2229 args->ends[argc] = total + 1;
2230 return 0;
2232 else
2234 cpp_error (pfile,
2235 "insufficient arguments in invocation of macro \"%.*s\"",
2236 hp->length, hp->name);
2237 return 1;
2240 /* An empty argument to an empty function-like macro is fine. */
2241 else if (argc > macro->paramc
2242 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2244 cpp_error (pfile,
2245 "too many arguments in invocation of macro \"%.*s\"",
2246 hp->length, hp->name);
2247 return 1;
2250 return 0;
2253 /* Adds backslashes before all backslashes and double quotes appearing
2254 in strings. Non-printable characters are converted to octal. */
2255 static U_CHAR *
2256 quote_string (dest, src, len)
2257 U_CHAR *dest;
2258 const U_CHAR *src;
2259 unsigned int len;
2261 while (len--)
2263 U_CHAR c = *src++;
2265 if (c == '\\' || c == '"')
2267 *dest++ = '\\';
2268 *dest++ = c;
2270 else
2272 if (ISPRINT (c))
2273 *dest++ = c;
2274 else
2276 sprintf ((char *) dest, "\\%03o", c);
2277 dest += 4;
2282 return dest;
2285 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2286 CPP_STRING token containing TEXT in quoted form. */
2287 static cpp_token *
2288 make_string_token (token, text, len)
2289 cpp_token *token;
2290 const U_CHAR *text;
2291 unsigned int len;
2293 U_CHAR *buf;
2295 buf = (U_CHAR *) xmalloc (len * 4);
2296 token->type = CPP_STRING;
2297 token->flags = 0;
2298 token->val.str.text = buf;
2299 token->val.str.len = quote_string (buf, text, len) - buf;
2300 return token;
2303 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2304 evaluating to NUMBER. */
2305 static cpp_token *
2306 alloc_number_token (pfile, number)
2307 cpp_reader *pfile;
2308 int number;
2310 cpp_token *result;
2311 char *buf;
2313 result = get_temp_token (pfile);
2314 buf = xmalloc (20);
2315 sprintf (buf, "%d", number);
2317 result->type = CPP_NUMBER;
2318 result->flags = 0;
2319 result->val.str.text = (U_CHAR *) buf;
2320 result->val.str.len = strlen (buf);
2321 return result;
2324 /* Returns a temporary token from the temporary token store of PFILE. */
2325 static cpp_token *
2326 get_temp_token (pfile)
2327 cpp_reader *pfile;
2329 if (pfile->temp_used == pfile->temp_alloced)
2331 if (pfile->temp_used == pfile->temp_cap)
2333 pfile->temp_cap += pfile->temp_cap + 20;
2334 pfile->temp_tokens = (cpp_token **) xrealloc
2335 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2337 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2338 (sizeof (cpp_token));
2341 return pfile->temp_tokens[pfile->temp_used++];
2344 /* Release (not free) for re-use the temporary tokens of PFILE. */
2345 static void
2346 release_temp_tokens (pfile)
2347 cpp_reader *pfile;
2349 while (pfile->temp_used)
2351 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2353 if (token_spellings[token->type].type == SPELL_STRING)
2355 free ((char *) token->val.str.text);
2356 token->val.str.text = 0;
2361 /* Free all of PFILE's dynamically-allocated temporary tokens. */
2362 void
2363 _cpp_free_temp_tokens (pfile)
2364 cpp_reader *pfile;
2366 if (pfile->temp_tokens)
2368 /* It is possible, though unlikely (looking for '(' of a funlike
2369 macro into EOF), that we haven't released the tokens yet. */
2370 release_temp_tokens (pfile);
2371 while (pfile->temp_alloced)
2372 free (pfile->temp_tokens[--pfile->temp_alloced]);
2373 free (pfile->temp_tokens);
2376 if (pfile->date)
2378 free ((char *) pfile->date->val.str.text);
2379 free (pfile->date);
2380 free ((char *) pfile->time->val.str.text);
2381 free (pfile->time);
2385 /* Copy TOKEN into a temporary token from PFILE's store. */
2386 static cpp_token *
2387 duplicate_token (pfile, token)
2388 cpp_reader *pfile;
2389 const cpp_token *token;
2391 cpp_token *result = get_temp_token (pfile);
2393 *result = *token;
2394 if (token_spellings[token->type].type == SPELL_STRING)
2396 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2397 memcpy (buff, token->val.str.text, token->val.str.len);
2398 result->val.str.text = buff;
2400 return result;
2403 /* Determine whether two tokens can be pasted together, and if so,
2404 what the resulting token is. Returns CPP_EOF if the tokens cannot
2405 be pasted, or the appropriate type for the merged token if they
2406 can. */
2407 static enum cpp_ttype
2408 can_paste (pfile, token1, token2, digraph)
2409 cpp_reader * pfile;
2410 const cpp_token *token1, *token2;
2411 int* digraph;
2413 enum cpp_ttype a = token1->type, b = token2->type;
2414 int cxx = CPP_OPTION (pfile, cplusplus);
2416 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2417 return a + (CPP_EQ_EQ - CPP_EQ);
2419 switch (a)
2421 case CPP_GREATER:
2422 if (b == a) return CPP_RSHIFT;
2423 if (b == CPP_QUERY && cxx) return CPP_MAX;
2424 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2425 break;
2426 case CPP_LESS:
2427 if (b == a) return CPP_LSHIFT;
2428 if (b == CPP_QUERY && cxx) return CPP_MIN;
2429 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2430 if (CPP_OPTION (pfile, digraphs))
2432 if (b == CPP_COLON)
2433 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2434 if (b == CPP_MOD)
2435 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2437 break;
2439 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2440 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2441 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2443 case CPP_MINUS:
2444 if (b == a) return CPP_MINUS_MINUS;
2445 if (b == CPP_GREATER) return CPP_DEREF;
2446 break;
2447 case CPP_COLON:
2448 if (b == a && cxx) return CPP_SCOPE;
2449 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2450 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2451 break;
2453 case CPP_MOD:
2454 if (CPP_OPTION (pfile, digraphs))
2456 if (b == CPP_GREATER)
2457 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2458 if (b == CPP_COLON)
2459 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2461 break;
2462 case CPP_DEREF:
2463 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2464 break;
2465 case CPP_DOT:
2466 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2467 if (b == CPP_NUMBER) return CPP_NUMBER;
2468 break;
2470 case CPP_HASH:
2471 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2472 /* %:%: digraph */
2473 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2474 break;
2476 case CPP_NAME:
2477 if (b == CPP_NAME) return CPP_NAME;
2478 if (b == CPP_NUMBER
2479 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2480 if (b == CPP_CHAR
2481 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2482 if (b == CPP_STRING
2483 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2484 break;
2486 case CPP_NUMBER:
2487 if (b == CPP_NUMBER) return CPP_NUMBER;
2488 if (b == CPP_NAME) return CPP_NUMBER;
2489 if (b == CPP_DOT) return CPP_NUMBER;
2490 /* Numbers cannot have length zero, so this is safe. */
2491 if ((b == CPP_PLUS || b == CPP_MINUS)
2492 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2493 return CPP_NUMBER;
2494 break;
2496 default:
2497 break;
2500 return CPP_EOF;
2503 /* Check if TOKEN is to be ##-pasted with the token after it. */
2504 static const cpp_token *
2505 maybe_paste_with_next (pfile, token)
2506 cpp_reader *pfile;
2507 const cpp_token *token;
2509 cpp_token *pasted;
2510 const cpp_token *second;
2511 cpp_context *context = CURRENT_CONTEXT (pfile);
2513 /* Is this token on the LHS of ## ? */
2514 if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2515 && !(token->flags & PASTE_LEFT))
2516 return token;
2518 /* Prevent recursion, and possibly pushing back more than one token. */
2519 if (pfile->paste_level)
2520 return token;
2522 /* Suppress macro expansion for next token, but don't conflict with
2523 the other method of suppression. If it is an argument, macro
2524 expansion within the argument will still occur. */
2525 pfile->paste_level = pfile->cur_context;
2526 second = cpp_get_token (pfile);
2527 pfile->paste_level = 0;
2529 /* Ignore placemarker argument tokens (cannot be from an empty macro
2530 since macros are not expanded). */
2531 if (token->type == CPP_PLACEMARKER)
2532 pasted = duplicate_token (pfile, second);
2533 else if (second->type == CPP_PLACEMARKER)
2535 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2536 /* GCC has special extended semantics for a ## b where b is a
2537 varargs parameter: a disappears if b consists of no tokens.
2538 This extension is deprecated. */
2539 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2540 && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2541 == (unsigned) mac_context->u.list->paramc))
2543 cpp_warning (pfile, "deprecated GNU ## extension used");
2544 pasted = duplicate_token (pfile, second);
2546 else
2547 pasted = duplicate_token (pfile, token);
2549 else
2551 int digraph = 0;
2552 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2554 if (type == CPP_EOF)
2556 if (CPP_OPTION (pfile, warn_paste))
2557 cpp_warning (pfile,
2558 "pasting would not give a valid preprocessing token");
2559 _cpp_push_token (pfile, second);
2560 return token;
2563 if (type == CPP_NAME || type == CPP_NUMBER)
2565 /* Join spellings. */
2566 U_CHAR *buf, *end;
2568 pasted = get_temp_token (pfile);
2569 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2570 end = spell_token (pfile, token, buf);
2571 end = spell_token (pfile, second, end);
2572 *end = '\0';
2574 if (type == CPP_NAME)
2575 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2576 else
2578 pasted->val.str.text = uxstrdup (buf);
2579 pasted->val.str.len = end - buf;
2582 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2583 pasted = duplicate_token (pfile, second);
2584 else
2586 pasted = get_temp_token (pfile);
2587 pasted->val.integer = 0;
2590 pasted->type = type;
2591 pasted->flags = digraph ? DIGRAPH : 0;
2594 /* The pasted token gets the whitespace flags and position of the
2595 first token, the PASTE_LEFT flag of the second token, plus the
2596 PASTED flag to indicate it is the result of a paste. However, we
2597 want to preserve the DIGRAPH flag. */
2598 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2599 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2600 | (second->flags & PASTE_LEFT) | PASTED);
2601 pasted->col = token->col;
2602 pasted->line = token->line;
2604 return maybe_paste_with_next (pfile, pasted);
2607 /* Convert a token sequence to a single string token according to the
2608 rules of the ISO C #-operator. */
2609 #define INIT_SIZE 200
2610 static cpp_token *
2611 stringify_arg (pfile, token)
2612 cpp_reader *pfile;
2613 const cpp_token *token;
2615 cpp_token *result;
2616 unsigned char *main_buf;
2617 unsigned int prev_value, backslash_count = 0;
2618 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2620 prev_value = prevent_macro_expansion (pfile);
2621 main_buf = (unsigned char *) xmalloc (buf_cap);
2623 result = get_temp_token (pfile);
2624 ASSIGN_FLAGS_AND_POS (result, token);
2626 for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
2628 int escape;
2629 unsigned char *buf;
2630 unsigned int len = TOKEN_LEN (token);
2632 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2633 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2634 if (escape)
2635 len *= 4 + 1;
2637 if (buf_used + len > buf_cap)
2639 buf_cap = buf_used + len + INIT_SIZE;
2640 main_buf = xrealloc (main_buf, buf_cap);
2643 if (whitespace && (token->flags & PREV_WHITE))
2644 main_buf[buf_used++] = ' ';
2646 if (escape)
2647 buf = (unsigned char *) xmalloc (len);
2648 else
2649 buf = main_buf + buf_used;
2651 len = spell_token (pfile, token, buf) - buf;
2652 if (escape)
2654 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2655 free (buf);
2657 else
2658 buf_used += len;
2660 whitespace = 1;
2661 if (token->type == CPP_BACKSLASH)
2662 backslash_count++;
2663 else
2664 backslash_count = 0;
2667 /* Ignore the final \ of invalid string literals. */
2668 if (backslash_count & 1)
2670 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2671 buf_used--;
2674 result->type = CPP_STRING;
2675 result->val.str.text = main_buf;
2676 result->val.str.len = buf_used;
2677 restore_macro_expansion (pfile, prev_value);
2678 return result;
2681 /* Allocate more room on the context stack of PFILE. */
2682 static void
2683 expand_context_stack (pfile)
2684 cpp_reader *pfile;
2686 pfile->context_cap += pfile->context_cap + 20;
2687 pfile->contexts = (cpp_context *)
2688 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2691 /* Push the context of macro NODE onto the context stack. TOKEN is
2692 the CPP_NAME token invoking the macro. */
2693 static const cpp_token *
2694 push_macro_context (pfile, node, token)
2695 cpp_reader *pfile;
2696 cpp_hashnode *node;
2697 const cpp_token *token;
2699 unsigned char orig_flags;
2700 macro_args *args;
2701 cpp_context *context;
2703 if (pfile->cur_context > CPP_STACK_MAX)
2705 cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2706 return token;
2709 /* Token's flags may change when parsing args containing a nested
2710 invocation of this macro. */
2711 orig_flags = token->flags & (PREV_WHITE | BOL);
2712 args = 0;
2713 if (node->value.expansion->paramc >= 0)
2715 unsigned int error, prev_nme;
2717 /* Allocate room for the argument contexts, and parse them. */
2718 args = (macro_args *) xmalloc (sizeof (macro_args));
2719 args->ends = (unsigned int *)
2720 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2721 args->tokens = 0;
2722 args->capacity = 0;
2723 args->used = 0;
2724 args->level = pfile->cur_context;
2726 prev_nme = prevent_macro_expansion (pfile);
2727 pfile->args = args;
2728 error = parse_args (pfile, node, args);
2729 pfile->args = 0;
2730 restore_macro_expansion (pfile, prev_nme);
2731 if (error)
2733 free_macro_args (args);
2734 return token;
2738 /* Now push its context. */
2739 pfile->cur_context++;
2740 if (pfile->cur_context == pfile->context_cap)
2741 expand_context_stack (pfile);
2743 context = CURRENT_CONTEXT (pfile);
2744 context->u.list = node->value.expansion;
2745 context->args = args;
2746 context->posn = 0;
2747 context->count = context->u.list->tokens_used;
2748 context->level = pfile->cur_context;
2749 context->flags = 0;
2750 context->pushed_token = 0;
2752 /* Set the flags of the first token. We know there must
2753 be one, empty macros are a single placemarker token. */
2754 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2756 return cpp_get_token (pfile);
2759 /* Push an argument to the current macro onto the context stack.
2760 TOKEN is the MACRO_ARG token representing the argument expansion. */
2761 static const cpp_token *
2762 push_arg_context (pfile, token)
2763 cpp_reader *pfile;
2764 const cpp_token *token;
2766 cpp_context *context;
2767 macro_args *args;
2769 pfile->cur_context++;
2770 if (pfile->cur_context == pfile->context_cap)
2771 expand_context_stack (pfile);
2773 context = CURRENT_CONTEXT (pfile);
2774 args = context[-1].args;
2776 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2777 context->u.arg = args->tokens + context->count;
2778 context->count = args->ends[token->val.aux] - context->count;
2779 context->args = 0;
2780 context->posn = 0;
2781 context->level = args->level;
2782 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2783 context->pushed_token = 0;
2785 /* Set the flags of the first token. There is one. */
2787 const cpp_token *first = context->u.arg[0];
2788 if (!first)
2789 first = context->u.arg[1];
2791 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2792 token->flags & (PREV_WHITE | BOL));
2795 if (token->flags & STRINGIFY_ARG)
2796 return stringify_arg (pfile, token);
2798 if (token->flags & PASTE_LEFT)
2799 context->flags |= CONTEXT_PASTEL;
2800 if (pfile->paste_level)
2801 context->flags |= CONTEXT_PASTER;
2803 return get_raw_token (pfile);
2806 /* "Unget" a token. It is effectively inserted in the token queue and
2807 will be returned by the next call to get_raw_token. */
2808 void
2809 _cpp_push_token (pfile, token)
2810 cpp_reader *pfile;
2811 const cpp_token *token;
2813 cpp_context *context = CURRENT_CONTEXT (pfile);
2814 if (context->pushed_token)
2815 cpp_ice (pfile, "two tokens pushed in a row");
2816 if (token->type != CPP_EOF)
2817 context->pushed_token = token;
2818 /* Don't push back a directive's CPP_EOF, step back instead. */
2819 else if (pfile->cur_context == 0)
2820 pfile->contexts[0].posn--;
2823 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2824 introducing the directive. */
2825 static void
2826 process_directive (pfile, token)
2827 cpp_reader *pfile;
2828 const cpp_token *token;
2830 const struct directive *d = pfile->token_list.directive;
2831 int prev_nme = 0;
2833 /* Skip over the directive name. */
2834 if (token[1].type == CPP_NAME)
2835 _cpp_get_raw_token (pfile);
2836 else if (token[1].type != CPP_NUMBER)
2837 cpp_ice (pfile, "directive begins with %s?!",
2838 token_names[token[1].type]);
2840 /* Flush pending tokens at this point, in case the directive produces
2841 output. XXX Directive output won't be visible to a direct caller of
2842 cpp_get_token. */
2843 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2844 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2846 if (! (d->flags & EXPAND))
2847 prev_nme = prevent_macro_expansion (pfile);
2848 (void) (*d->handler) (pfile);
2849 if (! (d->flags & EXPAND))
2850 restore_macro_expansion (pfile, prev_nme);
2851 _cpp_skip_rest_of_line (pfile);
2854 /* The external interface to return the next token. All macro
2855 expansion and directive processing is handled internally, the
2856 caller only ever sees the output after preprocessing. */
2857 const cpp_token *
2858 cpp_get_token (pfile)
2859 cpp_reader *pfile;
2861 const cpp_token *token;
2862 cpp_hashnode *node;
2864 /* Loop till we hit a non-directive, non-skipped, non-placemarker token. */
2865 for (;;)
2867 token = get_raw_token (pfile);
2868 if (token->flags & BOL && token->type == CPP_HASH
2869 && pfile->token_list.directive)
2871 process_directive (pfile, token);
2872 continue;
2875 /* Short circuit EOF. */
2876 if (token->type == CPP_EOF)
2877 return token;
2879 if (pfile->skipping && ! pfile->token_list.directive)
2881 _cpp_skip_rest_of_line (pfile);
2882 continue;
2884 break;
2887 /* If there's a potential control macro and we get here, then that
2888 #ifndef didn't cover the entire file and its argument shouldn't
2889 be taken as a control macro. */
2890 pfile->potential_control_macro = 0;
2892 token = maybe_paste_with_next (pfile, token);
2894 if (token->type != CPP_NAME)
2895 return token;
2897 /* Is macro expansion disabled in general? */
2898 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2899 return token;
2901 node = token->val.node;
2902 if (node->type == T_VOID)
2903 return token;
2905 if (node->type == T_MACRO)
2907 if (is_macro_disabled (pfile, node->value.expansion, token))
2908 return token;
2910 return push_macro_context (pfile, node, token);
2912 else
2913 return special_symbol (pfile, node, token);
2916 /* Returns the next raw token, i.e. without performing macro
2917 expansion. Argument contexts are automatically entered. */
2918 static const cpp_token *
2919 get_raw_token (pfile)
2920 cpp_reader *pfile;
2922 const cpp_token *result;
2923 cpp_context *context = CURRENT_CONTEXT (pfile);
2925 if (context->pushed_token)
2927 result = context->pushed_token;
2928 context->pushed_token = 0;
2930 else if (context->posn == context->count)
2931 result = pop_context (pfile);
2932 else
2934 if (IS_ARG_CONTEXT (context))
2936 result = context->u.arg[context->posn++];
2937 if (result == 0)
2939 context->flags ^= CONTEXT_RAW;
2940 result = context->u.arg[context->posn++];
2942 return result; /* Cannot be a CPP_MACRO_ARG */
2944 result = &context->u.list->tokens[context->posn++];
2947 if (result->type == CPP_MACRO_ARG)
2948 result = push_arg_context (pfile, result);
2949 return result;
2952 /* Internal interface to get the token without macro expanding. */
2953 const cpp_token *
2954 _cpp_get_raw_token (pfile)
2955 cpp_reader *pfile;
2957 int prev_nme = prevent_macro_expansion (pfile);
2958 const cpp_token *result = cpp_get_token (pfile);
2959 restore_macro_expansion (pfile, prev_nme);
2960 return result;
2963 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2964 list should be overwritten, or zero if we need to append
2965 (typically, if we are within the arguments to a macro, or looking
2966 for the '(' to start a function-like macro invocation). */
2967 static int
2968 lex_next (pfile, clear)
2969 cpp_reader *pfile;
2970 int clear;
2972 cpp_toklist *list = &pfile->token_list;
2973 const cpp_token *old_list = list->tokens;
2974 unsigned int old_used = list->tokens_used;
2976 /* If we are currently processing a directive, do not advance. 6.10
2977 paragraph 2: A new-line character ends the directive even if it
2978 occurs within what would otherwise be an invocation of a
2979 function-like macro.
2981 It is possible that clear == 1 too; e.g. "#if funlike_macro ("
2982 since parse_args swallowed the directive's EOF. */
2983 if (list->directive)
2984 return 1;
2986 if (clear)
2988 /* Release all temporary tokens. */
2989 _cpp_clear_toklist (list);
2990 pfile->contexts[0].posn = 0;
2991 if (pfile->temp_used)
2992 release_temp_tokens (pfile);
2995 lex_line (pfile, list);
2996 pfile->contexts[0].count = list->tokens_used;
2998 if (!clear && pfile->args)
3000 /* Fix up argument token pointers. */
3001 if (old_list != list->tokens)
3003 unsigned int i;
3005 for (i = 0; i < pfile->args->used; i++)
3007 const cpp_token *token = pfile->args->tokens[i];
3008 if (token >= old_list && token < old_list + old_used)
3009 pfile->args->tokens[i] = (const cpp_token *)
3010 ((char *) token + ((char *) list->tokens - (char *) old_list));
3014 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3015 tokens within the list of arguments that would otherwise act as
3016 preprocessing directives, the behavior is undefined.
3018 This implementation will report a hard error and treat the
3019 'sequence of preprocessing tokens' as part of the macro argument,
3020 not a directive.
3022 Note if pfile->args == 0, we're OK since we're only inside a
3023 macro argument after a '('. */
3024 if (list->directive)
3026 cpp_error_with_line (pfile, list->tokens[old_used].line,
3027 list->tokens[old_used].col,
3028 "#%s may not be used inside a macro argument",
3029 list->directive->name);
3030 return 1;
3034 return 0;
3037 /* Pops a context of the context stack. If we're at the bottom, lexes
3038 the next logical line. Returns 1 if we're at the end of the
3039 argument list to the # operator, or if it is illegal to "overflow"
3040 into the rest of the file (e.g. 6.10.3.1.1). */
3041 static int
3042 do_pop_context (pfile)
3043 cpp_reader *pfile;
3045 cpp_context *context;
3047 if (pfile->cur_context == 0)
3048 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3050 /* Argument contexts, when parsing args or handling # operator
3051 return CPP_EOF at the end. */
3052 context = CURRENT_CONTEXT (pfile);
3053 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3054 return 1;
3056 /* Free resources when leaving macro contexts. */
3057 if (context->args)
3058 free_macro_args (context->args);
3060 if (pfile->cur_context == pfile->no_expand_level)
3061 pfile->no_expand_level--;
3062 pfile->cur_context--;
3064 return 0;
3067 /* Move down the context stack, and return the next raw token. */
3068 static const cpp_token *
3069 pop_context (pfile)
3070 cpp_reader *pfile;
3072 if (do_pop_context (pfile))
3073 return &eof_token;
3074 return get_raw_token (pfile);
3077 /* Turn off macro expansion at the current context level. */
3078 static unsigned int
3079 prevent_macro_expansion (pfile)
3080 cpp_reader *pfile;
3082 unsigned int prev_value = pfile->no_expand_level;
3083 pfile->no_expand_level = pfile->cur_context;
3084 return prev_value;
3087 /* Restore macro expansion to its previous state. */
3088 static void
3089 restore_macro_expansion (pfile, prev_value)
3090 cpp_reader *pfile;
3091 unsigned int prev_value;
3093 pfile->no_expand_level = prev_value;
3096 /* Used by cpperror.c to obtain the correct line and column to report
3097 in a diagnostic. */
3098 unsigned int
3099 _cpp_get_line (pfile, pcol)
3100 cpp_reader *pfile;
3101 unsigned int *pcol;
3103 unsigned int index;
3104 const cpp_token *cur_token;
3106 if (pfile->in_lex_line)
3107 index = pfile->token_list.tokens_used;
3108 else
3109 index = pfile->contexts[0].posn;
3111 cur_token = &pfile->token_list.tokens[index - 1];
3112 if (pcol)
3113 *pcol = cur_token->col;
3114 return cur_token->line;
3117 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3118 static const char * const monthnames[] =
3120 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3121 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3124 /* Handle builtin macros like __FILE__. */
3125 static const cpp_token *
3126 special_symbol (pfile, node, token)
3127 cpp_reader *pfile;
3128 cpp_hashnode *node;
3129 const cpp_token *token;
3131 cpp_token *result;
3132 cpp_buffer *ip;
3134 switch (node->type)
3136 case T_FILE:
3137 case T_BASE_FILE:
3139 const char *file;
3141 ip = CPP_BUFFER (pfile);
3142 if (ip == 0)
3143 file = "";
3144 else
3146 if (node->type == T_BASE_FILE)
3147 while (CPP_PREV_BUFFER (ip) != NULL)
3148 ip = CPP_PREV_BUFFER (ip);
3150 file = ip->nominal_fname;
3152 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3153 strlen (file));
3155 break;
3157 case T_INCLUDE_LEVEL:
3159 int true_indepth = 0;
3161 /* Do not count the primary source file in the include level. */
3162 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3163 while (ip)
3165 true_indepth++;
3166 ip = CPP_PREV_BUFFER (ip);
3168 result = alloc_number_token (pfile, true_indepth);
3170 break;
3172 case T_SPECLINE:
3173 /* If __LINE__ is embedded in a macro, it must expand to the
3174 line of the macro's invocation, not its definition.
3175 Otherwise things like assert() will not work properly. */
3176 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3177 break;
3179 case T_STDC:
3181 int stdc = 1;
3183 #ifdef STDC_0_IN_SYSTEM_HEADERS
3184 if (CPP_IN_SYSTEM_HEADER (pfile)
3185 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3186 stdc = 0;
3187 #endif
3188 result = alloc_number_token (pfile, stdc);
3190 break;
3192 case T_DATE:
3193 case T_TIME:
3194 if (pfile->date == 0)
3196 /* Allocate __DATE__ and __TIME__ from permanent storage,
3197 and save them in pfile so we don't have to do this again.
3198 We don't generate these strings at init time because
3199 time() and localtime() are very slow on some systems. */
3200 time_t tt = time (NULL);
3201 struct tm *tb = localtime (&tt);
3203 pfile->date = make_string_token
3204 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3205 pfile->time = make_string_token
3206 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3208 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3209 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3210 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3211 tb->tm_hour, tb->tm_min, tb->tm_sec);
3213 result = node->type == T_DATE ? pfile->date: pfile->time;
3214 break;
3216 case T_POISON:
3217 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3218 return token;
3220 default:
3221 cpp_ice (pfile, "invalid special hash type");
3222 return token;
3225 ASSIGN_FLAGS_AND_POS (result, token);
3226 return result;
3228 #undef DSC
3230 /* Dump the original user's spelling of argument index ARG_NO to the
3231 macro whose expansion is LIST. */
3232 static void
3233 dump_param_spelling (pfile, list, arg_no)
3234 cpp_reader *pfile;
3235 const cpp_toklist *list;
3236 unsigned int arg_no;
3238 const U_CHAR *param = list->namebuf;
3240 while (arg_no--)
3241 param += ustrlen (param) + 1;
3242 CPP_PUTS (pfile, param, ustrlen (param));
3245 /* Dump a token list to the output. */
3246 void
3247 _cpp_dump_list (pfile, list, token, flush)
3248 cpp_reader *pfile;
3249 const cpp_toklist *list;
3250 const cpp_token *token;
3251 int flush;
3253 const cpp_token *limit = list->tokens + list->tokens_used;
3254 const cpp_token *prev = 0;
3256 /* Avoid the CPP_EOF. */
3257 if (list->directive)
3258 limit--;
3260 while (token < limit)
3262 if (token->type == CPP_MACRO_ARG)
3264 if (token->flags & PREV_WHITE)
3265 CPP_PUTC (pfile, ' ');
3266 if (token->flags & STRINGIFY_ARG)
3267 CPP_PUTC (pfile, '#');
3268 dump_param_spelling (pfile, list, token->val.aux);
3270 else
3271 output_token (pfile, token, prev);
3272 if (token->flags & PASTE_LEFT)
3273 CPP_PUTS (pfile, " ##", 3);
3274 prev = token;
3275 token++;
3278 if (flush && pfile->printer)
3279 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3282 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3283 if it hasn't happened already. */
3285 void
3286 _cpp_init_input_buffer (pfile)
3287 cpp_reader *pfile;
3289 init_trigraph_map ();
3290 pfile->context_cap = 20;
3291 pfile->contexts = (cpp_context *)
3292 xmalloc (pfile->context_cap * sizeof (cpp_context));
3293 pfile->cur_context = 0;
3294 pfile->contexts[0].u.list = &pfile->token_list;
3296 pfile->contexts[0].posn = 0;
3297 pfile->contexts[0].count = 0;
3298 pfile->no_expand_level = UINT_MAX;
3300 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3303 /* Moves to the end of the directive line, popping contexts as
3304 necessary. */
3305 void
3306 _cpp_skip_rest_of_line (pfile)
3307 cpp_reader *pfile;
3309 /* Get to base context. Clear parsing args and each contexts flags,
3310 since these can cause pop_context to return without popping. */
3311 pfile->no_expand_level = UINT_MAX;
3312 while (pfile->cur_context != 0)
3314 pfile->contexts[pfile->cur_context].flags = 0;
3315 do_pop_context (pfile);
3318 pfile->contexts[pfile->cur_context].count = 0;
3319 pfile->contexts[pfile->cur_context].posn = 0;
3320 pfile->token_list.directive = 0;
3323 /* Directive handler wrapper used by the command line option
3324 processor. */
3325 void
3326 _cpp_run_directive (pfile, dir, buf, count)
3327 cpp_reader *pfile;
3328 const struct directive *dir;
3329 const char *buf;
3330 size_t count;
3332 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3334 unsigned int prev_lvl = 0;
3335 /* scan the line now, else prevent_macro_expansion won't work */
3336 do_pop_context (pfile);
3337 if (! (dir->flags & EXPAND))
3338 prev_lvl = prevent_macro_expansion (pfile);
3340 (void) (*dir->handler) (pfile);
3342 if (! (dir->flags & EXPAND))
3343 restore_macro_expansion (pfile, prev_lvl);
3345 _cpp_skip_rest_of_line (pfile);
3346 cpp_pop_buffer (pfile);