2000-07-03 Donn Terry (donnte@microsoft.com)
[official-gcc.git] / gcc / cpplex.c
blob23d65401e67c3bb1e2f3d4538ce71c2ead1ebf38
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 Cleanups to do:-
27 o Fix ISTABLE to flag the parts we want for IS_HSPACE and IS_NEWLINE.
28 o Get use of digraphs in sync with the standard reqd on the command line.
29 o -dM and with _cpp_dump_list: too many \n output.
30 o Put a printer object in cpp_reader?
31 o Check line numbers assigned to all errors.
32 o Replace strncmp with memcmp almost everywhere.
33 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
34 o Get rid of cpp_get_directive_token.
35 o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
36 give it a run when we've got some code.
37 o _cpp_parse_expr updated to new lexer.
38 o Distinguish integers, floats, and 'other' pp-numbers.
39 o Store ints and char constants as binary values.
40 o New command-line assertion syntax.
41 o Merge hash table text pointer and token list text pointer for identifiers.
42 o Have _cpp_parse_expr use all the information the new lexer provides.
43 o Work towards functions in cpperror.c taking a message level parameter.
44 If we do this, merge the common code of do_warning and do_error.
45 o Comment all functions, and describe macro expansion algorithm.
46 o Move as much out of header files as possible.
47 o Remove single quote pairs `', and some '', from diagnostics.
48 o Correct pastability test for CPP_NAME and CPP_NUMBER.
52 #include "config.h"
53 #include "system.h"
54 #include "intl.h"
55 #include "cpplib.h"
56 #include "cpphash.h"
57 #include "symcat.h"
59 #define auto_expand_name_space(list) \
60 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
61 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
62 size_t, FILE *));
63 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
64 unsigned int));
65 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
66 unsigned int));
68 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
69 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
70 unsigned char *));
71 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
72 const unsigned char *));
73 static int skip_block_comment PARAMS ((cpp_reader *));
74 static int skip_line_comment PARAMS ((cpp_reader *));
75 static void skip_whitespace PARAMS ((cpp_reader *, int));
76 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
79 unsigned int));
80 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
81 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
82 const unsigned char *,
83 unsigned int, unsigned int));
84 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
85 static int lex_next PARAMS ((cpp_reader *, int));
86 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
87 const cpp_token *));
89 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
90 static void expand_context_stack PARAMS ((cpp_reader *));
91 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
92 unsigned char *));
93 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
94 const cpp_token *));
95 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
96 cpp_token *));
97 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
98 unsigned int));
99 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
100 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
101 const cpp_token *));
102 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
103 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
104 const cpp_token *));
105 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
106 const cpp_token *, int *));
107 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
108 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
109 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
110 static void release_temp_tokens PARAMS ((cpp_reader *));
111 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
112 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
114 #define INIT_TOKEN_NAME(list, token) \
115 do {(token)->val.name.len = 0; \
116 (token)->val.name.text = (list)->namebuf + (list)->name_used; \
117 } while (0)
119 #define VALID_SIGN(c, prevc) \
120 (((c) == '+' || (c) == '-') && \
121 ((prevc) == 'e' || (prevc) == 'E' \
122 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
124 /* Maybe put these in the ISTABLE eventually. */
125 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
126 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character just consumed and CUR points at the character after it.
   If that character is the *other* newline character ('\r' + '\n' - C),
   it is consumed as part of the same line ending.  Assumes next
   character, if any, is in buffer.

   Relies on a variable named `pfile' being in scope at the point of
   use: bumps the current buffer's line number, records CUR as the start
   of the new line, and resets the column adjustment.  CUR is evaluated
   several times and must be a modifiable lvalue.  */

#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
140 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
141 #define PREV_TOKEN_TYPE (cur_token[-1].type)
143 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
144 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
145 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
146 #define BACKUP_DIGRAPH(ttype) do { \
147 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
149 /* An upper bound on the number of bytes needed to spell a token,
150 including preceding whitespace. */
151 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
152 SPELL_NONE ? (token)->val.name.len: 0))
154 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
155 #define I(e, s) {SPELL_IDENT, s},
156 #define S(e, s) {SPELL_STRING, s},
157 #define C(e, s) {SPELL_CHAR, s},
158 #define N(e, s) {SPELL_NONE, s},
160 const struct token_spelling
161 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
163 #undef T
164 #undef I
165 #undef S
166 #undef C
167 #undef N
169 /* For debugging: the internal names of the tokens. */
170 #define T(e, s) U STRINGX(e) + 4,
171 #define I(e, s) U STRINGX(e) + 4,
172 #define S(e, s) U STRINGX(e) + 4,
173 #define C(e, s) U STRINGX(e) + 4,
174 #define N(e, s) U STRINGX(e) + 4,
176 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
178 #undef T
179 #undef I
180 #undef S
181 #undef C
182 #undef N
184 /* The following table is used by trigraph_ok/trigraph_replace. If we
185 have designated initializers, it can be constant data; otherwise,
186 it is set up at runtime by _cpp_init_input_buffer. */
188 #if (GCC_VERSION >= 2007)
189 #define init_trigraph_map() /* nothing */
190 #define TRIGRAPH_MAP \
191 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
192 #define END };
193 #define s(p, v) [p] = v,
194 #else
195 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
196 static void init_trigraph_map PARAMS ((void)) { \
197 unsigned char *x = trigraph_map;
198 #define END }
199 #define s(p, v) x[p] = v;
200 #endif
202 TRIGRAPH_MAP
203 s('=', '#') s(')', ']') s('!', '|')
204 s('(', '[') s('\'', '^') s('>', '}')
205 s('/', '\\') s('<', '{') s('-', '~')
208 #undef TRIGRAPH_MAP
209 #undef END
210 #undef s
212 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
214 void
215 _cpp_grow_token_buffer (pfile, n)
216 cpp_reader *pfile;
217 long n;
219 long old_written = CPP_WRITTEN (pfile);
220 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
221 pfile->token_buffer = (U_CHAR *)
222 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
223 CPP_SET_WRITTEN (pfile, old_written);
226 /* Deal with the annoying semantics of fwrite. */
227 static void
228 safe_fwrite (pfile, buf, len, fp)
229 cpp_reader *pfile;
230 const U_CHAR *buf;
231 size_t len;
232 FILE *fp;
234 size_t count;
236 while (len)
238 count = fwrite (buf, 1, len, fp);
239 if (count == 0)
240 goto error;
241 len -= count;
242 buf += count;
244 return;
246 error:
247 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
250 /* Notify the compiler proper that the current line number has jumped,
251 or the current file name has changed. */
253 static void
254 output_line_command (pfile, print, line)
255 cpp_reader *pfile;
256 cpp_printer *print;
257 unsigned int line;
259 cpp_buffer *ip = CPP_BUFFER (pfile);
260 enum { same = 0, enter, leave, rname } change;
261 static const char * const codes[] = { "", " 1", " 2", "" };
263 if (line == 0)
264 return;
266 /* End the previous line of text. */
267 if (pfile->need_newline)
268 putc ('\n', print->outf);
269 pfile->need_newline = 0;
271 if (CPP_OPTION (pfile, no_line_commands))
272 return;
274 /* If ip is null, we've been called from cpp_finish, and they just
275 needed the final flush and trailing newline. */
276 if (!ip)
277 return;
279 if (pfile->include_depth == print->last_id)
281 /* Determine whether the current filename has changed, and if so,
282 how. 'nominal_fname' values are unique, so they can be compared
283 by comparing pointers. */
284 if (ip->nominal_fname == print->last_fname)
285 change = same;
286 else
287 change = rname;
289 else
291 if (pfile->include_depth > print->last_id)
292 change = enter;
293 else
294 change = leave;
295 print->last_id = pfile->include_depth;
297 print->last_fname = ip->nominal_fname;
299 /* If the current file has not changed, we can output a few newlines
300 instead if we want to increase the line number by a small amount.
301 We cannot do this if print->lineno is zero, because that means we
302 haven't output any line commands yet. (The very first line
303 command output is a `same_file' command.) */
304 if (change == same && print->lineno > 0
305 && line >= print->lineno && line < print->lineno + 8)
307 while (line > print->lineno)
309 putc ('\n', print->outf);
310 print->lineno++;
312 return;
315 #ifndef NO_IMPLICIT_EXTERN_C
316 if (CPP_OPTION (pfile, cplusplus))
317 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
318 codes[change],
319 ip->inc->sysp ? " 3" : "",
320 (ip->inc->sysp == 2) ? " 4" : "");
321 else
322 #endif
323 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
324 codes[change],
325 ip->inc->sysp ? " 3" : "");
326 print->lineno = line;
329 /* Write the contents of the token_buffer to the output stream, and
330 clear the token_buffer. Also handles generating line commands and
331 keeping track of file transitions. */
333 void
334 cpp_output_tokens (pfile, print, line)
335 cpp_reader *pfile;
336 cpp_printer *print;
337 unsigned int line;
339 if (CPP_WRITTEN (pfile) - print->written)
341 safe_fwrite (pfile, pfile->token_buffer,
342 CPP_WRITTEN (pfile) - print->written, print->outf);
343 pfile->need_newline = 1;
344 if (print->lineno)
345 print->lineno++;
347 CPP_SET_WRITTEN (pfile, print->written);
349 output_line_command (pfile, print, line);
352 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
354 void
355 cpp_scan_buffer_nooutput (pfile)
356 cpp_reader *pfile;
358 unsigned int old_written = CPP_WRITTEN (pfile);
359 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
361 for (;;)
363 /* In no-output mode, we can ignore everything but directives. */
364 const cpp_token *token = cpp_get_token (pfile);
365 if (token->type == CPP_EOF)
367 cpp_pop_buffer (pfile);
368 if (CPP_BUFFER (pfile) == stop)
369 break;
371 _cpp_skip_rest_of_line (pfile);
373 CPP_SET_WRITTEN (pfile, old_written);
376 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
378 void
379 cpp_scan_buffer (pfile, print)
380 cpp_reader *pfile;
381 cpp_printer *print;
383 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
384 const cpp_token *token, *prev = 0;
386 for (;;)
388 token = cpp_get_token (pfile);
389 if (token->type == CPP_EOF)
391 cpp_pop_buffer (pfile);
392 if (CPP_BUFFER (pfile) == stop)
393 return;
394 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
395 prev = 0;
396 continue;
399 if (token->flags & BOL)
401 cpp_output_tokens (pfile, print, pfile->token_list.line);
402 prev = 0;
405 output_token (pfile, token, prev);
406 prev = token;
410 /* Helper routine used by parse_include, which can't see spell_token.
411 Reinterpret the current line as an h-char-sequence (< ... >); we are
412 looking at the first token after the <. */
413 const cpp_token *
414 _cpp_glue_header_name (pfile)
415 cpp_reader *pfile;
417 unsigned int written = CPP_WRITTEN (pfile);
418 const cpp_token *t;
419 cpp_token *hdr;
420 U_CHAR *buf;
421 size_t len;
423 for (;;)
425 t = cpp_get_token (pfile);
426 if (t->type == CPP_GREATER || t->type == CPP_EOF)
427 break;
429 CPP_RESERVE (pfile, TOKEN_LEN (t));
430 if (t->flags & PREV_WHITE)
431 CPP_PUTC_Q (pfile, ' ');
432 pfile->limit = spell_token (pfile, t, pfile->limit);
435 if (t->type == CPP_EOF)
436 cpp_error (pfile, "missing terminating > character");
438 len = CPP_WRITTEN (pfile) - written;
439 buf = xmalloc (len);
440 memcpy (buf, pfile->token_buffer + written, len);
441 CPP_SET_WRITTEN (pfile, written);
443 hdr = get_temp_token (pfile);
444 hdr->type = CPP_HEADER_NAME;
445 hdr->flags = 0;
446 hdr->val.name.text = buf;
447 hdr->val.name.len = len;
448 return hdr;
451 /* Token-buffer helper functions. */
453 /* Expand a token list's string space. It is *vital* that
454 list->tokens_used is correct, to get pointer fix-up right. */
455 void
456 _cpp_expand_name_space (list, len)
457 cpp_toklist *list;
458 unsigned int len;
460 const U_CHAR *old_namebuf;
462 old_namebuf = list->namebuf;
463 list->name_cap += len;
464 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
466 /* Fix up token text pointers. */
467 if (list->namebuf != old_namebuf)
469 unsigned int i;
471 for (i = 0; i < list->tokens_used; i++)
472 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
473 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
477 /* If there is not enough room for LEN more characters, expand the
478 list by just enough to have room for LEN characters. */
479 void
480 _cpp_reserve_name_space (list, len)
481 cpp_toklist *list;
482 unsigned int len;
484 unsigned int room = list->name_cap - list->name_used;
486 if (room < len)
487 _cpp_expand_name_space (list, len - room);
490 /* Expand the number of tokens in a list. */
491 void
492 _cpp_expand_token_space (list, count)
493 cpp_toklist *list;
494 unsigned int count;
496 unsigned int n;
498 list->tokens_cap += count;
499 n = list->tokens_cap;
500 if (list->flags & LIST_OFFSET)
501 list->tokens--, n++;
502 list->tokens = (cpp_token *)
503 xrealloc (list->tokens, n * sizeof (cpp_token));
504 if (list->flags & LIST_OFFSET)
505 list->tokens++; /* Skip the dummy. */
508 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
509 an extra token in front of the token list, as this allows the lexer
510 to always peek at the previous token without worrying about
511 underflowing the list, and some initial space. Otherwise, no
512 token- or name-space is allocated, and there is no dummy token. */
513 void
514 _cpp_init_toklist (list, flags)
515 cpp_toklist *list;
516 int flags;
518 if (flags == NO_DUMMY_TOKEN)
520 list->tokens_cap = 0;
521 list->tokens = 0;
522 list->name_cap = 0;
523 list->namebuf = 0;
524 list->flags = 0;
526 else
528 /* Initialize token space. Put a dummy token before the start
529 that will fail matches. */
530 list->tokens_cap = 256; /* 4K's worth. */
531 list->tokens = (cpp_token *)
532 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
533 list->tokens[0].type = CPP_EOF;
534 list->tokens++;
536 /* Initialize name space. */
537 list->name_cap = 1024;
538 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
539 list->flags = LIST_OFFSET;
542 _cpp_clear_toklist (list);
545 /* Clear a token list. */
546 void
547 _cpp_clear_toklist (list)
548 cpp_toklist *list;
550 list->tokens_used = 0;
551 list->name_used = 0;
552 list->directive = 0;
553 list->paramc = 0;
554 list->params_len = 0;
555 list->flags &= LIST_OFFSET; /* clear all but that one */
558 /* Free a token list. Does not free the list itself, which may be
559 embedded in a larger structure. */
560 void
561 _cpp_free_toklist (list)
562 const cpp_toklist *list;
564 if (list->flags & LIST_OFFSET)
565 free (list->tokens - 1); /* Backup over dummy token. */
566 else
567 free (list->tokens);
568 free (list->namebuf);
571 /* Compare two tokens. */
573 _cpp_equiv_tokens (a, b)
574 const cpp_token *a, *b;
576 if (a->type == b->type && a->flags == b->flags)
577 switch (token_spellings[a->type].type)
579 default: /* Keep compiler happy. */
580 case SPELL_OPERATOR:
581 return 1;
582 case SPELL_CHAR:
583 case SPELL_NONE:
584 return a->val.aux == b->val.aux; /* arg_no or character. */
585 case SPELL_IDENT:
586 case SPELL_STRING:
587 return (a->val.name.len == b->val.name.len
588 && !memcmp (a->val.name.text, b->val.name.text,
589 a->val.name.len));
592 return 0;
595 /* Compare two token lists. */
597 _cpp_equiv_toklists (a, b)
598 const cpp_toklist *a, *b;
600 unsigned int i;
602 if (a->tokens_used != b->tokens_used
603 || a->flags != b->flags
604 || a->paramc != b->paramc)
605 return 0;
607 for (i = 0; i < a->tokens_used; i++)
608 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
609 return 0;
610 return 1;
613 /* Utility routine:
614 Compares, in the manner of strcmp(3), the token beginning at TOKEN
615 and extending for LEN characters to the NUL-terminated string
616 STRING. Typical usage:
618 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
619 "inline"))
620 { ... }
624 cpp_idcmp (token, len, string)
625 const U_CHAR *token;
626 size_t len;
627 const char *string;
629 size_t len2 = strlen (string);
630 int r;
632 if ((r = memcmp (token, string, MIN (len, len2))))
633 return r;
635 /* The longer of the two strings sorts after the shorter. */
636 if (len == len2)
637 return 0;
638 else if (len < len2)
639 return -1;
640 else
641 return 1;
644 /* Lexing algorithm.
646 The original lexer in cpplib was made up of two passes: a first pass
647 that replaced trigraphs and deleted escaped newlines, and a second
648 pass that tokenized the result of the first pass. Tokenisation was
649 performed by peeking at the next character in the input stream. For
650 example, if the input stream contained "!=", the handler for the !
651 character would peek at the next character, and if it were a '='
652 would skip over it, and return a "!=" token, otherwise it would
653 return just the "!" token.
655 To implement a single-pass lexer, this peeking ahead is unworkable.
656 An arbitrary number of escaped newlines, and trigraphs (in particular
657 ??/ which translates to the escape \), could separate the '!' and '='
658 in the input stream, yet the next token is still a "!=".
660 Suppose instead that we lex by one logical line at a time, producing
661 a token list or stack for each logical line, and when seeing the '!'
662 push a CPP_NOT token on the list. Then if the '!' is part of a
663 longer token ("!=") we know we must see the remainder of the token by
664 the time we reach the end of the logical line. Thus we can have the
665 '=' handler look at the previous token (at the end of the list / top
666 of the stack) and see if it is a "!" token, and if so, instead of
667 pushing a "=" token revise the existing token to be a "!=" token.
669 This works in the presence of escaped newlines, because the '\' would
670 have been pushed on the top of the stack as a CPP_BACKSLASH. The
671 newline ('\n' or '\r') handler looks at the token at the top of the
672 stack to see if it is a CPP_BACKSLASH, and if so discards both.
673 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
674 the '=' handler would never see any intervening escaped newlines.
676 To make trigraphs work in this context, as in precedence trigraphs
677 are highest and converted before anything else, the '?' handler does
678 lookahead to see if it is a trigraph, and if so skips the trigraph
679 and pushes the token it represents onto the top of the stack. This
680 also works in the particular case of a CPP_BACKSLASH trigraph.
682 To the preprocessor, whitespace is only significant to the point of
683 knowing whether whitespace precedes a particular token. For example,
684 the '=' handler needs to know whether there was whitespace between it
685 and a "!" token on the top of the stack, to make the token conversion
686 decision correctly. So each token has a PREV_WHITE flag to
687 indicate this - the standard permits consecutive whitespace to be
688 regarded as a single space. The compiler front ends are not
689 interested in whitespace at all; they just require a token stream.
690 Another place where whitespace is significant to the preprocessor is
691 a #define statement - if there is whitespace between the macro name
692 and an initial "(" token the macro is "object-like", otherwise it is
693 a function-like macro that takes arguments.
695 However, all is not rosy. Parsing of identifiers, numbers, comments
696 and strings becomes trickier because of the possibility of raw
697 trigraphs and escaped newlines in the input stream.
699 The trigraphs are three consecutive characters beginning with two
700 question marks. A question mark is not valid as part of a number or
701 identifier, so parsing of a number or identifier terminates normally
702 upon reaching it, returning to the mainloop which handles the
703 trigraph just like it would in any other position. Similarly for the
704 backslash of a backslash-newline combination. So we just need the
705 escaped-newline dropper in the mainloop to check if the token on the
706 top of the stack after dropping the escaped newline is a number or
707 identifier, and if so to continue the processing it as if nothing had
708 happened.
710 For strings, we replace trigraphs whenever we reach a quote or
711 newline, because there might be a backslash trigraph escaping them.
712 We need to be careful that we start trigraph replacing from where we
713 left off previously, because it is possible for a first scan to leave
714 "fake" trigraphs that a second scan would pick up as real (e.g. the
715 sequence "????/\n=" would find a fake ??= trigraph after removing the
716 escaped newline.)
718 For line comments, on reaching a newline we scan the previous
719 character(s) to see if it escaped, and continue if it is. Block
720 comments ignore everything and just focus on finding the comment
721 termination mark. The only difficult thing, and it is surprisingly
722 tricky, is checking if an asterisk precedes the final slash since
723 they could be separated by escaped newlines. If the preprocessor is
724 invoked with the output comments option, we don't bother removing
725 escaped newlines and replacing trigraphs for output.
727 Finally, numbers can begin with a period, which is pushed initially
728 as a CPP_DOT token in its own right. The digit handler checks if the
729 previous token was a CPP_DOT not separated by whitespace, and if so
730 pops it off the stack and pushes a period into the number's buffer
731 before calling the number parser. */
735 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
736 U":>", U"<%", U"%>"};
738 /* Call when a trigraph is encountered. It warns if necessary, and
739 returns true if the trigraph should be honoured. END is the third
740 character of a trigraph in the input stream. */
741 static int
742 trigraph_ok (pfile, end)
743 cpp_reader *pfile;
744 const unsigned char *end;
746 int accept = CPP_OPTION (pfile, trigraphs);
748 if (CPP_OPTION (pfile, warn_trigraphs))
750 unsigned int col = end - 1 - pfile->buffer->line_base;
751 if (accept)
752 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
753 "trigraph ??%c converted to %c",
754 (int) *end, (int) trigraph_map[*end]);
755 else
756 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
757 "trigraph ??%c ignored", (int) *end);
759 return accept;
762 /* Scan a string for trigraphs, warning or replacing them inline as
763 appropriate. When parsing a string, we must call this routine
764 before processing a newline character (if trigraphs are enabled),
765 since the newline might be escaped by a preceding backslash
766 trigraph sequence. Returns a pointer to the end of the name after
767 replacement. */
769 static unsigned char *
770 trigraph_replace (pfile, src, limit)
771 cpp_reader *pfile;
772 unsigned char *src;
773 unsigned char *limit;
775 unsigned char *dest;
777 /* Starting with src[1], find two consecutive '?'. The case of no
778 trigraphs is streamlined. */
780 for (src++; src + 1 < limit; src += 2)
782 if (src[0] != '?')
783 continue;
785 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
786 if (src[-1] == '?')
787 src--;
788 else if (src + 2 == limit || src[1] != '?')
789 continue;
791 /* Check if it really is a trigraph. */
792 if (trigraph_map[src[2]] == 0)
793 continue;
795 dest = src;
796 goto trigraph_found;
798 return limit;
800 /* Now we have a trigraph, we need to scan the remaining buffer, and
801 copy-shifting its contents left if replacement is enabled. */
802 for (; src + 2 < limit; dest++, src++)
803 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
805 trigraph_found:
806 src += 2;
807 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
808 *dest = trigraph_map[*src];
811 /* Copy remaining (at most 2) characters. */
812 while (src < limit)
813 *dest++ = *src++;
814 return dest;
817 /* If CUR is a backslash or the end of a trigraphed backslash, return
818 a pointer to its beginning, otherwise NULL. We don't read beyond
819 the buffer start, because there is the start of the comment in the
820 buffer. */
821 static const unsigned char *
822 backslash_start (pfile, cur)
823 cpp_reader *pfile;
824 const unsigned char *cur;
826 if (cur[0] == '\\')
827 return cur;
828 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
829 && trigraph_ok (pfile, cur))
830 return cur - 2;
831 return 0;
834 /* Skip a C-style block comment. This is probably the trickiest
835 handler. We find the end of the comment by seeing if an asterisk
836 is before every '/' we encounter. The nasty complication is that a
837 previous asterisk may be separated by one or more escaped newlines.
838 Returns non-zero if comment terminated by EOF, zero otherwise. */
839 static int
840 skip_block_comment (pfile)
841 cpp_reader *pfile;
843 cpp_buffer *buffer = pfile->buffer;
844 const unsigned char *char_after_star = 0;
845 register const unsigned char *cur = buffer->cur;
846 int seen_eof = 0;
848 /* Inner loop would think the comment has ended if the first comment
849 character is a '/'. Avoid this and keep the inner loop clean by
850 skipping such a character. */
851 if (cur < buffer->rlimit && cur[0] == '/')
852 cur++;
854 for (; cur < buffer->rlimit; )
856 unsigned char c = *cur++;
858 /* People like decorating comments with '*', so check for
859 '/' instead for efficiency. */
860 if (c == '/')
862 if (cur[-2] == '*' || cur - 1 == char_after_star)
863 goto out;
865 /* Warn about potential nested comments, but not when
866 the final character inside the comment is a '/'.
867 Don't bother to get it right across escaped newlines. */
868 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
869 && cur[0] == '*' && cur[1] != '/')
871 buffer->cur = cur;
872 cpp_warning (pfile, "'/*' within comment");
875 else if (IS_NEWLINE(c))
877 const unsigned char* bslash = backslash_start (pfile, cur - 2);
879 handle_newline (cur, buffer->rlimit, c);
880 /* Work correctly if there is an asterisk before an
881 arbirtrarily long sequence of escaped newlines. */
882 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
883 char_after_star = cur;
884 else
885 char_after_star = 0;
888 seen_eof = 1;
890 out:
891 buffer->cur = cur;
892 return seen_eof;
895 /* Skip a C++ or Chill line comment. Handles escaped newlines.
896 Returns non-zero if a multiline comment. */
897 static int
898 skip_line_comment (pfile)
899 cpp_reader *pfile;
901 cpp_buffer *buffer = pfile->buffer;
902 register const unsigned char *cur = buffer->cur;
903 int multiline = 0;
905 for (; cur < buffer->rlimit; )
907 unsigned char c = *cur++;
909 if (IS_NEWLINE (c))
911 /* Check for a (trigaph?) backslash escaping the newline. */
912 if (!backslash_start (pfile, cur - 2))
913 goto out;
914 multiline = 1;
915 handle_newline (cur, buffer->rlimit, c);
918 cur++;
920 out:
921 buffer->cur = cur - 1; /* Leave newline for caller. */
922 return multiline;
925 /* Skips whitespace, stopping at next non-whitespace character.
926 Adjusts pfile->col_adjust to account for tabs. This enables tokens
927 to be assigned the correct column. */
928 static void
929 skip_whitespace (pfile, in_directive)
930 cpp_reader *pfile;
931 int in_directive;
933 cpp_buffer *buffer = pfile->buffer;
934 register const unsigned char *cur = buffer->cur;
935 unsigned short null_count = 0;
937 for (; cur < buffer->rlimit; )
939 unsigned char c = *cur++;
941 if (c == '\t')
943 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
944 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
945 - col % CPP_OPTION(pfile, tabstop));
947 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
948 continue;
949 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
950 goto out;
951 if (c == '\0')
952 null_count++;
953 /* Mut be '\f' or '\v' */
954 else if (in_directive && CPP_PEDANTIC (pfile))
955 cpp_pedwarn (pfile, "%s in preprocessing directive",
956 c == '\f' ? "formfeed" : "vertical tab");
958 cur++;
960 out:
961 buffer->cur = cur - 1;
962 if (null_count)
963 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
964 : "embedded null character ignored");
967 /* Parse (append) an identifier. */
/* Characters are copied from pfile->buffer->cur onto the end of LIST's
   name buffer (list->namebuf + list->name_used) until a character that
   is not is_idchar is seen or buffer->rlimit is reached.  The name
   buffer is grown with auto_expand_name_space when it fills up.

   On return buffer->cur points just past the text consumed, NAME->len
   is the distance from NAME->text to the end of the copied text, and
   list->name_used is updated to match.  NAME->text itself is not
   assigned here; the caller must have set it up.  */
968 static void
969 parse_name (pfile, list, name)
970 cpp_reader *pfile;
971 cpp_toklist *list;
972 cpp_name *name;
974 const unsigned char *name_limit;
975 unsigned char *namebuf;
976 cpp_buffer *buffer = pfile->buffer;
977 register const unsigned char *cur = buffer->cur;
/* Re-entered after the name space has been expanded, so the write
   pointer and its limit are recomputed from LIST each time.  */
979 expanded:
980 name_limit = list->namebuf + list->name_cap;
981 namebuf = list->namebuf + list->name_used;
983 for (; cur < buffer->rlimit && namebuf < name_limit; )
985 unsigned char c = *namebuf = *cur; /* Copy a single char. */
/* The character was stored speculatively; it only becomes part of the
   name once both pointers are advanced below.  */
987 if (! is_idchar(c))
988 goto out;
989 namebuf++;
990 cur++;
991 /* $ is not a legal identifier character in the standard, but is
992 commonly accepted as an extension. Don't warn about it in
993 skipped conditional blocks. */
994 if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
/* NOTE(review): buffer->cur is synced before the diagnostic, presumably
   so that it is reported at the current position - confirm.  */
996 buffer->cur = cur;
997 cpp_pedwarn (pfile, "'$' character in identifier");
1001 /* Run out of name space? */
/* If input remains, the loop stopped because namebuf hit name_limit:
   record progress, grow the buffer and resume.  */
1002 if (cur < buffer->rlimit)
1004 list->name_used = namebuf - list->namebuf;
1005 auto_expand_name_space (list);
1006 goto expanded;
1009 out:
1010 buffer->cur = cur;
1011 name->len = namebuf - name->text;
1012 list->name_used = namebuf - list->namebuf;
1015 /* Parse (append) a number. */
/* Works like parse_name, but accepts is_numchar characters, '.', and a
   sign character that VALID_SIGN accepts given the preceding character.
   Characters are copied from pfile->buffer->cur onto the end of LIST's
   name buffer, which is grown on demand.

   On return buffer->cur points just past the text consumed, NAME->len
   is the distance from NAME->text to the end of the copied text, and
   list->name_used is updated to match.  */
1016 static void
1017 parse_number (pfile, list, name)
1018 cpp_reader *pfile;
1019 cpp_toklist *list;
1020 cpp_name *name;
1022 const unsigned char *name_limit;
1023 unsigned char *namebuf;
1024 cpp_buffer *buffer = pfile->buffer;
1025 register const unsigned char *cur = buffer->cur;
/* Re-entered after the name space has been expanded, so the write
   pointer and its limit are recomputed from LIST each time.  */
1027 expanded:
1028 name_limit = list->namebuf + list->name_cap;
1029 namebuf = list->namebuf + list->name_used;
1031 for (; cur < buffer->rlimit && namebuf < name_limit; )
1033 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1035 /* Perhaps we should accept '$' here if we accept it for
1036 identifiers. We know namebuf[-1] is safe, because for c to
1037 be a sign we must have pushed at least one character. */
1038 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1039 goto out;
/* Accept the character that was stored speculatively above.  */
1041 namebuf++;
1042 cur++;
1045 /* Run out of name space? */
/* If input remains, the loop stopped because namebuf hit name_limit:
   record progress, grow the buffer and resume.  */
1046 if (cur < buffer->rlimit)
1048 list->name_used = namebuf - list->namebuf;
1049 auto_expand_name_space (list);
1050 goto expanded;
1053 out:
1054 buffer->cur = cur;
1055 name->len = namebuf - name->text;
1056 list->name_used = namebuf - list->namebuf;
1059 /* Places a string terminated by an unescaped TERMINATOR into a
1060 cpp_name, which should be expandable and thus at the top of the
1061 list's stack. Handles embedded trigraphs, if necessary, and
1062 escaped newlines.
1064 Can be used for character constants (terminator = '\''), string
1065 constants ('"') and angled headers ('>'). Multi-line strings are
1066 allowed, except for within directives. */
/* The cpp_name filled in is TOKEN->val.name; TOKEN's line and column
   are also recorded as the start of a multi-line string.  Text is read
   from pfile->buffer->cur and appended to LIST's name buffer, which is
   grown on demand.  The terminator itself is not stored.

   On return buffer->cur, name->len and list->name_used are updated.
   Embedded null characters are kept in the string and reported by a
   single warning.  An unterminated string is reported with cpp_error.  */
1068 static void
1069 parse_string (pfile, list, token, terminator)
1070 cpp_reader *pfile;
1071 cpp_toklist *list;
1072 cpp_token *token;
1073 unsigned int terminator;
1075 cpp_buffer *buffer = pfile->buffer;
1076 cpp_name *name = &token->val.name;
1077 register const unsigned char *cur = buffer->cur;
1078 const unsigned char *name_limit;
1079 unsigned char *namebuf;
1080 unsigned int null_count = 0;
/* Offset into list->namebuf at which the next trigraph_replace scan
   will start.  */
1081 unsigned int trigraphed = list->name_used;
/* Re-entered after the name space has been expanded, so the write
   pointer and its limit are recomputed from LIST each time.  */
1083 expanded:
1084 name_limit = list->namebuf + list->name_cap;
1085 namebuf = list->namebuf + list->name_used;
1087 for (; cur < buffer->rlimit && namebuf < name_limit; )
1089 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
/* Only nulls, the terminator and newlines need any attention; every
   other character has already been copied and is simply kept.  */
1091 if (c == '\0')
1092 null_count++;
1093 else if (c == terminator || IS_NEWLINE (c))
1095 /* Needed for trigraph_replace and multiline string warning. */
1096 buffer->cur = cur;
1098 /* Scan for trigraphs before checking if backslash-escaped. */
1099 if ((CPP_OPTION (pfile, trigraphs)
1100 || CPP_OPTION (pfile, warn_trigraphs))
1101 && namebuf - (list->namebuf + trigraphed) >= 3)
1103 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1104 namebuf);
1105 /* The test above guarantees trigraphed will be positive. */
1106 trigraphed = namebuf - list->namebuf - 2;
1109 namebuf--; /* Drop the newline / terminator from the name. */
1110 if (IS_NEWLINE (c))
1112 /* Drop a backslash newline, and continue. */
1113 if (namebuf[-1] == '\\')
1115 handle_newline (cur, buffer->rlimit, c);
1116 namebuf--;
1117 continue;
/* Step back onto the newline so that the early exits below leave it
   unconsumed in the buffer.  */
1120 cur--;
1122 /* In Fortran and assembly language, silently terminate
1123 strings of either variety at end of line. This is a
1124 kludge around not knowing where comments are in these
1125 languages. */
1126 if (CPP_OPTION (pfile, lang_fortran)
1127 || CPP_OPTION (pfile, lang_asm))
1128 goto out;
1130 /* Character constants, headers and asserts may not
1131 extend over multiple lines. In Standard C, neither
1132 may strings. We accept multiline strings as an
1133 extension. (Even in directives - otherwise, glibc's
1134 longlong.h breaks.) */
1135 if (terminator != '"')
1136 goto unterminated;
1138 cur++; /* Move forwards again. */
/* Remember where the first multi-line string started, so that a later
   "missing terminating" error can point back to it.  */
1140 if (pfile->multiline_string_line == 0)
1142 pfile->multiline_string_line = token->line;
1143 pfile->multiline_string_column = token->col;
1144 if (CPP_PEDANTIC (pfile))
1145 cpp_pedwarn (pfile, "multi-line string constant");
/* The newline is stored in the string as a single '\n', whichever
   newline character was read.  */
1148 *namebuf++ = '\n';
1149 handle_newline (cur, buffer->rlimit, c);
1151 else
1153 unsigned char *temp;
1155 /* An odd number of consecutive backslashes represents
1156 an escaped terminator. */
1157 temp = namebuf - 1;
1158 while (temp >= name->text && *temp == '\\')
1159 temp--;
1161 if ((namebuf - temp) & 1)
1162 goto out;
/* The terminator was escaped: undo the drop above so it stays in the
   string, and keep scanning.  */
1163 namebuf++;
1168 /* Run out of name space? */
/* If input remains, the loop stopped because namebuf hit name_limit:
   record progress, grow the buffer and resume.  */
1169 if (cur < buffer->rlimit)
1171 list->name_used = namebuf - list->namebuf;
1172 auto_expand_name_space (list);
1173 goto expanded;
1176 /* We may not have trigraph-replaced the input for this code path,
1177 but as the input is in error by being unterminated we don't
1178 bother. Prevent warnings about no newlines at EOF. */
1179 if (IS_NEWLINE(cur[-1]))
1180 cur--;
1182 unterminated:
1183 cpp_error (pfile, "missing terminating %c character", (int) terminator);
/* If a multi-line string was opened on some other line, report that
   location as well: it is the likely origin of the problem.  */
1185 if (terminator == '\"' && pfile->multiline_string_line != list->line
1186 && pfile->multiline_string_line != 0)
1188 cpp_error_with_line (pfile, pfile->multiline_string_line,
1189 pfile->multiline_string_column,
1190 "possible start of unterminated string literal");
1191 pfile->multiline_string_line = 0;
1194 out:
1195 buffer->cur = cur;
1196 name->len = namebuf - name->text;
1197 list->name_used = namebuf - list->namebuf;
1199 if (null_count > 0)
1200 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1201 : "null character preserved"));
1204 /* The character TYPE helps us distinguish comment types: '*' = C
1205 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1206 the stored comment includes the comment start and any terminator. */
/* TOKEN becomes a CPP_COMMENT token whose text lives in LIST's name
   buffer.  FROM points at the comment text that follows the
   two-character comment start, and LEN is the number of bytes to copy
   from there.  The two start characters are regenerated from TYPE
   rather than copied, so the stored length is LEN plus
   COMMENT_START_LEN.  */
1208 #define COMMENT_START_LEN 2
1209 static void
1210 save_comment (list, token, from, len, type)
1211 cpp_toklist *list;
1212 cpp_token *token;
1213 const unsigned char *from;
1214 unsigned int len;
1215 unsigned int type;
1217 unsigned char *buffer;
/* From here on LEN counts the comment start as well.  */
1219 len += COMMENT_START_LEN;
/* Make room before the token's name is initialised, so the write
   position computed below lies in the (possibly grown) buffer.  */
1221 if (list->name_used + len > list->name_cap)
1222 _cpp_expand_name_space (list, len);
1224 INIT_TOKEN_NAME (list, token);
1225 token->type = CPP_COMMENT;
1226 token->val.name.len = len;
1228 buffer = list->namebuf + list->name_used;
1229 list->name_used += len;
1231 /* Copy the comment. */
/* A C comment starts with the two characters '/' and '*'; for the other
   kinds the start is the TYPE character doubled.  */
1232 if (type == '*')
1234 *buffer++ = '/';
1235 *buffer++ = '*';
1237 else
1239 *buffer++ = type;
1240 *buffer++ = type;
1242 memcpy (buffer, from, len - COMMENT_START_LEN);
1246 * The tokenizer's main loop. Returns a token list, representing a
1247 * logical line in the input file. On EOF after some tokens have
1248 * been processed, we return immediately. Then in next call, or if
1249 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1250 * token is placed in the list.
1252 * Implementation relies almost entirely on lookback, rather than
1253 * looking forwards. This means that tokenization requires just
1254 * a single pass of the file, even in the presence of trigraphs and
1255 * escaped newlines, providing significant performance benefits.
1256 * Trigraph overhead is negligible if they are disabled, and low
1257 * even when enabled.
/* Both macros below rely on the locals of lex_line (list, cur_token and
   first_token).  IS_DIRECTIVE is true once list->directive has been
   set for the line being lexed.  MIGHT_BE_DIRECTIVE is true when the
   token being lexed is the second token of this line and the first
   one was a '#'.  */
1260 #define IS_DIRECTIVE() (list->directive != 0)
1261 #define MIGHT_BE_DIRECTIVE() \
1262 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
/* Lex tokens from pfile->buffer, starting at buffer->cur, and append
   them to LIST starting at index list->tokens_used.  On return
   buffer->cur and list->tokens_used have been advanced, and
   list->directive is set if the line was recognised as a directive.
   LIST must have the LIST_OFFSET flag set.  */
1264 static void
1265 lex_line (pfile, list)
1266 cpp_reader *pfile;
1267 cpp_toklist *list;
1269 cpp_token *cur_token, *token_limit, *first;
1270 cpp_buffer *buffer = pfile->buffer;
1271 const unsigned char *cur = buffer->cur;
/* Flags to give the next token; carries PREV_WHITE across whitespace
   and discarded comments.  */
1272 unsigned char flags = 0;
/* Index of the first token produced by this call.  */
1273 unsigned int first_token = list->tokens_used;
/* A list without LIST_OFFSET is not supported here; treat it as an
   internal error.  */
1275 if (!(list->flags & LIST_OFFSET))
1276 (abort) ();
1278 list->file = buffer->nominal_fname;
1279 list->line = CPP_BUF_LINE (buffer);
1280 pfile->col_adjust = 0;
1281 pfile->in_lex_line = 1;
1282 if (cur == buffer->buf)
1283 list->flags |= BEG_OF_FILE;
/* Re-entered after the token array has been grown, so the pointers
   into it are recomputed from LIST each time.  */
1285 expanded:
1286 token_limit = list->tokens + list->tokens_cap;
1287 cur_token = list->tokens + list->tokens_used;
1289 for (; cur < buffer->rlimit && cur_token < token_limit;)
1291 unsigned char c;
1293 /* Optimize whitespace skipping, as most tokens are probably
1294 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1295 c = *cur++;
1296 if (is_hspace (c))
1298 /* Step back to get the null warning and tab correction. */
1299 buffer->cur = cur - 1;
1300 skip_whitespace (pfile, IS_DIRECTIVE ());
1301 cur = buffer->cur;
1303 flags = PREV_WHITE;
1304 if (cur == buffer->rlimit)
1305 break;
1306 c = *cur++;
1309 /* Initialize current token. CPP_EOF will not be fixed up by
1310 expand_name_space. */
1311 list->tokens_used = cur_token - list->tokens + 1;
1312 cur_token->type = CPP_EOF;
1313 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1314 cur_token->line = CPP_BUF_LINE (buffer);
1315 cur_token->flags = flags;
1316 flags = 0;
/* Dispatch on the first character of the token.  Multi-character
   operators are formed by looking back at the previous token rather
   than by reading ahead.  */
1318 switch (c)
1320 case '0': case '1': case '2': case '3': case '4':
1321 case '5': case '6': case '7': case '8': case '9':
1323 int prev_dot;
1325 cur--; /* Backup character. */
1326 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1327 if (prev_dot)
1328 cur_token--;
1329 INIT_TOKEN_NAME (list, cur_token);
1330 /* Prepend an immediately previous CPP_DOT token. */
1331 if (prev_dot)
1333 if (list->name_cap == list->name_used)
1334 auto_expand_name_space (list);
1336 cur_token->val.name.len = 1;
1337 list->namebuf[list->name_used++] = '.';
/* Also the re-entry point for a number that was interrupted by a
   backslash-newline (see the newline case).  */
1340 continue_number:
1341 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1342 buffer->cur = cur;
1343 parse_number (pfile, list, &cur_token->val.name);
1344 cur = buffer->cur;
1346 /* Check for # 123 form of #line. */
1347 if (MIGHT_BE_DIRECTIVE ())
1348 list->directive = _cpp_check_linemarker (pfile, cur_token,
1349 !(cur_token[-1].flags
1350 & PREV_WHITE));
1351 cur_token++;
1352 break;
/* Target for '$' when dollars are allowed in identifiers.  */
1354 letter:
1355 case '_':
1356 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1357 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1358 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1359 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1360 case 'y': case 'z':
1361 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1362 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1363 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1364 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1365 case 'Y': case 'Z':
1366 cur--; /* Backup character. */
1367 INIT_TOKEN_NAME (list, cur_token);
1368 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
/* Also the re-entry point for a name that was interrupted by a
   backslash-newline (see the newline case).  */
1370 continue_name:
1371 buffer->cur = cur;
1372 parse_name (pfile, list, &cur_token->val.name);
1373 cur = buffer->cur;
/* A name directly after a leading '#' may be a directive name.  */
1375 if (MIGHT_BE_DIRECTIVE ())
1376 list->directive = _cpp_check_directive (pfile, cur_token,
1377 !(list->tokens[0].flags
1378 & PREV_WHITE));
1379 cur_token++;
1380 break;
1382 case '\'':
1383 /* Character constants are not recognized when processing Fortran,
1384 or if -traditional. */
1385 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1386 goto other;
1388 /* Fall through. */
1389 case '\"':
1390 /* Traditionally, escaped strings are not strings. */
1391 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1392 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1393 goto other;
1395 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1396 /* Do we have a wide string? */
1397 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1398 && cur_token[-1].val.name.len == 1
1399 && cur_token[-1].val.name.text[0] == 'L'
1400 && !CPP_TRADITIONAL (pfile))
1402 /* No need for 'L' any more. */
1403 list->name_used--;
1404 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
/* Also reached from the '<' case for header names, with c changed to
   the '>' terminator.  */
1407 do_parse_string:
1408 /* Here c is one of ' " or >. */
1409 INIT_TOKEN_NAME (list, cur_token);
1410 buffer->cur = cur;
1411 parse_string (pfile, list, cur_token, c);
1412 cur = buffer->cur;
1413 cur_token++;
1414 break;
1416 case '/':
1417 cur_token->type = CPP_DIV;
1418 if (IMMED_TOKEN ())
1420 if (PREV_TOKEN_TYPE == CPP_DIV)
1422 /* We silently allow C++ comments in system headers,
1423 irrespective of conformance mode, because lots of
1424 broken systems do that and trying to clean it up
1425 in fixincludes is a nightmare. */
1426 if (CPP_IN_SYSTEM_HEADER (pfile))
1427 goto do_line_comment;
1428 else if (CPP_OPTION (pfile, cplusplus_comments))
1430 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1431 && ! buffer->warned_cplusplus_comments)
1433 buffer->cur = cur;
1434 cpp_pedwarn (pfile,
1435 "C++ style comments are not allowed in ISO C89");
1436 cpp_pedwarn (pfile,
1437 "(this will be reported only once per input file)");
1438 buffer->warned_cplusplus_comments = 1;
/* Also reached from the '-' case for Chill "--" comments; c is the
   comment character and is passed to save_comment as its type.  */
1440 do_line_comment:
1441 buffer->cur = cur;
1442 #if 0 /* Leave until new lexer in place. */
1443 if (cur[-2] != c)
1444 cpp_warning (pfile,
1445 "comment start split across lines");
1446 #endif
1447 if (skip_line_comment (pfile))
1448 cpp_warning (pfile, "multi-line comment");
1450 /* Back-up to first '-' or '/'. */
1451 cur_token--;
/* Either keep the comment as a token, or drop it; a dropped comment
   counts as whitespace before the next token unless in traditional
   mode.  */
1452 if (!CPP_OPTION (pfile, discard_comments)
1453 && (!IS_DIRECTIVE()
1454 || (list->directive->flags & COMMENTS)))
1455 save_comment (list, cur_token++, cur,
1456 buffer->cur - cur, c);
1457 else if (!CPP_OPTION (pfile, traditional))
1458 flags = PREV_WHITE;
1460 cur = buffer->cur;
1461 break;
1465 cur_token++;
1466 break;
1468 case '*':
1469 cur_token->type = CPP_MULT;
1470 if (IMMED_TOKEN ())
/* A '*' directly after a '/' token opens a block comment.  */
1472 if (PREV_TOKEN_TYPE == CPP_DIV)
1474 buffer->cur = cur;
1475 #if 0 /* Leave until new lexer in place. */
1476 if (cur[-2] != '/')
1477 cpp_warning (pfile,
1478 "comment start '/*' split across lines");
1479 #endif
1480 if (skip_block_comment (pfile))
1481 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1482 "unterminated comment");
1483 #if 0 /* Leave until new lexer in place. */
1484 else if (buffer->cur[-2] != '*')
1485 cpp_warning (pfile,
1486 "comment end '*/' split across lines");
1487 #endif
1488 /* Back up to opening '/'. */
1489 cur_token--;
1490 if (!CPP_OPTION (pfile, discard_comments)
1491 && (!IS_DIRECTIVE()
1492 || (list->directive->flags & COMMENTS)))
1493 save_comment (list, cur_token++, cur,
1494 buffer->cur - cur, c);
1495 else if (!CPP_OPTION (pfile, traditional))
1496 flags = PREV_WHITE;
1498 cur = buffer->cur;
1499 break;
1501 else if (CPP_OPTION (pfile, cplusplus))
1503 /* In C++, there are .* and ->* operators. */
1504 if (PREV_TOKEN_TYPE == CPP_DEREF)
1505 BACKUP_TOKEN (CPP_DEREF_STAR);
1506 else if (PREV_TOKEN_TYPE == CPP_DOT)
1507 BACKUP_TOKEN (CPP_DOT_STAR);
1510 cur_token++;
1511 break;
1513 case '\n':
1514 case '\r':
1515 handle_newline (cur, buffer->rlimit, c);
1516 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1518 if (IMMED_TOKEN ())
1520 /* Remove the escaped newline. Then continue to process
1521 any interrupted name or number. */
1522 cur_token--;
1523 /* Backslash-newline may not be immediately followed by
1524 EOF (C99 5.1.1.2). */
1525 if (cur >= buffer->rlimit)
1527 cpp_pedwarn (pfile, "backslash-newline at end of file");
1528 break;
/* If the backslash itself directly followed a name or number, resume
   lexing that token so the text after the newline is appended to it.  */
1530 if (IMMED_TOKEN ())
1532 cur_token--;
1533 if (cur_token->type == CPP_NAME)
1534 goto continue_name;
1535 else if (cur_token->type == CPP_NUMBER)
1536 goto continue_number;
1537 cur_token++;
1539 /* Remember whitespace setting. */
1540 flags = cur_token->flags;
1541 break;
1543 else
1545 buffer->cur = cur;
1546 cpp_warning (pfile,
1547 "backslash and newline separated by space");
1550 else if (MIGHT_BE_DIRECTIVE ())
1552 /* "Null directive." C99 6.10.7: A preprocessing
1553 directive of the form # <new-line> has no effect.
1555 But it is still a directive, and therefore disappears
1556 from the output. */
1557 cur_token--;
1558 if (cur_token->flags & PREV_WHITE)
1560 if (CPP_WTRADITIONAL (pfile))
1561 cpp_warning (pfile,
1562 "K+R C ignores #\\n with the # indented");
1563 if (CPP_TRADITIONAL (pfile))
1564 cur_token++;
1568 /* Skip vertical space until we have at least one token to
1569 return. */
1570 if (cur_token != &list->tokens[first_token])
1571 goto out;
1572 list->line = CPP_BUF_LINE (buffer);
1573 break;
1575 case '-':
1576 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1578 if (CPP_OPTION (pfile, chill))
1579 goto do_line_comment;
1580 REVISE_TOKEN (CPP_MINUS_MINUS);
1582 else
1583 PUSH_TOKEN (CPP_MINUS);
1584 break;
/* Also the target for the "%:" digraph (':' case) and the "??="
   trigraph ('?' case).  */
1586 make_hash:
1587 case '#':
1588 /* The digraph flag checking ensures that ## and %:%:
1589 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1590 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1591 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1592 REVISE_TOKEN (CPP_PASTE);
1593 else
1594 PUSH_TOKEN (CPP_HASH);
1595 break;
1597 case ':':
1598 cur_token->type = CPP_COLON;
1599 if (IMMED_TOKEN ())
1601 if (PREV_TOKEN_TYPE == CPP_COLON
1602 && CPP_OPTION (pfile, cplusplus))
1603 BACKUP_TOKEN (CPP_SCOPE);
1604 /* Digraph: "<:" is a '[' */
1605 else if (PREV_TOKEN_TYPE == CPP_LESS)
1606 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1607 /* Digraph: "%:" is a '#' */
1608 else if (PREV_TOKEN_TYPE == CPP_MOD)
1610 (--cur_token)->flags |= DIGRAPH;
1611 goto make_hash;
1614 cur_token++;
1615 break;
1617 case '&':
1618 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1619 REVISE_TOKEN (CPP_AND_AND);
1620 else
1621 PUSH_TOKEN (CPP_AND);
1622 break;
/* Also the target for the "??!" trigraph ('?' case).  */
1624 make_or:
1625 case '|':
1626 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1627 REVISE_TOKEN (CPP_OR_OR);
1628 else
1629 PUSH_TOKEN (CPP_OR);
1630 break;
1632 case '+':
1633 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1634 REVISE_TOKEN (CPP_PLUS_PLUS);
1635 else
1636 PUSH_TOKEN (CPP_PLUS);
1637 break;
1639 case '=':
1640 /* This relies on equidistance of "?=" and "?" tokens. */
1641 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1642 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1643 else
1644 PUSH_TOKEN (CPP_EQ);
1645 break;
1647 case '>':
1648 cur_token->type = CPP_GREATER;
1649 if (IMMED_TOKEN ())
1651 if (PREV_TOKEN_TYPE == CPP_GREATER)
1652 BACKUP_TOKEN (CPP_RSHIFT);
1653 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1654 BACKUP_TOKEN (CPP_DEREF);
1655 /* Digraph: ":>" is a ']' */
1656 else if (PREV_TOKEN_TYPE == CPP_COLON)
1657 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1658 /* Digraph: "%>" is a '}' */
1659 else if (PREV_TOKEN_TYPE == CPP_MOD)
1660 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1662 cur_token++;
1663 break;
1665 case '<':
1666 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1668 REVISE_TOKEN (CPP_LSHIFT);
1669 break;
1671 /* Is this the beginning of a header name? */
1672 if (IS_DIRECTIVE () && (list->directive->flags & INCL))
1674 c = '>'; /* Terminator. */
1675 cur_token->type = CPP_HEADER_NAME;
1676 goto do_parse_string;
1678 PUSH_TOKEN (CPP_LESS);
1679 break;
1681 case '%':
1682 /* Digraph: "<%" is a '{' */
1683 cur_token->type = CPP_MOD;
1684 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1685 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1686 cur_token++;
1687 break;
1689 case '?':
/* This is the one place that reads ahead: a second '?' followed by a
   character with a trigraph_map entry, and approved by trigraph_ok,
   is lexed as the punctuator it stands for.  */
1690 if (cur + 1 < buffer->rlimit && *cur == '?'
1691 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1693 /* Handle trigraph. */
1694 cur++;
1695 switch (*cur++)
1697 case '(': goto make_open_square;
1698 case ')': goto make_close_square;
1699 case '<': goto make_open_brace;
1700 case '>': goto make_close_brace;
1701 case '=': goto make_hash;
1702 case '!': goto make_or;
1703 case '-': goto make_complement;
1704 case '/': goto make_backslash;
1705 case '\'': goto make_xor;
1708 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1710 /* GNU C++ defines <? and >? operators. */
1711 if (PREV_TOKEN_TYPE == CPP_LESS)
1713 REVISE_TOKEN (CPP_MIN);
1714 break;
1716 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1718 REVISE_TOKEN (CPP_MAX);
1719 break;
1722 PUSH_TOKEN (CPP_QUERY);
1723 break;
1725 case '.':
/* Three '.' tokens in a row, with no whitespace before the middle one,
   collapse into a single ellipsis.  */
1726 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1727 && IMMED_TOKEN ()
1728 && !(cur_token[-1].flags & PREV_WHITE))
1730 cur_token -= 2;
1731 PUSH_TOKEN (CPP_ELLIPSIS);
1733 else
1734 PUSH_TOKEN (CPP_DOT);
1735 break;
/* The make_* labels below are the targets of the trigraph dispatch in
   the '?' case.  */
1737 make_complement:
1738 case '~': PUSH_TOKEN (CPP_COMPL); break;
1739 make_xor:
1740 case '^': PUSH_TOKEN (CPP_XOR); break;
1741 make_open_brace:
1742 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1743 make_close_brace:
1744 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1745 make_open_square:
1746 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1747 make_close_square:
1748 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1749 make_backslash:
1750 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1751 case '!': PUSH_TOKEN (CPP_NOT); break;
1752 case ',': PUSH_TOKEN (CPP_COMMA); break;
1753 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1754 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1755 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1757 case '$':
1758 if (CPP_OPTION (pfile, dollars_in_ident))
1759 goto letter;
1760 /* Fall through */
/* Any other character becomes a CPP_OTHER token that carries the
   character itself in val.aux.  */
1761 other:
1762 default:
1763 cur_token->val.aux = c;
1764 PUSH_TOKEN (CPP_OTHER);
1765 break;
1769 /* Run out of token space? */
1770 if (cur_token == token_limit)
1772 list->tokens_used = cur_token - list->tokens;
1773 _cpp_expand_token_space (list, 256);
1774 goto expanded;
/* The input buffer is exhausted.  If this call produced no tokens at
   all, hand back a lone CPP_EOF, first warning if the last character
   read was not a newline.  */
1777 cur_token->flags = flags;
1778 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1780 if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
1781 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1782 CPP_BUF_COLUMN (buffer, cur),
1783 "no newline at end of file");
1784 cur_token++->type = CPP_EOF;
1787 out:
1788 /* All tokens are allocated, so the memory location is fixed. */
1789 first = &list->tokens[first_token];
1791 /* Don't complain about the null directive, nor directives in
1792 assembly source: we don't know where the comments are, and # may
1793 introduce assembler pseudo-ops. Don't complain about invalid
1794 directives in skipped conditional groups (6.10 p4). */
1795 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1796 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1798 if (first[1].type == CPP_NAME)
1799 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1800 (int) first[1].val.name.len, first[1].val.name.text);
1801 else
1802 cpp_error (pfile, "invalid preprocessing directive");
1805 /* Put EOF at end of directives. This covers "directives do not
1806 extend beyond the end of the line (description 6.10 part 2)". */
1807 if (IS_DIRECTIVE () || !pfile->done_initializing)
1809 pfile->first_directive_token = first;
1810 cur_token++->type = CPP_EOF;
1813 if (first_token == 0 || IS_DIRECTIVE ())
1814 /* Set beginning of line flag. */
1815 first->flags |= BOL;
1816 else
1817 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1818 up the invocation of a function-like macro, new line is
1819 considered a normal white-space character. */
1820 first->flags |= PREV_WHITE;
/* Publish the final read position and token count back to the shared
   state, and leave lexing mode.  */
1822 buffer->cur = cur;
1823 list->tokens_used = cur_token - list->tokens;
1824 pfile->in_lex_line = 0;
1827 /* Write the spelling of a token TOKEN, with any appropriate
1828 whitespace before it, to the token_buffer. PREV is the previous
1829 token, which is used to determine if we need to shove in an extra
1830 space in order to avoid accidental token paste. */
/* PREV may be a null pointer, in which case the paste-avoidance check
   is skipped.  Output is appended at pfile->limit, which is advanced
   past whatever is written.  */
1831 static void
1832 output_token (pfile, token, prev)
1833 cpp_reader *pfile;
1834 const cpp_token *token, *prev;
/* Receives can_paste's out-parameter, which is not needed here.  */
1836 int dummy;
1838 if (token->col && (token->flags & BOL))
1840 /* Supply enough whitespace to put this token in its original
1841 column. Don't bother trying to reconstruct tabs; we can't
1842 get it right in general, and nothing ought to care. (Yes,
1843 some things do care; the fault lies with them.) */
1844 unsigned char *buffer;
1845 unsigned int spaces = token->col - 1;
1847 CPP_RESERVE (pfile, token->col);
1848 buffer = pfile->limit;
1850 while (spaces--)
1851 *buffer++ = ' ';
1852 pfile->limit = buffer;
1854 else if (token->flags & PREV_WHITE)
1855 CPP_PUTC (pfile, ' ');
1856 /* Check for and prevent accidental token pasting, in ANSI mode. */
1858 else if (!CPP_TRADITIONAL (pfile) && prev)
/* Any result other than CPP_EOF means the two spellings would combine
   into a different token if written back to back.  */
1860 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1861 CPP_PUTC (pfile, ' ');
1862 /* can_paste catches most of the accidental paste cases, but not all.
1863 Consider a + ++b - if there is not a space between the + and ++, it
1864 will be misparsed as a++ + b. */
1865 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1866 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1867 CPP_PUTC (pfile, ' ');
/* Reserve room for the spelling, then write it and advance the output
   limit to just past it.  */
1870 CPP_RESERVE (pfile, TOKEN_LEN (token));
1871 pfile->limit = spell_token (pfile, token, pfile->limit);
1874 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1875 already contain enough space to hold the token's spelling.
1876 Returns a pointer to the character after the last character
1877 written. */
/* How a token is spelled is selected by the spelling class recorded for
   its type in token_spellings.  PFILE is used only to report an
   internal error for a token that has no spelling; in that case
   BUFFER is returned unchanged.  */
1879 static unsigned char *
1880 spell_token (pfile, token, buffer)
1881 cpp_reader *pfile; /* Would be nice to be rid of this... */
1882 const cpp_token *token;
1883 unsigned char *buffer;
1885 switch (token_spellings[token->type].type)
1887 case SPELL_OPERATOR:
1889 const unsigned char *spelling;
1890 unsigned char c;
/* A token flagged DIGRAPH is written with its digraph spelling.  */
1892 if (token->flags & DIGRAPH)
1893 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1894 else
1895 spelling = token_spellings[token->type].spelling;
/* Copy the null-terminated spelling, without the terminator.  */
1897 while ((c = *spelling++) != '\0')
1898 *buffer++ = c;
1900 break;
1902 case SPELL_IDENT:
1903 memcpy (buffer, token->val.name.text, token->val.name.len);
1904 buffer += token->val.name.len;
1905 break;
1907 case SPELL_STRING:
1909 unsigned char c;
/* The stored text has neither the quotes nor the wide 'L' prefix, so
   they are regenerated here from the token type.  */
1911 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1912 *buffer++ = 'L';
1913 c = '\'';
1914 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1915 c = '"';
1916 *buffer++ = c;
1917 memcpy (buffer, token->val.name.text, token->val.name.len);
1918 buffer += token->val.name.len;
1919 *buffer++ = c;
1921 break;
1923 case SPELL_CHAR:
1924 *buffer++ = token->val.aux;
1925 break;
1927 case SPELL_NONE:
1928 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1929 break;
1932 return buffer;
1935 /* Return the spelling of a token known to be an operator.
1936 Does not distinguish digraphs from their counterparts. */
/* If TYPE turns out not to be in the SPELL_OPERATOR class, its entry
   in token_names is returned instead.  */
1937 const unsigned char *
1938 _cpp_spell_operator (type)
1939 enum cpp_ttype type;
1941 if (token_spellings[type].type == SPELL_OPERATOR)
1942 return token_spellings[type].spelling;
1943 else
1944 return token_names[type];
1948 /* Macro expansion algorithm. TODO. */
/* Constant template tokens.  parse_arg duplicates placemarker_token to
   stand for an empty macro argument.  */
1950 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1951 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
/* IS_ARG_CONTEXT tests whether context C has CONTEXT_ARG set.
   CURRENT_CONTEXT yields the entry of pfile->contexts selected by
   pfile->cur_context.  */
1953 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1954 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1956 /* Flags for cpp_context. */
1957 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
1958 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
1959 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
1960 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
/* Set D's flags to the PREV_WHITE, BOL and PASTE_LEFT bits of S,
   discarding any other flags of D.  If BOL is among them, D also takes
   S's column and line.  */
1962 #define ASSIGN_FLAGS_AND_POS(d, s) \
1963 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1964 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1965 } while (0)
1967 /* f is flags, just consisting of PREV_WHITE | BOL. */
/* Replace only the PREV_WHITE and BOL bits of D with F, leaving D's
   other flags alone.  If F includes BOL, D also takes S's column and
   line.  F is evaluated more than once.  */
1968 #define MODIFY_FLAGS_AND_POS(d, s, f) \
1969 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1970 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1971 } while (0)
/* One entry of the array pfile->contexts, which CURRENT_CONTEXT indexes
   with pfile->cur_context.  An entry describes either a macro's
   expansion list or a macro argument; CONTEXT_ARG in FLAGS tells which
   member of U is in use.  */
1973 typedef struct cpp_context cpp_context;
1974 struct cpp_context
1976 union
1978 const cpp_toklist *list; /* Used for macro contexts only. */
1979 const cpp_token **arg; /* Used for arg contexts only. */
1980 } u;
1982 /* Pushed token to be returned by next call to cpp_get_token. */
1983 const cpp_token *pushed_token;
1985 struct macro_args *args; /* 0 for arguments and object-like macros. */
1986 unsigned short posn; /* Current posn, index into u. */
1987 unsigned short count; /* No. of tokens in u. */
/* is_macro_disabled stops its downward walk at a context whose level
   is zero, and for CONTEXT_RAW contexts uses level as an index into
   pfile->contexts.  */
1988 unsigned short level;
/* Bitmask of the CONTEXT_* flags.  */
1989 unsigned char flags;
/* The tokens collected as the arguments of one function-like macro
   invocation.  TOKENS is a growable array with USED of CAPACITY slots
   filled (see save_token).  A null entry marks a point where the
   tokens change between raw and non-raw (see parse_arg).  */
1992 typedef struct macro_args macro_args;
1993 struct macro_args
/* NOTE(review): presumably the end offset of each argument within
   TOKENS - confirm against parse_args.  Released by free_macro_args.  */
1995 unsigned int *ends;
1996 const cpp_token **tokens;
1997 unsigned int capacity;
1998 unsigned int used;
/* NOTE(review): meaning not visible in this part of the file - confirm
   against parse_args and push_macro_context.  */
1999 unsigned short level;
/* Forward declarations of the static helpers used for macro expansion.  */
2002 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
2003 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
2004 macro_args *, unsigned int *));
2005 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
2006 static void save_token PARAMS ((macro_args *, const cpp_token *));
2007 static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
2008 const cpp_token *));
2009 static int do_pop_context PARAMS ((cpp_reader *));
2010 static const cpp_token *pop_context PARAMS ((cpp_reader *));
2011 static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
2012 cpp_hashnode *,
2013 const cpp_token *));
2014 static void free_macro_args PARAMS ((macro_args *));
2016 /* Free the storage allocated for macro arguments. */
/* Releases the token array (when one was allocated), the ends array and
   ARGS itself.  ARGS must not be used afterwards.  */
2017 static void
2018 free_macro_args (args)
2019 macro_args *args;
2021 if (args->tokens)
2022 free (args->tokens);
2023 free (args->ends);
2024 free (args);
2027 /* Determines if a macro has been already used (and is therefore
2028 disabled). */
/* EXPANSION is the macro's expansion list and TOKEN the name token that
   invoked it.  Returns 1 if the macro must not be expanded here, 0 if
   it may.  Besides recursive use, expansion is also refused when the
   input is already preprocessed, and when a function-like macro's
   name is not followed by '('.  In the latter case one token is read
   ahead and pushed back.  */
2029 static int
2030 is_macro_disabled (pfile, expansion, token)
2031 cpp_reader *pfile;
2032 const cpp_toklist *expansion;
2033 const cpp_token *token;
2035 cpp_context *context = CURRENT_CONTEXT (pfile);
2037 /* Don't expand anything if this file has already been preprocessed. */
2038 if (CPP_OPTION (pfile, preprocessed))
2039 return 1;
2041 /* Arguments on either side of ## are inserted in place without
2042 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2043 occurs during a later rescan pass. The effect is that we expand
2044 iff we would as part of the macro's expansion list, so we should
2045 drop to the macro's context. */
2046 if (IS_ARG_CONTEXT (context))
2048 if (token->flags & PASTED)
2049 context--;
2050 else if (!(context->flags & CONTEXT_RAW))
2051 return 1;
2052 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2053 context--;
2056 /* Have we already used this macro? */
/* Walk down the context stack looking for a macro context that is
   expanding this same list.  */
2057 while (context->level > 0)
2059 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2060 return 1;
2061 /* Raw argument tokens are judged based on the token list they
2062 came from. */
2063 if (context->flags & CONTEXT_RAW)
2064 context = pfile->contexts + context->level;
2065 else
2066 context--;
2069 /* Function-like macros may be disabled if the '(' is not in the
2070 current context. We check this without disrupting the context
2071 stack. */
2072 if (expansion->paramc >= 0)
2074 const cpp_token *next;
2075 unsigned int prev_nme;
2077 context = CURRENT_CONTEXT (pfile);
2078 /* Drop down any contexts we're at the end of: the '(' may
2079 appear in lower macro expansions, or in the rest of the file. */
2080 while (context->posn == context->count && context > pfile->contexts)
2082 context--;
2083 /* If we matched, we are disabled, as we appear in the
2084 expansion of each macro we meet. */
2085 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2086 return 1;
/* Peek at the next token with pfile->no_expand_level temporarily set
   to the index of the context found above, then restore the previous
   setting.  */
2089 prev_nme = pfile->no_expand_level;
2090 pfile->no_expand_level = context - pfile->contexts;
2091 next = cpp_get_token (pfile);
2092 restore_macro_expansion (pfile, prev_nme);
/* No '(' follows, so this is not an invocation: push the token back
   and leave the macro name unexpanded.  */
2093 if (next->type != CPP_OPEN_PAREN)
2095 _cpp_push_token (pfile, next);
2096 if (CPP_OPTION (pfile, warn_traditional))
2097 cpp_warning (pfile,
2098 "function macro %.*s must be used with arguments in traditional C",
2099 (int) token->val.name.len, token->val.name.text);
2100 return 1;
2104 return 0;
2107 /* Add a token to the set of tokens forming the arguments to the macro
2108 being parsed in parse_args. */
/* TOKEN may be a null pointer; parse_arg stores one as a marker.  Only
   the pointer is stored, the token itself is not copied.  */
2109 static void
2110 save_token (args, token)
2111 macro_args *args;
2112 const cpp_token *token;
/* The array is full: grow it to twice the old capacity plus 100.  */
2114 if (args->used == args->capacity)
2116 args->capacity += args->capacity + 100;
2117 args->tokens = (const cpp_token **)
2118 xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2120 args->tokens[args->used++] = token;
2123 /* Take and save raw tokens until we finish one argument. Empty
2124 arguments are saved as a single CPP_PLACEMARKER token. */
2125 static const cpp_token *
2126 parse_arg (pfile, var_args, paren_context, args, pcount)
2127 cpp_reader *pfile;
2128 int var_args;
2129 unsigned int paren_context;
2130 macro_args *args;
2131 unsigned int *pcount;
2133 const cpp_token *token;
2134 unsigned int paren = 0, count = 0;
2135 int raw, was_raw = 1;
2137 for (count = 0;; count++)
2139 token = cpp_get_token (pfile);
2141 switch (token->type)
2143 default:
2144 break;
2146 case CPP_OPEN_PAREN:
2147 paren++;
2148 break;
2150 case CPP_CLOSE_PAREN:
2151 if (paren-- != 0)
2152 break;
2153 goto out;
2155 case CPP_COMMA:
2156 /* Commas are not terminators within parantheses or var_args. */
2157 if (paren || var_args)
2158 break;
2159 goto out;
2161 case CPP_EOF: /* Error reported by caller. */
2162 goto out;
2165 raw = pfile->cur_context <= paren_context;
2166 if (raw != was_raw)
2168 was_raw = raw;
2169 save_token (args, 0);
2170 count++;
2172 save_token (args, token);
2175 out:
2176 if (count == 0)
2178 /* Duplicate the placemarker. Then we can set its flags and
2179 position and safely be using more than one. */
2180 save_token (args, duplicate_token (pfile, &placemarker_token));
2181 count++;
2184 *pcount = count;
2185 return token;
/* Non-zero if the argument starting at offset O of argument list A is
   empty: its first entry is a CPP_PLACEMARKER token, or its first
   entry is a null pointer and the entry after it is a CPP_PLACEMARKER.
   Both A and O are evaluated more than once.  */

#define empty_argument(A, O) \
  ((A)->tokens[(O) + ((A)->tokens[O] == 0)]->type == CPP_PLACEMARKER)
2196 /* Parse the arguments making up a macro invocation. Nested arguments
2197 are automatically macro expanded, but immediate macros are not
2198 expanded; this enables e.g. operator # to work correctly. Returns
2199 non-zero on error. */
2200 static int
2201 parse_args (pfile, hp, args)
2202 cpp_reader *pfile;
2203 cpp_hashnode *hp;
2204 macro_args *args;
2206 const cpp_token *token;
2207 const cpp_toklist *macro;
2208 unsigned int total = 0;
2209 unsigned int paren_context = pfile->cur_context;
2210 int argc = 0;
2212 macro = hp->value.expansion;
2215 unsigned int count;
2217 token = parse_arg (pfile, (argc + 1 == macro->paramc
2218 && (macro->flags & VAR_ARGS)),
2219 paren_context, args, &count);
2220 if (argc < macro->paramc)
2222 total += count;
2223 args->ends[argc] = total;
2225 argc++;
2227 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2229 if (token->type == CPP_EOF)
2231 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2232 hp->length, hp->name);
2233 return 1;
2235 else if (argc < macro->paramc)
2237 /* A rest argument is allowed to not appear in the invocation at all.
2238 e.g. #define debug(format, args...) ...
2239 debug("string");
2240 This is exactly the same as if the rest argument had received no
2241 tokens - debug("string",); This extension is deprecated. */
2243 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2245 /* Duplicate the placemarker. Then we can set its flags and
2246 position and safely be using more than one. */
2247 save_token (args, duplicate_token (pfile, &placemarker_token));
2248 args->ends[argc] = total + 1;
2249 return 0;
2251 else
2253 cpp_error (pfile,
2254 "insufficient arguments in invocation of macro \"%.*s\"",
2255 hp->length, hp->name);
2256 return 1;
2259 /* An empty argument to an empty function-like macro is fine. */
2260 else if (argc > macro->paramc
2261 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2263 cpp_error (pfile,
2264 "too many arguments in invocation of macro \"%.*s\"",
2265 hp->length, hp->name);
2266 return 1;
2269 return 0;
2272 /* Adds backslashes before all backslashes and double quotes appearing
2273 in strings. Non-printable characters are converted to octal. */
2274 static U_CHAR *
2275 quote_string (dest, src, len)
2276 U_CHAR *dest;
2277 const U_CHAR *src;
2278 unsigned int len;
2280 while (len--)
2282 U_CHAR c = *src++;
2284 if (c == '\\' || c == '"')
2286 *dest++ = '\\';
2287 *dest++ = c;
2289 else
2291 if (ISPRINT (c))
2292 *dest++ = c;
2293 else
2295 sprintf ((char *) dest, "\\%03o", c);
2296 dest += 4;
2301 return dest;
2304 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2305 CPP_STRING token containing TEXT in quoted form. */
2306 static cpp_token *
2307 make_string_token (token, text, len)
2308 cpp_token *token;
2309 const U_CHAR *text;
2310 unsigned int len;
2312 U_CHAR *buf;
2314 buf = (U_CHAR *) xmalloc (len * 4);
2315 token->type = CPP_STRING;
2316 token->flags = 0;
2317 token->val.name.text = buf;
2318 token->val.name.len = quote_string (buf, text, len) - buf;
2319 return token;
2322 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2323 evaluating to NUMBER. */
2324 static cpp_token *
2325 alloc_number_token (pfile, number)
2326 cpp_reader *pfile;
2327 int number;
2329 cpp_token *result;
2330 char *buf;
2332 result = get_temp_token (pfile);
2333 buf = xmalloc (20);
2334 sprintf (buf, "%d", number);
2336 result->type = CPP_NUMBER;
2337 result->flags = 0;
2338 result->val.name.text = (U_CHAR *) buf;
2339 result->val.name.len = strlen (buf);
2340 return result;
2343 /* Returns a temporary token from the temporary token store of PFILE. */
2344 static cpp_token *
2345 get_temp_token (pfile)
2346 cpp_reader *pfile;
2348 if (pfile->temp_used == pfile->temp_alloced)
2350 if (pfile->temp_used == pfile->temp_cap)
2352 pfile->temp_cap += pfile->temp_cap + 20;
2353 pfile->temp_tokens = (cpp_token **) xrealloc
2354 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2356 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2357 (sizeof (cpp_token));
2360 return pfile->temp_tokens[pfile->temp_used++];
2363 /* Release (not free) for re-use the temporary tokens of PFILE. */
2364 static void
2365 release_temp_tokens (pfile)
2366 cpp_reader *pfile;
2368 while (pfile->temp_used)
2370 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2372 if (token_spellings[token->type].type > SPELL_NONE)
2374 free ((char *) token->val.name.text);
2375 token->val.name.text = 0;
2380 /* Free all of PFILE's dynamically-allocated temporary tokens. */
2381 void
2382 _cpp_free_temp_tokens (pfile)
2383 cpp_reader *pfile;
2385 if (pfile->temp_tokens)
2387 /* It is possible, though unlikely (looking for '(' of a funlike
2388 macro into EOF), that we haven't released the tokens yet. */
2389 release_temp_tokens (pfile);
2390 while (pfile->temp_alloced)
2391 free (pfile->temp_tokens[--pfile->temp_alloced]);
2392 free (pfile->temp_tokens);
2395 if (pfile->date)
2397 free ((char *) pfile->date->val.name.text);
2398 free (pfile->date);
2399 free ((char *) pfile->time->val.name.text);
2400 free (pfile->time);
2404 /* Copy TOKEN into a temporary token from PFILE's store. */
2405 static cpp_token *
2406 duplicate_token (pfile, token)
2407 cpp_reader *pfile;
2408 const cpp_token *token;
2410 cpp_token *result = get_temp_token (pfile);
2412 *result = *token;
2413 if (token_spellings[token->type].type > SPELL_NONE)
2415 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.name.len);
2416 memcpy (buff, token->val.name.text, token->val.name.len);
2417 result->val.name.text = buff;
2419 return result;
2422 /* Determine whether two tokens can be pasted together, and if so,
2423 what the resulting token is. Returns CPP_EOF if the tokens cannot
2424 be pasted, or the appropriate type for the merged token if they
2425 can. */
2426 static enum cpp_ttype
2427 can_paste (pfile, token1, token2, digraph)
2428 cpp_reader * pfile;
2429 const cpp_token *token1, *token2;
2430 int* digraph;
2432 enum cpp_ttype a = token1->type, b = token2->type;
2433 int cxx = CPP_OPTION (pfile, cplusplus);
2435 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2436 return a + (CPP_EQ_EQ - CPP_EQ);
2438 switch (a)
2440 case CPP_GREATER:
2441 if (b == a) return CPP_RSHIFT;
2442 if (b == CPP_QUERY && cxx) return CPP_MAX;
2443 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2444 break;
2445 case CPP_LESS:
2446 if (b == a) return CPP_LSHIFT;
2447 if (b == CPP_QUERY && cxx) return CPP_MIN;
2448 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2449 if (b == CPP_COLON)
2450 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2451 if (b == CPP_MOD)
2452 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2453 break;
2455 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2456 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2457 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2459 case CPP_MINUS:
2460 if (b == a) return CPP_MINUS_MINUS;
2461 if (b == CPP_GREATER) return CPP_DEREF;
2462 break;
2463 case CPP_COLON:
2464 if (b == a && cxx) return CPP_SCOPE;
2465 if (b == CPP_GREATER)
2466 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2467 break;
2469 case CPP_MOD:
2470 if (b == CPP_GREATER)
2471 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2472 if (b == CPP_COLON)
2473 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2474 break;
2475 case CPP_DEREF:
2476 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2477 break;
2478 case CPP_DOT:
2479 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2480 if (b == CPP_NUMBER) return CPP_NUMBER;
2481 break;
2483 case CPP_HASH:
2484 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2485 /* %:%: digraph */
2486 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2487 break;
2489 case CPP_NAME:
2490 if (b == CPP_NAME) return CPP_NAME;
2491 if (b == CPP_NUMBER
2492 && is_numstart(token2->val.name.text[0])) return CPP_NAME;
2493 if (b == CPP_CHAR
2494 && token1->val.name.len == 1
2495 && token1->val.name.text[0] == 'L') return CPP_WCHAR;
2496 if (b == CPP_STRING
2497 && token1->val.name.len == 1
2498 && token1->val.name.text[0] == 'L') return CPP_WSTRING;
2499 break;
2501 case CPP_NUMBER:
2502 if (b == CPP_NUMBER) return CPP_NUMBER;
2503 if (b == CPP_NAME) return CPP_NUMBER;
2504 if (b == CPP_DOT) return CPP_NUMBER;
2505 /* Numbers cannot have length zero, so this is safe. */
2506 if ((b == CPP_PLUS || b == CPP_MINUS)
2507 && VALID_SIGN ('+', token1->val.name.text[token1->val.name.len - 1]))
2508 return CPP_NUMBER;
2509 break;
2511 default:
2512 break;
2515 return CPP_EOF;
2518 /* Check if TOKEN is to be ##-pasted with the token after it. */
2519 static const cpp_token *
2520 maybe_paste_with_next (pfile, token)
2521 cpp_reader *pfile;
2522 const cpp_token *token;
2524 cpp_token *pasted;
2525 const cpp_token *second;
2526 cpp_context *context = CURRENT_CONTEXT (pfile);
2528 /* Is this token on the LHS of ## ? */
2529 if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2530 && !(token->flags & PASTE_LEFT))
2531 return token;
2533 /* Prevent recursion, and possibly pushing back more than one token. */
2534 if (pfile->paste_level)
2535 return token;
2537 /* Suppress macro expansion for next token, but don't conflict with
2538 the other method of suppression. If it is an argument, macro
2539 expansion within the argument will still occur. */
2540 pfile->paste_level = pfile->cur_context;
2541 second = cpp_get_token (pfile);
2542 pfile->paste_level = 0;
2544 /* Ignore placemarker argument tokens (cannot be from an empty macro
2545 since macros are not expanded). */
2546 if (token->type == CPP_PLACEMARKER)
2547 pasted = duplicate_token (pfile, second);
2548 else if (second->type == CPP_PLACEMARKER)
2550 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2551 /* GCC has special extended semantics for a ## b where b is a
2552 varargs parameter: a disappears if b consists of no tokens.
2553 This extension is deprecated. */
2554 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2555 && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2556 == (unsigned) mac_context->u.list->paramc))
2558 cpp_warning (pfile, "deprecated GNU ## extension used");
2559 pasted = duplicate_token (pfile, second);
2561 else
2562 pasted = duplicate_token (pfile, token);
2564 else
2566 int digraph = 0;
2567 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2569 if (type == CPP_EOF)
2571 if (CPP_OPTION (pfile, warn_paste))
2572 cpp_warning (pfile,
2573 "pasting would not give a valid preprocessing token");
2574 _cpp_push_token (pfile, second);
2575 return token;
2578 if (type == CPP_NAME || type == CPP_NUMBER)
2580 /* Join spellings. */
2581 U_CHAR *buff, *buff2;
2583 pasted = get_temp_token (pfile);
2584 buff = (U_CHAR *) xmalloc (TOKEN_LEN (token) + TOKEN_LEN (second));
2585 buff2 = spell_token (pfile, token, buff);
2586 buff2 = spell_token (pfile, second, buff2);
2588 pasted->val.name.text = buff;
2589 pasted->val.name.len = buff2 - buff;
2591 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2592 pasted = duplicate_token (pfile, second);
2593 else
2595 pasted = get_temp_token (pfile);
2596 pasted->val.integer = 0;
2599 pasted->type = type;
2600 pasted->flags = digraph ? DIGRAPH: 0;
2603 /* The pasted token gets the whitespace flags and position of the
2604 first token, the PASTE_LEFT flag of the second token, plus the
2605 PASTED flag to indicate it is the result of a paste. However, we
2606 want to preserve the DIGRAPH flag. */
2607 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2608 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2609 | (second->flags & PASTE_LEFT) | PASTED);
2610 pasted->col = token->col;
2611 pasted->line = token->line;
2613 return maybe_paste_with_next (pfile, pasted);
2616 /* Convert a token sequence to a single string token according to the
2617 rules of the ISO C #-operator. */
2618 #define INIT_SIZE 200
2619 static cpp_token *
2620 stringify_arg (pfile, token)
2621 cpp_reader *pfile;
2622 const cpp_token *token;
2624 cpp_token *result;
2625 unsigned char *main_buf;
2626 unsigned int prev_value, backslash_count = 0;
2627 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2629 prev_value = prevent_macro_expansion (pfile);
2630 main_buf = (unsigned char *) xmalloc (buf_cap);
2632 result = get_temp_token (pfile);
2633 ASSIGN_FLAGS_AND_POS (result, token);
2635 for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
2637 int escape;
2638 unsigned char *buf;
2639 unsigned int len = TOKEN_LEN (token);
2641 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2642 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2643 if (escape)
2644 len *= 4 + 1;
2646 if (buf_used + len > buf_cap)
2648 buf_cap = buf_used + len + INIT_SIZE;
2649 main_buf = xrealloc (main_buf, buf_cap);
2652 if (whitespace && (token->flags & PREV_WHITE))
2653 main_buf[buf_used++] = ' ';
2655 if (escape)
2656 buf = (unsigned char *) xmalloc (len);
2657 else
2658 buf = main_buf + buf_used;
2660 len = spell_token (pfile, token, buf) - buf;
2661 if (escape)
2663 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2664 free (buf);
2666 else
2667 buf_used += len;
2669 whitespace = 1;
2670 if (token->type == CPP_BACKSLASH)
2671 backslash_count++;
2672 else
2673 backslash_count = 0;
2676 /* Ignore the final \ of invalid string literals. */
2677 if (backslash_count & 1)
2679 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2680 buf_used--;
2683 result->type = CPP_STRING;
2684 result->val.name.text = main_buf;
2685 result->val.name.len = buf_used;
2686 restore_macro_expansion (pfile, prev_value);
2687 return result;
2690 /* Allocate more room on the context stack of PFILE. */
2691 static void
2692 expand_context_stack (pfile)
2693 cpp_reader *pfile;
2695 pfile->context_cap += pfile->context_cap + 20;
2696 pfile->contexts = (cpp_context *)
2697 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2700 /* Push the context of macro NODE onto the context stack. TOKEN is
2701 the CPP_NAME token invoking the macro. */
2702 static const cpp_token *
2703 push_macro_context (pfile, node, token)
2704 cpp_reader *pfile;
2705 cpp_hashnode *node;
2706 const cpp_token *token;
2708 unsigned char orig_flags;
2709 macro_args *args;
2710 cpp_context *context;
2712 if (pfile->cur_context > CPP_STACK_MAX)
2714 cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2715 return token;
2718 /* Token's flags may change when parsing args containing a nested
2719 invocation of this macro. */
2720 orig_flags = token->flags & (PREV_WHITE | BOL);
2721 args = 0;
2722 if (node->value.expansion->paramc >= 0)
2724 unsigned int error, prev_nme;
2726 /* Allocate room for the argument contexts, and parse them. */
2727 args = (macro_args *) xmalloc (sizeof (macro_args));
2728 args->ends = (unsigned int *)
2729 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2730 args->tokens = 0;
2731 args->capacity = 0;
2732 args->used = 0;
2733 args->level = pfile->cur_context;
2735 prev_nme = prevent_macro_expansion (pfile);
2736 pfile->args = args;
2737 error = parse_args (pfile, node, args);
2738 pfile->args = 0;
2739 restore_macro_expansion (pfile, prev_nme);
2740 if (error)
2742 free_macro_args (args);
2743 return token;
2747 /* Now push its context. */
2748 pfile->cur_context++;
2749 if (pfile->cur_context == pfile->context_cap)
2750 expand_context_stack (pfile);
2752 context = CURRENT_CONTEXT (pfile);
2753 context->u.list = node->value.expansion;
2754 context->args = args;
2755 context->posn = 0;
2756 context->count = context->u.list->tokens_used;
2757 context->level = pfile->cur_context;
2758 context->flags = 0;
2759 context->pushed_token = 0;
2761 /* Set the flags of the first token. We know there must
2762 be one, empty macros are a single placemarker token. */
2763 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2765 return cpp_get_token (pfile);
2768 /* Push an argument to the current macro onto the context stack.
2769 TOKEN is the MACRO_ARG token representing the argument expansion. */
2770 static const cpp_token *
2771 push_arg_context (pfile, token)
2772 cpp_reader *pfile;
2773 const cpp_token *token;
2775 cpp_context *context;
2776 macro_args *args;
2778 pfile->cur_context++;
2779 if (pfile->cur_context == pfile->context_cap)
2780 expand_context_stack (pfile);
2782 context = CURRENT_CONTEXT (pfile);
2783 args = context[-1].args;
2785 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2786 context->u.arg = args->tokens + context->count;
2787 context->count = args->ends[token->val.aux] - context->count;
2788 context->args = 0;
2789 context->posn = 0;
2790 context->level = args->level;
2791 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2792 context->pushed_token = 0;
2794 /* Set the flags of the first token. There is one. */
2796 const cpp_token *first = context->u.arg[0];
2797 if (!first)
2798 first = context->u.arg[1];
2800 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2801 token->flags & (PREV_WHITE | BOL));
2804 if (token->flags & STRINGIFY_ARG)
2805 return stringify_arg (pfile, token);
2807 if (token->flags & PASTE_LEFT)
2808 context->flags |= CONTEXT_PASTEL;
2809 if (pfile->paste_level)
2810 context->flags |= CONTEXT_PASTER;
2812 return get_raw_token (pfile);
2815 /* "Unget" a token. It is effectively inserted in the token queue and
2816 will be returned by the next call to get_raw_token. */
2817 void
2818 _cpp_push_token (pfile, token)
2819 cpp_reader *pfile;
2820 const cpp_token *token;
2822 cpp_context *context = CURRENT_CONTEXT (pfile);
2823 if (context->pushed_token)
2824 cpp_ice (pfile, "two tokens pushed in a row");
2825 if (token->type != CPP_EOF)
2826 context->pushed_token = token;
2827 /* Don't push back a directive's CPP_EOF, step back instead. */
2828 else if (pfile->cur_context == 0)
2829 pfile->contexts[0].posn--;
2832 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2833 introducing the directive. */
2834 static void
2835 process_directive (pfile, token)
2836 cpp_reader *pfile;
2837 const cpp_token *token;
2839 const struct directive *d = pfile->token_list.directive;
2840 int prev_nme = 0;
2842 /* Skip over the directive name. */
2843 if (token[1].type == CPP_NAME)
2844 _cpp_get_raw_token (pfile);
2845 else if (token[1].type != CPP_NUMBER)
2846 cpp_ice (pfile, "directive begins with %s?!",
2847 token_names[token[1].type]);
2849 /* Flush pending tokens at this point, in case the directive produces
2850 output. XXX Directive output won't be visible to a direct caller of
2851 cpp_get_token. */
2852 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2853 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2855 if (! (d->flags & EXPAND))
2856 prev_nme = prevent_macro_expansion (pfile);
2857 (void) (*d->handler) (pfile);
2858 if (! (d->flags & EXPAND))
2859 restore_macro_expansion (pfile, prev_nme);
2860 _cpp_skip_rest_of_line (pfile);
2863 /* The external interface to return the next token. All macro
2864 expansion and directive processing is handled internally, the
2865 caller only ever sees the output after preprocessing. */
2866 const cpp_token *
2867 cpp_get_token (pfile)
2868 cpp_reader *pfile;
2870 const cpp_token *token;
2871 cpp_hashnode *node;
2873 /* Loop till we hit a non-directive, non-skipped, non-placemarker token. */
2874 for (;;)
2876 token = get_raw_token (pfile);
2877 if (token->flags & BOL && token->type == CPP_HASH
2878 && pfile->token_list.directive)
2880 process_directive (pfile, token);
2881 continue;
2884 /* Short circuit EOF. */
2885 if (token->type == CPP_EOF)
2886 return token;
2888 if (pfile->skipping && ! pfile->token_list.directive)
2890 _cpp_skip_rest_of_line (pfile);
2891 continue;
2893 break;
2896 /* If there's a potential control macro and we get here, then that
2897 #ifndef didn't cover the entire file and its argument shouldn't
2898 be taken as a control macro. */
2899 pfile->potential_control_macro = 0;
2901 token = maybe_paste_with_next (pfile, token);
2903 if (token->type != CPP_NAME)
2904 return token;
2906 /* Is macro expansion disabled in general? */
2907 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2908 return token;
2910 node = cpp_lookup (pfile, token->val.name.text, token->val.name.len);
2911 if (node->type == T_VOID)
2912 return token;
2914 if (node->type == T_MACRO)
2916 if (is_macro_disabled (pfile, node->value.expansion, token))
2917 return token;
2919 return push_macro_context (pfile, node, token);
2921 else
2922 return special_symbol (pfile, node, token);
2925 /* Returns the next raw token, i.e. without performing macro
2926 expansion. Argument contexts are automatically entered. */
2927 static const cpp_token *
2928 get_raw_token (pfile)
2929 cpp_reader *pfile;
2931 const cpp_token *result;
2932 cpp_context *context = CURRENT_CONTEXT (pfile);
2934 if (context->pushed_token)
2936 result = context->pushed_token;
2937 context->pushed_token = 0;
2939 else if (context->posn == context->count)
2940 result = pop_context (pfile);
2941 else
2943 if (IS_ARG_CONTEXT (context))
2945 result = context->u.arg[context->posn++];
2946 if (result == 0)
2948 context->flags ^= CONTEXT_RAW;
2949 result = context->u.arg[context->posn++];
2951 return result; /* Cannot be a CPP_MACRO_ARG */
2953 result = &context->u.list->tokens[context->posn++];
2956 if (result->type == CPP_MACRO_ARG)
2957 result = push_arg_context (pfile, result);
2958 return result;
2961 /* Internal interface to get the token without macro expanding. */
2962 const cpp_token *
2963 _cpp_get_raw_token (pfile)
2964 cpp_reader *pfile;
2966 int prev_nme = prevent_macro_expansion (pfile);
2967 const cpp_token *result = cpp_get_token (pfile);
2968 restore_macro_expansion (pfile, prev_nme);
2969 return result;
2972 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2973 list should be overwritten, or zero if we need to append
2974 (typically, if we are within the arguments to a macro, or looking
2975 for the '(' to start a function-like macro invocation). */
2976 static int
2977 lex_next (pfile, clear)
2978 cpp_reader *pfile;
2979 int clear;
2981 cpp_toklist *list = &pfile->token_list;
2982 const cpp_token *old_list = list->tokens;
2983 unsigned int old_used = list->tokens_used;
2985 if (clear)
2987 /* Release all temporary tokens. */
2988 _cpp_clear_toklist (list);
2989 pfile->contexts[0].posn = 0;
2990 if (pfile->temp_used)
2991 release_temp_tokens (pfile);
2993 else
2995 /* If we are currently processing a directive, do not advance.
2996 (6.10 paragraph 2: A new-line character ends the directive
2997 even if it occurs within what would otherwise be an
2998 invocation of a function-like macro.) */
2999 if (list->directive)
3000 return 1;
3003 lex_line (pfile, list);
3004 pfile->contexts[0].count = list->tokens_used;
3006 if (!clear && pfile->args)
3008 /* Fix up argument token pointers. */
3009 if (old_list != list->tokens)
3011 unsigned int i;
3013 for (i = 0; i < pfile->args->used; i++)
3015 const cpp_token *token = pfile->args->tokens[i];
3016 if (token >= old_list && token < old_list + old_used)
3017 pfile->args->tokens[i] = (const cpp_token *)
3018 ((char *) token + ((char *) list->tokens - (char *) old_list));
3022 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3023 tokens within the list of arguments that would otherwise act as
3024 preprocessing directives, the behavior is undefined.
3026 This implementation will report a hard error and treat the
3027 'sequence of preprocessing tokens' as part of the macro argument,
3028 not a directive.
3030 Note if pfile->args == 0, we're OK since we're only inside a
3031 macro argument after a '('. */
3032 if (list->directive)
3034 cpp_error_with_line (pfile, list->tokens[old_used].line,
3035 list->tokens[old_used].col,
3036 "#%s may not be used inside a macro argument",
3037 list->directive->name);
3038 /* Don't treat as a directive: clear list->directive,
3039 prune the final EOF from the list. */
3040 list->directive = 0;
3041 list->tokens_used--;
3042 pfile->contexts[0].count--;
3046 return 0;
3049 /* Pops a context of the context stack. If we're at the bottom, lexes
3050 the next logical line. Returns 1 if we're at the end of the
3051 argument list to the # operator, or if it is illegal to "overflow"
3052 into the rest of the file (e.g. 6.10.3.1.1). */
3053 static int
3054 do_pop_context (pfile)
3055 cpp_reader *pfile;
3057 cpp_context *context;
3059 if (pfile->cur_context == 0)
3060 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3062 /* Argument contexts, when parsing args or handling # operator
3063 return CPP_EOF at the end. */
3064 context = CURRENT_CONTEXT (pfile);
3065 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3066 return 1;
3068 /* Free resources when leaving macro contexts. */
3069 if (context->args)
3070 free_macro_args (context->args);
3072 if (pfile->cur_context == pfile->no_expand_level)
3073 pfile->no_expand_level--;
3074 pfile->cur_context--;
3076 return 0;
3079 /* Move down the context stack, and return the next raw token. */
3080 static const cpp_token *
3081 pop_context (pfile)
3082 cpp_reader *pfile;
3084 if (do_pop_context (pfile))
3085 return &eof_token;
3086 return get_raw_token (pfile);
3089 /* Turn off macro expansion at the current context level. */
3090 static unsigned int
3091 prevent_macro_expansion (pfile)
3092 cpp_reader *pfile;
3094 unsigned int prev_value = pfile->no_expand_level;
3095 pfile->no_expand_level = pfile->cur_context;
3096 return prev_value;
3099 /* Restore macro expansion to its previous state. */
3100 static void
3101 restore_macro_expansion (pfile, prev_value)
3102 cpp_reader *pfile;
3103 unsigned int prev_value;
3105 pfile->no_expand_level = prev_value;
3108 /* Used by cpperror.c to obtain the correct line and column to report
3109 in a diagnostic. */
3110 unsigned int
3111 _cpp_get_line (pfile, pcol)
3112 cpp_reader *pfile;
3113 unsigned int *pcol;
3115 unsigned int index;
3116 const cpp_token *cur_token;
3118 if (pfile->in_lex_line)
3119 index = pfile->token_list.tokens_used;
3120 else
3121 index = pfile->contexts[0].posn;
3123 cur_token = &pfile->token_list.tokens[index - 1];
3124 if (pcol)
3125 *pcol = cur_token->col;
3126 return cur_token->line;
/* Expands to two arguments: the string literal STR as a U_CHAR
   pointer, and its length without the terminating NUL.  */
#define DSC(str) (const U_CHAR *) (str), sizeof (str) - 1

/* Month abbreviations, indexed by month number starting from 0.  */
static const char * const monthnames[] =
{
  "Jan", "Feb", "Mar",
  "Apr", "May", "Jun",
  "Jul", "Aug", "Sep",
  "Oct", "Nov", "Dec",
};
3136 /* Handle builtin macros like __FILE__. */
3137 static const cpp_token *
3138 special_symbol (pfile, node, token)
3139 cpp_reader *pfile;
3140 cpp_hashnode *node;
3141 const cpp_token *token;
3143 cpp_token *result;
3144 cpp_buffer *ip;
3146 switch (node->type)
3148 case T_FILE:
3149 case T_BASE_FILE:
3151 const char *file;
3153 ip = CPP_BUFFER (pfile);
3154 if (ip == 0)
3155 file = "";
3156 else
3158 if (node->type == T_BASE_FILE)
3159 while (CPP_PREV_BUFFER (ip) != NULL)
3160 ip = CPP_PREV_BUFFER (ip);
3162 file = ip->nominal_fname;
3164 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3165 strlen (file));
3167 break;
3169 case T_INCLUDE_LEVEL:
3171 int true_indepth = 0;
3173 /* Do not count the primary source file in the include level. */
3174 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3175 while (ip)
3177 true_indepth++;
3178 ip = CPP_PREV_BUFFER (ip);
3180 result = alloc_number_token (pfile, true_indepth);
3182 break;
3184 case T_SPECLINE:
3185 /* If __LINE__ is embedded in a macro, it must expand to the
3186 line of the macro's invocation, not its definition.
3187 Otherwise things like assert() will not work properly. */
3188 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3189 break;
3191 case T_STDC:
3193 int stdc = 1;
3195 #ifdef STDC_0_IN_SYSTEM_HEADERS
3196 if (CPP_IN_SYSTEM_HEADER (pfile)
3197 && !cpp_defined (pfile, DSC("__STRICT_ANSI__")))
3198 stdc = 0;
3199 #endif
3200 result = alloc_number_token (pfile, stdc);
3202 break;
3204 case T_DATE:
3205 case T_TIME:
3206 if (pfile->date == 0)
3208 /* Allocate __DATE__ and __TIME__ from permanent storage,
3209 and save them in pfile so we don't have to do this again.
3210 We don't generate these strings at init time because
3211 time() and localtime() are very slow on some systems. */
3212 time_t tt = time (NULL);
3213 struct tm *tb = localtime (&tt);
3215 pfile->date = make_string_token
3216 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3217 pfile->time = make_string_token
3218 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3220 sprintf ((char *) pfile->date->val.name.text, "%s %2d %4d",
3221 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3222 sprintf ((char *) pfile->time->val.name.text, "%02d:%02d:%02d",
3223 tb->tm_hour, tb->tm_min, tb->tm_sec);
3225 result = node->type == T_DATE ? pfile->date: pfile->time;
3226 break;
3228 case T_POISON:
3229 cpp_error (pfile, "attempt to use poisoned \"%s\".", node->name);
3230 return token;
3232 default:
3233 cpp_ice (pfile, "invalid special hash type");
3234 return token;
3237 ASSIGN_FLAGS_AND_POS (result, token);
3238 return result;
3240 #undef DSC
3242 /* Dump the original user's spelling of argument index ARG_NO to the
3243 macro whose expansion is LIST. */
3244 static void
3245 dump_param_spelling (pfile, list, arg_no)
3246 cpp_reader *pfile;
3247 const cpp_toklist *list;
3248 unsigned int arg_no;
3250 const U_CHAR *param = list->namebuf;
3252 while (arg_no--)
3253 param += ustrlen (param) + 1;
3254 CPP_PUTS (pfile, param, ustrlen (param));
3257 /* Dump a token list to the output. */
3258 void
3259 _cpp_dump_list (pfile, list, token, flush)
3260 cpp_reader *pfile;
3261 const cpp_toklist *list;
3262 const cpp_token *token;
3263 int flush;
3265 const cpp_token *limit = list->tokens + list->tokens_used;
3266 const cpp_token *prev = 0;
3268 /* Avoid the CPP_EOF. */
3269 if (list->directive)
3270 limit--;
3272 while (token < limit)
3274 if (token->type == CPP_MACRO_ARG)
3276 if (token->flags & PREV_WHITE)
3277 CPP_PUTC (pfile, ' ');
3278 if (token->flags & STRINGIFY_ARG)
3279 CPP_PUTC (pfile, '#');
3280 dump_param_spelling (pfile, list, token->val.aux);
3282 else
3283 output_token (pfile, token, prev);
3284 if (token->flags & PASTE_LEFT)
3285 CPP_PUTS (pfile, " ##", 3);
3286 prev = token;
3287 token++;
3290 if (flush && pfile->printer)
3291 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3294 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3295 if it hasn't happened already. */
3297 void
3298 _cpp_init_input_buffer (pfile)
3299 cpp_reader *pfile;
3301 init_trigraph_map ();
3302 pfile->context_cap = 20;
3303 pfile->contexts = (cpp_context *)
3304 xmalloc (pfile->context_cap * sizeof (cpp_context));
3305 pfile->cur_context = 0;
3306 pfile->contexts[0].u.list = &pfile->token_list;
3308 pfile->contexts[0].posn = 0;
3309 pfile->contexts[0].count = 0;
3310 pfile->no_expand_level = UINT_MAX;
3312 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3315 /* Moves to the end of the directive line, popping contexts as
3316 necessary. */
3317 void
3318 _cpp_skip_rest_of_line (pfile)
3319 cpp_reader *pfile;
3321 /* Get to base context. Clear parsing args and each contexts flags,
3322 since these can cause pop_context to return without popping. */
3323 pfile->no_expand_level = UINT_MAX;
3324 while (pfile->cur_context != 0)
3326 pfile->contexts[pfile->cur_context].flags = 0;
3327 do_pop_context (pfile);
3330 pfile->contexts[pfile->cur_context].count = 0;
3331 pfile->contexts[pfile->cur_context].posn = 0;
3332 pfile->token_list.directive = 0;
3335 /* Directive handler wrapper used by the command line option
3336 processor. */
3337 void
3338 _cpp_run_directive (pfile, dir, buf, count)
3339 cpp_reader *pfile;
3340 const struct directive *dir;
3341 const char *buf;
3342 size_t count;
3344 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3346 unsigned int prev_lvl = 0;
3347 /* scan the line now, else prevent_macro_expansion won't work */
3348 do_pop_context (pfile);
3349 if (! (dir->flags & EXPAND))
3350 prev_lvl = prevent_macro_expansion (pfile);
3352 (void) (*dir->handler) (pfile);
3354 if (! (dir->flags & EXPAND))
3355 restore_macro_expansion (pfile, prev_lvl);
3357 _cpp_skip_rest_of_line (pfile);
3358 cpp_pop_buffer (pfile);