gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o -dM and with _cpp_dump_list: too many \n output.
  28 o Put a printer object in cpp_reader?
  29 o Check line numbers assigned to all errors.
  30 o Replace strncmp with memcmp almost everywhere.
  31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
   32 o Convert do_ functions to return void.  Kaveh thinks it's OK; and said he'll
  33   give it a run when we've got some code.
  34 o Distinguish integers, floats, and 'other' pp-numbers.
  35 o Store ints and char constants as binary values.
  36 o New command-line assertion syntax.
  37 o Work towards functions in cpperror.c taking a message level parameter.
  38   If we do this, merge the common code of do_warning and do_error.
  39 o Comment all functions, and describe macro expansion algorithm.
  40 o Move as much out of header files as possible.
  41 o Remove single quote pairs `', and some '', from diagnostics.
  42 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  43
  44 */
  45
  46 #include "config.h"
  47 #include "system.h"
  48 #include "intl.h"
  49 #include "cpplib.h"
  50 #include "cpphash.h"
  51 #include "symcat.h"
  52
  53 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
  54 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  55
  56 /* Flags for cpp_context.  */
  57 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
  58 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
  59 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
  60 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  61
  62 typedef struct cpp_context cpp_context;
  63 struct cpp_context
  64 {
  65   union
  66   {
  67     const cpp_toklist *list;    /* Used for macro contexts only.  */
  68     const cpp_token **arg;      /* Used for arg contexts only.  */
  69   } u;
  70
  71   /* Pushed token to be returned by next call to get_raw_token.  */
  72   const cpp_token *pushed_token;
  73
  74   struct macro_args *args;      /* The arguments for a function-like
  75                                    macro.  NULL otherwise.  */
  76   unsigned short posn;          /* Current posn, index into u.  */
  77   unsigned short count;         /* No. of tokens in u.  */
  78   unsigned short level;
  79   unsigned char flags;
  80 };
  81
  82 typedef struct macro_args macro_args;
  83 struct macro_args
  84 {
  85   unsigned int *ends;
  86   const cpp_token **tokens;
  87   unsigned int capacity;
  88   unsigned int used;
  89   unsigned short level;
  90 };
  91
  92 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  93 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  94                                            macro_args *, unsigned int *));
  95 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  96 static void save_token PARAMS ((macro_args *, const cpp_token *));
  97 static int pop_context PARAMS ((cpp_reader *));
  98 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  99 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
 100 static void free_macro_args PARAMS ((macro_args *));
 101
 102 #define auto_expand_name_space(list) \
 103     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
 104 static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
 105                                          size_t, FILE *));
 106 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
 107                                          unsigned int));
 108 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
 109                                          unsigned int));
 110
 111 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 112 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
 113                                                 unsigned char *));
 114 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
 115                                                      const unsigned char *));
 116 static int skip_block_comment PARAMS ((cpp_reader *));
 117 static int skip_line_comment PARAMS ((cpp_reader *));
 118 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
 119 static void skip_whitespace PARAMS ((cpp_reader *, int));
 120 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
 121                                    const U_CHAR *, const U_CHAR *));
 122 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
 123 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
 124                                   unsigned int));
 125 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
 126 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
 127                                   const unsigned char *,
 128                                   unsigned int, unsigned int));
 129 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 130 static int lex_next PARAMS ((cpp_reader *, int));
 131 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 132                                       const cpp_token *));
 133
 134 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 135 static void expand_context_stack PARAMS ((cpp_reader *));
 136 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 137                                             unsigned char *));
 138 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
 139                                   const cpp_token *));
 140 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 141                                           cpp_token *));
 142 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 143                                             unsigned int));
 144 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 145 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 146                                                 const cpp_token *));
 147 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 148 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 149                                                        const cpp_token *));
 150 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 151                                          const cpp_token *, int *));
 152 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 153 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 154 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 155 static void release_temp_tokens         PARAMS ((cpp_reader *));
 156 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 157 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 158
 159 #define INIT_TOKEN_STR(list, token) \
 160   do {(token)->val.str.len = 0; \
 161       (token)->val.str.text = (list)->namebuf + (list)->name_used; \
 162   } while (0)
 163
 164 #define VALID_SIGN(c, prevc) \
 165   (((c) == '+' || (c) == '-') && \
 166    ((prevc) == 'e' || (prevc) == 'E' \
 167     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 168
 169 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 170    character, if any, is in buffer.  */
 171
 172 #define handle_newline(cur, limit, c) \
 173  do { \
 174   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 175     (cur)++; \
 176   pfile->buffer->lineno++; \
 177   pfile->buffer->line_base = (cur); \
 178   pfile->col_adjust = 0; \
 179  } while (0)
 180
 181 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 182 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 183
 184 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
 185 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
 186 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
 187 #define BACKUP_DIGRAPH(ttype) do { \
 188   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 189
 190 /* An upper bound on the number of bytes needed to spell a token,
 191    including preceding whitespace.  */
 192 #define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING       \
 193                                ? (token)->val.str.len                   \
 194                                : (TOKEN_SPELL(token) == SPELL_IDENT     \
 195                                   ? (token)->val.node->length           \
 196                                   : 0)))
 197
 198 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
 199 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
 200
 201 #define ASSIGN_FLAGS_AND_POS(d, s) \
 202   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
 203       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 204   } while (0)
 205
 206 /* f is flags, just consisting of PREV_WHITE | BOL.  */
 207 #define MODIFY_FLAGS_AND_POS(d, s, f) \
 208   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
 209       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 210   } while (0)
 211
 212 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
 213 #define I(e, s) {SPELL_IDENT, s},
 214 #define S(e, s) {SPELL_STRING, s},
 215 #define C(e, s) {SPELL_CHAR, s},
 216 #define N(e, s) {SPELL_NONE, s},
 217
 218 const struct token_spelling
 219 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
 220
 221 #undef T
 222 #undef I
 223 #undef S
 224 #undef C
 225 #undef N
 226
 227 /* For debugging: the internal names of the tokens.  */
 228 #define T(e, s) U STRINGX(e),
 229 #define I(e, s) U STRINGX(e),
 230 #define S(e, s) U STRINGX(e),
 231 #define C(e, s) U STRINGX(e),
 232 #define N(e, s) U STRINGX(e),
 233
 234 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
 235
 236 #undef T
 237 #undef I
 238 #undef S
 239 #undef C
 240 #undef N
 241
 242 /* The following table is used by trigraph_ok/trigraph_replace.  If we
 243    have designated initializers, it can be constant data; otherwise,
 244    it is set up at runtime by _cpp_init_input_buffer.  */
 245
 246 #if (GCC_VERSION >= 2007)
 247 #define init_trigraph_map()  /* nothing */
 248 #define TRIGRAPH_MAP \
 249 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
 250 #define END };
 251 #define s(p, v) [p] = v,
 252 #else
 253 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 254  static void init_trigraph_map PARAMS ((void)) { \
 255  unsigned char *x = trigraph_map;
 256 #define END }
 257 #define s(p, v) x[p] = v;
 258 #endif
 259
 260 TRIGRAPH_MAP
 261   s('=', '#')   s(')', ']')     s('!', '|')
 262   s('(', '[')   s('\'', '^')    s('>', '}')
 263   s('/', '\\')  s('<', '{')     s('-', '~')
 264 END
 265
 266 #undef TRIGRAPH_MAP
 267 #undef END
 268 #undef s
 269
 270 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 271
 272 void
 273 _cpp_grow_token_buffer (pfile, n)
 274      cpp_reader *pfile;
 275      long n;
 276 {
 277   long old_written = CPP_WRITTEN (pfile);
 278   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 279   pfile->token_buffer = (U_CHAR *)
 280     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 281   CPP_SET_WRITTEN (pfile, old_written);
 282 }
 283
 284 /* Deal with the annoying semantics of fwrite.  */
 285 static void
 286 safe_fwrite (pfile, buf, len, fp)
 287      cpp_reader *pfile;
 288      const U_CHAR *buf;
 289      size_t len;
 290      FILE *fp;
 291 {
 292   size_t count;
 293
 294   while (len)
 295     {
 296       count = fwrite (buf, 1, len, fp);
 297       if (count == 0)
 298         goto error;
 299       len -= count;
 300       buf += count;
 301     }
 302   return;
 303
 304  error:
 305   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 306 }
 307
 308 /* Notify the compiler proper that the current line number has jumped,
 309    or the current file name has changed.  */
 310
 311 static void
 312 output_line_command (pfile, print, line)
 313      cpp_reader *pfile;
 314      cpp_printer *print;
 315      unsigned int line;
 316 {
 317   cpp_buffer *ip = CPP_BUFFER (pfile);
 318   enum { same = 0, enter, leave, rname } change;
 319   static const char * const codes[] = { "", " 1", " 2", "" };
 320
 321   if (line == 0)
 322     return;
 323
 324   /* End the previous line of text.  */
 325   if (pfile->need_newline)
 326     putc ('\n', print->outf);
 327   pfile->need_newline = 0;
 328
 329   if (CPP_OPTION (pfile, no_line_commands))
 330     return;
 331
 332   /* If ip is null, we've been called from cpp_finish, and they just
 333      needed the final flush and trailing newline.  */
 334   if (!ip)
 335     return;
 336
 337   if (pfile->include_depth == print->last_id)
 338     {
 339       /* Determine whether the current filename has changed, and if so,
 340          how.  'nominal_fname' values are unique, so they can be compared
 341          by comparing pointers.  */
 342       if (ip->nominal_fname == print->last_fname)
 343         change = same;
 344       else
 345         change = rname;
 346     }
 347   else
 348     {
 349       if (pfile->include_depth > print->last_id)
 350         change = enter;
 351       else
 352         change = leave;
 353       print->last_id = pfile->include_depth;
 354     }
 355   print->last_fname = ip->nominal_fname;
 356
 357   /* If the current file has not changed, we can output a few newlines
 358      instead if we want to increase the line number by a small amount.
 359      We cannot do this if print->lineno is zero, because that means we
 360      haven't output any line commands yet.  (The very first line
 361      command output is a `same_file' command.)  */
 362   if (change == same && print->lineno > 0
 363       && line >= print->lineno && line < print->lineno + 8)
 364     {
 365       while (line > print->lineno)
 366         {
 367           putc ('\n', print->outf);
 368           print->lineno++;
 369         }
 370       return;
 371     }
 372
 373 #ifndef NO_IMPLICIT_EXTERN_C
 374   if (CPP_OPTION (pfile, cplusplus))
 375     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 376              codes[change],
 377              ip->inc->sysp ? " 3" : "",
 378              (ip->inc->sysp == 2) ? " 4" : "");
 379   else
 380 #endif
 381     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 382              codes[change],
 383              ip->inc->sysp ? " 3" : "");
 384   print->lineno = line;
 385 }
 386
 387 /* Write the contents of the token_buffer to the output stream, and
 388    clear the token_buffer.  Also handles generating line commands and
 389    keeping track of file transitions.  */
 390
 391 void
 392 cpp_output_tokens (pfile, print, line)
 393      cpp_reader *pfile;
 394      cpp_printer *print;
 395      unsigned int line;
 396 {
 397   if (CPP_WRITTEN (pfile) - print->written)
 398     {
 399       safe_fwrite (pfile, pfile->token_buffer,
 400                    CPP_WRITTEN (pfile) - print->written, print->outf);
 401       pfile->need_newline = 1;
 402       if (print->lineno)
 403         print->lineno++;
 404
 405       CPP_SET_WRITTEN (pfile, print->written);
 406     }
 407   output_line_command (pfile, print, line);
 408 }
 409
 410 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 411
 412 void
 413 cpp_scan_buffer_nooutput (pfile)
 414      cpp_reader *pfile;
 415 {
 416   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 417   const cpp_token *token;
 418
 419   /* In no-output mode, we can ignore everything but directives.  */
 420   for (;;)
 421     {
 422       token = _cpp_get_token (pfile);
 423
 424       if (token->type == CPP_EOF)
 425         {
 426           cpp_pop_buffer (pfile);
 427           if (CPP_BUFFER (pfile) == stop)
 428             break;
 429         }
 430
 431       if (token->type == CPP_HASH && token->flags & BOL
 432           && pfile->token_list.directive)
 433         {
 434           process_directive (pfile, token);
 435           continue;
 436         }
 437
 438       _cpp_skip_rest_of_line (pfile);
 439     }
 440 }
 441
 442 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 443 void
 444 cpp_scan_buffer (pfile, print)
 445      cpp_reader *pfile;
 446      cpp_printer *print;
 447 {
 448   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 449   const cpp_token *token, *prev = 0;
 450
 451   for (;;)
 452     {
 453       token = _cpp_get_token (pfile);
 454       if (token->type == CPP_EOF)
 455         {
 456           cpp_pop_buffer (pfile);
 457           if (CPP_BUFFER (pfile) == stop)
 458             return;
 459
 460           cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
 461           prev = 0;
 462           continue;
 463         }
 464
 465       if (token->flags & BOL)
 466         {
 467           if (token->type == CPP_HASH && pfile->token_list.directive)
 468             {
 469               process_directive (pfile, token);
 470               continue;
 471             }
 472
 473           cpp_output_tokens (pfile, print, pfile->token_list.line);
 474           prev = 0;
 475         }
 476
 477       if (token->type != CPP_PLACEMARKER)
 478         output_token (pfile, token, prev);
 479
 480       prev = token;
 481     }
 482 }
 483
 484 /* Scan a single line of the input into the token_buffer.  */
 485 int
 486 cpp_scan_line (pfile)
 487      cpp_reader *pfile;
 488 {
 489   const cpp_token *token, *prev = 0;
 490
 491   if (pfile->buffer == NULL)
 492     return 0;
 493
 494   do
 495     {
 496       token = cpp_get_token (pfile);
 497       if (token->type == CPP_EOF)
 498         {
 499           cpp_pop_buffer (pfile);
 500           break;
 501         }
 502
 503       /* If the last token on a line results from a macro expansion,
 504          the check below will fail to stop us from proceeding to the
 505          next line - so make sure we stick in a newline, at least.  */
 506       if (token->flags & BOL)
 507         CPP_PUTC (pfile, '\n');
 508
 509       output_token (pfile, token, prev);
 510       prev = token;
 511     }
 512   while (pfile->cur_context > 0
 513          || pfile->contexts[0].posn < pfile->contexts[0].count);
 514   return 1;
 515 }
 516
 517 /* Helper routine used by parse_include, which can't see spell_token.
 518    Reinterpret the current line as an h-char-sequence (< ... >); we are
 519    looking at the first token after the <.  */
 520 const cpp_token *
 521 _cpp_glue_header_name (pfile)
 522      cpp_reader *pfile;
 523 {
 524   unsigned int written = CPP_WRITTEN (pfile);
 525   const cpp_token *t;
 526   cpp_token *hdr;
 527   U_CHAR *buf;
 528   size_t len;
 529
 530   for (;;)
 531     {
 532       t = _cpp_get_token (pfile);
 533       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 534         break;
 535
 536       CPP_RESERVE (pfile, TOKEN_LEN (t));
 537       if (t->flags & PREV_WHITE)
 538         CPP_PUTC_Q (pfile, ' ');
 539       pfile->limit = spell_token (pfile, t, pfile->limit);
 540     }
 541
 542   if (t->type == CPP_EOF)
 543     cpp_error (pfile, "missing terminating > character");
 544
 545   len = CPP_WRITTEN (pfile) - written;
 546   buf = xmalloc (len);
 547   memcpy (buf, pfile->token_buffer + written, len);
 548   CPP_SET_WRITTEN (pfile, written);
 549
 550   hdr = get_temp_token (pfile);
 551   hdr->type = CPP_HEADER_NAME;
 552   hdr->flags = 0;
 553   hdr->val.str.text = buf;
 554   hdr->val.str.len = len;
 555   return hdr;
 556 }
 557
 558 /* Token-buffer helper functions.  */
 559
 560 /* Expand a token list's string space. It is *vital* that
 561    list->tokens_used is correct, to get pointer fix-up right.  */
 562 void
 563 _cpp_expand_name_space (list, len)
 564      cpp_toklist *list;
 565      unsigned int len;
 566 {
 567   const U_CHAR *old_namebuf;
 568
 569   old_namebuf = list->namebuf;
 570   list->name_cap += len;
 571   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 572
 573   /* Fix up token text pointers.  */
 574   if (list->namebuf != old_namebuf)
 575     {
 576       unsigned int i;
 577
 578       for (i = 0; i < list->tokens_used; i++)
 579         if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
 580           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 581     }
 582 }
 583
 584 /* If there is not enough room for LEN more characters, expand the
 585    list by just enough to have room for LEN characters.  */
 586 void
 587 _cpp_reserve_name_space (list, len)
 588      cpp_toklist *list;
 589      unsigned int len;
 590 {
 591   unsigned int room = list->name_cap - list->name_used;
 592
 593   if (room < len)
 594     _cpp_expand_name_space (list, len - room);
 595 }
 596
 597 /* Expand the number of tokens in a list.  */
 598 void
 599 _cpp_expand_token_space (list, count)
 600      cpp_toklist *list;
 601      unsigned int count;
 602 {
 603   unsigned int n;
 604
 605   list->tokens_cap += count;
 606   n = list->tokens_cap;
 607   if (list->flags & LIST_OFFSET)
 608     list->tokens--, n++;
 609   list->tokens = (cpp_token *)
 610     xrealloc (list->tokens, n * sizeof (cpp_token));
 611   if (list->flags & LIST_OFFSET)
 612     list->tokens++;             /* Skip the dummy.  */
 613 }
 614
 615 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 616    an extra token in front of the token list, as this allows the lexer
 617    to always peek at the previous token without worrying about
 618    underflowing the list, and some initial space.  Otherwise, no
 619    token- or name-space is allocated, and there is no dummy token.  */
 620 void
 621 _cpp_init_toklist (list, flags)
 622      cpp_toklist *list;
 623      int flags;
 624 {
 625   if (flags == NO_DUMMY_TOKEN)
 626     {
 627       list->tokens_cap = 0;
 628       list->tokens = 0;
 629       list->name_cap = 0;
 630       list->namebuf = 0;
 631       list->flags = 0;
 632     }
 633   else
 634     {
 635       /* Initialize token space.  Put a dummy token before the start
 636          that will fail matches.  */
 637       list->tokens_cap = 256;   /* 4K's worth.  */
 638       list->tokens = (cpp_token *)
 639         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 640       list->tokens[0].type = CPP_EOF;
 641       list->tokens++;
 642
 643       /* Initialize name space.  */
 644       list->name_cap = 1024;
 645       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 646       list->flags = LIST_OFFSET;
 647     }
 648
 649   _cpp_clear_toklist (list);
 650 }
 651
 652 /* Clear a token list.  */
 653 void
 654 _cpp_clear_toklist (list)
 655      cpp_toklist *list;
 656 {
 657   list->tokens_used = 0;
 658   list->name_used = 0;
 659   list->directive = 0;
 660   list->paramc = 0;
 661   list->params_len = 0;
 662   list->flags &= LIST_OFFSET;  /* clear all but that one */
 663 }
 664
 665 /* Free a token list.  Does not free the list itself, which may be
 666    embedded in a larger structure.  */
 667 void
 668 _cpp_free_toklist (list)
 669      const cpp_toklist *list;
 670 {
 671   if (list->flags & LIST_OFFSET)
 672     free (list->tokens - 1);    /* Backup over dummy token.  */
 673   else
 674     free (list->tokens);
 675   free (list->namebuf);
 676 }
 677
 678 /* Compare two tokens.  */
 679 int
 680 _cpp_equiv_tokens (a, b)
 681      const cpp_token *a, *b;
 682 {
 683   if (a->type == b->type && a->flags == b->flags)
 684     switch (token_spellings[a->type].type)
 685       {
 686       default:                  /* Keep compiler happy.  */
 687       case SPELL_OPERATOR:
 688         return 1;
 689       case SPELL_CHAR:
 690       case SPELL_NONE:
 691         return a->val.aux == b->val.aux; /* arg_no or character.  */
 692       case SPELL_IDENT:
 693         return a->val.node == b->val.node;
 694       case SPELL_STRING:
 695         return (a->val.str.len == b->val.str.len
 696                 && !memcmp (a->val.str.text, b->val.str.text,
 697                             a->val.str.len));
 698       }
 699
 700   return 0;
 701 }
 702
 703 /* Compare two token lists.  */
 704 int
 705 _cpp_equiv_toklists (a, b)
 706      const cpp_toklist *a, *b;
 707 {
 708   unsigned int i;
 709
 710   if (a->tokens_used != b->tokens_used
 711       || a->flags != b->flags
 712       || a->paramc != b->paramc)
 713     return 0;
 714
 715   for (i = 0; i < a->tokens_used; i++)
 716     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 717       return 0;
 718   return 1;
 719 }
 720
 721 /* Utility routine:
 722
 723    Compares, the token TOKEN to the NUL-terminated string STRING.
 724    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 725
 726 int
 727 cpp_ideq (token, string)
 728      const cpp_token *token;
 729      const char *string;
 730 {
 731   if (token->type != CPP_NAME)
 732     return 0;
 733
 734   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 735 }
 736
 737 /* Lexing algorithm.
 738
 739  The original lexer in cpplib was made up of two passes: a first pass
  740  that replaced trigraphs and deleted escaped newlines, and a second
 741  pass that tokenized the result of the first pass.  Tokenisation was
 742  performed by peeking at the next character in the input stream.  For
 743  example, if the input stream contained "!=", the handler for the !
 744  character would peek at the next character, and if it were a '='
 745  would skip over it, and return a "!=" token, otherwise it would
 746  return just the "!" token.
 747
 748  To implement a single-pass lexer, this peeking ahead is unworkable.
 749  An arbitrary number of escaped newlines, and trigraphs (in particular
 750  ??/ which translates to the escape \), could separate the '!' and '='
 751  in the input stream, yet the next token is still a "!=".
 752
 753  Suppose instead that we lex by one logical line at a time, producing
 754  a token list or stack for each logical line, and when seeing the '!'
 755  push a CPP_NOT token on the list.  Then if the '!' is part of a
 756  longer token ("!=") we know we must see the remainder of the token by
 757  the time we reach the end of the logical line.  Thus we can have the
 758  '=' handler look at the previous token (at the end of the list / top
 759  of the stack) and see if it is a "!" token, and if so, instead of
 760  pushing a "=" token revise the existing token to be a "!=" token.
 761
 762  This works in the presence of escaped newlines, because the '\' would
 763  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 764  newline ('\n' or '\r') handler looks at the token at the top of the
 765  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 766  Hence the '=' handler would never see any intervening tokens.
 767
 768  To make trigraphs work in this context, as in precedence trigraphs
 769  are highest and converted before anything else, the '?' handler does
 770  lookahead to see if it is a trigraph, and if so skips the trigraph
 771  and pushes the token it represents onto the top of the stack.  This
 772  also works in the particular case of a CPP_BACKSLASH trigraph.
 773
 774  To the preprocessor, whitespace is only significant to the point of
 775  knowing whether whitespace precedes a particular token.  For example,
 776  the '=' handler needs to know whether there was whitespace between it
 777  and a "!" token on the top of the stack, to make the token conversion
 778  decision correctly.  So each token has a PREV_WHITE flag to
 779  indicate this - the standard permits consecutive whitespace to be
 780  regarded as a single space.  The compiler front ends are not
 781  interested in whitespace at all; they just require a token stream.
 782  Another place where whitespace is significant to the preprocessor is
  783  a #define statement - if there is whitespace between the macro name
 784  and an initial "(" token the macro is "object-like", otherwise it is
 785  a function-like macro that takes arguments.
 786
 787  However, all is not rosy.  Parsing of identifiers, numbers, comments
 788  and strings becomes trickier because of the possibility of raw
 789  trigraphs and escaped newlines in the input stream.
 790
 791  The trigraphs are three consecutive characters beginning with two
 792  question marks.  A question mark is not valid as part of a number or
 793  identifier, so parsing of a number or identifier terminates normally
 794  upon reaching it, returning to the mainloop which handles the
 795  trigraph just like it would in any other position.  Similarly for the
 796  backslash of a backslash-newline combination.  So we just need the
 797  escaped-newline dropper in the mainloop to check if the token on the
 798  top of the stack after dropping the escaped newline is a number or
  799  identifier, and if so to continue processing it as if nothing had
 800  happened.
 801
 802  For strings, we replace trigraphs whenever we reach a quote or
 803  newline, because there might be a backslash trigraph escaping them.
 804  We need to be careful that we start trigraph replacing from where we
 805  left off previously, because it is possible for a first scan to leave
 806  "fake" trigraphs that a second scan would pick up as real (e.g. the
 807  sequence "????/\n=" would find a fake ??= trigraph after removing the
 808  escaped newline.)
 809
 810  For line comments, on reaching a newline we scan the previous
  811  character(s) to see if it is escaped, and continue if it is.  Block
 812  comments ignore everything and just focus on finding the comment
 813  termination mark.  The only difficult thing, and it is surprisingly
 814  tricky, is checking if an asterisk precedes the final slash since
 815  they could be separated by escaped newlines.  If the preprocessor is
 816  invoked with the output comments option, we don't bother removing
 817  escaped newlines and replacing trigraphs for output.
 818
 819  Finally, numbers can begin with a period, which is pushed initially
 820  as a CPP_DOT token in its own right.  The digit handler checks if the
 821  previous token was a CPP_DOT not separated by whitespace, and if so
 822  pops it off the stack and pushes a period into the number's buffer
 823  before calling the number parser.
 824
 825 */
 826
 827 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 828                                                     U":>", U"<%", U"%>"};
 829
 830 /* Call when a trigraph is encountered.  It warns if necessary, and
 831    returns true if the trigraph should be honoured.  END is the third
 832    character of a trigraph in the input stream.  */
 833 static int
 834 trigraph_ok (pfile, end)
 835      cpp_reader *pfile;
 836      const unsigned char *end;
 837 {
 838   int accept = CPP_OPTION (pfile, trigraphs);
 839
 840   if (CPP_OPTION (pfile, warn_trigraphs))
 841     {
 842       unsigned int col = end - 1 - pfile->buffer->line_base;
 843       if (accept)
 844         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 845                                "trigraph ??%c converted to %c",
 846                                (int) *end, (int) trigraph_map[*end]);
 847       else
 848         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 849                                "trigraph ??%c ignored", (int) *end);
 850     }
 851   return accept;
 852 }
 853
 854 /* Scan a string for trigraphs, warning or replacing them inline as
 855    appropriate.  When parsing a string, we must call this routine
 856    before processing a newline character (if trigraphs are enabled),
 857    since the newline might be escaped by a preceding backslash
 858    trigraph sequence.  Returns a pointer to the end of the name after
 859    replacement.  */
 860
 861 static unsigned char *
 862 trigraph_replace (pfile, src, limit)
 863      cpp_reader *pfile;
 864      unsigned char *src;
 865      unsigned char *limit;
 866 {
 867   unsigned char *dest;
 868
 869   /* Starting with src[1], find two consecutive '?'.  The case of no
 870      trigraphs is streamlined.  */
 871
 872   for (src++; src + 1 < limit; src += 2)
 873     {
 874       if (src[0] != '?')
 875         continue;
 876
 877       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 878       if (src[-1] == '?')
 879         src--;
 880       else if (src + 2 == limit || src[1] != '?')
 881         continue;
 882
 883       /* Check if it really is a trigraph.  */
 884       if (trigraph_map[src[2]] == 0)
 885         continue;
 886
 887       dest = src;
 888       goto trigraph_found;
 889     }
 890   return limit;
 891
 892   /* Now we have a trigraph, we need to scan the remaining buffer, and
 893      copy-shifting its contents left if replacement is enabled.  */
 894   for (; src + 2 < limit; dest++, src++)
 895     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 896       {
 897       trigraph_found:
 898         src += 2;
 899         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 900           *dest = trigraph_map[*src];
 901       }
 902
 903   /* Copy remaining (at most 2) characters.  */
 904   while (src < limit)
 905     *dest++ = *src++;
 906   return dest;
 907 }
 908
 909 /* If CUR is a backslash or the end of a trigraphed backslash, return
 910    a pointer to its beginning, otherwise NULL.  We don't read beyond
 911    the buffer start, because there is the start of the comment in the
 912    buffer.  */
 913 static const unsigned char *
 914 backslash_start (pfile, cur)
 915      cpp_reader *pfile;
 916      const unsigned char *cur;
 917 {
 918   if (cur[0] == '\\')
 919     return cur;
 920   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 921       && trigraph_ok (pfile, cur))
 922     return cur - 2;
 923   return 0;
 924 }
 925
 926 /* Skip a C-style block comment.  This is probably the trickiest
 927    handler.  We find the end of the comment by seeing if an asterisk
 928    is before every '/' we encounter.  The nasty complication is that a
 929    previous asterisk may be separated by one or more escaped newlines.
 930    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 931 static int
 932 skip_block_comment (pfile)
 933      cpp_reader *pfile;
 934 {
 935   cpp_buffer *buffer = pfile->buffer;
 936   const unsigned char *char_after_star = 0;
 937   const unsigned char *cur = buffer->cur;
 938
 939   for (; cur < buffer->rlimit; )
 940     {
 941       unsigned char c = *cur++;
 942
 943       /* People like decorating comments with '*', so check for
 944          '/' instead for efficiency.  */
 945       if (c == '/')
 946         {
 947           /* Don't view / then * then / as finishing the comment.  */
 948           if ((cur[-2] == '*' && cur - 1 > buffer->cur)
 949               || cur - 1 == char_after_star)
 950             {
 951               buffer->cur = cur;
 952               return 0;
 953             }
 954
 955           /* Warn about potential nested comments, but not when
 956              the final character inside the comment is a '/'.
 957              Don't bother to get it right across escaped newlines.  */
 958           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 959               && cur[0] == '*' && cur[1] != '/')
 960             {
 961               buffer->cur = cur;
 962               cpp_warning (pfile, "'/*' within comment");
 963             }
 964         }
 965       else if (is_vspace (c))
 966         {
 967           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 968
 969           handle_newline (cur, buffer->rlimit, c);
 970           /* Work correctly if there is an asterisk before an
 971              arbirtrarily long sequence of escaped newlines.  */
 972           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 973             char_after_star = cur;
 974           else
 975             char_after_star = 0;
 976         }
 977       else if (c == '\t')
 978         adjust_column (pfile, cur - 1);
 979     }
 980
 981   buffer->cur = cur;
 982   return 1;
 983 }
 984
 985 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 986    non-zero if a multiline comment.  */
 987 static int
 988 skip_line_comment (pfile)
 989      cpp_reader *pfile;
 990 {
 991   cpp_buffer *buffer = pfile->buffer;
 992   register const unsigned char *cur = buffer->cur;
 993   int multiline = 0;
 994
 995   for (; cur < buffer->rlimit; )
 996     {
 997       unsigned char c = *cur++;
 998
 999       if (is_vspace (c))
1000         {
1001           /* Check for a (trigaph?) backslash escaping the newline.  */
1002           if (!backslash_start (pfile, cur - 2))
1003             goto out;
1004           multiline = 1;
1005           handle_newline (cur, buffer->rlimit, c);
1006         }
1007     }
1008   cur++;
1009
1010  out:
1011   buffer->cur = cur - 1;        /* Leave newline for caller.  */
1012   return multiline;
1013 }
1014
1015 /* TAB points to a \t character.  Update col_adjust so we track the
1016    column correctly.  */
1017 static void
1018 adjust_column (pfile, tab)
1019      cpp_reader *pfile;
1020      const U_CHAR *tab;
1021 {
1022   /* Zero-based column.  */
1023   unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
1024
1025   /* Round it up to multiple of the tabstop, but subtract 1 since the
1026      tab itself occupies a character position.  */
1027   pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
1028                         - col % CPP_OPTION (pfile, tabstop)) - 1;
1029 }
1030
1031 /* Skips whitespace, stopping at next non-whitespace character.
1032    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
1033    to be assigned the correct column.  */
1034 static void
1035 skip_whitespace (pfile, in_directive)
1036      cpp_reader *pfile;
1037      int in_directive;
1038 {
1039   cpp_buffer *buffer = pfile->buffer;
1040   unsigned short warned = 0;
1041
1042   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1043   while (buffer->cur < buffer->rlimit)
1044     {
1045       unsigned char c = *buffer->cur;
1046
1047       if (!is_nvspace (c))
1048         break;
1049
1050       buffer->cur++;
1051       /* Horizontal space always OK.  */
1052       if (c == ' ')
1053         continue;
1054       else if (c == '\t')
1055         adjust_column (pfile, buffer->cur - 1);
1056       /* Must be \f \v or \0.  */
1057       else if (c == '\0')
1058         {
1059           if (!warned)
1060             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1061                                    CPP_BUF_COL (buffer),
1062                                    "embedded null character ignored");
1063           warned = 1;
1064         }
1065       else if (in_directive && CPP_PEDANTIC (pfile))
1066         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1067                                CPP_BUF_COL (buffer),
1068                                "%s in preprocessing directive",
1069                                c == '\f' ? "form feed" : "vertical tab");
1070     }
1071 }
1072
1073 /* Parse (append) an identifier.  */
1074 static const U_CHAR *
1075 parse_name (pfile, tok, cur, rlimit)
1076      cpp_reader *pfile;
1077      cpp_token *tok;
1078      const U_CHAR *cur, *rlimit;
1079 {
1080   const U_CHAR *name = cur;
1081   unsigned int len;
1082
1083   while (cur < rlimit)
1084     {
1085       if (! is_idchar (*cur))
1086         break;
1087       /* $ is not a legal identifier character in the standard, but is
1088          commonly accepted as an extension.  Don't warn about it in
1089          skipped conditional blocks. */
1090       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1091         {
1092           CPP_BUFFER (pfile)->cur = cur;
1093           cpp_pedwarn (pfile, "'$' character in identifier");
1094         }
1095       cur++;
1096     }
1097   len = cur - name;
1098
1099   if (tok->val.node)
1100     {
1101       unsigned int oldlen = tok->val.node->length;
1102       U_CHAR *newname = alloca (oldlen + len);
1103       memcpy (newname, tok->val.node->name, oldlen);
1104       memcpy (newname + oldlen, name, len);
1105       len += oldlen;
1106       name = newname;
1107     }
1108
1109   tok->val.node = cpp_lookup (pfile, name, len);
1110   return cur;
1111 }
1112
1113 /* Parse (append) a number.  */
1114 static void
1115 parse_number (pfile, list, name)
1116      cpp_reader *pfile;
1117      cpp_toklist *list;
1118      cpp_string *name;
1119 {
1120   const unsigned char *name_limit;
1121   unsigned char *namebuf;
1122   cpp_buffer *buffer = pfile->buffer;
1123   register const unsigned char *cur = buffer->cur;
1124
1125  expanded:
1126   name_limit = list->namebuf + list->name_cap;
1127   namebuf = list->namebuf + list->name_used;
1128
1129   for (; cur < buffer->rlimit && namebuf < name_limit; )
1130     {
1131       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1132
1133       /* Perhaps we should accept '$' here if we accept it for
1134          identifiers.  We know namebuf[-1] is safe, because for c to
1135          be a sign we must have pushed at least one character.  */
1136       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1137         goto out;
1138
1139       namebuf++;
1140       cur++;
1141     }
1142
1143   /* Run out of name space?  */
1144   if (cur < buffer->rlimit)
1145     {
1146       list->name_used = namebuf - list->namebuf;
1147       auto_expand_name_space (list);
1148       goto expanded;
1149     }
1150
1151  out:
1152   buffer->cur = cur;
1153   name->len = namebuf - name->text;
1154   list->name_used = namebuf - list->namebuf;
1155 }
1156
1157 /* Places a string terminated by an unescaped TERMINATOR into a
1158    cpp_string, which should be expandable and thus at the top of the
1159    list's stack.  Handles embedded trigraphs, if necessary, and
1160    escaped newlines.
1161
1162    Can be used for character constants (terminator = '\''), string
1163    constants ('"') and angled headers ('>').  Multi-line strings are
1164    allowed, except for within directives.  */
1165
1166 static void
1167 parse_string (pfile, list, token, terminator)
1168      cpp_reader *pfile;
1169      cpp_toklist *list;
1170      cpp_token *token;
1171      unsigned int terminator;
1172 {
1173   cpp_buffer *buffer = pfile->buffer;
1174   cpp_string *name = &token->val.str;
1175   register const unsigned char *cur = buffer->cur;
1176   const unsigned char *name_limit;
1177   unsigned char *namebuf;
1178   unsigned int null_count = 0;
1179   unsigned int trigraphed = list->name_used;
1180
1181  expanded:
1182   name_limit = list->namebuf + list->name_cap;
1183   namebuf = list->namebuf + list->name_used;
1184
1185   for (; cur < buffer->rlimit && namebuf < name_limit; )
1186     {
1187       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1188
1189       if (c == '\0')
1190         null_count++;
1191       else if (c == terminator || is_vspace (c))
1192         {
1193           /* Needed for trigraph_replace and multiline string warning.  */
1194           buffer->cur = cur;
1195
1196           /* Scan for trigraphs before checking if backslash-escaped.  */
1197           if ((CPP_OPTION (pfile, trigraphs)
1198                || CPP_OPTION (pfile, warn_trigraphs))
1199               && namebuf - (list->namebuf + trigraphed) >= 3)
1200             {
1201               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1202                                           namebuf);
1203               /* The test above guarantees trigraphed will be positive.  */
1204               trigraphed = namebuf - list->namebuf - 2;
1205             }
1206
1207           namebuf--;     /* Drop the newline / terminator from the name.  */
1208           if (is_vspace (c))
1209             {
1210               /* Drop a backslash newline, and continue. */
1211               if (namebuf[-1] == '\\')
1212                 {
1213                   handle_newline (cur, buffer->rlimit, c);
1214                   namebuf--;
1215                   continue;
1216                 }
1217
1218               cur--;
1219
1220               /* In assembly language, silently terminate strings of
1221                  either variety at end of line.  This is a kludge
1222                  around not knowing where comments are.  */
1223               if (CPP_OPTION (pfile, lang_asm))
1224                 goto out;
1225
1226               /* Character constants and header names may not extend
1227                  over multiple lines.  In Standard C, neither may
1228                  strings.  We accept multiline strings as an
1229                  extension.  (Even in directives - otherwise, glibc's
1230                  longlong.h breaks.)  */
1231               if (terminator != '"')
1232                 goto unterminated;
1233
1234               cur++;  /* Move forwards again.  */
1235
1236               if (pfile->multiline_string_line == 0)
1237                 {
1238                   pfile->multiline_string_line = token->line;
1239                   pfile->multiline_string_column = token->col;
1240                   if (CPP_PEDANTIC (pfile))
1241                     cpp_pedwarn (pfile, "multi-line string constant");
1242                 }
1243
1244               *namebuf++ = '\n';
1245               handle_newline (cur, buffer->rlimit, c);
1246             }
1247           else
1248             {
1249               unsigned char *temp;
1250
1251               /* An odd number of consecutive backslashes represents
1252                  an escaped terminator.  */
1253               temp = namebuf - 1;
1254               while (temp >= name->text && *temp == '\\')
1255                 temp--;
1256
1257               if ((namebuf - temp) & 1)
1258                 goto out;
1259               namebuf++;
1260             }
1261         }
1262     }
1263
1264   /* Run out of name space?  */
1265   if (cur < buffer->rlimit)
1266     {
1267       list->name_used = namebuf - list->namebuf;
1268       auto_expand_name_space (list);
1269       goto expanded;
1270     }
1271
1272   /* We may not have trigraph-replaced the input for this code path,
1273      but as the input is in error by being unterminated we don't
1274      bother.  Prevent warnings about no newlines at EOF.  */
1275   if (is_vspace (cur[-1]))
1276     cur--;
1277
1278  unterminated:
1279   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1280
1281   if (terminator == '\"' && pfile->multiline_string_line != list->line
1282       && pfile->multiline_string_line != 0)
1283     {
1284       cpp_error_with_line (pfile, pfile->multiline_string_line,
1285                            pfile->multiline_string_column,
1286                            "possible start of unterminated string literal");
1287       pfile->multiline_string_line = 0;
1288     }
1289
1290  out:
1291   buffer->cur = cur;
1292   name->len = namebuf - name->text;
1293   list->name_used = namebuf - list->namebuf;
1294
1295   if (null_count > 0)
1296     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1297                          : "null character preserved"));
1298 }
1299
1300 /* The character TYPE helps us distinguish comment types: '*' = C
1301    style, '/' = C++ style.  For code simplicity, the stored comment
1302    includes the comment start and any terminator.  */
1303
1304 #define COMMENT_START_LEN 2
1305 static void
1306 save_comment (list, token, from, len, type)
1307      cpp_toklist *list;
1308      cpp_token *token;
1309      const unsigned char *from;
1310      unsigned int len;
1311      unsigned int type;
1312 {
1313   unsigned char *buffer;
1314
1315   len += COMMENT_START_LEN;
1316
1317   if (list->name_used + len > list->name_cap)
1318     _cpp_expand_name_space (list, len);
1319
1320   INIT_TOKEN_STR (list, token);
1321   token->type = CPP_COMMENT;
1322   token->val.str.len = len;
1323
1324   buffer = list->namebuf + list->name_used;
1325   list->name_used += len;
1326
1327   /* Copy the comment.  */
1328   if (type == '*')
1329     {
1330       *buffer++ = '/';
1331       *buffer++ = '*';
1332     }
1333   else
1334     {
1335       *buffer++ = type;
1336       *buffer++ = type;
1337     }
1338   memcpy (buffer, from, len - COMMENT_START_LEN);
1339 }
1340
1341 /*
1342  *  The tokenizer's main loop.  Returns a token list, representing a
1343  *  logical line in the input file.  On EOF after some tokens have
1344  *  been processed, we return immediately.  Then in next call, or if
1345  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1346  *  token is placed in the list.
1347  *
1348  *  Implementation relies almost entirely on lookback, rather than
1349  *  looking forwards.  This means that tokenization requires just
1350  *  a single pass of the file, even in the presence of trigraphs and
1351  *  escaped newlines, providing significant performance benefits.
1352  *  Trigraph overhead is negligible if they are disabled, and low
1353  *  even when enabled.
1354  */
1355
/* Both macros rely on variables of the function they are used in:
   list, and for MIGHT_BE_DIRECTIVE also cur_token and first_token.  */

/* Non-zero once list->directive has been set for the current line.  */
#define KNOWN_DIRECTIVE() (list->directive != 0)

/* Non-zero when cur_token is the token in slot first_token + 1 of the
   list and the token just before it is a CPP_HASH.  */
#define MIGHT_BE_DIRECTIVE() \
  (cur_token == list->tokens + first_token + 1 \
   && (cur_token - 1)->type == CPP_HASH)
1359
1360 static void
1361 lex_line (pfile, list)
1362      cpp_reader *pfile;
1363      cpp_toklist *list;
1364 {
1365   cpp_token *cur_token, *token_limit, *first;
1366   cpp_buffer *buffer = pfile->buffer;
1367   const unsigned char *cur = buffer->cur;
1368   unsigned char flags = 0;
1369   unsigned int first_token = list->tokens_used;
1370
1371   if (!(list->flags & LIST_OFFSET))
1372     (abort) ();
1373
1374   list->file = buffer->nominal_fname;
1375   list->line = CPP_BUF_LINE (buffer);
1376   pfile->col_adjust = 0;
1377   pfile->in_lex_line = 1;
1378   if (cur == buffer->buf)
1379     list->flags |= BEG_OF_FILE;
1380
1381  expanded:
1382   token_limit = list->tokens + list->tokens_cap;
1383   cur_token = list->tokens + list->tokens_used;
1384
1385   for (; cur < buffer->rlimit && cur_token < token_limit;)
1386     {
1387       unsigned char c;
1388
1389       /* Optimize non-vertical whitespace skipping; most tokens are
1390          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1391       c = *cur;
1392       if (is_nvspace (c))
1393         {
1394           buffer->cur = cur;
1395           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1396                                    && cur_token > &list->tokens[first_token]));
1397           cur = buffer->cur;
1398
1399           flags = PREV_WHITE;
1400           if (cur == buffer->rlimit)
1401             break;
1402           c = *cur;
1403         }
1404       cur++;
1405
1406       /* Initialize current token.  CPP_EOF will not be fixed up by
1407          expand_name_space.  */
1408       list->tokens_used = cur_token - list->tokens + 1;
1409       cur_token->type = CPP_EOF;
1410       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1411       cur_token->line = CPP_BUF_LINE (buffer);
1412       cur_token->flags = flags;
1413       flags = 0;
1414
1415       switch (c)
1416         {
1417         case '0': case '1': case '2': case '3': case '4':
1418         case '5': case '6': case '7': case '8': case '9':
1419           {
1420             int prev_dot;
1421
1422             cur--;              /* Backup character.  */
1423             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1424             if (prev_dot)
1425               cur_token--;
1426             INIT_TOKEN_STR (list, cur_token);
1427             /* Prepend an immediately previous CPP_DOT token.  */
1428             if (prev_dot)
1429               {
1430                 if (list->name_cap == list->name_used)
1431                   auto_expand_name_space (list);
1432
1433                 cur_token->val.str.len = 1;
1434                 list->namebuf[list->name_used++] = '.';
1435               }
1436
1437           continue_number:
1438             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1439             buffer->cur = cur;
1440             parse_number (pfile, list, &cur_token->val.str);
1441             cur = buffer->cur;
1442           }
1443           /* Check for # 123 form of #line.  */
1444           if (MIGHT_BE_DIRECTIVE ())
1445             list->directive = _cpp_check_linemarker (pfile, cur_token,
1446                                                      !(cur_token[-1].flags
1447                                                        & PREV_WHITE));
1448           cur_token++;
1449           break;
1450
1451         letter:
1452         case '_':
1453         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1454         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1455         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1456         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1457         case 'y': case 'z':
1458         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1459         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1460         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1461         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1462         case 'Y': case 'Z':
1463           cur--;                     /* Backup character.  */
1464           cur_token->val.node = 0;
1465           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1466
1467         continue_name:
1468           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1469
1470           if (MIGHT_BE_DIRECTIVE ())
1471             list->directive = _cpp_check_directive (pfile, cur_token,
1472                                                     !(list->tokens[0].flags
1473                                                       & PREV_WHITE));
1474           cur_token++;
1475           break;
1476
1477         case '\'':
1478         case '\"':
1479           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1480           /* Do we have a wide string?  */
1481           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1482               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1483             BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1484
1485         do_parse_string:
1486           /* Here c is one of ' " or >.  */
1487           INIT_TOKEN_STR (list, cur_token);
1488           buffer->cur = cur;
1489           parse_string (pfile, list, cur_token, c);
1490           cur = buffer->cur;
1491           cur_token++;
1492           break;
1493
1494         case '/':
1495           cur_token->type = CPP_DIV;
1496           if (IMMED_TOKEN ())
1497             {
1498               if (PREV_TOKEN_TYPE == CPP_DIV)
1499                 {
1500                   /* We silently allow C++ comments in system headers,
1501                      irrespective of conformance mode, because lots of
1502                      broken systems do that and trying to clean it up
1503                      in fixincludes is a nightmare.  */
1504                   if (CPP_IN_SYSTEM_HEADER (pfile))
1505                     goto do_line_comment;
1506                   else if (CPP_OPTION (pfile, cplusplus_comments))
1507                     {
1508                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1509                           && ! buffer->warned_cplusplus_comments)
1510                         {
1511                           buffer->cur = cur;
1512                           cpp_pedwarn (pfile,
1513                              "C++ style comments are not allowed in ISO C89");
1514                           cpp_pedwarn (pfile,
1515                           "(this will be reported only once per input file)");
1516                           buffer->warned_cplusplus_comments = 1;
1517                         }
1518                     do_line_comment:
1519                       buffer->cur = cur;
1520 #if 0 /* Leave until new lexer in place.  */
1521                       if (cur[-2] != c)
1522                         cpp_warning (pfile,
1523                                      "comment start split across lines");
1524 #endif
1525                       if (skip_line_comment (pfile))
1526                         cpp_warning (pfile, "multi-line comment");
1527
1528                       /* Back-up to first '-' or '/'.  */
1529                       cur_token--;
1530                       if (!CPP_OPTION (pfile, discard_comments)
1531                           && (!KNOWN_DIRECTIVE()
1532                               || (list->directive->flags & COMMENTS)))
1533                         save_comment (list, cur_token++, cur,
1534                                       buffer->cur - cur, c);
1535                       else
1536                         flags = PREV_WHITE;
1537
1538                       cur = buffer->cur;
1539                       break;
1540                     }
1541                 }
1542             }
1543           cur_token++;
1544           break;
1545
1546         case '*':
1547           cur_token->type = CPP_MULT;
1548           if (IMMED_TOKEN ())
1549             {
1550               if (PREV_TOKEN_TYPE == CPP_DIV)
1551                 {
1552                   buffer->cur = cur;
1553 #if 0 /* Leave until new lexer in place.  */
1554                   if (cur[-2] != '/')
1555                     cpp_warning (pfile,
1556                                  "comment start '/*' split across lines");
1557 #endif
1558                   if (skip_block_comment (pfile))
1559                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1560                                          "unterminated comment");
1561 #if 0 /* Leave until new lexer in place.  */
1562                   else if (buffer->cur[-2] != '*')
1563                     cpp_warning (pfile,
1564                                  "comment end '*/' split across lines");
1565 #endif
1566                   /* Back up to opening '/'.  */
1567                   cur_token--;
1568                   if (!CPP_OPTION (pfile, discard_comments)
1569                       && (!KNOWN_DIRECTIVE()
1570                           || (list->directive->flags & COMMENTS)))
1571                     save_comment (list, cur_token++, cur,
1572                                   buffer->cur - cur, c);
1573                   else
1574                     flags = PREV_WHITE;
1575
1576                   cur = buffer->cur;
1577                   break;
1578                 }
1579               else if (CPP_OPTION (pfile, cplusplus))
1580                 {
1581                   /* In C++, there are .* and ->* operators.  */
1582                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1583                     BACKUP_TOKEN (CPP_DEREF_STAR);
1584                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1585                     BACKUP_TOKEN (CPP_DOT_STAR);
1586                 }
1587             }
1588           cur_token++;
1589           break;
1590
1591         case '\n':
1592         case '\r':
1593           handle_newline (cur, buffer->rlimit, c);
1594           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1595             {
1596               if (IMMED_TOKEN ())
1597                 {
1598                   /* Remove the escaped newline.  Then continue to process
1599                      any interrupted name or number.  */
1600                   cur_token--;
1601                   /* Backslash-newline may not be immediately followed by
1602                      EOF (C99 5.1.1.2).  */
1603                   if (cur >= buffer->rlimit)
1604                     {
1605                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1606                       break;
1607                     }
1608                   if (IMMED_TOKEN ())
1609                     {
1610                       cur_token--;
1611                       if (cur_token->type == CPP_NAME)
1612                         goto continue_name;
1613                       else if (cur_token->type == CPP_NUMBER)
1614                         goto continue_number;
1615                       cur_token++;
1616                     }
1617                   /* Remember whitespace setting.  */
1618                   flags = cur_token->flags;
1619                   break;
1620                 }
1621               else
1622                 {
1623                   buffer->cur = cur;
1624                   cpp_warning (pfile,
1625                                "backslash and newline separated by space");
1626                 }
1627             }
1628           else if (MIGHT_BE_DIRECTIVE ())
1629             {
1630               /* "Null directive." C99 6.10.7: A preprocessing
1631                  directive of the form # <new-line> has no effect.
1632
1633                  But it is still a directive, and therefore disappears
1634                  from the output. */
1635               cur_token--;
1636               if (cur_token->flags & PREV_WHITE
1637                   && CPP_WTRADITIONAL (pfile))
1638                 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1639             }
1640
1641           /* Skip vertical space until we have at least one token to
1642              return.  */
1643           if (cur_token != &list->tokens[first_token])
1644             goto out;
1645           list->line = CPP_BUF_LINE (buffer);
1646           break;
1647
1648         case '-':
1649           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1650             REVISE_TOKEN (CPP_MINUS_MINUS);
1651           else
1652             PUSH_TOKEN (CPP_MINUS);
1653           break;
1654
1655         make_hash:
1656         case '#':
1657           /* The digraph flag checking ensures that ## and %:%:
1658              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1659           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1660               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1661             REVISE_TOKEN (CPP_PASTE);
1662           else
1663             PUSH_TOKEN (CPP_HASH);
1664           break;
1665
1666         case ':':
1667           cur_token->type = CPP_COLON;
1668           if (IMMED_TOKEN ())
1669             {
1670               if (PREV_TOKEN_TYPE == CPP_COLON
1671                   && CPP_OPTION (pfile, cplusplus))
1672                 BACKUP_TOKEN (CPP_SCOPE);
1673               else if (CPP_OPTION (pfile, digraphs))
1674                 {
1675                   /* Digraph: "<:" is a '['  */
1676                   if (PREV_TOKEN_TYPE == CPP_LESS)
1677                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1678                   /* Digraph: "%:" is a '#'  */
1679                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1680                     {
1681                       (--cur_token)->flags |= DIGRAPH;
1682                       goto make_hash;
1683                     }
1684                 }
1685             }
1686           cur_token++;
1687           break;
1688
1689         case '&':
1690           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1691             REVISE_TOKEN (CPP_AND_AND);
1692           else
1693             PUSH_TOKEN (CPP_AND);
1694           break;
1695
1696         make_or:
1697         case '|':
1698           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1699             REVISE_TOKEN (CPP_OR_OR);
1700           else
1701             PUSH_TOKEN (CPP_OR);
1702           break;
1703
1704         case '+':
1705           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1706             REVISE_TOKEN (CPP_PLUS_PLUS);
1707           else
1708             PUSH_TOKEN (CPP_PLUS);
1709           break;
1710
1711         case '=':
1712             /* This relies on equidistance of "?=" and "?" tokens.  */
1713           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1714             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1715           else
1716             PUSH_TOKEN (CPP_EQ);
1717           break;
1718
1719         case '>':
1720           cur_token->type = CPP_GREATER;
1721           if (IMMED_TOKEN ())
1722             {
1723               if (PREV_TOKEN_TYPE == CPP_GREATER)
1724                 BACKUP_TOKEN (CPP_RSHIFT);
1725               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1726                 BACKUP_TOKEN (CPP_DEREF);
1727               else if (CPP_OPTION (pfile, digraphs))
1728                 {
1729                   /* Digraph: ":>" is a ']'  */
1730                   if (PREV_TOKEN_TYPE == CPP_COLON)
1731                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1732                   /* Digraph: "%>" is a '}'  */
1733                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1734                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1735                 }
1736             }
1737           cur_token++;
1738           break;
1739
1740         case '<':
1741           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1742             {
1743               REVISE_TOKEN (CPP_LSHIFT);
1744               break;
1745             }
1746           /* Is this the beginning of a header name?  */
1747           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1748             {
1749               c = '>';  /* Terminator.  */
1750               cur_token->type = CPP_HEADER_NAME;
1751               goto do_parse_string;
1752             }
1753           PUSH_TOKEN (CPP_LESS);
1754           break;
1755
1756         case '%':
1757           /* Digraph: "<%" is a '{'  */
1758           cur_token->type = CPP_MOD;
1759           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1760               && CPP_OPTION (pfile, digraphs))
1761             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1762           cur_token++;
1763           break;
1764
1765         case '?':
1766           if (cur + 1 < buffer->rlimit && *cur == '?'
1767               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1768             {
1769               /* Handle trigraph.  */
1770               cur++;
1771               switch (*cur++)
1772                 {
1773                 case '(': goto make_open_square;
1774                 case ')': goto make_close_square;
1775                 case '<': goto make_open_brace;
1776                 case '>': goto make_close_brace;
1777                 case '=': goto make_hash;
1778                 case '!': goto make_or;
1779                 case '-': goto make_complement;
1780                 case '/': goto make_backslash;
1781                 case '\'': goto make_xor;
1782                 }
1783             }
1784           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1785             {
1786               /* GNU C++ defines <? and >? operators.  */
1787               if (PREV_TOKEN_TYPE == CPP_LESS)
1788                 {
1789                   REVISE_TOKEN (CPP_MIN);
1790                   break;
1791                 }
1792               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1793                 {
1794                   REVISE_TOKEN (CPP_MAX);
1795                   break;
1796                 }
1797             }
1798           PUSH_TOKEN (CPP_QUERY);
1799           break;
1800
1801         case '.':
1802           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1803               && IMMED_TOKEN ()
1804               && !(cur_token[-1].flags & PREV_WHITE))
1805             {
1806               cur_token -= 2;
1807               PUSH_TOKEN (CPP_ELLIPSIS);
1808             }
1809           else
1810             PUSH_TOKEN (CPP_DOT);
1811           break;
1812
1813         make_complement:
1814         case '~': PUSH_TOKEN (CPP_COMPL); break;
1815         make_xor:
1816         case '^': PUSH_TOKEN (CPP_XOR); break;
1817         make_open_brace:
1818         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1819         make_close_brace:
1820         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1821         make_open_square:
1822         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1823         make_close_square:
1824         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1825         make_backslash:
1826         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1827         case '!': PUSH_TOKEN (CPP_NOT); break;
1828         case ',': PUSH_TOKEN (CPP_COMMA); break;
1829         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1830         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1831         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1832
1833         case '$':
1834           if (CPP_OPTION (pfile, dollars_in_ident))
1835             goto letter;
1836           /* Fall through */
1837         default:
1838           cur_token->val.aux = c;
1839           PUSH_TOKEN (CPP_OTHER);
1840           break;
1841         }
1842     }
1843
1844   /* Run out of token space?  */
1845   if (cur_token == token_limit)
1846     {
1847       list->tokens_used = cur_token - list->tokens;
1848       _cpp_expand_token_space (list, 256);
1849       goto expanded;
1850     }
1851
1852   cur_token->flags = flags;
1853   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1854     {
1855       if (cur > buffer->buf && !is_vspace (cur[-1]))
1856         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1857                                CPP_BUF_COLUMN (buffer, cur),
1858                                "no newline at end of file");
1859       cur_token++->type = CPP_EOF;
1860     }
1861
1862  out:
1863   /* All tokens are allocated, so the memory location is fixed.  */
1864   first = &list->tokens[first_token];
1865
1866   /* Don't complain about the null directive, nor directives in
1867      assembly source: we don't know where the comments are, and # may
1868      introduce assembler pseudo-ops.  Don't complain about invalid
1869      directives in skipped conditional groups (6.10 p4).  */
1870   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1871       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1872     {
1873       if (first[1].type == CPP_NAME)
1874         cpp_error (pfile, "invalid preprocessing directive #%.*s",
1875                    (int) first[1].val.node->length, first[1].val.node->name);
1876       else
1877         cpp_error (pfile, "invalid preprocessing directive");
1878     }
1879
1880   /* Put EOF at end of known directives.  This covers "directives do
1881      not extend beyond the end of the line (description 6.10 part 2)".  */
1882   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1883     {
1884       pfile->first_directive_token = first;
1885       cur_token++->type = CPP_EOF;
1886     }
1887
1888   /* Directives, known or not, always start a new line.  */
1889   if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1890     first->flags |= BOL;
1891   else
1892     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1893        up the invocation of a function-like macro, new line is
1894        considered a normal white-space character.  */
1895     first->flags |= PREV_WHITE;
1896
1897   buffer->cur = cur;
1898   list->tokens_used = cur_token - list->tokens;
1899   pfile->in_lex_line = 0;
1900 }
1901
1902 /* Write the spelling of a token TOKEN, with any appropriate
1903    whitespace before it, to the token_buffer.  PREV is the previous
1904    token, which is used to determine if we need to shove in an extra
1905    space in order to avoid accidental token paste.  */
1906 static void
1907 output_token (pfile, token, prev)
1908      cpp_reader *pfile;
1909      const cpp_token *token, *prev;
1910 {
1911   int dummy;
1912
1913   if (token->col && (token->flags & BOL))
1914     {
1915       /* Supply enough whitespace to put this token in its original
1916          column.  Don't bother trying to reconstruct tabs; we can't
1917          get it right in general, and nothing ought to care.  (Yes,
1918          some things do care; the fault lies with them.)  */
1919       unsigned char *buffer;
1920       unsigned int spaces = token->col - 1;
1921
1922       CPP_RESERVE (pfile, token->col);
1923       buffer = pfile->limit;
1924
1925       while (spaces--)
1926         *buffer++ = ' ';
1927       pfile->limit = buffer;
1928     }
1929   else if (token->flags & PREV_WHITE)
1930     CPP_PUTC (pfile, ' ');
1931   else if (prev)
1932     {
1933       /* Check for and prevent accidental token pasting.  */
1934       if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1935         CPP_PUTC (pfile, ' ');
1936       /* can_paste doesn't catch all the accidental pastes.
1937          Consider a + ++b - if there is not a space between the + and ++, it
1938          will be misparsed as a++ + b.  */
1939       else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1940                || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1941         CPP_PUTC (pfile, ' ');
1942     }
1943
1944   CPP_RESERVE (pfile, TOKEN_LEN (token));
1945   pfile->limit = spell_token (pfile, token, pfile->limit);
1946 }
1947
1948 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1949    already contain the enough space to hold the token's spelling.
1950    Returns a pointer to the character after the last character
1951    written.  */
1952
1953 static unsigned char *
1954 spell_token (pfile, token, buffer)
1955      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1956      const cpp_token *token;
1957      unsigned char *buffer;
1958 {
1959   switch (token_spellings[token->type].type)
1960     {
1961     case SPELL_OPERATOR:
1962       {
1963         const unsigned char *spelling;
1964         unsigned char c;
1965
1966         if (token->flags & DIGRAPH)
1967           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1968         else
1969           spelling = token_spellings[token->type].spelling;
1970
1971         while ((c = *spelling++) != '\0')
1972           *buffer++ = c;
1973       }
1974       break;
1975
1976     case SPELL_IDENT:
1977       memcpy (buffer, token->val.node->name, token->val.node->length);
1978       buffer += token->val.node->length;
1979       break;
1980
1981     case SPELL_STRING:
1982       {
1983         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1984           *buffer++ = 'L';
1985
1986         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1987           *buffer++ = '"';
1988         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1989           *buffer++ = '\'';
1990
1991         memcpy (buffer, token->val.str.text, token->val.str.len);
1992         buffer += token->val.str.len;
1993
1994         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1995           *buffer++ = '"';
1996         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1997           *buffer++ = '\'';
1998       }
1999       break;
2000
2001     case SPELL_CHAR:
2002       *buffer++ = token->val.aux;
2003       break;
2004
2005     case SPELL_NONE:
2006       cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
2007       break;
2008     }
2009
2010   return buffer;
2011 }
2012
2013 /* Return the spelling of a token known to be an operator.
2014    Does not distinguish digraphs from their counterparts.  */
2015 const unsigned char *
2016 _cpp_spell_operator (type)
2017      enum cpp_ttype type;
2018 {
2019   if (token_spellings[type].type == SPELL_OPERATOR)
2020     return token_spellings[type].spelling;
2021   else
2022     return token_names[type];
2023 }
2024
2025
2026 /* Macro expansion algorithm.
2027
2028 Macro expansion is implemented by a single-pass algorithm; there are
2029 no rescan passes involved.  cpp_get_token expands just enough to be
2030 able to return a token to the caller; a consequence is that when it
2031 returns, the preprocessor can be in a state of mid-expansion.  The
2032 algorithm does not work by fully expanding a macro invocation into
2033 some kind of token list, and then returning them one by one.
2034
2035 Our expansion state is recorded in a context stack.  We start out with
2036 a single context on the stack, let's call it base context.  This
2037 consists of the token list returned by lex_line that forms the next
2038 logical line in the source file.
2039
2040 The current level in the context stack is stored in the cur_context
2041 member of the cpp_reader structure.  The context it references keeps,
2042 amongst other things, a count of how many tokens form that context and
2043 our position within those tokens.
2044
2045 Fundamentally, calling cpp_get_token will return the next token from
2046 the current context.  If we're at the end of the current context, that
2047 context is popped from the stack first, unless it is the base context,
2048 in which case the next logical line is lexed from the source file.
2049
2050 However, before returning the token, if it is a CPP_NAME token
2051 _cpp_get_token checks to see if it is a macro and if it is enabled.
2052 Each time it encounters a macro name, it calls push_macro_context.
2053 This function checks that the macro should be expanded (with
2054 is_macro_enabled), and if so pushes a new macro context on the stack
2055 which becomes the current context.  It then loops back to read the
2056 first token of the macro context.
2057
2058 A macro context basically consists of the token list representing the
2059 macro's replacement list, which was saved in the hash table by
2060 save_macro_expansion when its #define statement was parsed.  If the
2061 macro is function-like, it also contains the tokens that form the
2062 arguments to the macro.  I say more about macro arguments below, but
2063 for now just saying that each argument is a set of pointers to tokens
2064 is enough.
2065
2066 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2067 token.  This represents an argument passed to the macro, with the
2068 argument number stored in the token's AUX field.  The argument should
2069 be substituted; this is achieved by pushing an "argument context".  An
2070 argument context just refers to the tokens forming the argument,
2071 which are obtained directly from the macro context.  The STRINGIFY
2072 flag on a CPP_MACRO_ARG token indicates that the argument should be
2073 stringified.
2074
2075 Here's a few simple rules the context stack obeys:-
2076
2077   1) The lex_line token list is always context zero.
2078
2079   2) Context 1, if it exists, must be a macro context.
2080
2081   3) An argument context can only appear above a macro context.
2082
2083   4) A macro context can appear above the base context, another macro
2084   context, or an argument context.
2085
2086   5) These imply that the minimal level of an argument context is 2.
2087
2088 The only tricky thing left is ensuring that macros are enabled and
2089 disabled correctly.  The algorithm controls macro expansion by the
2090 level of the context a token is taken from in the context stack.  If a
2091 token is taken from a level equal to no_expand_level (a member of
2092 struct cpp_reader), no expansion is performed.
2093
2094 When popping a context off the stack, if no_expand_level equals the
2095 level of the popped context, it is reduced by one to match the new
2096 context level, so that expansion is still disabled.  It does not
2097 increase if a context is pushed, though.  It starts out life as
2098 UINT_MAX, which has the effect that initially macro expansion is
2099 enabled.  I explain how this mechanism works below.
2100
2101 The standard requires:-
2102
2103   1) Arguments to be fully expanded before substitution.
2104
2105   2) Stringified arguments to not be expanded, nor the tokens
2106   immediately surrounding a ## operator.
2107
2108   3) Continual rescanning until there are no more macros left to
2109   replace.
2110
2111   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2112   expanded again during later rescans.  This prevents infinite
2113   recursion.
2114
2115 The first thing to observe is that stage 3) is mostly redundant.
2116 Since a macro is disabled once it has been expanded, how can a rescan
2117 find an unexpanded macro name?  There are only two cases where this is
2118 possible:-
2119
2120   a) If the macro name results from a token paste operation.
2121
2122   b) If the macro in question is a function-like macro that hasn't
2123   already been expanded because previously there was not the required
2124   '(' token immediately following it.  This is only possible when an
2125   argument is substituted, and after substitution the last token of
2126   the argument can bind with a parenthesis appearing in the tokens
2127   following the substitution.  Note that if the '(' appears within the
2128   argument, the ')' must too, as expanding macro arguments cannot
2129   "suck in" tokens outside the argument.
2130
2131 So we tackle this as follows.  When parsing the macro invocation for
2132 arguments, we record the tokens forming each argument as a list of
2133 pointers to those tokens.  We do not expand any tokens that are "raw",
2134 i.e. directly from the macro invocation, but other tokens that come
2135 from (nested) argument substitution are fully expanded.
2136
2137 This is achieved by setting the no_expand_level to that of the macro
2138 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2139 forming an argument, because parse_args (indirectly) calls
2140 get_raw_token which automatically pushes argument contexts and traces
2141 into them.  Since these contexts are at a higher level than the
2142 no_expand_level, they get fully macro expanded.
2143
2144 "Raw" and non-raw tokens are separated in arguments by null pointers,
2145 with the policy that the initial state of an argument is raw.  If the
2146 first token is not raw, it should be preceded by a null pointer.  When
2147 tracing through the tokens of an argument context, each time
2148 get_raw_token encounters a null pointer, it toggles the flag
2149 CONTEXT_RAW.
2150
2151 This flag, when set, indicates to is_macro_disabled that we are
2152 reading raw tokens which should be macro-expanded.  Similarly, if
2153 clear, is_macro_disabled suppresses re-expansion.
2154
2155 It's probably time for an example.
2156
2157 #define hash #
2158 #define str(x) #x
2159 #define xstr(y) str(y hash)
2160 str(hash)                       // "hash"
2161 xstr(hash)                      // "# hash"
2162
2163 In the invocation of str, parse_args turns off macro expansion and so
2164 parses the argument as <hash>.  This is the only token (pointer)
2165 passed as the argument to str.  Since <hash> is raw there is no need
2166 for an initial null pointer.  stringify_arg is called from
2167 get_raw_token when tracing through the expansion of str, since the
2168 argument has the STRINGIFY flag set.  stringify_arg turns off
2169 macro expansion by setting the no_expand_level to that of the argument
2170 context.  Thus it gets the token <hash> and stringifies it to "hash"
2171 correctly.
2172
2173 Similarly, xstr is passed <hash>.  However, when parse_args is parsing
2174 the invocation of str() in xstr's expansion, get_raw_token encounters
2175 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2176 an argument context, and enters the tokens of the argument,
2177 i.e. <hash>.  This is at a higher context level than parse_args
2178 disabled, and so is_macro_disabled permits expansion of it and a macro
2179 context is pushed on top of the argument context.  This contains the
2180 <#> token, and the end result is that <hash> is macro expanded.
2181 However, after popping off the argument context, the <hash> of xstr's
2182 expansion does not get macro expanded because we're back at the
2183 no_expand_level.  The end result is that the argument passed to str is
2184 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2185 raw, <#> is not raw, but then <hash> is.
2186
2187 */
2188
2189
2190 /* Free the storage allocated for macro arguments.  */
2191 static void
2192 free_macro_args (args)
2193      macro_args *args;
2194 {
2195   if (args->tokens)
2196     free ((PTR) args->tokens);
2197   free (args->ends);
2198   free (args);
2199 }
2200
2201 /* Determines if a macro has been already used (and is therefore
2202    disabled).  */
2203 static int
2204 is_macro_disabled (pfile, expansion, token)
2205      cpp_reader *pfile;
2206      const cpp_toklist *expansion;
2207      const cpp_token *token;
2208 {
2209   cpp_context *context = CURRENT_CONTEXT (pfile);
2210
2211   /* Don't expand anything if this file has already been preprocessed.  */
2212   if (CPP_OPTION (pfile, preprocessed))
2213     return 1;
2214
2215   /* Arguments on either side of ## are inserted in place without
2216      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2217      occurs during a later rescan pass.  The effect is that we expand
2218      iff we would as part of the macro's expansion list, so we should
2219      drop to the macro's context.  */
2220   if (IS_ARG_CONTEXT (context))
2221     {
2222       if (token->flags & PASTED)
2223         context--;
2224       else if (!(context->flags & CONTEXT_RAW))
2225         return 1;
2226       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2227         context--;
2228     }
2229
2230   /* Have we already used this macro?  */
2231   while (context->level > 0)
2232     {
2233       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2234         return 1;
2235       /* Raw argument tokens are judged based on the token list they
2236          came from.  */
2237       if (context->flags & CONTEXT_RAW)
2238         context = pfile->contexts + context->level;
2239       else
2240         context--;
2241     }
2242
2243   /* Function-like macros may be disabled if the '(' is not in the
2244      current context.  We check this without disrupting the context
2245      stack.  */
2246   if (expansion->paramc >= 0)
2247     {
2248       const cpp_token *next;
2249       unsigned int prev_nme;
2250
2251       context = CURRENT_CONTEXT (pfile);
2252       /* Drop down any contexts we're at the end of: the '(' may
2253          appear in lower macro expansions, or in the rest of the file.  */
2254       while (context->posn == context->count && context > pfile->contexts)
2255         {
2256           context--;
2257           /* If we matched, we are disabled, as we appear in the
2258              expansion of each macro we meet.  */
2259           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2260             return 1;
2261         }
2262
2263       prev_nme = pfile->no_expand_level;
2264       pfile->no_expand_level = context - pfile->contexts;
2265       next = _cpp_get_token (pfile);
2266       restore_macro_expansion (pfile, prev_nme);
2267       if (next->type != CPP_OPEN_PAREN)
2268         {
2269           _cpp_push_token (pfile, next);
2270           if (CPP_WTRADITIONAL (pfile))
2271             cpp_warning (pfile,
2272          "function macro %.*s must be used with arguments in traditional C",
2273                          (int) token->val.node->length, token->val.node->name);
2274           return 1;
2275         }
2276     }
2277
2278   return 0;
2279 }
2280
2281 /* Add a token to the set of tokens forming the arguments to the macro
2282    being parsed in parse_args.  */
2283 static void
2284 save_token (args, token)
2285      macro_args *args;
2286      const cpp_token *token;
2287 {
2288   if (args->used == args->capacity)
2289     {
2290       args->capacity += args->capacity + 100;
2291       args->tokens = (const cpp_token **)
2292         xrealloc ((PTR) args->tokens,
2293                   args->capacity * sizeof (const cpp_token *));
2294     }
2295   args->tokens[args->used++] = token;
2296 }
2297
2298 /* Take and save raw tokens until we finish one argument.  Empty
2299    arguments are saved as a single CPP_PLACEMARKER token.  */
2300 static const cpp_token *
2301 parse_arg (pfile, var_args, paren_context, args, pcount)
2302      cpp_reader *pfile;
2303      int var_args;
2304      unsigned int paren_context;
2305      macro_args *args;
2306      unsigned int *pcount;
2307 {
2308   const cpp_token *token;
2309   unsigned int paren = 0, count = 0;
2310   int raw, was_raw = 1;
2311
2312   for (count = 0;; count++)
2313     {
2314       token = _cpp_get_token (pfile);
2315
2316       switch (token->type)
2317         {
2318         default:
2319           break;
2320
2321         case CPP_OPEN_PAREN:
2322           paren++;
2323           break;
2324
2325         case CPP_CLOSE_PAREN:
2326           if (paren-- != 0)
2327             break;
2328           goto out;
2329
2330         case CPP_COMMA:
2331           /* Commas are not terminators within parantheses or var_args.  */
2332           if (paren || var_args)
2333             break;
2334           goto out;
2335
2336         case CPP_EOF:           /* Error reported by caller.  */
2337           goto out;
2338         }
2339
2340       raw = pfile->cur_context <= paren_context;
2341       if (raw != was_raw)
2342         {
2343           was_raw = raw;
2344           save_token (args, 0);
2345           count++;
2346         }
2347       save_token (args, token);
2348     }
2349
2350  out:
2351   if (count == 0)
2352     {
2353       /* Duplicate the placemarker.  Then we can set its flags and
2354          position and safely be using more than one.  */
2355       save_token (args, duplicate_token (pfile, &placemarker_token));
2356       count++;
2357     }
2358
2359   *pcount = count;
2360   return token;
2361 }
2362
/* True if the argument starting at offset O of arglist A is empty:
   that is, its first token, or the token after a leading null
   pointer, is a PLACEMARKER.  */

#define empty_argument(A, O) \
 (((A)->tokens[O] ? (A)->tokens[O] : (A)->tokens[(O)+1])->type \
  == CPP_PLACEMARKER)
2370
2371 /* Parse the arguments making up a macro invocation.  Nested arguments
2372    are automatically macro expanded, but immediate macros are not
2373    expanded; this enables e.g. operator # to work correctly.  Returns
2374    non-zero on error.  */
2375 static int
2376 parse_args (pfile, hp, args)
2377      cpp_reader *pfile;
2378      cpp_hashnode *hp;
2379      macro_args *args;
2380 {
2381   const cpp_token *token;
2382   const cpp_toklist *macro;
2383   unsigned int total = 0;
2384   unsigned int paren_context = pfile->cur_context;
2385   int argc = 0;
2386
2387   macro = hp->value.expansion;
2388   do
2389     {
2390       unsigned int count;
2391
2392       token = parse_arg (pfile, (argc + 1 == macro->paramc
2393                                  && (macro->flags & VAR_ARGS)),
2394                          paren_context, args, &count);
2395       if (argc < macro->paramc)
2396         {
2397           total += count;
2398           args->ends[argc] = total;
2399         }
2400       argc++;
2401     }
2402   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2403
2404   if (token->type == CPP_EOF)
2405     {
2406       cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2407                  hp->length, hp->name);
2408       return 1;
2409     }
2410   else if (argc < macro->paramc)
2411     {
2412       /* A rest argument is allowed to not appear in the invocation at all.
2413          e.g. #define debug(format, args...) ...
2414          debug("string");
2415          This is exactly the same as if the rest argument had received no
2416          tokens - debug("string",);  This extension is deprecated.  */
2417
2418       if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2419         {
2420           /* Duplicate the placemarker.  Then we can set its flags and
2421              position and safely be using more than one.  */
2422           save_token (args, duplicate_token (pfile, &placemarker_token));
2423           args->ends[argc] = total + 1;
2424           return 0;
2425         }
2426       else
2427         {
2428           cpp_error (pfile,
2429                      "insufficient arguments in invocation of macro \"%.*s\"",
2430                      hp->length, hp->name);
2431           return 1;
2432         }
2433     }
2434   /* An empty argument to an empty function-like macro is fine.  */
2435   else if (argc > macro->paramc
2436            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2437     {
2438       cpp_error (pfile,
2439                  "too many arguments in invocation of macro \"%.*s\"",
2440                  hp->length, hp->name);
2441       return 1;
2442     }
2443
2444   return 0;
2445 }
2446
/* Copy the LEN bytes at SRC to DEST in quoted form: a backslash is
   added before every backslash and double quote, and non-printable
   characters are converted to a backslash followed by three octal
   digits.  Returns a pointer just past the last byte written.

   At most four bytes are written per source byte and nothing is
   written beyond the returned pointer; in particular no terminating
   NUL is stored, so a DEST of 4 * LEN bytes is always sufficient.  */
static U_CHAR *
quote_string (dest, src, len)
     U_CHAR *dest;
     const U_CHAR *src;
     unsigned int len;
{
  while (len--)
    {
      U_CHAR c = *src++;

      if (c == '\\' || c == '"')
        {
          *dest++ = '\\';
          *dest++ = c;
        }
      else if (ISPRINT (c))
        *dest++ = c;
      else
        {
          /* Emit the octal escape by hand.  Using sprintf here would
             also store a NUL after the digits, one byte past the end
             of an exactly-sized buffer when this is the last
             character.  */
          *dest++ = '\\';
          *dest++ = '0' + ((c >> 6) & 7);
          *dest++ = '0' + ((c >> 3) & 7);
          *dest++ = '0' + (c & 7);
        }
    }

  return dest;
}
2478
2479 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2480    CPP_STRING token containing TEXT in quoted form.  */
2481 static cpp_token *
2482 make_string_token (token, text, len)
2483      cpp_token *token;
2484      const U_CHAR *text;
2485      unsigned int len;
2486 {
2487   U_CHAR *buf;
2488
2489   buf = (U_CHAR *) xmalloc (len * 4);
2490   token->type = CPP_STRING;
2491   token->flags = 0;
2492   token->val.str.text = buf;
2493   token->val.str.len = quote_string (buf, text, len) - buf;
2494   return token;
2495 }
2496
2497 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2498    evaluating to NUMBER.  */
2499 static cpp_token *
2500 alloc_number_token (pfile, number)
2501      cpp_reader *pfile;
2502      int number;
2503 {
2504   cpp_token *result;
2505   char *buf;
2506
2507   result = get_temp_token (pfile);
2508   buf = xmalloc (20);
2509   sprintf (buf, "%d", number);
2510
2511   result->type = CPP_NUMBER;
2512   result->flags = 0;
2513   result->val.str.text = (U_CHAR *) buf;
2514   result->val.str.len = strlen (buf);
2515   return result;
2516 }
2517
2518 /* Returns a temporary token from the temporary token store of PFILE.  */
2519 static cpp_token *
2520 get_temp_token (pfile)
2521      cpp_reader *pfile;
2522 {
2523   if (pfile->temp_used == pfile->temp_alloced)
2524     {
2525       if (pfile->temp_used == pfile->temp_cap)
2526         {
2527           pfile->temp_cap += pfile->temp_cap + 20;
2528           pfile->temp_tokens = (cpp_token **) xrealloc
2529             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2530         }
2531       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2532         (sizeof (cpp_token));
2533     }
2534
2535   return pfile->temp_tokens[pfile->temp_used++];
2536 }
2537
2538 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2539 static void
2540 release_temp_tokens (pfile)
2541      cpp_reader *pfile;
2542 {
2543   while (pfile->temp_used)
2544     {
2545       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2546
2547       if (token_spellings[token->type].type == SPELL_STRING)
2548         {
2549           free ((char *) token->val.str.text);
2550           token->val.str.text = 0;
2551         }
2552     }
2553 }
2554
2555 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2556 void
2557 _cpp_free_temp_tokens (pfile)
2558      cpp_reader *pfile;
2559 {
2560   if (pfile->temp_tokens)
2561     {
2562       /* It is possible, though unlikely (looking for '(' of a funlike
2563          macro into EOF), that we haven't released the tokens yet.  */
2564       release_temp_tokens (pfile);
2565       while (pfile->temp_alloced)
2566         free (pfile->temp_tokens[--pfile->temp_alloced]);
2567       free (pfile->temp_tokens);
2568     }
2569
2570   if (pfile->date)
2571     {
2572       free ((char *) pfile->date->val.str.text);
2573       free (pfile->date);
2574       free ((char *) pfile->time->val.str.text);
2575       free (pfile->time);
2576     }
2577 }
2578
2579 /* Copy TOKEN into a temporary token from PFILE's store.  */
2580 static cpp_token *
2581 duplicate_token (pfile, token)
2582      cpp_reader *pfile;
2583      const cpp_token *token;
2584 {
2585   cpp_token *result = get_temp_token (pfile);
2586
2587   *result = *token;
2588   if (token_spellings[token->type].type == SPELL_STRING)
2589     {
2590       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2591       memcpy (buff, token->val.str.text, token->val.str.len);
2592       result->val.str.text = buff;
2593     }
2594   return result;
2595 }
2596
2597 /* Determine whether two tokens can be pasted together, and if so,
2598    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2599    be pasted, or the appropriate type for the merged token if they
2600    can.  */
2601 static enum cpp_ttype
2602 can_paste (pfile, token1, token2, digraph)
2603      cpp_reader * pfile;
2604      const cpp_token *token1, *token2;
2605      int* digraph;
2606 {
2607   enum cpp_ttype a = token1->type, b = token2->type;
2608   int cxx = CPP_OPTION (pfile, cplusplus);
2609
2610   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2611     return a + (CPP_EQ_EQ - CPP_EQ);
2612
2613   switch (a)
2614     {
2615     case CPP_GREATER:
2616       if (b == a) return CPP_RSHIFT;
2617       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2618       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2619       break;
2620     case CPP_LESS:
2621       if (b == a) return CPP_LSHIFT;
2622       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2623       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2624       if (CPP_OPTION (pfile, digraphs))
2625         {
2626           if (b == CPP_COLON)
2627             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2628           if (b == CPP_MOD)
2629             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2630         }
2631       break;
2632
2633     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2634     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2635     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2636
2637     case CPP_MINUS:
2638       if (b == a)               return CPP_MINUS_MINUS;
2639       if (b == CPP_GREATER)     return CPP_DEREF;
2640       break;
2641     case CPP_COLON:
2642       if (b == a && cxx)        return CPP_SCOPE;
2643       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2644         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2645       break;
2646
2647     case CPP_MOD:
2648       if (CPP_OPTION (pfile, digraphs))
2649         {
2650           if (b == CPP_GREATER)
2651             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2652           if (b == CPP_COLON)
2653             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2654         }
2655       break;
2656     case CPP_DEREF:
2657       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2658       break;
2659     case CPP_DOT:
2660       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2661       if (b == CPP_NUMBER)      return CPP_NUMBER;
2662       break;
2663
2664     case CPP_HASH:
2665       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2666         /* %:%: digraph */
2667         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2668       break;
2669
2670     case CPP_NAME:
2671       if (b == CPP_NAME)        return CPP_NAME;
2672       if (b == CPP_NUMBER
2673           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2674       if (b == CPP_CHAR
2675           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2676       if (b == CPP_STRING
2677           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2678       break;
2679
2680     case CPP_NUMBER:
2681       if (b == CPP_NUMBER)      return CPP_NUMBER;
2682       if (b == CPP_NAME)        return CPP_NUMBER;
2683       if (b == CPP_DOT)         return CPP_NUMBER;
2684       /* Numbers cannot have length zero, so this is safe.  */
2685       if ((b == CPP_PLUS || b == CPP_MINUS)
2686           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2687         return CPP_NUMBER;
2688       break;
2689
2690     default:
2691       break;
2692     }
2693
2694   return CPP_EOF;
2695 }
2696
/* Check if TOKEN is to be ##-pasted with the token after it.

   Pasting repeats for as long as the current token carries the
   PASTE_LEFT flag, or the current context is flagged CONTEXT_PASTEL
   and has been read to its end (posn == count).  On each round the
   following token is fetched with pfile->paste_level set for the
   duration of the call, and a temporary token holding the combined
   result replaces TOKEN.

   Returns the last token produced: TOKEN itself when no paste was
   requested, or the pasted temporary token.  If the two tokens do not
   form a valid token, the second one is pushed back and the token
   accumulated so far is returned.  */
static const cpp_token *
maybe_paste_with_next (pfile, token)
     cpp_reader *pfile;
     const cpp_token *token;
{
  cpp_token *pasted;
  const cpp_token *second;
  cpp_context *context = CURRENT_CONTEXT (pfile);

  /* Is this token on the LHS of ## ? */

  while ((token->flags & PASTE_LEFT)
         || ((context->flags & CONTEXT_PASTEL)
             && context->posn == context->count))
    {
      /* Suppress macro expansion for next token, but don't conflict
         with the other method of suppression.  If it is an argument,
         macro expansion within the argument will still occur.  */
      pfile->paste_level = pfile->cur_context;
      second = _cpp_get_token (pfile);
      pfile->paste_level = 0;

      /* Ignore placemarker argument tokens (cannot be from an empty
         macro since macros are not expanded).  */
      if (token->type == CPP_PLACEMARKER)
        /* Empty left operand: the result is a copy of the right one.  */
        pasted = duplicate_token (pfile, second);
      else if (second->type == CPP_PLACEMARKER)
        {
          /* The context below the current one; presumably the macro
             whose argument produced the placemarker -- confirm.  */
          cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
          /* GCC has special extended semantics for a ## b where b is
             a varargs parameter: a disappears if b consists of no
             tokens.  This extension is deprecated.  */
          /* The test below looks at the token most recently read from
             that context (posn - 1) and checks that its val.aux names
             the last parameter of the list.  */
          if ((mac_context->u.list->flags & GNU_REST_ARGS)
              && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
                  == (unsigned) mac_context->u.list->paramc))
            {
              cpp_warning (pfile, "deprecated GNU ## extension used");
              /* The result is the placemarker, so the left operand
                 is dropped.  */
              pasted = duplicate_token (pfile, second);
            }
          else
            /* Empty right operand: the result is a copy of the left.  */
            pasted = duplicate_token (pfile, token);
        }
      else
        {
          int digraph = 0;
          enum cpp_ttype type = can_paste (pfile, token, second, &digraph);

          if (type == CPP_EOF)
            {
              /* Not pastable: give the second token back to the
                 token stream and return the first one unchanged.  */
              if (CPP_OPTION (pfile, warn_paste))
                cpp_warning (pfile,
                        "pasting would not give a valid preprocessing token");
              _cpp_push_token (pfile, second);
              return token;
            }

          if (type == CPP_NAME || type == CPP_NUMBER)
            {
              /* Join spellings.  */
              U_CHAR *buf, *end;

              pasted = get_temp_token (pfile);
              /* NOTE(review): the stack buffer is sized from TOKEN_LEN
                 alone, yet a NUL is stored after both spellings;
                 presumably TOKEN_LEN over-estimates -- confirm.  */
              buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
              end = spell_token (pfile, token, buf);
              end = spell_token (pfile, second, end);
              *end = '\0';

              if (type == CPP_NAME)
                pasted->val.node = cpp_lookup (pfile, buf, end - buf);
              else
                {
                  /* Numbers keep a heap copy of the joined spelling,
                     since BUF lives on the stack.  */
                  pasted->val.str.text = uxstrdup (buf);
                  pasted->val.str.len = end - buf;
                }
            }
          else if (type == CPP_WCHAR || type == CPP_WSTRING)
            /* L ## 'x' or L ## "x": keep the constant's spelling; the
               type is changed to the wide variant below.  */
            pasted = duplicate_token (pfile, second);
          else
            {
              /* Remaining results are operators, identified by their
                 type alone.  */
              pasted = get_temp_token (pfile);
              pasted->val.integer = 0;
            }

          pasted->type = type;
          pasted->flags = digraph ? DIGRAPH : 0;
        }

      /* The pasted token gets the whitespace flags and position of the
         first token, the PASTE_LEFT flag of the second token, plus the
         PASTED flag to indicate it is the result of a paste.  However, we
         want to preserve the DIGRAPH flag.  */
      pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
      pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
                        | (second->flags & PASTE_LEFT) | PASTED);
      pasted->col = token->col;
      pasted->line = token->line;

      /* See if there is another token to be pasted onto the one we just
         constructed.  */
      token = pasted;
      /* Fetching SECOND may have changed the current context, so look
         it up again for the loop condition.  */
      context = CURRENT_CONTEXT (pfile);
      /* and loop */
    }
  return token;
}
2803
2804 /* Convert a token sequence to a single string token according to the
2805    rules of the ISO C #-operator.  */
2806 #define INIT_SIZE 200
2807 static cpp_token *
2808 stringify_arg (pfile, token)
2809      cpp_reader *pfile;
2810      const cpp_token *token;
2811 {
2812   cpp_token *result;
2813   unsigned char *main_buf;
2814   unsigned int prev_value, backslash_count = 0;
2815   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2816
2817   push_arg_context (pfile, token);
2818   prev_value  = prevent_macro_expansion (pfile);
2819   main_buf = (unsigned char *) xmalloc (buf_cap);
2820
2821   result = get_temp_token (pfile);
2822   ASSIGN_FLAGS_AND_POS (result, token);
2823
2824   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2825     {
2826       int escape;
2827       unsigned char *buf;
2828       unsigned int len = TOKEN_LEN (token);
2829
2830       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2831                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2832       if (escape)
2833         len *= 4 + 1;
2834
2835       if (buf_used + len > buf_cap)
2836         {
2837           buf_cap = buf_used + len + INIT_SIZE;
2838           main_buf = xrealloc (main_buf, buf_cap);
2839         }
2840
2841       if (whitespace && (token->flags & PREV_WHITE))
2842         main_buf[buf_used++] = ' ';
2843
2844       if (escape)
2845         buf = (unsigned char *) xmalloc (len);
2846       else
2847         buf = main_buf + buf_used;
2848
2849       len = spell_token (pfile, token, buf) - buf;
2850       if (escape)
2851         {
2852           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2853           free (buf);
2854         }
2855       else
2856         buf_used += len;
2857
2858       whitespace = 1;
2859       if (token->type == CPP_BACKSLASH)
2860         backslash_count++;
2861       else
2862         backslash_count = 0;
2863     }
2864
2865   /* Ignore the final \ of invalid string literals.  */
2866   if (backslash_count & 1)
2867     {
2868       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2869       buf_used--;
2870     }
2871
2872   result->type = CPP_STRING;
2873   result->val.str.text = main_buf;
2874   result->val.str.len = buf_used;
2875   restore_macro_expansion (pfile, prev_value);
2876   return result;
2877 }
2878
2879 /* Allocate more room on the context stack of PFILE.  */
2880 static void
2881 expand_context_stack (pfile)
2882      cpp_reader *pfile;
2883 {
2884   pfile->context_cap += pfile->context_cap + 20;
2885   pfile->contexts = (cpp_context *)
2886     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2887 }
2888
2889 /* Push the context of macro NODE onto the context stack.  TOKEN is
2890    the CPP_NAME token invoking the macro.  */
2891 static int
2892 push_macro_context (pfile, token)
2893      cpp_reader *pfile;
2894      const cpp_token *token;
2895 {
2896   unsigned char orig_flags;
2897   macro_args *args;
2898   cpp_context *context;
2899   cpp_hashnode *node = token->val.node;
2900
2901   /* Token's flags may change when parsing args containing a nested
2902      invocation of this macro.  */
2903   orig_flags = token->flags & (PREV_WHITE | BOL);
2904   args = 0;
2905   if (node->value.expansion->paramc >= 0)
2906     {
2907       unsigned int error, prev_nme;
2908
2909       /* Allocate room for the argument contexts, and parse them.  */
2910       args  = (macro_args *) xmalloc (sizeof (macro_args));
2911       args->ends = (unsigned int *)
2912         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2913       args->tokens = 0;
2914       args->capacity = 0;
2915       args->used = 0;
2916       args->level = pfile->cur_context;
2917
2918       prev_nme = prevent_macro_expansion (pfile);
2919       pfile->args = args;
2920       error = parse_args (pfile, node, args);
2921       pfile->args = 0;
2922       restore_macro_expansion (pfile, prev_nme);
2923       if (error)
2924         {
2925           free_macro_args (args);
2926           return 1;
2927         }
2928     }
2929
2930   /* Now push its context.  */
2931   pfile->cur_context++;
2932   if (pfile->cur_context == pfile->context_cap)
2933     expand_context_stack (pfile);
2934
2935   context = CURRENT_CONTEXT (pfile);
2936   context->u.list = node->value.expansion;
2937   context->args = args;
2938   context->posn = 0;
2939   context->count = context->u.list->tokens_used;
2940   context->level = pfile->cur_context;
2941   context->flags = 0;
2942   context->pushed_token = 0;
2943
2944   /* Set the flags of the first token.  We know there must
2945      be one, empty macros are a single placemarker token.  */
2946   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2947
2948   return 0;
2949 }
2950
2951 /* Push an argument to the current macro onto the context stack.
2952    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2953 static void
2954 push_arg_context (pfile, token)
2955      cpp_reader *pfile;
2956      const cpp_token *token;
2957 {
2958   cpp_context *context;
2959   macro_args *args;
2960
2961   pfile->cur_context++;
2962   if (pfile->cur_context == pfile->context_cap)
2963       expand_context_stack (pfile);
2964
2965   context = CURRENT_CONTEXT (pfile);
2966   args = context[-1].args;
2967
2968   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2969   context->u.arg = args->tokens + context->count;
2970   context->count = args->ends[token->val.aux] - context->count;
2971   context->args = 0;
2972   context->posn = 0;
2973   context->level = args->level;
2974   context->flags = CONTEXT_ARG | CONTEXT_RAW;
2975   context->pushed_token = 0;
2976
2977   /* Set the flags of the first token.  There is one.  */
2978   {
2979     const cpp_token *first = context->u.arg[0];
2980     if (!first)
2981       first = context->u.arg[1];
2982
2983     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2984                           token->flags & (PREV_WHITE | BOL));
2985   }
2986
2987   if (token->flags & PASTE_LEFT)
2988     context->flags |= CONTEXT_PASTEL;
2989   if (pfile->paste_level)
2990     context->flags |= CONTEXT_PASTER;
2991 }
2992
2993 /* "Unget" a token.  It is effectively inserted in the token queue and
2994    will be returned by the next call to get_raw_token.  */
2995 void
2996 _cpp_push_token (pfile, token)
2997      cpp_reader *pfile;
2998      const cpp_token *token;
2999 {
3000   cpp_context *context = CURRENT_CONTEXT (pfile);
3001   if (context->pushed_token)
3002     cpp_ice (pfile, "two tokens pushed in a row");
3003   if (token->type != CPP_EOF)
3004     context->pushed_token = token;
3005   /* Don't push back a directive's CPP_EOF, step back instead.  */
3006   else if (pfile->cur_context == 0)
3007     pfile->contexts[0].posn--;
3008 }
3009
3010 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
3011    introducing the directive.  */
3012 static void
3013 process_directive (pfile, token)
3014      cpp_reader *pfile;
3015      const cpp_token *token;
3016 {
3017   const struct directive *d = pfile->token_list.directive;
3018   int prev_nme = 0;
3019
3020   /* Skip over the directive name.  */
3021   if (token[1].type == CPP_NAME)
3022     _cpp_get_raw_token (pfile);
3023   else if (token[1].type != CPP_NUMBER)
3024     cpp_ice (pfile, "directive begins with %s?!",
3025              token_names[token[1].type]);
3026
3027   /* Flush pending tokens at this point, in case the directive produces
3028      output.  XXX Directive output won't be visible to a direct caller of
3029      cpp_get_token.  */
3030   if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
3031     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3032
3033   if (! (d->flags & EXPAND))
3034     prev_nme = prevent_macro_expansion (pfile);
3035   (void) (*d->handler) (pfile);
3036   if (! (d->flags & EXPAND))
3037     restore_macro_expansion (pfile, prev_nme);
3038   _cpp_skip_rest_of_line (pfile);
3039 }
3040
3041 /* The external interface to return the next token.  All macro
3042    expansion and directive processing is handled internally, the
3043    caller only ever sees the output after preprocessing.  */
3044 const cpp_token *
3045 cpp_get_token (pfile)
3046      cpp_reader *pfile;
3047 {
3048   const cpp_token *token;
3049   /* Loop till we hit a non-directive, non-placemarker token.  */
3050   for (;;)
3051     {
3052       token = _cpp_get_token (pfile);
3053
3054       if (token->type == CPP_PLACEMARKER)
3055         continue;
3056
3057       if (token->type == CPP_HASH && token->flags & BOL
3058           && pfile->token_list.directive)
3059         {
3060           process_directive (pfile, token);
3061           continue;
3062         }
3063
3064       return token;
3065     }
3066 }
3067
3068 /* The internal interface to return the next token.  There are two
3069    differences between the internal and external interfaces: the
3070    internal interface may return a PLACEMARKER token, and it does not
3071    process directives.  */
3072 const cpp_token *
3073 _cpp_get_token (pfile)
3074      cpp_reader *pfile;
3075 {
3076   const cpp_token *token;
3077   cpp_hashnode *node;
3078
3079   /* Loop until we hit a non-macro token.  */
3080   for (;;)
3081     {
3082       token = get_raw_token (pfile);
3083
3084       /* Short circuit EOF. */
3085       if (token->type == CPP_EOF)
3086         return token;
3087
3088       /* If we are skipping... */
3089       if (pfile->skipping)
3090         {
3091           /* we still have to process directives,  */
3092           if (pfile->token_list.directive)
3093             return token;
3094
3095           /* but everything else is ignored.  */
3096           _cpp_skip_rest_of_line (pfile);
3097           continue;
3098         }
3099
3100       /* If there's a potential control macro and we get here, then that
3101          #ifndef didn't cover the entire file and its argument shouldn't
3102          be taken as a control macro.  */
3103       pfile->potential_control_macro = 0;
3104
3105       /* See if there's a token to paste with this one.  */
3106       if (!pfile->paste_level)
3107         token = maybe_paste_with_next (pfile, token);
3108
3109       /* If it isn't a macro, return it now.  */
3110       if (token->type != CPP_NAME
3111           || token->val.node->type == T_VOID)
3112         return token;
3113
3114       /* Is macro expansion disabled in general?  */
3115       if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
3116         return token;
3117
3118       node = token->val.node;
3119       if (node->type != T_MACRO)
3120         return special_symbol (pfile, node, token);
3121
3122       if (is_macro_disabled (pfile, node->value.expansion, token))
3123         return token;
3124
3125       if (pfile->cur_context > CPP_STACK_MAX)
3126         {
3127           cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3128           return token;
3129         }
3130
3131       if (push_macro_context (pfile, token))
3132         return token;
3133       /* else loop */
3134     }
3135 }
3136
3137 /* Returns the next raw token, i.e. without performing macro
3138    expansion.  Argument contexts are automatically entered.  */
3139 static const cpp_token *
3140 get_raw_token (pfile)
3141      cpp_reader *pfile;
3142 {
3143   const cpp_token *result;
3144   cpp_context *context;
3145
3146   for (;;)
3147     {
3148       context = CURRENT_CONTEXT (pfile);
3149       if (context->pushed_token)
3150         {
3151           result = context->pushed_token;
3152           context->pushed_token = 0;
3153         }
3154       else if (context->posn == context->count)
3155         {
3156           if (pop_context (pfile))
3157             return &eof_token;
3158           continue;
3159         }
3160       else
3161         {
3162           if (IS_ARG_CONTEXT (context))
3163             {
3164               result = context->u.arg[context->posn++];
3165               if (result == 0)
3166                 {
3167                   context->flags ^= CONTEXT_RAW;
3168                   result = context->u.arg[context->posn++];
3169                 }
3170               return result;    /* Cannot be a CPP_MACRO_ARG */
3171             }
3172           result = &context->u.list->tokens[context->posn++];
3173         }
3174
3175       if (result->type != CPP_MACRO_ARG)
3176         return result;
3177
3178       if (result->flags & STRINGIFY_ARG)
3179         return stringify_arg (pfile, result);
3180
3181       push_arg_context (pfile, result);
3182     }
3183 }
3184
3185 /* Internal interface to get the token without macro expanding.  */
3186 const cpp_token *
3187 _cpp_get_raw_token (pfile)
3188      cpp_reader *pfile;
3189 {
3190   int prev_nme = prevent_macro_expansion (pfile);
3191   const cpp_token *result = _cpp_get_token (pfile);
3192   restore_macro_expansion (pfile, prev_nme);
3193   return result;
3194 }
3195
3196 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
3197    list should be overwritten, or zero if we need to append
3198    (typically, if we are within the arguments to a macro, or looking
3199    for the '(' to start a function-like macro invocation).  */
3200 static int
3201 lex_next (pfile, clear)
3202      cpp_reader *pfile;
3203      int clear;
3204 {
3205   cpp_toklist *list = &pfile->token_list;
3206   const cpp_token *old_list = list->tokens;
3207   unsigned int old_used = list->tokens_used;
3208
3209   if (clear)
3210     {
3211       /* Release all temporary tokens.  */
3212       _cpp_clear_toklist (list);
3213       pfile->contexts[0].posn = 0;
3214       if (pfile->temp_used)
3215         release_temp_tokens (pfile);
3216     }
3217
3218   lex_line (pfile, list);
3219   pfile->contexts[0].count = list->tokens_used;
3220
3221   if (!clear && pfile->args)
3222     {
3223       /* Fix up argument token pointers.  */
3224       if (old_list != list->tokens)
3225         {
3226           unsigned int i;
3227
3228           for (i = 0; i < pfile->args->used; i++)
3229             {
3230               const cpp_token *token = pfile->args->tokens[i];
3231               if (token >= old_list && token < old_list + old_used)
3232                 pfile->args->tokens[i] = (const cpp_token *)
3233                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3234             }
3235         }
3236
3237       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3238          tokens within the list of arguments that would otherwise act as
3239          preprocessing directives, the behavior is undefined.
3240
3241          This implementation will report a hard error and treat the
3242          'sequence of preprocessing tokens' as part of the macro argument,
3243          not a directive.
3244
3245          Note if pfile->args == 0, we're OK since we're only inside a
3246          macro argument after a '('.  */
3247       if (list->directive)
3248         {
3249           cpp_error_with_line (pfile, list->tokens[old_used].line,
3250                                list->tokens[old_used].col,
3251                                "#%s may not be used inside a macro argument",
3252                                list->directive->name);
3253           return 1;
3254         }
3255     }
3256
3257   return 0;
3258 }
3259
3260 /* Pops a context off the context stack.  If we're at the bottom, lexes
3261    the next logical line.  Returns EOF if we're at the end of the
3262    argument list to the # operator, or if it is illegal to "overflow"
3263    into the rest of the file (e.g. 6.10.3.1.1).  */
3264 static int
3265 pop_context (pfile)
3266      cpp_reader *pfile;
3267 {
3268   cpp_context *context;
3269
3270   if (pfile->cur_context == 0)
3271     {
3272       /* If we are currently processing a directive, do not advance.  6.10
3273          paragraph 2: A new-line character ends the directive even if it
3274          occurs within what would otherwise be an invocation of a
3275          function-like macro.  */
3276       if (pfile->token_list.directive)
3277         return 1;
3278
3279       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3280     }
3281
3282   /* Argument contexts, when parsing args or handling # operator
3283      return CPP_EOF at the end.  */
3284   context = CURRENT_CONTEXT (pfile);
3285   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3286     return 1;
3287
3288   /* Free resources when leaving macro contexts.  */
3289   if (context->args)
3290     free_macro_args (context->args);
3291
3292   if (pfile->cur_context == pfile->no_expand_level)
3293     pfile->no_expand_level--;
3294   pfile->cur_context--;
3295
3296   return 0;
3297 }
3298
3299 /* Turn off macro expansion at the current context level.  */
3300 static unsigned int
3301 prevent_macro_expansion (pfile)
3302      cpp_reader *pfile;
3303 {
3304   unsigned int prev_value = pfile->no_expand_level;
3305   pfile->no_expand_level = pfile->cur_context;
3306   return prev_value;
3307 }
3308
3309 /* Restore macro expansion to its previous state.  */
3310 static void
3311 restore_macro_expansion (pfile, prev_value)
3312      cpp_reader *pfile;
3313      unsigned int prev_value;
3314 {
3315   pfile->no_expand_level = prev_value;
3316 }
3317
3318 /* Used by cpperror.c to obtain the correct line and column to report
3319    in a diagnostic.  */
3320 unsigned int
3321 _cpp_get_line (pfile, pcol)
3322      cpp_reader *pfile;
3323      unsigned int *pcol;
3324 {
3325   unsigned int index;
3326   const cpp_token *cur_token;
3327
3328   if (pfile->in_lex_line)
3329     index = pfile->token_list.tokens_used;
3330   else
3331     index = pfile->contexts[0].posn;
3332
3333   cur_token = &pfile->token_list.tokens[index - 1];
3334   if (pcol)
3335     *pcol = cur_token->col;
3336   return cur_token->line;
3337 }
3338
3339 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3340 static const char * const monthnames[] =
3341 {
3342   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3343   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3344 };
3345
3346 /* Handle builtin macros like __FILE__.  */
3347 static const cpp_token *
3348 special_symbol (pfile, node, token)
3349      cpp_reader *pfile;
3350      cpp_hashnode *node;
3351      const cpp_token *token;
3352 {
3353   cpp_token *result;
3354   cpp_buffer *ip;
3355
3356   switch (node->type)
3357     {
3358     case T_FILE:
3359     case T_BASE_FILE:
3360       {
3361         const char *file;
3362
3363         ip = CPP_BUFFER (pfile);
3364         if (ip == 0)
3365           file = "";
3366         else
3367           {
3368             if (node->type == T_BASE_FILE)
3369               while (CPP_PREV_BUFFER (ip) != NULL)
3370                 ip = CPP_PREV_BUFFER (ip);
3371
3372             file = ip->nominal_fname;
3373           }
3374         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3375                                     strlen (file));
3376       }
3377       break;
3378
3379     case T_INCLUDE_LEVEL:
3380       /* pfile->include_depth counts the primary source as level 1,
3381          but historically __INCLUDE_DEPTH__ has called the primary
3382          source level 0.  */
3383       result = alloc_number_token (pfile, pfile->include_depth - 1);
3384       break;
3385
3386     case T_SPECLINE:
3387       /* If __LINE__ is embedded in a macro, it must expand to the
3388          line of the macro's invocation, not its definition.
3389          Otherwise things like assert() will not work properly.  */
3390       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3391       break;
3392
3393     case T_STDC:
3394       {
3395         int stdc = 1;
3396
3397 #ifdef STDC_0_IN_SYSTEM_HEADERS
3398         if (CPP_IN_SYSTEM_HEADER (pfile)
3399             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3400           stdc = 0;
3401 #endif
3402         result = alloc_number_token (pfile, stdc);
3403       }
3404       break;
3405
3406     case T_DATE:
3407     case T_TIME:
3408       if (pfile->date == 0)
3409         {
3410           /* Allocate __DATE__ and __TIME__ from permanent storage,
3411              and save them in pfile so we don't have to do this again.
3412              We don't generate these strings at init time because
3413              time() and localtime() are very slow on some systems.  */
3414           time_t tt = time (NULL);
3415           struct tm *tb = localtime (&tt);
3416
3417           pfile->date = make_string_token
3418             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3419           pfile->time = make_string_token
3420             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3421
3422           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3423                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3424           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3425                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3426         }
3427       result = node->type == T_DATE ? pfile->date: pfile->time;
3428       break;
3429
3430     case T_POISON:
3431       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3432       return token;
3433
3434     default:
3435       cpp_ice (pfile, "invalid special hash type");
3436       return token;
3437     }
3438
3439   ASSIGN_FLAGS_AND_POS (result, token);
3440   return result;
3441 }
3442 #undef DSC
3443
3444 /* Dump the original user's spelling of argument index ARG_NO to the
3445    macro whose expansion is LIST.  */
3446 static void
3447 dump_param_spelling (pfile, list, arg_no)
3448      cpp_reader *pfile;
3449      const cpp_toklist *list;
3450      unsigned int arg_no;
3451 {
3452   const U_CHAR *param = list->namebuf;
3453
3454   while (arg_no--)
3455     param += ustrlen (param) + 1;
3456   CPP_PUTS (pfile, param, ustrlen (param));
3457 }
3458
3459 /* Dump a token list to the output.  */
3460 void
3461 _cpp_dump_list (pfile, list, token, flush)
3462      cpp_reader *pfile;
3463      const cpp_toklist *list;
3464      const cpp_token *token;
3465      int flush;
3466 {
3467   const cpp_token *limit = list->tokens + list->tokens_used;
3468   const cpp_token *prev = 0;
3469
3470   /* Avoid the CPP_EOF.  */
3471   if (list->directive)
3472     limit--;
3473
3474   while (token < limit)
3475     {
3476       if (token->type == CPP_MACRO_ARG)
3477         {
3478           if (token->flags & PREV_WHITE)
3479             CPP_PUTC (pfile, ' ');
3480           if (token->flags & STRINGIFY_ARG)
3481             CPP_PUTC (pfile, '#');
3482           dump_param_spelling (pfile, list, token->val.aux);
3483         }
3484       else
3485         output_token (pfile, token, prev);
3486       if (token->flags & PASTE_LEFT)
3487         CPP_PUTS (pfile, " ##", 3);
3488       prev = token;
3489       token++;
3490     }
3491
3492   if (flush && pfile->printer)
3493     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3494 }
3495
3496 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3497    if it hasn't happened already.  */
3498
3499 void
3500 _cpp_init_input_buffer (pfile)
3501      cpp_reader *pfile;
3502 {
3503   cpp_context *base;
3504
3505   init_trigraph_map ();
3506   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3507   pfile->no_expand_level = UINT_MAX;
3508   pfile->context_cap = 20;
3509   pfile->cur_context = 0;
3510
3511   pfile->contexts = (cpp_context *)
3512     xmalloc (pfile->context_cap * sizeof (cpp_context));
3513
3514   /* Clear the base context.  */
3515   base = &pfile->contexts[0];
3516   base->u.list = &pfile->token_list;
3517   base->posn = 0;
3518   base->count = 0;
3519   base->args = 0;
3520   base->level = 0;
3521   base->flags = 0;
3522   base->pushed_token = 0;
3523 }
3524
3525 /* Moves to the end of the directive line, popping contexts as
3526    necessary.  */
3527 void
3528 _cpp_skip_rest_of_line (pfile)
3529      cpp_reader *pfile;
3530 {
3531   /* Discard all stacked contexts.  */
3532   int i;
3533   for (i = pfile->cur_context; i > 0; i--)
3534     if (pfile->contexts[i].args)
3535       free_macro_args (pfile->contexts[i].args);
3536
3537   if (pfile->no_expand_level <= pfile->cur_context)
3538     pfile->no_expand_level = 0;
3539   pfile->cur_context = 0;
3540
3541   /* Clear the base context, and clear the directive pointer so that
3542      get_raw_token will advance to the next line.  */
3543   pfile->contexts[0].count = 0;
3544   pfile->contexts[0].posn = 0;
3545   pfile->token_list.directive = 0;
3546 }
3547
3548 /* Directive handler wrapper used by the command line option
3549    processor.  */
3550 void
3551 _cpp_run_directive (pfile, dir, buf, count)
3552      cpp_reader *pfile;
3553      const struct directive *dir;
3554      const char *buf;
3555      size_t count;
3556 {
3557   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3558     {
3559       unsigned int prev_lvl = 0;
3560
3561       /* Scan the line now, else prevent_macro_expansion won't work.  */
3562       lex_next (pfile, 1);
3563       if (! (dir->flags & EXPAND))
3564         prev_lvl = prevent_macro_expansion (pfile);
3565
3566       (void) (*dir->handler) (pfile);
3567
3568       if (! (dir->flags & EXPAND))
3569         restore_macro_expansion (pfile, prev_lvl);
3570
3571       _cpp_skip_rest_of_line (pfile);
3572       cpp_pop_buffer (pfile);
3573     }
3574 }