gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o -dM and with _cpp_dump_list: too many \n output.
  28 o Put a printer object in cpp_reader?
  29 o Check line numbers assigned to all errors.
  30 o Replace strncmp with memcmp almost everywhere.
  31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  32 o Convert do_ functions to return void.  Kaveh thinks it's OK; and said he'll
  33   give it a run when we've got some code.
  34 o Distinguish integers, floats, and 'other' pp-numbers.
  35 o Store ints and char constants as binary values.
  36 o New command-line assertion syntax.
  37 o Work towards functions in cpperror.c taking a message level parameter.
  38   If we do this, merge the common code of do_warning and do_error.
  39 o Comment all functions, and describe macro expansion algorithm.
  40 o Move as much out of header files as possible.
  41 o Remove single quote pairs `', and some '', from diagnostics.
  42 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  43
  44 */
  45
  46 #include "config.h"
  47 #include "system.h"
  48 #include "intl.h"
  49 #include "cpplib.h"
  50 #include "cpphash.h"
  51 #include "symcat.h"
  52
/* Grow LIST's name buffer through _cpp_expand_name_space, asking for
   one byte more than half of its current capacity (name_cap).  */
#define auto_expand_name_space(list) \
    _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
  55 static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
  56                                          size_t, FILE *));
  57 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
  58                                          unsigned int));
  59 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
  60                                          unsigned int));
  61
  62 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
  63 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
  64                                                 unsigned char *));
  65 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
  66                                                      const unsigned char *));
  67 static int skip_block_comment PARAMS ((cpp_reader *));
  68 static int skip_line_comment PARAMS ((cpp_reader *));
  69 static void skip_whitespace PARAMS ((cpp_reader *, int));
  70 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
  71                                    const U_CHAR *, const U_CHAR *));
  72 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
  73 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
  74                                   unsigned int));
  75 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
  76 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
  77                                   const unsigned char *,
  78                                   unsigned int, unsigned int));
  79 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
  80 static int lex_next PARAMS ((cpp_reader *, int));
  81 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
  82                                       const cpp_token *));
  83
  84 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
  85 static void expand_context_stack PARAMS ((cpp_reader *));
  86 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
  87                                             unsigned char *));
  88 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
  89                                   const cpp_token *));
  90 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
  91                                           cpp_token *));
  92 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
  93                                             unsigned int));
  94 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
  95 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
  96                                                 const cpp_token *));
  97 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
  98 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
  99                                                        const cpp_token *));
 100 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 101                                          const cpp_token *, int *));
 102 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 103 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 104 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 105 static void release_temp_tokens         PARAMS ((cpp_reader *));
 106 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 107 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 108
/* Start TOKEN as an empty string-valued token: its length is zero and
   its text pointer is the first unused byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do {(token)->val.str.len = 0; \
      (token)->val.str.text = (list)->namebuf + (list)->name_used; \
  } while (0)

/* Non-zero if C is '+' or '-' and PREVC is an exponent letter: 'e' or
   'E' always, 'p' or 'P' only when the c89 option is off.  Expects a
   variable named pfile to be in scope at the point of use.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 118
 119 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 120    character, if any, is in buffer.  */
 121
 122 #define handle_newline(cur, limit, c) \
 123  do { \
 124   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 125     (cur)++; \
 126   pfile->buffer->lineno++; \
 127   pfile->buffer->line_base = (cur); \
 128   pfile->col_adjust = 0; \
 129  } while (0)
 130
/* Shorthands for manipulating the token being built.  They all expect
   a cpp_token pointer named cur_token to be in scope.  */

/* Non-zero if the current token does not have PREV_WHITE set.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
/* Type of the token just before the current one.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Give the current token type TTYPE and advance past it.  */
#define PUSH_TOKEN(ttype) cur_token++->type = ttype
/* Change the type of the previous token to TTYPE.  */
#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
/* Step back one token and give it type TTYPE.  */
#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
/* As BACKUP_TOKEN, and additionally set DIGRAPH in that token's flags.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)

/* TOKEN_SPELL is the spelling category recorded in token_spellings
   for TOKEN's type.

   TOKEN_LEN is an upper bound on the number of bytes needed to spell
   a token, including preceding whitespace: a fixed allowance of 5,
   plus the string length for SPELL_STRING tokens or the identifier
   length for SPELL_IDENT tokens.  */
#define TOKEN_SPELL(token) token_spellings[(token)->type].type
#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING       \
                               ? (token)->val.str.len                   \
                               : (TOKEN_SPELL(token) == SPELL_IDENT     \
                                  ? (token)->val.node->length           \
                                  : 0)))
 148
/* Build the spelling table by expanding TTYPE_TABLE (not defined in
   this file) with each of its T/I/S/C/N entries turned into a
   { spelling category, spelling } initializer.  The first macro
   argument is not used here.  */
#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
#define I(e, s) {SPELL_IDENT, s},
#define S(e, s) {SPELL_STRING, s},
#define C(e, s) {SPELL_CHAR, s},
#define N(e, s) {SPELL_NONE, s},

/* One entry per TTYPE_TABLE line, followed by an extra {0, 0} entry.  */
const struct token_spelling
token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };

#undef T
#undef I
#undef S
#undef C
#undef N

/* For debugging: the internal names of the tokens.  Every entry kind
   expands the same way, using only the first macro argument.
   NOTE(review): U and STRINGX are defined elsewhere; presumably they
   yield the enumerator's name as a U_CHAR string -- confirm.  */
#define T(e, s) U STRINGX(e),
#define I(e, s) U STRINGX(e),
#define S(e, s) U STRINGX(e),
#define C(e, s) U STRINGX(e),
#define N(e, s) U STRINGX(e),

const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };

#undef T
#undef I
#undef S
#undef C
#undef N
 178
/* The following table is used by trigraph_ok/trigraph_replace.  It is
   indexed by the third character of a trigraph and holds the
   character that trigraph stands for; entries for all other
   characters are zero.  If we have designated initializers, it can be
   constant data; otherwise, it is set up at runtime by
   _cpp_init_input_buffer.  */

#if (GCC_VERSION >= 2007)
/* Designated-initializer form: the s() entries below become array
   initializers and init_trigraph_map expands to nothing.  */
#define init_trigraph_map()  /* nothing */
#define TRIGRAPH_MAP \
__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
#define END };
#define s(p, v) [p] = v,
#else
/* Fallback form: a zero-initialized array, plus an init_trigraph_map
   function whose body is the s() entries below expanded into
   assignments.  */
#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 static void init_trigraph_map PARAMS ((void)) { \
 unsigned char *x = trigraph_map;
#define END }
#define s(p, v) x[p] = v;
#endif

/* s (third trigraph character, replacement character).  */
TRIGRAPH_MAP
  s('=', '#')   s(')', ']')     s('!', '|')
  s('(', '[')   s('\'', '^')    s('>', '}')
  s('/', '\\')  s('<', '{')     s('-', '~')
END

#undef TRIGRAPH_MAP
#undef END
#undef s
 206
 207 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 208
 209 void
 210 _cpp_grow_token_buffer (pfile, n)
 211      cpp_reader *pfile;
 212      long n;
 213 {
 214   long old_written = CPP_WRITTEN (pfile);
 215   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 216   pfile->token_buffer = (U_CHAR *)
 217     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 218   CPP_SET_WRITTEN (pfile, old_written);
 219 }
 220
 221 /* Deal with the annoying semantics of fwrite.  */
 222 static void
 223 safe_fwrite (pfile, buf, len, fp)
 224      cpp_reader *pfile;
 225      const U_CHAR *buf;
 226      size_t len;
 227      FILE *fp;
 228 {
 229   size_t count;
 230
 231   while (len)
 232     {
 233       count = fwrite (buf, 1, len, fp);
 234       if (count == 0)
 235         goto error;
 236       len -= count;
 237       buf += count;
 238     }
 239   return;
 240
 241  error:
 242   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 243 }
 244
 245 /* Notify the compiler proper that the current line number has jumped,
 246    or the current file name has changed.  */
 247
 248 static void
 249 output_line_command (pfile, print, line)
 250      cpp_reader *pfile;
 251      cpp_printer *print;
 252      unsigned int line;
 253 {
 254   cpp_buffer *ip = CPP_BUFFER (pfile);
 255   enum { same = 0, enter, leave, rname } change;
 256   static const char * const codes[] = { "", " 1", " 2", "" };
 257
 258   if (line == 0)
 259     return;
 260
 261   /* End the previous line of text.  */
 262   if (pfile->need_newline)
 263     putc ('\n', print->outf);
 264   pfile->need_newline = 0;
 265
 266   if (CPP_OPTION (pfile, no_line_commands))
 267     return;
 268
 269   /* If ip is null, we've been called from cpp_finish, and they just
 270      needed the final flush and trailing newline.  */
 271   if (!ip)
 272     return;
 273
 274   if (pfile->include_depth == print->last_id)
 275     {
 276       /* Determine whether the current filename has changed, and if so,
 277          how.  'nominal_fname' values are unique, so they can be compared
 278          by comparing pointers.  */
 279       if (ip->nominal_fname == print->last_fname)
 280         change = same;
 281       else
 282         change = rname;
 283     }
 284   else
 285     {
 286       if (pfile->include_depth > print->last_id)
 287         change = enter;
 288       else
 289         change = leave;
 290       print->last_id = pfile->include_depth;
 291     }
 292   print->last_fname = ip->nominal_fname;
 293
 294   /* If the current file has not changed, we can output a few newlines
 295      instead if we want to increase the line number by a small amount.
 296      We cannot do this if print->lineno is zero, because that means we
 297      haven't output any line commands yet.  (The very first line
 298      command output is a `same_file' command.)  */
 299   if (change == same && print->lineno > 0
 300       && line >= print->lineno && line < print->lineno + 8)
 301     {
 302       while (line > print->lineno)
 303         {
 304           putc ('\n', print->outf);
 305           print->lineno++;
 306         }
 307       return;
 308     }
 309
 310 #ifndef NO_IMPLICIT_EXTERN_C
 311   if (CPP_OPTION (pfile, cplusplus))
 312     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 313              codes[change],
 314              ip->inc->sysp ? " 3" : "",
 315              (ip->inc->sysp == 2) ? " 4" : "");
 316   else
 317 #endif
 318     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 319              codes[change],
 320              ip->inc->sysp ? " 3" : "");
 321   print->lineno = line;
 322 }
 323
 324 /* Write the contents of the token_buffer to the output stream, and
 325    clear the token_buffer.  Also handles generating line commands and
 326    keeping track of file transitions.  */
 327
 328 void
 329 cpp_output_tokens (pfile, print, line)
 330      cpp_reader *pfile;
 331      cpp_printer *print;
 332      unsigned int line;
 333 {
 334   if (CPP_WRITTEN (pfile) - print->written)
 335     {
 336       safe_fwrite (pfile, pfile->token_buffer,
 337                    CPP_WRITTEN (pfile) - print->written, print->outf);
 338       pfile->need_newline = 1;
 339       if (print->lineno)
 340         print->lineno++;
 341
 342       CPP_SET_WRITTEN (pfile, print->written);
 343     }
 344   output_line_command (pfile, print, line);
 345 }
 346
 347 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 348
 349 void
 350 cpp_scan_buffer_nooutput (pfile)
 351      cpp_reader *pfile;
 352 {
 353   unsigned int old_written = CPP_WRITTEN (pfile);
 354   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 355
 356   for (;;)
 357     {
 358       /* In no-output mode, we can ignore everything but directives.  */
 359       const cpp_token *token = cpp_get_token (pfile);
 360       if (token->type == CPP_EOF)
 361         {
 362           cpp_pop_buffer (pfile);
 363           if (CPP_BUFFER (pfile) == stop)
 364             break;
 365         }
 366       _cpp_skip_rest_of_line (pfile);
 367     }
 368   CPP_SET_WRITTEN (pfile, old_written);
 369 }
 370
 371 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 372
 373 void
 374 cpp_scan_buffer (pfile, print)
 375      cpp_reader *pfile;
 376      cpp_printer *print;
 377 {
 378   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 379   const cpp_token *token, *prev = 0;
 380
 381   for (;;)
 382     {
 383       token = cpp_get_token (pfile);
 384       if (token->type == CPP_EOF)
 385         {
 386           cpp_pop_buffer (pfile);
 387           if (CPP_BUFFER (pfile) == stop)
 388             return;
 389           cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
 390           prev = 0;
 391           continue;
 392         }
 393
 394       if (token->flags & BOL)
 395         {
 396           cpp_output_tokens (pfile, print, pfile->token_list.line);
 397           prev = 0;
 398         }
 399
 400       output_token (pfile, token, prev);
 401       prev = token;
 402     }
 403 }
 404
 405 /* Helper routine used by parse_include, which can't see spell_token.
 406    Reinterpret the current line as an h-char-sequence (< ... >); we are
 407    looking at the first token after the <.  */
 408 const cpp_token *
 409 _cpp_glue_header_name (pfile)
 410      cpp_reader *pfile;
 411 {
 412   unsigned int written = CPP_WRITTEN (pfile);
 413   const cpp_token *t;
 414   cpp_token *hdr;
 415   U_CHAR *buf;
 416   size_t len;
 417
 418   for (;;)
 419     {
 420       t = cpp_get_token (pfile);
 421       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 422         break;
 423
 424       CPP_RESERVE (pfile, TOKEN_LEN (t));
 425       if (t->flags & PREV_WHITE)
 426         CPP_PUTC_Q (pfile, ' ');
 427       pfile->limit = spell_token (pfile, t, pfile->limit);
 428     }
 429
 430   if (t->type == CPP_EOF)
 431     cpp_error (pfile, "missing terminating > character");
 432
 433   len = CPP_WRITTEN (pfile) - written;
 434   buf = xmalloc (len);
 435   memcpy (buf, pfile->token_buffer + written, len);
 436   CPP_SET_WRITTEN (pfile, written);
 437
 438   hdr = get_temp_token (pfile);
 439   hdr->type = CPP_HEADER_NAME;
 440   hdr->flags = 0;
 441   hdr->val.str.text = buf;
 442   hdr->val.str.len = len;
 443   return hdr;
 444 }
 445
 446 /* Token-buffer helper functions.  */
 447
 448 /* Expand a token list's string space. It is *vital* that
 449    list->tokens_used is correct, to get pointer fix-up right.  */
 450 void
 451 _cpp_expand_name_space (list, len)
 452      cpp_toklist *list;
 453      unsigned int len;
 454 {
 455   const U_CHAR *old_namebuf;
 456
 457   old_namebuf = list->namebuf;
 458   list->name_cap += len;
 459   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 460
 461   /* Fix up token text pointers.  */
 462   if (list->namebuf != old_namebuf)
 463     {
 464       unsigned int i;
 465
 466       for (i = 0; i < list->tokens_used; i++)
 467         if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
 468           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 469     }
 470 }
 471
 472 /* If there is not enough room for LEN more characters, expand the
 473    list by just enough to have room for LEN characters.  */
 474 void
 475 _cpp_reserve_name_space (list, len)
 476      cpp_toklist *list;
 477      unsigned int len;
 478 {
 479   unsigned int room = list->name_cap - list->name_used;
 480
 481   if (room < len)
 482     _cpp_expand_name_space (list, len - room);
 483 }
 484
 485 /* Expand the number of tokens in a list.  */
 486 void
 487 _cpp_expand_token_space (list, count)
 488      cpp_toklist *list;
 489      unsigned int count;
 490 {
 491   unsigned int n;
 492
 493   list->tokens_cap += count;
 494   n = list->tokens_cap;
 495   if (list->flags & LIST_OFFSET)
 496     list->tokens--, n++;
 497   list->tokens = (cpp_token *)
 498     xrealloc (list->tokens, n * sizeof (cpp_token));
 499   if (list->flags & LIST_OFFSET)
 500     list->tokens++;             /* Skip the dummy.  */
 501 }
 502
 503 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 504    an extra token in front of the token list, as this allows the lexer
 505    to always peek at the previous token without worrying about
 506    underflowing the list, and some initial space.  Otherwise, no
 507    token- or name-space is allocated, and there is no dummy token.  */
 508 void
 509 _cpp_init_toklist (list, flags)
 510      cpp_toklist *list;
 511      int flags;
 512 {
 513   if (flags == NO_DUMMY_TOKEN)
 514     {
 515       list->tokens_cap = 0;
 516       list->tokens = 0;
 517       list->name_cap = 0;
 518       list->namebuf = 0;
 519       list->flags = 0;
 520     }
 521   else
 522     {
 523       /* Initialize token space.  Put a dummy token before the start
 524          that will fail matches.  */
 525       list->tokens_cap = 256;   /* 4K's worth.  */
 526       list->tokens = (cpp_token *)
 527         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 528       list->tokens[0].type = CPP_EOF;
 529       list->tokens++;
 530
 531       /* Initialize name space.  */
 532       list->name_cap = 1024;
 533       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 534       list->flags = LIST_OFFSET;
 535     }
 536
 537   _cpp_clear_toklist (list);
 538 }
 539
 540 /* Clear a token list.  */
 541 void
 542 _cpp_clear_toklist (list)
 543      cpp_toklist *list;
 544 {
 545   list->tokens_used = 0;
 546   list->name_used = 0;
 547   list->directive = 0;
 548   list->paramc = 0;
 549   list->params_len = 0;
 550   list->flags &= LIST_OFFSET;  /* clear all but that one */
 551 }
 552
 553 /* Free a token list.  Does not free the list itself, which may be
 554    embedded in a larger structure.  */
 555 void
 556 _cpp_free_toklist (list)
 557      const cpp_toklist *list;
 558 {
 559   if (list->flags & LIST_OFFSET)
 560     free (list->tokens - 1);    /* Backup over dummy token.  */
 561   else
 562     free (list->tokens);
 563   free (list->namebuf);
 564 }
 565
 566 /* Compare two tokens.  */
 567 int
 568 _cpp_equiv_tokens (a, b)
 569      const cpp_token *a, *b;
 570 {
 571   if (a->type == b->type && a->flags == b->flags)
 572     switch (token_spellings[a->type].type)
 573       {
 574       default:                  /* Keep compiler happy.  */
 575       case SPELL_OPERATOR:
 576         return 1;
 577       case SPELL_CHAR:
 578       case SPELL_NONE:
 579         return a->val.aux == b->val.aux; /* arg_no or character.  */
 580       case SPELL_IDENT:
 581         return a->val.node == b->val.node;
 582       case SPELL_STRING:
 583         return (a->val.str.len == b->val.str.len
 584                 && !memcmp (a->val.str.text, b->val.str.text,
 585                             a->val.str.len));
 586       }
 587
 588   return 0;
 589 }
 590
 591 /* Compare two token lists.  */
 592 int
 593 _cpp_equiv_toklists (a, b)
 594      const cpp_toklist *a, *b;
 595 {
 596   unsigned int i;
 597
 598   if (a->tokens_used != b->tokens_used
 599       || a->flags != b->flags
 600       || a->paramc != b->paramc)
 601     return 0;
 602
 603   for (i = 0; i < a->tokens_used; i++)
 604     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 605       return 0;
 606   return 1;
 607 }
 608
 609 /* Utility routine:
 610
 611    Compares, the token TOKEN to the NUL-terminated string STRING.
 612    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 613
 614 int
 615 cpp_ideq (token, string)
 616      const cpp_token *token;
 617      const char *string;
 618 {
 619   if (token->type != CPP_NAME)
 620     return 0;
 621
 622   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 623 }
 624
 625 /* Lexing algorithm.
 626
 627  The original lexer in cpplib was made up of two passes: a first pass
 628  that replaced trigraphs and deleted escaped newlines, and a second
 629  pass that tokenized the result of the first pass.  Tokenisation was
 630  performed by peeking at the next character in the input stream.  For
 631  example, if the input stream contained "!=", the handler for the !
 632  character would peek at the next character, and if it were a '='
 633  would skip over it, and return a "!=" token, otherwise it would
 634  return just the "!" token.
 635
 636  To implement a single-pass lexer, this peeking ahead is unworkable.
 637  An arbitrary number of escaped newlines, and trigraphs (in particular
 638  ??/ which translates to the escape \), could separate the '!' and '='
 639  in the input stream, yet the next token is still a "!=".
 640
 641  Suppose instead that we lex by one logical line at a time, producing
 642  a token list or stack for each logical line, and when seeing the '!'
 643  push a CPP_NOT token on the list.  Then if the '!' is part of a
 644  longer token ("!=") we know we must see the remainder of the token by
 645  the time we reach the end of the logical line.  Thus we can have the
 646  '=' handler look at the previous token (at the end of the list / top
 647  of the stack) and see if it is a "!" token, and if so, instead of
 648  pushing a "=" token revise the existing token to be a "!=" token.
 649
 650  This works in the presence of escaped newlines, because the '\' would
 651  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 652  newline ('\n' or '\r') handler looks at the token at the top of the
 653  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 654  Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
 655  the '=' handler would never see any intervening escaped newlines.
 656
 657  To make trigraphs work in this context, as in precedence trigraphs
 658  are highest and converted before anything else, the '?' handler does
 659  lookahead to see if it is a trigraph, and if so skips the trigraph
 660  and pushes the token it represents onto the top of the stack.  This
 661  also works in the particular case of a CPP_BACKSLASH trigraph.
 662
 663  To the preprocessor, whitespace is only significant to the point of
 664  knowing whether whitespace precedes a particular token.  For example,
 665  the '=' handler needs to know whether there was whitespace between it
 666  and a "!" token on the top of the stack, to make the token conversion
 667  decision correctly.  So each token has a PREV_WHITE flag to
 668  indicate this - the standard permits consecutive whitespace to be
 669  regarded as a single space.  The compiler front ends are not
 670  interested in whitespace at all; they just require a token stream.
 671  Another place where whitespace is significant to the preprocessor is
 672  a #define statement - if there is whitespace between the macro name
 673  and an initial "(" token the macro is "object-like", otherwise it is
 674  a function-like macro that takes arguments.
 675
 676  However, all is not rosy.  Parsing of identifiers, numbers, comments
 677  and strings becomes trickier because of the possibility of raw
 678  trigraphs and escaped newlines in the input stream.
 679
 680  The trigraphs are three consecutive characters beginning with two
 681  question marks.  A question mark is not valid as part of a number or
 682  identifier, so parsing of a number or identifier terminates normally
 683  upon reaching it, returning to the mainloop which handles the
 684  trigraph just like it would in any other position.  Similarly for the
 685  backslash of a backslash-newline combination.  So we just need the
 686  escaped-newline dropper in the mainloop to check if the token on the
 687  top of the stack after dropping the escaped newline is a number or
 688  identifier, and if so to continue the processing it as if nothing had
 689  happened.
 690
 691  For strings, we replace trigraphs whenever we reach a quote or
 692  newline, because there might be a backslash trigraph escaping them.
 693  We need to be careful that we start trigraph replacing from where we
 694  left off previously, because it is possible for a first scan to leave
 695  "fake" trigraphs that a second scan would pick up as real (e.g. the
 696  sequence "????/\n=" would find a fake ??= trigraph after removing the
 697  escaped newline.)
 698
 699  For line comments, on reaching a newline we scan the previous
 700  character(s) to see if it escaped, and continue if it is.  Block
 701  comments ignore everything and just focus on finding the comment
 702  termination mark.  The only difficult thing, and it is surprisingly
 703  tricky, is checking if an asterisk precedes the final slash since
 704  they could be separated by escaped newlines.  If the preprocessor is
 705  invoked with the output comments option, we don't bother removing
 706  escaped newlines and replacing trigraphs for output.
 707
 708  Finally, numbers can begin with a period, which is pushed initially
 709  as a CPP_DOT token in its own right.  The digit handler checks if the
 710  previous token was a CPP_DOT not separated by whitespace, and if so
 711  pops it off the stack and pushes a period into the number's buffer
 712  before calling the number parser.
 713
 714 */
 715
/* Spellings of the six digraph forms.  NOTE(review): the code that
   indexes this table is not visible here; the order of the entries
   presumably has to match it -- confirm before reordering.  */
static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                                    U":>", U"<%", U"%>"};
 718
 719 /* Call when a trigraph is encountered.  It warns if necessary, and
 720    returns true if the trigraph should be honoured.  END is the third
 721    character of a trigraph in the input stream.  */
 722 static int
 723 trigraph_ok (pfile, end)
 724      cpp_reader *pfile;
 725      const unsigned char *end;
 726 {
 727   int accept = CPP_OPTION (pfile, trigraphs);
 728
 729   if (CPP_OPTION (pfile, warn_trigraphs))
 730     {
 731       unsigned int col = end - 1 - pfile->buffer->line_base;
 732       if (accept)
 733         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 734                                "trigraph ??%c converted to %c",
 735                                (int) *end, (int) trigraph_map[*end]);
 736       else
 737         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 738                                "trigraph ??%c ignored", (int) *end);
 739     }
 740   return accept;
 741 }
 742
 743 /* Scan a string for trigraphs, warning or replacing them inline as
 744    appropriate.  When parsing a string, we must call this routine
 745    before processing a newline character (if trigraphs are enabled),
 746    since the newline might be escaped by a preceding backslash
 747    trigraph sequence.  Returns a pointer to the end of the name after
 748    replacement.  */
 749
 750 static unsigned char *
 751 trigraph_replace (pfile, src, limit)
 752      cpp_reader *pfile;
 753      unsigned char *src;
 754      unsigned char *limit;
 755 {
 756   unsigned char *dest;
 757
 758   /* Starting with src[1], find two consecutive '?'.  The case of no
 759      trigraphs is streamlined.  */
 760
 761   for (src++; src + 1 < limit; src += 2)
 762     {
 763       if (src[0] != '?')
 764         continue;
 765
 766       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 767       if (src[-1] == '?')
 768         src--;
 769       else if (src + 2 == limit || src[1] != '?')
 770         continue;
 771
 772       /* Check if it really is a trigraph.  */
 773       if (trigraph_map[src[2]] == 0)
 774         continue;
 775
 776       dest = src;
 777       goto trigraph_found;
 778     }
 779   return limit;
 780
 781   /* Now we have a trigraph, we need to scan the remaining buffer, and
 782      copy-shifting its contents left if replacement is enabled.  */
 783   for (; src + 2 < limit; dest++, src++)
 784     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 785       {
 786       trigraph_found:
 787         src += 2;
 788         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 789           *dest = trigraph_map[*src];
 790       }
 791
 792   /* Copy remaining (at most 2) characters.  */
 793   while (src < limit)
 794     *dest++ = *src++;
 795   return dest;
 796 }
 797
 798 /* If CUR is a backslash or the end of a trigraphed backslash, return
 799    a pointer to its beginning, otherwise NULL.  We don't read beyond
 800    the buffer start, because there is the start of the comment in the
 801    buffer.  */
 802 static const unsigned char *
 803 backslash_start (pfile, cur)
 804      cpp_reader *pfile;
 805      const unsigned char *cur;
 806 {
 807   if (cur[0] == '\\')
 808     return cur;
 809   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 810       && trigraph_ok (pfile, cur))
 811     return cur - 2;
 812   return 0;
 813 }
 814
 815 /* Skip a C-style block comment, scanning from buffer->cur (which is
 816    updated).  We find the end of the comment by seeing if an asterisk
 817    is before every '/' we encounter.  The nasty complication is that a
 818    previous asterisk may be separated by one or more escaped newlines.
 819    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 820 static int
 821 skip_block_comment (pfile)
 822      cpp_reader *pfile;
 823 {
 824   cpp_buffer *buffer = pfile->buffer;
 825   const unsigned char *char_after_star = 0; /* After '*' + escaped newlines, or 0.  */
 826   register const unsigned char *cur = buffer->cur;
 827   int seen_eof = 0;             /* Set only if the loop reaches rlimit.  */
 828 
 829   /* Inner loop would think the comment has ended if the first comment
 830      character is a '/'.  Avoid this and keep the inner loop clean by
 831      skipping such a character.  */
 832   if (cur < buffer->rlimit && cur[0] == '/')
 833     cur++;
 834 
 835   for (; cur < buffer->rlimit; )
 836     {
 837       unsigned char c = *cur++;
 838 
 839       /* People like decorating comments with '*', so check for
 840          '/' instead for efficiency.  */
 841       if (c == '/')
 842         {
 843           if (cur[-2] == '*' || cur - 1 == char_after_star)
 844             goto out;           /* This '/' closes the comment.  */
 845 
 846           /* Warn about potential nested comments, but not when
 847              the final character inside the comment is a '/'.
 848              Don't bother to get it right across escaped newlines.  */
 849           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 850               && cur[0] == '*' && cur[1] != '/')
 851             {
 852               buffer->cur = cur; /* Presumably positions the diagnostic.  */
 853               cpp_warning (pfile, "'/*' within comment");
 854             }
 855         }
 856       else if (is_vspace (c))
 857         {
 858           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 859           /* (cur - 2 above is the character preceding the newline.)  */
 860           handle_newline (cur, buffer->rlimit, c);
 861           /* Work correctly if there is an asterisk before an
 862              arbitrarily long sequence of escaped newlines.  */
 863           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 864             char_after_star = cur;      /* Still "just after a star".  */
 865           else
 866             char_after_star = 0;        /* Chain broken.  */
 867         }
 868     }
 869   seen_eof = 1;                 /* Ran out of buffer before a terminator.  */
 870 
 871  out:
 872   buffer->cur = cur;            /* Past the closing '/', or at rlimit.  */
 873   return seen_eof;
 874 }
 875
 876 /* Skip a C++ or Chill line comment starting at buffer->cur, carrying on
 877    over escaped newlines.  Returns non-zero if a multiline comment.  */
 878 static int
 879 skip_line_comment (pfile)
 880      cpp_reader *pfile;
 881 {
 882   cpp_buffer *buffer = pfile->buffer;
 883   register const unsigned char *cur = buffer->cur;
 884   int multiline = 0;            /* Set once an escaped newline is seen.  */
 885 
 886   for (; cur < buffer->rlimit; )
 887     {
 888       unsigned char c = *cur++;
 889 
 890       if (is_vspace (c))
 891         {
 892           /* Check for a (trigraph?) backslash escaping the newline.  */
 893           if (!backslash_start (pfile, cur - 2))
 894             goto out;           /* Unescaped newline ends the comment.  */
 895           multiline = 1;
 896           handle_newline (cur, buffer->rlimit, c);
 897         }
 898     }
 899   cur++;                        /* EOF: cancel the "cur - 1" below.  */
 900 
 901  out:
 902   buffer->cur = cur - 1;        /* Leave newline for caller.  */
 903   return multiline;
 904 }
 905
 906 /* Skips whitespace, advancing buffer->cur to the next non-whitespace
 907    character or to rlimit.  Adjusts pfile->col_adjust to account for
 908    tabs.  This enables tokens to be assigned the correct column.  */
 909 static void
 910 skip_whitespace (pfile, in_directive)
 911      cpp_reader *pfile;
 912      int in_directive;          /* Non-zero allows the \f / \v pedwarn.  */
 913 {
 914   cpp_buffer *buffer = pfile->buffer;
 915   unsigned short warned = 0;    /* Warn about NULs only once per call.  */
 916 
 917   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 918   while (buffer->cur < buffer->rlimit)
 919     {
 920       unsigned char c = *buffer->cur;
 921 
 922       if (!is_nvspace (c))
 923         break;
 924       /* NOTE(review): CPP_BUF_COL presumably reads buffer->cur, so the increment must stay first.  */
 925       buffer->cur++;
 926       /* Horizontal space always OK.  */
 927       if (c == ' ')
 928         continue;
 929       else if (c == '\t')       /* Add the extra columns up to the next tab stop.  */
 930         pfile->col_adjust += CPP_OPTION (pfile, tabstop) - 1
 931           - (CPP_BUF_COL (buffer) - 1) % CPP_OPTION(pfile, tabstop);
 932       /* Must be \f \v or \0.  */
 933       else if (c == '\0')
 934         {
 935           if (!warned)
 936             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 937                                    CPP_BUF_COL (buffer),
 938                                    "embedded null character ignored");
 939           warned = 1;
 940         }
 941       else if (in_directive && CPP_PEDANTIC (pfile))   /* \f or \v.  */
 942         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 943                                CPP_BUF_COL (buffer),
 944                                "%s in preprocessing directive",
 945                                c == '\f' ? "form feed" : "vertical tab");
 946     }
 947 }
 948
 949 /* Parse (append) an identifier.  Returns a pointer just past it.  */
 950 static inline const U_CHAR *
 951 parse_name (pfile, tok, cur, rlimit)
 952      cpp_reader *pfile;
 953      cpp_token *tok;            /* val.node: 0, or the name lexed so far.  */
 954      const U_CHAR *cur, *rlimit; /* Scan from CUR, never reaching RLIMIT.  */
 955 {
 956   const U_CHAR *name = cur;     /* Start of the newly scanned characters.  */
 957   unsigned int len;
 958 
 959   while (cur < rlimit)
 960     {
 961       if (! is_idchar (*cur))
 962         break;
 963       /* $ is not a legal identifier character in the standard, but is
 964          commonly accepted as an extension.  Don't warn about it in
 965          skipped conditional blocks. */
 966       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 967         {
 968           CPP_BUFFER (pfile)->cur = cur; /* Presumably positions the diagnostic.  */
 969           cpp_pedwarn (pfile, "'$' character in identifier");
 970         }
 971       cur++;
 972     }
 973   len = cur - name;
 974   /* Appending: join the existing node's spelling and the new characters.  */
 975   if (tok->val.node)
 976     {
 977       unsigned int oldlen = tok->val.node->length;
 978       U_CHAR *newname = alloca (oldlen + len); /* Scratch for the joined name.  */
 979       memcpy (newname, tok->val.node->name, oldlen);
 980       memcpy (newname + oldlen, name, len);
 981       len += oldlen;
 982       name = newname;
 983     }
 984 
 985   tok->val.node = cpp_lookup (pfile, name, len); /* Node for the full spelling.  */
 986   return cur;
 987 }
 988
 989 /* Parse (append) a number, copying it from buffer->cur into LIST's namebuf.  */
 990 static void
 991 parse_number (pfile, list, name)
 992      cpp_reader *pfile;
 993      cpp_toklist *list;         /* name_used is advanced past the copy.  */
 994      cpp_string *name;          /* len is recomputed from name->text.  */
 995 {
 996   const unsigned char *name_limit;
 997   unsigned char *namebuf;
 998   cpp_buffer *buffer = pfile->buffer;
 999   register const unsigned char *cur = buffer->cur;
1000 
1001  expanded:                      /* (Re)derive the write window from LIST.  */
1002   name_limit = list->namebuf + list->name_cap;
1003   namebuf = list->namebuf + list->name_used;
1004 
1005   for (; cur < buffer->rlimit && namebuf < name_limit; )
1006     {
1007       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1008 
1009       /* Perhaps we should accept '$' here if we accept it for
1010          identifiers.  We know namebuf[-1] is safe, because for c to
1011          be a sign we must have pushed at least one character.  */
1012       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1013         goto out;               /* Not part of the number; left unread.  */
1014 
1015       namebuf++;
1016       cur++;
1017     }
1018 
1019   /* Run out of name space?  */
1020   if (cur < buffer->rlimit)     /* Input remains, so namebuf hit name_limit.  */
1021     {
1022       list->name_used = namebuf - list->namebuf;
1023       auto_expand_name_space (list);
1024       goto expanded;
1025     }
1026 
1027  out:
1028   buffer->cur = cur;
1029   name->len = namebuf - name->text;
1030   list->name_used = namebuf - list->namebuf;
1031 }
1032
1033 /* Places a string terminated by an unescaped TERMINATOR into a
1034    cpp_string, which should be expandable and thus at the top of the
1035    list's stack.  Handles embedded trigraphs, if necessary, and
1036    escaped newlines.
1037 
1038    Can be used for character constants (terminator = '\''), string
1039    constants ('"') and angled headers ('>').  Only '"' strings may span
1040    lines unescaped (see below); directives are not treated specially.  */
1041 
1042 static void
1043 parse_string (pfile, list, token, terminator)
1044      cpp_reader *pfile;
1045      cpp_toklist *list;
1046      cpp_token *token;          /* Its val.str receives the text.  */
1047      unsigned int terminator;
1048 {
1049   cpp_buffer *buffer = pfile->buffer;
1050   cpp_string *name = &token->val.str;
1051   register const unsigned char *cur = buffer->cur;
1052   const unsigned char *name_limit;
1053   unsigned char *namebuf;
1054   unsigned int null_count = 0;  /* NULs copied into the string.  */
1055   unsigned int trigraphed = list->name_used; /* Offset where the next trigraph scan starts.  */
1056 
1057  expanded:                      /* (Re)derive the write window from LIST.  */
1058   name_limit = list->namebuf + list->name_cap;
1059   namebuf = list->namebuf + list->name_used;
1060 
1061   for (; cur < buffer->rlimit && namebuf < name_limit; )
1062     {
1063       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1064 
1065       if (c == '\0')
1066         null_count++;
1067       else if (c == terminator || is_vspace (c))
1068         {
1069           /* Needed for trigraph_replace and multiline string warning.  */
1070           buffer->cur = cur;
1071 
1072           /* Scan for trigraphs before checking if backslash-escaped.  */
1073           if ((CPP_OPTION (pfile, trigraphs)
1074                || CPP_OPTION (pfile, warn_trigraphs))
1075               && namebuf - (list->namebuf + trigraphed) >= 3)
1076             {
1077               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1078                                           namebuf);
1079               /* The test above guarantees trigraphed will be positive.  */
1080               trigraphed = namebuf - list->namebuf - 2;
1081             }
1082 
1083           namebuf--;     /* Drop the newline / terminator from the name.  */
1084           if (is_vspace (c))
1085             {
1086               /* Drop a backslash newline, and continue. */
1087               if (namebuf[-1] == '\\') /* NOTE(review): no bound against name->text; confirm.  */
1088                 {
1089                   handle_newline (cur, buffer->rlimit, c);
1090                   namebuf--;
1091                   continue;
1092                 }
1093 
1094               cur--;            /* Back onto the newline.  */
1095 
1096               /* In Fortran and assembly language, silently terminate
1097                  strings of either variety at end of line.  This is a
1098                  kludge around not knowing where comments are in these
1099                  languages.  */
1100               if (CPP_OPTION (pfile, lang_fortran)
1101                   || CPP_OPTION (pfile, lang_asm))
1102                 goto out;
1103 
1104               /* Character constants, headers and asserts may not
1105                  extend over multiple lines.  In Standard C, neither
1106                  may strings.  We accept multiline strings as an
1107                  extension.  (Even in directives - otherwise, glibc's
1108                  longlong.h breaks.)  */
1109               if (terminator != '"')
1110                 goto unterminated;
1111 
1112               cur++;  /* Move forwards again.  */
1113 
1114               if (pfile->multiline_string_line == 0) /* First such line: remember the start.  */
1115                 {
1116                   pfile->multiline_string_line = token->line;
1117                   pfile->multiline_string_column = token->col;
1118                   if (CPP_PEDANTIC (pfile))
1119                     cpp_pedwarn (pfile, "multi-line string constant");
1120                 }
1121 
1122               *namebuf++ = '\n'; /* Stored in place of the newline char read.  */
1123               handle_newline (cur, buffer->rlimit, c);
1124             }
1125           else
1126             {
1127               unsigned char *temp;
1128 
1129               /* An odd number of consecutive backslashes represents
1130                  an escaped terminator.  */
1131               temp = namebuf - 1;
1132               while (temp >= name->text && *temp == '\\')
1133                 temp--;
1134               /* namebuf - temp is the backslash count plus one.  */
1135               if ((namebuf - temp) & 1)
1136                 goto out;       /* Even count: a real terminator.  */
1137               namebuf++;        /* Escaped: keep it in the string.  */
1138             }
1139         }
1140     }
1141 
1142   /* Run out of name space?  */
1143   if (cur < buffer->rlimit)     /* Input remains, so namebuf hit name_limit.  */
1144     {
1145       list->name_used = namebuf - list->namebuf;
1146       auto_expand_name_space (list);
1147       goto expanded;
1148     }
1149 
1150   /* We may not have trigraph-replaced the input for this code path,
1151      but as the input is in error by being unterminated we don't
1152      bother.  Prevent warnings about no newlines at EOF.  */
1153   if (is_vspace (cur[-1]))
1154     cur--;
1155 
1156  unterminated:
1157   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1158 
1159   if (terminator == '\"' && pfile->multiline_string_line != list->line
1160       && pfile->multiline_string_line != 0)
1161     {
1162       cpp_error_with_line (pfile, pfile->multiline_string_line,
1163                            pfile->multiline_string_column,
1164                            "possible start of unterminated string literal");
1165       pfile->multiline_string_line = 0;
1166     }
1167 
1168  out:                           /* Publish the final positions and length.  */
1169   buffer->cur = cur;
1170   name->len = namebuf - name->text;
1171   list->name_used = namebuf - list->namebuf;
1172 
1173   if (null_count > 0)
1174     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1175                          : "null character preserved"));
1176 }
1177
1178 /* Store a comment in TOKEN as a CPP_COMMENT whose text lives in LIST's
1179    name buffer.  TYPE distinguishes comment types: '*' = C style, '-' =
1180    Chill-style and '/' = C++ style.  For code simplicity, the stored
1181    comment includes the comment start and any terminator.  */
1182 #define COMMENT_START_LEN 2     /* Length of the two-character opener.  */
1183 static void
1184 save_comment (list, token, from, len, type)
1185      cpp_toklist *list;
1186      cpp_token *token;
1187      const unsigned char *from; /* Text that follows the opener.  */
1188      unsigned int len;          /* Number of bytes to copy from FROM.  */
1189      unsigned int type;
1190 {
1191   unsigned char *buffer;
1192 
1193   len += COMMENT_START_LEN;     /* From here LEN counts the opener too.  */
1194 
1195   if (list->name_used + len > list->name_cap)
1196     _cpp_expand_name_space (list, len);
1197   /* NOTE(review): INIT_TOKEN_STR presumably reads list->name_used; it runs before the update below.  */
1198   INIT_TOKEN_STR (list, token);
1199   token->type = CPP_COMMENT;
1200   token->val.str.len = len;
1201 
1202   buffer = list->namebuf + list->name_used;
1203   list->name_used += len;
1204 
1205   /* Copy the comment.  */
1206   if (type == '*')
1207     {
1208       *buffer++ = '/';
1209       *buffer++ = '*';
1210     }
1211   else                          /* Opener is TYPE written twice.  */
1212     {
1213       *buffer++ = type;
1214       *buffer++ = type;
1215     }
1216   memcpy (buffer, from, len - COMMENT_START_LEN);
1217 }
1218
1219 /*
1220  *  The tokenizer's main loop.  Returns a token list, representing a
1221  *  logical line in the input file.  On EOF after some tokens have
1222  *  been processed, we return immediately.  Then in next call, or if
1223  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1224  *  token is placed in the list.
1225  *
1226  *  Implementation relies almost entirely on lookback, rather than
1227  *  looking forwards.  This means that tokenization requires just
1228  *  a single pass of the file, even in the presence of trigraphs and
1229  *  escaped newlines, providing significant performance benefits.
1230  *  Trigraph overhead is negligible if they are disabled, and low
1231  *  even when enabled.
1232  */
1233
1234 #define KNOWN_DIRECTIVE() (list->directive != 0)
1235 #define MIGHT_BE_DIRECTIVE() \
1236 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1237
1238 static void
1239 lex_line (pfile, list)
1240      cpp_reader *pfile;
1241      cpp_toklist *list;
1242 {
1243   cpp_token *cur_token, *token_limit, *first;
1244   cpp_buffer *buffer = pfile->buffer;
1245   const unsigned char *cur = buffer->cur;
1246   unsigned char flags = 0;
1247   unsigned int first_token = list->tokens_used;
1248
1249   if (!(list->flags & LIST_OFFSET))
1250     (abort) ();
1251
1252   list->file = buffer->nominal_fname;
1253   list->line = CPP_BUF_LINE (buffer);
1254   pfile->col_adjust = 0;
1255   pfile->in_lex_line = 1;
1256   if (cur == buffer->buf)
1257     list->flags |= BEG_OF_FILE;
1258
1259  expanded:
1260   token_limit = list->tokens + list->tokens_cap;
1261   cur_token = list->tokens + list->tokens_used;
1262
1263   for (; cur < buffer->rlimit && cur_token < token_limit;)
1264     {
1265       unsigned char c;
1266
1267       /* Optimize non-vertical whitespace skipping; most tokens are
1268          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1269       c = *cur;
1270       if (is_nvspace (c))
1271         {
1272           buffer->cur = cur;
1273           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1274                                    && cur_token > &list->tokens[first_token]));
1275           cur = buffer->cur;
1276
1277           flags = PREV_WHITE;
1278           if (cur == buffer->rlimit)
1279             break;
1280           c = *cur;
1281         }
1282       cur++;
1283
1284       /* Initialize current token.  CPP_EOF will not be fixed up by
1285          expand_name_space.  */
1286       list->tokens_used = cur_token - list->tokens + 1;
1287       cur_token->type = CPP_EOF;
1288       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1289       cur_token->line = CPP_BUF_LINE (buffer);
1290       cur_token->flags = flags;
1291       flags = 0;
1292
1293       switch (c)
1294         {
1295         case '0': case '1': case '2': case '3': case '4':
1296         case '5': case '6': case '7': case '8': case '9':
1297           {
1298             int prev_dot;
1299
1300             cur--;              /* Backup character.  */
1301             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1302             if (prev_dot)
1303               cur_token--;
1304             INIT_TOKEN_STR (list, cur_token);
1305             /* Prepend an immediately previous CPP_DOT token.  */
1306             if (prev_dot)
1307               {
1308                 if (list->name_cap == list->name_used)
1309                   auto_expand_name_space (list);
1310
1311                 cur_token->val.str.len = 1;
1312                 list->namebuf[list->name_used++] = '.';
1313               }
1314
1315           continue_number:
1316             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1317             buffer->cur = cur;
1318             parse_number (pfile, list, &cur_token->val.str);
1319             cur = buffer->cur;
1320           }
1321           /* Check for # 123 form of #line.  */
1322           if (MIGHT_BE_DIRECTIVE ())
1323             list->directive = _cpp_check_linemarker (pfile, cur_token,
1324                                                      !(cur_token[-1].flags
1325                                                        & PREV_WHITE));
1326           cur_token++;
1327           break;
1328
1329         letter:
1330         case '_':
1331         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1332         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1333         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1334         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1335         case 'y': case 'z':
1336         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1337         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1338         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1339         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1340         case 'Y': case 'Z':
1341           cur--;                     /* Backup character.  */
1342           cur_token->val.node = 0;
1343           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1344
1345         continue_name:
1346           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1347
1348           if (MIGHT_BE_DIRECTIVE ())
1349             list->directive = _cpp_check_directive (pfile, cur_token,
1350                                                     !(list->tokens[0].flags
1351                                                       & PREV_WHITE));
1352           cur_token++;
1353           break;
1354
1355         case '\'':
1356           /* Character constants are not recognized when processing Fortran,
1357              or if -traditional.  */
1358           if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1359             goto other;
1360
1361           /* Fall through.  */
1362         case '\"':
1363           /* Traditionally, escaped strings are not strings.  */
1364           if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1365               && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1366             goto other;
1367
1368           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1369           /* Do we have a wide string?  */
1370           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1371               && cur_token[-1].val.node == pfile->spec_nodes->n_L
1372               && !CPP_TRADITIONAL (pfile))
1373             {
1374               (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1375             }
1376
1377         do_parse_string:
1378           /* Here c is one of ' " or >.  */
1379           INIT_TOKEN_STR (list, cur_token);
1380           buffer->cur = cur;
1381           parse_string (pfile, list, cur_token, c);
1382           cur = buffer->cur;
1383           cur_token++;
1384           break;
1385
1386         case '/':
1387           cur_token->type = CPP_DIV;
1388           if (IMMED_TOKEN ())
1389             {
1390               if (PREV_TOKEN_TYPE == CPP_DIV)
1391                 {
1392                   /* We silently allow C++ comments in system headers,
1393                      irrespective of conformance mode, because lots of
1394                      broken systems do that and trying to clean it up
1395                      in fixincludes is a nightmare.  */
1396                   if (CPP_IN_SYSTEM_HEADER (pfile))
1397                     goto do_line_comment;
1398                   else if (CPP_OPTION (pfile, cplusplus_comments))
1399                     {
1400                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1401                           && ! buffer->warned_cplusplus_comments)
1402                         {
1403                           buffer->cur = cur;
1404                           cpp_pedwarn (pfile,
1405                              "C++ style comments are not allowed in ISO C89");
1406                           cpp_pedwarn (pfile,
1407                           "(this will be reported only once per input file)");
1408                           buffer->warned_cplusplus_comments = 1;
1409                         }
1410                     do_line_comment:
1411                       buffer->cur = cur;
1412 #if 0 /* Leave until new lexer in place.  */
1413                       if (cur[-2] != c)
1414                         cpp_warning (pfile,
1415                                      "comment start split across lines");
1416 #endif
1417                       if (skip_line_comment (pfile))
1418                         cpp_warning (pfile, "multi-line comment");
1419
1420                       /* Back-up to first '-' or '/'.  */
1421                       cur_token--;
1422                       if (!CPP_OPTION (pfile, discard_comments)
1423                           && (!KNOWN_DIRECTIVE()
1424                               || (list->directive->flags & COMMENTS)))
1425                         save_comment (list, cur_token++, cur,
1426                                       buffer->cur - cur, c);
1427                       else if (!CPP_OPTION (pfile, traditional))
1428                         flags = PREV_WHITE;
1429
1430                       cur = buffer->cur;
1431                       break;
1432                     }
1433                 }
1434             }
1435           cur_token++;
1436           break;
1437
1438         case '*':
1439           cur_token->type = CPP_MULT;
1440           if (IMMED_TOKEN ())
1441             {
1442               if (PREV_TOKEN_TYPE == CPP_DIV)
1443                 {
1444                   buffer->cur = cur;
1445 #if 0 /* Leave until new lexer in place.  */
1446                   if (cur[-2] != '/')
1447                     cpp_warning (pfile,
1448                                  "comment start '/*' split across lines");
1449 #endif
1450                   if (skip_block_comment (pfile))
1451                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1452                                          "unterminated comment");
1453 #if 0 /* Leave until new lexer in place.  */
1454                   else if (buffer->cur[-2] != '*')
1455                     cpp_warning (pfile,
1456                                  "comment end '*/' split across lines");
1457 #endif
1458                   /* Back up to opening '/'.  */
1459                   cur_token--;
1460                   if (!CPP_OPTION (pfile, discard_comments)
1461                       && (!KNOWN_DIRECTIVE()
1462                           || (list->directive->flags & COMMENTS)))
1463                     save_comment (list, cur_token++, cur,
1464                                   buffer->cur - cur, c);
1465                   else if (!CPP_OPTION (pfile, traditional))
1466                     flags = PREV_WHITE;
1467
1468                   cur = buffer->cur;
1469                   break;
1470                 }
1471               else if (CPP_OPTION (pfile, cplusplus))
1472                 {
1473                   /* In C++, there are .* and ->* operators.  */
1474                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1475                     BACKUP_TOKEN (CPP_DEREF_STAR);
1476                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1477                     BACKUP_TOKEN (CPP_DOT_STAR);
1478                 }
1479             }
1480           cur_token++;
1481           break;
1482
1483         case '\n':
1484         case '\r':
1485           handle_newline (cur, buffer->rlimit, c);
1486           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1487             {
1488               if (IMMED_TOKEN ())
1489                 {
1490                   /* Remove the escaped newline.  Then continue to process
1491                      any interrupted name or number.  */
1492                   cur_token--;
1493                   /* Backslash-newline may not be immediately followed by
1494                      EOF (C99 5.1.1.2).  */
1495                   if (cur >= buffer->rlimit)
1496                     {
1497                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1498                       break;
1499                     }
1500                   if (IMMED_TOKEN ())
1501                     {
1502                       cur_token--;
1503                       if (cur_token->type == CPP_NAME)
1504                         goto continue_name;
1505                       else if (cur_token->type == CPP_NUMBER)
1506                         goto continue_number;
1507                       cur_token++;
1508                     }
1509                   /* Remember whitespace setting.  */
1510                   flags = cur_token->flags;
1511                   break;
1512                 }
1513               else
1514                 {
1515                   buffer->cur = cur;
1516                   cpp_warning (pfile,
1517                                "backslash and newline separated by space");
1518                 }
1519             }
1520           else if (MIGHT_BE_DIRECTIVE ())
1521             {
1522               /* "Null directive." C99 6.10.7: A preprocessing
1523                  directive of the form # <new-line> has no effect.
1524
1525                  But it is still a directive, and therefore disappears
1526                  from the output. */
1527               cur_token--;
1528               if (cur_token->flags & PREV_WHITE)
1529                 {
1530                   if (CPP_WTRADITIONAL (pfile))
1531                     cpp_warning (pfile,
1532                                  "K+R C ignores #\\n with the # indented");
1533                   if (CPP_TRADITIONAL (pfile))
1534                     cur_token++;
1535                 }
1536             }
1537
1538           /* Skip vertical space until we have at least one token to
1539              return.  */
1540           if (cur_token != &list->tokens[first_token])
1541             goto out;
1542           list->line = CPP_BUF_LINE (buffer);
1543           break;
1544
1545         case '-':
1546           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1547             {
1548               if (CPP_OPTION (pfile, chill))
1549                 goto do_line_comment;
1550               REVISE_TOKEN (CPP_MINUS_MINUS);
1551             }
1552           else
1553             PUSH_TOKEN (CPP_MINUS);
1554           break;
1555
1556         make_hash:
1557         case '#':
1558           /* The digraph flag checking ensures that ## and %:%:
1559              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1560           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1561               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1562             REVISE_TOKEN (CPP_PASTE);
1563           else
1564             PUSH_TOKEN (CPP_HASH);
1565           break;
1566
1567         case ':':
1568           cur_token->type = CPP_COLON;
1569           if (IMMED_TOKEN ())
1570             {
1571               if (PREV_TOKEN_TYPE == CPP_COLON
1572                   && CPP_OPTION (pfile, cplusplus))
1573                 BACKUP_TOKEN (CPP_SCOPE);
1574               else if (CPP_OPTION (pfile, digraphs))
1575                 {
1576                   /* Digraph: "<:" is a '['  */
1577                   if (PREV_TOKEN_TYPE == CPP_LESS)
1578                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1579                   /* Digraph: "%:" is a '#'  */
1580                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1581                     {
1582                       (--cur_token)->flags |= DIGRAPH;
1583                       goto make_hash;
1584                     }
1585                 }
1586             }
1587           cur_token++;
1588           break;
1589
1590         case '&':
1591           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1592             REVISE_TOKEN (CPP_AND_AND);
1593           else
1594             PUSH_TOKEN (CPP_AND);
1595           break;
1596
1597         make_or:
1598         case '|':
1599           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1600             REVISE_TOKEN (CPP_OR_OR);
1601           else
1602             PUSH_TOKEN (CPP_OR);
1603           break;
1604
1605         case '+':
1606           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1607             REVISE_TOKEN (CPP_PLUS_PLUS);
1608           else
1609             PUSH_TOKEN (CPP_PLUS);
1610           break;
1611
1612         case '=':
1613             /* This relies on equidistance of "?=" and "?" tokens.  */
1614           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1615             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1616           else
1617             PUSH_TOKEN (CPP_EQ);
1618           break;
1619
1620         case '>':
1621           cur_token->type = CPP_GREATER;
1622           if (IMMED_TOKEN ())
1623             {
1624               if (PREV_TOKEN_TYPE == CPP_GREATER)
1625                 BACKUP_TOKEN (CPP_RSHIFT);
1626               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1627                 BACKUP_TOKEN (CPP_DEREF);
1628               else if (CPP_OPTION (pfile, digraphs))
1629                 {
1630                   /* Digraph: ":>" is a ']'  */
1631                   if (PREV_TOKEN_TYPE == CPP_COLON)
1632                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1633                   /* Digraph: "%>" is a '}'  */
1634                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1635                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1636                 }
1637             }
1638           cur_token++;
1639           break;
1640
1641         case '<':
1642           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1643             {
1644               REVISE_TOKEN (CPP_LSHIFT);
1645               break;
1646             }
1647           /* Is this the beginning of a header name?  */
1648           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1649             {
1650               c = '>';  /* Terminator.  */
1651               cur_token->type = CPP_HEADER_NAME;
1652               goto do_parse_string;
1653             }
1654           PUSH_TOKEN (CPP_LESS);
1655           break;
1656
1657         case '%':
1658           /* Digraph: "<%" is a '{'  */
1659           cur_token->type = CPP_MOD;
1660           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1661               && CPP_OPTION (pfile, digraphs))
1662             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1663           cur_token++;
1664           break;
1665
1666         case '?':
1667           if (cur + 1 < buffer->rlimit && *cur == '?'
1668               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1669             {
1670               /* Handle trigraph.  */
1671               cur++;
1672               switch (*cur++)
1673                 {
1674                 case '(': goto make_open_square;
1675                 case ')': goto make_close_square;
1676                 case '<': goto make_open_brace;
1677                 case '>': goto make_close_brace;
1678                 case '=': goto make_hash;
1679                 case '!': goto make_or;
1680                 case '-': goto make_complement;
1681                 case '/': goto make_backslash;
1682                 case '\'': goto make_xor;
1683                 }
1684             }
1685           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1686             {
1687               /* GNU C++ defines <? and >? operators.  */
1688               if (PREV_TOKEN_TYPE == CPP_LESS)
1689                 {
1690                   REVISE_TOKEN (CPP_MIN);
1691                   break;
1692                 }
1693               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1694                 {
1695                   REVISE_TOKEN (CPP_MAX);
1696                   break;
1697                 }
1698             }
1699           PUSH_TOKEN (CPP_QUERY);
1700           break;
1701
1702         case '.':
1703           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1704               && IMMED_TOKEN ()
1705               && !(cur_token[-1].flags & PREV_WHITE))
1706             {
1707               cur_token -= 2;
1708               PUSH_TOKEN (CPP_ELLIPSIS);
1709             }
1710           else
1711             PUSH_TOKEN (CPP_DOT);
1712           break;
1713
1714         make_complement:
1715         case '~': PUSH_TOKEN (CPP_COMPL); break;
1716         make_xor:
1717         case '^': PUSH_TOKEN (CPP_XOR); break;
1718         make_open_brace:
1719         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1720         make_close_brace:
1721         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1722         make_open_square:
1723         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1724         make_close_square:
1725         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1726         make_backslash:
1727         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1728         case '!': PUSH_TOKEN (CPP_NOT); break;
1729         case ',': PUSH_TOKEN (CPP_COMMA); break;
1730         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1731         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1732         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1733
1734         case '$':
1735           if (CPP_OPTION (pfile, dollars_in_ident))
1736             goto letter;
1737           /* Fall through */
1738         other:
1739         default:
1740           cur_token->val.aux = c;
1741           PUSH_TOKEN (CPP_OTHER);
1742           break;
1743         }
1744     }
1745
1746   /* Run out of token space?  */
1747   if (cur_token == token_limit)
1748     {
1749       list->tokens_used = cur_token - list->tokens;
1750       _cpp_expand_token_space (list, 256);
1751       goto expanded;
1752     }
1753
1754   cur_token->flags = flags;
1755   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1756     {
1757       if (cur > buffer->buf && !is_vspace (cur[-1]))
1758         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1759                                CPP_BUF_COLUMN (buffer, cur),
1760                                "no newline at end of file");
1761       cur_token++->type = CPP_EOF;
1762     }
1763
1764  out:
1765   /* All tokens are allocated, so the memory location is fixed.  */
1766   first = &list->tokens[first_token];
1767
1768   /* Don't complain about the null directive, nor directives in
1769      assembly source: we don't know where the comments are, and # may
1770      introduce assembler pseudo-ops.  Don't complain about invalid
1771      directives in skipped conditional groups (6.10 p4).  */
1772   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1773       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1774     {
1775       if (first[1].type == CPP_NAME)
1776         cpp_error (pfile, "invalid preprocessing directive #%.*s",
1777                    (int) first[1].val.node->length, first[1].val.node->name);
1778       else
1779         cpp_error (pfile, "invalid preprocessing directive");
1780     }
1781
1782   /* Put EOF at end of known directives.  This covers "directives do
1783      not extend beyond the end of the line (description 6.10 part 2)".  */
1784   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1785     {
1786       pfile->first_directive_token = first;
1787       cur_token++->type = CPP_EOF;
1788     }
1789
1790   /* Directives, known or not, always start a new line.  */
1791   if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1792     first->flags |= BOL;
1793   else
1794     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1795        up the invocation of a function-like macro, new line is
1796        considered a normal white-space character.  */
1797     first->flags |= PREV_WHITE;
1798
1799   buffer->cur = cur;
1800   list->tokens_used = cur_token - list->tokens;
1801   pfile->in_lex_line = 0;
1802 }
1803
1804 /* Write the spelling of a token TOKEN, with any appropriate
1805    whitespace before it, to the token_buffer.  PREV is the previous
1806    token, which is used to determine if we need to shove in an extra
1807    space in order to avoid accidental token paste.  */
1808 static void
1809 output_token (pfile, token, prev)
1810      cpp_reader *pfile;
1811      const cpp_token *token, *prev;
1812 {
1813   int dummy;
1814
1815   if (token->col && (token->flags & BOL))
1816     {
1817       /* Supply enough whitespace to put this token in its original
1818          column.  Don't bother trying to reconstruct tabs; we can't
1819          get it right in general, and nothing ought to care.  (Yes,
1820          some things do care; the fault lies with them.)  */
1821       unsigned char *buffer;
1822       unsigned int spaces = token->col - 1;
1823
1824       CPP_RESERVE (pfile, token->col);
1825       buffer = pfile->limit;
1826
1827       while (spaces--)
1828         *buffer++ = ' ';
1829       pfile->limit = buffer;
1830     }
1831   else if (token->flags & PREV_WHITE)
1832     CPP_PUTC (pfile, ' ');
1833   /* Check for and prevent accidental token pasting, in ANSI mode.  */
1834
1835   else if (!CPP_TRADITIONAL (pfile) && prev)
1836     {
1837       if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1838         CPP_PUTC (pfile, ' ');
1839       /* can_paste catches most of the accidental paste cases, but not all.
1840          Consider a + ++b - if there is not a space between the + and ++, it
1841          will be misparsed as a++ + b.  */
1842       else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1843                || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1844         CPP_PUTC (pfile, ' ');
1845     }
1846
1847   CPP_RESERVE (pfile, TOKEN_LEN (token));
1848   pfile->limit = spell_token (pfile, token, pfile->limit);
1849 }
1850
1851 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1852    already contain the enough space to hold the token's spelling.
1853    Returns a pointer to the character after the last character
1854    written.  */
1855
1856 static unsigned char *
1857 spell_token (pfile, token, buffer)
1858      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1859      const cpp_token *token;
1860      unsigned char *buffer;
1861 {
1862   switch (token_spellings[token->type].type)
1863     {
1864     case SPELL_OPERATOR:
1865       {
1866         const unsigned char *spelling;
1867         unsigned char c;
1868
1869         if (token->flags & DIGRAPH)
1870           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1871         else
1872           spelling = token_spellings[token->type].spelling;
1873
1874         while ((c = *spelling++) != '\0')
1875           *buffer++ = c;
1876       }
1877       break;
1878
1879     case SPELL_IDENT:
1880       memcpy (buffer, token->val.node->name, token->val.node->length);
1881       buffer += token->val.node->length;
1882       break;
1883
1884     case SPELL_STRING:
1885       {
1886         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1887           *buffer++ = 'L';
1888
1889         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1890           *buffer++ = '"';
1891         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1892           *buffer++ = '\'';
1893
1894         memcpy (buffer, token->val.str.text, token->val.str.len);
1895         buffer += token->val.str.len;
1896
1897         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1898           *buffer++ = '"';
1899         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1900           *buffer++ = '\'';
1901       }
1902       break;
1903
1904     case SPELL_CHAR:
1905       *buffer++ = token->val.aux;
1906       break;
1907
1908     case SPELL_NONE:
1909       cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1910       break;
1911     }
1912
1913   return buffer;
1914 }
1915
1916 /* Return the spelling of a token known to be an operator.
1917    Does not distinguish digraphs from their counterparts.  */
1918 const unsigned char *
1919 _cpp_spell_operator (type)
1920      enum cpp_ttype type;
1921 {
1922   if (token_spellings[type].type == SPELL_OPERATOR)
1923     return token_spellings[type].spelling;
1924   else
1925     return token_names[type];
1926 }
1927
1928
/* Macro expansion algorithm.  TODO.  */

/* Constant tokens of type CPP_PLACEMARKER and CPP_EOF.  The
   placemarker is copied with duplicate_token wherever an empty macro
   argument has to be recorded, so that each copy can carry its own
   flags and position.  */
static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};

/* Non-zero if context C has CONTEXT_ARG set in its flags.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Address of the entry of PFILE's contexts array selected by
   cur_context.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)

/* Flags for cpp_context.  */
#define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
#define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
#define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
#define CONTEXT_ARG     (1 << 3) /* If an argument context.  */

/* Set D's flags to the PREV_WHITE, BOL and PASTE_LEFT bits of S,
   dropping every other bit.  If BOL is among them, D also takes S's
   column and line.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* Replace the PREV_WHITE and BOL bits of D's flags with F, leaving
   D's other flag bits alone.  f is flags, just consisting of
   PREV_WHITE | BOL.  If F includes BOL, D also takes S's column and
   line.  Note that F is evaluated more than once.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
      if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
1953
/* One entry of the reader's context array (see CURRENT_CONTEXT).  A
   context supplies tokens either from a macro's token list or from
   the saved tokens of a macro argument; IS_ARG_CONTEXT tells which
   member of U is in use.  */
typedef struct cpp_context cpp_context;
struct cpp_context
{
  union
  {
    const cpp_toklist *list;    /* Used for macro contexts only.  */
    const cpp_token **arg;      /* Used for arg contexts only.  */
  } u;

  /* Pushed token to be returned by next call to cpp_get_token.  */
  const cpp_token *pushed_token;

  struct macro_args *args;      /* 0 for arguments and object-like macros.  */
  unsigned short posn;          /* Current posn, index into u.  */
  unsigned short count;         /* No. of tokens in u.  */
  /* Index into the reader's context array.  is_macro_disabled stops
     its search when this is 0 and, for CONTEXT_RAW contexts, jumps
     to the context at this index.  NOTE(review): presumably the
     context the tokens originated from -- confirm where it is set.  */
  unsigned short level;
  unsigned char flags;          /* Combination of the CONTEXT_* bits.  */
};
1972
/* The tokens collected for the arguments of one function-like macro
   invocation, as filled in by parse_args and save_token.  */
typedef struct macro_args macro_args;
struct macro_args
{
  /* ends[i] is the running total of entries saved in TOKENS once
     argument i has been read, i.e. one past its last entry.  */
  unsigned int *ends;
  /* Saved token pointers for all arguments, back to back.  A null
     entry is stored by parse_arg wherever the "raw" state of the
     incoming tokens changes.  Grown on demand by save_token.  */
  const cpp_token **tokens;
  unsigned int capacity;        /* Entries allocated in TOKENS.  */
  unsigned int used;            /* Entries of TOKENS in use.  */
  /* NOTE(review): not referenced in this part of the file; presumably
     a context level -- confirm against the code that sets it.  */
  unsigned short level;
};
1982
/* Forward declarations of file-local helpers used by the macro
   expansion code.  PARAMS keeps the prototypes usable with compilers
   that do not accept prototype syntax.  */
static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
                                           macro_args *, unsigned int *));
static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
static void save_token PARAMS ((macro_args *, const cpp_token *));
static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
                                                  const cpp_token *));
static int do_pop_context PARAMS ((cpp_reader *));
static const cpp_token *pop_context PARAMS ((cpp_reader *));
static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
                                                    cpp_hashnode *,
                                                    const cpp_token *));
static void free_macro_args PARAMS ((macro_args *));
1996
1997 /* Free the storage allocated for macro arguments.  */
1998 static void
1999 free_macro_args (args)
2000      macro_args *args;
2001 {
2002   if (args->tokens)
2003     free (args->tokens);
2004   free (args->ends);
2005   free (args);
2006 }
2007
/* Determines if a macro has been already used (and is therefore
   disabled).

   EXPANSION is the token list of the macro and TOKEN the token that
   names it.  Returns 1 if the macro must not be expanded at this
   point and 0 if it may be.  For a function-like macro (paramc >= 0)
   one token is read ahead to look for the '('.  If the token read is
   an open parenthesis it stays consumed; otherwise it is pushed back
   with _cpp_push_token and the macro is reported as disabled.  */
static int
is_macro_disabled (pfile, expansion, token)
     cpp_reader *pfile;
     const cpp_toklist *expansion;
     const cpp_token *token;
{
  cpp_context *context = CURRENT_CONTEXT (pfile);

  /* Don't expand anything if this file has already been preprocessed.  */
  if (CPP_OPTION (pfile, preprocessed))
    return 1;

  /* Arguments on either side of ## are inserted in place without
     macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
     occurs during a later rescan pass.  The effect is that we expand
     iff we would as part of the macro's expansion list, so we should
     drop to the macro's context.  */
  if (IS_ARG_CONTEXT (context))
    {
      /* A token produced by ## is judged in the context below.  */
      if (token->flags & PASTED)
        context--;
      /* Any other token of an argument context that is not flagged
         CONTEXT_RAW is never expanded here.  */
      else if (!(context->flags & CONTEXT_RAW))
        return 1;
      else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
        context--;
    }

  /* Have we already used this macro?  */
  while (context->level > 0)
    {
      /* Only macro contexts have a token list in u.list to compare.  */
      if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
        return 1;
      /* Raw argument tokens are judged based on the token list they
         came from.  */
      if (context->flags & CONTEXT_RAW)
        context = pfile->contexts + context->level;
      else
        context--;
    }

  /* Function-like macros may be disabled if the '(' is not in the
     current context.  We check this without disrupting the context
     stack.  */
  if (expansion->paramc >= 0)
    {
      const cpp_token *next;
      unsigned int prev_nme;

      context = CURRENT_CONTEXT (pfile);
      /* Drop down any contexts we're at the end of: the '(' may
         appear in lower macro expansions, or in the rest of the file.  */
      while (context->posn == context->count && context > pfile->contexts)
        {
          context--;
          /* If we matched, we are disabled, as we appear in the
             expansion of each macro we meet.  */
          if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
            return 1;
        }

      /* Read the next token with no_expand_level set to the index of
         the context reached above, then put the previous setting
         back through restore_macro_expansion.  */
      prev_nme = pfile->no_expand_level;
      pfile->no_expand_level = context - pfile->contexts;
      next = cpp_get_token (pfile);
      restore_macro_expansion (pfile, prev_nme);
      if (next->type != CPP_OPEN_PAREN)
        {
          /* Not an invocation: give the token back and leave the
             macro name unexpanded.  */
          _cpp_push_token (pfile, next);
          if (CPP_OPTION (pfile, warn_traditional))
            cpp_warning (pfile,
         "function macro %.*s must be used with arguments in traditional C",
                         (int) token->val.node->length, token->val.node->name);
          return 1;
        }
    }

  return 0;
}
2087
2088 /* Add a token to the set of tokens forming the arguments to the macro
2089    being parsed in parse_args.  */
2090 static void
2091 save_token (args, token)
2092      macro_args *args;
2093      const cpp_token *token;
2094 {
2095   if (args->used == args->capacity)
2096     {
2097       args->capacity += args->capacity + 100;
2098       args->tokens = (const cpp_token **)
2099         xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2100     }
2101   args->tokens[args->used++] = token;
2102 }
2103
2104 /* Take and save raw tokens until we finish one argument.  Empty
2105    arguments are saved as a single CPP_PLACEMARKER token.  */
2106 static const cpp_token *
2107 parse_arg (pfile, var_args, paren_context, args, pcount)
2108      cpp_reader *pfile;
2109      int var_args;
2110      unsigned int paren_context;
2111      macro_args *args;
2112      unsigned int *pcount;
2113 {
2114   const cpp_token *token;
2115   unsigned int paren = 0, count = 0;
2116   int raw, was_raw = 1;
2117
2118   for (count = 0;; count++)
2119     {
2120       token = cpp_get_token (pfile);
2121
2122       switch (token->type)
2123         {
2124         default:
2125           break;
2126
2127         case CPP_OPEN_PAREN:
2128           paren++;
2129           break;
2130
2131         case CPP_CLOSE_PAREN:
2132           if (paren-- != 0)
2133             break;
2134           goto out;
2135
2136         case CPP_COMMA:
2137           /* Commas are not terminators within parantheses or var_args.  */
2138           if (paren || var_args)
2139             break;
2140           goto out;
2141
2142         case CPP_EOF:           /* Error reported by caller.  */
2143           goto out;
2144         }
2145
2146       raw = pfile->cur_context <= paren_context;
2147       if (raw != was_raw)
2148         {
2149           was_raw = raw;
2150           save_token (args, 0);
2151           count++;
2152         }
2153       save_token (args, token);
2154     }
2155
2156  out:
2157   if (count == 0)
2158     {
2159       /* Duplicate the placemarker.  Then we can set its flags and
2160          position and safely be using more than one.  */
2161       save_token (args, duplicate_token (pfile, &placemarker_token));
2162       count++;
2163     }
2164
2165   *pcount = count;
2166   return token;
2167 }
2168
/* This macro returns true if the argument starting at offset O of arglist
   A is empty - that is, it's either a single PLACEMARKER token, or a null
   pointer followed by a PLACEMARKER.  (Null pointers are the entries
   parse_arg saves with save_token (args, 0).)  O is evaluated more
   than once, and when entry O is null, entry O + 1 is dereferenced,
   so it must exist.  */

#define empty_argument(A, O) \
 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
                 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2176
2177 /* Parse the arguments making up a macro invocation.  Nested arguments
2178    are automatically macro expanded, but immediate macros are not
2179    expanded; this enables e.g. operator # to work correctly.  Returns
2180    non-zero on error.  */
2181 static int
2182 parse_args (pfile, hp, args)
2183      cpp_reader *pfile;
2184      cpp_hashnode *hp;
2185      macro_args *args;
2186 {
2187   const cpp_token *token;
2188   const cpp_toklist *macro;
2189   unsigned int total = 0;
2190   unsigned int paren_context = pfile->cur_context;
2191   int argc = 0;
2192
2193   macro = hp->value.expansion;
2194   do
2195     {
2196       unsigned int count;
2197
2198       token = parse_arg (pfile, (argc + 1 == macro->paramc
2199                                  && (macro->flags & VAR_ARGS)),
2200                          paren_context, args, &count);
2201       if (argc < macro->paramc)
2202         {
2203           total += count;
2204           args->ends[argc] = total;
2205         }
2206       argc++;
2207     }
2208   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2209
2210   if (token->type == CPP_EOF)
2211     {
2212       cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2213                  hp->length, hp->name);
2214       return 1;
2215     }
2216   else if (argc < macro->paramc)
2217     {
2218       /* A rest argument is allowed to not appear in the invocation at all.
2219          e.g. #define debug(format, args...) ...
2220          debug("string");
2221          This is exactly the same as if the rest argument had received no
2222          tokens - debug("string",);  This extension is deprecated.  */
2223
2224       if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2225         {
2226           /* Duplicate the placemarker.  Then we can set its flags and
2227              position and safely be using more than one.  */
2228           save_token (args, duplicate_token (pfile, &placemarker_token));
2229           args->ends[argc] = total + 1;
2230           return 0;
2231         }
2232       else
2233         {
2234           cpp_error (pfile,
2235                      "insufficient arguments in invocation of macro \"%.*s\"",
2236                      hp->length, hp->name);
2237           return 1;
2238         }
2239     }
2240   /* An empty argument to an empty function-like macro is fine.  */
2241   else if (argc > macro->paramc
2242            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2243     {
2244       cpp_error (pfile,
2245                  "too many arguments in invocation of macro \"%.*s\"",
2246                  hp->length, hp->name);
2247       return 1;
2248     }
2249
2250   return 0;
2251 }
2252
2253 /* Adds backslashes before all backslashes and double quotes appearing
2254    in strings.  Non-printable characters are converted to octal.  */
2255 static U_CHAR *
2256 quote_string (dest, src, len)
2257      U_CHAR *dest;
2258      const U_CHAR *src;
2259      unsigned int len;
2260 {
2261   while (len--)
2262     {
2263       U_CHAR c = *src++;
2264
2265       if (c == '\\' || c == '"')
2266         {
2267           *dest++ = '\\';
2268           *dest++ = c;
2269         }
2270       else
2271         {
2272           if (ISPRINT (c))
2273             *dest++ = c;
2274           else
2275             {
2276               sprintf ((char *) dest, "\\%03o", c);
2277               dest += 4;
2278             }
2279         }
2280     }
2281
2282   return dest;
2283 }
2284
2285 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2286    CPP_STRING token containing TEXT in quoted form.  */
2287 static cpp_token *
2288 make_string_token (token, text, len)
2289      cpp_token *token;
2290      const U_CHAR *text;
2291      unsigned int len;
2292 {
2293   U_CHAR *buf;
2294
2295   buf = (U_CHAR *) xmalloc (len * 4);
2296   token->type = CPP_STRING;
2297   token->flags = 0;
2298   token->val.str.text = buf;
2299   token->val.str.len = quote_string (buf, text, len) - buf;
2300   return token;
2301 }
2302
2303 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2304    evaluating to NUMBER.  */
2305 static cpp_token *
2306 alloc_number_token (pfile, number)
2307      cpp_reader *pfile;
2308      int number;
2309 {
2310   cpp_token *result;
2311   char *buf;
2312
2313   result = get_temp_token (pfile);
2314   buf = xmalloc (20);
2315   sprintf (buf, "%d", number);
2316
2317   result->type = CPP_NUMBER;
2318   result->flags = 0;
2319   result->val.str.text = (U_CHAR *) buf;
2320   result->val.str.len = strlen (buf);
2321   return result;
2322 }
2323
2324 /* Returns a temporary token from the temporary token store of PFILE.  */
2325 static cpp_token *
2326 get_temp_token (pfile)
2327      cpp_reader *pfile;
2328 {
2329   if (pfile->temp_used == pfile->temp_alloced)
2330     {
2331       if (pfile->temp_used == pfile->temp_cap)
2332         {
2333           pfile->temp_cap += pfile->temp_cap + 20;
2334           pfile->temp_tokens = (cpp_token **) xrealloc
2335             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2336         }
2337       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2338         (sizeof (cpp_token));
2339     }
2340
2341   return pfile->temp_tokens[pfile->temp_used++];
2342 }
2343
2344 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2345 static void
2346 release_temp_tokens (pfile)
2347      cpp_reader *pfile;
2348 {
2349   while (pfile->temp_used)
2350     {
2351       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2352
2353       if (token_spellings[token->type].type == SPELL_STRING)
2354         {
2355           free ((char *) token->val.str.text);
2356           token->val.str.text = 0;
2357         }
2358     }
2359 }
2360
2361 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2362 void
2363 _cpp_free_temp_tokens (pfile)
2364      cpp_reader *pfile;
2365 {
2366   if (pfile->temp_tokens)
2367     {
2368       /* It is possible, though unlikely (looking for '(' of a funlike
2369          macro into EOF), that we haven't released the tokens yet.  */
2370       release_temp_tokens (pfile);
2371       while (pfile->temp_alloced)
2372         free (pfile->temp_tokens[--pfile->temp_alloced]);
2373       free (pfile->temp_tokens);
2374     }
2375
2376   if (pfile->date)
2377     {
2378       free ((char *) pfile->date->val.str.text);
2379       free (pfile->date);
2380       free ((char *) pfile->time->val.str.text);
2381       free (pfile->time);
2382     }
2383 }
2384
2385 /* Copy TOKEN into a temporary token from PFILE's store.  */
2386 static cpp_token *
2387 duplicate_token (pfile, token)
2388      cpp_reader *pfile;
2389      const cpp_token *token;
2390 {
2391   cpp_token *result = get_temp_token (pfile);
2392
2393   *result = *token;
2394   if (token_spellings[token->type].type == SPELL_STRING)
2395     {
2396       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2397       memcpy (buff, token->val.str.text, token->val.str.len);
2398       result->val.str.text = buff;
2399     }
2400   return result;
2401 }
2402
/* Determine whether two tokens can be pasted together, and if so,
   what the resulting token is.  Returns CPP_EOF if the tokens cannot
   be pasted, or the appropriate type for the merged token if they
   can.

   TOKEN1 is the left-hand token and TOKEN2 the right-hand one.
   *DIGRAPH is written only when the result is decided by a digraph
   rule: it is set to 1 for "<:", "<%", ":>", "%>" and "%:", and to
   TOKEN1's DIGRAPH flag bit when two '#' tokens merge.  In every
   other case it is left untouched, so a caller that reads it must
   initialise it first.  */
static enum cpp_ttype
can_paste (pfile, token1, token2, digraph)
     cpp_reader * pfile;
     const cpp_token *token1, *token2;
     int* digraph;
{
  enum cpp_ttype a = token1->type, b = token2->type;
  int cxx = CPP_OPTION (pfile, cplusplus);

  /* Any operator up to CPP_LAST_EQ followed by '=' gives the matching
     "op=" token.  This depends on the ordering of the token type
     enumeration, in the same way as the '=' case of the lexer.  */
  if (a <= CPP_LAST_EQ && b == CPP_EQ)
    return a + (CPP_EQ_EQ - CPP_EQ);

  switch (a)
    {
    case CPP_GREATER:
      if (b == a) return CPP_RSHIFT;
      /* ">?" is only an operator when compiling C++.  */
      if (b == CPP_QUERY && cxx)        return CPP_MAX;
      if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
      break;
    case CPP_LESS:
      if (b == a) return CPP_LSHIFT;
      /* "<?" is only an operator when compiling C++.  */
      if (b == CPP_QUERY && cxx)        return CPP_MIN;
      if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
      if (CPP_OPTION (pfile, digraphs))
        {
          if (b == CPP_COLON)
            {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
          if (b == CPP_MOD)
            {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
        }
      break;

    case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
    case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
    case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;

    case CPP_MINUS:
      if (b == a)               return CPP_MINUS_MINUS;
      if (b == CPP_GREATER)     return CPP_DEREF;
      break;
    case CPP_COLON:
      if (b == a && cxx)        return CPP_SCOPE;
      if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
        {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
      break;

    case CPP_MOD:
      if (CPP_OPTION (pfile, digraphs))
        {
          if (b == CPP_GREATER)
            {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
          if (b == CPP_COLON)
            {*digraph = 1; return CPP_HASH;}         /* %: digraph */
        }
      break;
    case CPP_DEREF:
      if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
      break;
    case CPP_DOT:
      if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
      if (b == CPP_NUMBER)      return CPP_NUMBER;
      break;

    case CPP_HASH:
      /* Two '#' tokens merge only if both are spelled the same way:
         both as digraphs or both as plain '#'.  */
      if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
        /* %:%: digraph */
        {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
      break;

    case CPP_NAME:
      if (b == CPP_NAME)        return CPP_NAME;
      /* A number extends a name only if its first character passes
         is_numstart.  */
      if (b == CPP_NUMBER
          && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
      /* The identifier L in front of a character constant or string
         literal gives the wide form.  */
      if (b == CPP_CHAR
          && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
      if (b == CPP_STRING
          && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
      break;

    case CPP_NUMBER:
      if (b == CPP_NUMBER)      return CPP_NUMBER;
      if (b == CPP_NAME)        return CPP_NUMBER;
      if (b == CPP_DOT)         return CPP_NUMBER;
      /* A sign joins a number only if VALID_SIGN accepts it after the
         number's last character.  */
      /* Numbers cannot have length zero, so this is safe.  */
      if ((b == CPP_PLUS || b == CPP_MINUS)
          && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
        return CPP_NUMBER;
      break;

    default:
      break;
    }

  return CPP_EOF;
}
2502
2503 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2504 static const cpp_token *
2505 maybe_paste_with_next (pfile, token)
2506      cpp_reader *pfile;
2507      const cpp_token *token;
2508 {
2509   cpp_token *pasted;
2510   const cpp_token *second;
2511   cpp_context *context = CURRENT_CONTEXT (pfile);
2512
2513   /* Is this token on the LHS of ## ? */
2514   if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2515       && !(token->flags & PASTE_LEFT))
2516     return token;
2517
2518   /* Prevent recursion, and possibly pushing back more than one token.  */
2519   if (pfile->paste_level)
2520     return token;
2521
2522   /* Suppress macro expansion for next token, but don't conflict with
2523      the other method of suppression.  If it is an argument, macro
2524      expansion within the argument will still occur.  */
2525   pfile->paste_level = pfile->cur_context;
2526   second = cpp_get_token (pfile);
2527   pfile->paste_level = 0;
2528
2529   /* Ignore placemarker argument tokens (cannot be from an empty macro
2530      since macros are not expanded).  */
2531   if (token->type == CPP_PLACEMARKER)
2532      pasted = duplicate_token (pfile, second);
2533   else if (second->type == CPP_PLACEMARKER)
2534     {
2535       cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2536       /* GCC has special extended semantics for a ## b where b is a
2537          varargs parameter: a disappears if b consists of no tokens.
2538          This extension is deprecated.  */
2539       if ((mac_context->u.list->flags & GNU_REST_ARGS)
2540           && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2541               == (unsigned) mac_context->u.list->paramc))
2542         {
2543           cpp_warning (pfile, "deprecated GNU ## extension used");
2544           pasted = duplicate_token (pfile, second);
2545         }
2546       else
2547         pasted = duplicate_token (pfile, token);
2548     }
2549   else
2550     {
2551       int digraph = 0;
2552       enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2553
2554       if (type == CPP_EOF)
2555         {
2556           if (CPP_OPTION (pfile, warn_paste))
2557             cpp_warning (pfile,
2558                          "pasting would not give a valid preprocessing token");
2559           _cpp_push_token (pfile, second);
2560           return token;
2561         }
2562
2563       if (type == CPP_NAME || type == CPP_NUMBER)
2564         {
2565           /* Join spellings.  */
2566           U_CHAR *buf, *end;
2567
2568           pasted = get_temp_token (pfile);
2569           buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2570           end = spell_token (pfile, token, buf);
2571           end = spell_token (pfile, second, end);
2572           *end = '\0';
2573
2574           if (type == CPP_NAME)
2575             pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2576           else
2577             {
2578               pasted->val.str.text = uxstrdup (buf);
2579               pasted->val.str.len = end - buf;
2580             }
2581         }
2582       else if (type == CPP_WCHAR || type == CPP_WSTRING)
2583         pasted = duplicate_token (pfile, second);
2584       else
2585         {
2586           pasted = get_temp_token (pfile);
2587           pasted->val.integer = 0;
2588         }
2589
2590       pasted->type = type;
2591       pasted->flags = digraph ? DIGRAPH : 0;
2592     }
2593
2594   /* The pasted token gets the whitespace flags and position of the
2595      first token, the PASTE_LEFT flag of the second token, plus the
2596      PASTED flag to indicate it is the result of a paste.  However, we
2597      want to preserve the DIGRAPH flag.  */
2598   pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2599   pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2600                     | (second->flags & PASTE_LEFT) | PASTED);
2601   pasted->col = token->col;
2602   pasted->line = token->line;
2603
2604   return maybe_paste_with_next (pfile, pasted);
2605 }
2606
2607 /* Convert a token sequence to a single string token according to the
2608    rules of the ISO C #-operator.  */
2609 #define INIT_SIZE 200
2610 static cpp_token *
2611 stringify_arg (pfile, token)
2612      cpp_reader *pfile;
2613      const cpp_token *token;
2614 {
2615   cpp_token *result;
2616   unsigned char *main_buf;
2617   unsigned int prev_value, backslash_count = 0;
2618   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2619
2620   prev_value  = prevent_macro_expansion (pfile);
2621   main_buf = (unsigned char *) xmalloc (buf_cap);
2622
2623   result = get_temp_token (pfile);
2624   ASSIGN_FLAGS_AND_POS (result, token);
2625
2626   for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
2627     {
2628       int escape;
2629       unsigned char *buf;
2630       unsigned int len = TOKEN_LEN (token);
2631
2632       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2633                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2634       if (escape)
2635         len *= 4 + 1;
2636
2637       if (buf_used + len > buf_cap)
2638         {
2639           buf_cap = buf_used + len + INIT_SIZE;
2640           main_buf = xrealloc (main_buf, buf_cap);
2641         }
2642
2643       if (whitespace && (token->flags & PREV_WHITE))
2644         main_buf[buf_used++] = ' ';
2645
2646       if (escape)
2647         buf = (unsigned char *) xmalloc (len);
2648       else
2649         buf = main_buf + buf_used;
2650
2651       len = spell_token (pfile, token, buf) - buf;
2652       if (escape)
2653         {
2654           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2655           free (buf);
2656         }
2657       else
2658         buf_used += len;
2659
2660       whitespace = 1;
2661       if (token->type == CPP_BACKSLASH)
2662         backslash_count++;
2663       else
2664         backslash_count = 0;
2665     }
2666
2667   /* Ignore the final \ of invalid string literals.  */
2668   if (backslash_count & 1)
2669     {
2670       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2671       buf_used--;
2672     }
2673
2674   result->type = CPP_STRING;
2675   result->val.str.text = main_buf;
2676   result->val.str.len = buf_used;
2677   restore_macro_expansion (pfile, prev_value);
2678   return result;
2679 }
2680
2681 /* Allocate more room on the context stack of PFILE.  */
2682 static void
2683 expand_context_stack (pfile)
2684      cpp_reader *pfile;
2685 {
2686   pfile->context_cap += pfile->context_cap + 20;
2687   pfile->contexts = (cpp_context *)
2688     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2689 }
2690
2691 /* Push the context of macro NODE onto the context stack.  TOKEN is
2692    the CPP_NAME token invoking the macro.  */
2693 static const cpp_token *
2694 push_macro_context (pfile, node, token)
2695      cpp_reader *pfile;
2696      cpp_hashnode *node;
2697      const cpp_token *token;
2698 {
2699   unsigned char orig_flags;
2700   macro_args *args;
2701   cpp_context *context;
2702
2703   if (pfile->cur_context > CPP_STACK_MAX)
2704     {
2705       cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2706       return token;
2707     }
2708
2709   /* Token's flags may change when parsing args containing a nested
2710      invocation of this macro.  */
2711   orig_flags = token->flags & (PREV_WHITE | BOL);
2712   args = 0;
2713   if (node->value.expansion->paramc >= 0)
2714     {
2715       unsigned int error, prev_nme;
2716
2717       /* Allocate room for the argument contexts, and parse them.  */
2718       args  = (macro_args *) xmalloc (sizeof (macro_args));
2719       args->ends = (unsigned int *)
2720         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2721       args->tokens = 0;
2722       args->capacity = 0;
2723       args->used = 0;
2724       args->level = pfile->cur_context;
2725
2726       prev_nme = prevent_macro_expansion (pfile);
2727       pfile->args = args;
2728       error = parse_args (pfile, node, args);
2729       pfile->args = 0;
2730       restore_macro_expansion (pfile, prev_nme);
2731       if (error)
2732         {
2733           free_macro_args (args);
2734           return token;
2735         }
2736     }
2737
2738   /* Now push its context.  */
2739   pfile->cur_context++;
2740   if (pfile->cur_context == pfile->context_cap)
2741     expand_context_stack (pfile);
2742
2743   context = CURRENT_CONTEXT (pfile);
2744   context->u.list = node->value.expansion;
2745   context->args = args;
2746   context->posn = 0;
2747   context->count = context->u.list->tokens_used;
2748   context->level = pfile->cur_context;
2749   context->flags = 0;
2750   context->pushed_token = 0;
2751
2752   /* Set the flags of the first token.  We know there must
2753      be one, empty macros are a single placemarker token.  */
2754   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2755
2756   return cpp_get_token (pfile);
2757 }
2758
2759 /* Push an argument to the current macro onto the context stack.
2760    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2761 static const cpp_token *
2762 push_arg_context (pfile, token)
2763      cpp_reader *pfile;
2764      const cpp_token *token;
2765 {
2766   cpp_context *context;
2767   macro_args *args;
2768
2769   pfile->cur_context++;
2770   if (pfile->cur_context == pfile->context_cap)
2771       expand_context_stack (pfile);
2772
2773   context = CURRENT_CONTEXT (pfile);
2774   args = context[-1].args;
2775
2776   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2777   context->u.arg = args->tokens + context->count;
2778   context->count = args->ends[token->val.aux] - context->count;
2779   context->args = 0;
2780   context->posn = 0;
2781   context->level = args->level;
2782   context->flags = CONTEXT_ARG | CONTEXT_RAW;
2783   context->pushed_token = 0;
2784
2785   /* Set the flags of the first token.  There is one.  */
2786   {
2787     const cpp_token *first = context->u.arg[0];
2788     if (!first)
2789       first = context->u.arg[1];
2790
2791     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2792                           token->flags & (PREV_WHITE | BOL));
2793   }
2794
2795   if (token->flags & STRINGIFY_ARG)
2796     return stringify_arg (pfile, token);
2797
2798   if (token->flags & PASTE_LEFT)
2799     context->flags |= CONTEXT_PASTEL;
2800   if (pfile->paste_level)
2801     context->flags |= CONTEXT_PASTER;
2802
2803   return get_raw_token (pfile);
2804 }
2805
2806 /* "Unget" a token.  It is effectively inserted in the token queue and
2807    will be returned by the next call to get_raw_token.  */
2808 void
2809 _cpp_push_token (pfile, token)
2810      cpp_reader *pfile;
2811      const cpp_token *token;
2812 {
2813   cpp_context *context = CURRENT_CONTEXT (pfile);
2814   if (context->pushed_token)
2815     cpp_ice (pfile, "two tokens pushed in a row");
2816   if (token->type != CPP_EOF)
2817     context->pushed_token = token;
2818   /* Don't push back a directive's CPP_EOF, step back instead.  */
2819   else if (pfile->cur_context == 0)
2820     pfile->contexts[0].posn--;
2821 }
2822
2823 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
2824    introducing the directive.  */
2825 static void
2826 process_directive (pfile, token)
2827      cpp_reader *pfile;
2828      const cpp_token *token;
2829 {
2830   const struct directive *d = pfile->token_list.directive;
2831   int prev_nme = 0;
2832
2833   /* Skip over the directive name.  */
2834   if (token[1].type == CPP_NAME)
2835     _cpp_get_raw_token (pfile);
2836   else if (token[1].type != CPP_NUMBER)
2837     cpp_ice (pfile, "directive begins with %s?!",
2838              token_names[token[1].type]);
2839
2840   /* Flush pending tokens at this point, in case the directive produces
2841      output.  XXX Directive output won't be visible to a direct caller of
2842      cpp_get_token.  */
2843   if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2844     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2845
2846   if (! (d->flags & EXPAND))
2847     prev_nme = prevent_macro_expansion (pfile);
2848   (void) (*d->handler) (pfile);
2849   if (! (d->flags & EXPAND))
2850     restore_macro_expansion (pfile, prev_nme);
2851   _cpp_skip_rest_of_line (pfile);
2852 }
2853
2854 /* The external interface to return the next token.  All macro
2855    expansion and directive processing is handled internally, the
2856    caller only ever sees the output after preprocessing.  */
2857 const cpp_token *
2858 cpp_get_token (pfile)
2859      cpp_reader *pfile;
2860 {
2861   const cpp_token *token;
2862   cpp_hashnode *node;
2863
2864   /* Loop till we hit a non-directive, non-skipped, non-placemarker token.  */
2865   for (;;)
2866     {
2867       token = get_raw_token (pfile);
2868       if (token->flags & BOL && token->type == CPP_HASH
2869           && pfile->token_list.directive)
2870         {
2871           process_directive (pfile, token);
2872           continue;
2873         }
2874
2875       /* Short circuit EOF. */
2876       if (token->type == CPP_EOF)
2877         return token;
2878
2879       if (pfile->skipping && ! pfile->token_list.directive)
2880         {
2881           _cpp_skip_rest_of_line (pfile);
2882           continue;
2883         }
2884       break;
2885     }
2886
2887   /* If there's a potential control macro and we get here, then that
2888      #ifndef didn't cover the entire file and its argument shouldn't
2889      be taken as a control macro.  */
2890   pfile->potential_control_macro = 0;
2891
2892   token = maybe_paste_with_next (pfile, token);
2893
2894   if (token->type != CPP_NAME)
2895     return token;
2896
2897   /* Is macro expansion disabled in general?  */
2898   if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2899     return token;
2900
2901   node = token->val.node;
2902   if (node->type == T_VOID)
2903     return token;
2904
2905   if (node->type == T_MACRO)
2906     {
2907       if (is_macro_disabled (pfile, node->value.expansion, token))
2908         return token;
2909
2910       return push_macro_context (pfile, node, token);
2911     }
2912   else
2913     return special_symbol (pfile, node, token);
2914 }
2915
2916 /* Returns the next raw token, i.e. without performing macro
2917    expansion.  Argument contexts are automatically entered.  */
2918 static const cpp_token *
2919 get_raw_token (pfile)
2920      cpp_reader *pfile;
2921 {
2922   const cpp_token *result;
2923   cpp_context *context = CURRENT_CONTEXT (pfile);
2924
2925   if (context->pushed_token)
2926     {
2927       result = context->pushed_token;
2928       context->pushed_token = 0;
2929     }
2930   else if (context->posn == context->count)
2931     result = pop_context (pfile);
2932   else
2933     {
2934       if (IS_ARG_CONTEXT (context))
2935         {
2936           result = context->u.arg[context->posn++];
2937           if (result == 0)
2938             {
2939               context->flags ^= CONTEXT_RAW;
2940               result = context->u.arg[context->posn++];
2941             }
2942           return result;        /* Cannot be a CPP_MACRO_ARG */
2943         }
2944       result = &context->u.list->tokens[context->posn++];
2945     }
2946
2947   if (result->type == CPP_MACRO_ARG)
2948     result = push_arg_context (pfile, result);
2949   return result;
2950 }
2951
2952 /* Internal interface to get the token without macro expanding.  */
2953 const cpp_token *
2954 _cpp_get_raw_token (pfile)
2955      cpp_reader *pfile;
2956 {
2957   int prev_nme = prevent_macro_expansion (pfile);
2958   const cpp_token *result = cpp_get_token (pfile);
2959   restore_macro_expansion (pfile, prev_nme);
2960   return result;
2961 }
2962
2963 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
2964    list should be overwritten, or zero if we need to append
2965    (typically, if we are within the arguments to a macro, or looking
2966    for the '(' to start a function-like macro invocation).  */
2967 static int
2968 lex_next (pfile, clear)
2969      cpp_reader *pfile;
2970      int clear;
2971 {
2972   cpp_toklist *list = &pfile->token_list;
2973   const cpp_token *old_list = list->tokens;
2974   unsigned int old_used = list->tokens_used;
2975
2976   /* If we are currently processing a directive, do not advance.  6.10
2977      paragraph 2: A new-line character ends the directive even if it
2978      occurs within what would otherwise be an invocation of a
2979      function-like macro.
2980
2981      It is possible that clear == 1 too; e.g. "#if funlike_macro ("
2982      since parse_args swallowed the directive's EOF.  */
2983   if (list->directive)
2984     return 1;
2985
2986   if (clear)
2987     {
2988       /* Release all temporary tokens.  */
2989       _cpp_clear_toklist (list);
2990       pfile->contexts[0].posn = 0;
2991       if (pfile->temp_used)
2992         release_temp_tokens (pfile);
2993     }
2994
2995   lex_line (pfile, list);
2996   pfile->contexts[0].count = list->tokens_used;
2997
2998   if (!clear && pfile->args)
2999     {
3000       /* Fix up argument token pointers.  */
3001       if (old_list != list->tokens)
3002         {
3003           unsigned int i;
3004
3005           for (i = 0; i < pfile->args->used; i++)
3006             {
3007               const cpp_token *token = pfile->args->tokens[i];
3008               if (token >= old_list && token < old_list + old_used)
3009                 pfile->args->tokens[i] = (const cpp_token *)
3010                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3011             }
3012         }
3013
3014       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3015          tokens within the list of arguments that would otherwise act as
3016          preprocessing directives, the behavior is undefined.
3017
3018          This implementation will report a hard error and treat the
3019          'sequence of preprocessing tokens' as part of the macro argument,
3020          not a directive.
3021
3022          Note if pfile->args == 0, we're OK since we're only inside a
3023          macro argument after a '('.  */
3024       if (list->directive)
3025         {
3026           cpp_error_with_line (pfile, list->tokens[old_used].line,
3027                                list->tokens[old_used].col,
3028                                "#%s may not be used inside a macro argument",
3029                                list->directive->name);
3030           return 1;
3031         }
3032     }
3033
3034   return 0;
3035 }
3036
3037 /* Pops a context of the context stack.  If we're at the bottom, lexes
3038    the next logical line.  Returns 1 if we're at the end of the
3039    argument list to the # operator, or if it is illegal to "overflow"
3040    into the rest of the file (e.g. 6.10.3.1.1).  */
3041 static int
3042 do_pop_context (pfile)
3043      cpp_reader *pfile;
3044 {
3045   cpp_context *context;
3046
3047   if (pfile->cur_context == 0)
3048     return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3049
3050   /* Argument contexts, when parsing args or handling # operator
3051      return CPP_EOF at the end.  */
3052   context = CURRENT_CONTEXT (pfile);
3053   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3054     return 1;
3055
3056   /* Free resources when leaving macro contexts.  */
3057   if (context->args)
3058     free_macro_args (context->args);
3059
3060   if (pfile->cur_context == pfile->no_expand_level)
3061     pfile->no_expand_level--;
3062   pfile->cur_context--;
3063
3064   return 0;
3065 }
3066
3067 /* Move down the context stack, and return the next raw token.  */
3068 static const cpp_token *
3069 pop_context (pfile)
3070      cpp_reader *pfile;
3071 {
3072   if (do_pop_context (pfile))
3073     return &eof_token;
3074   return get_raw_token (pfile);
3075 }
3076
3077 /* Turn off macro expansion at the current context level.  */
3078 static unsigned int
3079 prevent_macro_expansion (pfile)
3080      cpp_reader *pfile;
3081 {
3082   unsigned int prev_value = pfile->no_expand_level;
3083   pfile->no_expand_level = pfile->cur_context;
3084   return prev_value;
3085 }
3086
3087 /* Restore macro expansion to its previous state.  */
3088 static void
3089 restore_macro_expansion (pfile, prev_value)
3090      cpp_reader *pfile;
3091      unsigned int prev_value;
3092 {
3093   pfile->no_expand_level = prev_value;
3094 }
3095
3096 /* Used by cpperror.c to obtain the correct line and column to report
3097    in a diagnostic.  */
3098 unsigned int
3099 _cpp_get_line (pfile, pcol)
3100      cpp_reader *pfile;
3101      unsigned int *pcol;
3102 {
3103   unsigned int index;
3104   const cpp_token *cur_token;
3105
3106   if (pfile->in_lex_line)
3107     index = pfile->token_list.tokens_used;
3108   else
3109     index = pfile->contexts[0].posn;
3110
3111   cur_token = &pfile->token_list.tokens[index - 1];
3112   if (pcol)
3113     *pcol = cur_token->col;
3114   return cur_token->line;
3115 }
3116
/* Expand string literal STR into two arguments: its text as a
   U_CHAR pointer, and its length excluding the terminating NUL.  */
#define DSC(str) (const U_CHAR *) (str), sizeof (str) - 1

/* Three-letter English month names, January first; indexed by
   tm_mon when formatting __DATE__.  */
static const char * const monthnames[] =
{
  "Jan", "Feb", "Mar",
  "Apr", "May", "Jun",
  "Jul", "Aug", "Sep",
  "Oct", "Nov", "Dec",
};
3123
3124 /* Handle builtin macros like __FILE__.  */
3125 static const cpp_token *
3126 special_symbol (pfile, node, token)
3127      cpp_reader *pfile;
3128      cpp_hashnode *node;
3129      const cpp_token *token;
3130 {
3131   cpp_token *result;
3132   cpp_buffer *ip;
3133
3134   switch (node->type)
3135     {
3136     case T_FILE:
3137     case T_BASE_FILE:
3138       {
3139         const char *file;
3140
3141         ip = CPP_BUFFER (pfile);
3142         if (ip == 0)
3143           file = "";
3144         else
3145           {
3146             if (node->type == T_BASE_FILE)
3147               while (CPP_PREV_BUFFER (ip) != NULL)
3148                 ip = CPP_PREV_BUFFER (ip);
3149
3150             file = ip->nominal_fname;
3151           }
3152         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3153                                     strlen (file));
3154       }
3155       break;
3156
3157     case T_INCLUDE_LEVEL:
3158       {
3159         int true_indepth = 0;
3160
3161         /* Do not count the primary source file in the include level.  */
3162         ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3163         while (ip)
3164           {
3165             true_indepth++;
3166             ip = CPP_PREV_BUFFER (ip);
3167           }
3168         result = alloc_number_token (pfile, true_indepth);
3169       }
3170       break;
3171
3172     case T_SPECLINE:
3173       /* If __LINE__ is embedded in a macro, it must expand to the
3174          line of the macro's invocation, not its definition.
3175          Otherwise things like assert() will not work properly.  */
3176       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3177       break;
3178
3179     case T_STDC:
3180       {
3181         int stdc = 1;
3182
3183 #ifdef STDC_0_IN_SYSTEM_HEADERS
3184         if (CPP_IN_SYSTEM_HEADER (pfile)
3185             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3186           stdc = 0;
3187 #endif
3188         result = alloc_number_token (pfile, stdc);
3189       }
3190       break;
3191
3192     case T_DATE:
3193     case T_TIME:
3194       if (pfile->date == 0)
3195         {
3196           /* Allocate __DATE__ and __TIME__ from permanent storage,
3197              and save them in pfile so we don't have to do this again.
3198              We don't generate these strings at init time because
3199              time() and localtime() are very slow on some systems.  */
3200           time_t tt = time (NULL);
3201           struct tm *tb = localtime (&tt);
3202
3203           pfile->date = make_string_token
3204             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3205           pfile->time = make_string_token
3206             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3207
3208           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3209                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3210           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3211                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3212         }
3213       result = node->type == T_DATE ? pfile->date: pfile->time;
3214       break;
3215
3216     case T_POISON:
3217       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3218       return token;
3219
3220     default:
3221       cpp_ice (pfile, "invalid special hash type");
3222       return token;
3223     }
3224
3225   ASSIGN_FLAGS_AND_POS (result, token);
3226   return result;
3227 }
3228 #undef DSC
3229
3230 /* Dump the original user's spelling of argument index ARG_NO to the
3231    macro whose expansion is LIST.  */
3232 static void
3233 dump_param_spelling (pfile, list, arg_no)
3234      cpp_reader *pfile;
3235      const cpp_toklist *list;
3236      unsigned int arg_no;
3237 {
3238   const U_CHAR *param = list->namebuf;
3239
3240   while (arg_no--)
3241     param += ustrlen (param) + 1;
3242   CPP_PUTS (pfile, param, ustrlen (param));
3243 }
3244
3245 /* Dump a token list to the output.  */
3246 void
3247 _cpp_dump_list (pfile, list, token, flush)
3248      cpp_reader *pfile;
3249      const cpp_toklist *list;
3250      const cpp_token *token;
3251      int flush;
3252 {
3253   const cpp_token *limit = list->tokens + list->tokens_used;
3254   const cpp_token *prev = 0;
3255
3256   /* Avoid the CPP_EOF.  */
3257   if (list->directive)
3258     limit--;
3259
3260   while (token < limit)
3261     {
3262       if (token->type == CPP_MACRO_ARG)
3263         {
3264           if (token->flags & PREV_WHITE)
3265             CPP_PUTC (pfile, ' ');
3266           if (token->flags & STRINGIFY_ARG)
3267             CPP_PUTC (pfile, '#');
3268           dump_param_spelling (pfile, list, token->val.aux);
3269         }
3270       else
3271         output_token (pfile, token, prev);
3272       if (token->flags & PASTE_LEFT)
3273         CPP_PUTS (pfile, " ##", 3);
3274       prev = token;
3275       token++;
3276     }
3277
3278   if (flush && pfile->printer)
3279     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3280 }
3281
3282 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3283    if it hasn't happened already.  */
3284
3285 void
3286 _cpp_init_input_buffer (pfile)
3287      cpp_reader *pfile;
3288 {
3289   init_trigraph_map ();
3290   pfile->context_cap = 20;
3291   pfile->contexts = (cpp_context *)
3292     xmalloc (pfile->context_cap * sizeof (cpp_context));
3293   pfile->cur_context = 0;
3294   pfile->contexts[0].u.list = &pfile->token_list;
3295
3296   pfile->contexts[0].posn = 0;
3297   pfile->contexts[0].count = 0;
3298   pfile->no_expand_level = UINT_MAX;
3299
3300   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3301 }
3302
3303 /* Moves to the end of the directive line, popping contexts as
3304    necessary.  */
3305 void
3306 _cpp_skip_rest_of_line (pfile)
3307      cpp_reader *pfile;
3308 {
3309   /* Get to base context.  Clear parsing args and each contexts flags,
3310      since these can cause pop_context to return without popping.  */
3311   pfile->no_expand_level = UINT_MAX;
3312   while (pfile->cur_context != 0)
3313     {
3314       pfile->contexts[pfile->cur_context].flags = 0;
3315       do_pop_context (pfile);
3316     }
3317
3318   pfile->contexts[pfile->cur_context].count = 0;
3319   pfile->contexts[pfile->cur_context].posn = 0;
3320   pfile->token_list.directive = 0;
3321 }
3322
3323 /* Directive handler wrapper used by the command line option
3324    processor.  */
3325 void
3326 _cpp_run_directive (pfile, dir, buf, count)
3327      cpp_reader *pfile;
3328      const struct directive *dir;
3329      const char *buf;
3330      size_t count;
3331 {
3332   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3333     {
3334       unsigned int prev_lvl = 0;
3335       /* scan the line now, else prevent_macro_expansion won't work */
3336       do_pop_context (pfile);
3337       if (! (dir->flags & EXPAND))
3338         prev_lvl = prevent_macro_expansion (pfile);
3339
3340       (void) (*dir->handler) (pfile);
3341
3342       if (! (dir->flags & EXPAND))
3343         restore_macro_expansion (pfile, prev_lvl);
3344
3345       _cpp_skip_rest_of_line (pfile);
3346       cpp_pop_buffer (pfile);
3347     }
3348 }