gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Fix ISTABLE to flag the parts we want for IS_HSPACE and IS_NEWLINE.
  28 o Get use of digraphs in sync with the standard reqd on the command line.
  29 o -dM and with _cpp_dump_list: too many \n output.
  30 o Put a printer object in cpp_reader?
  31 o Check line numbers assigned to all errors.
  32 o Replace strncmp with memcmp almost everywhere.
  33 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  34 o Get rid of cpp_get_directive_token.
   35 o Convert do_ functions to return void.  Kaveh thinks it's OK; and said he'll
  36   give it a run when we've got some code.
  37 o _cpp_parse_expr updated to new lexer.
  38 o Distinguish integers, floats, and 'other' pp-numbers.
  39 o Store ints and char constants as binary values.
  40 o New command-line assertion syntax.
  41 o Merge hash table text pointer and token list text pointer for identifiers.
  42 o Have _cpp_parse_expr use all the information the new lexer provides.
  43 o Work towards functions in cpperror.c taking a message level parameter.
  44   If we do this, merge the common code of do_warning and do_error.
  45 o Comment all functions, and describe macro expansion algorithm.
  46 o Move as much out of header files as possible.
  47 o Remove single quote pairs `', and some '', from diagnostics.
  48 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  49
  50 */
  51
  52 #include "config.h"
  53 #include "system.h"
  54 #include "intl.h"
  55 #include "cpplib.h"
  56 #include "cpphash.h"
  57 #include "symcat.h"
  58
  59 #define auto_expand_name_space(list) \
  60     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
  61 static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
  62                                          size_t, FILE *));
  63 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
  64                                          unsigned int));
  65 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
  66                                          unsigned int));
  67
  68 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
  69 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
  70                                                 unsigned char *));
  71 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
  72                                                      const unsigned char *));
  73 static int skip_block_comment PARAMS ((cpp_reader *));
  74 static int skip_line_comment PARAMS ((cpp_reader *));
  75 static void skip_whitespace PARAMS ((cpp_reader *, int));
  76 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  77 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  78 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
  79                                   unsigned int));
  80 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
  81 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
  82                                   const unsigned char *,
  83                                   unsigned int, unsigned int));
  84 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
  85 static int lex_next PARAMS ((cpp_reader *, int));
  86 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
  87                                       const cpp_token *));
  88
  89 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
  90 static void expand_context_stack PARAMS ((cpp_reader *));
  91 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
  92                                             unsigned char *));
  93 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
  94                                   const cpp_token *));
  95 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
  96                                           cpp_token *));
  97 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
  98                                             unsigned int));
  99 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 100 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 101                                                 const cpp_token *));
 102 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 103 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 104                                                        const cpp_token *));
 105 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 106                                          const cpp_token *, int *));
 107 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 108 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 109 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 110 static void release_temp_tokens         PARAMS ((cpp_reader *));
 111 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 112 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 113
 114 #define INIT_TOKEN_NAME(list, token) \
 115   do {(token)->val.name.len = 0; \
 116       (token)->val.name.text = (list)->namebuf + (list)->name_used; \
 117   } while (0)
 118
 119 #define VALID_SIGN(c, prevc) \
 120   (((c) == '+' || (c) == '-') && \
 121    ((prevc) == 'e' || (prevc) == 'E' \
 122     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 123
 124 /* Maybe put these in the ISTABLE eventually.  */
 125 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
 126 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
 127
 128 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 129    character, if any, is in buffer.  */
 130
 131 #define handle_newline(cur, limit, c) \
 132  do { \
 133   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 134     (cur)++; \
 135   pfile->buffer->lineno++; \
 136   pfile->buffer->line_base = (cur); \
 137   pfile->col_adjust = 0; \
 138  } while (0)
 139
 140 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 141 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 142
 143 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
 144 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
 145 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
 146 #define BACKUP_DIGRAPH(ttype) do { \
 147   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 148
 149 /* An upper bound on the number of bytes needed to spell a token,
 150    including preceding whitespace.  */
 151 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
 152                                SPELL_NONE ? (token)->val.name.len: 0))
 153
 154 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
 155 #define I(e, s) {SPELL_IDENT, s},
 156 #define S(e, s) {SPELL_STRING, s},
 157 #define C(e, s) {SPELL_CHAR, s},
 158 #define N(e, s) {SPELL_NONE, s},
 159
 160 const struct token_spelling
 161 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
 162
 163 #undef T
 164 #undef I
 165 #undef S
 166 #undef C
 167 #undef N
 168
 169 /* For debugging: the internal names of the tokens.  */
 170 #define T(e, s) U STRINGX(e) + 4,
 171 #define I(e, s) U STRINGX(e) + 4,
 172 #define S(e, s) U STRINGX(e) + 4,
 173 #define C(e, s) U STRINGX(e) + 4,
 174 #define N(e, s) U STRINGX(e) + 4,
 175
 176 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
 177
 178 #undef T
 179 #undef I
 180 #undef S
 181 #undef C
 182 #undef N
 183
 184 /* The following table is used by trigraph_ok/trigraph_replace.  If we
 185    have designated initializers, it can be constant data; otherwise,
 186    it is set up at runtime by _cpp_init_input_buffer.  */
 187
 188 #if (GCC_VERSION >= 2007)
 189 #define init_trigraph_map()  /* nothing */
 190 #define TRIGRAPH_MAP \
 191 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
 192 #define END };
 193 #define s(p, v) [p] = v,
 194 #else
 195 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 196  static void init_trigraph_map PARAMS ((void)) { \
 197  unsigned char *x = trigraph_map;
 198 #define END }
 199 #define s(p, v) x[p] = v;
 200 #endif
 201
 202 TRIGRAPH_MAP
 203   s('=', '#')   s(')', ']')     s('!', '|')
 204   s('(', '[')   s('\'', '^')    s('>', '}')
 205   s('/', '\\')  s('<', '{')     s('-', '~')
 206 END
 207
 208 #undef TRIGRAPH_MAP
 209 #undef END
 210 #undef s
 211
 212 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 213
 214 void
 215 _cpp_grow_token_buffer (pfile, n)
 216      cpp_reader *pfile;
 217      long n;
 218 {
 219   long old_written = CPP_WRITTEN (pfile);
 220   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 221   pfile->token_buffer = (U_CHAR *)
 222     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 223   CPP_SET_WRITTEN (pfile, old_written);
 224 }
 225
 226 /* Deal with the annoying semantics of fwrite.  */
 227 static void
 228 safe_fwrite (pfile, buf, len, fp)
 229      cpp_reader *pfile;
 230      const U_CHAR *buf;
 231      size_t len;
 232      FILE *fp;
 233 {
 234   size_t count;
 235
 236   while (len)
 237     {
 238       count = fwrite (buf, 1, len, fp);
 239       if (count == 0)
 240         goto error;
 241       len -= count;
 242       buf += count;
 243     }
 244   return;
 245
 246  error:
 247   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 248 }
 249
 250 /* Notify the compiler proper that the current line number has jumped,
 251    or the current file name has changed.  */
 252
 253 static void
 254 output_line_command (pfile, print, line)
 255      cpp_reader *pfile;
 256      cpp_printer *print;
 257      unsigned int line;
 258 {
 259   cpp_buffer *ip = CPP_BUFFER (pfile);
 260   enum { same = 0, enter, leave, rname } change;
 261   static const char * const codes[] = { "", " 1", " 2", "" };
 262
 263   if (line == 0)
 264     return;
 265
 266   /* End the previous line of text.  */
 267   if (pfile->need_newline)
 268     putc ('\n', print->outf);
 269   pfile->need_newline = 0;
 270
 271   if (CPP_OPTION (pfile, no_line_commands))
 272     return;
 273
 274   /* If ip is null, we've been called from cpp_finish, and they just
 275      needed the final flush and trailing newline.  */
 276   if (!ip)
 277     return;
 278
 279   if (pfile->include_depth == print->last_id)
 280     {
 281       /* Determine whether the current filename has changed, and if so,
 282          how.  'nominal_fname' values are unique, so they can be compared
 283          by comparing pointers.  */
 284       if (ip->nominal_fname == print->last_fname)
 285         change = same;
 286       else
 287         change = rname;
 288     }
 289   else
 290     {
 291       if (pfile->include_depth > print->last_id)
 292         change = enter;
 293       else
 294         change = leave;
 295       print->last_id = pfile->include_depth;
 296     }
 297   print->last_fname = ip->nominal_fname;
 298
 299   /* If the current file has not changed, we can output a few newlines
 300      instead if we want to increase the line number by a small amount.
 301      We cannot do this if print->lineno is zero, because that means we
 302      haven't output any line commands yet.  (The very first line
 303      command output is a `same_file' command.)  */
 304   if (change == same && print->lineno > 0
 305       && line >= print->lineno && line < print->lineno + 8)
 306     {
 307       while (line > print->lineno)
 308         {
 309           putc ('\n', print->outf);
 310           print->lineno++;
 311         }
 312       return;
 313     }
 314
 315 #ifndef NO_IMPLICIT_EXTERN_C
 316   if (CPP_OPTION (pfile, cplusplus))
 317     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 318              codes[change],
 319              ip->inc->sysp ? " 3" : "",
 320              (ip->inc->sysp == 2) ? " 4" : "");
 321   else
 322 #endif
 323     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 324              codes[change],
 325              ip->inc->sysp ? " 3" : "");
 326   print->lineno = line;
 327 }
 328
 329 /* Write the contents of the token_buffer to the output stream, and
 330    clear the token_buffer.  Also handles generating line commands and
 331    keeping track of file transitions.  */
 332
 333 void
 334 cpp_output_tokens (pfile, print, line)
 335      cpp_reader *pfile;
 336      cpp_printer *print;
 337      unsigned int line;
 338 {
 339   if (CPP_WRITTEN (pfile) - print->written)
 340     {
 341       safe_fwrite (pfile, pfile->token_buffer,
 342                    CPP_WRITTEN (pfile) - print->written, print->outf);
 343       pfile->need_newline = 1;
 344       if (print->lineno)
 345         print->lineno++;
 346
 347       CPP_SET_WRITTEN (pfile, print->written);
 348     }
 349   output_line_command (pfile, print, line);
 350 }
 351
 352 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 353
 354 void
 355 cpp_scan_buffer_nooutput (pfile)
 356      cpp_reader *pfile;
 357 {
 358   unsigned int old_written = CPP_WRITTEN (pfile);
 359   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 360
 361   for (;;)
 362     {
 363       /* In no-output mode, we can ignore everything but directives.  */
 364       const cpp_token *token = cpp_get_token (pfile);
 365       if (token->type == CPP_EOF)
 366         {
 367           cpp_pop_buffer (pfile);
 368           if (CPP_BUFFER (pfile) == stop)
 369             break;
 370         }
 371       _cpp_skip_rest_of_line (pfile);
 372     }
 373   CPP_SET_WRITTEN (pfile, old_written);
 374 }
 375
 376 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 377
 378 void
 379 cpp_scan_buffer (pfile, print)
 380      cpp_reader *pfile;
 381      cpp_printer *print;
 382 {
 383   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 384   const cpp_token *token, *prev = 0;
 385
 386   for (;;)
 387     {
 388       token = cpp_get_token (pfile);
 389       if (token->type == CPP_EOF)
 390         {
 391           cpp_pop_buffer (pfile);
 392           if (CPP_BUFFER (pfile) == stop)
 393             return;
 394           cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
 395           prev = 0;
 396           continue;
 397         }
 398
 399       if (token->flags & BOL)
 400         {
 401           cpp_output_tokens (pfile, print, pfile->token_list.line);
 402           prev = 0;
 403         }
 404
 405       output_token (pfile, token, prev);
 406       prev = token;
 407     }
 408 }
 409
 410 /* Helper routine used by parse_include, which can't see spell_token.
 411    Reinterpret the current line as an h-char-sequence (< ... >); we are
 412    looking at the first token after the <.  */
 413 const cpp_token *
 414 _cpp_glue_header_name (pfile)
 415      cpp_reader *pfile;
 416 {
 417   unsigned int written = CPP_WRITTEN (pfile);
 418   const cpp_token *t;
 419   cpp_token *hdr;
 420   U_CHAR *buf;
 421   size_t len;
 422
 423   for (;;)
 424     {
 425       t = cpp_get_token (pfile);
 426       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 427         break;
 428
 429       CPP_RESERVE (pfile, TOKEN_LEN (t));
 430       if (t->flags & PREV_WHITE)
 431         CPP_PUTC_Q (pfile, ' ');
 432       pfile->limit = spell_token (pfile, t, pfile->limit);
 433     }
 434
 435   if (t->type == CPP_EOF)
 436     cpp_error (pfile, "missing terminating > character");
 437
 438   len = CPP_WRITTEN (pfile) - written;
 439   buf = xmalloc (len);
 440   memcpy (buf, pfile->token_buffer + written, len);
 441   CPP_SET_WRITTEN (pfile, written);
 442
 443   hdr = get_temp_token (pfile);
 444   hdr->type = CPP_HEADER_NAME;
 445   hdr->flags = 0;
 446   hdr->val.name.text = buf;
 447   hdr->val.name.len = len;
 448   return hdr;
 449 }
 450
 451 /* Token-buffer helper functions.  */
 452
 453 /* Expand a token list's string space. It is *vital* that
 454    list->tokens_used is correct, to get pointer fix-up right.  */
 455 void
 456 _cpp_expand_name_space (list, len)
 457      cpp_toklist *list;
 458      unsigned int len;
 459 {
 460   const U_CHAR *old_namebuf;
 461
 462   old_namebuf = list->namebuf;
 463   list->name_cap += len;
 464   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 465
 466   /* Fix up token text pointers.  */
 467   if (list->namebuf != old_namebuf)
 468     {
 469       unsigned int i;
 470
 471       for (i = 0; i < list->tokens_used; i++)
 472         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 473           list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
 474     }
 475 }
 476
 477 /* If there is not enough room for LEN more characters, expand the
 478    list by just enough to have room for LEN characters.  */
 479 void
 480 _cpp_reserve_name_space (list, len)
 481      cpp_toklist *list;
 482      unsigned int len;
 483 {
 484   unsigned int room = list->name_cap - list->name_used;
 485
 486   if (room < len)
 487     _cpp_expand_name_space (list, len - room);
 488 }
 489
 490 /* Expand the number of tokens in a list.  */
 491 void
 492 _cpp_expand_token_space (list, count)
 493      cpp_toklist *list;
 494      unsigned int count;
 495 {
 496   unsigned int n;
 497
 498   list->tokens_cap += count;
 499   n = list->tokens_cap;
 500   if (list->flags & LIST_OFFSET)
 501     list->tokens--, n++;
 502   list->tokens = (cpp_token *)
 503     xrealloc (list->tokens, n * sizeof (cpp_token));
 504   if (list->flags & LIST_OFFSET)
 505     list->tokens++;             /* Skip the dummy.  */
 506 }
 507
 508 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 509    an extra token in front of the token list, as this allows the lexer
 510    to always peek at the previous token without worrying about
 511    underflowing the list, and some initial space.  Otherwise, no
 512    token- or name-space is allocated, and there is no dummy token.  */
 513 void
 514 _cpp_init_toklist (list, flags)
 515      cpp_toklist *list;
 516      int flags;
 517 {
 518   if (flags == NO_DUMMY_TOKEN)
 519     {
 520       list->tokens_cap = 0;
 521       list->tokens = 0;
 522       list->name_cap = 0;
 523       list->namebuf = 0;
 524       list->flags = 0;
 525     }
 526   else
 527     {
 528       /* Initialize token space.  Put a dummy token before the start
 529          that will fail matches.  */
 530       list->tokens_cap = 256;   /* 4K's worth.  */
 531       list->tokens = (cpp_token *)
 532         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 533       list->tokens[0].type = CPP_EOF;
 534       list->tokens++;
 535
 536       /* Initialize name space.  */
 537       list->name_cap = 1024;
 538       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 539       list->flags = LIST_OFFSET;
 540     }
 541
 542   _cpp_clear_toklist (list);
 543 }
 544
 545 /* Clear a token list.  */
 546 void
 547 _cpp_clear_toklist (list)
 548      cpp_toklist *list;
 549 {
 550   list->tokens_used = 0;
 551   list->name_used = 0;
 552   list->directive = 0;
 553   list->paramc = 0;
 554   list->params_len = 0;
 555   list->flags &= LIST_OFFSET;  /* clear all but that one */
 556 }
 557
 558 /* Free a token list.  Does not free the list itself, which may be
 559    embedded in a larger structure.  */
 560 void
 561 _cpp_free_toklist (list)
 562      const cpp_toklist *list;
 563 {
 564   if (list->flags & LIST_OFFSET)
 565     free (list->tokens - 1);    /* Backup over dummy token.  */
 566   else
 567     free (list->tokens);
 568   free (list->namebuf);
 569 }
 570
 571 /* Compare two tokens.  */
 572 int
 573 _cpp_equiv_tokens (a, b)
 574      const cpp_token *a, *b;
 575 {
 576   if (a->type == b->type && a->flags == b->flags)
 577     switch (token_spellings[a->type].type)
 578       {
 579       default:                  /* Keep compiler happy.  */
 580       case SPELL_OPERATOR:
 581         return 1;
 582       case SPELL_CHAR:
 583       case SPELL_NONE:
 584         return a->val.aux == b->val.aux; /* arg_no or character.  */
 585       case SPELL_IDENT:
 586       case SPELL_STRING:
 587         return (a->val.name.len == b->val.name.len
 588                 && !memcmp (a->val.name.text, b->val.name.text,
 589                             a->val.name.len));
 590       }
 591
 592   return 0;
 593 }
 594
 595 /* Compare two token lists.  */
 596 int
 597 _cpp_equiv_toklists (a, b)
 598      const cpp_toklist *a, *b;
 599 {
 600   unsigned int i;
 601
 602   if (a->tokens_used != b->tokens_used
 603       || a->flags != b->flags
 604       || a->paramc != b->paramc)
 605     return 0;
 606
 607   for (i = 0; i < a->tokens_used; i++)
 608     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 609       return 0;
 610   return 1;
 611 }
 612
 613 /* Utility routine:
 614    Compares, in the manner of strcmp(3), the token beginning at TOKEN
 615    and extending for LEN characters to the NUL-terminated string
 616    STRING.  Typical usage:
 617
 618    if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
 619                  "inline"))
 620      { ... }
 621  */
 622
 623 int
 624 cpp_idcmp (token, len, string)
 625      const U_CHAR *token;
 626      size_t len;
 627      const char *string;
 628 {
 629   size_t len2 = strlen (string);
 630   int r;
 631
 632   if ((r = memcmp (token, string, MIN (len, len2))))
 633     return r;
 634
 635   /* The longer of the two strings sorts after the shorter.  */
 636   if (len == len2)
 637     return 0;
 638   else if (len < len2)
 639     return -1;
 640   else
 641     return 1;
 642 }
 643
 644 /* Lexing algorithm.
 645
 646  The original lexer in cpplib was made up of two passes: a first pass
  647  that replaced trigraphs and deleted escaped newlines, and a second
 648  pass that tokenized the result of the first pass.  Tokenisation was
 649  performed by peeking at the next character in the input stream.  For
 650  example, if the input stream contained "!=", the handler for the !
 651  character would peek at the next character, and if it were a '='
 652  would skip over it, and return a "!=" token, otherwise it would
 653  return just the "!" token.
 654
 655  To implement a single-pass lexer, this peeking ahead is unworkable.
 656  An arbitrary number of escaped newlines, and trigraphs (in particular
 657  ??/ which translates to the escape \), could separate the '!' and '='
 658  in the input stream, yet the next token is still a "!=".
 659
 660  Suppose instead that we lex by one logical line at a time, producing
 661  a token list or stack for each logical line, and when seeing the '!'
 662  push a CPP_NOT token on the list.  Then if the '!' is part of a
 663  longer token ("!=") we know we must see the remainder of the token by
 664  the time we reach the end of the logical line.  Thus we can have the
 665  '=' handler look at the previous token (at the end of the list / top
 666  of the stack) and see if it is a "!" token, and if so, instead of
 667  pushing a "=" token revise the existing token to be a "!=" token.
 668
 669  This works in the presence of escaped newlines, because the '\' would
 670  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 671  newline ('\n' or '\r') handler looks at the token at the top of the
 672  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 673  Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
 674  the '=' handler would never see any intervening escaped newlines.
 675
 676  To make trigraphs work in this context, as in precedence trigraphs
 677  are highest and converted before anything else, the '?' handler does
 678  lookahead to see if it is a trigraph, and if so skips the trigraph
 679  and pushes the token it represents onto the top of the stack.  This
 680  also works in the particular case of a CPP_BACKSLASH trigraph.
 681
 682  To the preprocessor, whitespace is only significant to the point of
 683  knowing whether whitespace precedes a particular token.  For example,
 684  the '=' handler needs to know whether there was whitespace between it
 685  and a "!" token on the top of the stack, to make the token conversion
 686  decision correctly.  So each token has a PREV_WHITE flag to
 687  indicate this - the standard permits consecutive whitespace to be
 688  regarded as a single space.  The compiler front ends are not
 689  interested in whitespace at all; they just require a token stream.
 690  Another place where whitespace is significant to the preprocessor is
  691  a #define statement - if there is whitespace between the macro name
 692  and an initial "(" token the macro is "object-like", otherwise it is
 693  a function-like macro that takes arguments.
 694
 695  However, all is not rosy.  Parsing of identifiers, numbers, comments
 696  and strings becomes trickier because of the possibility of raw
 697  trigraphs and escaped newlines in the input stream.
 698
 699  The trigraphs are three consecutive characters beginning with two
 700  question marks.  A question mark is not valid as part of a number or
 701  identifier, so parsing of a number or identifier terminates normally
 702  upon reaching it, returning to the mainloop which handles the
 703  trigraph just like it would in any other position.  Similarly for the
 704  backslash of a backslash-newline combination.  So we just need the
 705  escaped-newline dropper in the mainloop to check if the token on the
 706  top of the stack after dropping the escaped newline is a number or
 707  identifier, and if so to continue the processing it as if nothing had
 708  happened.
 709
 710  For strings, we replace trigraphs whenever we reach a quote or
 711  newline, because there might be a backslash trigraph escaping them.
 712  We need to be careful that we start trigraph replacing from where we
 713  left off previously, because it is possible for a first scan to leave
 714  "fake" trigraphs that a second scan would pick up as real (e.g. the
 715  sequence "????/\n=" would find a fake ??= trigraph after removing the
 716  escaped newline.)
 717
 718  For line comments, on reaching a newline we scan the previous
 719  character(s) to see if it escaped, and continue if it is.  Block
 720  comments ignore everything and just focus on finding the comment
 721  termination mark.  The only difficult thing, and it is surprisingly
 722  tricky, is checking if an asterisk precedes the final slash since
 723  they could be separated by escaped newlines.  If the preprocessor is
 724  invoked with the output comments option, we don't bother removing
 725  escaped newlines and replacing trigraphs for output.
 726
 727  Finally, numbers can begin with a period, which is pushed initially
 728  as a CPP_DOT token in its own right.  The digit handler checks if the
 729  previous token was a CPP_DOT not separated by whitespace, and if so
 730  pops it off the stack and pushes a period into the number's buffer
 731  before calling the number parser.
 732
 733 */
 734
 735 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 736                                                     U":>", U"<%", U"%>"};
 737
 738 /* Call when a trigraph is encountered.  It warns if necessary, and
 739    returns true if the trigraph should be honoured.  END is the third
 740    character of a trigraph in the input stream.  */
 741 static int
 742 trigraph_ok (pfile, end)
 743      cpp_reader *pfile;
 744      const unsigned char *end;
 745 {
 746   int accept = CPP_OPTION (pfile, trigraphs);
 747
 748   if (CPP_OPTION (pfile, warn_trigraphs))
 749     {
 750       unsigned int col = end - 1 - pfile->buffer->line_base;
 751       if (accept)
 752         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 753                                "trigraph ??%c converted to %c",
 754                                (int) *end, (int) trigraph_map[*end]);
 755       else
 756         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 757                                "trigraph ??%c ignored", (int) *end);
 758     }
 759   return accept;
 760 }
 761
 762 /* Scan a string for trigraphs, warning or replacing them inline as
 763    appropriate.  When parsing a string, we must call this routine
 764    before processing a newline character (if trigraphs are enabled),
 765    since the newline might be escaped by a preceding backslash
 766    trigraph sequence.  Returns a pointer to the end of the name after
 767    replacement.  */
 768
 769 static unsigned char *
 770 trigraph_replace (pfile, src, limit)
 771      cpp_reader *pfile;
 772      unsigned char *src;
 773      unsigned char *limit;
 774 {
 775   unsigned char *dest;
 776
 777   /* Starting with src[1], find two consecutive '?'.  The case of no
 778      trigraphs is streamlined.  */
 779
 780   for (src++; src + 1 < limit; src += 2)
 781     {
 782       if (src[0] != '?')
 783         continue;
 784
 785       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 786       if (src[-1] == '?')
 787         src--;
 788       else if (src + 2 == limit || src[1] != '?')
 789         continue;
 790
 791       /* Check if it really is a trigraph.  */
 792       if (trigraph_map[src[2]] == 0)
 793         continue;
 794
 795       dest = src;
 796       goto trigraph_found;
 797     }
 798   return limit;
 799
 800   /* Now we have a trigraph, we need to scan the remaining buffer, and
 801      copy-shifting its contents left if replacement is enabled.  */
 802   for (; src + 2 < limit; dest++, src++)
 803     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 804       {
 805       trigraph_found:
 806         src += 2;
 807         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 808           *dest = trigraph_map[*src];
 809       }
 810
 811   /* Copy remaining (at most 2) characters.  */
 812   while (src < limit)
 813     *dest++ = *src++;
 814   return dest;
 815 }
 816
 817 /* If CUR is a backslash or the end of a trigraphed backslash, return
 818    a pointer to its beginning, otherwise NULL.  We don't read beyond
 819    the buffer start, because there is the start of the comment in the
 820    buffer.  */
 821 static const unsigned char *
 822 backslash_start (pfile, cur)
 823      cpp_reader *pfile;
 824      const unsigned char *cur;
 825 {
 826   if (cur[0] == '\\')
 827     return cur;
 828   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 829       && trigraph_ok (pfile, cur))
 830     return cur - 2;
 831   return 0;
 832 }
 833
 834 /* Skip a C-style block comment.  This is probably the trickiest
 835    handler.  We find the end of the comment by seeing if an asterisk
 836    is before every '/' we encounter.  The nasty complication is that a
 837    previous asterisk may be separated by one or more escaped newlines.
 838    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 839 static int
 840 skip_block_comment (pfile)
 841      cpp_reader *pfile;
 842 {
 843   cpp_buffer *buffer = pfile->buffer;
 844   const unsigned char *char_after_star = 0;
 845   register const unsigned char *cur = buffer->cur;
 846   int seen_eof = 0;
 847
 848   /* Inner loop would think the comment has ended if the first comment
 849      character is a '/'.  Avoid this and keep the inner loop clean by
 850      skipping such a character.  */
 851   if (cur < buffer->rlimit && cur[0] == '/')
 852     cur++;
 853
 854   for (; cur < buffer->rlimit; )
 855     {
 856       unsigned char c = *cur++;
 857
 858       /* People like decorating comments with '*', so check for
 859          '/' instead for efficiency.  */
 860       if (c == '/')
 861         {
 862           if (cur[-2] == '*' || cur - 1 == char_after_star)
 863             goto out;
 864
 865           /* Warn about potential nested comments, but not when
 866              the final character inside the comment is a '/'.
 867              Don't bother to get it right across escaped newlines.  */
 868           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 869               && cur[0] == '*' && cur[1] != '/')
 870             {
 871               buffer->cur = cur;
 872               cpp_warning (pfile, "'/*' within comment");
 873             }
 874         }
 875       else if (IS_NEWLINE(c))
 876         {
 877           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 878
 879           handle_newline (cur, buffer->rlimit, c);
 880           /* Work correctly if there is an asterisk before an
 881              arbirtrarily long sequence of escaped newlines.  */
 882           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 883             char_after_star = cur;
 884           else
 885             char_after_star = 0;
 886         }
 887     }
 888   seen_eof = 1;
 889
 890  out:
 891   buffer->cur = cur;
 892   return seen_eof;
 893 }
 894
 895 /* Skip a C++ or Chill line comment.  Handles escaped newlines.
 896    Returns non-zero if a multiline comment.  */
 897 static int
 898 skip_line_comment (pfile)
 899      cpp_reader *pfile;
 900 {
 901   cpp_buffer *buffer = pfile->buffer;
 902   register const unsigned char *cur = buffer->cur;
 903   int multiline = 0;
 904
 905   for (; cur < buffer->rlimit; )
 906     {
 907       unsigned char c = *cur++;
 908
 909       if (IS_NEWLINE (c))
 910         {
 911           /* Check for a (trigaph?) backslash escaping the newline.  */
 912           if (!backslash_start (pfile, cur - 2))
 913             goto out;
 914           multiline = 1;
 915           handle_newline (cur, buffer->rlimit, c);
 916         }
 917     }
 918   cur++;
 919
 920  out:
 921   buffer->cur = cur - 1;        /* Leave newline for caller.  */
 922   return multiline;
 923 }
 924
 925 /* Skips whitespace, stopping at next non-whitespace character.
 926    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
 927    to be assigned the correct column.  */
 928 static void
 929 skip_whitespace (pfile, in_directive)
 930      cpp_reader *pfile;
 931      int in_directive;
 932 {
 933   cpp_buffer *buffer = pfile->buffer;
 934   register const unsigned char *cur = buffer->cur;
 935   unsigned short null_count = 0;
 936
 937   for (; cur < buffer->rlimit; )
 938     {
 939       unsigned char c = *cur++;
 940
 941       if (c == '\t')
 942         {
 943           unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
 944           pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
 945                                 - col % CPP_OPTION(pfile, tabstop));
 946         }
 947       if (IS_HSPACE(c))         /* FIXME: Fix ISTABLE.  */
 948         continue;
 949       if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines.  */
 950         goto out;
 951       if (c == '\0')
 952         null_count++;
 953       /* Mut be '\f' or '\v' */
 954       else if (in_directive && CPP_PEDANTIC (pfile))
 955         cpp_pedwarn (pfile, "%s in preprocessing directive",
 956                      c == '\f' ? "formfeed" : "vertical tab");
 957     }
 958   cur++;
 959
 960  out:
 961   buffer->cur = cur - 1;
 962   if (null_count)
 963     cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
 964                  : "embedded null character ignored");
 965 }
 966
 967 /* Parse (append) an identifier.  */
 968 static void
 969 parse_name (pfile, list, name)
 970      cpp_reader *pfile;
 971      cpp_toklist *list;
 972      cpp_name *name;
 973 {
 974   const unsigned char *name_limit;
 975   unsigned char *namebuf;
 976   cpp_buffer *buffer = pfile->buffer;
 977   register const unsigned char *cur = buffer->cur;
 978
 979  expanded:
 980   name_limit = list->namebuf + list->name_cap;
 981   namebuf = list->namebuf + list->name_used;
 982
 983   for (; cur < buffer->rlimit && namebuf < name_limit; )
 984     {
 985       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
 986
 987       if (! is_idchar(c))
 988         goto out;
 989       namebuf++;
 990       cur++;
 991       /* $ is not a legal identifier character in the standard, but is
 992          commonly accepted as an extension.  Don't warn about it in
 993          skipped conditional blocks. */
 994       if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 995         {
 996           buffer->cur = cur;
 997           cpp_pedwarn (pfile, "'$' character in identifier");
 998         }
 999     }
1000
1001   /* Run out of name space?  */
1002   if (cur < buffer->rlimit)
1003     {
1004       list->name_used = namebuf - list->namebuf;
1005       auto_expand_name_space (list);
1006       goto expanded;
1007     }
1008
1009  out:
1010   buffer->cur = cur;
1011   name->len = namebuf - name->text;
1012   list->name_used = namebuf - list->namebuf;
1013 }
1014
1015 /* Parse (append) a number.  */
1016 static void
1017 parse_number (pfile, list, name)
1018      cpp_reader *pfile;
1019      cpp_toklist *list;
1020      cpp_name *name;
1021 {
1022   const unsigned char *name_limit;
1023   unsigned char *namebuf;
1024   cpp_buffer *buffer = pfile->buffer;
1025   register const unsigned char *cur = buffer->cur;
1026
1027  expanded:
1028   name_limit = list->namebuf + list->name_cap;
1029   namebuf = list->namebuf + list->name_used;
1030
1031   for (; cur < buffer->rlimit && namebuf < name_limit; )
1032     {
1033       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1034
1035       /* Perhaps we should accept '$' here if we accept it for
1036          identifiers.  We know namebuf[-1] is safe, because for c to
1037          be a sign we must have pushed at least one character.  */
1038       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1039         goto out;
1040
1041       namebuf++;
1042       cur++;
1043     }
1044
1045   /* Run out of name space?  */
1046   if (cur < buffer->rlimit)
1047     {
1048       list->name_used = namebuf - list->namebuf;
1049       auto_expand_name_space (list);
1050       goto expanded;
1051     }
1052
1053  out:
1054   buffer->cur = cur;
1055   name->len = namebuf - name->text;
1056   list->name_used = namebuf - list->namebuf;
1057 }
1058
1059 /* Places a string terminated by an unescaped TERMINATOR into a
1060    cpp_name, which should be expandable and thus at the top of the
1061    list's stack.  Handles embedded trigraphs, if necessary, and
1062    escaped newlines.
1063
1064    Can be used for character constants (terminator = '\''), string
1065    constants ('"') and angled headers ('>').  Multi-line strings are
1066    allowed, except for within directives.  */
1067
1068 static void
1069 parse_string (pfile, list, token, terminator)
1070      cpp_reader *pfile;
1071      cpp_toklist *list;
1072      cpp_token *token;
1073      unsigned int terminator;
1074 {
1075   cpp_buffer *buffer = pfile->buffer;
1076   cpp_name *name = &token->val.name;
1077   register const unsigned char *cur = buffer->cur;
1078   const unsigned char *name_limit;
1079   unsigned char *namebuf;
1080   unsigned int null_count = 0;
1081   unsigned int trigraphed = list->name_used;
1082
1083  expanded:
1084   name_limit = list->namebuf + list->name_cap;
1085   namebuf = list->namebuf + list->name_used;
1086
1087   for (; cur < buffer->rlimit && namebuf < name_limit; )
1088     {
1089       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1090
1091       if (c == '\0')
1092         null_count++;
1093       else if (c == terminator || IS_NEWLINE (c))
1094         {
1095           /* Needed for trigraph_replace and multiline string warning.  */
1096           buffer->cur = cur;
1097
1098           /* Scan for trigraphs before checking if backslash-escaped.  */
1099           if ((CPP_OPTION (pfile, trigraphs)
1100                || CPP_OPTION (pfile, warn_trigraphs))
1101               && namebuf - (list->namebuf + trigraphed) >= 3)
1102             {
1103               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1104                                           namebuf);
1105               /* The test above guarantees trigraphed will be positive.  */
1106               trigraphed = namebuf - list->namebuf - 2;
1107             }
1108
1109           namebuf--;     /* Drop the newline / terminator from the name.  */
1110           if (IS_NEWLINE (c))
1111             {
1112               /* Drop a backslash newline, and continue. */
1113               if (namebuf[-1] == '\\')
1114                 {
1115                   handle_newline (cur, buffer->rlimit, c);
1116                   namebuf--;
1117                   continue;
1118                 }
1119
1120               cur--;
1121
1122               /* In Fortran and assembly language, silently terminate
1123                  strings of either variety at end of line.  This is a
1124                  kludge around not knowing where comments are in these
1125                  languages.  */
1126               if (CPP_OPTION (pfile, lang_fortran)
1127                   || CPP_OPTION (pfile, lang_asm))
1128                 goto out;
1129
1130               /* Character constants, headers and asserts may not
1131                  extend over multiple lines.  In Standard C, neither
1132                  may strings.  We accept multiline strings as an
1133                  extension.  (Even in directives - otherwise, glibc's
1134                  longlong.h breaks.)  */
1135               if (terminator != '"')
1136                 goto unterminated;
1137
1138               cur++;  /* Move forwards again.  */
1139
1140               if (pfile->multiline_string_line == 0)
1141                 {
1142                   pfile->multiline_string_line = token->line;
1143                   pfile->multiline_string_column = token->col;
1144                   if (CPP_PEDANTIC (pfile))
1145                     cpp_pedwarn (pfile, "multi-line string constant");
1146                 }
1147
1148               *namebuf++ = '\n';
1149               handle_newline (cur, buffer->rlimit, c);
1150             }
1151           else
1152             {
1153               unsigned char *temp;
1154
1155               /* An odd number of consecutive backslashes represents
1156                  an escaped terminator.  */
1157               temp = namebuf - 1;
1158               while (temp >= name->text && *temp == '\\')
1159                 temp--;
1160
1161               if ((namebuf - temp) & 1)
1162                 goto out;
1163               namebuf++;
1164             }
1165         }
1166     }
1167
1168   /* Run out of name space?  */
1169   if (cur < buffer->rlimit)
1170     {
1171       list->name_used = namebuf - list->namebuf;
1172       auto_expand_name_space (list);
1173       goto expanded;
1174     }
1175
1176   /* We may not have trigraph-replaced the input for this code path,
1177      but as the input is in error by being unterminated we don't
1178      bother.  Prevent warnings about no newlines at EOF.  */
1179   if (IS_NEWLINE(cur[-1]))
1180     cur--;
1181
1182  unterminated:
1183   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1184
1185   if (terminator == '\"' && pfile->multiline_string_line != list->line
1186       && pfile->multiline_string_line != 0)
1187     {
1188       cpp_error_with_line (pfile, pfile->multiline_string_line,
1189                            pfile->multiline_string_column,
1190                            "possible start of unterminated string literal");
1191       pfile->multiline_string_line = 0;
1192     }
1193
1194  out:
1195   buffer->cur = cur;
1196   name->len = namebuf - name->text;
1197   list->name_used = namebuf - list->namebuf;
1198
1199   if (null_count > 0)
1200     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1201                          : "null character preserved"));
1202 }
1203
1204 /* The character TYPE helps us distinguish comment types: '*' = C
1205    style, '-' = Chill-style and '/' = C++ style.  For code simplicity,
1206    the stored comment includes the comment start and any terminator.  */
1207
1208 #define COMMENT_START_LEN 2
1209 static void
1210 save_comment (list, token, from, len, type)
1211      cpp_toklist *list;
1212      cpp_token *token;
1213      const unsigned char *from;
1214      unsigned int len;
1215      unsigned int type;
1216 {
1217   unsigned char *buffer;
1218
1219   len += COMMENT_START_LEN;
1220
1221   if (list->name_used + len > list->name_cap)
1222     _cpp_expand_name_space (list, len);
1223
1224   INIT_TOKEN_NAME (list, token);
1225   token->type = CPP_COMMENT;
1226   token->val.name.len = len;
1227
1228   buffer = list->namebuf + list->name_used;
1229   list->name_used += len;
1230
1231   /* Copy the comment.  */
1232   if (type == '*')
1233     {
1234       *buffer++ = '/';
1235       *buffer++ = '*';
1236     }
1237   else
1238     {
1239       *buffer++ = type;
1240       *buffer++ = type;
1241     }
1242   memcpy (buffer, from, len - COMMENT_START_LEN);
1243 }
1244
1245 /*
1246  *  The tokenizer's main loop.  Returns a token list, representing a
1247  *  logical line in the input file.  On EOF after some tokens have
1248  *  been processed, we return immediately.  Then in next call, or if
1249  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1250  *  token is placed in the list.
1251  *
1252  *  Implementation relies almost entirely on lookback, rather than
1253  *  looking forwards.  This means that tokenization requires just
1254  *  a single pass of the file, even in the presence of trigraphs and
1255  *  escaped newlines, providing significant performance benefits.
1256  *  Trigraph overhead is negligible if they are disabled, and low
1257  *  even when enabled.
1258  */
1259
/* Helpers for lex_line; both expand to expressions over its locals
   (list, cur_token, first_token) and so are only usable there.

   IS_DIRECTIVE: non-zero once list->directive has been set for the
   line being lexed.  */
#define IS_DIRECTIVE() (list->directive != 0)
/* MIGHT_BE_DIRECTIVE: non-zero when cur_token is the second token
   lexed by this call and the token before it is a CPP_HASH.  */
#define MIGHT_BE_DIRECTIVE() \
(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1263
1264 static void
1265 lex_line (pfile, list)
1266      cpp_reader *pfile;
1267      cpp_toklist *list;
1268 {
1269   cpp_token *cur_token, *token_limit, *first;
1270   cpp_buffer *buffer = pfile->buffer;
1271   const unsigned char *cur = buffer->cur;
1272   unsigned char flags = 0;
1273   unsigned int first_token = list->tokens_used;
1274
1275   if (!(list->flags & LIST_OFFSET))
1276     (abort) ();
1277
1278   list->file = buffer->nominal_fname;
1279   list->line = CPP_BUF_LINE (buffer);
1280   pfile->col_adjust = 0;
1281   pfile->in_lex_line = 1;
1282   if (cur == buffer->buf)
1283     list->flags |= BEG_OF_FILE;
1284
1285  expanded:
1286   token_limit = list->tokens + list->tokens_cap;
1287   cur_token = list->tokens + list->tokens_used;
1288
1289   for (; cur < buffer->rlimit && cur_token < token_limit;)
1290     {
1291       unsigned char c;
1292
1293       /* Optimize whitespace skipping, as most tokens are probably
1294          separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1295       c = *cur++;
1296       if (is_hspace (c))
1297         {
1298           /* Step back to get the null warning and tab correction.  */
1299           buffer->cur = cur - 1;
1300           skip_whitespace (pfile, IS_DIRECTIVE ());
1301           cur = buffer->cur;
1302
1303           flags = PREV_WHITE;
1304           if (cur == buffer->rlimit)
1305             break;
1306           c = *cur++;
1307         }
1308
1309       /* Initialize current token.  CPP_EOF will not be fixed up by
1310          expand_name_space.  */
1311       list->tokens_used = cur_token - list->tokens + 1;
1312       cur_token->type = CPP_EOF;
1313       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1314       cur_token->line = CPP_BUF_LINE (buffer);
1315       cur_token->flags = flags;
1316       flags = 0;
1317
1318       switch (c)
1319         {
1320         case '0': case '1': case '2': case '3': case '4':
1321         case '5': case '6': case '7': case '8': case '9':
1322           {
1323             int prev_dot;
1324
1325             cur--;              /* Backup character.  */
1326             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1327             if (prev_dot)
1328               cur_token--;
1329             INIT_TOKEN_NAME (list, cur_token);
1330             /* Prepend an immediately previous CPP_DOT token.  */
1331             if (prev_dot)
1332               {
1333                 if (list->name_cap == list->name_used)
1334                   auto_expand_name_space (list);
1335
1336                 cur_token->val.name.len = 1;
1337                 list->namebuf[list->name_used++] = '.';
1338               }
1339
1340           continue_number:
1341             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1342             buffer->cur = cur;
1343             parse_number (pfile, list, &cur_token->val.name);
1344             cur = buffer->cur;
1345           }
1346           /* Check for # 123 form of #line.  */
1347           if (MIGHT_BE_DIRECTIVE ())
1348             list->directive = _cpp_check_linemarker (pfile, cur_token,
1349                                                      !(cur_token[-1].flags
1350                                                        & PREV_WHITE));
1351           cur_token++;
1352           break;
1353
1354         letter:
1355         case '_':
1356         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1357         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1358         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1359         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1360         case 'y': case 'z':
1361         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1362         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1363         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1364         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1365         case 'Y': case 'Z':
1366           cur--;                     /* Backup character.  */
1367           INIT_TOKEN_NAME (list, cur_token);
1368           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1369
1370         continue_name:
1371           buffer->cur = cur;
1372           parse_name (pfile, list, &cur_token->val.name);
1373           cur = buffer->cur;
1374
1375           if (MIGHT_BE_DIRECTIVE ())
1376             list->directive = _cpp_check_directive (pfile, cur_token,
1377                                                     !(list->tokens[0].flags
1378                                                       & PREV_WHITE));
1379           cur_token++;
1380           break;
1381
1382         case '\'':
1383           /* Character constants are not recognized when processing Fortran,
1384              or if -traditional.  */
1385           if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1386             goto other;
1387
1388           /* Fall through.  */
1389         case '\"':
1390           /* Traditionally, escaped strings are not strings.  */
1391           if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1392               && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1393             goto other;
1394
1395           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1396           /* Do we have a wide string?  */
1397           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1398               && cur_token[-1].val.name.len == 1
1399               && cur_token[-1].val.name.text[0] == 'L'
1400               && !CPP_TRADITIONAL (pfile))
1401             {
1402               /* No need for 'L' any more.  */
1403               list->name_used--;
1404               (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1405             }
1406
1407         do_parse_string:
1408           /* Here c is one of ' " or >.  */
1409           INIT_TOKEN_NAME (list, cur_token);
1410           buffer->cur = cur;
1411           parse_string (pfile, list, cur_token, c);
1412           cur = buffer->cur;
1413           cur_token++;
1414           break;
1415
1416         case '/':
1417           cur_token->type = CPP_DIV;
1418           if (IMMED_TOKEN ())
1419             {
1420               if (PREV_TOKEN_TYPE == CPP_DIV)
1421                 {
1422                   /* We silently allow C++ comments in system headers,
1423                      irrespective of conformance mode, because lots of
1424                      broken systems do that and trying to clean it up
1425                      in fixincludes is a nightmare.  */
1426                   if (CPP_IN_SYSTEM_HEADER (pfile))
1427                     goto do_line_comment;
1428                   else if (CPP_OPTION (pfile, cplusplus_comments))
1429                     {
1430                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1431                           && ! buffer->warned_cplusplus_comments)
1432                         {
1433                           buffer->cur = cur;
1434                           cpp_pedwarn (pfile,
1435                              "C++ style comments are not allowed in ISO C89");
1436                           cpp_pedwarn (pfile,
1437                           "(this will be reported only once per input file)");
1438                           buffer->warned_cplusplus_comments = 1;
1439                         }
1440                     do_line_comment:
1441                       buffer->cur = cur;
1442 #if 0 /* Leave until new lexer in place.  */
1443                       if (cur[-2] != c)
1444                         cpp_warning (pfile,
1445                                      "comment start split across lines");
1446 #endif
1447                       if (skip_line_comment (pfile))
1448                         cpp_warning (pfile, "multi-line comment");
1449
1450                       /* Back-up to first '-' or '/'.  */
1451                       cur_token--;
1452                       if (!CPP_OPTION (pfile, discard_comments)
1453                           && (!IS_DIRECTIVE()
1454                               || (list->directive->flags & COMMENTS)))
1455                         save_comment (list, cur_token++, cur,
1456                                       buffer->cur - cur, c);
1457                       else if (!CPP_OPTION (pfile, traditional))
1458                         flags = PREV_WHITE;
1459
1460                       cur = buffer->cur;
1461                       break;
1462                     }
1463                 }
1464             }
1465           cur_token++;
1466           break;
1467
1468         case '*':
1469           cur_token->type = CPP_MULT;
1470           if (IMMED_TOKEN ())
1471             {
1472               if (PREV_TOKEN_TYPE == CPP_DIV)
1473                 {
1474                   buffer->cur = cur;
1475 #if 0 /* Leave until new lexer in place.  */
1476                   if (cur[-2] != '/')
1477                     cpp_warning (pfile,
1478                                  "comment start '/*' split across lines");
1479 #endif
1480                   if (skip_block_comment (pfile))
1481                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1482                                          "unterminated comment");
1483 #if 0 /* Leave until new lexer in place.  */
1484                   else if (buffer->cur[-2] != '*')
1485                     cpp_warning (pfile,
1486                                  "comment end '*/' split across lines");
1487 #endif
1488                   /* Back up to opening '/'.  */
1489                   cur_token--;
1490                   if (!CPP_OPTION (pfile, discard_comments)
1491                       && (!IS_DIRECTIVE()
1492                           || (list->directive->flags & COMMENTS)))
1493                     save_comment (list, cur_token++, cur,
1494                                   buffer->cur - cur, c);
1495                   else if (!CPP_OPTION (pfile, traditional))
1496                     flags = PREV_WHITE;
1497
1498                   cur = buffer->cur;
1499                   break;
1500                 }
1501               else if (CPP_OPTION (pfile, cplusplus))
1502                 {
1503                   /* In C++, there are .* and ->* operators.  */
1504                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1505                     BACKUP_TOKEN (CPP_DEREF_STAR);
1506                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1507                     BACKUP_TOKEN (CPP_DOT_STAR);
1508                 }
1509             }
1510           cur_token++;
1511           break;
1512
1513         case '\n':
1514         case '\r':
1515           handle_newline (cur, buffer->rlimit, c);
1516           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1517             {
1518               if (IMMED_TOKEN ())
1519                 {
1520                   /* Remove the escaped newline.  Then continue to process
1521                      any interrupted name or number.  */
1522                   cur_token--;
1523                   /* Backslash-newline may not be immediately followed by
1524                      EOF (C99 5.1.1.2).  */
1525                   if (cur >= buffer->rlimit)
1526                     {
1527                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1528                       break;
1529                     }
1530                   if (IMMED_TOKEN ())
1531                     {
1532                       cur_token--;
1533                       if (cur_token->type == CPP_NAME)
1534                         goto continue_name;
1535                       else if (cur_token->type == CPP_NUMBER)
1536                         goto continue_number;
1537                       cur_token++;
1538                     }
1539                   /* Remember whitespace setting.  */
1540                   flags = cur_token->flags;
1541                   break;
1542                 }
1543               else
1544                 {
1545                   buffer->cur = cur;
1546                   cpp_warning (pfile,
1547                                "backslash and newline separated by space");
1548                 }
1549             }
1550           else if (MIGHT_BE_DIRECTIVE ())
1551             {
1552               /* "Null directive." C99 6.10.7: A preprocessing
1553                  directive of the form # <new-line> has no effect.
1554
1555                  But it is still a directive, and therefore disappears
1556                  from the output. */
1557               cur_token--;
1558               if (cur_token->flags & PREV_WHITE)
1559                 {
1560                   if (CPP_WTRADITIONAL (pfile))
1561                     cpp_warning (pfile,
1562                                  "K+R C ignores #\\n with the # indented");
1563                   if (CPP_TRADITIONAL (pfile))
1564                     cur_token++;
1565                 }
1566             }
1567
1568           /* Skip vertical space until we have at least one token to
1569              return.  */
1570           if (cur_token != &list->tokens[first_token])
1571             goto out;
1572           list->line = CPP_BUF_LINE (buffer);
1573           break;
1574
1575         case '-':
1576           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1577             {
1578               if (CPP_OPTION (pfile, chill))
1579                 goto do_line_comment;
1580               REVISE_TOKEN (CPP_MINUS_MINUS);
1581             }
1582           else
1583             PUSH_TOKEN (CPP_MINUS);
1584           break;
1585
1586         make_hash:
1587         case '#':
1588           /* The digraph flag checking ensures that ## and %:%:
1589              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1590           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1591               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1592             REVISE_TOKEN (CPP_PASTE);
1593           else
1594             PUSH_TOKEN (CPP_HASH);
1595           break;
1596
1597         case ':':
1598           cur_token->type = CPP_COLON;
1599           if (IMMED_TOKEN ())
1600             {
1601               if (PREV_TOKEN_TYPE == CPP_COLON
1602                   && CPP_OPTION (pfile, cplusplus))
1603                 BACKUP_TOKEN (CPP_SCOPE);
1604               /* Digraph: "<:" is a '['  */
1605               else if (PREV_TOKEN_TYPE == CPP_LESS)
1606                 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1607               /* Digraph: "%:" is a '#'  */
1608               else if (PREV_TOKEN_TYPE == CPP_MOD)
1609                 {
1610                   (--cur_token)->flags |= DIGRAPH;
1611                   goto make_hash;
1612                 }
1613             }
1614           cur_token++;
1615           break;
1616
1617         case '&':
1618           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1619             REVISE_TOKEN (CPP_AND_AND);
1620           else
1621             PUSH_TOKEN (CPP_AND);
1622           break;
1623
1624         make_or:
1625         case '|':
1626           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1627             REVISE_TOKEN (CPP_OR_OR);
1628           else
1629             PUSH_TOKEN (CPP_OR);
1630           break;
1631
1632         case '+':
1633           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1634             REVISE_TOKEN (CPP_PLUS_PLUS);
1635           else
1636             PUSH_TOKEN (CPP_PLUS);
1637           break;
1638
1639         case '=':
1640             /* This relies on equidistance of "?=" and "?" tokens.  */
1641           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1642             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1643           else
1644             PUSH_TOKEN (CPP_EQ);
1645           break;
1646
1647         case '>':
1648           cur_token->type = CPP_GREATER;
1649           if (IMMED_TOKEN ())
1650             {
1651               if (PREV_TOKEN_TYPE == CPP_GREATER)
1652                 BACKUP_TOKEN (CPP_RSHIFT);
1653               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1654                 BACKUP_TOKEN (CPP_DEREF);
1655               /* Digraph: ":>" is a ']'  */
1656               else if (PREV_TOKEN_TYPE == CPP_COLON)
1657                 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1658               /* Digraph: "%>" is a '}'  */
1659               else if (PREV_TOKEN_TYPE == CPP_MOD)
1660                 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1661             }
1662           cur_token++;
1663           break;
1664
1665         case '<':
1666           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1667             {
1668               REVISE_TOKEN (CPP_LSHIFT);
1669               break;
1670             }
1671           /* Is this the beginning of a header name?  */
1672           if (IS_DIRECTIVE () && (list->directive->flags & INCL))
1673             {
1674               c = '>';  /* Terminator.  */
1675               cur_token->type = CPP_HEADER_NAME;
1676               goto do_parse_string;
1677             }
1678           PUSH_TOKEN (CPP_LESS);
1679           break;
1680
1681         case '%':
1682           /* Digraph: "<%" is a '{'  */
1683           cur_token->type = CPP_MOD;
1684           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1685             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1686           cur_token++;
1687           break;
1688
1689         case '?':
1690           if (cur + 1 < buffer->rlimit && *cur == '?'
1691               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1692             {
1693               /* Handle trigraph.  */
1694               cur++;
1695               switch (*cur++)
1696                 {
1697                 case '(': goto make_open_square;
1698                 case ')': goto make_close_square;
1699                 case '<': goto make_open_brace;
1700                 case '>': goto make_close_brace;
1701                 case '=': goto make_hash;
1702                 case '!': goto make_or;
1703                 case '-': goto make_complement;
1704                 case '/': goto make_backslash;
1705                 case '\'': goto make_xor;
1706                 }
1707             }
1708           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1709             {
1710               /* GNU C++ defines <? and >? operators.  */
1711               if (PREV_TOKEN_TYPE == CPP_LESS)
1712                 {
1713                   REVISE_TOKEN (CPP_MIN);
1714                   break;
1715                 }
1716               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1717                 {
1718                   REVISE_TOKEN (CPP_MAX);
1719                   break;
1720                 }
1721             }
1722           PUSH_TOKEN (CPP_QUERY);
1723           break;
1724
1725         case '.':
1726           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1727               && IMMED_TOKEN ()
1728               && !(cur_token[-1].flags & PREV_WHITE))
1729             {
1730               cur_token -= 2;
1731               PUSH_TOKEN (CPP_ELLIPSIS);
1732             }
1733           else
1734             PUSH_TOKEN (CPP_DOT);
1735           break;
1736
1737         make_complement:
1738         case '~': PUSH_TOKEN (CPP_COMPL); break;
1739         make_xor:
1740         case '^': PUSH_TOKEN (CPP_XOR); break;
1741         make_open_brace:
1742         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1743         make_close_brace:
1744         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1745         make_open_square:
1746         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1747         make_close_square:
1748         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1749         make_backslash:
1750         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1751         case '!': PUSH_TOKEN (CPP_NOT); break;
1752         case ',': PUSH_TOKEN (CPP_COMMA); break;
1753         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1754         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1755         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1756
1757         case '$':
1758           if (CPP_OPTION (pfile, dollars_in_ident))
1759             goto letter;
1760           /* Fall through */
1761         other:
1762         default:
1763           cur_token->val.aux = c;
1764           PUSH_TOKEN (CPP_OTHER);
1765           break;
1766         }
1767     }
1768
1769   /* Run out of token space?  */
1770   if (cur_token == token_limit)
1771     {
1772       list->tokens_used = cur_token - list->tokens;
1773       _cpp_expand_token_space (list, 256);
1774       goto expanded;
1775     }
1776
1777   cur_token->flags = flags;
1778   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1779     {
1780       if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
1781         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1782                                CPP_BUF_COLUMN (buffer, cur),
1783                                "no newline at end of file");
1784       cur_token++->type = CPP_EOF;
1785     }
1786
1787  out:
1788   /* All tokens are allocated, so the memory location is fixed.  */
1789   first = &list->tokens[first_token];
1790
1791   /* Don't complain about the null directive, nor directives in
1792      assembly source: we don't know where the comments are, and # may
1793      introduce assembler pseudo-ops.  Don't complain about invalid
1794      directives in skipped conditional groups (6.10 p4).  */
1795   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1796       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1797     {
1798       if (first[1].type == CPP_NAME)
1799         cpp_error (pfile, "invalid preprocessing directive #%.*s",
1800                    (int) first[1].val.name.len, first[1].val.name.text);
1801       else
1802         cpp_error (pfile, "invalid preprocessing directive");
1803     }
1804
1805   /* Put EOF at end of directives.  This covers "directives do not
1806      extend beyond the end of the line (description 6.10 part 2)".  */
1807   if (IS_DIRECTIVE () || !pfile->done_initializing)
1808     {
1809       pfile->first_directive_token = first;
1810       cur_token++->type = CPP_EOF;
1811     }
1812
1813   if (first_token == 0 || IS_DIRECTIVE ())
1814     /* Set beginning of line flag.  */
1815     first->flags |= BOL;
1816   else
1817     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1818        up the invocation of a function-like macro, new line is
1819        considered a normal white-space character.  */
1820     first->flags |= PREV_WHITE;
1821
1822   buffer->cur = cur;
1823   list->tokens_used = cur_token - list->tokens;
1824   pfile->in_lex_line = 0;
1825 }
1826
1827 /* Write the spelling of a token TOKEN, with any appropriate
        whitespace in front of it, to the token_buffer.  A token that
        carries the BOL flag and a non-zero column is padded with
        spaces out to that column; a token flagged PREV_WHITE gets a
        single space.  Failing both, and outside traditional mode, a
        space is inserted whenever PREV (the token written just before,
        possibly null) and TOKEN would otherwise read back as a single
        token.  */
1831 static void
1832 output_token (pfile, token, prev)
1833      cpp_reader *pfile;
1834      const cpp_token *token, *prev;
1835 {
       int ignored_digraph;

       if (token->col && (token->flags & BOL))
         {
           /* Tabs are not reconstructed: the indentation is rebuilt from
              plain spaces only.  */
           unsigned char *out;
           unsigned int pad;

           CPP_RESERVE (pfile, token->col);
           out = pfile->limit;
           for (pad = token->col - 1; pad != 0; pad--)
             *out++ = ' ';
           pfile->limit = out;
         }
       else if (token->flags & PREV_WHITE)
         CPP_PUTC (pfile, ' ');
       else if (!CPP_TRADITIONAL (pfile) && prev)
         {
           /* can_paste does not report "+" followed by "++" (or "-"
              followed by "--"), yet without a space "a + ++b" would be
              re-read as "a++ + b", so those pairs are checked by hand.  */
           int separate
             = can_paste (pfile, prev, token, &ignored_digraph) != CPP_EOF;

           if (!separate)
             separate = ((prev->type == CPP_PLUS
                          && token->type == CPP_PLUS_PLUS)
                         || (prev->type == CPP_MINUS
                             && token->type == CPP_MINUS_MINUS));
           if (separate)
             CPP_PUTC (pfile, ' ');
         }

       CPP_RESERVE (pfile, TOKEN_LEN (token));
       pfile->limit = spell_token (pfile, token, pfile->limit);
1872 }
1873
1874 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
        already be large enough for the whole spelling; nothing is
        reserved or terminated here.  Returns a pointer to the character
        after the last one written.  PFILE is only used to report an
        internal error for a token that has no spelling.  */
1879 static unsigned char *
1880 spell_token (pfile, token, buffer)
1881      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1882      const cpp_token *token;
1883      unsigned char *buffer;
1884 {
       switch (token_spellings[token->type].type)
         {
         case SPELL_NONE:
           cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
           break;

         case SPELL_CHAR:
           *buffer++ = token->val.aux;
           break;

         case SPELL_IDENT:
           memcpy (buffer, token->val.name.text, token->val.name.len);
           buffer += token->val.name.len;
           break;

         case SPELL_STRING:
           {
             /* Wide literals get an 'L' prefix; strings are delimited by
                double quotes and everything else by single quotes.  */
             int wide = (token->type == CPP_WSTRING
                         || token->type == CPP_WCHAR);
             unsigned char quote;

             if (token->type == CPP_STRING || token->type == CPP_WSTRING)
               quote = '"';
             else
               quote = '\'';

             if (wide)
               *buffer++ = 'L';
             *buffer++ = quote;
             memcpy (buffer, token->val.name.text, token->val.name.len);
             buffer += token->val.name.len;
             *buffer++ = quote;
           }
           break;

         case SPELL_OPERATOR:
           {
             /* Tokens flagged DIGRAPH are written in their digraph form.  */
             const unsigned char *from;

             if (token->flags & DIGRAPH)
               from = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
             else
               from = token_spellings[token->type].spelling;

             for (; *from != '\0'; from++)
               *buffer++ = *from;
           }
           break;
         }

       return buffer;
1933 }
1934
1935 /* Return the spelling of a token known to be an operator.
        Digraphs are not told apart from the operators they stand for.
        A TYPE that is not spelled as an operator yields its entry in
        token_names instead.  */
1937 const unsigned char *
1938 _cpp_spell_operator (type)
1939      enum cpp_ttype type;
1940 {
       if (token_spellings[type].type != SPELL_OPERATOR)
         return token_names[type];

       return token_spellings[type].spelling;
1945 }
1946
1947
1948 /* Macro expansion algorithm.  TODO.  */
1949
     /* Constant template tokens.  parse_arg and parse_args never store
        placemarker_token itself: they store a copy made with
        duplicate_token so that each copy can be given its own flags and
        position.  */
1950 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1951 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
1952
     /* IS_ARG_CONTEXT is non-zero when context C has CONTEXT_ARG set.
        CURRENT_CONTEXT yields a pointer to entry cur_context of PFILE's
        contexts array.  */
1953 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1954 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1955
1956 /* Flags for cpp_context.  */
1957 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
1958 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
1959 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
1960 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
1961
     /* Overwrite D's flags with only the PREV_WHITE, BOL and PASTE_LEFT
        bits of S.  If BOL ends up set, D also takes S's column and line.
        D and S are each expanded several times.  */
1962 #define ASSIGN_FLAGS_AND_POS(d, s) \
1963   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1964       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1965   } while (0)
1966
1967 /* f is flags, just consisting of PREV_WHITE | BOL.  */
     /* Replace only the PREV_WHITE and BOL bits of D with F, leaving D's
        other flags alone.  If F contains BOL, D also takes S's column
        and line.  F is expanded twice, so it should have no side
        effects.  */
1968 #define MODIFY_FLAGS_AND_POS(d, s, f) \
1969   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1970       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1971   } while (0)
1972
1973 typedef struct cpp_context cpp_context;
     /* One entry of the reader's context array, which CURRENT_CONTEXT
        indexes with cur_context.  An entry describes either the token
        list of a macro being expanded or the tokens of a macro argument;
        the CONTEXT_ARG flag says which member of U is live.  */
1974 struct cpp_context
1975 {
1976   union
1977   {
1978     const cpp_toklist *list;    /* Used for macro contexts only.  */
1979     const cpp_token **arg;      /* Used for arg contexts only.  */
1980   } u;
1981
1982   /* Pushed token to be returned by next call to cpp_get_token.  */
1983   const cpp_token *pushed_token;
1984
1985   struct macro_args *args;      /* 0 for arguments and object-like macros.  */
1986   unsigned short posn;          /* Current posn, index into u.  */
1987   unsigned short count;         /* No. of tokens in u.  */
       /* is_macro_disabled stops walking down the stack at an entry
          whose level is 0, and for a CONTEXT_RAW entry uses level as an
          index into the contexts array.  NOTE(review): where level is
          assigned is not visible here -- confirm its exact meaning.  */
1988   unsigned short level;
       /* Bitmask of the CONTEXT_* flags.  */
1989   unsigned char flags;
1990 };
1991
1992 typedef struct macro_args macro_args;
     /* The collected arguments of one function-like macro invocation,
        filled in by parse_args and released by free_macro_args.  */
1993 struct macro_args
1994 {
       /* ends[i] is the running total of entries stored in TOKENS once
          argument i has been parsed, i.e. one past that argument's last
          entry.  */
1995   unsigned int *ends;
       /* Growable array of saved argument tokens.  parse_arg stores a
          null pointer as a marker each time the tokens switch between
          raw and non-raw.  */
1996   const cpp_token **tokens;
       /* Allocated and occupied entry counts of TOKENS; see save_token.  */
1997   unsigned int capacity;
1998   unsigned int used;
       /* NOTE(review): not referenced in the code visible here.  */
1999   unsigned short level;
2000 };
2001
2002 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
2003 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
2004                                            macro_args *, unsigned int *));
2005 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
2006 static void save_token PARAMS ((macro_args *, const cpp_token *));
2007 static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
2008                                                   const cpp_token *));
2009 static int do_pop_context PARAMS ((cpp_reader *));
2010 static const cpp_token *pop_context PARAMS ((cpp_reader *));
2011 static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
2012                                                     cpp_hashnode *,
2013                                                     const cpp_token *));
2014 static void free_macro_args PARAMS ((macro_args *));
2015
2016 /* Free the storage allocated for macro arguments.  */
     /* That is the ENDS array, the TOKENS array when one was ever
        allocated, and finally ARGS itself.  The tokens pointed to from
        TOKENS are not freed here.  */
2017 static void
2018 free_macro_args (args)
2019      macro_args *args;
2020 {
       free (args->ends);
       if (args->tokens != 0)
         free (args->tokens);
       free (args);
2025 }
2026
2027 /* Determines if a macro has been already used (and is therefore
2028    disabled).

        EXPANSION is the macro's replacement list and TOKEN the token
        that named the macro.  Returns 1 when the macro must not be
        expanded at this point and 0 when it may be.

        For a function-like macro (EXPANSION->paramc >= 0) the next
        token is fetched to look for '('.  A '(' stays consumed; any
        other token is handed back with _cpp_push_token, and a warning
        is issued if warn_traditional is set.  */
2029 static int
2030 is_macro_disabled (pfile, expansion, token)
2031      cpp_reader *pfile;
2032      const cpp_toklist *expansion;
2033      const cpp_token *token;
2034 {
2035   cpp_context *context = CURRENT_CONTEXT (pfile);
2036
2037   /* Don't expand anything if this file has already been preprocessed.  */
2038   if (CPP_OPTION (pfile, preprocessed))
2039     return 1;
2040
2041   /* Arguments on either side of ## are inserted in place without
2042      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2043      occurs during a later rescan pass.  The effect is that we expand
2044      iff we would as part of the macro's expansion list, so we should
2045      drop to the macro's context.  */
2046   if (IS_ARG_CONTEXT (context))
2047     {
           /* A token produced by pasting is judged from the context
              below this argument context.  */
2048       if (token->flags & PASTED)
2049         context--;
           /* An argument context without CONTEXT_RAW never expands.  */
2050       else if (!(context->flags & CONTEXT_RAW))
2051         return 1;
           /* A raw argument that is an operand of ## is likewise judged
              from the context below.  */
2052       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2053         context--;
2054     }
2055
2056   /* Have we already used this macro?  */
       /* Walk down the stack until an entry with level 0 is reached,
          looking for a macro context whose list is EXPANSION.  */
2057   while (context->level > 0)
2058     {
2059       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2060         return 1;
2061       /* Raw argument tokens are judged based on the token list they
2062          came from.  */
2063       if (context->flags & CONTEXT_RAW)
2064         context = pfile->contexts + context->level;
2065       else
2066         context--;
2067     }
2068
2069   /* Function-like macros may be disabled if the '(' is not in the
2070      current context.  We check this without disrupting the context
2071      stack.  */
2072   if (expansion->paramc >= 0)
2073     {
2074       const cpp_token *next;
2075       unsigned int prev_nme;
2076
2077       context = CURRENT_CONTEXT (pfile);
2078       /* Drop down any contexts we're at the end of: the '(' may
2079          appear in lower macro expansions, or in the rest of the file.  */
2080       while (context->posn == context->count && context > pfile->contexts)
2081         {
2082           context--;
2083           /* If we matched, we are disabled, as we appear in the
2084              expansion of each macro we meet.  */
2085           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2086             return 1;
2087         }
2088
           /* Fetch the following token with no_expand_level set to the
              index of the context reached above, then put the previous
              value back through restore_macro_expansion.  */
2089       prev_nme = pfile->no_expand_level;
2090       pfile->no_expand_level = context - pfile->contexts;
2091       next = cpp_get_token (pfile);
2092       restore_macro_expansion (pfile, prev_nme);
2093       if (next->type != CPP_OPEN_PAREN)
2094         {
               /* Not an invocation: return the token we peeked at.  */
2095           _cpp_push_token (pfile, next);
2096           if (CPP_OPTION (pfile, warn_traditional))
2097             cpp_warning (pfile,
2098          "function macro %.*s must be used with arguments in traditional C",
2099                          (int) token->val.name.len, token->val.name.text);
2100           return 1;
2101         }
2102     }
2103
2104   return 0;
2105 }
2106
2107 /* Add a token to the set of tokens forming the arguments to the macro
        being parsed in parse_args.  TOKEN is appended to ARGS->tokens.
        When the array is full its capacity is first grown to twice the
        old value plus 100.  TOKEN may be a null pointer, which parse_arg
        uses as a marker.  */
2109 static void
2110 save_token (args, token)
2111      macro_args *args;
2112      const cpp_token *token;
2113 {
       if (args->used == args->capacity)
         {
           unsigned int grown = args->capacity + args->capacity + 100;

           args->tokens = (const cpp_token **)
             xrealloc (args->tokens, grown * sizeof (const cpp_token *));
           args->capacity = grown;
         }

       args->tokens[args->used] = token;
       args->used++;
2121 }
2122
2123 /* Take and save raw tokens until we finish one argument.  Empty
2124    arguments are saved as a single CPP_PLACEMARKER token.

        VAR_ARGS is non-zero when the argument being read is the
        variable-argument parameter, in which case commas do not end it.
        PAREN_CONTEXT is the context index that was current when the
        argument list began.  Tokens are appended to ARGS, and *PCOUNT
        receives the number of entries appended, counting the null
        markers and the placemarker.  Returns the token that ended the
        argument: a ')', a ',' or CPP_EOF.  */
2125 static const cpp_token *
2126 parse_arg (pfile, var_args, paren_context, args, pcount)
2127      cpp_reader *pfile;
2128      int var_args;
2129      unsigned int paren_context;
2130      macro_args *args;
2131      unsigned int *pcount;
2132 {
2133   const cpp_token *token;
2134   unsigned int paren = 0, count = 0;
2135   int raw, was_raw = 1;
2136
       /* COUNT is bumped once per saved token by the loop header and
          once more in the body for every null marker saved.  */
2137   for (count = 0;; count++)
2138     {
2139       token = cpp_get_token (pfile);
2140
2141       switch (token->type)
2142         {
2143         default:
2144           break;
2145
2146         case CPP_OPEN_PAREN:
2147           paren++;
2148           break;
2149
2150         case CPP_CLOSE_PAREN:
               /* A ')' at nesting depth zero ends the argument; the
                  post-decrement then wraps PAREN, which is not used
                  again.  */
2151           if (paren-- != 0)
2152             break;
2153           goto out;
2154
2155         case CPP_COMMA:
2156           /* Commas are not terminators within parentheses or var_args.  */
2157           if (paren || var_args)
2158             break;
2159           goto out;
2160
2161         case CPP_EOF:           /* Error reported by caller.  */
2162           goto out;
2163         }
2164
           /* A token counts as raw when the current context is no deeper
              than the one the argument list started in.  A null pointer
              is saved ahead of the token each time that state changes;
              the first token is assumed raw.  */
2165       raw = pfile->cur_context <= paren_context;
2166       if (raw != was_raw)
2167         {
2168           was_raw = raw;
2169           save_token (args, 0);
2170           count++;
2171         }
2172       save_token (args, token);
2173     }
2174
2175  out:
2176   if (count == 0)
2177     {
2178       /* Duplicate the placemarker.  Then we can set its flags and
2179          position and safely be using more than one.  */
2180       save_token (args, duplicate_token (pfile, &placemarker_token));
2181       count++;
2182     }
2183
2184   *pcount = count;
2185   return token;
2186 }
2187
2188 /* This macro returns true if the argument starting at offset O of arglist
2189    A is empty - that is, it's either a single PLACEMARKER token, or a null
2190    pointer followed by a PLACEMARKER.  */
     /* A and O are each expanded more than once, so pass operands without
        side effects.  When the entry at O is null, the entry at O + 1 is
        dereferenced without any further check.  */
2191
2192 #define empty_argument(A, O) \
2193  ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
2194                  : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2195
2196 /* Parse the arguments making up a macro invocation.  Nested arguments
        are automatically macro expanded, but immediate macros are not
        expanded; this enables e.g. operator # to work correctly.

        HP is the macro being invoked.  The tokens of every argument are
        appended to ARGS, and for each declared parameter ARGS->ends
        records the running total of saved entries.  Returns non-zero
        after reporting an error, and zero on success.  */
2200 static int
2201 parse_args (pfile, hp, args)
2202      cpp_reader *pfile;
2203      cpp_hashnode *hp;
2204      macro_args *args;
2205 {
       const cpp_toklist *macro = hp->value.expansion;
       unsigned int paren_context = pfile->cur_context;
       unsigned int saved = 0;
       const cpp_token *last;
       int argc = 0;

       /* Read one argument per iteration until the ')' or EOF.  */
       for (;;)
         {
           unsigned int n;
           int is_rest = (argc + 1 == macro->paramc
                          && (macro->flags & VAR_ARGS));

           last = parse_arg (pfile, is_rest, paren_context, args, &n);
           /* Surplus arguments are still parsed, but only declared
              parameters get an end offset.  */
           if (argc < macro->paramc)
             {
               saved += n;
               args->ends[argc] = saved;
             }
           argc++;

           if (last->type == CPP_CLOSE_PAREN || last->type == CPP_EOF)
             break;
         }

       if (last->type == CPP_EOF)
         {
           cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
                      hp->length, hp->name);
           return 1;
         }

       if (argc < macro->paramc)
         {
           /* A rest argument is allowed to not appear in the invocation
              at all, e.g.
                #define debug(format, args...) ...
                debug("string");
              which is treated exactly like debug("string",).  This
              extension is deprecated.  */
           if (argc + 1 != macro->paramc || !(macro->flags & GNU_REST_ARGS))
             {
               cpp_error (pfile,
                          "insufficient arguments in invocation of macro \"%.*s\"",
                          hp->length, hp->name);
               return 1;
             }

           /* Stand in a private copy of the placemarker for the missing
              rest argument.  */
           save_token (args, duplicate_token (pfile, &placemarker_token));
           args->ends[argc] = saved + 1;
           return 0;
         }

       /* An empty argument to an empty function-like macro is fine.  */
       if (argc > macro->paramc
           && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
         {
           cpp_error (pfile,
                      "too many arguments in invocation of macro \"%.*s\"",
                      hp->length, hp->name);
           return 1;
         }

       return 0;
2270 }
2271
2272 /* Adds backslashes before all backslashes and double quotes appearing
2273    in strings.  Non-printable characters are converted to octal.

        Copies the LEN characters at SRC to DEST in that quoted form and
        returns a pointer just past the last character written.  A source
        character expands to at most four output characters and nothing
        else is stored, so a DEST of LEN * 4 bytes is always enough.  No
        terminating NUL is written.  */
2274 static U_CHAR *
2275 quote_string (dest, src, len)
2276      U_CHAR *dest;
2277      const U_CHAR *src;
2278      unsigned int len;
2279 {
2280   while (len--)
2281     {
2282       U_CHAR c = *src++;
2283
2284       if (c == '\\' || c == '"')
2285         {
2286           *dest++ = '\\';
2287           *dest++ = c;
2288         }
           else if (ISPRINT (c))
             *dest++ = c;
           else
             {
               /* Build the "\ooo" escape by hand.  Formatting it with
                  sprintf also stored a NUL after the escape, a fifth
                  byte that callers sizing DEST at four bytes per
                  character had not allowed for.  This assumes C fits in
                  eight bits, as advancing DEST by exactly four always
                  did.  */
               *dest++ = '\\';
               *dest++ = '0' + ((c >> 6) & 7);
               *dest++ = '0' + ((c >> 3) & 7);
               *dest++ = '0' + (c & 7);
             }
2299     }
2300
2301   return dest;
2302 }
2303
2304 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2305    CPP_STRING token containing TEXT in quoted form.

        TEXT need not be NUL-terminated; LEN gives its length.  TOKEN's
        flags are cleared and its name text and length are set to the
        quoted spelling produced by quote_string.  The buffer comes from
        xmalloc and is afterwards reachable only through
        TOKEN->val.name.text.  Returns TOKEN.  */
2306 static cpp_token *
2307 make_string_token (token, text, len)
2308      cpp_token *token;
2309      const U_CHAR *text;
2310      unsigned int len;
2311 {
2312   U_CHAR *buf;
2313
       /* Quoting can turn every character into a four-character octal
          escape.  The one extra byte keeps a terminator stored after the
          final escape inside the buffer, and keeps the request non-zero
          when LEN is 0.  */
       buf = (U_CHAR *) xmalloc (len * 4 + 1);
2315   token->type = CPP_STRING;
2316   token->flags = 0;
2317   token->val.name.text = buf;
2318   token->val.name.len = quote_string (buf, text, len) - buf;
2319   return token;
2320 }
2321
2322 /* Allocates and converts a temporary token to a CPP_NUMBER token,
        evaluating to NUMBER.  The token comes from PFILE's temporary
        store; its spelling is NUMBER printed in decimal into a freshly
        allocated 20-byte buffer, and its flags are cleared.  */
2324 static cpp_token *
2325 alloc_number_token (pfile, number)
2326      cpp_reader *pfile;
2327      int number;
2328 {
       char *digits;
       cpp_token *tok = get_temp_token (pfile);

       digits = xmalloc (20);
       sprintf (digits, "%d", number);

       tok->val.name.len = strlen (digits);
       tok->val.name.text = (U_CHAR *) digits;
       tok->flags = 0;
       tok->type = CPP_NUMBER;

       return tok;
2341 }
2342
2343 /* Returns a temporary token from the temporary token store of PFILE.  */
     /* A token that was allocated earlier and has since been released is
        reused when one is available.  Otherwise a new one is allocated,
        first growing the pointer array to twice its capacity plus 20 if
        it is full.  The token's contents are not initialised here.  */
2344 static cpp_token *
2345 get_temp_token (pfile)
2346      cpp_reader *pfile;
2347 {
       cpp_token *tok;

       if (pfile->temp_used == pfile->temp_alloced)
         {
           /* Every allocated token is in use: make one more.  */
           if (pfile->temp_used == pfile->temp_cap)
             {
               pfile->temp_cap = pfile->temp_cap + pfile->temp_cap + 20;
               pfile->temp_tokens = (cpp_token **) xrealloc
                 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
             }

           tok = (cpp_token *) xmalloc (sizeof (cpp_token));
           pfile->temp_tokens[pfile->temp_alloced] = tok;
           pfile->temp_alloced++;
         }

       tok = pfile->temp_tokens[pfile->temp_used];
       pfile->temp_used++;
       return tok;
2361 }
2362
2363 /* Release (not free) for re-use the temporary tokens of PFILE.  */
     /* The token structures stay allocated; only the spelling text of
        tokens whose spelling class ranks above SPELL_NONE is freed, and
        its pointer cleared.  temp_used is zero afterwards.  */
2364 static void
2365 release_temp_tokens (pfile)
2366      cpp_reader *pfile;
2367 {
       while (pfile->temp_used != 0)
         {
           cpp_token *tok;

           pfile->temp_used--;
           tok = pfile->temp_tokens[pfile->temp_used];

           if (token_spellings[tok->type].type <= SPELL_NONE)
             continue;

           free ((char *) tok->val.name.text);
           tok->val.name.text = 0;
         }
2378 }
2379
2380 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
     /* This covers the temporary token store and, when PFILE->date is
        set, the date and time tokens together with their text.  */
2381 void
2382 _cpp_free_temp_tokens (pfile)
2383      cpp_reader *pfile;
2384 {
       if (pfile->temp_tokens)
         {
           /* It is possible, though unlikely (looking for '(' of a funlike
              macro into EOF), that we haven't released the tokens yet.  */
           release_temp_tokens (pfile);

           while (pfile->temp_alloced != 0)
             {
               pfile->temp_alloced--;
               free (pfile->temp_tokens[pfile->temp_alloced]);
             }
           free (pfile->temp_tokens);
         }

       /* Only the date token is tested; the time token is freed along
          with it.  */
       if (!pfile->date)
         return;

       free ((char *) pfile->date->val.name.text);
       free (pfile->date);
       free ((char *) pfile->time->val.name.text);
       free (pfile->time);
2402 }
2403
2404 /* Copy TOKEN into a temporary token from PFILE's store.  */
     /* The structure is copied as a whole.  When TOKEN's spelling class
        ranks above SPELL_NONE, its text is copied into a new buffer of
        exactly val.name.len bytes so the copy does not share it.  */
2405 static cpp_token *
2406 duplicate_token (pfile, token)
2407      cpp_reader *pfile;
2408      const cpp_token *token;
2409 {
       U_CHAR *text;
       cpp_token *copy = get_temp_token (pfile);

       *copy = *token;
       if (token_spellings[token->type].type <= SPELL_NONE)
         return copy;

       text = (U_CHAR *) xmalloc (token->val.name.len);
       memcpy (text, token->val.name.text, token->val.name.len);
       copy->val.name.text = text;
       return copy;
2420 }
2421
2422 /* Determine whether two tokens can be pasted together, and if so,
2423    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2424    be pasted, or the appropriate type for the merged token if they
2425    can.

        TOKEN1 is the left-hand token and TOKEN2 the right-hand one.
        *DIGRAPH is written only on some paths: it is set to 1 when the
        result is a digraph spelling, and to TOKEN1's DIGRAPH flag bit
        when two '#' tokens merge into CPP_PASTE.  On every other path it
        is left untouched, so callers that read it must initialise it
        first.  Several combinations are accepted only when the cplusplus
        option is set.  */
2426 static enum cpp_ttype
2427 can_paste (pfile, token1, token2, digraph)
2428      cpp_reader * pfile;
2429      const cpp_token *token1, *token2;
2430      int* digraph;
2431 {
2432   enum cpp_ttype a = token1->type, b = token2->type;
2433   int cxx = CPP_OPTION (pfile, cplusplus);
2434
       /* Any operator up to CPP_LAST_EQ followed by '=' becomes its "op="
          form.  This depends on each such token type lying at the same
          distance from its "op=" type as CPP_EQ lies from CPP_EQ_EQ.  */
2435   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2436     return a + (CPP_EQ_EQ - CPP_EQ);
2437
2438   switch (a)
2439     {
2440     case CPP_GREATER:
2441       if (b == a) return CPP_RSHIFT;
2442       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2443       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2444       break;
2445     case CPP_LESS:
2446       if (b == a) return CPP_LSHIFT;
2447       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2448       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2449       if (b == CPP_COLON)
2450         {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2451       if (b == CPP_MOD)
2452         {*digraph = 1; return CPP_OPEN_BRACE;}  /* <% digraph */
2453       break;
2454
2455     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2456     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2457     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2458
2459     case CPP_MINUS:
2460       if (b == a)               return CPP_MINUS_MINUS;
2461       if (b == CPP_GREATER)     return CPP_DEREF;
2462       break;
2463     case CPP_COLON:
2464       if (b == a && cxx)        return CPP_SCOPE;
2465       if (b == CPP_GREATER)
2466         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2467       break;
2468
2469     case CPP_MOD:
2470       if (b == CPP_GREATER)
2471         {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2472       if (b == CPP_COLON)
2473         {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2474       break;
2475     case CPP_DEREF:
2476       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2477       break;
2478     case CPP_DOT:
2479       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2480       if (b == CPP_NUMBER)      return CPP_NUMBER;
2481       break;
2482
2483     case CPP_HASH:
           /* Two '#' tokens merge only if both or neither were spelled
              as the "%:" digraph.  */
2484       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2485         /* %:%: digraph */
2486         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2487       break;
2488
2489     case CPP_NAME:
2490       if (b == CPP_NAME)        return CPP_NAME;
           /* A number extends the name only when is_numstart accepts the
              number's first character.  */
2491       if (b == CPP_NUMBER
2492           && is_numstart(token2->val.name.text[0]))     return CPP_NAME;
           /* The one-letter name "L" in front of a character constant or
              string literal forms the wide variant.  */
2493       if (b == CPP_CHAR
2494           && token1->val.name.len == 1
2495           && token1->val.name.text[0] == 'L')   return CPP_WCHAR;
2496       if (b == CPP_STRING
2497           && token1->val.name.len == 1
2498           && token1->val.name.text[0] == 'L')   return CPP_WSTRING;
2499       break;
2500
2501     case CPP_NUMBER:
2502       if (b == CPP_NUMBER)      return CPP_NUMBER;
2503       if (b == CPP_NAME)        return CPP_NUMBER;
2504       if (b == CPP_DOT)         return CPP_NUMBER;
2505       /* Numbers cannot have length zero, so this is safe.  */
           /* A '+' or '-' joins the number only when VALID_SIGN accepts
              it after the number's last character.  NOTE(review):
              presumably an exponent letter -- confirm against
              VALID_SIGN.  */
2506       if ((b == CPP_PLUS || b == CPP_MINUS)
2507           && VALID_SIGN ('+', token1->val.name.text[token1->val.name.len - 1]))
2508         return CPP_NUMBER;
2509       break;
2510
2511     default:
2512       break;
2513     }
2514
2515   return CPP_EOF;
2516 }
2517
2518 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2519 static const cpp_token *
2520 maybe_paste_with_next (pfile, token)
2521      cpp_reader *pfile;
2522      const cpp_token *token;
2523 {
2524   cpp_token *pasted;
2525   const cpp_token *second;
2526   cpp_context *context = CURRENT_CONTEXT (pfile);
2527
2528   /* Is this token on the LHS of ## ? */
2529   if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2530       && !(token->flags & PASTE_LEFT))
2531     return token;
2532
2533   /* Prevent recursion, and possibly pushing back more than one token.  */
2534   if (pfile->paste_level)
2535     return token;
2536
2537   /* Suppress macro expansion for next token, but don't conflict with
2538      the other method of suppression.  If it is an argument, macro
2539      expansion within the argument will still occur.  */
2540   pfile->paste_level = pfile->cur_context;
2541   second = cpp_get_token (pfile);
2542   pfile->paste_level = 0;
2543
2544   /* Ignore placemarker argument tokens (cannot be from an empty macro
2545      since macros are not expanded).  */
2546   if (token->type == CPP_PLACEMARKER)
2547      pasted = duplicate_token (pfile, second);
2548   else if (second->type == CPP_PLACEMARKER)
2549     {
2550       cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2551       /* GCC has special extended semantics for a ## b where b is a
2552          varargs parameter: a disappears if b consists of no tokens.
2553          This extension is deprecated.  */
2554       if ((mac_context->u.list->flags & GNU_REST_ARGS)
2555           && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2556               == (unsigned) mac_context->u.list->paramc))
2557         {
2558           cpp_warning (pfile, "deprecated GNU ## extension used");
2559           pasted = duplicate_token (pfile, second);
2560         }
2561       else
2562         pasted = duplicate_token (pfile, token);
2563     }
2564   else
2565     {
2566       int digraph = 0;
2567       enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2568
2569       if (type == CPP_EOF)
2570         {
2571           if (CPP_OPTION (pfile, warn_paste))
2572             cpp_warning (pfile,
2573                          "pasting would not give a valid preprocessing token");
2574           _cpp_push_token (pfile, second);
2575           return token;
2576         }
2577
2578       if (type == CPP_NAME || type == CPP_NUMBER)
2579         {
2580           /* Join spellings.  */
2581           U_CHAR *buff, *buff2;
2582
2583           pasted = get_temp_token (pfile);
2584           buff = (U_CHAR *) xmalloc (TOKEN_LEN (token) + TOKEN_LEN (second));
2585           buff2 = spell_token (pfile, token, buff);
2586           buff2 = spell_token (pfile, second, buff2);
2587
2588           pasted->val.name.text = buff;
2589           pasted->val.name.len = buff2 - buff;
2590         }
2591       else if (type == CPP_WCHAR || type == CPP_WSTRING)
2592         pasted = duplicate_token (pfile, second);
2593       else
2594         {
2595           pasted = get_temp_token (pfile);
2596           pasted->val.integer = 0;
2597         }
2598
2599       pasted->type = type;
2600       pasted->flags = digraph ? DIGRAPH: 0;
2601     }
2602
2603   /* The pasted token gets the whitespace flags and position of the
2604      first token, the PASTE_LEFT flag of the second token, plus the
2605      PASTED flag to indicate it is the result of a paste.  However, we
2606      want to preserve the DIGRAPH flag.  */
2607   pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2608   pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2609                     | (second->flags & PASTE_LEFT) | PASTED);
2610   pasted->col = token->col;
2611   pasted->line = token->line;
2612
2613   return maybe_paste_with_next (pfile, pasted);
2614 }
2615
2616 /* Convert a token sequence to a single string token according to the
2617    rules of the ISO C #-operator.  */
2618 #define INIT_SIZE 200
2619 static cpp_token *
2620 stringify_arg (pfile, token)
2621      cpp_reader *pfile;
2622      const cpp_token *token;
2623 {
2624   cpp_token *result;
2625   unsigned char *main_buf;
2626   unsigned int prev_value, backslash_count = 0;
2627   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2628
2629   prev_value  = prevent_macro_expansion (pfile);
2630   main_buf = (unsigned char *) xmalloc (buf_cap);
2631
2632   result = get_temp_token (pfile);
2633   ASSIGN_FLAGS_AND_POS (result, token);
2634
2635   for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
2636     {
2637       int escape;
2638       unsigned char *buf;
2639       unsigned int len = TOKEN_LEN (token);
2640
2641       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2642                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2643       if (escape)
2644         len *= 4 + 1;
2645
2646       if (buf_used + len > buf_cap)
2647         {
2648           buf_cap = buf_used + len + INIT_SIZE;
2649           main_buf = xrealloc (main_buf, buf_cap);
2650         }
2651
2652       if (whitespace && (token->flags & PREV_WHITE))
2653         main_buf[buf_used++] = ' ';
2654
2655       if (escape)
2656         buf = (unsigned char *) xmalloc (len);
2657       else
2658         buf = main_buf + buf_used;
2659
2660       len = spell_token (pfile, token, buf) - buf;
2661       if (escape)
2662         {
2663           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2664           free (buf);
2665         }
2666       else
2667         buf_used += len;
2668
2669       whitespace = 1;
2670       if (token->type == CPP_BACKSLASH)
2671         backslash_count++;
2672       else
2673         backslash_count = 0;
2674     }
2675
2676   /* Ignore the final \ of invalid string literals.  */
2677   if (backslash_count & 1)
2678     {
2679       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2680       buf_used--;
2681     }
2682
2683   result->type = CPP_STRING;
2684   result->val.name.text = main_buf;
2685   result->val.name.len = buf_used;
2686   restore_macro_expansion (pfile, prev_value);
2687   return result;
2688 }
2689
2690 /* Allocate more room on the context stack of PFILE.  */
2691 static void
2692 expand_context_stack (pfile)
2693      cpp_reader *pfile;
2694 {
2695   pfile->context_cap += pfile->context_cap + 20;
2696   pfile->contexts = (cpp_context *)
2697     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2698 }
2699
2700 /* Push the context of macro NODE onto the context stack.  TOKEN is
2701    the CPP_NAME token invoking the macro.  */
2702 static const cpp_token *
2703 push_macro_context (pfile, node, token)
2704      cpp_reader *pfile;
2705      cpp_hashnode *node;
2706      const cpp_token *token;
2707 {
2708   unsigned char orig_flags;
2709   macro_args *args;
2710   cpp_context *context;
2711
2712   if (pfile->cur_context > CPP_STACK_MAX)
2713     {
2714       cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2715       return token;
2716     }
2717
2718   /* Token's flags may change when parsing args containing a nested
2719      invocation of this macro.  */
2720   orig_flags = token->flags & (PREV_WHITE | BOL);
2721   args = 0;
2722   if (node->value.expansion->paramc >= 0)
2723     {
2724       unsigned int error, prev_nme;
2725
2726       /* Allocate room for the argument contexts, and parse them.  */
2727       args  = (macro_args *) xmalloc (sizeof (macro_args));
2728       args->ends = (unsigned int *)
2729         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2730       args->tokens = 0;
2731       args->capacity = 0;
2732       args->used = 0;
2733       args->level = pfile->cur_context;
2734
2735       prev_nme = prevent_macro_expansion (pfile);
2736       pfile->args = args;
2737       error = parse_args (pfile, node, args);
2738       pfile->args = 0;
2739       restore_macro_expansion (pfile, prev_nme);
2740       if (error)
2741         {
2742           free_macro_args (args);
2743           return token;
2744         }
2745     }
2746
2747   /* Now push its context.  */
2748   pfile->cur_context++;
2749   if (pfile->cur_context == pfile->context_cap)
2750     expand_context_stack (pfile);
2751
2752   context = CURRENT_CONTEXT (pfile);
2753   context->u.list = node->value.expansion;
2754   context->args = args;
2755   context->posn = 0;
2756   context->count = context->u.list->tokens_used;
2757   context->level = pfile->cur_context;
2758   context->flags = 0;
2759   context->pushed_token = 0;
2760
2761   /* Set the flags of the first token.  We know there must
2762      be one, empty macros are a single placemarker token.  */
2763   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2764
2765   return cpp_get_token (pfile);
2766 }
2767
2768 /* Push an argument to the current macro onto the context stack.
2769    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2770 static const cpp_token *
2771 push_arg_context (pfile, token)
2772      cpp_reader *pfile;
2773      const cpp_token *token;
2774 {
2775   cpp_context *context;
2776   macro_args *args;
2777
2778   pfile->cur_context++;
2779   if (pfile->cur_context == pfile->context_cap)
2780       expand_context_stack (pfile);
2781
2782   context = CURRENT_CONTEXT (pfile);
2783   args = context[-1].args;
2784
2785   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2786   context->u.arg = args->tokens + context->count;
2787   context->count = args->ends[token->val.aux] - context->count;
2788   context->args = 0;
2789   context->posn = 0;
2790   context->level = args->level;
2791   context->flags = CONTEXT_ARG | CONTEXT_RAW;
2792   context->pushed_token = 0;
2793
2794   /* Set the flags of the first token.  There is one.  */
2795   {
2796     const cpp_token *first = context->u.arg[0];
2797     if (!first)
2798       first = context->u.arg[1];
2799
2800     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2801                           token->flags & (PREV_WHITE | BOL));
2802   }
2803
2804   if (token->flags & STRINGIFY_ARG)
2805     return stringify_arg (pfile, token);
2806
2807   if (token->flags & PASTE_LEFT)
2808     context->flags |= CONTEXT_PASTEL;
2809   if (pfile->paste_level)
2810     context->flags |= CONTEXT_PASTER;
2811
2812   return get_raw_token (pfile);
2813 }
2814
2815 /* "Unget" a token.  It is effectively inserted in the token queue and
2816    will be returned by the next call to get_raw_token.  */
2817 void
2818 _cpp_push_token (pfile, token)
2819      cpp_reader *pfile;
2820      const cpp_token *token;
2821 {
2822   cpp_context *context = CURRENT_CONTEXT (pfile);
2823   if (context->pushed_token)
2824     cpp_ice (pfile, "two tokens pushed in a row");
2825   if (token->type != CPP_EOF)
2826     context->pushed_token = token;
2827   /* Don't push back a directive's CPP_EOF, step back instead.  */
2828   else if (pfile->cur_context == 0)
2829     pfile->contexts[0].posn--;
2830 }
2831
2832 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
2833    introducing the directive.  */
2834 static void
2835 process_directive (pfile, token)
2836      cpp_reader *pfile;
2837      const cpp_token *token;
2838 {
2839   const struct directive *d = pfile->token_list.directive;
2840   int prev_nme = 0;
2841
2842   /* Skip over the directive name.  */
2843   if (token[1].type == CPP_NAME)
2844     _cpp_get_raw_token (pfile);
2845   else if (token[1].type != CPP_NUMBER)
2846     cpp_ice (pfile, "directive begins with %s?!",
2847              token_names[token[1].type]);
2848
2849   /* Flush pending tokens at this point, in case the directive produces
2850      output.  XXX Directive output won't be visible to a direct caller of
2851      cpp_get_token.  */
2852   if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2853     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2854
2855   if (! (d->flags & EXPAND))
2856     prev_nme = prevent_macro_expansion (pfile);
2857   (void) (*d->handler) (pfile);
2858   if (! (d->flags & EXPAND))
2859     restore_macro_expansion (pfile, prev_nme);
2860   _cpp_skip_rest_of_line (pfile);
2861 }
2862
2863 /* The external interface to return the next token.  All macro
2864    expansion and directive processing is handled internally, the
2865    caller only ever sees the output after preprocessing.  */
2866 const cpp_token *
2867 cpp_get_token (pfile)
2868      cpp_reader *pfile;
2869 {
2870   const cpp_token *token;
2871   cpp_hashnode *node;
2872
2873   /* Loop till we hit a non-directive, non-skipped, non-placemarker token.  */
2874   for (;;)
2875     {
2876       token = get_raw_token (pfile);
2877       if (token->flags & BOL && token->type == CPP_HASH
2878           && pfile->token_list.directive)
2879         {
2880           process_directive (pfile, token);
2881           continue;
2882         }
2883
2884       /* Short circuit EOF. */
2885       if (token->type == CPP_EOF)
2886         return token;
2887
2888       if (pfile->skipping && ! pfile->token_list.directive)
2889         {
2890           _cpp_skip_rest_of_line (pfile);
2891           continue;
2892         }
2893       break;
2894     }
2895
2896   /* If there's a potential control macro and we get here, then that
2897      #ifndef didn't cover the entire file and its argument shouldn't
2898      be taken as a control macro.  */
2899   pfile->potential_control_macro = 0;
2900
2901   token = maybe_paste_with_next (pfile, token);
2902
2903   if (token->type != CPP_NAME)
2904     return token;
2905
2906   /* Is macro expansion disabled in general?  */
2907   if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2908     return token;
2909
2910   node = cpp_lookup (pfile, token->val.name.text, token->val.name.len);
2911   if (node->type == T_VOID)
2912     return token;
2913
2914   if (node->type == T_MACRO)
2915     {
2916       if (is_macro_disabled (pfile, node->value.expansion, token))
2917         return token;
2918
2919       return push_macro_context (pfile, node, token);
2920     }
2921   else
2922     return special_symbol (pfile, node, token);
2923 }
2924
2925 /* Returns the next raw token, i.e. without performing macro
2926    expansion.  Argument contexts are automatically entered.  */
2927 static const cpp_token *
2928 get_raw_token (pfile)
2929      cpp_reader *pfile;
2930 {
2931   const cpp_token *result;
2932   cpp_context *context = CURRENT_CONTEXT (pfile);
2933
2934   if (context->pushed_token)
2935     {
2936       result = context->pushed_token;
2937       context->pushed_token = 0;
2938     }
2939   else if (context->posn == context->count)
2940     result = pop_context (pfile);
2941   else
2942     {
2943       if (IS_ARG_CONTEXT (context))
2944         {
2945           result = context->u.arg[context->posn++];
2946           if (result == 0)
2947             {
2948               context->flags ^= CONTEXT_RAW;
2949               result = context->u.arg[context->posn++];
2950             }
2951           return result;        /* Cannot be a CPP_MACRO_ARG */
2952         }
2953       result = &context->u.list->tokens[context->posn++];
2954     }
2955
2956   if (result->type == CPP_MACRO_ARG)
2957     result = push_arg_context (pfile, result);
2958   return result;
2959 }
2960
2961 /* Internal interface to get the token without macro expanding.  */
2962 const cpp_token *
2963 _cpp_get_raw_token (pfile)
2964      cpp_reader *pfile;
2965 {
2966   int prev_nme = prevent_macro_expansion (pfile);
2967   const cpp_token *result = cpp_get_token (pfile);
2968   restore_macro_expansion (pfile, prev_nme);
2969   return result;
2970 }
2971
2972 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
2973    list should be overwritten, or zero if we need to append
2974    (typically, if we are within the arguments to a macro, or looking
2975    for the '(' to start a function-like macro invocation).  */
2976 static int
2977 lex_next (pfile, clear)
2978      cpp_reader *pfile;
2979      int clear;
2980 {
2981   cpp_toklist *list = &pfile->token_list;
2982   const cpp_token *old_list = list->tokens;
2983   unsigned int old_used = list->tokens_used;
2984
2985   if (clear)
2986     {
2987       /* Release all temporary tokens.  */
2988       _cpp_clear_toklist (list);
2989       pfile->contexts[0].posn = 0;
2990       if (pfile->temp_used)
2991         release_temp_tokens (pfile);
2992     }
2993   else
2994     {
2995       /* If we are currently processing a directive, do not advance.
2996          (6.10 paragraph 2: A new-line character ends the directive
2997          even if it occurs within what would otherwise be an
2998          invocation of a function-like macro.)  */
2999       if (list->directive)
3000         return 1;
3001     }
3002
3003   lex_line (pfile, list);
3004   pfile->contexts[0].count = list->tokens_used;
3005
3006   if (!clear && pfile->args)
3007     {
3008       /* Fix up argument token pointers.  */
3009       if (old_list != list->tokens)
3010         {
3011           unsigned int i;
3012
3013           for (i = 0; i < pfile->args->used; i++)
3014             {
3015               const cpp_token *token = pfile->args->tokens[i];
3016               if (token >= old_list && token < old_list + old_used)
3017                 pfile->args->tokens[i] = (const cpp_token *)
3018                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3019             }
3020         }
3021
3022       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3023          tokens within the list of arguments that would otherwise act as
3024          preprocessing directives, the behavior is undefined.
3025
3026          This implementation will report a hard error and treat the
3027          'sequence of preprocessing tokens' as part of the macro argument,
3028          not a directive.
3029
3030          Note if pfile->args == 0, we're OK since we're only inside a
3031          macro argument after a '('.  */
3032       if (list->directive)
3033         {
3034           cpp_error_with_line (pfile, list->tokens[old_used].line,
3035                                list->tokens[old_used].col,
3036                                "#%s may not be used inside a macro argument",
3037                                list->directive->name);
3038           /* Don't treat as a directive: clear list->directive,
3039              prune the final EOF from the list.  */
3040           list->directive = 0;
3041           list->tokens_used--;
3042           pfile->contexts[0].count--;
3043         }
3044     }
3045
3046   return 0;
3047 }
3048
3049 /* Pops a context of the context stack.  If we're at the bottom, lexes
3050    the next logical line.  Returns 1 if we're at the end of the
3051    argument list to the # operator, or if it is illegal to "overflow"
3052    into the rest of the file (e.g. 6.10.3.1.1).  */
3053 static int
3054 do_pop_context (pfile)
3055      cpp_reader *pfile;
3056 {
3057   cpp_context *context;
3058
3059   if (pfile->cur_context == 0)
3060     return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3061
3062   /* Argument contexts, when parsing args or handling # operator
3063      return CPP_EOF at the end.  */
3064   context = CURRENT_CONTEXT (pfile);
3065   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3066     return 1;
3067
3068   /* Free resources when leaving macro contexts.  */
3069   if (context->args)
3070     free_macro_args (context->args);
3071
3072   if (pfile->cur_context == pfile->no_expand_level)
3073     pfile->no_expand_level--;
3074   pfile->cur_context--;
3075
3076   return 0;
3077 }
3078
3079 /* Move down the context stack, and return the next raw token.  */
3080 static const cpp_token *
3081 pop_context (pfile)
3082      cpp_reader *pfile;
3083 {
3084   if (do_pop_context (pfile))
3085     return &eof_token;
3086   return get_raw_token (pfile);
3087 }
3088
3089 /* Turn off macro expansion at the current context level.  */
3090 static unsigned int
3091 prevent_macro_expansion (pfile)
3092      cpp_reader *pfile;
3093 {
3094   unsigned int prev_value = pfile->no_expand_level;
3095   pfile->no_expand_level = pfile->cur_context;
3096   return prev_value;
3097 }
3098
3099 /* Restore macro expansion to its previous state.  */
3100 static void
3101 restore_macro_expansion (pfile, prev_value)
3102      cpp_reader *pfile;
3103      unsigned int prev_value;
3104 {
3105   pfile->no_expand_level = prev_value;
3106 }
3107
3108 /* Used by cpperror.c to obtain the correct line and column to report
3109    in a diagnostic.  */
3110 unsigned int
3111 _cpp_get_line (pfile, pcol)
3112      cpp_reader *pfile;
3113      unsigned int *pcol;
3114 {
3115   unsigned int index;
3116   const cpp_token *cur_token;
3117
3118   if (pfile->in_lex_line)
3119     index = pfile->token_list.tokens_used;
3120   else
3121     index = pfile->contexts[0].posn;
3122
3123   cur_token = &pfile->token_list.tokens[index - 1];
3124   if (pcol)
3125     *pcol = cur_token->col;
3126   return cur_token->line;
3127 }
3128
3129 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3130 static const char * const monthnames[] =
3131 {
3132   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3133   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3134 };
3135
3136 /* Handle builtin macros like __FILE__.  */
3137 static const cpp_token *
3138 special_symbol (pfile, node, token)
3139      cpp_reader *pfile;
3140      cpp_hashnode *node;
3141      const cpp_token *token;
3142 {
3143   cpp_token *result;
3144   cpp_buffer *ip;
3145
3146   switch (node->type)
3147     {
3148     case T_FILE:
3149     case T_BASE_FILE:
3150       {
3151         const char *file;
3152
3153         ip = CPP_BUFFER (pfile);
3154         if (ip == 0)
3155           file = "";
3156         else
3157           {
3158             if (node->type == T_BASE_FILE)
3159               while (CPP_PREV_BUFFER (ip) != NULL)
3160                 ip = CPP_PREV_BUFFER (ip);
3161
3162             file = ip->nominal_fname;
3163           }
3164         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3165                                     strlen (file));
3166       }
3167       break;
3168
3169     case T_INCLUDE_LEVEL:
3170       {
3171         int true_indepth = 0;
3172
3173         /* Do not count the primary source file in the include level.  */
3174         ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3175         while (ip)
3176           {
3177             true_indepth++;
3178             ip = CPP_PREV_BUFFER (ip);
3179           }
3180         result = alloc_number_token (pfile, true_indepth);
3181       }
3182       break;
3183
3184     case T_SPECLINE:
3185       /* If __LINE__ is embedded in a macro, it must expand to the
3186          line of the macro's invocation, not its definition.
3187          Otherwise things like assert() will not work properly.  */
3188       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3189       break;
3190
3191     case T_STDC:
3192       {
3193         int stdc = 1;
3194
3195 #ifdef STDC_0_IN_SYSTEM_HEADERS
3196         if (CPP_IN_SYSTEM_HEADER (pfile)
3197             && !cpp_defined (pfile, DSC("__STRICT_ANSI__")))
3198           stdc = 0;
3199 #endif
3200         result = alloc_number_token (pfile, stdc);
3201       }
3202       break;
3203
3204     case T_DATE:
3205     case T_TIME:
3206       if (pfile->date == 0)
3207         {
3208           /* Allocate __DATE__ and __TIME__ from permanent storage,
3209              and save them in pfile so we don't have to do this again.
3210              We don't generate these strings at init time because
3211              time() and localtime() are very slow on some systems.  */
3212           time_t tt = time (NULL);
3213           struct tm *tb = localtime (&tt);
3214
3215           pfile->date = make_string_token
3216             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3217           pfile->time = make_string_token
3218             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3219
3220           sprintf ((char *) pfile->date->val.name.text, "%s %2d %4d",
3221                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3222           sprintf ((char *) pfile->time->val.name.text, "%02d:%02d:%02d",
3223                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3224         }
3225       result = node->type == T_DATE ? pfile->date: pfile->time;
3226       break;
3227
3228     case T_POISON:
3229       cpp_error (pfile, "attempt to use poisoned \"%s\".", node->name);
3230       return token;
3231
3232     default:
3233       cpp_ice (pfile, "invalid special hash type");
3234       return token;
3235     }
3236
3237   ASSIGN_FLAGS_AND_POS (result, token);
3238   return result;
3239 }
3240 #undef DSC
3241
3242 /* Dump the original user's spelling of argument index ARG_NO to the
3243    macro whose expansion is LIST.  */
3244 static void
3245 dump_param_spelling (pfile, list, arg_no)
3246      cpp_reader *pfile;
3247      const cpp_toklist *list;
3248      unsigned int arg_no;
3249 {
3250   const U_CHAR *param = list->namebuf;
3251
3252   while (arg_no--)
3253     param += ustrlen (param) + 1;
3254   CPP_PUTS (pfile, param, ustrlen (param));
3255 }
3256
3257 /* Dump a token list to the output.  */
3258 void
3259 _cpp_dump_list (pfile, list, token, flush)
3260      cpp_reader *pfile;
3261      const cpp_toklist *list;
3262      const cpp_token *token;
3263      int flush;
3264 {
3265   const cpp_token *limit = list->tokens + list->tokens_used;
3266   const cpp_token *prev = 0;
3267
3268   /* Avoid the CPP_EOF.  */
3269   if (list->directive)
3270     limit--;
3271
3272   while (token < limit)
3273     {
3274       if (token->type == CPP_MACRO_ARG)
3275         {
3276           if (token->flags & PREV_WHITE)
3277             CPP_PUTC (pfile, ' ');
3278           if (token->flags & STRINGIFY_ARG)
3279             CPP_PUTC (pfile, '#');
3280           dump_param_spelling (pfile, list, token->val.aux);
3281         }
3282       else
3283         output_token (pfile, token, prev);
3284       if (token->flags & PASTE_LEFT)
3285         CPP_PUTS (pfile, " ##", 3);
3286       prev = token;
3287       token++;
3288     }
3289
3290   if (flush && pfile->printer)
3291     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3292 }
3293
3294 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3295    if it hasn't happened already.  */
3296
3297 void
3298 _cpp_init_input_buffer (pfile)
3299      cpp_reader *pfile;
3300 {
3301   init_trigraph_map ();
3302   pfile->context_cap = 20;
3303   pfile->contexts = (cpp_context *)
3304     xmalloc (pfile->context_cap * sizeof (cpp_context));
3305   pfile->cur_context = 0;
3306   pfile->contexts[0].u.list = &pfile->token_list;
3307
3308   pfile->contexts[0].posn = 0;
3309   pfile->contexts[0].count = 0;
3310   pfile->no_expand_level = UINT_MAX;
3311
3312   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3313 }
3314
3315 /* Moves to the end of the directive line, popping contexts as
3316    necessary.  */
3317 void
3318 _cpp_skip_rest_of_line (pfile)
3319      cpp_reader *pfile;
3320 {
3321   /* Get to base context.  Clear parsing args and each contexts flags,
3322      since these can cause pop_context to return without popping.  */
3323   pfile->no_expand_level = UINT_MAX;
3324   while (pfile->cur_context != 0)
3325     {
3326       pfile->contexts[pfile->cur_context].flags = 0;
3327       do_pop_context (pfile);
3328     }
3329
3330   pfile->contexts[pfile->cur_context].count = 0;
3331   pfile->contexts[pfile->cur_context].posn = 0;
3332   pfile->token_list.directive = 0;
3333 }
3334
3335 /* Directive handler wrapper used by the command line option
3336    processor.  */
3337 void
3338 _cpp_run_directive (pfile, dir, buf, count)
3339      cpp_reader *pfile;
3340      const struct directive *dir;
3341      const char *buf;
3342      size_t count;
3343 {
3344   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3345     {
3346       unsigned int prev_lvl = 0;
3347       /* scan the line now, else prevent_macro_expansion won't work */
3348       do_pop_context (pfile);
3349       if (! (dir->flags & EXPAND))
3350         prev_lvl = prevent_macro_expansion (pfile);
3351
3352       (void) (*dir->handler) (pfile);
3353
3354       if (! (dir->flags & EXPAND))
3355         restore_macro_expansion (pfile, prev_lvl);
3356
3357       _cpp_skip_rest_of_line (pfile);
3358       cpp_pop_buffer (pfile);
3359     }
3360 }