libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, U s           },
  45 #define TK(e, s) { s,              U #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
  57 static void lex_number (cpp_reader *, cpp_string *);
  58 static bool forms_identifier_p (cpp_reader *, int);
  59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  62                             unsigned int, enum cpp_ttype);
  63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  64 static int name_p (cpp_reader *, const cpp_string *);
  65 static tokenrun *next_tokenrun (tokenrun *);
  66
  67 static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
 100 /* Returns with a logical line that contains no escaped newlines or
 101    trigraphs.  This is a time-critical inner loop.  */
 102 void
 103 _cpp_clean_line (cpp_reader *pfile)
 104 {
 105   cpp_buffer *buffer;
 106   const uchar *s;
 107   uchar c, *d, *p;
 108
 109   buffer = pfile->buffer;
 110   buffer->cur_note = buffer->notes_used = 0;
 111   buffer->cur = buffer->line_base = buffer->next_line;
 112   buffer->need_line = false;
 113   s = buffer->next_line - 1;
 114
 115   if (!buffer->from_stage3)
 116     {
 117       /* Short circuit for the common case of an un-escaped line with
 118          no trigraphs.  The primary win here is by not writing any
 119          data back to memory until we have to.  */
 120       for (;;)
 121         {
 122           c = *++s;
 123           if (c == '\n' || c == '\r')
 124             {
 125               d = (uchar *) s;
 126
 127               if (s == buffer->rlimit)
 128                 goto done;
 129
 130               /* DOS line ending? */
 131               if (c == '\r' && s[1] == '\n')
 132                 s++;
 133
 134               if (s == buffer->rlimit)
 135                 goto done;
 136
 137               /* check for escaped newline */
 138               p = d;
 139               while (p != buffer->next_line && is_nvspace (p[-1]))
 140                 p--;
 141               if (p == buffer->next_line || p[-1] != '\\')
 142                 goto done;
 143
 144               /* Have an escaped newline; process it and proceed to
 145                  the slow path.  */
 146               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 147               d = p - 2;
 148               buffer->next_line = p - 1;
 149               break;
 150             }
 151           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 152             {
 153               /* Have a trigraph.  We may or may not have to convert
 154                  it.  Add a line note regardless, for -Wtrigraphs.  */
 155               add_line_note (buffer, s, s[2]);
 156               if (CPP_OPTION (pfile, trigraphs))
 157                 {
 158                   /* We do, and that means we have to switch to the
 159                      slow path.  */
 160                   d = (uchar *) s;
 161                   *d = _cpp_trigraph_map[s[2]];
 162                   s += 2;
 163                   break;
 164                 }
 165             }
 166         }
 167
 168
 169       for (;;)
 170         {
 171           c = *++s;
 172           *++d = c;
 173
 174           if (c == '\n' || c == '\r')
 175             {
 176                   /* Handle DOS line endings.  */
 177               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 178                 s++;
 179               if (s == buffer->rlimit)
 180                 break;
 181
 182               /* Escaped?  */
 183               p = d;
 184               while (p != buffer->next_line && is_nvspace (p[-1]))
 185                 p--;
 186               if (p == buffer->next_line || p[-1] != '\\')
 187                 break;
 188
 189               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 190               d = p - 2;
 191               buffer->next_line = p - 1;
 192             }
 193           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 194             {
 195               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 196               add_line_note (buffer, d, s[2]);
 197               if (CPP_OPTION (pfile, trigraphs))
 198                 {
 199                   *d = _cpp_trigraph_map[s[2]];
 200                   s += 2;
 201                 }
 202             }
 203         }
 204     }
 205   else
 206     {
 207       do
 208         s++;
 209       while (*s != '\n' && *s != '\r');
 210       d = (uchar *) s;
 211
 212       /* Handle DOS line endings.  */
 213       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 214         s++;
 215     }
 216
 217  done:
 218   *d = '\n';
 219   /* A sentinel note that should never be processed.  */
 220   add_line_note (buffer, d + 1, '\n');
 221   buffer->next_line = s + 1;
 222 }
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           CPP_INCREMENT_LINE (pfile, 0);
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, CPP_DL_WARNING,
 346                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           unsigned int cols;
 353           buffer->cur = cur - 1;
 354           _cpp_process_line_notes (pfile, true);
 355           if (buffer->next_line >= buffer->rlimit)
 356             return true;
 357           _cpp_clean_line (pfile);
 358
 359           cols = buffer->next_line - buffer->line_base;
 360           CPP_INCREMENT_LINE (pfile, cols);
 361
 362           cur = buffer->cur;
 363         }
 364     }
 365
 366   buffer->cur = cur;
 367   _cpp_process_line_notes (pfile, true);
 368   return false;
 369 }
 370
 371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 372    terminating newline.  Handles escaped newlines.  Returns nonzero
 373    if a multiline comment.  */
 374 static int
 375 skip_line_comment (cpp_reader *pfile)
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int orig_line = pfile->line_table->highest_line;
 379
 380   while (*buffer->cur != '\n')
 381     buffer->cur++;
 382
 383   _cpp_process_line_notes (pfile, true);
 384   return orig_line != pfile->line_table->highest_line;
 385 }
 386
 387 /* Skips whitespace, saving the next non-whitespace character.  */
 388 static void
 389 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 390 {
 391   cpp_buffer *buffer = pfile->buffer;
 392   bool saw_NUL = false;
 393
 394   do
 395     {
 396       /* Horizontal space always OK.  */
 397       if (c == ' ' || c == '\t')
 398         ;
 399       /* Just \f \v or \0 left.  */
 400       else if (c == '\0')
 401         saw_NUL = true;
 402       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 403         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 404                              CPP_BUF_COL (buffer),
 405                              "%s in preprocessing directive",
 406                              c == '\f' ? "form feed" : "vertical tab");
 407
 408       c = *buffer->cur++;
 409     }
 410   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 411   while (is_nvspace (c));
 412
 413   if (saw_NUL)
 414     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 415
 416   buffer->cur--;
 417 }
 418
 419 /* See if the characters of a number token are valid in a name (no
 420    '.', '+' or '-').  */
 421 static int
 422 name_p (cpp_reader *pfile, const cpp_string *string)
 423 {
 424   unsigned int i;
 425
 426   for (i = 0; i < string->len; i++)
 427     if (!is_idchar (string->text[i]))
 428       return 0;
 429
 430   return 1;
 431 }
 432
 433 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 434    an identifier.  FIRST is TRUE if this starts an identifier.  */
 435 static bool
 436 forms_identifier_p (cpp_reader *pfile, int first)
 437 {
 438   cpp_buffer *buffer = pfile->buffer;
 439
 440   if (*buffer->cur == '$')
 441     {
 442       if (!CPP_OPTION (pfile, dollars_in_ident))
 443         return false;
 444
 445       buffer->cur++;
 446       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 447         {
 448           CPP_OPTION (pfile, warn_dollars) = 0;
 449           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 450         }
 451
 452       return true;
 453     }
 454
 455   /* Is this a syntactically valid UCN?  */
 456   if (0 && *buffer->cur == '\\'
 457       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 458     {
 459       buffer->cur += 2;
 460       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 461         return true;
 462       buffer->cur -= 2;
 463     }
 464
 465   return false;
 466 }
 467
 468 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 469 static cpp_hashnode *
 470 lex_identifier (cpp_reader *pfile, const uchar *base)
 471 {
 472   cpp_hashnode *result;
 473   const uchar *cur, *limit;
 474   unsigned int len;
 475   unsigned int hash = HT_HASHSTEP (0, *base);
 476
 477   cur = pfile->buffer->cur;
 478   for (;;)
 479     {
 480       /* N.B. ISIDNUM does not include $.  */
 481       while (ISIDNUM (*cur))
 482         {
 483           hash = HT_HASHSTEP (hash, *cur);
 484           cur++;
 485         }
 486
 487       pfile->buffer->cur = cur;
 488       if (!forms_identifier_p (pfile, false))
 489         break;
 490
 491       limit = pfile->buffer->cur;
 492       while (cur < limit)
 493         {
 494           hash = HT_HASHSTEP (hash, *cur);
 495           cur++;
 496         }
 497     }
 498   len = cur - base;
 499   hash = HT_HASHFINISH (hash, len);
 500
 501   result = (cpp_hashnode *)
 502     ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
 503
 504   /* Rarely, identifiers require diagnostics when lexed.  */
 505   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 506                         && !pfile->state.skipping, 0))
 507     {
 508       /* It is allowed to poison the same identifier twice.  */
 509       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 510         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 511                    NODE_NAME (result));
 512
 513       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 514          replacement list of a variadic macro.  */
 515       if (result == pfile->spec_nodes.n__VA_ARGS__
 516           && !pfile->state.va_args_ok)
 517         cpp_error (pfile, CPP_DL_PEDWARN,
 518                    "__VA_ARGS__ can only appear in the expansion"
 519                    " of a C99 variadic macro");
 520     }
 521
 522   return result;
 523 }
 524
 525 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 526 static void
 527 lex_number (cpp_reader *pfile, cpp_string *number)
 528 {
 529   const uchar *cur;
 530   const uchar *base;
 531   uchar *dest;
 532
 533   base = pfile->buffer->cur - 1;
 534   do
 535     {
 536       cur = pfile->buffer->cur;
 537
 538       /* N.B. ISIDNUM does not include $.  */
 539       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 540         cur++;
 541
 542       pfile->buffer->cur = cur;
 543     }
 544   while (forms_identifier_p (pfile, false));
 545
 546   number->len = cur - base;
 547   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 548   memcpy (dest, base, number->len);
 549   dest[number->len] = '\0';
 550   number->text = dest;
 551 }
 552
 553 /* Create a token of type TYPE with a literal spelling.  */
 554 static void
 555 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 556                 unsigned int len, enum cpp_ttype type)
 557 {
 558   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 559
 560   memcpy (dest, base, len);
 561   dest[len] = '\0';
 562   token->type = type;
 563   token->val.str.len = len;
 564   token->val.str.text = dest;
 565 }
 566
 567 /* Lexes a string, character constant, or angle-bracketed header file
 568    name.  The stored string contains the spelling, including opening
 569    quote and leading any leading 'L'.  It returns the type of the
 570    literal, or CPP_OTHER if it was not properly terminated.
 571
 572    The spelling is NUL-terminated, but it is not guaranteed that this
 573    is the first NUL since embedded NULs are preserved.  */
 574 static void
 575 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 576 {
 577   bool saw_NUL = false;
 578   const uchar *cur;
 579   cppchar_t terminator;
 580   enum cpp_ttype type;
 581
 582   cur = base;
 583   terminator = *cur++;
 584   if (terminator == 'L')
 585     terminator = *cur++;
 586   if (terminator == '\"')
 587     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 588   else if (terminator == '\'')
 589     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 590   else
 591     terminator = '>', type = CPP_HEADER_NAME;
 592
 593   for (;;)
 594     {
 595       cppchar_t c = *cur++;
 596
 597       /* In #include-style directives, terminators are not escapable.  */
 598       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 599         cur++;
 600       else if (c == terminator)
 601         break;
 602       else if (c == '\n')
 603         {
 604           cur--;
 605           type = CPP_OTHER;
 606           break;
 607         }
 608       else if (c == '\0')
 609         saw_NUL = true;
 610     }
 611
 612   if (saw_NUL && !pfile->state.skipping)
 613     cpp_error (pfile, CPP_DL_WARNING,
 614                "null character(s) preserved in literal");
 615
 616   pfile->buffer->cur = cur;
 617   create_literal (pfile, token, base, cur - base, type);
 618 }
 619
 620 /* The stored comment includes the comment start and any terminator.  */
 621 static void
 622 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 623               cppchar_t type)
 624 {
 625   unsigned char *buffer;
 626   unsigned int len, clen;
 627
 628   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 629
 630   /* C++ comments probably (not definitely) have moved past a new
 631      line, which we don't want to save in the comment.  */
 632   if (is_vspace (pfile->buffer->cur[-1]))
 633     len--;
 634
 635   /* If we are currently in a directive, then we need to store all
 636      C++ comments as C comments internally, and so we need to
 637      allocate a little extra space in that case.
 638
 639      Note that the only time we encounter a directive here is
 640      when we are saving comments in a "#define".  */
 641   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 642
 643   buffer = _cpp_unaligned_alloc (pfile, clen);
 644
 645   token->type = CPP_COMMENT;
 646   token->val.str.len = clen;
 647   token->val.str.text = buffer;
 648
 649   buffer[0] = '/';
 650   memcpy (buffer + 1, from, len - 1);
 651
 652   /* Finish conversion to a C comment, if necessary.  */
 653   if (pfile->state.in_directive && type == '/')
 654     {
 655       buffer[1] = '*';
 656       buffer[clen - 2] = '*';
 657       buffer[clen - 1] = '/';
 658     }
 659 }
 660
 661 /* Allocate COUNT tokens for RUN.  */
 662 void
 663 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 664 {
 665   run->base = XNEWVEC (cpp_token, count);
 666   run->limit = run->base + count;
 667   run->next = NULL;
 668 }
 669
 670 /* Returns the next tokenrun, or creates one if there is none.  */
 671 static tokenrun *
 672 next_tokenrun (tokenrun *run)
 673 {
 674   if (run->next == NULL)
 675     {
 676       run->next = XNEW (tokenrun);
 677       run->next->prev = run;
 678       _cpp_init_tokenrun (run->next, 250);
 679     }
 680
 681   return run->next;
 682 }
 683
 684 /* Allocate a single token that is invalidated at the same time as the
 685    rest of the tokens on the line.  Has its line and col set to the
 686    same as the last lexed token, so that diagnostics appear in the
 687    right place.  */
 688 cpp_token *
 689 _cpp_temp_token (cpp_reader *pfile)
 690 {
 691   cpp_token *old, *result;
 692
 693   old = pfile->cur_token - 1;
 694   if (pfile->cur_token == pfile->cur_run->limit)
 695     {
 696       pfile->cur_run = next_tokenrun (pfile->cur_run);
 697       pfile->cur_token = pfile->cur_run->base;
 698     }
 699
 700   result = pfile->cur_token++;
 701   result->src_loc = old->src_loc;
 702   return result;
 703 }
 704
 705 /* Lex a token into RESULT (external interface).  Takes care of issues
 706    like directive handling, token lookahead, multiple include
 707    optimization and skipping.  */
 708 const cpp_token *
 709 _cpp_lex_token (cpp_reader *pfile)
 710 {
 711   cpp_token *result;
 712
 713   for (;;)
 714     {
 715       if (pfile->cur_token == pfile->cur_run->limit)
 716         {
 717           pfile->cur_run = next_tokenrun (pfile->cur_run);
 718           pfile->cur_token = pfile->cur_run->base;
 719         }
 720
 721       if (pfile->lookaheads)
 722         {
 723           pfile->lookaheads--;
 724           result = pfile->cur_token++;
 725         }
 726       else
 727         result = _cpp_lex_direct (pfile);
 728
 729       if (result->flags & BOL)
 730         {
 731           /* Is this a directive.  If _cpp_handle_directive returns
 732              false, it is an assembler #.  */
 733           if (result->type == CPP_HASH
 734               /* 6.10.3 p 11: Directives in a list of macro arguments
 735                  gives undefined behavior.  This implementation
 736                  handles the directive as normal.  */
 737               && pfile->state.parsing_args != 1
 738               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 739             continue;
 740           if (pfile->cb.line_change && !pfile->state.skipping)
 741             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 742         }
 743
 744       /* We don't skip tokens in directives.  */
 745       if (pfile->state.in_directive)
 746         break;
 747
 748       /* Outside a directive, invalidate controlling macros.  At file
 749          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 750          get here and MI optimization works.  */
 751       pfile->mi_valid = false;
 752
 753       if (!pfile->state.skipping || result->type == CPP_EOF)
 754         break;
 755     }
 756
 757   return result;
 758 }
 759
 760 /* Returns true if a fresh line has been loaded.  */
 761 bool
 762 _cpp_get_fresh_line (cpp_reader *pfile)
 763 {
 764   int return_at_eof;
 765
 766   /* We can't get a new line until we leave the current directive.  */
 767   if (pfile->state.in_directive)
 768     return false;
 769
 770   for (;;)
 771     {
 772       cpp_buffer *buffer = pfile->buffer;
 773
 774       if (!buffer->need_line)
 775         return true;
 776
 777       if (buffer->next_line < buffer->rlimit)
 778         {
 779           _cpp_clean_line (pfile);
 780           return true;
 781         }
 782
 783       /* First, get out of parsing arguments state.  */
 784       if (pfile->state.parsing_args)
 785         return false;
 786
 787       /* End of buffer.  Non-empty files should end in a newline.  */
 788       if (buffer->buf != buffer->rlimit
 789           && buffer->next_line > buffer->rlimit
 790           && !buffer->from_stage3)
 791         {
 792           /* Only warn once.  */
 793           buffer->next_line = buffer->rlimit;
 794           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 795                                CPP_BUF_COLUMN (buffer, buffer->cur),
 796                                "no newline at end of file");
 797         }
 798
 799       return_at_eof = buffer->return_at_eof;
 800       _cpp_pop_buffer (pfile);
 801       if (pfile->buffer == NULL || return_at_eof)
 802         return false;
 803     }
 804 }
 805
 806 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 807   do                                                    \
 808     {                                                   \
 809       result->type = ELSE_TYPE;                         \
 810       if (*buffer->cur == CHAR)                         \
 811         buffer->cur++, result->type = THEN_TYPE;        \
 812     }                                                   \
 813   while (0)
 814
 815 /* Lex a token into pfile->cur_token, which is also incremented, to
 816    get diagnostics pointing to the correct location.
 817
 818    Does not handle issues such as token lookahead, multiple-include
 819    optimization, directives, skipping etc.  This function is only
 820    suitable for use by _cpp_lex_token, and in special cases like
 821    lex_expansion_token which doesn't care for any of these issues.
 822
 823    When meeting a newline, returns CPP_EOF if parsing a directive,
 824    otherwise returns to the start of the token buffer if permissible.
 825    Returns the location of the lexed token.  */
 826 cpp_token *
 827 _cpp_lex_direct (cpp_reader *pfile)
 828 {
 829   cppchar_t c;
 830   cpp_buffer *buffer;
 831   const unsigned char *comment_start;
 832   cpp_token *result = pfile->cur_token++;
 833
 834  fresh_line:
 835   result->flags = 0;
 836   buffer = pfile->buffer;
 837   if (buffer->need_line)
 838     {
 839       if (!_cpp_get_fresh_line (pfile))
 840         {
 841           result->type = CPP_EOF;
 842           if (!pfile->state.in_directive)
 843             {
 844               /* Tell the compiler the line number of the EOF token.  */
 845               result->src_loc = pfile->line_table->highest_line;
 846               result->flags = BOL;
 847             }
 848           return result;
 849         }
 850       if (!pfile->keep_tokens)
 851         {
 852           pfile->cur_run = &pfile->base_run;
 853           result = pfile->base_run.base;
 854           pfile->cur_token = result + 1;
 855         }
 856       result->flags = BOL;
 857       if (pfile->state.parsing_args == 2)
 858         result->flags |= PREV_WHITE;
 859     }
 860   buffer = pfile->buffer;
 861  update_tokens_line:
 862   result->src_loc = pfile->line_table->highest_line;
 863
 864  skipped_white:
 865   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 866       && !pfile->overlaid_buffer)
 867     {
 868       _cpp_process_line_notes (pfile, false);
 869       result->src_loc = pfile->line_table->highest_line;
 870     }
 871   c = *buffer->cur++;
 872
 873   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
 874                                CPP_BUF_COLUMN (buffer, buffer->cur));
 875
 876   switch (c)
 877     {
 878     case ' ': case '\t': case '\f': case '\v': case '\0':
 879       result->flags |= PREV_WHITE;
 880       skip_whitespace (pfile, c);
 881       goto skipped_white;
 882
 883     case '\n':
 884       if (buffer->cur < buffer->rlimit)
 885         CPP_INCREMENT_LINE (pfile, 0);
 886       buffer->need_line = true;
 887       goto fresh_line;
 888
 889     case '0': case '1': case '2': case '3': case '4':
 890     case '5': case '6': case '7': case '8': case '9':
 891       result->type = CPP_NUMBER;
 892       lex_number (pfile, &result->val.str);
 893       break;
 894
 895     case 'L':
 896       /* 'L' may introduce wide characters or strings.  */
 897       if (*buffer->cur == '\'' || *buffer->cur == '"')
 898         {
 899           lex_string (pfile, result, buffer->cur - 1);
 900           break;
 901         }
 902       /* Fall through.  */
 903
 904     case '_':
 905     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 906     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 907     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 908     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 909     case 'y': case 'z':
 910     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 911     case 'G': case 'H': case 'I': case 'J': case 'K':
 912     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 913     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 914     case 'Y': case 'Z':
 915       result->type = CPP_NAME;
 916       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 917
 918       /* Convert named operators to their proper types.  */
 919       if (result->val.node->flags & NODE_OPERATOR)
 920         {
 921           result->flags |= NAMED_OP;
 922           result->type = result->val.node->directive_index;
 923         }
 924       break;
 925
 926     case '\'':
 927     case '"':
 928       lex_string (pfile, result, buffer->cur - 1);
 929       break;
 930
 931     case '/':
 932       /* A potential block or line comment.  */
 933       comment_start = buffer->cur;
 934       c = *buffer->cur;
 935
 936       if (c == '*')
 937         {
 938           if (_cpp_skip_block_comment (pfile))
 939             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 940         }
 941       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 942                             || cpp_in_system_header (pfile)))
 943         {
 944           /* Warn about comments only if pedantically GNUC89, and not
 945              in system headers.  */
 946           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 947               && ! buffer->warned_cplusplus_comments)
 948             {
 949               cpp_error (pfile, CPP_DL_PEDWARN,
 950                          "C++ style comments are not allowed in ISO C90");
 951               cpp_error (pfile, CPP_DL_PEDWARN,
 952                          "(this will be reported only once per input file)");
 953               buffer->warned_cplusplus_comments = 1;
 954             }
 955
 956           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 957             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
 958         }
 959       else if (c == '=')
 960         {
 961           buffer->cur++;
 962           result->type = CPP_DIV_EQ;
 963           break;
 964         }
 965       else
 966         {
 967           result->type = CPP_DIV;
 968           break;
 969         }
 970
 971       if (!pfile->state.save_comments)
 972         {
 973           result->flags |= PREV_WHITE;
 974           goto update_tokens_line;
 975         }
 976
 977       /* Save the comment as a token in its own right.  */
 978       save_comment (pfile, result, comment_start, c);
 979       break;
 980
 981     case '<':
 982       if (pfile->state.angled_headers)
 983         {
 984           lex_string (pfile, result, buffer->cur - 1);
 985           break;
 986         }
 987
 988       result->type = CPP_LESS;
 989       if (*buffer->cur == '=')
 990         buffer->cur++, result->type = CPP_LESS_EQ;
 991       else if (*buffer->cur == '<')
 992         {
 993           buffer->cur++;
 994           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 995         }
 996       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 997         {
 998           buffer->cur++;
 999           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1000         }
1001       else if (CPP_OPTION (pfile, digraphs))
1002         {
1003           if (*buffer->cur == ':')
1004             {
1005               buffer->cur++;
1006               result->flags |= DIGRAPH;
1007               result->type = CPP_OPEN_SQUARE;
1008             }
1009           else if (*buffer->cur == '%')
1010             {
1011               buffer->cur++;
1012               result->flags |= DIGRAPH;
1013               result->type = CPP_OPEN_BRACE;
1014             }
1015         }
1016       break;
1017
1018     case '>':
1019       result->type = CPP_GREATER;
1020       if (*buffer->cur == '=')
1021         buffer->cur++, result->type = CPP_GREATER_EQ;
1022       else if (*buffer->cur == '>')
1023         {
1024           buffer->cur++;
1025           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1026         }
1027       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1028         {
1029           buffer->cur++;
1030           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1031         }
1032       break;
1033
1034     case '%':
1035       result->type = CPP_MOD;
1036       if (*buffer->cur == '=')
1037         buffer->cur++, result->type = CPP_MOD_EQ;
1038       else if (CPP_OPTION (pfile, digraphs))
1039         {
1040           if (*buffer->cur == ':')
1041             {
1042               buffer->cur++;
1043               result->flags |= DIGRAPH;
1044               result->type = CPP_HASH;
1045               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1046                 buffer->cur += 2, result->type = CPP_PASTE;
1047             }
1048           else if (*buffer->cur == '>')
1049             {
1050               buffer->cur++;
1051               result->flags |= DIGRAPH;
1052               result->type = CPP_CLOSE_BRACE;
1053             }
1054         }
1055       break;
1056
1057     case '.':
1058       result->type = CPP_DOT;
1059       if (ISDIGIT (*buffer->cur))
1060         {
1061           result->type = CPP_NUMBER;
1062           lex_number (pfile, &result->val.str);
1063         }
1064       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1065         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1066       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1067         buffer->cur++, result->type = CPP_DOT_STAR;
1068       break;
1069
1070     case '+':
1071       result->type = CPP_PLUS;
1072       if (*buffer->cur == '+')
1073         buffer->cur++, result->type = CPP_PLUS_PLUS;
1074       else if (*buffer->cur == '=')
1075         buffer->cur++, result->type = CPP_PLUS_EQ;
1076       break;
1077
1078     case '-':
1079       result->type = CPP_MINUS;
1080       if (*buffer->cur == '>')
1081         {
1082           buffer->cur++;
1083           result->type = CPP_DEREF;
1084           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1085             buffer->cur++, result->type = CPP_DEREF_STAR;
1086         }
1087       else if (*buffer->cur == '-')
1088         buffer->cur++, result->type = CPP_MINUS_MINUS;
1089       else if (*buffer->cur == '=')
1090         buffer->cur++, result->type = CPP_MINUS_EQ;
1091       break;
1092
1093     case '&':
1094       result->type = CPP_AND;
1095       if (*buffer->cur == '&')
1096         buffer->cur++, result->type = CPP_AND_AND;
1097       else if (*buffer->cur == '=')
1098         buffer->cur++, result->type = CPP_AND_EQ;
1099       break;
1100
1101     case '|':
1102       result->type = CPP_OR;
1103       if (*buffer->cur == '|')
1104         buffer->cur++, result->type = CPP_OR_OR;
1105       else if (*buffer->cur == '=')
1106         buffer->cur++, result->type = CPP_OR_EQ;
1107       break;
1108
1109     case ':':
1110       result->type = CPP_COLON;
1111       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1112         buffer->cur++, result->type = CPP_SCOPE;
1113       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1114         {
1115           buffer->cur++;
1116           result->flags |= DIGRAPH;
1117           result->type = CPP_CLOSE_SQUARE;
1118         }
1119       break;
1120
1121     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1122     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1123     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1124     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1125     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1126
1127     case '?': result->type = CPP_QUERY; break;
1128     case '~': result->type = CPP_COMPL; break;
1129     case ',': result->type = CPP_COMMA; break;
1130     case '(': result->type = CPP_OPEN_PAREN; break;
1131     case ')': result->type = CPP_CLOSE_PAREN; break;
1132     case '[': result->type = CPP_OPEN_SQUARE; break;
1133     case ']': result->type = CPP_CLOSE_SQUARE; break;
1134     case '{': result->type = CPP_OPEN_BRACE; break;
1135     case '}': result->type = CPP_CLOSE_BRACE; break;
1136     case ';': result->type = CPP_SEMICOLON; break;
1137
1138       /* @ is a punctuator in Objective-C.  */
1139     case '@': result->type = CPP_ATSIGN; break;
1140
1141     case '$':
1142     case '\\':
1143       {
1144         const uchar *base = --buffer->cur;
1145
1146         if (forms_identifier_p (pfile, true))
1147           {
1148             result->type = CPP_NAME;
1149             result->val.node = lex_identifier (pfile, base);
1150             break;
1151           }
1152         buffer->cur++;
1153       }
1154
1155     default:
1156       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1157       break;
1158     }
1159
1160   return result;
1161 }
1162
1163 /* An upper bound on the number of bytes needed to spell TOKEN.
1164    Does not include preceding whitespace.  */
1165 unsigned int
1166 cpp_token_len (const cpp_token *token)
1167 {
1168   unsigned int len;
1169
1170   switch (TOKEN_SPELL (token))
1171     {
1172     default:            len = 4;                                break;
1173     case SPELL_LITERAL: len = token->val.str.len;               break;
1174     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1175     }
1176
1177   return len;
1178 }
1179
1180 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1181    already contain the enough space to hold the token's spelling.
1182    Returns a pointer to the character after the last character written.
1183    FIXME: Would be nice if we didn't need the PFILE argument.  */
1184 unsigned char *
1185 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1186                  unsigned char *buffer)
1187 {
1188   switch (TOKEN_SPELL (token))
1189     {
1190     case SPELL_OPERATOR:
1191       {
1192         const unsigned char *spelling;
1193         unsigned char c;
1194
1195         if (token->flags & DIGRAPH)
1196           spelling
1197             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1198         else if (token->flags & NAMED_OP)
1199           goto spell_ident;
1200         else
1201           spelling = TOKEN_NAME (token);
1202
1203         while ((c = *spelling++) != '\0')
1204           *buffer++ = c;
1205       }
1206       break;
1207
1208     spell_ident:
1209     case SPELL_IDENT:
1210       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1211       buffer += NODE_LEN (token->val.node);
1212       break;
1213
1214     case SPELL_LITERAL:
1215       memcpy (buffer, token->val.str.text, token->val.str.len);
1216       buffer += token->val.str.len;
1217       break;
1218
1219     case SPELL_NONE:
1220       cpp_error (pfile, CPP_DL_ICE,
1221                  "unspellable token %s", TOKEN_NAME (token));
1222       break;
1223     }
1224
1225   return buffer;
1226 }
1227
1228 /* Returns TOKEN spelt as a null-terminated string.  The string is
1229    freed when the reader is destroyed.  Useful for diagnostics.  */
1230 unsigned char *
1231 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1232 {
1233   unsigned int len = cpp_token_len (token) + 1;
1234   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1235
1236   end = cpp_spell_token (pfile, token, start);
1237   end[0] = '\0';
1238
1239   return start;
1240 }
1241
1242 /* Used by C front ends, which really should move to using
1243    cpp_token_as_text.  */
1244 const char *
1245 cpp_type2name (enum cpp_ttype type)
1246 {
1247   return (const char *) token_spellings[type].name;
1248 }
1249
1250 /* Writes the spelling of token to FP, without any preceding space.
1251    Separated from cpp_spell_token for efficiency - to avoid stdio
1252    double-buffering.  */
1253 void
1254 cpp_output_token (const cpp_token *token, FILE *fp)
1255 {
1256   switch (TOKEN_SPELL (token))
1257     {
1258     case SPELL_OPERATOR:
1259       {
1260         const unsigned char *spelling;
1261         int c;
1262
1263         if (token->flags & DIGRAPH)
1264           spelling
1265             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1266         else if (token->flags & NAMED_OP)
1267           goto spell_ident;
1268         else
1269           spelling = TOKEN_NAME (token);
1270
1271         c = *spelling;
1272         do
1273           putc (c, fp);
1274         while ((c = *++spelling) != '\0');
1275       }
1276       break;
1277
1278     spell_ident:
1279     case SPELL_IDENT:
1280       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1281     break;
1282
1283     case SPELL_LITERAL:
1284       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1285       break;
1286
1287     case SPELL_NONE:
1288       /* An error, most probably.  */
1289       break;
1290     }
1291 }
1292
1293 /* Compare two tokens.  */
1294 int
1295 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1296 {
1297   if (a->type == b->type && a->flags == b->flags)
1298     switch (TOKEN_SPELL (a))
1299       {
1300       default:                  /* Keep compiler happy.  */
1301       case SPELL_OPERATOR:
1302         return 1;
1303       case SPELL_NONE:
1304         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1305       case SPELL_IDENT:
1306         return a->val.node == b->val.node;
1307       case SPELL_LITERAL:
1308         return (a->val.str.len == b->val.str.len
1309                 && !memcmp (a->val.str.text, b->val.str.text,
1310                             a->val.str.len));
1311       }
1312
1313   return 0;
1314 }
1315
1316 /* Returns nonzero if a space should be inserted to avoid an
1317    accidental token paste for output.  For simplicity, it is
1318    conservative, and occasionally advises a space where one is not
1319    needed, e.g. "." and ".2".  */
1320 int
1321 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1322                  const cpp_token *token2)
1323 {
1324   enum cpp_ttype a = token1->type, b = token2->type;
1325   cppchar_t c;
1326
1327   if (token1->flags & NAMED_OP)
1328     a = CPP_NAME;
1329   if (token2->flags & NAMED_OP)
1330     b = CPP_NAME;
1331
1332   c = EOF;
1333   if (token2->flags & DIGRAPH)
1334     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1335   else if (token_spellings[b].category == SPELL_OPERATOR)
1336     c = token_spellings[b].name[0];
1337
1338   /* Quickly get everything that can paste with an '='.  */
1339   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1340     return 1;
1341
1342   switch (a)
1343     {
1344     case CPP_GREATER:   return c == '>' || c == '?';
1345     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1346     case CPP_PLUS:      return c == '+';
1347     case CPP_MINUS:     return c == '-' || c == '>';
1348     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1349     case CPP_MOD:       return c == ':' || c == '>';
1350     case CPP_AND:       return c == '&';
1351     case CPP_OR:        return c == '|';
1352     case CPP_COLON:     return c == ':' || c == '>';
1353     case CPP_DEREF:     return c == '*';
1354     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1355     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1356     case CPP_NAME:      return ((b == CPP_NUMBER
1357                                  && name_p (pfile, &token2->val.str))
1358                                 || b == CPP_NAME
1359                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1360     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1361                                 || c == '.' || c == '+' || c == '-');
1362                                       /* UCNs */
1363     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1364                                  && b == CPP_NAME)
1365                                 || (CPP_OPTION (pfile, objc)
1366                                     && token1->val.str.text[0] == '@'
1367                                     && (b == CPP_NAME || b == CPP_STRING)));
1368     default:            break;
1369     }
1370
1371   return 0;
1372 }
1373
1374 /* Output all the remaining tokens on the current line, and a newline
1375    character, to FP.  Leading whitespace is removed.  If there are
1376    macros, special token padding is not performed.  */
1377 void
1378 cpp_output_line (cpp_reader *pfile, FILE *fp)
1379 {
1380   const cpp_token *token;
1381
1382   token = cpp_get_token (pfile);
1383   while (token->type != CPP_EOF)
1384     {
1385       cpp_output_token (token, fp);
1386       token = cpp_get_token (pfile);
1387       if (token->flags & PREV_WHITE)
1388         putc (' ', fp);
1389     }
1390
1391   putc ('\n', fp);
1392 }
1393
/* Memory buffers.  Changing these three macros can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest buffer payload that is ever allocated.  */
#define MIN_BUFF_SIZE 8000

/* Largest recycled buffer considered acceptable for a request of
   MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF: MIN_EXTRA plus twice the
   number of bytes between BUFF's cur and limit pointers.  Both
   arguments are parenthesized so that any expression may be passed.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1408
1409 /* Create a new allocation buffer.  Place the control block at the end
1410    of the buffer, so that buffer overflows will cause immediate chaos.  */
1411 static _cpp_buff *
1412 new_buff (size_t len)
1413 {
1414   _cpp_buff *result;
1415   unsigned char *base;
1416
1417   if (len < MIN_BUFF_SIZE)
1418     len = MIN_BUFF_SIZE;
1419   len = CPP_ALIGN (len);
1420
1421   base = xmalloc (len + sizeof (_cpp_buff));
1422   result = (_cpp_buff *) (base + len);
1423   result->base = base;
1424   result->cur = base;
1425   result->limit = base + len;
1426   result->next = NULL;
1427   return result;
1428 }
1429
1430 /* Place a chain of unwanted allocation buffers on the free list.  */
1431 void
1432 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1433 {
1434   _cpp_buff *end = buff;
1435
1436   while (end->next)
1437     end = end->next;
1438   end->next = pfile->free_buffs;
1439   pfile->free_buffs = buff;
1440 }
1441
1442 /* Return a free buffer of size at least MIN_SIZE.  */
1443 _cpp_buff *
1444 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1445 {
1446   _cpp_buff *result, **p;
1447
1448   for (p = &pfile->free_buffs;; p = &(*p)->next)
1449     {
1450       size_t size;
1451
1452       if (*p == NULL)
1453         return new_buff (min_size);
1454       result = *p;
1455       size = result->limit - result->base;
1456       /* Return a buffer that's big enough, but don't waste one that's
1457          way too big.  */
1458       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1459         break;
1460     }
1461
1462   *p = result->next;
1463   result->next = NULL;
1464   result->cur = result->base;
1465   return result;
1466 }
1467
1468 /* Creates a new buffer with enough space to hold the uncommitted
1469    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1470    the excess bytes to the new buffer.  Chains the new buffer after
1471    BUFF, and returns the new buffer.  */
1472 _cpp_buff *
1473 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1474 {
1475   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1476   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1477
1478   buff->next = new_buff;
1479   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1480   return new_buff;
1481 }
1482
1483 /* Creates a new buffer with enough space to hold the uncommitted
1484    remaining bytes of the buffer pointed to by BUFF, and at least
1485    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1486    Chains the new buffer before the buffer pointed to by BUFF, and
1487    updates the pointer to point to the new buffer.  */
1488 void
1489 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1490 {
1491   _cpp_buff *new_buff, *old_buff = *pbuff;
1492   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1493
1494   new_buff = _cpp_get_buff (pfile, size);
1495   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1496   new_buff->next = old_buff;
1497   *pbuff = new_buff;
1498 }
1499
1500 /* Free a chain of buffers starting at BUFF.  */
1501 void
1502 _cpp_free_buff (_cpp_buff *buff)
1503 {
1504   _cpp_buff *next;
1505
1506   for (; buff; buff = next)
1507     {
1508       next = buff->next;
1509       free (buff->base);
1510     }
1511 }
1512
1513 /* Allocate permanent, unaligned storage of length LEN.  */
1514 unsigned char *
1515 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1516 {
1517   _cpp_buff *buff = pfile->u_buff;
1518   unsigned char *result = buff->cur;
1519
1520   if (len > (size_t) (buff->limit - result))
1521     {
1522       buff = _cpp_get_buff (pfile, len);
1523       buff->next = pfile->u_buff;
1524       pfile->u_buff = buff;
1525       result = buff->cur;
1526     }
1527
1528   buff->cur = result + len;
1529   return result;
1530 }
1531
1532 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1533    That buffer is used for growing allocations when saving macro
1534    replacement lists in a #define, and when parsing an answer to an
1535    assertion in #assert, #unassert or #if (and therefore possibly
1536    whilst expanding macros).  It therefore must not be used by any
1537    code that they might call: specifically the lexer and the guts of
1538    the macro expander.
1539
1540    All existing other uses clearly fit this restriction: storing
1541    registered pragmas during initialization.  */
1542 unsigned char *
1543 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1544 {
1545   _cpp_buff *buff = pfile->a_buff;
1546   unsigned char *result = buff->cur;
1547
1548   if (len > (size_t) (buff->limit - result))
1549     {
1550       buff = _cpp_get_buff (pfile, len);
1551       buff->next = pfile->a_buff;
1552       pfile->a_buff = buff;
1553       result = buff->cur;
1554     }
1555
1556   buff->cur = result + len;
1557   return result;
1558 }
1559
1560 /* Say which field of TOK is in use.  */
1561
1562 enum cpp_token_fld_kind
1563 cpp_token_val_index (cpp_token *tok)
1564 {
1565   switch (TOKEN_SPELL (tok))
1566     {
1567     case SPELL_IDENT:
1568       return CPP_TOKEN_FLD_NODE;
1569     case SPELL_LITERAL:
1570       return CPP_TOKEN_FLD_STR;
1571     case SPELL_NONE:
1572       if (tok->type == CPP_MACRO_ARG)
1573         return CPP_TOKEN_FLD_ARG_NO;
1574       else if (tok->type == CPP_PADDING)
1575         return CPP_TOKEN_FLD_SOURCE;
1576       /* else fall through */
1577     default:
1578       return CPP_TOKEN_FLD_NONE;
1579     }
1580 }