libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, U s  },
  45 #define TK(e, s) { SPELL_ ## s,    U #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *, bool);
  57 static void lex_number (cpp_reader *, cpp_string *);
  58 static bool forms_identifier_p (cpp_reader *, int);
  59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  62                             unsigned int, enum cpp_ttype);
  63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  64 static int name_p (cpp_reader *, const cpp_string *);
  65 static tokenrun *next_tokenrun (tokenrun *);
  66
  67 static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
 100 /* Returns with a logical line that contains no escaped newlines or
 101    trigraphs.  This is a time-critical inner loop.  */
 102 void
 103 _cpp_clean_line (cpp_reader *pfile)
 104 {
 105   cpp_buffer *buffer;
 106   const uchar *s;
 107   uchar c, *d, *p;
 108
 109   buffer = pfile->buffer;
 110   buffer->cur_note = buffer->notes_used = 0;
 111   buffer->cur = buffer->line_base = buffer->next_line;
 112   buffer->need_line = false;
 113   s = buffer->next_line - 1;
 114
 115   if (!buffer->from_stage3)
 116     {
 117       /* Short circuit for the common case of an un-escaped line with
 118          no trigraphs.  The primary win here is by not writing any
 119          data back to memory until we have to.  */
 120       for (;;)
 121         {
 122           c = *++s;
 123           if (c == '\n' || c == '\r')
 124             {
 125               d = (uchar *) s;
 126
 127               if (s == buffer->rlimit)
 128                 goto done;
 129
 130               /* DOS line ending? */
 131               if (c == '\r' && s[1] == '\n')
 132                 s++;
 133
 134               if (s == buffer->rlimit)
 135                 goto done;
 136
 137               /* check for escaped newline */
 138               p = d;
 139               while (p != buffer->next_line && is_nvspace (p[-1]))
 140                 p--;
 141               if (p == buffer->next_line || p[-1] != '\\')
 142                 goto done;
 143
 144               /* Have an escaped newline; process it and proceed to
 145                  the slow path.  */
 146               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 147               d = p - 2;
 148               buffer->next_line = p - 1;
 149               break;
 150             }
 151           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 152             {
 153               /* Have a trigraph.  We may or may not have to convert
 154                  it.  Add a line note regardless, for -Wtrigraphs.  */
 155               add_line_note (buffer, s, s[2]);
 156               if (CPP_OPTION (pfile, trigraphs))
 157                 {
 158                   /* We do, and that means we have to switch to the
 159                      slow path.  */
 160                   d = (uchar *) s;
 161                   *d = _cpp_trigraph_map[s[2]];
 162                   s += 2;
 163                   break;
 164                 }
 165             }
 166         }
 167
 168
 169       for (;;)
 170         {
 171           c = *++s;
 172           *++d = c;
 173
 174           if (c == '\n' || c == '\r')
 175             {
 176                   /* Handle DOS line endings.  */
 177               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 178                 s++;
 179               if (s == buffer->rlimit)
 180                 break;
 181
 182               /* Escaped?  */
 183               p = d;
 184               while (p != buffer->next_line && is_nvspace (p[-1]))
 185                 p--;
 186               if (p == buffer->next_line || p[-1] != '\\')
 187                 break;
 188
 189               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 190               d = p - 2;
 191               buffer->next_line = p - 1;
 192             }
 193           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 194             {
 195               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 196               add_line_note (buffer, d, s[2]);
 197               if (CPP_OPTION (pfile, trigraphs))
 198                 {
 199                   *d = _cpp_trigraph_map[s[2]];
 200                   s += 2;
 201                 }
 202             }
 203         }
 204     }
 205   else
 206     {
 207       do
 208         s++;
 209       while (*s != '\n' && *s != '\r');
 210       d = (uchar *) s;
 211
 212       /* Handle DOS line endings.  */
 213       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 214         s++;
 215     }
 216
 217  done:
 218   *d = '\n';
 219   /* A sentinel note that should never be processed.  */
 220   add_line_note (buffer, d + 1, '\n');
 221   buffer->next_line = s + 1;
 222 }
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           CPP_INCREMENT_LINE (pfile, 0);
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, CPP_DL_WARNING,
 346                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           unsigned int cols;
 353           buffer->cur = cur - 1;
 354           _cpp_process_line_notes (pfile, true);
 355           if (buffer->next_line >= buffer->rlimit)
 356             return true;
 357           _cpp_clean_line (pfile);
 358
 359           cols = buffer->next_line - buffer->line_base;
 360           CPP_INCREMENT_LINE (pfile, cols);
 361
 362           cur = buffer->cur;
 363         }
 364     }
 365
 366   buffer->cur = cur;
 367   _cpp_process_line_notes (pfile, true);
 368   return false;
 369 }
 370
 371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 372    terminating newline.  Handles escaped newlines.  Returns nonzero
 373    if a multiline comment.  */
 374 static int
 375 skip_line_comment (cpp_reader *pfile)
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int orig_line = pfile->line_table->highest_line;
 379
 380   while (*buffer->cur != '\n')
 381     buffer->cur++;
 382
 383   _cpp_process_line_notes (pfile, true);
 384   return orig_line != pfile->line_table->highest_line;
 385 }
 386
 387 /* Skips whitespace, saving the next non-whitespace character.  */
 388 static void
 389 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 390 {
 391   cpp_buffer *buffer = pfile->buffer;
 392   bool saw_NUL = false;
 393
 394   do
 395     {
 396       /* Horizontal space always OK.  */
 397       if (c == ' ' || c == '\t')
 398         ;
 399       /* Just \f \v or \0 left.  */
 400       else if (c == '\0')
 401         saw_NUL = true;
 402       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 403         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 404                              CPP_BUF_COL (buffer),
 405                              "%s in preprocessing directive",
 406                              c == '\f' ? "form feed" : "vertical tab");
 407
 408       c = *buffer->cur++;
 409     }
 410   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 411   while (is_nvspace (c));
 412
 413   if (saw_NUL)
 414     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 415
 416   buffer->cur--;
 417 }
 418
 419 /* See if the characters of a number token are valid in a name (no
 420    '.', '+' or '-').  */
 421 static int
 422 name_p (cpp_reader *pfile, const cpp_string *string)
 423 {
 424   unsigned int i;
 425
 426   for (i = 0; i < string->len; i++)
 427     if (!is_idchar (string->text[i]))
 428       return 0;
 429
 430   return 1;
 431 }
 432
 433 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 434    an identifier.  FIRST is TRUE if this starts an identifier.  */
 435 static bool
 436 forms_identifier_p (cpp_reader *pfile, int first)
 437 {
 438   cpp_buffer *buffer = pfile->buffer;
 439
 440   if (*buffer->cur == '$')
 441     {
 442       if (!CPP_OPTION (pfile, dollars_in_ident))
 443         return false;
 444
 445       buffer->cur++;
 446       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 447         {
 448           CPP_OPTION (pfile, warn_dollars) = 0;
 449           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 450         }
 451
 452       return true;
 453     }
 454
 455   /* Is this a syntactically valid UCN?  */
 456   if (*buffer->cur == '\\'
 457       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 458     {
 459       buffer->cur += 2;
 460       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 461         return true;
 462       buffer->cur -= 2;
 463     }
 464
 465   return false;
 466 }
 467
 468 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 469 static cpp_hashnode *
 470 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
 471 {
 472   cpp_hashnode *result;
 473   const uchar *cur;
 474   unsigned int len;
 475   unsigned int hash = HT_HASHSTEP (0, *base);
 476
 477   cur = pfile->buffer->cur;
 478   if (! starts_ucn)
 479     while (ISIDNUM (*cur))
 480       {
 481         hash = HT_HASHSTEP (hash, *cur);
 482         cur++;
 483       }
 484   pfile->buffer->cur = cur;
 485   if (starts_ucn || forms_identifier_p (pfile, false))
 486     {
 487       /* Slower version for identifiers containing UCNs (or $).  */
 488       do {
 489         while (ISIDNUM (*pfile->buffer->cur))
 490           pfile->buffer->cur++;
 491       } while (forms_identifier_p (pfile, false));
 492       result = _cpp_interpret_identifier (pfile, base,
 493                                           pfile->buffer->cur - base);
 494     }
 495   else
 496     {
 497       len = cur - base;
 498       hash = HT_HASHFINISH (hash, len);
 499
 500       result = (cpp_hashnode *)
 501         ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
 502     }
 503
 504   /* Rarely, identifiers require diagnostics when lexed.  */
 505   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 506                         && !pfile->state.skipping, 0))
 507     {
 508       /* It is allowed to poison the same identifier twice.  */
 509       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 510         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 511                    NODE_NAME (result));
 512
 513       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 514          replacement list of a variadic macro.  */
 515       if (result == pfile->spec_nodes.n__VA_ARGS__
 516           && !pfile->state.va_args_ok)
 517         cpp_error (pfile, CPP_DL_PEDWARN,
 518                    "__VA_ARGS__ can only appear in the expansion"
 519                    " of a C99 variadic macro");
 520     }
 521
 522   return result;
 523 }
 524
 525 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 526 static void
 527 lex_number (cpp_reader *pfile, cpp_string *number)
 528 {
 529   const uchar *cur;
 530   const uchar *base;
 531   uchar *dest;
 532
 533   base = pfile->buffer->cur - 1;
 534   do
 535     {
 536       cur = pfile->buffer->cur;
 537
 538       /* N.B. ISIDNUM does not include $.  */
 539       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 540         cur++;
 541
 542       pfile->buffer->cur = cur;
 543     }
 544   while (forms_identifier_p (pfile, false));
 545
 546   number->len = cur - base;
 547   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 548   memcpy (dest, base, number->len);
 549   dest[number->len] = '\0';
 550   number->text = dest;
 551 }
 552
 553 /* Create a token of type TYPE with a literal spelling.  */
 554 static void
 555 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 556                 unsigned int len, enum cpp_ttype type)
 557 {
 558   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 559
 560   memcpy (dest, base, len);
 561   dest[len] = '\0';
 562   token->type = type;
 563   token->val.str.len = len;
 564   token->val.str.text = dest;
 565 }
 566
 567 /* Lexes a string, character constant, or angle-bracketed header file
 568    name.  The stored string contains the spelling, including opening
 569    quote and leading any leading 'L'.  It returns the type of the
 570    literal, or CPP_OTHER if it was not properly terminated.
 571
 572    The spelling is NUL-terminated, but it is not guaranteed that this
 573    is the first NUL since embedded NULs are preserved.  */
 574 static void
 575 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 576 {
 577   bool saw_NUL = false;
 578   const uchar *cur;
 579   cppchar_t terminator;
 580   enum cpp_ttype type;
 581
 582   cur = base;
 583   terminator = *cur++;
 584   if (terminator == 'L')
 585     terminator = *cur++;
 586   if (terminator == '\"')
 587     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 588   else if (terminator == '\'')
 589     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 590   else
 591     terminator = '>', type = CPP_HEADER_NAME;
 592
 593   for (;;)
 594     {
 595       cppchar_t c = *cur++;
 596
 597       /* In #include-style directives, terminators are not escapable.  */
 598       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 599         cur++;
 600       else if (c == terminator)
 601         break;
 602       else if (c == '\n')
 603         {
 604           cur--;
 605           type = CPP_OTHER;
 606           break;
 607         }
 608       else if (c == '\0')
 609         saw_NUL = true;
 610     }
 611
 612   if (saw_NUL && !pfile->state.skipping)
 613     cpp_error (pfile, CPP_DL_WARNING,
 614                "null character(s) preserved in literal");
 615
 616   pfile->buffer->cur = cur;
 617   create_literal (pfile, token, base, cur - base, type);
 618 }
 619
 620 /* The stored comment includes the comment start and any terminator.  */
 621 static void
 622 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 623               cppchar_t type)
 624 {
 625   unsigned char *buffer;
 626   unsigned int len, clen;
 627
 628   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 629
 630   /* C++ comments probably (not definitely) have moved past a new
 631      line, which we don't want to save in the comment.  */
 632   if (is_vspace (pfile->buffer->cur[-1]))
 633     len--;
 634
 635   /* If we are currently in a directive, then we need to store all
 636      C++ comments as C comments internally, and so we need to
 637      allocate a little extra space in that case.
 638
 639      Note that the only time we encounter a directive here is
 640      when we are saving comments in a "#define".  */
 641   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 642
 643   buffer = _cpp_unaligned_alloc (pfile, clen);
 644
 645   token->type = CPP_COMMENT;
 646   token->val.str.len = clen;
 647   token->val.str.text = buffer;
 648
 649   buffer[0] = '/';
 650   memcpy (buffer + 1, from, len - 1);
 651
 652   /* Finish conversion to a C comment, if necessary.  */
 653   if (pfile->state.in_directive && type == '/')
 654     {
 655       buffer[1] = '*';
 656       buffer[clen - 2] = '*';
 657       buffer[clen - 1] = '/';
 658     }
 659 }
 660
 661 /* Allocate COUNT tokens for RUN.  */
 662 void
 663 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 664 {
 665   run->base = XNEWVEC (cpp_token, count);
 666   run->limit = run->base + count;
 667   run->next = NULL;
 668 }
 669
 670 /* Returns the next tokenrun, or creates one if there is none.  */
 671 static tokenrun *
 672 next_tokenrun (tokenrun *run)
 673 {
 674   if (run->next == NULL)
 675     {
 676       run->next = XNEW (tokenrun);
 677       run->next->prev = run;
 678       _cpp_init_tokenrun (run->next, 250);
 679     }
 680
 681   return run->next;
 682 }
 683
 684 /* Allocate a single token that is invalidated at the same time as the
 685    rest of the tokens on the line.  Has its line and col set to the
 686    same as the last lexed token, so that diagnostics appear in the
 687    right place.  */
 688 cpp_token *
 689 _cpp_temp_token (cpp_reader *pfile)
 690 {
 691   cpp_token *old, *result;
 692
 693   old = pfile->cur_token - 1;
 694   if (pfile->cur_token == pfile->cur_run->limit)
 695     {
 696       pfile->cur_run = next_tokenrun (pfile->cur_run);
 697       pfile->cur_token = pfile->cur_run->base;
 698     }
 699
 700   result = pfile->cur_token++;
 701   result->src_loc = old->src_loc;
 702   return result;
 703 }
 704
 705 /* Lex a token into RESULT (external interface).  Takes care of issues
 706    like directive handling, token lookahead, multiple include
 707    optimization and skipping.  */
 708 const cpp_token *
 709 _cpp_lex_token (cpp_reader *pfile)
 710 {
 711   cpp_token *result;
 712
 713   for (;;)
 714     {
 715       if (pfile->cur_token == pfile->cur_run->limit)
 716         {
 717           pfile->cur_run = next_tokenrun (pfile->cur_run);
 718           pfile->cur_token = pfile->cur_run->base;
 719         }
 720
 721       if (pfile->lookaheads)
 722         {
 723           pfile->lookaheads--;
 724           result = pfile->cur_token++;
 725         }
 726       else
 727         result = _cpp_lex_direct (pfile);
 728
 729       if (result->flags & BOL)
 730         {
 731           /* Is this a directive.  If _cpp_handle_directive returns
 732              false, it is an assembler #.  */
 733           if (result->type == CPP_HASH
 734               /* 6.10.3 p 11: Directives in a list of macro arguments
 735                  gives undefined behavior.  This implementation
 736                  handles the directive as normal.  */
 737               && pfile->state.parsing_args != 1
 738               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 739             {
 740               if (pfile->directive_result.type == CPP_PADDING)
 741                 continue;
 742               else
 743                 {
 744                   result = &pfile->directive_result;
 745                   break;
 746                 }
 747             }
 748
 749           if (pfile->cb.line_change && !pfile->state.skipping)
 750             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 751         }
 752
 753       /* We don't skip tokens in directives.  */
 754       if (pfile->state.in_directive)
 755         break;
 756
 757       /* Outside a directive, invalidate controlling macros.  At file
 758          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 759          get here and MI optimization works.  */
 760       pfile->mi_valid = false;
 761
 762       if (!pfile->state.skipping || result->type == CPP_EOF)
 763         break;
 764     }
 765
 766   return result;
 767 }
 768
 769 /* Returns true if a fresh line has been loaded.  */
 770 bool
 771 _cpp_get_fresh_line (cpp_reader *pfile)
 772 {
 773   int return_at_eof;
 774
 775   /* We can't get a new line until we leave the current directive.  */
 776   if (pfile->state.in_directive)
 777     return false;
 778
 779   for (;;)
 780     {
 781       cpp_buffer *buffer = pfile->buffer;
 782
 783       if (!buffer->need_line)
 784         return true;
 785
 786       if (buffer->next_line < buffer->rlimit)
 787         {
 788           _cpp_clean_line (pfile);
 789           return true;
 790         }
 791
 792       /* First, get out of parsing arguments state.  */
 793       if (pfile->state.parsing_args)
 794         return false;
 795
 796       /* End of buffer.  Non-empty files should end in a newline.  */
 797       if (buffer->buf != buffer->rlimit
 798           && buffer->next_line > buffer->rlimit
 799           && !buffer->from_stage3)
 800         {
 801           /* Only warn once.  */
 802           buffer->next_line = buffer->rlimit;
 803           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 804                                CPP_BUF_COLUMN (buffer, buffer->cur),
 805                                "no newline at end of file");
 806         }
 807
 808       return_at_eof = buffer->return_at_eof;
 809       _cpp_pop_buffer (pfile);
 810       if (pfile->buffer == NULL || return_at_eof)
 811         return false;
 812     }
 813 }
 814
 815 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 816   do                                                    \
 817     {                                                   \
 818       result->type = ELSE_TYPE;                         \
 819       if (*buffer->cur == CHAR)                         \
 820         buffer->cur++, result->type = THEN_TYPE;        \
 821     }                                                   \
 822   while (0)
 823
 824 /* Lex a token into pfile->cur_token, which is also incremented, to
 825    get diagnostics pointing to the correct location.
 826
 827    Does not handle issues such as token lookahead, multiple-include
 828    optimization, directives, skipping etc.  This function is only
 829    suitable for use by _cpp_lex_token, and in special cases like
 830    lex_expansion_token which doesn't care for any of these issues.
 831
 832    When meeting a newline, returns CPP_EOF if parsing a directive,
 833    otherwise returns to the start of the token buffer if permissible.
 834    Returns the location of the lexed token.  */
 835 cpp_token *
 836 _cpp_lex_direct (cpp_reader *pfile)
 837 {
 838   cppchar_t c;
 839   cpp_buffer *buffer;
 840   const unsigned char *comment_start;
 841   cpp_token *result = pfile->cur_token++;
 842
 843  fresh_line:
 844   result->flags = 0;
 845   buffer = pfile->buffer;
 846   if (buffer->need_line)
 847     {
 848       if (!_cpp_get_fresh_line (pfile))
 849         {
 850           result->type = CPP_EOF;
 851           if (!pfile->state.in_directive)
 852             {
 853               /* Tell the compiler the line number of the EOF token.  */
 854               result->src_loc = pfile->line_table->highest_line;
 855               result->flags = BOL;
 856             }
 857           return result;
 858         }
 859       if (!pfile->keep_tokens)
 860         {
 861           pfile->cur_run = &pfile->base_run;
 862           result = pfile->base_run.base;
 863           pfile->cur_token = result + 1;
 864         }
 865       result->flags = BOL;
 866       if (pfile->state.parsing_args == 2)
 867         result->flags |= PREV_WHITE;
 868     }
 869   buffer = pfile->buffer;
 870  update_tokens_line:
 871   result->src_loc = pfile->line_table->highest_line;
 872
 873  skipped_white:
 874   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 875       && !pfile->overlaid_buffer)
 876     {
 877       _cpp_process_line_notes (pfile, false);
 878       result->src_loc = pfile->line_table->highest_line;
 879     }
 880   c = *buffer->cur++;
 881
 882   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
 883                                CPP_BUF_COLUMN (buffer, buffer->cur));
 884
 885   switch (c)
 886     {
 887     case ' ': case '\t': case '\f': case '\v': case '\0':
 888       result->flags |= PREV_WHITE;
 889       skip_whitespace (pfile, c);
 890       goto skipped_white;
 891
 892     case '\n':
 893       if (buffer->cur < buffer->rlimit)
 894         CPP_INCREMENT_LINE (pfile, 0);
 895       buffer->need_line = true;
 896       goto fresh_line;
 897
 898     case '0': case '1': case '2': case '3': case '4':
 899     case '5': case '6': case '7': case '8': case '9':
 900       result->type = CPP_NUMBER;
 901       lex_number (pfile, &result->val.str);
 902       break;
 903
 904     case 'L':
 905       /* 'L' may introduce wide characters or strings.  */
 906       if (*buffer->cur == '\'' || *buffer->cur == '"')
 907         {
 908           lex_string (pfile, result, buffer->cur - 1);
 909           break;
 910         }
 911       /* Fall through.  */
 912
 913     case '_':
 914     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 915     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 916     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 917     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 918     case 'y': case 'z':
 919     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 920     case 'G': case 'H': case 'I': case 'J': case 'K':
 921     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 922     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 923     case 'Y': case 'Z':
 924       result->type = CPP_NAME;
 925       result->val.node = lex_identifier (pfile, buffer->cur - 1, false);
 926
 927       /* Convert named operators to their proper types.  */
 928       if (result->val.node->flags & NODE_OPERATOR)
 929         {
 930           result->flags |= NAMED_OP;
 931           result->type = result->val.node->directive_index;
 932         }
 933       break;
 934
 935     case '\'':
 936     case '"':
 937       lex_string (pfile, result, buffer->cur - 1);
 938       break;
 939
 940     case '/':
 941       /* A potential block or line comment.  */
 942       comment_start = buffer->cur;
 943       c = *buffer->cur;
 944
 945       if (c == '*')
 946         {
 947           if (_cpp_skip_block_comment (pfile))
 948             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 949         }
 950       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 951                             || cpp_in_system_header (pfile)))
 952         {
 953           /* Warn about comments only if pedantically GNUC89, and not
 954              in system headers.  */
 955           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 956               && ! buffer->warned_cplusplus_comments)
 957             {
 958               cpp_error (pfile, CPP_DL_PEDWARN,
 959                          "C++ style comments are not allowed in ISO C90");
 960               cpp_error (pfile, CPP_DL_PEDWARN,
 961                          "(this will be reported only once per input file)");
 962               buffer->warned_cplusplus_comments = 1;
 963             }
 964
 965           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 966             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
 967         }
 968       else if (c == '=')
 969         {
 970           buffer->cur++;
 971           result->type = CPP_DIV_EQ;
 972           break;
 973         }
 974       else
 975         {
 976           result->type = CPP_DIV;
 977           break;
 978         }
 979
 980       if (!pfile->state.save_comments)
 981         {
 982           result->flags |= PREV_WHITE;
 983           goto update_tokens_line;
 984         }
 985
 986       /* Save the comment as a token in its own right.  */
 987       save_comment (pfile, result, comment_start, c);
 988       break;
 989
 990     case '<':
 991       if (pfile->state.angled_headers)
 992         {
 993           lex_string (pfile, result, buffer->cur - 1);
 994           break;
 995         }
 996
 997       result->type = CPP_LESS;
 998       if (*buffer->cur == '=')
 999         buffer->cur++, result->type = CPP_LESS_EQ;
1000       else if (*buffer->cur == '<')
1001         {
1002           buffer->cur++;
1003           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1004         }
1005       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1006         {
1007           buffer->cur++;
1008           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1009         }
1010       else if (CPP_OPTION (pfile, digraphs))
1011         {
1012           if (*buffer->cur == ':')
1013             {
1014               buffer->cur++;
1015               result->flags |= DIGRAPH;
1016               result->type = CPP_OPEN_SQUARE;
1017             }
1018           else if (*buffer->cur == '%')
1019             {
1020               buffer->cur++;
1021               result->flags |= DIGRAPH;
1022               result->type = CPP_OPEN_BRACE;
1023             }
1024         }
1025       break;
1026
1027     case '>':
1028       result->type = CPP_GREATER;
1029       if (*buffer->cur == '=')
1030         buffer->cur++, result->type = CPP_GREATER_EQ;
1031       else if (*buffer->cur == '>')
1032         {
1033           buffer->cur++;
1034           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1035         }
1036       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1037         {
1038           buffer->cur++;
1039           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1040         }
1041       break;
1042
1043     case '%':
1044       result->type = CPP_MOD;
1045       if (*buffer->cur == '=')
1046         buffer->cur++, result->type = CPP_MOD_EQ;
1047       else if (CPP_OPTION (pfile, digraphs))
1048         {
1049           if (*buffer->cur == ':')
1050             {
1051               buffer->cur++;
1052               result->flags |= DIGRAPH;
1053               result->type = CPP_HASH;
1054               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1055                 buffer->cur += 2, result->type = CPP_PASTE;
1056             }
1057           else if (*buffer->cur == '>')
1058             {
1059               buffer->cur++;
1060               result->flags |= DIGRAPH;
1061               result->type = CPP_CLOSE_BRACE;
1062             }
1063         }
1064       break;
1065
1066     case '.':
1067       result->type = CPP_DOT;
1068       if (ISDIGIT (*buffer->cur))
1069         {
1070           result->type = CPP_NUMBER;
1071           lex_number (pfile, &result->val.str);
1072         }
1073       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1074         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1075       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1076         buffer->cur++, result->type = CPP_DOT_STAR;
1077       break;
1078
1079     case '+':
1080       result->type = CPP_PLUS;
1081       if (*buffer->cur == '+')
1082         buffer->cur++, result->type = CPP_PLUS_PLUS;
1083       else if (*buffer->cur == '=')
1084         buffer->cur++, result->type = CPP_PLUS_EQ;
1085       break;
1086
1087     case '-':
1088       result->type = CPP_MINUS;
1089       if (*buffer->cur == '>')
1090         {
1091           buffer->cur++;
1092           result->type = CPP_DEREF;
1093           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1094             buffer->cur++, result->type = CPP_DEREF_STAR;
1095         }
1096       else if (*buffer->cur == '-')
1097         buffer->cur++, result->type = CPP_MINUS_MINUS;
1098       else if (*buffer->cur == '=')
1099         buffer->cur++, result->type = CPP_MINUS_EQ;
1100       break;
1101
1102     case '&':
1103       result->type = CPP_AND;
1104       if (*buffer->cur == '&')
1105         buffer->cur++, result->type = CPP_AND_AND;
1106       else if (*buffer->cur == '=')
1107         buffer->cur++, result->type = CPP_AND_EQ;
1108       break;
1109
1110     case '|':
1111       result->type = CPP_OR;
1112       if (*buffer->cur == '|')
1113         buffer->cur++, result->type = CPP_OR_OR;
1114       else if (*buffer->cur == '=')
1115         buffer->cur++, result->type = CPP_OR_EQ;
1116       break;
1117
1118     case ':':
1119       result->type = CPP_COLON;
1120       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1121         buffer->cur++, result->type = CPP_SCOPE;
1122       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1123         {
1124           buffer->cur++;
1125           result->flags |= DIGRAPH;
1126           result->type = CPP_CLOSE_SQUARE;
1127         }
1128       break;
1129
1130     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1131     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1132     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1133     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1134     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1135
1136     case '?': result->type = CPP_QUERY; break;
1137     case '~': result->type = CPP_COMPL; break;
1138     case ',': result->type = CPP_COMMA; break;
1139     case '(': result->type = CPP_OPEN_PAREN; break;
1140     case ')': result->type = CPP_CLOSE_PAREN; break;
1141     case '[': result->type = CPP_OPEN_SQUARE; break;
1142     case ']': result->type = CPP_CLOSE_SQUARE; break;
1143     case '{': result->type = CPP_OPEN_BRACE; break;
1144     case '}': result->type = CPP_CLOSE_BRACE; break;
1145     case ';': result->type = CPP_SEMICOLON; break;
1146
1147       /* @ is a punctuator in Objective-C.  */
1148     case '@': result->type = CPP_ATSIGN; break;
1149
1150     case '$':
1151     case '\\':
1152       {
1153         const uchar *base = --buffer->cur;
1154
1155         if (forms_identifier_p (pfile, true))
1156           {
1157             result->type = CPP_NAME;
1158             result->val.node = lex_identifier (pfile, base, true);
1159             break;
1160           }
1161         buffer->cur++;
1162       }
1163
1164     default:
1165       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1166       break;
1167     }
1168
1169   return result;
1170 }
1171
1172 /* An upper bound on the number of bytes needed to spell TOKEN.
1173    Does not include preceding whitespace.  */
1174 unsigned int
1175 cpp_token_len (const cpp_token *token)
1176 {
1177   unsigned int len;
1178
1179   switch (TOKEN_SPELL (token))
1180     {
1181     default:            len = 4;                                break;
1182     case SPELL_LITERAL: len = token->val.str.len;               break;
1183     case SPELL_IDENT:   len = NODE_LEN (token->val.node) * 10;  break;
1184     }
1185
1186   return len;
1187 }
1188
/* Decode the single UTF-8 sequence starting at NAME and store the
   matching \UXXXXXXXX escape (lower-case hex digits, no terminating
   NUL) in BUFFER.  Exactly 10 bytes are always written to BUFFER.

   Returns the number of bytes of NAME that made up the sequence,
   which is the count of leading one bits in the first byte.  Calls
   abort if a trailing byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int idx;
  int shift;

  /* Each leading one bit of the first byte counts one byte of the
     sequence.  */
  lead = *name;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte sit below the length marker.  */
  code_point = *name & (0x7F >> seq_len);

  /* Every further byte contributes six payload bits.  */
  for (idx = 1; idx < seq_len; idx++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8: not a continuation byte.  */
      if ((*name & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;

  /* Eight hex digits, most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1222
1223
1224 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1225    already contain the enough space to hold the token's spelling.
1226    Returns a pointer to the character after the last character written.
1227    FORSTRING is true if this is to be the spelling after translation
1228    phase 1 (this is different for UCNs).
1229    FIXME: Would be nice if we didn't need the PFILE argument.  */
1230 unsigned char *
1231 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1232                  unsigned char *buffer, bool forstring)
1233 {
1234   switch (TOKEN_SPELL (token))
1235     {
1236     case SPELL_OPERATOR:
1237       {
1238         const unsigned char *spelling;
1239         unsigned char c;
1240
1241         if (token->flags & DIGRAPH)
1242           spelling
1243             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1244         else if (token->flags & NAMED_OP)
1245           goto spell_ident;
1246         else
1247           spelling = TOKEN_NAME (token);
1248
1249         while ((c = *spelling++) != '\0')
1250           *buffer++ = c;
1251       }
1252       break;
1253
1254     spell_ident:
1255     case SPELL_IDENT:
1256       if (forstring)
1257         {
1258           memcpy (buffer, NODE_NAME (token->val.node),
1259                   NODE_LEN (token->val.node));
1260           buffer += NODE_LEN (token->val.node);
1261         }
1262       else
1263         {
1264           size_t i;
1265           const unsigned char * name = NODE_NAME (token->val.node);
1266
1267           for (i = 0; i < NODE_LEN (token->val.node); i++)
1268             if (name[i] & ~0x7F)
1269               {
1270                 i += utf8_to_ucn (buffer, name + i) - 1;
1271                 buffer += 10;
1272               }
1273             else
1274               *buffer++ = NODE_NAME (token->val.node)[i];
1275         }
1276       break;
1277
1278     case SPELL_LITERAL:
1279       memcpy (buffer, token->val.str.text, token->val.str.len);
1280       buffer += token->val.str.len;
1281       break;
1282
1283     case SPELL_NONE:
1284       cpp_error (pfile, CPP_DL_ICE,
1285                  "unspellable token %s", TOKEN_NAME (token));
1286       break;
1287     }
1288
1289   return buffer;
1290 }
1291
1292 /* Returns TOKEN spelt as a null-terminated string.  The string is
1293    freed when the reader is destroyed.  Useful for diagnostics.  */
1294 unsigned char *
1295 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1296 {
1297   unsigned int len = cpp_token_len (token) + 1;
1298   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1299
1300   end = cpp_spell_token (pfile, token, start, false);
1301   end[0] = '\0';
1302
1303   return start;
1304 }
1305
1306 /* Used by C front ends, which really should move to using
1307    cpp_token_as_text.  */
1308 const char *
1309 cpp_type2name (enum cpp_ttype type)
1310 {
1311   return (const char *) token_spellings[type].name;
1312 }
1313
1314 /* Writes the spelling of token to FP, without any preceding space.
1315    Separated from cpp_spell_token for efficiency - to avoid stdio
1316    double-buffering.  */
1317 void
1318 cpp_output_token (const cpp_token *token, FILE *fp)
1319 {
1320   switch (TOKEN_SPELL (token))
1321     {
1322     case SPELL_OPERATOR:
1323       {
1324         const unsigned char *spelling;
1325         int c;
1326
1327         if (token->flags & DIGRAPH)
1328           spelling
1329             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1330         else if (token->flags & NAMED_OP)
1331           goto spell_ident;
1332         else
1333           spelling = TOKEN_NAME (token);
1334
1335         c = *spelling;
1336         do
1337           putc (c, fp);
1338         while ((c = *++spelling) != '\0');
1339       }
1340       break;
1341
1342     spell_ident:
1343     case SPELL_IDENT:
1344       {
1345         size_t i;
1346         const unsigned char * name = NODE_NAME (token->val.node);
1347
1348         for (i = 0; i < NODE_LEN (token->val.node); i++)
1349           if (name[i] & ~0x7F)
1350             {
1351               unsigned char buffer[10];
1352               i += utf8_to_ucn (buffer, name + i) - 1;
1353               fwrite (buffer, 1, 10, fp);
1354             }
1355           else
1356             fputc (NODE_NAME (token->val.node)[i], fp);
1357       }
1358       break;
1359
1360     case SPELL_LITERAL:
1361       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1362       break;
1363
1364     case SPELL_NONE:
1365       /* An error, most probably.  */
1366       break;
1367     }
1368 }
1369
1370 /* Compare two tokens.  */
1371 int
1372 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1373 {
1374   if (a->type == b->type && a->flags == b->flags)
1375     switch (TOKEN_SPELL (a))
1376       {
1377       default:                  /* Keep compiler happy.  */
1378       case SPELL_OPERATOR:
1379         return 1;
1380       case SPELL_NONE:
1381         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1382       case SPELL_IDENT:
1383         return a->val.node == b->val.node;
1384       case SPELL_LITERAL:
1385         return (a->val.str.len == b->val.str.len
1386                 && !memcmp (a->val.str.text, b->val.str.text,
1387                             a->val.str.len));
1388       }
1389
1390   return 0;
1391 }
1392
1393 /* Returns nonzero if a space should be inserted to avoid an
1394    accidental token paste for output.  For simplicity, it is
1395    conservative, and occasionally advises a space where one is not
1396    needed, e.g. "." and ".2".  */
1397 int
1398 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1399                  const cpp_token *token2)
1400 {
1401   enum cpp_ttype a = token1->type, b = token2->type;
1402   cppchar_t c;
1403
1404   if (token1->flags & NAMED_OP)
1405     a = CPP_NAME;
1406   if (token2->flags & NAMED_OP)
1407     b = CPP_NAME;
1408
1409   c = EOF;
1410   if (token2->flags & DIGRAPH)
1411     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1412   else if (token_spellings[b].category == SPELL_OPERATOR)
1413     c = token_spellings[b].name[0];
1414
1415   /* Quickly get everything that can paste with an '='.  */
1416   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1417     return 1;
1418
1419   switch (a)
1420     {
1421     case CPP_GREATER:   return c == '>' || c == '?';
1422     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1423     case CPP_PLUS:      return c == '+';
1424     case CPP_MINUS:     return c == '-' || c == '>';
1425     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1426     case CPP_MOD:       return c == ':' || c == '>';
1427     case CPP_AND:       return c == '&';
1428     case CPP_OR:        return c == '|';
1429     case CPP_COLON:     return c == ':' || c == '>';
1430     case CPP_DEREF:     return c == '*';
1431     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1432     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1433     case CPP_NAME:      return ((b == CPP_NUMBER
1434                                  && name_p (pfile, &token2->val.str))
1435                                 || b == CPP_NAME
1436                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1437     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1438                                 || c == '.' || c == '+' || c == '-');
1439                                       /* UCNs */
1440     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1441                                  && b == CPP_NAME)
1442                                 || (CPP_OPTION (pfile, objc)
1443                                     && token1->val.str.text[0] == '@'
1444                                     && (b == CPP_NAME || b == CPP_STRING)));
1445     default:            break;
1446     }
1447
1448   return 0;
1449 }
1450
1451 /* Output all the remaining tokens on the current line, and a newline
1452    character, to FP.  Leading whitespace is removed.  If there are
1453    macros, special token padding is not performed.  */
1454 void
1455 cpp_output_line (cpp_reader *pfile, FILE *fp)
1456 {
1457   const cpp_token *token;
1458
1459   token = cpp_get_token (pfile);
1460   while (token->type != CPP_EOF)
1461     {
1462       cpp_output_token (token, fp);
1463       token = cpp_get_token (pfile);
1464       if (token->flags & PREV_WHITE)
1465         putc (' ', fp);
1466     }
1467
1468   putc ('\n', fp);
1469 }
1470
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a new buffer is given.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF->cur to
   BUFF->limit.  Every macro argument is parenthesized so that any
   expression may be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1485
1486 /* Create a new allocation buffer.  Place the control block at the end
1487    of the buffer, so that buffer overflows will cause immediate chaos.  */
1488 static _cpp_buff *
1489 new_buff (size_t len)
1490 {
1491   _cpp_buff *result;
1492   unsigned char *base;
1493
1494   if (len < MIN_BUFF_SIZE)
1495     len = MIN_BUFF_SIZE;
1496   len = CPP_ALIGN (len);
1497
1498   base = xmalloc (len + sizeof (_cpp_buff));
1499   result = (_cpp_buff *) (base + len);
1500   result->base = base;
1501   result->cur = base;
1502   result->limit = base + len;
1503   result->next = NULL;
1504   return result;
1505 }
1506
1507 /* Place a chain of unwanted allocation buffers on the free list.  */
1508 void
1509 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1510 {
1511   _cpp_buff *end = buff;
1512
1513   while (end->next)
1514     end = end->next;
1515   end->next = pfile->free_buffs;
1516   pfile->free_buffs = buff;
1517 }
1518
1519 /* Return a free buffer of size at least MIN_SIZE.  */
1520 _cpp_buff *
1521 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1522 {
1523   _cpp_buff *result, **p;
1524
1525   for (p = &pfile->free_buffs;; p = &(*p)->next)
1526     {
1527       size_t size;
1528
1529       if (*p == NULL)
1530         return new_buff (min_size);
1531       result = *p;
1532       size = result->limit - result->base;
1533       /* Return a buffer that's big enough, but don't waste one that's
1534          way too big.  */
1535       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1536         break;
1537     }
1538
1539   *p = result->next;
1540   result->next = NULL;
1541   result->cur = result->base;
1542   return result;
1543 }
1544
1545 /* Creates a new buffer with enough space to hold the uncommitted
1546    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1547    the excess bytes to the new buffer.  Chains the new buffer after
1548    BUFF, and returns the new buffer.  */
1549 _cpp_buff *
1550 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1551 {
1552   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1553   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1554
1555   buff->next = new_buff;
1556   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1557   return new_buff;
1558 }
1559
1560 /* Creates a new buffer with enough space to hold the uncommitted
1561    remaining bytes of the buffer pointed to by BUFF, and at least
1562    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1563    Chains the new buffer before the buffer pointed to by BUFF, and
1564    updates the pointer to point to the new buffer.  */
1565 void
1566 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1567 {
1568   _cpp_buff *new_buff, *old_buff = *pbuff;
1569   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1570
1571   new_buff = _cpp_get_buff (pfile, size);
1572   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1573   new_buff->next = old_buff;
1574   *pbuff = new_buff;
1575 }
1576
1577 /* Free a chain of buffers starting at BUFF.  */
1578 void
1579 _cpp_free_buff (_cpp_buff *buff)
1580 {
1581   _cpp_buff *next;
1582
1583   for (; buff; buff = next)
1584     {
1585       next = buff->next;
1586       free (buff->base);
1587     }
1588 }
1589
1590 /* Allocate permanent, unaligned storage of length LEN.  */
1591 unsigned char *
1592 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1593 {
1594   _cpp_buff *buff = pfile->u_buff;
1595   unsigned char *result = buff->cur;
1596
1597   if (len > (size_t) (buff->limit - result))
1598     {
1599       buff = _cpp_get_buff (pfile, len);
1600       buff->next = pfile->u_buff;
1601       pfile->u_buff = buff;
1602       result = buff->cur;
1603     }
1604
1605   buff->cur = result + len;
1606   return result;
1607 }
1608
1609 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1610    That buffer is used for growing allocations when saving macro
1611    replacement lists in a #define, and when parsing an answer to an
1612    assertion in #assert, #unassert or #if (and therefore possibly
1613    whilst expanding macros).  It therefore must not be used by any
1614    code that they might call: specifically the lexer and the guts of
1615    the macro expander.
1616
1617    All existing other uses clearly fit this restriction: storing
1618    registered pragmas during initialization.  */
1619 unsigned char *
1620 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1621 {
1622   _cpp_buff *buff = pfile->a_buff;
1623   unsigned char *result = buff->cur;
1624
1625   if (len > (size_t) (buff->limit - result))
1626     {
1627       buff = _cpp_get_buff (pfile, len);
1628       buff->next = pfile->a_buff;
1629       pfile->a_buff = buff;
1630       result = buff->cur;
1631     }
1632
1633   buff->cur = result + len;
1634   return result;
1635 }
1636
1637 /* Say which field of TOK is in use.  */
1638
1639 enum cpp_token_fld_kind
1640 cpp_token_val_index (cpp_token *tok)
1641 {
1642   switch (TOKEN_SPELL (tok))
1643     {
1644     case SPELL_IDENT:
1645       return CPP_TOKEN_FLD_NODE;
1646     case SPELL_LITERAL:
1647       return CPP_TOKEN_FLD_STR;
1648     case SPELL_NONE:
1649       if (tok->type == CPP_MACRO_ARG)
1650         return CPP_TOKEN_FLD_ARG_NO;
1651       else if (tok->type == CPP_PADDING)
1652         return CPP_TOKEN_FLD_SOURCE;
1653       else if (tok->type == CPP_PRAGMA)
1654         return CPP_TOKEN_FLD_STR;
1655       /* else fall through */
1656     default:
1657       return CPP_TOKEN_FLD_NONE;
1658     }
1659 }