libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4    Contributed by Per Bothner, 1994-95.
   5    Based on CCCP program by Paul Rubin, June 1986
   6    Adapted to ANSI C, Richard Stallman, Jan 1987
   7    Broken out to separate file, Zack Weinberg, Mar 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 3, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "internal.h"
  27
/* Spelling category of a token type, stored per token type in the
   token_spellings table and read back through TOKEN_SPELL.
   NOTE(review): how each category is turned into text is decided by
   code outside this excerpt -- confirm there before changing values.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category given to every OP() table entry.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
  35
/* One entry of the token_spellings table: the spelling category of a
   token type together with a name string for it.  */
struct token_spelling
{
  enum spell_type category;     /* Read through TOKEN_SPELL.  */
  const unsigned char *name;    /* Read through TOKEN_NAME.  */
};
  41
/* Spellings of the six digraph forms.  NOTE(review): the way this
   table is indexed is decided by code outside this excerpt -- confirm
   before reordering the entries.  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
  44
/* Build token_spellings by expanding TTYPE_TABLE (defined outside this
   excerpt) with local definitions of OP and TK.  An OP(e, s) entry
   becomes { SPELL_OPERATOR, s }; a TK(e, s) entry becomes
   { SPELL_<s>, "<e>" }, i.e. its name is the stringified first
   argument.  Both helper macros are undefined again right after the
   table so they do not leak into the rest of the file.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
#define TK(e, s) { SPELL_ ## s,    UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Accessors for the table above: spelling category and name of the
   token pointed to by TOKEN, looked up by its type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  53
/* Forward declarations of file-local helpers.  Some of them are
   defined beyond the part of the file that follows immediately.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
                            unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
  67
  68
  69 /* Utility routine:
  70
  71    Compares, the token TOKEN to the NUL-terminated string STRING.
  72    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  73 int
  74 cpp_ideq (const cpp_token *token, const char *string)
  75 {
  76   if (token->type != CPP_NAME)
  77     return 0;
  78
  79   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
  80 }
  81
  82 /* Record a note TYPE at byte POS into the current cleaned logical
  83    line.  */
  84 static void
  85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  86 {
  87   if (buffer->notes_used == buffer->notes_cap)
  88     {
  89       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  90       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  91                                   buffer->notes_cap);
  92     }
  93
  94   buffer->notes[buffer->notes_used].pos = pos;
  95   buffer->notes[buffer->notes_used].type = type;
  96   buffer->notes_used++;
  97 }
  98
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line starting at buffer->next_line is scanned up to its
   terminating '\n' or '\r' (a "\r\n" pair is consumed as a single
   terminator).  Unless the buffer is flagged from_stage3,
   backslash-newline sequences are spliced out in place, every trigraph
   gets a line note, and trigraphs are replaced by their
   single-character equivalent when the trigraphs option is on.

   On return buffer->cur and buffer->line_base point at the start of
   the line, the cleaned line ends in '\n', a sentinel note follows the
   last real note, and buffer->next_line points just past the input
   that was consumed.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;
  uchar c, *d, *p;

  buffer = pfile->buffer;
  /* Start a fresh set of notes for this line.  */
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* One before the line start, because every read below is *++s.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* Most recent backslash seen by the fast path, if any.  */
      const uchar *pbackslash = NULL;

      /* Short circuit for the common case of an un-escaped line with
         no trigraphs.  The primary win here is by not writing any
         data back to memory until we have to.  */
      for (;;)
        {
          c = *++s;
          if (__builtin_expect (c == '\n', false)
              || __builtin_expect (c == '\r', false))
            {
              /* D marks where the terminating '\n' will be stored.  */
              d = (uchar *) s;

              /* Terminator found at rlimit: nothing further to look at.  */
              if (__builtin_expect (s == buffer->rlimit, false))
                goto done;

              /* DOS line ending? */
              if (__builtin_expect (c == '\r', false)
                  && s[1] == '\n')
                {
                  s++;
                  if (s == buffer->rlimit)
                    goto done;
                }

              /* No backslash on this line, so it cannot be escaped.  */
              if (__builtin_expect (pbackslash == NULL, true))
                goto done;

              /* Check for escaped newline.  */
              p = d;
              /* Step back over non-vertical space before the terminator;
                 the backslash seen earlier guarantees this stops.  */
              while (is_nvspace (p[-1]))
                p--;
              if (p - 1 != pbackslash)
                goto done;

              /* Have an escaped newline; process it and proceed to
                 the slow path.  The note type is ' ' when whitespace
                 separated the backslash from the newline, '\\'
                 otherwise.  */
              add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
              /* The slow path stores with *++d, so its first store
                 overwrites the backslash at p - 1.  */
              d = p - 2;
              /* Remember where the continuation begins; the slow path
                 uses next_line to bound its backward scan.  */
              buffer->next_line = p - 1;
              break;
            }
          if (__builtin_expect (c == '\\', false))
            pbackslash = s;
          else if (__builtin_expect (c == '?', false)
                   && __builtin_expect (s[1] == '?', false)
                   && _cpp_trigraph_map[s[2]])
            {
              /* Have a trigraph.  We may or may not have to convert
                 it.  Add a line note regardless, for -Wtrigraphs.  */
              add_line_note (buffer, s, s[2]);
              if (CPP_OPTION (pfile, trigraphs))
                {
                  /* We do, and that means we have to switch to the
                     slow path.  */
                  d = (uchar *) s;
                  *d = _cpp_trigraph_map[s[2]];
                  /* Skip the two remaining bytes of the trigraph.  */
                  s += 2;
                  break;
                }
            }
        }


      /* Slow path: S reads the raw input while D writes the cleaned
         line behind it.  */
      for (;;)
        {
          c = *++s;
          *++d = c;

          if (c == '\n' || c == '\r')
            {
              /* Handle DOS line endings.  */
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
                s++;
              if (s == buffer->rlimit)
                break;

              /* Escaped?  */
              p = d;
              /* Scan back over non-vertical space, but never past the
                 start of the current continuation.  */
              while (p != buffer->next_line && is_nvspace (p[-1]))
                p--;
              if (p == buffer->next_line || p[-1] != '\\')
                break;

              /* Same bookkeeping as for the splice on the fast path.  */
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
              d = p - 2;
              buffer->next_line = p - 1;
            }
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
            {
              /* Add a note regardless, for the benefit of -Wtrigraphs.
                 The note is anchored at the output position D.  */
              add_line_note (buffer, d, s[2]);
              if (CPP_OPTION (pfile, trigraphs))
                {
                  /* Replace the '?' just stored and skip the rest.  */
                  *d = _cpp_trigraph_map[s[2]];
                  s += 2;
                }
            }
        }
    }
  else
    {
      /* from_stage3 buffers get no splicing or trigraph handling; just
         locate the line terminator.  */
      do
        s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
        s++;
    }

 done:
  /* Whatever the terminator was, the cleaned line ends in '\n'.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
 234
 235 /* Return true if the trigraph indicated by NOTE should be warned
 236    about in a comment.  */
 237 static bool
 238 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 239 {
 240   const uchar *p;
 241
 242   /* Within comments we don't warn about trigraphs, unless the
 243      trigraph forms an escaped newline, as that may change
 244      behavior.  */
 245   if (note->type != '/')
 246     return false;
 247
 248   /* If -trigraphs, then this was an escaped newline iff the next note
 249      is coincident.  */
 250   if (CPP_OPTION (pfile, trigraphs))
 251     return note[1].pos == note->pos;
 252
 253   /* Otherwise, see if this forms an escaped newline.  */
 254   p = note->pos + 3;
 255   while (is_nvspace (*p))
 256     p++;
 257
 258   /* There might have been escaped newlines between the trigraph and the
 259      newline we found.  Hence the position test.  */
 260   return (*p == '\n' && p < note[1].pos);
 261 }
 262
 263 /* Process the notes created by add_line_note as far as the current
 264    location.  */
 265 void
 266 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 267 {
 268   cpp_buffer *buffer = pfile->buffer;
 269
 270   for (;;)
 271     {
 272       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 273       unsigned int col;
 274
 275       if (note->pos > buffer->cur)
 276         break;
 277
 278       buffer->cur_note++;
 279       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 280
 281       if (note->type == '\\' || note->type == ' ')
 282         {
 283           if (note->type == ' ' && !in_comment)
 284             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 285                                  "backslash and newline separated by space");
 286
 287           if (buffer->next_line > buffer->rlimit)
 288             {
 289               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 290                                    "backslash-newline at end of file");
 291               /* Prevent "no newline at end of file" warning.  */
 292               buffer->next_line = buffer->rlimit;
 293             }
 294
 295           buffer->line_base = note->pos;
 296           CPP_INCREMENT_LINE (pfile, 0);
 297         }
 298       else if (_cpp_trigraph_map[note->type])
 299         {
 300           if (CPP_OPTION (pfile, warn_trigraphs)
 301               && (!in_comment || warn_in_comment (pfile, note)))
 302             {
 303               if (CPP_OPTION (pfile, trigraphs))
 304                 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
 305                                        pfile->line_table->highest_line, col,
 306                                        "trigraph ??%c converted to %c",
 307                                        note->type,
 308                                        (int) _cpp_trigraph_map[note->type]);
 309               else
 310                 {
 311                   cpp_warning_with_line
 312                     (pfile, CPP_W_TRIGRAPHS,
 313                      pfile->line_table->highest_line, col,
 314                      "trigraph ??%c ignored, use -trigraphs to enable",
 315                      note->type);
 316                 }
 317             }
 318         }
 319       else if (note->type == 0)
 320         /* Already processed in lex_raw_string.  */;
 321       else
 322         abort ();
 323     }
 324 }
 325
 326 /* Skip a C-style block comment.  We find the end of the comment by
 327    seeing if an asterisk is before every '/' we encounter.  Returns
 328    nonzero if comment terminated by EOF, zero otherwise.
 329
 330    Buffer->cur points to the initial asterisk of the comment.  */
 331 bool
 332 _cpp_skip_block_comment (cpp_reader *pfile)
 333 {
 334   cpp_buffer *buffer = pfile->buffer;
 335   const uchar *cur = buffer->cur;
 336   uchar c;
 337
 338   cur++;
 339   if (*cur == '/')
 340     cur++;
 341
 342   for (;;)
 343     {
 344       /* People like decorating comments with '*', so check for '/'
 345          instead for efficiency.  */
 346       c = *cur++;
 347
 348       if (c == '/')
 349         {
 350           if (cur[-2] == '*')
 351             break;
 352
 353           /* Warn about potential nested comments, but not if the '/'
 354              comes immediately before the true comment delimiter.
 355              Don't bother to get it right across escaped newlines.  */
 356           if (CPP_OPTION (pfile, warn_comments)
 357               && cur[0] == '*' && cur[1] != '/')
 358             {
 359               buffer->cur = cur;
 360               cpp_warning_with_line (pfile, CPP_W_COMMENTS,
 361                                      pfile->line_table->highest_line,
 362                                      CPP_BUF_COL (buffer),
 363                                      "\"/*\" within comment");
 364             }
 365         }
 366       else if (c == '\n')
 367         {
 368           unsigned int cols;
 369           buffer->cur = cur - 1;
 370           _cpp_process_line_notes (pfile, true);
 371           if (buffer->next_line >= buffer->rlimit)
 372             return true;
 373           _cpp_clean_line (pfile);
 374
 375           cols = buffer->next_line - buffer->line_base;
 376           CPP_INCREMENT_LINE (pfile, cols);
 377
 378           cur = buffer->cur;
 379         }
 380     }
 381
 382   buffer->cur = cur;
 383   _cpp_process_line_notes (pfile, true);
 384   return false;
 385 }
 386
 387 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 388    terminating newline.  Handles escaped newlines.  Returns nonzero
 389    if a multiline comment.  */
 390 static int
 391 skip_line_comment (cpp_reader *pfile)
 392 {
 393   cpp_buffer *buffer = pfile->buffer;
 394   source_location orig_line = pfile->line_table->highest_line;
 395
 396   while (*buffer->cur != '\n')
 397     buffer->cur++;
 398
 399   _cpp_process_line_notes (pfile, true);
 400   return orig_line != pfile->line_table->highest_line;
 401 }
 402
 403 /* Skips whitespace, saving the next non-whitespace character.  */
 404 static void
 405 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 406 {
 407   cpp_buffer *buffer = pfile->buffer;
 408   bool saw_NUL = false;
 409
 410   do
 411     {
 412       /* Horizontal space always OK.  */
 413       if (c == ' ' || c == '\t')
 414         ;
 415       /* Just \f \v or \0 left.  */
 416       else if (c == '\0')
 417         saw_NUL = true;
 418       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 419         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 420                              CPP_BUF_COL (buffer),
 421                              "%s in preprocessing directive",
 422                              c == '\f' ? "form feed" : "vertical tab");
 423
 424       c = *buffer->cur++;
 425     }
 426   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 427   while (is_nvspace (c));
 428
 429   if (saw_NUL)
 430     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 431
 432   buffer->cur--;
 433 }
 434
 435 /* See if the characters of a number token are valid in a name (no
 436    '.', '+' or '-').  */
 437 static int
 438 name_p (cpp_reader *pfile, const cpp_string *string)
 439 {
 440   unsigned int i;
 441
 442   for (i = 0; i < string->len; i++)
 443     if (!is_idchar (string->text[i]))
 444       return 0;
 445
 446   return 1;
 447 }
 448
 449 /* After parsing an identifier or other sequence, produce a warning about
 450    sequences not in NFC/NFKC.  */
 451 static void
 452 warn_about_normalization (cpp_reader *pfile,
 453                           const cpp_token *token,
 454                           const struct normalize_state *s)
 455 {
 456   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
 457       && !pfile->state.skipping)
 458     {
 459       /* Make sure that the token is printed using UCNs, even
 460          if we'd otherwise happily print UTF-8.  */
 461       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
 462       size_t sz;
 463
 464       sz = cpp_spell_token (pfile, token, buf, false) - buf;
 465       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
 466         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
 467                                "`%.*s' is not in NFKC", (int) sz, buf);
 468       else
 469         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
 470                                "`%.*s' is not in NFC", (int) sz, buf);
 471     }
 472 }
 473
 474 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 475    an identifier.  FIRST is TRUE if this starts an identifier.  */
 476 static bool
 477 forms_identifier_p (cpp_reader *pfile, int first,
 478                     struct normalize_state *state)
 479 {
 480   cpp_buffer *buffer = pfile->buffer;
 481
 482   if (*buffer->cur == '$')
 483     {
 484       if (!CPP_OPTION (pfile, dollars_in_ident))
 485         return false;
 486
 487       buffer->cur++;
 488       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 489         {
 490           CPP_OPTION (pfile, warn_dollars) = 0;
 491           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 492         }
 493
 494       return true;
 495     }
 496
 497   /* Is this a syntactically valid UCN?  */
 498   if (CPP_OPTION (pfile, extended_identifiers)
 499       && *buffer->cur == '\\'
 500       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 501     {
 502       buffer->cur += 2;
 503       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 504                           state))
 505         return true;
 506       buffer->cur -= 2;
 507     }
 508
 509   return false;
 510 }
 511
 512 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
 513 static cpp_hashnode *
 514 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
 515 {
 516   cpp_hashnode *result;
 517   const uchar *cur;
 518   unsigned int len;
 519   unsigned int hash = HT_HASHSTEP (0, *base);
 520
 521   cur = base + 1;
 522   while (ISIDNUM (*cur))
 523     {
 524       hash = HT_HASHSTEP (hash, *cur);
 525       cur++;
 526     }
 527   len = cur - base;
 528   hash = HT_HASHFINISH (hash, len);
 529   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
 530                                               base, len, hash, HT_ALLOC));
 531
 532   /* Rarely, identifiers require diagnostics when lexed.  */
 533   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 534                         && !pfile->state.skipping, 0))
 535     {
 536       /* It is allowed to poison the same identifier twice.  */
 537       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 538         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 539                    NODE_NAME (result));
 540
 541       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 542          replacement list of a variadic macro.  */
 543       if (result == pfile->spec_nodes.n__VA_ARGS__
 544           && !pfile->state.va_args_ok)
 545         cpp_error (pfile, CPP_DL_PEDWARN,
 546                    "__VA_ARGS__ can only appear in the expansion"
 547                    " of a C99 variadic macro");
 548
 549       /* For -Wc++-compat, warn about use of C++ named operators.  */
 550       if (result->flags & NODE_WARN_OPERATOR)
 551         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
 552                      "identifier \"%s\" is a special operator name in C++",
 553                      NODE_NAME (result));
 554     }
 555
 556   return result;
 557 }
 558
 559 /* Get the cpp_hashnode of an identifier specified by NAME in
 560    the current cpp_reader object.  If none is found, NULL is returned.  */
 561 cpp_hashnode *
 562 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
 563 {
 564   cpp_hashnode *result;
 565   result = lex_identifier_intern (pfile, (uchar *) name);
 566   return result;
 567 }
 568
 569 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 570 static cpp_hashnode *
 571 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
 572                 struct normalize_state *nst)
 573 {
 574   cpp_hashnode *result;
 575   const uchar *cur;
 576   unsigned int len;
 577   unsigned int hash = HT_HASHSTEP (0, *base);
 578
 579   cur = pfile->buffer->cur;
 580   if (! starts_ucn)
 581     while (ISIDNUM (*cur))
 582       {
 583         hash = HT_HASHSTEP (hash, *cur);
 584         cur++;
 585       }
 586   pfile->buffer->cur = cur;
 587   if (starts_ucn || forms_identifier_p (pfile, false, nst))
 588     {
 589       /* Slower version for identifiers containing UCNs (or $).  */
 590       do {
 591         while (ISIDNUM (*pfile->buffer->cur))
 592           {
 593             pfile->buffer->cur++;
 594             NORMALIZE_STATE_UPDATE_IDNUM (nst);
 595           }
 596       } while (forms_identifier_p (pfile, false, nst));
 597       result = _cpp_interpret_identifier (pfile, base,
 598                                           pfile->buffer->cur - base);
 599     }
 600   else
 601     {
 602       len = cur - base;
 603       hash = HT_HASHFINISH (hash, len);
 604
 605       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
 606                                                   base, len, hash, HT_ALLOC));
 607     }
 608
 609   /* Rarely, identifiers require diagnostics when lexed.  */
 610   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 611                         && !pfile->state.skipping, 0))
 612     {
 613       /* It is allowed to poison the same identifier twice.  */
 614       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 615         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 616                    NODE_NAME (result));
 617
 618       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 619          replacement list of a variadic macro.  */
 620       if (result == pfile->spec_nodes.n__VA_ARGS__
 621           && !pfile->state.va_args_ok)
 622         cpp_error (pfile, CPP_DL_PEDWARN,
 623                    "__VA_ARGS__ can only appear in the expansion"
 624                    " of a C99 variadic macro");
 625
 626       /* For -Wc++-compat, warn about use of C++ named operators.  */
 627       if (result->flags & NODE_WARN_OPERATOR)
 628         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
 629                      "identifier \"%s\" is a special operator name in C++",
 630                      NODE_NAME (result));
 631     }
 632
 633   return result;
 634 }
 635
 636 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 637 static void
 638 lex_number (cpp_reader *pfile, cpp_string *number,
 639             struct normalize_state *nst)
 640 {
 641   const uchar *cur;
 642   const uchar *base;
 643   uchar *dest;
 644
 645   base = pfile->buffer->cur - 1;
 646   do
 647     {
 648       cur = pfile->buffer->cur;
 649
 650       /* N.B. ISIDNUM does not include $.  */
 651       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 652         {
 653           cur++;
 654           NORMALIZE_STATE_UPDATE_IDNUM (nst);
 655         }
 656
 657       pfile->buffer->cur = cur;
 658     }
 659   while (forms_identifier_p (pfile, false, nst));
 660
 661   number->len = cur - base;
 662   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 663   memcpy (dest, base, number->len);
 664   dest[number->len] = '\0';
 665   number->text = dest;
 666 }
 667
 668 /* Create a token of type TYPE with a literal spelling.  */
 669 static void
 670 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 671                 unsigned int len, enum cpp_ttype type)
 672 {
 673   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 674
 675   memcpy (dest, base, len);
 676   dest[len] = '\0';
 677   token->type = type;
 678   token->val.str.len = len;
 679   token->val.str.text = dest;
 680 }
 681
 682 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
 683    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
 684
 685 static void
 686 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
 687                 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
 688 {
 689   _cpp_buff *first_buff = *first_buff_p;
 690   _cpp_buff *last_buff = *last_buff_p;
 691
 692   if (first_buff == NULL)
 693     first_buff = last_buff = _cpp_get_buff (pfile, len);
 694   else if (len > BUFF_ROOM (last_buff))
 695     {
 696       size_t room = BUFF_ROOM (last_buff);
 697       memcpy (BUFF_FRONT (last_buff), base, room);
 698       BUFF_FRONT (last_buff) += room;
 699       base += room;
 700       len -= room;
 701       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
 702     }
 703
 704   memcpy (BUFF_FRONT (last_buff), base, len);
 705   BUFF_FRONT (last_buff) += len;
 706
 707   *first_buff_p = first_buff;
 708   *last_buff_p = last_buff;
 709 }
 710
 711 /* Lexes a raw string.  The stored string contains the spelling, including
 712    double quotes, delimiter string, '(' and ')', any leading
 713    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
 714    literal, or CPP_OTHER if it was not properly terminated.
 715
 716    The spelling is NUL-terminated, but it is not guaranteed that this
 717    is the first NUL since embedded NULs are preserved.  */
 718
 719 static void
 720 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 721                 const uchar *cur)
 722 {
 723   source_location saw_NUL = 0;
 724   const uchar *raw_prefix;
 725   unsigned int raw_prefix_len = 0;
 726   enum cpp_ttype type;
 727   size_t total_len = 0;
 728   _cpp_buff *first_buff = NULL, *last_buff = NULL;
 729   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
 730
 731   type = (*base == 'L' ? CPP_WSTRING :
 732           *base == 'U' ? CPP_STRING32 :
 733           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 734           : CPP_STRING);
 735
 736   raw_prefix = cur + 1;
 737   while (raw_prefix_len < 16)
 738     {
 739       switch (raw_prefix[raw_prefix_len])
 740         {
 741         case ' ': case '(': case ')': case '\\': case '\t':
 742         case '\v': case '\f': case '\n': default:
 743           break;
 744         /* Basic source charset except the above chars.  */
 745         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 746         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 747         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 748         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 749         case 'y': case 'z':
 750         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 751         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 752         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 753         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 754         case 'Y': case 'Z':
 755         case '0': case '1': case '2': case '3': case '4': case '5':
 756         case '6': case '7': case '8': case '9':
 757         case '_': case '{': case '}': case '#': case '[': case ']':
 758         case '<': case '>': case '%': case ':': case ';': case '.':
 759         case '?': case '*': case '+': case '-': case '/': case '^':
 760         case '&': case '|': case '~': case '!': case '=': case ',':
 761         case '"': case '\'':
 762           raw_prefix_len++;
 763           continue;
 764         }
 765       break;
 766     }
 767
 768   if (raw_prefix[raw_prefix_len] != '(')
 769     {
 770       int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
 771                 + 1;
 772       if (raw_prefix_len == 16)
 773         cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
 774                              "raw string delimiter longer than 16 characters");
 775       else
 776         cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
 777                              "invalid character '%c' in raw string delimiter",
 778                              (int) raw_prefix[raw_prefix_len]);
 779       pfile->buffer->cur = raw_prefix - 1;
 780       create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
 781       return;
 782     }
 783
 784   cur = raw_prefix + raw_prefix_len + 1;
 785   for (;;)
 786     {
 787 #define BUF_APPEND(STR,LEN)                                     \
 788       do {                                                      \
 789         bufring_append (pfile, (const uchar *)(STR), (LEN),     \
 790                         &first_buff, &last_buff);               \
 791         total_len += (LEN);                                     \
 792       } while (0);
 793
 794       cppchar_t c;
 795
 796       /* If we previously performed any trigraph or line splicing
 797          transformations, undo them within the body of the raw string.  */
 798       while (note->pos < cur)
 799         ++note;
 800       for (; note->pos == cur; ++note)
 801         {
 802           switch (note->type)
 803             {
 804             case '\\':
 805             case ' ':
 806               /* Restore backslash followed by newline.  */
 807               BUF_APPEND (base, cur - base);
 808               base = cur;
 809               BUF_APPEND ("\\", 1);
 810             after_backslash:
 811               if (note->type == ' ')
 812                 {
 813                   /* GNU backslash whitespace newline extension.  FIXME
 814                      could be any sequence of non-vertical space.  When we
 815                      can properly restore any such sequence, we should mark
 816                      this note as handled so _cpp_process_line_notes
 817                      doesn't warn.  */
 818                   BUF_APPEND (" ", 1);
 819                 }
 820
 821               BUF_APPEND ("\n", 1);
 822               break;
 823
 824             case 0:
 825               /* Already handled.  */
 826               break;
 827
 828             default:
 829               if (_cpp_trigraph_map[note->type])
 830                 {
 831                   /* Don't warn about this trigraph in
 832                      _cpp_process_line_notes, since trigraphs show up as
 833                      trigraphs in raw strings.  */
 834                   uchar type = note->type;
 835                   note->type = 0;
 836
 837                   if (!CPP_OPTION (pfile, trigraphs))
 838                     /* If we didn't convert the trigraph in the first
 839                        place, don't do anything now either.  */
 840                     break;
 841
 842                   BUF_APPEND (base, cur - base);
 843                   base = cur;
 844                   BUF_APPEND ("??", 2);
 845
 846                   /* ??/ followed by newline gets two line notes, one for
 847                      the trigraph and one for the backslash/newline.  */
 848                   if (type == '/' && note[1].pos == cur)
 849                     {
 850                       if (note[1].type != '\\'
 851                           && note[1].type != ' ')
 852                         abort ();
 853                       BUF_APPEND ("/", 1);
 854                       ++note;
 855                       goto after_backslash;
 856                     }
 857                   /* The ) from ??) could be part of the suffix.  */
 858                   else if (type == ')'
 859                            && strncmp ((const char *) cur+1,
 860                                        (const char *) raw_prefix,
 861                                        raw_prefix_len) == 0
 862                            && cur[raw_prefix_len+1] == '"')
 863                     {
 864                       cur += raw_prefix_len+2;
 865                       goto break_outer_loop;
 866                     }
 867                   else
 868                     {
 869                       /* Skip the replacement character.  */
 870                       base = ++cur;
 871                       BUF_APPEND (&type, 1);
 872                     }
 873                 }
 874               else
 875                 abort ();
 876               break;
 877             }
 878         }
 879       c = *cur++;
 880
 881       if (c == ')'
 882           && strncmp ((const char *) cur, (const char *) raw_prefix,
 883                       raw_prefix_len) == 0
 884           && cur[raw_prefix_len] == '"')
 885         {
 886           cur += raw_prefix_len + 1;
 887           break;
 888         }
 889       else if (c == '\n')
 890         {
 891           if (pfile->state.in_directive
 892               || pfile->state.parsing_args
 893               || pfile->state.in_deferred_pragma)
 894             {
 895               cur--;
 896               type = CPP_OTHER;
 897               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
 898                                    "unterminated raw string");
 899               break;
 900             }
 901
 902           BUF_APPEND (base, cur - base);
 903
 904           if (pfile->buffer->cur < pfile->buffer->rlimit)
 905             CPP_INCREMENT_LINE (pfile, 0);
 906           pfile->buffer->need_line = true;
 907
 908           pfile->buffer->cur = cur-1;
 909           _cpp_process_line_notes (pfile, false);
 910           if (!_cpp_get_fresh_line (pfile))
 911             {
 912               source_location src_loc = token->src_loc;
 913               token->type = CPP_EOF;
 914               /* Tell the compiler the line number of the EOF token.  */
 915               token->src_loc = pfile->line_table->highest_line;
 916               token->flags = BOL;
 917               if (first_buff != NULL)
 918                 _cpp_release_buff (pfile, first_buff);
 919               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
 920                                    "unterminated raw string");
 921               return;
 922             }
 923
 924           cur = base = pfile->buffer->cur;
 925           note = &pfile->buffer->notes[pfile->buffer->cur_note];
 926         }
 927       else if (c == '\0' && !saw_NUL)
 928         LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
 929                                      CPP_BUF_COLUMN (pfile->buffer, cur));
 930     }
 931  break_outer_loop:
 932
 933   if (saw_NUL && !pfile->state.skipping)
 934     cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
 935                "null character(s) preserved in literal");
 936
 937   pfile->buffer->cur = cur;
 938   if (first_buff == NULL)
 939     create_literal (pfile, token, base, cur - base, type);
 940   else
 941     {
 942       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
 943
 944       token->type = type;
 945       token->val.str.len = total_len + (cur - base);
 946       token->val.str.text = dest;
 947       last_buff = first_buff;
 948       while (last_buff != NULL)
 949         {
 950           memcpy (dest, last_buff->base,
 951                   BUFF_FRONT (last_buff) - last_buff->base);
 952           dest += BUFF_FRONT (last_buff) - last_buff->base;
 953           last_buff = last_buff->next;
 954         }
 955       _cpp_release_buff (pfile, first_buff);
 956       memcpy (dest, base, cur - base);
 957       dest[cur - base] = '\0';
 958     }
 959 }
 960
 961 /* Lexes a string, character constant, or angle-bracketed header file
 962    name.  The stored string contains the spelling, including opening
 963    quote and any leading 'L', 'u', 'U' or 'u8' and optional
 964    'R' modifier.  TOKEN's type is set to the type of the literal, to
 965    CPP_OTHER if it was not properly terminated, or to CPP_LESS for an
 966    unterminated header name which must be relexed as normal tokens.
 967
 968    The spelling is NUL-terminated, but it is not guaranteed that this
 969    is the first NUL since embedded NULs are preserved.

        BASE points at the first character of the literal, i.e. at the
        encoding prefix if there is one and at the opening delimiter
        otherwise.  When an 'R' follows the optional prefix the whole job
        is delegated to lex_raw_string.  In the CPP_LESS case the function
        returns before updating PFILE's buffer position and before
        creating any literal.  */
 970 static void
 971 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 972 {
 973   bool saw_NUL = false;
 974   const uchar *cur;
 975   cppchar_t terminator;
 976   enum cpp_ttype type;
 977
       /* Step over an optional encoding prefix (L, U, u or u8) so that
          TERMINATOR ends up holding the character that follows it.  */
 978   cur = base;
 979   terminator = *cur++;
 980   if (terminator == 'L' || terminator == 'U')
 981     terminator = *cur++;
 982   else if (terminator == 'u')
 983     {
 984       terminator = *cur++;
 985       if (terminator == '8')
 986         terminator = *cur++;
 987     }
       /* An 'R' at this point is handed to the raw string lexer, which
          receives both the start of the spelling and the position just
          past the 'R'.  */
 988   if (terminator == 'R')
 989     {
 990       lex_raw_string (pfile, token, base, cur);
 991       return;
 992     }
       /* Pick the token type from the delimiter and the prefix found at
          BASE.  */
 993   if (terminator == '"')
 994     type = (*base == 'L' ? CPP_WSTRING :
 995             *base == 'U' ? CPP_STRING32 :
 996             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 997                          : CPP_STRING);
 998   else if (terminator == '\'')
 999     type = (*base == 'L' ? CPP_WCHAR :
1000             *base == 'U' ? CPP_CHAR32 :
1001             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1002   else
         /* Any other opening character is treated as the start of a
            header name, which is closed by '>'.  */
1003     terminator = '>', type = CPP_HEADER_NAME;
1004
       /* Scan forward to the closing delimiter or to the end of the line,
          whichever comes first.  */
1005   for (;;)
1006     {
1007       cppchar_t c = *cur++;
1008
1009       /* In #include-style directives, terminators are not escapable.  */
1010       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
             /* Skip the escaped character so that it cannot terminate
                the literal.  */
1011         cur++;
1012       else if (c == terminator)
1013         break;
1014       else if (c == '\n')
1015         {
               /* Leave the newline itself unconsumed.  */
1016           cur--;
1017           /* Unmatched quotes always yield undefined behavior, but
1018              greedy lexing means that what appears to be an unterminated
1019              header name may actually be a legitimate sequence of tokens.  */
1020           if (terminator == '>')
1021             {
1022               token->type = CPP_LESS;
1023               return;
1024             }
1025           type = CPP_OTHER;
1026           break;
1027         }
1028       else if (c == '\0')
1029         saw_NUL = true;
1030     }
1031
1032   if (saw_NUL && !pfile->state.skipping)
1033     cpp_error (pfile, CPP_DL_WARNING,
1034                "null character(s) preserved in literal");
1035
       /* An unterminated string or character constant draws a pedwarn,
          except when the language is CLK_ASM.  */
1036   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1037     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1038                (int) terminator);
1039
       /* Commit the new read position and record the spelling that runs
          from BASE up to CUR.  */
1040   pfile->buffer->cur = cur;
1041   create_literal (pfile, token, base, cur - base, type);
1042 }
1043
1044 /* Return the comment table. The client may not make any assumption
1045    about the ordering of the table.  */
1046 cpp_comment_table *
1047 cpp_get_comments (cpp_reader *pfile)
1048 {
1049   return &pfile->comments;
1050 }
1051
1052 /* Append a comment to the end of the comment table. */
1053 static void
1054 store_comment (cpp_reader *pfile, cpp_token *token)
1055 {
1056   int len;
1057
1058   if (pfile->comments.allocated == 0)
1059     {
1060       pfile->comments.allocated = 256;
1061       pfile->comments.entries = (cpp_comment *) xmalloc
1062         (pfile->comments.allocated * sizeof (cpp_comment));
1063     }
1064
1065   if (pfile->comments.count == pfile->comments.allocated)
1066     {
1067       pfile->comments.allocated *= 2;
1068       pfile->comments.entries = (cpp_comment *) xrealloc
1069         (pfile->comments.entries,
1070          pfile->comments.allocated * sizeof (cpp_comment));
1071     }
1072
1073   len = token->val.str.len;
1074
1075   /* Copy comment. Note, token may not be NULL terminated. */
1076   pfile->comments.entries[pfile->comments.count].comment =
1077     (char *) xmalloc (sizeof (char) * (len + 1));
1078   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1079           token->val.str.text, len);
1080   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1081
1082   /* Set source location. */
1083   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1084
1085   /* Increment the count of entries in the comment table. */
1086   pfile->comments.count++;
1087 }
1088
1089 /* The stored comment includes the comment start and any terminator.  */
1090 static void
1091 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1092               cppchar_t type)
1093 {
1094   unsigned char *buffer;
1095   unsigned int len, clen;
1096
1097   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1098
1099   /* C++ comments probably (not definitely) have moved past a new
1100      line, which we don't want to save in the comment.  */
1101   if (is_vspace (pfile->buffer->cur[-1]))
1102     len--;
1103
1104   /* If we are currently in a directive, then we need to store all
1105      C++ comments as C comments internally, and so we need to
1106      allocate a little extra space in that case.
1107
1108      Note that the only time we encounter a directive here is
1109      when we are saving comments in a "#define".  */
1110   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1111
1112   buffer = _cpp_unaligned_alloc (pfile, clen);
1113
1114   token->type = CPP_COMMENT;
1115   token->val.str.len = clen;
1116   token->val.str.text = buffer;
1117
1118   buffer[0] = '/';
1119   memcpy (buffer + 1, from, len - 1);
1120
1121   /* Finish conversion to a C comment, if necessary.  */
1122   if (pfile->state.in_directive && type == '/')
1123     {
1124       buffer[1] = '*';
1125       buffer[clen - 2] = '*';
1126       buffer[clen - 1] = '/';
1127     }
1128
1129   /* Finally store this comment for use by clients of libcpp. */
1130   store_comment (pfile, token);
1131 }
1132
1133 /* Allocate COUNT tokens for RUN.  */
1134 void
1135 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1136 {
1137   run->base = XNEWVEC (cpp_token, count);
1138   run->limit = run->base + count;
1139   run->next = NULL;
1140 }
1141
1142 /* Returns the next tokenrun, or creates one if there is none.  */
1143 static tokenrun *
1144 next_tokenrun (tokenrun *run)
1145 {
1146   if (run->next == NULL)
1147     {
1148       run->next = XNEW (tokenrun);
1149       run->next->prev = run;
1150       _cpp_init_tokenrun (run->next, 250);
1151     }
1152
1153   return run->next;
1154 }
1155
1156 /* Look ahead in the input stream.  */
1157 const cpp_token *
1158 cpp_peek_token (cpp_reader *pfile, int index)
1159 {
1160   cpp_context *context = pfile->context;
1161   const cpp_token *peektok;
1162   int count;
1163
1164   /* First, scan through any pending cpp_context objects.  */
1165   while (context->prev)
1166     {
1167       ptrdiff_t sz = (context->direct_p
1168                       ? LAST (context).token - FIRST (context).token
1169                       : LAST (context).ptoken - FIRST (context).ptoken);
1170
1171       if (index < (int) sz)
1172         return (context->direct_p
1173                 ? FIRST (context).token + index
1174                 : *(FIRST (context).ptoken + index));
1175
1176       index -= (int) sz;
1177       context = context->prev;
1178     }
1179
1180   /* We will have to read some new tokens after all (and do so
1181      without invalidating preceding tokens).  */
1182   count = index;
1183   pfile->keep_tokens++;
1184
1185   do
1186     {
1187       peektok = _cpp_lex_token (pfile);
1188       if (peektok->type == CPP_EOF)
1189         return peektok;
1190     }
1191   while (index--);
1192
1193   _cpp_backup_tokens_direct (pfile, count + 1);
1194   pfile->keep_tokens--;
1195
1196   return peektok;
1197 }
1198
1199 /* Allocate a single token that is invalidated at the same time as the
1200    rest of the tokens on the line.  Has its line and col set to the
1201    same as the last lexed token, so that diagnostics appear in the
1202    right place.

        The new token is placed at PFILE->cur_token, which is then
        advanced.  If lookahead tokens are pending they are moved up by
        one slot first, spilling into the following token run where
        needed, so that none of them is overwritten.  */
1203 cpp_token *
1204 _cpp_temp_token (cpp_reader *pfile)
1205 {
1206   cpp_token *old, *result;
       /* SZ is the number of slots left in the current run starting at
          cur_token.  LA is the number of pending lookahead tokens.  */
1207   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1208   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1209
       /* The token just before cur_token supplies the source location
          for the new token.  */
1210   old = pfile->cur_token - 1;
1211   /* Any pre-existing lookaheads must not be clobbered.  */
1212   if (la)
1213     {
           /* The lookaheads fill the rest of this run or run past it, so
              shifting them by one slot pushes a token into the next
              run.  */
1214       if (sz <= la)
1215         {
1216           tokenrun *next = next_tokenrun (pfile->cur_run);
1217
               /* Lookaheads already held in the next run move up one
                  slot to free its first slot.  */
1218           if (sz < la)
1219             memmove (next->base + 1, next->base,
1220                      (la - sz) * sizeof (cpp_token));
1221
               /* The last token of the current run moves into the first
                  slot of the next run.  */
1222           next->base[0] = pfile->cur_run->limit[-1];
1223         }
1224
           /* Shift the lookaheads that stay in the current run up by
              one slot, leaving the slot at cur_token free.  */
1225       if (sz > 1)
1226         memmove (pfile->cur_token + 1, pfile->cur_token,
1227                  MIN (la, sz - 1) * sizeof (cpp_token));
1228     }
1229
       /* No room left in the current run: continue at the start of the
          next one.  */
1230   if (!sz && pfile->cur_token == pfile->cur_run->limit)
1231     {
1232       pfile->cur_run = next_tokenrun (pfile->cur_run);
1233       pfile->cur_token = pfile->cur_run->base;
1234     }
1235
1236   result = pfile->cur_token++;
1237   result->src_loc = old->src_loc;
1238   return result;
1239 }
1240
1241 /* Lex a token into RESULT (external interface).  Takes care of issues
1242    like directive handling, token lookahead, multiple include
1243    optimization and skipping.

        Returns the next token, taken from the pending lookaheads when
        there are any and from _cpp_lex_direct otherwise.  The loop keeps
        going while tokens are being skipped outside a directive, and
        after a directive whose result is CPP_PADDING.  */
1244 const cpp_token *
1245 _cpp_lex_token (cpp_reader *pfile)
1246 {
1247   cpp_token *result;
1248
1249   for (;;)
1250     {
           /* Move on to the next token run once the current one is
              exhausted.  */
1251       if (pfile->cur_token == pfile->cur_run->limit)
1252         {
1253           pfile->cur_run = next_tokenrun (pfile->cur_run);
1254           pfile->cur_token = pfile->cur_run->base;
1255         }
1256       /* We assume that the current token is somewhere in the current
1257          run.  */
1258       if (pfile->cur_token < pfile->cur_run->base
1259           || pfile->cur_token >= pfile->cur_run->limit)
1260         abort ();
1261
           /* Tokens that were lexed earlier and then backed up are
              handed out again before anything new is lexed.  */
1262       if (pfile->lookaheads)
1263         {
1264           pfile->lookaheads--;
1265           result = pfile->cur_token++;
1266         }
1267       else
1268         result = _cpp_lex_direct (pfile);
1269
           /* The BOL flag marks the first token of a line.  */
1270       if (result->flags & BOL)
1271         {
1272           /* Is this a directive.  If _cpp_handle_directive returns
1273              false, it is an assembler #.  */
1274           if (result->type == CPP_HASH
1275               /* 6.10.3 p 11: Directives in a list of macro arguments
1276                  gives undefined behavior.  This implementation
1277                  handles the directive as normal.  */
1278               && pfile->state.parsing_args != 1)
1279             {
1280               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1281                 {
                       /* A CPP_PADDING directive result yields no token
                          for the caller, so lex the next one.  */
1282                   if (pfile->directive_result.type == CPP_PADDING)
1283                     continue;
1284                   result = &pfile->directive_result;
1285                 }
1286             }
1287           else if (pfile->state.in_deferred_pragma)
1288             result = &pfile->directive_result;
1289
               /* Tell the client about the new line, unless the tokens
                  are being skipped.  */
1290           if (pfile->cb.line_change && !pfile->state.skipping)
1291             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1292         }
1293
1294       /* We don't skip tokens in directives.  */
1295       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1296         break;
1297
1298       /* Outside a directive, invalidate controlling macros.  At file
1299          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1300          get here and MI optimization works.  */
1301       pfile->mi_valid = false;
1302
           /* While skipping, only CPP_EOF is passed on to the caller.  */
1303       if (!pfile->state.skipping || result->type == CPP_EOF)
1304         break;
1305     }
1306
1307   return result;
1308 }
1309
1310 /* Returns true if a fresh line has been loaded.

        Returns false without doing anything while inside a directive.
        Otherwise buffers that have no more lines are popped one after
        another.  False is returned if macro arguments are being parsed
        when a buffer runs out, if the popped buffer had return_at_eof
        set, or if no buffer is left.  */
1311 bool
1312 _cpp_get_fresh_line (cpp_reader *pfile)
1313 {
1314   int return_at_eof;
1315
1316   /* We can't get a new line until we leave the current directive.  */
1317   if (pfile->state.in_directive)
1318     return false;
1319
1320   for (;;)
1321     {
1322       cpp_buffer *buffer = pfile->buffer;
1323
           /* The buffer does not need a new line, so there is nothing
              to load.  */
1324       if (!buffer->need_line)
1325         return true;
1326
           /* More input remains in this buffer: prepare its next
              line.  */
1327       if (buffer->next_line < buffer->rlimit)
1328         {
1329           _cpp_clean_line (pfile);
1330           return true;
1331         }
1332
1333       /* First, get out of parsing arguments state.  */
1334       if (pfile->state.parsing_args)
1335         return false;
1336
1337       /* End of buffer.  Non-empty files should end in a newline.  */
1338       if (buffer->buf != buffer->rlimit
1339           && buffer->next_line > buffer->rlimit
1340           && !buffer->from_stage3)
1341         {
1342           /* Clip to buffer size.  */
1343           buffer->next_line = buffer->rlimit;
1344         }
1345
           /* Read the flag before popping, because the pop replaces
              PFILE->buffer.  */
1346       return_at_eof = buffer->return_at_eof;
1347       _cpp_pop_buffer (pfile);
1348       if (pfile->buffer == NULL || return_at_eof)
1349         return false;
1350     }
1351 }
1352
     /* Helper for two-character operators.  Sets RESULT's type to
        ELSE_TYPE, then, if the next unread character in BUFFER is CHAR,
        consumes that character and sets the type to THEN_TYPE instead.
        Relies on local variables named `result' and `buffer' being in
        scope where it is used.  */
1353 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
1354   do                                                    \
1355     {                                                   \
1356       result->type = ELSE_TYPE;                         \
1357       if (*buffer->cur == CHAR)                         \
1358         buffer->cur++, result->type = THEN_TYPE;        \
1359     }                                                   \
1360   while (0)
1361
1362 /* Lex a token into pfile->cur_token, which is also incremented, to
1363    get diagnostics pointing to the correct location.
1364
1365    Does not handle issues such as token lookahead, multiple-include
1366    optimization, directives, skipping etc.  This function is only
1367    suitable for use by _cpp_lex_token, and in special cases like
1368    lex_expansion_token which doesn't care for any of these issues.
1369
1370    When meeting a newline, returns CPP_EOF if parsing a directive,
1371    otherwise returns to the start of the token buffer if permissible.
1372    Returns the lexed token; its src_loc field holds the location.  */
1373 cpp_token *
1374 _cpp_lex_direct (cpp_reader *pfile)
1375 {
1376   cppchar_t c;
1377   cpp_buffer *buffer;
1378   const unsigned char *comment_start;
1379   cpp_token *result = pfile->cur_token++;
1380
       /* Entered at the start and again after each newline.  */
1381  fresh_line:
1382   result->flags = 0;
1383   buffer = pfile->buffer;
1384   if (buffer->need_line)
1385     {
           /* The end of the line closes a deferred pragma.  */
1386       if (pfile->state.in_deferred_pragma)
1387         {
1388           result->type = CPP_PRAGMA_EOL;
1389           pfile->state.in_deferred_pragma = false;
1390           if (!pfile->state.pragma_allow_expansion)
1391             pfile->state.prevent_expansion--;
1392           return result;
1393         }
1394       if (!_cpp_get_fresh_line (pfile))
1395         {
1396           result->type = CPP_EOF;
1397           if (!pfile->state.in_directive)
1398             {
1399               /* Tell the compiler the line number of the EOF token.  */
1400               result->src_loc = pfile->line_table->highest_line;
1401               result->flags = BOL;
1402             }
1403           return result;
1404         }
           /* Unless earlier tokens must be kept, start again at the
              beginning of the base token run.  */
1405       if (!pfile->keep_tokens)
1406         {
1407           pfile->cur_run = &pfile->base_run;
1408           result = pfile->base_run.base;
1409           pfile->cur_token = result + 1;
1410         }
1411       result->flags = BOL;
1412       if (pfile->state.parsing_args == 2)
1413         result->flags |= PREV_WHITE;
1414     }
       /* Reload the buffer, since PFILE->buffer may have changed while
          the fresh line was fetched.  */
1415   buffer = pfile->buffer;
1416  update_tokens_line:
1417   result->src_loc = pfile->line_table->highest_line;
1418
       /* Re-entered after whitespace has been skipped.  */
1419  skipped_white:
1420   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1421       && !pfile->overlaid_buffer)
1422     {
1423       _cpp_process_line_notes (pfile, false);
1424       result->src_loc = pfile->line_table->highest_line;
1425     }
1426   c = *buffer->cur++;
1427
1428   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1429                                CPP_BUF_COLUMN (buffer, buffer->cur));
1430
       /* Dispatch on the first character of the token.  */
1431   switch (c)
1432     {
1433     case ' ': case '\t': case '\f': case '\v': case '\0':
1434       result->flags |= PREV_WHITE;
1435       skip_whitespace (pfile, c);
1436       goto skipped_white;
1437
1438     case '\n':
1439       if (buffer->cur < buffer->rlimit)
1440         CPP_INCREMENT_LINE (pfile, 0);
1441       buffer->need_line = true;
1442       goto fresh_line;
1443
1444     case '0': case '1': case '2': case '3': case '4':
1445     case '5': case '6': case '7': case '8': case '9':
1446       {
1447         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1448         result->type = CPP_NUMBER;
1449         lex_number (pfile, &result->val.str, &nst);
1450         warn_about_normalization (pfile, result, &nst);
1451         break;
1452       }
1453
1454     case 'L':
1455     case 'u':
1456     case 'U':
1457     case 'R':
1458       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
1459          wide strings or raw strings.  */
1460       if (c == 'L' || CPP_OPTION (pfile, uliterals))
1461         {
               /* Accept a character constant (not after 'R'), a string,
                  a prefixed raw string, or a u8 string or raw string.
                  Anything else is lexed as an identifier below.  */
1462           if ((*buffer->cur == '\'' && c != 'R')
1463               || *buffer->cur == '"'
1464               || (*buffer->cur == 'R'
1465                   && c != 'R'
1466                   && buffer->cur[1] == '"'
1467                   && CPP_OPTION (pfile, uliterals))
1468               || (*buffer->cur == '8'
1469                   && c == 'u'
1470                   && (buffer->cur[1] == '"'
1471                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
1472             {
1473               lex_string (pfile, result, buffer->cur - 1);
1474               break;
1475             }
1476         }
1477       /* Fall through.  */
1478
1479     case '_':
1480     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1481     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1482     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1483     case 's': case 't':           case 'v': case 'w': case 'x':
1484     case 'y': case 'z':
1485     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1486     case 'G': case 'H': case 'I': case 'J': case 'K':
1487     case 'M': case 'N': case 'O': case 'P': case 'Q':
1488     case 'S': case 'T':           case 'V': case 'W': case 'X':
1489     case 'Y': case 'Z':
1490       result->type = CPP_NAME;
1491       {
1492         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1493         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1494                                                 &nst);
1495         warn_about_normalization (pfile, result, &nst);
1496       }
1497
1498       /* Convert named operators to their proper types.  */
1499       if (result->val.node.node->flags & NODE_OPERATOR)
1500         {
1501           result->flags |= NAMED_OP;
1502           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
1503         }
1504       break;
1505
1506     case '\'':
1507     case '"':
1508       lex_string (pfile, result, buffer->cur - 1);
1509       break;
1510
1511     case '/':
1512       /* A potential block or line comment.  */
1513       comment_start = buffer->cur;
1514       c = *buffer->cur;
1515
1516       if (c == '*')
1517         {
1518           if (_cpp_skip_block_comment (pfile))
1519             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1520         }
1521       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1522                             || cpp_in_system_header (pfile)))
1523         {
1524           /* Warn about comments only if pedantically GNUC89, and not
1525              in system headers.  */
1526           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1527               && ! buffer->warned_cplusplus_comments)
1528             {
1529               cpp_error (pfile, CPP_DL_PEDWARN,
1530                          "C++ style comments are not allowed in ISO C90");
1531               cpp_error (pfile, CPP_DL_PEDWARN,
1532                          "(this will be reported only once per input file)");
1533               buffer->warned_cplusplus_comments = 1;
1534             }
1535
1536           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1537             cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
1538         }
1539       else if (c == '=')
1540         {
1541           buffer->cur++;
1542           result->type = CPP_DIV_EQ;
1543           break;
1544         }
1545       else
1546         {
1547           result->type = CPP_DIV;
1548           break;
1549         }
1550
           /* Only the two comment branches above reach this point.
              When comments are not being saved, the comment counts as
              whitespace and lexing restarts after it.  */
1551       if (!pfile->state.save_comments)
1552         {
1553           result->flags |= PREV_WHITE;
1554           goto update_tokens_line;
1555         }
1556
1557       /* Save the comment as a token in its own right.  */
1558       save_comment (pfile, result, comment_start, c);
1559       break;
1560
1561     case '<':
1562       if (pfile->state.angled_headers)
1563         {
               /* Try a header name first.  lex_string sets the type to
                  CPP_LESS when no closing '>' is found, and the '<' is
                  then lexed as an operator below.  */
1564           lex_string (pfile, result, buffer->cur - 1);
1565           if (result->type != CPP_LESS)
1566             break;
1567         }
1568
1569       result->type = CPP_LESS;
1570       if (*buffer->cur == '=')
1571         buffer->cur++, result->type = CPP_LESS_EQ;
1572       else if (*buffer->cur == '<')
1573         {
1574           buffer->cur++;
1575           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1576         }
1577       else if (CPP_OPTION (pfile, digraphs))
1578         {
               /* The digraphs <: and <% stand for [ and {.  */
1579           if (*buffer->cur == ':')
1580             {
1581               buffer->cur++;
1582               result->flags |= DIGRAPH;
1583               result->type = CPP_OPEN_SQUARE;
1584             }
1585           else if (*buffer->cur == '%')
1586             {
1587               buffer->cur++;
1588               result->flags |= DIGRAPH;
1589               result->type = CPP_OPEN_BRACE;
1590             }
1591         }
1592       break;
1593
1594     case '>':
1595       result->type = CPP_GREATER;
1596       if (*buffer->cur == '=')
1597         buffer->cur++, result->type = CPP_GREATER_EQ;
1598       else if (*buffer->cur == '>')
1599         {
1600           buffer->cur++;
1601           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1602         }
1603       break;
1604
1605     case '%':
1606       result->type = CPP_MOD;
1607       if (*buffer->cur == '=')
1608         buffer->cur++, result->type = CPP_MOD_EQ;
1609       else if (CPP_OPTION (pfile, digraphs))
1610         {
               /* The digraphs %: and %:%: stand for # and ##, and %>
                  stands for }.  */
1611           if (*buffer->cur == ':')
1612             {
1613               buffer->cur++;
1614               result->flags |= DIGRAPH;
1615               result->type = CPP_HASH;
1616               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1617                 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
1618             }
1619           else if (*buffer->cur == '>')
1620             {
1621               buffer->cur++;
1622               result->flags |= DIGRAPH;
1623               result->type = CPP_CLOSE_BRACE;
1624             }
1625         }
1626       break;
1627
1628     case '.':
1629       result->type = CPP_DOT;
           /* A dot followed by a digit starts a number.  */
1630       if (ISDIGIT (*buffer->cur))
1631         {
1632           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1633           result->type = CPP_NUMBER;
1634           lex_number (pfile, &result->val.str, &nst);
1635           warn_about_normalization (pfile, result, &nst);
1636         }
1637       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1638         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1639       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1640         buffer->cur++, result->type = CPP_DOT_STAR;
1641       break;
1642
1643     case '+':
1644       result->type = CPP_PLUS;
1645       if (*buffer->cur == '+')
1646         buffer->cur++, result->type = CPP_PLUS_PLUS;
1647       else if (*buffer->cur == '=')
1648         buffer->cur++, result->type = CPP_PLUS_EQ;
1649       break;
1650
1651     case '-':
1652       result->type = CPP_MINUS;
1653       if (*buffer->cur == '>')
1654         {
1655           buffer->cur++;
1656           result->type = CPP_DEREF;
1657           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1658             buffer->cur++, result->type = CPP_DEREF_STAR;
1659         }
1660       else if (*buffer->cur == '-')
1661         buffer->cur++, result->type = CPP_MINUS_MINUS;
1662       else if (*buffer->cur == '=')
1663         buffer->cur++, result->type = CPP_MINUS_EQ;
1664       break;
1665
1666     case '&':
1667       result->type = CPP_AND;
1668       if (*buffer->cur == '&')
1669         buffer->cur++, result->type = CPP_AND_AND;
1670       else if (*buffer->cur == '=')
1671         buffer->cur++, result->type = CPP_AND_EQ;
1672       break;
1673
1674     case '|':
1675       result->type = CPP_OR;
1676       if (*buffer->cur == '|')
1677         buffer->cur++, result->type = CPP_OR_OR;
1678       else if (*buffer->cur == '=')
1679         buffer->cur++, result->type = CPP_OR_EQ;
1680       break;
1681
1682     case ':':
1683       result->type = CPP_COLON;
1684       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1685         buffer->cur++, result->type = CPP_SCOPE;
1686       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1687         {
               /* The digraph :> stands for ].  */
1688           buffer->cur++;
1689           result->flags |= DIGRAPH;
1690           result->type = CPP_CLOSE_SQUARE;
1691         }
1692       break;
1693
1694     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1695     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1696     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1697     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1698     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1699
1700     case '?': result->type = CPP_QUERY; break;
1701     case '~': result->type = CPP_COMPL; break;
1702     case ',': result->type = CPP_COMMA; break;
1703     case '(': result->type = CPP_OPEN_PAREN; break;
1704     case ')': result->type = CPP_CLOSE_PAREN; break;
1705     case '[': result->type = CPP_OPEN_SQUARE; break;
1706     case ']': result->type = CPP_CLOSE_SQUARE; break;
1707     case '{': result->type = CPP_OPEN_BRACE; break;
1708     case '}': result->type = CPP_CLOSE_BRACE; break;
1709     case ';': result->type = CPP_SEMICOLON; break;
1710
1711       /* @ is a punctuator in Objective-C.  */
1712     case '@': result->type = CPP_ATSIGN; break;
1713
1714     case '$':
1715     case '\\':
1716       {
             /* Step back onto the character so that forms_identifier_p
                can examine it.  */
1717         const uchar *base = --buffer->cur;
1718         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1719
1720         if (forms_identifier_p (pfile, true, &nst))
1721           {
1722             result->type = CPP_NAME;
1723             result->val.node.node = lex_identifier (pfile, base, true, &nst);
1724             warn_about_normalization (pfile, result, &nst);
1725             break;
1726           }
             /* Not an identifier after all: consume the character again
                and treat it like any other stray character.  */
1727         buffer->cur++;
1728       }
           /* Fall through.  */
1729
1730     default:
           /* Any other character becomes a one-character CPP_OTHER
              token.  */
1731       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1732       break;
1733     }
1734
1735   return result;
1736 }
1737
1738 /* An upper bound on the number of bytes needed to spell TOKEN.
1739    Does not include preceding whitespace.  */
1740 unsigned int
1741 cpp_token_len (const cpp_token *token)
1742 {
1743   unsigned int len;
1744
1745   switch (TOKEN_SPELL (token))
1746     {
1747     default:            len = 6;                                break;
1748     case SPELL_LITERAL: len = token->val.str.len;               break;
1749     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
1750     }
1751
1752   return len;
1753 }
1754
/* Decode the UTF-8 sequence starting at NAME and write it to BUFFER as
   a \U escape followed by eight lower-case hexadecimal digits (ten
   bytes in total, no terminating NUL).  Return the number of bytes of
   NAME that made up the sequence.  Aborts if a continuation byte is
   not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned int lead;
  unsigned long code_point;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The number of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  lead = *name;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload of the first byte is whatever follows the length
     marker; each continuation byte then contributes six more bits.  */
  code_point = *name & (0x7F >> seq_len);
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8: a continuation byte must be 10xxxxxx.  */
      if ((*name & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;

  /* Emit the eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1788
1789 /* Given a token TYPE corresponding to a digraph, return a pointer to
1790    the spelling of the digraph.  */
1791 static const unsigned char *
1792 cpp_digraph2name (enum cpp_ttype type)
1793 {
1794   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1795 }
1796
1797 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1798    already contain the enough space to hold the token's spelling.
1799    Returns a pointer to the character after the last character written.
1800    FORSTRING is true if this is to be the spelling after translation
1801    phase 1 (this is different for UCNs).
1802    FIXME: Would be nice if we didn't need the PFILE argument.  */
1803 unsigned char *
1804 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1805                  unsigned char *buffer, bool forstring)
1806 {
1807   switch (TOKEN_SPELL (token))
1808     {
1809     case SPELL_OPERATOR:
1810       {
1811         const unsigned char *spelling;
1812         unsigned char c;
1813
1814         if (token->flags & DIGRAPH)
1815           spelling = cpp_digraph2name (token->type);
1816         else if (token->flags & NAMED_OP)
1817           goto spell_ident;
1818         else
1819           spelling = TOKEN_NAME (token);
1820
1821         while ((c = *spelling++) != '\0')
1822           *buffer++ = c;
1823       }
1824       break;
1825
1826     spell_ident:
1827     case SPELL_IDENT:
1828       if (forstring)
1829         {
1830           memcpy (buffer, NODE_NAME (token->val.node.node),
1831                   NODE_LEN (token->val.node.node));
1832           buffer += NODE_LEN (token->val.node.node);
1833         }
1834       else
1835         {
1836           size_t i;
1837           const unsigned char * name = NODE_NAME (token->val.node.node);
1838
1839           for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1840             if (name[i] & ~0x7F)
1841               {
1842                 i += utf8_to_ucn (buffer, name + i) - 1;
1843                 buffer += 10;
1844               }
1845             else
1846               *buffer++ = NODE_NAME (token->val.node.node)[i];
1847         }
1848       break;
1849
1850     case SPELL_LITERAL:
1851       memcpy (buffer, token->val.str.text, token->val.str.len);
1852       buffer += token->val.str.len;
1853       break;
1854
1855     case SPELL_NONE:
1856       cpp_error (pfile, CPP_DL_ICE,
1857                  "unspellable token %s", TOKEN_NAME (token));
1858       break;
1859     }
1860
1861   return buffer;
1862 }
1863
1864 /* Returns TOKEN spelt as a null-terminated string.  The string is
1865    freed when the reader is destroyed.  Useful for diagnostics.  */
1866 unsigned char *
1867 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1868 {
1869   unsigned int len = cpp_token_len (token) + 1;
1870   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1871
1872   end = cpp_spell_token (pfile, token, start, false);
1873   end[0] = '\0';
1874
1875   return start;
1876 }
1877
1878 /* Returns a pointer to a string which spells the token defined by
1879    TYPE and FLAGS.  Used by C front ends, which really should move to
1880    using cpp_token_as_text.  */
1881 const char *
1882 cpp_type2name (enum cpp_ttype type, unsigned char flags)
1883 {
1884   if (flags & DIGRAPH)
1885     return (const char *) cpp_digraph2name (type);
1886   else if (flags & NAMED_OP)
1887     return cpp_named_operator2name (type);
1888
1889   return (const char *) token_spellings[type].name;
1890 }
1891
1892 /* Writes the spelling of token to FP, without any preceding space.
1893    Separated from cpp_spell_token for efficiency - to avoid stdio
1894    double-buffering.  */
1895 void
1896 cpp_output_token (const cpp_token *token, FILE *fp)
1897 {
1898   switch (TOKEN_SPELL (token))
1899     {
1900     case SPELL_OPERATOR:
1901       {
1902         const unsigned char *spelling;
1903         int c;
1904
1905         if (token->flags & DIGRAPH)
1906           spelling = cpp_digraph2name (token->type);
1907         else if (token->flags & NAMED_OP)
1908           goto spell_ident;
1909         else
1910           spelling = TOKEN_NAME (token);
1911
1912         c = *spelling;
1913         do
1914           putc (c, fp);
1915         while ((c = *++spelling) != '\0');
1916       }
1917       break;
1918
1919     spell_ident:
1920     case SPELL_IDENT:
1921       {
1922         size_t i;
1923         const unsigned char * name = NODE_NAME (token->val.node.node);
1924
1925         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1926           if (name[i] & ~0x7F)
1927             {
1928               unsigned char buffer[10];
1929               i += utf8_to_ucn (buffer, name + i) - 1;
1930               fwrite (buffer, 1, 10, fp);
1931             }
1932           else
1933             fputc (NODE_NAME (token->val.node.node)[i], fp);
1934       }
1935       break;
1936
1937     case SPELL_LITERAL:
1938       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1939       break;
1940
1941     case SPELL_NONE:
1942       /* An error, most probably.  */
1943       break;
1944     }
1945 }
1946
1947 /* Compare two tokens.  */
1948 int
1949 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1950 {
1951   if (a->type == b->type && a->flags == b->flags)
1952     switch (TOKEN_SPELL (a))
1953       {
1954       default:                  /* Keep compiler happy.  */
1955       case SPELL_OPERATOR:
1956         /* token_no is used to track where multiple consecutive ##
1957            tokens were originally located.  */
1958         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
1959       case SPELL_NONE:
1960         return (a->type != CPP_MACRO_ARG
1961                 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
1962       case SPELL_IDENT:
1963         return a->val.node.node == b->val.node.node;
1964       case SPELL_LITERAL:
1965         return (a->val.str.len == b->val.str.len
1966                 && !memcmp (a->val.str.text, b->val.str.text,
1967                             a->val.str.len));
1968       }
1969
1970   return 0;
1971 }
1972
1973 /* Returns nonzero if a space should be inserted to avoid an
1974    accidental token paste for output.  For simplicity, it is
1975    conservative, and occasionally advises a space where one is not
1976    needed, e.g. "." and ".2".  */
1977 int
1978 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1979                  const cpp_token *token2)
1980 {
1981   enum cpp_ttype a = token1->type, b = token2->type;
1982   cppchar_t c;
1983
1984   if (token1->flags & NAMED_OP)
1985     a = CPP_NAME;
1986   if (token2->flags & NAMED_OP)
1987     b = CPP_NAME;
1988
1989   c = EOF;
1990   if (token2->flags & DIGRAPH)
1991     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1992   else if (token_spellings[b].category == SPELL_OPERATOR)
1993     c = token_spellings[b].name[0];
1994
1995   /* Quickly get everything that can paste with an '='.  */
1996   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1997     return 1;
1998
1999   switch (a)
2000     {
2001     case CPP_GREATER:   return c == '>';
2002     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2003     case CPP_PLUS:      return c == '+';
2004     case CPP_MINUS:     return c == '-' || c == '>';
2005     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2006     case CPP_MOD:       return c == ':' || c == '>';
2007     case CPP_AND:       return c == '&';
2008     case CPP_OR:        return c == '|';
2009     case CPP_COLON:     return c == ':' || c == '>';
2010     case CPP_DEREF:     return c == '*';
2011     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2012     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2013     case CPP_NAME:      return ((b == CPP_NUMBER
2014                                  && name_p (pfile, &token2->val.str))
2015                                 || b == CPP_NAME
2016                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
2017     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
2018                                 || c == '.' || c == '+' || c == '-');
2019                                       /* UCNs */
2020     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
2021                                  && b == CPP_NAME)
2022                                 || (CPP_OPTION (pfile, objc)
2023                                     && token1->val.str.text[0] == '@'
2024                                     && (b == CPP_NAME || b == CPP_STRING)));
2025     default:            break;
2026     }
2027
2028   return 0;
2029 }
2030
2031 /* Output all the remaining tokens on the current line, and a newline
2032    character, to FP.  Leading whitespace is removed.  If there are
2033    macros, special token padding is not performed.  */
2034 void
2035 cpp_output_line (cpp_reader *pfile, FILE *fp)
2036 {
2037   const cpp_token *token;
2038
2039   token = cpp_get_token (pfile);
2040   while (token->type != CPP_EOF)
2041     {
2042       cpp_output_token (token, fp);
2043       token = cpp_get_token (pfile);
2044       if (token->flags & PREV_WHITE)
2045         putc (' ', fp);
2046     }
2047
2048   putc ('\n', fp);
2049 }
2050
2051 /* Return a string representation of all the remaining tokens on the
2052    current line.  The result is allocated using xmalloc and must be
2053    freed by the caller.  */
2054 unsigned char *
2055 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2056 {
2057   const cpp_token *token;
2058   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2059   unsigned int alloced = 120 + out;
2060   unsigned char *result = (unsigned char *) xmalloc (alloced);
2061
2062   /* If DIR_NAME is empty, there are no initial contents.  */
2063   if (dir_name)
2064     {
2065       sprintf ((char *) result, "#%s ", dir_name);
2066       out += 2;
2067     }
2068
2069   token = cpp_get_token (pfile);
2070   while (token->type != CPP_EOF)
2071     {
2072       unsigned char *last;
2073       /* Include room for a possible space and the terminating nul.  */
2074       unsigned int len = cpp_token_len (token) + 2;
2075
2076       if (out + len > alloced)
2077         {
2078           alloced *= 2;
2079           if (out + len > alloced)
2080             alloced = out + len;
2081           result = (unsigned char *) xrealloc (result, alloced);
2082         }
2083
2084       last = cpp_spell_token (pfile, token, &result[out], 0);
2085       out = last - result;
2086
2087       token = cpp_get_token (pfile);
2088       if (token->flags & PREV_WHITE)
2089         result[out++] = ' ';
2090     }
2091
2092   result[out] = '\0';
2093   return result;
2094 }
2095
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the space between BUFF's cur
   and limit.  Every macro argument is parenthesized so that an
   argument containing a low-precedence operator expands correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2110
2111 /* Create a new allocation buffer.  Place the control block at the end
2112    of the buffer, so that buffer overflows will cause immediate chaos.  */
2113 static _cpp_buff *
2114 new_buff (size_t len)
2115 {
2116   _cpp_buff *result;
2117   unsigned char *base;
2118
2119   if (len < MIN_BUFF_SIZE)
2120     len = MIN_BUFF_SIZE;
2121   len = CPP_ALIGN (len);
2122
2123   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2124   result = (_cpp_buff *) (base + len);
2125   result->base = base;
2126   result->cur = base;
2127   result->limit = base + len;
2128   result->next = NULL;
2129   return result;
2130 }
2131
2132 /* Place a chain of unwanted allocation buffers on the free list.  */
2133 void
2134 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2135 {
2136   _cpp_buff *end = buff;
2137
2138   while (end->next)
2139     end = end->next;
2140   end->next = pfile->free_buffs;
2141   pfile->free_buffs = buff;
2142 }
2143
2144 /* Return a free buffer of size at least MIN_SIZE.  */
2145 _cpp_buff *
2146 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2147 {
2148   _cpp_buff *result, **p;
2149
2150   for (p = &pfile->free_buffs;; p = &(*p)->next)
2151     {
2152       size_t size;
2153
2154       if (*p == NULL)
2155         return new_buff (min_size);
2156       result = *p;
2157       size = result->limit - result->base;
2158       /* Return a buffer that's big enough, but don't waste one that's
2159          way too big.  */
2160       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2161         break;
2162     }
2163
2164   *p = result->next;
2165   result->next = NULL;
2166   result->cur = result->base;
2167   return result;
2168 }
2169
2170 /* Creates a new buffer with enough space to hold the uncommitted
2171    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2172    the excess bytes to the new buffer.  Chains the new buffer after
2173    BUFF, and returns the new buffer.  */
2174 _cpp_buff *
2175 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2176 {
2177   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2178   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2179
2180   buff->next = new_buff;
2181   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2182   return new_buff;
2183 }
2184
2185 /* Creates a new buffer with enough space to hold the uncommitted
2186    remaining bytes of the buffer pointed to by BUFF, and at least
2187    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2188    Chains the new buffer before the buffer pointed to by BUFF, and
2189    updates the pointer to point to the new buffer.  */
2190 void
2191 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2192 {
2193   _cpp_buff *new_buff, *old_buff = *pbuff;
2194   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2195
2196   new_buff = _cpp_get_buff (pfile, size);
2197   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2198   new_buff->next = old_buff;
2199   *pbuff = new_buff;
2200 }
2201
2202 /* Free a chain of buffers starting at BUFF.  */
2203 void
2204 _cpp_free_buff (_cpp_buff *buff)
2205 {
2206   _cpp_buff *next;
2207
2208   for (; buff; buff = next)
2209     {
2210       next = buff->next;
2211       free (buff->base);
2212     }
2213 }
2214
2215 /* Allocate permanent, unaligned storage of length LEN.  */
2216 unsigned char *
2217 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2218 {
2219   _cpp_buff *buff = pfile->u_buff;
2220   unsigned char *result = buff->cur;
2221
2222   if (len > (size_t) (buff->limit - result))
2223     {
2224       buff = _cpp_get_buff (pfile, len);
2225       buff->next = pfile->u_buff;
2226       pfile->u_buff = buff;
2227       result = buff->cur;
2228     }
2229
2230   buff->cur = result + len;
2231   return result;
2232 }
2233
2234 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2235    That buffer is used for growing allocations when saving macro
2236    replacement lists in a #define, and when parsing an answer to an
2237    assertion in #assert, #unassert or #if (and therefore possibly
2238    whilst expanding macros).  It therefore must not be used by any
2239    code that they might call: specifically the lexer and the guts of
2240    the macro expander.
2241
2242    All existing other uses clearly fit this restriction: storing
2243    registered pragmas during initialization.  */
2244 unsigned char *
2245 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2246 {
2247   _cpp_buff *buff = pfile->a_buff;
2248   unsigned char *result = buff->cur;
2249
2250   if (len > (size_t) (buff->limit - result))
2251     {
2252       buff = _cpp_get_buff (pfile, len);
2253       buff->next = pfile->a_buff;
2254       pfile->a_buff = buff;
2255       result = buff->cur;
2256     }
2257
2258   buff->cur = result + len;
2259   return result;
2260 }
2261
2262 /* Say which field of TOK is in use.  */
2263
2264 enum cpp_token_fld_kind
2265 cpp_token_val_index (cpp_token *tok)
2266 {
2267   switch (TOKEN_SPELL (tok))
2268     {
2269     case SPELL_IDENT:
2270       return CPP_TOKEN_FLD_NODE;
2271     case SPELL_LITERAL:
2272       return CPP_TOKEN_FLD_STR;
2273     case SPELL_OPERATOR:
2274       if (tok->type == CPP_PASTE)
2275         return CPP_TOKEN_FLD_TOKEN_NO;
2276       else
2277         return CPP_TOKEN_FLD_NONE;
2278     case SPELL_NONE:
2279       if (tok->type == CPP_MACRO_ARG)
2280         return CPP_TOKEN_FLD_ARG_NO;
2281       else if (tok->type == CPP_PADDING)
2282         return CPP_TOKEN_FLD_SOURCE;
2283       else if (tok->type == CPP_PRAGMA)
2284         return CPP_TOKEN_FLD_PRAGMA;
2285       /* else fall through */
2286     default:
2287       return CPP_TOKEN_FLD_NONE;
2288     }
2289 }