libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, U s  },
  45 #define TK(e, s) { SPELL_ ## s,    U #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
  57 static void lex_number (cpp_reader *, cpp_string *);
  58 static bool forms_identifier_p (cpp_reader *, int);
  59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  62                             unsigned int, enum cpp_ttype);
  63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  64 static int name_p (cpp_reader *, const cpp_string *);
  65 static tokenrun *next_tokenrun (tokenrun *);
  66
  67 static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
 100 /* Returns with a logical line that contains no escaped newlines or
 101    trigraphs.  This is a time-critical inner loop.

        The line is read starting at buffer->next_line.  On return
        buffer->cur and buffer->line_base point at that start, the
        cleaned line ends in a '\n' stored by this function,
        buffer->next_line points just past the input consumed, and the
        notes array holds one note per escaped newline and per trigraph
        seen, followed by a sentinel note.  Trigraphs are replaced only
        when the trigraphs option is enabled; they get a note either
        way.  */
 102 void
 103 _cpp_clean_line (cpp_reader *pfile)
 104 {
 105   cpp_buffer *buffer;
 106   const uchar *s;           /* Read position in the raw input.  */
 107   uchar c, *d, *p;          /* D: write position; P: backward scan.  */
 108
 109   buffer = pfile->buffer;
       /* Start a fresh set of notes for this line.  */
 110   buffer->cur_note = buffer->notes_used = 0;
 111   buffer->cur = buffer->line_base = buffer->next_line;
 112   buffer->need_line = false;
       /* One before the first character, as the loops pre-increment S.  */
 113   s = buffer->next_line - 1;
 114
 115   if (!buffer->from_stage3)
 116     {
 117       /* Short circuit for the common case of an un-escaped line with
 118          no trigraphs.  The primary win here is by not writing any
 119          data back to memory until we have to.  */
 120       for (;;)
 121         {
 122           c = *++s;
 123           if (c == '\n' || c == '\r')
 124             {
 125               d = (uchar *) s;
 126
                   /* A line end found at rlimit finishes the line with
                      no further checks.  */
 127               if (s == buffer->rlimit)
 128                 goto done;
 129
 130               /* DOS line ending? */
 131               if (c == '\r' && s[1] == '\n')
 132                 s++;
 133
 134               if (s == buffer->rlimit)
 135                 goto done;
 136
 137               /* check for escaped newline: scan backwards from the
                      line end over non-vertical whitespace, looking for
                      a backslash.  */
 138               p = d;
 139               while (p != buffer->next_line && is_nvspace (p[-1]))
 140                 p--;
 141               if (p == buffer->next_line || p[-1] != '\\')
 142                 goto done;
 143
 144               /* Have an escaped newline; process it and proceed to
 145                  the slow path.  The note is ' ' when whitespace
                      separated the backslash from the newline, and '\\'
                      otherwise.  */
 146               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
                   /* P - 1 is the backslash.  Leave D one before it so
                      that the slow path's "*++d" overwrites it, and
                      reuse next_line as the lower bound for later
                      backward scans.  */
 147               d = p - 2;
 148               buffer->next_line = p - 1;
 149               break;
 150             }
 151           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 152             {
 153               /* Have a trigraph.  We may or may not have to convert
 154                  it.  Add a line note regardless, for -Wtrigraphs.  */
 155               add_line_note (buffer, s, s[2]);
 156               if (CPP_OPTION (pfile, trigraphs))
 157                 {
 158                   /* We do, and that means we have to switch to the
 159                      slow path.  */
 160                   d = (uchar *) s;
 161                   *d = _cpp_trigraph_map[s[2]];
 162                   s += 2;
 163                   break;
 164                 }
 165             }
 166         }
 167
 168
           /* Slow path: copy each character from S down to D, dropping
              escaped newlines and converting trigraphs on the way.  */
 169       for (;;)
 170         {
 171           c = *++s;
 172           *++d = c;
 173
 174           if (c == '\n' || c == '\r')
 175             {
 176                   /* Handle DOS line endings.  */
 177               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 178                 s++;
 179               if (s == buffer->rlimit)
 180                 break;
 181
 182               /* Escaped?  Same backward scan as in the fast path, but
                      over the text already copied.  */
 183               p = d;
 184               while (p != buffer->next_line && is_nvspace (p[-1]))
 185                 p--;
 186               if (p == buffer->next_line || p[-1] != '\\')
 187                 break;
 188
 189               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 190               d = p - 2;
 191               buffer->next_line = p - 1;
 192             }
 193           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 194             {
 195               /* Add a note regardless, for the benefit of -Wtrigraphs.
                      It is recorded at D, the position in the cleaned
                      text.  */
 196               add_line_note (buffer, d, s[2]);
 197               if (CPP_OPTION (pfile, trigraphs))
 198                 {
 199                   *d = _cpp_trigraph_map[s[2]];
 200                   s += 2;
 201                 }
 202             }
 203         }
 204     }
 205   else
 206     {
           /* from_stage3 buffers are only scanned for the line end; no
              escaped-newline or trigraph processing is done.  */
 207       do
 208         s++;
 209       while (*s != '\n' && *s != '\r');
 210       d = (uchar *) s;
 211
 212       /* Handle DOS line endings.  */
 213       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 214         s++;
 215     }
 216
 217  done:
       /* Terminate the cleaned line with a plain newline at D.  */
 218   *d = '\n';
 219   /* A sentinel note that should never be processed.  */
 220   add_line_note (buffer, d + 1, '\n');
 221   buffer->next_line = s + 1;
 222 }
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           CPP_INCREMENT_LINE (pfile, 0);
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, CPP_DL_WARNING,
 346                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           unsigned int cols;
 353           buffer->cur = cur - 1;
 354           _cpp_process_line_notes (pfile, true);
 355           if (buffer->next_line >= buffer->rlimit)
 356             return true;
 357           _cpp_clean_line (pfile);
 358
 359           cols = buffer->next_line - buffer->line_base;
 360           CPP_INCREMENT_LINE (pfile, cols);
 361
 362           cur = buffer->cur;
 363         }
 364     }
 365
 366   buffer->cur = cur;
 367   _cpp_process_line_notes (pfile, true);
 368   return false;
 369 }
 370
 371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 372    terminating newline.  Handles escaped newlines.  Returns nonzero
 373    if a multiline comment.  */
 374 static int
 375 skip_line_comment (cpp_reader *pfile)
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int orig_line = pfile->line_table->highest_line;
 379
 380   while (*buffer->cur != '\n')
 381     buffer->cur++;
 382
 383   _cpp_process_line_notes (pfile, true);
 384   return orig_line != pfile->line_table->highest_line;
 385 }
 386
 387 /* Skips whitespace, saving the next non-whitespace character.  */
 388 static void
 389 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 390 {
 391   cpp_buffer *buffer = pfile->buffer;
 392   bool saw_NUL = false;
 393
 394   do
 395     {
 396       /* Horizontal space always OK.  */
 397       if (c == ' ' || c == '\t')
 398         ;
 399       /* Just \f \v or \0 left.  */
 400       else if (c == '\0')
 401         saw_NUL = true;
 402       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 403         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 404                              CPP_BUF_COL (buffer),
 405                              "%s in preprocessing directive",
 406                              c == '\f' ? "form feed" : "vertical tab");
 407
 408       c = *buffer->cur++;
 409     }
 410   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 411   while (is_nvspace (c));
 412
 413   if (saw_NUL)
 414     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 415
 416   buffer->cur--;
 417 }
 418
 419 /* See if the characters of a number token are valid in a name (no
 420    '.', '+' or '-').  */
 421 static int
 422 name_p (cpp_reader *pfile, const cpp_string *string)
 423 {
 424   unsigned int i;
 425
 426   for (i = 0; i < string->len; i++)
 427     if (!is_idchar (string->text[i]))
 428       return 0;
 429
 430   return 1;
 431 }
 432
 433 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 434    an identifier.  FIRST is TRUE if this starts an identifier.  */
 435 static bool
 436 forms_identifier_p (cpp_reader *pfile, int first)
 437 {
 438   cpp_buffer *buffer = pfile->buffer;
 439
 440   if (*buffer->cur == '$')
 441     {
 442       if (!CPP_OPTION (pfile, dollars_in_ident))
 443         return false;
 444
 445       buffer->cur++;
 446       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 447         {
 448           CPP_OPTION (pfile, warn_dollars) = 0;
 449           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 450         }
 451
 452       return true;
 453     }
 454
 455   /* Is this a syntactically valid UCN?  */
 456   if (0 && *buffer->cur == '\\'
 457       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 458     {
 459       buffer->cur += 2;
 460       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 461         return true;
 462       buffer->cur -= 2;
 463     }
 464
 465   return false;
 466 }
 467
 468 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 469 static cpp_hashnode *
 470 lex_identifier (cpp_reader *pfile, const uchar *base)
 471 {
 472   cpp_hashnode *result;
 473   const uchar *cur, *limit;
 474   unsigned int len;
 475   unsigned int hash = HT_HASHSTEP (0, *base);
 476
 477   cur = pfile->buffer->cur;
 478   for (;;)
 479     {
 480       /* N.B. ISIDNUM does not include $.  */
 481       while (ISIDNUM (*cur))
 482         {
 483           hash = HT_HASHSTEP (hash, *cur);
 484           cur++;
 485         }
 486
 487       pfile->buffer->cur = cur;
 488       if (!forms_identifier_p (pfile, false))
 489         break;
 490
 491       limit = pfile->buffer->cur;
 492       while (cur < limit)
 493         {
 494           hash = HT_HASHSTEP (hash, *cur);
 495           cur++;
 496         }
 497     }
 498   len = cur - base;
 499   hash = HT_HASHFINISH (hash, len);
 500
 501   result = (cpp_hashnode *)
 502     ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
 503
 504   /* Rarely, identifiers require diagnostics when lexed.  */
 505   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 506                         && !pfile->state.skipping, 0))
 507     {
 508       /* It is allowed to poison the same identifier twice.  */
 509       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 510         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 511                    NODE_NAME (result));
 512
 513       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 514          replacement list of a variadic macro.  */
 515       if (result == pfile->spec_nodes.n__VA_ARGS__
 516           && !pfile->state.va_args_ok)
 517         cpp_error (pfile, CPP_DL_PEDWARN,
 518                    "__VA_ARGS__ can only appear in the expansion"
 519                    " of a C99 variadic macro");
 520     }
 521
 522   return result;
 523 }
 524
 525 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 526 static void
 527 lex_number (cpp_reader *pfile, cpp_string *number)
 528 {
 529   const uchar *cur;
 530   const uchar *base;
 531   uchar *dest;
 532
 533   base = pfile->buffer->cur - 1;
 534   do
 535     {
 536       cur = pfile->buffer->cur;
 537
 538       /* N.B. ISIDNUM does not include $.  */
 539       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 540         cur++;
 541
 542       pfile->buffer->cur = cur;
 543     }
 544   while (forms_identifier_p (pfile, false));
 545
 546   number->len = cur - base;
 547   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 548   memcpy (dest, base, number->len);
 549   dest[number->len] = '\0';
 550   number->text = dest;
 551 }
 552
 553 /* Create a token of type TYPE with a literal spelling.  */
 554 static void
 555 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 556                 unsigned int len, enum cpp_ttype type)
 557 {
 558   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 559
 560   memcpy (dest, base, len);
 561   dest[len] = '\0';
 562   token->type = type;
 563   token->val.str.len = len;
 564   token->val.str.text = dest;
 565 }
 566
 567 /* Lexes a string, character constant, or angle-bracketed header file
 568    name.  The stored string contains the spelling, including opening
 569    quote and leading any leading 'L'.  It returns the type of the
 570    literal, or CPP_OTHER if it was not properly terminated.
 571
 572    The spelling is NUL-terminated, but it is not guaranteed that this
 573    is the first NUL since embedded NULs are preserved.  */
 574 static void
 575 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 576 {
 577   bool saw_NUL = false;
 578   const uchar *cur;
 579   cppchar_t terminator;
 580   enum cpp_ttype type;
 581
 582   cur = base;
 583   terminator = *cur++;
 584   if (terminator == 'L')
 585     terminator = *cur++;
 586   if (terminator == '\"')
 587     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 588   else if (terminator == '\'')
 589     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 590   else
 591     terminator = '>', type = CPP_HEADER_NAME;
 592
 593   for (;;)
 594     {
 595       cppchar_t c = *cur++;
 596
 597       /* In #include-style directives, terminators are not escapable.  */
 598       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 599         cur++;
 600       else if (c == terminator)
 601         break;
 602       else if (c == '\n')
 603         {
 604           cur--;
 605           type = CPP_OTHER;
 606           break;
 607         }
 608       else if (c == '\0')
 609         saw_NUL = true;
 610     }
 611
 612   if (saw_NUL && !pfile->state.skipping)
 613     cpp_error (pfile, CPP_DL_WARNING,
 614                "null character(s) preserved in literal");
 615
 616   pfile->buffer->cur = cur;
 617   create_literal (pfile, token, base, cur - base, type);
 618 }
 619
 620 /* The stored comment includes the comment start and any terminator.  */
 621 static void
 622 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 623               cppchar_t type)
 624 {
 625   unsigned char *buffer;
 626   unsigned int len, clen;
 627
 628   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 629
 630   /* C++ comments probably (not definitely) have moved past a new
 631      line, which we don't want to save in the comment.  */
 632   if (is_vspace (pfile->buffer->cur[-1]))
 633     len--;
 634
 635   /* If we are currently in a directive, then we need to store all
 636      C++ comments as C comments internally, and so we need to
 637      allocate a little extra space in that case.
 638
 639      Note that the only time we encounter a directive here is
 640      when we are saving comments in a "#define".  */
 641   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 642
 643   buffer = _cpp_unaligned_alloc (pfile, clen);
 644
 645   token->type = CPP_COMMENT;
 646   token->val.str.len = clen;
 647   token->val.str.text = buffer;
 648
 649   buffer[0] = '/';
 650   memcpy (buffer + 1, from, len - 1);
 651
 652   /* Finish conversion to a C comment, if necessary.  */
 653   if (pfile->state.in_directive && type == '/')
 654     {
 655       buffer[1] = '*';
 656       buffer[clen - 2] = '*';
 657       buffer[clen - 1] = '/';
 658     }
 659 }
 660
 661 /* Allocate COUNT tokens for RUN.  */
 662 void
 663 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 664 {
 665   run->base = XNEWVEC (cpp_token, count);
 666   run->limit = run->base + count;
 667   run->next = NULL;
 668 }
 669
 670 /* Returns the next tokenrun, or creates one if there is none.  */
 671 static tokenrun *
 672 next_tokenrun (tokenrun *run)
 673 {
 674   if (run->next == NULL)
 675     {
 676       run->next = XNEW (tokenrun);
 677       run->next->prev = run;
 678       _cpp_init_tokenrun (run->next, 250);
 679     }
 680
 681   return run->next;
 682 }
 683
 684 /* Allocate a single token that is invalidated at the same time as the
 685    rest of the tokens on the line.  Has its line and col set to the
 686    same as the last lexed token, so that diagnostics appear in the
 687    right place.  */
 688 cpp_token *
 689 _cpp_temp_token (cpp_reader *pfile)
 690 {
 691   cpp_token *old, *result;
 692
 693   old = pfile->cur_token - 1;
 694   if (pfile->cur_token == pfile->cur_run->limit)
 695     {
 696       pfile->cur_run = next_tokenrun (pfile->cur_run);
 697       pfile->cur_token = pfile->cur_run->base;
 698     }
 699
 700   result = pfile->cur_token++;
 701   result->src_loc = old->src_loc;
 702   return result;
 703 }
 704
 705 /* Lex a token into RESULT (external interface).  Takes care of issues
 706    like directive handling, token lookahead, multiple include
 707    optimization and skipping.  */
 708 const cpp_token *
 709 _cpp_lex_token (cpp_reader *pfile)
 710 {
 711   cpp_token *result;
 712
 713   for (;;)
 714     {
 715       if (pfile->cur_token == pfile->cur_run->limit)
 716         {
 717           pfile->cur_run = next_tokenrun (pfile->cur_run);
 718           pfile->cur_token = pfile->cur_run->base;
 719         }
 720
 721       if (pfile->lookaheads)
 722         {
 723           pfile->lookaheads--;
 724           result = pfile->cur_token++;
 725         }
 726       else
 727         result = _cpp_lex_direct (pfile);
 728
 729       if (result->flags & BOL)
 730         {
 731           /* Is this a directive.  If _cpp_handle_directive returns
 732              false, it is an assembler #.  */
 733           if (result->type == CPP_HASH
 734               /* 6.10.3 p 11: Directives in a list of macro arguments
 735                  gives undefined behavior.  This implementation
 736                  handles the directive as normal.  */
 737               && pfile->state.parsing_args != 1
 738               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 739             {
 740               if (pfile->directive_result.type == CPP_PADDING)
 741                 continue;
 742               else
 743                 {
 744                   result = &pfile->directive_result;
 745                   break;
 746                 }
 747             }
 748
 749           if (pfile->cb.line_change && !pfile->state.skipping)
 750             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 751         }
 752
 753       /* We don't skip tokens in directives.  */
 754       if (pfile->state.in_directive)
 755         break;
 756
 757       /* Outside a directive, invalidate controlling macros.  At file
 758          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 759          get here and MI optimization works.  */
 760       pfile->mi_valid = false;
 761
 762       if (!pfile->state.skipping || result->type == CPP_EOF)
 763         break;
 764     }
 765
 766   return result;
 767 }
 768
 769 /* Returns true if a fresh line has been loaded.  */
 770 bool
 771 _cpp_get_fresh_line (cpp_reader *pfile)
 772 {
 773   int return_at_eof;
 774
 775   /* We can't get a new line until we leave the current directive.  */
 776   if (pfile->state.in_directive)
 777     return false;
 778
 779   for (;;)
 780     {
 781       cpp_buffer *buffer = pfile->buffer;
 782
 783       if (!buffer->need_line)
 784         return true;
 785
 786       if (buffer->next_line < buffer->rlimit)
 787         {
 788           _cpp_clean_line (pfile);
 789           return true;
 790         }
 791
 792       /* First, get out of parsing arguments state.  */
 793       if (pfile->state.parsing_args)
 794         return false;
 795
 796       /* End of buffer.  Non-empty files should end in a newline.  */
 797       if (buffer->buf != buffer->rlimit
 798           && buffer->next_line > buffer->rlimit
 799           && !buffer->from_stage3)
 800         {
 801           /* Only warn once.  */
 802           buffer->next_line = buffer->rlimit;
 803           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 804                                CPP_BUF_COLUMN (buffer, buffer->cur),
 805                                "no newline at end of file");
 806         }
 807
 808       return_at_eof = buffer->return_at_eof;
 809       _cpp_pop_buffer (pfile);
 810       if (pfile->buffer == NULL || return_at_eof)
 811         return false;
 812     }
 813 }
 814
 815 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 816   do                                                    \
 817     {                                                   \
 818       result->type = ELSE_TYPE;                         \
 819       if (*buffer->cur == CHAR)                         \
 820         buffer->cur++, result->type = THEN_TYPE;        \
 821     }                                                   \
 822   while (0)
 823
 824 /* Lex a token into pfile->cur_token, which is also incremented, to
 825    get diagnostics pointing to the correct location.
 826
 827    Does not handle issues such as token lookahead, multiple-include
 828    optimization, directives, skipping etc.  This function is only
 829    suitable for use by _cpp_lex_token, and in special cases like
 830    lex_expansion_token which doesn't care for any of these issues.
 831
 832    When meeting a newline, returns CPP_EOF if parsing a directive,
 833    otherwise returns to the start of the token buffer if permissible.
 834    Returns the location of the lexed token.  */
 835 cpp_token *
 836 _cpp_lex_direct (cpp_reader *pfile)
 837 {
 838   cppchar_t c;
 839   cpp_buffer *buffer;
 840   const unsigned char *comment_start;
 841   cpp_token *result = pfile->cur_token++;
 842
 843  fresh_line:
 844   result->flags = 0;
 845   buffer = pfile->buffer;
 846   if (buffer->need_line)
 847     {
 848       if (!_cpp_get_fresh_line (pfile))
 849         {
 850           result->type = CPP_EOF;
 851           if (!pfile->state.in_directive)
 852             {
 853               /* Tell the compiler the line number of the EOF token.  */
 854               result->src_loc = pfile->line_table->highest_line;
 855               result->flags = BOL;
 856             }
 857           return result;
 858         }
 859       if (!pfile->keep_tokens)
 860         {
 861           pfile->cur_run = &pfile->base_run;
 862           result = pfile->base_run.base;
 863           pfile->cur_token = result + 1;
 864         }
 865       result->flags = BOL;
 866       if (pfile->state.parsing_args == 2)
 867         result->flags |= PREV_WHITE;
 868     }
 869   buffer = pfile->buffer;
 870  update_tokens_line:
 871   result->src_loc = pfile->line_table->highest_line;
 872
 873  skipped_white:
 874   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 875       && !pfile->overlaid_buffer)
 876     {
 877       _cpp_process_line_notes (pfile, false);
 878       result->src_loc = pfile->line_table->highest_line;
 879     }
 880   c = *buffer->cur++;
 881
 882   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
 883                                CPP_BUF_COLUMN (buffer, buffer->cur));
 884
 885   switch (c)
 886     {
 887     case ' ': case '\t': case '\f': case '\v': case '\0':
 888       result->flags |= PREV_WHITE;
 889       skip_whitespace (pfile, c);
 890       goto skipped_white;
 891
 892     case '\n':
 893       if (buffer->cur < buffer->rlimit)
 894         CPP_INCREMENT_LINE (pfile, 0);
 895       buffer->need_line = true;
 896       goto fresh_line;
 897
 898     case '0': case '1': case '2': case '3': case '4':
 899     case '5': case '6': case '7': case '8': case '9':
 900       result->type = CPP_NUMBER;
 901       lex_number (pfile, &result->val.str);
 902       break;
 903
 904     case 'L':
 905       /* 'L' may introduce wide characters or strings.  */
 906       if (*buffer->cur == '\'' || *buffer->cur == '"')
 907         {
 908           lex_string (pfile, result, buffer->cur - 1);
 909           break;
 910         }
 911       /* Fall through.  */
 912
 913     case '_':
 914     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 915     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 916     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 917     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 918     case 'y': case 'z':
 919     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 920     case 'G': case 'H': case 'I': case 'J': case 'K':
 921     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 922     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 923     case 'Y': case 'Z':
 924       result->type = CPP_NAME;
 925       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 926
 927       /* Convert named operators to their proper types.  */
 928       if (result->val.node->flags & NODE_OPERATOR)
 929         {
 930           result->flags |= NAMED_OP;
 931           result->type = result->val.node->directive_index;
 932         }
 933       break;
 934
 935     case '\'':
 936     case '"':
 937       lex_string (pfile, result, buffer->cur - 1);
 938       break;
 939
 940     case '/':
 941       /* A potential block or line comment.  */
 942       comment_start = buffer->cur;
 943       c = *buffer->cur;
 944
 945       if (c == '*')
 946         {
 947           if (_cpp_skip_block_comment (pfile))
 948             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 949         }
 950       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 951                             || cpp_in_system_header (pfile)))
 952         {
 953           /* Warn about comments only if pedantically GNUC89, and not
 954              in system headers.  */
 955           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 956               && ! buffer->warned_cplusplus_comments)
 957             {
 958               cpp_error (pfile, CPP_DL_PEDWARN,
 959                          "C++ style comments are not allowed in ISO C90");
 960               cpp_error (pfile, CPP_DL_PEDWARN,
 961                          "(this will be reported only once per input file)");
 962               buffer->warned_cplusplus_comments = 1;
 963             }
 964
 965           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 966             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
 967         }
 968       else if (c == '=')
 969         {
 970           buffer->cur++;
 971           result->type = CPP_DIV_EQ;
 972           break;
 973         }
 974       else
 975         {
 976           result->type = CPP_DIV;
 977           break;
 978         }
 979
 980       if (!pfile->state.save_comments)
 981         {
 982           result->flags |= PREV_WHITE;
 983           goto update_tokens_line;
 984         }
 985
 986       /* Save the comment as a token in its own right.  */
 987       save_comment (pfile, result, comment_start, c);
 988       break;
 989
 990     case '<':
 991       if (pfile->state.angled_headers)
 992         {
 993           lex_string (pfile, result, buffer->cur - 1);
 994           break;
 995         }
 996
 997       result->type = CPP_LESS;
 998       if (*buffer->cur == '=')
 999         buffer->cur++, result->type = CPP_LESS_EQ;
1000       else if (*buffer->cur == '<')
1001         {
1002           buffer->cur++;
1003           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1004         }
1005       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1006         {
1007           buffer->cur++;
1008           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1009         }
1010       else if (CPP_OPTION (pfile, digraphs))
1011         {
1012           if (*buffer->cur == ':')
1013             {
1014               buffer->cur++;
1015               result->flags |= DIGRAPH;
1016               result->type = CPP_OPEN_SQUARE;
1017             }
1018           else if (*buffer->cur == '%')
1019             {
1020               buffer->cur++;
1021               result->flags |= DIGRAPH;
1022               result->type = CPP_OPEN_BRACE;
1023             }
1024         }
1025       break;
1026
1027     case '>':
1028       result->type = CPP_GREATER;
1029       if (*buffer->cur == '=')
1030         buffer->cur++, result->type = CPP_GREATER_EQ;
1031       else if (*buffer->cur == '>')
1032         {
1033           buffer->cur++;
1034           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1035         }
1036       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1037         {
1038           buffer->cur++;
1039           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1040         }
1041       break;
1042
1043     case '%':
1044       result->type = CPP_MOD;
1045       if (*buffer->cur == '=')
1046         buffer->cur++, result->type = CPP_MOD_EQ;
1047       else if (CPP_OPTION (pfile, digraphs))
1048         {
1049           if (*buffer->cur == ':')
1050             {
1051               buffer->cur++;
1052               result->flags |= DIGRAPH;
1053               result->type = CPP_HASH;
1054               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1055                 buffer->cur += 2, result->type = CPP_PASTE;
1056             }
1057           else if (*buffer->cur == '>')
1058             {
1059               buffer->cur++;
1060               result->flags |= DIGRAPH;
1061               result->type = CPP_CLOSE_BRACE;
1062             }
1063         }
1064       break;
1065
1066     case '.':
1067       result->type = CPP_DOT;
1068       if (ISDIGIT (*buffer->cur))
1069         {
1070           result->type = CPP_NUMBER;
1071           lex_number (pfile, &result->val.str);
1072         }
1073       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1074         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1075       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1076         buffer->cur++, result->type = CPP_DOT_STAR;
1077       break;
1078
1079     case '+':
1080       result->type = CPP_PLUS;
1081       if (*buffer->cur == '+')
1082         buffer->cur++, result->type = CPP_PLUS_PLUS;
1083       else if (*buffer->cur == '=')
1084         buffer->cur++, result->type = CPP_PLUS_EQ;
1085       break;
1086
1087     case '-':
1088       result->type = CPP_MINUS;
1089       if (*buffer->cur == '>')
1090         {
1091           buffer->cur++;
1092           result->type = CPP_DEREF;
1093           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1094             buffer->cur++, result->type = CPP_DEREF_STAR;
1095         }
1096       else if (*buffer->cur == '-')
1097         buffer->cur++, result->type = CPP_MINUS_MINUS;
1098       else if (*buffer->cur == '=')
1099         buffer->cur++, result->type = CPP_MINUS_EQ;
1100       break;
1101
1102     case '&':
1103       result->type = CPP_AND;
1104       if (*buffer->cur == '&')
1105         buffer->cur++, result->type = CPP_AND_AND;
1106       else if (*buffer->cur == '=')
1107         buffer->cur++, result->type = CPP_AND_EQ;
1108       break;
1109
1110     case '|':
1111       result->type = CPP_OR;
1112       if (*buffer->cur == '|')
1113         buffer->cur++, result->type = CPP_OR_OR;
1114       else if (*buffer->cur == '=')
1115         buffer->cur++, result->type = CPP_OR_EQ;
1116       break;
1117
1118     case ':':
1119       result->type = CPP_COLON;
1120       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1121         buffer->cur++, result->type = CPP_SCOPE;
1122       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1123         {
1124           buffer->cur++;
1125           result->flags |= DIGRAPH;
1126           result->type = CPP_CLOSE_SQUARE;
1127         }
1128       break;
1129
1130     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1131     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1132     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1133     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1134     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1135
1136     case '?': result->type = CPP_QUERY; break;
1137     case '~': result->type = CPP_COMPL; break;
1138     case ',': result->type = CPP_COMMA; break;
1139     case '(': result->type = CPP_OPEN_PAREN; break;
1140     case ')': result->type = CPP_CLOSE_PAREN; break;
1141     case '[': result->type = CPP_OPEN_SQUARE; break;
1142     case ']': result->type = CPP_CLOSE_SQUARE; break;
1143     case '{': result->type = CPP_OPEN_BRACE; break;
1144     case '}': result->type = CPP_CLOSE_BRACE; break;
1145     case ';': result->type = CPP_SEMICOLON; break;
1146
1147       /* @ is a punctuator in Objective-C.  */
1148     case '@': result->type = CPP_ATSIGN; break;
1149
1150     case '$':
1151     case '\\':
1152       {
1153         const uchar *base = --buffer->cur;
1154
1155         if (forms_identifier_p (pfile, true))
1156           {
1157             result->type = CPP_NAME;
1158             result->val.node = lex_identifier (pfile, base);
1159             break;
1160           }
1161         buffer->cur++;
1162       }
1163
1164     default:
1165       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1166       break;
1167     }
1168
1169   return result;
1170 }
1171
1172 /* An upper bound on the number of bytes needed to spell TOKEN.
1173    Does not include preceding whitespace.  */
1174 unsigned int
1175 cpp_token_len (const cpp_token *token)
1176 {
1177   unsigned int len;
1178
1179   switch (TOKEN_SPELL (token))
1180     {
1181     default:            len = 4;                                break;
1182     case SPELL_LITERAL: len = token->val.str.len;               break;
1183     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1184     }
1185
1186   return len;
1187 }
1188
1189 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1190    already contain the enough space to hold the token's spelling.
1191    Returns a pointer to the character after the last character written.
1192    FIXME: Would be nice if we didn't need the PFILE argument.  */
1193 unsigned char *
1194 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1195                  unsigned char *buffer)
1196 {
1197   switch (TOKEN_SPELL (token))
1198     {
1199     case SPELL_OPERATOR:
1200       {
1201         const unsigned char *spelling;
1202         unsigned char c;
1203
1204         if (token->flags & DIGRAPH)
1205           spelling
1206             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1207         else if (token->flags & NAMED_OP)
1208           goto spell_ident;
1209         else
1210           spelling = TOKEN_NAME (token);
1211
1212         while ((c = *spelling++) != '\0')
1213           *buffer++ = c;
1214       }
1215       break;
1216
1217     spell_ident:
1218     case SPELL_IDENT:
1219       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1220       buffer += NODE_LEN (token->val.node);
1221       break;
1222
1223     case SPELL_LITERAL:
1224       memcpy (buffer, token->val.str.text, token->val.str.len);
1225       buffer += token->val.str.len;
1226       break;
1227
1228     case SPELL_NONE:
1229       cpp_error (pfile, CPP_DL_ICE,
1230                  "unspellable token %s", TOKEN_NAME (token));
1231       break;
1232     }
1233
1234   return buffer;
1235 }
1236
1237 /* Returns TOKEN spelt as a null-terminated string.  The string is
1238    freed when the reader is destroyed.  Useful for diagnostics.  */
1239 unsigned char *
1240 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1241 {
1242   unsigned int len = cpp_token_len (token) + 1;
1243   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1244
1245   end = cpp_spell_token (pfile, token, start);
1246   end[0] = '\0';
1247
1248   return start;
1249 }
1250
1251 /* Used by C front ends, which really should move to using
1252    cpp_token_as_text.  */
1253 const char *
1254 cpp_type2name (enum cpp_ttype type)
1255 {
1256   return (const char *) token_spellings[type].name;
1257 }
1258
1259 /* Writes the spelling of token to FP, without any preceding space.
1260    Separated from cpp_spell_token for efficiency - to avoid stdio
1261    double-buffering.  */
1262 void
1263 cpp_output_token (const cpp_token *token, FILE *fp)
1264 {
1265   switch (TOKEN_SPELL (token))
1266     {
1267     case SPELL_OPERATOR:
1268       {
1269         const unsigned char *spelling;
1270         int c;
1271
1272         if (token->flags & DIGRAPH)
1273           spelling
1274             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1275         else if (token->flags & NAMED_OP)
1276           goto spell_ident;
1277         else
1278           spelling = TOKEN_NAME (token);
1279
1280         c = *spelling;
1281         do
1282           putc (c, fp);
1283         while ((c = *++spelling) != '\0');
1284       }
1285       break;
1286
1287     spell_ident:
1288     case SPELL_IDENT:
1289       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1290     break;
1291
1292     case SPELL_LITERAL:
1293       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1294       break;
1295
1296     case SPELL_NONE:
1297       /* An error, most probably.  */
1298       break;
1299     }
1300 }
1301
1302 /* Compare two tokens.  */
1303 int
1304 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1305 {
1306   if (a->type == b->type && a->flags == b->flags)
1307     switch (TOKEN_SPELL (a))
1308       {
1309       default:                  /* Keep compiler happy.  */
1310       case SPELL_OPERATOR:
1311         return 1;
1312       case SPELL_NONE:
1313         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1314       case SPELL_IDENT:
1315         return a->val.node == b->val.node;
1316       case SPELL_LITERAL:
1317         return (a->val.str.len == b->val.str.len
1318                 && !memcmp (a->val.str.text, b->val.str.text,
1319                             a->val.str.len));
1320       }
1321
1322   return 0;
1323 }
1324
1325 /* Returns nonzero if a space should be inserted to avoid an
1326    accidental token paste for output.  For simplicity, it is
1327    conservative, and occasionally advises a space where one is not
1328    needed, e.g. "." and ".2".  */
1329 int
1330 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1331                  const cpp_token *token2)
1332 {
1333   enum cpp_ttype a = token1->type, b = token2->type;
1334   cppchar_t c;
1335
1336   if (token1->flags & NAMED_OP)
1337     a = CPP_NAME;
1338   if (token2->flags & NAMED_OP)
1339     b = CPP_NAME;
1340
1341   c = EOF;
1342   if (token2->flags & DIGRAPH)
1343     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1344   else if (token_spellings[b].category == SPELL_OPERATOR)
1345     c = token_spellings[b].name[0];
1346
1347   /* Quickly get everything that can paste with an '='.  */
1348   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1349     return 1;
1350
1351   switch (a)
1352     {
1353     case CPP_GREATER:   return c == '>' || c == '?';
1354     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1355     case CPP_PLUS:      return c == '+';
1356     case CPP_MINUS:     return c == '-' || c == '>';
1357     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1358     case CPP_MOD:       return c == ':' || c == '>';
1359     case CPP_AND:       return c == '&';
1360     case CPP_OR:        return c == '|';
1361     case CPP_COLON:     return c == ':' || c == '>';
1362     case CPP_DEREF:     return c == '*';
1363     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1364     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1365     case CPP_NAME:      return ((b == CPP_NUMBER
1366                                  && name_p (pfile, &token2->val.str))
1367                                 || b == CPP_NAME
1368                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1369     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1370                                 || c == '.' || c == '+' || c == '-');
1371                                       /* UCNs */
1372     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1373                                  && b == CPP_NAME)
1374                                 || (CPP_OPTION (pfile, objc)
1375                                     && token1->val.str.text[0] == '@'
1376                                     && (b == CPP_NAME || b == CPP_STRING)));
1377     default:            break;
1378     }
1379
1380   return 0;
1381 }
1382
1383 /* Output all the remaining tokens on the current line, and a newline
1384    character, to FP.  Leading whitespace is removed.  If there are
1385    macros, special token padding is not performed.  */
1386 void
1387 cpp_output_line (cpp_reader *pfile, FILE *fp)
1388 {
1389   const cpp_token *token;
1390
1391   token = cpp_get_token (pfile);
1392   while (token->type != CPP_EOF)
1393     {
1394       cpp_output_token (token, fp);
1395       token = cpp_get_token (pfile);
1396       if (token->flags & PREV_WHITE)
1397         putc (' ', fp);
1398     }
1399
1400   putc ('\n', fp);
1401 }
1402
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the bytes between BUFF->cur
   and BUFF->limit.  Every macro argument is parenthesized so that an
   arbitrary expression can be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1417
1418 /* Create a new allocation buffer.  Place the control block at the end
1419    of the buffer, so that buffer overflows will cause immediate chaos.  */
1420 static _cpp_buff *
1421 new_buff (size_t len)
1422 {
1423   _cpp_buff *result;
1424   unsigned char *base;
1425
1426   if (len < MIN_BUFF_SIZE)
1427     len = MIN_BUFF_SIZE;
1428   len = CPP_ALIGN (len);
1429
1430   base = xmalloc (len + sizeof (_cpp_buff));
1431   result = (_cpp_buff *) (base + len);
1432   result->base = base;
1433   result->cur = base;
1434   result->limit = base + len;
1435   result->next = NULL;
1436   return result;
1437 }
1438
1439 /* Place a chain of unwanted allocation buffers on the free list.  */
1440 void
1441 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1442 {
1443   _cpp_buff *end = buff;
1444
1445   while (end->next)
1446     end = end->next;
1447   end->next = pfile->free_buffs;
1448   pfile->free_buffs = buff;
1449 }
1450
1451 /* Return a free buffer of size at least MIN_SIZE.  */
1452 _cpp_buff *
1453 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1454 {
1455   _cpp_buff *result, **p;
1456
1457   for (p = &pfile->free_buffs;; p = &(*p)->next)
1458     {
1459       size_t size;
1460
1461       if (*p == NULL)
1462         return new_buff (min_size);
1463       result = *p;
1464       size = result->limit - result->base;
1465       /* Return a buffer that's big enough, but don't waste one that's
1466          way too big.  */
1467       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1468         break;
1469     }
1470
1471   *p = result->next;
1472   result->next = NULL;
1473   result->cur = result->base;
1474   return result;
1475 }
1476
1477 /* Creates a new buffer with enough space to hold the uncommitted
1478    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1479    the excess bytes to the new buffer.  Chains the new buffer after
1480    BUFF, and returns the new buffer.  */
1481 _cpp_buff *
1482 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1483 {
1484   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1485   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1486
1487   buff->next = new_buff;
1488   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1489   return new_buff;
1490 }
1491
1492 /* Creates a new buffer with enough space to hold the uncommitted
1493    remaining bytes of the buffer pointed to by BUFF, and at least
1494    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1495    Chains the new buffer before the buffer pointed to by BUFF, and
1496    updates the pointer to point to the new buffer.  */
1497 void
1498 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1499 {
1500   _cpp_buff *new_buff, *old_buff = *pbuff;
1501   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1502
1503   new_buff = _cpp_get_buff (pfile, size);
1504   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1505   new_buff->next = old_buff;
1506   *pbuff = new_buff;
1507 }
1508
1509 /* Free a chain of buffers starting at BUFF.  */
1510 void
1511 _cpp_free_buff (_cpp_buff *buff)
1512 {
1513   _cpp_buff *next;
1514
1515   for (; buff; buff = next)
1516     {
1517       next = buff->next;
1518       free (buff->base);
1519     }
1520 }
1521
1522 /* Allocate permanent, unaligned storage of length LEN.  */
1523 unsigned char *
1524 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1525 {
1526   _cpp_buff *buff = pfile->u_buff;
1527   unsigned char *result = buff->cur;
1528
1529   if (len > (size_t) (buff->limit - result))
1530     {
1531       buff = _cpp_get_buff (pfile, len);
1532       buff->next = pfile->u_buff;
1533       pfile->u_buff = buff;
1534       result = buff->cur;
1535     }
1536
1537   buff->cur = result + len;
1538   return result;
1539 }
1540
1541 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1542    That buffer is used for growing allocations when saving macro
1543    replacement lists in a #define, and when parsing an answer to an
1544    assertion in #assert, #unassert or #if (and therefore possibly
1545    whilst expanding macros).  It therefore must not be used by any
1546    code that they might call: specifically the lexer and the guts of
1547    the macro expander.
1548
1549    All existing other uses clearly fit this restriction: storing
1550    registered pragmas during initialization.  */
1551 unsigned char *
1552 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1553 {
1554   _cpp_buff *buff = pfile->a_buff;
1555   unsigned char *result = buff->cur;
1556
1557   if (len > (size_t) (buff->limit - result))
1558     {
1559       buff = _cpp_get_buff (pfile, len);
1560       buff->next = pfile->a_buff;
1561       pfile->a_buff = buff;
1562       result = buff->cur;
1563     }
1564
1565   buff->cur = result + len;
1566   return result;
1567 }
1568
1569 /* Say which field of TOK is in use.  */
1570
1571 enum cpp_token_fld_kind
1572 cpp_token_val_index (cpp_token *tok)
1573 {
1574   switch (TOKEN_SPELL (tok))
1575     {
1576     case SPELL_IDENT:
1577       return CPP_TOKEN_FLD_NODE;
1578     case SPELL_LITERAL:
1579       return CPP_TOKEN_FLD_STR;
1580     case SPELL_NONE:
1581       if (tok->type == CPP_MACRO_ARG)
1582         return CPP_TOKEN_FLD_ARG_NO;
1583       else if (tok->type == CPP_PADDING)
1584         return CPP_TOKEN_FLD_SOURCE;
1585       else if (tok->type == CPP_PRAGMA)
1586         return CPP_TOKEN_FLD_STR;
1587       /* else fall through */
1588     default:
1589       return CPP_TOKEN_FLD_NONE;
1590     }
1591 }