gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "cpphash.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, U s           },
  45 #define TK(e, s) { s,              U #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
  57 static void lex_number (cpp_reader *, cpp_string *);
  58 static bool forms_identifier_p (cpp_reader *, int);
  59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  62                             unsigned int, enum cpp_ttype);
  63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  64 static int name_p (cpp_reader *, const cpp_string *);
  65 static tokenrun *next_tokenrun (tokenrun *);
  66
  67 static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
 100 /* Returns with a logical line that contains no escaped newlines or
 101    trigraphs.  This is a time-critical inner loop.

        The buffer's line notes are reset, and buffer->cur and
        buffer->line_base are pointed at the start of the line (the old
        buffer->next_line).  A backslash-newline, possibly with
        non-vertical whitespace between the two, is removed by copying
        the following text down over it; each one is recorded with
        add_line_note as type '\\', or ' ' when whitespace intervened.
        Every trigraph gets a note whose type is its third character,
        and is replaced in place only when the trigraphs option is set.
        A buffer marked from_stage3 gets neither treatment; only the
        end of its line is located.

        On exit the cleaned line ends in '\n', a sentinel '\n' note
        follows the real notes, and buffer->next_line points just past
        the input that was consumed.  */
 102 void
 103 _cpp_clean_line (cpp_reader *pfile)
 104 {
 105   cpp_buffer *buffer;
 106   const uchar *s;
 107   uchar c, *d, *p;
 108
 109   buffer = pfile->buffer;
 110   buffer->cur_note = buffer->notes_used = 0;
 111   buffer->cur = buffer->line_base = buffer->next_line;
 112   buffer->need_line = false;
       /* S is pre-incremented before every read, so start one short.  */
 113   s = buffer->next_line - 1;
 114
 115   if (!buffer->from_stage3)
 116     {
 117       /* Short circuit for the common case of an un-escaped line with
 118          no trigraphs.  The primary win here is by not writing any
 119          data back to memory until we have to.  */
 120       for (;;)
 121         {
 122           c = *++s;
 123           if (c == '\n' || c == '\r')
 124             {
                   /* D marks where the terminating '\n' will be stored.  */
 125               d = (uchar *) s;
 126
                   /* The line ends exactly at buffer->rlimit.  */
 127               if (s == buffer->rlimit)
 128                 goto done;
 129
 130               /* DOS line ending? */
 131               if (c == '\r' && s[1] == '\n')
 132                 s++;
 133
 134               if (s == buffer->rlimit)
 135                 goto done;
 136
 137               /* Check for an escaped newline: walk P back over any
                      non-vertical whitespace before the terminator and
                      see whether a backslash precedes it.  */
 138               p = d;
 139               while (p != buffer->next_line && is_nvspace (p[-1]))
 140                 p--;
 141               if (p == buffer->next_line || p[-1] != '\\')
 142                 goto done;
 143
 144               /* Have an escaped newline; process it and proceed to
 145                  the slow path.  */
 146               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
                   /* The slow loop pre-increments D, so its first store
                      lands on the backslash at P - 1.  NEXT_LINE now
                      marks where the continuation starts in the cleaned
                      text; it bounds the backward whitespace scans.  */
 147               d = p - 2;
 148               buffer->next_line = p - 1;
 149               break;
 150             }
 151           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 152             {
 153               /* Have a trigraph.  We may or may not have to convert
 154                  it.  Add a line note regardless, for -Wtrigraphs.  */
 155               add_line_note (buffer, s, s[2]);
 156               if (CPP_OPTION (pfile, trigraphs))
 157                 {
 158                   /* We do, and that means we have to switch to the
 159                      slow path.  */
 160                   d = (uchar *) s;
 161                   *d = _cpp_trigraph_map[s[2]];
 162                   s += 2;
 163                   break;
 164                 }
 165             }
 166         }
 167
 168
           /* Slow path: copy each character from S down to D, dropping
              further escaped newlines and converting trigraphs as they
              are met.  */
 169       for (;;)
 170         {
 171           c = *++s;
 172           *++d = c;
 173
 174           if (c == '\n' || c == '\r')
 175             {
 176                   /* Handle DOS line endings.  */
 177               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 178                 s++;
 179               if (s == buffer->rlimit)
 180                 break;
 181
 182               /* Escaped?  */
 183               p = d;
 184               while (p != buffer->next_line && is_nvspace (p[-1]))
 185                 p--;
 186               if (p == buffer->next_line || p[-1] != '\\')
 187                 break;
 188
 189               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 190               d = p - 2;
 191               buffer->next_line = p - 1;
 192             }
 193           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 194             {
 195               /* Add a note regardless, for the benefit of -Wtrigraphs.
                      The note is positioned in the cleaned text, hence D
                      rather than S.  */
 196               add_line_note (buffer, d, s[2]);
 197               if (CPP_OPTION (pfile, trigraphs))
 198                 {
 199                   *d = _cpp_trigraph_map[s[2]];
 200                   s += 2;
 201                 }
 202             }
 203         }
 204     }
 205   else
 206     {
           /* A from_stage3 buffer: just find the end of the line.  */
 207       do
 208         s++;
 209       while (*s != '\n' && *s != '\r');
 210       d = (uchar *) s;
 211
 212       /* Handle DOS line endings.  */
 213       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 214         s++;
 215     }
 216
 217  done:
       /* Terminate the cleaned line, whatever the original ending was.  */
 218   *d = '\n';
 219   /* A sentinel note that should never be processed.  */
 220   add_line_note (buffer, d + 1, '\n');
 221   buffer->next_line = s + 1;
 222 }
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           pfile->line++;
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, DL_WARNING, pfile->line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, DL_WARNING,
 346                                    pfile->line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           buffer->cur = cur - 1;
 353           _cpp_process_line_notes (pfile, true);
 354           if (buffer->next_line >= buffer->rlimit)
 355             return true;
 356           _cpp_clean_line (pfile);
 357           pfile->line++;
 358           cur = buffer->cur;
 359         }
 360     }
 361
 362   buffer->cur = cur;
 363   _cpp_process_line_notes (pfile, true);
 364   return false;
 365 }
 366
 367 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 368    terminating newline.  Handles escaped newlines.  Returns nonzero
 369    if a multiline comment.  */
 370 static int
 371 skip_line_comment (cpp_reader *pfile)
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_line = pfile->line;
 375
 376   while (*buffer->cur != '\n')
 377     buffer->cur++;
 378
 379   _cpp_process_line_notes (pfile, true);
 380   return orig_line != pfile->line;
 381 }
 382
 383 /* Skips whitespace, saving the next non-whitespace character.  */
 384 static void
 385 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 386 {
 387   cpp_buffer *buffer = pfile->buffer;
 388   bool saw_NUL = false;
 389
 390   do
 391     {
 392       /* Horizontal space always OK.  */
 393       if (c == ' ' || c == '\t')
 394         ;
 395       /* Just \f \v or \0 left.  */
 396       else if (c == '\0')
 397         saw_NUL = true;
 398       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 399         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 400                              CPP_BUF_COL (buffer),
 401                              "%s in preprocessing directive",
 402                              c == '\f' ? "form feed" : "vertical tab");
 403
 404       c = *buffer->cur++;
 405     }
 406   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 407   while (is_nvspace (c));
 408
 409   if (saw_NUL)
 410     cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 411
 412   buffer->cur--;
 413 }
 414
 415 /* See if the characters of a number token are valid in a name (no
 416    '.', '+' or '-').  */
 417 static int
 418 name_p (cpp_reader *pfile, const cpp_string *string)
 419 {
 420   unsigned int i;
 421
 422   for (i = 0; i < string->len; i++)
 423     if (!is_idchar (string->text[i]))
 424       return 0;
 425
 426   return 1;
 427 }
 428
 429 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 430    an identifier.  FIRST is TRUE if this starts an identifier.  */
 431 static bool
 432 forms_identifier_p (cpp_reader *pfile, int first)
 433 {
 434   cpp_buffer *buffer = pfile->buffer;
 435
 436   if (*buffer->cur == '$')
 437     {
 438       if (!CPP_OPTION (pfile, dollars_in_ident))
 439         return false;
 440
 441       buffer->cur++;
 442       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 443         {
 444           CPP_OPTION (pfile, warn_dollars) = 0;
 445           cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
 446         }
 447
 448       return true;
 449     }
 450
 451   /* Is this a syntactically valid UCN?  */
 452   if (0 && *buffer->cur == '\\'
 453       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 454     {
 455       buffer->cur += 2;
 456       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 457         return true;
 458       buffer->cur -= 2;
 459     }
 460
 461   return false;
 462 }
 463
 464 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 465 static cpp_hashnode *
 466 lex_identifier (cpp_reader *pfile, const uchar *base)
 467 {
 468   cpp_hashnode *result;
 469   const uchar *cur;
 470
 471   do
 472     {
 473       cur = pfile->buffer->cur;
 474
 475       /* N.B. ISIDNUM does not include $.  */
 476       while (ISIDNUM (*cur))
 477         cur++;
 478
 479       pfile->buffer->cur = cur;
 480     }
 481   while (forms_identifier_p (pfile, false));
 482
 483   result = (cpp_hashnode *)
 484     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 485
 486   /* Rarely, identifiers require diagnostics when lexed.  */
 487   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 488                         && !pfile->state.skipping, 0))
 489     {
 490       /* It is allowed to poison the same identifier twice.  */
 491       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 492         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 493                    NODE_NAME (result));
 494
 495       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 496          replacement list of a variadic macro.  */
 497       if (result == pfile->spec_nodes.n__VA_ARGS__
 498           && !pfile->state.va_args_ok)
 499         cpp_error (pfile, DL_PEDWARN,
 500                    "__VA_ARGS__ can only appear in the expansion"
 501                    " of a C99 variadic macro");
 502     }
 503
 504   return result;
 505 }
 506
 507 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 508 static void
 509 lex_number (cpp_reader *pfile, cpp_string *number)
 510 {
 511   const uchar *cur;
 512   const uchar *base;
 513   uchar *dest;
 514
 515   base = pfile->buffer->cur - 1;
 516   do
 517     {
 518       cur = pfile->buffer->cur;
 519
 520       /* N.B. ISIDNUM does not include $.  */
 521       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 522         cur++;
 523
 524       pfile->buffer->cur = cur;
 525     }
 526   while (forms_identifier_p (pfile, false));
 527
 528   number->len = cur - base;
 529   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 530   memcpy (dest, base, number->len);
 531   dest[number->len] = '\0';
 532   number->text = dest;
 533 }
 534
 535 /* Create a token of type TYPE with a literal spelling.  */
 536 static void
 537 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 538                 unsigned int len, enum cpp_ttype type)
 539 {
 540   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 541
 542   memcpy (dest, base, len);
 543   dest[len] = '\0';
 544   token->type = type;
 545   token->val.str.len = len;
 546   token->val.str.text = dest;
 547 }
 548
 549 /* Lexes a string, character constant, or angle-bracketed header file
 550    name.  The stored string contains the spelling, including opening
 551    quote and leading any leading 'L'.  It returns the type of the
 552    literal, or CPP_OTHER if it was not properly terminated.
 553
 554    The spelling is NUL-terminated, but it is not guaranteed that this
 555    is the first NUL since embedded NULs are preserved.  */
 556 static void
 557 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 558 {
 559   bool saw_NUL = false;
 560   const uchar *cur;
 561   cppchar_t terminator;
 562   enum cpp_ttype type;
 563
 564   cur = base;
 565   terminator = *cur++;
 566   if (terminator == 'L')
 567     terminator = *cur++;
 568   if (terminator == '\"')
 569     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 570   else if (terminator == '\'')
 571     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 572   else
 573     terminator = '>', type = CPP_HEADER_NAME;
 574
 575   for (;;)
 576     {
 577       cppchar_t c = *cur++;
 578
 579       /* In #include-style directives, terminators are not escapable.  */
 580       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 581         cur++;
 582       else if (c == terminator)
 583         break;
 584       else if (c == '\n')
 585         {
 586           cur--;
 587           type = CPP_OTHER;
 588           break;
 589         }
 590       else if (c == '\0')
 591         saw_NUL = true;
 592     }
 593
 594   if (saw_NUL && !pfile->state.skipping)
 595     cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
 596
 597   pfile->buffer->cur = cur;
 598   create_literal (pfile, token, base, cur - base, type);
 599 }
 600
 601 /* The stored comment includes the comment start and any terminator.  */
 602 static void
 603 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 604               cppchar_t type)
 605 {
 606   unsigned char *buffer;
 607   unsigned int len, clen;
 608
 609   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 610
 611   /* C++ comments probably (not definitely) have moved past a new
 612      line, which we don't want to save in the comment.  */
 613   if (is_vspace (pfile->buffer->cur[-1]))
 614     len--;
 615
 616   /* If we are currently in a directive, then we need to store all
 617      C++ comments as C comments internally, and so we need to
 618      allocate a little extra space in that case.
 619
 620      Note that the only time we encounter a directive here is
 621      when we are saving comments in a "#define".  */
 622   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 623
 624   buffer = _cpp_unaligned_alloc (pfile, clen);
 625
 626   token->type = CPP_COMMENT;
 627   token->val.str.len = clen;
 628   token->val.str.text = buffer;
 629
 630   buffer[0] = '/';
 631   memcpy (buffer + 1, from, len - 1);
 632
 633   /* Finish conversion to a C comment, if necessary.  */
 634   if (pfile->state.in_directive && type == '/')
 635     {
 636       buffer[1] = '*';
 637       buffer[clen - 2] = '*';
 638       buffer[clen - 1] = '/';
 639     }
 640 }
 641
 642 /* Allocate COUNT tokens for RUN.  */
 643 void
 644 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 645 {
 646   run->base = xnewvec (cpp_token, count);
 647   run->limit = run->base + count;
 648   run->next = NULL;
 649 }
 650
 651 /* Returns the next tokenrun, or creates one if there is none.  */
 652 static tokenrun *
 653 next_tokenrun (tokenrun *run)
 654 {
 655   if (run->next == NULL)
 656     {
 657       run->next = xnew (tokenrun);
 658       run->next->prev = run;
 659       _cpp_init_tokenrun (run->next, 250);
 660     }
 661
 662   return run->next;
 663 }
 664
 665 /* Allocate a single token that is invalidated at the same time as the
 666    rest of the tokens on the line.  Has its line and col set to the
 667    same as the last lexed token, so that diagnostics appear in the
 668    right place.  */
 669 cpp_token *
 670 _cpp_temp_token (cpp_reader *pfile)
 671 {
 672   cpp_token *old, *result;
 673
 674   old = pfile->cur_token - 1;
 675   if (pfile->cur_token == pfile->cur_run->limit)
 676     {
 677       pfile->cur_run = next_tokenrun (pfile->cur_run);
 678       pfile->cur_token = pfile->cur_run->base;
 679     }
 680
 681   result = pfile->cur_token++;
 682   result->line = old->line;
 683   result->col = old->col;
 684   return result;
 685 }
 686
 687 /* Lex a token into RESULT (external interface).  Takes care of issues
 688    like directive handling, token lookahead, multiple include
 689    optimization and skipping.  */
 690 const cpp_token *
 691 _cpp_lex_token (cpp_reader *pfile)
 692 {
 693   cpp_token *result;
 694
 695   for (;;)
 696     {
 697       if (pfile->cur_token == pfile->cur_run->limit)
 698         {
 699           pfile->cur_run = next_tokenrun (pfile->cur_run);
 700           pfile->cur_token = pfile->cur_run->base;
 701         }
 702
 703       if (pfile->lookaheads)
 704         {
 705           pfile->lookaheads--;
 706           result = pfile->cur_token++;
 707         }
 708       else
 709         result = _cpp_lex_direct (pfile);
 710
 711       if (result->flags & BOL)
 712         {
 713           /* Is this a directive.  If _cpp_handle_directive returns
 714              false, it is an assembler #.  */
 715           if (result->type == CPP_HASH
 716               /* 6.10.3 p 11: Directives in a list of macro arguments
 717                  gives undefined behavior.  This implementation
 718                  handles the directive as normal.  */
 719               && pfile->state.parsing_args != 1
 720               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 721             continue;
 722           if (pfile->cb.line_change && !pfile->state.skipping)
 723             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 724         }
 725
 726       /* We don't skip tokens in directives.  */
 727       if (pfile->state.in_directive)
 728         break;
 729
 730       /* Outside a directive, invalidate controlling macros.  At file
 731          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 732          get here and MI optimization works.  */
 733       pfile->mi_valid = false;
 734
 735       if (!pfile->state.skipping || result->type == CPP_EOF)
 736         break;
 737     }
 738
 739   return result;
 740 }
 741
 742 /* Returns true if a fresh line has been loaded.  */
 743 bool
 744 _cpp_get_fresh_line (cpp_reader *pfile)
 745 {
 746   /* We can't get a new line until we leave the current directive.  */
 747   if (pfile->state.in_directive)
 748     return false;
 749
 750   for (;;)
 751     {
 752       cpp_buffer *buffer = pfile->buffer;
 753
 754       if (!buffer->need_line)
 755         return true;
 756
 757       if (buffer->next_line < buffer->rlimit)
 758         {
 759           _cpp_clean_line (pfile);
 760           return true;
 761         }
 762
 763       /* First, get out of parsing arguments state.  */
 764       if (pfile->state.parsing_args)
 765         return false;
 766
 767       /* End of buffer.  Non-empty files should end in a newline.  */
 768       if (buffer->buf != buffer->rlimit
 769           && buffer->next_line > buffer->rlimit
 770           && !buffer->from_stage3)
 771         {
 772           /* Only warn once.  */
 773           buffer->next_line = buffer->rlimit;
 774           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 775                                CPP_BUF_COLUMN (buffer, buffer->cur),
 776                                "no newline at end of file");
 777         }
 778
 779       _cpp_pop_buffer (pfile);
 780       if (pfile->buffer == NULL)
 781         return false;
 782     }
 783 }
 784
 785 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 786   do                                                    \
 787     {                                                   \
 788       result->type = ELSE_TYPE;                         \
 789       if (*buffer->cur == CHAR)                         \
 790         buffer->cur++, result->type = THEN_TYPE;        \
 791     }                                                   \
 792   while (0)
 793
 794 /* Lex a token into pfile->cur_token, which is also incremented, to
 795    get diagnostics pointing to the correct location.
 796
 797    Does not handle issues such as token lookahead, multiple-include
 798    optimization, directives, skipping etc.  This function is only
 799    suitable for use by _cpp_lex_token, and in special cases like
 800    lex_expansion_token which doesn't care for any of these issues.
 801
 802    When meeting a newline, returns CPP_EOF if parsing a directive,
 803    otherwise returns to the start of the token buffer if permissible.
 804    Returns the location of the lexed token.  */
 805 cpp_token *
 806 _cpp_lex_direct (cpp_reader *pfile)
 807 {
 808   cppchar_t c;
 809   cpp_buffer *buffer;
 810   const unsigned char *comment_start;
 811   cpp_token *result = pfile->cur_token++;
 812
 813  fresh_line:
 814   result->flags = 0;
 815   buffer = pfile->buffer;
 816   if (buffer->need_line)
 817     {
 818       if (!_cpp_get_fresh_line (pfile))
 819         {
 820           result->type = CPP_EOF;
 821           if (!pfile->state.in_directive)
 822             {
 823               /* Tell the compiler the line number of the EOF token.  */
 824               result->line = pfile->line;
 825               result->flags = BOL;
 826             }
 827           return result;
 828         }
 829       if (!pfile->keep_tokens)
 830         {
 831           pfile->cur_run = &pfile->base_run;
 832           result = pfile->base_run.base;
 833           pfile->cur_token = result + 1;
 834         }
 835       result->flags = BOL;
 836       if (pfile->state.parsing_args == 2)
 837         result->flags |= PREV_WHITE;
 838     }
 839   buffer = pfile->buffer;
 840  update_tokens_line:
 841   result->line = pfile->line;
 842
 843  skipped_white:
 844   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 845       && !pfile->overlaid_buffer)
 846     {
 847       _cpp_process_line_notes (pfile, false);
 848       result->line = pfile->line;
 849     }
 850   c = *buffer->cur++;
 851   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 852
 853   switch (c)
 854     {
 855     case ' ': case '\t': case '\f': case '\v': case '\0':
 856       result->flags |= PREV_WHITE;
 857       skip_whitespace (pfile, c);
 858       goto skipped_white;
 859
 860     case '\n':
 861       pfile->line++;
 862       buffer->need_line = true;
 863       goto fresh_line;
 864
 865     case '0': case '1': case '2': case '3': case '4':
 866     case '5': case '6': case '7': case '8': case '9':
 867       result->type = CPP_NUMBER;
 868       lex_number (pfile, &result->val.str);
 869       break;
 870
 871     case 'L':
 872       /* 'L' may introduce wide characters or strings.  */
 873       if (*buffer->cur == '\'' || *buffer->cur == '"')
 874         {
 875           lex_string (pfile, result, buffer->cur - 1);
 876           break;
 877         }
 878       /* Fall through.  */
 879
 880     case '_':
 881     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 882     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 883     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 884     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 885     case 'y': case 'z':
 886     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 887     case 'G': case 'H': case 'I': case 'J': case 'K':
 888     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 889     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 890     case 'Y': case 'Z':
 891       result->type = CPP_NAME;
 892       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 893
 894       /* Convert named operators to their proper types.  */
 895       if (result->val.node->flags & NODE_OPERATOR)
 896         {
 897           result->flags |= NAMED_OP;
 898           result->type = result->val.node->directive_index;
 899         }
 900       break;
 901
 902     case '\'':
 903     case '"':
 904       lex_string (pfile, result, buffer->cur - 1);
 905       break;
 906
 907     case '/':
 908       /* A potential block or line comment.  */
 909       comment_start = buffer->cur;
 910       c = *buffer->cur;
 911
 912       if (c == '*')
 913         {
 914           if (_cpp_skip_block_comment (pfile))
 915             cpp_error (pfile, DL_ERROR, "unterminated comment");
 916         }
 917       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 918                             || CPP_IN_SYSTEM_HEADER (pfile)))
 919         {
 920           /* Warn about comments only if pedantically GNUC89, and not
 921              in system headers.  */
 922           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 923               && ! buffer->warned_cplusplus_comments)
 924             {
 925               cpp_error (pfile, DL_PEDWARN,
 926                          "C++ style comments are not allowed in ISO C90");
 927               cpp_error (pfile, DL_PEDWARN,
 928                          "(this will be reported only once per input file)");
 929               buffer->warned_cplusplus_comments = 1;
 930             }
 931
 932           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 933             cpp_error (pfile, DL_WARNING, "multi-line comment");
 934         }
 935       else if (c == '=')
 936         {
 937           buffer->cur++;
 938           result->type = CPP_DIV_EQ;
 939           break;
 940         }
 941       else
 942         {
 943           result->type = CPP_DIV;
 944           break;
 945         }
 946
 947       if (!pfile->state.save_comments)
 948         {
 949           result->flags |= PREV_WHITE;
 950           goto update_tokens_line;
 951         }
 952
 953       /* Save the comment as a token in its own right.  */
 954       save_comment (pfile, result, comment_start, c);
 955       break;
 956
 957     case '<':
 958       if (pfile->state.angled_headers)
 959         {
 960           lex_string (pfile, result, buffer->cur - 1);
 961           break;
 962         }
 963
 964       result->type = CPP_LESS;
 965       if (*buffer->cur == '=')
 966         buffer->cur++, result->type = CPP_LESS_EQ;
 967       else if (*buffer->cur == '<')
 968         {
 969           buffer->cur++;
 970           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 971         }
 972       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 973         {
 974           buffer->cur++;
 975           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 976         }
 977       else if (CPP_OPTION (pfile, digraphs))
 978         {
 979           if (*buffer->cur == ':')
 980             {
 981               buffer->cur++;
 982               result->flags |= DIGRAPH;
 983               result->type = CPP_OPEN_SQUARE;
 984             }
 985           else if (*buffer->cur == '%')
 986             {
 987               buffer->cur++;
 988               result->flags |= DIGRAPH;
 989               result->type = CPP_OPEN_BRACE;
 990             }
 991         }
 992       break;
 993
 994     case '>':
 995       result->type = CPP_GREATER;
 996       if (*buffer->cur == '=')
 997         buffer->cur++, result->type = CPP_GREATER_EQ;
 998       else if (*buffer->cur == '>')
 999         {
1000           buffer->cur++;
1001           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1002         }
1003       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1004         {
1005           buffer->cur++;
1006           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1007         }
1008       break;
1009
1010     case '%':
1011       result->type = CPP_MOD;
1012       if (*buffer->cur == '=')
1013         buffer->cur++, result->type = CPP_MOD_EQ;
1014       else if (CPP_OPTION (pfile, digraphs))
1015         {
1016           if (*buffer->cur == ':')
1017             {
1018               buffer->cur++;
1019               result->flags |= DIGRAPH;
1020               result->type = CPP_HASH;
1021               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1022                 buffer->cur += 2, result->type = CPP_PASTE;
1023             }
1024           else if (*buffer->cur == '>')
1025             {
1026               buffer->cur++;
1027               result->flags |= DIGRAPH;
1028               result->type = CPP_CLOSE_BRACE;
1029             }
1030         }
1031       break;
1032
1033     case '.':
1034       result->type = CPP_DOT;
1035       if (ISDIGIT (*buffer->cur))
1036         {
1037           result->type = CPP_NUMBER;
1038           lex_number (pfile, &result->val.str);
1039         }
1040       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1041         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1042       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1043         buffer->cur++, result->type = CPP_DOT_STAR;
1044       break;
1045
1046     case '+':
1047       result->type = CPP_PLUS;
1048       if (*buffer->cur == '+')
1049         buffer->cur++, result->type = CPP_PLUS_PLUS;
1050       else if (*buffer->cur == '=')
1051         buffer->cur++, result->type = CPP_PLUS_EQ;
1052       break;
1053
1054     case '-':
1055       result->type = CPP_MINUS;
1056       if (*buffer->cur == '>')
1057         {
1058           buffer->cur++;
1059           result->type = CPP_DEREF;
1060           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1061             buffer->cur++, result->type = CPP_DEREF_STAR;
1062         }
1063       else if (*buffer->cur == '-')
1064         buffer->cur++, result->type = CPP_MINUS_MINUS;
1065       else if (*buffer->cur == '=')
1066         buffer->cur++, result->type = CPP_MINUS_EQ;
1067       break;
1068
1069     case '&':
1070       result->type = CPP_AND;
1071       if (*buffer->cur == '&')
1072         buffer->cur++, result->type = CPP_AND_AND;
1073       else if (*buffer->cur == '=')
1074         buffer->cur++, result->type = CPP_AND_EQ;
1075       break;
1076
1077     case '|':
1078       result->type = CPP_OR;
1079       if (*buffer->cur == '|')
1080         buffer->cur++, result->type = CPP_OR_OR;
1081       else if (*buffer->cur == '=')
1082         buffer->cur++, result->type = CPP_OR_EQ;
1083       break;
1084
1085     case ':':
1086       result->type = CPP_COLON;
1087       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1088         buffer->cur++, result->type = CPP_SCOPE;
1089       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1090         {
1091           buffer->cur++;
1092           result->flags |= DIGRAPH;
1093           result->type = CPP_CLOSE_SQUARE;
1094         }
1095       break;
1096
1097     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1098     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1099     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1100     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1101     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1102
1103     case '?': result->type = CPP_QUERY; break;
1104     case '~': result->type = CPP_COMPL; break;
1105     case ',': result->type = CPP_COMMA; break;
1106     case '(': result->type = CPP_OPEN_PAREN; break;
1107     case ')': result->type = CPP_CLOSE_PAREN; break;
1108     case '[': result->type = CPP_OPEN_SQUARE; break;
1109     case ']': result->type = CPP_CLOSE_SQUARE; break;
1110     case '{': result->type = CPP_OPEN_BRACE; break;
1111     case '}': result->type = CPP_CLOSE_BRACE; break;
1112     case ';': result->type = CPP_SEMICOLON; break;
1113
1114       /* @ is a punctuator in Objective-C.  */
1115     case '@': result->type = CPP_ATSIGN; break;
1116
1117     case '$':
1118     case '\\':
1119       {
1120         const uchar *base = --buffer->cur;
1121
1122         if (forms_identifier_p (pfile, true))
1123           {
1124             result->type = CPP_NAME;
1125             result->val.node = lex_identifier (pfile, base);
1126             break;
1127           }
1128         buffer->cur++;
1129       }
1130
1131     default:
1132       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1133       break;
1134     }
1135
1136   return result;
1137 }
1138
1139 /* An upper bound on the number of bytes needed to spell TOKEN.
1140    Does not include preceding whitespace.  */
1141 unsigned int
1142 cpp_token_len (const cpp_token *token)
1143 {
1144   unsigned int len;
1145
1146   switch (TOKEN_SPELL (token))
1147     {
1148     default:            len = 4;                                break;
1149     case SPELL_LITERAL: len = token->val.str.len;               break;
1150     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1151     }
1152
1153   return len;
1154 }
1155
1156 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1157    already contain the enough space to hold the token's spelling.
1158    Returns a pointer to the character after the last character written.
1159    FIXME: Would be nice if we didn't need the PFILE argument.  */
1160 unsigned char *
1161 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1162                  unsigned char *buffer)
1163 {
1164   switch (TOKEN_SPELL (token))
1165     {
1166     case SPELL_OPERATOR:
1167       {
1168         const unsigned char *spelling;
1169         unsigned char c;
1170
1171         if (token->flags & DIGRAPH)
1172           spelling
1173             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1174         else if (token->flags & NAMED_OP)
1175           goto spell_ident;
1176         else
1177           spelling = TOKEN_NAME (token);
1178
1179         while ((c = *spelling++) != '\0')
1180           *buffer++ = c;
1181       }
1182       break;
1183
1184     spell_ident:
1185     case SPELL_IDENT:
1186       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1187       buffer += NODE_LEN (token->val.node);
1188       break;
1189
1190     case SPELL_LITERAL:
1191       memcpy (buffer, token->val.str.text, token->val.str.len);
1192       buffer += token->val.str.len;
1193       break;
1194
1195     case SPELL_NONE:
1196       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1197       break;
1198     }
1199
1200   return buffer;
1201 }
1202
1203 /* Returns TOKEN spelt as a null-terminated string.  The string is
1204    freed when the reader is destroyed.  Useful for diagnostics.  */
1205 unsigned char *
1206 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1207 {
1208   unsigned int len = cpp_token_len (token) + 1;
1209   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1210
1211   end = cpp_spell_token (pfile, token, start);
1212   end[0] = '\0';
1213
1214   return start;
1215 }
1216
1217 /* Used by C front ends, which really should move to using
1218    cpp_token_as_text.  */
1219 const char *
1220 cpp_type2name (enum cpp_ttype type)
1221 {
1222   return (const char *) token_spellings[type].name;
1223 }
1224
1225 /* Writes the spelling of token to FP, without any preceding space.
1226    Separated from cpp_spell_token for efficiency - to avoid stdio
1227    double-buffering.  */
1228 void
1229 cpp_output_token (const cpp_token *token, FILE *fp)
1230 {
1231   switch (TOKEN_SPELL (token))
1232     {
1233     case SPELL_OPERATOR:
1234       {
1235         const unsigned char *spelling;
1236         int c;
1237
1238         if (token->flags & DIGRAPH)
1239           spelling
1240             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1241         else if (token->flags & NAMED_OP)
1242           goto spell_ident;
1243         else
1244           spelling = TOKEN_NAME (token);
1245
1246         c = *spelling;
1247         do
1248           putc (c, fp);
1249         while ((c = *++spelling) != '\0');
1250       }
1251       break;
1252
1253     spell_ident:
1254     case SPELL_IDENT:
1255       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1256     break;
1257
1258     case SPELL_LITERAL:
1259       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1260       break;
1261
1262     case SPELL_NONE:
1263       /* An error, most probably.  */
1264       break;
1265     }
1266 }
1267
1268 /* Compare two tokens.  */
1269 int
1270 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1271 {
1272   if (a->type == b->type && a->flags == b->flags)
1273     switch (TOKEN_SPELL (a))
1274       {
1275       default:                  /* Keep compiler happy.  */
1276       case SPELL_OPERATOR:
1277         return 1;
1278       case SPELL_NONE:
1279         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1280       case SPELL_IDENT:
1281         return a->val.node == b->val.node;
1282       case SPELL_LITERAL:
1283         return (a->val.str.len == b->val.str.len
1284                 && !memcmp (a->val.str.text, b->val.str.text,
1285                             a->val.str.len));
1286       }
1287
1288   return 0;
1289 }
1290
1291 /* Returns nonzero if a space should be inserted to avoid an
1292    accidental token paste for output.  For simplicity, it is
1293    conservative, and occasionally advises a space where one is not
1294    needed, e.g. "." and ".2".  */
1295 int
1296 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1297                  const cpp_token *token2)
1298 {
1299   enum cpp_ttype a = token1->type, b = token2->type;
1300   cppchar_t c;
1301
1302   if (token1->flags & NAMED_OP)
1303     a = CPP_NAME;
1304   if (token2->flags & NAMED_OP)
1305     b = CPP_NAME;
1306
1307   c = EOF;
1308   if (token2->flags & DIGRAPH)
1309     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1310   else if (token_spellings[b].category == SPELL_OPERATOR)
1311     c = token_spellings[b].name[0];
1312
1313   /* Quickly get everything that can paste with an '='.  */
1314   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1315     return 1;
1316
1317   switch (a)
1318     {
1319     case CPP_GREATER:   return c == '>' || c == '?';
1320     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1321     case CPP_PLUS:      return c == '+';
1322     case CPP_MINUS:     return c == '-' || c == '>';
1323     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1324     case CPP_MOD:       return c == ':' || c == '>';
1325     case CPP_AND:       return c == '&';
1326     case CPP_OR:        return c == '|';
1327     case CPP_COLON:     return c == ':' || c == '>';
1328     case CPP_DEREF:     return c == '*';
1329     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1330     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1331     case CPP_NAME:      return ((b == CPP_NUMBER
1332                                  && name_p (pfile, &token2->val.str))
1333                                 || b == CPP_NAME
1334                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1335     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1336                                 || c == '.' || c == '+' || c == '-');
1337                                       /* UCNs */
1338     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1339                                  && b == CPP_NAME)
1340                                 || (CPP_OPTION (pfile, objc)
1341                                     && token1->val.str.text[0] == '@'
1342                                     && (b == CPP_NAME || b == CPP_STRING)));
1343     default:            break;
1344     }
1345
1346   return 0;
1347 }
1348
1349 /* Output all the remaining tokens on the current line, and a newline
1350    character, to FP.  Leading whitespace is removed.  If there are
1351    macros, special token padding is not performed.  */
1352 void
1353 cpp_output_line (cpp_reader *pfile, FILE *fp)
1354 {
1355   const cpp_token *token;
1356
1357   token = cpp_get_token (pfile);
1358   while (token->type != CPP_EOF)
1359     {
1360       cpp_output_token (token, fp);
1361       token = cpp_get_token (pfile);
1362       if (token->flags & PREV_WHITE)
1363         putc (' ', fp);
1364     }
1365
1366   putc ('\n', fp);
1367 }
1368
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that an expression argument
   cannot regroup with the surrounding arithmetic.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1383
1384 /* Create a new allocation buffer.  Place the control block at the end
1385    of the buffer, so that buffer overflows will cause immediate chaos.  */
1386 static _cpp_buff *
1387 new_buff (size_t len)
1388 {
1389   _cpp_buff *result;
1390   unsigned char *base;
1391
1392   if (len < MIN_BUFF_SIZE)
1393     len = MIN_BUFF_SIZE;
1394   len = CPP_ALIGN (len);
1395
1396   base = xmalloc (len + sizeof (_cpp_buff));
1397   result = (_cpp_buff *) (base + len);
1398   result->base = base;
1399   result->cur = base;
1400   result->limit = base + len;
1401   result->next = NULL;
1402   return result;
1403 }
1404
1405 /* Place a chain of unwanted allocation buffers on the free list.  */
1406 void
1407 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1408 {
1409   _cpp_buff *end = buff;
1410
1411   while (end->next)
1412     end = end->next;
1413   end->next = pfile->free_buffs;
1414   pfile->free_buffs = buff;
1415 }
1416
1417 /* Return a free buffer of size at least MIN_SIZE.  */
1418 _cpp_buff *
1419 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1420 {
1421   _cpp_buff *result, **p;
1422
1423   for (p = &pfile->free_buffs;; p = &(*p)->next)
1424     {
1425       size_t size;
1426
1427       if (*p == NULL)
1428         return new_buff (min_size);
1429       result = *p;
1430       size = result->limit - result->base;
1431       /* Return a buffer that's big enough, but don't waste one that's
1432          way too big.  */
1433       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1434         break;
1435     }
1436
1437   *p = result->next;
1438   result->next = NULL;
1439   result->cur = result->base;
1440   return result;
1441 }
1442
1443 /* Creates a new buffer with enough space to hold the uncommitted
1444    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1445    the excess bytes to the new buffer.  Chains the new buffer after
1446    BUFF, and returns the new buffer.  */
1447 _cpp_buff *
1448 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1449 {
1450   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1451   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1452
1453   buff->next = new_buff;
1454   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1455   return new_buff;
1456 }
1457
1458 /* Creates a new buffer with enough space to hold the uncommitted
1459    remaining bytes of the buffer pointed to by BUFF, and at least
1460    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1461    Chains the new buffer before the buffer pointed to by BUFF, and
1462    updates the pointer to point to the new buffer.  */
1463 void
1464 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1465 {
1466   _cpp_buff *new_buff, *old_buff = *pbuff;
1467   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1468
1469   new_buff = _cpp_get_buff (pfile, size);
1470   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1471   new_buff->next = old_buff;
1472   *pbuff = new_buff;
1473 }
1474
1475 /* Free a chain of buffers starting at BUFF.  */
1476 void
1477 _cpp_free_buff (_cpp_buff *buff)
1478 {
1479   _cpp_buff *next;
1480
1481   for (; buff; buff = next)
1482     {
1483       next = buff->next;
1484       free (buff->base);
1485     }
1486 }
1487
1488 /* Allocate permanent, unaligned storage of length LEN.  */
1489 unsigned char *
1490 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1491 {
1492   _cpp_buff *buff = pfile->u_buff;
1493   unsigned char *result = buff->cur;
1494
1495   if (len > (size_t) (buff->limit - result))
1496     {
1497       buff = _cpp_get_buff (pfile, len);
1498       buff->next = pfile->u_buff;
1499       pfile->u_buff = buff;
1500       result = buff->cur;
1501     }
1502
1503   buff->cur = result + len;
1504   return result;
1505 }
1506
1507 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1508    That buffer is used for growing allocations when saving macro
1509    replacement lists in a #define, and when parsing an answer to an
1510    assertion in #assert, #unassert or #if (and therefore possibly
1511    whilst expanding macros).  It therefore must not be used by any
1512    code that they might call: specifically the lexer and the guts of
1513    the macro expander.
1514
1515    All existing other uses clearly fit this restriction: storing
1516    registered pragmas during initialization.  */
1517 unsigned char *
1518 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1519 {
1520   _cpp_buff *buff = pfile->a_buff;
1521   unsigned char *result = buff->cur;
1522
1523   if (len > (size_t) (buff->limit - result))
1524     {
1525       buff = _cpp_get_buff (pfile, len);
1526       buff->next = pfile->a_buff;
1527       pfile->a_buff = buff;
1528       result = buff->cur;
1529     }
1530
1531   buff->cur = result + len;
1532   return result;
1533 }