gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "cpphash.h"
  26
/* Spelling category of a token type, stored per type in the
   token_spellings table.  */
enum spell_type
{
  /* Category given to every OP() entry of TTYPE_TABLE; the entry's
     name is the operator's spelling string.  */
  SPELL_OPERATOR = 0,
  /* The remaining categories are supplied explicitly by the TK()
     entries of TTYPE_TABLE, which is defined outside this file.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
  34
/* One entry of the token_spellings table: how a token type is spelled
   and the text associated with it.  */
struct token_spelling
{
  /* Spelling category of the token type.  */
  enum spell_type category;
  /* For OP() entries the operator's spelling; for TK() entries the
     stringized name of the token type's enumerator.  */
  const unsigned char *name;
};
  40
/* Spellings of the digraph tokens.
   NOTE(review): U is presumably the project's cast to
   const unsigned char * -- confirm against its definition.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };

/* Expand TTYPE_TABLE into the initializers of token_spellings.  Each
   OP (e, s) entry yields { SPELL_OPERATOR, s }; each TK (e, s) entry
   yields { s, "e" }, i.e. the category S paired with the stringized
   enumerator name.  Both helper macros are undefined again right after
   the table is built.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Fetch the spelling category and the name recorded for the type of
   TOKEN (a pointer to a token).  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
/* Forward declarations of the file-local (static) helpers.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
static void lex_number (cpp_reader *, cpp_string *);
static bool forms_identifier_p (cpp_reader *, int);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
                            unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
/* Prepare the next logical line of PFILE's current buffer for lexing.
   Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   On entry buffer->next_line is the start of the raw text to scan.
   On exit buffer->cur and buffer->line_base are that start, the
   cleaned line ends in a '\n' stored at the final write position, a
   sentinel note follows the real line notes, and buffer->next_line is
   one past the last raw character consumed.  Trigraph replacement and
   backslash-newline splicing rewrite the buffer in place; neither is
   done when buffer->from_stage3 is set.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;
  uchar c, *d, *p;

  buffer = pfile->buffer;
  /* Start with an empty set of line notes for the new line.  */
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* The scan loops below advance S before reading through it.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* Short circuit for the common case of an un-escaped line with
         no trigraphs.  The primary win here is by not writing any
         data back to memory until we have to.  */
      for (;;)
        {
          c = *++s;
          if (c == '\n' || c == '\r')
            {
              /* D marks where the terminating '\n' will be stored.  */
              d = (uchar *) s;

              /* A line terminator sitting at the buffer limit ends
                 the line with nothing further to inspect.  */
              if (s == buffer->rlimit)
                goto done;

              /* DOS line ending? */
              if (c == '\r' && s[1] == '\n')
                s++;

              if (s == buffer->rlimit)
                goto done;

              /* check for escaped newline */
              /* Step back from the newline over non-vertical
                 whitespace, looking for a backslash before it.  */
              p = d;
              while (p != buffer->next_line && is_nvspace (p[-1]))
                p--;
              if (p == buffer->next_line || p[-1] != '\\')
                goto done;

              /* Have an escaped newline; process it and proceed to
                 the slow path.  */
              /* The note is ' ' when whitespace separated the
                 backslash from the newline, '\\' otherwise.  */
              add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
              /* The slow loop stores through *++d, so the next
                 character copied lands on the backslash at p - 1.  */
              d = p - 2;
              /* Later backslash searches stop at this splice point.  */
              buffer->next_line = p - 1;
              break;
            }
          if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
            {
              /* Have a trigraph.  We may or may not have to convert
                 it.  Add a line note regardless, for -Wtrigraphs.  */
              add_line_note (buffer, s, s[2]);
              if (CPP_OPTION (pfile, trigraphs))
                {
                  /* We do, and that means we have to switch to the
                     slow path.  */
                  /* Replace the first '?' with the mapped character
                     and skip the other two bytes of the trigraph.  */
                  d = (uchar *) s;
                  *d = _cpp_trigraph_map[s[2]];
                  s += 2;
                  break;
                }
            }
        }


      /* Slow path: copy every character from the read position S to
         the write position D, which now trails S, so that removed
         backslash-newlines and converted trigraphs close up.  */
      for (;;)
        {
          c = *++s;
          *++d = c;

          if (c == '\n' || c == '\r')
            {
              /* Handle DOS line endings.  */
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
                s++;
              if (s == buffer->rlimit)
                break;

              /* Escaped?  */
              p = d;
              while (p != buffer->next_line && is_nvspace (p[-1]))
                p--;
              if (p == buffer->next_line || p[-1] != '\\')
                break;

              /* Same splice bookkeeping as in the fast path.  */
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
              d = p - 2;
              buffer->next_line = p - 1;
            }
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
            {
              /* Add a note regardless, for the benefit of -Wtrigraphs.  */
              /* The note is attached to D, the position in the
                 cleaned text, not to the raw position S.  */
              add_line_note (buffer, d, s[2]);
              if (CPP_OPTION (pfile, trigraphs))
                {
                  *d = _cpp_trigraph_map[s[2]];
                  s += 2;
                }
            }
        }
    }
  else
    {
      /* from_stage3 buffer: only locate the end of the line; no
         trigraph or escaped-newline handling is performed.  */
      do
        s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
        s++;
    }

 done:
  /* Normalize whatever terminated the line to a single '\n'.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           pfile->line++;
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, CPP_DL_WARNING, pfile->line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, CPP_DL_WARNING,
 346                                    pfile->line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           buffer->cur = cur - 1;
 353           _cpp_process_line_notes (pfile, true);
 354           if (buffer->next_line >= buffer->rlimit)
 355             return true;
 356           _cpp_clean_line (pfile);
 357           pfile->line++;
 358           cur = buffer->cur;
 359         }
 360     }
 361
 362   buffer->cur = cur;
 363   _cpp_process_line_notes (pfile, true);
 364   return false;
 365 }
 366
 367 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 368    terminating newline.  Handles escaped newlines.  Returns nonzero
 369    if a multiline comment.  */
 370 static int
 371 skip_line_comment (cpp_reader *pfile)
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_line = pfile->line;
 375
 376   while (*buffer->cur != '\n')
 377     buffer->cur++;
 378
 379   _cpp_process_line_notes (pfile, true);
 380   return orig_line != pfile->line;
 381 }
 382
 383 /* Skips whitespace, saving the next non-whitespace character.  */
 384 static void
 385 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 386 {
 387   cpp_buffer *buffer = pfile->buffer;
 388   bool saw_NUL = false;
 389
 390   do
 391     {
 392       /* Horizontal space always OK.  */
 393       if (c == ' ' || c == '\t')
 394         ;
 395       /* Just \f \v or \0 left.  */
 396       else if (c == '\0')
 397         saw_NUL = true;
 398       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 399         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line,
 400                              CPP_BUF_COL (buffer),
 401                              "%s in preprocessing directive",
 402                              c == '\f' ? "form feed" : "vertical tab");
 403
 404       c = *buffer->cur++;
 405     }
 406   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 407   while (is_nvspace (c));
 408
 409   if (saw_NUL)
 410     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 411
 412   buffer->cur--;
 413 }
 414
 415 /* See if the characters of a number token are valid in a name (no
 416    '.', '+' or '-').  */
 417 static int
 418 name_p (cpp_reader *pfile, const cpp_string *string)
 419 {
 420   unsigned int i;
 421
 422   for (i = 0; i < string->len; i++)
 423     if (!is_idchar (string->text[i]))
 424       return 0;
 425
 426   return 1;
 427 }
 428
 429 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 430    an identifier.  FIRST is TRUE if this starts an identifier.  */
 431 static bool
 432 forms_identifier_p (cpp_reader *pfile, int first)
 433 {
 434   cpp_buffer *buffer = pfile->buffer;
 435
 436   if (*buffer->cur == '$')
 437     {
 438       if (!CPP_OPTION (pfile, dollars_in_ident))
 439         return false;
 440
 441       buffer->cur++;
 442       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 443         {
 444           CPP_OPTION (pfile, warn_dollars) = 0;
 445           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 446         }
 447
 448       return true;
 449     }
 450
 451   /* Is this a syntactically valid UCN?  */
 452   if (0 && *buffer->cur == '\\'
 453       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 454     {
 455       buffer->cur += 2;
 456       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 457         return true;
 458       buffer->cur -= 2;
 459     }
 460
 461   return false;
 462 }
 463
 464 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 465 static cpp_hashnode *
 466 lex_identifier (cpp_reader *pfile, const uchar *base)
 467 {
 468   cpp_hashnode *result;
 469   const uchar *cur;
 470
 471   do
 472     {
 473       cur = pfile->buffer->cur;
 474
 475       /* N.B. ISIDNUM does not include $.  */
 476       while (ISIDNUM (*cur))
 477         cur++;
 478
 479       pfile->buffer->cur = cur;
 480     }
 481   while (forms_identifier_p (pfile, false));
 482
 483   result = (cpp_hashnode *)
 484     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 485
 486   /* Rarely, identifiers require diagnostics when lexed.  */
 487   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 488                         && !pfile->state.skipping, 0))
 489     {
 490       /* It is allowed to poison the same identifier twice.  */
 491       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 492         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 493                    NODE_NAME (result));
 494
 495       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 496          replacement list of a variadic macro.  */
 497       if (result == pfile->spec_nodes.n__VA_ARGS__
 498           && !pfile->state.va_args_ok)
 499         cpp_error (pfile, CPP_DL_PEDWARN,
 500                    "__VA_ARGS__ can only appear in the expansion"
 501                    " of a C99 variadic macro");
 502     }
 503
 504   return result;
 505 }
 506
 507 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 508 static void
 509 lex_number (cpp_reader *pfile, cpp_string *number)
 510 {
 511   const uchar *cur;
 512   const uchar *base;
 513   uchar *dest;
 514
 515   base = pfile->buffer->cur - 1;
 516   do
 517     {
 518       cur = pfile->buffer->cur;
 519
 520       /* N.B. ISIDNUM does not include $.  */
 521       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 522         cur++;
 523
 524       pfile->buffer->cur = cur;
 525     }
 526   while (forms_identifier_p (pfile, false));
 527
 528   number->len = cur - base;
 529   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 530   memcpy (dest, base, number->len);
 531   dest[number->len] = '\0';
 532   number->text = dest;
 533 }
 534
 535 /* Create a token of type TYPE with a literal spelling.  */
 536 static void
 537 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 538                 unsigned int len, enum cpp_ttype type)
 539 {
 540   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 541
 542   memcpy (dest, base, len);
 543   dest[len] = '\0';
 544   token->type = type;
 545   token->val.str.len = len;
 546   token->val.str.text = dest;
 547 }
 548
 549 /* Lexes a string, character constant, or angle-bracketed header file
 550    name.  The stored string contains the spelling, including opening
 551    quote and leading any leading 'L'.  It returns the type of the
 552    literal, or CPP_OTHER if it was not properly terminated.
 553
 554    The spelling is NUL-terminated, but it is not guaranteed that this
 555    is the first NUL since embedded NULs are preserved.  */
 556 static void
 557 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 558 {
 559   bool saw_NUL = false;
 560   const uchar *cur;
 561   cppchar_t terminator;
 562   enum cpp_ttype type;
 563
 564   cur = base;
 565   terminator = *cur++;
 566   if (terminator == 'L')
 567     terminator = *cur++;
 568   if (terminator == '\"')
 569     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 570   else if (terminator == '\'')
 571     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 572   else
 573     terminator = '>', type = CPP_HEADER_NAME;
 574
 575   for (;;)
 576     {
 577       cppchar_t c = *cur++;
 578
 579       /* In #include-style directives, terminators are not escapable.  */
 580       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 581         cur++;
 582       else if (c == terminator)
 583         break;
 584       else if (c == '\n')
 585         {
 586           cur--;
 587           type = CPP_OTHER;
 588           break;
 589         }
 590       else if (c == '\0')
 591         saw_NUL = true;
 592     }
 593
 594   if (saw_NUL && !pfile->state.skipping)
 595     cpp_error (pfile, CPP_DL_WARNING,
 596                "null character(s) preserved in literal");
 597
 598   pfile->buffer->cur = cur;
 599   create_literal (pfile, token, base, cur - base, type);
 600 }
 601
 602 /* The stored comment includes the comment start and any terminator.  */
 603 static void
 604 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 605               cppchar_t type)
 606 {
 607   unsigned char *buffer;
 608   unsigned int len, clen;
 609
 610   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 611
 612   /* C++ comments probably (not definitely) have moved past a new
 613      line, which we don't want to save in the comment.  */
 614   if (is_vspace (pfile->buffer->cur[-1]))
 615     len--;
 616
 617   /* If we are currently in a directive, then we need to store all
 618      C++ comments as C comments internally, and so we need to
 619      allocate a little extra space in that case.
 620
 621      Note that the only time we encounter a directive here is
 622      when we are saving comments in a "#define".  */
 623   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 624
 625   buffer = _cpp_unaligned_alloc (pfile, clen);
 626
 627   token->type = CPP_COMMENT;
 628   token->val.str.len = clen;
 629   token->val.str.text = buffer;
 630
 631   buffer[0] = '/';
 632   memcpy (buffer + 1, from, len - 1);
 633
 634   /* Finish conversion to a C comment, if necessary.  */
 635   if (pfile->state.in_directive && type == '/')
 636     {
 637       buffer[1] = '*';
 638       buffer[clen - 2] = '*';
 639       buffer[clen - 1] = '/';
 640     }
 641 }
 642
 643 /* Allocate COUNT tokens for RUN.  */
 644 void
 645 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 646 {
 647   run->base = xnewvec (cpp_token, count);
 648   run->limit = run->base + count;
 649   run->next = NULL;
 650 }
 651
 652 /* Returns the next tokenrun, or creates one if there is none.  */
 653 static tokenrun *
 654 next_tokenrun (tokenrun *run)
 655 {
 656   if (run->next == NULL)
 657     {
 658       run->next = xnew (tokenrun);
 659       run->next->prev = run;
 660       _cpp_init_tokenrun (run->next, 250);
 661     }
 662
 663   return run->next;
 664 }
 665
 666 /* Allocate a single token that is invalidated at the same time as the
 667    rest of the tokens on the line.  Has its line and col set to the
 668    same as the last lexed token, so that diagnostics appear in the
 669    right place.  */
 670 cpp_token *
 671 _cpp_temp_token (cpp_reader *pfile)
 672 {
 673   cpp_token *old, *result;
 674
 675   old = pfile->cur_token - 1;
 676   if (pfile->cur_token == pfile->cur_run->limit)
 677     {
 678       pfile->cur_run = next_tokenrun (pfile->cur_run);
 679       pfile->cur_token = pfile->cur_run->base;
 680     }
 681
 682   result = pfile->cur_token++;
 683   result->line = old->line;
 684   result->col = old->col;
 685   return result;
 686 }
 687
 688 /* Lex a token into RESULT (external interface).  Takes care of issues
 689    like directive handling, token lookahead, multiple include
 690    optimization and skipping.  */
 691 const cpp_token *
 692 _cpp_lex_token (cpp_reader *pfile)
 693 {
 694   cpp_token *result;
 695
 696   for (;;)
 697     {
 698       if (pfile->cur_token == pfile->cur_run->limit)
 699         {
 700           pfile->cur_run = next_tokenrun (pfile->cur_run);
 701           pfile->cur_token = pfile->cur_run->base;
 702         }
 703
 704       if (pfile->lookaheads)
 705         {
 706           pfile->lookaheads--;
 707           result = pfile->cur_token++;
 708         }
 709       else
 710         result = _cpp_lex_direct (pfile);
 711
 712       if (result->flags & BOL)
 713         {
 714           /* Is this a directive.  If _cpp_handle_directive returns
 715              false, it is an assembler #.  */
 716           if (result->type == CPP_HASH
 717               /* 6.10.3 p 11: Directives in a list of macro arguments
 718                  gives undefined behavior.  This implementation
 719                  handles the directive as normal.  */
 720               && pfile->state.parsing_args != 1
 721               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 722             continue;
 723           if (pfile->cb.line_change && !pfile->state.skipping)
 724             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 725         }
 726
 727       /* We don't skip tokens in directives.  */
 728       if (pfile->state.in_directive)
 729         break;
 730
 731       /* Outside a directive, invalidate controlling macros.  At file
 732          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 733          get here and MI optimization works.  */
 734       pfile->mi_valid = false;
 735
 736       if (!pfile->state.skipping || result->type == CPP_EOF)
 737         break;
 738     }
 739
 740   return result;
 741 }
 742
 743 /* Returns true if a fresh line has been loaded.  */
 744 bool
 745 _cpp_get_fresh_line (cpp_reader *pfile)
 746 {
 747   /* We can't get a new line until we leave the current directive.  */
 748   if (pfile->state.in_directive)
 749     return false;
 750
 751   for (;;)
 752     {
 753       cpp_buffer *buffer = pfile->buffer;
 754
 755       if (!buffer->need_line)
 756         return true;
 757
 758       if (buffer->next_line < buffer->rlimit)
 759         {
 760           _cpp_clean_line (pfile);
 761           return true;
 762         }
 763
 764       /* First, get out of parsing arguments state.  */
 765       if (pfile->state.parsing_args)
 766         return false;
 767
 768       /* End of buffer.  Non-empty files should end in a newline.  */
 769       if (buffer->buf != buffer->rlimit
 770           && buffer->next_line > buffer->rlimit
 771           && !buffer->from_stage3)
 772         {
 773           /* Only warn once.  */
 774           buffer->next_line = buffer->rlimit;
 775           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line - 1,
 776                                CPP_BUF_COLUMN (buffer, buffer->cur),
 777                                "no newline at end of file");
 778         }
 779
 780       _cpp_pop_buffer (pfile);
 781       if (pfile->buffer == NULL)
 782         return false;
 783     }
 784 }
 785
/* Choose between two token types for a possible two-character
   operator.  Sets result->type to ELSE_TYPE; if the character at
   buffer->cur is CHAR, consumes it and sets result->type to THEN_TYPE
   instead.  Expects variables named `buffer' and `result' to be in
   scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)
 794
 795 /* Lex a token into pfile->cur_token, which is also incremented, to
 796    get diagnostics pointing to the correct location.
 797
 798    Does not handle issues such as token lookahead, multiple-include
 799    optimization, directives, skipping etc.  This function is only
 800    suitable for use by _cpp_lex_token, and in special cases like
 801    lex_expansion_token which doesn't care for any of these issues.
 802
 803    When meeting a newline, returns CPP_EOF if parsing a directive,
 804    otherwise returns to the start of the token buffer if permissible.
 805    Returns the location of the lexed token.  */
 806 cpp_token *
 807 _cpp_lex_direct (cpp_reader *pfile)
 808 {
 809   cppchar_t c;
 810   cpp_buffer *buffer;
 811   const unsigned char *comment_start;
 812   cpp_token *result = pfile->cur_token++;
 813
 814  fresh_line:
 815   result->flags = 0;
 816   buffer = pfile->buffer;
 817   if (buffer->need_line)
 818     {
 819       if (!_cpp_get_fresh_line (pfile))
 820         {
 821           result->type = CPP_EOF;
 822           if (!pfile->state.in_directive)
 823             {
 824               /* Tell the compiler the line number of the EOF token.  */
 825               result->line = pfile->line;
 826               result->flags = BOL;
 827             }
 828           return result;
 829         }
 830       if (!pfile->keep_tokens)
 831         {
 832           pfile->cur_run = &pfile->base_run;
 833           result = pfile->base_run.base;
 834           pfile->cur_token = result + 1;
 835         }
 836       result->flags = BOL;
 837       if (pfile->state.parsing_args == 2)
 838         result->flags |= PREV_WHITE;
 839     }
 840   buffer = pfile->buffer;
 841  update_tokens_line:
 842   result->line = pfile->line;
 843
 844  skipped_white:
 845   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 846       && !pfile->overlaid_buffer)
 847     {
 848       _cpp_process_line_notes (pfile, false);
 849       result->line = pfile->line;
 850     }
 851   c = *buffer->cur++;
 852   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 853
 854   switch (c)
 855     {
 856     case ' ': case '\t': case '\f': case '\v': case '\0':
 857       result->flags |= PREV_WHITE;
 858       skip_whitespace (pfile, c);
 859       goto skipped_white;
 860
 861     case '\n':
 862       pfile->line++;
 863       buffer->need_line = true;
 864       goto fresh_line;
 865
 866     case '0': case '1': case '2': case '3': case '4':
 867     case '5': case '6': case '7': case '8': case '9':
 868       result->type = CPP_NUMBER;
 869       lex_number (pfile, &result->val.str);
 870       break;
 871
 872     case 'L':
 873       /* 'L' may introduce wide characters or strings.  */
 874       if (*buffer->cur == '\'' || *buffer->cur == '"')
 875         {
 876           lex_string (pfile, result, buffer->cur - 1);
 877           break;
 878         }
 879       /* Fall through.  */
 880
 881     case '_':
 882     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 883     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 884     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 885     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 886     case 'y': case 'z':
 887     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 888     case 'G': case 'H': case 'I': case 'J': case 'K':
 889     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 890     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 891     case 'Y': case 'Z':
 892       result->type = CPP_NAME;
 893       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 894
 895       /* Convert named operators to their proper types.  */
 896       if (result->val.node->flags & NODE_OPERATOR)
 897         {
 898           result->flags |= NAMED_OP;
 899           result->type = result->val.node->directive_index;
 900         }
 901       break;
 902
 903     case '\'':
 904     case '"':
 905       lex_string (pfile, result, buffer->cur - 1);
 906       break;
 907
 908     case '/':
 909       /* A potential block or line comment.  */
 910       comment_start = buffer->cur;
 911       c = *buffer->cur;
 912
 913       if (c == '*')
 914         {
 915           if (_cpp_skip_block_comment (pfile))
 916             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 917         }
 918       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 919                             || CPP_IN_SYSTEM_HEADER (pfile)))
 920         {
 921           /* Warn about comments only if pedantically GNUC89, and not
 922              in system headers.  */
 923           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 924               && ! buffer->warned_cplusplus_comments)
 925             {
 926               cpp_error (pfile, CPP_DL_PEDWARN,
 927                          "C++ style comments are not allowed in ISO C90");
 928               cpp_error (pfile, CPP_DL_PEDWARN,
 929                          "(this will be reported only once per input file)");
 930               buffer->warned_cplusplus_comments = 1;
 931             }
 932
 933           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 934             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
 935         }
 936       else if (c == '=')
 937         {
 938           buffer->cur++;
 939           result->type = CPP_DIV_EQ;
 940           break;
 941         }
 942       else
 943         {
 944           result->type = CPP_DIV;
 945           break;
 946         }
 947
 948       if (!pfile->state.save_comments)
 949         {
 950           result->flags |= PREV_WHITE;
 951           goto update_tokens_line;
 952         }
 953
 954       /* Save the comment as a token in its own right.  */
 955       save_comment (pfile, result, comment_start, c);
 956       break;
 957
 958     case '<':
 959       if (pfile->state.angled_headers)
 960         {
 961           lex_string (pfile, result, buffer->cur - 1);
 962           break;
 963         }
 964
 965       result->type = CPP_LESS;
 966       if (*buffer->cur == '=')
 967         buffer->cur++, result->type = CPP_LESS_EQ;
 968       else if (*buffer->cur == '<')
 969         {
 970           buffer->cur++;
 971           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 972         }
 973       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 974         {
 975           buffer->cur++;
 976           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 977         }
 978       else if (CPP_OPTION (pfile, digraphs))
 979         {
 980           if (*buffer->cur == ':')
 981             {
 982               buffer->cur++;
 983               result->flags |= DIGRAPH;
 984               result->type = CPP_OPEN_SQUARE;
 985             }
 986           else if (*buffer->cur == '%')
 987             {
 988               buffer->cur++;
 989               result->flags |= DIGRAPH;
 990               result->type = CPP_OPEN_BRACE;
 991             }
 992         }
 993       break;
 994
 995     case '>':
 996       result->type = CPP_GREATER;
 997       if (*buffer->cur == '=')
 998         buffer->cur++, result->type = CPP_GREATER_EQ;
 999       else if (*buffer->cur == '>')
1000         {
1001           buffer->cur++;
1002           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1003         }
1004       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1005         {
1006           buffer->cur++;
1007           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1008         }
1009       break;
1010
1011     case '%':
1012       result->type = CPP_MOD;
1013       if (*buffer->cur == '=')
1014         buffer->cur++, result->type = CPP_MOD_EQ;
1015       else if (CPP_OPTION (pfile, digraphs))
1016         {
1017           if (*buffer->cur == ':')
1018             {
1019               buffer->cur++;
1020               result->flags |= DIGRAPH;
1021               result->type = CPP_HASH;
1022               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1023                 buffer->cur += 2, result->type = CPP_PASTE;
1024             }
1025           else if (*buffer->cur == '>')
1026             {
1027               buffer->cur++;
1028               result->flags |= DIGRAPH;
1029               result->type = CPP_CLOSE_BRACE;
1030             }
1031         }
1032       break;
1033
1034     case '.':
1035       result->type = CPP_DOT;
1036       if (ISDIGIT (*buffer->cur))
1037         {
1038           result->type = CPP_NUMBER;
1039           lex_number (pfile, &result->val.str);
1040         }
1041       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1042         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1043       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1044         buffer->cur++, result->type = CPP_DOT_STAR;
1045       break;
1046
1047     case '+':
1048       result->type = CPP_PLUS;
1049       if (*buffer->cur == '+')
1050         buffer->cur++, result->type = CPP_PLUS_PLUS;
1051       else if (*buffer->cur == '=')
1052         buffer->cur++, result->type = CPP_PLUS_EQ;
1053       break;
1054
1055     case '-':
1056       result->type = CPP_MINUS;
1057       if (*buffer->cur == '>')
1058         {
1059           buffer->cur++;
1060           result->type = CPP_DEREF;
1061           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1062             buffer->cur++, result->type = CPP_DEREF_STAR;
1063         }
1064       else if (*buffer->cur == '-')
1065         buffer->cur++, result->type = CPP_MINUS_MINUS;
1066       else if (*buffer->cur == '=')
1067         buffer->cur++, result->type = CPP_MINUS_EQ;
1068       break;
1069
1070     case '&':
1071       result->type = CPP_AND;
1072       if (*buffer->cur == '&')
1073         buffer->cur++, result->type = CPP_AND_AND;
1074       else if (*buffer->cur == '=')
1075         buffer->cur++, result->type = CPP_AND_EQ;
1076       break;
1077
1078     case '|':
1079       result->type = CPP_OR;
1080       if (*buffer->cur == '|')
1081         buffer->cur++, result->type = CPP_OR_OR;
1082       else if (*buffer->cur == '=')
1083         buffer->cur++, result->type = CPP_OR_EQ;
1084       break;
1085
1086     case ':':
1087       result->type = CPP_COLON;
1088       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1089         buffer->cur++, result->type = CPP_SCOPE;
1090       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1091         {
1092           buffer->cur++;
1093           result->flags |= DIGRAPH;
1094           result->type = CPP_CLOSE_SQUARE;
1095         }
1096       break;
1097
1098     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1099     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1100     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1101     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1102     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1103
1104     case '?': result->type = CPP_QUERY; break;
1105     case '~': result->type = CPP_COMPL; break;
1106     case ',': result->type = CPP_COMMA; break;
1107     case '(': result->type = CPP_OPEN_PAREN; break;
1108     case ')': result->type = CPP_CLOSE_PAREN; break;
1109     case '[': result->type = CPP_OPEN_SQUARE; break;
1110     case ']': result->type = CPP_CLOSE_SQUARE; break;
1111     case '{': result->type = CPP_OPEN_BRACE; break;
1112     case '}': result->type = CPP_CLOSE_BRACE; break;
1113     case ';': result->type = CPP_SEMICOLON; break;
1114
1115       /* @ is a punctuator in Objective-C.  */
1116     case '@': result->type = CPP_ATSIGN; break;
1117
1118     case '$':
1119     case '\\':
1120       {
1121         const uchar *base = --buffer->cur;
1122
1123         if (forms_identifier_p (pfile, true))
1124           {
1125             result->type = CPP_NAME;
1126             result->val.node = lex_identifier (pfile, base);
1127             break;
1128           }
1129         buffer->cur++;
1130       }
1131
1132     default:
1133       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1134       break;
1135     }
1136
1137   return result;
1138 }
1139
1140 /* An upper bound on the number of bytes needed to spell TOKEN.
1141    Does not include preceding whitespace.  */
1142 unsigned int
1143 cpp_token_len (const cpp_token *token)
1144 {
1145   unsigned int len;
1146
1147   switch (TOKEN_SPELL (token))
1148     {
1149     default:            len = 4;                                break;
1150     case SPELL_LITERAL: len = token->val.str.len;               break;
1151     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1152     }
1153
1154   return len;
1155 }
1156
1157 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1158    already contain the enough space to hold the token's spelling.
1159    Returns a pointer to the character after the last character written.
1160    FIXME: Would be nice if we didn't need the PFILE argument.  */
1161 unsigned char *
1162 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1163                  unsigned char *buffer)
1164 {
1165   switch (TOKEN_SPELL (token))
1166     {
1167     case SPELL_OPERATOR:
1168       {
1169         const unsigned char *spelling;
1170         unsigned char c;
1171
1172         if (token->flags & DIGRAPH)
1173           spelling
1174             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1175         else if (token->flags & NAMED_OP)
1176           goto spell_ident;
1177         else
1178           spelling = TOKEN_NAME (token);
1179
1180         while ((c = *spelling++) != '\0')
1181           *buffer++ = c;
1182       }
1183       break;
1184
1185     spell_ident:
1186     case SPELL_IDENT:
1187       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1188       buffer += NODE_LEN (token->val.node);
1189       break;
1190
1191     case SPELL_LITERAL:
1192       memcpy (buffer, token->val.str.text, token->val.str.len);
1193       buffer += token->val.str.len;
1194       break;
1195
1196     case SPELL_NONE:
1197       cpp_error (pfile, CPP_DL_ICE,
1198                  "unspellable token %s", TOKEN_NAME (token));
1199       break;
1200     }
1201
1202   return buffer;
1203 }
1204
1205 /* Returns TOKEN spelt as a null-terminated string.  The string is
1206    freed when the reader is destroyed.  Useful for diagnostics.  */
1207 unsigned char *
1208 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1209 {
1210   unsigned int len = cpp_token_len (token) + 1;
1211   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1212
1213   end = cpp_spell_token (pfile, token, start);
1214   end[0] = '\0';
1215
1216   return start;
1217 }
1218
1219 /* Used by C front ends, which really should move to using
1220    cpp_token_as_text.  */
1221 const char *
1222 cpp_type2name (enum cpp_ttype type)
1223 {
1224   return (const char *) token_spellings[type].name;
1225 }
1226
1227 /* Writes the spelling of token to FP, without any preceding space.
1228    Separated from cpp_spell_token for efficiency - to avoid stdio
1229    double-buffering.  */
1230 void
1231 cpp_output_token (const cpp_token *token, FILE *fp)
1232 {
1233   switch (TOKEN_SPELL (token))
1234     {
1235     case SPELL_OPERATOR:
1236       {
1237         const unsigned char *spelling;
1238         int c;
1239
1240         if (token->flags & DIGRAPH)
1241           spelling
1242             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1243         else if (token->flags & NAMED_OP)
1244           goto spell_ident;
1245         else
1246           spelling = TOKEN_NAME (token);
1247
1248         c = *spelling;
1249         do
1250           putc (c, fp);
1251         while ((c = *++spelling) != '\0');
1252       }
1253       break;
1254
1255     spell_ident:
1256     case SPELL_IDENT:
1257       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1258     break;
1259
1260     case SPELL_LITERAL:
1261       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1262       break;
1263
1264     case SPELL_NONE:
1265       /* An error, most probably.  */
1266       break;
1267     }
1268 }
1269
1270 /* Compare two tokens.  */
1271 int
1272 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1273 {
1274   if (a->type == b->type && a->flags == b->flags)
1275     switch (TOKEN_SPELL (a))
1276       {
1277       default:                  /* Keep compiler happy.  */
1278       case SPELL_OPERATOR:
1279         return 1;
1280       case SPELL_NONE:
1281         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1282       case SPELL_IDENT:
1283         return a->val.node == b->val.node;
1284       case SPELL_LITERAL:
1285         return (a->val.str.len == b->val.str.len
1286                 && !memcmp (a->val.str.text, b->val.str.text,
1287                             a->val.str.len));
1288       }
1289
1290   return 0;
1291 }
1292
1293 /* Returns nonzero if a space should be inserted to avoid an
1294    accidental token paste for output.  For simplicity, it is
1295    conservative, and occasionally advises a space where one is not
1296    needed, e.g. "." and ".2".  */
1297 int
1298 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1299                  const cpp_token *token2)
1300 {
1301   enum cpp_ttype a = token1->type, b = token2->type;
1302   cppchar_t c;
1303
1304   if (token1->flags & NAMED_OP)
1305     a = CPP_NAME;
1306   if (token2->flags & NAMED_OP)
1307     b = CPP_NAME;
1308
1309   c = EOF;
1310   if (token2->flags & DIGRAPH)
1311     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1312   else if (token_spellings[b].category == SPELL_OPERATOR)
1313     c = token_spellings[b].name[0];
1314
1315   /* Quickly get everything that can paste with an '='.  */
1316   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1317     return 1;
1318
1319   switch (a)
1320     {
1321     case CPP_GREATER:   return c == '>' || c == '?';
1322     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1323     case CPP_PLUS:      return c == '+';
1324     case CPP_MINUS:     return c == '-' || c == '>';
1325     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1326     case CPP_MOD:       return c == ':' || c == '>';
1327     case CPP_AND:       return c == '&';
1328     case CPP_OR:        return c == '|';
1329     case CPP_COLON:     return c == ':' || c == '>';
1330     case CPP_DEREF:     return c == '*';
1331     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1332     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1333     case CPP_NAME:      return ((b == CPP_NUMBER
1334                                  && name_p (pfile, &token2->val.str))
1335                                 || b == CPP_NAME
1336                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1337     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1338                                 || c == '.' || c == '+' || c == '-');
1339                                       /* UCNs */
1340     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1341                                  && b == CPP_NAME)
1342                                 || (CPP_OPTION (pfile, objc)
1343                                     && token1->val.str.text[0] == '@'
1344                                     && (b == CPP_NAME || b == CPP_STRING)));
1345     default:            break;
1346     }
1347
1348   return 0;
1349 }
1350
1351 /* Output all the remaining tokens on the current line, and a newline
1352    character, to FP.  Leading whitespace is removed.  If there are
1353    macros, special token padding is not performed.  */
1354 void
1355 cpp_output_line (cpp_reader *pfile, FILE *fp)
1356 {
1357   const cpp_token *token;
1358
1359   token = cpp_get_token (pfile);
1360   while (token->type != CPP_EOF)
1361     {
1362       cpp_output_token (token, fp);
1363       token = cpp_get_token (pfile);
1364       if (token->flags & PREV_WHITE)
1365         putc (' ', fp);
1366     }
1367
1368   putc ('\n', fp);
1369 }
1370
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the space between BUFF's
   cursor and its limit.  Every macro argument is parenthesized so
   that expression arguments expand as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1385
1386 /* Create a new allocation buffer.  Place the control block at the end
1387    of the buffer, so that buffer overflows will cause immediate chaos.  */
1388 static _cpp_buff *
1389 new_buff (size_t len)
1390 {
1391   _cpp_buff *result;
1392   unsigned char *base;
1393
1394   if (len < MIN_BUFF_SIZE)
1395     len = MIN_BUFF_SIZE;
1396   len = CPP_ALIGN (len);
1397
1398   base = xmalloc (len + sizeof (_cpp_buff));
1399   result = (_cpp_buff *) (base + len);
1400   result->base = base;
1401   result->cur = base;
1402   result->limit = base + len;
1403   result->next = NULL;
1404   return result;
1405 }
1406
1407 /* Place a chain of unwanted allocation buffers on the free list.  */
1408 void
1409 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1410 {
1411   _cpp_buff *end = buff;
1412
1413   while (end->next)
1414     end = end->next;
1415   end->next = pfile->free_buffs;
1416   pfile->free_buffs = buff;
1417 }
1418
1419 /* Return a free buffer of size at least MIN_SIZE.  */
1420 _cpp_buff *
1421 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1422 {
1423   _cpp_buff *result, **p;
1424
1425   for (p = &pfile->free_buffs;; p = &(*p)->next)
1426     {
1427       size_t size;
1428
1429       if (*p == NULL)
1430         return new_buff (min_size);
1431       result = *p;
1432       size = result->limit - result->base;
1433       /* Return a buffer that's big enough, but don't waste one that's
1434          way too big.  */
1435       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1436         break;
1437     }
1438
1439   *p = result->next;
1440   result->next = NULL;
1441   result->cur = result->base;
1442   return result;
1443 }
1444
1445 /* Creates a new buffer with enough space to hold the uncommitted
1446    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1447    the excess bytes to the new buffer.  Chains the new buffer after
1448    BUFF, and returns the new buffer.  */
1449 _cpp_buff *
1450 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1451 {
1452   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1453   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1454
1455   buff->next = new_buff;
1456   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1457   return new_buff;
1458 }
1459
1460 /* Creates a new buffer with enough space to hold the uncommitted
1461    remaining bytes of the buffer pointed to by BUFF, and at least
1462    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1463    Chains the new buffer before the buffer pointed to by BUFF, and
1464    updates the pointer to point to the new buffer.  */
1465 void
1466 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1467 {
1468   _cpp_buff *new_buff, *old_buff = *pbuff;
1469   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1470
1471   new_buff = _cpp_get_buff (pfile, size);
1472   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1473   new_buff->next = old_buff;
1474   *pbuff = new_buff;
1475 }
1476
1477 /* Free a chain of buffers starting at BUFF.  */
1478 void
1479 _cpp_free_buff (_cpp_buff *buff)
1480 {
1481   _cpp_buff *next;
1482
1483   for (; buff; buff = next)
1484     {
1485       next = buff->next;
1486       free (buff->base);
1487     }
1488 }
1489
1490 /* Allocate permanent, unaligned storage of length LEN.  */
1491 unsigned char *
1492 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1493 {
1494   _cpp_buff *buff = pfile->u_buff;
1495   unsigned char *result = buff->cur;
1496
1497   if (len > (size_t) (buff->limit - result))
1498     {
1499       buff = _cpp_get_buff (pfile, len);
1500       buff->next = pfile->u_buff;
1501       pfile->u_buff = buff;
1502       result = buff->cur;
1503     }
1504
1505   buff->cur = result + len;
1506   return result;
1507 }
1508
1509 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1510    That buffer is used for growing allocations when saving macro
1511    replacement lists in a #define, and when parsing an answer to an
1512    assertion in #assert, #unassert or #if (and therefore possibly
1513    whilst expanding macros).  It therefore must not be used by any
1514    code that they might call: specifically the lexer and the guts of
1515    the macro expander.
1516
1517    All existing other uses clearly fit this restriction: storing
1518    registered pragmas during initialization.  */
1519 unsigned char *
1520 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1521 {
1522   _cpp_buff *buff = pfile->a_buff;
1523   unsigned char *result = buff->cur;
1524
1525   if (len > (size_t) (buff->limit - result))
1526     {
1527       buff = _cpp_get_buff (pfile, len);
1528       buff->next = pfile->a_buff;
1529       pfile->a_buff = buff;
1530       result = buff->cur;
1531     }
1532
1533   buff->cur = result + len;
1534   return result;
1535 }