contrib/gcc-3.4/gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "cpphash.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, U s           },
  45 #define TK(e, s) { s,              U #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
  57 static void lex_number (cpp_reader *, cpp_string *);
  58 static bool forms_identifier_p (cpp_reader *, int);
  59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  62                             unsigned int, enum cpp_ttype);
  63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  64 static int name_p (cpp_reader *, const cpp_string *);
  65 static tokenrun *next_tokenrun (tokenrun *);
  66
  67 static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
 100 /* Returns with a logical line that contains no escaped newlines or
 101    trigraphs.  This is a time-critical inner loop.  */
 102 void
 103 _cpp_clean_line (cpp_reader *pfile)
 104 {
 105   cpp_buffer *buffer;
 106   const uchar *s;
 107   uchar c, *d, *p;
 108
 109   buffer = pfile->buffer;
 110   buffer->cur_note = buffer->notes_used = 0;
 111   buffer->cur = buffer->line_base = buffer->next_line;
 112   buffer->need_line = false;
 113   s = buffer->next_line - 1;
 114
 115   if (!buffer->from_stage3)
 116     {
 117       /* Short circuit for the common case of an un-escaped line with
 118          no trigraphs.  The primary win here is by not writing any
 119          data back to memory until we have to.  */
 120       for (;;)
 121         {
 122           c = *++s;
 123           if (c == '\n' || c == '\r')
 124             {
 125               d = (uchar *) s;
 126
 127               if (s == buffer->rlimit)
 128                 goto done;
 129
 130               /* DOS line ending? */
 131               if (c == '\r' && s[1] == '\n')
 132                 s++;
 133
 134               if (s == buffer->rlimit)
 135                 goto done;
 136
 137               /* check for escaped newline */
 138               p = d;
 139               while (p != buffer->next_line && is_nvspace (p[-1]))
 140                 p--;
 141               if (p == buffer->next_line || p[-1] != '\\')
 142                 goto done;
 143
 144               /* Have an escaped newline; process it and proceed to
 145                  the slow path.  */
 146               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 147               d = p - 2;
 148               buffer->next_line = p - 1;
 149               break;
 150             }
 151           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 152             {
 153               /* Have a trigraph.  We may or may not have to convert
 154                  it.  Add a line note regardless, for -Wtrigraphs.  */
 155               add_line_note (buffer, s, s[2]);
 156               if (CPP_OPTION (pfile, trigraphs))
 157                 {
 158                   /* We do, and that means we have to switch to the
 159                      slow path.  */
 160                   d = (uchar *) s;
 161                   *d = _cpp_trigraph_map[s[2]];
 162                   s += 2;
 163                   break;
 164                 }
 165             }
 166         }
 167
 168
 169       for (;;)
 170         {
 171           c = *++s;
 172           *++d = c;
 173
 174           if (c == '\n' || c == '\r')
 175             {
 176                   /* Handle DOS line endings.  */
 177               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 178                 s++;
 179               if (s == buffer->rlimit)
 180                 break;
 181
 182               /* Escaped?  */
 183               p = d;
 184               while (p != buffer->next_line && is_nvspace (p[-1]))
 185                 p--;
 186               if (p == buffer->next_line || p[-1] != '\\')
 187                 break;
 188
 189               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 190               d = p - 2;
 191               buffer->next_line = p - 1;
 192             }
 193           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 194             {
 195               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 196               add_line_note (buffer, d, s[2]);
 197               if (CPP_OPTION (pfile, trigraphs))
 198                 {
 199                   *d = _cpp_trigraph_map[s[2]];
 200                   s += 2;
 201                 }
 202             }
 203         }
 204     }
 205   else
 206     {
 207       do
 208         s++;
 209       while (*s != '\n' && *s != '\r');
 210       d = (uchar *) s;
 211
 212       /* Handle DOS line endings.  */
 213       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 214         s++;
 215     }
 216
 217  done:
 218   *d = '\n';
 219   /* A sentinel note that should never be processed.  */
 220   add_line_note (buffer, d + 1, '\n');
 221   buffer->next_line = s + 1;
 222 }
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           pfile->line++;
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, CPP_DL_WARNING, pfile->line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, CPP_DL_WARNING,
 346                                    pfile->line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           buffer->cur = cur - 1;
 353           _cpp_process_line_notes (pfile, true);
 354           if (buffer->next_line >= buffer->rlimit)
 355             return true;
 356           _cpp_clean_line (pfile);
 357           pfile->line++;
 358           cur = buffer->cur;
 359         }
 360     }
 361
 362   buffer->cur = cur;
 363   _cpp_process_line_notes (pfile, true);
 364   return false;
 365 }
 366
 367 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 368    terminating newline.  Handles escaped newlines.  Returns nonzero
 369    if a multiline comment.  */
 370 static int
 371 skip_line_comment (cpp_reader *pfile)
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_line = pfile->line;
 375
 376   while (*buffer->cur != '\n')
 377     buffer->cur++;
 378
 379   _cpp_process_line_notes (pfile, true);
 380   return orig_line != pfile->line;
 381 }
 382
 383 /* Skips whitespace, saving the next non-whitespace character.  */
 384 static void
 385 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 386 {
 387   cpp_buffer *buffer = pfile->buffer;
 388   bool saw_NUL = false;
 389
 390   do
 391     {
 392       /* Horizontal space always OK.  */
 393       if (c == ' ' || c == '\t')
 394         ;
 395       /* Just \f \v or \0 left.  */
 396       else if (c == '\0')
 397         saw_NUL = true;
 398       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 399         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line,
 400                              CPP_BUF_COL (buffer),
 401                              "%s in preprocessing directive",
 402                              c == '\f' ? "form feed" : "vertical tab");
 403
 404       c = *buffer->cur++;
 405     }
 406   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 407   while (is_nvspace (c));
 408
 409   if (saw_NUL)
 410     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 411
 412   buffer->cur--;
 413 }
 414
 415 /* See if the characters of a number token are valid in a name (no
 416    '.', '+' or '-').  */
 417 static int
 418 name_p (cpp_reader *pfile, const cpp_string *string)
 419 {
 420   unsigned int i;
 421
 422   for (i = 0; i < string->len; i++)
 423     if (!is_idchar (string->text[i]))
 424       return 0;
 425
 426   return 1;
 427 }
 428
 429 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 430    an identifier.  FIRST is TRUE if this starts an identifier.  */
 431 static bool
 432 forms_identifier_p (cpp_reader *pfile, int first)
 433 {
 434   cpp_buffer *buffer = pfile->buffer;
 435
 436   if (*buffer->cur == '$')
 437     {
 438       if (!CPP_OPTION (pfile, dollars_in_ident))
 439         return false;
 440
 441       buffer->cur++;
 442       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 443         {
 444           CPP_OPTION (pfile, warn_dollars) = 0;
 445           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 446         }
 447
 448       return true;
 449     }
 450
 451   /* Is this a syntactically valid UCN?  */
 452   if (0 && *buffer->cur == '\\'
 453       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 454     {
 455       buffer->cur += 2;
 456       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 457         return true;
 458       buffer->cur -= 2;
 459     }
 460
 461   return false;
 462 }
 463
 464 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 465 static cpp_hashnode *
 466 lex_identifier (cpp_reader *pfile, const uchar *base)
 467 {
 468   cpp_hashnode *result;
 469   const uchar *cur;
 470
 471   do
 472     {
 473       cur = pfile->buffer->cur;
 474
 475       /* N.B. ISIDNUM does not include $.  */
 476       while (ISIDNUM (*cur))
 477         cur++;
 478
 479       pfile->buffer->cur = cur;
 480     }
 481   while (forms_identifier_p (pfile, false));
 482
 483   result = (cpp_hashnode *)
 484     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 485
 486   /* Rarely, identifiers require diagnostics when lexed.  */
 487   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 488                         && !pfile->state.skipping, 0))
 489     {
 490       /* It is allowed to poison the same identifier twice.  */
 491       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 492         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 493                    NODE_NAME (result));
 494
 495       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 496          replacement list of a variadic macro.  */
 497       if (result == pfile->spec_nodes.n__VA_ARGS__
 498           && !pfile->state.va_args_ok)
 499         cpp_error (pfile, CPP_DL_PEDWARN,
 500                    "__VA_ARGS__ can only appear in the expansion"
 501                    " of a C99 variadic macro");
 502     }
 503
 504   return result;
 505 }
 506
 507 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 508 static void
 509 lex_number (cpp_reader *pfile, cpp_string *number)
 510 {
 511   const uchar *cur;
 512   const uchar *base;
 513   uchar *dest;
 514
 515   base = pfile->buffer->cur - 1;
 516   do
 517     {
 518       cur = pfile->buffer->cur;
 519
 520       /* N.B. ISIDNUM does not include $.  */
 521       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 522         cur++;
 523
 524       pfile->buffer->cur = cur;
 525     }
 526   while (forms_identifier_p (pfile, false));
 527
 528   number->len = cur - base;
 529   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 530   memcpy (dest, base, number->len);
 531   dest[number->len] = '\0';
 532   number->text = dest;
 533 }
 534
 535 /* Create a token of type TYPE with a literal spelling.  */
 536 static void
 537 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 538                 unsigned int len, enum cpp_ttype type)
 539 {
 540   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 541
 542   memcpy (dest, base, len);
 543   dest[len] = '\0';
 544   token->type = type;
 545   token->val.str.len = len;
 546   token->val.str.text = dest;
 547 }
 548
 549 /* Lexes a string, character constant, or angle-bracketed header file
 550    name.  The stored string contains the spelling, including opening
 551    quote and leading any leading 'L'.  It returns the type of the
 552    literal, or CPP_OTHER if it was not properly terminated.
 553
 554    The spelling is NUL-terminated, but it is not guaranteed that this
 555    is the first NUL since embedded NULs are preserved.  */
 556 static void
 557 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 558 {
 559   bool saw_NUL = false;
 560   const uchar *cur;
 561   cppchar_t terminator;
 562   enum cpp_ttype type;
 563
 564   cur = base;
 565   terminator = *cur++;
 566   if (terminator == 'L')
 567     terminator = *cur++;
 568   if (terminator == '\"')
 569     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 570   else if (terminator == '\'')
 571     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 572   else
 573     terminator = '>', type = CPP_HEADER_NAME;
 574
 575   for (;;)
 576     {
 577       cppchar_t c = *cur++;
 578
 579       /* In #include-style directives, terminators are not escapable.  */
 580       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 581         cur++;
 582       else if (c == terminator)
 583         break;
 584       else if (c == '\n')
 585         {
 586           cur--;
 587           type = CPP_OTHER;
 588           break;
 589         }
 590       else if (c == '\0')
 591         saw_NUL = true;
 592     }
 593
 594   if (saw_NUL && !pfile->state.skipping)
 595     cpp_error (pfile, CPP_DL_WARNING,
 596                "null character(s) preserved in literal");
 597
 598   pfile->buffer->cur = cur;
 599   create_literal (pfile, token, base, cur - base, type);
 600 }
 601
 602 /* The stored comment includes the comment start and any terminator.  */
 603 static void
 604 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 605               cppchar_t type)
 606 {
 607   unsigned char *buffer;
 608   unsigned int len, clen;
 609
 610   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 611
 612   /* C++ comments probably (not definitely) have moved past a new
 613      line, which we don't want to save in the comment.  */
 614   if (is_vspace (pfile->buffer->cur[-1]))
 615     len--;
 616
 617   /* If we are currently in a directive, then we need to store all
 618      C++ comments as C comments internally, and so we need to
 619      allocate a little extra space in that case.
 620
 621      Note that the only time we encounter a directive here is
 622      when we are saving comments in a "#define".  */
 623   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 624
 625   buffer = _cpp_unaligned_alloc (pfile, clen);
 626
 627   token->type = CPP_COMMENT;
 628   token->val.str.len = clen;
 629   token->val.str.text = buffer;
 630
 631   buffer[0] = '/';
 632   memcpy (buffer + 1, from, len - 1);
 633
 634   /* Finish conversion to a C comment, if necessary.  */
 635   if (pfile->state.in_directive && type == '/')
 636     {
 637       buffer[1] = '*';
 638       buffer[clen - 2] = '*';
 639       buffer[clen - 1] = '/';
 640     }
 641 }
 642
 643 /* Allocate COUNT tokens for RUN.  */
 644 void
 645 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 646 {
 647   run->base = xnewvec (cpp_token, count);
 648   run->limit = run->base + count;
 649   run->next = NULL;
 650 }
 651
 652 /* Returns the next tokenrun, or creates one if there is none.  */
 653 static tokenrun *
 654 next_tokenrun (tokenrun *run)
 655 {
 656   if (run->next == NULL)
 657     {
 658       run->next = xnew (tokenrun);
 659       run->next->prev = run;
 660       _cpp_init_tokenrun (run->next, 250);
 661     }
 662
 663   return run->next;
 664 }
 665
 666 /* Allocate a single token that is invalidated at the same time as the
 667    rest of the tokens on the line.  Has its line and col set to the
 668    same as the last lexed token, so that diagnostics appear in the
 669    right place.  */
 670 cpp_token *
 671 _cpp_temp_token (cpp_reader *pfile)
 672 {
 673   cpp_token *old, *result;
 674
 675   old = pfile->cur_token - 1;
 676   if (pfile->cur_token == pfile->cur_run->limit)
 677     {
 678       pfile->cur_run = next_tokenrun (pfile->cur_run);
 679       pfile->cur_token = pfile->cur_run->base;
 680     }
 681
 682   result = pfile->cur_token++;
 683   result->line = old->line;
 684   result->col = old->col;
 685   return result;
 686 }
 687
 688 /* Lex a token into RESULT (external interface).  Takes care of issues
 689    like directive handling, token lookahead, multiple include
 690    optimization and skipping.  */
 691 const cpp_token *
 692 _cpp_lex_token (cpp_reader *pfile)
 693 {
 694   cpp_token *result;
 695
 696   for (;;)
 697     {
 698       if (pfile->cur_token == pfile->cur_run->limit)
 699         {
 700           pfile->cur_run = next_tokenrun (pfile->cur_run);
 701           pfile->cur_token = pfile->cur_run->base;
 702         }
 703
 704       if (pfile->lookaheads)
 705         {
 706           pfile->lookaheads--;
 707           result = pfile->cur_token++;
 708         }
 709       else
 710         result = _cpp_lex_direct (pfile);
 711
 712       if (result->flags & BOL)
 713         {
 714           /* Is this a directive.  If _cpp_handle_directive returns
 715              false, it is an assembler #.  */
 716           if (result->type == CPP_HASH
 717               /* 6.10.3 p 11: Directives in a list of macro arguments
 718                  gives undefined behavior.  This implementation
 719                  handles the directive as normal.  */
 720               && pfile->state.parsing_args != 1
 721               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 722             continue;
 723           if (pfile->cb.line_change && !pfile->state.skipping)
 724             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 725         }
 726
 727       /* We don't skip tokens in directives.  */
 728       if (pfile->state.in_directive)
 729         break;
 730
 731       /* Outside a directive, invalidate controlling macros.  At file
 732          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 733          get here and MI optimization works.  */
 734       pfile->mi_valid = false;
 735
 736       if (!pfile->state.skipping || result->type == CPP_EOF)
 737         break;
 738     }
 739
 740   return result;
 741 }
 742
 743 /* Returns true if a fresh line has been loaded.  */
 744 bool
 745 _cpp_get_fresh_line (cpp_reader *pfile)
 746 {
 747   int return_at_eof;
 748
 749   /* We can't get a new line until we leave the current directive.  */
 750   if (pfile->state.in_directive)
 751     return false;
 752
 753   for (;;)
 754     {
 755       cpp_buffer *buffer = pfile->buffer;
 756
 757       if (!buffer->need_line)
 758         return true;
 759
 760       if (buffer->next_line < buffer->rlimit)
 761         {
 762           _cpp_clean_line (pfile);
 763           return true;
 764         }
 765
 766       /* First, get out of parsing arguments state.  */
 767       if (pfile->state.parsing_args)
 768         return false;
 769
 770       /* End of buffer.  Non-empty files should end in a newline.  */
 771       if (buffer->buf != buffer->rlimit
 772           && buffer->next_line > buffer->rlimit
 773           && !buffer->from_stage3)
 774         {
 775           /* Only warn once.  */
 776           buffer->next_line = buffer->rlimit;
 777           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line - 1,
 778                                CPP_BUF_COLUMN (buffer, buffer->cur),
 779                                "no newline at end of file");
 780         }
 781
 782       return_at_eof = buffer->return_at_eof;
 783       _cpp_pop_buffer (pfile);
 784       if (pfile->buffer == NULL || return_at_eof)
 785         return false;
 786     }
 787 }
 788
 789 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 790   do                                                    \
 791     {                                                   \
 792       result->type = ELSE_TYPE;                         \
 793       if (*buffer->cur == CHAR)                         \
 794         buffer->cur++, result->type = THEN_TYPE;        \
 795     }                                                   \
 796   while (0)
 797
 798 /* Lex a token into pfile->cur_token, which is also incremented, to
 799    get diagnostics pointing to the correct location.
 800
 801    Does not handle issues such as token lookahead, multiple-include
 802    optimization, directives, skipping etc.  This function is only
 803    suitable for use by _cpp_lex_token, and in special cases like
 804    lex_expansion_token which doesn't care for any of these issues.
 805
 806    When meeting a newline, returns CPP_EOF if parsing a directive,
 807    otherwise returns to the start of the token buffer if permissible.
 808    Returns the location of the lexed token.  */
 809 cpp_token *
 810 _cpp_lex_direct (cpp_reader *pfile)
 811 {
 812   cppchar_t c;
 813   cpp_buffer *buffer;
 814   const unsigned char *comment_start;
 815   cpp_token *result = pfile->cur_token++;
 816
 817  fresh_line:
 818   result->flags = 0;
 819   buffer = pfile->buffer;
 820   if (buffer->need_line)
 821     {
 822       if (!_cpp_get_fresh_line (pfile))
 823         {
 824           result->type = CPP_EOF;
 825           if (!pfile->state.in_directive)
 826             {
 827               /* Tell the compiler the line number of the EOF token.  */
 828               result->line = pfile->line;
 829               result->flags = BOL;
 830             }
 831           return result;
 832         }
 833       if (!pfile->keep_tokens)
 834         {
 835           pfile->cur_run = &pfile->base_run;
 836           result = pfile->base_run.base;
 837           pfile->cur_token = result + 1;
 838         }
 839       result->flags = BOL;
 840       if (pfile->state.parsing_args == 2)
 841         result->flags |= PREV_WHITE;
 842     }
 843   buffer = pfile->buffer;
 844  update_tokens_line:
 845   result->line = pfile->line;
 846
 847  skipped_white:
 848   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 849       && !pfile->overlaid_buffer)
 850     {
 851       _cpp_process_line_notes (pfile, false);
 852       result->line = pfile->line;
 853     }
 854   c = *buffer->cur++;
 855   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 856
 857   switch (c)
 858     {
 859     case ' ': case '\t': case '\f': case '\v': case '\0':
 860       result->flags |= PREV_WHITE;
 861       skip_whitespace (pfile, c);
 862       goto skipped_white;
 863
 864     case '\n':
 865       pfile->line++;
 866       buffer->need_line = true;
 867       goto fresh_line;
 868
 869     case '0': case '1': case '2': case '3': case '4':
 870     case '5': case '6': case '7': case '8': case '9':
 871       result->type = CPP_NUMBER;
 872       lex_number (pfile, &result->val.str);
 873       break;
 874
 875     case 'L':
 876       /* 'L' may introduce wide characters or strings.  */
 877       if (*buffer->cur == '\'' || *buffer->cur == '"')
 878         {
 879           lex_string (pfile, result, buffer->cur - 1);
 880           break;
 881         }
 882       /* Fall through.  */
 883
 884     case '_':
 885     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 886     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 887     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 888     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 889     case 'y': case 'z':
 890     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 891     case 'G': case 'H': case 'I': case 'J': case 'K':
 892     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 893     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 894     case 'Y': case 'Z':
 895       result->type = CPP_NAME;
 896       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 897
 898       /* Convert named operators to their proper types.  */
 899       if (result->val.node->flags & NODE_OPERATOR)
 900         {
 901           result->flags |= NAMED_OP;
 902           result->type = result->val.node->directive_index;
 903         }
 904       break;
 905
 906     case '\'':
 907     case '"':
 908       lex_string (pfile, result, buffer->cur - 1);
 909       break;
 910
 911     case '/':
 912       /* A potential block or line comment.  */
 913       comment_start = buffer->cur;
 914       c = *buffer->cur;
 915
 916       if (c == '*')
 917         {
 918           if (_cpp_skip_block_comment (pfile))
 919             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 920         }
 921       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 922                             || CPP_IN_SYSTEM_HEADER (pfile)))
 923         {
 924           /* Warn about comments only if pedantically GNUC89, and not
 925              in system headers.  */
 926           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 927               && ! buffer->warned_cplusplus_comments)
 928             {
 929               cpp_error (pfile, CPP_DL_PEDWARN,
 930                          "C++ style comments are not allowed in ISO C90");
 931               cpp_error (pfile, CPP_DL_PEDWARN,
 932                          "(this will be reported only once per input file)");
 933               buffer->warned_cplusplus_comments = 1;
 934             }
 935
 936           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 937             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
 938         }
 939       else if (c == '=')
 940         {
 941           buffer->cur++;
 942           result->type = CPP_DIV_EQ;
 943           break;
 944         }
 945       else
 946         {
 947           result->type = CPP_DIV;
 948           break;
 949         }
 950
 951       if (!pfile->state.save_comments)
 952         {
 953           result->flags |= PREV_WHITE;
 954           goto update_tokens_line;
 955         }
 956
 957       /* Save the comment as a token in its own right.  */
 958       save_comment (pfile, result, comment_start, c);
 959       break;
 960
 961     case '<':
 962       if (pfile->state.angled_headers)
 963         {
 964           lex_string (pfile, result, buffer->cur - 1);
 965           break;
 966         }
 967
 968       result->type = CPP_LESS;
 969       if (*buffer->cur == '=')
 970         buffer->cur++, result->type = CPP_LESS_EQ;
 971       else if (*buffer->cur == '<')
 972         {
 973           buffer->cur++;
 974           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 975         }
 976       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 977         {
 978           buffer->cur++;
 979           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 980         }
 981       else if (CPP_OPTION (pfile, digraphs))
 982         {
 983           if (*buffer->cur == ':')
 984             {
 985               buffer->cur++;
 986               result->flags |= DIGRAPH;
 987               result->type = CPP_OPEN_SQUARE;
 988             }
 989           else if (*buffer->cur == '%')
 990             {
 991               buffer->cur++;
 992               result->flags |= DIGRAPH;
 993               result->type = CPP_OPEN_BRACE;
 994             }
 995         }
 996       break;
 997
 998     case '>':
 999       result->type = CPP_GREATER;
1000       if (*buffer->cur == '=')
1001         buffer->cur++, result->type = CPP_GREATER_EQ;
1002       else if (*buffer->cur == '>')
1003         {
1004           buffer->cur++;
1005           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1006         }
1007       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1008         {
1009           buffer->cur++;
1010           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1011         }
1012       break;
1013
1014     case '%':
1015       result->type = CPP_MOD;
1016       if (*buffer->cur == '=')
1017         buffer->cur++, result->type = CPP_MOD_EQ;
1018       else if (CPP_OPTION (pfile, digraphs))
1019         {
1020           if (*buffer->cur == ':')
1021             {
1022               buffer->cur++;
1023               result->flags |= DIGRAPH;
1024               result->type = CPP_HASH;
1025               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1026                 buffer->cur += 2, result->type = CPP_PASTE;
1027             }
1028           else if (*buffer->cur == '>')
1029             {
1030               buffer->cur++;
1031               result->flags |= DIGRAPH;
1032               result->type = CPP_CLOSE_BRACE;
1033             }
1034         }
1035       break;
1036
1037     case '.':
1038       result->type = CPP_DOT;
1039       if (ISDIGIT (*buffer->cur))
1040         {
1041           result->type = CPP_NUMBER;
1042           lex_number (pfile, &result->val.str);
1043         }
1044       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1045         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1046       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1047         buffer->cur++, result->type = CPP_DOT_STAR;
1048       break;
1049
1050     case '+':
1051       result->type = CPP_PLUS;
1052       if (*buffer->cur == '+')
1053         buffer->cur++, result->type = CPP_PLUS_PLUS;
1054       else if (*buffer->cur == '=')
1055         buffer->cur++, result->type = CPP_PLUS_EQ;
1056       break;
1057
1058     case '-':
1059       result->type = CPP_MINUS;
1060       if (*buffer->cur == '>')
1061         {
1062           buffer->cur++;
1063           result->type = CPP_DEREF;
1064           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1065             buffer->cur++, result->type = CPP_DEREF_STAR;
1066         }
1067       else if (*buffer->cur == '-')
1068         buffer->cur++, result->type = CPP_MINUS_MINUS;
1069       else if (*buffer->cur == '=')
1070         buffer->cur++, result->type = CPP_MINUS_EQ;
1071       break;
1072
1073     case '&':
1074       result->type = CPP_AND;
1075       if (*buffer->cur == '&')
1076         buffer->cur++, result->type = CPP_AND_AND;
1077       else if (*buffer->cur == '=')
1078         buffer->cur++, result->type = CPP_AND_EQ;
1079       break;
1080
1081     case '|':
1082       result->type = CPP_OR;
1083       if (*buffer->cur == '|')
1084         buffer->cur++, result->type = CPP_OR_OR;
1085       else if (*buffer->cur == '=')
1086         buffer->cur++, result->type = CPP_OR_EQ;
1087       break;
1088
1089     case ':':
1090       result->type = CPP_COLON;
1091       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1092         buffer->cur++, result->type = CPP_SCOPE;
1093       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1094         {
1095           buffer->cur++;
1096           result->flags |= DIGRAPH;
1097           result->type = CPP_CLOSE_SQUARE;
1098         }
1099       break;
1100
1101     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1102     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1103     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1104     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1105     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1106
1107     case '?': result->type = CPP_QUERY; break;
1108     case '~': result->type = CPP_COMPL; break;
1109     case ',': result->type = CPP_COMMA; break;
1110     case '(': result->type = CPP_OPEN_PAREN; break;
1111     case ')': result->type = CPP_CLOSE_PAREN; break;
1112     case '[': result->type = CPP_OPEN_SQUARE; break;
1113     case ']': result->type = CPP_CLOSE_SQUARE; break;
1114     case '{': result->type = CPP_OPEN_BRACE; break;
1115     case '}': result->type = CPP_CLOSE_BRACE; break;
1116     case ';': result->type = CPP_SEMICOLON; break;
1117
1118       /* @ is a punctuator in Objective-C.  */
1119     case '@': result->type = CPP_ATSIGN; break;
1120
1121     case '$':
1122     case '\\':
1123       {
1124         const uchar *base = --buffer->cur;
1125
1126         if (forms_identifier_p (pfile, true))
1127           {
1128             result->type = CPP_NAME;
1129             result->val.node = lex_identifier (pfile, base);
1130             break;
1131           }
1132         buffer->cur++;
1133       }
1134
1135     default:
1136       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1137       break;
1138     }
1139
1140   return result;
1141 }
1142
1143 /* An upper bound on the number of bytes needed to spell TOKEN.
1144    Does not include preceding whitespace.  */
1145 unsigned int
1146 cpp_token_len (const cpp_token *token)
1147 {
1148   unsigned int len;
1149
1150   switch (TOKEN_SPELL (token))
1151     {
1152     default:            len = 4;                                break;
1153     case SPELL_LITERAL: len = token->val.str.len;               break;
1154     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1155     }
1156
1157   return len;
1158 }
1159
1160 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1161    already contain the enough space to hold the token's spelling.
1162    Returns a pointer to the character after the last character written.
1163    FIXME: Would be nice if we didn't need the PFILE argument.  */
1164 unsigned char *
1165 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1166                  unsigned char *buffer)
1167 {
1168   switch (TOKEN_SPELL (token))
1169     {
1170     case SPELL_OPERATOR:
1171       {
1172         const unsigned char *spelling;
1173         unsigned char c;
1174
1175         if (token->flags & DIGRAPH)
1176           spelling
1177             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1178         else if (token->flags & NAMED_OP)
1179           goto spell_ident;
1180         else
1181           spelling = TOKEN_NAME (token);
1182
1183         while ((c = *spelling++) != '\0')
1184           *buffer++ = c;
1185       }
1186       break;
1187
1188     spell_ident:
1189     case SPELL_IDENT:
1190       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1191       buffer += NODE_LEN (token->val.node);
1192       break;
1193
1194     case SPELL_LITERAL:
1195       memcpy (buffer, token->val.str.text, token->val.str.len);
1196       buffer += token->val.str.len;
1197       break;
1198
1199     case SPELL_NONE:
1200       cpp_error (pfile, CPP_DL_ICE,
1201                  "unspellable token %s", TOKEN_NAME (token));
1202       break;
1203     }
1204
1205   return buffer;
1206 }
1207
1208 /* Returns TOKEN spelt as a null-terminated string.  The string is
1209    freed when the reader is destroyed.  Useful for diagnostics.  */
1210 unsigned char *
1211 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1212 {
1213   unsigned int len = cpp_token_len (token) + 1;
1214   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1215
1216   end = cpp_spell_token (pfile, token, start);
1217   end[0] = '\0';
1218
1219   return start;
1220 }
1221
1222 /* Used by C front ends, which really should move to using
1223    cpp_token_as_text.  */
1224 const char *
1225 cpp_type2name (enum cpp_ttype type)
1226 {
1227   return (const char *) token_spellings[type].name;
1228 }
1229
1230 /* Writes the spelling of token to FP, without any preceding space.
1231    Separated from cpp_spell_token for efficiency - to avoid stdio
1232    double-buffering.  */
1233 void
1234 cpp_output_token (const cpp_token *token, FILE *fp)
1235 {
1236   switch (TOKEN_SPELL (token))
1237     {
1238     case SPELL_OPERATOR:
1239       {
1240         const unsigned char *spelling;
1241         int c;
1242
1243         if (token->flags & DIGRAPH)
1244           spelling
1245             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1246         else if (token->flags & NAMED_OP)
1247           goto spell_ident;
1248         else
1249           spelling = TOKEN_NAME (token);
1250
1251         c = *spelling;
1252         do
1253           putc (c, fp);
1254         while ((c = *++spelling) != '\0');
1255       }
1256       break;
1257
1258     spell_ident:
1259     case SPELL_IDENT:
1260       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1261     break;
1262
1263     case SPELL_LITERAL:
1264       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1265       break;
1266
1267     case SPELL_NONE:
1268       /* An error, most probably.  */
1269       break;
1270     }
1271 }
1272
1273 /* Compare two tokens.  */
1274 int
1275 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1276 {
1277   if (a->type == b->type && a->flags == b->flags)
1278     switch (TOKEN_SPELL (a))
1279       {
1280       default:                  /* Keep compiler happy.  */
1281       case SPELL_OPERATOR:
1282         return 1;
1283       case SPELL_NONE:
1284         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1285       case SPELL_IDENT:
1286         return a->val.node == b->val.node;
1287       case SPELL_LITERAL:
1288         return (a->val.str.len == b->val.str.len
1289                 && !memcmp (a->val.str.text, b->val.str.text,
1290                             a->val.str.len));
1291       }
1292
1293   return 0;
1294 }
1295
1296 /* Returns nonzero if a space should be inserted to avoid an
1297    accidental token paste for output.  For simplicity, it is
1298    conservative, and occasionally advises a space where one is not
1299    needed, e.g. "." and ".2".  */
1300 int
1301 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1302                  const cpp_token *token2)
1303 {
1304   enum cpp_ttype a = token1->type, b = token2->type;
1305   cppchar_t c;
1306
1307   if (token1->flags & NAMED_OP)
1308     a = CPP_NAME;
1309   if (token2->flags & NAMED_OP)
1310     b = CPP_NAME;
1311
1312   c = EOF;
1313   if (token2->flags & DIGRAPH)
1314     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1315   else if (token_spellings[b].category == SPELL_OPERATOR)
1316     c = token_spellings[b].name[0];
1317
1318   /* Quickly get everything that can paste with an '='.  */
1319   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1320     return 1;
1321
1322   switch (a)
1323     {
1324     case CPP_GREATER:   return c == '>' || c == '?';
1325     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1326     case CPP_PLUS:      return c == '+';
1327     case CPP_MINUS:     return c == '-' || c == '>';
1328     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1329     case CPP_MOD:       return c == ':' || c == '>';
1330     case CPP_AND:       return c == '&';
1331     case CPP_OR:        return c == '|';
1332     case CPP_COLON:     return c == ':' || c == '>';
1333     case CPP_DEREF:     return c == '*';
1334     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1335     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1336     case CPP_NAME:      return ((b == CPP_NUMBER
1337                                  && name_p (pfile, &token2->val.str))
1338                                 || b == CPP_NAME
1339                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1340     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1341                                 || c == '.' || c == '+' || c == '-');
1342                                       /* UCNs */
1343     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1344                                  && b == CPP_NAME)
1345                                 || (CPP_OPTION (pfile, objc)
1346                                     && token1->val.str.text[0] == '@'
1347                                     && (b == CPP_NAME || b == CPP_STRING)));
1348     default:            break;
1349     }
1350
1351   return 0;
1352 }
1353
1354 /* Output all the remaining tokens on the current line, and a newline
1355    character, to FP.  Leading whitespace is removed.  If there are
1356    macros, special token padding is not performed.  */
1357 void
1358 cpp_output_line (cpp_reader *pfile, FILE *fp)
1359 {
1360   const cpp_token *token;
1361
1362   token = cpp_get_token (pfile);
1363   while (token->type != CPP_EOF)
1364     {
1365       cpp_output_token (token, fp);
1366       token = cpp_get_token (pfile);
1367       if (token->flags & PREV_WHITE)
1368         putc (' ', fp);
1369     }
1370
1371   putc ('\n', fp);
1372 }
1373
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1388
1389 /* Create a new allocation buffer.  Place the control block at the end
1390    of the buffer, so that buffer overflows will cause immediate chaos.  */
1391 static _cpp_buff *
1392 new_buff (size_t len)
1393 {
1394   _cpp_buff *result;
1395   unsigned char *base;
1396
1397   if (len < MIN_BUFF_SIZE)
1398     len = MIN_BUFF_SIZE;
1399   len = CPP_ALIGN (len);
1400
1401   base = xmalloc (len + sizeof (_cpp_buff));
1402   result = (_cpp_buff *) (base + len);
1403   result->base = base;
1404   result->cur = base;
1405   result->limit = base + len;
1406   result->next = NULL;
1407   return result;
1408 }
1409
1410 /* Place a chain of unwanted allocation buffers on the free list.  */
1411 void
1412 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1413 {
1414   _cpp_buff *end = buff;
1415
1416   while (end->next)
1417     end = end->next;
1418   end->next = pfile->free_buffs;
1419   pfile->free_buffs = buff;
1420 }
1421
1422 /* Return a free buffer of size at least MIN_SIZE.  */
1423 _cpp_buff *
1424 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1425 {
1426   _cpp_buff *result, **p;
1427
1428   for (p = &pfile->free_buffs;; p = &(*p)->next)
1429     {
1430       size_t size;
1431
1432       if (*p == NULL)
1433         return new_buff (min_size);
1434       result = *p;
1435       size = result->limit - result->base;
1436       /* Return a buffer that's big enough, but don't waste one that's
1437          way too big.  */
1438       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1439         break;
1440     }
1441
1442   *p = result->next;
1443   result->next = NULL;
1444   result->cur = result->base;
1445   return result;
1446 }
1447
1448 /* Creates a new buffer with enough space to hold the uncommitted
1449    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1450    the excess bytes to the new buffer.  Chains the new buffer after
1451    BUFF, and returns the new buffer.  */
1452 _cpp_buff *
1453 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1454 {
1455   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1456   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1457
1458   buff->next = new_buff;
1459   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1460   return new_buff;
1461 }
1462
1463 /* Creates a new buffer with enough space to hold the uncommitted
1464    remaining bytes of the buffer pointed to by BUFF, and at least
1465    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1466    Chains the new buffer before the buffer pointed to by BUFF, and
1467    updates the pointer to point to the new buffer.  */
1468 void
1469 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1470 {
1471   _cpp_buff *new_buff, *old_buff = *pbuff;
1472   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1473
1474   new_buff = _cpp_get_buff (pfile, size);
1475   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1476   new_buff->next = old_buff;
1477   *pbuff = new_buff;
1478 }
1479
1480 /* Free a chain of buffers starting at BUFF.  */
1481 void
1482 _cpp_free_buff (_cpp_buff *buff)
1483 {
1484   _cpp_buff *next;
1485
1486   for (; buff; buff = next)
1487     {
1488       next = buff->next;
1489       free (buff->base);
1490     }
1491 }
1492
1493 /* Allocate permanent, unaligned storage of length LEN.  */
1494 unsigned char *
1495 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1496 {
1497   _cpp_buff *buff = pfile->u_buff;
1498   unsigned char *result = buff->cur;
1499
1500   if (len > (size_t) (buff->limit - result))
1501     {
1502       buff = _cpp_get_buff (pfile, len);
1503       buff->next = pfile->u_buff;
1504       pfile->u_buff = buff;
1505       result = buff->cur;
1506     }
1507
1508   buff->cur = result + len;
1509   return result;
1510 }
1511
1512 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1513    That buffer is used for growing allocations when saving macro
1514    replacement lists in a #define, and when parsing an answer to an
1515    assertion in #assert, #unassert or #if (and therefore possibly
1516    whilst expanding macros).  It therefore must not be used by any
1517    code that they might call: specifically the lexer and the guts of
1518    the macro expander.
1519
1520    All existing other uses clearly fit this restriction: storing
1521    registered pragmas during initialization.  */
1522 unsigned char *
1523 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1524 {
1525   _cpp_buff *buff = pfile->a_buff;
1526   unsigned char *result = buff->cur;
1527
1528   if (len > (size_t) (buff->limit - result))
1529     {
1530       buff = _cpp_get_buff (pfile, len);
1531       buff->next = pfile->a_buff;
1532       pfile->a_buff = buff;
1533       result = buff->cur;
1534     }
1535
1536   buff->cur = result + len;
1537   return result;
1538 }