gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27
  28 #ifdef MULTIBYTE_CHARS
  29 #include "mbchar.h"
  30 #include <locale.h>
  31 #endif
  32
/* How the spelling of a token type is categorised.  Tokens with
   SPELL_STRING store their spelling in the token list, and its
   length in the token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};

/* One entry of the token_spellings table below: the spelling
   category of a token type together with its fixed name.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};

/* Spellings of the digraph forms of operators.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };

/* Expand TTYPE_TABLE into the token_spellings initialiser.  An OP
   entry is an operator whose name is the string S; a TK entry takes
   its category from S and its name from the stringified
   enumerator E.  Both macros are only needed for this expansion and
   are undefined straight afterwards.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category / name of TOKEN by its type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read pointer to the saved backup position.  Expects a
   variable named "buffer" to be in scope at the point of use.  */
#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  63
/* Forward declarations of the file-local helpers defined below.
   PARAMS keeps the prototypes usable with pre-ISO compilers.  */
static void handle_newline PARAMS ((cpp_reader *));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
static cppchar_t get_effective_char PARAMS ((cpp_reader *));

static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
                                  unsigned int *));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static bool trigraph_p PARAMS ((cpp_reader *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
                                  cppchar_t));
static bool continue_after_nul PARAMS ((cpp_reader *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
                                   const unsigned char *, cppchar_t *));
static tokenrun *next_tokenrun PARAMS ((tokenrun *));

static unsigned int hex_digit_value PARAMS ((unsigned int));
static _cpp_buff *new_buff PARAMS ((size_t));
  89
  90 /* Utility routine:
  91
  92    Compares, the token TOKEN to the NUL-terminated string STRING.
  93    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  94 int
  95 cpp_ideq (token, string)
  96      const cpp_token *token;
  97      const char *string;
  98 {
  99   if (token->type != CPP_NAME)
 100     return 0;
 101
 102   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 103 }
 104
 105 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 106    Returns with buffer->cur pointing to the character immediately
 107    following the newline (combination).  */
 108 static void
 109 handle_newline (pfile)
 110      cpp_reader *pfile;
 111 {
 112   cpp_buffer *buffer = pfile->buffer;
 113
 114   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 115      only accept CR-LF; maybe we should fall back to that behaviour?  */
 116   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 117     buffer->cur++;
 118
 119   buffer->line_base = buffer->cur;
 120   buffer->col_adjust = 0;
 121   pfile->line++;
 122 }
 123
 124 /* Subroutine of skip_escaped_newlines; called when a 3-character
 125    sequence beginning with "??" is encountered.  buffer->cur points to
 126    the second '?'.
 127
 128    Warn if necessary, and returns true if the sequence forms a
 129    trigraph and the trigraph should be honoured.  */
 130 static bool
 131 trigraph_p (pfile)
 132      cpp_reader *pfile;
 133 {
 134   cpp_buffer *buffer = pfile->buffer;
 135   cppchar_t from_char = buffer->cur[1];
 136   bool accept;
 137
 138   if (!_cpp_trigraph_map[from_char])
 139     return false;
 140
 141   accept = CPP_OPTION (pfile, trigraphs);
 142
 143   /* Don't warn about trigraphs in comments.  */
 144   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 145     {
 146       if (accept)
 147         cpp_error_with_line (pfile, DL_WARNING,
 148                              pfile->line, CPP_BUF_COL (buffer) - 1,
 149                              "trigraph ??%c converted to %c",
 150                              (int) from_char,
 151                              (int) _cpp_trigraph_map[from_char]);
 152       else if (buffer->cur != buffer->last_Wtrigraphs)
 153         {
 154           buffer->last_Wtrigraphs = buffer->cur;
 155           cpp_error_with_line (pfile, DL_WARNING,
 156                                pfile->line, CPP_BUF_COL (buffer) - 1,
 157                                "trigraph ??%c ignored", (int) from_char);
 158         }
 159     }
 160
 161   return accept;
 162 }
 163
 164 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 165    lie in buffer->cur[-1].  Returns the next byte, which will be in
 166    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 167    2 of the ISO C standard.  */
 168 static cppchar_t
 169 skip_escaped_newlines (pfile)
 170      cpp_reader *pfile;
 171 {
 172   cpp_buffer *buffer = pfile->buffer;
 173   cppchar_t next = buffer->cur[-1];
 174
 175   /* Only do this if we apply stages 1 and 2.  */
 176   if (!buffer->from_stage3)
 177     {
 178       const unsigned char *saved_cur;
 179       cppchar_t next1;
 180
 181       do
 182         {
 183           if (next == '?')
 184             {
 185               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 186                 break;
 187
 188               /* Translate the trigraph.  */
 189               next = _cpp_trigraph_map[buffer->cur[1]];
 190               buffer->cur += 2;
 191               if (next != '\\')
 192                 break;
 193             }
 194
 195           if (buffer->cur == buffer->rlimit)
 196             break;
 197
 198           /* We have a backslash, and room for at least one more
 199              character.  Skip horizontal whitespace.  */
 200           saved_cur = buffer->cur;
 201           do
 202             next1 = *buffer->cur++;
 203           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 204
 205           if (!is_vspace (next1))
 206             {
 207               buffer->cur = saved_cur;
 208               break;
 209             }
 210
 211           if (saved_cur != buffer->cur - 1
 212               && !pfile->state.lexing_comment)
 213             cpp_error (pfile, DL_WARNING,
 214                        "backslash and newline separated by space");
 215
 216           handle_newline (pfile);
 217           buffer->backup_to = buffer->cur;
 218           if (buffer->cur == buffer->rlimit)
 219             {
 220               cpp_error (pfile, DL_PEDWARN,
 221                          "backslash-newline at end of file");
 222               next = EOF;
 223             }
 224           else
 225             next = *buffer->cur++;
 226         }
 227       while (next == '\\' || next == '?');
 228     }
 229
 230   return next;
 231 }
 232
 233 /* Obtain the next character, after trigraph conversion and skipping
 234    an arbitrarily long string of escaped newlines.  The common case of
 235    no trigraphs or escaped newlines falls through quickly.  On return,
 236    buffer->backup_to points to where to return to if the character is
 237    not to be processed.  */
 238 static cppchar_t
 239 get_effective_char (pfile)
 240      cpp_reader *pfile;
 241 {
 242   cppchar_t next;
 243   cpp_buffer *buffer = pfile->buffer;
 244
 245   buffer->backup_to = buffer->cur;
 246   next = *buffer->cur++;
 247   if (__builtin_expect (next == '?' || next == '\\', 0))
 248     next = skip_escaped_newlines (pfile);
 249
 250   return next;
 251 }
 252
 253 /* Skip a C-style block comment.  We find the end of the comment by
 254    seeing if an asterisk is before every '/' we encounter.  Returns
 255    non-zero if comment terminated by EOF, zero otherwise.  */
 256 static int
 257 skip_block_comment (pfile)
 258      cpp_reader *pfile;
 259 {
 260   cpp_buffer *buffer = pfile->buffer;
 261   cppchar_t c = EOF, prevc = EOF;
 262
 263   pfile->state.lexing_comment = 1;
 264   while (buffer->cur != buffer->rlimit)
 265     {
 266       prevc = c, c = *buffer->cur++;
 267
 268       /* FIXME: For speed, create a new character class of characters
 269          of interest inside block comments.  */
 270       if (c == '?' || c == '\\')
 271         c = skip_escaped_newlines (pfile);
 272
 273       /* People like decorating comments with '*', so check for '/'
 274          instead for efficiency.  */
 275       if (c == '/')
 276         {
 277           if (prevc == '*')
 278             break;
 279
 280           /* Warn about potential nested comments, but not if the '/'
 281              comes immediately before the true comment delimiter.
 282              Don't bother to get it right across escaped newlines.  */
 283           if (CPP_OPTION (pfile, warn_comments)
 284               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 285             cpp_error_with_line (pfile, DL_WARNING,
 286                                  pfile->line, CPP_BUF_COL (buffer),
 287                                  "\"/*\" within comment");
 288         }
 289       else if (is_vspace (c))
 290         handle_newline (pfile);
 291       else if (c == '\t')
 292         adjust_column (pfile);
 293     }
 294
 295   pfile->state.lexing_comment = 0;
 296   return c != '/' || prevc != '*';
 297 }
 298
 299 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 300    terminating newline.  Handles escaped newlines.  Returns non-zero
 301    if a multiline comment.  */
 302 static int
 303 skip_line_comment (pfile)
 304      cpp_reader *pfile;
 305 {
 306   cpp_buffer *buffer = pfile->buffer;
 307   unsigned int orig_line = pfile->line;
 308   cppchar_t c;
 309 #ifdef MULTIBYTE_CHARS
 310   wchar_t wc;
 311   int char_len;
 312 #endif
 313
 314   pfile->state.lexing_comment = 1;
 315 #ifdef MULTIBYTE_CHARS
 316   /* Reset multibyte conversion state.  */
 317   (void) local_mbtowc (NULL, NULL, 0);
 318 #endif
 319   do
 320     {
 321       if (buffer->cur == buffer->rlimit)
 322         goto at_eof;
 323
 324 #ifdef MULTIBYTE_CHARS
 325       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 326                                buffer->rlimit - buffer->cur);
 327       if (char_len == -1)
 328         {
 329           cpp_error (pfile, DL_WARNING,
 330                      "ignoring invalid multibyte character");
 331           char_len = 1;
 332           c = *buffer->cur++;
 333         }
 334       else
 335         {
 336           buffer->cur += char_len;
 337           c = wc;
 338         }
 339 #else
 340       c = *buffer->cur++;
 341 #endif
 342       if (c == '?' || c == '\\')
 343         c = skip_escaped_newlines (pfile);
 344     }
 345   while (!is_vspace (c));
 346
 347   /* Step back over the newline, except at EOF.  */
 348   buffer->cur--;
 349  at_eof:
 350
 351   pfile->state.lexing_comment = 0;
 352   return orig_line != pfile->line;
 353 }
 354
 355 /* pfile->buffer->cur is one beyond the \t character.  Update
 356    col_adjust so we track the column correctly.  */
 357 static void
 358 adjust_column (pfile)
 359      cpp_reader *pfile;
 360 {
 361   cpp_buffer *buffer = pfile->buffer;
 362   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 363
 364   /* Round it up to multiple of the tabstop, but subtract 1 since the
 365      tab itself occupies a character position.  */
 366   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 367                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 368 }
 369
 370 /* Skips whitespace, saving the next non-whitespace character.
 371    Adjusts pfile->col_adjust to account for tabs.  Without this,
 372    tokens might be assigned an incorrect column.  */
 373 static int
 374 skip_whitespace (pfile, c)
 375      cpp_reader *pfile;
 376      cppchar_t c;
 377 {
 378   cpp_buffer *buffer = pfile->buffer;
 379   unsigned int warned = 0;
 380
 381   do
 382     {
 383       /* Horizontal space always OK.  */
 384       if (c == ' ')
 385         ;
 386       else if (c == '\t')
 387         adjust_column (pfile);
 388       /* Just \f \v or \0 left.  */
 389       else if (c == '\0')
 390         {
 391           if (buffer->cur - 1 == buffer->rlimit)
 392             return 0;
 393           if (!warned)
 394             {
 395               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 396               warned = 1;
 397             }
 398         }
 399       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 400         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 401                              CPP_BUF_COL (buffer),
 402                              "%s in preprocessing directive",
 403                              c == '\f' ? "form feed" : "vertical tab");
 404
 405       c = *buffer->cur++;
 406     }
 407   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 408   while (is_nvspace (c));
 409
 410   buffer->cur--;
 411   return 1;
 412 }
 413
 414 /* See if the characters of a number token are valid in a name (no
 415    '.', '+' or '-').  */
 416 static int
 417 name_p (pfile, string)
 418      cpp_reader *pfile;
 419      const cpp_string *string;
 420 {
 421   unsigned int i;
 422
 423   for (i = 0; i < string->len; i++)
 424     if (!is_idchar (string->text[i]))
 425       return 0;
 426
 427   return 1;
 428 }
 429
 430 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 431    a critical inner loop.  The common case is an identifier which has
 432    not been split by backslash-newline, does not contain a dollar
 433    sign, and has already been scanned (roughly 10:1 ratio of
 434    seen:unseen identifiers in normal code; the distribution is
 435    Poisson-like).  Second most common case is a new identifier, not
 436    split and no dollar sign.  The other possibilities are rare and
 437    have been relegated to parse_slow.  */
 438 static cpp_hashnode *
 439 parse_identifier (pfile)
 440      cpp_reader *pfile;
 441 {
 442   cpp_hashnode *result;
 443   const uchar *cur, *base;
 444
 445   /* Fast-path loop.  Skim over a normal identifier.
 446      N.B. ISIDNUM does not include $.  */
 447   cur = pfile->buffer->cur;
 448   while (ISIDNUM (*cur))
 449     cur++;
 450
 451   /* Check for slow-path cases.  */
 452   if (*cur == '?' || *cur == '\\' || *cur == '$')
 453     {
 454       unsigned int len;
 455
 456       base = parse_slow (pfile, cur, 0, &len);
 457       result = (cpp_hashnode *)
 458         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 459     }
 460   else
 461     {
 462       base = pfile->buffer->cur - 1;
 463       pfile->buffer->cur = cur;
 464       result = (cpp_hashnode *)
 465         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 466     }
 467
 468   /* Rarely, identifiers require diagnostics when lexed.
 469      XXX Has to be forced out of the fast path.  */
 470   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 471                         && !pfile->state.skipping, 0))
 472     {
 473       /* It is allowed to poison the same identifier twice.  */
 474       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 475         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 476                    NODE_NAME (result));
 477
 478       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 479          replacement list of a variadic macro.  */
 480       if (result == pfile->spec_nodes.n__VA_ARGS__
 481           && !pfile->state.va_args_ok)
 482         cpp_error (pfile, DL_PEDWARN,
 483         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 484     }
 485
 486   return result;
 487 }
 488
 489 /* Slow path.  This handles numbers and identifiers which have been
 490    split, or contain dollar signs.  The part of the token from
 491    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 492    1 if it's a number, and 2 if it has a leading period.  Returns a
 493    pointer to the token's NUL-terminated spelling in permanent
 494    storage, and sets PLEN to its length.  */
 495 static uchar *
 496 parse_slow (pfile, cur, number_p, plen)
 497      cpp_reader *pfile;
 498      const uchar *cur;
 499      int number_p;
 500      unsigned int *plen;
 501 {
 502   cpp_buffer *buffer = pfile->buffer;
 503   const uchar *base = buffer->cur - 1;
 504   struct obstack *stack = &pfile->hash_table->stack;
 505   unsigned int c, prevc, saw_dollar = 0;
 506
 507   /* Place any leading period.  */
 508   if (number_p == 2)
 509     obstack_1grow (stack, '.');
 510
 511   /* Copy the part of the token which is known to be okay.  */
 512   obstack_grow (stack, base, cur - base);
 513
 514   /* Now process the part which isn't.  We are looking at one of
 515      '$', '\\', or '?' on entry to this loop.  */
 516   prevc = cur[-1];
 517   c = *cur++;
 518   buffer->cur = cur;
 519   for (;;)
 520     {
 521       /* Potential escaped newline?  */
 522       buffer->backup_to = buffer->cur - 1;
 523       if (c == '?' || c == '\\')
 524         c = skip_escaped_newlines (pfile);
 525
 526       if (!is_idchar (c))
 527         {
 528           if (!number_p)
 529             break;
 530           if (c != '.' && !VALID_SIGN (c, prevc))
 531             break;
 532         }
 533
 534       /* Handle normal identifier characters in this loop.  */
 535       do
 536         {
 537           prevc = c;
 538           obstack_1grow (stack, c);
 539
 540           if (c == '$')
 541             saw_dollar++;
 542
 543           c = *buffer->cur++;
 544         }
 545       while (is_idchar (c));
 546     }
 547
 548   /* Step back over the unwanted char.  */
 549   BACKUP ();
 550
 551   /* $ is not an identifier character in the standard, but is commonly
 552      accepted as an extension.  Don't warn about it in skipped
 553      conditional blocks.  */
 554   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 555     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 556
 557   /* Identifiers and numbers are null-terminated.  */
 558   *plen = obstack_object_size (stack);
 559   obstack_1grow (stack, '\0');
 560   return obstack_finish (stack);
 561 }
 562
 563 /* Parse a number, beginning with character C, skipping embedded
 564    backslash-newlines.  LEADING_PERIOD is non-zero if there was a "."
 565    before C.  Place the result in NUMBER.  */
 566 static void
 567 parse_number (pfile, number, leading_period)
 568      cpp_reader *pfile;
 569      cpp_string *number;
 570      int leading_period;
 571 {
 572   const uchar *cur;
 573
 574   /* Fast-path loop.  Skim over a normal number.
 575      N.B. ISIDNUM does not include $.  */
 576   cur = pfile->buffer->cur;
 577   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 578     cur++;
 579
 580   /* Check for slow-path cases.  */
 581   if (*cur == '?' || *cur == '\\' || *cur == '$')
 582     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 583   else
 584     {
 585       const uchar *base = pfile->buffer->cur - 1;
 586       uchar *dest;
 587
 588       number->len = cur - base + leading_period;
 589       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 590       dest[number->len] = '\0';
 591       number->text = dest;
 592
 593       if (leading_period)
 594         *dest++ = '.';
 595       memcpy (dest, base, cur - base);
 596       pfile->buffer->cur = cur;
 597     }
 598 }
 599
 600 /* Subroutine of parse_string.  */
 601 static int
 602 unescaped_terminator_p (pfile, dest)
 603      cpp_reader *pfile;
 604      const unsigned char *dest;
 605 {
 606   const unsigned char *start, *temp;
 607
 608   /* In #include-style directives, terminators are not escapeable.  */
 609   if (pfile->state.angled_headers)
 610     return 1;
 611
 612   start = BUFF_FRONT (pfile->u_buff);
 613
 614   /* An odd number of consecutive backslashes represents an escaped
 615      terminator.  */
 616   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 617     ;
 618
 619   return ((dest - temp) & 1) == 0;
 620 }
 621
 622 /* Parses a string, character constant, or angle-bracketed header file
 623    name.  Handles embedded trigraphs and escaped newlines.  The stored
 624    string is guaranteed NUL-terminated, but it is not guaranteed that
 625    this is the first NUL since embedded NULs are preserved.
 626
 627    When this function returns, buffer->cur points to the next
 628    character to be processed.  */
 629 static void
 630 parse_string (pfile, token, terminator)
 631      cpp_reader *pfile;
 632      cpp_token *token;
 633      cppchar_t terminator;
 634 {
 635   cpp_buffer *buffer = pfile->buffer;
 636   unsigned char *dest, *limit;
 637   cppchar_t c;
 638   bool warned_nulls = false;
 639 #ifdef MULTIBYTE_CHARS
 640   wchar_t wc;
 641   int char_len;
 642 #endif
 643
 644   dest = BUFF_FRONT (pfile->u_buff);
 645   limit = BUFF_LIMIT (pfile->u_buff);
 646
 647 #ifdef MULTIBYTE_CHARS
 648   /* Reset multibyte conversion state.  */
 649   (void) local_mbtowc (NULL, NULL, 0);
 650 #endif
 651   for (;;)
 652     {
 653       /* We need room for another char, possibly the terminating NUL.  */
 654       if ((size_t) (limit - dest) < 1)
 655         {
 656           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 657           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 658           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 659           limit = BUFF_LIMIT (pfile->u_buff);
 660         }
 661
 662 #ifdef MULTIBYTE_CHARS
 663       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 664                                buffer->rlimit - buffer->cur);
 665       if (char_len == -1)
 666         {
 667           cpp_error (pfile, DL_WARNING,
 668                      "ignoring invalid multibyte character");
 669           char_len = 1;
 670           c = *buffer->cur++;
 671         }
 672       else
 673         {
 674           buffer->cur += char_len;
 675           c = wc;
 676         }
 677 #else
 678       c = *buffer->cur++;
 679 #endif
 680
 681       /* Handle trigraphs, escaped newlines etc.  */
 682       if (c == '?' || c == '\\')
 683         c = skip_escaped_newlines (pfile);
 684
 685       if (c == terminator)
 686         {
 687           if (unescaped_terminator_p (pfile, dest))
 688             break;
 689         }
 690       else if (is_vspace (c))
 691         {
 692           /* No string literal may extend over multiple lines.  In
 693              assembly language, suppress the error except for <>
 694              includes.  This is a kludge around not knowing where
 695              comments are.  */
 696         unterminated:
 697           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 698             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 699                        (int) terminator);
 700           buffer->cur--;
 701           break;
 702         }
 703       else if (c == '\0')
 704         {
 705           if (buffer->cur - 1 == buffer->rlimit)
 706             goto unterminated;
 707           if (!warned_nulls)
 708             {
 709               warned_nulls = true;
 710               cpp_error (pfile, DL_WARNING,
 711                          "null character(s) preserved in literal");
 712             }
 713         }
 714 #ifdef MULTIBYTE_CHARS
 715       if (char_len > 1)
 716         {
 717           for ( ; char_len > 0; --char_len)
 718             *dest++ = (*buffer->cur - char_len);
 719         }
 720       else
 721 #endif
 722         *dest++ = c;
 723     }
 724
 725   *dest = '\0';
 726
 727   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 728   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 729   BUFF_FRONT (pfile->u_buff) = dest + 1;
 730 }
 731
 732 /* The stored comment includes the comment start and any terminator.  */
 733 static void
 734 save_comment (pfile, token, from, type)
 735      cpp_reader *pfile;
 736      cpp_token *token;
 737      const unsigned char *from;
 738      cppchar_t type;
 739 {
 740   unsigned char *buffer;
 741   unsigned int len, clen;
 742
 743   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 744
 745   /* C++ comments probably (not definitely) have moved past a new
 746      line, which we don't want to save in the comment.  */
 747   if (is_vspace (pfile->buffer->cur[-1]))
 748     len--;
 749
 750   /* If we are currently in a directive, then we need to store all
 751      C++ comments as C comments internally, and so we need to
 752      allocate a little extra space in that case.
 753
 754      Note that the only time we encounter a directive here is
 755      when we are saving comments in a "#define".  */
 756   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 757
 758   buffer = _cpp_unaligned_alloc (pfile, clen);
 759
 760   token->type = CPP_COMMENT;
 761   token->val.str.len = clen;
 762   token->val.str.text = buffer;
 763
 764   buffer[0] = '/';
 765   memcpy (buffer + 1, from, len - 1);
 766
 767   /* Finish conversion to a C comment, if necessary.  */
 768   if (pfile->state.in_directive && type == '/')
 769     {
 770       buffer[1] = '*';
 771       buffer[clen - 2] = '*';
 772       buffer[clen - 1] = '/';
 773     }
 774 }
 775
 776 /* Allocate COUNT tokens for RUN.  */
 777 void
 778 _cpp_init_tokenrun (run, count)
 779      tokenrun *run;
 780      unsigned int count;
 781 {
 782   run->base = xnewvec (cpp_token, count);
 783   run->limit = run->base + count;
 784   run->next = NULL;
 785 }
 786
 787 /* Returns the next tokenrun, or creates one if there is none.  */
 788 static tokenrun *
 789 next_tokenrun (run)
 790      tokenrun *run;
 791 {
 792   if (run->next == NULL)
 793     {
 794       run->next = xnew (tokenrun);
 795       run->next->prev = run;
 796       _cpp_init_tokenrun (run->next, 250);
 797     }
 798
 799   return run->next;
 800 }
 801
 802 /* Allocate a single token that is invalidated at the same time as the
 803    rest of the tokens on the line.  Has its line and col set to the
 804    same as the last lexed token, so that diagnostics appear in the
 805    right place.  */
 806 cpp_token *
 807 _cpp_temp_token (pfile)
 808      cpp_reader *pfile;
 809 {
 810   cpp_token *old, *result;
 811
 812   old = pfile->cur_token - 1;
 813   if (pfile->cur_token == pfile->cur_run->limit)
 814     {
 815       pfile->cur_run = next_tokenrun (pfile->cur_run);
 816       pfile->cur_token = pfile->cur_run->base;
 817     }
 818
 819   result = pfile->cur_token++;
 820   result->line = old->line;
 821   result->col = old->col;
 822   return result;
 823 }
 824
 825 /* Lex a token into RESULT (external interface).  Takes care of issues
 826    like directive handling, token lookahead, multiple include
 827    optimization and skipping.  */
 828 const cpp_token *
 829 _cpp_lex_token (pfile)
 830      cpp_reader *pfile;
 831 {
 832   cpp_token *result;
 833
 834   for (;;)
 835     {
 836       if (pfile->cur_token == pfile->cur_run->limit)
 837         {
 838           pfile->cur_run = next_tokenrun (pfile->cur_run);
 839           pfile->cur_token = pfile->cur_run->base;
 840         }
 841
 842       if (pfile->lookaheads)
 843         {
 844           pfile->lookaheads--;
 845           result = pfile->cur_token++;
 846         }
 847       else
 848         result = _cpp_lex_direct (pfile);
 849
 850       if (result->flags & BOL)
 851         {
 852           /* Is this a directive.  If _cpp_handle_directive returns
 853              false, it is an assembler #.  */
 854           if (result->type == CPP_HASH
 855               /* 6.10.3 p 11: Directives in a list of macro arguments
 856                  gives undefined behavior.  This implementation
 857                  handles the directive as normal.  */
 858               && pfile->state.parsing_args != 1
 859               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 860             continue;
 861           if (pfile->cb.line_change && !pfile->state.skipping)
 862             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 863         }
 864
 865       /* We don't skip tokens in directives.  */
 866       if (pfile->state.in_directive)
 867         break;
 868
 869       /* Outside a directive, invalidate controlling macros.  At file
 870          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 871          get here and MI optimisation works.  */
 872       pfile->mi_valid = false;
 873
 874       if (!pfile->state.skipping || result->type == CPP_EOF)
 875         break;
 876     }
 877
 878   return result;
 879 }
 880
 881 /* A NUL terminates the current buffer.  For ISO preprocessing this is
 882    EOF, but for traditional preprocessing it indicates we need a line
 883    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
 884    to return a CPP_EOF to the caller.

        In ISO mode the buffer is popped only when no macro arguments
        are being parsed, no directive is in progress and the buffer
        has a predecessor (buffer->prev); otherwise FALSE is returned
        with the buffer left in place.  */
 885 static bool
 886 continue_after_nul (pfile)
 887      cpp_reader *pfile;
 888 {
 889   cpp_buffer *buffer = pfile->buffer;
 890   bool more = false;
 891
       /* _cpp_lex_direct copies saved_flags into the next token it lexes
          from this buffer, so that token is marked beginning-of-line.  */
 892   buffer->saved_flags = BOL;
 893   if (CPP_OPTION (pfile, traditional))
 894     more = _cpp_read_logical_line_trad (pfile);
 895   else
 896     {
 897       /* Stop parsing arguments with a CPP_EOF.  When we finally come
 898          back here, do the work of popping the buffer.  */
 899       if (!pfile->state.parsing_args)
 900         {
               /* The read pointer is not at the start of a line, so the
                  last line had no terminating newline.  */
 901           if (buffer->cur != buffer->line_base)
 902             {
 903               /* Non-empty files should end in a newline.  Don't warn
 904                  for command line and _Pragma buffers.  */
 905               if (!buffer->from_stage3)
 906                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
 907               handle_newline (pfile);
 908             }
 909
 910           /* Similarly, finish an in-progress directive with CPP_EOF
 911              before popping the buffer.  */
 912           if (!pfile->state.in_directive && buffer->prev)
 913             {
                   /* Read return_at_eof before the buffer is popped.  */
 914               more = !buffer->return_at_eof;
 915               _cpp_pop_buffer (pfile);
 916             }
 917         }
 918     }
 919
 920   return more;
 921 }
 922
     /* Helper for the two-character operators in _cpp_lex_direct.  Reads
        the next effective character; if it equals CHAR the token type
        becomes THEN_TYPE, otherwise BACKUP () is invoked and the token
        type becomes ELSE_TYPE.  Expects `pfile' and `result' to be in
        scope where it is expanded.  BACKUP is defined elsewhere in the
        file - presumably it un-reads the character just fetched.  */
 923 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 924   do {                                          \
 925     if (get_effective_char (pfile) == CHAR)     \
 926       result->type = THEN_TYPE;                 \
 927     else                                        \
 928       {                                         \
 929         BACKUP ();                              \
 930         result->type = ELSE_TYPE;               \
 931       }                                         \
 932   } while (0)
 933
 934 /* Lex a token into pfile->cur_token, which is also incremented, to
 935    get diagnostics pointing to the correct location.
 936
 937    Does not handle issues such as token lookahead, multiple-include
 938    optimisation, directives, skipping etc.  This function is only
 939    suitable for use by _cpp_lex_token, and in special cases like
 940    lex_expansion_token which doesn't care for any of these issues.
 941
 942    When meeting a newline, returns CPP_EOF if parsing a directive,
 943    otherwise returns to the start of the token buffer if permissible.
 944    Returns the location of the lexed token.

        Control re-enters at the labels in the body: `fresh_line' after
        a newline or after continue_after_nul asked to carry on,
        `update_tokens_line' when pfile->line may have moved on (an
        escaped newline or a skipped comment), `skipped_white' after
        horizontal whitespace has been skipped, and `trigraph' to
        dispatch again on a character handed back by
        skip_escaped_newlines.  */
 945 cpp_token *
 946 _cpp_lex_direct (pfile)
 947      cpp_reader *pfile;
 948 {
 949   cppchar_t c;
 950   cpp_buffer *buffer;
 951   const unsigned char *comment_start;
 952   cpp_token *result = pfile->cur_token++;
 953
 954  fresh_line:
       /* Re-read the buffer pointer: it is reloaded on every pass
          through here.  The flags saved for the first token of the line
          are consumed exactly once.  */
 955   buffer = pfile->buffer;
 956   result->flags = buffer->saved_flags;
 957   buffer->saved_flags = 0;
 958  update_tokens_line:
 959   result->line = pfile->line;
 960
 961  skipped_white:
 962   c = *buffer->cur++;
 963   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 964
 965  trigraph:
 966   switch (c)
 967     {
 968     case ' ': case '\t': case '\f': case '\v': case '\0':
 969       result->flags |= PREV_WHITE;
 970       if (skip_whitespace (pfile, c))
 971         goto skipped_white;
 972
 973       /* End of buffer.  */
 974       buffer->cur--;
 975       if (continue_after_nul (pfile))
 976         goto fresh_line;
 977       result->type = CPP_EOF;
 978       break;
 979
 980     case '\n': case '\r':
 981       handle_newline (pfile);
 982       buffer->saved_flags = BOL;
 983       if (! pfile->state.in_directive)
 984         {
 985           if (pfile->state.parsing_args == 2)
 986             buffer->saved_flags |= PREV_WHITE;
               /* Tokens need not be kept: restart at the base of the
                  first token run and reuse its first slot.  */
 987           if (!pfile->keep_tokens)
 988             {
 989               pfile->cur_run = &pfile->base_run;
 990               result = pfile->base_run.base;
 991               pfile->cur_token = result + 1;
 992             }
 993           goto fresh_line;
 994         }
           /* Inside a directive the newline ends it.  */
 995       result->type = CPP_EOF;
 996       break;
 997
 998     case '?':
 999     case '\\':
1000       /* These could start an escaped newline, or '?' a trigraph.  Let
1001          skip_escaped_newlines do all the work.  */
1002       {
1003         unsigned int line = pfile->line;
1004
1005         c = skip_escaped_newlines (pfile);
1006         if (line != pfile->line)
1007           {
1008             buffer->cur--;
1009             /* We had at least one escaped newline of some sort.
1010                Update the token's line and column.  */
1011             goto update_tokens_line;
1012           }
1013       }
1014
1015       /* We are either the original '?' or '\\', or a trigraph.  */
1016       if (c == '?')
1017         result->type = CPP_QUERY;
1018       else if (c == '\\')
1019         goto random_char;
1020       else
1021         goto trigraph;
1022       break;
1023
1024     case '0': case '1': case '2': case '3': case '4':
1025     case '5': case '6': case '7': case '8': case '9':
1026       result->type = CPP_NUMBER;
1027       parse_number (pfile, &result->val.str, 0);
1028       break;
1029
1030     case 'L':
1031       /* 'L' may introduce wide characters or strings.  */
1032       {
1033         const unsigned char *pos = buffer->cur;
1034
1035         c = get_effective_char (pfile);
1036         if (c == '\'' || c == '"')
1037           {
1038             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1039             parse_string (pfile, result, c);
1040             break;
1041           }
             /* Not a wide literal: rewind to just after the 'L' and lex
                it as the start of an identifier.  */
1042         buffer->cur = pos;
1043       }
1044       /* Fall through.  */
1045
1046     start_ident:
1047     case '_':
1048     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1049     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1050     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1051     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1052     case 'y': case 'z':
1053     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1054     case 'G': case 'H': case 'I': case 'J': case 'K':
1055     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1056     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1057     case 'Y': case 'Z':
1058       result->type = CPP_NAME;
1059       result->val.node = parse_identifier (pfile);
1060
1061       /* Convert named operators to their proper types.  */
1062       if (result->val.node->flags & NODE_OPERATOR)
1063         {
1064           result->flags |= NAMED_OP;
1065           result->type = result->val.node->value.operator;
1066         }
1067       break;
1068
1069     case '\'':
1070     case '"':
1071       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1072       parse_string (pfile, result, c);
1073       break;
1074
1075     case '/':
1076       /* A potential block or line comment.  */
1077       comment_start = buffer->cur;
1078       c = get_effective_char (pfile);
1079
1080       if (c == '*')
1081         {
1082           if (skip_block_comment (pfile))
1083             cpp_error (pfile, DL_ERROR, "unterminated comment");
1084         }
1085       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1086                             || CPP_IN_SYSTEM_HEADER (pfile)))
1087         {
1088           /* Warn about comments only if pedantically GNUC89, and not
1089              in system headers.  */
1090           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1091               && ! buffer->warned_cplusplus_comments)
1092             {
1093               cpp_error (pfile, DL_PEDWARN,
1094                          "C++ style comments are not allowed in ISO C89");
1095               cpp_error (pfile, DL_PEDWARN,
1096                          "(this will be reported only once per input file)");
1097               buffer->warned_cplusplus_comments = 1;
1098             }
1099
1100           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1101             cpp_error (pfile, DL_WARNING, "multi-line comment");
1102         }
1103       else if (c == '=')
1104         {
1105           result->type = CPP_DIV_EQ;
1106           break;
1107         }
1108       else
1109         {
1110           BACKUP ();
1111           result->type = CPP_DIV;
1112           break;
1113         }
1114
           /* Only the two comment branches reach this point.  Unless
              comments are being saved, the comment counts as whitespace
              and lexing resumes after it.  */
1115       if (!pfile->state.save_comments)
1116         {
1117           result->flags |= PREV_WHITE;
1118           goto update_tokens_line;
1119         }
1120
1121       /* Save the comment as a token in its own right.  */
1122       save_comment (pfile, result, comment_start, c);
1123       break;
1124
1125     case '<':
           /* When angled headers are expected, everything up to the
              closing '>' is lexed as a single header-name token.  */
1126       if (pfile->state.angled_headers)
1127         {
1128           result->type = CPP_HEADER_NAME;
1129           parse_string (pfile, result, '>');
1130           break;
1131         }
1132
1133       c = get_effective_char (pfile);
1134       if (c == '=')
1135         result->type = CPP_LESS_EQ;
1136       else if (c == '<')
1137         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1138       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1139         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1140       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1141         {
1142           result->type = CPP_OPEN_SQUARE;
1143           result->flags |= DIGRAPH;
1144         }
1145       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1146         {
1147           result->type = CPP_OPEN_BRACE;
1148           result->flags |= DIGRAPH;
1149         }
1150       else
1151         {
1152           BACKUP ();
1153           result->type = CPP_LESS;
1154         }
1155       break;
1156
1157     case '>':
1158       c = get_effective_char (pfile);
1159       if (c == '=')
1160         result->type = CPP_GREATER_EQ;
1161       else if (c == '>')
1162         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1163       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1164         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1165       else
1166         {
1167           BACKUP ();
1168           result->type = CPP_GREATER;
1169         }
1170       break;
1171
1172     case '%':
1173       c = get_effective_char (pfile);
1174       if (c == '=')
1175         result->type = CPP_MOD_EQ;
1176       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1177         {
               /* "%:" is the digraph for '#'; "%:%:" is the digraph for
                  the paste operator.  */
1178           result->flags |= DIGRAPH;
1179           result->type = CPP_HASH;
1180           if (get_effective_char (pfile) == '%')
1181             {
1182               const unsigned char *pos = buffer->cur;
1183
1184               if (get_effective_char (pfile) == ':')
1185                 result->type = CPP_PASTE;
1186               else
                     /* Not "%:%:": step back one before the saved
                        position so the '%' just read is lexed again.  */
1187                 buffer->cur = pos - 1;
1188             }
1189           else
1190             BACKUP ();
1191         }
1192       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1193         {
1194           result->flags |= DIGRAPH;
1195           result->type = CPP_CLOSE_BRACE;
1196         }
1197       else
1198         {
1199           BACKUP ();
1200           result->type = CPP_MOD;
1201         }
1202       break;
1203
1204     case '.':
1205       result->type = CPP_DOT;
1206       c = get_effective_char (pfile);
1207       if (c == '.')
1208         {
1209           const unsigned char *pos = buffer->cur;
1210
1211           if (get_effective_char (pfile) == '.')
1212             result->type = CPP_ELLIPSIS;
1213           else
                 /* Only two dots: step back one before the saved
                    position so the second '.' is lexed again.  */
1214             buffer->cur = pos - 1;
1215         }
1216       /* All known character sets have 0...9 contiguous.  */
1217       else if (ISDIGIT (c))
1218         {
1219           result->type = CPP_NUMBER;
               /* NOTE(review): the final argument is 1 here and 0 for a
                  number starting with a digit; presumably it flags the
                  leading '.' - confirm against parse_number.  */
1220           parse_number (pfile, &result->val.str, 1);
1221         }
1222       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1223         result->type = CPP_DOT_STAR;
1224       else
1225         BACKUP ();
1226       break;
1227
1228     case '+':
1229       c = get_effective_char (pfile);
1230       if (c == '+')
1231         result->type = CPP_PLUS_PLUS;
1232       else if (c == '=')
1233         result->type = CPP_PLUS_EQ;
1234       else
1235         {
1236           BACKUP ();
1237           result->type = CPP_PLUS;
1238         }
1239       break;
1240
1241     case '-':
1242       c = get_effective_char (pfile);
1243       if (c == '>')
1244         {
1245           result->type = CPP_DEREF;
               /* "->*" is only looked for when lexing C++.  */
1246           if (CPP_OPTION (pfile, cplusplus))
1247             {
1248               if (get_effective_char (pfile) == '*')
1249                 result->type = CPP_DEREF_STAR;
1250               else
1251                 BACKUP ();
1252             }
1253         }
1254       else if (c == '-')
1255         result->type = CPP_MINUS_MINUS;
1256       else if (c == '=')
1257         result->type = CPP_MINUS_EQ;
1258       else
1259         {
1260           BACKUP ();
1261           result->type = CPP_MINUS;
1262         }
1263       break;
1264
1265     case '&':
1266       c = get_effective_char (pfile);
1267       if (c == '&')
1268         result->type = CPP_AND_AND;
1269       else if (c == '=')
1270         result->type = CPP_AND_EQ;
1271       else
1272         {
1273           BACKUP ();
1274           result->type = CPP_AND;
1275         }
1276       break;
1277
1278     case '|':
1279       c = get_effective_char (pfile);
1280       if (c == '|')
1281         result->type = CPP_OR_OR;
1282       else if (c == '=')
1283         result->type = CPP_OR_EQ;
1284       else
1285         {
1286           BACKUP ();
1287           result->type = CPP_OR;
1288         }
1289       break;
1290
1291     case ':':
1292       c = get_effective_char (pfile);
1293       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1294         result->type = CPP_SCOPE;
1295       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1296         {
1297           result->flags |= DIGRAPH;
1298           result->type = CPP_CLOSE_SQUARE;
1299         }
1300       else
1301         {
1302           BACKUP ();
1303           result->type = CPP_COLON;
1304         }
1305       break;
1306
1307     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1308     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1309     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1310     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1311     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1312
1313     case '~': result->type = CPP_COMPL; break;
1314     case ',': result->type = CPP_COMMA; break;
1315     case '(': result->type = CPP_OPEN_PAREN; break;
1316     case ')': result->type = CPP_CLOSE_PAREN; break;
1317     case '[': result->type = CPP_OPEN_SQUARE; break;
1318     case ']': result->type = CPP_CLOSE_SQUARE; break;
1319     case '{': result->type = CPP_OPEN_BRACE; break;
1320     case '}': result->type = CPP_CLOSE_BRACE; break;
1321     case ';': result->type = CPP_SEMICOLON; break;
1322
1323       /* @ is a punctuator in Objective C.  */
1324     case '@': result->type = CPP_ATSIGN; break;
1325
1326     case '$':
1327       if (CPP_OPTION (pfile, dollars_in_ident))
1328         goto start_ident;
1329       /* Fall through...  */
1330
1331     random_char:
1332     default:
           /* Any other character becomes a CPP_OTHER token that carries
              the character itself.  */
1333       result->type = CPP_OTHER;
1334       result->val.c = c;
1335       break;
1336     }
1337
1338   return result;
1339 }
1340
1341 /* An upper bound on the number of bytes needed to spell TOKEN,
1342    including preceding whitespace.

        Named operators (tokens flagged NAMED_OP) have an operator token
        type, but cpp_spell_token and cpp_output_token spell them from
        their identifier node.  Such a name can be longer than the five
        spare bytes added below, so its length must be taken from the
        node or callers that size a buffer with this function (for
        example cpp_token_as_text) would overrun it.  */
1343 unsigned int
1344 cpp_token_len (token)
1345      const cpp_token *token;
1346 {
1347   unsigned int len;
1348
1349   switch (TOKEN_SPELL (token))
1350     {
1351     default:            len = 0;                                break;
         case SPELL_OPERATOR:
           /* A named operator is written out as its identifier; plain
              and digraph punctuators fit in the slack added below.  */
           if (token->flags & NAMED_OP)
             len = NODE_LEN (token->val.node);
           else
             len = 0;
           break;
1352     case SPELL_NUMBER:
1353     case SPELL_STRING:  len = token->val.str.len;               break;
1354     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1355     }
1356   /* 1 for whitespace, 4 for comment delimiters.  */
1357   return len + 5;
1358 }
1359
1360 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1361    already contain enough space to hold the token's spelling.
1362    Returns a pointer to the character after the last character
1363    written.

        No terminating NUL is written.  Operators flagged NAMED_OP are
        spelt from their identifier node, and those flagged DIGRAPH from
        digraph_spellings.  An unspellable token (SPELL_NONE) or an
        unexpected string token type raises an internal error through
        cpp_error and writes nothing.  */
1364 unsigned char *
1365 cpp_spell_token (pfile, token, buffer)
1366      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1367      const cpp_token *token;
1368      unsigned char *buffer;
1369 {
1370   switch (TOKEN_SPELL (token))
1371     {
1372     case SPELL_OPERATOR:
1373       {
1374         const unsigned char *spelling;
1375         unsigned char c;
1376
1377         if (token->flags & DIGRAPH)
1378           spelling
1379             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1380         else if (token->flags & NAMED_OP)
1381           goto spell_ident;
1382         else
1383           spelling = TOKEN_NAME (token);
1384
             /* Copy the NUL-terminated spelling, excluding the NUL.  */
1385         while ((c = *spelling++) != '\0')
1386           *buffer++ = c;
1387       }
1388       break;
1389
1390     case SPELL_CHAR:
1391       *buffer++ = token->val.c;
1392       break;
1393
1394     spell_ident:
1395     case SPELL_IDENT:
1396       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1397       buffer += NODE_LEN (token->val.node);
1398       break;
1399
1400     case SPELL_NUMBER:
1401       memcpy (buffer, token->val.str.text, token->val.str.len);
1402       buffer += token->val.str.len;
1403       break;
1404
1405     case SPELL_STRING:
1406       {
             /* LEFT and RIGHT are the delimiters put back around the
                stored text; TAG is an optional prefix character ('L' for
                the wide forms, NUL for none).  */
1407         int left, right, tag;
1408         switch (token->type)
1409           {
1410           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1411           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1412           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1413           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1414           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1415           default:
1416             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1417                        TOKEN_NAME (token));
1418             return buffer;
1419           }
1420         if (tag) *buffer++ = tag;
1421         *buffer++ = left;
1422         memcpy (buffer, token->val.str.text, token->val.str.len);
1423         buffer += token->val.str.len;
1424         *buffer++ = right;
1425       }
1426       break;
1427
1428     case SPELL_NONE:
1429       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1430       break;
1431     }
1432
1433   return buffer;
1434 }
1435
1436 /* Spell TOKEN into reader-owned storage and return it as a NUL-terminated
1437    string.  The storage lives until the reader is destroyed.  For diagnostics.  */
1438 unsigned char *
1439 cpp_token_as_text (pfile, token)
1440      cpp_reader *pfile;
1441      const cpp_token *token;
1442 {
1443   unsigned char *text, *tail;
1444
1445   text = _cpp_unaligned_alloc (pfile, cpp_token_len (token));
1446   tail = cpp_spell_token (pfile, token, text);
1447   *tail = '\0';
1448
1449   return text;
1450 }
1451
1452 /* Look up the token_spellings name recorded for TYPE.  The C front ends
1453    call this; they really should move to using cpp_token_as_text.  */
1454 const char *
1455 cpp_type2name (type)
1456      enum cpp_ttype type;
1457 {
1458   return (const char *) (token_spellings + type)->name;
1459 }
1460
1461 /* Writes the spelling of token to FP, without any preceding space.
1462    Separated from cpp_spell_token for efficiency - to avoid stdio
1463    double-buffering.

        The cases mirror cpp_spell_token.  The differences are that an
        unexpected string token type is reported with fprintf to stderr
        rather than through cpp_error, and that a SPELL_NONE token is
        silently ignored.  */
1464 void
1465 cpp_output_token (token, fp)
1466      const cpp_token *token;
1467      FILE *fp;
1468 {
1469   switch (TOKEN_SPELL (token))
1470     {
1471     case SPELL_OPERATOR:
1472       {
1473         const unsigned char *spelling;
1474         int c;
1475
1476         if (token->flags & DIGRAPH)
1477           spelling
1478             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1479         else if (token->flags & NAMED_OP)
1480           goto spell_ident;
1481         else
1482           spelling = TOKEN_NAME (token);
1483
             /* The first character is written before any test for NUL,
                so the spelling is assumed to be non-empty.  */
1484         c = *spelling;
1485         do
1486           putc (c, fp);
1487         while ((c = *++spelling) != '\0');
1488       }
1489       break;
1490
1491     case SPELL_CHAR:
1492       putc (token->val.c, fp);
1493       break;
1494
1495     spell_ident:
1496     case SPELL_IDENT:
1497       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1498     break;
1499
1500     case SPELL_NUMBER:
1501       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1502       break;
1503
1504     case SPELL_STRING:
1505       {
             /* LEFT and RIGHT are the delimiters written around the
                stored text; TAG is an optional prefix character ('L' for
                the wide forms, NUL for none).  */
1506         int left, right, tag;
1507         switch (token->type)
1508           {
1509           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1510           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1511           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1512           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1513           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1514           default:
1515             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1516             return;
1517           }
1518         if (tag) putc (tag, fp);
1519         putc (left, fp);
1520         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1521         putc (right, fp);
1522       }
1523       break;
1524
1525     case SPELL_NONE:
1526       /* An error, most probably.  */
1527       break;
1528     }
1529 }
1530
1531 /* Returns 1 if A and B have equal type, flags and payload, else 0.  */
1532 int
1533 _cpp_equiv_tokens (a, b)
1534      const cpp_token *a, *b;
1535 {
1536   if (a->type != b->type || a->flags != b->flags)
1537     return 0;
1538   switch (TOKEN_SPELL (a))
1539     {
1540     case SPELL_CHAR:
1541       return a->val.c == b->val.c;
1542     case SPELL_IDENT:
1543       return a->val.node == b->val.node;
1544     case SPELL_NONE:
1545       /* Of these, only macro arguments carry a payload to compare.  */
1546       return a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no;
1547     case SPELL_NUMBER:
1548     case SPELL_STRING:
1549       if (a->val.str.len != b->val.str.len)
1550         return 0;
1551       return memcmp (a->val.str.text, b->val.str.text, a->val.str.len) == 0;
1552     case SPELL_OPERATOR:
1553     default:                  /* Keep compiler happy.  */
1554       return 1;
1555     }
1556 }
1557
1558 /* Returns nonzero if a space should be inserted to avoid an
1559    accidental token paste for output.  For simplicity, it is
1560    conservative, and occasionally advises a space where one is not
1561    needed, e.g. "." and ".2".

        The question asked is whether TOKEN1 immediately followed by
        TOKEN2 could be read back as something else.  The decision is
        based on TOKEN1's type and, when TOKEN2 is an operator, on the
        first character of TOKEN2's spelling.  Named operators on
        either side are treated as CPP_NAME.  */
1562 int
1563 cpp_avoid_paste (pfile, token1, token2)
1564      cpp_reader *pfile;
1565      const cpp_token *token1, *token2;
1566 {
1567   enum cpp_ttype a = token1->type, b = token2->type;
1568   cppchar_t c;
1569
1570   if (token1->flags & NAMED_OP)
1571     a = CPP_NAME;
1572   if (token2->flags & NAMED_OP)
1573     b = CPP_NAME;
1574
       /* C is the first character of TOKEN2's spelling if it is an
          operator (its digraph spelling when flagged DIGRAPH).
          Otherwise it stays EOF, which equals none of the characters
          tested below.  */
1575   c = EOF;
1576   if (token2->flags & DIGRAPH)
1577     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1578   else if (token_spellings[b].category == SPELL_OPERATOR)
1579     c = token_spellings[b].name[0];
1580
1581   /* Quickly get everything that can paste with an '='.  */
       /* NOTE(review): relies on the ordering of enum cpp_ttype, with
          the affected operators placed at or before CPP_LAST_EQ -
          confirm against the token table in cpplib.h.  */
1582   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1583     return 1;
1584
1585   switch (a)
1586     {
1587     case CPP_GREATER:   return c == '>' || c == '?';
1588     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1589     case CPP_PLUS:      return c == '+';
1590     case CPP_MINUS:     return c == '-' || c == '>';
1591     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1592     case CPP_MOD:       return c == ':' || c == '>';
1593     case CPP_AND:       return c == '&';
1594     case CPP_OR:        return c == '|';
1595     case CPP_COLON:     return c == ':' || c == '>';
1596     case CPP_DEREF:     return c == '*';
1597     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1598     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1599     case CPP_NAME:      return ((b == CPP_NUMBER
1600                                  && name_p (pfile, &token2->val.str))
1601                                 || b == CPP_NAME
1602                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1603     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1604                                 || c == '.' || c == '+' || c == '-');
1605     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1606                                 && token1->val.c == '@'
1607                                 && (b == CPP_NAME || b == CPP_STRING));
1608     default:            break;
1609     }
1610
1611   return 0;
1612 }
1613
1614 /* Write the rest of the current line's tokens to FP, then a newline.
1615    No space precedes the first token; after that a space is written for
1616    each following token with PREV_WHITE set.  No macro token padding.  */
1617 void
1618 cpp_output_line (pfile, fp)
1619      cpp_reader *pfile;
1620      FILE *fp;
1621 {
1622   const cpp_token *tok;
1623
1624   for (tok = cpp_get_token (pfile); tok->type != CPP_EOF; )
1625     {
1626       cpp_output_token (tok, fp);
1627
1628       tok = cpp_get_token (pfile);
1629       if (tok->flags & PREV_WHITE)
1630         putc (' ', fp);
1631     }
1632
1633   putc ('\n', fp);
1634 }
1635
1636 /* Numeric value of the hexadecimal digit C; aborts if C is not one.  */
1637 static unsigned int
1638 hex_digit_value (c)
1639      unsigned int c;
1640 {
1641   if (!hex_p (c))
1642     abort ();
1643
1644   return hex_value (c);
1645 }
1646
1647 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1648    failure if cpplib is not parsing C++ or C99.  Such failure is
1649    silent, and no variables are updated.  Otherwise returns 0, and
1650    warns if -Wtraditional.
1651
1652    [lex.charset]: The character designated by the universal character
1653    name \UNNNNNNNN is that character whose character short name in
1654    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1655    universal character name \uNNNN is that character whose character
1656    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1657    for a universal character name is less than 0x20 or in the range
1658    0x7F-0x9F (inclusive), or if the universal character name
1659    designates a character in the basic source character set, then the
1660    program is ill-formed.
1661
1662    We assume that wchar_t is Unicode, so we don't need to do any
1663    mapping.  Is this ever wrong?
1664
1665    PC points to the 'u' or 'U', PSTR points to the byte after PC,
1666    LIMIT is the end of the string or charconst.  PSTR is updated to
1667    point after the UCS on return, and the UCS is written into PC.

        Malformed sequences (too few characters before LIMIT, or a
        non-hex digit) are diagnosed but still return 0; *PC then
        receives whatever value had been accumulated so far.  */
1668
1669 static int
1670 maybe_read_ucs (pfile, pstr, limit, pc)
1671      cpp_reader *pfile;
1672      const unsigned char **pstr;
1673      const unsigned char *limit;
1674      cppchar_t *pc;
1675 {
1676   const unsigned char *p = *pstr;
1677   unsigned int code = 0;
1678   unsigned int c = *pc, length;
1679
1680   /* Only attempt to interpret a UCS for C++ and C99.  */
1681   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1682     return 1;
1683
1684   if (CPP_WTRADITIONAL (pfile))
1685     cpp_error (pfile, DL_WARNING,
1686                "the meaning of '\\%c' is different in traditional C", c);
1687
       /* \u is followed by 4 hex digits, \U by 8.  */
1688   length = (c == 'u' ? 4: 8);
1689
1690   if ((size_t) (limit - p) < length)
1691     {
1692       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1693       /* Skip to the end to avoid more diagnostics.  */
1694       p = limit;
1695     }
1696   else
1697     {
           /* LENGTH reaches zero only if every expected digit was a
              valid hex digit; the range check below relies on that.  */
1698       for (; length; length--, p++)
1699         {
1700           c = *p;
1701           if (ISXDIGIT (c))
1702             code = (code << 4) + hex_digit_value (c);
1703           else
1704             {
1705               cpp_error (pfile, DL_ERROR,
1706                          "non-hex digit '%c' in universal-character-name", c);
1707               /* We shouldn't skip in case there are multibyte chars.  */
1708               break;
1709             }
1710         }
1711     }
1712
1713 #ifdef TARGET_EBCDIC
1714   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1715   code = 0x3f;  /* EBCDIC invalid character */
1716 #else
1717  /* True extended characters are OK.  */
       /* That is: at least 0xa0, top bit clear, and outside the range
          0xD800-0xDFFF.  */
1718   if (code >= 0xa0
1719       && !(code & 0x80000000)
1720       && !(code >= 0xD800 && code <= 0xDFFF))
1721     ;
1722   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1723      hex escapes so that this also works with EBCDIC hosts.  */
1724   else if (code == 0x24 || code == 0x40 || code == 0x60)
1725     ;
1726   /* Don't give another error if one occurred above.  */
1727   else if (length == 0)
1728     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1729 #endif
1730
1731   *pstr = p;
1732   *pc = code;
1733   return 0;
1734 }
1735
1736 /* Returns the value of an escape sequence, truncated to the correct
1737    target precision.  PSTR points to the input pointer, which is just
1738    after the backslash.  LIMIT is how much text we have.  WIDE is true
1739    if the escape sequence is part of a wide character constant or
1740    string literal.  Handles all relevant diagnostics.

        On return *PSTR points just past the last character consumed.
        The first character after the backslash is read without being
        checked against LIMIT, so the caller must supply at least one
        character.  */
1741 cppchar_t
1742 cpp_parse_escape (pfile, pstr, limit, wide)
1743      cpp_reader *pfile;
1744      const unsigned char **pstr;
1745      const unsigned char *limit;
1746      int wide;
1747 {
1748   int unknown = 0;
1749   const unsigned char *str = *pstr;
1750   cppchar_t c, mask;
1751   unsigned int width;
1752
1753   if (wide)
1754     width = CPP_OPTION (pfile, wchar_precision);
1755   else
1756     width = CPP_OPTION (pfile, char_precision);
       /* MASK has the low WIDTH bits set.  A shift by the full width of
          cppchar_t is avoided by using all-ones in that case.  */
1757   if (width < BITS_PER_CPPCHAR_T)
1758     mask = ((cppchar_t) 1 << width) - 1;
1759   else
1760     mask = ~0;
1761
1762   c = *str++;
1763   switch (c)
1764     {
         /* These escapes stand for the character itself.  */
1765     case '\\': case '\'': case '"': case '?': break;
1766     case 'b': c = TARGET_BS;      break;
1767     case 'f': c = TARGET_FF;      break;
1768     case 'n': c = TARGET_NEWLINE; break;
1769     case 'r': c = TARGET_CR;      break;
1770     case 't': c = TARGET_TAB;     break;
1771     case 'v': c = TARGET_VT;      break;
1772
1773     case '(': case '{': case '[': case '%':
1774       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1775          '\%' is used to prevent SCCS from getting confused.  */
           /* Accepted as the character itself; only diagnosed as an
              unknown escape when pedantic.  */
1776       unknown = CPP_PEDANTIC (pfile);
1777       break;
1778
1779     case 'a':
1780       if (CPP_WTRADITIONAL (pfile))
1781         cpp_error (pfile, DL_WARNING,
1782                    "the meaning of '\\a' is different in traditional C");
1783       c = TARGET_BELL;
1784       break;
1785
1786     case 'e': case 'E':
1787       if (CPP_PEDANTIC (pfile))
1788         cpp_error (pfile, DL_PEDWARN,
1789                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1790       c = TARGET_ESC;
1791       break;
1792
1793     case 'u': case 'U':
           /* maybe_read_ucs returns nonzero, leaving STR and C alone,
              when UCNs are not interpreted in the current language; the
              escape is then reported as unknown below.  */
1794       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1795       break;
1796
1797     case 'x':
1798       if (CPP_WTRADITIONAL (pfile))
1799         cpp_error (pfile, DL_WARNING,
1800                    "the meaning of '\\x' is different in traditional C");
1801
1802       {
1803         cppchar_t i = 0, overflow = 0;
1804         int digits_found = 0;
1805
             /* Consume every hex digit that follows, however many.  */
1806         while (str < limit)
1807           {
1808             c = *str;
1809             if (! ISXDIGIT (c))
1810               break;
1811             str++;
                 /* Becomes nonzero once shifting I left by four bits
                    would discard set high-order bits.  */
1812             overflow |= i ^ (i << 4 >> 4);
1813             i = (i << 4) + hex_digit_value (c);
1814             digits_found = 1;
1815           }
1816
1817         if (!digits_found)
1818           cpp_error (pfile, DL_ERROR,
1819                        "\\x used with no following hex digits");
1820
1821         if (overflow | (i != (i & mask)))
1822           {
1823             cpp_error (pfile, DL_PEDWARN,
1824                        "hex escape sequence out of range");
1825             i &= mask;
1826           }
1827         c = i;
1828       }
1829       break;
1830
1831     case '0':  case '1':  case '2':  case '3':
1832     case '4':  case '5':  case '6':  case '7':
1833       {
1834         size_t count = 0;
1835         cppchar_t i = c - '0';
1836
             /* The first digit is already in I; accept at most two more,
                giving three octal digits in total.  */
1837         while (str < limit && ++count < 3)
1838           {
1839             c = *str;
1840             if (c < '0' || c > '7')
1841               break;
1842             str++;
1843             i = (i << 3) + c - '0';
1844           }
1845
1846         if (i != (i & mask))
1847           {
1848             cpp_error (pfile, DL_PEDWARN,
1849                        "octal escape sequence out of range");
1850             i &= mask;
1851           }
1852         c = i;
1853       }
1854       break;
1855
1856     default:
1857       unknown = 1;
1858       break;
1859     }
1860
       /* An unknown escape is diagnosed, printing the character itself if
          it is graphic and its octal code otherwise.  C is left as it
          was, so the escape yields the character after the backslash.  */
1861   if (unknown)
1862     {
1863       if (ISGRAPH (c))
1864         cpp_error (pfile, DL_PEDWARN,
1865                    "unknown escape sequence '\\%c'", (int) c);
1866       else
1867         cpp_error (pfile, DL_PEDWARN,
1868                    "unknown escape sequence: '\\%03o'", (int) c);
1869     }
1870
       /* Final range check; it applies to every kind of escape.  */
1871   if (c > mask)
1872     {
1873       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1874       c &= mask;
1875     }
1876
1877   *pstr = str;
1878   return c;
1879 }
1880
1881 /* Interpret a (possibly wide) character constant in TOKEN.
1882    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1883    points to a variable that is filled in with the number of
1884    characters seen, and UNSIGNEDP to a variable that indicates whether
1885    the result has signed type.  */
1886 cppchar_t
1887 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1888      cpp_reader *pfile;
1889      const cpp_token *token;
1890      unsigned int *pchars_seen;
1891      int *unsignedp;
1892 {
1893   const unsigned char *str = token->val.str.text;
1894   const unsigned char *limit = str + token->val.str.len;
1895   unsigned int chars_seen = 0;
1896   size_t width, max_chars;
1897   cppchar_t c, mask, result = 0;
1898   bool unsigned_p;
1899
1900 #ifdef MULTIBYTE_CHARS
1901   (void) local_mbtowc (NULL, NULL, 0);
1902 #endif
1903
1904   /* Width in bits.  */
1905   if (token->type == CPP_CHAR)
1906     {
1907       width = CPP_OPTION (pfile, char_precision);
1908       max_chars = CPP_OPTION (pfile, int_precision) / width;
1909       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1910     }
1911   else
1912     {
1913       width = CPP_OPTION (pfile, wchar_precision);
1914       max_chars = 1;
1915       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1916     }
1917
1918   if (width < BITS_PER_CPPCHAR_T)
1919     mask = ((cppchar_t) 1 << width) - 1;
1920   else
1921     mask = ~0;
1922
1923   while (str < limit)
1924     {
1925 #ifdef MULTIBYTE_CHARS
1926       wchar_t wc;
1927       int char_len;
1928
1929       char_len = local_mbtowc (&wc, str, limit - str);
1930       if (char_len == -1)
1931         {
1932           cpp_error (pfile, DL_WARNING,
1933                      "ignoring invalid multibyte character");
1934           c = *str++;
1935         }
1936       else
1937         {
1938           str += char_len;
1939           c = wc;
1940         }
1941 #else
1942       c = *str++;
1943 #endif
1944
1945       if (c == '\\')
1946         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1947
1948 #ifdef MAP_CHARACTER
1949       if (ISPRINT (c))
1950         c = MAP_CHARACTER (c);
1951 #endif
1952
1953       chars_seen++;
1954
1955       /* Truncate the character, scale the result and merge the two.  */
1956       c &= mask;
1957       if (width < BITS_PER_CPPCHAR_T)
1958         result = (result << width) | c;
1959       else
1960         result = c;
1961     }
1962
1963   if (chars_seen == 0)
1964     cpp_error (pfile, DL_ERROR, "empty character constant");
1965   else if (chars_seen > 1)
1966     {
1967       /* Multichar charconsts are of type int and therefore signed.  */
1968       unsigned_p = 0;
1969
1970       if (chars_seen > max_chars)
1971         {
1972           chars_seen = max_chars;
1973           cpp_error (pfile, DL_WARNING,
1974                      "character constant too long for its type");
1975         }
1976       else if (CPP_OPTION (pfile, warn_multichar))
1977         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1978     }
1979
1980   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1981      in WIDTH bits, but for multi-char charconsts it's value is the
1982      full target type's width.  */
1983   if (chars_seen > 1)
1984     width *= max_chars;
1985   if (width < BITS_PER_CPPCHAR_T)
1986     {
1987       mask = ((cppchar_t) 1 << width) - 1;
1988       if (unsigned_p || !(result & (1 << (width - 1))))
1989         result &= mask;
1990       else
1991         result |= ~mask;
1992     }
1993
1994   *pchars_seen = chars_seen;
1995   *unsignedp = unsigned_p;
1996   return result;
1997 }
1998
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free-list buffer
   that _cpp_get_buff will reuse for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   to its limit.  Every macro argument is parenthesized so that any
   expression may be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2013
/* Probe structure used only to measure alignment: member U is a
   union of a double and a pointer placed directly after a single
   char, so its offset shows where the compiler is willing to put
   such a union.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* The offset of member U within struct dummy.  */
#define DEFAULT_ALIGNMENT \
        (offsetof (struct dummy, u))

/* Round SIZE up to the next multiple of ALIGN.  The mask arithmetic
   gives that result only when ALIGN is a power of two.  */
#define CPP_ALIGN(SIZE, ALIGN) \
        (((SIZE) + ((ALIGN) - 1)) & ~((ALIGN) - 1))
2026
2027 /* Create a new allocation buffer.  Place the control block at the end
2028    of the buffer, so that buffer overflows will cause immediate chaos.  */
2029 static _cpp_buff *
2030 new_buff (len)
2031      size_t len;
2032 {
2033   _cpp_buff *result;
2034   unsigned char *base;
2035
2036   if (len < MIN_BUFF_SIZE)
2037     len = MIN_BUFF_SIZE;
2038   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2039
2040   base = xmalloc (len + sizeof (_cpp_buff));
2041   result = (_cpp_buff *) (base + len);
2042   result->base = base;
2043   result->cur = base;
2044   result->limit = base + len;
2045   result->next = NULL;
2046   return result;
2047 }
2048
2049 /* Place a chain of unwanted allocation buffers on the free list.  */
2050 void
2051 _cpp_release_buff (pfile, buff)
2052      cpp_reader *pfile;
2053      _cpp_buff *buff;
2054 {
2055   _cpp_buff *end = buff;
2056
2057   while (end->next)
2058     end = end->next;
2059   end->next = pfile->free_buffs;
2060   pfile->free_buffs = buff;
2061 }
2062
2063 /* Return a free buffer of size at least MIN_SIZE.  */
2064 _cpp_buff *
2065 _cpp_get_buff (pfile, min_size)
2066      cpp_reader *pfile;
2067      size_t min_size;
2068 {
2069   _cpp_buff *result, **p;
2070
2071   for (p = &pfile->free_buffs;; p = &(*p)->next)
2072     {
2073       size_t size;
2074
2075       if (*p == NULL)
2076         return new_buff (min_size);
2077       result = *p;
2078       size = result->limit - result->base;
2079       /* Return a buffer that's big enough, but don't waste one that's
2080          way too big.  */
2081       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2082         break;
2083     }
2084
2085   *p = result->next;
2086   result->next = NULL;
2087   result->cur = result->base;
2088   return result;
2089 }
2090
2091 /* Creates a new buffer with enough space to hold the uncommitted
2092    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2093    the excess bytes to the new buffer.  Chains the new buffer after
2094    BUFF, and returns the new buffer.  */
2095 _cpp_buff *
2096 _cpp_append_extend_buff (pfile, buff, min_extra)
2097      cpp_reader *pfile;
2098      _cpp_buff *buff;
2099      size_t min_extra;
2100 {
2101   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2102   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2103
2104   buff->next = new_buff;
2105   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2106   return new_buff;
2107 }
2108
2109 /* Creates a new buffer with enough space to hold the uncommitted
2110    remaining bytes of the buffer pointed to by BUFF, and at least
2111    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2112    Chains the new buffer before the buffer pointed to by BUFF, and
2113    updates the pointer to point to the new buffer.  */
2114 void
2115 _cpp_extend_buff (pfile, pbuff, min_extra)
2116      cpp_reader *pfile;
2117      _cpp_buff **pbuff;
2118      size_t min_extra;
2119 {
2120   _cpp_buff *new_buff, *old_buff = *pbuff;
2121   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2122
2123   new_buff = _cpp_get_buff (pfile, size);
2124   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2125   new_buff->next = old_buff;
2126   *pbuff = new_buff;
2127 }
2128
2129 /* Free a chain of buffers starting at BUFF.  */
2130 void
2131 _cpp_free_buff (buff)
2132      _cpp_buff *buff;
2133 {
2134   _cpp_buff *next;
2135
2136   for (; buff; buff = next)
2137     {
2138       next = buff->next;
2139       free (buff->base);
2140     }
2141 }
2142
2143 /* Allocate permanent, unaligned storage of length LEN.  */
2144 unsigned char *
2145 _cpp_unaligned_alloc (pfile, len)
2146      cpp_reader *pfile;
2147      size_t len;
2148 {
2149   _cpp_buff *buff = pfile->u_buff;
2150   unsigned char *result = buff->cur;
2151
2152   if (len > (size_t) (buff->limit - result))
2153     {
2154       buff = _cpp_get_buff (pfile, len);
2155       buff->next = pfile->u_buff;
2156       pfile->u_buff = buff;
2157       result = buff->cur;
2158     }
2159
2160   buff->cur = result + len;
2161   return result;
2162 }
2163
2164 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2165    That buffer is used for growing allocations when saving macro
2166    replacement lists in a #define, and when parsing an answer to an
2167    assertion in #assert, #unassert or #if (and therefore possibly
2168    whilst expanding macros).  It therefore must not be used by any
2169    code that they might call: specifically the lexer and the guts of
2170    the macro expander.
2171
2172    All existing other uses clearly fit this restriction: storing
2173    registered pragmas during initialization.  */
2174 unsigned char *
2175 _cpp_aligned_alloc (pfile, len)
2176      cpp_reader *pfile;
2177      size_t len;
2178 {
2179   _cpp_buff *buff = pfile->a_buff;
2180   unsigned char *result = buff->cur;
2181
2182   if (len > (size_t) (buff->limit - result))
2183     {
2184       buff = _cpp_get_buff (pfile, len);
2185       buff->next = pfile->a_buff;
2186       pfile->a_buff = buff;
2187       result = buff->cur;
2188     }
2189
2190   buff->cur = result + len;
2191   return result;
2192 }