gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27
  28 #ifdef MULTIBYTE_CHARS
  29 #include "mbchar.h"
  30 #include <locale.h>
  31 #endif
  32
  33 /* Spelling category of a token type.  One category is recorded per
  34    token type in token_spellings[] and read back through TOKEN_SPELL.
        Tokens with SPELL_STRING store their spelling in the token list,
        and its length in token->val.name.len.  */
  35 enum spell_type
  36 {
  37   SPELL_OPERATOR = 0,
  38   SPELL_CHAR,
  39   SPELL_IDENT,
  40   SPELL_NUMBER,
  41   SPELL_STRING,
  42   SPELL_NONE
  43 };
  44
     /* One entry of the token_spellings[] table, which is indexed by
        token type.  */
  45 struct token_spelling
  46 {
  47   enum spell_type category;      /* Read through TOKEN_SPELL.  */
  48   const unsigned char *name;     /* Read through TOKEN_NAME.  */
  49 };
  50
     /* Spellings of the digraphs.  */
  51 static const unsigned char *const digraph_spellings[] =
  52 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  53
     /* Build token_spellings[] from TTYPE_TABLE.  An OP (e, s) entry gets
        the category SPELL_OPERATOR and the name S; a TK (e, s) entry gets
        the category S and is named by the stringified E.  Both helper
        macros exist only while the table is expanded.  */
  54 #define OP(e, s) { SPELL_OPERATOR, U s           },
  55 #define TK(e, s) { s,              U STRINGX (e) },
  56 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  57 #undef OP
  58 #undef TK
  59
     /* Look up the spelling category and the name recorded for TOKEN's
        type.  BACKUP rewinds buffer->cur to buffer->backup_to; it expects
        a variable called `buffer' to be in scope where it is used.  */
  60 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  61 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  62 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  63
     /* Forward declarations of static helpers.  Newline and escaped
        newline handling comes first.  */
  64 static void handle_newline PARAMS ((cpp_reader *));
  65 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  66 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  67
     /* Comment and whitespace skipping, token parsing, and token run
        management.  */
  68 static int skip_block_comment PARAMS ((cpp_reader *));
  69 static int skip_line_comment PARAMS ((cpp_reader *));
  70 static void adjust_column PARAMS ((cpp_reader *));
  71 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  72 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  73 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  74                                   unsigned int *));
  75 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  76 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  77 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  78 static bool trigraph_p PARAMS ((cpp_reader *));
  79 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  80                                   cppchar_t));
  81 static bool continue_after_nul PARAMS ((cpp_reader *));
  82 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  83 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  84                                    const unsigned char *, cppchar_t *));
  85 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  86
  87 static unsigned int hex_digit_value PARAMS ((unsigned int));
  88 static _cpp_buff *new_buff PARAMS ((size_t));
  89
  90 /* Utility routine: report whether TOKEN spells the NUL-terminated
  91    string STRING.
  92
  93    Only a CPP_NAME token can match.  Returns 1 if equal, 0 otherwise.  */
  94 int
  95 cpp_ideq (token, string)
  96      const cpp_token *token;
  97      const char *string;
  98 {
  99   const uchar *wanted = (const uchar *) string;
 100
 101   return (token->type == CPP_NAME
 102           && ustrcmp (NODE_NAME (token->val.node), wanted) == 0);
 103 }
 104
 105 /* Account for the newline character sitting in buffer->cur[-1].  On
 106    return buffer->cur addresses the character right after the newline
 107    (combination), and the line base, column and line count are reset.  */
 108 static void
 109 handle_newline (pfile)
 110      cpp_reader *pfile;
 111 {
 112   cpp_buffer *const buf = pfile->buffer;
 113   const int pair = buf->cur[-1] + buf->cur[0];
 114
 115   /* The sum test accepts CR followed by LF as well as LF followed by
 116      CR, so either two-character combination is one line terminator.  */
 117   if (pair == '\r' + '\n')
 118     buf->cur += 1;
 119   pfile->line += 1;
 120   buf->col_adjust = 0;
 121   buf->line_base = buf->cur;
 122 }
 123
 124 /* Subroutine of skip_escaped_newlines; called when a 3-character
 125    sequence beginning with "??" is encountered.  buffer->cur points to
 126    the second '?'.
 127
 128    Issues any trigraph warning that is due, and returns true if the
 129    sequence forms a trigraph which should be honoured.  */
 130 static bool
 131 trigraph_p (pfile)
 132      cpp_reader *pfile;
 133 {
 134   cpp_buffer *buf = pfile->buffer;
 135   cppchar_t third = buf->cur[1];
 136   cppchar_t mapped = _cpp_trigraph_map[third];
 137   bool honour;
 138
 139   /* A zero map entry means the sequence is not a trigraph at all.  */
 140   if (mapped == 0)
 141     return false;
 142
 143   honour = CPP_OPTION (pfile, trigraphs);
 144   /* Nothing to report if warnings are off or we are in a comment.  */
 145   if (!CPP_OPTION (pfile, warn_trigraphs) || pfile->state.lexing_comment)
 146     return honour;
 147
 148   if (honour)
 149     cpp_error_with_line (pfile, DL_WARNING,
 150                          pfile->line, CPP_BUF_COL (buf) - 1,
 151                          "trigraph ??%c converted to %c",
 152                          (int) third, (int) mapped);
 153   else if (buf->cur != buf->last_Wtrigraphs)
 154     {
 155       /* Remember the spot so an ignored trigraph is reported once.  */
 156       buf->last_Wtrigraphs = buf->cur;
 157       cpp_error_with_line (pfile, DL_WARNING,
 158                            pfile->line, CPP_BUF_COL (buf) - 1,
 159                            "trigraph ??%c ignored", (int) third);
 160     }
 161   return honour;
 162 }
 163
 164 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 165    lie in buffer->cur[-1].  Returns the next byte, which will be in
 166    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 167    2 of the ISO C standard.

        A '?' is acted upon only if it starts a trigraph that trigraph_p
        accepts; the trigraph is then replaced, and processing goes on
        only if the replacement is a backslash.  A backslash followed by
        optional horizontal whitespace and a newline is removed, and the
        character after it is examined in turn.  If the buffer ends right
        after such a newline a pedwarn is issued and EOF is returned.
        Buffers marked from_stage3 are returned from unchanged.  */
 168 static cppchar_t
 169 skip_escaped_newlines (pfile)
 170      cpp_reader *pfile;
 171 {
 172   cpp_buffer *buffer = pfile->buffer;
 173   cppchar_t next = buffer->cur[-1];
 174
 175   /* Only do this if we apply stages 1 and 2.  */
 176   if (!buffer->from_stage3)
 177     {
 178       const unsigned char *saved_cur;
 179       cppchar_t next1;
 180
 181       do
 182         {
 183           if (next == '?')
 184             {
 185               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 186                 break;
 187
 188               /* Translate the trigraph.  */
 189               next = _cpp_trigraph_map[buffer->cur[1]];
 190               buffer->cur += 2;
 191               if (next != '\\')
 192                 break;
 193             }
 194
 195           if (buffer->cur == buffer->rlimit)
 196             break;
 197
 198           /* We have a backslash, and room for at least one more
 199              character.  Skip horizontal whitespace.  */
 200           saved_cur = buffer->cur;
 201           do
 202             next1 = *buffer->cur++;
 203           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 204
               /* No newline follows, so this is an ordinary backslash:
                  undo the whitespace scan and return it as it is.  */
 205           if (!is_vspace (next1))
 206             {
 207               buffer->cur = saved_cur;
 208               break;
 209             }
 210
               /* More than the newline itself was consumed, so there was
                  whitespace between the backslash and the newline.  */
 211           if (saved_cur != buffer->cur - 1
 212               && !pfile->state.lexing_comment)
 213             cpp_error (pfile, DL_WARNING,
 214                        "backslash and newline separated by space");
 215
 216           handle_newline (pfile);
               /* A later BACKUP () must not rewind into the removed
                  backslash-newline.  */
 217           buffer->backup_to = buffer->cur;
 218           if (buffer->cur == buffer->rlimit)
 219             {
 220               cpp_error (pfile, DL_PEDWARN,
 221                          "backslash-newline at end of file");
 222               next = EOF;
 223             }
 224           else
 225             next = *buffer->cur++;
 226         }
 227       while (next == '\\' || next == '?');
 228     }
 229
 230   return next;
 231 }
 232
 233 /* Fetch the next character as seen after trigraph replacement and
 234    removal of any run of escaped newlines.  Ordinary characters take
 235    the quick exit; only '?' and '\\' need the full treatment.  On
 236    return, buffer->backup_to marks the place to rewind to if the
 237    caller decides not to process the character.  */
 238 static cppchar_t
 239 get_effective_char (pfile)
 240      cpp_reader *pfile;
 241 {
 242   cpp_buffer *buf = pfile->buffer;
 243   cppchar_t ch;
 244
 245   buf->backup_to = buf->cur;
 246   ch = *buf->cur;
 247   buf->cur++;
 248   if (__builtin_expect (ch != '?' && ch != '\\', 1))
 249     return ch;
 250   return skip_escaped_newlines (pfile);
 251 }
 252
 253 /* Skip a C-style block comment.  We find the end of the comment by
 254    seeing if an asterisk is before every '/' we encounter.  Returns
 255    non-zero if comment terminated by EOF, zero otherwise.

        pfile->state.lexing_comment is set while the comment is scanned
        and cleared again before returning.  Newlines and tabs met on the
        way are passed to handle_newline and adjust_column.  On a normal
        return buffer->cur is just past the closing '/'.  */
 256 static int
 257 skip_block_comment (pfile)
 258      cpp_reader *pfile;
 259 {
 260   cpp_buffer *buffer = pfile->buffer;
 261   cppchar_t c = EOF, prevc = EOF;
 262
 263   pfile->state.lexing_comment = 1;
 264   while (buffer->cur != buffer->rlimit)
 265     {
 266       prevc = c, c = *buffer->cur++;
 267
 268       /* FIXME: For speed, create a new character class of characters
 269          of interest inside block comments.  */
 270       if (c == '?' || c == '\\')
 271         c = skip_escaped_newlines (pfile);
 272
 273       /* People like decorating comments with '*', so check for '/'
 274          instead for efficiency.  */
 275       if (c == '/')
 276         {
 277           if (prevc == '*')
 278             break;
 279
 280           /* Warn about potential nested comments, but not if the '/'
 281              comes immediately before the true comment delimiter.
 282              Don't bother to get it right across escaped newlines.  */
 283           if (CPP_OPTION (pfile, warn_comments)
 284               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 285             cpp_error_with_line (pfile, DL_WARNING,
 286                                  pfile->line, CPP_BUF_COL (buffer),
 287                                  "\"/*\" within comment");
 288         }
 289       else if (is_vspace (c))
 290         handle_newline (pfile);
 291       else if (c == '\t')
 292         adjust_column (pfile);
 293     }
 294
 295   pfile->state.lexing_comment = 0;
       /* The loop was left through the break only if the last two
          characters seen were the closing asterisk and slash.  */
 296   return c != '/' || prevc != '*';
 297 }
 298
 299 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 300    terminating newline.  Handles escaped newlines.  Returns non-zero
 301    if a multiline comment.

        If the buffer ends before a newline is found, buffer->cur is left
        at buffer->rlimit.  The comment counts as multiline when
        pfile->line has changed by the time scanning stops.
        pfile->state.lexing_comment is set for the duration of the scan.  */
 302 static int
 303 skip_line_comment (pfile)
 304      cpp_reader *pfile;
 305 {
 306   cpp_buffer *buffer = pfile->buffer;
 307   unsigned int orig_line = pfile->line;
 308   cppchar_t c;
 309 #ifdef MULTIBYTE_CHARS
 310   wchar_t wc;
 311   int char_len;
 312 #endif
 313
 314   pfile->state.lexing_comment = 1;
 315 #ifdef MULTIBYTE_CHARS
 316   /* Reset multibyte conversion state.  */
 317   (void) local_mbtowc (NULL, NULL, 0);
 318 #endif
 319   do
 320     {
 321       if (buffer->cur == buffer->rlimit)
 322         goto at_eof;
 323
 324 #ifdef MULTIBYTE_CHARS
           /* Step over a whole multibyte character at a time.
              NOTE(review): assumes local_mbtowc never returns 0 here,
              otherwise buffer->cur would not advance -- confirm.  */
 325       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 326                                buffer->rlimit - buffer->cur);
 327       if (char_len == -1)
 328         {
 329           cpp_error (pfile, DL_WARNING,
 330                      "ignoring invalid multibyte character");
 331           char_len = 1;
 332           c = *buffer->cur++;
 333         }
 334       else
 335         {
 336           buffer->cur += char_len;
 337           c = wc;
 338         }
 339 #else
 340       c = *buffer->cur++;
 341 #endif
 342       if (c == '?' || c == '\\')
 343         c = skip_escaped_newlines (pfile);
 344     }
 345   while (!is_vspace (c));
 346
 347   /* Step back over the newline, except at EOF.  */
 348   buffer->cur--;
 349  at_eof:
 350
 351   pfile->state.lexing_comment = 0;
 352   return orig_line != pfile->line;
 353 }
 354
 355 /* A tab has just been read: pfile->buffer->cur is one past it.  Grow
 356    col_adjust by the extra columns the tab expands to.  */
 357 static void
 358 adjust_column (pfile)
 359      cpp_reader *pfile;
 360 {
 361   cpp_buffer *buf = pfile->buffer;
 362   unsigned int tab_col = CPP_BUF_COL (buf) - 1; /* Zero-based.  */
 363   unsigned int past_stop = tab_col % CPP_OPTION (pfile, tabstop);
 364
 365   /* The tab reaches the next tabstop; one of those columns is already
 366      counted for the tab character itself, hence the final - 1.  */
 367   buf->col_adjust += (CPP_OPTION (pfile, tabstop) - past_stop) - 1;
 368 }
 369
 370 /* Skips whitespace, starting with the whitespace character C that
 371    has already been read, and leaves buffer->cur pointing to the next
 372    non-whitespace character.  Tabs are passed to adjust_column so that
        buffer->col_adjust accounts for them.  Without this, tokens might
        be assigned an incorrect column.

        Returns 0, without stepping buffer->cur back, when a NUL is met
        with buffer->cur - 1 equal to buffer->rlimit; returns 1 otherwise.
        Other NULs are ignored with a single warning.  */
 373 static int
 374 skip_whitespace (pfile, c)
 375      cpp_reader *pfile;
 376      cppchar_t c;
 377 {
 378   cpp_buffer *buffer = pfile->buffer;
 379   unsigned int warned = 0;
 380
 381   do
 382     {
 383       /* Horizontal space always OK.  */
 384       if (c == ' ')
 385         ;
 386       else if (c == '\t')
 387         adjust_column (pfile);
 388       /* Just \f \v or \0 left.  */
 389       else if (c == '\0')
 390         {
 391           if (buffer->cur - 1 == buffer->rlimit)
 392             return 0;
 393           if (!warned)
 394             {
 395               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 396               warned = 1;
 397             }
 398         }
 399       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 400         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 401                              CPP_BUF_COL (buffer),
 402                              "%s in preprocessing directive",
 403                              c == '\f' ? "form feed" : "vertical tab");
 404
 405       c = *buffer->cur++;
 406     }
 407   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 408   while (is_nvspace (c));
 409
       /* Un-read the character that ended the run of whitespace.  */
 410   buffer->cur--;
 411   return 1;
 412 }
 413
 414 /* Decide whether the spelling of a number token could also serve as
 415    a name, i.e. every character of STRING is an identifier character.  */
 416 static int
 417 name_p (pfile, string)
 418      cpp_reader *pfile;
 419      const cpp_string *string;
 420 {
 421   const unsigned char *p = string->text;
 422   const unsigned char *const end = p + string->len;
 423
 424   while (p < end && is_idchar (*p))
 425     p++;
 426
 427   return p == end;
 428 }
 429
 430 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 431    a critical inner loop.  The common case is an identifier which has
 432    not been split by backslash-newline, does not contain a dollar
 433    sign, and has already been scanned (roughly 10:1 ratio of
 434    seen:unseen identifiers in normal code; the distribution is
 435    Poisson-like).  Second most common case is a new identifier, not
 436    split and no dollar sign.  The other possibilities are rare and
 437    have been relegated to parse_slow.

        The identifier starts at buffer->cur - 1.  Returns its hash node,
        after issuing the diagnostics that poisoned identifiers and
        __VA_ARGS__ call for, unless we are skipping.  */
 438 static cpp_hashnode *
 439 parse_identifier (pfile)
 440      cpp_reader *pfile;
 441 {
 442   cpp_hashnode *result;
 443   const uchar *cur, *base;
 444
 445   /* Fast-path loop.  Skim over a normal identifier.
 446      N.B. ISIDNUM does not include $.  */
 447   cur = pfile->buffer->cur;
 448   while (ISIDNUM (*cur))
 449     cur++;
 450
 451   /* Check for slow-path cases.  */
 452   if (*cur == '?' || *cur == '\\' || *cur == '$')
 453     {
 454       unsigned int len;
 455
           /* parse_slow copies the spelling and updates buffer->cur itself.
              NOTE(review): HT_ALLOCED presumably tells ht_lookup that the
              spelling is already in permanent storage -- confirm.  */
 456       base = parse_slow (pfile, cur, 0, &len);
 457       result = (cpp_hashnode *)
 458         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 459     }
 460   else
 461     {
           /* The spelling is looked up straight from the input buffer.  */
 462       base = pfile->buffer->cur - 1;
 463       pfile->buffer->cur = cur;
 464       result = (cpp_hashnode *)
 465         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 466     }
 467
 468   /* Rarely, identifiers require diagnostics when lexed.
 469      XXX Has to be forced out of the fast path.  */
 470   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 471                         && !pfile->state.skipping, 0))
 472     {
 473       /* It is allowed to poison the same identifier twice.  */
 474       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 475         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 476                    NODE_NAME (result));
 477
 478       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 479          replacement list of a variadic macro.  */
 480       if (result == pfile->spec_nodes.n__VA_ARGS__
 481           && !pfile->state.va_args_ok)
 482         cpp_error (pfile, DL_PEDWARN,
 483         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 484     }
 485
 486   return result;
 487 }
 488
 489 /* Slow path.  This handles numbers and identifiers which have been
 490    split, or contain dollar signs.  The part of the token from
 491    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 492    1 if it's a number, and 2 if it has a leading period.  Returns a
 493    pointer to the token's NUL-terminated spelling in permanent
 494    storage, and sets PLEN to its length.

        The spelling is built on the hash table's obstack.  On return
        buffer->cur has been rewound with BACKUP () to the first
        character that is not part of the token.  */
 495 static uchar *
 496 parse_slow (pfile, cur, number_p, plen)
 497      cpp_reader *pfile;
 498      const uchar *cur;
 499      int number_p;
 500      unsigned int *plen;
 501 {
 502   cpp_buffer *buffer = pfile->buffer;
 503   const uchar *base = buffer->cur - 1;
 504   struct obstack *stack = &pfile->hash_table->stack;
 505   unsigned int c, prevc, saw_dollar = 0;
 506
 507   /* Place any leading period.  */
 508   if (number_p == 2)
 509     obstack_1grow (stack, '.');
 510
 511   /* Copy the part of the token which is known to be okay.  */
 512   obstack_grow (stack, base, cur - base);
 513
 514   /* Now process the part which isn't.  We are looking at one of
 515      '$', '\\', or '?' on entry to this loop.  */
 516   prevc = cur[-1];
 517   c = *cur++;
 518   buffer->cur = cur;
 519   for (;;)
 520     {
 521       /* Potential escaped newline?  */
 522       buffer->backup_to = buffer->cur - 1;
 523       if (c == '?' || c == '\\')
 524         c = skip_escaped_newlines (pfile);
 525
           /* Numbers additionally accept a period, and whatever VALID_SIGN
              accepts after the previous character.  */
 526       if (!is_idchar (c))
 527         {
 528           if (!number_p)
 529             break;
 530           if (c != '.' && !VALID_SIGN (c, prevc))
 531             break;
 532         }
 533
 534       /* Handle normal identifier characters in this loop.  */
 535       do
 536         {
 537           prevc = c;
 538           obstack_1grow (stack, c);
 539
 540           if (c == '$')
 541             saw_dollar++;
 542
 543           c = *buffer->cur++;
 544         }
 545       while (is_idchar (c));
 546     }
 547
 548   /* Step back over the unwanted char.  */
 549   BACKUP ();
 550
 551   /* $ is not an identifier character in the standard, but is commonly
 552      accepted as an extension.  Don't warn about it in skipped
 553      conditional blocks.  */
 554   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 555     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 556
 557   /* Identifiers and numbers are null-terminated.  */
 558   *plen = obstack_object_size (stack);
 559   obstack_1grow (stack, '\0');
 560   return obstack_finish (stack);
 561 }
 562
 563 /* Parse a number, beginning with the character at buffer->cur - 1,
 564    skipping embedded backslash-newlines.  LEADING_PERIOD is non-zero
 565    if there was a "." before that character.  Place the NUL-terminated
        spelling, with any leading period, and its length in NUMBER.  */
 566 static void
 567 parse_number (pfile, number, leading_period)
 568      cpp_reader *pfile;
 569      cpp_string *number;
 570      int leading_period;
 571 {
 572   const uchar *cur;
 573
 574   /* Fast-path loop.  Skim over a normal number.
 575      N.B. ISIDNUM does not include $.  */
 576   cur = pfile->buffer->cur;
 577   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 578     cur++;
 579
 580   /* Check for slow-path cases.  */
 581   if (*cur == '?' || *cur == '\\' || *cur == '$')
 582     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 583   else
 584     {
 585       const uchar *base = pfile->buffer->cur - 1;
 586       uchar *dest;
 587
           /* LEADING_PERIOD is added to the length, so it has to be 0 or 1
              here.  One more byte is allocated for the terminating NUL.  */
 588       number->len = cur - base + leading_period;
 589       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 590       dest[number->len] = '\0';
 591       number->text = dest;
 592
 593       if (leading_period)
 594         *dest++ = '.';
 595       memcpy (dest, base, cur - base);
 596       pfile->buffer->cur = cur;
 597     }
 598 }
 599
 600 /* Subroutine of parse_string.  DEST is one past the last character
 601    stored so far.  Returns non-zero if a terminator seen at this point
 602    really ends the literal, zero if a backslash escapes it.  */
 603 static int
 604 unescaped_terminator_p (pfile, dest)
 605      cpp_reader *pfile;
 606      const unsigned char *dest;
 607 {
 608   const unsigned char *front, *scan = dest;
 609
 610   /* In #include-style directives, terminators are not escapeable.  */
 611   if (pfile->state.angled_headers)
 612     return 1;
 613
 614   /* Walk back over the backslashes that immediately precede DEST; an
 615      odd count means the last one escapes the terminator.  */
 616   front = BUFF_FRONT (pfile->u_buff);
 617   while (scan > front && scan[-1] == '\\')
 618     scan--;
 619   return (dest - scan) % 2 == 0;
 620 }
 621
 622 /* Parses a string, character constant, or angle-bracketed header file
 623    name.  Handles embedded trigraphs and escaped newlines.  The stored
 624    string is guaranteed NUL-terminated, but it is not guaranteed that
 625    this is the first NUL since embedded NULs are preserved.
 626
        Reading starts at buffer->cur.  Characters are collected in
        pfile->u_buff up to, but not including, TERMINATOR, and
        TOKEN->val.str is set to the collected text and its length.  A
        newline or the end of the buffer ends the literal early; this is
        an error, except that in assembly language only '>' still gets
        the error.

 627    When this function returns, buffer->cur points to the next
 628    character to be processed.  */
 629 static void
 630 parse_string (pfile, token, terminator)
 631      cpp_reader *pfile;
 632      cpp_token *token;
 633      cppchar_t terminator;
 634 {
 635   cpp_buffer *buffer = pfile->buffer;
 636   unsigned char *dest, *limit;
 637   cppchar_t c;
 638   bool warned_nulls = false;
 639 #ifdef MULTIBYTE_CHARS
 640   wchar_t wc;
 641   int char_len;
 642 #endif
 643
 644   dest = BUFF_FRONT (pfile->u_buff);
 645   limit = BUFF_LIMIT (pfile->u_buff);
 646
 647 #ifdef MULTIBYTE_CHARS
 648   /* Reset multibyte conversion state.  */
 649   (void) local_mbtowc (NULL, NULL, 0);
 650 #endif
 651   for (;;)
 652     {
 653       /* We need room for another char, possibly the terminating NUL.  */
 654       if ((size_t) (limit - dest) < 1)
 655         {
 656           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 657           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 658           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 659           limit = BUFF_LIMIT (pfile->u_buff);
 660         }
 661
 662 #ifdef MULTIBYTE_CHARS
 663       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 664                                buffer->rlimit - buffer->cur);
 665       if (char_len == -1)
 666         {
 667           cpp_error (pfile, DL_WARNING,
 668                      "ignoring invalid multibyte character");
 669           char_len = 1;
 670           c = *buffer->cur++;
 671         }
 672       else
 673         {
               /* A converter that follows mbtowc reports a NUL byte as
                  length 0.  Consume that byte all the same, so that the
                  scan always advances.  */
               if (char_len == 0)
                 char_len = 1;
 674           buffer->cur += char_len;
 675           c = wc;
 676         }
 677 #else
 678       c = *buffer->cur++;
 679 #endif
 680
 681       /* Handle trigraphs, escaped newlines etc.  */
 682       if (c == '?' || c == '\\')
 683         c = skip_escaped_newlines (pfile);
 684
 685       if (c == terminator)
 686         {
 687           if (unescaped_terminator_p (pfile, dest))
 688             break;
 689         }
 690       else if (is_vspace (c))
 691         {
 692           /* No string literal may extend over multiple lines.  In
 693              assembly language, suppress the error except for <>
 694              includes.  This is a kludge around not knowing where
 695              comments are.  */
 696         unterminated:
 697           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 698             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 699                        (int) terminator);
 700           buffer->cur--;
 701           break;
 702         }
 703       else if (c == '\0')
 704         {
 705           if (buffer->cur - 1 == buffer->rlimit)
 706             goto unterminated;
 707           if (!warned_nulls)
 708             {
 709               warned_nulls = true;
 710               cpp_error (pfile, DL_WARNING,
 711                          "null character(s) preserved in literal");
 712             }
 713         }
 714 #ifdef MULTIBYTE_CHARS
 715       if (char_len > 1)
 716         {
               /* Only one free byte is guaranteed at this point, so make
                  room for the whole character.  This is the same growth
                  step as at the top of the loop.  */
               if ((size_t) (limit - dest) < (size_t) char_len)
                 {
                   size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
                   _cpp_extend_buff (pfile, &pfile->u_buff,
                                     (size_t) char_len + 1);
                   dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
                   limit = BUFF_LIMIT (pfile->u_buff);
                 }

               /* Copy the character's own bytes, which end just before
                  buffer->cur.  */
 717           for ( ; char_len > 0; --char_len)
 718             *dest++ = buffer->cur[-char_len];
 719         }
 720       else
 721 #endif
 722         *dest++ = c;
 723     }
 724
 725   *dest = '\0';
 726
 727   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 728   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 729   BUFF_FRONT (pfile->u_buff) = dest + 1;
 730 }
 731
 732 /* Store the comment just lexed in TOKEN as a CPP_COMMENT.  FROM points
 733    just past its leading '/', and TYPE is '/' for a C++ comment.  The
 734    stored text includes the comment start and any terminator.  */
 735 static void
 736 save_comment (pfile, token, from, type)
 737      cpp_reader *pfile;
 738      cpp_token *token;
 739      const unsigned char *from;
 740      cppchar_t type;
 741 {
 742   unsigned char *text;
 743   unsigned int body_len, total_len;
 744   int as_c_comment;
 745
 746   /* BODY_LEN counts the text from FROM up to buffer->cur.  A C++
 747      comment has probably (not definitely) moved past a new line,
 748      which is not wanted in the saved text.  */
 749   body_len = pfile->buffer->cur - from;
 750   if (is_vspace (pfile->buffer->cur[-1]))
 751     body_len--;
 752
 753   /* Inside a directive, which can only be a "#define" that is saving
 754      comments, a C++ comment is stored in C form.  That takes two
 755      extra characters for the closing asterisk and slash.  */
 756   as_c_comment = pfile->state.in_directive && type == '/';
 757   total_len = body_len + 1;     /* + 1 for the initial '/'.  */
 758   if (as_c_comment)
 759     total_len += 2;
 760
 761   text = _cpp_unaligned_alloc (pfile, total_len);
 762   text[0] = '/';
 763   memcpy (text + 1, from, body_len);
 764   if (as_c_comment)
 765     {
 766       text[1] = '*';
 767       text[total_len - 2] = '*';
 768       text[total_len - 1] = '/';
 769     }
 770
 771   token->type = CPP_COMMENT;
 772   token->val.str.len = total_len;
 773   token->val.str.text = text;
 774 }
 775
 776 /* Give RUN storage for COUNT tokens and clear its next pointer.  */
 777 void
 778 _cpp_init_tokenrun (run, count)
 779      tokenrun *run;
 780      unsigned int count;
 781 {
 782   run->next = NULL;
 783   run->base = xnewvec (cpp_token, count);
 784   run->limit = &run->base[count];
 785 }
 786
 787 /* Return the run after RUN, appending a 250-token run if none exists.  */
 788 static tokenrun *
 789 next_tokenrun (run)
 790      tokenrun *run;
 791 {
 792   tokenrun *succ = run->next;
 793   if (succ == NULL)
 794     {
 795       succ = run->next = xnew (tokenrun);
 796       succ->prev = run;
 797       _cpp_init_tokenrun (succ, 250);
 798     }
 799   return succ;
 800 }
 801
 802 /* Allocate one extra token that is invalidated together with the
 803    other tokens on the line.  Its line and col are copied from the
 804    last lexed token, so that diagnostics appear in the right place.  */
 805 cpp_token *
 806 _cpp_temp_token (pfile)
 807      cpp_reader *pfile;
 808 {
 809   cpp_token *const prev = pfile->cur_token - 1;
 810   cpp_token *fresh;
 811
 812   if (pfile->cur_token == pfile->cur_run->limit)
 813     {
 814       tokenrun *run = next_tokenrun (pfile->cur_run);
 815       pfile->cur_run = run;
 816       pfile->cur_token = run->base;
 817     }
 818   fresh = pfile->cur_token;
 819   pfile->cur_token = fresh + 1;
 820   fresh->line = prev->line;
 821   fresh->col = prev->col;
 822   return fresh;
 823 }
 824
 825 /* Lex a token and return it (external interface).  Takes care of
 826    issues like directive handling, token lookahead, multiple include
 827    optimization and skipping.

        Loops until there is a token to hand back: a '#' at the start of
        a line that _cpp_handle_directive accepts is consumed, and
        outside directives tokens are dropped while pfile->state.skipping
        is set, except for CPP_EOF.  */
 828 const cpp_token *
 829 _cpp_lex_token (pfile)
 830      cpp_reader *pfile;
 831 {
 832   cpp_token *result;
 833
 834   for (;;)
 835     {
           /* Move on to the next token run when this one is used up.  */
 836       if (pfile->cur_token == pfile->cur_run->limit)
 837         {
 838           pfile->cur_run = next_tokenrun (pfile->cur_run);
 839           pfile->cur_token = pfile->cur_run->base;
 840         }
 841
           /* While lookaheads are pending, the token at cur_token is handed
              out again rather than lexing a new one.  */
 842       if (pfile->lookaheads)
 843         {
 844           pfile->lookaheads--;
 845           result = pfile->cur_token++;
 846         }
 847       else
 848         result = _cpp_lex_direct (pfile);
 849
 850       if (result->flags & BOL)
 851         {
 852           /* Is this a directive.  If _cpp_handle_directive returns
 853              false, it is an assembler #.  */
 854           if (result->type == CPP_HASH
 855               /* 6.10.3 p 11: Directives in a list of macro arguments
 856                  gives undefined behavior.  This implementation
 857                  handles the directive as normal.  */
 858               && pfile->state.parsing_args != 1
 859               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 860             continue;
 861           if (pfile->cb.line_change && !pfile->state.skipping)
 862             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 863         }
 864
 865       /* We don't skip tokens in directives.  */
 866       if (pfile->state.in_directive)
 867         break;
 868
 869       /* Outside a directive, invalidate controlling macros.  At file
 870          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 871          get here and MI optimisation works.  */
 872       pfile->mi_valid = false;
 873
 874       if (!pfile->state.skipping || result->type == CPP_EOF)
 875         break;
 876     }
 877
 878   return result;
 879 }
 880
 881 /* A NUL terminates the current buffer.  For ISO preprocessing this is
 882    EOF, but for traditional preprocessing it indicates we need a line
 883    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
 884    to return a CPP_EOF to the caller.

        In either mode the buffer's saved_flags are set to BOL first.  */
 885 static bool
 886 continue_after_nul (pfile)
 887      cpp_reader *pfile;
 888 {
 889   cpp_buffer *buffer = pfile->buffer;
 890   bool more = false;
 891
 892   buffer->saved_flags = BOL;
 893   if (CPP_OPTION (pfile, traditional))
 894     {
           /* A directive ends here; no further line is read for it.  */
 895       if (pfile->state.in_directive)
 896         return false;
 897
           /* Read the next logical line and overlay the buffer with the
              text now in pfile->out.  */
 898       _cpp_remove_overlay (pfile);
 899       more = _cpp_read_logical_line_trad (pfile);
 900       _cpp_overlay_buffer (pfile, pfile->out.base,
 901                            pfile->out.cur - pfile->out.base);
 902       pfile->line = pfile->out.first_line;
 903     }
 904   else
 905     {
 906       /* Stop parsing arguments with a CPP_EOF.  When we finally come
 907          back here, do the work of popping the buffer.  */
 908       if (!pfile->state.parsing_args)
 909         {
               /* Text since the last newline means the final line was not
                  terminated.  */
 910           if (buffer->cur != buffer->line_base)
 911             {
 912               /* Non-empty files should end in a newline.  Don't warn
 913                  for command line and _Pragma buffers.  */
 914               if (!buffer->from_stage3)
 915                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
 916               handle_newline (pfile);
 917             }
 918
 919           /* Similarly, finish an in-progress directive with CPP_EOF
 920              before popping the buffer.  */
 921           if (!pfile->state.in_directive && buffer->prev)
 922             {
                   /* Read return_at_eof before the buffer is popped.  */
 923               more = !buffer->return_at_eof;
 924               _cpp_pop_buffer (pfile);
 925             }
 926         }
 927     }
 928
 929   return more;
 930 }
 931
 932 /* Reads the next effective character.  If it is CHAR the token in
 933    RESULT gets THEN_TYPE; otherwise BACKUP () is invoked and the token
 934    gets ELSE_TYPE.  Expects PFILE and RESULT to be in scope.  */
 935 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 936   do {                                          \
 937     if (get_effective_char (pfile) != (CHAR))   \
 938       { BACKUP (); result->type = (ELSE_TYPE); } \
 939     else                                        \
 940       result->type = (THEN_TYPE);               \
 941   } while (0)
 942
 943 /* Lex a token into the slot at pfile->cur_token, which is also
 944    incremented, to get diagnostics pointing to the correct location.
 945
 946    Does not handle issues such as token lookahead, multiple-include
 947    optimisation, directives, skipping etc.  This function is only
 948    suitable for use by _cpp_lex_token, and in special cases like
 949    lex_expansion_token which doesn't care for any of these issues.
 950
 951    When meeting a newline, returns CPP_EOF if parsing a directive;
 952    otherwise lexing restarts on the next line, reusing the base token
 953    run unless pfile->keep_tokens.  Returns a pointer to the token.  */
 954 cpp_token *
 955 _cpp_lex_direct (pfile)
 956      cpp_reader *pfile;
 957 {
 958   cppchar_t c;
 959   cpp_buffer *buffer;
 960   const unsigned char *comment_start;
 961   cpp_token *result = pfile->cur_token++;
 962
 963  fresh_line:  /* (Re)start here for each new line or buffer.  */
 964   buffer = pfile->buffer;
 965   result->flags = buffer->saved_flags;
 966   buffer->saved_flags = 0;
 967  update_tokens_line:  /* Stamp the token with the current line.  */
 968   result->line = pfile->line;
 969
 970  skipped_white:  /* Read the first character of the token.  */
 971   c = *buffer->cur++;
 972   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 973
 974  trigraph:  /* Also re-entered with the value of a trigraph in C.  */
 975   switch (c)
 976     {
 977     case ' ': case '\t': case '\f': case '\v': case '\0':
 978       result->flags |= PREV_WHITE;
 979       if (skip_whitespace (pfile, c))
 980         goto skipped_white;
 981
 982       /* End of buffer.  */
 983       buffer->cur--;
 984       if (continue_after_nul (pfile))
 985         goto fresh_line;
 986       result->type = CPP_EOF;
 987       break;
 988
 989     case '\n': case '\r':
 990       handle_newline (pfile);
 991       buffer->saved_flags = BOL;
 992       if (! pfile->state.in_directive)
 993         {
 994           if (pfile->state.parsing_args == 2)
 995             buffer->saved_flags |= PREV_WHITE;
 996           if (!pfile->keep_tokens)  /* Reuse the base token run.  */
 997             {
 998               pfile->cur_run = &pfile->base_run;
 999               result = pfile->base_run.base;
1000               pfile->cur_token = result + 1;
1001             }
1002           goto fresh_line;
1003         }
1004       result->type = CPP_EOF;
1005       break;
1006
1007     case '?':
1008     case '\\':
1009       /* These could start an escaped newline, or '?' a trigraph.  Let
1010          skip_escaped_newlines do all the work.  */
1011       {
1012         unsigned int line = pfile->line;
1013
1014         c = skip_escaped_newlines (pfile);
1015         if (line != pfile->line)
1016           {
1017             buffer->cur--;
1018             /* We had at least one escaped newline of some sort.
1019                Update the token's line and column.  */
1020             goto update_tokens_line;
1021           }
1022       }
1023
1024       /* We are either the original '?' or '\\', or a trigraph.  */
1025       if (c == '?')
1026         result->type = CPP_QUERY;
1027       else if (c == '\\')
1028         goto random_char;
1029       else
1030         goto trigraph;
1031       break;
1032
1033     case '0': case '1': case '2': case '3': case '4':
1034     case '5': case '6': case '7': case '8': case '9':
1035       result->type = CPP_NUMBER;
1036       parse_number (pfile, &result->val.str, 0);
1037       break;
1038
1039     case 'L':
1040       /* 'L' may introduce wide characters or strings.  */
1041       {
1042         const unsigned char *pos = buffer->cur;
1043
1044         c = get_effective_char (pfile);
1045         if (c == '\'' || c == '"')
1046           {
1047             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1048             parse_string (pfile, result, c);
1049             break;
1050           }
1051         buffer->cur = pos;
1052       }
1053       /* Fall through.  */
1054
1055     start_ident:
1056     case '_':
1057     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1058     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1059     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1060     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1061     case 'y': case 'z':
1062     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1063     case 'G': case 'H': case 'I': case 'J': case 'K':  /* 'L': see above.  */
1064     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1065     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1066     case 'Y': case 'Z':
1067       result->type = CPP_NAME;
1068       result->val.node = parse_identifier (pfile);
1069
1070       /* Convert named operators to their proper types.  */
1071       if (result->val.node->flags & NODE_OPERATOR)
1072         {
1073           result->flags |= NAMED_OP;
1074           result->type = result->val.node->value.operator;
1075         }
1076       break;
1077
1078     case '\'':
1079     case '"':
1080       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1081       parse_string (pfile, result, c);
1082       break;
1083
1084     case '/':
1085       /* A potential block or line comment.  */
1086       comment_start = buffer->cur;
1087       c = get_effective_char (pfile);
1088
1089       if (c == '*')
1090         {
1091           if (skip_block_comment (pfile))
1092             cpp_error (pfile, DL_ERROR, "unterminated comment");
1093         }
1094       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1095                             || CPP_IN_SYSTEM_HEADER (pfile)))
1096         {
1097           /* Warn about comments only if pedantically GNUC89, and not
1098              in system headers.  */
1099           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1100               && ! buffer->warned_cplusplus_comments)
1101             {
1102               cpp_error (pfile, DL_PEDWARN,
1103                          "C++ style comments are not allowed in ISO C89");
1104               cpp_error (pfile, DL_PEDWARN,
1105                          "(this will be reported only once per input file)");
1106               buffer->warned_cplusplus_comments = 1;
1107             }
1108
1109           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1110             cpp_error (pfile, DL_WARNING, "multi-line comment");
1111         }
1112       else if (c == '=')
1113         {
1114           result->type = CPP_DIV_EQ;
1115           break;
1116         }
1117       else
1118         {
1119           BACKUP ();
1120           result->type = CPP_DIV;
1121           break;
1122         }
1123       /* A discarded comment is treated as preceding whitespace.  */
1124       if (!pfile->state.save_comments)
1125         {
1126           result->flags |= PREV_WHITE;
1127           goto update_tokens_line;
1128         }
1129
1130       /* Save the comment as a token in its own right.  */
1131       save_comment (pfile, result, comment_start, c);
1132       break;
1133
1134     case '<':
1135       if (pfile->state.angled_headers)
1136         {
1137           result->type = CPP_HEADER_NAME;
1138           parse_string (pfile, result, '>');
1139           break;
1140         }
1141
1142       c = get_effective_char (pfile);
1143       if (c == '=')
1144         result->type = CPP_LESS_EQ;
1145       else if (c == '<')
1146         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1147       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1148         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1149       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1150         {
1151           result->type = CPP_OPEN_SQUARE;
1152           result->flags |= DIGRAPH;
1153         }
1154       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1155         {
1156           result->type = CPP_OPEN_BRACE;
1157           result->flags |= DIGRAPH;
1158         }
1159       else
1160         {
1161           BACKUP ();
1162           result->type = CPP_LESS;
1163         }
1164       break;
1165
1166     case '>':
1167       c = get_effective_char (pfile);
1168       if (c == '=')
1169         result->type = CPP_GREATER_EQ;
1170       else if (c == '>')
1171         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1172       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1173         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1174       else
1175         {
1176           BACKUP ();
1177           result->type = CPP_GREATER;
1178         }
1179       break;
1180
1181     case '%':
1182       c = get_effective_char (pfile);
1183       if (c == '=')
1184         result->type = CPP_MOD_EQ;
1185       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1186         {
1187           result->flags |= DIGRAPH;
1188           result->type = CPP_HASH;
1189           if (get_effective_char (pfile) == '%')
1190             {
1191               const unsigned char *pos = buffer->cur;
1192
1193               if (get_effective_char (pfile) == ':')
1194                 result->type = CPP_PASTE;
1195               else
1196                 buffer->cur = pos - 1;
1197             }
1198           else
1199             BACKUP ();
1200         }
1201       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1202         {
1203           result->flags |= DIGRAPH;
1204           result->type = CPP_CLOSE_BRACE;
1205         }
1206       else
1207         {
1208           BACKUP ();
1209           result->type = CPP_MOD;
1210         }
1211       break;
1212
1213     case '.':
1214       result->type = CPP_DOT;
1215       c = get_effective_char (pfile);
1216       if (c == '.')
1217         {
1218           const unsigned char *pos = buffer->cur;
1219
1220           if (get_effective_char (pfile) == '.')
1221             result->type = CPP_ELLIPSIS;
1222           else
1223             buffer->cur = pos - 1;
1224         }
1225       /* All known character sets have 0...9 contiguous.  */
1226       else if (ISDIGIT (c))
1227         {
1228           result->type = CPP_NUMBER;
1229           parse_number (pfile, &result->val.str, 1);
1230         }
1231       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1232         result->type = CPP_DOT_STAR;
1233       else
1234         BACKUP ();
1235       break;
1236
1237     case '+':
1238       c = get_effective_char (pfile);
1239       if (c == '+')
1240         result->type = CPP_PLUS_PLUS;
1241       else if (c == '=')
1242         result->type = CPP_PLUS_EQ;
1243       else
1244         {
1245           BACKUP ();
1246           result->type = CPP_PLUS;
1247         }
1248       break;
1249
1250     case '-':
1251       c = get_effective_char (pfile);
1252       if (c == '>')
1253         {
1254           result->type = CPP_DEREF;
1255           if (CPP_OPTION (pfile, cplusplus))
1256             {
1257               if (get_effective_char (pfile) == '*')
1258                 result->type = CPP_DEREF_STAR;
1259               else
1260                 BACKUP ();
1261             }
1262         }
1263       else if (c == '-')
1264         result->type = CPP_MINUS_MINUS;
1265       else if (c == '=')
1266         result->type = CPP_MINUS_EQ;
1267       else
1268         {
1269           BACKUP ();
1270           result->type = CPP_MINUS;
1271         }
1272       break;
1273
1274     case '&':
1275       c = get_effective_char (pfile);
1276       if (c == '&')
1277         result->type = CPP_AND_AND;
1278       else if (c == '=')
1279         result->type = CPP_AND_EQ;
1280       else
1281         {
1282           BACKUP ();
1283           result->type = CPP_AND;
1284         }
1285       break;
1286
1287     case '|':
1288       c = get_effective_char (pfile);
1289       if (c == '|')
1290         result->type = CPP_OR_OR;
1291       else if (c == '=')
1292         result->type = CPP_OR_EQ;
1293       else
1294         {
1295           BACKUP ();
1296           result->type = CPP_OR;
1297         }
1298       break;
1299
1300     case ':':
1301       c = get_effective_char (pfile);
1302       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1303         result->type = CPP_SCOPE;
1304       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1305         {
1306           result->flags |= DIGRAPH;
1307           result->type = CPP_CLOSE_SQUARE;
1308         }
1309       else
1310         {
1311           BACKUP ();
1312           result->type = CPP_COLON;
1313         }
1314       break;
1315
1316     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1317     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1318     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1319     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1320     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1321
1322     case '~': result->type = CPP_COMPL; break;
1323     case ',': result->type = CPP_COMMA; break;
1324     case '(': result->type = CPP_OPEN_PAREN; break;
1325     case ')': result->type = CPP_CLOSE_PAREN; break;
1326     case '[': result->type = CPP_OPEN_SQUARE; break;
1327     case ']': result->type = CPP_CLOSE_SQUARE; break;
1328     case '{': result->type = CPP_OPEN_BRACE; break;
1329     case '}': result->type = CPP_CLOSE_BRACE; break;
1330     case ';': result->type = CPP_SEMICOLON; break;
1331
1332       /* @ is a punctuator in Objective C.  */
1333     case '@': result->type = CPP_ATSIGN; break;
1334
1335     case '$':
1336       if (CPP_OPTION (pfile, dollars_in_ident))
1337         goto start_ident;
1338       /* Fall through...  */
1339
1340     random_char:
1341     default:
1342       result->type = CPP_OTHER;
1343       result->val.c = c;
1344       break;
1345     }
1346
1347   return result;
1348 }
1349
1350 /* An upper bound on the bytes needed to spell TOKEN, with preceding
1351    whitespace.  Named operators are spelt, and so sized, as identifiers.  */
1352 unsigned int
1353 cpp_token_len (token)
1354      const cpp_token *token;
1355 {
1356   unsigned int len;
1357
1358   switch ((token->flags & NAMED_OP) ? SPELL_IDENT : TOKEN_SPELL (token))
1359     {
1360     default:            len = 0;                                break;
1361     case SPELL_NUMBER:
1362     case SPELL_STRING:  len = token->val.str.len;               break;
1363     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1364     }
1365   /* 1 for whitespace, 4 for comment delimiters.  */
1366   return len + 5;
1367 }
1368
1369 /* Spell TOKEN into BUFFER, which must already be large enough to
1370    hold the spelling; no terminator is appended.  Returns a pointer
1371    to the byte following the last one written.  */
1372 unsigned char *
1373 cpp_spell_token (pfile, token, buffer)
1374      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1375      const cpp_token *token;
1376      unsigned char *buffer;
1377 {
1378   unsigned char *out = buffer;
1379
1380   switch (TOKEN_SPELL (token))
1381     {
1382     case SPELL_OPERATOR:
1383       {
1384         const unsigned char *text = TOKEN_NAME (token);
1385
1386         if (token->flags & DIGRAPH)
1387           text = digraph_spellings[(int) token->type
1388                                    - (int) CPP_FIRST_DIGRAPH];
1389         else if (token->flags & NAMED_OP)
1390           goto spell_ident;
1391
1392         for (; *text != '\0'; text++)
1393           *out++ = *text;
1394       }
1395       break;
1396
1397     case SPELL_CHAR:
1398       *out++ = token->val.c;
1399       break;
1400
1401     spell_ident:
1402     case SPELL_IDENT:
1403       memcpy (out, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1404       out += NODE_LEN (token->val.node);
1405       break;
1406
1407     case SPELL_NUMBER:
1408       memcpy (out, token->val.str.text, token->val.str.len);
1409       out += token->val.str.len;
1410       break;
1411
1412     case SPELL_STRING:
1413       {
1414         int opener, closer;
1415
1416         switch (token->type)
1417           {
1418           case CPP_STRING: case CPP_WSTRING: opener = closer = '"';  break;
1419           case CPP_CHAR:   case CPP_WCHAR:   opener = closer = '\''; break;
1420           case CPP_HEADER_NAME:     opener = '<'; closer = '>';      break;
1421           default:
1422             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1423                        TOKEN_NAME (token));
1424             return out;
1425           }
1426
1427         /* Wide literals carry an 'L' prefix.  */
1428         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1429           *out++ = 'L';
1430         *out++ = opener;
1431         memcpy (out, token->val.str.text, token->val.str.len);
1432         out += token->val.str.len;
1433         *out++ = closer;
1434       }
1435       break;
1436
1437     case SPELL_NONE:
1438       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1439       break;
1440     }
1441
1442   return out;
1443 }
1444
1445 /* Returns TOKEN spelt as a NUL-terminated string, which is handy for
1446    diagnostics.  The string is allocated from PFILE and is freed when
1447    the reader is destroyed.  */
1448 unsigned char *
1449 cpp_token_as_text (pfile, token)
1450      cpp_reader *pfile;
1451      const cpp_token *token;
1452 {
1453   unsigned char *text, *tail;
1454
1455   text = _cpp_unaligned_alloc (pfile, cpp_token_len (token));
1456   tail = cpp_spell_token (pfile, token, text);
1457   *tail = '\0';
1458   return text;
1459 }
1460
1461 /* Returns the spelling-table name for tokens of type TYPE.  Kept for
1462    the C front ends; cpp_token_as_text is the preferred interface.  */
1463 const char *
1464 cpp_type2name (type)
1465      enum cpp_ttype type;
1466 {
1467   return (const char *) (token_spellings + type)->name;
1468 }
1469
1470 /* Writes the spelling of TOKEN to FP with no preceding space.  This
1471    duplicates cpp_spell_token rather than calling it, for efficiency:
1472    it avoids stdio double-buffering.  */
1473 void
1474 cpp_output_token (token, fp)
1475      const cpp_token *token;
1476      FILE *fp;
1477 {
1478   switch (TOKEN_SPELL (token))
1479     {
1480     case SPELL_OPERATOR:
1481       {
1482         const unsigned char *text = TOKEN_NAME (token);
1483
1484         if (token->flags & DIGRAPH)
1485           text = digraph_spellings[(int) token->type
1486                                    - (int) CPP_FIRST_DIGRAPH];
1487         else if (token->flags & NAMED_OP)
1488           goto spell_ident;
1489
1490         /* The first character is written unconditionally.  */
1491         putc (*text, fp);
1492         while (*++text != '\0')
1493           putc (*text, fp);
1494       }
1495       break;
1496
1497     case SPELL_CHAR:
1498       putc (token->val.c, fp);
1499       break;
1500
1501     spell_ident:
1502     case SPELL_IDENT:
1503       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1504       break;
1505
1506     case SPELL_NUMBER:
1507       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1508       break;
1509
1510     case SPELL_STRING:
1511       {
1512         int opener, closer;
1513
1514         switch (token->type)
1515           {
1516           case CPP_STRING: case CPP_WSTRING: opener = closer = '"';  break;
1517           case CPP_CHAR:   case CPP_WCHAR:   opener = closer = '\''; break;
1518           case CPP_HEADER_NAME:     opener = '<'; closer = '>';      break;
1519           default:
1520             fprintf (stderr, "impossible STRING token %s\n",
1521                      TOKEN_NAME (token));
1522             return;
1523           }
1524
1525         /* Wide literals carry an 'L' prefix.  */
1526         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1527           putc ('L', fp);
1528         putc (opener, fp);
1529         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1530         putc (closer, fp);
1531       }
1532       break;
1533
1534     case SPELL_NONE:
1535       /* An error, most probably.  */
1536       break;
1537     }
1538 }
1539
1540 /* Returns nonzero if tokens A and B are equivalent: same type, same
1541    flags and, where the token kind carries a value, the same value.  */
1542 int
1543 _cpp_equiv_tokens (a, b)
1544      const cpp_token *a, *b;
1545 {
1546   if (a->type != b->type || a->flags != b->flags)
1547     return 0;
1548
1549   switch (TOKEN_SPELL (a))
1550     {
1551     case SPELL_CHAR:
1552       return a->val.c == b->val.c;
1553     case SPELL_NONE:
1554       if (a->type == CPP_MACRO_ARG)
1555         return a->val.arg_no == b->val.arg_no;
1556       return 1;
1557     case SPELL_IDENT:
1558       return a->val.node == b->val.node;
1559     case SPELL_NUMBER: case SPELL_STRING:
1560       return (a->val.str.len == b->val.str.len
1561               && !memcmp (a->val.str.text, b->val.str.text, a->val.str.len));
1562     default:                    /* SPELL_OPERATOR; type and flags suffice.  */
1563       return 1;
1564     }
1565 }
1566
1567 /* Returns nonzero if a space should be printed between TOKEN1 and
1568    TOKEN2 so that, on output, they are not read back as one token.
1569    The test is deliberately conservative and sometimes asks for a
1570    space that is not needed, e.g. between "." and ".2".  */
1571 int
1572 cpp_avoid_paste (pfile, token1, token2)
1573      cpp_reader *pfile;
1574      const cpp_token *token1, *token2;
1575 {
1576   enum cpp_ttype a = token1->type, b = token2->type;
1577   cppchar_t c = EOF;
1578
1579   /* Named operators are treated as names.  */
1580   if (token1->flags & NAMED_OP)
1581     a = CPP_NAME;
1582   if (token2->flags & NAMED_OP)
1583     b = CPP_NAME;
1584
1585   /* C becomes the first character of TOKEN2's spelling when TOKEN2
1586      is a digraph or other operator, and stays EOF otherwise.  */
1587   if (token2->flags & DIGRAPH)
1588     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1589   else if (token_spellings[b].category == SPELL_OPERATOR)
1590     c = token_spellings[b].name[0];
1591
1592   /* Quickly get everything that can paste with an '='.  */
1593   if (c == '=' && (int) a <= (int) CPP_LAST_EQ)
1594     return 1;
1595
1596   switch (a)
1597     {
1598     case CPP_GREATER: return c == '>' || c == '?';
1599     case CPP_LESS:    return c == '<' || c == '?' || c == '%' || c == ':';
1600     case CPP_PLUS:    return c == '+';
1601     case CPP_MINUS:   return c == '-' || c == '>';
1602     case CPP_DIV:     return c == '/' || c == '*'; /* Comments.  */
1603     case CPP_AND:     return c == '&';
1604     case CPP_OR:      return c == '|';
1605     case CPP_MOD:
1606     case CPP_COLON:   return c == ':' || c == '>';
1607     case CPP_DEREF:   return c == '*';
1608     case CPP_DOT:     return c == '.' || c == '%' || b == CPP_NUMBER;
1609     case CPP_HASH:    return c == '#' || c == '%'; /* Digraph form.  */
1610     case CPP_NAME:
1611       return (b == CPP_NAME || b == CPP_CHAR || b == CPP_STRING /* L */
1612               || (b == CPP_NUMBER && name_p (pfile, &token2->val.str)));
1613     case CPP_NUMBER:
1614       return (b == CPP_NUMBER || b == CPP_NAME
1615               || c == '.' || c == '+' || c == '-');
1616     case CPP_OTHER:
1617       return (token1->val.c == '@' && CPP_OPTION (pfile, objc)
1618               && (b == CPP_NAME || b == CPP_STRING));
1619     default:          return 0;
1620     }
1621 }
1622
1623 /* Writes the rest of the current line to FP, token by token, followed
1624    by a newline.  A single space is written after a token whenever the
1625    token that follows it is flagged PREV_WHITE, so whitespace before
1626    the first token is dropped.  Macro token padding is not applied.  */
1627 void
1628 cpp_output_line (pfile, fp)
1629      cpp_reader *pfile;
1630      FILE *fp;
1631 {
1632   const cpp_token *tok;
1633
1634   for (tok = cpp_get_token (pfile); tok->type != CPP_EOF; )
1635     {
1636       cpp_output_token (tok, fp);
1637       tok = cpp_get_token (pfile);
1638       if (tok->flags & PREV_WHITE)
1639         putc (' ', fp);
1640     }
1641
1642   putc ('\n', fp);
1643 }
1644
1645 /* Returns the value of hexadecimal digit C; aborts if C is not one.  */
1646 static unsigned int
1647 hex_digit_value (c)
1648      unsigned int c;
1649 {
1650   if (!hex_p (c))
1651     abort ();
1652
1653   return hex_value (c);
1654 }
1655
1656 /* Parse the hex digits of a '\uNNNN' or '\UNNNNNNNN' sequence.
1657    Returns 1, silently and without updating anything, if cpplib is
1658    not parsing C++ or C99.  Otherwise returns 0 after issuing any
1659    diagnostics, including a warning under -Wtraditional.
1660
1661    [lex.charset]: The character designated by the universal character
1662    name \UNNNNNNNN is that character whose character short name in
1663    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1664    universal character name \uNNNN is that character whose character
1665    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1666    for a universal character name is less than 0x20 or in the range
1667    0x7F-0x9F (inclusive), or if the universal character name
1668    designates a character in the basic source character set, then the
1669    program is ill-formed.
1670
1671    We assume that wchar_t is Unicode, so we don't need to do any
1672    mapping.  Is this ever wrong?
1673
1674    On entry *PC holds the 'u' or 'U' and *PSTR points to the byte
1675    after it; LIMIT is the end of the string or charconst.  When 0 is
1676    returned, *PSTR points past the UCS and *PC holds its value.  */
1677
1678 static int
1679 maybe_read_ucs (pfile, pstr, limit, pc)
1680      cpp_reader *pfile;
1681      const unsigned char **pstr;
1682      const unsigned char *limit;
1683      cppchar_t *pc;
1684 {
1685   const unsigned char *cur = *pstr;
1686   unsigned int value = 0;
1687   unsigned int kind = *pc, remaining;
1688
1689   /* Only attempt to interpret a UCS for C++ and C99.  */
1690   if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
1691     return 1;
1692
1693   if (CPP_WTRADITIONAL (pfile))
1694     cpp_error (pfile, DL_WARNING,
1695                "the meaning of '\\%c' is different in traditional C", kind);
1696
1697   remaining = kind == 'u' ? 4 : 8;
1698
1699   if ((size_t) (limit - cur) < remaining)
1700     {
1701       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1702       /* Skip to the end to avoid more diagnostics.  */
1703       cur = limit;
1704     }
1705   else
1706     while (remaining != 0)
1707       {
1708         unsigned int digit = *cur;
1709
1710         if (!ISXDIGIT (digit))
1711           {
1712             /* Stop here rather than skip, in case there are
1713                multibyte chars.  */
1714             cpp_error (pfile, DL_ERROR,
1715                        "non-hex digit '%c' in universal-character-name",
1716                        digit);
1717             break;
1718           }
1719         value = (value << 4) + hex_digit_value (digit);
1720         cur++;
1721         remaining--;
1722       }
1723
1724 #ifdef TARGET_EBCDIC
1725   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1726   value = 0x3f;  /* EBCDIC invalid character */
1727 #else
1728   /* True extended characters are OK, and the standard permits $, @
1729      and ` to be specified as UCNs (we use hex escapes so that this
1730      also works with EBCDIC hosts).  REMAINING is nonzero if an error
1731      was already given above, in which case we stay quiet.  */
1732   if (remaining == 0
1733       && value != 0x24 && value != 0x40 && value != 0x60
1734       && (value < 0xa0
1735           || (value & 0x80000000)
1736           || (value >= 0xD800 && value <= 0xDFFF)))
1737     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1738 #endif
1739
1740   *pstr = cur;
1741   *pc = value;
1742   return 0;
1743 }
1744
1745 /* Evaluates one escape sequence and returns its value, truncated to
1746    the target precision of char, or of wchar_t if WIDE is nonzero.
1747    On entry *PSTR points just past the backslash and LIMIT bounds
1748    the available text; on exit *PSTR points past the sequence.  All
1749    relevant diagnostics are issued here.  */
1750 cppchar_t
1751 cpp_parse_escape (pfile, pstr, limit, wide)
1752      cpp_reader *pfile;
1753      const unsigned char **pstr;
1754      const unsigned char *limit;
1755      int wide;
1756 {
1757   const unsigned char *str = *pstr;
1758   cppchar_t c, mask;
1759   unsigned int width;
1760   int unknown = 0;
1761
1762   width = (wide ? CPP_OPTION (pfile, wchar_precision)
1763            : CPP_OPTION (pfile, char_precision));
1764
1765   /* MASK has the low WIDTH bits set.  */
1766   if (width >= BITS_PER_CPPCHAR_T)
1767     mask = ~0;
1768   else
1769     mask = ((cppchar_t) 1 << width) - 1;
1770
1771   /* Dispatch on the character that follows the backslash.  */
1772   c = *str++;
1773   switch (c)
1774     {
1775     case '\\': case '\'': case '"': case '?': break;
1776     case 'b': c = TARGET_BS;      break;
1777     case 'f': c = TARGET_FF;      break;
1778     case 'n': c = TARGET_NEWLINE; break;
1779     case 'r': c = TARGET_CR;      break;
1780     case 't': c = TARGET_TAB;     break;
1781     case 'v': c = TARGET_VT;      break;
1782
1783     case '(': case '{': case '[': case '%':
1784       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1785          '\%' is used to prevent SCCS from getting confused.  */
1786       unknown = CPP_PEDANTIC (pfile);
1787       break;
1788
1789     case 'a':
1790       if (CPP_WTRADITIONAL (pfile))
1791         cpp_error (pfile, DL_WARNING,
1792                    "the meaning of '\\a' is different in traditional C");
1793       c = TARGET_BELL;
1794       break;
1795
1796     case 'e': case 'E':
1797       if (CPP_PEDANTIC (pfile))
1798         cpp_error (pfile, DL_PEDWARN,
1799                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1800       c = TARGET_ESC;
1801       break;
1802
1803     case 'u': case 'U':
1804       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1805       break;
1806
1807     case 'x':
1808       if (CPP_WTRADITIONAL (pfile))
1809         cpp_error (pfile, DL_WARNING,
1810                    "the meaning of '\\x' is different in traditional C");
1811
1812       {
1813         const unsigned char *first = str;
1814         cppchar_t value = 0, overflow = 0;
1815
1816         for (; str < limit && ISXDIGIT (*str); str++)
1817           {
1818             /* Note any bits the shift below is about to lose.  */
1819             overflow |= value ^ (value << 4 >> 4);
1820             value = (value << 4) + hex_digit_value (*str);
1821           }
1822
1823         if (str == first)
1824           cpp_error (pfile, DL_ERROR,
1825                        "\\x used with no following hex digits");
1826
1827         if (overflow | (value != (value & mask)))
1828           {
1829             cpp_error (pfile, DL_PEDWARN,
1830                        "hex escape sequence out of range");
1831             value &= mask;
1832           }
1833         c = value;
1834       }
1835       break;
1836
1837     case '0':  case '1':  case '2':  case '3':
1838     case '4':  case '5':  case '6':  case '7':
1839       {
1840         /* At most three octal digits; the first is already in C.  */
1841         cppchar_t value = c - '0';
1842         unsigned int more;
1843
1844         for (more = 2; more != 0 && str < limit; more--, str++)
1845           {
1846             if (*str < '0' || *str > '7')
1847               break;
1848             value = (value << 3) + *str - '0';
1849           }
1850
1851         if (value != (value & mask))
1852           {
1853             cpp_error (pfile, DL_PEDWARN,
1854                        "octal escape sequence out of range");
1855             value &= mask;
1856           }
1857         c = value;
1858       }
1859       break;
1860
1861     default:
1862       unknown = 1;
1863       break;
1864     }
1865
1866   /* Complain about anything that was not recognized above; show the
1867      character itself when it is a graphic one, its octal code if not.  */
1868   if (unknown)
1869     {
1870       if (ISGRAPH (c))
1871         cpp_error (pfile, DL_PEDWARN,
1872                    "unknown escape sequence '\\%c'", (int) c);
1873       else
1874         cpp_error (pfile, DL_PEDWARN,
1875                    "unknown escape sequence: '\\%03o'", (int) c);
1876     }
1877
1878   /* Whatever the sequence was, the value returned must fit in WIDTH
1879      bits.  */
1880   if (c > mask)
1881     {
1882       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1883       c &= mask;
1884     }
1885
1886   *pstr = str;
1887   return c;
1888 }
1889
1890 /* Interpret a (possibly wide) character constant in TOKEN.
1891    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1892    points to a variable that is filled in with the number of
1893    characters seen, and UNSIGNEDP to a variable that indicates whether
1894    the result has signed type.  */
1895 cppchar_t
1896 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1897      cpp_reader *pfile;
1898      const cpp_token *token;
1899      unsigned int *pchars_seen;
1900      int *unsignedp;
1901 {
1902   const unsigned char *str = token->val.str.text;
1903   const unsigned char *limit = str + token->val.str.len;
1904   unsigned int chars_seen = 0;
1905   size_t width, max_chars;
1906   cppchar_t c, mask, result = 0;
1907   bool unsigned_p;
1908
1909 #ifdef MULTIBYTE_CHARS
1910   (void) local_mbtowc (NULL, NULL, 0);
1911 #endif
1912
1913   /* Width in bits.  */
1914   if (token->type == CPP_CHAR)
1915     {
1916       width = CPP_OPTION (pfile, char_precision);
1917       max_chars = CPP_OPTION (pfile, int_precision) / width;
1918       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1919     }
1920   else
1921     {
1922       width = CPP_OPTION (pfile, wchar_precision);
1923       max_chars = 1;
1924       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1925     }
1926
1927   if (width < BITS_PER_CPPCHAR_T)
1928     mask = ((cppchar_t) 1 << width) - 1;
1929   else
1930     mask = ~0;
1931
1932   while (str < limit)
1933     {
1934 #ifdef MULTIBYTE_CHARS
1935       wchar_t wc;
1936       int char_len;
1937
1938       char_len = local_mbtowc (&wc, str, limit - str);
1939       if (char_len == -1)
1940         {
1941           cpp_error (pfile, DL_WARNING,
1942                      "ignoring invalid multibyte character");
1943           c = *str++;
1944         }
1945       else
1946         {
1947           str += char_len;
1948           c = wc;
1949         }
1950 #else
1951       c = *str++;
1952 #endif
1953
1954       if (c == '\\')
1955         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1956
1957 #ifdef MAP_CHARACTER
1958       if (ISPRINT (c))
1959         c = MAP_CHARACTER (c);
1960 #endif
1961
1962       chars_seen++;
1963
1964       /* Truncate the character, scale the result and merge the two.  */
1965       c &= mask;
1966       if (width < BITS_PER_CPPCHAR_T)
1967         result = (result << width) | c;
1968       else
1969         result = c;
1970     }
1971
1972   if (chars_seen == 0)
1973     cpp_error (pfile, DL_ERROR, "empty character constant");
1974   else if (chars_seen > 1)
1975     {
1976       /* Multichar charconsts are of type int and therefore signed.  */
1977       unsigned_p = 0;
1978
1979       if (chars_seen > max_chars)
1980         {
1981           chars_seen = max_chars;
1982           cpp_error (pfile, DL_WARNING,
1983                      "character constant too long for its type");
1984         }
1985       else if (CPP_OPTION (pfile, warn_multichar))
1986         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1987     }
1988
1989   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1990      in WIDTH bits, but for multi-char charconsts it's value is the
1991      full target type's width.  */
1992   if (chars_seen > 1)
1993     width *= max_chars;
1994   if (width < BITS_PER_CPPCHAR_T)
1995     {
1996       mask = ((cppchar_t) 1 << width) - 1;
1997       if (unsigned_p || !(result & (1 << (width - 1))))
1998         result &= mask;
1999       else
2000         result |= ~mask;
2001     }
2002
2003   *pchars_seen = chars_seen;
2004   *unsignedp = unsigned_p;
2005   return result;
2006 }
2007
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area a newly created buffer is
   given.  BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer
   that is still considered a reasonable fit for a request of MIN_SIZE
   bytes.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size to request
   when extending BUFF: MIN_EXTRA plus twice the number of bytes
   between BUFF's cur and limit pointers.  Every macro argument is
   parenthesized so that arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2022
2023 /* Create a new allocation buffer.  Place the control block at the end
2024    of the buffer, so that buffer overflows will cause immediate chaos.  */
2025 static _cpp_buff *
2026 new_buff (len)
2027      size_t len;
2028 {
2029   _cpp_buff *result;
2030   unsigned char *base;
2031
2032   if (len < MIN_BUFF_SIZE)
2033     len = MIN_BUFF_SIZE;
2034   len = CPP_ALIGN (len);
2035
2036   base = xmalloc (len + sizeof (_cpp_buff));
2037   result = (_cpp_buff *) (base + len);
2038   result->base = base;
2039   result->cur = base;
2040   result->limit = base + len;
2041   result->next = NULL;
2042   return result;
2043 }
2044
2045 /* Place a chain of unwanted allocation buffers on the free list.  */
2046 void
2047 _cpp_release_buff (pfile, buff)
2048      cpp_reader *pfile;
2049      _cpp_buff *buff;
2050 {
2051   _cpp_buff *end = buff;
2052
2053   while (end->next)
2054     end = end->next;
2055   end->next = pfile->free_buffs;
2056   pfile->free_buffs = buff;
2057 }
2058
2059 /* Return a free buffer of size at least MIN_SIZE.  */
2060 _cpp_buff *
2061 _cpp_get_buff (pfile, min_size)
2062      cpp_reader *pfile;
2063      size_t min_size;
2064 {
2065   _cpp_buff *result, **p;
2066
2067   for (p = &pfile->free_buffs;; p = &(*p)->next)
2068     {
2069       size_t size;
2070
2071       if (*p == NULL)
2072         return new_buff (min_size);
2073       result = *p;
2074       size = result->limit - result->base;
2075       /* Return a buffer that's big enough, but don't waste one that's
2076          way too big.  */
2077       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2078         break;
2079     }
2080
2081   *p = result->next;
2082   result->next = NULL;
2083   result->cur = result->base;
2084   return result;
2085 }
2086
2087 /* Creates a new buffer with enough space to hold the uncommitted
2088    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2089    the excess bytes to the new buffer.  Chains the new buffer after
2090    BUFF, and returns the new buffer.  */
2091 _cpp_buff *
2092 _cpp_append_extend_buff (pfile, buff, min_extra)
2093      cpp_reader *pfile;
2094      _cpp_buff *buff;
2095      size_t min_extra;
2096 {
2097   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2098   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2099
2100   buff->next = new_buff;
2101   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2102   return new_buff;
2103 }
2104
2105 /* Creates a new buffer with enough space to hold the uncommitted
2106    remaining bytes of the buffer pointed to by BUFF, and at least
2107    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2108    Chains the new buffer before the buffer pointed to by BUFF, and
2109    updates the pointer to point to the new buffer.  */
2110 void
2111 _cpp_extend_buff (pfile, pbuff, min_extra)
2112      cpp_reader *pfile;
2113      _cpp_buff **pbuff;
2114      size_t min_extra;
2115 {
2116   _cpp_buff *new_buff, *old_buff = *pbuff;
2117   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2118
2119   new_buff = _cpp_get_buff (pfile, size);
2120   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2121   new_buff->next = old_buff;
2122   *pbuff = new_buff;
2123 }
2124
2125 /* Free a chain of buffers starting at BUFF.  */
2126 void
2127 _cpp_free_buff (buff)
2128      _cpp_buff *buff;
2129 {
2130   _cpp_buff *next;
2131
2132   for (; buff; buff = next)
2133     {
2134       next = buff->next;
2135       free (buff->base);
2136     }
2137 }
2138
2139 /* Allocate permanent, unaligned storage of length LEN.  */
2140 unsigned char *
2141 _cpp_unaligned_alloc (pfile, len)
2142      cpp_reader *pfile;
2143      size_t len;
2144 {
2145   _cpp_buff *buff = pfile->u_buff;
2146   unsigned char *result = buff->cur;
2147
2148   if (len > (size_t) (buff->limit - result))
2149     {
2150       buff = _cpp_get_buff (pfile, len);
2151       buff->next = pfile->u_buff;
2152       pfile->u_buff = buff;
2153       result = buff->cur;
2154     }
2155
2156   buff->cur = result + len;
2157   return result;
2158 }
2159
2160 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2161    That buffer is used for growing allocations when saving macro
2162    replacement lists in a #define, and when parsing an answer to an
2163    assertion in #assert, #unassert or #if (and therefore possibly
2164    whilst expanding macros).  It therefore must not be used by any
2165    code that they might call: specifically the lexer and the guts of
2166    the macro expander.
2167
2168    All existing other uses clearly fit this restriction: storing
2169    registered pragmas during initialization.  */
2170 unsigned char *
2171 _cpp_aligned_alloc (pfile, len)
2172      cpp_reader *pfile;
2173      size_t len;
2174 {
2175   _cpp_buff *buff = pfile->a_buff;
2176   unsigned char *result = buff->cur;
2177
2178   if (len > (size_t) (buff->limit - result))
2179     {
2180       buff = _cpp_get_buff (pfile, len);
2181       buff->next = pfile->a_buff;
2182       pfile->a_buff = buff;
2183       result = buff->cur;
2184     }
2185
2186   buff->cur = result + len;
2187   return result;
2188 }