gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "cpplib.h"
  28 #include "cpphash.h"
  29
  30 #ifdef MULTIBYTE_CHARS
  31 #include "mbchar.h"
  32 #include <locale.h>
  33 #endif
  34
  /* Describes where the spelling of a token type comes from; the value
     is stored in the CATEGORY field of each token_spellings entry.
     Tokens with SPELL_STRING store their spelling in the token list,
     and its length in the token->val.name.len.  */
  enum spell_type
  {
    SPELL_OPERATOR = 0,   /* Category given to every OP table entry.  */
    SPELL_CHAR     = 1,
    SPELL_IDENT    = 2,
    SPELL_NUMBER   = 3,
    SPELL_STRING   = 4,
    SPELL_NONE     = 5
  };
  46
     /* One entry of the token_spellings table; there is one entry per
        token type, filled in positionally by the OP and TK macros.  */
  47 struct token_spelling
  48 {
  49   enum spell_type category;    /* How tokens of this type are spelled.  */
  50   const unsigned char *name;   /* OP entries: the operator's spelling.
                                      TK entries: STRINGX of the enumerator.  */
  51 };
  52
     /* Source spellings of the digraphs.  */
  53 static const unsigned char *const digraph_spellings[] =
  54 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  55
     /* Build token_spellings by expanding TTYPE_TABLE with local
        definitions of OP and TK: an OP entry gets the category
        SPELL_OPERATOR and the string S as its name; a TK entry gets the
        category S and STRINGX (E) as its name (presumably the
        enumerator's identifier as a string -- STRINGX is defined
        elsewhere).  Both macros are undefined again afterwards.  */
  56 #define OP(e, s) { SPELL_OPERATOR, U s           },
  57 #define TK(e, s) { s,              U STRINGX (e) },
  58 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  59 #undef OP
  60 #undef TK
  61
     /* Spelling category and name recorded in the table for the type of
        TOKEN (a pointer to a token).  */
  62 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  63 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     /* Move the read pointer back to the saved backup position.  Expands
        to code using a variable named BUFFER, which must be in scope at
        the point of use.  */
  64 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  65
     /* Prototypes for the static helpers of this file.  First the
        character-level readers: newlines, trigraphs and escaped
        newlines.  */
  66 static void handle_newline PARAMS ((cpp_reader *));
  67 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  68 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  69
     /* Comments, whitespace, the individual kinds of token, and token
        run management.  */
  70 static int skip_block_comment PARAMS ((cpp_reader *));
  71 static int skip_line_comment PARAMS ((cpp_reader *));
  72 static void adjust_column PARAMS ((cpp_reader *));
  73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  76                                   unsigned int *));
  77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  80 static bool trigraph_p PARAMS ((cpp_reader *));
  81 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  82                                   cppchar_t));
  83 static bool continue_after_nul PARAMS ((cpp_reader *));
  84 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  85 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  86                                    const unsigned char *, cppchar_t *));
  87 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  88
     /* Miscellaneous helpers.  */
  89 static unsigned int hex_digit_value PARAMS ((unsigned int));
  90 static _cpp_buff *new_buff PARAMS ((size_t));
  91
  92 /* Utility routine:
  93
  94    Compares, the token TOKEN to the NUL-terminated string STRING.
  95    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  96 int
  97 cpp_ideq (token, string)
  98      const cpp_token *token;
  99      const char *string;
 100 {
 101   if (token->type != CPP_NAME)
 102     return 0;
 103
 104   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 105 }
 106
 107 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 108    Returns with buffer->cur pointing to the character immediately
 109    following the newline (combination).  */
 110 static void
 111 handle_newline (pfile)
 112      cpp_reader *pfile;
 113 {
 114   cpp_buffer *buffer = pfile->buffer;
 115
 116   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 117      only accept CR-LF; maybe we should fall back to that behavior?  */
 118   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 119     buffer->cur++;
 120
 121   buffer->line_base = buffer->cur;
 122   buffer->col_adjust = 0;
 123   pfile->line++;
 124 }
 125
 126 /* Subroutine of skip_escaped_newlines; called when a 3-character
 127    sequence beginning with "??" is encountered.  buffer->cur points to
 128    the second '?'.
 129
 130    Warn if necessary, and returns true if the sequence forms a
 131    trigraph and the trigraph should be honored.  */
 132 static bool
 133 trigraph_p (pfile)
 134      cpp_reader *pfile;
 135 {
 136   cpp_buffer *buffer = pfile->buffer;
 137   cppchar_t from_char = buffer->cur[1];
 138   bool accept;
 139
 140   if (!_cpp_trigraph_map[from_char])
 141     return false;
 142
 143   accept = CPP_OPTION (pfile, trigraphs);
 144
 145   /* Don't warn about trigraphs in comments.  */
 146   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 147     {
 148       if (accept)
 149         cpp_error_with_line (pfile, DL_WARNING,
 150                              pfile->line, CPP_BUF_COL (buffer) - 1,
 151                              "trigraph ??%c converted to %c",
 152                              (int) from_char,
 153                              (int) _cpp_trigraph_map[from_char]);
 154       else if (buffer->cur != buffer->last_Wtrigraphs)
 155         {
 156           buffer->last_Wtrigraphs = buffer->cur;
 157           cpp_error_with_line (pfile, DL_WARNING,
 158                                pfile->line, CPP_BUF_COL (buffer) - 1,
 159                                "trigraph ??%c ignored", (int) from_char);
 160         }
 161     }
 162
 163   return accept;
 164 }
 165
 166 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 167    lie in buffer->cur[-1].  Returns the next byte, which will be in
 168    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 169    2 of the ISO C standard.  */
 170 static cppchar_t
 171 skip_escaped_newlines (pfile)
 172      cpp_reader *pfile;
 173 {
 174   cpp_buffer *buffer = pfile->buffer;
 175   cppchar_t next = buffer->cur[-1];
 176
 177   /* Only do this if we apply stages 1 and 2.  */
 178   if (!buffer->from_stage3)
 179     {
 180       const unsigned char *saved_cur;
 181       cppchar_t next1;
 182
 183       do
 184         {
 185           if (next == '?')
 186             {
 187               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 188                 break;
 189
 190               /* Translate the trigraph.  */
 191               next = _cpp_trigraph_map[buffer->cur[1]];
 192               buffer->cur += 2;
 193               if (next != '\\')
 194                 break;
 195             }
 196
 197           if (buffer->cur == buffer->rlimit)
 198             break;
 199
 200           /* We have a backslash, and room for at least one more
 201              character.  Skip horizontal whitespace.  */
 202           saved_cur = buffer->cur;
 203           do
 204             next1 = *buffer->cur++;
 205           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 206
 207           if (!is_vspace (next1))
 208             {
 209               buffer->cur = saved_cur;
 210               break;
 211             }
 212
 213           if (saved_cur != buffer->cur - 1
 214               && !pfile->state.lexing_comment)
 215             cpp_error (pfile, DL_WARNING,
 216                        "backslash and newline separated by space");
 217
 218           handle_newline (pfile);
 219           buffer->backup_to = buffer->cur;
 220           if (buffer->cur == buffer->rlimit)
 221             {
 222               cpp_error (pfile, DL_PEDWARN,
 223                          "backslash-newline at end of file");
 224               next = EOF;
 225             }
 226           else
 227             next = *buffer->cur++;
 228         }
 229       while (next == '\\' || next == '?');
 230     }
 231
 232   return next;
 233 }
 234
 235 /* Obtain the next character, after trigraph conversion and skipping
 236    an arbitrarily long string of escaped newlines.  The common case of
 237    no trigraphs or escaped newlines falls through quickly.  On return,
 238    buffer->backup_to points to where to return to if the character is
 239    not to be processed.  */
 240 static cppchar_t
 241 get_effective_char (pfile)
 242      cpp_reader *pfile;
 243 {
 244   cppchar_t next;
 245   cpp_buffer *buffer = pfile->buffer;
 246
 247   buffer->backup_to = buffer->cur;
 248   next = *buffer->cur++;
 249   if (__builtin_expect (next == '?' || next == '\\', 0))
 250     next = skip_escaped_newlines (pfile);
 251
 252   return next;
 253 }
 254
 255 /* Skip a C-style block comment.  We find the end of the comment by
 256    seeing if an asterisk is before every '/' we encounter.  Returns
 257    nonzero if comment terminated by EOF, zero otherwise.  */
 258 static int
 259 skip_block_comment (pfile)
 260      cpp_reader *pfile;
 261 {
 262   cpp_buffer *buffer = pfile->buffer;
 263   cppchar_t c = EOF, prevc = EOF;
 264
 265   pfile->state.lexing_comment = 1;
 266   while (buffer->cur != buffer->rlimit)
 267     {
 268       prevc = c, c = *buffer->cur++;
 269
 270       /* FIXME: For speed, create a new character class of characters
 271          of interest inside block comments.  */
 272       if (c == '?' || c == '\\')
 273         c = skip_escaped_newlines (pfile);
 274
 275       /* People like decorating comments with '*', so check for '/'
 276          instead for efficiency.  */
 277       if (c == '/')
 278         {
 279           if (prevc == '*')
 280             break;
 281
 282           /* Warn about potential nested comments, but not if the '/'
 283              comes immediately before the true comment delimiter.
 284              Don't bother to get it right across escaped newlines.  */
 285           if (CPP_OPTION (pfile, warn_comments)
 286               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 287             cpp_error_with_line (pfile, DL_WARNING,
 288                                  pfile->line, CPP_BUF_COL (buffer),
 289                                  "\"/*\" within comment");
 290         }
 291       else if (is_vspace (c))
 292         handle_newline (pfile);
 293       else if (c == '\t')
 294         adjust_column (pfile);
 295     }
 296
 297   pfile->state.lexing_comment = 0;
 298   return c != '/' || prevc != '*';
 299 }
 300
 301 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 302    terminating newline.  Handles escaped newlines.  Returns nonzero
 303    if a multiline comment.  */
 304 static int
 305 skip_line_comment (pfile)
 306      cpp_reader *pfile;
 307 {
 308   cpp_buffer *buffer = pfile->buffer;
 309   unsigned int orig_line = pfile->line;
 310   cppchar_t c;
 311 #ifdef MULTIBYTE_CHARS
 312   wchar_t wc;
 313   int char_len;
 314 #endif
 315
 316   pfile->state.lexing_comment = 1;
 317 #ifdef MULTIBYTE_CHARS
 318   /* Reset multibyte conversion state.  */
 319   (void) local_mbtowc (NULL, NULL, 0);
 320 #endif
 321   do
 322     {
 323       if (buffer->cur == buffer->rlimit)
 324         goto at_eof;
 325
 326 #ifdef MULTIBYTE_CHARS
 327       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 328                                buffer->rlimit - buffer->cur);
 329       if (char_len == -1)
 330         {
 331           cpp_error (pfile, DL_WARNING,
 332                      "ignoring invalid multibyte character");
 333           char_len = 1;
 334           c = *buffer->cur++;
 335         }
 336       else
 337         {
 338           buffer->cur += char_len;
 339           c = wc;
 340         }
 341 #else
 342       c = *buffer->cur++;
 343 #endif
 344       if (c == '?' || c == '\\')
 345         c = skip_escaped_newlines (pfile);
 346     }
 347   while (!is_vspace (c));
 348
 349   /* Step back over the newline, except at EOF.  */
 350   buffer->cur--;
 351  at_eof:
 352
 353   pfile->state.lexing_comment = 0;
 354   return orig_line != pfile->line;
 355 }
 356
 357 /* pfile->buffer->cur is one beyond the \t character.  Update
 358    col_adjust so we track the column correctly.  */
 359 static void
 360 adjust_column (pfile)
 361      cpp_reader *pfile;
 362 {
 363   cpp_buffer *buffer = pfile->buffer;
 364   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 365
 366   /* Round it up to multiple of the tabstop, but subtract 1 since the
 367      tab itself occupies a character position.  */
 368   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 369                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 370 }
 371
 372 /* Skips whitespace, saving the next non-whitespace character.
 373    Adjusts pfile->col_adjust to account for tabs.  Without this,
 374    tokens might be assigned an incorrect column.  */
 375 static int
 376 skip_whitespace (pfile, c)
 377      cpp_reader *pfile;
 378      cppchar_t c;
 379 {
 380   cpp_buffer *buffer = pfile->buffer;
 381   unsigned int warned = 0;
 382
 383   do
 384     {
 385       /* Horizontal space always OK.  */
 386       if (c == ' ')
 387         ;
 388       else if (c == '\t')
 389         adjust_column (pfile);
 390       /* Just \f \v or \0 left.  */
 391       else if (c == '\0')
 392         {
 393           if (buffer->cur - 1 == buffer->rlimit)
 394             return 0;
 395           if (!warned)
 396             {
 397               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 398               warned = 1;
 399             }
 400         }
 401       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 402         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 403                              CPP_BUF_COL (buffer),
 404                              "%s in preprocessing directive",
 405                              c == '\f' ? "form feed" : "vertical tab");
 406
 407       c = *buffer->cur++;
 408     }
 409   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 410   while (is_nvspace (c));
 411
 412   buffer->cur--;
 413   return 1;
 414 }
 415
 416 /* See if the characters of a number token are valid in a name (no
 417    '.', '+' or '-').  */
 418 static int
 419 name_p (pfile, string)
 420      cpp_reader *pfile;
 421      const cpp_string *string;
 422 {
 423   unsigned int i;
 424
 425   for (i = 0; i < string->len; i++)
 426     if (!is_idchar (string->text[i]))
 427       return 0;
 428
 429   return 1;
 430 }
 431
 432 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 433    a critical inner loop.  The common case is an identifier which has
 434    not been split by backslash-newline, does not contain a dollar
 435    sign, and has already been scanned (roughly 10:1 ratio of
 436    seen:unseen identifiers in normal code; the distribution is
 437    Poisson-like).  Second most common case is a new identifier, not
 438    split and no dollar sign.  The other possibilities are rare and
 439    have been relegated to parse_slow.  */
 440 static cpp_hashnode *
 441 parse_identifier (pfile)
 442      cpp_reader *pfile;
 443 {
 444   cpp_hashnode *result;
 445   const uchar *cur, *base;
 446
 447   /* Fast-path loop.  Skim over a normal identifier.
 448      N.B. ISIDNUM does not include $.  */
 449   cur = pfile->buffer->cur;
 450   while (ISIDNUM (*cur))
 451     cur++;
 452
 453   /* Check for slow-path cases.  */
 454   if (*cur == '?' || *cur == '\\' || *cur == '$')
 455     {
 456       unsigned int len;
 457
 458       base = parse_slow (pfile, cur, 0, &len);
 459       result = (cpp_hashnode *)
 460         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 461     }
 462   else
 463     {
 464       base = pfile->buffer->cur - 1;
 465       pfile->buffer->cur = cur;
 466       result = (cpp_hashnode *)
 467         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 468     }
 469
 470   /* Rarely, identifiers require diagnostics when lexed.
 471      XXX Has to be forced out of the fast path.  */
 472   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 473                         && !pfile->state.skipping, 0))
 474     {
 475       /* It is allowed to poison the same identifier twice.  */
 476       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 477         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 478                    NODE_NAME (result));
 479
 480       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 481          replacement list of a variadic macro.  */
 482       if (result == pfile->spec_nodes.n__VA_ARGS__
 483           && !pfile->state.va_args_ok)
 484         cpp_error (pfile, DL_PEDWARN,
 485         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 486     }
 487
 488   return result;
 489 }
 490
 491 /* Slow path.  This handles numbers and identifiers which have been
 492    split, or contain dollar signs.  The part of the token from
 493    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 494    1 if it's a number, and 2 if it has a leading period.  Returns a
 495    pointer to the token's NUL-terminated spelling in permanent
 496    storage, and sets PLEN to its length.  */
 497 static uchar *
 498 parse_slow (pfile, cur, number_p, plen)
 499      cpp_reader *pfile;
 500      const uchar *cur;
 501      int number_p;
 502      unsigned int *plen;
 503 {
 504   cpp_buffer *buffer = pfile->buffer;
 505   const uchar *base = buffer->cur - 1;
 506   struct obstack *stack = &pfile->hash_table->stack;
 507   unsigned int c, prevc, saw_dollar = 0;
 508
 509   /* Place any leading period.  */
 510   if (number_p == 2)
 511     obstack_1grow (stack, '.');
 512
 513   /* Copy the part of the token which is known to be okay.  */
 514   obstack_grow (stack, base, cur - base);
 515
 516   /* Now process the part which isn't.  We are looking at one of
 517      '$', '\\', or '?' on entry to this loop.  */
 518   prevc = cur[-1];
 519   c = *cur++;
 520   buffer->cur = cur;
 521   for (;;)
 522     {
 523       /* Potential escaped newline?  */
 524       buffer->backup_to = buffer->cur - 1;
 525       if (c == '?' || c == '\\')
 526         c = skip_escaped_newlines (pfile);
 527
 528       if (!is_idchar (c))
 529         {
 530           if (!number_p)
 531             break;
 532           if (c != '.' && !VALID_SIGN (c, prevc))
 533             break;
 534         }
 535
 536       /* Handle normal identifier characters in this loop.  */
 537       do
 538         {
 539           prevc = c;
 540           obstack_1grow (stack, c);
 541
 542           if (c == '$')
 543             saw_dollar++;
 544
 545           c = *buffer->cur++;
 546         }
 547       while (is_idchar (c));
 548     }
 549
 550   /* Step back over the unwanted char.  */
 551   BACKUP ();
 552
 553   /* $ is not an identifier character in the standard, but is commonly
 554      accepted as an extension.  Don't warn about it in skipped
 555      conditional blocks.  */
 556   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 557     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 558
 559   /* Identifiers and numbers are null-terminated.  */
 560   *plen = obstack_object_size (stack);
 561   obstack_1grow (stack, '\0');
 562   return obstack_finish (stack);
 563 }
 564
 565 /* Parse a number, beginning with character C, skipping embedded
 566    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 567    before C.  Place the result in NUMBER.  */
 568 static void
 569 parse_number (pfile, number, leading_period)
 570      cpp_reader *pfile;
 571      cpp_string *number;
 572      int leading_period;
 573 {
 574   const uchar *cur;
 575
 576   /* Fast-path loop.  Skim over a normal number.
 577      N.B. ISIDNUM does not include $.  */
 578   cur = pfile->buffer->cur;
 579   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 580     cur++;
 581
 582   /* Check for slow-path cases.  */
 583   if (*cur == '?' || *cur == '\\' || *cur == '$')
 584     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 585   else
 586     {
 587       const uchar *base = pfile->buffer->cur - 1;
 588       uchar *dest;
 589
 590       number->len = cur - base + leading_period;
 591       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 592       dest[number->len] = '\0';
 593       number->text = dest;
 594
 595       if (leading_period)
 596         *dest++ = '.';
 597       memcpy (dest, base, cur - base);
 598       pfile->buffer->cur = cur;
 599     }
 600 }
 601
 602 /* Subroutine of parse_string.  */
 603 static int
 604 unescaped_terminator_p (pfile, dest)
 605      cpp_reader *pfile;
 606      const unsigned char *dest;
 607 {
 608   const unsigned char *start, *temp;
 609
 610   /* In #include-style directives, terminators are not escapeable.  */
 611   if (pfile->state.angled_headers)
 612     return 1;
 613
 614   start = BUFF_FRONT (pfile->u_buff);
 615
 616   /* An odd number of consecutive backslashes represents an escaped
 617      terminator.  */
 618   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 619     ;
 620
 621   return ((dest - temp) & 1) == 0;
 622 }
 623
 624 /* Parses a string, character constant, or angle-bracketed header file
 625    name.  Handles embedded trigraphs and escaped newlines.  The stored
 626    string is guaranteed NUL-terminated, but it is not guaranteed that
 627    this is the first NUL since embedded NULs are preserved.
 628
 629    When this function returns, buffer->cur points to the next
 630    character to be processed.  */
 631 static void
 632 parse_string (pfile, token, terminator)
 633      cpp_reader *pfile;
 634      cpp_token *token;
 635      cppchar_t terminator;
 636 {
 637   cpp_buffer *buffer = pfile->buffer;
 638   unsigned char *dest, *limit;
 639   cppchar_t c;
 640   bool warned_nulls = false;
 641 #ifdef MULTIBYTE_CHARS
 642   wchar_t wc;
 643   int char_len;
 644 #endif
 645
 646   dest = BUFF_FRONT (pfile->u_buff);
 647   limit = BUFF_LIMIT (pfile->u_buff);
 648
 649 #ifdef MULTIBYTE_CHARS
 650   /* Reset multibyte conversion state.  */
 651   (void) local_mbtowc (NULL, NULL, 0);
 652 #endif
 653   for (;;)
 654     {
 655       /* We need room for another char, possibly the terminating NUL.  */
 656       if ((size_t) (limit - dest) < 1)
 657         {
 658           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 659           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 660           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 661           limit = BUFF_LIMIT (pfile->u_buff);
 662         }
 663
 664 #ifdef MULTIBYTE_CHARS
 665       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 666                                buffer->rlimit - buffer->cur);
 667       if (char_len == -1)
 668         {
 669           cpp_error (pfile, DL_WARNING,
 670                      "ignoring invalid multibyte character");
 671           char_len = 1;
 672           c = *buffer->cur++;
 673         }
 674       else
 675         {
 676           buffer->cur += char_len;
 677           c = wc;
 678         }
 679 #else
 680       c = *buffer->cur++;
 681 #endif
 682
 683       /* Handle trigraphs, escaped newlines etc.  */
 684       if (c == '?' || c == '\\')
 685         c = skip_escaped_newlines (pfile);
 686
 687       if (c == terminator)
 688         {
 689           if (unescaped_terminator_p (pfile, dest))
 690             break;
 691         }
 692       else if (is_vspace (c))
 693         {
 694           /* No string literal may extend over multiple lines.  In
 695              assembly language, suppress the error except for <>
 696              includes.  This is a kludge around not knowing where
 697              comments are.  */
 698         unterminated:
 699           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 700             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 701                        (int) terminator);
 702           buffer->cur--;
 703           break;
 704         }
 705       else if (c == '\0')
 706         {
 707           if (buffer->cur - 1 == buffer->rlimit)
 708             goto unterminated;
 709           if (!warned_nulls)
 710             {
 711               warned_nulls = true;
 712               cpp_error (pfile, DL_WARNING,
 713                          "null character(s) preserved in literal");
 714             }
 715         }
 716 #ifdef MULTIBYTE_CHARS
 717       if (char_len > 1)
 718         {
 719           for ( ; char_len > 0; --char_len)
 720             *dest++ = (*buffer->cur - char_len);
 721         }
 722       else
 723 #endif
 724         *dest++ = c;
 725     }
 726
 727   *dest = '\0';
 728
 729   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 730   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 731   BUFF_FRONT (pfile->u_buff) = dest + 1;
 732 }
 733
 734 /* The stored comment includes the comment start and any terminator.  */
 735 static void
 736 save_comment (pfile, token, from, type)
 737      cpp_reader *pfile;
 738      cpp_token *token;
 739      const unsigned char *from;
 740      cppchar_t type;
 741 {
 742   unsigned char *buffer;
 743   unsigned int len, clen;
 744
 745   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 746
 747   /* C++ comments probably (not definitely) have moved past a new
 748      line, which we don't want to save in the comment.  */
 749   if (is_vspace (pfile->buffer->cur[-1]))
 750     len--;
 751
 752   /* If we are currently in a directive, then we need to store all
 753      C++ comments as C comments internally, and so we need to
 754      allocate a little extra space in that case.
 755
 756      Note that the only time we encounter a directive here is
 757      when we are saving comments in a "#define".  */
 758   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 759
 760   buffer = _cpp_unaligned_alloc (pfile, clen);
 761
 762   token->type = CPP_COMMENT;
 763   token->val.str.len = clen;
 764   token->val.str.text = buffer;
 765
 766   buffer[0] = '/';
 767   memcpy (buffer + 1, from, len - 1);
 768
 769   /* Finish conversion to a C comment, if necessary.  */
 770   if (pfile->state.in_directive && type == '/')
 771     {
 772       buffer[1] = '*';
 773       buffer[clen - 2] = '*';
 774       buffer[clen - 1] = '/';
 775     }
 776 }
 777
 778 /* Allocate COUNT tokens for RUN.  */
 779 void
 780 _cpp_init_tokenrun (run, count)
 781      tokenrun *run;
 782      unsigned int count;
 783 {
 784   run->base = xnewvec (cpp_token, count);
 785   run->limit = run->base + count;
 786   run->next = NULL;
 787 }
 788
 789 /* Returns the next tokenrun, or creates one if there is none.  */
 790 static tokenrun *
 791 next_tokenrun (run)
 792      tokenrun *run;
 793 {
 794   if (run->next == NULL)
 795     {
 796       run->next = xnew (tokenrun);
 797       run->next->prev = run;
 798       _cpp_init_tokenrun (run->next, 250);
 799     }
 800
 801   return run->next;
 802 }
 803
 804 /* Allocate a single token that is invalidated at the same time as the
 805    rest of the tokens on the line.  Has its line and col set to the
 806    same as the last lexed token, so that diagnostics appear in the
 807    right place.  */
 808 cpp_token *
 809 _cpp_temp_token (pfile)
 810      cpp_reader *pfile;
 811 {
 812   cpp_token *old, *result;
 813
 814   old = pfile->cur_token - 1;
 815   if (pfile->cur_token == pfile->cur_run->limit)
 816     {
 817       pfile->cur_run = next_tokenrun (pfile->cur_run);
 818       pfile->cur_token = pfile->cur_run->base;
 819     }
 820
 821   result = pfile->cur_token++;
 822   result->line = old->line;
 823   result->col = old->col;
 824   return result;
 825 }
 826
 827 /* Lex a token into RESULT (external interface).  Takes care of issues
 828    like directive handling, token lookahead, multiple include
 829    optimization and skipping.  */
 830 const cpp_token *
 831 _cpp_lex_token (pfile)
 832      cpp_reader *pfile;
 833 {
 834   cpp_token *result;
 835
 836   for (;;)
 837     {
 838       if (pfile->cur_token == pfile->cur_run->limit)
 839         {
 840           pfile->cur_run = next_tokenrun (pfile->cur_run);
 841           pfile->cur_token = pfile->cur_run->base;
 842         }
 843
 844       if (pfile->lookaheads)
 845         {
 846           pfile->lookaheads--;
 847           result = pfile->cur_token++;
 848         }
 849       else
 850         result = _cpp_lex_direct (pfile);
 851
 852       if (result->flags & BOL)
 853         {
 854           /* Is this a directive.  If _cpp_handle_directive returns
 855              false, it is an assembler #.  */
 856           if (result->type == CPP_HASH
 857               /* 6.10.3 p 11: Directives in a list of macro arguments
 858                  gives undefined behavior.  This implementation
 859                  handles the directive as normal.  */
 860               && pfile->state.parsing_args != 1
 861               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 862             continue;
 863           if (pfile->cb.line_change && !pfile->state.skipping)
 864             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 865         }
 866
 867       /* We don't skip tokens in directives.  */
 868       if (pfile->state.in_directive)
 869         break;
 870
 871       /* Outside a directive, invalidate controlling macros.  At file
 872          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 873          get here and MI optimisation works.  */
 874       pfile->mi_valid = false;
 875
 876       if (!pfile->state.skipping || result->type == CPP_EOF)
 877         break;
 878     }
 879
 880   return result;
 881 }
 882
 883 /* A NUL terminates the current buffer.  For ISO preprocessing this is
 884    EOF, but for traditional preprocessing it indicates we need a line
 885    refill.  Returns TRUE to continue preprocessing (the line refill said so, or
 886    a buffer whose return_at_eof is clear was popped), else FALSE to return a CPP_EOF.  */
 887 static bool
 888 continue_after_nul (pfile)
 889      cpp_reader *pfile;
 890 {
 891   cpp_buffer *buffer = pfile->buffer;
 892   bool more = false;
 893   /* BOL is stored in this buffer's saved_flags on every path below.  */
 894   buffer->saved_flags = BOL;
 895   if (CPP_OPTION (pfile, traditional))
 896     {
 897       if (pfile->state.in_directive)
 898         return false;  /* No refill inside a directive.  */
 899       /* Refill: lift the overlay, read a logical line, overlay the new output.  */
 900       _cpp_remove_overlay (pfile);
 901       more = _cpp_read_logical_line_trad (pfile);
 902       _cpp_overlay_buffer (pfile, pfile->out.base,
 903                            pfile->out.cur - pfile->out.base);
 904       pfile->line = pfile->out.first_line;
 905     }
 906   else
 907     {
 908       /* Stop parsing arguments with a CPP_EOF.  When we finally come
 909          back here, do the work of popping the buffer.  */
 910       if (!pfile->state.parsing_args)
 911         {
 912           if (buffer->cur != buffer->line_base)
 913             {
 914               /* Non-empty files should end in a newline.  Don't warn
 915                  for command line and _Pragma buffers.  */
 916               if (!buffer->from_stage3)
 917                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
 918               handle_newline (pfile);
 919             }
 920
 921           /* Similarly, finish an in-progress directive with CPP_EOF
 922              before popping the buffer.  The outermost buffer (no prev)
 923              is never popped here.  */
 923           if (!pfile->state.in_directive && buffer->prev)
 924             {
 925               more = !buffer->return_at_eof;  /* Read before the pop below.  */
 926               _cpp_pop_buffer (pfile);
 927             }
 928         }
 929     }
 930
 931   return more;
 932 }
 933
 934 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 935   do { /* Uses `pfile' and `result' from the expansion site.  */ \
 936     if (get_effective_char (pfile) == CHAR)     \
 937       result->type = THEN_TYPE;                 \
 938     else                                        \
 939       { /* No match: call BACKUP () and use ELSE_TYPE.  */ \
 940         BACKUP ();                              \
 941         result->type = ELSE_TYPE;               \
 942       }                                         \
 943   } while (0)
 944
 945 /* Lex a token into pfile->cur_token, which is also incremented, to
 946    get diagnostics pointing to the correct location.
 947
 948    Does not handle issues such as token lookahead, multiple-include
 949    optimisation, directives, skipping etc.  This function is only
 950    suitable for use by _cpp_lex_token, and in special cases like
 951    lex_expansion_token which doesn't care for any of these issues.
 952
 953    When meeting a newline, returns CPP_EOF if parsing a directive,
 954    otherwise returns to the start of the token buffer if permissible.
 955    Returns the location of the lexed token.  */
 956 cpp_token *
 957 _cpp_lex_direct (pfile)
 958      cpp_reader *pfile;
 959 {
 960   cppchar_t c;
 961   cpp_buffer *buffer;
 962   const unsigned char *comment_start;
 963   cpp_token *result = pfile->cur_token++;  /* Take the next token slot.  */
 964
 965  fresh_line:
 966   buffer = pfile->buffer;  /* Re-read: continue_after_nul may pop a buffer.  */
 967   result->flags = buffer->saved_flags;
 968   buffer->saved_flags = 0;
 969  update_tokens_line:
 970   result->line = pfile->line;
 971
 972  skipped_white:
 973   c = *buffer->cur++;
 974   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 975   /* Also re-entered with C replaced by the result of skip_escaped_newlines.  */
 976  trigraph:
 977   switch (c)
 978     {
 979     case ' ': case '\t': case '\f': case '\v': case '\0':
 980       result->flags |= PREV_WHITE;
 981       if (skip_whitespace (pfile, c))
 982         goto skipped_white;
 983
 984       /* End of buffer.  */
 985       buffer->cur--;
 986       if (continue_after_nul (pfile))
 987         goto fresh_line;
 988       result->type = CPP_EOF;
 989       break;
 990
 991     case '\n': case '\r':
 992       handle_newline (pfile);
 993       buffer->saved_flags = BOL;
 994       if (! pfile->state.in_directive)
 995         {
 996           if (pfile->state.parsing_args == 2)
 997             buffer->saved_flags |= PREV_WHITE;
 998           if (!pfile->keep_tokens)
 999             {  /* Tokens need not be kept: lex into the base run's first slot again.  */
1000               pfile->cur_run = &pfile->base_run;
1001               result = pfile->base_run.base;
1002               pfile->cur_token = result + 1;
1003             }
1004           goto fresh_line;
1005         }
1006       result->type = CPP_EOF;  /* A newline ends a directive.  */
1007       break;
1008
1009     case '?':
1010     case '\\':
1011       /* These could start an escaped newline, or '?' a trigraph.  Let
1012          skip_escaped_newlines do all the work.  */
1013       {
1014         unsigned int line = pfile->line;
1015
1016         c = skip_escaped_newlines (pfile);
1017         if (line != pfile->line)
1018           {
1019             buffer->cur--;
1020             /* We had at least one escaped newline of some sort.
1021                Update the token's line and column.  */
1022             goto update_tokens_line;
1023           }
1024       }
1025
1026       /* We are either the original '?' or '\\', or a trigraph.  */
1027       if (c == '?')
1028         result->type = CPP_QUERY;
1029       else if (c == '\\')
1030         goto random_char;
1031       else
1032         goto trigraph;
1033       break;
1034
1035     case '0': case '1': case '2': case '3': case '4':
1036     case '5': case '6': case '7': case '8': case '9':
1037       result->type = CPP_NUMBER;
1038       parse_number (pfile, &result->val.str, 0);
1039       break;
1040
1041     case 'L':
1042       /* 'L' may introduce wide characters or strings.  */
1043       {
1044         const unsigned char *pos = buffer->cur;
1045
1046         c = get_effective_char (pfile);
1047         if (c == '\'' || c == '"')
1048           {
1049             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1050             parse_string (pfile, result, c);
1051             break;
1052           }
1053         buffer->cur = pos;  /* Not a wide literal: un-read the lookahead.  */
1054       }
1055       /* Fall through.  */
1056
1057     start_ident:
1058     case '_':
1059     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1060     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1061     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1062     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1063     case 'y': case 'z':
1064     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1065     case 'G': case 'H': case 'I': case 'J': case 'K':  /* 'L' is handled above.  */
1066     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1067     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1068     case 'Y': case 'Z':
1069       result->type = CPP_NAME;
1070       result->val.node = parse_identifier (pfile);
1071
1072       /* Convert named operators to their proper types.  val.node is
1073          left pointing at the identifier, which is how such tokens are spelt.  */
1073       if (result->val.node->flags & NODE_OPERATOR)
1074         {
1075           result->flags |= NAMED_OP;
1076           result->type = result->val.node->directive_index;  /* Used here as the token type.  */
1077         }
1078       break;
1079
1080     case '\'':
1081     case '"':
1082       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1083       parse_string (pfile, result, c);
1084       break;
1085
1086     case '/':
1087       /* A potential block or line comment.  */
1088       comment_start = buffer->cur;
1089       c = get_effective_char (pfile);
1090
1091       if (c == '*')
1092         {
1093           if (skip_block_comment (pfile))
1094             cpp_error (pfile, DL_ERROR, "unterminated comment");
1095         }
1096       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1097                             || CPP_IN_SYSTEM_HEADER (pfile)))
1098         {
1099           /* Warn about comments only if pedantically GNUC89, and not
1100              in system headers.  */
1101           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1102               && ! buffer->warned_cplusplus_comments)
1103             {
1104               cpp_error (pfile, DL_PEDWARN,
1105                          "C++ style comments are not allowed in ISO C90");
1106               cpp_error (pfile, DL_PEDWARN,
1107                          "(this will be reported only once per input file)");
1108               buffer->warned_cplusplus_comments = 1;
1109             }
1110
1111           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1112             cpp_error (pfile, DL_WARNING, "multi-line comment");
1113         }
1114       else if (c == '=')
1115         {
1116           result->type = CPP_DIV_EQ;
1117           break;
1118         }
1119       else
1120         {
1121           BACKUP ();
1122           result->type = CPP_DIV;
1123           break;
1124         }
1125       /* Only the two comment branches above reach this point.  */
1126       if (!pfile->state.save_comments)
1127         {
1128           result->flags |= PREV_WHITE;  /* A dropped comment counts as whitespace.  */
1129           goto update_tokens_line;
1130         }
1131
1132       /* Save the comment as a token in its own right.  */
1133       save_comment (pfile, result, comment_start, c);
1134       break;
1135
1136     case '<':
1137       if (pfile->state.angled_headers)
1138         {
1139           result->type = CPP_HEADER_NAME;
1140           parse_string (pfile, result, '>');
1141           break;
1142         }
1143
1144       c = get_effective_char (pfile);
1145       if (c == '=')
1146         result->type = CPP_LESS_EQ;
1147       else if (c == '<')
1148         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1149       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1150         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1151       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1152         {
1153           result->type = CPP_OPEN_SQUARE;
1154           result->flags |= DIGRAPH;
1155         }
1156       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1157         {
1158           result->type = CPP_OPEN_BRACE;
1159           result->flags |= DIGRAPH;
1160         }
1161       else
1162         {
1163           BACKUP ();
1164           result->type = CPP_LESS;
1165         }
1166       break;
1167
1168     case '>':
1169       c = get_effective_char (pfile);
1170       if (c == '=')
1171         result->type = CPP_GREATER_EQ;
1172       else if (c == '>')
1173         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1174       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1175         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1176       else
1177         {
1178           BACKUP ();
1179           result->type = CPP_GREATER;
1180         }
1181       break;
1182
1183     case '%':
1184       c = get_effective_char (pfile);
1185       if (c == '=')
1186         result->type = CPP_MOD_EQ;
1187       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1188         {
1189           result->flags |= DIGRAPH;
1190           result->type = CPP_HASH;  /* "%:" unless "%:%:" follows.  */
1191           if (get_effective_char (pfile) == '%')
1192             {
1193               const unsigned char *pos = buffer->cur;
1194
1195               if (get_effective_char (pfile) == ':')
1196                 result->type = CPP_PASTE;
1197               else
1198                 buffer->cur = pos - 1;  /* Un-read the second '%' as well.  */
1199             }
1200           else
1201             BACKUP ();
1202         }
1203       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1204         {
1205           result->flags |= DIGRAPH;
1206           result->type = CPP_CLOSE_BRACE;
1207         }
1208       else
1209         {
1210           BACKUP ();
1211           result->type = CPP_MOD;
1212         }
1213       break;
1214
1215     case '.':
1216       result->type = CPP_DOT;  /* Default; refined below.  */
1217       c = get_effective_char (pfile);
1218       if (c == '.')
1219         {
1220           const unsigned char *pos = buffer->cur;
1221
1222           if (get_effective_char (pfile) == '.')
1223             result->type = CPP_ELLIPSIS;
1224           else
1225             buffer->cur = pos - 1;  /* Un-read the second '.' as well.  */
1226         }
1227       /* All known character sets have 0...9 contiguous.  */
1228       else if (ISDIGIT (c))
1229         {
1230           result->type = CPP_NUMBER;
1231           parse_number (pfile, &result->val.str, 1);
1232         }
1233       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1234         result->type = CPP_DOT_STAR;
1235       else
1236         BACKUP ();
1237       break;
1238
1239     case '+':
1240       c = get_effective_char (pfile);
1241       if (c == '+')
1242         result->type = CPP_PLUS_PLUS;
1243       else if (c == '=')
1244         result->type = CPP_PLUS_EQ;
1245       else
1246         {
1247           BACKUP ();
1248           result->type = CPP_PLUS;
1249         }
1250       break;
1251
1252     case '-':
1253       c = get_effective_char (pfile);
1254       if (c == '>')
1255         {
1256           result->type = CPP_DEREF;
1257           if (CPP_OPTION (pfile, cplusplus))
1258             {  /* "->*" is only looked for in C++.  */
1259               if (get_effective_char (pfile) == '*')
1260                 result->type = CPP_DEREF_STAR;
1261               else
1262                 BACKUP ();
1263             }
1264         }
1265       else if (c == '-')
1266         result->type = CPP_MINUS_MINUS;
1267       else if (c == '=')
1268         result->type = CPP_MINUS_EQ;
1269       else
1270         {
1271           BACKUP ();
1272           result->type = CPP_MINUS;
1273         }
1274       break;
1275
1276     case '&':
1277       c = get_effective_char (pfile);
1278       if (c == '&')
1279         result->type = CPP_AND_AND;
1280       else if (c == '=')
1281         result->type = CPP_AND_EQ;
1282       else
1283         {
1284           BACKUP ();
1285           result->type = CPP_AND;
1286         }
1287       break;
1288
1289     case '|':
1290       c = get_effective_char (pfile);
1291       if (c == '|')
1292         result->type = CPP_OR_OR;
1293       else if (c == '=')
1294         result->type = CPP_OR_EQ;
1295       else
1296         {
1297           BACKUP ();
1298           result->type = CPP_OR;
1299         }
1300       break;
1301
1302     case ':':
1303       c = get_effective_char (pfile);
1304       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1305         result->type = CPP_SCOPE;
1306       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1307         {
1308           result->flags |= DIGRAPH;
1309           result->type = CPP_CLOSE_SQUARE;
1310         }
1311       else
1312         {
1313           BACKUP ();
1314           result->type = CPP_COLON;
1315         }
1316       break;
1317
1318     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1319     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1320     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1321     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1322     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1323
1324     case '~': result->type = CPP_COMPL; break;
1325     case ',': result->type = CPP_COMMA; break;
1326     case '(': result->type = CPP_OPEN_PAREN; break;
1327     case ')': result->type = CPP_CLOSE_PAREN; break;
1328     case '[': result->type = CPP_OPEN_SQUARE; break;
1329     case ']': result->type = CPP_CLOSE_SQUARE; break;
1330     case '{': result->type = CPP_OPEN_BRACE; break;
1331     case '}': result->type = CPP_CLOSE_BRACE; break;
1332     case ';': result->type = CPP_SEMICOLON; break;
1333
1334       /* @ is a punctuator in Objective-C.  */
1335     case '@': result->type = CPP_ATSIGN; break;
1336
1337     case '$':
1338       if (CPP_OPTION (pfile, dollars_in_ident))
1339         goto start_ident;
1340       /* Fall through...  */
1341
1342     random_char:
1343     default:
1344       result->type = CPP_OTHER;  /* Anything else is passed on as a single character.  */
1345       result->val.c = c;
1346       break;
1347     }
1348
1349   return result;
1350 }
1351
1352 /* An upper bound on the number of bytes needed to spell TOKEN,
1353    including preceding whitespace.  TOKEN is not modified.  */
1354 unsigned int
1355 cpp_token_len (token)
1356      const cpp_token *token;
1357 {
1358   unsigned int len;
1359   /* A named operator has an operator type but is spelt from its identifier node (see cpp_spell_token), so count that node's length.  */
1360   switch (TOKEN_SPELL (token))
1361     {
1362     default:            len = (token->flags & NAMED_OP) ? NODE_LEN (token->val.node) : 0; break;
1363     case SPELL_NUMBER:
1364     case SPELL_STRING:  len = token->val.str.len;               break;
1365     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1366     }
1367   /* 1 for whitespace, 4 for comment delimiters.  Fixed spellings and string quotes/tag also fit in this slack.  */
1368   return len + 5;
1369 }
1370
1371 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1372    already contain enough space to hold the token's spelling.
1373    Returns a pointer to the character after the last character
1374    written.  No terminating NUL is stored.  */
1375 unsigned char *
1376 cpp_spell_token (pfile, token, buffer)
1377      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1378      const cpp_token *token;
1379      unsigned char *buffer;
1380 {
1381   switch (TOKEN_SPELL (token))
1382     {
1383     case SPELL_OPERATOR:
1384       {
1385         const unsigned char *spelling;
1386         unsigned char c;
1387         /* Digraph spelling first, then named operator, then the table name.  */
1388         if (token->flags & DIGRAPH)
1389           spelling
1390             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1391         else if (token->flags & NAMED_OP)
1392           goto spell_ident;  /* Spelt from token->val.node, like an identifier.  */
1393         else
1394           spelling = TOKEN_NAME (token);
1395
1396         while ((c = *spelling++) != '\0')
1397           *buffer++ = c;
1398       }
1399       break;
1400
1401     case SPELL_CHAR:
1402       *buffer++ = token->val.c;  /* A single byte.  */
1403       break;
1404
1405     spell_ident:
1406     case SPELL_IDENT:
1407       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1408       buffer += NODE_LEN (token->val.node);
1409       break;
1410
1411     case SPELL_NUMBER:
1412       memcpy (buffer, token->val.str.text, token->val.str.len);
1413       buffer += token->val.str.len;
1414       break;
1415
1416     case SPELL_STRING:
1417       {
1418         int left, right, tag;  /* Delimiters, and an 'L' prefix or '\0' for none.  */
1419         switch (token->type)
1420           {
1421           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1422           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1423           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1424           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1425           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1426           default:
1427             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1428                        TOKEN_NAME (token));
1429             return buffer;  /* Nothing written.  */
1430           }
1431         if (tag) *buffer++ = tag;
1432         *buffer++ = left;
1433         memcpy (buffer, token->val.str.text, token->val.str.len);
1434         buffer += token->val.str.len;
1435         *buffer++ = right;
1436       }
1437       break;
1438
1439     case SPELL_NONE:
1440       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1441       break;
1442     }
1443
1444   return buffer;
1445 }
1446
1447 /* Returns TOKEN spelt as a null-terminated string.  The string is
1448    freed when the reader is destroyed.  Useful for diagnostics.  */
1449 unsigned char *
1450 cpp_token_as_text (pfile, token)
1451      cpp_reader *pfile;
1452      const cpp_token *token;
1453 {
1454   unsigned int len = cpp_token_len (token);
1455   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1456   /* NOTE(review): no byte is added for the NUL; this relies on cpp_token_len exceeding the spelt length by at least one.  */
1457   end = cpp_spell_token (pfile, token, start);
1458   end[0] = '\0';
1459
1460   return start;
1461 }
1462
1463 /* Used by C front ends, which really should move to using
1464    cpp_token_as_text.  Returns the name stored for TYPE in token_spellings; TYPE is not range-checked.  */
1465 const char *
1466 cpp_type2name (type)
1467      enum cpp_ttype type;
1468 {
1469   return (const char *) token_spellings[type].name;
1470 }
1471
1472 /* Writes the spelling of token to FP, without any preceding space.
1473    Separated from cpp_spell_token for efficiency - to avoid stdio
1474    double-buffering.  Return values of the stdio calls are not checked.  */
1475 void
1476 cpp_output_token (token, fp)
1477      const cpp_token *token;
1478      FILE *fp;
1479 {
1480   switch (TOKEN_SPELL (token))
1481     {
1482     case SPELL_OPERATOR:
1483       {
1484         const unsigned char *spelling;
1485         int c;
1486         /* Same choice of spelling as the SPELL_OPERATOR case of cpp_spell_token.  */
1487         if (token->flags & DIGRAPH)
1488           spelling
1489             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1490         else if (token->flags & NAMED_OP)
1491           goto spell_ident;
1492         else
1493           spelling = TOKEN_NAME (token);
1494
1495         c = *spelling;
1496         do  /* The first character is written unconditionally.  */
1497           putc (c, fp);
1498         while ((c = *++spelling) != '\0');
1499       }
1500       break;
1501
1502     case SPELL_CHAR:
1503       putc (token->val.c, fp);
1504       break;
1505
1506     spell_ident:
1507     case SPELL_IDENT:
1508       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1509     break;
1510
1511     case SPELL_NUMBER:
1512       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1513       break;
1514
1515     case SPELL_STRING:
1516       {
1517         int left, right, tag;  /* Delimiters, and an 'L' prefix or '\0' for none.  */
1518         switch (token->type)
1519           {
1520           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1521           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1522           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1523           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1524           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1525           default:
1526             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1527             return;  /* Nothing is written to FP.  */
1528           }
1529         if (tag) putc (tag, fp);
1530         putc (left, fp);
1531         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1532         putc (right, fp);
1533       }
1534       break;
1535
1536     case SPELL_NONE:
1537       /* An error, most probably.  Nothing is written.  */
1538       break;
1539     }
1540 }
1541
1542 /* Compare two tokens.  Returns 1 if A and B agree in type, flags and the value their spelling category carries, else 0.  */
1543 int
1544 _cpp_equiv_tokens (a, b)
1545      const cpp_token *a, *b;
1546 {
1547   if (a->type == b->type && a->flags == b->flags)
1548     switch (TOKEN_SPELL (a))
1549       {
1550       default:                  /* Keep compiler happy.  */
1551       case SPELL_OPERATOR:
1552         return 1;               /* Type and flags already matched.  */
1553       case SPELL_CHAR:
1554         return a->val.c == b->val.c; /* Character.  */
1555       case SPELL_NONE:          /* Only macro arguments carry a value to compare.  */
1556         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1557       case SPELL_IDENT:
1558         return a->val.node == b->val.node;  /* Pointer identity of the nodes.  */
1559       case SPELL_NUMBER:
1560       case SPELL_STRING:
1561         return (a->val.str.len == b->val.str.len
1562                 && !memcmp (a->val.str.text, b->val.str.text,
1563                             a->val.str.len));
1564       }
1565
1566   return 0;
1567 }
1568
1569 /* Returns nonzero if a space should be inserted to avoid an
1570    accidental token paste for output.  For simplicity, it is
1571    conservative, and occasionally advises a space where one is not
1572    needed, e.g. "." and ".2".  TOKEN1 is the left token, TOKEN2 the right.  */
1573 int
1574 cpp_avoid_paste (pfile, token1, token2)
1575      cpp_reader *pfile;
1576      const cpp_token *token1, *token2;
1577 {
1578   enum cpp_ttype a = token1->type, b = token2->type;
1579   cppchar_t c;
1580   /* Named operators are spelt like identifiers, so treat them as names.  */
1581   if (token1->flags & NAMED_OP)
1582     a = CPP_NAME;
1583   if (token2->flags & NAMED_OP)
1584     b = CPP_NAME;
1585   /* C becomes the first character of TOKEN2's spelling when it is an operator or digraph; otherwise it stays EOF.  */
1586   c = EOF;
1587   if (token2->flags & DIGRAPH)
1588     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1589   else if (token_spellings[b].category == SPELL_OPERATOR)
1590     c = token_spellings[b].name[0];
1591
1592   /* Quickly get everything that can paste with an '='.  */
1593   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1594     return 1;
1595   /* Each case lists what TOKEN2 may begin with to form a longer token after A.  */
1596   switch (a)
1597     {
1598     case CPP_GREATER:   return c == '>' || c == '?';
1599     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1600     case CPP_PLUS:      return c == '+';
1601     case CPP_MINUS:     return c == '-' || c == '>';
1602     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1603     case CPP_MOD:       return c == ':' || c == '>';
1604     case CPP_AND:       return c == '&';
1605     case CPP_OR:        return c == '|';
1606     case CPP_COLON:     return c == ':' || c == '>';
1607     case CPP_DEREF:     return c == '*';
1608     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1609     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1610     case CPP_NAME:      return ((b == CPP_NUMBER
1611                                  && name_p (pfile, &token2->val.str))
1612                                 || b == CPP_NAME
1613                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1614     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1615                                 || c == '.' || c == '+' || c == '-');
1616     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1617                                 && token1->val.c == '@'
1618                                 && (b == CPP_NAME || b == CPP_STRING));
1619     default:            break;
1620     }
1621
1622   return 0;
1623 }
1624
1625 /* Output all the remaining tokens on the current line, and a newline
1626    character, to FP.  Leading whitespace is removed.  If there are
1627    macros, special token padding is not performed.  */
1628 void
1629 cpp_output_line (pfile, fp)
1630      cpp_reader *pfile;
1631      FILE *fp;
1632 {
1633   const cpp_token *token;
1634
1635   token = cpp_get_token (pfile);
1636   while (token->type != CPP_EOF)
1637     {
1638       cpp_output_token (token, fp);
1639       token = cpp_get_token (pfile);
1640       if (token->flags & PREV_WHITE)  /* Tested on the next token, so the first token's flag is never acted on.  */
1641         putc (' ', fp);
1642     }
1643
1644   putc ('\n', fp);
1645 }
1646
1647 /* Returns the value of a hexadecimal digit.  Aborts if C is not one according to hex_p.  */
1648 static unsigned int
1649 hex_digit_value (c)
1650      unsigned int c;
1651 {
1652   if (hex_p (c))
1653     return hex_value (c);
1654   else
1655     abort ();  /* Callers are expected to have checked C already.  */
1656 }
1657
1658 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1659    failure if cpplib is not parsing C++ or C99.  Such failure is
1660    silent, and no variables are updated.  Otherwise returns 0, and
1661    warns if -Wtraditional.
1662
1663    [lex.charset]: The character designated by the universal character
1664    name \UNNNNNNNN is that character whose character short name in
1665    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1666    universal character name \uNNNN is that character whose character
1667    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1668    for a universal character name is less than 0x20 or in the range
1669    0x7F-0x9F (inclusive), or if the universal character name
1670    designates a character in the basic source character set, then the
1671    program is ill-formed.
1672
1673    We assume that wchar_t is Unicode, so we don't need to do any
1674    mapping.  Is this ever wrong?
1675
1676    PC points to the 'u' or 'U', PSTR points to the byte after PC,
1677    LIMIT is the end of the string or charconst.  PSTR is updated to
1678    point after the UCS on return, and the UCS is written into PC.  */
1679
1680 static int
1681 maybe_read_ucs (pfile, pstr, limit, pc)
1682      cpp_reader *pfile;
1683      const unsigned char **pstr;
1684      const unsigned char *limit;
1685      cppchar_t *pc;
1686 {
1687   const unsigned char *p = *pstr;
1688   unsigned int code = 0;
1689   unsigned int c = *pc, length;
1690
1691   /* Only attempt to interpret a UCS for C++ and C99.  */
1692   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1693     return 1;
1694
1695   if (CPP_WTRADITIONAL (pfile))
1696     cpp_error (pfile, DL_WARNING,
1697                "the meaning of '\\%c' is different in traditional C", c);
1698
1699   length = (c == 'u' ? 4: 8);  /* Hex digits expected; counts down to 0 on success.  */
1700
1701   if ((size_t) (limit - p) < length)
1702     {
1703       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1704       /* Skip to the end to avoid more diagnostics.  */
1705       p = limit;
1706     }
1707   else
1708     {
1709       for (; length; length--, p++)
1710         {
1711           c = *p;
1712           if (ISXDIGIT (c))
1713             code = (code << 4) + hex_digit_value (c);
1714           else
1715             {
1716               cpp_error (pfile, DL_ERROR,
1717                          "non-hex digit '%c' in universal-character-name", c);
1718               /* We shouldn't skip in case there are multibyte chars.  */
1719               break;  /* LENGTH stays nonzero, marking the failure.  */
1720             }
1721         }
1722     }
1723
1724 #ifdef TARGET_EBCDIC
1725   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1726   code = 0x3f;  /* EBCDIC invalid character */
1727 #else
1728  /* True extended characters are OK.  Excluded: below 0xa0, top bit set, and 0xD800-0xDFFF.  */
1729   if (code >= 0xa0
1730       && !(code & 0x80000000)
1731       && !(code >= 0xD800 && code <= 0xDFFF))
1732     ;
1733   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1734      hex escapes so that this also works with EBCDIC hosts.  */
1735   else if (code == 0x24 || code == 0x40 || code == 0x60)
1736     ;
1737   /* Don't give another error if one occurred above.  */
1738   else if (length == 0)
1739     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1740 #endif
1741   /* Results are stored even after a diagnosed error; only the early return above leaves them untouched.  */
1742   *pstr = p;
1743   *pc = code;
1744   return 0;
1745 }
1746
1747 /* Returns the value of an escape sequence, truncated to the correct
1748    target precision.  PSTR points to the input pointer, which is just
1749    after the backslash.  LIMIT is how much text we have.  WIDE is true
1750    if the escape sequence is part of a wide character constant or
1751    string literal.  Handles all relevant diagnostics.  On return *PSTR points past the escape.  */
1752 cppchar_t
1753 cpp_parse_escape (pfile, pstr, limit, wide)
1754      cpp_reader *pfile;
1755      const unsigned char **pstr;
1756      const unsigned char *limit;
1757      int wide;
1758 {
1759   int unknown = 0;
1760   const unsigned char *str = *pstr;
1761   cppchar_t c, mask;
1762   unsigned int width;
1763   /* MASK has the low WIDTH bits set, where WIDTH is the target precision of the character type.  */
1764   if (wide)
1765     width = CPP_OPTION (pfile, wchar_precision);
1766   else
1767     width = CPP_OPTION (pfile, char_precision);
1768   if (width < BITS_PER_CPPCHAR_T)
1769     mask = ((cppchar_t) 1 << width) - 1;
1770   else
1771     mask = ~0;  /* Avoids shifting by the full width of cppchar_t.  */
1772
1773   c = *str++;  /* The character after the backslash; not checked against LIMIT here.  */
1774   switch (c)
1775     {
1776     case '\\': case '\'': case '"': case '?': break;  /* These stand for themselves.  */
1777     case 'b': c = TARGET_BS;      break;
1778     case 'f': c = TARGET_FF;      break;
1779     case 'n': c = TARGET_NEWLINE; break;
1780     case 'r': c = TARGET_CR;      break;
1781     case 't': c = TARGET_TAB;     break;
1782     case 'v': c = TARGET_VT;      break;
1783
1784     case '(': case '{': case '[': case '%':
1785       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1786          '\%' is used to prevent SCCS from getting confused.  */
1787       unknown = CPP_PEDANTIC (pfile);  /* Diagnosed only when pedantic; C is kept.  */
1788       break;
1789
1790     case 'a':
1791       if (CPP_WTRADITIONAL (pfile))
1792         cpp_error (pfile, DL_WARNING,
1793                    "the meaning of '\\a' is different in traditional C");
1794       c = TARGET_BELL;
1795       break;
1796
1797     case 'e': case 'E':
1798       if (CPP_PEDANTIC (pfile))
1799         cpp_error (pfile, DL_PEDWARN,
1800                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1801       c = TARGET_ESC;
1802       break;
1803
1804     case 'u': case 'U':  /* A return of 1 means no UCS was read, so it is reported as unknown.  */
1805       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1806       break;
1807
1808     case 'x':
1809       if (CPP_WTRADITIONAL (pfile))
1810         cpp_error (pfile, DL_WARNING,
1811                    "the meaning of '\\x' is different in traditional C");
1812
1813       {
1814         cppchar_t i = 0, overflow = 0;
1815         int digits_found = 0;
1816         /* Consume every hex digit available; there is no length limit.  */
1817         while (str < limit)
1818           {
1819             c = *str;
1820             if (! ISXDIGIT (c))
1821               break;
1822             str++;
1823             overflow |= i ^ (i << 4 >> 4);  /* Nonzero if the shift below drops set bits.  */
1824             i = (i << 4) + hex_digit_value (c);
1825             digits_found = 1;
1826           }
1827
1828         if (!digits_found)
1829           cpp_error (pfile, DL_ERROR,
1830                        "\\x used with no following hex digits");
1831
1832         if (overflow | (i != (i & mask)))
1833           {
1834             cpp_error (pfile, DL_PEDWARN,
1835                        "hex escape sequence out of range");
1836             i &= mask;
1837           }
1838         c = i;
1839       }
1840       break;
1841
1842     case '0':  case '1':  case '2':  case '3':
1843     case '4':  case '5':  case '6':  case '7':
1844       {
1845         size_t count = 0;
1846         cppchar_t i = c - '0';
1847         /* At most two further digits, giving three octal digits in total.  */
1848         while (str < limit && ++count < 3)
1849           {
1850             c = *str;
1851             if (c < '0' || c > '7')
1852               break;
1853             str++;
1854             i = (i << 3) + c - '0';
1855           }
1856
1857         if (i != (i & mask))
1858           {
1859             cpp_error (pfile, DL_PEDWARN,
1860                        "octal escape sequence out of range");
1861             i &= mask;
1862           }
1863         c = i;
1864       }
1865       break;
1866
1867     default:
1868       unknown = 1;
1869       break;
1870     }
1871
1872   if (unknown)
1873     {  /* Printable characters are shown as themselves, others in octal.  */
1874       if (ISGRAPH (c))
1875         cpp_error (pfile, DL_PEDWARN,
1876                    "unknown escape sequence '\\%c'", (int) c);
1877       else
1878         cpp_error (pfile, DL_PEDWARN,
1879                    "unknown escape sequence: '\\%03o'", (int) c);
1880     }
1881   /* Final range check; the hex and octal cases have already masked their value.  */
1882   if (c > mask)
1883     {
1884       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1885       c &= mask;
1886     }
1887
1888   *pstr = str;
1889   return c;
1890 }
1891
1892 /* Interpret a (possibly wide) character constant in TOKEN.
1893    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1894    points to a variable that is filled in with the number of
1895    characters seen, and UNSIGNEDP to a variable that indicates whether
1896    the result has signed type.  */
1897 cppchar_t
1898 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1899      cpp_reader *pfile;
1900      const cpp_token *token;
1901      unsigned int *pchars_seen;
1902      int *unsignedp;
1903 {
1904   const unsigned char *str = token->val.str.text;
1905   const unsigned char *limit = str + token->val.str.len;
1906   unsigned int chars_seen = 0;
1907   size_t width, max_chars;
1908   cppchar_t c, mask, result = 0;
1909   bool unsigned_p;
1910
1911 #ifdef MULTIBYTE_CHARS
1912   (void) local_mbtowc (NULL, NULL, 0);
1913 #endif
1914
1915   /* Width in bits.  */
1916   if (token->type == CPP_CHAR)
1917     {
1918       width = CPP_OPTION (pfile, char_precision);
1919       max_chars = CPP_OPTION (pfile, int_precision) / width;
1920       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1921     }
1922   else
1923     {
1924       width = CPP_OPTION (pfile, wchar_precision);
1925       max_chars = 1;
1926       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1927     }
1928
1929   if (width < BITS_PER_CPPCHAR_T)
1930     mask = ((cppchar_t) 1 << width) - 1;
1931   else
1932     mask = ~0;
1933
1934   while (str < limit)
1935     {
1936 #ifdef MULTIBYTE_CHARS
1937       wchar_t wc;
1938       int char_len;
1939
1940       char_len = local_mbtowc (&wc, str, limit - str);
1941       if (char_len == -1)
1942         {
1943           cpp_error (pfile, DL_WARNING,
1944                      "ignoring invalid multibyte character");
1945           c = *str++;
1946         }
1947       else
1948         {
1949           str += char_len;
1950           c = wc;
1951         }
1952 #else
1953       c = *str++;
1954 #endif
1955
1956       if (c == '\\')
1957         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1958
1959 #ifdef MAP_CHARACTER
1960       if (ISPRINT (c))
1961         c = MAP_CHARACTER (c);
1962 #endif
1963
1964       chars_seen++;
1965
1966       /* Truncate the character, scale the result and merge the two.  */
1967       c &= mask;
1968       if (width < BITS_PER_CPPCHAR_T)
1969         result = (result << width) | c;
1970       else
1971         result = c;
1972     }
1973
1974   if (chars_seen == 0)
1975     cpp_error (pfile, DL_ERROR, "empty character constant");
1976   else if (chars_seen > 1)
1977     {
1978       /* Multichar charconsts are of type int and therefore signed.  */
1979       unsigned_p = 0;
1980
1981       if (chars_seen > max_chars)
1982         {
1983           chars_seen = max_chars;
1984           cpp_error (pfile, DL_WARNING,
1985                      "character constant too long for its type");
1986         }
1987       else if (CPP_OPTION (pfile, warn_multichar))
1988         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1989     }
1990
1991   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1992      in WIDTH bits, but for multi-char charconsts it's value is the
1993      full target type's width.  */
1994   if (chars_seen > 1)
1995     width *= max_chars;
1996   if (width < BITS_PER_CPPCHAR_T)
1997     {
1998       mask = ((cppchar_t) 1 << width) - 1;
1999       if (unsigned_p || !(result & (1 << (width - 1))))
2000         result &= mask;
2001       else
2002         result |= ~mask;
2003     }
2004
2005   *pchars_seen = chars_seen;
2006   *unsignedp = unsigned_p;
2007   return result;
2008 }
2009
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest data area, in bytes, ever allocated for a buffer.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer that will be reused to satisfy a request for
   MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF by at least MIN_EXTRA bytes:
   MIN_EXTRA plus twice the room remaining in BUFF.  Both arguments
   are parenthesized so that arbitrary expressions may be passed.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2024
2025 /* Create a new allocation buffer.  Place the control block at the end
2026    of the buffer, so that buffer overflows will cause immediate chaos.  */
2027 static _cpp_buff *
2028 new_buff (len)
2029      size_t len;
2030 {
2031   _cpp_buff *result;
2032   unsigned char *base;
2033
2034   if (len < MIN_BUFF_SIZE)
2035     len = MIN_BUFF_SIZE;
2036   len = CPP_ALIGN (len);
2037
2038   base = xmalloc (len + sizeof (_cpp_buff));
2039   result = (_cpp_buff *) (base + len);
2040   result->base = base;
2041   result->cur = base;
2042   result->limit = base + len;
2043   result->next = NULL;
2044   return result;
2045 }
2046
2047 /* Place a chain of unwanted allocation buffers on the free list.  */
2048 void
2049 _cpp_release_buff (pfile, buff)
2050      cpp_reader *pfile;
2051      _cpp_buff *buff;
2052 {
2053   _cpp_buff *end = buff;
2054
2055   while (end->next)
2056     end = end->next;
2057   end->next = pfile->free_buffs;
2058   pfile->free_buffs = buff;
2059 }
2060
2061 /* Return a free buffer of size at least MIN_SIZE.  */
2062 _cpp_buff *
2063 _cpp_get_buff (pfile, min_size)
2064      cpp_reader *pfile;
2065      size_t min_size;
2066 {
2067   _cpp_buff *result, **p;
2068
2069   for (p = &pfile->free_buffs;; p = &(*p)->next)
2070     {
2071       size_t size;
2072
2073       if (*p == NULL)
2074         return new_buff (min_size);
2075       result = *p;
2076       size = result->limit - result->base;
2077       /* Return a buffer that's big enough, but don't waste one that's
2078          way too big.  */
2079       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2080         break;
2081     }
2082
2083   *p = result->next;
2084   result->next = NULL;
2085   result->cur = result->base;
2086   return result;
2087 }
2088
2089 /* Creates a new buffer with enough space to hold the uncommitted
2090    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2091    the excess bytes to the new buffer.  Chains the new buffer after
2092    BUFF, and returns the new buffer.  */
2093 _cpp_buff *
2094 _cpp_append_extend_buff (pfile, buff, min_extra)
2095      cpp_reader *pfile;
2096      _cpp_buff *buff;
2097      size_t min_extra;
2098 {
2099   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2100   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2101
2102   buff->next = new_buff;
2103   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2104   return new_buff;
2105 }
2106
2107 /* Creates a new buffer with enough space to hold the uncommitted
2108    remaining bytes of the buffer pointed to by BUFF, and at least
2109    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2110    Chains the new buffer before the buffer pointed to by BUFF, and
2111    updates the pointer to point to the new buffer.  */
2112 void
2113 _cpp_extend_buff (pfile, pbuff, min_extra)
2114      cpp_reader *pfile;
2115      _cpp_buff **pbuff;
2116      size_t min_extra;
2117 {
2118   _cpp_buff *new_buff, *old_buff = *pbuff;
2119   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2120
2121   new_buff = _cpp_get_buff (pfile, size);
2122   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2123   new_buff->next = old_buff;
2124   *pbuff = new_buff;
2125 }
2126
2127 /* Free a chain of buffers starting at BUFF.  */
2128 void
2129 _cpp_free_buff (buff)
2130      _cpp_buff *buff;
2131 {
2132   _cpp_buff *next;
2133
2134   for (; buff; buff = next)
2135     {
2136       next = buff->next;
2137       free (buff->base);
2138     }
2139 }
2140
2141 /* Allocate permanent, unaligned storage of length LEN.  */
2142 unsigned char *
2143 _cpp_unaligned_alloc (pfile, len)
2144      cpp_reader *pfile;
2145      size_t len;
2146 {
2147   _cpp_buff *buff = pfile->u_buff;
2148   unsigned char *result = buff->cur;
2149
2150   if (len > (size_t) (buff->limit - result))
2151     {
2152       buff = _cpp_get_buff (pfile, len);
2153       buff->next = pfile->u_buff;
2154       pfile->u_buff = buff;
2155       result = buff->cur;
2156     }
2157
2158   buff->cur = result + len;
2159   return result;
2160 }
2161
2162 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2163    That buffer is used for growing allocations when saving macro
2164    replacement lists in a #define, and when parsing an answer to an
2165    assertion in #assert, #unassert or #if (and therefore possibly
2166    whilst expanding macros).  It therefore must not be used by any
2167    code that they might call: specifically the lexer and the guts of
2168    the macro expander.
2169
2170    All existing other uses clearly fit this restriction: storing
2171    registered pragmas during initialization.  */
2172 unsigned char *
2173 _cpp_aligned_alloc (pfile, len)
2174      cpp_reader *pfile;
2175      size_t len;
2176 {
2177   _cpp_buff *buff = pfile->a_buff;
2178   unsigned char *result = buff->cur;
2179
2180   if (len > (size_t) (buff->limit - result))
2181     {
2182       buff = _cpp_get_buff (pfile, len);
2183       buff->next = pfile->a_buff;
2184       pfile->a_buff = buff;
2185       result = buff->cur;
2186     }
2187
2188   buff->cur = result + len;
2189   return result;
2190 }