gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27
  28 #ifdef MULTIBYTE_CHARS
  29 #include "mbchar.h"
  30 #include <locale.h>
  31 #endif
  32
  33 /* Spelling category of a token type.  It is stored in the CATEGORY
  34    field of struct token_spelling and read back with TOKEN_SPELL.

     Tokens with SPELL_STRING store their spelling in the token list,
     and its length in the token->val.name.len.  */
  35 enum spell_type
  36 {
  37   SPELL_OPERATOR = 0,	/* Category given to every OP () table entry.  */
  38   SPELL_CHAR,
  39   SPELL_IDENT,
  40   SPELL_NUMBER,
  41   SPELL_STRING,
  42   SPELL_NONE
  43 };
  44
     /* One entry of the token_spellings[] table, which is indexed by
        token type.  */
  45 struct token_spelling
  46 {
  47   enum spell_type category;	/* Read through TOKEN_SPELL.  */
  48   const unsigned char *name;	/* Read through TOKEN_NAME.  */
  49 };
  50
     /* Spellings of the digraph tokens.  NOTE(review): the code that
        indexes this array is outside the part of the file seen here;
        confirm the expected order before reordering entries.  */
  51 static const unsigned char *const digraph_spellings[] =
  52 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  53
     /* Build the per-token-type spelling table by expanding TTYPE_TABLE
        with temporary definitions of OP and TK.  An OP entry is always
        SPELL_OPERATOR and uses the string S as its name; a TK entry
        takes its category from S and its name from STRINGX (E)
        (presumably the stringified enumerator name -- TODO confirm).  */
  54 #define OP(e, s) { SPELL_OPERATOR, U s           },
  55 #define TK(e, s) { s,              U STRINGX (e) },
  56 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  57 #undef OP
  58 #undef TK
  59
     /* TOKEN_SPELL and TOKEN_NAME look up the spelling category and the
        name of TOKEN's type in token_spellings[].  */
  60 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  61 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     /* Rewind the read pointer to the position recorded in backup_to.
        Requires a local variable named "buffer" to be in scope.  */
  62 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  63
     /* Forward declarations of the file-local helpers.  PARAMS wraps
        each parameter list (presumably so that pre-ISO compilers can
        drop it -- TODO confirm against the project headers).  */
  64 static void handle_newline PARAMS ((cpp_reader *));
  65 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  66 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  67
  68 static int skip_block_comment PARAMS ((cpp_reader *));
  69 static int skip_line_comment PARAMS ((cpp_reader *));
  70 static void adjust_column PARAMS ((cpp_reader *));
  71 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  72 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  73 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  74                                    unsigned int *));
  75 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  76 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  77 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  78 static bool trigraph_p PARAMS ((cpp_reader *));
  79 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  80                                   cppchar_t));
  81 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  82 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  83                                    const unsigned char *, cppchar_t *));
  84 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  85
  86 static unsigned int hex_digit_value PARAMS ((unsigned int));
  87 static _cpp_buff *new_buff PARAMS ((size_t));
  88
  89 /* Utility routine:
  90
  91    Compares, the token TOKEN to the NUL-terminated string STRING.
  92    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  93 int
  94 cpp_ideq (token, string)
  95      const cpp_token *token;
  96      const char *string;
  97 {
  98   if (token->type != CPP_NAME)
  99     return 0;
 100
 101   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 102 }
 103
 104 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 105    Returns with buffer->cur pointing to the character immediately
 106    following the newline (combination).  */
 107 static void
 108 handle_newline (pfile)
 109      cpp_reader *pfile;
 110 {
 111   cpp_buffer *buffer = pfile->buffer;
 112
 113   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 114      only accept CR-LF; maybe we should fall back to that behaviour?  */
 115   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 116     buffer->cur++;
 117
 118   buffer->line_base = buffer->cur;
 119   buffer->col_adjust = 0;
 120   pfile->line++;
 121 }
 122
 123 /* Subroutine of skip_escaped_newlines; called when a 3-character
 124    sequence beginning with "??" is encountered.  buffer->cur points to
 125    the second '?'.
 126
 127    Warn if necessary, and returns true if the sequence forms a
 128    trigraph and the trigraph should be honoured.  */
 129 static bool
 130 trigraph_p (pfile)
 131      cpp_reader *pfile;
 132 {
 133   cpp_buffer *buffer = pfile->buffer;
 134   cppchar_t from_char = buffer->cur[1];
 135   bool accept;
 136
 137   if (!_cpp_trigraph_map[from_char])
 138     return false;
 139
 140   accept = CPP_OPTION (pfile, trigraphs);
 141
 142   /* Don't warn about trigraphs in comments.  */
 143   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 144     {
 145       if (accept)
 146         cpp_error_with_line (pfile, DL_WARNING,
 147                              pfile->line, CPP_BUF_COL (buffer) - 1,
 148                              "trigraph ??%c converted to %c",
 149                              (int) from_char,
 150                              (int) _cpp_trigraph_map[from_char]);
 151       else if (buffer->cur != buffer->last_Wtrigraphs)
 152         {
 153           buffer->last_Wtrigraphs = buffer->cur;
 154           cpp_error_with_line (pfile, DL_WARNING,
 155                                pfile->line, CPP_BUF_COL (buffer) - 1,
 156                                "trigraph ??%c ignored", (int) from_char);
 157         }
 158     }
 159
 160   return accept;
 161 }
 162
 163 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 164    lie in buffer->cur[-1].  This routine performs preprocessing
 165    stages 1 and 2 of the ISO C standard; it does nothing for buffers
 166    marked from_stage3.

     Returns the effective character: the one in buffer->cur[-1] on
     entry if nothing was translated, the replacement of an honoured
     trigraph, the first character after the last escaped newline, or
     EOF if a backslash-newline is the last thing in the buffer.  After
     each escaped newline buffer->backup_to is moved to the start of
     the following line.  */
 167 static cppchar_t
 168 skip_escaped_newlines (pfile)
 169      cpp_reader *pfile;
 170 {
 171   cpp_buffer *buffer = pfile->buffer;
 172   cppchar_t next = buffer->cur[-1];
 173
 174   /* Only do this if we apply stages 1 and 2.  */
 175   if (!buffer->from_stage3)
 176     {
 177       const unsigned char *saved_cur;
 178       cppchar_t next1;
 179
 180       do
 181         {
 182           if (next == '?')
 183             {
               /* A lone '?' or a rejected trigraph is returned as is.  */
 184               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 185                 break;
 186
 187               /* Translate the trigraph.  */
 188               next = _cpp_trigraph_map[buffer->cur[1]];
 189               buffer->cur += 2;
               /* Only the trigraph for a backslash can start an
                  escaped newline.  */
 190               if (next != '\\')
 191                 break;
 192             }
 193
           /* A backslash that is the last character of the buffer
              cannot escape anything.  */
 194           if (buffer->cur == buffer->rlimit)
 195             break;
 196
 197           /* We have a backslash, and room for at least one more
 198              character.  Skip horizontal whitespace.  */
 199           saved_cur = buffer->cur;
 200           do
 201             next1 = *buffer->cur++;
 202           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 203
           /* Not followed by a newline: undo the scan and return the
              backslash itself.  */
 204           if (!is_vspace (next1))
 205             {
 206               buffer->cur = saved_cur;
 207               break;
 208             }
 209
           /* More than the newline character was consumed, so there
              was whitespace between the backslash and the newline.  */
 210           if (saved_cur != buffer->cur - 1
 211               && !pfile->state.lexing_comment)
 212             cpp_error (pfile, DL_WARNING,
 213                        "backslash and newline separated by space");
 214
 215           handle_newline (pfile);
           /* A caller that rejects the returned character must resume
              after the escaped newline, not before it.  */
 216           buffer->backup_to = buffer->cur;
 217           if (buffer->cur == buffer->rlimit)
 218             {
 219               cpp_error (pfile, DL_PEDWARN,
 220                          "backslash-newline at end of file");
 221               next = EOF;
 222             }
 223           else
 224             next = *buffer->cur++;
 225         }
       /* The character after an escaped newline may itself begin
          another trigraph or escaped newline.  */
 226       while (next == '\\' || next == '?');
 227     }
 228
 229   return next;
 230 }
 231
 232 /* Obtain the next character, after trigraph conversion and skipping
 233    an arbitrarily long string of escaped newlines.  The common case of
 234    no trigraphs or escaped newlines falls through quickly.  On return,
 235    buffer->backup_to points to where to return to if the character is
 236    not to be processed.  */
 237 static cppchar_t
 238 get_effective_char (pfile)
 239      cpp_reader *pfile;
 240 {
 241   cppchar_t next;
 242   cpp_buffer *buffer = pfile->buffer;
 243
 244   buffer->backup_to = buffer->cur;
 245   next = *buffer->cur++;
 246   if (__builtin_expect (next == '?' || next == '\\', 0))
 247     next = skip_escaped_newlines (pfile);
 248
 249    return next;
 250 }
 251
 252 /* Skip a C-style block comment.  We find the end of the comment by
 253    seeing if an asterisk is before every '/' we encounter.  Returns
 254    non-zero if comment terminated by EOF, zero otherwise.  */
 255 static int
 256 skip_block_comment (pfile)
 257      cpp_reader *pfile;
 258 {
 259   cpp_buffer *buffer = pfile->buffer;
 260   cppchar_t c = EOF, prevc = EOF;
 261
 262   pfile->state.lexing_comment = 1;
 263   while (buffer->cur != buffer->rlimit)
 264     {
 265       prevc = c, c = *buffer->cur++;
 266
 267       /* FIXME: For speed, create a new character class of characters
 268          of interest inside block comments.  */
 269       if (c == '?' || c == '\\')
 270         c = skip_escaped_newlines (pfile);
 271
 272       /* People like decorating comments with '*', so check for '/'
 273          instead for efficiency.  */
 274       if (c == '/')
 275         {
 276           if (prevc == '*')
 277             break;
 278
 279           /* Warn about potential nested comments, but not if the '/'
 280              comes immediately before the true comment delimiter.
 281              Don't bother to get it right across escaped newlines.  */
 282           if (CPP_OPTION (pfile, warn_comments)
 283               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 284             cpp_error_with_line (pfile, DL_WARNING,
 285                                  pfile->line, CPP_BUF_COL (buffer),
 286                                  "\"/*\" within comment");
 287         }
 288       else if (is_vspace (c))
 289         handle_newline (pfile);
 290       else if (c == '\t')
 291         adjust_column (pfile);
 292     }
 293
 294   pfile->state.lexing_comment = 0;
 295   return c != '/' || prevc != '*';
 296 }
 297
 298 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 299    terminating newline.  Handles escaped newlines.  Returns non-zero
 300    if a multiline comment.  */
 301 static int
 302 skip_line_comment (pfile)
 303      cpp_reader *pfile;
 304 {
 305   cpp_buffer *buffer = pfile->buffer;
 306   unsigned int orig_line = pfile->line;
 307   cppchar_t c;
 308 #ifdef MULTIBYTE_CHARS
 309   wchar_t wc;
 310   int char_len;
 311 #endif
 312
 313   pfile->state.lexing_comment = 1;
 314 #ifdef MULTIBYTE_CHARS
 315   /* Reset multibyte conversion state.  */
 316   (void) local_mbtowc (NULL, NULL, 0);
 317 #endif
 318   do
 319     {
 320       if (buffer->cur == buffer->rlimit)
 321         goto at_eof;
 322
 323 #ifdef MULTIBYTE_CHARS
 324       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 325                                buffer->rlimit - buffer->cur);
 326       if (char_len == -1)
 327         {
 328           cpp_error (pfile, DL_WARNING,
 329                      "ignoring invalid multibyte character");
 330           char_len = 1;
 331           c = *buffer->cur++;
 332         }
 333       else
 334         {
 335           buffer->cur += char_len;
 336           c = wc;
 337         }
 338 #else
 339       c = *buffer->cur++;
 340 #endif
 341       if (c == '?' || c == '\\')
 342         c = skip_escaped_newlines (pfile);
 343     }
 344   while (!is_vspace (c));
 345
 346   /* Step back over the newline, except at EOF.  */
 347   buffer->cur--;
 348  at_eof:
 349
 350   pfile->state.lexing_comment = 0;
 351   return orig_line != pfile->line;
 352 }
 353
 354 /* pfile->buffer->cur is one beyond the \t character.  Update
 355    col_adjust so we track the column correctly.  */
 356 static void
 357 adjust_column (pfile)
 358      cpp_reader *pfile;
 359 {
 360   cpp_buffer *buffer = pfile->buffer;
 361   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 362
 363   /* Round it up to multiple of the tabstop, but subtract 1 since the
 364      tab itself occupies a character position.  */
 365   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 366                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 367 }
 368
 369 /* Skips whitespace, saving the next non-whitespace character.
 370    Adjusts pfile->col_adjust to account for tabs.  Without this,
 371    tokens might be assigned an incorrect column.  */
 372 static int
 373 skip_whitespace (pfile, c)
 374      cpp_reader *pfile;
 375      cppchar_t c;
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int warned = 0;
 379
 380   do
 381     {
 382       /* Horizontal space always OK.  */
 383       if (c == ' ')
 384         ;
 385       else if (c == '\t')
 386         adjust_column (pfile);
 387       /* Just \f \v or \0 left.  */
 388       else if (c == '\0')
 389         {
 390           if (buffer->cur - 1 == buffer->rlimit)
 391             return 0;
 392           if (!warned)
 393             {
 394               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 395               warned = 1;
 396             }
 397         }
 398       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 399         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 400                              CPP_BUF_COL (buffer),
 401                              "%s in preprocessing directive",
 402                              c == '\f' ? "form feed" : "vertical tab");
 403
 404       c = *buffer->cur++;
 405     }
 406   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 407   while (is_nvspace (c));
 408
 409   buffer->cur--;
 410   return 1;
 411 }
 412
 413 /* See if the characters of a number token are valid in a name (no
 414    '.', '+' or '-').  */
 415 static int
 416 name_p (pfile, string)
 417      cpp_reader *pfile;
 418      const cpp_string *string;
 419 {
 420   unsigned int i;
 421
 422   for (i = 0; i < string->len; i++)
 423     if (!is_idchar (string->text[i]))
 424       return 0;
 425
 426   return 1;
 427 }
 428
 429 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 430    a critical inner loop.  The common case is an identifier which has
 431    not been split by backslash-newline, does not contain a dollar
 432    sign, and has already been scanned (roughly 10:1 ratio of
 433    seen:unseen identifiers in normal code; the distribution is
 434    Poisson-like).  Second most common case is a new identifier, not
 435    split and no dollar sign.  The other possibilities are rare and
 436    have been relegated to parse_slow.  */
 437 static cpp_hashnode *
 438 parse_identifier (pfile)
 439      cpp_reader *pfile;
 440 {
 441   cpp_hashnode *result;
 442   const uchar *cur, *base;
 443
 444   /* Fast-path loop.  Skim over a normal identifier.
 445      N.B. ISIDNUM does not include $.  */
 446   cur = pfile->buffer->cur;
 447   while (ISIDNUM (*cur))
 448     cur++;
 449
 450   /* Check for slow-path cases.  */
 451   if (*cur == '?' || *cur == '\\' || *cur == '$')
 452     {
 453       unsigned int len;
 454
 455       base = parse_slow (pfile, cur, 0, &len);
 456       result = (cpp_hashnode *)
 457         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 458     }
 459   else
 460     {
 461       base = pfile->buffer->cur - 1;
 462       pfile->buffer->cur = cur;
 463       result = (cpp_hashnode *)
 464         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 465     }
 466
 467   /* Rarely, identifiers require diagnostics when lexed.
 468      XXX Has to be forced out of the fast path.  */
 469   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 470                         && !pfile->state.skipping, 0))
 471     {
 472       /* It is allowed to poison the same identifier twice.  */
 473       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 474         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 475                    NODE_NAME (result));
 476
 477       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 478          replacement list of a variadic macro.  */
 479       if (result == pfile->spec_nodes.n__VA_ARGS__
 480           && !pfile->state.va_args_ok)
 481         cpp_error (pfile, DL_PEDWARN,
 482         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 483     }
 484
 485   return result;
 486 }
 487
 488 /* Slow path.  This handles numbers and identifiers which have been
 489    split, or contain dollar signs.  The part of the token from
 490    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 491    0 for an identifier, 1 if it's a number, and 2 if it is a number
 492    with a leading period.  Returns a pointer to the token's
 493    NUL-terminated spelling, built on the hash table's obstack, and
     sets PLEN to its length (not counting the NUL).  On return
     buffer->cur points at the first character that is not part of the
     token.  */
 494 static uchar *
 495 parse_slow (pfile, cur, number_p, plen)
 496      cpp_reader *pfile;
 497      const uchar *cur;
 498      int number_p;
 499      unsigned int *plen;
 500 {
 501   cpp_buffer *buffer = pfile->buffer;
 502   const uchar *base = buffer->cur - 1;
 503   struct obstack *stack = &pfile->hash_table->stack;
 504   unsigned int c, prevc, saw_dollar = 0;
 505
 506   /* Place any leading period.  */
 507   if (number_p == 2)
 508     obstack_1grow (stack, '.');
 509
 510   /* Copy the part of the token which is known to be okay.  */
 511   obstack_grow (stack, base, cur - base);
 512
 513   /* Now process the part which isn't.  We are looking at one of
 514      '$', '\\', or '?' on entry to this loop.  */
 515   prevc = cur[-1];
 516   c = *cur++;
 517   buffer->cur = cur;
 518   for (;;)
 519     {
 520       /* Potential escaped newline?  Record where C itself starts so
              that BACKUP below can un-read it; skip_escaped_newlines
              moves this point forward past any escaped newline.  */
 521       buffer->backup_to = buffer->cur - 1;
 522       if (c == '?' || c == '\\')
 523         c = skip_escaped_newlines (pfile);
 524
           /* Identifiers end at the first non-identifier character.
              Numbers additionally accept '.', and a character that
              VALID_SIGN accepts after PREVC (presumably an exponent
              sign -- TODO confirm against the macro).  */
 525       if (!is_idchar (c))
 526         {
 527           if (!number_p)
 528             break;
 529           if (c != '.' && !VALID_SIGN (c, prevc))
 530             break;
 531         }
 532
 533       /* Handle normal identifier characters in this loop.  */
 534       do
 535         {
 536           prevc = c;
 537           obstack_1grow (stack, c);
 538
 539           if (c == '$')
 540             saw_dollar++;
 541
 542           c = *buffer->cur++;
 543         }
 544       while (is_idchar (c));
 545     }
 546
 547   /* Step back over the unwanted char.  */
 548   BACKUP ();
 549
 550   /* $ is not an identifier character in the standard, but is commonly
 551      accepted as an extension.  Don't warn about it in skipped
 552      conditional blocks.  */
 553   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 554     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 555
 556   /* Identifiers and numbers are null-terminated.  The length is read
          before the NUL is appended so that it is not counted.  */
 557   *plen = obstack_object_size (stack);
 558   obstack_1grow (stack, '\0');
 559   return obstack_finish (stack);
 560 }
 561
 562 /* Parse a number, beginning with character C, skipping embedded
 563    backslash-newlines.  LEADING_PERIOD is non-zero if there was a "."
 564    before C.  Place the result in NUMBER.  */
 565 static void
 566 parse_number (pfile, number, leading_period)
 567      cpp_reader *pfile;
 568      cpp_string *number;
 569      int leading_period;
 570 {
 571   const uchar *cur;
 572
 573   /* Fast-path loop.  Skim over a normal number.
 574      N.B. ISIDNUM does not include $.  */
 575   cur = pfile->buffer->cur;
 576   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 577     cur++;
 578
 579   /* Check for slow-path cases.  */
 580   if (*cur == '?' || *cur == '\\' || *cur == '$')
 581     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 582   else
 583     {
 584       const uchar *base = pfile->buffer->cur - 1;
 585       uchar *dest;
 586
 587       number->len = cur - base + leading_period;
 588       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 589       dest[number->len] = '\0';
 590       number->text = dest;
 591
 592       if (leading_period)
 593         *dest++ = '.';
 594       memcpy (dest, base, cur - base);
 595       pfile->buffer->cur = cur;
 596     }
 597 }
 598
 599 /* Subroutine of parse_string.  */
 600 static int
 601 unescaped_terminator_p (pfile, dest)
 602      cpp_reader *pfile;
 603      const unsigned char *dest;
 604 {
 605   const unsigned char *start, *temp;
 606
 607   /* In #include-style directives, terminators are not escapeable.  */
 608   if (pfile->state.angled_headers)
 609     return 1;
 610
 611   start = BUFF_FRONT (pfile->u_buff);
 612
 613   /* An odd number of consecutive backslashes represents an escaped
 614      terminator.  */
 615   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 616     ;
 617
 618   return ((dest - temp) & 1) == 0;
 619 }
 620
 621 /* Parses a string, character constant, or angle-bracketed header file
 622    name.  Handles embedded trigraphs and escaped newlines.  The stored
 623    string is guaranteed NUL-terminated, but it is not guaranteed that
 624    this is the first NUL since embedded NULs are preserved.
 625
 626    When this function returns, buffer->cur points to the next
 627    character to be processed.  */
 628 static void
 629 parse_string (pfile, token, terminator)
 630      cpp_reader *pfile;
 631      cpp_token *token;
 632      cppchar_t terminator;
 633 {
 634   cpp_buffer *buffer = pfile->buffer;
 635   unsigned char *dest, *limit;
 636   cppchar_t c;
 637   bool warned_nulls = false;
 638 #ifdef MULTIBYTE_CHARS
 639   wchar_t wc;
 640   int char_len;
 641 #endif
 642
 643   dest = BUFF_FRONT (pfile->u_buff);
 644   limit = BUFF_LIMIT (pfile->u_buff);
 645
 646 #ifdef MULTIBYTE_CHARS
 647   /* Reset multibyte conversion state.  */
 648   (void) local_mbtowc (NULL, NULL, 0);
 649 #endif
 650   for (;;)
 651     {
 652       /* We need room for another char, possibly the terminating NUL.  */
 653       if ((size_t) (limit - dest) < 1)
 654         {
 655           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 656           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 657           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 658           limit = BUFF_LIMIT (pfile->u_buff);
 659         }
 660
 661 #ifdef MULTIBYTE_CHARS
 662       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 663                                buffer->rlimit - buffer->cur);
 664       if (char_len == -1)
 665         {
 666           cpp_error (pfile, DL_WARNING,
 667                        "ignoring invalid multibyte character");
 668           char_len = 1;
 669           c = *buffer->cur++;
 670         }
 671       else
 672         {
 673           buffer->cur += char_len;
 674           c = wc;
 675         }
 676 #else
 677       c = *buffer->cur++;
 678 #endif
 679
 680       /* Handle trigraphs, escaped newlines etc.  */
 681       if (c == '?' || c == '\\')
 682         c = skip_escaped_newlines (pfile);
 683
 684       if (c == terminator)
 685         {
 686           if (unescaped_terminator_p (pfile, dest))
 687             break;
 688         }
 689       else if (is_vspace (c))
 690         {
 691           /* No string literal may extend over multiple lines.  In
 692              assembly language, suppress the error except for <>
 693              includes.  This is a kludge around not knowing where
 694              comments are.  */
 695         unterminated:
 696           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 697             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 698                        (int) terminator);
 699           buffer->cur--;
 700           break;
 701         }
 702       else if (c == '\0')
 703         {
 704           if (buffer->cur - 1 == buffer->rlimit)
 705             goto unterminated;
 706           if (!warned_nulls)
 707             {
 708               warned_nulls = true;
 709               cpp_error (pfile, DL_WARNING,
 710                          "null character(s) preserved in literal");
 711             }
 712         }
 713 #ifdef MULTIBYTE_CHARS
 714       if (char_len > 1)
 715         {
 716           for ( ; char_len > 0; --char_len)
 717             *dest++ = (*buffer->cur - char_len);
 718         }
 719       else
 720 #endif
 721         *dest++ = c;
 722     }
 723
 724   *dest = '\0';
 725
 726   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 727   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 728   BUFF_FRONT (pfile->u_buff) = dest + 1;
 729 }
 730
 731 /* The stored comment includes the comment start and any terminator.  */
 732 static void
 733 save_comment (pfile, token, from, type)
 734      cpp_reader *pfile;
 735      cpp_token *token;
 736      const unsigned char *from;
 737      cppchar_t type;
 738 {
 739   unsigned char *buffer;
 740   unsigned int len, clen;
 741
 742   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 743
 744   /* C++ comments probably (not definitely) have moved past a new
 745      line, which we don't want to save in the comment.  */
 746   if (is_vspace (pfile->buffer->cur[-1]))
 747     len--;
 748
 749   /* If we are currently in a directive, then we need to store all
 750      C++ comments as C comments internally, and so we need to
 751      allocate a little extra space in that case.
 752
 753      Note that the only time we encounter a directive here is
 754      when we are saving comments in a "#define".  */
 755   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 756
 757   buffer = _cpp_unaligned_alloc (pfile, clen);
 758
 759   token->type = CPP_COMMENT;
 760   token->val.str.len = clen;
 761   token->val.str.text = buffer;
 762
 763   buffer[0] = '/';
 764   memcpy (buffer + 1, from, len - 1);
 765
 766   /* Finish conversion to a C comment, if necessary.  */
 767   if (pfile->state.in_directive && type == '/')
 768     {
 769       buffer[1] = '*';
 770       buffer[clen - 2] = '*';
 771       buffer[clen - 1] = '/';
 772     }
 773 }
 774
 775 /* Allocate COUNT tokens for RUN.  */
 776 void
 777 _cpp_init_tokenrun (run, count)
 778      tokenrun *run;
 779      unsigned int count;
 780 {
 781   run->base = xnewvec (cpp_token, count);
 782   run->limit = run->base + count;
 783   run->next = NULL;
 784 }
 785
 786 /* Returns the next tokenrun, or creates one if there is none.  */
 787 static tokenrun *
 788 next_tokenrun (run)
 789      tokenrun *run;
 790 {
 791   if (run->next == NULL)
 792     {
 793       run->next = xnew (tokenrun);
 794       run->next->prev = run;
 795       _cpp_init_tokenrun (run->next, 250);
 796     }
 797
 798   return run->next;
 799 }
 800
 801 /* Allocate a single token that is invalidated at the same time as the
 802    rest of the tokens on the line.  Has its line and col set to the
 803    same as the last lexed token, so that diagnostics appear in the
 804    right place.  */
 805 cpp_token *
 806 _cpp_temp_token (pfile)
 807      cpp_reader *pfile;
 808 {
 809   cpp_token *old, *result;
 810
 811   old = pfile->cur_token - 1;
 812   if (pfile->cur_token == pfile->cur_run->limit)
 813     {
 814       pfile->cur_run = next_tokenrun (pfile->cur_run);
 815       pfile->cur_token = pfile->cur_run->base;
 816     }
 817
 818   result = pfile->cur_token++;
 819   result->line = old->line;
 820   result->col = old->col;
 821   return result;
 822 }
 823
 824 /* Lex a token and return a pointer to it (external interface).
 825    Takes care of issues like directive handling, token lookahead,
 826    multiple include optimization and skipping: the loop only ends
     with a token that the caller should see.  */
 827 const cpp_token *
 828 _cpp_lex_token (pfile)
 829      cpp_reader *pfile;
 830 {
 831   cpp_token *result;
 832
 833   for (;;)
 834     {
           /* The current run is full; continue in the next one.  */
 835       if (pfile->cur_token == pfile->cur_run->limit)
 836         {
 837           pfile->cur_run = next_tokenrun (pfile->cur_run);
 838           pfile->cur_token = pfile->cur_run->base;
 839         }
 840
           /* Pending lookaheads are tokens that have already been
              lexed; hand them out again instead of lexing afresh.  */
 841       if (pfile->lookaheads)
 842         {
 843           pfile->lookaheads--;
 844           result = pfile->cur_token++;
 845         }
 846       else
 847         result = _cpp_lex_direct (pfile);
 848
           /* Tokens flagged BOL may start a directive.  */
 849       if (result->flags & BOL)
 850         {
 851           /* Is this a directive.  If _cpp_handle_directive returns
 852              false, it is an assembler #.  */
 853           if (result->type == CPP_HASH
 854               /* 6.10.3 p 11: Directives in a list of macro arguments
 855                  gives undefined behavior.  This implementation
 856                  handles the directive as normal.  */
 857               && pfile->state.parsing_args != 1
 858               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 859             continue;
               /* Not a directive: report the new line to the client,
                  unless the token is being skipped.  */
 860           if (pfile->cb.line_change && !pfile->state.skipping)
 861             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 862         }
 863
 864       /* We don't skip tokens in directives.  */
 865       if (pfile->state.in_directive)
 866         break;
 867
 868       /* Outside a directive, invalidate controlling macros.  At file
 869          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 870          get here and MI optimisation works.  */
 871       pfile->mi_valid = false;
 872
           /* While skipping, discard everything except CPP_EOF.  */
 873       if (!pfile->state.skipping || result->type == CPP_EOF)
 874         break;
 875     }
 876
 877   return result;
 878 }
 879
     /* Helper for lexing two-character operators.  If the next
        effective character is CHAR, consume it and give the token type
        THEN_TYPE; otherwise un-read it with BACKUP and give the token
        type ELSE_TYPE.  Requires "pfile", "buffer" and "result" to be
        in scope at the point of use.  */
 880 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 881   do {                                          \
 882     if (get_effective_char (pfile) == CHAR)     \
 883       result->type = THEN_TYPE;                 \
 884     else                                        \
 885       {                                         \
 886         BACKUP ();                              \
 887         result->type = ELSE_TYPE;               \
 888       }                                         \
 889   } while (0)
 890
 891 /* Lex a token into pfile->cur_token, which is also incremented, to
 892    get diagnostics pointing to the correct location.
 893
 894    Does not handle issues such as token lookahead, multiple-include
 895    optimisation, directives, skipping etc.  This function is only
 896    suitable for use by _cpp_lex_token, and in special cases like
 897    lex_expansion_token which doesn't care for any of these issues.
 898
 899    When meeting a newline, returns CPP_EOF if parsing a directive,
 900    otherwise returns to the start of the token buffer if permissible.
 901    Returns the location of the lexed token.  */
     /* Control flow inside the function is driven by four labels:

          fresh_line          (re)load pfile->buffer and move the buffer's
                              saved flags onto the token;
          update_tokens_line  (re)stamp the token with pfile->line;
          skipped_white       fetch the next character and record its
                              column;
          trigraph            dispatch on a character already held in C.

        Two further labels, start_ident and random_char, live inside the
        switch so that other cases can jump to the identifier and
        CPP_OTHER handling respectively.  */
 902 cpp_token *
 903 _cpp_lex_direct (pfile)
 904      cpp_reader *pfile;
 905 {
 906   cppchar_t c;
 907   cpp_buffer *buffer;
 908   const unsigned char *comment_start;
       /* RESULT is the slot being filled; cur_token is advanced past it
          immediately.  */
 909   cpp_token *result = pfile->cur_token++;
 910
 911  fresh_line:
 912   buffer = pfile->buffer;
 913   result->flags = buffer->saved_flags;
 914   buffer->saved_flags = 0;
 915  update_tokens_line:
 916   result->line = pfile->line;
 917
 918  skipped_white:
 919   c = *buffer->cur++;
 920   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 921
 922  trigraph:
 923   switch (c)
 924     {
 925     case ' ': case '\t': case '\f': case '\v': case '\0':
 926       result->flags |= PREV_WHITE;
           /* A nonzero return means there is more input to lex; a zero
              return is handled as end of buffer below.  */
 927       if (skip_whitespace (pfile, c))
 928         goto skipped_white;
 929
 930       /* EOF.  */
 931       buffer->cur--;
 932       buffer->saved_flags = BOL;
 933       if (!pfile->state.parsing_args && !pfile->state.in_directive)
 934         {
 935           if (buffer->cur != buffer->line_base)
 936             {
 937               /* Non-empty files should end in a newline.  Don't warn
 938                  for command line and _Pragma buffers.  */
 939               if (!buffer->from_stage3)
 940                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
 941               handle_newline (pfile);
 942             }
 943
 944           /* Don't pop the last buffer.  */
 945           if (buffer->prev)
 946             {
                   /* Read return_at_eof before popping; presumably
                      BUFFER must not be used after the pop.  */
 947               unsigned char stop = buffer->return_at_eof;
 948
 949               _cpp_pop_buffer (pfile);
                   /* Unless the popped buffer asked for an EOF token,
                      carry on lexing from whatever buffer is current.  */
 950               if (!stop)
 951                 goto fresh_line;
 952             }
 953         }
 954       result->type = CPP_EOF;
 955       break;
 956
 957     case '\n': case '\r':
 958       handle_newline (pfile);
 959       buffer->saved_flags = BOL;
 960       if (! pfile->state.in_directive)
 961         {
               /* When parsing_args is 2 the newline also marks the next
                  token as preceded by whitespace.  */
 962           if (pfile->state.parsing_args == 2)
 963             buffer->saved_flags |= PREV_WHITE;
               /* Unless tokens must be kept, rewind to the first slot
                  of the base run and reuse it for the next token.  */
 964           if (!pfile->keep_tokens)
 965             {
 966               pfile->cur_run = &pfile->base_run;
 967               result = pfile->base_run.base;
 968               pfile->cur_token = result + 1;
 969             }
 970           goto fresh_line;
 971         }
 972       result->type = CPP_EOF;
 973       break;
 974
 975     case '?':
 976     case '\\':
 977       /* These could start an escaped newline, or '?' a trigraph.  Let
 978          skip_escaped_newlines do all the work.  */
 979       {
 980         unsigned int line = pfile->line;
 981
 982         c = skip_escaped_newlines (pfile);
 983         if (line != pfile->line)
 984           {
                 /* Step back one character so that skipped_white
                    fetches it again after the line is re-stamped.  */
 985             buffer->cur--;
 986             /* We had at least one escaped newline of some sort.
 987                Update the token's line and column.  */
 988             goto update_tokens_line;
 989           }
 990       }
 991
 992       /* We are either the original '?' or '\\', or a trigraph.  */
 993       if (c == '?')
 994         result->type = CPP_QUERY;
 995       else if (c == '\\')
 996         goto random_char;
 997       else
 998         goto trigraph;
 999       break;
1000
1001     case '0': case '1': case '2': case '3': case '4':
1002     case '5': case '6': case '7': case '8': case '9':
1003       result->type = CPP_NUMBER;
1004       parse_number (pfile, &result->val.str, 0);
1005       break;
1006
1007     case 'L':
1008       /* 'L' may introduce wide characters or strings.  */
1009         {
               /* Remember the position so the lookahead can be undone
                  when the 'L' turns out to start an identifier.  */
1010           const unsigned char *pos = buffer->cur;
1011
1012           c = get_effective_char (pfile);
1013           if (c == '\'' || c == '"')
1014             {
1015               result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1016               parse_string (pfile, result, c);
1017               break;
1018             }
1019           buffer->cur = pos;
1020         }
1021         /* Fall through.  */
1022
1023     start_ident:
1024     case '_':
1025     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1026     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1027     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1028     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1029     case 'y': case 'z':
1030     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1031     case 'G': case 'H': case 'I': case 'J': case 'K':
1032     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1033     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1034     case 'Y': case 'Z':
1035       result->type = CPP_NAME;
1036       result->val.node = parse_identifier (pfile);
1037
1038       /* Convert named operators to their proper types.  */
1039       if (result->val.node->flags & NODE_OPERATOR)
1040         {
1041           result->flags |= NAMED_OP;
1042           result->type = result->val.node->value.operator;
1043         }
1044       break;
1045
1046     case '\'':
1047     case '"':
1048       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1049       parse_string (pfile, result, c);
1050       break;
1051
1052     case '/':
1053       /* A potential block or line comment.  */
1054       comment_start = buffer->cur;
1055       c = get_effective_char (pfile);
1056
1057       if (c == '*')
1058         {
1059           if (skip_block_comment (pfile))
1060             cpp_error (pfile, DL_ERROR, "unterminated comment");
1061         }
1062       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1063                             || CPP_IN_SYSTEM_HEADER (pfile)))
1064         {
1065           /* Warn about comments only if pedantically GNUC89, and not
1066              in system headers.  */
1067           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1068               && ! buffer->warned_cplusplus_comments)
1069             {
1070               cpp_error (pfile, DL_PEDWARN,
1071                            "C++ style comments are not allowed in ISO C89");
1072               cpp_error (pfile, DL_PEDWARN,
1073                          "(this will be reported only once per input file)");
1074               buffer->warned_cplusplus_comments = 1;
1075             }
1076
1077           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1078             cpp_error (pfile, DL_WARNING, "multi-line comment");
1079         }
1080       else if (c == '=')
1081         {
1082           result->type = CPP_DIV_EQ;
1083           break;
1084         }
1085       else
1086         {
1087           BACKUP ();
1088           result->type = CPP_DIV;
1089           break;
1090         }
1091
           /* Only the two comment branches above reach this point; the
              operator branches have already left the switch.  When
              comments are not being saved, the comment counts as
              whitespace and lexing simply continues.  */
1092       if (!pfile->state.save_comments)
1093         {
1094           result->flags |= PREV_WHITE;
1095           goto update_tokens_line;
1096         }
1097
1098       /* Save the comment as a token in its own right.  */
1099       save_comment (pfile, result, comment_start, c);
1100       break;
1101
1102     case '<':
           /* When angled_headers is set, everything up to the closing
              '>' is lexed as a single header-name token.  */
1103       if (pfile->state.angled_headers)
1104         {
1105           result->type = CPP_HEADER_NAME;
1106           parse_string (pfile, result, '>');
1107           break;
1108         }
1109
1110       c = get_effective_char (pfile);
1111       if (c == '=')
1112         result->type = CPP_LESS_EQ;
1113       else if (c == '<')
1114         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1115       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1116         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1117       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1118         {
1119           result->type = CPP_OPEN_SQUARE;
1120           result->flags |= DIGRAPH;
1121         }
1122       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1123         {
1124           result->type = CPP_OPEN_BRACE;
1125           result->flags |= DIGRAPH;
1126         }
1127       else
1128         {
1129           BACKUP ();
1130           result->type = CPP_LESS;
1131         }
1132       break;
1133
1134     case '>':
1135       c = get_effective_char (pfile);
1136       if (c == '=')
1137         result->type = CPP_GREATER_EQ;
1138       else if (c == '>')
1139         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1140       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1141         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1142       else
1143         {
1144           BACKUP ();
1145           result->type = CPP_GREATER;
1146         }
1147       break;
1148
1149     case '%':
1150       c = get_effective_char (pfile);
1151       if (c == '=')
1152         result->type = CPP_MOD_EQ;
1153       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1154         {
               /* "%:" is the digraph for '#'; a second "%:" right after
                  it makes the digraph form of "##".  */
1155           result->flags |= DIGRAPH;
1156           result->type = CPP_HASH;
1157           if (get_effective_char (pfile) == '%')
1158             {
1159               const unsigned char *pos = buffer->cur;
1160
1161               if (get_effective_char (pfile) == ':')
1162                 result->type = CPP_PASTE;
1163               else
                     /* Not "%:%:": rewind to one character before POS,
                        un-reading the second '%' as well.  */
1164                 buffer->cur = pos - 1;
1165             }
1166           else
1167             BACKUP ();
1168         }
1169       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1170         {
1171           result->flags |= DIGRAPH;
1172           result->type = CPP_CLOSE_BRACE;
1173         }
1174       else
1175         {
1176           BACKUP ();
1177           result->type = CPP_MOD;
1178         }
1179       break;
1180
1181     case '.':
1182       result->type = CPP_DOT;
1183       c = get_effective_char (pfile);
1184       if (c == '.')
1185         {
1186           const unsigned char *pos = buffer->cur;
1187
1188           if (get_effective_char (pfile) == '.')
1189             result->type = CPP_ELLIPSIS;
1190           else
                 /* Only two dots: rewind to one character before POS so
                    the second '.' is lexed again as its own token.  */
1191             buffer->cur = pos - 1;
1192         }
1193       /* All known character sets have 0...9 contiguous.  */
1194       else if (ISDIGIT (c))
1195         {
1196           result->type = CPP_NUMBER;
1197           parse_number (pfile, &result->val.str, 1);
1198         }
1199       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1200         result->type = CPP_DOT_STAR;
1201       else
1202         BACKUP ();
1203       break;
1204
1205     case '+':
1206       c = get_effective_char (pfile);
1207       if (c == '+')
1208         result->type = CPP_PLUS_PLUS;
1209       else if (c == '=')
1210         result->type = CPP_PLUS_EQ;
1211       else
1212         {
1213           BACKUP ();
1214           result->type = CPP_PLUS;
1215         }
1216       break;
1217
1218     case '-':
1219       c = get_effective_char (pfile);
1220       if (c == '>')
1221         {
1222           result->type = CPP_DEREF;
               /* "->*" is only recognised when lexing C++.  */
1223           if (CPP_OPTION (pfile, cplusplus))
1224             {
1225               if (get_effective_char (pfile) == '*')
1226                 result->type = CPP_DEREF_STAR;
1227               else
1228                 BACKUP ();
1229             }
1230         }
1231       else if (c == '-')
1232         result->type = CPP_MINUS_MINUS;
1233       else if (c == '=')
1234         result->type = CPP_MINUS_EQ;
1235       else
1236         {
1237           BACKUP ();
1238           result->type = CPP_MINUS;
1239         }
1240       break;
1241
1242     case '&':
1243       c = get_effective_char (pfile);
1244       if (c == '&')
1245         result->type = CPP_AND_AND;
1246       else if (c == '=')
1247         result->type = CPP_AND_EQ;
1248       else
1249         {
1250           BACKUP ();
1251           result->type = CPP_AND;
1252         }
1253       break;
1254
1255     case '|':
1256       c = get_effective_char (pfile);
1257       if (c == '|')
1258         result->type = CPP_OR_OR;
1259       else if (c == '=')
1260         result->type = CPP_OR_EQ;
1261       else
1262         {
1263           BACKUP ();
1264           result->type = CPP_OR;
1265         }
1266       break;
1267
1268     case ':':
1269       c = get_effective_char (pfile);
1270       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1271         result->type = CPP_SCOPE;
1272       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1273         {
1274           result->flags |= DIGRAPH;
1275           result->type = CPP_CLOSE_SQUARE;
1276         }
1277       else
1278         {
1279           BACKUP ();
1280           result->type = CPP_COLON;
1281         }
1282       break;
1283
         /* Operators whose only two-character form is decided by a
            single lookahead character.  */
1284     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1285     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1286     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1287     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1288     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1289
         /* Single-character punctuators; no lookahead is needed.  */
1290     case '~': result->type = CPP_COMPL; break;
1291     case ',': result->type = CPP_COMMA; break;
1292     case '(': result->type = CPP_OPEN_PAREN; break;
1293     case ')': result->type = CPP_CLOSE_PAREN; break;
1294     case '[': result->type = CPP_OPEN_SQUARE; break;
1295     case ']': result->type = CPP_CLOSE_SQUARE; break;
1296     case '{': result->type = CPP_OPEN_BRACE; break;
1297     case '}': result->type = CPP_CLOSE_BRACE; break;
1298     case ';': result->type = CPP_SEMICOLON; break;
1299
1300       /* @ is a punctuator in Objective C.  */
1301     case '@': result->type = CPP_ATSIGN; break;
1302
1303     case '$':
           /* '$' starts an identifier only when dollars_in_ident is
              enabled; otherwise it is just another stray character.  */
1304       if (CPP_OPTION (pfile, dollars_in_ident))
1305         goto start_ident;
1306       /* Fall through...  */
1307
1308     random_char:
1309     default:
           /* Anything not recognised above becomes a CPP_OTHER token
              that carries the character itself.  */
1310       result->type = CPP_OTHER;
1311       result->val.c = c;
1312       break;
1313     }
1314
1315   return result;
1316 }
1317
1318 /* An upper bound on the number of bytes needed to spell TOKEN,
1319    including preceding whitespace.  */
1320 unsigned int
1321 cpp_token_len (token)
1322      const cpp_token *token;
1323 {
1324   unsigned int len;
1325
1326   switch (TOKEN_SPELL (token))
1327     {
1328     default:            len = 0;                                break;
1329     case SPELL_NUMBER:
1330     case SPELL_STRING:  len = token->val.str.len;               break;
1331     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1332     }
1333   /* 1 for whitespace, 4 for comment delimiters.  */
1334   return len + 5;
1335 }
1336
1337 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1338    already contain the enough space to hold the token's spelling.
1339    Returns a pointer to the character after the last character
1340    written.  */
1341 unsigned char *
1342 cpp_spell_token (pfile, token, buffer)
1343      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1344      const cpp_token *token;
1345      unsigned char *buffer;
1346 {
1347   switch (TOKEN_SPELL (token))
1348     {
1349     case SPELL_OPERATOR:
1350       {
1351         const unsigned char *spelling;
1352         unsigned char c;
1353
1354         if (token->flags & DIGRAPH)
1355           spelling
1356             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1357         else if (token->flags & NAMED_OP)
1358           goto spell_ident;
1359         else
1360           spelling = TOKEN_NAME (token);
1361
1362         while ((c = *spelling++) != '\0')
1363           *buffer++ = c;
1364       }
1365       break;
1366
1367     case SPELL_CHAR:
1368       *buffer++ = token->val.c;
1369       break;
1370
1371     spell_ident:
1372     case SPELL_IDENT:
1373       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1374       buffer += NODE_LEN (token->val.node);
1375       break;
1376
1377     case SPELL_NUMBER:
1378       memcpy (buffer, token->val.str.text, token->val.str.len);
1379       buffer += token->val.str.len;
1380       break;
1381
1382     case SPELL_STRING:
1383       {
1384         int left, right, tag;
1385         switch (token->type)
1386           {
1387           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1388           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1389           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1390           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1391           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1392           default:
1393             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1394                        TOKEN_NAME (token));
1395             return buffer;
1396           }
1397         if (tag) *buffer++ = tag;
1398         *buffer++ = left;
1399         memcpy (buffer, token->val.str.text, token->val.str.len);
1400         buffer += token->val.str.len;
1401         *buffer++ = right;
1402       }
1403       break;
1404
1405     case SPELL_NONE:
1406       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1407       break;
1408     }
1409
1410   return buffer;
1411 }
1412
1413 /* Returns TOKEN spelt as a null-terminated string.  The string is
1414    freed when the reader is destroyed.  Useful for diagnostics.  */
1415 unsigned char *
1416 cpp_token_as_text (pfile, token)
1417      cpp_reader *pfile;
1418      const cpp_token *token;
1419 {
1420   unsigned int len = cpp_token_len (token);
1421   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1422
1423   end = cpp_spell_token (pfile, token, start);
1424   end[0] = '\0';
1425
1426   return start;
1427 }
1428
1429 /* Used by C front ends, which really should move to using
1430    cpp_token_as_text.  */
1431 const char *
1432 cpp_type2name (type)
1433      enum cpp_ttype type;
1434 {
1435   return (const char *) token_spellings[type].name;
1436 }
1437
1438 /* Writes the spelling of token to FP, without any preceding space.
1439    Separated from cpp_spell_token for efficiency - to avoid stdio
1440    double-buffering.  */
1441 void
1442 cpp_output_token (token, fp)
1443      const cpp_token *token;
1444      FILE *fp;
1445 {
1446   switch (TOKEN_SPELL (token))
1447     {
1448     case SPELL_OPERATOR:
1449       {
1450         const unsigned char *spelling;
1451         int c;
1452
1453         if (token->flags & DIGRAPH)
1454           spelling
1455             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1456         else if (token->flags & NAMED_OP)
1457           goto spell_ident;
1458         else
1459           spelling = TOKEN_NAME (token);
1460
1461         c = *spelling;
1462         do
1463           putc (c, fp);
1464         while ((c = *++spelling) != '\0');
1465       }
1466       break;
1467
1468     case SPELL_CHAR:
1469       putc (token->val.c, fp);
1470       break;
1471
1472     spell_ident:
1473     case SPELL_IDENT:
1474       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1475     break;
1476
1477     case SPELL_NUMBER:
1478       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1479       break;
1480
1481     case SPELL_STRING:
1482       {
1483         int left, right, tag;
1484         switch (token->type)
1485           {
1486           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1487           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1488           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1489           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1490           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1491           default:
1492             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1493             return;
1494           }
1495         if (tag) putc (tag, fp);
1496         putc (left, fp);
1497         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1498         putc (right, fp);
1499       }
1500       break;
1501
1502     case SPELL_NONE:
1503       /* An error, most probably.  */
1504       break;
1505     }
1506 }
1507
1508 /* Compare two tokens.  */
1509 int
1510 _cpp_equiv_tokens (a, b)
1511      const cpp_token *a, *b;
1512 {
1513   if (a->type == b->type && a->flags == b->flags)
1514     switch (TOKEN_SPELL (a))
1515       {
1516       default:                  /* Keep compiler happy.  */
1517       case SPELL_OPERATOR:
1518         return 1;
1519       case SPELL_CHAR:
1520         return a->val.c == b->val.c; /* Character.  */
1521       case SPELL_NONE:
1522         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1523       case SPELL_IDENT:
1524         return a->val.node == b->val.node;
1525       case SPELL_NUMBER:
1526       case SPELL_STRING:
1527         return (a->val.str.len == b->val.str.len
1528                 && !memcmp (a->val.str.text, b->val.str.text,
1529                             a->val.str.len));
1530       }
1531
1532   return 0;
1533 }
1534
1535 /* Returns nonzero if a space should be inserted to avoid an
1536    accidental token paste for output.  For simplicity, it is
1537    conservative, and occasionally advises a space where one is not
1538    needed, e.g. "." and ".2".  */
1539 int
1540 cpp_avoid_paste (pfile, token1, token2)
1541      cpp_reader *pfile;
1542      const cpp_token *token1, *token2;
1543 {
1544   enum cpp_ttype a = token1->type, b = token2->type;
1545   cppchar_t c;
1546
1547   if (token1->flags & NAMED_OP)
1548     a = CPP_NAME;
1549   if (token2->flags & NAMED_OP)
1550     b = CPP_NAME;
1551
1552   c = EOF;
1553   if (token2->flags & DIGRAPH)
1554     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1555   else if (token_spellings[b].category == SPELL_OPERATOR)
1556     c = token_spellings[b].name[0];
1557
1558   /* Quickly get everything that can paste with an '='.  */
1559   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1560     return 1;
1561
1562   switch (a)
1563     {
1564     case CPP_GREATER:   return c == '>' || c == '?';
1565     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1566     case CPP_PLUS:      return c == '+';
1567     case CPP_MINUS:     return c == '-' || c == '>';
1568     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1569     case CPP_MOD:       return c == ':' || c == '>';
1570     case CPP_AND:       return c == '&';
1571     case CPP_OR:        return c == '|';
1572     case CPP_COLON:     return c == ':' || c == '>';
1573     case CPP_DEREF:     return c == '*';
1574     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1575     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1576     case CPP_NAME:      return ((b == CPP_NUMBER
1577                                  && name_p (pfile, &token2->val.str))
1578                                 || b == CPP_NAME
1579                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1580     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1581                                 || c == '.' || c == '+' || c == '-');
1582     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1583                                 && token1->val.c == '@'
1584                                 && (b == CPP_NAME || b == CPP_STRING));
1585     default:            break;
1586     }
1587
1588   return 0;
1589 }
1590
1591 /* Output all the remaining tokens on the current line, and a newline
1592    character, to FP.  Leading whitespace is removed.  If there are
1593    macros, special token padding is not performed.  */
1594 void
1595 cpp_output_line (pfile, fp)
1596      cpp_reader *pfile;
1597      FILE *fp;
1598 {
1599   const cpp_token *token;
1600
1601   token = cpp_get_token (pfile);
1602   while (token->type != CPP_EOF)
1603     {
1604       cpp_output_token (token, fp);
1605       token = cpp_get_token (pfile);
1606       if (token->flags & PREV_WHITE)
1607         putc (' ', fp);
1608     }
1609
1610   putc ('\n', fp);
1611 }
1612
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1623
1624 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1625    failure if cpplib is not parsing C++ or C99.  Such failure is
1626    silent, and no variables are updated.  Otherwise returns 0, and
1627    warns if -Wtraditional.
1628
1629    [lex.charset]: The character designated by the universal character
1630    name \UNNNNNNNN is that character whose character short name in
1631    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1632    universal character name \uNNNN is that character whose character
1633    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1634    for a universal character name is less than 0x20 or in the range
1635    0x7F-0x9F (inclusive), or if the universal character name
1636    designates a character in the basic source character set, then the
1637    program is ill-formed.
1638
1639    We assume that wchar_t is Unicode, so we don't need to do any
1640    mapping.  Is this ever wrong?
1641
1642    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1643    LIMIT is the end of the string or charconst.  PSTR is updated to
1644    point after the UCS on return, and the UCS is written into PC.  */
1645
1646 static int
1647 maybe_read_ucs (pfile, pstr, limit, pc)
1648      cpp_reader *pfile;
1649      const unsigned char **pstr;
1650      const unsigned char *limit;
1651      cppchar_t *pc;
1652 {
1653   const unsigned char *p = *pstr;
1654   unsigned int code = 0;
1655   unsigned int c = *pc, length;
1656
1657   /* Only attempt to interpret a UCS for C++ and C99.  */
1658   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1659     return 1;
1660
1661   if (CPP_WTRADITIONAL (pfile))
1662     cpp_error (pfile, DL_WARNING,
1663                "the meaning of '\\%c' is different in traditional C", c);
1664
1665   length = (c == 'u' ? 4: 8);
1666
1667   if ((size_t) (limit - p) < length)
1668     {
1669       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1670       /* Skip to the end to avoid more diagnostics.  */
1671       p = limit;
1672     }
1673   else
1674     {
1675       for (; length; length--, p++)
1676         {
1677           c = *p;
1678           if (ISXDIGIT (c))
1679             code = (code << 4) + hex_digit_value (c);
1680           else
1681             {
1682               cpp_error (pfile, DL_ERROR,
1683                          "non-hex digit '%c' in universal-character-name", c);
1684               /* We shouldn't skip in case there are multibyte chars.  */
1685               break;
1686             }
1687         }
1688     }
1689
1690 #ifdef TARGET_EBCDIC
1691   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1692   code = 0x3f;  /* EBCDIC invalid character */
1693 #else
1694  /* True extended characters are OK.  */
1695   if (code >= 0xa0
1696       && !(code & 0x80000000)
1697       && !(code >= 0xD800 && code <= 0xDFFF))
1698     ;
1699   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1700      hex escapes so that this also works with EBCDIC hosts.  */
1701   else if (code == 0x24 || code == 0x40 || code == 0x60)
1702     ;
1703   /* Don't give another error if one occurred above.  */
1704   else if (length == 0)
1705     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1706 #endif
1707
1708   *pstr = p;
1709   *pc = code;
1710   return 0;
1711 }
1712
1713 /* Returns the value of an escape sequence, truncated to the correct
1714    target precision.  PSTR points to the input pointer, which is just
1715    after the backslash.  LIMIT is how much text we have.  WIDE is true
1716    if the escape sequence is part of a wide character constant or
1717    string literal.  Handles all relevant diagnostics.  */
1718 cppchar_t
1719 cpp_parse_escape (pfile, pstr, limit, wide)
1720      cpp_reader *pfile;
1721      const unsigned char **pstr;
1722      const unsigned char *limit;
1723      int wide;
1724 {
1725   int unknown = 0;
1726   const unsigned char *str = *pstr;
1727   cppchar_t c, mask;
1728   unsigned int width;
1729
1730   if (wide)
1731     width = CPP_OPTION (pfile, wchar_precision);
1732   else
1733     width = CPP_OPTION (pfile, char_precision);
1734   if (width < BITS_PER_CPPCHAR_T)
1735     mask = ((cppchar_t) 1 << width) - 1;
1736   else
1737     mask = ~0;
1738
1739   c = *str++;
1740   switch (c)
1741     {
1742     case '\\': case '\'': case '"': case '?': break;
1743     case 'b': c = TARGET_BS;      break;
1744     case 'f': c = TARGET_FF;      break;
1745     case 'n': c = TARGET_NEWLINE; break;
1746     case 'r': c = TARGET_CR;      break;
1747     case 't': c = TARGET_TAB;     break;
1748     case 'v': c = TARGET_VT;      break;
1749
1750     case '(': case '{': case '[': case '%':
1751       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1752          '\%' is used to prevent SCCS from getting confused.  */
1753       unknown = CPP_PEDANTIC (pfile);
1754       break;
1755
1756     case 'a':
1757       if (CPP_WTRADITIONAL (pfile))
1758         cpp_error (pfile, DL_WARNING,
1759                    "the meaning of '\\a' is different in traditional C");
1760       c = TARGET_BELL;
1761       break;
1762
1763     case 'e': case 'E':
1764       if (CPP_PEDANTIC (pfile))
1765         cpp_error (pfile, DL_PEDWARN,
1766                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1767       c = TARGET_ESC;
1768       break;
1769
1770     case 'u': case 'U':
1771       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1772       break;
1773
1774     case 'x':
1775       if (CPP_WTRADITIONAL (pfile))
1776         cpp_error (pfile, DL_WARNING,
1777                    "the meaning of '\\x' is different in traditional C");
1778
1779         {
1780           cppchar_t i = 0, overflow = 0;
1781           int digits_found = 0;
1782
1783           while (str < limit)
1784             {
1785               c = *str;
1786               if (! ISXDIGIT (c))
1787                 break;
1788               str++;
1789               overflow |= i ^ (i << 4 >> 4);
1790               i = (i << 4) + hex_digit_value (c);
1791               digits_found = 1;
1792             }
1793
1794           if (!digits_found)
1795             cpp_error (pfile, DL_ERROR,
1796                        "\\x used with no following hex digits");
1797
1798           if (overflow | (i != (i & mask)))
1799             {
1800               cpp_error (pfile, DL_PEDWARN,
1801                          "hex escape sequence out of range");
1802               i &= mask;
1803             }
1804           c = i;
1805         }
1806       break;
1807
1808     case '0':  case '1':  case '2':  case '3':
1809     case '4':  case '5':  case '6':  case '7':
1810       {
1811         size_t count = 0;
1812         cppchar_t i = c - '0';
1813
1814         while (str < limit && ++count < 3)
1815           {
1816             c = *str;
1817             if (c < '0' || c > '7')
1818               break;
1819             str++;
1820             i = (i << 3) + c - '0';
1821           }
1822
1823         if (i != (i & mask))
1824           {
1825             cpp_error (pfile, DL_PEDWARN,
1826                        "octal escape sequence out of range");
1827             i &= mask;
1828           }
1829         c = i;
1830       }
1831       break;
1832
1833     default:
1834       unknown = 1;
1835       break;
1836     }
1837
1838   if (unknown)
1839     {
1840       if (ISGRAPH (c))
1841         cpp_error (pfile, DL_PEDWARN,
1842                    "unknown escape sequence '\\%c'", (int) c);
1843       else
1844         cpp_error (pfile, DL_PEDWARN,
1845                    "unknown escape sequence: '\\%03o'", (int) c);
1846     }
1847
1848   if (c > mask)
1849     {
1850       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1851       c &= mask;
1852     }
1853
1854   *pstr = str;
1855   return c;
1856 }
1857
1858 /* Interpret a (possibly wide) character constant in TOKEN.
1859    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1860    points to a variable that is filled in with the number of
1861    characters seen, and UNSIGNEDP to a variable that indicates whether
1862    the result has signed type.  */
1863 cppchar_t
1864 cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen, unsignedp)
1865      cpp_reader *pfile;
1866      const cpp_token *token;
1867      int warn_multi;
1868      unsigned int *pchars_seen;
1869      int *unsignedp;
1870 {
1871   const unsigned char *str = token->val.str.text;
1872   const unsigned char *limit = str + token->val.str.len;
1873   unsigned int chars_seen = 0;
1874   size_t width, max_chars;
1875   cppchar_t c, mask, result = 0;
1876   bool unsigned_p;
1877
1878 #ifdef MULTIBYTE_CHARS
1879   (void) local_mbtowc (NULL, NULL, 0);
1880 #endif
1881
1882   /* Width in bits.  */
1883   if (token->type == CPP_CHAR)
1884     {
1885       width = CPP_OPTION (pfile, char_precision);
1886       max_chars = CPP_OPTION (pfile, int_precision) / width;
1887       unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1888     }
1889   else
1890     {
1891       width = CPP_OPTION (pfile, wchar_precision);
1892       max_chars = 1;
1893       unsigned_p = WCHAR_UNSIGNED;
1894     }
1895
1896   if (width < BITS_PER_CPPCHAR_T)
1897     mask = ((cppchar_t) 1 << width) - 1;
1898   else
1899     mask = ~0;
1900
1901   while (str < limit)
1902     {
1903 #ifdef MULTIBYTE_CHARS
1904       wchar_t wc;
1905       int char_len;
1906
1907       char_len = local_mbtowc (&wc, str, limit - str);
1908       if (char_len == -1)
1909         {
1910           cpp_error (pfile, DL_WARNING,
1911                      "ignoring invalid multibyte character");
1912           c = *str++;
1913         }
1914       else
1915         {
1916           str += char_len;
1917           c = wc;
1918         }
1919 #else
1920       c = *str++;
1921 #endif
1922
1923       if (c == '\\')
1924         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1925
1926 #ifdef MAP_CHARACTER
1927       if (ISPRINT (c))
1928         c = MAP_CHARACTER (c);
1929 #endif
1930
1931       chars_seen++;
1932
1933       /* Sign-extend the character, scale result, and add the two.  */
1934       if (!unsigned_p && (c & (1 << (width - 1))))
1935         c |= ~mask;
1936       if (width < BITS_PER_CPPCHAR_T)
1937         result = (result << width) + c;
1938       else
1939         result = c;
1940     }
1941
1942   if (chars_seen == 0)
1943     cpp_error (pfile, DL_ERROR, "empty character constant");
1944   else if (chars_seen > 1)
1945     {
1946       /* Multichar charconsts are of type int and therefore signed.  */
1947       unsigned_p = 0;
1948       if (chars_seen > max_chars)
1949         {
1950           chars_seen = max_chars;
1951           cpp_error (pfile, DL_WARNING,
1952                      "character constant too long for its type");
1953         }
1954       else if (warn_multi)
1955         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1956     }
1957
1958   *pchars_seen = chars_seen;
1959   *unsignedp = unsigned_p;
1960   return result;
1961 }
1962
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area a new buffer is given.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that an
   argument containing a low-precedence operator expands as intended.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1977
/* Layout probe, used only by DEFAULT_ALIGNMENT: a single char followed
   by a union of a double and a pointer.  The offset of the union
   within the struct is the padding the compiler inserts to align it.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment applied to buffer sizes: the offset of member U within
   struct dummy.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to the next multiple of ALIGN.  The mask arithmetic
   is only meaningful when ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1990
1991 /* Create a new allocation buffer.  Place the control block at the end
1992    of the buffer, so that buffer overflows will cause immediate chaos.  */
1993 static _cpp_buff *
1994 new_buff (len)
1995      size_t len;
1996 {
1997   _cpp_buff *result;
1998   unsigned char *base;
1999
2000   if (len < MIN_BUFF_SIZE)
2001     len = MIN_BUFF_SIZE;
2002   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2003
2004   base = xmalloc (len + sizeof (_cpp_buff));
2005   result = (_cpp_buff *) (base + len);
2006   result->base = base;
2007   result->cur = base;
2008   result->limit = base + len;
2009   result->next = NULL;
2010   return result;
2011 }
2012
2013 /* Place a chain of unwanted allocation buffers on the free list.  */
2014 void
2015 _cpp_release_buff (pfile, buff)
2016      cpp_reader *pfile;
2017      _cpp_buff *buff;
2018 {
2019   _cpp_buff *end = buff;
2020
2021   while (end->next)
2022     end = end->next;
2023   end->next = pfile->free_buffs;
2024   pfile->free_buffs = buff;
2025 }
2026
2027 /* Return a free buffer of size at least MIN_SIZE.  */
2028 _cpp_buff *
2029 _cpp_get_buff (pfile, min_size)
2030      cpp_reader *pfile;
2031      size_t min_size;
2032 {
2033   _cpp_buff *result, **p;
2034
2035   for (p = &pfile->free_buffs;; p = &(*p)->next)
2036     {
2037       size_t size;
2038
2039       if (*p == NULL)
2040         return new_buff (min_size);
2041       result = *p;
2042       size = result->limit - result->base;
2043       /* Return a buffer that's big enough, but don't waste one that's
2044          way too big.  */
2045       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2046         break;
2047     }
2048
2049   *p = result->next;
2050   result->next = NULL;
2051   result->cur = result->base;
2052   return result;
2053 }
2054
2055 /* Creates a new buffer with enough space to hold the uncommitted
2056    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2057    the excess bytes to the new buffer.  Chains the new buffer after
2058    BUFF, and returns the new buffer.  */
2059 _cpp_buff *
2060 _cpp_append_extend_buff (pfile, buff, min_extra)
2061      cpp_reader *pfile;
2062      _cpp_buff *buff;
2063      size_t min_extra;
2064 {
2065   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2066   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2067
2068   buff->next = new_buff;
2069   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2070   return new_buff;
2071 }
2072
2073 /* Creates a new buffer with enough space to hold the uncommitted
2074    remaining bytes of the buffer pointed to by BUFF, and at least
2075    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2076    Chains the new buffer before the buffer pointed to by BUFF, and
2077    updates the pointer to point to the new buffer.  */
2078 void
2079 _cpp_extend_buff (pfile, pbuff, min_extra)
2080      cpp_reader *pfile;
2081      _cpp_buff **pbuff;
2082      size_t min_extra;
2083 {
2084   _cpp_buff *new_buff, *old_buff = *pbuff;
2085   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2086
2087   new_buff = _cpp_get_buff (pfile, size);
2088   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2089   new_buff->next = old_buff;
2090   *pbuff = new_buff;
2091 }
2092
2093 /* Free a chain of buffers starting at BUFF.  */
2094 void
2095 _cpp_free_buff (buff)
2096      _cpp_buff *buff;
2097 {
2098   _cpp_buff *next;
2099
2100   for (; buff; buff = next)
2101     {
2102       next = buff->next;
2103       free (buff->base);
2104     }
2105 }
2106
2107 /* Allocate permanent, unaligned storage of length LEN.  */
2108 unsigned char *
2109 _cpp_unaligned_alloc (pfile, len)
2110      cpp_reader *pfile;
2111      size_t len;
2112 {
2113   _cpp_buff *buff = pfile->u_buff;
2114   unsigned char *result = buff->cur;
2115
2116   if (len > (size_t) (buff->limit - result))
2117     {
2118       buff = _cpp_get_buff (pfile, len);
2119       buff->next = pfile->u_buff;
2120       pfile->u_buff = buff;
2121       result = buff->cur;
2122     }
2123
2124   buff->cur = result + len;
2125   return result;
2126 }
2127
2128 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2129    That buffer is used for growing allocations when saving macro
2130    replacement lists in a #define, and when parsing an answer to an
2131    assertion in #assert, #unassert or #if (and therefore possibly
2132    whilst expanding macros).  It therefore must not be used by any
2133    code that they might call: specifically the lexer and the guts of
2134    the macro expander.
2135
2136    All existing other uses clearly fit this restriction: storing
2137    registered pragmas during initialization.  */
2138 unsigned char *
2139 _cpp_aligned_alloc (pfile, len)
2140      cpp_reader *pfile;
2141      size_t len;
2142 {
2143   _cpp_buff *buff = pfile->a_buff;
2144   unsigned char *result = buff->cur;
2145
2146   if (len > (size_t) (buff->limit - result))
2147     {
2148       buff = _cpp_get_buff (pfile, len);
2149       buff->next = pfile->a_buff;
2150       pfile->a_buff = buff;
2151       result = buff->cur;
2152     }
2153
2154   buff->cur = result + len;
2155   return result;
2156 }