gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "cpplib.h"
  28 #include "cpphash.h"
  29
  30 #ifdef MULTIBYTE_CHARS
  31 #include "mbchar.h"
  32 #include <locale.h>
  33 #endif
  34
   35 /* Categories describing how a token's spelling is obtained.  Tokens
   36    with SPELL_STRING carry their own spelling: the lexer in this file
         stores the text in token->val.str.text and its length in
         token->val.str.len.  */
   37 enum spell_type
   38 {
   39   SPELL_OPERATOR = 0,  /* Used by the OP entries of token_spellings.  */
   40   SPELL_CHAR,
   41   SPELL_IDENT,
   42   SPELL_NUMBER,
   43   SPELL_STRING,
   44   SPELL_NONE
   45 };
  46
   47 struct token_spelling  /* One entry of the token_spellings table.  */
   48 {
   49   enum spell_type category;   /* How tokens of this type are spelled.  */
   50   const unsigned char *name;  /* Spelling (OP) or enumerator name (TK).  */
   51 };
   52
      /* Spellings of the digraph punctuators.  NOTE(review): the order
         presumably matches how the code that spells digraph tokens
         indexes this array -- confirm before reordering.  */
   53 static const unsigned char *const digraph_spellings[] =
   54 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
   55
      /* Build token_spellings by expanding TTYPE_TABLE (defined outside
         this file).  An OP entry becomes an operator whose spelling is the
         literal S; a TK entry has spelling category S and is named by the
         stringified enumerator E.  */
   56 #define OP(e, s) { SPELL_OPERATOR, U s           },
   57 #define TK(e, s) { s,              U STRINGX (e) },
   58 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
   59 #undef OP
   60 #undef TK
   61
      /* Spelling category and table name for TOKEN's type.  */
   62 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
   63 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
      /* Rewind the read pointer to the position recorded in backup_to.
         Relies on a local variable named `buffer' at the point of use.  */
   64 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
   65
      /* Forward declarations of the file-local (static) helpers.  */
   66 static void handle_newline PARAMS ((cpp_reader *));
   67 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
   68 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
   69
   70 static int skip_block_comment PARAMS ((cpp_reader *));
   71 static int skip_line_comment PARAMS ((cpp_reader *));
   72 static void adjust_column PARAMS ((cpp_reader *));
   73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
   74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
   75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
   76                                   unsigned int *));
   77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
   78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
   79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
   80 static bool trigraph_p PARAMS ((cpp_reader *));
   81 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
   82                                   cppchar_t));
   83 static bool continue_after_nul PARAMS ((cpp_reader *));
   84 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
   85 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
   86                                    const unsigned char *, cppchar_t *));
   87 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
   88
   89 static unsigned int hex_digit_value PARAMS ((unsigned int));
   90 static _cpp_buff *new_buff PARAMS ((size_t));
  91
   92 /* Change to the native locale for multibyte conversions.  Does
         nothing unless MULTIBYTE_CHARS is defined; otherwise it selects
         the environment's LC_CTYPE locale and passes literal_codeset and
         "LANG" to GET_ENVIRONMENT (presumably loading the LANG environment
         variable into literal_codeset -- the macro is defined elsewhere).  */
   93 void
   94 _cpp_init_mbchar ()
   95 {
   96 #ifdef MULTIBYTE_CHARS
   97   setlocale (LC_CTYPE, "");
   98   GET_ENVIRONMENT (literal_codeset, "LANG");
   99 #endif
  100 }
 101
 102 /* Utility routine:
 103
 104    Compares, the token TOKEN to the NUL-terminated string STRING.
 105    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 106 int
 107 cpp_ideq (token, string)
 108      const cpp_token *token;
 109      const char *string;
 110 {
 111   if (token->type != CPP_NAME)
 112     return 0;
 113
 114   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 115 }
 116
 117 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 118    Returns with buffer->cur pointing to the character immediately
 119    following the newline (combination).  */
 120 static void
 121 handle_newline (pfile)
 122      cpp_reader *pfile;
 123 {
 124   cpp_buffer *buffer = pfile->buffer;
 125
 126   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 127      only accept CR-LF; maybe we should fall back to that behavior?  */
 128   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 129     buffer->cur++;
 130
 131   buffer->line_base = buffer->cur;
 132   buffer->col_adjust = 0;
 133   pfile->line++;
 134 }
 135
 136 /* Subroutine of skip_escaped_newlines; called when a 3-character
 137    sequence beginning with "??" is encountered.  buffer->cur points to
 138    the second '?'.
 139
 140    Warn if necessary, and returns true if the sequence forms a
 141    trigraph and the trigraph should be honored.  */
 142 static bool
 143 trigraph_p (pfile)
 144      cpp_reader *pfile;
 145 {
 146   cpp_buffer *buffer = pfile->buffer;
 147   cppchar_t from_char = buffer->cur[1];
 148   bool accept;
 149
 150   if (!_cpp_trigraph_map[from_char])
 151     return false;
 152
 153   accept = CPP_OPTION (pfile, trigraphs);
 154
 155   /* Don't warn about trigraphs in comments.  */
 156   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 157     {
 158       if (accept)
 159         cpp_error_with_line (pfile, DL_WARNING,
 160                              pfile->line, CPP_BUF_COL (buffer) - 1,
 161                              "trigraph ??%c converted to %c",
 162                              (int) from_char,
 163                              (int) _cpp_trigraph_map[from_char]);
 164       else if (buffer->cur != buffer->last_Wtrigraphs)
 165         {
 166           buffer->last_Wtrigraphs = buffer->cur;
 167           cpp_error_with_line (pfile, DL_WARNING,
 168                                pfile->line, CPP_BUF_COL (buffer) - 1,
 169                                "trigraph ??%c ignored", (int) from_char);
 170         }
 171     }
 172
 173   return accept;
 174 }
 175
  176 /* Skips any escaped newlines introduced by the '?' or '\\' assumed to
  177    lie in buffer->cur[-1], first translating a trigraph if trigraph_p
  178    says to honor it.  Returns the resulting character, or EOF when a
  179    backslash-newline is the last thing in the buffer.  Each time a
         newline is skipped, buffer->backup_to is moved to the start of the
         following line.  This routine performs preprocessing stages 1 and
         2 of the ISO C standard.  */
  180 static cppchar_t
  181 skip_escaped_newlines (pfile)
  182      cpp_reader *pfile;
  183 {
  184   cpp_buffer *buffer = pfile->buffer;
  185   cppchar_t next = buffer->cur[-1];
  186
  187   /* Only do this if we apply stages 1 and 2.  */
  188   if (!buffer->from_stage3)
  189     {
  190       const unsigned char *saved_cur;
  191       cppchar_t next1;
  192
  193       do
  194         {
  195           if (next == '?')
  196             {
                    /* A lone '?', or "??x" that is not an honored trigraph,
                       is returned unchanged.  */
  197               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
  198                 break;
  199
  200               /* Translate the trigraph.  */
  201               next = _cpp_trigraph_map[buffer->cur[1]];
  202               buffer->cur += 2;
                    /* Only a backslash can start an escaped newline.  */
  203               if (next != '\\')
  204                 break;
  205             }
  206
                /* A backslash that is the last character of the buffer is
                   returned as it is.  */
  207           if (buffer->cur == buffer->rlimit)
  208             break;
  209
  210           /* We have a backslash, and room for at least one more
  211              character.  Skip horizontal whitespace.  */
  212           saved_cur = buffer->cur;
  213           do
  214             next1 = *buffer->cur++;
  215           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
  216
                /* No newline follows: it is an ordinary backslash, so undo
                   the whitespace scan.  */
  217           if (!is_vspace (next1))
  218             {
  219               buffer->cur = saved_cur;
  220               break;
  221             }
  222
                /* More than the newline itself was consumed, i.e. there
                   was whitespace between the backslash and the newline.  */
  223           if (saved_cur != buffer->cur - 1
  224               && !pfile->state.lexing_comment)
  225             cpp_error (pfile, DL_WARNING,
  226                        "backslash and newline separated by space");
  227
  228           handle_newline (pfile);
                /* A caller backing up must not return to the previous line.  */
  229           buffer->backup_to = buffer->cur;
  230           if (buffer->cur == buffer->rlimit)
  231             {
  232               cpp_error (pfile, DL_PEDWARN,
  233                          "backslash-newline at end of file");
  234               next = EOF;
  235             }
  236           else
  237             next = *buffer->cur++;
  238         }
            /* The character after the newline may start another splice.  */
  239       while (next == '\\' || next == '?');
  240     }
  241
  242   return next;
  243 }
 244
 245 /* Obtain the next character, after trigraph conversion and skipping
 246    an arbitrarily long string of escaped newlines.  The common case of
 247    no trigraphs or escaped newlines falls through quickly.  On return,
 248    buffer->backup_to points to where to return to if the character is
 249    not to be processed.  */
 250 static cppchar_t
 251 get_effective_char (pfile)
 252      cpp_reader *pfile;
 253 {
 254   cppchar_t next;
 255   cpp_buffer *buffer = pfile->buffer;
 256
 257   buffer->backup_to = buffer->cur;
 258   next = *buffer->cur++;
 259   if (__builtin_expect (next == '?' || next == '\\', 0))
 260     next = skip_escaped_newlines (pfile);
 261
 262   return next;
 263 }
 264
  265 /* Skip a C-style block comment.  We find the end of the comment by
  266    seeing if an asterisk is before every '/' we encounter.  Returns
  267    nonzero if comment terminated by EOF, zero otherwise.
         Newlines and tabs inside the comment update the line and column
         bookkeeping.  state.lexing_comment is set for the duration, which
         silences the trigraph and backslash-space warnings.
         NOTE(review): presumably entered with buffer->cur just past the
         opening delimiter -- confirm against the caller.  */
  268 static int
  269 skip_block_comment (pfile)
  270      cpp_reader *pfile;
  271 {
  272   cpp_buffer *buffer = pfile->buffer;
        /* PREVC starts as EOF, so a '/' that is the first character read
           can never close the comment.  */
  273   cppchar_t c = EOF, prevc = EOF;
  274
  275   pfile->state.lexing_comment = 1;
  276   while (buffer->cur != buffer->rlimit)
  277     {
  278       prevc = c, c = *buffer->cur++;
  279
  280       /* FIXME: For speed, create a new character class of characters
  281          of interest inside block comments.  */
  282       if (c == '?' || c == '\\')
  283         c = skip_escaped_newlines (pfile);
  284
  285       /* People like decorating comments with '*', so check for '/'
  286          instead for efficiency.  */
  287       if (c == '/')
  288         {
  289           if (prevc == '*')
  290             break;
  291
  292           /* Warn about potential nested comments, but not if the '/'
  293              comes immediately before the true comment delimiter.
  294              Don't bother to get it right across escaped newlines.  */
  295           if (CPP_OPTION (pfile, warn_comments)
  296               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
  297             cpp_error_with_line (pfile, DL_WARNING,
  298                                  pfile->line, CPP_BUF_COL (buffer),
  299                                  "\"/*\" within comment");
  300         }
  301       else if (is_vspace (c))
  302         handle_newline (pfile);
  303       else if (c == '\t')
  304         adjust_column (pfile);
  305     }
  306
  307   pfile->state.lexing_comment = 0;
        /* Terminated properly only if the last two characters were "*" and
           "/"; otherwise the loop stopped at the end of the buffer.  */
  308   return c != '/' || prevc != '*';
  309 }
 310
 311 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 312    terminating newline.  Handles escaped newlines.  Returns nonzero
 313    if a multiline comment.  */
 314 static int
 315 skip_line_comment (pfile)
 316      cpp_reader *pfile;
 317 {
 318   cpp_buffer *buffer = pfile->buffer;
 319   unsigned int orig_line = pfile->line;
 320   cppchar_t c;
 321 #ifdef MULTIBYTE_CHARS
 322   wchar_t wc;
 323   int char_len;
 324 #endif
 325
 326   pfile->state.lexing_comment = 1;
 327 #ifdef MULTIBYTE_CHARS
 328   /* Reset multibyte conversion state.  */
 329   (void) local_mbtowc (NULL, NULL, 0);
 330 #endif
 331   do
 332     {
 333       if (buffer->cur == buffer->rlimit)
 334         goto at_eof;
 335
 336 #ifdef MULTIBYTE_CHARS
 337       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 338                                buffer->rlimit - buffer->cur);
 339       if (char_len == -1)
 340         {
 341           cpp_error (pfile, DL_WARNING,
 342                      "ignoring invalid multibyte character");
 343           char_len = 1;
 344           c = *buffer->cur++;
 345         }
 346       else
 347         {
 348           buffer->cur += char_len;
 349           c = wc;
 350         }
 351 #else
 352       c = *buffer->cur++;
 353 #endif
 354       if (c == '?' || c == '\\')
 355         c = skip_escaped_newlines (pfile);
 356     }
 357   while (!is_vspace (c));
 358
 359   /* Step back over the newline, except at EOF.  */
 360   buffer->cur--;
 361  at_eof:
 362
 363   pfile->state.lexing_comment = 0;
 364   return orig_line != pfile->line;
 365 }
 366
 367 /* pfile->buffer->cur is one beyond the \t character.  Update
 368    col_adjust so we track the column correctly.  */
 369 static void
 370 adjust_column (pfile)
 371      cpp_reader *pfile;
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 375
 376   /* Round it up to multiple of the tabstop, but subtract 1 since the
 377      tab itself occupies a character position.  */
 378   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 379                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 380 }
 381
  382 /* Skips horizontal whitespace starting with C, the character just
  383    read (buffer->cur is one beyond it).  Adjusts buffer->col_adjust
  384    to account for tabs; without this, tokens might be assigned an
         incorrect column.  Returns 1 with buffer->cur pointing at the
         first character that is not horizontal whitespace, or 0 if the
         NUL found lies at buffer->rlimit (buffer->cur is then left one
         beyond buffer->rlimit).  */
  385 static int
  386 skip_whitespace (pfile, c)
  387      cpp_reader *pfile;
  388      cppchar_t c;
  389 {
  390   cpp_buffer *buffer = pfile->buffer;
        /* Nonzero once the NUL warning has been given for this run.  */
  391   unsigned int warned = 0;
  392
  393   do
  394     {
  395       /* Horizontal space always OK.  */
  396       if (c == ' ')
  397         ;
  398       else if (c == '\t')
  399         adjust_column (pfile);
  400       /* Just \f \v or \0 left.  */
  401       else if (c == '\0')
  402         {
                /* A NUL sitting at rlimit marks the end of the buffer
                   rather than a stray character in the input.  */
  403           if (buffer->cur - 1 == buffer->rlimit)
  404             return 0;
  405           if (!warned)
  406             {
  407               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
  408               warned = 1;
  409             }
  410         }
  411       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
  412         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
  413                              CPP_BUF_COL (buffer),
  414                              "%s in preprocessing directive",
  415                              c == '\f' ? "form feed" : "vertical tab");
  416
  417       c = *buffer->cur++;
  418     }
  419   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
  420   while (is_nvspace (c));
  421
        /* Leave the character that ended the run for the caller.  */
  422   buffer->cur--;
  423   return 1;
  424 }
 425
 426 /* See if the characters of a number token are valid in a name (no
 427    '.', '+' or '-').  */
 428 static int
 429 name_p (pfile, string)
 430      cpp_reader *pfile;
 431      const cpp_string *string;
 432 {
 433   unsigned int i;
 434
 435   for (i = 0; i < string->len; i++)
 436     if (!is_idchar (string->text[i]))
 437       return 0;
 438
 439   return 1;
 440 }
 441
  442 /* Parse an identifier, skipping embedded backslash-newlines.  This is
  443    a critical inner loop.  The common case is an identifier which has
  444    not been split by backslash-newline, does not contain a dollar
  445    sign, and has already been scanned (roughly 10:1 ratio of
  446    seen:unseen identifiers in normal code; the distribution is
  447    Poisson-like).  Second most common case is a new identifier, not
  448    split and no dollar sign.  The other possibilities are rare and
  449    have been relegated to parse_slow.

         On entry the identifier's first character is in buffer->cur[-1].
         Returns the identifier's hash table node, leaving buffer->cur
         just past the identifier.  */
  450 static cpp_hashnode *
  451 parse_identifier (pfile)
  452      cpp_reader *pfile;
  453 {
  454   cpp_hashnode *result;
  455   const uchar *cur, *base;
  456
  457   /* Fast-path loop.  Skim over a normal identifier.
  458      N.B. ISIDNUM does not include $.  */
  459   cur = pfile->buffer->cur;
  460   while (ISIDNUM (*cur))
  461     cur++;
  462
  463   /* Check for slow-path cases.  */
  464   if (*cur == '?' || *cur == '\\' || *cur == '$')
  465     {
  466       unsigned int len;
  467
            /* parse_slow has already built the spelling on the hash
               table's obstack, hence HT_ALLOCED here rather than HT_ALLOC
               (presumably "storage already allocated" -- the flags are
               defined elsewhere).  */
  468       base = parse_slow (pfile, cur, 0, &len);
  469       result = (cpp_hashnode *)
  470         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
  471     }
  472   else
  473     {
            /* The spelling lies contiguously in the input buffer.  */
  474       base = pfile->buffer->cur - 1;
  475       pfile->buffer->cur = cur;
  476       result = (cpp_hashnode *)
  477         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
  478     }
  479
  480   /* Rarely, identifiers require diagnostics when lexed.
  481      XXX Has to be forced out of the fast path.  */
  482   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
  483                         && !pfile->state.skipping, 0))
  484     {
  485       /* It is allowed to poison the same identifier twice.  */
  486       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
  487         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
  488                    NODE_NAME (result));
  489
  490       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
  491          replacement list of a variadic macro.  */
  492       if (result == pfile->spec_nodes.n__VA_ARGS__
  493           && !pfile->state.va_args_ok)
  494         cpp_error (pfile, DL_PEDWARN,
  495         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
  496     }
  497
  498   return result;
  499 }
 500
  501 /* Slow path.  This handles numbers and identifiers which have been
  502    split, or contain dollar signs.  The part of the token from
  503    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
  504    0 for an identifier, 1 if it's a number, and 2 if it is a number
  505    with a leading period.  Returns a pointer to the token's
  506    NUL-terminated spelling, built on the hash table's obstack, and
         sets PLEN to its length excluding the NUL.  On return buffer->cur
         points at the first character not part of the token.  */
  507 static uchar *
  508 parse_slow (pfile, cur, number_p, plen)
  509      cpp_reader *pfile;
  510      const uchar *cur;
  511      int number_p;
  512      unsigned int *plen;
  513 {
  514   cpp_buffer *buffer = pfile->buffer;
  515   const uchar *base = buffer->cur - 1;
  516   struct obstack *stack = &pfile->hash_table->stack;
  517   unsigned int c, prevc, saw_dollar = 0;
  518
  519   /* Place any leading period.  */
  520   if (number_p == 2)
  521     obstack_1grow (stack, '.');
  522
  523   /* Copy the part of the token which is known to be okay.  */
  524   obstack_grow (stack, base, cur - base);
  525
  526   /* Now process the part which isn't.  We are looking at one of
  527      '$', '\\', or '?' on entry to this loop.  */
  528   prevc = cur[-1];
  529   c = *cur++;
  530   buffer->cur = cur;
  531   for (;;)
  532     {
  533       /* Potential escaped newline?  */
            /* Record where to return to if C proves not to belong to the
               token; skip_escaped_newlines moves this on past any newline
               it skips.  */
  534       buffer->backup_to = buffer->cur - 1;
  535       if (c == '?' || c == '\\')
  536         c = skip_escaped_newlines (pfile);
  537
  538       if (!is_idchar (c))
  539         {
                /* Numbers may also contain '.', and a sign that VALID_SIGN
                   accepts after the previous character.  */
  540           if (!number_p)
  541             break;
  542           if (c != '.' && !VALID_SIGN (c, prevc))
  543             break;
  544         }
  545
  546       /* Handle normal identifier characters in this loop.  */
  547       do
  548         {
  549           prevc = c;
  550           obstack_1grow (stack, c);
  551
  552           if (c == '$')
  553             saw_dollar++;
  554
  555           c = *buffer->cur++;
  556         }
  557       while (is_idchar (c));
  558     }
  559
  560   /* Step back over the unwanted char.  */
  561   BACKUP ();
  562
  563   /* $ is not an identifier character in the standard, but is commonly
  564      accepted as an extension.  Don't warn about it in skipped
  565      conditional blocks.  */
  566   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
  567     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
  568
  569   /* Identifiers and numbers are null-terminated.  */
        /* Take the length before appending the NUL so it is not counted.  */
  570   *plen = obstack_object_size (stack);
  571   obstack_1grow (stack, '\0');
  572   return obstack_finish (stack);
  573 }
 574
  575 /* Parse a number whose first character is in buffer->cur[-1],
  576    skipping embedded backslash-newlines.  LEADING_PERIOD is nonzero if
  577    there was a "." before that character; the "." is then included in
         the spelling.  Place the NUL-terminated spelling and its length in
         NUMBER, and leave buffer->cur just past the number.  */
  578 static void
  579 parse_number (pfile, number, leading_period)
  580      cpp_reader *pfile;
  581      cpp_string *number;
  582      int leading_period;
  583 {
  584   const uchar *cur;
  585
  586   /* Fast-path loop.  Skim over a normal number.
  587      N.B. ISIDNUM does not include $.  */
  588   cur = pfile->buffer->cur;
  589   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
  590     cur++;
  591
  592   /* Check for slow-path cases.  */
  593   if (*cur == '?' || *cur == '\\' || *cur == '$')
          /* parse_slow expects 1 for a number, 2 for one with a leading
             period.  */
  594     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
  595   else
  596     {
  597       const uchar *base = pfile->buffer->cur - 1;
  598       uchar *dest;
  599
            /* LEADING_PERIOD is added as a count here, so callers must
               pass 0 or 1.  */
  600       number->len = cur - base + leading_period;
  601       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
  602       dest[number->len] = '\0';
  603       number->text = dest;
  604
  605       if (leading_period)
  606         *dest++ = '.';
  607       memcpy (dest, base, cur - base);
  608       pfile->buffer->cur = cur;
  609     }
  610 }
 611
 612 /* Subroutine of parse_string.  */
 613 static int
 614 unescaped_terminator_p (pfile, dest)
 615      cpp_reader *pfile;
 616      const unsigned char *dest;
 617 {
 618   const unsigned char *start, *temp;
 619
 620   /* In #include-style directives, terminators are not escapable.  */
 621   if (pfile->state.angled_headers)
 622     return 1;
 623
 624   start = BUFF_FRONT (pfile->u_buff);
 625
 626   /* An odd number of consecutive backslashes represents an escaped
 627      terminator.  */
 628   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 629     ;
 630
 631   return ((dest - temp) & 1) == 0;
 632 }
 633
 634 /* Parses a string, character constant, or angle-bracketed header file
 635    name.  Handles embedded trigraphs and escaped newlines.  The stored
 636    string is guaranteed NUL-terminated, but it is not guaranteed that
 637    this is the first NUL since embedded NULs are preserved.
 638
 639    When this function returns, buffer->cur points to the next
 640    character to be processed.  */
 641 static void
 642 parse_string (pfile, token, terminator)
 643      cpp_reader *pfile;
 644      cpp_token *token;
 645      cppchar_t terminator;
 646 {
 647   cpp_buffer *buffer = pfile->buffer;
 648   unsigned char *dest, *limit;
 649   cppchar_t c;
 650   bool warned_nulls = false;
 651 #ifdef MULTIBYTE_CHARS
 652   wchar_t wc;
 653   int char_len;
 654 #endif
 655
 656   dest = BUFF_FRONT (pfile->u_buff);
 657   limit = BUFF_LIMIT (pfile->u_buff);
 658
 659 #ifdef MULTIBYTE_CHARS
 660   /* Reset multibyte conversion state.  */
 661   (void) local_mbtowc (NULL, NULL, 0);
 662 #endif
 663   for (;;)
 664     {
 665       /* We need room for another char, possibly the terminating NUL.  */
 666       if ((size_t) (limit - dest) < 1)
 667         {
 668           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 669           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 670           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 671           limit = BUFF_LIMIT (pfile->u_buff);
 672         }
 673
 674 #ifdef MULTIBYTE_CHARS
 675       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 676                                buffer->rlimit - buffer->cur);
 677       if (char_len == -1)
 678         {
 679           cpp_error (pfile, DL_WARNING,
 680                      "ignoring invalid multibyte character");
 681           char_len = 1;
 682           c = *buffer->cur++;
 683         }
 684       else
 685         {
 686           buffer->cur += char_len;
 687           c = wc;
 688         }
 689 #else
 690       c = *buffer->cur++;
 691 #endif
 692
 693       /* Handle trigraphs, escaped newlines etc.  */
 694       if (c == '?' || c == '\\')
 695         c = skip_escaped_newlines (pfile);
 696
 697       if (c == terminator)
 698         {
 699           if (unescaped_terminator_p (pfile, dest))
 700             break;
 701         }
 702       else if (is_vspace (c))
 703         {
 704           /* No string literal may extend over multiple lines.  In
 705              assembly language, suppress the error except for <>
 706              includes.  This is a kludge around not knowing where
 707              comments are.  */
 708         unterminated:
 709           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 710             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 711                        (int) terminator);
 712           buffer->cur--;
 713           break;
 714         }
 715       else if (c == '\0')
 716         {
 717           if (buffer->cur - 1 == buffer->rlimit)
 718             goto unterminated;
 719           if (!warned_nulls)
 720             {
 721               warned_nulls = true;
 722               cpp_error (pfile, DL_WARNING,
 723                          "null character(s) preserved in literal");
 724             }
 725         }
 726 #ifdef MULTIBYTE_CHARS
 727       if (char_len > 1)
 728         {
 729           for ( ; char_len > 0; --char_len)
 730             *dest++ = (*buffer->cur - char_len);
 731         }
 732       else
 733 #endif
 734         *dest++ = c;
 735     }
 736
 737   *dest = '\0';
 738
 739   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 740   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 741   BUFF_FRONT (pfile->u_buff) = dest + 1;
 742 }
 743
 744 /* The stored comment includes the comment start and any terminator.  */
 745 static void
 746 save_comment (pfile, token, from, type)
 747      cpp_reader *pfile;
 748      cpp_token *token;
 749      const unsigned char *from;
 750      cppchar_t type;
 751 {
 752   unsigned char *buffer;
 753   unsigned int len, clen;
 754
 755   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 756
 757   /* C++ comments probably (not definitely) have moved past a new
 758      line, which we don't want to save in the comment.  */
 759   if (is_vspace (pfile->buffer->cur[-1]))
 760     len--;
 761
 762   /* If we are currently in a directive, then we need to store all
 763      C++ comments as C comments internally, and so we need to
 764      allocate a little extra space in that case.
 765
 766      Note that the only time we encounter a directive here is
 767      when we are saving comments in a "#define".  */
 768   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 769
 770   buffer = _cpp_unaligned_alloc (pfile, clen);
 771
 772   token->type = CPP_COMMENT;
 773   token->val.str.len = clen;
 774   token->val.str.text = buffer;
 775
 776   buffer[0] = '/';
 777   memcpy (buffer + 1, from, len - 1);
 778
 779   /* Finish conversion to a C comment, if necessary.  */
 780   if (pfile->state.in_directive && type == '/')
 781     {
 782       buffer[1] = '*';
 783       buffer[clen - 2] = '*';
 784       buffer[clen - 1] = '/';
 785     }
 786 }
 787
 788 /* Allocate COUNT tokens for RUN.  */
 789 void
 790 _cpp_init_tokenrun (run, count)
 791      tokenrun *run;
 792      unsigned int count;
 793 {
 794   run->base = xnewvec (cpp_token, count);
 795   run->limit = run->base + count;
 796   run->next = NULL;
 797 }
 798
 799 /* Returns the next tokenrun, or creates one if there is none.  */
 800 static tokenrun *
 801 next_tokenrun (run)
 802      tokenrun *run;
 803 {
 804   if (run->next == NULL)
 805     {
 806       run->next = xnew (tokenrun);
 807       run->next->prev = run;
 808       _cpp_init_tokenrun (run->next, 250);
 809     }
 810
 811   return run->next;
 812 }
 813
 814 /* Allocate a single token that is invalidated at the same time as the
 815    rest of the tokens on the line.  Has its line and col set to the
 816    same as the last lexed token, so that diagnostics appear in the
 817    right place.  */
 818 cpp_token *
 819 _cpp_temp_token (pfile)
 820      cpp_reader *pfile;
 821 {
 822   cpp_token *old, *result;
 823
 824   old = pfile->cur_token - 1;
 825   if (pfile->cur_token == pfile->cur_run->limit)
 826     {
 827       pfile->cur_run = next_tokenrun (pfile->cur_run);
 828       pfile->cur_token = pfile->cur_run->base;
 829     }
 830
 831   result = pfile->cur_token++;
 832   result->line = old->line;
 833   result->col = old->col;
 834   return result;
 835 }
 836
  837 /* Lex a token and return a pointer to it (external interface).  Takes
  838    care of issues like directive handling, token lookahead, multiple
  839    include optimization and skipping.  */
  840 const cpp_token *
  841 _cpp_lex_token (pfile)
  842      cpp_reader *pfile;
  843 {
  844   cpp_token *result;
  845
  846   for (;;)
  847     {
            /* Move to the next token run when the current one is full.  */
  848       if (pfile->cur_token == pfile->cur_run->limit)
  849         {
  850           pfile->cur_run = next_tokenrun (pfile->cur_run);
  851           pfile->cur_token = pfile->cur_run->base;
  852         }
  853
            /* With lookaheads pending, the token at cur_token is handed
               out as it stands instead of lexing a new one (presumably it
               was lexed earlier and then stepped back over).  */
  854       if (pfile->lookaheads)
  855         {
  856           pfile->lookaheads--;
  857           result = pfile->cur_token++;
  858         }
  859       else
  860         result = _cpp_lex_direct (pfile);
  861
  862       if (result->flags & BOL)
  863         {
  864           /* Is this a directive?  If _cpp_handle_directive returns
  865              false, it is an assembler #.  */
  866           if (result->type == CPP_HASH
  867               /* 6.10.3 p 11: Directives in a list of macro arguments
  868                  gives undefined behavior.  This implementation
  869                  handles the directive as normal.  */
  870               && pfile->state.parsing_args != 1
  871               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
  872             continue;
                /* Not a directive: tell the client that a new line starts,
                   unless we are skipping.  */
  873           if (pfile->cb.line_change && !pfile->state.skipping)
  874             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
  875         }
  876
  877       /* We don't skip tokens in directives.  */
  878       if (pfile->state.in_directive)
  879         break;
  880
  881       /* Outside a directive, invalidate controlling macros.  At file
  882          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
  883          get here and MI optimisation works.  */
  884       pfile->mi_valid = false;
  885
            /* While skipping, only CPP_EOF is passed back to the caller.  */
  886       if (!pfile->state.skipping || result->type == CPP_EOF)
  887         break;
  888     }
  889
  890   return result;
  891 }
 892
  893 /* A NUL terminates the current buffer.  For ISO preprocessing this is
  894    EOF, but for traditional preprocessing it indicates we need a line
  895    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
  896    to return a CPP_EOF to the caller.  The buffer's saved_flags is
         set to BOL in either case.  */
  897 static bool
  898 continue_after_nul (pfile)
  899      cpp_reader *pfile;
  900 {
  901   cpp_buffer *buffer = pfile->buffer;
  902   bool more = false;
  903
  904   buffer->saved_flags = BOL;
  905   if (CPP_OPTION (pfile, traditional))
  906     {
            /* The end of the line ends a directive.  */
  907       if (pfile->state.in_directive)
  908         return false;
  909
            /* Read the next logical line and lex it from the output
               buffer it was written to.  */
  910       _cpp_remove_overlay (pfile);
  911       more = _cpp_read_logical_line_trad (pfile);
  912       _cpp_overlay_buffer (pfile, pfile->out.base,
  913                            pfile->out.cur - pfile->out.base);
  914       pfile->line = pfile->out.first_line;
  915     }
  916   else
  917     {
  918       /* Stop parsing arguments with a CPP_EOF.  When we finally come
  919          back here, do the work of popping the buffer.  */
  920       if (!pfile->state.parsing_args)
  921         {
                /* Something was lexed since the last newline, so the final
                   line lacks a terminator.  */
  922           if (buffer->cur != buffer->line_base)
  923             {
  924               /* Non-empty files should end in a newline.  Don't warn
  925                  for command line and _Pragma buffers.  */
  926               if (!buffer->from_stage3)
  927                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
  928               handle_newline (pfile);
  929             }
  930
  931           /* Similarly, finish an in-progress directive with CPP_EOF
  932              before popping the buffer.  */
  933           if (!pfile->state.in_directive && buffer->prev)
  934             {
                    /* Read return_at_eof before the buffer is popped.  */
  935               more = !buffer->return_at_eof;
  936               _cpp_pop_buffer (pfile);
  937             }
  938         }
  939     }
  940
  941   return more;
  942 }
 943
 944 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 945   do {                                          \
 946     if (get_effective_char (pfile) == CHAR)     \
 947       result->type = THEN_TYPE;                 \
 948     else                                        \
 949       {                                         \
 950         BACKUP ();                              \
 951         result->type = ELSE_TYPE;               \
 952       }                                         \
 953   } while (0)
 954
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimisation, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   The body is a single switch on the first character of the token,
   with four re-entry labels above it:
     fresh_line          - start over in (possibly another) buffer,
                           picking up the flags saved for the next token;
     update_tokens_line  - re-record the token's line after text that
                           may have crossed a line boundary was skipped;
     skipped_white       - fetch the next character and its column;
     trigraph            - dispatch again on a character produced by
                           skip_escaped_newlines.  */
cpp_token *
_cpp_lex_direct (pfile)
     cpp_reader *pfile;
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  /* Reload the buffer pointer each time: continue_after_nul may have
     popped the buffer we were reading.  The token inherits the flags
     left pending in the buffer, which are then cleared.  */
  buffer = pfile->buffer;
  result->flags = buffer->saved_flags;
  buffer->saved_flags = 0;
 update_tokens_line:
  result->line = pfile->line;

 skipped_white:
  c = *buffer->cur++;
  result->col = CPP_BUF_COLUMN (buffer, buffer->cur);

 trigraph:
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      /* A nonzero return means there is more to lex after the
         whitespace; zero means we stopped at the NUL ending the
         buffer.  */
      if (skip_whitespace (pfile, c))
        goto skipped_white;

      /* End of buffer.  */
      buffer->cur--;
      if (continue_after_nul (pfile))
        goto fresh_line;
      result->type = CPP_EOF;
      break;

    case '\n': case '\r':
      handle_newline (pfile);
      buffer->saved_flags = BOL;
      if (! pfile->state.in_directive)
        {
          if (pfile->state.parsing_args == 2)
            buffer->saved_flags |= PREV_WHITE;
          /* Unless tokens must be kept, rewind to the first slot of
             the base token run and lex the next token into it.  */
          if (!pfile->keep_tokens)
            {
              pfile->cur_run = &pfile->base_run;
              result = pfile->base_run.base;
              pfile->cur_token = result + 1;
            }
          goto fresh_line;
        }
      /* Inside a directive the newline terminates it.  */
      result->type = CPP_EOF;
      break;

    case '?':
    case '\\':
      /* These could start an escaped newline, or '?' a trigraph.  Let
         skip_escaped_newlines do all the work.  */
      {
        unsigned int line = pfile->line;

        c = skip_escaped_newlines (pfile);
        if (line != pfile->line)
          {
            buffer->cur--;
            /* We had at least one escaped newline of some sort.
               Update the token's line and column.  */
            goto update_tokens_line;
          }
      }

      /* We are either the original '?' or '\\', or a trigraph.  */
      if (c == '?')
        result->type = CPP_QUERY;
      else if (c == '\\')
        goto random_char;
      else
        goto trigraph;
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      result->type = CPP_NUMBER;
      parse_number (pfile, &result->val.str, 0);
      break;

    case 'L':
      /* 'L' may introduce wide characters or strings.  */
      {
        const unsigned char *pos = buffer->cur;

        c = get_effective_char (pfile);
        if (c == '\'' || c == '"')
          {
            result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
            parse_string (pfile, result, c);
            break;
          }
        /* Not a wide literal: restore the read position and lex the
           'L' as the start of an identifier.  */
        buffer->cur = pos;
      }
      /* Fall through.  */

    start_ident:
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    /* No 'L' here; it has its own case above and falls through.  */
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      result->val.node = parse_identifier (pfile);

      /* Convert named operators to their proper types.  The node is
         left in val.node and the token is flagged NAMED_OP.  */
      if (result->val.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = result->val.node->directive_index;
        }
      break;

    case '\'':
    case '"':
      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
      parse_string (pfile, result, c);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = get_effective_char (pfile);

      if (c == '*')
        {
          if (skip_block_comment (pfile))
            cpp_error (pfile, DL_ERROR, "unterminated comment");
        }
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
                            || CPP_IN_SYSTEM_HEADER (pfile)))
        {
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, DL_PEDWARN,
                         "C++ style comments are not allowed in ISO C90");
              cpp_error (pfile, DL_PEDWARN,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_error (pfile, DL_WARNING, "multi-line comment");
        }
      else if (c == '=')
        {
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          BACKUP ();
          result->type = CPP_DIV;
          break;
        }

      /* Only the two comment branches reach this point.  Unless
         comments are being saved, treat the comment as whitespace and
         lex the token that follows it.  */
      if (!pfile->state.save_comments)
        {
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* When angled header names are expected, '<' opens a header
         name that runs to the closing '>'.  */
      if (pfile->state.angled_headers)
        {
          result->type = CPP_HEADER_NAME;
          parse_string (pfile, result, '>');
          break;
        }

      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_LESS_EQ;
      else if (c == '<')
        IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
      else if (c == ':' && CPP_OPTION (pfile, digraphs))
        {
          result->type = CPP_OPEN_SQUARE;
          result->flags |= DIGRAPH;
        }
      else if (c == '%' && CPP_OPTION (pfile, digraphs))
        {
          result->type = CPP_OPEN_BRACE;
          result->flags |= DIGRAPH;
        }
      else
        {
          BACKUP ();
          result->type = CPP_LESS;
        }
      break;

    case '>':
      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_GREATER_EQ;
      else if (c == '>')
        IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
      else
        {
          BACKUP ();
          result->type = CPP_GREATER;
        }
      break;

    case '%':
      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs) && c == ':')
        {
          /* "%:" is the digraph for '#'; "%:%:" is the digraph for
             "##".  */
          result->flags |= DIGRAPH;
          result->type = CPP_HASH;
          if (get_effective_char (pfile) == '%')
            {
              const unsigned char *pos = buffer->cur;

              if (get_effective_char (pfile) == ':')
                result->type = CPP_PASTE;
              else
                /* Only "%:" matched.  Step back to just before the
                   position recorded after the second '%'.  */
                buffer->cur = pos - 1;
            }
          else
            BACKUP ();
        }
      else if (CPP_OPTION (pfile, digraphs) && c == '>')
        {
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_BRACE;
        }
      else
        {
          BACKUP ();
          result->type = CPP_MOD;
        }
      break;

    case '.':
      result->type = CPP_DOT;
      c = get_effective_char (pfile);
      if (c == '.')
        {
          const unsigned char *pos = buffer->cur;

          if (get_effective_char (pfile) == '.')
            result->type = CPP_ELLIPSIS;
          else
            /* Only ".." was seen: the token stays a single CPP_DOT and
               the read position steps back to just before POS.  */
            buffer->cur = pos - 1;
        }
      /* All known character sets have 0...9 contiguous.  */
      else if (ISDIGIT (c))
        {
          result->type = CPP_NUMBER;
          parse_number (pfile, &result->val.str, 1);
        }
      else if (c == '*' && CPP_OPTION (pfile, cplusplus))
        result->type = CPP_DOT_STAR;
      else
        BACKUP ();
      break;

    case '+':
      c = get_effective_char (pfile);
      if (c == '+')
        result->type = CPP_PLUS_PLUS;
      else if (c == '=')
        result->type = CPP_PLUS_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_PLUS;
        }
      break;

    case '-':
      c = get_effective_char (pfile);
      if (c == '>')
        {
          result->type = CPP_DEREF;
          /* "->*" is only recognised when the cplusplus option is
             set.  */
          if (CPP_OPTION (pfile, cplusplus))
            {
              if (get_effective_char (pfile) == '*')
                result->type = CPP_DEREF_STAR;
              else
                BACKUP ();
            }
        }
      else if (c == '-')
        result->type = CPP_MINUS_MINUS;
      else if (c == '=')
        result->type = CPP_MINUS_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_MINUS;
        }
      break;

    case '&':
      c = get_effective_char (pfile);
      if (c == '&')
        result->type = CPP_AND_AND;
      else if (c == '=')
        result->type = CPP_AND_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_AND;
        }
      break;

    case '|':
      c = get_effective_char (pfile);
      if (c == '|')
        result->type = CPP_OR_OR;
      else if (c == '=')
        result->type = CPP_OR_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_OR;
        }
      break;

    case ':':
      c = get_effective_char (pfile);
      if (c == ':' && CPP_OPTION (pfile, cplusplus))
        result->type = CPP_SCOPE;
      else if (c == '>' && CPP_OPTION (pfile, digraphs))
        {
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      else
        {
          BACKUP ();
          result->type = CPP_COLON;
        }
      break;

    /* Operators that can only be extended by one particular
       following character.  */
    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    /* Single-character punctuators.  */
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
      /* '$' starts an identifier only when the dollars_in_ident
         option is set.  */
      if (CPP_OPTION (pfile, dollars_in_ident))
        goto start_ident;
      /* Fall through...  */

    random_char:
    default:
      /* Anything else becomes a CPP_OTHER token carrying the
         character itself.  */
      result->type = CPP_OTHER;
      result->val.c = c;
      break;
    }

  return result;
}
1361
1362 /* An upper bound on the number of bytes needed to spell TOKEN,
1363    including preceding whitespace.  */
1364 unsigned int
1365 cpp_token_len (token)
1366      const cpp_token *token;
1367 {
1368   unsigned int len;
1369
1370   switch (TOKEN_SPELL (token))
1371     {
1372     default:            len = 0;                                break;
1373     case SPELL_NUMBER:
1374     case SPELL_STRING:  len = token->val.str.len;               break;
1375     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1376     }
1377   /* 1 for whitespace, 4 for comment delimiters.  */
1378   return len + 5;
1379 }
1380
1381 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1382    already contain the enough space to hold the token's spelling.
1383    Returns a pointer to the character after the last character
1384    written.  */
1385 unsigned char *
1386 cpp_spell_token (pfile, token, buffer)
1387      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1388      const cpp_token *token;
1389      unsigned char *buffer;
1390 {
1391   switch (TOKEN_SPELL (token))
1392     {
1393     case SPELL_OPERATOR:
1394       {
1395         const unsigned char *spelling;
1396         unsigned char c;
1397
1398         if (token->flags & DIGRAPH)
1399           spelling
1400             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1401         else if (token->flags & NAMED_OP)
1402           goto spell_ident;
1403         else
1404           spelling = TOKEN_NAME (token);
1405
1406         while ((c = *spelling++) != '\0')
1407           *buffer++ = c;
1408       }
1409       break;
1410
1411     case SPELL_CHAR:
1412       *buffer++ = token->val.c;
1413       break;
1414
1415     spell_ident:
1416     case SPELL_IDENT:
1417       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1418       buffer += NODE_LEN (token->val.node);
1419       break;
1420
1421     case SPELL_NUMBER:
1422       memcpy (buffer, token->val.str.text, token->val.str.len);
1423       buffer += token->val.str.len;
1424       break;
1425
1426     case SPELL_STRING:
1427       {
1428         int left, right, tag;
1429         switch (token->type)
1430           {
1431           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1432           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1433           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1434           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1435           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1436           default:
1437             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1438                        TOKEN_NAME (token));
1439             return buffer;
1440           }
1441         if (tag) *buffer++ = tag;
1442         *buffer++ = left;
1443         memcpy (buffer, token->val.str.text, token->val.str.len);
1444         buffer += token->val.str.len;
1445         *buffer++ = right;
1446       }
1447       break;
1448
1449     case SPELL_NONE:
1450       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1451       break;
1452     }
1453
1454   return buffer;
1455 }
1456
1457 /* Returns TOKEN spelt as a null-terminated string.  The string is
1458    freed when the reader is destroyed.  Useful for diagnostics.  */
1459 unsigned char *
1460 cpp_token_as_text (pfile, token)
1461      cpp_reader *pfile;
1462      const cpp_token *token;
1463 {
1464   unsigned int len = cpp_token_len (token);
1465   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1466
1467   end = cpp_spell_token (pfile, token, start);
1468   end[0] = '\0';
1469
1470   return start;
1471 }
1472
1473 /* Used by C front ends, which really should move to using
1474    cpp_token_as_text.  */
1475 const char *
1476 cpp_type2name (type)
1477      enum cpp_ttype type;
1478 {
1479   return (const char *) token_spellings[type].name;
1480 }
1481
1482 /* Writes the spelling of token to FP, without any preceding space.
1483    Separated from cpp_spell_token for efficiency - to avoid stdio
1484    double-buffering.  */
1485 void
1486 cpp_output_token (token, fp)
1487      const cpp_token *token;
1488      FILE *fp;
1489 {
1490   switch (TOKEN_SPELL (token))
1491     {
1492     case SPELL_OPERATOR:
1493       {
1494         const unsigned char *spelling;
1495         int c;
1496
1497         if (token->flags & DIGRAPH)
1498           spelling
1499             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1500         else if (token->flags & NAMED_OP)
1501           goto spell_ident;
1502         else
1503           spelling = TOKEN_NAME (token);
1504
1505         c = *spelling;
1506         do
1507           putc (c, fp);
1508         while ((c = *++spelling) != '\0');
1509       }
1510       break;
1511
1512     case SPELL_CHAR:
1513       putc (token->val.c, fp);
1514       break;
1515
1516     spell_ident:
1517     case SPELL_IDENT:
1518       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1519     break;
1520
1521     case SPELL_NUMBER:
1522       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1523       break;
1524
1525     case SPELL_STRING:
1526       {
1527         int left, right, tag;
1528         switch (token->type)
1529           {
1530           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1531           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1532           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1533           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1534           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1535           default:
1536             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1537             return;
1538           }
1539         if (tag) putc (tag, fp);
1540         putc (left, fp);
1541         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1542         putc (right, fp);
1543       }
1544       break;
1545
1546     case SPELL_NONE:
1547       /* An error, most probably.  */
1548       break;
1549     }
1550 }
1551
1552 /* Compare two tokens.  */
1553 int
1554 _cpp_equiv_tokens (a, b)
1555      const cpp_token *a, *b;
1556 {
1557   if (a->type == b->type && a->flags == b->flags)
1558     switch (TOKEN_SPELL (a))
1559       {
1560       default:                  /* Keep compiler happy.  */
1561       case SPELL_OPERATOR:
1562         return 1;
1563       case SPELL_CHAR:
1564         return a->val.c == b->val.c; /* Character.  */
1565       case SPELL_NONE:
1566         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1567       case SPELL_IDENT:
1568         return a->val.node == b->val.node;
1569       case SPELL_NUMBER:
1570       case SPELL_STRING:
1571         return (a->val.str.len == b->val.str.len
1572                 && !memcmp (a->val.str.text, b->val.str.text,
1573                             a->val.str.len));
1574       }
1575
1576   return 0;
1577 }
1578
1579 /* Returns nonzero if a space should be inserted to avoid an
1580    accidental token paste for output.  For simplicity, it is
1581    conservative, and occasionally advises a space where one is not
1582    needed, e.g. "." and ".2".  */
1583 int
1584 cpp_avoid_paste (pfile, token1, token2)
1585      cpp_reader *pfile;
1586      const cpp_token *token1, *token2;
1587 {
1588   enum cpp_ttype a = token1->type, b = token2->type;
1589   cppchar_t c;
1590
1591   if (token1->flags & NAMED_OP)
1592     a = CPP_NAME;
1593   if (token2->flags & NAMED_OP)
1594     b = CPP_NAME;
1595
1596   c = EOF;
1597   if (token2->flags & DIGRAPH)
1598     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1599   else if (token_spellings[b].category == SPELL_OPERATOR)
1600     c = token_spellings[b].name[0];
1601
1602   /* Quickly get everything that can paste with an '='.  */
1603   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1604     return 1;
1605
1606   switch (a)
1607     {
1608     case CPP_GREATER:   return c == '>' || c == '?';
1609     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1610     case CPP_PLUS:      return c == '+';
1611     case CPP_MINUS:     return c == '-' || c == '>';
1612     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1613     case CPP_MOD:       return c == ':' || c == '>';
1614     case CPP_AND:       return c == '&';
1615     case CPP_OR:        return c == '|';
1616     case CPP_COLON:     return c == ':' || c == '>';
1617     case CPP_DEREF:     return c == '*';
1618     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1619     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1620     case CPP_NAME:      return ((b == CPP_NUMBER
1621                                  && name_p (pfile, &token2->val.str))
1622                                 || b == CPP_NAME
1623                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1624     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1625                                 || c == '.' || c == '+' || c == '-');
1626     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1627                                 && token1->val.c == '@'
1628                                 && (b == CPP_NAME || b == CPP_STRING));
1629     default:            break;
1630     }
1631
1632   return 0;
1633 }
1634
1635 /* Output all the remaining tokens on the current line, and a newline
1636    character, to FP.  Leading whitespace is removed.  If there are
1637    macros, special token padding is not performed.  */
1638 void
1639 cpp_output_line (pfile, fp)
1640      cpp_reader *pfile;
1641      FILE *fp;
1642 {
1643   const cpp_token *token;
1644
1645   token = cpp_get_token (pfile);
1646   while (token->type != CPP_EOF)
1647     {
1648       cpp_output_token (token, fp);
1649       token = cpp_get_token (pfile);
1650       if (token->flags & PREV_WHITE)
1651         putc (' ', fp);
1652     }
1653
1654   putc ('\n', fp);
1655 }
1656
/* Returns the numeric value of the hexadecimal digit C.  Callers must
   only pass hex digits; anything else aborts the program.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1667
1668 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1669    failure if cpplib is not parsing C++ or C99.  Such failure is
1670    silent, and no variables are updated.  Otherwise returns 0, and
1671    warns if -Wtraditional.
1672
1673    [lex.charset]: The character designated by the universal character
1674    name \UNNNNNNNN is that character whose character short name in
1675    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676    universal character name \uNNNN is that character whose character
1677    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1678    for a universal character name is less than 0x20 or in the range
1679    0x7F-0x9F (inclusive), or if the universal character name
1680    designates a character in the basic source character set, then the
1681    program is ill-formed.
1682
1683    We assume that wchar_t is Unicode, so we don't need to do any
1684    mapping.  Is this ever wrong?
1685
1686    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1687    LIMIT is the end of the string or charconst.  PSTR is updated to
1688    point after the UCS on return, and the UCS is written into PC.  */
1689
1690 static int
1691 maybe_read_ucs (pfile, pstr, limit, pc)
1692      cpp_reader *pfile;
1693      const unsigned char **pstr;
1694      const unsigned char *limit;
1695      cppchar_t *pc;
1696 {
1697   const unsigned char *p = *pstr;
1698   unsigned int code = 0;
1699   unsigned int c = *pc, length;
1700
1701   /* Only attempt to interpret a UCS for C++ and C99.  */
1702   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1703     return 1;
1704
1705   if (CPP_WTRADITIONAL (pfile))
1706     cpp_error (pfile, DL_WARNING,
1707                "the meaning of '\\%c' is different in traditional C", c);
1708
1709   length = (c == 'u' ? 4: 8);
1710
1711   if ((size_t) (limit - p) < length)
1712     {
1713       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1714       /* Skip to the end to avoid more diagnostics.  */
1715       p = limit;
1716     }
1717   else
1718     {
1719       for (; length; length--, p++)
1720         {
1721           c = *p;
1722           if (ISXDIGIT (c))
1723             code = (code << 4) + hex_digit_value (c);
1724           else
1725             {
1726               cpp_error (pfile, DL_ERROR,
1727                          "non-hex digit '%c' in universal-character-name", c);
1728               /* We shouldn't skip in case there are multibyte chars.  */
1729               break;
1730             }
1731         }
1732     }
1733
1734 #ifdef TARGET_EBCDIC
1735   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1736   code = 0x3f;  /* EBCDIC invalid character */
1737 #else
1738  /* True extended characters are OK.  */
1739   if (code >= 0xa0
1740       && !(code & 0x80000000)
1741       && !(code >= 0xD800 && code <= 0xDFFF))
1742     ;
1743   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1744      hex escapes so that this also works with EBCDIC hosts.  */
1745   else if (code == 0x24 || code == 0x40 || code == 0x60)
1746     ;
1747   /* Don't give another error if one occurred above.  */
1748   else if (length == 0)
1749     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1750 #endif
1751
1752   *pstr = p;
1753   *pc = code;
1754   return 0;
1755 }
1756
1757 /* Returns the value of an escape sequence, truncated to the correct
1758    target precision.  PSTR points to the input pointer, which is just
1759    after the backslash.  LIMIT is how much text we have.  WIDE is true
1760    if the escape sequence is part of a wide character constant or
1761    string literal.  Handles all relevant diagnostics.  */
1762 cppchar_t
1763 cpp_parse_escape (pfile, pstr, limit, wide)
1764      cpp_reader *pfile;
1765      const unsigned char **pstr;
1766      const unsigned char *limit;
1767      int wide;
1768 {
1769   int unknown = 0;
1770   const unsigned char *str = *pstr;
1771   cppchar_t c, mask;
1772   unsigned int width;
1773
1774   if (wide)
1775     width = CPP_OPTION (pfile, wchar_precision);
1776   else
1777     width = CPP_OPTION (pfile, char_precision);
1778   if (width < BITS_PER_CPPCHAR_T)
1779     mask = ((cppchar_t) 1 << width) - 1;
1780   else
1781     mask = ~0;
1782
1783   c = *str++;
1784   switch (c)
1785     {
1786     case '\\': case '\'': case '"': case '?': break;
1787     case 'b': c = TARGET_BS;      break;
1788     case 'f': c = TARGET_FF;      break;
1789     case 'n': c = TARGET_NEWLINE; break;
1790     case 'r': c = TARGET_CR;      break;
1791     case 't': c = TARGET_TAB;     break;
1792     case 'v': c = TARGET_VT;      break;
1793
1794     case '(': case '{': case '[': case '%':
1795       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1796          '\%' is used to prevent SCCS from getting confused.  */
1797       unknown = CPP_PEDANTIC (pfile);
1798       break;
1799
1800     case 'a':
1801       if (CPP_WTRADITIONAL (pfile))
1802         cpp_error (pfile, DL_WARNING,
1803                    "the meaning of '\\a' is different in traditional C");
1804       c = TARGET_BELL;
1805       break;
1806
1807     case 'e': case 'E':
1808       if (CPP_PEDANTIC (pfile))
1809         cpp_error (pfile, DL_PEDWARN,
1810                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1811       c = TARGET_ESC;
1812       break;
1813
1814     case 'u': case 'U':
1815       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1816       break;
1817
1818     case 'x':
1819       if (CPP_WTRADITIONAL (pfile))
1820         cpp_error (pfile, DL_WARNING,
1821                    "the meaning of '\\x' is different in traditional C");
1822
1823       {
1824         cppchar_t i = 0, overflow = 0;
1825         int digits_found = 0;
1826
1827         while (str < limit)
1828           {
1829             c = *str;
1830             if (! ISXDIGIT (c))
1831               break;
1832             str++;
1833             overflow |= i ^ (i << 4 >> 4);
1834             i = (i << 4) + hex_digit_value (c);
1835             digits_found = 1;
1836           }
1837
1838         if (!digits_found)
1839           cpp_error (pfile, DL_ERROR,
1840                        "\\x used with no following hex digits");
1841
1842         if (overflow | (i != (i & mask)))
1843           {
1844             cpp_error (pfile, DL_PEDWARN,
1845                        "hex escape sequence out of range");
1846             i &= mask;
1847           }
1848         c = i;
1849       }
1850       break;
1851
1852     case '0':  case '1':  case '2':  case '3':
1853     case '4':  case '5':  case '6':  case '7':
1854       {
1855         size_t count = 0;
1856         cppchar_t i = c - '0';
1857
1858         while (str < limit && ++count < 3)
1859           {
1860             c = *str;
1861             if (c < '0' || c > '7')
1862               break;
1863             str++;
1864             i = (i << 3) + c - '0';
1865           }
1866
1867         if (i != (i & mask))
1868           {
1869             cpp_error (pfile, DL_PEDWARN,
1870                        "octal escape sequence out of range");
1871             i &= mask;
1872           }
1873         c = i;
1874       }
1875       break;
1876
1877     default:
1878       unknown = 1;
1879       break;
1880     }
1881
1882   if (unknown)
1883     {
1884       if (ISGRAPH (c))
1885         cpp_error (pfile, DL_PEDWARN,
1886                    "unknown escape sequence '\\%c'", (int) c);
1887       else
1888         cpp_error (pfile, DL_PEDWARN,
1889                    "unknown escape sequence: '\\%03o'", (int) c);
1890     }
1891
1892   if (c > mask)
1893     {
1894       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1895       c &= mask;
1896     }
1897
1898   *pstr = str;
1899   return c;
1900 }
1901
1902 /* Interpret a (possibly wide) character constant in TOKEN.
1903    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1904    points to a variable that is filled in with the number of
1905    characters seen, and UNSIGNEDP to a variable that indicates whether
1906    the result has signed type.  */
1907 cppchar_t
1908 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1909      cpp_reader *pfile;
1910      const cpp_token *token;
1911      unsigned int *pchars_seen;
1912      int *unsignedp;
1913 {
1914   const unsigned char *str = token->val.str.text;
1915   const unsigned char *limit = str + token->val.str.len;
1916   unsigned int chars_seen = 0;
1917   size_t width, max_chars;
1918   cppchar_t c, mask, result = 0;
1919   bool unsigned_p;
1920
1921 #ifdef MULTIBYTE_CHARS
1922   (void) local_mbtowc (NULL, NULL, 0);
1923 #endif
1924
1925   /* Width in bits.  */
1926   if (token->type == CPP_CHAR)
1927     {
1928       width = CPP_OPTION (pfile, char_precision);
1929       max_chars = CPP_OPTION (pfile, int_precision) / width;
1930       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1931     }
1932   else
1933     {
1934       width = CPP_OPTION (pfile, wchar_precision);
1935       max_chars = 1;
1936       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1937     }
1938
1939   if (width < BITS_PER_CPPCHAR_T)
1940     mask = ((cppchar_t) 1 << width) - 1;
1941   else
1942     mask = ~0;
1943
1944   while (str < limit)
1945     {
1946 #ifdef MULTIBYTE_CHARS
1947       wchar_t wc;
1948       int char_len;
1949
1950       char_len = local_mbtowc (&wc, (const char *)str, limit - str);
1951       if (char_len == -1)
1952         {
1953           cpp_error (pfile, DL_WARNING,
1954                      "ignoring invalid multibyte character");
1955           c = *str++;
1956         }
1957       else
1958         {
1959           str += char_len;
1960           c = wc;
1961         }
1962 #else
1963       c = *str++;
1964 #endif
1965
1966       if (c == '\\')
1967         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1968
1969 #ifdef MAP_CHARACTER
1970       if (ISPRINT (c))
1971         c = MAP_CHARACTER (c);
1972 #endif
1973
1974       chars_seen++;
1975
1976       /* Truncate the character, scale the result and merge the two.  */
1977       c &= mask;
1978       if (width < BITS_PER_CPPCHAR_T)
1979         result = (result << width) | c;
1980       else
1981         result = c;
1982     }
1983
1984   if (chars_seen == 0)
1985     cpp_error (pfile, DL_ERROR, "empty character constant");
1986   else if (chars_seen > 1)
1987     {
1988       /* Multichar charconsts are of type int and therefore signed.  */
1989       unsigned_p = 0;
1990
1991       if (chars_seen > max_chars)
1992         {
1993           chars_seen = max_chars;
1994           cpp_error (pfile, DL_WARNING,
1995                      "character constant too long for its type");
1996         }
1997       else if (CPP_OPTION (pfile, warn_multichar))
1998         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1999     }
2000
2001   /* Sign-extend or truncate the constant to cppchar_t.  The value is
2002      in WIDTH bits, but for multi-char charconsts it's value is the
2003      full target type's width.  */
2004   if (chars_seen > 1)
2005     width *= max_chars;
2006   if (width < BITS_PER_CPPCHAR_T)
2007     {
2008       mask = ((cppchar_t) 1 << width) - 1;
2009       if (unsigned_p || !(result & (1 << (width - 1))))
2010         result &= mask;
2011       else
2012         result |= ~mask;
2013     }
2014
2015   *pchars_seen = chars_seen;
2016   *unsignedp = unsigned_p;
2017   return result;
2018 }
2019
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer worth
   handing out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF->cur to
   BUFF->limit.  Every macro argument is parenthesized so that an
   arbitrary expression may be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2034
2035 /* Create a new allocation buffer.  Place the control block at the end
2036    of the buffer, so that buffer overflows will cause immediate chaos.  */
2037 static _cpp_buff *
2038 new_buff (len)
2039      size_t len;
2040 {
2041   _cpp_buff *result;
2042   unsigned char *base;
2043
2044   if (len < MIN_BUFF_SIZE)
2045     len = MIN_BUFF_SIZE;
2046   len = CPP_ALIGN (len);
2047
2048   base = xmalloc (len + sizeof (_cpp_buff));
2049   result = (_cpp_buff *) (base + len);
2050   result->base = base;
2051   result->cur = base;
2052   result->limit = base + len;
2053   result->next = NULL;
2054   return result;
2055 }
2056
2057 /* Place a chain of unwanted allocation buffers on the free list.  */
2058 void
2059 _cpp_release_buff (pfile, buff)
2060      cpp_reader *pfile;
2061      _cpp_buff *buff;
2062 {
2063   _cpp_buff *end = buff;
2064
2065   while (end->next)
2066     end = end->next;
2067   end->next = pfile->free_buffs;
2068   pfile->free_buffs = buff;
2069 }
2070
2071 /* Return a free buffer of size at least MIN_SIZE.  */
2072 _cpp_buff *
2073 _cpp_get_buff (pfile, min_size)
2074      cpp_reader *pfile;
2075      size_t min_size;
2076 {
2077   _cpp_buff *result, **p;
2078
2079   for (p = &pfile->free_buffs;; p = &(*p)->next)
2080     {
2081       size_t size;
2082
2083       if (*p == NULL)
2084         return new_buff (min_size);
2085       result = *p;
2086       size = result->limit - result->base;
2087       /* Return a buffer that's big enough, but don't waste one that's
2088          way too big.  */
2089       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2090         break;
2091     }
2092
2093   *p = result->next;
2094   result->next = NULL;
2095   result->cur = result->base;
2096   return result;
2097 }
2098
2099 /* Creates a new buffer with enough space to hold the uncommitted
2100    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2101    the excess bytes to the new buffer.  Chains the new buffer after
2102    BUFF, and returns the new buffer.  */
2103 _cpp_buff *
2104 _cpp_append_extend_buff (pfile, buff, min_extra)
2105      cpp_reader *pfile;
2106      _cpp_buff *buff;
2107      size_t min_extra;
2108 {
2109   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2110   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2111
2112   buff->next = new_buff;
2113   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2114   return new_buff;
2115 }
2116
2117 /* Creates a new buffer with enough space to hold the uncommitted
2118    remaining bytes of the buffer pointed to by BUFF, and at least
2119    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2120    Chains the new buffer before the buffer pointed to by BUFF, and
2121    updates the pointer to point to the new buffer.  */
2122 void
2123 _cpp_extend_buff (pfile, pbuff, min_extra)
2124      cpp_reader *pfile;
2125      _cpp_buff **pbuff;
2126      size_t min_extra;
2127 {
2128   _cpp_buff *new_buff, *old_buff = *pbuff;
2129   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2130
2131   new_buff = _cpp_get_buff (pfile, size);
2132   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2133   new_buff->next = old_buff;
2134   *pbuff = new_buff;
2135 }
2136
2137 /* Free a chain of buffers starting at BUFF.  */
2138 void
2139 _cpp_free_buff (buff)
2140      _cpp_buff *buff;
2141 {
2142   _cpp_buff *next;
2143
2144   for (; buff; buff = next)
2145     {
2146       next = buff->next;
2147       free (buff->base);
2148     }
2149 }
2150
2151 /* Allocate permanent, unaligned storage of length LEN.  */
2152 unsigned char *
2153 _cpp_unaligned_alloc (pfile, len)
2154      cpp_reader *pfile;
2155      size_t len;
2156 {
2157   _cpp_buff *buff = pfile->u_buff;
2158   unsigned char *result = buff->cur;
2159
2160   if (len > (size_t) (buff->limit - result))
2161     {
2162       buff = _cpp_get_buff (pfile, len);
2163       buff->next = pfile->u_buff;
2164       pfile->u_buff = buff;
2165       result = buff->cur;
2166     }
2167
2168   buff->cur = result + len;
2169   return result;
2170 }
2171
2172 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2173    That buffer is used for growing allocations when saving macro
2174    replacement lists in a #define, and when parsing an answer to an
2175    assertion in #assert, #unassert or #if (and therefore possibly
2176    whilst expanding macros).  It therefore must not be used by any
2177    code that they might call: specifically the lexer and the guts of
2178    the macro expander.
2179
2180    All existing other uses clearly fit this restriction: storing
2181    registered pragmas during initialization.  */
2182 unsigned char *
2183 _cpp_aligned_alloc (pfile, len)
2184      cpp_reader *pfile;
2185      size_t len;
2186 {
2187   _cpp_buff *buff = pfile->a_buff;
2188   unsigned char *result = buff->cur;
2189
2190   if (len > (size_t) (buff->limit - result))
2191     {
2192       buff = _cpp_get_buff (pfile, len);
2193       buff->next = pfile->a_buff;
2194       pfile->a_buff = buff;
2195       result = buff->cur;
2196     }
2197
2198   buff->cur = result + len;
2199   return result;
2200 }