gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27
  28 /* MULTIBYTE_CHARS support only works for native compilers.
  29    ??? Ideally what we want is to model widechar support after
  30    the current floating point support.  */
  31 #ifdef CROSS_COMPILE
  32 #undef MULTIBYTE_CHARS
  33 #endif
  34
  35 #ifdef MULTIBYTE_CHARS
  36 #include "mbchar.h"
  37 #include <locale.h>
  38 #endif
  39
  40 /* Tokens with SPELL_STRING store their spelling in the token list,
  41    and its length in the token->val.name.len.  The category of each
        token type is recorded in the token_spellings table.  */
  42 enum spell_type
  43 {
  44   SPELL_OPERATOR = 0,
  45   SPELL_CHAR,
  46   SPELL_IDENT,
  47   SPELL_NUMBER,
  48   SPELL_STRING,
  49   SPELL_NONE
  50 };
  51
     /* One row of the token_spellings table, which is indexed by token
        type (see TOKEN_SPELL and TOKEN_NAME).  */
  52 struct token_spelling
  53 {
  54   enum spell_type category;
  55   const unsigned char *name;
  56 };
  57
     /* Spellings of the digraphs, the alternative forms of some
        punctuators.  */
  58 static const unsigned char *const digraph_spellings[] =
  59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  60
     /* Build token_spellings from TTYPE_TABLE.  An OP entry becomes a
        SPELL_OPERATOR row whose name is the spelling S; a TK entry takes
        its category from S and its name from STRINGX (E).  OP and TK are
        defined only while the table is being expanded.  */
  61 #define OP(e, s) { SPELL_OPERATOR, U s           },
  62 #define TK(e, s) { s,              U STRINGX (e) },
  63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  64 #undef OP
  65 #undef TK
  66
     /* Look up the spelling category and the name recorded for the type
        of TOKEN.  */
  67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     /* Rewind the read position to buffer->backup_to.  Expects a variable
        named `buffer' to be in scope where the macro is used.  */
  69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  70
     /* Forward declarations of the static functions of this file.  */
  71 static void handle_newline PARAMS ((cpp_reader *));
  72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  74
  75 static int skip_block_comment PARAMS ((cpp_reader *));
  76 static int skip_line_comment PARAMS ((cpp_reader *));
  77 static void adjust_column PARAMS ((cpp_reader *));
  78 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  80 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  81                                                     const U_CHAR *));
  82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  85 static void unterminated PARAMS ((cpp_reader *, int));
  86 static bool trigraph_p PARAMS ((cpp_reader *));
  87 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  90                                    const unsigned char *, unsigned int *));
  91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  92
  93 static unsigned int hex_digit_value PARAMS ((unsigned int));
  94 static _cpp_buff *new_buff PARAMS ((size_t));
  95
  96 /* Test whether TOKEN is an identifier spelled exactly like STRING.
  97
  98    STRING is NUL-terminated.  A TOKEN whose type is not CPP_NAME never
  99    matches.  Returns 1 on a match and 0 otherwise.  */
 100
 101 int
 102 cpp_ideq (token, string)
 103      const cpp_token *token;
 104      const char *string;
 105 {
 106   const U_CHAR *wanted = (const U_CHAR *) string;
 107
 108   return (token->type == CPP_NAME
 109           && ustrcmp (NODE_NAME (token->val.node), wanted) == 0);
 110 }
 111
 112 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 113    Returns with buffer->cur pointing to the character immediately
 114    following the newline (combination).  Also begins a new line for
        position tracking: buffer->line_base is moved to the new
        buffer->cur, buffer->col_adjust is cleared and pfile->line is
        incremented.  */
 115 static void
 116 handle_newline (pfile)
 117      cpp_reader *pfile;
 118 {
 119   cpp_buffer *buffer = pfile->buffer;
 120
 121   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 122      only accept CR-LF; maybe we should fall back to that behaviour?
 123
 124      NOTE: the EOF case in _cpp_lex_direct currently requires the
 125      buffer->cur != buffer->rlimit test here for 0-length files.

          Assuming buffer->cur[-1] is a CR or an LF, the sum below can
          only match when buffer->cur[0] is the other one of the two.  */
 126   if (buffer->cur != buffer->rlimit
 127       && buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 128     buffer->cur++;
 129
       /* The next line starts here; tab adjustments do not carry over.  */
 130   buffer->line_base = buffer->cur;
 131   buffer->col_adjust = 0;
 132   pfile->line++;
 133 }
 134
 135 /* Subroutine of skip_escaped_newlines; called when a 3-character
 136    sequence beginning with "??" is encountered.  buffer->cur points to
 137    the second '?'.
 138
 139    Warn if necessary, and returns true if the sequence forms a
 140    trigraph and the trigraph should be honoured.  The read position
        is not changed here; the caller does the translation.  */
 141 static bool
 142 trigraph_p (pfile)
 143      cpp_reader *pfile;
 144 {
 145   cpp_buffer *buffer = pfile->buffer;
       /* The character after the second '?', which selects the trigraph.  */
 146   cppchar_t from_char = buffer->cur[1];
 147   bool accept;
 148
       /* A zero map entry means this character completes no trigraph.  */
 149   if (!_cpp_trigraph_map[from_char])
 150     return false;
 151
       /* Trigraphs are honoured only when the trigraphs option is set.  */
 152   accept = CPP_OPTION (pfile, trigraphs);
 153
 154   /* Don't warn about trigraphs in comments.  */
 155   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 156     {
 157       if (accept)
 158         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
 159                                "trigraph ??%c converted to %c",
 160                                (int) from_char,
 161                                (int) _cpp_trigraph_map[from_char]);
           /* An ignored trigraph is reported once per position:
              last_Wtrigraphs remembers where the last such warning was
              given.  */
 162       else if (buffer->cur != buffer->last_Wtrigraphs)
 163         {
 164           buffer->last_Wtrigraphs = buffer->cur;
 165           cpp_warning_with_line (pfile, pfile->line,
 166                                  CPP_BUF_COL (buffer) - 1,
 167                                  "trigraph ??%c ignored", (int) from_char);
 168         }
 169     }
 170
 171   return accept;
 172 }
 173
 174 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 175    lie in buffer->cur[-1].  Returns the next byte, which will be in
 176    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 177    2 of the ISO C standard.

        When a trigraph is translated, the value returned is the
        translated character rather than a byte of the buffer, and when a
        backslash-newline is the last thing in the buffer the value
        returned is EOF.  For a buffer with from_stage3 set nothing is
        skipped and the character in buffer->cur[-1] is returned as is.
        Each escaped newline skipped also moves buffer->backup_to to the
        position just after it.  */
 178 static cppchar_t
 179 skip_escaped_newlines (pfile)
 180      cpp_reader *pfile;
 181 {
 182   cpp_buffer *buffer = pfile->buffer;
 183   cppchar_t next = buffer->cur[-1];
 184
 185   /* Only do this if we apply stages 1 and 2.  */
 186   if (!buffer->from_stage3)
 187     {
 188       const unsigned char *saved_cur;
 189       cppchar_t next1;
 190
 191       do
 192         {
               /* Nothing follows the '?' or '\\', so nothing can be
                  skipped.  */
 193           if (buffer->cur == buffer->rlimit)
 194             break;
 195
 196           if (next == '?')
 197             {
                   /* A trigraph needs a second '?' and one more character
                      after that.  */
 198               if (buffer->cur[0] != '?' || buffer->cur + 1 == buffer->rlimit)
 199                 break;
 200
 201               if (!trigraph_p (pfile))
 202                 break;
 203
 204               /* Translate the trigraph.  */
 205               next = _cpp_trigraph_map[buffer->cur[1]];
 206               buffer->cur += 2;
                   /* Only a translated backslash with something after it
                      can go on to form an escaped newline.  */
 207               if (next != '\\' || buffer->cur == buffer->rlimit)
 208                 break;
 209             }
 210
 211           /* We have a backslash, and room for at least one more
 212              character.  Skip horizontal whitespace.  */
 213           saved_cur = buffer->cur;
 214           do
 215             next1 = *buffer->cur++;
 216           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 217
               /* No newline followed, so this was not an escaped newline:
                  put the read position back just after the backslash and
                  return the backslash itself.  */
 218           if (!is_vspace (next1))
 219             {
 220               buffer->cur = saved_cur;
 221               break;
 222             }
 223
               /* More than the newline character was consumed, so
                  whitespace separated it from the backslash.  */
 224           if (saved_cur != buffer->cur - 1
 225               && !pfile->state.lexing_comment)
 226             cpp_warning (pfile, "backslash and newline separated by space");
 227
 228           handle_newline (pfile);
               /* A later BACKUP () returns to just after the escaped
                  newline rather than into it.  */
 229           buffer->backup_to = buffer->cur;
 230           if (buffer->cur == buffer->rlimit)
 231             {
 232               cpp_pedwarn (pfile, "backslash-newline at end of file");
 233               next = EOF;
 234             }
 235           else
 236             next = *buffer->cur++;
 237         }
           /* The character after an escaped newline may itself start
              another one.  */
 238       while (next == '\\' || next == '?');
 239     }
 240
 241   return next;
 242 }
 243
 244 /* Fetch the next character as seen after trigraph conversion and
 245    after any run of escaped newlines has been skipped, or EOF when
 246    the buffer is exhausted.  Plain characters take the short route
 247    and never reach skip_escaped_newlines.  On return buffer->backup_to
 248    is the position to rewind to if the character is not wanted.  */
 249 static cppchar_t
 250 get_effective_char (pfile)
 251      cpp_reader *pfile;
 252 {
 253   cpp_buffer *buf = pfile->buffer;
 254   cppchar_t ch;
 255
 256   /* Remember where to rewind to should the caller reject the result.  */
 257   buf->backup_to = buf->cur;
 258   if (buf->cur >= buf->rlimit)
 259     return EOF;
 260
 261   ch = *buf->cur++;
 262   if (__builtin_expect (ch == '?' || ch == '\\', 0))
 263     ch = skip_escaped_newlines (pfile);
 264   return ch;
 265 }
 266
 267 /* Skip a C-style block comment.  We find the end of the comment by
 268    seeing if an asterisk is before every '/' we encounter.  Returns
 269    non-zero if comment terminated by EOF, zero otherwise.

        state.lexing_comment is set while the comment is being skipped,
        which silences the trigraph and backslash-space warnings that
        skip_escaped_newlines would otherwise give.  Newlines and tabs
        inside the comment still update the line and column tracking.  */
 270 static int
 271 skip_block_comment (pfile)
 272      cpp_reader *pfile;
 273 {
 274   cpp_buffer *buffer = pfile->buffer;
       /* The current and the previous effective character.  */
 275   cppchar_t c = EOF, prevc = EOF;
 276
 277   pfile->state.lexing_comment = 1;
 278   while (buffer->cur != buffer->rlimit)
 279     {
 280       prevc = c, c = *buffer->cur++;
 281
 282       /* FIXME: For speed, create a new character class of characters
 283          of interest inside block comments.  */
 284       if (c == '?' || c == '\\')
 285         c = skip_escaped_newlines (pfile);
 286
 287       /* People like decorating comments with '*', so check for '/'
 288          instead for efficiency.  */
 289       if (c == '/')
 290         {
               /* A '/' directly after a '*' closes the comment.  */
 291           if (prevc == '*')
 292             break;
 293
 294           /* Warn about potential nested comments, but not if the '/'
 295              comes immediately before the true comment delimiter.
 296              Don't bother to get it right across escaped newlines.  */
 297           if (CPP_OPTION (pfile, warn_comments)
 298               && buffer->cur + 1 < buffer->rlimit
 299               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 300             cpp_warning_with_line (pfile,
 301                                    pfile->line, CPP_BUF_COL (buffer),
 302                                    "\"/*\" within comment");
 303         }
 304       else if (is_vspace (c))
 305         handle_newline (pfile);
 306       else if (c == '\t')
 307         adjust_column (pfile);
 308     }
 309
 310   pfile->state.lexing_comment = 0;
       /* Zero only if the loop ended on a '/' preceded by a '*'; running
          out of buffer gives non-zero.  */
 311   return c != '/' || prevc != '*';
 312 }
 313
 314 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 315    terminating newline.  Handles escaped newlines.  Returns non-zero
 316    if a multiline comment.

        If the buffer ends before a newline is found, buffer->cur is left
        at buffer->rlimit.  "Multiline" means pfile->line changed while
        skipping, which here can only come from escaped newlines.
        state.lexing_comment is set for the duration of the skip.  */
 317 static int
 318 skip_line_comment (pfile)
 319      cpp_reader *pfile;
 320 {
 321   cpp_buffer *buffer = pfile->buffer;
 322   unsigned int orig_line = pfile->line;
 323   cppchar_t c;
 324
 325   pfile->state.lexing_comment = 1;
 326   do
 327     {
 328       if (buffer->cur == buffer->rlimit)
 329         goto at_eof;
 330
 331       c = *buffer->cur++;
 332       if (c == '?' || c == '\\')
 333         c = skip_escaped_newlines (pfile);
 334     }
 335   while (!is_vspace (c));
 336
 337   /* Step back over the newline, except at EOF.  */
 338   buffer->cur--;
 339  at_eof:
 340
 341   pfile->state.lexing_comment = 0;
 342   return orig_line != pfile->line;
 343 }
 344
 345 /* pfile->buffer->cur is one beyond the \t character.  Update
 346    col_adjust so we track the column correctly.  The amount added is
        the distance from the tab's own column to the next multiple of
        the tabstop option, less the one position the tab occupies.  */
 347 static void
 348 adjust_column (pfile)
 349      cpp_reader *pfile;
 350 {
 351   cpp_buffer *buffer = pfile->buffer;
 352   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 353
 354   /* Round it up to multiple of the tabstop, but subtract 1 since the
 355      tab itself occupies a character position.  */
 356   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 357                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 358 }
 359
 360 /* Skips whitespace, starting with the character C that the caller
 361    has already read, and leaves buffer->cur at the first character
 362    that is not horizontal whitespace, or at buffer->rlimit if the
        buffer runs out first.  Adjusts buffer->col_adjust (through
        adjust_column) to account for tabs.  Without this, tokens might
        be assigned an incorrect column.  Null characters are skipped
        with a single warning per call; form feeds and vertical tabs draw
        a pedantic warning inside directives.  */
 363 static void
 364 skip_whitespace (pfile, c)
 365      cpp_reader *pfile;
 366      cppchar_t c;
 367 {
 368   cpp_buffer *buffer = pfile->buffer;
 369   unsigned int warned = 0;
 370
 371   do
 372     {
 373       /* Horizontal space always OK.  */
 374       if (c == ' ')
 375         ;
 376       else if (c == '\t')
 377         adjust_column (pfile);
 378       /* Just \f \v or \0 left.  */
 379       else if (c == '\0')
 380         {
 381           if (!warned)
 382             {
 383               cpp_warning (pfile, "null character(s) ignored");
 384               warned = 1;
 385             }
 386         }
 387       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 388         cpp_pedwarn_with_line (pfile, pfile->line,
 389                                CPP_BUF_COL (buffer),
 390                                "%s in preprocessing directive",
 391                                c == '\f' ? "form feed" : "vertical tab");
 392
           /* At the end of the buffer there is no character to step back
              over, so return with buffer->cur at buffer->rlimit.  */
 393       if (buffer->cur == buffer->rlimit)
 394         return;
 395       c = *buffer->cur++;
 396     }
 397   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 398   while (is_nvspace (c));
 399
       /* Un-read the character that ended the run.  */
 400   buffer->cur--;
 401 }
 402
 403 /* Return 1 if every character of the number token STRING could also
 404    appear in a name (so no '.', '+' or '-'), and 0 otherwise.  */
 405 static int
 406 name_p (pfile, string)
 407      cpp_reader *pfile;
 408      const cpp_string *string;
 409 {
 410   unsigned int pos = 0;
 411
 412   while (pos < string->len && is_idchar (string->text[pos]))
 413     pos++;
 414
 415   /* Every character passed only if the scan reached the end.  */
 416   return pos == string->len;
 417 }
 418
 419 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 420    a critical inner loop.  The common case is an identifier which has
 421    not been split by backslash-newline, does not contain a dollar
 422    sign, and has already been scanned (roughly 10:1 ratio of
 423    seen:unseen identifiers in normal code; the distribution is
 424    Poisson-like).  Second most common case is a new identifier, not
 425    split and no dollar sign.  The other possibilities are rare and
 426    have been relegated to parse_identifier_slow.

        On entry the first character of the identifier is at
        buffer->cur[-1].  Returns the node obtained from the hash table
        for the identifier's spelling, with buffer->cur moved past the
        identifier.  */
 427
 428 static cpp_hashnode *
 429 parse_identifier (pfile)
 430      cpp_reader *pfile;
 431 {
 432   cpp_hashnode *result;
 433   const U_CHAR *cur, *rlimit;
 434
 435   /* Fast-path loop.  Skim over a normal identifier.
 436      N.B. ISIDNUM does not include $.  */
 437   cur    = pfile->buffer->cur - 1;
 438   rlimit = pfile->buffer->rlimit;
 439   do
 440     cur++;
 441   while (cur < rlimit && ISIDNUM (*cur));
 442
 443   /* Check for slow-path cases.  */
 444   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 445     result = parse_identifier_slow (pfile, cur);
 446   else
 447     {
           /* The spelling is the contiguous run from the first character
              up to CUR, so it is looked up straight from the buffer.  */
 448       const U_CHAR *base = pfile->buffer->cur - 1;
 449       result = (cpp_hashnode *)
 450         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 451       pfile->buffer->cur = cur;
 452     }
 453
 454   /* Rarely, identifiers require diagnostics when lexed.
 455      XXX Has to be forced out of the fast path.  */
 456   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 457                         && !pfile->state.skipping, 0))
 458     {
 459       /* It is allowed to poison the same identifier twice.  */
 460       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 461         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 462                    NODE_NAME (result));
 463
 464       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 465          replacement list of a variadic macro.  */
 466       if (result == pfile->spec_nodes.n__VA_ARGS__
 467           && !pfile->state.va_args_ok)
 468         cpp_pedwarn (pfile,
 469         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 470     }
 471
 472   return result;
 473 }
 474
 475 /* Slow path.  This handles identifiers which have been split, and
 476    identifiers which contain dollar signs.  The part of the identifier
 477    from PFILE->buffer->cur-1 to CUR has already been scanned.

        The spelling is assembled on the hash table's obstack, because an
        identifier split by escaped newlines is not contiguous in the
        buffer.  Returns the node obtained from the hash table, with
        buffer->cur left after the identifier.  */
 478 static cpp_hashnode *
 479 parse_identifier_slow (pfile, cur)
 480      cpp_reader *pfile;
 481      const U_CHAR *cur;
 482 {
 483   cpp_buffer *buffer = pfile->buffer;
 484   const U_CHAR *base = buffer->cur - 1;
 485   struct obstack *stack = &pfile->hash_table->stack;
 486   unsigned int c, saw_dollar = 0, len;
 487
 488   /* Copy the part of the token which is known to be okay.  */
 489   obstack_grow (stack, base, cur - base);
 490
 491   /* Now process the part which isn't.  We are looking at one of
 492      '$', '\\', or '?' on entry to this loop.  */
 493   c = *cur++;
 494   buffer->cur = cur;
 495   do
 496     {
 497       while (is_idchar (c))
 498         {
 499           obstack_1grow (stack, c);
 500
 501           if (c == '$')
 502             saw_dollar++;
 503
               /* At the end of the buffer there is no extra character to
                  step back over, so bypass BACKUP ().  */
 504           if (buffer->cur == buffer->rlimit)
 505             goto at_eof;
 506
 507           c = *buffer->cur++;
 508         }
 509
 510       /* Potential escaped newline?  */
 511       buffer->backup_to = buffer->cur - 1;
 512       if (c != '?' && c != '\\')
 513         break;
 514       c = skip_escaped_newlines (pfile);
 515     }
 516   while (is_idchar (c));
 517
 518   /* Step back over the unwanted char, except at EOF.  */
 519   BACKUP ();
 520  at_eof:
 521
 522   /* $ is not an identifier character in the standard, but is commonly
 523      accepted as an extension.  Don't warn about it in skipped
 524      conditional blocks.  */
 525   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 526     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 527
 528   /* Identifiers are null-terminated.  */
       /* LEN is taken before the NUL is added, so it excludes it.  */
 529   len = obstack_object_size (stack);
 530   obstack_1grow (stack, '\0');
 531
 532   return (cpp_hashnode *)
 533     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 534 }
 535
 536 /* Parse a number, skipping embedded backslash-newlines.

        C is the first character of the number, already read by the
        caller; if LEADING_PERIOD is non-zero a '.' is stored in front of
        it.  The spelling is written to pfile->u_buff and NUL-terminated;
        NUMBER receives its start and its length (not counting the NUL),
        and the front of pfile->u_buff is moved past the NUL.  */
 537 static void
 538 parse_number (pfile, number, c, leading_period)
 539      cpp_reader *pfile;
 540      cpp_string *number;
 541      cppchar_t c;
 542      int leading_period;
 543 {
 544   cpp_buffer *buffer = pfile->buffer;
       /* Where the next character goes, and the end of the space
          available in pfile->u_buff.  */
 545   unsigned char *dest, *limit;
 546
 547   dest = BUFF_FRONT (pfile->u_buff);
 548   limit = BUFF_LIMIT (pfile->u_buff);
 549
 550   /* Place a leading period.  */
 551   if (leading_period)
 552     {
 553       if (dest == limit)
 554         {
 555           _cpp_extend_buff (pfile, &pfile->u_buff, 1);
 556           dest = BUFF_FRONT (pfile->u_buff);
 557           limit = BUFF_LIMIT (pfile->u_buff);
 558         }
 559       *dest++ = '.';
 560     }
 561
 562   do
 563     {
 564       do
 565         {
 566           /* Need room for terminating null.  */
 567           if ((size_t) (limit - dest) < 2)
 568             {
                   /* DEST and LIMIT are re-derived from pfile->u_buff
                      after the call, keeping the characters already
                      written.  */
 569               size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 570               _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 571               dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 572               limit = BUFF_LIMIT (pfile->u_buff);
 573             }
 574           *dest++ = c;
 575
               /* At the end of the buffer there is no extra character to
                  step back over, so bypass BACKUP ().  */
 576           if (buffer->cur == buffer->rlimit)
 577             goto at_eof;
 578
 579           c = *buffer->cur++;
 580         }
 581       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 582
 583       /* Potential escaped newline?  */
 584       buffer->backup_to = buffer->cur - 1;
 585       if (c != '?' && c != '\\')
 586         break;
 587       c = skip_escaped_newlines (pfile);
 588     }
 589   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 590
 591   /* Step back over the unwanted char, except at EOF.  */
 592   BACKUP ();
 593  at_eof:
 594
 595   /* Null-terminate the number.  */
 596   *dest = '\0';
 597
 598   number->text = BUFF_FRONT (pfile->u_buff);
 599   number->len = dest - number->text;
       /* Move the front past the spelling and its NUL.  */
 600   BUFF_FRONT (pfile->u_buff) = dest + 1;
 601 }
 602
 603 /* Subroutine of parse_string.  Reports that the literal closed by TERM
        was never terminated.  For a string literal recorded as starting
        on an earlier line (pfile->mls_line), also points at that start
        and then clears the record.  */
 604 static void
 605 unterminated (pfile, term)
 606      cpp_reader *pfile;
 607      int term;
 608 {
 609   cpp_error (pfile, "missing terminating %c character", term);
 610
 611   if (term != '"' || pfile->mls_line == 0 || pfile->mls_line == pfile->line)
 612     return;
 613
 614   cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 615                        "possible start of unterminated string literal");
 616   pfile->mls_line = 0;
 617 }
 618
 619 /* Subroutine of parse_string.  DEST is the position in pfile->u_buff
        at which the next character would be stored.  Returns non-zero if
        a terminator met at this point is not escaped, that is, if an
        even number of backslashes (possibly none) was stored just before
        DEST.  Always non-zero when state.angled_headers is set.  */
 620 static int
 621 unescaped_terminator_p (pfile, dest)
 622      cpp_reader *pfile;
 623      const unsigned char *dest;
 624 {
 625   const unsigned char *lowest, *scan;
 626   unsigned int backslashes = 0;
 627
 628   /* In #include-style directives, terminators are not escapable.  */
 629   if (pfile->state.angled_headers)
 630     return 1;
 631
 632   /* Count the backslashes that immediately precede DEST, without
 633      looking below the front of the buffer being filled.  */
 634   lowest = BUFF_FRONT (pfile->u_buff);
 635   for (scan = dest; scan > lowest && scan[-1] == '\\'; scan--)
 636     backslashes++;
 637
 638   return (backslashes % 2) == 0;
 639 }
 640
 641 /* Parses a string, character constant, or angle-bracketed header file
 642    name.  Handles embedded trigraphs and escaped newlines.  The stored
 643    string is guaranteed NUL-terminated, but it is not guaranteed that
 644    this is the first NUL since embedded NULs are preserved.
 645    Multi-line strings are allowed, but they are deprecated.
 646
 647    When this function returns, buffer->cur points to the next
 648    character to be processed.

        TERMINATOR is the character that closes the literal.  The text
        between the delimiters is written to pfile->u_buff and recorded
        in TOKEN->val.str; the terminator itself is not stored.  */
 649 static void
 650 parse_string (pfile, token, terminator)
 651      cpp_reader *pfile;
 652      cpp_token *token;
 653      cppchar_t terminator;
 654 {
 655   cpp_buffer *buffer = pfile->buffer;
 656   unsigned char *dest, *limit;
 657   cppchar_t c;
 658   bool warned_nulls = false, warned_multi = false;
 659
 660   dest = BUFF_FRONT (pfile->u_buff);
 661   limit = BUFF_LIMIT (pfile->u_buff);
 662
 663   for (;;)
 664     {
 665       /* We need room for another char, possibly the terminating NUL.  */
 666       if ((size_t) (limit - dest) < 1)
 667         {
 668           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 669           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 670           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 671           limit = BUFF_LIMIT (pfile->u_buff);
 672         }
 673
           /* Running out of buffer before the terminator is an error.  */
 674       if (buffer->cur == buffer->rlimit)
 675         {
 676           unterminated (pfile, terminator);
 677           break;
 678         }
 679
 680       /* Handle trigraphs, escaped newlines etc.  */
 681       c = *buffer->cur++;
 682       if (c == '?' || c == '\\')
 683         c = skip_escaped_newlines (pfile);
 684
 685       if (c == terminator)
 686         {
               /* An escaped terminator is stored like any other
                  character.  */
 687           if (unescaped_terminator_p (pfile, dest))
 688             break;
 689         }
 690       else if (is_vspace (c))
 691         {
 692           /* In assembly language, silently terminate string and
 693              character literals at end of line.  This is a kludge
 694              around not knowing where comments are.  */
 695           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 696             {
                   /* Leave the newline for the caller to lex.  */
 697               buffer->cur--;
 698               break;
 699             }
 700
 701           /* Character constants and header names may not extend over
 702              multiple lines.  In Standard C, neither may strings.
 703              Unfortunately, we accept multiline strings as an
 704              extension, except in #include family directives.  */
 705           if (terminator != '"' || pfile->state.angled_headers)
 706             {
 707               unterminated (pfile, terminator);
 708               buffer->cur--;
 709               break;
 710             }
 711
 712           if (!warned_multi)
 713             {
 714               warned_multi = true;
 715               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 716             }
 717
               /* Record where the first multi-line string began; it is
                  reported by unterminated if a later string turns out
                  to be unterminated.  */
 718           if (pfile->mls_line == 0)
 719             {
 720               pfile->mls_line = token->line;
 721               pfile->mls_col = token->col;
 722             }
 723
               /* Whatever the newline sequence was, a single '\n' is
                  stored.  */
 724           handle_newline (pfile);
 725           c = '\n';
 726         }
 727       else if (c == '\0' && !warned_nulls)
 728         {
 729           warned_nulls = true;
 730           cpp_warning (pfile, "null character(s) preserved in literal");
 731         }
 732
 733       *dest++ = c;
 734     }
 735
 736   *dest = '\0';
 737
 738   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 739   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
       /* Move the front past the text and its NUL.  */
 740   BUFF_FRONT (pfile->u_buff) = dest + 1;
 741 }
 742
 743 /* The stored comment includes the comment start and any terminator.

        Turns TOKEN into a CPP_COMMENT whose text is a copy of the
        comment just skipped.  FROM points just after the comment's
        initial '/', and the comment extends up to pfile->buffer->cur.
        The copy is not NUL-terminated; its length is in
        TOKEN->val.str.len.  */
 744 static void
 745 save_comment (pfile, token, from)
 746      cpp_reader *pfile;
 747      cpp_token *token;
 748      const unsigned char *from;
 749 {
 750   unsigned char *buffer;
 751   unsigned int len;
 752
 753   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 754
 755   /* C++ comments probably (not definitely) have moved past a new
 756      line, which we don't want to save in the comment.  */
 757   if (is_vspace (pfile->buffer->cur[-1]))
 758     len--;
 759   buffer = _cpp_unaligned_alloc (pfile, len);
 760
 761   token->type = CPP_COMMENT;
 762   token->val.str.len = len;
 763   token->val.str.text = buffer;
 764
       /* The leading '/' lies before FROM, so it is written by hand and
          the remaining LEN - 1 bytes are copied from FROM.  */
 765   buffer[0] = '/';
 766   memcpy (buffer + 1, from, len - 1);
 767 }
 768
 769 /* Give RUN storage for COUNT tokens and mark it as having no successor.  */
 770 void
 771 _cpp_init_tokenrun (run, count)
 772      tokenrun *run;
 773      unsigned int count;
 774 {
 775   cpp_token *storage = xnewvec (cpp_token, count);
 776
 777   run->base = storage;
       run->limit = storage + count;
       run->next = NULL;
 778 }
 779
 780 /* Return the run that follows RUN, creating and linking it first if
        RUN has no successor yet.  */
 781 static tokenrun *
 782 next_tokenrun (run)
 783      tokenrun *run;
 784 {
 785   tokenrun *successor = run->next;
 786
 787   if (successor != NULL)
 788     return successor;
 789
 790   /* None yet: chain a fresh run of 250 tokens after RUN.  */
 791   successor = xnew (tokenrun);
 792   run->next = successor;
       successor->prev = run;
       _cpp_init_tokenrun (successor, 250);
       return successor;
 793 }
 794
 795 /* Hand out one extra token from the current token run, so that it is
 796    invalidated together with the other tokens of the line.  Its line
 797    and col are copied from the token lexed most recently, which makes
 798    diagnostics about it appear in the right place.  */
 799 cpp_token *
 800 _cpp_temp_token (pfile)
 801      cpp_reader *pfile;
 802 {
 803   /* Note the most recently lexed token before cur_token can be moved
 804      into a different run.  */
 805   cpp_token *const previous = pfile->cur_token - 1;
 806   cpp_token *fresh;
 807
 808   if (pfile->cur_token == pfile->cur_run->limit)
 809     {
 810       tokenrun *following = next_tokenrun (pfile->cur_run);
 811       pfile->cur_run = following;
 812       pfile->cur_token = following->base;
 813     }
 814
 815   fresh = pfile->cur_token++;
       fresh->line = previous->line;
       fresh->col = previous->col;
       return fresh;
 816 }
 817
 818 /* Lex a token into RESULT (external interface).  Takes care of issues
 819    like directive handling, token lookahead, multiple include
 820    optimisation and skipping.  Returns a pointer to the token.  */
 821 const cpp_token *
 822 _cpp_lex_token (pfile)
 823      cpp_reader *pfile;
 824 {
 825   cpp_token *result;
 826
 827   for (;;)
 828     {
           /* The current run is full: continue in the next one.  */
 829       if (pfile->cur_token == pfile->cur_run->limit)
 830         {
 831           pfile->cur_run = next_tokenrun (pfile->cur_run);
 832           pfile->cur_token = pfile->cur_run->base;
 833         }
 834
           /* While lookaheads are pending, the token at cur_token is
              handed out as it stands instead of lexing a new one.  */
 835       if (pfile->lookaheads)
 836         {
 837           pfile->lookaheads--;
 838           result = pfile->cur_token++;
 839         }
 840       else
 841         result = _cpp_lex_direct (pfile);
 842
 843       if (result->flags & BOL)
 844         {
 845           /* Is this a directive.  If _cpp_handle_directive returns
 846              false, it is an assembler #.  */
 847           if (result->type == CPP_HASH
 848               && !pfile->state.parsing_args
 849               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 850             continue;
               /* Tell the client, if it asked to be told, that a new
                  line has started.  */
 851           if (pfile->cb.line_change && !pfile->state.skipping)
 852             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 853         }
 854
 855       /* We don't skip tokens in directives.  */
 856       if (pfile->state.in_directive)
 857         break;
 858
 859       /* Outside a directive, invalidate controlling macros.  At file
 860          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 861          get here and MI optimisation works.  */
 862       pfile->mi_valid = false;
 863
           /* While skipping, keep lexing and discarding tokens; only
              CPP_EOF is passed on.  */
 864       if (!pfile->state.skipping || result->type == CPP_EOF)
 865         break;
 866     }
 867
 868   return result;
 869 }
 870
     /* If the next effective character is CHAR, consume it and set
        result->type to THEN_TYPE; otherwise rewind with BACKUP () and
        set result->type to ELSE_TYPE.  Expects `pfile', `buffer' and
        `result' to be in scope where the macro is used.  */
 871 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 872   do {                                          \
 873     if (get_effective_char (pfile) == CHAR)     \
 874       result->type = THEN_TYPE;                 \
 875     else                                        \
 876       {                                         \
 877         BACKUP ();                              \
 878         result->type = ELSE_TYPE;               \
 879       }                                         \
 880   } while (0)
 881
 882 /* Lex a token into pfile->cur_token, which is also incremented, to
 883    get diagnostics pointing to the correct location.
 884
 885    Does not handle issues such as token lookahead, multiple-include
 886    optimisation, directives, skipping etc.  This function is only
 887    suitable for use by _cpp_lex_token, and in special cases like
 888    lex_expansion_token which doesn't care for any of these issues.
 889
 890    When meeting a newline, returns CPP_EOF if parsing a directive,
 891    otherwise returns to the start of the token buffer if permissible.
 892    Returns the location of the lexed token.  */
     /* Implementation notes: the labels fresh_line, update_tokens_line
        and skipped_white are re-entry points.  fresh_line is used after
        a newline outside a directive and after a buffer has been popped;
        update_tokens_line after an escaped newline changed the line
        number or after a comment was skipped without being saved;
        skipped_white after a run of whitespace.  The at_eof label sits
        inside the switch but is only reached by goto, when the buffer's
        cur pointer has reached rlimit.  */
 893 cpp_token *
 894 _cpp_lex_direct (pfile)
 895      cpp_reader *pfile;
 896 {
 897   cppchar_t c;
 898   cpp_buffer *buffer;
 899   const unsigned char *comment_start;
 900   cpp_token *result = pfile->cur_token++;
 901
       /* Re-read pfile->buffer (this label is re-entered after
          _cpp_pop_buffer) and move the flags saved for the next token
          into RESULT, clearing the saved copy.  */
 902  fresh_line:
 903   buffer = pfile->buffer;
 904   result->flags = buffer->saved_flags;
 905   buffer->saved_flags = 0;
 906  update_tokens_line:
 907   result->line = pfile->line;
 908
 909  skipped_white:
 910   if (buffer->cur == buffer->rlimit)
 911     goto at_eof;
 912   c = *buffer->cur++;
 913   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 914
       /* Re-entered with a new C when skip_escaped_newlines hands back a
          character other than '?' or '\\'.  */
 915  trigraph:
 916   switch (c)
 917     {
 918     at_eof:
 919       buffer->saved_flags = BOL;
 920       if (!pfile->state.parsing_args && !pfile->state.in_directive)
 921         {
 922           if (buffer->cur != buffer->line_base)
 923             {
 924               /* Non-empty files should end in a newline.  Don't warn
 925                  for command line and _Pragma buffers.  */
 926               if (!buffer->from_stage3)
 927                 cpp_pedwarn (pfile, "no newline at end of file");
 928               handle_newline (pfile);
 929             }
 930
 931           /* Don't pop the last buffer.  */
 932           if (buffer->prev)
 933             {
                   /* Read return_at_eof before popping, since BUFFER is
                      the buffer being popped.  A nonzero value makes us
                      return CPP_EOF instead of continuing with the
                      buffer underneath.  */
 934               unsigned char stop = buffer->return_at_eof;
 935
 936               _cpp_pop_buffer (pfile);
 937               if (!stop)
 938                 goto fresh_line;
 939             }
 940         }
 941       result->type = CPP_EOF;
 942       break;
 943
 944     case ' ': case '\t': case '\f': case '\v': case '\0':
 945       skip_whitespace (pfile, c);
 946       result->flags |= PREV_WHITE;
 947       goto skipped_white;
 948
 949     case '\n': case '\r':
 950       handle_newline (pfile);
 951       buffer->saved_flags = BOL;
 952       if (! pfile->state.in_directive)
 953         {
 954           if (pfile->state.parsing_args == 2)
 955             buffer->saved_flags |= PREV_WHITE;
               /* Unless tokens must be kept, restart token storage at
                  the beginning of base_run so the next line reuses it.  */
 956           if (!pfile->keep_tokens)
 957             {
 958               pfile->cur_run = &pfile->base_run;
 959               result = pfile->base_run.base;
 960               pfile->cur_token = result + 1;
 961             }
 962           goto fresh_line;
 963         }
           /* Inside a directive a newline ends the directive.  */
 964       result->type = CPP_EOF;
 965       break;
 966
 967     case '?':
 968     case '\\':
 969       /* These could start an escaped newline, or '?' a trigraph.  Let
 970          skip_escaped_newlines do all the work.  */
 971       {
 972         unsigned int line = pfile->line;
 973
 974         c = skip_escaped_newlines (pfile);
 975         if (line != pfile->line)
 976           {
                 /* Step back one byte so the character just returned is
                    read again at skipped_white.  */
 977             buffer->cur--;
 978             /* We had at least one escaped newline of some sort.
 979                Update the token's line and column.  */
 980             goto update_tokens_line;
 981           }
 982       }
 983
 984       /* We are either the original '?' or '\\', or a trigraph.  */
 985       if (c == '?')
 986         result->type = CPP_QUERY;
 987       else if (c == '\\')
 988         goto random_char;
 989       else
 990         goto trigraph;
 991       break;
 992
 993     case '0': case '1': case '2': case '3': case '4':
 994     case '5': case '6': case '7': case '8': case '9':
 995       result->type = CPP_NUMBER;
 996       parse_number (pfile, &result->val.str, c, 0);
 997       break;
 998
 999     case '$':
1000       if (!CPP_OPTION (pfile, dollars_in_ident))
1001         goto random_char;
1002       /* Fall through...  */
1003
1004     case '_':
1005     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1006     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1007     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1008     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1009     case 'y': case 'z':
1010     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1011     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1012     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1013     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1014     case 'Y': case 'Z':
1015       result->type = CPP_NAME;
1016       result->val.node = parse_identifier (pfile);
1017
1018       /* 'L' may introduce wide characters or strings.  */
1019       if (result->val.node == pfile->spec_nodes.n_L
1020           && buffer->cur < buffer->rlimit)
1021         {
1022           c = *buffer->cur;
1023           if (c == '\'' || c == '"')
1024             {
1025               buffer->cur++;
1026               result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1027               parse_string (pfile, result, c);
1028             }
1029         }
1030       /* Convert named operators to their proper types.  */
1031       else if (result->val.node->flags & NODE_OPERATOR)
1032         {
               /* val.node is left pointing at the identifier; the
                  NAMED_OP flag records that the operator was spelled as
                  a name.  */
1033           result->flags |= NAMED_OP;
1034           result->type = result->val.node->value.operator;
1035         }
1036       break;
1037
1038     case '\'':
1039     case '"':
1040       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1041       parse_string (pfile, result, c);
1042       break;
1043
1044     case '/':
1045       /* A potential block or line comment.  */
1046       comment_start = buffer->cur;
1047       c = get_effective_char (pfile);
1048
1049       if (c == '*')
1050         {
1051           if (skip_block_comment (pfile))
1052             cpp_error (pfile, "unterminated comment");
1053         }
1054       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1055                             || CPP_IN_SYSTEM_HEADER (pfile)))
1056         {
1057           /* Warn about comments only if pedantically GNUC89, and not
1058              in system headers.  */
1059           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1060               && ! buffer->warned_cplusplus_comments)
1061             {
1062               cpp_pedwarn (pfile,
1063                            "C++ style comments are not allowed in ISO C89");
1064               cpp_pedwarn (pfile,
1065                            "(this will be reported only once per input file)");
1066               buffer->warned_cplusplus_comments = 1;
1067             }
1068
1069           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1070             cpp_warning (pfile, "multi-line comment");
1071         }
1072       else if (c == '=')
1073         {
1074           result->type = CPP_DIV_EQ;
1075           break;
1076         }
1077       else
1078         {
1079           BACKUP ();
1080           result->type = CPP_DIV;
1081           break;
1082         }
1083
           /* Only the two comment branches reach this point.  A comment
              that is not being saved is treated as whitespace before the
              next token.  */
1084       if (!pfile->state.save_comments)
1085         {
1086           result->flags |= PREV_WHITE;
1087           goto update_tokens_line;
1088         }
1089
1090       /* Save the comment as a token in its own right.  */
1091       save_comment (pfile, result, comment_start);
1092       break;
1093
1094     case '<':
           /* When angled_headers is set, '<' opens a header name that
              runs to the closing '>'.  */
1095       if (pfile->state.angled_headers)
1096         {
1097           result->type = CPP_HEADER_NAME;
1098           parse_string (pfile, result, '>');
1099           break;
1100         }
1101
1102       c = get_effective_char (pfile);
1103       if (c == '=')
1104         result->type = CPP_LESS_EQ;
1105       else if (c == '<')
1106         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1107       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1108         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1109       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1110         {
1111           result->type = CPP_OPEN_SQUARE;
1112           result->flags |= DIGRAPH;
1113         }
1114       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1115         {
1116           result->type = CPP_OPEN_BRACE;
1117           result->flags |= DIGRAPH;
1118         }
1119       else
1120         {
1121           BACKUP ();
1122           result->type = CPP_LESS;
1123         }
1124       break;
1125
1126     case '>':
1127       c = get_effective_char (pfile);
1128       if (c == '=')
1129         result->type = CPP_GREATER_EQ;
1130       else if (c == '>')
1131         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1132       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1133         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1134       else
1135         {
1136           BACKUP ();
1137           result->type = CPP_GREATER;
1138         }
1139       break;
1140
1141     case '%':
1142       c = get_effective_char (pfile);
1143       if (c == '=')
1144         result->type = CPP_MOD_EQ;
1145       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1146         {
               /* "%:" is the digraph for '#'; "%:%:" is the digraph for
                  "##".  */
1147           result->flags |= DIGRAPH;
1148           result->type = CPP_HASH;
1149           if (get_effective_char (pfile) == '%')
1150             {
1151               const unsigned char *pos = buffer->cur;
1152
1153               if (get_effective_char (pfile) == ':')
1154                 result->type = CPP_PASTE;
1155               else
                     /* Not "%:%:".  POS was taken just after the second
                        '%', so POS - 1 rewinds to that '%'.  */
1156                 buffer->cur = pos - 1;
1157             }
1158           else
1159             BACKUP ();
1160         }
1161       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1162         {
1163           result->flags |= DIGRAPH;
1164           result->type = CPP_CLOSE_BRACE;
1165         }
1166       else
1167         {
1168           BACKUP ();
1169           result->type = CPP_MOD;
1170         }
1171       break;
1172
1173     case '.':
1174       result->type = CPP_DOT;
1175       c = get_effective_char (pfile);
1176       if (c == '.')
1177         {
1178           const unsigned char *pos = buffer->cur;
1179
1180           if (get_effective_char (pfile) == '.')
1181             result->type = CPP_ELLIPSIS;
1182           else
                 /* Only two dots: rewind to the second '.', leaving a
                    plain CPP_DOT token.  */
1183             buffer->cur = pos - 1;
1184         }
1185       /* All known character sets have 0...9 contiguous.  */
1186       else if (ISDIGIT (c))
1187         {
1188           result->type = CPP_NUMBER;
1189           parse_number (pfile, &result->val.str, c, 1);
1190         }
1191       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1192         result->type = CPP_DOT_STAR;
1193       else
1194         BACKUP ();
1195       break;
1196
1197     case '+':
1198       c = get_effective_char (pfile);
1199       if (c == '+')
1200         result->type = CPP_PLUS_PLUS;
1201       else if (c == '=')
1202         result->type = CPP_PLUS_EQ;
1203       else
1204         {
1205           BACKUP ();
1206           result->type = CPP_PLUS;
1207         }
1208       break;
1209
1210     case '-':
1211       c = get_effective_char (pfile);
1212       if (c == '>')
1213         {
1214           result->type = CPP_DEREF;
               /* "->*" is only looked for when lexing C++.  */
1215           if (CPP_OPTION (pfile, cplusplus))
1216             {
1217               if (get_effective_char (pfile) == '*')
1218                 result->type = CPP_DEREF_STAR;
1219               else
1220                 BACKUP ();
1221             }
1222         }
1223       else if (c == '-')
1224         result->type = CPP_MINUS_MINUS;
1225       else if (c == '=')
1226         result->type = CPP_MINUS_EQ;
1227       else
1228         {
1229           BACKUP ();
1230           result->type = CPP_MINUS;
1231         }
1232       break;
1233
1234     case '&':
1235       c = get_effective_char (pfile);
1236       if (c == '&')
1237         result->type = CPP_AND_AND;
1238       else if (c == '=')
1239         result->type = CPP_AND_EQ;
1240       else
1241         {
1242           BACKUP ();
1243           result->type = CPP_AND;
1244         }
1245       break;
1246
1247     case '|':
1248       c = get_effective_char (pfile);
1249       if (c == '|')
1250         result->type = CPP_OR_OR;
1251       else if (c == '=')
1252         result->type = CPP_OR_EQ;
1253       else
1254         {
1255           BACKUP ();
1256           result->type = CPP_OR;
1257         }
1258       break;
1259
1260     case ':':
1261       c = get_effective_char (pfile);
1262       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1263         result->type = CPP_SCOPE;
1264       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1265         {
1266           result->flags |= DIGRAPH;
1267           result->type = CPP_CLOSE_SQUARE;
1268         }
1269       else
1270         {
1271           BACKUP ();
1272           result->type = CPP_COLON;
1273         }
1274       break;
1275
1276     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1277     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1278     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1279     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1280     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1281
1282     case '~': result->type = CPP_COMPL; break;
1283     case ',': result->type = CPP_COMMA; break;
1284     case '(': result->type = CPP_OPEN_PAREN; break;
1285     case ')': result->type = CPP_CLOSE_PAREN; break;
1286     case '[': result->type = CPP_OPEN_SQUARE; break;
1287     case ']': result->type = CPP_CLOSE_SQUARE; break;
1288     case '{': result->type = CPP_OPEN_BRACE; break;
1289     case '}': result->type = CPP_CLOSE_BRACE; break;
1290     case ';': result->type = CPP_SEMICOLON; break;
1291
1292       /* @ is a punctuator in Objective C.  */
1293     case '@': result->type = CPP_ATSIGN; break;
1294
         /* Also reached by goto: for '$' when dollars are not permitted
            in identifiers, and for a backslash that did not start an
            escaped newline.  The character itself is kept in val.c.  */
1295     random_char:
1296     default:
1297       result->type = CPP_OTHER;
1298       result->val.c = c;
1299       break;
1300     }
1301
1302   return result;
1303 }
1304
1305 /* An upper bound on the number of bytes needed to spell a token,
1306    including preceding whitespace.  */
1307 unsigned int
1308 cpp_token_len (token)
1309      const cpp_token *token;
1310 {
1311   unsigned int len;
1312
1313   switch (TOKEN_SPELL (token))
1314     {
1315     default:            len = 0;                                break;
1316     case SPELL_NUMBER:
1317     case SPELL_STRING:  len = token->val.str.len;               break;
1318     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1319     }
1320   /* 1 for whitespace, 4 for comment delimiters.  */
1321   return len + 5;
1322 }
1323
1324 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1325    already contain the enough space to hold the token's spelling.
1326    Returns a pointer to the character after the last character
1327    written.  */
1328 unsigned char *
1329 cpp_spell_token (pfile, token, buffer)
1330      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1331      const cpp_token *token;
1332      unsigned char *buffer;
1333 {
1334   switch (TOKEN_SPELL (token))
1335     {
1336     case SPELL_OPERATOR:
1337       {
1338         const unsigned char *spelling;
1339         unsigned char c;
1340
1341         if (token->flags & DIGRAPH)
1342           spelling
1343             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1344         else if (token->flags & NAMED_OP)
1345           goto spell_ident;
1346         else
1347           spelling = TOKEN_NAME (token);
1348
1349         while ((c = *spelling++) != '\0')
1350           *buffer++ = c;
1351       }
1352       break;
1353
1354     case SPELL_CHAR:
1355       *buffer++ = token->val.c;
1356       break;
1357
1358     spell_ident:
1359     case SPELL_IDENT:
1360       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1361       buffer += NODE_LEN (token->val.node);
1362       break;
1363
1364     case SPELL_NUMBER:
1365       memcpy (buffer, token->val.str.text, token->val.str.len);
1366       buffer += token->val.str.len;
1367       break;
1368
1369     case SPELL_STRING:
1370       {
1371         int left, right, tag;
1372         switch (token->type)
1373           {
1374           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1375           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1376           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1377           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1378           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1379           default:
1380             cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1381             return buffer;
1382           }
1383         if (tag) *buffer++ = tag;
1384         *buffer++ = left;
1385         memcpy (buffer, token->val.str.text, token->val.str.len);
1386         buffer += token->val.str.len;
1387         *buffer++ = right;
1388       }
1389       break;
1390
1391     case SPELL_NONE:
1392       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1393       break;
1394     }
1395
1396   return buffer;
1397 }
1398
1399 /* Returns a token as a null-terminated string.  The string is
1400    temporary, and automatically freed later.  Useful for diagnostics.  */
1401 unsigned char *
1402 cpp_token_as_text (pfile, token)
1403      cpp_reader *pfile;
1404      const cpp_token *token;
1405 {
1406   unsigned int len = cpp_token_len (token);
1407   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1408
1409   end = cpp_spell_token (pfile, token, start);
1410   end[0] = '\0';
1411
1412   return start;
1413 }
1414
1415 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1416 const char *
1417 cpp_type2name (type)
1418      enum cpp_ttype type;
1419 {
1420   return (const char *) token_spellings[type].name;
1421 }
1422
1423 /* Writes the spelling of token to FP, without any preceding space.
1424    Separated from cpp_spell_token for efficiency - to avoid stdio
1425    double-buffering.  */
1426 void
1427 cpp_output_token (token, fp)
1428      const cpp_token *token;
1429      FILE *fp;
1430 {
1431   switch (TOKEN_SPELL (token))
1432     {
1433     case SPELL_OPERATOR:
1434       {
1435         const unsigned char *spelling;
1436         int c;
1437
1438         if (token->flags & DIGRAPH)
1439           spelling
1440             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1441         else if (token->flags & NAMED_OP)
1442           goto spell_ident;
1443         else
1444           spelling = TOKEN_NAME (token);
1445
1446         c = *spelling;
1447         do
1448           putc (c, fp);
1449         while ((c = *++spelling) != '\0');
1450       }
1451       break;
1452
1453     case SPELL_CHAR:
1454       putc (token->val.c, fp);
1455       break;
1456
1457     spell_ident:
1458     case SPELL_IDENT:
1459       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1460     break;
1461
1462     case SPELL_NUMBER:
1463       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1464       break;
1465
1466     case SPELL_STRING:
1467       {
1468         int left, right, tag;
1469         switch (token->type)
1470           {
1471           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1472           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1473           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1474           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1475           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1476           default:
1477             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1478             return;
1479           }
1480         if (tag) putc (tag, fp);
1481         putc (left, fp);
1482         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1483         putc (right, fp);
1484       }
1485       break;
1486
1487     case SPELL_NONE:
1488       /* An error, most probably.  */
1489       break;
1490     }
1491 }
1492
1493 /* Compare two tokens.  */
1494 int
1495 _cpp_equiv_tokens (a, b)
1496      const cpp_token *a, *b;
1497 {
1498   if (a->type == b->type && a->flags == b->flags)
1499     switch (TOKEN_SPELL (a))
1500       {
1501       default:                  /* Keep compiler happy.  */
1502       case SPELL_OPERATOR:
1503         return 1;
1504       case SPELL_CHAR:
1505         return a->val.c == b->val.c; /* Character.  */
1506       case SPELL_NONE:
1507         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1508       case SPELL_IDENT:
1509         return a->val.node == b->val.node;
1510       case SPELL_NUMBER:
1511       case SPELL_STRING:
1512         return (a->val.str.len == b->val.str.len
1513                 && !memcmp (a->val.str.text, b->val.str.text,
1514                             a->val.str.len));
1515       }
1516
1517   return 0;
1518 }
1519
1520 /* Returns nonzero if a space should be inserted to avoid an
1521    accidental token paste for output.  For simplicity, it is
1522    conservative, and occasionally advises a space where one is not
1523    needed, e.g. "." and ".2".
        TOKEN1 is the token already output and TOKEN2 the one about to
        follow it.  The decision is based on TOKEN1's type and on either
        TOKEN2's type or the first character of TOKEN2's spelling.  */
1524
1525 int
1526 cpp_avoid_paste (pfile, token1, token2)
1527      cpp_reader *pfile;
1528      const cpp_token *token1, *token2;
1529 {
1530   enum cpp_ttype a = token1->type, b = token2->type;
1531   cppchar_t c;
1532
       /* Named operators are spelled as identifiers, so treat them as
          names for pasting purposes.  */
1533   if (token1->flags & NAMED_OP)
1534     a = CPP_NAME;
1535   if (token2->flags & NAMED_OP)
1536     b = CPP_NAME;
1537
       /* C becomes the first character of TOKEN2's spelling when TOKEN2
          is an operator (taken from the digraph table if it was written
          as a digraph).  Otherwise it stays EOF, which is not expected
          to equal any of the characters tested below.  */
1538   c = EOF;
1539   if (token2->flags & DIGRAPH)
1540     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1541   else if (token_spellings[b].category == SPELL_OPERATOR)
1542     c = token_spellings[b].name[0];
1543
1544   /* Quickly get everything that can paste with an '='.  */
1545   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1546     return 1;
1547
       /* Each case lists what, following a token of type A, could be
          read back as a different, longer token.  */
1548   switch (a)
1549     {
1550     case CPP_GREATER:   return c == '>' || c == '?';
1551     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1552     case CPP_PLUS:      return c == '+';
1553     case CPP_MINUS:     return c == '-' || c == '>';
1554     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1555     case CPP_MOD:       return c == ':' || c == '>';
1556     case CPP_AND:       return c == '&';
1557     case CPP_OR:        return c == '|';
1558     case CPP_COLON:     return c == ':' || c == '>';
1559     case CPP_DEREF:     return c == '*';
1560     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1561     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1562     case CPP_NAME:      return ((b == CPP_NUMBER
1563                                  && name_p (pfile, &token2->val.str))
1564                                 || b == CPP_NAME
1565                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1566     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1567                                 || c == '.' || c == '+' || c == '-');
1568     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1569                                 && token1->val.c == '@'
1570                                 && (b == CPP_NAME || b == CPP_STRING));
1571     default:            break;
1572     }
1573
1574   return 0;
1575 }
1576
1577 /* Output all the remaining tokens on the current line, and a newline
1578    character, to FP.  Leading whitespace is removed.  If there are
1579    macros, special token padding is not performed.  */
1580 void
1581 cpp_output_line (pfile, fp)
1582      cpp_reader *pfile;
1583      FILE *fp;
1584 {
1585   const cpp_token *token;
1586
1587   token = cpp_get_token (pfile);
1588   while (token->type != CPP_EOF)
1589     {
1590       cpp_output_token (token, fp);
1591       token = cpp_get_token (pfile);
1592       if (token->flags & PREV_WHITE)
1593         putc (' ', fp);
1594     }
1595
1596   putc ('\n', fp);
1597 }
1598
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be in either letter case.  Aborts if C is not a hexadecimal
   digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1612
1613 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1614    failure if cpplib is not parsing C++ or C99.  Such failure is
1615    silent, and no variables are updated.  Otherwise returns 0, and
1616    warns if -Wtraditional.
1617
1618    [lex.charset]: The character designated by the universal character
1619    name \UNNNNNNNN is that character whose character short name in
1620    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1621    universal character name \uNNNN is that character whose character
1622    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1623    for a universal character name is less than 0x20 or in the range
1624    0x7F-0x9F (inclusive), or if the universal character name
1625    designates a character in the basic source character set, then the
1626    program is ill-formed.
1627
1628    We assume that wchar_t is Unicode, so we don't need to do any
1629    mapping.  Is this ever wrong?
1630
1631    On entry *PC holds the 'u' or 'U' and *PSTR points to the byte
1632    after it; LIMIT is the end of the string or charconst.  On return
1633    *PSTR points after the UCS and the code point is stored in *PC.  */
1634
1635 static int
1636 maybe_read_ucs (pfile, pstr, limit, pc)
1637      cpp_reader *pfile;
1638      const unsigned char **pstr;
1639      const unsigned char *limit;
1640      unsigned int *pc;
1641 {
1642   const unsigned char *p = *pstr;
1643   unsigned int code = 0;
1644   unsigned int c = *pc, length;
1645
1646   /* Only attempt to interpret a UCS for C++ and C99.  */
1647   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1648     return 1;
1649
1650   if (CPP_WTRADITIONAL (pfile))
1651     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1652
       /* \u takes exactly four hex digits, \U exactly eight.  */
1653   length = (c == 'u' ? 4: 8);
1654
1655   if ((size_t) (limit - p) < length)
1656     {
1657       cpp_error (pfile, "incomplete universal-character-name");
1658       /* Skip to the end to avoid more diagnostics.  */
1659       p = limit;
1660     }
1661   else
1662     {
           /* LENGTH counts down to zero only if every digit was valid;
              a nonzero LENGTH afterwards means an error was already
              reported.  */
1663       for (; length; length--, p++)
1664         {
1665           c = *p;
1666           if (ISXDIGIT (c))
1667             code = (code << 4) + hex_digit_value (c);
1668           else
1669             {
1670               cpp_error (pfile,
1671                          "non-hex digit '%c' in universal-character-name", c);
1672               /* We shouldn't skip in case there are multibyte chars.  */
1673               break;
1674             }
1675         }
1676     }
1677
     /* On an EBCDIC target every universal-character-name is an error,
        and the result is replaced by 0x3f.  */
1678 #ifdef TARGET_EBCDIC
1679   cpp_error (pfile, "universal-character-name on EBCDIC target");
1680   code = 0x3f;  /* EBCDIC invalid character */
1681 #else
1682  /* True extended characters are OK.  */
1683   if (code >= 0xa0
1684       && !(code & 0x80000000)
1685       && !(code >= 0xD800 && code <= 0xDFFF))
1686     ;
1687   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1688      hex escapes so that this also works with EBCDIC hosts.  */
1689   else if (code == 0x24 || code == 0x40 || code == 0x60)
1690     ;
1691   /* Don't give another error if one occurred above.  */
1692   else if (length == 0)
1693     cpp_error (pfile, "universal-character-name out of range");
1694 #endif
1695
       /* Both outputs are written even after a diagnostic; CODE is then
          whatever had been accumulated so far (zero if no digit was
          read).  */
1696   *pstr = p;
1697   *pc = code;
1698   return 0;
1699 }
1700
1701 /* Interpret an escape sequence, and return its value.  PSTR points to
1702    the input pointer, which is just after the backslash.  LIMIT is how
1703    much text we have.  MASK is a bitmask for the precision for the
1704    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1705    interpret escapes that did not exist in traditional C.
1706
1707    Handles all relevant diagnostics.  On return *PSTR has been
        advanced past the characters consumed by the escape.  */
1708
1709 unsigned int
1710 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1711      cpp_reader *pfile;
1712      const unsigned char **pstr;
1713      const unsigned char *limit;
1714      unsigned HOST_WIDE_INT mask;
1715      int traditional;
1716 {
1717   int unknown = 0;
1718   const unsigned char *str = *pstr;
       /* NOTE(review): this first byte is read without comparing STR
          against LIMIT; presumably callers guarantee that at least one
          byte follows the backslash -- confirm.  */
1719   unsigned int c = *str++;
1720
1721   switch (c)
1722     {
         /* These escapes stand for the character itself.  */
1723     case '\\': case '\'': case '"': case '?': break;
1724     case 'b': c = TARGET_BS;      break;
1725     case 'f': c = TARGET_FF;      break;
1726     case 'n': c = TARGET_NEWLINE; break;
1727     case 'r': c = TARGET_CR;      break;
1728     case 't': c = TARGET_TAB;     break;
1729     case 'v': c = TARGET_VT;      break;
1730
1731     case '(': case '{': case '[': case '%':
1732       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1733          '\%' is used to prevent SCCS from getting confused.  */
1734       unknown = CPP_PEDANTIC (pfile);
1735       break;
1736
1737     case 'a':
1738       if (CPP_WTRADITIONAL (pfile))
1739         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
           /* When TRADITIONAL is set, C is left as the letter 'a'.  */
1740       if (!traditional)
1741         c = TARGET_BELL;
1742       break;
1743
1744     case 'e': case 'E':
1745       if (CPP_PEDANTIC (pfile))
1746         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1747       c = TARGET_ESC;
1748       break;
1749
1750     case 'u': case 'U':
           /* maybe_read_ucs returns 1 when universal character names
              are not being interpreted; the sequence is then reported
              below as an unknown escape.  */
1751       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1752       break;
1753
1754     case 'x':
1755       if (CPP_WTRADITIONAL (pfile))
1756         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1757
           /* When TRADITIONAL is set, no digits are consumed and C is
              left as the letter 'x'.  */
1758       if (!traditional)
1759         {
1760           unsigned int i = 0, overflow = 0;
1761           int digits_found = 0;
1762
1763           while (str < limit)
1764             {
1765               c = *str;
1766               if (! ISXDIGIT (c))
1767                 break;
1768               str++;
                   /* Becomes nonzero once any of the top four bits of I
                      would be lost by the shift that follows.  */
1769               overflow |= i ^ (i << 4 >> 4);
1770               i = (i << 4) + hex_digit_value (c);
1771               digits_found = 1;
1772             }
1773
1774           if (!digits_found)
1775             cpp_error (pfile, "\\x used with no following hex digits");
1776
1777           if (overflow | (i != (i & mask)))
1778             {
1779               cpp_pedwarn (pfile, "hex escape sequence out of range");
1780               i &= mask;
1781             }
1782           c = i;
1783         }
1784       break;
1785
1786     case '0':  case '1':  case '2':  case '3':
1787     case '4':  case '5':  case '6':  case '7':
1788       {
1789         unsigned int i = c - '0';
1790         int count = 0;
1791
             /* The first digit has already been consumed, so at most
                two more are read here: three octal digits in total.  */
1792         while (str < limit && ++count < 3)
1793           {
1794             c = *str;
1795             if (c < '0' || c > '7')
1796               break;
1797             str++;
1798             i = (i << 3) + c - '0';
1799           }
1800
1801         if (i != (i & mask))
1802           {
1803             cpp_pedwarn (pfile, "octal escape sequence out of range");
1804             i &= mask;
1805           }
1806         c = i;
1807       }
1808       break;
1809
1810     default:
1811       unknown = 1;
1812       break;
1813     }
1814
       /* An unknown escape is diagnosed, but its character is still
          returned as the value.  */
1815   if (unknown)
1816     {
1817       if (ISGRAPH (c))
1818         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1819       else
1820         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1821     }
1822
       /* Hex and octal values were already masked above, so this check
          can only trigger for values produced by the other cases.  */
1823   if (c > mask)
1824     cpp_pedwarn (pfile, "escape sequence out of range for character");
1825
1826   *pstr = str;
1827   return c;
1828 }
1829
     /* Fall back to the plain character type sizes when the
        configuration does not supply separate maximum sizes.  */
1830 #ifndef MAX_CHAR_TYPE_SIZE
1831 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1832 #endif
1833
1834 #ifndef MAX_WCHAR_TYPE_SIZE
1835 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1836 #endif
1837
1838 /* Interpret a (possibly wide) character constant in TOKEN.
1839    WARN_MULTI warns about multi-character charconsts, if not
1840    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1841    that did not exist in traditional C.  PCHARS_SEEN points to a
1842    variable that is filled in with the number of characters seen.  */
1843 HOST_WIDE_INT
1844 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1845      cpp_reader *pfile;
1846      const cpp_token *token;
1847      int warn_multi;
1848      int traditional;
1849      unsigned int *pchars_seen;
1850 {
1851   const unsigned char *str = token->val.str.text;
1852   const unsigned char *limit = str + token->val.str.len;
1853   unsigned int chars_seen = 0;
1854   unsigned int width, max_chars, c;
1855   unsigned HOST_WIDE_INT mask;
1856   HOST_WIDE_INT result = 0;
1857
1858 #ifdef MULTIBYTE_CHARS
1859   (void) local_mbtowc (NULL, NULL, 0);
1860 #endif
1861
1862   /* Width in bits.  */
1863   if (token->type == CPP_CHAR)
1864     width = MAX_CHAR_TYPE_SIZE;
1865   else
1866     width = MAX_WCHAR_TYPE_SIZE;
1867
1868   if (width < HOST_BITS_PER_WIDE_INT)
1869     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1870   else
1871     mask = ~0;
1872   max_chars = HOST_BITS_PER_WIDE_INT / width;
1873
1874   while (str < limit)
1875     {
1876 #ifdef MULTIBYTE_CHARS
1877       wchar_t wc;
1878       int char_len;
1879
1880       char_len = local_mbtowc (&wc, str, limit - str);
1881       if (char_len == -1)
1882         {
1883           cpp_warning (pfile, "ignoring invalid multibyte character");
1884           c = *str++;
1885         }
1886       else
1887         {
1888           str += char_len;
1889           c = wc;
1890         }
1891 #else
1892       c = *str++;
1893 #endif
1894
1895       if (c == '\\')
1896         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1897
1898 #ifdef MAP_CHARACTER
1899       if (ISPRINT (c))
1900         c = MAP_CHARACTER (c);
1901 #endif
1902
1903       /* Merge character into result; ignore excess chars.  */
1904       if (++chars_seen <= max_chars)
1905         {
1906           if (width < HOST_BITS_PER_WIDE_INT)
1907             result = (result << width) | (c & mask);
1908           else
1909             result = c;
1910         }
1911     }
1912
1913   if (chars_seen == 0)
1914     cpp_error (pfile, "empty character constant");
1915   else if (chars_seen > max_chars)
1916     {
1917       chars_seen = max_chars;
1918       cpp_warning (pfile, "character constant too long");
1919     }
1920   else if (chars_seen > 1 && !traditional && warn_multi)
1921     cpp_warning (pfile, "multi-character character constant");
1922
1923   /* If char type is signed, sign-extend the constant.  The
1924      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1925   if (token->type == CPP_CHAR && chars_seen)
1926     {
1927       unsigned int nbits = chars_seen * width;
1928       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1929
1930       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1931           || ((result >> (nbits - 1)) & 1) == 0)
1932         result &= mask;
1933       else
1934         result |= ~mask;
1935     }
1936
1937   *pchars_seen = chars_seen;
1938   return result;
1939 }
1940
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance between BUFF's
   cur and limit pointers.  Every macro argument is parenthesized so
   that an argument containing a low-precedence operator is grouped
   as the caller wrote it.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1955
/* Layout probe, never instantiated.  The single leading char makes
   the compiler pad up to whatever alignment the union of a pointer
   and a double needs, so the offset of U gives that alignment.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment that buffer lengths are rounded to.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to the next multiple of ALIGN.  The mask arithmetic
   is only correct when ALIGN is a power of two.  */
#define CPP_ALIGN(SIZE, ALIGN) (((SIZE) + ((ALIGN) - 1)) & ~((ALIGN) - 1))
1968
1969 /* Create a new allocation buffer.  Place the control block at the end
1970    of the buffer, so that buffer overflows will cause immediate chaos.  */
1971 static _cpp_buff *
1972 new_buff (len)
1973      size_t len;
1974 {
1975   _cpp_buff *result;
1976   unsigned char *base;
1977
1978   if (len < MIN_BUFF_SIZE)
1979     len = MIN_BUFF_SIZE;
1980   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1981
1982   base = xmalloc (len + sizeof (_cpp_buff));
1983   result = (_cpp_buff *) (base + len);
1984   result->base = base;
1985   result->cur = base;
1986   result->limit = base + len;
1987   result->next = NULL;
1988   return result;
1989 }
1990
1991 /* Place a chain of unwanted allocation buffers on the free list.  */
1992 void
1993 _cpp_release_buff (pfile, buff)
1994      cpp_reader *pfile;
1995      _cpp_buff *buff;
1996 {
1997   _cpp_buff *end = buff;
1998
1999   while (end->next)
2000     end = end->next;
2001   end->next = pfile->free_buffs;
2002   pfile->free_buffs = buff;
2003 }
2004
2005 /* Return a free buffer of size at least MIN_SIZE.  */
2006 _cpp_buff *
2007 _cpp_get_buff (pfile, min_size)
2008      cpp_reader *pfile;
2009      size_t min_size;
2010 {
2011   _cpp_buff *result, **p;
2012
2013   for (p = &pfile->free_buffs;; p = &(*p)->next)
2014     {
2015       size_t size;
2016
2017       if (*p == NULL)
2018         return new_buff (min_size);
2019       result = *p;
2020       size = result->limit - result->base;
2021       /* Return a buffer that's big enough, but don't waste one that's
2022          way too big.  */
2023       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2024         break;
2025     }
2026
2027   *p = result->next;
2028   result->next = NULL;
2029   result->cur = result->base;
2030   return result;
2031 }
2032
2033 /* Creates a new buffer with enough space to hold the uncommitted
2034    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2035    the excess bytes to the new buffer.  Chains the new buffer after
2036    BUFF, and returns the new buffer.  */
2037 _cpp_buff *
2038 _cpp_append_extend_buff (pfile, buff, min_extra)
2039      cpp_reader *pfile;
2040      _cpp_buff *buff;
2041      size_t min_extra;
2042 {
2043   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2044   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2045
2046   buff->next = new_buff;
2047   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2048   return new_buff;
2049 }
2050
2051 /* Creates a new buffer with enough space to hold the uncommitted
2052    remaining bytes of the buffer pointed to by BUFF, and at least
2053    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2054    Chains the new buffer before the buffer pointed to by BUFF, and
2055    updates the pointer to point to the new buffer.  */
2056 void
2057 _cpp_extend_buff (pfile, pbuff, min_extra)
2058      cpp_reader *pfile;
2059      _cpp_buff **pbuff;
2060      size_t min_extra;
2061 {
2062   _cpp_buff *new_buff, *old_buff = *pbuff;
2063   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2064
2065   new_buff = _cpp_get_buff (pfile, size);
2066   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2067   new_buff->next = old_buff;
2068   *pbuff = new_buff;
2069 }
2070
2071 /* Free a chain of buffers starting at BUFF.  */
2072 void
2073 _cpp_free_buff (buff)
2074      _cpp_buff *buff;
2075 {
2076   _cpp_buff *next;
2077
2078   for (; buff; buff = next)
2079     {
2080       next = buff->next;
2081       free (buff->base);
2082     }
2083 }
2084
2085 /* Allocate permanent, unaligned storage of length LEN.  */
2086 unsigned char *
2087 _cpp_unaligned_alloc (pfile, len)
2088      cpp_reader *pfile;
2089      size_t len;
2090 {
2091   _cpp_buff *buff = pfile->u_buff;
2092   unsigned char *result = buff->cur;
2093
2094   if (len > (size_t) (buff->limit - result))
2095     {
2096       buff = _cpp_get_buff (pfile, len);
2097       buff->next = pfile->u_buff;
2098       pfile->u_buff = buff;
2099       result = buff->cur;
2100     }
2101
2102   buff->cur = result + len;
2103   return result;
2104 }
2105
2106 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2107    That buffer is used for growing allocations when saving macro
2108    replacement lists in a #define, and when parsing an answer to an
2109    assertion in #assert, #unassert or #if (and therefore possibly
2110    whilst expanding macros).  It therefore must not be used by any
2111    code that they might call: specifically the lexer and the guts of
2112    the macro expander.
2113
2114    All existing other uses clearly fit this restriction: storing
2115    registered pragmas during initialization.  */
2116 unsigned char *
2117 _cpp_aligned_alloc (pfile, len)
2118      cpp_reader *pfile;
2119      size_t len;
2120 {
2121   _cpp_buff *buff = pfile->a_buff;
2122   unsigned char *result = buff->cur;
2123
2124   if (len > (size_t) (buff->limit - result))
2125     {
2126       buff = _cpp_get_buff (pfile, len);
2127       buff->next = pfile->a_buff;
2128       pfile->a_buff = buff;
2129       result = buff->cur;
2130     }
2131
2132   buff->cur = result + len;
2133   return result;
2134 }