gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and it's length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
 102                                           const unsigned char *, HOST_WIDE_INT,
 103                                           int));
 104 static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 105                                       const unsigned char *, unsigned int));
 106
 107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 108 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 109 static unsigned int hex_digit_value PARAMS ((unsigned int));
 110
 111 /* Utility routine:
 112
 113    Compares, the token TOKEN to the NUL-terminated string STRING.
 114    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 115
 116 int
 117 cpp_ideq (token, string)
 118      const cpp_token *token;
 119      const char *string;
 120 {
 121   if (token->type != CPP_NAME)
 122     return 0;
 123
 124   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 125 }
 126
 127 /* Call when meeting a newline.  Returns the character after the newline
 128    (or carriage-return newline combination), or EOF.  */
 129 static cppchar_t
 130 handle_newline (buffer, newline_char)
 131      cpp_buffer *buffer;
 132      cppchar_t newline_char;
 133 {
 134   cppchar_t next = EOF;
 135
 136   buffer->col_adjust = 0;
 137   buffer->lineno++;
 138   buffer->line_base = buffer->cur;
 139
 140   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 141   if (buffer->cur < buffer->rlimit)
 142     {
 143       next = *buffer->cur++;
 144       if (next + newline_char == '\r' + '\n')
 145         {
 146           buffer->line_base = buffer->cur;
 147           if (buffer->cur < buffer->rlimit)
 148             next = *buffer->cur++;
 149           else
 150             next = EOF;
 151         }
 152     }
 153
 154   buffer->read_ahead = next;
 155   return next;
 156 }
 157
 158 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 159    encountered.  It warns if necessary, and returns true if the
 160    trigraph should be honoured.  FROM_CHAR is the third character of a
 161    trigraph, and presumed to be the previous character for position
 162    reporting.  */
 163 static int
 164 trigraph_ok (pfile, from_char)
 165      cpp_reader *pfile;
 166      cppchar_t from_char;
 167 {
 168   int accept = CPP_OPTION (pfile, trigraphs);
 169
 170   /* Don't warn about trigraphs in comments.  */
 171   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 172     {
 173       cpp_buffer *buffer = pfile->buffer;
 174       if (accept)
 175         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 176                                "trigraph ??%c converted to %c",
 177                                (int) from_char,
 178                                (int) _cpp_trigraph_map[from_char]);
 179       else if (buffer->cur != buffer->last_Wtrigraphs)
 180         {
 181           buffer->last_Wtrigraphs = buffer->cur;
 182           cpp_warning_with_line (pfile, buffer->lineno,
 183                                  CPP_BUF_COL (buffer) - 2,
 184                                  "trigraph ??%c ignored", (int) from_char);
 185         }
 186     }
 187
 188   return accept;
 189 }
 190
 191 /* Assumes local variables buffer and result.  */
 192 #define ACCEPT_CHAR(t) \
 193   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 194
 195 /* When we move to multibyte character sets, add to these something
 196    that saves and restores the state of the multibyte conversion
 197    library.  This probably involves saving and restoring a "cookie".
 198    In the case of glibc it is an 8-byte structure, so is not a high
 199    overhead operation.  In any case, it's out of the fast path.  */
 200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 202
 203 /* Skips any escaped newlines introduced by NEXT, which is either a
 204    '?' or a '\\'.  Returns the next character, which will also have
 205    been placed in buffer->read_ahead.  This routine performs
 206    preprocessing stages 1 and 2 of the ISO C standard.  */
 207 static cppchar_t
 208 skip_escaped_newlines (buffer, next)
 209      cpp_buffer *buffer;
 210      cppchar_t next;
 211 {
 212   /* Only do this if we apply stages 1 and 2.  */
 213   if (!buffer->from_stage3)
 214     {
 215       cppchar_t next1;
 216       const unsigned char *saved_cur;
 217       int space;
 218
 219       do
 220         {
 221           if (buffer->cur == buffer->rlimit)
 222             break;
 223
 224           SAVE_STATE ();
 225           if (next == '?')
 226             {
 227               next1 = *buffer->cur++;
 228               if (next1 != '?' || buffer->cur == buffer->rlimit)
 229                 {
 230                   RESTORE_STATE ();
 231                   break;
 232                 }
 233
 234               next1 = *buffer->cur++;
 235               if (!_cpp_trigraph_map[next1]
 236                   || !trigraph_ok (buffer->pfile, next1))
 237                 {
 238                   RESTORE_STATE ();
 239                   break;
 240                 }
 241
 242               /* We have a full trigraph here.  */
 243               next = _cpp_trigraph_map[next1];
 244               if (next != '\\' || buffer->cur == buffer->rlimit)
 245                 break;
 246               SAVE_STATE ();
 247             }
 248
 249           /* We have a backslash, and room for at least one more character.  */
 250           space = 0;
 251           do
 252             {
 253               next1 = *buffer->cur++;
 254               if (!is_nvspace (next1))
 255                 break;
 256               space = 1;
 257             }
 258           while (buffer->cur < buffer->rlimit);
 259
 260           if (!is_vspace (next1))
 261             {
 262               RESTORE_STATE ();
 263               break;
 264             }
 265
 266           if (space && !buffer->pfile->state.lexing_comment)
 267             cpp_warning (buffer->pfile,
 268                          "backslash and newline separated by space");
 269
 270           next = handle_newline (buffer, next1);
 271           if (next == EOF)
 272             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 273         }
 274       while (next == '\\' || next == '?');
 275     }
 276
 277   buffer->read_ahead = next;
 278   return next;
 279 }
 280
 281 /* Obtain the next character, after trigraph conversion and skipping
 282    an arbitrary string of escaped newlines.  The common case of no
 283    trigraphs or escaped newlines falls through quickly.  */
 284 static cppchar_t
 285 get_effective_char (buffer)
 286      cpp_buffer *buffer;
 287 {
 288   cppchar_t next = EOF;
 289
 290   if (buffer->cur < buffer->rlimit)
 291     {
 292       next = *buffer->cur++;
 293
 294       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 295          can introduce escaped newlines, which we want to skip, or
 296          UCNs, which, depending upon lexer state, we will handle in
 297          the future.  */
 298       if (next == '?' || next == '\\')
 299         next = skip_escaped_newlines (buffer, next);
 300     }
 301
 302   buffer->read_ahead = next;
 303   return next;
 304 }
 305
 306 /* Skip a C-style block comment.  We find the end of the comment by
 307    seeing if an asterisk is before every '/' we encounter.  Returns
 308    non-zero if comment terminated by EOF, zero otherwise.  */
 309 static int
 310 skip_block_comment (pfile)
 311      cpp_reader *pfile;
 312 {
 313   cpp_buffer *buffer = pfile->buffer;
 314   cppchar_t c = EOF, prevc = EOF;
 315
 316   pfile->state.lexing_comment = 1;
 317   while (buffer->cur != buffer->rlimit)
 318     {
 319       prevc = c, c = *buffer->cur++;
 320
 321     next_char:
 322       /* FIXME: For speed, create a new character class of characters
 323          of interest inside block comments.  */
 324       if (c == '?' || c == '\\')
 325         c = skip_escaped_newlines (buffer, c);
 326
 327       /* People like decorating comments with '*', so check for '/'
 328          instead for efficiency.  */
 329       if (c == '/')
 330         {
 331           if (prevc == '*')
 332             break;
 333
 334           /* Warn about potential nested comments, but not if the '/'
 335              comes immediately before the true comment delimeter.
 336              Don't bother to get it right across escaped newlines.  */
 337           if (CPP_OPTION (pfile, warn_comments)
 338               && buffer->cur != buffer->rlimit)
 339             {
 340               prevc = c, c = *buffer->cur++;
 341               if (c == '*' && buffer->cur != buffer->rlimit)
 342                 {
 343                   prevc = c, c = *buffer->cur++;
 344                   if (c != '/')
 345                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 346                                            CPP_BUF_COL (buffer),
 347                                            "\"/*\" within comment");
 348                 }
 349               goto next_char;
 350             }
 351         }
 352       else if (is_vspace (c))
 353         {
 354           prevc = c, c = handle_newline (buffer, c);
 355           goto next_char;
 356         }
 357       else if (c == '\t')
 358         adjust_column (pfile);
 359     }
 360
 361   pfile->state.lexing_comment = 0;
 362   buffer->read_ahead = EOF;
 363   return c != '/' || prevc != '*';
 364 }
 365
 366 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 367    non-zero if a multiline comment.  The following new line, if any,
 368    is left in buffer->read_ahead.  */
 369 static int
 370 skip_line_comment (pfile)
 371      cpp_reader *pfile;
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_lineno = buffer->lineno;
 375   cppchar_t c;
 376
 377   pfile->state.lexing_comment = 1;
 378   do
 379     {
 380       c = EOF;
 381       if (buffer->cur == buffer->rlimit)
 382         break;
 383
 384       c = *buffer->cur++;
 385       if (c == '?' || c == '\\')
 386         c = skip_escaped_newlines (buffer, c);
 387     }
 388   while (!is_vspace (c));
 389
 390   pfile->state.lexing_comment = 0;
 391   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 392   return orig_lineno != buffer->lineno;
 393 }
 394
 395 /* pfile->buffer->cur is one beyond the \t character.  Update
 396    col_adjust so we track the column correctly.  */
 397 static void
 398 adjust_column (pfile)
 399      cpp_reader *pfile;
 400 {
 401   cpp_buffer *buffer = pfile->buffer;
 402   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 403
 404   /* Round it up to multiple of the tabstop, but subtract 1 since the
 405      tab itself occupies a character position.  */
 406   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 407                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 408 }
 409
 410 /* Skips whitespace, saving the next non-whitespace character.
 411    Adjusts pfile->col_adjust to account for tabs.  Without this,
 412    tokens might be assigned an incorrect column.  */
 413 static void
 414 skip_whitespace (pfile, c)
 415      cpp_reader *pfile;
 416      cppchar_t c;
 417 {
 418   cpp_buffer *buffer = pfile->buffer;
 419   unsigned int warned = 0;
 420
 421   do
 422     {
 423       /* Horizontal space always OK.  */
 424       if (c == ' ')
 425         ;
 426       else if (c == '\t')
 427         adjust_column (pfile);
 428       /* Just \f \v or \0 left.  */
 429       else if (c == '\0')
 430         {
 431           if (!warned)
 432             {
 433               cpp_warning (pfile, "null character(s) ignored");
 434               warned = 1;
 435             }
 436         }
 437       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 438         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 439                                CPP_BUF_COL (buffer),
 440                                "%s in preprocessing directive",
 441                                c == '\f' ? "form feed" : "vertical tab");
 442
 443       c = EOF;
 444       if (buffer->cur == buffer->rlimit)
 445         break;
 446       c = *buffer->cur++;
 447     }
 448   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 449   while (is_nvspace (c));
 450
 451   /* Remember the next character.  */
 452   buffer->read_ahead = c;
 453 }
 454
 455 /* See if the characters of a number token are valid in a name (no
 456    '.', '+' or '-').  */
 457 static int
 458 name_p (pfile, string)
 459      cpp_reader *pfile;
 460      const cpp_string *string;
 461 {
 462   unsigned int i;
 463
 464   for (i = 0; i < string->len; i++)
 465     if (!is_idchar (string->text[i]))
 466       return 0;
 467
 468   return 1;
 469 }
 470
 471 /* Parse an identifier, skipping embedded backslash-newlines.
 472    Calculate the hash value of the token while parsing, for improved
 473    performance.  The hashing algorithm *must* match cpp_lookup().  */
 474
 475 static cpp_hashnode *
 476 parse_identifier (pfile, c)
 477      cpp_reader *pfile;
 478      cppchar_t c;
 479 {
 480   cpp_hashnode *result;
 481   cpp_buffer *buffer = pfile->buffer;
 482   unsigned char *dest, *limit;
 483   unsigned int r = 0, saw_dollar = 0;
 484
 485   dest = POOL_FRONT (&pfile->ident_pool);
 486   limit = POOL_LIMIT (&pfile->ident_pool);
 487
 488   do
 489     {
 490       do
 491         {
 492           /* Need room for terminating null.  */
 493           if (dest + 1 >= limit)
 494             limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
 495
 496           *dest++ = c;
 497           r = HASHSTEP (r, c);
 498
 499           if (c == '$')
 500             saw_dollar++;
 501
 502           c = EOF;
 503           if (buffer->cur == buffer->rlimit)
 504             break;
 505
 506           c = *buffer->cur++;
 507         }
 508       while (is_idchar (c));
 509
 510       /* Potential escaped newline?  */
 511       if (c != '?' && c != '\\')
 512         break;
 513       c = skip_escaped_newlines (buffer, c);
 514     }
 515   while (is_idchar (c));
 516
 517   /* Remember the next character.  */
 518   buffer->read_ahead = c;
 519
 520   /* $ is not a identifier character in the standard, but is commonly
 521      accepted as an extension.  Don't warn about it in skipped
 522      conditional blocks.  */
 523   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 524     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 525
 526   /* Identifiers are null-terminated.  */
 527   *dest = '\0';
 528
 529   /* This routine commits the memory if necessary.  */
 530   result = _cpp_lookup_with_hash (pfile,
 531                                   dest - POOL_FRONT (&pfile->ident_pool), r);
 532
 533   /* Some identifiers require diagnostics when lexed.  */
 534   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 535     {
 536       /* It is allowed to poison the same identifier twice.  */
 537       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 538         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 539                    NODE_NAME (result));
 540
 541       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 542          replacement list of a variadic macro.  */
 543       if (result == pfile->spec_nodes.n__VA_ARGS__
 544           && !pfile->state.va_args_ok)
 545         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 546     }
 547
 548   return result;
 549 }
 550
 551 /* Parse a number, skipping embedded backslash-newlines.  */
 552 static void
 553 parse_number (pfile, number, c, leading_period)
 554      cpp_reader *pfile;
 555      cpp_string *number;
 556      cppchar_t c;
 557      int leading_period;
 558 {
 559   cpp_buffer *buffer = pfile->buffer;
 560   cpp_pool *pool = &pfile->ident_pool;
 561   unsigned char *dest, *limit;
 562
 563   dest = POOL_FRONT (pool);
 564   limit = POOL_LIMIT (pool);
 565
 566   /* Place a leading period.  */
 567   if (leading_period)
 568     {
 569       if (dest >= limit)
 570         limit = _cpp_next_chunk (pool, 0, &dest);
 571       *dest++ = '.';
 572     }
 573
 574   do
 575     {
 576       do
 577         {
 578           /* Need room for terminating null.  */
 579           if (dest + 1 >= limit)
 580             limit = _cpp_next_chunk (pool, 0, &dest);
 581           *dest++ = c;
 582
 583           c = EOF;
 584           if (buffer->cur == buffer->rlimit)
 585             break;
 586
 587           c = *buffer->cur++;
 588         }
 589       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 590
 591       /* Potential escaped newline?  */
 592       if (c != '?' && c != '\\')
 593         break;
 594       c = skip_escaped_newlines (buffer, c);
 595     }
 596   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 597
 598   /* Remember the next character.  */
 599   buffer->read_ahead = c;
 600
 601   /* Null-terminate the number.  */
 602   *dest = '\0';
 603
 604   number->text = POOL_FRONT (pool);
 605   number->len = dest - number->text;
 606   POOL_COMMIT (pool, number->len + 1);
 607 }
 608
 609 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 610 static void
 611 unterminated (pfile, term)
 612      cpp_reader *pfile;
 613      int term;
 614 {
 615   cpp_error (pfile, "missing terminating %c character", term);
 616
 617   if (term == '\"' && pfile->mlstring_pos.line
 618       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 619     {
 620       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 621                            pfile->mlstring_pos.col,
 622                            "possible start of unterminated string literal");
 623       pfile->mlstring_pos.line = 0;
 624     }
 625 }
 626
 627 /* Subroutine of parse_string.  */
 628 static int
 629 unescaped_terminator_p (pfile, dest)
 630      cpp_reader *pfile;
 631      const unsigned char *dest;
 632 {
 633   const unsigned char *start, *temp;
 634
 635   /* In #include-style directives, terminators are not escapeable.  */
 636   if (pfile->state.angled_headers)
 637     return 1;
 638
 639   start = POOL_FRONT (&pfile->ident_pool);
 640
 641   /* An odd number of consecutive backslashes represents an escaped
 642      terminator.  */
 643   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 644     ;
 645
 646   return ((dest - temp) & 1) == 0;
 647 }
 648
 649 /* Parses a string, character constant, or angle-bracketed header file
 650    name.  Handles embedded trigraphs and escaped newlines.  The stored
 651    string is guaranteed NUL-terminated, but it is not guaranteed that
 652    this is the first NUL since embedded NULs are preserved.
 653
 654    Multi-line strings are allowed, but they are deprecated.  */
 655 static void
 656 parse_string (pfile, token, terminator)
 657      cpp_reader *pfile;
 658      cpp_token *token;
 659      cppchar_t terminator;
 660 {
 661   cpp_buffer *buffer = pfile->buffer;
 662   cpp_pool *pool = &pfile->ident_pool;
 663   unsigned char *dest, *limit;
 664   cppchar_t c;
 665   unsigned int nulls = 0;
 666
 667   dest = POOL_FRONT (pool);
 668   limit = POOL_LIMIT (pool);
 669
 670   for (;;)
 671     {
 672       if (buffer->cur == buffer->rlimit)
 673         c = EOF;
 674       else
 675         c = *buffer->cur++;
 676
 677     have_char:
 678       /* We need space for the terminating NUL.  */
 679       if (dest >= limit)
 680         limit = _cpp_next_chunk (pool, 0, &dest);
 681
 682       if (c == EOF)
 683         {
 684           unterminated (pfile, terminator);
 685           break;
 686         }
 687
 688       /* Handle trigraphs, escaped newlines etc.  */
 689       if (c == '?' || c == '\\')
 690         c = skip_escaped_newlines (buffer, c);
 691
 692       if (c == terminator && unescaped_terminator_p (pfile, dest))
 693         {
 694           c = EOF;
 695           break;
 696         }
 697       else if (is_vspace (c))
 698         {
 699           /* In assembly language, silently terminate string and
 700              character literals at end of line.  This is a kludge
 701              around not knowing where comments are.  */
 702           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 703             break;
 704
 705           /* Character constants and header names may not extend over
 706              multiple lines.  In Standard C, neither may strings.
 707              Unfortunately, we accept multiline strings as an
 708              extension, except in #include family directives.  */
 709           if (terminator != '"' || pfile->state.angled_headers)
 710             {
 711               unterminated (pfile, terminator);
 712               break;
 713             }
 714
 715           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 716           if (pfile->mlstring_pos.line == 0)
 717             pfile->mlstring_pos = pfile->lexer_pos;
 718
 719           c = handle_newline (buffer, c);
 720           *dest++ = '\n';
 721           goto have_char;
 722         }
 723       else if (c == '\0')
 724         {
 725           if (nulls++ == 0)
 726             cpp_warning (pfile, "null character(s) preserved in literal");
 727         }
 728
 729       *dest++ = c;
 730     }
 731
 732   /* Remember the next character.  */
 733   buffer->read_ahead = c;
 734   *dest = '\0';
 735
 736   token->val.str.text = POOL_FRONT (pool);
 737   token->val.str.len = dest - token->val.str.text;
 738   POOL_COMMIT (pool, token->val.str.len + 1);
 739 }
 740
 741 /* The stored comment includes the comment start and any terminator.  */
 742 static void
 743 save_comment (pfile, token, from)
 744      cpp_reader *pfile;
 745      cpp_token *token;
 746      const unsigned char *from;
 747 {
 748   unsigned char *buffer;
 749   unsigned int len;
 750
 751   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 752   /* C++ comments probably (not definitely) have moved past a new
 753      line, which we don't want to save in the comment.  */
 754   if (pfile->buffer->read_ahead != EOF)
 755     len--;
 756   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 757
 758   token->type = CPP_COMMENT;
 759   token->val.str.len = len;
 760   token->val.str.text = buffer;
 761
 762   buffer[0] = '/';
 763   memcpy (buffer + 1, from, len - 1);
 764 }
 765
 766 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 767    want to avoid stepping back when lexing %:%X.  */
 768 static void
 769 lex_percent (buffer, result)
 770      cpp_buffer *buffer;
 771      cpp_token *result;
 772 {
 773   cppchar_t c;
 774
 775   result->type = CPP_MOD;
 776   /* Parsing %:%X could leave an extra character.  */
 777   if (buffer->extra_char == EOF)
 778     c = get_effective_char (buffer);
 779   else
 780     {
 781       c = buffer->read_ahead = buffer->extra_char;
 782       buffer->extra_char = EOF;
 783     }
 784
 785   if (c == '=')
 786     ACCEPT_CHAR (CPP_MOD_EQ);
 787   else if (CPP_OPTION (buffer->pfile, digraphs))
 788     {
 789       if (c == ':')
 790         {
 791           result->flags |= DIGRAPH;
 792           ACCEPT_CHAR (CPP_HASH);
 793           if (get_effective_char (buffer) == '%')
 794             {
 795               buffer->extra_char = get_effective_char (buffer);
 796               if (buffer->extra_char == ':')
 797                 {
 798                   buffer->extra_char = EOF;
 799                   ACCEPT_CHAR (CPP_PASTE);
 800                 }
 801               else
 802                 /* We'll catch the extra_char when we're called back.  */
 803                 buffer->read_ahead = '%';
 804             }
 805         }
 806       else if (c == '>')
 807         {
 808           result->flags |= DIGRAPH;
 809           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 810         }
 811     }
 812 }
 813
 814 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 815    want to avoid stepping back when lexing '...' or '.123'.  In the
 816    latter case we should also set a flag for parse_number.  */
 817 static void
 818 lex_dot (pfile, result)
 819      cpp_reader *pfile;
 820      cpp_token *result;
 821 {
 822   cpp_buffer *buffer = pfile->buffer;
 823   cppchar_t c;
 824
 825   /* Parsing ..X could leave an extra character.  */
 826   if (buffer->extra_char == EOF)
 827     c = get_effective_char (buffer);
 828   else
 829     {
 830       c = buffer->read_ahead = buffer->extra_char;
 831       buffer->extra_char = EOF;
 832     }
 833
 834   /* All known character sets have 0...9 contiguous.  */
 835   if (c >= '0' && c <= '9')
 836     {
 837       result->type = CPP_NUMBER;
 838       parse_number (pfile, &result->val.str, c, 1);
 839     }
 840   else
 841     {
 842       result->type = CPP_DOT;
 843       if (c == '.')
 844         {
 845           buffer->extra_char = get_effective_char (buffer);
 846           if (buffer->extra_char == '.')
 847             {
 848               buffer->extra_char = EOF;
 849               ACCEPT_CHAR (CPP_ELLIPSIS);
 850             }
 851           else
 852             /* We'll catch the extra_char when we're called back.  */
 853             buffer->read_ahead = '.';
 854         }
 855       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 856         ACCEPT_CHAR (CPP_DOT_STAR);
 857     }
 858 }
 859
     /* Lex a single token from PFILE's current buffer into RESULT.

        The character to examine comes from buffer->read_ahead when that
        holds one, otherwise from the buffer itself.  Whitespace other
        than newlines is skipped and recorded as PREV_WHITE in the
        token's flags.  Outside a directive, newlines are consumed and
        lexing continues on the next line; inside a directive a newline
        is left in read_ahead and CPP_EOF is returned.  CPP_EOF is also
        returned when the buffer is exhausted.

        Recognized comments are skipped and flagged as preceding
        whitespace, unless pfile->state.save_comments is set, in which
        case the comment is returned as a token of its own.  A CPP_HASH
        token (from '#', or as produced by lex_percent) at the beginning
        of a line is handed to _cpp_handle_directive, except while macro
        arguments are being parsed, where the '#' is pushed back into
        the lookahead and CPP_EOF is returned instead.

        While pfile->skipping is set, tokens that reach the end of the
        switch are discarded and another one is lexed.  Otherwise, a
        token lexed outside a directive sets pfile->mi_state to
        MI_FAILED.  */
 860 void
 861 _cpp_lex_token (pfile, result)
 862      cpp_reader *pfile;
 863      cpp_token *result;
 864 {
 865   cppchar_t c;
 866   cpp_buffer *buffer;
 867   const unsigned char *comment_start;
 868   unsigned char bol;
 869
 870  skip:
 871   bol = pfile->state.next_bol;
 872  done_directive:
 873   buffer = pfile->buffer;
 874   pfile->state.next_bol = 0;
 875   result->flags = buffer->saved_flags;
 876   buffer->saved_flags = 0;
 877  next_char:
 878   pfile->lexer_pos.line = buffer->lineno;
 879  next_char2:
 880   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 881
       /* Prefer a pending lookahead character; otherwise read the next
          one from the buffer, leaving C as EOF when nothing remains.  */
 882   c = buffer->read_ahead;
 883   if (c == EOF && buffer->cur < buffer->rlimit)
 884     {
 885       c = *buffer->cur++;
 886       pfile->lexer_pos.col++;
 887     }
 888
 889  do_switch:
 890   buffer->read_ahead = EOF;
 891   switch (c)
 892     {
 893     case EOF:
 894       /* Non-empty files should end in a newline.  Checking "bol" too
 895           prevents multiple warnings when hitting the EOF more than
 896           once, like in a directive.  Don't warn for command line and
 897           _Pragma buffers.  */
 898       if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
 899         cpp_pedwarn (pfile, "no newline at end of file");
 900       pfile->state.next_bol = 1;
 901       pfile->skipping = 0;      /* In case missing #endif.  */
 902       result->type = CPP_EOF;
 903       /* Don't do MI optimisation.  */
 904       return;
 905
 906     case ' ': case '\t': case '\f': case '\v': case '\0':
 907       skip_whitespace (pfile, c);
 908       result->flags |= PREV_WHITE;
 909       goto next_char2;
 910
 911     case '\n': case '\r':
 912       if (!pfile->state.in_directive)
 913         {
 914           handle_newline (buffer, c);
 915           bol = 1;
 916           pfile->lexer_pos.output_line = buffer->lineno;
 917           /* This is a new line, so clear any white space flag.
 918              Newlines in arguments are white space (6.10.3.10);
 919              parse_arg takes care of that.  */
 920           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 921           goto next_char;
 922         }
 923
 924       /* Don't let directives spill over to the next line.  */
 925       buffer->read_ahead = c;
 926       pfile->state.next_bol = 1;
 927       result->type = CPP_EOF;
 928       /* Don't break; pfile->skipping might be true.  */
 929       return;
 930
 931     case '?':
 932     case '\\':
 933       /* These could start an escaped newline, or '?' a trigraph.  Let
 934          skip_escaped_newlines do all the work.  */
 935       {
 936         unsigned int lineno = buffer->lineno;
 937
 938         c = skip_escaped_newlines (buffer, c);
 939         if (lineno != buffer->lineno)
 940           /* We had at least one escaped newline of some sort, and the
 941              next character is in buffer->read_ahead.  Update the
 942              token's line and column.  */
 943             goto next_char;
 944
 945         /* We are either the original '?' or '\\', or a trigraph.  */
 946         result->type = CPP_QUERY;
 947         buffer->read_ahead = EOF;
 948         if (c == '\\')
 949           goto random_char;
 950         else if (c != '?')
 951           goto do_switch;
 952       }
 953       break;
 954
 955     case '0': case '1': case '2': case '3': case '4':
 956     case '5': case '6': case '7': case '8': case '9':
 957       result->type = CPP_NUMBER;
 958       parse_number (pfile, &result->val.str, c, 0);
 959       break;
 960
 961     case '$':
 962       if (!CPP_OPTION (pfile, dollars_in_ident))
 963         goto random_char;
 964       /* Fall through... */
 965
 966     case '_':
 967     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 968     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 969     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 970     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 971     case 'y': case 'z':
 972     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 973     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 974     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 975     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 976     case 'Y': case 'Z':
 977       result->type = CPP_NAME;
 978       result->val.node = parse_identifier (pfile, c);
 979
 980       /* 'L' may introduce wide characters or strings.  */
 981       if (result->val.node == pfile->spec_nodes.n_L)
 982         {
 983           c = buffer->read_ahead; /* For make_string.  */
 984           if (c == '\'' || c == '"')
 985             {
 986               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 987               goto make_string;
 988             }
 989         }
 990       /* Convert named operators to their proper types.  */
 991       else if (result->val.node->flags & NODE_OPERATOR)
 992         {
 993           result->flags |= NAMED_OP;
 994           result->type = result->val.node->value.operator;
 995         }
 996       break;
 997
 998     case '\'':
 999     case '"':
1000       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1001     make_string:
1002       parse_string (pfile, result, c);
1003       break;
1004
1005     case '/':
1006       /* A potential block or line comment.  */
1007       comment_start = buffer->cur;
1008       result->type = CPP_DIV;
1009       c = get_effective_char (buffer);
1010       if (c == '=')
1011         ACCEPT_CHAR (CPP_DIV_EQ);
1012       if (c != '/' && c != '*')
1013         break;
1014       if (buffer->from_stage3)
1015         break;
1016
1017       if (c == '*')
1018         {
1019           if (skip_block_comment (pfile))
1020             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1021                                  pfile->lexer_pos.col,
1022                                  "unterminated comment");
1023         }
1024       else
1025         {
1026           if (!CPP_OPTION (pfile, cplusplus_comments)
1027               && !CPP_IN_SYSTEM_HEADER (pfile))
1028             break;
1029
1030           /* Warn about comments only if pedantically GNUC89, and not
1031              in system headers.  */
1032           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1033               && ! buffer->warned_cplusplus_comments)
1034             {
1035               cpp_pedwarn (pfile,
1036                            "C++ style comments are not allowed in ISO C89");
1037               cpp_pedwarn (pfile,
1038                            "(this will be reported only once per input file)");
1039               buffer->warned_cplusplus_comments = 1;
1040             }
1041
1042           /* Skip_line_comment updates buffer->read_ahead.  */
1043           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1044             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1045                                    pfile->lexer_pos.col,
1046                                    "multi-line comment");
1047         }
1048
1049       /* Skipping the comment has updated buffer->read_ahead.  */
1050       if (!pfile->state.save_comments)
1051         {
1052           result->flags |= PREV_WHITE;
1053           goto next_char;
1054         }
1055
1056       /* Save the comment as a token in its own right.  */
1057       save_comment (pfile, result, comment_start);
1058       /* Don't do MI optimisation.  */
1059       return;
1060
1061     case '<':
1062       if (pfile->state.angled_headers)
1063         {
1064           result->type = CPP_HEADER_NAME;
1065           c = '>';              /* terminator.  */
1066           goto make_string;
1067         }
1068
1069       result->type = CPP_LESS;
1070       c = get_effective_char (buffer);
1071       if (c == '=')
1072         ACCEPT_CHAR (CPP_LESS_EQ);
1073       else if (c == '<')
1074         {
1075           ACCEPT_CHAR (CPP_LSHIFT);
1076           if (get_effective_char (buffer) == '=')
1077             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1078         }
1079       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1080         {
1081           ACCEPT_CHAR (CPP_MIN);
1082           if (get_effective_char (buffer) == '=')
1083             ACCEPT_CHAR (CPP_MIN_EQ);
1084         }
1085       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1086         {
1087           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1088           result->flags |= DIGRAPH;
1089         }
1090       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1091         {
1092           ACCEPT_CHAR (CPP_OPEN_BRACE);
1093           result->flags |= DIGRAPH;
1094         }
1095       break;
1096
1097     case '>':
1098       result->type = CPP_GREATER;
1099       c = get_effective_char (buffer);
1100       if (c == '=')
1101         ACCEPT_CHAR (CPP_GREATER_EQ);
1102       else if (c == '>')
1103         {
1104           ACCEPT_CHAR (CPP_RSHIFT);
1105           if (get_effective_char (buffer) == '=')
1106             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1107         }
1108       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1109         {
1110           ACCEPT_CHAR (CPP_MAX);
1111           if (get_effective_char (buffer) == '=')
1112             ACCEPT_CHAR (CPP_MAX_EQ);
1113         }
1114       break;
1115
1116     case '%':
1117       lex_percent (buffer, result);
1118       if (result->type == CPP_HASH)
1119         goto do_hash;
1120       break;
1121
1122     case '.':
1123       lex_dot (pfile, result);
1124       break;
1125
1126     case '+':
1127       result->type = CPP_PLUS;
1128       c = get_effective_char (buffer);
1129       if (c == '=')
1130         ACCEPT_CHAR (CPP_PLUS_EQ);
1131       else if (c == '+')
1132         ACCEPT_CHAR (CPP_PLUS_PLUS);
1133       break;
1134
1135     case '-':
1136       result->type = CPP_MINUS;
1137       c = get_effective_char (buffer);
1138       if (c == '>')
1139         {
1140           ACCEPT_CHAR (CPP_DEREF);
1141           if (CPP_OPTION (pfile, cplusplus)
1142               && get_effective_char (buffer) == '*')
1143             ACCEPT_CHAR (CPP_DEREF_STAR);
1144         }
1145       else if (c == '=')
1146         ACCEPT_CHAR (CPP_MINUS_EQ);
1147       else if (c == '-')
1148         ACCEPT_CHAR (CPP_MINUS_MINUS);
1149       break;
1150
1151     case '*':
1152       result->type = CPP_MULT;
1153       if (get_effective_char (buffer) == '=')
1154         ACCEPT_CHAR (CPP_MULT_EQ);
1155       break;
1156
1157     case '=':
1158       result->type = CPP_EQ;
1159       if (get_effective_char (buffer) == '=')
1160         ACCEPT_CHAR (CPP_EQ_EQ);
1161       break;
1162
1163     case '!':
1164       result->type = CPP_NOT;
1165       if (get_effective_char (buffer) == '=')
1166         ACCEPT_CHAR (CPP_NOT_EQ);
1167       break;
1168
1169     case '&':
1170       result->type = CPP_AND;
1171       c = get_effective_char (buffer);
1172       if (c == '=')
1173         ACCEPT_CHAR (CPP_AND_EQ);
1174       else if (c == '&')
1175         ACCEPT_CHAR (CPP_AND_AND);
1176       break;
1177
1178     case '#':
1179       c = buffer->extra_char;   /* Can be set by error condition below.  */
1180       if (c != EOF)
1181         {
1182           buffer->read_ahead = c;
1183           buffer->extra_char = EOF;
1184         }
1185       else
1186         c = get_effective_char (buffer);
1187
1188       if (c == '#')
1189         {
1190           ACCEPT_CHAR (CPP_PASTE);
1191           break;
1192         }
1193
1194       result->type = CPP_HASH;
1195     do_hash:
1196       if (!bol)
1197         break;
1198       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1199          tokens within the list of arguments that would otherwise act
1200          as preprocessing directives, the behavior is undefined.
1201
1202          This implementation will report a hard error, terminate the
1203          macro invocation, and proceed to process the directive.  */
1204       if (pfile->state.parsing_args)
1205         {
1206           if (pfile->state.parsing_args == 2)
1207             cpp_error (pfile,
1208                        "directives may not be used inside a macro argument");
1209
1210           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1211           buffer->extra_char = buffer->read_ahead;
1212           buffer->read_ahead = '#';
1213           pfile->state.next_bol = 1;
1214           result->type = CPP_EOF;
1215
1216           /* Get whitespace right - newline_in_args sets it.  */
1217           if (pfile->lexer_pos.col == 1)
1218             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1219         }
1220       else
1221         {
1222           /* This is the hash introducing a directive.  */
1223           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1224             goto done_directive; /* bol still 1.  */
1225           /* This is in fact an assembler #.  */
1226         }
1227       break;
1228
1229     case '|':
1230       result->type = CPP_OR;
1231       c = get_effective_char (buffer);
1232       if (c == '=')
1233         ACCEPT_CHAR (CPP_OR_EQ);
1234       else if (c == '|')
1235         ACCEPT_CHAR (CPP_OR_OR);
1236       break;
1237
1238     case '^':
1239       result->type = CPP_XOR;
1240       if (get_effective_char (buffer) == '=')
1241         ACCEPT_CHAR (CPP_XOR_EQ);
1242       break;
1243
1244     case ':':
1245       result->type = CPP_COLON;
1246       c = get_effective_char (buffer);
1247       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1248         ACCEPT_CHAR (CPP_SCOPE);
1249       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1250         {
1251           result->flags |= DIGRAPH;
1252           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1253         }
1254       break;
1255
1256     case '~': result->type = CPP_COMPL; break;
1257     case ',': result->type = CPP_COMMA; break;
1258     case '(': result->type = CPP_OPEN_PAREN; break;
1259     case ')': result->type = CPP_CLOSE_PAREN; break;
1260     case '[': result->type = CPP_OPEN_SQUARE; break;
1261     case ']': result->type = CPP_CLOSE_SQUARE; break;
1262     case '{': result->type = CPP_OPEN_BRACE; break;
1263     case '}': result->type = CPP_CLOSE_BRACE; break;
1264     case ';': result->type = CPP_SEMICOLON; break;
1265
1266       /* @ is a punctuator in Objective C.  */
1267     case '@': result->type = CPP_ATSIGN; break;
1268
1269     random_char:
1270     default:
1271       result->type = CPP_OTHER;
1272       result->val.c = c;
1273       break;
1274     }
1275
       /* While pfile->skipping is set, discard this token and lex
          another one.  */
1276   if (pfile->skipping)
1277     goto skip;
1278
1279   /* If not in a directive, this token invalidates controlling macros.  */
1280   if (!pfile->state.in_directive)
1281     pfile->mi_state = MI_FAILED;
1282 }
1283
1284 /* An upper bound on the number of bytes needed to spell a token,
1285    including preceding whitespace.  */
1286 unsigned int
1287 cpp_token_len (token)
1288      const cpp_token *token;
1289 {
1290   unsigned int len;
1291
1292   switch (TOKEN_SPELL (token))
1293     {
1294     default:            len = 0;                                break;
1295     case SPELL_STRING:  len = token->val.str.len;               break;
1296     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1297     }
1298   /* 1 for whitespace, 4 for comment delimeters.  */
1299   return len + 5;
1300 }
1301
1302 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1303    already contain the enough space to hold the token's spelling.
1304    Returns a pointer to the character after the last character
1305    written.  */
1306 unsigned char *
1307 cpp_spell_token (pfile, token, buffer)
1308      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1309      const cpp_token *token;
1310      unsigned char *buffer;
1311 {
1312   switch (TOKEN_SPELL (token))
1313     {
1314     case SPELL_OPERATOR:
1315       {
1316         const unsigned char *spelling;
1317         unsigned char c;
1318
1319         if (token->flags & DIGRAPH)
1320           spelling
1321             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1322         else if (token->flags & NAMED_OP)
1323           goto spell_ident;
1324         else
1325           spelling = TOKEN_NAME (token);
1326
1327         while ((c = *spelling++) != '\0')
1328           *buffer++ = c;
1329       }
1330       break;
1331
1332     case SPELL_IDENT:
1333       spell_ident:
1334       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1335       buffer += NODE_LEN (token->val.node);
1336       break;
1337
1338     case SPELL_STRING:
1339       {
1340         int left, right, tag;
1341         switch (token->type)
1342           {
1343           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1344           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1345           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1346           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1347           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1348           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1349           }
1350         if (tag) *buffer++ = tag;
1351         if (left) *buffer++ = left;
1352         memcpy (buffer, token->val.str.text, token->val.str.len);
1353         buffer += token->val.str.len;
1354         if (right) *buffer++ = right;
1355       }
1356       break;
1357
1358     case SPELL_CHAR:
1359       *buffer++ = token->val.c;
1360       break;
1361
1362     case SPELL_NONE:
1363       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1364       break;
1365     }
1366
1367   return buffer;
1368 }
1369
1370 /* Returns a token as a null-terminated string.  The string is
1371    temporary, and automatically freed later.  Useful for diagnostics.  */
1372 unsigned char *
1373 cpp_token_as_text (pfile, token)
1374      cpp_reader *pfile;
1375      const cpp_token *token;
1376 {
1377   unsigned int len = cpp_token_len (token);
1378   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1379
1380   end = cpp_spell_token (pfile, token, start);
1381   end[0] = '\0';
1382
1383   return start;
1384 }
1385
1386 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1387 const char *
1388 cpp_type2name (type)
1389      enum cpp_ttype type;
1390 {
1391   return (const char *) token_spellings[type].name;
1392 }
1393
1394 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1395    for efficiency - to avoid double-buffering.  Also, outputs a space
1396    if PREV_WHITE is flagged.  */
1397 void
1398 cpp_output_token (token, fp)
1399      const cpp_token *token;
1400      FILE *fp;
1401 {
1402   if (token->flags & PREV_WHITE)
1403     putc (' ', fp);
1404
1405   switch (TOKEN_SPELL (token))
1406     {
1407     case SPELL_OPERATOR:
1408       {
1409         const unsigned char *spelling;
1410
1411         if (token->flags & DIGRAPH)
1412           spelling
1413             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1414         else if (token->flags & NAMED_OP)
1415           goto spell_ident;
1416         else
1417           spelling = TOKEN_NAME (token);
1418
1419         ufputs (spelling, fp);
1420       }
1421       break;
1422
1423     spell_ident:
1424     case SPELL_IDENT:
1425       ufputs (NODE_NAME (token->val.node), fp);
1426     break;
1427
1428     case SPELL_STRING:
1429       {
1430         int left, right, tag;
1431         switch (token->type)
1432           {
1433           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1434           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1435           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1436           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1437           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1438           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1439           }
1440         if (tag) putc (tag, fp);
1441         if (left) putc (left, fp);
1442         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1443         if (right) putc (right, fp);
1444       }
1445       break;
1446
1447     case SPELL_CHAR:
1448       putc (token->val.c, fp);
1449       break;
1450
1451     case SPELL_NONE:
1452       /* An error, most probably.  */
1453       break;
1454     }
1455 }
1456
1457 /* Compare two tokens.  */
1458 int
1459 _cpp_equiv_tokens (a, b)
1460      const cpp_token *a, *b;
1461 {
1462   if (a->type == b->type && a->flags == b->flags)
1463     switch (TOKEN_SPELL (a))
1464       {
1465       default:                  /* Keep compiler happy.  */
1466       case SPELL_OPERATOR:
1467         return 1;
1468       case SPELL_CHAR:
1469         return a->val.c == b->val.c; /* Character.  */
1470       case SPELL_NONE:
1471         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1472       case SPELL_IDENT:
1473         return a->val.node == b->val.node;
1474       case SPELL_STRING:
1475         return (a->val.str.len == b->val.str.len
1476                 && !memcmp (a->val.str.text, b->val.str.text,
1477                             a->val.str.len));
1478       }
1479
1480   return 0;
1481 }
1482
1483 /* Determine whether two tokens can be pasted together, and if so,
1484    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1485    be pasted, or the appropriate type for the merged token if they
1486    can.  */
1487 enum cpp_ttype
1488 cpp_can_paste (pfile, token1, token2, digraph)
1489      cpp_reader * pfile;
1490      const cpp_token *token1, *token2;
1491      int* digraph;
1492 {
1493   enum cpp_ttype a = token1->type, b = token2->type;
1494   int cxx = CPP_OPTION (pfile, cplusplus);
1495
1496   /* Treat named operators as if they were ordinary NAMEs.  */
1497   if (token1->flags & NAMED_OP)
1498     a = CPP_NAME;
1499   if (token2->flags & NAMED_OP)
1500     b = CPP_NAME;
1501
1502   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1503     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1504
1505   switch (a)
1506     {
1507     case CPP_GREATER:
1508       if (b == a) return CPP_RSHIFT;
1509       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1510       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1511       break;
1512     case CPP_LESS:
1513       if (b == a) return CPP_LSHIFT;
1514       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1515       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1516       if (CPP_OPTION (pfile, digraphs))
1517         {
1518           if (b == CPP_COLON)
1519             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1520           if (b == CPP_MOD)
1521             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1522         }
1523       break;
1524
1525     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1526     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1527     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1528
1529     case CPP_MINUS:
1530       if (b == a)               return CPP_MINUS_MINUS;
1531       if (b == CPP_GREATER)     return CPP_DEREF;
1532       break;
1533     case CPP_COLON:
1534       if (b == a && cxx)        return CPP_SCOPE;
1535       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1536         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1537       break;
1538
1539     case CPP_MOD:
1540       if (CPP_OPTION (pfile, digraphs))
1541         {
1542           if (b == CPP_GREATER)
1543             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1544           if (b == CPP_COLON)
1545             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1546         }
1547       break;
1548     case CPP_DEREF:
1549       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1550       break;
1551     case CPP_DOT:
1552       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1553       if (b == CPP_NUMBER)      return CPP_NUMBER;
1554       break;
1555
1556     case CPP_HASH:
1557       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1558         /* %:%: digraph */
1559         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1560       break;
1561
1562     case CPP_NAME:
1563       if (b == CPP_NAME)        return CPP_NAME;
1564       if (b == CPP_NUMBER
1565           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1566       if (b == CPP_CHAR
1567           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1568       if (b == CPP_STRING
1569           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1570       break;
1571
1572     case CPP_NUMBER:
1573       if (b == CPP_NUMBER)      return CPP_NUMBER;
1574       if (b == CPP_NAME)        return CPP_NUMBER;
1575       if (b == CPP_DOT)         return CPP_NUMBER;
1576       /* Numbers cannot have length zero, so this is safe.  */
1577       if ((b == CPP_PLUS || b == CPP_MINUS)
1578           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1579         return CPP_NUMBER;
1580       break;
1581
1582     default:
1583       break;
1584     }
1585
1586   return CPP_EOF;
1587 }
1588
1589 /* Returns nonzero if a space should be inserted to avoid an
1590    accidental token paste for output.  For simplicity, it is
1591    conservative, and occasionally advises a space where one is not
1592    needed, e.g. "." and ".2".  */
1593
1594 int
1595 cpp_avoid_paste (pfile, token1, token2)
1596      cpp_reader *pfile;
1597      const cpp_token *token1, *token2;
1598 {
1599   enum cpp_ttype a = token1->type, b = token2->type;
1600   cppchar_t c;
1601
1602   if (token1->flags & NAMED_OP)
1603     a = CPP_NAME;
1604   if (token2->flags & NAMED_OP)
1605     b = CPP_NAME;
1606
1607   c = EOF;
1608   if (token2->flags & DIGRAPH)
1609     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1610   else if (token_spellings[b].category == SPELL_OPERATOR)
1611     c = token_spellings[b].name[0];
1612
1613   /* Quickly get everything that can paste with an '='.  */
1614   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1615     return 1;
1616
1617   switch (a)
1618     {
1619     case CPP_GREATER:   return c == '>' || c == '?';
1620     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1621     case CPP_PLUS:      return c == '+';
1622     case CPP_MINUS:     return c == '-' || c == '>';
1623     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1624     case CPP_MOD:       return c == ':' || c == '>';
1625     case CPP_AND:       return c == '&';
1626     case CPP_OR:        return c == '|';
1627     case CPP_COLON:     return c == ':' || c == '>';
1628     case CPP_DEREF:     return c == '*';
1629     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1630     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1631     case CPP_NAME:      return ((b == CPP_NUMBER
1632                                  && name_p (pfile, &token2->val.str))
1633                                 || b == CPP_NAME
1634                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1635     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1636                                 || c == '.' || c == '+' || c == '-');
1637     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1638                                 && token1->val.c == '@'
1639                                 && (b == CPP_NAME || b == CPP_STRING));
1640     default:            break;
1641     }
1642
1643   return 0;
1644 }
1645
1646 /* Output all the remaining tokens on the current line, and a newline
1647    character, to FP.  Leading whitespace is removed.  */
1648 void
1649 cpp_output_line (pfile, fp)
1650      cpp_reader *pfile;
1651      FILE *fp;
1652 {
1653   cpp_token token;
1654
1655   cpp_get_token (pfile, &token);
1656   token.flags &= ~PREV_WHITE;
1657   while (token.type != CPP_EOF)
1658     {
1659       cpp_output_token (&token, fp);
1660       cpp_get_token (pfile, &token);
1661     }
1662
1663   putc ('\n', fp);
1664 }
1665
/* Map the hexadecimal digit C, in either case, to its numeric value
   (0 through 15).  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  /* The amount to subtract from C to obtain its value.  */
  unsigned int base;

  if (c >= '0' && c <= '9')
    base = '0';
  else if (c >= 'a' && c <= 'f')
    base = 'a' - 10;
  else if (c >= 'A' && c <= 'F')
    base = 'A' - 10;
  else
    abort ();

  return c - base;
}
1679
1680 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1681
1682    [lex.charset]: The character designated by the universal character
1683    name \UNNNNNNNN is that character whose character short name in
1684    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1685    universal character name \uNNNN is that character whose character
1686    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1687    for a universal character name is less than 0x20 or in the range
1688    0x7F-0x9F (inclusive), or if the universal character name
1689    designates a character in the basic source character set, then the
1690    program is ill-formed.
1691
1692    We assume that wchar_t is Unicode, so we don't need to do any
1693    mapping.  Is this ever wrong?  */
1694
1695 static unsigned int
1696 read_ucs (pfile, pstr, limit, length)
1697      cpp_reader *pfile;
1698      const unsigned char **pstr;
1699      const unsigned char *limit;
1700      unsigned int length;
1701 {
1702   const unsigned char *p = *pstr;
1703   unsigned int c, code = 0;
1704
1705   for (; length; --length)
1706     {
1707       if (p >= limit)
1708         {
1709           cpp_error (pfile, "incomplete universal-character-name");
1710           break;
1711         }
1712
1713       c = *p;
1714       if (ISXDIGIT (c))
1715         {
1716           code = (code << 4) + hex_digit_value (c);
1717           p++;
1718         }
1719       else
1720         {
1721           cpp_error (pfile,
1722                      "non-hex digit '%c' in universal-character-name", c);
1723           break;
1724         }
1725
1726     }
1727
1728 #ifdef TARGET_EBCDIC
1729   cpp_error (pfile, "universal-character-name on EBCDIC target");
1730   code = 0x3f;  /* EBCDIC invalid character */
1731 #else
1732   if (code > 0x9f && !(code & 0x80000000))
1733     ; /* True extended character, OK.  */
1734   else if (code >= 0x20 && code < 0x7f)
1735     {
1736       /* ASCII printable character.  The C character set consists of all of
1737          these except $, @ and `.  We use hex escapes so that this also
1738          works with EBCDIC hosts.  */
1739       if (code != 0x24 && code != 0x40 && code != 0x60)
1740         cpp_error (pfile, "universal-character-name used for '%c'", code);
1741     }
1742   else
1743     cpp_error (pfile, "invalid universal-character-name");
1744 #endif
1745
1746   *pstr = p;
1747   return code;
1748 }
1749
1750 /* Interpret an escape sequence, and return its value.  PSTR points to
1751    the input pointer, which is just after the backslash.  LIMIT is how
1752    much text we have.  MASK is the precision for the target type (char
1753    or wchar_t).  TRADITIONAL, if true, does not interpret escapes that
1754    did not exist in traditional C.

        On return *PSTR has been advanced past the characters consumed.
        Hex and octal values that do not fit in MASK draw a pedwarn and
        are truncated to MASK.  An unrecognized escape draws a pedwarn
        and evaluates to the character that followed the backslash.  */
1755
1756 static unsigned int
1757 parse_escape (pfile, pstr, limit, mask, traditional)
1758      cpp_reader *pfile;
1759      const unsigned char **pstr;
1760      const unsigned char *limit;
1761      HOST_WIDE_INT mask;
1762      int traditional;
1763 {
1764   int unknown = 0;
1765   const unsigned char *str = *pstr;
1766   unsigned int c = *str++;
1767
1768   switch (c)
1769     {
1770     case '\\': case '\'': case '"': case '?': break;
1771     case 'b': c = TARGET_BS;      break;
1772     case 'f': c = TARGET_FF;      break;
1773     case 'n': c = TARGET_NEWLINE; break;
1774     case 'r': c = TARGET_CR;      break;
1775     case 't': c = TARGET_TAB;     break;
1776     case 'v': c = TARGET_VT;      break;
1777
1778     case '(': case '{': case '[': case '%':
1779       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1780          '\%' is used to prevent SCCS from getting confused.  */
1781       unknown = CPP_PEDANTIC (pfile);
1782       break;
1783
1784     case 'a':
1785       if (CPP_WTRADITIONAL (pfile))
1786         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1787       if (!traditional)
1788         c = TARGET_BELL;
1789       break;
1790
1791     case 'e': case 'E':
1792       if (CPP_PEDANTIC (pfile))
1793         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1794       c = TARGET_ESC;
1795       break;
1796
1797       /* Warnings and support checks handled by read_ucs().  */
1798     case 'u': case 'U':
1799       if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
1800         {
1801           if (CPP_WTRADITIONAL (pfile))
1802             cpp_warning (pfile,
1803                          "the meaning of '\\%c' varies with -traditional", c);
1804           c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
1805         }
1806       else
1807         unknown = 1;
1808       break;
1809
1810     case 'x':
1811       if (CPP_WTRADITIONAL (pfile))
1812         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1813
           /* When TRADITIONAL is set no digits are read and the result
              is the letter 'x' itself.  */
1814       if (!traditional)
1815         {
1816           unsigned int i = 0, overflow = 0;
1817           int digits_found = 0;
1818
1819           while (str < limit)
1820             {
1821               c = *str;
1822               if (! ISXDIGIT (c))
1823                 break;
1824               str++;
                   /* Record any bits of I that the shift below is about
                      to push off the top.  */
1825               overflow |= i ^ (i << 4 >> 4);
1826               i = (i << 4) + hex_digit_value (c);
1827               digits_found = 1;
1828             }
1829
1830           if (!digits_found)
1831             cpp_error (pfile, "\\x used with no following hex digits");
1832
1833           if (overflow | (i != (i & mask)))
1834             {
1835               cpp_pedwarn (pfile, "hex escape sequence out of range");
1836               i &= mask;
1837             }
1838           c = i;
1839         }
1840       break;
1841
1842     case '0':  case '1':  case '2':  case '3':
1843     case '4':  case '5':  case '6':  case '7':
1844       {
1845         unsigned int i = c - '0';
1846         int count = 0;
1847
             /* At most three octal digits in total, counting the one
                already consumed.  */
1848         while (str < limit && ++count < 3)
1849           {
1850             c = *str;
1851             if (c < '0' || c > '7')
1852               break;
1853             str++;
1854             i = (i << 3) + c - '0';
1855           }
1856
1857         if (i != (i & mask))
1858           {
1859             cpp_pedwarn (pfile, "octal escape sequence out of range");
1860             i &= mask;
1861           }
1862         c = i;
1863       }
1864       break;
1865
1866     default:
1867       unknown = 1;
1868       break;
1869     }
1870
1871   if (unknown)
1872     {
1873       if (ISGRAPH (c))
1874         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1875       else
1876         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1877     }
1878
1879   *pstr = str;
1880   return c;
1881 }
1882
/* Unless MAX_CHAR_TYPE_SIZE has already been defined, use
   CHAR_TYPE_SIZE as the width in bits of an ordinary character
   constant.  */
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

/* Likewise, fall back to WCHAR_TYPE_SIZE as the width in bits of a
   wide character constant.  */
#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1890
1891 /* Interpret a (possibly wide) character constant in TOKEN.
1892    WARN_MULTI warns about multi-character charconsts, if not
1893    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1894    that did not exist in traditional C.  PCHARS_SEEN points to a
1895    variable that is filled in with the number of characters seen.  */
1896 HOST_WIDE_INT
1897 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1898      cpp_reader *pfile;
1899      const cpp_token *token;
1900      int warn_multi;
1901      int traditional;
1902      unsigned int *pchars_seen;
1903 {
1904   const unsigned char *str = token->val.str.text;
1905   const unsigned char *limit = str + token->val.str.len;
1906   unsigned int chars_seen = 0;
1907   unsigned int width, max_chars, c;
1908   HOST_WIDE_INT result = 0, mask;
1909
1910 #ifdef MULTIBYTE_CHARS
1911   (void) local_mbtowc (NULL, NULL, 0);
1912 #endif
1913
1914   /* Width in bits.  */
1915   if (token->type == CPP_CHAR)
1916     width = MAX_CHAR_TYPE_SIZE;
1917   else
1918     width = MAX_WCHAR_TYPE_SIZE;
1919
1920   if (width < HOST_BITS_PER_WIDE_INT)
1921     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1922   else
1923     mask = ~0;
1924   max_chars = HOST_BITS_PER_WIDE_INT / width;
1925
1926   while (str < limit)
1927     {
1928 #ifdef MULTIBYTE_CHARS
1929       wchar_t wc;
1930       int char_len;
1931
1932       char_len = local_mbtowc (&wc, str, limit - str);
1933       if (char_len == -1)
1934         {
1935           cpp_warning (pfile, "ignoring invalid multibyte character");
1936           c = *str++;
1937         }
1938       else
1939         {
1940           str += char_len;
1941           c = wc;
1942         }
1943 #else
1944       c = *str++;
1945 #endif
1946
1947       if (c == '\\')
1948         {
1949           c = parse_escape (pfile, &str, limit, mask, traditional);
1950           if (width < HOST_BITS_PER_WIDE_INT && c > mask)
1951             cpp_pedwarn (pfile, "escape sequence out of range for character");
1952         }
1953
1954 #ifdef MAP_CHARACTER
1955       if (ISPRINT (c))
1956         c = MAP_CHARACTER (c);
1957 #endif
1958
1959       /* Merge character into result; ignore excess chars.  */
1960       if (++chars_seen <= max_chars)
1961         {
1962           if (width < HOST_BITS_PER_WIDE_INT)
1963             result = (result << width) | (c & mask);
1964           else
1965             result = c;
1966         }
1967     }
1968
1969   if (chars_seen == 0)
1970     cpp_error (pfile, "empty character constant");
1971   else if (chars_seen > max_chars)
1972     {
1973       chars_seen = max_chars;
1974       cpp_error (pfile, "character constant too long");
1975     }
1976   else if (chars_seen > 1 && !traditional && warn_multi)
1977     cpp_warning (pfile, "multi-character character constant");
1978
1979   /* If char type is signed, sign-extend the constant.  The
1980      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1981   if (token->type == CPP_CHAR && chars_seen)
1982     {
1983       unsigned int nbits = chars_seen * width;
1984       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1985
1986       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1987           || ((result >> (nbits - 1)) & 1) == 0)
1988         result &= mask;
1989       else
1990         result |= ~mask;
1991     }
1992
1993   *pchars_seen = chars_seen;
1994   return result;
1995 }
1996
1997 /* Memory pools.  */
1998
/* Layout probe used here only to compute DEFAULT_ALIGNMENT: a single
   char followed by a union of a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* The offset of member U within struct dummy, i.e. where the compiler
   places the double / pointer union after a lone char.  It is the
   alignment used when a pool is initialized with an alignment of
   zero, and the granularity to which new chunk sizes are rounded.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2010
2011 static int
2012 chunk_suitable (pool, chunk, size)
2013      cpp_pool *pool;
2014      cpp_chunk *chunk;
2015      unsigned int size;
2016 {
2017   /* Being at least twice SIZE means we can use memcpy in
2018      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2019      anyway.  */
2020   return (chunk && pool->locked != chunk
2021           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2022 }
2023
2024 /* Returns the end of the new pool.  PTR points to a char in the old
2025    pool, and is updated to point to the same char in the new pool.  */
2026 unsigned char *
2027 _cpp_next_chunk (pool, len, ptr)
2028      cpp_pool *pool;
2029      unsigned int len;
2030      unsigned char **ptr;
2031 {
2032   cpp_chunk *chunk = pool->cur->next;
2033
2034   /* LEN is the minimum size we want in the new pool.  */
2035   len += POOL_ROOM (pool);
2036   if (! chunk_suitable (pool, chunk, len))
2037     {
2038       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2039
2040       chunk->next = pool->cur->next;
2041       pool->cur->next = chunk;
2042     }
2043
2044   /* Update the pointer before changing chunk's front.  */
2045   if (ptr)
2046     *ptr += chunk->base - POOL_FRONT (pool);
2047
2048   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2049   chunk->front = chunk->base;
2050
2051   pool->cur = chunk;
2052   return POOL_LIMIT (pool);
2053 }
2054
2055 static cpp_chunk *
2056 new_chunk (size)
2057      unsigned int size;
2058 {
2059   unsigned char *base;
2060   cpp_chunk *result;
2061
2062   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2063   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2064   /* Put the chunk descriptor at the end.  Then chunk overruns will
2065      cause obvious chaos.  */
2066   result = (cpp_chunk *) (base + size);
2067   result->base = base;
2068   result->front = base;
2069   result->limit = base + size;
2070   result->next = 0;
2071
2072   return result;
2073 }
2074
2075 void
2076 _cpp_init_pool (pool, size, align, temp)
2077      cpp_pool *pool;
2078      unsigned int size, align, temp;
2079 {
2080   if (align == 0)
2081     align = DEFAULT_ALIGNMENT;
2082   if (align & (align - 1))
2083     abort ();
2084   pool->align = align;
2085   pool->cur = new_chunk (size);
2086   pool->locked = 0;
2087   pool->locks = 0;
2088   if (temp)
2089     pool->cur->next = pool->cur;
2090 }
2091
2092 void
2093 _cpp_lock_pool (pool)
2094      cpp_pool *pool;
2095 {
2096   if (pool->locks++ == 0)
2097     pool->locked = pool->cur;
2098 }
2099
2100 void
2101 _cpp_unlock_pool (pool)
2102      cpp_pool *pool;
2103 {
2104   if (--pool->locks == 0)
2105     pool->locked = 0;
2106 }
2107
2108 void
2109 _cpp_free_pool (pool)
2110      cpp_pool *pool;
2111 {
2112   cpp_chunk *chunk = pool->cur, *next;
2113
2114   do
2115     {
2116       next = chunk->next;
2117       free (chunk->base);
2118       chunk = next;
2119     }
2120   while (chunk && chunk != pool->cur);
2121 }
2122
2123 /* Reserve LEN bytes from a memory pool.  */
2124 unsigned char *
2125 _cpp_pool_reserve (pool, len)
2126      cpp_pool *pool;
2127      unsigned int len;
2128 {
2129   len = POOL_ALIGN (len, pool->align);
2130   if (len > (unsigned int) POOL_ROOM (pool))
2131     _cpp_next_chunk (pool, len, 0);
2132
2133   return POOL_FRONT (pool);
2134 }
2135
2136 /* Allocate LEN bytes from a memory pool.  */
2137 unsigned char *
2138 _cpp_pool_alloc (pool, len)
2139      cpp_pool *pool;
2140      unsigned int len;
2141 {
2142   unsigned char *result = _cpp_pool_reserve (pool, len);
2143
2144   POOL_COMMIT (pool, len);
2145   return result;
2146 }