gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
   31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and it's length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
 102                                           const unsigned char *, HOST_WIDE_INT,
 103                                           int));
 104 static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 105                                       const unsigned char *, unsigned int));
 106
 107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 108 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 109 static unsigned int hex_digit_value PARAMS ((unsigned int));
 110
 111 /* Utility routine:
 112
 113    Compares, the token TOKEN to the NUL-terminated string STRING.
 114    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 115
 116 int
 117 cpp_ideq (token, string)
 118      const cpp_token *token;
 119      const char *string;
 120 {
 121   if (token->type != CPP_NAME)
 122     return 0;
 123
 124   return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
 125 }
 126
 127 /* Call when meeting a newline.  Returns the character after the newline
 128    (or carriage-return newline combination), or EOF.  */
 129 static cppchar_t
 130 handle_newline (buffer, newline_char)
 131      cpp_buffer *buffer;
 132      cppchar_t newline_char;
 133 {
 134   cppchar_t next = EOF;
 135
 136   buffer->col_adjust = 0;
 137   buffer->lineno++;
 138   buffer->line_base = buffer->cur;
 139
 140   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 141   if (buffer->cur < buffer->rlimit)
 142     {
 143       next = *buffer->cur++;
 144       if (next + newline_char == '\r' + '\n')
 145         {
 146           buffer->line_base = buffer->cur;
 147           if (buffer->cur < buffer->rlimit)
 148             next = *buffer->cur++;
 149           else
 150             next = EOF;
 151         }
 152     }
 153
 154   buffer->read_ahead = next;
 155   return next;
 156 }
 157
 158 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 159    encountered.  It warns if necessary, and returns true if the
 160    trigraph should be honoured.  FROM_CHAR is the third character of a
 161    trigraph, and presumed to be the previous character for position
 162    reporting.  */
 163 static int
 164 trigraph_ok (pfile, from_char)
 165      cpp_reader *pfile;
 166      cppchar_t from_char;
 167 {
 168   int accept = CPP_OPTION (pfile, trigraphs);
 169
 170   /* Don't warn about trigraphs in comments.  */
 171   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 172     {
 173       cpp_buffer *buffer = pfile->buffer;
 174       if (accept)
 175         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 176                                "trigraph ??%c converted to %c",
 177                                (int) from_char,
 178                                (int) _cpp_trigraph_map[from_char]);
 179       else if (buffer->cur != buffer->last_Wtrigraphs)
 180         {
 181           buffer->last_Wtrigraphs = buffer->cur;
 182           cpp_warning_with_line (pfile, buffer->lineno,
 183                                  CPP_BUF_COL (buffer) - 2,
 184                                  "trigraph ??%c ignored", (int) from_char);
 185         }
 186     }
 187
 188   return accept;
 189 }
 190
 191 /* Assumes local variables buffer and result.  */
 192 #define ACCEPT_CHAR(t) \
 193   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 194
 195 /* When we move to multibyte character sets, add to these something
 196    that saves and restores the state of the multibyte conversion
 197    library.  This probably involves saving and restoring a "cookie".
 198    In the case of glibc it is an 8-byte structure, so is not a high
 199    overhead operation.  In any case, it's out of the fast path.  */
 200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 202
 203 /* Skips any escaped newlines introduced by NEXT, which is either a
 204    '?' or a '\\'.  Returns the next character, which will also have
 205    been placed in buffer->read_ahead.  This routine performs
 206    preprocessing stages 1 and 2 of the ISO C standard.  */
 207 static cppchar_t
 208 skip_escaped_newlines (buffer, next)
 209      cpp_buffer *buffer;
 210      cppchar_t next;
 211 {
 212   /* Only do this if we apply stages 1 and 2.  */
 213   if (!buffer->from_stage3)
 214     {
 215       cppchar_t next1;
 216       const unsigned char *saved_cur;
 217       int space;
 218
 219       do
 220         {
 221           if (buffer->cur == buffer->rlimit)
 222             break;
 223
 224           SAVE_STATE ();
 225           if (next == '?')
 226             {
 227               next1 = *buffer->cur++;
 228               if (next1 != '?' || buffer->cur == buffer->rlimit)
 229                 {
 230                   RESTORE_STATE ();
 231                   break;
 232                 }
 233
 234               next1 = *buffer->cur++;
 235               if (!_cpp_trigraph_map[next1]
 236                   || !trigraph_ok (buffer->pfile, next1))
 237                 {
 238                   RESTORE_STATE ();
 239                   break;
 240                 }
 241
 242               /* We have a full trigraph here.  */
 243               next = _cpp_trigraph_map[next1];
 244               if (next != '\\' || buffer->cur == buffer->rlimit)
 245                 break;
 246               SAVE_STATE ();
 247             }
 248
 249           /* We have a backslash, and room for at least one more character.  */
 250           space = 0;
 251           do
 252             {
 253               next1 = *buffer->cur++;
 254               if (!is_nvspace (next1))
 255                 break;
 256               space = 1;
 257             }
 258           while (buffer->cur < buffer->rlimit);
 259
 260           if (!is_vspace (next1))
 261             {
 262               RESTORE_STATE ();
 263               break;
 264             }
 265
 266           if (space && !buffer->pfile->state.lexing_comment)
 267             cpp_warning (buffer->pfile,
 268                          "backslash and newline separated by space");
 269
 270           next = handle_newline (buffer, next1);
 271           if (next == EOF)
 272             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 273         }
 274       while (next == '\\' || next == '?');
 275     }
 276
 277   buffer->read_ahead = next;
 278   return next;
 279 }
 280
 281 /* Obtain the next character, after trigraph conversion and skipping
 282    an arbitrary string of escaped newlines.  The common case of no
 283    trigraphs or escaped newlines falls through quickly.  */
 284 static cppchar_t
 285 get_effective_char (buffer)
 286      cpp_buffer *buffer;
 287 {
 288   cppchar_t next = EOF;
 289
 290   if (buffer->cur < buffer->rlimit)
 291     {
 292       next = *buffer->cur++;
 293
 294       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 295          can introduce escaped newlines, which we want to skip, or
 296          UCNs, which, depending upon lexer state, we will handle in
 297          the future.  */
 298       if (next == '?' || next == '\\')
 299         next = skip_escaped_newlines (buffer, next);
 300     }
 301
 302   buffer->read_ahead = next;
 303   return next;
 304 }
 305
 306 /* Skip a C-style block comment.  We find the end of the comment by
 307    seeing if an asterisk is before every '/' we encounter.  Returns
 308    non-zero if comment terminated by EOF, zero otherwise.  */
 309 static int
 310 skip_block_comment (pfile)
 311      cpp_reader *pfile;
 312 {
 313   cpp_buffer *buffer = pfile->buffer;
 314   cppchar_t c = EOF, prevc = EOF;
 315
 316   pfile->state.lexing_comment = 1;
 317   while (buffer->cur != buffer->rlimit)
 318     {
 319       prevc = c, c = *buffer->cur++;
 320
 321     next_char:
 322       /* FIXME: For speed, create a new character class of characters
 323          of interest inside block comments.  */
 324       if (c == '?' || c == '\\')
 325         c = skip_escaped_newlines (buffer, c);
 326
 327       /* People like decorating comments with '*', so check for '/'
 328          instead for efficiency.  */
 329       if (c == '/')
 330         {
 331           if (prevc == '*')
 332             break;
 333
 334           /* Warn about potential nested comments, but not if the '/'
 335              comes immediately before the true comment delimeter.
 336              Don't bother to get it right across escaped newlines.  */
 337           if (CPP_OPTION (pfile, warn_comments)
 338               && buffer->cur != buffer->rlimit)
 339             {
 340               prevc = c, c = *buffer->cur++;
 341               if (c == '*' && buffer->cur != buffer->rlimit)
 342                 {
 343                   prevc = c, c = *buffer->cur++;
 344                   if (c != '/')
 345                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 346                                            CPP_BUF_COL (buffer),
 347                                            "\"/*\" within comment");
 348                 }
 349               goto next_char;
 350             }
 351         }
 352       else if (is_vspace (c))
 353         {
 354           prevc = c, c = handle_newline (buffer, c);
 355           goto next_char;
 356         }
 357       else if (c == '\t')
 358         adjust_column (pfile);
 359     }
 360
 361   pfile->state.lexing_comment = 0;
 362   buffer->read_ahead = EOF;
 363   return c != '/' || prevc != '*';
 364 }
 365
 366 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 367    non-zero if a multiline comment.  The following new line, if any,
 368    is left in buffer->read_ahead.  */
 369 static int
 370 skip_line_comment (pfile)
 371      cpp_reader *pfile;
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_lineno = buffer->lineno;
 375   cppchar_t c;
 376
 377   pfile->state.lexing_comment = 1;
 378   do
 379     {
 380       c = EOF;
 381       if (buffer->cur == buffer->rlimit)
 382         break;
 383
 384       c = *buffer->cur++;
 385       if (c == '?' || c == '\\')
 386         c = skip_escaped_newlines (buffer, c);
 387     }
 388   while (!is_vspace (c));
 389
 390   pfile->state.lexing_comment = 0;
 391   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 392   return orig_lineno != buffer->lineno;
 393 }
 394
 395 /* pfile->buffer->cur is one beyond the \t character.  Update
 396    col_adjust so we track the column correctly.  */
 397 static void
 398 adjust_column (pfile)
 399      cpp_reader *pfile;
 400 {
 401   cpp_buffer *buffer = pfile->buffer;
 402   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 403
 404   /* Round it up to multiple of the tabstop, but subtract 1 since the
 405      tab itself occupies a character position.  */
 406   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 407                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 408 }
 409
 410 /* Skips whitespace, saving the next non-whitespace character.
 411    Adjusts pfile->col_adjust to account for tabs.  Without this,
 412    tokens might be assigned an incorrect column.  */
 413 static void
 414 skip_whitespace (pfile, c)
 415      cpp_reader *pfile;
 416      cppchar_t c;
 417 {
 418   cpp_buffer *buffer = pfile->buffer;
 419   unsigned int warned = 0;
 420
 421   do
 422     {
 423       /* Horizontal space always OK.  */
 424       if (c == ' ')
 425         ;
 426       else if (c == '\t')
 427         adjust_column (pfile);
 428       /* Just \f \v or \0 left.  */
 429       else if (c == '\0')
 430         {
 431           if (!warned)
 432             {
 433               cpp_warning (pfile, "null character(s) ignored");
 434               warned = 1;
 435             }
 436         }
 437       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 438         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 439                                CPP_BUF_COL (buffer),
 440                                "%s in preprocessing directive",
 441                                c == '\f' ? "form feed" : "vertical tab");
 442
 443       c = EOF;
 444       if (buffer->cur == buffer->rlimit)
 445         break;
 446       c = *buffer->cur++;
 447     }
 448   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 449   while (is_nvspace (c));
 450
 451   /* Remember the next character.  */
 452   buffer->read_ahead = c;
 453 }
 454
 455 /* See if the characters of a number token are valid in a name (no
 456    '.', '+' or '-').  */
 457 static int
 458 name_p (pfile, string)
 459      cpp_reader *pfile;
 460      const cpp_string *string;
 461 {
 462   unsigned int i;
 463
 464   for (i = 0; i < string->len; i++)
 465     if (!is_idchar (string->text[i]))
 466       return 0;
 467
 468   return 1;
 469 }
 470
 471 /* Parse an identifier, skipping embedded backslash-newlines.
 472    Calculate the hash value of the token while parsing, for improved
 473    performance.  The hashing algorithm *must* match cpp_lookup().  */
 474
 475 static cpp_hashnode *
 476 parse_identifier (pfile, c)
 477      cpp_reader *pfile;
 478      cppchar_t c;
 479 {
 480   cpp_hashnode *result;
 481   cpp_buffer *buffer = pfile->buffer;
 482   unsigned char *dest, *limit;
 483   unsigned int r = 0, saw_dollar = 0;
 484
 485   dest = POOL_FRONT (&pfile->ident_pool);
 486   limit = POOL_LIMIT (&pfile->ident_pool);
 487
 488   do
 489     {
 490       do
 491         {
 492           /* Need room for terminating null.  */
 493           if (dest + 1 >= limit)
 494             limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
 495
 496           *dest++ = c;
 497           r = HASHSTEP (r, c);
 498
 499           if (c == '$')
 500             saw_dollar++;
 501
 502           c = EOF;
 503           if (buffer->cur == buffer->rlimit)
 504             break;
 505
 506           c = *buffer->cur++;
 507         }
 508       while (is_idchar (c));
 509
 510       /* Potential escaped newline?  */
 511       if (c != '?' && c != '\\')
 512         break;
 513       c = skip_escaped_newlines (buffer, c);
 514     }
 515   while (is_idchar (c));
 516
 517   /* Remember the next character.  */
 518   buffer->read_ahead = c;
 519
 520   /* $ is not a identifier character in the standard, but is commonly
 521      accepted as an extension.  Don't warn about it in skipped
 522      conditional blocks.  */
 523   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 524     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 525
 526   /* Identifiers are null-terminated.  */
 527   *dest = '\0';
 528
 529   /* This routine commits the memory if necessary.  */
 530   result = _cpp_lookup_with_hash (pfile,
 531                                   dest - POOL_FRONT (&pfile->ident_pool), r);
 532
 533   /* Some identifiers require diagnostics when lexed.  */
 534   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 535     {
 536       /* It is allowed to poison the same identifier twice.  */
 537       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 538         cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
 539
 540       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 541          replacement list of a variadic macro.  */
 542       if (result == pfile->spec_nodes.n__VA_ARGS__
 543           && !pfile->state.va_args_ok)
 544         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 545     }
 546
 547   return result;
 548 }
 549
 550 /* Parse a number, skipping embedded backslash-newlines.  */
 551 static void
 552 parse_number (pfile, number, c, leading_period)
 553      cpp_reader *pfile;
 554      cpp_string *number;
 555      cppchar_t c;
 556      int leading_period;
 557 {
 558   cpp_buffer *buffer = pfile->buffer;
 559   cpp_pool *pool = &pfile->ident_pool;
 560   unsigned char *dest, *limit;
 561
 562   dest = POOL_FRONT (pool);
 563   limit = POOL_LIMIT (pool);
 564
 565   /* Place a leading period.  */
 566   if (leading_period)
 567     {
 568       if (dest >= limit)
 569         limit = _cpp_next_chunk (pool, 0, &dest);
 570       *dest++ = '.';
 571     }
 572
 573   do
 574     {
 575       do
 576         {
 577           /* Need room for terminating null.  */
 578           if (dest + 1 >= limit)
 579             limit = _cpp_next_chunk (pool, 0, &dest);
 580           *dest++ = c;
 581
 582           c = EOF;
 583           if (buffer->cur == buffer->rlimit)
 584             break;
 585
 586           c = *buffer->cur++;
 587         }
 588       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 589
 590       /* Potential escaped newline?  */
 591       if (c != '?' && c != '\\')
 592         break;
 593       c = skip_escaped_newlines (buffer, c);
 594     }
 595   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 596
 597   /* Remember the next character.  */
 598   buffer->read_ahead = c;
 599
 600   /* Null-terminate the number.  */
 601   *dest = '\0';
 602
 603   number->text = POOL_FRONT (pool);
 604   number->len = dest - number->text;
 605   POOL_COMMIT (pool, number->len + 1);
 606 }
 607
 608 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 609 static void
 610 unterminated (pfile, term)
 611      cpp_reader *pfile;
 612      int term;
 613 {
 614   cpp_error (pfile, "missing terminating %c character", term);
 615
 616   if (term == '\"' && pfile->mlstring_pos.line
 617       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 618     {
 619       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 620                            pfile->mlstring_pos.col,
 621                            "possible start of unterminated string literal");
 622       pfile->mlstring_pos.line = 0;
 623     }
 624 }
 625
 626 /* Subroutine of parse_string.  */
 627 static int
 628 unescaped_terminator_p (pfile, dest)
 629      cpp_reader *pfile;
 630      const unsigned char *dest;
 631 {
 632   const unsigned char *start, *temp;
 633
 634   /* In #include-style directives, terminators are not escapeable.  */
 635   if (pfile->state.angled_headers)
 636     return 1;
 637
 638   start = POOL_FRONT (&pfile->ident_pool);
 639
 640   /* An odd number of consecutive backslashes represents an escaped
 641      terminator.  */
 642   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 643     ;
 644
 645   return ((dest - temp) & 1) == 0;
 646 }
 647
 648 /* Parses a string, character constant, or angle-bracketed header file
 649    name.  Handles embedded trigraphs and escaped newlines.  The stored
 650    string is guaranteed NUL-terminated, but it is not guaranteed that
 651    this is the first NUL since embedded NULs are preserved.
 652
 653    Multi-line strings are allowed, but they are deprecated.  */
 654 static void
 655 parse_string (pfile, token, terminator)
 656      cpp_reader *pfile;
 657      cpp_token *token;
 658      cppchar_t terminator;
 659 {
 660   cpp_buffer *buffer = pfile->buffer;
 661   cpp_pool *pool = &pfile->ident_pool;
 662   unsigned char *dest, *limit;
 663   cppchar_t c;
 664   unsigned int nulls = 0;
 665
 666   dest = POOL_FRONT (pool);
 667   limit = POOL_LIMIT (pool);
 668
 669   for (;;)
 670     {
 671       if (buffer->cur == buffer->rlimit)
 672         c = EOF;
 673       else
 674         c = *buffer->cur++;
 675
 676     have_char:
 677       /* We need space for the terminating NUL.  */
 678       if (dest >= limit)
 679         limit = _cpp_next_chunk (pool, 0, &dest);
 680
 681       if (c == EOF)
 682         {
 683           unterminated (pfile, terminator);
 684           break;
 685         }
 686
 687       /* Handle trigraphs, escaped newlines etc.  */
 688       if (c == '?' || c == '\\')
 689         c = skip_escaped_newlines (buffer, c);
 690
 691       if (c == terminator && unescaped_terminator_p (pfile, dest))
 692         {
 693           c = EOF;
 694           break;
 695         }
 696       else if (is_vspace (c))
 697         {
 698           /* In assembly language, silently terminate string and
 699              character literals at end of line.  This is a kludge
 700              around not knowing where comments are.  */
 701           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 702             break;
 703
 704           /* Character constants and header names may not extend over
 705              multiple lines.  In Standard C, neither may strings.
 706              Unfortunately, we accept multiline strings as an
 707              extension, except in #include family directives.  */
 708           if (terminator != '"' || pfile->state.angled_headers)
 709             {
 710               unterminated (pfile, terminator);
 711               break;
 712             }
 713
 714           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 715           if (pfile->mlstring_pos.line == 0)
 716             pfile->mlstring_pos = pfile->lexer_pos;
 717
 718           c = handle_newline (buffer, c);
 719           *dest++ = '\n';
 720           goto have_char;
 721         }
 722       else if (c == '\0')
 723         {
 724           if (nulls++ == 0)
 725             cpp_warning (pfile, "null character(s) preserved in literal");
 726         }
 727
 728       *dest++ = c;
 729     }
 730
 731   /* Remember the next character.  */
 732   buffer->read_ahead = c;
 733   *dest = '\0';
 734
 735   token->val.str.text = POOL_FRONT (pool);
 736   token->val.str.len = dest - token->val.str.text;
 737   POOL_COMMIT (pool, token->val.str.len + 1);
 738 }
 739
 740 /* The stored comment includes the comment start and any terminator.  */
 741 static void
 742 save_comment (pfile, token, from)
 743      cpp_reader *pfile;
 744      cpp_token *token;
 745      const unsigned char *from;
 746 {
 747   unsigned char *buffer;
 748   unsigned int len;
 749
 750   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 751   /* C++ comments probably (not definitely) have moved past a new
 752      line, which we don't want to save in the comment.  */
 753   if (pfile->buffer->read_ahead != EOF)
 754     len--;
 755   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 756
 757   token->type = CPP_COMMENT;
 758   token->val.str.len = len;
 759   token->val.str.text = buffer;
 760
 761   buffer[0] = '/';
 762   memcpy (buffer + 1, from, len - 1);
 763 }
 764
 765 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 766    want to avoid stepping back when lexing %:%X.  */
 767 static void
 768 lex_percent (buffer, result)
 769      cpp_buffer *buffer;
 770      cpp_token *result;
 771 {
 772   cppchar_t c;
 773
 774   result->type = CPP_MOD;
 775   /* Parsing %:%X could leave an extra character.  */
 776   if (buffer->extra_char == EOF)
 777     c = get_effective_char (buffer);
 778   else
 779     {
 780       c = buffer->read_ahead = buffer->extra_char;
 781       buffer->extra_char = EOF;
 782     }
 783
 784   if (c == '=')
 785     ACCEPT_CHAR (CPP_MOD_EQ);
 786   else if (CPP_OPTION (buffer->pfile, digraphs))
 787     {
 788       if (c == ':')
 789         {
 790           result->flags |= DIGRAPH;
 791           ACCEPT_CHAR (CPP_HASH);
 792           if (get_effective_char (buffer) == '%')
 793             {
 794               buffer->extra_char = get_effective_char (buffer);
 795               if (buffer->extra_char == ':')
 796                 {
 797                   buffer->extra_char = EOF;
 798                   ACCEPT_CHAR (CPP_PASTE);
 799                 }
 800               else
 801                 /* We'll catch the extra_char when we're called back.  */
 802                 buffer->read_ahead = '%';
 803             }
 804         }
 805       else if (c == '>')
 806         {
 807           result->flags |= DIGRAPH;
 808           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 809         }
 810     }
 811 }
 812
 813 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 814    want to avoid stepping back when lexing '...' or '.123'.  In the
 815    latter case we should also set a flag for parse_number.  */
 816 static void
 817 lex_dot (pfile, result)
 818      cpp_reader *pfile;
 819      cpp_token *result;
 820 {
 821   cpp_buffer *buffer = pfile->buffer;
 822   cppchar_t c;
 823
 824   /* Parsing ..X could leave an extra character.  */
 825   if (buffer->extra_char == EOF)
 826     c = get_effective_char (buffer);
 827   else
 828     {
 829       c = buffer->read_ahead = buffer->extra_char;
 830       buffer->extra_char = EOF;
 831     }
 832
 833   /* All known character sets have 0...9 contiguous.  */
 834   if (c >= '0' && c <= '9')
 835     {
 836       result->type = CPP_NUMBER;
 837       parse_number (pfile, &result->val.str, c, 1);
 838     }
 839   else
 840     {
 841       result->type = CPP_DOT;
 842       if (c == '.')
 843         {
 844           buffer->extra_char = get_effective_char (buffer);
 845           if (buffer->extra_char == '.')
 846             {
 847               buffer->extra_char = EOF;
 848               ACCEPT_CHAR (CPP_ELLIPSIS);
 849             }
 850           else
 851             /* We'll catch the extra_char when we're called back.  */
 852             buffer->read_ahead = '.';
 853         }
 854       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 855         ACCEPT_CHAR (CPP_DOT_STAR);
 856     }
 857 }
 858
     /* Lex the next token from PFILE's current buffer into RESULT.

        Horizontal whitespace is skipped and recorded as PREV_WHITE on
        the token that follows.  Comments are handled the same way
        unless pfile->state.save_comments is set, in which case the
        comment is handed to save_comment and returned as a token.

        Outside a directive a newline is consumed and lexing carries on
        with the next line.  Inside a directive the newline is put back
        in buffer->read_ahead and CPP_EOF is returned.  CPP_EOF is also
        returned when the buffer is exhausted.

        A CPP_HASH token found at the beginning of a line is passed to
        _cpp_handle_directive, unless macro arguments are being parsed,
        in which case CPP_EOF is returned with '#' left in read_ahead.

        While pfile->skipping is set, every token that reaches the end
        of the switch is discarded and another one is lexed.  */
 859 void
 860 _cpp_lex_token (pfile, result)
 861      cpp_reader *pfile;
 862      cpp_token *result;
 863 {
 864   cppchar_t c;
 865   cpp_buffer *buffer;
 866   const unsigned char *comment_start;
 867   unsigned char bol;
 868
       /* Reached again for each token discarded while pfile->skipping
          is set; BOL is reloaded from the saved next_bol state.  */
 869  skip:
 870   bol = pfile->state.next_bol;
       /* Reached after a directive has been handled; BOL keeps the
          value it already has.  */
 871  done_directive:
 872   buffer = pfile->buffer;
 873   pfile->state.next_bol = 0;
 874   result->flags = buffer->saved_flags;
 875   buffer->saved_flags = 0;
       /* Restart the token after a newline, an escaped newline or a
          skipped comment: the recorded line is refreshed as well.  */
 876  next_char:
 877   pfile->lexer_pos.line = buffer->lineno;
       /* Restart the token after horizontal whitespace: only the
          recorded column is refreshed.  */
 878  next_char2:
 879   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 880
       /* Use the pending lookahead character if there is one; otherwise
          read the next byte when any remain.  C stays EOF when the
          buffer is exhausted.  */
 881   c = buffer->read_ahead;
 882   if (c == EOF && buffer->cur < buffer->rlimit)
 883     {
 884       c = *buffer->cur++;
 885       pfile->lexer_pos.col++;
 886     }
 887
       /* Dispatch on C with the lookahead slot cleared.  Also the
          re-entry point when a trigraph has produced a new C.  */
 888  do_switch:
 889   buffer->read_ahead = EOF;
 890   switch (c)
 891     {
 892     case EOF:
 893       /* Non-empty files should end in a newline.  Checking "bol" too
 894           prevents multiple warnings when hitting the EOF more than
 895           once, like in a directive.  Don't warn for command line and
 896           _Pragma buffers.  */
 897       if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
 898         cpp_pedwarn (pfile, "no newline at end of file");
 899       pfile->state.next_bol = 1;
 900       pfile->skipping = 0;      /* In case missing #endif.  */
 901       result->type = CPP_EOF;
 902       /* Don't do MI optimisation.  */
 903       return;
 904
 905     case ' ': case '\t': case '\f': case '\v': case '\0':
 906       skip_whitespace (pfile, c);
 907       result->flags |= PREV_WHITE;
 908       goto next_char2;
 909
 910     case '\n': case '\r':
 911       if (!pfile->state.in_directive)
 912         {
 913           handle_newline (buffer, c);
 914           bol = 1;
 915           pfile->lexer_pos.output_line = buffer->lineno;
 916           /* This is a new line, so clear any white space flag.
 917              Newlines in arguments are white space (6.10.3.10);
 918              parse_arg takes care of that.  */
 919           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 920           goto next_char;
 921         }
 922
 923       /* Don't let directives spill over to the next line.  */
 924       buffer->read_ahead = c;
 925       pfile->state.next_bol = 1;
 926       result->type = CPP_EOF;
 927       /* Don't break; pfile->skipping might be true.  */
 928       return;
 929
 930     case '?':
 931     case '\\':
 932       /* These could start an escaped newline, or '?' a trigraph.  Let
 933          skip_escaped_newlines do all the work.  */
 934       {
 935         unsigned int lineno = buffer->lineno;
 936
 937         c = skip_escaped_newlines (buffer, c);
 938         if (lineno != buffer->lineno)
 939           /* We had at least one escaped newline of some sort, and the
 940              next character is in buffer->read_ahead.  Update the
 941              token's line and column.  */
 942             goto next_char;
 943
 944         /* We are either the original '?' or '\\', or a trigraph.  */
 945         result->type = CPP_QUERY;
 946         buffer->read_ahead = EOF;
 947         if (c == '\\')
 948           goto random_char;
 949         else if (c != '?')
 950           goto do_switch;
 951       }
 952       break;
 953
 954     case '0': case '1': case '2': case '3': case '4':
 955     case '5': case '6': case '7': case '8': case '9':
 956       result->type = CPP_NUMBER;
 957       parse_number (pfile, &result->val.str, c, 0);
 958       break;
 959
 960     case '$':
 961       if (!CPP_OPTION (pfile, dollars_in_ident))
 962         goto random_char;
 963       /* Fall through... */
 964
 965     case '_':
 966     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 967     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 968     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 969     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 970     case 'y': case 'z':
 971     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 972     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 973     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 974     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 975     case 'Y': case 'Z':
 976       result->type = CPP_NAME;
 977       result->val.node = parse_identifier (pfile, c);
 978
 979       /* 'L' may introduce wide characters or strings.  */
 980       if (result->val.node == pfile->spec_nodes.n_L)
 981         {
 982           c = buffer->read_ahead; /* For make_string.  */
 983           if (c == '\'' || c == '"')
 984             {
 985               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 986               goto make_string;
 987             }
 988         }
 989       /* Convert named operators to their proper types.  */
 990       else if (result->val.node->flags & NODE_OPERATOR)
 991         {
 992           result->flags |= NAMED_OP;
 993           result->type = result->val.node->value.operator;
 994         }
 995       break;
 996
 997     case '\'':
 998     case '"':
 999       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
           /* Shared with the wide-literal and header-name paths; C is
              the terminator passed to parse_string.  */
1000     make_string:
1001       parse_string (pfile, result, c);
1002       break;
1003
1004     case '/':
1005       /* A potential block or line comment.  */
1006       comment_start = buffer->cur;
1007       result->type = CPP_DIV;
1008       c = get_effective_char (buffer);
1009       if (c == '=')
1010         ACCEPT_CHAR (CPP_DIV_EQ);
           /* Anything other than a second '/' or a '*' means this is
              not a comment; the token type is already set.  */
1011       if (c != '/' && c != '*')
1012         break;
1013
1014       if (c == '*')
1015         {
1016           if (skip_block_comment (pfile))
1017             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1018                                  pfile->lexer_pos.col,
1019                                  "unterminated comment");
1020         }
1021       else
1022         {
               /* Line comments are not enabled here, so the first '/'
                  is returned as CPP_DIV.  */
1023           if (!CPP_OPTION (pfile, cplusplus_comments)
1024               && !CPP_IN_SYSTEM_HEADER (pfile))
1025             break;
1026
1027           /* Warn about comments only if pedantically GNUC89, and not
1028              in system headers.  */
1029           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1030               && ! buffer->warned_cplusplus_comments)
1031             {
1032               cpp_pedwarn (pfile,
1033                            "C++ style comments are not allowed in ISO C89");
1034               cpp_pedwarn (pfile,
1035                            "(this will be reported only once per input file)");
1036               buffer->warned_cplusplus_comments = 1;
1037             }
1038
1039           /* Skip_line_comment updates buffer->read_ahead.  */
1040           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1041             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1042                                    pfile->lexer_pos.col,
1043                                    "multi-line comment");
1044         }
1045
1046       /* Skipping the comment has updated buffer->read_ahead.  */
1047       if (!pfile->state.save_comments)
1048         {
1049           result->flags |= PREV_WHITE;
1050           goto next_char;
1051         }
1052
1053       /* Save the comment as a token in its own right.  */
1054       save_comment (pfile, result, comment_start);
1055       /* Don't do MI optimisation.  */
1056       return;
1057
1058     case '<':
           /* With state.angled_headers set, '<' opens a CPP_HEADER_NAME
              token that parse_string reads up to the '>' terminator.  */
1059       if (pfile->state.angled_headers)
1060         {
1061           result->type = CPP_HEADER_NAME;
1062           c = '>';              /* terminator.  */
1063           goto make_string;
1064         }
1065
1066       result->type = CPP_LESS;
1067       c = get_effective_char (buffer);
1068       if (c == '=')
1069         ACCEPT_CHAR (CPP_LESS_EQ);
1070       else if (c == '<')
1071         {
1072           ACCEPT_CHAR (CPP_LSHIFT);
1073           if (get_effective_char (buffer) == '=')
1074             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1075         }
1076       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1077         {
1078           ACCEPT_CHAR (CPP_MIN);
1079           if (get_effective_char (buffer) == '=')
1080             ACCEPT_CHAR (CPP_MIN_EQ);
1081         }
1082       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1083         {
1084           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1085           result->flags |= DIGRAPH;
1086         }
1087       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1088         {
1089           ACCEPT_CHAR (CPP_OPEN_BRACE);
1090           result->flags |= DIGRAPH;
1091         }
1092       break;
1093
1094     case '>':
1095       result->type = CPP_GREATER;
1096       c = get_effective_char (buffer);
1097       if (c == '=')
1098         ACCEPT_CHAR (CPP_GREATER_EQ);
1099       else if (c == '>')
1100         {
1101           ACCEPT_CHAR (CPP_RSHIFT);
1102           if (get_effective_char (buffer) == '=')
1103             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1104         }
1105       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1106         {
1107           ACCEPT_CHAR (CPP_MAX);
1108           if (get_effective_char (buffer) == '=')
1109             ACCEPT_CHAR (CPP_MAX_EQ);
1110         }
1111       break;
1112
1113     case '%':
1114       lex_percent (buffer, result);
           /* A CPP_HASH produced by lex_percent is treated exactly like
              a literal '#'.  */
1115       if (result->type == CPP_HASH)
1116         goto do_hash;
1117       break;
1118
1119     case '.':
1120       lex_dot (pfile, result);
1121       break;
1122
1123     case '+':
1124       result->type = CPP_PLUS;
1125       c = get_effective_char (buffer);
1126       if (c == '=')
1127         ACCEPT_CHAR (CPP_PLUS_EQ);
1128       else if (c == '+')
1129         ACCEPT_CHAR (CPP_PLUS_PLUS);
1130       break;
1131
1132     case '-':
1133       result->type = CPP_MINUS;
1134       c = get_effective_char (buffer);
1135       if (c == '>')
1136         {
1137           ACCEPT_CHAR (CPP_DEREF);
1138           if (CPP_OPTION (pfile, cplusplus)
1139               && get_effective_char (buffer) == '*')
1140             ACCEPT_CHAR (CPP_DEREF_STAR);
1141         }
1142       else if (c == '=')
1143         ACCEPT_CHAR (CPP_MINUS_EQ);
1144       else if (c == '-')
1145         ACCEPT_CHAR (CPP_MINUS_MINUS);
1146       break;
1147
1148     case '*':
1149       result->type = CPP_MULT;
1150       if (get_effective_char (buffer) == '=')
1151         ACCEPT_CHAR (CPP_MULT_EQ);
1152       break;
1153
1154     case '=':
1155       result->type = CPP_EQ;
1156       if (get_effective_char (buffer) == '=')
1157         ACCEPT_CHAR (CPP_EQ_EQ);
1158       break;
1159
1160     case '!':
1161       result->type = CPP_NOT;
1162       if (get_effective_char (buffer) == '=')
1163         ACCEPT_CHAR (CPP_NOT_EQ);
1164       break;
1165
1166     case '&':
1167       result->type = CPP_AND;
1168       c = get_effective_char (buffer);
1169       if (c == '=')
1170         ACCEPT_CHAR (CPP_AND_EQ);
1171       else if (c == '&')
1172         ACCEPT_CHAR (CPP_AND_AND);
1173       break;
1174
1175     case '#':
1176       c = buffer->extra_char;   /* Can be set by error condition below.  */
1177       if (c != EOF)
1178         {
1179           buffer->read_ahead = c;
1180           buffer->extra_char = EOF;
1181         }
1182       else
1183         c = get_effective_char (buffer);
1184
1185       if (c == '#')
1186         {
1187           ACCEPT_CHAR (CPP_PASTE);
1188           break;
1189         }
1190
1191       result->type = CPP_HASH;
1192     do_hash:
           /* A '#' that is not at the beginning of a line is an
              ordinary token.  */
1193       if (!bol)
1194         break;
1195       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1196          tokens within the list of arguments that would otherwise act
1197          as preprocessing directives, the behavior is undefined.
1198
1199          This implementation will report a hard error, terminate the
1200          macro invocation, and proceed to process the directive.  */
1201       if (pfile->state.parsing_args)
1202         {
1203           if (pfile->state.parsing_args == 2)
1204             cpp_error (pfile,
1205                        "directives may not be used inside a macro argument");
1206
1207           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1208           buffer->extra_char = buffer->read_ahead;
1209           buffer->read_ahead = '#';
1210           pfile->state.next_bol = 1;
1211           result->type = CPP_EOF;
1212
1213           /* Get whitespace right - newline_in_args sets it.  */
1214           if (pfile->lexer_pos.col == 1)
1215             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1216         }
1217       else
1218         {
1219           /* This is the hash introducing a directive.  */
1220           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1221             goto done_directive; /* bol still 1.  */
1222           /* This is in fact an assembler #.  */
1223         }
1224       break;
1225
1226     case '|':
1227       result->type = CPP_OR;
1228       c = get_effective_char (buffer);
1229       if (c == '=')
1230         ACCEPT_CHAR (CPP_OR_EQ);
1231       else if (c == '|')
1232         ACCEPT_CHAR (CPP_OR_OR);
1233       break;
1234
1235     case '^':
1236       result->type = CPP_XOR;
1237       if (get_effective_char (buffer) == '=')
1238         ACCEPT_CHAR (CPP_XOR_EQ);
1239       break;
1240
1241     case ':':
1242       result->type = CPP_COLON;
1243       c = get_effective_char (buffer);
1244       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1245         ACCEPT_CHAR (CPP_SCOPE);
1246       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1247         {
1248           result->flags |= DIGRAPH;
1249           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1250         }
1251       break;
1252
1253     case '~': result->type = CPP_COMPL; break;
1254     case ',': result->type = CPP_COMMA; break;
1255     case '(': result->type = CPP_OPEN_PAREN; break;
1256     case ')': result->type = CPP_CLOSE_PAREN; break;
1257     case '[': result->type = CPP_OPEN_SQUARE; break;
1258     case ']': result->type = CPP_CLOSE_SQUARE; break;
1259     case '{': result->type = CPP_OPEN_BRACE; break;
1260     case '}': result->type = CPP_CLOSE_BRACE; break;
1261     case ';': result->type = CPP_SEMICOLON; break;
1262
1263       /* @ is a punctuator in Objective C.  */
1264     case '@': result->type = CPP_ATSIGN; break;
1265
           /* Any other character becomes a CPP_OTHER token carrying
              the character itself.  */
1266     random_char:
1267     default:
1268       result->type = CPP_OTHER;
1269       result->val.c = c;
1270       break;
1271     }
1272
       /* While skipping, throw the token away and lex another one.  */
1273   if (pfile->skipping)
1274     goto skip;
1275
1276   /* If not in a directive, this token invalidates controlling macros.  */
1277   if (!pfile->state.in_directive)
1278     pfile->mi_state = MI_FAILED;
1279 }
1280
1281 /* An upper bound on the number of bytes needed to spell a token,
1282    including preceding whitespace.  */
1283 unsigned int
1284 cpp_token_len (token)
1285      const cpp_token *token;
1286 {
1287   unsigned int len;
1288
1289   switch (TOKEN_SPELL (token))
1290     {
1291     default:            len = 0;                        break;
1292     case SPELL_STRING:  len = token->val.str.len;       break;
1293     case SPELL_IDENT:   len = token->val.node->length;  break;
1294     }
1295   /* 1 for whitespace, 4 for comment delimeters.  */
1296   return len + 5;
1297 }
1298
1299 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1300    already contain the enough space to hold the token's spelling.
1301    Returns a pointer to the character after the last character
1302    written.  */
1303 unsigned char *
1304 cpp_spell_token (pfile, token, buffer)
1305      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1306      const cpp_token *token;
1307      unsigned char *buffer;
1308 {
1309   switch (TOKEN_SPELL (token))
1310     {
1311     case SPELL_OPERATOR:
1312       {
1313         const unsigned char *spelling;
1314         unsigned char c;
1315
1316         if (token->flags & DIGRAPH)
1317           spelling
1318             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1319         else if (token->flags & NAMED_OP)
1320           goto spell_ident;
1321         else
1322           spelling = TOKEN_NAME (token);
1323
1324         while ((c = *spelling++) != '\0')
1325           *buffer++ = c;
1326       }
1327       break;
1328
1329     case SPELL_IDENT:
1330       spell_ident:
1331       memcpy (buffer, token->val.node->name, token->val.node->length);
1332       buffer += token->val.node->length;
1333       break;
1334
1335     case SPELL_STRING:
1336       {
1337         int left, right, tag;
1338         switch (token->type)
1339           {
1340           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1341           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1342           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1343           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1344           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1345           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1346           }
1347         if (tag) *buffer++ = tag;
1348         if (left) *buffer++ = left;
1349         memcpy (buffer, token->val.str.text, token->val.str.len);
1350         buffer += token->val.str.len;
1351         if (right) *buffer++ = right;
1352       }
1353       break;
1354
1355     case SPELL_CHAR:
1356       *buffer++ = token->val.c;
1357       break;
1358
1359     case SPELL_NONE:
1360       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1361       break;
1362     }
1363
1364   return buffer;
1365 }
1366
1367 /* Returns a token as a null-terminated string.  The string is
1368    temporary, and automatically freed later.  Useful for diagnostics.  */
1369 unsigned char *
1370 cpp_token_as_text (pfile, token)
1371      cpp_reader *pfile;
1372      const cpp_token *token;
1373 {
1374   unsigned int len = cpp_token_len (token);
1375   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1376
1377   end = cpp_spell_token (pfile, token, start);
1378   end[0] = '\0';
1379
1380   return start;
1381 }
1382
1383 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1384 const char *
1385 cpp_type2name (type)
1386      enum cpp_ttype type;
1387 {
1388   return (const char *) token_spellings[type].name;
1389 }
1390
1391 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1392    for efficiency - to avoid double-buffering.  Also, outputs a space
1393    if PREV_WHITE is flagged.  */
1394 void
1395 cpp_output_token (token, fp)
1396      const cpp_token *token;
1397      FILE *fp;
1398 {
1399   if (token->flags & PREV_WHITE)
1400     putc (' ', fp);
1401
1402   switch (TOKEN_SPELL (token))
1403     {
1404     case SPELL_OPERATOR:
1405       {
1406         const unsigned char *spelling;
1407
1408         if (token->flags & DIGRAPH)
1409           spelling
1410             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1411         else if (token->flags & NAMED_OP)
1412           goto spell_ident;
1413         else
1414           spelling = TOKEN_NAME (token);
1415
1416         ufputs (spelling, fp);
1417       }
1418       break;
1419
1420     spell_ident:
1421     case SPELL_IDENT:
1422       ufputs (token->val.node->name, fp);
1423     break;
1424
1425     case SPELL_STRING:
1426       {
1427         int left, right, tag;
1428         switch (token->type)
1429           {
1430           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1431           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1432           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1433           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1434           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1435           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1436           }
1437         if (tag) putc (tag, fp);
1438         if (left) putc (left, fp);
1439         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1440         if (right) putc (right, fp);
1441       }
1442       break;
1443
1444     case SPELL_CHAR:
1445       putc (token->val.c, fp);
1446       break;
1447
1448     case SPELL_NONE:
1449       /* An error, most probably.  */
1450       break;
1451     }
1452 }
1453
1454 /* Compare two tokens.  */
1455 int
1456 _cpp_equiv_tokens (a, b)
1457      const cpp_token *a, *b;
1458 {
1459   if (a->type == b->type && a->flags == b->flags)
1460     switch (TOKEN_SPELL (a))
1461       {
1462       default:                  /* Keep compiler happy.  */
1463       case SPELL_OPERATOR:
1464         return 1;
1465       case SPELL_CHAR:
1466         return a->val.c == b->val.c; /* Character.  */
1467       case SPELL_NONE:
1468         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1469       case SPELL_IDENT:
1470         return a->val.node == b->val.node;
1471       case SPELL_STRING:
1472         return (a->val.str.len == b->val.str.len
1473                 && !memcmp (a->val.str.text, b->val.str.text,
1474                             a->val.str.len));
1475       }
1476
1477   return 0;
1478 }
1479
1480 /* Determine whether two tokens can be pasted together, and if so,
1481    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1482    be pasted, or the appropriate type for the merged token if they
1483    can.  */
     /* TOKEN1 is the left operand of the paste and TOKEN2 the right.
        *DIGRAPH is written only on the paths that return a digraph
        candidate: it is set to 1 for the <: <% :> %> and %: results,
        and to TOKEN1's DIGRAPH flag bit for CPP_PASTE.  On every other
        path it is left untouched, so callers presumably initialise it
        themselves.  */
1484 enum cpp_ttype
1485 cpp_can_paste (pfile, token1, token2, digraph)
1486      cpp_reader * pfile;
1487      const cpp_token *token1, *token2;
1488      int* digraph;
1489 {
1490   enum cpp_ttype a = token1->type, b = token2->type;
1491   int cxx = CPP_OPTION (pfile, cplusplus);
1492
1493   /* Treat named operators as if they were ordinary NAMEs.  */
1494   if (token1->flags & NAMED_OP)
1495     a = CPP_NAME;
1496   if (token2->flags & NAMED_OP)
1497     b = CPP_NAME;
1498
       /* Any type numbered up to CPP_LAST_EQ followed by '=' maps to
          its '=' form by adding the distance from CPP_EQ to CPP_EQ_EQ.
          This assumes the enum is laid out with that fixed offset.  */
1499   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1500     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1501
1502   switch (a)
1503     {
1504     case CPP_GREATER:
1505       if (b == a) return CPP_RSHIFT;
1506       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1507       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1508       break;
1509     case CPP_LESS:
1510       if (b == a) return CPP_LSHIFT;
1511       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1512       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1513       if (CPP_OPTION (pfile, digraphs))
1514         {
1515           if (b == CPP_COLON)
1516             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1517           if (b == CPP_MOD)
1518             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1519         }
1520       break;
1521
1522     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1523     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1524     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1525
1526     case CPP_MINUS:
1527       if (b == a)               return CPP_MINUS_MINUS;
1528       if (b == CPP_GREATER)     return CPP_DEREF;
1529       break;
1530     case CPP_COLON:
1531       if (b == a && cxx)        return CPP_SCOPE;
1532       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1533         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1534       break;
1535
1536     case CPP_MOD:
1537       if (CPP_OPTION (pfile, digraphs))
1538         {
1539           if (b == CPP_GREATER)
1540             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1541           if (b == CPP_COLON)
1542             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1543         }
1544       break;
1545     case CPP_DEREF:
1546       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1547       break;
1548     case CPP_DOT:
1549       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1550       if (b == CPP_NUMBER)      return CPP_NUMBER;
1551       break;
1552
1553     case CPP_HASH:
           /* Two hashes paste only when both are spelled the same
              way, i.e. both digraphs or both plain.  */
1554       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1555         /* %:%: digraph */
1556         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1557       break;
1558
1559     case CPP_NAME:
1560       if (b == CPP_NAME)        return CPP_NAME;
           /* A number can extend a name only if name_p accepts its
              spelling.  */
1561       if (b == CPP_NUMBER
1562           && name_p (pfile, &token2->val.str)) return CPP_NAME;
           /* The identifier L in front of a narrow character or string
              literal yields the wide form.  */
1563       if (b == CPP_CHAR
1564           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1565       if (b == CPP_STRING
1566           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1567       break;
1568
1569     case CPP_NUMBER:
1570       if (b == CPP_NUMBER)      return CPP_NUMBER;
1571       if (b == CPP_NAME)        return CPP_NUMBER;
1572       if (b == CPP_DOT)         return CPP_NUMBER;
1573       /* Numbers cannot have length zero, so this is safe.  */
1574       if ((b == CPP_PLUS || b == CPP_MINUS)
1575           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1576         return CPP_NUMBER;
1577       break;
1578
1579     default:
1580       break;
1581     }
1582
1583   return CPP_EOF;
1584 }
1585
1586 /* Returns nonzero if a space should be inserted to avoid an
1587    accidental token paste for output.  For simplicity, it is
1588    conservative, and occasionally advises a space where one is not
1589    needed, e.g. "." and ".2".  */
     /* TOKEN1 is the token already written and TOKEN2 the one about to
        follow it.  The decision is based on TOKEN1's type and either
        TOKEN2's type or the first character of TOKEN2's spelling.  */
1590
1591 int
1592 cpp_avoid_paste (pfile, token1, token2)
1593      cpp_reader *pfile;
1594      const cpp_token *token1, *token2;
1595 {
1596   enum cpp_ttype a = token1->type, b = token2->type;
1597   cppchar_t c;
1598
       /* Named operators are spelled as identifiers, so treat them as
          names on both sides.  */
1599   if (token1->flags & NAMED_OP)
1600     a = CPP_NAME;
1601   if (token2->flags & NAMED_OP)
1602     b = CPP_NAME;
1603
       /* C is the first character of TOKEN2's spelling when TOKEN2 is
          an operator (the digraph spelling if it is flagged DIGRAPH);
          otherwise it stays EOF and matches none of the tests below.  */
1604   c = EOF;
1605   if (token2->flags & DIGRAPH)
1606     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1607   else if (token_spellings[b].category == SPELL_OPERATOR)
1608     c = token_spellings[b].name[0];
1609
1610   /* Quickly get everything that can paste with an '='.  */
1611   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1612     return 1;
1613
1614   switch (a)
1615     {
1616     case CPP_GREATER:   return c == '>' || c == '?';
1617     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1618     case CPP_PLUS:      return c == '+';
1619     case CPP_MINUS:     return c == '-' || c == '>';
1620     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1621     case CPP_MOD:       return c == ':' || c == '>';
1622     case CPP_AND:       return c == '&';
1623     case CPP_OR:        return c == '|';
1624     case CPP_COLON:     return c == ':' || c == '>';
1625     case CPP_DEREF:     return c == '*';
1626     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1627     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1628     case CPP_NAME:      return ((b == CPP_NUMBER
1629                                  && name_p (pfile, &token2->val.str))
1630                                 || b == CPP_NAME
1631                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1632     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1633                                 || c == '.' || c == '+' || c == '-');
1634     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1635                                 && token1->val.c == '@'
1636                                 && (b == CPP_NAME || b == CPP_STRING));
1637     default:            break;
1638     }
1639
1640   return 0;
1641 }
1642
1643 /* Output all the remaining tokens on the current line, and a newline
1644    character, to FP.  Leading whitespace is removed.  */
1645 void
1646 cpp_output_line (pfile, fp)
1647      cpp_reader *pfile;
1648      FILE *fp;
1649 {
1650   cpp_token token;
1651
1652   cpp_get_token (pfile, &token);
1653   token.flags &= ~PREV_WHITE;
1654   while (token.type != CPP_EOF)
1655     {
1656       cpp_output_token (&token, fp);
1657       cpp_get_token (pfile, &token);
1658     }
1659
1660   putc ('\n', fp);
1661 }
1662
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be a decimal digit or a letter a-f in either case.  Any other
   character is a caller error and aborts the program.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  else if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  else if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  /* Not a hex digit: callers are expected to have checked first.  */
  abort ();
}
1676
1677 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1678
1679    [lex.charset]: The character designated by the universal character
1680    name \UNNNNNNNN is that character whose character short name in
1681    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1682    universal character name \uNNNN is that character whose character
1683    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1684    for a universal character name is less than 0x20 or in the range
1685    0x7F-0x9F (inclusive), or if the universal character name
1686    designates a character in the basic source character set, then the
1687    program is ill-formed.
1688
1689    We assume that wchar_t is Unicode, so we don't need to do any
1690    mapping.  Is this ever wrong?  */
     /* *PSTR points at the first expected hex digit and is advanced
        past the digits actually consumed.  LIMIT is the end of the
        available text.  LENGTH is the number of hex digits required.
        Returns the accumulated code.

        If the text runs out or a non-hex digit is met, an error is
        reported and scanning stops early; the range checks that follow
        still run on the digits seen so far.  With TARGET_EBCDIC
        defined, an error is always reported and 0x3f is returned.  */
1691
1692 static unsigned int
1693 read_ucs (pfile, pstr, limit, length)
1694      cpp_reader *pfile;
1695      const unsigned char **pstr;
1696      const unsigned char *limit;
1697      unsigned int length;
1698 {
1699   const unsigned char *p = *pstr;
1700   unsigned int c, code = 0;
1701
       /* Accumulate up to LENGTH hex digits, most significant first.  */
1702   for (; length; --length)
1703     {
1704       if (p >= limit)
1705         {
1706           cpp_error (pfile, "incomplete universal-character-name");
1707           break;
1708         }
1709
1710       c = *p;
1711       if (ISXDIGIT (c))
1712         {
1713           code = (code << 4) + hex_digit_value (c);
1714           p++;
1715         }
1716       else
1717         {
1718           cpp_error (pfile,
1719                      "non-hex digit '%c' in universal-character-name", c);
1720           break;
1721         }
1722
1723     }
1724
1725 #ifdef TARGET_EBCDIC
1726   cpp_error (pfile, "universal-character-name on EBCDIC target");
1727   code = 0x3f;  /* EBCDIC invalid character */
1728 #else
       /* Accept codes above 0x9f whose top bit is clear.  */
1729   if (code > 0x9f && !(code & 0x80000000))
1730     ; /* True extended character, OK.  */
1731   else if (code >= 0x20 && code < 0x7f)
1732     {
1733       /* ASCII printable character.  The C character set consists of all of
1734          these except $, @ and `.  We use hex escapes so that this also
1735          works with EBCDIC hosts.  */
1736       if (code != 0x24 && code != 0x40 && code != 0x60)
1737         cpp_error (pfile, "universal-character-name used for '%c'", code);
1738     }
1739   else
         /* Below 0x20, in 0x7f-0x9f, or with the top bit set.  */
1740     cpp_error (pfile, "invalid universal-character-name");
1741 #endif
1742
1743   *pstr = p;
1744   return code;
1745 }
1746
1747 /* Interpret an escape sequence, and return its value.  PSTR points to
1748    the input pointer, which is just after the backslash.  LIMIT is how
1749    much text we have.  MASK is the precision for the target type (char
1750    or wchar_t).  TRADITIONAL, if true, does not interpret escapes that
1751    did not exist in traditional C.  */
     /* On return *PSTR has been advanced past the escape.  Problems are
        reported through cpp_error, cpp_warning and cpp_pedwarn; a
        value is returned regardless.
        NOTE(review): the first character is read without being
        compared against LIMIT, so callers presumably guarantee that at
        least one character follows the backslash -- confirm.  */
1752
1753 static unsigned int
1754 parse_escape (pfile, pstr, limit, mask, traditional)
1755      cpp_reader *pfile;
1756      const unsigned char **pstr;
1757      const unsigned char *limit;
1758      HOST_WIDE_INT mask;
1759      int traditional;
1760 {
1761   int unknown = 0;
1762   const unsigned char *str = *pstr;
1763   unsigned int c = *str++;
1764
1765   switch (c)
1766     {
           /* These escapes stand for the character itself.  */
1767     case '\\': case '\'': case '"': case '?': break;
1768     case 'b': c = TARGET_BS;      break;
1769     case 'f': c = TARGET_FF;      break;
1770     case 'n': c = TARGET_NEWLINE; break;
1771     case 'r': c = TARGET_CR;      break;
1772     case 't': c = TARGET_TAB;     break;
1773     case 'v': c = TARGET_VT;      break;
1774
1775     case '(': case '{': case '[': case '%':
1776       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1777          '\%' is used to prevent SCCS from getting confused.  */
1778       unknown = CPP_PEDANTIC (pfile);
1779       break;
1780
1781     case 'a':
1782       if (CPP_WTRADITIONAL (pfile))
1783         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
           /* With TRADITIONAL set, C is left as the letter 'a'.  */
1784       if (!traditional)
1785         c = TARGET_BELL;
1786       break;
1787
1788     case 'e': case 'E':
1789       if (CPP_PEDANTIC (pfile))
1790         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1791       c = TARGET_ESC;
1792       break;
1793
1794       /* Warnings and support checks handled by read_ucs().  */
1795     case 'u': case 'U':
1796       if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
1797         {
1798           if (CPP_WTRADITIONAL (pfile))
1799             cpp_warning (pfile,
1800                          "the meaning of '\\%c' varies with -traditional", c);
               /* \u takes four hex digits, \U takes eight.  */
1801           c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
1802         }
1803       else
1804         unknown = 1;
1805       break;
1806
1807     case 'x':
1808       if (CPP_WTRADITIONAL (pfile))
1809         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1810
           /* With TRADITIONAL set, C is left as the letter 'x' and no
              digits are consumed.  */
1811       if (!traditional)
1812         {
1813           unsigned int i = 0, overflow = 0;
1814           int digits_found = 0;
1815
               /* Consume every hex digit that follows; there is no
                  limit on their number.  */
1816           while (str < limit)
1817             {
1818               c = *str;
1819               if (! ISXDIGIT (c))
1820                 break;
1821               str++;
                   /* Nonzero when the top four bits of I are about to
                      be shifted out.  */
1822               overflow |= i ^ (i << 4 >> 4);
1823               i = (i << 4) + hex_digit_value (c);
1824               digits_found = 1;
1825             }
1826
1827           if (!digits_found)
1828             cpp_error (pfile, "\\x used with no following hex digits");
1829
               /* Out of range if bits were lost above or the value
                  does not fit in MASK.  */
1830           if (overflow | (i != (i & mask)))
1831             {
1832               cpp_pedwarn (pfile, "hex escape sequence out of range");
1833               i &= mask;
1834             }
1835           c = i;
1836         }
1837       break;
1838
1839     case '0':  case '1':  case '2':  case '3':
1840     case '4':  case '5':  case '6':  case '7':
1841       {
1842         unsigned int i = c - '0';
1843         int count = 0;
1844
             /* At most three octal digits in total, counting the one
                already held in C.  */
1845         while (str < limit && ++count < 3)
1846           {
1847             c = *str;
1848             if (c < '0' || c > '7')
1849               break;
1850             str++;
1851             i = (i << 3) + c - '0';
1852           }
1853
1854         if (i != (i & mask))
1855           {
1856             cpp_pedwarn (pfile, "octal escape sequence out of range");
1857             i &= mask;
1858           }
1859         c = i;
1860       }
1861       break;
1862
1863     default:
1864       unknown = 1;
1865       break;
1866     }
1867
       /* For an unknown escape, C is still the character that followed
          the backslash; it is reported and then returned unchanged.  */
1868   if (unknown)
1869     {
1870       if (ISGRAPH (c))
1871         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1872       else
1873         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1874     }
1875
1876   *pstr = str;
1877   return c;
1878 }
1879
/* Fall back to CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE when the MAX_
   variants have not been defined already.  cpp_interpret_charconst
   uses these as the width, in bits, of a narrow or wide character.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1887
1888 /* Interpret a (possibly wide) character constant in TOKEN.
1889    WARN_MULTI warns about multi-character charconsts, if not
1890    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1891    that did not exist in traditional C.  PCHARS_SEEN points to a
1892    variable that is filled in with the number of characters seen.  */
1893 HOST_WIDE_INT
1894 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1895      cpp_reader *pfile;
1896      const cpp_token *token;
1897      int warn_multi;
1898      int traditional;
1899      unsigned int *pchars_seen;
1900 {
1901   const unsigned char *str = token->val.str.text;
1902   const unsigned char *limit = str + token->val.str.len;
1903   unsigned int chars_seen = 0;
1904   unsigned int width, max_chars, c;
1905   HOST_WIDE_INT result = 0, mask;
1906
1907 #ifdef MULTIBYTE_CHARS
1908   (void) local_mbtowc (NULL, NULL, 0);
1909 #endif
1910
1911   /* Width in bits.  */
1912   if (token->type == CPP_CHAR)
1913     width = MAX_CHAR_TYPE_SIZE;
1914   else
1915     width = MAX_WCHAR_TYPE_SIZE;
1916
1917   if (width < HOST_BITS_PER_WIDE_INT)
1918     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1919   else
1920     mask = ~0;
1921   max_chars = HOST_BITS_PER_WIDE_INT / width;
1922
1923   while (str < limit)
1924     {
1925 #ifdef MULTIBYTE_CHARS
1926       wchar_t wc;
1927       int char_len;
1928
1929       char_len = local_mbtowc (&wc, str, limit - str);
1930       if (char_len == -1)
1931         {
1932           cpp_warning (pfile, "ignoring invalid multibyte character");
1933           c = *str++;
1934         }
1935       else
1936         {
1937           str += char_len;
1938           c = wc;
1939         }
1940 #else
1941       c = *str++;
1942 #endif
1943
1944       if (c == '\\')
1945         {
1946           c = parse_escape (pfile, &str, limit, mask, traditional);
1947           if (width < HOST_BITS_PER_WIDE_INT && c > mask)
1948             cpp_pedwarn (pfile, "escape sequence out of range for character");
1949         }
1950
1951 #ifdef MAP_CHARACTER
1952       if (ISPRINT (c))
1953         c = MAP_CHARACTER (c);
1954 #endif
1955
1956       /* Merge character into result; ignore excess chars.  */
1957       if (++chars_seen <= max_chars)
1958         {
1959           if (width < HOST_BITS_PER_WIDE_INT)
1960             result = (result << width) | (c & mask);
1961           else
1962             result = c;
1963         }
1964     }
1965
1966   if (chars_seen == 0)
1967     cpp_error (pfile, "empty character constant");
1968   else if (chars_seen > max_chars)
1969     {
1970       chars_seen = max_chars;
1971       cpp_error (pfile, "character constant too long");
1972     }
1973   else if (chars_seen > 1 && !traditional && warn_multi)
1974     cpp_warning (pfile, "multi-character character constant");
1975
1976   /* If char type is signed, sign-extend the constant.  The
1977      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1978   if (token->type == CPP_CHAR && chars_seen)
1979     {
1980       unsigned int nbits = chars_seen * width;
1981       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1982
1983       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1984           || ((result >> (nbits - 1)) & 1) == 0)
1985         result &= mask;
1986       else
1987         result |= ~mask;
1988     }
1989
1990   *pchars_seen = chars_seen;
1991   return result;
1992 }
1993
1994 /* Memory pools.  */
1995
/* Layout probe: a single char followed by a union of a pointer and a
   double.  The offset of the union is the padding the compiler
   inserts after the char in order to align the union.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Default pool alignment: the offset of U within struct dummy.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2007
2008 static int
2009 chunk_suitable (pool, chunk, size)
2010      cpp_pool *pool;
2011      cpp_chunk *chunk;
2012      unsigned int size;
2013 {
2014   /* Being at least twice SIZE means we can use memcpy in
2015      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2016      anyway.  */
2017   return (chunk && pool->locked != chunk
2018           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2019 }
2020
2021 /* Returns the end of the new pool.  PTR points to a char in the old
2022    pool, and is updated to point to the same char in the new pool.  */
2023 unsigned char *
2024 _cpp_next_chunk (pool, len, ptr)
2025      cpp_pool *pool;
2026      unsigned int len;
2027      unsigned char **ptr;
2028 {
2029   cpp_chunk *chunk = pool->cur->next;
2030
2031   /* LEN is the minimum size we want in the new pool.  */
2032   len += POOL_ROOM (pool);
2033   if (! chunk_suitable (pool, chunk, len))
2034     {
2035       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2036
2037       chunk->next = pool->cur->next;
2038       pool->cur->next = chunk;
2039     }
2040
2041   /* Update the pointer before changing chunk's front.  */
2042   if (ptr)
2043     *ptr += chunk->base - POOL_FRONT (pool);
2044
2045   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2046   chunk->front = chunk->base;
2047
2048   pool->cur = chunk;
2049   return POOL_LIMIT (pool);
2050 }
2051
2052 static cpp_chunk *
2053 new_chunk (size)
2054      unsigned int size;
2055 {
2056   unsigned char *base;
2057   cpp_chunk *result;
2058
2059   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2060   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2061   /* Put the chunk descriptor at the end.  Then chunk overruns will
2062      cause obvious chaos.  */
2063   result = (cpp_chunk *) (base + size);
2064   result->base = base;
2065   result->front = base;
2066   result->limit = base + size;
2067   result->next = 0;
2068
2069   return result;
2070 }
2071
2072 void
2073 _cpp_init_pool (pool, size, align, temp)
2074      cpp_pool *pool;
2075      unsigned int size, align, temp;
2076 {
2077   if (align == 0)
2078     align = DEFAULT_ALIGNMENT;
2079   if (align & (align - 1))
2080     abort ();
2081   pool->align = align;
2082   pool->cur = new_chunk (size);
2083   pool->locked = 0;
2084   pool->locks = 0;
2085   if (temp)
2086     pool->cur->next = pool->cur;
2087 }
2088
2089 void
2090 _cpp_lock_pool (pool)
2091      cpp_pool *pool;
2092 {
2093   if (pool->locks++ == 0)
2094     pool->locked = pool->cur;
2095 }
2096
2097 void
2098 _cpp_unlock_pool (pool)
2099      cpp_pool *pool;
2100 {
2101   if (--pool->locks == 0)
2102     pool->locked = 0;
2103 }
2104
2105 void
2106 _cpp_free_pool (pool)
2107      cpp_pool *pool;
2108 {
2109   cpp_chunk *chunk = pool->cur, *next;
2110
2111   do
2112     {
2113       next = chunk->next;
2114       free (chunk->base);
2115       chunk = next;
2116     }
2117   while (chunk && chunk != pool->cur);
2118 }
2119
2120 /* Reserve LEN bytes from a memory pool.  */
2121 unsigned char *
2122 _cpp_pool_reserve (pool, len)
2123      cpp_pool *pool;
2124      unsigned int len;
2125 {
2126   len = POOL_ALIGN (len, pool->align);
2127   if (len > (unsigned int) POOL_ROOM (pool))
2128     _cpp_next_chunk (pool, len, 0);
2129
2130   return POOL_FRONT (pool);
2131 }
2132
2133 /* Allocate LEN bytes from a memory pool.  */
2134 unsigned char *
2135 _cpp_pool_alloc (pool, len)
2136      cpp_pool *pool;
2137      unsigned int len;
2138 {
2139   unsigned char *result = _cpp_pool_reserve (pool, len);
2140
2141   POOL_COMMIT (pool, len);
2142   return result;
2143 }