gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_period and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and its length in the token->val.name.len.
       The category of each token type is recorded in the token_spellings
       table: entries built with OP() get SPELL_OPERATOR, entries built
       with TK() name their category explicitly.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  /* One row of the token_spellings table: the spelling category of a
     token type together with a string for it (the operator text for
     OP() entries, the stringified enumerator name for TK() entries).  */
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
  /* Spellings of the digraph forms of punctuators.
     NOTE(review): the order presumably has to match an index used by
     the code that spells DIGRAPH-flagged tokens; that code is not
     visible here -- confirm before reordering.  */
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
  /* Build the per-token-type spelling table by expanding TTYPE_TABLE
     with local definitions of OP and TK.  OP entries are operators
     whose spelling is the literal string S; TK entries carry their own
     spelling category S and use the stringified enumerator as name.  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
     /* Accessors for the table above, indexed by TOKEN's type.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  /* Forward declarations of this file's static helpers.  */

     /* Character-level input: newline bookkeeping, trigraphs and
        escaped newlines.  */
  83 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86
     /* Comment, whitespace and token-body lexing.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
 102                                           const unsigned char *, HOST_WIDE_INT,
 103                                           int));
 104 static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 105                                       const unsigned char *, unsigned int));
 106
     /* Memory pool and miscellaneous helpers.  */
 107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 108 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 109 static unsigned int hex_digit_value PARAMS ((unsigned int));
 110
 111 /* Utility routine:
 112
 113    Compares, the token TOKEN to the NUL-terminated string STRING.
 114    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 115
 116 int
 117 cpp_ideq (token, string)
 118      const cpp_token *token;
 119      const char *string;
 120 {
 121   if (token->type != CPP_NAME)
 122     return 0;
 123
 124   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 125 }
 126
 127 /* Call when meeting a newline.  Returns the character after the newline
 128    (or carriage-return newline combination), or EOF.  */
 129 static cppchar_t
 130 handle_newline (buffer, newline_char)
 131      cpp_buffer *buffer;
 132      cppchar_t newline_char;
 133 {
 134   cppchar_t next = EOF;
 135
 136   buffer->col_adjust = 0;
 137   buffer->lineno++;
 138   buffer->line_base = buffer->cur;
 139
 140   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 141   if (buffer->cur < buffer->rlimit)
 142     {
 143       next = *buffer->cur++;
 144       if (next + newline_char == '\r' + '\n')
 145         {
 146           buffer->line_base = buffer->cur;
 147           if (buffer->cur < buffer->rlimit)
 148             next = *buffer->cur++;
 149           else
 150             next = EOF;
 151         }
 152     }
 153
 154   buffer->read_ahead = next;
 155   return next;
 156 }
 157
 158 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 159    encountered.  It warns if necessary, and returns true if the
 160    trigraph should be honoured.  FROM_CHAR is the third character of a
 161    trigraph, and presumed to be the previous character for position
 162    reporting.  */
 163 static int
 164 trigraph_ok (pfile, from_char)
 165      cpp_reader *pfile;
 166      cppchar_t from_char;
 167 {
 168   int accept = CPP_OPTION (pfile, trigraphs);
 169
 170   /* Don't warn about trigraphs in comments.  */
 171   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 172     {
 173       cpp_buffer *buffer = pfile->buffer;
 174       if (accept)
 175         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 176                                "trigraph ??%c converted to %c",
 177                                (int) from_char,
 178                                (int) _cpp_trigraph_map[from_char]);
 179       else if (buffer->cur != buffer->last_Wtrigraphs)
 180         {
 181           buffer->last_Wtrigraphs = buffer->cur;
 182           cpp_warning_with_line (pfile, buffer->lineno,
 183                                  CPP_BUF_COL (buffer) - 2,
 184                                  "trigraph ??%c ignored", (int) from_char);
 185         }
 186     }
 187
 188   return accept;
 189 }
 190
 191 /* Assumes local variables buffer and result.  Sets the type of the
        token being built to T and clears buffer->read_ahead, i.e. the
        look-ahead character is consumed as part of the token.  */
 192 #define ACCEPT_CHAR(t) \
 193   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 194
 195 /* When we move to multibyte character sets, add to these something
 196    that saves and restores the state of the multibyte conversion
 197    library.  This probably involves saving and restoring a "cookie".
 198    In the case of glibc it is an 8-byte structure, so is not a high
 199    overhead operation.  In any case, it's out of the fast path.

        Both macros assume local variables buffer and saved_cur: they
        remember, and later rewind to, the current read position.  */
 200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 202
 203 /* Skips any escaped newlines introduced by NEXT, which is either a
 204    '?' or a '\\'.  Returns the next character, which will also have
 205    been placed in buffer->read_ahead.  This routine performs
 206    preprocessing stages 1 and 2 of the ISO C standard.

        If NEXT does not start an accepted trigraph or an escaped
        newline, the read position is rewound and NEXT itself is
        returned.  An accepted trigraph that is not a backslash is
        returned as its replacement character with the read position
        left after the trigraph.  Nothing is skipped at all when
        buffer->from_stage3 is set.  */
 207 static cppchar_t
 208 skip_escaped_newlines (buffer, next)
 209      cpp_buffer *buffer;
 210      cppchar_t next;
 211 {
 212   /* Only do this if we apply stages 1 and 2.  */
 213   if (!buffer->from_stage3)
 214     {
 215       cppchar_t next1;
 216       const unsigned char *saved_cur;
 217       int space;
 218
 219       do
 220         {
               /* Nothing can follow NEXT at the end of the buffer.  */
 221           if (buffer->cur == buffer->rlimit)
 222             break;
 223
 224           SAVE_STATE ();
 225           if (next == '?')
 226             {
                   /* A trigraph needs a second '?' and then a third
                      character, so there must be room for both.  */
 227               next1 = *buffer->cur++;
 228               if (next1 != '?' || buffer->cur == buffer->rlimit)
 229                 {
 230                   RESTORE_STATE ();
 231                   break;
 232                 }
 233
                   /* The third character must be one the trigraph map
                      knows, and trigraph_ok must accept it.  */
 234               next1 = *buffer->cur++;
 235               if (!_cpp_trigraph_map[next1]
 236                   || !trigraph_ok (buffer->pfile, next1))
 237                 {
 238                   RESTORE_STATE ();
 239                   break;
 240                 }
 241
 242               /* We have a full trigraph here.  */
 243               next = _cpp_trigraph_map[next1];
                   /* Only a trigraph-spelt backslash can go on to escape
                      a newline; any other replacement is the result.  */
 244               if (next != '\\' || buffer->cur == buffer->rlimit)
 245                 break;
                   /* Rewinding from here on must keep the trigraph
                      consumed.  */
 246               SAVE_STATE ();
 247             }
 248
 249           /* We have a backslash, and room for at least one more character.  */
 250           space = 0;
               /* Step over horizontal whitespace between the backslash
                  and the newline, noting whether there was any.  */
 251           do
 252             {
 253               next1 = *buffer->cur++;
 254               if (!is_nvspace (next1))
 255                 break;
 256               space = 1;
 257             }
 258           while (buffer->cur < buffer->rlimit);
 259
               /* Not an escaped newline after all: rewind so that the
                  characters just scanned are read again.  */
 260           if (!is_vspace (next1))
 261             {
 262               RESTORE_STATE ();
 263               break;
 264             }
 265
 266           if (space && !buffer->pfile->state.lexing_comment)
 267             cpp_warning (buffer->pfile,
 268                          "backslash and newline separated by space");
 269
 270           next = handle_newline (buffer, next1);
 271           if (next == EOF)
 272             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 273         }
           /* The character after an escaped newline may itself start
              another trigraph or escaped newline.  */
 274       while (next == '\\' || next == '?');
 275     }
 276
 277   buffer->read_ahead = next;
 278   return next;
 279 }
 280
 281 /* Obtain the next character, after trigraph conversion and skipping
 282    an arbitrary string of escaped newlines.  The common case of no
 283    trigraphs or escaped newlines falls through quickly.  */
 284 static cppchar_t
 285 get_effective_char (buffer)
 286      cpp_buffer *buffer;
 287 {
 288   cppchar_t next = EOF;
 289
 290   if (buffer->cur < buffer->rlimit)
 291     {
 292       next = *buffer->cur++;
 293
 294       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 295          can introduce escaped newlines, which we want to skip, or
 296          UCNs, which, depending upon lexer state, we will handle in
 297          the future.  */
 298       if (next == '?' || next == '\\')
 299         next = skip_escaped_newlines (buffer, next);
 300     }
 301
 302   buffer->read_ahead = next;
 303   return next;
 304 }
 305
 306 /* Skip a C-style block comment.  We find the end of the comment by
 307    seeing if an asterisk is before every '/' we encounter.  Returns
 308    non-zero if comment terminated by EOF, zero otherwise.

        pfile->state.lexing_comment is set for the duration of the scan
        and buffer->read_ahead is cleared on exit.  */
 309 static int
 310 skip_block_comment (pfile)
 311      cpp_reader *pfile;
 312 {
 313   cpp_buffer *buffer = pfile->buffer;
       /* C is the character under examination and PREVC the one before
          it; both start as EOF so that nothing matches initially.  */
 314   cppchar_t c = EOF, prevc = EOF;
 315
 316   pfile->state.lexing_comment = 1;
 317   while (buffer->cur != buffer->rlimit)
 318     {
 319       prevc = c, c = *buffer->cur++;
 320
           /* Entered by goto when C has already been fetched, which
              bypasses the end-of-buffer test in the loop condition.  */
 321     next_char:
 322       /* FIXME: For speed, create a new character class of characters
 323          of interest inside block comments.  */
 324       if (c == '?' || c == '\\')
 325         c = skip_escaped_newlines (buffer, c);
 326
 327       /* People like decorating comments with '*', so check for '/'
 328          instead for efficiency.  */
 329       if (c == '/')
 330         {
 331           if (prevc == '*')
 332             break;
 333
 334           /* Warn about potential nested comments, but not if the '/'
 335              comes immediately before the true comment delimiter.
 336              Don't bother to get it right across escaped newlines.  */
 337           if (CPP_OPTION (pfile, warn_comments)
 338               && buffer->cur != buffer->rlimit)
 339             {
 340               prevc = c, c = *buffer->cur++;
 341               if (c == '*' && buffer->cur != buffer->rlimit)
 342                 {
 343                   prevc = c, c = *buffer->cur++;
 344                   if (c != '/')
 345                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 346                                            CPP_BUF_COL (buffer),
 347                                            "\"/*\" within comment");
 348                 }
                   /* C has been read but not yet examined.  */
 349               goto next_char;
 350             }
 351         }
 352       else if (is_vspace (c))
 353         {
               /* handle_newline returns the character after the newline,
                  which still has to be examined.  */
 354           prevc = c, c = handle_newline (buffer, c);
 355           goto next_char;
 356         }
 357       else if (c == '\t')
 358         adjust_column (pfile);
 359     }
 360
 361   pfile->state.lexing_comment = 0;
 362   buffer->read_ahead = EOF;
       /* Anything other than a final star-slash pair means the comment
          was not closed.  */
 363   return c != '/' || prevc != '*';
 364 }
 365
 366 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 367    non-zero if a multiline comment.  The following new line, if any,
 368    is left in buffer->read_ahead.  */
 369 static int
 370 skip_line_comment (pfile)
 371      cpp_reader *pfile;
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_lineno = buffer->lineno;
 375   cppchar_t c;
 376
 377   pfile->state.lexing_comment = 1;
 378   do
 379     {
 380       c = EOF;
 381       if (buffer->cur == buffer->rlimit)
 382         break;
 383
 384       c = *buffer->cur++;
 385       if (c == '?' || c == '\\')
 386         c = skip_escaped_newlines (buffer, c);
 387     }
 388   while (!is_vspace (c));
 389
 390   pfile->state.lexing_comment = 0;
 391   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 392   return orig_lineno != buffer->lineno;
 393 }
 394
 395 /* pfile->buffer->cur is one beyond the \t character.  Update
 396    col_adjust so we track the column correctly.  */
 397 static void
 398 adjust_column (pfile)
 399      cpp_reader *pfile;
 400 {
 401   cpp_buffer *buffer = pfile->buffer;
 402   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 403
 404   /* Round it up to multiple of the tabstop, but subtract 1 since the
 405      tab itself occupies a character position.  */
 406   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 407                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 408 }
 409
 410 /* Skips whitespace, saving the next non-whitespace character.
 411    Adjusts pfile->col_adjust to account for tabs.  Without this,
 412    tokens might be assigned an incorrect column.  */
 413 static void
 414 skip_whitespace (pfile, c)
 415      cpp_reader *pfile;
 416      cppchar_t c;
 417 {
 418   cpp_buffer *buffer = pfile->buffer;
 419   unsigned int warned = 0;
 420
 421   do
 422     {
 423       /* Horizontal space always OK.  */
 424       if (c == ' ')
 425         ;
 426       else if (c == '\t')
 427         adjust_column (pfile);
 428       /* Just \f \v or \0 left.  */
 429       else if (c == '\0')
 430         {
 431           if (!warned)
 432             {
 433               cpp_warning (pfile, "null character(s) ignored");
 434               warned = 1;
 435             }
 436         }
 437       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 438         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 439                                CPP_BUF_COL (buffer),
 440                                "%s in preprocessing directive",
 441                                c == '\f' ? "form feed" : "vertical tab");
 442
 443       c = EOF;
 444       if (buffer->cur == buffer->rlimit)
 445         break;
 446       c = *buffer->cur++;
 447     }
 448   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 449   while (is_nvspace (c));
 450
 451   /* Remember the next character.  */
 452   buffer->read_ahead = c;
 453 }
 454
 455 /* See if the characters of a number token are valid in a name (no
 456    '.', '+' or '-').  */
 457 static int
 458 name_p (pfile, string)
 459      cpp_reader *pfile;
 460      const cpp_string *string;
 461 {
 462   unsigned int i;
 463
 464   for (i = 0; i < string->len; i++)
 465     if (!is_idchar (string->text[i]))
 466       return 0;
 467
 468   return 1;
 469 }
 470
 471 /* Parse an identifier, skipping embedded backslash-newlines.
 472    Calculate the hash value of the token while parsing, for improved
 473    performance.  The hashing algorithm *must* match cpp_lookup().  */
 474
 475 static cpp_hashnode *
 476 parse_identifier (pfile, c)
 477      cpp_reader *pfile;
 478      cppchar_t c;
 479 {
 480   cpp_hashnode *result;
 481   cpp_buffer *buffer = pfile->buffer;
 482   unsigned int saw_dollar = 0, len;
 483   struct obstack *stack = &pfile->hash_table->stack;
 484
 485   do
 486     {
 487       do
 488         {
 489           obstack_1grow (stack, c);
 490
 491           if (c == '$')
 492             saw_dollar++;
 493
 494           c = EOF;
 495           if (buffer->cur == buffer->rlimit)
 496             break;
 497
 498           c = *buffer->cur++;
 499         }
 500       while (is_idchar (c));
 501
 502       /* Potential escaped newline?  */
 503       if (c != '?' && c != '\\')
 504         break;
 505       c = skip_escaped_newlines (buffer, c);
 506     }
 507   while (is_idchar (c));
 508
 509   /* Remember the next character.  */
 510   buffer->read_ahead = c;
 511
 512   /* $ is not a identifier character in the standard, but is commonly
 513      accepted as an extension.  Don't warn about it in skipped
 514      conditional blocks.  */
 515   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 516     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 517
 518   /* Identifiers are null-terminated.  */
 519   len = obstack_object_size (stack);
 520   obstack_1grow (stack, '\0');
 521
 522   /* This routine commits the memory if necessary.  */
 523   result = (cpp_hashnode *)
 524     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 525
 526   /* Some identifiers require diagnostics when lexed.  */
 527   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 528     {
 529       /* It is allowed to poison the same identifier twice.  */
 530       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 531         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 532                    NODE_NAME (result));
 533
 534       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 535          replacement list of a variadic macro.  */
 536       if (result == pfile->spec_nodes.n__VA_ARGS__
 537           && !pfile->state.va_args_ok)
 538         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 539     }
 540
 541   return result;
 542 }
 543
 544 /* Parse a number, skipping embedded backslash-newlines.  */
 545 static void
 546 parse_number (pfile, number, c, leading_period)
 547      cpp_reader *pfile;
 548      cpp_string *number;
 549      cppchar_t c;
 550      int leading_period;
 551 {
 552   cpp_buffer *buffer = pfile->buffer;
 553   cpp_pool *pool = &pfile->ident_pool;
 554   unsigned char *dest, *limit;
 555
 556   dest = POOL_FRONT (pool);
 557   limit = POOL_LIMIT (pool);
 558
 559   /* Place a leading period.  */
 560   if (leading_period)
 561     {
 562       if (dest >= limit)
 563         limit = _cpp_next_chunk (pool, 0, &dest);
 564       *dest++ = '.';
 565     }
 566
 567   do
 568     {
 569       do
 570         {
 571           /* Need room for terminating null.  */
 572           if (dest + 1 >= limit)
 573             limit = _cpp_next_chunk (pool, 0, &dest);
 574           *dest++ = c;
 575
 576           c = EOF;
 577           if (buffer->cur == buffer->rlimit)
 578             break;
 579
 580           c = *buffer->cur++;
 581         }
 582       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 583
 584       /* Potential escaped newline?  */
 585       if (c != '?' && c != '\\')
 586         break;
 587       c = skip_escaped_newlines (buffer, c);
 588     }
 589   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 590
 591   /* Remember the next character.  */
 592   buffer->read_ahead = c;
 593
 594   /* Null-terminate the number.  */
 595   *dest = '\0';
 596
 597   number->text = POOL_FRONT (pool);
 598   number->len = dest - number->text;
 599   POOL_COMMIT (pool, number->len + 1);
 600 }
 601
 602 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 603 static void
 604 unterminated (pfile, term)
 605      cpp_reader *pfile;
 606      int term;
 607 {
 608   cpp_error (pfile, "missing terminating %c character", term);
 609
 610   if (term == '\"' && pfile->mlstring_pos.line
 611       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 612     {
 613       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 614                            pfile->mlstring_pos.col,
 615                            "possible start of unterminated string literal");
 616       pfile->mlstring_pos.line = 0;
 617     }
 618 }
 619
 620 /* Subroutine of parse_string.  */
 621 static int
 622 unescaped_terminator_p (pfile, dest)
 623      cpp_reader *pfile;
 624      const unsigned char *dest;
 625 {
 626   const unsigned char *start, *temp;
 627
 628   /* In #include-style directives, terminators are not escapeable.  */
 629   if (pfile->state.angled_headers)
 630     return 1;
 631
 632   start = POOL_FRONT (&pfile->ident_pool);
 633
 634   /* An odd number of consecutive backslashes represents an escaped
 635      terminator.  */
 636   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 637     ;
 638
 639   return ((dest - temp) & 1) == 0;
 640 }
 641
 642 /* Parses a string, character constant, or angle-bracketed header file
 643    name.  Handles embedded trigraphs and escaped newlines.  The stored
 644    string is guaranteed NUL-terminated, but it is not guaranteed that
 645    this is the first NUL since embedded NULs are preserved.
 646
 647    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the character that closes the literal.  The
        spelling, without the terminator, is built in the identifier
        pool and recorded in token->val.str.  On return
        buffer->read_ahead is EOF if the terminator was consumed or
        the buffer ran out, otherwise it holds the newline at which
        the literal was cut short.  */
 648 static void
 649 parse_string (pfile, token, terminator)
 650      cpp_reader *pfile;
 651      cpp_token *token;
 652      cppchar_t terminator;
 653 {
 654   cpp_buffer *buffer = pfile->buffer;
 655   cpp_pool *pool = &pfile->ident_pool;
 656   unsigned char *dest, *limit;
 657   cppchar_t c;
 658   unsigned int nulls = 0;
 659
 660   dest = POOL_FRONT (pool);
 661   limit = POOL_LIMIT (pool);
 662
 663   for (;;)
 664     {
 665       if (buffer->cur == buffer->rlimit)
 666         c = EOF;
 667       else
 668         c = *buffer->cur++;
 669
           /* Also entered by goto with C already fetched, after a
              newline inside a multi-line string.  */
 670     have_char:
 671       /* We need space for the terminating NUL.  */
 672       if (dest >= limit)
 673         limit = _cpp_next_chunk (pool, 0, &dest);
 674
 675       if (c == EOF)
 676         {
 677           unterminated (pfile, terminator);
 678           break;
 679         }
 680
 681       /* Handle trigraphs, escaped newlines etc.  */
           /* NOTE(review): skip_escaped_newlines can return EOF (a
              backslash-newline at end of file); that value is not
              re-tested against EOF before the checks below -- confirm
              this is intended.  */
 682       if (c == '?' || c == '\\')
 683         c = skip_escaped_newlines (buffer, c);
 684
 685       if (c == terminator && unescaped_terminator_p (pfile, dest))
 686         {
               /* The terminator is consumed, so nothing is left over
                  as read-ahead.  */
 687           c = EOF;
 688           break;
 689         }
 690       else if (is_vspace (c))
 691         {
 692           /* In assembly language, silently terminate string and
 693              character literals at end of line.  This is a kludge
 694              around not knowing where comments are.  */
 695           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 696             break;
 697
 698           /* Character constants and header names may not extend over
 699              multiple lines.  In Standard C, neither may strings.
 700              Unfortunately, we accept multiline strings as an
 701              extension, except in #include family directives.  */
 702           if (terminator != '"' || pfile->state.angled_headers)
 703             {
 704               unterminated (pfile, terminator);
 705               break;
 706             }
 707
 708           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
               /* Remember where the first multi-line string started, for
                  the diagnostic issued by unterminated.  */
 709           if (pfile->mlstring_pos.line == 0)
 710             pfile->mlstring_pos = pfile->lexer_pos;
 711
               /* Whatever form the newline took, it is stored as '\n';
                  the character after it is processed next.  */
 712           c = handle_newline (buffer, c);
 713           *dest++ = '\n';
 714           goto have_char;
 715         }
 716       else if (c == '\0')
 717         {
               /* Warn once per literal; the NUL itself is stored.  */
 718           if (nulls++ == 0)
 719             cpp_warning (pfile, "null character(s) preserved in literal");
 720         }
 721
 722       *dest++ = c;
 723     }
 724
 725   /* Remember the next character.  */
 726   buffer->read_ahead = c;
 727   *dest = '\0';
 728
       /* The recorded length excludes the terminating NUL, which is
          nevertheless committed to the pool.  */
 729   token->val.str.text = POOL_FRONT (pool);
 730   token->val.str.len = dest - token->val.str.text;
 731   POOL_COMMIT (pool, token->val.str.len + 1);
 732 }
 733
 734 /* The stored comment includes the comment start and any terminator.  */
 735 static void
 736 save_comment (pfile, token, from)
 737      cpp_reader *pfile;
 738      cpp_token *token;
 739      const unsigned char *from;
 740 {
 741   unsigned char *buffer;
 742   unsigned int len;
 743
 744   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 745   /* C++ comments probably (not definitely) have moved past a new
 746      line, which we don't want to save in the comment.  */
 747   if (pfile->buffer->read_ahead != EOF)
 748     len--;
 749   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 750
 751   token->type = CPP_COMMENT;
 752   token->val.str.len = len;
 753   token->val.str.text = buffer;
 754
 755   buffer[0] = '/';
 756   memcpy (buffer + 1, from, len - 1);
 757 }
 758
 759 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 760    want to avoid stepping back when lexing %:%X.  */
 761 static void
 762 lex_percent (buffer, result)
 763      cpp_buffer *buffer;
 764      cpp_token *result;
 765 {
 766   cppchar_t c;
 767
 768   result->type = CPP_MOD;
 769   /* Parsing %:%X could leave an extra character.  */
 770   if (buffer->extra_char == EOF)
 771     c = get_effective_char (buffer);
 772   else
 773     {
 774       c = buffer->read_ahead = buffer->extra_char;
 775       buffer->extra_char = EOF;
 776     }
 777
 778   if (c == '=')
 779     ACCEPT_CHAR (CPP_MOD_EQ);
 780   else if (CPP_OPTION (buffer->pfile, digraphs))
 781     {
 782       if (c == ':')
 783         {
 784           result->flags |= DIGRAPH;
 785           ACCEPT_CHAR (CPP_HASH);
 786           if (get_effective_char (buffer) == '%')
 787             {
 788               buffer->extra_char = get_effective_char (buffer);
 789               if (buffer->extra_char == ':')
 790                 {
 791                   buffer->extra_char = EOF;
 792                   ACCEPT_CHAR (CPP_PASTE);
 793                 }
 794               else
 795                 /* We'll catch the extra_char when we're called back.  */
 796                 buffer->read_ahead = '%';
 797             }
 798         }
 799       else if (c == '>')
 800         {
 801           result->flags |= DIGRAPH;
 802           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 803         }
 804     }
 805 }
 806
 807 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 808    want to avoid stepping back when lexing '...' or '.123'.  In the
 809    latter case we should also set a flag for parse_number.  */
 810 static void
 811 lex_dot (pfile, result)
 812      cpp_reader *pfile;
 813      cpp_token *result;
 814 {
 815   cpp_buffer *buffer = pfile->buffer;
 816   cppchar_t c;
 817
 818   /* Parsing ..X could leave an extra character.  */
 819   if (buffer->extra_char == EOF)
 820     c = get_effective_char (buffer);
 821   else
 822     {
 823       c = buffer->read_ahead = buffer->extra_char;
 824       buffer->extra_char = EOF;
 825     }
 826
 827   /* All known character sets have 0...9 contiguous.  */
 828   if (c >= '0' && c <= '9')
 829     {
 830       result->type = CPP_NUMBER;
 831       parse_number (pfile, &result->val.str, c, 1);
 832     }
 833   else
 834     {
 835       result->type = CPP_DOT;
 836       if (c == '.')
 837         {
 838           buffer->extra_char = get_effective_char (buffer);
 839           if (buffer->extra_char == '.')
 840             {
 841               buffer->extra_char = EOF;
 842               ACCEPT_CHAR (CPP_ELLIPSIS);
 843             }
 844           else
 845             /* We'll catch the extra_char when we're called back.  */
 846             buffer->read_ahead = '.';
 847         }
 848       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 849         ACCEPT_CHAR (CPP_DOT_STAR);
 850     }
 851 }
 852
 853 void
 854 _cpp_lex_token (pfile, result)
 855      cpp_reader *pfile;
 856      cpp_token *result;
 857 {
       /* Lex the next token from pfile->buffer into *RESULT.

          A character pending in buffer->read_ahead is used before any
          new input is read.  RESULT->type is set to CPP_EOF when the
          buffer is exhausted, when a newline is met while in a
          directive, and when a '#' at the beginning of a line is met
          while parsing macro arguments.  A token that reaches the end of
          the switch while pfile->skipping is set is dropped and lexing
          starts over.

          NOTE(review): ACCEPT_CHAR and get_effective_char are defined
          outside this chunk.  From their use here, get_effective_char
          presumably returns the next significant character and leaves it
          as lookahead, and ACCEPT_CHAR presumably sets RESULT->type and
          consumes that lookahead -- confirm against their definitions.  */
 858   cppchar_t c;
 859   cpp_buffer *buffer;
 860   const unsigned char *comment_start;
 861   unsigned char bol;
 862
       /* Re-entered from the bottom of the function while pfile->skipping
          is set; BOL is reloaded from the saved state.  */
 863  skip:
 864   bol = pfile->state.next_bol;
       /* Entered after a directive has been handled; BOL keeps the value
          it already had.  */
 865  done_directive:
 866   buffer = pfile->buffer;
 867   pfile->state.next_bol = 0;
 868   result->flags = buffer->saved_flags;
 869   buffer->saved_flags = 0;
       /* Record the line, then the column, of the token about to be
          lexed.  */
 870  next_char:
 871   pfile->lexer_pos.line = buffer->lineno;
       /* As next_char, but only the column is refreshed.  */
 872  next_char2:
 873   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 874
       /* Prefer a pending lookahead character; otherwise read one from
          the buffer if any input remains.  C stays EOF when neither is
          available.  */
 875   c = buffer->read_ahead;
 876   if (c == EOF && buffer->cur < buffer->rlimit)
 877     {
 878       c = *buffer->cur++;
 879       pfile->lexer_pos.col++;
 880     }
 881
       /* Any lookahead is now held in C, so clear it before dispatching
          on the character.  */
 882  do_switch:
 883   buffer->read_ahead = EOF;
 884   switch (c)
 885     {
 886     case EOF:
 887       /* Non-empty files should end in a newline.  Checking "bol" too
 888           prevents multiple warnings when hitting the EOF more than
 889           once, like in a directive.  Don't warn for command line and
 890           _Pragma buffers.  */
 891       if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
 892         cpp_pedwarn (pfile, "no newline at end of file");
 893       pfile->state.next_bol = 1;
 894       pfile->skipping = 0;      /* In case missing #endif.  */
 895       result->type = CPP_EOF;
 896       /* Don't do MI optimisation.  */
 897       return;
 898
 899     case ' ': case '\t': case '\f': case '\v': case '\0':
 900       skip_whitespace (pfile, c);
 901       result->flags |= PREV_WHITE;
 902       goto next_char2;
 903
 904     case '\n': case '\r':
 905       if (!pfile->state.in_directive)
 906         {
 907           handle_newline (buffer, c);
 908           bol = 1;
 909           pfile->lexer_pos.output_line = buffer->lineno;
 910           /* This is a new line, so clear any white space flag.
 911              Newlines in arguments are white space (6.10.3.10);
 912              parse_arg takes care of that.  */
 913           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 914           goto next_char;
 915         }
 916
 917       /* Don't let directives spill over to the next line.  */
           /* The newline is put back as lookahead so that the next call
              sees it again.  */
 918       buffer->read_ahead = c;
 919       pfile->state.next_bol = 1;
 920       result->type = CPP_EOF;
 921       /* Don't break; pfile->skipping might be true.  */
 922       return;
 923
 924     case '?':
 925     case '\\':
 926       /* These could start an escaped newline, or '?' a trigraph.  Let
 927          skip_escaped_newlines do all the work.  */
 928       {
 929         unsigned int lineno = buffer->lineno;
 930
 931         c = skip_escaped_newlines (buffer, c);
 932         if (lineno != buffer->lineno)
 933           /* We had at least one escaped newline of some sort, and the
 934              next character is in buffer->read_ahead.  Update the
 935              token's line and column.  */
 936             goto next_char;
 937
 938         /* We are either the original '?' or '\\', or a trigraph.  */
 939         result->type = CPP_QUERY;
 940         buffer->read_ahead = EOF;
             /* A backslash becomes a CPP_OTHER token; any character other
                than '?' (a replaced trigraph) is dispatched afresh; a
                plain '?' keeps the CPP_QUERY type set above.  */
 941         if (c == '\\')
 942           goto random_char;
 943         else if (c != '?')
 944           goto do_switch;
 945       }
 946       break;
 947
 948     case '0': case '1': case '2': case '3': case '4':
 949     case '5': case '6': case '7': case '8': case '9':
 950       result->type = CPP_NUMBER;
 951       parse_number (pfile, &result->val.str, c, 0);
 952       break;
 953
 954     case '$':
 955       if (!CPP_OPTION (pfile, dollars_in_ident))
 956         goto random_char;
 957       /* Fall through... */
 958
 959     case '_':
 960     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 961     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 962     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 963     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 964     case 'y': case 'z':
 965     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 966     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 967     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 968     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 969     case 'Y': case 'Z':
 970       result->type = CPP_NAME;
 971       result->val.node = parse_identifier (pfile, c);
 972
 973       /* 'L' may introduce wide characters or strings.  */
 974       if (result->val.node == pfile->spec_nodes.n_L)
 975         {
 976           c = buffer->read_ahead; /* For make_string.  */
 977           if (c == '\'' || c == '"')
 978             {
 979               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 980               goto make_string;
 981             }
 982         }
 983       /* Convert named operators to their proper types.  */
 984       else if (result->val.node->flags & NODE_OPERATOR)
 985         {
 986           result->flags |= NAMED_OP;
 987           result->type = result->val.node->value.operator;
 988         }
 989       break;
 990
 991     case '\'':
 992     case '"':
 993       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
           /* Also reached for wide literals and header names, with
              RESULT->type already set and C holding the terminator.  */
 994     make_string:
 995       parse_string (pfile, result, c);
 996       break;
 997
 998     case '/':
 999       /* A potential block or line comment.  */
1000       comment_start = buffer->cur;
1001       result->type = CPP_DIV;
1002       c = get_effective_char (buffer);
1003       if (c == '=')
1004         ACCEPT_CHAR (CPP_DIV_EQ);
1005       if (c != '/' && c != '*')
1006         break;
           /* Comments are not recognized in from_stage3 buffers; the
              token stays CPP_DIV.  */
1007       if (buffer->from_stage3)
1008         break;
1009
1010       if (c == '*')
1011         {
1012           if (skip_block_comment (pfile))
1013             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1014                                  pfile->lexer_pos.col,
1015                                  "unterminated comment");
1016         }
1017       else
1018         {
               /* Without C++ comment support, and outside system
                  headers, "//" is not a comment: keep CPP_DIV.  */
1019           if (!CPP_OPTION (pfile, cplusplus_comments)
1020               && !CPP_IN_SYSTEM_HEADER (pfile))
1021             break;
1022
1023           /* Warn about comments only if pedantically GNUC89, and not
1024              in system headers.  */
1025           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1026               && ! buffer->warned_cplusplus_comments)
1027             {
1028               cpp_pedwarn (pfile,
1029                            "C++ style comments are not allowed in ISO C89");
1030               cpp_pedwarn (pfile,
1031                            "(this will be reported only once per input file)");
1032               buffer->warned_cplusplus_comments = 1;
1033             }
1034
1035           /* Skip_line_comment updates buffer->read_ahead.  */
1036           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1037             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1038                                    pfile->lexer_pos.col,
1039                                    "multi-line comment");
1040         }
1041
1042       /* Skipping the comment has updated buffer->read_ahead.  */
           /* A discarded comment counts as white space before the next
              token.  */
1043       if (!pfile->state.save_comments)
1044         {
1045           result->flags |= PREV_WHITE;
1046           goto next_char;
1047         }
1048
1049       /* Save the comment as a token in its own right.  */
1050       save_comment (pfile, result, comment_start);
1051       /* Don't do MI optimisation.  */
1052       return;
1053
1054     case '<':
1055       if (pfile->state.angled_headers)
1056         {
1057           result->type = CPP_HEADER_NAME;
1058           c = '>';              /* terminator.  */
1059           goto make_string;
1060         }
1061
1062       result->type = CPP_LESS;
1063       c = get_effective_char (buffer);
1064       if (c == '=')
1065         ACCEPT_CHAR (CPP_LESS_EQ);
1066       else if (c == '<')
1067         {
1068           ACCEPT_CHAR (CPP_LSHIFT);
1069           if (get_effective_char (buffer) == '=')
1070             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1071         }
1072       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1073         {
1074           ACCEPT_CHAR (CPP_MIN);
1075           if (get_effective_char (buffer) == '=')
1076             ACCEPT_CHAR (CPP_MIN_EQ);
1077         }
1078       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1079         {
1080           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1081           result->flags |= DIGRAPH;
1082         }
1083       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1084         {
1085           ACCEPT_CHAR (CPP_OPEN_BRACE);
1086           result->flags |= DIGRAPH;
1087         }
1088       break;
1089
1090     case '>':
1091       result->type = CPP_GREATER;
1092       c = get_effective_char (buffer);
1093       if (c == '=')
1094         ACCEPT_CHAR (CPP_GREATER_EQ);
1095       else if (c == '>')
1096         {
1097           ACCEPT_CHAR (CPP_RSHIFT);
1098           if (get_effective_char (buffer) == '=')
1099             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1100         }
1101       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1102         {
1103           ACCEPT_CHAR (CPP_MAX);
1104           if (get_effective_char (buffer) == '=')
1105             ACCEPT_CHAR (CPP_MAX_EQ);
1106         }
1107       break;
1108
1109     case '%':
1110       lex_percent (buffer, result);
           /* A CPP_HASH produced here gets the same beginning-of-line
              directive handling as a plain '#'.  */
1111       if (result->type == CPP_HASH)
1112         goto do_hash;
1113       break;
1114
1115     case '.':
1116       lex_dot (pfile, result);
1117       break;
1118
1119     case '+':
1120       result->type = CPP_PLUS;
1121       c = get_effective_char (buffer);
1122       if (c == '=')
1123         ACCEPT_CHAR (CPP_PLUS_EQ);
1124       else if (c == '+')
1125         ACCEPT_CHAR (CPP_PLUS_PLUS);
1126       break;
1127
1128     case '-':
1129       result->type = CPP_MINUS;
1130       c = get_effective_char (buffer);
1131       if (c == '>')
1132         {
1133           ACCEPT_CHAR (CPP_DEREF);
1134           if (CPP_OPTION (pfile, cplusplus)
1135               && get_effective_char (buffer) == '*')
1136             ACCEPT_CHAR (CPP_DEREF_STAR);
1137         }
1138       else if (c == '=')
1139         ACCEPT_CHAR (CPP_MINUS_EQ);
1140       else if (c == '-')
1141         ACCEPT_CHAR (CPP_MINUS_MINUS);
1142       break;
1143
1144     case '*':
1145       result->type = CPP_MULT;
1146       if (get_effective_char (buffer) == '=')
1147         ACCEPT_CHAR (CPP_MULT_EQ);
1148       break;
1149
1150     case '=':
1151       result->type = CPP_EQ;
1152       if (get_effective_char (buffer) == '=')
1153         ACCEPT_CHAR (CPP_EQ_EQ);
1154       break;
1155
1156     case '!':
1157       result->type = CPP_NOT;
1158       if (get_effective_char (buffer) == '=')
1159         ACCEPT_CHAR (CPP_NOT_EQ);
1160       break;
1161
1162     case '&':
1163       result->type = CPP_AND;
1164       c = get_effective_char (buffer);
1165       if (c == '=')
1166         ACCEPT_CHAR (CPP_AND_EQ);
1167       else if (c == '&')
1168         ACCEPT_CHAR (CPP_AND_AND);
1169       break;
1170
1171     case '#':
1172       c = buffer->extra_char;   /* Can be set by error condition below.  */
1173       if (c != EOF)
1174         {
1175           buffer->read_ahead = c;
1176           buffer->extra_char = EOF;
1177         }
1178       else
1179         c = get_effective_char (buffer);
1180
1181       if (c == '#')
1182         {
1183           ACCEPT_CHAR (CPP_PASTE);
1184           break;
1185         }
1186
1187       result->type = CPP_HASH;
1188     do_hash:
           /* A hash that is not at the beginning of a line is just an
              ordinary CPP_HASH token.  */
1189       if (!bol)
1190         break;
1191       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1192          tokens within the list of arguments that would otherwise act
1193          as preprocessing directives, the behavior is undefined.
1194
1195          This implementation will report a hard error, terminate the
1196          macro invocation, and proceed to process the directive.  */
1197       if (pfile->state.parsing_args)
1198         {
1199           if (pfile->state.parsing_args == 2)
1200             cpp_error (pfile,
1201                        "directives may not be used inside a macro argument");
1202
1203           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
               /* The current lookahead is parked in extra_char; the '#'
                  case above restores it on the next call.  */
1204           buffer->extra_char = buffer->read_ahead;
1205           buffer->read_ahead = '#';
1206           pfile->state.next_bol = 1;
1207           result->type = CPP_EOF;
1208
1209           /* Get whitespace right - newline_in_args sets it.  */
1210           if (pfile->lexer_pos.col == 1)
1211             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1212         }
1213       else
1214         {
1215           /* This is the hash introducing a directive.  */
1216           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1217             goto done_directive; /* bol still 1.  */
1218           /* This is in fact an assembler #.  */
1219         }
1220       break;
1221
1222     case '|':
1223       result->type = CPP_OR;
1224       c = get_effective_char (buffer);
1225       if (c == '=')
1226         ACCEPT_CHAR (CPP_OR_EQ);
1227       else if (c == '|')
1228         ACCEPT_CHAR (CPP_OR_OR);
1229       break;
1230
1231     case '^':
1232       result->type = CPP_XOR;
1233       if (get_effective_char (buffer) == '=')
1234         ACCEPT_CHAR (CPP_XOR_EQ);
1235       break;
1236
1237     case ':':
1238       result->type = CPP_COLON;
1239       c = get_effective_char (buffer);
1240       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1241         ACCEPT_CHAR (CPP_SCOPE);
1242       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1243         {
1244           result->flags |= DIGRAPH;
1245           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1246         }
1247       break;
1248
1249     case '~': result->type = CPP_COMPL; break;
1250     case ',': result->type = CPP_COMMA; break;
1251     case '(': result->type = CPP_OPEN_PAREN; break;
1252     case ')': result->type = CPP_CLOSE_PAREN; break;
1253     case '[': result->type = CPP_OPEN_SQUARE; break;
1254     case ']': result->type = CPP_CLOSE_SQUARE; break;
1255     case '{': result->type = CPP_OPEN_BRACE; break;
1256     case '}': result->type = CPP_CLOSE_BRACE; break;
1257     case ';': result->type = CPP_SEMICOLON; break;
1258
1259       /* @ is a punctuator in Objective C.  */
1260     case '@': result->type = CPP_ATSIGN; break;
1261
           /* Any other character becomes a CPP_OTHER token that carries
              the character itself.  */
1262     random_char:
1263     default:
1264       result->type = CPP_OTHER;
1265       result->val.c = c;
1266       break;
1267     }
1268
       /* While skipping, drop the token just lexed and start over.  */
1269   if (pfile->skipping)
1270     goto skip;
1271
1272   /* If not in a directive, this token invalidates controlling macros.  */
1273   if (!pfile->state.in_directive)
1274     pfile->mi_state = MI_FAILED;
1275 }
1276
1277 /* An upper bound on the number of bytes needed to spell a token,
1278    including preceding whitespace.  */
1279 unsigned int
1280 cpp_token_len (token)
1281      const cpp_token *token;
1282 {
1283   unsigned int len;
1284
1285   switch (TOKEN_SPELL (token))
1286     {
1287     default:            len = 0;                                break;
1288     case SPELL_STRING:  len = token->val.str.len;               break;
1289     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1290     }
1291   /* 1 for whitespace, 4 for comment delimeters.  */
1292   return len + 5;
1293 }
1294
1295 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1296    already contain the enough space to hold the token's spelling.
1297    Returns a pointer to the character after the last character
1298    written.  */
1299 unsigned char *
1300 cpp_spell_token (pfile, token, buffer)
1301      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1302      const cpp_token *token;
1303      unsigned char *buffer;
1304 {
1305   switch (TOKEN_SPELL (token))
1306     {
1307     case SPELL_OPERATOR:
1308       {
1309         const unsigned char *spelling;
1310         unsigned char c;
1311
1312         if (token->flags & DIGRAPH)
1313           spelling
1314             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1315         else if (token->flags & NAMED_OP)
1316           goto spell_ident;
1317         else
1318           spelling = TOKEN_NAME (token);
1319
1320         while ((c = *spelling++) != '\0')
1321           *buffer++ = c;
1322       }
1323       break;
1324
1325     case SPELL_IDENT:
1326       spell_ident:
1327       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1328       buffer += NODE_LEN (token->val.node);
1329       break;
1330
1331     case SPELL_STRING:
1332       {
1333         int left, right, tag;
1334         switch (token->type)
1335           {
1336           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1337           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1338           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1339           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1340           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1341           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1342           }
1343         if (tag) *buffer++ = tag;
1344         if (left) *buffer++ = left;
1345         memcpy (buffer, token->val.str.text, token->val.str.len);
1346         buffer += token->val.str.len;
1347         if (right) *buffer++ = right;
1348       }
1349       break;
1350
1351     case SPELL_CHAR:
1352       *buffer++ = token->val.c;
1353       break;
1354
1355     case SPELL_NONE:
1356       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1357       break;
1358     }
1359
1360   return buffer;
1361 }
1362
1363 /* Returns a token as a null-terminated string.  The string is
1364    temporary, and automatically freed later.  Useful for diagnostics.  */
1365 unsigned char *
1366 cpp_token_as_text (pfile, token)
1367      cpp_reader *pfile;
1368      const cpp_token *token;
1369 {
1370   unsigned int len = cpp_token_len (token);
1371   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1372
1373   end = cpp_spell_token (pfile, token, start);
1374   end[0] = '\0';
1375
1376   return start;
1377 }
1378
1379 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1380 const char *
1381 cpp_type2name (type)
1382      enum cpp_ttype type;
1383 {
1384   return (const char *) token_spellings[type].name;
1385 }
1386
1387 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1388    for efficiency - to avoid double-buffering.  Also, outputs a space
1389    if PREV_WHITE is flagged.  */
1390 void
1391 cpp_output_token (token, fp)
1392      const cpp_token *token;
1393      FILE *fp;
1394 {
1395   if (token->flags & PREV_WHITE)
1396     putc (' ', fp);
1397
1398   switch (TOKEN_SPELL (token))
1399     {
1400     case SPELL_OPERATOR:
1401       {
1402         const unsigned char *spelling;
1403
1404         if (token->flags & DIGRAPH)
1405           spelling
1406             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1407         else if (token->flags & NAMED_OP)
1408           goto spell_ident;
1409         else
1410           spelling = TOKEN_NAME (token);
1411
1412         ufputs (spelling, fp);
1413       }
1414       break;
1415
1416     spell_ident:
1417     case SPELL_IDENT:
1418       ufputs (NODE_NAME (token->val.node), fp);
1419     break;
1420
1421     case SPELL_STRING:
1422       {
1423         int left, right, tag;
1424         switch (token->type)
1425           {
1426           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1427           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1428           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1429           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1430           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1431           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1432           }
1433         if (tag) putc (tag, fp);
1434         if (left) putc (left, fp);
1435         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1436         if (right) putc (right, fp);
1437       }
1438       break;
1439
1440     case SPELL_CHAR:
1441       putc (token->val.c, fp);
1442       break;
1443
1444     case SPELL_NONE:
1445       /* An error, most probably.  */
1446       break;
1447     }
1448 }
1449
1450 /* Compare two tokens.  */
1451 int
1452 _cpp_equiv_tokens (a, b)
1453      const cpp_token *a, *b;
1454 {
1455   if (a->type == b->type && a->flags == b->flags)
1456     switch (TOKEN_SPELL (a))
1457       {
1458       default:                  /* Keep compiler happy.  */
1459       case SPELL_OPERATOR:
1460         return 1;
1461       case SPELL_CHAR:
1462         return a->val.c == b->val.c; /* Character.  */
1463       case SPELL_NONE:
1464         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1465       case SPELL_IDENT:
1466         return a->val.node == b->val.node;
1467       case SPELL_STRING:
1468         return (a->val.str.len == b->val.str.len
1469                 && !memcmp (a->val.str.text, b->val.str.text,
1470                             a->val.str.len));
1471       }
1472
1473   return 0;
1474 }
1475
1476 /* Determine whether two tokens can be pasted together, and if so,
1477    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1478    be pasted, or the appropriate type for the merged token if they
1479    can.  */
1480 enum cpp_ttype
1481 cpp_can_paste (pfile, token1, token2, digraph)
1482      cpp_reader * pfile;
1483      const cpp_token *token1, *token2;
1484      int* digraph;
1485 {
1486   enum cpp_ttype a = token1->type, b = token2->type;
1487   int cxx = CPP_OPTION (pfile, cplusplus);
1488
1489   /* Treat named operators as if they were ordinary NAMEs.  */
1490   if (token1->flags & NAMED_OP)
1491     a = CPP_NAME;
1492   if (token2->flags & NAMED_OP)
1493     b = CPP_NAME;
1494
1495   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1496     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1497
1498   switch (a)
1499     {
1500     case CPP_GREATER:
1501       if (b == a) return CPP_RSHIFT;
1502       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1503       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1504       break;
1505     case CPP_LESS:
1506       if (b == a) return CPP_LSHIFT;
1507       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1508       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1509       if (CPP_OPTION (pfile, digraphs))
1510         {
1511           if (b == CPP_COLON)
1512             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1513           if (b == CPP_MOD)
1514             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1515         }
1516       break;
1517
1518     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1519     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1520     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1521
1522     case CPP_MINUS:
1523       if (b == a)               return CPP_MINUS_MINUS;
1524       if (b == CPP_GREATER)     return CPP_DEREF;
1525       break;
1526     case CPP_COLON:
1527       if (b == a && cxx)        return CPP_SCOPE;
1528       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1529         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1530       break;
1531
1532     case CPP_MOD:
1533       if (CPP_OPTION (pfile, digraphs))
1534         {
1535           if (b == CPP_GREATER)
1536             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1537           if (b == CPP_COLON)
1538             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1539         }
1540       break;
1541     case CPP_DEREF:
1542       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1543       break;
1544     case CPP_DOT:
1545       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1546       if (b == CPP_NUMBER)      return CPP_NUMBER;
1547       break;
1548
1549     case CPP_HASH:
1550       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1551         /* %:%: digraph */
1552         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1553       break;
1554
1555     case CPP_NAME:
1556       if (b == CPP_NAME)        return CPP_NAME;
1557       if (b == CPP_NUMBER
1558           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1559       if (b == CPP_CHAR
1560           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1561       if (b == CPP_STRING
1562           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1563       break;
1564
1565     case CPP_NUMBER:
1566       if (b == CPP_NUMBER)      return CPP_NUMBER;
1567       if (b == CPP_NAME)        return CPP_NUMBER;
1568       if (b == CPP_DOT)         return CPP_NUMBER;
1569       /* Numbers cannot have length zero, so this is safe.  */
1570       if ((b == CPP_PLUS || b == CPP_MINUS)
1571           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1572         return CPP_NUMBER;
1573       break;
1574
1575     default:
1576       break;
1577     }
1578
1579   return CPP_EOF;
1580 }
1581
1582 /* Returns nonzero if a space should be inserted to avoid an
1583    accidental token paste for output.  For simplicity, it is
1584    conservative, and occasionally advises a space where one is not
1585    needed, e.g. "." and ".2".  */
1586
1587 int
1588 cpp_avoid_paste (pfile, token1, token2)
1589      cpp_reader *pfile;
1590      const cpp_token *token1, *token2;
1591 {
1592   enum cpp_ttype a = token1->type, b = token2->type;
1593   cppchar_t c;
1594
1595   if (token1->flags & NAMED_OP)
1596     a = CPP_NAME;
1597   if (token2->flags & NAMED_OP)
1598     b = CPP_NAME;
1599
1600   c = EOF;
1601   if (token2->flags & DIGRAPH)
1602     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1603   else if (token_spellings[b].category == SPELL_OPERATOR)
1604     c = token_spellings[b].name[0];
1605
1606   /* Quickly get everything that can paste with an '='.  */
1607   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1608     return 1;
1609
1610   switch (a)
1611     {
1612     case CPP_GREATER:   return c == '>' || c == '?';
1613     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1614     case CPP_PLUS:      return c == '+';
1615     case CPP_MINUS:     return c == '-' || c == '>';
1616     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1617     case CPP_MOD:       return c == ':' || c == '>';
1618     case CPP_AND:       return c == '&';
1619     case CPP_OR:        return c == '|';
1620     case CPP_COLON:     return c == ':' || c == '>';
1621     case CPP_DEREF:     return c == '*';
1622     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1623     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1624     case CPP_NAME:      return ((b == CPP_NUMBER
1625                                  && name_p (pfile, &token2->val.str))
1626                                 || b == CPP_NAME
1627                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1628     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1629                                 || c == '.' || c == '+' || c == '-');
1630     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1631                                 && token1->val.c == '@'
1632                                 && (b == CPP_NAME || b == CPP_STRING));
1633     default:            break;
1634     }
1635
1636   return 0;
1637 }
1638
1639 /* Output all the remaining tokens on the current line, and a newline
1640    character, to FP.  Leading whitespace is removed.  */
1641 void
1642 cpp_output_line (pfile, fp)
1643      cpp_reader *pfile;
1644      FILE *fp;
1645 {
1646   cpp_token token;
1647
1648   cpp_get_token (pfile, &token);
1649   token.flags &= ~PREV_WHITE;
1650   while (token.type != CPP_EOF)
1651     {
1652       cpp_output_token (&token, fp);
1653       cpp_get_token (pfile, &token);
1654     }
1655
1656   putc ('\n', fp);
1657 }
1658
/* Return the numeric value (0 to 15) of the hexadecimal digit C.
   Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';

  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');

  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  /* Callers are expected to have checked the digit already.  */
  abort ();
}
1672
1673 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1674
1675    [lex.charset]: The character designated by the universal character
1676    name \UNNNNNNNN is that character whose character short name in
1677    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1678    universal character name \uNNNN is that character whose character
1679    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1680    for a universal character name is less than 0x20 or in the range
1681    0x7F-0x9F (inclusive), or if the universal character name
1682    designates a character in the basic source character set, then the
1683    program is ill-formed.
1684
1685    We assume that wchar_t is Unicode, so we don't need to do any
1686    mapping.  Is this ever wrong?  */
1687
1688 static unsigned int
1689 read_ucs (pfile, pstr, limit, length)
1690      cpp_reader *pfile;
1691      const unsigned char **pstr;
1692      const unsigned char *limit;
1693      unsigned int length;
1694 {
1695   const unsigned char *p = *pstr;
1696   unsigned int c, code = 0;
1697
1698   for (; length; --length)
1699     {
1700       if (p >= limit)
1701         {
1702           cpp_error (pfile, "incomplete universal-character-name");
1703           break;
1704         }
1705
1706       c = *p;
1707       if (ISXDIGIT (c))
1708         {
1709           code = (code << 4) + hex_digit_value (c);
1710           p++;
1711         }
1712       else
1713         {
1714           cpp_error (pfile,
1715                      "non-hex digit '%c' in universal-character-name", c);
1716           break;
1717         }
1718
1719     }
1720
1721 #ifdef TARGET_EBCDIC
1722   cpp_error (pfile, "universal-character-name on EBCDIC target");
1723   code = 0x3f;  /* EBCDIC invalid character */
1724 #else
1725   if (code > 0x9f && !(code & 0x80000000))
1726     ; /* True extended character, OK.  */
1727   else if (code >= 0x20 && code < 0x7f)
1728     {
1729       /* ASCII printable character.  The C character set consists of all of
1730          these except $, @ and `.  We use hex escapes so that this also
1731          works with EBCDIC hosts.  */
1732       if (code != 0x24 && code != 0x40 && code != 0x60)
1733         cpp_error (pfile, "universal-character-name used for '%c'", code);
1734     }
1735   else
1736     cpp_error (pfile, "invalid universal-character-name");
1737 #endif
1738
1739   *pstr = p;
1740   return code;
1741 }
1742
1743 /* Interpret an escape sequence, and return its value.  PSTR points to
1744    the input pointer, which is just after the backslash.  LIMIT is how
1745    much text we have.  MASK is the precision for the target type (char
1746    or wchar_t).  TRADITIONAL, if true, does not interpret escapes that
1747    did not exist in traditional C.  */
1748
1749 static unsigned int
1750 parse_escape (pfile, pstr, limit, mask, traditional)
1751      cpp_reader *pfile;
1752      const unsigned char **pstr;
1753      const unsigned char *limit;
1754      HOST_WIDE_INT mask;
1755      int traditional;
1756 {
1757   int unknown = 0;
1758   const unsigned char *str = *pstr;
1759   unsigned int c = *str++;
1760
1761   switch (c)
1762     {
1763     case '\\': case '\'': case '"': case '?': break;
1764     case 'b': c = TARGET_BS;      break;
1765     case 'f': c = TARGET_FF;      break;
1766     case 'n': c = TARGET_NEWLINE; break;
1767     case 'r': c = TARGET_CR;      break;
1768     case 't': c = TARGET_TAB;     break;
1769     case 'v': c = TARGET_VT;      break;
1770
1771     case '(': case '{': case '[': case '%':
1772       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1773          '\%' is used to prevent SCCS from getting confused.  */
1774       unknown = CPP_PEDANTIC (pfile);
1775       break;
1776
1777     case 'a':
1778       if (CPP_WTRADITIONAL (pfile))
1779         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1780       if (!traditional)
1781         c = TARGET_BELL;
1782       break;
1783
1784     case 'e': case 'E':
1785       if (CPP_PEDANTIC (pfile))
1786         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1787       c = TARGET_ESC;
1788       break;
1789
1790       /* Warnings and support checks handled by read_ucs().  */
1791     case 'u': case 'U':
1792       if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
1793         {
1794           if (CPP_WTRADITIONAL (pfile))
1795             cpp_warning (pfile,
1796                          "the meaning of '\\%c' varies with -traditional", c);
1797           c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
1798         }
1799       else
1800         unknown = 1;
1801       break;
1802
1803     case 'x':
1804       if (CPP_WTRADITIONAL (pfile))
1805         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1806
1807       if (!traditional)
1808         {
1809           unsigned int i = 0, overflow = 0;
1810           int digits_found = 0;
1811
1812           while (str < limit)
1813             {
1814               c = *str;
1815               if (! ISXDIGIT (c))
1816                 break;
1817               str++;
1818               overflow |= i ^ (i << 4 >> 4);
1819               i = (i << 4) + hex_digit_value (c);
1820               digits_found = 1;
1821             }
1822
1823           if (!digits_found)
1824             cpp_error (pfile, "\\x used with no following hex digits");
1825
1826           if (overflow | (i != (i & mask)))
1827             {
1828               cpp_pedwarn (pfile, "hex escape sequence out of range");
1829               i &= mask;
1830             }
1831           c = i;
1832         }
1833       break;
1834
1835     case '0':  case '1':  case '2':  case '3':
1836     case '4':  case '5':  case '6':  case '7':
1837       {
1838         unsigned int i = c - '0';
1839         int count = 0;
1840
1841         while (str < limit && ++count < 3)
1842           {
1843             c = *str;
1844             if (c < '0' || c > '7')
1845               break;
1846             str++;
1847             i = (i << 3) + c - '0';
1848           }
1849
1850         if (i != (i & mask))
1851           {
1852             cpp_pedwarn (pfile, "octal escape sequence out of range");
1853             i &= mask;
1854           }
1855         c = i;
1856       }
1857       break;
1858
1859     default:
1860       unknown = 1;
1861       break;
1862     }
1863
1864   if (unknown)
1865     {
1866       if (ISGRAPH (c))
1867         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1868       else
1869         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1870     }
1871
1872   *pstr = str;
1873   return c;
1874 }
1875
/* Bit widths used by cpp_interpret_charconst for narrow and wide
   character constants.  Unless something has already defined the
   MAX_ variants, fall back to the plain type sizes.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1883
1884 /* Interpret a (possibly wide) character constant in TOKEN.
1885    WARN_MULTI warns about multi-character charconsts, if not
1886    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1887    that did not exist in traditional C.  PCHARS_SEEN points to a
1888    variable that is filled in with the number of characters seen.  */
1889 HOST_WIDE_INT
1890 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1891      cpp_reader *pfile;
1892      const cpp_token *token;
1893      int warn_multi;
1894      int traditional;
1895      unsigned int *pchars_seen;
1896 {
1897   const unsigned char *str = token->val.str.text;
1898   const unsigned char *limit = str + token->val.str.len;
1899   unsigned int chars_seen = 0;
1900   unsigned int width, max_chars, c;
1901   unsigned HOST_WIDE_INT mask;
1902   HOST_WIDE_INT result = 0;
1903
1904 #ifdef MULTIBYTE_CHARS
1905   (void) local_mbtowc (NULL, NULL, 0);
1906 #endif
1907
1908   /* Width in bits.  */
1909   if (token->type == CPP_CHAR)
1910     width = MAX_CHAR_TYPE_SIZE;
1911   else
1912     width = MAX_WCHAR_TYPE_SIZE;
1913
1914   if (width < HOST_BITS_PER_WIDE_INT)
1915     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1916   else
1917     mask = ~0;
1918   max_chars = HOST_BITS_PER_WIDE_INT / width;
1919
1920   while (str < limit)
1921     {
1922 #ifdef MULTIBYTE_CHARS
1923       wchar_t wc;
1924       int char_len;
1925
1926       char_len = local_mbtowc (&wc, str, limit - str);
1927       if (char_len == -1)
1928         {
1929           cpp_warning (pfile, "ignoring invalid multibyte character");
1930           c = *str++;
1931         }
1932       else
1933         {
1934           str += char_len;
1935           c = wc;
1936         }
1937 #else
1938       c = *str++;
1939 #endif
1940
1941       if (c == '\\')
1942         {
1943           c = parse_escape (pfile, &str, limit, mask, traditional);
1944           if (width < HOST_BITS_PER_WIDE_INT && c > mask)
1945             cpp_pedwarn (pfile, "escape sequence out of range for character");
1946         }
1947
1948 #ifdef MAP_CHARACTER
1949       if (ISPRINT (c))
1950         c = MAP_CHARACTER (c);
1951 #endif
1952
1953       /* Merge character into result; ignore excess chars.  */
1954       if (++chars_seen <= max_chars)
1955         {
1956           if (width < HOST_BITS_PER_WIDE_INT)
1957             result = (result << width) | (c & mask);
1958           else
1959             result = c;
1960         }
1961     }
1962
1963   if (chars_seen == 0)
1964     cpp_error (pfile, "empty character constant");
1965   else if (chars_seen > max_chars)
1966     {
1967       chars_seen = max_chars;
1968       cpp_error (pfile, "character constant too long");
1969     }
1970   else if (chars_seen > 1 && !traditional && warn_multi)
1971     cpp_warning (pfile, "multi-character character constant");
1972
1973   /* If char type is signed, sign-extend the constant.  The
1974      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1975   if (token->type == CPP_CHAR && chars_seen)
1976     {
1977       unsigned int nbits = chars_seen * width;
1978       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1979
1980       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1981           || ((result >> (nbits - 1)) & 1) == 0)
1982         result &= mask;
1983       else
1984         result |= ~mask;
1985     }
1986
1987   *pchars_seen = chars_seen;
1988   return result;
1989 }
1990
1991 /* Memory pools.  */
1992
/* Layout probe used to compute DEFAULT_ALIGNMENT.  A lone char is
   followed by a union of a double and a pointer; the compiler must
   pad after C so that U is properly aligned, so the offset of U is
   the alignment required by the stricter of the two union members.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment used when a pool is initialized with ALIGN == 0, and the
   granularity to which new_chunk rounds chunk sizes.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2004
2005 static int
2006 chunk_suitable (pool, chunk, size)
2007      cpp_pool *pool;
2008      cpp_chunk *chunk;
2009      unsigned int size;
2010 {
2011   /* Being at least twice SIZE means we can use memcpy in
2012      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2013      anyway.  */
2014   return (chunk && pool->locked != chunk
2015           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2016 }
2017
/* Advance POOL to a chunk with room for at least LEN more bytes than
   POOL_ROOM (pool) currently reports, carrying the POOL_ROOM (pool)
   bytes at POOL_FRONT (pool) over to the start of that chunk.

   The chunk following the current one is reused if chunk_suitable
   accepts it; otherwise a fresh chunk is allocated and linked in
   directly after the current one.

   Returns the end of the new pool.  PTR points to a char in the old
   pool, and is updated to point to the same char in the new pool.
   PTR may be null, in which case no pointer is adjusted.

   NOTE(review): the POOL_* macros are defined outside this file
   section; the descriptions here assume POOL_FRONT, POOL_LIMIT,
   POOL_ROOM and POOL_SIZE all refer to POOL's current chunk --
   confirm against their definitions.  */
unsigned char *
_cpp_next_chunk (pool, len, ptr)
     cpp_pool *pool;
     unsigned int len;
     unsigned char **ptr;
{
  cpp_chunk *chunk = pool->cur->next;

  /* LEN is the minimum size we want in the new pool.  It has to cover
     the bytes carried over from the old chunk as well as the caller's
     request.  */
  len += POOL_ROOM (pool);
  if (! chunk_suitable (pool, chunk, len))
    {
      /* No usable successor: allocate one and splice it in between
         the current chunk and its old successor.  */
      chunk = new_chunk (POOL_SIZE (pool) * 2 + len);

      chunk->next = pool->cur->next;
      pool->cur->next = chunk;
    }

  /* Update the pointer before changing chunk's front.  CHUNK can be
     the current chunk itself when the pool's chunk list is circular
     (see the TEMP case in _cpp_init_pool), so the order matters.  */
  if (ptr)
    *ptr += chunk->base - POOL_FRONT (pool);

  /* chunk_suitable requires at least twice LEN bytes, which is what
     allows memcpy rather than memmove here.  */
  memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
  chunk->front = chunk->base;

  pool->cur = chunk;
  return POOL_LIMIT (pool);
}
2048
2049 static cpp_chunk *
2050 new_chunk (size)
2051      unsigned int size;
2052 {
2053   unsigned char *base;
2054   cpp_chunk *result;
2055
2056   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2057   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2058   /* Put the chunk descriptor at the end.  Then chunk overruns will
2059      cause obvious chaos.  */
2060   result = (cpp_chunk *) (base + size);
2061   result->base = base;
2062   result->front = base;
2063   result->limit = base + size;
2064   result->next = 0;
2065
2066   return result;
2067 }
2068
2069 void
2070 _cpp_init_pool (pool, size, align, temp)
2071      cpp_pool *pool;
2072      unsigned int size, align, temp;
2073 {
2074   if (align == 0)
2075     align = DEFAULT_ALIGNMENT;
2076   if (align & (align - 1))
2077     abort ();
2078   pool->align = align;
2079   pool->cur = new_chunk (size);
2080   pool->locked = 0;
2081   pool->locks = 0;
2082   if (temp)
2083     pool->cur->next = pool->cur;
2084 }
2085
2086 void
2087 _cpp_lock_pool (pool)
2088      cpp_pool *pool;
2089 {
2090   if (pool->locks++ == 0)
2091     pool->locked = pool->cur;
2092 }
2093
2094 void
2095 _cpp_unlock_pool (pool)
2096      cpp_pool *pool;
2097 {
2098   if (--pool->locks == 0)
2099     pool->locked = 0;
2100 }
2101
2102 void
2103 _cpp_free_pool (pool)
2104      cpp_pool *pool;
2105 {
2106   cpp_chunk *chunk = pool->cur, *next;
2107
2108   do
2109     {
2110       next = chunk->next;
2111       free (chunk->base);
2112       chunk = next;
2113     }
2114   while (chunk && chunk != pool->cur);
2115 }
2116
2117 /* Reserve LEN bytes from a memory pool.  */
2118 unsigned char *
2119 _cpp_pool_reserve (pool, len)
2120      cpp_pool *pool;
2121      unsigned int len;
2122 {
2123   len = POOL_ALIGN (len, pool->align);
2124   if (len > (unsigned int) POOL_ROOM (pool))
2125     _cpp_next_chunk (pool, len, 0);
2126
2127   return POOL_FRONT (pool);
2128 }
2129
2130 /* Allocate LEN bytes from a memory pool.  */
2131 unsigned char *
2132 _cpp_pool_alloc (pool, len)
2133      cpp_pool *pool;
2134      unsigned int len;
2135 {
2136   unsigned char *result = _cpp_pool_reserve (pool, len);
2137
2138   POOL_COMMIT (pool, len);
2139   return result;
2140 }