gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and it's length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 102                                    const unsigned char *, unsigned int *));
 103
 104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 106 static unsigned int hex_digit_value PARAMS ((unsigned int));
 107
 108 /* Utility routine:
 109
 110    Compares, the token TOKEN to the NUL-terminated string STRING.
 111    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 112
 113 int
 114 cpp_ideq (token, string)
 115      const cpp_token *token;
 116      const char *string;
 117 {
 118   if (token->type != CPP_NAME)
 119     return 0;
 120
 121   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 122 }
 123
 124 /* Call when meeting a newline.  Returns the character after the newline
 125    (or carriage-return newline combination), or EOF.  */
 126 static cppchar_t
 127 handle_newline (buffer, newline_char)
 128      cpp_buffer *buffer;
 129      cppchar_t newline_char;
 130 {
 131   cppchar_t next = EOF;
 132
 133   buffer->col_adjust = 0;
 134   buffer->lineno++;
 135   buffer->line_base = buffer->cur;
 136
 137   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 138   if (buffer->cur < buffer->rlimit)
 139     {
 140       next = *buffer->cur++;
 141       if (next + newline_char == '\r' + '\n')
 142         {
 143           buffer->line_base = buffer->cur;
 144           if (buffer->cur < buffer->rlimit)
 145             next = *buffer->cur++;
 146           else
 147             next = EOF;
 148         }
 149     }
 150
 151   buffer->read_ahead = next;
 152   return next;
 153 }
 154
 155 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 156    encountered.  It warns if necessary, and returns true if the
 157    trigraph should be honoured.  FROM_CHAR is the third character of a
 158    trigraph, and presumed to be the previous character for position
 159    reporting.  */
 160 static int
 161 trigraph_ok (pfile, from_char)
 162      cpp_reader *pfile;
 163      cppchar_t from_char;
 164 {
 165   int accept = CPP_OPTION (pfile, trigraphs);
 166
 167   /* Don't warn about trigraphs in comments.  */
 168   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 169     {
 170       cpp_buffer *buffer = pfile->buffer;
 171       if (accept)
 172         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 173                                "trigraph ??%c converted to %c",
 174                                (int) from_char,
 175                                (int) _cpp_trigraph_map[from_char]);
 176       else if (buffer->cur != buffer->last_Wtrigraphs)
 177         {
 178           buffer->last_Wtrigraphs = buffer->cur;
 179           cpp_warning_with_line (pfile, buffer->lineno,
 180                                  CPP_BUF_COL (buffer) - 2,
 181                                  "trigraph ??%c ignored", (int) from_char);
 182         }
 183     }
 184
 185   return accept;
 186 }
 187
 188 /* Assumes local variables buffer and result.  */
 189 #define ACCEPT_CHAR(t) \
 190   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 191
 192 /* When we move to multibyte character sets, add to these something
 193    that saves and restores the state of the multibyte conversion
 194    library.  This probably involves saving and restoring a "cookie".
 195    In the case of glibc it is an 8-byte structure, so is not a high
 196    overhead operation.  In any case, it's out of the fast path.  */
 197 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 198 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 199
 200 /* Skips any escaped newlines introduced by NEXT, which is either a
 201    '?' or a '\\'.  Returns the next character, which will also have
 202    been placed in buffer->read_ahead.  This routine performs
 203    preprocessing stages 1 and 2 of the ISO C standard.  */
 204 static cppchar_t
 205 skip_escaped_newlines (buffer, next)
 206      cpp_buffer *buffer;
 207      cppchar_t next;
 208 {
 209   /* Only do this if we apply stages 1 and 2.  */
 210   if (!buffer->from_stage3)
 211     {
 212       cppchar_t next1;
 213       const unsigned char *saved_cur;
 214       int space;
 215
 216       do
 217         {
 218           if (buffer->cur == buffer->rlimit)
 219             break;
 220
 221           SAVE_STATE ();
 222           if (next == '?')
 223             {
 224               next1 = *buffer->cur++;
 225               if (next1 != '?' || buffer->cur == buffer->rlimit)
 226                 {
 227                   RESTORE_STATE ();
 228                   break;
 229                 }
 230
 231               next1 = *buffer->cur++;
 232               if (!_cpp_trigraph_map[next1]
 233                   || !trigraph_ok (buffer->pfile, next1))
 234                 {
 235                   RESTORE_STATE ();
 236                   break;
 237                 }
 238
 239               /* We have a full trigraph here.  */
 240               next = _cpp_trigraph_map[next1];
 241               if (next != '\\' || buffer->cur == buffer->rlimit)
 242                 break;
 243               SAVE_STATE ();
 244             }
 245
 246           /* We have a backslash, and room for at least one more character.  */
 247           space = 0;
 248           do
 249             {
 250               next1 = *buffer->cur++;
 251               if (!is_nvspace (next1))
 252                 break;
 253               space = 1;
 254             }
 255           while (buffer->cur < buffer->rlimit);
 256
 257           if (!is_vspace (next1))
 258             {
 259               RESTORE_STATE ();
 260               break;
 261             }
 262
 263           if (space && !buffer->pfile->state.lexing_comment)
 264             cpp_warning (buffer->pfile,
 265                          "backslash and newline separated by space");
 266
 267           next = handle_newline (buffer, next1);
 268           if (next == EOF)
 269             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 270         }
 271       while (next == '\\' || next == '?');
 272     }
 273
 274   buffer->read_ahead = next;
 275   return next;
 276 }
 277
 278 /* Obtain the next character, after trigraph conversion and skipping
 279    an arbitrary string of escaped newlines.  The common case of no
 280    trigraphs or escaped newlines falls through quickly.  */
 281 static cppchar_t
 282 get_effective_char (buffer)
 283      cpp_buffer *buffer;
 284 {
 285   cppchar_t next = EOF;
 286
 287   if (buffer->cur < buffer->rlimit)
 288     {
 289       next = *buffer->cur++;
 290
 291       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 292          can introduce escaped newlines, which we want to skip, or
 293          UCNs, which, depending upon lexer state, we will handle in
 294          the future.  */
 295       if (next == '?' || next == '\\')
 296         next = skip_escaped_newlines (buffer, next);
 297     }
 298
 299   buffer->read_ahead = next;
 300   return next;
 301 }
 302
 303 /* Skip a C-style block comment.  We find the end of the comment by
 304    seeing if an asterisk is before every '/' we encounter.  Returns
 305    non-zero if comment terminated by EOF, zero otherwise.  */
 306 static int
 307 skip_block_comment (pfile)
 308      cpp_reader *pfile;
 309 {
 310   cpp_buffer *buffer = pfile->buffer;
 311   cppchar_t c = EOF, prevc = EOF;
 312
 313   pfile->state.lexing_comment = 1;
 314   while (buffer->cur != buffer->rlimit)
 315     {
 316       prevc = c, c = *buffer->cur++;
 317
 318     next_char:
 319       /* FIXME: For speed, create a new character class of characters
 320          of interest inside block comments.  */
 321       if (c == '?' || c == '\\')
 322         c = skip_escaped_newlines (buffer, c);
 323
 324       /* People like decorating comments with '*', so check for '/'
 325          instead for efficiency.  */
 326       if (c == '/')
 327         {
 328           if (prevc == '*')
 329             break;
 330
 331           /* Warn about potential nested comments, but not if the '/'
 332              comes immediately before the true comment delimeter.
 333              Don't bother to get it right across escaped newlines.  */
 334           if (CPP_OPTION (pfile, warn_comments)
 335               && buffer->cur != buffer->rlimit)
 336             {
 337               prevc = c, c = *buffer->cur++;
 338               if (c == '*' && buffer->cur != buffer->rlimit)
 339                 {
 340                   prevc = c, c = *buffer->cur++;
 341                   if (c != '/')
 342                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 343                                            CPP_BUF_COL (buffer),
 344                                            "\"/*\" within comment");
 345                 }
 346               goto next_char;
 347             }
 348         }
 349       else if (is_vspace (c))
 350         {
 351           prevc = c, c = handle_newline (buffer, c);
 352           goto next_char;
 353         }
 354       else if (c == '\t')
 355         adjust_column (pfile);
 356     }
 357
 358   pfile->state.lexing_comment = 0;
 359   buffer->read_ahead = EOF;
 360   return c != '/' || prevc != '*';
 361 }
 362
 363 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 364    non-zero if a multiline comment.  The following new line, if any,
 365    is left in buffer->read_ahead.  */
 366 static int
 367 skip_line_comment (pfile)
 368      cpp_reader *pfile;
 369 {
 370   cpp_buffer *buffer = pfile->buffer;
 371   unsigned int orig_lineno = buffer->lineno;
 372   cppchar_t c;
 373
 374   pfile->state.lexing_comment = 1;
 375   do
 376     {
 377       c = EOF;
 378       if (buffer->cur == buffer->rlimit)
 379         break;
 380
 381       c = *buffer->cur++;
 382       if (c == '?' || c == '\\')
 383         c = skip_escaped_newlines (buffer, c);
 384     }
 385   while (!is_vspace (c));
 386
 387   pfile->state.lexing_comment = 0;
 388   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 389   return orig_lineno != buffer->lineno;
 390 }
 391
 392 /* pfile->buffer->cur is one beyond the \t character.  Update
 393    col_adjust so we track the column correctly.  */
 394 static void
 395 adjust_column (pfile)
 396      cpp_reader *pfile;
 397 {
 398   cpp_buffer *buffer = pfile->buffer;
 399   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 400
 401   /* Round it up to multiple of the tabstop, but subtract 1 since the
 402      tab itself occupies a character position.  */
 403   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 404                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 405 }
 406
 407 /* Skips whitespace, saving the next non-whitespace character.
 408    Adjusts pfile->col_adjust to account for tabs.  Without this,
 409    tokens might be assigned an incorrect column.  */
 410 static void
 411 skip_whitespace (pfile, c)
 412      cpp_reader *pfile;
 413      cppchar_t c;
 414 {
 415   cpp_buffer *buffer = pfile->buffer;
 416   unsigned int warned = 0;
 417
 418   do
 419     {
 420       /* Horizontal space always OK.  */
 421       if (c == ' ')
 422         ;
 423       else if (c == '\t')
 424         adjust_column (pfile);
 425       /* Just \f \v or \0 left.  */
 426       else if (c == '\0')
 427         {
 428           if (!warned)
 429             {
 430               cpp_warning (pfile, "null character(s) ignored");
 431               warned = 1;
 432             }
 433         }
 434       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 435         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 436                                CPP_BUF_COL (buffer),
 437                                "%s in preprocessing directive",
 438                                c == '\f' ? "form feed" : "vertical tab");
 439
 440       c = EOF;
 441       if (buffer->cur == buffer->rlimit)
 442         break;
 443       c = *buffer->cur++;
 444     }
 445   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 446   while (is_nvspace (c));
 447
 448   /* Remember the next character.  */
 449   buffer->read_ahead = c;
 450 }
 451
 452 /* See if the characters of a number token are valid in a name (no
 453    '.', '+' or '-').  */
 454 static int
 455 name_p (pfile, string)
 456      cpp_reader *pfile;
 457      const cpp_string *string;
 458 {
 459   unsigned int i;
 460
 461   for (i = 0; i < string->len; i++)
 462     if (!is_idchar (string->text[i]))
 463       return 0;
 464
 465   return 1;
 466 }
 467
 468 /* Parse an identifier, skipping embedded backslash-newlines.
 469    Calculate the hash value of the token while parsing, for improved
 470    performance.  The hashing algorithm *must* match cpp_lookup().  */
 471
 472 static cpp_hashnode *
 473 parse_identifier (pfile, c)
 474      cpp_reader *pfile;
 475      cppchar_t c;
 476 {
 477   cpp_hashnode *result;
 478   cpp_buffer *buffer = pfile->buffer;
 479   unsigned int saw_dollar = 0, len;
 480   struct obstack *stack = &pfile->hash_table->stack;
 481
 482   do
 483     {
 484       do
 485         {
 486           obstack_1grow (stack, c);
 487
 488           if (c == '$')
 489             saw_dollar++;
 490
 491           c = EOF;
 492           if (buffer->cur == buffer->rlimit)
 493             break;
 494
 495           c = *buffer->cur++;
 496         }
 497       while (is_idchar (c));
 498
 499       /* Potential escaped newline?  */
 500       if (c != '?' && c != '\\')
 501         break;
 502       c = skip_escaped_newlines (buffer, c);
 503     }
 504   while (is_idchar (c));
 505
 506   /* Remember the next character.  */
 507   buffer->read_ahead = c;
 508
 509   /* $ is not a identifier character in the standard, but is commonly
 510      accepted as an extension.  Don't warn about it in skipped
 511      conditional blocks.  */
 512   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 513     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 514
 515   /* Identifiers are null-terminated.  */
 516   len = obstack_object_size (stack);
 517   obstack_1grow (stack, '\0');
 518
 519   /* This routine commits the memory if necessary.  */
 520   result = (cpp_hashnode *)
 521     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 522
 523   /* Some identifiers require diagnostics when lexed.  */
 524   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 525     {
 526       /* It is allowed to poison the same identifier twice.  */
 527       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 528         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 529                    NODE_NAME (result));
 530
 531       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 532          replacement list of a variadic macro.  */
 533       if (result == pfile->spec_nodes.n__VA_ARGS__
 534           && !pfile->state.va_args_ok)
 535         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 536     }
 537
 538   return result;
 539 }
 540
 541 /* Parse a number, skipping embedded backslash-newlines.  */
 542 static void
 543 parse_number (pfile, number, c, leading_period)
 544      cpp_reader *pfile;
 545      cpp_string *number;
 546      cppchar_t c;
 547      int leading_period;
 548 {
 549   cpp_buffer *buffer = pfile->buffer;
 550   cpp_pool *pool = &pfile->ident_pool;
 551   unsigned char *dest, *limit;
 552
 553   dest = POOL_FRONT (pool);
 554   limit = POOL_LIMIT (pool);
 555
 556   /* Place a leading period.  */
 557   if (leading_period)
 558     {
 559       if (dest >= limit)
 560         limit = _cpp_next_chunk (pool, 0, &dest);
 561       *dest++ = '.';
 562     }
 563
 564   do
 565     {
 566       do
 567         {
 568           /* Need room for terminating null.  */
 569           if (dest + 1 >= limit)
 570             limit = _cpp_next_chunk (pool, 0, &dest);
 571           *dest++ = c;
 572
 573           c = EOF;
 574           if (buffer->cur == buffer->rlimit)
 575             break;
 576
 577           c = *buffer->cur++;
 578         }
 579       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 580
 581       /* Potential escaped newline?  */
 582       if (c != '?' && c != '\\')
 583         break;
 584       c = skip_escaped_newlines (buffer, c);
 585     }
 586   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 587
 588   /* Remember the next character.  */
 589   buffer->read_ahead = c;
 590
 591   /* Null-terminate the number.  */
 592   *dest = '\0';
 593
 594   number->text = POOL_FRONT (pool);
 595   number->len = dest - number->text;
 596   POOL_COMMIT (pool, number->len + 1);
 597 }
 598
 599 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 600 static void
 601 unterminated (pfile, term)
 602      cpp_reader *pfile;
 603      int term;
 604 {
 605   cpp_error (pfile, "missing terminating %c character", term);
 606
 607   if (term == '\"' && pfile->mlstring_pos.line
 608       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 609     {
 610       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 611                            pfile->mlstring_pos.col,
 612                            "possible start of unterminated string literal");
 613       pfile->mlstring_pos.line = 0;
 614     }
 615 }
 616
 617 /* Subroutine of parse_string.  */
 618 static int
 619 unescaped_terminator_p (pfile, dest)
 620      cpp_reader *pfile;
 621      const unsigned char *dest;
 622 {
 623   const unsigned char *start, *temp;
 624
 625   /* In #include-style directives, terminators are not escapeable.  */
 626   if (pfile->state.angled_headers)
 627     return 1;
 628
 629   start = POOL_FRONT (&pfile->ident_pool);
 630
 631   /* An odd number of consecutive backslashes represents an escaped
 632      terminator.  */
 633   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 634     ;
 635
 636   return ((dest - temp) & 1) == 0;
 637 }
 638
 639 /* Parses a string, character constant, or angle-bracketed header file
 640    name.  Handles embedded trigraphs and escaped newlines.  The stored
 641    string is guaranteed NUL-terminated, but it is not guaranteed that
 642    this is the first NUL since embedded NULs are preserved.
 643
 644    Multi-line strings are allowed, but they are deprecated.  */
 645 static void
 646 parse_string (pfile, token, terminator)
 647      cpp_reader *pfile;
 648      cpp_token *token;
 649      cppchar_t terminator;
 650 {
 651   cpp_buffer *buffer = pfile->buffer;
 652   cpp_pool *pool = &pfile->ident_pool;
 653   unsigned char *dest, *limit;
 654   cppchar_t c;
 655   unsigned int nulls = 0;
 656
 657   dest = POOL_FRONT (pool);
 658   limit = POOL_LIMIT (pool);
 659
 660   for (;;)
 661     {
 662       if (buffer->cur == buffer->rlimit)
 663         c = EOF;
 664       else
 665         c = *buffer->cur++;
 666
 667     have_char:
 668       /* We need space for the terminating NUL.  */
 669       if (dest >= limit)
 670         limit = _cpp_next_chunk (pool, 0, &dest);
 671
 672       if (c == EOF)
 673         {
 674           unterminated (pfile, terminator);
 675           break;
 676         }
 677
 678       /* Handle trigraphs, escaped newlines etc.  */
 679       if (c == '?' || c == '\\')
 680         c = skip_escaped_newlines (buffer, c);
 681
 682       if (c == terminator && unescaped_terminator_p (pfile, dest))
 683         {
 684           c = EOF;
 685           break;
 686         }
 687       else if (is_vspace (c))
 688         {
 689           /* In assembly language, silently terminate string and
 690              character literals at end of line.  This is a kludge
 691              around not knowing where comments are.  */
 692           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 693             break;
 694
 695           /* Character constants and header names may not extend over
 696              multiple lines.  In Standard C, neither may strings.
 697              Unfortunately, we accept multiline strings as an
 698              extension, except in #include family directives.  */
 699           if (terminator != '"' || pfile->state.angled_headers)
 700             {
 701               unterminated (pfile, terminator);
 702               break;
 703             }
 704
 705           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 706           if (pfile->mlstring_pos.line == 0)
 707             pfile->mlstring_pos = pfile->lexer_pos;
 708
 709           c = handle_newline (buffer, c);
 710           *dest++ = '\n';
 711           goto have_char;
 712         }
 713       else if (c == '\0')
 714         {
 715           if (nulls++ == 0)
 716             cpp_warning (pfile, "null character(s) preserved in literal");
 717         }
 718
 719       *dest++ = c;
 720     }
 721
 722   /* Remember the next character.  */
 723   buffer->read_ahead = c;
 724   *dest = '\0';
 725
 726   token->val.str.text = POOL_FRONT (pool);
 727   token->val.str.len = dest - token->val.str.text;
 728   POOL_COMMIT (pool, token->val.str.len + 1);
 729 }
 730
 731 /* The stored comment includes the comment start and any terminator.  */
 732 static void
 733 save_comment (pfile, token, from)
 734      cpp_reader *pfile;
 735      cpp_token *token;
 736      const unsigned char *from;
 737 {
 738   unsigned char *buffer;
 739   unsigned int len;
 740
 741   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 742   /* C++ comments probably (not definitely) have moved past a new
 743      line, which we don't want to save in the comment.  */
 744   if (pfile->buffer->read_ahead != EOF)
 745     len--;
 746   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 747
 748   token->type = CPP_COMMENT;
 749   token->val.str.len = len;
 750   token->val.str.text = buffer;
 751
 752   buffer[0] = '/';
 753   memcpy (buffer + 1, from, len - 1);
 754 }
 755
 756 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 757    want to avoid stepping back when lexing %:%X.  */
 758 static void
 759 lex_percent (buffer, result)
 760      cpp_buffer *buffer;
 761      cpp_token *result;
 762 {
 763   cppchar_t c;
 764
 765   result->type = CPP_MOD;
 766   /* Parsing %:%X could leave an extra character.  */
 767   if (buffer->extra_char == EOF)
 768     c = get_effective_char (buffer);
 769   else
 770     {
 771       c = buffer->read_ahead = buffer->extra_char;
 772       buffer->extra_char = EOF;
 773     }
 774
 775   if (c == '=')
 776     ACCEPT_CHAR (CPP_MOD_EQ);
 777   else if (CPP_OPTION (buffer->pfile, digraphs))
 778     {
 779       if (c == ':')
 780         {
 781           result->flags |= DIGRAPH;
 782           ACCEPT_CHAR (CPP_HASH);
 783           if (get_effective_char (buffer) == '%')
 784             {
 785               buffer->extra_char = get_effective_char (buffer);
 786               if (buffer->extra_char == ':')
 787                 {
 788                   buffer->extra_char = EOF;
 789                   ACCEPT_CHAR (CPP_PASTE);
 790                 }
 791               else
 792                 /* We'll catch the extra_char when we're called back.  */
 793                 buffer->read_ahead = '%';
 794             }
 795         }
 796       else if (c == '>')
 797         {
 798           result->flags |= DIGRAPH;
 799           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 800         }
 801     }
 802 }
 803
 804 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 805    want to avoid stepping back when lexing '...' or '.123'.  In the
 806    latter case we should also set a flag for parse_number.  */
 807 static void
 808 lex_dot (pfile, result)
 809      cpp_reader *pfile;
 810      cpp_token *result;
 811 {
 812   cpp_buffer *buffer = pfile->buffer;
 813   cppchar_t c;
 814
 815   /* Parsing ..X could leave an extra character.  */
 816   if (buffer->extra_char == EOF)
 817     c = get_effective_char (buffer);
 818   else
 819     {
 820       c = buffer->read_ahead = buffer->extra_char;
 821       buffer->extra_char = EOF;
 822     }
 823
 824   /* All known character sets have 0...9 contiguous.  */
 825   if (c >= '0' && c <= '9')
 826     {
 827       result->type = CPP_NUMBER;
 828       parse_number (pfile, &result->val.str, c, 1);
 829     }
 830   else
 831     {
 832       result->type = CPP_DOT;
 833       if (c == '.')
 834         {
 835           buffer->extra_char = get_effective_char (buffer);
 836           if (buffer->extra_char == '.')
 837             {
 838               buffer->extra_char = EOF;
 839               ACCEPT_CHAR (CPP_ELLIPSIS);
 840             }
 841           else
 842             /* We'll catch the extra_char when we're called back.  */
 843             buffer->read_ahead = '.';
 844         }
 845       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 846         ACCEPT_CHAR (CPP_DOT_STAR);
 847     }
 848 }
 849
 850 void
 851 _cpp_lex_token (pfile, result)
 852      cpp_reader *pfile;
 853      cpp_token *result;
 854 {
 855   cppchar_t c;
 856   cpp_buffer *buffer;
 857   const unsigned char *comment_start;
 858   unsigned char bol;
 859
 860  skip:
 861   bol = pfile->state.next_bol;
 862  done_directive:
 863   buffer = pfile->buffer;
 864   pfile->state.next_bol = 0;
 865   result->flags = buffer->saved_flags;
 866   buffer->saved_flags = 0;
 867  next_char:
 868   pfile->lexer_pos.line = buffer->lineno;
 869  next_char2:
 870   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 871
 872   c = buffer->read_ahead;
 873   if (c == EOF && buffer->cur < buffer->rlimit)
 874     {
 875       c = *buffer->cur++;
 876       pfile->lexer_pos.col++;
 877     }
 878
 879  do_switch:
 880   buffer->read_ahead = EOF;
 881   switch (c)
 882     {
 883     case EOF:
 884       /* Non-empty files should end in a newline.  Checking "bol" too
 885           prevents multiple warnings when hitting the EOF more than
 886           once, like in a directive.  Don't warn for command line and
 887           _Pragma buffers.  */
 888       if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
 889         cpp_pedwarn (pfile, "no newline at end of file");
 890       pfile->state.next_bol = 1;
 891       pfile->skipping = 0;      /* In case missing #endif.  */
 892       result->type = CPP_EOF;
 893       /* Don't do MI optimisation.  */
 894       return;
 895
 896     case ' ': case '\t': case '\f': case '\v': case '\0':
 897       skip_whitespace (pfile, c);
 898       result->flags |= PREV_WHITE;
 899       goto next_char2;
 900
 901     case '\n': case '\r':
 902       if (!pfile->state.in_directive)
 903         {
 904           handle_newline (buffer, c);
 905           bol = 1;
 906           pfile->lexer_pos.output_line = buffer->lineno;
 907           /* This is a new line, so clear any white space flag.
 908              Newlines in arguments are white space (6.10.3.10);
 909              parse_arg takes care of that.  */
 910           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 911           goto next_char;
 912         }
 913
 914       /* Don't let directives spill over to the next line.  */
 915       buffer->read_ahead = c;
 916       pfile->state.next_bol = 1;
 917       result->type = CPP_EOF;
 918       /* Don't break; pfile->skipping might be true.  */
 919       return;
 920
 921     case '?':
 922     case '\\':
 923       /* These could start an escaped newline, or '?' a trigraph.  Let
 924          skip_escaped_newlines do all the work.  */
 925       {
 926         unsigned int lineno = buffer->lineno;
 927
 928         c = skip_escaped_newlines (buffer, c);
 929         if (lineno != buffer->lineno)
 930           /* We had at least one escaped newline of some sort, and the
 931              next character is in buffer->read_ahead.  Update the
 932              token's line and column.  */
 933             goto next_char;
 934
 935         /* We are either the original '?' or '\\', or a trigraph.  */
 936         result->type = CPP_QUERY;
 937         buffer->read_ahead = EOF;
 938         if (c == '\\')
 939           goto random_char;
 940         else if (c != '?')
 941           goto do_switch;
 942       }
 943       break;
 944
 945     case '0': case '1': case '2': case '3': case '4':
 946     case '5': case '6': case '7': case '8': case '9':
 947       result->type = CPP_NUMBER;
 948       parse_number (pfile, &result->val.str, c, 0);
 949       break;
 950
 951     case '$':
 952       if (!CPP_OPTION (pfile, dollars_in_ident))
 953         goto random_char;
 954       /* Fall through... */
 955
 956     case '_':
 957     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 958     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 959     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 960     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 961     case 'y': case 'z':
 962     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 963     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 964     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 965     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 966     case 'Y': case 'Z':
 967       result->type = CPP_NAME;
 968       result->val.node = parse_identifier (pfile, c);
 969
 970       /* 'L' may introduce wide characters or strings.  */
 971       if (result->val.node == pfile->spec_nodes.n_L)
 972         {
 973           c = buffer->read_ahead; /* For make_string.  */
 974           if (c == '\'' || c == '"')
 975             {
 976               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 977               goto make_string;
 978             }
 979         }
 980       /* Convert named operators to their proper types.  */
 981       else if (result->val.node->flags & NODE_OPERATOR)
 982         {
 983           result->flags |= NAMED_OP;
 984           result->type = result->val.node->value.operator;
 985         }
 986       break;
 987
 988     case '\'':
 989     case '"':
 990       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 991     make_string:
 992       parse_string (pfile, result, c);
 993       break;
 994
 995     case '/':
 996       /* A potential block or line comment.  */
 997       comment_start = buffer->cur;
 998       result->type = CPP_DIV;
 999       c = get_effective_char (buffer);
1000       if (c == '=')
1001         ACCEPT_CHAR (CPP_DIV_EQ);
1002       if (c != '/' && c != '*')
1003         break;
1004       if (buffer->from_stage3)
1005         break;
1006
1007       if (c == '*')
1008         {
1009           if (skip_block_comment (pfile))
1010             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1011                                  pfile->lexer_pos.col,
1012                                  "unterminated comment");
1013         }
1014       else
1015         {
1016           if (!CPP_OPTION (pfile, cplusplus_comments)
1017               && !CPP_IN_SYSTEM_HEADER (pfile))
1018             break;
1019
1020           /* Warn about comments only if pedantically GNUC89, and not
1021              in system headers.  */
1022           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1023               && ! buffer->warned_cplusplus_comments)
1024             {
1025               cpp_pedwarn (pfile,
1026                            "C++ style comments are not allowed in ISO C89");
1027               cpp_pedwarn (pfile,
1028                            "(this will be reported only once per input file)");
1029               buffer->warned_cplusplus_comments = 1;
1030             }
1031
1032           /* Skip_line_comment updates buffer->read_ahead.  */
1033           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1034             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1035                                    pfile->lexer_pos.col,
1036                                    "multi-line comment");
1037         }
1038
1039       /* Skipping the comment has updated buffer->read_ahead.  */
1040       if (!pfile->state.save_comments)
1041         {
1042           result->flags |= PREV_WHITE;
1043           goto next_char;
1044         }
1045
1046       /* Save the comment as a token in its own right.  */
1047       save_comment (pfile, result, comment_start);
1048       /* Don't do MI optimisation.  */
1049       return;
1050
1051     case '<':
1052       if (pfile->state.angled_headers)
1053         {
1054           result->type = CPP_HEADER_NAME;
1055           c = '>';              /* terminator.  */
1056           goto make_string;
1057         }
1058
1059       result->type = CPP_LESS;
1060       c = get_effective_char (buffer);
1061       if (c == '=')
1062         ACCEPT_CHAR (CPP_LESS_EQ);
1063       else if (c == '<')
1064         {
1065           ACCEPT_CHAR (CPP_LSHIFT);
1066           if (get_effective_char (buffer) == '=')
1067             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1068         }
1069       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1070         {
1071           ACCEPT_CHAR (CPP_MIN);
1072           if (get_effective_char (buffer) == '=')
1073             ACCEPT_CHAR (CPP_MIN_EQ);
1074         }
1075       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1076         {
1077           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1078           result->flags |= DIGRAPH;
1079         }
1080       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1081         {
1082           ACCEPT_CHAR (CPP_OPEN_BRACE);
1083           result->flags |= DIGRAPH;
1084         }
1085       break;
1086
1087     case '>':
1088       result->type = CPP_GREATER;
1089       c = get_effective_char (buffer);
1090       if (c == '=')
1091         ACCEPT_CHAR (CPP_GREATER_EQ);
1092       else if (c == '>')
1093         {
1094           ACCEPT_CHAR (CPP_RSHIFT);
1095           if (get_effective_char (buffer) == '=')
1096             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1097         }
1098       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1099         {
1100           ACCEPT_CHAR (CPP_MAX);
1101           if (get_effective_char (buffer) == '=')
1102             ACCEPT_CHAR (CPP_MAX_EQ);
1103         }
1104       break;
1105
1106     case '%':
1107       lex_percent (buffer, result);
1108       if (result->type == CPP_HASH)
1109         goto do_hash;
1110       break;
1111
1112     case '.':
1113       lex_dot (pfile, result);
1114       break;
1115
1116     case '+':
1117       result->type = CPP_PLUS;
1118       c = get_effective_char (buffer);
1119       if (c == '=')
1120         ACCEPT_CHAR (CPP_PLUS_EQ);
1121       else if (c == '+')
1122         ACCEPT_CHAR (CPP_PLUS_PLUS);
1123       break;
1124
1125     case '-':
1126       result->type = CPP_MINUS;
1127       c = get_effective_char (buffer);
1128       if (c == '>')
1129         {
1130           ACCEPT_CHAR (CPP_DEREF);
1131           if (CPP_OPTION (pfile, cplusplus)
1132               && get_effective_char (buffer) == '*')
1133             ACCEPT_CHAR (CPP_DEREF_STAR);
1134         }
1135       else if (c == '=')
1136         ACCEPT_CHAR (CPP_MINUS_EQ);
1137       else if (c == '-')
1138         ACCEPT_CHAR (CPP_MINUS_MINUS);
1139       break;
1140
1141     case '*':
1142       result->type = CPP_MULT;
1143       if (get_effective_char (buffer) == '=')
1144         ACCEPT_CHAR (CPP_MULT_EQ);
1145       break;
1146
1147     case '=':
1148       result->type = CPP_EQ;
1149       if (get_effective_char (buffer) == '=')
1150         ACCEPT_CHAR (CPP_EQ_EQ);
1151       break;
1152
1153     case '!':
1154       result->type = CPP_NOT;
1155       if (get_effective_char (buffer) == '=')
1156         ACCEPT_CHAR (CPP_NOT_EQ);
1157       break;
1158
1159     case '&':
1160       result->type = CPP_AND;
1161       c = get_effective_char (buffer);
1162       if (c == '=')
1163         ACCEPT_CHAR (CPP_AND_EQ);
1164       else if (c == '&')
1165         ACCEPT_CHAR (CPP_AND_AND);
1166       break;
1167
1168     case '#':
1169       c = buffer->extra_char;   /* Can be set by error condition below.  */
1170       if (c != EOF)
1171         {
1172           buffer->read_ahead = c;
1173           buffer->extra_char = EOF;
1174         }
1175       else
1176         c = get_effective_char (buffer);
1177
1178       if (c == '#')
1179         {
1180           ACCEPT_CHAR (CPP_PASTE);
1181           break;
1182         }
1183
1184       result->type = CPP_HASH;
1185     do_hash:
1186       if (!bol)
1187         break;
1188       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1189          tokens within the list of arguments that would otherwise act
1190          as preprocessing directives, the behavior is undefined.
1191
1192          This implementation will report a hard error, terminate the
1193          macro invocation, and proceed to process the directive.  */
1194       if (pfile->state.parsing_args)
1195         {
1196           if (pfile->state.parsing_args == 2)
1197             cpp_error (pfile,
1198                        "directives may not be used inside a macro argument");
1199
1200           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1201           buffer->extra_char = buffer->read_ahead;
1202           buffer->read_ahead = '#';
1203           pfile->state.next_bol = 1;
1204           result->type = CPP_EOF;
1205
1206           /* Get whitespace right - newline_in_args sets it.  */
1207           if (pfile->lexer_pos.col == 1)
1208             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1209         }
1210       else
1211         {
1212           /* This is the hash introducing a directive.  */
1213           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1214             goto done_directive; /* bol still 1.  */
1215           /* This is in fact an assembler #.  */
1216         }
1217       break;
1218
1219     case '|':
1220       result->type = CPP_OR;
1221       c = get_effective_char (buffer);
1222       if (c == '=')
1223         ACCEPT_CHAR (CPP_OR_EQ);
1224       else if (c == '|')
1225         ACCEPT_CHAR (CPP_OR_OR);
1226       break;
1227
1228     case '^':
1229       result->type = CPP_XOR;
1230       if (get_effective_char (buffer) == '=')
1231         ACCEPT_CHAR (CPP_XOR_EQ);
1232       break;
1233
1234     case ':':
1235       result->type = CPP_COLON;
1236       c = get_effective_char (buffer);
1237       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1238         ACCEPT_CHAR (CPP_SCOPE);
1239       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1240         {
1241           result->flags |= DIGRAPH;
1242           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1243         }
1244       break;
1245
1246     case '~': result->type = CPP_COMPL; break;
1247     case ',': result->type = CPP_COMMA; break;
1248     case '(': result->type = CPP_OPEN_PAREN; break;
1249     case ')': result->type = CPP_CLOSE_PAREN; break;
1250     case '[': result->type = CPP_OPEN_SQUARE; break;
1251     case ']': result->type = CPP_CLOSE_SQUARE; break;
1252     case '{': result->type = CPP_OPEN_BRACE; break;
1253     case '}': result->type = CPP_CLOSE_BRACE; break;
1254     case ';': result->type = CPP_SEMICOLON; break;
1255
1256       /* @ is a punctuator in Objective C.  */
1257     case '@': result->type = CPP_ATSIGN; break;
1258
1259     random_char:
1260     default:
1261       result->type = CPP_OTHER;
1262       result->val.c = c;
1263       break;
1264     }
1265
1266   if (pfile->skipping)
1267     goto skip;
1268
1269   /* If not in a directive, this token invalidates controlling macros.  */
1270   if (!pfile->state.in_directive)
1271     pfile->mi_state = MI_FAILED;
1272 }
1273
1274 /* An upper bound on the number of bytes needed to spell a token,
1275    including preceding whitespace.  */
1276 unsigned int
1277 cpp_token_len (token)
1278      const cpp_token *token;
1279 {
1280   unsigned int len;
1281
1282   switch (TOKEN_SPELL (token))
1283     {
1284     default:            len = 0;                                break;
1285     case SPELL_STRING:  len = token->val.str.len;               break;
1286     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1287     }
1288   /* 1 for whitespace, 4 for comment delimeters.  */
1289   return len + 5;
1290 }
1291
1292 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1293    already contain the enough space to hold the token's spelling.
1294    Returns a pointer to the character after the last character
1295    written.  */
1296 unsigned char *
1297 cpp_spell_token (pfile, token, buffer)
1298      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1299      const cpp_token *token;
1300      unsigned char *buffer;
1301 {
1302   switch (TOKEN_SPELL (token))
1303     {
1304     case SPELL_OPERATOR:
1305       {
1306         const unsigned char *spelling;
1307         unsigned char c;
1308
1309         if (token->flags & DIGRAPH)
1310           spelling
1311             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1312         else if (token->flags & NAMED_OP)
1313           goto spell_ident;
1314         else
1315           spelling = TOKEN_NAME (token);
1316
1317         while ((c = *spelling++) != '\0')
1318           *buffer++ = c;
1319       }
1320       break;
1321
1322     case SPELL_IDENT:
1323       spell_ident:
1324       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1325       buffer += NODE_LEN (token->val.node);
1326       break;
1327
1328     case SPELL_STRING:
1329       {
1330         int left, right, tag;
1331         switch (token->type)
1332           {
1333           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1334           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1335           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1336           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1337           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1338           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1339           }
1340         if (tag) *buffer++ = tag;
1341         if (left) *buffer++ = left;
1342         memcpy (buffer, token->val.str.text, token->val.str.len);
1343         buffer += token->val.str.len;
1344         if (right) *buffer++ = right;
1345       }
1346       break;
1347
1348     case SPELL_CHAR:
1349       *buffer++ = token->val.c;
1350       break;
1351
1352     case SPELL_NONE:
1353       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1354       break;
1355     }
1356
1357   return buffer;
1358 }
1359
1360 /* Returns a token as a null-terminated string.  The string is
1361    temporary, and automatically freed later.  Useful for diagnostics.  */
1362 unsigned char *
1363 cpp_token_as_text (pfile, token)
1364      cpp_reader *pfile;
1365      const cpp_token *token;
1366 {
1367   unsigned int len = cpp_token_len (token);
1368   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1369
1370   end = cpp_spell_token (pfile, token, start);
1371   end[0] = '\0';
1372
1373   return start;
1374 }
1375
1376 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1377 const char *
1378 cpp_type2name (type)
1379      enum cpp_ttype type;
1380 {
1381   return (const char *) token_spellings[type].name;
1382 }
1383
1384 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1385    for efficiency - to avoid double-buffering.  Also, outputs a space
1386    if PREV_WHITE is flagged.  */
1387 void
1388 cpp_output_token (token, fp)
1389      const cpp_token *token;
1390      FILE *fp;
1391 {
1392   if (token->flags & PREV_WHITE)
1393     putc (' ', fp);
1394
1395   switch (TOKEN_SPELL (token))
1396     {
1397     case SPELL_OPERATOR:
1398       {
1399         const unsigned char *spelling;
1400
1401         if (token->flags & DIGRAPH)
1402           spelling
1403             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1404         else if (token->flags & NAMED_OP)
1405           goto spell_ident;
1406         else
1407           spelling = TOKEN_NAME (token);
1408
1409         ufputs (spelling, fp);
1410       }
1411       break;
1412
1413     spell_ident:
1414     case SPELL_IDENT:
1415       ufputs (NODE_NAME (token->val.node), fp);
1416     break;
1417
1418     case SPELL_STRING:
1419       {
1420         int left, right, tag;
1421         switch (token->type)
1422           {
1423           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1424           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1425           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1426           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1427           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1428           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1429           }
1430         if (tag) putc (tag, fp);
1431         if (left) putc (left, fp);
1432         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1433         if (right) putc (right, fp);
1434       }
1435       break;
1436
1437     case SPELL_CHAR:
1438       putc (token->val.c, fp);
1439       break;
1440
1441     case SPELL_NONE:
1442       /* An error, most probably.  */
1443       break;
1444     }
1445 }
1446
1447 /* Compare two tokens.  */
1448 int
1449 _cpp_equiv_tokens (a, b)
1450      const cpp_token *a, *b;
1451 {
1452   if (a->type == b->type && a->flags == b->flags)
1453     switch (TOKEN_SPELL (a))
1454       {
1455       default:                  /* Keep compiler happy.  */
1456       case SPELL_OPERATOR:
1457         return 1;
1458       case SPELL_CHAR:
1459         return a->val.c == b->val.c; /* Character.  */
1460       case SPELL_NONE:
1461         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1462       case SPELL_IDENT:
1463         return a->val.node == b->val.node;
1464       case SPELL_STRING:
1465         return (a->val.str.len == b->val.str.len
1466                 && !memcmp (a->val.str.text, b->val.str.text,
1467                             a->val.str.len));
1468       }
1469
1470   return 0;
1471 }
1472
1473 /* Determine whether two tokens can be pasted together, and if so,
1474    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1475    be pasted, or the appropriate type for the merged token if they
1476    can.  */
1477 enum cpp_ttype
1478 cpp_can_paste (pfile, token1, token2, digraph)
1479      cpp_reader * pfile;
1480      const cpp_token *token1, *token2;
1481      int* digraph;
1482 {
1483   enum cpp_ttype a = token1->type, b = token2->type;
1484   int cxx = CPP_OPTION (pfile, cplusplus);
1485
1486   /* Treat named operators as if they were ordinary NAMEs.  */
1487   if (token1->flags & NAMED_OP)
1488     a = CPP_NAME;
1489   if (token2->flags & NAMED_OP)
1490     b = CPP_NAME;
1491
1492   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1493     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1494
1495   switch (a)
1496     {
1497     case CPP_GREATER:
1498       if (b == a) return CPP_RSHIFT;
1499       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1500       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1501       break;
1502     case CPP_LESS:
1503       if (b == a) return CPP_LSHIFT;
1504       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1505       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1506       if (CPP_OPTION (pfile, digraphs))
1507         {
1508           if (b == CPP_COLON)
1509             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1510           if (b == CPP_MOD)
1511             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1512         }
1513       break;
1514
1515     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1516     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1517     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1518
1519     case CPP_MINUS:
1520       if (b == a)               return CPP_MINUS_MINUS;
1521       if (b == CPP_GREATER)     return CPP_DEREF;
1522       break;
1523     case CPP_COLON:
1524       if (b == a && cxx)        return CPP_SCOPE;
1525       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1526         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1527       break;
1528
1529     case CPP_MOD:
1530       if (CPP_OPTION (pfile, digraphs))
1531         {
1532           if (b == CPP_GREATER)
1533             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1534           if (b == CPP_COLON)
1535             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1536         }
1537       break;
1538     case CPP_DEREF:
1539       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1540       break;
1541     case CPP_DOT:
1542       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1543       if (b == CPP_NUMBER)      return CPP_NUMBER;
1544       break;
1545
1546     case CPP_HASH:
1547       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1548         /* %:%: digraph */
1549         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1550       break;
1551
1552     case CPP_NAME:
1553       if (b == CPP_NAME)        return CPP_NAME;
1554       if (b == CPP_NUMBER
1555           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1556       if (b == CPP_CHAR
1557           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1558       if (b == CPP_STRING
1559           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1560       break;
1561
1562     case CPP_NUMBER:
1563       if (b == CPP_NUMBER)      return CPP_NUMBER;
1564       if (b == CPP_NAME)        return CPP_NUMBER;
1565       if (b == CPP_DOT)         return CPP_NUMBER;
1566       /* Numbers cannot have length zero, so this is safe.  */
1567       if ((b == CPP_PLUS || b == CPP_MINUS)
1568           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1569         return CPP_NUMBER;
1570       break;
1571
1572     default:
1573       break;
1574     }
1575
1576   return CPP_EOF;
1577 }
1578
1579 /* Returns nonzero if a space should be inserted to avoid an
1580    accidental token paste for output.  For simplicity, it is
1581    conservative, and occasionally advises a space where one is not
1582    needed, e.g. "." and ".2".  */
1583
1584 int
1585 cpp_avoid_paste (pfile, token1, token2)
1586      cpp_reader *pfile;
1587      const cpp_token *token1, *token2;
1588 {
1589   enum cpp_ttype a = token1->type, b = token2->type;
1590   cppchar_t c;
1591
1592   if (token1->flags & NAMED_OP)
1593     a = CPP_NAME;
1594   if (token2->flags & NAMED_OP)
1595     b = CPP_NAME;
1596
1597   c = EOF;
1598   if (token2->flags & DIGRAPH)
1599     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1600   else if (token_spellings[b].category == SPELL_OPERATOR)
1601     c = token_spellings[b].name[0];
1602
1603   /* Quickly get everything that can paste with an '='.  */
1604   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1605     return 1;
1606
1607   switch (a)
1608     {
1609     case CPP_GREATER:   return c == '>' || c == '?';
1610     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1611     case CPP_PLUS:      return c == '+';
1612     case CPP_MINUS:     return c == '-' || c == '>';
1613     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1614     case CPP_MOD:       return c == ':' || c == '>';
1615     case CPP_AND:       return c == '&';
1616     case CPP_OR:        return c == '|';
1617     case CPP_COLON:     return c == ':' || c == '>';
1618     case CPP_DEREF:     return c == '*';
1619     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1620     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1621     case CPP_NAME:      return ((b == CPP_NUMBER
1622                                  && name_p (pfile, &token2->val.str))
1623                                 || b == CPP_NAME
1624                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1625     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1626                                 || c == '.' || c == '+' || c == '-');
1627     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1628                                 && token1->val.c == '@'
1629                                 && (b == CPP_NAME || b == CPP_STRING));
1630     default:            break;
1631     }
1632
1633   return 0;
1634 }
1635
1636 /* Output all the remaining tokens on the current line, and a newline
1637    character, to FP.  Leading whitespace is removed.  */
1638 void
1639 cpp_output_line (pfile, fp)
1640      cpp_reader *pfile;
1641      FILE *fp;
1642 {
1643   cpp_token token;
1644
1645   cpp_get_token (pfile, &token);
1646   token.flags &= ~PREV_WHITE;
1647   while (token.type != CPP_EOF)
1648     {
1649       cpp_output_token (&token, fp);
1650       cpp_get_token (pfile, &token);
1651     }
1652
1653   putc ('\n', fp);
1654 }
1655
/* Returns the numeric value (0-15) of the hexadecimal digit C, which
   may be in either case.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int value;

  if (c >= '0' && c <= '9')
    value = c - '0';
  else if (c >= 'A' && c <= 'F')
    value = c - 'A' + 10;
  else if (c >= 'a' && c <= 'f')
    value = c - 'a' + 10;
  else
    abort ();

  return value;
}
1669
1670 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1671    failure if cpplib is not parsing C++ or C99.  Such failure is
1672    silent, and no variables are updated.  Otherwise returns 0, and
1673    warns if -Wtraditional.
1674
1675    [lex.charset]: The character designated by the universal character
1676    name \UNNNNNNNN is that character whose character short name in
1677    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1678    universal character name \uNNNN is that character whose character
1679    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1680    for a universal character name is less than 0x20 or in the range
1681    0x7F-0x9F (inclusive), or if the universal character name
1682    designates a character in the basic source character set, then the
1683    program is ill-formed.
1684
1685    We assume that wchar_t is Unicode, so we don't need to do any
1686    mapping.  Is this ever wrong?
1687
1688    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1689    LIMIT is the end of the string or charconst.  PSTR is updated to
1690    point after the UCS on return, and the UCS is written into PC.  */
1691
1692 static int
1693 maybe_read_ucs (pfile, pstr, limit, pc)
1694      cpp_reader *pfile;
1695      const unsigned char **pstr;
1696      const unsigned char *limit;
1697      unsigned int *pc;
1698 {
1699   const unsigned char *p = *pstr;
1700   unsigned int code = 0;
1701   unsigned int c = *pc, length;
1702
1703   /* Only attempt to interpret a UCS for C++ and C99.  */
1704   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1705     return 1;
1706
1707   if (CPP_WTRADITIONAL (pfile))
1708     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1709
1710   length = (c == 'u' ? 4: 8);
1711
1712   if ((size_t) (limit - p) < length)
1713     {
1714       cpp_error (pfile, "incomplete universal-character-name");
1715       /* Skip to the end to avoid more diagnostics.  */
1716       p = limit;
1717     }
1718   else
1719     {
1720       for (; length; length--, p++)
1721         {
1722           c = *p;
1723           if (ISXDIGIT (c))
1724             code = (code << 4) + hex_digit_value (c);
1725           else
1726             {
1727               cpp_error (pfile,
1728                          "non-hex digit '%c' in universal-character-name", c);
1729               /* We shouldn't skip in case there are multibyte chars.  */
1730               break;
1731             }
1732         }
1733     }
1734
1735 #ifdef TARGET_EBCDIC
1736   cpp_error (pfile, "universal-character-name on EBCDIC target");
1737   code = 0x3f;  /* EBCDIC invalid character */
1738 #else
1739  /* True extended characters are OK.  */
1740   if (code >= 0xa0
1741       && !(code & 0x80000000)
1742       && !(code >= 0xD800 && code <= 0xDFFF))
1743     ;
1744   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1745      hex escapes so that this also works with EBCDIC hosts.  */
1746   else if (code == 0x24 || code == 0x40 || code == 0x60)
1747     ;
1748   /* Don't give another error if one occurred above.  */
1749   else if (length == 0)
1750     cpp_error (pfile, "universal-character-name out of range");
1751 #endif
1752
1753   *pstr = p;
1754   *pc = code;
1755   return 0;
1756 }
1757
1758 /* Interpret an escape sequence, and return its value.  PSTR points to
1759    the input pointer, which is just after the backslash.  LIMIT is how
1760    much text we have.  MASK is a bitmask for the precision for the
1761    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1762    interpret escapes that did not exist in traditional C.
1763
1764    Handles all relevant diagnostics.  */
1765
1766 unsigned int
1767 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1768      cpp_reader *pfile;
1769      const unsigned char **pstr;
1770      const unsigned char *limit;
1771      unsigned HOST_WIDE_INT mask;
1772      int traditional;
1773 {
1774   int unknown = 0;
1775   const unsigned char *str = *pstr;
1776   unsigned int c = *str++;
1777
1778   switch (c)
1779     {
1780     case '\\': case '\'': case '"': case '?': break;
1781     case 'b': c = TARGET_BS;      break;
1782     case 'f': c = TARGET_FF;      break;
1783     case 'n': c = TARGET_NEWLINE; break;
1784     case 'r': c = TARGET_CR;      break;
1785     case 't': c = TARGET_TAB;     break;
1786     case 'v': c = TARGET_VT;      break;
1787
1788     case '(': case '{': case '[': case '%':
1789       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1790          '\%' is used to prevent SCCS from getting confused.  */
1791       unknown = CPP_PEDANTIC (pfile);
1792       break;
1793
1794     case 'a':
1795       if (CPP_WTRADITIONAL (pfile))
1796         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1797       if (!traditional)
1798         c = TARGET_BELL;
1799       break;
1800
1801     case 'e': case 'E':
1802       if (CPP_PEDANTIC (pfile))
1803         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1804       c = TARGET_ESC;
1805       break;
1806
1807     case 'u': case 'U':
1808       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1809       break;
1810
1811     case 'x':
1812       if (CPP_WTRADITIONAL (pfile))
1813         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1814
1815       if (!traditional)
1816         {
1817           unsigned int i = 0, overflow = 0;
1818           int digits_found = 0;
1819
1820           while (str < limit)
1821             {
1822               c = *str;
1823               if (! ISXDIGIT (c))
1824                 break;
1825               str++;
1826               overflow |= i ^ (i << 4 >> 4);
1827               i = (i << 4) + hex_digit_value (c);
1828               digits_found = 1;
1829             }
1830
1831           if (!digits_found)
1832             cpp_error (pfile, "\\x used with no following hex digits");
1833
1834           if (overflow | (i != (i & mask)))
1835             {
1836               cpp_pedwarn (pfile, "hex escape sequence out of range");
1837               i &= mask;
1838             }
1839           c = i;
1840         }
1841       break;
1842
1843     case '0':  case '1':  case '2':  case '3':
1844     case '4':  case '5':  case '6':  case '7':
1845       {
1846         unsigned int i = c - '0';
1847         int count = 0;
1848
1849         while (str < limit && ++count < 3)
1850           {
1851             c = *str;
1852             if (c < '0' || c > '7')
1853               break;
1854             str++;
1855             i = (i << 3) + c - '0';
1856           }
1857
1858         if (i != (i & mask))
1859           {
1860             cpp_pedwarn (pfile, "octal escape sequence out of range");
1861             i &= mask;
1862           }
1863         c = i;
1864       }
1865       break;
1866
1867     default:
1868       unknown = 1;
1869       break;
1870     }
1871
1872   if (unknown)
1873     {
1874       if (ISGRAPH (c))
1875         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1876       else
1877         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1878     }
1879
1880   if (c > mask)
1881     cpp_pedwarn (pfile, "escape sequence out of range for character");
1882
1883   *pstr = str;
1884   return c;
1885 }
1886
/* Fall back to CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE when the MAX_
   variants have not been defined already.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1894
1895 /* Interpret a (possibly wide) character constant in TOKEN.
1896    WARN_MULTI warns about multi-character charconsts, if not
1897    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1898    that did not exist in traditional C.  PCHARS_SEEN points to a
1899    variable that is filled in with the number of characters seen.  */
1900 HOST_WIDE_INT
1901 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1902      cpp_reader *pfile;
1903      const cpp_token *token;
1904      int warn_multi;
1905      int traditional;
1906      unsigned int *pchars_seen;
1907 {
1908   const unsigned char *str = token->val.str.text;
1909   const unsigned char *limit = str + token->val.str.len;
1910   unsigned int chars_seen = 0;
1911   unsigned int width, max_chars, c;
1912   unsigned HOST_WIDE_INT mask;
1913   HOST_WIDE_INT result = 0;
1914
1915 #ifdef MULTIBYTE_CHARS
1916   (void) local_mbtowc (NULL, NULL, 0);
1917 #endif
1918
1919   /* Width in bits.  */
1920   if (token->type == CPP_CHAR)
1921     width = MAX_CHAR_TYPE_SIZE;
1922   else
1923     width = MAX_WCHAR_TYPE_SIZE;
1924
1925   if (width < HOST_BITS_PER_WIDE_INT)
1926     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1927   else
1928     mask = ~0;
1929   max_chars = HOST_BITS_PER_WIDE_INT / width;
1930
1931   while (str < limit)
1932     {
1933 #ifdef MULTIBYTE_CHARS
1934       wchar_t wc;
1935       int char_len;
1936
1937       char_len = local_mbtowc (&wc, str, limit - str);
1938       if (char_len == -1)
1939         {
1940           cpp_warning (pfile, "ignoring invalid multibyte character");
1941           c = *str++;
1942         }
1943       else
1944         {
1945           str += char_len;
1946           c = wc;
1947         }
1948 #else
1949       c = *str++;
1950 #endif
1951
1952       if (c == '\\')
1953         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1954
1955 #ifdef MAP_CHARACTER
1956       if (ISPRINT (c))
1957         c = MAP_CHARACTER (c);
1958 #endif
1959
1960       /* Merge character into result; ignore excess chars.  */
1961       if (++chars_seen <= max_chars)
1962         {
1963           if (width < HOST_BITS_PER_WIDE_INT)
1964             result = (result << width) | (c & mask);
1965           else
1966             result = c;
1967         }
1968     }
1969
1970   if (chars_seen == 0)
1971     cpp_error (pfile, "empty character constant");
1972   else if (chars_seen > max_chars)
1973     {
1974       chars_seen = max_chars;
1975       cpp_warning (pfile, "character constant too long");
1976     }
1977   else if (chars_seen > 1 && !traditional && warn_multi)
1978     cpp_warning (pfile, "multi-character character constant");
1979
1980   /* If char type is signed, sign-extend the constant.  The
1981      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1982   if (token->type == CPP_CHAR && chars_seen)
1983     {
1984       unsigned int nbits = chars_seen * width;
1985       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1986
1987       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1988           || ((result >> (nbits - 1)) & 1) == 0)
1989         result &= mask;
1990       else
1991         result |= ~mask;
1992     }
1993
1994   *pchars_seen = chars_seen;
1995   return result;
1996 }
1997
1998 /* Memory pools.  */
1999
/* Probe structure used only to measure alignment: the offset of U,
   which follows a single char, is the alignment the compiler gives to
   a union of a double and a pointer.  */
struct dummy
{
  char c;
  union { double d; int *p; } u;
};

/* Alignment applied to chunk sizes in new_chunk, and used by
   _cpp_init_pool when no alignment is requested.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2011
2012 static int
2013 chunk_suitable (pool, chunk, size)
2014      cpp_pool *pool;
2015      cpp_chunk *chunk;
2016      unsigned int size;
2017 {
2018   /* Being at least twice SIZE means we can use memcpy in
2019      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2020      anyway.  */
2021   return (chunk && pool->locked != chunk
2022           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2023 }
2024
2025 /* Returns the end of the new pool.  PTR points to a char in the old
2026    pool, and is updated to point to the same char in the new pool.  */
2027 unsigned char *
2028 _cpp_next_chunk (pool, len, ptr)
2029      cpp_pool *pool;
2030      unsigned int len;
2031      unsigned char **ptr;
2032 {
2033   cpp_chunk *chunk = pool->cur->next;
2034
2035   /* LEN is the minimum size we want in the new pool.  */
2036   len += POOL_ROOM (pool);
2037   if (! chunk_suitable (pool, chunk, len))
2038     {
2039       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2040
2041       chunk->next = pool->cur->next;
2042       pool->cur->next = chunk;
2043     }
2044
2045   /* Update the pointer before changing chunk's front.  */
2046   if (ptr)
2047     *ptr += chunk->base - POOL_FRONT (pool);
2048
2049   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2050   chunk->front = chunk->base;
2051
2052   pool->cur = chunk;
2053   return POOL_LIMIT (pool);
2054 }
2055
2056 static cpp_chunk *
2057 new_chunk (size)
2058      unsigned int size;
2059 {
2060   unsigned char *base;
2061   cpp_chunk *result;
2062
2063   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2064   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2065   /* Put the chunk descriptor at the end.  Then chunk overruns will
2066      cause obvious chaos.  */
2067   result = (cpp_chunk *) (base + size);
2068   result->base = base;
2069   result->front = base;
2070   result->limit = base + size;
2071   result->next = 0;
2072
2073   return result;
2074 }
2075
2076 void
2077 _cpp_init_pool (pool, size, align, temp)
2078      cpp_pool *pool;
2079      unsigned int size, align, temp;
2080 {
2081   if (align == 0)
2082     align = DEFAULT_ALIGNMENT;
2083   if (align & (align - 1))
2084     abort ();
2085   pool->align = align;
2086   pool->cur = new_chunk (size);
2087   pool->locked = 0;
2088   pool->locks = 0;
2089   if (temp)
2090     pool->cur->next = pool->cur;
2091 }
2092
2093 void
2094 _cpp_lock_pool (pool)
2095      cpp_pool *pool;
2096 {
2097   if (pool->locks++ == 0)
2098     pool->locked = pool->cur;
2099 }
2100
2101 void
2102 _cpp_unlock_pool (pool)
2103      cpp_pool *pool;
2104 {
2105   if (--pool->locks == 0)
2106     pool->locked = 0;
2107 }
2108
2109 void
2110 _cpp_free_pool (pool)
2111      cpp_pool *pool;
2112 {
2113   cpp_chunk *chunk = pool->cur, *next;
2114
2115   do
2116     {
2117       next = chunk->next;
2118       free (chunk->base);
2119       chunk = next;
2120     }
2121   while (chunk && chunk != pool->cur);
2122 }
2123
2124 /* Reserve LEN bytes from a memory pool.  */
2125 unsigned char *
2126 _cpp_pool_reserve (pool, len)
2127      cpp_pool *pool;
2128      unsigned int len;
2129 {
2130   len = POOL_ALIGN (len, pool->align);
2131   if (len > (unsigned int) POOL_ROOM (pool))
2132     _cpp_next_chunk (pool, len, 0);
2133
2134   return POOL_FRONT (pool);
2135 }
2136
2137 /* Allocate LEN bytes from a memory pool.  */
2138 unsigned char *
2139 _cpp_pool_alloc (pool, len)
2140      cpp_pool *pool;
2141      unsigned int len;
2142 {
2143   unsigned char *result = _cpp_pool_reserve (pool, len);
2144
2145   POOL_COMMIT (pool, len);
2146   return result;
2147 }