gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
/* This lexer works with a single pass of the file.  Recently I
   re-wrote it to minimize the places where we step backwards in the
   input stream, to make future changes to support multi-byte
   character sets fairly straight-forward.

   There is now only one routine where we do step backwards:
   skip_escaped_newlines.  This routine could probably also be changed
   so that it doesn't need to step back.  One possibility is to use a
   trick similar to that used in lex_dot and lex_percent.  Two
   extra characters might be needed, but skip_escaped_newlines itself
   would probably be the only place that needs to be aware of that,
   and changes to the remaining routines would probably only be needed
   if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41 #include "symcat.h"
  42
  43 /* Tokens with SPELL_STRING store their spelling in the token list,
  44    and it's length in the token->val.name.len.  */
  45 enum spell_type
  46 {
  47   SPELL_OPERATOR = 0,
  48   SPELL_CHAR,
  49   SPELL_IDENT,
  50   SPELL_STRING,
  51   SPELL_NONE
  52 };
  53
/* One entry of the token_spellings table: the spelling category of a
   token type together with a fixed name string for it.  */
struct token_spelling
{
  enum spell_type category;     /* How tokens of this type are spelled.  */
  const unsigned char *name;    /* Operator text or token type name.  */
};
  59
  60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  61                                              U":>", U"<%", U"%>"};
  62
/* Build the spelling table by expanding TTYPE_TABLE (defined outside
   this file) with local definitions of OP and TK: an OP entry becomes
   a SPELL_OPERATOR whose name is the string S; a TK entry uses S as
   its spelling category and STRINGX (e) - presumably the enumerator
   name as a string - as its name.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
#undef OP
#undef TK

/* Accessors for the table entry selected by a token's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  71
/* Prototypes for the file-local functions.  */

/* Character-level input: newlines, trigraphs and escaped newlines.  */
static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
static cppchar_t get_effective_char PARAMS ((cpp_buffer *));

/* Comment, white space and token-body scanners.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static void unterminated PARAMS ((cpp_reader *, int));
static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));

/* Memory pool helpers.  */
static cpp_chunk *new_chunk PARAMS ((unsigned int));
static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
  93
  94 /* Utility routine:
  95
  96    Compares, the token TOKEN to the NUL-terminated string STRING.
  97    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  98
  99 int
 100 cpp_ideq (token, string)
 101      const cpp_token *token;
 102      const char *string;
 103 {
 104   if (token->type != CPP_NAME)
 105     return 0;
 106
 107   return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
 108 }
 109
 110 /* Call when meeting a newline.  Returns the character after the newline
 111    (or carriage-return newline combination), or EOF.  */
 112 static cppchar_t
 113 handle_newline (buffer, newline_char)
 114      cpp_buffer *buffer;
 115      cppchar_t newline_char;
 116 {
 117   cppchar_t next = EOF;
 118
 119   buffer->col_adjust = 0;
 120   buffer->lineno++;
 121   buffer->line_base = buffer->cur;
 122
 123   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 124   if (buffer->cur < buffer->rlimit)
 125     {
 126       next = *buffer->cur++;
 127       if (next + newline_char == '\r' + '\n')
 128         {
 129           buffer->line_base = buffer->cur;
 130           if (buffer->cur < buffer->rlimit)
 131             next = *buffer->cur++;
 132           else
 133             next = EOF;
 134         }
 135     }
 136
 137   buffer->read_ahead = next;
 138   return next;
 139 }
 140
 141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 142    encountered.  It warns if necessary, and returns true if the
 143    trigraph should be honoured.  FROM_CHAR is the third character of a
 144    trigraph, and presumed to be the previous character for position
 145    reporting.  */
 146 static int
 147 trigraph_ok (pfile, from_char)
 148      cpp_reader *pfile;
 149      cppchar_t from_char;
 150 {
 151   int accept = CPP_OPTION (pfile, trigraphs);
 152
 153   /* Don't warn about trigraphs in comments.  */
 154   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 155     {
 156       cpp_buffer *buffer = pfile->buffer;
 157       if (accept)
 158         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 159                                "trigraph ??%c converted to %c",
 160                                (int) from_char,
 161                                (int) _cpp_trigraph_map[from_char]);
 162       else
 163         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 164                                "trigraph ??%c ignored", (int) from_char);
 165     }
 166
 167   return accept;
 168 }
 169
/* Assumes local variables buffer and result.  Gives the token being
   built the type T and clears the read-ahead character, so that
   character counts as part of the token.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Remember and restore the read position of the local variable
   buffer, using a local variable saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 181
/* Skips any escaped newlines introduced by NEXT, which is either a
   '?' or a '\\'.  Returns the next character, which will also have
   been placed in buffer->read_ahead.  This routine performs
   preprocessing stages 1 and 2 of the ISO C standard.

   BUFFER->cur is expected to point just past NEXT.  If NEXT turns out
   not to start an accepted trigraph or an escaped newline, BUFFER->cur
   is put back where it was and NEXT itself is returned.  An accepted
   trigraph is consumed and its replacement character returned, unless
   the replacement is a backslash that in turn escapes a newline.
   Nothing is skipped when BUFFER->from_stage3 is set.  */
static cppchar_t
skip_escaped_newlines (buffer, next)
     cpp_buffer *buffer;
     cppchar_t next;
{
  /* Only do this if we apply stages 1 and 2.  */
  if (!buffer->from_stage3)
    {
      cppchar_t next1;
      const unsigned char *saved_cur;
      int space;

      do
        {
          /* Nothing can follow NEXT, so it stands for itself.  */
          if (buffer->cur == buffer->rlimit)
            break;

          SAVE_STATE ();
          if (next == '?')
            {
              /* A trigraph needs a second '?' and a third character.  */
              next1 = *buffer->cur++;
              if (next1 != '?' || buffer->cur == buffer->rlimit)
                {
                  RESTORE_STATE ();
                  break;
                }

              /* The third character must be in the trigraph map, and
                 trigraphs must be enabled.  */
              next1 = *buffer->cur++;
              if (!_cpp_trigraph_map[next1]
                  || !trigraph_ok (buffer->pfile, next1))
                {
                  RESTORE_STATE ();
                  break;
                }

              /* We have a full trigraph here.  */
              next = _cpp_trigraph_map[next1];
              if (next != '\\' || buffer->cur == buffer->rlimit)
                break;
              /* The trigraph stays consumed even if the backslash it
                 produced turns out not to escape a newline.  */
              SAVE_STATE ();
            }

          /* We have a backslash, and room for at least one more character.  */
          space = 0;
          do
            {
              /* Step over horizontal space between backslash and newline.  */
              next1 = *buffer->cur++;
              if (!is_nvspace (next1))
                break;
              space = 1;
            }
          while (buffer->cur < buffer->rlimit);

          /* Not an escaped newline after all: back up to the saved
             position and return the backslash.  */
          if (!is_vspace (next1))
            {
              RESTORE_STATE ();
              break;
            }

          if (space)
            cpp_warning (buffer->pfile,
                         "backslash and newline separated by space");

          next = handle_newline (buffer, next1);
          if (next == EOF)
            cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
        }
      /* The character after the newline may start another one.  */
      while (next == '\\' || next == '?');
    }

  buffer->read_ahead = next;
  return next;
}
 259
 260 /* Obtain the next character, after trigraph conversion and skipping
 261    an arbitrary string of escaped newlines.  The common case of no
 262    trigraphs or escaped newlines falls through quickly.  */
 263 static cppchar_t
 264 get_effective_char (buffer)
 265      cpp_buffer *buffer;
 266 {
 267   cppchar_t next = EOF;
 268
 269   if (buffer->cur < buffer->rlimit)
 270     {
 271       next = *buffer->cur++;
 272
 273       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 274          can introduce escaped newlines, which we want to skip, or
 275          UCNs, which, depending upon lexer state, we will handle in
 276          the future.  */
 277       if (next == '?' || next == '\\')
 278         next = skip_escaped_newlines (buffer, next);
 279     }
 280
 281   buffer->read_ahead = next;
 282   return next;
 283 }
 284
/* Skip a C-style block comment.  We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter.  Returns
   non-zero if comment terminated by EOF, zero otherwise.

   pfile->state.lexing_comment is set while scanning (trigraph_ok
   consults it to suppress trigraph warnings), and the buffer's
   read_ahead is reset to EOF before returning.  */
static int
skip_block_comment (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  /* C is the current character and PREVC the one before it.  */
  cppchar_t c = EOF, prevc = EOF;

  pfile->state.lexing_comment = 1;
  while (buffer->cur != buffer->rlimit)
    {
      prevc = c, c = *buffer->cur++;

      /* Entered by goto when C has already been fetched.  */
    next_char:
      /* FIXME: For speed, create a new character class of characters
         of interest inside block comments.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (buffer, c);

      /* People like decorating comments with '*', so check for '/'
         instead for efficiency.  */
      if (c == '/')
        {
          if (prevc == '*')
            break;

          /* Warn about potential nested comments, but not if the '/'
             comes immediately before the true comment delimiter.
             Don't bother to get it right across escaped newlines.  */
          if (CPP_OPTION (pfile, warn_comments)
              && buffer->cur != buffer->rlimit)
            {
              prevc = c, c = *buffer->cur++;
              if (c == '*' && buffer->cur != buffer->rlimit)
                {
                  prevc = c, c = *buffer->cur++;
                  if (c != '/')
                    cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
                                           CPP_BUF_COL (buffer),
                                           "\"/*\" within comment");
                }
              /* The look-ahead character still has to be examined.  */
              goto next_char;
            }
        }
      else if (is_vspace (c))
        {
          /* handle_newline has already fetched the next character.  */
          prevc = c, c = handle_newline (buffer, c);
          goto next_char;
        }
      else if (c == '\t')
        adjust_column (pfile);
    }

  pfile->state.lexing_comment = 0;
  buffer->read_ahead = EOF;
  /* Terminated properly only if the last two characters were "*" and "/".  */
  return c != '/' || prevc != '*';
}
 344
 345 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 346    non-zero if a multiline comment.  The following new line, if any,
 347    is left in buffer->read_ahead.  */
 348 static int
 349 skip_line_comment (pfile)
 350      cpp_reader *pfile;
 351 {
 352   cpp_buffer *buffer = pfile->buffer;
 353   unsigned int orig_lineno = buffer->lineno;
 354   cppchar_t c;
 355
 356   pfile->state.lexing_comment = 1;
 357   do
 358     {
 359       c = EOF;
 360       if (buffer->cur == buffer->rlimit)
 361         break;
 362
 363       c = *buffer->cur++;
 364       if (c == '?' || c == '\\')
 365         c = skip_escaped_newlines (buffer, c);
 366     }
 367   while (!is_vspace (c));
 368
 369   pfile->state.lexing_comment = 0;
 370   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 371   return orig_lineno != buffer->lineno;
 372 }
 373
 374 /* pfile->buffer->cur is one beyond the \t character.  Update
 375    col_adjust so we track the column correctly.  */
 376 static void
 377 adjust_column (pfile)
 378      cpp_reader *pfile;
 379 {
 380   cpp_buffer *buffer = pfile->buffer;
 381   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 382
 383   /* Round it up to multiple of the tabstop, but subtract 1 since the
 384      tab itself occupies a character position.  */
 385   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 386                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 387 }
 388
 389 /* Skips whitespace, saving the next non-whitespace character.
 390    Adjusts pfile->col_adjust to account for tabs.  Without this,
 391    tokens might be assigned an incorrect column.  */
 392 static void
 393 skip_whitespace (pfile, c)
 394      cpp_reader *pfile;
 395      cppchar_t c;
 396 {
 397   cpp_buffer *buffer = pfile->buffer;
 398   unsigned int warned = 0;
 399
 400   do
 401     {
 402       /* Horizontal space always OK.  */
 403       if (c == ' ')
 404         ;
 405       else if (c == '\t')
 406         adjust_column (pfile);
 407       /* Just \f \v or \0 left.  */
 408       else if (c == '\0')
 409         {
 410           if (!warned)
 411             {
 412               cpp_warning (pfile, "null character(s) ignored");
 413               warned = 1;
 414             }
 415         }
 416       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 417         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 418                                CPP_BUF_COL (buffer),
 419                                "%s in preprocessing directive",
 420                                c == '\f' ? "form feed" : "vertical tab");
 421
 422       c = EOF;
 423       if (buffer->cur == buffer->rlimit)
 424         break;
 425       c = *buffer->cur++;
 426     }
 427   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 428   while (is_nvspace (c));
 429
 430   /* Remember the next character.  */
 431   buffer->read_ahead = c;
 432 }
 433
 434 /* See if the characters of a number token are valid in a name (no
 435    '.', '+' or '-').  */
 436 static int
 437 name_p (pfile, string)
 438      cpp_reader *pfile;
 439      const cpp_string *string;
 440 {
 441   unsigned int i;
 442
 443   for (i = 0; i < string->len; i++)
 444     if (!is_idchar (string->text[i]))
 445       return 0;
 446
 447   return 1;
 448 }
 449
/* Parse an identifier, skipping embedded backslash-newlines.
   Calculate the hash value of the token while parsing, for improved
   performance.  The hashing algorithm *must* match cpp_lookup().

   C is the first character of the identifier.  The spelling is built
   at the front of pfile->ident_pool and looked up with
   _cpp_lookup_with_hash, whose result is returned.  The first
   character that is not part of the identifier (or EOF) is left in
   the buffer's read_ahead.  */

static cpp_hashnode *
parse_identifier (pfile, c)
     cpp_reader *pfile;
     cppchar_t c;
{
  cpp_hashnode *result;
  cpp_buffer *buffer = pfile->buffer;
  unsigned char *dest, *limit;
  unsigned int r = 0, saw_dollar = 0;

  dest = POOL_FRONT (&pfile->ident_pool);
  limit = POOL_LIMIT (&pfile->ident_pool);

  /* The inner loop collects a run of identifier characters; the outer
     loop continues the identifier across escaped newlines.  */
  do
    {
      do
        {
          /* Need room for terminating null.  */
          if (dest + 1 >= limit)
            limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);

          *dest++ = c;
          r = HASHSTEP (r, c);

          if (c == '$')
            saw_dollar++;

          c = EOF;
          if (buffer->cur == buffer->rlimit)
            break;

          c = *buffer->cur++;
        }
      while (is_idchar (c));

      /* Potential escaped newline?  */
      if (c != '?' && c != '\\')
        break;
      c = skip_escaped_newlines (buffer, c);
    }
  while (is_idchar (c));

  /* Remember the next character.  */
  buffer->read_ahead = c;

  /* $ is not an identifier character in the standard, but is commonly
     accepted as an extension.  Don't warn about it in skipped
     conditional blocks.  */
  if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
    cpp_pedwarn (pfile, "'$' character(s) in identifier");

  /* Identifiers are null-terminated.  */
  *dest = '\0';

  /* This routine commits the memory if necessary.  */
  result = _cpp_lookup_with_hash (pfile,
                                  dest - POOL_FRONT (&pfile->ident_pool), r);

  /* Some identifiers require diagnostics when lexed.  */
  if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
    {
      /* It is allowed to poison the same identifier twice.  */
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
        cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);

      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
         replacement list of a variable-arguments macro.  */
      if (result == pfile->spec_nodes.n__VA_ARGS__
          && !pfile->state.va_args_ok)
        cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
    }

  return result;
}
 528
/* Parse a number, skipping embedded backslash-newlines.

   C is the first character of the number; if LEADING_PERIOD is
   non-zero a '.' is stored in front of it.  The null-terminated
   spelling is built in pfile->string_pool and committed there, and
   NUMBER is set to its text and length (the length excludes the
   terminating null).  The first character that is not part of the
   number (or EOF) is left in the buffer's read_ahead.  */
static void
parse_number (pfile, number, c, leading_period)
     cpp_reader *pfile;
     cpp_string *number;
     cppchar_t c;
     int leading_period;
{
  cpp_buffer *buffer = pfile->buffer;
  cpp_pool *pool = pfile->string_pool;
  unsigned char *dest, *limit;

  dest = POOL_FRONT (pool);
  limit = POOL_LIMIT (pool);

  /* Place a leading period.  */
  if (leading_period)
    {
      if (dest >= limit)
        limit = _cpp_next_chunk (pool, 0, &dest);
      *dest++ = '.';
    }

  /* The inner loop collects a run of number characters; the outer
     loop continues the number across escaped newlines.  */
  do
    {
      do
        {
          /* Need room for terminating null.  */
          if (dest + 1 >= limit)
            limit = _cpp_next_chunk (pool, 0, &dest);
          *dest++ = c;

          c = EOF;
          if (buffer->cur == buffer->rlimit)
            break;

          c = *buffer->cur++;
        }
      /* VALID_SIGN is given the character just stored as context.  */
      while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));

      /* Potential escaped newline?  */
      if (c != '?' && c != '\\')
        break;
      c = skip_escaped_newlines (buffer, c);
    }
  while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));

  /* Remember the next character.  */
  buffer->read_ahead = c;

  /* Null-terminate the number.  */
  *dest = '\0';

  number->text = POOL_FRONT (pool);
  number->len = dest - number->text;
  /* Commit the terminating null as well.  */
  POOL_COMMIT (pool, number->len + 1);
}
 586
 587 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 588 static void
 589 unterminated (pfile, term)
 590      cpp_reader *pfile;
 591      int term;
 592 {
 593   cpp_error (pfile, "missing terminating %c character", term);
 594
 595   if (term == '\"' && pfile->mlstring_pos.line
 596       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 597     {
 598       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 599                            pfile->mlstring_pos.col,
 600                            "possible start of unterminated string literal");
 601       pfile->mlstring_pos.line = 0;
 602     }
 603 }
 604
 605 /* Subroutine of parse_string.  */
 606 static int
 607 unescaped_terminator_p (pfile, dest)
 608      cpp_reader *pfile;
 609      const unsigned char *dest;
 610 {
 611   const unsigned char *start, *temp;
 612
 613   /* In #include-style directives, terminators are not escapeable.  */
 614   if (pfile->state.angled_headers)
 615     return 1;
 616
 617   start = POOL_FRONT (pfile->string_pool);
 618
 619   /* An odd number of consecutive backslashes represents an escaped
 620      terminator.  */
 621   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 622     ;
 623
 624   return ((dest - temp) & 1) == 0;
 625 }
 626
/* Parses a string, character constant, or angle-bracketed header file
   name.  Handles embedded trigraphs and escaped newlines.

   Multi-line strings are allowed, but they are deprecated within
   directives.

   TERMINATOR is the character that ends the literal.  The contents,
   without the terminator and without a trailing null, are built in
   pfile->string_pool and committed there; TOKEN's val.str is set to
   the text and its length.  A newline inside a multi-line string is
   stored as '\n'.  On return the buffer's read_ahead is EOF if the
   terminator was found or the buffer ran out, and otherwise holds
   the newline that cut the literal short.  */
static void
parse_string (pfile, token, terminator)
     cpp_reader *pfile;
     cpp_token *token;
     cppchar_t terminator;
{
  cpp_buffer *buffer = pfile->buffer;
  cpp_pool *pool = pfile->string_pool;
  unsigned char *dest, *limit;
  cppchar_t c;
  unsigned int nulls = 0;

  dest = POOL_FRONT (pool);
  limit = POOL_LIMIT (pool);

  for (;;)
    {
      if (buffer->cur == buffer->rlimit)
        {
          c = EOF;
          unterminated (pfile, terminator);
          break;
        }
      c = *buffer->cur++;

      /* Entered by goto with the character that followed a newline.  */
    have_char:
      /* Handle trigraphs, escaped newlines etc.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (buffer, c);

      if (c == terminator && unescaped_terminator_p (pfile, dest))
        {
          /* The terminator is consumed; nothing is left to read ahead.  */
          c = EOF;
          break;
        }
      else if (is_vspace (c))
        {
          /* In assembly language, silently terminate string and
             character literals at end of line.  This is a kludge
             around not knowing where comments are.  */
          if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
            break;

          /* Character constants and header names may not extend over
             multiple lines.  In Standard C, neither may strings.
             Unfortunately, we accept multiline strings as an
             extension, except in #include family directives.  */
          if (terminator != '"' || pfile->state.angled_headers)
            {
              unterminated (pfile, terminator);
              break;
            }

          /* Record where the first multi-line string started, for
             the diagnostic in unterminated.  */
          if (pfile->mlstring_pos.line == 0)
            {
              pfile->mlstring_pos = pfile->lexer_pos;
              if (CPP_PEDANTIC (pfile))
                cpp_pedwarn (pfile, "multi-line string constant");
            }

          handle_newline (buffer, c);  /* Stores to read_ahead.  */
          /* Whatever the newline sequence was, store a single '\n'.  */
          c = '\n';
        }
      else if (c == '\0')
        {
          /* Warn only for the first null in the literal.  */
          if (nulls++ == 0)
            cpp_warning (pfile, "null character(s) preserved in literal");
        }

      /* No terminating null for strings - they could contain nulls.  */
      if (dest >= limit)
        limit = _cpp_next_chunk (pool, 0, &dest);
      *dest++ = c;

      /* If we had a new line, the next character is in read_ahead.  */
      if (c != '\n')
        continue;
      c = buffer->read_ahead;
      if (c != EOF)
        goto have_char;
      /* Otherwise loop round, where the end of the buffer is caught.  */
    }

  /* Remember the next character.  */
  buffer->read_ahead = c;

  token->val.str.text = POOL_FRONT (pool);
  token->val.str.len = dest - token->val.str.text;
  POOL_COMMIT (pool, token->val.str.len);
}
 721
 722 /* The stored comment includes the comment start and any terminator.  */
 723 static void
 724 save_comment (pfile, token, from)
 725      cpp_reader *pfile;
 726      cpp_token *token;
 727      const unsigned char *from;
 728 {
 729   unsigned char *buffer;
 730   unsigned int len;
 731
 732   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 733   /* C++ comments probably (not definitely) have moved past a new
 734      line, which we don't want to save in the comment.  */
 735   if (pfile->buffer->read_ahead != EOF)
 736     len--;
 737   buffer = _cpp_pool_alloc (pfile->string_pool, len);
 738
 739   token->type = CPP_COMMENT;
 740   token->val.str.len = len;
 741   token->val.str.text = buffer;
 742
 743   buffer[0] = '/';
 744   memcpy (buffer + 1, from, len - 1);
 745 }
 746
/* Subroutine of lex_token to handle '%'.  A little tricky, since we
   want to avoid stepping back when lexing %:%X.

   RESULT becomes CPP_MOD, CPP_MOD_EQ ("%=") or, when digraphs are
   enabled, CPP_HASH ("%:"), CPP_PASTE ("%:%:") or CPP_CLOSE_BRACE
   ("%>"); the digraph forms also get the DIGRAPH flag.  A character
   read but not used is left in BUFFER->read_ahead.  */
static void
lex_percent (buffer, result)
     cpp_buffer *buffer;
     cpp_token *result;
{
  cppchar_t c;

  result->type = CPP_MOD;
  /* Parsing %:%X could leave an extra character.  */
  if (buffer->extra_char == EOF)
    c = get_effective_char (buffer);
  else
    {
      /* Use the character saved by the previous call (the X of %:%X).  */
      c = buffer->read_ahead = buffer->extra_char;
      buffer->extra_char = EOF;
    }

  if (c == '=')
    ACCEPT_CHAR (CPP_MOD_EQ);
  else if (CPP_OPTION (buffer->pfile, digraphs))
    {
      if (c == ':')
        {
          result->flags |= DIGRAPH;
          ACCEPT_CHAR (CPP_HASH);
          /* Look for a second "%:", which would make this "%:%:".  */
          if (get_effective_char (buffer) == '%')
            {
              buffer->extra_char = get_effective_char (buffer);
              if (buffer->extra_char == ':')
                {
                  buffer->extra_char = EOF;
                  ACCEPT_CHAR (CPP_PASTE);
                }
              else
                /* We'll catch the extra_char when we're called back.  */
                buffer->read_ahead = '%';
            }
        }
      else if (c == '>')
        {
          result->flags |= DIGRAPH;
          ACCEPT_CHAR (CPP_CLOSE_BRACE);
        }
    }
}
 794
/* Subroutine of lex_token to handle '.'.  This is tricky, since we
   want to avoid stepping back when lexing '...' or '.123'.  In the
   latter case we should also set a flag for parse_number.

   RESULT becomes CPP_NUMBER (a digit follows the dot), CPP_ELLIPSIS
   ("..."), CPP_DOT_STAR (".*", only with the cplusplus option) or
   CPP_DOT.  A character read but not used is left in the buffer's
   read_ahead.  */
static void
lex_dot (pfile, result)
     cpp_reader *pfile;
     cpp_token *result;
{
  cpp_buffer *buffer = pfile->buffer;
  cppchar_t c;

  /* Parsing ..X could leave an extra character.  */
  if (buffer->extra_char == EOF)
    c = get_effective_char (buffer);
  else
    {
      /* Use the character saved by the previous call (the X of ..X).  */
      c = buffer->read_ahead = buffer->extra_char;
      buffer->extra_char = EOF;
    }

  /* All known character sets have 0...9 contiguous.  */
  if (c >= '0' && c <= '9')
    {
      result->type = CPP_NUMBER;
      /* The final argument asks parse_number to supply the leading '.'.  */
      parse_number (pfile, &result->val.str, c, 1);
    }
  else
    {
      result->type = CPP_DOT;
      if (c == '.')
        {
          /* Two dots so far; a third makes an ellipsis.  */
          buffer->extra_char = get_effective_char (buffer);
          if (buffer->extra_char == '.')
            {
              buffer->extra_char = EOF;
              ACCEPT_CHAR (CPP_ELLIPSIS);
            }
          else
            /* We'll catch the extra_char when we're called back.  */
            buffer->read_ahead = '.';
        }
      else if (c == '*' && CPP_OPTION (pfile, cplusplus))
        ACCEPT_CHAR (CPP_DOT_STAR);
    }
}
 840
 841 void
 842 _cpp_lex_token (pfile, result)
 843      cpp_reader *pfile;
 844      cpp_token *result;
 845 {
 846   cppchar_t c;
 847   cpp_buffer *buffer;
 848   const unsigned char *comment_start;
 849   unsigned char was_skip_newlines = pfile->state.skip_newlines;
 850   unsigned char newline_in_args = 0;
 851
 852  done_directive:
 853   buffer = pfile->buffer;
 854   pfile->state.skip_newlines = 0;
 855   result->flags = 0;
 856  next_char:
 857   pfile->lexer_pos.line = buffer->lineno;
 858  next_char2:
 859   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 860
 861   c = buffer->read_ahead;
 862   if (c == EOF && buffer->cur < buffer->rlimit)
 863     {
 864       c = *buffer->cur++;
 865       pfile->lexer_pos.col++;
 866     }
 867
 868  do_switch:
 869   buffer->read_ahead = EOF;
 870   switch (c)
 871     {
 872     case EOF:
 873       /* Non-empty files should end in a newline.  Ignore for command
 874          line and _Pragma buffers.  */
 875       if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 876         cpp_pedwarn (pfile, "no newline at end of file");
 877       pfile->state.skip_newlines = 1;
 878       result->type = CPP_EOF;
 879       break;
 880
 881     case ' ': case '\t': case '\f': case '\v': case '\0':
 882       skip_whitespace (pfile, c);
 883       result->flags |= PREV_WHITE;
 884       goto next_char2;
 885
 886     case '\n': case '\r':
 887       /* Don't let directives spill over to the next line.  */
 888       if (pfile->state.in_directive)
 889         buffer->read_ahead = c;
 890       else
 891         {
 892           handle_newline (buffer, c);
 893
 894           pfile->lexer_pos.output_line = buffer->lineno;
 895
 896           /* Skip newlines in macro arguments (except in directives).  */
 897           if (pfile->state.parsing_args)
 898             {
 899               /* Set the whitespace flag.   */
 900               newline_in_args = 1;
 901               result->flags |= PREV_WHITE;
 902               goto next_char;
 903             }
 904
 905           if (was_skip_newlines)
 906             {
 907               /* Clear any whitespace flag.   */
 908               result->flags &= ~PREV_WHITE;
 909               goto next_char;
 910             }
 911         }
 912
 913       /* Next we're at BOL, so skip new lines.  */
 914       pfile->state.skip_newlines = 1;
 915       result->type = CPP_EOF;
 916       break;
 917
 918     case '?':
 919     case '\\':
 920       /* These could start an escaped newline, or '?' a trigraph.  Let
 921          skip_escaped_newlines do all the work.  */
 922       {
 923         unsigned int lineno = buffer->lineno;
 924
 925         c = skip_escaped_newlines (buffer, c);
 926         if (lineno != buffer->lineno)
 927           /* We had at least one escaped newline of some sort, and the
 928              next character is in buffer->read_ahead.  Update the
 929              token's line and column.  */
 930             goto next_char;
 931
 932         /* We are either the original '?' or '\\', or a trigraph.  */
 933         result->type = CPP_QUERY;
 934         buffer->read_ahead = EOF;
 935         if (c == '\\')
 936           goto random_char;
 937         else if (c != '?')
 938           goto do_switch;
 939       }
 940       break;
 941
 942     case '0': case '1': case '2': case '3': case '4':
 943     case '5': case '6': case '7': case '8': case '9':
 944       result->type = CPP_NUMBER;
 945       parse_number (pfile, &result->val.str, c, 0);
 946       break;
 947
 948     case '$':
 949       if (!CPP_OPTION (pfile, dollars_in_ident))
 950         goto random_char;
 951       /* Fall through... */
 952
 953     case '_':
 954     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 955     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 956     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 957     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 958     case 'y': case 'z':
 959     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 960     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 961     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 962     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 963     case 'Y': case 'Z':
 964       result->type = CPP_NAME;
 965       result->val.node = parse_identifier (pfile, c);
 966
 967       /* 'L' may introduce wide characters or strings.  */
 968       if (result->val.node == pfile->spec_nodes.n_L)
 969         {
 970           c = buffer->read_ahead; /* For make_string.  */
 971           if (c == '\'' || c == '"')
 972             {
 973               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 974               goto make_string;
 975             }
 976         }
 977       /* Convert named operators to their proper types.  */
 978       else if (result->val.node->flags & NODE_OPERATOR)
 979         {
 980           result->flags |= NAMED_OP;
 981           result->type = result->val.node->value.operator;
 982         }
 983       break;
 984
 985     case '\'':
 986     case '"':
 987       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 988     make_string:
 989       parse_string (pfile, result, c);
 990       break;
 991
 992     case '/':
 993       /* A potential block or line comment.  */
 994       comment_start = buffer->cur;
 995       result->type = CPP_DIV;
 996       c = get_effective_char (buffer);
 997       if (c == '=')
 998         ACCEPT_CHAR (CPP_DIV_EQ);
 999       if (c != '/' && c != '*')
1000         break;
1001
1002       if (c == '*')
1003         {
1004           if (skip_block_comment (pfile))
1005             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1006                                  pfile->lexer_pos.col,
1007                                  "unterminated comment");
1008         }
1009       else
1010         {
1011           if (!CPP_OPTION (pfile, cplusplus_comments)
1012               && !CPP_IN_SYSTEM_HEADER (pfile))
1013             break;
1014
1015           /* We silently allow C++ comments in system headers,
1016              irrespective of conformance mode, because lots of
1017              broken systems do that and trying to clean it up in
1018              fixincludes is a nightmare.  */
1019           if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1020               && ! buffer->warned_cplusplus_comments)
1021             {
1022               cpp_pedwarn (pfile,
1023                            "C++ style comments are not allowed in ISO C89");
1024               cpp_pedwarn (pfile,
1025                            "(this will be reported only once per input file)");
1026               buffer->warned_cplusplus_comments = 1;
1027             }
1028
1029           /* Skip_line_comment updates buffer->read_ahead.  */
1030           if (skip_line_comment (pfile))
1031             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1032                                    pfile->lexer_pos.col,
1033                                    "multi-line comment");
1034         }
1035
1036       /* Skipping the comment has updated buffer->read_ahead.  */
1037       if (!pfile->state.save_comments)
1038         {
1039           result->flags |= PREV_WHITE;
1040           goto next_char;
1041         }
1042
1043       /* Save the comment as a token in its own right.  */
1044       save_comment (pfile, result, comment_start);
1045       break;
1046
1047     case '<':
1048       if (pfile->state.angled_headers)
1049         {
1050           result->type = CPP_HEADER_NAME;
1051           c = '>';              /* terminator.  */
1052           goto make_string;
1053         }
1054
1055       result->type = CPP_LESS;
1056       c = get_effective_char (buffer);
1057       if (c == '=')
1058         ACCEPT_CHAR (CPP_LESS_EQ);
1059       else if (c == '<')
1060         {
1061           ACCEPT_CHAR (CPP_LSHIFT);
1062           if (get_effective_char (buffer) == '=')
1063             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1064         }
1065       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1066         {
1067           ACCEPT_CHAR (CPP_MIN);
1068           if (get_effective_char (buffer) == '=')
1069             ACCEPT_CHAR (CPP_MIN_EQ);
1070         }
1071       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1072         {
1073           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1074           result->flags |= DIGRAPH;
1075         }
1076       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1077         {
1078           ACCEPT_CHAR (CPP_OPEN_BRACE);
1079           result->flags |= DIGRAPH;
1080         }
1081       break;
1082
1083     case '>':
1084       result->type = CPP_GREATER;
1085       c = get_effective_char (buffer);
1086       if (c == '=')
1087         ACCEPT_CHAR (CPP_GREATER_EQ);
1088       else if (c == '>')
1089         {
1090           ACCEPT_CHAR (CPP_RSHIFT);
1091           if (get_effective_char (buffer) == '=')
1092             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1093         }
1094       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1095         {
1096           ACCEPT_CHAR (CPP_MAX);
1097           if (get_effective_char (buffer) == '=')
1098             ACCEPT_CHAR (CPP_MAX_EQ);
1099         }
1100       break;
1101
1102     case '%':
1103       lex_percent (buffer, result);
1104       if (result->type == CPP_HASH)
1105         goto do_hash;
1106       break;
1107
1108     case '.':
1109       lex_dot (pfile, result);
1110       break;
1111
1112     case '+':
1113       result->type = CPP_PLUS;
1114       c = get_effective_char (buffer);
1115       if (c == '=')
1116         ACCEPT_CHAR (CPP_PLUS_EQ);
1117       else if (c == '+')
1118         ACCEPT_CHAR (CPP_PLUS_PLUS);
1119       break;
1120
1121     case '-':
1122       result->type = CPP_MINUS;
1123       c = get_effective_char (buffer);
1124       if (c == '>')
1125         {
1126           ACCEPT_CHAR (CPP_DEREF);
1127           if (CPP_OPTION (pfile, cplusplus)
1128               && get_effective_char (buffer) == '*')
1129             ACCEPT_CHAR (CPP_DEREF_STAR);
1130         }
1131       else if (c == '=')
1132         ACCEPT_CHAR (CPP_MINUS_EQ);
1133       else if (c == '-')
1134         ACCEPT_CHAR (CPP_MINUS_MINUS);
1135       break;
1136
1137     case '*':
1138       result->type = CPP_MULT;
1139       if (get_effective_char (buffer) == '=')
1140         ACCEPT_CHAR (CPP_MULT_EQ);
1141       break;
1142
1143     case '=':
1144       result->type = CPP_EQ;
1145       if (get_effective_char (buffer) == '=')
1146         ACCEPT_CHAR (CPP_EQ_EQ);
1147       break;
1148
1149     case '!':
1150       result->type = CPP_NOT;
1151       if (get_effective_char (buffer) == '=')
1152         ACCEPT_CHAR (CPP_NOT_EQ);
1153       break;
1154
1155     case '&':
1156       result->type = CPP_AND;
1157       c = get_effective_char (buffer);
1158       if (c == '=')
1159         ACCEPT_CHAR (CPP_AND_EQ);
1160       else if (c == '&')
1161         ACCEPT_CHAR (CPP_AND_AND);
1162       break;
1163
1164     case '#':
1165       c = buffer->extra_char;   /* Can be set by error condition below.  */
1166       if (c != EOF)
1167         {
1168           buffer->read_ahead = c;
1169           buffer->extra_char = EOF;
1170         }
1171       else
1172         c = get_effective_char (buffer);
1173
1174       if (c == '#')
1175         ACCEPT_CHAR (CPP_PASTE);
1176       else
1177         {
1178           result->type = CPP_HASH;
1179         do_hash:
1180           if (newline_in_args)
1181             {
1182               /* 6.10.3 paragraph 11: If there are sequences of
1183                  preprocessing tokens within the list of arguments that
1184                  would otherwise act as preprocessing directives, the
1185                  behavior is undefined.
1186
1187                  This implementation will report a hard error, terminate
1188                  the macro invocation, and proceed to process the
1189                  directive.  */
1190               cpp_error (pfile,
1191                          "directives may not be used inside a macro argument");
1192
1193               /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1194               buffer->extra_char = buffer->read_ahead;
1195               buffer->read_ahead = '#';
1196               pfile->state.skip_newlines = 1;
1197               result->type = CPP_EOF;
1198
1199               /* Get whitespace right - newline_in_args sets it.  */
1200               if (pfile->lexer_pos.col == 1)
1201                 result->flags &= ~PREV_WHITE;
1202             }
1203           else if (was_skip_newlines)
1204             {
1205               /* This is the hash introducing a directive.  */
1206               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1207                 goto done_directive; /* was_skip_newlines still 1.  */
1208               /* This is in fact an assembler #.  */
1209             }
1210         }
1211       break;
1212
1213     case '|':
1214       result->type = CPP_OR;
1215       c = get_effective_char (buffer);
1216       if (c == '=')
1217         ACCEPT_CHAR (CPP_OR_EQ);
1218       else if (c == '|')
1219         ACCEPT_CHAR (CPP_OR_OR);
1220       break;
1221
1222     case '^':
1223       result->type = CPP_XOR;
1224       if (get_effective_char (buffer) == '=')
1225         ACCEPT_CHAR (CPP_XOR_EQ);
1226       break;
1227
1228     case ':':
1229       result->type = CPP_COLON;
1230       c = get_effective_char (buffer);
1231       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1232         ACCEPT_CHAR (CPP_SCOPE);
1233       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1234         {
1235           result->flags |= DIGRAPH;
1236           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1237         }
1238       break;
1239
1240     case '~': result->type = CPP_COMPL; break;
1241     case ',': result->type = CPP_COMMA; break;
1242     case '(': result->type = CPP_OPEN_PAREN; break;
1243     case ')': result->type = CPP_CLOSE_PAREN; break;
1244     case '[': result->type = CPP_OPEN_SQUARE; break;
1245     case ']': result->type = CPP_CLOSE_SQUARE; break;
1246     case '{': result->type = CPP_OPEN_BRACE; break;
1247     case '}': result->type = CPP_CLOSE_BRACE; break;
1248     case ';': result->type = CPP_SEMICOLON; break;
1249
1250     case '@':
1251       if (CPP_OPTION (pfile, objc))
1252         {
1253           /* In Objective C, '@' may begin keywords or strings, like
1254              @keyword or @"string".  It would be nice to call
1255              get_effective_char here and test the result.  However, we
1256              would then need to pass 2 characters to parse_identifier,
1257              making it ugly and slowing down its main loop.  Instead,
1258              we assume we have an identifier, and recover if not.  */
1259           result->type = CPP_NAME;
1260           result->val.node = parse_identifier (pfile, c);
1261           if (result->val.node->length != 1)
1262             break;
1263
1264           /* OK, so it wasn't an identifier.  Maybe a string?  */
1265           if (buffer->read_ahead == '"')
1266             {
1267               c = '"';
1268               ACCEPT_CHAR (CPP_OSTRING);
1269               goto make_string;
1270             }
1271         }
1272       goto random_char;
1273
1274     random_char:
1275     default:
1276       result->type = CPP_OTHER;
1277       result->val.c = c;
1278       break;
1279     }
1280 }
1281
1282 /* An upper bound on the number of bytes needed to spell a token,
1283    including preceding whitespace.  */
1284 unsigned int
1285 cpp_token_len (token)
1286      const cpp_token *token;
1287 {
1288   unsigned int len;
1289
1290   switch (TOKEN_SPELL (token))
1291     {
1292     default:            len = 0;                        break;
1293     case SPELL_STRING:  len = token->val.str.len;       break;
1294     case SPELL_IDENT:   len = token->val.node->length;  break;
1295     }
1296   /* 1 for whitespace, 4 for comment delimeters.  */
1297   return len + 5;
1298 }
1299
1300 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1301    already contain the enough space to hold the token's spelling.
1302    Returns a pointer to the character after the last character
1303    written.  */
1304 unsigned char *
1305 cpp_spell_token (pfile, token, buffer)
1306      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1307      const cpp_token *token;
1308      unsigned char *buffer;
1309 {
1310   switch (TOKEN_SPELL (token))
1311     {
1312     case SPELL_OPERATOR:
1313       {
1314         const unsigned char *spelling;
1315         unsigned char c;
1316
1317         if (token->flags & DIGRAPH)
1318           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1319         else if (token->flags & NAMED_OP)
1320           goto spell_ident;
1321         else
1322           spelling = TOKEN_NAME (token);
1323
1324         while ((c = *spelling++) != '\0')
1325           *buffer++ = c;
1326       }
1327       break;
1328
1329     case SPELL_IDENT:
1330       spell_ident:
1331       memcpy (buffer, token->val.node->name, token->val.node->length);
1332       buffer += token->val.node->length;
1333       break;
1334
1335     case SPELL_STRING:
1336       {
1337         int left, right, tag;
1338         switch (token->type)
1339           {
1340           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1341           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1342           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1343           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1344           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1345           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1346           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1347           }
1348         if (tag) *buffer++ = tag;
1349         if (left) *buffer++ = left;
1350         memcpy (buffer, token->val.str.text, token->val.str.len);
1351         buffer += token->val.str.len;
1352         if (right) *buffer++ = right;
1353       }
1354       break;
1355
1356     case SPELL_CHAR:
1357       *buffer++ = token->val.c;
1358       break;
1359
1360     case SPELL_NONE:
1361       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1362       break;
1363     }
1364
1365   return buffer;
1366 }
1367
1368 /* Returns a token as a null-terminated string.  The string is
1369    temporary, and automatically freed later.  Useful for diagnostics.  */
1370 unsigned char *
1371 cpp_token_as_text (pfile, token)
1372      cpp_reader *pfile;
1373      const cpp_token *token;
1374 {
1375   unsigned int len = cpp_token_len (token);
1376   unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
1377
1378   end = cpp_spell_token (pfile, token, start);
1379   end[0] = '\0';
1380
1381   return start;
1382 }
1383
1384 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1385 const char *
1386 cpp_type2name (type)
1387      enum cpp_ttype type;
1388 {
1389   return (const char *) token_spellings[type].name;
1390 }
1391
1392 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1393    for efficiency - to avoid double-buffering.  Also, outputs a space
1394    if PREV_WHITE is flagged.  */
1395 void
1396 cpp_output_token (token, fp)
1397      const cpp_token *token;
1398      FILE *fp;
1399 {
1400   if (token->flags & PREV_WHITE)
1401     putc (' ', fp);
1402
1403   switch (TOKEN_SPELL (token))
1404     {
1405     case SPELL_OPERATOR:
1406       {
1407         const unsigned char *spelling;
1408
1409         if (token->flags & DIGRAPH)
1410           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1411         else if (token->flags & NAMED_OP)
1412           goto spell_ident;
1413         else
1414           spelling = TOKEN_NAME (token);
1415
1416         ufputs (spelling, fp);
1417       }
1418       break;
1419
1420     spell_ident:
1421     case SPELL_IDENT:
1422       ufputs (token->val.node->name, fp);
1423     break;
1424
1425     case SPELL_STRING:
1426       {
1427         int left, right, tag;
1428         switch (token->type)
1429           {
1430           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1431           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1432           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1433           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1434           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1435           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1436           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1437           }
1438         if (tag) putc (tag, fp);
1439         if (left) putc (left, fp);
1440         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1441         if (right) putc (right, fp);
1442       }
1443       break;
1444
1445     case SPELL_CHAR:
1446       putc (token->val.c, fp);
1447       break;
1448
1449     case SPELL_NONE:
1450       /* An error, most probably.  */
1451       break;
1452     }
1453 }
1454
1455 /* Compare two tokens.  */
1456 int
1457 _cpp_equiv_tokens (a, b)
1458      const cpp_token *a, *b;
1459 {
1460   if (a->type == b->type && a->flags == b->flags)
1461     switch (TOKEN_SPELL (a))
1462       {
1463       default:                  /* Keep compiler happy.  */
1464       case SPELL_OPERATOR:
1465         return 1;
1466       case SPELL_CHAR:
1467         return a->val.c == b->val.c; /* Character.  */
1468       case SPELL_NONE:
1469         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1470       case SPELL_IDENT:
1471         return a->val.node == b->val.node;
1472       case SPELL_STRING:
1473         return (a->val.str.len == b->val.str.len
1474                 && !memcmp (a->val.str.text, b->val.str.text,
1475                             a->val.str.len));
1476       }
1477
1478   return 0;
1479 }
1480
#if 0
/* Compare two token lists.  Returns 1 if A and B hold the same
   number of tokens and each pair of corresponding tokens satisfies
   _cpp_equiv_tokens, 0 otherwise.  (Currently compiled out.)  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int idx = 0, total;

  total = a->limit - a->first;
  if (total != (b->limit - b->first))
    return 0;

  while (idx < total)
    {
      if (! _cpp_equiv_tokens (&a->first[idx], &b->first[idx]))
        return 0;
      idx++;
    }

  return 1;
}
#endif
1500
1501 /* Determine whether two tokens can be pasted together, and if so,
1502    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1503    be pasted, or the appropriate type for the merged token if they
1504    can.  */
1505 enum cpp_ttype
1506 cpp_can_paste (pfile, token1, token2, digraph)
1507      cpp_reader * pfile;
1508      const cpp_token *token1, *token2;
1509      int* digraph;
1510 {
1511   enum cpp_ttype a = token1->type, b = token2->type;
1512   int cxx = CPP_OPTION (pfile, cplusplus);
1513
1514   /* Treat named operators as if they were ordinary NAMEs.  */
1515   if (token1->flags & NAMED_OP)
1516     a = CPP_NAME;
1517   if (token2->flags & NAMED_OP)
1518     b = CPP_NAME;
1519
1520   if (a <= CPP_LAST_EQ && b == CPP_EQ)
1521     return a + (CPP_EQ_EQ - CPP_EQ);
1522
1523   switch (a)
1524     {
1525     case CPP_GREATER:
1526       if (b == a) return CPP_RSHIFT;
1527       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1528       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1529       break;
1530     case CPP_LESS:
1531       if (b == a) return CPP_LSHIFT;
1532       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1533       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1534       if (CPP_OPTION (pfile, digraphs))
1535         {
1536           if (b == CPP_COLON)
1537             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1538           if (b == CPP_MOD)
1539             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1540         }
1541       break;
1542
1543     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1544     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1545     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1546
1547     case CPP_MINUS:
1548       if (b == a)               return CPP_MINUS_MINUS;
1549       if (b == CPP_GREATER)     return CPP_DEREF;
1550       break;
1551     case CPP_COLON:
1552       if (b == a && cxx)        return CPP_SCOPE;
1553       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1554         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1555       break;
1556
1557     case CPP_MOD:
1558       if (CPP_OPTION (pfile, digraphs))
1559         {
1560           if (b == CPP_GREATER)
1561             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1562           if (b == CPP_COLON)
1563             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1564         }
1565       break;
1566     case CPP_DEREF:
1567       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1568       break;
1569     case CPP_DOT:
1570       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1571       if (b == CPP_NUMBER)      return CPP_NUMBER;
1572       break;
1573
1574     case CPP_HASH:
1575       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1576         /* %:%: digraph */
1577         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1578       break;
1579
1580     case CPP_NAME:
1581       if (b == CPP_NAME)        return CPP_NAME;
1582       if (b == CPP_NUMBER
1583           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1584       if (b == CPP_CHAR
1585           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1586       if (b == CPP_STRING
1587           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1588       break;
1589
1590     case CPP_NUMBER:
1591       if (b == CPP_NUMBER)      return CPP_NUMBER;
1592       if (b == CPP_NAME)        return CPP_NUMBER;
1593       if (b == CPP_DOT)         return CPP_NUMBER;
1594       /* Numbers cannot have length zero, so this is safe.  */
1595       if ((b == CPP_PLUS || b == CPP_MINUS)
1596           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1597         return CPP_NUMBER;
1598       break;
1599
1600     case CPP_OTHER:
1601       if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1602         {
1603           if (b == CPP_NAME)    return CPP_NAME;
1604           if (b == CPP_STRING)  return CPP_OSTRING;
1605         }
1606
1607     default:
1608       break;
1609     }
1610
1611   return CPP_EOF;
1612 }
1613
1614 /* Returns nonzero if a space should be inserted to avoid an
1615    accidental token paste for output.  For simplicity, it is
1616    conservative, and occasionally advises a space where one is not
1617    needed, e.g. "." and ".2".  */
1618
1619 int
1620 cpp_avoid_paste (pfile, token1, token2)
1621      cpp_reader *pfile;
1622      const cpp_token *token1, *token2;
1623 {
1624   enum cpp_ttype a = token1->type, b = token2->type;
1625   cppchar_t c;
1626
1627   if (token1->flags & NAMED_OP)
1628     a = CPP_NAME;
1629   if (token2->flags & NAMED_OP)
1630     b = CPP_NAME;
1631
1632   c = EOF;
1633   if (token2->flags & DIGRAPH)
1634     c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1635   else if (token_spellings[b].category == SPELL_OPERATOR)
1636     c = token_spellings[b].name[0];
1637
1638   /* Quickly get everything that can paste with an '='.  */
1639   if (a <= CPP_LAST_EQ && c == '=')
1640     return 1;
1641
1642   switch (a)
1643     {
1644     case CPP_GREATER:   return c == '>' || c == '?';
1645     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1646     case CPP_PLUS:      return c == '+';
1647     case CPP_MINUS:     return c == '-' || c == '>';
1648     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1649     case CPP_MOD:       return c == ':' || c == '>';
1650     case CPP_AND:       return c == '&';
1651     case CPP_OR:        return c == '|';
1652     case CPP_COLON:     return c == ':' || c == '>';
1653     case CPP_DEREF:     return c == '*';
1654     case CPP_DOT:       return c == '.' || c == '%';
1655     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1656     case CPP_NAME:      return ((b == CPP_NUMBER
1657                                  && name_p (pfile, &token2->val.str))
1658                                 || b == CPP_NAME
1659                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1660     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1661                                 || c == '.' || c == '+' || c == '-');
1662     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1663                                 && token1->val.c == '@'
1664                                 && (b == CPP_NAME || b == CPP_STRING));
1665     default:            break;
1666     }
1667
1668   return 0;
1669 }
1670
1671 /* Output all the remaining tokens on the current line, and a newline
1672    character, to FP.  Leading whitespace is removed.  */
1673 void
1674 cpp_output_line (pfile, fp)
1675      cpp_reader *pfile;
1676      FILE *fp;
1677 {
1678   cpp_token token;
1679
1680   _cpp_get_token (pfile, &token);
1681   token.flags &= ~PREV_WHITE;
1682   while (token.type != CPP_EOF)
1683     {
1684       cpp_output_token (&token, fp);
1685       _cpp_get_token (pfile, &token);
1686     }
1687
1688   putc ('\n', fp);
1689 }
1690
/* Memory pools.  */

/* Layout probe, never instantiated.  Member U follows a single char,
   so its offset is the alignment the compiler requires for a union
   of a pointer and a double.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used when a pool is created without an explicit one, and
   for rounding chunk sizes.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1704
1705 static int
1706 chunk_suitable (pool, chunk, size)
1707      cpp_pool *pool;
1708      cpp_chunk *chunk;
1709      unsigned int size;
1710 {
1711   /* Being at least twice SIZE means we can use memcpy in
1712      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
1713      anyway.  */
1714   return (chunk && pool->locked != chunk
1715           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1716 }
1717
1718 /* Returns the end of the new pool.  PTR points to a char in the old
1719    pool, and is updated to point to the same char in the new pool.  */
1720 unsigned char *
1721 _cpp_next_chunk (pool, len, ptr)
1722      cpp_pool *pool;
1723      unsigned int len;
1724      unsigned char **ptr;
1725 {
1726   cpp_chunk *chunk = pool->cur->next;
1727
1728   /* LEN is the minimum size we want in the new pool.  */
1729   len += POOL_ROOM (pool);
1730   if (! chunk_suitable (pool, chunk, len))
1731     {
1732       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1733
1734       chunk->next = pool->cur->next;
1735       pool->cur->next = chunk;
1736     }
1737
1738   /* Update the pointer before changing chunk's front.  */
1739   if (ptr)
1740     *ptr += chunk->base - POOL_FRONT (pool);
1741
1742   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1743   chunk->front = chunk->base;
1744
1745   pool->cur = chunk;
1746   return POOL_LIMIT (pool);
1747 }
1748
1749 static cpp_chunk *
1750 new_chunk (size)
1751      unsigned int size;
1752 {
1753   unsigned char *base;
1754   cpp_chunk *result;
1755
1756   size = ALIGN (size, DEFAULT_ALIGNMENT);
1757   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1758   /* Put the chunk descriptor at the end.  Then chunk overruns will
1759      cause obvious chaos.  */
1760   result = (cpp_chunk *) (base + size);
1761   result->base = base;
1762   result->front = base;
1763   result->limit = base + size;
1764   result->next = 0;
1765
1766   return result;
1767 }
1768
1769 void
1770 _cpp_init_pool (pool, size, align, temp)
1771      cpp_pool *pool;
1772      unsigned int size, align, temp;
1773 {
1774   if (align == 0)
1775     align = DEFAULT_ALIGNMENT;
1776   if (align & (align - 1))
1777     abort ();
1778   pool->align = align;
1779   pool->cur = new_chunk (size);
1780   pool->locked = 0;
1781   pool->locks = 0;
1782   if (temp)
1783     pool->cur->next = pool->cur;
1784 }
1785
1786 void
1787 _cpp_lock_pool (pool)
1788      cpp_pool *pool;
1789 {
1790   if (pool->locks++ == 0)
1791     pool->locked = pool->cur;
1792 }
1793
1794 void
1795 _cpp_unlock_pool (pool)
1796      cpp_pool *pool;
1797 {
1798   if (--pool->locks == 0)
1799     pool->locked = 0;
1800 }
1801
1802 void
1803 _cpp_free_pool (pool)
1804      cpp_pool *pool;
1805 {
1806   cpp_chunk *chunk = pool->cur, *next;
1807
1808   do
1809     {
1810       next = chunk->next;
1811       free (chunk->base);
1812       chunk = next;
1813     }
1814   while (chunk && chunk != pool->cur);
1815 }
1816
1817 /* Reserve LEN bytes from a memory pool.  */
1818 unsigned char *
1819 _cpp_pool_reserve (pool, len)
1820      cpp_pool *pool;
1821      unsigned int len;
1822 {
1823   len = ALIGN (len, pool->align);
1824   if (len > (unsigned int) POOL_ROOM (pool))
1825     _cpp_next_chunk (pool, len, 0);
1826
1827   return POOL_FRONT (pool);
1828 }
1829
1830 /* Allocate LEN bytes from a memory pool.  */
1831 unsigned char *
1832 _cpp_pool_alloc (pool, len)
1833      cpp_pool *pool;
1834      unsigned int len;
1835 {
1836   unsigned char *result = _cpp_pool_reserve (pool, len);
1837
1838   POOL_COMMIT (pool, len);
1839   return result;
1840 }