gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and its length in the token->val.name.len.
        NOTE(review): parse_string and save_comment in this file record the
        length in token->val.str.len -- confirm the field named above.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling      /* One row of the token_spellings table.  */
  66 {
  67   enum spell_type category; /* How tokens of this type are spelled.  */
  68   const unsigned char *name; /* Spelling text stored for the type.  */
  69 };
  70
     /* Spellings of the digraph forms.  */
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
     /* Expand TTYPE_TABLE into the spelling table.  An OP entry gets the
        SPELL_OPERATOR category and its spelling string S; a TK entry gets
        the category S it names and STRINGX applied to its enumerator E
        (presumably the stringified enumerator name -- STRINGX is defined
        elsewhere).  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
     /* Look up the spelling category / spelling text for TOKEN, indexing
        token_spellings by the token's type.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  /* Forward declarations of the file-local (static) lexer helpers.  The
     argument lists are wrapped in the PARAMS macro.  */
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  86
     /* Comment, whitespace, identifier, number and string scanning.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  93                                                     const U_CHAR *));
  94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  97 static void unterminated PARAMS ((cpp_reader *, int));
  98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
 101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 104                                    const unsigned char *, unsigned int *));
 105 static cpp_token *lex_token PARAMS ((cpp_reader *, cpp_token *));
 106 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
 107
 108 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 109 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 110 static unsigned int hex_digit_value PARAMS ((unsigned int));
 111
 112 /* Utility routine:
 113
 114    Compares the token TOKEN to the NUL-terminated string STRING.
 115    Returns 1 if TOKEN is a CPP_NAME and ustrcmp returns zero for its
        node's name against STRING.  Returns 0 otherwise, including when
        TOKEN is not a CPP_NAME.  */
 116
 117 int
 118 cpp_ideq (token, string)
 119      const cpp_token *token;
 120      const char *string;
 121 {
       /* Only names carry a hash node to compare against.  */
 122   if (token->type != CPP_NAME)
 123     return 0;
 124
 125   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 126 }
 127
 128 /* Call when meeting a newline.  NEWLINE_CHAR is the newline character
 129    that was just read.  Increments pfile->line, zeroes the buffer's
        col_adjust and moves line_base to the current read position.  A
        following character that completes a CR-LF or LF-CR pair is consumed
        as part of the same line break.  Returns the character after the
        newline (or carriage-return newline combination), or EOF if the
        buffer is exhausted; the same value is left in buffer->read_ahead.  */
 130 static cppchar_t
 131 handle_newline (pfile, newline_char)
 132      cpp_reader *pfile;
 133      cppchar_t newline_char;
 134 {
 135   cpp_buffer *buffer;
 136   cppchar_t next = EOF;
 137
 138   pfile->line++;
 139   buffer = pfile->buffer;
 140   buffer->col_adjust = 0;
 141   buffer->line_base = buffer->cur;
 142
 143   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 144   if (buffer->cur < buffer->rlimit)
 145     {
 146       next = *buffer->cur++;
           /* Assuming NEWLINE_CHAR is '\r' or '\n', the sums are equal only
              when NEXT is the other member of the pair.  */
 147       if (next + newline_char == '\r' + '\n')
 148         {
               /* The new line starts after the second newline character.  */
 149           buffer->line_base = buffer->cur;
 150           if (buffer->cur < buffer->rlimit)
 151             next = *buffer->cur++;
 152           else
 153             next = EOF;
 154         }
 155     }
 156
 157   buffer->read_ahead = next;
 158   return next;
 159 }
 160
 161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 162    encountered.  It warns if necessary, and returns true if the
 163    trigraph should be honoured.  FROM_CHAR is the third character of a
 164    trigraph, and presumed to be the previous character for position
 165    reporting.  The return value is the setting of the trigraphs
        option; warnings are issued only when the warn_trigraphs option is
        set and we are not lexing a comment.  */
 166 static int
 167 trigraph_ok (pfile, from_char)
 168      cpp_reader *pfile;
 169      cppchar_t from_char;
 170 {
 171   int accept = CPP_OPTION (pfile, trigraphs);
 172
 173   /* Don't warn about trigraphs in comments.  */
 174   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 175     {
 176       cpp_buffer *buffer = pfile->buffer;
 177
           /* The reported column is two before the current one, i.e. it
              steps back over the rest of the three-character sequence.  */
 178       if (accept)
 179         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 180                                "trigraph ??%c converted to %c",
 181                                (int) from_char,
 182                                (int) _cpp_trigraph_map[from_char]);
           /* last_Wtrigraphs remembers the position of the last "ignored"
              warning so the same trigraph is not reported twice if it is
              scanned again.  */
 183       else if (buffer->cur != buffer->last_Wtrigraphs)
 184         {
 185           buffer->last_Wtrigraphs = buffer->cur;
 186           cpp_warning_with_line (pfile, pfile->line,
 187                                  CPP_BUF_COL (buffer) - 2,
 188                                  "trigraph ??%c ignored", (int) from_char);
 189         }
 190     }
 191
 192   return accept;
 193 }
 194
 195 /* Give the token being built the type T and discard the pending
        read-ahead character by resetting buffer->read_ahead to EOF.
        Assumes local variables buffer and result.  */
 196 #define ACCEPT_CHAR(t) \
 197   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 198
 199 /* Save and restore the buffer's read position.  Both macros assume
        local variables buffer and saved_cur.

        When we move to multibyte character sets, add to these something
 200    that saves and restores the state of the multibyte conversion
 201    library.  This probably involves saving and restoring a "cookie".
 202    In the case of glibc it is an 8-byte structure, so is not a high
 203    overhead operation.  In any case, it's out of the fast path.  */
 204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 206
 207 /* Skips any escaped newlines introduced by NEXT, which is either a
 208    '?' or a '\\'.  Returns the next character, which will also have
 209    been placed in buffer->read_ahead.  This routine performs
 210    preprocessing stages 1 and 2 of the ISO C standard.

        NEXT has already been consumed from the buffer.  If it does not
        begin an accepted trigraph or an escaped newline, the read position
        is rewound to just after it and NEXT itself is returned.  Nothing
        is done when the buffer is marked from_stage3.  */
 211 static cppchar_t
 212 skip_escaped_newlines (pfile, next)
 213      cpp_reader *pfile;
 214      cppchar_t next;
 215 {
 216   cpp_buffer *buffer = pfile->buffer;
 217
 218   /* Only do this if we apply stages 1 and 2.  */
 219   if (!buffer->from_stage3)
 220     {
 221       cppchar_t next1;
 222       const unsigned char *saved_cur;
 223       int space;
 224
 225       do
 226         {
               /* Nothing can follow NEXT, so it stands alone.  */
 227           if (buffer->cur == buffer->rlimit)
 228             break;
 229
               /* Remember the position so a failed match can be rewound.  */
 230           SAVE_STATE ();
 231           if (next == '?')
 232             {
 233               next1 = *buffer->cur++;
                   /* Not a second '?', or no third character: no trigraph.  */
 234               if (next1 != '?' || buffer->cur == buffer->rlimit)
 235                 {
 236                   RESTORE_STATE ();
 237                   break;
 238                 }
 239
 240               next1 = *buffer->cur++;
                   /* Third character is not in the trigraph map, or
                      trigraphs are not being honoured: leave the '?'
                      as an ordinary character.  */
 241               if (!_cpp_trigraph_map[next1]
 242                   || !trigraph_ok (pfile, next1))
 243                 {
 244                   RESTORE_STATE ();
 245                   break;
 246                 }
 247
 248               /* We have a full trigraph here.  */
 249               next = _cpp_trigraph_map[next1];
                   /* Only a trigraph that maps to backslash can go on to
                      escape a newline; anything else is returned as is.  */
 250               if (next != '\\' || buffer->cur == buffer->rlimit)
 251                 break;
                   /* A later rewind must land after the trigraph.  */
 252               SAVE_STATE ();
 253             }
 254
 255           /* We have a backslash, and room for at least one more character.  */
 256           space = 0;
               /* Skip horizontal whitespace between backslash and newline,
                  noting whether there was any.  */
 257           do
 258             {
 259               next1 = *buffer->cur++;
 260               if (!is_nvspace (next1))
 261                 break;
 262               space = 1;
 263             }
 264           while (buffer->cur < buffer->rlimit);
 265
               /* No newline after the backslash: rewind and return the
                  backslash as an ordinary character.  */
 266           if (!is_vspace (next1))
 267             {
 268               RESTORE_STATE ();
 269               break;
 270             }
 271
 272           if (space && !pfile->state.lexing_comment)
 273             cpp_warning (pfile, "backslash and newline separated by space");
 274
               /* Consume the newline; NEXT becomes the character after it.  */
 275           next = handle_newline (pfile, next1);
 276           if (next == EOF)
 277             cpp_pedwarn (pfile, "backslash-newline at end of file");
 278         }
           /* The character after the newline may start another one.  */
 279       while (next == '\\' || next == '?');
 280     }
 281
 282   buffer->read_ahead = next;
 283   return next;
 284 }
 285
 286 /* Obtain the next character, after trigraph conversion and skipping
 287    an arbitrary string of escaped newlines.  The common case of no
 288    trigraphs or escaped newlines falls through quickly.  Returns EOF
        when the buffer is exhausted.  The returned character is also
        stored in buffer->read_ahead.  */
 289 static cppchar_t
 290 get_effective_char (pfile)
 291      cpp_reader *pfile;
 292 {
 293   cpp_buffer *buffer = pfile->buffer;
 294   cppchar_t next = EOF;
 295
 296   if (buffer->cur < buffer->rlimit)
 297     {
 298       next = *buffer->cur++;
 299
 300       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 301          can introduce escaped newlines, which we want to skip, or
 302          UCNs, which, depending upon lexer state, we will handle in
 303          the future.  */
 304       if (next == '?' || next == '\\')
 305         next = skip_escaped_newlines (pfile, next);
 306     }
 307
 308   buffer->read_ahead = next;
 309   return next;
 310 }
 311
 312 /* Skip a C-style block comment.  We find the end of the comment by
 313    seeing if an asterisk is before every '/' we encounter.  Returns
 314    non-zero if comment terminated by EOF, zero otherwise.
        pfile->state.lexing_comment is set for the duration of the scan,
        and buffer->read_ahead is reset to EOF before returning.  */
 315 static int
 316 skip_block_comment (pfile)
 317      cpp_reader *pfile;
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
       /* C is the current character and PREVC the one before it.  */
 320   cppchar_t c = EOF, prevc = EOF;
 321
 322   pfile->state.lexing_comment = 1;
 323   while (buffer->cur != buffer->rlimit)
 324     {
 325       prevc = c, c = *buffer->cur++;
 326
         /* Entered by goto when C has already been fetched.  */
 327     next_char:
 328       /* FIXME: For speed, create a new character class of characters
 329          of interest inside block comments.  */
 330       if (c == '?' || c == '\\')
 331         c = skip_escaped_newlines (pfile, c);
 332
 333       /* People like decorating comments with '*', so check for '/'
 334          instead for efficiency.  */
 335       if (c == '/')
 336         {
 337           if (prevc == '*')
 338             break;
 339
 340           /* Warn about potential nested comments, but not if the '/'
 341              comes immediately before the true comment delimiter.
 342              Don't bother to get it right across escaped newlines.  */
 343           if (CPP_OPTION (pfile, warn_comments)
 344               && buffer->cur != buffer->rlimit)
 345             {
 346               prevc = c, c = *buffer->cur++;
 347               if (c == '*' && buffer->cur != buffer->rlimit)
 348                 {
 349                   prevc = c, c = *buffer->cur++;
 350                   if (c != '/')
 351                     cpp_warning_with_line (pfile, pfile->line,
 352                                            CPP_BUF_COL (buffer) - 2,
 353                                            "\"/*\" within comment");
 354                 }
                   /* Re-examine the look-ahead character just read.  */
 355               goto next_char;
 356             }
 357         }
 358       else if (is_vspace (c))
 359         {
               /* handle_newline returns the character after the newline,
                  which still has to be examined.  */
 360           prevc = c, c = handle_newline (pfile, c);
 361           goto next_char;
 362         }
 363       else if (c == '\t')
 364         adjust_column (pfile);
 365     }
 366
 367   pfile->state.lexing_comment = 0;
 368   buffer->read_ahead = EOF;
       /* Zero only if the last two characters seen were '*' then '/'.  */
 369   return c != '/' || prevc != '*';
 370 }
 371
 372 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 373    non-zero if a multiline comment.  The following new line, if any,
 374    is left in buffer->read_ahead.  If the buffer runs out first,
        read_ahead is left as EOF.  pfile->state.lexing_comment is set for
        the duration of the scan.  */
 375 static int
 376 skip_line_comment (pfile)
 377      cpp_reader *pfile;
 378 {
 379   cpp_buffer *buffer = pfile->buffer;
 380   unsigned int orig_line = pfile->line;
 381   cppchar_t c;
 382
 383   pfile->state.lexing_comment = 1;
 384   do
 385     {
 386       c = EOF;
 387       if (buffer->cur == buffer->rlimit)
 388         break;
 389
 390       c = *buffer->cur++;
 391       if (c == '?' || c == '\\')
 392         c = skip_escaped_newlines (pfile, c);
 393     }
 394   while (!is_vspace (c));
 395
 396   pfile->state.lexing_comment = 0;
 397   buffer->read_ahead = c;       /* Leave any newline for caller.  */
       /* The line count changed, so an escaped newline was crossed.  */
 398   return orig_line != pfile->line;
 399 }
 400
 401 /* pfile->buffer->cur is one beyond the \t character.  Update
 402    col_adjust so we track the column correctly.  The adjustment is
        computed from the tabstop option and the tab's zero-based
        column.  */
 403 static void
 404 adjust_column (pfile)
 405      cpp_reader *pfile;
 406 {
 407   cpp_buffer *buffer = pfile->buffer;
 408   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 409
 410   /* Round it up to multiple of the tabstop, but subtract 1 since the
 411      tab itself occupies a character position.  */
 412   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 413                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 414 }
 415
 416 /* Skips whitespace, saving the next non-whitespace character.
 417    Adjusts the buffer's col_adjust (via adjust_column) to account for
 418    tabs.  Without this, tokens might be assigned an incorrect column.
        C is the first whitespace character, which has already been read.
        The character that ends the run, or EOF at the end of the buffer,
        is left in buffer->read_ahead.  */
 419 static void
 420 skip_whitespace (pfile, c)
 421      cpp_reader *pfile;
 422      cppchar_t c;
 423 {
 424   cpp_buffer *buffer = pfile->buffer;
       /* Set once the null-character warning has been given, so it is
          issued at most once per call.  */
 425   unsigned int warned = 0;
 426
 427   do
 428     {
 429       /* Horizontal space always OK.  */
 430       if (c == ' ')
 431         ;
 432       else if (c == '\t')
 433         adjust_column (pfile);
 434       /* Just \f \v or \0 left.  */
 435       else if (c == '\0')
 436         {
 437           if (!warned)
 438             {
 439               cpp_warning (pfile, "null character(s) ignored");
 440               warned = 1;
 441             }
 442         }
 443       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 444         cpp_pedwarn_with_line (pfile, pfile->line,
 445                                CPP_BUF_COL (buffer),
 446                                "%s in preprocessing directive",
 447                                c == '\f' ? "form feed" : "vertical tab");
 448
 449       c = EOF;
 450       if (buffer->cur == buffer->rlimit)
 451         break;
 452       c = *buffer->cur++;
 453     }
 454   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 455   while (is_nvspace (c));
 456
 457   /* Remember the next character.  */
 458   buffer->read_ahead = c;
 459 }
 460
 461 /* See if the characters of a number token are valid in a name (no
 462    '.', '+' or '-').  Returns 1 if every one of the STRING->len
        characters of STRING->text satisfies is_idchar, 0 otherwise.  An
        empty string yields 1.
        NOTE(review): PFILE is not named in this body; is_idchar is a macro
        defined elsewhere and may refer to it -- check before removing.  */
 463 static int
 464 name_p (pfile, string)
 465      cpp_reader *pfile;
 466      const cpp_string *string;
 467 {
 468   unsigned int i;
 469
 470   for (i = 0; i < string->len; i++)
 471     if (!is_idchar (string->text[i]))
 472       return 0;
 473
 474   return 1;
 475 }
 476
 477 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 478    a critical inner loop.  The common case is an identifier which has
 479    not been split by backslash-newline, does not contain a dollar
 480    sign, and has already been scanned (roughly 10:1 ratio of
 481    seen:unseen identifiers in normal code; the distribution is
 482    Poisson-like).  Second most common case is a new identifier, not
 483    split and no dollar sign.  The other possibilities are rare and
 484    have been relegated to parse_identifier_slow.

        On entry pfile->buffer->cur is one past the identifier's first
        character.  Returns the hash node found or created for the
        spelling.  On the fast path buffer->cur is left at the first
        character that fails ISIDNUM, or at rlimit.  */
 485
 486 static cpp_hashnode *
 487 parse_identifier (pfile)
 488      cpp_reader *pfile;
 489 {
 490   cpp_hashnode *result;
 491   const U_CHAR *cur, *rlimit;
 492
 493   /* Fast-path loop.  Skim over a normal identifier.
 494      N.B. ISIDNUM does not include $.  */
 495   cur    = pfile->buffer->cur - 1;
 496   rlimit = pfile->buffer->rlimit;
       /* The first character is stepped over without being tested.  */
 497   do
 498     cur++;
 499   while (cur < rlimit && ISIDNUM (*cur));
 500
 501   /* Check for slow-path cases.  */
 502   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 503     result = parse_identifier_slow (pfile, cur);
 504   else
 505     {
           /* The spelling is the BASE..CUR span of the input buffer.  */
 506       const U_CHAR *base = pfile->buffer->cur - 1;
 507       result = (cpp_hashnode *)
 508         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 509       pfile->buffer->cur = cur;
 510     }
 511
 512   /* Rarely, identifiers require diagnostics when lexed.
 513      XXX Has to be forced out of the fast path.  */
 514   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 515                         && !pfile->state.skipping, 0))
 516     {
 517       /* It is allowed to poison the same identifier twice.  */
 518       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 519         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 520                    NODE_NAME (result));
 521
 522       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 523          replacement list of a variadic macro.  */
 524       if (result == pfile->spec_nodes.n__VA_ARGS__
 525           && !pfile->state.va_args_ok)
 526         cpp_pedwarn (pfile,
 527         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Slow path.  This handles identifiers which have been split, and
 534    identifiers which contain dollar signs.  The part of the identifier
 535    from PFILE->buffer->cur-1 to CUR has already been scanned.

        CUR points at the '$', '\\' or '?' that stopped the fast path.  The
        spelling is accumulated on the hash table's obstack, NUL-terminated,
        and looked up with HT_ALLOCED.  The character that ended the
        identifier is left in buffer->read_ahead.  Returns the hash node.  */
 536 static cpp_hashnode *
 537 parse_identifier_slow (pfile, cur)
 538      cpp_reader *pfile;
 539      const U_CHAR *cur;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   const U_CHAR *base = buffer->cur - 1;
 543   struct obstack *stack = &pfile->hash_table->stack;
 544   unsigned int c, saw_dollar = 0, len;
 545
 546   /* Copy the part of the token which is known to be okay.  */
 547   obstack_grow (stack, base, cur - base);
 548
 549   /* Now process the part which isn't.  We are looking at one of
 550      '$', '\\', or '?' on entry to this loop.  */
 551   c = *cur++;
 552   buffer->cur = cur;
 553   do
 554     {
           /* Append identifier characters one at a time.  */
 555       while (is_idchar (c))
 556         {
 557           obstack_1grow (stack, c);
 558
 559           if (c == '$')
 560             saw_dollar++;
 561
 562           c = EOF;
 563           if (buffer->cur == buffer->rlimit)
 564             break;
 565
 566           c = *buffer->cur++;
 567         }
 568
 569       /* Potential escaped newline?  */
 570       if (c != '?' && c != '\\')
 571         break;
 572       c = skip_escaped_newlines (pfile, c);
 573     }
       /* Keep going if the identifier continues after what was skipped.  */
 574   while (is_idchar (c));
 575
 576   /* Remember the next character.  */
 577   buffer->read_ahead = c;
 578
 579   /* $ is not an identifier character in the standard, but is commonly
 580      accepted as an extension.  Don't warn about it in skipped
 581      conditional blocks.  */
 582   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 583     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 584
 585   /* Identifiers are null-terminated.  */
       /* LEN is taken before the NUL is added, so it excludes it.  */
 586   len = obstack_object_size (stack);
 587   obstack_1grow (stack, '\0');
 588
 589   return (cpp_hashnode *)
 590     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 591 }
 592
 593 /* Parse a number, skipping embedded backslash-newlines.  C is the
        number's first character, already read from the buffer.  If
        LEADING_PERIOD is non-zero a '.' is stored in front of it.  The
        spelling is built in pfile->ident_pool and NUL-terminated.
        NUMBER->text and NUMBER->len describe it, and the length excludes
        the NUL.  The character that ended the number is left in
        buffer->read_ahead.  */
 594 static void
 595 parse_number (pfile, number, c, leading_period)
 596      cpp_reader *pfile;
 597      cpp_string *number;
 598      cppchar_t c;
 599      int leading_period;
 600 {
 601   cpp_buffer *buffer = pfile->buffer;
 602   cpp_pool *pool = &pfile->ident_pool;
 603   unsigned char *dest, *limit;
 604
 605   dest = POOL_FRONT (pool);
 606   limit = POOL_LIMIT (pool);
 607
 608   /* Place a leading period.  */
 609   if (leading_period)
 610     {
 611       if (dest >= limit)
 612         limit = _cpp_next_chunk (pool, 0, &dest);
 613       *dest++ = '.';
 614     }
 615
 616   do
 617     {
 618       do
 619         {
 620           /* Need room for terminating null.  */
 621           if (dest + 1 >= limit)
 622             limit = _cpp_next_chunk (pool, 0, &dest);
 623           *dest++ = c;
 624
 625           c = EOF;
 626           if (buffer->cur == buffer->rlimit)
 627             break;
 628
 629           c = *buffer->cur++;
 630         }
           /* dest[-1] is the character most recently stored; VALID_SIGN
              (defined elsewhere) is given it along with the candidate.  */
 631       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 632
 633       /* Potential escaped newline?  */
 634       if (c != '?' && c != '\\')
 635         break;
 636       c = skip_escaped_newlines (pfile, c);
 637     }
       /* Resume storing if the number continues after what was skipped.  */
 638   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 639
 640   /* Remember the next character.  */
 641   buffer->read_ahead = c;
 642
 643   /* Null-terminate the number.  */
 644   *dest = '\0';
 645
 646   number->text = POOL_FRONT (pool);
 647   number->len = dest - number->text;
       /* Commit the spelling plus its terminating NUL.  */
 648   POOL_COMMIT (pool, number->len + 1);
 649 }
 650
 651 /* Subroutine of parse_string.  Emits error for unterminated strings.
        TERM is the terminator character that was expected.  For a
        double-quoted string, if a multi-line string start was recorded in
        pfile->mlstring_pos on a different line from the current lexer
        position, that location is reported as well and the record is
        cleared.  */
 652 static void
 653 unterminated (pfile, term)
 654      cpp_reader *pfile;
 655      int term;
 656 {
 657   cpp_error (pfile, "missing terminating %c character", term);
 658
       /* A line of zero means no multi-line string start is recorded.  */
 659   if (term == '\"' && pfile->mlstring_pos.line
 660       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 661     {
 662       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 663                            pfile->mlstring_pos.col,
 664                            "possible start of unterminated string literal");
 665       pfile->mlstring_pos.line = 0;
 666     }
 667 }
 668
 669 /* Subroutine of parse_string.  DEST is the position in the ident pool
        where the next character of the literal would be stored.  Returns
        non-zero if a terminator seen at this point ends the literal:
        always in #include-style directives, otherwise when the number of
        backslashes stored immediately before DEST is even.  */
 670 static int
 671 unescaped_terminator_p (pfile, dest)
 672      cpp_reader *pfile;
 673      const unsigned char *dest;
 674 {
 675   const unsigned char *start, *temp;
 676
 677   /* In #include-style directives, terminators are not escapeable.  */
 678   if (pfile->state.angled_headers)
 679     return 1;
 680
 681   start = POOL_FRONT (&pfile->ident_pool);
 682
 683   /* An odd number of consecutive backslashes represents an escaped
 684      terminator.  */
       /* Walk TEMP back over the run of backslashes, stopping at the
          front of the pool.  */
 685   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 686     ;
 687
       /* DEST - TEMP is the length of that run.  */
 688   return ((dest - temp) & 1) == 0;
 689 }
 690
 691 /* Parses a string, character constant, or angle-bracketed header file
 692    name.  Handles embedded trigraphs and escaped newlines.  The stored
 693    string is guaranteed NUL-terminated, but it is not guaranteed that
 694    this is the first NUL since embedded NULs are preserved.
 695
 696    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the closing delimiter to look for.  The contents are
        stored in pfile->ident_pool without the terminator and are
        described by TOKEN->val.str.  On return buffer->read_ahead is EOF
        if the terminator or the end of the buffer was reached, or the
        newline character if the literal was cut off at a line end.  */
 697 static void
 698 parse_string (pfile, token, terminator)
 699      cpp_reader *pfile;
 700      cpp_token *token;
 701      cppchar_t terminator;
 702 {
 703   cpp_buffer *buffer = pfile->buffer;
 704   cpp_pool *pool = &pfile->ident_pool;
 705   unsigned char *dest, *limit;
 706   cppchar_t c;
       /* Each of these warnings is given at most once per literal.  */
 707   bool warned_nulls = false, warned_multi = false;
 708
 709   dest = POOL_FRONT (pool);
 710   limit = POOL_LIMIT (pool);
 711
 712   for (;;)
 713     {
 714       if (buffer->cur == buffer->rlimit)
 715         c = EOF;
 716       else
 717         c = *buffer->cur++;
 718
         /* Entered by goto when C has already been fetched.  */
 719     have_char:
 720       /* We need space for the terminating NUL.  */
 721       if (dest >= limit)
 722         limit = _cpp_next_chunk (pool, 0, &dest);
 723
 724       if (c == EOF)
 725         {
 726           unterminated (pfile, terminator);
 727           break;
 728         }
 729
 730       /* Handle trigraphs, escaped newlines etc.  */
 731       if (c == '?' || c == '\\')
 732         c = skip_escaped_newlines (pfile, c);
 733
 734       if (c == terminator && unescaped_terminator_p (pfile, dest))
 735         {
               /* The terminator is consumed, so no read-ahead is left.  */
 736           c = EOF;
 737           break;
 738         }
 739       else if (is_vspace (c))
 740         {
 741           /* In assembly language, silently terminate string and
 742              character literals at end of line.  This is a kludge
 743              around not knowing where comments are.  */
 744           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 745             break;
 746
 747           /* Character constants and header names may not extend over
 748              multiple lines.  In Standard C, neither may strings.
 749              Unfortunately, we accept multiline strings as an
 750              extension, except in #include family directives.  */
 751           if (terminator != '"' || pfile->state.angled_headers)
 752             {
 753               unterminated (pfile, terminator);
 754               break;
 755             }
 756
 757           if (!warned_multi)
 758             {
 759               warned_multi = true;
 760               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 761             }
 762
               /* Record where the first multi-line string began, for the
                  diagnostic in unterminated.  */
 763           if (pfile->mlstring_pos.line == 0)
 764             pfile->mlstring_pos = pfile->lexer_pos;
 765
               /* Whatever form the line break took, it is stored as a
                  single '\n'.  The character after it is then processed
                  without reading another.  */
 766           c = handle_newline (pfile, c);
 767           *dest++ = '\n';
 768           goto have_char;
 769         }
 770       else if (c == '\0' && !warned_nulls)
 771         {
 772           warned_nulls = true;
 773           cpp_warning (pfile, "null character(s) preserved in literal");
 774         }
 775
 776       *dest++ = c;
 777     }
 778
 779   /* Remember the next character.  */
 780   buffer->read_ahead = c;
 781   *dest = '\0';
 782
 783   token->val.str.text = POOL_FRONT (pool);
 784   token->val.str.len = dest - token->val.str.text;
       /* Commit the contents plus the terminating NUL.  */
 785   POOL_COMMIT (pool, token->val.str.len + 1);
 786 }
 787
 788 /* The stored comment includes the comment start and any terminator.
        FROM points just past the comment's initial '/', and the comment
        runs from there to pfile->buffer->cur.  TOKEN is made a CPP_COMMENT
        whose val.str describes a copy allocated from pfile->ident_pool.
        The copy is exactly val.str.len bytes, with no NUL appended.  */
 789 static void
 790 save_comment (pfile, token, from)
 791      cpp_reader *pfile;
 792      cpp_token *token;
 793      const unsigned char *from;
 794 {
 795   unsigned char *buffer;
 796   unsigned int len;
 797
 798   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 799   /* C++ comments probably (not definitely) have moved past a new
 800      line, which we don't want to save in the comment.  */
 801   if (pfile->buffer->read_ahead != EOF)
 802     len--;
 803   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 804
 805   token->type = CPP_COMMENT;
 806   token->val.str.len = len;
 807   token->val.str.text = buffer;
 808
       /* Re-create the leading '/', then copy the rest from the input.  */
 809   buffer[0] = '/';
 810   memcpy (buffer + 1, from, len - 1);
 811 }
 812
 813 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 814    want to avoid stepping back when lexing %:%X.

        RESULT's type is set to CPP_MOD, or to CPP_MOD_EQ for "%=".  When
        the digraphs option is on, "%:" gives CPP_HASH, "%:%:" gives
        CPP_PASTE and "%>" gives CPP_CLOSE_BRACE, each with the DIGRAPH
        flag set.  A character read beyond the token is left in
        buffer->read_ahead.  For "%:%X" a second one is kept in
        buffer->extra_char.  */
 815 static void
 816 lex_percent (pfile, result)
 817      cpp_reader *pfile;
 818      cpp_token *result;
 819 {
 820   cpp_buffer *buffer= pfile->buffer;
 821   cppchar_t c;
 822
 823   result->type = CPP_MOD;
 824   /* Parsing %:%X could leave an extra character.  */
 825   if (buffer->extra_char == EOF)
 826     c = get_effective_char (pfile);
 827   else
 828     {
           /* Use the character stashed by the previous call instead of
              reading a new one.  */
 829       c = buffer->read_ahead = buffer->extra_char;
 830       buffer->extra_char = EOF;
 831     }
 832
 833   if (c == '=')
 834     ACCEPT_CHAR (CPP_MOD_EQ);
 835   else if (CPP_OPTION (pfile, digraphs))
 836     {
 837       if (c == ':')
 838         {
 839           result->flags |= DIGRAPH;
 840           ACCEPT_CHAR (CPP_HASH);
               /* See whether this is really "%:%:".  */
 841           if (get_effective_char (pfile) == '%')
 842             {
 843               buffer->extra_char = get_effective_char (pfile);
 844               if (buffer->extra_char == ':')
 845                 {
 846                   buffer->extra_char = EOF;
 847                   ACCEPT_CHAR (CPP_PASTE);
 848                 }
 849               else
 850                 /* We'll catch the extra_char when we're called back.  */
 851                 buffer->read_ahead = '%';
 852             }
 853         }
 854       else if (c == '>')
 855         {
 856           result->flags |= DIGRAPH;
 857           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 858         }
 859     }
 860 }
 861
 862 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 863    want to avoid stepping back when lexing '...' or '.123'.  In the
 864    latter case we should also set a flag for parse_number.

        RESULT becomes a CPP_NUMBER when a digit follows the '.'.
        Otherwise it is CPP_DOT, CPP_ELLIPSIS for "...", or CPP_DOT_STAR
        for ".*" when the cplusplus option is set.  A character read
        beyond the token is left in buffer->read_ahead.  For "..X" a second
        one is kept in buffer->extra_char.  */
 865 static void
 866 lex_dot (pfile, result)
 867      cpp_reader *pfile;
 868      cpp_token *result;
 869 {
 870   cpp_buffer *buffer = pfile->buffer;
 871   cppchar_t c;
 872
 873   /* Parsing ..X could leave an extra character.  */
 874   if (buffer->extra_char == EOF)
 875     c = get_effective_char (pfile);
 876   else
 877     {
           /* Use the character stashed by the previous call instead of
              reading a new one.  */
 878       c = buffer->read_ahead = buffer->extra_char;
 879       buffer->extra_char = EOF;
 880     }
 881
 882   /* All known character sets have 0...9 contiguous.  */
 883   if (c >= '0' && c <= '9')
 884     {
 885       result->type = CPP_NUMBER;
           /* The final argument asks parse_number to store a leading '.'.  */
 886       parse_number (pfile, &result->val.str, c, 1);
 887     }
 888   else
 889     {
 890       result->type = CPP_DOT;
 891       if (c == '.')
 892         {
 893           buffer->extra_char = get_effective_char (pfile);
 894           if (buffer->extra_char == '.')
 895             {
 896               buffer->extra_char = EOF;
 897               ACCEPT_CHAR (CPP_ELLIPSIS);
 898             }
 899           else
 900             /* We'll catch the extra_char when we're called back.  */
 901             buffer->read_ahead = '.';
 902         }
 903       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 904         ACCEPT_CHAR (CPP_DOT_STAR);
 905     }
 906 }
 907
 908 /* Allocate COUNT tokens for RUN.  */
 909 void
 910 _cpp_init_tokenrun (run, count)
 911      tokenrun *run;
 912      unsigned int count;
 913 {
 914   run->base = xnewvec (cpp_token, count);
 915   run->limit = run->base + count;
 916   run->next = NULL;
 917 }
 918
 919 /* Returns the next tokenrun, or creates one if there is none.  */
 920 static tokenrun *
 921 next_tokenrun (run)
 922      tokenrun *run;
 923 {
 924   if (run->next == NULL)
 925     {
 926       run->next = xnew (tokenrun);
 927       run->next->prev = run;
 928       _cpp_init_tokenrun (run->next, 250);
 929     }
 930
 931   return run->next;
 932 }
 933
 934 /* Lex a token into RESULT (external interface).  */
 935 void
 936 _cpp_lex_token (pfile, dest)
 937      cpp_reader *pfile;
 938      cpp_token *dest;
 939 {
 940   cpp_token *result;
 941
 942   for (;;)
 943     {
 944       if (pfile->cur_token == pfile->cur_run->limit)
 945         {
 946           pfile->cur_run = next_tokenrun (pfile->cur_run);
 947           pfile->cur_token = pfile->cur_run->base;
 948         }
 949       result = pfile->cur_token++;
 950
 951       if (pfile->lookaheads)
 952         pfile->lookaheads--;
 953       else
 954         result = lex_token (pfile, result);
 955
 956       if (result->flags & BOL)
 957         {
 958           pfile->lexer_pos.output_line = result->line;
 959           /* Is this a directive.  If _cpp_handle_directive returns
 960              false, it is an assembler #.  */
 961           if (result->type == CPP_HASH
 962               && !pfile->state.parsing_args
 963               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 964             continue;
 965         }
 966
 967       /* We don't skip tokens in directives.  */
 968       if (pfile->state.in_directive)
 969         break;
 970
 971       /* Outside a directive, invalidate controlling macros.  At file
 972          EOF, lex_token takes care of popping the buffer, so we never
 973          get here and MI optimisation works.  */
 974       pfile->mi_valid = false;
 975
 976       if (!pfile->state.skipping || result->type == CPP_EOF)
 977         break;
 978     }
 979
 980   *dest = *result;
 981 }
 982
 983 /* Lex a token into RESULT.  When meeting a newline, returns CPP_EOF
 984    if parsing a directive, otherwise returns to the start of the token
 985    buffer if permissible.  Returns the location of the lexed token.  */
 986 static cpp_token *
 987 lex_token (pfile, result)
 988      cpp_reader *pfile;
 989      cpp_token *result;
 990 {
 991   cppchar_t c;
 992   cpp_buffer *buffer;
 993   const unsigned char *comment_start;
 994
 995  fresh_line:
 996   buffer = pfile->buffer;
 997   result->flags = buffer->saved_flags;
 998   buffer->saved_flags = 0;
 999  update_tokens_line:
1000   pfile->lexer_pos.line = pfile->line;
1001   result->line = pfile->line;
1002
1003  skipped_white:
1004   c = buffer->read_ahead;
1005   if (c == EOF && buffer->cur < buffer->rlimit)
1006     c = *buffer->cur++;
1007   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1008   pfile->lexer_pos.col = result->col;
1009   buffer->read_ahead = EOF;
1010
1011  trigraph:
1012   switch (c)
1013     {
1014     case EOF:
1015       buffer->saved_flags = BOL;
1016       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1017         {
1018           if (buffer->cur != buffer->line_base)
1019             {
1020               /* Non-empty files should end in a newline.  Don't warn
1021                  for command line and _Pragma buffers.  */
1022               if (!buffer->from_stage3)
1023                 cpp_pedwarn (pfile, "no newline at end of file");
1024               handle_newline (pfile, '\n');
1025             }
1026
1027           /* Don't pop the last buffer.  */
1028           if (buffer->prev)
1029             {
1030               unsigned char stop = buffer->return_at_eof;
1031
1032               _cpp_pop_buffer (pfile);
1033               if (!stop)
1034                 goto fresh_line;
1035             }
1036         }
1037       result->type = CPP_EOF;
1038       break;
1039
1040     case ' ': case '\t': case '\f': case '\v': case '\0':
1041       skip_whitespace (pfile, c);
1042       result->flags |= PREV_WHITE;
1043       goto skipped_white;
1044
1045     case '\n': case '\r':
1046       handle_newline (pfile, c);
1047       buffer->saved_flags = BOL;
1048       if (! pfile->state.in_directive)
1049         {
1050           if (!pfile->keep_tokens)
1051             {
1052               pfile->cur_run = &pfile->base_run;
1053               result = pfile->base_run.base;
1054               pfile->cur_token = result + 1;
1055             }
1056           goto fresh_line;
1057         }
1058       result->type = CPP_EOF;
1059       break;
1060
1061     case '?':
1062     case '\\':
1063       /* These could start an escaped newline, or '?' a trigraph.  Let
1064          skip_escaped_newlines do all the work.  */
1065       {
1066         unsigned int line = pfile->line;
1067
1068         c = skip_escaped_newlines (pfile, c);
1069         if (line != pfile->line)
1070           /* We had at least one escaped newline of some sort, and the
1071              next character is in buffer->read_ahead.  Update the
1072              token's line and column.  */
1073             goto update_tokens_line;
1074
1075         /* We are either the original '?' or '\\', or a trigraph.  */
1076         result->type = CPP_QUERY;
1077         buffer->read_ahead = EOF;
1078         if (c == '\\')
1079           goto random_char;
1080         else if (c != '?')
1081           goto trigraph;
1082       }
1083       break;
1084
1085     case '0': case '1': case '2': case '3': case '4':
1086     case '5': case '6': case '7': case '8': case '9':
1087       result->type = CPP_NUMBER;
1088       parse_number (pfile, &result->val.str, c, 0);
1089       break;
1090
1091     case '$':
1092       if (!CPP_OPTION (pfile, dollars_in_ident))
1093         goto random_char;
1094       /* Fall through...  */
1095
1096     case '_':
1097     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1098     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1099     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1100     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1101     case 'y': case 'z':
1102     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1103     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1104     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1105     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1106     case 'Y': case 'Z':
1107       result->type = CPP_NAME;
1108       result->val.node = parse_identifier (pfile);
1109
1110       /* 'L' may introduce wide characters or strings.  */
1111       if (result->val.node == pfile->spec_nodes.n_L)
1112         {
1113           c = buffer->read_ahead;
1114           if (c == EOF && buffer->cur < buffer->rlimit)
1115             c = *buffer->cur;
1116           if (c == '\'' || c == '"')
1117             {
1118               buffer->cur++;
1119               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1120               goto make_string;
1121             }
1122         }
1123       /* Convert named operators to their proper types.  */
1124       else if (result->val.node->flags & NODE_OPERATOR)
1125         {
1126           result->flags |= NAMED_OP;
1127           result->type = result->val.node->value.operator;
1128         }
1129       break;
1130
1131     case '\'':
1132     case '"':
1133       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1134     make_string:
1135       parse_string (pfile, result, c);
1136       break;
1137
1138     case '/':
1139       /* A potential block or line comment.  */
1140       comment_start = buffer->cur;
1141       result->type = CPP_DIV;
1142       c = get_effective_char (pfile);
1143       if (c == '=')
1144         ACCEPT_CHAR (CPP_DIV_EQ);
1145       if (c != '/' && c != '*')
1146         break;
1147
1148       if (c == '*')
1149         {
1150           if (skip_block_comment (pfile))
1151             cpp_error (pfile, "unterminated comment");
1152         }
1153       else
1154         {
1155           if (!CPP_OPTION (pfile, cplusplus_comments)
1156               && !CPP_IN_SYSTEM_HEADER (pfile))
1157             break;
1158
1159           /* Warn about comments only if pedantically GNUC89, and not
1160              in system headers.  */
1161           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1162               && ! buffer->warned_cplusplus_comments)
1163             {
1164               cpp_pedwarn (pfile,
1165                            "C++ style comments are not allowed in ISO C89");
1166               cpp_pedwarn (pfile,
1167                            "(this will be reported only once per input file)");
1168               buffer->warned_cplusplus_comments = 1;
1169             }
1170
1171           /* Skip_line_comment updates buffer->read_ahead.  */
1172           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1173             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1174                                    pfile->lexer_pos.col,
1175                                    "multi-line comment");
1176         }
1177
1178       /* Skipping the comment has updated buffer->read_ahead.  */
1179       if (!pfile->state.save_comments)
1180         {
1181           result->flags |= PREV_WHITE;
1182           goto update_tokens_line;
1183         }
1184
1185       /* Save the comment as a token in its own right.  */
1186       save_comment (pfile, result, comment_start);
1187       /* Don't do MI optimisation.  */
1188       break;
1189
1190     case '<':
1191       if (pfile->state.angled_headers)
1192         {
1193           result->type = CPP_HEADER_NAME;
1194           c = '>';              /* terminator.  */
1195           goto make_string;
1196         }
1197
1198       result->type = CPP_LESS;
1199       c = get_effective_char (pfile);
1200       if (c == '=')
1201         ACCEPT_CHAR (CPP_LESS_EQ);
1202       else if (c == '<')
1203         {
1204           ACCEPT_CHAR (CPP_LSHIFT);
1205           if (get_effective_char (pfile) == '=')
1206             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1207         }
1208       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1209         {
1210           ACCEPT_CHAR (CPP_MIN);
1211           if (get_effective_char (pfile) == '=')
1212             ACCEPT_CHAR (CPP_MIN_EQ);
1213         }
1214       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1215         {
1216           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1217           result->flags |= DIGRAPH;
1218         }
1219       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1220         {
1221           ACCEPT_CHAR (CPP_OPEN_BRACE);
1222           result->flags |= DIGRAPH;
1223         }
1224       break;
1225
1226     case '>':
1227       result->type = CPP_GREATER;
1228       c = get_effective_char (pfile);
1229       if (c == '=')
1230         ACCEPT_CHAR (CPP_GREATER_EQ);
1231       else if (c == '>')
1232         {
1233           ACCEPT_CHAR (CPP_RSHIFT);
1234           if (get_effective_char (pfile) == '=')
1235             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1236         }
1237       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1238         {
1239           ACCEPT_CHAR (CPP_MAX);
1240           if (get_effective_char (pfile) == '=')
1241             ACCEPT_CHAR (CPP_MAX_EQ);
1242         }
1243       break;
1244
1245     case '%':
1246       lex_percent (pfile, result);
1247       break;
1248
1249     case '.':
1250       lex_dot (pfile, result);
1251       break;
1252
1253     case '+':
1254       result->type = CPP_PLUS;
1255       c = get_effective_char (pfile);
1256       if (c == '=')
1257         ACCEPT_CHAR (CPP_PLUS_EQ);
1258       else if (c == '+')
1259         ACCEPT_CHAR (CPP_PLUS_PLUS);
1260       break;
1261
1262     case '-':
1263       result->type = CPP_MINUS;
1264       c = get_effective_char (pfile);
1265       if (c == '>')
1266         {
1267           ACCEPT_CHAR (CPP_DEREF);
1268           if (CPP_OPTION (pfile, cplusplus)
1269               && get_effective_char (pfile) == '*')
1270             ACCEPT_CHAR (CPP_DEREF_STAR);
1271         }
1272       else if (c == '=')
1273         ACCEPT_CHAR (CPP_MINUS_EQ);
1274       else if (c == '-')
1275         ACCEPT_CHAR (CPP_MINUS_MINUS);
1276       break;
1277
1278     case '*':
1279       result->type = CPP_MULT;
1280       if (get_effective_char (pfile) == '=')
1281         ACCEPT_CHAR (CPP_MULT_EQ);
1282       break;
1283
1284     case '=':
1285       result->type = CPP_EQ;
1286       if (get_effective_char (pfile) == '=')
1287         ACCEPT_CHAR (CPP_EQ_EQ);
1288       break;
1289
1290     case '!':
1291       result->type = CPP_NOT;
1292       if (get_effective_char (pfile) == '=')
1293         ACCEPT_CHAR (CPP_NOT_EQ);
1294       break;
1295
1296     case '&':
1297       result->type = CPP_AND;
1298       c = get_effective_char (pfile);
1299       if (c == '=')
1300         ACCEPT_CHAR (CPP_AND_EQ);
1301       else if (c == '&')
1302         ACCEPT_CHAR (CPP_AND_AND);
1303       break;
1304
1305     case '#':
1306       result->type = CPP_HASH;
1307       if (get_effective_char (pfile) == '#')
1308           ACCEPT_CHAR (CPP_PASTE);
1309       break;
1310
1311     case '|':
1312       result->type = CPP_OR;
1313       c = get_effective_char (pfile);
1314       if (c == '=')
1315         ACCEPT_CHAR (CPP_OR_EQ);
1316       else if (c == '|')
1317         ACCEPT_CHAR (CPP_OR_OR);
1318       break;
1319
1320     case '^':
1321       result->type = CPP_XOR;
1322       if (get_effective_char (pfile) == '=')
1323         ACCEPT_CHAR (CPP_XOR_EQ);
1324       break;
1325
1326     case ':':
1327       result->type = CPP_COLON;
1328       c = get_effective_char (pfile);
1329       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1330         ACCEPT_CHAR (CPP_SCOPE);
1331       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1332         {
1333           result->flags |= DIGRAPH;
1334           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1335         }
1336       break;
1337
1338     case '~': result->type = CPP_COMPL; break;
1339     case ',': result->type = CPP_COMMA; break;
1340     case '(': result->type = CPP_OPEN_PAREN; break;
1341     case ')': result->type = CPP_CLOSE_PAREN; break;
1342     case '[': result->type = CPP_OPEN_SQUARE; break;
1343     case ']': result->type = CPP_CLOSE_SQUARE; break;
1344     case '{': result->type = CPP_OPEN_BRACE; break;
1345     case '}': result->type = CPP_CLOSE_BRACE; break;
1346     case ';': result->type = CPP_SEMICOLON; break;
1347
1348       /* @ is a punctuator in Objective C.  */
1349     case '@': result->type = CPP_ATSIGN; break;
1350
1351     random_char:
1352     default:
1353       result->type = CPP_OTHER;
1354       result->val.c = c;
1355       break;
1356     }
1357
1358   return result;
1359 }
1360
1361 /* An upper bound on the number of bytes needed to spell a token,
1362    including preceding whitespace.  */
1363 unsigned int
1364 cpp_token_len (token)
1365      const cpp_token *token;
1366 {
1367   unsigned int len;
1368
1369   switch (TOKEN_SPELL (token))
1370     {
1371     default:            len = 0;                                break;
1372     case SPELL_STRING:  len = token->val.str.len;               break;
1373     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1374     }
1375   /* 1 for whitespace, 4 for comment delimeters.  */
1376   return len + 5;
1377 }
1378
1379 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1380    already contain the enough space to hold the token's spelling.
1381    Returns a pointer to the character after the last character
1382    written.  */
1383 unsigned char *
1384 cpp_spell_token (pfile, token, buffer)
1385      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1386      const cpp_token *token;
1387      unsigned char *buffer;
1388 {
1389   switch (TOKEN_SPELL (token))
1390     {
1391     case SPELL_OPERATOR:
1392       {
1393         const unsigned char *spelling;
1394         unsigned char c;
1395
1396         if (token->flags & DIGRAPH)
1397           spelling
1398             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1399         else if (token->flags & NAMED_OP)
1400           goto spell_ident;
1401         else
1402           spelling = TOKEN_NAME (token);
1403
1404         while ((c = *spelling++) != '\0')
1405           *buffer++ = c;
1406       }
1407       break;
1408
1409     case SPELL_IDENT:
1410       spell_ident:
1411       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1412       buffer += NODE_LEN (token->val.node);
1413       break;
1414
1415     case SPELL_STRING:
1416       {
1417         int left, right, tag;
1418         switch (token->type)
1419           {
1420           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1421           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1422           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1423           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1424           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1425           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1426           }
1427         if (tag) *buffer++ = tag;
1428         if (left) *buffer++ = left;
1429         memcpy (buffer, token->val.str.text, token->val.str.len);
1430         buffer += token->val.str.len;
1431         if (right) *buffer++ = right;
1432       }
1433       break;
1434
1435     case SPELL_CHAR:
1436       *buffer++ = token->val.c;
1437       break;
1438
1439     case SPELL_NONE:
1440       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1441       break;
1442     }
1443
1444   return buffer;
1445 }
1446
1447 /* Returns a token as a null-terminated string.  The string is
1448    temporary, and automatically freed later.  Useful for diagnostics.  */
1449 unsigned char *
1450 cpp_token_as_text (pfile, token)
1451      cpp_reader *pfile;
1452      const cpp_token *token;
1453 {
1454   unsigned int len = cpp_token_len (token);
1455   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1456
1457   end = cpp_spell_token (pfile, token, start);
1458   end[0] = '\0';
1459
1460   return start;
1461 }
1462
1463 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1464 const char *
1465 cpp_type2name (type)
1466      enum cpp_ttype type;
1467 {
1468   return (const char *) token_spellings[type].name;
1469 }
1470
1471 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1472    for efficiency - to avoid double-buffering.  Also, outputs a space
1473    if PREV_WHITE is flagged.  */
1474 void
1475 cpp_output_token (token, fp)
1476      const cpp_token *token;
1477      FILE *fp;
1478 {
1479   if (token->flags & PREV_WHITE)
1480     putc (' ', fp);
1481
1482   switch (TOKEN_SPELL (token))
1483     {
1484     case SPELL_OPERATOR:
1485       {
1486         const unsigned char *spelling;
1487
1488         if (token->flags & DIGRAPH)
1489           spelling
1490             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1491         else if (token->flags & NAMED_OP)
1492           goto spell_ident;
1493         else
1494           spelling = TOKEN_NAME (token);
1495
1496         ufputs (spelling, fp);
1497       }
1498       break;
1499
1500     spell_ident:
1501     case SPELL_IDENT:
1502       ufputs (NODE_NAME (token->val.node), fp);
1503     break;
1504
1505     case SPELL_STRING:
1506       {
1507         int left, right, tag;
1508         switch (token->type)
1509           {
1510           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1511           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1512           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1513           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1514           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1515           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1516           }
1517         if (tag) putc (tag, fp);
1518         if (left) putc (left, fp);
1519         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1520         if (right) putc (right, fp);
1521       }
1522       break;
1523
1524     case SPELL_CHAR:
1525       putc (token->val.c, fp);
1526       break;
1527
1528     case SPELL_NONE:
1529       /* An error, most probably.  */
1530       break;
1531     }
1532 }
1533
1534 /* Compare two tokens.  */
1535 int
1536 _cpp_equiv_tokens (a, b)
1537      const cpp_token *a, *b;
1538 {
1539   if (a->type == b->type && a->flags == b->flags)
1540     switch (TOKEN_SPELL (a))
1541       {
1542       default:                  /* Keep compiler happy.  */
1543       case SPELL_OPERATOR:
1544         return 1;
1545       case SPELL_CHAR:
1546         return a->val.c == b->val.c; /* Character.  */
1547       case SPELL_NONE:
1548         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1549       case SPELL_IDENT:
1550         return a->val.node == b->val.node;
1551       case SPELL_STRING:
1552         return (a->val.str.len == b->val.str.len
1553                 && !memcmp (a->val.str.text, b->val.str.text,
1554                             a->val.str.len));
1555       }
1556
1557   return 0;
1558 }
1559
1560 /* Determine whether two tokens can be pasted together, and if so,
1561    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1562    be pasted, or the appropriate type for the merged token if they
1563    can.  */
1564 enum cpp_ttype
1565 cpp_can_paste (pfile, token1, token2, digraph)
1566      cpp_reader * pfile;
1567      const cpp_token *token1, *token2;
1568      int* digraph;
1569 {
1570   enum cpp_ttype a = token1->type, b = token2->type;
1571   int cxx = CPP_OPTION (pfile, cplusplus);
1572
1573   /* Treat named operators as if they were ordinary NAMEs.  */
1574   if (token1->flags & NAMED_OP)
1575     a = CPP_NAME;
1576   if (token2->flags & NAMED_OP)
1577     b = CPP_NAME;
1578
1579   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1580     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1581
1582   switch (a)
1583     {
1584     case CPP_GREATER:
1585       if (b == a) return CPP_RSHIFT;
1586       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1587       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1588       break;
1589     case CPP_LESS:
1590       if (b == a) return CPP_LSHIFT;
1591       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1592       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1593       if (CPP_OPTION (pfile, digraphs))
1594         {
1595           if (b == CPP_COLON)
1596             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1597           if (b == CPP_MOD)
1598             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1599         }
1600       break;
1601
1602     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1603     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1604     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1605
1606     case CPP_MINUS:
1607       if (b == a)               return CPP_MINUS_MINUS;
1608       if (b == CPP_GREATER)     return CPP_DEREF;
1609       break;
1610     case CPP_COLON:
1611       if (b == a && cxx)        return CPP_SCOPE;
1612       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1613         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1614       break;
1615
1616     case CPP_MOD:
1617       if (CPP_OPTION (pfile, digraphs))
1618         {
1619           if (b == CPP_GREATER)
1620             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1621           if (b == CPP_COLON)
1622             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1623         }
1624       break;
1625     case CPP_DEREF:
1626       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1627       break;
1628     case CPP_DOT:
1629       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1630       if (b == CPP_NUMBER)      return CPP_NUMBER;
1631       break;
1632
1633     case CPP_HASH:
1634       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1635         /* %:%: digraph */
1636         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1637       break;
1638
1639     case CPP_NAME:
1640       if (b == CPP_NAME)        return CPP_NAME;
1641       if (b == CPP_NUMBER
1642           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1643       if (b == CPP_CHAR
1644           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1645       if (b == CPP_STRING
1646           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1647       break;
1648
1649     case CPP_NUMBER:
1650       if (b == CPP_NUMBER)      return CPP_NUMBER;
1651       if (b == CPP_NAME)        return CPP_NUMBER;
1652       if (b == CPP_DOT)         return CPP_NUMBER;
1653       /* Numbers cannot have length zero, so this is safe.  */
1654       if ((b == CPP_PLUS || b == CPP_MINUS)
1655           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1656         return CPP_NUMBER;
1657       break;
1658
1659     default:
1660       break;
1661     }
1662
1663   return CPP_EOF;
1664 }
1665
1666 /* Returns nonzero if a space should be inserted to avoid an
1667    accidental token paste for output.  For simplicity, it is
1668    conservative, and occasionally advises a space where one is not
1669    needed, e.g. "." and ".2".  */
1670
1671 int
1672 cpp_avoid_paste (pfile, token1, token2)
1673      cpp_reader *pfile;
1674      const cpp_token *token1, *token2;
1675 {
1676   enum cpp_ttype a = token1->type, b = token2->type;
1677   cppchar_t c;
1678
1679   if (token1->flags & NAMED_OP)
1680     a = CPP_NAME;
1681   if (token2->flags & NAMED_OP)
1682     b = CPP_NAME;
1683
1684   c = EOF;
1685   if (token2->flags & DIGRAPH)
1686     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1687   else if (token_spellings[b].category == SPELL_OPERATOR)
1688     c = token_spellings[b].name[0];
1689
1690   /* Quickly get everything that can paste with an '='.  */
1691   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1692     return 1;
1693
1694   switch (a)
1695     {
1696     case CPP_GREATER:   return c == '>' || c == '?';
1697     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1698     case CPP_PLUS:      return c == '+';
1699     case CPP_MINUS:     return c == '-' || c == '>';
1700     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1701     case CPP_MOD:       return c == ':' || c == '>';
1702     case CPP_AND:       return c == '&';
1703     case CPP_OR:        return c == '|';
1704     case CPP_COLON:     return c == ':' || c == '>';
1705     case CPP_DEREF:     return c == '*';
1706     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1707     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1708     case CPP_NAME:      return ((b == CPP_NUMBER
1709                                  && name_p (pfile, &token2->val.str))
1710                                 || b == CPP_NAME
1711                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1712     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1713                                 || c == '.' || c == '+' || c == '-');
1714     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1715                                 && token1->val.c == '@'
1716                                 && (b == CPP_NAME || b == CPP_STRING));
1717     default:            break;
1718     }
1719
1720   return 0;
1721 }
1722
1723 /* Output all the remaining tokens on the current line, and a newline
1724    character, to FP.  Leading whitespace is removed.  */
1725 void
1726 cpp_output_line (pfile, fp)
1727      cpp_reader *pfile;
1728      FILE *fp;
1729 {
1730   cpp_token token;
1731
1732   cpp_get_token (pfile, &token);
1733   token.flags &= ~PREV_WHITE;
1734   while (token.type != CPP_EOF)
1735     {
1736       cpp_output_token (&token, fp);
1737       cpp_get_token (pfile, &token);
1738     }
1739
1740   putc ('\n', fp);
1741 }
1742
/* Return the numeric value, 0 to 15, of the hexadecimal digit C.
   Both upper and lower case letters are accepted.  Aborts if C is
   not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int value;

  if (c >= '0' && c <= '9')
    value = c - '0';
  else if (c >= 'a' && c <= 'f')
    value = 10 + (c - 'a');
  else if (c >= 'A' && c <= 'F')
    value = 10 + (c - 'A');
  else
    abort ();

  return value;
}
1756
1757 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1758    failure if cpplib is not parsing C++ or C99.  Such failure is
1759    silent, and no variables are updated.  Otherwise returns 0, and
1760    warns if -Wtraditional.
1761
1762    [lex.charset]: The character designated by the universal character
1763    name \UNNNNNNNN is that character whose character short name in
1764    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1765    universal character name \uNNNN is that character whose character
1766    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1767    for a universal character name is less than 0x20 or in the range
1768    0x7F-0x9F (inclusive), or if the universal character name
1769    designates a character in the basic source character set, then the
1770    program is ill-formed.
1771
1772    We assume that wchar_t is Unicode, so we don't need to do any
1773    mapping.  Is this ever wrong?
1774
1775    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1776    LIMIT is the end of the string or charconst.  PSTR is updated to
1777    point after the UCS on return, and the UCS is written into PC.  */
1778
1779 static int
1780 maybe_read_ucs (pfile, pstr, limit, pc)
1781      cpp_reader *pfile;
1782      const unsigned char **pstr;
1783      const unsigned char *limit;
1784      unsigned int *pc;
1785 {
1786   const unsigned char *p = *pstr;
1787   unsigned int code = 0;
1788   unsigned int c = *pc, length;
1789
1790   /* Only attempt to interpret a UCS for C++ and C99.  */
1791   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1792     return 1;
1793
1794   if (CPP_WTRADITIONAL (pfile))
1795     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1796
1797   length = (c == 'u' ? 4: 8);
1798
1799   if ((size_t) (limit - p) < length)
1800     {
1801       cpp_error (pfile, "incomplete universal-character-name");
1802       /* Skip to the end to avoid more diagnostics.  */
1803       p = limit;
1804     }
1805   else
1806     {
1807       for (; length; length--, p++)
1808         {
1809           c = *p;
1810           if (ISXDIGIT (c))
1811             code = (code << 4) + hex_digit_value (c);
1812           else
1813             {
1814               cpp_error (pfile,
1815                          "non-hex digit '%c' in universal-character-name", c);
1816               /* We shouldn't skip in case there are multibyte chars.  */
1817               break;
1818             }
1819         }
1820     }
1821
1822 #ifdef TARGET_EBCDIC
1823   cpp_error (pfile, "universal-character-name on EBCDIC target");
1824   code = 0x3f;  /* EBCDIC invalid character */
1825 #else
1826  /* True extended characters are OK.  */
1827   if (code >= 0xa0
1828       && !(code & 0x80000000)
1829       && !(code >= 0xD800 && code <= 0xDFFF))
1830     ;
1831   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1832      hex escapes so that this also works with EBCDIC hosts.  */
1833   else if (code == 0x24 || code == 0x40 || code == 0x60)
1834     ;
1835   /* Don't give another error if one occurred above.  */
1836   else if (length == 0)
1837     cpp_error (pfile, "universal-character-name out of range");
1838 #endif
1839
1840   *pstr = p;
1841   *pc = code;
1842   return 0;
1843 }
1844
1845 /* Interpret an escape sequence, and return its value.  PSTR points to
1846    the input pointer, which is just after the backslash.  LIMIT is how
1847    much text we have.  MASK is a bitmask for the precision for the
1848    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1849    interpret escapes that did not exist in traditional C.
1850
1851    Handles all relevant diagnostics.  */
1852
1853 unsigned int
1854 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1855      cpp_reader *pfile;
1856      const unsigned char **pstr;
1857      const unsigned char *limit;
1858      unsigned HOST_WIDE_INT mask;
1859      int traditional;
1860 {
1861   int unknown = 0;
1862   const unsigned char *str = *pstr;
1863   unsigned int c = *str++;
1864
1865   switch (c)
1866     {
1867     case '\\': case '\'': case '"': case '?': break;
1868     case 'b': c = TARGET_BS;      break;
1869     case 'f': c = TARGET_FF;      break;
1870     case 'n': c = TARGET_NEWLINE; break;
1871     case 'r': c = TARGET_CR;      break;
1872     case 't': c = TARGET_TAB;     break;
1873     case 'v': c = TARGET_VT;      break;
1874
1875     case '(': case '{': case '[': case '%':
1876       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1877          '\%' is used to prevent SCCS from getting confused.  */
1878       unknown = CPP_PEDANTIC (pfile);
1879       break;
1880
1881     case 'a':
1882       if (CPP_WTRADITIONAL (pfile))
1883         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1884       if (!traditional)
1885         c = TARGET_BELL;
1886       break;
1887
1888     case 'e': case 'E':
1889       if (CPP_PEDANTIC (pfile))
1890         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1891       c = TARGET_ESC;
1892       break;
1893
1894     case 'u': case 'U':
1895       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1896       break;
1897
1898     case 'x':
1899       if (CPP_WTRADITIONAL (pfile))
1900         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1901
1902       if (!traditional)
1903         {
1904           unsigned int i = 0, overflow = 0;
1905           int digits_found = 0;
1906
1907           while (str < limit)
1908             {
1909               c = *str;
1910               if (! ISXDIGIT (c))
1911                 break;
1912               str++;
1913               overflow |= i ^ (i << 4 >> 4);
1914               i = (i << 4) + hex_digit_value (c);
1915               digits_found = 1;
1916             }
1917
1918           if (!digits_found)
1919             cpp_error (pfile, "\\x used with no following hex digits");
1920
1921           if (overflow | (i != (i & mask)))
1922             {
1923               cpp_pedwarn (pfile, "hex escape sequence out of range");
1924               i &= mask;
1925             }
1926           c = i;
1927         }
1928       break;
1929
1930     case '0':  case '1':  case '2':  case '3':
1931     case '4':  case '5':  case '6':  case '7':
1932       {
1933         unsigned int i = c - '0';
1934         int count = 0;
1935
1936         while (str < limit && ++count < 3)
1937           {
1938             c = *str;
1939             if (c < '0' || c > '7')
1940               break;
1941             str++;
1942             i = (i << 3) + c - '0';
1943           }
1944
1945         if (i != (i & mask))
1946           {
1947             cpp_pedwarn (pfile, "octal escape sequence out of range");
1948             i &= mask;
1949           }
1950         c = i;
1951       }
1952       break;
1953
1954     default:
1955       unknown = 1;
1956       break;
1957     }
1958
1959   if (unknown)
1960     {
1961       if (ISGRAPH (c))
1962         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1963       else
1964         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1965     }
1966
1967   if (c > mask)
1968     cpp_pedwarn (pfile, "escape sequence out of range for character");
1969
1970   *pstr = str;
1971   return c;
1972 }
1973
/* Unless separate maximum widths have already been defined, use the
   ordinary char and wchar_t widths.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1981
1982 /* Interpret a (possibly wide) character constant in TOKEN.
1983    WARN_MULTI warns about multi-character charconsts, if not
1984    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1985    that did not exist in traditional C.  PCHARS_SEEN points to a
1986    variable that is filled in with the number of characters seen.  */
1987 HOST_WIDE_INT
1988 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1989      cpp_reader *pfile;
1990      const cpp_token *token;
1991      int warn_multi;
1992      int traditional;
1993      unsigned int *pchars_seen;
1994 {
1995   const unsigned char *str = token->val.str.text;
1996   const unsigned char *limit = str + token->val.str.len;
1997   unsigned int chars_seen = 0;
1998   unsigned int width, max_chars, c;
1999   unsigned HOST_WIDE_INT mask;
2000   HOST_WIDE_INT result = 0;
2001
2002 #ifdef MULTIBYTE_CHARS
2003   (void) local_mbtowc (NULL, NULL, 0);
2004 #endif
2005
2006   /* Width in bits.  */
2007   if (token->type == CPP_CHAR)
2008     width = MAX_CHAR_TYPE_SIZE;
2009   else
2010     width = MAX_WCHAR_TYPE_SIZE;
2011
2012   if (width < HOST_BITS_PER_WIDE_INT)
2013     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2014   else
2015     mask = ~0;
2016   max_chars = HOST_BITS_PER_WIDE_INT / width;
2017
2018   while (str < limit)
2019     {
2020 #ifdef MULTIBYTE_CHARS
2021       wchar_t wc;
2022       int char_len;
2023
2024       char_len = local_mbtowc (&wc, str, limit - str);
2025       if (char_len == -1)
2026         {
2027           cpp_warning (pfile, "ignoring invalid multibyte character");
2028           c = *str++;
2029         }
2030       else
2031         {
2032           str += char_len;
2033           c = wc;
2034         }
2035 #else
2036       c = *str++;
2037 #endif
2038
2039       if (c == '\\')
2040         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2041
2042 #ifdef MAP_CHARACTER
2043       if (ISPRINT (c))
2044         c = MAP_CHARACTER (c);
2045 #endif
2046
2047       /* Merge character into result; ignore excess chars.  */
2048       if (++chars_seen <= max_chars)
2049         {
2050           if (width < HOST_BITS_PER_WIDE_INT)
2051             result = (result << width) | (c & mask);
2052           else
2053             result = c;
2054         }
2055     }
2056
2057   if (chars_seen == 0)
2058     cpp_error (pfile, "empty character constant");
2059   else if (chars_seen > max_chars)
2060     {
2061       chars_seen = max_chars;
2062       cpp_warning (pfile, "character constant too long");
2063     }
2064   else if (chars_seen > 1 && !traditional && warn_multi)
2065     cpp_warning (pfile, "multi-character character constant");
2066
2067   /* If char type is signed, sign-extend the constant.  The
2068      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2069   if (token->type == CPP_CHAR && chars_seen)
2070     {
2071       unsigned int nbits = chars_seen * width;
2072       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2073
2074       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2075           || ((result >> (nbits - 1)) & 1) == 0)
2076         result &= mask;
2077       else
2078         result |= ~mask;
2079     }
2080
2081   *pchars_seen = chars_seen;
2082   return result;
2083 }
2084
2085 /* Memory pools.  */
2086
/* Never instantiated: this type exists only so that the offset of U,
   which follows a single char, can be taken by DEFAULT_ALIGNMENT
   below.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment used where no explicit alignment is given.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2098
2099 static int
2100 chunk_suitable (pool, chunk, size)
2101      cpp_pool *pool;
2102      cpp_chunk *chunk;
2103      unsigned int size;
2104 {
2105   /* Being at least twice SIZE means we can use memcpy in
2106      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2107      anyway.  */
2108   return (chunk && pool->locked != chunk
2109           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2110 }
2111
2112 /* Returns the end of the new pool.  PTR points to a char in the old
2113    pool, and is updated to point to the same char in the new pool.  */
2114 unsigned char *
2115 _cpp_next_chunk (pool, len, ptr)
2116      cpp_pool *pool;
2117      unsigned int len;
2118      unsigned char **ptr;
2119 {
2120   cpp_chunk *chunk = pool->cur->next;
2121
2122   /* LEN is the minimum size we want in the new pool.  */
2123   len += POOL_ROOM (pool);
2124   if (! chunk_suitable (pool, chunk, len))
2125     {
2126       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2127
2128       chunk->next = pool->cur->next;
2129       pool->cur->next = chunk;
2130     }
2131
2132   /* Update the pointer before changing chunk's front.  */
2133   if (ptr)
2134     *ptr += chunk->base - POOL_FRONT (pool);
2135
2136   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2137   chunk->front = chunk->base;
2138
2139   pool->cur = chunk;
2140   return POOL_LIMIT (pool);
2141 }
2142
2143 static cpp_chunk *
2144 new_chunk (size)
2145      unsigned int size;
2146 {
2147   unsigned char *base;
2148   cpp_chunk *result;
2149
2150   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2151   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2152   /* Put the chunk descriptor at the end.  Then chunk overruns will
2153      cause obvious chaos.  */
2154   result = (cpp_chunk *) (base + size);
2155   result->base = base;
2156   result->front = base;
2157   result->limit = base + size;
2158   result->next = 0;
2159
2160   return result;
2161 }
2162
2163 void
2164 _cpp_init_pool (pool, size, align, temp)
2165      cpp_pool *pool;
2166      unsigned int size, align, temp;
2167 {
2168   if (align == 0)
2169     align = DEFAULT_ALIGNMENT;
2170   if (align & (align - 1))
2171     abort ();
2172   pool->align = align;
2173   pool->first = new_chunk (size);
2174   pool->cur = pool->first;
2175   pool->locked = 0;
2176   pool->locks = 0;
2177   if (temp)
2178     pool->cur->next = pool->cur;
2179 }
2180
2181 void
2182 _cpp_lock_pool (pool)
2183      cpp_pool *pool;
2184 {
2185   if (pool->locks++ == 0)
2186     pool->locked = pool->cur;
2187 }
2188
2189 void
2190 _cpp_unlock_pool (pool)
2191      cpp_pool *pool;
2192 {
2193   if (--pool->locks == 0)
2194     pool->locked = 0;
2195 }
2196
2197 void
2198 _cpp_free_pool (pool)
2199      cpp_pool *pool;
2200 {
2201   cpp_chunk *chunk = pool->first, *next;
2202
2203   do
2204     {
2205       next = chunk->next;
2206       free (chunk->base);
2207       chunk = next;
2208     }
2209   while (chunk && chunk != pool->first);
2210 }
2211
2212 /* Reserve LEN bytes from a memory pool.  */
2213 unsigned char *
2214 _cpp_pool_reserve (pool, len)
2215      cpp_pool *pool;
2216      unsigned int len;
2217 {
2218   len = POOL_ALIGN (len, pool->align);
2219   if (len > (unsigned int) POOL_ROOM (pool))
2220     _cpp_next_chunk (pool, len, 0);
2221
2222   return POOL_FRONT (pool);
2223 }
2224
2225 /* Allocate LEN bytes from a memory pool.  */
2226 unsigned char *
2227 _cpp_pool_alloc (pool, len)
2228      cpp_pool *pool;
2229      unsigned int len;
2230 {
2231   unsigned char *result = _cpp_pool_reserve (pool, len);
2232
2233   POOL_COMMIT (pool, len);
2234   return result;
2235 }