gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 enum spell_type
  30 {
  31   SPELL_OPERATOR = 0,
  32   SPELL_IDENT,
  33   SPELL_LITERAL,
  34   SPELL_NONE
  35 };
  36
  37 struct token_spelling
  38 {
  39   enum spell_type category;
  40   const unsigned char *name;
  41 };
  42
  43 static const unsigned char *const digraph_spellings[] =
  44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  45
  46 #define OP(e, s) { SPELL_OPERATOR, U s           },
  47 #define TK(e, s) { s,              U STRINGX (e) },
  48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  49 #undef OP
  50 #undef TK
  51
  52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  54
  55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
  56 static int skip_line_comment PARAMS ((cpp_reader *));
  57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
  59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
  60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
  61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
  62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  63                                   cppchar_t));
  64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  65                                     unsigned int, enum cpp_ttype));
  66 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  67 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
  68 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  69
  70 static unsigned int hex_digit_value PARAMS ((unsigned int));
  71 static _cpp_buff *new_buff PARAMS ((size_t));
  72
  73
  74 /* Utility routine:
  75
  76    Compares, the token TOKEN to the NUL-terminated string STRING.
  77    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  78 int
  79 cpp_ideq (token, string)
  80      const cpp_token *token;
  81      const char *string;
  82 {
  83   if (token->type != CPP_NAME)
  84     return 0;
  85
  86   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  87 }
  88
  89 /* Record a note TYPE at byte POS into the current cleaned logical
  90    line.  */
  91 static void
  92 add_line_note (buffer, pos, type)
  93      cpp_buffer *buffer;
  94      const uchar *pos;
  95      unsigned int type;
  96 {
  97   if (buffer->notes_used == buffer->notes_cap)
  98     {
  99       buffer->notes_cap = buffer->notes_cap * 2 + 200;
 100       buffer->notes = (_cpp_line_note *)
 101         xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
 102     }
 103
 104   buffer->notes[buffer->notes_used].pos = pos;
 105   buffer->notes[buffer->notes_used].type = type;
 106   buffer->notes_used++;
 107 }
 108
 109 /* Returns with a logical line that contains no escaped newlines or
 110    trigraphs.  This is a time-critical inner loop.  */
 111 void
 112 _cpp_clean_line (pfile)
 113      cpp_reader *pfile;
 114 {
 115   cpp_buffer *buffer;
 116   const uchar *s;
 117   uchar c, *d, *p;
 118
 119   buffer = pfile->buffer;
 120   buffer->cur_note = buffer->notes_used = 0;
 121   buffer->cur = buffer->line_base = buffer->next_line;
 122   buffer->need_line = false;
 123   s = buffer->next_line - 1;
 124
 125   if (!buffer->from_stage3)
 126     {
 127       d = (uchar *) s;
 128
 129       for (;;)
 130         {
 131           c = *++s;
 132           *++d = c;
 133
 134           if (c == '\n' || c == '\r')
 135             {
 136                   /* Handle DOS line endings.  */
 137               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 138                 s++;
 139               if (s == buffer->rlimit)
 140                 break;
 141
 142               /* Escaped?  */
 143               p = d;
 144               while (p != buffer->next_line && is_nvspace (p[-1]))
 145                 p--;
 146               if (p == buffer->next_line || p[-1] != '\\')
 147                 break;
 148
 149               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 150               d = p - 2;
 151               buffer->next_line = p - 1;
 152             }
 153           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 154             {
 155               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 156               add_line_note (buffer, d, s[2]);
 157               if (CPP_OPTION (pfile, trigraphs))
 158                 {
 159                   *d = _cpp_trigraph_map[s[2]];
 160                   s += 2;
 161                 }
 162             }
 163         }
 164     }
 165   else
 166     {
 167       do
 168         s++;
 169       while (*s != '\n' && *s != '\r');
 170       d = (uchar *) s;
 171
 172       /* Handle DOS line endings.  */
 173       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 174         s++;
 175     }
 176
 177   *d = '\n';
 178   /* A sentinel note that should never be processed.  */
 179   add_line_note (buffer, d + 1, '\n');
 180   buffer->next_line = s + 1;
 181 }
 182
 183 /* Process the notes created by add_line_note as far as the current
 184    location.  */
 185 void
 186 _cpp_process_line_notes (pfile, in_comment)
 187      cpp_reader *pfile;
 188      int in_comment;
 189 {
 190   cpp_buffer *buffer = pfile->buffer;
 191
 192   for (;;)
 193     {
 194       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 195       unsigned int col;
 196
 197       if (note->pos > buffer->cur)
 198         break;
 199
 200       buffer->cur_note++;
 201       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 202
 203       if (note->type == '\\' || note->type == ' ')
 204         {
 205           if (note->type == ' ' && !in_comment)
 206             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 207                                  "backslash and newline separated by space");
 208
 209           if (buffer->next_line > buffer->rlimit)
 210             {
 211               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 212                                    "backslash-newline at end of file");
 213               /* Prevent "no newline at end of file" warning.  */
 214               buffer->next_line = buffer->rlimit;
 215             }
 216
 217           buffer->line_base = note->pos;
 218           pfile->line++;
 219         }
 220       else if (_cpp_trigraph_map[note->type])
 221         {
 222           if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
 223             {
 224               if (CPP_OPTION (pfile, trigraphs))
 225                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 226                                      "trigraph ??%c converted to %c",
 227                                      note->type,
 228                                      (int) _cpp_trigraph_map[note->type]);
 229               else
 230                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 231                                      "trigraph ??%c ignored",
 232                                      note->type);
 233             }
 234         }
 235       else
 236         abort ();
 237     }
 238 }
 239
 240 /* Skip a C-style block comment.  We find the end of the comment by
 241    seeing if an asterisk is before every '/' we encounter.  Returns
 242    nonzero if comment terminated by EOF, zero otherwise.
 243
 244    Buffer->cur points to the initial asterisk of the comment.  */
 245 bool
 246 _cpp_skip_block_comment (pfile)
 247      cpp_reader *pfile;
 248 {
 249   cpp_buffer *buffer = pfile->buffer;
 250   cppchar_t c;
 251
 252   buffer->cur++;
 253   if (*buffer->cur == '/')
 254     buffer->cur++;
 255
 256   for (;;)
 257     {
 258       c = *buffer->cur++;
 259
 260       /* People like decorating comments with '*', so check for '/'
 261          instead for efficiency.  */
 262       if (c == '/')
 263         {
 264           if (buffer->cur[-2] == '*')
 265             break;
 266
 267           /* Warn about potential nested comments, but not if the '/'
 268              comes immediately before the true comment delimiter.
 269              Don't bother to get it right across escaped newlines.  */
 270           if (CPP_OPTION (pfile, warn_comments)
 271               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 272             cpp_error_with_line (pfile, DL_WARNING,
 273                                  pfile->line, CPP_BUF_COL (buffer),
 274                                  "\"/*\" within comment");
 275         }
 276       else if (c == '\n')
 277         {
 278           buffer->cur--;
 279           _cpp_process_line_notes (pfile, true);
 280           if (buffer->next_line >= buffer->rlimit)
 281             return true;
 282           _cpp_clean_line (pfile);
 283           pfile->line++;
 284         }
 285     }
 286
 287   return false;
 288 }
 289
 290 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 291    terminating newline.  Handles escaped newlines.  Returns nonzero
 292    if a multiline comment.  */
 293 static int
 294 skip_line_comment (pfile)
 295      cpp_reader *pfile;
 296 {
 297   cpp_buffer *buffer = pfile->buffer;
 298   unsigned int orig_line = pfile->line;
 299
 300   while (*buffer->cur != '\n')
 301     buffer->cur++;
 302
 303   _cpp_process_line_notes (pfile, true);
 304   return orig_line != pfile->line;
 305 }
 306
 307 /* Skips whitespace, saving the next non-whitespace character.  */
 308 static void
 309 skip_whitespace (pfile, c)
 310      cpp_reader *pfile;
 311      cppchar_t c;
 312 {
 313   cpp_buffer *buffer = pfile->buffer;
 314   bool saw_NUL = false;
 315
 316   do
 317     {
 318       /* Horizontal space always OK.  */
 319       if (c == ' ' || c == '\t')
 320         ;
 321       /* Just \f \v or \0 left.  */
 322       else if (c == '\0')
 323         saw_NUL = true;
 324       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 325         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 326                              CPP_BUF_COL (buffer),
 327                              "%s in preprocessing directive",
 328                              c == '\f' ? "form feed" : "vertical tab");
 329
 330       c = *buffer->cur++;
 331     }
 332   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 333   while (is_nvspace (c));
 334
 335   if (saw_NUL)
 336     cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 337
 338   buffer->cur--;
 339 }
 340
 341 /* See if the characters of a number token are valid in a name (no
 342    '.', '+' or '-').  */
 343 static int
 344 name_p (pfile, string)
 345      cpp_reader *pfile;
 346      const cpp_string *string;
 347 {
 348   unsigned int i;
 349
 350   for (i = 0; i < string->len; i++)
 351     if (!is_idchar (string->text[i]))
 352       return 0;
 353
 354   return 1;
 355 }
 356
 357 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 358    an identifier.  FIRST is TRUE if this starts an identifier.  */
 359 static bool
 360 forms_identifier_p (pfile, first)
 361      cpp_reader *pfile;
 362      int first;
 363 {
 364   cpp_buffer *buffer = pfile->buffer;
 365
 366   if (*buffer->cur == '$')
 367     {
 368       if (!CPP_OPTION (pfile, dollars_in_ident))
 369         return false;
 370
 371       buffer->cur++;
 372       if (CPP_PEDANTIC (pfile)
 373           && !pfile->state.skipping
 374           && !pfile->warned_dollar)
 375         {
 376           pfile->warned_dollar = true;
 377           cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
 378         }
 379
 380       return true;
 381     }
 382
 383   /* Is this a syntactically valid UCN?  */
 384   if (0 && *buffer->cur == '\\'
 385       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 386     {
 387       buffer->cur += 2;
 388       if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
 389         return true;
 390       buffer->cur -= 2;
 391     }
 392
 393   return false;
 394 }
 395
 396 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 397 static cpp_hashnode *
 398 lex_identifier (pfile, base)
 399      cpp_reader *pfile;
 400      const uchar *base;
 401 {
 402   cpp_hashnode *result;
 403   const uchar *cur;
 404
 405   do
 406     {
 407       cur = pfile->buffer->cur;
 408
 409       /* N.B. ISIDNUM does not include $.  */
 410       while (ISIDNUM (*cur))
 411         cur++;
 412
 413       pfile->buffer->cur = cur;
 414     }
 415   while (forms_identifier_p (pfile, false));
 416
 417   result = (cpp_hashnode *)
 418     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 419
 420   /* Rarely, identifiers require diagnostics when lexed.  */
 421   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 422                         && !pfile->state.skipping, 0))
 423     {
 424       /* It is allowed to poison the same identifier twice.  */
 425       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 426         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 427                    NODE_NAME (result));
 428
 429       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 430          replacement list of a variadic macro.  */
 431       if (result == pfile->spec_nodes.n__VA_ARGS__
 432           && !pfile->state.va_args_ok)
 433         cpp_error (pfile, DL_PEDWARN,
 434         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 435     }
 436
 437   return result;
 438 }
 439
 440 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 441 static void
 442 lex_number (pfile, number)
 443      cpp_reader *pfile;
 444      cpp_string *number;
 445 {
 446   const uchar *cur;
 447   const uchar *base;
 448   uchar *dest;
 449
 450   base = pfile->buffer->cur - 1;
 451   do
 452     {
 453       cur = pfile->buffer->cur;
 454
 455       /* N.B. ISIDNUM does not include $.  */
 456       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 457         cur++;
 458
 459       pfile->buffer->cur = cur;
 460     }
 461   while (forms_identifier_p (pfile, false));
 462
 463   number->len = cur - base;
 464   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 465   memcpy (dest, base, number->len);
 466   dest[number->len] = '\0';
 467   number->text = dest;
 468 }
 469
 470 /* Create a token of type TYPE with a literal spelling.  */
 471 static void
 472 create_literal (pfile, token, base, len, type)
 473      cpp_reader *pfile;
 474      cpp_token *token;
 475      const uchar *base;
 476      unsigned int len;
 477      enum cpp_ttype type;
 478 {
 479   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 480
 481   memcpy (dest, base, len);
 482   dest[len] = '\0';
 483   token->type = type;
 484   token->val.str.len = len;
 485   token->val.str.text = dest;
 486 }
 487
 488 /* Lexes a string, character constant, or angle-bracketed header file
 489    name.  The stored string contains the spelling, including opening
 490    quote and leading any leading 'L'.  It returns the type of the
 491    literal, or CPP_OTHER if it was not properly terminated.
 492
 493    The spelling is NUL-terminated, but it is not guaranteed that this
 494    is the first NUL since embedded NULs are preserved.  */
 495 static void
 496 lex_string (pfile, token, base)
 497      cpp_reader *pfile;
 498      cpp_token *token;
 499      const uchar *base;
 500 {
 501   bool saw_NUL = false;
 502   const uchar *cur;
 503   cppchar_t terminator;
 504   enum cpp_ttype type;
 505
 506   cur = base;
 507   terminator = *cur++;
 508   if (terminator == 'L')
 509     terminator = *cur++;
 510   if (terminator == '\"')
 511     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 512   else if (terminator == '\'')
 513     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 514   else
 515     terminator = '>', type = CPP_HEADER_NAME;
 516
 517   for (;;)
 518     {
 519       cppchar_t c = *cur++;
 520
 521       /* In #include-style directives, terminators are not escapable.  */
 522       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 523         cur++;
 524       else if (c == terminator)
 525         break;
 526       else if (c == '\n')
 527         {
 528           cur--;
 529           type = CPP_OTHER;
 530           break;
 531         }
 532       else if (c == '\0')
 533         saw_NUL = true;
 534     }
 535
 536   if (saw_NUL && !pfile->state.skipping)
 537     cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
 538
 539   pfile->buffer->cur = cur;
 540   create_literal (pfile, token, base, cur - base, type);
 541 }
 542
 543 /* The stored comment includes the comment start and any terminator.  */
 544 static void
 545 save_comment (pfile, token, from, type)
 546      cpp_reader *pfile;
 547      cpp_token *token;
 548      const unsigned char *from;
 549      cppchar_t type;
 550 {
 551   unsigned char *buffer;
 552   unsigned int len, clen;
 553
 554   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 555
 556   /* C++ comments probably (not definitely) have moved past a new
 557      line, which we don't want to save in the comment.  */
 558   if (is_vspace (pfile->buffer->cur[-1]))
 559     len--;
 560
 561   /* If we are currently in a directive, then we need to store all
 562      C++ comments as C comments internally, and so we need to
 563      allocate a little extra space in that case.
 564
 565      Note that the only time we encounter a directive here is
 566      when we are saving comments in a "#define".  */
 567   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 568
 569   buffer = _cpp_unaligned_alloc (pfile, clen);
 570
 571   token->type = CPP_COMMENT;
 572   token->val.str.len = clen;
 573   token->val.str.text = buffer;
 574
 575   buffer[0] = '/';
 576   memcpy (buffer + 1, from, len - 1);
 577
 578   /* Finish conversion to a C comment, if necessary.  */
 579   if (pfile->state.in_directive && type == '/')
 580     {
 581       buffer[1] = '*';
 582       buffer[clen - 2] = '*';
 583       buffer[clen - 1] = '/';
 584     }
 585 }
 586
 587 /* Allocate COUNT tokens for RUN.  */
 588 void
 589 _cpp_init_tokenrun (run, count)
 590      tokenrun *run;
 591      unsigned int count;
 592 {
 593   run->base = xnewvec (cpp_token, count);
 594   run->limit = run->base + count;
 595   run->next = NULL;
 596 }
 597
 598 /* Returns the next tokenrun, or creates one if there is none.  */
 599 static tokenrun *
 600 next_tokenrun (run)
 601      tokenrun *run;
 602 {
 603   if (run->next == NULL)
 604     {
 605       run->next = xnew (tokenrun);
 606       run->next->prev = run;
 607       _cpp_init_tokenrun (run->next, 250);
 608     }
 609
 610   return run->next;
 611 }
 612
 613 /* Allocate a single token that is invalidated at the same time as the
 614    rest of the tokens on the line.  Has its line and col set to the
 615    same as the last lexed token, so that diagnostics appear in the
 616    right place.  */
 617 cpp_token *
 618 _cpp_temp_token (pfile)
 619      cpp_reader *pfile;
 620 {
 621   cpp_token *old, *result;
 622
 623   old = pfile->cur_token - 1;
 624   if (pfile->cur_token == pfile->cur_run->limit)
 625     {
 626       pfile->cur_run = next_tokenrun (pfile->cur_run);
 627       pfile->cur_token = pfile->cur_run->base;
 628     }
 629
 630   result = pfile->cur_token++;
 631   result->line = old->line;
 632   result->col = old->col;
 633   return result;
 634 }
 635
 636 /* Lex a token into RESULT (external interface).  Takes care of issues
 637    like directive handling, token lookahead, multiple include
 638    optimization and skipping.  */
 639 const cpp_token *
 640 _cpp_lex_token (pfile)
 641      cpp_reader *pfile;
 642 {
 643   cpp_token *result;
 644
 645   for (;;)
 646     {
 647       if (pfile->cur_token == pfile->cur_run->limit)
 648         {
 649           pfile->cur_run = next_tokenrun (pfile->cur_run);
 650           pfile->cur_token = pfile->cur_run->base;
 651         }
 652
 653       if (pfile->lookaheads)
 654         {
 655           pfile->lookaheads--;
 656           result = pfile->cur_token++;
 657         }
 658       else
 659         result = _cpp_lex_direct (pfile);
 660
 661       if (result->flags & BOL)
 662         {
 663           /* Is this a directive.  If _cpp_handle_directive returns
 664              false, it is an assembler #.  */
 665           if (result->type == CPP_HASH
 666               /* 6.10.3 p 11: Directives in a list of macro arguments
 667                  gives undefined behavior.  This implementation
 668                  handles the directive as normal.  */
 669               && pfile->state.parsing_args != 1
 670               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 671             continue;
 672           if (pfile->cb.line_change && !pfile->state.skipping)
 673             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 674         }
 675
 676       /* We don't skip tokens in directives.  */
 677       if (pfile->state.in_directive)
 678         break;
 679
 680       /* Outside a directive, invalidate controlling macros.  At file
 681          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 682          get here and MI optimisation works.  */
 683       pfile->mi_valid = false;
 684
 685       if (!pfile->state.skipping || result->type == CPP_EOF)
 686         break;
 687     }
 688
 689   return result;
 690 }
 691
 692 /* Returns true if a fresh line has been loaded.  */
 693 bool
 694 _cpp_get_fresh_line (pfile)
 695      cpp_reader *pfile;
 696 {
 697   /* We can't get a new line until we leave the current directive.  */
 698   if (pfile->state.in_directive)
 699     return false;
 700
 701   for (;;)
 702     {
 703       cpp_buffer *buffer = pfile->buffer;
 704
 705       if (!buffer->need_line)
 706         return true;
 707
 708       if (buffer->next_line < buffer->rlimit)
 709         {
 710           _cpp_clean_line (pfile);
 711           return true;
 712         }
 713
 714       /* First, get out of parsing arguments state.  */
 715       if (pfile->state.parsing_args)
 716         return false;
 717
 718       /* End of buffer.  Non-empty files should end in a newline.  */
 719       if (buffer->buf != buffer->rlimit
 720           && buffer->next_line > buffer->rlimit
 721           && !buffer->from_stage3)
 722         {
 723           /* Only warn once.  */
 724           buffer->next_line = buffer->rlimit;
 725           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 726                                CPP_BUF_COLUMN (buffer, buffer->cur),
 727                                "no newline at end of file");
 728         }
 729
 730       if (!buffer->prev)
 731         return false;
 732
 733       if (buffer->return_at_eof)
 734         {
 735           _cpp_pop_buffer (pfile);
 736           return false;
 737         }
 738
 739       _cpp_pop_buffer (pfile);
 740     }
 741 }
 742
 743 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 744   do                                                    \
 745     {                                                   \
 746       result->type = ELSE_TYPE;                         \
 747       if (*buffer->cur == CHAR)                         \
 748         buffer->cur++, result->type = THEN_TYPE;        \
 749     }                                                   \
 750   while (0)
 751
 752 /* Lex a token into pfile->cur_token, which is also incremented, to
 753    get diagnostics pointing to the correct location.
 754
 755    Does not handle issues such as token lookahead, multiple-include
 756    optimisation, directives, skipping etc.  This function is only
 757    suitable for use by _cpp_lex_token, and in special cases like
 758    lex_expansion_token which doesn't care for any of these issues.
 759
 760    When meeting a newline, returns CPP_EOF if parsing a directive,
 761    otherwise returns to the start of the token buffer if permissible.
 762    Returns the location of the lexed token.  */
 763 cpp_token *
 764 _cpp_lex_direct (pfile)
 765      cpp_reader *pfile;
 766 {
 767   cppchar_t c;
 768   cpp_buffer *buffer;
 769   const unsigned char *comment_start;
 770   cpp_token *result = pfile->cur_token++;
 771
 772  fresh_line:
 773   result->flags = 0;
 774   if (pfile->buffer->need_line)
 775     {
 776       if (!_cpp_get_fresh_line (pfile))
 777         {
 778           result->type = CPP_EOF;
 779           if (!pfile->state.in_directive)
 780             {
 781               /* Tell the compiler the line number of the EOF token.  */
 782               result->line = pfile->line;
 783               result->flags = BOL;
 784             }
 785           return result;
 786         }
 787       if (!pfile->keep_tokens)
 788         {
 789           pfile->cur_run = &pfile->base_run;
 790           result = pfile->base_run.base;
 791           pfile->cur_token = result + 1;
 792         }
 793       result->flags = BOL;
 794       if (pfile->state.parsing_args == 2)
 795         result->flags |= PREV_WHITE;
 796     }
 797   buffer = pfile->buffer;
 798  update_tokens_line:
 799   result->line = pfile->line;
 800
 801  skipped_white:
 802   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 803       && !pfile->overlaid_buffer)
 804     {
 805       _cpp_process_line_notes (pfile, false);
 806       result->line = pfile->line;
 807     }
 808   c = *buffer->cur++;
 809   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 810
 811   switch (c)
 812     {
 813     case ' ': case '\t': case '\f': case '\v': case '\0':
 814       result->flags |= PREV_WHITE;
 815       skip_whitespace (pfile, c);
 816       goto skipped_white;
 817
 818     case '\n':
 819       pfile->line++;
 820       buffer->need_line = true;
 821       goto fresh_line;
 822
 823     case '0': case '1': case '2': case '3': case '4':
 824     case '5': case '6': case '7': case '8': case '9':
 825       result->type = CPP_NUMBER;
 826       lex_number (pfile, &result->val.str);
 827       break;
 828
 829     case 'L':
 830       /* 'L' may introduce wide characters or strings.  */
 831       if (*buffer->cur == '\'' || *buffer->cur == '"')
 832         {
 833           lex_string (pfile, result, buffer->cur - 1);
 834           break;
 835         }
 836       /* Fall through.  */
 837
 838     case '_':
 839     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 840     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 841     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 842     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 843     case 'y': case 'z':
 844     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 845     case 'G': case 'H': case 'I': case 'J': case 'K':
 846     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 847     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 848     case 'Y': case 'Z':
 849       result->type = CPP_NAME;
 850       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 851
 852       /* Convert named operators to their proper types.  */
 853       if (result->val.node->flags & NODE_OPERATOR)
 854         {
 855           result->flags |= NAMED_OP;
 856           result->type = result->val.node->directive_index;
 857         }
 858       break;
 859
 860     case '\'':
 861     case '"':
 862       lex_string (pfile, result, buffer->cur - 1);
 863       break;
 864
 865     case '/':
 866       /* A potential block or line comment.  */
 867       comment_start = buffer->cur;
 868       c = *buffer->cur;
 869
 870       if (c == '*')
 871         {
 872           if (_cpp_skip_block_comment (pfile))
 873             cpp_error (pfile, DL_ERROR, "unterminated comment");
 874         }
 875       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 876                             || CPP_IN_SYSTEM_HEADER (pfile)))
 877         {
 878           /* Warn about comments only if pedantically GNUC89, and not
 879              in system headers.  */
 880           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 881               && ! buffer->warned_cplusplus_comments)
 882             {
 883               cpp_error (pfile, DL_PEDWARN,
 884                          "C++ style comments are not allowed in ISO C90");
 885               cpp_error (pfile, DL_PEDWARN,
 886                          "(this will be reported only once per input file)");
 887               buffer->warned_cplusplus_comments = 1;
 888             }
 889
 890           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 891             cpp_error (pfile, DL_WARNING, "multi-line comment");
 892         }
 893       else if (c == '=')
 894         {
 895           buffer->cur++;
 896           result->type = CPP_DIV_EQ;
 897           break;
 898         }
 899       else
 900         {
 901           result->type = CPP_DIV;
 902           break;
 903         }
 904
 905       if (!pfile->state.save_comments)
 906         {
 907           result->flags |= PREV_WHITE;
 908           goto update_tokens_line;
 909         }
 910
 911       /* Save the comment as a token in its own right.  */
 912       save_comment (pfile, result, comment_start, c);
 913       break;
 914
 915     case '<':
 916       if (pfile->state.angled_headers)
 917         {
 918           lex_string (pfile, result, buffer->cur - 1);
 919           break;
 920         }
 921
 922       result->type = CPP_LESS;
 923       if (*buffer->cur == '=')
 924         buffer->cur++, result->type = CPP_LESS_EQ;
 925       else if (*buffer->cur == '<')
 926         {
 927           buffer->cur++;
 928           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 929         }
 930       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 931         {
 932           buffer->cur++;
 933           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 934         }
 935       else if (CPP_OPTION (pfile, digraphs))
 936         {
 937           if (*buffer->cur == ':')
 938             {
 939               buffer->cur++;
 940               result->flags |= DIGRAPH;
 941               result->type = CPP_OPEN_SQUARE;
 942             }
 943           else if (*buffer->cur == '%')
 944             {
 945               buffer->cur++;
 946               result->flags |= DIGRAPH;
 947               result->type = CPP_OPEN_BRACE;
 948             }
 949         }
 950       break;
 951
 952     case '>':
 953       result->type = CPP_GREATER;
 954       if (*buffer->cur == '=')
 955         buffer->cur++, result->type = CPP_GREATER_EQ;
 956       else if (*buffer->cur == '>')
 957         {
 958           buffer->cur++;
 959           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
 960         }
 961       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 962         {
 963           buffer->cur++;
 964           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
 965         }
 966       break;
 967
 968     case '%':
 969       result->type = CPP_MOD;
 970       if (*buffer->cur == '=')
 971         buffer->cur++, result->type = CPP_MOD_EQ;
 972       else if (CPP_OPTION (pfile, digraphs))
 973         {
 974           if (*buffer->cur == ':')
 975             {
 976               buffer->cur++;
 977               result->flags |= DIGRAPH;
 978               result->type = CPP_HASH;
 979               if (*buffer->cur == '%' && buffer->cur[1] == ':')
 980                 buffer->cur += 2, result->type = CPP_PASTE;
 981             }
 982           else if (*buffer->cur == '>')
 983             {
 984               buffer->cur++;
 985               result->flags |= DIGRAPH;
 986               result->type = CPP_CLOSE_BRACE;
 987             }
 988         }
 989       break;
 990
 991     case '.':
 992       result->type = CPP_DOT;
 993       if (ISDIGIT (*buffer->cur))
 994         {
 995           result->type = CPP_NUMBER;
 996           lex_number (pfile, &result->val.str);
 997         }
 998       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
 999         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1000       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1001         buffer->cur++, result->type = CPP_DOT_STAR;
1002       break;
1003
1004     case '+':
1005       result->type = CPP_PLUS;
1006       if (*buffer->cur == '+')
1007         buffer->cur++, result->type = CPP_PLUS_PLUS;
1008       else if (*buffer->cur == '=')
1009         buffer->cur++, result->type = CPP_PLUS_EQ;
1010       break;
1011
1012     case '-':
1013       result->type = CPP_MINUS;
1014       if (*buffer->cur == '>')
1015         {
1016           buffer->cur++;
1017           result->type = CPP_DEREF;
1018           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1019             buffer->cur++, result->type = CPP_DEREF_STAR;
1020         }
1021       else if (*buffer->cur == '-')
1022         buffer->cur++, result->type = CPP_MINUS_MINUS;
1023       else if (*buffer->cur == '=')
1024         buffer->cur++, result->type = CPP_MINUS_EQ;
1025       break;
1026
1027     case '&':
1028       result->type = CPP_AND;
1029       if (*buffer->cur == '&')
1030         buffer->cur++, result->type = CPP_AND_AND;
1031       else if (*buffer->cur == '=')
1032         buffer->cur++, result->type = CPP_AND_EQ;
1033       break;
1034
1035     case '|':
1036       result->type = CPP_OR;
1037       if (*buffer->cur == '|')
1038         buffer->cur++, result->type = CPP_OR_OR;
1039       else if (*buffer->cur == '=')
1040         buffer->cur++, result->type = CPP_OR_EQ;
1041       break;
1042
1043     case ':':
1044       result->type = CPP_COLON;
1045       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1046         buffer->cur++, result->type = CPP_SCOPE;
1047       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1048         {
1049           buffer->cur++;
1050           result->flags |= DIGRAPH;
1051           result->type = CPP_CLOSE_SQUARE;
1052         }
1053       break;
1054
1055     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1056     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1057     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1058     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1059     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1060
1061     case '?': result->type = CPP_QUERY; break;
1062     case '~': result->type = CPP_COMPL; break;
1063     case ',': result->type = CPP_COMMA; break;
1064     case '(': result->type = CPP_OPEN_PAREN; break;
1065     case ')': result->type = CPP_CLOSE_PAREN; break;
1066     case '[': result->type = CPP_OPEN_SQUARE; break;
1067     case ']': result->type = CPP_CLOSE_SQUARE; break;
1068     case '{': result->type = CPP_OPEN_BRACE; break;
1069     case '}': result->type = CPP_CLOSE_BRACE; break;
1070     case ';': result->type = CPP_SEMICOLON; break;
1071
1072       /* @ is a punctuator in Objective-C.  */
1073     case '@': result->type = CPP_ATSIGN; break;
1074
1075     case '$':
1076     case '\\':
1077       {
1078         const uchar *base = --buffer->cur;
1079
1080         if (forms_identifier_p (pfile, true))
1081           {
1082             result->type = CPP_NAME;
1083             result->val.node = lex_identifier (pfile, base);
1084             break;
1085           }
1086         buffer->cur++;
1087       }
1088
1089     default:
1090       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1091       break;
1092     }
1093
1094   return result;
1095 }
1096
1097 /* An upper bound on the number of bytes needed to spell TOKEN.
1098    Does not include preceding whitespace.  */
1099 unsigned int
1100 cpp_token_len (token)
1101      const cpp_token *token;
1102 {
1103   unsigned int len;
1104
1105   switch (TOKEN_SPELL (token))
1106     {
1107     default:            len = 4;                                break;
1108     case SPELL_LITERAL: len = token->val.str.len;               break;
1109     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1110     }
1111
1112   return len;
1113 }
1114
1115 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1116    already contain the enough space to hold the token's spelling.
1117    Returns a pointer to the character after the last character
1118    written.  */
1119 unsigned char *
1120 cpp_spell_token (pfile, token, buffer)
1121      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1122      const cpp_token *token;
1123      unsigned char *buffer;
1124 {
1125   switch (TOKEN_SPELL (token))
1126     {
1127     case SPELL_OPERATOR:
1128       {
1129         const unsigned char *spelling;
1130         unsigned char c;
1131
1132         if (token->flags & DIGRAPH)
1133           spelling
1134             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1135         else if (token->flags & NAMED_OP)
1136           goto spell_ident;
1137         else
1138           spelling = TOKEN_NAME (token);
1139
1140         while ((c = *spelling++) != '\0')
1141           *buffer++ = c;
1142       }
1143       break;
1144
1145     spell_ident:
1146     case SPELL_IDENT:
1147       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1148       buffer += NODE_LEN (token->val.node);
1149       break;
1150
1151     case SPELL_LITERAL:
1152       memcpy (buffer, token->val.str.text, token->val.str.len);
1153       buffer += token->val.str.len;
1154       break;
1155
1156     case SPELL_NONE:
1157       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1158       break;
1159     }
1160
1161   return buffer;
1162 }
1163
1164 /* Returns TOKEN spelt as a null-terminated string.  The string is
1165    freed when the reader is destroyed.  Useful for diagnostics.  */
1166 unsigned char *
1167 cpp_token_as_text (pfile, token)
1168      cpp_reader *pfile;
1169      const cpp_token *token;
1170 {
1171   unsigned int len = cpp_token_len (token) + 1;
1172   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1173
1174   end = cpp_spell_token (pfile, token, start);
1175   end[0] = '\0';
1176
1177   return start;
1178 }
1179
1180 /* Used by C front ends, which really should move to using
1181    cpp_token_as_text.  */
1182 const char *
1183 cpp_type2name (type)
1184      enum cpp_ttype type;
1185 {
1186   return (const char *) token_spellings[type].name;
1187 }
1188
1189 /* Writes the spelling of token to FP, without any preceding space.
1190    Separated from cpp_spell_token for efficiency - to avoid stdio
1191    double-buffering.  */
1192 void
1193 cpp_output_token (token, fp)
1194      const cpp_token *token;
1195      FILE *fp;
1196 {
1197   switch (TOKEN_SPELL (token))
1198     {
1199     case SPELL_OPERATOR:
1200       {
1201         const unsigned char *spelling;
1202         int c;
1203
1204         if (token->flags & DIGRAPH)
1205           spelling
1206             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1207         else if (token->flags & NAMED_OP)
1208           goto spell_ident;
1209         else
1210           spelling = TOKEN_NAME (token);
1211
1212         c = *spelling;
1213         do
1214           putc (c, fp);
1215         while ((c = *++spelling) != '\0');
1216       }
1217       break;
1218
1219     spell_ident:
1220     case SPELL_IDENT:
1221       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1222     break;
1223
1224     case SPELL_LITERAL:
1225       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1226       break;
1227
1228     case SPELL_NONE:
1229       /* An error, most probably.  */
1230       break;
1231     }
1232 }
1233
1234 /* Compare two tokens.  */
1235 int
1236 _cpp_equiv_tokens (a, b)
1237      const cpp_token *a, *b;
1238 {
1239   if (a->type == b->type && a->flags == b->flags)
1240     switch (TOKEN_SPELL (a))
1241       {
1242       default:                  /* Keep compiler happy.  */
1243       case SPELL_OPERATOR:
1244         return 1;
1245       case SPELL_NONE:
1246         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1247       case SPELL_IDENT:
1248         return a->val.node == b->val.node;
1249       case SPELL_LITERAL:
1250         return (a->val.str.len == b->val.str.len
1251                 && !memcmp (a->val.str.text, b->val.str.text,
1252                             a->val.str.len));
1253       }
1254
1255   return 0;
1256 }
1257
1258 /* Returns nonzero if a space should be inserted to avoid an
1259    accidental token paste for output.  For simplicity, it is
1260    conservative, and occasionally advises a space where one is not
1261    needed, e.g. "." and ".2".  */
1262 int
1263 cpp_avoid_paste (pfile, token1, token2)
1264      cpp_reader *pfile;
1265      const cpp_token *token1, *token2;
1266 {
1267   enum cpp_ttype a = token1->type, b = token2->type;
1268   cppchar_t c;
1269
1270   if (token1->flags & NAMED_OP)
1271     a = CPP_NAME;
1272   if (token2->flags & NAMED_OP)
1273     b = CPP_NAME;
1274
1275   c = EOF;
1276   if (token2->flags & DIGRAPH)
1277     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1278   else if (token_spellings[b].category == SPELL_OPERATOR)
1279     c = token_spellings[b].name[0];
1280
1281   /* Quickly get everything that can paste with an '='.  */
1282   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1283     return 1;
1284
1285   switch (a)
1286     {
1287     case CPP_GREATER:   return c == '>' || c == '?';
1288     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1289     case CPP_PLUS:      return c == '+';
1290     case CPP_MINUS:     return c == '-' || c == '>';
1291     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1292     case CPP_MOD:       return c == ':' || c == '>';
1293     case CPP_AND:       return c == '&';
1294     case CPP_OR:        return c == '|';
1295     case CPP_COLON:     return c == ':' || c == '>';
1296     case CPP_DEREF:     return c == '*';
1297     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1298     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1299     case CPP_NAME:      return ((b == CPP_NUMBER
1300                                  && name_p (pfile, &token2->val.str))
1301                                 || b == CPP_NAME
1302                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1303     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1304                                 || c == '.' || c == '+' || c == '-');
1305                                       /* UCNs */
1306     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1307                                  && b == CPP_NAME)
1308                                 || (CPP_OPTION (pfile, objc)
1309                                     && token1->val.str.text[0] == '@'
1310                                     && (b == CPP_NAME || b == CPP_STRING)));
1311     default:            break;
1312     }
1313
1314   return 0;
1315 }
1316
1317 /* Output all the remaining tokens on the current line, and a newline
1318    character, to FP.  Leading whitespace is removed.  If there are
1319    macros, special token padding is not performed.  */
1320 void
1321 cpp_output_line (pfile, fp)
1322      cpp_reader *pfile;
1323      FILE *fp;
1324 {
1325   const cpp_token *token;
1326
1327   token = cpp_get_token (pfile);
1328   while (token->type != CPP_EOF)
1329     {
1330       cpp_output_token (token, fp);
1331       token = cpp_get_token (pfile);
1332       if (token->flags & PREV_WHITE)
1333         putc (' ', fp);
1334     }
1335
1336   putc ('\n', fp);
1337 }
1338
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1349
1350 /* Read a possible universal character name starting at *PSTR.  */
1351 static cppchar_t
1352 maybe_read_ucn (pfile, pstr)
1353      cpp_reader *pfile;
1354      const uchar **pstr;
1355 {
1356   cppchar_t result, c = (*pstr)[-1];
1357
1358   result = _cpp_valid_ucn (pfile, pstr, false);
1359   if (result)
1360     {
1361       if (CPP_WTRADITIONAL (pfile))
1362         cpp_error (pfile, DL_WARNING,
1363                    "the meaning of '\\%c' is different in traditional C",
1364                    (int) c);
1365
1366       if (CPP_OPTION (pfile, EBCDIC))
1367         {
1368           cpp_error (pfile, DL_ERROR,
1369                      "universal character with an EBCDIC target");
1370           result = 0x3f;  /* EBCDIC invalid character */
1371         }
1372     }
1373
1374   return result;
1375 }
1376
1377 /* Returns the value of an escape sequence, truncated to the correct
1378    target precision.  PSTR points to the input pointer, which is just
1379    after the backslash.  LIMIT is how much text we have.  WIDE is true
1380    if the escape sequence is part of a wide character constant or
1381    string literal.  Handles all relevant diagnostics.  */
1382 cppchar_t
1383 cpp_parse_escape (pfile, pstr, limit, wide)
1384      cpp_reader *pfile;
1385      const unsigned char **pstr;
1386      const unsigned char *limit;
1387      int wide;
1388 {
1389   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1390   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1391   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1392
1393   int unknown = 0;
1394   const unsigned char *str = *pstr, *charconsts;
1395   cppchar_t c, ucn, mask;
1396   unsigned int width;
1397
1398   if (CPP_OPTION (pfile, EBCDIC))
1399     charconsts = ebcdic;
1400   else
1401     charconsts = ascii;
1402
1403   if (wide)
1404     width = CPP_OPTION (pfile, wchar_precision);
1405   else
1406     width = CPP_OPTION (pfile, char_precision);
1407   if (width < BITS_PER_CPPCHAR_T)
1408     mask = ((cppchar_t) 1 << width) - 1;
1409   else
1410     mask = ~0;
1411
1412   c = *str++;
1413   switch (c)
1414     {
1415     case '\\': case '\'': case '"': case '?': break;
1416     case 'b': c = charconsts[1];  break;
1417     case 'f': c = charconsts[3];  break;
1418     case 'n': c = charconsts[4];  break;
1419     case 'r': c = charconsts[5];  break;
1420     case 't': c = charconsts[6];  break;
1421     case 'v': c = charconsts[7];  break;
1422
1423     case '(': case '{': case '[': case '%':
1424       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1425          '\%' is used to prevent SCCS from getting confused.  */
1426       unknown = CPP_PEDANTIC (pfile);
1427       break;
1428
1429     case 'a':
1430       if (CPP_WTRADITIONAL (pfile))
1431         cpp_error (pfile, DL_WARNING,
1432                    "the meaning of '\\a' is different in traditional C");
1433       c = charconsts[0];
1434       break;
1435
1436     case 'e': case 'E':
1437       if (CPP_PEDANTIC (pfile))
1438         cpp_error (pfile, DL_PEDWARN,
1439                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1440       c = charconsts[2];
1441       break;
1442
1443     case 'u': case 'U':
1444       ucn = maybe_read_ucn (pfile, &str);
1445       if (ucn)
1446         c = ucn;
1447       else
1448         unknown = true;
1449       break;
1450
1451     case 'x':
1452       if (CPP_WTRADITIONAL (pfile))
1453         cpp_error (pfile, DL_WARNING,
1454                    "the meaning of '\\x' is different in traditional C");
1455
1456       {
1457         cppchar_t i = 0, overflow = 0;
1458         int digits_found = 0;
1459
1460         while (str < limit)
1461           {
1462             c = *str;
1463             if (! ISXDIGIT (c))
1464               break;
1465             str++;
1466             overflow |= i ^ (i << 4 >> 4);
1467             i = (i << 4) + hex_digit_value (c);
1468             digits_found = 1;
1469           }
1470
1471         if (!digits_found)
1472           cpp_error (pfile, DL_ERROR,
1473                        "\\x used with no following hex digits");
1474
1475         if (overflow | (i != (i & mask)))
1476           {
1477             cpp_error (pfile, DL_PEDWARN,
1478                        "hex escape sequence out of range");
1479             i &= mask;
1480           }
1481         c = i;
1482       }
1483       break;
1484
1485     case '0':  case '1':  case '2':  case '3':
1486     case '4':  case '5':  case '6':  case '7':
1487       {
1488         size_t count = 0;
1489         cppchar_t i = c - '0';
1490
1491         while (str < limit && ++count < 3)
1492           {
1493             c = *str;
1494             if (c < '0' || c > '7')
1495               break;
1496             str++;
1497             i = (i << 3) + c - '0';
1498           }
1499
1500         if (i != (i & mask))
1501           {
1502             cpp_error (pfile, DL_PEDWARN,
1503                        "octal escape sequence out of range");
1504             i &= mask;
1505           }
1506         c = i;
1507       }
1508       break;
1509
1510     default:
1511       unknown = 1;
1512       break;
1513     }
1514
1515   if (unknown)
1516     {
1517       if (ISGRAPH (c))
1518         cpp_error (pfile, DL_PEDWARN,
1519                    "unknown escape sequence '\\%c'", (int) c);
1520       else
1521         cpp_error (pfile, DL_PEDWARN,
1522                    "unknown escape sequence: '\\%03o'", (int) c);
1523     }
1524
1525   if (c > mask)
1526     {
1527       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1528       c &= mask;
1529     }
1530
1531   *pstr = str;
1532   return c;
1533 }
1534
1535 /* Interpret a (possibly wide) character constant in TOKEN.
1536    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1537    points to a variable that is filled in with the number of
1538    characters seen, and UNSIGNEDP to a variable that indicates whether
1539    the result has signed type.  */
1540 cppchar_t
1541 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1542      cpp_reader *pfile;
1543      const cpp_token *token;
1544      unsigned int *pchars_seen;
1545      int *unsignedp;
1546 {
1547   const unsigned char *str, *limit;
1548   unsigned int chars_seen = 0;
1549   size_t width, max_chars;
1550   cppchar_t c, mask, result = 0;
1551   bool unsigned_p;
1552
1553   str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1554   limit = token->val.str.text + token->val.str.len - 1;
1555
1556   if (token->type == CPP_CHAR)
1557     {
1558       width = CPP_OPTION (pfile, char_precision);
1559       max_chars = CPP_OPTION (pfile, int_precision) / width;
1560       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1561     }
1562   else
1563     {
1564       width = CPP_OPTION (pfile, wchar_precision);
1565       max_chars = 1;
1566       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1567     }
1568
1569   if (width < BITS_PER_CPPCHAR_T)
1570     mask = ((cppchar_t) 1 << width) - 1;
1571   else
1572     mask = ~0;
1573
1574   while (str < limit)
1575     {
1576       c = *str++;
1577
1578       if (c == '\\')
1579         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1580
1581 #ifdef MAP_CHARACTER
1582       if (ISPRINT (c))
1583         c = MAP_CHARACTER (c);
1584 #endif
1585
1586       chars_seen++;
1587
1588       /* Truncate the character, scale the result and merge the two.  */
1589       c &= mask;
1590       if (width < BITS_PER_CPPCHAR_T)
1591         result = (result << width) | c;
1592       else
1593         result = c;
1594     }
1595
1596   if (chars_seen == 0)
1597     cpp_error (pfile, DL_ERROR, "empty character constant");
1598   else if (chars_seen > 1)
1599     {
1600       /* Multichar charconsts are of type int and therefore signed.  */
1601       unsigned_p = 0;
1602
1603       if (chars_seen > max_chars)
1604         {
1605           chars_seen = max_chars;
1606           cpp_error (pfile, DL_WARNING,
1607                      "character constant too long for its type");
1608         }
1609       else if (CPP_OPTION (pfile, warn_multichar))
1610         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1611     }
1612
1613   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1614      in WIDTH bits, but for multi-char charconsts it's value is the
1615      full target type's width.  */
1616   if (chars_seen > 1)
1617     width *= max_chars;
1618   if (width < BITS_PER_CPPCHAR_T)
1619     {
1620       mask = ((cppchar_t) 1 << width) - 1;
1621       if (unsigned_p || !(result & (1 << (width - 1))))
1622         result &= mask;
1623       else
1624         result |= ~mask;
1625     }
1626
1627   *pchars_seen = chars_seen;
1628   *unsignedp = unsigned_p;
1629   return result;
1630 }
1631
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized so that an expression
   argument keeps its intended grouping.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1646
1647 /* Create a new allocation buffer.  Place the control block at the end
1648    of the buffer, so that buffer overflows will cause immediate chaos.  */
1649 static _cpp_buff *
1650 new_buff (len)
1651      size_t len;
1652 {
1653   _cpp_buff *result;
1654   unsigned char *base;
1655
1656   if (len < MIN_BUFF_SIZE)
1657     len = MIN_BUFF_SIZE;
1658   len = CPP_ALIGN (len);
1659
1660   base = xmalloc (len + sizeof (_cpp_buff));
1661   result = (_cpp_buff *) (base + len);
1662   result->base = base;
1663   result->cur = base;
1664   result->limit = base + len;
1665   result->next = NULL;
1666   return result;
1667 }
1668
1669 /* Place a chain of unwanted allocation buffers on the free list.  */
1670 void
1671 _cpp_release_buff (pfile, buff)
1672      cpp_reader *pfile;
1673      _cpp_buff *buff;
1674 {
1675   _cpp_buff *end = buff;
1676
1677   while (end->next)
1678     end = end->next;
1679   end->next = pfile->free_buffs;
1680   pfile->free_buffs = buff;
1681 }
1682
1683 /* Return a free buffer of size at least MIN_SIZE.  */
1684 _cpp_buff *
1685 _cpp_get_buff (pfile, min_size)
1686      cpp_reader *pfile;
1687      size_t min_size;
1688 {
1689   _cpp_buff *result, **p;
1690
1691   for (p = &pfile->free_buffs;; p = &(*p)->next)
1692     {
1693       size_t size;
1694
1695       if (*p == NULL)
1696         return new_buff (min_size);
1697       result = *p;
1698       size = result->limit - result->base;
1699       /* Return a buffer that's big enough, but don't waste one that's
1700          way too big.  */
1701       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1702         break;
1703     }
1704
1705   *p = result->next;
1706   result->next = NULL;
1707   result->cur = result->base;
1708   return result;
1709 }
1710
1711 /* Creates a new buffer with enough space to hold the uncommitted
1712    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1713    the excess bytes to the new buffer.  Chains the new buffer after
1714    BUFF, and returns the new buffer.  */
1715 _cpp_buff *
1716 _cpp_append_extend_buff (pfile, buff, min_extra)
1717      cpp_reader *pfile;
1718      _cpp_buff *buff;
1719      size_t min_extra;
1720 {
1721   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1722   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1723
1724   buff->next = new_buff;
1725   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1726   return new_buff;
1727 }
1728
1729 /* Creates a new buffer with enough space to hold the uncommitted
1730    remaining bytes of the buffer pointed to by BUFF, and at least
1731    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1732    Chains the new buffer before the buffer pointed to by BUFF, and
1733    updates the pointer to point to the new buffer.  */
1734 void
1735 _cpp_extend_buff (pfile, pbuff, min_extra)
1736      cpp_reader *pfile;
1737      _cpp_buff **pbuff;
1738      size_t min_extra;
1739 {
1740   _cpp_buff *new_buff, *old_buff = *pbuff;
1741   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1742
1743   new_buff = _cpp_get_buff (pfile, size);
1744   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1745   new_buff->next = old_buff;
1746   *pbuff = new_buff;
1747 }
1748
1749 /* Free a chain of buffers starting at BUFF.  */
1750 void
1751 _cpp_free_buff (buff)
1752      _cpp_buff *buff;
1753 {
1754   _cpp_buff *next;
1755
1756   for (; buff; buff = next)
1757     {
1758       next = buff->next;
1759       free (buff->base);
1760     }
1761 }
1762
1763 /* Allocate permanent, unaligned storage of length LEN.  */
1764 unsigned char *
1765 _cpp_unaligned_alloc (pfile, len)
1766      cpp_reader *pfile;
1767      size_t len;
1768 {
1769   _cpp_buff *buff = pfile->u_buff;
1770   unsigned char *result = buff->cur;
1771
1772   if (len > (size_t) (buff->limit - result))
1773     {
1774       buff = _cpp_get_buff (pfile, len);
1775       buff->next = pfile->u_buff;
1776       pfile->u_buff = buff;
1777       result = buff->cur;
1778     }
1779
1780   buff->cur = result + len;
1781   return result;
1782 }
1783
1784 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1785    That buffer is used for growing allocations when saving macro
1786    replacement lists in a #define, and when parsing an answer to an
1787    assertion in #assert, #unassert or #if (and therefore possibly
1788    whilst expanding macros).  It therefore must not be used by any
1789    code that they might call: specifically the lexer and the guts of
1790    the macro expander.
1791
1792    All existing other uses clearly fit this restriction: storing
1793    registered pragmas during initialization.  */
1794 unsigned char *
1795 _cpp_aligned_alloc (pfile, len)
1796      cpp_reader *pfile;
1797      size_t len;
1798 {
1799   _cpp_buff *buff = pfile->a_buff;
1800   unsigned char *result = buff->cur;
1801
1802   if (len > (size_t) (buff->limit - result))
1803     {
1804       buff = _cpp_get_buff (pfile, len);
1805       buff->next = pfile->a_buff;
1806       pfile->a_buff = buff;
1807       result = buff->cur;
1808     }
1809
1810   buff->cur = result + len;
1811   return result;
1812 }