libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000-2016 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 3, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type		/* Category of a token spelling; stored in token_spelling.category.  */
  28 {
  29   SPELL_OPERATOR = 0,	/* Category assigned to every OP() entry of the table below.  */
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34 /* One row of the token spelling table: a category plus a spelling string.  */
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40 /* Spellings of the digraph tokens.  */
  41 static const unsigned char *const digraph_spellings[] =
  42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
  43 /* Expand TTYPE_TABLE into token_spelling rows: OP(e, s) gives { SPELL_OPERATOR, s } and TK(e, s) gives { SPELL_<s>, "<e>" }, i.e. the stringized enumerator name.  */
  44 #define OP(e, s) { SPELL_OPERATOR, UC s  },
  45 #define TK(e, s) { SPELL_ ## s,    UC #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49 /* Look up the spelling category / spelling string of TOKEN, indexed by TOKEN->type.  */
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52 /* Prototypes for file-local (static) helper functions.  */
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  58 static void store_comment (cpp_reader *, cpp_token *);
  59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  60                             unsigned int, enum cpp_ttype);
  61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  62 static int name_p (cpp_reader *, const cpp_string *);
  63 static tokenrun *next_tokenrun (tokenrun *);
  64
  65 static _cpp_buff *new_buff (size_t);
  66
  67
  68 /* Utility routine:
  69
  70    Compares, the token TOKEN to the NUL-terminated string STRING.
  71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  72 int
  73 cpp_ideq (const cpp_token *token, const char *string)
  74 {
  75   if (token->type != CPP_NAME)
  76     return 0;
  77
  78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
  79 }
  80
  81 /* Record a note TYPE at byte POS into the current cleaned logical
  82    line.  */
  83 static void
  84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  85 {
  86   if (buffer->notes_used == buffer->notes_cap)
  87     {
  88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  90                                   buffer->notes_cap);
  91     }
  92
  93   buffer->notes[buffer->notes_used].pos = pos;
  94   buffer->notes[buffer->notes_used].type = type;
  95   buffer->notes_used++;
  96 }
  97
  98 \f
  99 /* Fast path to find line special characters using optimized character
 100    scanning algorithms.  Anything complicated falls back to the slow
 101    path below.  Since this loop is very hot it's worth doing these kinds
 102    of optimizations.
 103
 104    One of the paths through the ifdefs should provide
 105
 106      const uchar *search_line_fast (const uchar *s, const uchar *end);
 107
 108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
 109    the found character.
 110
 111    Note that the last character of the buffer is *always* a newline,
 112    as forced by _cpp_convert_input.  This fact can be used to avoid
 113    explicitly looking for the end of the buffer.  */
 114
 115 /* Configure gives us an ifdef test.  Default it to 0 so that it can be tested with an ordinary `if'.  */
 116 #ifndef WORDS_BIGENDIAN
 117 #define WORDS_BIGENDIAN 0
 118 #endif
 119
 120 /* We'd like the largest integer that fits into a register.  There's nothing
 121    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
 122    but MS decided on an LLP64 model.  Thankfully when building with GCC we
 123    can get the "real" word size.  */
 124 #ifdef __GNUC__
 125 typedef unsigned int word_type __attribute__((__mode__(__word__)));
 126 #else
 127 typedef unsigned long word_type;
 128 #endif
 129
 130 /* The code below is only expecting sizes 4 or 8.
 131    Die at compile-time if this expectation is violated.  */
 132 typedef char check_word_type_size
 133   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];	/* Bound is 1 when the size is OK, -1 (invalid) otherwise.  */
 134
 135 /* Return X with the first N bytes forced to values that won't match one
 136    of the interesting characters.  Note that NUL is not interesting.  */
 137
 138 static inline word_type
 139 acc_char_mask_misalign (word_type val, unsigned int n)
 140 {
 141   word_type mask = -1;
 142   if (WORDS_BIGENDIAN)
 143     mask >>= n * 8;
 144   else
 145     mask <<= n * 8;
 146   return val & mask;
 147 }
 148
 149 /* Return X replicated to all byte positions within WORD_TYPE.  */
 150
 151 static inline word_type
 152 acc_char_replicate (uchar x)
 153 {
 154   word_type ret;
 155
 156   ret = (x << 24) | (x << 16) | (x << 8) | x;
 157   if (sizeof(word_type) == 8)
 158     ret = (ret << 16 << 16) | ret;
 159   return ret;
 160 }
 161
 162 /* Return non-zero if some byte of VAL is (probably) C.  */
 163
 164 static inline word_type
 165 acc_char_cmp (word_type val, word_type c)
 166 {
 167 #if defined(__GNUC__) && defined(__alpha__)
 168   /* We can get exact results using a compare-bytes instruction.
 169      Get (val == c) via (0 >= (val ^ c)).  */
 170   return __builtin_alpha_cmpbge (0, val ^ c);
 171 #else
 172   word_type magic = 0x7efefefeU;
 173   if (sizeof(word_type) == 8)
 174     magic = (magic << 16 << 16) | 0xfefefefeU;
 175   magic |= 1;
 176
 177   val ^= c;
 178   return ((val + magic) ^ ~val) & ~magic;
 179 #endif
 180 }
 181
 182 /* Given the result of acc_char_cmp is non-zero, return the index of
 183    the found character.  If this was a false positive, return -1.  */
 184
 185 static inline int
 186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
 187                 word_type val ATTRIBUTE_UNUSED)
 188 {
 189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
 190   /* The cmpbge instruction sets *bits* of the result corresponding to
 191      matches in the bytes with no false positives.  */
 192   return __builtin_ctzl (cmp);
 193 #else
 194   unsigned int i;
 195
 196   /* ??? It would be nice to force unrolling here,
 197      and have all of these constants folded.  */
 198   for (i = 0; i < sizeof(word_type); ++i)
 199     {
 200       uchar c;
 201       if (WORDS_BIGENDIAN)
 202         c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
 203       else
 204         c = (val >> i * 8) & 0xff;
 205
 206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
 207         return i;
 208     }
 209
 210   return -1;
 211 #endif
 212 }
 213
 214 /* A version of the fast scanner using bit fiddling techniques.
 215
 216    For 32-bit words, one would normally perform 16 comparisons and
 217    16 branches.  With this algorithm one performs 24 arithmetic
 218    operations and one branch.  Whether this is faster with a 32-bit
 219    word size is going to be somewhat system dependent.
 220
 221    For 64-bit words, we eliminate twice the number of comparisons
 222    and branches without increasing the number of arithmetic operations.
 223    It's almost certainly going to be a win with 64-bit word size.  */
 224
 225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
 226   ATTRIBUTE_UNUSED;
 227
 228 static const uchar *
 229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 230 {
 231   const word_type repl_nl = acc_char_replicate ('\n');
 232   const word_type repl_cr = acc_char_replicate ('\r');
 233   const word_type repl_bs = acc_char_replicate ('\\');
 234   const word_type repl_qm = acc_char_replicate ('?');
 235
 236   unsigned int misalign;
 237   const word_type *p;
 238   word_type val, t;
 239
 240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
 241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
 242   val = *p;
 243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
 244   if (misalign)
 245     val = acc_char_mask_misalign (val, misalign);
 246
 247   /* Main loop.  */
 248   while (1)
 249     {
 250       t  = acc_char_cmp (val, repl_nl);
 251       t |= acc_char_cmp (val, repl_cr);
 252       t |= acc_char_cmp (val, repl_bs);
 253       t |= acc_char_cmp (val, repl_qm);
 254
 255       if (__builtin_expect (t != 0, 0))
 256         {
 257           int i = acc_char_index (t, val);
 258           if (i >= 0)
 259             return (const uchar *)p + i;
 260         }
 261
 262       val = *++p;
 263     }
 264 }
 265
 266 /* Disable on Solaris 2/x86 until the following problem can be properly
 267    autoconfed:
 268
 269    The Solaris 10+ assembler tags objects with the instruction set
 270    extensions used, so SSE4.2 executables cannot run on machines that
 271    don't support that extension.  */
 272
 273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
 274
 275 /* Replicated character data to be shared between implementations.
 276    Recall that outside of a context with vector support we can't
 277    define compatible vector types, therefore these are all defined
 278    in terms of raw characters.  */
 279 static const char repl_chars[4][16] __attribute__((aligned(16))) = {	/* Rows are read as 8- or 16-byte vectors by the MMX and SSE2 scanners.  */
 280   { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 281     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },	/* [0]: newline */
 282   { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 283     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },	/* [1]: carriage return */
 284   { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 285     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },	/* [2]: backslash */
 286   { '?', '?', '?', '?', '?', '?', '?', '?',
 287     '?', '?', '?', '?', '?', '?', '?', '?' },	/* [3]: question mark */
 288 };
 289
 290 /* A version of the fast scanner using MMX vectorized byte compare insns.
 291
 292    This uses the PMOVMSKB instruction which was introduced with "MMX2",
 293    which was packaged into SSE1; it is also present in the AMD MMX
 294    extension.  Mark the function as using "sse" so that we emit a real
 295    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
 296
 297 static const uchar *
 298 #ifndef __SSE__
 299 __attribute__((__target__("sse")))
 300 #endif
 301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 302 {
 303   typedef char v8qi __attribute__ ((__vector_size__ (8)));
 304   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
 305
 306   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
 307   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
 308   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
 309   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
 310
 311   unsigned int misalign, found, mask;
 312   const v8qi *p;
 313   v8qi data, t, c;
 314
 315   /* Align the source pointer.  While MMX doesn't generate unaligned data
 316      faults, this allows us to safely scan to the end of the buffer without
 317      reading beyond the end of the last page.  */
 318   misalign = (uintptr_t)s & 7;
 319   p = (const v8qi *)((uintptr_t)s & -8);
 320   data = *p;
 321
 322   /* Create a mask for the bytes that are valid within the first
 323      16-byte block.  The Idea here is that the AND with the mask
 324      within the loop is "free", since we need some AND or TEST
 325      insn in order to set the flags for the branch anyway.  */
 326   mask = -1u << misalign;
 327
 328   /* Main loop processing 8 bytes at a time.  */
 329   goto start;
 330   do
 331     {
 332       data = *++p;
 333       mask = -1;
 334
 335     start:
 336       t = __builtin_ia32_pcmpeqb(data, repl_nl);
 337       c = __builtin_ia32_pcmpeqb(data, repl_cr);
 338       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 339       c = __builtin_ia32_pcmpeqb(data, repl_bs);
 340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 341       c = __builtin_ia32_pcmpeqb(data, repl_qm);
 342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 343       found = __builtin_ia32_pmovmskb (t);
 344       found &= mask;
 345     }
 346   while (!found);
 347
 348   __builtin_ia32_emms ();
 349
 350   /* FOUND contains 1 in bits for which we matched a relevant
 351      character.  Conversion to the byte index is trivial.  */
 352   found = __builtin_ctz(found);
 353   return (const uchar *)p + found;
 354 }
 355
 356 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
 357
 358 static const uchar *
 359 #ifndef __SSE2__
 360 __attribute__((__target__("sse2")))
 361 #endif
 362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 363 {
 364   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 365
 366   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
 367   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
 368   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
 369   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
 370
 371   unsigned int misalign, found, mask;
 372   const v16qi *p;
 373   v16qi data, t;
 374
 375   /* Align the source pointer.  */
 376   misalign = (uintptr_t)s & 15;
 377   p = (const v16qi *)((uintptr_t)s & -16);
 378   data = *p;
 379
 380   /* Create a mask for the bytes that are valid within the first
 381      16-byte block.  The Idea here is that the AND with the mask
 382      within the loop is "free", since we need some AND or TEST
 383      insn in order to set the flags for the branch anyway.  */
 384   mask = -1u << misalign;
 385
 386   /* Main loop processing 16 bytes at a time.  */
 387   goto start;
 388   do
 389     {
 390       data = *++p;
 391       mask = -1;
 392
 393     start:
 394       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
 395       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
 396       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
 397       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
 398       found = __builtin_ia32_pmovmskb128 (t);
 399       found &= mask;
 400     }
 401   while (!found);
 402
 403   /* FOUND contains 1 in bits for which we matched a relevant
 404      character.  Conversion to the byte index is trivial.  */
 405   found = __builtin_ctz(found);
 406   return (const uchar *)p + found;
 407 }
 408
 409 #ifdef HAVE_SSE4
 410 /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
 411 /* Returns a pointer to the first '\n', '\r', '?' or '\\' at or after S.  END is only used to decide whether the initial unaligned 16-byte read is safe.  */
 412 static const uchar *
 413 #ifndef __SSE4_2__
 414 __attribute__((__target__("sse4.2")))
 415 #endif
 416 search_line_sse42 (const uchar *s, const uchar *end)
 417 {
 418   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 419   static const v16qi search = { '\n', '\r', '?', '\\' };	/* The four characters looked for; the remaining lanes are zero.  */
 420
 421   uintptr_t si = (uintptr_t)s;
 422   uintptr_t index;
 423
 424   /* Check for unaligned input.  */
 425   if (si & 15)
 426     {
 427       v16qi sv;
 428
 429       if (__builtin_expect (end - s < 16, 0)
 430           && __builtin_expect ((si & 0xfff) > 0xff0, 0))
 431         {
 432           /* There are less than 16 bytes left in the buffer, and less
 433              than 16 bytes left on the page.  Reading 16 bytes at this
 434              point might generate a spurious page fault.  Defer to the
 435              SSE2 implementation, which already handles alignment.  */
 436           return search_line_sse2 (s, end);
 437         }
 438
 439       /* ??? The builtin doesn't understand that the PCMPESTRI read from
 440          memory need not be aligned.  */
 441       sv = __builtin_ia32_loaddqu ((const char *) s);
 442       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);	/* 4 characters in SEARCH against 16 bytes of SV.  */
 443
 444       if (__builtin_expect (index < 16, 0))
 445         goto found;
 446
 447       /* Advance the pointer to an aligned address.  We will re-scan a
 448          few bytes, but we no longer need care for reading past the
 449          end of a page, since we're guaranteed a match.  */
 450       s = (const uchar *)((si + 15) & -16);
 451     }
 452
 453   /* Main loop, processing 16 bytes at a time.  */
 454 #ifdef __GCC_ASM_FLAG_OUTPUTS__
 455   while (1)
 456     {
 457       char f;
 458
 459       /* By using inline assembly instead of the builtin,
 460          we can use the result, as well as the flags set.  */
 461       __asm ("%vpcmpestri\t$0, %2, %3"
 462              : "=c"(index), "=@ccc"(f)
 463              : "m"(*s), "x"(search), "a"(4), "d"(16));
 464       if (f)
 465         break;
 466       /* F is the carry flag output of the asm; it was clear, so move on to the next block.  */
 467       s += 16;
 468     }
 469 #else
 470   s -= 16;	/* Pre-bias S: the asm loop below adds 16 before each compare.  */
 471   /* By doing the whole loop in inline assembly,
 472      we can make proper use of the flags set.  */
 473   __asm (      ".balign 16\n"
 474         "0:     add $16, %1\n"
 475         "       %vpcmpestri\t$0, (%1), %2\n"
 476         "       jnc 0b"
 477         : "=&c"(index), "+r"(s)
 478         : "x"(search), "a"(4), "d"(16));
 479 #endif
 480   /* Here S is the start of the 16 bytes that matched and INDEX the offset of the match within them.  */
 481  found:
 482   return s + index;
 483 }
 484
 485 #else
 486 /* Work around out-dated assemblers without sse4 support.  */
 487 #define search_line_sse42 search_line_sse2
 488 #endif
 489
 490 /* Check the CPU capabilities.  */
 491
 492 #include "../gcc/config/i386/cpuid.h"
 493 /* Signature shared by the scanner implementations, and the pointer to the one chosen by init_vectorized_lexer.  */
 494 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
 495 static search_line_fast_type search_line_fast;
 496
 497 #define HAVE_init_vectorized_lexer 1
 498 static inline void
 499 init_vectorized_lexer (void)
 500 {
 501   unsigned dummy, ecx = 0, edx = 0;
 502   search_line_fast_type impl = search_line_acc_char;
 503   int minimum = 0;
 504
 505 #if defined(__SSE4_2__)
 506   minimum = 3;
 507 #elif defined(__SSE2__)
 508   minimum = 2;
 509 #elif defined(__SSE__)
 510   minimum = 1;
 511 #endif
 512
 513   if (minimum == 3)
 514     impl = search_line_sse42;
 515   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
 516     {
 517       if (minimum == 3 || (ecx & bit_SSE4_2))
 518         impl = search_line_sse42;
 519       else if (minimum == 2 || (edx & bit_SSE2))
 520         impl = search_line_sse2;
 521       else if (minimum == 1 || (edx & bit_SSE))
 522         impl = search_line_mmx;
 523     }
 524   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
 525     {
 526       if (minimum == 1
 527           || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
 528         impl = search_line_mmx;
 529     }
 530
 531   search_line_fast = impl;
 532 }
 533
 534 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
 535
 536 /* A version of the fast scanner using AltiVec vectorized byte compares
 537    and VSX unaligned loads (when VSX is available).  This is otherwise
 538    the same as the pre-GCC 5 version.  */
 539 /* There is no alignment step here: each 16-byte block is read directly from S.  */
 540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
 541 static const uchar *
 542 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 543 {
 544   typedef __attribute__((altivec(vector))) unsigned char vc;
 545
 546   const vc repl_nl = {
 547     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 548     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 549   };
 550   const vc repl_cr = {
 551     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 552     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 553   };
 554   const vc repl_bs = {
 555     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 556     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 557   };
 558   const vc repl_qm = {
 559     '?', '?', '?', '?', '?', '?', '?', '?',
 560     '?', '?', '?', '?', '?', '?', '?', '?',
 561   };
 562   const vc zero = { 0 };
 563
 564   vc data, t;
 565
 566   /* Main loop processing 16 bytes at a time.  */
 567   do
 568     {
 569       vc m_nl, m_cr, m_bs, m_qm;
 570
 571       data = *((const vc *)s);
 572       s += 16;
 573
 574       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 575       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 576       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 577       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 578       t = (m_nl | m_cr) | (m_bs | m_qm);
 579
 580       /* T now contains 0xff in bytes for which we matched one of the relevant
 581          characters.  We want to exit the loop if any byte in T is non-zero.
 582          Below is the expansion of vec_any_ne(t, zero).  */
 583     }
 584   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 585
 586   /* Restore s to point to the 16 bytes we just processed.  */
 587   s -= 16;
 588
 589   {
 590 #define N  (sizeof(vc) / sizeof(long))
 591
 592     union {
 593       vc v;
 594       /* Statically assert that N is 2 or 4.  */
 595       unsigned long l[(N == 2 || N == 4) ? N : -1];
 596     } u;
 597     unsigned long l, i = 0;
 598
 599     u.v = t;
 600
 601     /* Find the first word of T that is non-zero.  */
 602     switch (N)
 603       {
 604       case 4:
 605         l = u.l[i++];
 606         if (l != 0)
 607           break;
 608         s += sizeof(unsigned long);
 609         l = u.l[i++];
 610         if (l != 0)
 611           break;
 612         s += sizeof(unsigned long);	/* FALLTHRU: examine the remaining two words.  */
 613       case 2:
 614         l = u.l[i++];
 615         if (l != 0)
 616           break;
 617         s += sizeof(unsigned long);
 618         l = u.l[i];
 619       }
 620
 621     /* L now contains 0xff in bytes for which we matched one of the
 622        relevant characters.  We can find the byte index by finding
 623        its bit index and dividing by 8.  */
 624 #ifdef __BIG_ENDIAN__
 625     l = __builtin_clzl(l) >> 3;
 626 #else
 627     l = __builtin_ctzl(l) >> 3;
 628 #endif
 629     return s + l;
 630
 631 #undef N
 632   }
 633 }
 634
 635 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
 636
 637 /* A version of the fast scanner using AltiVec vectorized byte compares.
 638    This cannot be used for little endian because vec_lvsl/lvsr are
 639    deprecated for little endian and the code won't work properly.  */
 640 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
 641    so we can't compile this function without -maltivec on the command line
 642    (or implied by some other switch).  */
 643
 644 static const uchar *
 645 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 646 {
 647   typedef __attribute__((altivec(vector))) unsigned char vc;
 648
 649   const vc repl_nl = {
 650     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 651     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 652   };
 653   const vc repl_cr = {
 654     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 655     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 656   };
 657   const vc repl_bs = {
 658     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 659     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 660   };
 661   const vc repl_qm = {
 662     '?', '?', '?', '?', '?', '?', '?', '?',
 663     '?', '?', '?', '?', '?', '?', '?', '?',
 664   };
 665   const vc ones = {
 666     -1, -1, -1, -1, -1, -1, -1, -1,
 667     -1, -1, -1, -1, -1, -1, -1, -1,
 668   };
 669   const vc zero = { 0 };
 670
 671   vc data, mask, t;
 672
 673   /* Altivec loads automatically mask addresses with -16.  This lets us
 674      issue the first load as early as possible.  */
 675   data = __builtin_vec_ld(0, (const vc *)s);
 676
 677   /* Discard bytes before the beginning of the buffer.  Do this by
 678      beginning with all ones and shifting in zeros according to the
 679      mis-alignment.  The LVSR instruction pulls the exact shift we
 680      want from the address.  */
 681   mask = __builtin_vec_lvsr(0, s);
 682   mask = __builtin_vec_perm(zero, ones, mask);
 683   data &= mask;
 684
 685   /* While altivec loads mask addresses, we still need to align S so
 686      that the offset we compute at the end is correct.  */
 687   s = (const uchar *)((uintptr_t)s & -16);
 688
 689   /* Main loop processing 16 bytes at a time.  */
 690   goto start;	/* The first block is already loaded and masked; skip the reload.  */
 691   do
 692     {
 693       vc m_nl, m_cr, m_bs, m_qm;
 694
 695       s += 16;
 696       data = __builtin_vec_ld(0, (const vc *)s);
 697
 698     start:
 699       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 700       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 701       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 702       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 703       t = (m_nl | m_cr) | (m_bs | m_qm);
 704
 705       /* T now contains 0xff in bytes for which we matched one of the relevant
 706          characters.  We want to exit the loop if any byte in T is non-zero.
 707          Below is the expansion of vec_any_ne(t, zero).  */
 708     }
 709   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 710
 711   {
 712 #define N  (sizeof(vc) / sizeof(long))
 713
 714     union {
 715       vc v;
 716       /* Statically assert that N is 2 or 4.  */
 717       unsigned long l[(N == 2 || N == 4) ? N : -1];
 718     } u;
 719     unsigned long l, i = 0;
 720
 721     u.v = t;
 722
 723     /* Find the first word of T that is non-zero.  */
 724     switch (N)
 725       {
 726       case 4:
 727         l = u.l[i++];
 728         if (l != 0)
 729           break;
 730         s += sizeof(unsigned long);
 731         l = u.l[i++];
 732         if (l != 0)
 733           break;
 734         s += sizeof(unsigned long);	/* FALLTHRU: examine the remaining two words.  */
 735       case 2:
 736         l = u.l[i++];
 737         if (l != 0)
 738           break;
 739         s += sizeof(unsigned long);
 740         l = u.l[i];
 741       }
 742
 743     /* L now contains 0xff in bytes for which we matched one of the
 744        relevant characters.  We can find the byte index by finding
 745        its bit index and dividing by 8.  */
 746     l = __builtin_clzl(l) >> 3;	/* This variant is only built for big endian, hence leading zeros.  */
 747     return s + l;
 748
 749 #undef N
 750   }
 751 }
 752
 753 #elif defined (__ARM_NEON)
 754 #include "arm_neon.h"
 755
 756 static const uchar *
 757 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 758 {
 759   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
 760   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
 761   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
 762   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
 763   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
 764
 765   unsigned int misalign, found, mask;
 766   const uint8_t *p;
 767   uint8x16_t data;
 768
 769   /* Align the source pointer.  */
 770   misalign = (uintptr_t)s & 15;
 771   p = (const uint8_t *)((uintptr_t)s & -16);
 772   data = vld1q_u8 (p);
 773
 774   /* Create a mask for the bytes that are valid within the first
 775      16-byte block.  The Idea here is that the AND with the mask
 776      within the loop is "free", since we need some AND or TEST
 777      insn in order to set the flags for the branch anyway.  */
 778   mask = (-1u << misalign) & 0xffff;
 779
 780   /* Main loop, processing 16 bytes at a time.  */
 781   goto start;
 782
 783   do
 784     {
 785       uint8x8_t l;
 786       uint16x4_t m;
 787       uint32x2_t n;
 788       uint8x16_t t, u, v, w;
 789
 790       p += 16;
 791       data = vld1q_u8 (p);
 792       mask = 0xffff;
 793
 794     start:
 795       t = vceqq_u8 (data, repl_nl);
 796       u = vceqq_u8 (data, repl_cr);
 797       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
 798       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
 799       t = vandq_u8 (vorrq_u8 (v, w), xmask);
 800       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
 801       m = vpaddl_u8 (l);
 802       n = vpaddl_u16 (m);
 803
 804       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
 805               vshr_n_u64 ((uint64x1_t) n, 24)), 0);
 806       found &= mask;
 807     }
 808   while (!found);
 809
 810   /* FOUND contains 1 in bits for which we matched a relevant
 811      character.  Conversion to the byte index is trivial.  */
 812   found = __builtin_ctz (found);
 813   return (const uchar *)p + found;
 814 }
 815
 816 #else
 817
 818 /* We only have one accelerated alternative.  Use a direct call so that
 819    we encourage inlining.  */
 820
 821 #define search_line_fast  search_line_acc_char
 822
 823 #endif
 824
 /* Initialize the lexer if needed.  When this build provides a
    run-time selectable vectorized line scanner, choose it now;
    otherwise there is nothing to do.  */

 void
 _cpp_init_lexer (void)
 {
 #if defined (HAVE_init_vectorized_lexer)
   init_vectorized_lexer ();
 #endif
 }
 834
 835 /* Returns with a logical line that contains no escaped newlines or
 836    trigraphs.  This is a time-critical inner loop.  */
 837 void
 838 _cpp_clean_line (cpp_reader *pfile)
 839 {
 840   cpp_buffer *buffer;
 841   const uchar *s;
 842   uchar c, *d, *p;
 843
 844   buffer = pfile->buffer;
 845   buffer->cur_note = buffer->notes_used = 0;
 846   buffer->cur = buffer->line_base = buffer->next_line;
 847   buffer->need_line = false;
 848   s = buffer->next_line;
 849
 850   if (!buffer->from_stage3)
 851     {
 852       const uchar *pbackslash = NULL;
 853
 854       /* Fast path.  This is the common case of an un-escaped line with
 855          no trigraphs.  The primary win here is by not writing any
 856          data back to memory until we have to.  */
 857       while (1)
 858         {
 859           /* Perform an optimized search for \n, \r, \\, ?.  */
 860           s = search_line_fast (s, buffer->rlimit);
 861
 862           c = *s;
 863           if (c == '\\')
 864             {
 865               /* Record the location of the backslash and continue.  */
 866               pbackslash = s++;
 867             }
 868           else if (__builtin_expect (c == '?', 0))
 869             {
 870               if (__builtin_expect (s[1] == '?', false)
 871                    && _cpp_trigraph_map[s[2]])
 872                 {
 873                   /* Have a trigraph.  We may or may not have to convert
 874                      it.  Add a line note regardless, for -Wtrigraphs.  */
 875                   add_line_note (buffer, s, s[2]);
 876                   if (CPP_OPTION (pfile, trigraphs))
 877                     {
 878                       /* We do, and that means we have to switch to the
 879                          slow path.  */
 880                       d = (uchar *) s;
 881                       *d = _cpp_trigraph_map[s[2]];
 882                       s += 2;
 883                       goto slow_path;
 884                     }
 885                 }
 886               /* Not a trigraph.  Continue on fast-path.  */
 887               s++;
 888             }
 889           else
 890             break;
 891         }
 892
 893       /* This must be \r or \n.  We're either done, or we'll be forced
 894          to write back to the buffer and continue on the slow path.  */
 895       d = (uchar *) s;
 896
 897       if (__builtin_expect (s == buffer->rlimit, false))
 898         goto done;
 899
 900       /* DOS line ending? */
 901       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
 902         {
 903           s++;
 904           if (s == buffer->rlimit)
 905             goto done;
 906         }
 907
 908       if (__builtin_expect (pbackslash == NULL, true))
 909         goto done;
 910
 911       /* Check for escaped newline.  */
 912       p = d;
 913       while (is_nvspace (p[-1]))
 914         p--;
 915       if (p - 1 != pbackslash)
 916         goto done;
 917
 918       /* Have an escaped newline; process it and proceed to
 919          the slow path.  */
 920       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 921       d = p - 2;
 922       buffer->next_line = p - 1;
 923
 924     slow_path:
 925       while (1)
 926         {
 927           c = *++s;
 928           *++d = c;
 929
 930           if (c == '\n' || c == '\r')
 931             {
 932               /* Handle DOS line endings.  */
 933               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 934                 s++;
 935               if (s == buffer->rlimit)
 936                 break;
 937
 938               /* Escaped?  */
 939               p = d;
 940               while (p != buffer->next_line && is_nvspace (p[-1]))
 941                 p--;
 942               if (p == buffer->next_line || p[-1] != '\\')
 943                 break;
 944
 945               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 946               d = p - 2;
 947               buffer->next_line = p - 1;
 948             }
 949           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 950             {
 951               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 952               add_line_note (buffer, d, s[2]);
 953               if (CPP_OPTION (pfile, trigraphs))
 954                 {
 955                   *d = _cpp_trigraph_map[s[2]];
 956                   s += 2;
 957                 }
 958             }
 959         }
 960     }
 961   else
 962     {
 963       while (*s != '\n' && *s != '\r')
 964         s++;
 965       d = (uchar *) s;
 966
 967       /* Handle DOS line endings.  */
 968       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 969         s++;
 970     }
 971
 972  done:
 973   *d = '\n';
 974   /* A sentinel note that should never be processed.  */
 975   add_line_note (buffer, d + 1, '\n');
 976   buffer->next_line = s + 1;
 977 }
 978
 979 /* Return true if the trigraph indicated by NOTE should be warned
 980    about in a comment.  */
 981 static bool
 982 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 983 {
 984   const uchar *p;
 985
 986   /* Within comments we don't warn about trigraphs, unless the
 987      trigraph forms an escaped newline, as that may change
 988      behavior.  */
 989   if (note->type != '/')
 990     return false;
 991
 992   /* If -trigraphs, then this was an escaped newline iff the next note
 993      is coincident.  */
 994   if (CPP_OPTION (pfile, trigraphs))
 995     return note[1].pos == note->pos;
 996
 997   /* Otherwise, see if this forms an escaped newline.  */
 998   p = note->pos + 3;
 999   while (is_nvspace (*p))
1000     p++;
1001
1002   /* There might have been escaped newlines between the trigraph and the
1003      newline we found.  Hence the position test.  */
1004   return (*p == '\n' && p < note[1].pos);
1005 }
1006
1007 /* Process the notes created by add_line_note as far as the current
1008    location.  */
1009 void
1010 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
1011 {
1012   cpp_buffer *buffer = pfile->buffer;
1013
1014   for (;;)
1015     {
1016       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1017       unsigned int col;
1018
1019       if (note->pos > buffer->cur)
1020         break;
1021
1022       buffer->cur_note++;
1023       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1024
1025       if (note->type == '\\' || note->type == ' ')
1026         {
1027           if (note->type == ' ' && !in_comment)
1028             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1029                                  "backslash and newline separated by space");
1030
1031           if (buffer->next_line > buffer->rlimit)
1032             {
1033               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1034                                    "backslash-newline at end of file");
1035               /* Prevent "no newline at end of file" warning.  */
1036               buffer->next_line = buffer->rlimit;
1037             }
1038
1039           buffer->line_base = note->pos;
1040           CPP_INCREMENT_LINE (pfile, 0);
1041         }
1042       else if (_cpp_trigraph_map[note->type])
1043         {
1044           if (CPP_OPTION (pfile, warn_trigraphs)
1045               && (!in_comment || warn_in_comment (pfile, note)))
1046             {
1047               if (CPP_OPTION (pfile, trigraphs))
1048                 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1049                                        pfile->line_table->highest_line, col,
1050                                        "trigraph ??%c converted to %c",
1051                                        note->type,
1052                                        (int) _cpp_trigraph_map[note->type]);
1053               else
1054                 {
1055                   cpp_warning_with_line
1056                     (pfile, CPP_W_TRIGRAPHS,
1057                      pfile->line_table->highest_line, col,
1058                      "trigraph ??%c ignored, use -trigraphs to enable",
1059                      note->type);
1060                 }
1061             }
1062         }
1063       else if (note->type == 0)
1064         /* Already processed in lex_raw_string.  */;
1065       else
1066         abort ();
1067     }
1068 }
1069
1070 /* Skip a C-style block comment.  We find the end of the comment by
1071    seeing if an asterisk is before every '/' we encounter.  Returns
1072    nonzero if comment terminated by EOF, zero otherwise.
1073
1074    Buffer->cur points to the initial asterisk of the comment.  */
1075 bool
1076 _cpp_skip_block_comment (cpp_reader *pfile)
1077 {
1078   cpp_buffer *buffer = pfile->buffer;
1079   const uchar *cur = buffer->cur;
1080   uchar c;
1081
1082   cur++;
1083   if (*cur == '/')
1084     cur++;
1085
1086   for (;;)
1087     {
1088       /* People like decorating comments with '*', so check for '/'
1089          instead for efficiency.  */
1090       c = *cur++;
1091
1092       if (c == '/')
1093         {
1094           if (cur[-2] == '*')
1095             break;
1096
1097           /* Warn about potential nested comments, but not if the '/'
1098              comes immediately before the true comment delimiter.
1099              Don't bother to get it right across escaped newlines.  */
1100           if (CPP_OPTION (pfile, warn_comments)
1101               && cur[0] == '*' && cur[1] != '/')
1102             {
1103               buffer->cur = cur;
1104               cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1105                                      pfile->line_table->highest_line,
1106                                      CPP_BUF_COL (buffer),
1107                                      "\"/*\" within comment");
1108             }
1109         }
1110       else if (c == '\n')
1111         {
1112           unsigned int cols;
1113           buffer->cur = cur - 1;
1114           _cpp_process_line_notes (pfile, true);
1115           if (buffer->next_line >= buffer->rlimit)
1116             return true;
1117           _cpp_clean_line (pfile);
1118
1119           cols = buffer->next_line - buffer->line_base;
1120           CPP_INCREMENT_LINE (pfile, cols);
1121
1122           cur = buffer->cur;
1123         }
1124     }
1125
1126   buffer->cur = cur;
1127   _cpp_process_line_notes (pfile, true);
1128   return false;
1129 }
1130
1131 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1132    terminating newline.  Handles escaped newlines.  Returns nonzero
1133    if a multiline comment.  */
1134 static int
1135 skip_line_comment (cpp_reader *pfile)
1136 {
1137   cpp_buffer *buffer = pfile->buffer;
1138   source_location orig_line = pfile->line_table->highest_line;
1139
1140   while (*buffer->cur != '\n')
1141     buffer->cur++;
1142
1143   _cpp_process_line_notes (pfile, true);
1144   return orig_line != pfile->line_table->highest_line;
1145 }
1146
1147 /* Skips whitespace, saving the next non-whitespace character.  */
1148 static void
1149 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1150 {
1151   cpp_buffer *buffer = pfile->buffer;
1152   bool saw_NUL = false;
1153
1154   do
1155     {
1156       /* Horizontal space always OK.  */
1157       if (c == ' ' || c == '\t')
1158         ;
1159       /* Just \f \v or \0 left.  */
1160       else if (c == '\0')
1161         saw_NUL = true;
1162       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1163         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1164                              CPP_BUF_COL (buffer),
1165                              "%s in preprocessing directive",
1166                              c == '\f' ? "form feed" : "vertical tab");
1167
1168       c = *buffer->cur++;
1169     }
1170   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1171   while (is_nvspace (c));
1172
1173   if (saw_NUL)
1174     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1175
1176   buffer->cur--;
1177 }
1178
1179 /* See if the characters of a number token are valid in a name (no
1180    '.', '+' or '-').  */
1181 static int
1182 name_p (cpp_reader *pfile, const cpp_string *string)
1183 {
1184   unsigned int i;
1185
1186   for (i = 0; i < string->len; i++)
1187     if (!is_idchar (string->text[i]))
1188       return 0;
1189
1190   return 1;
1191 }
1192
1193 /* After parsing an identifier or other sequence, produce a warning about
1194    sequences not in NFC/NFKC.  */
1195 static void
1196 warn_about_normalization (cpp_reader *pfile,
1197                           const cpp_token *token,
1198                           const struct normalize_state *s)
1199 {
1200   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1201       && !pfile->state.skipping)
1202     {
1203       /* Make sure that the token is printed using UCNs, even
1204          if we'd otherwise happily print UTF-8.  */
1205       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1206       size_t sz;
1207
1208       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1209       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1210         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1211                                "`%.*s' is not in NFKC", (int) sz, buf);
1212       else
1213         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1214                                "`%.*s' is not in NFC", (int) sz, buf);
1215       free (buf);
1216     }
1217 }
1218
1219 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1220    an identifier.  FIRST is TRUE if this starts an identifier.  */
1221 static bool
1222 forms_identifier_p (cpp_reader *pfile, int first,
1223                     struct normalize_state *state)
1224 {
1225   cpp_buffer *buffer = pfile->buffer;
1226
1227   if (*buffer->cur == '$')
1228     {
1229       if (!CPP_OPTION (pfile, dollars_in_ident))
1230         return false;
1231
1232       buffer->cur++;
1233       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1234         {
1235           CPP_OPTION (pfile, warn_dollars) = 0;
1236           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1237         }
1238
1239       return true;
1240     }
1241
1242   /* Is this a syntactically valid UCN?  */
1243   if (CPP_OPTION (pfile, extended_identifiers)
1244       && *buffer->cur == '\\'
1245       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1246     {
1247       cppchar_t s;
1248       buffer->cur += 2;
1249       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1250                           state, &s))
1251         return true;
1252       buffer->cur -= 2;
1253     }
1254
1255   return false;
1256 }
1257
1258 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1259 static cpp_hashnode *
1260 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1261 {
1262   cpp_hashnode *result;
1263   const uchar *cur;
1264   unsigned int len;
1265   unsigned int hash = HT_HASHSTEP (0, *base);
1266
1267   cur = base + 1;
1268   while (ISIDNUM (*cur))
1269     {
1270       hash = HT_HASHSTEP (hash, *cur);
1271       cur++;
1272     }
1273   len = cur - base;
1274   hash = HT_HASHFINISH (hash, len);
1275   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1276                                               base, len, hash, HT_ALLOC));
1277
1278   /* Rarely, identifiers require diagnostics when lexed.  */
1279   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1280                         && !pfile->state.skipping, 0))
1281     {
1282       /* It is allowed to poison the same identifier twice.  */
1283       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1284         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1285                    NODE_NAME (result));
1286
1287       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1288          replacement list of a variadic macro.  */
1289       if (result == pfile->spec_nodes.n__VA_ARGS__
1290           && !pfile->state.va_args_ok)
1291         {
1292           if (CPP_OPTION (pfile, cplusplus))
1293             cpp_error (pfile, CPP_DL_PEDWARN,
1294                        "__VA_ARGS__ can only appear in the expansion"
1295                        " of a C++11 variadic macro");
1296           else
1297             cpp_error (pfile, CPP_DL_PEDWARN,
1298                        "__VA_ARGS__ can only appear in the expansion"
1299                        " of a C99 variadic macro");
1300         }
1301
1302       /* For -Wc++-compat, warn about use of C++ named operators.  */
1303       if (result->flags & NODE_WARN_OPERATOR)
1304         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1305                      "identifier \"%s\" is a special operator name in C++",
1306                      NODE_NAME (result));
1307     }
1308
1309   return result;
1310 }
1311
1312 /* Get the cpp_hashnode of an identifier specified by NAME in
1313    the current cpp_reader object.  If none is found, NULL is returned.  */
1314 cpp_hashnode *
1315 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1316 {
1317   cpp_hashnode *result;
1318   result = lex_identifier_intern (pfile, (uchar *) name);
1319   return result;
1320 }
1321
1322 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1323 static cpp_hashnode *
1324 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1325                 struct normalize_state *nst, cpp_hashnode **spelling)
1326 {
1327   cpp_hashnode *result;
1328   const uchar *cur;
1329   unsigned int len;
1330   unsigned int hash = HT_HASHSTEP (0, *base);
1331
1332   cur = pfile->buffer->cur;
1333   if (! starts_ucn)
1334     {
1335       while (ISIDNUM (*cur))
1336         {
1337           hash = HT_HASHSTEP (hash, *cur);
1338           cur++;
1339         }
1340       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1341     }
1342   pfile->buffer->cur = cur;
1343   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1344     {
1345       /* Slower version for identifiers containing UCNs (or $).  */
1346       do {
1347         while (ISIDNUM (*pfile->buffer->cur))
1348           {
1349             NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1350             pfile->buffer->cur++;
1351           }
1352       } while (forms_identifier_p (pfile, false, nst));
1353       result = _cpp_interpret_identifier (pfile, base,
1354                                           pfile->buffer->cur - base);
1355       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1356     }
1357   else
1358     {
1359       len = cur - base;
1360       hash = HT_HASHFINISH (hash, len);
1361
1362       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1363                                                   base, len, hash, HT_ALLOC));
1364       *spelling = result;
1365     }
1366
1367   /* Rarely, identifiers require diagnostics when lexed.  */
1368   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1369                         && !pfile->state.skipping, 0))
1370     {
1371       /* It is allowed to poison the same identifier twice.  */
1372       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1373         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1374                    NODE_NAME (result));
1375
1376       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1377          replacement list of a variadic macro.  */
1378       if (result == pfile->spec_nodes.n__VA_ARGS__
1379           && !pfile->state.va_args_ok)
1380         {
1381           if (CPP_OPTION (pfile, cplusplus))
1382             cpp_error (pfile, CPP_DL_PEDWARN,
1383                        "__VA_ARGS__ can only appear in the expansion"
1384                        " of a C++11 variadic macro");
1385           else
1386             cpp_error (pfile, CPP_DL_PEDWARN,
1387                        "__VA_ARGS__ can only appear in the expansion"
1388                        " of a C99 variadic macro");
1389         }
1390
1391       /* For -Wc++-compat, warn about use of C++ named operators.  */
1392       if (result->flags & NODE_WARN_OPERATOR)
1393         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1394                      "identifier \"%s\" is a special operator name in C++",
1395                      NODE_NAME (result));
1396     }
1397
1398   return result;
1399 }
1400
1401 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1402 static void
1403 lex_number (cpp_reader *pfile, cpp_string *number,
1404             struct normalize_state *nst)
1405 {
1406   const uchar *cur;
1407   const uchar *base;
1408   uchar *dest;
1409
1410   base = pfile->buffer->cur - 1;
1411   do
1412     {
1413       cur = pfile->buffer->cur;
1414
1415       /* N.B. ISIDNUM does not include $.  */
1416       while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1417              || VALID_SIGN (*cur, cur[-1]))
1418         {
1419           NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1420           cur++;
1421         }
1422       /* A number can't end with a digit separator.  */
1423       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1424         --cur;
1425
1426       pfile->buffer->cur = cur;
1427     }
1428   while (forms_identifier_p (pfile, false, nst));
1429
1430   number->len = cur - base;
1431   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1432   memcpy (dest, base, number->len);
1433   dest[number->len] = '\0';
1434   number->text = dest;
1435 }
1436
1437 /* Create a token of type TYPE with a literal spelling.  */
1438 static void
1439 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1440                 unsigned int len, enum cpp_ttype type)
1441 {
1442   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1443
1444   memcpy (dest, base, len);
1445   dest[len] = '\0';
1446   token->type = type;
1447   token->val.str.len = len;
1448   token->val.str.text = dest;
1449 }
1450
1451 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1452    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1453
1454 static void
1455 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1456                 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1457 {
1458   _cpp_buff *first_buff = *first_buff_p;
1459   _cpp_buff *last_buff = *last_buff_p;
1460
1461   if (first_buff == NULL)
1462     first_buff = last_buff = _cpp_get_buff (pfile, len);
1463   else if (len > BUFF_ROOM (last_buff))
1464     {
1465       size_t room = BUFF_ROOM (last_buff);
1466       memcpy (BUFF_FRONT (last_buff), base, room);
1467       BUFF_FRONT (last_buff) += room;
1468       base += room;
1469       len -= room;
1470       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1471     }
1472
1473   memcpy (BUFF_FRONT (last_buff), base, len);
1474   BUFF_FRONT (last_buff) += len;
1475
1476   *first_buff_p = first_buff;
1477   *last_buff_p = last_buff;
1478 }
1479
1480
1481 /* Returns true if a macro has been defined.
1482    This might not work if compile with -save-temps,
1483    or preprocess separately from compilation.  */
1484
1485 static bool
1486 is_macro(cpp_reader *pfile, const uchar *base)
1487 {
1488   const uchar *cur = base;
1489   if (! ISIDST (*cur))
1490     return false;
1491   unsigned int hash = HT_HASHSTEP (0, *cur);
1492   ++cur;
1493   while (ISIDNUM (*cur))
1494     {
1495       hash = HT_HASHSTEP (hash, *cur);
1496       ++cur;
1497     }
1498   hash = HT_HASHFINISH (hash, cur - base);
1499
1500   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1501                                         base, cur - base, hash, HT_NO_INSERT));
1502
1503   return !result ? false : (result->type == NT_MACRO);
1504 }
1505
1506
1507 /* Lexes a raw string.  The stored string contains the spelling, including
1508    double quotes, delimiter string, '(' and ')', any leading
1509    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1510    literal, or CPP_OTHER if it was not properly terminated.
1511
1512    The spelling is NUL-terminated, but it is not guaranteed that this
1513    is the first NUL since embedded NULs are preserved.  */
1514
1515 static void
1516 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1517                 const uchar *cur)
1518 {
1519   uchar raw_prefix[17];
1520   uchar temp_buffer[18];
1521   const uchar *orig_base;
1522   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1523   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1524   raw_str_phase phase = RAW_STR_PREFIX;
1525   enum cpp_ttype type;
1526   size_t total_len = 0;
1527   /* Index into temp_buffer during phases other than RAW_STR,
1528      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1529      be appended to temp_buffer.  */
1530   size_t temp_buffer_len = 0;
1531   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1532   size_t raw_prefix_start;
1533   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1534
1535   type = (*base == 'L' ? CPP_WSTRING :
1536           *base == 'U' ? CPP_STRING32 :
1537           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1538           : CPP_STRING);
1539
1540 #define BUF_APPEND(STR,LEN)                                     \
1541       do {                                                      \
1542         bufring_append (pfile, (const uchar *)(STR), (LEN),     \
1543                         &first_buff, &last_buff);               \
1544         total_len += (LEN);                                     \
1545         if (__builtin_expect (temp_buffer_len < 17, 0)          \
1546             && (const uchar *)(STR) != base                     \
1547             && (LEN) <= 2)                                      \
1548           {                                                     \
1549             memcpy (temp_buffer + temp_buffer_len,              \
1550                     (const uchar *)(STR), (LEN));               \
1551             temp_buffer_len += (LEN);                           \
1552           }                                                     \
1553       } while (0);
1554
1555   orig_base = base;
1556   ++cur;
1557   raw_prefix_start = cur - base;
1558   for (;;)
1559     {
1560       cppchar_t c;
1561
1562       /* If we previously performed any trigraph or line splicing
1563          transformations, undo them in between the opening and closing
1564          double quote.  */
1565       while (note->pos < cur)
1566         ++note;
1567       for (; note->pos == cur; ++note)
1568         {
1569           switch (note->type)
1570             {
1571             case '\\':
1572             case ' ':
1573               /* Restore backslash followed by newline.  */
1574               BUF_APPEND (base, cur - base);
1575               base = cur;
1576               BUF_APPEND ("\\", 1);
1577             after_backslash:
1578               if (note->type == ' ')
1579                 {
1580                   /* GNU backslash whitespace newline extension.  FIXME
1581                      could be any sequence of non-vertical space.  When we
1582                      can properly restore any such sequence, we should mark
1583                      this note as handled so _cpp_process_line_notes
1584                      doesn't warn.  */
1585                   BUF_APPEND (" ", 1);
1586                 }
1587
1588               BUF_APPEND ("\n", 1);
1589               break;
1590
1591             case 0:
1592               /* Already handled.  */
1593               break;
1594
1595             default:
1596               if (_cpp_trigraph_map[note->type])
1597                 {
1598                   /* Don't warn about this trigraph in
1599                      _cpp_process_line_notes, since trigraphs show up as
1600                      trigraphs in raw strings.  */
1601                   uchar type = note->type;
1602                   note->type = 0;
1603
1604                   if (!CPP_OPTION (pfile, trigraphs))
1605                     /* If we didn't convert the trigraph in the first
1606                        place, don't do anything now either.  */
1607                     break;
1608
1609                   BUF_APPEND (base, cur - base);
1610                   base = cur;
1611                   BUF_APPEND ("??", 2);
1612
1613                   /* ??/ followed by newline gets two line notes, one for
1614                      the trigraph and one for the backslash/newline.  */
1615                   if (type == '/' && note[1].pos == cur)
1616                     {
1617                       if (note[1].type != '\\'
1618                           && note[1].type != ' ')
1619                         abort ();
1620                       BUF_APPEND ("/", 1);
1621                       ++note;
1622                       goto after_backslash;
1623                     }
1624                   else
1625                     {
1626                       /* Skip the replacement character.  */
1627                       base = ++cur;
1628                       BUF_APPEND (&type, 1);
1629                       c = type;
1630                       goto check_c;
1631                     }
1632                 }
1633               else
1634                 abort ();
1635               break;
1636             }
1637         }
1638       c = *cur++;
1639       if (__builtin_expect (temp_buffer_len < 17, 0))
1640         temp_buffer[temp_buffer_len++] = c;
1641
1642      check_c:
1643       if (phase == RAW_STR_PREFIX)
1644         {
1645           while (raw_prefix_len < temp_buffer_len)
1646             {
1647               raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1648               switch (raw_prefix[raw_prefix_len])
1649                 {
1650                 case ' ': case '(': case ')': case '\\': case '\t':
1651                 case '\v': case '\f': case '\n': default:
1652                   break;
1653                 /* Basic source charset except the above chars.  */
1654                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1655                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1656                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1657                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1658                 case 'y': case 'z':
1659                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1660                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1661                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1662                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1663                 case 'Y': case 'Z':
1664                 case '0': case '1': case '2': case '3': case '4': case '5':
1665                 case '6': case '7': case '8': case '9':
1666                 case '_': case '{': case '}': case '#': case '[': case ']':
1667                 case '<': case '>': case '%': case ':': case ';': case '.':
1668                 case '?': case '*': case '+': case '-': case '/': case '^':
1669                 case '&': case '|': case '~': case '!': case '=': case ',':
1670                 case '"': case '\'':
1671                   if (raw_prefix_len < 16)
1672                     {
1673                       raw_prefix_len++;
1674                       continue;
1675                     }
1676                   break;
1677                 }
1678
1679               if (raw_prefix[raw_prefix_len] != '(')
1680                 {
1681                   int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1682                   if (raw_prefix_len == 16)
1683                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1684                                          col, "raw string delimiter longer "
1685                                               "than 16 characters");
1686                   else if (raw_prefix[raw_prefix_len] == '\n')
1687                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1688                                          col, "invalid new-line in raw "
1689                                               "string delimiter");
1690                   else
1691                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1692                                          col, "invalid character '%c' in "
1693                                               "raw string delimiter",
1694                                          (int) raw_prefix[raw_prefix_len]);
1695                   pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1696                   create_literal (pfile, token, orig_base,
1697                                   raw_prefix_start - 1, CPP_OTHER);
1698                   if (first_buff)
1699                     _cpp_release_buff (pfile, first_buff);
1700                   return;
1701                 }
1702               raw_prefix[raw_prefix_len] = '"';
1703               phase = RAW_STR;
1704               /* Nothing should be appended to temp_buffer during
1705                  RAW_STR phase.  */
1706               temp_buffer_len = 17;
1707               break;
1708             }
1709           continue;
1710         }
1711       else if (phase == RAW_STR_SUFFIX)
1712         {
1713           while (raw_suffix_len <= raw_prefix_len
1714                  && raw_suffix_len < temp_buffer_len
1715                  && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1716             raw_suffix_len++;
1717           if (raw_suffix_len > raw_prefix_len)
1718             break;
1719           if (raw_suffix_len == temp_buffer_len)
1720             continue;
1721           phase = RAW_STR;
1722           /* Nothing should be appended to temp_buffer during
1723              RAW_STR phase.  */
1724           temp_buffer_len = 17;
1725         }
1726       if (c == ')')
1727         {
1728           phase = RAW_STR_SUFFIX;
1729           raw_suffix_len = 0;
1730           temp_buffer_len = 0;
1731         }
1732       else if (c == '\n')
1733         {
1734           if (pfile->state.in_directive
1735               || (pfile->state.parsing_args
1736                   && pfile->buffer->next_line >= pfile->buffer->rlimit))
1737             {
1738               cur--;
1739               type = CPP_OTHER;
1740               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1741                                    "unterminated raw string");
1742               break;
1743             }
1744
1745           BUF_APPEND (base, cur - base);
1746
1747           if (pfile->buffer->cur < pfile->buffer->rlimit)
1748             CPP_INCREMENT_LINE (pfile, 0);
1749           pfile->buffer->need_line = true;
1750
1751           pfile->buffer->cur = cur-1;
1752           _cpp_process_line_notes (pfile, false);
1753           if (!_cpp_get_fresh_line (pfile))
1754             {
1755               source_location src_loc = token->src_loc;
1756               token->type = CPP_EOF;
1757               /* Tell the compiler the line number of the EOF token.  */
1758               token->src_loc = pfile->line_table->highest_line;
1759               token->flags = BOL;
1760               if (first_buff != NULL)
1761                 _cpp_release_buff (pfile, first_buff);
1762               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1763                                    "unterminated raw string");
1764               return;
1765             }
1766
1767           cur = base = pfile->buffer->cur;
1768           note = &pfile->buffer->notes[pfile->buffer->cur_note];
1769         }
1770     }
1771
1772   if (CPP_OPTION (pfile, user_literals))
1773     {
1774       /* If a string format macro, say from inttypes.h, is placed touching
1775          a string literal it could be parsed as a C++11 user-defined string
1776          literal thus breaking the program.
1777          Try to identify macros with is_macro. A warning is issued. */
1778       if (is_macro (pfile, cur))
1779         {
1780           /* Raise a warning, but do not consume subsequent tokens.  */
1781           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1782             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1783                                    token->src_loc, 0,
1784                                    "invalid suffix on literal; C++11 requires "
1785                                    "a space between literal and string macro");
1786         }
1787       /* Grab user defined literal suffix.  */
1788       else if (ISIDST (*cur))
1789         {
1790           type = cpp_userdef_string_add_type (type);
1791           ++cur;
1792
1793           while (ISIDNUM (*cur))
1794             ++cur;
1795         }
1796     }
1797
1798   pfile->buffer->cur = cur;
1799   if (first_buff == NULL)
1800     create_literal (pfile, token, base, cur - base, type);
1801   else
1802     {
1803       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1804
1805       token->type = type;
1806       token->val.str.len = total_len + (cur - base);
1807       token->val.str.text = dest;
1808       last_buff = first_buff;
1809       while (last_buff != NULL)
1810         {
1811           memcpy (dest, last_buff->base,
1812                   BUFF_FRONT (last_buff) - last_buff->base);
1813           dest += BUFF_FRONT (last_buff) - last_buff->base;
1814           last_buff = last_buff->next;
1815         }
1816       _cpp_release_buff (pfile, first_buff);
1817       memcpy (dest, base, cur - base);
1818       dest[cur - base] = '\0';
1819     }
1820 }
1821
1822 /* Lexes a string, character constant, or angle-bracketed header file
1823    name.  The stored string contains the spelling, including opening
1824    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1825    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1826    if it was not properly terminated, or CPP_LESS for an unterminated
1827    header name which must be relexed as normal tokens.
1828
1829    The spelling is NUL-terminated, but it is not guaranteed that this
1830    is the first NUL since embedded NULs are preserved.  */
1831 static void
1832 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1833 {
1834   bool saw_NUL = false;
1835   const uchar *cur;
1836   cppchar_t terminator;
1837   enum cpp_ttype type;
1838
1839   cur = base;
1840   terminator = *cur++;
1841   if (terminator == 'L' || terminator == 'U')
1842     terminator = *cur++;
1843   else if (terminator == 'u')
1844     {
1845       terminator = *cur++;
1846       if (terminator == '8')
1847         terminator = *cur++;
1848     }
1849   if (terminator == 'R')
1850     {
1851       lex_raw_string (pfile, token, base, cur);
1852       return;
1853     }
1854   if (terminator == '"')
1855     type = (*base == 'L' ? CPP_WSTRING :
1856             *base == 'U' ? CPP_STRING32 :
1857             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1858                          : CPP_STRING);
1859   else if (terminator == '\'')
1860     type = (*base == 'L' ? CPP_WCHAR :
1861             *base == 'U' ? CPP_CHAR32 :
1862             *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
1863                          : CPP_CHAR);
1864   else
1865     terminator = '>', type = CPP_HEADER_NAME;
1866
1867   for (;;)
1868     {
1869       cppchar_t c = *cur++;
1870
1871       /* In #include-style directives, terminators are not escapable.  */
1872       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1873         cur++;
1874       else if (c == terminator)
1875         break;
1876       else if (c == '\n')
1877         {
1878           cur--;
1879           /* Unmatched quotes always yield undefined behavior, but
1880              greedy lexing means that what appears to be an unterminated
1881              header name may actually be a legitimate sequence of tokens.  */
1882           if (terminator == '>')
1883             {
1884               token->type = CPP_LESS;
1885               return;
1886             }
1887           type = CPP_OTHER;
1888           break;
1889         }
1890       else if (c == '\0')
1891         saw_NUL = true;
1892     }
1893
1894   if (saw_NUL && !pfile->state.skipping)
1895     cpp_error (pfile, CPP_DL_WARNING,
1896                "null character(s) preserved in literal");
1897
1898   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1899     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1900                (int) terminator);
1901
1902   if (CPP_OPTION (pfile, user_literals))
1903     {
1904       /* If a string format macro, say from inttypes.h, is placed touching
1905          a string literal it could be parsed as a C++11 user-defined string
1906          literal thus breaking the program.
1907          Try to identify macros with is_macro. A warning is issued. */
1908       if (is_macro (pfile, cur))
1909         {
1910           /* Raise a warning, but do not consume subsequent tokens.  */
1911           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1912             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1913                                    token->src_loc, 0,
1914                                    "invalid suffix on literal; C++11 requires "
1915                                    "a space between literal and string macro");
1916         }
1917       /* Grab user defined literal suffix.  */
1918       else if (ISIDST (*cur))
1919         {
1920           type = cpp_userdef_char_add_type (type);
1921           type = cpp_userdef_string_add_type (type);
1922           ++cur;
1923
1924           while (ISIDNUM (*cur))
1925             ++cur;
1926         }
1927     }
1928   else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
1929            && is_macro (pfile, cur)
1930            && !pfile->state.skipping)
1931     cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
1932                            token->src_loc, 0, "C++11 requires a space "
1933                            "between string literal and macro");
1934
1935   pfile->buffer->cur = cur;
1936   create_literal (pfile, token, base, cur - base, type);
1937 }
1938
1939 /* Return the comment table. The client may not make any assumption
1940    about the ordering of the table.  */
1941 cpp_comment_table *
1942 cpp_get_comments (cpp_reader *pfile)
1943 {
1944   return &pfile->comments;
1945 }
1946
1947 /* Append a comment to the end of the comment table. */
1948 static void
1949 store_comment (cpp_reader *pfile, cpp_token *token)
1950 {
1951   int len;
1952
1953   if (pfile->comments.allocated == 0)
1954     {
1955       pfile->comments.allocated = 256;
1956       pfile->comments.entries = (cpp_comment *) xmalloc
1957         (pfile->comments.allocated * sizeof (cpp_comment));
1958     }
1959
1960   if (pfile->comments.count == pfile->comments.allocated)
1961     {
1962       pfile->comments.allocated *= 2;
1963       pfile->comments.entries = (cpp_comment *) xrealloc
1964         (pfile->comments.entries,
1965          pfile->comments.allocated * sizeof (cpp_comment));
1966     }
1967
1968   len = token->val.str.len;
1969
1970   /* Copy comment. Note, token may not be NULL terminated. */
1971   pfile->comments.entries[pfile->comments.count].comment =
1972     (char *) xmalloc (sizeof (char) * (len + 1));
1973   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1974           token->val.str.text, len);
1975   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1976
1977   /* Set source location. */
1978   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1979
1980   /* Increment the count of entries in the comment table. */
1981   pfile->comments.count++;
1982 }
1983
1984 /* The stored comment includes the comment start and any terminator.  */
1985 static void
1986 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1987               cppchar_t type)
1988 {
1989   unsigned char *buffer;
1990   unsigned int len, clen, i;
1991
1992   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1993
1994   /* C++ comments probably (not definitely) have moved past a new
1995      line, which we don't want to save in the comment.  */
1996   if (is_vspace (pfile->buffer->cur[-1]))
1997     len--;
1998
1999   /* If we are currently in a directive or in argument parsing, then
2000      we need to store all C++ comments as C comments internally, and
2001      so we need to allocate a little extra space in that case.
2002
2003      Note that the only time we encounter a directive here is
2004      when we are saving comments in a "#define".  */
2005   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
2006           && type == '/') ? len + 2 : len;
2007
2008   buffer = _cpp_unaligned_alloc (pfile, clen);
2009
2010   token->type = CPP_COMMENT;
2011   token->val.str.len = clen;
2012   token->val.str.text = buffer;
2013
2014   buffer[0] = '/';
2015   memcpy (buffer + 1, from, len - 1);
2016
2017   /* Finish conversion to a C comment, if necessary.  */
2018   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
2019     {
2020       buffer[1] = '*';
2021       buffer[clen - 2] = '*';
2022       buffer[clen - 1] = '/';
2023       /* As there can be in a C++ comments illegal sequences for C comments
2024          we need to filter them out.  */
2025       for (i = 2; i < (clen - 2); i++)
2026         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2027           buffer[i] = '|';
2028     }
2029
2030   /* Finally store this comment for use by clients of libcpp. */
2031   store_comment (pfile, token);
2032 }
2033
2034 /* Allocate COUNT tokens for RUN.  */
2035 void
2036 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2037 {
2038   run->base = XNEWVEC (cpp_token, count);
2039   run->limit = run->base + count;
2040   run->next = NULL;
2041 }
2042
2043 /* Returns the next tokenrun, or creates one if there is none.  */
2044 static tokenrun *
2045 next_tokenrun (tokenrun *run)
2046 {
2047   if (run->next == NULL)
2048     {
2049       run->next = XNEW (tokenrun);
2050       run->next->prev = run;
2051       _cpp_init_tokenrun (run->next, 250);
2052     }
2053
2054   return run->next;
2055 }
2056
2057 /* Return the number of not yet processed token in a given
2058    context.  */
2059 int
2060 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2061 {
2062   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2063     return (LAST (context).token - FIRST (context).token);
2064   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2065            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2066     return (LAST (context).ptoken - FIRST (context).ptoken);
2067   else
2068       abort ();
2069 }
2070
2071 /* Returns the token present at index INDEX in a given context.  If
2072    INDEX is zero, the next token to be processed is returned.  */
2073 static const cpp_token*
2074 _cpp_token_from_context_at (cpp_context *context, int index)
2075 {
2076   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2077     return &(FIRST (context).token[index]);
2078   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2079            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2080     return FIRST (context).ptoken[index];
2081  else
2082    abort ();
2083 }
2084
2085 /* Look ahead in the input stream.  */
2086 const cpp_token *
2087 cpp_peek_token (cpp_reader *pfile, int index)
2088 {
2089   cpp_context *context = pfile->context;
2090   const cpp_token *peektok;
2091   int count;
2092
2093   /* First, scan through any pending cpp_context objects.  */
2094   while (context->prev)
2095     {
2096       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2097
2098       if (index < (int) sz)
2099         return _cpp_token_from_context_at (context, index);
2100       index -= (int) sz;
2101       context = context->prev;
2102     }
2103
2104   /* We will have to read some new tokens after all (and do so
2105      without invalidating preceding tokens).  */
2106   count = index;
2107   pfile->keep_tokens++;
2108
2109   /* For peeked tokens temporarily disable line_change reporting,
2110      until the tokens are parsed for real.  */
2111   void (*line_change) (cpp_reader *, const cpp_token *, int)
2112     = pfile->cb.line_change;
2113   pfile->cb.line_change = NULL;
2114
2115   do
2116     {
2117       peektok = _cpp_lex_token (pfile);
2118       if (peektok->type == CPP_EOF)
2119         {
2120           index--;
2121           break;
2122         }
2123     }
2124   while (index--);
2125
2126   _cpp_backup_tokens_direct (pfile, count - index);
2127   pfile->keep_tokens--;
2128   pfile->cb.line_change = line_change;
2129
2130   return peektok;
2131 }
2132
2133 /* Allocate a single token that is invalidated at the same time as the
2134    rest of the tokens on the line.  Has its line and col set to the
2135    same as the last lexed token, so that diagnostics appear in the
2136    right place.  */
2137 cpp_token *
2138 _cpp_temp_token (cpp_reader *pfile)
2139 {
2140   cpp_token *old, *result;
2141   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2142   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2143
2144   old = pfile->cur_token - 1;
2145   /* Any pre-existing lookaheads must not be clobbered.  */
2146   if (la)
2147     {
2148       if (sz <= la)
2149         {
2150           tokenrun *next = next_tokenrun (pfile->cur_run);
2151
2152           if (sz < la)
2153             memmove (next->base + 1, next->base,
2154                      (la - sz) * sizeof (cpp_token));
2155
2156           next->base[0] = pfile->cur_run->limit[-1];
2157         }
2158
2159       if (sz > 1)
2160         memmove (pfile->cur_token + 1, pfile->cur_token,
2161                  MIN (la, sz - 1) * sizeof (cpp_token));
2162     }
2163
2164   if (!sz && pfile->cur_token == pfile->cur_run->limit)
2165     {
2166       pfile->cur_run = next_tokenrun (pfile->cur_run);
2167       pfile->cur_token = pfile->cur_run->base;
2168     }
2169
2170   result = pfile->cur_token++;
2171   result->src_loc = old->src_loc;
2172   return result;
2173 }
2174
2175 /* Lex a token into RESULT (external interface).  Takes care of issues
2176    like directive handling, token lookahead, multiple include
2177    optimization and skipping.  */
2178 const cpp_token *
2179 _cpp_lex_token (cpp_reader *pfile)
2180 {
2181   cpp_token *result;
2182
2183   for (;;)
2184     {
2185       if (pfile->cur_token == pfile->cur_run->limit)
2186         {
2187           pfile->cur_run = next_tokenrun (pfile->cur_run);
2188           pfile->cur_token = pfile->cur_run->base;
2189         }
2190       /* We assume that the current token is somewhere in the current
2191          run.  */
2192       if (pfile->cur_token < pfile->cur_run->base
2193           || pfile->cur_token >= pfile->cur_run->limit)
2194         abort ();
2195
2196       if (pfile->lookaheads)
2197         {
2198           pfile->lookaheads--;
2199           result = pfile->cur_token++;
2200         }
2201       else
2202         result = _cpp_lex_direct (pfile);
2203
2204       if (result->flags & BOL)
2205         {
2206           /* Is this a directive.  If _cpp_handle_directive returns
2207              false, it is an assembler #.  */
2208           if (result->type == CPP_HASH
2209               /* 6.10.3 p 11: Directives in a list of macro arguments
2210                  gives undefined behavior.  This implementation
2211                  handles the directive as normal.  */
2212               && pfile->state.parsing_args != 1)
2213             {
2214               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2215                 {
2216                   if (pfile->directive_result.type == CPP_PADDING)
2217                     continue;
2218                   result = &pfile->directive_result;
2219                 }
2220             }
2221           else if (pfile->state.in_deferred_pragma)
2222             result = &pfile->directive_result;
2223
2224           if (pfile->cb.line_change && !pfile->state.skipping)
2225             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2226         }
2227
2228       /* We don't skip tokens in directives.  */
2229       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2230         break;
2231
2232       /* Outside a directive, invalidate controlling macros.  At file
2233          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2234          get here and MI optimization works.  */
2235       pfile->mi_valid = false;
2236
2237       if (!pfile->state.skipping || result->type == CPP_EOF)
2238         break;
2239     }
2240
2241   return result;
2242 }
2243
2244 /* Returns true if a fresh line has been loaded.  */
2245 bool
2246 _cpp_get_fresh_line (cpp_reader *pfile)
2247 {
2248   int return_at_eof;
2249
2250   /* We can't get a new line until we leave the current directive.  */
2251   if (pfile->state.in_directive)
2252     return false;
2253
2254   for (;;)
2255     {
2256       cpp_buffer *buffer = pfile->buffer;
2257
2258       if (!buffer->need_line)
2259         return true;
2260
2261       if (buffer->next_line < buffer->rlimit)
2262         {
2263           _cpp_clean_line (pfile);
2264           return true;
2265         }
2266
2267       /* First, get out of parsing arguments state.  */
2268       if (pfile->state.parsing_args)
2269         return false;
2270
2271       /* End of buffer.  Non-empty files should end in a newline.  */
2272       if (buffer->buf != buffer->rlimit
2273           && buffer->next_line > buffer->rlimit
2274           && !buffer->from_stage3)
2275         {
2276           /* Clip to buffer size.  */
2277           buffer->next_line = buffer->rlimit;
2278         }
2279
2280       return_at_eof = buffer->return_at_eof;
2281       _cpp_pop_buffer (pfile);
2282       if (pfile->buffer == NULL || return_at_eof)
2283         return false;
2284     }
2285 }
2286
/* Set result->type to THEN_TYPE and consume one character if the
   next unread character of BUFFER is CHAR; otherwise set it to
   ELSE_TYPE and consume nothing.  Expects `buffer' and `result' to
   be in scope where the macro is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == CHAR)                         \
        {                                               \
          buffer->cur++;                                \
          result->type = THEN_TYPE;                     \
        }                                               \
      else                                              \
        result->type = ELSE_TYPE;                       \
    }                                                   \
  while (0)
2295
2296 /* Lex a token into pfile->cur_token, which is also incremented, to
2297    get diagnostics pointing to the correct location.
2298
2299    Does not handle issues such as token lookahead, multiple-include
2300    optimization, directives, skipping etc.  This function is only
2301    suitable for use by _cpp_lex_token, and in special cases like
2302    lex_expansion_token which doesn't care for any of these issues.
2303
2304    When meeting a newline, returns CPP_EOF if parsing a directive,
2305    otherwise returns to the start of the token buffer if permissible.
2306    Returns the location of the lexed token.  */
2307 cpp_token *
2308 _cpp_lex_direct (cpp_reader *pfile)
2309 {
2310   cppchar_t c;
2311   cpp_buffer *buffer;
2312   const unsigned char *comment_start;
2313   cpp_token *result = pfile->cur_token++;
2314
2315  fresh_line:
2316   result->flags = 0;
2317   buffer = pfile->buffer;
2318   if (buffer->need_line)
2319     {
2320       if (pfile->state.in_deferred_pragma)
2321         {
2322           result->type = CPP_PRAGMA_EOL;
2323           pfile->state.in_deferred_pragma = false;
2324           if (!pfile->state.pragma_allow_expansion)
2325             pfile->state.prevent_expansion--;
2326           return result;
2327         }
2328       if (!_cpp_get_fresh_line (pfile))
2329         {
2330           result->type = CPP_EOF;
2331           if (!pfile->state.in_directive)
2332             {
2333               /* Tell the compiler the line number of the EOF token.  */
2334               result->src_loc = pfile->line_table->highest_line;
2335               result->flags = BOL;
2336             }
2337           return result;
2338         }
2339       if (!pfile->keep_tokens)
2340         {
2341           pfile->cur_run = &pfile->base_run;
2342           result = pfile->base_run.base;
2343           pfile->cur_token = result + 1;
2344         }
2345       result->flags = BOL;
2346       if (pfile->state.parsing_args == 2)
2347         result->flags |= PREV_WHITE;
2348     }
2349   buffer = pfile->buffer;
2350  update_tokens_line:
2351   result->src_loc = pfile->line_table->highest_line;
2352
2353  skipped_white:
2354   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2355       && !pfile->overlaid_buffer)
2356     {
2357       _cpp_process_line_notes (pfile, false);
2358       result->src_loc = pfile->line_table->highest_line;
2359     }
2360   c = *buffer->cur++;
2361
2362   if (pfile->forced_token_location_p)
2363     result->src_loc = *pfile->forced_token_location_p;
2364   else
2365     result->src_loc = linemap_position_for_column (pfile->line_table,
2366                                           CPP_BUF_COLUMN (buffer, buffer->cur));
2367
2368   switch (c)
2369     {
2370     case ' ': case '\t': case '\f': case '\v': case '\0':
2371       result->flags |= PREV_WHITE;
2372       skip_whitespace (pfile, c);
2373       goto skipped_white;
2374
2375     case '\n':
2376       if (buffer->cur < buffer->rlimit)
2377         CPP_INCREMENT_LINE (pfile, 0);
2378       buffer->need_line = true;
2379       goto fresh_line;
2380
2381     case '0': case '1': case '2': case '3': case '4':
2382     case '5': case '6': case '7': case '8': case '9':
2383       {
2384         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2385         result->type = CPP_NUMBER;
2386         lex_number (pfile, &result->val.str, &nst);
2387         warn_about_normalization (pfile, result, &nst);
2388         break;
2389       }
2390
2391     case 'L':
2392     case 'u':
2393     case 'U':
2394     case 'R':
2395       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2396          wide strings or raw strings.  */
2397       if (c == 'L' || CPP_OPTION (pfile, rliterals)
2398           || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2399         {
2400           if ((*buffer->cur == '\'' && c != 'R')
2401               || *buffer->cur == '"'
2402               || (*buffer->cur == 'R'
2403                   && c != 'R'
2404                   && buffer->cur[1] == '"'
2405                   && CPP_OPTION (pfile, rliterals))
2406               || (*buffer->cur == '8'
2407                   && c == 'u'
2408                   && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
2409                                 && CPP_OPTION (pfile, utf8_char_literals)))
2410                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2411                           && CPP_OPTION (pfile, rliterals)))))
2412             {
2413               lex_string (pfile, result, buffer->cur - 1);
2414               break;
2415             }
2416         }
2417       /* Fall through.  */
2418
2419     case '_':
2420     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2421     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2422     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2423     case 's': case 't':           case 'v': case 'w': case 'x':
2424     case 'y': case 'z':
2425     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2426     case 'G': case 'H': case 'I': case 'J': case 'K':
2427     case 'M': case 'N': case 'O': case 'P': case 'Q':
2428     case 'S': case 'T':           case 'V': case 'W': case 'X':
2429     case 'Y': case 'Z':
2430       result->type = CPP_NAME;
2431       {
2432         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2433         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2434                                                 &nst,
2435                                                 &result->val.node.spelling);
2436         warn_about_normalization (pfile, result, &nst);
2437       }
2438
2439       /* Convert named operators to their proper types.  */
2440       if (result->val.node.node->flags & NODE_OPERATOR)
2441         {
2442           result->flags |= NAMED_OP;
2443           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2444         }
2445       break;
2446
2447     case '\'':
2448     case '"':
2449       lex_string (pfile, result, buffer->cur - 1);
2450       break;
2451
2452     case '/':
2453       /* A potential block or line comment.  */
2454       comment_start = buffer->cur;
2455       c = *buffer->cur;
2456
2457       if (c == '*')
2458         {
2459           if (_cpp_skip_block_comment (pfile))
2460             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2461         }
2462       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2463         {
2464           /* Don't warn for system headers.  */
2465           if (cpp_in_system_header (pfile))
2466             ;
2467           /* Warn about comments if pedantically GNUC89, and not
2468              in system headers.  */
2469           else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2470                    && CPP_PEDANTIC (pfile)
2471                    && ! buffer->warned_cplusplus_comments)
2472             {
2473               cpp_error (pfile, CPP_DL_PEDWARN,
2474                          "C++ style comments are not allowed in ISO C90");
2475               cpp_error (pfile, CPP_DL_PEDWARN,
2476                          "(this will be reported only once per input file)");
2477               buffer->warned_cplusplus_comments = 1;
2478             }
2479           /* Or if specifically desired via -Wc90-c99-compat.  */
2480           else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2481                    && ! CPP_OPTION (pfile, cplusplus)
2482                    && ! buffer->warned_cplusplus_comments)
2483             {
2484               cpp_error (pfile, CPP_DL_WARNING,
2485                          "C++ style comments are incompatible with C90");
2486               cpp_error (pfile, CPP_DL_WARNING,
2487                          "(this will be reported only once per input file)");
2488               buffer->warned_cplusplus_comments = 1;
2489             }
2490           /* In C89/C94, C++ style comments are forbidden.  */
2491           else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2492                     || CPP_OPTION (pfile, lang) == CLK_STDC94))
2493             {
2494               /* But don't be confused about valid code such as
2495                  - // immediately followed by *,
2496                  - // in a preprocessing directive,
2497                  - // in an #if 0 block.  */
2498               if (buffer->cur[1] == '*'
2499                   || pfile->state.in_directive
2500                   || pfile->state.skipping)
2501                 {
2502                   result->type = CPP_DIV;
2503                   break;
2504                 }
2505               else if (! buffer->warned_cplusplus_comments)
2506                 {
2507                   cpp_error (pfile, CPP_DL_ERROR,
2508                              "C++ style comments are not allowed in ISO C90");
2509                   cpp_error (pfile, CPP_DL_ERROR,
2510                              "(this will be reported only once per input "
2511                              "file)");
2512                   buffer->warned_cplusplus_comments = 1;
2513                 }
2514             }
2515           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2516             cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2517         }
2518       else if (c == '=')
2519         {
2520           buffer->cur++;
2521           result->type = CPP_DIV_EQ;
2522           break;
2523         }
2524       else
2525         {
2526           result->type = CPP_DIV;
2527           break;
2528         }
2529
2530       if (!pfile->state.save_comments)
2531         {
2532           result->flags |= PREV_WHITE;
2533           goto update_tokens_line;
2534         }
2535
2536       /* Save the comment as a token in its own right.  */
2537       save_comment (pfile, result, comment_start, c);
2538       break;
2539
2540     case '<':
2541       if (pfile->state.angled_headers)
2542         {
2543           lex_string (pfile, result, buffer->cur - 1);
2544           if (result->type != CPP_LESS)
2545             break;
2546         }
2547
2548       result->type = CPP_LESS;
2549       if (*buffer->cur == '=')
2550         buffer->cur++, result->type = CPP_LESS_EQ;
2551       else if (*buffer->cur == '<')
2552         {
2553           buffer->cur++;
2554           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2555         }
2556       else if (CPP_OPTION (pfile, digraphs))
2557         {
2558           if (*buffer->cur == ':')
2559             {
2560               /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2561                  three characters are <:: and the subsequent character
2562                  is neither : nor >, the < is treated as a preprocessor
2563                  token by itself".  */
2564               if (CPP_OPTION (pfile, cplusplus)
2565                   && CPP_OPTION (pfile, lang) != CLK_CXX98
2566                   && CPP_OPTION (pfile, lang) != CLK_GNUCXX
2567                   && buffer->cur[1] == ':'
2568                   && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2569                 break;
2570
2571               buffer->cur++;
2572               result->flags |= DIGRAPH;
2573               result->type = CPP_OPEN_SQUARE;
2574             }
2575           else if (*buffer->cur == '%')
2576             {
2577               buffer->cur++;
2578               result->flags |= DIGRAPH;
2579               result->type = CPP_OPEN_BRACE;
2580             }
2581         }
2582       break;
2583
2584     case '>':
2585       result->type = CPP_GREATER;
2586       if (*buffer->cur == '=')
2587         buffer->cur++, result->type = CPP_GREATER_EQ;
2588       else if (*buffer->cur == '>')
2589         {
2590           buffer->cur++;
2591           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2592         }
2593       break;
2594
2595     case '%':
2596       result->type = CPP_MOD;
2597       if (*buffer->cur == '=')
2598         buffer->cur++, result->type = CPP_MOD_EQ;
2599       else if (CPP_OPTION (pfile, digraphs))
2600         {
2601           if (*buffer->cur == ':')
2602             {
2603               buffer->cur++;
2604               result->flags |= DIGRAPH;
2605               result->type = CPP_HASH;
2606               if (*buffer->cur == '%' && buffer->cur[1] == ':')
2607                 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2608             }
2609           else if (*buffer->cur == '>')
2610             {
2611               buffer->cur++;
2612               result->flags |= DIGRAPH;
2613               result->type = CPP_CLOSE_BRACE;
2614             }
2615         }
2616       break;
2617
2618     case '.':
2619       result->type = CPP_DOT;
2620       if (ISDIGIT (*buffer->cur))
2621         {
2622           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2623           result->type = CPP_NUMBER;
2624           lex_number (pfile, &result->val.str, &nst);
2625           warn_about_normalization (pfile, result, &nst);
2626         }
2627       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2628         buffer->cur += 2, result->type = CPP_ELLIPSIS;
2629       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2630         buffer->cur++, result->type = CPP_DOT_STAR;
2631       break;
2632
2633     case '+':
2634       result->type = CPP_PLUS;
2635       if (*buffer->cur == '+')
2636         buffer->cur++, result->type = CPP_PLUS_PLUS;
2637       else if (*buffer->cur == '=')
2638         buffer->cur++, result->type = CPP_PLUS_EQ;
2639       break;
2640
2641     case '-':
2642       result->type = CPP_MINUS;
2643       if (*buffer->cur == '>')
2644         {
2645           buffer->cur++;
2646           result->type = CPP_DEREF;
2647           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2648             buffer->cur++, result->type = CPP_DEREF_STAR;
2649         }
2650       else if (*buffer->cur == '-')
2651         buffer->cur++, result->type = CPP_MINUS_MINUS;
2652       else if (*buffer->cur == '=')
2653         buffer->cur++, result->type = CPP_MINUS_EQ;
2654       break;
2655
2656     case '&':
2657       result->type = CPP_AND;
2658       if (*buffer->cur == '&')
2659         buffer->cur++, result->type = CPP_AND_AND;
2660       else if (*buffer->cur == '=')
2661         buffer->cur++, result->type = CPP_AND_EQ;
2662       break;
2663
2664     case '|':
2665       result->type = CPP_OR;
2666       if (*buffer->cur == '|')
2667         buffer->cur++, result->type = CPP_OR_OR;
2668       else if (*buffer->cur == '=')
2669         buffer->cur++, result->type = CPP_OR_EQ;
2670       break;
2671
2672     case ':':
2673       result->type = CPP_COLON;
2674       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2675         buffer->cur++, result->type = CPP_SCOPE;
2676       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2677         {
2678           buffer->cur++;
2679           result->flags |= DIGRAPH;
2680           result->type = CPP_CLOSE_SQUARE;
2681         }
2682       break;
2683
2684     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2685     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2686     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2687     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2688     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2689
2690     case '?': result->type = CPP_QUERY; break;
2691     case '~': result->type = CPP_COMPL; break;
2692     case ',': result->type = CPP_COMMA; break;
2693     case '(': result->type = CPP_OPEN_PAREN; break;
2694     case ')': result->type = CPP_CLOSE_PAREN; break;
2695     case '[': result->type = CPP_OPEN_SQUARE; break;
2696     case ']': result->type = CPP_CLOSE_SQUARE; break;
2697     case '{': result->type = CPP_OPEN_BRACE; break;
2698     case '}': result->type = CPP_CLOSE_BRACE; break;
2699     case ';': result->type = CPP_SEMICOLON; break;
2700
2701       /* @ is a punctuator in Objective-C.  */
2702     case '@': result->type = CPP_ATSIGN; break;
2703
2704     case '$':
2705     case '\\':
2706       {
2707         const uchar *base = --buffer->cur;
2708         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2709
2710         if (forms_identifier_p (pfile, true, &nst))
2711           {
2712             result->type = CPP_NAME;
2713             result->val.node.node = lex_identifier (pfile, base, true, &nst,
2714                                                     &result->val.node.spelling);
2715             warn_about_normalization (pfile, result, &nst);
2716             break;
2717           }
2718         buffer->cur++;
2719       }
2720
2721     default:
2722       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2723       break;
2724     }
2725
2726   source_range tok_range;
2727   tok_range.m_start = result->src_loc;
2728   if (result->src_loc >= RESERVED_LOCATION_COUNT)
2729     tok_range.m_finish
2730       = linemap_position_for_column (pfile->line_table,
2731                                      CPP_BUF_COLUMN (buffer, buffer->cur));
2732   else
2733     tok_range.m_finish = tok_range.m_start;
2734
2735   result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
2736                                            result->src_loc,
2737                                            tok_range, NULL);
2738
2739   return result;
2740 }
2741
2742 /* An upper bound on the number of bytes needed to spell TOKEN.
2743    Does not include preceding whitespace.  */
2744 unsigned int
2745 cpp_token_len (const cpp_token *token)
2746 {
2747   unsigned int len;
2748
2749   switch (TOKEN_SPELL (token))
2750     {
2751     default:            len = 6;                                break;
2752     case SPELL_LITERAL: len = token->val.str.len;               break;
2753     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
2754     }
2755
2756   return len;
2757 }
2758
/* Decode the single UTF-8 sequence that starts at NAME and store its
   code point in BUFFER as a \U escape: a backslash, 'U', and eight
   lower-case hexadecimal digits.  Exactly 10 bytes are written and no
   terminating NUL is added.  Return the number of bytes of NAME that
   made up the sequence.  Aborts if a continuation byte is not of the
   form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *in = name;
  unsigned char *out = buffer;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The run of one bits at the top of the first byte gives the length
     of the whole sequence.  */
  lead = *in;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte lie below that run.  */
  code_point = *in & (0x7F >> seq_len);

  /* Every following byte contributes six more bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      unsigned char cont = *++in;

      code_point = (code_point << 6) | (cont & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((cont & ~0x3F) != 0x80)
        abort ();
    }

  *out++ = '\\';
  *out++ = 'U';

  /* Emit the eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2792
2793 /* Given a token TYPE corresponding to a digraph, return a pointer to
2794    the spelling of the digraph.  */
2795 static const unsigned char *
2796 cpp_digraph2name (enum cpp_ttype type)
2797 {
2798   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2799 }
2800
2801 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2802    The buffer must already contain the enough space to hold the
2803    token's spelling.  Returns a pointer to the character after the
2804    last character written.  */
2805 unsigned char *
2806 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2807 {
2808   size_t i;
2809   const unsigned char *name = NODE_NAME (ident);
2810
2811   for (i = 0; i < NODE_LEN (ident); i++)
2812     if (name[i] & ~0x7F)
2813       {
2814         i += utf8_to_ucn (buffer, name + i) - 1;
2815         buffer += 10;
2816       }
2817     else
2818       *buffer++ = name[i];
2819
2820   return buffer;
2821 }
2822
2823 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2824    already contain the enough space to hold the token's spelling.
2825    Returns a pointer to the character after the last character written.
2826    FORSTRING is true if this is to be the spelling after translation
2827    phase 1 (with the original spelling of extended identifiers), false
2828    if extended identifiers should always be written using UCNs (there is
2829    no option for always writing them in the internal UTF-8 form).
2830    FIXME: Would be nice if we didn't need the PFILE argument.  */
2831 unsigned char *
2832 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2833                  unsigned char *buffer, bool forstring)
2834 {
2835   switch (TOKEN_SPELL (token))
2836     {
2837     case SPELL_OPERATOR:
2838       {
2839         const unsigned char *spelling;
2840         unsigned char c;
2841
2842         if (token->flags & DIGRAPH)
2843           spelling = cpp_digraph2name (token->type);
2844         else if (token->flags & NAMED_OP)
2845           goto spell_ident;
2846         else
2847           spelling = TOKEN_NAME (token);
2848
2849         while ((c = *spelling++) != '\0')
2850           *buffer++ = c;
2851       }
2852       break;
2853
2854     spell_ident:
2855     case SPELL_IDENT:
2856       if (forstring)
2857         {
2858           memcpy (buffer, NODE_NAME (token->val.node.spelling),
2859                   NODE_LEN (token->val.node.spelling));
2860           buffer += NODE_LEN (token->val.node.spelling);
2861         }
2862       else
2863         buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2864       break;
2865
2866     case SPELL_LITERAL:
2867       memcpy (buffer, token->val.str.text, token->val.str.len);
2868       buffer += token->val.str.len;
2869       break;
2870
2871     case SPELL_NONE:
2872       cpp_error (pfile, CPP_DL_ICE,
2873                  "unspellable token %s", TOKEN_NAME (token));
2874       break;
2875     }
2876
2877   return buffer;
2878 }
2879
2880 /* Returns TOKEN spelt as a null-terminated string.  The string is
2881    freed when the reader is destroyed.  Useful for diagnostics.  */
2882 unsigned char *
2883 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2884 {
2885   unsigned int len = cpp_token_len (token) + 1;
2886   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2887
2888   end = cpp_spell_token (pfile, token, start, false);
2889   end[0] = '\0';
2890
2891   return start;
2892 }
2893
2894 /* Returns a pointer to a string which spells the token defined by
2895    TYPE and FLAGS.  Used by C front ends, which really should move to
2896    using cpp_token_as_text.  */
2897 const char *
2898 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2899 {
2900   if (flags & DIGRAPH)
2901     return (const char *) cpp_digraph2name (type);
2902   else if (flags & NAMED_OP)
2903     return cpp_named_operator2name (type);
2904
2905   return (const char *) token_spellings[type].name;
2906 }
2907
2908 /* Writes the spelling of token to FP, without any preceding space.
2909    Separated from cpp_spell_token for efficiency - to avoid stdio
2910    double-buffering.  */
2911 void
2912 cpp_output_token (const cpp_token *token, FILE *fp)
2913 {
2914   switch (TOKEN_SPELL (token))
2915     {
2916     case SPELL_OPERATOR:
2917       {
2918         const unsigned char *spelling;
2919         int c;
2920
2921         if (token->flags & DIGRAPH)
2922           spelling = cpp_digraph2name (token->type);
2923         else if (token->flags & NAMED_OP)
2924           goto spell_ident;
2925         else
2926           spelling = TOKEN_NAME (token);
2927
2928         c = *spelling;
2929         do
2930           putc (c, fp);
2931         while ((c = *++spelling) != '\0');
2932       }
2933       break;
2934
2935     spell_ident:
2936     case SPELL_IDENT:
2937       {
2938         size_t i;
2939         const unsigned char * name = NODE_NAME (token->val.node.node);
2940
2941         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2942           if (name[i] & ~0x7F)
2943             {
2944               unsigned char buffer[10];
2945               i += utf8_to_ucn (buffer, name + i) - 1;
2946               fwrite (buffer, 1, 10, fp);
2947             }
2948           else
2949             fputc (NODE_NAME (token->val.node.node)[i], fp);
2950       }
2951       break;
2952
2953     case SPELL_LITERAL:
2954       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2955       break;
2956
2957     case SPELL_NONE:
2958       /* An error, most probably.  */
2959       break;
2960     }
2961 }
2962
2963 /* Compare two tokens.  */
2964 int
2965 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2966 {
2967   if (a->type == b->type && a->flags == b->flags)
2968     switch (TOKEN_SPELL (a))
2969       {
2970       default:                  /* Keep compiler happy.  */
2971       case SPELL_OPERATOR:
2972         /* token_no is used to track where multiple consecutive ##
2973            tokens were originally located.  */
2974         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2975       case SPELL_NONE:
2976         return (a->type != CPP_MACRO_ARG
2977                 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2978                     && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2979       case SPELL_IDENT:
2980         return (a->val.node.node == b->val.node.node
2981                 && a->val.node.spelling == b->val.node.spelling);
2982       case SPELL_LITERAL:
2983         return (a->val.str.len == b->val.str.len
2984                 && !memcmp (a->val.str.text, b->val.str.text,
2985                             a->val.str.len));
2986       }
2987
2988   return 0;
2989 }
2990
2991 /* Returns nonzero if a space should be inserted to avoid an
2992    accidental token paste for output.  For simplicity, it is
2993    conservative, and occasionally advises a space where one is not
2994    needed, e.g. "." and ".2".  */
2995 int
2996 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2997                  const cpp_token *token2)
2998 {
2999   enum cpp_ttype a = token1->type, b = token2->type;
3000   cppchar_t c;
3001
3002   if (token1->flags & NAMED_OP)
3003     a = CPP_NAME;
3004   if (token2->flags & NAMED_OP)
3005     b = CPP_NAME;
3006
3007   c = EOF;
3008   if (token2->flags & DIGRAPH)
3009     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
3010   else if (token_spellings[b].category == SPELL_OPERATOR)
3011     c = token_spellings[b].name[0];
3012
3013   /* Quickly get everything that can paste with an '='.  */
3014   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
3015     return 1;
3016
3017   switch (a)
3018     {
3019     case CPP_GREATER:   return c == '>';
3020     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
3021     case CPP_PLUS:      return c == '+';
3022     case CPP_MINUS:     return c == '-' || c == '>';
3023     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
3024     case CPP_MOD:       return c == ':' || c == '>';
3025     case CPP_AND:       return c == '&';
3026     case CPP_OR:        return c == '|';
3027     case CPP_COLON:     return c == ':' || c == '>';
3028     case CPP_DEREF:     return c == '*';
3029     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
3030     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
3031     case CPP_NAME:      return ((b == CPP_NUMBER
3032                                  && name_p (pfile, &token2->val.str))
3033                                 || b == CPP_NAME
3034                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
3035     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
3036                                 || c == '.' || c == '+' || c == '-');
3037                                       /* UCNs */
3038     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
3039                                  && b == CPP_NAME)
3040                                 || (CPP_OPTION (pfile, objc)
3041                                     && token1->val.str.text[0] == '@'
3042                                     && (b == CPP_NAME || b == CPP_STRING)));
3043     case CPP_STRING:
3044     case CPP_WSTRING:
3045     case CPP_UTF8STRING:
3046     case CPP_STRING16:
3047     case CPP_STRING32:  return (CPP_OPTION (pfile, user_literals)
3048                                 && (b == CPP_NAME
3049                                     || (TOKEN_SPELL (token2) == SPELL_LITERAL
3050                                         && ISIDST (token2->val.str.text[0]))));
3051
3052     default:            break;
3053     }
3054
3055   return 0;
3056 }
3057
3058 /* Output all the remaining tokens on the current line, and a newline
3059    character, to FP.  Leading whitespace is removed.  If there are
3060    macros, special token padding is not performed.  */
3061 void
3062 cpp_output_line (cpp_reader *pfile, FILE *fp)
3063 {
3064   const cpp_token *token;
3065
3066   token = cpp_get_token (pfile);
3067   while (token->type != CPP_EOF)
3068     {
3069       cpp_output_token (token, fp);
3070       token = cpp_get_token (pfile);
3071       if (token->flags & PREV_WHITE)
3072         putc (' ', fp);
3073     }
3074
3075   putc ('\n', fp);
3076 }
3077
3078 /* Return a string representation of all the remaining tokens on the
3079    current line.  The result is allocated using xmalloc and must be
3080    freed by the caller.  */
3081 unsigned char *
3082 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3083 {
3084   const cpp_token *token;
3085   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3086   unsigned int alloced = 120 + out;
3087   unsigned char *result = (unsigned char *) xmalloc (alloced);
3088
3089   /* If DIR_NAME is empty, there are no initial contents.  */
3090   if (dir_name)
3091     {
3092       sprintf ((char *) result, "#%s ", dir_name);
3093       out += 2;
3094     }
3095
3096   token = cpp_get_token (pfile);
3097   while (token->type != CPP_EOF)
3098     {
3099       unsigned char *last;
3100       /* Include room for a possible space and the terminating nul.  */
3101       unsigned int len = cpp_token_len (token) + 2;
3102
3103       if (out + len > alloced)
3104         {
3105           alloced *= 2;
3106           if (out + len > alloced)
3107             alloced = out + len;
3108           result = (unsigned char *) xrealloc (result, alloced);
3109         }
3110
3111       last = cpp_spell_token (pfile, token, &result[out], 0);
3112       out = last - result;
3113
3114       token = cpp_get_token (pfile);
3115       if (token->flags & PREV_WHITE)
3116         result[out++] = ' ';
3117     }
3118
3119   result[out] = '\0';
3120   return result;
3121 }
3122
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer that is ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF->cur to
   BUFF->limit.  Every macro argument is parenthesized so that any
   expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3137
3138 /* Create a new allocation buffer.  Place the control block at the end
3139    of the buffer, so that buffer overflows will cause immediate chaos.  */
3140 static _cpp_buff *
3141 new_buff (size_t len)
3142 {
3143   _cpp_buff *result;
3144   unsigned char *base;
3145
3146   if (len < MIN_BUFF_SIZE)
3147     len = MIN_BUFF_SIZE;
3148   len = CPP_ALIGN (len);
3149
3150 #ifdef ENABLE_VALGRIND_CHECKING
3151   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3152      struct first.  */
3153   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3154   base = XNEWVEC (unsigned char, len + slen);
3155   result = (_cpp_buff *) base;
3156   base += slen;
3157 #else
3158   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3159   result = (_cpp_buff *) (base + len);
3160 #endif
3161   result->base = base;
3162   result->cur = base;
3163   result->limit = base + len;
3164   result->next = NULL;
3165   return result;
3166 }
3167
3168 /* Place a chain of unwanted allocation buffers on the free list.  */
3169 void
3170 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3171 {
3172   _cpp_buff *end = buff;
3173
3174   while (end->next)
3175     end = end->next;
3176   end->next = pfile->free_buffs;
3177   pfile->free_buffs = buff;
3178 }
3179
3180 /* Return a free buffer of size at least MIN_SIZE.  */
3181 _cpp_buff *
3182 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3183 {
3184   _cpp_buff *result, **p;
3185
3186   for (p = &pfile->free_buffs;; p = &(*p)->next)
3187     {
3188       size_t size;
3189
3190       if (*p == NULL)
3191         return new_buff (min_size);
3192       result = *p;
3193       size = result->limit - result->base;
3194       /* Return a buffer that's big enough, but don't waste one that's
3195          way too big.  */
3196       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3197         break;
3198     }
3199
3200   *p = result->next;
3201   result->next = NULL;
3202   result->cur = result->base;
3203   return result;
3204 }
3205
3206 /* Creates a new buffer with enough space to hold the uncommitted
3207    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3208    the excess bytes to the new buffer.  Chains the new buffer after
3209    BUFF, and returns the new buffer.  */
3210 _cpp_buff *
3211 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3212 {
3213   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3214   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3215
3216   buff->next = new_buff;
3217   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3218   return new_buff;
3219 }
3220
3221 /* Creates a new buffer with enough space to hold the uncommitted
3222    remaining bytes of the buffer pointed to by BUFF, and at least
3223    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3224    Chains the new buffer before the buffer pointed to by BUFF, and
3225    updates the pointer to point to the new buffer.  */
3226 void
3227 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3228 {
3229   _cpp_buff *new_buff, *old_buff = *pbuff;
3230   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3231
3232   new_buff = _cpp_get_buff (pfile, size);
3233   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3234   new_buff->next = old_buff;
3235   *pbuff = new_buff;
3236 }
3237
3238 /* Free a chain of buffers starting at BUFF.  */
3239 void
3240 _cpp_free_buff (_cpp_buff *buff)
3241 {
3242   _cpp_buff *next;
3243
3244   for (; buff; buff = next)
3245     {
3246       next = buff->next;
3247 #ifdef ENABLE_VALGRIND_CHECKING
3248       free (buff);
3249 #else
3250       free (buff->base);
3251 #endif
3252     }
3253 }
3254
3255 /* Allocate permanent, unaligned storage of length LEN.  */
3256 unsigned char *
3257 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3258 {
3259   _cpp_buff *buff = pfile->u_buff;
3260   unsigned char *result = buff->cur;
3261
3262   if (len > (size_t) (buff->limit - result))
3263     {
3264       buff = _cpp_get_buff (pfile, len);
3265       buff->next = pfile->u_buff;
3266       pfile->u_buff = buff;
3267       result = buff->cur;
3268     }
3269
3270   buff->cur = result + len;
3271   return result;
3272 }
3273
3274 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3275    That buffer is used for growing allocations when saving macro
3276    replacement lists in a #define, and when parsing an answer to an
3277    assertion in #assert, #unassert or #if (and therefore possibly
3278    whilst expanding macros).  It therefore must not be used by any
3279    code that they might call: specifically the lexer and the guts of
3280    the macro expander.
3281
3282    All existing other uses clearly fit this restriction: storing
3283    registered pragmas during initialization.  */
3284 unsigned char *
3285 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3286 {
3287   _cpp_buff *buff = pfile->a_buff;
3288   unsigned char *result = buff->cur;
3289
3290   if (len > (size_t) (buff->limit - result))
3291     {
3292       buff = _cpp_get_buff (pfile, len);
3293       buff->next = pfile->a_buff;
3294       pfile->a_buff = buff;
3295       result = buff->cur;
3296     }
3297
3298   buff->cur = result + len;
3299   return result;
3300 }
3301
3302 /* Say which field of TOK is in use.  */
3303
3304 enum cpp_token_fld_kind
3305 cpp_token_val_index (const cpp_token *tok)
3306 {
3307   switch (TOKEN_SPELL (tok))
3308     {
3309     case SPELL_IDENT:
3310       return CPP_TOKEN_FLD_NODE;
3311     case SPELL_LITERAL:
3312       return CPP_TOKEN_FLD_STR;
3313     case SPELL_OPERATOR:
3314       if (tok->type == CPP_PASTE)
3315         return CPP_TOKEN_FLD_TOKEN_NO;
3316       else
3317         return CPP_TOKEN_FLD_NONE;
3318     case SPELL_NONE:
3319       if (tok->type == CPP_MACRO_ARG)
3320         return CPP_TOKEN_FLD_ARG_NO;
3321       else if (tok->type == CPP_PADDING)
3322         return CPP_TOKEN_FLD_SOURCE;
3323       else if (tok->type == CPP_PRAGMA)
3324         return CPP_TOKEN_FLD_PRAGMA;
3325       /* else fall through */
3326     default:
3327       return CPP_TOKEN_FLD_NONE;
3328     }
3329 }
3330
/* All tokens lexed in R after calling this function will be forced to have
   their source_location the same as the location referenced by P, until
   cpp_stop_forcing_token_locations is called for R.

   This function only records the pointer P in R; the location it
   references is not copied here.  */

void
cpp_force_token_locations (cpp_reader *r, source_location *p)
{
  r->forced_token_location_p = p;
}
3340
/* Go back to assigning locations naturally for lexed tokens.  This
   clears the pointer recorded in R by cpp_force_token_locations.  */

void
cpp_stop_forcing_token_locations (cpp_reader *r)
{
  /* A NULL pointer means no forced location is in effect.  */
  r->forced_token_location_p = NULL;
}