libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 3, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type  /* Spelling category stored in each token_spellings entry.  */
  28 {
  29   SPELL_OPERATOR = 0,  /* Used by OP() entries; NAME is the given spelling string.  */
  30   SPELL_IDENT,         /* Chosen by TK() entries whose second argument is IDENT.  */
  31   SPELL_LITERAL,       /* Chosen by TK() entries whose second argument is LITERAL.  */
  32   SPELL_NONE           /* Chosen by TK() entries whose second argument is NONE.  */
  33 };
  34
  35 struct token_spelling  /* One token_spellings row; rows are indexed by token type.  */
  36 {
  37   enum spell_type category;   /* Read via TOKEN_SPELL.  */
  38   const unsigned char *name;  /* Read via TOKEN_NAME; the string supplied by OP()/TK().  */
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =  /* Six digraph spellings.  */
  42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
  43 /* Build token_spellings by expanding TTYPE_TABLE with local OP/TK definitions.  */
  44 #define OP(e, s) { SPELL_OPERATOR, UC s  },  /* Name is the given string S.  */
  45 #define TK(e, s) { SPELL_ ## s,    UC #e },  /* Name is the stringized enumerator E.  */
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49 /* Spelling category and name recorded for the type of TOKEN.  */
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52 /* Prototypes for helpers that are local to this file.  */
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  58 static void store_comment (cpp_reader *, cpp_token *);
  59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  60                             unsigned int, enum cpp_ttype);
  61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  62 static int name_p (cpp_reader *, const cpp_string *);
  63 static tokenrun *next_tokenrun (tokenrun *);
  64
  65 static _cpp_buff *new_buff (size_t);
  66
  67
  68 /* Utility routine:
  69
  70    Reports whether TOKEN is a CPP_NAME whose identifier is spelled exactly
  71    like the NUL-terminated STRING: 1 if so, 0 in every other case.  */
  72 int
  73 cpp_ideq (const cpp_token *token, const char *string)
  74 {
  75   const uchar *wanted = (const uchar *) string;
  76
  77   return (token->type == CPP_NAME
  78           && ustrcmp (NODE_NAME (token->val.node.node), wanted) == 0);
  79 }
  80
  81 /* Append a note of kind TYPE, located at byte POS, to BUFFER's array of
  82    line notes, enlarging the array first when it is already full.  */
  83 static void
  84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  85 {
  86   _cpp_line_note *slot;
  87   if (buffer->notes_cap == buffer->notes_used)
  88     {
  89       buffer->notes_cap = 2 * buffer->notes_cap + 200;
  90       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  91                                   buffer->notes_cap);
  92     }
  93   slot = &buffer->notes[buffer->notes_used++];
  94   slot->pos = pos;
  95   slot->type = type;
  96 }
  97
  98 \f
  99 /* Fast path to find line special characters using optimized character
 100    scanning algorithms.  Anything complicated falls back to the slow
 101    path below.  Since this loop is very hot it's worth doing these kinds
 102    of optimizations.
 103
 104    One of the paths through the ifdefs should provide
 105
 106      const uchar *search_line_fast (const uchar *s, const uchar *end);
 107
 108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
 109    the found character.
 110
 111    Note that the last character of the buffer is *always* a newline,
 112    as forced by _cpp_convert_input.  This fact can be used to avoid
 113    explicitly looking for the end of the buffer.  */
 114
 115 /* Configure gives us an ifdef test; default to 0 so `if (WORDS_BIGENDIAN)' works.  */
 116 #ifndef WORDS_BIGENDIAN
 117 #define WORDS_BIGENDIAN 0
 118 #endif
 119
 120 /* We'd like the largest integer that fits into a register.  There's nothing
 121    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
 122    but MS decided on an LLP64 model.  Thankfully when building with GCC we
 123    can get the "real" word size.  */
 124 #ifdef __GNUC__
 125 typedef unsigned int word_type __attribute__((__mode__(__word__)));
 126 #else
 127 typedef unsigned long word_type;
 128 #endif
 129
 130 /* The code below is only expecting sizes 4 or 8.  Die at compile-time
 131    otherwise: the array bound below is then -1 instead of 1.  */
 132 typedef char check_word_type_size
 133   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
 134
 135 /* Clear the first N bytes (in memory order) of VAL and return the result.
 136    A cleared byte is NUL, which is not one of the characters searched for.  */
 137
 138 static inline word_type
 139 acc_char_mask_misalign (word_type val, unsigned int n)
 140 {
 141   const unsigned int nbits = n * 8;
 142   const word_type all_ones = -1;
 143
 144   /* The first bytes in memory are the high-order ones on a big-endian
 145      host and the low-order ones otherwise.  */
 146   return val & (WORDS_BIGENDIAN ? all_ones >> nbits : all_ones << nbits);
 147 }
 148
 149 /* Return X replicated to all byte positions within WORD_TYPE.  X is widened
 150    to the unsigned WORD_TYPE first so that no shift can overflow `int'.  */
 151 static inline word_type
 152 acc_char_replicate (uchar x)
 153 {
 154   word_type ret = x;
 155   ret |= ret << 8;           /* X now fills the low 2 bytes.  */
 156   ret |= ret << 16;          /* X now fills the low 4 bytes.  */
 157   if (sizeof(word_type) == 8)
 158     ret |= ret << 16 << 16;  /* Two shifts keep each count below 32.  */
 159   return ret;
 160 }
 161
 162 /* Return non-zero if some byte of VAL is (probably) the matching byte of C.
 163    Callers in this file pass C as one character replicated into every byte.  */
 164 static inline word_type
 165 acc_char_cmp (word_type val, word_type c)
 166 {
 167 #if defined(__GNUC__) && defined(__alpha__)
 168   /* We can get exact results using a compare-bytes instruction.
 169      Get (val == c) via (0 >= (val ^ c)).  */
 170   return __builtin_alpha_cmpbge (0, val ^ c);
 171 #else
 172   word_type magic = 0x7efefefeU;
 173   if (sizeof(word_type) == 8)
 174     magic = (magic << 16 << 16) | 0xfefefefeU;
 175   magic |= 1;  /* Now 0x7efefeff, or 0x7efefefefefefeff for 8-byte words.  */
 176   /* After the XOR below, a byte of VAL that equalled its byte of C is zero.  */
 177   val ^= c;
 178   return ((val + magic) ^ ~val) & ~magic;  /* Bits clear in MAGIC that received no carry.  */
 179 #endif
 180 }
 181
 182 /* CMP is a non-zero result of acc_char_cmp for the word VAL.  Return the
 183    index of the first searched character in VAL, or -1 if there is none.  */
 184
 185 static inline int
 186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
 187                 word_type val ATTRIBUTE_UNUSED)
 188 {
 189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
 190   /* The cmpbge instruction sets *bits* of the result corresponding to
 191      matches in the bytes with no false positives.  */
 192   return __builtin_ctzl (cmp);
 193 #else
 194   unsigned int idx;
 195
 196   /* ??? It would be nice to force unrolling here,
 197      and have all of these constants folded.  */
 198   for (idx = 0; idx < sizeof(word_type); idx++)
 199     {
 200       const unsigned int pos = WORDS_BIGENDIAN ? sizeof(word_type) - idx - 1 : idx;
 201       const uchar byte = (val >> pos * 8) & 0xff;
 202       switch (byte)
 203         {
 204         case '\n': case '\r': case '\\': case '?':
 205           return idx;
 206         default:
 207           break;
 208         }
 209     }
 210   return -1;
 211 #endif
 212 }
 213
 214 /* A version of the fast scanner using bit fiddling techniques.  It tests
 215    one aligned word per iteration.  END is unused: the loop has no bounds
 216    check and only stops once one of the searched characters is found.
 217
 218    For 32-bit words, one would normally perform 16 comparisons and
 219    16 branches.  With this algorithm one performs 24 arithmetic
 220    operations and one branch.  Whether this is faster with a 32-bit
 221    word size is going to be somewhat system dependent.
 222    For 64-bit words, we eliminate twice the number of comparisons
 223    and branches without increasing the number of arithmetic operations.
 224    It's almost certainly going to be a win with 64-bit word size.  */
 225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
 226   ATTRIBUTE_UNUSED;
 227
 228 static const uchar *
 229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 230 {
 231   const word_type repl_nl = acc_char_replicate ('\n');
 232   const word_type repl_cr = acc_char_replicate ('\r');
 233   const word_type repl_bs = acc_char_replicate ('\\');
 234   const word_type repl_qm = acc_char_replicate ('?');
 235
 236   unsigned int misalign;
 237   const word_type *p;
 238   word_type val, t;
 239
 240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
 241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
 242   val = *p;  /* The aligned word may include bytes that precede S.  */
 243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
 244   if (misalign)
 245     val = acc_char_mask_misalign (val, misalign);
 246
 247   /* Main loop.  */
 248   while (1)
 249     {
 250       t  = acc_char_cmp (val, repl_nl);
 251       t |= acc_char_cmp (val, repl_cr);
 252       t |= acc_char_cmp (val, repl_bs);
 253       t |= acc_char_cmp (val, repl_qm);
 254
 255       if (__builtin_expect (t != 0, 0))
 256         {
 257           int i = acc_char_index (t, val);  /* -1 means false positive.  */
 258           if (i >= 0)
 259             return (const uchar *)p + i;
 260         }
 261
 262       val = *++p;  /* Nothing in this word; fetch the next one.  */
 263     }
 264 }
 265
 266 /* Disable on Solaris 2/x86 until the following problem can be properly
 267    autoconfed:
 268
 269    The Solaris 10+ assembler tags objects with the instruction set
 270    extensions used, so SSE4.2 executables cannot run on machines that
 271    don't support that extension.  */
 272
 273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
 274
 275 /* Replicated character data to be shared between implementations.
 276    Recall that outside of a context with vector support we can't
 277    define compatible vector types, therefore these are all defined
 278    in terms of raw characters.  Rows are read through vector pointers.  */
 279 static const char repl_chars[4][16] __attribute__((aligned(16))) = {
 280   { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 281     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },  /* [0]: newline.  */
 282   { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 283     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },  /* [1]: carriage return.  */
 284   { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 285     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },  /* [2]: backslash.  */
 286   { '?', '?', '?', '?', '?', '?', '?', '?',
 287     '?', '?', '?', '?', '?', '?', '?', '?' },          /* [3]: question mark.  */
 288 };
 289
 290 /* A version of the fast scanner using MMX vectorized byte compare insns.
 291
 292    This uses the PMOVMSKB instruction which was introduced with "MMX2",
 293    which was packaged into SSE1; it is also present in the AMD MMX
 294    extension.  Mark the function as using "sse" so that we emit a real
 295    "emms" instruction, rather than the 3dNOW "femms" instruction.
 296    END is unused; the scan stops only when a searched character is found.  */
 297 static const uchar *
 298 #ifndef __SSE__
 299 __attribute__((__target__("sse")))
 300 #endif
 301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 302 {
 303   typedef char v8qi __attribute__ ((__vector_size__ (8)));
 304   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
 305
 306   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
 307   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
 308   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
 309   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
 310
 311   unsigned int misalign, found, mask;
 312   const v8qi *p;
 313   v8qi data, t, c;
 314
 315   /* Align the source pointer.  While MMX doesn't generate unaligned data
 316      faults, this allows us to safely scan to the end of the buffer without
 317      reading beyond the end of the last page.  */
 318   misalign = (uintptr_t)s & 7;
 319   p = (const v8qi *)((uintptr_t)s & -8);
 320   data = *p;
 321
 322   /* Create a mask for the bytes that are valid within the first
 323      8-byte block.  The idea here is that the AND with the mask
 324      within the loop is "free", since we need some AND or TEST
 325      insn in order to set the flags for the branch anyway.  */
 326   mask = -1u << misalign;
 327
 328   /* Main loop processing 8 bytes at a time.  */
 329   goto start;  /* The first block is already loaded.  */
 330   do
 331     {
 332       data = *++p;
 333       mask = -1;  /* Every byte of a later block is valid.  */
 334
 335     start:
 336       t = __builtin_ia32_pcmpeqb(data, repl_nl);
 337       c = __builtin_ia32_pcmpeqb(data, repl_cr);
 338       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 339       c = __builtin_ia32_pcmpeqb(data, repl_bs);
 340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 341       c = __builtin_ia32_pcmpeqb(data, repl_qm);
 342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 343       found = __builtin_ia32_pmovmskb (t);
 344       found &= mask;  /* Drop matches in bytes that precede S.  */
 345     }
 346   while (!found);
 347
 348   __builtin_ia32_emms ();
 349
 350   /* FOUND contains 1 in bits for which we matched a relevant
 351      character.  Conversion to the byte index is trivial.  */
 352   found = __builtin_ctz(found);
 353   return (const uchar *)p + found;
 354 }
 355
 356 /* A version of the fast scanner using SSE2 vectorized byte compare insns.
 357    END is unused; the scan stops only when a searched character is found.  */
 358 static const uchar *
 359 #ifndef __SSE2__
 360 __attribute__((__target__("sse2")))
 361 #endif
 362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 363 {
 364   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 365
 366   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
 367   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
 368   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
 369   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
 370
 371   unsigned int misalign, found, mask;
 372   const v16qi *p;
 373   v16qi data, t;
 374
 375   /* Align the source pointer.  */
 376   misalign = (uintptr_t)s & 15;
 377   p = (const v16qi *)((uintptr_t)s & -16);
 378   data = *p;
 379
 380   /* Create a mask for the bytes that are valid within the first
 381      16-byte block.  The idea here is that the AND with the mask
 382      within the loop is "free", since we need some AND or TEST
 383      insn in order to set the flags for the branch anyway.  */
 384   mask = -1u << misalign;
 385
 386   /* Main loop processing 16 bytes at a time.  */
 387   goto start;  /* The first block is already loaded.  */
 388   do
 389     {
 390       data = *++p;
 391       mask = -1;  /* Every byte of a later block is valid.  */
 392
 393     start:
 394       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
 395       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
 396       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
 397       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
 398       found = __builtin_ia32_pmovmskb128 (t);
 399       found &= mask;  /* Drop matches in bytes that precede S.  */
 400     }
 401   while (!found);
 402
 403   /* FOUND contains 1 in bits for which we matched a relevant
 404      character.  Conversion to the byte index is trivial.  */
 405   found = __builtin_ctz(found);
 406   return (const uchar *)p + found;
 407 }
 408
 409 #ifdef HAVE_SSE4
 410 /* A version of the fast scanner using SSE 4.2 vectorized string insns.
 411    Unlike the other scanners it reads END, but only for the page-edge test.  */
 412 static const uchar *
 413 #ifndef __SSE4_2__
 414 __attribute__((__target__("sse4.2")))
 415 #endif
 416 search_line_sse42 (const uchar *s, const uchar *end)
 417 {
 418   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 419   static const v16qi search = { '\n', '\r', '?', '\\' };
 420
 421   uintptr_t si = (uintptr_t)s;
 422   uintptr_t index;
 423
 424   /* Check for unaligned input.  */
 425   if (si & 15)
 426     {
 427       v16qi sv;
 428
 429       if (__builtin_expect (end - s < 16, 0)
 430           && __builtin_expect ((si & 0xfff) > 0xff0, 0))
 431         {
 432           /* There are less than 16 bytes left in the buffer, and less
 433              than 16 bytes left on the page.  Reading 16 bytes at this
 434              point might generate a spurious page fault.  Defer to the
 435              SSE2 implementation, which already handles alignment.  */
 436           return search_line_sse2 (s, end);
 437         }
 438
 439       /* ??? The builtin doesn't understand that the PCMPESTRI read from
 440          memory need not be aligned.  */
 441       sv = __builtin_ia32_loaddqu ((const char *) s);
 442       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
 443
 444       if (__builtin_expect (index < 16, 0))
 445         goto found;
 446
 447       /* Advance the pointer to an aligned address.  We will re-scan a
 448          few bytes, but we no longer need care for reading past the
 449          end of a page, since we're guaranteed a match.  */
 450       s = (const uchar *)((si + 16) & -16);
 451     }
 452
 453   /* Main loop, processing 16 bytes at a time.  By doing the whole loop
 454      in inline assembly, we can make proper use of the flags set.  */
 455   __asm (      "sub $16, %1\n"
 456         "       .balign 16\n"
 457         "0:     add $16, %1\n"
 458         "       %vpcmpestri $0, (%1), %2\n"
 459         "       jnc 0b"
 460         : "=&c"(index), "+r"(s)
 461         : "x"(search), "a"(4), "d"(16));
 462
 463  found:
 464   return s + index;  /* INDEX is the offset of the match within the block at S.  */
 465 }
 466
 467 #else
 468 /* Work around out-dated assemblers without sse4 support.  */
 469 #define search_line_sse42 search_line_sse2
 470 #endif
 471
 472 /* Check the CPU capabilities.  */
 473
 474 #include "../gcc/config/i386/cpuid.h"
 475
 476 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
 477 static search_line_fast_type search_line_fast;  /* Set by init_vectorized_lexer.  */
 478 /* Choose the search_line_* routine to use and store it in search_line_fast.  */
 479 #define HAVE_init_vectorized_lexer 1
 480 static inline void
 481 init_vectorized_lexer (void)
 482 {
 483   unsigned dummy, ecx = 0, edx = 0;
 484   search_line_fast_type impl = search_line_acc_char;
 485   int minimum = 0;  /* From compiler flags: 0 none, 1 SSE, 2 SSE2, 3 SSE4.2.  */
 486
 487 #if defined(__SSE4_2__)
 488   minimum = 3;
 489 #elif defined(__SSE2__)
 490   minimum = 2;
 491 #elif defined(__SSE__)
 492   minimum = 1;
 493 #endif
 494
 495   if (minimum == 3)
 496     impl = search_line_sse42;
 497   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
 498     {
 499       if (minimum == 3 || (ecx & bit_SSE4_2))  /* NOTE(review): minimum == 3 was handled above.  */
 500         impl = search_line_sse42;
 501       else if (minimum == 2 || (edx & bit_SSE2))
 502         impl = search_line_sse2;
 503       else if (minimum == 1 || (edx & bit_SSE))
 504         impl = search_line_mmx;
 505     }
 506   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
 507     {
 508       if (minimum == 1
 509           || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
 510         impl = search_line_mmx;
 511     }
 512
 513   search_line_fast = impl;  /* Still search_line_acc_char if nothing better matched.  */
 514 }
 515
 516 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
 517
 518 /* A version of the fast scanner using AltiVec vectorized byte compares
 519    and VSX unaligned loads (when VSX is available).  This is otherwise
 520    the same as the pre-GCC 5 version.  END is unused.  */
 521
 522 static const uchar *
 523 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 524 {
 525   typedef __attribute__((altivec(vector))) unsigned char vc;
 526
 527   const vc repl_nl = {
 528     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 529     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 530   };
 531   const vc repl_cr = {
 532     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 533     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 534   };
 535   const vc repl_bs = {
 536     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 537     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 538   };
 539   const vc repl_qm = {
 540     '?', '?', '?', '?', '?', '?', '?', '?',
 541     '?', '?', '?', '?', '?', '?', '?', '?',
 542   };
 543   const vc zero = { 0 };
 544
 545   vc data, t;
 546
 547   /* Main loop processing 16 bytes at a time.  */
 548   do
 549     {
 550       vc m_nl, m_cr, m_bs, m_qm;
 551
 552       data = *((const vc *)s);  /* Loaded directly from S, with no alignment step.  */
 553       s += 16;
 554
 555       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 556       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 557       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 558       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 559       t = (m_nl | m_cr) | (m_bs | m_qm);
 560
 561       /* T now contains 0xff in bytes for which we matched one of the relevant
 562          characters.  We want to exit the loop if any byte in T is non-zero.
 563          Below is the expansion of vec_any_ne(t, zero).  */
 564     }
 565   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 566
 567   /* Restore s to point to the 16 bytes we just processed.  */
 568   s -= 16;
 569
 570   {
 571 #define N  (sizeof(vc) / sizeof(long))
 572
 573     union {
 574       vc v;
 575       /* Statically assert that N is 2 or 4.  */
 576       unsigned long l[(N == 2 || N == 4) ? N : -1];
 577     } u;
 578     unsigned long l, i = 0;
 579
 580     u.v = t;
 581
 582     /* Find the first word of T that is non-zero.  */
 583     switch (N)
 584       {
 585       case 4:
 586         l = u.l[i++];
 587         if (l != 0)
 588           break;
 589         s += sizeof(unsigned long);
 590         l = u.l[i++];
 591         if (l != 0)
 592           break;
 593         s += sizeof(unsigned long);  /* Fall through to test the last two words.  */
 594       case 2:
 595         l = u.l[i++];
 596         if (l != 0)
 597           break;
 598         s += sizeof(unsigned long);
 599         l = u.l[i];  /* Loop exit means some word of T is non-zero.  */
 600       }
 601
 602     /* L now contains 0xff in bytes for which we matched one of the
 603        relevant characters.  We can find the byte index by finding
 604        its bit index and dividing by 8.  */
 605 #ifdef __BIG_ENDIAN__
 606     l = __builtin_clzl(l) >> 3;
 607 #else
 608     l = __builtin_ctzl(l) >> 3;
 609 #endif
 610     return s + l;
 611
 612 #undef N
 613   }
 614 }
 615
 616 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
 617
 618 /* A version of the fast scanner using AltiVec vectorized byte compares.
 619    This cannot be used for little endian because vec_lvsl/lvsr are
 620    deprecated for little endian and the code won't work properly.  */
 621 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
 622    so we can't compile this function without -maltivec on the command line
 623    (or implied by some other switch).  END is unused.  */
 624
 625 static const uchar *
 626 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 627 {
 628   typedef __attribute__((altivec(vector))) unsigned char vc;
 629
 630   const vc repl_nl = {
 631     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 632     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 633   };
 634   const vc repl_cr = {
 635     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 636     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 637   };
 638   const vc repl_bs = {
 639     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 640     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 641   };
 642   const vc repl_qm = {
 643     '?', '?', '?', '?', '?', '?', '?', '?',
 644     '?', '?', '?', '?', '?', '?', '?', '?',
 645   };
 646   const vc ones = {
 647     -1, -1, -1, -1, -1, -1, -1, -1,
 648     -1, -1, -1, -1, -1, -1, -1, -1,
 649   };
 650   const vc zero = { 0 };
 651
 652   vc data, mask, t;
 653
 654   /* Altivec loads automatically mask addresses with -16.  This lets us
 655      issue the first load as early as possible.  */
 656   data = __builtin_vec_ld(0, (const vc *)s);
 657
 658   /* Discard bytes before the beginning of the buffer.  Do this by
 659      beginning with all ones and shifting in zeros according to the
 660      mis-alignment.  The LVSR instruction pulls the exact shift we
 661      want from the address.  */
 662   mask = __builtin_vec_lvsr(0, s);
 663   mask = __builtin_vec_perm(zero, ones, mask);
 664   data &= mask;
 665
 666   /* While altivec loads mask addresses, we still need to align S so
 667      that the offset we compute at the end is correct.  */
 668   s = (const uchar *)((uintptr_t)s & -16);
 669
 670   /* Main loop processing 16 bytes at a time.  */
 671   goto start;  /* The first block is already loaded and masked.  */
 672   do
 673     {
 674       vc m_nl, m_cr, m_bs, m_qm;
 675
 676       s += 16;
 677       data = __builtin_vec_ld(0, (const vc *)s);
 678
 679     start:
 680       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 681       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 682       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 683       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 684       t = (m_nl | m_cr) | (m_bs | m_qm);
 685
 686       /* T now contains 0xff in bytes for which we matched one of the relevant
 687          characters.  We want to exit the loop if any byte in T is non-zero.
 688          Below is the expansion of vec_any_ne(t, zero).  */
 689     }
 690   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 691
 692   {
 693 #define N  (sizeof(vc) / sizeof(long))
 694
 695     union {
 696       vc v;
 697       /* Statically assert that N is 2 or 4.  */
 698       unsigned long l[(N == 2 || N == 4) ? N : -1];
 699     } u;
 700     unsigned long l, i = 0;
 701
 702     u.v = t;
 703
 704     /* Find the first word of T that is non-zero.  */
 705     switch (N)
 706       {
 707       case 4:
 708         l = u.l[i++];
 709         if (l != 0)
 710           break;
 711         s += sizeof(unsigned long);
 712         l = u.l[i++];
 713         if (l != 0)
 714           break;
 715         s += sizeof(unsigned long);  /* Fall through to test the last two words.  */
 716       case 2:
 717         l = u.l[i++];
 718         if (l != 0)
 719           break;
 720         s += sizeof(unsigned long);
 721         l = u.l[i];  /* Loop exit means some word of T is non-zero.  */
 722       }
 723
 724     /* L now contains 0xff in bytes for which we matched one of the
 725        relevant characters.  We can find the byte index by finding
 726        its bit index and dividing by 8.  */
 727     l = __builtin_clzl(l) >> 3;  /* Leading zeros: this variant is big-endian only.  */
 728     return s + l;
 729
 730 #undef N
 731   }
 732 }
 733
 734 #elif defined (__ARM_NEON)
 735 #include "arm_neon.h"
 736 /* A version of the fast scanner using NEON byte compares.  END is unused.  */
 737 static const uchar *
 738 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 739 {
 740   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
 741   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
 742   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
 743   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
 744   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
 745
 746   unsigned int misalign, found, mask;
 747   const uint8_t *p;
 748   uint8x16_t data;
 749
 750   /* Align the source pointer.  */
 751   misalign = (uintptr_t)s & 15;
 752   p = (const uint8_t *)((uintptr_t)s & -16);
 753   data = vld1q_u8 (p);
 754
 755   /* Create a mask for the bytes that are valid within the first
 756      16-byte block.  The idea here is that the AND with the mask
 757      within the loop is "free", since we need some AND or TEST
 758      insn in order to set the flags for the branch anyway.  */
 759   mask = (-1u << misalign) & 0xffff;
 760
 761   /* Main loop, processing 16 bytes at a time.  */
 762   goto start;  /* The first block is already loaded.  */
 763
 764   do
 765     {
 766       uint8x8_t l;
 767       uint16x4_t m;
 768       uint32x2_t n;
 769       uint8x16_t t, u, v, w;
 770
 771       p += 16;
 772       data = vld1q_u8 (p);
 773       mask = 0xffff;  /* Every byte of a later block is valid.  */
 774
 775     start:
 776       t = vceqq_u8 (data, repl_nl);
 777       u = vceqq_u8 (data, repl_cr);
 778       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
 779       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
 780       t = vandq_u8 (vorrq_u8 (v, w), xmask);  /* Keep one distinct bit per matching byte.  */
 781       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
 782       m = vpaddl_u8 (l);
 783       n = vpaddl_u16 (m);  /* Pairwise sums collapse each 8-byte half into one lane.  */
 784
 785       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
 786               vshr_n_u64 ((uint64x1_t) n, 24)), 0);
 787       found &= mask;  /* Drop matches in bytes that precede S.  */
 788     }
 789   while (!found);
 790
 791   /* FOUND contains 1 in bits for which we matched a relevant
 792      character.  Conversion to the byte index is trivial.  */
 793   found = __builtin_ctz (found);
 794   return (const uchar *)p + found;
 795 }
 796
 797 #else
 798
 799 /* We only have one accelerated alternative.  Use a direct call so that
 800    we encourage inlining.  */
 801
 802 #define search_line_fast  search_line_acc_char
 803
 804 #endif
 805
 806 /* Set up the lexer.  This calls init_vectorized_lexer when
 807    HAVE_init_vectorized_lexer is defined and does nothing otherwise.  */
 808 void
 809 _cpp_init_lexer (void)
 810 {
 811 #if defined (HAVE_init_vectorized_lexer)
 812   init_vectorized_lexer ();
 813 #endif
 814 }
 815
 816 /* Clean the next logical line of PFILE's buffer in place: splice escaped
 817    newlines, convert enabled trigraphs, note both.  Time-critical.  */
 818 void
 819 _cpp_clean_line (cpp_reader *pfile)
 820 {
 821   cpp_buffer *buffer;
 822   const uchar *s;   /* Read cursor.  */
 823   uchar c, *d, *p;  /* D: write cursor.  P: scratch for backward scans.  */
 824
 825   buffer = pfile->buffer;
 826   buffer->cur_note = buffer->notes_used = 0;
 827   buffer->cur = buffer->line_base = buffer->next_line;
 828   buffer->need_line = false;
 829   s = buffer->next_line;
 830
 831   if (!buffer->from_stage3)
 832     {
 833       const uchar *pbackslash = NULL;
 834
 835       /* Fast path.  This is the common case of an un-escaped line with
 836          no trigraphs.  The primary win here is by not writing any
 837          data back to memory until we have to.  */
 838       while (1)
 839         {
 840           /* Perform an optimized search for \n, \r, \\, ?.  */
 841           s = search_line_fast (s, buffer->rlimit);
 842
 843           c = *s;
 844           if (c == '\\')
 845             {
 846               /* Record the location of the backslash and continue.  */
 847               pbackslash = s++;
 848             }
 849           else if (__builtin_expect (c == '?', 0))
 850             {
 851               if (__builtin_expect (s[1] == '?', false)
 852                    && _cpp_trigraph_map[s[2]])
 853                 {
 854                   /* Have a trigraph.  We may or may not have to convert
 855                      it.  Add a line note regardless, for -Wtrigraphs.  */
 856                   add_line_note (buffer, s, s[2]);
 857                   if (CPP_OPTION (pfile, trigraphs))
 858                     {
 859                       /* We do, and that means we have to switch to the
 860                          slow path.  */
 861                       d = (uchar *) s;
 862                       *d = _cpp_trigraph_map[s[2]];
 863                       s += 2;
 864                       goto slow_path;
 865                     }
 866                 }
 867               /* Not a trigraph.  Continue on fast-path.  */
 868               s++;
 869             }
 870           else
 871             break;
 872         }
 873
 874       /* This must be \r or \n.  We're either done, or we'll be forced
 875          to write back to the buffer and continue on the slow path.  */
 876       d = (uchar *) s;
 877
 878       if (__builtin_expect (s == buffer->rlimit, false))
 879         goto done;
 880
 881       /* DOS line ending? */
 882       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
 883         {
 884           s++;
 885           if (s == buffer->rlimit)
 886             goto done;
 887         }
 888
 889       if (__builtin_expect (pbackslash == NULL, true))
 890         goto done;
 891
 892       /* Check for escaped newline.  */
 893       p = d;
 894       while (is_nvspace (p[-1]))
 895         p--;
 896       if (p - 1 != pbackslash)
 897         goto done;
 898
 899       /* Have an escaped newline; process it and proceed to
 900          the slow path.  */
 901       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 902       d = p - 2;  /* The next *++d store overwrites the backslash.  */
 903       buffer->next_line = p - 1;
 904
 905     slow_path:
 906       while (1)
 907         {
 908           c = *++s;
 909           *++d = c;  /* Copy down over the text that was removed.  */
 910
 911           if (c == '\n' || c == '\r')
 912             {
 913               /* Handle DOS line endings.  */
 914               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 915                 s++;
 916               if (s == buffer->rlimit)
 917                 break;
 918
 919               /* Escaped?  */
 920               p = d;
 921               while (p != buffer->next_line && is_nvspace (p[-1]))
 922                 p--;
 923               if (p == buffer->next_line || p[-1] != '\\')
 924                 break;
 925
 926               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 927               d = p - 2;
 928               buffer->next_line = p - 1;
 929             }
 930           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 931             {
 932               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 933               add_line_note (buffer, d, s[2]);
 934               if (CPP_OPTION (pfile, trigraphs))
 935                 {
 936                   *d = _cpp_trigraph_map[s[2]];
 937                   s += 2;
 938                 }
 939             }
 940         }
 941     }
 942   else
 943     {  /* from_stage3 is set: only locate the line end, no splicing or trigraphs.  */
 944       while (*s != '\n' && *s != '\r')
 945         s++;
 946       d = (uchar *) s;
 947
 948       /* Handle DOS line endings.  */
 949       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 950         s++;
 951     }
 952
 953  done:
 954   *d = '\n';  /* Terminate the cleaned line at the write cursor.  */
 955   /* A sentinel note that should never be processed.  */
 956   add_line_note (buffer, d + 1, '\n');
 957   buffer->next_line = s + 1;  /* The next call starts after the terminator.  */
 958 }
 959
 960 /* Return true if the trigraph indicated by NOTE deserves a warning
 961    even though it lies inside a comment.  */
 962 static bool
 963 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 964 {
 965   const uchar *scan;
 966
 967   if (note->type == '/')
 968     {
 969       /* Under -trigraphs the ??/ escaped a newline exactly when the
 970          next note shares its position.  */
 971       if (CPP_OPTION (pfile, trigraphs))
 972         return note->pos == note[1].pos;
 973
 974       /* Otherwise step over the three trigraph characters and any
 975          non-vertical space, looking for a newline.  */
 976       for (scan = note->pos + 3; is_nvspace (*scan); scan++)
 977         continue;
 978
 979       /* Escaped newlines may separate the trigraph from the newline
 980          found, hence the test against the next note's position.  */
 981       return *scan == '\n' && scan < note[1].pos;
 982     }
 983   /* Within comments no other trigraph is warned about, as only an
 984      escaped newline may change behavior.  */
 985   return false;
 986 }
 987
 988 /* Consume the notes queued by add_line_note whose position is at or
 989    before buffer->cur, issuing the diagnostics each one calls for.  */
 990 void
 991 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 992 {
 993   cpp_buffer *buffer = pfile->buffer;
 994   _cpp_line_note *note;
 995
 996   while ((note = &buffer->notes[buffer->cur_note])->pos <= buffer->cur)
 997     {
 998       unsigned int col;
 999
1000       buffer->cur_note++;
1001       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1002
1003       if (note->type != '\\' && note->type != ' ')
1004         {
1005           /* Not a line splice: either a trigraph note, or a note that
1006              lex_raw_string already processed and set to zero.  */
1007           if (_cpp_trigraph_map[note->type])
1008             {
1009               /* Inside a comment, warn_in_comment decides whether the
1010                  trigraph is still worth reporting.  */
1011               if (!CPP_OPTION (pfile, warn_trigraphs)
1012                   || (in_comment && !warn_in_comment (pfile, note)))
1013                 continue;
1014               if (!CPP_OPTION (pfile, trigraphs))
1015                 cpp_warning_with_line
1016                   (pfile, CPP_W_TRIGRAPHS,
1017                    pfile->line_table->highest_line, col,
1018                    "trigraph ??%c ignored, use -trigraphs to enable",
1019                    note->type);
1020               else
1021                 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1022                                        pfile->line_table->highest_line, col,
1023                                        "trigraph ??%c converted to %c",
1024                                        note->type,
1025                                        (int) _cpp_trigraph_map[note->type]);
1026             }
1027           else if (note->type != 0)
1028             abort ();
1029           continue;
1030         }
1031
1032       /* A backslash-newline, possibly with space in between.  */
1033       if (note->type == ' ' && !in_comment)
1034         cpp_error_with_line (pfile, CPP_DL_WARNING,
1035                              pfile->line_table->highest_line, col,
1036                              "backslash and newline separated by space");
1037       if (buffer->next_line > buffer->rlimit)
1038         {
1039           cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1040                                pfile->line_table->highest_line, col,
1041                                "backslash-newline at end of file");
1042           /* Prevent "no newline at end of file" warning.  */
1043           buffer->next_line = buffer->rlimit;
1044         }
1045
1046       buffer->line_base = note->pos;
1047       CPP_INCREMENT_LINE (pfile, 0);
1048     }
1049 }
1050
1051 /* Skip a C-style block comment.  We find the end of the comment by
1052    seeing if an asterisk is before every '/' we encounter.  Returns
1053    true if the comment ran into the end of the buffer, false if its
1054    closing delimiter was found (buffer->cur is then just past it).
1055    On entry buffer->cur points to the initial asterisk of the comment.  */
1056 bool
1057 _cpp_skip_block_comment (cpp_reader *pfile)
1058 {
1059   cpp_buffer *buffer = pfile->buffer;
1060   const uchar *cur = buffer->cur;
1061   uchar c;
1062   /* Skip the opening '*' and a '/' directly after it (not a delimiter).  */
1063   cur++;
1064   if (*cur == '/')
1065     cur++;
1066
1067   for (;;)
1068     {
1069       /* People like decorating comments with '*', so check for '/'
1070          instead for efficiency.  */
1071       c = *cur++;
1072
1073       if (c == '/')
1074         {
1075           if (cur[-2] == '*')   /* The character just before this '/'.  */
1076             break;
1077
1078           /* Warn about potential nested comments, but not if the '/'
1079              comes immediately before the true comment delimiter.
1080              Don't bother to get it right across escaped newlines.  */
1081           if (CPP_OPTION (pfile, warn_comments)
1082               && cur[0] == '*' && cur[1] != '/')
1083             {
1084               buffer->cur = cur;   /* Presumably so CPP_BUF_COL reports this spot.  */
1085               cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1086                                      pfile->line_table->highest_line,
1087                                      CPP_BUF_COL (buffer),
1088                                      "\"/*\" within comment");
1089             }
1090         }
1091       else if (c == '\n')
1092         {
1093           unsigned int cols;
1094           buffer->cur = cur - 1;   /* Back onto the newline itself.  */
1095           _cpp_process_line_notes (pfile, true);
1096           if (buffer->next_line >= buffer->rlimit)   /* Out of input.  */
1097             return true;
1098           _cpp_clean_line (pfile);
1099
1100           cols = buffer->next_line - buffer->line_base;
1101           CPP_INCREMENT_LINE (pfile, cols);
1102
1103           cur = buffer->cur;   /* Carry on from wherever buffer->cur now is.  */
1104         }
1105     }
1106   /* Closing delimiter found; CUR is one past its '/'.  */
1107   buffer->cur = cur;
1108   _cpp_process_line_notes (pfile, true);
1109   return false;
1110 }
1111
1112 /* Move buffer->cur to the newline ending a C++ line comment.  Returns
1113    nonzero if an escaped newline made the comment span several lines.  */
1114 static int
1115 skip_line_comment (cpp_reader *pfile)
1116 {
1117   cpp_buffer *buf = pfile->buffer;
1118   source_location start_line = pfile->line_table->highest_line;
1119   const uchar *end;
1120
1121   for (end = buf->cur; *end != '\n'; end++)
1122     continue;
1123   buf->cur = end;
1124   _cpp_process_line_notes (pfile, true);
1125   return pfile->line_table->highest_line != start_line;
1126 }
1127
1128 /* Skip a run of non-vertical whitespace that starts with C (already
1129    read), leaving buffer->cur at the first character after the run.  */
1130 static void
1131 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1132 {
1133   cpp_buffer *buffer = pfile->buffer;
1134   bool nul_seen = false;
1135
1136   for (;;)
1137     {
1138       /* Space and tab are always fine, NUL is reported once at the
1139          end, and that leaves only \f and \v to complain about.  */
1140       if (c == '\0')
1141         nul_seen = true;
1142       else if (c != ' ' && c != '\t'
1143                && pfile->state.in_directive && CPP_PEDANTIC (pfile))
1144         cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1145                              pfile->line_table->highest_line,
1146                              CPP_BUF_COL (buffer),
1147                              "%s in preprocessing directive",
1148                              c == '\f' ? "form feed" : "vertical tab");
1149
1150       c = *buffer->cur++;
1151       if (!is_nvspace (c))
1152         break;
1153     }
1154
1155   if (nul_seen)
1156     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1157   buffer->cur--;
1158 }
1159
1160 /* Nonzero if every character of the number token STRING is valid in a
1161    name (no '.', '+' or '-').  PFILE is not used.  */
1162 static int
1163 name_p (cpp_reader *pfile, const cpp_string *string)
1164 {
1165   const unsigned int len = string->len;
1166   unsigned int pos = 0;
1167
1168   while (pos < len && is_idchar (string->text[pos]))
1169     pos++;
1170
1171   return pos == len;
1172 }
1173
1174 /* After lexing TOKEN (an identifier or similar sequence), warn if the
1175    normalization state S shows it is not in NFC/NFKC.  */
1176 static void
1177 warn_about_normalization (cpp_reader *pfile,
1178                           const cpp_token *token,
1179                           const struct normalize_state *s)
1180 {
1181   unsigned char *spelling;
1182   size_t len;
1183
1184   if (pfile->state.skipping
1185       || CPP_OPTION (pfile, warn_normalize) >= NORMALIZE_STATE_RESULT (s))
1186     return;
1187
1188   /* Spell the token with UCNs, even where UTF-8 would do.  */
1189   spelling = XNEWVEC (unsigned char, cpp_token_len (token));
1190   len = cpp_spell_token (pfile, token, spelling, false) - spelling;
1191   if (NORMALIZE_STATE_RESULT (s) != normalized_C)
1192     cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1193                            "`%.*s' is not in NFC", (int) len, spelling);
1194   else
1195     cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1196                            "`%.*s' is not in NFKC", (int) len, spelling);
1197   free (spelling);
1198 }
1199
1200 /* Returns TRUE if the sequence starting at buffer->cur is valid in an
1201    identifier ('$' or a UCN).  FIRST is TRUE if this starts an identifier.  */
1202 static bool
1203 forms_identifier_p (cpp_reader *pfile, int first,
1204                     struct normalize_state *state)
1205 {
1206   cpp_buffer *buffer = pfile->buffer;
1207   const uchar lead = *buffer->cur;
1208
1209   if (lead == '$')
1210     {
1211       if (!CPP_OPTION (pfile, dollars_in_ident))
1212         return false;
1213       buffer->cur++;
1214       /* Clearing the option makes this a one-time complaint.  */
1215       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1216         {
1217           CPP_OPTION (pfile, warn_dollars) = 0;
1218           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1219         }
1220       return true;
1221     }
1222
1223   /* Otherwise only a syntactically valid UCN will do.  */
1224   if (lead != '\\'
1225       || !CPP_OPTION (pfile, extended_identifiers)
1226       || (buffer->cur[1] != 'u' && buffer->cur[1] != 'U'))
1227     return false;
1228
1229   buffer->cur += 2;
1230   if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1231                       state))
1232     return true;
1233   /* Rejected: undo the two-character advance made above.  */
1234   buffer->cur -= 2;
1235   return false;
1236 }
1237
1238 /* Helper function to get the cpp_hashnode of the identifier BASE,
1239    issuing any diagnostic that using that identifier calls for.  */
1240 static cpp_hashnode *
1241 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1242 {
1243   const uchar *end;
1244   unsigned int hash = HT_HASHSTEP (0, *base);
1245   unsigned int len;
1246   cpp_hashnode *node;
1247
1248   /* BASE[0] is taken as given; the identifier then runs for as long
1249      as ISIDNUM holds.  */
1250   for (end = base + 1; ISIDNUM (*end); end++)
1251     hash = HT_HASHSTEP (hash, *end);
1252
1253   len = end - base;
1254   hash = HT_HASHFINISH (hash, len);
1255   node = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1256                                             base, len, hash, HT_ALLOC));
1257
1258   /* Rarely, identifiers require diagnostics when lexed.  */
1259   if (__builtin_expect (!(node->flags & NODE_DIAGNOSTIC)
1260                         || pfile->state.skipping, 1))
1261     return node;
1262
1263   /* It is allowed to poison the same identifier twice.  */
1264   if ((node->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1265     cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1266                NODE_NAME (node));
1267
1268   /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1269      replacement list of a variadic macro.  */
1270   if (node == pfile->spec_nodes.n__VA_ARGS__
1271       && !pfile->state.va_args_ok)
1272     {
1273       if (!CPP_OPTION (pfile, cplusplus))
1274         cpp_error (pfile, CPP_DL_PEDWARN,
1275                    "__VA_ARGS__ can only appear in the expansion"
1276                    " of a C99 variadic macro");
1277       else
1278         cpp_error (pfile, CPP_DL_PEDWARN,
1279                    "__VA_ARGS__ can only appear in the expansion"
1280                    " of a C++11 variadic macro");
1281     }
1282
1283   /* For -Wc++-compat, warn about use of C++ named operators.  */
1284   if (node->flags & NODE_WARN_OPERATOR)
1285     cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1286                  "identifier \"%s\" is a special operator name in C++",
1287                  NODE_NAME (node));
1288
1289   return node;
1290 }
1291
1292 /* Get the cpp_hashnode of the identifier spelled by NAME in PFILE, by
1293    way of lex_identifier_intern (which dereferences the node it gets).  */
1294 cpp_hashnode *
1295 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1296 {
1297   /* lex_identifier_intern takes unsigned characters, hence the cast.  */
1298   const uchar *spelling = (uchar *) name;
1299   return lex_identifier_intern (pfile, spelling);
1300 }
1301
1302 /* Lex an identifier starting at BUFFER->CUR - 1; BASE is the start of its spelling.  */
1303 static cpp_hashnode *
1304 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1305                 struct normalize_state *nst, cpp_hashnode **spelling)
1306 {
1307   cpp_hashnode *result;
1308   const uchar *cur;
1309   unsigned int len;
1310   unsigned int hash = HT_HASHSTEP (0, *base);
1311   /* RESULT is returned; *SPELLING is set to the node for the spelling as written.  */
1312   cur = pfile->buffer->cur;
1313   if (! starts_ucn)
1314     {
1315       while (ISIDNUM (*cur))   /* Fast scan, hashing as we go.  */
1316         {
1317           hash = HT_HASHSTEP (hash, *cur);
1318           cur++;
1319         }
1320       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));   /* Last character scanned.  */
1321     }
1322   pfile->buffer->cur = cur;
1323   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1324     {
1325       /* Slower version for identifiers containing UCNs (or $).  */
1326       do {
1327         while (ISIDNUM (*pfile->buffer->cur))
1328           {
1329             NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1330             pfile->buffer->cur++;
1331           }
1332       } while (forms_identifier_p (pfile, false, nst));
1333       result = _cpp_interpret_identifier (pfile, base,
1334                                           pfile->buffer->cur - base);
1335       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);   /* Raw bytes.  */
1336     }
1337   else
1338     {
1339       len = cur - base;
1340       hash = HT_HASHFINISH (hash, len);
1341       /* The hash built during the fast scan covers the whole spelling.  */
1342       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1343                                                   base, len, hash, HT_ALLOC));
1344       *spelling = result;
1345     }
1346
1347   /* Rarely, identifiers require diagnostics when lexed.  */
1348   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1349                         && !pfile->state.skipping, 0))
1350     {
1351       /* It is allowed to poison the same identifier twice.  */
1352       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1353         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1354                    NODE_NAME (result));
1355
1356       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1357          replacement list of a variadic macro.  */
1358       if (result == pfile->spec_nodes.n__VA_ARGS__
1359           && !pfile->state.va_args_ok)
1360         {
1361           if (CPP_OPTION (pfile, cplusplus))
1362             cpp_error (pfile, CPP_DL_PEDWARN,
1363                        "__VA_ARGS__ can only appear in the expansion"
1364                        " of a C++11 variadic macro");
1365           else
1366             cpp_error (pfile, CPP_DL_PEDWARN,
1367                        "__VA_ARGS__ can only appear in the expansion"
1368                        " of a C99 variadic macro");
1369         }
1370
1371       /* For -Wc++-compat, warn about use of C++ named operators.  */
1372       if (result->flags & NODE_WARN_OPERATOR)
1373         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1374                      "identifier \"%s\" is a special operator name in C++",
1375                      NODE_NAME (result));
1376     }
1377
1378   return result;
1379 }
1380
1381 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  The spelling is
1382    copied, NUL-terminated, into space from _cpp_unaligned_alloc.  */
1383 static void
1384 lex_number (cpp_reader *pfile, cpp_string *number,
1385             struct normalize_state *nst)
1386 {
1387   const uchar *const start = pfile->buffer->cur - 1;
1388   const uchar *p;
1389   uchar *copy;
1390
1391   for (;;)
1392     {
1393       /* N.B. ISIDNUM does not include $.  */
1394       for (p = pfile->buffer->cur;
1395            (ISIDNUM (*p) || *p == '.' || DIGIT_SEP (*p)
1396             || VALID_SIGN (*p, p[-1]));
1397            p++)
1398         NORMALIZE_STATE_UPDATE_IDNUM (nst, *p);
1399
1400       pfile->buffer->cur = p;
1401
1402       /* A '$' or UCN extends the number; anything else ends it.  */
1403       if (!forms_identifier_p (pfile, false, nst))
1404         break;
1405     }
1406
1407   number->len = p - start;
1408   copy = _cpp_unaligned_alloc (pfile, number->len + 1);
1409   memcpy (copy, start, number->len);
1410   copy[number->len] = '\0';
1411   number->text = copy;
1412 }
1413
1414 /* Make TOKEN a literal of type TYPE spelled by the LEN bytes at BASE.  */
1415 static void
1416 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1417                 unsigned int len, enum cpp_ttype type)
1418 {
1419   uchar *copy = _cpp_unaligned_alloc (pfile, len + 1);
1420
1421   token->type = type;
1422   token->val.str.len = len;
1423   token->val.str.text = copy;
1424   memcpy (copy, base, len);
1425   copy[len] = '\0';
1426 }
1427
1428 /* Subroutine of lex_raw_string: copy LEN chars from BASE onto the end of
1429    the buffer sequence *FIRST_BUFF_P .. *LAST_BUFF_P, extending it.  */
1430
1431 static void
1432 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1433                 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1434 {
1435   _cpp_buff *head = *first_buff_p;
1436   _cpp_buff *tail = *last_buff_p;
1437
1438   if (head == NULL)
1439     head = tail = _cpp_get_buff (pfile, len);
1440   else if (BUFF_ROOM (tail) < len)
1441     {
1442       /* Fill what is left of the tail; the rest goes in a new buffer.  */
1443       const size_t room = BUFF_ROOM (tail);
1444       memcpy (BUFF_FRONT (tail), base, room);
1445       base += room;
1446       len -= room;
1447       BUFF_FRONT (tail) += room;
1448       tail = _cpp_append_extend_buff (pfile, tail, len);
1449     }
1450
1451   memcpy (BUFF_FRONT (tail), base, len);
1452   BUFF_FRONT (tail) += len;
1453   *first_buff_p = head;
1454   *last_buff_p = tail;
1455 }
1456
1457
1458 /* Returns true if the identifier starting at BASE is currently the name
1459    of a macro.  This might not work if compiling with -save-temps, or
1460    preprocessing separately from compilation.  */
1461
1462 static bool
1463 is_macro (cpp_reader *pfile, const uchar *base)
1464 {
1465   const uchar *end;
1466   unsigned int hash, len;
1467   cpp_hashnode *node;
1468
1469   if (!ISIDST (*base))
1470     return false;
1471
1472   hash = HT_HASHSTEP (0, *base);
1473   for (end = base + 1; ISIDNUM (*end); end++)
1474     hash = HT_HASHSTEP (hash, *end);
1475   len = end - base;
1476   hash = HT_HASHFINISH (hash, len);
1477   /* A HT_NO_INSERT lookup; the node can come back null.  */
1478   node = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, base, len,
1479                                             hash, HT_NO_INSERT));
1480   return node != NULL && node->type == NT_MACRO;
1481 }
1482
1483
1484 /* Lexes a raw string.  The stored string contains the spelling, including
1485    double quotes, delimiter string, '(' and ')', any leading
1486    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1487    literal, or CPP_OTHER if it was not properly terminated.
1488
1489    The spelling is NUL-terminated, but it is not guaranteed that this
1490    is the first NUL since embedded NULs are preserved.  */
1491
1492 static void
1493 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1494                 const uchar *cur)
1495 {
1496   uchar raw_prefix[17];
1497   uchar temp_buffer[18];
1498   const uchar *orig_base;
1499   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1500   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1501   raw_str_phase phase = RAW_STR_PREFIX;
1502   enum cpp_ttype type;
1503   size_t total_len = 0;
1504   /* Index into temp_buffer during phases other than RAW_STR,
1505      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1506      be appended to temp_buffer.  */
1507   size_t temp_buffer_len = 0;
1508   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1509   size_t raw_prefix_start;
1510   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1511
1512   type = (*base == 'L' ? CPP_WSTRING :
1513           *base == 'U' ? CPP_STRING32 :
1514           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1515           : CPP_STRING);
1516
1517 #define BUF_APPEND(STR,LEN)                                     \
1518       do {                                                      \
1519         bufring_append (pfile, (const uchar *)(STR), (LEN),     \
1520                         &first_buff, &last_buff);               \
1521         total_len += (LEN);                                     \
1522         if (__builtin_expect (temp_buffer_len < 17, 0)          \
1523             && (const uchar *)(STR) != base                     \
1524             && (LEN) <= 2)                                      \
1525           {                                                     \
1526             memcpy (temp_buffer + temp_buffer_len,              \
1527                     (const uchar *)(STR), (LEN));               \
1528             temp_buffer_len += (LEN);                           \
1529           }                                                     \
1530       } while (0);
1531
1532   orig_base = base;
1533   ++cur;
1534   raw_prefix_start = cur - base;
1535   for (;;)
1536     {
1537       cppchar_t c;
1538
1539       /* If we previously performed any trigraph or line splicing
1540          transformations, undo them in between the opening and closing
1541          double quote.  */
1542       while (note->pos < cur)
1543         ++note;
1544       for (; note->pos == cur; ++note)
1545         {
1546           switch (note->type)
1547             {
1548             case '\\':
1549             case ' ':
1550               /* Restore backslash followed by newline.  */
1551               BUF_APPEND (base, cur - base);
1552               base = cur;
1553               BUF_APPEND ("\\", 1);
1554             after_backslash:
1555               if (note->type == ' ')
1556                 {
1557                   /* GNU backslash whitespace newline extension.  FIXME
1558                      could be any sequence of non-vertical space.  When we
1559                      can properly restore any such sequence, we should mark
1560                      this note as handled so _cpp_process_line_notes
1561                      doesn't warn.  */
1562                   BUF_APPEND (" ", 1);
1563                 }
1564
1565               BUF_APPEND ("\n", 1);
1566               break;
1567
1568             case 0:
1569               /* Already handled.  */
1570               break;
1571
1572             default:
1573               if (_cpp_trigraph_map[note->type])
1574                 {
1575                   /* Don't warn about this trigraph in
1576                      _cpp_process_line_notes, since trigraphs show up as
1577                      trigraphs in raw strings.  */
1578                   uchar type = note->type;
1579                   note->type = 0;
1580
1581                   if (!CPP_OPTION (pfile, trigraphs))
1582                     /* If we didn't convert the trigraph in the first
1583                        place, don't do anything now either.  */
1584                     break;
1585
1586                   BUF_APPEND (base, cur - base);
1587                   base = cur;
1588                   BUF_APPEND ("??", 2);
1589
1590                   /* ??/ followed by newline gets two line notes, one for
1591                      the trigraph and one for the backslash/newline.  */
1592                   if (type == '/' && note[1].pos == cur)
1593                     {
1594                       if (note[1].type != '\\'
1595                           && note[1].type != ' ')
1596                         abort ();
1597                       BUF_APPEND ("/", 1);
1598                       ++note;
1599                       goto after_backslash;
1600                     }
1601                   else
1602                     {
1603                       /* Skip the replacement character.  */
1604                       base = ++cur;
1605                       BUF_APPEND (&type, 1);
1606                       c = type;
1607                       goto check_c;
1608                     }
1609                 }
1610               else
1611                 abort ();
1612               break;
1613             }
1614         }
1615       c = *cur++;
1616       if (__builtin_expect (temp_buffer_len < 17, 0))
1617         temp_buffer[temp_buffer_len++] = c;
1618
1619      check_c:
1620       if (phase == RAW_STR_PREFIX)
1621         {
1622           while (raw_prefix_len < temp_buffer_len)
1623             {
1624               raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1625               switch (raw_prefix[raw_prefix_len])
1626                 {
1627                 case ' ': case '(': case ')': case '\\': case '\t':
1628                 case '\v': case '\f': case '\n': default:
1629                   break;
1630                 /* Basic source charset except the above chars.  */
1631                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1632                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1633                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1634                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1635                 case 'y': case 'z':
1636                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1637                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1638                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1639                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1640                 case 'Y': case 'Z':
1641                 case '0': case '1': case '2': case '3': case '4': case '5':
1642                 case '6': case '7': case '8': case '9':
1643                 case '_': case '{': case '}': case '#': case '[': case ']':
1644                 case '<': case '>': case '%': case ':': case ';': case '.':
1645                 case '?': case '*': case '+': case '-': case '/': case '^':
1646                 case '&': case '|': case '~': case '!': case '=': case ',':
1647                 case '"': case '\'':
1648                   if (raw_prefix_len < 16)
1649                     {
1650                       raw_prefix_len++;
1651                       continue;
1652                     }
1653                   break;
1654                 }
1655
1656               if (raw_prefix[raw_prefix_len] != '(')
1657                 {
1658                   int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1659                   if (raw_prefix_len == 16)
1660                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1661                                          col, "raw string delimiter longer "
1662                                               "than 16 characters");
1663                   else if (raw_prefix[raw_prefix_len] == '\n')
1664                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1665                                          col, "invalid new-line in raw "
1666                                               "string delimiter");
1667                   else
1668                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1669                                          col, "invalid character '%c' in "
1670                                               "raw string delimiter",
1671                                          (int) raw_prefix[raw_prefix_len]);
1672                   pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1673                   create_literal (pfile, token, orig_base,
1674                                   raw_prefix_start - 1, CPP_OTHER);
1675                   if (first_buff)
1676                     _cpp_release_buff (pfile, first_buff);
1677                   return;
1678                 }
1679               raw_prefix[raw_prefix_len] = '"';
1680               phase = RAW_STR;
1681               /* Nothing should be appended to temp_buffer during
1682                  RAW_STR phase.  */
1683               temp_buffer_len = 17;
1684               break;
1685             }
1686           continue;
1687         }
1688       else if (phase == RAW_STR_SUFFIX)
1689         {
1690           while (raw_suffix_len <= raw_prefix_len
1691                  && raw_suffix_len < temp_buffer_len
1692                  && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1693             raw_suffix_len++;
1694           if (raw_suffix_len > raw_prefix_len)
1695             break;
1696           if (raw_suffix_len == temp_buffer_len)
1697             continue;
1698           phase = RAW_STR;
1699           /* Nothing should be appended to temp_buffer during
1700              RAW_STR phase.  */
1701           temp_buffer_len = 17;
1702         }
1703       if (c == ')')
1704         {
1705           phase = RAW_STR_SUFFIX;
1706           raw_suffix_len = 0;
1707           temp_buffer_len = 0;
1708         }
1709       else if (c == '\n')
1710         {
1711           if (pfile->state.in_directive
1712               || (pfile->state.parsing_args
1713                   && pfile->buffer->next_line >= pfile->buffer->rlimit))
1714             {
1715               cur--;
1716               type = CPP_OTHER;
1717               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1718                                    "unterminated raw string");
1719               break;
1720             }
1721
1722           BUF_APPEND (base, cur - base);
1723
1724           if (pfile->buffer->cur < pfile->buffer->rlimit)
1725             CPP_INCREMENT_LINE (pfile, 0);
1726           pfile->buffer->need_line = true;
1727
1728           pfile->buffer->cur = cur-1;
1729           _cpp_process_line_notes (pfile, false);
1730           if (!_cpp_get_fresh_line (pfile))
1731             {
1732               source_location src_loc = token->src_loc;
1733               token->type = CPP_EOF;
1734               /* Tell the compiler the line number of the EOF token.  */
1735               token->src_loc = pfile->line_table->highest_line;
1736               token->flags = BOL;
1737               if (first_buff != NULL)
1738                 _cpp_release_buff (pfile, first_buff);
1739               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1740                                    "unterminated raw string");
1741               return;
1742             }
1743
1744           cur = base = pfile->buffer->cur;
1745           note = &pfile->buffer->notes[pfile->buffer->cur_note];
1746         }
1747     }
1748
1749   if (CPP_OPTION (pfile, user_literals))
1750     {
1751       /* If a string format macro, say from inttypes.h, is placed touching
1752          a string literal it could be parsed as a C++11 user-defined string
1753          literal thus breaking the program.
1754          Try to identify macros with is_macro. A warning is issued. */
1755       if (is_macro (pfile, cur))
1756         {
1757           /* Raise a warning, but do not consume subsequent tokens.  */
1758           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1759             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1760                                    token->src_loc, 0,
1761                                    "invalid suffix on literal; C++11 requires "
1762                                    "a space between literal and string macro");
1763         }
1764       /* Grab user defined literal suffix.  */
1765       else if (ISIDST (*cur))
1766         {
1767           type = cpp_userdef_string_add_type (type);
1768           ++cur;
1769
1770           while (ISIDNUM (*cur))
1771             ++cur;
1772         }
1773     }
1774
1775   pfile->buffer->cur = cur;
1776   if (first_buff == NULL)
1777     create_literal (pfile, token, base, cur - base, type);
1778   else
1779     {
1780       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1781
1782       token->type = type;
1783       token->val.str.len = total_len + (cur - base);
1784       token->val.str.text = dest;
1785       last_buff = first_buff;
1786       while (last_buff != NULL)
1787         {
1788           memcpy (dest, last_buff->base,
1789                   BUFF_FRONT (last_buff) - last_buff->base);
1790           dest += BUFF_FRONT (last_buff) - last_buff->base;
1791           last_buff = last_buff->next;
1792         }
1793       _cpp_release_buff (pfile, first_buff);
1794       memcpy (dest, base, cur - base);
1795       dest[cur - base] = '\0';
1796     }
1797 }
1798
1799 /* Lexes a string, character constant, or angle-bracketed header file
1800    name.  The stored string contains the spelling, including opening
1801    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1802    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1803    if it was not properly terminated, or CPP_LESS for an unterminated
1804    header name which must be relexed as normal tokens.
1805
1806    The spelling is NUL-terminated, but it is not guaranteed that this
1807    is the first NUL since embedded NULs are preserved.  */
1808 static void
1809 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1810 {
1811   bool saw_NUL = false;
1812   const uchar *cur;
1813   cppchar_t terminator;
1814   enum cpp_ttype type;
1815
1816   cur = base;
1817   terminator = *cur++;
1818   if (terminator == 'L' || terminator == 'U')
1819     terminator = *cur++;
1820   else if (terminator == 'u')
1821     {
1822       terminator = *cur++;
1823       if (terminator == '8')
1824         terminator = *cur++;
1825     }
1826   if (terminator == 'R')
1827     {
1828       lex_raw_string (pfile, token, base, cur);
1829       return;
1830     }
1831   if (terminator == '"')
1832     type = (*base == 'L' ? CPP_WSTRING :
1833             *base == 'U' ? CPP_STRING32 :
1834             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1835                          : CPP_STRING);
1836   else if (terminator == '\'')
1837     type = (*base == 'L' ? CPP_WCHAR :
1838             *base == 'U' ? CPP_CHAR32 :
1839             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1840   else
1841     terminator = '>', type = CPP_HEADER_NAME;
1842
1843   for (;;)
1844     {
1845       cppchar_t c = *cur++;
1846
1847       /* In #include-style directives, terminators are not escapable.  */
1848       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1849         cur++;
1850       else if (c == terminator)
1851         break;
1852       else if (c == '\n')
1853         {
1854           cur--;
1855           /* Unmatched quotes always yield undefined behavior, but
1856              greedy lexing means that what appears to be an unterminated
1857              header name may actually be a legitimate sequence of tokens.  */
1858           if (terminator == '>')
1859             {
1860               token->type = CPP_LESS;
1861               return;
1862             }
1863           type = CPP_OTHER;
1864           break;
1865         }
1866       else if (c == '\0')
1867         saw_NUL = true;
1868     }
1869
1870   if (saw_NUL && !pfile->state.skipping)
1871     cpp_error (pfile, CPP_DL_WARNING,
1872                "null character(s) preserved in literal");
1873
1874   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1875     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1876                (int) terminator);
1877
1878   if (CPP_OPTION (pfile, user_literals))
1879     {
1880       /* If a string format macro, say from inttypes.h, is placed touching
1881          a string literal it could be parsed as a C++11 user-defined string
1882          literal thus breaking the program.
1883          Try to identify macros with is_macro. A warning is issued. */
1884       if (is_macro (pfile, cur))
1885         {
1886           /* Raise a warning, but do not consume subsequent tokens.  */
1887           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1888             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1889                                    token->src_loc, 0,
1890                                    "invalid suffix on literal; C++11 requires "
1891                                    "a space between literal and string macro");
1892         }
1893       /* Grab user defined literal suffix.  */
1894       else if (ISIDST (*cur))
1895         {
1896           type = cpp_userdef_char_add_type (type);
1897           type = cpp_userdef_string_add_type (type);
1898           ++cur;
1899
1900           while (ISIDNUM (*cur))
1901             ++cur;
1902         }
1903     }
1904
1905   pfile->buffer->cur = cur;
1906   create_literal (pfile, token, base, cur - base, type);
1907 }
1908
1909 /* Return the comment table. The client may not make any assumption
1910    about the ordering of the table.  */
1911 cpp_comment_table *
1912 cpp_get_comments (cpp_reader *pfile)
1913 {
1914   return &pfile->comments;
1915 }
1916
1917 /* Append a comment to the end of the comment table. */
1918 static void
1919 store_comment (cpp_reader *pfile, cpp_token *token)
1920 {
1921   int len;
1922
1923   if (pfile->comments.allocated == 0)
1924     {
1925       pfile->comments.allocated = 256;
1926       pfile->comments.entries = (cpp_comment *) xmalloc
1927         (pfile->comments.allocated * sizeof (cpp_comment));
1928     }
1929
1930   if (pfile->comments.count == pfile->comments.allocated)
1931     {
1932       pfile->comments.allocated *= 2;
1933       pfile->comments.entries = (cpp_comment *) xrealloc
1934         (pfile->comments.entries,
1935          pfile->comments.allocated * sizeof (cpp_comment));
1936     }
1937
1938   len = token->val.str.len;
1939
1940   /* Copy comment. Note, token may not be NULL terminated. */
1941   pfile->comments.entries[pfile->comments.count].comment =
1942     (char *) xmalloc (sizeof (char) * (len + 1));
1943   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1944           token->val.str.text, len);
1945   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1946
1947   /* Set source location. */
1948   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1949
1950   /* Increment the count of entries in the comment table. */
1951   pfile->comments.count++;
1952 }
1953
1954 /* The stored comment includes the comment start and any terminator.  */
1955 static void
1956 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1957               cppchar_t type)
1958 {
1959   unsigned char *buffer;
1960   unsigned int len, clen, i;
1961
1962   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1963
1964   /* C++ comments probably (not definitely) have moved past a new
1965      line, which we don't want to save in the comment.  */
1966   if (is_vspace (pfile->buffer->cur[-1]))
1967     len--;
1968
1969   /* If we are currently in a directive or in argument parsing, then
1970      we need to store all C++ comments as C comments internally, and
1971      so we need to allocate a little extra space in that case.
1972
1973      Note that the only time we encounter a directive here is
1974      when we are saving comments in a "#define".  */
1975   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1976           && type == '/') ? len + 2 : len;
1977
1978   buffer = _cpp_unaligned_alloc (pfile, clen);
1979
1980   token->type = CPP_COMMENT;
1981   token->val.str.len = clen;
1982   token->val.str.text = buffer;
1983
1984   buffer[0] = '/';
1985   memcpy (buffer + 1, from, len - 1);
1986
1987   /* Finish conversion to a C comment, if necessary.  */
1988   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1989     {
1990       buffer[1] = '*';
1991       buffer[clen - 2] = '*';
1992       buffer[clen - 1] = '/';
1993       /* As there can be in a C++ comments illegal sequences for C comments
1994          we need to filter them out.  */
1995       for (i = 2; i < (clen - 2); i++)
1996         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1997           buffer[i] = '|';
1998     }
1999
2000   /* Finally store this comment for use by clients of libcpp. */
2001   store_comment (pfile, token);
2002 }
2003
2004 /* Allocate COUNT tokens for RUN.  */
2005 void
2006 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2007 {
2008   run->base = XNEWVEC (cpp_token, count);
2009   run->limit = run->base + count;
2010   run->next = NULL;
2011 }
2012
2013 /* Returns the next tokenrun, or creates one if there is none.  */
2014 static tokenrun *
2015 next_tokenrun (tokenrun *run)
2016 {
2017   if (run->next == NULL)
2018     {
2019       run->next = XNEW (tokenrun);
2020       run->next->prev = run;
2021       _cpp_init_tokenrun (run->next, 250);
2022     }
2023
2024   return run->next;
2025 }
2026
2027 /* Return the number of not yet processed token in a given
2028    context.  */
2029 int
2030 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2031 {
2032   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2033     return (LAST (context).token - FIRST (context).token);
2034   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2035            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2036     return (LAST (context).ptoken - FIRST (context).ptoken);
2037   else
2038       abort ();
2039 }
2040
2041 /* Returns the token present at index INDEX in a given context.  If
2042    INDEX is zero, the next token to be processed is returned.  */
2043 static const cpp_token*
2044 _cpp_token_from_context_at (cpp_context *context, int index)
2045 {
2046   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2047     return &(FIRST (context).token[index]);
2048   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2049            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2050     return FIRST (context).ptoken[index];
2051  else
2052    abort ();
2053 }
2054
2055 /* Look ahead in the input stream.  */
2056 const cpp_token *
2057 cpp_peek_token (cpp_reader *pfile, int index)
2058 {
2059   cpp_context *context = pfile->context;
2060   const cpp_token *peektok;
2061   int count;
2062
2063   /* First, scan through any pending cpp_context objects.  */
2064   while (context->prev)
2065     {
2066       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2067
2068       if (index < (int) sz)
2069         return _cpp_token_from_context_at (context, index);
2070       index -= (int) sz;
2071       context = context->prev;
2072     }
2073
2074   /* We will have to read some new tokens after all (and do so
2075      without invalidating preceding tokens).  */
2076   count = index;
2077   pfile->keep_tokens++;
2078
2079   do
2080     {
2081       peektok = _cpp_lex_token (pfile);
2082       if (peektok->type == CPP_EOF)
2083         return peektok;
2084     }
2085   while (index--);
2086
2087   _cpp_backup_tokens_direct (pfile, count + 1);
2088   pfile->keep_tokens--;
2089
2090   return peektok;
2091 }
2092
2093 /* Allocate a single token that is invalidated at the same time as the
2094    rest of the tokens on the line.  Has its line and col set to the
2095    same as the last lexed token, so that diagnostics appear in the
2096    right place.  */
2097 cpp_token *
2098 _cpp_temp_token (cpp_reader *pfile)
2099 {
2100   cpp_token *old, *result;
2101   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2102   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2103
2104   old = pfile->cur_token - 1;
2105   /* Any pre-existing lookaheads must not be clobbered.  */
2106   if (la)
2107     {
2108       if (sz <= la)
2109         {
2110           tokenrun *next = next_tokenrun (pfile->cur_run);
2111
2112           if (sz < la)
2113             memmove (next->base + 1, next->base,
2114                      (la - sz) * sizeof (cpp_token));
2115
2116           next->base[0] = pfile->cur_run->limit[-1];
2117         }
2118
2119       if (sz > 1)
2120         memmove (pfile->cur_token + 1, pfile->cur_token,
2121                  MIN (la, sz - 1) * sizeof (cpp_token));
2122     }
2123
2124   if (!sz && pfile->cur_token == pfile->cur_run->limit)
2125     {
2126       pfile->cur_run = next_tokenrun (pfile->cur_run);
2127       pfile->cur_token = pfile->cur_run->base;
2128     }
2129
2130   result = pfile->cur_token++;
2131   result->src_loc = old->src_loc;
2132   return result;
2133 }
2134
2135 /* Lex a token into RESULT (external interface).  Takes care of issues
2136    like directive handling, token lookahead, multiple include
2137    optimization and skipping.  */
2138 const cpp_token *
2139 _cpp_lex_token (cpp_reader *pfile)
2140 {
2141   cpp_token *result;
2142
2143   for (;;)
2144     {
2145       if (pfile->cur_token == pfile->cur_run->limit)
2146         {
2147           pfile->cur_run = next_tokenrun (pfile->cur_run);
2148           pfile->cur_token = pfile->cur_run->base;
2149         }
2150       /* We assume that the current token is somewhere in the current
2151          run.  */
2152       if (pfile->cur_token < pfile->cur_run->base
2153           || pfile->cur_token >= pfile->cur_run->limit)
2154         abort ();
2155
2156       if (pfile->lookaheads)
2157         {
2158           pfile->lookaheads--;
2159           result = pfile->cur_token++;
2160         }
2161       else
2162         result = _cpp_lex_direct (pfile);
2163
2164       if (result->flags & BOL)
2165         {
2166           /* Is this a directive.  If _cpp_handle_directive returns
2167              false, it is an assembler #.  */
2168           if (result->type == CPP_HASH
2169               /* 6.10.3 p 11: Directives in a list of macro arguments
2170                  gives undefined behavior.  This implementation
2171                  handles the directive as normal.  */
2172               && pfile->state.parsing_args != 1)
2173             {
2174               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2175                 {
2176                   if (pfile->directive_result.type == CPP_PADDING)
2177                     continue;
2178                   result = &pfile->directive_result;
2179                 }
2180             }
2181           else if (pfile->state.in_deferred_pragma)
2182             result = &pfile->directive_result;
2183
2184           if (pfile->cb.line_change && !pfile->state.skipping)
2185             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2186         }
2187
2188       /* We don't skip tokens in directives.  */
2189       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2190         break;
2191
2192       /* Outside a directive, invalidate controlling macros.  At file
2193          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2194          get here and MI optimization works.  */
2195       pfile->mi_valid = false;
2196
2197       if (!pfile->state.skipping || result->type == CPP_EOF)
2198         break;
2199     }
2200
2201   return result;
2202 }
2203
2204 /* Returns true if a fresh line has been loaded.  */
2205 bool
2206 _cpp_get_fresh_line (cpp_reader *pfile)
2207 {
2208   int return_at_eof;
2209
2210   /* We can't get a new line until we leave the current directive.  */
2211   if (pfile->state.in_directive)
2212     return false;
2213
2214   for (;;)
2215     {
2216       cpp_buffer *buffer = pfile->buffer;
2217
2218       if (!buffer->need_line)
2219         return true;
2220
2221       if (buffer->next_line < buffer->rlimit)
2222         {
2223           _cpp_clean_line (pfile);
2224           return true;
2225         }
2226
2227       /* First, get out of parsing arguments state.  */
2228       if (pfile->state.parsing_args)
2229         return false;
2230
2231       /* End of buffer.  Non-empty files should end in a newline.  */
2232       if (buffer->buf != buffer->rlimit
2233           && buffer->next_line > buffer->rlimit
2234           && !buffer->from_stage3)
2235         {
2236           /* Clip to buffer size.  */
2237           buffer->next_line = buffer->rlimit;
2238         }
2239
2240       return_at_eof = buffer->return_at_eof;
2241       _cpp_pop_buffer (pfile);
2242       if (pfile->buffer == NULL || return_at_eof)
2243         return false;
2244     }
2245 }
2246
/* Choose between a one- and a two-character operator.  If the next
   character of BUFFER is CHAR, consume it and give RESULT the type
   THEN_TYPE; otherwise give RESULT the type ELSE_TYPE and consume
   nothing.  Relies on `buffer' and `result' being in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == CHAR)                         \
        {                                               \
          buffer->cur++;                                \
          result->type = THEN_TYPE;                     \
        }                                               \
      else                                              \
        result->type = ELSE_TYPE;                       \
    }                                                   \
  while (0)
2255
2256 /* Lex a token into pfile->cur_token, which is also incremented, to
2257    get diagnostics pointing to the correct location.
2258
2259    Does not handle issues such as token lookahead, multiple-include
2260    optimization, directives, skipping etc.  This function is only
2261    suitable for use by _cpp_lex_token, and in special cases like
2262    lex_expansion_token which doesn't care for any of these issues.
2263
2264    When meeting a newline, returns CPP_EOF if parsing a directive,
2265    otherwise returns to the start of the token buffer if permissible.
2266    Returns the location of the lexed token.  */
2267 cpp_token *
2268 _cpp_lex_direct (cpp_reader *pfile)
2269 {
2270   cppchar_t c;
2271   cpp_buffer *buffer;
2272   const unsigned char *comment_start;
2273   cpp_token *result = pfile->cur_token++;
2274
2275  fresh_line:
2276   result->flags = 0;
2277   buffer = pfile->buffer;
2278   if (buffer->need_line)
2279     {
2280       if (pfile->state.in_deferred_pragma)
2281         {
2282           result->type = CPP_PRAGMA_EOL;
2283           pfile->state.in_deferred_pragma = false;
2284           if (!pfile->state.pragma_allow_expansion)
2285             pfile->state.prevent_expansion--;
2286           return result;
2287         }
2288       if (!_cpp_get_fresh_line (pfile))
2289         {
2290           result->type = CPP_EOF;
2291           if (!pfile->state.in_directive)
2292             {
2293               /* Tell the compiler the line number of the EOF token.  */
2294               result->src_loc = pfile->line_table->highest_line;
2295               result->flags = BOL;
2296             }
2297           return result;
2298         }
2299       if (!pfile->keep_tokens)
2300         {
2301           pfile->cur_run = &pfile->base_run;
2302           result = pfile->base_run.base;
2303           pfile->cur_token = result + 1;
2304         }
2305       result->flags = BOL;
2306       if (pfile->state.parsing_args == 2)
2307         result->flags |= PREV_WHITE;
2308     }
2309   buffer = pfile->buffer;
2310  update_tokens_line:
2311   result->src_loc = pfile->line_table->highest_line;
2312
2313  skipped_white:
2314   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2315       && !pfile->overlaid_buffer)
2316     {
2317       _cpp_process_line_notes (pfile, false);
2318       result->src_loc = pfile->line_table->highest_line;
2319     }
2320   c = *buffer->cur++;
2321
2322   if (pfile->forced_token_location_p)
2323     result->src_loc = *pfile->forced_token_location_p;
2324   else
2325     result->src_loc = linemap_position_for_column (pfile->line_table,
2326                                           CPP_BUF_COLUMN (buffer, buffer->cur));
2327
2328   switch (c)
2329     {
2330     case ' ': case '\t': case '\f': case '\v': case '\0':
2331       result->flags |= PREV_WHITE;
2332       skip_whitespace (pfile, c);
2333       goto skipped_white;
2334
2335     case '\n':
2336       if (buffer->cur < buffer->rlimit)
2337         CPP_INCREMENT_LINE (pfile, 0);
2338       buffer->need_line = true;
2339       goto fresh_line;
2340
2341     case '0': case '1': case '2': case '3': case '4':
2342     case '5': case '6': case '7': case '8': case '9':
2343       {
2344         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2345         result->type = CPP_NUMBER;
2346         lex_number (pfile, &result->val.str, &nst);
2347         warn_about_normalization (pfile, result, &nst);
2348         break;
2349       }
2350
2351     case 'L':
2352     case 'u':
2353     case 'U':
2354     case 'R':
2355       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2356          wide strings or raw strings.  */
2357       if (c == 'L' || CPP_OPTION (pfile, rliterals)
2358           || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2359         {
2360           if ((*buffer->cur == '\'' && c != 'R')
2361               || *buffer->cur == '"'
2362               || (*buffer->cur == 'R'
2363                   && c != 'R'
2364                   && buffer->cur[1] == '"'
2365                   && CPP_OPTION (pfile, rliterals))
2366               || (*buffer->cur == '8'
2367                   && c == 'u'
2368                   && (buffer->cur[1] == '"'
2369                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2370                           && CPP_OPTION (pfile, rliterals)))))
2371             {
2372               lex_string (pfile, result, buffer->cur - 1);
2373               break;
2374             }
2375         }
2376       /* Fall through.  */
2377
2378     case '_':
2379     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2380     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2381     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2382     case 's': case 't':           case 'v': case 'w': case 'x':
2383     case 'y': case 'z':
2384     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2385     case 'G': case 'H': case 'I': case 'J': case 'K':
2386     case 'M': case 'N': case 'O': case 'P': case 'Q':
2387     case 'S': case 'T':           case 'V': case 'W': case 'X':
2388     case 'Y': case 'Z':
2389       result->type = CPP_NAME;
2390       {
2391         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2392         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2393                                                 &nst,
2394                                                 &result->val.node.spelling);
2395         warn_about_normalization (pfile, result, &nst);
2396       }
2397
2398       /* Convert named operators to their proper types.  */
2399       if (result->val.node.node->flags & NODE_OPERATOR)
2400         {
2401           result->flags |= NAMED_OP;
2402           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2403         }
2404       break;
2405
2406     case '\'':
2407     case '"':
2408       lex_string (pfile, result, buffer->cur - 1);
2409       break;
2410
2411     case '/':
2412       /* A potential block or line comment.  */
2413       comment_start = buffer->cur;
2414       c = *buffer->cur;
2415
2416       if (c == '*')
2417         {
2418           if (_cpp_skip_block_comment (pfile))
2419             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2420         }
2421       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2422         {
2423           /* Don't warn for system headers.  */
2424           if (cpp_in_system_header (pfile))
2425             ;
2426           /* Warn about comments if pedantically GNUC89, and not
2427              in system headers.  */
2428           else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2429                    && CPP_PEDANTIC (pfile)
2430                    && ! buffer->warned_cplusplus_comments)
2431             {
2432               cpp_error (pfile, CPP_DL_PEDWARN,
2433                          "C++ style comments are not allowed in ISO C90");
2434               cpp_error (pfile, CPP_DL_PEDWARN,
2435                          "(this will be reported only once per input file)");
2436               buffer->warned_cplusplus_comments = 1;
2437             }
2438           /* Or if specifically desired via -Wc90-c99-compat.  */
2439           else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2440                    && ! CPP_OPTION (pfile, cplusplus)
2441                    && ! buffer->warned_cplusplus_comments)
2442             {
2443               cpp_error (pfile, CPP_DL_WARNING,
2444                          "C++ style comments are incompatible with C90");
2445               cpp_error (pfile, CPP_DL_WARNING,
2446                          "(this will be reported only once per input file)");
2447               buffer->warned_cplusplus_comments = 1;
2448             }
2449           /* In C89/C94, C++ style comments are forbidden.  */
2450           else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2451                     || CPP_OPTION (pfile, lang) == CLK_STDC94))
2452             {
2453               /* But don't be confused about valid code such as
2454                  - // immediately followed by *,
2455                  - // in a preprocessing directive,
2456                  - // in an #if 0 block.  */
2457               if (buffer->cur[1] == '*'
2458                   || pfile->state.in_directive
2459                   || pfile->state.skipping)
2460                 {
2461                   result->type = CPP_DIV;
2462                   break;
2463                 }
2464               else if (! buffer->warned_cplusplus_comments)
2465                 {
2466                   cpp_error (pfile, CPP_DL_ERROR,
2467                              "C++ style comments are not allowed in ISO C90");
2468                   cpp_error (pfile, CPP_DL_ERROR,
2469                              "(this will be reported only once per input "
2470                              "file)");
2471                   buffer->warned_cplusplus_comments = 1;
2472                 }
2473             }
2474           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2475             cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2476         }
2477       else if (c == '=')
2478         {
2479           buffer->cur++;
2480           result->type = CPP_DIV_EQ;
2481           break;
2482         }
2483       else
2484         {
2485           result->type = CPP_DIV;
2486           break;
2487         }
2488
2489       if (!pfile->state.save_comments)
2490         {
2491           result->flags |= PREV_WHITE;
2492           goto update_tokens_line;
2493         }
2494
2495       /* Save the comment as a token in its own right.  */
2496       save_comment (pfile, result, comment_start, c);
2497       break;
2498
2499     case '<':
2500       if (pfile->state.angled_headers)
2501         {
2502           lex_string (pfile, result, buffer->cur - 1);
2503           if (result->type != CPP_LESS)
2504             break;
2505         }
2506
2507       result->type = CPP_LESS;
2508       if (*buffer->cur == '=')
2509         buffer->cur++, result->type = CPP_LESS_EQ;
2510       else if (*buffer->cur == '<')
2511         {
2512           buffer->cur++;
2513           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2514         }
2515       else if (CPP_OPTION (pfile, digraphs))
2516         {
2517           if (*buffer->cur == ':')
2518             {
2519               /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2520                  three characters are <:: and the subsequent character
2521                  is neither : nor >, the < is treated as a preprocessor
2522                  token by itself".  */
2523               if (CPP_OPTION (pfile, cplusplus)
2524                   && CPP_OPTION (pfile, lang) != CLK_CXX98
2525                   && CPP_OPTION (pfile, lang) != CLK_GNUCXX
2526                   && buffer->cur[1] == ':'
2527                   && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2528                 break;
2529
2530               buffer->cur++;
2531               result->flags |= DIGRAPH;
2532               result->type = CPP_OPEN_SQUARE;
2533             }
2534           else if (*buffer->cur == '%')
2535             {
2536               buffer->cur++;
2537               result->flags |= DIGRAPH;
2538               result->type = CPP_OPEN_BRACE;
2539             }
2540         }
2541       break;
2542
2543     case '>':
2544       result->type = CPP_GREATER;
2545       if (*buffer->cur == '=')
2546         buffer->cur++, result->type = CPP_GREATER_EQ;
2547       else if (*buffer->cur == '>')
2548         {
2549           buffer->cur++;
2550           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2551         }
2552       break;
2553
2554     case '%':
2555       result->type = CPP_MOD;
2556       if (*buffer->cur == '=')
2557         buffer->cur++, result->type = CPP_MOD_EQ;
2558       else if (CPP_OPTION (pfile, digraphs))
2559         {
2560           if (*buffer->cur == ':')
2561             {
2562               buffer->cur++;
2563               result->flags |= DIGRAPH;
2564               result->type = CPP_HASH;
2565               if (*buffer->cur == '%' && buffer->cur[1] == ':')
2566                 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2567             }
2568           else if (*buffer->cur == '>')
2569             {
2570               buffer->cur++;
2571               result->flags |= DIGRAPH;
2572               result->type = CPP_CLOSE_BRACE;
2573             }
2574         }
2575       break;
2576
2577     case '.':
2578       result->type = CPP_DOT;
2579       if (ISDIGIT (*buffer->cur))
2580         {
2581           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2582           result->type = CPP_NUMBER;
2583           lex_number (pfile, &result->val.str, &nst);
2584           warn_about_normalization (pfile, result, &nst);
2585         }
2586       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2587         buffer->cur += 2, result->type = CPP_ELLIPSIS;
2588       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2589         buffer->cur++, result->type = CPP_DOT_STAR;
2590       break;
2591
2592     case '+':
2593       result->type = CPP_PLUS;
2594       if (*buffer->cur == '+')
2595         buffer->cur++, result->type = CPP_PLUS_PLUS;
2596       else if (*buffer->cur == '=')
2597         buffer->cur++, result->type = CPP_PLUS_EQ;
2598       break;
2599
2600     case '-':
2601       result->type = CPP_MINUS;
2602       if (*buffer->cur == '>')
2603         {
2604           buffer->cur++;
2605           result->type = CPP_DEREF;
2606           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2607             buffer->cur++, result->type = CPP_DEREF_STAR;
2608         }
2609       else if (*buffer->cur == '-')
2610         buffer->cur++, result->type = CPP_MINUS_MINUS;
2611       else if (*buffer->cur == '=')
2612         buffer->cur++, result->type = CPP_MINUS_EQ;
2613       break;
2614
2615     case '&':
2616       result->type = CPP_AND;
2617       if (*buffer->cur == '&')
2618         buffer->cur++, result->type = CPP_AND_AND;
2619       else if (*buffer->cur == '=')
2620         buffer->cur++, result->type = CPP_AND_EQ;
2621       break;
2622
2623     case '|':
2624       result->type = CPP_OR;
2625       if (*buffer->cur == '|')
2626         buffer->cur++, result->type = CPP_OR_OR;
2627       else if (*buffer->cur == '=')
2628         buffer->cur++, result->type = CPP_OR_EQ;
2629       break;
2630
2631     case ':':
2632       result->type = CPP_COLON;
2633       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2634         buffer->cur++, result->type = CPP_SCOPE;
2635       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2636         {
2637           buffer->cur++;
2638           result->flags |= DIGRAPH;
2639           result->type = CPP_CLOSE_SQUARE;
2640         }
2641       break;
2642
2643     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2644     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2645     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2646     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2647     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2648
2649     case '?': result->type = CPP_QUERY; break;
2650     case '~': result->type = CPP_COMPL; break;
2651     case ',': result->type = CPP_COMMA; break;
2652     case '(': result->type = CPP_OPEN_PAREN; break;
2653     case ')': result->type = CPP_CLOSE_PAREN; break;
2654     case '[': result->type = CPP_OPEN_SQUARE; break;
2655     case ']': result->type = CPP_CLOSE_SQUARE; break;
2656     case '{': result->type = CPP_OPEN_BRACE; break;
2657     case '}': result->type = CPP_CLOSE_BRACE; break;
2658     case ';': result->type = CPP_SEMICOLON; break;
2659
2660       /* @ is a punctuator in Objective-C.  */
2661     case '@': result->type = CPP_ATSIGN; break;
2662
2663     case '$':
2664     case '\\':
2665       {
2666         const uchar *base = --buffer->cur;
2667         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2668
2669         if (forms_identifier_p (pfile, true, &nst))
2670           {
2671             result->type = CPP_NAME;
2672             result->val.node.node = lex_identifier (pfile, base, true, &nst,
2673                                                     &result->val.node.spelling);
2674             warn_about_normalization (pfile, result, &nst);
2675             break;
2676           }
2677         buffer->cur++;
2678       }
2679
2680     default:
2681       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2682       break;
2683     }
2684
2685   return result;
2686 }
2687
2688 /* An upper bound on the number of bytes needed to spell TOKEN.
2689    Does not include preceding whitespace.  */
2690 unsigned int
2691 cpp_token_len (const cpp_token *token)
2692 {
2693   unsigned int len;
2694
2695   switch (TOKEN_SPELL (token))
2696     {
2697     default:            len = 6;                                break;
2698     case SPELL_LITERAL: len = token->val.str.len;               break;
2699     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
2700     }
2701
2702   return len;
2703 }
2704
/* Decode the UTF-8 sequence starting at NAME and store the matching
   "\UXXXXXXXX" escape (lower-case hex digits) in BUFFER.  Exactly 10
   bytes are written to BUFFER and no terminating NUL is added.
   Returns the number of bytes consumed from NAME, which is the
   number of leading one bits in the first byte.  Calls abort if a
   continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead_bits = *name;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The count of leading one bits in the first byte is the length
     of the whole sequence.  */
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* Payload bits of the first byte, then six bits from each
     continuation byte.  */
  code_point = *name & (0x7F >> seq_len);
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      unsigned char cont = *++name;

      /* Ill-formed UTF-8.  */
      if ((cont & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (cont & 0x3F);
    }

  /* Emit the escape, most significant nibble first.  */
  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2738
2739 /* Given a token TYPE corresponding to a digraph, return a pointer to
2740    the spelling of the digraph.  */
2741 static const unsigned char *
2742 cpp_digraph2name (enum cpp_ttype type)
2743 {
2744   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2745 }
2746
2747 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2748    The buffer must already contain the enough space to hold the
2749    token's spelling.  Returns a pointer to the character after the
2750    last character written.  */
2751 unsigned char *
2752 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2753 {
2754   size_t i;
2755   const unsigned char *name = NODE_NAME (ident);
2756
2757   for (i = 0; i < NODE_LEN (ident); i++)
2758     if (name[i] & ~0x7F)
2759       {
2760         i += utf8_to_ucn (buffer, name + i) - 1;
2761         buffer += 10;
2762       }
2763     else
2764       *buffer++ = name[i];
2765
2766   return buffer;
2767 }
2768
2769 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2770    already contain the enough space to hold the token's spelling.
2771    Returns a pointer to the character after the last character written.
2772    FORSTRING is true if this is to be the spelling after translation
2773    phase 1 (with the original spelling of extended identifiers), false
2774    if extended identifiers should always be written using UCNs (there is
2775    no option for always writing them in the internal UTF-8 form).
2776    FIXME: Would be nice if we didn't need the PFILE argument.  */
2777 unsigned char *
2778 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2779                  unsigned char *buffer, bool forstring)
2780 {
2781   switch (TOKEN_SPELL (token))
2782     {
2783     case SPELL_OPERATOR:
2784       {
2785         const unsigned char *spelling;
2786         unsigned char c;
2787
2788         if (token->flags & DIGRAPH)
2789           spelling = cpp_digraph2name (token->type);
2790         else if (token->flags & NAMED_OP)
2791           goto spell_ident;
2792         else
2793           spelling = TOKEN_NAME (token);
2794
2795         while ((c = *spelling++) != '\0')
2796           *buffer++ = c;
2797       }
2798       break;
2799
2800     spell_ident:
2801     case SPELL_IDENT:
2802       if (forstring)
2803         {
2804           memcpy (buffer, NODE_NAME (token->val.node.spelling),
2805                   NODE_LEN (token->val.node.spelling));
2806           buffer += NODE_LEN (token->val.node.spelling);
2807         }
2808       else
2809         buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2810       break;
2811
2812     case SPELL_LITERAL:
2813       memcpy (buffer, token->val.str.text, token->val.str.len);
2814       buffer += token->val.str.len;
2815       break;
2816
2817     case SPELL_NONE:
2818       cpp_error (pfile, CPP_DL_ICE,
2819                  "unspellable token %s", TOKEN_NAME (token));
2820       break;
2821     }
2822
2823   return buffer;
2824 }
2825
2826 /* Returns TOKEN spelt as a null-terminated string.  The string is
2827    freed when the reader is destroyed.  Useful for diagnostics.  */
2828 unsigned char *
2829 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2830 {
2831   unsigned int len = cpp_token_len (token) + 1;
2832   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2833
2834   end = cpp_spell_token (pfile, token, start, false);
2835   end[0] = '\0';
2836
2837   return start;
2838 }
2839
2840 /* Returns a pointer to a string which spells the token defined by
2841    TYPE and FLAGS.  Used by C front ends, which really should move to
2842    using cpp_token_as_text.  */
2843 const char *
2844 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2845 {
2846   if (flags & DIGRAPH)
2847     return (const char *) cpp_digraph2name (type);
2848   else if (flags & NAMED_OP)
2849     return cpp_named_operator2name (type);
2850
2851   return (const char *) token_spellings[type].name;
2852 }
2853
2854 /* Writes the spelling of token to FP, without any preceding space.
2855    Separated from cpp_spell_token for efficiency - to avoid stdio
2856    double-buffering.  */
2857 void
2858 cpp_output_token (const cpp_token *token, FILE *fp)
2859 {
2860   switch (TOKEN_SPELL (token))
2861     {
2862     case SPELL_OPERATOR:
2863       {
2864         const unsigned char *spelling;
2865         int c;
2866
2867         if (token->flags & DIGRAPH)
2868           spelling = cpp_digraph2name (token->type);
2869         else if (token->flags & NAMED_OP)
2870           goto spell_ident;
2871         else
2872           spelling = TOKEN_NAME (token);
2873
2874         c = *spelling;
2875         do
2876           putc (c, fp);
2877         while ((c = *++spelling) != '\0');
2878       }
2879       break;
2880
2881     spell_ident:
2882     case SPELL_IDENT:
2883       {
2884         size_t i;
2885         const unsigned char * name = NODE_NAME (token->val.node.node);
2886
2887         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2888           if (name[i] & ~0x7F)
2889             {
2890               unsigned char buffer[10];
2891               i += utf8_to_ucn (buffer, name + i) - 1;
2892               fwrite (buffer, 1, 10, fp);
2893             }
2894           else
2895             fputc (NODE_NAME (token->val.node.node)[i], fp);
2896       }
2897       break;
2898
2899     case SPELL_LITERAL:
2900       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2901       break;
2902
2903     case SPELL_NONE:
2904       /* An error, most probably.  */
2905       break;
2906     }
2907 }
2908
2909 /* Compare two tokens.  */
2910 int
2911 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2912 {
2913   if (a->type == b->type && a->flags == b->flags)
2914     switch (TOKEN_SPELL (a))
2915       {
2916       default:                  /* Keep compiler happy.  */
2917       case SPELL_OPERATOR:
2918         /* token_no is used to track where multiple consecutive ##
2919            tokens were originally located.  */
2920         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2921       case SPELL_NONE:
2922         return (a->type != CPP_MACRO_ARG
2923                 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2924                     && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2925       case SPELL_IDENT:
2926         return (a->val.node.node == b->val.node.node
2927                 && a->val.node.spelling == b->val.node.spelling);
2928       case SPELL_LITERAL:
2929         return (a->val.str.len == b->val.str.len
2930                 && !memcmp (a->val.str.text, b->val.str.text,
2931                             a->val.str.len));
2932       }
2933
2934   return 0;
2935 }
2936
2937 /* Returns nonzero if a space should be inserted to avoid an
2938    accidental token paste for output.  For simplicity, it is
2939    conservative, and occasionally advises a space where one is not
2940    needed, e.g. "." and ".2".  */
2941 int
2942 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2943                  const cpp_token *token2)
2944 {
2945   enum cpp_ttype a = token1->type, b = token2->type;
2946   cppchar_t c;
2947
2948   if (token1->flags & NAMED_OP)
2949     a = CPP_NAME;
2950   if (token2->flags & NAMED_OP)
2951     b = CPP_NAME;
2952
2953   c = EOF;
2954   if (token2->flags & DIGRAPH)
2955     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2956   else if (token_spellings[b].category == SPELL_OPERATOR)
2957     c = token_spellings[b].name[0];
2958
2959   /* Quickly get everything that can paste with an '='.  */
2960   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2961     return 1;
2962
2963   switch (a)
2964     {
2965     case CPP_GREATER:   return c == '>';
2966     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2967     case CPP_PLUS:      return c == '+';
2968     case CPP_MINUS:     return c == '-' || c == '>';
2969     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2970     case CPP_MOD:       return c == ':' || c == '>';
2971     case CPP_AND:       return c == '&';
2972     case CPP_OR:        return c == '|';
2973     case CPP_COLON:     return c == ':' || c == '>';
2974     case CPP_DEREF:     return c == '*';
2975     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2976     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2977     case CPP_NAME:      return ((b == CPP_NUMBER
2978                                  && name_p (pfile, &token2->val.str))
2979                                 || b == CPP_NAME
2980                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
2981     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
2982                                 || c == '.' || c == '+' || c == '-');
2983                                       /* UCNs */
2984     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
2985                                  && b == CPP_NAME)
2986                                 || (CPP_OPTION (pfile, objc)
2987                                     && token1->val.str.text[0] == '@'
2988                                     && (b == CPP_NAME || b == CPP_STRING)));
2989     case CPP_STRING:
2990     case CPP_WSTRING:
2991     case CPP_UTF8STRING:
2992     case CPP_STRING16:
2993     case CPP_STRING32:  return (CPP_OPTION (pfile, user_literals)
2994                                 && (b == CPP_NAME
2995                                     || (TOKEN_SPELL (token2) == SPELL_LITERAL
2996                                         && ISIDST (token2->val.str.text[0]))));
2997
2998     default:            break;
2999     }
3000
3001   return 0;
3002 }
3003
3004 /* Output all the remaining tokens on the current line, and a newline
3005    character, to FP.  Leading whitespace is removed.  If there are
3006    macros, special token padding is not performed.  */
3007 void
3008 cpp_output_line (cpp_reader *pfile, FILE *fp)
3009 {
3010   const cpp_token *token;
3011
3012   token = cpp_get_token (pfile);
3013   while (token->type != CPP_EOF)
3014     {
3015       cpp_output_token (token, fp);
3016       token = cpp_get_token (pfile);
3017       if (token->flags & PREV_WHITE)
3018         putc (' ', fp);
3019     }
3020
3021   putc ('\n', fp);
3022 }
3023
3024 /* Return a string representation of all the remaining tokens on the
3025    current line.  The result is allocated using xmalloc and must be
3026    freed by the caller.  */
3027 unsigned char *
3028 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3029 {
3030   const cpp_token *token;
3031   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3032   unsigned int alloced = 120 + out;
3033   unsigned char *result = (unsigned char *) xmalloc (alloced);
3034
3035   /* If DIR_NAME is empty, there are no initial contents.  */
3036   if (dir_name)
3037     {
3038       sprintf ((char *) result, "#%s ", dir_name);
3039       out += 2;
3040     }
3041
3042   token = cpp_get_token (pfile);
3043   while (token->type != CPP_EOF)
3044     {
3045       unsigned char *last;
3046       /* Include room for a possible space and the terminating nul.  */
3047       unsigned int len = cpp_token_len (token) + 2;
3048
3049       if (out + len > alloced)
3050         {
3051           alloced *= 2;
3052           if (out + len > alloced)
3053             alloced = out + len;
3054           result = (unsigned char *) xrealloc (result, alloced);
3055         }
3056
3057       last = cpp_spell_token (pfile, token, &result[out], 0);
3058       out = last - result;
3059
3060       token = cpp_get_token (pfile);
3061       if (token->flags & PREV_WHITE)
3062         result[out++] = ' ';
3063     }
3064
3065   result[out] = '\0';
3066   return result;
3067 }
3068
/* Memory buffers.  Changing these three definitions can have a
   dramatic effect on performance.  The values here are reasonable
   defaults, but might be tuned.  If you adjust them, be sure to test
   across a range of uses of cpplib, including heavy nested
   function-like macro expansion.  Also check the change in peak
   memory usage (NJAMD is a good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Both
   macro arguments are parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3083
3084 /* Create a new allocation buffer.  Place the control block at the end
3085    of the buffer, so that buffer overflows will cause immediate chaos.  */
3086 static _cpp_buff *
3087 new_buff (size_t len)
3088 {
3089   _cpp_buff *result;
3090   unsigned char *base;
3091
3092   if (len < MIN_BUFF_SIZE)
3093     len = MIN_BUFF_SIZE;
3094   len = CPP_ALIGN (len);
3095
3096 #ifdef ENABLE_VALGRIND_CHECKING
3097   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3098      struct first.  */
3099   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3100   base = XNEWVEC (unsigned char, len + slen);
3101   result = (_cpp_buff *) base;
3102   base += slen;
3103 #else
3104   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3105   result = (_cpp_buff *) (base + len);
3106 #endif
3107   result->base = base;
3108   result->cur = base;
3109   result->limit = base + len;
3110   result->next = NULL;
3111   return result;
3112 }
3113
3114 /* Place a chain of unwanted allocation buffers on the free list.  */
3115 void
3116 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3117 {
3118   _cpp_buff *end = buff;
3119
3120   while (end->next)
3121     end = end->next;
3122   end->next = pfile->free_buffs;
3123   pfile->free_buffs = buff;
3124 }
3125
3126 /* Return a free buffer of size at least MIN_SIZE.  */
3127 _cpp_buff *
3128 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3129 {
3130   _cpp_buff *result, **p;
3131
3132   for (p = &pfile->free_buffs;; p = &(*p)->next)
3133     {
3134       size_t size;
3135
3136       if (*p == NULL)
3137         return new_buff (min_size);
3138       result = *p;
3139       size = result->limit - result->base;
3140       /* Return a buffer that's big enough, but don't waste one that's
3141          way too big.  */
3142       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3143         break;
3144     }
3145
3146   *p = result->next;
3147   result->next = NULL;
3148   result->cur = result->base;
3149   return result;
3150 }
3151
3152 /* Creates a new buffer with enough space to hold the uncommitted
3153    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3154    the excess bytes to the new buffer.  Chains the new buffer after
3155    BUFF, and returns the new buffer.  */
3156 _cpp_buff *
3157 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3158 {
3159   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3160   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3161
3162   buff->next = new_buff;
3163   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3164   return new_buff;
3165 }
3166
3167 /* Creates a new buffer with enough space to hold the uncommitted
3168    remaining bytes of the buffer pointed to by BUFF, and at least
3169    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3170    Chains the new buffer before the buffer pointed to by BUFF, and
3171    updates the pointer to point to the new buffer.  */
3172 void
3173 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3174 {
3175   _cpp_buff *new_buff, *old_buff = *pbuff;
3176   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3177
3178   new_buff = _cpp_get_buff (pfile, size);
3179   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3180   new_buff->next = old_buff;
3181   *pbuff = new_buff;
3182 }
3183
3184 /* Free a chain of buffers starting at BUFF.  */
3185 void
3186 _cpp_free_buff (_cpp_buff *buff)
3187 {
3188   _cpp_buff *next;
3189
3190   for (; buff; buff = next)
3191     {
3192       next = buff->next;
3193 #ifdef ENABLE_VALGRIND_CHECKING
3194       free (buff);
3195 #else
3196       free (buff->base);
3197 #endif
3198     }
3199 }
3200
3201 /* Allocate permanent, unaligned storage of length LEN.  */
3202 unsigned char *
3203 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3204 {
3205   _cpp_buff *buff = pfile->u_buff;
3206   unsigned char *result = buff->cur;
3207
3208   if (len > (size_t) (buff->limit - result))
3209     {
3210       buff = _cpp_get_buff (pfile, len);
3211       buff->next = pfile->u_buff;
3212       pfile->u_buff = buff;
3213       result = buff->cur;
3214     }
3215
3216   buff->cur = result + len;
3217   return result;
3218 }
3219
3220 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3221    That buffer is used for growing allocations when saving macro
3222    replacement lists in a #define, and when parsing an answer to an
3223    assertion in #assert, #unassert or #if (and therefore possibly
3224    whilst expanding macros).  It therefore must not be used by any
3225    code that they might call: specifically the lexer and the guts of
3226    the macro expander.
3227
3228    All existing other uses clearly fit this restriction: storing
3229    registered pragmas during initialization.  */
3230 unsigned char *
3231 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3232 {
3233   _cpp_buff *buff = pfile->a_buff;
3234   unsigned char *result = buff->cur;
3235
3236   if (len > (size_t) (buff->limit - result))
3237     {
3238       buff = _cpp_get_buff (pfile, len);
3239       buff->next = pfile->a_buff;
3240       pfile->a_buff = buff;
3241       result = buff->cur;
3242     }
3243
3244   buff->cur = result + len;
3245   return result;
3246 }
3247
3248 /* Say which field of TOK is in use.  */
3249
3250 enum cpp_token_fld_kind
3251 cpp_token_val_index (const cpp_token *tok)
3252 {
3253   switch (TOKEN_SPELL (tok))
3254     {
3255     case SPELL_IDENT:
3256       return CPP_TOKEN_FLD_NODE;
3257     case SPELL_LITERAL:
3258       return CPP_TOKEN_FLD_STR;
3259     case SPELL_OPERATOR:
3260       if (tok->type == CPP_PASTE)
3261         return CPP_TOKEN_FLD_TOKEN_NO;
3262       else
3263         return CPP_TOKEN_FLD_NONE;
3264     case SPELL_NONE:
3265       if (tok->type == CPP_MACRO_ARG)
3266         return CPP_TOKEN_FLD_ARG_NO;
3267       else if (tok->type == CPP_PADDING)
3268         return CPP_TOKEN_FLD_SOURCE;
3269       else if (tok->type == CPP_PRAGMA)
3270         return CPP_TOKEN_FLD_PRAGMA;
3271       /* else fall through */
3272     default:
3273       return CPP_TOKEN_FLD_NONE;
3274     }
3275 }
3276
3277 /* All tokens lexed in R after calling this function will be forced to have
3278    their source_location the same as the location referenced by P, until
3279    cpp_stop_forcing_token_locations is called for R.  */
3280
3281 void
3282 cpp_force_token_locations (cpp_reader *r, source_location *p)
3283 {
3284   r->forced_token_location_p = p;
3285 }
3286
3287 /* Go back to assigning locations naturally for lexed tokens.  */
3288
3289 void
3290 cpp_stop_forcing_token_locations (cpp_reader *r)
3291 {
3292   r->forced_token_location_p = NULL;
3293 }