libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000-2014 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 3, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, UC s  },
  45 #define TK(e, s) { SPELL_ ## s,    UC #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  58 static void store_comment (cpp_reader *, cpp_token *);
  59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  60                             unsigned int, enum cpp_ttype);
  61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  62 static int name_p (cpp_reader *, const cpp_string *);
  63 static tokenrun *next_tokenrun (tokenrun *);
  64
  65 static _cpp_buff *new_buff (size_t);
  66
  67
  68 /* Utility routine:
  69
  70    Compares, the token TOKEN to the NUL-terminated string STRING.
  71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  72 int
  73 cpp_ideq (const cpp_token *token, const char *string)
  74 {
  75   if (token->type != CPP_NAME)
  76     return 0;
  77
  78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
  79 }
  80
  81 /* Record a note TYPE at byte POS into the current cleaned logical
  82    line.  */
  83 static void
  84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  85 {
  86   if (buffer->notes_used == buffer->notes_cap)
  87     {
  88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  90                                   buffer->notes_cap);
  91     }
  92
  93   buffer->notes[buffer->notes_used].pos = pos;
  94   buffer->notes[buffer->notes_used].type = type;
  95   buffer->notes_used++;
  96 }
  97
  98 \f
  99 /* Fast path to find line special characters using optimized character
 100    scanning algorithms.  Anything complicated falls back to the slow
 101    path below.  Since this loop is very hot it's worth doing these kinds
 102    of optimizations.
 103
 104    One of the paths through the ifdefs should provide
 105
 106      const uchar *search_line_fast (const uchar *s, const uchar *end);
 107
 108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
 109    the found character.
 110
 111    Note that the last character of the buffer is *always* a newline,
 112    as forced by _cpp_convert_input.  This fact can be used to avoid
 113    explicitly looking for the end of the buffer.  */
 114
 115 /* Configure gives us an ifdef test.  */
 116 #ifndef WORDS_BIGENDIAN
 117 #define WORDS_BIGENDIAN 0
 118 #endif
 119
 120 /* We'd like the largest integer that fits into a register.  There's nothing
 121    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
 122    but MS decided on an LLP64 model.  Thankfully when building with GCC we
 123    can get the "real" word size.  */
 124 #ifdef __GNUC__
 125 typedef unsigned int word_type __attribute__((__mode__(__word__)));
 126 #else
 127 typedef unsigned long word_type;
 128 #endif
 129
 130 /* The code below is only expecting sizes 4 or 8.
 131    Die at compile-time if this expectation is violated.  */
 132 typedef char check_word_type_size
 133   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
 134
 135 /* Return X with the first N bytes forced to values that won't match one
 136    of the interesting characters.  Note that NUL is not interesting.  */
 137
 138 static inline word_type
 139 acc_char_mask_misalign (word_type val, unsigned int n)
 140 {
 141   word_type mask = -1;
 142   if (WORDS_BIGENDIAN)
 143     mask >>= n * 8;
 144   else
 145     mask <<= n * 8;
 146   return val & mask;
 147 }
 148
 149 /* Return X replicated to all byte positions within WORD_TYPE.  */
 150
 151 static inline word_type
 152 acc_char_replicate (uchar x)
 153 {
 154   word_type ret;
 155
 156   ret = (x << 24) | (x << 16) | (x << 8) | x;
 157   if (sizeof(word_type) == 8)
 158     ret = (ret << 16 << 16) | ret;
 159   return ret;
 160 }
 161
 162 /* Return non-zero if some byte of VAL is (probably) C.  */
 163
 164 static inline word_type
 165 acc_char_cmp (word_type val, word_type c)
 166 {
 167 #if defined(__GNUC__) && defined(__alpha__)
 168   /* We can get exact results using a compare-bytes instruction.
 169      Get (val == c) via (0 >= (val ^ c)).  */
 170   return __builtin_alpha_cmpbge (0, val ^ c);
 171 #else
 172   word_type magic = 0x7efefefeU;
 173   if (sizeof(word_type) == 8)
 174     magic = (magic << 16 << 16) | 0xfefefefeU;
 175   magic |= 1;
 176
 177   val ^= c;
 178   return ((val + magic) ^ ~val) & ~magic;
 179 #endif
 180 }
 181
 182 /* Given the result of acc_char_cmp is non-zero, return the index of
 183    the found character.  If this was a false positive, return -1.  */
 184
 185 static inline int
 186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
 187                 word_type val ATTRIBUTE_UNUSED)
 188 {
 189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
 190   /* The cmpbge instruction sets *bits* of the result corresponding to
 191      matches in the bytes with no false positives.  */
 192   return __builtin_ctzl (cmp);
 193 #else
 194   unsigned int i;
 195
 196   /* ??? It would be nice to force unrolling here,
 197      and have all of these constants folded.  */
 198   for (i = 0; i < sizeof(word_type); ++i)
 199     {
 200       uchar c;
 201       if (WORDS_BIGENDIAN)
 202         c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
 203       else
 204         c = (val >> i * 8) & 0xff;
 205
 206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
 207         return i;
 208     }
 209
 210   return -1;
 211 #endif
 212 }
 213
 214 /* A version of the fast scanner using bit fiddling techniques.
 215
 216    For 32-bit words, one would normally perform 16 comparisons and
 217    16 branches.  With this algorithm one performs 24 arithmetic
 218    operations and one branch.  Whether this is faster with a 32-bit
 219    word size is going to be somewhat system dependent.
 220
 221    For 64-bit words, we eliminate twice the number of comparisons
 222    and branches without increasing the number of arithmetic operations.
 223    It's almost certainly going to be a win with 64-bit word size.  */
 224
 225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
 226   ATTRIBUTE_UNUSED;
 227
 228 static const uchar *
 229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 230 {
 231   const word_type repl_nl = acc_char_replicate ('\n');
 232   const word_type repl_cr = acc_char_replicate ('\r');
 233   const word_type repl_bs = acc_char_replicate ('\\');
 234   const word_type repl_qm = acc_char_replicate ('?');
 235
 236   unsigned int misalign;
 237   const word_type *p;
 238   word_type val, t;
 239
 240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
 241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
 242   val = *p;
 243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
 244   if (misalign)
 245     val = acc_char_mask_misalign (val, misalign);
 246
 247   /* Main loop.  */
 248   while (1)
 249     {
 250       t  = acc_char_cmp (val, repl_nl);
 251       t |= acc_char_cmp (val, repl_cr);
 252       t |= acc_char_cmp (val, repl_bs);
 253       t |= acc_char_cmp (val, repl_qm);
 254
 255       if (__builtin_expect (t != 0, 0))
 256         {
 257           int i = acc_char_index (t, val);
 258           if (i >= 0)
 259             return (const uchar *)p + i;
 260         }
 261
 262       val = *++p;
 263     }
 264 }
 265
 266 /* Disable on Solaris 2/x86 until the following problem can be properly
 267    autoconfed:
 268
 269    The Solaris 10+ assembler tags objects with the instruction set
 270    extensions used, so SSE4.2 executables cannot run on machines that
 271    don't support that extension.  */
 272
 273 #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
 274
 275 /* Replicated character data to be shared between implementations.
 276    Recall that outside of a context with vector support we can't
 277    define compatible vector types, therefore these are all defined
 278    in terms of raw characters.  */
 279 static const char repl_chars[4][16] __attribute__((aligned(16))) = {
 280   { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 281     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
 282   { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 283     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
 284   { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 285     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
 286   { '?', '?', '?', '?', '?', '?', '?', '?',
 287     '?', '?', '?', '?', '?', '?', '?', '?' },
 288 };
 289
 290 /* A version of the fast scanner using MMX vectorized byte compare insns.
 291
 292    This uses the PMOVMSKB instruction which was introduced with "MMX2",
 293    which was packaged into SSE1; it is also present in the AMD MMX
 294    extension.  Mark the function as using "sse" so that we emit a real
 295    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
 296
 297 static const uchar *
 298 #ifndef __SSE__
 299 __attribute__((__target__("sse")))
 300 #endif
 301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 302 {
 303   typedef char v8qi __attribute__ ((__vector_size__ (8)));
 304   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
 305
 306   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
 307   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
 308   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
 309   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
 310
 311   unsigned int misalign, found, mask;
 312   const v8qi *p;
 313   v8qi data, t, c;
 314
 315   /* Align the source pointer.  While MMX doesn't generate unaligned data
 316      faults, this allows us to safely scan to the end of the buffer without
 317      reading beyond the end of the last page.  */
 318   misalign = (uintptr_t)s & 7;
 319   p = (const v8qi *)((uintptr_t)s & -8);
 320   data = *p;
 321
 322   /* Create a mask for the bytes that are valid within the first
 323      16-byte block.  The Idea here is that the AND with the mask
 324      within the loop is "free", since we need some AND or TEST
 325      insn in order to set the flags for the branch anyway.  */
 326   mask = -1u << misalign;
 327
 328   /* Main loop processing 8 bytes at a time.  */
 329   goto start;
 330   do
 331     {
 332       data = *++p;
 333       mask = -1;
 334
 335     start:
 336       t = __builtin_ia32_pcmpeqb(data, repl_nl);
 337       c = __builtin_ia32_pcmpeqb(data, repl_cr);
 338       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 339       c = __builtin_ia32_pcmpeqb(data, repl_bs);
 340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 341       c = __builtin_ia32_pcmpeqb(data, repl_qm);
 342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 343       found = __builtin_ia32_pmovmskb (t);
 344       found &= mask;
 345     }
 346   while (!found);
 347
 348   __builtin_ia32_emms ();
 349
 350   /* FOUND contains 1 in bits for which we matched a relevant
 351      character.  Conversion to the byte index is trivial.  */
 352   found = __builtin_ctz(found);
 353   return (const uchar *)p + found;
 354 }
 355
 356 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
 357
 358 static const uchar *
 359 #ifndef __SSE2__
 360 __attribute__((__target__("sse2")))
 361 #endif
 362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 363 {
 364   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 365
 366   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
 367   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
 368   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
 369   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
 370
 371   unsigned int misalign, found, mask;
 372   const v16qi *p;
 373   v16qi data, t;
 374
 375   /* Align the source pointer.  */
 376   misalign = (uintptr_t)s & 15;
 377   p = (const v16qi *)((uintptr_t)s & -16);
 378   data = *p;
 379
 380   /* Create a mask for the bytes that are valid within the first
 381      16-byte block.  The Idea here is that the AND with the mask
 382      within the loop is "free", since we need some AND or TEST
 383      insn in order to set the flags for the branch anyway.  */
 384   mask = -1u << misalign;
 385
 386   /* Main loop processing 16 bytes at a time.  */
 387   goto start;
 388   do
 389     {
 390       data = *++p;
 391       mask = -1;
 392
 393     start:
 394       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
 395       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
 396       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
 397       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
 398       found = __builtin_ia32_pmovmskb128 (t);
 399       found &= mask;
 400     }
 401   while (!found);
 402
 403   /* FOUND contains 1 in bits for which we matched a relevant
 404      character.  Conversion to the byte index is trivial.  */
 405   found = __builtin_ctz(found);
 406   return (const uchar *)p + found;
 407 }
 408
 409 #ifdef HAVE_SSE4
 410 /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
 411
 412 static const uchar *
 413 #ifndef __SSE4_2__
 414 __attribute__((__target__("sse4.2")))
 415 #endif
 416 search_line_sse42 (const uchar *s, const uchar *end)
 417 {
 418   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 419   static const v16qi search = { '\n', '\r', '?', '\\' };
 420
 421   uintptr_t si = (uintptr_t)s;
 422   uintptr_t index;
 423
 424   /* Check for unaligned input.  */
 425   if (si & 15)
 426     {
 427       v16qi sv;
 428
 429       if (__builtin_expect (end - s < 16, 0)
 430           && __builtin_expect ((si & 0xfff) > 0xff0, 0))
 431         {
 432           /* There are less than 16 bytes left in the buffer, and less
 433              than 16 bytes left on the page.  Reading 16 bytes at this
 434              point might generate a spurious page fault.  Defer to the
 435              SSE2 implementation, which already handles alignment.  */
 436           return search_line_sse2 (s, end);
 437         }
 438
 439       /* ??? The builtin doesn't understand that the PCMPESTRI read from
 440          memory need not be aligned.  */
 441       sv = __builtin_ia32_loaddqu ((const char *) s);
 442       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
 443
 444       if (__builtin_expect (index < 16, 0))
 445         goto found;
 446
 447       /* Advance the pointer to an aligned address.  We will re-scan a
 448          few bytes, but we no longer need care for reading past the
 449          end of a page, since we're guaranteed a match.  */
 450       s = (const uchar *)((si + 16) & -16);
 451     }
 452
 453   /* Main loop, processing 16 bytes at a time.  By doing the whole loop
 454      in inline assembly, we can make proper use of the flags set.  */
 455   __asm (      "sub $16, %1\n"
 456         "       .balign 16\n"
 457         "0:     add $16, %1\n"
 458         "       %vpcmpestri $0, (%1), %2\n"
 459         "       jnc 0b"
 460         : "=&c"(index), "+r"(s)
 461         : "x"(search), "a"(4), "d"(16));
 462
 463  found:
 464   return s + index;
 465 }
 466
 467 #else
 468 /* Work around out-dated assemblers without sse4 support.  */
 469 #define search_line_sse42 search_line_sse2
 470 #endif
 471
 472 /* Check the CPU capabilities.  */
 473
 474 #if (GCC_VERSION >= 5000)
 475 #include "../gcc/config/i386/cpuid.h"
 476 #else
 477 #include <cpuid.h>
 478 #endif
 479
 480 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
 481 static search_line_fast_type search_line_fast;
 482
 483 #define HAVE_init_vectorized_lexer 1
 484 static inline void
 485 init_vectorized_lexer (void)
 486 {
 487   unsigned dummy, ecx = 0, edx = 0;
 488   search_line_fast_type impl = search_line_acc_char;
 489   int minimum = 0;
 490
 491 #if defined(__SSE4_2__)
 492   minimum = 3;
 493 #elif defined(__SSE2__)
 494   minimum = 2;
 495 #elif defined(__SSE__)
 496   minimum = 1;
 497 #endif
 498
 499   if (minimum == 3)
 500     impl = search_line_sse42;
 501   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
 502     {
 503       if (minimum == 3 || (ecx & bit_SSE4_2))
 504         impl = search_line_sse42;
 505       else if (minimum == 2 || (edx & bit_SSE2))
 506         impl = search_line_sse2;
 507       else if (minimum == 1 || (edx & bit_SSE))
 508         impl = search_line_mmx;
 509     }
 510   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
 511     {
 512       if (minimum == 1
 513           || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
 514         impl = search_line_mmx;
 515     }
 516
 517   search_line_fast = impl;
 518 }
 519
 520 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
 521
 522 /* A vection of the fast scanner using AltiVec vectorized byte compares
 523    and VSX unaligned loads (when VSX is available).  This is otherwise
 524    the same as the pre-GCC 5 version.  */
 525
 526 static const uchar *
 527 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 528 {
 529   typedef __attribute__((altivec(vector))) unsigned char vc;
 530
 531   const vc repl_nl = {
 532     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 533     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 534   };
 535   const vc repl_cr = {
 536     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 537     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 538   };
 539   const vc repl_bs = {
 540     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 541     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 542   };
 543   const vc repl_qm = {
 544     '?', '?', '?', '?', '?', '?', '?', '?',
 545     '?', '?', '?', '?', '?', '?', '?', '?',
 546   };
 547   const vc zero = { 0 };
 548
 549   vc data, t;
 550
 551   /* Main loop processing 16 bytes at a time.  */
 552   do
 553     {
 554       vc m_nl, m_cr, m_bs, m_qm;
 555
 556       data = *((const vc *)s);
 557       s += 16;
 558
 559       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 560       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 561       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 562       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 563       t = (m_nl | m_cr) | (m_bs | m_qm);
 564
 565       /* T now contains 0xff in bytes for which we matched one of the relevant
 566          characters.  We want to exit the loop if any byte in T is non-zero.
 567          Below is the expansion of vec_any_ne(t, zero).  */
 568     }
 569   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 570
 571   /* Restore s to to point to the 16 bytes we just processed.  */
 572   s -= 16;
 573
 574   {
 575 #define N  (sizeof(vc) / sizeof(long))
 576
 577     union {
 578       vc v;
 579       /* Statically assert that N is 2 or 4.  */
 580       unsigned long l[(N == 2 || N == 4) ? N : -1];
 581     } u;
 582     unsigned long l, i = 0;
 583
 584     u.v = t;
 585
 586     /* Find the first word of T that is non-zero.  */
 587     switch (N)
 588       {
 589       case 4:
 590         l = u.l[i++];
 591         if (l != 0)
 592           break;
 593         s += sizeof(unsigned long);
 594         l = u.l[i++];
 595         if (l != 0)
 596           break;
 597         s += sizeof(unsigned long);
 598       case 2:
 599         l = u.l[i++];
 600         if (l != 0)
 601           break;
 602         s += sizeof(unsigned long);
 603         l = u.l[i];
 604       }
 605
 606     /* L now contains 0xff in bytes for which we matched one of the
 607        relevant characters.  We can find the byte index by finding
 608        its bit index and dividing by 8.  */
 609 #ifdef __BIG_ENDIAN__
 610     l = __builtin_clzl(l) >> 3;
 611 #else
 612     l = __builtin_ctzl(l) >> 3;
 613 #endif
 614     return s + l;
 615
 616 #undef N
 617   }
 618 }
 619
 620 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
 621
 622 /* A vection of the fast scanner using AltiVec vectorized byte compares.
 623    This cannot be used for little endian because vec_lvsl/lvsr are
 624    deprecated for little endian and the code won't work properly.  */
 625 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
 626    so we can't compile this function without -maltivec on the command line
 627    (or implied by some other switch).  */
 628
 629 static const uchar *
 630 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 631 {
 632   typedef __attribute__((altivec(vector))) unsigned char vc;
 633
 634   const vc repl_nl = {
 635     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 636     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 637   };
 638   const vc repl_cr = {
 639     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 640     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 641   };
 642   const vc repl_bs = {
 643     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 644     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 645   };
 646   const vc repl_qm = {
 647     '?', '?', '?', '?', '?', '?', '?', '?',
 648     '?', '?', '?', '?', '?', '?', '?', '?',
 649   };
 650   const vc ones = {
 651     -1, -1, -1, -1, -1, -1, -1, -1,
 652     -1, -1, -1, -1, -1, -1, -1, -1,
 653   };
 654   const vc zero = { 0 };
 655
 656   vc data, mask, t;
 657
 658   /* Altivec loads automatically mask addresses with -16.  This lets us
 659      issue the first load as early as possible.  */
 660   data = __builtin_vec_ld(0, (const vc *)s);
 661
 662   /* Discard bytes before the beginning of the buffer.  Do this by
 663      beginning with all ones and shifting in zeros according to the
 664      mis-alignment.  The LVSR instruction pulls the exact shift we
 665      want from the address.  */
 666   mask = __builtin_vec_lvsr(0, s);
 667   mask = __builtin_vec_perm(zero, ones, mask);
 668   data &= mask;
 669
 670   /* While altivec loads mask addresses, we still need to align S so
 671      that the offset we compute at the end is correct.  */
 672   s = (const uchar *)((uintptr_t)s & -16);
 673
 674   /* Main loop processing 16 bytes at a time.  */
 675   goto start;
 676   do
 677     {
 678       vc m_nl, m_cr, m_bs, m_qm;
 679
 680       s += 16;
 681       data = __builtin_vec_ld(0, (const vc *)s);
 682
 683     start:
 684       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 685       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 686       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 687       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 688       t = (m_nl | m_cr) | (m_bs | m_qm);
 689
 690       /* T now contains 0xff in bytes for which we matched one of the relevant
 691          characters.  We want to exit the loop if any byte in T is non-zero.
 692          Below is the expansion of vec_any_ne(t, zero).  */
 693     }
 694   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 695
 696   {
 697 #define N  (sizeof(vc) / sizeof(long))
 698
 699     union {
 700       vc v;
 701       /* Statically assert that N is 2 or 4.  */
 702       unsigned long l[(N == 2 || N == 4) ? N : -1];
 703     } u;
 704     unsigned long l, i = 0;
 705
 706     u.v = t;
 707
 708     /* Find the first word of T that is non-zero.  */
 709     switch (N)
 710       {
 711       case 4:
 712         l = u.l[i++];
 713         if (l != 0)
 714           break;
 715         s += sizeof(unsigned long);
 716         l = u.l[i++];
 717         if (l != 0)
 718           break;
 719         s += sizeof(unsigned long);
 720       case 2:
 721         l = u.l[i++];
 722         if (l != 0)
 723           break;
 724         s += sizeof(unsigned long);
 725         l = u.l[i];
 726       }
 727
 728     /* L now contains 0xff in bytes for which we matched one of the
 729        relevant characters.  We can find the byte index by finding
 730        its bit index and dividing by 8.  */
 731     l = __builtin_clzl(l) >> 3;
 732     return s + l;
 733
 734 #undef N
 735   }
 736 }
 737
 738 #elif defined (__ARM_NEON__)
 739 #include "arm_neon.h"
 740
 741 static const uchar *
 742 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 743 {
 744   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
 745   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
 746   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
 747   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
 748   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
 749
 750   unsigned int misalign, found, mask;
 751   const uint8_t *p;
 752   uint8x16_t data;
 753
 754   /* Align the source pointer.  */
 755   misalign = (uintptr_t)s & 15;
 756   p = (const uint8_t *)((uintptr_t)s & -16);
 757   data = vld1q_u8 (p);
 758
 759   /* Create a mask for the bytes that are valid within the first
 760      16-byte block.  The Idea here is that the AND with the mask
 761      within the loop is "free", since we need some AND or TEST
 762      insn in order to set the flags for the branch anyway.  */
 763   mask = (-1u << misalign) & 0xffff;
 764
 765   /* Main loop, processing 16 bytes at a time.  */
 766   goto start;
 767
 768   do
 769     {
 770       uint8x8_t l;
 771       uint16x4_t m;
 772       uint32x2_t n;
 773       uint8x16_t t, u, v, w;
 774
 775       p += 16;
 776       data = vld1q_u8 (p);
 777       mask = 0xffff;
 778
 779     start:
 780       t = vceqq_u8 (data, repl_nl);
 781       u = vceqq_u8 (data, repl_cr);
 782       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
 783       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
 784       t = vandq_u8 (vorrq_u8 (v, w), xmask);
 785       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
 786       m = vpaddl_u8 (l);
 787       n = vpaddl_u16 (m);
 788
 789       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
 790               vshr_n_u64 ((uint64x1_t) n, 24)), 0);
 791       found &= mask;
 792     }
 793   while (!found);
 794
 795   /* FOUND contains 1 in bits for which we matched a relevant
 796      character.  Conversion to the byte index is trivial.  */
 797   found = __builtin_ctz (found);
 798   return (const uchar *)p + found;
 799 }
 800
 801 #else
 802
  803 /* We only have one accelerated alternative.  Use a direct call so that
  804    we encourage inlining.  */
 805
 806 #define search_line_fast  search_line_acc_char
 807
 808 #endif
 809
 810 /* Initialize the lexer if needed.  */
 811
 812 void
 813 _cpp_init_lexer (void)
 814 {
 815 #ifdef HAVE_init_vectorized_lexer
 816   init_vectorized_lexer ();
 817 #endif
 818 }
 819
 820 /* Returns with a logical line that contains no escaped newlines or
 821    trigraphs.  This is a time-critical inner loop.  */
 822 void
 823 _cpp_clean_line (cpp_reader *pfile)
 824 {
 825   cpp_buffer *buffer;
 826   const uchar *s;
 827   uchar c, *d, *p;
 828
 829   buffer = pfile->buffer;
 830   buffer->cur_note = buffer->notes_used = 0;
 831   buffer->cur = buffer->line_base = buffer->next_line;
 832   buffer->need_line = false;
 833   s = buffer->next_line;
 834
 835   if (!buffer->from_stage3)
 836     {
 837       const uchar *pbackslash = NULL;
 838
 839       /* Fast path.  This is the common case of an un-escaped line with
 840          no trigraphs.  The primary win here is by not writing any
 841          data back to memory until we have to.  */
 842       while (1)
 843         {
 844           /* Perform an optimized search for \n, \r, \\, ?.  */
 845           s = search_line_fast (s, buffer->rlimit);
 846
 847           c = *s;
 848           if (c == '\\')
 849             {
 850               /* Record the location of the backslash and continue.  */
 851               pbackslash = s++;
 852             }
 853           else if (__builtin_expect (c == '?', 0))
 854             {
 855               if (__builtin_expect (s[1] == '?', false)
 856                    && _cpp_trigraph_map[s[2]])
 857                 {
 858                   /* Have a trigraph.  We may or may not have to convert
 859                      it.  Add a line note regardless, for -Wtrigraphs.  */
 860                   add_line_note (buffer, s, s[2]);
 861                   if (CPP_OPTION (pfile, trigraphs))
 862                     {
 863                       /* We do, and that means we have to switch to the
 864                          slow path.  */
 865                       d = (uchar *) s;
 866                       *d = _cpp_trigraph_map[s[2]];
 867                       s += 2;
 868                       goto slow_path;
 869                     }
 870                 }
 871               /* Not a trigraph.  Continue on fast-path.  */
 872               s++;
 873             }
 874           else
 875             break;
 876         }
 877
 878       /* This must be \r or \n.  We're either done, or we'll be forced
 879          to write back to the buffer and continue on the slow path.  */
 880       d = (uchar *) s;
 881
 882       if (__builtin_expect (s == buffer->rlimit, false))
 883         goto done;
 884
 885       /* DOS line ending? */
 886       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
 887         {
 888           s++;
 889           if (s == buffer->rlimit)
 890             goto done;
 891         }
 892
 893       if (__builtin_expect (pbackslash == NULL, true))
 894         goto done;
 895
 896       /* Check for escaped newline.  */
 897       p = d;
 898       while (is_nvspace (p[-1]))
 899         p--;
 900       if (p - 1 != pbackslash)
 901         goto done;
 902
 903       /* Have an escaped newline; process it and proceed to
 904          the slow path.  */
 905       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 906       d = p - 2;
 907       buffer->next_line = p - 1;
 908
 909     slow_path:
 910       while (1)
 911         {
 912           c = *++s;
 913           *++d = c;
 914
 915           if (c == '\n' || c == '\r')
 916             {
 917               /* Handle DOS line endings.  */
 918               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 919                 s++;
 920               if (s == buffer->rlimit)
 921                 break;
 922
 923               /* Escaped?  */
 924               p = d;
 925               while (p != buffer->next_line && is_nvspace (p[-1]))
 926                 p--;
 927               if (p == buffer->next_line || p[-1] != '\\')
 928                 break;
 929
 930               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 931               d = p - 2;
 932               buffer->next_line = p - 1;
 933             }
 934           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 935             {
 936               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 937               add_line_note (buffer, d, s[2]);
 938               if (CPP_OPTION (pfile, trigraphs))
 939                 {
 940                   *d = _cpp_trigraph_map[s[2]];
 941                   s += 2;
 942                 }
 943             }
 944         }
 945     }
 946   else
 947     {
 948       while (*s != '\n' && *s != '\r')
 949         s++;
 950       d = (uchar *) s;
 951
 952       /* Handle DOS line endings.  */
 953       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 954         s++;
 955     }
 956
 957  done:
 958   *d = '\n';
 959   /* A sentinel note that should never be processed.  */
 960   add_line_note (buffer, d + 1, '\n');
 961   buffer->next_line = s + 1;
 962 }
 963
 964 /* Return true if the trigraph indicated by NOTE should be warned
 965    about in a comment.  */
 966 static bool
 967 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 968 {
 969   const uchar *p;
 970
 971   /* Within comments we don't warn about trigraphs, unless the
 972      trigraph forms an escaped newline, as that may change
 973      behavior.  */
 974   if (note->type != '/')
 975     return false;
 976
 977   /* If -trigraphs, then this was an escaped newline iff the next note
 978      is coincident.  */
 979   if (CPP_OPTION (pfile, trigraphs))
 980     return note[1].pos == note->pos;
 981
 982   /* Otherwise, see if this forms an escaped newline.  */
 983   p = note->pos + 3;
 984   while (is_nvspace (*p))
 985     p++;
 986
 987   /* There might have been escaped newlines between the trigraph and the
 988      newline we found.  Hence the position test.  */
 989   return (*p == '\n' && p < note[1].pos);
 990 }
 991
 992 /* Process the notes created by add_line_note as far as the current
 993    location.  */
 994 void
 995 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 996 {
 997   cpp_buffer *buffer = pfile->buffer;
 998
 999   for (;;)
1000     {
1001       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1002       unsigned int col;
1003
1004       if (note->pos > buffer->cur)
1005         break;
1006
1007       buffer->cur_note++;
1008       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1009
1010       if (note->type == '\\' || note->type == ' ')
1011         {
1012           if (note->type == ' ' && !in_comment)
1013             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1014                                  "backslash and newline separated by space");
1015
1016           if (buffer->next_line > buffer->rlimit)
1017             {
1018               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1019                                    "backslash-newline at end of file");
1020               /* Prevent "no newline at end of file" warning.  */
1021               buffer->next_line = buffer->rlimit;
1022             }
1023
1024           buffer->line_base = note->pos;
1025           CPP_INCREMENT_LINE (pfile, 0);
1026         }
1027       else if (_cpp_trigraph_map[note->type])
1028         {
1029           if (CPP_OPTION (pfile, warn_trigraphs)
1030               && (!in_comment || warn_in_comment (pfile, note)))
1031             {
1032               if (CPP_OPTION (pfile, trigraphs))
1033                 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1034                                        pfile->line_table->highest_line, col,
1035                                        "trigraph ??%c converted to %c",
1036                                        note->type,
1037                                        (int) _cpp_trigraph_map[note->type]);
1038               else
1039                 {
1040                   cpp_warning_with_line
1041                     (pfile, CPP_W_TRIGRAPHS,
1042                      pfile->line_table->highest_line, col,
1043                      "trigraph ??%c ignored, use -trigraphs to enable",
1044                      note->type);
1045                 }
1046             }
1047         }
1048       else if (note->type == 0)
1049         /* Already processed in lex_raw_string.  */;
1050       else
1051         abort ();
1052     }
1053 }
1054
1055 /* Skip a C-style block comment.  We find the end of the comment by
1056    seeing if an asterisk is before every '/' we encounter.  Returns
1057    nonzero if comment terminated by EOF, zero otherwise.
1058
1059    Buffer->cur points to the initial asterisk of the comment.  */
1060 bool
1061 _cpp_skip_block_comment (cpp_reader *pfile)
1062 {
1063   cpp_buffer *buffer = pfile->buffer;
1064   const uchar *cur = buffer->cur;
1065   uchar c;
1066
1067   cur++;
1068   if (*cur == '/')
1069     cur++;
1070
1071   for (;;)
1072     {
1073       /* People like decorating comments with '*', so check for '/'
1074          instead for efficiency.  */
1075       c = *cur++;
1076
1077       if (c == '/')
1078         {
1079           if (cur[-2] == '*')
1080             break;
1081
1082           /* Warn about potential nested comments, but not if the '/'
1083              comes immediately before the true comment delimiter.
1084              Don't bother to get it right across escaped newlines.  */
1085           if (CPP_OPTION (pfile, warn_comments)
1086               && cur[0] == '*' && cur[1] != '/')
1087             {
1088               buffer->cur = cur;
1089               cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1090                                      pfile->line_table->highest_line,
1091                                      CPP_BUF_COL (buffer),
1092                                      "\"/*\" within comment");
1093             }
1094         }
1095       else if (c == '\n')
1096         {
1097           unsigned int cols;
1098           buffer->cur = cur - 1;
1099           _cpp_process_line_notes (pfile, true);
1100           if (buffer->next_line >= buffer->rlimit)
1101             return true;
1102           _cpp_clean_line (pfile);
1103
1104           cols = buffer->next_line - buffer->line_base;
1105           CPP_INCREMENT_LINE (pfile, cols);
1106
1107           cur = buffer->cur;
1108         }
1109     }
1110
1111   buffer->cur = cur;
1112   _cpp_process_line_notes (pfile, true);
1113   return false;
1114 }
1115
1116 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1117    terminating newline.  Handles escaped newlines.  Returns nonzero
1118    if a multiline comment.  */
1119 static int
1120 skip_line_comment (cpp_reader *pfile)
1121 {
1122   cpp_buffer *buffer = pfile->buffer;
1123   source_location orig_line = pfile->line_table->highest_line;
1124
1125   while (*buffer->cur != '\n')
1126     buffer->cur++;
1127
1128   _cpp_process_line_notes (pfile, true);
1129   return orig_line != pfile->line_table->highest_line;
1130 }
1131
1132 /* Skips whitespace, saving the next non-whitespace character.  */
1133 static void
1134 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1135 {
1136   cpp_buffer *buffer = pfile->buffer;
1137   bool saw_NUL = false;
1138
1139   do
1140     {
1141       /* Horizontal space always OK.  */
1142       if (c == ' ' || c == '\t')
1143         ;
1144       /* Just \f \v or \0 left.  */
1145       else if (c == '\0')
1146         saw_NUL = true;
1147       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1148         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1149                              CPP_BUF_COL (buffer),
1150                              "%s in preprocessing directive",
1151                              c == '\f' ? "form feed" : "vertical tab");
1152
1153       c = *buffer->cur++;
1154     }
1155   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1156   while (is_nvspace (c));
1157
1158   if (saw_NUL)
1159     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1160
1161   buffer->cur--;
1162 }
1163
1164 /* See if the characters of a number token are valid in a name (no
1165    '.', '+' or '-').  */
1166 static int
1167 name_p (cpp_reader *pfile, const cpp_string *string)
1168 {
1169   unsigned int i;
1170
1171   for (i = 0; i < string->len; i++)
1172     if (!is_idchar (string->text[i]))
1173       return 0;
1174
1175   return 1;
1176 }
1177
1178 /* After parsing an identifier or other sequence, produce a warning about
1179    sequences not in NFC/NFKC.  */
1180 static void
1181 warn_about_normalization (cpp_reader *pfile,
1182                           const cpp_token *token,
1183                           const struct normalize_state *s)
1184 {
1185   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1186       && !pfile->state.skipping)
1187     {
1188       /* Make sure that the token is printed using UCNs, even
1189          if we'd otherwise happily print UTF-8.  */
1190       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1191       size_t sz;
1192
1193       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1194       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1195         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1196                                "`%.*s' is not in NFKC", (int) sz, buf);
1197       else
1198         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1199                                "`%.*s' is not in NFC", (int) sz, buf);
1200       free (buf);
1201     }
1202 }
1203
1204 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1205    an identifier.  FIRST is TRUE if this starts an identifier.  */
1206 static bool
1207 forms_identifier_p (cpp_reader *pfile, int first,
1208                     struct normalize_state *state)
1209 {
1210   cpp_buffer *buffer = pfile->buffer;
1211
1212   if (*buffer->cur == '$')
1213     {
1214       if (!CPP_OPTION (pfile, dollars_in_ident))
1215         return false;
1216
1217       buffer->cur++;
1218       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1219         {
1220           CPP_OPTION (pfile, warn_dollars) = 0;
1221           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1222         }
1223
1224       return true;
1225     }
1226
1227   /* Is this a syntactically valid UCN?  */
1228   if (CPP_OPTION (pfile, extended_identifiers)
1229       && *buffer->cur == '\\'
1230       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1231     {
1232       buffer->cur += 2;
1233       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1234                           state))
1235         return true;
1236       buffer->cur -= 2;
1237     }
1238
1239   return false;
1240 }
1241
1242 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1243 static cpp_hashnode *
1244 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1245 {
1246   cpp_hashnode *result;
1247   const uchar *cur;
1248   unsigned int len;
1249   unsigned int hash = HT_HASHSTEP (0, *base);
1250
1251   cur = base + 1;
1252   while (ISIDNUM (*cur))
1253     {
1254       hash = HT_HASHSTEP (hash, *cur);
1255       cur++;
1256     }
1257   len = cur - base;
1258   hash = HT_HASHFINISH (hash, len);
1259   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1260                                               base, len, hash, HT_ALLOC));
1261
1262   /* Rarely, identifiers require diagnostics when lexed.  */
1263   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1264                         && !pfile->state.skipping, 0))
1265     {
1266       /* It is allowed to poison the same identifier twice.  */
1267       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1268         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1269                    NODE_NAME (result));
1270
1271       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1272          replacement list of a variadic macro.  */
1273       if (result == pfile->spec_nodes.n__VA_ARGS__
1274           && !pfile->state.va_args_ok)
1275         {
1276           if (CPP_OPTION (pfile, cplusplus))
1277             cpp_error (pfile, CPP_DL_PEDWARN,
1278                        "__VA_ARGS__ can only appear in the expansion"
1279                        " of a C++11 variadic macro");
1280           else
1281             cpp_error (pfile, CPP_DL_PEDWARN,
1282                        "__VA_ARGS__ can only appear in the expansion"
1283                        " of a C99 variadic macro");
1284         }
1285
1286       /* For -Wc++-compat, warn about use of C++ named operators.  */
1287       if (result->flags & NODE_WARN_OPERATOR)
1288         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1289                      "identifier \"%s\" is a special operator name in C++",
1290                      NODE_NAME (result));
1291     }
1292
1293   return result;
1294 }
1295
1296 /* Get the cpp_hashnode of an identifier specified by NAME in
1297    the current cpp_reader object.  If none is found, NULL is returned.  */
1298 cpp_hashnode *
1299 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1300 {
1301   cpp_hashnode *result;
1302   result = lex_identifier_intern (pfile, (uchar *) name);
1303   return result;
1304 }
1305
1306 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1307 static cpp_hashnode *
1308 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1309                 struct normalize_state *nst, cpp_hashnode **spelling)
1310 {
1311   cpp_hashnode *result;
1312   const uchar *cur;
1313   unsigned int len;
1314   unsigned int hash = HT_HASHSTEP (0, *base);
1315
1316   cur = pfile->buffer->cur;
1317   if (! starts_ucn)
1318     {
1319       while (ISIDNUM (*cur))
1320         {
1321           hash = HT_HASHSTEP (hash, *cur);
1322           cur++;
1323         }
1324       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1325     }
1326   pfile->buffer->cur = cur;
1327   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1328     {
1329       /* Slower version for identifiers containing UCNs (or $).  */
1330       do {
1331         while (ISIDNUM (*pfile->buffer->cur))
1332           {
1333             NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1334             pfile->buffer->cur++;
1335           }
1336       } while (forms_identifier_p (pfile, false, nst));
1337       result = _cpp_interpret_identifier (pfile, base,
1338                                           pfile->buffer->cur - base);
1339       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1340     }
1341   else
1342     {
1343       len = cur - base;
1344       hash = HT_HASHFINISH (hash, len);
1345
1346       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1347                                                   base, len, hash, HT_ALLOC));
1348       *spelling = result;
1349     }
1350
1351   /* Rarely, identifiers require diagnostics when lexed.  */
1352   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1353                         && !pfile->state.skipping, 0))
1354     {
1355       /* It is allowed to poison the same identifier twice.  */
1356       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1357         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1358                    NODE_NAME (result));
1359
1360       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1361          replacement list of a variadic macro.  */
1362       if (result == pfile->spec_nodes.n__VA_ARGS__
1363           && !pfile->state.va_args_ok)
1364         {
1365           if (CPP_OPTION (pfile, cplusplus))
1366             cpp_error (pfile, CPP_DL_PEDWARN,
1367                        "__VA_ARGS__ can only appear in the expansion"
1368                        " of a C++11 variadic macro");
1369           else
1370             cpp_error (pfile, CPP_DL_PEDWARN,
1371                        "__VA_ARGS__ can only appear in the expansion"
1372                        " of a C99 variadic macro");
1373         }
1374
1375       /* For -Wc++-compat, warn about use of C++ named operators.  */
1376       if (result->flags & NODE_WARN_OPERATOR)
1377         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1378                      "identifier \"%s\" is a special operator name in C++",
1379                      NODE_NAME (result));
1380     }
1381
1382   return result;
1383 }
1384
1385 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1386 static void
1387 lex_number (cpp_reader *pfile, cpp_string *number,
1388             struct normalize_state *nst)
1389 {
1390   const uchar *cur;
1391   const uchar *base;
1392   uchar *dest;
1393
1394   base = pfile->buffer->cur - 1;
1395   do
1396     {
1397       cur = pfile->buffer->cur;
1398
1399       /* N.B. ISIDNUM does not include $.  */
1400       while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1401              || VALID_SIGN (*cur, cur[-1]))
1402         {
1403           NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1404           cur++;
1405         }
1406
1407       pfile->buffer->cur = cur;
1408     }
1409   while (forms_identifier_p (pfile, false, nst));
1410
1411   number->len = cur - base;
1412   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1413   memcpy (dest, base, number->len);
1414   dest[number->len] = '\0';
1415   number->text = dest;
1416 }
1417
1418 /* Create a token of type TYPE with a literal spelling.  */
1419 static void
1420 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1421                 unsigned int len, enum cpp_ttype type)
1422 {
1423   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1424
1425   memcpy (dest, base, len);
1426   dest[len] = '\0';
1427   token->type = type;
1428   token->val.str.len = len;
1429   token->val.str.text = dest;
1430 }
1431
1432 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1433    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1434
1435 static void
1436 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1437                 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1438 {
1439   _cpp_buff *first_buff = *first_buff_p;
1440   _cpp_buff *last_buff = *last_buff_p;
1441
1442   if (first_buff == NULL)
1443     first_buff = last_buff = _cpp_get_buff (pfile, len);
1444   else if (len > BUFF_ROOM (last_buff))
1445     {
1446       size_t room = BUFF_ROOM (last_buff);
1447       memcpy (BUFF_FRONT (last_buff), base, room);
1448       BUFF_FRONT (last_buff) += room;
1449       base += room;
1450       len -= room;
1451       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1452     }
1453
1454   memcpy (BUFF_FRONT (last_buff), base, len);
1455   BUFF_FRONT (last_buff) += len;
1456
1457   *first_buff_p = first_buff;
1458   *last_buff_p = last_buff;
1459 }
1460
1461
1462 /* Returns true if a macro has been defined.
1463    This might not work if compile with -save-temps,
1464    or preprocess separately from compilation.  */
1465
1466 static bool
1467 is_macro(cpp_reader *pfile, const uchar *base)
1468 {
1469   const uchar *cur = base;
1470   if (! ISIDST (*cur))
1471     return false;
1472   unsigned int hash = HT_HASHSTEP (0, *cur);
1473   ++cur;
1474   while (ISIDNUM (*cur))
1475     {
1476       hash = HT_HASHSTEP (hash, *cur);
1477       ++cur;
1478     }
1479   hash = HT_HASHFINISH (hash, cur - base);
1480
1481   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1482                                         base, cur - base, hash, HT_NO_INSERT));
1483
1484   return !result ? false : (result->type == NT_MACRO);
1485 }
1486
1487
/* Lexes a raw string.  The stored string contains the spelling, including
   double quotes, delimiter string, '(' and ')', any leading
   'L', 'u', 'U' or 'u8' and 'R' modifier.  The function itself returns
   nothing; the type of the literal is stored in TOKEN, which becomes
   CPP_OTHER if the raw string has a bad delimiter or is not properly
   terminated, and CPP_EOF if the input runs out first.

   The spelling is NUL-terminated, but it is not guaranteed that this
   is the first NUL since embedded NULs are preserved.  */
1495
1496 static void
1497 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1498                 const uchar *cur)
1499 {
1500   uchar raw_prefix[17];
1501   uchar temp_buffer[18];
1502   const uchar *orig_base;
1503   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1504   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1505   raw_str_phase phase = RAW_STR_PREFIX;
1506   enum cpp_ttype type;
1507   size_t total_len = 0;
1508   /* Index into temp_buffer during phases other than RAW_STR,
1509      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1510      be appended to temp_buffer.  */
1511   size_t temp_buffer_len = 0;
1512   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1513   size_t raw_prefix_start;
1514   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1515
1516   type = (*base == 'L' ? CPP_WSTRING :
1517           *base == 'U' ? CPP_STRING32 :
1518           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1519           : CPP_STRING);
1520
1521 #define BUF_APPEND(STR,LEN)                                     \
1522       do {                                                      \
1523         bufring_append (pfile, (const uchar *)(STR), (LEN),     \
1524                         &first_buff, &last_buff);               \
1525         total_len += (LEN);                                     \
1526         if (__builtin_expect (temp_buffer_len < 17, 0)          \
1527             && (const uchar *)(STR) != base                     \
1528             && (LEN) <= 2)                                      \
1529           {                                                     \
1530             memcpy (temp_buffer + temp_buffer_len,              \
1531                     (const uchar *)(STR), (LEN));               \
1532             temp_buffer_len += (LEN);                           \
1533           }                                                     \
1534       } while (0);
1535
1536   orig_base = base;
1537   ++cur;
1538   raw_prefix_start = cur - base;
1539   for (;;)
1540     {
1541       cppchar_t c;
1542
1543       /* If we previously performed any trigraph or line splicing
1544          transformations, undo them in between the opening and closing
1545          double quote.  */
1546       while (note->pos < cur)
1547         ++note;
1548       for (; note->pos == cur; ++note)
1549         {
1550           switch (note->type)
1551             {
1552             case '\\':
1553             case ' ':
1554               /* Restore backslash followed by newline.  */
1555               BUF_APPEND (base, cur - base);
1556               base = cur;
1557               BUF_APPEND ("\\", 1);
1558             after_backslash:
1559               if (note->type == ' ')
1560                 {
1561                   /* GNU backslash whitespace newline extension.  FIXME
1562                      could be any sequence of non-vertical space.  When we
1563                      can properly restore any such sequence, we should mark
1564                      this note as handled so _cpp_process_line_notes
1565                      doesn't warn.  */
1566                   BUF_APPEND (" ", 1);
1567                 }
1568
1569               BUF_APPEND ("\n", 1);
1570               break;
1571
1572             case 0:
1573               /* Already handled.  */
1574               break;
1575
1576             default:
1577               if (_cpp_trigraph_map[note->type])
1578                 {
1579                   /* Don't warn about this trigraph in
1580                      _cpp_process_line_notes, since trigraphs show up as
1581                      trigraphs in raw strings.  */
1582                   uchar type = note->type;
1583                   note->type = 0;
1584
1585                   if (!CPP_OPTION (pfile, trigraphs))
1586                     /* If we didn't convert the trigraph in the first
1587                        place, don't do anything now either.  */
1588                     break;
1589
1590                   BUF_APPEND (base, cur - base);
1591                   base = cur;
1592                   BUF_APPEND ("??", 2);
1593
1594                   /* ??/ followed by newline gets two line notes, one for
1595                      the trigraph and one for the backslash/newline.  */
1596                   if (type == '/' && note[1].pos == cur)
1597                     {
1598                       if (note[1].type != '\\'
1599                           && note[1].type != ' ')
1600                         abort ();
1601                       BUF_APPEND ("/", 1);
1602                       ++note;
1603                       goto after_backslash;
1604                     }
1605                   else
1606                     {
1607                       /* Skip the replacement character.  */
1608                       base = ++cur;
1609                       BUF_APPEND (&type, 1);
1610                       c = type;
1611                       goto check_c;
1612                     }
1613                 }
1614               else
1615                 abort ();
1616               break;
1617             }
1618         }
1619       c = *cur++;
1620       if (__builtin_expect (temp_buffer_len < 17, 0))
1621         temp_buffer[temp_buffer_len++] = c;
1622
1623      check_c:
1624       if (phase == RAW_STR_PREFIX)
1625         {
1626           while (raw_prefix_len < temp_buffer_len)
1627             {
1628               raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1629               switch (raw_prefix[raw_prefix_len])
1630                 {
1631                 case ' ': case '(': case ')': case '\\': case '\t':
1632                 case '\v': case '\f': case '\n': default:
1633                   break;
1634                 /* Basic source charset except the above chars.  */
1635                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1636                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1637                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1638                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1639                 case 'y': case 'z':
1640                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1641                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1642                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1643                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1644                 case 'Y': case 'Z':
1645                 case '0': case '1': case '2': case '3': case '4': case '5':
1646                 case '6': case '7': case '8': case '9':
1647                 case '_': case '{': case '}': case '#': case '[': case ']':
1648                 case '<': case '>': case '%': case ':': case ';': case '.':
1649                 case '?': case '*': case '+': case '-': case '/': case '^':
1650                 case '&': case '|': case '~': case '!': case '=': case ',':
1651                 case '"': case '\'':
1652                   if (raw_prefix_len < 16)
1653                     {
1654                       raw_prefix_len++;
1655                       continue;
1656                     }
1657                   break;
1658                 }
1659
1660               if (raw_prefix[raw_prefix_len] != '(')
1661                 {
1662                   int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1663                   if (raw_prefix_len == 16)
1664                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1665                                          col, "raw string delimiter longer "
1666                                               "than 16 characters");
1667                   else if (raw_prefix[raw_prefix_len] == '\n')
1668                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1669                                          col, "invalid new-line in raw "
1670                                               "string delimiter");
1671                   else
1672                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1673                                          col, "invalid character '%c' in "
1674                                               "raw string delimiter",
1675                                          (int) raw_prefix[raw_prefix_len]);
1676                   pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1677                   create_literal (pfile, token, orig_base,
1678                                   raw_prefix_start - 1, CPP_OTHER);
1679                   if (first_buff)
1680                     _cpp_release_buff (pfile, first_buff);
1681                   return;
1682                 }
1683               raw_prefix[raw_prefix_len] = '"';
1684               phase = RAW_STR;
1685               /* Nothing should be appended to temp_buffer during
1686                  RAW_STR phase.  */
1687               temp_buffer_len = 17;
1688               break;
1689             }
1690           continue;
1691         }
1692       else if (phase == RAW_STR_SUFFIX)
1693         {
1694           while (raw_suffix_len <= raw_prefix_len
1695                  && raw_suffix_len < temp_buffer_len
1696                  && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1697             raw_suffix_len++;
1698           if (raw_suffix_len > raw_prefix_len)
1699             break;
1700           if (raw_suffix_len == temp_buffer_len)
1701             continue;
1702           phase = RAW_STR;
1703           /* Nothing should be appended to temp_buffer during
1704              RAW_STR phase.  */
1705           temp_buffer_len = 17;
1706         }
1707       if (c == ')')
1708         {
1709           phase = RAW_STR_SUFFIX;
1710           raw_suffix_len = 0;
1711           temp_buffer_len = 0;
1712         }
1713       else if (c == '\n')
1714         {
1715           if (pfile->state.in_directive
1716               || (pfile->state.parsing_args
1717                   && pfile->buffer->next_line >= pfile->buffer->rlimit))
1718             {
1719               cur--;
1720               type = CPP_OTHER;
1721               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1722                                    "unterminated raw string");
1723               break;
1724             }
1725
1726           BUF_APPEND (base, cur - base);
1727
1728           if (pfile->buffer->cur < pfile->buffer->rlimit)
1729             CPP_INCREMENT_LINE (pfile, 0);
1730           pfile->buffer->need_line = true;
1731
1732           pfile->buffer->cur = cur-1;
1733           _cpp_process_line_notes (pfile, false);
1734           if (!_cpp_get_fresh_line (pfile))
1735             {
1736               source_location src_loc = token->src_loc;
1737               token->type = CPP_EOF;
1738               /* Tell the compiler the line number of the EOF token.  */
1739               token->src_loc = pfile->line_table->highest_line;
1740               token->flags = BOL;
1741               if (first_buff != NULL)
1742                 _cpp_release_buff (pfile, first_buff);
1743               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1744                                    "unterminated raw string");
1745               return;
1746             }
1747
1748           cur = base = pfile->buffer->cur;
1749           note = &pfile->buffer->notes[pfile->buffer->cur_note];
1750         }
1751     }
1752
1753   if (CPP_OPTION (pfile, user_literals))
1754     {
1755       /* If a string format macro, say from inttypes.h, is placed touching
1756          a string literal it could be parsed as a C++11 user-defined string
1757          literal thus breaking the program.
1758          Try to identify macros with is_macro. A warning is issued. */
1759       if (is_macro (pfile, cur))
1760         {
1761           /* Raise a warning, but do not consume subsequent tokens.  */
1762           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1763             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1764                                    token->src_loc, 0,
1765                                    "invalid suffix on literal; C++11 requires "
1766                                    "a space between literal and string macro");
1767         }
1768       /* Grab user defined literal suffix.  */
1769       else if (ISIDST (*cur))
1770         {
1771           type = cpp_userdef_string_add_type (type);
1772           ++cur;
1773
1774           while (ISIDNUM (*cur))
1775             ++cur;
1776         }
1777     }
1778
1779   pfile->buffer->cur = cur;
1780   if (first_buff == NULL)
1781     create_literal (pfile, token, base, cur - base, type);
1782   else
1783     {
1784       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1785
1786       token->type = type;
1787       token->val.str.len = total_len + (cur - base);
1788       token->val.str.text = dest;
1789       last_buff = first_buff;
1790       while (last_buff != NULL)
1791         {
1792           memcpy (dest, last_buff->base,
1793                   BUFF_FRONT (last_buff) - last_buff->base);
1794           dest += BUFF_FRONT (last_buff) - last_buff->base;
1795           last_buff = last_buff->next;
1796         }
1797       _cpp_release_buff (pfile, first_buff);
1798       memcpy (dest, base, cur - base);
1799       dest[cur - base] = '\0';
1800     }
1801 }
1802
1803 /* Lexes a string, character constant, or angle-bracketed header file
1804    name.  The stored string contains the spelling, including opening
1805    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1806    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1807    if it was not properly terminated, or CPP_LESS for an unterminated
1808    header name which must be relexed as normal tokens.
1809
1810    The spelling is NUL-terminated, but it is not guaranteed that this
1811    is the first NUL since embedded NULs are preserved.  */
1812 static void
1813 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1814 {
1815   bool saw_NUL = false;
1816   const uchar *cur;
1817   cppchar_t terminator;
1818   enum cpp_ttype type;
1819
1820   cur = base;
1821   terminator = *cur++;
1822   if (terminator == 'L' || terminator == 'U')
1823     terminator = *cur++;
1824   else if (terminator == 'u')
1825     {
1826       terminator = *cur++;
1827       if (terminator == '8')
1828         terminator = *cur++;
1829     }
1830   if (terminator == 'R')
1831     {
1832       lex_raw_string (pfile, token, base, cur);
1833       return;
1834     }
1835   if (terminator == '"')
1836     type = (*base == 'L' ? CPP_WSTRING :
1837             *base == 'U' ? CPP_STRING32 :
1838             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1839                          : CPP_STRING);
1840   else if (terminator == '\'')
1841     type = (*base == 'L' ? CPP_WCHAR :
1842             *base == 'U' ? CPP_CHAR32 :
1843             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1844   else
1845     terminator = '>', type = CPP_HEADER_NAME;
1846
1847   for (;;)
1848     {
1849       cppchar_t c = *cur++;
1850
1851       /* In #include-style directives, terminators are not escapable.  */
1852       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1853         cur++;
1854       else if (c == terminator)
1855         break;
1856       else if (c == '\n')
1857         {
1858           cur--;
1859           /* Unmatched quotes always yield undefined behavior, but
1860              greedy lexing means that what appears to be an unterminated
1861              header name may actually be a legitimate sequence of tokens.  */
1862           if (terminator == '>')
1863             {
1864               token->type = CPP_LESS;
1865               return;
1866             }
1867           type = CPP_OTHER;
1868           break;
1869         }
1870       else if (c == '\0')
1871         saw_NUL = true;
1872     }
1873
1874   if (saw_NUL && !pfile->state.skipping)
1875     cpp_error (pfile, CPP_DL_WARNING,
1876                "null character(s) preserved in literal");
1877
1878   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1879     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1880                (int) terminator);
1881
1882   if (CPP_OPTION (pfile, user_literals))
1883     {
1884       /* If a string format macro, say from inttypes.h, is placed touching
1885          a string literal it could be parsed as a C++11 user-defined string
1886          literal thus breaking the program.
1887          Try to identify macros with is_macro. A warning is issued. */
1888       if (is_macro (pfile, cur))
1889         {
1890           /* Raise a warning, but do not consume subsequent tokens.  */
1891           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1892             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1893                                    token->src_loc, 0,
1894                                    "invalid suffix on literal; C++11 requires "
1895                                    "a space between literal and string macro");
1896         }
1897       /* Grab user defined literal suffix.  */
1898       else if (ISIDST (*cur))
1899         {
1900           type = cpp_userdef_char_add_type (type);
1901           type = cpp_userdef_string_add_type (type);
1902           ++cur;
1903
1904           while (ISIDNUM (*cur))
1905             ++cur;
1906         }
1907     }
1908
1909   pfile->buffer->cur = cur;
1910   create_literal (pfile, token, base, cur - base, type);
1911 }
1912
1913 /* Return the comment table. The client may not make any assumption
1914    about the ordering of the table.  */
1915 cpp_comment_table *
1916 cpp_get_comments (cpp_reader *pfile)
1917 {
1918   return &pfile->comments;
1919 }
1920
1921 /* Append a comment to the end of the comment table. */
1922 static void
1923 store_comment (cpp_reader *pfile, cpp_token *token)
1924 {
1925   int len;
1926
1927   if (pfile->comments.allocated == 0)
1928     {
1929       pfile->comments.allocated = 256;
1930       pfile->comments.entries = (cpp_comment *) xmalloc
1931         (pfile->comments.allocated * sizeof (cpp_comment));
1932     }
1933
1934   if (pfile->comments.count == pfile->comments.allocated)
1935     {
1936       pfile->comments.allocated *= 2;
1937       pfile->comments.entries = (cpp_comment *) xrealloc
1938         (pfile->comments.entries,
1939          pfile->comments.allocated * sizeof (cpp_comment));
1940     }
1941
1942   len = token->val.str.len;
1943
1944   /* Copy comment. Note, token may not be NULL terminated. */
1945   pfile->comments.entries[pfile->comments.count].comment =
1946     (char *) xmalloc (sizeof (char) * (len + 1));
1947   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1948           token->val.str.text, len);
1949   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1950
1951   /* Set source location. */
1952   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1953
1954   /* Increment the count of entries in the comment table. */
1955   pfile->comments.count++;
1956 }
1957
1958 /* The stored comment includes the comment start and any terminator.  */
1959 static void
1960 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1961               cppchar_t type)
1962 {
1963   unsigned char *buffer;
1964   unsigned int len, clen, i;
1965
1966   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1967
1968   /* C++ comments probably (not definitely) have moved past a new
1969      line, which we don't want to save in the comment.  */
1970   if (is_vspace (pfile->buffer->cur[-1]))
1971     len--;
1972
1973   /* If we are currently in a directive or in argument parsing, then
1974      we need to store all C++ comments as C comments internally, and
1975      so we need to allocate a little extra space in that case.
1976
1977      Note that the only time we encounter a directive here is
1978      when we are saving comments in a "#define".  */
1979   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1980           && type == '/') ? len + 2 : len;
1981
1982   buffer = _cpp_unaligned_alloc (pfile, clen);
1983
1984   token->type = CPP_COMMENT;
1985   token->val.str.len = clen;
1986   token->val.str.text = buffer;
1987
1988   buffer[0] = '/';
1989   memcpy (buffer + 1, from, len - 1);
1990
1991   /* Finish conversion to a C comment, if necessary.  */
1992   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1993     {
1994       buffer[1] = '*';
1995       buffer[clen - 2] = '*';
1996       buffer[clen - 1] = '/';
1997       /* As there can be in a C++ comments illegal sequences for C comments
1998          we need to filter them out.  */
1999       for (i = 2; i < (clen - 2); i++)
2000         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2001           buffer[i] = '|';
2002     }
2003
2004   /* Finally store this comment for use by clients of libcpp. */
2005   store_comment (pfile, token);
2006 }
2007
2008 /* Allocate COUNT tokens for RUN.  */
2009 void
2010 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2011 {
2012   run->base = XNEWVEC (cpp_token, count);
2013   run->limit = run->base + count;
2014   run->next = NULL;
2015 }
2016
2017 /* Returns the next tokenrun, or creates one if there is none.  */
2018 static tokenrun *
2019 next_tokenrun (tokenrun *run)
2020 {
2021   if (run->next == NULL)
2022     {
2023       run->next = XNEW (tokenrun);
2024       run->next->prev = run;
2025       _cpp_init_tokenrun (run->next, 250);
2026     }
2027
2028   return run->next;
2029 }
2030
2031 /* Return the number of not yet processed token in a given
2032    context.  */
2033 int
2034 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2035 {
2036   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2037     return (LAST (context).token - FIRST (context).token);
2038   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2039            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2040     return (LAST (context).ptoken - FIRST (context).ptoken);
2041   else
2042       abort ();
2043 }
2044
2045 /* Returns the token present at index INDEX in a given context.  If
2046    INDEX is zero, the next token to be processed is returned.  */
2047 static const cpp_token*
2048 _cpp_token_from_context_at (cpp_context *context, int index)
2049 {
2050   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2051     return &(FIRST (context).token[index]);
2052   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2053            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2054     return FIRST (context).ptoken[index];
2055  else
2056    abort ();
2057 }
2058
2059 /* Look ahead in the input stream.  */
2060 const cpp_token *
2061 cpp_peek_token (cpp_reader *pfile, int index)
2062 {
2063   cpp_context *context = pfile->context;
2064   const cpp_token *peektok;
2065   int count;
2066
2067   /* First, scan through any pending cpp_context objects.  */
2068   while (context->prev)
2069     {
2070       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2071
2072       if (index < (int) sz)
2073         return _cpp_token_from_context_at (context, index);
2074       index -= (int) sz;
2075       context = context->prev;
2076     }
2077
2078   /* We will have to read some new tokens after all (and do so
2079      without invalidating preceding tokens).  */
2080   count = index;
2081   pfile->keep_tokens++;
2082
2083   do
2084     {
2085       peektok = _cpp_lex_token (pfile);
2086       if (peektok->type == CPP_EOF)
2087         return peektok;
2088     }
2089   while (index--);
2090
2091   _cpp_backup_tokens_direct (pfile, count + 1);
2092   pfile->keep_tokens--;
2093
2094   return peektok;
2095 }
2096
/* Allocate a single token that is invalidated at the same time as the
   rest of the tokens on the line.  Has its line and col set to the
   same as the last lexed token, so that diagnostics appear in the
   right place.

   The new token takes the slot at pfile->cur_token, which is then
   advanced.  Any lookahead tokens stored from that slot onwards are
   first moved up by one slot, spilling into the next token run when
   the current run has no room for them.  Only the src_loc field of the
   returned token is set here.  */
cpp_token *
_cpp_temp_token (cpp_reader *pfile)
{
  cpp_token *old, *result;
  /* SZ: number of token slots from cur_token to the end of the current
     run.  LA: number of pending lookahead tokens.  */
  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;

  /* The token just before the current position; it supplies the
     location for the new token.  */
  old = pfile->cur_token - 1;
  /* Any pre-existing lookaheads must not be clobbered.  */
  if (la)
    {
      /* The lookaheads reach the end of the current run (or beyond), so
         moving them up by one slot crosses into the next run.  */
      if (sz <= la)
        {
          tokenrun *next = next_tokenrun (pfile->cur_run);

          /* Shift up the (la - sz) tokens at the start of the next run,
             presumably the lookaheads that did not fit in this run.  */
          if (sz < la)
            memmove (next->base + 1, next->base,
                     (la - sz) * sizeof (cpp_token));

          /* Carry the last slot of the current run over to the front of
             the next run.  */
          next->base[0] = pfile->cur_run->limit[-1];
        }

      /* Shift up the lookaheads that stay inside the current run; at
         most sz - 1 of them fit once slot cur_token is freed.  */
      if (sz > 1)
        memmove (pfile->cur_token + 1, pfile->cur_token,
                 MIN (la, sz - 1) * sizeof (cpp_token));
    }

  /* No slot left in the current run: continue in the next one.  */
  if (!sz && pfile->cur_token == pfile->cur_run->limit)
    {
      pfile->cur_run = next_tokenrun (pfile->cur_run);
      pfile->cur_token = pfile->cur_run->base;
    }

  result = pfile->cur_token++;
  result->src_loc = old->src_loc;
  return result;
}
2138
/* Lex a token and return it (external interface).  Takes care of
   issues like directive handling, token lookahead, multiple include
   optimization and skipping.

   The token comes from the pending lookaheads if there are any, and
   from _cpp_lex_direct otherwise.  A '#' at the beginning of a line is
   handed to _cpp_handle_directive.  While pfile->state.skipping is set,
   tokens outside directives are discarded and the loop continues until
   a CPP_EOF is found.  */
const cpp_token *
_cpp_lex_token (cpp_reader *pfile)
{
  cpp_token *result;

  for (;;)
    {
      /* The current run is exhausted: move on to the next one,
         creating it if necessary.  */
      if (pfile->cur_token == pfile->cur_run->limit)
        {
          pfile->cur_run = next_tokenrun (pfile->cur_run);
          pfile->cur_token = pfile->cur_run->base;
        }
      /* We assume that the current token is somewhere in the current
         run.  */
      if (pfile->cur_token < pfile->cur_run->base
          || pfile->cur_token >= pfile->cur_run->limit)
        abort ();

      /* Reuse a token that was lexed earlier and backed up, if any;
         otherwise lex a new one.  */
      if (pfile->lookaheads)
        {
          pfile->lookaheads--;
          result = pfile->cur_token++;
        }
      else
        result = _cpp_lex_direct (pfile);

      /* The token is the first one on its line.  */
      if (result->flags & BOL)
        {
          /* Is this a directive?  If _cpp_handle_directive returns
             false, it is an assembler #.  */
          if (result->type == CPP_HASH
              /* 6.10.3 p 11: Directives in a list of macro arguments
                 gives undefined behavior.  This implementation
                 handles the directive as normal.  */
              && pfile->state.parsing_args != 1)
            {
              if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
                {
                  /* A CPP_PADDING directive result means there is no
                     token to hand back; lex another one.  */
                  if (pfile->directive_result.type == CPP_PADDING)
                    continue;
                  result = &pfile->directive_result;
                }
            }
          else if (pfile->state.in_deferred_pragma)
            result = &pfile->directive_result;

          /* Notify the client of the new line, unless skipping.  */
          if (pfile->cb.line_change && !pfile->state.skipping)
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
        }

      /* We don't skip tokens in directives.  */
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
        break;

      /* Outside a directive, invalidate controlling macros.  At file
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
         get here and MI optimization works.  */
      pfile->mi_valid = false;

      /* When skipping, only CPP_EOF is passed on to the caller.  */
      if (!pfile->state.skipping || result->type == CPP_EOF)
        break;
    }

  return result;
}
2207
2208 /* Returns true if a fresh line has been loaded.  */
2209 bool
2210 _cpp_get_fresh_line (cpp_reader *pfile)
2211 {
2212   int return_at_eof;
2213
2214   /* We can't get a new line until we leave the current directive.  */
2215   if (pfile->state.in_directive)
2216     return false;
2217
2218   for (;;)
2219     {
2220       cpp_buffer *buffer = pfile->buffer;
2221
2222       if (!buffer->need_line)
2223         return true;
2224
2225       if (buffer->next_line < buffer->rlimit)
2226         {
2227           _cpp_clean_line (pfile);
2228           return true;
2229         }
2230
2231       /* First, get out of parsing arguments state.  */
2232       if (pfile->state.parsing_args)
2233         return false;
2234
2235       /* End of buffer.  Non-empty files should end in a newline.  */
2236       if (buffer->buf != buffer->rlimit
2237           && buffer->next_line > buffer->rlimit
2238           && !buffer->from_stage3)
2239         {
2240           /* Clip to buffer size.  */
2241           buffer->next_line = buffer->rlimit;
2242         }
2243
2244       return_at_eof = buffer->return_at_eof;
2245       _cpp_pop_buffer (pfile);
2246       if (pfile->buffer == NULL || return_at_eof)
2247         return false;
2248     }
2249 }
2250
/* Helper for lexing two-character operators.  If the next unread
   character is CHAR, consume it and set result->type to THEN_TYPE;
   otherwise leave the buffer position alone and set result->type to
   ELSE_TYPE.  Relies on variables named `result' and `buffer' being in
   scope where the macro is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)
2259
2260 /* Lex a token into pfile->cur_token, which is also incremented, to
2261    get diagnostics pointing to the correct location.
2262
2263    Does not handle issues such as token lookahead, multiple-include
2264    optimization, directives, skipping etc.  This function is only
2265    suitable for use by _cpp_lex_token, and in special cases like
2266    lex_expansion_token which doesn't care for any of these issues.
2267
2268    When meeting a newline, returns CPP_EOF if parsing a directive,
2269    otherwise returns to the start of the token buffer if permissible.
2270    Returns the location of the lexed token.  */
2271 cpp_token *
2272 _cpp_lex_direct (cpp_reader *pfile)
2273 {
2274   cppchar_t c;
2275   cpp_buffer *buffer;
2276   const unsigned char *comment_start;
2277   cpp_token *result = pfile->cur_token++;
2278
2279  fresh_line:
2280   result->flags = 0;
2281   buffer = pfile->buffer;
2282   if (buffer->need_line)
2283     {
2284       if (pfile->state.in_deferred_pragma)
2285         {
2286           result->type = CPP_PRAGMA_EOL;
2287           pfile->state.in_deferred_pragma = false;
2288           if (!pfile->state.pragma_allow_expansion)
2289             pfile->state.prevent_expansion--;
2290           return result;
2291         }
2292       if (!_cpp_get_fresh_line (pfile))
2293         {
2294           result->type = CPP_EOF;
2295           if (!pfile->state.in_directive)
2296             {
2297               /* Tell the compiler the line number of the EOF token.  */
2298               result->src_loc = pfile->line_table->highest_line;
2299               result->flags = BOL;
2300             }
2301           return result;
2302         }
2303       if (!pfile->keep_tokens)
2304         {
2305           pfile->cur_run = &pfile->base_run;
2306           result = pfile->base_run.base;
2307           pfile->cur_token = result + 1;
2308         }
2309       result->flags = BOL;
2310       if (pfile->state.parsing_args == 2)
2311         result->flags |= PREV_WHITE;
2312     }
2313   buffer = pfile->buffer;
2314  update_tokens_line:
2315   result->src_loc = pfile->line_table->highest_line;
2316
2317  skipped_white:
2318   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2319       && !pfile->overlaid_buffer)
2320     {
2321       _cpp_process_line_notes (pfile, false);
2322       result->src_loc = pfile->line_table->highest_line;
2323     }
2324   c = *buffer->cur++;
2325
2326   if (pfile->forced_token_location_p)
2327     result->src_loc = *pfile->forced_token_location_p;
2328   else
2329     result->src_loc = linemap_position_for_column (pfile->line_table,
2330                                           CPP_BUF_COLUMN (buffer, buffer->cur));
2331
2332   switch (c)
2333     {
2334     case ' ': case '\t': case '\f': case '\v': case '\0':
2335       result->flags |= PREV_WHITE;
2336       skip_whitespace (pfile, c);
2337       goto skipped_white;
2338
2339     case '\n':
2340       if (buffer->cur < buffer->rlimit)
2341         CPP_INCREMENT_LINE (pfile, 0);
2342       buffer->need_line = true;
2343       goto fresh_line;
2344
2345     case '0': case '1': case '2': case '3': case '4':
2346     case '5': case '6': case '7': case '8': case '9':
2347       {
2348         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2349         result->type = CPP_NUMBER;
2350         lex_number (pfile, &result->val.str, &nst);
2351         warn_about_normalization (pfile, result, &nst);
2352         break;
2353       }
2354
2355     case 'L':
2356     case 'u':
2357     case 'U':
2358     case 'R':
2359       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2360          wide strings or raw strings.  */
2361       if (c == 'L' || CPP_OPTION (pfile, rliterals)
2362           || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2363         {
2364           if ((*buffer->cur == '\'' && c != 'R')
2365               || *buffer->cur == '"'
2366               || (*buffer->cur == 'R'
2367                   && c != 'R'
2368                   && buffer->cur[1] == '"'
2369                   && CPP_OPTION (pfile, rliterals))
2370               || (*buffer->cur == '8'
2371                   && c == 'u'
2372                   && (buffer->cur[1] == '"'
2373                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2374                           && CPP_OPTION (pfile, rliterals)))))
2375             {
2376               lex_string (pfile, result, buffer->cur - 1);
2377               break;
2378             }
2379         }
2380       /* Fall through.  */
2381
2382     case '_':
2383     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2384     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2385     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2386     case 's': case 't':           case 'v': case 'w': case 'x':
2387     case 'y': case 'z':
2388     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2389     case 'G': case 'H': case 'I': case 'J': case 'K':
2390     case 'M': case 'N': case 'O': case 'P': case 'Q':
2391     case 'S': case 'T':           case 'V': case 'W': case 'X':
2392     case 'Y': case 'Z':
2393       result->type = CPP_NAME;
2394       {
2395         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2396         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2397                                                 &nst,
2398                                                 &result->val.node.spelling);
2399         warn_about_normalization (pfile, result, &nst);
2400       }
2401
2402       /* Convert named operators to their proper types.  */
2403       if (result->val.node.node->flags & NODE_OPERATOR)
2404         {
2405           result->flags |= NAMED_OP;
2406           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2407         }
2408       break;
2409
2410     case '\'':
2411     case '"':
2412       lex_string (pfile, result, buffer->cur - 1);
2413       break;
2414
2415     case '/':
2416       /* A potential block or line comment.  */
2417       comment_start = buffer->cur;
2418       c = *buffer->cur;
2419
2420       if (c == '*')
2421         {
2422           if (_cpp_skip_block_comment (pfile))
2423             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2424         }
2425       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2426         {
2427           /* Don't warn for system headers.  */
2428           if (cpp_in_system_header (pfile))
2429             ;
2430           /* Warn about comments if pedantically GNUC89, and not
2431              in system headers.  */
2432           else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2433                    && CPP_PEDANTIC (pfile)
2434                    && ! buffer->warned_cplusplus_comments)
2435             {
2436               cpp_error (pfile, CPP_DL_PEDWARN,
2437                          "C++ style comments are not allowed in ISO C90");
2438               cpp_error (pfile, CPP_DL_PEDWARN,
2439                          "(this will be reported only once per input file)");
2440               buffer->warned_cplusplus_comments = 1;
2441             }
2442           /* Or if specifically desired via -Wc90-c99-compat.  */
2443           else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2444                    && ! CPP_OPTION (pfile, cplusplus)
2445                    && ! buffer->warned_cplusplus_comments)
2446             {
2447               cpp_error (pfile, CPP_DL_WARNING,
2448                          "C++ style comments are incompatible with C90");
2449               cpp_error (pfile, CPP_DL_WARNING,
2450                          "(this will be reported only once per input file)");
2451               buffer->warned_cplusplus_comments = 1;
2452             }
2453           /* In C89/C94, C++ style comments are forbidden.  */
2454           else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2455                     || CPP_OPTION (pfile, lang) == CLK_STDC94))
2456             {
2457               /* But don't be confused about valid code such as
2458                  - // immediately followed by *,
2459                  - // in a preprocessing directive,
2460                  - // in an #if 0 block.  */
2461               if (buffer->cur[1] == '*'
2462                   || pfile->state.in_directive
2463                   || pfile->state.skipping)
2464                 {
2465                   result->type = CPP_DIV;
2466                   break;
2467                 }
2468               else if (! buffer->warned_cplusplus_comments)
2469                 {
2470                   cpp_error (pfile, CPP_DL_ERROR,
2471                              "C++ style comments are not allowed in ISO C90");
2472                   cpp_error (pfile, CPP_DL_ERROR,
2473                              "(this will be reported only once per input "
2474                              "file)");
2475                   buffer->warned_cplusplus_comments = 1;
2476                 }
2477             }
2478           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2479             cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2480         }
2481       else if (c == '=')
2482         {
2483           buffer->cur++;
2484           result->type = CPP_DIV_EQ;
2485           break;
2486         }
2487       else
2488         {
2489           result->type = CPP_DIV;
2490           break;
2491         }
2492
2493       if (!pfile->state.save_comments)
2494         {
2495           result->flags |= PREV_WHITE;
2496           goto update_tokens_line;
2497         }
2498
2499       /* Save the comment as a token in its own right.  */
2500       save_comment (pfile, result, comment_start, c);
2501       break;
2502
2503     case '<':
2504       if (pfile->state.angled_headers)
2505         {
2506           lex_string (pfile, result, buffer->cur - 1);
2507           if (result->type != CPP_LESS)
2508             break;
2509         }
2510
2511       result->type = CPP_LESS;
2512       if (*buffer->cur == '=')
2513         buffer->cur++, result->type = CPP_LESS_EQ;
2514       else if (*buffer->cur == '<')
2515         {
2516           buffer->cur++;
2517           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2518         }
2519       else if (CPP_OPTION (pfile, digraphs))
2520         {
2521           if (*buffer->cur == ':')
2522             {
2523               /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2524                  three characters are <:: and the subsequent character
2525                  is neither : nor >, the < is treated as a preprocessor
2526                  token by itself".  */
2527               if (CPP_OPTION (pfile, cplusplus)
2528                   && CPP_OPTION (pfile, lang) != CLK_CXX98
2529                   && CPP_OPTION (pfile, lang) != CLK_GNUCXX
2530                   && buffer->cur[1] == ':'
2531                   && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2532                 break;
2533
2534               buffer->cur++;
2535               result->flags |= DIGRAPH;
2536               result->type = CPP_OPEN_SQUARE;
2537             }
2538           else if (*buffer->cur == '%')
2539             {
2540               buffer->cur++;
2541               result->flags |= DIGRAPH;
2542               result->type = CPP_OPEN_BRACE;
2543             }
2544         }
2545       break;
2546
2547     case '>':
2548       result->type = CPP_GREATER;
2549       if (*buffer->cur == '=')
2550         buffer->cur++, result->type = CPP_GREATER_EQ;
2551       else if (*buffer->cur == '>')
2552         {
2553           buffer->cur++;
2554           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2555         }
2556       break;
2557
2558     case '%':
2559       result->type = CPP_MOD;
2560       if (*buffer->cur == '=')
2561         buffer->cur++, result->type = CPP_MOD_EQ;
2562       else if (CPP_OPTION (pfile, digraphs))
2563         {
2564           if (*buffer->cur == ':')
2565             {
2566               buffer->cur++;
2567               result->flags |= DIGRAPH;
2568               result->type = CPP_HASH;
2569               if (*buffer->cur == '%' && buffer->cur[1] == ':')
2570                 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2571             }
2572           else if (*buffer->cur == '>')
2573             {
2574               buffer->cur++;
2575               result->flags |= DIGRAPH;
2576               result->type = CPP_CLOSE_BRACE;
2577             }
2578         }
2579       break;
2580
2581     case '.':
2582       result->type = CPP_DOT;
2583       if (ISDIGIT (*buffer->cur))
2584         {
2585           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2586           result->type = CPP_NUMBER;
2587           lex_number (pfile, &result->val.str, &nst);
2588           warn_about_normalization (pfile, result, &nst);
2589         }
2590       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2591         buffer->cur += 2, result->type = CPP_ELLIPSIS;
2592       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2593         buffer->cur++, result->type = CPP_DOT_STAR;
2594       break;
2595
2596     case '+':
2597       result->type = CPP_PLUS;
2598       if (*buffer->cur == '+')
2599         buffer->cur++, result->type = CPP_PLUS_PLUS;
2600       else if (*buffer->cur == '=')
2601         buffer->cur++, result->type = CPP_PLUS_EQ;
2602       break;
2603
2604     case '-':
2605       result->type = CPP_MINUS;
2606       if (*buffer->cur == '>')
2607         {
2608           buffer->cur++;
2609           result->type = CPP_DEREF;
2610           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2611             buffer->cur++, result->type = CPP_DEREF_STAR;
2612         }
2613       else if (*buffer->cur == '-')
2614         buffer->cur++, result->type = CPP_MINUS_MINUS;
2615       else if (*buffer->cur == '=')
2616         buffer->cur++, result->type = CPP_MINUS_EQ;
2617       break;
2618
2619     case '&':
2620       result->type = CPP_AND;
2621       if (*buffer->cur == '&')
2622         buffer->cur++, result->type = CPP_AND_AND;
2623       else if (*buffer->cur == '=')
2624         buffer->cur++, result->type = CPP_AND_EQ;
2625       break;
2626
2627     case '|':
2628       result->type = CPP_OR;
2629       if (*buffer->cur == '|')
2630         buffer->cur++, result->type = CPP_OR_OR;
2631       else if (*buffer->cur == '=')
2632         buffer->cur++, result->type = CPP_OR_EQ;
2633       break;
2634
2635     case ':':
2636       result->type = CPP_COLON;
2637       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2638         buffer->cur++, result->type = CPP_SCOPE;
2639       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2640         {
2641           buffer->cur++;
2642           result->flags |= DIGRAPH;
2643           result->type = CPP_CLOSE_SQUARE;
2644         }
2645       break;
2646
2647     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2648     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2649     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2650     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2651     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2652
2653     case '?': result->type = CPP_QUERY; break;
2654     case '~': result->type = CPP_COMPL; break;
2655     case ',': result->type = CPP_COMMA; break;
2656     case '(': result->type = CPP_OPEN_PAREN; break;
2657     case ')': result->type = CPP_CLOSE_PAREN; break;
2658     case '[': result->type = CPP_OPEN_SQUARE; break;
2659     case ']': result->type = CPP_CLOSE_SQUARE; break;
2660     case '{': result->type = CPP_OPEN_BRACE; break;
2661     case '}': result->type = CPP_CLOSE_BRACE; break;
2662     case ';': result->type = CPP_SEMICOLON; break;
2663
2664       /* @ is a punctuator in Objective-C.  */
2665     case '@': result->type = CPP_ATSIGN; break;
2666
2667     case '$':
2668     case '\\':
2669       {
2670         const uchar *base = --buffer->cur;
2671         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2672
2673         if (forms_identifier_p (pfile, true, &nst))
2674           {
2675             result->type = CPP_NAME;
2676             result->val.node.node = lex_identifier (pfile, base, true, &nst,
2677                                                     &result->val.node.spelling);
2678             warn_about_normalization (pfile, result, &nst);
2679             break;
2680           }
2681         buffer->cur++;
2682       }
2683
2684     default:
2685       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2686       break;
2687     }
2688
2689   return result;
2690 }
2691
2692 /* An upper bound on the number of bytes needed to spell TOKEN.
2693    Does not include preceding whitespace.  */
2694 unsigned int
2695 cpp_token_len (const cpp_token *token)
2696 {
2697   unsigned int len;
2698
2699   switch (TOKEN_SPELL (token))
2700     {
2701     default:            len = 6;                                break;
2702     case SPELL_LITERAL: len = token->val.str.len;               break;
2703     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
2704     }
2705
2706   return len;
2707 }
2708
/* Decode the single UTF-8 sequence that starts at NAME and store the
   matching escape, a backslash, 'U' and eight lower-case hexadecimal
   digits, in BUFFER.  Exactly 10 bytes are written to BUFFER and no
   terminating NUL is added.  The return value is the number of bytes
   of NAME that made up the sequence, i.e. the count of leading one
   bits in the first byte.  A continuation byte whose top two bits are
   not `10' is treated as ill-formed input and aborts.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *in = name;
  unsigned char *out = buffer;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The number of leading one bits in the first byte gives the
     length of the whole sequence.  */
  lead = *in;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte sit below the length marker.  */
  code_point = *in & (0x7F >> seq_len);

  /* Every continuation byte contributes six further bits.  */
  consumed = 1;
  while (consumed < seq_len)
    {
      in++;
      code_point = (code_point << 6) | (*in & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*in & ~0x3F) != 0x80)
        abort ();

      consumed++;
    }

  *out++ = '\\';
  *out++ = 'U';
  /* Most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2742
2743 /* Given a token TYPE corresponding to a digraph, return a pointer to
2744    the spelling of the digraph.  */
2745 static const unsigned char *
2746 cpp_digraph2name (enum cpp_ttype type)
2747 {
2748   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2749 }
2750
2751 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2752    The buffer must already contain the enough space to hold the
2753    token's spelling.  Returns a pointer to the character after the
2754    last character written.  */
2755 unsigned char *
2756 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2757 {
2758   size_t i;
2759   const unsigned char *name = NODE_NAME (ident);
2760
2761   for (i = 0; i < NODE_LEN (ident); i++)
2762     if (name[i] & ~0x7F)
2763       {
2764         i += utf8_to_ucn (buffer, name + i) - 1;
2765         buffer += 10;
2766       }
2767     else
2768       *buffer++ = name[i];
2769
2770   return buffer;
2771 }
2772
2773 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2774    already contain the enough space to hold the token's spelling.
2775    Returns a pointer to the character after the last character written.
2776    FORSTRING is true if this is to be the spelling after translation
2777    phase 1 (with the original spelling of extended identifiers), false
2778    if extended identifiers should always be written using UCNs (there is
2779    no option for always writing them in the internal UTF-8 form).
2780    FIXME: Would be nice if we didn't need the PFILE argument.  */
2781 unsigned char *
2782 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2783                  unsigned char *buffer, bool forstring)
2784 {
2785   switch (TOKEN_SPELL (token))
2786     {
2787     case SPELL_OPERATOR:
2788       {
2789         const unsigned char *spelling;
2790         unsigned char c;
2791
2792         if (token->flags & DIGRAPH)
2793           spelling = cpp_digraph2name (token->type);
2794         else if (token->flags & NAMED_OP)
2795           goto spell_ident;
2796         else
2797           spelling = TOKEN_NAME (token);
2798
2799         while ((c = *spelling++) != '\0')
2800           *buffer++ = c;
2801       }
2802       break;
2803
2804     spell_ident:
2805     case SPELL_IDENT:
2806       if (forstring)
2807         {
2808           memcpy (buffer, NODE_NAME (token->val.node.spelling),
2809                   NODE_LEN (token->val.node.spelling));
2810           buffer += NODE_LEN (token->val.node.spelling);
2811         }
2812       else
2813         buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2814       break;
2815
2816     case SPELL_LITERAL:
2817       memcpy (buffer, token->val.str.text, token->val.str.len);
2818       buffer += token->val.str.len;
2819       break;
2820
2821     case SPELL_NONE:
2822       cpp_error (pfile, CPP_DL_ICE,
2823                  "unspellable token %s", TOKEN_NAME (token));
2824       break;
2825     }
2826
2827   return buffer;
2828 }
2829
2830 /* Returns TOKEN spelt as a null-terminated string.  The string is
2831    freed when the reader is destroyed.  Useful for diagnostics.  */
2832 unsigned char *
2833 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2834 {
2835   unsigned int len = cpp_token_len (token) + 1;
2836   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2837
2838   end = cpp_spell_token (pfile, token, start, false);
2839   end[0] = '\0';
2840
2841   return start;
2842 }
2843
2844 /* Returns a pointer to a string which spells the token defined by
2845    TYPE and FLAGS.  Used by C front ends, which really should move to
2846    using cpp_token_as_text.  */
2847 const char *
2848 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2849 {
2850   if (flags & DIGRAPH)
2851     return (const char *) cpp_digraph2name (type);
2852   else if (flags & NAMED_OP)
2853     return cpp_named_operator2name (type);
2854
2855   return (const char *) token_spellings[type].name;
2856 }
2857
2858 /* Writes the spelling of token to FP, without any preceding space.
2859    Separated from cpp_spell_token for efficiency - to avoid stdio
2860    double-buffering.  */
2861 void
2862 cpp_output_token (const cpp_token *token, FILE *fp)
2863 {
2864   switch (TOKEN_SPELL (token))
2865     {
2866     case SPELL_OPERATOR:
2867       {
2868         const unsigned char *spelling;
2869         int c;
2870
2871         if (token->flags & DIGRAPH)
2872           spelling = cpp_digraph2name (token->type);
2873         else if (token->flags & NAMED_OP)
2874           goto spell_ident;
2875         else
2876           spelling = TOKEN_NAME (token);
2877
2878         c = *spelling;
2879         do
2880           putc (c, fp);
2881         while ((c = *++spelling) != '\0');
2882       }
2883       break;
2884
2885     spell_ident:
2886     case SPELL_IDENT:
2887       {
2888         size_t i;
2889         const unsigned char * name = NODE_NAME (token->val.node.node);
2890
2891         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2892           if (name[i] & ~0x7F)
2893             {
2894               unsigned char buffer[10];
2895               i += utf8_to_ucn (buffer, name + i) - 1;
2896               fwrite (buffer, 1, 10, fp);
2897             }
2898           else
2899             fputc (NODE_NAME (token->val.node.node)[i], fp);
2900       }
2901       break;
2902
2903     case SPELL_LITERAL:
2904       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2905       break;
2906
2907     case SPELL_NONE:
2908       /* An error, most probably.  */
2909       break;
2910     }
2911 }
2912
2913 /* Compare two tokens.  */
2914 int
2915 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2916 {
2917   if (a->type == b->type && a->flags == b->flags)
2918     switch (TOKEN_SPELL (a))
2919       {
2920       default:                  /* Keep compiler happy.  */
2921       case SPELL_OPERATOR:
2922         /* token_no is used to track where multiple consecutive ##
2923            tokens were originally located.  */
2924         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2925       case SPELL_NONE:
2926         return (a->type != CPP_MACRO_ARG
2927                 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2928                     && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2929       case SPELL_IDENT:
2930         return (a->val.node.node == b->val.node.node
2931                 && a->val.node.spelling == b->val.node.spelling);
2932       case SPELL_LITERAL:
2933         return (a->val.str.len == b->val.str.len
2934                 && !memcmp (a->val.str.text, b->val.str.text,
2935                             a->val.str.len));
2936       }
2937
2938   return 0;
2939 }
2940
2941 /* Returns nonzero if a space should be inserted to avoid an
2942    accidental token paste for output.  For simplicity, it is
2943    conservative, and occasionally advises a space where one is not
2944    needed, e.g. "." and ".2".  */
2945 int
2946 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2947                  const cpp_token *token2)
2948 {
2949   enum cpp_ttype a = token1->type, b = token2->type;
2950   cppchar_t c;
2951
2952   if (token1->flags & NAMED_OP)
2953     a = CPP_NAME;
2954   if (token2->flags & NAMED_OP)
2955     b = CPP_NAME;
2956
2957   c = EOF;
2958   if (token2->flags & DIGRAPH)
2959     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2960   else if (token_spellings[b].category == SPELL_OPERATOR)
2961     c = token_spellings[b].name[0];
2962
2963   /* Quickly get everything that can paste with an '='.  */
2964   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2965     return 1;
2966
2967   switch (a)
2968     {
2969     case CPP_GREATER:   return c == '>';
2970     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2971     case CPP_PLUS:      return c == '+';
2972     case CPP_MINUS:     return c == '-' || c == '>';
2973     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2974     case CPP_MOD:       return c == ':' || c == '>';
2975     case CPP_AND:       return c == '&';
2976     case CPP_OR:        return c == '|';
2977     case CPP_COLON:     return c == ':' || c == '>';
2978     case CPP_DEREF:     return c == '*';
2979     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2980     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2981     case CPP_NAME:      return ((b == CPP_NUMBER
2982                                  && name_p (pfile, &token2->val.str))
2983                                 || b == CPP_NAME
2984                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
2985     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
2986                                 || c == '.' || c == '+' || c == '-');
2987                                       /* UCNs */
2988     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
2989                                  && b == CPP_NAME)
2990                                 || (CPP_OPTION (pfile, objc)
2991                                     && token1->val.str.text[0] == '@'
2992                                     && (b == CPP_NAME || b == CPP_STRING)));
2993     case CPP_STRING:
2994     case CPP_WSTRING:
2995     case CPP_UTF8STRING:
2996     case CPP_STRING16:
2997     case CPP_STRING32:  return (CPP_OPTION (pfile, user_literals)
2998                                 && (b == CPP_NAME
2999                                     || (TOKEN_SPELL (token2) == SPELL_LITERAL
3000                                         && ISIDST (token2->val.str.text[0]))));
3001
3002     default:            break;
3003     }
3004
3005   return 0;
3006 }
3007
3008 /* Output all the remaining tokens on the current line, and a newline
3009    character, to FP.  Leading whitespace is removed.  If there are
3010    macros, special token padding is not performed.  */
3011 void
3012 cpp_output_line (cpp_reader *pfile, FILE *fp)
3013 {
3014   const cpp_token *token;
3015
3016   token = cpp_get_token (pfile);
3017   while (token->type != CPP_EOF)
3018     {
3019       cpp_output_token (token, fp);
3020       token = cpp_get_token (pfile);
3021       if (token->flags & PREV_WHITE)
3022         putc (' ', fp);
3023     }
3024
3025   putc ('\n', fp);
3026 }
3027
3028 /* Return a string representation of all the remaining tokens on the
3029    current line.  The result is allocated using xmalloc and must be
3030    freed by the caller.  */
3031 unsigned char *
3032 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3033 {
3034   const cpp_token *token;
3035   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3036   unsigned int alloced = 120 + out;
3037   unsigned char *result = (unsigned char *) xmalloc (alloced);
3038
3039   /* If DIR_NAME is empty, there are no initial contents.  */
3040   if (dir_name)
3041     {
3042       sprintf ((char *) result, "#%s ", dir_name);
3043       out += 2;
3044     }
3045
3046   token = cpp_get_token (pfile);
3047   while (token->type != CPP_EOF)
3048     {
3049       unsigned char *last;
3050       /* Include room for a possible space and the terminating nul.  */
3051       unsigned int len = cpp_token_len (token) + 2;
3052
3053       if (out + len > alloced)
3054         {
3055           alloced *= 2;
3056           if (out + len > alloced)
3057             alloced = out + len;
3058           result = (unsigned char *) xrealloc (result, alloced);
3059         }
3060
3061       last = cpp_spell_token (pfile, token, &result[out], 0);
3062       out = last - result;
3063
3064       token = cpp_get_token (pfile);
3065       if (token->flags & PREV_WHITE)
3066         result[out++] = ' ';
3067     }
3068
3069   result[out] = '\0';
3070   return result;
3071 }
3072
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that an argument containing a
   low-precedence operator is grouped as the caller wrote it.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3087
3088 /* Create a new allocation buffer.  Place the control block at the end
3089    of the buffer, so that buffer overflows will cause immediate chaos.  */
3090 static _cpp_buff *
3091 new_buff (size_t len)
3092 {
3093   _cpp_buff *result;
3094   unsigned char *base;
3095
3096   if (len < MIN_BUFF_SIZE)
3097     len = MIN_BUFF_SIZE;
3098   len = CPP_ALIGN (len);
3099
3100 #ifdef ENABLE_VALGRIND_CHECKING
3101   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3102      struct first.  */
3103   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3104   base = XNEWVEC (unsigned char, len + slen);
3105   result = (_cpp_buff *) base;
3106   base += slen;
3107 #else
3108   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3109   result = (_cpp_buff *) (base + len);
3110 #endif
3111   result->base = base;
3112   result->cur = base;
3113   result->limit = base + len;
3114   result->next = NULL;
3115   return result;
3116 }
3117
3118 /* Place a chain of unwanted allocation buffers on the free list.  */
3119 void
3120 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3121 {
3122   _cpp_buff *end = buff;
3123
3124   while (end->next)
3125     end = end->next;
3126   end->next = pfile->free_buffs;
3127   pfile->free_buffs = buff;
3128 }
3129
3130 /* Return a free buffer of size at least MIN_SIZE.  */
3131 _cpp_buff *
3132 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3133 {
3134   _cpp_buff *result, **p;
3135
3136   for (p = &pfile->free_buffs;; p = &(*p)->next)
3137     {
3138       size_t size;
3139
3140       if (*p == NULL)
3141         return new_buff (min_size);
3142       result = *p;
3143       size = result->limit - result->base;
3144       /* Return a buffer that's big enough, but don't waste one that's
3145          way too big.  */
3146       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3147         break;
3148     }
3149
3150   *p = result->next;
3151   result->next = NULL;
3152   result->cur = result->base;
3153   return result;
3154 }
3155
3156 /* Creates a new buffer with enough space to hold the uncommitted
3157    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3158    the excess bytes to the new buffer.  Chains the new buffer after
3159    BUFF, and returns the new buffer.  */
3160 _cpp_buff *
3161 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3162 {
3163   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3164   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3165
3166   buff->next = new_buff;
3167   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3168   return new_buff;
3169 }
3170
3171 /* Creates a new buffer with enough space to hold the uncommitted
3172    remaining bytes of the buffer pointed to by BUFF, and at least
3173    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3174    Chains the new buffer before the buffer pointed to by BUFF, and
3175    updates the pointer to point to the new buffer.  */
3176 void
3177 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3178 {
3179   _cpp_buff *new_buff, *old_buff = *pbuff;
3180   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3181
3182   new_buff = _cpp_get_buff (pfile, size);
3183   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3184   new_buff->next = old_buff;
3185   *pbuff = new_buff;
3186 }
3187
3188 /* Free a chain of buffers starting at BUFF.  */
3189 void
3190 _cpp_free_buff (_cpp_buff *buff)
3191 {
3192   _cpp_buff *next;
3193
3194   for (; buff; buff = next)
3195     {
3196       next = buff->next;
3197 #ifdef ENABLE_VALGRIND_CHECKING
3198       free (buff);
3199 #else
3200       free (buff->base);
3201 #endif
3202     }
3203 }
3204
3205 /* Allocate permanent, unaligned storage of length LEN.  */
3206 unsigned char *
3207 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3208 {
3209   _cpp_buff *buff = pfile->u_buff;
3210   unsigned char *result = buff->cur;
3211
3212   if (len > (size_t) (buff->limit - result))
3213     {
3214       buff = _cpp_get_buff (pfile, len);
3215       buff->next = pfile->u_buff;
3216       pfile->u_buff = buff;
3217       result = buff->cur;
3218     }
3219
3220   buff->cur = result + len;
3221   return result;
3222 }
3223
3224 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3225    That buffer is used for growing allocations when saving macro
3226    replacement lists in a #define, and when parsing an answer to an
3227    assertion in #assert, #unassert or #if (and therefore possibly
3228    whilst expanding macros).  It therefore must not be used by any
3229    code that they might call: specifically the lexer and the guts of
3230    the macro expander.
3231
3232    All existing other uses clearly fit this restriction: storing
3233    registered pragmas during initialization.  */
3234 unsigned char *
3235 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3236 {
3237   _cpp_buff *buff = pfile->a_buff;
3238   unsigned char *result = buff->cur;
3239
3240   if (len > (size_t) (buff->limit - result))
3241     {
3242       buff = _cpp_get_buff (pfile, len);
3243       buff->next = pfile->a_buff;
3244       pfile->a_buff = buff;
3245       result = buff->cur;
3246     }
3247
3248   buff->cur = result + len;
3249   return result;
3250 }
3251
3252 /* Say which field of TOK is in use.  */
3253
3254 enum cpp_token_fld_kind
3255 cpp_token_val_index (const cpp_token *tok)
3256 {
3257   switch (TOKEN_SPELL (tok))
3258     {
3259     case SPELL_IDENT:
3260       return CPP_TOKEN_FLD_NODE;
3261     case SPELL_LITERAL:
3262       return CPP_TOKEN_FLD_STR;
3263     case SPELL_OPERATOR:
3264       if (tok->type == CPP_PASTE)
3265         return CPP_TOKEN_FLD_TOKEN_NO;
3266       else
3267         return CPP_TOKEN_FLD_NONE;
3268     case SPELL_NONE:
3269       if (tok->type == CPP_MACRO_ARG)
3270         return CPP_TOKEN_FLD_ARG_NO;
3271       else if (tok->type == CPP_PADDING)
3272         return CPP_TOKEN_FLD_SOURCE;
3273       else if (tok->type == CPP_PRAGMA)
3274         return CPP_TOKEN_FLD_PRAGMA;
3275       /* else fall through */
3276     default:
3277       return CPP_TOKEN_FLD_NONE;
3278     }
3279 }
3280
3281 /* All tokens lexed in R after calling this function will be forced to have
3282    their source_location the same as the location referenced by P, until
3283    cpp_stop_forcing_token_locations is called for R.  */
3284
3285 void
3286 cpp_force_token_locations (cpp_reader *r, source_location *p)
3287 {
3288   r->forced_token_location_p = p;
3289 }
3290
3291 /* Go back to assigning locations naturally for lexed tokens.  */
3292
3293 void
3294 cpp_stop_forcing_token_locations (cpp_reader *r)
3295 {
3296   r->forced_token_location_p = NULL;
3297 }