libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 3, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
/* One row of the token spelling table.  Rows are built positionally
   by the OP and TK macros, so the field order matters.  */
struct token_spelling
{
  /* The SPELL_* category of the token type.  */
  enum spell_type category;
  /* For OP rows the operator's spelling; for TK rows the stringified
     name of the token type enumerator.  */
  const unsigned char *name;
};
  40
/* Spellings of the digraphs.
   NOTE(review): the order presumably has to match the digraph token
   ordering used where this table is indexed; confirm before
   reordering.  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, UC s  },
  45 #define TK(e, s) { SPELL_ ## s,    UC #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
/* Forward declarations of helpers local to this file.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
                            unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
  66
  67
  68 /* Utility routine:
  69
  70    Compares, the token TOKEN to the NUL-terminated string STRING.
  71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  72 int
  73 cpp_ideq (const cpp_token *token, const char *string)
  74 {
  75   if (token->type != CPP_NAME)
  76     return 0;
  77
  78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
  79 }
  80
  81 /* Record a note TYPE at byte POS into the current cleaned logical
  82    line.  */
  83 static void
  84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  85 {
  86   if (buffer->notes_used == buffer->notes_cap)
  87     {
  88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  90                                   buffer->notes_cap);
  91     }
  92
  93   buffer->notes[buffer->notes_used].pos = pos;
  94   buffer->notes[buffer->notes_used].type = type;
  95   buffer->notes_used++;
  96 }
  97
  98 \f
/* Fast path to find the characters that are special within a line,
   using optimized character scanning algorithms.  Anything complicated
   falls back to the slow path below.  Since this loop is very hot, it
   is worth doing these kinds of optimizations.
 103
 104    One of the paths through the ifdefs should provide
 105
 106      const uchar *search_line_fast (const uchar *s, const uchar *end);
 107
 108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
 109    the found character.
 110
 111    Note that the last character of the buffer is *always* a newline,
 112    as forced by _cpp_convert_input.  This fact can be used to avoid
 113    explicitly looking for the end of the buffer.  */
 114
 115 /* Configure gives us an ifdef test.  */
 116 #ifndef WORDS_BIGENDIAN
 117 #define WORDS_BIGENDIAN 0
 118 #endif
 119
 120 /* We'd like the largest integer that fits into a register.  There's nothing
 121    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
 122    but MS decided on an LLP64 model.  Thankfully when building with GCC we
 123    can get the "real" word size.  */
 124 #ifdef __GNUC__
 125 typedef unsigned int word_type __attribute__((__mode__(__word__)));
 126 #else
 127 typedef unsigned long word_type;
 128 #endif
 129
 130 /* The code below is only expecting sizes 4 or 8.
 131    Die at compile-time if this expectation is violated.  */
 132 typedef char check_word_type_size
 133   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
 134
 135 /* Return X with the first N bytes forced to values that won't match one
 136    of the interesting characters.  Note that NUL is not interesting.  */
 137
 138 static inline word_type
 139 acc_char_mask_misalign (word_type val, unsigned int n)
 140 {
 141   word_type mask = -1;
 142   if (WORDS_BIGENDIAN)
 143     mask >>= n * 8;
 144   else
 145     mask <<= n * 8;
 146   return val & mask;
 147 }
 148
 149 /* Return X replicated to all byte positions within WORD_TYPE.  */
 150
 151 static inline word_type
 152 acc_char_replicate (uchar x)
 153 {
 154   word_type ret;
 155
 156   ret = (x << 24) | (x << 16) | (x << 8) | x;
 157   if (sizeof(word_type) == 8)
 158     ret = (ret << 16 << 16) | ret;
 159   return ret;
 160 }
 161
 162 /* Return non-zero if some byte of VAL is (probably) C.  */
 163
 164 static inline word_type
 165 acc_char_cmp (word_type val, word_type c)
 166 {
 167 #if defined(__GNUC__) && defined(__alpha__)
 168   /* We can get exact results using a compare-bytes instruction.
 169      Get (val == c) via (0 >= (val ^ c)).  */
 170   return __builtin_alpha_cmpbge (0, val ^ c);
 171 #else
 172   word_type magic = 0x7efefefeU;
 173   if (sizeof(word_type) == 8)
 174     magic = (magic << 16 << 16) | 0xfefefefeU;
 175   magic |= 1;
 176
 177   val ^= c;
 178   return ((val + magic) ^ ~val) & ~magic;
 179 #endif
 180 }
 181
 182 /* Given the result of acc_char_cmp is non-zero, return the index of
 183    the found character.  If this was a false positive, return -1.  */
 184
 185 static inline int
 186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
 187                 word_type val ATTRIBUTE_UNUSED)
 188 {
 189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
 190   /* The cmpbge instruction sets *bits* of the result corresponding to
 191      matches in the bytes with no false positives.  */
 192   return __builtin_ctzl (cmp);
 193 #else
 194   unsigned int i;
 195
 196   /* ??? It would be nice to force unrolling here,
 197      and have all of these constants folded.  */
 198   for (i = 0; i < sizeof(word_type); ++i)
 199     {
 200       uchar c;
 201       if (WORDS_BIGENDIAN)
 202         c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
 203       else
 204         c = (val >> i * 8) & 0xff;
 205
 206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
 207         return i;
 208     }
 209
 210   return -1;
 211 #endif
 212 }
 213
 214 /* A version of the fast scanner using bit fiddling techniques.
 215
 216    For 32-bit words, one would normally perform 16 comparisons and
 217    16 branches.  With this algorithm one performs 24 arithmetic
 218    operations and one branch.  Whether this is faster with a 32-bit
 219    word size is going to be somewhat system dependent.
 220
 221    For 64-bit words, we eliminate twice the number of comparisons
 222    and branches without increasing the number of arithmetic operations.
 223    It's almost certainly going to be a win with 64-bit word size.  */
 224
 225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
 226   ATTRIBUTE_UNUSED;
 227
 228 static const uchar *
 229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 230 {
 231   const word_type repl_nl = acc_char_replicate ('\n');
 232   const word_type repl_cr = acc_char_replicate ('\r');
 233   const word_type repl_bs = acc_char_replicate ('\\');
 234   const word_type repl_qm = acc_char_replicate ('?');
 235
 236   unsigned int misalign;
 237   const word_type *p;
 238   word_type val, t;
 239
 240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
 241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
 242   val = *p;
 243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
 244   if (misalign)
 245     val = acc_char_mask_misalign (val, misalign);
 246
 247   /* Main loop.  */
 248   while (1)
 249     {
 250       t  = acc_char_cmp (val, repl_nl);
 251       t |= acc_char_cmp (val, repl_cr);
 252       t |= acc_char_cmp (val, repl_bs);
 253       t |= acc_char_cmp (val, repl_qm);
 254
 255       if (__builtin_expect (t != 0, 0))
 256         {
 257           int i = acc_char_index (t, val);
 258           if (i >= 0)
 259             return (const uchar *)p + i;
 260         }
 261
 262       val = *++p;
 263     }
 264 }
 265
 266 /* Disable on Solaris 2/x86 until the following problem can be properly
 267    autoconfed:
 268
 269    The Solaris 10+ assembler tags objects with the instruction set
 270    extensions used, so SSE4.2 executables cannot run on machines that
 271    don't support that extension.  */
 272
 273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
 274
 275 /* Replicated character data to be shared between implementations.
 276    Recall that outside of a context with vector support we can't
 277    define compatible vector types, therefore these are all defined
 278    in terms of raw characters.  */
 279 static const char repl_chars[4][16] __attribute__((aligned(16))) = {
 280   { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 281     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
 282   { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 283     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
 284   { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 285     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
 286   { '?', '?', '?', '?', '?', '?', '?', '?',
 287     '?', '?', '?', '?', '?', '?', '?', '?' },
 288 };
 289
 290 /* A version of the fast scanner using MMX vectorized byte compare insns.
 291
 292    This uses the PMOVMSKB instruction which was introduced with "MMX2",
 293    which was packaged into SSE1; it is also present in the AMD MMX
 294    extension.  Mark the function as using "sse" so that we emit a real
 295    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
 296
 297 static const uchar *
 298 #ifndef __SSE__
 299 __attribute__((__target__("sse")))
 300 #endif
 301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 302 {
 303   typedef char v8qi __attribute__ ((__vector_size__ (8)));
 304   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
 305
 306   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
 307   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
 308   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
 309   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
 310
 311   unsigned int misalign, found, mask;
 312   const v8qi *p;
 313   v8qi data, t, c;
 314
 315   /* Align the source pointer.  While MMX doesn't generate unaligned data
 316      faults, this allows us to safely scan to the end of the buffer without
 317      reading beyond the end of the last page.  */
 318   misalign = (uintptr_t)s & 7;
 319   p = (const v8qi *)((uintptr_t)s & -8);
 320   data = *p;
 321
 322   /* Create a mask for the bytes that are valid within the first
 323      16-byte block.  The Idea here is that the AND with the mask
 324      within the loop is "free", since we need some AND or TEST
 325      insn in order to set the flags for the branch anyway.  */
 326   mask = -1u << misalign;
 327
 328   /* Main loop processing 8 bytes at a time.  */
 329   goto start;
 330   do
 331     {
 332       data = *++p;
 333       mask = -1;
 334
 335     start:
 336       t = __builtin_ia32_pcmpeqb(data, repl_nl);
 337       c = __builtin_ia32_pcmpeqb(data, repl_cr);
 338       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 339       c = __builtin_ia32_pcmpeqb(data, repl_bs);
 340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 341       c = __builtin_ia32_pcmpeqb(data, repl_qm);
 342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 343       found = __builtin_ia32_pmovmskb (t);
 344       found &= mask;
 345     }
 346   while (!found);
 347
 348   __builtin_ia32_emms ();
 349
 350   /* FOUND contains 1 in bits for which we matched a relevant
 351      character.  Conversion to the byte index is trivial.  */
 352   found = __builtin_ctz(found);
 353   return (const uchar *)p + found;
 354 }
 355
 356 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
 357
 358 static const uchar *
 359 #ifndef __SSE2__
 360 __attribute__((__target__("sse2")))
 361 #endif
 362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 363 {
 364   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 365
 366   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
 367   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
 368   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
 369   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
 370
 371   unsigned int misalign, found, mask;
 372   const v16qi *p;
 373   v16qi data, t;
 374
 375   /* Align the source pointer.  */
 376   misalign = (uintptr_t)s & 15;
 377   p = (const v16qi *)((uintptr_t)s & -16);
 378   data = *p;
 379
 380   /* Create a mask for the bytes that are valid within the first
 381      16-byte block.  The Idea here is that the AND with the mask
 382      within the loop is "free", since we need some AND or TEST
 383      insn in order to set the flags for the branch anyway.  */
 384   mask = -1u << misalign;
 385
 386   /* Main loop processing 16 bytes at a time.  */
 387   goto start;
 388   do
 389     {
 390       data = *++p;
 391       mask = -1;
 392
 393     start:
 394       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
 395       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
 396       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
 397       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
 398       found = __builtin_ia32_pmovmskb128 (t);
 399       found &= mask;
 400     }
 401   while (!found);
 402
 403   /* FOUND contains 1 in bits for which we matched a relevant
 404      character.  Conversion to the byte index is trivial.  */
 405   found = __builtin_ctz(found);
 406   return (const uchar *)p + found;
 407 }
 408
 409 #ifdef HAVE_SSE4
 410 /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
 411
 412 static const uchar *
 413 #ifndef __SSE4_2__
 414 __attribute__((__target__("sse4.2")))
 415 #endif
 416 search_line_sse42 (const uchar *s, const uchar *end)
 417 {
 418   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 419   static const v16qi search = { '\n', '\r', '?', '\\' };
 420
 421   uintptr_t si = (uintptr_t)s;
 422   uintptr_t index;
 423
 424   /* Check for unaligned input.  */
 425   if (si & 15)
 426     {
 427       v16qi sv;
 428
 429       if (__builtin_expect (end - s < 16, 0)
 430           && __builtin_expect ((si & 0xfff) > 0xff0, 0))
 431         {
 432           /* There are less than 16 bytes left in the buffer, and less
 433              than 16 bytes left on the page.  Reading 16 bytes at this
 434              point might generate a spurious page fault.  Defer to the
 435              SSE2 implementation, which already handles alignment.  */
 436           return search_line_sse2 (s, end);
 437         }
 438
 439       /* ??? The builtin doesn't understand that the PCMPESTRI read from
 440          memory need not be aligned.  */
 441       sv = __builtin_ia32_loaddqu ((const char *) s);
 442       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
 443
 444       if (__builtin_expect (index < 16, 0))
 445         goto found;
 446
 447       /* Advance the pointer to an aligned address.  We will re-scan a
 448          few bytes, but we no longer need care for reading past the
 449          end of a page, since we're guaranteed a match.  */
 450       s = (const uchar *)((si + 16) & -16);
 451     }
 452
 453   /* Main loop, processing 16 bytes at a time.  By doing the whole loop
 454      in inline assembly, we can make proper use of the flags set.  */
 455   __asm (      "sub $16, %1\n"
 456         "       .balign 16\n"
 457         "0:     add $16, %1\n"
 458         "       %vpcmpestri $0, (%1), %2\n"
 459         "       jnc 0b"
 460         : "=&c"(index), "+r"(s)
 461         : "x"(search), "a"(4), "d"(16));
 462
 463  found:
 464   return s + index;
 465 }
 466
 467 #else
 468 /* Work around out-dated assemblers without sse4 support.  */
 469 #define search_line_sse42 search_line_sse2
 470 #endif
 471
 472 /* Check the CPU capabilities.  */
 473
 474 #include "../gcc/config/i386/cpuid.h"
 475
 476 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
 477 static search_line_fast_type search_line_fast;
 478
 479 #define HAVE_init_vectorized_lexer 1
 480 static inline void
 481 init_vectorized_lexer (void)
 482 {
 483   unsigned dummy, ecx = 0, edx = 0;
 484   search_line_fast_type impl = search_line_acc_char;
 485   int minimum = 0;
 486
 487 #if defined(__SSE4_2__)
 488   minimum = 3;
 489 #elif defined(__SSE2__)
 490   minimum = 2;
 491 #elif defined(__SSE__)
 492   minimum = 1;
 493 #endif
 494
 495   if (minimum == 3)
 496     impl = search_line_sse42;
 497   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
 498     {
 499       if (minimum == 3 || (ecx & bit_SSE4_2))
 500         impl = search_line_sse42;
 501       else if (minimum == 2 || (edx & bit_SSE2))
 502         impl = search_line_sse2;
 503       else if (minimum == 1 || (edx & bit_SSE))
 504         impl = search_line_mmx;
 505     }
 506   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
 507     {
 508       if (minimum == 1
 509           || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
 510         impl = search_line_mmx;
 511     }
 512
 513   search_line_fast = impl;
 514 }
 515
 516 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
 517
 518 /* A vection of the fast scanner using AltiVec vectorized byte compares
 519    and VSX unaligned loads (when VSX is available).  This is otherwise
 520    the same as the pre-GCC 5 version.  */
 521
 522 ATTRIBUTE_NO_SANITIZE_UNDEFINED
 523 static const uchar *
 524 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 525 {
 526   typedef __attribute__((altivec(vector))) unsigned char vc;
 527
 528   const vc repl_nl = {
 529     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 530     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 531   };
 532   const vc repl_cr = {
 533     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 534     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 535   };
 536   const vc repl_bs = {
 537     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 538     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 539   };
 540   const vc repl_qm = {
 541     '?', '?', '?', '?', '?', '?', '?', '?',
 542     '?', '?', '?', '?', '?', '?', '?', '?',
 543   };
 544   const vc zero = { 0 };
 545
 546   vc data, t;
 547
 548   /* Main loop processing 16 bytes at a time.  */
 549   do
 550     {
 551       vc m_nl, m_cr, m_bs, m_qm;
 552
 553       data = *((const vc *)s);
 554       s += 16;
 555
 556       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 557       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 558       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 559       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 560       t = (m_nl | m_cr) | (m_bs | m_qm);
 561
 562       /* T now contains 0xff in bytes for which we matched one of the relevant
 563          characters.  We want to exit the loop if any byte in T is non-zero.
 564          Below is the expansion of vec_any_ne(t, zero).  */
 565     }
 566   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 567
 568   /* Restore s to to point to the 16 bytes we just processed.  */
 569   s -= 16;
 570
 571   {
 572 #define N  (sizeof(vc) / sizeof(long))
 573
 574     union {
 575       vc v;
 576       /* Statically assert that N is 2 or 4.  */
 577       unsigned long l[(N == 2 || N == 4) ? N : -1];
 578     } u;
 579     unsigned long l, i = 0;
 580
 581     u.v = t;
 582
 583     /* Find the first word of T that is non-zero.  */
 584     switch (N)
 585       {
 586       case 4:
 587         l = u.l[i++];
 588         if (l != 0)
 589           break;
 590         s += sizeof(unsigned long);
 591         l = u.l[i++];
 592         if (l != 0)
 593           break;
 594         s += sizeof(unsigned long);
 595       case 2:
 596         l = u.l[i++];
 597         if (l != 0)
 598           break;
 599         s += sizeof(unsigned long);
 600         l = u.l[i];
 601       }
 602
 603     /* L now contains 0xff in bytes for which we matched one of the
 604        relevant characters.  We can find the byte index by finding
 605        its bit index and dividing by 8.  */
 606 #ifdef __BIG_ENDIAN__
 607     l = __builtin_clzl(l) >> 3;
 608 #else
 609     l = __builtin_ctzl(l) >> 3;
 610 #endif
 611     return s + l;
 612
 613 #undef N
 614   }
 615 }
 616
 617 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
 618
 619 /* A vection of the fast scanner using AltiVec vectorized byte compares.
 620    This cannot be used for little endian because vec_lvsl/lvsr are
 621    deprecated for little endian and the code won't work properly.  */
 622 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
 623    so we can't compile this function without -maltivec on the command line
 624    (or implied by some other switch).  */
 625
 626 static const uchar *
 627 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 628 {
 629   typedef __attribute__((altivec(vector))) unsigned char vc;
 630
 631   const vc repl_nl = {
 632     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 633     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 634   };
 635   const vc repl_cr = {
 636     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 637     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 638   };
 639   const vc repl_bs = {
 640     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 641     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 642   };
 643   const vc repl_qm = {
 644     '?', '?', '?', '?', '?', '?', '?', '?',
 645     '?', '?', '?', '?', '?', '?', '?', '?',
 646   };
 647   const vc ones = {
 648     -1, -1, -1, -1, -1, -1, -1, -1,
 649     -1, -1, -1, -1, -1, -1, -1, -1,
 650   };
 651   const vc zero = { 0 };
 652
 653   vc data, mask, t;
 654
 655   /* Altivec loads automatically mask addresses with -16.  This lets us
 656      issue the first load as early as possible.  */
 657   data = __builtin_vec_ld(0, (const vc *)s);
 658
 659   /* Discard bytes before the beginning of the buffer.  Do this by
 660      beginning with all ones and shifting in zeros according to the
 661      mis-alignment.  The LVSR instruction pulls the exact shift we
 662      want from the address.  */
 663   mask = __builtin_vec_lvsr(0, s);
 664   mask = __builtin_vec_perm(zero, ones, mask);
 665   data &= mask;
 666
 667   /* While altivec loads mask addresses, we still need to align S so
 668      that the offset we compute at the end is correct.  */
 669   s = (const uchar *)((uintptr_t)s & -16);
 670
 671   /* Main loop processing 16 bytes at a time.  */
 672   goto start;
 673   do
 674     {
 675       vc m_nl, m_cr, m_bs, m_qm;
 676
 677       s += 16;
 678       data = __builtin_vec_ld(0, (const vc *)s);
 679
 680     start:
 681       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 682       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 683       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 684       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 685       t = (m_nl | m_cr) | (m_bs | m_qm);
 686
 687       /* T now contains 0xff in bytes for which we matched one of the relevant
 688          characters.  We want to exit the loop if any byte in T is non-zero.
 689          Below is the expansion of vec_any_ne(t, zero).  */
 690     }
 691   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 692
 693   {
 694 #define N  (sizeof(vc) / sizeof(long))
 695
 696     union {
 697       vc v;
 698       /* Statically assert that N is 2 or 4.  */
 699       unsigned long l[(N == 2 || N == 4) ? N : -1];
 700     } u;
 701     unsigned long l, i = 0;
 702
 703     u.v = t;
 704
 705     /* Find the first word of T that is non-zero.  */
 706     switch (N)
 707       {
 708       case 4:
 709         l = u.l[i++];
 710         if (l != 0)
 711           break;
 712         s += sizeof(unsigned long);
 713         l = u.l[i++];
 714         if (l != 0)
 715           break;
 716         s += sizeof(unsigned long);
 717       case 2:
 718         l = u.l[i++];
 719         if (l != 0)
 720           break;
 721         s += sizeof(unsigned long);
 722         l = u.l[i];
 723       }
 724
 725     /* L now contains 0xff in bytes for which we matched one of the
 726        relevant characters.  We can find the byte index by finding
 727        its bit index and dividing by 8.  */
 728     l = __builtin_clzl(l) >> 3;
 729     return s + l;
 730
 731 #undef N
 732   }
 733 }
 734
 735 #elif defined (__ARM_NEON)
 736 #include "arm_neon.h"
 737
 738 static const uchar *
 739 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 740 {
 741   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
 742   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
 743   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
 744   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
 745   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
 746
 747   unsigned int misalign, found, mask;
 748   const uint8_t *p;
 749   uint8x16_t data;
 750
 751   /* Align the source pointer.  */
 752   misalign = (uintptr_t)s & 15;
 753   p = (const uint8_t *)((uintptr_t)s & -16);
 754   data = vld1q_u8 (p);
 755
 756   /* Create a mask for the bytes that are valid within the first
 757      16-byte block.  The Idea here is that the AND with the mask
 758      within the loop is "free", since we need some AND or TEST
 759      insn in order to set the flags for the branch anyway.  */
 760   mask = (-1u << misalign) & 0xffff;
 761
 762   /* Main loop, processing 16 bytes at a time.  */
 763   goto start;
 764
 765   do
 766     {
 767       uint8x8_t l;
 768       uint16x4_t m;
 769       uint32x2_t n;
 770       uint8x16_t t, u, v, w;
 771
 772       p += 16;
 773       data = vld1q_u8 (p);
 774       mask = 0xffff;
 775
 776     start:
 777       t = vceqq_u8 (data, repl_nl);
 778       u = vceqq_u8 (data, repl_cr);
 779       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
 780       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
 781       t = vandq_u8 (vorrq_u8 (v, w), xmask);
 782       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
 783       m = vpaddl_u8 (l);
 784       n = vpaddl_u16 (m);
 785
 786       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
 787               vshr_n_u64 ((uint64x1_t) n, 24)), 0);
 788       found &= mask;
 789     }
 790   while (!found);
 791
 792   /* FOUND contains 1 in bits for which we matched a relevant
 793      character.  Conversion to the byte index is trivial.  */
 794   found = __builtin_ctz (found);
 795   return (const uchar *)p + found;
 796 }
 797
 798 #else
 799
/* We only have one accelerated alternative.  Use a direct call so that
   we encourage inlining.  */
 802
 803 #define search_line_fast  search_line_acc_char
 804
 805 #endif
 806
 807 /* Initialize the lexer if needed.  */
 808
 809 void
 810 _cpp_init_lexer (void)
 811 {
 812 #ifdef HAVE_init_vectorized_lexer
 813   init_vectorized_lexer ();
 814 #endif
 815 }
 816
 817 /* Returns with a logical line that contains no escaped newlines or
 818    trigraphs.  This is a time-critical inner loop.  */
 819 void
 820 _cpp_clean_line (cpp_reader *pfile)
 821 {
 822   cpp_buffer *buffer;
 823   const uchar *s;
 824   uchar c, *d, *p;
 825
 826   buffer = pfile->buffer;
 827   buffer->cur_note = buffer->notes_used = 0;
 828   buffer->cur = buffer->line_base = buffer->next_line;
 829   buffer->need_line = false;
 830   s = buffer->next_line;
 831
 832   if (!buffer->from_stage3)
 833     {
 834       const uchar *pbackslash = NULL;
 835
 836       /* Fast path.  This is the common case of an un-escaped line with
 837          no trigraphs.  The primary win here is by not writing any
 838          data back to memory until we have to.  */
 839       while (1)
 840         {
 841           /* Perform an optimized search for \n, \r, \\, ?.  */
 842           s = search_line_fast (s, buffer->rlimit);
 843
 844           c = *s;
 845           if (c == '\\')
 846             {
 847               /* Record the location of the backslash and continue.  */
 848               pbackslash = s++;
 849             }
 850           else if (__builtin_expect (c == '?', 0))
 851             {
 852               if (__builtin_expect (s[1] == '?', false)
 853                    && _cpp_trigraph_map[s[2]])
 854                 {
 855                   /* Have a trigraph.  We may or may not have to convert
 856                      it.  Add a line note regardless, for -Wtrigraphs.  */
 857                   add_line_note (buffer, s, s[2]);
 858                   if (CPP_OPTION (pfile, trigraphs))
 859                     {
 860                       /* We do, and that means we have to switch to the
 861                          slow path.  */
 862                       d = (uchar *) s;
 863                       *d = _cpp_trigraph_map[s[2]];
 864                       s += 2;
 865                       goto slow_path;
 866                     }
 867                 }
 868               /* Not a trigraph.  Continue on fast-path.  */
 869               s++;
 870             }
 871           else
 872             break;
 873         }
 874
 875       /* This must be \r or \n.  We're either done, or we'll be forced
 876          to write back to the buffer and continue on the slow path.  */
 877       d = (uchar *) s;
 878
 879       if (__builtin_expect (s == buffer->rlimit, false))
 880         goto done;
 881
 882       /* DOS line ending? */
 883       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
 884         {
 885           s++;
 886           if (s == buffer->rlimit)
 887             goto done;
 888         }
 889
 890       if (__builtin_expect (pbackslash == NULL, true))
 891         goto done;
 892
 893       /* Check for escaped newline.  */
 894       p = d;
 895       while (is_nvspace (p[-1]))
 896         p--;
 897       if (p - 1 != pbackslash)
 898         goto done;
 899
 900       /* Have an escaped newline; process it and proceed to
 901          the slow path.  */
 902       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 903       d = p - 2;
 904       buffer->next_line = p - 1;
 905
 906     slow_path:
 907       while (1)
 908         {
 909           c = *++s;
 910           *++d = c;
 911
 912           if (c == '\n' || c == '\r')
 913             {
 914               /* Handle DOS line endings.  */
 915               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 916                 s++;
 917               if (s == buffer->rlimit)
 918                 break;
 919
 920               /* Escaped?  */
 921               p = d;
 922               while (p != buffer->next_line && is_nvspace (p[-1]))
 923                 p--;
 924               if (p == buffer->next_line || p[-1] != '\\')
 925                 break;
 926
 927               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 928               d = p - 2;
 929               buffer->next_line = p - 1;
 930             }
 931           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 932             {
 933               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 934               add_line_note (buffer, d, s[2]);
 935               if (CPP_OPTION (pfile, trigraphs))
 936                 {
 937                   *d = _cpp_trigraph_map[s[2]];
 938                   s += 2;
 939                 }
 940             }
 941         }
 942     }
 943   else
 944     {
 945       while (*s != '\n' && *s != '\r')
 946         s++;
 947       d = (uchar *) s;
 948
 949       /* Handle DOS line endings.  */
 950       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 951         s++;
 952     }
 953
 954  done:
 955   *d = '\n';
 956   /* A sentinel note that should never be processed.  */
 957   add_line_note (buffer, d + 1, '\n');
 958   buffer->next_line = s + 1;
 959 }
 960
 961 /* Return true if the trigraph indicated by NOTE should be warned
 962    about in a comment.  */
 963 static bool
 964 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 965 {
 966   const uchar *p;
 967
 968   /* Within comments we don't warn about trigraphs, unless the
 969      trigraph forms an escaped newline, as that may change
 970      behavior.  */
 971   if (note->type != '/')
 972     return false;
 973
 974   /* If -trigraphs, then this was an escaped newline iff the next note
 975      is coincident.  */
 976   if (CPP_OPTION (pfile, trigraphs))
 977     return note[1].pos == note->pos;
 978
 979   /* Otherwise, see if this forms an escaped newline.  */
 980   p = note->pos + 3;
 981   while (is_nvspace (*p))
 982     p++;
 983
 984   /* There might have been escaped newlines between the trigraph and the
 985      newline we found.  Hence the position test.  */
 986   return (*p == '\n' && p < note[1].pos);
 987 }
 988
 989 /* Process the notes created by add_line_note as far as the current
 990    location.  */
 991 void
 992 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 993 {
 994   cpp_buffer *buffer = pfile->buffer;
 995
 996   for (;;)
 997     {
 998       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 999       unsigned int col;
1000
1001       if (note->pos > buffer->cur)
1002         break;
1003
1004       buffer->cur_note++;
1005       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1006
1007       if (note->type == '\\' || note->type == ' ')
1008         {
1009           if (note->type == ' ' && !in_comment)
1010             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1011                                  "backslash and newline separated by space");
1012
1013           if (buffer->next_line > buffer->rlimit)
1014             {
1015               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1016                                    "backslash-newline at end of file");
1017               /* Prevent "no newline at end of file" warning.  */
1018               buffer->next_line = buffer->rlimit;
1019             }
1020
1021           buffer->line_base = note->pos;
1022           CPP_INCREMENT_LINE (pfile, 0);
1023         }
1024       else if (_cpp_trigraph_map[note->type])
1025         {
1026           if (CPP_OPTION (pfile, warn_trigraphs)
1027               && (!in_comment || warn_in_comment (pfile, note)))
1028             {
1029               if (CPP_OPTION (pfile, trigraphs))
1030                 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1031                                        pfile->line_table->highest_line, col,
1032                                        "trigraph ??%c converted to %c",
1033                                        note->type,
1034                                        (int) _cpp_trigraph_map[note->type]);
1035               else
1036                 {
1037                   cpp_warning_with_line
1038                     (pfile, CPP_W_TRIGRAPHS,
1039                      pfile->line_table->highest_line, col,
1040                      "trigraph ??%c ignored, use -trigraphs to enable",
1041                      note->type);
1042                 }
1043             }
1044         }
1045       else if (note->type == 0)
1046         /* Already processed in lex_raw_string.  */;
1047       else
1048         abort ();
1049     }
1050 }
1051
1052 /* Skip a C-style block comment.  We find the end of the comment by
1053    seeing if an asterisk is before every '/' we encounter.  Returns
1054    nonzero if comment terminated by EOF, zero otherwise.
1055
1056    Buffer->cur points to the initial asterisk of the comment.  */
1057 bool
1058 _cpp_skip_block_comment (cpp_reader *pfile)
1059 {
1060   cpp_buffer *buffer = pfile->buffer;
1061   const uchar *cur = buffer->cur;
1062   uchar c;
1063
1064   cur++;
1065   if (*cur == '/')
1066     cur++;
1067
1068   for (;;)
1069     {
1070       /* People like decorating comments with '*', so check for '/'
1071          instead for efficiency.  */
1072       c = *cur++;
1073
1074       if (c == '/')
1075         {
1076           if (cur[-2] == '*')
1077             break;
1078
1079           /* Warn about potential nested comments, but not if the '/'
1080              comes immediately before the true comment delimiter.
1081              Don't bother to get it right across escaped newlines.  */
1082           if (CPP_OPTION (pfile, warn_comments)
1083               && cur[0] == '*' && cur[1] != '/')
1084             {
1085               buffer->cur = cur;
1086               cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1087                                      pfile->line_table->highest_line,
1088                                      CPP_BUF_COL (buffer),
1089                                      "\"/*\" within comment");
1090             }
1091         }
1092       else if (c == '\n')
1093         {
1094           unsigned int cols;
1095           buffer->cur = cur - 1;
1096           _cpp_process_line_notes (pfile, true);
1097           if (buffer->next_line >= buffer->rlimit)
1098             return true;
1099           _cpp_clean_line (pfile);
1100
1101           cols = buffer->next_line - buffer->line_base;
1102           CPP_INCREMENT_LINE (pfile, cols);
1103
1104           cur = buffer->cur;
1105         }
1106     }
1107
1108   buffer->cur = cur;
1109   _cpp_process_line_notes (pfile, true);
1110   return false;
1111 }
1112
1113 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1114    terminating newline.  Handles escaped newlines.  Returns nonzero
1115    if a multiline comment.  */
1116 static int
1117 skip_line_comment (cpp_reader *pfile)
1118 {
1119   cpp_buffer *buffer = pfile->buffer;
1120   source_location orig_line = pfile->line_table->highest_line;
1121
1122   while (*buffer->cur != '\n')
1123     buffer->cur++;
1124
1125   _cpp_process_line_notes (pfile, true);
1126   return orig_line != pfile->line_table->highest_line;
1127 }
1128
1129 /* Skips whitespace, saving the next non-whitespace character.  */
1130 static void
1131 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1132 {
1133   cpp_buffer *buffer = pfile->buffer;
1134   bool saw_NUL = false;
1135
1136   do
1137     {
1138       /* Horizontal space always OK.  */
1139       if (c == ' ' || c == '\t')
1140         ;
1141       /* Just \f \v or \0 left.  */
1142       else if (c == '\0')
1143         saw_NUL = true;
1144       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1145         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1146                              CPP_BUF_COL (buffer),
1147                              "%s in preprocessing directive",
1148                              c == '\f' ? "form feed" : "vertical tab");
1149
1150       c = *buffer->cur++;
1151     }
1152   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1153   while (is_nvspace (c));
1154
1155   if (saw_NUL)
1156     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1157
1158   buffer->cur--;
1159 }
1160
1161 /* See if the characters of a number token are valid in a name (no
1162    '.', '+' or '-').  */
1163 static int
1164 name_p (cpp_reader *pfile, const cpp_string *string)
1165 {
1166   unsigned int i;
1167
1168   for (i = 0; i < string->len; i++)
1169     if (!is_idchar (string->text[i]))
1170       return 0;
1171
1172   return 1;
1173 }
1174
1175 /* After parsing an identifier or other sequence, produce a warning about
1176    sequences not in NFC/NFKC.  */
1177 static void
1178 warn_about_normalization (cpp_reader *pfile,
1179                           const cpp_token *token,
1180                           const struct normalize_state *s)
1181 {
1182   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1183       && !pfile->state.skipping)
1184     {
1185       /* Make sure that the token is printed using UCNs, even
1186          if we'd otherwise happily print UTF-8.  */
1187       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1188       size_t sz;
1189
1190       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1191       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1192         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1193                                "`%.*s' is not in NFKC", (int) sz, buf);
1194       else
1195         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1196                                "`%.*s' is not in NFC", (int) sz, buf);
1197       free (buf);
1198     }
1199 }
1200
1201 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1202    an identifier.  FIRST is TRUE if this starts an identifier.  */
1203 static bool
1204 forms_identifier_p (cpp_reader *pfile, int first,
1205                     struct normalize_state *state)
1206 {
1207   cpp_buffer *buffer = pfile->buffer;
1208
1209   if (*buffer->cur == '$')
1210     {
1211       if (!CPP_OPTION (pfile, dollars_in_ident))
1212         return false;
1213
1214       buffer->cur++;
1215       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1216         {
1217           CPP_OPTION (pfile, warn_dollars) = 0;
1218           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1219         }
1220
1221       return true;
1222     }
1223
1224   /* Is this a syntactically valid UCN?  */
1225   if (CPP_OPTION (pfile, extended_identifiers)
1226       && *buffer->cur == '\\'
1227       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1228     {
1229       buffer->cur += 2;
1230       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1231                           state))
1232         return true;
1233       buffer->cur -= 2;
1234     }
1235
1236   return false;
1237 }
1238
1239 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1240 static cpp_hashnode *
1241 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1242 {
1243   cpp_hashnode *result;
1244   const uchar *cur;
1245   unsigned int len;
1246   unsigned int hash = HT_HASHSTEP (0, *base);
1247
1248   cur = base + 1;
1249   while (ISIDNUM (*cur))
1250     {
1251       hash = HT_HASHSTEP (hash, *cur);
1252       cur++;
1253     }
1254   len = cur - base;
1255   hash = HT_HASHFINISH (hash, len);
1256   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1257                                               base, len, hash, HT_ALLOC));
1258
1259   /* Rarely, identifiers require diagnostics when lexed.  */
1260   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1261                         && !pfile->state.skipping, 0))
1262     {
1263       /* It is allowed to poison the same identifier twice.  */
1264       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1265         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1266                    NODE_NAME (result));
1267
1268       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1269          replacement list of a variadic macro.  */
1270       if (result == pfile->spec_nodes.n__VA_ARGS__
1271           && !pfile->state.va_args_ok)
1272         {
1273           if (CPP_OPTION (pfile, cplusplus))
1274             cpp_error (pfile, CPP_DL_PEDWARN,
1275                        "__VA_ARGS__ can only appear in the expansion"
1276                        " of a C++11 variadic macro");
1277           else
1278             cpp_error (pfile, CPP_DL_PEDWARN,
1279                        "__VA_ARGS__ can only appear in the expansion"
1280                        " of a C99 variadic macro");
1281         }
1282
1283       /* For -Wc++-compat, warn about use of C++ named operators.  */
1284       if (result->flags & NODE_WARN_OPERATOR)
1285         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1286                      "identifier \"%s\" is a special operator name in C++",
1287                      NODE_NAME (result));
1288     }
1289
1290   return result;
1291 }
1292
1293 /* Get the cpp_hashnode of an identifier specified by NAME in
1294    the current cpp_reader object.  If none is found, NULL is returned.  */
1295 cpp_hashnode *
1296 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1297 {
1298   cpp_hashnode *result;
1299   result = lex_identifier_intern (pfile, (uchar *) name);
1300   return result;
1301 }
1302
1303 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1304 static cpp_hashnode *
1305 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1306                 struct normalize_state *nst, cpp_hashnode **spelling)
1307 {
1308   cpp_hashnode *result;
1309   const uchar *cur;
1310   unsigned int len;
1311   unsigned int hash = HT_HASHSTEP (0, *base);
1312
1313   cur = pfile->buffer->cur;
1314   if (! starts_ucn)
1315     {
1316       while (ISIDNUM (*cur))
1317         {
1318           hash = HT_HASHSTEP (hash, *cur);
1319           cur++;
1320         }
1321       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1322     }
1323   pfile->buffer->cur = cur;
1324   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1325     {
1326       /* Slower version for identifiers containing UCNs (or $).  */
1327       do {
1328         while (ISIDNUM (*pfile->buffer->cur))
1329           {
1330             NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1331             pfile->buffer->cur++;
1332           }
1333       } while (forms_identifier_p (pfile, false, nst));
1334       result = _cpp_interpret_identifier (pfile, base,
1335                                           pfile->buffer->cur - base);
1336       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1337     }
1338   else
1339     {
1340       len = cur - base;
1341       hash = HT_HASHFINISH (hash, len);
1342
1343       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1344                                                   base, len, hash, HT_ALLOC));
1345       *spelling = result;
1346     }
1347
1348   /* Rarely, identifiers require diagnostics when lexed.  */
1349   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1350                         && !pfile->state.skipping, 0))
1351     {
1352       /* It is allowed to poison the same identifier twice.  */
1353       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1354         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1355                    NODE_NAME (result));
1356
1357       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1358          replacement list of a variadic macro.  */
1359       if (result == pfile->spec_nodes.n__VA_ARGS__
1360           && !pfile->state.va_args_ok)
1361         {
1362           if (CPP_OPTION (pfile, cplusplus))
1363             cpp_error (pfile, CPP_DL_PEDWARN,
1364                        "__VA_ARGS__ can only appear in the expansion"
1365                        " of a C++11 variadic macro");
1366           else
1367             cpp_error (pfile, CPP_DL_PEDWARN,
1368                        "__VA_ARGS__ can only appear in the expansion"
1369                        " of a C99 variadic macro");
1370         }
1371
1372       /* For -Wc++-compat, warn about use of C++ named operators.  */
1373       if (result->flags & NODE_WARN_OPERATOR)
1374         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1375                      "identifier \"%s\" is a special operator name in C++",
1376                      NODE_NAME (result));
1377     }
1378
1379   return result;
1380 }
1381
1382 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1383 static void
1384 lex_number (cpp_reader *pfile, cpp_string *number,
1385             struct normalize_state *nst)
1386 {
1387   const uchar *cur;
1388   const uchar *base;
1389   uchar *dest;
1390
1391   base = pfile->buffer->cur - 1;
1392   do
1393     {
1394       cur = pfile->buffer->cur;
1395
1396       /* N.B. ISIDNUM does not include $.  */
1397       while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1398              || VALID_SIGN (*cur, cur[-1]))
1399         {
1400           NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1401           cur++;
1402         }
1403
1404       pfile->buffer->cur = cur;
1405     }
1406   while (forms_identifier_p (pfile, false, nst));
1407
1408   number->len = cur - base;
1409   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1410   memcpy (dest, base, number->len);
1411   dest[number->len] = '\0';
1412   number->text = dest;
1413 }
1414
1415 /* Create a token of type TYPE with a literal spelling.  */
1416 static void
1417 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1418                 unsigned int len, enum cpp_ttype type)
1419 {
1420   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1421
1422   memcpy (dest, base, len);
1423   dest[len] = '\0';
1424   token->type = type;
1425   token->val.str.len = len;
1426   token->val.str.text = dest;
1427 }
1428
1429 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1430    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1431
1432 static void
1433 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1434                 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1435 {
1436   _cpp_buff *first_buff = *first_buff_p;
1437   _cpp_buff *last_buff = *last_buff_p;
1438
1439   if (first_buff == NULL)
1440     first_buff = last_buff = _cpp_get_buff (pfile, len);
1441   else if (len > BUFF_ROOM (last_buff))
1442     {
1443       size_t room = BUFF_ROOM (last_buff);
1444       memcpy (BUFF_FRONT (last_buff), base, room);
1445       BUFF_FRONT (last_buff) += room;
1446       base += room;
1447       len -= room;
1448       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1449     }
1450
1451   memcpy (BUFF_FRONT (last_buff), base, len);
1452   BUFF_FRONT (last_buff) += len;
1453
1454   *first_buff_p = first_buff;
1455   *last_buff_p = last_buff;
1456 }
1457
1458
1459 /* Returns true if a macro has been defined.
1460    This might not work if compile with -save-temps,
1461    or preprocess separately from compilation.  */
1462
1463 static bool
1464 is_macro(cpp_reader *pfile, const uchar *base)
1465 {
1466   const uchar *cur = base;
1467   if (! ISIDST (*cur))
1468     return false;
1469   unsigned int hash = HT_HASHSTEP (0, *cur);
1470   ++cur;
1471   while (ISIDNUM (*cur))
1472     {
1473       hash = HT_HASHSTEP (hash, *cur);
1474       ++cur;
1475     }
1476   hash = HT_HASHFINISH (hash, cur - base);
1477
1478   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1479                                         base, cur - base, hash, HT_NO_INSERT));
1480
1481   return !result ? false : (result->type == NT_MACRO);
1482 }
1483
1484
/* Lexes a raw string.  The stored string contains the spelling, including
   double quotes, delimiter string, '(' and ')', any leading
   'L', 'u', 'U' or 'u8' and 'R' modifier.  The function itself returns
   nothing; the type of the literal is stored in TOKEN, or CPP_OTHER if
   it was not properly terminated.

   The spelling is NUL-terminated, but it is not guaranteed that this
   is the first NUL since embedded NULs are preserved.  */
1492
1493 static void
1494 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1495                 const uchar *cur)
1496 {
1497   uchar raw_prefix[17];
1498   uchar temp_buffer[18];
1499   const uchar *orig_base;
1500   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1501   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1502   raw_str_phase phase = RAW_STR_PREFIX;
1503   enum cpp_ttype type;
1504   size_t total_len = 0;
1505   /* Index into temp_buffer during phases other than RAW_STR,
1506      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1507      be appended to temp_buffer.  */
1508   size_t temp_buffer_len = 0;
1509   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1510   size_t raw_prefix_start;
1511   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1512
1513   type = (*base == 'L' ? CPP_WSTRING :
1514           *base == 'U' ? CPP_STRING32 :
1515           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1516           : CPP_STRING);
1517
1518 #define BUF_APPEND(STR,LEN)                                     \
1519       do {                                                      \
1520         bufring_append (pfile, (const uchar *)(STR), (LEN),     \
1521                         &first_buff, &last_buff);               \
1522         total_len += (LEN);                                     \
1523         if (__builtin_expect (temp_buffer_len < 17, 0)          \
1524             && (const uchar *)(STR) != base                     \
1525             && (LEN) <= 2)                                      \
1526           {                                                     \
1527             memcpy (temp_buffer + temp_buffer_len,              \
1528                     (const uchar *)(STR), (LEN));               \
1529             temp_buffer_len += (LEN);                           \
1530           }                                                     \
1531       } while (0);
1532
1533   orig_base = base;
1534   ++cur;
1535   raw_prefix_start = cur - base;
1536   for (;;)
1537     {
1538       cppchar_t c;
1539
1540       /* If we previously performed any trigraph or line splicing
1541          transformations, undo them in between the opening and closing
1542          double quote.  */
1543       while (note->pos < cur)
1544         ++note;
1545       for (; note->pos == cur; ++note)
1546         {
1547           switch (note->type)
1548             {
1549             case '\\':
1550             case ' ':
1551               /* Restore backslash followed by newline.  */
1552               BUF_APPEND (base, cur - base);
1553               base = cur;
1554               BUF_APPEND ("\\", 1);
1555             after_backslash:
1556               if (note->type == ' ')
1557                 {
1558                   /* GNU backslash whitespace newline extension.  FIXME
1559                      could be any sequence of non-vertical space.  When we
1560                      can properly restore any such sequence, we should mark
1561                      this note as handled so _cpp_process_line_notes
1562                      doesn't warn.  */
1563                   BUF_APPEND (" ", 1);
1564                 }
1565
1566               BUF_APPEND ("\n", 1);
1567               break;
1568
1569             case 0:
1570               /* Already handled.  */
1571               break;
1572
1573             default:
1574               if (_cpp_trigraph_map[note->type])
1575                 {
1576                   /* Don't warn about this trigraph in
1577                      _cpp_process_line_notes, since trigraphs show up as
1578                      trigraphs in raw strings.  */
1579                   uchar type = note->type;
1580                   note->type = 0;
1581
1582                   if (!CPP_OPTION (pfile, trigraphs))
1583                     /* If we didn't convert the trigraph in the first
1584                        place, don't do anything now either.  */
1585                     break;
1586
1587                   BUF_APPEND (base, cur - base);
1588                   base = cur;
1589                   BUF_APPEND ("??", 2);
1590
1591                   /* ??/ followed by newline gets two line notes, one for
1592                      the trigraph and one for the backslash/newline.  */
1593                   if (type == '/' && note[1].pos == cur)
1594                     {
1595                       if (note[1].type != '\\'
1596                           && note[1].type != ' ')
1597                         abort ();
1598                       BUF_APPEND ("/", 1);
1599                       ++note;
1600                       goto after_backslash;
1601                     }
1602                   else
1603                     {
1604                       /* Skip the replacement character.  */
1605                       base = ++cur;
1606                       BUF_APPEND (&type, 1);
1607                       c = type;
1608                       goto check_c;
1609                     }
1610                 }
1611               else
1612                 abort ();
1613               break;
1614             }
1615         }
1616       c = *cur++;
1617       if (__builtin_expect (temp_buffer_len < 17, 0))
1618         temp_buffer[temp_buffer_len++] = c;
1619
1620      check_c:
1621       if (phase == RAW_STR_PREFIX)
1622         {
1623           while (raw_prefix_len < temp_buffer_len)
1624             {
1625               raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1626               switch (raw_prefix[raw_prefix_len])
1627                 {
1628                 case ' ': case '(': case ')': case '\\': case '\t':
1629                 case '\v': case '\f': case '\n': default:
1630                   break;
1631                 /* Basic source charset except the above chars.  */
1632                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1633                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1634                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1635                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1636                 case 'y': case 'z':
1637                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1638                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1639                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1640                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1641                 case 'Y': case 'Z':
1642                 case '0': case '1': case '2': case '3': case '4': case '5':
1643                 case '6': case '7': case '8': case '9':
1644                 case '_': case '{': case '}': case '#': case '[': case ']':
1645                 case '<': case '>': case '%': case ':': case ';': case '.':
1646                 case '?': case '*': case '+': case '-': case '/': case '^':
1647                 case '&': case '|': case '~': case '!': case '=': case ',':
1648                 case '"': case '\'':
1649                   if (raw_prefix_len < 16)
1650                     {
1651                       raw_prefix_len++;
1652                       continue;
1653                     }
1654                   break;
1655                 }
1656
1657               if (raw_prefix[raw_prefix_len] != '(')
1658                 {
1659                   int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1660                   if (raw_prefix_len == 16)
1661                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1662                                          col, "raw string delimiter longer "
1663                                               "than 16 characters");
1664                   else if (raw_prefix[raw_prefix_len] == '\n')
1665                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1666                                          col, "invalid new-line in raw "
1667                                               "string delimiter");
1668                   else
1669                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1670                                          col, "invalid character '%c' in "
1671                                               "raw string delimiter",
1672                                          (int) raw_prefix[raw_prefix_len]);
1673                   pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1674                   create_literal (pfile, token, orig_base,
1675                                   raw_prefix_start - 1, CPP_OTHER);
1676                   if (first_buff)
1677                     _cpp_release_buff (pfile, first_buff);
1678                   return;
1679                 }
1680               raw_prefix[raw_prefix_len] = '"';
1681               phase = RAW_STR;
1682               /* Nothing should be appended to temp_buffer during
1683                  RAW_STR phase.  */
1684               temp_buffer_len = 17;
1685               break;
1686             }
1687           continue;
1688         }
1689       else if (phase == RAW_STR_SUFFIX)
1690         {
1691           while (raw_suffix_len <= raw_prefix_len
1692                  && raw_suffix_len < temp_buffer_len
1693                  && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1694             raw_suffix_len++;
1695           if (raw_suffix_len > raw_prefix_len)
1696             break;
1697           if (raw_suffix_len == temp_buffer_len)
1698             continue;
1699           phase = RAW_STR;
1700           /* Nothing should be appended to temp_buffer during
1701              RAW_STR phase.  */
1702           temp_buffer_len = 17;
1703         }
1704       if (c == ')')
1705         {
1706           phase = RAW_STR_SUFFIX;
1707           raw_suffix_len = 0;
1708           temp_buffer_len = 0;
1709         }
1710       else if (c == '\n')
1711         {
1712           if (pfile->state.in_directive
1713               || (pfile->state.parsing_args
1714                   && pfile->buffer->next_line >= pfile->buffer->rlimit))
1715             {
1716               cur--;
1717               type = CPP_OTHER;
1718               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1719                                    "unterminated raw string");
1720               break;
1721             }
1722
1723           BUF_APPEND (base, cur - base);
1724
1725           if (pfile->buffer->cur < pfile->buffer->rlimit)
1726             CPP_INCREMENT_LINE (pfile, 0);
1727           pfile->buffer->need_line = true;
1728
1729           pfile->buffer->cur = cur-1;
1730           _cpp_process_line_notes (pfile, false);
1731           if (!_cpp_get_fresh_line (pfile))
1732             {
1733               source_location src_loc = token->src_loc;
1734               token->type = CPP_EOF;
1735               /* Tell the compiler the line number of the EOF token.  */
1736               token->src_loc = pfile->line_table->highest_line;
1737               token->flags = BOL;
1738               if (first_buff != NULL)
1739                 _cpp_release_buff (pfile, first_buff);
1740               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1741                                    "unterminated raw string");
1742               return;
1743             }
1744
1745           cur = base = pfile->buffer->cur;
1746           note = &pfile->buffer->notes[pfile->buffer->cur_note];
1747         }
1748     }
1749
1750   if (CPP_OPTION (pfile, user_literals))
1751     {
1752       /* If a string format macro, say from inttypes.h, is placed touching
1753          a string literal it could be parsed as a C++11 user-defined string
1754          literal thus breaking the program.
1755          Try to identify macros with is_macro. A warning is issued. */
1756       if (is_macro (pfile, cur))
1757         {
1758           /* Raise a warning, but do not consume subsequent tokens.  */
1759           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1760             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1761                                    token->src_loc, 0,
1762                                    "invalid suffix on literal; C++11 requires "
1763                                    "a space between literal and string macro");
1764         }
1765       /* Grab user defined literal suffix.  */
1766       else if (ISIDST (*cur))
1767         {
1768           type = cpp_userdef_string_add_type (type);
1769           ++cur;
1770
1771           while (ISIDNUM (*cur))
1772             ++cur;
1773         }
1774     }
1775
1776   pfile->buffer->cur = cur;
1777   if (first_buff == NULL)
1778     create_literal (pfile, token, base, cur - base, type);
1779   else
1780     {
1781       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1782
1783       token->type = type;
1784       token->val.str.len = total_len + (cur - base);
1785       token->val.str.text = dest;
1786       last_buff = first_buff;
1787       while (last_buff != NULL)
1788         {
1789           memcpy (dest, last_buff->base,
1790                   BUFF_FRONT (last_buff) - last_buff->base);
1791           dest += BUFF_FRONT (last_buff) - last_buff->base;
1792           last_buff = last_buff->next;
1793         }
1794       _cpp_release_buff (pfile, first_buff);
1795       memcpy (dest, base, cur - base);
1796       dest[cur - base] = '\0';
1797     }
1798 }
1799
1800 /* Lexes a string, character constant, or angle-bracketed header file
1801    name.  The stored string contains the spelling, including opening
1802    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1803    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1804    if it was not properly terminated, or CPP_LESS for an unterminated
1805    header name which must be relexed as normal tokens.
1806
1807    The spelling is NUL-terminated, but it is not guaranteed that this
1808    is the first NUL since embedded NULs are preserved.  */
1809 static void
1810 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1811 {
1812   bool saw_NUL = false;
1813   const uchar *cur;
1814   cppchar_t terminator;
1815   enum cpp_ttype type;
1816
1817   cur = base;
1818   terminator = *cur++;
1819   if (terminator == 'L' || terminator == 'U')
1820     terminator = *cur++;
1821   else if (terminator == 'u')
1822     {
1823       terminator = *cur++;
1824       if (terminator == '8')
1825         terminator = *cur++;
1826     }
1827   if (terminator == 'R')
1828     {
1829       lex_raw_string (pfile, token, base, cur);
1830       return;
1831     }
1832   if (terminator == '"')
1833     type = (*base == 'L' ? CPP_WSTRING :
1834             *base == 'U' ? CPP_STRING32 :
1835             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1836                          : CPP_STRING);
1837   else if (terminator == '\'')
1838     type = (*base == 'L' ? CPP_WCHAR :
1839             *base == 'U' ? CPP_CHAR32 :
1840             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1841   else
1842     terminator = '>', type = CPP_HEADER_NAME;
1843
1844   for (;;)
1845     {
1846       cppchar_t c = *cur++;
1847
1848       /* In #include-style directives, terminators are not escapable.  */
1849       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1850         cur++;
1851       else if (c == terminator)
1852         break;
1853       else if (c == '\n')
1854         {
1855           cur--;
1856           /* Unmatched quotes always yield undefined behavior, but
1857              greedy lexing means that what appears to be an unterminated
1858              header name may actually be a legitimate sequence of tokens.  */
1859           if (terminator == '>')
1860             {
1861               token->type = CPP_LESS;
1862               return;
1863             }
1864           type = CPP_OTHER;
1865           break;
1866         }
1867       else if (c == '\0')
1868         saw_NUL = true;
1869     }
1870
1871   if (saw_NUL && !pfile->state.skipping)
1872     cpp_error (pfile, CPP_DL_WARNING,
1873                "null character(s) preserved in literal");
1874
1875   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1876     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1877                (int) terminator);
1878
1879   if (CPP_OPTION (pfile, user_literals))
1880     {
1881       /* If a string format macro, say from inttypes.h, is placed touching
1882          a string literal it could be parsed as a C++11 user-defined string
1883          literal thus breaking the program.
1884          Try to identify macros with is_macro. A warning is issued. */
1885       if (is_macro (pfile, cur))
1886         {
1887           /* Raise a warning, but do not consume subsequent tokens.  */
1888           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1889             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1890                                    token->src_loc, 0,
1891                                    "invalid suffix on literal; C++11 requires "
1892                                    "a space between literal and string macro");
1893         }
1894       /* Grab user defined literal suffix.  */
1895       else if (ISIDST (*cur))
1896         {
1897           type = cpp_userdef_char_add_type (type);
1898           type = cpp_userdef_string_add_type (type);
1899           ++cur;
1900
1901           while (ISIDNUM (*cur))
1902             ++cur;
1903         }
1904     }
1905
1906   pfile->buffer->cur = cur;
1907   create_literal (pfile, token, base, cur - base, type);
1908 }
1909
1910 /* Return the comment table. The client may not make any assumption
1911    about the ordering of the table.  */
1912 cpp_comment_table *
1913 cpp_get_comments (cpp_reader *pfile)
1914 {
1915   return &pfile->comments;
1916 }
1917
1918 /* Append a comment to the end of the comment table. */
1919 static void
1920 store_comment (cpp_reader *pfile, cpp_token *token)
1921 {
1922   int len;
1923
1924   if (pfile->comments.allocated == 0)
1925     {
1926       pfile->comments.allocated = 256;
1927       pfile->comments.entries = (cpp_comment *) xmalloc
1928         (pfile->comments.allocated * sizeof (cpp_comment));
1929     }
1930
1931   if (pfile->comments.count == pfile->comments.allocated)
1932     {
1933       pfile->comments.allocated *= 2;
1934       pfile->comments.entries = (cpp_comment *) xrealloc
1935         (pfile->comments.entries,
1936          pfile->comments.allocated * sizeof (cpp_comment));
1937     }
1938
1939   len = token->val.str.len;
1940
1941   /* Copy comment. Note, token may not be NULL terminated. */
1942   pfile->comments.entries[pfile->comments.count].comment =
1943     (char *) xmalloc (sizeof (char) * (len + 1));
1944   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1945           token->val.str.text, len);
1946   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1947
1948   /* Set source location. */
1949   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1950
1951   /* Increment the count of entries in the comment table. */
1952   pfile->comments.count++;
1953 }
1954
1955 /* The stored comment includes the comment start and any terminator.  */
1956 static void
1957 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1958               cppchar_t type)
1959 {
1960   unsigned char *buffer;
1961   unsigned int len, clen, i;
1962
1963   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1964
1965   /* C++ comments probably (not definitely) have moved past a new
1966      line, which we don't want to save in the comment.  */
1967   if (is_vspace (pfile->buffer->cur[-1]))
1968     len--;
1969
1970   /* If we are currently in a directive or in argument parsing, then
1971      we need to store all C++ comments as C comments internally, and
1972      so we need to allocate a little extra space in that case.
1973
1974      Note that the only time we encounter a directive here is
1975      when we are saving comments in a "#define".  */
1976   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1977           && type == '/') ? len + 2 : len;
1978
1979   buffer = _cpp_unaligned_alloc (pfile, clen);
1980
1981   token->type = CPP_COMMENT;
1982   token->val.str.len = clen;
1983   token->val.str.text = buffer;
1984
1985   buffer[0] = '/';
1986   memcpy (buffer + 1, from, len - 1);
1987
1988   /* Finish conversion to a C comment, if necessary.  */
1989   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1990     {
1991       buffer[1] = '*';
1992       buffer[clen - 2] = '*';
1993       buffer[clen - 1] = '/';
1994       /* As there can be in a C++ comments illegal sequences for C comments
1995          we need to filter them out.  */
1996       for (i = 2; i < (clen - 2); i++)
1997         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1998           buffer[i] = '|';
1999     }
2000
2001   /* Finally store this comment for use by clients of libcpp. */
2002   store_comment (pfile, token);
2003 }
2004
2005 /* Allocate COUNT tokens for RUN.  */
2006 void
2007 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2008 {
2009   run->base = XNEWVEC (cpp_token, count);
2010   run->limit = run->base + count;
2011   run->next = NULL;
2012 }
2013
2014 /* Returns the next tokenrun, or creates one if there is none.  */
2015 static tokenrun *
2016 next_tokenrun (tokenrun *run)
2017 {
2018   if (run->next == NULL)
2019     {
2020       run->next = XNEW (tokenrun);
2021       run->next->prev = run;
2022       _cpp_init_tokenrun (run->next, 250);
2023     }
2024
2025   return run->next;
2026 }
2027
2028 /* Return the number of not yet processed token in a given
2029    context.  */
2030 int
2031 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2032 {
2033   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2034     return (LAST (context).token - FIRST (context).token);
2035   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2036            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2037     return (LAST (context).ptoken - FIRST (context).ptoken);
2038   else
2039       abort ();
2040 }
2041
2042 /* Returns the token present at index INDEX in a given context.  If
2043    INDEX is zero, the next token to be processed is returned.  */
2044 static const cpp_token*
2045 _cpp_token_from_context_at (cpp_context *context, int index)
2046 {
2047   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2048     return &(FIRST (context).token[index]);
2049   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2050            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2051     return FIRST (context).ptoken[index];
2052  else
2053    abort ();
2054 }
2055
2056 /* Look ahead in the input stream.  */
2057 const cpp_token *
2058 cpp_peek_token (cpp_reader *pfile, int index)
2059 {
2060   cpp_context *context = pfile->context;
2061   const cpp_token *peektok;
2062   int count;
2063
2064   /* First, scan through any pending cpp_context objects.  */
2065   while (context->prev)
2066     {
2067       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2068
2069       if (index < (int) sz)
2070         return _cpp_token_from_context_at (context, index);
2071       index -= (int) sz;
2072       context = context->prev;
2073     }
2074
2075   /* We will have to read some new tokens after all (and do so
2076      without invalidating preceding tokens).  */
2077   count = index;
2078   pfile->keep_tokens++;
2079
2080   do
2081     {
2082       peektok = _cpp_lex_token (pfile);
2083       if (peektok->type == CPP_EOF)
2084         return peektok;
2085     }
2086   while (index--);
2087
2088   _cpp_backup_tokens_direct (pfile, count + 1);
2089   pfile->keep_tokens--;
2090
2091   return peektok;
2092 }
2093
2094 /* Allocate a single token that is invalidated at the same time as the
2095    rest of the tokens on the line.  Has its line and col set to the
2096    same as the last lexed token, so that diagnostics appear in the
2097    right place.  */
2098 cpp_token *
2099 _cpp_temp_token (cpp_reader *pfile)
2100 {
2101   cpp_token *old, *result;
2102   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2103   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2104
2105   old = pfile->cur_token - 1;
2106   /* Any pre-existing lookaheads must not be clobbered.  */
2107   if (la)
2108     {
2109       if (sz <= la)
2110         {
2111           tokenrun *next = next_tokenrun (pfile->cur_run);
2112
2113           if (sz < la)
2114             memmove (next->base + 1, next->base,
2115                      (la - sz) * sizeof (cpp_token));
2116
2117           next->base[0] = pfile->cur_run->limit[-1];
2118         }
2119
2120       if (sz > 1)
2121         memmove (pfile->cur_token + 1, pfile->cur_token,
2122                  MIN (la, sz - 1) * sizeof (cpp_token));
2123     }
2124
2125   if (!sz && pfile->cur_token == pfile->cur_run->limit)
2126     {
2127       pfile->cur_run = next_tokenrun (pfile->cur_run);
2128       pfile->cur_token = pfile->cur_run->base;
2129     }
2130
2131   result = pfile->cur_token++;
2132   result->src_loc = old->src_loc;
2133   return result;
2134 }
2135
2136 /* Lex a token into RESULT (external interface).  Takes care of issues
2137    like directive handling, token lookahead, multiple include
2138    optimization and skipping.  */
2139 const cpp_token *
2140 _cpp_lex_token (cpp_reader *pfile)
2141 {
2142   cpp_token *result;
2143
2144   for (;;)
2145     {
2146       if (pfile->cur_token == pfile->cur_run->limit)
2147         {
2148           pfile->cur_run = next_tokenrun (pfile->cur_run);
2149           pfile->cur_token = pfile->cur_run->base;
2150         }
2151       /* We assume that the current token is somewhere in the current
2152          run.  */
2153       if (pfile->cur_token < pfile->cur_run->base
2154           || pfile->cur_token >= pfile->cur_run->limit)
2155         abort ();
2156
2157       if (pfile->lookaheads)
2158         {
2159           pfile->lookaheads--;
2160           result = pfile->cur_token++;
2161         }
2162       else
2163         result = _cpp_lex_direct (pfile);
2164
2165       if (result->flags & BOL)
2166         {
2167           /* Is this a directive.  If _cpp_handle_directive returns
2168              false, it is an assembler #.  */
2169           if (result->type == CPP_HASH
2170               /* 6.10.3 p 11: Directives in a list of macro arguments
2171                  gives undefined behavior.  This implementation
2172                  handles the directive as normal.  */
2173               && pfile->state.parsing_args != 1)
2174             {
2175               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2176                 {
2177                   if (pfile->directive_result.type == CPP_PADDING)
2178                     continue;
2179                   result = &pfile->directive_result;
2180                 }
2181             }
2182           else if (pfile->state.in_deferred_pragma)
2183             result = &pfile->directive_result;
2184
2185           if (pfile->cb.line_change && !pfile->state.skipping)
2186             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2187         }
2188
2189       /* We don't skip tokens in directives.  */
2190       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2191         break;
2192
2193       /* Outside a directive, invalidate controlling macros.  At file
2194          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2195          get here and MI optimization works.  */
2196       pfile->mi_valid = false;
2197
2198       if (!pfile->state.skipping || result->type == CPP_EOF)
2199         break;
2200     }
2201
2202   return result;
2203 }
2204
2205 /* Returns true if a fresh line has been loaded.  */
2206 bool
2207 _cpp_get_fresh_line (cpp_reader *pfile)
2208 {
2209   int return_at_eof;
2210
2211   /* We can't get a new line until we leave the current directive.  */
2212   if (pfile->state.in_directive)
2213     return false;
2214
2215   for (;;)
2216     {
2217       cpp_buffer *buffer = pfile->buffer;
2218
2219       if (!buffer->need_line)
2220         return true;
2221
2222       if (buffer->next_line < buffer->rlimit)
2223         {
2224           _cpp_clean_line (pfile);
2225           return true;
2226         }
2227
2228       /* First, get out of parsing arguments state.  */
2229       if (pfile->state.parsing_args)
2230         return false;
2231
2232       /* End of buffer.  Non-empty files should end in a newline.  */
2233       if (buffer->buf != buffer->rlimit
2234           && buffer->next_line > buffer->rlimit
2235           && !buffer->from_stage3)
2236         {
2237           /* Clip to buffer size.  */
2238           buffer->next_line = buffer->rlimit;
2239         }
2240
2241       return_at_eof = buffer->return_at_eof;
2242       _cpp_pop_buffer (pfile);
2243       if (pfile->buffer == NULL || return_at_eof)
2244         return false;
2245     }
2246 }
2247
/* Set result->type to THEN_TYPE and step past the next character of
   the buffer if that character is CHAR; otherwise set result->type to
   ELSE_TYPE and leave the buffer alone.  Expects `buffer' and
   `result' to be in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == (CHAR))                       \
        {                                               \
          buffer->cur++;                                \
          result->type = THEN_TYPE;                     \
        }                                               \
      else                                              \
        result->type = ELSE_TYPE;                       \
    }                                                   \
  while (0)
2256
2257 /* Lex a token into pfile->cur_token, which is also incremented, to
2258    get diagnostics pointing to the correct location.
2259
2260    Does not handle issues such as token lookahead, multiple-include
2261    optimization, directives, skipping etc.  This function is only
2262    suitable for use by _cpp_lex_token, and in special cases like
2263    lex_expansion_token which doesn't care for any of these issues.
2264
2265    When meeting a newline, returns CPP_EOF if parsing a directive,
2266    otherwise returns to the start of the token buffer if permissible.
2267    Returns the location of the lexed token.  */
2268 cpp_token *
2269 _cpp_lex_direct (cpp_reader *pfile)
2270 {
2271   cppchar_t c;
2272   cpp_buffer *buffer;
2273   const unsigned char *comment_start;
2274   cpp_token *result = pfile->cur_token++;
2275
2276  fresh_line:
2277   result->flags = 0;
2278   buffer = pfile->buffer;
2279   if (buffer->need_line)
2280     {
2281       if (pfile->state.in_deferred_pragma)
2282         {
2283           result->type = CPP_PRAGMA_EOL;
2284           pfile->state.in_deferred_pragma = false;
2285           if (!pfile->state.pragma_allow_expansion)
2286             pfile->state.prevent_expansion--;
2287           return result;
2288         }
2289       if (!_cpp_get_fresh_line (pfile))
2290         {
2291           result->type = CPP_EOF;
2292           if (!pfile->state.in_directive)
2293             {
2294               /* Tell the compiler the line number of the EOF token.  */
2295               result->src_loc = pfile->line_table->highest_line;
2296               result->flags = BOL;
2297             }
2298           return result;
2299         }
2300       if (!pfile->keep_tokens)
2301         {
2302           pfile->cur_run = &pfile->base_run;
2303           result = pfile->base_run.base;
2304           pfile->cur_token = result + 1;
2305         }
2306       result->flags = BOL;
2307       if (pfile->state.parsing_args == 2)
2308         result->flags |= PREV_WHITE;
2309     }
2310   buffer = pfile->buffer;
2311  update_tokens_line:
2312   result->src_loc = pfile->line_table->highest_line;
2313
2314  skipped_white:
2315   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2316       && !pfile->overlaid_buffer)
2317     {
2318       _cpp_process_line_notes (pfile, false);
2319       result->src_loc = pfile->line_table->highest_line;
2320     }
2321   c = *buffer->cur++;
2322
2323   if (pfile->forced_token_location_p)
2324     result->src_loc = *pfile->forced_token_location_p;
2325   else
2326     result->src_loc = linemap_position_for_column (pfile->line_table,
2327                                           CPP_BUF_COLUMN (buffer, buffer->cur));
2328
2329   switch (c)
2330     {
2331     case ' ': case '\t': case '\f': case '\v': case '\0':
2332       result->flags |= PREV_WHITE;
2333       skip_whitespace (pfile, c);
2334       goto skipped_white;
2335
2336     case '\n':
2337       if (buffer->cur < buffer->rlimit)
2338         CPP_INCREMENT_LINE (pfile, 0);
2339       buffer->need_line = true;
2340       goto fresh_line;
2341
2342     case '0': case '1': case '2': case '3': case '4':
2343     case '5': case '6': case '7': case '8': case '9':
2344       {
2345         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2346         result->type = CPP_NUMBER;
2347         lex_number (pfile, &result->val.str, &nst);
2348         warn_about_normalization (pfile, result, &nst);
2349         break;
2350       }
2351
2352     case 'L':
2353     case 'u':
2354     case 'U':
2355     case 'R':
2356       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2357          wide strings or raw strings.  */
2358       if (c == 'L' || CPP_OPTION (pfile, rliterals)
2359           || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2360         {
2361           if ((*buffer->cur == '\'' && c != 'R')
2362               || *buffer->cur == '"'
2363               || (*buffer->cur == 'R'
2364                   && c != 'R'
2365                   && buffer->cur[1] == '"'
2366                   && CPP_OPTION (pfile, rliterals))
2367               || (*buffer->cur == '8'
2368                   && c == 'u'
2369                   && (buffer->cur[1] == '"'
2370                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2371                           && CPP_OPTION (pfile, rliterals)))))
2372             {
2373               lex_string (pfile, result, buffer->cur - 1);
2374               break;
2375             }
2376         }
2377       /* Fall through.  */
2378
2379     case '_':
2380     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2381     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2382     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2383     case 's': case 't':           case 'v': case 'w': case 'x':
2384     case 'y': case 'z':
2385     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2386     case 'G': case 'H': case 'I': case 'J': case 'K':
2387     case 'M': case 'N': case 'O': case 'P': case 'Q':
2388     case 'S': case 'T':           case 'V': case 'W': case 'X':
2389     case 'Y': case 'Z':
2390       result->type = CPP_NAME;
2391       {
2392         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2393         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2394                                                 &nst,
2395                                                 &result->val.node.spelling);
2396         warn_about_normalization (pfile, result, &nst);
2397       }
2398
2399       /* Convert named operators to their proper types.  */
2400       if (result->val.node.node->flags & NODE_OPERATOR)
2401         {
2402           result->flags |= NAMED_OP;
2403           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2404         }
2405       break;
2406
2407     case '\'':
2408     case '"':
2409       lex_string (pfile, result, buffer->cur - 1);
2410       break;
2411
2412     case '/':
2413       /* A potential block or line comment.  */
2414       comment_start = buffer->cur;
2415       c = *buffer->cur;
2416
2417       if (c == '*')
2418         {
2419           if (_cpp_skip_block_comment (pfile))
2420             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2421         }
2422       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2423         {
2424           /* Don't warn for system headers.  */
2425           if (cpp_in_system_header (pfile))
2426             ;
2427           /* Warn about comments if pedantically GNUC89, and not
2428              in system headers.  */
2429           else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2430                    && CPP_PEDANTIC (pfile)
2431                    && ! buffer->warned_cplusplus_comments)
2432             {
2433               cpp_error (pfile, CPP_DL_PEDWARN,
2434                          "C++ style comments are not allowed in ISO C90");
2435               cpp_error (pfile, CPP_DL_PEDWARN,
2436                          "(this will be reported only once per input file)");
2437               buffer->warned_cplusplus_comments = 1;
2438             }
2439           /* Or if specifically desired via -Wc90-c99-compat.  */
2440           else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2441                    && ! CPP_OPTION (pfile, cplusplus)
2442                    && ! buffer->warned_cplusplus_comments)
2443             {
2444               cpp_error (pfile, CPP_DL_WARNING,
2445                          "C++ style comments are incompatible with C90");
2446               cpp_error (pfile, CPP_DL_WARNING,
2447                          "(this will be reported only once per input file)");
2448               buffer->warned_cplusplus_comments = 1;
2449             }
2450           /* In C89/C94, C++ style comments are forbidden.  */
2451           else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2452                     || CPP_OPTION (pfile, lang) == CLK_STDC94))
2453             {
2454               /* But don't be confused about valid code such as
2455                  - // immediately followed by *,
2456                  - // in a preprocessing directive,
2457                  - // in an #if 0 block.  */
2458               if (buffer->cur[1] == '*'
2459                   || pfile->state.in_directive
2460                   || pfile->state.skipping)
2461                 {
2462                   result->type = CPP_DIV;
2463                   break;
2464                 }
2465               else if (! buffer->warned_cplusplus_comments)
2466                 {
2467                   cpp_error (pfile, CPP_DL_ERROR,
2468                              "C++ style comments are not allowed in ISO C90");
2469                   cpp_error (pfile, CPP_DL_ERROR,
2470                              "(this will be reported only once per input "
2471                              "file)");
2472                   buffer->warned_cplusplus_comments = 1;
2473                 }
2474             }
2475           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2476             cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2477         }
2478       else if (c == '=')
2479         {
2480           buffer->cur++;
2481           result->type = CPP_DIV_EQ;
2482           break;
2483         }
2484       else
2485         {
2486           result->type = CPP_DIV;
2487           break;
2488         }
2489
2490       if (!pfile->state.save_comments)
2491         {
2492           result->flags |= PREV_WHITE;
2493           goto update_tokens_line;
2494         }
2495
2496       /* Save the comment as a token in its own right.  */
2497       save_comment (pfile, result, comment_start, c);
2498       break;
2499
2500     case '<':
2501       if (pfile->state.angled_headers)
2502         {
2503           lex_string (pfile, result, buffer->cur - 1);
2504           if (result->type != CPP_LESS)
2505             break;
2506         }
2507
2508       result->type = CPP_LESS;
2509       if (*buffer->cur == '=')
2510         buffer->cur++, result->type = CPP_LESS_EQ;
2511       else if (*buffer->cur == '<')
2512         {
2513           buffer->cur++;
2514           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2515         }
2516       else if (CPP_OPTION (pfile, digraphs))
2517         {
2518           if (*buffer->cur == ':')
2519             {
2520               /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2521                  three characters are <:: and the subsequent character
2522                  is neither : nor >, the < is treated as a preprocessor
2523                  token by itself".  */
2524               if (CPP_OPTION (pfile, cplusplus)
2525                   && CPP_OPTION (pfile, lang) != CLK_CXX98
2526                   && CPP_OPTION (pfile, lang) != CLK_GNUCXX
2527                   && buffer->cur[1] == ':'
2528                   && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2529                 break;
2530
2531               buffer->cur++;
2532               result->flags |= DIGRAPH;
2533               result->type = CPP_OPEN_SQUARE;
2534             }
2535           else if (*buffer->cur == '%')
2536             {
2537               buffer->cur++;
2538               result->flags |= DIGRAPH;
2539               result->type = CPP_OPEN_BRACE;
2540             }
2541         }
2542       break;
2543
2544     case '>':
2545       result->type = CPP_GREATER;
2546       if (*buffer->cur == '=')
2547         buffer->cur++, result->type = CPP_GREATER_EQ;
2548       else if (*buffer->cur == '>')
2549         {
2550           buffer->cur++;
2551           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2552         }
2553       break;
2554
2555     case '%':
2556       result->type = CPP_MOD;
2557       if (*buffer->cur == '=')
2558         buffer->cur++, result->type = CPP_MOD_EQ;
2559       else if (CPP_OPTION (pfile, digraphs))
2560         {
2561           if (*buffer->cur == ':')
2562             {
2563               buffer->cur++;
2564               result->flags |= DIGRAPH;
2565               result->type = CPP_HASH;
2566               if (*buffer->cur == '%' && buffer->cur[1] == ':')
2567                 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2568             }
2569           else if (*buffer->cur == '>')
2570             {
2571               buffer->cur++;
2572               result->flags |= DIGRAPH;
2573               result->type = CPP_CLOSE_BRACE;
2574             }
2575         }
2576       break;
2577
2578     case '.':
2579       result->type = CPP_DOT;
2580       if (ISDIGIT (*buffer->cur))
2581         {
2582           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2583           result->type = CPP_NUMBER;
2584           lex_number (pfile, &result->val.str, &nst);
2585           warn_about_normalization (pfile, result, &nst);
2586         }
2587       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2588         buffer->cur += 2, result->type = CPP_ELLIPSIS;
2589       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2590         buffer->cur++, result->type = CPP_DOT_STAR;
2591       break;
2592
2593     case '+':
2594       result->type = CPP_PLUS;
2595       if (*buffer->cur == '+')
2596         buffer->cur++, result->type = CPP_PLUS_PLUS;
2597       else if (*buffer->cur == '=')
2598         buffer->cur++, result->type = CPP_PLUS_EQ;
2599       break;
2600
2601     case '-':
2602       result->type = CPP_MINUS;
2603       if (*buffer->cur == '>')
2604         {
2605           buffer->cur++;
2606           result->type = CPP_DEREF;
2607           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2608             buffer->cur++, result->type = CPP_DEREF_STAR;
2609         }
2610       else if (*buffer->cur == '-')
2611         buffer->cur++, result->type = CPP_MINUS_MINUS;
2612       else if (*buffer->cur == '=')
2613         buffer->cur++, result->type = CPP_MINUS_EQ;
2614       break;
2615
2616     case '&':
2617       result->type = CPP_AND;
2618       if (*buffer->cur == '&')
2619         buffer->cur++, result->type = CPP_AND_AND;
2620       else if (*buffer->cur == '=')
2621         buffer->cur++, result->type = CPP_AND_EQ;
2622       break;
2623
2624     case '|':
2625       result->type = CPP_OR;
2626       if (*buffer->cur == '|')
2627         buffer->cur++, result->type = CPP_OR_OR;
2628       else if (*buffer->cur == '=')
2629         buffer->cur++, result->type = CPP_OR_EQ;
2630       break;
2631
2632     case ':':
2633       result->type = CPP_COLON;
2634       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2635         buffer->cur++, result->type = CPP_SCOPE;
2636       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2637         {
2638           buffer->cur++;
2639           result->flags |= DIGRAPH;
2640           result->type = CPP_CLOSE_SQUARE;
2641         }
2642       break;
2643
2644     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2645     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2646     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2647     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2648     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2649
2650     case '?': result->type = CPP_QUERY; break;
2651     case '~': result->type = CPP_COMPL; break;
2652     case ',': result->type = CPP_COMMA; break;
2653     case '(': result->type = CPP_OPEN_PAREN; break;
2654     case ')': result->type = CPP_CLOSE_PAREN; break;
2655     case '[': result->type = CPP_OPEN_SQUARE; break;
2656     case ']': result->type = CPP_CLOSE_SQUARE; break;
2657     case '{': result->type = CPP_OPEN_BRACE; break;
2658     case '}': result->type = CPP_CLOSE_BRACE; break;
2659     case ';': result->type = CPP_SEMICOLON; break;
2660
2661       /* @ is a punctuator in Objective-C.  */
2662     case '@': result->type = CPP_ATSIGN; break;
2663
2664     case '$':
2665     case '\\':
2666       {
2667         const uchar *base = --buffer->cur;
2668         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2669
2670         if (forms_identifier_p (pfile, true, &nst))
2671           {
2672             result->type = CPP_NAME;
2673             result->val.node.node = lex_identifier (pfile, base, true, &nst,
2674                                                     &result->val.node.spelling);
2675             warn_about_normalization (pfile, result, &nst);
2676             break;
2677           }
2678         buffer->cur++;
2679       }
2680
2681     default:
2682       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2683       break;
2684     }
2685
2686   return result;
2687 }
2688
2689 /* An upper bound on the number of bytes needed to spell TOKEN.
2690    Does not include preceding whitespace.  */
2691 unsigned int
2692 cpp_token_len (const cpp_token *token)
2693 {
2694   unsigned int len;
2695
2696   switch (TOKEN_SPELL (token))
2697     {
2698     default:            len = 6;                                break;
2699     case SPELL_LITERAL: len = token->val.str.len;               break;
2700     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
2701     }
2702
2703   return len;
2704 }
2705
/* Decode one UTF-8 sequence starting at NAME and store the equivalent
   \U escape (a backslash, 'U' and eight lowercase hex digits) in
   BUFFER.  Exactly 10 bytes are always written to BUFFER; no
   terminating NUL is added.

   Returns the number of bytes consumed from NAME, which is the number
   of leading one bits in the first byte.  Aborts if a continuation
   byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned char *out = buffer;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The count of leading one bits in the first byte is the length of
     the whole sequence.  */
  lead = *name;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte sit below the length marker.  */
  code_point = *name & (0x7F >> seq_len);

  for (consumed = 1; consumed < seq_len; consumed++)
    {
      unsigned char cont = *++name;

      /* Ill-formed UTF-8.  */
      if ((cont & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (cont & 0x3F);
    }

  *out++ = '\\';
  *out++ = 'U';
  /* Emit eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2739
2740 /* Given a token TYPE corresponding to a digraph, return a pointer to
2741    the spelling of the digraph.  */
2742 static const unsigned char *
2743 cpp_digraph2name (enum cpp_ttype type)
2744 {
2745   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2746 }
2747
2748 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2749    The buffer must already contain the enough space to hold the
2750    token's spelling.  Returns a pointer to the character after the
2751    last character written.  */
2752 unsigned char *
2753 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2754 {
2755   size_t i;
2756   const unsigned char *name = NODE_NAME (ident);
2757
2758   for (i = 0; i < NODE_LEN (ident); i++)
2759     if (name[i] & ~0x7F)
2760       {
2761         i += utf8_to_ucn (buffer, name + i) - 1;
2762         buffer += 10;
2763       }
2764     else
2765       *buffer++ = name[i];
2766
2767   return buffer;
2768 }
2769
2770 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2771    already contain the enough space to hold the token's spelling.
2772    Returns a pointer to the character after the last character written.
2773    FORSTRING is true if this is to be the spelling after translation
2774    phase 1 (with the original spelling of extended identifiers), false
2775    if extended identifiers should always be written using UCNs (there is
2776    no option for always writing them in the internal UTF-8 form).
2777    FIXME: Would be nice if we didn't need the PFILE argument.  */
2778 unsigned char *
2779 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2780                  unsigned char *buffer, bool forstring)
2781 {
2782   switch (TOKEN_SPELL (token))
2783     {
2784     case SPELL_OPERATOR:
2785       {
2786         const unsigned char *spelling;
2787         unsigned char c;
2788
2789         if (token->flags & DIGRAPH)
2790           spelling = cpp_digraph2name (token->type);
2791         else if (token->flags & NAMED_OP)
2792           goto spell_ident;
2793         else
2794           spelling = TOKEN_NAME (token);
2795
2796         while ((c = *spelling++) != '\0')
2797           *buffer++ = c;
2798       }
2799       break;
2800
2801     spell_ident:
2802     case SPELL_IDENT:
2803       if (forstring)
2804         {
2805           memcpy (buffer, NODE_NAME (token->val.node.spelling),
2806                   NODE_LEN (token->val.node.spelling));
2807           buffer += NODE_LEN (token->val.node.spelling);
2808         }
2809       else
2810         buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2811       break;
2812
2813     case SPELL_LITERAL:
2814       memcpy (buffer, token->val.str.text, token->val.str.len);
2815       buffer += token->val.str.len;
2816       break;
2817
2818     case SPELL_NONE:
2819       cpp_error (pfile, CPP_DL_ICE,
2820                  "unspellable token %s", TOKEN_NAME (token));
2821       break;
2822     }
2823
2824   return buffer;
2825 }
2826
2827 /* Returns TOKEN spelt as a null-terminated string.  The string is
2828    freed when the reader is destroyed.  Useful for diagnostics.  */
2829 unsigned char *
2830 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2831 {
2832   unsigned int len = cpp_token_len (token) + 1;
2833   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2834
2835   end = cpp_spell_token (pfile, token, start, false);
2836   end[0] = '\0';
2837
2838   return start;
2839 }
2840
2841 /* Returns a pointer to a string which spells the token defined by
2842    TYPE and FLAGS.  Used by C front ends, which really should move to
2843    using cpp_token_as_text.  */
2844 const char *
2845 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2846 {
2847   if (flags & DIGRAPH)
2848     return (const char *) cpp_digraph2name (type);
2849   else if (flags & NAMED_OP)
2850     return cpp_named_operator2name (type);
2851
2852   return (const char *) token_spellings[type].name;
2853 }
2854
2855 /* Writes the spelling of token to FP, without any preceding space.
2856    Separated from cpp_spell_token for efficiency - to avoid stdio
2857    double-buffering.  */
2858 void
2859 cpp_output_token (const cpp_token *token, FILE *fp)
2860 {
2861   switch (TOKEN_SPELL (token))
2862     {
2863     case SPELL_OPERATOR:
2864       {
2865         const unsigned char *spelling;
2866         int c;
2867
2868         if (token->flags & DIGRAPH)
2869           spelling = cpp_digraph2name (token->type);
2870         else if (token->flags & NAMED_OP)
2871           goto spell_ident;
2872         else
2873           spelling = TOKEN_NAME (token);
2874
2875         c = *spelling;
2876         do
2877           putc (c, fp);
2878         while ((c = *++spelling) != '\0');
2879       }
2880       break;
2881
2882     spell_ident:
2883     case SPELL_IDENT:
2884       {
2885         size_t i;
2886         const unsigned char * name = NODE_NAME (token->val.node.node);
2887
2888         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2889           if (name[i] & ~0x7F)
2890             {
2891               unsigned char buffer[10];
2892               i += utf8_to_ucn (buffer, name + i) - 1;
2893               fwrite (buffer, 1, 10, fp);
2894             }
2895           else
2896             fputc (NODE_NAME (token->val.node.node)[i], fp);
2897       }
2898       break;
2899
2900     case SPELL_LITERAL:
2901       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2902       break;
2903
2904     case SPELL_NONE:
2905       /* An error, most probably.  */
2906       break;
2907     }
2908 }
2909
2910 /* Compare two tokens.  */
2911 int
2912 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2913 {
2914   if (a->type == b->type && a->flags == b->flags)
2915     switch (TOKEN_SPELL (a))
2916       {
2917       default:                  /* Keep compiler happy.  */
2918       case SPELL_OPERATOR:
2919         /* token_no is used to track where multiple consecutive ##
2920            tokens were originally located.  */
2921         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2922       case SPELL_NONE:
2923         return (a->type != CPP_MACRO_ARG
2924                 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2925                     && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2926       case SPELL_IDENT:
2927         return (a->val.node.node == b->val.node.node
2928                 && a->val.node.spelling == b->val.node.spelling);
2929       case SPELL_LITERAL:
2930         return (a->val.str.len == b->val.str.len
2931                 && !memcmp (a->val.str.text, b->val.str.text,
2932                             a->val.str.len));
2933       }
2934
2935   return 0;
2936 }
2937
2938 /* Returns nonzero if a space should be inserted to avoid an
2939    accidental token paste for output.  For simplicity, it is
2940    conservative, and occasionally advises a space where one is not
2941    needed, e.g. "." and ".2".  */
2942 int
2943 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2944                  const cpp_token *token2)
2945 {
2946   enum cpp_ttype a = token1->type, b = token2->type;
2947   cppchar_t c;
2948
2949   if (token1->flags & NAMED_OP)
2950     a = CPP_NAME;
2951   if (token2->flags & NAMED_OP)
2952     b = CPP_NAME;
2953
2954   c = EOF;
2955   if (token2->flags & DIGRAPH)
2956     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2957   else if (token_spellings[b].category == SPELL_OPERATOR)
2958     c = token_spellings[b].name[0];
2959
2960   /* Quickly get everything that can paste with an '='.  */
2961   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2962     return 1;
2963
2964   switch (a)
2965     {
2966     case CPP_GREATER:   return c == '>';
2967     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2968     case CPP_PLUS:      return c == '+';
2969     case CPP_MINUS:     return c == '-' || c == '>';
2970     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2971     case CPP_MOD:       return c == ':' || c == '>';
2972     case CPP_AND:       return c == '&';
2973     case CPP_OR:        return c == '|';
2974     case CPP_COLON:     return c == ':' || c == '>';
2975     case CPP_DEREF:     return c == '*';
2976     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2977     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2978     case CPP_NAME:      return ((b == CPP_NUMBER
2979                                  && name_p (pfile, &token2->val.str))
2980                                 || b == CPP_NAME
2981                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
2982     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
2983                                 || c == '.' || c == '+' || c == '-');
2984                                       /* UCNs */
2985     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
2986                                  && b == CPP_NAME)
2987                                 || (CPP_OPTION (pfile, objc)
2988                                     && token1->val.str.text[0] == '@'
2989                                     && (b == CPP_NAME || b == CPP_STRING)));
2990     case CPP_STRING:
2991     case CPP_WSTRING:
2992     case CPP_UTF8STRING:
2993     case CPP_STRING16:
2994     case CPP_STRING32:  return (CPP_OPTION (pfile, user_literals)
2995                                 && (b == CPP_NAME
2996                                     || (TOKEN_SPELL (token2) == SPELL_LITERAL
2997                                         && ISIDST (token2->val.str.text[0]))));
2998
2999     default:            break;
3000     }
3001
3002   return 0;
3003 }
3004
3005 /* Output all the remaining tokens on the current line, and a newline
3006    character, to FP.  Leading whitespace is removed.  If there are
3007    macros, special token padding is not performed.  */
3008 void
3009 cpp_output_line (cpp_reader *pfile, FILE *fp)
3010 {
3011   const cpp_token *token;
3012
3013   token = cpp_get_token (pfile);
3014   while (token->type != CPP_EOF)
3015     {
3016       cpp_output_token (token, fp);
3017       token = cpp_get_token (pfile);
3018       if (token->flags & PREV_WHITE)
3019         putc (' ', fp);
3020     }
3021
3022   putc ('\n', fp);
3023 }
3024
3025 /* Return a string representation of all the remaining tokens on the
3026    current line.  The result is allocated using xmalloc and must be
3027    freed by the caller.  */
3028 unsigned char *
3029 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3030 {
3031   const cpp_token *token;
3032   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3033   unsigned int alloced = 120 + out;
3034   unsigned char *result = (unsigned char *) xmalloc (alloced);
3035
3036   /* If DIR_NAME is empty, there are no initial contents.  */
3037   if (dir_name)
3038     {
3039       sprintf ((char *) result, "#%s ", dir_name);
3040       out += 2;
3041     }
3042
3043   token = cpp_get_token (pfile);
3044   while (token->type != CPP_EOF)
3045     {
3046       unsigned char *last;
3047       /* Include room for a possible space and the terminating nul.  */
3048       unsigned int len = cpp_token_len (token) + 2;
3049
3050       if (out + len > alloced)
3051         {
3052           alloced *= 2;
3053           if (out + len > alloced)
3054             alloced = out + len;
3055           result = (unsigned char *) xrealloc (result, alloced);
3056         }
3057
3058       last = cpp_spell_token (pfile, token, &result[out], 0);
3059       out = last - result;
3060
3061       token = cpp_get_token (pfile);
3062       if (token->flags & PREV_WHITE)
3063         result[out++] = ' ';
3064     }
3065
3066   result[out] = '\0';
3067   return result;
3068 }
3069
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that arbitrary expressions can
   be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3084
3085 /* Create a new allocation buffer.  Place the control block at the end
3086    of the buffer, so that buffer overflows will cause immediate chaos.  */
3087 static _cpp_buff *
3088 new_buff (size_t len)
3089 {
3090   _cpp_buff *result;
3091   unsigned char *base;
3092
3093   if (len < MIN_BUFF_SIZE)
3094     len = MIN_BUFF_SIZE;
3095   len = CPP_ALIGN (len);
3096
3097 #ifdef ENABLE_VALGRIND_CHECKING
3098   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3099      struct first.  */
3100   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3101   base = XNEWVEC (unsigned char, len + slen);
3102   result = (_cpp_buff *) base;
3103   base += slen;
3104 #else
3105   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3106   result = (_cpp_buff *) (base + len);
3107 #endif
3108   result->base = base;
3109   result->cur = base;
3110   result->limit = base + len;
3111   result->next = NULL;
3112   return result;
3113 }
3114
3115 /* Place a chain of unwanted allocation buffers on the free list.  */
3116 void
3117 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3118 {
3119   _cpp_buff *end = buff;
3120
3121   while (end->next)
3122     end = end->next;
3123   end->next = pfile->free_buffs;
3124   pfile->free_buffs = buff;
3125 }
3126
3127 /* Return a free buffer of size at least MIN_SIZE.  */
3128 _cpp_buff *
3129 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3130 {
3131   _cpp_buff *result, **p;
3132
3133   for (p = &pfile->free_buffs;; p = &(*p)->next)
3134     {
3135       size_t size;
3136
3137       if (*p == NULL)
3138         return new_buff (min_size);
3139       result = *p;
3140       size = result->limit - result->base;
3141       /* Return a buffer that's big enough, but don't waste one that's
3142          way too big.  */
3143       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3144         break;
3145     }
3146
3147   *p = result->next;
3148   result->next = NULL;
3149   result->cur = result->base;
3150   return result;
3151 }
3152
3153 /* Creates a new buffer with enough space to hold the uncommitted
3154    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3155    the excess bytes to the new buffer.  Chains the new buffer after
3156    BUFF, and returns the new buffer.  */
3157 _cpp_buff *
3158 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3159 {
3160   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3161   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3162
3163   buff->next = new_buff;
3164   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3165   return new_buff;
3166 }
3167
3168 /* Creates a new buffer with enough space to hold the uncommitted
3169    remaining bytes of the buffer pointed to by BUFF, and at least
3170    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3171    Chains the new buffer before the buffer pointed to by BUFF, and
3172    updates the pointer to point to the new buffer.  */
3173 void
3174 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3175 {
3176   _cpp_buff *new_buff, *old_buff = *pbuff;
3177   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3178
3179   new_buff = _cpp_get_buff (pfile, size);
3180   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3181   new_buff->next = old_buff;
3182   *pbuff = new_buff;
3183 }
3184
3185 /* Free a chain of buffers starting at BUFF.  */
3186 void
3187 _cpp_free_buff (_cpp_buff *buff)
3188 {
3189   _cpp_buff *next;
3190
3191   for (; buff; buff = next)
3192     {
3193       next = buff->next;
3194 #ifdef ENABLE_VALGRIND_CHECKING
3195       free (buff);
3196 #else
3197       free (buff->base);
3198 #endif
3199     }
3200 }
3201
3202 /* Allocate permanent, unaligned storage of length LEN.  */
3203 unsigned char *
3204 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3205 {
3206   _cpp_buff *buff = pfile->u_buff;
3207   unsigned char *result = buff->cur;
3208
3209   if (len > (size_t) (buff->limit - result))
3210     {
3211       buff = _cpp_get_buff (pfile, len);
3212       buff->next = pfile->u_buff;
3213       pfile->u_buff = buff;
3214       result = buff->cur;
3215     }
3216
3217   buff->cur = result + len;
3218   return result;
3219 }
3220
3221 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3222    That buffer is used for growing allocations when saving macro
3223    replacement lists in a #define, and when parsing an answer to an
3224    assertion in #assert, #unassert or #if (and therefore possibly
3225    whilst expanding macros).  It therefore must not be used by any
3226    code that they might call: specifically the lexer and the guts of
3227    the macro expander.
3228
3229    All existing other uses clearly fit this restriction: storing
3230    registered pragmas during initialization.  */
3231 unsigned char *
3232 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3233 {
3234   _cpp_buff *buff = pfile->a_buff;
3235   unsigned char *result = buff->cur;
3236
3237   if (len > (size_t) (buff->limit - result))
3238     {
3239       buff = _cpp_get_buff (pfile, len);
3240       buff->next = pfile->a_buff;
3241       pfile->a_buff = buff;
3242       result = buff->cur;
3243     }
3244
3245   buff->cur = result + len;
3246   return result;
3247 }
3248
3249 /* Say which field of TOK is in use.  */
3250
3251 enum cpp_token_fld_kind
3252 cpp_token_val_index (const cpp_token *tok)
3253 {
3254   switch (TOKEN_SPELL (tok))
3255     {
3256     case SPELL_IDENT:
3257       return CPP_TOKEN_FLD_NODE;
3258     case SPELL_LITERAL:
3259       return CPP_TOKEN_FLD_STR;
3260     case SPELL_OPERATOR:
3261       if (tok->type == CPP_PASTE)
3262         return CPP_TOKEN_FLD_TOKEN_NO;
3263       else
3264         return CPP_TOKEN_FLD_NONE;
3265     case SPELL_NONE:
3266       if (tok->type == CPP_MACRO_ARG)
3267         return CPP_TOKEN_FLD_ARG_NO;
3268       else if (tok->type == CPP_PADDING)
3269         return CPP_TOKEN_FLD_SOURCE;
3270       else if (tok->type == CPP_PRAGMA)
3271         return CPP_TOKEN_FLD_PRAGMA;
3272       /* else fall through */
3273     default:
3274       return CPP_TOKEN_FLD_NONE;
3275     }
3276 }
3277
3278 /* All tokens lexed in R after calling this function will be forced to have
3279    their source_location the same as the location referenced by P, until
3280    cpp_stop_forcing_token_locations is called for R.  */
3281
3282 void
3283 cpp_force_token_locations (cpp_reader *r, source_location *p)
3284 {
3285   r->forced_token_location_p = p;
3286 }
3287
3288 /* Go back to assigning locations naturally for lexed tokens.  */
3289
3290 void
3291 cpp_stop_forcing_token_locations (cpp_reader *r)
3292 {
3293   r->forced_token_location_p = NULL;
3294 }