libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 3, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,
  30   SPELL_IDENT,
  31   SPELL_LITERAL,
  32   SPELL_NONE
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;
  38   const unsigned char *name;
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
  43
  44 #define OP(e, s) { SPELL_OPERATOR, UC s  },
  45 #define TK(e, s) { SPELL_ ## s,    UC #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  58 static void store_comment (cpp_reader *, cpp_token *);
  59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  60                             unsigned int, enum cpp_ttype);
  61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  62 static int name_p (cpp_reader *, const cpp_string *);
  63 static tokenrun *next_tokenrun (tokenrun *);
  64
  65 static _cpp_buff *new_buff (size_t);
  66
  67
  68 /* Utility routine:
  69
  70    Compares, the token TOKEN to the NUL-terminated string STRING.
  71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  72 int
  73 cpp_ideq (const cpp_token *token, const char *string)
  74 {
  75   if (token->type != CPP_NAME)
  76     return 0;
  77
  78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
  79 }
  80
  81 /* Record a note TYPE at byte POS into the current cleaned logical
  82    line.  */
  83 static void
  84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  85 {
  86   if (buffer->notes_used == buffer->notes_cap)
  87     {
  88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  90                                   buffer->notes_cap);
  91     }
  92
  93   buffer->notes[buffer->notes_used].pos = pos;
  94   buffer->notes[buffer->notes_used].type = type;
  95   buffer->notes_used++;
  96 }
  97
  98 \f
  99 /* Fast path to find line special characters using optimized character
 100    scanning algorithms.  Anything complicated falls back to the slow
 101    path below.  Since this loop is very hot it's worth doing these kinds
 102    of optimizations.
 103
 104    One of the paths through the ifdefs should provide
 105
 106      const uchar *search_line_fast (const uchar *s, const uchar *end);
 107
 108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
 109    the found character.
 110
 111    Note that the last character of the buffer is *always* a newline,
 112    as forced by _cpp_convert_input.  This fact can be used to avoid
 113    explicitly looking for the end of the buffer.  */
 114
 115 /* Configure gives us an ifdef test.  */
 116 #ifndef WORDS_BIGENDIAN
 117 #define WORDS_BIGENDIAN 0
 118 #endif
 119
 120 /* We'd like the largest integer that fits into a register.  There's nothing
 121    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
 122    but MS decided on an LLP64 model.  Thankfully when building with GCC we
 123    can get the "real" word size.  */
 124 #ifdef __GNUC__
 125 typedef unsigned int word_type __attribute__((__mode__(__word__)));
 126 #else
 127 typedef unsigned long word_type;
 128 #endif
 129
 130 /* The code below is only expecting sizes 4 or 8.
 131    Die at compile-time if this expectation is violated.  */
 132 typedef char check_word_type_size
 133   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
 134
 135 /* Return X with the first N bytes forced to values that won't match one
 136    of the interesting characters.  Note that NUL is not interesting.  */
 137
 138 static inline word_type
 139 acc_char_mask_misalign (word_type val, unsigned int n)
 140 {
 141   word_type mask = -1;
 142   if (WORDS_BIGENDIAN)
 143     mask >>= n * 8;
 144   else
 145     mask <<= n * 8;
 146   return val & mask;
 147 }
 148
 149 /* Return X replicated to all byte positions within WORD_TYPE.  */
 150
 151 static inline word_type
 152 acc_char_replicate (uchar x)
 153 {
 154   word_type ret;
 155
 156   ret = (x << 24) | (x << 16) | (x << 8) | x;
 157   if (sizeof(word_type) == 8)
 158     ret = (ret << 16 << 16) | ret;
 159   return ret;
 160 }
 161
 162 /* Return non-zero if some byte of VAL is (probably) C.  */
 163
 164 static inline word_type
 165 acc_char_cmp (word_type val, word_type c)
 166 {
 167 #if defined(__GNUC__) && defined(__alpha__)
 168   /* We can get exact results using a compare-bytes instruction.
 169      Get (val == c) via (0 >= (val ^ c)).  */
 170   return __builtin_alpha_cmpbge (0, val ^ c);
 171 #else
 172   word_type magic = 0x7efefefeU;
 173   if (sizeof(word_type) == 8)
 174     magic = (magic << 16 << 16) | 0xfefefefeU;
 175   magic |= 1;
 176
 177   val ^= c;
 178   return ((val + magic) ^ ~val) & ~magic;
 179 #endif
 180 }
 181
 182 /* Given the result of acc_char_cmp is non-zero, return the index of
 183    the found character.  If this was a false positive, return -1.  */
 184
 185 static inline int
 186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
 187                 word_type val ATTRIBUTE_UNUSED)
 188 {
 189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
 190   /* The cmpbge instruction sets *bits* of the result corresponding to
 191      matches in the bytes with no false positives.  */
 192   return __builtin_ctzl (cmp);
 193 #else
 194   unsigned int i;
 195
 196   /* ??? It would be nice to force unrolling here,
 197      and have all of these constants folded.  */
 198   for (i = 0; i < sizeof(word_type); ++i)
 199     {
 200       uchar c;
 201       if (WORDS_BIGENDIAN)
 202         c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
 203       else
 204         c = (val >> i * 8) & 0xff;
 205
 206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
 207         return i;
 208     }
 209
 210   return -1;
 211 #endif
 212 }
 213
 214 /* A version of the fast scanner using bit fiddling techniques.
 215
 216    For 32-bit words, one would normally perform 16 comparisons and
 217    16 branches.  With this algorithm one performs 24 arithmetic
 218    operations and one branch.  Whether this is faster with a 32-bit
 219    word size is going to be somewhat system dependent.
 220
 221    For 64-bit words, we eliminate twice the number of comparisons
 222    and branches without increasing the number of arithmetic operations.
 223    It's almost certainly going to be a win with 64-bit word size.  */
 224
 225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
 226   ATTRIBUTE_UNUSED;
 227
 228 static const uchar *
 229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 230 {
 231   const word_type repl_nl = acc_char_replicate ('\n');
 232   const word_type repl_cr = acc_char_replicate ('\r');
 233   const word_type repl_bs = acc_char_replicate ('\\');
 234   const word_type repl_qm = acc_char_replicate ('?');
 235
 236   unsigned int misalign;
 237   const word_type *p;
 238   word_type val, t;
 239
 240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
 241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
 242   val = *p;
 243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
 244   if (misalign)
 245     val = acc_char_mask_misalign (val, misalign);
 246
 247   /* Main loop.  */
 248   while (1)
 249     {
 250       t  = acc_char_cmp (val, repl_nl);
 251       t |= acc_char_cmp (val, repl_cr);
 252       t |= acc_char_cmp (val, repl_bs);
 253       t |= acc_char_cmp (val, repl_qm);
 254
 255       if (__builtin_expect (t != 0, 0))
 256         {
 257           int i = acc_char_index (t, val);
 258           if (i >= 0)
 259             return (const uchar *)p + i;
 260         }
 261
 262       val = *++p;
 263     }
 264 }
 265
 266 /* Disable on Solaris 2/x86 until the following problem can be properly
 267    autoconfed:
 268
 269    The Solaris 10+ assembler tags objects with the instruction set
 270    extensions used, so SSE4.2 executables cannot run on machines that
 271    don't support that extension.  */
 272
 273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
 274
 275 /* Replicated character data to be shared between implementations.
 276    Recall that outside of a context with vector support we can't
 277    define compatible vector types, therefore these are all defined
 278    in terms of raw characters.  */
 279 static const char repl_chars[4][16] __attribute__((aligned(16))) = {
 280   { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 281     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
 282   { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 283     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
 284   { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 285     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
 286   { '?', '?', '?', '?', '?', '?', '?', '?',
 287     '?', '?', '?', '?', '?', '?', '?', '?' },
 288 };
 289
 290 /* A version of the fast scanner using MMX vectorized byte compare insns.
 291
 292    This uses the PMOVMSKB instruction which was introduced with "MMX2",
 293    which was packaged into SSE1; it is also present in the AMD MMX
 294    extension.  Mark the function as using "sse" so that we emit a real
 295    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
 296
 297 static const uchar *
 298 #ifndef __SSE__
 299 __attribute__((__target__("sse")))
 300 #endif
 301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 302 {
 303   typedef char v8qi __attribute__ ((__vector_size__ (8)));
 304   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
 305
 306   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
 307   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
 308   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
 309   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
 310
 311   unsigned int misalign, found, mask;
 312   const v8qi *p;
 313   v8qi data, t, c;
 314
 315   /* Align the source pointer.  While MMX doesn't generate unaligned data
 316      faults, this allows us to safely scan to the end of the buffer without
 317      reading beyond the end of the last page.  */
 318   misalign = (uintptr_t)s & 7;
 319   p = (const v8qi *)((uintptr_t)s & -8);
 320   data = *p;
 321
 322   /* Create a mask for the bytes that are valid within the first
 323      16-byte block.  The Idea here is that the AND with the mask
 324      within the loop is "free", since we need some AND or TEST
 325      insn in order to set the flags for the branch anyway.  */
 326   mask = -1u << misalign;
 327
 328   /* Main loop processing 8 bytes at a time.  */
 329   goto start;
 330   do
 331     {
 332       data = *++p;
 333       mask = -1;
 334
 335     start:
 336       t = __builtin_ia32_pcmpeqb(data, repl_nl);
 337       c = __builtin_ia32_pcmpeqb(data, repl_cr);
 338       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 339       c = __builtin_ia32_pcmpeqb(data, repl_bs);
 340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 341       c = __builtin_ia32_pcmpeqb(data, repl_qm);
 342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
 343       found = __builtin_ia32_pmovmskb (t);
 344       found &= mask;
 345     }
 346   while (!found);
 347
 348   __builtin_ia32_emms ();
 349
 350   /* FOUND contains 1 in bits for which we matched a relevant
 351      character.  Conversion to the byte index is trivial.  */
 352   found = __builtin_ctz(found);
 353   return (const uchar *)p + found;
 354 }
 355
 356 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
 357
 358 static const uchar *
 359 #ifndef __SSE2__
 360 __attribute__((__target__("sse2")))
 361 #endif
 362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 363 {
 364   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 365
 366   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
 367   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
 368   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
 369   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
 370
 371   unsigned int misalign, found, mask;
 372   const v16qi *p;
 373   v16qi data, t;
 374
 375   /* Align the source pointer.  */
 376   misalign = (uintptr_t)s & 15;
 377   p = (const v16qi *)((uintptr_t)s & -16);
 378   data = *p;
 379
 380   /* Create a mask for the bytes that are valid within the first
 381      16-byte block.  The Idea here is that the AND with the mask
 382      within the loop is "free", since we need some AND or TEST
 383      insn in order to set the flags for the branch anyway.  */
 384   mask = -1u << misalign;
 385
 386   /* Main loop processing 16 bytes at a time.  */
 387   goto start;
 388   do
 389     {
 390       data = *++p;
 391       mask = -1;
 392
 393     start:
 394       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
 395       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
 396       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
 397       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
 398       found = __builtin_ia32_pmovmskb128 (t);
 399       found &= mask;
 400     }
 401   while (!found);
 402
 403   /* FOUND contains 1 in bits for which we matched a relevant
 404      character.  Conversion to the byte index is trivial.  */
 405   found = __builtin_ctz(found);
 406   return (const uchar *)p + found;
 407 }
 408
 409 #ifdef HAVE_SSE4
 410 /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
 411
 412 static const uchar *
 413 #ifndef __SSE4_2__
 414 __attribute__((__target__("sse4.2")))
 415 #endif
 416 search_line_sse42 (const uchar *s, const uchar *end)
 417 {
 418   typedef char v16qi __attribute__ ((__vector_size__ (16)));
 419   static const v16qi search = { '\n', '\r', '?', '\\' };
 420
 421   uintptr_t si = (uintptr_t)s;
 422   uintptr_t index;
 423
 424   /* Check for unaligned input.  */
 425   if (si & 15)
 426     {
 427       v16qi sv;
 428
 429       if (__builtin_expect (end - s < 16, 0)
 430           && __builtin_expect ((si & 0xfff) > 0xff0, 0))
 431         {
 432           /* There are less than 16 bytes left in the buffer, and less
 433              than 16 bytes left on the page.  Reading 16 bytes at this
 434              point might generate a spurious page fault.  Defer to the
 435              SSE2 implementation, which already handles alignment.  */
 436           return search_line_sse2 (s, end);
 437         }
 438
 439       /* ??? The builtin doesn't understand that the PCMPESTRI read from
 440          memory need not be aligned.  */
 441       sv = __builtin_ia32_loaddqu ((const char *) s);
 442       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
 443
 444       if (__builtin_expect (index < 16, 0))
 445         goto found;
 446
 447       /* Advance the pointer to an aligned address.  We will re-scan a
 448          few bytes, but we no longer need care for reading past the
 449          end of a page, since we're guaranteed a match.  */
 450       s = (const uchar *)((si + 16) & -16);
 451     }
 452
 453   /* Main loop, processing 16 bytes at a time.  By doing the whole loop
 454      in inline assembly, we can make proper use of the flags set.  */
 455   __asm (      "sub $16, %1\n"
 456         "       .balign 16\n"
 457         "0:     add $16, %1\n"
 458         "       %vpcmpestri $0, (%1), %2\n"
 459         "       jnc 0b"
 460         : "=&c"(index), "+r"(s)
 461         : "x"(search), "a"(4), "d"(16));
 462
 463  found:
 464   return s + index;
 465 }
 466
 467 #else
 468 /* Work around out-dated assemblers without sse4 support.  */
 469 #define search_line_sse42 search_line_sse2
 470 #endif
 471
 472 /* Check the CPU capabilities.  */
 473
 474 #include "../gcc/config/i386/cpuid.h"
 475
 476 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
 477 static search_line_fast_type search_line_fast;
 478
 479 #define HAVE_init_vectorized_lexer 1
 480 static inline void
 481 init_vectorized_lexer (void)
 482 {
 483   unsigned dummy, ecx = 0, edx = 0;
 484   search_line_fast_type impl = search_line_acc_char;
 485   int minimum = 0;
 486
 487 #if defined(__SSE4_2__)
 488   minimum = 3;
 489 #elif defined(__SSE2__)
 490   minimum = 2;
 491 #elif defined(__SSE__)
 492   minimum = 1;
 493 #endif
 494
 495   if (minimum == 3)
 496     impl = search_line_sse42;
 497   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
 498     {
 499       if (minimum == 3 || (ecx & bit_SSE4_2))
 500         impl = search_line_sse42;
 501       else if (minimum == 2 || (edx & bit_SSE2))
 502         impl = search_line_sse2;
 503       else if (minimum == 1 || (edx & bit_SSE))
 504         impl = search_line_mmx;
 505     }
 506   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
 507     {
 508       if (minimum == 1
 509           || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
 510         impl = search_line_mmx;
 511     }
 512
 513   search_line_fast = impl;
 514 }
 515
 516 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
 517
 518 /* A vection of the fast scanner using AltiVec vectorized byte compares
 519    and VSX unaligned loads (when VSX is available).  This is otherwise
 520    the same as the pre-GCC 5 version.  */
 521
 522 ATTRIBUTE_NO_SANITIZE_UNDEFINED
 523 static const uchar *
 524 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 525 {
 526   typedef __attribute__((altivec(vector))) unsigned char vc;
 527
 528   const vc repl_nl = {
 529     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 530     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 531   };
 532   const vc repl_cr = {
 533     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 534     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 535   };
 536   const vc repl_bs = {
 537     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 538     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 539   };
 540   const vc repl_qm = {
 541     '?', '?', '?', '?', '?', '?', '?', '?',
 542     '?', '?', '?', '?', '?', '?', '?', '?',
 543   };
 544   const vc zero = { 0 };
 545
 546   vc data, t;
 547
 548   /* Main loop processing 16 bytes at a time.  */
 549   do
 550     {
 551       vc m_nl, m_cr, m_bs, m_qm;
 552
 553       data = *((const vc *)s);
 554       s += 16;
 555
 556       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 557       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 558       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 559       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 560       t = (m_nl | m_cr) | (m_bs | m_qm);
 561
 562       /* T now contains 0xff in bytes for which we matched one of the relevant
 563          characters.  We want to exit the loop if any byte in T is non-zero.
 564          Below is the expansion of vec_any_ne(t, zero).  */
 565     }
 566   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 567
 568   /* Restore s to to point to the 16 bytes we just processed.  */
 569   s -= 16;
 570
 571   {
 572 #define N  (sizeof(vc) / sizeof(long))
 573
 574     union {
 575       vc v;
 576       /* Statically assert that N is 2 or 4.  */
 577       unsigned long l[(N == 2 || N == 4) ? N : -1];
 578     } u;
 579     unsigned long l, i = 0;
 580
 581     u.v = t;
 582
 583     /* Find the first word of T that is non-zero.  */
 584     switch (N)
 585       {
 586       case 4:
 587         l = u.l[i++];
 588         if (l != 0)
 589           break;
 590         s += sizeof(unsigned long);
 591         l = u.l[i++];
 592         if (l != 0)
 593           break;
 594         s += sizeof(unsigned long);
 595       case 2:
 596         l = u.l[i++];
 597         if (l != 0)
 598           break;
 599         s += sizeof(unsigned long);
 600         l = u.l[i];
 601       }
 602
 603     /* L now contains 0xff in bytes for which we matched one of the
 604        relevant characters.  We can find the byte index by finding
 605        its bit index and dividing by 8.  */
 606 #ifdef __BIG_ENDIAN__
 607     l = __builtin_clzl(l) >> 3;
 608 #else
 609     l = __builtin_ctzl(l) >> 3;
 610 #endif
 611     return s + l;
 612
 613 #undef N
 614   }
 615 }
 616
 617 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
 618
 619 /* A vection of the fast scanner using AltiVec vectorized byte compares.
 620    This cannot be used for little endian because vec_lvsl/lvsr are
 621    deprecated for little endian and the code won't work properly.  */
 622 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
 623    so we can't compile this function without -maltivec on the command line
 624    (or implied by some other switch).  */
 625
 626 static const uchar *
 627 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 628 {
 629   typedef __attribute__((altivec(vector))) unsigned char vc;
 630
 631   const vc repl_nl = {
 632     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
 633     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
 634   };
 635   const vc repl_cr = {
 636     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
 637     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
 638   };
 639   const vc repl_bs = {
 640     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
 641     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
 642   };
 643   const vc repl_qm = {
 644     '?', '?', '?', '?', '?', '?', '?', '?',
 645     '?', '?', '?', '?', '?', '?', '?', '?',
 646   };
 647   const vc ones = {
 648     -1, -1, -1, -1, -1, -1, -1, -1,
 649     -1, -1, -1, -1, -1, -1, -1, -1,
 650   };
 651   const vc zero = { 0 };
 652
 653   vc data, mask, t;
 654
 655   /* Altivec loads automatically mask addresses with -16.  This lets us
 656      issue the first load as early as possible.  */
 657   data = __builtin_vec_ld(0, (const vc *)s);
 658
 659   /* Discard bytes before the beginning of the buffer.  Do this by
 660      beginning with all ones and shifting in zeros according to the
 661      mis-alignment.  The LVSR instruction pulls the exact shift we
 662      want from the address.  */
 663   mask = __builtin_vec_lvsr(0, s);
 664   mask = __builtin_vec_perm(zero, ones, mask);
 665   data &= mask;
 666
 667   /* While altivec loads mask addresses, we still need to align S so
 668      that the offset we compute at the end is correct.  */
 669   s = (const uchar *)((uintptr_t)s & -16);
 670
 671   /* Main loop processing 16 bytes at a time.  */
 672   goto start;
 673   do
 674     {
 675       vc m_nl, m_cr, m_bs, m_qm;
 676
 677       s += 16;
 678       data = __builtin_vec_ld(0, (const vc *)s);
 679
 680     start:
 681       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
 682       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
 683       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
 684       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
 685       t = (m_nl | m_cr) | (m_bs | m_qm);
 686
 687       /* T now contains 0xff in bytes for which we matched one of the relevant
 688          characters.  We want to exit the loop if any byte in T is non-zero.
 689          Below is the expansion of vec_any_ne(t, zero).  */
 690     }
 691   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
 692
 693   {
 694 #define N  (sizeof(vc) / sizeof(long))
 695
 696     union {
 697       vc v;
 698       /* Statically assert that N is 2 or 4.  */
 699       unsigned long l[(N == 2 || N == 4) ? N : -1];
 700     } u;
 701     unsigned long l, i = 0;
 702
 703     u.v = t;
 704
 705     /* Find the first word of T that is non-zero.  */
 706     switch (N)
 707       {
 708       case 4:
 709         l = u.l[i++];
 710         if (l != 0)
 711           break;
 712         s += sizeof(unsigned long);
 713         l = u.l[i++];
 714         if (l != 0)
 715           break;
 716         s += sizeof(unsigned long);
 717       case 2:
 718         l = u.l[i++];
 719         if (l != 0)
 720           break;
 721         s += sizeof(unsigned long);
 722         l = u.l[i];
 723       }
 724
 725     /* L now contains 0xff in bytes for which we matched one of the
 726        relevant characters.  We can find the byte index by finding
 727        its bit index and dividing by 8.  */
 728     l = __builtin_clzl(l) >> 3;
 729     return s + l;
 730
 731 #undef N
 732   }
 733 }
 734
 735 #elif defined (__ARM_NEON)
 736 #include "arm_neon.h"
 737
 738 static const uchar *
 739 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 740 {
 741   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
 742   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
 743   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
 744   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
 745   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
 746
 747   unsigned int misalign, found, mask;
 748   const uint8_t *p;
 749   uint8x16_t data;
 750
 751   /* Align the source pointer.  */
 752   misalign = (uintptr_t)s & 15;
 753   p = (const uint8_t *)((uintptr_t)s & -16);
 754   data = vld1q_u8 (p);
 755
 756   /* Create a mask for the bytes that are valid within the first
 757      16-byte block.  The Idea here is that the AND with the mask
 758      within the loop is "free", since we need some AND or TEST
 759      insn in order to set the flags for the branch anyway.  */
 760   mask = (-1u << misalign) & 0xffff;
 761
 762   /* Main loop, processing 16 bytes at a time.  */
 763   goto start;
 764
 765   do
 766     {
 767       uint8x8_t l;
 768       uint16x4_t m;
 769       uint32x2_t n;
 770       uint8x16_t t, u, v, w;
 771
 772       p += 16;
 773       data = vld1q_u8 (p);
 774       mask = 0xffff;
 775
 776     start:
 777       t = vceqq_u8 (data, repl_nl);
 778       u = vceqq_u8 (data, repl_cr);
 779       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
 780       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
 781       t = vandq_u8 (vorrq_u8 (v, w), xmask);
 782       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
 783       m = vpaddl_u8 (l);
 784       n = vpaddl_u16 (m);
 785
 786       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
 787               vshr_n_u64 ((uint64x1_t) n, 24)), 0);
 788       found &= mask;
 789     }
 790   while (!found);
 791
 792   /* FOUND contains 1 in bits for which we matched a relevant
 793      character.  Conversion to the byte index is trivial.  */
 794   found = __builtin_ctz (found);
 795   return (const uchar *)p + found;
 796 }
 797
 798 #else
 799
 800 /* We only have one accellerated alternative.  Use a direct call so that
 801    we encourage inlining.  */
 802
 803 #define search_line_fast  search_line_acc_char
 804
 805 #endif
 806
 807 /* Initialize the lexer if needed.  */
 808
 809 void
 810 _cpp_init_lexer (void)
 811 {
 812 #ifdef HAVE_init_vectorized_lexer
 813   init_vectorized_lexer ();
 814 #endif
 815 }
 816
 817 /* Returns with a logical line that contains no escaped newlines or
 818    trigraphs.  This is a time-critical inner loop.  */
 819 void
 820 _cpp_clean_line (cpp_reader *pfile)
 821 {
 822   cpp_buffer *buffer;
 823   const uchar *s;
 824   uchar c, *d, *p;
 825
 826   buffer = pfile->buffer;
 827   buffer->cur_note = buffer->notes_used = 0;
 828   buffer->cur = buffer->line_base = buffer->next_line;
 829   buffer->need_line = false;
 830   s = buffer->next_line;
 831
 832   if (!buffer->from_stage3)
 833     {
 834       const uchar *pbackslash = NULL;
 835
 836       /* Fast path.  This is the common case of an un-escaped line with
 837          no trigraphs.  The primary win here is by not writing any
 838          data back to memory until we have to.  */
 839       while (1)
 840         {
 841           /* Perform an optimized search for \n, \r, \\, ?.  */
 842           s = search_line_fast (s, buffer->rlimit);
 843
 844           c = *s;
 845           if (c == '\\')
 846             {
 847               /* Record the location of the backslash and continue.  */
 848               pbackslash = s++;
 849             }
 850           else if (__builtin_expect (c == '?', 0))
 851             {
 852               if (__builtin_expect (s[1] == '?', false)
 853                    && _cpp_trigraph_map[s[2]])
 854                 {
 855                   /* Have a trigraph.  We may or may not have to convert
 856                      it.  Add a line note regardless, for -Wtrigraphs.  */
 857                   add_line_note (buffer, s, s[2]);
 858                   if (CPP_OPTION (pfile, trigraphs))
 859                     {
 860                       /* We do, and that means we have to switch to the
 861                          slow path.  */
 862                       d = (uchar *) s;
 863                       *d = _cpp_trigraph_map[s[2]];
 864                       s += 2;
 865                       goto slow_path;
 866                     }
 867                 }
 868               /* Not a trigraph.  Continue on fast-path.  */
 869               s++;
 870             }
 871           else
 872             break;
 873         }
 874
 875       /* This must be \r or \n.  We're either done, or we'll be forced
 876          to write back to the buffer and continue on the slow path.  */
 877       d = (uchar *) s;
 878
 879       if (__builtin_expect (s == buffer->rlimit, false))
 880         goto done;
 881
 882       /* DOS line ending? */
 883       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
 884         {
 885           s++;
 886           if (s == buffer->rlimit)
 887             goto done;
 888         }
 889
 890       if (__builtin_expect (pbackslash == NULL, true))
 891         goto done;
 892
 893       /* Check for escaped newline.  */
 894       p = d;
 895       while (is_nvspace (p[-1]))
 896         p--;
 897       if (p - 1 != pbackslash)
 898         goto done;
 899
 900       /* Have an escaped newline; process it and proceed to
 901          the slow path.  */
 902       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
 903       d = p - 2;
 904       buffer->next_line = p - 1;
 905
 906     slow_path:
 907       while (1)
 908         {
 909           c = *++s;
 910           *++d = c;
 911
 912           if (c == '\n' || c == '\r')
 913             {
 914               /* Handle DOS line endings.  */
 915               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 916                 s++;
 917               if (s == buffer->rlimit)
 918                 break;
 919
 920               /* Escaped?  */
 921               p = d;
 922               while (p != buffer->next_line && is_nvspace (p[-1]))
 923                 p--;
 924               if (p == buffer->next_line || p[-1] != '\\')
 925                 break;
 926
 927               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 928               d = p - 2;
 929               buffer->next_line = p - 1;
 930             }
 931           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 932             {
 933               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 934               add_line_note (buffer, d, s[2]);
 935               if (CPP_OPTION (pfile, trigraphs))
 936                 {
 937                   *d = _cpp_trigraph_map[s[2]];
 938                   s += 2;
 939                 }
 940             }
 941         }
 942     }
 943   else
 944     {
 945       while (*s != '\n' && *s != '\r')
 946         s++;
 947       d = (uchar *) s;
 948
 949       /* Handle DOS line endings.  */
 950       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 951         s++;
 952     }
 953
 954  done:
 955   *d = '\n';
 956   /* A sentinel note that should never be processed.  */
 957   add_line_note (buffer, d + 1, '\n');
 958   buffer->next_line = s + 1;
 959 }
 960
 961 /* Return true if the trigraph indicated by NOTE should be warned
 962    about in a comment.  */
 963 static bool
 964 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 965 {
 966   const uchar *p;
 967
 968   /* Within comments we don't warn about trigraphs, unless the
 969      trigraph forms an escaped newline, as that may change
 970      behavior.  */
 971   if (note->type != '/')
 972     return false;
 973
 974   /* If -trigraphs, then this was an escaped newline iff the next note
 975      is coincident.  */
 976   if (CPP_OPTION (pfile, trigraphs))
 977     return note[1].pos == note->pos;
 978
 979   /* Otherwise, see if this forms an escaped newline.  */
 980   p = note->pos + 3;
 981   while (is_nvspace (*p))
 982     p++;
 983
 984   /* There might have been escaped newlines between the trigraph and the
 985      newline we found.  Hence the position test.  */
 986   return (*p == '\n' && p < note[1].pos);
 987 }
 988
 989 /* Process the notes created by add_line_note as far as the current
 990    location.  */
 991 void
 992 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 993 {
 994   cpp_buffer *buffer = pfile->buffer;
 995
 996   for (;;)
 997     {
 998       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 999       unsigned int col;
1000
1001       if (note->pos > buffer->cur)
1002         break;
1003
1004       buffer->cur_note++;
1005       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1006
1007       if (note->type == '\\' || note->type == ' ')
1008         {
1009           if (note->type == ' ' && !in_comment)
1010             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1011                                  "backslash and newline separated by space");
1012
1013           if (buffer->next_line > buffer->rlimit)
1014             {
1015               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1016                                    "backslash-newline at end of file");
1017               /* Prevent "no newline at end of file" warning.  */
1018               buffer->next_line = buffer->rlimit;
1019             }
1020
1021           buffer->line_base = note->pos;
1022           CPP_INCREMENT_LINE (pfile, 0);
1023         }
1024       else if (_cpp_trigraph_map[note->type])
1025         {
1026           if (CPP_OPTION (pfile, warn_trigraphs)
1027               && (!in_comment || warn_in_comment (pfile, note)))
1028             {
1029               if (CPP_OPTION (pfile, trigraphs))
1030                 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1031                                        pfile->line_table->highest_line, col,
1032                                        "trigraph ??%c converted to %c",
1033                                        note->type,
1034                                        (int) _cpp_trigraph_map[note->type]);
1035               else
1036                 {
1037                   cpp_warning_with_line
1038                     (pfile, CPP_W_TRIGRAPHS,
1039                      pfile->line_table->highest_line, col,
1040                      "trigraph ??%c ignored, use -trigraphs to enable",
1041                      note->type);
1042                 }
1043             }
1044         }
1045       else if (note->type == 0)
1046         /* Already processed in lex_raw_string.  */;
1047       else
1048         abort ();
1049     }
1050 }
1051
1052 /* Skip a C-style block comment.  We find the end of the comment by
1053    seeing if an asterisk is before every '/' we encounter.  Returns
1054    nonzero if comment terminated by EOF, zero otherwise.
1055
1056    Buffer->cur points to the initial asterisk of the comment.  */
1057 bool
1058 _cpp_skip_block_comment (cpp_reader *pfile)
1059 {
1060   cpp_buffer *buffer = pfile->buffer;
1061   const uchar *cur = buffer->cur;
1062   uchar c;
1063
1064   cur++;
1065   if (*cur == '/')
1066     cur++;
1067
1068   for (;;)
1069     {
1070       /* People like decorating comments with '*', so check for '/'
1071          instead for efficiency.  */
1072       c = *cur++;
1073
1074       if (c == '/')
1075         {
1076           if (cur[-2] == '*')
1077             break;
1078
1079           /* Warn about potential nested comments, but not if the '/'
1080              comes immediately before the true comment delimiter.
1081              Don't bother to get it right across escaped newlines.  */
1082           if (CPP_OPTION (pfile, warn_comments)
1083               && cur[0] == '*' && cur[1] != '/')
1084             {
1085               buffer->cur = cur;
1086               cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1087                                      pfile->line_table->highest_line,
1088                                      CPP_BUF_COL (buffer),
1089                                      "\"/*\" within comment");
1090             }
1091         }
1092       else if (c == '\n')
1093         {
1094           unsigned int cols;
1095           buffer->cur = cur - 1;
1096           _cpp_process_line_notes (pfile, true);
1097           if (buffer->next_line >= buffer->rlimit)
1098             return true;
1099           _cpp_clean_line (pfile);
1100
1101           cols = buffer->next_line - buffer->line_base;
1102           CPP_INCREMENT_LINE (pfile, cols);
1103
1104           cur = buffer->cur;
1105         }
1106     }
1107
1108   buffer->cur = cur;
1109   _cpp_process_line_notes (pfile, true);
1110   return false;
1111 }
1112
1113 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1114    terminating newline.  Handles escaped newlines.  Returns nonzero
1115    if a multiline comment.  */
1116 static int
1117 skip_line_comment (cpp_reader *pfile)
1118 {
1119   cpp_buffer *buffer = pfile->buffer;
1120   source_location orig_line = pfile->line_table->highest_line;
1121
1122   while (*buffer->cur != '\n')
1123     buffer->cur++;
1124
1125   _cpp_process_line_notes (pfile, true);
1126   return orig_line != pfile->line_table->highest_line;
1127 }
1128
1129 /* Skips whitespace, saving the next non-whitespace character.  */
1130 static void
1131 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1132 {
1133   cpp_buffer *buffer = pfile->buffer;
1134   bool saw_NUL = false;
1135
1136   do
1137     {
1138       /* Horizontal space always OK.  */
1139       if (c == ' ' || c == '\t')
1140         ;
1141       /* Just \f \v or \0 left.  */
1142       else if (c == '\0')
1143         saw_NUL = true;
1144       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1145         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1146                              CPP_BUF_COL (buffer),
1147                              "%s in preprocessing directive",
1148                              c == '\f' ? "form feed" : "vertical tab");
1149
1150       c = *buffer->cur++;
1151     }
1152   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1153   while (is_nvspace (c));
1154
1155   if (saw_NUL)
1156     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1157
1158   buffer->cur--;
1159 }
1160
1161 /* See if the characters of a number token are valid in a name (no
1162    '.', '+' or '-').  */
1163 static int
1164 name_p (cpp_reader *pfile, const cpp_string *string)
1165 {
1166   unsigned int i;
1167
1168   for (i = 0; i < string->len; i++)
1169     if (!is_idchar (string->text[i]))
1170       return 0;
1171
1172   return 1;
1173 }
1174
1175 /* After parsing an identifier or other sequence, produce a warning about
1176    sequences not in NFC/NFKC.  */
1177 static void
1178 warn_about_normalization (cpp_reader *pfile,
1179                           const cpp_token *token,
1180                           const struct normalize_state *s)
1181 {
1182   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1183       && !pfile->state.skipping)
1184     {
1185       /* Make sure that the token is printed using UCNs, even
1186          if we'd otherwise happily print UTF-8.  */
1187       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1188       size_t sz;
1189
1190       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1191       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1192         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1193                                "`%.*s' is not in NFKC", (int) sz, buf);
1194       else
1195         cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1196                                "`%.*s' is not in NFC", (int) sz, buf);
1197       free (buf);
1198     }
1199 }
1200
1201 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1202    an identifier.  FIRST is TRUE if this starts an identifier.  */
1203 static bool
1204 forms_identifier_p (cpp_reader *pfile, int first,
1205                     struct normalize_state *state)
1206 {
1207   cpp_buffer *buffer = pfile->buffer;
1208
1209   if (*buffer->cur == '$')
1210     {
1211       if (!CPP_OPTION (pfile, dollars_in_ident))
1212         return false;
1213
1214       buffer->cur++;
1215       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1216         {
1217           CPP_OPTION (pfile, warn_dollars) = 0;
1218           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1219         }
1220
1221       return true;
1222     }
1223
1224   /* Is this a syntactically valid UCN?  */
1225   if (CPP_OPTION (pfile, extended_identifiers)
1226       && *buffer->cur == '\\'
1227       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1228     {
1229       buffer->cur += 2;
1230       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1231                           state))
1232         return true;
1233       buffer->cur -= 2;
1234     }
1235
1236   return false;
1237 }
1238
1239 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1240 static cpp_hashnode *
1241 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1242 {
1243   cpp_hashnode *result;
1244   const uchar *cur;
1245   unsigned int len;
1246   unsigned int hash = HT_HASHSTEP (0, *base);
1247
1248   cur = base + 1;
1249   while (ISIDNUM (*cur))
1250     {
1251       hash = HT_HASHSTEP (hash, *cur);
1252       cur++;
1253     }
1254   len = cur - base;
1255   hash = HT_HASHFINISH (hash, len);
1256   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1257                                               base, len, hash, HT_ALLOC));
1258
1259   /* Rarely, identifiers require diagnostics when lexed.  */
1260   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1261                         && !pfile->state.skipping, 0))
1262     {
1263       /* It is allowed to poison the same identifier twice.  */
1264       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1265         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1266                    NODE_NAME (result));
1267
1268       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1269          replacement list of a variadic macro.  */
1270       if (result == pfile->spec_nodes.n__VA_ARGS__
1271           && !pfile->state.va_args_ok)
1272         {
1273           if (CPP_OPTION (pfile, cplusplus))
1274             cpp_error (pfile, CPP_DL_PEDWARN,
1275                        "__VA_ARGS__ can only appear in the expansion"
1276                        " of a C++11 variadic macro");
1277           else
1278             cpp_error (pfile, CPP_DL_PEDWARN,
1279                        "__VA_ARGS__ can only appear in the expansion"
1280                        " of a C99 variadic macro");
1281         }
1282
1283       /* For -Wc++-compat, warn about use of C++ named operators.  */
1284       if (result->flags & NODE_WARN_OPERATOR)
1285         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1286                      "identifier \"%s\" is a special operator name in C++",
1287                      NODE_NAME (result));
1288     }
1289
1290   return result;
1291 }
1292
1293 /* Get the cpp_hashnode of an identifier specified by NAME in
1294    the current cpp_reader object.  If none is found, NULL is returned.  */
1295 cpp_hashnode *
1296 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1297 {
1298   cpp_hashnode *result;
1299   result = lex_identifier_intern (pfile, (uchar *) name);
1300   return result;
1301 }
1302
1303 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1304 static cpp_hashnode *
1305 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1306                 struct normalize_state *nst, cpp_hashnode **spelling)
1307 {
1308   cpp_hashnode *result;
1309   const uchar *cur;
1310   unsigned int len;
1311   unsigned int hash = HT_HASHSTEP (0, *base);
1312
1313   cur = pfile->buffer->cur;
1314   if (! starts_ucn)
1315     {
1316       while (ISIDNUM (*cur))
1317         {
1318           hash = HT_HASHSTEP (hash, *cur);
1319           cur++;
1320         }
1321       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1322     }
1323   pfile->buffer->cur = cur;
1324   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1325     {
1326       /* Slower version for identifiers containing UCNs (or $).  */
1327       do {
1328         while (ISIDNUM (*pfile->buffer->cur))
1329           {
1330             NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1331             pfile->buffer->cur++;
1332           }
1333       } while (forms_identifier_p (pfile, false, nst));
1334       result = _cpp_interpret_identifier (pfile, base,
1335                                           pfile->buffer->cur - base);
1336       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1337     }
1338   else
1339     {
1340       len = cur - base;
1341       hash = HT_HASHFINISH (hash, len);
1342
1343       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1344                                                   base, len, hash, HT_ALLOC));
1345       *spelling = result;
1346     }
1347
1348   /* Rarely, identifiers require diagnostics when lexed.  */
1349   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1350                         && !pfile->state.skipping, 0))
1351     {
1352       /* It is allowed to poison the same identifier twice.  */
1353       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1354         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1355                    NODE_NAME (result));
1356
1357       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1358          replacement list of a variadic macro.  */
1359       if (result == pfile->spec_nodes.n__VA_ARGS__
1360           && !pfile->state.va_args_ok)
1361         {
1362           if (CPP_OPTION (pfile, cplusplus))
1363             cpp_error (pfile, CPP_DL_PEDWARN,
1364                        "__VA_ARGS__ can only appear in the expansion"
1365                        " of a C++11 variadic macro");
1366           else
1367             cpp_error (pfile, CPP_DL_PEDWARN,
1368                        "__VA_ARGS__ can only appear in the expansion"
1369                        " of a C99 variadic macro");
1370         }
1371
1372       /* For -Wc++-compat, warn about use of C++ named operators.  */
1373       if (result->flags & NODE_WARN_OPERATOR)
1374         cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1375                      "identifier \"%s\" is a special operator name in C++",
1376                      NODE_NAME (result));
1377     }
1378
1379   return result;
1380 }
1381
1382 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1383 static void
1384 lex_number (cpp_reader *pfile, cpp_string *number,
1385             struct normalize_state *nst)
1386 {
1387   const uchar *cur;
1388   const uchar *base;
1389   uchar *dest;
1390
1391   base = pfile->buffer->cur - 1;
1392   do
1393     {
1394       cur = pfile->buffer->cur;
1395
1396       /* N.B. ISIDNUM does not include $.  */
1397       while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1398              || VALID_SIGN (*cur, cur[-1]))
1399         {
1400           NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1401           cur++;
1402         }
1403       /* A number can't end with a digit separator.  */
1404       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1405         --cur;
1406
1407       pfile->buffer->cur = cur;
1408     }
1409   while (forms_identifier_p (pfile, false, nst));
1410
1411   number->len = cur - base;
1412   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1413   memcpy (dest, base, number->len);
1414   dest[number->len] = '\0';
1415   number->text = dest;
1416 }
1417
1418 /* Create a token of type TYPE with a literal spelling.  */
1419 static void
1420 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1421                 unsigned int len, enum cpp_ttype type)
1422 {
1423   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1424
1425   memcpy (dest, base, len);
1426   dest[len] = '\0';
1427   token->type = type;
1428   token->val.str.len = len;
1429   token->val.str.text = dest;
1430 }
1431
1432 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1433    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1434
1435 static void
1436 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1437                 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1438 {
1439   _cpp_buff *first_buff = *first_buff_p;
1440   _cpp_buff *last_buff = *last_buff_p;
1441
1442   if (first_buff == NULL)
1443     first_buff = last_buff = _cpp_get_buff (pfile, len);
1444   else if (len > BUFF_ROOM (last_buff))
1445     {
1446       size_t room = BUFF_ROOM (last_buff);
1447       memcpy (BUFF_FRONT (last_buff), base, room);
1448       BUFF_FRONT (last_buff) += room;
1449       base += room;
1450       len -= room;
1451       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1452     }
1453
1454   memcpy (BUFF_FRONT (last_buff), base, len);
1455   BUFF_FRONT (last_buff) += len;
1456
1457   *first_buff_p = first_buff;
1458   *last_buff_p = last_buff;
1459 }
1460
1461
1462 /* Returns true if a macro has been defined.
1463    This might not work if compile with -save-temps,
1464    or preprocess separately from compilation.  */
1465
1466 static bool
1467 is_macro(cpp_reader *pfile, const uchar *base)
1468 {
1469   const uchar *cur = base;
1470   if (! ISIDST (*cur))
1471     return false;
1472   unsigned int hash = HT_HASHSTEP (0, *cur);
1473   ++cur;
1474   while (ISIDNUM (*cur))
1475     {
1476       hash = HT_HASHSTEP (hash, *cur);
1477       ++cur;
1478     }
1479   hash = HT_HASHFINISH (hash, cur - base);
1480
1481   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1482                                         base, cur - base, hash, HT_NO_INSERT));
1483
1484   return !result ? false : (result->type == NT_MACRO);
1485 }
1486
1487
1488 /* Lexes a raw string.  The stored string contains the spelling, including
1489    double quotes, delimiter string, '(' and ')', any leading
1490    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1491    literal, or CPP_OTHER if it was not properly terminated.
1492
1493    The spelling is NUL-terminated, but it is not guaranteed that this
1494    is the first NUL since embedded NULs are preserved.  */
1495
1496 static void
1497 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1498                 const uchar *cur)
1499 {
1500   uchar raw_prefix[17];
1501   uchar temp_buffer[18];
1502   const uchar *orig_base;
1503   unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1504   enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1505   raw_str_phase phase = RAW_STR_PREFIX;
1506   enum cpp_ttype type;
1507   size_t total_len = 0;
1508   /* Index into temp_buffer during phases other than RAW_STR,
1509      during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1510      be appended to temp_buffer.  */
1511   size_t temp_buffer_len = 0;
1512   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1513   size_t raw_prefix_start;
1514   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1515
1516   type = (*base == 'L' ? CPP_WSTRING :
1517           *base == 'U' ? CPP_STRING32 :
1518           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1519           : CPP_STRING);
1520
1521 #define BUF_APPEND(STR,LEN)                                     \
1522       do {                                                      \
1523         bufring_append (pfile, (const uchar *)(STR), (LEN),     \
1524                         &first_buff, &last_buff);               \
1525         total_len += (LEN);                                     \
1526         if (__builtin_expect (temp_buffer_len < 17, 0)          \
1527             && (const uchar *)(STR) != base                     \
1528             && (LEN) <= 2)                                      \
1529           {                                                     \
1530             memcpy (temp_buffer + temp_buffer_len,              \
1531                     (const uchar *)(STR), (LEN));               \
1532             temp_buffer_len += (LEN);                           \
1533           }                                                     \
1534       } while (0);
1535
1536   orig_base = base;
1537   ++cur;
1538   raw_prefix_start = cur - base;
1539   for (;;)
1540     {
1541       cppchar_t c;
1542
1543       /* If we previously performed any trigraph or line splicing
1544          transformations, undo them in between the opening and closing
1545          double quote.  */
1546       while (note->pos < cur)
1547         ++note;
1548       for (; note->pos == cur; ++note)
1549         {
1550           switch (note->type)
1551             {
1552             case '\\':
1553             case ' ':
1554               /* Restore backslash followed by newline.  */
1555               BUF_APPEND (base, cur - base);
1556               base = cur;
1557               BUF_APPEND ("\\", 1);
1558             after_backslash:
1559               if (note->type == ' ')
1560                 {
1561                   /* GNU backslash whitespace newline extension.  FIXME
1562                      could be any sequence of non-vertical space.  When we
1563                      can properly restore any such sequence, we should mark
1564                      this note as handled so _cpp_process_line_notes
1565                      doesn't warn.  */
1566                   BUF_APPEND (" ", 1);
1567                 }
1568
1569               BUF_APPEND ("\n", 1);
1570               break;
1571
1572             case 0:
1573               /* Already handled.  */
1574               break;
1575
1576             default:
1577               if (_cpp_trigraph_map[note->type])
1578                 {
1579                   /* Don't warn about this trigraph in
1580                      _cpp_process_line_notes, since trigraphs show up as
1581                      trigraphs in raw strings.  */
1582                   uchar type = note->type;
1583                   note->type = 0;
1584
1585                   if (!CPP_OPTION (pfile, trigraphs))
1586                     /* If we didn't convert the trigraph in the first
1587                        place, don't do anything now either.  */
1588                     break;
1589
1590                   BUF_APPEND (base, cur - base);
1591                   base = cur;
1592                   BUF_APPEND ("??", 2);
1593
1594                   /* ??/ followed by newline gets two line notes, one for
1595                      the trigraph and one for the backslash/newline.  */
1596                   if (type == '/' && note[1].pos == cur)
1597                     {
1598                       if (note[1].type != '\\'
1599                           && note[1].type != ' ')
1600                         abort ();
1601                       BUF_APPEND ("/", 1);
1602                       ++note;
1603                       goto after_backslash;
1604                     }
1605                   else
1606                     {
1607                       /* Skip the replacement character.  */
1608                       base = ++cur;
1609                       BUF_APPEND (&type, 1);
1610                       c = type;
1611                       goto check_c;
1612                     }
1613                 }
1614               else
1615                 abort ();
1616               break;
1617             }
1618         }
1619       c = *cur++;
1620       if (__builtin_expect (temp_buffer_len < 17, 0))
1621         temp_buffer[temp_buffer_len++] = c;
1622
1623      check_c:
1624       if (phase == RAW_STR_PREFIX)
1625         {
1626           while (raw_prefix_len < temp_buffer_len)
1627             {
1628               raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1629               switch (raw_prefix[raw_prefix_len])
1630                 {
1631                 case ' ': case '(': case ')': case '\\': case '\t':
1632                 case '\v': case '\f': case '\n': default:
1633                   break;
1634                 /* Basic source charset except the above chars.  */
1635                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1636                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1637                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1638                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1639                 case 'y': case 'z':
1640                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1641                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1642                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1643                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1644                 case 'Y': case 'Z':
1645                 case '0': case '1': case '2': case '3': case '4': case '5':
1646                 case '6': case '7': case '8': case '9':
1647                 case '_': case '{': case '}': case '#': case '[': case ']':
1648                 case '<': case '>': case '%': case ':': case ';': case '.':
1649                 case '?': case '*': case '+': case '-': case '/': case '^':
1650                 case '&': case '|': case '~': case '!': case '=': case ',':
1651                 case '"': case '\'':
1652                   if (raw_prefix_len < 16)
1653                     {
1654                       raw_prefix_len++;
1655                       continue;
1656                     }
1657                   break;
1658                 }
1659
1660               if (raw_prefix[raw_prefix_len] != '(')
1661                 {
1662                   int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1663                   if (raw_prefix_len == 16)
1664                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1665                                          col, "raw string delimiter longer "
1666                                               "than 16 characters");
1667                   else if (raw_prefix[raw_prefix_len] == '\n')
1668                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1669                                          col, "invalid new-line in raw "
1670                                               "string delimiter");
1671                   else
1672                     cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1673                                          col, "invalid character '%c' in "
1674                                               "raw string delimiter",
1675                                          (int) raw_prefix[raw_prefix_len]);
1676                   pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1677                   create_literal (pfile, token, orig_base,
1678                                   raw_prefix_start - 1, CPP_OTHER);
1679                   if (first_buff)
1680                     _cpp_release_buff (pfile, first_buff);
1681                   return;
1682                 }
1683               raw_prefix[raw_prefix_len] = '"';
1684               phase = RAW_STR;
1685               /* Nothing should be appended to temp_buffer during
1686                  RAW_STR phase.  */
1687               temp_buffer_len = 17;
1688               break;
1689             }
1690           continue;
1691         }
1692       else if (phase == RAW_STR_SUFFIX)
1693         {
1694           while (raw_suffix_len <= raw_prefix_len
1695                  && raw_suffix_len < temp_buffer_len
1696                  && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1697             raw_suffix_len++;
1698           if (raw_suffix_len > raw_prefix_len)
1699             break;
1700           if (raw_suffix_len == temp_buffer_len)
1701             continue;
1702           phase = RAW_STR;
1703           /* Nothing should be appended to temp_buffer during
1704              RAW_STR phase.  */
1705           temp_buffer_len = 17;
1706         }
1707       if (c == ')')
1708         {
1709           phase = RAW_STR_SUFFIX;
1710           raw_suffix_len = 0;
1711           temp_buffer_len = 0;
1712         }
1713       else if (c == '\n')
1714         {
1715           if (pfile->state.in_directive
1716               || (pfile->state.parsing_args
1717                   && pfile->buffer->next_line >= pfile->buffer->rlimit))
1718             {
1719               cur--;
1720               type = CPP_OTHER;
1721               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1722                                    "unterminated raw string");
1723               break;
1724             }
1725
1726           BUF_APPEND (base, cur - base);
1727
1728           if (pfile->buffer->cur < pfile->buffer->rlimit)
1729             CPP_INCREMENT_LINE (pfile, 0);
1730           pfile->buffer->need_line = true;
1731
1732           pfile->buffer->cur = cur-1;
1733           _cpp_process_line_notes (pfile, false);
1734           if (!_cpp_get_fresh_line (pfile))
1735             {
1736               source_location src_loc = token->src_loc;
1737               token->type = CPP_EOF;
1738               /* Tell the compiler the line number of the EOF token.  */
1739               token->src_loc = pfile->line_table->highest_line;
1740               token->flags = BOL;
1741               if (first_buff != NULL)
1742                 _cpp_release_buff (pfile, first_buff);
1743               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1744                                    "unterminated raw string");
1745               return;
1746             }
1747
1748           cur = base = pfile->buffer->cur;
1749           note = &pfile->buffer->notes[pfile->buffer->cur_note];
1750         }
1751     }
1752
1753   if (CPP_OPTION (pfile, user_literals))
1754     {
1755       /* If a string format macro, say from inttypes.h, is placed touching
1756          a string literal it could be parsed as a C++11 user-defined string
1757          literal thus breaking the program.
1758          Try to identify macros with is_macro. A warning is issued. */
1759       if (is_macro (pfile, cur))
1760         {
1761           /* Raise a warning, but do not consume subsequent tokens.  */
1762           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1763             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1764                                    token->src_loc, 0,
1765                                    "invalid suffix on literal; C++11 requires "
1766                                    "a space between literal and string macro");
1767         }
1768       /* Grab user defined literal suffix.  */
1769       else if (ISIDST (*cur))
1770         {
1771           type = cpp_userdef_string_add_type (type);
1772           ++cur;
1773
1774           while (ISIDNUM (*cur))
1775             ++cur;
1776         }
1777     }
1778
1779   pfile->buffer->cur = cur;
1780   if (first_buff == NULL)
1781     create_literal (pfile, token, base, cur - base, type);
1782   else
1783     {
1784       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1785
1786       token->type = type;
1787       token->val.str.len = total_len + (cur - base);
1788       token->val.str.text = dest;
1789       last_buff = first_buff;
1790       while (last_buff != NULL)
1791         {
1792           memcpy (dest, last_buff->base,
1793                   BUFF_FRONT (last_buff) - last_buff->base);
1794           dest += BUFF_FRONT (last_buff) - last_buff->base;
1795           last_buff = last_buff->next;
1796         }
1797       _cpp_release_buff (pfile, first_buff);
1798       memcpy (dest, base, cur - base);
1799       dest[cur - base] = '\0';
1800     }
1801 }
1802
1803 /* Lexes a string, character constant, or angle-bracketed header file
1804    name.  The stored string contains the spelling, including opening
1805    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1806    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1807    if it was not properly terminated, or CPP_LESS for an unterminated
1808    header name which must be relexed as normal tokens.
1809
1810    The spelling is NUL-terminated, but it is not guaranteed that this
1811    is the first NUL since embedded NULs are preserved.  */
1812 static void
1813 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1814 {
1815   bool saw_NUL = false;
1816   const uchar *cur;
1817   cppchar_t terminator;
1818   enum cpp_ttype type;
1819
1820   cur = base;
1821   terminator = *cur++;
1822   if (terminator == 'L' || terminator == 'U')
1823     terminator = *cur++;
1824   else if (terminator == 'u')
1825     {
1826       terminator = *cur++;
1827       if (terminator == '8')
1828         terminator = *cur++;
1829     }
1830   if (terminator == 'R')
1831     {
1832       lex_raw_string (pfile, token, base, cur);
1833       return;
1834     }
1835   if (terminator == '"')
1836     type = (*base == 'L' ? CPP_WSTRING :
1837             *base == 'U' ? CPP_STRING32 :
1838             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1839                          : CPP_STRING);
1840   else if (terminator == '\'')
1841     type = (*base == 'L' ? CPP_WCHAR :
1842             *base == 'U' ? CPP_CHAR32 :
1843             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1844   else
1845     terminator = '>', type = CPP_HEADER_NAME;
1846
1847   for (;;)
1848     {
1849       cppchar_t c = *cur++;
1850
1851       /* In #include-style directives, terminators are not escapable.  */
1852       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1853         cur++;
1854       else if (c == terminator)
1855         break;
1856       else if (c == '\n')
1857         {
1858           cur--;
1859           /* Unmatched quotes always yield undefined behavior, but
1860              greedy lexing means that what appears to be an unterminated
1861              header name may actually be a legitimate sequence of tokens.  */
1862           if (terminator == '>')
1863             {
1864               token->type = CPP_LESS;
1865               return;
1866             }
1867           type = CPP_OTHER;
1868           break;
1869         }
1870       else if (c == '\0')
1871         saw_NUL = true;
1872     }
1873
1874   if (saw_NUL && !pfile->state.skipping)
1875     cpp_error (pfile, CPP_DL_WARNING,
1876                "null character(s) preserved in literal");
1877
1878   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1879     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1880                (int) terminator);
1881
1882   if (CPP_OPTION (pfile, user_literals))
1883     {
1884       /* If a string format macro, say from inttypes.h, is placed touching
1885          a string literal it could be parsed as a C++11 user-defined string
1886          literal thus breaking the program.
1887          Try to identify macros with is_macro. A warning is issued. */
1888       if (is_macro (pfile, cur))
1889         {
1890           /* Raise a warning, but do not consume subsequent tokens.  */
1891           if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1892             cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1893                                    token->src_loc, 0,
1894                                    "invalid suffix on literal; C++11 requires "
1895                                    "a space between literal and string macro");
1896         }
1897       /* Grab user defined literal suffix.  */
1898       else if (ISIDST (*cur))
1899         {
1900           type = cpp_userdef_char_add_type (type);
1901           type = cpp_userdef_string_add_type (type);
1902           ++cur;
1903
1904           while (ISIDNUM (*cur))
1905             ++cur;
1906         }
1907     }
1908   else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
1909            && is_macro (pfile, cur)
1910            && !pfile->state.skipping)
1911     cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
1912                            token->src_loc, 0, "C++11 requires a space "
1913                            "between string literal and macro");
1914
1915   pfile->buffer->cur = cur;
1916   create_literal (pfile, token, base, cur - base, type);
1917 }
1918
1919 /* Return the comment table. The client may not make any assumption
1920    about the ordering of the table.  */
1921 cpp_comment_table *
1922 cpp_get_comments (cpp_reader *pfile)
1923 {
1924   return &pfile->comments;
1925 }
1926
1927 /* Append a comment to the end of the comment table. */
1928 static void
1929 store_comment (cpp_reader *pfile, cpp_token *token)
1930 {
1931   int len;
1932
1933   if (pfile->comments.allocated == 0)
1934     {
1935       pfile->comments.allocated = 256;
1936       pfile->comments.entries = (cpp_comment *) xmalloc
1937         (pfile->comments.allocated * sizeof (cpp_comment));
1938     }
1939
1940   if (pfile->comments.count == pfile->comments.allocated)
1941     {
1942       pfile->comments.allocated *= 2;
1943       pfile->comments.entries = (cpp_comment *) xrealloc
1944         (pfile->comments.entries,
1945          pfile->comments.allocated * sizeof (cpp_comment));
1946     }
1947
1948   len = token->val.str.len;
1949
1950   /* Copy comment. Note, token may not be NULL terminated. */
1951   pfile->comments.entries[pfile->comments.count].comment =
1952     (char *) xmalloc (sizeof (char) * (len + 1));
1953   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1954           token->val.str.text, len);
1955   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1956
1957   /* Set source location. */
1958   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1959
1960   /* Increment the count of entries in the comment table. */
1961   pfile->comments.count++;
1962 }
1963
1964 /* The stored comment includes the comment start and any terminator.  */
1965 static void
1966 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1967               cppchar_t type)
1968 {
1969   unsigned char *buffer;
1970   unsigned int len, clen, i;
1971
1972   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1973
1974   /* C++ comments probably (not definitely) have moved past a new
1975      line, which we don't want to save in the comment.  */
1976   if (is_vspace (pfile->buffer->cur[-1]))
1977     len--;
1978
1979   /* If we are currently in a directive or in argument parsing, then
1980      we need to store all C++ comments as C comments internally, and
1981      so we need to allocate a little extra space in that case.
1982
1983      Note that the only time we encounter a directive here is
1984      when we are saving comments in a "#define".  */
1985   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1986           && type == '/') ? len + 2 : len;
1987
1988   buffer = _cpp_unaligned_alloc (pfile, clen);
1989
1990   token->type = CPP_COMMENT;
1991   token->val.str.len = clen;
1992   token->val.str.text = buffer;
1993
1994   buffer[0] = '/';
1995   memcpy (buffer + 1, from, len - 1);
1996
1997   /* Finish conversion to a C comment, if necessary.  */
1998   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1999     {
2000       buffer[1] = '*';
2001       buffer[clen - 2] = '*';
2002       buffer[clen - 1] = '/';
2003       /* As there can be in a C++ comments illegal sequences for C comments
2004          we need to filter them out.  */
2005       for (i = 2; i < (clen - 2); i++)
2006         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2007           buffer[i] = '|';
2008     }
2009
2010   /* Finally store this comment for use by clients of libcpp. */
2011   store_comment (pfile, token);
2012 }
2013
2014 /* Allocate COUNT tokens for RUN.  */
2015 void
2016 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2017 {
2018   run->base = XNEWVEC (cpp_token, count);
2019   run->limit = run->base + count;
2020   run->next = NULL;
2021 }
2022
2023 /* Returns the next tokenrun, or creates one if there is none.  */
2024 static tokenrun *
2025 next_tokenrun (tokenrun *run)
2026 {
2027   if (run->next == NULL)
2028     {
2029       run->next = XNEW (tokenrun);
2030       run->next->prev = run;
2031       _cpp_init_tokenrun (run->next, 250);
2032     }
2033
2034   return run->next;
2035 }
2036
2037 /* Return the number of not yet processed token in a given
2038    context.  */
2039 int
2040 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2041 {
2042   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2043     return (LAST (context).token - FIRST (context).token);
2044   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2045            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2046     return (LAST (context).ptoken - FIRST (context).ptoken);
2047   else
2048       abort ();
2049 }
2050
2051 /* Returns the token present at index INDEX in a given context.  If
2052    INDEX is zero, the next token to be processed is returned.  */
2053 static const cpp_token*
2054 _cpp_token_from_context_at (cpp_context *context, int index)
2055 {
2056   if (context->tokens_kind == TOKENS_KIND_DIRECT)
2057     return &(FIRST (context).token[index]);
2058   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2059            || context->tokens_kind == TOKENS_KIND_EXTENDED)
2060     return FIRST (context).ptoken[index];
2061  else
2062    abort ();
2063 }
2064
2065 /* Look ahead in the input stream.  */
2066 const cpp_token *
2067 cpp_peek_token (cpp_reader *pfile, int index)
2068 {
2069   cpp_context *context = pfile->context;
2070   const cpp_token *peektok;
2071   int count;
2072
2073   /* First, scan through any pending cpp_context objects.  */
2074   while (context->prev)
2075     {
2076       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2077
2078       if (index < (int) sz)
2079         return _cpp_token_from_context_at (context, index);
2080       index -= (int) sz;
2081       context = context->prev;
2082     }
2083
2084   /* We will have to read some new tokens after all (and do so
2085      without invalidating preceding tokens).  */
2086   count = index;
2087   pfile->keep_tokens++;
2088
2089   /* For peeked tokens temporarily disable line_change reporting,
2090      until the tokens are parsed for real.  */
2091   void (*line_change) (cpp_reader *, const cpp_token *, int)
2092     = pfile->cb.line_change;
2093   pfile->cb.line_change = NULL;
2094
2095   do
2096     {
2097       peektok = _cpp_lex_token (pfile);
2098       if (peektok->type == CPP_EOF)
2099         {
2100           index--;
2101           break;
2102         }
2103     }
2104   while (index--);
2105
2106   _cpp_backup_tokens_direct (pfile, count - index);
2107   pfile->keep_tokens--;
2108   pfile->cb.line_change = line_change;
2109
2110   return peektok;
2111 }
2112
2113 /* Allocate a single token that is invalidated at the same time as the
2114    rest of the tokens on the line.  Has its line and col set to the
2115    same as the last lexed token, so that diagnostics appear in the
2116    right place.  */
2117 cpp_token *
2118 _cpp_temp_token (cpp_reader *pfile)
2119 {
2120   cpp_token *old, *result;
2121   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2122   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2123
2124   old = pfile->cur_token - 1;
2125   /* Any pre-existing lookaheads must not be clobbered.  */
2126   if (la)
2127     {
2128       if (sz <= la)
2129         {
2130           tokenrun *next = next_tokenrun (pfile->cur_run);
2131
2132           if (sz < la)
2133             memmove (next->base + 1, next->base,
2134                      (la - sz) * sizeof (cpp_token));
2135
2136           next->base[0] = pfile->cur_run->limit[-1];
2137         }
2138
2139       if (sz > 1)
2140         memmove (pfile->cur_token + 1, pfile->cur_token,
2141                  MIN (la, sz - 1) * sizeof (cpp_token));
2142     }
2143
2144   if (!sz && pfile->cur_token == pfile->cur_run->limit)
2145     {
2146       pfile->cur_run = next_tokenrun (pfile->cur_run);
2147       pfile->cur_token = pfile->cur_run->base;
2148     }
2149
2150   result = pfile->cur_token++;
2151   result->src_loc = old->src_loc;
2152   return result;
2153 }
2154
2155 /* Lex a token into RESULT (external interface).  Takes care of issues
2156    like directive handling, token lookahead, multiple include
2157    optimization and skipping.  */
2158 const cpp_token *
2159 _cpp_lex_token (cpp_reader *pfile)
2160 {
2161   cpp_token *result;
2162
2163   for (;;)
2164     {
2165       if (pfile->cur_token == pfile->cur_run->limit)
2166         {
2167           pfile->cur_run = next_tokenrun (pfile->cur_run);
2168           pfile->cur_token = pfile->cur_run->base;
2169         }
2170       /* We assume that the current token is somewhere in the current
2171          run.  */
2172       if (pfile->cur_token < pfile->cur_run->base
2173           || pfile->cur_token >= pfile->cur_run->limit)
2174         abort ();
2175
2176       if (pfile->lookaheads)
2177         {
2178           pfile->lookaheads--;
2179           result = pfile->cur_token++;
2180         }
2181       else
2182         result = _cpp_lex_direct (pfile);
2183
2184       if (result->flags & BOL)
2185         {
2186           /* Is this a directive.  If _cpp_handle_directive returns
2187              false, it is an assembler #.  */
2188           if (result->type == CPP_HASH
2189               /* 6.10.3 p 11: Directives in a list of macro arguments
2190                  gives undefined behavior.  This implementation
2191                  handles the directive as normal.  */
2192               && pfile->state.parsing_args != 1)
2193             {
2194               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2195                 {
2196                   if (pfile->directive_result.type == CPP_PADDING)
2197                     continue;
2198                   result = &pfile->directive_result;
2199                 }
2200             }
2201           else if (pfile->state.in_deferred_pragma)
2202             result = &pfile->directive_result;
2203
2204           if (pfile->cb.line_change && !pfile->state.skipping)
2205             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2206         }
2207
2208       /* We don't skip tokens in directives.  */
2209       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2210         break;
2211
2212       /* Outside a directive, invalidate controlling macros.  At file
2213          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2214          get here and MI optimization works.  */
2215       pfile->mi_valid = false;
2216
2217       if (!pfile->state.skipping || result->type == CPP_EOF)
2218         break;
2219     }
2220
2221   return result;
2222 }
2223
2224 /* Returns true if a fresh line has been loaded.  */
2225 bool
2226 _cpp_get_fresh_line (cpp_reader *pfile)
2227 {
2228   int return_at_eof;
2229
2230   /* We can't get a new line until we leave the current directive.  */
2231   if (pfile->state.in_directive)
2232     return false;
2233
2234   for (;;)
2235     {
2236       cpp_buffer *buffer = pfile->buffer;
2237
2238       if (!buffer->need_line)
2239         return true;
2240
2241       if (buffer->next_line < buffer->rlimit)
2242         {
2243           _cpp_clean_line (pfile);
2244           return true;
2245         }
2246
2247       /* First, get out of parsing arguments state.  */
2248       if (pfile->state.parsing_args)
2249         return false;
2250
2251       /* End of buffer.  Non-empty files should end in a newline.  */
2252       if (buffer->buf != buffer->rlimit
2253           && buffer->next_line > buffer->rlimit
2254           && !buffer->from_stage3)
2255         {
2256           /* Clip to buffer size.  */
2257           buffer->next_line = buffer->rlimit;
2258         }
2259
2260       return_at_eof = buffer->return_at_eof;
2261       _cpp_pop_buffer (pfile);
2262       if (pfile->buffer == NULL || return_at_eof)
2263         return false;
2264     }
2265 }
2266
/* If the next input character, *buffer->cur, equals CHAR, consume it
   and set result->type to THEN_TYPE; otherwise consume nothing and
   set result->type to ELSE_TYPE.  Relies on variables named `buffer'
   and `result' being in scope where the macro is used.  Every
   argument is parenthesized so that an expression binding less
   tightly than `==' (e.g. a conditional expression) is still treated
   as a single operand.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        {                                               \
          buffer->cur++;                                \
          result->type = (THEN_TYPE);                   \
        }                                               \
    }                                                   \
  while (0)
2275
2276 /* Lex a token into pfile->cur_token, which is also incremented, to
2277    get diagnostics pointing to the correct location.
2278
2279    Does not handle issues such as token lookahead, multiple-include
2280    optimization, directives, skipping etc.  This function is only
2281    suitable for use by _cpp_lex_token, and in special cases like
2282    lex_expansion_token which doesn't care for any of these issues.
2283
2284    When meeting a newline, returns CPP_EOF if parsing a directive,
2285    otherwise returns to the start of the token buffer if permissible.
2286    Returns the location of the lexed token.  */
2287 cpp_token *
2288 _cpp_lex_direct (cpp_reader *pfile)
2289 {
2290   cppchar_t c;
2291   cpp_buffer *buffer;
2292   const unsigned char *comment_start;
2293   cpp_token *result = pfile->cur_token++;
2294
2295  fresh_line:
2296   result->flags = 0;
2297   buffer = pfile->buffer;
2298   if (buffer->need_line)
2299     {
2300       if (pfile->state.in_deferred_pragma)
2301         {
2302           result->type = CPP_PRAGMA_EOL;
2303           pfile->state.in_deferred_pragma = false;
2304           if (!pfile->state.pragma_allow_expansion)
2305             pfile->state.prevent_expansion--;
2306           return result;
2307         }
2308       if (!_cpp_get_fresh_line (pfile))
2309         {
2310           result->type = CPP_EOF;
2311           if (!pfile->state.in_directive)
2312             {
2313               /* Tell the compiler the line number of the EOF token.  */
2314               result->src_loc = pfile->line_table->highest_line;
2315               result->flags = BOL;
2316             }
2317           return result;
2318         }
2319       if (!pfile->keep_tokens)
2320         {
2321           pfile->cur_run = &pfile->base_run;
2322           result = pfile->base_run.base;
2323           pfile->cur_token = result + 1;
2324         }
2325       result->flags = BOL;
2326       if (pfile->state.parsing_args == 2)
2327         result->flags |= PREV_WHITE;
2328     }
2329   buffer = pfile->buffer;
2330  update_tokens_line:
2331   result->src_loc = pfile->line_table->highest_line;
2332
2333  skipped_white:
2334   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2335       && !pfile->overlaid_buffer)
2336     {
2337       _cpp_process_line_notes (pfile, false);
2338       result->src_loc = pfile->line_table->highest_line;
2339     }
2340   c = *buffer->cur++;
2341
2342   if (pfile->forced_token_location_p)
2343     result->src_loc = *pfile->forced_token_location_p;
2344   else
2345     result->src_loc = linemap_position_for_column (pfile->line_table,
2346                                           CPP_BUF_COLUMN (buffer, buffer->cur));
2347
2348   switch (c)
2349     {
2350     case ' ': case '\t': case '\f': case '\v': case '\0':
2351       result->flags |= PREV_WHITE;
2352       skip_whitespace (pfile, c);
2353       goto skipped_white;
2354
2355     case '\n':
2356       if (buffer->cur < buffer->rlimit)
2357         CPP_INCREMENT_LINE (pfile, 0);
2358       buffer->need_line = true;
2359       goto fresh_line;
2360
2361     case '0': case '1': case '2': case '3': case '4':
2362     case '5': case '6': case '7': case '8': case '9':
2363       {
2364         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2365         result->type = CPP_NUMBER;
2366         lex_number (pfile, &result->val.str, &nst);
2367         warn_about_normalization (pfile, result, &nst);
2368         break;
2369       }
2370
2371     case 'L':
2372     case 'u':
2373     case 'U':
2374     case 'R':
2375       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2376          wide strings or raw strings.  */
2377       if (c == 'L' || CPP_OPTION (pfile, rliterals)
2378           || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2379         {
2380           if ((*buffer->cur == '\'' && c != 'R')
2381               || *buffer->cur == '"'
2382               || (*buffer->cur == 'R'
2383                   && c != 'R'
2384                   && buffer->cur[1] == '"'
2385                   && CPP_OPTION (pfile, rliterals))
2386               || (*buffer->cur == '8'
2387                   && c == 'u'
2388                   && (buffer->cur[1] == '"'
2389                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2390                           && CPP_OPTION (pfile, rliterals)))))
2391             {
2392               lex_string (pfile, result, buffer->cur - 1);
2393               break;
2394             }
2395         }
2396       /* Fall through.  */
2397
2398     case '_':
2399     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2400     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2401     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2402     case 's': case 't':           case 'v': case 'w': case 'x':
2403     case 'y': case 'z':
2404     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2405     case 'G': case 'H': case 'I': case 'J': case 'K':
2406     case 'M': case 'N': case 'O': case 'P': case 'Q':
2407     case 'S': case 'T':           case 'V': case 'W': case 'X':
2408     case 'Y': case 'Z':
2409       result->type = CPP_NAME;
2410       {
2411         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2412         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2413                                                 &nst,
2414                                                 &result->val.node.spelling);
2415         warn_about_normalization (pfile, result, &nst);
2416       }
2417
2418       /* Convert named operators to their proper types.  */
2419       if (result->val.node.node->flags & NODE_OPERATOR)
2420         {
2421           result->flags |= NAMED_OP;
2422           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2423         }
2424       break;
2425
2426     case '\'':
2427     case '"':
2428       lex_string (pfile, result, buffer->cur - 1);
2429       break;
2430
2431     case '/':
2432       /* A potential block or line comment.  */
2433       comment_start = buffer->cur;
2434       c = *buffer->cur;
2435
2436       if (c == '*')
2437         {
2438           if (_cpp_skip_block_comment (pfile))
2439             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2440         }
2441       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2442         {
2443           /* Don't warn for system headers.  */
2444           if (cpp_in_system_header (pfile))
2445             ;
2446           /* Warn about comments if pedantically GNUC89, and not
2447              in system headers.  */
2448           else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2449                    && CPP_PEDANTIC (pfile)
2450                    && ! buffer->warned_cplusplus_comments)
2451             {
2452               cpp_error (pfile, CPP_DL_PEDWARN,
2453                          "C++ style comments are not allowed in ISO C90");
2454               cpp_error (pfile, CPP_DL_PEDWARN,
2455                          "(this will be reported only once per input file)");
2456               buffer->warned_cplusplus_comments = 1;
2457             }
2458           /* Or if specifically desired via -Wc90-c99-compat.  */
2459           else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2460                    && ! CPP_OPTION (pfile, cplusplus)
2461                    && ! buffer->warned_cplusplus_comments)
2462             {
2463               cpp_error (pfile, CPP_DL_WARNING,
2464                          "C++ style comments are incompatible with C90");
2465               cpp_error (pfile, CPP_DL_WARNING,
2466                          "(this will be reported only once per input file)");
2467               buffer->warned_cplusplus_comments = 1;
2468             }
2469           /* In C89/C94, C++ style comments are forbidden.  */
2470           else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2471                     || CPP_OPTION (pfile, lang) == CLK_STDC94))
2472             {
2473               /* But don't be confused about valid code such as
2474                  - // immediately followed by *,
2475                  - // in a preprocessing directive,
2476                  - // in an #if 0 block.  */
2477               if (buffer->cur[1] == '*'
2478                   || pfile->state.in_directive
2479                   || pfile->state.skipping)
2480                 {
2481                   result->type = CPP_DIV;
2482                   break;
2483                 }
2484               else if (! buffer->warned_cplusplus_comments)
2485                 {
2486                   cpp_error (pfile, CPP_DL_ERROR,
2487                              "C++ style comments are not allowed in ISO C90");
2488                   cpp_error (pfile, CPP_DL_ERROR,
2489                              "(this will be reported only once per input "
2490                              "file)");
2491                   buffer->warned_cplusplus_comments = 1;
2492                 }
2493             }
2494           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2495             cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2496         }
2497       else if (c == '=')
2498         {
2499           buffer->cur++;
2500           result->type = CPP_DIV_EQ;
2501           break;
2502         }
2503       else
2504         {
2505           result->type = CPP_DIV;
2506           break;
2507         }
2508
2509       if (!pfile->state.save_comments)
2510         {
2511           result->flags |= PREV_WHITE;
2512           goto update_tokens_line;
2513         }
2514
2515       /* Save the comment as a token in its own right.  */
2516       save_comment (pfile, result, comment_start, c);
2517       break;
2518
2519     case '<':
2520       if (pfile->state.angled_headers)
2521         {
2522           lex_string (pfile, result, buffer->cur - 1);
2523           if (result->type != CPP_LESS)
2524             break;
2525         }
2526
2527       result->type = CPP_LESS;
2528       if (*buffer->cur == '=')
2529         buffer->cur++, result->type = CPP_LESS_EQ;
2530       else if (*buffer->cur == '<')
2531         {
2532           buffer->cur++;
2533           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2534         }
2535       else if (CPP_OPTION (pfile, digraphs))
2536         {
2537           if (*buffer->cur == ':')
2538             {
2539               /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2540                  three characters are <:: and the subsequent character
2541                  is neither : nor >, the < is treated as a preprocessor
2542                  token by itself".  */
2543               if (CPP_OPTION (pfile, cplusplus)
2544                   && CPP_OPTION (pfile, lang) != CLK_CXX98
2545                   && CPP_OPTION (pfile, lang) != CLK_GNUCXX
2546                   && buffer->cur[1] == ':'
2547                   && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2548                 break;
2549
2550               buffer->cur++;
2551               result->flags |= DIGRAPH;
2552               result->type = CPP_OPEN_SQUARE;
2553             }
2554           else if (*buffer->cur == '%')
2555             {
2556               buffer->cur++;
2557               result->flags |= DIGRAPH;
2558               result->type = CPP_OPEN_BRACE;
2559             }
2560         }
2561       break;
2562
2563     case '>':
2564       result->type = CPP_GREATER;
2565       if (*buffer->cur == '=')
2566         buffer->cur++, result->type = CPP_GREATER_EQ;
2567       else if (*buffer->cur == '>')
2568         {
2569           buffer->cur++;
2570           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2571         }
2572       break;
2573
2574     case '%':
2575       result->type = CPP_MOD;
2576       if (*buffer->cur == '=')
2577         buffer->cur++, result->type = CPP_MOD_EQ;
2578       else if (CPP_OPTION (pfile, digraphs))
2579         {
2580           if (*buffer->cur == ':')
2581             {
2582               buffer->cur++;
2583               result->flags |= DIGRAPH;
2584               result->type = CPP_HASH;
2585               if (*buffer->cur == '%' && buffer->cur[1] == ':')
2586                 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2587             }
2588           else if (*buffer->cur == '>')
2589             {
2590               buffer->cur++;
2591               result->flags |= DIGRAPH;
2592               result->type = CPP_CLOSE_BRACE;
2593             }
2594         }
2595       break;
2596
2597     case '.':
2598       result->type = CPP_DOT;
2599       if (ISDIGIT (*buffer->cur))
2600         {
2601           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2602           result->type = CPP_NUMBER;
2603           lex_number (pfile, &result->val.str, &nst);
2604           warn_about_normalization (pfile, result, &nst);
2605         }
2606       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2607         buffer->cur += 2, result->type = CPP_ELLIPSIS;
2608       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2609         buffer->cur++, result->type = CPP_DOT_STAR;
2610       break;
2611
2612     case '+':
2613       result->type = CPP_PLUS;
2614       if (*buffer->cur == '+')
2615         buffer->cur++, result->type = CPP_PLUS_PLUS;
2616       else if (*buffer->cur == '=')
2617         buffer->cur++, result->type = CPP_PLUS_EQ;
2618       break;
2619
2620     case '-':
2621       result->type = CPP_MINUS;
2622       if (*buffer->cur == '>')
2623         {
2624           buffer->cur++;
2625           result->type = CPP_DEREF;
2626           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2627             buffer->cur++, result->type = CPP_DEREF_STAR;
2628         }
2629       else if (*buffer->cur == '-')
2630         buffer->cur++, result->type = CPP_MINUS_MINUS;
2631       else if (*buffer->cur == '=')
2632         buffer->cur++, result->type = CPP_MINUS_EQ;
2633       break;
2634
2635     case '&':
2636       result->type = CPP_AND;
2637       if (*buffer->cur == '&')
2638         buffer->cur++, result->type = CPP_AND_AND;
2639       else if (*buffer->cur == '=')
2640         buffer->cur++, result->type = CPP_AND_EQ;
2641       break;
2642
2643     case '|':
2644       result->type = CPP_OR;
2645       if (*buffer->cur == '|')
2646         buffer->cur++, result->type = CPP_OR_OR;
2647       else if (*buffer->cur == '=')
2648         buffer->cur++, result->type = CPP_OR_EQ;
2649       break;
2650
2651     case ':':
2652       result->type = CPP_COLON;
2653       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2654         buffer->cur++, result->type = CPP_SCOPE;
2655       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2656         {
2657           buffer->cur++;
2658           result->flags |= DIGRAPH;
2659           result->type = CPP_CLOSE_SQUARE;
2660         }
2661       break;
2662
2663     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2664     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2665     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2666     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2667     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2668
2669     case '?': result->type = CPP_QUERY; break;
2670     case '~': result->type = CPP_COMPL; break;
2671     case ',': result->type = CPP_COMMA; break;
2672     case '(': result->type = CPP_OPEN_PAREN; break;
2673     case ')': result->type = CPP_CLOSE_PAREN; break;
2674     case '[': result->type = CPP_OPEN_SQUARE; break;
2675     case ']': result->type = CPP_CLOSE_SQUARE; break;
2676     case '{': result->type = CPP_OPEN_BRACE; break;
2677     case '}': result->type = CPP_CLOSE_BRACE; break;
2678     case ';': result->type = CPP_SEMICOLON; break;
2679
2680       /* @ is a punctuator in Objective-C.  */
2681     case '@': result->type = CPP_ATSIGN; break;
2682
2683     case '$':
2684     case '\\':
2685       {
2686         const uchar *base = --buffer->cur;
2687         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2688
2689         if (forms_identifier_p (pfile, true, &nst))
2690           {
2691             result->type = CPP_NAME;
2692             result->val.node.node = lex_identifier (pfile, base, true, &nst,
2693                                                     &result->val.node.spelling);
2694             warn_about_normalization (pfile, result, &nst);
2695             break;
2696           }
2697         buffer->cur++;
2698       }
2699
2700     default:
2701       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2702       break;
2703     }
2704
2705   return result;
2706 }
2707
2708 /* An upper bound on the number of bytes needed to spell TOKEN.
2709    Does not include preceding whitespace.  */
2710 unsigned int
2711 cpp_token_len (const cpp_token *token)
2712 {
2713   unsigned int len;
2714
2715   switch (TOKEN_SPELL (token))
2716     {
2717     default:            len = 6;                                break;
2718     case SPELL_LITERAL: len = token->val.str.len;               break;
2719     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
2720     }
2721
2722   return len;
2723 }
2724
/* Decode the single UTF-8 sequence that starts at NAME and store it
   in BUFFER as a \UXXXXXXXX escape with lower-case hex digits.
   Exactly 10 bytes are written to BUFFER and no terminating NUL is
   added.  Returns the number of bytes consumed from NAME, as derived
   from the lead byte.  Calls abort if a continuation byte does not
   have the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned long code_point;
  unsigned lead_bits;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The count of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  lead_bits = *src;
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* Keep only the payload bits of the lead byte.  */
  code_point = *src & (0x7F >> seq_len);

  /* Each continuation byte contributes six more payload bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      unsigned char next = *++src;

      /* Ill-formed UTF-8.  */
      if ((next & 0xC0) != 0x80)
        abort ();

      code_point = (code_point << 6) | (next & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;

  /* Eight hex digits, most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2758
2759 /* Given a token TYPE corresponding to a digraph, return a pointer to
2760    the spelling of the digraph.  */
2761 static const unsigned char *
2762 cpp_digraph2name (enum cpp_ttype type)
2763 {
2764   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2765 }
2766
2767 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2768    The buffer must already contain the enough space to hold the
2769    token's spelling.  Returns a pointer to the character after the
2770    last character written.  */
2771 unsigned char *
2772 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2773 {
2774   size_t i;
2775   const unsigned char *name = NODE_NAME (ident);
2776
2777   for (i = 0; i < NODE_LEN (ident); i++)
2778     if (name[i] & ~0x7F)
2779       {
2780         i += utf8_to_ucn (buffer, name + i) - 1;
2781         buffer += 10;
2782       }
2783     else
2784       *buffer++ = name[i];
2785
2786   return buffer;
2787 }
2788
2789 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2790    already contain the enough space to hold the token's spelling.
2791    Returns a pointer to the character after the last character written.
2792    FORSTRING is true if this is to be the spelling after translation
2793    phase 1 (with the original spelling of extended identifiers), false
2794    if extended identifiers should always be written using UCNs (there is
2795    no option for always writing them in the internal UTF-8 form).
2796    FIXME: Would be nice if we didn't need the PFILE argument.  */
2797 unsigned char *
2798 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2799                  unsigned char *buffer, bool forstring)
2800 {
2801   switch (TOKEN_SPELL (token))
2802     {
2803     case SPELL_OPERATOR:
2804       {
2805         const unsigned char *spelling;
2806         unsigned char c;
2807
2808         if (token->flags & DIGRAPH)
2809           spelling = cpp_digraph2name (token->type);
2810         else if (token->flags & NAMED_OP)
2811           goto spell_ident;
2812         else
2813           spelling = TOKEN_NAME (token);
2814
2815         while ((c = *spelling++) != '\0')
2816           *buffer++ = c;
2817       }
2818       break;
2819
2820     spell_ident:
2821     case SPELL_IDENT:
2822       if (forstring)
2823         {
2824           memcpy (buffer, NODE_NAME (token->val.node.spelling),
2825                   NODE_LEN (token->val.node.spelling));
2826           buffer += NODE_LEN (token->val.node.spelling);
2827         }
2828       else
2829         buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2830       break;
2831
2832     case SPELL_LITERAL:
2833       memcpy (buffer, token->val.str.text, token->val.str.len);
2834       buffer += token->val.str.len;
2835       break;
2836
2837     case SPELL_NONE:
2838       cpp_error (pfile, CPP_DL_ICE,
2839                  "unspellable token %s", TOKEN_NAME (token));
2840       break;
2841     }
2842
2843   return buffer;
2844 }
2845
2846 /* Returns TOKEN spelt as a null-terminated string.  The string is
2847    freed when the reader is destroyed.  Useful for diagnostics.  */
2848 unsigned char *
2849 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2850 {
2851   unsigned int len = cpp_token_len (token) + 1;
2852   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2853
2854   end = cpp_spell_token (pfile, token, start, false);
2855   end[0] = '\0';
2856
2857   return start;
2858 }
2859
2860 /* Returns a pointer to a string which spells the token defined by
2861    TYPE and FLAGS.  Used by C front ends, which really should move to
2862    using cpp_token_as_text.  */
2863 const char *
2864 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2865 {
2866   if (flags & DIGRAPH)
2867     return (const char *) cpp_digraph2name (type);
2868   else if (flags & NAMED_OP)
2869     return cpp_named_operator2name (type);
2870
2871   return (const char *) token_spellings[type].name;
2872 }
2873
2874 /* Writes the spelling of token to FP, without any preceding space.
2875    Separated from cpp_spell_token for efficiency - to avoid stdio
2876    double-buffering.  */
2877 void
2878 cpp_output_token (const cpp_token *token, FILE *fp)
2879 {
2880   switch (TOKEN_SPELL (token))
2881     {
2882     case SPELL_OPERATOR:
2883       {
2884         const unsigned char *spelling;
2885         int c;
2886
2887         if (token->flags & DIGRAPH)
2888           spelling = cpp_digraph2name (token->type);
2889         else if (token->flags & NAMED_OP)
2890           goto spell_ident;
2891         else
2892           spelling = TOKEN_NAME (token);
2893
2894         c = *spelling;
2895         do
2896           putc (c, fp);
2897         while ((c = *++spelling) != '\0');
2898       }
2899       break;
2900
2901     spell_ident:
2902     case SPELL_IDENT:
2903       {
2904         size_t i;
2905         const unsigned char * name = NODE_NAME (token->val.node.node);
2906
2907         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2908           if (name[i] & ~0x7F)
2909             {
2910               unsigned char buffer[10];
2911               i += utf8_to_ucn (buffer, name + i) - 1;
2912               fwrite (buffer, 1, 10, fp);
2913             }
2914           else
2915             fputc (NODE_NAME (token->val.node.node)[i], fp);
2916       }
2917       break;
2918
2919     case SPELL_LITERAL:
2920       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2921       break;
2922
2923     case SPELL_NONE:
2924       /* An error, most probably.  */
2925       break;
2926     }
2927 }
2928
2929 /* Compare two tokens.  */
2930 int
2931 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2932 {
2933   if (a->type == b->type && a->flags == b->flags)
2934     switch (TOKEN_SPELL (a))
2935       {
2936       default:                  /* Keep compiler happy.  */
2937       case SPELL_OPERATOR:
2938         /* token_no is used to track where multiple consecutive ##
2939            tokens were originally located.  */
2940         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2941       case SPELL_NONE:
2942         return (a->type != CPP_MACRO_ARG
2943                 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2944                     && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2945       case SPELL_IDENT:
2946         return (a->val.node.node == b->val.node.node
2947                 && a->val.node.spelling == b->val.node.spelling);
2948       case SPELL_LITERAL:
2949         return (a->val.str.len == b->val.str.len
2950                 && !memcmp (a->val.str.text, b->val.str.text,
2951                             a->val.str.len));
2952       }
2953
2954   return 0;
2955 }
2956
2957 /* Returns nonzero if a space should be inserted to avoid an
2958    accidental token paste for output.  For simplicity, it is
2959    conservative, and occasionally advises a space where one is not
2960    needed, e.g. "." and ".2".  */
2961 int
2962 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2963                  const cpp_token *token2)
2964 {
2965   enum cpp_ttype a = token1->type, b = token2->type;
2966   cppchar_t c;
2967
2968   if (token1->flags & NAMED_OP)
2969     a = CPP_NAME;
2970   if (token2->flags & NAMED_OP)
2971     b = CPP_NAME;
2972
2973   c = EOF;
2974   if (token2->flags & DIGRAPH)
2975     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2976   else if (token_spellings[b].category == SPELL_OPERATOR)
2977     c = token_spellings[b].name[0];
2978
2979   /* Quickly get everything that can paste with an '='.  */
2980   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2981     return 1;
2982
2983   switch (a)
2984     {
2985     case CPP_GREATER:   return c == '>';
2986     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2987     case CPP_PLUS:      return c == '+';
2988     case CPP_MINUS:     return c == '-' || c == '>';
2989     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2990     case CPP_MOD:       return c == ':' || c == '>';
2991     case CPP_AND:       return c == '&';
2992     case CPP_OR:        return c == '|';
2993     case CPP_COLON:     return c == ':' || c == '>';
2994     case CPP_DEREF:     return c == '*';
2995     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2996     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2997     case CPP_NAME:      return ((b == CPP_NUMBER
2998                                  && name_p (pfile, &token2->val.str))
2999                                 || b == CPP_NAME
3000                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
3001     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
3002                                 || c == '.' || c == '+' || c == '-');
3003                                       /* UCNs */
3004     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
3005                                  && b == CPP_NAME)
3006                                 || (CPP_OPTION (pfile, objc)
3007                                     && token1->val.str.text[0] == '@'
3008                                     && (b == CPP_NAME || b == CPP_STRING)));
3009     case CPP_STRING:
3010     case CPP_WSTRING:
3011     case CPP_UTF8STRING:
3012     case CPP_STRING16:
3013     case CPP_STRING32:  return (CPP_OPTION (pfile, user_literals)
3014                                 && (b == CPP_NAME
3015                                     || (TOKEN_SPELL (token2) == SPELL_LITERAL
3016                                         && ISIDST (token2->val.str.text[0]))));
3017
3018     default:            break;
3019     }
3020
3021   return 0;
3022 }
3023
3024 /* Output all the remaining tokens on the current line, and a newline
3025    character, to FP.  Leading whitespace is removed.  If there are
3026    macros, special token padding is not performed.  */
3027 void
3028 cpp_output_line (cpp_reader *pfile, FILE *fp)
3029 {
3030   const cpp_token *token;
3031
3032   token = cpp_get_token (pfile);
3033   while (token->type != CPP_EOF)
3034     {
3035       cpp_output_token (token, fp);
3036       token = cpp_get_token (pfile);
3037       if (token->flags & PREV_WHITE)
3038         putc (' ', fp);
3039     }
3040
3041   putc ('\n', fp);
3042 }
3043
3044 /* Return a string representation of all the remaining tokens on the
3045    current line.  The result is allocated using xmalloc and must be
3046    freed by the caller.  */
3047 unsigned char *
3048 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3049 {
3050   const cpp_token *token;
3051   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3052   unsigned int alloced = 120 + out;
3053   unsigned char *result = (unsigned char *) xmalloc (alloced);
3054
3055   /* If DIR_NAME is empty, there are no initial contents.  */
3056   if (dir_name)
3057     {
3058       sprintf ((char *) result, "#%s ", dir_name);
3059       out += 2;
3060     }
3061
3062   token = cpp_get_token (pfile);
3063   while (token->type != CPP_EOF)
3064     {
3065       unsigned char *last;
3066       /* Include room for a possible space and the terminating nul.  */
3067       unsigned int len = cpp_token_len (token) + 2;
3068
3069       if (out + len > alloced)
3070         {
3071           alloced *= 2;
3072           if (out + len > alloced)
3073             alloced = out + len;
3074           result = (unsigned char *) xrealloc (result, alloced);
3075         }
3076
3077       last = cpp_spell_token (pfile, token, &result[out], 0);
3078       out = last - result;
3079
3080       token = cpp_get_token (pfile);
3081       if (token->flags & PREV_WHITE)
3082         result[out++] = ' ';
3083     }
3084
3085   result[out] = '\0';
3086   return result;
3087 }
3088
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer size used.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is MIN_BUFF_SIZE plus one and a
   half times MIN_SIZE.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is MIN_EXTRA plus twice the
   number of bytes between BUFF->cur and BUFF->limit.  Both arguments
   are parenthesized in the expansion so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3103
3104 /* Create a new allocation buffer.  Place the control block at the end
3105    of the buffer, so that buffer overflows will cause immediate chaos.  */
3106 static _cpp_buff *
3107 new_buff (size_t len)
3108 {
3109   _cpp_buff *result;
3110   unsigned char *base;
3111
3112   if (len < MIN_BUFF_SIZE)
3113     len = MIN_BUFF_SIZE;
3114   len = CPP_ALIGN (len);
3115
3116 #ifdef ENABLE_VALGRIND_CHECKING
3117   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3118      struct first.  */
3119   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3120   base = XNEWVEC (unsigned char, len + slen);
3121   result = (_cpp_buff *) base;
3122   base += slen;
3123 #else
3124   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3125   result = (_cpp_buff *) (base + len);
3126 #endif
3127   result->base = base;
3128   result->cur = base;
3129   result->limit = base + len;
3130   result->next = NULL;
3131   return result;
3132 }
3133
3134 /* Place a chain of unwanted allocation buffers on the free list.  */
3135 void
3136 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3137 {
3138   _cpp_buff *end = buff;
3139
3140   while (end->next)
3141     end = end->next;
3142   end->next = pfile->free_buffs;
3143   pfile->free_buffs = buff;
3144 }
3145
3146 /* Return a free buffer of size at least MIN_SIZE.  */
3147 _cpp_buff *
3148 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3149 {
3150   _cpp_buff *result, **p;
3151
3152   for (p = &pfile->free_buffs;; p = &(*p)->next)
3153     {
3154       size_t size;
3155
3156       if (*p == NULL)
3157         return new_buff (min_size);
3158       result = *p;
3159       size = result->limit - result->base;
3160       /* Return a buffer that's big enough, but don't waste one that's
3161          way too big.  */
3162       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3163         break;
3164     }
3165
3166   *p = result->next;
3167   result->next = NULL;
3168   result->cur = result->base;
3169   return result;
3170 }
3171
3172 /* Creates a new buffer with enough space to hold the uncommitted
3173    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3174    the excess bytes to the new buffer.  Chains the new buffer after
3175    BUFF, and returns the new buffer.  */
3176 _cpp_buff *
3177 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3178 {
3179   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3180   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3181
3182   buff->next = new_buff;
3183   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3184   return new_buff;
3185 }
3186
3187 /* Creates a new buffer with enough space to hold the uncommitted
3188    remaining bytes of the buffer pointed to by BUFF, and at least
3189    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3190    Chains the new buffer before the buffer pointed to by BUFF, and
3191    updates the pointer to point to the new buffer.  */
3192 void
3193 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3194 {
3195   _cpp_buff *new_buff, *old_buff = *pbuff;
3196   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3197
3198   new_buff = _cpp_get_buff (pfile, size);
3199   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3200   new_buff->next = old_buff;
3201   *pbuff = new_buff;
3202 }
3203
3204 /* Free a chain of buffers starting at BUFF.  */
3205 void
3206 _cpp_free_buff (_cpp_buff *buff)
3207 {
3208   _cpp_buff *next;
3209
3210   for (; buff; buff = next)
3211     {
3212       next = buff->next;
3213 #ifdef ENABLE_VALGRIND_CHECKING
3214       free (buff);
3215 #else
3216       free (buff->base);
3217 #endif
3218     }
3219 }
3220
3221 /* Allocate permanent, unaligned storage of length LEN.  */
3222 unsigned char *
3223 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3224 {
3225   _cpp_buff *buff = pfile->u_buff;
3226   unsigned char *result = buff->cur;
3227
3228   if (len > (size_t) (buff->limit - result))
3229     {
3230       buff = _cpp_get_buff (pfile, len);
3231       buff->next = pfile->u_buff;
3232       pfile->u_buff = buff;
3233       result = buff->cur;
3234     }
3235
3236   buff->cur = result + len;
3237   return result;
3238 }
3239
3240 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3241    That buffer is used for growing allocations when saving macro
3242    replacement lists in a #define, and when parsing an answer to an
3243    assertion in #assert, #unassert or #if (and therefore possibly
3244    whilst expanding macros).  It therefore must not be used by any
3245    code that they might call: specifically the lexer and the guts of
3246    the macro expander.
3247
3248    All existing other uses clearly fit this restriction: storing
3249    registered pragmas during initialization.  */
3250 unsigned char *
3251 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3252 {
3253   _cpp_buff *buff = pfile->a_buff;
3254   unsigned char *result = buff->cur;
3255
3256   if (len > (size_t) (buff->limit - result))
3257     {
3258       buff = _cpp_get_buff (pfile, len);
3259       buff->next = pfile->a_buff;
3260       pfile->a_buff = buff;
3261       result = buff->cur;
3262     }
3263
3264   buff->cur = result + len;
3265   return result;
3266 }
3267
3268 /* Say which field of TOK is in use.  */
3269
3270 enum cpp_token_fld_kind
3271 cpp_token_val_index (const cpp_token *tok)
3272 {
3273   switch (TOKEN_SPELL (tok))
3274     {
3275     case SPELL_IDENT:
3276       return CPP_TOKEN_FLD_NODE;
3277     case SPELL_LITERAL:
3278       return CPP_TOKEN_FLD_STR;
3279     case SPELL_OPERATOR:
3280       if (tok->type == CPP_PASTE)
3281         return CPP_TOKEN_FLD_TOKEN_NO;
3282       else
3283         return CPP_TOKEN_FLD_NONE;
3284     case SPELL_NONE:
3285       if (tok->type == CPP_MACRO_ARG)
3286         return CPP_TOKEN_FLD_ARG_NO;
3287       else if (tok->type == CPP_PADDING)
3288         return CPP_TOKEN_FLD_SOURCE;
3289       else if (tok->type == CPP_PRAGMA)
3290         return CPP_TOKEN_FLD_PRAGMA;
3291       /* else fall through */
3292     default:
3293       return CPP_TOKEN_FLD_NONE;
3294     }
3295 }
3296
3297 /* All tokens lexed in R after calling this function will be forced to have
3298    their source_location the same as the location referenced by P, until
3299    cpp_stop_forcing_token_locations is called for R.  */
3300
3301 void
3302 cpp_force_token_locations (cpp_reader *r, source_location *p)
3303 {
3304   r->forced_token_location_p = p;
3305 }
3306
3307 /* Go back to assigning locations naturally for lexed tokens.  */
3308
3309 void
3310 cpp_stop_forcing_token_locations (cpp_reader *r)
3311 {
3312   r->forced_token_location_p = NULL;
3313 }