1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000-2021 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
37 enum spell_type category
;
38 const unsigned char *name
;
/* Table of the six digraph spellings, stored as pointers to constant
   unsigned-character strings.  How the table is indexed is not visible
   in this part of the file.  */
41 static const unsigned char *const digraph_spellings
[] =
42 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
/* Helper macros that expand to one initializer of the spelling table.
   OP (e, s) produces an entry with category SPELL_OPERATOR whose name
   is the string S.  TK (e, s) produces an entry with category
   SPELL_<S> whose name is the stringified enumerator E.  */
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
/* The spelling table itself, N_TTYPES entries long.  TTYPE_TABLE is
   defined outside this view; presumably it expands to a sequence of
   OP and TK invocations -- confirm against its definition.  */
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
/* Accessors into token_spellings, indexed by the type field of the
   token pointed to by TOKEN: TOKEN_SPELL yields the entry's category,
   TOKEN_NAME yields its name.  */
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
57 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
58 static void store_comment (cpp_reader
*, cpp_token
*);
59 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
60 unsigned int, enum cpp_ttype
);
61 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
62 static int name_p (cpp_reader
*, const cpp_string
*);
63 static tokenrun
*next_tokenrun (tokenrun
*);
65 static _cpp_buff
*new_buff (size_t);
70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token
*token
, const char *string
)
75 if (token
->type
!= CPP_NAME
)
78 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
86 if (buffer
->notes_used
== buffer
->notes_cap
)
88 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
89 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
93 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
94 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds
104 One of the paths through the ifdefs should provide
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
115 /* Configure gives us an ifdef test. */
116 #ifndef WORDS_BIGENDIAN
117 #define WORDS_BIGENDIAN 0
120 /* We'd like the largest integer that fits into a register. There's nothing
121 in <stdint.h> that gives us that. For most hosts this is unsigned long,
122 but MS decided on an LLP64 model. Thankfully when building with GCC we
123 can get the "real" word size. */
125 typedef unsigned int word_type
__attribute__((__mode__(__word__
)));
127 typedef unsigned long word_type
;
130 /* The code below is only expecting sizes 4 or 8.
131 Die at compile-time if this expectation is violated. */
132 typedef char check_word_type_size
133 [(sizeof(word_type
) == 8 || sizeof(word_type
) == 4) * 2 - 1];
135 /* Return VAL with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
138 static inline word_type
139 acc_char_mask_misalign (word_type val
, unsigned int n
)
149 /* Return X replicated to all byte positions within WORD_TYPE. */
151 static inline word_type
152 acc_char_replicate (uchar x
)
156 ret
= (x
<< 24) | (x
<< 16) | (x
<< 8) | x
;
157 if (sizeof(word_type
) == 8)
158 ret
= (ret
<< 16 << 16) | ret
;
162 /* Return non-zero if some byte of VAL is (probably) C. */
164 static inline word_type
165 acc_char_cmp (word_type val
, word_type c
)
167 #if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val
^ c
);
172 word_type magic
= 0x7efefefeU
;
173 if (sizeof(word_type
) == 8)
174 magic
= (magic
<< 16 << 16) | 0xfefefefeU
;
178 return ((val
+ magic
) ^ ~val
) & ~magic
;
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED
,
187 word_type val ATTRIBUTE_UNUSED
)
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp
);
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i
= 0; i
< sizeof(word_type
); ++i
)
202 c
= (val
>> (sizeof(word_type
) - i
- 1) * 8) & 0xff;
204 c
= (val
>> i
* 8) & 0xff;
206 if (c
== '\n' || c
== '\r' || c
== '\\' || c
== '?')
214 /* A version of the fast scanner using bit fiddling techniques.
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
225 static const uchar
* search_line_acc_char (const uchar
*, const uchar
*)
229 search_line_acc_char (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
231 const word_type repl_nl
= acc_char_replicate ('\n');
232 const word_type repl_cr
= acc_char_replicate ('\r');
233 const word_type repl_bs
= acc_char_replicate ('\\');
234 const word_type repl_qm
= acc_char_replicate ('?');
236 unsigned int misalign
;
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p
= (word_type
*)((uintptr_t)s
& -sizeof(word_type
));
243 misalign
= (uintptr_t)s
& (sizeof(word_type
) - 1);
245 val
= acc_char_mask_misalign (val
, misalign
);
250 t
= acc_char_cmp (val
, repl_nl
);
251 t
|= acc_char_cmp (val
, repl_cr
);
252 t
|= acc_char_cmp (val
, repl_bs
);
253 t
|= acc_char_cmp (val
, repl_qm
);
255 if (__builtin_expect (t
!= 0, 0))
257 int i
= acc_char_index (t
, val
);
259 return (const uchar
*)p
+ i
;
266 /* Disable on Solaris 2/x86 until the following problem can be properly
269 The Solaris 10+ assembler tags objects with the instruction set
270 extensions used, so SSE4.2 executables cannot run on machines that
271 don't support that extension. */
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
275 /* Replicated character data to be shared between implementations.
276 Recall that outside of a context with vector support we can't
277 define compatible vector types, therefore these are all defined
278 in terms of raw characters. */
279 static const char repl_chars
[4][16] __attribute__((aligned(16))) = {
280 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
281 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
282 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
283 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
284 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
285 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
286 { '?', '?', '?', '?', '?', '?', '?', '?',
287 '?', '?', '?', '?', '?', '?', '?', '?' },
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
293 which was packaged into SSE1; it is also present in the AMD MMX
294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
299 __attribute__((__target__("sse")))
301 search_line_mmx (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
303 typedef char v8qi
__attribute__ ((__vector_size__ (8)));
304 typedef int __m64
__attribute__ ((__vector_size__ (8), __may_alias__
));
306 const v8qi repl_nl
= *(const v8qi
*)repl_chars
[0];
307 const v8qi repl_cr
= *(const v8qi
*)repl_chars
[1];
308 const v8qi repl_bs
= *(const v8qi
*)repl_chars
[2];
309 const v8qi repl_qm
= *(const v8qi
*)repl_chars
[3];
311 unsigned int misalign
, found
, mask
;
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign
= (uintptr_t)s
& 7;
319 p
= (const v8qi
*)((uintptr_t)s
& -8);
322 /* Create a mask for the bytes that are valid within the first
323 8-byte block. The Idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask
= -1u << misalign
;
328 /* Main loop processing 8 bytes at a time. */
336 t
= __builtin_ia32_pcmpeqb(data
, repl_nl
);
337 c
= __builtin_ia32_pcmpeqb(data
, repl_cr
);
338 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
339 c
= __builtin_ia32_pcmpeqb(data
, repl_bs
);
340 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
341 c
= __builtin_ia32_pcmpeqb(data
, repl_qm
);
342 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
343 found
= __builtin_ia32_pmovmskb (t
);
348 __builtin_ia32_emms ();
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found
= __builtin_ctz(found
);
353 return (const uchar
*)p
+ found
;
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
360 __attribute__((__target__("sse2")))
362 search_line_sse2 (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
364 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
366 const v16qi repl_nl
= *(const v16qi
*)repl_chars
[0];
367 const v16qi repl_cr
= *(const v16qi
*)repl_chars
[1];
368 const v16qi repl_bs
= *(const v16qi
*)repl_chars
[2];
369 const v16qi repl_qm
= *(const v16qi
*)repl_chars
[3];
371 unsigned int misalign
, found
, mask
;
375 /* Align the source pointer. */
376 misalign
= (uintptr_t)s
& 15;
377 p
= (const v16qi
*)((uintptr_t)s
& -16);
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The Idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask
= -1u << misalign
;
386 /* Main loop processing 16 bytes at a time. */
395 t
|= data
== repl_cr
;
396 t
|= data
== repl_bs
;
397 t
|= data
== repl_qm
;
398 found
= __builtin_ia32_pmovmskb128 (t
);
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found
= __builtin_ctz(found
);
406 return (const uchar
*)p
+ found
;
410 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
414 __attribute__((__target__("sse4.2")))
416 search_line_sse42 (const uchar
*s
, const uchar
*end
)
418 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
419 static const v16qi search
= { '\n', '\r', '?', '\\' };
421 uintptr_t si
= (uintptr_t)s
;
424 /* Check for unaligned input. */
429 if (__builtin_expect (end
- s
< 16, 0)
430 && __builtin_expect ((si
& 0xfff) > 0xff0, 0))
432 /* There are fewer than 16 bytes left in the buffer, and fewer
433 than 16 bytes left on the page. Reading 16 bytes at this
434 point might generate a spurious page fault. Defer to the
435 SSE2 implementation, which already handles alignment. */
436 return search_line_sse2 (s
, end
);
439 /* ??? The builtin doesn't understand that the PCMPESTRI read from
440 memory need not be aligned. */
441 sv
= __builtin_ia32_loaddqu ((const char *) s
);
442 index
= __builtin_ia32_pcmpestri128 (search
, 4, sv
, 16, 0);
444 if (__builtin_expect (index
< 16, 0))
447 /* Advance the pointer to an aligned address. We will re-scan a
448 few bytes, but we no longer need to care about reading past the
449 end of a page, since we're guaranteed a match. */
450 s
= (const uchar
*)((si
+ 15) & -16);
453 /* Main loop, processing 16 bytes at a time. */
454 #ifdef __GCC_ASM_FLAG_OUTPUTS__
459 /* By using inline assembly instead of the builtin,
460 we can use the result, as well as the flags set. */
461 __asm ("%vpcmpestri\t$0, %2, %3"
462 : "=c"(index
), "=@ccc"(f
)
463 : "m"(*s
), "x"(search
), "a"(4), "d"(16));
471 /* By doing the whole loop in inline assembly,
472 we can make proper use of the flags set. */
473 __asm ( ".balign 16\n"
475 " %vpcmpestri\t$0, (%1), %2\n"
477 : "=&c"(index
), "+r"(s
)
478 : "x"(search
), "a"(4), "d"(16));
486 /* Work around out-dated assemblers without sse4 support. */
487 #define search_line_sse42 search_line_sse2
490 /* Check the CPU capabilities. */
492 #include "../gcc/config/i386/cpuid.h"
494 typedef const uchar
* (*search_line_fast_type
) (const uchar
*, const uchar
*);
495 static search_line_fast_type search_line_fast
;
497 #define HAVE_init_vectorized_lexer 1
499 init_vectorized_lexer (void)
501 unsigned dummy
, ecx
= 0, edx
= 0;
502 search_line_fast_type impl
= search_line_acc_char
;
505 #if defined(__SSE4_2__)
507 #elif defined(__SSE2__)
509 #elif defined(__SSE__)
514 impl
= search_line_sse42
;
515 else if (__get_cpuid (1, &dummy
, &dummy
, &ecx
, &edx
) || minimum
== 2)
517 if (minimum
== 3 || (ecx
& bit_SSE4_2
))
518 impl
= search_line_sse42
;
519 else if (minimum
== 2 || (edx
& bit_SSE2
))
520 impl
= search_line_sse2
;
521 else if (minimum
== 1 || (edx
& bit_SSE
))
522 impl
= search_line_mmx
;
524 else if (__get_cpuid (0x80000001, &dummy
, &dummy
, &dummy
, &edx
))
527 || (edx
& (bit_MMXEXT
| bit_CMOV
)) == (bit_MMXEXT
| bit_CMOV
))
528 impl
= search_line_mmx
;
531 search_line_fast
= impl
;
534 #elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
536 /* A version of the fast scanner using AltiVec vectorized byte compares
537 and VSX unaligned loads (when VSX is available). This is otherwise
538 the same as the AltiVec version. */
540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
542 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
544 typedef __attribute__((altivec(vector
))) unsigned char vc
;
547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
559 '?', '?', '?', '?', '?', '?', '?', '?',
560 '?', '?', '?', '?', '?', '?', '?', '?',
562 const vc zero
= { 0 };
566 /* Main loop processing 16 bytes at a time. */
569 vc m_nl
, m_cr
, m_bs
, m_qm
;
571 data
= __builtin_vec_vsx_ld (0, s
);
574 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
575 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
576 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
577 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
578 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
580 /* T now contains 0xff in bytes for which we matched one of the relevant
581 characters. We want to exit the loop if any byte in T is non-zero.
582 Below is the expansion of vec_any_ne(t, zero). */
584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
586 /* Restore s to point to the 16 bytes we just processed. */
590 #define N (sizeof(vc) / sizeof(long))
594 /* Statically assert that N is 2 or 4. */
595 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
597 unsigned long l
, i
= 0;
601 /* Find the first word of T that is non-zero. */
608 s
+= sizeof(unsigned long);
612 s
+= sizeof(unsigned long);
618 s
+= sizeof(unsigned long);
622 /* L now contains 0xff in bytes for which we matched one of the
623 relevant characters. We can find the byte index by finding
624 its bit index and dividing by 8. */
625 #ifdef __BIG_ENDIAN__
626 l
= __builtin_clzl(l
) >> 3;
628 l
= __builtin_ctzl(l
) >> 3;
636 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
638 /* A version of the fast scanner using AltiVec vectorized byte compares.
639 This cannot be used for little endian because vec_lvsl/lvsr are
640 deprecated for little endian and the code won't work properly. */
641 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
642 so we can't compile this function without -maltivec on the command line
643 (or implied by some other switch). */
646 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
648 typedef __attribute__((altivec(vector
))) unsigned char vc
;
651 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
652 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
655 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
656 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
659 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
660 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
663 '?', '?', '?', '?', '?', '?', '?', '?',
664 '?', '?', '?', '?', '?', '?', '?', '?',
667 -1, -1, -1, -1, -1, -1, -1, -1,
668 -1, -1, -1, -1, -1, -1, -1, -1,
670 const vc zero
= { 0 };
674 /* Altivec loads automatically mask addresses with -16. This lets us
675 issue the first load as early as possible. */
676 data
= __builtin_vec_ld(0, (const vc
*)s
);
678 /* Discard bytes before the beginning of the buffer. Do this by
679 beginning with all ones and shifting in zeros according to the
680 mis-alignment. The LVSR instruction pulls the exact shift we
681 want from the address. */
682 mask
= __builtin_vec_lvsr(0, s
);
683 mask
= __builtin_vec_perm(zero
, ones
, mask
);
686 /* While altivec loads mask addresses, we still need to align S so
687 that the offset we compute at the end is correct. */
688 s
= (const uchar
*)((uintptr_t)s
& -16);
690 /* Main loop processing 16 bytes at a time. */
694 vc m_nl
, m_cr
, m_bs
, m_qm
;
697 data
= __builtin_vec_ld(0, (const vc
*)s
);
700 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
701 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
702 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
703 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
704 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
706 /* T now contains 0xff in bytes for which we matched one of the relevant
707 characters. We want to exit the loop if any byte in T is non-zero.
708 Below is the expansion of vec_any_ne(t, zero). */
710 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
713 #define N (sizeof(vc) / sizeof(long))
717 /* Statically assert that N is 2 or 4. */
718 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
720 unsigned long l
, i
= 0;
724 /* Find the first word of T that is non-zero. */
731 s
+= sizeof(unsigned long);
735 s
+= sizeof(unsigned long);
741 s
+= sizeof(unsigned long);
745 /* L now contains 0xff in bytes for which we matched one of the
746 relevant characters. We can find the byte index by finding
747 its bit index and dividing by 8. */
748 l
= __builtin_clzl(l
) >> 3;
755 #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
756 #include "arm_neon.h"
758 /* This doesn't have to be the exact page size, but no system may use
759 a size smaller than this. ARMv8 requires a minimum page size of
760 4k. The impact of being conservative here is a small number of
761 cases will take the slightly slower entry path into the main
764 #define AARCH64_MIN_PAGE_SIZE 4096
767 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
769 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
770 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
771 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
772 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
773 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
775 #ifdef __ARM_BIG_ENDIAN
776 const int16x8_t shift
= {8, 8, 8, 8, 0, 0, 0, 0};
778 const int16x8_t shift
= {0, 0, 0, 0, 8, 8, 8, 8};
788 /* Align the source pointer. */
789 p
= (const uint8_t *)((uintptr_t)s
& -16);
791 /* Assuming random string start positions, with a 4k page size we'll take
792 the slow path about 0.37% of the time. */
793 if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
794 - (((uintptr_t) s
) & (AARCH64_MIN_PAGE_SIZE
- 1)))
797 /* Slow path: the string starts near a possible page boundary. */
798 uint32_t misalign
, mask
;
800 misalign
= (uintptr_t)s
& 15;
801 mask
= (-1u << misalign
) & 0xffff;
803 t
= vceqq_u8 (data
, repl_nl
);
804 u
= vceqq_u8 (data
, repl_cr
);
805 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
806 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
808 t
= vandq_u8 (t
, xmask
);
810 m
= vshlq_u16 (m
, shift
);
811 found
= vaddvq_u16 (m
);
814 return (const uchar
*)p
+ __builtin_ctz (found
);
818 data
= vld1q_u8 ((const uint8_t *) s
);
819 t
= vceqq_u8 (data
, repl_nl
);
820 u
= vceqq_u8 (data
, repl_cr
);
821 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
822 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
824 if (__builtin_expect (vpaddd_u64 ((uint64x2_t
)t
) != 0, 0))
832 t
= vceqq_u8 (data
, repl_nl
);
833 u
= vceqq_u8 (data
, repl_cr
);
834 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
835 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
837 } while (!vpaddd_u64 ((uint64x2_t
)t
));
840 /* Now that we've found the terminating substring, work out precisely where
842 t
= vandq_u8 (t
, xmask
);
844 m
= vshlq_u16 (m
, shift
);
845 found
= vaddvq_u16 (m
);
846 return (((((uintptr_t) p
) < (uintptr_t) s
) ? s
: (const uchar
*)p
)
847 + __builtin_ctz (found
));
850 #elif defined (__ARM_NEON)
851 #include "arm_neon.h"
854 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
856 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
857 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
858 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
859 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
860 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
862 unsigned int misalign
, found
, mask
;
866 /* Align the source pointer. */
867 misalign
= (uintptr_t)s
& 15;
868 p
= (const uint8_t *)((uintptr_t)s
& -16);
871 /* Create a mask for the bytes that are valid within the first
872 16-byte block. The Idea here is that the AND with the mask
873 within the loop is "free", since we need some AND or TEST
874 insn in order to set the flags for the branch anyway. */
875 mask
= (-1u << misalign
) & 0xffff;
877 /* Main loop, processing 16 bytes at a time. */
885 uint8x16_t t
, u
, v
, w
;
892 t
= vceqq_u8 (data
, repl_nl
);
893 u
= vceqq_u8 (data
, repl_cr
);
894 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
895 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
896 t
= vandq_u8 (vorrq_u8 (v
, w
), xmask
);
897 l
= vpadd_u8 (vget_low_u8 (t
), vget_high_u8 (t
));
901 found
= vget_lane_u32 ((uint32x2_t
) vorr_u64 ((uint64x1_t
) n
,
902 vshr_n_u64 ((uint64x1_t
) n
, 24)), 0);
907 /* FOUND contains 1 in bits for which we matched a relevant
908 character. Conversion to the byte index is trivial. */
909 found
= __builtin_ctz (found
);
910 return (const uchar
*)p
+ found
;
915 /* We only have one accelerated alternative. Use a direct call so that
916 we encourage inlining. */
918 #define search_line_fast search_line_acc_char
922 /* Initialize the lexer if needed. */
925 _cpp_init_lexer (void)
927 #ifdef HAVE_init_vectorized_lexer
928 init_vectorized_lexer ();
932 /* Returns with a logical line that contains no escaped newlines or
933 trigraphs. This is a time-critical inner loop. */
935 _cpp_clean_line (cpp_reader
*pfile
)
941 buffer
= pfile
->buffer
;
942 buffer
->cur_note
= buffer
->notes_used
= 0;
943 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
944 buffer
->need_line
= false;
945 s
= buffer
->next_line
;
947 if (!buffer
->from_stage3
)
949 const uchar
*pbackslash
= NULL
;
951 /* Fast path. This is the common case of an un-escaped line with
952 no trigraphs. The primary win here is by not writing any
953 data back to memory until we have to. */
956 /* Perform an optimized search for \n, \r, \\, ?. */
957 s
= search_line_fast (s
, buffer
->rlimit
);
962 /* Record the location of the backslash and continue. */
965 else if (__builtin_expect (c
== '?', 0))
967 if (__builtin_expect (s
[1] == '?', false)
968 && _cpp_trigraph_map
[s
[2]])
970 /* Have a trigraph. We may or may not have to convert
971 it. Add a line note regardless, for -Wtrigraphs. */
972 add_line_note (buffer
, s
, s
[2]);
973 if (CPP_OPTION (pfile
, trigraphs
))
975 /* We do, and that means we have to switch to the
978 *d
= _cpp_trigraph_map
[s
[2]];
983 /* Not a trigraph. Continue on fast-path. */
990 /* This must be \r or \n. We're either done, or we'll be forced
991 to write back to the buffer and continue on the slow path. */
994 if (__builtin_expect (s
== buffer
->rlimit
, false))
997 /* DOS line ending? */
998 if (__builtin_expect (c
== '\r', false) && s
[1] == '\n')
1001 if (s
== buffer
->rlimit
)
1005 if (__builtin_expect (pbackslash
== NULL
, true))
1008 /* Check for escaped newline. */
1010 while (is_nvspace (p
[-1]))
1012 if (p
- 1 != pbackslash
)
1015 /* Have an escaped newline; process it and proceed to
1017 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
1019 buffer
->next_line
= p
- 1;
1027 if (c
== '\n' || c
== '\r')
1029 /* Handle DOS line endings. */
1030 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
1032 if (s
== buffer
->rlimit
)
1037 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
1039 if (p
== buffer
->next_line
|| p
[-1] != '\\')
1042 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
1044 buffer
->next_line
= p
- 1;
1046 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
1048 /* Add a note regardless, for the benefit of -Wtrigraphs. */
1049 add_line_note (buffer
, d
, s
[2]);
1050 if (CPP_OPTION (pfile
, trigraphs
))
1052 *d
= _cpp_trigraph_map
[s
[2]];
1060 while (*s
!= '\n' && *s
!= '\r')
1064 /* Handle DOS line endings. */
1065 if (*s
== '\r' && s
+ 1 != buffer
->rlimit
&& s
[1] == '\n')
1071 /* A sentinel note that should never be processed. */
1072 add_line_note (buffer
, d
+ 1, '\n');
1073 buffer
->next_line
= s
+ 1;
1076 /* Return true if the trigraph indicated by NOTE should be warned
1077 about in a comment. */
1079 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
1083 /* Within comments we don't warn about trigraphs, unless the
1084 trigraph forms an escaped newline, as that may change
1086 if (note
->type
!= '/')
1089 /* If -trigraphs, then this was an escaped newline iff the next note
1091 if (CPP_OPTION (pfile
, trigraphs
))
1092 return note
[1].pos
== note
->pos
;
1094 /* Otherwise, see if this forms an escaped newline. */
1096 while (is_nvspace (*p
))
1099 /* There might have been escaped newlines between the trigraph and the
1100 newline we found. Hence the position test. */
1101 return (*p
== '\n' && p
< note
[1].pos
);
1104 /* Process the notes created by add_line_note as far as the current
1107 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
1109 cpp_buffer
*buffer
= pfile
->buffer
;
1113 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
1116 if (note
->pos
> buffer
->cur
)
1120 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
1122 if (note
->type
== '\\' || note
->type
== ' ')
1124 if (note
->type
== ' ' && !in_comment
)
1125 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
1126 "backslash and newline separated by space");
1128 if (buffer
->next_line
> buffer
->rlimit
)
1130 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
1131 "backslash-newline at end of file");
1132 /* Prevent "no newline at end of file" warning. */
1133 buffer
->next_line
= buffer
->rlimit
;
1136 buffer
->line_base
= note
->pos
;
1137 CPP_INCREMENT_LINE (pfile
, 0);
1139 else if (_cpp_trigraph_map
[note
->type
])
1141 if (CPP_OPTION (pfile
, warn_trigraphs
)
1142 && (!in_comment
|| warn_in_comment (pfile
, note
)))
1144 if (CPP_OPTION (pfile
, trigraphs
))
1145 cpp_warning_with_line (pfile
, CPP_W_TRIGRAPHS
,
1146 pfile
->line_table
->highest_line
, col
,
1147 "trigraph ??%c converted to %c",
1149 (int) _cpp_trigraph_map
[note
->type
]);
1152 cpp_warning_with_line
1153 (pfile
, CPP_W_TRIGRAPHS
,
1154 pfile
->line_table
->highest_line
, col
,
1155 "trigraph ??%c ignored, use -trigraphs to enable",
1160 else if (note
->type
== 0)
1161 /* Already processed in lex_raw_string. */;
1167 /* Skip a C-style block comment. We find the end of the comment by
1168 seeing if an asterisk is before every '/' we encounter. Returns
1169 nonzero if comment terminated by EOF, zero otherwise.
1171 Buffer->cur points to the initial asterisk of the comment. */
1173 _cpp_skip_block_comment (cpp_reader
*pfile
)
1175 cpp_buffer
*buffer
= pfile
->buffer
;
1176 const uchar
*cur
= buffer
->cur
;
1185 /* People like decorating comments with '*', so check for '/'
1186 instead for efficiency. */
1194 /* Warn about potential nested comments, but not if the '/'
1195 comes immediately before the true comment delimiter.
1196 Don't bother to get it right across escaped newlines. */
1197 if (CPP_OPTION (pfile
, warn_comments
)
1198 && cur
[0] == '*' && cur
[1] != '/')
1201 cpp_warning_with_line (pfile
, CPP_W_COMMENTS
,
1202 pfile
->line_table
->highest_line
,
1203 CPP_BUF_COL (buffer
),
1204 "\"/*\" within comment");
1210 buffer
->cur
= cur
- 1;
1211 _cpp_process_line_notes (pfile
, true);
1212 if (buffer
->next_line
>= buffer
->rlimit
)
1214 _cpp_clean_line (pfile
);
1216 cols
= buffer
->next_line
- buffer
->line_base
;
1217 CPP_INCREMENT_LINE (pfile
, cols
);
1224 _cpp_process_line_notes (pfile
, true);
1228 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1229 terminating newline. Handles escaped newlines. Returns nonzero
1230 if a multiline comment. */
1232 skip_line_comment (cpp_reader
*pfile
)
1234 cpp_buffer
*buffer
= pfile
->buffer
;
1235 location_t orig_line
= pfile
->line_table
->highest_line
;
1237 while (*buffer
->cur
!= '\n')
1240 _cpp_process_line_notes (pfile
, true);
1241 return orig_line
!= pfile
->line_table
->highest_line
;
1244 /* Skips whitespace, saving the next non-whitespace character. */
1246 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
1248 cpp_buffer
*buffer
= pfile
->buffer
;
1249 bool saw_NUL
= false;
1253 /* Horizontal space always OK. */
1254 if (c
== ' ' || c
== '\t')
1256 /* Just \f \v or \0 left. */
1259 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
1260 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
1261 CPP_BUF_COL (buffer
),
1262 "%s in preprocessing directive",
1263 c
== '\f' ? "form feed" : "vertical tab");
1267 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1268 while (is_nvspace (c
));
1271 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
1276 /* See if the characters of a number token are valid in a name (no
1277 '.', '+' or '-'). */
1279 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
1283 for (i
= 0; i
< string
->len
; i
++)
1284 if (!is_idchar (string
->text
[i
]))
1290 /* After parsing an identifier or other sequence, produce a warning about
1291 sequences not in NFC/NFKC. */
1293 warn_about_normalization (cpp_reader
*pfile
,
1294 const cpp_token
*token
,
1295 const struct normalize_state
*s
)
1297 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
1298 && !pfile
->state
.skipping
)
1300 /* Make sure that the token is printed using UCNs, even
1301 if we'd otherwise happily print UTF-8. */
1302 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
1305 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
1306 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
1307 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1308 "`%.*s' is not in NFKC", (int) sz
, buf
);
1309 else if (CPP_OPTION (pfile
, cxx23_identifiers
))
1310 cpp_pedwarning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1311 "`%.*s' is not in NFC", (int) sz
, buf
);
1313 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1314 "`%.*s' is not in NFC", (int) sz
, buf
);
1319 static const cppchar_t utf8_signifier
= 0xC0;
1321 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1322 an identifier. FIRST is TRUE if this starts an identifier. */
1324 forms_identifier_p (cpp_reader
*pfile
, int first
,
1325 struct normalize_state
*state
)
1327 cpp_buffer
*buffer
= pfile
->buffer
;
1329 if (*buffer
->cur
== '$')
1331 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1335 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
1337 CPP_OPTION (pfile
, warn_dollars
) = 0;
1338 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
1344 /* Is this a syntactically valid UCN or a valid UTF-8 char? */
1345 if (CPP_OPTION (pfile
, extended_identifiers
))
1348 if (*buffer
->cur
>= utf8_signifier
)
1350 if (_cpp_valid_utf8 (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1354 else if (*buffer
->cur
== '\\'
1355 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
1358 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1359 state
, &s
, NULL
, NULL
))
1368 /* Helper function to issue error about improper __VA_OPT__ use. */
1370 maybe_va_opt_error (cpp_reader
*pfile
)
1372 if (CPP_PEDANTIC (pfile
) && !CPP_OPTION (pfile
, va_opt
))
1374 /* __VA_OPT__ should not be accepted at all, but allow it in
1376 if (!_cpp_in_system_header (pfile
))
1377 cpp_error (pfile
, CPP_DL_PEDWARN
,
1378 "__VA_OPT__ is not available until C++20");
1380 else if (!pfile
->state
.va_args_ok
)
1382 /* __VA_OPT__ should only appear in the replacement list of a
1384 cpp_error (pfile
, CPP_DL_PEDWARN
,
1385 "__VA_OPT__ can only appear in the expansion"
1386 " of a C++20 variadic macro");
1390 /* Helper function to get the cpp_hashnode of the identifier BASE. */
1391 static cpp_hashnode
*
1392 lex_identifier_intern (cpp_reader
*pfile
, const uchar
*base
)
1394 cpp_hashnode
*result
;
1397 unsigned int hash
= HT_HASHSTEP (0, *base
);
1400 while (ISIDNUM (*cur
))
1402 hash
= HT_HASHSTEP (hash
, *cur
);
1406 hash
= HT_HASHFINISH (hash
, len
);
1407 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1408 base
, len
, hash
, HT_ALLOC
));
1410 /* Rarely, identifiers require diagnostics when lexed. */
1411 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1412 && !pfile
->state
.skipping
, 0))
1414 /* It is allowed to poison the same identifier twice. */
1415 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1416 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1417 NODE_NAME (result
));
1419 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1420 replacement list of a variadic macro. */
1421 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1422 && !pfile
->state
.va_args_ok
)
1424 if (CPP_OPTION (pfile
, cplusplus
))
1425 cpp_error (pfile
, CPP_DL_PEDWARN
,
1426 "__VA_ARGS__ can only appear in the expansion"
1427 " of a C++11 variadic macro");
1429 cpp_error (pfile
, CPP_DL_PEDWARN
,
1430 "__VA_ARGS__ can only appear in the expansion"
1431 " of a C99 variadic macro");
1434 if (result
== pfile
->spec_nodes
.n__VA_OPT__
)
1435 maybe_va_opt_error (pfile
);
1437 /* For -Wc++-compat, warn about use of C++ named operators. */
1438 if (result
->flags
& NODE_WARN_OPERATOR
)
1439 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1440 "identifier \"%s\" is a special operator name in C++",
1441 NODE_NAME (result
));
1447 /* Get the cpp_hashnode of an identifier specified by NAME in
1448 the current cpp_reader object. If none is found, NULL is returned. */
1450 _cpp_lex_identifier (cpp_reader
*pfile
, const char *name
)
1452 cpp_hashnode
*result
;
1453 result
= lex_identifier_intern (pfile
, (uchar
*) name
);
1457 /* Lex an identifier starting at BUFFER->CUR - 1. */
1458 static cpp_hashnode
*
1459 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
1460 struct normalize_state
*nst
, cpp_hashnode
**spelling
)
1462 cpp_hashnode
*result
;
1465 unsigned int hash
= HT_HASHSTEP (0, *base
);
1467 cur
= pfile
->buffer
->cur
;
1470 while (ISIDNUM (*cur
))
1472 hash
= HT_HASHSTEP (hash
, *cur
);
1475 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *(cur
- 1));
1477 pfile
->buffer
->cur
= cur
;
1478 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
1480 /* Slower version for identifiers containing UCNs
1481 or extended chars (including $). */
1483 while (ISIDNUM (*pfile
->buffer
->cur
))
1485 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *pfile
->buffer
->cur
);
1486 pfile
->buffer
->cur
++;
1488 } while (forms_identifier_p (pfile
, false, nst
));
1489 result
= _cpp_interpret_identifier (pfile
, base
,
1490 pfile
->buffer
->cur
- base
);
1491 *spelling
= cpp_lookup (pfile
, base
, pfile
->buffer
->cur
- base
);
1496 hash
= HT_HASHFINISH (hash
, len
);
1498 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1499 base
, len
, hash
, HT_ALLOC
));
1503 /* Rarely, identifiers require diagnostics when lexed. */
1504 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1505 && !pfile
->state
.skipping
, 0))
1507 /* It is allowed to poison the same identifier twice. */
1508 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1509 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1510 NODE_NAME (result
));
1512 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1513 replacement list of a variadic macro. */
1514 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1515 && !pfile
->state
.va_args_ok
)
1517 if (CPP_OPTION (pfile
, cplusplus
))
1518 cpp_error (pfile
, CPP_DL_PEDWARN
,
1519 "__VA_ARGS__ can only appear in the expansion"
1520 " of a C++11 variadic macro");
1522 cpp_error (pfile
, CPP_DL_PEDWARN
,
1523 "__VA_ARGS__ can only appear in the expansion"
1524 " of a C99 variadic macro");
1527 /* __VA_OPT__ should only appear in the replacement list of a
1529 if (result
== pfile
->spec_nodes
.n__VA_OPT__
)
1530 maybe_va_opt_error (pfile
);
1532 /* For -Wc++-compat, warn about use of C++ named operators. */
1533 if (result
->flags
& NODE_WARN_OPERATOR
)
1534 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1535 "identifier \"%s\" is a special operator name in C++",
1536 NODE_NAME (result
));
1542 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1544 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
1545 struct normalize_state
*nst
)
1551 base
= pfile
->buffer
->cur
- 1;
1554 const uchar
*adj_digit_sep
= NULL
;
1555 cur
= pfile
->buffer
->cur
;
1557 /* N.B. ISIDNUM does not include $. */
1558 while (ISIDNUM (*cur
)
1559 || (*cur
== '.' && !DIGIT_SEP (cur
[-1]))
1561 || (VALID_SIGN (*cur
, cur
[-1]) && !DIGIT_SEP (cur
[-2])))
1563 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *cur
);
1564 /* Adjacent digit separators do not form part of the pp-number syntax.
1565 However, they can safely be diagnosed here as an error, since '' is
1566 not a valid preprocessing token. */
1567 if (DIGIT_SEP (*cur
) && DIGIT_SEP (cur
[-1]) && !adj_digit_sep
)
1568 adj_digit_sep
= cur
;
1571 /* A number can't end with a digit separator. */
1572 while (cur
> pfile
->buffer
->cur
&& DIGIT_SEP (cur
[-1]))
1574 if (adj_digit_sep
&& adj_digit_sep
< cur
)
1575 cpp_error (pfile
, CPP_DL_ERROR
, "adjacent digit separators");
1577 pfile
->buffer
->cur
= cur
;
1579 while (forms_identifier_p (pfile
, false, nst
));
1581 number
->len
= cur
- base
;
1582 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
1583 memcpy (dest
, base
, number
->len
);
1584 dest
[number
->len
] = '\0';
1585 number
->text
= dest
;
1588 /* Create a token of type TYPE with a literal spelling. */
1590 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1591 unsigned int len
, enum cpp_ttype type
)
1594 token
->val
.str
.len
= len
;
1595 token
->val
.str
.text
= cpp_alloc_token_string (pfile
, base
, len
);
1599 cpp_alloc_token_string (cpp_reader
*pfile
,
1600 const unsigned char *ptr
, unsigned len
)
1602 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
1605 memcpy (dest
, ptr
, len
);
1609 /* A pair of raw buffer pointers. The currently open one is [1], the
1610 first one is [0]. Used for string literal lexing. */
1618 : first (NULL
), last (NULL
), rpos (0), accum (0)
1622 void append (cpp_reader
*, const uchar
*, size_t);
1624 void read_begin (cpp_reader
*);
1625 bool reading_p () const
1627 return rpos
!= NULL
;
1632 if (rpos
== BUFF_FRONT (last
))
1638 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1639 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1642 lit_accum::append (cpp_reader
*pfile
, const uchar
*base
, size_t len
)
1646 first
= last
= _cpp_get_buff (pfile
, len
);
1647 else if (len
> BUFF_ROOM (last
))
1649 /* There is insufficient room in the buffer. Copy what we can,
1650 and then either extend or create a new one. */
1651 size_t room
= BUFF_ROOM (last
);
1652 memcpy (BUFF_FRONT (last
), base
, room
);
1653 BUFF_FRONT (last
) += room
;
1658 gcc_checking_assert (!rpos
);
1660 last
= _cpp_append_extend_buff (pfile
, last
, len
);
1663 memcpy (BUFF_FRONT (last
), base
, len
);
1664 BUFF_FRONT (last
) += len
;
1669 lit_accum::read_begin (cpp_reader
*pfile
)
1671 /* We never accumulate more than 4 chars to read. */
1672 if (BUFF_ROOM (last
) < 4)
1674 last
= _cpp_append_extend_buff (pfile
, last
, 4);
1675 rpos
= BUFF_FRONT (last
);
1678 /* Returns true if a macro has been defined.
1679 This might not work if compile with -save-temps,
1680 or preprocess separately from compilation. */
1683 is_macro(cpp_reader
*pfile
, const uchar
*base
)
1685 const uchar
*cur
= base
;
1686 if (! ISIDST (*cur
))
1688 unsigned int hash
= HT_HASHSTEP (0, *cur
);
1690 while (ISIDNUM (*cur
))
1692 hash
= HT_HASHSTEP (hash
, *cur
);
1695 hash
= HT_HASHFINISH (hash
, cur
- base
);
1697 cpp_hashnode
*result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1698 base
, cur
- base
, hash
, HT_NO_INSERT
));
1700 return result
&& cpp_macro_p (result
);
1703 /* Returns true if a literal suffix does not have the expected form
1704 and is defined as a macro. */
1707 is_macro_not_literal_suffix(cpp_reader
*pfile
, const uchar
*base
)
1709 /* User-defined literals outside of namespace std must start with a single
1710 underscore, so assume anything of that form really is a UDL suffix.
1711 We don't need to worry about UDLs defined inside namespace std because
1712 their names are reserved, so cannot be used as macro names in valid
1714 if (base
[0] == '_' && base
[1] != '_')
1716 return is_macro (pfile
, base
);
1719 /* Lexes a raw string. The stored string contains the spelling,
1720 including double quotes, delimiter string, '(' and ')', any leading
1721 'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains
1722 the type of the literal, or CPP_OTHER if it was not properly
1725 BASE is the start of the token. Updates pfile->buffer->cur to just
1726 after the lexed string.
1728 The spelling is NUL-terminated, but it is not guaranteed that this
1729 is the first NUL since embedded NULs are preserved. */
1732 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
1734 const uchar
*pos
= base
;
1736 /* 'tis a pity this information isn't passed down from the lexer's
1737 initial categorization of the token. */
1738 enum cpp_ttype type
= CPP_STRING
;
1745 else if (*pos
== 'U')
1747 type
= CPP_STRING32
;
1750 else if (*pos
== 'u')
1754 type
= CPP_UTF8STRING
;
1758 type
= CPP_STRING16
;
1762 gcc_checking_assert (pos
[0] == 'R' && pos
[1] == '"');
1765 _cpp_line_note
*note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1767 /* Skip notes before the ". */
1768 while (note
->pos
< pos
)
1774 unsigned prefix_len
= 0;
1780 } phase
= PHASE_PREFIX
;
1784 gcc_checking_assert (note
->pos
>= pos
);
1786 /* Undo any escaped newlines and trigraphs. */
1787 if (!accum
.reading_p () && note
->pos
== pos
)
1792 /* Restore backslash followed by newline. */
1793 accum
.append (pfile
, base
, pos
- base
);
1795 accum
.read_begin (pfile
);
1796 accum
.append (pfile
, UC
"\\", 1);
1799 if (note
->type
== ' ')
1800 /* GNU backslash whitespace newline extension. FIXME
1801 could be any sequence of non-vertical space. When we
1802 can properly restore any such sequence, we should
1803 mark this note as handled so _cpp_process_line_notes
1805 accum
.append (pfile
, UC
" ", 1);
1807 accum
.append (pfile
, UC
"\n", 1);
1812 /* This can happen for ??/<NEWLINE> when trigraphs are not
1813 being interpretted. */
1814 gcc_checking_assert (!CPP_OPTION (pfile
, trigraphs
));
1820 gcc_checking_assert (_cpp_trigraph_map
[note
->type
]);
1822 /* Don't warn about this trigraph in
1823 _cpp_process_line_notes, since trigraphs show up as
1824 trigraphs in raw strings. */
1825 uchar type
= note
->type
;
1828 if (CPP_OPTION (pfile
, trigraphs
))
1830 accum
.append (pfile
, base
, pos
- base
);
1832 accum
.read_begin (pfile
);
1833 accum
.append (pfile
, UC
"??", 2);
1834 accum
.append (pfile
, &type
, 1);
1836 /* ??/ followed by newline gets two line notes, one for
1837 the trigraph and one for the backslash/newline. */
1838 if (type
== '/' && note
[1].pos
== pos
)
1841 gcc_assert (note
->type
== '\\' || note
->type
== ' ');
1842 goto after_backslash
;
1844 /* Skip the replacement character. */
1852 /* Now get a char to process. Either from an expanded note, or
1853 from the line buffer. */
1854 bool read_note
= accum
.reading_p ();
1855 char c
= read_note
? accum
.read_char () : *pos
++;
1857 if (phase
== PHASE_PREFIX
)
1863 prefix
[prefix_len
++] = '"';
1865 else if (prefix_len
< 16
1866 /* Prefix chars are any of the basic character set,
1867 [lex.charset] except for '
1868 ()\\\t\v\f\n'. Optimized for a contiguous
1870 /* Unlike a switch, this collapses down to one or
1871 two shift and bitmask operations on an ASCII
1872 system, with an outlier or two. */
1873 && (('Z' - 'A' == 25
1874 ? ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'))
1876 || (c
>= '0' && c
<= '9')
1877 || c
== '_' || c
== '{' || c
== '}'
1878 || c
== '[' || c
== ']' || c
== '#'
1879 || c
== '<' || c
== '>' || c
== '%'
1880 || c
== ':' || c
== ';' || c
== '.' || c
== '?'
1881 || c
== '*' || c
== '+' || c
== '-' || c
== '/'
1882 || c
== '^' || c
== '&' || c
== '|' || c
== '~'
1883 || c
== '!' || c
== '=' || c
== ','
1884 || c
== '"' || c
== '\''))
1885 prefix
[prefix_len
++] = c
;
1888 /* Something is wrong. */
1889 int col
= CPP_BUF_COLUMN (pfile
->buffer
, pos
) + read_note
;
1890 if (prefix_len
== 16)
1891 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1892 col
, "raw string delimiter longer "
1893 "than 16 characters");
1895 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1896 col
, "invalid new-line in raw "
1897 "string delimiter");
1899 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1900 col
, "invalid character '%c' in "
1901 "raw string delimiter", c
);
1904 /* Continue until we get a close quote, that's probably
1905 the best failure mode. */
1912 if (phase
!= PHASE_NONE
)
1914 if (prefix
[phase
] != c
)
1916 else if (unsigned (phase
+ 1) == prefix_len
)
1920 phase
= Phase (phase
+ 1);
1925 if (!prefix_len
&& c
== '"')
1926 /* Failure mode lexing. */
1928 else if (prefix_len
&& c
== ')')
1929 phase
= PHASE_SUFFIX
;
1930 else if (!read_note
&& c
== '\n')
1933 pfile
->buffer
->cur
= pos
;
1934 if (pfile
->state
.in_directive
1935 || (pfile
->state
.parsing_args
1936 && pfile
->buffer
->next_line
>= pfile
->buffer
->rlimit
))
1938 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
1939 "unterminated raw string");
1944 accum
.append (pfile
, base
, pos
- base
+ 1);
1945 _cpp_process_line_notes (pfile
, false);
1947 if (pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
1948 CPP_INCREMENT_LINE (pfile
, 0);
1949 pfile
->buffer
->need_line
= true;
1951 if (!_cpp_get_fresh_line (pfile
))
1953 /* We ran out of file and failed to get a line. */
1954 location_t src_loc
= token
->src_loc
;
1955 token
->type
= CPP_EOF
;
1956 /* Tell the compiler the line number of the EOF token. */
1957 token
->src_loc
= pfile
->line_table
->highest_line
;
1960 _cpp_release_buff (pfile
, accum
.first
);
1961 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
1962 "unterminated raw string");
1963 /* Now pop the buffer that _cpp_get_fresh_line did not. */
1964 _cpp_pop_buffer (pfile
);
1968 pos
= base
= pfile
->buffer
->cur
;
1969 note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1973 if (CPP_OPTION (pfile
, user_literals
))
1975 /* If a string format macro, say from inttypes.h, is placed touching
1976 a string literal it could be parsed as a C++11 user-defined string
1977 literal thus breaking the program. */
1978 if (is_macro_not_literal_suffix (pfile
, pos
))
1980 /* Raise a warning, but do not consume subsequent tokens. */
1981 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
1982 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1984 "invalid suffix on literal; C++11 requires "
1985 "a space between literal and string macro");
1987 /* Grab user defined literal suffix. */
1988 else if (ISIDST (*pos
))
1990 type
= cpp_userdef_string_add_type (type
);
1993 while (ISIDNUM (*pos
))
1999 pfile
->buffer
->cur
= pos
;
2001 create_literal (pfile
, token
, base
, pos
- base
, type
);
2004 size_t extra_len
= pos
- base
;
2005 uchar
*dest
= _cpp_unaligned_alloc (pfile
, accum
.accum
+ extra_len
+ 1);
2008 token
->val
.str
.len
= accum
.accum
+ extra_len
;
2009 token
->val
.str
.text
= dest
;
2010 for (_cpp_buff
*buf
= accum
.first
; buf
; buf
= buf
->next
)
2012 size_t len
= BUFF_FRONT (buf
) - buf
->base
;
2013 memcpy (dest
, buf
->base
, len
);
2016 _cpp_release_buff (pfile
, accum
.first
);
2017 memcpy (dest
, base
, extra_len
);
2018 dest
[extra_len
] = '\0';
2022 /* Lexes a string, character constant, or angle-bracketed header file
2023 name. The stored string contains the spelling, including opening
2024 quote and any leading 'L', 'u', 'U' or 'u8' and optional
2025 'R' modifier. It returns the type of the literal, or CPP_OTHER
2026 if it was not properly terminated, or CPP_LESS for an unterminated
2027 header name which must be relexed as normal tokens.
2029 The spelling is NUL-terminated, but it is not guaranteed that this
2030 is the first NUL since embedded NULs are preserved. */
2032 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
2034 bool saw_NUL
= false;
2036 cppchar_t terminator
;
2037 enum cpp_ttype type
;
2040 terminator
= *cur
++;
2041 if (terminator
== 'L' || terminator
== 'U')
2042 terminator
= *cur
++;
2043 else if (terminator
== 'u')
2045 terminator
= *cur
++;
2046 if (terminator
== '8')
2047 terminator
= *cur
++;
2049 if (terminator
== 'R')
2051 lex_raw_string (pfile
, token
, base
);
2054 if (terminator
== '"')
2055 type
= (*base
== 'L' ? CPP_WSTRING
:
2056 *base
== 'U' ? CPP_STRING32
:
2057 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
2059 else if (terminator
== '\'')
2060 type
= (*base
== 'L' ? CPP_WCHAR
:
2061 *base
== 'U' ? CPP_CHAR32
:
2062 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8CHAR
: CPP_CHAR16
)
2065 terminator
= '>', type
= CPP_HEADER_NAME
;
2069 cppchar_t c
= *cur
++;
2071 /* In #include-style directives, terminators are not escapable. */
2072 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
2074 else if (c
== terminator
)
2079 /* Unmatched quotes always yield undefined behavior, but
2080 greedy lexing means that what appears to be an unterminated
2081 header name may actually be a legitimate sequence of tokens. */
2082 if (terminator
== '>')
2084 token
->type
= CPP_LESS
;
2094 if (saw_NUL
&& !pfile
->state
.skipping
)
2095 cpp_error (pfile
, CPP_DL_WARNING
,
2096 "null character(s) preserved in literal");
2098 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
2099 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
2102 if (CPP_OPTION (pfile
, user_literals
))
2104 /* If a string format macro, say from inttypes.h, is placed touching
2105 a string literal it could be parsed as a C++11 user-defined string
2106 literal thus breaking the program. */
2107 if (is_macro_not_literal_suffix (pfile
, cur
))
2109 /* Raise a warning, but do not consume subsequent tokens. */
2110 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
2111 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
2113 "invalid suffix on literal; C++11 requires "
2114 "a space between literal and string macro");
2116 /* Grab user defined literal suffix. */
2117 else if (ISIDST (*cur
))
2119 type
= cpp_userdef_char_add_type (type
);
2120 type
= cpp_userdef_string_add_type (type
);
2123 while (ISIDNUM (*cur
))
2127 else if (CPP_OPTION (pfile
, cpp_warn_cxx11_compat
)
2128 && is_macro (pfile
, cur
)
2129 && !pfile
->state
.skipping
)
2130 cpp_warning_with_line (pfile
, CPP_W_CXX11_COMPAT
,
2131 token
->src_loc
, 0, "C++11 requires a space "
2132 "between string literal and macro");
2134 pfile
->buffer
->cur
= cur
;
2135 create_literal (pfile
, token
, base
, cur
- base
, type
);
2138 /* Return the comment table. The client may not make any assumption
2139 about the ordering of the table. */
2141 cpp_get_comments (cpp_reader
*pfile
)
2143 return &pfile
->comments
;
2146 /* Append a comment to the end of the comment table. */
2148 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
2152 if (pfile
->comments
.allocated
== 0)
2154 pfile
->comments
.allocated
= 256;
2155 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
2156 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
2159 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
2161 pfile
->comments
.allocated
*= 2;
2162 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
2163 (pfile
->comments
.entries
,
2164 pfile
->comments
.allocated
* sizeof (cpp_comment
));
2167 len
= token
->val
.str
.len
;
2169 /* Copy comment. Note, token may not be NULL terminated. */
2170 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
2171 (char *) xmalloc (sizeof (char) * (len
+ 1));
2172 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
2173 token
->val
.str
.text
, len
);
2174 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
2176 /* Set source location. */
2177 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
2179 /* Increment the count of entries in the comment table. */
2180 pfile
->comments
.count
++;
2183 /* The stored comment includes the comment start and any terminator. */
2185 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
2188 unsigned char *buffer
;
2189 unsigned int len
, clen
, i
;
2191 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
2193 /* C++ comments probably (not definitely) have moved past a new
2194 line, which we don't want to save in the comment. */
2195 if (is_vspace (pfile
->buffer
->cur
[-1]))
2198 /* If we are currently in a directive or in argument parsing, then
2199 we need to store all C++ comments as C comments internally, and
2200 so we need to allocate a little extra space in that case.
2202 Note that the only time we encounter a directive here is
2203 when we are saving comments in a "#define". */
2204 clen
= ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
)
2205 && type
== '/') ? len
+ 2 : len
;
2207 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
2209 token
->type
= CPP_COMMENT
;
2210 token
->val
.str
.len
= clen
;
2211 token
->val
.str
.text
= buffer
;
2214 memcpy (buffer
+ 1, from
, len
- 1);
2216 /* Finish conversion to a C comment, if necessary. */
2217 if ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
) && type
== '/')
2220 buffer
[clen
- 2] = '*';
2221 buffer
[clen
- 1] = '/';
2222 /* As there can be in a C++ comments illegal sequences for C comments
2223 we need to filter them out. */
2224 for (i
= 2; i
< (clen
- 2); i
++)
2225 if (buffer
[i
] == '/' && (buffer
[i
- 1] == '*' || buffer
[i
+ 1] == '*'))
2229 /* Finally store this comment for use by clients of libcpp. */
2230 store_comment (pfile
, token
);
2233 /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
2237 fallthrough_comment_p (cpp_reader
*pfile
, const unsigned char *comment_start
)
2239 const unsigned char *from
= comment_start
+ 1;
2241 switch (CPP_OPTION (pfile
, cpp_warn_implicit_fallthrough
))
2243 /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
2244 don't recognize any comments. The latter only checks attributes,
2245 the former doesn't warn. */
2249 /* -Wimplicit-fallthrough=1 considers any comment, no matter what
2254 /* -Wimplicit-fallthrough=2 looks for (case insensitive)
2255 .*falls?[ \t-]*thr(u|ough).* regex. */
2256 for (; (size_t) (pfile
->buffer
->cur
- from
) >= sizeof "fallthru" - 1;
2259 /* Is there anything like strpbrk with upper boundary, or
2260 memchr looking for 2 characters rather than just one? */
2261 if (from
[0] != 'f' && from
[0] != 'F')
2263 if (from
[1] != 'a' && from
[1] != 'A')
2265 if (from
[2] != 'l' && from
[2] != 'L')
2267 if (from
[3] != 'l' && from
[3] != 'L')
2269 from
+= sizeof "fall" - 1;
2270 if (from
[0] == 's' || from
[0] == 'S')
2272 while (*from
== ' ' || *from
== '\t' || *from
== '-')
2274 if (from
[0] != 't' && from
[0] != 'T')
2276 if (from
[1] != 'h' && from
[1] != 'H')
2278 if (from
[2] != 'r' && from
[2] != 'R')
2280 if (from
[3] == 'u' || from
[3] == 'U')
2282 if (from
[3] != 'o' && from
[3] != 'O')
2284 if (from
[4] != 'u' && from
[4] != 'U')
2286 if (from
[5] != 'g' && from
[5] != 'G')
2288 if (from
[6] != 'h' && from
[6] != 'H')
2298 /* Whole comment contents:
2302 if (*from
== '-' || *from
== '@')
2304 size_t len
= sizeof "fallthrough" - 1;
2305 if ((size_t) (pfile
->buffer
->cur
- from
- 1) < len
)
2307 if (memcmp (from
+ 1, "fallthrough", len
))
2311 if (from
[len
+ 1] != '@')
2317 /* Whole comment contents (regex):
2318 lint -fallthrough[ \t]*
2320 else if (*from
== 'l')
2322 size_t len
= sizeof "int -fallthrough" - 1;
2323 if ((size_t) (pfile
->buffer
->cur
- from
- 1) < len
)
2325 if (memcmp (from
+ 1, "int -fallthrough", len
))
2328 while (*from
== ' ' || *from
== '\t')
2331 /* Whole comment contents (regex):
2332 [ \t]*FALLTHR(U|OUGH)[ \t]*
2334 else if (CPP_OPTION (pfile
, cpp_warn_implicit_fallthrough
) == 4)
2336 while (*from
== ' ' || *from
== '\t')
2338 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "FALLTHRU" - 1)
2340 if (memcmp (from
, "FALLTHR", sizeof "FALLTHR" - 1))
2342 from
+= sizeof "FALLTHR" - 1;
2345 else if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "OUGH" - 1)
2347 else if (memcmp (from
, "OUGH", sizeof "OUGH" - 1))
2350 from
+= sizeof "OUGH" - 1;
2351 while (*from
== ' ' || *from
== '\t')
2354 /* Whole comment contents (regex):
2355 [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
2356 [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
2357 [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
2361 while (*from
== ' ' || *from
== '\t' || *from
== '.' || *from
== '!')
2363 unsigned char f
= *from
;
2364 bool all_upper
= false;
2365 if (f
== 'E' || f
== 'e')
2367 if ((size_t) (pfile
->buffer
->cur
- from
)
2368 < sizeof "else fallthru" - 1)
2370 if (f
== 'E' && memcmp (from
+ 1, "LSE", sizeof "LSE" - 1) == 0)
2372 else if (memcmp (from
+ 1, "lse", sizeof "lse" - 1))
2374 from
+= sizeof "else" - 1;
2380 if (all_upper
&& *from
== 'f')
2382 if (f
== 'e' && *from
== 'F')
2386 else if (f
== 'I' || f
== 'i')
2388 if ((size_t) (pfile
->buffer
->cur
- from
)
2389 < sizeof "intentional fallthru" - 1)
2391 if (f
== 'I' && memcmp (from
+ 1, "NTENTIONAL",
2392 sizeof "NTENTIONAL" - 1) == 0)
2394 else if (memcmp (from
+ 1, "ntentional",
2395 sizeof "ntentional" - 1))
2397 from
+= sizeof "intentional" - 1;
2401 if (all_upper
&& *from
== 'f')
2406 if (memcmp (from
, "LY F", sizeof "LY F" - 1))
2408 from
+= sizeof "LY " - 1;
2412 if (memcmp (from
, "ly ", sizeof "ly " - 1))
2414 from
+= sizeof "ly " - 1;
2416 if (f
== 'i' && *from
== 'F')
2420 if (f
!= 'F' && f
!= 'f')
2422 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "fallthru" - 1)
2424 if (f
== 'F' && memcmp (from
+ 1, "ALL", sizeof "ALL" - 1) == 0)
2428 else if (memcmp (from
+ 1, "all", sizeof "all" - 1))
2430 from
+= sizeof "fall" - 1;
2431 if (*from
== (all_upper
? 'S' : 's') && from
[1] == ' ')
2433 else if (*from
== ' ' || *from
== '-')
2435 else if (*from
!= (all_upper
? 'T' : 't'))
2437 if ((f
== 'f' || *from
!= 'T') && (all_upper
|| *from
!= 't'))
2439 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "thru" - 1)
2441 if (memcmp (from
+ 1, all_upper
? "HRU" : "hru", sizeof "hru" - 1))
2443 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "through" - 1)
2445 if (memcmp (from
+ 1, all_upper
? "HROUGH" : "hrough",
2446 sizeof "hrough" - 1))
2448 from
+= sizeof "through" - 1;
2451 from
+= sizeof "thru" - 1;
2452 while (*from
== ' ' || *from
== '\t' || *from
== '.' || *from
== '!')
2457 if (*comment_start
== '*')
2461 while (*from
&& *from
!= '*'
2462 && *from
!= '\n' && *from
!= '\r')
2464 if (*from
!= '*' || from
[1] == '/')
2471 while (*from
&& *from
!= '\n' && *from
!= '\r')
2475 /* C block comment. */
2476 if (*comment_start
== '*')
2478 if (*from
!= '*' || from
[1] != '/')
2481 /* C++ line comment. */
2482 else if (*from
!= '\n')
2488 /* Allocate COUNT tokens for RUN. */
2490 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
2492 run
->base
= XNEWVEC (cpp_token
, count
);
2493 run
->limit
= run
->base
+ count
;
2497 /* Returns the next tokenrun, or creates one if there is none. */
2499 next_tokenrun (tokenrun
*run
)
2501 if (run
->next
== NULL
)
2503 run
->next
= XNEW (tokenrun
);
2504 run
->next
->prev
= run
;
2505 _cpp_init_tokenrun (run
->next
, 250);
2511 /* Return the number of not yet processed token in a given
2514 _cpp_remaining_tokens_num_in_context (cpp_context
*context
)
2516 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2517 return (LAST (context
).token
- FIRST (context
).token
);
2518 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2519 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2520 return (LAST (context
).ptoken
- FIRST (context
).ptoken
);
2525 /* Returns the token present at index INDEX in a given context. If
2526 INDEX is zero, the next token to be processed is returned. */
2527 static const cpp_token
*
2528 _cpp_token_from_context_at (cpp_context
*context
, int index
)
2530 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2531 return &(FIRST (context
).token
[index
]);
2532 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2533 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2534 return FIRST (context
).ptoken
[index
];
2539 /* Look ahead in the input stream. */
2541 cpp_peek_token (cpp_reader
*pfile
, int index
)
2543 cpp_context
*context
= pfile
->context
;
2544 const cpp_token
*peektok
;
2547 /* First, scan through any pending cpp_context objects. */
2548 while (context
->prev
)
2550 ptrdiff_t sz
= _cpp_remaining_tokens_num_in_context (context
);
2552 if (index
< (int) sz
)
2553 return _cpp_token_from_context_at (context
, index
);
2555 context
= context
->prev
;
2558 /* We will have to read some new tokens after all (and do so
2559 without invalidating preceding tokens). */
2561 pfile
->keep_tokens
++;
2563 /* For peeked tokens temporarily disable line_change reporting,
2564 until the tokens are parsed for real. */
2565 void (*line_change
) (cpp_reader
*, const cpp_token
*, int)
2566 = pfile
->cb
.line_change
;
2567 pfile
->cb
.line_change
= NULL
;
2571 peektok
= _cpp_lex_token (pfile
);
2572 if (peektok
->type
== CPP_EOF
)
2577 else if (peektok
->type
== CPP_PRAGMA
)
2579 /* Don't peek past a pragma. */
2580 if (peektok
== &pfile
->directive_result
)
2581 /* Save the pragma in the buffer. */
2582 *pfile
->cur_token
++ = *peektok
;
2589 _cpp_backup_tokens_direct (pfile
, count
- index
);
2590 pfile
->keep_tokens
--;
2591 pfile
->cb
.line_change
= line_change
;
2596 /* Allocate a single token that is invalidated at the same time as the
2597 rest of the tokens on the line. Has its line and col set to the
2598 same as the last lexed token, so that diagnostics appear in the
2601 _cpp_temp_token (cpp_reader
*pfile
)
2603 cpp_token
*old
, *result
;
2604 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
2605 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
2607 old
= pfile
->cur_token
- 1;
2608 /* Any pre-existing lookaheads must not be clobbered. */
2613 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
2616 memmove (next
->base
+ 1, next
->base
,
2617 (la
- sz
) * sizeof (cpp_token
));
2619 next
->base
[0] = pfile
->cur_run
->limit
[-1];
2623 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
2624 MIN (la
, sz
- 1) * sizeof (cpp_token
));
2627 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
2629 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2630 pfile
->cur_token
= pfile
->cur_run
->base
;
2633 result
= pfile
->cur_token
++;
2634 result
->src_loc
= old
->src_loc
;
2638 /* We're at the beginning of a logical line (so not in
2639 directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set. See
2640 if we should enter deferred_pragma mode to tokenize the rest of the
2641 line as a module control-line. */
2644 cpp_maybe_module_directive (cpp_reader
*pfile
, cpp_token
*result
)
2646 unsigned backup
= 0; /* Tokens we peeked. */
2647 cpp_hashnode
*node
= result
->val
.node
.node
;
2648 cpp_token
*peek
= result
;
2649 cpp_token
*keyword
= peek
;
2650 cpp_hashnode
*(&n_modules
)[spec_nodes::M_HWM
][2] = pfile
->spec_nodes
.n_modules
;
2651 int header_count
= 0;
2653 /* Make sure the incoming state is as we expect it. This way we
2654 can restore it using constants. */
2655 gcc_checking_assert (!pfile
->state
.in_deferred_pragma
2656 && !pfile
->state
.skipping
2657 && !pfile
->state
.parsing_args
2658 && !pfile
->state
.angled_headers
2659 && (pfile
->state
.save_comments
2660 == !CPP_OPTION (pfile
, discard_comments
)));
2662 /* Enter directives mode sufficiently for peeking. We don't have
2663 to actually set in_directive. */
2664 pfile
->state
.in_deferred_pragma
= true;
2666 /* These two fields are needed to process tokenization in deferred
2667 pragma mode. They are not used outside deferred pragma mode or
2669 pfile
->state
.pragma_allow_expansion
= true;
2670 pfile
->directive_line
= result
->src_loc
;
2672 /* Saving comments is incompatible with directives mode. */
2673 pfile
->state
.save_comments
= 0;
2675 if (node
== n_modules
[spec_nodes::M_EXPORT
][0])
2677 peek
= _cpp_lex_direct (pfile
);
2680 if (keyword
->type
!= CPP_NAME
)
2682 node
= keyword
->val
.node
.node
;
2683 if (!(node
->flags
& NODE_MODULE
))
2687 if (node
== n_modules
[spec_nodes::M__IMPORT
][0])
2689 header_count
= backup
+ 2 + 16;
2690 else if (node
== n_modules
[spec_nodes::M_IMPORT
][0])
2692 header_count
= backup
+ 2 + (CPP_OPTION (pfile
, preprocessed
) ? 16 : 0);
2693 else if (node
== n_modules
[spec_nodes::M_MODULE
][0])
2698 /* We've seen [export] {module|import|__import}. Check the next token. */
2700 /* After '{,__}import' a header name may appear. */
2701 pfile
->state
.angled_headers
= true;
2702 peek
= _cpp_lex_direct (pfile
);
2705 /* ... import followed by identifier, ':', '<' or
2706 header-name preprocessing tokens, or module
2707 followed by cpp-identifier, ':' or ';' preprocessing
2708 tokens. C++ keywords are not yet relevant. */
2709 if (peek
->type
== CPP_NAME
2710 || peek
->type
== CPP_COLON
2712 ? (peek
->type
== CPP_LESS
2713 || (peek
->type
== CPP_STRING
&& peek
->val
.str
.text
[0] != 'R')
2714 || peek
->type
== CPP_HEADER_NAME
)
2715 : peek
->type
== CPP_SEMICOLON
))
2717 pfile
->state
.pragma_allow_expansion
= !CPP_OPTION (pfile
, preprocessed
);
2718 if (!pfile
->state
.pragma_allow_expansion
)
2719 pfile
->state
.prevent_expansion
++;
2721 if (!header_count
&& linemap_included_from
2722 (LINEMAPS_LAST_ORDINARY_MAP (pfile
->line_table
)))
2723 cpp_error_with_line (pfile
, CPP_DL_ERROR
, keyword
->src_loc
, 0,
2724 "module control-line cannot be in included file");
2726 /* The first one or two tokens cannot be macro names. */
2727 for (int ix
= backup
; ix
--;)
2729 cpp_token
*tok
= ix
? keyword
: result
;
2730 cpp_hashnode
*node
= tok
->val
.node
.node
;
2732 /* Don't attempt to expand the token. */
2733 tok
->flags
|= NO_EXPAND
;
2734 if (_cpp_defined_macro_p (node
)
2735 && _cpp_maybe_notify_macro_use (pfile
, node
, tok
->src_loc
)
2736 && !cpp_fun_like_macro_p (node
))
2737 cpp_error_with_line (pfile
, CPP_DL_ERROR
, tok
->src_loc
, 0,
2738 "module control-line \"%s\" cannot be"
2739 " an object-like macro",
2743 /* Map to underbar variants. */
2744 keyword
->val
.node
.node
= n_modules
[header_count
2745 ? spec_nodes::M_IMPORT
2746 : spec_nodes::M_MODULE
][1];
2748 result
->val
.node
.node
= n_modules
[spec_nodes::M_EXPORT
][1];
2750 /* Maybe tell the tokenizer we expect a header-name down the
2752 pfile
->state
.directive_file_token
= header_count
;
2757 /* Drop out of directive mode. */
2758 /* We asserted save_comments had this value upon entry. */
2759 pfile
->state
.save_comments
2760 = !CPP_OPTION (pfile
, discard_comments
);
2761 pfile
->state
.in_deferred_pragma
= false;
2762 /* Do not let this remain on. */
2763 pfile
->state
.angled_headers
= false;
2766 /* In either case we want to backup the peeked tokens. */
2769 /* If we saw EOL, we should drop it, because this isn't a module
2770 control-line after all. */
2771 bool eol
= peek
->type
== CPP_PRAGMA_EOL
;
2772 if (!eol
|| backup
> 1)
2774 /* Put the peeked tokens back. */
2775 _cpp_backup_tokens_direct (pfile
, backup
);
2776 /* But if the last one was an EOL, forget it. */
2778 pfile
->lookaheads
--;
2783 /* Lex a token into RESULT (external interface). Takes care of issues
2784 like directive handling, token lookahead, multiple include
2785 optimization and skipping. */
2787 _cpp_lex_token (cpp_reader
*pfile
)
2793 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
2795 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2796 pfile
->cur_token
= pfile
->cur_run
->base
;
2798 /* We assume that the current token is somewhere in the current
2800 if (pfile
->cur_token
< pfile
->cur_run
->base
2801 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
2804 if (pfile
->lookaheads
)
2806 pfile
->lookaheads
--;
2807 result
= pfile
->cur_token
++;
2810 result
= _cpp_lex_direct (pfile
);
2812 if (result
->flags
& BOL
)
2814 /* Is this a directive? If _cpp_handle_directive returns
2815 false, it is an assembler #. */
2816 if (result
->type
== CPP_HASH
2817 /* 6.10.3 p 11: Directives in a list of macro arguments
2818 gives undefined behavior. This implementation
2819 handles the directive as normal. */
2820 && pfile
->state
.parsing_args
!= 1)
2822 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
2824 if (pfile
->directive_result
.type
== CPP_PADDING
)
2826 result
= &pfile
->directive_result
;
2829 else if (pfile
->state
.in_deferred_pragma
)
2830 result
= &pfile
->directive_result
;
2831 else if (result
->type
== CPP_NAME
2832 && (result
->val
.node
.node
->flags
& NODE_MODULE
)
2833 && !pfile
->state
.skipping
2834 /* Unlike regular directives, we do not deal with
2835 tokenizing module directives as macro arguments.
2836 That's not permitted. */
2837 && !pfile
->state
.parsing_args
)
2839 /* P1857. Before macro expansion, at the start of a logical line. */
2841 /* We don't have to consider lookaheads at this point. */
2842 gcc_checking_assert (!pfile
->lookaheads
);
2844 cpp_maybe_module_directive (pfile
, result
);
2847 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
2848 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
2851 /* We don't skip tokens in directives. */
2852 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
2855 /* Outside a directive, invalidate controlling macros. At file
2856 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2857 get here and MI optimization works. */
2858 pfile
->mi_valid
= false;
2860 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
2867 /* Returns true if a fresh line has been loaded. */
2869 _cpp_get_fresh_line (cpp_reader
*pfile
)
2871 /* We can't get a new line until we leave the current directive. */
2872 if (pfile
->state
.in_directive
)
2877 cpp_buffer
*buffer
= pfile
->buffer
;
2879 if (!buffer
->need_line
)
2882 if (buffer
->next_line
< buffer
->rlimit
)
2884 _cpp_clean_line (pfile
);
2888 /* First, get out of parsing arguments state. */
2889 if (pfile
->state
.parsing_args
)
2892 /* End of buffer. Non-empty files should end in a newline. */
2893 if (buffer
->buf
!= buffer
->rlimit
2894 && buffer
->next_line
> buffer
->rlimit
2895 && !buffer
->from_stage3
)
2897 /* Clip to buffer size. */
2898 buffer
->next_line
= buffer
->rlimit
;
2901 if (buffer
->prev
&& !buffer
->return_at_eof
)
2902 _cpp_pop_buffer (pfile
);
2905 /* End of translation. Do not pop the buffer yet. Increment
2906 line number so that the EOF token is on a line of its own
2907 (_cpp_lex_direct doesn't increment in that case, because
2908 it's hard for it to distinguish this special case). */
2909 CPP_INCREMENT_LINE (pfile
, 0);
2915 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
2918 result->type = ELSE_TYPE; \
2919 if (*buffer->cur == CHAR) \
2920 buffer->cur++, result->type = THEN_TYPE; \
2924 /* Lex a token into pfile->cur_token, which is also incremented, to
2925 get diagnostics pointing to the correct location.
2927 Does not handle issues such as token lookahead, multiple-include
2928 optimization, directives, skipping etc. This function is only
2929 suitable for use by _cpp_lex_token, and in special cases like
2930 lex_expansion_token which doesn't care for any of these issues.
2932 When meeting a newline, returns CPP_EOF if parsing a directive,
2933 otherwise returns to the start of the token buffer if permissible.
2934 Returns the location of the lexed token. */
2936 _cpp_lex_direct (cpp_reader
*pfile
)
2940 const unsigned char *comment_start
;
2941 bool fallthrough_comment
= false;
2942 cpp_token
*result
= pfile
->cur_token
++;
2946 buffer
= pfile
->buffer
;
2947 if (buffer
->need_line
)
2949 gcc_assert (!pfile
->state
.in_deferred_pragma
);
2950 if (!_cpp_get_fresh_line (pfile
))
2952 result
->type
= CPP_EOF
;
2953 /* Not a real EOF in a directive or arg parsing -- we refuse
2954 to advance to the next file now, and will once we're out
2956 if (!pfile
->state
.in_directive
&& !pfile
->state
.parsing_args
)
2958 /* Tell the compiler the line number of the EOF token. */
2959 result
->src_loc
= pfile
->line_table
->highest_line
;
2960 result
->flags
= BOL
;
2961 /* Now pop the buffer that _cpp_get_fresh_line did not. */
2962 _cpp_pop_buffer (pfile
);
2966 if (buffer
!= pfile
->buffer
)
2967 fallthrough_comment
= false;
2968 if (!pfile
->keep_tokens
)
2970 pfile
->cur_run
= &pfile
->base_run
;
2971 result
= pfile
->base_run
.base
;
2972 pfile
->cur_token
= result
+ 1;
2974 result
->flags
= BOL
;
2975 if (pfile
->state
.parsing_args
== 2)
2976 result
->flags
|= PREV_WHITE
;
2978 buffer
= pfile
->buffer
;
2980 result
->src_loc
= pfile
->line_table
->highest_line
;
2983 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
2984 && !pfile
->overlaid_buffer
)
2986 _cpp_process_line_notes (pfile
, false);
2987 result
->src_loc
= pfile
->line_table
->highest_line
;
2991 if (pfile
->forced_token_location
)
2992 result
->src_loc
= pfile
->forced_token_location
;
2994 result
->src_loc
= linemap_position_for_column (pfile
->line_table
,
2995 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
2999 case ' ': case '\t': case '\f': case '\v': case '\0':
3000 result
->flags
|= PREV_WHITE
;
3001 skip_whitespace (pfile
, c
);
3005 /* Increment the line, unless this is the last line ... */
3006 if (buffer
->cur
< buffer
->rlimit
3007 /* ... or this is a #include, (where _cpp_stack_file needs to
3008 unwind by one line) ... */
3009 || (pfile
->state
.in_directive
> 1
3010 /* ... except traditional-cpp increments this elsewhere. */
3011 && !CPP_OPTION (pfile
, traditional
)))
3012 CPP_INCREMENT_LINE (pfile
, 0);
3013 buffer
->need_line
= true;
3014 if (pfile
->state
.in_deferred_pragma
)
3016 /* Produce the PRAGMA_EOL on this line. File reading
3017 ensures there is always a \n at end of the buffer, thus
3018 in a deferred pragma we always see CPP_PRAGMA_EOL before
3020 result
->type
= CPP_PRAGMA_EOL
;
3021 result
->flags
&= ~PREV_WHITE
;
3022 pfile
->state
.in_deferred_pragma
= false;
3023 if (!pfile
->state
.pragma_allow_expansion
)
3024 pfile
->state
.prevent_expansion
--;
3029 case '0': case '1': case '2': case '3': case '4':
3030 case '5': case '6': case '7': case '8': case '9':
3032 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3033 result
->type
= CPP_NUMBER
;
3034 lex_number (pfile
, &result
->val
.str
, &nst
);
3035 warn_about_normalization (pfile
, result
, &nst
);
3043 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
3044 wide strings or raw strings. */
3045 if (c
== 'L' || CPP_OPTION (pfile
, rliterals
)
3046 || (c
!= 'R' && CPP_OPTION (pfile
, uliterals
)))
3048 if ((*buffer
->cur
== '\'' && c
!= 'R')
3049 || *buffer
->cur
== '"'
3050 || (*buffer
->cur
== 'R'
3052 && buffer
->cur
[1] == '"'
3053 && CPP_OPTION (pfile
, rliterals
))
3054 || (*buffer
->cur
== '8'
3056 && ((buffer
->cur
[1] == '"' || (buffer
->cur
[1] == '\''
3057 && CPP_OPTION (pfile
, utf8_char_literals
)))
3058 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'
3059 && CPP_OPTION (pfile
, rliterals
)))))
3061 lex_string (pfile
, result
, buffer
->cur
- 1);
3068 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
3069 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
3070 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
3071 case 's': case 't': case 'v': case 'w': case 'x':
3073 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
3074 case 'G': case 'H': case 'I': case 'J': case 'K':
3075 case 'M': case 'N': case 'O': case 'P': case 'Q':
3076 case 'S': case 'T': case 'V': case 'W': case 'X':
3078 result
->type
= CPP_NAME
;
3080 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3081 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
3083 &result
->val
.node
.spelling
);
3084 warn_about_normalization (pfile
, result
, &nst
);
3087 /* Convert named operators to their proper types. */
3088 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
3090 result
->flags
|= NAMED_OP
;
3091 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
3094 /* Signal FALLTHROUGH comment followed by another token. */
3095 if (fallthrough_comment
)
3096 result
->flags
|= PREV_FALLTHROUGH
;
3101 lex_string (pfile
, result
, buffer
->cur
- 1);
3105 /* A potential block or line comment. */
3106 comment_start
= buffer
->cur
;
3111 if (_cpp_skip_block_comment (pfile
))
3112 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
3114 else if (c
== '/' && ! CPP_OPTION (pfile
, traditional
))
3116 /* Don't warn for system headers. */
3117 if (_cpp_in_system_header (pfile
))
3119 /* Warn about comments if pedantically GNUC89, and not
3120 in system headers. */
3121 else if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
3122 && CPP_PEDANTIC (pfile
)
3123 && ! buffer
->warned_cplusplus_comments
)
3125 if (cpp_error (pfile
, CPP_DL_PEDWARN
,
3126 "C++ style comments are not allowed in ISO C90"))
3127 cpp_error (pfile
, CPP_DL_NOTE
,
3128 "(this will be reported only once per input file)");
3129 buffer
->warned_cplusplus_comments
= 1;
3131 /* Or if specifically desired via -Wc90-c99-compat. */
3132 else if (CPP_OPTION (pfile
, cpp_warn_c90_c99_compat
) > 0
3133 && ! CPP_OPTION (pfile
, cplusplus
)
3134 && ! buffer
->warned_cplusplus_comments
)
3136 if (cpp_error (pfile
, CPP_DL_WARNING
,
3137 "C++ style comments are incompatible with C90"))
3138 cpp_error (pfile
, CPP_DL_NOTE
,
3139 "(this will be reported only once per input file)");
3140 buffer
->warned_cplusplus_comments
= 1;
3142 /* In C89/C94, C++ style comments are forbidden. */
3143 else if ((CPP_OPTION (pfile
, lang
) == CLK_STDC89
3144 || CPP_OPTION (pfile
, lang
) == CLK_STDC94
))
3146 /* But don't be confused about valid code such as
3147 - // immediately followed by *,
3148 - // in a preprocessing directive,
3149 - // in an #if 0 block. */
3150 if (buffer
->cur
[1] == '*'
3151 || pfile
->state
.in_directive
3152 || pfile
->state
.skipping
)
3154 result
->type
= CPP_DIV
;
3157 else if (! buffer
->warned_cplusplus_comments
)
3159 if (cpp_error (pfile
, CPP_DL_ERROR
,
3160 "C++ style comments are not allowed in "
3162 cpp_error (pfile
, CPP_DL_NOTE
,
3163 "(this will be reported only once per input "
3165 buffer
->warned_cplusplus_comments
= 1;
3168 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
3169 cpp_warning (pfile
, CPP_W_COMMENTS
, "multi-line comment");
3174 result
->type
= CPP_DIV_EQ
;
3179 result
->type
= CPP_DIV
;
3183 if (fallthrough_comment_p (pfile
, comment_start
))
3184 fallthrough_comment
= true;
3186 if (pfile
->cb
.comment
)
3188 size_t len
= pfile
->buffer
->cur
- comment_start
;
3189 pfile
->cb
.comment (pfile
, result
->src_loc
, comment_start
- 1,
3193 if (!pfile
->state
.save_comments
)
3195 result
->flags
|= PREV_WHITE
;
3196 goto update_tokens_line
;
3199 if (fallthrough_comment
)
3200 result
->flags
|= PREV_FALLTHROUGH
;
3202 /* Save the comment as a token in its own right. */
3203 save_comment (pfile
, result
, comment_start
, c
);
3207 if (pfile
->state
.angled_headers
)
3209 lex_string (pfile
, result
, buffer
->cur
- 1);
3210 if (result
->type
!= CPP_LESS
)
3214 result
->type
= CPP_LESS
;
3215 if (*buffer
->cur
== '=')
3217 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
3218 if (*buffer
->cur
== '>'
3219 && CPP_OPTION (pfile
, cplusplus
)
3220 && CPP_OPTION (pfile
, lang
) >= CLK_GNUCXX20
)
3221 buffer
->cur
++, result
->type
= CPP_SPACESHIP
;
3223 else if (*buffer
->cur
== '<')
3226 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
3228 else if (CPP_OPTION (pfile
, digraphs
))
3230 if (*buffer
->cur
== ':')
3232 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
3233 three characters are <:: and the subsequent character
3234 is neither : nor >, the < is treated as a preprocessor
3235 token by itself". */
3236 if (CPP_OPTION (pfile
, cplusplus
)
3237 && CPP_OPTION (pfile
, lang
) != CLK_CXX98
3238 && CPP_OPTION (pfile
, lang
) != CLK_GNUCXX
3239 && buffer
->cur
[1] == ':'
3240 && buffer
->cur
[2] != ':' && buffer
->cur
[2] != '>')
3244 result
->flags
|= DIGRAPH
;
3245 result
->type
= CPP_OPEN_SQUARE
;
3247 else if (*buffer
->cur
== '%')
3250 result
->flags
|= DIGRAPH
;
3251 result
->type
= CPP_OPEN_BRACE
;
3257 result
->type
= CPP_GREATER
;
3258 if (*buffer
->cur
== '=')
3259 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
3260 else if (*buffer
->cur
== '>')
3263 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
3268 result
->type
= CPP_MOD
;
3269 if (*buffer
->cur
== '=')
3270 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
3271 else if (CPP_OPTION (pfile
, digraphs
))
3273 if (*buffer
->cur
== ':')
3276 result
->flags
|= DIGRAPH
;
3277 result
->type
= CPP_HASH
;
3278 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
3279 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
3281 else if (*buffer
->cur
== '>')
3284 result
->flags
|= DIGRAPH
;
3285 result
->type
= CPP_CLOSE_BRACE
;
3291 result
->type
= CPP_DOT
;
3292 if (ISDIGIT (*buffer
->cur
))
3294 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3295 result
->type
= CPP_NUMBER
;
3296 lex_number (pfile
, &result
->val
.str
, &nst
);
3297 warn_about_normalization (pfile
, result
, &nst
);
3299 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
3300 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
3301 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
3302 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
3306 result
->type
= CPP_PLUS
;
3307 if (*buffer
->cur
== '+')
3308 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
3309 else if (*buffer
->cur
== '=')
3310 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
3314 result
->type
= CPP_MINUS
;
3315 if (*buffer
->cur
== '>')
3318 result
->type
= CPP_DEREF
;
3319 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
3320 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
3322 else if (*buffer
->cur
== '-')
3323 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
3324 else if (*buffer
->cur
== '=')
3325 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
3329 result
->type
= CPP_AND
;
3330 if (*buffer
->cur
== '&')
3331 buffer
->cur
++, result
->type
= CPP_AND_AND
;
3332 else if (*buffer
->cur
== '=')
3333 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
3337 result
->type
= CPP_OR
;
3338 if (*buffer
->cur
== '|')
3339 buffer
->cur
++, result
->type
= CPP_OR_OR
;
3340 else if (*buffer
->cur
== '=')
3341 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
3345 result
->type
= CPP_COLON
;
3346 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, scope
))
3347 buffer
->cur
++, result
->type
= CPP_SCOPE
;
3348 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
3351 result
->flags
|= DIGRAPH
;
3352 result
->type
= CPP_CLOSE_SQUARE
;
3356 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
3357 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
3358 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
3359 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
3360 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
3362 case '?': result
->type
= CPP_QUERY
; break;
3363 case '~': result
->type
= CPP_COMPL
; break;
3364 case ',': result
->type
= CPP_COMMA
; break;
3365 case '(': result
->type
= CPP_OPEN_PAREN
; break;
3366 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
3367 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
3368 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
3369 case '{': result
->type
= CPP_OPEN_BRACE
; break;
3370 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
3371 case ';': result
->type
= CPP_SEMICOLON
; break;
3373 /* @ is a punctuator in Objective-C. */
3374 case '@': result
->type
= CPP_ATSIGN
; break;
3378 const uchar
*base
= --buffer
->cur
;
3380 /* Check for an extended identifier ($ or UCN or UTF-8). */
3381 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3382 if (forms_identifier_p (pfile
, true, &nst
))
3384 result
->type
= CPP_NAME
;
3385 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
,
3386 &result
->val
.node
.spelling
);
3387 warn_about_normalization (pfile
, result
, &nst
);
3391 /* Otherwise this will form a CPP_OTHER token. Parse valid UTF-8 as a
3394 if (c
>= utf8_signifier
)
3396 const uchar
*pstr
= base
;
3398 if (_cpp_valid_utf8 (pfile
, &pstr
, buffer
->rlimit
, 0, NULL
, &s
))
3401 create_literal (pfile
, result
, base
, buffer
->cur
- base
, CPP_OTHER
);
3407 /* Potentially convert the location of the token to a range. */
3408 if (result
->src_loc
>= RESERVED_LOCATION_COUNT
3409 && result
->type
!= CPP_EOF
)
3411 /* Ensure that any line notes are processed, so that we have the
3412 correct physical line/column for the end-point of the token even
3413 when a logical line is split via one or more backslashes. */
3414 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
3415 && !pfile
->overlaid_buffer
)
3416 _cpp_process_line_notes (pfile
, false);
3418 source_range tok_range
;
3419 tok_range
.m_start
= result
->src_loc
;
3421 = linemap_position_for_column (pfile
->line_table
,
3422 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
3424 result
->src_loc
= COMBINE_LOCATION_DATA (pfile
->line_table
,
3432 /* An upper bound on the number of bytes needed to spell TOKEN.
3433 Does not include preceding whitespace. */
3435 cpp_token_len (const cpp_token
*token
)
3439 switch (TOKEN_SPELL (token
))
3441 default: len
= 6; break;
3442 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
3443 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
3449 /* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
3450 Return the number of bytes read out of NAME. (There are always
3451 10 bytes written to BUFFER.) */
3454 utf8_to_ucn (unsigned char *buffer
, const unsigned char *name
)
3460 unsigned long utf32
;
3462 /* Compute the length of the UTF-8 sequence. */
3463 for (t
= *name
; t
& 0x80; t
<<= 1)
3466 utf32
= *name
& (0x7F >> ucn_len
);
3467 for (ucn_len_c
= 1; ucn_len_c
< ucn_len
; ucn_len_c
++)
3469 utf32
= (utf32
<< 6) | (*++name
& 0x3F);
3471 /* Ill-formed UTF-8. */
3472 if ((*name
& ~0x3F) != 0x80)
3478 for (j
= 7; j
>= 0; j
--)
3479 *buffer
++ = "0123456789abcdef"[(utf32
>> (4 * j
)) & 0xF];
3483 /* Given a token TYPE corresponding to a digraph, return a pointer to
3484 the spelling of the digraph. */
3485 static const unsigned char *
3486 cpp_digraph2name (enum cpp_ttype type
)
3488 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
3491 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
3492 The buffer must already contain enough space to hold the
3493 token's spelling. Returns a pointer to the character after the
3494 last character written. */
3496 _cpp_spell_ident_ucns (unsigned char *buffer
, cpp_hashnode
*ident
)
3499 const unsigned char *name
= NODE_NAME (ident
);
3501 for (i
= 0; i
< NODE_LEN (ident
); i
++)
3502 if (name
[i
] & ~0x7F)
3504 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
3508 *buffer
++ = name
[i
];
3513 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3514 already contain enough space to hold the token's spelling.
3515 Returns a pointer to the character after the last character written.
3516 FORSTRING is true if this is to be the spelling after translation
3517 phase 1 (with the original spelling of extended identifiers), false
3518 if extended identifiers should always be written using UCNs (there is
3519 no option for always writing them in the internal UTF-8 form).
3520 FIXME: Would be nice if we didn't need the PFILE argument. */
3522 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
3523 unsigned char *buffer
, bool forstring
)
3525 switch (TOKEN_SPELL (token
))
3527 case SPELL_OPERATOR
:
3529 const unsigned char *spelling
;
3532 if (token
->flags
& DIGRAPH
)
3533 spelling
= cpp_digraph2name (token
->type
);
3534 else if (token
->flags
& NAMED_OP
)
3537 spelling
= TOKEN_NAME (token
);
3539 while ((c
= *spelling
++) != '\0')
3548 memcpy (buffer
, NODE_NAME (token
->val
.node
.spelling
),
3549 NODE_LEN (token
->val
.node
.spelling
));
3550 buffer
+= NODE_LEN (token
->val
.node
.spelling
);
3553 buffer
= _cpp_spell_ident_ucns (buffer
, token
->val
.node
.node
);
3557 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
3558 buffer
+= token
->val
.str
.len
;
3562 cpp_error (pfile
, CPP_DL_ICE
,
3563 "unspellable token %s", TOKEN_NAME (token
));
3570 /* Returns TOKEN spelt as a null-terminated string. The string is
3571 freed when the reader is destroyed. Useful for diagnostics. */
3573 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
3575 unsigned int len
= cpp_token_len (token
) + 1;
3576 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
3578 end
= cpp_spell_token (pfile
, token
, start
, false);
3584 /* Returns a pointer to a string which spells the token defined by
3585 TYPE and FLAGS. Used by C front ends, which really should move to
3586 using cpp_token_as_text. */
3588 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
3590 if (flags
& DIGRAPH
)
3591 return (const char *) cpp_digraph2name (type
);
3592 else if (flags
& NAMED_OP
)
3593 return cpp_named_operator2name (type
);
3595 return (const char *) token_spellings
[type
].name
;
3598 /* Writes the spelling of token to FP, without any preceding space.
3599 Separated from cpp_spell_token for efficiency - to avoid stdio
3600 double-buffering. */
3602 cpp_output_token (const cpp_token
*token
, FILE *fp
)
3604 switch (TOKEN_SPELL (token
))
3606 case SPELL_OPERATOR
:
3608 const unsigned char *spelling
;
3611 if (token
->flags
& DIGRAPH
)
3612 spelling
= cpp_digraph2name (token
->type
);
3613 else if (token
->flags
& NAMED_OP
)
3616 spelling
= TOKEN_NAME (token
);
3621 while ((c
= *++spelling
) != '\0');
3629 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
3631 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
3632 if (name
[i
] & ~0x7F)
3634 unsigned char buffer
[10];
3635 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
3636 fwrite (buffer
, 1, 10, fp
);
3639 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
3644 if (token
->type
== CPP_HEADER_NAME
)
3646 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
3647 if (token
->type
== CPP_HEADER_NAME
)
3652 /* An error, most probably. */
3657 /* Compare two tokens. */
3659 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
3661 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
3662 switch (TOKEN_SPELL (a
))
3664 default: /* Keep compiler happy. */
3665 case SPELL_OPERATOR
:
3666 /* token_no is used to track where multiple consecutive ##
3667 tokens were originally located. */
3668 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
3670 return (a
->type
!= CPP_MACRO_ARG
3671 || (a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
3672 && a
->val
.macro_arg
.spelling
== b
->val
.macro_arg
.spelling
));
3674 return (a
->val
.node
.node
== b
->val
.node
.node
3675 && a
->val
.node
.spelling
== b
->val
.node
.spelling
);
3677 return (a
->val
.str
.len
== b
->val
.str
.len
3678 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
3685 /* Returns nonzero if a space should be inserted to avoid an
3686 accidental token paste for output. For simplicity, it is
3687 conservative, and occasionally advises a space where one is not
3688 needed, e.g. "." and ".2". */
/* PFILE supplies the options consulted below (objc, user_literals).
   TOKEN1 and TOKEN2 are the two adjacent tokens, in that order.  A and
   B are their token types; C is the first spelling character of
   TOKEN2 when it is a digraph or an operator.
   NOTE(review): this listing omits some original lines (braces, the
   statements controlled by the NAMED_OP tests, the switch header and a
   few case lines), so those parts cannot be confirmed from here.  */
3690 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
3691 const cpp_token
*token2
)
3693 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
3696 if (token1
->flags
& NAMED_OP
)
3698 if (token2
->flags
& NAMED_OP
)
/* Choose C: from the digraph spelling table when TOKEN2 carries the
   DIGRAPH flag, otherwise from the operator spelling table when B is
   spelled as an operator.  */
3702 if (token2
->flags
& DIGRAPH
)
3703 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
3704 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
3705 c
= token_spellings
[b
].name
[0];
3707 /* Quickly get everything that can paste with an '='. */
3708 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* Per-type checks on A: each case compares C (or B) against what
   could join onto the spelling of TOKEN1.  */
3713 case CPP_GREATER
: return c
== '>';
3714 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
3715 case CPP_PLUS
: return c
== '+';
3716 case CPP_MINUS
: return c
== '-' || c
== '>';
3717 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
3718 case CPP_MOD
: return c
== ':' || c
== '>';
3719 case CPP_AND
: return c
== '&';
3720 case CPP_OR
: return c
== '|';
3721 case CPP_COLON
: return c
== ':' || c
== '>';
3722 case CPP_DEREF
: return c
== '*';
3723 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
3724 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
3726 case CPP_NAME
: return ((b
== CPP_NUMBER
3727 && name_p (pfile
, &token2
->val
.str
))
3729 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
3730 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
3732 || c
== '.' || c
== '+' || c
== '-');
/* CPP_OTHER looks at the first character of TOKEN1's own spelling: a
   backslash, or '@' when the objc option is set and TOKEN2 is a name
   or a string.  */
3734 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
3736 || (CPP_OPTION (pfile
, objc
)
3737 && token1
->val
.str
.text
[0] == '@'
3738 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
3739 case CPP_LESS_EQ
: return c
== '>';
/* String-literal types: the visible condition involves the
   user_literals option and a literal TOKEN2 whose text starts with an
   identifier-start character.  */
3742 case CPP_UTF8STRING
:
3744 case CPP_STRING32
: return (CPP_OPTION (pfile
, user_literals
)
3746 || (TOKEN_SPELL (token2
) == SPELL_LITERAL
3747 && ISIDST (token2
->val
.str
.text
[0]))));
3755 /* Output all the remaining tokens on the current line, and a newline
3756 character, to FP. Leading whitespace is removed. If there are
3757 macros, special token padding is not performed. */
3759 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
3761 const cpp_token
*token
;
3763 token
= cpp_get_token (pfile
);
3764 while (token
->type
!= CPP_EOF
)
3766 cpp_output_token (token
, fp
);
3767 token
= cpp_get_token (pfile
);
3768 if (token
->flags
& PREV_WHITE
)
3775 /* Return a string representation of all the remaining tokens on the
3776 current line. The result is allocated using xmalloc and must be
3777 freed by the caller. */
/* When DIR_NAME is non-null the initial allocation accounts for its
   length and "#<DIR_NAME> " is written at the start of the result.
   NOTE(review): this listing omits some original lines (braces, the
   guard around the sprintf, the first growth step and the final
   termination/return), so those parts cannot be confirmed from here.  */
3779 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
3781 const cpp_token
*token
;
3782 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
3783 unsigned int alloced
= 120 + out
;
3784 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
3786 /* If DIR_NAME is empty, there are no initial contents. */
3789 sprintf ((char *) result
, "#%s ", dir_name
);
3793 token
= cpp_get_token (pfile
);
3794 while (token
->type
!= CPP_EOF
)
3796 unsigned char *last
;
3797 /* Include room for a possible space and the terminating nul. */
3798 unsigned int len
= cpp_token_len (token
) + 2;
/* Grow RESULT when the next token might not fit; after this step
   ALLOCED is at least OUT + LEN.  */
3800 if (out
+ len
> alloced
)
3803 if (out
+ len
> alloced
)
3804 alloced
= out
+ len
;
3805 result
= (unsigned char *) xrealloc (result
, alloced
);
/* Spell the token at offset OUT; the pointer returned by
   cpp_spell_token is converted back into the new output offset.  */
3808 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
3809 out
= last
- result
;
/* A space is appended when the following token carries PREV_WHITE.  */
3811 token
= cpp_get_token (pfile
);
3812 if (token
->flags
& PREV_WHITE
)
3813 result
[out
++] = ' ';
3820 /* Memory buffers. Changing these three constants can have a dramatic
3821 effect on performance. The values here are reasonable defaults,
3822 but might be tuned. If you adjust them, be sure to test across a
3823 range of uses of cpplib, including heavy nested function-like macro
3824 expansion. Also check the change in peak memory usage (NJAMD is a
3825 good tool for this). */
/* Smallest buffer that is ever allocated.  */
#define MIN_BUFF_SIZE 8000
/* Largest free buffer that may be handed out for a request of
   MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size to request when extending BUFF: MIN_EXTRA bytes plus twice the
   room between BUFF's cur and limit pointers.  Both arguments are
   parenthesized so that an argument containing a low-precedence
   operator still expands as a single operand.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
3831 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
3832 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
3835 /* Create a new allocation buffer. Place the control block at the end
3836 of the buffer, so that buffer overflows will cause immediate chaos. */
/* LEN is the requested capacity; it is raised to at least
   MIN_BUFF_SIZE and then rounded with CPP_ALIGN.
   NOTE(review): this listing omits some original lines (the #else and
   #endif of the conditional, the declaration of RESULT, the setting of
   the cur field and the return), so those parts cannot be confirmed
   from here.  */
3838 new_buff (size_t len
)
3841 unsigned char *base
;
3843 if (len
< MIN_BUFF_SIZE
)
3844 len
= MIN_BUFF_SIZE
;
3845 len
= CPP_ALIGN (len
);
3847 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3848 /* Valgrind warns about uses of interior pointers, so put _cpp_buff first. */
3850 size_t slen
= CPP_ALIGN2 (sizeof (_cpp_buff
), 2 * DEFAULT_ALIGNMENT
);
3851 base
= XNEWVEC (unsigned char, len
+ slen
);
3852 result
= (_cpp_buff
*) base
;
/* Other layout: the control block sits LEN bytes into the
   allocation, i.e. after the data area.  */
3855 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
3856 result
= (_cpp_buff
*) (base
+ len
);
3858 result
->base
= base
;
3860 result
->limit
= base
+ len
;
3861 result
->next
= NULL
;
3865 /* Place a chain of unwanted allocation buffers on the free list. */
3867 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
3869 _cpp_buff
*end
= buff
;
3873 end
->next
= pfile
->free_buffs
;
3874 pfile
->free_buffs
= buff
;
3877 /* Return a free buffer of size at least MIN_SIZE. */
3879 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
3881 _cpp_buff
*result
, **p
;
3883 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
3888 return new_buff (min_size
);
3890 size
= result
->limit
- result
->base
;
3891 /* Return a buffer that's big enough, but don't waste one that's
3893 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
3898 result
->next
= NULL
;
3899 result
->cur
= result
->base
;
3903 /* Creates a new buffer with enough space to hold the uncommitted
3904 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
3905 the excess bytes to the new buffer. Chains the new buffer after
3906 BUFF, and returns the new buffer. */
3908 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
3910 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
3911 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
3913 buff
->next
= new_buff
;
3914 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
3918 /* Creates a new buffer with enough space to hold the uncommitted
3919 remaining bytes of the buffer pointed to by BUFF, and at least
3920 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3921 Chains the new buffer before the buffer pointed to by BUFF, and
3922 updates the pointer to point to the new buffer. */
3924 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
3926 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
3927 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
3929 new_buff
= _cpp_get_buff (pfile
, size
);
3930 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
3931 new_buff
->next
= old_buff
;
3935 /* Free a chain of buffers starting at BUFF. */
3937 _cpp_free_buff (_cpp_buff
*buff
)
3941 for (; buff
; buff
= next
)
3944 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3952 /* Allocate permanent, unaligned storage of length LEN. */
3954 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
3956 _cpp_buff
*buff
= pfile
->u_buff
;
3957 unsigned char *result
= buff
->cur
;
3959 if (len
> (size_t) (buff
->limit
- result
))
3961 buff
= _cpp_get_buff (pfile
, len
);
3962 buff
->next
= pfile
->u_buff
;
3963 pfile
->u_buff
= buff
;
3967 buff
->cur
= result
+ len
;
3971 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3972 That buffer is used for growing allocations when saving macro
3973 replacement lists in a #define, and when parsing an answer to an
3974 assertion in #assert, #unassert or #if (and therefore possibly
3975 whilst expanding macros). It therefore must not be used by any
3976 code that they might call: specifically the lexer and the guts of
3979 All existing other uses clearly fit this restriction: storing
3980 registered pragmas during initialization. */
3982 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
3984 _cpp_buff
*buff
= pfile
->a_buff
;
3985 unsigned char *result
= buff
->cur
;
3987 if (len
> (size_t) (buff
->limit
- result
))
3989 buff
= _cpp_get_buff (pfile
, len
);
3990 buff
->next
= pfile
->a_buff
;
3991 pfile
->a_buff
= buff
;
3995 buff
->cur
= result
+ len
;
3999 /* Commit or allocate storage from a buffer. */
/* SIZE is the number of bytes involved.  PTR starts as the front of
   PFILE's a_buff.  When the hash table provides an alloc_subobject
   hook, SIZE bytes are copied from there into storage obtained from
   that hook.
   NOTE(review): this listing omits the lines between the memcpy and
   the BUFF_FRONT update, and the return, so whether the front is
   advanced unconditionally or only when no hook is set cannot be
   confirmed from here.  */
4002 _cpp_commit_buff (cpp_reader
*pfile
, size_t size
)
4004 void *ptr
= BUFF_FRONT (pfile
->a_buff
);
4006 if (pfile
->hash_table
->alloc_subobject
)
4008 void *copy
= pfile
->hash_table
->alloc_subobject (size
);
4009 memcpy (copy
, ptr
, size
);
/* Advance the front of a_buff past the SIZE bytes.  */
4013 BUFF_FRONT (pfile
->a_buff
) += size
;
4018 /* Say which field of TOK is in use. */
/* The answer is chosen from the spelling category of TOK (via
   TOKEN_SPELL), refined by TOK's flags and type where shown below.
   NOTE(review): this listing omits several case labels of the switch,
   so which category leads to the first two returns and to the
   CPP_MACRO_ARG / CPP_PADDING / CPP_PRAGMA tests cannot be confirmed
   from here.  */
4020 enum cpp_token_fld_kind
4021 cpp_token_val_index (const cpp_token
*tok
)
4023 switch (TOKEN_SPELL (tok
))
4026 return CPP_TOKEN_FLD_NODE
;
4028 return CPP_TOKEN_FLD_STR
;
4029 case SPELL_OPERATOR
:
4030 /* Operators which were originally spelled as ident keep around
4031 the node for the exact spelling. */
4032 if (tok
->flags
& NAMED_OP
)
4033 return CPP_TOKEN_FLD_NODE
;
4034 else if (tok
->type
== CPP_PASTE
)
4035 return CPP_TOKEN_FLD_TOKEN_NO
;
4037 return CPP_TOKEN_FLD_NONE
;
/* Token types that select a dedicated field: macro argument number,
   padding source, or pragma.  */
4039 if (tok
->type
== CPP_MACRO_ARG
)
4040 return CPP_TOKEN_FLD_ARG_NO
;
4041 else if (tok
->type
== CPP_PADDING
)
4042 return CPP_TOKEN_FLD_SOURCE
;
4043 else if (tok
->type
== CPP_PRAGMA
)
4044 return CPP_TOKEN_FLD_PRAGMA
;
4047 return CPP_TOKEN_FLD_NONE
;
4051 /* All tokens lexed in R after calling this function will be forced to
4052 have their location_t set to LOC, until
4053 cpp_stop_forcing_token_locations is called for R. */
/* This function only records LOC in R's forced_token_location field.  */
4056 cpp_force_token_locations (cpp_reader
*r
, location_t loc
)
4058 r
->forced_token_location
= loc
;
4061 /* Go back to assigning locations naturally for lexed tokens. */
/* Done by resetting R's forced_token_location field to 0, undoing
   cpp_force_token_locations.  */
4064 cpp_stop_forcing_token_locations (cpp_reader
*r
)
4066 r
->forced_token_location
= 0;
/* PEEK addresses a backslash.  If that backslash escapes an end of
   line ("\n", "\r" or "\r\n"), return the position just past the line
   ending, repeating the process when another backslash is found
   there.  If the position past the line ending is not below LIMIT,
   or the backslash escapes no line ending, return PEEK unchanged.  */
static const unsigned char *
do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
{
  size_t skip;

  /* Length of the backslash plus the line ending it escapes.  */
  if (__builtin_expect (peek[1] == '\n', 1))
    skip = 2;
  else if (__builtin_expect (peek[1] == '\r', 0))
    skip = (peek[2] == '\n') ? 3 : 2;
  else
    return peek;

  const unsigned char *after = peek + skip;

  /* At LIMIT, don't advance.  */
  if (!__builtin_expect (after < limit, 1))
    return peek;

  /* The user might be perverse and chain several escaped newlines.  */
  if (*after == '\\')
    return do_peek_backslash (after, limit);

  return after;
}
/* Return the position to examine next starting from PEEK: PEEK
   itself, unless it addresses a backslash, in which case
   do_peek_backslash decides whether an escaped line ending can be
   stepped over (bounded by LIMIT).  */
static const unsigned char *
do_peek_next (const unsigned char *peek, const unsigned char *limit)
{
  return __builtin_expect (*peek == '\\', 0)
         ? do_peek_backslash (peek, limit)
         : peek;
}
/* Step backwards from PEEK and return the address of the preceding
   character, or NULL when PEEK is already at BOUND.  When the
   preceding character is a line ending ("\n", "\r" or "\r\n") that is
   itself preceded by a backslash, the escaped line ending is skipped
   and the search continues from the backslash.  BOUND is the lowest
   address that may be examined.

   The line-ending test compares against '\r' (carriage return); it
   previously compared against the letter 'r', which skipped a
   backslash followed by that letter and missed an escaped lone
   carriage return.  */
static const unsigned char *
do_peek_prev (const unsigned char *peek, const unsigned char *bound)
{
  if (peek == bound)
    return NULL;

  unsigned char c = *--peek;
  if (__builtin_expect (c == '\n', 0)
      || __builtin_expect (c == '\r', 0))
    {
      /* Nothing lies before the line ending: it cannot be escaped.  */
      if (peek == bound)
        return peek;

      /* Offset from PEEK of the character before the line ending.  */
      int ix = -1;
      if (c == '\n' && peek[ix] == '\r')
        {
          /* A "\r\n" pair; look in front of the carriage return.  */
          if (peek + ix == bound)
            return peek;
          ix--;
        }

      /* Escaped line ending: continue backwards from the backslash.  */
      if (peek[ix] == '\\')
        return do_peek_prev (peek + ix, bound);

      return peek;
    }

  return peek;
}
4136 /* If PEEK[-1] is identifier MATCH, scan past it and trailing white
4137 space. Otherwise return NULL. */
/* The comparison starts at MATCH[1] against *PEEK, so the first
   character of MATCH is taken to have been checked by the caller.  On
   a mismatch the character is re-read through do_peek_next, which
   allows for an escaped line ending inside the identifier.  LIMIT is
   passed through to the peeking helpers.
   NOTE(review): this listing omits some original lines (braces, the
   return statements and the whitespace-loop label/goto), so those
   parts cannot be confirmed from here.  */
4139 static const unsigned char *
4140 do_peek_ident (const char *match
, const unsigned char *peek
,
4141 const unsigned char *limit
)
4143 for (; *++match
; peek
++)
4144 if (*peek
!= *match
)
4146 peek
= do_peek_next (peek
, limit
);
4147 if (*peek
!= *match
)
4151 /* Must now not be looking at an identifier char. */
4152 peek
= do_peek_next (peek
, limit
);
4153 if (ISIDNUM (*peek
))
4156 /* Skip control-line whitespace. */
4158 while (*peek
== ' ' || *peek
== '\t')
/* A backslash here may escape a line ending inside the whitespace.  */
4160 if (__builtin_expect (*peek
== '\\', false))
4162 peek
= do_peek_backslash (peek
, limit
);
4170 /* Are we looking at a module control line starting at PEEK - 1? */
/* C is the first character of the candidate line and PEEK addresses
   the character after it (the caller passes the character it has just
   read and its advanced position).  The keywords recognised are
   "export" (followed by "import" or "module"), "import", "__import"
   and "module"; each is matched with do_peek_ident after a quick test
   of the next character, which may also be a backslash.  The
   character following the keyword is then examined, consulting the
   rliterals and digraphs options of PFILE.  LIMIT is passed through
   to the peeking helpers.
   NOTE(review): this listing omits many original lines (braces,
   return statements and several conditions), so the exact
   accept/reject outcome of each branch cannot be confirmed from here.  */
4173 do_peek_module (cpp_reader
*pfile
, unsigned char c
,
4174 const unsigned char *peek
, const unsigned char *limit
)
4176 bool import
= false;
4178 if (__builtin_expect (c
== 'e', false))
4180 if (!((peek
[0] == 'x' || peek
[0] == '\\')
4181 && (peek
= do_peek_ident ("export", peek
, limit
))))
4184 /* export, peek for import or module. No need to peek __import here. */
4188 if (!((peek
[1] == 'm' || peek
[1] == '\\')
4189 && (peek
= do_peek_ident ("import", peek
+ 1, limit
))))
4193 else if (peek
[0] == 'm')
4195 if (!((peek
[1] == 'o' || peek
[1] == '\\')
4196 && (peek
= do_peek_ident ("module", peek
+ 1, limit
))))
4202 else if (__builtin_expect (c
== 'i', false))
4204 if (!((peek
[0] == 'm' || peek
[0] == '\\')
4205 && (peek
= do_peek_ident ("import", peek
, limit
))))
4209 else if (__builtin_expect (c
== '_', false))
4211 /* Needed for translated includes. */
4212 if (!((peek
[0] == '_' || peek
[0] == '\\')
4213 && (peek
= do_peek_ident ("__import", peek
, limit
))))
4217 else if (__builtin_expect (c
== 'm', false))
4219 if (!((peek
[0] == 'o' || peek
[0] == '\\')
4220 && (peek
= do_peek_ident ("module", peek
, limit
))))
4226 /* Peek the next character to see if it's good enough. We'll be at
4227 the first non-whitespace char, including skipping an escaped newline. */
4229 /* ... import followed by identifier, ':', '<' or header-name
4230 preprocessing tokens, or module followed by identifier, ':' or
4231 ';' preprocessing tokens. */
4232 unsigned char p
= *peek
++;
4234 /* A character literal is ... single quotes, ... optionally preceded
4235 by u8, u, U, or L */
4236 /* A string-literal is a ... double quotes, optionally prefixed by
4237 R, u8, u8R, u, uR, U, UR, L, or LR */
4240 peek
= do_peek_next (peek
, limit
);
4248 else if (p
== 'U' || p
== 'L')
4251 peek
= do_peek_next (peek
, limit
);
4253 if (*peek
== '\"' || *peek
== '\'')
4258 /* Identifier. Ok. */
4263 if (CPP_OPTION (pfile
, rliterals
))
4265 peek
= do_peek_next (peek
, limit
);
4269 /* Identifier. Ok. */
/* The 'Z' - 'A' == 25 test selects the simple range comparison when
   the letters are contiguous in the execution character set.  */
4271 else if ('Z' - 'A' == 25
4272 ? ((p
>= 'A' && p
<= 'Z') || (p
>= 'a' && p
<= 'z') || p
== '_')
4275 /* Identifier. Ok. */
4279 /* Maybe angle header, ok for import. Reject
4280 '<=', '<<' digraph:'<:'. */
4283 peek
= do_peek_next (peek
, limit
);
4284 if (*peek
== '=' || *peek
== '<'
4285 || (*peek
== ':' && CPP_OPTION (pfile
, digraphs
)))
4290 /* SEMICOLON, ok for module. */
4296 /* STRING, ok for import. */
4302 /* Maybe COLON, ok. Reject '::', digraph:':>'. */
4303 peek
= do_peek_next (peek
, limit
);
4304 if (*peek
== ':' || (*peek
== '>' && CPP_OPTION (pfile
, digraphs
)))
4308 /* FIXME: Detect a unicode character, excluding those not
4309 permitted as the initial character. [lex.name]/1. I presume
4310 we need to check the \[uU] spellings, and directly using
4311 Unicode in say UTF8 form? Or perhaps we do the phase-1
4312 conversion of UTF8 to universal-character-names? */
4318 /* Directives-only scanning. Somewhat more relaxed than correct
4319 parsing -- some ill-formed programs will not be rejected. */
4322 cpp_directive_only_process (cpp_reader
*pfile
,
4324 void (*cb
) (cpp_reader
*, CPP_DO_task
, void *, ...))
4326 bool module_p
= CPP_OPTION (pfile
, module_directives
);
4331 /* Buffer initialization, but no line cleaning. */
4332 cpp_buffer
*buffer
= pfile
->buffer
;
4333 buffer
->cur_note
= buffer
->notes_used
= 0;
4334 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
4335 buffer
->need_line
= false;
4336 /* Files always end in a newline or carriage return. We rely on this for
4337 character peeking safety. */
4338 gcc_assert (buffer
->rlimit
[0] == '\n' || buffer
->rlimit
[0] == '\r');
4340 const unsigned char *base
= buffer
->cur
;
4341 unsigned line_count
= 0;
4342 const unsigned char *line_start
= base
;
4347 const unsigned char *lwm
= base
;
4348 for (const unsigned char *pos
= base
, *limit
= buffer
->rlimit
;
4351 unsigned char c
= *pos
++;
4352 /* This matches the switch in _cpp_lex_direct. */
4355 case ' ': case '\t': case '\f': case '\v':
4356 /* Whitespace, do nothing. */
4359 case '\r': /* MAC line ending, or Windows \r\n */
4368 CPP_INCREMENT_LINE (pfile
, 0);
4374 /* <backslash><newline> is removed, and doesn't undo any
4375 preceding escape or whatnot. */
4381 else if (*pos
== '\r')
4393 /* Line directive. */
4394 if (pos
- 1 > base
&& !pfile
->state
.skipping
)
4395 cb (pfile
, CPP_DO_print
, data
,
4396 line_count
, base
, pos
- 1 - base
);
4398 /* Prep things for directive handling. */
4399 buffer
->next_line
= pos
;
4400 buffer
->need_line
= true;
4401 bool ok
= _cpp_get_fresh_line (pfile
);
4402 gcc_checking_assert (ok
);
4404 /* Ensure proper column numbering for generated
4406 buffer
->line_base
-= pos
- line_start
;
4408 _cpp_handle_directive (pfile
, line_start
+ 1 != pos
);
4410 /* Sanitize the line settings. Duplicate #include's can
4412 // FIXME: Necessary?
4413 pfile
->line_table
->highest_location
4414 = pfile
->line_table
->highest_line
;
4416 if (!pfile
->state
.skipping
4417 && pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
4418 cb (pfile
, CPP_DO_location
, data
,
4419 pfile
->line_table
->highest_line
);
4427 const unsigned char *peek
= do_peek_next (pos
, limit
);
4428 if (!(*peek
== '/' || *peek
== '*'))
4431 /* Line or block comment */
4432 bool is_block
= *peek
== '*';
4436 = linemap_position_for_column (pfile
->line_table
,
4455 CPP_INCREMENT_LINE (pfile
, 0);
4458 if (!esc
&& !is_block
)
4470 if (pos
> peek
&& !esc
)
4486 if (pos
< limit
|| is_block
)
4487 cpp_error_with_line (pfile
, CPP_DL_ERROR
, sloc
, 0,
4488 "unterminated comment");
4495 if (!CPP_OPTION (pfile
, digit_separators
))
4496 goto delimited_string
;
4498 /* Possibly a number punctuator. */
4499 if (!ISIDNUM (*do_peek_next (pos
, limit
)))
4500 goto delimited_string
;
4505 if (!CPP_OPTION (pfile
, rliterals
))
4506 goto delimited_string
;
4510 /* For ' see if it's a number punctuator
4511 \.?<digit>(<digit>|<identifier-nondigit>
4512 |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
4513 /* For " see if it's a raw string
4514 {U,L,u,u8}R. This includes CPP_NUMBER detection,
4515 because that could be 0e+R. */
4516 const unsigned char *peek
= pos
- 1;
4517 bool quote_first
= c
== '"';
4518 bool quote_eight
= false;
4519 bool maybe_number_start
= false;
4520 bool want_number
= false;
4522 while ((peek
= do_peek_prev (peek
, lwm
)))
4524 unsigned char p
= *peek
;
4535 quote_first
= false;
4536 if (p
== 'L' || p
== 'U' || p
== 'u')
4543 else if (quote_eight
)
4550 quote_eight
= false;
4555 if (!want_number
&& ISIDNUM (p
))
4563 maybe_number_start
= true;
4566 else if (ISIDNUM (p
))
4567 maybe_number_start
= false;
4568 else if (p
== '+' || p
== '-')
4570 if (const unsigned char *peek_prev
4571 = do_peek_prev (peek
, lwm
))
4574 if (p
== 'e' || p
== 'E'
4575 || p
== 'p' || p
== 'P')
4578 maybe_number_start
= false;
4586 else if (p
== '\'' || p
== '\"')
4588 /* If this is lwm, this must be the end of a
4589 previous string. So this is a trailing
4590 literal type, (a) if those are allowed,
4591 and (b) maybe_start is false. Otherwise
4592 this must be a CPP_NUMBER because we've
4593 met another ', and we'd have checked that
4594 in its own right. */
4595 if (peek
== lwm
&& CPP_OPTION (pfile
, uliterals
))
4597 if (!maybe_number_start
&& !want_number
)
4598 /* Must be a literal type. */
4602 && CPP_OPTION (pfile
, digit_separators
))
4603 maybe_number_start
= true;
4608 else if (!quote_first
&& !quote_eight
)
4612 if (maybe_number_start
)
4620 goto delimited_string
;
4625 /* (Possibly raw) string or char literal. */
4626 unsigned char end
= c
;
4628 const unsigned char *delim
= NULL
;
4629 location_t sloc
= linemap_position_for_column (pfile
->line_table
,
4635 /* There can be no line breaks in the delimiter. */
4637 for (delim_len
= 0; (c
= *pos
++) != '('; delim_len
++)
4639 if (delim_len
== 16)
4641 cpp_error_with_line (pfile
, CPP_DL_ERROR
,
4643 "raw string delimiter"
4651 if (strchr (") \\\t\v\f\n", c
))
4653 cpp_error_with_line (pfile
, CPP_DL_ERROR
,
4655 "invalid character '%c'"
4684 CPP_INCREMENT_LINE (pfile
, 0);
4694 && pos
+ delim_len
+ 1 < limit
4695 && pos
[delim_len
] == end
4696 && !memcmp (delim
, pos
, delim_len
))
4698 pos
+= delim_len
+ 1;
4705 if (!raw
&& !(esc
& 1) && c
== end
)
4712 cpp_error_with_line (pfile
, CPP_DL_ERROR
, sloc
, 0,
4713 "unterminated literal");
4725 if (bol
&& module_p
&& !pfile
->state
.skipping
4726 && do_peek_module (pfile
, c
, pos
, limit
))
4728 /* We've seen the start of a module control line.
4729 Start up the tokenizer. */
4730 pos
--; /* Backup over the first character. */
4732 /* Backup over whitespace to start of line. */
4733 while (pos
> line_start
4734 && (pos
[-1] == ' ' || pos
[-1] == '\t'))
4738 cb (pfile
, CPP_DO_print
, data
, line_count
, base
, pos
- base
);
4740 /* Prep things for directive handling. */
4741 buffer
->next_line
= pos
;
4742 buffer
->need_line
= true;
4744 /* Now get tokens until the PRAGMA_EOL. */
4747 location_t spelling
;
4748 const cpp_token
*tok
4749 = cpp_get_token_with_location (pfile
, &spelling
);
4751 gcc_assert (pfile
->state
.in_deferred_pragma
4752 || tok
->type
== CPP_PRAGMA_EOL
);
4753 cb (pfile
, CPP_DO_token
, data
, tok
, spelling
);
4755 while (pfile
->state
.in_deferred_pragma
);
4757 if (pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
4758 cb (pfile
, CPP_DO_location
, data
,
4759 pfile
->line_table
->highest_line
);
4761 pfile
->mi_valid
= false;
4769 pfile
->mi_valid
= false;
4774 if (buffer
->rlimit
> base
&& !pfile
->state
.skipping
)
4776 const unsigned char *limit
= buffer
->rlimit
;
4777 /* If the file was not newline terminated, add rlimit, which is
4778 guaranteed to point to a newline, to the end of our range. */
4779 if (limit
[-1] != '\n')
4782 CPP_INCREMENT_LINE (pfile
, 0);
4785 cb (pfile
, CPP_DO_print
, data
, line_count
, base
, limit
- base
);
4788 _cpp_pop_buffer (pfile
);
4790 while (pfile
->buffer
);