1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000-2021 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
37 enum spell_type category
;
38 const unsigned char *name
;
41 static const unsigned char *const digraph_spellings
[] =
42 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
/* Accessors into the token_spellings table, indexed by the token's type.
   TOKEN is a pointer to a token; TOKEN_SPELL yields the table entry's
   spelling category and TOKEN_NAME yields its name string.  */
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
57 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
58 static void store_comment (cpp_reader
*, cpp_token
*);
59 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
60 unsigned int, enum cpp_ttype
);
61 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
62 static int name_p (cpp_reader
*, const cpp_string
*);
63 static tokenrun
*next_tokenrun (tokenrun
*);
65 static _cpp_buff
*new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token
*token
, const char *string
)
75 if (token
->type
!= CPP_NAME
)
78 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
86 if (buffer
->notes_used
== buffer
->notes_cap
)
88 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
89 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
93 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
94 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds
104 One of the paths through the ifdefs should provide
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
115 /* Configure gives us an ifdef test. */
116 #ifndef WORDS_BIGENDIAN
117 #define WORDS_BIGENDIAN 0
120 /* We'd like the largest integer that fits into a register. There's nothing
121 in <stdint.h> that gives us that. For most hosts this is unsigned long,
122 but MS decided on an LLP64 model. Thankfully when building with GCC we
123 can get the "real" word size. */
125 typedef unsigned int word_type
__attribute__((__mode__(__word__
)));
127 typedef unsigned long word_type
;
130 /* The code below is only expecting sizes 4 or 8.
131 Die at compile-time if this expectation is violated. */
132 typedef char check_word_type_size
133 [(sizeof(word_type
) == 8 || sizeof(word_type
) == 4) * 2 - 1];
135 /* Return VAL with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
138 static inline word_type
139 acc_char_mask_misalign (word_type val
, unsigned int n
)
149 /* Return X replicated to all byte positions within WORD_TYPE. */
151 static inline word_type
152 acc_char_replicate (uchar x
)
156 ret
= (x
<< 24) | (x
<< 16) | (x
<< 8) | x
;
157 if (sizeof(word_type
) == 8)
158 ret
= (ret
<< 16 << 16) | ret
;
162 /* Return non-zero if some byte of VAL is (probably) C. */
164 static inline word_type
165 acc_char_cmp (word_type val
, word_type c
)
167 #if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val
^ c
);
172 word_type magic
= 0x7efefefeU
;
173 if (sizeof(word_type
) == 8)
174 magic
= (magic
<< 16 << 16) | 0xfefefefeU
;
178 return ((val
+ magic
) ^ ~val
) & ~magic
;
182 /* Given that the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED
,
187 word_type val ATTRIBUTE_UNUSED
)
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp
);
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i
= 0; i
< sizeof(word_type
); ++i
)
202 c
= (val
>> (sizeof(word_type
) - i
- 1) * 8) & 0xff;
204 c
= (val
>> i
* 8) & 0xff;
206 if (c
== '\n' || c
== '\r' || c
== '\\' || c
== '?')
214 /* A version of the fast scanner using bit fiddling techniques.
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
225 static const uchar
* search_line_acc_char (const uchar
*, const uchar
*)
229 search_line_acc_char (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
231 const word_type repl_nl
= acc_char_replicate ('\n');
232 const word_type repl_cr
= acc_char_replicate ('\r');
233 const word_type repl_bs
= acc_char_replicate ('\\');
234 const word_type repl_qm
= acc_char_replicate ('?');
236 unsigned int misalign
;
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p
= (word_type
*)((uintptr_t)s
& -sizeof(word_type
));
243 misalign
= (uintptr_t)s
& (sizeof(word_type
) - 1);
245 val
= acc_char_mask_misalign (val
, misalign
);
250 t
= acc_char_cmp (val
, repl_nl
);
251 t
|= acc_char_cmp (val
, repl_cr
);
252 t
|= acc_char_cmp (val
, repl_bs
);
253 t
|= acc_char_cmp (val
, repl_qm
);
255 if (__builtin_expect (t
!= 0, 0))
257 int i
= acc_char_index (t
, val
);
259 return (const uchar
*)p
+ i
;
266 /* Disable on Solaris 2/x86 until the following problem can be properly
269 The Solaris 10+ assembler tags objects with the instruction set
270 extensions used, so SSE4.2 executables cannot run on machines that
271 don't support that extension. */
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
275 /* Replicated character data to be shared between implementations.
276 Recall that outside of a context with vector support we can't
277 define compatible vector types, therefore these are all defined
278 in terms of raw characters. */
279 static const char repl_chars
[4][16] __attribute__((aligned(16))) = {
280 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
281 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
282 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
283 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
284 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
285 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
286 { '?', '?', '?', '?', '?', '?', '?', '?',
287 '?', '?', '?', '?', '?', '?', '?', '?' },
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
293 which was packaged into SSE1; it is also present in the AMD MMX
294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
299 __attribute__((__target__("sse")))
301 search_line_mmx (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
303 typedef char v8qi
__attribute__ ((__vector_size__ (8)));
304 typedef int __m64
__attribute__ ((__vector_size__ (8), __may_alias__
));
306 const v8qi repl_nl
= *(const v8qi
*)repl_chars
[0];
307 const v8qi repl_cr
= *(const v8qi
*)repl_chars
[1];
308 const v8qi repl_bs
= *(const v8qi
*)repl_chars
[2];
309 const v8qi repl_qm
= *(const v8qi
*)repl_chars
[3];
311 unsigned int misalign
, found
, mask
;
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign
= (uintptr_t)s
& 7;
319 p
= (const v8qi
*)((uintptr_t)s
& -8);
322 /* Create a mask for the bytes that are valid within the first
323 8-byte block. The idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask
= -1u << misalign
;
328 /* Main loop processing 8 bytes at a time. */
336 t
= __builtin_ia32_pcmpeqb(data
, repl_nl
);
337 c
= __builtin_ia32_pcmpeqb(data
, repl_cr
);
338 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
339 c
= __builtin_ia32_pcmpeqb(data
, repl_bs
);
340 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
341 c
= __builtin_ia32_pcmpeqb(data
, repl_qm
);
342 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
343 found
= __builtin_ia32_pmovmskb (t
);
348 __builtin_ia32_emms ();
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found
= __builtin_ctz(found
);
353 return (const uchar
*)p
+ found
;
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
360 __attribute__((__target__("sse2")))
362 search_line_sse2 (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
364 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
366 const v16qi repl_nl
= *(const v16qi
*)repl_chars
[0];
367 const v16qi repl_cr
= *(const v16qi
*)repl_chars
[1];
368 const v16qi repl_bs
= *(const v16qi
*)repl_chars
[2];
369 const v16qi repl_qm
= *(const v16qi
*)repl_chars
[3];
371 unsigned int misalign
, found
, mask
;
375 /* Align the source pointer. */
376 misalign
= (uintptr_t)s
& 15;
377 p
= (const v16qi
*)((uintptr_t)s
& -16);
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask
= -1u << misalign
;
386 /* Main loop processing 16 bytes at a time. */
395 t
|= data
== repl_cr
;
396 t
|= data
== repl_bs
;
397 t
|= data
== repl_qm
;
398 found
= __builtin_ia32_pmovmskb128 (t
);
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found
= __builtin_ctz(found
);
406 return (const uchar
*)p
+ found
;
410 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
414 __attribute__((__target__("sse4.2")))
416 search_line_sse42 (const uchar
*s
, const uchar
*end
)
418 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
419 static const v16qi search
= { '\n', '\r', '?', '\\' };
421 uintptr_t si
= (uintptr_t)s
;
424 /* Check for unaligned input. */
429 if (__builtin_expect (end
- s
< 16, 0)
430 && __builtin_expect ((si
& 0xfff) > 0xff0, 0))
432 /* There are fewer than 16 bytes left in the buffer, and fewer
433 than 16 bytes left on the page. Reading 16 bytes at this
434 point might generate a spurious page fault. Defer to the
435 SSE2 implementation, which already handles alignment. */
436 return search_line_sse2 (s
, end
);
439 /* ??? The builtin doesn't understand that the PCMPESTRI read from
440 memory need not be aligned. */
441 sv
= __builtin_ia32_loaddqu ((const char *) s
);
442 index
= __builtin_ia32_pcmpestri128 (search
, 4, sv
, 16, 0);
444 if (__builtin_expect (index
< 16, 0))
447 /* Advance the pointer to an aligned address. We will re-scan a
448 few bytes, but we no longer need to care about reading past the
449 end of a page, since we're guaranteed a match. */
450 s
= (const uchar
*)((si
+ 15) & -16);
453 /* Main loop, processing 16 bytes at a time. */
454 #ifdef __GCC_ASM_FLAG_OUTPUTS__
459 /* By using inline assembly instead of the builtin,
460 we can use the result, as well as the flags set. */
461 __asm ("%vpcmpestri\t$0, %2, %3"
462 : "=c"(index
), "=@ccc"(f
)
463 : "m"(*s
), "x"(search
), "a"(4), "d"(16));
471 /* By doing the whole loop in inline assembly,
472 we can make proper use of the flags set. */
473 __asm ( ".balign 16\n"
475 " %vpcmpestri\t$0, (%1), %2\n"
477 : "=&c"(index
), "+r"(s
)
478 : "x"(search
), "a"(4), "d"(16));
486 /* Work around out-dated assemblers without sse4 support. */
487 #define search_line_sse42 search_line_sse2
490 /* Check the CPU capabilities. */
492 #include "../gcc/config/i386/cpuid.h"
494 typedef const uchar
* (*search_line_fast_type
) (const uchar
*, const uchar
*);
495 static search_line_fast_type search_line_fast
;
497 #define HAVE_init_vectorized_lexer 1
499 init_vectorized_lexer (void)
501 unsigned dummy
, ecx
= 0, edx
= 0;
502 search_line_fast_type impl
= search_line_acc_char
;
505 #if defined(__SSE4_2__)
507 #elif defined(__SSE2__)
509 #elif defined(__SSE__)
514 impl
= search_line_sse42
;
515 else if (__get_cpuid (1, &dummy
, &dummy
, &ecx
, &edx
) || minimum
== 2)
517 if (minimum
== 3 || (ecx
& bit_SSE4_2
))
518 impl
= search_line_sse42
;
519 else if (minimum
== 2 || (edx
& bit_SSE2
))
520 impl
= search_line_sse2
;
521 else if (minimum
== 1 || (edx
& bit_SSE
))
522 impl
= search_line_mmx
;
524 else if (__get_cpuid (0x80000001, &dummy
, &dummy
, &dummy
, &edx
))
527 || (edx
& (bit_MMXEXT
| bit_CMOV
)) == (bit_MMXEXT
| bit_CMOV
))
528 impl
= search_line_mmx
;
531 search_line_fast
= impl
;
534 #elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
536 /* A version of the fast scanner using AltiVec vectorized byte compares
537 and VSX unaligned loads (when VSX is available). This is otherwise
538 the same as the AltiVec version. */
540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
542 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
544 typedef __attribute__((altivec(vector
))) unsigned char vc
;
547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
559 '?', '?', '?', '?', '?', '?', '?', '?',
560 '?', '?', '?', '?', '?', '?', '?', '?',
562 const vc zero
= { 0 };
566 /* Main loop processing 16 bytes at a time. */
569 vc m_nl
, m_cr
, m_bs
, m_qm
;
571 data
= __builtin_vec_vsx_ld (0, s
);
574 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
575 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
576 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
577 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
578 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
580 /* T now contains 0xff in bytes for which we matched one of the relevant
581 characters. We want to exit the loop if any byte in T is non-zero.
582 Below is the expansion of vec_any_ne(t, zero). */
584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
586 /* Restore s to point to the 16 bytes we just processed. */
590 #define N (sizeof(vc) / sizeof(long))
594 /* Statically assert that N is 2 or 4. */
595 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
597 unsigned long l
, i
= 0;
601 /* Find the first word of T that is non-zero. */
608 s
+= sizeof(unsigned long);
612 s
+= sizeof(unsigned long);
618 s
+= sizeof(unsigned long);
622 /* L now contains 0xff in bytes for which we matched one of the
623 relevant characters. We can find the byte index by finding
624 its bit index and dividing by 8. */
625 #ifdef __BIG_ENDIAN__
626 l
= __builtin_clzl(l
) >> 3;
628 l
= __builtin_ctzl(l
) >> 3;
636 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
638 /* A version of the fast scanner using AltiVec vectorized byte compares.
639 This cannot be used for little endian because vec_lvsl/lvsr are
640 deprecated for little endian and the code won't work properly. */
641 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
642 so we can't compile this function without -maltivec on the command line
643 (or implied by some other switch). */
646 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
648 typedef __attribute__((altivec(vector
))) unsigned char vc
;
651 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
652 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
655 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
656 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
659 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
660 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
663 '?', '?', '?', '?', '?', '?', '?', '?',
664 '?', '?', '?', '?', '?', '?', '?', '?',
667 -1, -1, -1, -1, -1, -1, -1, -1,
668 -1, -1, -1, -1, -1, -1, -1, -1,
670 const vc zero
= { 0 };
674 /* Altivec loads automatically mask addresses with -16. This lets us
675 issue the first load as early as possible. */
676 data
= __builtin_vec_ld(0, (const vc
*)s
);
678 /* Discard bytes before the beginning of the buffer. Do this by
679 beginning with all ones and shifting in zeros according to the
680 mis-alignment. The LVSR instruction pulls the exact shift we
681 want from the address. */
682 mask
= __builtin_vec_lvsr(0, s
);
683 mask
= __builtin_vec_perm(zero
, ones
, mask
);
686 /* While altivec loads mask addresses, we still need to align S so
687 that the offset we compute at the end is correct. */
688 s
= (const uchar
*)((uintptr_t)s
& -16);
690 /* Main loop processing 16 bytes at a time. */
694 vc m_nl
, m_cr
, m_bs
, m_qm
;
697 data
= __builtin_vec_ld(0, (const vc
*)s
);
700 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
701 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
702 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
703 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
704 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
706 /* T now contains 0xff in bytes for which we matched one of the relevant
707 characters. We want to exit the loop if any byte in T is non-zero.
708 Below is the expansion of vec_any_ne(t, zero). */
710 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
713 #define N (sizeof(vc) / sizeof(long))
717 /* Statically assert that N is 2 or 4. */
718 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
720 unsigned long l
, i
= 0;
724 /* Find the first word of T that is non-zero. */
731 s
+= sizeof(unsigned long);
735 s
+= sizeof(unsigned long);
741 s
+= sizeof(unsigned long);
745 /* L now contains 0xff in bytes for which we matched one of the
746 relevant characters. We can find the byte index by finding
747 its bit index and dividing by 8. */
748 l
= __builtin_clzl(l
) >> 3;
755 #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
756 #include "arm_neon.h"
758 /* This doesn't have to be the exact page size, but no system may use
759 a size smaller than this. ARMv8 requires a minimum page size of
760 4k. The impact of being conservative here is a small number of
761 cases will take the slightly slower entry path into the main
764 #define AARCH64_MIN_PAGE_SIZE 4096
767 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
769 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
770 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
771 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
772 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
773 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
775 #ifdef __ARM_BIG_ENDIAN
776 const int16x8_t shift
= {8, 8, 8, 8, 0, 0, 0, 0};
778 const int16x8_t shift
= {0, 0, 0, 0, 8, 8, 8, 8};
788 /* Align the source pointer. */
789 p
= (const uint8_t *)((uintptr_t)s
& -16);
791 /* Assuming random string start positions, with a 4k page size we'll take
792 the slow path about 0.37% of the time. */
793 if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
794 - (((uintptr_t) s
) & (AARCH64_MIN_PAGE_SIZE
- 1)))
797 /* Slow path: the string starts near a possible page boundary. */
798 uint32_t misalign
, mask
;
800 misalign
= (uintptr_t)s
& 15;
801 mask
= (-1u << misalign
) & 0xffff;
803 t
= vceqq_u8 (data
, repl_nl
);
804 u
= vceqq_u8 (data
, repl_cr
);
805 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
806 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
808 t
= vandq_u8 (t
, xmask
);
810 m
= vshlq_u16 (m
, shift
);
811 found
= vaddvq_u16 (m
);
814 return (const uchar
*)p
+ __builtin_ctz (found
);
818 data
= vld1q_u8 ((const uint8_t *) s
);
819 t
= vceqq_u8 (data
, repl_nl
);
820 u
= vceqq_u8 (data
, repl_cr
);
821 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
822 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
824 if (__builtin_expect (vpaddd_u64 ((uint64x2_t
)t
) != 0, 0))
832 t
= vceqq_u8 (data
, repl_nl
);
833 u
= vceqq_u8 (data
, repl_cr
);
834 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
835 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
837 } while (!vpaddd_u64 ((uint64x2_t
)t
));
840 /* Now that we've found the terminating substring, work out precisely where
842 t
= vandq_u8 (t
, xmask
);
844 m
= vshlq_u16 (m
, shift
);
845 found
= vaddvq_u16 (m
);
846 return (((((uintptr_t) p
) < (uintptr_t) s
) ? s
: (const uchar
*)p
)
847 + __builtin_ctz (found
));
850 #elif defined (__ARM_NEON)
851 #include "arm_neon.h"
854 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
856 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
857 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
858 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
859 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
860 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
862 unsigned int misalign
, found
, mask
;
866 /* Align the source pointer. */
867 misalign
= (uintptr_t)s
& 15;
868 p
= (const uint8_t *)((uintptr_t)s
& -16);
871 /* Create a mask for the bytes that are valid within the first
872 16-byte block. The idea here is that the AND with the mask
873 within the loop is "free", since we need some AND or TEST
874 insn in order to set the flags for the branch anyway. */
875 mask
= (-1u << misalign
) & 0xffff;
877 /* Main loop, processing 16 bytes at a time. */
885 uint8x16_t t
, u
, v
, w
;
892 t
= vceqq_u8 (data
, repl_nl
);
893 u
= vceqq_u8 (data
, repl_cr
);
894 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
895 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
896 t
= vandq_u8 (vorrq_u8 (v
, w
), xmask
);
897 l
= vpadd_u8 (vget_low_u8 (t
), vget_high_u8 (t
));
901 found
= vget_lane_u32 ((uint32x2_t
) vorr_u64 ((uint64x1_t
) n
,
902 vshr_n_u64 ((uint64x1_t
) n
, 24)), 0);
907 /* FOUND contains 1 in bits for which we matched a relevant
908 character. Conversion to the byte index is trivial. */
909 found
= __builtin_ctz (found
);
910 return (const uchar
*)p
+ found
;
915 /* We only have one accelerated alternative. Use a direct call so that
916 we encourage inlining. */
918 #define search_line_fast search_line_acc_char
922 /* Initialize the lexer if needed. */
925 _cpp_init_lexer (void)
927 #ifdef HAVE_init_vectorized_lexer
928 init_vectorized_lexer ();
932 /* Returns with a logical line that contains no escaped newlines or
933 trigraphs. This is a time-critical inner loop. */
935 _cpp_clean_line (cpp_reader
*pfile
)
941 buffer
= pfile
->buffer
;
942 buffer
->cur_note
= buffer
->notes_used
= 0;
943 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
944 buffer
->need_line
= false;
945 s
= buffer
->next_line
;
947 if (!buffer
->from_stage3
)
949 const uchar
*pbackslash
= NULL
;
951 /* Fast path. This is the common case of an un-escaped line with
952 no trigraphs. The primary win here is by not writing any
953 data back to memory until we have to. */
956 /* Perform an optimized search for \n, \r, \\, ?. */
957 s
= search_line_fast (s
, buffer
->rlimit
);
962 /* Record the location of the backslash and continue. */
965 else if (__builtin_expect (c
== '?', 0))
967 if (__builtin_expect (s
[1] == '?', false)
968 && _cpp_trigraph_map
[s
[2]])
970 /* Have a trigraph. We may or may not have to convert
971 it. Add a line note regardless, for -Wtrigraphs. */
972 add_line_note (buffer
, s
, s
[2]);
973 if (CPP_OPTION (pfile
, trigraphs
))
975 /* We do, and that means we have to switch to the
978 *d
= _cpp_trigraph_map
[s
[2]];
983 /* Not a trigraph. Continue on fast-path. */
990 /* This must be \r or \n. We're either done, or we'll be forced
991 to write back to the buffer and continue on the slow path. */
994 if (__builtin_expect (s
== buffer
->rlimit
, false))
997 /* DOS line ending? */
998 if (__builtin_expect (c
== '\r', false) && s
[1] == '\n')
1001 if (s
== buffer
->rlimit
)
1005 if (__builtin_expect (pbackslash
== NULL
, true))
1008 /* Check for escaped newline. */
1010 while (is_nvspace (p
[-1]))
1012 if (p
- 1 != pbackslash
)
1015 /* Have an escaped newline; process it and proceed to
1017 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
1019 buffer
->next_line
= p
- 1;
1027 if (c
== '\n' || c
== '\r')
1029 /* Handle DOS line endings. */
1030 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
1032 if (s
== buffer
->rlimit
)
1037 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
1039 if (p
== buffer
->next_line
|| p
[-1] != '\\')
1042 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
1044 buffer
->next_line
= p
- 1;
1046 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
1048 /* Add a note regardless, for the benefit of -Wtrigraphs. */
1049 add_line_note (buffer
, d
, s
[2]);
1050 if (CPP_OPTION (pfile
, trigraphs
))
1052 *d
= _cpp_trigraph_map
[s
[2]];
1060 while (*s
!= '\n' && *s
!= '\r')
1064 /* Handle DOS line endings. */
1065 if (*s
== '\r' && s
+ 1 != buffer
->rlimit
&& s
[1] == '\n')
1071 /* A sentinel note that should never be processed. */
1072 add_line_note (buffer
, d
+ 1, '\n');
1073 buffer
->next_line
= s
+ 1;
1076 /* Return true if the trigraph indicated by NOTE should be warned
1077 about in a comment. */
1079 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
1083 /* Within comments we don't warn about trigraphs, unless the
1084 trigraph forms an escaped newline, as that may change
1086 if (note
->type
!= '/')
1089 /* If -trigraphs, then this was an escaped newline iff the next note
1091 if (CPP_OPTION (pfile
, trigraphs
))
1092 return note
[1].pos
== note
->pos
;
1094 /* Otherwise, see if this forms an escaped newline. */
1096 while (is_nvspace (*p
))
1099 /* There might have been escaped newlines between the trigraph and the
1100 newline we found. Hence the position test. */
1101 return (*p
== '\n' && p
< note
[1].pos
);
1104 /* Process the notes created by add_line_note as far as the current
1107 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
1109 cpp_buffer
*buffer
= pfile
->buffer
;
1113 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
1116 if (note
->pos
> buffer
->cur
)
1120 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
1122 if (note
->type
== '\\' || note
->type
== ' ')
1124 if (note
->type
== ' ' && !in_comment
)
1125 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
1126 "backslash and newline separated by space");
1128 if (buffer
->next_line
> buffer
->rlimit
)
1130 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
1131 "backslash-newline at end of file");
1132 /* Prevent "no newline at end of file" warning. */
1133 buffer
->next_line
= buffer
->rlimit
;
1136 buffer
->line_base
= note
->pos
;
1137 CPP_INCREMENT_LINE (pfile
, 0);
1139 else if (_cpp_trigraph_map
[note
->type
])
1141 if (CPP_OPTION (pfile
, warn_trigraphs
)
1142 && (!in_comment
|| warn_in_comment (pfile
, note
)))
1144 if (CPP_OPTION (pfile
, trigraphs
))
1145 cpp_warning_with_line (pfile
, CPP_W_TRIGRAPHS
,
1146 pfile
->line_table
->highest_line
, col
,
1147 "trigraph ??%c converted to %c",
1149 (int) _cpp_trigraph_map
[note
->type
]);
1152 cpp_warning_with_line
1153 (pfile
, CPP_W_TRIGRAPHS
,
1154 pfile
->line_table
->highest_line
, col
,
1155 "trigraph ??%c ignored, use -trigraphs to enable",
1160 else if (note
->type
== 0)
1161 /* Already processed in lex_raw_string. */;
1167 /* Skip a C-style block comment. We find the end of the comment by
1168 seeing if an asterisk is before every '/' we encounter. Returns
1169 nonzero if comment terminated by EOF, zero otherwise.
1171 Buffer->cur points to the initial asterisk of the comment. */
1173 _cpp_skip_block_comment (cpp_reader
*pfile
)
1175 cpp_buffer
*buffer
= pfile
->buffer
;
1176 const uchar
*cur
= buffer
->cur
;
1185 /* People like decorating comments with '*', so check for '/'
1186 instead for efficiency. */
1194 /* Warn about potential nested comments, but not if the '/'
1195 comes immediately before the true comment delimiter.
1196 Don't bother to get it right across escaped newlines. */
1197 if (CPP_OPTION (pfile
, warn_comments
)
1198 && cur
[0] == '*' && cur
[1] != '/')
1201 cpp_warning_with_line (pfile
, CPP_W_COMMENTS
,
1202 pfile
->line_table
->highest_line
,
1203 CPP_BUF_COL (buffer
),
1204 "\"/*\" within comment");
1210 buffer
->cur
= cur
- 1;
1211 _cpp_process_line_notes (pfile
, true);
1212 if (buffer
->next_line
>= buffer
->rlimit
)
1214 _cpp_clean_line (pfile
);
1216 cols
= buffer
->next_line
- buffer
->line_base
;
1217 CPP_INCREMENT_LINE (pfile
, cols
);
1224 _cpp_process_line_notes (pfile
, true);
1228 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1229 terminating newline. Handles escaped newlines. Returns nonzero
1230 if the comment spans multiple lines. */
1232 skip_line_comment (cpp_reader
*pfile
)
1234 cpp_buffer
*buffer
= pfile
->buffer
;
1235 location_t orig_line
= pfile
->line_table
->highest_line
;
1237 while (*buffer
->cur
!= '\n')
1240 _cpp_process_line_notes (pfile
, true);
1241 return orig_line
!= pfile
->line_table
->highest_line
;
1244 /* Skips whitespace, saving the next non-whitespace character. */
1246 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
1248 cpp_buffer
*buffer
= pfile
->buffer
;
1249 bool saw_NUL
= false;
1253 /* Horizontal space always OK. */
1254 if (c
== ' ' || c
== '\t')
1256 /* Just \f \v or \0 left. */
1259 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
1260 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
1261 CPP_BUF_COL (buffer
),
1262 "%s in preprocessing directive",
1263 c
== '\f' ? "form feed" : "vertical tab");
1267 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1268 while (is_nvspace (c
));
1271 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
1276 /* See if the characters of a number token are valid in a name (no
1277 '.', '+' or '-'). */
1279 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
1283 for (i
= 0; i
< string
->len
; i
++)
1284 if (!is_idchar (string
->text
[i
]))
1290 /* After parsing an identifier or other sequence, produce a warning about
1291 sequences not in NFC/NFKC. */
1293 warn_about_normalization (cpp_reader
*pfile
,
1294 const cpp_token
*token
,
1295 const struct normalize_state
*s
)
1297 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
1298 && !pfile
->state
.skipping
)
1300 /* Make sure that the token is printed using UCNs, even
1301 if we'd otherwise happily print UTF-8. */
1302 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
1305 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
1306 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
1307 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1308 "`%.*s' is not in NFKC", (int) sz
, buf
);
1310 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1311 "`%.*s' is not in NFC", (int) sz
, buf
);
1316 static const cppchar_t utf8_signifier
= 0xC0;
1318 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1319 an identifier. FIRST is TRUE if this starts an identifier. */
1321 forms_identifier_p (cpp_reader
*pfile
, int first
,
1322 struct normalize_state
*state
)
1324 cpp_buffer
*buffer
= pfile
->buffer
;
1326 if (*buffer
->cur
== '$')
1328 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1332 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
1334 CPP_OPTION (pfile
, warn_dollars
) = 0;
1335 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
1341 /* Is this a syntactically valid UCN or a valid UTF-8 char? */
1342 if (CPP_OPTION (pfile
, extended_identifiers
))
1345 if (*buffer
->cur
>= utf8_signifier
)
1347 if (_cpp_valid_utf8 (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1351 else if (*buffer
->cur
== '\\'
1352 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
1355 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1356 state
, &s
, NULL
, NULL
))
1365 /* Helper function to issue error about improper __VA_OPT__ use. */
1367 maybe_va_opt_error (cpp_reader
*pfile
)
1369 if (CPP_PEDANTIC (pfile
) && !CPP_OPTION (pfile
, va_opt
))
1371 /* __VA_OPT__ should not be accepted at all, but allow it in
1373 if (!_cpp_in_system_header (pfile
))
1374 cpp_error (pfile
, CPP_DL_PEDWARN
,
1375 "__VA_OPT__ is not available until C++20");
1377 else if (!pfile
->state
.va_args_ok
)
1379 /* __VA_OPT__ should only appear in the replacement list of a
1381 cpp_error (pfile
, CPP_DL_PEDWARN
,
1382 "__VA_OPT__ can only appear in the expansion"
1383 " of a C++20 variadic macro");
1387 /* Helper function to get the cpp_hashnode of the identifier BASE. */
1388 static cpp_hashnode
*
1389 lex_identifier_intern (cpp_reader
*pfile
, const uchar
*base
)
1391 cpp_hashnode
*result
;
1394 unsigned int hash
= HT_HASHSTEP (0, *base
);
1397 while (ISIDNUM (*cur
))
1399 hash
= HT_HASHSTEP (hash
, *cur
);
1403 hash
= HT_HASHFINISH (hash
, len
);
1404 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1405 base
, len
, hash
, HT_ALLOC
));
1407 /* Rarely, identifiers require diagnostics when lexed. */
1408 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1409 && !pfile
->state
.skipping
, 0))
1411 /* It is allowed to poison the same identifier twice. */
1412 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1413 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1414 NODE_NAME (result
));
1416 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1417 replacement list of a variadic macro. */
1418 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1419 && !pfile
->state
.va_args_ok
)
1421 if (CPP_OPTION (pfile
, cplusplus
))
1422 cpp_error (pfile
, CPP_DL_PEDWARN
,
1423 "__VA_ARGS__ can only appear in the expansion"
1424 " of a C++11 variadic macro");
1426 cpp_error (pfile
, CPP_DL_PEDWARN
,
1427 "__VA_ARGS__ can only appear in the expansion"
1428 " of a C99 variadic macro");
1431 if (result
== pfile
->spec_nodes
.n__VA_OPT__
)
1432 maybe_va_opt_error (pfile
);
1434 /* For -Wc++-compat, warn about use of C++ named operators. */
1435 if (result
->flags
& NODE_WARN_OPERATOR
)
1436 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1437 "identifier \"%s\" is a special operator name in C++",
1438 NODE_NAME (result
));
1444 /* Get the cpp_hashnode of an identifier specified by NAME in
1445 the current cpp_reader object. If none is found, NULL is returned. */
1447 _cpp_lex_identifier (cpp_reader
*pfile
, const char *name
)
1449 cpp_hashnode
*result
;
1450 result
= lex_identifier_intern (pfile
, (uchar
*) name
);
1454 /* Lex an identifier starting at BUFFER->CUR - 1. */
1455 static cpp_hashnode
*
1456 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
1457 struct normalize_state
*nst
, cpp_hashnode
**spelling
)
1459 cpp_hashnode
*result
;
1462 unsigned int hash
= HT_HASHSTEP (0, *base
);
1464 cur
= pfile
->buffer
->cur
;
1467 while (ISIDNUM (*cur
))
1469 hash
= HT_HASHSTEP (hash
, *cur
);
1472 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *(cur
- 1));
1474 pfile
->buffer
->cur
= cur
;
1475 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
1477 /* Slower version for identifiers containing UCNs
1478 or extended chars (including $). */
1480 while (ISIDNUM (*pfile
->buffer
->cur
))
1482 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *pfile
->buffer
->cur
);
1483 pfile
->buffer
->cur
++;
1485 } while (forms_identifier_p (pfile
, false, nst
));
1486 result
= _cpp_interpret_identifier (pfile
, base
,
1487 pfile
->buffer
->cur
- base
);
1488 *spelling
= cpp_lookup (pfile
, base
, pfile
->buffer
->cur
- base
);
1493 hash
= HT_HASHFINISH (hash
, len
);
1495 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1496 base
, len
, hash
, HT_ALLOC
));
1500 /* Rarely, identifiers require diagnostics when lexed. */
1501 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1502 && !pfile
->state
.skipping
, 0))
1504 /* It is allowed to poison the same identifier twice. */
1505 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1506 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1507 NODE_NAME (result
));
1509 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1510 replacement list of a variadic macro. */
1511 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1512 && !pfile
->state
.va_args_ok
)
1514 if (CPP_OPTION (pfile
, cplusplus
))
1515 cpp_error (pfile
, CPP_DL_PEDWARN
,
1516 "__VA_ARGS__ can only appear in the expansion"
1517 " of a C++11 variadic macro");
1519 cpp_error (pfile
, CPP_DL_PEDWARN
,
1520 "__VA_ARGS__ can only appear in the expansion"
1521 " of a C99 variadic macro");
1524 /* __VA_OPT__ should only appear in the replacement list of a
1526 if (result
== pfile
->spec_nodes
.n__VA_OPT__
)
1527 maybe_va_opt_error (pfile
);
1529 /* For -Wc++-compat, warn about use of C++ named operators. */
1530 if (result
->flags
& NODE_WARN_OPERATOR
)
1531 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1532 "identifier \"%s\" is a special operator name in C++",
1533 NODE_NAME (result
));
1539 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1541 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
1542 struct normalize_state
*nst
)
1548 base
= pfile
->buffer
->cur
- 1;
1551 cur
= pfile
->buffer
->cur
;
1553 /* N.B. ISIDNUM does not include $. */
1554 while (ISIDNUM (*cur
) || *cur
== '.' || DIGIT_SEP (*cur
)
1555 || VALID_SIGN (*cur
, cur
[-1]))
1557 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *cur
);
1560 /* A number can't end with a digit separator. */
1561 while (cur
> pfile
->buffer
->cur
&& DIGIT_SEP (cur
[-1]))
1564 pfile
->buffer
->cur
= cur
;
1566 while (forms_identifier_p (pfile
, false, nst
));
1568 number
->len
= cur
- base
;
1569 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
1570 memcpy (dest
, base
, number
->len
);
1571 dest
[number
->len
] = '\0';
1572 number
->text
= dest
;
1575 /* Create a token of type TYPE with a literal spelling. */
1577 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1578 unsigned int len
, enum cpp_ttype type
)
1581 token
->val
.str
.len
= len
;
1582 token
->val
.str
.text
= cpp_alloc_token_string (pfile
, base
, len
);
1586 cpp_alloc_token_string (cpp_reader
*pfile
,
1587 const unsigned char *ptr
, unsigned len
)
1589 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
1592 memcpy (dest
, ptr
, len
);
1596 /* A pair of raw buffer pointers. The currently open one is [1], the
1597 first one is [0]. Used for string literal lexing. */
1605 : first (NULL
), last (NULL
), rpos (0), accum (0)
1609 void append (cpp_reader
*, const uchar
*, size_t);
1611 void read_begin (cpp_reader
*);
1612 bool reading_p () const
1614 return rpos
!= NULL
;
1619 if (rpos
== BUFF_FRONT (last
))
1625 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1626 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1629 lit_accum::append (cpp_reader
*pfile
, const uchar
*base
, size_t len
)
1633 first
= last
= _cpp_get_buff (pfile
, len
);
1634 else if (len
> BUFF_ROOM (last
))
1636 /* There is insufficient room in the buffer. Copy what we can,
1637 and then either extend or create a new one. */
1638 size_t room
= BUFF_ROOM (last
);
1639 memcpy (BUFF_FRONT (last
), base
, room
);
1640 BUFF_FRONT (last
) += room
;
1645 gcc_checking_assert (!rpos
);
1647 last
= _cpp_append_extend_buff (pfile
, last
, len
);
1650 memcpy (BUFF_FRONT (last
), base
, len
);
1651 BUFF_FRONT (last
) += len
;
1656 lit_accum::read_begin (cpp_reader
*pfile
)
1658 /* We never accumulate more than 4 chars to read. */
1659 if (BUFF_ROOM (last
) < 4)
1661 last
= _cpp_append_extend_buff (pfile
, last
, 4);
1662 rpos
= BUFF_FRONT (last
);
1665 /* Returns true if a macro has been defined.
1666 This might not work if compile with -save-temps,
1667 or preprocess separately from compilation. */
1670 is_macro(cpp_reader
*pfile
, const uchar
*base
)
1672 const uchar
*cur
= base
;
1673 if (! ISIDST (*cur
))
1675 unsigned int hash
= HT_HASHSTEP (0, *cur
);
1677 while (ISIDNUM (*cur
))
1679 hash
= HT_HASHSTEP (hash
, *cur
);
1682 hash
= HT_HASHFINISH (hash
, cur
- base
);
1684 cpp_hashnode
*result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1685 base
, cur
- base
, hash
, HT_NO_INSERT
));
1687 return result
&& cpp_macro_p (result
);
1690 /* Returns true if a literal suffix does not have the expected form
1691 and is defined as a macro. */
1694 is_macro_not_literal_suffix(cpp_reader
*pfile
, const uchar
*base
)
1696 /* User-defined literals outside of namespace std must start with a single
1697 underscore, so assume anything of that form really is a UDL suffix.
1698 We don't need to worry about UDLs defined inside namespace std because
1699 their names are reserved, so cannot be used as macro names in valid
1701 if (base
[0] == '_' && base
[1] != '_')
1703 return is_macro (pfile
, base
);
1706 /* Lexes a raw string. The stored string contains the spelling,
1707 including double quotes, delimiter string, '(' and ')', any leading
1708 'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains
1709 the type of the literal, or CPP_OTHER if it was not properly
1712 BASE is the start of the token. Updates pfile->buffer->cur to just
1713 after the lexed string.
1715 The spelling is NUL-terminated, but it is not guaranteed that this
1716 is the first NUL since embedded NULs are preserved. */
1719 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
1721 const uchar
*pos
= base
;
1723 /* 'tis a pity this information isn't passed down from the lexer's
1724 initial categorization of the token. */
1725 enum cpp_ttype type
= CPP_STRING
;
1732 else if (*pos
== 'U')
1734 type
= CPP_STRING32
;
1737 else if (*pos
== 'u')
1741 type
= CPP_UTF8STRING
;
1745 type
= CPP_STRING16
;
1749 gcc_checking_assert (pos
[0] == 'R' && pos
[1] == '"');
1752 _cpp_line_note
*note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1754 /* Skip notes before the ". */
1755 while (note
->pos
< pos
)
1761 unsigned prefix_len
= 0;
1767 } phase
= PHASE_PREFIX
;
1771 gcc_checking_assert (note
->pos
>= pos
);
1773 /* Undo any escaped newlines and trigraphs. */
1774 if (!accum
.reading_p () && note
->pos
== pos
)
1779 /* Restore backslash followed by newline. */
1780 accum
.append (pfile
, base
, pos
- base
);
1782 accum
.read_begin (pfile
);
1783 accum
.append (pfile
, UC
"\\", 1);
1786 if (note
->type
== ' ')
1787 /* GNU backslash whitespace newline extension. FIXME
1788 could be any sequence of non-vertical space. When we
1789 can properly restore any such sequence, we should
1790 mark this note as handled so _cpp_process_line_notes
1792 accum
.append (pfile
, UC
" ", 1);
1794 accum
.append (pfile
, UC
"\n", 1);
1799 /* This can happen for ??/<NEWLINE> when trigraphs are not
1800 being interpretted. */
1801 gcc_checking_assert (!CPP_OPTION (pfile
, trigraphs
));
1807 gcc_checking_assert (_cpp_trigraph_map
[note
->type
]);
1809 /* Don't warn about this trigraph in
1810 _cpp_process_line_notes, since trigraphs show up as
1811 trigraphs in raw strings. */
1812 uchar type
= note
->type
;
1815 if (CPP_OPTION (pfile
, trigraphs
))
1817 accum
.append (pfile
, base
, pos
- base
);
1819 accum
.read_begin (pfile
);
1820 accum
.append (pfile
, UC
"??", 2);
1821 accum
.append (pfile
, &type
, 1);
1823 /* ??/ followed by newline gets two line notes, one for
1824 the trigraph and one for the backslash/newline. */
1825 if (type
== '/' && note
[1].pos
== pos
)
1828 gcc_assert (note
->type
== '\\' || note
->type
== ' ');
1829 goto after_backslash
;
1831 /* Skip the replacement character. */
1839 /* Now get a char to process. Either from an expanded note, or
1840 from the line buffer. */
1841 bool read_note
= accum
.reading_p ();
1842 char c
= read_note
? accum
.read_char () : *pos
++;
1844 if (phase
== PHASE_PREFIX
)
1850 prefix
[prefix_len
++] = '"';
1852 else if (prefix_len
< 16
1853 /* Prefix chars are any of the basic character set,
1854 [lex.charset] except for '
1855 ()\\\t\v\f\n'. Optimized for a contiguous
1857 /* Unlike a switch, this collapses down to one or
1858 two shift and bitmask operations on an ASCII
1859 system, with an outlier or two. */
1860 && (('Z' - 'A' == 25
1861 ? ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'))
1863 || (c
>= '0' && c
<= '9')
1864 || c
== '_' || c
== '{' || c
== '}'
1865 || c
== '[' || c
== ']' || c
== '#'
1866 || c
== '<' || c
== '>' || c
== '%'
1867 || c
== ':' || c
== ';' || c
== '.' || c
== '?'
1868 || c
== '*' || c
== '+' || c
== '-' || c
== '/'
1869 || c
== '^' || c
== '&' || c
== '|' || c
== '~'
1870 || c
== '!' || c
== '=' || c
== ','
1871 || c
== '"' || c
== '\''))
1872 prefix
[prefix_len
++] = c
;
1875 /* Something is wrong. */
1876 int col
= CPP_BUF_COLUMN (pfile
->buffer
, pos
) + read_note
;
1877 if (prefix_len
== 16)
1878 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1879 col
, "raw string delimiter longer "
1880 "than 16 characters");
1882 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1883 col
, "invalid new-line in raw "
1884 "string delimiter");
1886 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1887 col
, "invalid character '%c' in "
1888 "raw string delimiter", c
);
1891 /* Continue until we get a close quote, that's probably
1892 the best failure mode. */
1899 if (phase
!= PHASE_NONE
)
1901 if (prefix
[phase
] != c
)
1903 else if (unsigned (phase
+ 1) == prefix_len
)
1907 phase
= Phase (phase
+ 1);
1912 if (!prefix_len
&& c
== '"')
1913 /* Failure mode lexing. */
1915 else if (prefix_len
&& c
== ')')
1916 phase
= PHASE_SUFFIX
;
1917 else if (!read_note
&& c
== '\n')
1920 pfile
->buffer
->cur
= pos
;
1921 if (pfile
->state
.in_directive
1922 || (pfile
->state
.parsing_args
1923 && pfile
->buffer
->next_line
>= pfile
->buffer
->rlimit
))
1925 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
1926 "unterminated raw string");
1931 accum
.append (pfile
, base
, pos
- base
+ 1);
1932 _cpp_process_line_notes (pfile
, false);
1934 if (pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
1935 CPP_INCREMENT_LINE (pfile
, 0);
1936 pfile
->buffer
->need_line
= true;
1938 if (!_cpp_get_fresh_line (pfile
))
1940 /* We ran out of file and failed to get a line. */
1941 location_t src_loc
= token
->src_loc
;
1942 token
->type
= CPP_EOF
;
1943 /* Tell the compiler the line number of the EOF token. */
1944 token
->src_loc
= pfile
->line_table
->highest_line
;
1947 _cpp_release_buff (pfile
, accum
.first
);
1948 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
1949 "unterminated raw string");
1950 /* Now pop the buffer that _cpp_get_fresh_line did not. */
1951 _cpp_pop_buffer (pfile
);
1955 pos
= base
= pfile
->buffer
->cur
;
1956 note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1960 if (CPP_OPTION (pfile
, user_literals
))
1962 /* If a string format macro, say from inttypes.h, is placed touching
1963 a string literal it could be parsed as a C++11 user-defined string
1964 literal thus breaking the program. */
1965 if (is_macro_not_literal_suffix (pfile
, pos
))
1967 /* Raise a warning, but do not consume subsequent tokens. */
1968 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
1969 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1971 "invalid suffix on literal; C++11 requires "
1972 "a space between literal and string macro");
1974 /* Grab user defined literal suffix. */
1975 else if (ISIDST (*pos
))
1977 type
= cpp_userdef_string_add_type (type
);
1980 while (ISIDNUM (*pos
))
1986 pfile
->buffer
->cur
= pos
;
1988 create_literal (pfile
, token
, base
, pos
- base
, type
);
1991 size_t extra_len
= pos
- base
;
1992 uchar
*dest
= _cpp_unaligned_alloc (pfile
, accum
.accum
+ extra_len
+ 1);
1995 token
->val
.str
.len
= accum
.accum
+ extra_len
;
1996 token
->val
.str
.text
= dest
;
1997 for (_cpp_buff
*buf
= accum
.first
; buf
; buf
= buf
->next
)
1999 size_t len
= BUFF_FRONT (buf
) - buf
->base
;
2000 memcpy (dest
, buf
->base
, len
);
2003 _cpp_release_buff (pfile
, accum
.first
);
2004 memcpy (dest
, base
, extra_len
);
2005 dest
[extra_len
] = '\0';
2009 /* Lexes a string, character constant, or angle-bracketed header file
2010 name. The stored string contains the spelling, including opening
2011 quote and any leading 'L', 'u', 'U' or 'u8' and optional
2012 'R' modifier. It returns the type of the literal, or CPP_OTHER
2013 if it was not properly terminated, or CPP_LESS for an unterminated
2014 header name which must be relexed as normal tokens.
2016 The spelling is NUL-terminated, but it is not guaranteed that this
2017 is the first NUL since embedded NULs are preserved. */
2019 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
2021 bool saw_NUL
= false;
2023 cppchar_t terminator
;
2024 enum cpp_ttype type
;
2027 terminator
= *cur
++;
2028 if (terminator
== 'L' || terminator
== 'U')
2029 terminator
= *cur
++;
2030 else if (terminator
== 'u')
2032 terminator
= *cur
++;
2033 if (terminator
== '8')
2034 terminator
= *cur
++;
2036 if (terminator
== 'R')
2038 lex_raw_string (pfile
, token
, base
);
2041 if (terminator
== '"')
2042 type
= (*base
== 'L' ? CPP_WSTRING
:
2043 *base
== 'U' ? CPP_STRING32
:
2044 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
2046 else if (terminator
== '\'')
2047 type
= (*base
== 'L' ? CPP_WCHAR
:
2048 *base
== 'U' ? CPP_CHAR32
:
2049 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8CHAR
: CPP_CHAR16
)
2052 terminator
= '>', type
= CPP_HEADER_NAME
;
2056 cppchar_t c
= *cur
++;
2058 /* In #include-style directives, terminators are not escapable. */
2059 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
2061 else if (c
== terminator
)
2066 /* Unmatched quotes always yield undefined behavior, but
2067 greedy lexing means that what appears to be an unterminated
2068 header name may actually be a legitimate sequence of tokens. */
2069 if (terminator
== '>')
2071 token
->type
= CPP_LESS
;
2081 if (saw_NUL
&& !pfile
->state
.skipping
)
2082 cpp_error (pfile
, CPP_DL_WARNING
,
2083 "null character(s) preserved in literal");
2085 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
2086 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
2089 if (CPP_OPTION (pfile
, user_literals
))
2091 /* If a string format macro, say from inttypes.h, is placed touching
2092 a string literal it could be parsed as a C++11 user-defined string
2093 literal thus breaking the program. */
2094 if (is_macro_not_literal_suffix (pfile
, cur
))
2096 /* Raise a warning, but do not consume subsequent tokens. */
2097 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
2098 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
2100 "invalid suffix on literal; C++11 requires "
2101 "a space between literal and string macro");
2103 /* Grab user defined literal suffix. */
2104 else if (ISIDST (*cur
))
2106 type
= cpp_userdef_char_add_type (type
);
2107 type
= cpp_userdef_string_add_type (type
);
2110 while (ISIDNUM (*cur
))
2114 else if (CPP_OPTION (pfile
, cpp_warn_cxx11_compat
)
2115 && is_macro (pfile
, cur
)
2116 && !pfile
->state
.skipping
)
2117 cpp_warning_with_line (pfile
, CPP_W_CXX11_COMPAT
,
2118 token
->src_loc
, 0, "C++11 requires a space "
2119 "between string literal and macro");
2121 pfile
->buffer
->cur
= cur
;
2122 create_literal (pfile
, token
, base
, cur
- base
, type
);
2125 /* Return the comment table. The client may not make any assumption
2126 about the ordering of the table. */
2128 cpp_get_comments (cpp_reader
*pfile
)
2130 return &pfile
->comments
;
2133 /* Append a comment to the end of the comment table. */
2135 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
2139 if (pfile
->comments
.allocated
== 0)
2141 pfile
->comments
.allocated
= 256;
2142 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
2143 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
2146 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
2148 pfile
->comments
.allocated
*= 2;
2149 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
2150 (pfile
->comments
.entries
,
2151 pfile
->comments
.allocated
* sizeof (cpp_comment
));
2154 len
= token
->val
.str
.len
;
2156 /* Copy comment. Note, token may not be NULL terminated. */
2157 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
2158 (char *) xmalloc (sizeof (char) * (len
+ 1));
2159 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
2160 token
->val
.str
.text
, len
);
2161 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
2163 /* Set source location. */
2164 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
2166 /* Increment the count of entries in the comment table. */
2167 pfile
->comments
.count
++;
2170 /* The stored comment includes the comment start and any terminator. */
2172 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
2175 unsigned char *buffer
;
2176 unsigned int len
, clen
, i
;
2178 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
2180 /* C++ comments probably (not definitely) have moved past a new
2181 line, which we don't want to save in the comment. */
2182 if (is_vspace (pfile
->buffer
->cur
[-1]))
2185 /* If we are currently in a directive or in argument parsing, then
2186 we need to store all C++ comments as C comments internally, and
2187 so we need to allocate a little extra space in that case.
2189 Note that the only time we encounter a directive here is
2190 when we are saving comments in a "#define". */
2191 clen
= ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
)
2192 && type
== '/') ? len
+ 2 : len
;
2194 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
2196 token
->type
= CPP_COMMENT
;
2197 token
->val
.str
.len
= clen
;
2198 token
->val
.str
.text
= buffer
;
2201 memcpy (buffer
+ 1, from
, len
- 1);
2203 /* Finish conversion to a C comment, if necessary. */
2204 if ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
) && type
== '/')
2207 buffer
[clen
- 2] = '*';
2208 buffer
[clen
- 1] = '/';
2209 /* As there can be in a C++ comments illegal sequences for C comments
2210 we need to filter them out. */
2211 for (i
= 2; i
< (clen
- 2); i
++)
2212 if (buffer
[i
] == '/' && (buffer
[i
- 1] == '*' || buffer
[i
+ 1] == '*'))
2216 /* Finally store this comment for use by clients of libcpp. */
2217 store_comment (pfile
, token
);
2220 /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
2224 fallthrough_comment_p (cpp_reader
*pfile
, const unsigned char *comment_start
)
2226 const unsigned char *from
= comment_start
+ 1;
2228 switch (CPP_OPTION (pfile
, cpp_warn_implicit_fallthrough
))
2230 /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
2231 don't recognize any comments. The latter only checks attributes,
2232 the former doesn't warn. */
2236 /* -Wimplicit-fallthrough=1 considers any comment, no matter what
2241 /* -Wimplicit-fallthrough=2 looks for (case insensitive)
2242 .*falls?[ \t-]*thr(u|ough).* regex. */
2243 for (; (size_t) (pfile
->buffer
->cur
- from
) >= sizeof "fallthru" - 1;
2246 /* Is there anything like strpbrk with upper boundary, or
2247 memchr looking for 2 characters rather than just one? */
2248 if (from
[0] != 'f' && from
[0] != 'F')
2250 if (from
[1] != 'a' && from
[1] != 'A')
2252 if (from
[2] != 'l' && from
[2] != 'L')
2254 if (from
[3] != 'l' && from
[3] != 'L')
2256 from
+= sizeof "fall" - 1;
2257 if (from
[0] == 's' || from
[0] == 'S')
2259 while (*from
== ' ' || *from
== '\t' || *from
== '-')
2261 if (from
[0] != 't' && from
[0] != 'T')
2263 if (from
[1] != 'h' && from
[1] != 'H')
2265 if (from
[2] != 'r' && from
[2] != 'R')
2267 if (from
[3] == 'u' || from
[3] == 'U')
2269 if (from
[3] != 'o' && from
[3] != 'O')
2271 if (from
[4] != 'u' && from
[4] != 'U')
2273 if (from
[5] != 'g' && from
[5] != 'G')
2275 if (from
[6] != 'h' && from
[6] != 'H')
2285 /* Whole comment contents:
2289 if (*from
== '-' || *from
== '@')
2291 size_t len
= sizeof "fallthrough" - 1;
2292 if ((size_t) (pfile
->buffer
->cur
- from
- 1) < len
)
2294 if (memcmp (from
+ 1, "fallthrough", len
))
2298 if (from
[len
+ 1] != '@')
2304 /* Whole comment contents (regex):
2305 lint -fallthrough[ \t]*
2307 else if (*from
== 'l')
2309 size_t len
= sizeof "int -fallthrough" - 1;
2310 if ((size_t) (pfile
->buffer
->cur
- from
- 1) < len
)
2312 if (memcmp (from
+ 1, "int -fallthrough", len
))
2315 while (*from
== ' ' || *from
== '\t')
2318 /* Whole comment contents (regex):
2319 [ \t]*FALLTHR(U|OUGH)[ \t]*
2321 else if (CPP_OPTION (pfile
, cpp_warn_implicit_fallthrough
) == 4)
2323 while (*from
== ' ' || *from
== '\t')
2325 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "FALLTHRU" - 1)
2327 if (memcmp (from
, "FALLTHR", sizeof "FALLTHR" - 1))
2329 from
+= sizeof "FALLTHR" - 1;
2332 else if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "OUGH" - 1)
2334 else if (memcmp (from
, "OUGH", sizeof "OUGH" - 1))
2337 from
+= sizeof "OUGH" - 1;
2338 while (*from
== ' ' || *from
== '\t')
2341 /* Whole comment contents (regex):
2342 [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
2343 [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
2344 [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
2348 while (*from
== ' ' || *from
== '\t' || *from
== '.' || *from
== '!')
2350 unsigned char f
= *from
;
2351 bool all_upper
= false;
2352 if (f
== 'E' || f
== 'e')
2354 if ((size_t) (pfile
->buffer
->cur
- from
)
2355 < sizeof "else fallthru" - 1)
2357 if (f
== 'E' && memcmp (from
+ 1, "LSE", sizeof "LSE" - 1) == 0)
2359 else if (memcmp (from
+ 1, "lse", sizeof "lse" - 1))
2361 from
+= sizeof "else" - 1;
2367 if (all_upper
&& *from
== 'f')
2369 if (f
== 'e' && *from
== 'F')
2373 else if (f
== 'I' || f
== 'i')
2375 if ((size_t) (pfile
->buffer
->cur
- from
)
2376 < sizeof "intentional fallthru" - 1)
2378 if (f
== 'I' && memcmp (from
+ 1, "NTENTIONAL",
2379 sizeof "NTENTIONAL" - 1) == 0)
2381 else if (memcmp (from
+ 1, "ntentional",
2382 sizeof "ntentional" - 1))
2384 from
+= sizeof "intentional" - 1;
2388 if (all_upper
&& *from
== 'f')
2393 if (memcmp (from
, "LY F", sizeof "LY F" - 1))
2395 from
+= sizeof "LY " - 1;
2399 if (memcmp (from
, "ly ", sizeof "ly " - 1))
2401 from
+= sizeof "ly " - 1;
2403 if (f
== 'i' && *from
== 'F')
2407 if (f
!= 'F' && f
!= 'f')
2409 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "fallthru" - 1)
2411 if (f
== 'F' && memcmp (from
+ 1, "ALL", sizeof "ALL" - 1) == 0)
2415 else if (memcmp (from
+ 1, "all", sizeof "all" - 1))
2417 from
+= sizeof "fall" - 1;
2418 if (*from
== (all_upper
? 'S' : 's') && from
[1] == ' ')
2420 else if (*from
== ' ' || *from
== '-')
2422 else if (*from
!= (all_upper
? 'T' : 't'))
2424 if ((f
== 'f' || *from
!= 'T') && (all_upper
|| *from
!= 't'))
2426 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "thru" - 1)
2428 if (memcmp (from
+ 1, all_upper
? "HRU" : "hru", sizeof "hru" - 1))
2430 if ((size_t) (pfile
->buffer
->cur
- from
) < sizeof "through" - 1)
2432 if (memcmp (from
+ 1, all_upper
? "HROUGH" : "hrough",
2433 sizeof "hrough" - 1))
2435 from
+= sizeof "through" - 1;
2438 from
+= sizeof "thru" - 1;
2439 while (*from
== ' ' || *from
== '\t' || *from
== '.' || *from
== '!')
2444 if (*comment_start
== '*')
2448 while (*from
&& *from
!= '*'
2449 && *from
!= '\n' && *from
!= '\r')
2451 if (*from
!= '*' || from
[1] == '/')
2458 while (*from
&& *from
!= '\n' && *from
!= '\r')
2462 /* C block comment. */
2463 if (*comment_start
== '*')
2465 if (*from
!= '*' || from
[1] != '/')
2468 /* C++ line comment. */
2469 else if (*from
!= '\n')
2475 /* Allocate COUNT tokens for RUN. */
2477 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
2479 run
->base
= XNEWVEC (cpp_token
, count
);
2480 run
->limit
= run
->base
+ count
;
2484 /* Returns the next tokenrun, or creates one if there is none. */
2486 next_tokenrun (tokenrun
*run
)
2488 if (run
->next
== NULL
)
2490 run
->next
= XNEW (tokenrun
);
2491 run
->next
->prev
= run
;
2492 _cpp_init_tokenrun (run
->next
, 250);
2498 /* Return the number of not yet processed token in a given
2501 _cpp_remaining_tokens_num_in_context (cpp_context
*context
)
2503 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2504 return (LAST (context
).token
- FIRST (context
).token
);
2505 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2506 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2507 return (LAST (context
).ptoken
- FIRST (context
).ptoken
);
2512 /* Returns the token present at index INDEX in a given context. If
2513 INDEX is zero, the next token to be processed is returned. */
2514 static const cpp_token
*
2515 _cpp_token_from_context_at (cpp_context
*context
, int index
)
2517 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2518 return &(FIRST (context
).token
[index
]);
2519 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2520 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2521 return FIRST (context
).ptoken
[index
];
2526 /* Look ahead in the input stream. */
2528 cpp_peek_token (cpp_reader
*pfile
, int index
)
2530 cpp_context
*context
= pfile
->context
;
2531 const cpp_token
*peektok
;
2534 /* First, scan through any pending cpp_context objects. */
2535 while (context
->prev
)
2537 ptrdiff_t sz
= _cpp_remaining_tokens_num_in_context (context
);
2539 if (index
< (int) sz
)
2540 return _cpp_token_from_context_at (context
, index
);
2542 context
= context
->prev
;
2545 /* We will have to read some new tokens after all (and do so
2546 without invalidating preceding tokens). */
2548 pfile
->keep_tokens
++;
2550 /* For peeked tokens temporarily disable line_change reporting,
2551 until the tokens are parsed for real. */
2552 void (*line_change
) (cpp_reader
*, const cpp_token
*, int)
2553 = pfile
->cb
.line_change
;
2554 pfile
->cb
.line_change
= NULL
;
2558 peektok
= _cpp_lex_token (pfile
);
2559 if (peektok
->type
== CPP_EOF
)
2564 else if (peektok
->type
== CPP_PRAGMA
)
2566 /* Don't peek past a pragma. */
2567 if (peektok
== &pfile
->directive_result
)
2568 /* Save the pragma in the buffer. */
2569 *pfile
->cur_token
++ = *peektok
;
2576 _cpp_backup_tokens_direct (pfile
, count
- index
);
2577 pfile
->keep_tokens
--;
2578 pfile
->cb
.line_change
= line_change
;
2583 /* Allocate a single token that is invalidated at the same time as the
2584 rest of the tokens on the line. Has its line and col set to the
2585 same as the last lexed token, so that diagnostics appear in the
2588 _cpp_temp_token (cpp_reader
*pfile
)
2590 cpp_token
*old
, *result
;
2591 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
2592 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
2594 old
= pfile
->cur_token
- 1;
2595 /* Any pre-existing lookaheads must not be clobbered. */
2600 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
2603 memmove (next
->base
+ 1, next
->base
,
2604 (la
- sz
) * sizeof (cpp_token
));
2606 next
->base
[0] = pfile
->cur_run
->limit
[-1];
2610 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
2611 MIN (la
, sz
- 1) * sizeof (cpp_token
));
2614 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
2616 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2617 pfile
->cur_token
= pfile
->cur_run
->base
;
2620 result
= pfile
->cur_token
++;
2621 result
->src_loc
= old
->src_loc
;
2625 /* We're at the beginning of a logical line (so not in
2626 directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set. See
2627 if we should enter deferred_pragma mode to tokenize the rest of the
2628 line as a module control-line. */
2631 cpp_maybe_module_directive (cpp_reader
*pfile
, cpp_token
*result
)
2633 unsigned backup
= 0; /* Tokens we peeked. */
2634 cpp_hashnode
*node
= result
->val
.node
.node
;
2635 cpp_token
*peek
= result
;
2636 cpp_token
*keyword
= peek
;
2637 cpp_hashnode
*(&n_modules
)[spec_nodes::M_HWM
][2] = pfile
->spec_nodes
.n_modules
;
2638 int header_count
= 0;
2640 /* Make sure the incoming state is as we expect it. This way we
2641 can restore it using constants. */
2642 gcc_checking_assert (!pfile
->state
.in_deferred_pragma
2643 && !pfile
->state
.skipping
2644 && !pfile
->state
.parsing_args
2645 && !pfile
->state
.angled_headers
2646 && (pfile
->state
.save_comments
2647 == !CPP_OPTION (pfile
, discard_comments
)));
2649 /* Enter directives mode sufficiently for peeking. We don't have
2650 to actually set in_directive. */
2651 pfile
->state
.in_deferred_pragma
= true;
2653 /* These two fields are needed to process tokenization in deferred
2654 pragma mode. They are not used outside deferred pragma mode or
2656 pfile
->state
.pragma_allow_expansion
= true;
2657 pfile
->directive_line
= result
->src_loc
;
2659 /* Saving comments is incompatible with directives mode. */
2660 pfile
->state
.save_comments
= 0;
2662 if (node
== n_modules
[spec_nodes::M_EXPORT
][0])
2664 peek
= _cpp_lex_direct (pfile
);
2667 if (keyword
->type
!= CPP_NAME
)
2669 node
= keyword
->val
.node
.node
;
2670 if (!(node
->flags
& NODE_MODULE
))
2674 if (node
== n_modules
[spec_nodes::M__IMPORT
][0])
2676 header_count
= backup
+ 2 + 16;
2677 else if (node
== n_modules
[spec_nodes::M_IMPORT
][0])
2679 header_count
= backup
+ 2 + (CPP_OPTION (pfile
, preprocessed
) ? 16 : 0);
2680 else if (node
== n_modules
[spec_nodes::M_MODULE
][0])
2685 /* We've seen [export] {module|import|__import}. Check the next token. */
2687 /* After '{,__}import' a header name may appear. */
2688 pfile
->state
.angled_headers
= true;
2689 peek
= _cpp_lex_direct (pfile
);
2692 /* ... import followed by identifier, ':', '<' or
2693 header-name preprocessing tokens, or module
2694 followed by cpp-identifier, ':' or ';' preprocessing
2695 tokens. C++ keywords are not yet relevant. */
2696 if (peek
->type
== CPP_NAME
2697 || peek
->type
== CPP_COLON
2699 ? (peek
->type
== CPP_LESS
2700 || (peek
->type
== CPP_STRING
&& peek
->val
.str
.text
[0] != 'R')
2701 || peek
->type
== CPP_HEADER_NAME
)
2702 : peek
->type
== CPP_SEMICOLON
))
2704 pfile
->state
.pragma_allow_expansion
= !CPP_OPTION (pfile
, preprocessed
);
2705 if (!pfile
->state
.pragma_allow_expansion
)
2706 pfile
->state
.prevent_expansion
++;
2708 if (!header_count
&& linemap_included_from
2709 (LINEMAPS_LAST_ORDINARY_MAP (pfile
->line_table
)))
2710 cpp_error_with_line (pfile
, CPP_DL_ERROR
, keyword
->src_loc
, 0,
2711 "module control-line cannot be in included file");
2713 /* The first one or two tokens cannot be macro names. */
2714 for (int ix
= backup
; ix
--;)
2716 cpp_token
*tok
= ix
? keyword
: result
;
2717 cpp_hashnode
*node
= tok
->val
.node
.node
;
2719 /* Don't attempt to expand the token. */
2720 tok
->flags
|= NO_EXPAND
;
2721 if (_cpp_defined_macro_p (node
)
2722 && _cpp_maybe_notify_macro_use (pfile
, node
, tok
->src_loc
)
2723 && !cpp_fun_like_macro_p (node
))
2724 cpp_error_with_line (pfile
, CPP_DL_ERROR
, tok
->src_loc
, 0,
2725 "module control-line \"%s\" cannot be"
2726 " an object-like macro",
2730 /* Map to underbar variants. */
2731 keyword
->val
.node
.node
= n_modules
[header_count
2732 ? spec_nodes::M_IMPORT
2733 : spec_nodes::M_MODULE
][1];
2735 result
->val
.node
.node
= n_modules
[spec_nodes::M_EXPORT
][1];
2737 /* Maybe tell the tokenizer we expect a header-name down the
2739 pfile
->state
.directive_file_token
= header_count
;
2744 /* Drop out of directive mode. */
2745 /* We asserted save_comments had this value upon entry. */
2746 pfile
->state
.save_comments
2747 = !CPP_OPTION (pfile
, discard_comments
);
2748 pfile
->state
.in_deferred_pragma
= false;
2749 /* Do not let this remain on. */
2750 pfile
->state
.angled_headers
= false;
2753 /* In either case we want to backup the peeked tokens. */
2756 /* If we saw EOL, we should drop it, because this isn't a module
2757 control-line after all. */
2758 bool eol
= peek
->type
== CPP_PRAGMA_EOL
;
2759 if (!eol
|| backup
> 1)
2761 /* Put the peeked tokens back */
2762 _cpp_backup_tokens_direct (pfile
, backup
);
2763 /* But if the last one was an EOL, forget it. */
2765 pfile
->lookaheads
--;
2770 /* Lex a token into RESULT (external interface). Takes care of issues
2771 like directive handling, token lookahead, multiple include
2772 optimization and skipping. */
2774 _cpp_lex_token (cpp_reader
*pfile
)
2780 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
2782 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2783 pfile
->cur_token
= pfile
->cur_run
->base
;
2785 /* We assume that the current token is somewhere in the current
2787 if (pfile
->cur_token
< pfile
->cur_run
->base
2788 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
2791 if (pfile
->lookaheads
)
2793 pfile
->lookaheads
--;
2794 result
= pfile
->cur_token
++;
2797 result
= _cpp_lex_direct (pfile
);
2799 if (result
->flags
& BOL
)
2801 /* Is this a directive? If _cpp_handle_directive returns
2802 false, it is an assembler #. */
2803 if (result
->type
== CPP_HASH
2804 /* 6.10.3 p 11: Directives in a list of macro arguments
2805 gives undefined behavior. This implementation
2806 handles the directive as normal. */
2807 && pfile
->state
.parsing_args
!= 1)
2809 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
2811 if (pfile
->directive_result
.type
== CPP_PADDING
)
2813 result
= &pfile
->directive_result
;
2816 else if (pfile
->state
.in_deferred_pragma
)
2817 result
= &pfile
->directive_result
;
2818 else if (result
->type
== CPP_NAME
2819 && (result
->val
.node
.node
->flags
& NODE_MODULE
)
2820 && !pfile
->state
.skipping
2821 /* Unlike regular directives, we do not deal with
2822 tokenizing module directives as macro arguments.
2823 That's not permitted. */
2824 && !pfile
->state
.parsing_args
)
2826 /* P1857. Before macro expansion, at start of logical
2828 /* We don't have to consider lookaheads at this point. */
2829 gcc_checking_assert (!pfile
->lookaheads
);
2831 cpp_maybe_module_directive (pfile
, result
);
2834 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
2835 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
2838 /* We don't skip tokens in directives. */
2839 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
2842 /* Outside a directive, invalidate controlling macros. At file
2843 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2844 get here and MI optimization works. */
2845 pfile
->mi_valid
= false;
2847 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
2854 /* Returns true if a fresh line has been loaded. */
2856 _cpp_get_fresh_line (cpp_reader
*pfile
)
2858 /* We can't get a new line until we leave the current directive. */
2859 if (pfile
->state
.in_directive
)
2864 cpp_buffer
*buffer
= pfile
->buffer
;
2866 if (!buffer
->need_line
)
2869 if (buffer
->next_line
< buffer
->rlimit
)
2871 _cpp_clean_line (pfile
);
2875 /* First, get out of parsing arguments state. */
2876 if (pfile
->state
.parsing_args
)
2879 /* End of buffer. Non-empty files should end in a newline. */
2880 if (buffer
->buf
!= buffer
->rlimit
2881 && buffer
->next_line
> buffer
->rlimit
2882 && !buffer
->from_stage3
)
2884 /* Clip to buffer size. */
2885 buffer
->next_line
= buffer
->rlimit
;
2888 if (buffer
->prev
&& !buffer
->return_at_eof
)
2889 _cpp_pop_buffer (pfile
);
2892 /* End of translation. Do not pop the buffer yet. Increment
2893 line number so that the EOF token is on a line of its own
2894 (_cpp_lex_direct doesn't increment in that case, because
2895 it's hard for it to distinguish this special case). */
2896 CPP_INCREMENT_LINE (pfile
, 0);
2902 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
2905 result->type = ELSE_TYPE; \
2906 if (*buffer->cur == CHAR) \
2907 buffer->cur++, result->type = THEN_TYPE; \
2911 /* Lex a token into pfile->cur_token, which is also incremented, to
2912 get diagnostics pointing to the correct location.
2914 Does not handle issues such as token lookahead, multiple-include
2915 optimization, directives, skipping etc. This function is only
2916 suitable for use by _cpp_lex_token, and in special cases like
2917 lex_expansion_token which doesn't care for any of these issues.
2919 When meeting a newline, returns CPP_EOF if parsing a directive,
2920 otherwise returns to the start of the token buffer if permissible.
2921 Returns the location of the lexed token. */
2923 _cpp_lex_direct (cpp_reader
*pfile
)
2927 const unsigned char *comment_start
;
2928 bool fallthrough_comment
= false;
2929 cpp_token
*result
= pfile
->cur_token
++;
2933 buffer
= pfile
->buffer
;
2934 if (buffer
->need_line
)
2936 gcc_assert (!pfile
->state
.in_deferred_pragma
);
2937 if (!_cpp_get_fresh_line (pfile
))
2939 result
->type
= CPP_EOF
;
2940 /* Not a real EOF in a directive or arg parsing -- we refuse
2941 to advance to the next file now, and will once we're out
2943 if (!pfile
->state
.in_directive
&& !pfile
->state
.parsing_args
)
2945 /* Tell the compiler the line number of the EOF token. */
2946 result
->src_loc
= pfile
->line_table
->highest_line
;
2947 result
->flags
= BOL
;
2948 /* Now pop the buffer that _cpp_get_fresh_line did not. */
2949 _cpp_pop_buffer (pfile
);
2953 if (buffer
!= pfile
->buffer
)
2954 fallthrough_comment
= false;
2955 if (!pfile
->keep_tokens
)
2957 pfile
->cur_run
= &pfile
->base_run
;
2958 result
= pfile
->base_run
.base
;
2959 pfile
->cur_token
= result
+ 1;
2961 result
->flags
= BOL
;
2962 if (pfile
->state
.parsing_args
== 2)
2963 result
->flags
|= PREV_WHITE
;
2965 buffer
= pfile
->buffer
;
2967 result
->src_loc
= pfile
->line_table
->highest_line
;
2970 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
2971 && !pfile
->overlaid_buffer
)
2973 _cpp_process_line_notes (pfile
, false);
2974 result
->src_loc
= pfile
->line_table
->highest_line
;
2978 if (pfile
->forced_token_location
)
2979 result
->src_loc
= pfile
->forced_token_location
;
2981 result
->src_loc
= linemap_position_for_column (pfile
->line_table
,
2982 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
2986 case ' ': case '\t': case '\f': case '\v': case '\0':
2987 result
->flags
|= PREV_WHITE
;
2988 skip_whitespace (pfile
, c
);
2992 /* Increment the line, unless this is the last line ... */
2993 if (buffer
->cur
< buffer
->rlimit
2994 /* ... or this is a #include, (where _cpp_stack_file needs to
2995 unwind by one line) ... */
2996 || (pfile
->state
.in_directive
> 1
2997 /* ... except traditional-cpp increments this elsewhere. */
2998 && !CPP_OPTION (pfile
, traditional
)))
2999 CPP_INCREMENT_LINE (pfile
, 0);
3000 buffer
->need_line
= true;
3001 if (pfile
->state
.in_deferred_pragma
)
3003 /* Produce the PRAGMA_EOL on this line. File reading
3004 ensures there is always a \n at end of the buffer, thus
3005 in a deferred pragma we always see CPP_PRAGMA_EOL before
3007 result
->type
= CPP_PRAGMA_EOL
;
3008 result
->flags
&= ~PREV_WHITE
;
3009 pfile
->state
.in_deferred_pragma
= false;
3010 if (!pfile
->state
.pragma_allow_expansion
)
3011 pfile
->state
.prevent_expansion
--;
3016 case '0': case '1': case '2': case '3': case '4':
3017 case '5': case '6': case '7': case '8': case '9':
3019 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3020 result
->type
= CPP_NUMBER
;
3021 lex_number (pfile
, &result
->val
.str
, &nst
);
3022 warn_about_normalization (pfile
, result
, &nst
);
3030 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
3031 wide strings or raw strings. */
3032 if (c
== 'L' || CPP_OPTION (pfile
, rliterals
)
3033 || (c
!= 'R' && CPP_OPTION (pfile
, uliterals
)))
3035 if ((*buffer
->cur
== '\'' && c
!= 'R')
3036 || *buffer
->cur
== '"'
3037 || (*buffer
->cur
== 'R'
3039 && buffer
->cur
[1] == '"'
3040 && CPP_OPTION (pfile
, rliterals
))
3041 || (*buffer
->cur
== '8'
3043 && ((buffer
->cur
[1] == '"' || (buffer
->cur
[1] == '\''
3044 && CPP_OPTION (pfile
, utf8_char_literals
)))
3045 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'
3046 && CPP_OPTION (pfile
, rliterals
)))))
3048 lex_string (pfile
, result
, buffer
->cur
- 1);
3055 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
3056 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
3057 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
3058 case 's': case 't': case 'v': case 'w': case 'x':
3060 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
3061 case 'G': case 'H': case 'I': case 'J': case 'K':
3062 case 'M': case 'N': case 'O': case 'P': case 'Q':
3063 case 'S': case 'T': case 'V': case 'W': case 'X':
3065 result
->type
= CPP_NAME
;
3067 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3068 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
3070 &result
->val
.node
.spelling
);
3071 warn_about_normalization (pfile
, result
, &nst
);
3074 /* Convert named operators to their proper types. */
3075 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
3077 result
->flags
|= NAMED_OP
;
3078 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
3081 /* Signal FALLTHROUGH comment followed by another token. */
3082 if (fallthrough_comment
)
3083 result
->flags
|= PREV_FALLTHROUGH
;
3088 lex_string (pfile
, result
, buffer
->cur
- 1);
3092 /* A potential block or line comment. */
3093 comment_start
= buffer
->cur
;
3098 if (_cpp_skip_block_comment (pfile
))
3099 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
3101 else if (c
== '/' && ! CPP_OPTION (pfile
, traditional
))
3103 /* Don't warn for system headers. */
3104 if (_cpp_in_system_header (pfile
))
3106 /* Warn about comments if pedantically GNUC89, and not
3107 in system headers. */
3108 else if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
3109 && CPP_PEDANTIC (pfile
)
3110 && ! buffer
->warned_cplusplus_comments
)
3112 if (cpp_error (pfile
, CPP_DL_PEDWARN
,
3113 "C++ style comments are not allowed in ISO C90"))
3114 cpp_error (pfile
, CPP_DL_NOTE
,
3115 "(this will be reported only once per input file)");
3116 buffer
->warned_cplusplus_comments
= 1;
3118 /* Or if specifically desired via -Wc90-c99-compat. */
3119 else if (CPP_OPTION (pfile
, cpp_warn_c90_c99_compat
) > 0
3120 && ! CPP_OPTION (pfile
, cplusplus
)
3121 && ! buffer
->warned_cplusplus_comments
)
3123 if (cpp_error (pfile
, CPP_DL_WARNING
,
3124 "C++ style comments are incompatible with C90"))
3125 cpp_error (pfile
, CPP_DL_NOTE
,
3126 "(this will be reported only once per input file)");
3127 buffer
->warned_cplusplus_comments
= 1;
3129 /* In C89/C94, C++ style comments are forbidden. */
3130 else if ((CPP_OPTION (pfile
, lang
) == CLK_STDC89
3131 || CPP_OPTION (pfile
, lang
) == CLK_STDC94
))
3133 /* But don't be confused about valid code such as
3134 - // immediately followed by *,
3135 - // in a preprocessing directive,
3136 - // in an #if 0 block. */
3137 if (buffer
->cur
[1] == '*'
3138 || pfile
->state
.in_directive
3139 || pfile
->state
.skipping
)
3141 result
->type
= CPP_DIV
;
3144 else if (! buffer
->warned_cplusplus_comments
)
3146 if (cpp_error (pfile
, CPP_DL_ERROR
,
3147 "C++ style comments are not allowed in "
3149 cpp_error (pfile
, CPP_DL_NOTE
,
3150 "(this will be reported only once per input "
3152 buffer
->warned_cplusplus_comments
= 1;
3155 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
3156 cpp_warning (pfile
, CPP_W_COMMENTS
, "multi-line comment");
3161 result
->type
= CPP_DIV_EQ
;
3166 result
->type
= CPP_DIV
;
3170 if (fallthrough_comment_p (pfile
, comment_start
))
3171 fallthrough_comment
= true;
3173 if (pfile
->cb
.comment
)
3175 size_t len
= pfile
->buffer
->cur
- comment_start
;
3176 pfile
->cb
.comment (pfile
, result
->src_loc
, comment_start
- 1,
3180 if (!pfile
->state
.save_comments
)
3182 result
->flags
|= PREV_WHITE
;
3183 goto update_tokens_line
;
3186 if (fallthrough_comment
)
3187 result
->flags
|= PREV_FALLTHROUGH
;
3189 /* Save the comment as a token in its own right. */
3190 save_comment (pfile
, result
, comment_start
, c
);
3194 if (pfile
->state
.angled_headers
)
3196 lex_string (pfile
, result
, buffer
->cur
- 1);
3197 if (result
->type
!= CPP_LESS
)
3201 result
->type
= CPP_LESS
;
3202 if (*buffer
->cur
== '=')
3204 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
3205 if (*buffer
->cur
== '>'
3206 && CPP_OPTION (pfile
, cplusplus
)
3207 && CPP_OPTION (pfile
, lang
) >= CLK_GNUCXX20
)
3208 buffer
->cur
++, result
->type
= CPP_SPACESHIP
;
3210 else if (*buffer
->cur
== '<')
3213 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
3215 else if (CPP_OPTION (pfile
, digraphs
))
3217 if (*buffer
->cur
== ':')
3219 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
3220 three characters are <:: and the subsequent character
3221 is neither : nor >, the < is treated as a preprocessor
3222 token by itself". */
3223 if (CPP_OPTION (pfile
, cplusplus
)
3224 && CPP_OPTION (pfile
, lang
) != CLK_CXX98
3225 && CPP_OPTION (pfile
, lang
) != CLK_GNUCXX
3226 && buffer
->cur
[1] == ':'
3227 && buffer
->cur
[2] != ':' && buffer
->cur
[2] != '>')
3231 result
->flags
|= DIGRAPH
;
3232 result
->type
= CPP_OPEN_SQUARE
;
3234 else if (*buffer
->cur
== '%')
3237 result
->flags
|= DIGRAPH
;
3238 result
->type
= CPP_OPEN_BRACE
;
3244 result
->type
= CPP_GREATER
;
3245 if (*buffer
->cur
== '=')
3246 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
3247 else if (*buffer
->cur
== '>')
3250 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
3255 result
->type
= CPP_MOD
;
3256 if (*buffer
->cur
== '=')
3257 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
3258 else if (CPP_OPTION (pfile
, digraphs
))
3260 if (*buffer
->cur
== ':')
3263 result
->flags
|= DIGRAPH
;
3264 result
->type
= CPP_HASH
;
3265 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
3266 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
3268 else if (*buffer
->cur
== '>')
3271 result
->flags
|= DIGRAPH
;
3272 result
->type
= CPP_CLOSE_BRACE
;
3278 result
->type
= CPP_DOT
;
3279 if (ISDIGIT (*buffer
->cur
))
3281 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3282 result
->type
= CPP_NUMBER
;
3283 lex_number (pfile
, &result
->val
.str
, &nst
);
3284 warn_about_normalization (pfile
, result
, &nst
);
3286 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
3287 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
3288 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
3289 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
3293 result
->type
= CPP_PLUS
;
3294 if (*buffer
->cur
== '+')
3295 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
3296 else if (*buffer
->cur
== '=')
3297 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
3301 result
->type
= CPP_MINUS
;
3302 if (*buffer
->cur
== '>')
3305 result
->type
= CPP_DEREF
;
3306 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
3307 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
3309 else if (*buffer
->cur
== '-')
3310 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
3311 else if (*buffer
->cur
== '=')
3312 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
3316 result
->type
= CPP_AND
;
3317 if (*buffer
->cur
== '&')
3318 buffer
->cur
++, result
->type
= CPP_AND_AND
;
3319 else if (*buffer
->cur
== '=')
3320 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
3324 result
->type
= CPP_OR
;
3325 if (*buffer
->cur
== '|')
3326 buffer
->cur
++, result
->type
= CPP_OR_OR
;
3327 else if (*buffer
->cur
== '=')
3328 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
3332 result
->type
= CPP_COLON
;
3333 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, scope
))
3334 buffer
->cur
++, result
->type
= CPP_SCOPE
;
3335 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
3338 result
->flags
|= DIGRAPH
;
3339 result
->type
= CPP_CLOSE_SQUARE
;
3343 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
3344 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
3345 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
3346 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
3347 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
3349 case '?': result
->type
= CPP_QUERY
; break;
3350 case '~': result
->type
= CPP_COMPL
; break;
3351 case ',': result
->type
= CPP_COMMA
; break;
3352 case '(': result
->type
= CPP_OPEN_PAREN
; break;
3353 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
3354 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
3355 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
3356 case '{': result
->type
= CPP_OPEN_BRACE
; break;
3357 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
3358 case ';': result
->type
= CPP_SEMICOLON
; break;
3360 /* @ is a punctuator in Objective-C. */
3361 case '@': result
->type
= CPP_ATSIGN
; break;
3365 const uchar
*base
= --buffer
->cur
;
3367 /* Check for an extended identifier ($ or UCN or UTF-8). */
3368 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
3369 if (forms_identifier_p (pfile
, true, &nst
))
3371 result
->type
= CPP_NAME
;
3372 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
,
3373 &result
->val
.node
.spelling
);
3374 warn_about_normalization (pfile
, result
, &nst
);
3378 /* Otherwise this will form a CPP_OTHER token. Parse valid UTF-8 as a
3381 if (c
>= utf8_signifier
)
3383 const uchar
*pstr
= base
;
3385 if (_cpp_valid_utf8 (pfile
, &pstr
, buffer
->rlimit
, 0, NULL
, &s
))
3388 create_literal (pfile
, result
, base
, buffer
->cur
- base
, CPP_OTHER
);
3394 /* Potentially convert the location of the token to a range. */
3395 if (result
->src_loc
>= RESERVED_LOCATION_COUNT
3396 && result
->type
!= CPP_EOF
)
3398 /* Ensure that any line notes are processed, so that we have the
3399 correct physical line/column for the end-point of the token even
3400 when a logical line is split via one or more backslashes. */
3401 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
3402 && !pfile
->overlaid_buffer
)
3403 _cpp_process_line_notes (pfile
, false);
3405 source_range tok_range
;
3406 tok_range
.m_start
= result
->src_loc
;
3408 = linemap_position_for_column (pfile
->line_table
,
3409 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
3411 result
->src_loc
= COMBINE_LOCATION_DATA (pfile
->line_table
,
3419 /* An upper bound on the number of bytes needed to spell TOKEN.
3420 Does not include preceding whitespace. */
3422 cpp_token_len (const cpp_token
*token
)
3426 switch (TOKEN_SPELL (token
))
3428 default: len
= 6; break;
3429 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
3430 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
3436 /* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
3437 Return the number of bytes read out of NAME. (There are always
3438 10 bytes written to BUFFER.) */
3441 utf8_to_ucn (unsigned char *buffer
, const unsigned char *name
)
3447 unsigned long utf32
;
3449 /* Compute the length of the UTF-8 sequence. */
3450 for (t
= *name
; t
& 0x80; t
<<= 1)
3453 utf32
= *name
& (0x7F >> ucn_len
);
3454 for (ucn_len_c
= 1; ucn_len_c
< ucn_len
; ucn_len_c
++)
3456 utf32
= (utf32
<< 6) | (*++name
& 0x3F);
3458 /* Ill-formed UTF-8. */
3459 if ((*name
& ~0x3F) != 0x80)
3465 for (j
= 7; j
>= 0; j
--)
3466 *buffer
++ = "0123456789abcdef"[(utf32
>> (4 * j
)) & 0xF];
3470 /* Given a token TYPE corresponding to a digraph, return a pointer to
3471 the spelling of the digraph. */
3472 static const unsigned char *
3473 cpp_digraph2name (enum cpp_ttype type
)
3475 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
3478 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
3479 The buffer must already contain enough space to hold the
3480 token's spelling. Returns a pointer to the character after the
3481 last character written. */
3483 _cpp_spell_ident_ucns (unsigned char *buffer
, cpp_hashnode
*ident
)
3486 const unsigned char *name
= NODE_NAME (ident
);
3488 for (i
= 0; i
< NODE_LEN (ident
); i
++)
3489 if (name
[i
] & ~0x7F)
3491 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
3495 *buffer
++ = name
[i
];
3500 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3501 already contain enough space to hold the token's spelling.
3502 Returns a pointer to the character after the last character written.
3503 FORSTRING is true if this is to be the spelling after translation
3504 phase 1 (with the original spelling of extended identifiers), false
3505 if extended identifiers should always be written using UCNs (there is
3506 no option for always writing them in the internal UTF-8 form).
3507 FIXME: Would be nice if we didn't need the PFILE argument. */
3509 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
3510 unsigned char *buffer
, bool forstring
)
3512 switch (TOKEN_SPELL (token
))
3514 case SPELL_OPERATOR
:
3516 const unsigned char *spelling
;
3519 if (token
->flags
& DIGRAPH
)
3520 spelling
= cpp_digraph2name (token
->type
);
3521 else if (token
->flags
& NAMED_OP
)
3524 spelling
= TOKEN_NAME (token
);
3526 while ((c
= *spelling
++) != '\0')
3535 memcpy (buffer
, NODE_NAME (token
->val
.node
.spelling
),
3536 NODE_LEN (token
->val
.node
.spelling
));
3537 buffer
+= NODE_LEN (token
->val
.node
.spelling
);
3540 buffer
= _cpp_spell_ident_ucns (buffer
, token
->val
.node
.node
);
3544 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
3545 buffer
+= token
->val
.str
.len
;
3549 cpp_error (pfile
, CPP_DL_ICE
,
3550 "unspellable token %s", TOKEN_NAME (token
));
3557 /* Returns TOKEN spelt as a null-terminated string. The string is
3558 freed when the reader is destroyed. Useful for diagnostics. */
3560 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
3562 unsigned int len
= cpp_token_len (token
) + 1;
3563 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
3565 end
= cpp_spell_token (pfile
, token
, start
, false);
3571 /* Returns a pointer to a string which spells the token defined by
3572 TYPE and FLAGS. Used by C front ends, which really should move to
3573 using cpp_token_as_text. */
3575 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
3577 if (flags
& DIGRAPH
)
3578 return (const char *) cpp_digraph2name (type
);
3579 else if (flags
& NAMED_OP
)
3580 return cpp_named_operator2name (type
);
3582 return (const char *) token_spellings
[type
].name
;
3585 /* Writes the spelling of token to FP, without any preceding space.
3586 Separated from cpp_spell_token for efficiency - to avoid stdio
3587 double-buffering. */
3589 cpp_output_token (const cpp_token
*token
, FILE *fp
)
3591 switch (TOKEN_SPELL (token
))
3593 case SPELL_OPERATOR
:
3595 const unsigned char *spelling
;
3598 if (token
->flags
& DIGRAPH
)
3599 spelling
= cpp_digraph2name (token
->type
);
3600 else if (token
->flags
& NAMED_OP
)
3603 spelling
= TOKEN_NAME (token
);
3608 while ((c
= *++spelling
) != '\0');
3616 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
3618 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
3619 if (name
[i
] & ~0x7F)
3621 unsigned char buffer
[10];
3622 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
3623 fwrite (buffer
, 1, 10, fp
);
3626 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
3631 if (token
->type
== CPP_HEADER_NAME
)
3633 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
3634 if (token
->type
== CPP_HEADER_NAME
)
3639 /* An error, most probably. */
3644 /* Compare two tokens. */
3646 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
3648 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
3649 switch (TOKEN_SPELL (a
))
3651 default: /* Keep compiler happy. */
3652 case SPELL_OPERATOR
:
3653 /* token_no is used to track where multiple consecutive ##
3654 tokens were originally located. */
3655 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
3657 return (a
->type
!= CPP_MACRO_ARG
3658 || (a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
3659 && a
->val
.macro_arg
.spelling
== b
->val
.macro_arg
.spelling
));
3661 return (a
->val
.node
.node
== b
->val
.node
.node
3662 && a
->val
.node
.spelling
== b
->val
.node
.spelling
);
3664 return (a
->val
.str
.len
== b
->val
.str
.len
3665 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
3672 /* Returns nonzero if a space should be inserted to avoid an
3673 accidental token paste for output. For simplicity, it is
3674 conservative, and occasionally advises a space where one is not
3675 needed, e.g. "." and ".2".

TOKEN1 is the left-hand token and TOKEN2 the token that would be
written directly after it.  PFILE supplies the options consulted
below (objc, user_literals) and is handed to name_p.  */
3677 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
3678 const cpp_token
*token2
)
3680 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
3683 if (token1
->flags
& NAMED_OP
)
3685 if (token2
->flags
& NAMED_OP
)
/* When TOKEN2 is a digraph or is spelled as an operator, C becomes
   the first character of its spelling; digraphs are looked up in
   digraph_spellings, everything else in token_spellings.  */
3689 if (token2
->flags
& DIGRAPH
)
3690 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
3691 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
3692 c
= token_spellings
[b
].name
[0];
3694 /* Quickly get everything that can paste with an '='. */
3695 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* The cases below are keyed on the left token's type; each one lists
   the leading characters (C) or token types (B) of TOKEN2 that would
   combine with it into a different token.  */
3700 case CPP_GREATER
: return c
== '>';
3701 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
3702 case CPP_PLUS
: return c
== '+';
3703 case CPP_MINUS
: return c
== '-' || c
== '>';
3704 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
3705 case CPP_MOD
: return c
== ':' || c
== '>';
3706 case CPP_AND
: return c
== '&';
3707 case CPP_OR
: return c
== '|';
3708 case CPP_COLON
: return c
== ':' || c
== '>';
3709 case CPP_DEREF
: return c
== '*';
3710 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
3711 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
3712 case CPP_NAME
: return ((b
== CPP_NUMBER
3713 && name_p (pfile
, &token2
->val
.str
))
3715 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
3716 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
3717 || c
== '.' || c
== '+' || c
== '-');
/* For CPP_OTHER the decision depends on the first byte of TOKEN1's
   own text: a backslash, or '@' when the objc option is set.  */
3719 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
3721 || (CPP_OPTION (pfile
, objc
)
3722 && token1
->val
.str
.text
[0] == '@'
3723 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
3724 case CPP_LESS_EQ
: return c
== '>';
/* String literals only matter when the user_literals option is set:
   a following literal whose text starts with an identifier-start
   character could otherwise be read as a literal suffix.  */
3727 case CPP_UTF8STRING
:
3729 case CPP_STRING32
: return (CPP_OPTION (pfile
, user_literals
)
3731 || (TOKEN_SPELL (token2
) == SPELL_LITERAL
3732 && ISIDST (token2
->val
.str
.text
[0]))));
3740 /* Output all the remaining tokens on the current line, and a newline
3741 character, to FP. Leading whitespace is removed. If there are
3742 macros, special token padding is not performed. */
3744 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
3746 const cpp_token
*token
;
3748 token
= cpp_get_token (pfile
);
3749 while (token
->type
!= CPP_EOF
)
3751 cpp_output_token (token
, fp
);
3752 token
= cpp_get_token (pfile
);
3753 if (token
->flags
& PREV_WHITE
)
3760 /* Return a string representation of all the remaining tokens on the
3761 current line. The result is allocated using xmalloc and must be
3762 freed by the caller. */
3764 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
3766 const cpp_token
*token
;
3767 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
3768 unsigned int alloced
= 120 + out
;
3769 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
3771 /* If DIR_NAME is empty, there are no initial contents. */
3774 sprintf ((char *) result
, "#%s ", dir_name
);
3778 token
= cpp_get_token (pfile
);
3779 while (token
->type
!= CPP_EOF
)
3781 unsigned char *last
;
3782 /* Include room for a possible space and the terminating nul. */
3783 unsigned int len
= cpp_token_len (token
) + 2;
3785 if (out
+ len
> alloced
)
3788 if (out
+ len
> alloced
)
3789 alloced
= out
+ len
;
3790 result
= (unsigned char *) xrealloc (result
, alloced
);
3793 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
3794 out
= last
- result
;
3796 token
= cpp_get_token (pfile
);
3797 if (token
->flags
& PREV_WHITE
)
3798 result
[out
++] = ' ';
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when BUFF
   is extended: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that arbitrary expressions may
   be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3820 /* Create a new allocation buffer. Place the control block at the end
3821 of the buffer, so that buffer overflows will cause immediate chaos. */
3823 new_buff (size_t len
)
3826 unsigned char *base
;
3828 if (len
< MIN_BUFF_SIZE
)
3829 len
= MIN_BUFF_SIZE
;
3830 len
= CPP_ALIGN (len
);
3832 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3833 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3835 size_t slen
= CPP_ALIGN2 (sizeof (_cpp_buff
), 2 * DEFAULT_ALIGNMENT
);
3836 base
= XNEWVEC (unsigned char, len
+ slen
);
3837 result
= (_cpp_buff
*) base
;
3840 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
3841 result
= (_cpp_buff
*) (base
+ len
);
3843 result
->base
= base
;
3845 result
->limit
= base
+ len
;
3846 result
->next
= NULL
;
3850 /* Place a chain of unwanted allocation buffers on the free list. */
3852 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
3854 _cpp_buff
*end
= buff
;
3858 end
->next
= pfile
->free_buffs
;
3859 pfile
->free_buffs
= buff
;
3862 /* Return a free buffer of size at least MIN_SIZE. */
3864 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
3866 _cpp_buff
*result
, **p
;
3868 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
3873 return new_buff (min_size
);
3875 size
= result
->limit
- result
->base
;
3876 /* Return a buffer that's big enough, but don't waste one that's
3878 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
3883 result
->next
= NULL
;
3884 result
->cur
= result
->base
;
3888 /* Creates a new buffer with enough space to hold the uncommitted
3889 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
3890 the excess bytes to the new buffer. Chains the new buffer after
3891 BUFF, and returns the new buffer. */
3893 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
3895 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
3896 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
3898 buff
->next
= new_buff
;
3899 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
3903 /* Creates a new buffer with enough space to hold the uncommitted
3904 remaining bytes of the buffer pointed to by BUFF, and at least
3905 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3906 Chains the new buffer before the buffer pointed to by BUFF, and
3907 updates the pointer to point to the new buffer. */
3909 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
3911 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
3912 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
3914 new_buff
= _cpp_get_buff (pfile
, size
);
3915 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
3916 new_buff
->next
= old_buff
;
3920 /* Free a chain of buffers starting at BUFF. */
3922 _cpp_free_buff (_cpp_buff
*buff
)
3926 for (; buff
; buff
= next
)
3929 #ifdef ENABLE_VALGRIND_ANNOTATIONS
3937 /* Allocate permanent, unaligned storage of length LEN. */
3939 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
3941 _cpp_buff
*buff
= pfile
->u_buff
;
3942 unsigned char *result
= buff
->cur
;
3944 if (len
> (size_t) (buff
->limit
- result
))
3946 buff
= _cpp_get_buff (pfile
, len
);
3947 buff
->next
= pfile
->u_buff
;
3948 pfile
->u_buff
= buff
;
3952 buff
->cur
= result
+ len
;
3956 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3957 That buffer is used for growing allocations when saving macro
3958 replacement lists in a #define, and when parsing an answer to an
3959 assertion in #assert, #unassert or #if (and therefore possibly
3960 whilst expanding macros). It therefore must not be used by any
3961 code that they might call: specifically the lexer and the guts of
3964 All existing other uses clearly fit this restriction: storing
3965 registered pragmas during initialization. */
3967 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
3969 _cpp_buff
*buff
= pfile
->a_buff
;
3970 unsigned char *result
= buff
->cur
;
3972 if (len
> (size_t) (buff
->limit
- result
))
3974 buff
= _cpp_get_buff (pfile
, len
);
3975 buff
->next
= pfile
->a_buff
;
3976 pfile
->a_buff
= buff
;
3980 buff
->cur
= result
+ len
;
3984 /* Commit or allocate storage from a buffer. */
3987 _cpp_commit_buff (cpp_reader
*pfile
, size_t size
)
3989 void *ptr
= BUFF_FRONT (pfile
->a_buff
);
3991 if (pfile
->hash_table
->alloc_subobject
)
3993 void *copy
= pfile
->hash_table
->alloc_subobject (size
);
3994 memcpy (copy
, ptr
, size
);
3998 BUFF_FRONT (pfile
->a_buff
) += size
;
4003 /* Say which field of TOK is in use. */
4005 enum cpp_token_fld_kind
4006 cpp_token_val_index (const cpp_token
*tok
)
4008 switch (TOKEN_SPELL (tok
))
4011 return CPP_TOKEN_FLD_NODE
;
4013 return CPP_TOKEN_FLD_STR
;
4014 case SPELL_OPERATOR
:
4015 /* Operands which were originally spelled as ident keep around
4016 the node for the exact spelling. */
4017 if (tok
->flags
& NAMED_OP
)
4018 return CPP_TOKEN_FLD_NODE
;
4019 else if (tok
->type
== CPP_PASTE
)
4020 return CPP_TOKEN_FLD_TOKEN_NO
;
4022 return CPP_TOKEN_FLD_NONE
;
4024 if (tok
->type
== CPP_MACRO_ARG
)
4025 return CPP_TOKEN_FLD_ARG_NO
;
4026 else if (tok
->type
== CPP_PADDING
)
4027 return CPP_TOKEN_FLD_SOURCE
;
4028 else if (tok
->type
== CPP_PRAGMA
)
4029 return CPP_TOKEN_FLD_PRAGMA
;
4032 return CPP_TOKEN_FLD_NONE
;
4036 /* All tokens lexed in R after calling this function will be forced to
4037 have their location_t to be P, until
4038 cpp_stop_forcing_token_locations is called for R. */
4041 cpp_force_token_locations (cpp_reader
*r
, location_t loc
)
4043 r
->forced_token_location
= loc
;
4046 /* Go back to assigning locations naturally for lexed tokens. */
4049 cpp_stop_forcing_token_locations (cpp_reader
*r
)
4051 r
->forced_token_location
= 0;
/* PEEK points at a backslash.  If that backslash escapes an end of
   line ("\n", "\r" or "\r\n"), return the position just after the
   line ending, repeating for as long as another escaped end of line
   follows directly.  The position is only advanced while the result
   stays below LIMIT; otherwise the last valid position is returned.
   A backslash that escapes nothing leaves PEEK unchanged.  */

static const unsigned char *
do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
{
  for (;;)
    {
      int skip;

      if (__builtin_expect (peek[1] == '\n', true))
	skip = 2;
      else if (__builtin_expect (peek[1] == '\r', false))
	/* "\r\n" counts as one line ending.  */
	skip = peek[2] == '\n' ? 3 : 2;
      else
	return peek;

      const unsigned char *after = peek + skip;
      if (!__builtin_expect (after < limit, true))
	return peek;

      peek = after;
      /* The user might be perverse and chain escaped newlines.  */
      if (*peek != '\\')
	return peek;
    }
}
/* Return the position of the next significant character at or after
   PEEK: PEEK itself unless it points at a backslash, in which case
   do_peek_backslash decides whether an escaped end of line has to be
   stepped over.  LIMIT is passed through unchanged.  */

static const unsigned char *
do_peek_next (const unsigned char *peek, const unsigned char *limit)
{
  return (__builtin_expect (*peek == '\\', false)
	  ? do_peek_backslash (peek, limit)
	  : peek);
}
/* Step backwards from PEEK and return a pointer to the character that
   logically precedes it, or NULL when nothing precedes it within
   [BOUND, PEEK).  A line ending ('\n', '\r' or "\r\n") directly
   preceded by a backslash is an escaped newline: it is skipped and the
   search resumes before the backslash.  Nothing below BOUND is ever
   read.  */

static const unsigned char *
do_peek_prev (const unsigned char *peek, const unsigned char *bound)
{
  if (peek == bound)
    return NULL;

  unsigned char c = *--peek;
  /* Only a line ending can be the tail of an escaped newline.  The
     carriage return must be tested as '\r'; testing the letter 'r'
     would miss backslash-CR and would swallow a real 'r' that follows
     a backslash.  */
  if (__builtin_expect (c == '\n', false)
      || __builtin_expect (c == '\r', false))
    {
      if (peek == bound)
	return peek;

      /* Offset from PEEK of the character in front of the line
	 ending; one further back for a two-character "\r\n".  */
      int ix = -1;
      if (c == '\n' && peek[ix] == '\r')
	{
	  if (peek + ix == bound)
	    return peek;
	  ix--;
	}

      if (peek[ix] == '\\')
	return do_peek_prev (peek + ix, bound);
    }

  return peek;
}
/* The first character of identifier MATCH has already been seen at
   PEEK[-1].  Check that the rest of MATCH follows at PEEK, looking
   through escaped newlines, and that the identifier ends there.  On
   success return the position after any trailing spaces, tabs and
   escaped newlines; otherwise return NULL.  */

static const unsigned char *
do_peek_ident (const char *match, const unsigned char *peek,
	       const unsigned char *limit)
{
  /* MATCH is advanced before each comparison because its first
     character was consumed by the caller.  */
  while (*++match)
    {
      if (*peek != *match)
	{
	  /* Retry once after stepping over an escaped newline.  */
	  peek = do_peek_next (peek, limit);
	  if (*peek != *match)
	    return NULL;
	}
      peek++;
    }

  /* Must now not be looking at an identifier char.  */
  peek = do_peek_next (peek, limit);
  if (ISIDNUM (*peek))
    return NULL;

  /* Skip control-line whitespace.  */
  for (;;)
    {
      while (*peek == ' ' || *peek == '\t')
	peek++;

      if (!__builtin_expect (*peek == '\\', false))
	break;

      peek = do_peek_backslash (peek, limit);
      /* Still at a backslash: it did not escape a newline, so stop.  */
      if (*peek == '\\')
	break;
    }

  return peek;
}
4155 /* Are we looking at a module control line starting at PEEK - 1?

C is the character at PEEK - 1, i.e. the first character of the
candidate keyword, and PEEK points just past it.  LIMIT is handed
through to the do_peek_* helpers.  The keywords recognized are
"export", "import", "__import" and "module".  Wherever the second
keyword character is tested a backslash is accepted as well, so that
do_peek_ident gets the chance to look through an escaped newline.  */
4158 do_peek_module (cpp_reader
*pfile
, unsigned char c
,
4159 const unsigned char *peek
, const unsigned char *limit
)
4161 bool import
= false;
/* Dispatch on the first character of the candidate keyword: 'e'
   (export), 'i' (import), '_' (__import) or 'm' (module).  */
4163 if (__builtin_expect (c
== 'e', false))
4165 if (!((peek
[0] == 'x' || peek
[0] == '\\')
4166 && (peek
= do_peek_ident ("export", peek
, limit
))))
4169 /* export, peek for import or module. No need to peek __import
/* After "export" PEEK is at the first character of the next word, so
   do_peek_ident is given PEEK + 1 for the keyword that follows.  */
4173 if (!((peek
[1] == 'm' || peek
[1] == '\\')
4174 && (peek
= do_peek_ident ("import", peek
+ 1, limit
))))
4178 else if (peek
[0] == 'm')
4180 if (!((peek
[1] == 'o' || peek
[1] == '\\')
4181 && (peek
= do_peek_ident ("module", peek
+ 1, limit
))))
4187 else if (__builtin_expect (c
== 'i', false))
4189 if (!((peek
[0] == 'm' || peek
[0] == '\\')
4190 && (peek
= do_peek_ident ("import", peek
, limit
))))
4194 else if (__builtin_expect (c
== '_', false))
4196 /* Needed for translated includes. */
4197 if (!((peek
[0] == '_' || peek
[0] == '\\')
4198 && (peek
= do_peek_ident ("__import", peek
, limit
))))
4202 else if (__builtin_expect (c
== 'm', false))
4204 if (!((peek
[0] == 'o' || peek
[0] == '\\')
4205 && (peek
= do_peek_ident ("module", peek
, limit
))))
4211 /* Peek the next character to see if it's good enough. We'll be at
4212 the first non-whitespace char, including skipping an escaped
4214 /* ... import followed by identifier, ':', '<' or header-name
4215 preprocessing tokens, or module followed by identifier, ':' or
4216 ';' preprocessing tokens. */
/* P is the first character after the keyword and its trailing
   whitespace; PEEK is left pointing at the character after P.  */
4217 unsigned char p
= *peek
++;
4219 /* A character literal is ... single quotes, ... optionally preceded
4220 by u8, u, U, or L */
4221 /* A string-literal is a ... double quotes, optionally prefixed by
4222 R, u8, u8R, u, uR, U, UR, L, or LR */
4225 peek
= do_peek_next (peek
, limit
);
4233 else if (p
== 'U' || p
== 'L')
4236 peek
= do_peek_next (peek
, limit
);
4238 if (*peek
== '\"' || *peek
== '\'')
4243 /* Identifier. Ok. */
4248 if (CPP_OPTION (pfile
, rliterals
))
4250 peek
= do_peek_next (peek
, limit
);
4254 /* Identifier. Ok. */
/* The constant test 'Z' - 'A' == 25 selects the range comparison only
   when the upper-case letters are contiguous in the execution
   character set.  */
4256 else if ('Z' - 'A' == 25
4257 ? ((p
>= 'A' && p
<= 'Z') || (p
>= 'a' && p
<= 'z') || p
== '_')
4260 /* Identifier. Ok. */
4264 /* Maybe angle header, ok for import. Reject
4265 '<=', '<<' digraph:'<:'. */
4268 peek
= do_peek_next (peek
, limit
);
4269 if (*peek
== '=' || *peek
== '<'
4270 || (*peek
== ':' && CPP_OPTION (pfile
, digraphs
)))
4275 /* SEMICOLON, ok for module. */
4281 /* STRING, ok for import. */
4287 /* Maybe COLON, ok. Reject '::', digraph:':>'. */
4288 peek
= do_peek_next (peek
, limit
);
4289 if (*peek
== ':' || (*peek
== '>' && CPP_OPTION (pfile
, digraphs
)))
4293 /* FIXME: Detect a unicode character, excluding those not
4294 permitted as the initial character. [lex.name]/1. I presume
4295 we need to check the \[uU] spellings, and directly using
4296 Unicode in say UTF8 form? Or perhaps we do the phase-1
4297 conversion of UTF8 to universal-character-names? */
4303 /* Directives-only scanning. Somewhat more relaxed than correct
4304 parsing -- some ill-formed programs will not be rejected. */
4307 cpp_directive_only_process (cpp_reader
*pfile
,
4309 void (*cb
) (cpp_reader
*, CPP_DO_task
, void *, ...))
4311 bool module_p
= CPP_OPTION (pfile
, module_directives
);
4316 /* Buffer initialization, but no line cleaning. */
4317 cpp_buffer
*buffer
= pfile
->buffer
;
4318 buffer
->cur_note
= buffer
->notes_used
= 0;
4319 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
4320 buffer
->need_line
= false;
4321 /* Files always end in a newline or carriage return. We rely on this for
4322 character peeking safety. */
4323 gcc_assert (buffer
->rlimit
[0] == '\n' || buffer
->rlimit
[0] == '\r');
4325 const unsigned char *base
= buffer
->cur
;
4326 unsigned line_count
= 0;
4327 const unsigned char *line_start
= base
;
4332 const unsigned char *lwm
= base
;
4333 for (const unsigned char *pos
= base
, *limit
= buffer
->rlimit
;
4336 unsigned char c
= *pos
++;
4337 /* This matches the switch in _cpp_lex_direct. */
4340 case ' ': case '\t': case '\f': case '\v':
4341 /* Whitespace, do nothing. */
4344 case '\r': /* MAC line ending, or Windows \r\n */
4353 CPP_INCREMENT_LINE (pfile
, 0);
4359 /* <backslash><newline> is removed, and doesn't undo any
4360 preceding escape or whatnot. */
4366 else if (*pos
== '\r')
4378 /* Line directive. */
4379 if (pos
- 1 > base
&& !pfile
->state
.skipping
)
4380 cb (pfile
, CPP_DO_print
, data
,
4381 line_count
, base
, pos
- 1 - base
);
4383 /* Prep things for directive handling. */
4384 buffer
->next_line
= pos
;
4385 buffer
->need_line
= true;
4386 bool ok
= _cpp_get_fresh_line (pfile
);
4387 gcc_checking_assert (ok
);
4389 /* Ensure proper column numbering for generated
4391 buffer
->line_base
-= pos
- line_start
;
4393 _cpp_handle_directive (pfile
, line_start
+ 1 != pos
);
4395 /* Sanitize the line settings. Duplicate #include's can
4397 // FIXME: Necessary?
4398 pfile
->line_table
->highest_location
4399 = pfile
->line_table
->highest_line
;
4401 if (!pfile
->state
.skipping
4402 && pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
4403 cb (pfile
, CPP_DO_location
, data
,
4404 pfile
->line_table
->highest_line
);
4412 const unsigned char *peek
= do_peek_next (pos
, limit
);
4413 if (!(*peek
== '/' || *peek
== '*'))
4416 /* Line or block comment */
4417 bool is_block
= *peek
== '*';
4421 = linemap_position_for_column (pfile
->line_table
,
4440 CPP_INCREMENT_LINE (pfile
, 0);
4443 if (!esc
&& !is_block
)
4455 if (pos
> peek
&& !esc
)
4471 cpp_error_with_line (pfile
, CPP_DL_ERROR
, sloc
, 0,
4472 "unterminated comment");
4479 if (!CPP_OPTION (pfile
, digit_separators
))
4480 goto delimited_string
;
4482 /* Possibly a number punctuator. */
4483 if (!ISIDNUM (*do_peek_next (pos
, limit
)))
4484 goto delimited_string
;
4489 if (!CPP_OPTION (pfile
, rliterals
))
4490 goto delimited_string
;
4494 /* For ' see if it's a number punctuator
4495 \.?<digit>(<digit>|<identifier-nondigit>
4496 |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
4497 /* For " see if it's a raw string
4498 {U,L,u,u8}R. This includes CPP_NUMBER detection,
4499 because that could be 0e+R. */
4500 const unsigned char *peek
= pos
- 1;
4501 bool quote_first
= c
== '"';
4502 bool quote_eight
= false;
4503 bool maybe_number_start
= false;
4504 bool want_number
= false;
4506 while ((peek
= do_peek_prev (peek
, lwm
)))
4508 unsigned char p
= *peek
;
4519 quote_first
= false;
4520 if (p
== 'L' || p
== 'U' || p
== 'u')
4527 else if (quote_eight
)
4534 quote_eight
= false;
4539 if (!want_number
&& ISIDNUM (p
))
4547 maybe_number_start
= true;
4550 else if (ISIDNUM (p
))
4551 maybe_number_start
= false;
4552 else if (p
== '+' || p
== '-')
4554 if (const unsigned char *peek_prev
4555 = do_peek_prev (peek
, lwm
))
4558 if (p
== 'e' || p
== 'E'
4559 || p
== 'p' || p
== 'P')
4562 maybe_number_start
= false;
4570 else if (p
== '\'' || p
== '\"')
4572 /* If this is lwm, this must be the end of a
4573 previous string. So this is a trailing
4574 literal type, (a) if those are allowed,
4575 and (b) maybe_start is false. Otherwise
4576 this must be a CPP_NUMBER because we've
4577 met another ', and we'd have checked that
4578 in its own right. */
4579 if (peek
== lwm
&& CPP_OPTION (pfile
, uliterals
))
4581 if (!maybe_number_start
&& !want_number
)
4582 /* Must be a literal type. */
4586 && CPP_OPTION (pfile
, digit_separators
))
4587 maybe_number_start
= true;
4592 else if (!quote_first
&& !quote_eight
)
4596 if (maybe_number_start
)
4604 goto delimited_string
;
4609 /* (Possibly raw) string or char literal. */
4610 unsigned char end
= c
;
4612 const unsigned char *delim
= NULL
;
4613 location_t sloc
= linemap_position_for_column (pfile
->line_table
,
4619 /* There can be no line breaks in the delimiter. */
4621 for (delim_len
= 0; (c
= *pos
++) != '('; delim_len
++)
4623 if (delim_len
== 16)
4625 cpp_error_with_line (pfile
, CPP_DL_ERROR
,
4627 "raw string delimiter"
4635 if (strchr (") \\\t\v\f\n", c
))
4637 cpp_error_with_line (pfile
, CPP_DL_ERROR
,
4639 "invalid character '%c'"
4668 CPP_INCREMENT_LINE (pfile
, 0);
4678 && pos
+ delim_len
+ 1 < limit
4679 && pos
[delim_len
] == end
4680 && !memcmp (delim
, pos
, delim_len
))
4682 pos
+= delim_len
+ 1;
4689 if (!raw
&& !(esc
& 1) && c
== end
)
4696 cpp_error_with_line (pfile
, CPP_DL_ERROR
, sloc
, 0,
4697 "unterminated literal");
4709 if (bol
&& module_p
&& !pfile
->state
.skipping
4710 && do_peek_module (pfile
, c
, pos
, limit
))
4712 /* We've seen the start of a module control line.
4713 Start up the tokenizer. */
4714 pos
--; /* Backup over the first character. */
4716 /* Backup over whitespace to start of line. */
4717 while (pos
> line_start
4718 && (pos
[-1] == ' ' || pos
[-1] == '\t'))
4722 cb (pfile
, CPP_DO_print
, data
, line_count
, base
, pos
- base
);
4724 /* Prep things for directive handling. */
4725 buffer
->next_line
= pos
;
4726 buffer
->need_line
= true;
4728 /* Now get tokens until the PRAGMA_EOL. */
4731 location_t spelling
;
4732 const cpp_token
*tok
4733 = cpp_get_token_with_location (pfile
, &spelling
);
4735 gcc_assert (pfile
->state
.in_deferred_pragma
4736 || tok
->type
== CPP_PRAGMA_EOL
);
4737 cb (pfile
, CPP_DO_token
, data
, tok
, spelling
);
4739 while (pfile
->state
.in_deferred_pragma
);
4741 if (pfile
->buffer
->next_line
< pfile
->buffer
->rlimit
)
4742 cb (pfile
, CPP_DO_location
, data
,
4743 pfile
->line_table
->highest_line
);
4745 pfile
->mi_valid
= false;
4753 pfile
->mi_valid
= false;
4758 if (buffer
->rlimit
> base
&& !pfile
->state
.skipping
)
4759 cb (pfile
, CPP_DO_print
, data
, line_count
, base
, buffer
->rlimit
- base
);
4761 _cpp_pop_buffer (pfile
);
4763 while (pfile
->buffer
);