1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000-2014 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
37 enum spell_type category
;
38 const unsigned char *name
;
41 static const unsigned char *const digraph_spellings
[] =
42 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
57 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
58 static void store_comment (cpp_reader
*, cpp_token
*);
59 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
60 unsigned int, enum cpp_ttype
);
61 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
62 static int name_p (cpp_reader
*, const cpp_string
*);
63 static tokenrun
*next_tokenrun (tokenrun
*);
65 static _cpp_buff
*new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token
*token
, const char *string
)
75 if (token
->type
!= CPP_NAME
)
78 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
81 /* Record a note TYPE at byte POS into the current cleaned logical line. */
84 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
86 if (buffer
->notes_used
== buffer
->notes_cap
)
88 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
89 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
93 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
94 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds of optimizations.
104 One of the paths through the ifdefs should provide
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to the found character.
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
115 /* Configure gives us an ifdef test. */
116 #ifndef WORDS_BIGENDIAN
117 #define WORDS_BIGENDIAN 0
120 /* We'd like the largest integer that fits into a register. There's nothing
121 in <stdint.h> that gives us that. For most hosts this is unsigned long,
122 but MS decided on an LLP64 model. Thankfully when building with GCC we
123 can get the "real" word size. */
125 typedef unsigned int word_type
__attribute__((__mode__(__word__
)));
127 typedef unsigned long word_type
;
130 /* The code below is only expecting sizes 4 or 8.
131 Die at compile-time if this expectation is violated. */
132 typedef char check_word_type_size
133 [(sizeof(word_type
) == 8 || sizeof(word_type
) == 4) * 2 - 1];
135 /* Return VAL with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
138 static inline word_type
139 acc_char_mask_misalign (word_type val
, unsigned int n
)
149 /* Return X replicated to all byte positions within WORD_TYPE. */
151 static inline word_type
152 acc_char_replicate (uchar x
)
156 ret
= (x
<< 24) | (x
<< 16) | (x
<< 8) | x
;
157 if (sizeof(word_type
) == 8)
158 ret
= (ret
<< 16 << 16) | ret
;
162 /* Return non-zero if some byte of VAL is (probably) C. */
164 static inline word_type
165 acc_char_cmp (word_type val
, word_type c
)
167 #if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val
^ c
);
172 word_type magic
= 0x7efefefeU
;
173 if (sizeof(word_type
) == 8)
174 magic
= (magic
<< 16 << 16) | 0xfefefefeU
;
178 return ((val
+ magic
) ^ ~val
) & ~magic
;
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED
,
187 word_type val ATTRIBUTE_UNUSED
)
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp
);
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i
= 0; i
< sizeof(word_type
); ++i
)
202 c
= (val
>> (sizeof(word_type
) - i
- 1) * 8) & 0xff;
204 c
= (val
>> i
* 8) & 0xff;
206 if (c
== '\n' || c
== '\r' || c
== '\\' || c
== '?')
214 /* A version of the fast scanner using bit fiddling techniques.
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
225 static const uchar
* search_line_acc_char (const uchar
*, const uchar
*)
229 search_line_acc_char (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
231 const word_type repl_nl
= acc_char_replicate ('\n');
232 const word_type repl_cr
= acc_char_replicate ('\r');
233 const word_type repl_bs
= acc_char_replicate ('\\');
234 const word_type repl_qm
= acc_char_replicate ('?');
236 unsigned int misalign
;
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p
= (word_type
*)((uintptr_t)s
& -sizeof(word_type
));
243 misalign
= (uintptr_t)s
& (sizeof(word_type
) - 1);
245 val
= acc_char_mask_misalign (val
, misalign
);
250 t
= acc_char_cmp (val
, repl_nl
);
251 t
|= acc_char_cmp (val
, repl_cr
);
252 t
|= acc_char_cmp (val
, repl_bs
);
253 t
|= acc_char_cmp (val
, repl_qm
);
255 if (__builtin_expect (t
!= 0, 0))
257 int i
= acc_char_index (t
, val
);
259 return (const uchar
*)p
+ i
;
266 /* Disable on Solaris 2/x86 until the following problems can be properly
269 The Solaris 9 assembler cannot assemble SSE4.2 insns.
270 Before Solaris 9 Update 6, SSE insns cannot be executed.
271 The Solaris 10+ assembler tags objects with the instruction set
272 extensions used, so SSE4.2 executables cannot run on machines that
273 don't support that extension. */
275 #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
277 /* Replicated character data to be shared between implementations.
278 Recall that outside of a context with vector support we can't
279 define compatible vector types, therefore these are all defined
280 in terms of raw characters. */
281 static const char repl_chars
[4][16] __attribute__((aligned(16))) = {
282 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
283 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
284 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
285 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
286 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
287 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
288 { '?', '?', '?', '?', '?', '?', '?', '?',
289 '?', '?', '?', '?', '?', '?', '?', '?' },
292 /* A version of the fast scanner using MMX vectorized byte compare insns.
294 This uses the PMOVMSKB instruction which was introduced with "MMX2",
295 which was packaged into SSE1; it is also present in the AMD MMX
296 extension. Mark the function as using "sse" so that we emit a real
297 "emms" instruction, rather than the 3dNOW "femms" instruction. */
301 __attribute__((__target__("sse")))
303 search_line_mmx (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
305 typedef char v8qi
__attribute__ ((__vector_size__ (8)));
306 typedef int __m64
__attribute__ ((__vector_size__ (8), __may_alias__
));
308 const v8qi repl_nl
= *(const v8qi
*)repl_chars
[0];
309 const v8qi repl_cr
= *(const v8qi
*)repl_chars
[1];
310 const v8qi repl_bs
= *(const v8qi
*)repl_chars
[2];
311 const v8qi repl_qm
= *(const v8qi
*)repl_chars
[3];
313 unsigned int misalign
, found
, mask
;
317 /* Align the source pointer. While MMX doesn't generate unaligned data
318 faults, this allows us to safely scan to the end of the buffer without
319 reading beyond the end of the last page. */
320 misalign
= (uintptr_t)s
& 7;
321 p
= (const v8qi
*)((uintptr_t)s
& -8);
324 /* Create a mask for the bytes that are valid within the first
325 8-byte block. The idea here is that the AND with the mask
326 within the loop is "free", since we need some AND or TEST
327 insn in order to set the flags for the branch anyway. */
328 mask
= -1u << misalign
;
330 /* Main loop processing 8 bytes at a time. */
338 t
= __builtin_ia32_pcmpeqb(data
, repl_nl
);
339 c
= __builtin_ia32_pcmpeqb(data
, repl_cr
);
340 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
341 c
= __builtin_ia32_pcmpeqb(data
, repl_bs
);
342 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
343 c
= __builtin_ia32_pcmpeqb(data
, repl_qm
);
344 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
345 found
= __builtin_ia32_pmovmskb (t
);
350 __builtin_ia32_emms ();
352 /* FOUND contains 1 in bits for which we matched a relevant
353 character. Conversion to the byte index is trivial. */
354 found
= __builtin_ctz(found
);
355 return (const uchar
*)p
+ found
;
358 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
362 __attribute__((__target__("sse2")))
364 search_line_sse2 (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
366 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
368 const v16qi repl_nl
= *(const v16qi
*)repl_chars
[0];
369 const v16qi repl_cr
= *(const v16qi
*)repl_chars
[1];
370 const v16qi repl_bs
= *(const v16qi
*)repl_chars
[2];
371 const v16qi repl_qm
= *(const v16qi
*)repl_chars
[3];
373 unsigned int misalign
, found
, mask
;
377 /* Align the source pointer. */
378 misalign
= (uintptr_t)s
& 15;
379 p
= (const v16qi
*)((uintptr_t)s
& -16);
382 /* Create a mask for the bytes that are valid within the first
383 16-byte block. The Idea here is that the AND with the mask
384 within the loop is "free", since we need some AND or TEST
385 insn in order to set the flags for the branch anyway. */
386 mask
= -1u << misalign
;
388 /* Main loop processing 16 bytes at a time. */
396 t
= __builtin_ia32_pcmpeqb128(data
, repl_nl
);
397 t
|= __builtin_ia32_pcmpeqb128(data
, repl_cr
);
398 t
|= __builtin_ia32_pcmpeqb128(data
, repl_bs
);
399 t
|= __builtin_ia32_pcmpeqb128(data
, repl_qm
);
400 found
= __builtin_ia32_pmovmskb128 (t
);
405 /* FOUND contains 1 in bits for which we matched a relevant
406 character. Conversion to the byte index is trivial. */
407 found
= __builtin_ctz(found
);
408 return (const uchar
*)p
+ found
;
412 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
416 __attribute__((__target__("sse4.2")))
418 search_line_sse42 (const uchar
*s
, const uchar
*end
)
420 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
421 static const v16qi search
= { '\n', '\r', '?', '\\' };
423 uintptr_t si
= (uintptr_t)s
;
426 /* Check for unaligned input. */
431 if (__builtin_expect (end
- s
< 16, 0)
432 && __builtin_expect ((si
& 0xfff) > 0xff0, 0))
434 /* There are fewer than 16 bytes left in the buffer, and fewer
435 than 16 bytes left on the page. Reading 16 bytes at this
436 point might generate a spurious page fault. Defer to the
437 SSE2 implementation, which already handles alignment. */
438 return search_line_sse2 (s
, end
);
441 /* ??? The builtin doesn't understand that the PCMPESTRI read from
442 memory need not be aligned. */
443 sv
= __builtin_ia32_loaddqu ((const char *) s
);
444 index
= __builtin_ia32_pcmpestri128 (search
, 4, sv
, 16, 0);
446 if (__builtin_expect (index
< 16, 0))
449 /* Advance the pointer to an aligned address. We will re-scan a
450 few bytes, but we no longer need to care about reading past the
451 end of a page, since we're guaranteed a match. */
452 s
= (const uchar
*)((si
+ 16) & -16);
455 /* Main loop, processing 16 bytes at a time. By doing the whole loop
456 in inline assembly, we can make proper use of the flags set. */
457 __asm ( "sub $16, %1\n"
460 " %vpcmpestri $0, (%1), %2\n"
462 : "=&c"(index
), "+r"(s
)
463 : "x"(search
), "a"(4), "d"(16));
470 /* Work around out-dated assemblers without sse4 support. */
471 #define search_line_sse42 search_line_sse2
474 /* Check the CPU capabilities. */
476 #include "../gcc/config/i386/cpuid.h"
478 typedef const uchar
* (*search_line_fast_type
) (const uchar
*, const uchar
*);
479 static search_line_fast_type search_line_fast
;
481 #define HAVE_init_vectorized_lexer 1
483 init_vectorized_lexer (void)
485 unsigned dummy
, ecx
= 0, edx
= 0;
486 search_line_fast_type impl
= search_line_acc_char
;
489 #if defined(__SSE4_2__)
491 #elif defined(__SSE2__)
493 #elif defined(__SSE__)
498 impl
= search_line_sse42
;
499 else if (__get_cpuid (1, &dummy
, &dummy
, &ecx
, &edx
) || minimum
== 2)
501 if (minimum
== 3 || (ecx
& bit_SSE4_2
))
502 impl
= search_line_sse42
;
503 else if (minimum
== 2 || (edx
& bit_SSE2
))
504 impl
= search_line_sse2
;
505 else if (minimum
== 1 || (edx
& bit_SSE
))
506 impl
= search_line_mmx
;
508 else if (__get_cpuid (0x80000001, &dummy
, &dummy
, &dummy
, &edx
))
511 || (edx
& (bit_MMXEXT
| bit_CMOV
)) == (bit_MMXEXT
| bit_CMOV
))
512 impl
= search_line_mmx
;
515 search_line_fast
= impl
;
518 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
520 /* A version of the fast scanner using AltiVec vectorized byte compares. */
521 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
522 so we can't compile this function without -maltivec on the command line
523 (or implied by some other switch). */
526 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
528 typedef __attribute__((altivec(vector
))) unsigned char vc
;
531 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
532 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
535 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
536 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
539 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
540 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
543 '?', '?', '?', '?', '?', '?', '?', '?',
544 '?', '?', '?', '?', '?', '?', '?', '?',
547 -1, -1, -1, -1, -1, -1, -1, -1,
548 -1, -1, -1, -1, -1, -1, -1, -1,
550 const vc zero
= { 0 };
554 /* Altivec loads automatically mask addresses with -16. This lets us
555 issue the first load as early as possible. */
556 data
= __builtin_vec_ld(0, (const vc
*)s
);
558 /* Discard bytes before the beginning of the buffer. Do this by
559 beginning with all ones and shifting in zeros according to the
560 mis-alignment. The LVSR instruction pulls the exact shift we
561 want from the address. */
562 #ifdef __BIG_ENDIAN__
563 mask
= __builtin_vec_lvsr(0, s
);
564 mask
= __builtin_vec_perm(zero
, ones
, mask
);
566 mask
= __builtin_vec_lvsl(0, s
);
567 mask
= __builtin_vec_perm(ones
, zero
, mask
);
571 /* While altivec loads mask addresses, we still need to align S so
572 that the offset we compute at the end is correct. */
573 s
= (const uchar
*)((uintptr_t)s
& -16);
575 /* Main loop processing 16 bytes at a time. */
579 vc m_nl
, m_cr
, m_bs
, m_qm
;
582 data
= __builtin_vec_ld(0, (const vc
*)s
);
585 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
586 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
587 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
588 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
589 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
591 /* T now contains 0xff in bytes for which we matched one of the relevant
592 characters. We want to exit the loop if any byte in T is non-zero.
593 Below is the expansion of vec_any_ne(t, zero). */
595 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
598 #define N (sizeof(vc) / sizeof(long))
602 /* Statically assert that N is 2 or 4. */
603 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
605 unsigned long l
, i
= 0;
609 /* Find the first word of T that is non-zero. */
616 s
+= sizeof(unsigned long);
620 s
+= sizeof(unsigned long);
625 s
+= sizeof(unsigned long);
629 /* L now contains 0xff in bytes for which we matched one of the
630 relevant characters. We can find the byte index by finding
631 its bit index and dividing by 8. */
632 #ifdef __BIG_ENDIAN__
633 l
= __builtin_clzl(l
) >> 3;
635 l
= __builtin_ctzl(l
) >> 3;
643 #elif defined (__ARM_NEON__)
644 #include "arm_neon.h"
647 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
649 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
650 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
651 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
652 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
653 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
655 unsigned int misalign
, found
, mask
;
659 /* Align the source pointer. */
660 misalign
= (uintptr_t)s
& 15;
661 p
= (const uint8_t *)((uintptr_t)s
& -16);
664 /* Create a mask for the bytes that are valid within the first
665 16-byte block. The Idea here is that the AND with the mask
666 within the loop is "free", since we need some AND or TEST
667 insn in order to set the flags for the branch anyway. */
668 mask
= (-1u << misalign
) & 0xffff;
670 /* Main loop, processing 16 bytes at a time. */
678 uint8x16_t t
, u
, v
, w
;
685 t
= vceqq_u8 (data
, repl_nl
);
686 u
= vceqq_u8 (data
, repl_cr
);
687 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
688 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
689 t
= vandq_u8 (vorrq_u8 (v
, w
), xmask
);
690 l
= vpadd_u8 (vget_low_u8 (t
), vget_high_u8 (t
));
694 found
= vget_lane_u32 ((uint32x2_t
) vorr_u64 ((uint64x1_t
) n
,
695 vshr_n_u64 ((uint64x1_t
) n
, 24)), 0);
700 /* FOUND contains 1 in bits for which we matched a relevant
701 character. Conversion to the byte index is trivial. */
702 found
= __builtin_ctz (found
);
703 return (const uchar
*)p
+ found
;
708 /* We only have one accelerated alternative. Use a direct call so that
709 we encourage inlining. */
711 #define search_line_fast search_line_acc_char
715 /* Initialize the lexer if needed. */
718 _cpp_init_lexer (void)
720 #ifdef HAVE_init_vectorized_lexer
721 init_vectorized_lexer ();
725 /* Returns with a logical line that contains no escaped newlines or
726 trigraphs. This is a time-critical inner loop. */
728 _cpp_clean_line (cpp_reader
*pfile
)
734 buffer
= pfile
->buffer
;
735 buffer
->cur_note
= buffer
->notes_used
= 0;
736 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
737 buffer
->need_line
= false;
738 s
= buffer
->next_line
;
740 if (!buffer
->from_stage3
)
742 const uchar
*pbackslash
= NULL
;
744 /* Fast path. This is the common case of an un-escaped line with
745 no trigraphs. The primary win here is by not writing any
746 data back to memory until we have to. */
749 /* Perform an optimized search for \n, \r, \\, ?. */
750 s
= search_line_fast (s
, buffer
->rlimit
);
755 /* Record the location of the backslash and continue. */
758 else if (__builtin_expect (c
== '?', 0))
760 if (__builtin_expect (s
[1] == '?', false)
761 && _cpp_trigraph_map
[s
[2]])
763 /* Have a trigraph. We may or may not have to convert
764 it. Add a line note regardless, for -Wtrigraphs. */
765 add_line_note (buffer
, s
, s
[2]);
766 if (CPP_OPTION (pfile
, trigraphs
))
768 /* We do, and that means we have to switch to the slow path. */
771 *d
= _cpp_trigraph_map
[s
[2]];
776 /* Not a trigraph. Continue on fast-path. */
783 /* This must be \r or \n. We're either done, or we'll be forced
784 to write back to the buffer and continue on the slow path. */
787 if (__builtin_expect (s
== buffer
->rlimit
, false))
790 /* DOS line ending? */
791 if (__builtin_expect (c
== '\r', false) && s
[1] == '\n')
794 if (s
== buffer
->rlimit
)
798 if (__builtin_expect (pbackslash
== NULL
, true))
801 /* Check for escaped newline. */
803 while (is_nvspace (p
[-1]))
805 if (p
- 1 != pbackslash
)
808 /* Have an escaped newline; process it and proceed to the slow path. */
810 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
812 buffer
->next_line
= p
- 1;
820 if (c
== '\n' || c
== '\r')
822 /* Handle DOS line endings. */
823 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
825 if (s
== buffer
->rlimit
)
830 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
832 if (p
== buffer
->next_line
|| p
[-1] != '\\')
835 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
837 buffer
->next_line
= p
- 1;
839 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
841 /* Add a note regardless, for the benefit of -Wtrigraphs. */
842 add_line_note (buffer
, d
, s
[2]);
843 if (CPP_OPTION (pfile
, trigraphs
))
845 *d
= _cpp_trigraph_map
[s
[2]];
853 while (*s
!= '\n' && *s
!= '\r')
857 /* Handle DOS line endings. */
858 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
864 /* A sentinel note that should never be processed. */
865 add_line_note (buffer
, d
+ 1, '\n');
866 buffer
->next_line
= s
+ 1;
869 /* Return true if the trigraph indicated by NOTE should be warned
870 about in a comment. */
872 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
876 /* Within comments we don't warn about trigraphs, unless the
877 trigraph forms an escaped newline, as that may change behavior. */
879 if (note
->type
!= '/')
882 /* If -trigraphs, then this was an escaped newline iff the next note is coincident. */
884 if (CPP_OPTION (pfile
, trigraphs
))
885 return note
[1].pos
== note
->pos
;
887 /* Otherwise, see if this forms an escaped newline. */
889 while (is_nvspace (*p
))
892 /* There might have been escaped newlines between the trigraph and the
893 newline we found. Hence the position test. */
894 return (*p
== '\n' && p
< note
[1].pos
);
897 /* Process the notes created by add_line_note as far as the current location. */
900 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
902 cpp_buffer
*buffer
= pfile
->buffer
;
906 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
909 if (note
->pos
> buffer
->cur
)
913 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
915 if (note
->type
== '\\' || note
->type
== ' ')
917 if (note
->type
== ' ' && !in_comment
)
918 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
919 "backslash and newline separated by space");
921 if (buffer
->next_line
> buffer
->rlimit
)
923 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
924 "backslash-newline at end of file");
925 /* Prevent "no newline at end of file" warning. */
926 buffer
->next_line
= buffer
->rlimit
;
929 buffer
->line_base
= note
->pos
;
930 CPP_INCREMENT_LINE (pfile
, 0);
932 else if (_cpp_trigraph_map
[note
->type
])
934 if (CPP_OPTION (pfile
, warn_trigraphs
)
935 && (!in_comment
|| warn_in_comment (pfile
, note
)))
937 if (CPP_OPTION (pfile
, trigraphs
))
938 cpp_warning_with_line (pfile
, CPP_W_TRIGRAPHS
,
939 pfile
->line_table
->highest_line
, col
,
940 "trigraph ??%c converted to %c",
942 (int) _cpp_trigraph_map
[note
->type
]);
945 cpp_warning_with_line
946 (pfile
, CPP_W_TRIGRAPHS
,
947 pfile
->line_table
->highest_line
, col
,
948 "trigraph ??%c ignored, use -trigraphs to enable",
953 else if (note
->type
== 0)
954 /* Already processed in lex_raw_string. */;
960 /* Skip a C-style block comment. We find the end of the comment by
961 seeing if an asterisk is before every '/' we encounter. Returns
962 nonzero if comment terminated by EOF, zero otherwise.
964 Buffer->cur points to the initial asterisk of the comment. */
966 _cpp_skip_block_comment (cpp_reader
*pfile
)
968 cpp_buffer
*buffer
= pfile
->buffer
;
969 const uchar
*cur
= buffer
->cur
;
978 /* People like decorating comments with '*', so check for '/'
979 instead for efficiency. */
987 /* Warn about potential nested comments, but not if the '/'
988 comes immediately before the true comment delimiter.
989 Don't bother to get it right across escaped newlines. */
990 if (CPP_OPTION (pfile
, warn_comments
)
991 && cur
[0] == '*' && cur
[1] != '/')
994 cpp_warning_with_line (pfile
, CPP_W_COMMENTS
,
995 pfile
->line_table
->highest_line
,
996 CPP_BUF_COL (buffer
),
997 "\"/*\" within comment");
1003 buffer
->cur
= cur
- 1;
1004 _cpp_process_line_notes (pfile
, true);
1005 if (buffer
->next_line
>= buffer
->rlimit
)
1007 _cpp_clean_line (pfile
);
1009 cols
= buffer
->next_line
- buffer
->line_base
;
1010 CPP_INCREMENT_LINE (pfile
, cols
);
1017 _cpp_process_line_notes (pfile
, true);
1021 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1022 terminating newline. Handles escaped newlines. Returns nonzero
1023 if a multiline comment. */
1025 skip_line_comment (cpp_reader
*pfile
)
1027 cpp_buffer
*buffer
= pfile
->buffer
;
1028 source_location orig_line
= pfile
->line_table
->highest_line
;
1030 while (*buffer
->cur
!= '\n')
1033 _cpp_process_line_notes (pfile
, true);
1034 return orig_line
!= pfile
->line_table
->highest_line
;
1037 /* Skips whitespace, saving the next non-whitespace character. */
1039 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
1041 cpp_buffer
*buffer
= pfile
->buffer
;
1042 bool saw_NUL
= false;
1046 /* Horizontal space always OK. */
1047 if (c
== ' ' || c
== '\t')
1049 /* Just \f \v or \0 left. */
1052 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
1053 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
1054 CPP_BUF_COL (buffer
),
1055 "%s in preprocessing directive",
1056 c
== '\f' ? "form feed" : "vertical tab");
1060 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1061 while (is_nvspace (c
));
1064 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
1069 /* See if the characters of a number token are valid in a name (no
1070 '.', '+' or '-'). */
1072 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
1076 for (i
= 0; i
< string
->len
; i
++)
1077 if (!is_idchar (string
->text
[i
]))
1083 /* After parsing an identifier or other sequence, produce a warning about
1084 sequences not in NFC/NFKC. */
1086 warn_about_normalization (cpp_reader
*pfile
,
1087 const cpp_token
*token
,
1088 const struct normalize_state
*s
)
1090 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
1091 && !pfile
->state
.skipping
)
1093 /* Make sure that the token is printed using UCNs, even
1094 if we'd otherwise happily print UTF-8. */
1095 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
1098 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
1099 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
1100 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1101 "`%.*s' is not in NFKC", (int) sz
, buf
);
1103 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1104 "`%.*s' is not in NFC", (int) sz
, buf
);
1109 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1110 an identifier. FIRST is TRUE if this starts an identifier. */
1112 forms_identifier_p (cpp_reader
*pfile
, int first
,
1113 struct normalize_state
*state
)
1115 cpp_buffer
*buffer
= pfile
->buffer
;
1117 if (*buffer
->cur
== '$')
1119 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1123 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
1125 CPP_OPTION (pfile
, warn_dollars
) = 0;
1126 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
1132 /* Is this a syntactically valid UCN? */
1133 if (CPP_OPTION (pfile
, extended_identifiers
)
1134 && *buffer
->cur
== '\\'
1135 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
1138 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1147 /* Helper function to get the cpp_hashnode of the identifier BASE. */
1148 static cpp_hashnode
*
1149 lex_identifier_intern (cpp_reader
*pfile
, const uchar
*base
)
1151 cpp_hashnode
*result
;
1154 unsigned int hash
= HT_HASHSTEP (0, *base
);
1157 while (ISIDNUM (*cur
))
1159 hash
= HT_HASHSTEP (hash
, *cur
);
1163 hash
= HT_HASHFINISH (hash
, len
);
1164 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1165 base
, len
, hash
, HT_ALLOC
));
1167 /* Rarely, identifiers require diagnostics when lexed. */
1168 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1169 && !pfile
->state
.skipping
, 0))
1171 /* It is allowed to poison the same identifier twice. */
1172 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1173 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1174 NODE_NAME (result
));
1176 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1177 replacement list of a variadic macro. */
1178 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1179 && !pfile
->state
.va_args_ok
)
1180 cpp_error (pfile
, CPP_DL_PEDWARN
,
1181 "__VA_ARGS__ can only appear in the expansion"
1182 " of a C99 variadic macro");
1184 /* For -Wc++-compat, warn about use of C++ named operators. */
1185 if (result
->flags
& NODE_WARN_OPERATOR
)
1186 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1187 "identifier \"%s\" is a special operator name in C++",
1188 NODE_NAME (result
));
1194 /* Get the cpp_hashnode of an identifier specified by NAME in
1195 the current cpp_reader object. If none is found, NULL is returned. */
1197 _cpp_lex_identifier (cpp_reader
*pfile
, const char *name
)
1199 cpp_hashnode
*result
;
1200 result
= lex_identifier_intern (pfile
, (uchar
*) name
);
1204 /* Lex an identifier starting at BUFFER->CUR - 1. */
1205 static cpp_hashnode
*
1206 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
1207 struct normalize_state
*nst
)
1209 cpp_hashnode
*result
;
1212 unsigned int hash
= HT_HASHSTEP (0, *base
);
1214 cur
= pfile
->buffer
->cur
;
1217 while (ISIDNUM (*cur
))
1219 hash
= HT_HASHSTEP (hash
, *cur
);
1222 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *(cur
- 1));
1224 pfile
->buffer
->cur
= cur
;
1225 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
1227 /* Slower version for identifiers containing UCNs (or $). */
1229 while (ISIDNUM (*pfile
->buffer
->cur
))
1231 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *pfile
->buffer
->cur
);
1232 pfile
->buffer
->cur
++;
1234 } while (forms_identifier_p (pfile
, false, nst
));
1235 result
= _cpp_interpret_identifier (pfile
, base
,
1236 pfile
->buffer
->cur
- base
);
1241 hash
= HT_HASHFINISH (hash
, len
);
1243 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1244 base
, len
, hash
, HT_ALLOC
));
1247 /* Rarely, identifiers require diagnostics when lexed. */
1248 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1249 && !pfile
->state
.skipping
, 0))
1251 /* It is allowed to poison the same identifier twice. */
1252 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1253 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1254 NODE_NAME (result
));
1256 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1257 replacement list of a variadic macro. */
1258 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1259 && !pfile
->state
.va_args_ok
)
1260 cpp_error (pfile
, CPP_DL_PEDWARN
,
1261 "__VA_ARGS__ can only appear in the expansion"
1262 " of a C99 variadic macro");
1264 /* For -Wc++-compat, warn about use of C++ named operators. */
1265 if (result
->flags
& NODE_WARN_OPERATOR
)
1266 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1267 "identifier \"%s\" is a special operator name in C++",
1268 NODE_NAME (result
));
1274 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1276 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
1277 struct normalize_state
*nst
)
1283 base
= pfile
->buffer
->cur
- 1;
1286 cur
= pfile
->buffer
->cur
;
1288 /* N.B. ISIDNUM does not include $. */
1289 while (ISIDNUM (*cur
) || *cur
== '.' || DIGIT_SEP (*cur
)
1290 || VALID_SIGN (*cur
, cur
[-1]))
1292 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *cur
);
1296 pfile
->buffer
->cur
= cur
;
1298 while (forms_identifier_p (pfile
, false, nst
));
1300 number
->len
= cur
- base
;
1301 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
1302 memcpy (dest
, base
, number
->len
);
1303 dest
[number
->len
] = '\0';
1304 number
->text
= dest
;
1307 /* Create a token of type TYPE with a literal spelling. */
1309 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1310 unsigned int len
, enum cpp_ttype type
)
1312 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
1314 memcpy (dest
, base
, len
);
1317 token
->val
.str
.len
= len
;
1318 token
->val
.str
.text
= dest
;
1321 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1322 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1325 bufring_append (cpp_reader
*pfile
, const uchar
*base
, size_t len
,
1326 _cpp_buff
**first_buff_p
, _cpp_buff
**last_buff_p
)
1328 _cpp_buff
*first_buff
= *first_buff_p
;
1329 _cpp_buff
*last_buff
= *last_buff_p
;
1331 if (first_buff
== NULL
)
1332 first_buff
= last_buff
= _cpp_get_buff (pfile
, len
);
1333 else if (len
> BUFF_ROOM (last_buff
))
1335 size_t room
= BUFF_ROOM (last_buff
);
1336 memcpy (BUFF_FRONT (last_buff
), base
, room
);
1337 BUFF_FRONT (last_buff
) += room
;
1340 last_buff
= _cpp_append_extend_buff (pfile
, last_buff
, len
);
1343 memcpy (BUFF_FRONT (last_buff
), base
, len
);
1344 BUFF_FRONT (last_buff
) += len
;
1346 *first_buff_p
= first_buff
;
1347 *last_buff_p
= last_buff
;
1351 /* Returns true if a macro has been defined.
1352 This might not work if compile with -save-temps,
1353 or preprocess separately from compilation. */
1356 is_macro(cpp_reader
*pfile
, const uchar
*base
)
1358 const uchar
*cur
= base
;
1359 if (! ISIDST (*cur
))
1361 unsigned int hash
= HT_HASHSTEP (0, *cur
);
1363 while (ISIDNUM (*cur
))
1365 hash
= HT_HASHSTEP (hash
, *cur
);
1368 hash
= HT_HASHFINISH (hash
, cur
- base
);
1370 cpp_hashnode
*result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1371 base
, cur
- base
, hash
, HT_NO_INSERT
));
1373 return !result
? false : (result
->type
== NT_MACRO
);
1377 /* Lexes a raw string. The stored string contains the spelling, including
1378 double quotes, delimiter string, '(' and ')', any leading
1379 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1380 literal, or CPP_OTHER if it was not properly terminated.
1382 The spelling is NUL-terminated, but it is not guaranteed that this
1383 is the first NUL since embedded NULs are preserved. */
1386 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1389 uchar raw_prefix
[17];
1390 uchar temp_buffer
[18];
1391 const uchar
*orig_base
;
1392 unsigned int raw_prefix_len
= 0, raw_suffix_len
= 0;
1393 enum raw_str_phase
{ RAW_STR_PREFIX
, RAW_STR
, RAW_STR_SUFFIX
};
1394 raw_str_phase phase
= RAW_STR_PREFIX
;
1395 enum cpp_ttype type
;
1396 size_t total_len
= 0;
1397 /* Index into temp_buffer during phases other than RAW_STR,
1398 during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1399 be appended to temp_buffer. */
1400 size_t temp_buffer_len
= 0;
1401 _cpp_buff
*first_buff
= NULL
, *last_buff
= NULL
;
1402 size_t raw_prefix_start
;
1403 _cpp_line_note
*note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1405 type
= (*base
== 'L' ? CPP_WSTRING
:
1406 *base
== 'U' ? CPP_STRING32
:
1407 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
1410 #define BUF_APPEND(STR,LEN) \
1412 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1413 &first_buff, &last_buff); \
1414 total_len += (LEN); \
1415 if (__builtin_expect (temp_buffer_len < 17, 0) \
1416 && (const uchar *)(STR) != base \
1419 memcpy (temp_buffer + temp_buffer_len, \
1420 (const uchar *)(STR), (LEN)); \
1421 temp_buffer_len += (LEN); \
1427 raw_prefix_start
= cur
- base
;
1432 /* If we previously performed any trigraph or line splicing
1433 transformations, undo them in between the opening and closing
1435 while (note
->pos
< cur
)
1437 for (; note
->pos
== cur
; ++note
)
1443 /* Restore backslash followed by newline. */
1444 BUF_APPEND (base
, cur
- base
);
1446 BUF_APPEND ("\\", 1);
1448 if (note
->type
== ' ')
1450 /* GNU backslash whitespace newline extension. FIXME
1451 could be any sequence of non-vertical space. When we
1452 can properly restore any such sequence, we should mark
1453 this note as handled so _cpp_process_line_notes
1455 BUF_APPEND (" ", 1);
1458 BUF_APPEND ("\n", 1);
1462 /* Already handled. */
1466 if (_cpp_trigraph_map
[note
->type
])
1468 /* Don't warn about this trigraph in
1469 _cpp_process_line_notes, since trigraphs show up as
1470 trigraphs in raw strings. */
1471 uchar type
= note
->type
;
1474 if (!CPP_OPTION (pfile
, trigraphs
))
1475 /* If we didn't convert the trigraph in the first
1476 place, don't do anything now either. */
1479 BUF_APPEND (base
, cur
- base
);
1481 BUF_APPEND ("??", 2);
1483 /* ??/ followed by newline gets two line notes, one for
1484 the trigraph and one for the backslash/newline. */
1485 if (type
== '/' && note
[1].pos
== cur
)
1487 if (note
[1].type
!= '\\'
1488 && note
[1].type
!= ' ')
1490 BUF_APPEND ("/", 1);
1492 goto after_backslash
;
1496 /* Skip the replacement character. */
1498 BUF_APPEND (&type
, 1);
1509 if (__builtin_expect (temp_buffer_len
< 17, 0))
1510 temp_buffer
[temp_buffer_len
++] = c
;
1513 if (phase
== RAW_STR_PREFIX
)
1515 while (raw_prefix_len
< temp_buffer_len
)
1517 raw_prefix
[raw_prefix_len
] = temp_buffer
[raw_prefix_len
];
1518 switch (raw_prefix
[raw_prefix_len
])
1520 case ' ': case '(': case ')': case '\\': case '\t':
1521 case '\v': case '\f': case '\n': default:
1523 /* Basic source charset except the above chars. */
1524 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1525 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1526 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1527 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1529 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1530 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1531 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1532 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1534 case '0': case '1': case '2': case '3': case '4': case '5':
1535 case '6': case '7': case '8': case '9':
1536 case '_': case '{': case '}': case '#': case '[': case ']':
1537 case '<': case '>': case '%': case ':': case ';': case '.':
1538 case '?': case '*': case '+': case '-': case '/': case '^':
1539 case '&': case '|': case '~': case '!': case '=': case ',':
1540 case '"': case '\'':
1541 if (raw_prefix_len
< 16)
1549 if (raw_prefix
[raw_prefix_len
] != '(')
1551 int col
= CPP_BUF_COLUMN (pfile
->buffer
, cur
) + 1;
1552 if (raw_prefix_len
== 16)
1553 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1554 col
, "raw string delimiter longer "
1555 "than 16 characters");
1556 else if (raw_prefix
[raw_prefix_len
] == '\n')
1557 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1558 col
, "invalid new-line in raw "
1559 "string delimiter");
1561 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1562 col
, "invalid character '%c' in "
1563 "raw string delimiter",
1564 (int) raw_prefix
[raw_prefix_len
]);
1565 pfile
->buffer
->cur
= orig_base
+ raw_prefix_start
- 1;
1566 create_literal (pfile
, token
, orig_base
,
1567 raw_prefix_start
- 1, CPP_OTHER
);
1569 _cpp_release_buff (pfile
, first_buff
);
1572 raw_prefix
[raw_prefix_len
] = '"';
1574 /* Nothing should be appended to temp_buffer during
1576 temp_buffer_len
= 17;
1581 else if (phase
== RAW_STR_SUFFIX
)
1583 while (raw_suffix_len
<= raw_prefix_len
1584 && raw_suffix_len
< temp_buffer_len
1585 && temp_buffer
[raw_suffix_len
] == raw_prefix
[raw_suffix_len
])
1587 if (raw_suffix_len
> raw_prefix_len
)
1589 if (raw_suffix_len
== temp_buffer_len
)
1592 /* Nothing should be appended to temp_buffer during
1594 temp_buffer_len
= 17;
1598 phase
= RAW_STR_SUFFIX
;
1600 temp_buffer_len
= 0;
1604 if (pfile
->state
.in_directive
1605 || (pfile
->state
.parsing_args
1606 && pfile
->buffer
->next_line
>= pfile
->buffer
->rlimit
))
1610 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
1611 "unterminated raw string");
1615 BUF_APPEND (base
, cur
- base
);
1617 if (pfile
->buffer
->cur
< pfile
->buffer
->rlimit
)
1618 CPP_INCREMENT_LINE (pfile
, 0);
1619 pfile
->buffer
->need_line
= true;
1621 pfile
->buffer
->cur
= cur
-1;
1622 _cpp_process_line_notes (pfile
, false);
1623 if (!_cpp_get_fresh_line (pfile
))
1625 source_location src_loc
= token
->src_loc
;
1626 token
->type
= CPP_EOF
;
1627 /* Tell the compiler the line number of the EOF token. */
1628 token
->src_loc
= pfile
->line_table
->highest_line
;
1630 if (first_buff
!= NULL
)
1631 _cpp_release_buff (pfile
, first_buff
);
1632 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
1633 "unterminated raw string");
1637 cur
= base
= pfile
->buffer
->cur
;
1638 note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1642 if (CPP_OPTION (pfile
, user_literals
))
1644 /* If a string format macro, say from inttypes.h, is placed touching
1645 a string literal it could be parsed as a C++11 user-defined string
1646 literal thus breaking the program.
1647 Try to identify macros with is_macro. A warning is issued. */
1648 if (is_macro (pfile
, cur
))
1650 /* Raise a warning, but do not consume subsequent tokens. */
1651 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
1652 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1654 "invalid suffix on literal; C++11 requires "
1655 "a space between literal and string macro");
1657 /* Grab user defined literal suffix. */
1658 else if (ISIDST (*cur
))
1660 type
= cpp_userdef_string_add_type (type
);
1663 while (ISIDNUM (*cur
))
1668 pfile
->buffer
->cur
= cur
;
1669 if (first_buff
== NULL
)
1670 create_literal (pfile
, token
, base
, cur
- base
, type
);
1673 uchar
*dest
= _cpp_unaligned_alloc (pfile
, total_len
+ (cur
- base
) + 1);
1676 token
->val
.str
.len
= total_len
+ (cur
- base
);
1677 token
->val
.str
.text
= dest
;
1678 last_buff
= first_buff
;
1679 while (last_buff
!= NULL
)
1681 memcpy (dest
, last_buff
->base
,
1682 BUFF_FRONT (last_buff
) - last_buff
->base
);
1683 dest
+= BUFF_FRONT (last_buff
) - last_buff
->base
;
1684 last_buff
= last_buff
->next
;
1686 _cpp_release_buff (pfile
, first_buff
);
1687 memcpy (dest
, base
, cur
- base
);
1688 dest
[cur
- base
] = '\0';
1692 /* Lexes a string, character constant, or angle-bracketed header file
1693 name. The stored string contains the spelling, including opening
1694 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1695 'R' modifier. It returns the type of the literal, or CPP_OTHER
1696 if it was not properly terminated, or CPP_LESS for an unterminated
1697 header name which must be relexed as normal tokens.
1699 The spelling is NUL-terminated, but it is not guaranteed that this
1700 is the first NUL since embedded NULs are preserved. */
1702 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
1704 bool saw_NUL
= false;
1706 cppchar_t terminator
;
1707 enum cpp_ttype type
;
1710 terminator
= *cur
++;
1711 if (terminator
== 'L' || terminator
== 'U')
1712 terminator
= *cur
++;
1713 else if (terminator
== 'u')
1715 terminator
= *cur
++;
1716 if (terminator
== '8')
1717 terminator
= *cur
++;
1719 if (terminator
== 'R')
1721 lex_raw_string (pfile
, token
, base
, cur
);
1724 if (terminator
== '"')
1725 type
= (*base
== 'L' ? CPP_WSTRING
:
1726 *base
== 'U' ? CPP_STRING32
:
1727 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
1729 else if (terminator
== '\'')
1730 type
= (*base
== 'L' ? CPP_WCHAR
:
1731 *base
== 'U' ? CPP_CHAR32
:
1732 *base
== 'u' ? CPP_CHAR16
: CPP_CHAR
);
1734 terminator
= '>', type
= CPP_HEADER_NAME
;
1738 cppchar_t c
= *cur
++;
1740 /* In #include-style directives, terminators are not escapable. */
1741 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
1743 else if (c
== terminator
)
1748 /* Unmatched quotes always yield undefined behavior, but
1749 greedy lexing means that what appears to be an unterminated
1750 header name may actually be a legitimate sequence of tokens. */
1751 if (terminator
== '>')
1753 token
->type
= CPP_LESS
;
1763 if (saw_NUL
&& !pfile
->state
.skipping
)
1764 cpp_error (pfile
, CPP_DL_WARNING
,
1765 "null character(s) preserved in literal");
1767 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
1768 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
1771 if (CPP_OPTION (pfile
, user_literals
))
1773 /* If a string format macro, say from inttypes.h, is placed touching
1774 a string literal it could be parsed as a C++11 user-defined string
1775 literal thus breaking the program.
1776 Try to identify macros with is_macro. A warning is issued. */
1777 if (is_macro (pfile
, cur
))
1779 /* Raise a warning, but do not consume subsequent tokens. */
1780 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
1781 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1783 "invalid suffix on literal; C++11 requires "
1784 "a space between literal and string macro");
1786 /* Grab user defined literal suffix. */
1787 else if (ISIDST (*cur
))
1789 type
= cpp_userdef_char_add_type (type
);
1790 type
= cpp_userdef_string_add_type (type
);
1793 while (ISIDNUM (*cur
))
1798 pfile
->buffer
->cur
= cur
;
1799 create_literal (pfile
, token
, base
, cur
- base
, type
);
1802 /* Return the comment table. The client may not make any assumption
1803 about the ordering of the table. */
1805 cpp_get_comments (cpp_reader
*pfile
)
1807 return &pfile
->comments
;
1810 /* Append a comment to the end of the comment table. */
1812 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
1816 if (pfile
->comments
.allocated
== 0)
1818 pfile
->comments
.allocated
= 256;
1819 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
1820 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
1823 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
1825 pfile
->comments
.allocated
*= 2;
1826 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
1827 (pfile
->comments
.entries
,
1828 pfile
->comments
.allocated
* sizeof (cpp_comment
));
1831 len
= token
->val
.str
.len
;
1833 /* Copy comment. Note, token may not be NULL terminated. */
1834 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
1835 (char *) xmalloc (sizeof (char) * (len
+ 1));
1836 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
1837 token
->val
.str
.text
, len
);
1838 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
1840 /* Set source location. */
1841 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
1843 /* Increment the count of entries in the comment table. */
1844 pfile
->comments
.count
++;
1847 /* The stored comment includes the comment start and any terminator. */
1849 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
1852 unsigned char *buffer
;
1853 unsigned int len
, clen
, i
;
1855 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
1857 /* C++ comments probably (not definitely) have moved past a new
1858 line, which we don't want to save in the comment. */
1859 if (is_vspace (pfile
->buffer
->cur
[-1]))
1862 /* If we are currently in a directive or in argument parsing, then
1863 we need to store all C++ comments as C comments internally, and
1864 so we need to allocate a little extra space in that case.
1866 Note that the only time we encounter a directive here is
1867 when we are saving comments in a "#define". */
1868 clen
= ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
)
1869 && type
== '/') ? len
+ 2 : len
;
1871 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
1873 token
->type
= CPP_COMMENT
;
1874 token
->val
.str
.len
= clen
;
1875 token
->val
.str
.text
= buffer
;
1878 memcpy (buffer
+ 1, from
, len
- 1);
1880 /* Finish conversion to a C comment, if necessary. */
1881 if ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
) && type
== '/')
1884 buffer
[clen
- 2] = '*';
1885 buffer
[clen
- 1] = '/';
1886 /* As there can be in a C++ comments illegal sequences for C comments
1887 we need to filter them out. */
1888 for (i
= 2; i
< (clen
- 2); i
++)
1889 if (buffer
[i
] == '/' && (buffer
[i
- 1] == '*' || buffer
[i
+ 1] == '*'))
1893 /* Finally store this comment for use by clients of libcpp. */
1894 store_comment (pfile
, token
);
1897 /* Allocate COUNT tokens for RUN. */
1899 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
1901 run
->base
= XNEWVEC (cpp_token
, count
);
1902 run
->limit
= run
->base
+ count
;
1906 /* Returns the next tokenrun, or creates one if there is none. */
1908 next_tokenrun (tokenrun
*run
)
1910 if (run
->next
== NULL
)
1912 run
->next
= XNEW (tokenrun
);
1913 run
->next
->prev
= run
;
1914 _cpp_init_tokenrun (run
->next
, 250);
1920 /* Return the number of not yet processed token in a given
1923 _cpp_remaining_tokens_num_in_context (cpp_context
*context
)
1925 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
1926 return (LAST (context
).token
- FIRST (context
).token
);
1927 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
1928 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
1929 return (LAST (context
).ptoken
- FIRST (context
).ptoken
);
1934 /* Returns the token present at index INDEX in a given context. If
1935 INDEX is zero, the next token to be processed is returned. */
1936 static const cpp_token
*
1937 _cpp_token_from_context_at (cpp_context
*context
, int index
)
1939 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
1940 return &(FIRST (context
).token
[index
]);
1941 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
1942 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
1943 return FIRST (context
).ptoken
[index
];
1948 /* Look ahead in the input stream. */
1950 cpp_peek_token (cpp_reader
*pfile
, int index
)
1952 cpp_context
*context
= pfile
->context
;
1953 const cpp_token
*peektok
;
1956 /* First, scan through any pending cpp_context objects. */
1957 while (context
->prev
)
1959 ptrdiff_t sz
= _cpp_remaining_tokens_num_in_context (context
);
1961 if (index
< (int) sz
)
1962 return _cpp_token_from_context_at (context
, index
);
1964 context
= context
->prev
;
1967 /* We will have to read some new tokens after all (and do so
1968 without invalidating preceding tokens). */
1970 pfile
->keep_tokens
++;
1974 peektok
= _cpp_lex_token (pfile
);
1975 if (peektok
->type
== CPP_EOF
)
1980 _cpp_backup_tokens_direct (pfile
, count
+ 1);
1981 pfile
->keep_tokens
--;
1986 /* Allocate a single token that is invalidated at the same time as the
1987 rest of the tokens on the line. Has its line and col set to the
1988 same as the last lexed token, so that diagnostics appear in the
1991 _cpp_temp_token (cpp_reader
*pfile
)
1993 cpp_token
*old
, *result
;
1994 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
1995 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
1997 old
= pfile
->cur_token
- 1;
1998 /* Any pre-existing lookaheads must not be clobbered. */
2003 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
2006 memmove (next
->base
+ 1, next
->base
,
2007 (la
- sz
) * sizeof (cpp_token
));
2009 next
->base
[0] = pfile
->cur_run
->limit
[-1];
2013 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
2014 MIN (la
, sz
- 1) * sizeof (cpp_token
));
2017 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
2019 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2020 pfile
->cur_token
= pfile
->cur_run
->base
;
2023 result
= pfile
->cur_token
++;
2024 result
->src_loc
= old
->src_loc
;
2028 /* Lex a token into RESULT (external interface). Takes care of issues
2029 like directive handling, token lookahead, multiple include
2030 optimization and skipping. */
2032 _cpp_lex_token (cpp_reader
*pfile
)
2038 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
2040 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2041 pfile
->cur_token
= pfile
->cur_run
->base
;
2043 /* We assume that the current token is somewhere in the current
2045 if (pfile
->cur_token
< pfile
->cur_run
->base
2046 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
2049 if (pfile
->lookaheads
)
2051 pfile
->lookaheads
--;
2052 result
= pfile
->cur_token
++;
2055 result
= _cpp_lex_direct (pfile
);
2057 if (result
->flags
& BOL
)
2059 /* Is this a directive. If _cpp_handle_directive returns
2060 false, it is an assembler #. */
2061 if (result
->type
== CPP_HASH
2062 /* 6.10.3 p 11: Directives in a list of macro arguments
2063 gives undefined behavior. This implementation
2064 handles the directive as normal. */
2065 && pfile
->state
.parsing_args
!= 1)
2067 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
2069 if (pfile
->directive_result
.type
== CPP_PADDING
)
2071 result
= &pfile
->directive_result
;
2074 else if (pfile
->state
.in_deferred_pragma
)
2075 result
= &pfile
->directive_result
;
2077 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
2078 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
2081 /* We don't skip tokens in directives. */
2082 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
2085 /* Outside a directive, invalidate controlling macros. At file
2086 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2087 get here and MI optimization works. */
2088 pfile
->mi_valid
= false;
2090 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
2097 /* Returns true if a fresh line has been loaded. */
2099 _cpp_get_fresh_line (cpp_reader
*pfile
)
2103 /* We can't get a new line until we leave the current directive. */
2104 if (pfile
->state
.in_directive
)
2109 cpp_buffer
*buffer
= pfile
->buffer
;
2111 if (!buffer
->need_line
)
2114 if (buffer
->next_line
< buffer
->rlimit
)
2116 _cpp_clean_line (pfile
);
2120 /* First, get out of parsing arguments state. */
2121 if (pfile
->state
.parsing_args
)
2124 /* End of buffer. Non-empty files should end in a newline. */
2125 if (buffer
->buf
!= buffer
->rlimit
2126 && buffer
->next_line
> buffer
->rlimit
2127 && !buffer
->from_stage3
)
2129 /* Clip to buffer size. */
2130 buffer
->next_line
= buffer
->rlimit
;
2133 return_at_eof
= buffer
->return_at_eof
;
2134 _cpp_pop_buffer (pfile
);
2135 if (pfile
->buffer
== NULL
|| return_at_eof
)
/* Helper for lexing one- and two-character operators.  Expects
   `result' (the token being built) and `buffer' (whose `cur' points at
   the next unread character) in the enclosing scope.  Sets the token
   type to THEN_TYPE and consumes the next character if it equals CHAR,
   otherwise sets the type to ELSE_TYPE and consumes nothing.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
2149 /* Lex a token into pfile->cur_token, which is also incremented, to
2150 get diagnostics pointing to the correct location.
2152 Does not handle issues such as token lookahead, multiple-include
2153 optimization, directives, skipping etc. This function is only
2154 suitable for use by _cpp_lex_token, and in special cases like
2155 lex_expansion_token which doesn't care for any of these issues.
2157 When meeting a newline, returns CPP_EOF if parsing a directive,
2158 otherwise returns to the start of the token buffer if permissible.
2159 Returns the location of the lexed token. */
2161 _cpp_lex_direct (cpp_reader
*pfile
)
2165 const unsigned char *comment_start
;
2166 cpp_token
*result
= pfile
->cur_token
++;
2170 buffer
= pfile
->buffer
;
2171 if (buffer
->need_line
)
2173 if (pfile
->state
.in_deferred_pragma
)
2175 result
->type
= CPP_PRAGMA_EOL
;
2176 pfile
->state
.in_deferred_pragma
= false;
2177 if (!pfile
->state
.pragma_allow_expansion
)
2178 pfile
->state
.prevent_expansion
--;
2181 if (!_cpp_get_fresh_line (pfile
))
2183 result
->type
= CPP_EOF
;
2184 if (!pfile
->state
.in_directive
)
2186 /* Tell the compiler the line number of the EOF token. */
2187 result
->src_loc
= pfile
->line_table
->highest_line
;
2188 result
->flags
= BOL
;
2192 if (!pfile
->keep_tokens
)
2194 pfile
->cur_run
= &pfile
->base_run
;
2195 result
= pfile
->base_run
.base
;
2196 pfile
->cur_token
= result
+ 1;
2198 result
->flags
= BOL
;
2199 if (pfile
->state
.parsing_args
== 2)
2200 result
->flags
|= PREV_WHITE
;
2202 buffer
= pfile
->buffer
;
2204 result
->src_loc
= pfile
->line_table
->highest_line
;
2207 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
2208 && !pfile
->overlaid_buffer
)
2210 _cpp_process_line_notes (pfile
, false);
2211 result
->src_loc
= pfile
->line_table
->highest_line
;
2215 if (pfile
->forced_token_location_p
)
2216 result
->src_loc
= *pfile
->forced_token_location_p
;
2218 result
->src_loc
= linemap_position_for_column (pfile
->line_table
,
2219 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
2223 case ' ': case '\t': case '\f': case '\v': case '\0':
2224 result
->flags
|= PREV_WHITE
;
2225 skip_whitespace (pfile
, c
);
2229 if (buffer
->cur
< buffer
->rlimit
)
2230 CPP_INCREMENT_LINE (pfile
, 0);
2231 buffer
->need_line
= true;
2234 case '0': case '1': case '2': case '3': case '4':
2235 case '5': case '6': case '7': case '8': case '9':
2237 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2238 result
->type
= CPP_NUMBER
;
2239 lex_number (pfile
, &result
->val
.str
, &nst
);
2240 warn_about_normalization (pfile
, result
, &nst
);
2248 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2249 wide strings or raw strings. */
2250 if (c
== 'L' || CPP_OPTION (pfile
, rliterals
)
2251 || (c
!= 'R' && CPP_OPTION (pfile
, uliterals
)))
2253 if ((*buffer
->cur
== '\'' && c
!= 'R')
2254 || *buffer
->cur
== '"'
2255 || (*buffer
->cur
== 'R'
2257 && buffer
->cur
[1] == '"'
2258 && CPP_OPTION (pfile
, rliterals
))
2259 || (*buffer
->cur
== '8'
2261 && (buffer
->cur
[1] == '"'
2262 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'
2263 && CPP_OPTION (pfile
, rliterals
)))))
2265 lex_string (pfile
, result
, buffer
->cur
- 1);
2272 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2273 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2274 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2275 case 's': case 't': case 'v': case 'w': case 'x':
2277 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2278 case 'G': case 'H': case 'I': case 'J': case 'K':
2279 case 'M': case 'N': case 'O': case 'P': case 'Q':
2280 case 'S': case 'T': case 'V': case 'W': case 'X':
2282 result
->type
= CPP_NAME
;
2284 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2285 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
2287 warn_about_normalization (pfile
, result
, &nst
);
2290 /* Convert named operators to their proper types. */
2291 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
2293 result
->flags
|= NAMED_OP
;
2294 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
2300 lex_string (pfile
, result
, buffer
->cur
- 1);
2304 /* A potential block or line comment. */
2305 comment_start
= buffer
->cur
;
2310 if (_cpp_skip_block_comment (pfile
))
2311 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
2313 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
2314 || cpp_in_system_header (pfile
)))
2316 /* Warn about comments only if pedantically GNUC89, and not
2317 in system headers. */
2318 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
2319 && ! buffer
->warned_cplusplus_comments
)
2321 cpp_error (pfile
, CPP_DL_PEDWARN
,
2322 "C++ style comments are not allowed in ISO C90");
2323 cpp_error (pfile
, CPP_DL_PEDWARN
,
2324 "(this will be reported only once per input file)");
2325 buffer
->warned_cplusplus_comments
= 1;
2328 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
2329 cpp_warning (pfile
, CPP_W_COMMENTS
, "multi-line comment");
2334 result
->type
= CPP_DIV_EQ
;
2339 result
->type
= CPP_DIV
;
2343 if (!pfile
->state
.save_comments
)
2345 result
->flags
|= PREV_WHITE
;
2346 goto update_tokens_line
;
2349 /* Save the comment as a token in its own right. */
2350 save_comment (pfile
, result
, comment_start
, c
);
2354 if (pfile
->state
.angled_headers
)
2356 lex_string (pfile
, result
, buffer
->cur
- 1);
2357 if (result
->type
!= CPP_LESS
)
2361 result
->type
= CPP_LESS
;
2362 if (*buffer
->cur
== '=')
2363 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
2364 else if (*buffer
->cur
== '<')
2367 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
2369 else if (CPP_OPTION (pfile
, digraphs
))
2371 if (*buffer
->cur
== ':')
2373 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2374 three characters are <:: and the subsequent character
2375 is neither : nor >, the < is treated as a preprocessor
2376 token by itself". */
2377 if (CPP_OPTION (pfile
, cplusplus
)
2378 && CPP_OPTION (pfile
, lang
) != CLK_CXX98
2379 && CPP_OPTION (pfile
, lang
) != CLK_GNUCXX
2380 && buffer
->cur
[1] == ':'
2381 && buffer
->cur
[2] != ':' && buffer
->cur
[2] != '>')
2385 result
->flags
|= DIGRAPH
;
2386 result
->type
= CPP_OPEN_SQUARE
;
2388 else if (*buffer
->cur
== '%')
2391 result
->flags
|= DIGRAPH
;
2392 result
->type
= CPP_OPEN_BRACE
;
2398 result
->type
= CPP_GREATER
;
2399 if (*buffer
->cur
== '=')
2400 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
2401 else if (*buffer
->cur
== '>')
2404 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
2409 result
->type
= CPP_MOD
;
2410 if (*buffer
->cur
== '=')
2411 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
2412 else if (CPP_OPTION (pfile
, digraphs
))
2414 if (*buffer
->cur
== ':')
2417 result
->flags
|= DIGRAPH
;
2418 result
->type
= CPP_HASH
;
2419 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
2420 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
2422 else if (*buffer
->cur
== '>')
2425 result
->flags
|= DIGRAPH
;
2426 result
->type
= CPP_CLOSE_BRACE
;
2432 result
->type
= CPP_DOT
;
2433 if (ISDIGIT (*buffer
->cur
))
2435 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2436 result
->type
= CPP_NUMBER
;
2437 lex_number (pfile
, &result
->val
.str
, &nst
);
2438 warn_about_normalization (pfile
, result
, &nst
);
2440 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
2441 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
2442 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
2443 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
2447 result
->type
= CPP_PLUS
;
2448 if (*buffer
->cur
== '+')
2449 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
2450 else if (*buffer
->cur
== '=')
2451 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
2455 result
->type
= CPP_MINUS
;
2456 if (*buffer
->cur
== '>')
2459 result
->type
= CPP_DEREF
;
2460 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
2461 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
2463 else if (*buffer
->cur
== '-')
2464 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
2465 else if (*buffer
->cur
== '=')
2466 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
2470 result
->type
= CPP_AND
;
2471 if (*buffer
->cur
== '&')
2472 buffer
->cur
++, result
->type
= CPP_AND_AND
;
2473 else if (*buffer
->cur
== '=')
2474 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
2478 result
->type
= CPP_OR
;
2479 if (*buffer
->cur
== '|')
2480 buffer
->cur
++, result
->type
= CPP_OR_OR
;
2481 else if (*buffer
->cur
== '=')
2482 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
2486 result
->type
= CPP_COLON
;
2487 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
2488 buffer
->cur
++, result
->type
= CPP_SCOPE
;
2489 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
2492 result
->flags
|= DIGRAPH
;
2493 result
->type
= CPP_CLOSE_SQUARE
;
2497 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
2498 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
2499 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
2500 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
2501 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
2503 case '?': result
->type
= CPP_QUERY
; break;
2504 case '~': result
->type
= CPP_COMPL
; break;
2505 case ',': result
->type
= CPP_COMMA
; break;
2506 case '(': result
->type
= CPP_OPEN_PAREN
; break;
2507 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
2508 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
2509 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
2510 case '{': result
->type
= CPP_OPEN_BRACE
; break;
2511 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
2512 case ';': result
->type
= CPP_SEMICOLON
; break;
2514 /* @ is a punctuator in Objective-C. */
2515 case '@': result
->type
= CPP_ATSIGN
; break;
2520 const uchar
*base
= --buffer
->cur
;
2521 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2523 if (forms_identifier_p (pfile
, true, &nst
))
2525 result
->type
= CPP_NAME
;
2526 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
);
2527 warn_about_normalization (pfile
, result
, &nst
);
2534 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
2541 /* An upper bound on the number of bytes needed to spell TOKEN.
2542 Does not include preceding whitespace. */
/* The bound depends on the spelling category of TOKEN (TOKEN_SPELL):
   literals use their stored string length, identifiers use ten times
   the node length, and every other category gets a fixed 6.  */
2544 cpp_token_len (const cpp_token
*token
)
2548 switch (TOKEN_SPELL (token
))
2550 default: len
= 6; break;
2551 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
/* The factor of 10 presumably leaves room for every name byte to be
   written as a 10-byte \U escape by utf8_to_ucn -- TODO confirm.  */
2552 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
2558 /* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
2559 Return the number of bytes read out of NAME. (There are always
2560 10 bytes written to BUFFER.) */
2563 utf8_to_ucn (unsigned char *buffer
, const unsigned char *name
)
2569 unsigned long utf32
;
2571 /* Compute the length of the UTF-8 sequence. */
/* T starts as the lead byte and is shifted left until bit 7 is
   clear, i.e. once per leading one bit of the lead byte.  */
2572 for (t
= *name
; t
& 0x80; t
<<= 1)
/* Keep only the payload bits of the lead byte: the mask drops the
   UCN_LEN length-marker bits.  */
2575 utf32
= *name
& (0x7F >> ucn_len
);
/* Every following byte of the sequence contributes its low six
   bits to the code point.  */
2576 for (ucn_len_c
= 1; ucn_len_c
< ucn_len
; ucn_len_c
++)
2578 utf32
= (utf32
<< 6) | (*++name
& 0x3F);
2580 /* Ill-formed UTF-8. */
/* A continuation byte must have the bit pattern 10xxxxxx.  */
2581 if ((*name
& ~0x3F) != 0x80)
/* Emit the code point as eight lowercase hex digits, most
   significant nibble first.  */
2587 for (j
= 7; j
>= 0; j
--)
2588 *buffer
++ = "0123456789abcdef"[(utf32
>> (4 * j
)) & 0xF];
2592 /* Given a token TYPE corresponding to a digraph, return a pointer to
2593 the spelling of the digraph. */
/* The digraph_spellings table is indexed by the offset of TYPE from
   CPP_FIRST_DIGRAPH.  No range check is visible here, so TYPE is
   assumed to be one of the token types that has a digraph form.  */
2594 static const unsigned char *
2595 cpp_digraph2name (enum cpp_ttype type
)
2597 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
2600 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
2601 already contain enough space to hold the token's spelling.
2602 Returns a pointer to the character after the last character written.
2603 FORSTRING is true if this is to be the spelling after translation
2604 phase 1 (this is different for UCNs).
2605 FIXME: Would be nice if we didn't need the PFILE argument. */
/* No terminating NUL is written by the code visible here; callers
   such as cpp_token_as_text receive the end pointer for that.  The
   work is dispatched on the spelling category of TOKEN.  */
2607 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
2608 unsigned char *buffer
, bool forstring
)
2610 switch (TOKEN_SPELL (token
))
2612 case SPELL_OPERATOR
:
2614 const unsigned char *spelling
;
/* Choose the source spelling: the digraph form if the token was
   written as one, otherwise (after the NAMED_OP case) the entry in
   the token_spellings table.  */
2617 if (token
->flags
& DIGRAPH
)
2618 spelling
= cpp_digraph2name (token
->type
);
2619 else if (token
->flags
& NAMED_OP
)
2622 spelling
= TOKEN_NAME (token
);
2624 while ((c
= *spelling
++) != '\0')
/* Identifier copied byte for byte from its hash node.  */
2633 memcpy (buffer
, NODE_NAME (token
->val
.node
.node
),
2634 NODE_LEN (token
->val
.node
.node
));
2635 buffer
+= NODE_LEN (token
->val
.node
.node
);
/* Identifier spelled with every non-ASCII sequence converted to a
   \U escape.  */
2640 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
2642 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
2643 if (name
[i
] & ~0x7F)
/* utf8_to_ucn returns the number of input bytes consumed; one is
   subtracted because the loop increment accounts for the last.  */
2645 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
2649 *buffer
++ = NODE_NAME (token
->val
.node
.node
)[i
];
/* Literal: the stored text is copied verbatim.  */
2654 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
2655 buffer
+= token
->val
.str
.len
;
/* Anything else has no spelling and is diagnosed at CPP_DL_ICE
   level.  */
2659 cpp_error (pfile
, CPP_DL_ICE
,
2660 "unspellable token %s", TOKEN_NAME (token
));
2667 /* Returns TOKEN spelt as a null-terminated string. The string is
2668 freed when the reader is destroyed. Useful for diagnostics. */
/* The storage comes from _cpp_unaligned_alloc on PFILE, so the
   caller must not free the result.  */
2670 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
/* One byte more than the spelling bound, for the terminating NUL.  */
2672 unsigned int len
= cpp_token_len (token
) + 1;
2673 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
/* FORSTRING is false here; END points just past the last byte
   written.  */
2675 end
= cpp_spell_token (pfile
, token
, start
, false);
2681 /* Returns a pointer to a string which spells the token defined by
2682 TYPE and FLAGS. Used by C front ends, which really should move to
2683 using cpp_token_as_text. */
/* Lookup order: digraph spelling when FLAGS has DIGRAPH, named
   operator spelling when it has NAMED_OP, otherwise the name held in
   the token_spellings table entry for TYPE.  */
2685 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
2687 if (flags
& DIGRAPH
)
2688 return (const char *) cpp_digraph2name (type
);
2689 else if (flags
& NAMED_OP
)
2690 return cpp_named_operator2name (type
);
2692 return (const char *) token_spellings
[type
].name
;
2695 /* Writes the spelling of token to FP, without any preceding space.
2696 Separated from cpp_spell_token for efficiency - to avoid stdio
2697 double-buffering. */
/* The spelling choices mirror cpp_spell_token, but the bytes go
   straight to FP instead of a caller-supplied buffer.  */
2699 cpp_output_token (const cpp_token
*token
, FILE *fp
)
2701 switch (TOKEN_SPELL (token
))
2703 case SPELL_OPERATOR
:
2705 const unsigned char *spelling
;
2708 if (token
->flags
& DIGRAPH
)
2709 spelling
= cpp_digraph2name (token
->type
);
2710 else if (token
->flags
& NAMED_OP
)
2713 spelling
= TOKEN_NAME (token
);
/* NOTE(review): the trailing semicolon suggests this is the closing
   condition of a do/while whose body is not visible here -- confirm
   against the complete file.  */
2718 while ((c
= *++spelling
) != '\0');
2726 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
2728 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
2729 if (name
[i
] & ~0x7F)
/* utf8_to_ucn always fills exactly 10 bytes, which is why the
   scratch buffer and the fwrite length are both 10.  */
2731 unsigned char buffer
[10];
2732 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
2733 fwrite (buffer
, 1, 10, fp
);
2736 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
/* Literal: the stored text is written verbatim.  */
2741 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
2745 /* An error, most probably. */
2750 /* Compare two tokens. */
/* Tokens A and B can only be equivalent when both their type and
   their flags match; the remaining comparison depends on the
   spelling category of A.  */
2752 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
2754 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
2755 switch (TOKEN_SPELL (a
))
2757 default: /* Keep compiler happy. */
2758 case SPELL_OPERATOR
:
2759 /* token_no is used to track where multiple consecutive ##
2760 tokens were originally located. */
2761 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
/* Macro arguments must additionally refer to the same argument
   number.  */
2763 return (a
->type
!= CPP_MACRO_ARG
2764 || a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
);
/* Compared by node pointer identity, not by name bytes.  */
2766 return a
->val
.node
.node
== b
->val
.node
.node
;
/* Literals: equal length and memcmp-equal text.  */
2768 return (a
->val
.str
.len
== b
->val
.str
.len
2769 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
2776 /* Returns nonzero if a space should be inserted to avoid an
2777 accidental token paste for output. For simplicity, it is
2778 conservative, and occasionally advises a space where one is not
2779 needed, e.g. "." and ".2". */
/* A and B start as the types of TOKEN1 and TOKEN2.  C is the first
   character of the spelling of TOKEN2 when that token is a digraph
   or an operator.  The switch then asks, for the left-hand type,
   whether that character (or the type B) could extend TOKEN1 into a
   different token.  */
2781 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
2782 const cpp_token
*token2
)
2784 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
2787 if (token1
->flags
& NAMED_OP
)
2789 if (token2
->flags
& NAMED_OP
)
/* Digraph tokens are judged by the first character of their digraph
   spelling, not by their canonical spelling.  */
2793 if (token2
->flags
& DIGRAPH
)
2794 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
2795 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
2796 c
= token_spellings
[b
].name
[0];
2798 /* Quickly get everything that can paste with an '='. */
/* This relies on the token types that have an '=' compound form
   being ordered at or before CPP_LAST_EQ.  */
2799 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
2804 case CPP_GREATER
: return c
== '>';
2805 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
2806 case CPP_PLUS
: return c
== '+';
2807 case CPP_MINUS
: return c
== '-' || c
== '>';
2808 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
2809 case CPP_MOD
: return c
== ':' || c
== '>';
2810 case CPP_AND
: return c
== '&';
2811 case CPP_OR
: return c
== '|';
2812 case CPP_COLON
: return c
== ':' || c
== '>';
2813 case CPP_DEREF
: return c
== '*';
2814 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
2815 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
2816 case CPP_NAME
: return ((b
== CPP_NUMBER
2817 && name_p (pfile
, &token2
->val
.str
))
2819 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
2820 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
2821 || c
== '.' || c
== '+' || c
== '-');
/* A stray backslash or, with the objc option, a stray '@' held in a
   CPP_OTHER token.  */
2823 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
2825 || (CPP_OPTION (pfile
, objc
)
2826 && token1
->val
.str
.text
[0] == '@'
2827 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
/* String-like tokens: the visible condition involves the
   user_literals option and whether a following literal starts with
   an identifier-start character (ISIDST).  */
2830 case CPP_UTF8STRING
:
2832 case CPP_STRING32
: return (CPP_OPTION (pfile
, user_literals
)
2834 || (TOKEN_SPELL (token2
) == SPELL_LITERAL
2835 && ISIDST (token2
->val
.str
.text
[0]))));
2843 /* Output all the remaining tokens on the current line, and a newline
2844 character, to FP. Leading whitespace is removed. If there are
2845 macros, special token padding is not performed. */
2847 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
2849 const cpp_token
*token
;
/* Tokens are fetched with cpp_get_token until CPP_EOF is seen.  */
2851 token
= cpp_get_token (pfile
);
2852 while (token
->type
!= CPP_EOF
)
2854 cpp_output_token (token
, fp
);
/* PREV_WHITE is tested on the token that follows the one just
   written, so it is only ever considered between tokens and never
   before the first one.  */
2855 token
= cpp_get_token (pfile
);
2856 if (token
->flags
& PREV_WHITE
)
2863 /* Return a string representation of all the remaining tokens on the
2864 current line. The result is allocated using xmalloc and must be
2865 freed by the caller. */
/* DIR_NAME may be null (see the initialisation of OUT).  When it is
   given, the result starts with "#", the name and a space.  */
2867 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
2869 const cpp_token
*token
;
/* OUT is the write offset into RESULT and ALLOCED its current
   capacity.  */
2870 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
2871 unsigned int alloced
= 120 + out
;
2872 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
2874 /* If DIR_NAME is empty, there are no initial contents. */
/* NOTE(review): this sprintf is unbounded.  It fits because ALLOCED
   was sized as the length of DIR_NAME plus 120, while the output is
   that length plus two characters and a NUL.  */
2877 sprintf ((char *) result
, "#%s ", dir_name
);
2881 token
= cpp_get_token (pfile
);
2882 while (token
->type
!= CPP_EOF
)
2884 unsigned char *last
;
2885 /* Include room for a possible space and the terminating nul. */
2886 unsigned int len
= cpp_token_len (token
) + 2;
/* Grow RESULT before spelling the token so that the worst-case
   spelling is guaranteed to fit.  */
2888 if (out
+ len
> alloced
)
2891 if (out
+ len
> alloced
)
2892 alloced
= out
+ len
;
2893 result
= (unsigned char *) xrealloc (result
, alloced
);
/* cpp_spell_token returns the position just past the last byte
   written, which becomes the new value of OUT.  */
2896 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
2897 out
= last
- result
;
2899 token
= cpp_get_token (pfile
);
2900 if (token
->flags
& PREV_WHITE
)
2901 result
[out
++] = ' ';
2908 /* Memory buffers. Changing these three constants can have a dramatic
2909 effect on performance. The values here are reasonable defaults,
2910 but might be tuned. If you adjust them, be sure to test across a
2911 range of uses of cpplib, including heavy nested function-like macro
2912 expansion. Also check the change in peak memory usage (NJAMD is a
2913 good tool for this). */
/* Smallest data area, in bytes, that new_buff will allocate.  */
2914 #define MIN_BUFF_SIZE 8000
/* Largest free-list buffer that _cpp_get_buff will hand out for a
   request of MIN_SIZE bytes.  */
2915 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the
   number of bytes between cur and limit.  NOTE(review): MIN_EXTRA is
   not parenthesised in the expansion, so it should only be passed a
   simple expression.  */
2916 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2917 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check relating the two constants above.  */
2919 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2920 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2923 /* Create a new allocation buffer. Place the control block at the end
2924 of the buffer, so that buffer overflows will cause immediate chaos. */
/* LEN is raised to at least MIN_BUFF_SIZE and then rounded with
   CPP_ALIGN.  In the ENABLE_VALGRIND_CHECKING variant the control
   block is instead placed at the start of the allocation (RESULT is
   set to BASE); otherwise it sits at BASE + LEN, directly after the
   data area.
   NOTE(review): the Valgrind comment further down has no visible
   closing delimiter in this copy of the file -- restore it before
   adding any further comments inside this function.  */
2926 new_buff (size_t len
)
2929 unsigned char *base
;
2931 if (len
< MIN_BUFF_SIZE
)
2932 len
= MIN_BUFF_SIZE
;
2933 len
= CPP_ALIGN (len
);
2935 #ifdef ENABLE_VALGRIND_CHECKING
2936 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
2938 size_t slen
= CPP_ALIGN2 (sizeof (_cpp_buff
), 2 * DEFAULT_ALIGNMENT
);
2939 base
= XNEWVEC (unsigned char, len
+ slen
);
2940 result
= (_cpp_buff
*) base
;
2943 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
2944 result
= (_cpp_buff
*) (base
+ len
);
2946 result
->base
= base
;
2948 result
->limit
= base
+ len
;
2949 result
->next
= NULL
;
2953 /* Place a chain of unwanted allocation buffers on the free list. */
/* END starts at BUFF; by the time its next pointer is assigned it
   is presumably the last buffer of the chain (the walk itself is not
   visible here).  The whole chain is thereby spliced onto the front
   of the free_buffs list of PFILE.  */
2955 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
2957 _cpp_buff
*end
= buff
;
2961 end
->next
= pfile
->free_buffs
;
2962 pfile
->free_buffs
= buff
;
2965 /* Return a free buffer of size at least MIN_SIZE. */
/* The free list of PFILE is walked through P, a pointer to the link
   under inspection.  A fresh buffer from new_buff is returned on one
   path; otherwise a free-list buffer whose size lies between
   MIN_SIZE and BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is accepted and is
   handed back with NEXT cleared and CUR reset to BASE.
   NOTE(review): the comment inside the loop has no visible closing
   delimiter in this copy of the file -- restore it before adding any
   further comments inside this function.  */
2967 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
2969 _cpp_buff
*result
, **p
;
2971 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
2976 return new_buff (min_size
);
2978 size
= result
->limit
- result
->base
;
2979 /* Return a buffer that's big enough, but don't waste one that's
2981 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2986 result
->next
= NULL
;
2987 result
->cur
= result
->base
;
2991 /* Creates a new buffer with enough space to hold the uncommitted
2992 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2993 the excess bytes to the new buffer. Chains the new buffer after
2994 BUFF, and returns the new buffer. */
2996 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
/* SIZE is MIN_EXTRA plus twice the room left in BUFF (see
   EXTENDED_BUFF_SIZE).  */
2998 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
/* The local NEW_BUFF shadows the file-level function new_buff for
   the rest of this body.  */
2999 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
3001 buff
->next
= new_buff
;
/* BUFF_ROOM (buff) bytes starting at the cur pointer of BUFF are
   copied to the start of the new buffer.  */
3002 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
3006 /* Creates a new buffer with enough space to hold the uncommitted
3007 remaining bytes of the buffer pointed to by BUFF, and at least
3008 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3009 Chains the new buffer before the buffer pointed to by BUFF, and
3010 updates the pointer to point to the new buffer. */
/* The parameter is spelled PBUFF in the signature; the comment
   above calls the buffer it points to BUFF.  */
3012 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
/* The local NEW_BUFF shadows the file-level function new_buff for
   the rest of this body.  */
3014 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
3015 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
3017 new_buff
= _cpp_get_buff (pfile
, size
);
/* BUFF_ROOM (old_buff) bytes starting at the cur pointer of the old
   buffer are copied to the start of the new one, which is then
   linked in front of the old buffer.  */
3018 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
3019 new_buff
->next
= old_buff
;
3023 /* Free a chain of buffers starting at BUFF. */
3025 _cpp_free_buff (_cpp_buff
*buff
)
/* NEXT is presumably saved from the current buffer before it is
   released (the loop body is not visible here); the walk stops at
   the null link that ends the chain.  */
3029 for (; buff
; buff
= next
)
/* What is freed presumably differs with this option because
   new_buff lays the control block out differently under it -- TODO
   confirm against the complete file.  */
3032 #ifdef ENABLE_VALGRIND_CHECKING
3040 /* Allocate permanent, unaligned storage of length LEN. */
/* Storage is carved from the u_buff chain of PFILE by advancing the
   cur pointer of the head buffer.  */
3042 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
3044 _cpp_buff
*buff
= pfile
->u_buff
;
3045 unsigned char *result
= buff
->cur
;
/* Not enough room left in the head buffer: obtain a buffer of at
   least LEN bytes and push it on the front of the chain.  The bytes
   left unused in the previous head are not reclaimed here.  */
3047 if (len
> (size_t) (buff
->limit
- result
))
3049 buff
= _cpp_get_buff (pfile
, len
);
3050 buff
->next
= pfile
->u_buff
;
3051 pfile
->u_buff
= buff
;
/* Commit LEN bytes starting at RESULT.  */
3055 buff
->cur
= result
+ len
;
3059 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3060 That buffer is used for growing allocations when saving macro
3061 replacement lists in a #define, and when parsing an answer to an
3062 assertion in #assert, #unassert or #if (and therefore possibly
3063 whilst expanding macros). It therefore must not be used by any
3064 code that they might call: specifically the lexer and the guts of
3067 All existing other uses clearly fit this restriction: storing
3068 registered pragmas during initialization. */
/* NOTE(review): the sentence ending in "the guts of" above is
   truncated in this copy of the file.  Also, despite the name of the
   function, no alignment adjustment of LEN or of the returned
   pointer is visible in the lines below -- confirm against the
   complete file.  */
3070 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
3072 _cpp_buff
*buff
= pfile
->a_buff
;
3073 unsigned char *result
= buff
->cur
;
/* Not enough room left in the head buffer: obtain a buffer of at
   least LEN bytes and push it on the front of the a_buff chain.  */
3075 if (len
> (size_t) (buff
->limit
- result
))
3077 buff
= _cpp_get_buff (pfile
, len
);
3078 buff
->next
= pfile
->a_buff
;
3079 pfile
->a_buff
= buff
;
/* Commit LEN bytes starting at RESULT.  */
3083 buff
->cur
= result
+ len
;
3087 /* Say which field of TOK is in use. */
/* The answer is derived from the spelling category of TOK and, for
   some categories, from its type.  */
3089 enum cpp_token_fld_kind
3090 cpp_token_val_index (const cpp_token
*tok
)
3092 switch (TOKEN_SPELL (tok
))
3095 return CPP_TOKEN_FLD_NODE
;
3097 return CPP_TOKEN_FLD_STR
;
/* Among operators only CPP_PASTE carries a value (token_no).  */
3098 case SPELL_OPERATOR
:
3099 if (tok
->type
== CPP_PASTE
)
3100 return CPP_TOKEN_FLD_TOKEN_NO
;
3102 return CPP_TOKEN_FLD_NONE
;
/* Macro arguments, padding and pragma tokens each select their own
   field; any other type falls through to the final return.  */
3104 if (tok
->type
== CPP_MACRO_ARG
)
3105 return CPP_TOKEN_FLD_ARG_NO
;
3106 else if (tok
->type
== CPP_PADDING
)
3107 return CPP_TOKEN_FLD_SOURCE
;
3108 else if (tok
->type
== CPP_PRAGMA
)
3109 return CPP_TOKEN_FLD_PRAGMA
;
3110 /* else fall through */
3112 return CPP_TOKEN_FLD_NONE
;
3116 /* All tokens lexed in R after calling this function will be forced to have
3117 their source_location the same as the location referenced by P, until
3118 cpp_stop_forcing_token_locations is called for R. */
/* Only the pointer P is stored, not the location it refers to, so
   the pointed-to object has to stay valid while forcing is active.  */
3121 cpp_force_token_locations (cpp_reader
*r
, source_location
*p
)
3123 r
->forced_token_location_p
= p
;
3126 /* Go back to assigning locations naturally for lexed tokens. */
/* Clears the pointer installed by cpp_force_token_locations.  */
3129 cpp_stop_forcing_token_locations (cpp_reader
*r
)
3131 r
->forced_token_location_p
= NULL
;