1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000-2014 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
37 enum spell_type category
;
38 const unsigned char *name
;
41 static const unsigned char *const digraph_spellings
[] =
42 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
57 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
58 static void store_comment (cpp_reader
*, cpp_token
*);
59 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
60 unsigned int, enum cpp_ttype
);
61 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
62 static int name_p (cpp_reader
*, const cpp_string
*);
63 static tokenrun
*next_tokenrun (tokenrun
*);
65 static _cpp_buff
*new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token
*token
, const char *string
)
75 if (token
->type
!= CPP_NAME
)
78 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
86 if (buffer
->notes_used
== buffer
->notes_cap
)
88 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
89 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
93 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
94 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds
104 One of the paths through the ifdefs should provide
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
115 /* Configure gives us an ifdef test. */
116 #ifndef WORDS_BIGENDIAN
117 #define WORDS_BIGENDIAN 0
120 /* We'd like the largest integer that fits into a register. There's nothing
121 in <stdint.h> that gives us that. For most hosts this is unsigned long,
122 but MS decided on an LLP64 model. Thankfully when building with GCC we
123 can get the "real" word size. */
125 typedef unsigned int word_type
__attribute__((__mode__(__word__
)));
127 typedef unsigned long word_type
;
130 /* The code below is only expecting sizes 4 or 8.
131 Die at compile-time if this expectation is violated. */
132 typedef char check_word_type_size
133 [(sizeof(word_type
) == 8 || sizeof(word_type
) == 4) * 2 - 1];
135 /* Return VAL with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
138 static inline word_type
139 acc_char_mask_misalign (word_type val
, unsigned int n
)
149 /* Return X replicated to all byte positions within WORD_TYPE. */
151 static inline word_type
152 acc_char_replicate (uchar x
)
156 ret
= (x
<< 24) | (x
<< 16) | (x
<< 8) | x
;
157 if (sizeof(word_type
) == 8)
158 ret
= (ret
<< 16 << 16) | ret
;
162 /* Return non-zero if some byte of VAL is (probably) C. */
164 static inline word_type
165 acc_char_cmp (word_type val
, word_type c
)
167 #if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val
^ c
);
172 word_type magic
= 0x7efefefeU
;
173 if (sizeof(word_type
) == 8)
174 magic
= (magic
<< 16 << 16) | 0xfefefefeU
;
178 return ((val
+ magic
) ^ ~val
) & ~magic
;
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED
,
187 word_type val ATTRIBUTE_UNUSED
)
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp
);
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i
= 0; i
< sizeof(word_type
); ++i
)
202 c
= (val
>> (sizeof(word_type
) - i
- 1) * 8) & 0xff;
204 c
= (val
>> i
* 8) & 0xff;
206 if (c
== '\n' || c
== '\r' || c
== '\\' || c
== '?')
214 /* A version of the fast scanner using bit fiddling techniques.
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
225 static const uchar
* search_line_acc_char (const uchar
*, const uchar
*)
229 search_line_acc_char (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
231 const word_type repl_nl
= acc_char_replicate ('\n');
232 const word_type repl_cr
= acc_char_replicate ('\r');
233 const word_type repl_bs
= acc_char_replicate ('\\');
234 const word_type repl_qm
= acc_char_replicate ('?');
236 unsigned int misalign
;
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p
= (word_type
*)((uintptr_t)s
& -sizeof(word_type
));
243 misalign
= (uintptr_t)s
& (sizeof(word_type
) - 1);
245 val
= acc_char_mask_misalign (val
, misalign
);
250 t
= acc_char_cmp (val
, repl_nl
);
251 t
|= acc_char_cmp (val
, repl_cr
);
252 t
|= acc_char_cmp (val
, repl_bs
);
253 t
|= acc_char_cmp (val
, repl_qm
);
255 if (__builtin_expect (t
!= 0, 0))
257 int i
= acc_char_index (t
, val
);
259 return (const uchar
*)p
+ i
;
266 /* Disable on Solaris 2/x86 until the following problem can be properly
269 The Solaris 10+ assembler tags objects with the instruction set
270 extensions used, so SSE4.2 executables cannot run on machines that
271 don't support that extension. */
273 #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
275 /* Replicated character data to be shared between implementations.
276 Recall that outside of a context with vector support we can't
277 define compatible vector types, therefore these are all defined
278 in terms of raw characters. */
279 static const char repl_chars
[4][16] __attribute__((aligned(16))) = {
280 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
281 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
282 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
283 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
284 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
285 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
286 { '?', '?', '?', '?', '?', '?', '?', '?',
287 '?', '?', '?', '?', '?', '?', '?', '?' },
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
293 which was packaged into SSE1; it is also present in the AMD MMX
294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
299 __attribute__((__target__("sse")))
301 search_line_mmx (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
303 typedef char v8qi
__attribute__ ((__vector_size__ (8)));
304 typedef int __m64
__attribute__ ((__vector_size__ (8), __may_alias__
));
306 const v8qi repl_nl
= *(const v8qi
*)repl_chars
[0];
307 const v8qi repl_cr
= *(const v8qi
*)repl_chars
[1];
308 const v8qi repl_bs
= *(const v8qi
*)repl_chars
[2];
309 const v8qi repl_qm
= *(const v8qi
*)repl_chars
[3];
311 unsigned int misalign
, found
, mask
;
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign
= (uintptr_t)s
& 7;
319 p
= (const v8qi
*)((uintptr_t)s
& -8);
322 /* Create a mask for the bytes that are valid within the first
323 8-byte block. The idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask
= -1u << misalign
;
328 /* Main loop processing 8 bytes at a time. */
336 t
= __builtin_ia32_pcmpeqb(data
, repl_nl
);
337 c
= __builtin_ia32_pcmpeqb(data
, repl_cr
);
338 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
339 c
= __builtin_ia32_pcmpeqb(data
, repl_bs
);
340 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
341 c
= __builtin_ia32_pcmpeqb(data
, repl_qm
);
342 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
343 found
= __builtin_ia32_pmovmskb (t
);
348 __builtin_ia32_emms ();
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found
= __builtin_ctz(found
);
353 return (const uchar
*)p
+ found
;
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
360 __attribute__((__target__("sse2")))
362 search_line_sse2 (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
364 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
366 const v16qi repl_nl
= *(const v16qi
*)repl_chars
[0];
367 const v16qi repl_cr
= *(const v16qi
*)repl_chars
[1];
368 const v16qi repl_bs
= *(const v16qi
*)repl_chars
[2];
369 const v16qi repl_qm
= *(const v16qi
*)repl_chars
[3];
371 unsigned int misalign
, found
, mask
;
375 /* Align the source pointer. */
376 misalign
= (uintptr_t)s
& 15;
377 p
= (const v16qi
*)((uintptr_t)s
& -16);
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The Idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask
= -1u << misalign
;
386 /* Main loop processing 16 bytes at a time. */
394 t
= __builtin_ia32_pcmpeqb128(data
, repl_nl
);
395 t
|= __builtin_ia32_pcmpeqb128(data
, repl_cr
);
396 t
|= __builtin_ia32_pcmpeqb128(data
, repl_bs
);
397 t
|= __builtin_ia32_pcmpeqb128(data
, repl_qm
);
398 found
= __builtin_ia32_pmovmskb128 (t
);
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found
= __builtin_ctz(found
);
406 return (const uchar
*)p
+ found
;
410 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
414 __attribute__((__target__("sse4.2")))
416 search_line_sse42 (const uchar
*s
, const uchar
*end
)
418 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
419 static const v16qi search
= { '\n', '\r', '?', '\\' };
421 uintptr_t si
= (uintptr_t)s
;
424 /* Check for unaligned input. */
429 if (__builtin_expect (end
- s
< 16, 0)
430 && __builtin_expect ((si
& 0xfff) > 0xff0, 0))
432 /* There are fewer than 16 bytes left in the buffer, and fewer
433 than 16 bytes left on the page. Reading 16 bytes at this
434 point might generate a spurious page fault. Defer to the
435 SSE2 implementation, which already handles alignment. */
436 return search_line_sse2 (s
, end
);
439 /* ??? The builtin doesn't understand that the PCMPESTRI read from
440 memory need not be aligned. */
441 sv
= __builtin_ia32_loaddqu ((const char *) s
);
442 index
= __builtin_ia32_pcmpestri128 (search
, 4, sv
, 16, 0);
444 if (__builtin_expect (index
< 16, 0))
447 /* Advance the pointer to an aligned address. We will re-scan a
448 few bytes, but we no longer need to care about reading past the
449 end of a page, since we're guaranteed a match. */
450 s
= (const uchar
*)((si
+ 16) & -16);
453 /* Main loop, processing 16 bytes at a time. By doing the whole loop
454 in inline assembly, we can make proper use of the flags set. */
455 __asm ( "sub $16, %1\n"
458 " %vpcmpestri $0, (%1), %2\n"
460 : "=&c"(index
), "+r"(s
)
461 : "x"(search
), "a"(4), "d"(16));
468 /* Work around out-dated assemblers without sse4 support. */
469 #define search_line_sse42 search_line_sse2
472 /* Check the CPU capabilities. */
474 #include "../gcc/config/i386/cpuid.h"
476 typedef const uchar
* (*search_line_fast_type
) (const uchar
*, const uchar
*);
477 static search_line_fast_type search_line_fast
;
479 #define HAVE_init_vectorized_lexer 1
481 init_vectorized_lexer (void)
483 unsigned dummy
, ecx
= 0, edx
= 0;
484 search_line_fast_type impl
= search_line_acc_char
;
487 #if defined(__SSE4_2__)
489 #elif defined(__SSE2__)
491 #elif defined(__SSE__)
496 impl
= search_line_sse42
;
497 else if (__get_cpuid (1, &dummy
, &dummy
, &ecx
, &edx
) || minimum
== 2)
499 if (minimum
== 3 || (ecx
& bit_SSE4_2
))
500 impl
= search_line_sse42
;
501 else if (minimum
== 2 || (edx
& bit_SSE2
))
502 impl
= search_line_sse2
;
503 else if (minimum
== 1 || (edx
& bit_SSE
))
504 impl
= search_line_mmx
;
506 else if (__get_cpuid (0x80000001, &dummy
, &dummy
, &dummy
, &edx
))
509 || (edx
& (bit_MMXEXT
| bit_CMOV
)) == (bit_MMXEXT
| bit_CMOV
))
510 impl
= search_line_mmx
;
513 search_line_fast
= impl
;
516 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
518 /* A version of the fast scanner using AltiVec vectorized byte compares. */
519 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
520 so we can't compile this function without -maltivec on the command line
521 (or implied by some other switch). */
524 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
526 typedef __attribute__((altivec(vector
))) unsigned char vc
;
529 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
530 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
533 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
534 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
537 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
538 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
541 '?', '?', '?', '?', '?', '?', '?', '?',
542 '?', '?', '?', '?', '?', '?', '?', '?',
545 -1, -1, -1, -1, -1, -1, -1, -1,
546 -1, -1, -1, -1, -1, -1, -1, -1,
548 const vc zero
= { 0 };
552 /* Altivec loads automatically mask addresses with -16. This lets us
553 issue the first load as early as possible. */
554 data
= __builtin_vec_ld(0, (const vc
*)s
);
556 /* Discard bytes before the beginning of the buffer. Do this by
557 beginning with all ones and shifting in zeros according to the
558 mis-alignment. The LVSR instruction pulls the exact shift we
559 want from the address. */
560 #ifdef __BIG_ENDIAN__
561 mask
= __builtin_vec_lvsr(0, s
);
562 mask
= __builtin_vec_perm(zero
, ones
, mask
);
564 mask
= __builtin_vec_lvsl(0, s
);
565 mask
= __builtin_vec_perm(ones
, zero
, mask
);
569 /* While altivec loads mask addresses, we still need to align S so
570 that the offset we compute at the end is correct. */
571 s
= (const uchar
*)((uintptr_t)s
& -16);
573 /* Main loop processing 16 bytes at a time. */
577 vc m_nl
, m_cr
, m_bs
, m_qm
;
580 data
= __builtin_vec_ld(0, (const vc
*)s
);
583 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
584 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
585 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
586 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
587 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
589 /* T now contains 0xff in bytes for which we matched one of the relevant
590 characters. We want to exit the loop if any byte in T is non-zero.
591 Below is the expansion of vec_any_ne(t, zero). */
593 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
596 #define N (sizeof(vc) / sizeof(long))
600 /* Statically assert that N is 2 or 4. */
601 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
603 unsigned long l
, i
= 0;
607 /* Find the first word of T that is non-zero. */
614 s
+= sizeof(unsigned long);
618 s
+= sizeof(unsigned long);
623 s
+= sizeof(unsigned long);
627 /* L now contains 0xff in bytes for which we matched one of the
628 relevant characters. We can find the byte index by finding
629 its bit index and dividing by 8. */
630 #ifdef __BIG_ENDIAN__
631 l
= __builtin_clzl(l
) >> 3;
633 l
= __builtin_ctzl(l
) >> 3;
641 #elif defined (__ARM_NEON__)
642 #include "arm_neon.h"
645 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
647 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
648 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
649 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
650 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
651 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
653 unsigned int misalign
, found
, mask
;
657 /* Align the source pointer. */
658 misalign
= (uintptr_t)s
& 15;
659 p
= (const uint8_t *)((uintptr_t)s
& -16);
662 /* Create a mask for the bytes that are valid within the first
663 16-byte block. The Idea here is that the AND with the mask
664 within the loop is "free", since we need some AND or TEST
665 insn in order to set the flags for the branch anyway. */
666 mask
= (-1u << misalign
) & 0xffff;
668 /* Main loop, processing 16 bytes at a time. */
676 uint8x16_t t
, u
, v
, w
;
683 t
= vceqq_u8 (data
, repl_nl
);
684 u
= vceqq_u8 (data
, repl_cr
);
685 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
686 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
687 t
= vandq_u8 (vorrq_u8 (v
, w
), xmask
);
688 l
= vpadd_u8 (vget_low_u8 (t
), vget_high_u8 (t
));
692 found
= vget_lane_u32 ((uint32x2_t
) vorr_u64 ((uint64x1_t
) n
,
693 vshr_n_u64 ((uint64x1_t
) n
, 24)), 0);
698 /* FOUND contains 1 in bits for which we matched a relevant
699 character. Conversion to the byte index is trivial. */
700 found
= __builtin_ctz (found
);
701 return (const uchar
*)p
+ found
;
706 /* We only have one accelerated alternative. Use a direct call so that
707 we encourage inlining. */
709 #define search_line_fast search_line_acc_char
713 /* Initialize the lexer if needed. */
716 _cpp_init_lexer (void)
718 #ifdef HAVE_init_vectorized_lexer
719 init_vectorized_lexer ();
723 /* Returns with a logical line that contains no escaped newlines or
724 trigraphs. This is a time-critical inner loop. */
726 _cpp_clean_line (cpp_reader
*pfile
)
732 buffer
= pfile
->buffer
;
733 buffer
->cur_note
= buffer
->notes_used
= 0;
734 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
735 buffer
->need_line
= false;
736 s
= buffer
->next_line
;
738 if (!buffer
->from_stage3
)
740 const uchar
*pbackslash
= NULL
;
742 /* Fast path. This is the common case of an un-escaped line with
743 no trigraphs. The primary win here is by not writing any
744 data back to memory until we have to. */
747 /* Perform an optimized search for \n, \r, \\, ?. */
748 s
= search_line_fast (s
, buffer
->rlimit
);
753 /* Record the location of the backslash and continue. */
756 else if (__builtin_expect (c
== '?', 0))
758 if (__builtin_expect (s
[1] == '?', false)
759 && _cpp_trigraph_map
[s
[2]])
761 /* Have a trigraph. We may or may not have to convert
762 it. Add a line note regardless, for -Wtrigraphs. */
763 add_line_note (buffer
, s
, s
[2]);
764 if (CPP_OPTION (pfile
, trigraphs
))
766 /* We do, and that means we have to switch to the
769 *d
= _cpp_trigraph_map
[s
[2]];
774 /* Not a trigraph. Continue on fast-path. */
781 /* This must be \r or \n. We're either done, or we'll be forced
782 to write back to the buffer and continue on the slow path. */
785 if (__builtin_expect (s
== buffer
->rlimit
, false))
788 /* DOS line ending? */
789 if (__builtin_expect (c
== '\r', false) && s
[1] == '\n')
792 if (s
== buffer
->rlimit
)
796 if (__builtin_expect (pbackslash
== NULL
, true))
799 /* Check for escaped newline. */
801 while (is_nvspace (p
[-1]))
803 if (p
- 1 != pbackslash
)
806 /* Have an escaped newline; process it and proceed to
808 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
810 buffer
->next_line
= p
- 1;
818 if (c
== '\n' || c
== '\r')
820 /* Handle DOS line endings. */
821 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
823 if (s
== buffer
->rlimit
)
828 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
830 if (p
== buffer
->next_line
|| p
[-1] != '\\')
833 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
835 buffer
->next_line
= p
- 1;
837 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
839 /* Add a note regardless, for the benefit of -Wtrigraphs. */
840 add_line_note (buffer
, d
, s
[2]);
841 if (CPP_OPTION (pfile
, trigraphs
))
843 *d
= _cpp_trigraph_map
[s
[2]];
851 while (*s
!= '\n' && *s
!= '\r')
855 /* Handle DOS line endings. */
856 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
862 /* A sentinel note that should never be processed. */
863 add_line_note (buffer
, d
+ 1, '\n');
864 buffer
->next_line
= s
+ 1;
867 /* Return true if the trigraph indicated by NOTE should be warned
868 about in a comment. */
870 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
874 /* Within comments we don't warn about trigraphs, unless the
875 trigraph forms an escaped newline, as that may change
877 if (note
->type
!= '/')
880 /* If -trigraphs, then this was an escaped newline iff the next note
882 if (CPP_OPTION (pfile
, trigraphs
))
883 return note
[1].pos
== note
->pos
;
885 /* Otherwise, see if this forms an escaped newline. */
887 while (is_nvspace (*p
))
890 /* There might have been escaped newlines between the trigraph and the
891 newline we found. Hence the position test. */
892 return (*p
== '\n' && p
< note
[1].pos
);
895 /* Process the notes created by add_line_note as far as the current
898 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
900 cpp_buffer
*buffer
= pfile
->buffer
;
904 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
907 if (note
->pos
> buffer
->cur
)
911 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
913 if (note
->type
== '\\' || note
->type
== ' ')
915 if (note
->type
== ' ' && !in_comment
)
916 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
917 "backslash and newline separated by space");
919 if (buffer
->next_line
> buffer
->rlimit
)
921 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
922 "backslash-newline at end of file");
923 /* Prevent "no newline at end of file" warning. */
924 buffer
->next_line
= buffer
->rlimit
;
927 buffer
->line_base
= note
->pos
;
928 CPP_INCREMENT_LINE (pfile
, 0);
930 else if (_cpp_trigraph_map
[note
->type
])
932 if (CPP_OPTION (pfile
, warn_trigraphs
)
933 && (!in_comment
|| warn_in_comment (pfile
, note
)))
935 if (CPP_OPTION (pfile
, trigraphs
))
936 cpp_warning_with_line (pfile
, CPP_W_TRIGRAPHS
,
937 pfile
->line_table
->highest_line
, col
,
938 "trigraph ??%c converted to %c",
940 (int) _cpp_trigraph_map
[note
->type
]);
943 cpp_warning_with_line
944 (pfile
, CPP_W_TRIGRAPHS
,
945 pfile
->line_table
->highest_line
, col
,
946 "trigraph ??%c ignored, use -trigraphs to enable",
951 else if (note
->type
== 0)
952 /* Already processed in lex_raw_string. */;
958 /* Skip a C-style block comment. We find the end of the comment by
959 seeing if an asterisk is before every '/' we encounter. Returns
960 nonzero if comment terminated by EOF, zero otherwise.
962 Buffer->cur points to the initial asterisk of the comment. */
964 _cpp_skip_block_comment (cpp_reader
*pfile
)
966 cpp_buffer
*buffer
= pfile
->buffer
;
967 const uchar
*cur
= buffer
->cur
;
976 /* People like decorating comments with '*', so check for '/'
977 instead for efficiency. */
985 /* Warn about potential nested comments, but not if the '/'
986 comes immediately before the true comment delimiter.
987 Don't bother to get it right across escaped newlines. */
988 if (CPP_OPTION (pfile
, warn_comments
)
989 && cur
[0] == '*' && cur
[1] != '/')
992 cpp_warning_with_line (pfile
, CPP_W_COMMENTS
,
993 pfile
->line_table
->highest_line
,
994 CPP_BUF_COL (buffer
),
995 "\"/*\" within comment");
1001 buffer
->cur
= cur
- 1;
1002 _cpp_process_line_notes (pfile
, true);
1003 if (buffer
->next_line
>= buffer
->rlimit
)
1005 _cpp_clean_line (pfile
);
1007 cols
= buffer
->next_line
- buffer
->line_base
;
1008 CPP_INCREMENT_LINE (pfile
, cols
);
1015 _cpp_process_line_notes (pfile
, true);
1019 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1020 terminating newline. Handles escaped newlines. Returns nonzero
1021 if a multiline comment. */
1023 skip_line_comment (cpp_reader
*pfile
)
1025 cpp_buffer
*buffer
= pfile
->buffer
;
1026 source_location orig_line
= pfile
->line_table
->highest_line
;
1028 while (*buffer
->cur
!= '\n')
1031 _cpp_process_line_notes (pfile
, true);
1032 return orig_line
!= pfile
->line_table
->highest_line
;
1035 /* Skips whitespace, saving the next non-whitespace character. */
1037 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
1039 cpp_buffer
*buffer
= pfile
->buffer
;
1040 bool saw_NUL
= false;
1044 /* Horizontal space always OK. */
1045 if (c
== ' ' || c
== '\t')
1047 /* Just \f \v or \0 left. */
1050 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
1051 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
1052 CPP_BUF_COL (buffer
),
1053 "%s in preprocessing directive",
1054 c
== '\f' ? "form feed" : "vertical tab");
1058 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1059 while (is_nvspace (c
));
1062 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
1067 /* See if the characters of a number token are valid in a name (no
1068 '.', '+' or '-'). */
1070 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
1074 for (i
= 0; i
< string
->len
; i
++)
1075 if (!is_idchar (string
->text
[i
]))
1081 /* After parsing an identifier or other sequence, produce a warning about
1082 sequences not in NFC/NFKC. */
1084 warn_about_normalization (cpp_reader
*pfile
,
1085 const cpp_token
*token
,
1086 const struct normalize_state
*s
)
1088 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
1089 && !pfile
->state
.skipping
)
1091 /* Make sure that the token is printed using UCNs, even
1092 if we'd otherwise happily print UTF-8. */
1093 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
1096 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
1097 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
1098 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1099 "`%.*s' is not in NFKC", (int) sz
, buf
);
1101 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1102 "`%.*s' is not in NFC", (int) sz
, buf
);
1107 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1108 an identifier. FIRST is TRUE if this starts an identifier. */
1110 forms_identifier_p (cpp_reader
*pfile
, int first
,
1111 struct normalize_state
*state
)
1113 cpp_buffer
*buffer
= pfile
->buffer
;
1115 if (*buffer
->cur
== '$')
1117 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1121 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
1123 CPP_OPTION (pfile
, warn_dollars
) = 0;
1124 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
1130 /* Is this a syntactically valid UCN? */
1131 if (CPP_OPTION (pfile
, extended_identifiers
)
1132 && *buffer
->cur
== '\\'
1133 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
1136 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1145 /* Helper function to get the cpp_hashnode of the identifier BASE. */
1146 static cpp_hashnode
*
1147 lex_identifier_intern (cpp_reader
*pfile
, const uchar
*base
)
1149 cpp_hashnode
*result
;
1152 unsigned int hash
= HT_HASHSTEP (0, *base
);
1155 while (ISIDNUM (*cur
))
1157 hash
= HT_HASHSTEP (hash
, *cur
);
1161 hash
= HT_HASHFINISH (hash
, len
);
1162 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1163 base
, len
, hash
, HT_ALLOC
));
1165 /* Rarely, identifiers require diagnostics when lexed. */
1166 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1167 && !pfile
->state
.skipping
, 0))
1169 /* It is allowed to poison the same identifier twice. */
1170 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1171 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1172 NODE_NAME (result
));
1174 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1175 replacement list of a variadic macro. */
1176 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1177 && !pfile
->state
.va_args_ok
)
1178 cpp_error (pfile
, CPP_DL_PEDWARN
,
1179 "__VA_ARGS__ can only appear in the expansion"
1180 " of a C99 variadic macro");
1182 /* For -Wc++-compat, warn about use of C++ named operators. */
1183 if (result
->flags
& NODE_WARN_OPERATOR
)
1184 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1185 "identifier \"%s\" is a special operator name in C++",
1186 NODE_NAME (result
));
1192 /* Get the cpp_hashnode of an identifier specified by NAME in
1193 the current cpp_reader object. If none is found, NULL is returned. */
1195 _cpp_lex_identifier (cpp_reader
*pfile
, const char *name
)
1197 cpp_hashnode
*result
;
1198 result
= lex_identifier_intern (pfile
, (uchar
*) name
);
1202 /* Lex an identifier starting at BUFFER->CUR - 1. */
1203 static cpp_hashnode
*
1204 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
1205 struct normalize_state
*nst
)
1207 cpp_hashnode
*result
;
1210 unsigned int hash
= HT_HASHSTEP (0, *base
);
1212 cur
= pfile
->buffer
->cur
;
1215 while (ISIDNUM (*cur
))
1217 hash
= HT_HASHSTEP (hash
, *cur
);
1220 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *(cur
- 1));
1222 pfile
->buffer
->cur
= cur
;
1223 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
1225 /* Slower version for identifiers containing UCNs (or $). */
1227 while (ISIDNUM (*pfile
->buffer
->cur
))
1229 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *pfile
->buffer
->cur
);
1230 pfile
->buffer
->cur
++;
1232 } while (forms_identifier_p (pfile
, false, nst
));
1233 result
= _cpp_interpret_identifier (pfile
, base
,
1234 pfile
->buffer
->cur
- base
);
1239 hash
= HT_HASHFINISH (hash
, len
);
1241 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1242 base
, len
, hash
, HT_ALLOC
));
1245 /* Rarely, identifiers require diagnostics when lexed. */
1246 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1247 && !pfile
->state
.skipping
, 0))
1249 /* It is allowed to poison the same identifier twice. */
1250 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1251 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1252 NODE_NAME (result
));
1254 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1255 replacement list of a variadic macro. */
1256 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1257 && !pfile
->state
.va_args_ok
)
1258 cpp_error (pfile
, CPP_DL_PEDWARN
,
1259 "__VA_ARGS__ can only appear in the expansion"
1260 " of a C99 variadic macro");
1262 /* For -Wc++-compat, warn about use of C++ named operators. */
1263 if (result
->flags
& NODE_WARN_OPERATOR
)
1264 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1265 "identifier \"%s\" is a special operator name in C++",
1266 NODE_NAME (result
));
1272 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1274 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
1275 struct normalize_state
*nst
)
1281 base
= pfile
->buffer
->cur
- 1;
1284 cur
= pfile
->buffer
->cur
;
1286 /* N.B. ISIDNUM does not include $. */
1287 while (ISIDNUM (*cur
) || *cur
== '.' || DIGIT_SEP (*cur
)
1288 || VALID_SIGN (*cur
, cur
[-1]))
1290 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *cur
);
1294 pfile
->buffer
->cur
= cur
;
1296 while (forms_identifier_p (pfile
, false, nst
));
1298 number
->len
= cur
- base
;
1299 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
1300 memcpy (dest
, base
, number
->len
);
1301 dest
[number
->len
] = '\0';
1302 number
->text
= dest
;
1305 /* Create a token of type TYPE with a literal spelling. */
1307 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1308 unsigned int len
, enum cpp_ttype type
)
1310 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
1312 memcpy (dest
, base
, len
);
1315 token
->val
.str
.len
= len
;
1316 token
->val
.str
.text
= dest
;
1319 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1320 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1323 bufring_append (cpp_reader
*pfile
, const uchar
*base
, size_t len
,
1324 _cpp_buff
**first_buff_p
, _cpp_buff
**last_buff_p
)
1326 _cpp_buff
*first_buff
= *first_buff_p
;
1327 _cpp_buff
*last_buff
= *last_buff_p
;
1329 if (first_buff
== NULL
)
1330 first_buff
= last_buff
= _cpp_get_buff (pfile
, len
);
1331 else if (len
> BUFF_ROOM (last_buff
))
1333 size_t room
= BUFF_ROOM (last_buff
);
1334 memcpy (BUFF_FRONT (last_buff
), base
, room
);
1335 BUFF_FRONT (last_buff
) += room
;
1338 last_buff
= _cpp_append_extend_buff (pfile
, last_buff
, len
);
1341 memcpy (BUFF_FRONT (last_buff
), base
, len
);
1342 BUFF_FRONT (last_buff
) += len
;
1344 *first_buff_p
= first_buff
;
1345 *last_buff_p
= last_buff
;
1349 /* Returns true if a macro has been defined.
1350 This might not work if compile with -save-temps,
1351 or preprocess separately from compilation. */
1354 is_macro(cpp_reader
*pfile
, const uchar
*base
)
1356 const uchar
*cur
= base
;
1357 if (! ISIDST (*cur
))
1359 unsigned int hash
= HT_HASHSTEP (0, *cur
);
1361 while (ISIDNUM (*cur
))
1363 hash
= HT_HASHSTEP (hash
, *cur
);
1366 hash
= HT_HASHFINISH (hash
, cur
- base
);
1368 cpp_hashnode
*result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1369 base
, cur
- base
, hash
, HT_NO_INSERT
));
1371 return !result
? false : (result
->type
== NT_MACRO
);
1375 /* Lexes a raw string. The stored string contains the spelling, including
1376 double quotes, delimiter string, '(' and ')', any leading
1377 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1378 literal, or CPP_OTHER if it was not properly terminated.
1380 The spelling is NUL-terminated, but it is not guaranteed that this
1381 is the first NUL since embedded NULs are preserved. */
1384 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1387 uchar raw_prefix
[17];
1388 uchar temp_buffer
[18];
1389 const uchar
*orig_base
;
1390 unsigned int raw_prefix_len
= 0, raw_suffix_len
= 0;
1391 enum raw_str_phase
{ RAW_STR_PREFIX
, RAW_STR
, RAW_STR_SUFFIX
};
1392 raw_str_phase phase
= RAW_STR_PREFIX
;
1393 enum cpp_ttype type
;
1394 size_t total_len
= 0;
1395 /* Index into temp_buffer during phases other than RAW_STR,
1396 during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1397 be appended to temp_buffer. */
1398 size_t temp_buffer_len
= 0;
1399 _cpp_buff
*first_buff
= NULL
, *last_buff
= NULL
;
1400 size_t raw_prefix_start
;
1401 _cpp_line_note
*note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1403 type
= (*base
== 'L' ? CPP_WSTRING
:
1404 *base
== 'U' ? CPP_STRING32
:
1405 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
1408 #define BUF_APPEND(STR,LEN) \
1410 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1411 &first_buff, &last_buff); \
1412 total_len += (LEN); \
1413 if (__builtin_expect (temp_buffer_len < 17, 0) \
1414 && (const uchar *)(STR) != base \
1417 memcpy (temp_buffer + temp_buffer_len, \
1418 (const uchar *)(STR), (LEN)); \
1419 temp_buffer_len += (LEN); \
1425 raw_prefix_start
= cur
- base
;
1430 /* If we previously performed any trigraph or line splicing
1431 transformations, undo them in between the opening and closing
1433 while (note
->pos
< cur
)
1435 for (; note
->pos
== cur
; ++note
)
1441 /* Restore backslash followed by newline. */
1442 BUF_APPEND (base
, cur
- base
);
1444 BUF_APPEND ("\\", 1);
1446 if (note
->type
== ' ')
1448 /* GNU backslash whitespace newline extension. FIXME
1449 could be any sequence of non-vertical space. When we
1450 can properly restore any such sequence, we should mark
1451 this note as handled so _cpp_process_line_notes
1453 BUF_APPEND (" ", 1);
1456 BUF_APPEND ("\n", 1);
1460 /* Already handled. */
1464 if (_cpp_trigraph_map
[note
->type
])
1466 /* Don't warn about this trigraph in
1467 _cpp_process_line_notes, since trigraphs show up as
1468 trigraphs in raw strings. */
1469 uchar type
= note
->type
;
1472 if (!CPP_OPTION (pfile
, trigraphs
))
1473 /* If we didn't convert the trigraph in the first
1474 place, don't do anything now either. */
1477 BUF_APPEND (base
, cur
- base
);
1479 BUF_APPEND ("??", 2);
1481 /* ??/ followed by newline gets two line notes, one for
1482 the trigraph and one for the backslash/newline. */
1483 if (type
== '/' && note
[1].pos
== cur
)
1485 if (note
[1].type
!= '\\'
1486 && note
[1].type
!= ' ')
1488 BUF_APPEND ("/", 1);
1490 goto after_backslash
;
1494 /* Skip the replacement character. */
1496 BUF_APPEND (&type
, 1);
1507 if (__builtin_expect (temp_buffer_len
< 17, 0))
1508 temp_buffer
[temp_buffer_len
++] = c
;
1511 if (phase
== RAW_STR_PREFIX
)
1513 while (raw_prefix_len
< temp_buffer_len
)
1515 raw_prefix
[raw_prefix_len
] = temp_buffer
[raw_prefix_len
];
1516 switch (raw_prefix
[raw_prefix_len
])
1518 case ' ': case '(': case ')': case '\\': case '\t':
1519 case '\v': case '\f': case '\n': default:
1521 /* Basic source charset except the above chars. */
1522 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1523 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1524 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1525 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1527 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1528 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1529 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1530 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1532 case '0': case '1': case '2': case '3': case '4': case '5':
1533 case '6': case '7': case '8': case '9':
1534 case '_': case '{': case '}': case '#': case '[': case ']':
1535 case '<': case '>': case '%': case ':': case ';': case '.':
1536 case '?': case '*': case '+': case '-': case '/': case '^':
1537 case '&': case '|': case '~': case '!': case '=': case ',':
1538 case '"': case '\'':
1539 if (raw_prefix_len
< 16)
1547 if (raw_prefix
[raw_prefix_len
] != '(')
1549 int col
= CPP_BUF_COLUMN (pfile
->buffer
, cur
) + 1;
1550 if (raw_prefix_len
== 16)
1551 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1552 col
, "raw string delimiter longer "
1553 "than 16 characters");
1554 else if (raw_prefix
[raw_prefix_len
] == '\n')
1555 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1556 col
, "invalid new-line in raw "
1557 "string delimiter");
1559 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1560 col
, "invalid character '%c' in "
1561 "raw string delimiter",
1562 (int) raw_prefix
[raw_prefix_len
]);
1563 pfile
->buffer
->cur
= orig_base
+ raw_prefix_start
- 1;
1564 create_literal (pfile
, token
, orig_base
,
1565 raw_prefix_start
- 1, CPP_OTHER
);
1567 _cpp_release_buff (pfile
, first_buff
);
1570 raw_prefix
[raw_prefix_len
] = '"';
1572 /* Nothing should be appended to temp_buffer during
1574 temp_buffer_len
= 17;
1579 else if (phase
== RAW_STR_SUFFIX
)
1581 while (raw_suffix_len
<= raw_prefix_len
1582 && raw_suffix_len
< temp_buffer_len
1583 && temp_buffer
[raw_suffix_len
] == raw_prefix
[raw_suffix_len
])
1585 if (raw_suffix_len
> raw_prefix_len
)
1587 if (raw_suffix_len
== temp_buffer_len
)
1590 /* Nothing should be appended to temp_buffer during
1592 temp_buffer_len
= 17;
1596 phase
= RAW_STR_SUFFIX
;
1598 temp_buffer_len
= 0;
1602 if (pfile
->state
.in_directive
1603 || (pfile
->state
.parsing_args
1604 && pfile
->buffer
->next_line
>= pfile
->buffer
->rlimit
))
1608 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
1609 "unterminated raw string");
1613 BUF_APPEND (base
, cur
- base
);
1615 if (pfile
->buffer
->cur
< pfile
->buffer
->rlimit
)
1616 CPP_INCREMENT_LINE (pfile
, 0);
1617 pfile
->buffer
->need_line
= true;
1619 pfile
->buffer
->cur
= cur
-1;
1620 _cpp_process_line_notes (pfile
, false);
1621 if (!_cpp_get_fresh_line (pfile
))
1623 source_location src_loc
= token
->src_loc
;
1624 token
->type
= CPP_EOF
;
1625 /* Tell the compiler the line number of the EOF token. */
1626 token
->src_loc
= pfile
->line_table
->highest_line
;
1628 if (first_buff
!= NULL
)
1629 _cpp_release_buff (pfile
, first_buff
);
1630 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
1631 "unterminated raw string");
1635 cur
= base
= pfile
->buffer
->cur
;
1636 note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1640 if (CPP_OPTION (pfile
, user_literals
))
1642 /* If a string format macro, say from inttypes.h, is placed touching
1643 a string literal it could be parsed as a C++11 user-defined string
1644 literal thus breaking the program.
1645 Try to identify macros with is_macro. A warning is issued. */
1646 if (is_macro (pfile
, cur
))
1648 /* Raise a warning, but do not consume subsequent tokens. */
1649 if (CPP_OPTION (pfile
, warn_literal_suffix
))
1650 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1652 "invalid suffix on literal; C++11 requires "
1653 "a space between literal and string macro");
1655 /* Grab user defined literal suffix. */
1656 else if (ISIDST (*cur
))
1658 type
= cpp_userdef_string_add_type (type
);
1661 while (ISIDNUM (*cur
))
1666 pfile
->buffer
->cur
= cur
;
1667 if (first_buff
== NULL
)
1668 create_literal (pfile
, token
, base
, cur
- base
, type
);
1671 uchar
*dest
= _cpp_unaligned_alloc (pfile
, total_len
+ (cur
- base
) + 1);
1674 token
->val
.str
.len
= total_len
+ (cur
- base
);
1675 token
->val
.str
.text
= dest
;
1676 last_buff
= first_buff
;
1677 while (last_buff
!= NULL
)
1679 memcpy (dest
, last_buff
->base
,
1680 BUFF_FRONT (last_buff
) - last_buff
->base
);
1681 dest
+= BUFF_FRONT (last_buff
) - last_buff
->base
;
1682 last_buff
= last_buff
->next
;
1684 _cpp_release_buff (pfile
, first_buff
);
1685 memcpy (dest
, base
, cur
- base
);
1686 dest
[cur
- base
] = '\0';
1690 /* Lexes a string, character constant, or angle-bracketed header file
1691 name. The stored string contains the spelling, including opening
1692 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1693 'R' modifier. It returns the type of the literal, or CPP_OTHER
1694 if it was not properly terminated, or CPP_LESS for an unterminated
1695 header name which must be relexed as normal tokens.
1697 The spelling is NUL-terminated, but it is not guaranteed that this
1698 is the first NUL since embedded NULs are preserved. */
1700 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
1702 bool saw_NUL
= false;
1704 cppchar_t terminator
;
1705 enum cpp_ttype type
;
1708 terminator
= *cur
++;
1709 if (terminator
== 'L' || terminator
== 'U')
1710 terminator
= *cur
++;
1711 else if (terminator
== 'u')
1713 terminator
= *cur
++;
1714 if (terminator
== '8')
1715 terminator
= *cur
++;
1717 if (terminator
== 'R')
1719 lex_raw_string (pfile
, token
, base
, cur
);
1722 if (terminator
== '"')
1723 type
= (*base
== 'L' ? CPP_WSTRING
:
1724 *base
== 'U' ? CPP_STRING32
:
1725 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
1727 else if (terminator
== '\'')
1728 type
= (*base
== 'L' ? CPP_WCHAR
:
1729 *base
== 'U' ? CPP_CHAR32
:
1730 *base
== 'u' ? CPP_CHAR16
: CPP_CHAR
);
1732 terminator
= '>', type
= CPP_HEADER_NAME
;
1736 cppchar_t c
= *cur
++;
1738 /* In #include-style directives, terminators are not escapable. */
1739 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
1741 else if (c
== terminator
)
1746 /* Unmatched quotes always yield undefined behavior, but
1747 greedy lexing means that what appears to be an unterminated
1748 header name may actually be a legitimate sequence of tokens. */
1749 if (terminator
== '>')
1751 token
->type
= CPP_LESS
;
1761 if (saw_NUL
&& !pfile
->state
.skipping
)
1762 cpp_error (pfile
, CPP_DL_WARNING
,
1763 "null character(s) preserved in literal");
1765 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
1766 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
1769 if (CPP_OPTION (pfile
, user_literals
))
1771 /* If a string format macro, say from inttypes.h, is placed touching
1772 a string literal it could be parsed as a C++11 user-defined string
1773 literal thus breaking the program.
1774 Try to identify macros with is_macro. A warning is issued. */
1775 if (is_macro (pfile
, cur
))
1777 /* Raise a warning, but do not consume subsequent tokens. */
1778 if (CPP_OPTION (pfile
, warn_literal_suffix
))
1779 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1781 "invalid suffix on literal; C++11 requires "
1782 "a space between literal and string macro");
1784 /* Grab user defined literal suffix. */
1785 else if (ISIDST (*cur
))
1787 type
= cpp_userdef_char_add_type (type
);
1788 type
= cpp_userdef_string_add_type (type
);
1791 while (ISIDNUM (*cur
))
1796 pfile
->buffer
->cur
= cur
;
1797 create_literal (pfile
, token
, base
, cur
- base
, type
);
1800 /* Return the comment table. The client may not make any assumption
1801 about the ordering of the table. */
1803 cpp_get_comments (cpp_reader
*pfile
)
1805 return &pfile
->comments
;
1808 /* Append a comment to the end of the comment table. */
1810 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
1814 if (pfile
->comments
.allocated
== 0)
1816 pfile
->comments
.allocated
= 256;
1817 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
1818 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
1821 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
1823 pfile
->comments
.allocated
*= 2;
1824 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
1825 (pfile
->comments
.entries
,
1826 pfile
->comments
.allocated
* sizeof (cpp_comment
));
1829 len
= token
->val
.str
.len
;
1831 /* Copy comment. Note, token may not be NULL terminated. */
1832 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
1833 (char *) xmalloc (sizeof (char) * (len
+ 1));
1834 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
1835 token
->val
.str
.text
, len
);
1836 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
1838 /* Set source location. */
1839 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
1841 /* Increment the count of entries in the comment table. */
1842 pfile
->comments
.count
++;
1845 /* The stored comment includes the comment start and any terminator. */
1847 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
1850 unsigned char *buffer
;
1851 unsigned int len
, clen
, i
;
1853 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
1855 /* C++ comments probably (not definitely) have moved past a new
1856 line, which we don't want to save in the comment. */
1857 if (is_vspace (pfile
->buffer
->cur
[-1]))
1860 /* If we are currently in a directive or in argument parsing, then
1861 we need to store all C++ comments as C comments internally, and
1862 so we need to allocate a little extra space in that case.
1864 Note that the only time we encounter a directive here is
1865 when we are saving comments in a "#define". */
1866 clen
= ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
)
1867 && type
== '/') ? len
+ 2 : len
;
1869 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
1871 token
->type
= CPP_COMMENT
;
1872 token
->val
.str
.len
= clen
;
1873 token
->val
.str
.text
= buffer
;
1876 memcpy (buffer
+ 1, from
, len
- 1);
1878 /* Finish conversion to a C comment, if necessary. */
1879 if ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
) && type
== '/')
1882 buffer
[clen
- 2] = '*';
1883 buffer
[clen
- 1] = '/';
1884 /* As there can be in a C++ comments illegal sequences for C comments
1885 we need to filter them out. */
1886 for (i
= 2; i
< (clen
- 2); i
++)
1887 if (buffer
[i
] == '/' && (buffer
[i
- 1] == '*' || buffer
[i
+ 1] == '*'))
1891 /* Finally store this comment for use by clients of libcpp. */
1892 store_comment (pfile
, token
);
1895 /* Allocate COUNT tokens for RUN. */
1897 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
1899 run
->base
= XNEWVEC (cpp_token
, count
);
1900 run
->limit
= run
->base
+ count
;
1904 /* Returns the next tokenrun, or creates one if there is none. */
1906 next_tokenrun (tokenrun
*run
)
1908 if (run
->next
== NULL
)
1910 run
->next
= XNEW (tokenrun
);
1911 run
->next
->prev
= run
;
1912 _cpp_init_tokenrun (run
->next
, 250);
1918 /* Return the number of not yet processed token in a given
1921 _cpp_remaining_tokens_num_in_context (cpp_context
*context
)
1923 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
1924 return (LAST (context
).token
- FIRST (context
).token
);
1925 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
1926 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
1927 return (LAST (context
).ptoken
- FIRST (context
).ptoken
);
1932 /* Returns the token present at index INDEX in a given context. If
1933 INDEX is zero, the next token to be processed is returned. */
1934 static const cpp_token
*
1935 _cpp_token_from_context_at (cpp_context
*context
, int index
)
1937 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
1938 return &(FIRST (context
).token
[index
]);
1939 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
1940 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
1941 return FIRST (context
).ptoken
[index
];
1946 /* Look ahead in the input stream. */
1948 cpp_peek_token (cpp_reader
*pfile
, int index
)
1950 cpp_context
*context
= pfile
->context
;
1951 const cpp_token
*peektok
;
1954 /* First, scan through any pending cpp_context objects. */
1955 while (context
->prev
)
1957 ptrdiff_t sz
= _cpp_remaining_tokens_num_in_context (context
);
1959 if (index
< (int) sz
)
1960 return _cpp_token_from_context_at (context
, index
);
1962 context
= context
->prev
;
1965 /* We will have to read some new tokens after all (and do so
1966 without invalidating preceding tokens). */
1968 pfile
->keep_tokens
++;
1972 peektok
= _cpp_lex_token (pfile
);
1973 if (peektok
->type
== CPP_EOF
)
1978 _cpp_backup_tokens_direct (pfile
, count
+ 1);
1979 pfile
->keep_tokens
--;
1984 /* Allocate a single token that is invalidated at the same time as the
1985 rest of the tokens on the line. Has its line and col set to the
1986 same as the last lexed token, so that diagnostics appear in the
1989 _cpp_temp_token (cpp_reader
*pfile
)
1991 cpp_token
*old
, *result
;
1992 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
1993 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
1995 old
= pfile
->cur_token
- 1;
1996 /* Any pre-existing lookaheads must not be clobbered. */
2001 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
2004 memmove (next
->base
+ 1, next
->base
,
2005 (la
- sz
) * sizeof (cpp_token
));
2007 next
->base
[0] = pfile
->cur_run
->limit
[-1];
2011 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
2012 MIN (la
, sz
- 1) * sizeof (cpp_token
));
2015 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
2017 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2018 pfile
->cur_token
= pfile
->cur_run
->base
;
2021 result
= pfile
->cur_token
++;
2022 result
->src_loc
= old
->src_loc
;
2026 /* Lex a token into RESULT (external interface). Takes care of issues
2027 like directive handling, token lookahead, multiple include
2028 optimization and skipping. */
2030 _cpp_lex_token (cpp_reader
*pfile
)
2036 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
2038 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2039 pfile
->cur_token
= pfile
->cur_run
->base
;
2041 /* We assume that the current token is somewhere in the current
2043 if (pfile
->cur_token
< pfile
->cur_run
->base
2044 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
2047 if (pfile
->lookaheads
)
2049 pfile
->lookaheads
--;
2050 result
= pfile
->cur_token
++;
2053 result
= _cpp_lex_direct (pfile
);
2055 if (result
->flags
& BOL
)
2057 /* Is this a directive. If _cpp_handle_directive returns
2058 false, it is an assembler #. */
2059 if (result
->type
== CPP_HASH
2060 /* 6.10.3 p 11: Directives in a list of macro arguments
2061 gives undefined behavior. This implementation
2062 handles the directive as normal. */
2063 && pfile
->state
.parsing_args
!= 1)
2065 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
2067 if (pfile
->directive_result
.type
== CPP_PADDING
)
2069 result
= &pfile
->directive_result
;
2072 else if (pfile
->state
.in_deferred_pragma
)
2073 result
= &pfile
->directive_result
;
2075 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
2076 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
2079 /* We don't skip tokens in directives. */
2080 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
2083 /* Outside a directive, invalidate controlling macros. At file
2084 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2085 get here and MI optimization works. */
2086 pfile
->mi_valid
= false;
2088 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
2095 /* Returns true if a fresh line has been loaded. */
2097 _cpp_get_fresh_line (cpp_reader
*pfile
)
2101 /* We can't get a new line until we leave the current directive. */
2102 if (pfile
->state
.in_directive
)
2107 cpp_buffer
*buffer
= pfile
->buffer
;
2109 if (!buffer
->need_line
)
2112 if (buffer
->next_line
< buffer
->rlimit
)
2114 _cpp_clean_line (pfile
);
2118 /* First, get out of parsing arguments state. */
2119 if (pfile
->state
.parsing_args
)
2122 /* End of buffer. Non-empty files should end in a newline. */
2123 if (buffer
->buf
!= buffer
->rlimit
2124 && buffer
->next_line
> buffer
->rlimit
2125 && !buffer
->from_stage3
)
2127 /* Clip to buffer size. */
2128 buffer
->next_line
= buffer
->rlimit
;
2131 return_at_eof
= buffer
->return_at_eof
;
2132 _cpp_pop_buffer (pfile
);
2133 if (pfile
->buffer
== NULL
|| return_at_eof
)
/* If the next unread character is CHAR, consume it and set
   result->type to THEN_TYPE; otherwise set result->type to ELSE_TYPE.
   Expects variables named `buffer' and `result' at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do						\
    {						\
      if (*buffer->cur == CHAR)			\
	{					\
	  buffer->cur++;			\
	  result->type = THEN_TYPE;		\
	}					\
      else					\
	result->type = ELSE_TYPE;		\
    }						\
  while (0)
2147 /* Lex a token into pfile->cur_token, which is also incremented, to
2148 get diagnostics pointing to the correct location.
2150 Does not handle issues such as token lookahead, multiple-include
2151 optimization, directives, skipping etc. This function is only
2152 suitable for use by _cpp_lex_token, and in special cases like
2153 lex_expansion_token which doesn't care for any of these issues.
2155 When meeting a newline, returns CPP_EOF if parsing a directive,
2156 otherwise returns to the start of the token buffer if permissible.
2157 Returns the location of the lexed token. */
2159 _cpp_lex_direct (cpp_reader
*pfile
)
2163 const unsigned char *comment_start
;
2164 cpp_token
*result
= pfile
->cur_token
++;
2168 buffer
= pfile
->buffer
;
2169 if (buffer
->need_line
)
2171 if (pfile
->state
.in_deferred_pragma
)
2173 result
->type
= CPP_PRAGMA_EOL
;
2174 pfile
->state
.in_deferred_pragma
= false;
2175 if (!pfile
->state
.pragma_allow_expansion
)
2176 pfile
->state
.prevent_expansion
--;
2179 if (!_cpp_get_fresh_line (pfile
))
2181 result
->type
= CPP_EOF
;
2182 if (!pfile
->state
.in_directive
)
2184 /* Tell the compiler the line number of the EOF token. */
2185 result
->src_loc
= pfile
->line_table
->highest_line
;
2186 result
->flags
= BOL
;
2190 if (!pfile
->keep_tokens
)
2192 pfile
->cur_run
= &pfile
->base_run
;
2193 result
= pfile
->base_run
.base
;
2194 pfile
->cur_token
= result
+ 1;
2196 result
->flags
= BOL
;
2197 if (pfile
->state
.parsing_args
== 2)
2198 result
->flags
|= PREV_WHITE
;
2200 buffer
= pfile
->buffer
;
2202 result
->src_loc
= pfile
->line_table
->highest_line
;
2205 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
2206 && !pfile
->overlaid_buffer
)
2208 _cpp_process_line_notes (pfile
, false);
2209 result
->src_loc
= pfile
->line_table
->highest_line
;
2213 if (pfile
->forced_token_location_p
)
2214 result
->src_loc
= *pfile
->forced_token_location_p
;
2216 result
->src_loc
= linemap_position_for_column (pfile
->line_table
,
2217 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
2221 case ' ': case '\t': case '\f': case '\v': case '\0':
2222 result
->flags
|= PREV_WHITE
;
2223 skip_whitespace (pfile
, c
);
2227 if (buffer
->cur
< buffer
->rlimit
)
2228 CPP_INCREMENT_LINE (pfile
, 0);
2229 buffer
->need_line
= true;
2232 case '0': case '1': case '2': case '3': case '4':
2233 case '5': case '6': case '7': case '8': case '9':
2235 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2236 result
->type
= CPP_NUMBER
;
2237 lex_number (pfile
, &result
->val
.str
, &nst
);
2238 warn_about_normalization (pfile
, result
, &nst
);
2246 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2247 wide strings or raw strings. */
2248 if (c
== 'L' || CPP_OPTION (pfile
, rliterals
)
2249 || (c
!= 'R' && CPP_OPTION (pfile
, uliterals
)))
2251 if ((*buffer
->cur
== '\'' && c
!= 'R')
2252 || *buffer
->cur
== '"'
2253 || (*buffer
->cur
== 'R'
2255 && buffer
->cur
[1] == '"'
2256 && CPP_OPTION (pfile
, rliterals
))
2257 || (*buffer
->cur
== '8'
2259 && (buffer
->cur
[1] == '"'
2260 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'
2261 && CPP_OPTION (pfile
, rliterals
)))))
2263 lex_string (pfile
, result
, buffer
->cur
- 1);
2270 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2271 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2272 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2273 case 's': case 't': case 'v': case 'w': case 'x':
2275 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2276 case 'G': case 'H': case 'I': case 'J': case 'K':
2277 case 'M': case 'N': case 'O': case 'P': case 'Q':
2278 case 'S': case 'T': case 'V': case 'W': case 'X':
2280 result
->type
= CPP_NAME
;
2282 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2283 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
2285 warn_about_normalization (pfile
, result
, &nst
);
2288 /* Convert named operators to their proper types. */
2289 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
2291 result
->flags
|= NAMED_OP
;
2292 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
2298 lex_string (pfile
, result
, buffer
->cur
- 1);
2302 /* A potential block or line comment. */
2303 comment_start
= buffer
->cur
;
2308 if (_cpp_skip_block_comment (pfile
))
2309 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
2311 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
2312 || cpp_in_system_header (pfile
)))
2314 /* Warn about comments only if pedantically GNUC89, and not
2315 in system headers. */
2316 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
2317 && ! buffer
->warned_cplusplus_comments
)
2319 cpp_error (pfile
, CPP_DL_PEDWARN
,
2320 "C++ style comments are not allowed in ISO C90");
2321 cpp_error (pfile
, CPP_DL_PEDWARN
,
2322 "(this will be reported only once per input file)");
2323 buffer
->warned_cplusplus_comments
= 1;
2326 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
2327 cpp_warning (pfile
, CPP_W_COMMENTS
, "multi-line comment");
2332 result
->type
= CPP_DIV_EQ
;
2337 result
->type
= CPP_DIV
;
2341 if (!pfile
->state
.save_comments
)
2343 result
->flags
|= PREV_WHITE
;
2344 goto update_tokens_line
;
2347 /* Save the comment as a token in its own right. */
2348 save_comment (pfile
, result
, comment_start
, c
);
2352 if (pfile
->state
.angled_headers
)
2354 lex_string (pfile
, result
, buffer
->cur
- 1);
2355 if (result
->type
!= CPP_LESS
)
2359 result
->type
= CPP_LESS
;
2360 if (*buffer
->cur
== '=')
2361 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
2362 else if (*buffer
->cur
== '<')
2365 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
2367 else if (CPP_OPTION (pfile
, digraphs
))
2369 if (*buffer
->cur
== ':')
2371 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2372 three characters are <:: and the subsequent character
2373 is neither : nor >, the < is treated as a preprocessor
2374 token by itself". */
2375 if (CPP_OPTION (pfile
, cplusplus
)
2376 && CPP_OPTION (pfile
, lang
) != CLK_CXX98
2377 && CPP_OPTION (pfile
, lang
) != CLK_GNUCXX
2378 && buffer
->cur
[1] == ':'
2379 && buffer
->cur
[2] != ':' && buffer
->cur
[2] != '>')
2383 result
->flags
|= DIGRAPH
;
2384 result
->type
= CPP_OPEN_SQUARE
;
2386 else if (*buffer
->cur
== '%')
2389 result
->flags
|= DIGRAPH
;
2390 result
->type
= CPP_OPEN_BRACE
;
2396 result
->type
= CPP_GREATER
;
2397 if (*buffer
->cur
== '=')
2398 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
2399 else if (*buffer
->cur
== '>')
2402 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
2407 result
->type
= CPP_MOD
;
2408 if (*buffer
->cur
== '=')
2409 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
2410 else if (CPP_OPTION (pfile
, digraphs
))
2412 if (*buffer
->cur
== ':')
2415 result
->flags
|= DIGRAPH
;
2416 result
->type
= CPP_HASH
;
2417 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
2418 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
2420 else if (*buffer
->cur
== '>')
2423 result
->flags
|= DIGRAPH
;
2424 result
->type
= CPP_CLOSE_BRACE
;
2430 result
->type
= CPP_DOT
;
2431 if (ISDIGIT (*buffer
->cur
))
2433 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2434 result
->type
= CPP_NUMBER
;
2435 lex_number (pfile
, &result
->val
.str
, &nst
);
2436 warn_about_normalization (pfile
, result
, &nst
);
2438 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
2439 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
2440 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
2441 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
2445 result
->type
= CPP_PLUS
;
2446 if (*buffer
->cur
== '+')
2447 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
2448 else if (*buffer
->cur
== '=')
2449 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
2453 result
->type
= CPP_MINUS
;
2454 if (*buffer
->cur
== '>')
2457 result
->type
= CPP_DEREF
;
2458 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
2459 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
2461 else if (*buffer
->cur
== '-')
2462 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
2463 else if (*buffer
->cur
== '=')
2464 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
2468 result
->type
= CPP_AND
;
2469 if (*buffer
->cur
== '&')
2470 buffer
->cur
++, result
->type
= CPP_AND_AND
;
2471 else if (*buffer
->cur
== '=')
2472 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
2476 result
->type
= CPP_OR
;
2477 if (*buffer
->cur
== '|')
2478 buffer
->cur
++, result
->type
= CPP_OR_OR
;
2479 else if (*buffer
->cur
== '=')
2480 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
2484 result
->type
= CPP_COLON
;
2485 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
2486 buffer
->cur
++, result
->type
= CPP_SCOPE
;
2487 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
2490 result
->flags
|= DIGRAPH
;
2491 result
->type
= CPP_CLOSE_SQUARE
;
2495 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
2496 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
2497 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
2498 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
2499 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
2501 case '?': result
->type
= CPP_QUERY
; break;
2502 case '~': result
->type
= CPP_COMPL
; break;
2503 case ',': result
->type
= CPP_COMMA
; break;
2504 case '(': result
->type
= CPP_OPEN_PAREN
; break;
2505 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
2506 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
2507 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
2508 case '{': result
->type
= CPP_OPEN_BRACE
; break;
2509 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
2510 case ';': result
->type
= CPP_SEMICOLON
; break;
2512 /* @ is a punctuator in Objective-C. */
2513 case '@': result
->type
= CPP_ATSIGN
; break;
2518 const uchar
*base
= --buffer
->cur
;
2519 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2521 if (forms_identifier_p (pfile
, true, &nst
))
2523 result
->type
= CPP_NAME
;
2524 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
);
2525 warn_about_normalization (pfile
, result
, &nst
);
2532 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
2539 /* An upper bound on the number of bytes needed to spell TOKEN.
2540 Does not include preceding whitespace. */
/* The bound depends on the token's spelling category (TOKEN_SPELL):
   literals use their stored string length, identifiers are budgeted
   ten bytes per name byte, and every other category gets a flat six
   bytes.  */
2542 cpp_token_len (const cpp_token
*token
)
2546 switch (TOKEN_SPELL (token
))
2548 default: len
= 6; break;
2549 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
/* Ten bytes is the size of one \U escape as written by utf8_to_ucn;
   presumably this covers the case where name bytes are escaped --
   TODO confirm.  */
2550 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
2556 /* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
2557 Return the number of bytes read out of NAME. (There are always
2558 10 bytes written to BUFFER.) */
/* BUFFER receives the escape; NAME points at the first byte of the
   UTF-8 sequence to convert.  The decoded value is accumulated in
   utf32 and then written out as eight hexadecimal digits.  */
2561 utf8_to_ucn (unsigned char *buffer
, const unsigned char *name
)
2567 unsigned long utf32
;
2569 /* Compute the length of the UTF-8 sequence. */
/* The scan shifts the lead byte left until bit 7 is clear, i.e. it
   walks the run of leading 1 bits (ucn_len is presumably bumped once
   per iteration; the loop body is not visible here).  */
2570 for (t
= *name
; t
& 0x80; t
<<= 1)
/* Keep only the payload bits of the lead byte.  */
2573 utf32
= *name
& (0x7F >> ucn_len
);
/* Fold in the low six bits of each following byte.  */
2574 for (ucn_len_c
= 1; ucn_len_c
< ucn_len
; ucn_len_c
++)
2576 utf32
= (utf32
<< 6) | (*++name
& 0x3F);
2578 /* Ill-formed UTF-8. */
/* A continuation byte must have its top two bits equal to 10.  */
2579 if ((*name
& ~0x3F) != 0x80)
/* Emit eight lowercase hex digits, most significant nibble first.  */
2585 for (j
= 7; j
>= 0; j
--)
2586 *buffer
++ = "0123456789abcdef"[(utf32
>> (4 * j
)) & 0xF];
2590 /* Given a token TYPE corresponding to a digraph, return a pointer to
2591 the spelling of the digraph. */
/* The lookup indexes digraph_spellings by TYPE's offset from
   CPP_FIRST_DIGRAPH and performs no range check; the callers in this
   file test the DIGRAPH flag before calling.  */
2592 static const unsigned char *
2593 cpp_digraph2name (enum cpp_ttype type
)
2595 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
2598 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
2599 already contain enough space to hold the token's spelling.
2600 Returns a pointer to the character after the last character written.
2601 FORSTRING is true if this is to be the spelling after translation
2602 phase 1 (this is different for UCNs).
2603 FIXME: Would be nice if we didn't need the PFILE argument. */
/* cpp_token_len gives an upper bound on the space required; the two
   callers in this file size their buffers from it.  PFILE is used
   here only to report an internal error for an unspellable token.  */
2605 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
2606 unsigned char *buffer
, bool forstring
)
/* Dispatch on the token's spelling category.  */
2608 switch (TOKEN_SPELL (token
))
2610 case SPELL_OPERATOR
:
2612 const unsigned char *spelling
;
/* Choose the digraph spelling, the named-operator spelling, or the
   default entry from token_spellings, tested in that order.  */
2615 if (token
->flags
& DIGRAPH
)
2616 spelling
= cpp_digraph2name (token
->type
);
2617 else if (token
->flags
& NAMED_OP
)
2620 spelling
= TOKEN_NAME (token
);
/* Walk the chosen spelling up to its terminating NUL.  */
2622 while ((c
= *spelling
++) != '\0')
/* Copy the identifier's stored name bytes unchanged (presumably the
   FORSTRING case -- the guarding test is not visible here).  */
2631 memcpy (buffer
, NODE_NAME (token
->val
.node
.node
),
2632 NODE_LEN (token
->val
.node
.node
));
2633 buffer
+= NODE_LEN (token
->val
.node
.node
);
/* Otherwise copy the name byte by byte, converting every sequence
   that starts with a byte outside 7-bit ASCII via utf8_to_ucn.  */
2638 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
2640 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
2641 if (name
[i
] & ~0x7F)
/* utf8_to_ucn returns the bytes consumed; subtract one because the
   loop's own i++ accounts for the first of them.  */
2643 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
2647 *buffer
++ = NODE_NAME (token
->val
.node
.node
)[i
];
/* Literals are copied verbatim from their stored text.  */
2652 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
2653 buffer
+= token
->val
.str
.len
;
/* Anything else is reported as an internal compiler error.  */
2657 cpp_error (pfile
, CPP_DL_ICE
,
2658 "unspellable token %s", TOKEN_NAME (token
));
2665 /* Returns TOKEN spelt as a null-terminated string. The string is
2666 freed when the reader is destroyed. Useful for diagnostics. */
/* The storage comes from _cpp_unaligned_alloc and is sized from
   cpp_token_len plus one byte (presumably for the terminating NUL).
   The token is spelt with FORSTRING false.  */
2668 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
2670 unsigned int len
= cpp_token_len (token
) + 1;
2671 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
2673 end
= cpp_spell_token (pfile
, token
, start
, false);
2679 /* Returns a pointer to a string which spells the token defined by
2680 TYPE and FLAGS. Used by C front ends, which really should move to
2681 using cpp_token_as_text. */
/* FLAGS is checked for DIGRAPH first, then NAMED_OP; with neither set
   the name stored in token_spellings for TYPE is returned.  */
2683 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
2685 if (flags
& DIGRAPH
)
2686 return (const char *) cpp_digraph2name (type
);
2687 else if (flags
& NAMED_OP
)
2688 return cpp_named_operator2name (type
);
2690 return (const char *) token_spellings
[type
].name
;
2693 /* Writes the spelling of token to FP, without any preceding space.
2694 Separated from cpp_spell_token for efficiency - to avoid stdio
2695 double-buffering. */
/* The structure parallels cpp_spell_token: the spelling category
   selects how the token's text is produced, but the output goes
   straight to FP.  */
2697 cpp_output_token (const cpp_token
*token
, FILE *fp
)
2699 switch (TOKEN_SPELL (token
))
2701 case SPELL_OPERATOR
:
2703 const unsigned char *spelling
;
/* Choose the digraph spelling, the named-operator spelling, or the
   default entry from token_spellings, tested in that order.  */
2706 if (token
->flags
& DIGRAPH
)
2707 spelling
= cpp_digraph2name (token
->type
);
2708 else if (token
->flags
& NAMED_OP
)
2711 spelling
= TOKEN_NAME (token
);
/* Step to the next spelling character; stop at the terminating NUL.  */
2716 while ((c
= *++spelling
) != '\0');
/* Identifier: bytes outside 7-bit ASCII are converted through
   utf8_to_ucn into a local 10-byte buffer and written out; all other
   bytes are written as they are.  */
2724 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
2726 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
2727 if (name
[i
] & ~0x7F)
2729 unsigned char buffer
[10];
/* utf8_to_ucn returns the bytes consumed; subtract one because the
   loop's own i++ accounts for the first of them.  */
2730 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
2731 fwrite (buffer
, 1, 10, fp
);
2734 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
/* Literals are written verbatim from their stored text.  */
2739 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
2743 /* An error, most probably. */
2748 /* Compare two tokens. */
/* Tokens are only compared further when both their type and their
   flags match; the remaining comparison depends on the spelling
   category of A.  */
2750 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
2752 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
2753 switch (TOKEN_SPELL (a
))
2755 default: /* Keep compiler happy. */
2756 case SPELL_OPERATOR
:
2757 /* token_no is used to track where multiple consecutive ##
2758 tokens were originally located. */
2759 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
/* Macro arguments must additionally refer to the same argument
   number.  */
2761 return (a
->type
!= CPP_MACRO_ARG
2762 || a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
);
/* Identifiers are compared by node pointer.  */
2764 return a
->val
.node
.node
== b
->val
.node
.node
;
/* Literals must have equal length and equal bytes.  */
2766 return (a
->val
.str
.len
== b
->val
.str
.len
2767 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
2774 /* Returns nonzero if a space should be inserted to avoid an
2775 accidental token paste for output. For simplicity, it is
2776 conservative, and occasionally advises a space where one is not
2777 needed, e.g. "." and ".2". */
/* A and B start out as the types of TOKEN1 and TOKEN2.  C holds the
   first spelling character of TOKEN2 when TOKEN2 is a digraph or an
   operator-category token.  The decision is then made per type of
   TOKEN1.  */
2779 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
2780 const cpp_token
*token2
)
2782 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
2785 if (token1
->flags
& NAMED_OP
)
2787 if (token2
->flags
& NAMED_OP
)
/* A digraph is judged by the first character of its digraph
   spelling, any other operator by its token_spellings entry.  */
2791 if (token2
->flags
& DIGRAPH
)
2792 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
2793 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
2794 c
= token_spellings
[b
].name
[0];
2796 /* Quickly get everything that can paste with an '='. */
2797 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* Each case lists the following characters (or token types) that
   could combine with TOKEN1's spelling.  */
2802 case CPP_GREATER
: return c
== '>';
2803 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
2804 case CPP_PLUS
: return c
== '+';
2805 case CPP_MINUS
: return c
== '-' || c
== '>';
2806 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
/* '%' followed by ':' or '>' would spell the digraphs "%:" and "%>"
   listed in digraph_spellings.  */
2807 case CPP_MOD
: return c
== ':' || c
== '>';
2808 case CPP_AND
: return c
== '&';
2809 case CPP_OR
: return c
== '|';
2810 case CPP_COLON
: return c
== ':' || c
== '>';
2811 case CPP_DEREF
: return c
== '*';
2812 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
2813 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
2814 case CPP_NAME
: return ((b
== CPP_NUMBER
2815 && name_p (pfile
, &token2
->val
.str
))
2817 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
2818 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
2819 || c
== '.' || c
== '+' || c
== '-');
/* CPP_OTHER tokens are inspected by the first byte of their text.  */
2821 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
2823 || (CPP_OPTION (pfile
, objc
)
2824 && token1
->val
.str
.text
[0] == '@'
2825 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
/* String tokens only matter when the user_literals option is on.  */
2828 case CPP_UTF8STRING
:
2830 case CPP_STRING32
: return (CPP_OPTION (pfile
, user_literals
)
2832 || (TOKEN_SPELL (token2
) == SPELL_LITERAL
2833 && ISIDST (token2
->val
.str
.text
[0]))));
2841 /* Output all the remaining tokens on the current line, and a newline
2842 character, to FP. Leading whitespace is removed. If there are
2843 macros, special token padding is not performed. */
/* Tokens are fetched with cpp_get_token until CPP_EOF and written
   with cpp_output_token.  The PREV_WHITE flag of the following token
   is tested after each one is written.  */
2845 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
2847 const cpp_token
*token
;
2849 token
= cpp_get_token (pfile
);
2850 while (token
->type
!= CPP_EOF
)
2852 cpp_output_token (token
, fp
);
2853 token
= cpp_get_token (pfile
);
2854 if (token
->flags
& PREV_WHITE
)
2861 /* Return a string representation of all the remaining tokens on the
2862 current line. The result is allocated using xmalloc and must be
2863 freed by the caller. */
/* DIR_NAME, when given, is written at the front as "#DIR_NAME ".
   OUT is the write offset into RESULT and ALLOCED its current
   capacity, which starts at 120 bytes plus the length of DIR_NAME.  */
2865 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
2867 const cpp_token
*token
;
2868 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
2869 unsigned int alloced
= 120 + out
;
2870 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
2872 /* If DIR_NAME is empty, there are no initial contents. */
2875 sprintf ((char *) result
, "#%s ", dir_name
);
2879 token
= cpp_get_token (pfile
);
2880 while (token
->type
!= CPP_EOF
)
2882 unsigned char *last
;
2883 /* Include room for a possible space and the terminating nul. */
2884 unsigned int len
= cpp_token_len (token
) + 2;
/* Grow RESULT before spelling when the worst-case length would not
   fit; the new capacity is at least OUT + LEN.  */
2886 if (out
+ len
> alloced
)
2889 if (out
+ len
> alloced
)
2890 alloced
= out
+ len
;
2891 result
= (unsigned char *) xrealloc (result
, alloced
);
/* Spell the token at the current offset, then recompute the offset
   from the end pointer cpp_spell_token returns.  */
2894 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
2895 out
= last
- result
;
/* A single space stands in for whitespace before the next token.  */
2897 token
= cpp_get_token (pfile
);
2898 if (token
->flags
& PREV_WHITE
)
2899 result
[out
++] = ' ';
2906 /* Memory buffers. Changing these three constants can have a dramatic
2907 effect on performance. The values here are reasonable defaults,
2908 but might be tuned. If you adjust them, be sure to test across a
2909 range of uses of cpplib, including heavy nested function-like macro
2910 expansion. Also check the change in peak memory usage (NJAMD is a
2911 good tool for this). */
/* Smallest data area new_buff will allocate.  */
2912 #define MIN_BUFF_SIZE 8000
/* Largest free-list buffer that _cpp_get_buff will reuse for a
   request of MIN_SIZE bytes: MIN_BUFF_SIZE plus one and a half times
   MIN_SIZE.  */
2913 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the span
   from BUFF->cur to BUFF->limit.
   NOTE(review): MIN_EXTRA is not parenthesized in the expansion, so
   callers should pass a simple expression.  */
2914 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2915 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check relating the two size macros.  */
2917 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2918 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2921 /* Create a new allocation buffer. Place the control block at the end
2922 of the buffer, so that buffer overflows will cause immediate chaos. */
/* LEN is raised to at least MIN_BUFF_SIZE and then rounded with
   CPP_ALIGN.  In the default layout the _cpp_buff control block sits
   at base + len, directly after the data area; when
   ENABLE_VALGRIND_CHECKING is defined it is placed at the start of
   the allocation instead.  The new buffer's limit is base + len and
   its next pointer is NULL.  */
2924 new_buff (size_t len
)
2927 unsigned char *base
;
2929 if (len
< MIN_BUFF_SIZE
)
2930 len
= MIN_BUFF_SIZE
;
2931 len
= CPP_ALIGN (len
);
2933 #ifdef ENABLE_VALGRIND_CHECKING
2934 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
2936 size_t slen
= CPP_ALIGN2 (sizeof (_cpp_buff
), 2 * DEFAULT_ALIGNMENT
);
2937 base
= XNEWVEC (unsigned char, len
+ slen
);
2938 result
= (_cpp_buff
*) base
;
2941 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
2942 result
= (_cpp_buff
*) (base
+ len
);
2944 result
->base
= base
;
2946 result
->limit
= base
+ len
;
2947 result
->next
= NULL
;
2951 /* Place a chain of unwanted allocation buffers on the free list. */
/* END starts at BUFF (presumably it is advanced to the last buffer of
   the chain -- that step is not visible here).  The existing free
   list is attached after END, and BUFF becomes the new head of
   pfile->free_buffs.  */
2953 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
2955 _cpp_buff
*end
= buff
;
2959 end
->next
= pfile
->free_buffs
;
2960 pfile
->free_buffs
= buff
;
2963 /* Return a free buffer of size at least MIN_SIZE. */
/* The free list pfile->free_buffs is walked through a
   pointer-to-pointer P.  A fresh buffer from new_buff (min_size) is
   returned on one path (presumably when the list is exhausted -- the
   test is not visible here).  A listed buffer is accepted when its
   capacity, limit - base, is at least MIN_SIZE and no larger than
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE).  The buffer handed back has its
   next pointer cleared and cur reset to base.  */
2965 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
2967 _cpp_buff
*result
, **p
;
2969 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
2974 return new_buff (min_size
);
2976 size
= result
->limit
- result
->base
;
2977 /* Return a buffer that's big enough, but don't waste one that's
2979 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2984 result
->next
= NULL
;
2985 result
->cur
= result
->base
;
2989 /* Creates a new buffer with enough space to hold the uncommitted
2990 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2991 the excess bytes to the new buffer. Chains the new buffer after
2992 BUFF, and returns the new buffer. */
/* The requested size comes from EXTENDED_BUFF_SIZE; BUFF_ROOM (buff)
   bytes starting at buff->cur are copied to the base of the new
   buffer.  */
2994 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
2996 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
2997 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
2999 buff
->next
= new_buff
;
3000 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
3004 /* Creates a new buffer with enough space to hold the uncommitted
3005 remaining bytes of the buffer pointed to by BUFF, and at least
3006 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3007 Chains the new buffer before the buffer pointed to by BUFF, and
3008 updates the pointer to point to the new buffer. */
/* The parameter is named PBUFF in the signature.  The requested size
   comes from EXTENDED_BUFF_SIZE; BUFF_ROOM (old_buff) bytes starting
   at old_buff->cur are copied to the base of the new buffer, whose
   next pointer is then set to the old buffer.  */
3010 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
3012 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
3013 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
3015 new_buff
= _cpp_get_buff (pfile
, size
);
3016 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
3017 new_buff
->next
= old_buff
;
3021 /* Free a chain of buffers starting at BUFF. */
/* The loop advances through a separate NEXT variable rather than
   reading buff->next in the loop header.  The per-buffer release code
   is not visible here; it has an ENABLE_VALGRIND_CHECKING
   variant.  */
3023 _cpp_free_buff (_cpp_buff
*buff
)
3027 for (; buff
; buff
= next
)
3030 #ifdef ENABLE_VALGRIND_CHECKING
3038 /* Allocate permanent, unaligned storage of length LEN. */
/* Storage is carved from pfile->u_buff starting at its cur pointer.
   When fewer than LEN bytes remain before limit, a buffer of at least
   LEN bytes is obtained from _cpp_get_buff and pushed on the front of
   the u_buff chain.  */
3040 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
3042 _cpp_buff
*buff
= pfile
->u_buff
;
3043 unsigned char *result
= buff
->cur
;
3045 if (len
> (size_t) (buff
->limit
- result
))
3047 buff
= _cpp_get_buff (pfile
, len
);
3048 buff
->next
= pfile
->u_buff
;
3049 pfile
->u_buff
= buff
;
/* Mark the LEN bytes starting at RESULT as used.  */
3053 buff
->cur
= result
+ len
;
3057 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3058 That buffer is used for growing allocations when saving macro
3059 replacement lists in a #define, and when parsing an answer to an
3060 assertion in #assert, #unassert or #if (and therefore possibly
3061 whilst expanding macros). It therefore must not be used by any
3062 code that they might call: specifically the lexer and the guts of
3065 All existing other uses clearly fit this restriction: storing
3066 registered pragmas during initialization. */
/* NOTE(review): the comment above says "unaligned" although the
   function is named _cpp_aligned_alloc -- confirm which is intended.
   The visible statements mirror _cpp_unaligned_alloc, operating on
   pfile->a_buff instead of pfile->u_buff.  */
3068 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
3070 _cpp_buff
*buff
= pfile
->a_buff
;
3071 unsigned char *result
= buff
->cur
;
/* Not enough room left: fetch a buffer of at least LEN bytes and push
   it on the front of the a_buff chain.  */
3073 if (len
> (size_t) (buff
->limit
- result
))
3075 buff
= _cpp_get_buff (pfile
, len
);
3076 buff
->next
= pfile
->a_buff
;
3077 pfile
->a_buff
= buff
;
/* Mark the LEN bytes starting at RESULT as used.  */
3081 buff
->cur
= result
+ len
;
3085 /* Say which field of TOK is in use. */
/* The answer is a cpp_token_fld_kind value chosen from the token's
   spelling category and, for some categories, its type.  Several case
   labels are not visible in this listing.  */
3087 enum cpp_token_fld_kind
3088 cpp_token_val_index (const cpp_token
*tok
)
3090 switch (TOKEN_SPELL (tok
))
3093 return CPP_TOKEN_FLD_NODE
;
3095 return CPP_TOKEN_FLD_STR
;
/* Among operators only CPP_PASTE carries a value (token_no).  */
3096 case SPELL_OPERATOR
:
3097 if (tok
->type
== CPP_PASTE
)
3098 return CPP_TOKEN_FLD_TOKEN_NO
;
3100 return CPP_TOKEN_FLD_NONE
;
/* Macro arguments, padding and pragma tokens each use their own
   field.  */
3102 if (tok
->type
== CPP_MACRO_ARG
)
3103 return CPP_TOKEN_FLD_ARG_NO
;
3104 else if (tok
->type
== CPP_PADDING
)
3105 return CPP_TOKEN_FLD_SOURCE
;
3106 else if (tok
->type
== CPP_PRAGMA
)
3107 return CPP_TOKEN_FLD_PRAGMA
;
3108 /* else fall through */
3110 return CPP_TOKEN_FLD_NONE
;
3114 /* All tokens lexed in R after calling this function will be forced to have
3115 their source_location the same as the location referenced by P, until
3116 cpp_stop_forcing_token_locations is called for R. */
/* Only the pointer P is stored (in r->forced_token_location_p); the
   location it points to is not copied here.  */
3119 cpp_force_token_locations (cpp_reader
*r
, source_location
*p
)
3121 r
->forced_token_location_p
= p
;
3124 /* Go back to assigning locations naturally for lexed tokens. */
/* Clears r->forced_token_location_p, the pointer installed by
   cpp_force_token_locations.  */
3127 cpp_stop_forcing_token_locations (cpp_reader
*r
)
3129 r
->forced_token_location_p
= NULL
;