1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
38 enum spell_type category
;
39 const unsigned char *name
;
42 static const unsigned char *const digraph_spellings
[] =
43 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
45 #define OP(e, s) { SPELL_OPERATOR, UC s },
46 #define TK(e, s) { SPELL_ ## s, UC #e },
47 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
54 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
55 static int skip_line_comment (cpp_reader
*);
56 static void skip_whitespace (cpp_reader
*, cppchar_t
);
57 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
58 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
59 static void store_comment (cpp_reader
*, cpp_token
*);
60 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
61 unsigned int, enum cpp_ttype
);
62 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
63 static int name_p (cpp_reader
*, const cpp_string
*);
64 static tokenrun
*next_tokenrun (tokenrun
*);
66 static _cpp_buff
*new_buff (size_t);
71 Compares, the token TOKEN to the NUL-terminated string STRING.
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
74 cpp_ideq (const cpp_token
*token
, const char *string
)
76 if (token
->type
!= CPP_NAME
)
79 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
82 /* Record a note TYPE at byte POS into the current cleaned logical
85 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
87 if (buffer
->notes_used
== buffer
->notes_cap
)
89 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
90 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
94 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
95 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Returns with a logical line that contains no escaped newlines or
100 trigraphs. This is a time-critical inner loop. */
102 _cpp_clean_line (cpp_reader
*pfile
)
108 buffer
= pfile
->buffer
;
109 buffer
->cur_note
= buffer
->notes_used
= 0;
110 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
111 buffer
->need_line
= false;
112 s
= buffer
->next_line
- 1;
114 if (!buffer
->from_stage3
)
116 const uchar
*pbackslash
= NULL
;
118 /* Short circuit for the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */
124 if (__builtin_expect (c
== '\n', false)
125 || __builtin_expect (c
== '\r', false))
129 if (__builtin_expect (s
== buffer
->rlimit
, false))
132 /* DOS line ending? */
133 if (__builtin_expect (c
== '\r', false)
137 if (s
== buffer
->rlimit
)
141 if (__builtin_expect (pbackslash
== NULL
, true))
144 /* Check for escaped newline. */
146 while (is_nvspace (p
[-1]))
148 if (p
- 1 != pbackslash
)
151 /* Have an escaped newline; process it and proceed to
153 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
155 buffer
->next_line
= p
- 1;
158 if (__builtin_expect (c
== '\\', false))
160 else if (__builtin_expect (c
== '?', false)
161 && __builtin_expect (s
[1] == '?', false)
162 && _cpp_trigraph_map
[s
[2]])
164 /* Have a trigraph. We may or may not have to convert
165 it. Add a line note regardless, for -Wtrigraphs. */
166 add_line_note (buffer
, s
, s
[2]);
167 if (CPP_OPTION (pfile
, trigraphs
))
169 /* We do, and that means we have to switch to the
172 *d
= _cpp_trigraph_map
[s
[2]];
185 if (c
== '\n' || c
== '\r')
187 /* Handle DOS line endings. */
188 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
190 if (s
== buffer
->rlimit
)
195 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
197 if (p
== buffer
->next_line
|| p
[-1] != '\\')
200 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
202 buffer
->next_line
= p
- 1;
204 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */
207 add_line_note (buffer
, d
, s
[2]);
208 if (CPP_OPTION (pfile
, trigraphs
))
210 *d
= _cpp_trigraph_map
[s
[2]];
220 while (*s
!= '\n' && *s
!= '\r');
223 /* Handle DOS line endings. */
224 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
230 /* A sentinel note that should never be processed. */
231 add_line_note (buffer
, d
+ 1, '\n');
232 buffer
->next_line
= s
+ 1;
235 /* Return true if the trigraph indicated by NOTE should be warned
236 about in a comment. */
238 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
242 /* Within comments we don't warn about trigraphs, unless the
243 trigraph forms an escaped newline, as that may change
245 if (note
->type
!= '/')
248 /* If -trigraphs, then this was an escaped newline iff the next note
250 if (CPP_OPTION (pfile
, trigraphs
))
251 return note
[1].pos
== note
->pos
;
253 /* Otherwise, see if this forms an escaped newline. */
255 while (is_nvspace (*p
))
258 /* There might have been escaped newlines between the trigraph and the
259 newline we found. Hence the position test. */
260 return (*p
== '\n' && p
< note
[1].pos
);
263 /* Process the notes created by add_line_note as far as the current
266 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
268 cpp_buffer
*buffer
= pfile
->buffer
;
272 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
275 if (note
->pos
> buffer
->cur
)
279 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
281 if (note
->type
== '\\' || note
->type
== ' ')
283 if (note
->type
== ' ' && !in_comment
)
284 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
285 "backslash and newline separated by space");
287 if (buffer
->next_line
> buffer
->rlimit
)
289 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
290 "backslash-newline at end of file");
291 /* Prevent "no newline at end of file" warning. */
292 buffer
->next_line
= buffer
->rlimit
;
295 buffer
->line_base
= note
->pos
;
296 CPP_INCREMENT_LINE (pfile
, 0);
298 else if (_cpp_trigraph_map
[note
->type
])
300 if (CPP_OPTION (pfile
, warn_trigraphs
)
301 && (!in_comment
|| warn_in_comment (pfile
, note
)))
303 if (CPP_OPTION (pfile
, trigraphs
))
304 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
305 "trigraph ??%c converted to %c",
307 (int) _cpp_trigraph_map
[note
->type
]);
311 (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
312 "trigraph ??%c ignored, use -trigraphs to enable",
322 /* Skip a C-style block comment. We find the end of the comment by
323 seeing if an asterisk is before every '/' we encounter. Returns
324 nonzero if comment terminated by EOF, zero otherwise.
326 Buffer->cur points to the initial asterisk of the comment. */
328 _cpp_skip_block_comment (cpp_reader
*pfile
)
330 cpp_buffer
*buffer
= pfile
->buffer
;
331 const uchar
*cur
= buffer
->cur
;
340 /* People like decorating comments with '*', so check for '/'
341 instead for efficiency. */
349 /* Warn about potential nested comments, but not if the '/'
350 comes immediately before the true comment delimiter.
351 Don't bother to get it right across escaped newlines. */
352 if (CPP_OPTION (pfile
, warn_comments
)
353 && cur
[0] == '*' && cur
[1] != '/')
356 cpp_error_with_line (pfile
, CPP_DL_WARNING
,
357 pfile
->line_table
->highest_line
, CPP_BUF_COL (buffer
),
358 "\"/*\" within comment");
364 buffer
->cur
= cur
- 1;
365 _cpp_process_line_notes (pfile
, true);
366 if (buffer
->next_line
>= buffer
->rlimit
)
368 _cpp_clean_line (pfile
);
370 cols
= buffer
->next_line
- buffer
->line_base
;
371 CPP_INCREMENT_LINE (pfile
, cols
);
378 _cpp_process_line_notes (pfile
, true);
382 /* Skip a C++ line comment, leaving buffer->cur pointing to the
383 terminating newline. Handles escaped newlines. Returns nonzero
384 if a multiline comment. */
386 skip_line_comment (cpp_reader
*pfile
)
388 cpp_buffer
*buffer
= pfile
->buffer
;
389 source_location orig_line
= pfile
->line_table
->highest_line
;
391 while (*buffer
->cur
!= '\n')
394 _cpp_process_line_notes (pfile
, true);
395 return orig_line
!= pfile
->line_table
->highest_line
;
398 /* Skips whitespace, saving the next non-whitespace character. */
400 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
402 cpp_buffer
*buffer
= pfile
->buffer
;
403 bool saw_NUL
= false;
407 /* Horizontal space always OK. */
408 if (c
== ' ' || c
== '\t')
410 /* Just \f \v or \0 left. */
413 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
414 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
415 CPP_BUF_COL (buffer
),
416 "%s in preprocessing directive",
417 c
== '\f' ? "form feed" : "vertical tab");
421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
422 while (is_nvspace (c
));
425 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
430 /* See if the characters of a number token are valid in a name (no
433 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
437 for (i
= 0; i
< string
->len
; i
++)
438 if (!is_idchar (string
->text
[i
]))
444 /* After parsing an identifier or other sequence, produce a warning about
445 sequences not in NFC/NFKC. */
447 warn_about_normalization (cpp_reader
*pfile
,
448 const cpp_token
*token
,
449 const struct normalize_state
*s
)
451 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
452 && !pfile
->state
.skipping
)
454 /* Make sure that the token is printed using UCNs, even
455 if we'd otherwise happily print UTF-8. */
456 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
459 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
460 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
461 cpp_error_with_line (pfile
, CPP_DL_WARNING
, token
->src_loc
, 0,
462 "`%.*s' is not in NFKC", (int) sz
, buf
);
464 cpp_error_with_line (pfile
, CPP_DL_WARNING
, token
->src_loc
, 0,
465 "`%.*s' is not in NFC", (int) sz
, buf
);
469 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
470 an identifier. FIRST is TRUE if this starts an identifier. */
472 forms_identifier_p (cpp_reader
*pfile
, int first
,
473 struct normalize_state
*state
)
475 cpp_buffer
*buffer
= pfile
->buffer
;
477 if (*buffer
->cur
== '$')
479 if (!CPP_OPTION (pfile
, dollars_in_ident
))
483 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
485 CPP_OPTION (pfile
, warn_dollars
) = 0;
486 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
492 /* Is this a syntactically valid UCN? */
493 if (CPP_OPTION (pfile
, extended_identifiers
)
494 && *buffer
->cur
== '\\'
495 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
498 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
507 /* Helper function to get the cpp_hashnode of the identifier BASE. */
508 static cpp_hashnode
*
509 lex_identifier_intern (cpp_reader
*pfile
, const uchar
*base
)
511 cpp_hashnode
*result
;
514 unsigned int hash
= HT_HASHSTEP (0, *base
);
517 while (ISIDNUM (*cur
))
519 hash
= HT_HASHSTEP (hash
, *cur
);
523 hash
= HT_HASHFINISH (hash
, len
);
524 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
525 base
, len
, hash
, HT_ALLOC
));
527 /* Rarely, identifiers require diagnostics when lexed. */
528 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
529 && !pfile
->state
.skipping
, 0))
531 /* It is allowed to poison the same identifier twice. */
532 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
533 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
536 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
537 replacement list of a variadic macro. */
538 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
539 && !pfile
->state
.va_args_ok
)
540 cpp_error (pfile
, CPP_DL_PEDWARN
,
541 "__VA_ARGS__ can only appear in the expansion"
542 " of a C99 variadic macro");
544 /* For -Wc++-compat, warn about use of C++ named operators. */
545 if (result
->flags
& NODE_WARN_OPERATOR
)
546 cpp_error (pfile
, CPP_DL_WARNING
,
547 "identifier \"%s\" is a special operator name in C++",
554 /* Get the cpp_hashnode of an identifier specified by NAME in
555 the current cpp_reader object. If none is found, NULL is returned. */
557 _cpp_lex_identifier (cpp_reader
*pfile
, const char *name
)
559 cpp_hashnode
*result
;
560 result
= lex_identifier_intern (pfile
, (uchar
*) name
);
564 /* Lex an identifier starting at BUFFER->CUR - 1. */
565 static cpp_hashnode
*
566 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
567 struct normalize_state
*nst
)
569 cpp_hashnode
*result
;
572 unsigned int hash
= HT_HASHSTEP (0, *base
);
574 cur
= pfile
->buffer
->cur
;
576 while (ISIDNUM (*cur
))
578 hash
= HT_HASHSTEP (hash
, *cur
);
581 pfile
->buffer
->cur
= cur
;
582 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
584 /* Slower version for identifiers containing UCNs (or $). */
586 while (ISIDNUM (*pfile
->buffer
->cur
))
588 pfile
->buffer
->cur
++;
589 NORMALIZE_STATE_UPDATE_IDNUM (nst
);
591 } while (forms_identifier_p (pfile
, false, nst
));
592 result
= _cpp_interpret_identifier (pfile
, base
,
593 pfile
->buffer
->cur
- base
);
598 hash
= HT_HASHFINISH (hash
, len
);
600 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
601 base
, len
, hash
, HT_ALLOC
));
604 /* Rarely, identifiers require diagnostics when lexed. */
605 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
606 && !pfile
->state
.skipping
, 0))
608 /* It is allowed to poison the same identifier twice. */
609 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
610 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
613 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
614 replacement list of a variadic macro. */
615 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
616 && !pfile
->state
.va_args_ok
)
617 cpp_error (pfile
, CPP_DL_PEDWARN
,
618 "__VA_ARGS__ can only appear in the expansion"
619 " of a C99 variadic macro");
621 /* For -Wc++-compat, warn about use of C++ named operators. */
622 if (result
->flags
& NODE_WARN_OPERATOR
)
623 cpp_error (pfile
, CPP_DL_WARNING
,
624 "identifier \"%s\" is a special operator name in C++",
631 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
633 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
634 struct normalize_state
*nst
)
640 base
= pfile
->buffer
->cur
- 1;
643 cur
= pfile
->buffer
->cur
;
645 /* N.B. ISIDNUM does not include $. */
646 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
649 NORMALIZE_STATE_UPDATE_IDNUM (nst
);
652 pfile
->buffer
->cur
= cur
;
654 while (forms_identifier_p (pfile
, false, nst
));
656 number
->len
= cur
- base
;
657 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
658 memcpy (dest
, base
, number
->len
);
659 dest
[number
->len
] = '\0';
663 /* Create a token of type TYPE with a literal spelling. */
665 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
666 unsigned int len
, enum cpp_ttype type
)
668 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
670 memcpy (dest
, base
, len
);
673 token
->val
.str
.len
= len
;
674 token
->val
.str
.text
= dest
;
677 /* Lexes a raw string. The stored string contains the spelling, including
678 double quotes, delimiter string, '[' and ']', any leading
679 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
680 literal, or CPP_OTHER if it was not properly terminated.
682 The spelling is NUL-terminated, but it is not guaranteed that this
683 is the first NUL since embedded NULs are preserved. */
686 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
689 source_location saw_NUL
= 0;
690 const uchar
*raw_prefix
;
691 unsigned int raw_prefix_len
= 0;
693 size_t total_len
= 0;
694 _cpp_buff
*first_buff
= NULL
, *last_buff
= NULL
;
696 type
= (*base
== 'L' ? CPP_WSTRING
:
697 *base
== 'U' ? CPP_STRING32
:
698 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
701 raw_prefix
= cur
+ 1;
702 while (raw_prefix_len
< 16)
704 switch (raw_prefix
[raw_prefix_len
])
706 case ' ': case '[': case ']': case '\t':
707 case '\v': case '\f': case '\n': default:
709 /* Basic source charset except the above chars. */
710 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
711 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
712 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
713 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
715 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
716 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
717 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
718 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
720 case '0': case '1': case '2': case '3': case '4': case '5':
721 case '6': case '7': case '8': case '9':
722 case '_': case '{': case '}': case '#': case '(': case ')':
723 case '<': case '>': case '%': case ':': case ';': case '.':
724 case '?': case '*': case '+': case '-': case '/': case '^':
725 case '&': case '|': case '~': case '!': case '=': case ',':
726 case '\\': case '"': case '\'':
733 if (raw_prefix
[raw_prefix_len
] != '[')
735 int col
= CPP_BUF_COLUMN (pfile
->buffer
, raw_prefix
+ raw_prefix_len
)
737 if (raw_prefix_len
== 16)
738 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, col
,
739 "raw string delimiter longer than 16 characters");
741 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, col
,
742 "invalid character '%c' in raw string delimiter",
743 (int) raw_prefix
[raw_prefix_len
]);
744 pfile
->buffer
->cur
= raw_prefix
- 1;
745 create_literal (pfile
, token
, base
, raw_prefix
- 1 - base
, CPP_OTHER
);
749 cur
= raw_prefix
+ raw_prefix_len
+ 1;
752 cppchar_t c
= *cur
++;
755 && strncmp ((const char *) cur
, (const char *) raw_prefix
,
757 && cur
[raw_prefix_len
] == '"')
759 cur
+= raw_prefix_len
+ 1;
764 if (pfile
->state
.in_directive
765 || pfile
->state
.parsing_args
766 || pfile
->state
.in_deferred_pragma
)
770 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
771 "unterminated raw string");
775 /* raw strings allow embedded non-escaped newlines, which
776 complicates this routine a lot. */
777 if (first_buff
== NULL
)
779 total_len
= cur
- base
;
780 first_buff
= last_buff
= _cpp_get_buff (pfile
, total_len
);
781 memcpy (BUFF_FRONT (last_buff
), base
, total_len
);
782 raw_prefix
= BUFF_FRONT (last_buff
) + (raw_prefix
- base
);
783 BUFF_FRONT (last_buff
) += total_len
;
787 size_t len
= cur
- base
;
788 size_t cur_len
= len
> BUFF_ROOM (last_buff
)
789 ? BUFF_ROOM (last_buff
) : len
;
792 memcpy (BUFF_FRONT (last_buff
), base
, cur_len
);
793 BUFF_FRONT (last_buff
) += cur_len
;
796 last_buff
= _cpp_append_extend_buff (pfile
, last_buff
,
798 memcpy (BUFF_FRONT (last_buff
), base
+ cur_len
,
800 BUFF_FRONT (last_buff
) += len
- cur_len
;
804 if (pfile
->buffer
->cur
< pfile
->buffer
->rlimit
)
805 CPP_INCREMENT_LINE (pfile
, 0);
806 pfile
->buffer
->need_line
= true;
808 if (!_cpp_get_fresh_line (pfile
))
810 source_location src_loc
= token
->src_loc
;
811 token
->type
= CPP_EOF
;
812 /* Tell the compiler the line number of the EOF token. */
813 token
->src_loc
= pfile
->line_table
->highest_line
;
815 if (first_buff
!= NULL
)
816 _cpp_release_buff (pfile
, first_buff
);
817 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
818 "unterminated raw string");
822 cur
= base
= pfile
->buffer
->cur
;
824 else if (c
== '\0' && !saw_NUL
)
825 LINEMAP_POSITION_FOR_COLUMN (saw_NUL
, pfile
->line_table
,
826 CPP_BUF_COLUMN (pfile
->buffer
, cur
));
829 if (saw_NUL
&& !pfile
->state
.skipping
)
830 cpp_error_with_line (pfile
, CPP_DL_WARNING
, saw_NUL
, 0,
831 "null character(s) preserved in literal");
833 pfile
->buffer
->cur
= cur
;
834 if (first_buff
== NULL
)
835 create_literal (pfile
, token
, base
, cur
- base
, type
);
838 uchar
*dest
= _cpp_unaligned_alloc (pfile
, total_len
+ (cur
- base
) + 1);
841 token
->val
.str
.len
= total_len
+ (cur
- base
);
842 token
->val
.str
.text
= dest
;
843 last_buff
= first_buff
;
844 while (last_buff
!= NULL
)
846 memcpy (dest
, last_buff
->base
,
847 BUFF_FRONT (last_buff
) - last_buff
->base
);
848 dest
+= BUFF_FRONT (last_buff
) - last_buff
->base
;
849 last_buff
= last_buff
->next
;
851 _cpp_release_buff (pfile
, first_buff
);
852 memcpy (dest
, base
, cur
- base
);
853 dest
[cur
- base
] = '\0';
857 /* Lexes a string, character constant, or angle-bracketed header file
858 name. The stored string contains the spelling, including opening
859 quote and any leading 'L', 'u', 'U' or 'u8' and optional
860 'R' modifier. It returns the type of the literal, or CPP_OTHER
861 if it was not properly terminated, or CPP_LESS for an unterminated
862 header name which must be relexed as normal tokens.
864 The spelling is NUL-terminated, but it is not guaranteed that this
865 is the first NUL since embedded NULs are preserved. */
867 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
869 bool saw_NUL
= false;
871 cppchar_t terminator
;
876 if (terminator
== 'L' || terminator
== 'U')
878 else if (terminator
== 'u')
881 if (terminator
== '8')
884 if (terminator
== 'R')
886 lex_raw_string (pfile
, token
, base
, cur
);
889 if (terminator
== '"')
890 type
= (*base
== 'L' ? CPP_WSTRING
:
891 *base
== 'U' ? CPP_STRING32
:
892 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
894 else if (terminator
== '\'')
895 type
= (*base
== 'L' ? CPP_WCHAR
:
896 *base
== 'U' ? CPP_CHAR32
:
897 *base
== 'u' ? CPP_CHAR16
: CPP_CHAR
);
899 terminator
= '>', type
= CPP_HEADER_NAME
;
903 cppchar_t c
= *cur
++;
905 /* In #include-style directives, terminators are not escapable. */
906 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
908 else if (c
== terminator
)
913 /* Unmatched quotes always yield undefined behavior, but
914 greedy lexing means that what appears to be an unterminated
915 header name may actually be a legitimate sequence of tokens. */
916 if (terminator
== '>')
918 token
->type
= CPP_LESS
;
928 if (saw_NUL
&& !pfile
->state
.skipping
)
929 cpp_error (pfile
, CPP_DL_WARNING
,
930 "null character(s) preserved in literal");
932 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
933 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
936 pfile
->buffer
->cur
= cur
;
937 create_literal (pfile
, token
, base
, cur
- base
, type
);
940 /* Return the comment table. The client may not make any assumption
941 about the ordering of the table. */
943 cpp_get_comments (cpp_reader
*pfile
)
945 return &pfile
->comments
;
948 /* Append a comment to the end of the comment table. */
950 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
954 if (pfile
->comments
.allocated
== 0)
956 pfile
->comments
.allocated
= 256;
957 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
958 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
961 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
963 pfile
->comments
.allocated
*= 2;
964 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
965 (pfile
->comments
.entries
,
966 pfile
->comments
.allocated
* sizeof (cpp_comment
));
969 len
= token
->val
.str
.len
;
971 /* Copy comment. Note, token may not be NULL terminated. */
972 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
973 (char *) xmalloc (sizeof (char) * (len
+ 1));
974 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
975 token
->val
.str
.text
, len
);
976 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
978 /* Set source location. */
979 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
981 /* Increment the count of entries in the comment table. */
982 pfile
->comments
.count
++;
985 /* The stored comment includes the comment start and any terminator. */
987 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
990 unsigned char *buffer
;
991 unsigned int len
, clen
;
993 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
995 /* C++ comments probably (not definitely) have moved past a new
996 line, which we don't want to save in the comment. */
997 if (is_vspace (pfile
->buffer
->cur
[-1]))
1000 /* If we are currently in a directive, then we need to store all
1001 C++ comments as C comments internally, and so we need to
1002 allocate a little extra space in that case.
1004 Note that the only time we encounter a directive here is
1005 when we are saving comments in a "#define". */
1006 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
1008 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
1010 token
->type
= CPP_COMMENT
;
1011 token
->val
.str
.len
= clen
;
1012 token
->val
.str
.text
= buffer
;
1015 memcpy (buffer
+ 1, from
, len
- 1);
1017 /* Finish conversion to a C comment, if necessary. */
1018 if (pfile
->state
.in_directive
&& type
== '/')
1021 buffer
[clen
- 2] = '*';
1022 buffer
[clen
- 1] = '/';
1025 /* Finally store this comment for use by clients of libcpp. */
1026 store_comment (pfile
, token
);
1029 /* Allocate COUNT tokens for RUN. */
1031 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
1033 run
->base
= XNEWVEC (cpp_token
, count
);
1034 run
->limit
= run
->base
+ count
;
1038 /* Returns the next tokenrun, or creates one if there is none. */
1040 next_tokenrun (tokenrun
*run
)
1042 if (run
->next
== NULL
)
1044 run
->next
= XNEW (tokenrun
);
1045 run
->next
->prev
= run
;
1046 _cpp_init_tokenrun (run
->next
, 250);
1052 /* Look ahead in the input stream. */
1054 cpp_peek_token (cpp_reader
*pfile
, int index
)
1056 cpp_context
*context
= pfile
->context
;
1057 const cpp_token
*peektok
;
1060 /* First, scan through any pending cpp_context objects. */
1061 while (context
->prev
)
1063 ptrdiff_t sz
= (context
->direct_p
1064 ? LAST (context
).token
- FIRST (context
).token
1065 : LAST (context
).ptoken
- FIRST (context
).ptoken
);
1067 if (index
< (int) sz
)
1068 return (context
->direct_p
1069 ? FIRST (context
).token
+ index
1070 : *(FIRST (context
).ptoken
+ index
));
1073 context
= context
->prev
;
1076 /* We will have to read some new tokens after all (and do so
1077 without invalidating preceding tokens). */
1079 pfile
->keep_tokens
++;
1083 peektok
= _cpp_lex_token (pfile
);
1084 if (peektok
->type
== CPP_EOF
)
1089 _cpp_backup_tokens_direct (pfile
, count
+ 1);
1090 pfile
->keep_tokens
--;
1095 /* Allocate a single token that is invalidated at the same time as the
1096 rest of the tokens on the line. Has its line and col set to the
1097 same as the last lexed token, so that diagnostics appear in the
1100 _cpp_temp_token (cpp_reader
*pfile
)
1102 cpp_token
*old
, *result
;
1103 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
1104 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
1106 old
= pfile
->cur_token
- 1;
1107 /* Any pre-existing lookaheads must not be clobbered. */
1112 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
1115 memmove (next
->base
+ 1, next
->base
,
1116 (la
- sz
) * sizeof (cpp_token
));
1118 next
->base
[0] = pfile
->cur_run
->limit
[-1];
1122 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
1123 MIN (la
, sz
- 1) * sizeof (cpp_token
));
1126 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
1128 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
1129 pfile
->cur_token
= pfile
->cur_run
->base
;
1132 result
= pfile
->cur_token
++;
1133 result
->src_loc
= old
->src_loc
;
1137 /* Lex a token into RESULT (external interface). Takes care of issues
1138 like directive handling, token lookahead, multiple include
1139 optimization and skipping. */
1141 _cpp_lex_token (cpp_reader
*pfile
)
1147 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
1149 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
1150 pfile
->cur_token
= pfile
->cur_run
->base
;
1152 /* We assume that the current token is somewhere in the current
1154 if (pfile
->cur_token
< pfile
->cur_run
->base
1155 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
1158 if (pfile
->lookaheads
)
1160 pfile
->lookaheads
--;
1161 result
= pfile
->cur_token
++;
1164 result
= _cpp_lex_direct (pfile
);
1166 if (result
->flags
& BOL
)
1168 /* Is this a directive. If _cpp_handle_directive returns
1169 false, it is an assembler #. */
1170 if (result
->type
== CPP_HASH
1171 /* 6.10.3 p 11: Directives in a list of macro arguments
1172 gives undefined behavior. This implementation
1173 handles the directive as normal. */
1174 && pfile
->state
.parsing_args
!= 1)
1176 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
1178 if (pfile
->directive_result
.type
== CPP_PADDING
)
1180 result
= &pfile
->directive_result
;
1183 else if (pfile
->state
.in_deferred_pragma
)
1184 result
= &pfile
->directive_result
;
1186 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
1187 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
1190 /* We don't skip tokens in directives. */
1191 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
1194 /* Outside a directive, invalidate controlling macros. At file
1195 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1196 get here and MI optimization works. */
1197 pfile
->mi_valid
= false;
1199 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
1206 /* Returns true if a fresh line has been loaded. */
1208 _cpp_get_fresh_line (cpp_reader
*pfile
)
1212 /* We can't get a new line until we leave the current directive. */
1213 if (pfile
->state
.in_directive
)
1218 cpp_buffer
*buffer
= pfile
->buffer
;
1220 if (!buffer
->need_line
)
1223 if (buffer
->next_line
< buffer
->rlimit
)
1225 _cpp_clean_line (pfile
);
1229 /* First, get out of parsing arguments state. */
1230 if (pfile
->state
.parsing_args
)
1233 /* End of buffer. Non-empty files should end in a newline. */
1234 if (buffer
->buf
!= buffer
->rlimit
1235 && buffer
->next_line
> buffer
->rlimit
1236 && !buffer
->from_stage3
)
1238 /* Clip to buffer size. */
1239 buffer
->next_line
= buffer
->rlimit
;
1242 return_at_eof
= buffer
->return_at_eof
;
1243 _cpp_pop_buffer (pfile
);
1244 if (pfile
->buffer
== NULL
|| return_at_eof
)
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE.
   Relies on `result' and `buffer' being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
1258 /* Lex a token into pfile->cur_token, which is also incremented, to
1259 get diagnostics pointing to the correct location.
1261 Does not handle issues such as token lookahead, multiple-include
1262 optimization, directives, skipping etc. This function is only
1263 suitable for use by _cpp_lex_token, and in special cases like
1264 lex_expansion_token which doesn't care for any of these issues.
1266 When meeting a newline, returns CPP_EOF if parsing a directive,
1267 otherwise returns to the start of the token buffer if permissible.
1268 Returns the location of the lexed token. */
1270 _cpp_lex_direct (cpp_reader
*pfile
)
1274 const unsigned char *comment_start
;
1275 cpp_token
*result
= pfile
->cur_token
++;
1279 buffer
= pfile
->buffer
;
1280 if (buffer
->need_line
)
1282 if (pfile
->state
.in_deferred_pragma
)
1284 result
->type
= CPP_PRAGMA_EOL
;
1285 pfile
->state
.in_deferred_pragma
= false;
1286 if (!pfile
->state
.pragma_allow_expansion
)
1287 pfile
->state
.prevent_expansion
--;
1290 if (!_cpp_get_fresh_line (pfile
))
1292 result
->type
= CPP_EOF
;
1293 if (!pfile
->state
.in_directive
)
1295 /* Tell the compiler the line number of the EOF token. */
1296 result
->src_loc
= pfile
->line_table
->highest_line
;
1297 result
->flags
= BOL
;
1301 if (!pfile
->keep_tokens
)
1303 pfile
->cur_run
= &pfile
->base_run
;
1304 result
= pfile
->base_run
.base
;
1305 pfile
->cur_token
= result
+ 1;
1307 result
->flags
= BOL
;
1308 if (pfile
->state
.parsing_args
== 2)
1309 result
->flags
|= PREV_WHITE
;
1311 buffer
= pfile
->buffer
;
1313 result
->src_loc
= pfile
->line_table
->highest_line
;
1316 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
1317 && !pfile
->overlaid_buffer
)
1319 _cpp_process_line_notes (pfile
, false);
1320 result
->src_loc
= pfile
->line_table
->highest_line
;
1324 LINEMAP_POSITION_FOR_COLUMN (result
->src_loc
, pfile
->line_table
,
1325 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
1329 case ' ': case '\t': case '\f': case '\v': case '\0':
1330 result
->flags
|= PREV_WHITE
;
1331 skip_whitespace (pfile
, c
);
1335 if (buffer
->cur
< buffer
->rlimit
)
1336 CPP_INCREMENT_LINE (pfile
, 0);
1337 buffer
->need_line
= true;
1340 case '0': case '1': case '2': case '3': case '4':
1341 case '5': case '6': case '7': case '8': case '9':
1343 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1344 result
->type
= CPP_NUMBER
;
1345 lex_number (pfile
, &result
->val
.str
, &nst
);
1346 warn_about_normalization (pfile
, result
, &nst
);
1354 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
1355 wide strings or raw strings. */
1356 if (c
== 'L' || CPP_OPTION (pfile
, uliterals
))
1358 if ((*buffer
->cur
== '\'' && c
!= 'R')
1359 || *buffer
->cur
== '"'
1360 || (*buffer
->cur
== 'R'
1362 && buffer
->cur
[1] == '"'
1363 && CPP_OPTION (pfile
, uliterals
))
1364 || (*buffer
->cur
== '8'
1366 && (buffer
->cur
[1] == '"'
1367 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'))))
1369 lex_string (pfile
, result
, buffer
->cur
- 1);
1376 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1377 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1378 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1379 case 's': case 't': case 'v': case 'w': case 'x':
1381 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1382 case 'G': case 'H': case 'I': case 'J': case 'K':
1383 case 'M': case 'N': case 'O': case 'P': case 'Q':
1384 case 'S': case 'T': case 'V': case 'W': case 'X':
1386 result
->type
= CPP_NAME
;
1388 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1389 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
1391 warn_about_normalization (pfile
, result
, &nst
);
1394 /* Convert named operators to their proper types. */
1395 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
1397 result
->flags
|= NAMED_OP
;
1398 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
1404 lex_string (pfile
, result
, buffer
->cur
- 1);
1408 /* A potential block or line comment. */
1409 comment_start
= buffer
->cur
;
1414 if (_cpp_skip_block_comment (pfile
))
1415 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
1417 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
1418 || cpp_in_system_header (pfile
)))
1420 /* Warn about comments only if pedantically GNUC89, and not
1421 in system headers. */
1422 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1423 && ! buffer
->warned_cplusplus_comments
)
1425 cpp_error (pfile
, CPP_DL_PEDWARN
,
1426 "C++ style comments are not allowed in ISO C90");
1427 cpp_error (pfile
, CPP_DL_PEDWARN
,
1428 "(this will be reported only once per input file)");
1429 buffer
->warned_cplusplus_comments
= 1;
1432 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1433 cpp_error (pfile
, CPP_DL_WARNING
, "multi-line comment");
1438 result
->type
= CPP_DIV_EQ
;
1443 result
->type
= CPP_DIV
;
1447 if (!pfile
->state
.save_comments
)
1449 result
->flags
|= PREV_WHITE
;
1450 goto update_tokens_line
;
1453 /* Save the comment as a token in its own right. */
1454 save_comment (pfile
, result
, comment_start
, c
);
1458 if (pfile
->state
.angled_headers
)
1460 lex_string (pfile
, result
, buffer
->cur
- 1);
1461 if (result
->type
!= CPP_LESS
)
1465 result
->type
= CPP_LESS
;
1466 if (*buffer
->cur
== '=')
1467 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
1468 else if (*buffer
->cur
== '<')
1471 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
1473 else if (CPP_OPTION (pfile
, digraphs
))
1475 if (*buffer
->cur
== ':')
1478 result
->flags
|= DIGRAPH
;
1479 result
->type
= CPP_OPEN_SQUARE
;
1481 else if (*buffer
->cur
== '%')
1484 result
->flags
|= DIGRAPH
;
1485 result
->type
= CPP_OPEN_BRACE
;
1491 result
->type
= CPP_GREATER
;
1492 if (*buffer
->cur
== '=')
1493 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
1494 else if (*buffer
->cur
== '>')
1497 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1502 result
->type
= CPP_MOD
;
1503 if (*buffer
->cur
== '=')
1504 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
1505 else if (CPP_OPTION (pfile
, digraphs
))
1507 if (*buffer
->cur
== ':')
1510 result
->flags
|= DIGRAPH
;
1511 result
->type
= CPP_HASH
;
1512 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
1513 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
1515 else if (*buffer
->cur
== '>')
1518 result
->flags
|= DIGRAPH
;
1519 result
->type
= CPP_CLOSE_BRACE
;
1525 result
->type
= CPP_DOT
;
1526 if (ISDIGIT (*buffer
->cur
))
1528 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1529 result
->type
= CPP_NUMBER
;
1530 lex_number (pfile
, &result
->val
.str
, &nst
);
1531 warn_about_normalization (pfile
, result
, &nst
);
1533 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
1534 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
1535 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1536 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
1540 result
->type
= CPP_PLUS
;
1541 if (*buffer
->cur
== '+')
1542 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
1543 else if (*buffer
->cur
== '=')
1544 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
1548 result
->type
= CPP_MINUS
;
1549 if (*buffer
->cur
== '>')
1552 result
->type
= CPP_DEREF
;
1553 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1554 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
1556 else if (*buffer
->cur
== '-')
1557 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
1558 else if (*buffer
->cur
== '=')
1559 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
1563 result
->type
= CPP_AND
;
1564 if (*buffer
->cur
== '&')
1565 buffer
->cur
++, result
->type
= CPP_AND_AND
;
1566 else if (*buffer
->cur
== '=')
1567 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
1571 result
->type
= CPP_OR
;
1572 if (*buffer
->cur
== '|')
1573 buffer
->cur
++, result
->type
= CPP_OR_OR
;
1574 else if (*buffer
->cur
== '=')
1575 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
1579 result
->type
= CPP_COLON
;
1580 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
1581 buffer
->cur
++, result
->type
= CPP_SCOPE
;
1582 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
1585 result
->flags
|= DIGRAPH
;
1586 result
->type
= CPP_CLOSE_SQUARE
;
1590 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1591 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1592 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1593 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1594 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
1596 case '?': result
->type
= CPP_QUERY
; break;
1597 case '~': result
->type
= CPP_COMPL
; break;
1598 case ',': result
->type
= CPP_COMMA
; break;
1599 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1600 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1601 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1602 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1603 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1604 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1605 case ';': result
->type
= CPP_SEMICOLON
; break;
1607 /* @ is a punctuator in Objective-C. */
1608 case '@': result
->type
= CPP_ATSIGN
; break;
1613 const uchar
*base
= --buffer
->cur
;
1614 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
1616 if (forms_identifier_p (pfile
, true, &nst
))
1618 result
->type
= CPP_NAME
;
1619 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
);
1620 warn_about_normalization (pfile
, result
, &nst
);
1627 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
1634 /* An upper bound on the number of bytes needed to spell TOKEN.
1635 Does not include preceding whitespace. */
1637 cpp_token_len (const cpp_token
*token
)
1641 switch (TOKEN_SPELL (token
))
1643 default: len
= 6; break;
1644 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
1645 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
/* Decode the UTF-8 sequence that starts at NAME and store the
   corresponding "\UXXXXXXXX" escape (eight lower-case hex digits) in
   BUFFER.  Exactly 10 bytes are written; no terminating NUL is added.
   Returns the number of bytes of NAME that were consumed.  Aborts if
   a continuation byte does not have the form 10xxxxxx.  */
static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead_bits = *name;
  int seq_len = 0;
  int k;

  /* The count of leading one bits in the first byte is the length of
     the whole sequence.  */
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* The first byte's payload is whatever follows its length marker.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte contributes six more bits.  */
  for (k = 1; k < seq_len; k++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
1685 /* Given a token TYPE corresponding to a digraph, return a pointer to
1686 the spelling of the digraph. */
1687 static const unsigned char *
1688 cpp_digraph2name (enum cpp_ttype type
)
1690 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
1693 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1694 already contain the enough space to hold the token's spelling.
1695 Returns a pointer to the character after the last character written.
1696 FORSTRING is true if this is to be the spelling after translation
1697 phase 1 (this is different for UCNs).
1698 FIXME: Would be nice if we didn't need the PFILE argument. */
1700 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
1701 unsigned char *buffer
, bool forstring
)
1703 switch (TOKEN_SPELL (token
))
1705 case SPELL_OPERATOR
:
1707 const unsigned char *spelling
;
1710 if (token
->flags
& DIGRAPH
)
1711 spelling
= cpp_digraph2name (token
->type
);
1712 else if (token
->flags
& NAMED_OP
)
1715 spelling
= TOKEN_NAME (token
);
1717 while ((c
= *spelling
++) != '\0')
1726 memcpy (buffer
, NODE_NAME (token
->val
.node
.node
),
1727 NODE_LEN (token
->val
.node
.node
));
1728 buffer
+= NODE_LEN (token
->val
.node
.node
);
1733 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
1735 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
1736 if (name
[i
] & ~0x7F)
1738 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
1742 *buffer
++ = NODE_NAME (token
->val
.node
.node
)[i
];
1747 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1748 buffer
+= token
->val
.str
.len
;
1752 cpp_error (pfile
, CPP_DL_ICE
,
1753 "unspellable token %s", TOKEN_NAME (token
));
1760 /* Returns TOKEN spelt as a null-terminated string. The string is
1761 freed when the reader is destroyed. Useful for diagnostics. */
1763 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
1765 unsigned int len
= cpp_token_len (token
) + 1;
1766 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1768 end
= cpp_spell_token (pfile
, token
, start
, false);
1774 /* Returns a pointer to a string which spells the token defined by
1775 TYPE and FLAGS. Used by C front ends, which really should move to
1776 using cpp_token_as_text. */
1778 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
1780 if (flags
& DIGRAPH
)
1781 return (const char *) cpp_digraph2name (type
);
1782 else if (flags
& NAMED_OP
)
1783 return cpp_named_operator2name (type
);
1785 return (const char *) token_spellings
[type
].name
;
1788 /* Writes the spelling of token to FP, without any preceding space.
1789 Separated from cpp_spell_token for efficiency - to avoid stdio
1790 double-buffering. */
1792 cpp_output_token (const cpp_token
*token
, FILE *fp
)
1794 switch (TOKEN_SPELL (token
))
1796 case SPELL_OPERATOR
:
1798 const unsigned char *spelling
;
1801 if (token
->flags
& DIGRAPH
)
1802 spelling
= cpp_digraph2name (token
->type
);
1803 else if (token
->flags
& NAMED_OP
)
1806 spelling
= TOKEN_NAME (token
);
1811 while ((c
= *++spelling
) != '\0');
1819 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
1821 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
1822 if (name
[i
] & ~0x7F)
1824 unsigned char buffer
[10];
1825 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
1826 fwrite (buffer
, 1, 10, fp
);
1829 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
1834 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1838 /* An error, most probably. */
1843 /* Compare two tokens. */
1845 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
1847 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1848 switch (TOKEN_SPELL (a
))
1850 default: /* Keep compiler happy. */
1851 case SPELL_OPERATOR
:
1852 /* token_no is used to track where multiple consecutive ##
1853 tokens were originally located. */
1854 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
1856 return (a
->type
!= CPP_MACRO_ARG
1857 || a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
);
1859 return a
->val
.node
.node
== b
->val
.node
.node
;
1861 return (a
->val
.str
.len
== b
->val
.str
.len
1862 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1869 /* Returns nonzero if a space should be inserted to avoid an
1870 accidental token paste for output. For simplicity, it is
1871 conservative, and occasionally advises a space where one is not
1872 needed, e.g. "." and ".2". */
1874 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
1875 const cpp_token
*token2
)
1877 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1880 if (token1
->flags
& NAMED_OP
)
1882 if (token2
->flags
& NAMED_OP
)
1886 if (token2
->flags
& DIGRAPH
)
1887 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1888 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1889 c
= token_spellings
[b
].name
[0];
1891 /* Quickly get everything that can paste with an '='. */
1892 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1897 case CPP_GREATER
: return c
== '>';
1898 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
1899 case CPP_PLUS
: return c
== '+';
1900 case CPP_MINUS
: return c
== '-' || c
== '>';
1901 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1902 case CPP_MOD
: return c
== ':' || c
== '>';
1903 case CPP_AND
: return c
== '&';
1904 case CPP_OR
: return c
== '|';
1905 case CPP_COLON
: return c
== ':' || c
== '>';
1906 case CPP_DEREF
: return c
== '*';
1907 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1908 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1909 case CPP_NAME
: return ((b
== CPP_NUMBER
1910 && name_p (pfile
, &token2
->val
.str
))
1912 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1913 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1914 || c
== '.' || c
== '+' || c
== '-');
1916 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
1918 || (CPP_OPTION (pfile
, objc
)
1919 && token1
->val
.str
.text
[0] == '@'
1920 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
1927 /* Output all the remaining tokens on the current line, and a newline
1928 character, to FP. Leading whitespace is removed. If there are
1929 macros, special token padding is not performed. */
1931 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
1933 const cpp_token
*token
;
1935 token
= cpp_get_token (pfile
);
1936 while (token
->type
!= CPP_EOF
)
1938 cpp_output_token (token
, fp
);
1939 token
= cpp_get_token (pfile
);
1940 if (token
->flags
& PREV_WHITE
)
1947 /* Return a string representation of all the remaining tokens on the
1948 current line. The result is allocated using xmalloc and must be
1949 freed by the caller. */
1951 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
1953 const cpp_token
*token
;
1954 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
1955 unsigned int alloced
= 120 + out
;
1956 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
1958 /* If DIR_NAME is empty, there are no initial contents. */
1961 sprintf ((char *) result
, "#%s ", dir_name
);
1965 token
= cpp_get_token (pfile
);
1966 while (token
->type
!= CPP_EOF
)
1968 unsigned char *last
;
1969 /* Include room for a possible space and the terminating nul. */
1970 unsigned int len
= cpp_token_len (token
) + 2;
1972 if (out
+ len
> alloced
)
1975 if (out
+ len
> alloced
)
1976 alloced
= out
+ len
;
1977 result
= (unsigned char *) xrealloc (result
, alloced
);
1980 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
1981 out
= last
- result
;
1983 token
= cpp_get_token (pfile
);
1984 if (token
->flags
& PREV_WHITE
)
1985 result
[out
++] = ' ';
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   may be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size to request when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  All
   macro parameters are parenthesized so that expression arguments
   keep their own grouping.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2007 /* Create a new allocation buffer. Place the control block at the end
2008 of the buffer, so that buffer overflows will cause immediate chaos. */
2010 new_buff (size_t len
)
2013 unsigned char *base
;
2015 if (len
< MIN_BUFF_SIZE
)
2016 len
= MIN_BUFF_SIZE
;
2017 len
= CPP_ALIGN (len
);
2019 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
2020 result
= (_cpp_buff
*) (base
+ len
);
2021 result
->base
= base
;
2023 result
->limit
= base
+ len
;
2024 result
->next
= NULL
;
2028 /* Place a chain of unwanted allocation buffers on the free list. */
2030 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
2032 _cpp_buff
*end
= buff
;
2036 end
->next
= pfile
->free_buffs
;
2037 pfile
->free_buffs
= buff
;
2040 /* Return a free buffer of size at least MIN_SIZE. */
2042 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
2044 _cpp_buff
*result
, **p
;
2046 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
2051 return new_buff (min_size
);
2053 size
= result
->limit
- result
->base
;
2054 /* Return a buffer that's big enough, but don't waste one that's
2056 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2061 result
->next
= NULL
;
2062 result
->cur
= result
->base
;
2066 /* Creates a new buffer with enough space to hold the uncommitted
2067 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2068 the excess bytes to the new buffer. Chains the new buffer after
2069 BUFF, and returns the new buffer. */
2071 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
2073 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
2074 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
2076 buff
->next
= new_buff
;
2077 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
2081 /* Creates a new buffer with enough space to hold the uncommitted
2082 remaining bytes of the buffer pointed to by BUFF, and at least
2083 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2084 Chains the new buffer before the buffer pointed to by BUFF, and
2085 updates the pointer to point to the new buffer. */
2087 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
2089 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
2090 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
2092 new_buff
= _cpp_get_buff (pfile
, size
);
2093 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
2094 new_buff
->next
= old_buff
;
2098 /* Free a chain of buffers starting at BUFF. */
2100 _cpp_free_buff (_cpp_buff
*buff
)
2104 for (; buff
; buff
= next
)
2111 /* Allocate permanent, unaligned storage of length LEN. */
2113 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
2115 _cpp_buff
*buff
= pfile
->u_buff
;
2116 unsigned char *result
= buff
->cur
;
2118 if (len
> (size_t) (buff
->limit
- result
))
2120 buff
= _cpp_get_buff (pfile
, len
);
2121 buff
->next
= pfile
->u_buff
;
2122 pfile
->u_buff
= buff
;
2126 buff
->cur
= result
+ len
;
2130 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2131 That buffer is used for growing allocations when saving macro
2132 replacement lists in a #define, and when parsing an answer to an
2133 assertion in #assert, #unassert or #if (and therefore possibly
2134 whilst expanding macros). It therefore must not be used by any
2135 code that they might call: specifically the lexer and the guts of
2138 All existing other uses clearly fit this restriction: storing
2139 registered pragmas during initialization. */
2141 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
2143 _cpp_buff
*buff
= pfile
->a_buff
;
2144 unsigned char *result
= buff
->cur
;
2146 if (len
> (size_t) (buff
->limit
- result
))
2148 buff
= _cpp_get_buff (pfile
, len
);
2149 buff
->next
= pfile
->a_buff
;
2150 pfile
->a_buff
= buff
;
2154 buff
->cur
= result
+ len
;
2158 /* Say which field of TOK is in use. */
2160 enum cpp_token_fld_kind
2161 cpp_token_val_index (cpp_token
*tok
)
2163 switch (TOKEN_SPELL (tok
))
2166 return CPP_TOKEN_FLD_NODE
;
2168 return CPP_TOKEN_FLD_STR
;
2169 case SPELL_OPERATOR
:
2170 if (tok
->type
== CPP_PASTE
)
2171 return CPP_TOKEN_FLD_TOKEN_NO
;
2173 return CPP_TOKEN_FLD_NONE
;
2175 if (tok
->type
== CPP_MACRO_ARG
)
2176 return CPP_TOKEN_FLD_ARG_NO
;
2177 else if (tok
->type
== CPP_PADDING
)
2178 return CPP_TOKEN_FLD_SOURCE
;
2179 else if (tok
->type
== CPP_PRAGMA
)
2180 return CPP_TOKEN_FLD_PRAGMA
;
2181 /* else fall through */
2183 return CPP_TOKEN_FLD_NONE
;