1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
28 /* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
32 #undef MULTIBYTE_CHARS
35 #ifdef MULTIBYTE_CHARS
40 /* Tokens with SPELL_STRING store their spelling in the token list,
41 and its length in token->val.str.len. */
54 enum spell_type category
;
55 const unsigned char *name
;
58 static const unsigned char *const digraph_spellings
[] =
59 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
61 #define OP(e, s) { SPELL_OPERATOR, U s },
62 #define TK(e, s) { s, U STRINGX (e) },
63 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
71 static void handle_newline
PARAMS ((cpp_reader
*));
72 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_reader
*));
73 static cppchar_t get_effective_char
PARAMS ((cpp_reader
*));
75 static int skip_block_comment
PARAMS ((cpp_reader
*));
76 static int skip_line_comment
PARAMS ((cpp_reader
*));
77 static void adjust_column
PARAMS ((cpp_reader
*));
78 static int skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
79 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*));
80 static cpp_hashnode
*parse_identifier_slow
PARAMS ((cpp_reader
*,
82 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, cppchar_t
, int));
83 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const U_CHAR
*));
84 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
85 static void unterminated
PARAMS ((cpp_reader
*, int));
86 static bool trigraph_p
PARAMS ((cpp_reader
*));
87 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const U_CHAR
*));
88 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
89 static int maybe_read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
90 const unsigned char *, unsigned int *));
91 static tokenrun
*next_tokenrun
PARAMS ((tokenrun
*));
93 static unsigned int hex_digit_value
PARAMS ((unsigned int));
94 static _cpp_buff
*new_buff
PARAMS ((size_t));
98 Compares the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
101 cpp_ideq (token
, string
)
102 const cpp_token
*token
;
105 if (token
->type
!= CPP_NAME
)
108 return !ustrcmp (NODE_NAME (token
->val
.node
), (const U_CHAR
*) string
);
111 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
112 Returns with buffer->cur pointing to the character immediately
113 following the newline (combination). */
115 handle_newline (pfile
)
118 cpp_buffer
*buffer
= pfile
->buffer
;
120 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
121 only accept CR-LF; maybe we should fall back to that behaviour? */
122 if (buffer
->cur
[-1] + buffer
->cur
[0] == '\r' + '\n')
125 buffer
->line_base
= buffer
->cur
;
126 buffer
->col_adjust
= 0;
130 /* Subroutine of skip_escaped_newlines; called when a 3-character
131 sequence beginning with "??" is encountered. buffer->cur points to
134 Warn if necessary, and returns true if the sequence forms a
135 trigraph and the trigraph should be honoured. */
140 cpp_buffer
*buffer
= pfile
->buffer
;
141 cppchar_t from_char
= buffer
->cur
[1];
144 if (!_cpp_trigraph_map
[from_char
])
147 accept
= CPP_OPTION (pfile
, trigraphs
);
149 /* Don't warn about trigraphs in comments. */
150 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
153 cpp_warning_with_line (pfile
, pfile
->line
, CPP_BUF_COL (buffer
) - 1,
154 "trigraph ??%c converted to %c",
156 (int) _cpp_trigraph_map
[from_char
]);
157 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
159 buffer
->last_Wtrigraphs
= buffer
->cur
;
160 cpp_warning_with_line (pfile
, pfile
->line
,
161 CPP_BUF_COL (buffer
) - 1,
162 "trigraph ??%c ignored", (int) from_char
);
169 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
170 lie in buffer->cur[-1]. Returns the next byte, which will be in
171 buffer->cur[-1]. This routine performs preprocessing stages 1 and
172 2 of the ISO C standard. */
174 skip_escaped_newlines (pfile
)
177 cpp_buffer
*buffer
= pfile
->buffer
;
178 cppchar_t next
= buffer
->cur
[-1];
180 /* Only do this if we apply stages 1 and 2. */
181 if (!buffer
->from_stage3
)
183 const unsigned char *saved_cur
;
190 if (buffer
->cur
[0] != '?' || !trigraph_p (pfile
))
193 /* Translate the trigraph. */
194 next
= _cpp_trigraph_map
[buffer
->cur
[1]];
200 if (buffer
->cur
== buffer
->rlimit
)
203 /* We have a backslash, and room for at least one more
204 character. Skip horizontal whitespace. */
205 saved_cur
= buffer
->cur
;
207 next1
= *buffer
->cur
++;
208 while (is_nvspace (next1
) && buffer
->cur
< buffer
->rlimit
);
210 if (!is_vspace (next1
))
212 buffer
->cur
= saved_cur
;
216 if (saved_cur
!= buffer
->cur
- 1
217 && !pfile
->state
.lexing_comment
)
218 cpp_warning (pfile
, "backslash and newline separated by space");
220 handle_newline (pfile
);
221 buffer
->backup_to
= buffer
->cur
;
222 if (buffer
->cur
== buffer
->rlimit
)
224 cpp_pedwarn (pfile
, "backslash-newline at end of file");
228 next
= *buffer
->cur
++;
230 while (next
== '\\' || next
== '?');
236 /* Obtain the next character, after trigraph conversion and skipping
237 an arbitrarily long string of escaped newlines. The common case of
238 no trigraphs or escaped newlines falls through quickly. On return,
239 buffer->backup_to points to where to return to if the character is
240 not to be processed. */
242 get_effective_char (pfile
)
246 cpp_buffer
*buffer
= pfile
->buffer
;
248 buffer
->backup_to
= buffer
->cur
;
249 next
= *buffer
->cur
++;
250 if (__builtin_expect (next
== '?' || next
== '\\', 0))
251 next
= skip_escaped_newlines (pfile
);
256 /* Skip a C-style block comment. We find the end of the comment by
257 seeing if an asterisk is before every '/' we encounter. Returns
258 non-zero if comment terminated by EOF, zero otherwise. */
260 skip_block_comment (pfile
)
263 cpp_buffer
*buffer
= pfile
->buffer
;
264 cppchar_t c
= EOF
, prevc
= EOF
;
266 pfile
->state
.lexing_comment
= 1;
267 while (buffer
->cur
!= buffer
->rlimit
)
269 prevc
= c
, c
= *buffer
->cur
++;
271 /* FIXME: For speed, create a new character class of characters
272 of interest inside block comments. */
273 if (c
== '?' || c
== '\\')
274 c
= skip_escaped_newlines (pfile
);
276 /* People like decorating comments with '*', so check for '/'
277 instead for efficiency. */
283 /* Warn about potential nested comments, but not if the '/'
284 comes immediately before the true comment delimiter.
285 Don't bother to get it right across escaped newlines. */
286 if (CPP_OPTION (pfile
, warn_comments
)
287 && buffer
->cur
[0] == '*' && buffer
->cur
[1] != '/')
288 cpp_warning_with_line (pfile
,
289 pfile
->line
, CPP_BUF_COL (buffer
),
290 "\"/*\" within comment");
292 else if (is_vspace (c
))
293 handle_newline (pfile
);
295 adjust_column (pfile
);
298 pfile
->state
.lexing_comment
= 0;
299 return c
!= '/' || prevc
!= '*';
302 /* Skip a C++ line comment, leaving buffer->cur pointing to the
303 terminating newline. Handles escaped newlines. Returns non-zero
304 if a multiline comment. */
306 skip_line_comment (pfile
)
309 cpp_buffer
*buffer
= pfile
->buffer
;
310 unsigned int orig_line
= pfile
->line
;
313 pfile
->state
.lexing_comment
= 1;
316 if (buffer
->cur
== buffer
->rlimit
)
320 if (c
== '?' || c
== '\\')
321 c
= skip_escaped_newlines (pfile
);
323 while (!is_vspace (c
));
325 /* Step back over the newline, except at EOF. */
329 pfile
->state
.lexing_comment
= 0;
330 return orig_line
!= pfile
->line
;
333 /* pfile->buffer->cur is one beyond the \t character. Update
334 col_adjust so we track the column correctly. */
336 adjust_column (pfile
)
339 cpp_buffer
*buffer
= pfile
->buffer
;
340 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
342 /* Round it up to multiple of the tabstop, but subtract 1 since the
343 tab itself occupies a character position. */
344 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
345 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
348 /* Skips whitespace, saving the next non-whitespace character.
349 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
350 tokens might be assigned an incorrect column. */
352 skip_whitespace (pfile
, c
)
356 cpp_buffer
*buffer
= pfile
->buffer
;
357 unsigned int warned
= 0;
361 /* Horizontal space always OK. */
365 adjust_column (pfile
);
366 /* Just \f \v or \0 left. */
369 if (buffer
->cur
- 1 == buffer
->rlimit
)
373 cpp_warning (pfile
, "null character(s) ignored");
377 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
378 cpp_pedwarn_with_line (pfile
, pfile
->line
,
379 CPP_BUF_COL (buffer
),
380 "%s in preprocessing directive",
381 c
== '\f' ? "form feed" : "vertical tab");
385 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
386 while (is_nvspace (c
));
392 /* See if the characters of a number token are valid in a name (no
395 name_p (pfile
, string
)
397 const cpp_string
*string
;
401 for (i
= 0; i
< string
->len
; i
++)
402 if (!is_idchar (string
->text
[i
]))
408 /* Parse an identifier, skipping embedded backslash-newlines. This is
409 a critical inner loop. The common case is an identifier which has
410 not been split by backslash-newline, does not contain a dollar
411 sign, and has already been scanned (roughly 10:1 ratio of
412 seen:unseen identifiers in normal code; the distribution is
413 Poisson-like). Second most common case is a new identifier, not
414 split and no dollar sign. The other possibilities are rare and
415 have been relegated to parse_identifier_slow. */
416 static cpp_hashnode
*
417 parse_identifier (pfile
)
420 cpp_hashnode
*result
;
423 /* Fast-path loop. Skim over a normal identifier.
424 N.B. ISIDNUM does not include $. */
425 cur
= pfile
->buffer
->cur
;
426 while (ISIDNUM (*cur
))
429 /* Check for slow-path cases. */
430 if (*cur
== '?' || *cur
== '\\' || *cur
== '$')
431 result
= parse_identifier_slow (pfile
, cur
);
434 const U_CHAR
*base
= pfile
->buffer
->cur
- 1;
435 result
= (cpp_hashnode
*)
436 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
437 pfile
->buffer
->cur
= cur
;
440 /* Rarely, identifiers require diagnostics when lexed.
441 XXX Has to be forced out of the fast path. */
442 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
443 && !pfile
->state
.skipping
, 0))
445 /* It is allowed to poison the same identifier twice. */
446 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
447 cpp_error (pfile
, "attempt to use poisoned \"%s\"",
450 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
451 replacement list of a variadic macro. */
452 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
453 && !pfile
->state
.va_args_ok
)
455 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
461 /* Slow path. This handles identifiers which have been split, and
462 identifiers which contain dollar signs. The part of the identifier
463 from PFILE->buffer->cur-1 to CUR has already been scanned. */
464 static cpp_hashnode
*
465 parse_identifier_slow (pfile
, cur
)
469 cpp_buffer
*buffer
= pfile
->buffer
;
470 const U_CHAR
*base
= buffer
->cur
- 1;
471 struct obstack
*stack
= &pfile
->hash_table
->stack
;
472 unsigned int c
, saw_dollar
= 0, len
;
474 /* Copy the part of the token which is known to be okay. */
475 obstack_grow (stack
, base
, cur
- base
);
477 /* Now process the part which isn't. We are looking at one of
478 '$', '\\', or '?' on entry to this loop. */
483 while (is_idchar (c
))
485 obstack_1grow (stack
, c
);
493 /* Potential escaped newline? */
494 buffer
->backup_to
= buffer
->cur
- 1;
495 if (c
!= '?' && c
!= '\\')
497 c
= skip_escaped_newlines (pfile
);
499 while (is_idchar (c
));
501 /* Step back over the unwanted char. */
504 /* $ is not an identifier character in the standard, but is commonly
505 accepted as an extension. Don't warn about it in skipped
506 conditional blocks. */
507 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->state
.skipping
)
508 cpp_pedwarn (pfile
, "'$' character(s) in identifier");
510 /* Identifiers are null-terminated. */
511 len
= obstack_object_size (stack
);
512 obstack_1grow (stack
, '\0');
514 return (cpp_hashnode
*)
515 ht_lookup (pfile
->hash_table
, obstack_finish (stack
), len
, HT_ALLOCED
);
518 /* Parse a number, beginning with character C, skipping embedded
519 backslash-newlines. LEADING_PERIOD is non-zero if there was a "."
520 before C. Place the result in NUMBER. */
522 parse_number (pfile
, number
, c
, leading_period
)
528 cpp_buffer
*buffer
= pfile
->buffer
;
529 unsigned char *dest
, *limit
;
531 dest
= BUFF_FRONT (pfile
->u_buff
);
532 limit
= BUFF_LIMIT (pfile
->u_buff
);
534 /* Place a leading period. */
539 _cpp_extend_buff (pfile
, &pfile
->u_buff
, 1);
540 dest
= BUFF_FRONT (pfile
->u_buff
);
541 limit
= BUFF_LIMIT (pfile
->u_buff
);
550 /* Need room for terminating null. */
551 if ((size_t) (limit
- dest
) < 2)
553 size_t len_so_far
= dest
- BUFF_FRONT (pfile
->u_buff
);
554 _cpp_extend_buff (pfile
, &pfile
->u_buff
, 2);
555 dest
= BUFF_FRONT (pfile
->u_buff
) + len_so_far
;
556 limit
= BUFF_LIMIT (pfile
->u_buff
);
562 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
564 /* Potential escaped newline? */
565 buffer
->backup_to
= buffer
->cur
- 1;
566 if (c
!= '?' && c
!= '\\')
568 c
= skip_escaped_newlines (pfile
);
570 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
572 /* Step back over the unwanted char. */
575 /* Null-terminate the number. */
578 number
->text
= BUFF_FRONT (pfile
->u_buff
);
579 number
->len
= dest
- number
->text
;
580 BUFF_FRONT (pfile
->u_buff
) = dest
+ 1;
583 /* Subroutine of parse_string. Emits error for unterminated strings. */
585 unterminated (pfile
, term
)
589 cpp_error (pfile
, "missing terminating %c character", term
);
591 if (term
== '\"' && pfile
->mls_line
&& pfile
->mls_line
!= pfile
->line
)
593 cpp_error_with_line (pfile
, pfile
->mls_line
, pfile
->mls_col
,
594 "possible start of unterminated string literal");
599 /* Subroutine of parse_string. */
601 unescaped_terminator_p (pfile
, dest
)
603 const unsigned char *dest
;
605 const unsigned char *start
, *temp
;
607 /* In #include-style directives, terminators are not escapable. */
608 if (pfile
->state
.angled_headers
)
611 start
= BUFF_FRONT (pfile
->u_buff
);
613 /* An odd number of consecutive backslashes represents an escaped
615 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
618 return ((dest
- temp
) & 1) == 0;
621 /* Parses a string, character constant, or angle-bracketed header file
622 name. Handles embedded trigraphs and escaped newlines. The stored
623 string is guaranteed NUL-terminated, but it is not guaranteed that
624 this is the first NUL since embedded NULs are preserved.
625 Multi-line strings are allowed, but they are deprecated.
627 When this function returns, buffer->cur points to the next
628 character to be processed. */
630 parse_string (pfile
, token
, terminator
)
633 cppchar_t terminator
;
635 cpp_buffer
*buffer
= pfile
->buffer
;
636 unsigned char *dest
, *limit
;
638 bool warned_nulls
= false, warned_multi
= false;
640 dest
= BUFF_FRONT (pfile
->u_buff
);
641 limit
= BUFF_LIMIT (pfile
->u_buff
);
645 /* We need room for another char, possibly the terminating NUL. */
646 if ((size_t) (limit
- dest
) < 1)
648 size_t len_so_far
= dest
- BUFF_FRONT (pfile
->u_buff
);
649 _cpp_extend_buff (pfile
, &pfile
->u_buff
, 2);
650 dest
= BUFF_FRONT (pfile
->u_buff
) + len_so_far
;
651 limit
= BUFF_LIMIT (pfile
->u_buff
);
654 /* Handle trigraphs, escaped newlines etc. */
656 if (c
== '?' || c
== '\\')
657 c
= skip_escaped_newlines (pfile
);
661 if (unescaped_terminator_p (pfile
, dest
))
664 else if (is_vspace (c
))
666 /* In assembly language, silently terminate string and
667 character literals at end of line. This is a kludge
668 around not knowing where comments are. */
669 if (CPP_OPTION (pfile
, lang
) == CLK_ASM
&& terminator
!= '>')
675 /* Character constants and header names may not extend over
676 multiple lines. In Standard C, neither may strings.
677 Unfortunately, we accept multiline strings as an
678 extension, except in #include family directives. */
679 if (terminator
!= '"' || pfile
->state
.angled_headers
)
681 unterminated (pfile
, terminator
);
689 cpp_pedwarn (pfile
, "multi-line string literals are deprecated");
692 if (pfile
->mls_line
== 0)
694 pfile
->mls_line
= token
->line
;
695 pfile
->mls_col
= token
->col
;
698 handle_newline (pfile
);
703 if (buffer
->cur
- 1 == buffer
->rlimit
)
705 unterminated (pfile
, terminator
);
712 cpp_warning (pfile
, "null character(s) preserved in literal");
721 token
->val
.str
.text
= BUFF_FRONT (pfile
->u_buff
);
722 token
->val
.str
.len
= dest
- BUFF_FRONT (pfile
->u_buff
);
723 BUFF_FRONT (pfile
->u_buff
) = dest
+ 1;
726 /* The stored comment includes the comment start and any terminator. */
728 save_comment (pfile
, token
, from
)
731 const unsigned char *from
;
733 unsigned char *buffer
;
736 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
738 /* C++ comments probably (not definitely) have moved past a new
739 line, which we don't want to save in the comment. */
740 if (is_vspace (pfile
->buffer
->cur
[-1]))
742 buffer
= _cpp_unaligned_alloc (pfile
, len
);
744 token
->type
= CPP_COMMENT
;
745 token
->val
.str
.len
= len
;
746 token
->val
.str
.text
= buffer
;
749 memcpy (buffer
+ 1, from
, len
- 1);
752 /* Allocate COUNT tokens for RUN. */
754 _cpp_init_tokenrun (run
, count
)
758 run
->base
= xnewvec (cpp_token
, count
);
759 run
->limit
= run
->base
+ count
;
763 /* Returns the next tokenrun, or creates one if there is none. */
768 if (run
->next
== NULL
)
770 run
->next
= xnew (tokenrun
);
771 run
->next
->prev
= run
;
772 _cpp_init_tokenrun (run
->next
, 250);
778 /* Allocate a single token that is invalidated at the same time as the
779 rest of the tokens on the line. Has its line and col set to the
780 same as the last lexed token, so that diagnostics appear in the
783 _cpp_temp_token (pfile
)
786 cpp_token
*old
, *result
;
788 old
= pfile
->cur_token
- 1;
789 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
791 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
792 pfile
->cur_token
= pfile
->cur_run
->base
;
795 result
= pfile
->cur_token
++;
796 result
->line
= old
->line
;
797 result
->col
= old
->col
;
801 /* Lex a token into RESULT (external interface). Takes care of issues
802 like directive handling, token lookahead, multiple include
803 optimization and skipping. */
805 _cpp_lex_token (pfile
)
812 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
814 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
815 pfile
->cur_token
= pfile
->cur_run
->base
;
818 if (pfile
->lookaheads
)
821 result
= pfile
->cur_token
++;
824 result
= _cpp_lex_direct (pfile
);
826 if (result
->flags
& BOL
)
828 /* Is this a directive? If _cpp_handle_directive returns
829 false, it is an assembler #. */
830 if (result
->type
== CPP_HASH
831 && !pfile
->state
.parsing_args
832 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
834 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
835 (*pfile
->cb
.line_change
)(pfile
, result
, pfile
->state
.parsing_args
);
838 /* We don't skip tokens in directives. */
839 if (pfile
->state
.in_directive
)
842 /* Outside a directive, invalidate controlling macros. At file
843 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
844 get here and MI optimisation works. */
845 pfile
->mi_valid
= false;
847 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
854 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
856 if (get_effective_char (pfile) == CHAR) \
857 result->type = THEN_TYPE; \
861 result->type = ELSE_TYPE; \
865 /* Lex a token into pfile->cur_token, which is also incremented, to
866 get diagnostics pointing to the correct location.
868 Does not handle issues such as token lookahead, multiple-include
869 optimisation, directives, skipping etc. This function is only
870 suitable for use by _cpp_lex_token, and in special cases like
871 lex_expansion_token which doesn't care for any of these issues.
873 When meeting a newline, returns CPP_EOF if parsing a directive,
874 otherwise returns to the start of the token buffer if permissible.
875 Returns the location of the lexed token. */
877 _cpp_lex_direct (pfile
)
882 const unsigned char *comment_start
;
883 cpp_token
*result
= pfile
->cur_token
++;
886 buffer
= pfile
->buffer
;
887 result
->flags
= buffer
->saved_flags
;
888 buffer
->saved_flags
= 0;
890 result
->line
= pfile
->line
;
894 result
->col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
899 case ' ': case '\t': case '\f': case '\v': case '\0':
900 result
->flags
|= PREV_WHITE
;
901 if (skip_whitespace (pfile
, c
))
906 buffer
->saved_flags
= BOL
;
907 if (!pfile
->state
.parsing_args
&& !pfile
->state
.in_directive
)
909 if (buffer
->cur
!= buffer
->line_base
)
911 /* Non-empty files should end in a newline. Don't warn
912 for command line and _Pragma buffers. */
913 if (!buffer
->from_stage3
)
914 cpp_pedwarn (pfile
, "no newline at end of file");
915 handle_newline (pfile
);
918 /* Don't pop the last buffer. */
921 unsigned char stop
= buffer
->return_at_eof
;
923 _cpp_pop_buffer (pfile
);
928 result
->type
= CPP_EOF
;
931 case '\n': case '\r':
932 handle_newline (pfile
);
933 buffer
->saved_flags
= BOL
;
934 if (! pfile
->state
.in_directive
)
936 if (pfile
->state
.parsing_args
== 2)
937 buffer
->saved_flags
|= PREV_WHITE
;
938 if (!pfile
->keep_tokens
)
940 pfile
->cur_run
= &pfile
->base_run
;
941 result
= pfile
->base_run
.base
;
942 pfile
->cur_token
= result
+ 1;
946 result
->type
= CPP_EOF
;
951 /* These could start an escaped newline, or '?' a trigraph. Let
952 skip_escaped_newlines do all the work. */
954 unsigned int line
= pfile
->line
;
956 c
= skip_escaped_newlines (pfile
);
957 if (line
!= pfile
->line
)
960 /* We had at least one escaped newline of some sort.
961 Update the token's line and column. */
962 goto update_tokens_line
;
966 /* We are either the original '?' or '\\', or a trigraph. */
968 result
->type
= CPP_QUERY
;
975 case '0': case '1': case '2': case '3': case '4':
976 case '5': case '6': case '7': case '8': case '9':
977 result
->type
= CPP_NUMBER
;
978 parse_number (pfile
, &result
->val
.str
, c
, 0);
982 /* 'L' may introduce wide characters or strings. */
984 const unsigned char *pos
= buffer
->cur
;
986 c
= get_effective_char (pfile
);
987 if (c
== '\'' || c
== '"')
989 result
->type
= (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
990 parse_string (pfile
, result
, c
);
999 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1000 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1001 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1002 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1004 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1005 case 'G': case 'H': case 'I': case 'J': case 'K':
1006 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1007 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1009 result
->type
= CPP_NAME
;
1010 result
->val
.node
= parse_identifier (pfile
);
1012 /* Convert named operators to their proper types. */
1013 if (result
->val
.node
->flags
& NODE_OPERATOR
)
1015 result
->flags
|= NAMED_OP
;
1016 result
->type
= result
->val
.node
->value
.operator;
1022 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
1023 parse_string (pfile
, result
, c
);
1027 /* A potential block or line comment. */
1028 comment_start
= buffer
->cur
;
1029 c
= get_effective_char (pfile
);
1033 if (skip_block_comment (pfile
))
1034 cpp_error (pfile
, "unterminated comment");
1036 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
1037 || CPP_IN_SYSTEM_HEADER (pfile
)))
1039 /* Warn about comments only if pedantically GNUC89, and not
1040 in system headers. */
1041 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1042 && ! buffer
->warned_cplusplus_comments
)
1045 "C++ style comments are not allowed in ISO C89");
1047 "(this will be reported only once per input file)");
1048 buffer
->warned_cplusplus_comments
= 1;
1051 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1052 cpp_warning (pfile
, "multi-line comment");
1056 result
->type
= CPP_DIV_EQ
;
1062 result
->type
= CPP_DIV
;
1066 if (!pfile
->state
.save_comments
)
1068 result
->flags
|= PREV_WHITE
;
1069 goto update_tokens_line
;
1072 /* Save the comment as a token in its own right. */
1073 save_comment (pfile
, result
, comment_start
);
1077 if (pfile
->state
.angled_headers
)
1079 result
->type
= CPP_HEADER_NAME
;
1080 parse_string (pfile
, result
, '>');
1084 c
= get_effective_char (pfile
);
1086 result
->type
= CPP_LESS_EQ
;
1088 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
1089 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1090 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
1091 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1093 result
->type
= CPP_OPEN_SQUARE
;
1094 result
->flags
|= DIGRAPH
;
1096 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1098 result
->type
= CPP_OPEN_BRACE
;
1099 result
->flags
|= DIGRAPH
;
1104 result
->type
= CPP_LESS
;
1109 c
= get_effective_char (pfile
);
1111 result
->type
= CPP_GREATER_EQ
;
1113 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1114 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1115 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
1119 result
->type
= CPP_GREATER
;
1124 c
= get_effective_char (pfile
);
1126 result
->type
= CPP_MOD_EQ
;
1127 else if (CPP_OPTION (pfile
, digraphs
) && c
== ':')
1129 result
->flags
|= DIGRAPH
;
1130 result
->type
= CPP_HASH
;
1131 if (get_effective_char (pfile
) == '%')
1133 const unsigned char *pos
= buffer
->cur
;
1135 if (get_effective_char (pfile
) == ':')
1136 result
->type
= CPP_PASTE
;
1138 buffer
->cur
= pos
- 1;
1143 else if (CPP_OPTION (pfile
, digraphs
) && c
== '>')
1145 result
->flags
|= DIGRAPH
;
1146 result
->type
= CPP_CLOSE_BRACE
;
1151 result
->type
= CPP_MOD
;
1156 result
->type
= CPP_DOT
;
1157 c
= get_effective_char (pfile
);
1160 const unsigned char *pos
= buffer
->cur
;
1162 if (get_effective_char (pfile
) == '.')
1163 result
->type
= CPP_ELLIPSIS
;
1165 buffer
->cur
= pos
- 1;
1167 /* All known character sets have 0...9 contiguous. */
1168 else if (ISDIGIT (c
))
1170 result
->type
= CPP_NUMBER
;
1171 parse_number (pfile
, &result
->val
.str
, c
, 1);
1173 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
1174 result
->type
= CPP_DOT_STAR
;
1180 c
= get_effective_char (pfile
);
1182 result
->type
= CPP_PLUS_PLUS
;
1184 result
->type
= CPP_PLUS_EQ
;
1188 result
->type
= CPP_PLUS
;
1193 c
= get_effective_char (pfile
);
1196 result
->type
= CPP_DEREF
;
1197 if (CPP_OPTION (pfile
, cplusplus
))
1199 if (get_effective_char (pfile
) == '*')
1200 result
->type
= CPP_DEREF_STAR
;
1206 result
->type
= CPP_MINUS_MINUS
;
1208 result
->type
= CPP_MINUS_EQ
;
1212 result
->type
= CPP_MINUS
;
1217 c
= get_effective_char (pfile
);
1219 result
->type
= CPP_AND_AND
;
1221 result
->type
= CPP_AND_EQ
;
1225 result
->type
= CPP_AND
;
1230 c
= get_effective_char (pfile
);
1232 result
->type
= CPP_OR_OR
;
1234 result
->type
= CPP_OR_EQ
;
1238 result
->type
= CPP_OR
;
1243 c
= get_effective_char (pfile
);
1244 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1245 result
->type
= CPP_SCOPE
;
1246 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1248 result
->flags
|= DIGRAPH
;
1249 result
->type
= CPP_CLOSE_SQUARE
;
1254 result
->type
= CPP_COLON
;
1258 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1259 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1260 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1261 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1262 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1264 case '~': result
->type
= CPP_COMPL
; break;
1265 case ',': result
->type
= CPP_COMMA
; break;
1266 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1267 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1268 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1269 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1270 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1271 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1272 case ';': result
->type
= CPP_SEMICOLON
; break;
1274 /* @ is a punctuator in Objective C. */
1275 case '@': result
->type
= CPP_ATSIGN
; break;
1278 if (CPP_OPTION (pfile
, dollars_in_ident
))
1280 /* Fall through... */
1284 result
->type
= CPP_OTHER
;
1292 /* An upper bound on the number of bytes needed to spell TOKEN,
1293 including preceding whitespace. */
1295 cpp_token_len (token
)
1296 const cpp_token
*token
;
1300 switch (TOKEN_SPELL (token
))
1302 default: len
= 0; break;
1304 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1305 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1307 /* 1 for whitespace, 4 for comment delimiters. */
1311 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1312 already contain enough space to hold the token's spelling.
1313 Returns a pointer to the character after the last character
1316 cpp_spell_token (pfile
, token
, buffer
)
1317 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1318 const cpp_token
*token
;
1319 unsigned char *buffer
;
1321 switch (TOKEN_SPELL (token
))
1323 case SPELL_OPERATOR
:
1325 const unsigned char *spelling
;
1328 if (token
->flags
& DIGRAPH
)
1330 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1331 else if (token
->flags
& NAMED_OP
)
1334 spelling
= TOKEN_NAME (token
);
1336 while ((c
= *spelling
++) != '\0')
1342 *buffer
++ = token
->val
.c
;
1347 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1348 buffer
+= NODE_LEN (token
->val
.node
);
1352 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1353 buffer
+= token
->val
.str
.len
;
1358 int left
, right
, tag
;
1359 switch (token
->type
)
1361 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1362 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1363 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1364 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1365 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1367 cpp_ice (pfile
, "unknown string token %s\n", TOKEN_NAME (token
));
1370 if (tag
) *buffer
++ = tag
;
1372 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1373 buffer
+= token
->val
.str
.len
;
1379 cpp_ice (pfile
, "unspellable token %s", TOKEN_NAME (token
));
1386 /* Returns TOKEN spelt as a null-terminated string. The string is
1387 freed when the reader is destroyed. Useful for diagnostics.

The storage is sized by cpp_token_len (TOKEN), taken from
_cpp_unaligned_alloc, and filled in by cpp_spell_token. */
1389 cpp_token_as_text (pfile
, token
)
1391 const cpp_token
*token
;
1393 unsigned int len
= cpp_token_len (token
);
1394 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
/* END receives the value returned by cpp_spell_token for the spelling
   written at START.  NOTE(review): the store of the terminating NUL and
   the return statement are not visible in this excerpt. */
1396 end
= cpp_spell_token (pfile
, token
, start
);
1402 /* Used by C front ends, which really should move to using
1403 cpp_token_as_text.

Returns the name stored in token_spellings for TYPE, viewed as a plain
char string.  TYPE indexes the table directly; no range check is made
here. */
1405 cpp_type2name (type
)
1406 enum cpp_ttype type
;
1408 return (const char *) token_spellings
[type
].name
;
1411 /* Writes the spelling of token to FP, without any preceding space.
1412 Separated from cpp_spell_token for efficiency - to avoid stdio
1413 double-buffering.

The cases mirror those of cpp_spell_token, with putc and fwrite on FP
in place of stores into a caller buffer. */
1415 cpp_output_token (token
, fp
)
1416 const cpp_token
*token
;
1419 switch (TOKEN_SPELL (token
))
1421 case SPELL_OPERATOR
:
1423 const unsigned char *spelling
;
/* Digraph operators are spelt from digraph_spellings, indexed by the
   distance of the token type from CPP_FIRST_DIGRAPH. */
1426 if (token
->flags
& DIGRAPH
)
1428 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
/* NOTE(review): the statement taken for NAMED_OP tokens is not visible
   in this excerpt. */
1429 else if (token
->flags
& NAMED_OP
)
/* Spelling recorded in token_spellings for this token type. */
1432 spelling
= TOKEN_NAME (token
);
/* The trailing semicolon and the pre-increment mark this as the closing
   test of a loop whose body precedes it: SPELLING is advanced before it
   is read, so the first character has to be handled ahead of this test
   (that part is not visible here). */
1437 while ((c
= *++spelling
) != '\0');
/* Single character payload. */
1442 putc (token
->val
.c
, fp
);
/* Identifier payload: NODE_LEN bytes of the node name. */
1447 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
/* Text payload: val.str written verbatim. */
1451 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
/* Quoted payloads: LEFT and RIGHT are the delimiters and TAG is the
   optional prefix letter, all chosen from the token type. */
1456 int left
, right
, tag
;
1457 switch (token
->type
)
1459 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1460 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1461 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1462 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1463 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
/* Unlike cpp_spell_token, which calls cpp_ice, an unexpected type is
   reported straight to stderr; no reader is available in this function. */
1465 fprintf (stderr
, "impossible STRING token %s\n", TOKEN_NAME (token
));
/* A zero TAG means there is no prefix to write. */
1468 if (tag
) putc (tag
, fp
);
/* Text of the literal.  NOTE(review): the writes of LEFT and RIGHT are
   not visible in this excerpt. */
1470 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1476 /* An error, most probably. */
1481 /* Compare two tokens.

Values are compared only when A and B agree on both type and flags;
what is compared then depends on the spelling category of A. */
1483 _cpp_equiv_tokens (a
, b
)
1484 const cpp_token
*a
, *b
;
1486 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1487 switch (TOKEN_SPELL (a
))
1489 default: /* Keep compiler happy. */
1490 case SPELL_OPERATOR
:
1493 return a
->val
.c
== b
->val
.c
; /* Character. */
/* Only macro arguments carry a value to check here: they must refer to
   the same argument number.  Every other type already matches. */
1495 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
/* Identifiers are compared by node pointer, not by text. */
1497 return a
->val
.node
== b
->val
.node
;
/* Text payloads match when the lengths agree and memcmp reports no
   difference. */
1500 return (a
->val
.str
.len
== b
->val
.str
.len
1501 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1508 /* Returns nonzero if a space should be inserted to avoid an
1509 accidental token paste for output. For simplicity, it is
1510 conservative, and occasionally advises a space where one is not
1511 needed, e.g. "." and ".2".

A and B are the types of TOKEN1 and TOKEN2.  C is the first character
of the spelling of TOKEN2 when that token is a digraph or an operator. */
1513 cpp_avoid_paste (pfile
, token1
, token2
)
1515 const cpp_token
*token1
, *token2
;
1517 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
/* NOTE(review): the actions taken for NAMED_OP tokens are not visible
   in this excerpt. */
1520 if (token1
->flags
& NAMED_OP
)
1522 if (token2
->flags
& NAMED_OP
)
/* First character of the digraph spelling of TOKEN2, or of its entry in
   token_spellings when it is an ordinary operator. */
1526 if (token2
->flags
& DIGRAPH
)
1527 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1528 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1529 c
= token_spellings
[b
].name
[0];
1531 /* Quickly get everything that can paste with an '='. */
1532 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* Rules per type of TOKEN1: each yields nonzero when C, or the type B,
   could merge with TOKEN1 into a different token. */
1537 case CPP_GREATER
: return c
== '>' || c
== '?';
1538 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1539 case CPP_PLUS
: return c
== '+';
1540 case CPP_MINUS
: return c
== '-' || c
== '>';
1541 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
/* A percent sign followed by a colon or a greater-than sign would read
   as one of the digraphs listed in digraph_spellings. */
1542 case CPP_MOD
: return c
== ':' || c
== '>';
1543 case CPP_AND
: return c
== '&';
1544 case CPP_OR
: return c
== '|';
1545 case CPP_COLON
: return c
== ':' || c
== '>';
1546 case CPP_DEREF
: return c
== '*';
1547 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1548 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
/* A number after a name matters only when name_p accepts the text of
   the number.  NOTE(review): one alternative of this condition is
   missing from the excerpt. */
1549 case CPP_NAME
: return ((b
== CPP_NUMBER
1550 && name_p (pfile
, &token2
->val
.str
))
1552 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1553 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1554 || c
== '.' || c
== '+' || c
== '-');
/* Only relevant with the objc option: an at-sign directly before a name
   or a string. */
1555 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1556 && token1
->val
.c
== '@'
1557 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1564 /* Output all the remaining tokens on the current line, and a newline
1565 character, to FP. Leading whitespace is removed. If there are
1566 macros, special token padding is not performed.

Tokens are pulled with cpp_get_token until one of type CPP_EOF is
returned. */
1568 cpp_output_line (pfile
, fp
)
1572 const cpp_token
*token
;
1574 token
= cpp_get_token (pfile
);
1575 while (token
->type
!= CPP_EOF
)
1577 cpp_output_token (token
, fp
);
/* Fetch the following token before deciding on spacing, so that the
   PREV_WHITE flag tested below belongs to the token about to be
   written.  NOTE(review): the action taken when the flag is set is not
   visible in this excerpt; presumably a single space is written. */
1578 token
= cpp_get_token (pfile
);
1579 if (token
->flags
& PREV_WHITE
)
1586 /* Returns the value of a hexadecimal digit.

The visible part simply forwards C to hex_value.  NOTE(review): the
function header and any validity check on C are not visible in this
excerpt. */
1592 return hex_value (c
);
1597 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1598 failure if cpplib is not parsing C++ or C99. Such failure is
1599 silent, and no variables are updated. Otherwise returns 0, and
1600 warns if -Wtraditional.
1602 [lex.charset]: The character designated by the universal character
1603 name \UNNNNNNNN is that character whose character short name in
1604 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1605 universal character name \uNNNN is that character whose character
1606 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1607 for a universal character name is less than 0x20 or in the range
1608 0x7F-0x9F (inclusive), or if the universal character name
1609 designates a character in the basic source character set, then the
1610 program is ill-formed.
1612 We assume that wchar_t is Unicode, so we don't need to do any
1613 mapping. Is this ever wrong?
1615 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1616 LIMIT is the end of the string or charconst. PSTR is updated to
1617 point after the UCS on return, and the UCS is written into PC. */
1620 maybe_read_ucs (pfile
, pstr
, limit
, pc
)
1622 const unsigned char **pstr
;
1623 const unsigned char *limit
;
1626 const unsigned char *p
= *pstr
;
1627 unsigned int code
= 0;
1628 unsigned int c
= *pc
, length
;
1630 /* Only attempt to interpret a UCS for C++ and C99. */
1631 if (! (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
)))
1634 if (CPP_WTRADITIONAL (pfile
))
1635 cpp_warning (pfile
, "the meaning of '\\%c' varies with -traditional", c
);
/* A lower-case u introduces four hex digits; otherwise eight are
   expected. */
1637 length
= (c
== 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT. */
1639 if ((size_t) (limit
- p
) < length
)
1641 cpp_error (pfile
, "incomplete universal-character-name");
1642 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts down the digits still expected while P advances; each
   digit is shifted into CODE, most significant first.  A LENGTH of zero
   afterwards therefore means every digit was consumed. */
1647 for (; length
; length
--, p
++)
1651 code
= (code
<< 4) + hex_digit_value (c
);
1655 "non-hex digit '%c' in universal-character-name", c
);
1656 /* We shouldn't skip in case there are multibyte chars. */
1662 #ifdef TARGET_EBCDIC
1663 cpp_error (pfile
, "universal-character-name on EBCDIC target");
1664 code
= 0x3f; /* EBCDIC invalid character */
1666 /* True extended characters are OK. */
1668 && !(code
& 0x80000000)
1669 && !(code
>= 0xD800 && code
<= 0xDFFF))
1671 /* The standard permits $, @ and ` to be specified as UCNs. We use
1672 hex escapes so that this also works with EBCDIC hosts. */
1673 else if (code
== 0x24 || code
== 0x40 || code
== 0x60)
1675 /* Don't give another error if one occurred above. */
1676 else if (length
== 0)
1677 cpp_error (pfile
, "universal-character-name out of range");
1685 /* Interpret an escape sequence, and return its value. PSTR points to
1686 the input pointer, which is just after the backslash. LIMIT is how
1687 much text we have. MASK is a bitmask for the precision for the
1688 destination type (char or wchar_t). TRADITIONAL, if true, does not
1689 interpret escapes that did not exist in traditional C.
1691 Handles all relevant diagnostics. */
1693 cpp_parse_escape (pfile
, pstr
, limit
, mask
, traditional
)
1695 const unsigned char **pstr
;
1696 const unsigned char *limit
;
1697 unsigned HOST_WIDE_INT mask
;
/* STR is a working copy of the input pointer; C is the character that
   selects the escape. */
1701 const unsigned char *str
= *pstr
;
1702 unsigned int c
= *str
++;
/* These characters stand for themselves, so C is left unchanged. */
1706 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes are replaced by the matching TARGET_ character code. */
1707 case 'b': c
= TARGET_BS
; break;
1708 case 'f': c
= TARGET_FF
; break;
1709 case 'n': c
= TARGET_NEWLINE
; break;
1710 case 'r': c
= TARGET_CR
; break;
1711 case 't': c
= TARGET_TAB
; break;
1712 case 'v': c
= TARGET_VT
; break;
1714 case '(': case '{': case '[': case '%':
1715 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1716 '\%' is used to prevent SCCS from getting confused. */
1717 unknown
= CPP_PEDANTIC (pfile
);
1721 if (CPP_WTRADITIONAL (pfile
))
1722 cpp_warning (pfile
, "the meaning of '\\a' varies with -traditional");
1728 if (CPP_PEDANTIC (pfile
))
1729 cpp_pedwarn (pfile
, "non-ISO-standard escape sequence, '\\%c'", c
);
/* Universal character names are delegated; a nonzero result marks the
   escape as unknown. */
1734 unknown
= maybe_read_ucs (pfile
, &str
, limit
, &c
);
1738 if (CPP_WTRADITIONAL (pfile
))
1739 cpp_warning (pfile
, "the meaning of '\\x' varies with -traditional");
/* Hex escape: I accumulates the value and OVERFLOW collects any bits
   that would be lost off the top when another digit is shifted in. */
1743 unsigned int i
= 0, overflow
= 0;
1744 int digits_found
= 0;
1752 overflow
|= i
^ (i
<< 4 >> 4);
1753 i
= (i
<< 4) + hex_digit_value (c
);
1758 cpp_error (pfile
, "\\x used with no following hex digits");
/* Complain when bits were lost or the value does not fit within MASK. */
1760 if (overflow
| (i
!= (i
& mask
)))
1762 cpp_pedwarn (pfile
, "hex escape sequence out of range");
1769 case '0': case '1': case '2': case '3':
1770 case '4': case '5': case '6': case '7':
/* Octal escape: the digit already read seeds the value. */
1772 unsigned int i
= c
- '0';
/* Stop at LIMIT or once COUNT reaches 3.  NOTE(review): the
   initialisation of COUNT is not visible in this excerpt. */
1775 while (str
< limit
&& ++count
< 3)
1778 if (c
< '0' || c
> '7')
1781 i
= (i
<< 3) + c
- '0';
1784 if (i
!= (i
& mask
))
1786 cpp_pedwarn (pfile
, "octal escape sequence out of range");
/* Two forms of the same diagnostic: one prints C as a character, the
   other as a three-digit octal code.  NOTE(review): the test choosing
   between them is not visible in this excerpt. */
1801 cpp_pedwarn (pfile
, "unknown escape sequence '\\%c'", c
);
1803 cpp_pedwarn (pfile
, "unknown escape sequence: '\\%03o'", c
);
1807 cpp_pedwarn (pfile
, "escape sequence out of range for character");
/* Fall back to the plain type sizes when no MAX_ variant has been
   defined.  These give the widths in bits used by
   cpp_interpret_charconst. */
1813 #ifndef MAX_CHAR_TYPE_SIZE
1814 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1817 #ifndef MAX_WCHAR_TYPE_SIZE
1818 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1821 /* Interpret a (possibly wide) character constant in TOKEN.
1822 WARN_MULTI warns about multi-character charconsts, if not
1823 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1824 that did not exist in traditional C. PCHARS_SEEN points to a
1825 variable that is filled in with the number of characters seen.

The characters are packed into RESULT, WIDTH bits each, with earlier
characters ending up in the more significant positions. */
1827 cpp_interpret_charconst (pfile
, token
, warn_multi
, traditional
, pchars_seen
)
1829 const cpp_token
*token
;
1832 unsigned int *pchars_seen
;
1834 const unsigned char *str
= token
->val
.str
.text
;
1835 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1836 unsigned int chars_seen
= 0;
1837 unsigned int width
, max_chars
, c
;
1838 unsigned HOST_WIDE_INT mask
;
1839 HOST_WIDE_INT result
= 0;
1842 #ifdef MULTIBYTE_CHARS
1843 (void) local_mbtowc (NULL
, NULL
, 0);
1846 /* Width in bits. */
/* Plain constants use the char width and the signed_char option; the
   other branch uses the wchar_t width and WCHAR_UNSIGNED. */
1847 if (token
->type
== CPP_CHAR
)
1849 width
= MAX_CHAR_TYPE_SIZE
;
1850 unsigned_p
= CPP_OPTION (pfile
, signed_char
) == 0;
1854 width
= MAX_WCHAR_TYPE_SIZE
;
1855 unsigned_p
= WCHAR_UNSIGNED
;
/* MASK keeps the low WIDTH bits.  The shift is performed only when
   WIDTH is narrower than HOST_WIDE_INT. */
1858 if (width
< HOST_BITS_PER_WIDE_INT
)
1859 mask
= ((unsigned HOST_WIDE_INT
) 1 << width
) - 1;
/* Number of characters of WIDTH bits that fit in a HOST_WIDE_INT. */
1862 max_chars
= HOST_BITS_PER_WIDE_INT
/ width
;
1866 #ifdef MULTIBYTE_CHARS
1870 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1873 cpp_warning (pfile
, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which also advances
   STR. */
1886 c
= cpp_parse_escape (pfile
, &str
, limit
, mask
, traditional
);
1888 #ifdef MAP_CHARACTER
1890 c
= MAP_CHARACTER (c
);
1893 /* Merge character into result; ignore excess chars. */
1894 if (++chars_seen
<= max_chars
)
1896 if (width
< HOST_BITS_PER_WIDE_INT
)
1897 result
= (result
<< width
) | (c
& mask
);
1903 if (chars_seen
== 0)
1904 cpp_error (pfile
, "empty character constant");
1905 else if (chars_seen
> max_chars
)
/* Clamp the count so that the later bit arithmetic and the value
   reported to the caller stay within what RESULT can hold. */
1907 chars_seen
= max_chars
;
1908 cpp_warning (pfile
, "character constant too long");
1910 else if (chars_seen
> 1 && !traditional
&& warn_multi
)
1911 cpp_warning (pfile
, "multi-character character constant");
1913 /* If relevant type is signed, sign-extend the constant. */
/* NBITS is the number of bits actually occupied in RESULT; MASK is
   rebuilt to cover exactly those bits. */
1916 unsigned int nbits
= chars_seen
* width
;
1918 mask
= (unsigned HOST_WIDE_INT
) ~0 >> (HOST_BITS_PER_WIDE_INT
- nbits
);
/* True when the type is unsigned or the top occupied bit is clear. */
1919 if (unsigned_p
|| ((result
>> (nbits
- 1)) & 1) == 0)
1925 *pchars_seen
= chars_seen
;
1929 /* Memory buffers. Changing these three constants can have a dramatic
1930 effect on performance. The values here are reasonable defaults,
1931 but might be tuned. If you adjust them, be sure to test across a
1932 range of uses of cpplib, including heavy nested function-like macro
1933 expansion. Also check the change in peak memory usage (NJAMD is a
1934 good tool for this). */
/* MIN_BUFF_SIZE: smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND: largest free buffer _cpp_get_buff will hand
   out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE: size requested when extending BUFF, namely
   MIN_EXTRA plus twice the distance from cur to limit. */
1935 #define MIN_BUFF_SIZE 8000
1936 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
1937 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1938 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check on the two constants above. */
1940 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1941 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
/* DEFAULT_ALIGNMENT is the offset of member u within struct dummy (the
   struct is declared outside this excerpt).  CPP_ALIGN rounds SIZE up to
   a multiple of ALIGN; the mask arithmetic is only correct when ALIGN is
   a power of two. */
1954 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1955 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1957 /* Create a new allocation buffer. Place the control block at the end
1958 of the buffer, so that buffer overflows will cause immediate chaos.

A single xmalloc block holds LEN bytes of data followed by the
_cpp_buff control block. */
1964 unsigned char *base
;
/* Never allocate less than MIN_BUFF_SIZE, and round the length up to
   DEFAULT_ALIGNMENT so that the control block placed after the data is
   itself aligned. */
1966 if (len
< MIN_BUFF_SIZE
)
1967 len
= MIN_BUFF_SIZE
;
1968 len
= CPP_ALIGN (len
, DEFAULT_ALIGNMENT
);
1970 base
= xmalloc (len
+ sizeof (_cpp_buff
));
/* The control block lives at the first byte past the data area. */
1971 result
= (_cpp_buff
*) (base
+ len
);
1972 result
->base
= base
;
/* The usable region ends where the control block begins. */
1974 result
->limit
= base
+ len
;
1975 result
->next
= NULL
;
1979 /* Place a chain of unwanted allocation buffers on the free list.

BUFF heads the chain; the whole chain is pushed onto the front of
free_buffs in PFILE. */
1981 _cpp_release_buff (pfile
, buff
)
1985 _cpp_buff
*end
= buff
;
/* Splice in front of the existing free list: END links to the old head
   and BUFF becomes the new head.  NOTE(review): the code that advances
   END to the last buffer of the chain is not visible in this excerpt. */
1989 end
->next
= pfile
->free_buffs
;
1990 pfile
->free_buffs
= buff
;
1993 /* Return a free buffer of size at least MIN_SIZE.

The free list of PFILE is searched first.  A candidate is accepted when
its capacity (limit minus base) is at least MIN_SIZE and no more than
BUFF_SIZE_UPPER_BOUND (MIN_SIZE).  The buffer handed back has its next
link cleared and cur reset to base.  new_buff supplies a fresh buffer
otherwise. */
1995 _cpp_get_buff (pfile
, min_size
)
1999 _cpp_buff
*result
, **p
;
/* P addresses the link being examined, so that a buffer can be unlinked
   by storing through it.  The loop has no exit test of its own.
   NOTE(review): the condition guarding the new_buff call and the unlink
   statement are not visible in this excerpt. */
2001 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
2006 return new_buff (min_size
);
2008 size
= result
->limit
- result
->base
;
2009 /* Return a buffer that's big enough, but don't waste one that's
2011 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2016 result
->next
= NULL
;
2017 result
->cur
= result
->base
;
2021 /* Creates a new buffer with enough space to hold the uncommitted
2022 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2023 the excess bytes to the new buffer. Chains the new buffer after
2024 BUFF, and returns the new buffer. */
2026 _cpp_append_extend_buff (pfile
, buff
, min_extra
)
/* Request MIN_EXTRA plus twice the distance from cur to limit in BUFF. */
2031 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
2032 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
/* Link the new buffer after BUFF, then carry the bytes from cur onwards
   over to the start of the new buffer.  BUFF_ROOM is defined outside
   this excerpt; presumably it is the byte count from cur to limit. */
2034 buff
->next
= new_buff
;
2035 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
2039 /* Creates a new buffer with enough space to hold the uncommitted
2040 remaining bytes of the buffer pointed to by BUFF, and at least
2041 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2042 Chains the new buffer before the buffer pointed to by BUFF, and
2043 updates the pointer to point to the new buffer.

In the parameter list the pointer to the buffer pointer is named
PBUFF. */
2045 _cpp_extend_buff (pfile
, pbuff
, min_extra
)
2050 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
/* Request MIN_EXTRA plus twice the distance from cur to limit in the
   old buffer. */
2051 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
2053 new_buff
= _cpp_get_buff (pfile
, size
);
/* Carry the bytes from cur onwards over to the start of the new buffer,
   then put the new buffer in front of the old one.  BUFF_ROOM is defined
   outside this excerpt; presumably it is the byte count from cur to
   limit.  NOTE(review): the store through PBUFF is not visible here. */
2054 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
2055 new_buff
->next
= old_buff
;
2059 /* Free a chain of buffers starting at BUFF.

The walk stops at the first null link. */
2061 _cpp_free_buff (buff
)
/* NEXT carries the successor across iterations.  NOTE(review): the loop
   body is not visible in this excerpt; presumably NEXT is read before
   the current buffer is released. */
2066 for (; buff
; buff
= next
)
2073 /* Allocate permanent, unaligned storage of length LEN.

Space is carved from the u_buff chain of PFILE by advancing cur.  When
the current buffer has fewer than LEN bytes left, a buffer from
_cpp_get_buff is pushed on the front of the chain. */
2075 _cpp_unaligned_alloc (pfile
, len
)
2079 _cpp_buff
*buff
= pfile
->u_buff
;
2080 unsigned char *result
= buff
->cur
;
/* Not enough room between cur and limit: start a new buffer and make it
   the head of u_buff.  NOTE(review): the statement that re-points RESULT
   at the new buffer is not visible in this excerpt. */
2082 if (len
> (size_t) (buff
->limit
- result
))
2084 buff
= _cpp_get_buff (pfile
, len
);
2085 buff
->next
= pfile
->u_buff
;
2086 pfile
->u_buff
= buff
;
/* Commit the allocation by moving cur past it. */
2090 buff
->cur
= result
+ len
;
2094 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2095 That buffer is used for growing allocations when saving macro
2096 replacement lists in a #define, and when parsing an answer to an
2097 assertion in #assert, #unassert or #if (and therefore possibly
2098 whilst expanding macros). It therefore must not be used by any
2099 code that they might call: specifically the lexer and the guts of
the macro expander.
2102 All existing other uses clearly fit this restriction: storing
2103 registered pragmas during initialization. */
2105 _cpp_aligned_alloc (pfile
, len
)
2109 _cpp_buff
*buff
= pfile
->a_buff
;
2110 unsigned char *result
= buff
->cur
;
2112 if (len
> (size_t) (buff
->limit
- result
))
2114 buff
= _cpp_get_buff (pfile
, len
);
2115 buff
->next
= pfile
->a_buff
;
2116 pfile
->a_buff
= buff
;
2120 buff
->cur
= result
+ len
;