1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
30 #ifdef MULTIBYTE_CHARS
35 /* Tokens with SPELL_STRING store their spelling in the token list,
36 and its length in token->val.str.len. */
49 enum spell_type category
;
50 const unsigned char *name
;
53 static const unsigned char *const digraph_spellings
[] =
54 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
56 #define OP(e, s) { SPELL_OPERATOR, U s },
57 #define TK(e, s) { s, U STRINGX (e) },
58 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
62 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
63 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
64 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
66 static void handle_newline
PARAMS ((cpp_reader
*));
67 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_reader
*));
68 static cppchar_t get_effective_char
PARAMS ((cpp_reader
*));
70 static int skip_block_comment
PARAMS ((cpp_reader
*));
71 static int skip_line_comment
PARAMS ((cpp_reader
*));
72 static void adjust_column
PARAMS ((cpp_reader
*));
73 static int skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
74 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*));
75 static uchar
*parse_slow
PARAMS ((cpp_reader
*, const uchar
*, int,
77 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, int));
78 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const uchar
*));
79 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
80 static bool trigraph_p
PARAMS ((cpp_reader
*));
81 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const uchar
*,
83 static bool continue_after_nul
PARAMS ((cpp_reader
*));
84 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
85 static int maybe_read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
86 const unsigned char *, cppchar_t
*));
87 static tokenrun
*next_tokenrun
PARAMS ((tokenrun
*));
89 static unsigned int hex_digit_value
PARAMS ((unsigned int));
90 static _cpp_buff
*new_buff
PARAMS ((size_t));
94 Compares the token TOKEN to the NUL-terminated string STRING.
95 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
97 cpp_ideq (token
, string
)
98 const cpp_token
*token
;
101 if (token
->type
!= CPP_NAME
)
104 return !ustrcmp (NODE_NAME (token
->val
.node
), (const uchar
*) string
);
107 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
108 Returns with buffer->cur pointing to the character immediately
109 following the newline (combination). */
111 handle_newline (pfile
)
114 cpp_buffer
*buffer
= pfile
->buffer
;
116 /* Handle CR-LF and LF-CR. Most other implementations (e.g. Java)
117 only accept CR-LF; maybe we should fall back to that behavior? */
118 if (buffer
->cur
[-1] + buffer
->cur
[0] == '\r' + '\n')
121 buffer
->line_base
= buffer
->cur
;
122 buffer
->col_adjust
= 0;
126 /* Subroutine of skip_escaped_newlines; called when a 3-character
127 sequence beginning with "??" is encountered. buffer->cur points to
130 Warns if necessary, and returns true if the sequence forms a
131 trigraph and the trigraph should be honored. */
136 cpp_buffer
*buffer
= pfile
->buffer
;
137 cppchar_t from_char
= buffer
->cur
[1];
140 if (!_cpp_trigraph_map
[from_char
])
143 accept
= CPP_OPTION (pfile
, trigraphs
);
145 /* Don't warn about trigraphs in comments. */
146 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
149 cpp_error_with_line (pfile
, DL_WARNING
,
150 pfile
->line
, CPP_BUF_COL (buffer
) - 1,
151 "trigraph ??%c converted to %c",
153 (int) _cpp_trigraph_map
[from_char
]);
154 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
156 buffer
->last_Wtrigraphs
= buffer
->cur
;
157 cpp_error_with_line (pfile
, DL_WARNING
,
158 pfile
->line
, CPP_BUF_COL (buffer
) - 1,
159 "trigraph ??%c ignored", (int) from_char
);
166 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
167 lie in buffer->cur[-1]. Returns the next byte, which will be in
168 buffer->cur[-1]. This routine performs preprocessing stages 1 and
169 2 of the ISO C standard. */
171 skip_escaped_newlines (pfile
)
174 cpp_buffer
*buffer
= pfile
->buffer
;
175 cppchar_t next
= buffer
->cur
[-1];
177 /* Only do this if we apply stages 1 and 2. */
178 if (!buffer
->from_stage3
)
180 const unsigned char *saved_cur
;
187 if (buffer
->cur
[0] != '?' || !trigraph_p (pfile
))
190 /* Translate the trigraph. */
191 next
= _cpp_trigraph_map
[buffer
->cur
[1]];
197 if (buffer
->cur
== buffer
->rlimit
)
200 /* We have a backslash, and room for at least one more
201 character. Skip horizontal whitespace. */
202 saved_cur
= buffer
->cur
;
204 next1
= *buffer
->cur
++;
205 while (is_nvspace (next1
) && buffer
->cur
< buffer
->rlimit
);
207 if (!is_vspace (next1
))
209 buffer
->cur
= saved_cur
;
213 if (saved_cur
!= buffer
->cur
- 1
214 && !pfile
->state
.lexing_comment
)
215 cpp_error (pfile
, DL_WARNING
,
216 "backslash and newline separated by space");
218 handle_newline (pfile
);
219 buffer
->backup_to
= buffer
->cur
;
220 if (buffer
->cur
== buffer
->rlimit
)
222 cpp_error (pfile
, DL_PEDWARN
,
223 "backslash-newline at end of file");
227 next
= *buffer
->cur
++;
229 while (next
== '\\' || next
== '?');
235 /* Obtain the next character, after trigraph conversion and skipping
236 an arbitrarily long string of escaped newlines. The common case of
237 no trigraphs or escaped newlines falls through quickly. On return,
238 buffer->backup_to points to where to return to if the character is
239 not to be processed. */
241 get_effective_char (pfile
)
245 cpp_buffer
*buffer
= pfile
->buffer
;
247 buffer
->backup_to
= buffer
->cur
;
248 next
= *buffer
->cur
++;
249 if (__builtin_expect (next
== '?' || next
== '\\', 0))
250 next
= skip_escaped_newlines (pfile
);
255 /* Skip a C-style block comment. We find the end of the comment by
256 seeing if an asterisk is before every '/' we encounter. Returns
257 nonzero if comment terminated by EOF, zero otherwise. */
259 skip_block_comment (pfile
)
262 cpp_buffer
*buffer
= pfile
->buffer
;
263 cppchar_t c
= EOF
, prevc
= EOF
;
265 pfile
->state
.lexing_comment
= 1;
266 while (buffer
->cur
!= buffer
->rlimit
)
268 prevc
= c
, c
= *buffer
->cur
++;
270 /* FIXME: For speed, create a new character class of characters
271 of interest inside block comments. */
272 if (c
== '?' || c
== '\\')
273 c
= skip_escaped_newlines (pfile
);
275 /* People like decorating comments with '*', so check for '/'
276 instead for efficiency. */
282 /* Warn about potential nested comments, but not if the '/'
283 comes immediately before the true comment delimiter.
284 Don't bother to get it right across escaped newlines. */
285 if (CPP_OPTION (pfile
, warn_comments
)
286 && buffer
->cur
[0] == '*' && buffer
->cur
[1] != '/')
287 cpp_error_with_line (pfile
, DL_WARNING
,
288 pfile
->line
, CPP_BUF_COL (buffer
),
289 "\"/*\" within comment");
291 else if (is_vspace (c
))
292 handle_newline (pfile
);
294 adjust_column (pfile
);
297 pfile
->state
.lexing_comment
= 0;
298 return c
!= '/' || prevc
!= '*';
301 /* Skip a C++ line comment, leaving buffer->cur pointing to the
302 terminating newline. Handles escaped newlines. Returns nonzero
303 if a multiline comment. */
305 skip_line_comment (pfile
)
308 cpp_buffer
*buffer
= pfile
->buffer
;
309 unsigned int orig_line
= pfile
->line
;
311 #ifdef MULTIBYTE_CHARS
316 pfile
->state
.lexing_comment
= 1;
317 #ifdef MULTIBYTE_CHARS
318 /* Reset multibyte conversion state. */
319 (void) local_mbtowc (NULL
, NULL
, 0);
323 if (buffer
->cur
== buffer
->rlimit
)
326 #ifdef MULTIBYTE_CHARS
327 char_len
= local_mbtowc (&wc
, (const char *) buffer
->cur
,
328 buffer
->rlimit
- buffer
->cur
);
331 cpp_error (pfile
, DL_WARNING
,
332 "ignoring invalid multibyte character");
338 buffer
->cur
+= char_len
;
344 if (c
== '?' || c
== '\\')
345 c
= skip_escaped_newlines (pfile
);
347 while (!is_vspace (c
));
349 /* Step back over the newline, except at EOF. */
353 pfile
->state
.lexing_comment
= 0;
354 return orig_line
!= pfile
->line
;
357 /* pfile->buffer->cur is one beyond the \t character. Update
358 col_adjust so we track the column correctly. */
360 adjust_column (pfile
)
363 cpp_buffer
*buffer
= pfile
->buffer
;
364 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
366 /* Round it up to multiple of the tabstop, but subtract 1 since the
367 tab itself occupies a character position. */
368 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
369 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
372 /* Skips whitespace, saving the next non-whitespace character.
373 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
374 tokens might be assigned an incorrect column. */
376 skip_whitespace (pfile
, c
)
380 cpp_buffer
*buffer
= pfile
->buffer
;
381 unsigned int warned
= 0;
385 /* Horizontal space always OK. */
389 adjust_column (pfile
);
390 /* Just \f \v or \0 left. */
393 if (buffer
->cur
- 1 == buffer
->rlimit
)
397 cpp_error (pfile
, DL_WARNING
, "null character(s) ignored");
401 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
402 cpp_error_with_line (pfile
, DL_PEDWARN
, pfile
->line
,
403 CPP_BUF_COL (buffer
),
404 "%s in preprocessing directive",
405 c
== '\f' ? "form feed" : "vertical tab");
409 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
410 while (is_nvspace (c
));
416 /* See if the characters of a number token are valid in a name (no
419 name_p (pfile
, string
)
421 const cpp_string
*string
;
425 for (i
= 0; i
< string
->len
; i
++)
426 if (!is_idchar (string
->text
[i
]))
432 /* Parse an identifier, skipping embedded backslash-newlines. This is
433 a critical inner loop. The common case is an identifier which has
434 not been split by backslash-newline, does not contain a dollar
435 sign, and has already been scanned (roughly 10:1 ratio of
436 seen:unseen identifiers in normal code; the distribution is
437 Poisson-like). Second most common case is a new identifier, not
438 split and no dollar sign. The other possibilities are rare and
439 have been relegated to parse_slow. */
440 static cpp_hashnode
*
441 parse_identifier (pfile
)
444 cpp_hashnode
*result
;
445 const uchar
*cur
, *base
;
447 /* Fast-path loop. Skim over a normal identifier.
448 N.B. ISIDNUM does not include $. */
449 cur
= pfile
->buffer
->cur
;
450 while (ISIDNUM (*cur
))
453 /* Check for slow-path cases. */
454 if (*cur
== '?' || *cur
== '\\' || *cur
== '$')
458 base
= parse_slow (pfile
, cur
, 0, &len
);
459 result
= (cpp_hashnode
*)
460 ht_lookup (pfile
->hash_table
, base
, len
, HT_ALLOCED
);
464 base
= pfile
->buffer
->cur
- 1;
465 pfile
->buffer
->cur
= cur
;
466 result
= (cpp_hashnode
*)
467 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
470 /* Rarely, identifiers require diagnostics when lexed.
471 XXX Has to be forced out of the fast path. */
472 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
473 && !pfile
->state
.skipping
, 0))
475 /* It is allowed to poison the same identifier twice. */
476 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
477 cpp_error (pfile
, DL_ERROR
, "attempt to use poisoned \"%s\"",
480 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
481 replacement list of a variadic macro. */
482 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
483 && !pfile
->state
.va_args_ok
)
484 cpp_error (pfile
, DL_PEDWARN
,
485 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
491 /* Slow path. This handles numbers and identifiers which have been
492 split, or contain dollar signs. The part of the token from
493 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
494 1 if it's a number, and 2 if it has a leading period. Returns a
495 pointer to the token's NUL-terminated spelling in permanent
496 storage, and sets PLEN to its length. */
498 parse_slow (pfile
, cur
, number_p
, plen
)
504 cpp_buffer
*buffer
= pfile
->buffer
;
505 const uchar
*base
= buffer
->cur
- 1;
506 struct obstack
*stack
= &pfile
->hash_table
->stack
;
507 unsigned int c
, prevc
, saw_dollar
= 0;
509 /* Place any leading period. */
511 obstack_1grow (stack
, '.');
513 /* Copy the part of the token which is known to be okay. */
514 obstack_grow (stack
, base
, cur
- base
);
516 /* Now process the part which isn't. We are looking at one of
517 '$', '\\', or '?' on entry to this loop. */
523 /* Potential escaped newline? */
524 buffer
->backup_to
= buffer
->cur
- 1;
525 if (c
== '?' || c
== '\\')
526 c
= skip_escaped_newlines (pfile
);
532 if (c
!= '.' && !VALID_SIGN (c
, prevc
))
536 /* Handle normal identifier characters in this loop. */
540 obstack_1grow (stack
, c
);
547 while (is_idchar (c
));
550 /* Step back over the unwanted char. */
553 /* $ is not an identifier character in the standard, but is commonly
554 accepted as an extension. Don't warn about it in skipped
555 conditional blocks. */
556 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->state
.skipping
)
557 cpp_error (pfile
, DL_PEDWARN
, "'$' character(s) in identifier or number");
559 /* Identifiers and numbers are null-terminated. */
560 *plen
= obstack_object_size (stack
);
561 obstack_1grow (stack
, '\0');
562 return obstack_finish (stack
);
565 /* Parse a number, beginning with character C, skipping embedded
566 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
567 before C. Place the result in NUMBER. */
569 parse_number (pfile
, number
, leading_period
)
576 /* Fast-path loop. Skim over a normal number.
577 N.B. ISIDNUM does not include $. */
578 cur
= pfile
->buffer
->cur
;
579 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
582 /* Check for slow-path cases. */
583 if (*cur
== '?' || *cur
== '\\' || *cur
== '$')
584 number
->text
= parse_slow (pfile
, cur
, 1 + leading_period
, &number
->len
);
587 const uchar
*base
= pfile
->buffer
->cur
- 1;
590 number
->len
= cur
- base
+ leading_period
;
591 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
592 dest
[number
->len
] = '\0';
597 memcpy (dest
, base
, cur
- base
);
598 pfile
->buffer
->cur
= cur
;
602 /* Subroutine of parse_string. */
604 unescaped_terminator_p (pfile
, dest
)
606 const unsigned char *dest
;
608 const unsigned char *start
, *temp
;
610 /* In #include-style directives, terminators are not escapable. */
611 if (pfile
->state
.angled_headers
)
614 start
= BUFF_FRONT (pfile
->u_buff
);
616 /* An odd number of consecutive backslashes represents an escaped
618 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
621 return ((dest
- temp
) & 1) == 0;
624 /* Parses a string, character constant, or angle-bracketed header file
625 name. Handles embedded trigraphs and escaped newlines. The stored
626 string is guaranteed NUL-terminated, but it is not guaranteed that
627 this is the first NUL since embedded NULs are preserved.
629 When this function returns, buffer->cur points to the next
630 character to be processed. */
632 parse_string (pfile
, token
, terminator
)
635 cppchar_t terminator
;
637 cpp_buffer
*buffer
= pfile
->buffer
;
638 unsigned char *dest
, *limit
;
640 bool warned_nulls
= false;
641 #ifdef MULTIBYTE_CHARS
646 dest
= BUFF_FRONT (pfile
->u_buff
);
647 limit
= BUFF_LIMIT (pfile
->u_buff
);
649 #ifdef MULTIBYTE_CHARS
650 /* Reset multibyte conversion state. */
651 (void) local_mbtowc (NULL
, NULL
, 0);
655 /* We need room for another char, possibly the terminating NUL. */
656 if ((size_t) (limit
- dest
) < 1)
658 size_t len_so_far
= dest
- BUFF_FRONT (pfile
->u_buff
);
659 _cpp_extend_buff (pfile
, &pfile
->u_buff
, 2);
660 dest
= BUFF_FRONT (pfile
->u_buff
) + len_so_far
;
661 limit
= BUFF_LIMIT (pfile
->u_buff
);
664 #ifdef MULTIBYTE_CHARS
665 char_len
= local_mbtowc (&wc
, (const char *) buffer
->cur
,
666 buffer
->rlimit
- buffer
->cur
);
669 cpp_error (pfile
, DL_WARNING
,
670 "ignoring invalid multibyte character");
676 buffer
->cur
+= char_len
;
683 /* Handle trigraphs, escaped newlines etc. */
684 if (c
== '?' || c
== '\\')
685 c
= skip_escaped_newlines (pfile
);
689 if (unescaped_terminator_p (pfile
, dest
))
692 else if (is_vspace (c
))
694 /* No string literal may extend over multiple lines. In
695 assembly language, suppress the error except for <>
696 includes. This is a kludge around not knowing where
699 if (CPP_OPTION (pfile
, lang
) != CLK_ASM
|| terminator
== '>')
700 cpp_error (pfile
, DL_ERROR
, "missing terminating %c character",
707 if (buffer
->cur
- 1 == buffer
->rlimit
)
712 cpp_error (pfile
, DL_WARNING
,
713 "null character(s) preserved in literal");
716 #ifdef MULTIBYTE_CHARS
719 for ( ; char_len
> 0; --char_len
)
720 *dest
++ = (*buffer
->cur
- char_len
);
729 token
->val
.str
.text
= BUFF_FRONT (pfile
->u_buff
);
730 token
->val
.str
.len
= dest
- BUFF_FRONT (pfile
->u_buff
);
731 BUFF_FRONT (pfile
->u_buff
) = dest
+ 1;
734 /* The stored comment includes the comment start and any terminator. */
736 save_comment (pfile
, token
, from
, type
)
739 const unsigned char *from
;
742 unsigned char *buffer
;
743 unsigned int len
, clen
;
745 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
747 /* C++ comments probably (not definitely) have moved past a new
748 line, which we don't want to save in the comment. */
749 if (is_vspace (pfile
->buffer
->cur
[-1]))
752 /* If we are currently in a directive, then we need to store all
753 C++ comments as C comments internally, and so we need to
754 allocate a little extra space in that case.
756 Note that the only time we encounter a directive here is
757 when we are saving comments in a "#define". */
758 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
760 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
762 token
->type
= CPP_COMMENT
;
763 token
->val
.str
.len
= clen
;
764 token
->val
.str
.text
= buffer
;
767 memcpy (buffer
+ 1, from
, len
- 1);
769 /* Finish conversion to a C comment, if necessary. */
770 if (pfile
->state
.in_directive
&& type
== '/')
773 buffer
[clen
- 2] = '*';
774 buffer
[clen
- 1] = '/';
778 /* Allocate COUNT tokens for RUN. */
780 _cpp_init_tokenrun (run
, count
)
784 run
->base
= xnewvec (cpp_token
, count
);
785 run
->limit
= run
->base
+ count
;
789 /* Returns the next tokenrun, or creates one if there is none. */
794 if (run
->next
== NULL
)
796 run
->next
= xnew (tokenrun
);
797 run
->next
->prev
= run
;
798 _cpp_init_tokenrun (run
->next
, 250);
804 /* Allocate a single token that is invalidated at the same time as the
805 rest of the tokens on the line. Has its line and col set to the
806 same as the last lexed token, so that diagnostics appear in the
809 _cpp_temp_token (pfile
)
812 cpp_token
*old
, *result
;
814 old
= pfile
->cur_token
- 1;
815 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
817 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
818 pfile
->cur_token
= pfile
->cur_run
->base
;
821 result
= pfile
->cur_token
++;
822 result
->line
= old
->line
;
823 result
->col
= old
->col
;
827 /* Lex a token into RESULT (external interface). Takes care of issues
828 like directive handling, token lookahead, multiple include
829 optimization and skipping. */
831 _cpp_lex_token (pfile
)
838 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
840 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
841 pfile
->cur_token
= pfile
->cur_run
->base
;
844 if (pfile
->lookaheads
)
847 result
= pfile
->cur_token
++;
850 result
= _cpp_lex_direct (pfile
);
852 if (result
->flags
& BOL
)
854 /* Is this a directive? If _cpp_handle_directive returns
855 false, it is an assembler #. */
856 if (result
->type
== CPP_HASH
857 /* 6.10.3 p 11: Directives in a list of macro arguments
858 give undefined behavior. This implementation
859 handles the directive as normal. */
860 && pfile
->state
.parsing_args
!= 1
861 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
863 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
864 (*pfile
->cb
.line_change
)(pfile
, result
, pfile
->state
.parsing_args
);
867 /* We don't skip tokens in directives. */
868 if (pfile
->state
.in_directive
)
871 /* Outside a directive, invalidate controlling macros. At file
872 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
873 get here and MI optimisation works. */
874 pfile
->mi_valid
= false;
876 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
883 /* A NUL terminates the current buffer. For ISO preprocessing this is
884 EOF, but for traditional preprocessing it indicates we need a line
885 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
886 to return a CPP_EOF to the caller. */
888 continue_after_nul (pfile
)
891 cpp_buffer
*buffer
= pfile
->buffer
;
894 buffer
->saved_flags
= BOL
;
895 if (CPP_OPTION (pfile
, traditional
))
897 if (pfile
->state
.in_directive
)
900 _cpp_remove_overlay (pfile
);
901 more
= _cpp_read_logical_line_trad (pfile
);
902 _cpp_overlay_buffer (pfile
, pfile
->out
.base
,
903 pfile
->out
.cur
- pfile
->out
.base
);
904 pfile
->line
= pfile
->out
.first_line
;
908 /* Stop parsing arguments with a CPP_EOF. When we finally come
909 back here, do the work of popping the buffer. */
910 if (!pfile
->state
.parsing_args
)
912 if (buffer
->cur
!= buffer
->line_base
)
914 /* Non-empty files should end in a newline. Don't warn
915 for command line and _Pragma buffers. */
916 if (!buffer
->from_stage3
)
917 cpp_error (pfile
, DL_PEDWARN
, "no newline at end of file");
918 handle_newline (pfile
);
921 /* Similarly, finish an in-progress directive with CPP_EOF
922 before popping the buffer. */
923 if (!pfile
->state
.in_directive
&& buffer
->prev
)
925 more
= !buffer
->return_at_eof
;
926 _cpp_pop_buffer (pfile
);
934 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
936 if (get_effective_char (pfile) == CHAR) \
937 result->type = THEN_TYPE; \
941 result->type = ELSE_TYPE; \
945 /* Lex a token into pfile->cur_token, which is also incremented, to
946 get diagnostics pointing to the correct location.
948 Does not handle issues such as token lookahead, multiple-include
949 optimisation, directives, skipping etc. This function is only
950 suitable for use by _cpp_lex_token, and in special cases like
951 lex_expansion_token which doesn't care for any of these issues.
953 When meeting a newline, returns CPP_EOF if parsing a directive,
954 otherwise returns to the start of the token buffer if permissible.
955 Returns the location of the lexed token. */
957 _cpp_lex_direct (pfile
)
962 const unsigned char *comment_start
;
963 cpp_token
*result
= pfile
->cur_token
++;
966 buffer
= pfile
->buffer
;
967 result
->flags
= buffer
->saved_flags
;
968 buffer
->saved_flags
= 0;
970 result
->line
= pfile
->line
;
974 result
->col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
979 case ' ': case '\t': case '\f': case '\v': case '\0':
980 result
->flags
|= PREV_WHITE
;
981 if (skip_whitespace (pfile
, c
))
986 if (continue_after_nul (pfile
))
988 result
->type
= CPP_EOF
;
991 case '\n': case '\r':
992 handle_newline (pfile
);
993 buffer
->saved_flags
= BOL
;
994 if (! pfile
->state
.in_directive
)
996 if (pfile
->state
.parsing_args
== 2)
997 buffer
->saved_flags
|= PREV_WHITE
;
998 if (!pfile
->keep_tokens
)
1000 pfile
->cur_run
= &pfile
->base_run
;
1001 result
= pfile
->base_run
.base
;
1002 pfile
->cur_token
= result
+ 1;
1006 result
->type
= CPP_EOF
;
1011 /* These could start an escaped newline, or '?' a trigraph. Let
1012 skip_escaped_newlines do all the work. */
1014 unsigned int line
= pfile
->line
;
1016 c
= skip_escaped_newlines (pfile
);
1017 if (line
!= pfile
->line
)
1020 /* We had at least one escaped newline of some sort.
1021 Update the token's line and column. */
1022 goto update_tokens_line
;
1026 /* We are either the original '?' or '\\', or a trigraph. */
1028 result
->type
= CPP_QUERY
;
1035 case '0': case '1': case '2': case '3': case '4':
1036 case '5': case '6': case '7': case '8': case '9':
1037 result
->type
= CPP_NUMBER
;
1038 parse_number (pfile
, &result
->val
.str
, 0);
1042 /* 'L' may introduce wide characters or strings. */
1044 const unsigned char *pos
= buffer
->cur
;
1046 c
= get_effective_char (pfile
);
1047 if (c
== '\'' || c
== '"')
1049 result
->type
= (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
1050 parse_string (pfile
, result
, c
);
1059 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1060 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1061 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1062 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1064 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1065 case 'G': case 'H': case 'I': case 'J': case 'K':
1066 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1067 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1069 result
->type
= CPP_NAME
;
1070 result
->val
.node
= parse_identifier (pfile
);
1072 /* Convert named operators to their proper types. */
1073 if (result
->val
.node
->flags
& NODE_OPERATOR
)
1075 result
->flags
|= NAMED_OP
;
1076 result
->type
= result
->val
.node
->directive_index
;
1082 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
1083 parse_string (pfile
, result
, c
);
1087 /* A potential block or line comment. */
1088 comment_start
= buffer
->cur
;
1089 c
= get_effective_char (pfile
);
1093 if (skip_block_comment (pfile
))
1094 cpp_error (pfile
, DL_ERROR
, "unterminated comment");
1096 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
1097 || CPP_IN_SYSTEM_HEADER (pfile
)))
1099 /* Warn about comments only if pedantically GNUC89, and not
1100 in system headers. */
1101 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1102 && ! buffer
->warned_cplusplus_comments
)
1104 cpp_error (pfile
, DL_PEDWARN
,
1105 "C++ style comments are not allowed in ISO C90");
1106 cpp_error (pfile
, DL_PEDWARN
,
1107 "(this will be reported only once per input file)");
1108 buffer
->warned_cplusplus_comments
= 1;
1111 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1112 cpp_error (pfile
, DL_WARNING
, "multi-line comment");
1116 result
->type
= CPP_DIV_EQ
;
1122 result
->type
= CPP_DIV
;
1126 if (!pfile
->state
.save_comments
)
1128 result
->flags
|= PREV_WHITE
;
1129 goto update_tokens_line
;
1132 /* Save the comment as a token in its own right. */
1133 save_comment (pfile
, result
, comment_start
, c
);
1137 if (pfile
->state
.angled_headers
)
1139 result
->type
= CPP_HEADER_NAME
;
1140 parse_string (pfile
, result
, '>');
1144 c
= get_effective_char (pfile
);
1146 result
->type
= CPP_LESS_EQ
;
1148 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
1149 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1150 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
1151 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1153 result
->type
= CPP_OPEN_SQUARE
;
1154 result
->flags
|= DIGRAPH
;
1156 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1158 result
->type
= CPP_OPEN_BRACE
;
1159 result
->flags
|= DIGRAPH
;
1164 result
->type
= CPP_LESS
;
1169 c
= get_effective_char (pfile
);
1171 result
->type
= CPP_GREATER_EQ
;
1173 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1174 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1175 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
1179 result
->type
= CPP_GREATER
;
1184 c
= get_effective_char (pfile
);
1186 result
->type
= CPP_MOD_EQ
;
1187 else if (CPP_OPTION (pfile
, digraphs
) && c
== ':')
1189 result
->flags
|= DIGRAPH
;
1190 result
->type
= CPP_HASH
;
1191 if (get_effective_char (pfile
) == '%')
1193 const unsigned char *pos
= buffer
->cur
;
1195 if (get_effective_char (pfile
) == ':')
1196 result
->type
= CPP_PASTE
;
1198 buffer
->cur
= pos
- 1;
1203 else if (CPP_OPTION (pfile
, digraphs
) && c
== '>')
1205 result
->flags
|= DIGRAPH
;
1206 result
->type
= CPP_CLOSE_BRACE
;
1211 result
->type
= CPP_MOD
;
1216 result
->type
= CPP_DOT
;
1217 c
= get_effective_char (pfile
);
1220 const unsigned char *pos
= buffer
->cur
;
1222 if (get_effective_char (pfile
) == '.')
1223 result
->type
= CPP_ELLIPSIS
;
1225 buffer
->cur
= pos
- 1;
1227 /* All known character sets have 0...9 contiguous. */
1228 else if (ISDIGIT (c
))
1230 result
->type
= CPP_NUMBER
;
1231 parse_number (pfile
, &result
->val
.str
, 1);
1233 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
1234 result
->type
= CPP_DOT_STAR
;
1240 c
= get_effective_char (pfile
);
1242 result
->type
= CPP_PLUS_PLUS
;
1244 result
->type
= CPP_PLUS_EQ
;
1248 result
->type
= CPP_PLUS
;
1253 c
= get_effective_char (pfile
);
1256 result
->type
= CPP_DEREF
;
1257 if (CPP_OPTION (pfile
, cplusplus
))
1259 if (get_effective_char (pfile
) == '*')
1260 result
->type
= CPP_DEREF_STAR
;
1266 result
->type
= CPP_MINUS_MINUS
;
1268 result
->type
= CPP_MINUS_EQ
;
1272 result
->type
= CPP_MINUS
;
1277 c
= get_effective_char (pfile
);
1279 result
->type
= CPP_AND_AND
;
1281 result
->type
= CPP_AND_EQ
;
1285 result
->type
= CPP_AND
;
1290 c
= get_effective_char (pfile
);
1292 result
->type
= CPP_OR_OR
;
1294 result
->type
= CPP_OR_EQ
;
1298 result
->type
= CPP_OR
;
1303 c
= get_effective_char (pfile
);
1304 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1305 result
->type
= CPP_SCOPE
;
1306 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1308 result
->flags
|= DIGRAPH
;
1309 result
->type
= CPP_CLOSE_SQUARE
;
1314 result
->type
= CPP_COLON
;
1318 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1319 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1320 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1321 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1322 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1324 case '~': result
->type
= CPP_COMPL
; break;
1325 case ',': result
->type
= CPP_COMMA
; break;
1326 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1327 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1328 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1329 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1330 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1331 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1332 case ';': result
->type
= CPP_SEMICOLON
; break;
1334 /* @ is a punctuator in Objective-C. */
1335 case '@': result
->type
= CPP_ATSIGN
; break;
1338 if (CPP_OPTION (pfile
, dollars_in_ident
))
1340 /* Fall through... */
1344 result
->type
= CPP_OTHER
;
1352 /* An upper bound on the number of bytes needed to spell TOKEN,
1353 including preceding whitespace. */
1355 cpp_token_len (token
)
1356 const cpp_token
*token
;
1360 switch (TOKEN_SPELL (token
))
1362 default: len
= 0; break;
1364 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1365 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1367 /* 1 for whitespace, 4 for comment delimiters. */
1371 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1372 already contain enough space to hold the token's spelling.
1373 Returns a pointer to the character after the last character
1376 cpp_spell_token (pfile
, token
, buffer
)
1377 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1378 const cpp_token
*token
;
1379 unsigned char *buffer
;
1381 switch (TOKEN_SPELL (token
))
1383 case SPELL_OPERATOR
:
1385 const unsigned char *spelling
;
1388 if (token
->flags
& DIGRAPH
)
1390 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1391 else if (token
->flags
& NAMED_OP
)
1394 spelling
= TOKEN_NAME (token
);
1396 while ((c
= *spelling
++) != '\0')
1402 *buffer
++ = token
->val
.c
;
1407 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1408 buffer
+= NODE_LEN (token
->val
.node
);
1412 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1413 buffer
+= token
->val
.str
.len
;
1418 int left
, right
, tag
;
1419 switch (token
->type
)
1421 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1422 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1423 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1424 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1425 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1427 cpp_error (pfile
, DL_ICE
, "unknown string token %s\n",
1428 TOKEN_NAME (token
));
1431 if (tag
) *buffer
++ = tag
;
1433 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1434 buffer
+= token
->val
.str
.len
;
1440 cpp_error (pfile
, DL_ICE
, "unspellable token %s", TOKEN_NAME (token
));
1447 /* Returns TOKEN spelt as a null-terminated string. The string is
1448 freed when the reader is destroyed. Useful for diagnostics. */
1450 cpp_token_as_text (pfile
, token
)
1452 const cpp_token
*token
;
1454 unsigned int len
= cpp_token_len (token
);
1455 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1457 end
= cpp_spell_token (pfile
, token
, start
);
1463 /* Used by C front ends, which really should move to using
1464 cpp_token_as_text. */
1466 cpp_type2name (type
)
1467 enum cpp_ttype type
;
1469 return (const char *) token_spellings
[type
].name
;
1472 /* Writes the spelling of token to FP, without any preceding space.
1473 Separated from cpp_spell_token for efficiency - to avoid stdio
1474 double-buffering. */
1476 cpp_output_token (token
, fp
)
1477 const cpp_token
*token
;
1480 switch (TOKEN_SPELL (token
))
1482 case SPELL_OPERATOR
:
1484 const unsigned char *spelling
;
1487 if (token
->flags
& DIGRAPH
)
1489 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1490 else if (token
->flags
& NAMED_OP
)
1493 spelling
= TOKEN_NAME (token
);
1498 while ((c
= *++spelling
) != '\0');
1503 putc (token
->val
.c
, fp
);
1508 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
1512 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1517 int left
, right
, tag
;
1518 switch (token
->type
)
1520 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1521 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1522 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1523 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1524 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1526 fprintf (stderr
, "impossible STRING token %s\n", TOKEN_NAME (token
));
1529 if (tag
) putc (tag
, fp
);
1531 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1537 /* An error, most probably. */
1542 /* Compare two tokens. */
1544 _cpp_equiv_tokens (a
, b
)
1545 const cpp_token
*a
, *b
;
1547 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1548 switch (TOKEN_SPELL (a
))
1550 default: /* Keep compiler happy. */
1551 case SPELL_OPERATOR
:
1554 return a
->val
.c
== b
->val
.c
; /* Character. */
1556 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1558 return a
->val
.node
== b
->val
.node
;
1561 return (a
->val
.str
.len
== b
->val
.str
.len
1562 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1569 /* Returns nonzero if a space should be inserted to avoid an
1570 accidental token paste for output. For simplicity, it is
1571 conservative, and occasionally advises a space where one is not
1572 needed, e.g. "." and ".2". */
1574 cpp_avoid_paste (pfile
, token1
, token2
)
1576 const cpp_token
*token1
, *token2
;
1578 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1581 if (token1
->flags
& NAMED_OP
)
1583 if (token2
->flags
& NAMED_OP
)
1587 if (token2
->flags
& DIGRAPH
)
1588 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1589 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1590 c
= token_spellings
[b
].name
[0];
1592 /* Quickly get everything that can paste with an '='. */
1593 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1598 case CPP_GREATER
: return c
== '>' || c
== '?';
1599 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1600 case CPP_PLUS
: return c
== '+';
1601 case CPP_MINUS
: return c
== '-' || c
== '>';
1602 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1603 case CPP_MOD
: return c
== ':' || c
== '>';
1604 case CPP_AND
: return c
== '&';
1605 case CPP_OR
: return c
== '|';
1606 case CPP_COLON
: return c
== ':' || c
== '>';
1607 case CPP_DEREF
: return c
== '*';
1608 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1609 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1610 case CPP_NAME
: return ((b
== CPP_NUMBER
1611 && name_p (pfile
, &token2
->val
.str
))
1613 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1614 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1615 || c
== '.' || c
== '+' || c
== '-');
1616 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1617 && token1
->val
.c
== '@'
1618 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1625 /* Output all the remaining tokens on the current line, and a newline
1626 character, to FP. Leading whitespace is removed. If there are
1627 macros, special token padding is not performed. */
1629 cpp_output_line (pfile
, fp
)
1633 const cpp_token
*token
;
1635 token
= cpp_get_token (pfile
);
1636 while (token
->type
!= CPP_EOF
)
1638 cpp_output_token (token
, fp
);
1639 token
= cpp_get_token (pfile
);
1640 if (token
->flags
& PREV_WHITE
)
1647 /* Returns the value of a hexadecimal digit. */
1653 return hex_value (c
);
1658 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1659 failure if cpplib is not parsing C++ or C99. Such failure is
1660 silent, and no variables are updated. Otherwise returns 0, and
1661 warns if -Wtraditional.
1663 [lex.charset]: The character designated by the universal character
1664 name \UNNNNNNNN is that character whose character short name in
1665 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1666 universal character name \uNNNN is that character whose character
1667 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1668 for a universal character name is less than 0x20 or in the range
1669 0x7F-0x9F (inclusive), or if the universal character name
1670 designates a character in the basic source character set, then the
1671 program is ill-formed.
1673 We assume that wchar_t is Unicode, so we don't need to do any
1674 mapping. Is this ever wrong?
1676 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1677 LIMIT is the end of the string or charconst. PSTR is updated to
1678 point after the UCS on return, and the UCS is written into PC. */
1681 maybe_read_ucs (pfile
, pstr
, limit
, pc
)
1683 const unsigned char **pstr
;
1684 const unsigned char *limit
;
1687 const unsigned char *p
= *pstr
;
1688 unsigned int code
= 0;
1689 unsigned int c
= *pc
, length
;
1691 /* Only attempt to interpret a UCS for C++ and C99. */
1692 if (! (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
)))
1695 if (CPP_WTRADITIONAL (pfile
))
1696 cpp_error (pfile
, DL_WARNING
,
1697 "the meaning of '\\%c' is different in traditional C", c
);
1699 length
= (c
== 'u' ? 4: 8);
1701 if ((size_t) (limit
- p
) < length
)
1703 cpp_error (pfile
, DL_ERROR
, "incomplete universal-character-name");
1704 /* Skip to the end to avoid more diagnostics. */
1709 for (; length
; length
--, p
++)
1713 code
= (code
<< 4) + hex_digit_value (c
);
1716 cpp_error (pfile
, DL_ERROR
,
1717 "non-hex digit '%c' in universal-character-name", c
);
1718 /* We shouldn't skip in case there are multibyte chars. */
1724 #ifdef TARGET_EBCDIC
1725 cpp_error (pfile
, DL_ERROR
, "universal-character-name on EBCDIC target");
1726 code
= 0x3f; /* EBCDIC invalid character */
1728 /* True extended characters are OK. */
1730 && !(code
& 0x80000000)
1731 && !(code
>= 0xD800 && code
<= 0xDFFF))
1733 /* The standard permits $, @ and ` to be specified as UCNs. We use
1734 hex escapes so that this also works with EBCDIC hosts. */
1735 else if (code
== 0x24 || code
== 0x40 || code
== 0x60)
1737 /* Don't give another error if one occurred above. */
1738 else if (length
== 0)
1739 cpp_error (pfile
, DL_ERROR
, "universal-character-name out of range");
1747 /* Returns the value of an escape sequence, truncated to the correct
1748 target precision. PSTR points to the input pointer, which is just
1749 after the backslash. LIMIT is how much text we have. WIDE is true
1750 if the escape sequence is part of a wide character constant or
1751 string literal. Handles all relevant diagnostics. */
1753 cpp_parse_escape (pfile
, pstr
, limit
, wide
)
1755 const unsigned char **pstr
;
1756 const unsigned char *limit
;
1760 const unsigned char *str
= *pstr
;
1765 width
= CPP_OPTION (pfile
, wchar_precision
);
1767 width
= CPP_OPTION (pfile
, char_precision
);
1768 if (width
< BITS_PER_CPPCHAR_T
)
1769 mask
= ((cppchar_t
) 1 << width
) - 1;
1776 case '\\': case '\'': case '"': case '?': break;
1777 case 'b': c
= TARGET_BS
; break;
1778 case 'f': c
= TARGET_FF
; break;
1779 case 'n': c
= TARGET_NEWLINE
; break;
1780 case 'r': c
= TARGET_CR
; break;
1781 case 't': c
= TARGET_TAB
; break;
1782 case 'v': c
= TARGET_VT
; break;
1784 case '(': case '{': case '[': case '%':
1785 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1786 '\%' is used to prevent SCCS from getting confused. */
1787 unknown
= CPP_PEDANTIC (pfile
);
1791 if (CPP_WTRADITIONAL (pfile
))
1792 cpp_error (pfile
, DL_WARNING
,
1793 "the meaning of '\\a' is different in traditional C");
1798 if (CPP_PEDANTIC (pfile
))
1799 cpp_error (pfile
, DL_PEDWARN
,
1800 "non-ISO-standard escape sequence, '\\%c'", (int) c
);
1805 unknown
= maybe_read_ucs (pfile
, &str
, limit
, &c
);
1809 if (CPP_WTRADITIONAL (pfile
))
1810 cpp_error (pfile
, DL_WARNING
,
1811 "the meaning of '\\x' is different in traditional C");
1814 cppchar_t i
= 0, overflow
= 0;
1815 int digits_found
= 0;
1823 overflow
|= i
^ (i
<< 4 >> 4);
1824 i
= (i
<< 4) + hex_digit_value (c
);
1829 cpp_error (pfile
, DL_ERROR
,
1830 "\\x used with no following hex digits");
1832 if (overflow
| (i
!= (i
& mask
)))
1834 cpp_error (pfile
, DL_PEDWARN
,
1835 "hex escape sequence out of range");
1842 case '0': case '1': case '2': case '3':
1843 case '4': case '5': case '6': case '7':
1846 cppchar_t i
= c
- '0';
1848 while (str
< limit
&& ++count
< 3)
1851 if (c
< '0' || c
> '7')
1854 i
= (i
<< 3) + c
- '0';
1857 if (i
!= (i
& mask
))
1859 cpp_error (pfile
, DL_PEDWARN
,
1860 "octal escape sequence out of range");
1875 cpp_error (pfile
, DL_PEDWARN
,
1876 "unknown escape sequence '\\%c'", (int) c
);
1878 cpp_error (pfile
, DL_PEDWARN
,
1879 "unknown escape sequence: '\\%03o'", (int) c
);
1884 cpp_error (pfile
, DL_PEDWARN
, "escape sequence out of range for its type");
1892 /* Interpret a (possibly wide) character constant in TOKEN.
1893 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1894 points to a variable that is filled in with the number of
1895 characters seen, and UNSIGNEDP to a variable that indicates whether
1896 the result has unsigned type. */
1898 cpp_interpret_charconst (pfile
, token
, pchars_seen
, unsignedp
)
1900 const cpp_token
*token
;
1901 unsigned int *pchars_seen
;
1904 const unsigned char *str
= token
->val
.str
.text
;
1905 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1906 unsigned int chars_seen
= 0;
1907 size_t width
, max_chars
;
1908 cppchar_t c
, mask
, result
= 0;
1911 #ifdef MULTIBYTE_CHARS
1912 (void) local_mbtowc (NULL
, NULL
, 0);
1915 /* Width in bits. */
1916 if (token
->type
== CPP_CHAR
)
1918 width
= CPP_OPTION (pfile
, char_precision
);
1919 max_chars
= CPP_OPTION (pfile
, int_precision
) / width
;
1920 unsigned_p
= CPP_OPTION (pfile
, unsigned_char
);
1924 width
= CPP_OPTION (pfile
, wchar_precision
);
1926 unsigned_p
= CPP_OPTION (pfile
, unsigned_wchar
);
1929 if (width
< BITS_PER_CPPCHAR_T
)
1930 mask
= ((cppchar_t
) 1 << width
) - 1;
1936 #ifdef MULTIBYTE_CHARS
1940 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1943 cpp_error (pfile
, DL_WARNING
,
1944 "ignoring invalid multibyte character");
1957 c
= cpp_parse_escape (pfile
, &str
, limit
, token
->type
== CPP_WCHAR
);
1959 #ifdef MAP_CHARACTER
1961 c
= MAP_CHARACTER (c
);
1966 /* Truncate the character, scale the result and merge the two. */
1968 if (width
< BITS_PER_CPPCHAR_T
)
1969 result
= (result
<< width
) | c
;
1974 if (chars_seen
== 0)
1975 cpp_error (pfile
, DL_ERROR
, "empty character constant");
1976 else if (chars_seen
> 1)
1978 /* Multichar charconsts are of type int and therefore signed. */
1981 if (chars_seen
> max_chars
)
1983 chars_seen
= max_chars
;
1984 cpp_error (pfile
, DL_WARNING
,
1985 "character constant too long for its type");
1987 else if (CPP_OPTION (pfile
, warn_multichar
))
1988 cpp_error (pfile
, DL_WARNING
, "multi-character character constant");
1991 /* Sign-extend or truncate the constant to cppchar_t. The value is
1992 in WIDTH bits, but for multi-char charconsts its value is the
1993 full target type's width. */
1996 if (width
< BITS_PER_CPPCHAR_T
)
1998 mask
= ((cppchar_t
) 1 << width
) - 1;
1999 if (unsigned_p
|| !(result
& (1 << (width
- 1))))
2005 *pchars_seen
= chars_seen
;
2006 *unsignedp
= unsigned_p
;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest payload size, in bytes, requested for a new buffer.  */
#define MIN_BUFF_SIZE 8000

/* Largest size a recycled buffer may have and still be handed out for
   a request of MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when replacing BUFF: MIN_EXTRA bytes plus twice the
   distance between BUFF's cur and limit pointers.  Both arguments are
   parenthesized so that an operator expression passed as MIN_EXTRA
   expands as a single operand.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
2021 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2022 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2025 /* Create a new allocation buffer. Place the control block at the end
2026 of the buffer, so that buffer overflows will cause immediate chaos. */
2032 unsigned char *base
;
2034 if (len
< MIN_BUFF_SIZE
)
2035 len
= MIN_BUFF_SIZE
;
2036 len
= CPP_ALIGN (len
);
2038 base
= xmalloc (len
+ sizeof (_cpp_buff
));
2039 result
= (_cpp_buff
*) (base
+ len
);
2040 result
->base
= base
;
2042 result
->limit
= base
+ len
;
2043 result
->next
= NULL
;
2047 /* Place a chain of unwanted allocation buffers on the free list. */
2049 _cpp_release_buff (pfile
, buff
)
2053 _cpp_buff
*end
= buff
;
2057 end
->next
= pfile
->free_buffs
;
2058 pfile
->free_buffs
= buff
;
2061 /* Return a free buffer of size at least MIN_SIZE. */
2063 _cpp_get_buff (pfile
, min_size
)
2067 _cpp_buff
*result
, **p
;
2069 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
2074 return new_buff (min_size
);
2076 size
= result
->limit
- result
->base
;
2077 /* Return a buffer that's big enough, but don't waste one that's
2079 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2084 result
->next
= NULL
;
2085 result
->cur
= result
->base
;
2089 /* Creates a new buffer with enough space to hold the uncommitted
2090 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2091 the excess bytes to the new buffer. Chains the new buffer after
2092 BUFF, and returns the new buffer. */
2094 _cpp_append_extend_buff (pfile
, buff
, min_extra
)
2099 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
2100 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
2102 buff
->next
= new_buff
;
2103 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
2107 /* Creates a new buffer with enough space to hold the uncommitted
2108 remaining bytes of the buffer pointed to by BUFF, and at least
2109 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2110 Chains the new buffer before the buffer pointed to by BUFF, and
2111 updates the pointer to point to the new buffer. */
2113 _cpp_extend_buff (pfile
, pbuff
, min_extra
)
2118 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
2119 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
2121 new_buff
= _cpp_get_buff (pfile
, size
);
2122 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
2123 new_buff
->next
= old_buff
;
2127 /* Free a chain of buffers starting at BUFF. */
2129 _cpp_free_buff (buff
)
2134 for (; buff
; buff
= next
)
2141 /* Allocate permanent, unaligned storage of length LEN. */
2143 _cpp_unaligned_alloc (pfile
, len
)
2147 _cpp_buff
*buff
= pfile
->u_buff
;
2148 unsigned char *result
= buff
->cur
;
2150 if (len
> (size_t) (buff
->limit
- result
))
2152 buff
= _cpp_get_buff (pfile
, len
);
2153 buff
->next
= pfile
->u_buff
;
2154 pfile
->u_buff
= buff
;
2158 buff
->cur
= result
+ len
;
2162 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2163 That buffer is used for growing allocations when saving macro
2164 replacement lists in a #define, and when parsing an answer to an
2165 assertion in #assert, #unassert or #if (and therefore possibly
2166 whilst expanding macros). It therefore must not be used by any
2167 code that they might call: specifically the lexer and the guts of
2170 All existing other uses clearly fit this restriction: storing
2171 registered pragmas during initialization. */
2173 _cpp_aligned_alloc (pfile
, len
)
2177 _cpp_buff
*buff
= pfile
->a_buff
;
2178 unsigned char *result
= buff
->cur
;
2180 if (len
> (size_t) (buff
->limit
- result
))
2182 buff
= _cpp_get_buff (pfile
, len
);
2183 buff
->next
= pfile
->a_buff
;
2184 pfile
->a_buff
= buff
;
2188 buff
->cur
= result
+ len
;