1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
28 #ifdef MULTIBYTE_CHARS
33 /* Tokens with SPELL_STRING store their spelling in the token list,
34 and its length in the token->val.name.len. */
47 enum spell_type category
;
48 const unsigned char *name
;
51 static const unsigned char *const digraph_spellings
[] =
52 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
54 #define OP(e, s) { SPELL_OPERATOR, U s },
55 #define TK(e, s) { s, U STRINGX (e) },
56 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
/* TOKEN_SPELL (token): the spelling category recorded in the
   token_spellings table entry selected by TOKEN's type.  */
60 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
/* TOKEN_NAME (token): the name string recorded in the token_spellings
   table entry selected by TOKEN's type.  */
61 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* BACKUP (): reset buffer->cur to buffer->backup_to.  Expands to a
   reference to a variable named `buffer', which must therefore be in
   scope at the point of use.  Wrapped in do { } while (0) so it
   behaves as a single statement.  */
62 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
64 static void handle_newline
PARAMS ((cpp_reader
*));
65 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_reader
*));
66 static cppchar_t get_effective_char
PARAMS ((cpp_reader
*));
68 static int skip_block_comment
PARAMS ((cpp_reader
*));
69 static int skip_line_comment
PARAMS ((cpp_reader
*));
70 static void adjust_column
PARAMS ((cpp_reader
*));
71 static int skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
72 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*));
73 static uchar
*parse_slow
PARAMS ((cpp_reader
*, const uchar
*, int,
75 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, int));
76 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const uchar
*));
77 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
78 static bool trigraph_p
PARAMS ((cpp_reader
*));
79 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const uchar
*,
81 static bool continue_after_nul
PARAMS ((cpp_reader
*));
82 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
83 static int maybe_read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
84 const unsigned char *, cppchar_t
*));
85 static tokenrun
*next_tokenrun
PARAMS ((tokenrun
*));
87 static unsigned int hex_digit_value
PARAMS ((unsigned int));
88 static _cpp_buff
*new_buff
PARAMS ((size_t));
92 Compares the token TOKEN to the NUL-terminated string STRING.
93 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
95 cpp_ideq (token
, string
)
96 const cpp_token
*token
;
99 if (token
->type
!= CPP_NAME
)
102 return !ustrcmp (NODE_NAME (token
->val
.node
), (const uchar
*) string
);
105 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
106 Returns with buffer->cur pointing to the character immediately
107 following the newline (combination). */
109 handle_newline (pfile
)
112 cpp_buffer
*buffer
= pfile
->buffer
;
114 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
115 only accept CR-LF; maybe we should fall back to that behaviour? */
116 if (buffer
->cur
[-1] + buffer
->cur
[0] == '\r' + '\n')
119 buffer
->line_base
= buffer
->cur
;
120 buffer
->col_adjust
= 0;
124 /* Subroutine of skip_escaped_newlines; called when a 3-character
125 sequence beginning with "??" is encountered. buffer->cur points to the second '?'.
128 Warn if necessary, and return true if the sequence forms a
129 trigraph and the trigraph should be honoured. */
134 cpp_buffer
*buffer
= pfile
->buffer
;
135 cppchar_t from_char
= buffer
->cur
[1];
138 if (!_cpp_trigraph_map
[from_char
])
141 accept
= CPP_OPTION (pfile
, trigraphs
);
143 /* Don't warn about trigraphs in comments. */
144 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
147 cpp_error_with_line (pfile
, DL_WARNING
,
148 pfile
->line
, CPP_BUF_COL (buffer
) - 1,
149 "trigraph ??%c converted to %c",
151 (int) _cpp_trigraph_map
[from_char
]);
152 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
154 buffer
->last_Wtrigraphs
= buffer
->cur
;
155 cpp_error_with_line (pfile
, DL_WARNING
,
156 pfile
->line
, CPP_BUF_COL (buffer
) - 1,
157 "trigraph ??%c ignored", (int) from_char
);
164 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
165 lie in buffer->cur[-1]. Returns the next byte, which will be in
166 buffer->cur[-1]. This routine performs preprocessing stages 1 and
167 2 of the ISO C standard. */
169 skip_escaped_newlines (pfile
)
172 cpp_buffer
*buffer
= pfile
->buffer
;
173 cppchar_t next
= buffer
->cur
[-1];
175 /* Only do this if we apply stages 1 and 2. */
176 if (!buffer
->from_stage3
)
178 const unsigned char *saved_cur
;
185 if (buffer
->cur
[0] != '?' || !trigraph_p (pfile
))
188 /* Translate the trigraph. */
189 next
= _cpp_trigraph_map
[buffer
->cur
[1]];
195 if (buffer
->cur
== buffer
->rlimit
)
198 /* We have a backslash, and room for at least one more
199 character. Skip horizontal whitespace. */
200 saved_cur
= buffer
->cur
;
202 next1
= *buffer
->cur
++;
203 while (is_nvspace (next1
) && buffer
->cur
< buffer
->rlimit
);
205 if (!is_vspace (next1
))
207 buffer
->cur
= saved_cur
;
211 if (saved_cur
!= buffer
->cur
- 1
212 && !pfile
->state
.lexing_comment
)
213 cpp_error (pfile
, DL_WARNING
,
214 "backslash and newline separated by space");
216 handle_newline (pfile
);
217 buffer
->backup_to
= buffer
->cur
;
218 if (buffer
->cur
== buffer
->rlimit
)
220 cpp_error (pfile
, DL_PEDWARN
,
221 "backslash-newline at end of file");
225 next
= *buffer
->cur
++;
227 while (next
== '\\' || next
== '?');
233 /* Obtain the next character, after trigraph conversion and skipping
234 an arbitrarily long string of escaped newlines. The common case of
235 no trigraphs or escaped newlines falls through quickly. On return,
236 buffer->backup_to points to where to return to if the character is
237 not to be processed. */
239 get_effective_char (pfile
)
243 cpp_buffer
*buffer
= pfile
->buffer
;
245 buffer
->backup_to
= buffer
->cur
;
246 next
= *buffer
->cur
++;
247 if (__builtin_expect (next
== '?' || next
== '\\', 0))
248 next
= skip_escaped_newlines (pfile
);
253 /* Skip a C-style block comment. We find the end of the comment by
254 seeing if an asterisk is before every '/' we encounter. Returns
255 non-zero if comment terminated by EOF, zero otherwise. */
257 skip_block_comment (pfile
)
260 cpp_buffer
*buffer
= pfile
->buffer
;
261 cppchar_t c
= EOF
, prevc
= EOF
;
263 pfile
->state
.lexing_comment
= 1;
264 while (buffer
->cur
!= buffer
->rlimit
)
266 prevc
= c
, c
= *buffer
->cur
++;
268 /* FIXME: For speed, create a new character class of characters
269 of interest inside block comments. */
270 if (c
== '?' || c
== '\\')
271 c
= skip_escaped_newlines (pfile
);
273 /* People like decorating comments with '*', so check for '/'
274 instead for efficiency. */
280 /* Warn about potential nested comments, but not if the '/'
281 comes immediately before the true comment delimiter.
282 Don't bother to get it right across escaped newlines. */
283 if (CPP_OPTION (pfile
, warn_comments
)
284 && buffer
->cur
[0] == '*' && buffer
->cur
[1] != '/')
285 cpp_error_with_line (pfile
, DL_WARNING
,
286 pfile
->line
, CPP_BUF_COL (buffer
),
287 "\"/*\" within comment");
289 else if (is_vspace (c
))
290 handle_newline (pfile
);
292 adjust_column (pfile
);
295 pfile
->state
.lexing_comment
= 0;
296 return c
!= '/' || prevc
!= '*';
299 /* Skip a C++ line comment, leaving buffer->cur pointing to the
300 terminating newline. Handles escaped newlines. Returns non-zero
301 if a multiline comment. */
303 skip_line_comment (pfile
)
306 cpp_buffer
*buffer
= pfile
->buffer
;
307 unsigned int orig_line
= pfile
->line
;
309 #ifdef MULTIBYTE_CHARS
314 pfile
->state
.lexing_comment
= 1;
315 #ifdef MULTIBYTE_CHARS
316 /* Reset multibyte conversion state. */
317 (void) local_mbtowc (NULL
, NULL
, 0);
321 if (buffer
->cur
== buffer
->rlimit
)
324 #ifdef MULTIBYTE_CHARS
325 char_len
= local_mbtowc (&wc
, (const char *) buffer
->cur
,
326 buffer
->rlimit
- buffer
->cur
);
329 cpp_error (pfile
, DL_WARNING
,
330 "ignoring invalid multibyte character");
336 buffer
->cur
+= char_len
;
342 if (c
== '?' || c
== '\\')
343 c
= skip_escaped_newlines (pfile
);
345 while (!is_vspace (c
));
347 /* Step back over the newline, except at EOF. */
351 pfile
->state
.lexing_comment
= 0;
352 return orig_line
!= pfile
->line
;
355 /* pfile->buffer->cur is one beyond the \t character. Update
356 col_adjust so we track the column correctly. */
358 adjust_column (pfile
)
361 cpp_buffer
*buffer
= pfile
->buffer
;
362 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
364 /* Round it up to multiple of the tabstop, but subtract 1 since the
365 tab itself occupies a character position. */
366 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
367 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
370 /* Skips whitespace, saving the next non-whitespace character.
371 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
372 tokens might be assigned an incorrect column. */
374 skip_whitespace (pfile
, c
)
378 cpp_buffer
*buffer
= pfile
->buffer
;
379 unsigned int warned
= 0;
383 /* Horizontal space always OK. */
387 adjust_column (pfile
);
388 /* Just \f \v or \0 left. */
391 if (buffer
->cur
- 1 == buffer
->rlimit
)
395 cpp_error (pfile
, DL_WARNING
, "null character(s) ignored");
399 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
400 cpp_error_with_line (pfile
, DL_PEDWARN
, pfile
->line
,
401 CPP_BUF_COL (buffer
),
402 "%s in preprocessing directive",
403 c
== '\f' ? "form feed" : "vertical tab");
407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
408 while (is_nvspace (c
));
414 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
417 name_p (pfile
, string
)
419 const cpp_string
*string
;
423 for (i
= 0; i
< string
->len
; i
++)
424 if (!is_idchar (string
->text
[i
]))
430 /* Parse an identifier, skipping embedded backslash-newlines. This is
431 a critical inner loop. The common case is an identifier which has
432 not been split by backslash-newline, does not contain a dollar
433 sign, and has already been scanned (roughly 10:1 ratio of
434 seen:unseen identifiers in normal code; the distribution is
435 Poisson-like). Second most common case is a new identifier, not
436 split and no dollar sign. The other possibilities are rare and
437 have been relegated to parse_slow. */
438 static cpp_hashnode
*
439 parse_identifier (pfile
)
442 cpp_hashnode
*result
;
443 const uchar
*cur
, *base
;
445 /* Fast-path loop. Skim over a normal identifier.
446 N.B. ISIDNUM does not include $. */
447 cur
= pfile
->buffer
->cur
;
448 while (ISIDNUM (*cur
))
451 /* Check for slow-path cases. */
452 if (*cur
== '?' || *cur
== '\\' || *cur
== '$')
456 base
= parse_slow (pfile
, cur
, 0, &len
);
457 result
= (cpp_hashnode
*)
458 ht_lookup (pfile
->hash_table
, base
, len
, HT_ALLOCED
);
462 base
= pfile
->buffer
->cur
- 1;
463 pfile
->buffer
->cur
= cur
;
464 result
= (cpp_hashnode
*)
465 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
468 /* Rarely, identifiers require diagnostics when lexed.
469 XXX Has to be forced out of the fast path. */
470 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
471 && !pfile
->state
.skipping
, 0))
473 /* It is allowed to poison the same identifier twice. */
474 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
475 cpp_error (pfile
, DL_ERROR
, "attempt to use poisoned \"%s\"",
478 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
479 replacement list of a variadic macro. */
480 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
481 && !pfile
->state
.va_args_ok
)
482 cpp_error (pfile
, DL_PEDWARN
,
483 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
489 /* Slow path. This handles numbers and identifiers which have been
490 split, or contain dollar signs. The part of the token from
491 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
492 1 if it's a number, and 2 if it has a leading period. Returns a
493 pointer to the token's NUL-terminated spelling in permanent
494 storage, and sets PLEN to its length. */
496 parse_slow (pfile
, cur
, number_p
, plen
)
502 cpp_buffer
*buffer
= pfile
->buffer
;
503 const uchar
*base
= buffer
->cur
- 1;
504 struct obstack
*stack
= &pfile
->hash_table
->stack
;
505 unsigned int c
, prevc
, saw_dollar
= 0;
507 /* Place any leading period. */
509 obstack_1grow (stack
, '.');
511 /* Copy the part of the token which is known to be okay. */
512 obstack_grow (stack
, base
, cur
- base
);
514 /* Now process the part which isn't. We are looking at one of
515 '$', '\\', or '?' on entry to this loop. */
521 /* Potential escaped newline? */
522 buffer
->backup_to
= buffer
->cur
- 1;
523 if (c
== '?' || c
== '\\')
524 c
= skip_escaped_newlines (pfile
);
530 if (c
!= '.' && !VALID_SIGN (c
, prevc
))
534 /* Handle normal identifier characters in this loop. */
538 obstack_1grow (stack
, c
);
545 while (is_idchar (c
));
548 /* Step back over the unwanted char. */
551 /* $ is not an identifier character in the standard, but is commonly
552 accepted as an extension. Don't warn about it in skipped
553 conditional blocks. */
554 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->state
.skipping
)
555 cpp_error (pfile
, DL_PEDWARN
, "'$' character(s) in identifier or number");
557 /* Identifiers and numbers are null-terminated. */
558 *plen
= obstack_object_size (stack
);
559 obstack_1grow (stack
, '\0');
560 return obstack_finish (stack
);
563 /* Parse a number, beginning with character C, skipping embedded
564 backslash-newlines. LEADING_PERIOD is non-zero if there was a "."
565 before C. Place the result in NUMBER. */
567 parse_number (pfile
, number
, leading_period
)
574 /* Fast-path loop. Skim over a normal number.
575 N.B. ISIDNUM does not include $. */
576 cur
= pfile
->buffer
->cur
;
577 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
580 /* Check for slow-path cases. */
581 if (*cur
== '?' || *cur
== '\\' || *cur
== '$')
582 number
->text
= parse_slow (pfile
, cur
, 1 + leading_period
, &number
->len
);
585 const uchar
*base
= pfile
->buffer
->cur
- 1;
588 number
->len
= cur
- base
+ leading_period
;
589 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
590 dest
[number
->len
] = '\0';
595 memcpy (dest
, base
, cur
- base
);
596 pfile
->buffer
->cur
= cur
;
600 /* Subroutine of parse_string. */
602 unescaped_terminator_p (pfile
, dest
)
604 const unsigned char *dest
;
606 const unsigned char *start
, *temp
;
608 /* In #include-style directives, terminators are not escapable. */
609 if (pfile
->state
.angled_headers
)
612 start
= BUFF_FRONT (pfile
->u_buff
);
614 /* An odd number of consecutive backslashes represents an escaped terminator. */
616 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
619 return ((dest
- temp
) & 1) == 0;
622 /* Parses a string, character constant, or angle-bracketed header file
623 name. Handles embedded trigraphs and escaped newlines. The stored
624 string is guaranteed NUL-terminated, but it is not guaranteed that
625 this is the first NUL since embedded NULs are preserved.
627 When this function returns, buffer->cur points to the next
628 character to be processed. */
630 parse_string (pfile
, token
, terminator
)
633 cppchar_t terminator
;
635 cpp_buffer
*buffer
= pfile
->buffer
;
636 unsigned char *dest
, *limit
;
638 bool warned_nulls
= false;
639 #ifdef MULTIBYTE_CHARS
644 dest
= BUFF_FRONT (pfile
->u_buff
);
645 limit
= BUFF_LIMIT (pfile
->u_buff
);
647 #ifdef MULTIBYTE_CHARS
648 /* Reset multibyte conversion state. */
649 (void) local_mbtowc (NULL
, NULL
, 0);
653 /* We need room for another char, possibly the terminating NUL. */
654 if ((size_t) (limit
- dest
) < 1)
656 size_t len_so_far
= dest
- BUFF_FRONT (pfile
->u_buff
);
657 _cpp_extend_buff (pfile
, &pfile
->u_buff
, 2);
658 dest
= BUFF_FRONT (pfile
->u_buff
) + len_so_far
;
659 limit
= BUFF_LIMIT (pfile
->u_buff
);
662 #ifdef MULTIBYTE_CHARS
663 char_len
= local_mbtowc (&wc
, (const char *) buffer
->cur
,
664 buffer
->rlimit
- buffer
->cur
);
667 cpp_error (pfile
, DL_WARNING
,
668 "ignoring invalid multibyte character");
674 buffer
->cur
+= char_len
;
681 /* Handle trigraphs, escaped newlines etc. */
682 if (c
== '?' || c
== '\\')
683 c
= skip_escaped_newlines (pfile
);
687 if (unescaped_terminator_p (pfile
, dest
))
690 else if (is_vspace (c
))
692 /* No string literal may extend over multiple lines. In
693 assembly language, suppress the error except for <>
694 includes. This is a kludge around not knowing where
697 if (CPP_OPTION (pfile
, lang
) != CLK_ASM
|| terminator
== '>')
698 cpp_error (pfile
, DL_ERROR
, "missing terminating %c character",
705 if (buffer
->cur
- 1 == buffer
->rlimit
)
710 cpp_error (pfile
, DL_WARNING
,
711 "null character(s) preserved in literal");
714 #ifdef MULTIBYTE_CHARS
717 for ( ; char_len
> 0; --char_len
)
718 *dest
++ = (*buffer
->cur
- char_len
);
727 token
->val
.str
.text
= BUFF_FRONT (pfile
->u_buff
);
728 token
->val
.str
.len
= dest
- BUFF_FRONT (pfile
->u_buff
);
729 BUFF_FRONT (pfile
->u_buff
) = dest
+ 1;
732 /* The stored comment includes the comment start and any terminator. */
734 save_comment (pfile
, token
, from
, type
)
737 const unsigned char *from
;
740 unsigned char *buffer
;
741 unsigned int len
, clen
;
743 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
745 /* C++ comments probably (not definitely) have moved past a new
746 line, which we don't want to save in the comment. */
747 if (is_vspace (pfile
->buffer
->cur
[-1]))
750 /* If we are currently in a directive, then we need to store all
751 C++ comments as C comments internally, and so we need to
752 allocate a little extra space in that case.
754 Note that the only time we encounter a directive here is
755 when we are saving comments in a "#define". */
756 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
758 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
760 token
->type
= CPP_COMMENT
;
761 token
->val
.str
.len
= clen
;
762 token
->val
.str
.text
= buffer
;
765 memcpy (buffer
+ 1, from
, len
- 1);
767 /* Finish conversion to a C comment, if necessary. */
768 if (pfile
->state
.in_directive
&& type
== '/')
771 buffer
[clen
- 2] = '*';
772 buffer
[clen
- 1] = '/';
776 /* Allocate COUNT tokens for RUN. */
778 _cpp_init_tokenrun (run
, count
)
782 run
->base
= xnewvec (cpp_token
, count
);
783 run
->limit
= run
->base
+ count
;
787 /* Returns the next tokenrun, or creates one if there is none. */
792 if (run
->next
== NULL
)
794 run
->next
= xnew (tokenrun
);
795 run
->next
->prev
= run
;
796 _cpp_init_tokenrun (run
->next
, 250);
802 /* Allocate a single token that is invalidated at the same time as the
803 rest of the tokens on the line. Has its line and col set to the
804 same as the last lexed token, so that diagnostics appear in the
807 _cpp_temp_token (pfile
)
810 cpp_token
*old
, *result
;
812 old
= pfile
->cur_token
- 1;
813 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
815 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
816 pfile
->cur_token
= pfile
->cur_run
->base
;
819 result
= pfile
->cur_token
++;
820 result
->line
= old
->line
;
821 result
->col
= old
->col
;
825 /* Lex a token into RESULT (external interface). Takes care of issues
826 like directive handling, token lookahead, multiple include
827 optimization and skipping. */
829 _cpp_lex_token (pfile
)
836 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
838 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
839 pfile
->cur_token
= pfile
->cur_run
->base
;
842 if (pfile
->lookaheads
)
845 result
= pfile
->cur_token
++;
848 result
= _cpp_lex_direct (pfile
);
850 if (result
->flags
& BOL
)
852 /* Is this a directive? If _cpp_handle_directive returns
853 false, it is an assembler #. */
854 if (result
->type
== CPP_HASH
855 /* 6.10.3 p 11: Directives in a list of macro arguments
856 gives undefined behavior. This implementation
857 handles the directive as normal. */
858 && pfile
->state
.parsing_args
!= 1
859 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
861 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
862 (*pfile
->cb
.line_change
)(pfile
, result
, pfile
->state
.parsing_args
);
865 /* We don't skip tokens in directives. */
866 if (pfile
->state
.in_directive
)
869 /* Outside a directive, invalidate controlling macros. At file
870 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
871 get here and MI optimisation works. */
872 pfile
->mi_valid
= false;
874 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
881 /* A NUL terminates the current buffer. For ISO preprocessing this is
882 EOF, but for traditional preprocessing it indicates we need a line
883 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
884 to return a CPP_EOF to the caller. */
886 continue_after_nul (pfile
)
889 cpp_buffer
*buffer
= pfile
->buffer
;
892 buffer
->saved_flags
= BOL
;
893 if (CPP_OPTION (pfile
, traditional
))
894 more
= _cpp_read_logical_line_trad (pfile
);
897 /* Stop parsing arguments with a CPP_EOF. When we finally come
898 back here, do the work of popping the buffer. */
899 if (!pfile
->state
.parsing_args
)
901 if (buffer
->cur
!= buffer
->line_base
)
903 /* Non-empty files should end in a newline. Don't warn
904 for command line and _Pragma buffers. */
905 if (!buffer
->from_stage3
)
906 cpp_error (pfile
, DL_PEDWARN
, "no newline at end of file");
907 handle_newline (pfile
);
910 /* Similarly, finish an in-progress directive with CPP_EOF
911 before popping the buffer. */
912 if (!pfile
->state
.in_directive
&& buffer
->prev
)
914 more
= !buffer
->return_at_eof
;
915 _cpp_pop_buffer (pfile
);
923 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
925 if (get_effective_char (pfile) == CHAR) \
926 result->type = THEN_TYPE; \
930 result->type = ELSE_TYPE; \
934 /* Lex a token into pfile->cur_token, which is also incremented, to
935 get diagnostics pointing to the correct location.
937 Does not handle issues such as token lookahead, multiple-include
938 optimisation, directives, skipping etc. This function is only
939 suitable for use by _cpp_lex_token, and in special cases like
940 lex_expansion_token which doesn't care for any of these issues.
942 When meeting a newline, returns CPP_EOF if parsing a directive,
943 otherwise returns to the start of the token buffer if permissible.
944 Returns the location of the lexed token. */
946 _cpp_lex_direct (pfile
)
951 const unsigned char *comment_start
;
952 cpp_token
*result
= pfile
->cur_token
++;
955 buffer
= pfile
->buffer
;
956 result
->flags
= buffer
->saved_flags
;
957 buffer
->saved_flags
= 0;
959 result
->line
= pfile
->line
;
963 result
->col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
968 case ' ': case '\t': case '\f': case '\v': case '\0':
969 result
->flags
|= PREV_WHITE
;
970 if (skip_whitespace (pfile
, c
))
975 if (continue_after_nul (pfile
))
977 result
->type
= CPP_EOF
;
980 case '\n': case '\r':
981 handle_newline (pfile
);
982 buffer
->saved_flags
= BOL
;
983 if (! pfile
->state
.in_directive
)
985 if (pfile
->state
.parsing_args
== 2)
986 buffer
->saved_flags
|= PREV_WHITE
;
987 if (!pfile
->keep_tokens
)
989 pfile
->cur_run
= &pfile
->base_run
;
990 result
= pfile
->base_run
.base
;
991 pfile
->cur_token
= result
+ 1;
995 result
->type
= CPP_EOF
;
1000 /* These could start an escaped newline, or '?' a trigraph. Let
1001 skip_escaped_newlines do all the work. */
1003 unsigned int line
= pfile
->line
;
1005 c
= skip_escaped_newlines (pfile
);
1006 if (line
!= pfile
->line
)
1009 /* We had at least one escaped newline of some sort.
1010 Update the token's line and column. */
1011 goto update_tokens_line
;
1015 /* We are either the original '?' or '\\', or a trigraph. */
1017 result
->type
= CPP_QUERY
;
1024 case '0': case '1': case '2': case '3': case '4':
1025 case '5': case '6': case '7': case '8': case '9':
1026 result
->type
= CPP_NUMBER
;
1027 parse_number (pfile
, &result
->val
.str
, 0);
1031 /* 'L' may introduce wide characters or strings. */
1033 const unsigned char *pos
= buffer
->cur
;
1035 c
= get_effective_char (pfile
);
1036 if (c
== '\'' || c
== '"')
1038 result
->type
= (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
1039 parse_string (pfile
, result
, c
);
1048 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1049 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1050 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1051 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1053 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1054 case 'G': case 'H': case 'I': case 'J': case 'K':
1055 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1056 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1058 result
->type
= CPP_NAME
;
1059 result
->val
.node
= parse_identifier (pfile
);
1061 /* Convert named operators to their proper types. */
1062 if (result
->val
.node
->flags
& NODE_OPERATOR
)
1064 result
->flags
|= NAMED_OP
;
1065 result
->type
= result
->val
.node
->value
.operator;
1071 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
1072 parse_string (pfile
, result
, c
);
1076 /* A potential block or line comment. */
1077 comment_start
= buffer
->cur
;
1078 c
= get_effective_char (pfile
);
1082 if (skip_block_comment (pfile
))
1083 cpp_error (pfile
, DL_ERROR
, "unterminated comment");
1085 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
1086 || CPP_IN_SYSTEM_HEADER (pfile
)))
1088 /* Warn about comments only if pedantically GNUC89, and not
1089 in system headers. */
1090 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1091 && ! buffer
->warned_cplusplus_comments
)
1093 cpp_error (pfile
, DL_PEDWARN
,
1094 "C++ style comments are not allowed in ISO C89");
1095 cpp_error (pfile
, DL_PEDWARN
,
1096 "(this will be reported only once per input file)");
1097 buffer
->warned_cplusplus_comments
= 1;
1100 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1101 cpp_error (pfile
, DL_WARNING
, "multi-line comment");
1105 result
->type
= CPP_DIV_EQ
;
1111 result
->type
= CPP_DIV
;
1115 if (!pfile
->state
.save_comments
)
1117 result
->flags
|= PREV_WHITE
;
1118 goto update_tokens_line
;
1121 /* Save the comment as a token in its own right. */
1122 save_comment (pfile
, result
, comment_start
, c
);
1126 if (pfile
->state
.angled_headers
)
1128 result
->type
= CPP_HEADER_NAME
;
1129 parse_string (pfile
, result
, '>');
1133 c
= get_effective_char (pfile
);
1135 result
->type
= CPP_LESS_EQ
;
1137 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
1138 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1139 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
1140 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1142 result
->type
= CPP_OPEN_SQUARE
;
1143 result
->flags
|= DIGRAPH
;
1145 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1147 result
->type
= CPP_OPEN_BRACE
;
1148 result
->flags
|= DIGRAPH
;
1153 result
->type
= CPP_LESS
;
1158 c
= get_effective_char (pfile
);
1160 result
->type
= CPP_GREATER_EQ
;
1162 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1163 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1164 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
1168 result
->type
= CPP_GREATER
;
1173 c
= get_effective_char (pfile
);
1175 result
->type
= CPP_MOD_EQ
;
1176 else if (CPP_OPTION (pfile
, digraphs
) && c
== ':')
1178 result
->flags
|= DIGRAPH
;
1179 result
->type
= CPP_HASH
;
1180 if (get_effective_char (pfile
) == '%')
1182 const unsigned char *pos
= buffer
->cur
;
1184 if (get_effective_char (pfile
) == ':')
1185 result
->type
= CPP_PASTE
;
1187 buffer
->cur
= pos
- 1;
1192 else if (CPP_OPTION (pfile
, digraphs
) && c
== '>')
1194 result
->flags
|= DIGRAPH
;
1195 result
->type
= CPP_CLOSE_BRACE
;
1200 result
->type
= CPP_MOD
;
1205 result
->type
= CPP_DOT
;
1206 c
= get_effective_char (pfile
);
1209 const unsigned char *pos
= buffer
->cur
;
1211 if (get_effective_char (pfile
) == '.')
1212 result
->type
= CPP_ELLIPSIS
;
1214 buffer
->cur
= pos
- 1;
1216 /* All known character sets have 0...9 contiguous. */
1217 else if (ISDIGIT (c
))
1219 result
->type
= CPP_NUMBER
;
1220 parse_number (pfile
, &result
->val
.str
, 1);
1222 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
1223 result
->type
= CPP_DOT_STAR
;
1229 c
= get_effective_char (pfile
);
1231 result
->type
= CPP_PLUS_PLUS
;
1233 result
->type
= CPP_PLUS_EQ
;
1237 result
->type
= CPP_PLUS
;
1242 c
= get_effective_char (pfile
);
1245 result
->type
= CPP_DEREF
;
1246 if (CPP_OPTION (pfile
, cplusplus
))
1248 if (get_effective_char (pfile
) == '*')
1249 result
->type
= CPP_DEREF_STAR
;
1255 result
->type
= CPP_MINUS_MINUS
;
1257 result
->type
= CPP_MINUS_EQ
;
1261 result
->type
= CPP_MINUS
;
1266 c
= get_effective_char (pfile
);
1268 result
->type
= CPP_AND_AND
;
1270 result
->type
= CPP_AND_EQ
;
1274 result
->type
= CPP_AND
;
1279 c
= get_effective_char (pfile
);
1281 result
->type
= CPP_OR_OR
;
1283 result
->type
= CPP_OR_EQ
;
1287 result
->type
= CPP_OR
;
1292 c
= get_effective_char (pfile
);
1293 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1294 result
->type
= CPP_SCOPE
;
1295 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1297 result
->flags
|= DIGRAPH
;
1298 result
->type
= CPP_CLOSE_SQUARE
;
1303 result
->type
= CPP_COLON
;
1307 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1308 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1309 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1310 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1311 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1313 case '~': result
->type
= CPP_COMPL
; break;
1314 case ',': result
->type
= CPP_COMMA
; break;
1315 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1316 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1317 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1318 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1319 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1320 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1321 case ';': result
->type
= CPP_SEMICOLON
; break;
1323 /* @ is a punctuator in Objective C. */
1324 case '@': result
->type
= CPP_ATSIGN
; break;
1327 if (CPP_OPTION (pfile
, dollars_in_ident
))
1329 /* Fall through... */
1333 result
->type
= CPP_OTHER
;
1341 /* An upper bound on the number of bytes needed to spell TOKEN,
1342 including preceding whitespace. */
1344 cpp_token_len (token
)
1345 const cpp_token
*token
;
1349 switch (TOKEN_SPELL (token
))
1351 default: len
= 0; break;
1353 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1354 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1356 /* 1 for whitespace, 4 for comment delimiters. */
1360 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1361 already contain enough space to hold the token's spelling.
1362 Returns a pointer to the character after the last character
1365 cpp_spell_token (pfile
, token
, buffer
)
1366 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1367 const cpp_token
*token
;
1368 unsigned char *buffer
;
1370 switch (TOKEN_SPELL (token
))
1372 case SPELL_OPERATOR
:
1374 const unsigned char *spelling
;
1377 if (token
->flags
& DIGRAPH
)
1379 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1380 else if (token
->flags
& NAMED_OP
)
1383 spelling
= TOKEN_NAME (token
);
1385 while ((c
= *spelling
++) != '\0')
1391 *buffer
++ = token
->val
.c
;
1396 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1397 buffer
+= NODE_LEN (token
->val
.node
);
1401 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1402 buffer
+= token
->val
.str
.len
;
1407 int left
, right
, tag
;
1408 switch (token
->type
)
1410 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1411 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1412 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1413 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1414 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1416 cpp_error (pfile
, DL_ICE
, "unknown string token %s\n",
1417 TOKEN_NAME (token
));
1420 if (tag
) *buffer
++ = tag
;
1422 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1423 buffer
+= token
->val
.str
.len
;
1429 cpp_error (pfile
, DL_ICE
, "unspellable token %s", TOKEN_NAME (token
));
1436 /* Returns TOKEN spelt as a null-terminated string. The string is
1437 freed when the reader is destroyed. Useful for diagnostics. */
1439 cpp_token_as_text (pfile
, token
)
1441 const cpp_token
*token
;
1443 unsigned int len
= cpp_token_len (token
);
1444 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1446 end
= cpp_spell_token (pfile
, token
, start
);
1452 /* Used by C front ends, which really should move to using
1453 cpp_token_as_text. */
1455 cpp_type2name (type
)
1456 enum cpp_ttype type
;
1458 return (const char *) token_spellings
[type
].name
;
1461 /* Writes the spelling of token to FP, without any preceding space.
1462 Separated from cpp_spell_token for efficiency - to avoid stdio
1463 double-buffering. */
1465 cpp_output_token (token
, fp
)
1466 const cpp_token
*token
;
1469 switch (TOKEN_SPELL (token
))
1471 case SPELL_OPERATOR
:
1473 const unsigned char *spelling
;
1476 if (token
->flags
& DIGRAPH
)
1478 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1479 else if (token
->flags
& NAMED_OP
)
1482 spelling
= TOKEN_NAME (token
);
1487 while ((c
= *++spelling
) != '\0');
1492 putc (token
->val
.c
, fp
);
1497 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
1501 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1506 int left
, right
, tag
;
1507 switch (token
->type
)
1509 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1510 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1511 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1512 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1513 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1515 fprintf (stderr
, "impossible STRING token %s\n", TOKEN_NAME (token
));
1518 if (tag
) putc (tag
, fp
);
1520 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1526 /* An error, most probably. */
1531 /* Compare two tokens. */
1533 _cpp_equiv_tokens (a
, b
)
1534 const cpp_token
*a
, *b
;
1536 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1537 switch (TOKEN_SPELL (a
))
1539 default: /* Keep compiler happy. */
1540 case SPELL_OPERATOR
:
1543 return a
->val
.c
== b
->val
.c
; /* Character. */
1545 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1547 return a
->val
.node
== b
->val
.node
;
1550 return (a
->val
.str
.len
== b
->val
.str
.len
1551 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1558 /* Returns nonzero if a space should be inserted to avoid an
1559 accidental token paste for output. For simplicity, it is
1560 conservative, and occasionally advises a space where one is not
1561 needed, e.g. "." and ".2". */
1563 cpp_avoid_paste (pfile
, token1
, token2
)
1565 const cpp_token
*token1
, *token2
;
1567 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1570 if (token1
->flags
& NAMED_OP
)
1572 if (token2
->flags
& NAMED_OP
)
1576 if (token2
->flags
& DIGRAPH
)
1577 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1578 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1579 c
= token_spellings
[b
].name
[0];
1581 /* Quickly get everything that can paste with an '='. */
1582 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1587 case CPP_GREATER
: return c
== '>' || c
== '?';
1588 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1589 case CPP_PLUS
: return c
== '+';
1590 case CPP_MINUS
: return c
== '-' || c
== '>';
1591 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1592 case CPP_MOD
: return c
== ':' || c
== '>';
1593 case CPP_AND
: return c
== '&';
1594 case CPP_OR
: return c
== '|';
1595 case CPP_COLON
: return c
== ':' || c
== '>';
1596 case CPP_DEREF
: return c
== '*';
1597 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1598 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1599 case CPP_NAME
: return ((b
== CPP_NUMBER
1600 && name_p (pfile
, &token2
->val
.str
))
1602 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1603 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1604 || c
== '.' || c
== '+' || c
== '-');
1605 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1606 && token1
->val
.c
== '@'
1607 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1614 /* Output all the remaining tokens on the current line, and a newline
1615 character, to FP. Leading whitespace is removed. If there are
1616 macros, special token padding is not performed. */
1618 cpp_output_line (pfile
, fp
)
1622 const cpp_token
*token
;
1624 token
= cpp_get_token (pfile
);
1625 while (token
->type
!= CPP_EOF
)
1627 cpp_output_token (token
, fp
);
1628 token
= cpp_get_token (pfile
);
1629 if (token
->flags
& PREV_WHITE
)
/* Returns the value of a hexadecimal digit.  C must be a hexadecimal
   digit; anything else is treated as an internal error and aborts.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (hex_p (c))
    return hex_value (c);
  else
    abort ();
}
1647 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1648 failure if cpplib is not parsing C++ or C99. Such failure is
1649 silent, and no variables are updated. Otherwise returns 0, and
1650 warns if -Wtraditional.
1652 [lex.charset]: The character designated by the universal character
1653 name \UNNNNNNNN is that character whose character short name in
1654 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1655 universal character name \uNNNN is that character whose character
1656 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1657 for a universal character name is less than 0x20 or in the range
1658 0x7F-0x9F (inclusive), or if the universal character name
1659 designates a character in the basic source character set, then the
1660 program is ill-formed.
1662 We assume that wchar_t is Unicode, so we don't need to do any
1663 mapping. Is this ever wrong?
1665 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1666 LIMIT is the end of the string or charconst. PSTR is updated to
1667 point after the UCS on return, and the UCS is written into PC. */
1670 maybe_read_ucs (pfile
, pstr
, limit
, pc
)
1672 const unsigned char **pstr
;
1673 const unsigned char *limit
;
1676 const unsigned char *p
= *pstr
;
1677 unsigned int code
= 0;
1678 unsigned int c
= *pc
, length
;
1680 /* Only attempt to interpret a UCS for C++ and C99. */
1681 if (! (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
)))
1684 if (CPP_WTRADITIONAL (pfile
))
1685 cpp_error (pfile
, DL_WARNING
,
1686 "the meaning of '\\%c' is different in traditional C", c
);
1688 length
= (c
== 'u' ? 4: 8);
1690 if ((size_t) (limit
- p
) < length
)
1692 cpp_error (pfile
, DL_ERROR
, "incomplete universal-character-name");
1693 /* Skip to the end to avoid more diagnostics. */
1698 for (; length
; length
--, p
++)
1702 code
= (code
<< 4) + hex_digit_value (c
);
1705 cpp_error (pfile
, DL_ERROR
,
1706 "non-hex digit '%c' in universal-character-name", c
);
1707 /* We shouldn't skip in case there are multibyte chars. */
1713 #ifdef TARGET_EBCDIC
1714 cpp_error (pfile
, DL_ERROR
, "universal-character-name on EBCDIC target");
1715 code
= 0x3f; /* EBCDIC invalid character */
1717 /* True extended characters are OK. */
1719 && !(code
& 0x80000000)
1720 && !(code
>= 0xD800 && code
<= 0xDFFF))
1722 /* The standard permits $, @ and ` to be specified as UCNs. We use
1723 hex escapes so that this also works with EBCDIC hosts. */
1724 else if (code
== 0x24 || code
== 0x40 || code
== 0x60)
1726 /* Don't give another error if one occurred above. */
1727 else if (length
== 0)
1728 cpp_error (pfile
, DL_ERROR
, "universal-character-name out of range");
1736 /* Returns the value of an escape sequence, truncated to the correct
1737 target precision. PSTR points to the input pointer, which is just
1738 after the backslash. LIMIT is how much text we have. WIDE is true
1739 if the escape sequence is part of a wide character constant or
1740 string literal. Handles all relevant diagnostics. */
1742 cpp_parse_escape (pfile
, pstr
, limit
, wide
)
1744 const unsigned char **pstr
;
1745 const unsigned char *limit
;
1749 const unsigned char *str
= *pstr
;
1754 width
= CPP_OPTION (pfile
, wchar_precision
);
1756 width
= CPP_OPTION (pfile
, char_precision
);
1757 if (width
< BITS_PER_CPPCHAR_T
)
1758 mask
= ((cppchar_t
) 1 << width
) - 1;
1765 case '\\': case '\'': case '"': case '?': break;
1766 case 'b': c
= TARGET_BS
; break;
1767 case 'f': c
= TARGET_FF
; break;
1768 case 'n': c
= TARGET_NEWLINE
; break;
1769 case 'r': c
= TARGET_CR
; break;
1770 case 't': c
= TARGET_TAB
; break;
1771 case 'v': c
= TARGET_VT
; break;
1773 case '(': case '{': case '[': case '%':
1774 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1775 '\%' is used to prevent SCCS from getting confused. */
1776 unknown
= CPP_PEDANTIC (pfile
);
1780 if (CPP_WTRADITIONAL (pfile
))
1781 cpp_error (pfile
, DL_WARNING
,
1782 "the meaning of '\\a' is different in traditional C");
1787 if (CPP_PEDANTIC (pfile
))
1788 cpp_error (pfile
, DL_PEDWARN
,
1789 "non-ISO-standard escape sequence, '\\%c'", (int) c
);
1794 unknown
= maybe_read_ucs (pfile
, &str
, limit
, &c
);
1798 if (CPP_WTRADITIONAL (pfile
))
1799 cpp_error (pfile
, DL_WARNING
,
1800 "the meaning of '\\x' is different in traditional C");
1803 cppchar_t i
= 0, overflow
= 0;
1804 int digits_found
= 0;
1812 overflow
|= i
^ (i
<< 4 >> 4);
1813 i
= (i
<< 4) + hex_digit_value (c
);
1818 cpp_error (pfile
, DL_ERROR
,
1819 "\\x used with no following hex digits");
1821 if (overflow
| (i
!= (i
& mask
)))
1823 cpp_error (pfile
, DL_PEDWARN
,
1824 "hex escape sequence out of range");
1831 case '0': case '1': case '2': case '3':
1832 case '4': case '5': case '6': case '7':
1835 cppchar_t i
= c
- '0';
1837 while (str
< limit
&& ++count
< 3)
1840 if (c
< '0' || c
> '7')
1843 i
= (i
<< 3) + c
- '0';
1846 if (i
!= (i
& mask
))
1848 cpp_error (pfile
, DL_PEDWARN
,
1849 "octal escape sequence out of range");
1864 cpp_error (pfile
, DL_PEDWARN
,
1865 "unknown escape sequence '\\%c'", (int) c
);
1867 cpp_error (pfile
, DL_PEDWARN
,
1868 "unknown escape sequence: '\\%03o'", (int) c
);
1873 cpp_error (pfile
, DL_PEDWARN
, "escape sequence out of range for its type");
1881 /* Interpret a (possibly wide) character constant in TOKEN.
1882 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1883 points to a variable that is filled in with the number of
1884 characters seen, and UNSIGNEDP to a variable that indicates whether
1885 the result has signed type. */
1887 cpp_interpret_charconst (pfile
, token
, pchars_seen
, unsignedp
)
1889 const cpp_token
*token
;
1890 unsigned int *pchars_seen
;
1893 const unsigned char *str
= token
->val
.str
.text
;
1894 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1895 unsigned int chars_seen
= 0;
1896 size_t width
, max_chars
;
1897 cppchar_t c
, mask
, result
= 0;
1900 #ifdef MULTIBYTE_CHARS
1901 (void) local_mbtowc (NULL
, NULL
, 0);
1904 /* Width in bits. */
1905 if (token
->type
== CPP_CHAR
)
1907 width
= CPP_OPTION (pfile
, char_precision
);
1908 max_chars
= CPP_OPTION (pfile
, int_precision
) / width
;
1909 unsigned_p
= CPP_OPTION (pfile
, unsigned_char
);
1913 width
= CPP_OPTION (pfile
, wchar_precision
);
1915 unsigned_p
= CPP_OPTION (pfile
, unsigned_wchar
);
1918 if (width
< BITS_PER_CPPCHAR_T
)
1919 mask
= ((cppchar_t
) 1 << width
) - 1;
1925 #ifdef MULTIBYTE_CHARS
1929 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1932 cpp_error (pfile
, DL_WARNING
,
1933 "ignoring invalid multibyte character");
1946 c
= cpp_parse_escape (pfile
, &str
, limit
, token
->type
== CPP_WCHAR
);
1948 #ifdef MAP_CHARACTER
1950 c
= MAP_CHARACTER (c
);
1955 /* Truncate the character, scale the result and merge the two. */
1957 if (width
< BITS_PER_CPPCHAR_T
)
1958 result
= (result
<< width
) | c
;
1963 if (chars_seen
== 0)
1964 cpp_error (pfile
, DL_ERROR
, "empty character constant");
1965 else if (chars_seen
> 1)
1967 /* Multichar charconsts are of type int and therefore signed. */
1970 if (chars_seen
> max_chars
)
1972 chars_seen
= max_chars
;
1973 cpp_error (pfile
, DL_WARNING
,
1974 "character constant too long for its type");
1976 else if (CPP_OPTION (pfile
, warn_multichar
))
1977 cpp_error (pfile
, DL_WARNING
, "multi-character character constant");
1980 /* Sign-extend or truncate the constant to cppchar_t. The value is
1981 in WIDTH bits, but for multi-char charconsts it's value is the
1982 full target type's width. */
1985 if (width
< BITS_PER_CPPCHAR_T
)
1987 mask
= ((cppchar_t
) 1 << width
) - 1;
1988 if (unsigned_p
|| !(result
& (1 << (width
- 1))))
1994 *pchars_seen
= chars_seen
;
1995 *unsignedp
= unsigned_p
;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the bytes between BUFF's
   cur and limit.  Every macro argument is parenthesized so that
   expression arguments expand safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2014 /* Create a new allocation buffer. Place the control block at the end
2015 of the buffer, so that buffer overflows will cause immediate chaos. */
2021 unsigned char *base
;
2023 if (len
< MIN_BUFF_SIZE
)
2024 len
= MIN_BUFF_SIZE
;
2025 len
= CPP_ALIGN (len
);
2027 base
= xmalloc (len
+ sizeof (_cpp_buff
));
2028 result
= (_cpp_buff
*) (base
+ len
);
2029 result
->base
= base
;
2031 result
->limit
= base
+ len
;
2032 result
->next
= NULL
;
2036 /* Place a chain of unwanted allocation buffers on the free list. */
2038 _cpp_release_buff (pfile
, buff
)
2042 _cpp_buff
*end
= buff
;
2046 end
->next
= pfile
->free_buffs
;
2047 pfile
->free_buffs
= buff
;
2050 /* Return a free buffer of size at least MIN_SIZE. */
2052 _cpp_get_buff (pfile
, min_size
)
2056 _cpp_buff
*result
, **p
;
2058 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
2063 return new_buff (min_size
);
2065 size
= result
->limit
- result
->base
;
2066 /* Return a buffer that's big enough, but don't waste one that's
2068 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2073 result
->next
= NULL
;
2074 result
->cur
= result
->base
;
2078 /* Creates a new buffer with enough space to hold the uncommitted
2079 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2080 the excess bytes to the new buffer. Chains the new buffer after
2081 BUFF, and returns the new buffer. */
2083 _cpp_append_extend_buff (pfile
, buff
, min_extra
)
2088 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
2089 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
2091 buff
->next
= new_buff
;
2092 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
2096 /* Creates a new buffer with enough space to hold the uncommitted
2097 remaining bytes of the buffer pointed to by BUFF, and at least
2098 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2099 Chains the new buffer before the buffer pointed to by BUFF, and
2100 updates the pointer to point to the new buffer. */
2102 _cpp_extend_buff (pfile
, pbuff
, min_extra
)
2107 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
2108 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
2110 new_buff
= _cpp_get_buff (pfile
, size
);
2111 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
2112 new_buff
->next
= old_buff
;
2116 /* Free a chain of buffers starting at BUFF. */
2118 _cpp_free_buff (buff
)
2123 for (; buff
; buff
= next
)
2130 /* Allocate permanent, unaligned storage of length LEN. */
2132 _cpp_unaligned_alloc (pfile
, len
)
2136 _cpp_buff
*buff
= pfile
->u_buff
;
2137 unsigned char *result
= buff
->cur
;
2139 if (len
> (size_t) (buff
->limit
- result
))
2141 buff
= _cpp_get_buff (pfile
, len
);
2142 buff
->next
= pfile
->u_buff
;
2143 pfile
->u_buff
= buff
;
2147 buff
->cur
= result
+ len
;
2151 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2152 That buffer is used for growing allocations when saving macro
2153 replacement lists in a #define, and when parsing an answer to an
2154 assertion in #assert, #unassert or #if (and therefore possibly
2155 whilst expanding macros). It therefore must not be used by any
2156 code that they might call: specifically the lexer and the guts of
2159 All existing other uses clearly fit this restriction: storing
2160 registered pragmas during initialization. */
2162 _cpp_aligned_alloc (pfile
, len
)
2166 _cpp_buff
*buff
= pfile
->a_buff
;
2167 unsigned char *result
= buff
->cur
;
2169 if (len
> (size_t) (buff
->limit
- result
))
2171 buff
= _cpp_get_buff (pfile
, len
);
2172 buff
->next
= pfile
->a_buff
;
2173 pfile
->a_buff
= buff
;
2177 buff
->cur
= result
+ len
;