1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
30 #ifdef MULTIBYTE_CHARS
35 /* Tokens with SPELL_STRING store their spelling in the token list,
36 and its length in token->val.str.len. */
49 enum spell_type category
;
50 const unsigned char *name
;
53 static const unsigned char *const digraph_spellings
[] =
54 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
56 #define OP(e, s) { SPELL_OPERATOR, U s },
57 #define TK(e, s) { s, U STRINGX (e) },
58 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
62 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
63 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
64 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
66 static void handle_newline
PARAMS ((cpp_reader
*));
67 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_reader
*));
68 static cppchar_t get_effective_char
PARAMS ((cpp_reader
*));
70 static int skip_block_comment
PARAMS ((cpp_reader
*));
71 static int skip_line_comment
PARAMS ((cpp_reader
*));
72 static void adjust_column
PARAMS ((cpp_reader
*));
73 static int skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
74 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*));
75 static uchar
*parse_slow
PARAMS ((cpp_reader
*, const uchar
*, int,
77 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, int));
78 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const uchar
*));
79 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
80 static bool trigraph_p
PARAMS ((cpp_reader
*));
81 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const uchar
*,
83 static bool continue_after_nul
PARAMS ((cpp_reader
*));
84 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
85 static int maybe_read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
86 const unsigned char *, cppchar_t
*));
87 static tokenrun
*next_tokenrun
PARAMS ((tokenrun
*));
89 static unsigned int hex_digit_value
PARAMS ((unsigned int));
90 static _cpp_buff
*new_buff
PARAMS ((size_t));
92 /* Change to the native locale for multibyte conversions. */
96 #ifdef MULTIBYTE_CHARS
97 setlocale (LC_CTYPE
, "");
98 GET_ENVIRONMENT (literal_codeset
, "LANG");
104 Compares the token TOKEN to the NUL-terminated string STRING.
105 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
107 cpp_ideq (token
, string
)
108 const cpp_token
*token
;
111 if (token
->type
!= CPP_NAME
)
114 return !ustrcmp (NODE_NAME (token
->val
.node
), (const uchar
*) string
);
117 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
118 Returns with buffer->cur pointing to the character immediately
119 following the newline (combination). */
121 handle_newline (pfile
)
124 cpp_buffer
*buffer
= pfile
->buffer
;
126 /* Handle CR-LF and LF-CR. Most other implementations (e.g. Java)
127 only accept CR-LF; maybe we should fall back to that behavior? */
128 if (buffer
->cur
[-1] + buffer
->cur
[0] == '\r' + '\n')
131 buffer
->line_base
= buffer
->cur
;
132 buffer
->col_adjust
= 0;
136 /* Subroutine of skip_escaped_newlines; called when a 3-character
137 sequence beginning with "??" is encountered. buffer->cur points to
140 Warn if necessary, and returns true if the sequence forms a
141 trigraph and the trigraph should be honored. */
146 cpp_buffer
*buffer
= pfile
->buffer
;
147 cppchar_t from_char
= buffer
->cur
[1];
150 if (!_cpp_trigraph_map
[from_char
])
153 accept
= CPP_OPTION (pfile
, trigraphs
);
155 /* Don't warn about trigraphs in comments. */
156 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
159 cpp_error_with_line (pfile
, DL_WARNING
,
160 pfile
->line
, CPP_BUF_COL (buffer
) - 1,
161 "trigraph ??%c converted to %c",
163 (int) _cpp_trigraph_map
[from_char
]);
164 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
166 buffer
->last_Wtrigraphs
= buffer
->cur
;
167 cpp_error_with_line (pfile
, DL_WARNING
,
168 pfile
->line
, CPP_BUF_COL (buffer
) - 1,
169 "trigraph ??%c ignored", (int) from_char
);
176 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
177 lie in buffer->cur[-1]. Returns the next byte, which will be in
178 buffer->cur[-1]. This routine performs preprocessing stages 1 and
179 2 of the ISO C standard. */
181 skip_escaped_newlines (pfile
)
184 cpp_buffer
*buffer
= pfile
->buffer
;
185 cppchar_t next
= buffer
->cur
[-1];
187 /* Only do this if we apply stages 1 and 2. */
188 if (!buffer
->from_stage3
)
190 const unsigned char *saved_cur
;
197 if (buffer
->cur
[0] != '?' || !trigraph_p (pfile
))
200 /* Translate the trigraph. */
201 next
= _cpp_trigraph_map
[buffer
->cur
[1]];
207 if (buffer
->cur
== buffer
->rlimit
)
210 /* We have a backslash, and room for at least one more
211 character. Skip horizontal whitespace. */
212 saved_cur
= buffer
->cur
;
214 next1
= *buffer
->cur
++;
215 while (is_nvspace (next1
) && buffer
->cur
< buffer
->rlimit
);
217 if (!is_vspace (next1
))
219 buffer
->cur
= saved_cur
;
223 if (saved_cur
!= buffer
->cur
- 1
224 && !pfile
->state
.lexing_comment
)
225 cpp_error (pfile
, DL_WARNING
,
226 "backslash and newline separated by space");
228 handle_newline (pfile
);
229 buffer
->backup_to
= buffer
->cur
;
230 if (buffer
->cur
== buffer
->rlimit
)
232 cpp_error (pfile
, DL_PEDWARN
,
233 "backslash-newline at end of file");
237 next
= *buffer
->cur
++;
239 while (next
== '\\' || next
== '?');
245 /* Obtain the next character, after trigraph conversion and skipping
246 an arbitrarily long string of escaped newlines. The common case of
247 no trigraphs or escaped newlines falls through quickly. On return,
248 buffer->backup_to points to where to return to if the character is
249 not to be processed. */
251 get_effective_char (pfile
)
255 cpp_buffer
*buffer
= pfile
->buffer
;
257 buffer
->backup_to
= buffer
->cur
;
258 next
= *buffer
->cur
++;
259 if (__builtin_expect (next
== '?' || next
== '\\', 0))
260 next
= skip_escaped_newlines (pfile
);
265 /* Skip a C-style block comment. We find the end of the comment by
266 seeing if an asterisk is before every '/' we encounter. Returns
267 nonzero if comment terminated by EOF, zero otherwise. */
269 skip_block_comment (pfile
)
272 cpp_buffer
*buffer
= pfile
->buffer
;
273 cppchar_t c
= EOF
, prevc
= EOF
;
275 pfile
->state
.lexing_comment
= 1;
276 while (buffer
->cur
!= buffer
->rlimit
)
278 prevc
= c
, c
= *buffer
->cur
++;
280 /* FIXME: For speed, create a new character class of characters
281 of interest inside block comments. */
282 if (c
== '?' || c
== '\\')
283 c
= skip_escaped_newlines (pfile
);
285 /* People like decorating comments with '*', so check for '/'
286 instead for efficiency. */
292 /* Warn about potential nested comments, but not if the '/'
293 comes immediately before the true comment delimiter.
294 Don't bother to get it right across escaped newlines. */
295 if (CPP_OPTION (pfile
, warn_comments
)
296 && buffer
->cur
[0] == '*' && buffer
->cur
[1] != '/')
297 cpp_error_with_line (pfile
, DL_WARNING
,
298 pfile
->line
, CPP_BUF_COL (buffer
),
299 "\"/*\" within comment");
301 else if (is_vspace (c
))
302 handle_newline (pfile
);
304 adjust_column (pfile
);
307 pfile
->state
.lexing_comment
= 0;
308 return c
!= '/' || prevc
!= '*';
311 /* Skip a C++ line comment, leaving buffer->cur pointing to the
312 terminating newline. Handles escaped newlines. Returns nonzero
313 if a multiline comment. */
315 skip_line_comment (pfile
)
318 cpp_buffer
*buffer
= pfile
->buffer
;
319 unsigned int orig_line
= pfile
->line
;
321 #ifdef MULTIBYTE_CHARS
326 pfile
->state
.lexing_comment
= 1;
327 #ifdef MULTIBYTE_CHARS
328 /* Reset multibyte conversion state. */
329 (void) local_mbtowc (NULL
, NULL
, 0);
333 if (buffer
->cur
== buffer
->rlimit
)
336 #ifdef MULTIBYTE_CHARS
337 char_len
= local_mbtowc (&wc
, (const char *) buffer
->cur
,
338 buffer
->rlimit
- buffer
->cur
);
341 cpp_error (pfile
, DL_WARNING
,
342 "ignoring invalid multibyte character");
348 buffer
->cur
+= char_len
;
354 if (c
== '?' || c
== '\\')
355 c
= skip_escaped_newlines (pfile
);
357 while (!is_vspace (c
));
359 /* Step back over the newline, except at EOF. */
363 pfile
->state
.lexing_comment
= 0;
364 return orig_line
!= pfile
->line
;
367 /* pfile->buffer->cur is one beyond the \t character. Update
368 col_adjust so we track the column correctly. */
370 adjust_column (pfile
)
373 cpp_buffer
*buffer
= pfile
->buffer
;
374 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
376 /* Round it up to multiple of the tabstop, but subtract 1 since the
377 tab itself occupies a character position. */
378 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
379 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
382 /* Skips whitespace, saving the next non-whitespace character.
383 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
384 tokens might be assigned an incorrect column. */
386 skip_whitespace (pfile
, c
)
390 cpp_buffer
*buffer
= pfile
->buffer
;
391 unsigned int warned
= 0;
395 /* Horizontal space always OK. */
399 adjust_column (pfile
);
400 /* Just \f \v or \0 left. */
403 if (buffer
->cur
- 1 == buffer
->rlimit
)
407 cpp_error (pfile
, DL_WARNING
, "null character(s) ignored");
411 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
412 cpp_error_with_line (pfile
, DL_PEDWARN
, pfile
->line
,
413 CPP_BUF_COL (buffer
),
414 "%s in preprocessing directive",
415 c
== '\f' ? "form feed" : "vertical tab");
419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
420 while (is_nvspace (c
));
426 /* See if the characters of a number token are valid in a name (no
429 name_p (pfile
, string
)
431 const cpp_string
*string
;
435 for (i
= 0; i
< string
->len
; i
++)
436 if (!is_idchar (string
->text
[i
]))
442 /* Parse an identifier, skipping embedded backslash-newlines. This is
443 a critical inner loop. The common case is an identifier which has
444 not been split by backslash-newline, does not contain a dollar
445 sign, and has already been scanned (roughly 10:1 ratio of
446 seen:unseen identifiers in normal code; the distribution is
447 Poisson-like). Second most common case is a new identifier, not
448 split and no dollar sign. The other possibilities are rare and
449 have been relegated to parse_slow. */
450 static cpp_hashnode
*
451 parse_identifier (pfile
)
454 cpp_hashnode
*result
;
455 const uchar
*cur
, *base
;
457 /* Fast-path loop. Skim over a normal identifier.
458 N.B. ISIDNUM does not include $. */
459 cur
= pfile
->buffer
->cur
;
460 while (ISIDNUM (*cur
))
463 /* Check for slow-path cases. */
464 if (*cur
== '?' || *cur
== '\\' || *cur
== '$')
468 base
= parse_slow (pfile
, cur
, 0, &len
);
469 result
= (cpp_hashnode
*)
470 ht_lookup (pfile
->hash_table
, base
, len
, HT_ALLOCED
);
474 base
= pfile
->buffer
->cur
- 1;
475 pfile
->buffer
->cur
= cur
;
476 result
= (cpp_hashnode
*)
477 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
480 /* Rarely, identifiers require diagnostics when lexed.
481 XXX Has to be forced out of the fast path. */
482 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
483 && !pfile
->state
.skipping
, 0))
485 /* It is allowed to poison the same identifier twice. */
486 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
487 cpp_error (pfile
, DL_ERROR
, "attempt to use poisoned \"%s\"",
490 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
491 replacement list of a variadic macro. */
492 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
493 && !pfile
->state
.va_args_ok
)
494 cpp_error (pfile
, DL_PEDWARN
,
495 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
501 /* Slow path. This handles numbers and identifiers which have been
502 split, or contain dollar signs. The part of the token from
503 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
504 1 if it's a number, and 2 if it has a leading period. Returns a
505 pointer to the token's NUL-terminated spelling in permanent
506 storage, and sets PLEN to its length. */
508 parse_slow (pfile
, cur
, number_p
, plen
)
514 cpp_buffer
*buffer
= pfile
->buffer
;
515 const uchar
*base
= buffer
->cur
- 1;
516 struct obstack
*stack
= &pfile
->hash_table
->stack
;
517 unsigned int c
, prevc
, saw_dollar
= 0;
519 /* Place any leading period. */
521 obstack_1grow (stack
, '.');
523 /* Copy the part of the token which is known to be okay. */
524 obstack_grow (stack
, base
, cur
- base
);
526 /* Now process the part which isn't. We are looking at one of
527 '$', '\\', or '?' on entry to this loop. */
533 /* Potential escaped newline? */
534 buffer
->backup_to
= buffer
->cur
- 1;
535 if (c
== '?' || c
== '\\')
536 c
= skip_escaped_newlines (pfile
);
542 if (c
!= '.' && !VALID_SIGN (c
, prevc
))
546 /* Handle normal identifier characters in this loop. */
550 obstack_1grow (stack
, c
);
557 while (is_idchar (c
));
560 /* Step back over the unwanted char. */
563 /* $ is not an identifier character in the standard, but is commonly
564 accepted as an extension. Don't warn about it in skipped
565 conditional blocks. */
566 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->state
.skipping
)
567 cpp_error (pfile
, DL_PEDWARN
, "'$' character(s) in identifier or number");
569 /* Identifiers and numbers are null-terminated. */
570 *plen
= obstack_object_size (stack
);
571 obstack_1grow (stack
, '\0');
572 return obstack_finish (stack
);
575 /* Parse a number, beginning with character C, skipping embedded
576 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
577 before C. Place the result in NUMBER. */
579 parse_number (pfile
, number
, leading_period
)
586 /* Fast-path loop. Skim over a normal number.
587 N.B. ISIDNUM does not include $. */
588 cur
= pfile
->buffer
->cur
;
589 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
592 /* Check for slow-path cases. */
593 if (*cur
== '?' || *cur
== '\\' || *cur
== '$')
594 number
->text
= parse_slow (pfile
, cur
, 1 + leading_period
, &number
->len
);
597 const uchar
*base
= pfile
->buffer
->cur
- 1;
600 number
->len
= cur
- base
+ leading_period
;
601 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
602 dest
[number
->len
] = '\0';
607 memcpy (dest
, base
, cur
- base
);
608 pfile
->buffer
->cur
= cur
;
612 /* Subroutine of parse_string. */
614 unescaped_terminator_p (pfile
, dest
)
616 const unsigned char *dest
;
618 const unsigned char *start
, *temp
;
620 /* In #include-style directives, terminators are not escapable. */
621 if (pfile
->state
.angled_headers
)
624 start
= BUFF_FRONT (pfile
->u_buff
);
626 /* An odd number of consecutive backslashes represents an escaped
628 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
631 return ((dest
- temp
) & 1) == 0;
634 /* Parses a string, character constant, or angle-bracketed header file
635 name. Handles embedded trigraphs and escaped newlines. The stored
636 string is guaranteed NUL-terminated, but it is not guaranteed that
637 this is the first NUL since embedded NULs are preserved.
639 When this function returns, buffer->cur points to the next
640 character to be processed. */
642 parse_string (pfile
, token
, terminator
)
645 cppchar_t terminator
;
647 cpp_buffer
*buffer
= pfile
->buffer
;
648 unsigned char *dest
, *limit
;
650 bool warned_nulls
= false;
651 #ifdef MULTIBYTE_CHARS
656 dest
= BUFF_FRONT (pfile
->u_buff
);
657 limit
= BUFF_LIMIT (pfile
->u_buff
);
659 #ifdef MULTIBYTE_CHARS
660 /* Reset multibyte conversion state. */
661 (void) local_mbtowc (NULL
, NULL
, 0);
665 /* We need room for another char, possibly the terminating NUL. */
666 if ((size_t) (limit
- dest
) < 1)
668 size_t len_so_far
= dest
- BUFF_FRONT (pfile
->u_buff
);
669 _cpp_extend_buff (pfile
, &pfile
->u_buff
, 2);
670 dest
= BUFF_FRONT (pfile
->u_buff
) + len_so_far
;
671 limit
= BUFF_LIMIT (pfile
->u_buff
);
674 #ifdef MULTIBYTE_CHARS
675 char_len
= local_mbtowc (&wc
, (const char *) buffer
->cur
,
676 buffer
->rlimit
- buffer
->cur
);
679 cpp_error (pfile
, DL_WARNING
,
680 "ignoring invalid multibyte character");
686 buffer
->cur
+= char_len
;
693 /* Handle trigraphs, escaped newlines etc. */
694 if (c
== '?' || c
== '\\')
695 c
= skip_escaped_newlines (pfile
);
699 if (unescaped_terminator_p (pfile
, dest
))
702 else if (is_vspace (c
))
704 /* No string literal may extend over multiple lines. In
705 assembly language, suppress the error except for <>
706 includes. This is a kludge around not knowing where
709 if (CPP_OPTION (pfile
, lang
) != CLK_ASM
|| terminator
== '>')
710 cpp_error (pfile
, DL_ERROR
, "missing terminating %c character",
717 if (buffer
->cur
- 1 == buffer
->rlimit
)
722 cpp_error (pfile
, DL_WARNING
,
723 "null character(s) preserved in literal");
726 #ifdef MULTIBYTE_CHARS
729 for ( ; char_len
> 0; --char_len
)
730 *dest
++ = (*buffer
->cur
- char_len
);
739 token
->val
.str
.text
= BUFF_FRONT (pfile
->u_buff
);
740 token
->val
.str
.len
= dest
- BUFF_FRONT (pfile
->u_buff
);
741 BUFF_FRONT (pfile
->u_buff
) = dest
+ 1;
744 /* The stored comment includes the comment start and any terminator. */
746 save_comment (pfile
, token
, from
, type
)
749 const unsigned char *from
;
752 unsigned char *buffer
;
753 unsigned int len
, clen
;
755 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
757 /* C++ comments probably (not definitely) have moved past a new
758 line, which we don't want to save in the comment. */
759 if (is_vspace (pfile
->buffer
->cur
[-1]))
762 /* If we are currently in a directive, then we need to store all
763 C++ comments as C comments internally, and so we need to
764 allocate a little extra space in that case.
766 Note that the only time we encounter a directive here is
767 when we are saving comments in a "#define". */
768 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
770 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
772 token
->type
= CPP_COMMENT
;
773 token
->val
.str
.len
= clen
;
774 token
->val
.str
.text
= buffer
;
777 memcpy (buffer
+ 1, from
, len
- 1);
779 /* Finish conversion to a C comment, if necessary. */
780 if (pfile
->state
.in_directive
&& type
== '/')
783 buffer
[clen
- 2] = '*';
784 buffer
[clen
- 1] = '/';
788 /* Allocate COUNT tokens for RUN. */
790 _cpp_init_tokenrun (run
, count
)
794 run
->base
= xnewvec (cpp_token
, count
);
795 run
->limit
= run
->base
+ count
;
799 /* Returns the next tokenrun, or creates one if there is none. */
804 if (run
->next
== NULL
)
806 run
->next
= xnew (tokenrun
);
807 run
->next
->prev
= run
;
808 _cpp_init_tokenrun (run
->next
, 250);
814 /* Allocate a single token that is invalidated at the same time as the
815 rest of the tokens on the line. Has its line and col set to the
816 same as the last lexed token, so that diagnostics appear in the
819 _cpp_temp_token (pfile
)
822 cpp_token
*old
, *result
;
824 old
= pfile
->cur_token
- 1;
825 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
827 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
828 pfile
->cur_token
= pfile
->cur_run
->base
;
831 result
= pfile
->cur_token
++;
832 result
->line
= old
->line
;
833 result
->col
= old
->col
;
837 /* Lex a token into RESULT (external interface). Takes care of issues
838 like directive handling, token lookahead, multiple include
839 optimization and skipping. */
841 _cpp_lex_token (pfile
)
848 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
850 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
851 pfile
->cur_token
= pfile
->cur_run
->base
;
854 if (pfile
->lookaheads
)
857 result
= pfile
->cur_token
++;
860 result
= _cpp_lex_direct (pfile
);
862 if (result
->flags
& BOL
)
864 /* Is this a directive? If _cpp_handle_directive returns
865 false, it is an assembler #. */
866 if (result
->type
== CPP_HASH
867 /* 6.10.3 p 11: Directives in a list of macro arguments
868 gives undefined behavior. This implementation
869 handles the directive as normal. */
870 && pfile
->state
.parsing_args
!= 1
871 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
873 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
874 (*pfile
->cb
.line_change
)(pfile
, result
, pfile
->state
.parsing_args
);
877 /* We don't skip tokens in directives. */
878 if (pfile
->state
.in_directive
)
881 /* Outside a directive, invalidate controlling macros. At file
882 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
883 get here and MI optimisation works. */
884 pfile
->mi_valid
= false;
886 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
893 /* A NUL terminates the current buffer. For ISO preprocessing this is
894 EOF, but for traditional preprocessing it indicates we need a line
895 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
896 to return a CPP_EOF to the caller. */
898 continue_after_nul (pfile
)
901 cpp_buffer
*buffer
= pfile
->buffer
;
904 buffer
->saved_flags
= BOL
;
905 if (CPP_OPTION (pfile
, traditional
))
907 if (pfile
->state
.in_directive
)
910 _cpp_remove_overlay (pfile
);
911 more
= _cpp_read_logical_line_trad (pfile
);
912 _cpp_overlay_buffer (pfile
, pfile
->out
.base
,
913 pfile
->out
.cur
- pfile
->out
.base
);
914 pfile
->line
= pfile
->out
.first_line
;
918 /* Stop parsing arguments with a CPP_EOF. When we finally come
919 back here, do the work of popping the buffer. */
920 if (!pfile
->state
.parsing_args
)
922 if (buffer
->cur
!= buffer
->line_base
)
924 /* Non-empty files should end in a newline. Don't warn
925 for command line and _Pragma buffers. */
926 if (!buffer
->from_stage3
)
927 cpp_error (pfile
, DL_PEDWARN
, "no newline at end of file");
928 handle_newline (pfile
);
931 /* Similarly, finish an in-progress directive with CPP_EOF
932 before popping the buffer. */
933 if (!pfile
->state
.in_directive
&& buffer
->prev
)
935 more
= !buffer
->return_at_eof
;
936 _cpp_pop_buffer (pfile
);
944 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
946 if (get_effective_char (pfile) == CHAR) \
947 result->type = THEN_TYPE; \
951 result->type = ELSE_TYPE; \
955 /* Lex a token into pfile->cur_token, which is also incremented, to
956 get diagnostics pointing to the correct location.
958 Does not handle issues such as token lookahead, multiple-include
959 optimisation, directives, skipping etc. This function is only
960 suitable for use by _cpp_lex_token, and in special cases like
961 lex_expansion_token which doesn't care for any of these issues.
963 When meeting a newline, returns CPP_EOF if parsing a directive,
964 otherwise returns to the start of the token buffer if permissible.
965 Returns the location of the lexed token. */
967 _cpp_lex_direct (pfile
)
972 const unsigned char *comment_start
;
973 cpp_token
*result
= pfile
->cur_token
++;
976 buffer
= pfile
->buffer
;
977 result
->flags
= buffer
->saved_flags
;
978 buffer
->saved_flags
= 0;
980 result
->line
= pfile
->line
;
984 result
->col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
989 case ' ': case '\t': case '\f': case '\v': case '\0':
990 result
->flags
|= PREV_WHITE
;
991 if (skip_whitespace (pfile
, c
))
996 if (continue_after_nul (pfile
))
998 result
->type
= CPP_EOF
;
1001 case '\n': case '\r':
1002 handle_newline (pfile
);
1003 buffer
->saved_flags
= BOL
;
1004 if (! pfile
->state
.in_directive
)
1006 if (pfile
->state
.parsing_args
== 2)
1007 buffer
->saved_flags
|= PREV_WHITE
;
1008 if (!pfile
->keep_tokens
)
1010 pfile
->cur_run
= &pfile
->base_run
;
1011 result
= pfile
->base_run
.base
;
1012 pfile
->cur_token
= result
+ 1;
1016 result
->type
= CPP_EOF
;
1021 /* These could start an escaped newline, or '?' a trigraph. Let
1022 skip_escaped_newlines do all the work. */
1024 unsigned int line
= pfile
->line
;
1026 c
= skip_escaped_newlines (pfile
);
1027 if (line
!= pfile
->line
)
1030 /* We had at least one escaped newline of some sort.
1031 Update the token's line and column. */
1032 goto update_tokens_line
;
1036 /* We are either the original '?' or '\\', or a trigraph. */
1038 result
->type
= CPP_QUERY
;
1045 case '0': case '1': case '2': case '3': case '4':
1046 case '5': case '6': case '7': case '8': case '9':
1047 result
->type
= CPP_NUMBER
;
1048 parse_number (pfile
, &result
->val
.str
, 0);
1052 /* 'L' may introduce wide characters or strings. */
1054 const unsigned char *pos
= buffer
->cur
;
1056 c
= get_effective_char (pfile
);
1057 if (c
== '\'' || c
== '"')
1059 result
->type
= (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
1060 parse_string (pfile
, result
, c
);
1069 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1070 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1071 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1072 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1074 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1075 case 'G': case 'H': case 'I': case 'J': case 'K':
1076 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1077 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1079 result
->type
= CPP_NAME
;
1080 result
->val
.node
= parse_identifier (pfile
);
1082 /* Convert named operators to their proper types. */
1083 if (result
->val
.node
->flags
& NODE_OPERATOR
)
1085 result
->flags
|= NAMED_OP
;
1086 result
->type
= result
->val
.node
->directive_index
;
1092 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
1093 parse_string (pfile
, result
, c
);
1097 /* A potential block or line comment. */
1098 comment_start
= buffer
->cur
;
1099 c
= get_effective_char (pfile
);
1103 if (skip_block_comment (pfile
))
1104 cpp_error (pfile
, DL_ERROR
, "unterminated comment");
1106 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
1107 || CPP_IN_SYSTEM_HEADER (pfile
)))
1109 /* Warn about comments only if pedantically GNUC89, and not
1110 in system headers. */
1111 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1112 && ! buffer
->warned_cplusplus_comments
)
1114 cpp_error (pfile
, DL_PEDWARN
,
1115 "C++ style comments are not allowed in ISO C90");
1116 cpp_error (pfile
, DL_PEDWARN
,
1117 "(this will be reported only once per input file)");
1118 buffer
->warned_cplusplus_comments
= 1;
1121 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1122 cpp_error (pfile
, DL_WARNING
, "multi-line comment");
1126 result
->type
= CPP_DIV_EQ
;
1132 result
->type
= CPP_DIV
;
1136 if (!pfile
->state
.save_comments
)
1138 result
->flags
|= PREV_WHITE
;
1139 goto update_tokens_line
;
1142 /* Save the comment as a token in its own right. */
1143 save_comment (pfile
, result
, comment_start
, c
);
1147 if (pfile
->state
.angled_headers
)
1149 result
->type
= CPP_HEADER_NAME
;
1150 parse_string (pfile
, result
, '>');
1154 c
= get_effective_char (pfile
);
1156 result
->type
= CPP_LESS_EQ
;
1158 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
1159 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1160 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
1161 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1163 result
->type
= CPP_OPEN_SQUARE
;
1164 result
->flags
|= DIGRAPH
;
1166 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1168 result
->type
= CPP_OPEN_BRACE
;
1169 result
->flags
|= DIGRAPH
;
1174 result
->type
= CPP_LESS
;
1179 c
= get_effective_char (pfile
);
1181 result
->type
= CPP_GREATER_EQ
;
1183 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1184 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1185 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
1189 result
->type
= CPP_GREATER
;
1194 c
= get_effective_char (pfile
);
1196 result
->type
= CPP_MOD_EQ
;
1197 else if (CPP_OPTION (pfile
, digraphs
) && c
== ':')
1199 result
->flags
|= DIGRAPH
;
1200 result
->type
= CPP_HASH
;
1201 if (get_effective_char (pfile
) == '%')
1203 const unsigned char *pos
= buffer
->cur
;
1205 if (get_effective_char (pfile
) == ':')
1206 result
->type
= CPP_PASTE
;
1208 buffer
->cur
= pos
- 1;
1213 else if (CPP_OPTION (pfile
, digraphs
) && c
== '>')
1215 result
->flags
|= DIGRAPH
;
1216 result
->type
= CPP_CLOSE_BRACE
;
1221 result
->type
= CPP_MOD
;
1226 result
->type
= CPP_DOT
;
1227 c
= get_effective_char (pfile
);
1230 const unsigned char *pos
= buffer
->cur
;
1232 if (get_effective_char (pfile
) == '.')
1233 result
->type
= CPP_ELLIPSIS
;
1235 buffer
->cur
= pos
- 1;
1237 /* All known character sets have 0...9 contiguous. */
1238 else if (ISDIGIT (c
))
1240 result
->type
= CPP_NUMBER
;
1241 parse_number (pfile
, &result
->val
.str
, 1);
1243 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
1244 result
->type
= CPP_DOT_STAR
;
1250 c
= get_effective_char (pfile
);
1252 result
->type
= CPP_PLUS_PLUS
;
1254 result
->type
= CPP_PLUS_EQ
;
1258 result
->type
= CPP_PLUS
;
1263 c
= get_effective_char (pfile
);
1266 result
->type
= CPP_DEREF
;
1267 if (CPP_OPTION (pfile
, cplusplus
))
1269 if (get_effective_char (pfile
) == '*')
1270 result
->type
= CPP_DEREF_STAR
;
1276 result
->type
= CPP_MINUS_MINUS
;
1278 result
->type
= CPP_MINUS_EQ
;
1282 result
->type
= CPP_MINUS
;
1287 c
= get_effective_char (pfile
);
1289 result
->type
= CPP_AND_AND
;
1291 result
->type
= CPP_AND_EQ
;
1295 result
->type
= CPP_AND
;
1300 c
= get_effective_char (pfile
);
1302 result
->type
= CPP_OR_OR
;
1304 result
->type
= CPP_OR_EQ
;
1308 result
->type
= CPP_OR
;
1313 c
= get_effective_char (pfile
);
1314 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1315 result
->type
= CPP_SCOPE
;
1316 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1318 result
->flags
|= DIGRAPH
;
1319 result
->type
= CPP_CLOSE_SQUARE
;
1324 result
->type
= CPP_COLON
;
1328 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1329 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1330 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1331 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1332 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1334 case '~': result
->type
= CPP_COMPL
; break;
1335 case ',': result
->type
= CPP_COMMA
; break;
1336 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1337 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1338 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1339 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1340 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1341 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1342 case ';': result
->type
= CPP_SEMICOLON
; break;
1344 /* @ is a punctuator in Objective-C. */
1345 case '@': result
->type
= CPP_ATSIGN
; break;
1348 if (CPP_OPTION (pfile
, dollars_in_ident
))
1350 /* Fall through... */
1354 result
->type
= CPP_OTHER
;
1362 /* An upper bound on the number of bytes needed to spell TOKEN,
1363 including preceding whitespace. */
1365 cpp_token_len (token
)
1366 const cpp_token
*token
;
1370 switch (TOKEN_SPELL (token
))
1372 default: len
= 0; break;
1374 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1375 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1377 /* 1 for whitespace, 4 for comment delimiters. */
1381 /* Write the spelling of token TOKEN to BUFFER. The buffer must
1382 already contain enough space to hold the token's spelling.
1383 Returns a pointer to the character after the last character written. */
1386 cpp_spell_token (pfile
, token
, buffer
)
1387 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1388 const cpp_token
*token
;
1389 unsigned char *buffer
;
1391 switch (TOKEN_SPELL (token
))
1393 case SPELL_OPERATOR
:
1395 const unsigned char *spelling
;
1398 if (token
->flags
& DIGRAPH
)
1400 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1401 else if (token
->flags
& NAMED_OP
)
1404 spelling
= TOKEN_NAME (token
);
1406 while ((c
= *spelling
++) != '\0')
1412 *buffer
++ = token
->val
.c
;
1417 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1418 buffer
+= NODE_LEN (token
->val
.node
);
1422 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1423 buffer
+= token
->val
.str
.len
;
1428 int left
, right
, tag
;
1429 switch (token
->type
)
1431 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1432 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1433 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1434 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1435 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1437 cpp_error (pfile
, DL_ICE
, "unknown string token %s\n",
1438 TOKEN_NAME (token
));
1441 if (tag
) *buffer
++ = tag
;
1443 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1444 buffer
+= token
->val
.str
.len
;
1450 cpp_error (pfile
, DL_ICE
, "unspellable token %s", TOKEN_NAME (token
));
1457 /* Returns TOKEN spelt as a null-terminated string. The string is
1458 freed when the reader is destroyed. Useful for diagnostics. */
1460 cpp_token_as_text (pfile
, token
)
1462 const cpp_token
*token
;
1464 unsigned int len
= cpp_token_len (token
);
1465 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1467 end
= cpp_spell_token (pfile
, token
, start
);
1473 /* Used by C front ends, which really should move to using
1474 cpp_token_as_text. */
1476 cpp_type2name (type
)
1477 enum cpp_ttype type
;
1479 return (const char *) token_spellings
[type
].name
;
1482 /* Writes the spelling of token to FP, without any preceding space.
1483 Separated from cpp_spell_token for efficiency - to avoid stdio
1484 double-buffering. */
1486 cpp_output_token (token
, fp
)
1487 const cpp_token
*token
;
1490 switch (TOKEN_SPELL (token
))
1492 case SPELL_OPERATOR
:
1494 const unsigned char *spelling
;
1497 if (token
->flags
& DIGRAPH
)
1499 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1500 else if (token
->flags
& NAMED_OP
)
1503 spelling
= TOKEN_NAME (token
);
1508 while ((c
= *++spelling
) != '\0');
1513 putc (token
->val
.c
, fp
);
1518 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
1522 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1527 int left
, right
, tag
;
1528 switch (token
->type
)
1530 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1531 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1532 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1533 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1534 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1536 fprintf (stderr
, "impossible STRING token %s\n", TOKEN_NAME (token
));
1539 if (tag
) putc (tag
, fp
);
1541 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1547 /* An error, most probably. */
1552 /* Compare two tokens. */
1554 _cpp_equiv_tokens (a
, b
)
1555 const cpp_token
*a
, *b
;
1557 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1558 switch (TOKEN_SPELL (a
))
1560 default: /* Keep compiler happy. */
1561 case SPELL_OPERATOR
:
1564 return a
->val
.c
== b
->val
.c
; /* Character. */
1566 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1568 return a
->val
.node
== b
->val
.node
;
1571 return (a
->val
.str
.len
== b
->val
.str
.len
1572 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1579 /* Returns nonzero if a space should be inserted to avoid an
1580 accidental token paste for output. For simplicity, it is
1581 conservative, and occasionally advises a space where one is not
1582 needed, e.g. "." and ".2". */
1584 cpp_avoid_paste (pfile
, token1
, token2
)
1586 const cpp_token
*token1
, *token2
;
1588 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1591 if (token1
->flags
& NAMED_OP
)
1593 if (token2
->flags
& NAMED_OP
)
1597 if (token2
->flags
& DIGRAPH
)
1598 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1599 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1600 c
= token_spellings
[b
].name
[0];
1602 /* Quickly get everything that can paste with an '='. */
1603 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1608 case CPP_GREATER
: return c
== '>' || c
== '?';
1609 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1610 case CPP_PLUS
: return c
== '+';
1611 case CPP_MINUS
: return c
== '-' || c
== '>';
1612 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1613 case CPP_MOD
: return c
== ':' || c
== '>';
1614 case CPP_AND
: return c
== '&';
1615 case CPP_OR
: return c
== '|';
1616 case CPP_COLON
: return c
== ':' || c
== '>';
1617 case CPP_DEREF
: return c
== '*';
1618 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1619 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1620 case CPP_NAME
: return ((b
== CPP_NUMBER
1621 && name_p (pfile
, &token2
->val
.str
))
1623 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1624 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1625 || c
== '.' || c
== '+' || c
== '-');
1626 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1627 && token1
->val
.c
== '@'
1628 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1635 /* Output all the remaining tokens on the current line, and a newline
1636 character, to FP. Leading whitespace is removed. If there are
1637 macros, special token padding is not performed. */
1639 cpp_output_line (pfile
, fp
)
1643 const cpp_token
*token
;
1645 token
= cpp_get_token (pfile
);
1646 while (token
->type
!= CPP_EOF
)
1648 cpp_output_token (token
, fp
);
1649 token
= cpp_get_token (pfile
);
1650 if (token
->flags
& PREV_WHITE
)
1657 /* Returns the value of a hexadecimal digit. */
1663 return hex_value (c
);
1668 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1669 failure if cpplib is not parsing C++ or C99. Such failure is
1670 silent, and no variables are updated. Otherwise returns 0, and
1671 warns if -Wtraditional.
1673 [lex.charset]: The character designated by the universal character
1674 name \UNNNNNNNN is that character whose character short name in
1675 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676 universal character name \uNNNN is that character whose character
1677 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1678 for a universal character name is less than 0x20 or in the range
1679 0x7F-0x9F (inclusive), or if the universal character name
1680 designates a character in the basic source character set, then the
1681 program is ill-formed.
1683 We assume that wchar_t is Unicode, so we don't need to do any
1684 mapping. Is this ever wrong?
1686 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1687 LIMIT is the end of the string or charconst. PSTR is updated to
1688 point after the UCS on return, and the UCS is written into PC. */
1691 maybe_read_ucs (pfile
, pstr
, limit
, pc
)
1693 const unsigned char **pstr
;
1694 const unsigned char *limit
;
1697 const unsigned char *p
= *pstr
;
1698 unsigned int code
= 0;
1699 unsigned int c
= *pc
, length
;
1701 /* Only attempt to interpret a UCS for C++ and C99. */
1702 if (! (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
)))
1705 if (CPP_WTRADITIONAL (pfile
))
1706 cpp_error (pfile
, DL_WARNING
,
1707 "the meaning of '\\%c' is different in traditional C", c
);
1709 length
= (c
== 'u' ? 4: 8);
1711 if ((size_t) (limit
- p
) < length
)
1713 cpp_error (pfile
, DL_ERROR
, "incomplete universal-character-name");
1714 /* Skip to the end to avoid more diagnostics. */
1719 for (; length
; length
--, p
++)
1723 code
= (code
<< 4) + hex_digit_value (c
);
1726 cpp_error (pfile
, DL_ERROR
,
1727 "non-hex digit '%c' in universal-character-name", c
);
1728 /* We shouldn't skip in case there are multibyte chars. */
1734 #ifdef TARGET_EBCDIC
1735 cpp_error (pfile
, DL_ERROR
, "universal-character-name on EBCDIC target");
1736 code
= 0x3f; /* EBCDIC invalid character */
1738 /* True extended characters are OK. */
1740 && !(code
& 0x80000000)
1741 && !(code
>= 0xD800 && code
<= 0xDFFF))
1743 /* The standard permits $, @ and ` to be specified as UCNs. We use
1744 hex escapes so that this also works with EBCDIC hosts. */
1745 else if (code
== 0x24 || code
== 0x40 || code
== 0x60)
1747 /* Don't give another error if one occurred above. */
1748 else if (length
== 0)
1749 cpp_error (pfile
, DL_ERROR
, "universal-character-name out of range");
1757 /* Returns the value of an escape sequence, truncated to the correct
1758 target precision. PSTR points to the input pointer, which is just
1759 after the backslash. LIMIT is how much text we have. WIDE is true
1760 if the escape sequence is part of a wide character constant or
1761 string literal. Handles all relevant diagnostics. */
1763 cpp_parse_escape (pfile
, pstr
, limit
, wide
)
1765 const unsigned char **pstr
;
1766 const unsigned char *limit
;
1770 const unsigned char *str
= *pstr
;
1775 width
= CPP_OPTION (pfile
, wchar_precision
);
1777 width
= CPP_OPTION (pfile
, char_precision
);
1778 if (width
< BITS_PER_CPPCHAR_T
)
1779 mask
= ((cppchar_t
) 1 << width
) - 1;
1786 case '\\': case '\'': case '"': case '?': break;
1787 case 'b': c
= TARGET_BS
; break;
1788 case 'f': c
= TARGET_FF
; break;
1789 case 'n': c
= TARGET_NEWLINE
; break;
1790 case 'r': c
= TARGET_CR
; break;
1791 case 't': c
= TARGET_TAB
; break;
1792 case 'v': c
= TARGET_VT
; break;
1794 case '(': case '{': case '[': case '%':
1795 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1796 '\%' is used to prevent SCCS from getting confused. */
1797 unknown
= CPP_PEDANTIC (pfile
);
1801 if (CPP_WTRADITIONAL (pfile
))
1802 cpp_error (pfile
, DL_WARNING
,
1803 "the meaning of '\\a' is different in traditional C");
1808 if (CPP_PEDANTIC (pfile
))
1809 cpp_error (pfile
, DL_PEDWARN
,
1810 "non-ISO-standard escape sequence, '\\%c'", (int) c
);
1815 unknown
= maybe_read_ucs (pfile
, &str
, limit
, &c
);
1819 if (CPP_WTRADITIONAL (pfile
))
1820 cpp_error (pfile
, DL_WARNING
,
1821 "the meaning of '\\x' is different in traditional C");
1824 cppchar_t i
= 0, overflow
= 0;
1825 int digits_found
= 0;
1833 overflow
|= i
^ (i
<< 4 >> 4);
1834 i
= (i
<< 4) + hex_digit_value (c
);
1839 cpp_error (pfile
, DL_ERROR
,
1840 "\\x used with no following hex digits");
1842 if (overflow
| (i
!= (i
& mask
)))
1844 cpp_error (pfile
, DL_PEDWARN
,
1845 "hex escape sequence out of range");
1852 case '0': case '1': case '2': case '3':
1853 case '4': case '5': case '6': case '7':
1856 cppchar_t i
= c
- '0';
1858 while (str
< limit
&& ++count
< 3)
1861 if (c
< '0' || c
> '7')
1864 i
= (i
<< 3) + c
- '0';
1867 if (i
!= (i
& mask
))
1869 cpp_error (pfile
, DL_PEDWARN
,
1870 "octal escape sequence out of range");
1885 cpp_error (pfile
, DL_PEDWARN
,
1886 "unknown escape sequence '\\%c'", (int) c
);
1888 cpp_error (pfile
, DL_PEDWARN
,
1889 "unknown escape sequence: '\\%03o'", (int) c
);
1894 cpp_error (pfile
, DL_PEDWARN
, "escape sequence out of range for its type");
1902 /* Interpret a (possibly wide) character constant in TOKEN.
1903 Multi-character charconsts are warned about when the warn_multichar
1904 option is set. PCHARS_SEEN points to a variable that is filled in
1905 with the number of characters seen, and UNSIGNEDP to a variable that
1906 is set nonzero if the result has unsigned type. */
1908 cpp_interpret_charconst (pfile
, token
, pchars_seen
, unsignedp
)
1910 const cpp_token
*token
;
1911 unsigned int *pchars_seen
;
1914 const unsigned char *str
= token
->val
.str
.text
;
1915 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1916 unsigned int chars_seen
= 0;
1917 size_t width
, max_chars
;
1918 cppchar_t c
, mask
, result
= 0;
1921 #ifdef MULTIBYTE_CHARS
1922 (void) local_mbtowc (NULL
, NULL
, 0);
1925 /* Width in bits. */
1926 if (token
->type
== CPP_CHAR
)
1928 width
= CPP_OPTION (pfile
, char_precision
);
1929 max_chars
= CPP_OPTION (pfile
, int_precision
) / width
;
1930 unsigned_p
= CPP_OPTION (pfile
, unsigned_char
);
1934 width
= CPP_OPTION (pfile
, wchar_precision
);
1936 unsigned_p
= CPP_OPTION (pfile
, unsigned_wchar
);
1939 if (width
< BITS_PER_CPPCHAR_T
)
1940 mask
= ((cppchar_t
) 1 << width
) - 1;
1946 #ifdef MULTIBYTE_CHARS
1950 char_len
= local_mbtowc (&wc
, (const char *)str
, limit
- str
);
1953 cpp_error (pfile
, DL_WARNING
,
1954 "ignoring invalid multibyte character");
1967 c
= cpp_parse_escape (pfile
, &str
, limit
, token
->type
== CPP_WCHAR
);
1969 #ifdef MAP_CHARACTER
1971 c
= MAP_CHARACTER (c
);
1976 /* Truncate the character, scale the result and merge the two. */
1978 if (width
< BITS_PER_CPPCHAR_T
)
1979 result
= (result
<< width
) | c
;
1984 if (chars_seen
== 0)
1985 cpp_error (pfile
, DL_ERROR
, "empty character constant");
1986 else if (chars_seen
> 1)
1988 /* Multichar charconsts are of type int and therefore signed. */
1991 if (chars_seen
> max_chars
)
1993 chars_seen
= max_chars
;
1994 cpp_error (pfile
, DL_WARNING
,
1995 "character constant too long for its type");
1997 else if (CPP_OPTION (pfile
, warn_multichar
))
1998 cpp_error (pfile
, DL_WARNING
, "multi-character character constant");
2001 /* Sign-extend or truncate the constant to cppchar_t. The value is
2002 in WIDTH bits, but for multi-char charconsts its value is the
2003 full target type's width. */
2006 if (width
< BITS_PER_CPPCHAR_T
)
2008 mask
= ((cppchar_t
) 1 << width
) - 1;
2009 if (unsigned_p
|| !(result
& (1 << (width
- 1))))
2015 *pchars_seen
= chars_seen
;
2016 *unsignedp
= unsigned_p
;
2020 /* Memory buffers. Changing these three constants can have a dramatic
2021 effect on performance. The values here are reasonable defaults,
2022 but might be tuned. If you adjust them, be sure to test across a
2023 range of uses of cpplib, including heavy nested function-like macro
2024 expansion. Also check the change in peak memory usage (NJAMD is a
2025 good tool for this). */
2026 #define MIN_BUFF_SIZE 8000
2027 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
2028 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2029 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
2031 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2032 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2035 /* Create a new allocation buffer. Place the control block at the end
2036 of the buffer, so that buffer overflows will cause immediate chaos. */
2042 unsigned char *base
;
2044 if (len
< MIN_BUFF_SIZE
)
2045 len
= MIN_BUFF_SIZE
;
2046 len
= CPP_ALIGN (len
);
2048 base
= xmalloc (len
+ sizeof (_cpp_buff
));
2049 result
= (_cpp_buff
*) (base
+ len
);
2050 result
->base
= base
;
2052 result
->limit
= base
+ len
;
2053 result
->next
= NULL
;
2057 /* Place a chain of unwanted allocation buffers on the free list. */
2059 _cpp_release_buff (pfile
, buff
)
2063 _cpp_buff
*end
= buff
;
2067 end
->next
= pfile
->free_buffs
;
2068 pfile
->free_buffs
= buff
;
2071 /* Return a free buffer of size at least MIN_SIZE. */
2073 _cpp_get_buff (pfile
, min_size
)
2077 _cpp_buff
*result
, **p
;
2079 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
2084 return new_buff (min_size
);
2086 size
= result
->limit
- result
->base
;
2087 /* Return a buffer that's big enough, but don't waste one that's much larger than needed. */
2089 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
2094 result
->next
= NULL
;
2095 result
->cur
= result
->base
;
2099 /* Creates a new buffer with enough space to hold the uncommitted
2100 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2101 the excess bytes to the new buffer. Chains the new buffer after
2102 BUFF, and returns the new buffer. */
2104 _cpp_append_extend_buff (pfile
, buff
, min_extra
)
2109 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
2110 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
2112 buff
->next
= new_buff
;
2113 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
2117 /* Creates a new buffer with enough space to hold the uncommitted
2118 remaining bytes of the buffer pointed to by BUFF, and at least
2119 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2120 Chains the new buffer before the buffer pointed to by BUFF, and
2121 updates the pointer to point to the new buffer. */
2123 _cpp_extend_buff (pfile
, pbuff
, min_extra
)
2128 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
2129 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
2131 new_buff
= _cpp_get_buff (pfile
, size
);
2132 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
2133 new_buff
->next
= old_buff
;
2137 /* Free a chain of buffers starting at BUFF. */
2139 _cpp_free_buff (buff
)
2144 for (; buff
; buff
= next
)
2151 /* Allocate permanent, unaligned storage of length LEN. */
2153 _cpp_unaligned_alloc (pfile
, len
)
2157 _cpp_buff
*buff
= pfile
->u_buff
;
2158 unsigned char *result
= buff
->cur
;
2160 if (len
> (size_t) (buff
->limit
- result
))
2162 buff
= _cpp_get_buff (pfile
, len
);
2163 buff
->next
= pfile
->u_buff
;
2164 pfile
->u_buff
= buff
;
2168 buff
->cur
= result
+ len
;
2172 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2173 That buffer is used for growing allocations when saving macro
2174 replacement lists in a #define, and when parsing an answer to an
2175 assertion in #assert, #unassert or #if (and therefore possibly
2176 whilst expanding macros). It therefore must not be used by any
2177 code that they might call: specifically the lexer and the guts of the macro expander.
2180 All other existing uses clearly fit this restriction: storing
2181 registered pragmas during initialization. */
2183 _cpp_aligned_alloc (pfile
, len
)
2187 _cpp_buff
*buff
= pfile
->a_buff
;
2188 unsigned char *result
= buff
->cur
;
2190 if (len
> (size_t) (buff
->limit
- result
))
2192 buff
= _cpp_get_buff (pfile
, len
);
2193 buff
->next
= pfile
->a_buff
;
2194 pfile
->a_buff
= buff
;
2198 buff
->cur
= result
+ len
;