1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category
;
68 const unsigned char *name
;
71 const unsigned char *digraph_spellings
[] = {U
"%:", U
"%:%:", U
"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings
[N_TTYPES
] = {TTYPE_TABLE
};
/* Accessors into the token_spellings table, indexed by TOKEN's type:
   TOKEN_SPELL yields the entry's spelling category, TOKEN_NAME its
   name string.  TOKEN is a pointer to a token.  */
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline
PARAMS ((cpp_reader
*, cppchar_t
));
84 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_reader
*, cppchar_t
));
85 static cppchar_t get_effective_char
PARAMS ((cpp_reader
*));
87 static int skip_block_comment
PARAMS ((cpp_reader
*));
88 static int skip_line_comment
PARAMS ((cpp_reader
*));
89 static void adjust_column
PARAMS ((cpp_reader
*));
90 static void skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
91 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*));
92 static cpp_hashnode
*parse_identifier_slow
PARAMS ((cpp_reader
*,
94 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, cppchar_t
, int));
95 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const U_CHAR
*));
96 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
97 static void unterminated
PARAMS ((cpp_reader
*, int));
98 static int trigraph_ok
PARAMS ((cpp_reader
*, cppchar_t
));
99 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const U_CHAR
*));
100 static void lex_percent
PARAMS ((cpp_reader
*, cpp_token
*));
101 static void lex_dot
PARAMS ((cpp_reader
*, cpp_token
*));
102 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
103 static int maybe_read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
104 const unsigned char *, unsigned int *));
105 static tokenrun
*next_tokenrun
PARAMS ((tokenrun
*));
107 static cpp_chunk
*new_chunk
PARAMS ((unsigned int));
108 static int chunk_suitable
PARAMS ((cpp_pool
*, cpp_chunk
*, unsigned int));
109 static unsigned int hex_digit_value
PARAMS ((unsigned int));
110 static _cpp_buff
*new_buff
PARAMS ((unsigned int));
114 Compares the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
118 cpp_ideq (token
, string
)
119 const cpp_token
*token
;
122 if (token
->type
!= CPP_NAME
)
125 return !ustrcmp (NODE_NAME (token
->val
.node
), (const U_CHAR
*) string
);
128 /* Call when meeting a newline. Returns the character after the newline
129 (or carriage-return newline combination), or EOF. */
131 handle_newline (pfile
, newline_char
)
133 cppchar_t newline_char
;
136 cppchar_t next
= EOF
;
139 buffer
= pfile
->buffer
;
140 buffer
->col_adjust
= 0;
141 buffer
->line_base
= buffer
->cur
;
143 /* Handle CR-LF and LF-CR combinations, get the next character. */
144 if (buffer
->cur
< buffer
->rlimit
)
146 next
= *buffer
->cur
++;
147 if (next
+ newline_char
== '\r' + '\n')
149 buffer
->line_base
= buffer
->cur
;
150 if (buffer
->cur
< buffer
->rlimit
)
151 next
= *buffer
->cur
++;
157 buffer
->read_ahead
= next
;
161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
162 encountered. It warns if necessary, and returns true if the
163 trigraph should be honoured. FROM_CHAR is the third character of a
164 trigraph, and presumed to be the previous character for position
167 trigraph_ok (pfile
, from_char
)
171 int accept
= CPP_OPTION (pfile
, trigraphs
);
173 /* Don't warn about trigraphs in comments. */
174 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
176 cpp_buffer
*buffer
= pfile
->buffer
;
179 cpp_warning_with_line (pfile
, pfile
->line
, CPP_BUF_COL (buffer
) - 2,
180 "trigraph ??%c converted to %c",
182 (int) _cpp_trigraph_map
[from_char
]);
183 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
185 buffer
->last_Wtrigraphs
= buffer
->cur
;
186 cpp_warning_with_line (pfile
, pfile
->line
,
187 CPP_BUF_COL (buffer
) - 2,
188 "trigraph ??%c ignored", (int) from_char
);
/* Set the type of the token being lexed to T and reset
   buffer->read_ahead to EOF.  Assumes local variables buffer and
   result are in scope where the macro is used.  T is parenthesized in
   the expansion so that any expression can be passed safely.  */
#define ACCEPT_CHAR(t) \
  do { result->type = (t); buffer->read_ahead = EOF; } while (0)
199 /* When we move to multibyte character sets, add to these something
200 that saves and restores the state of the multibyte conversion
201 library. This probably involves saving and restoring a "cookie".
202 In the case of glibc it is an 8-byte structure, so is not a high
203 overhead operation. In any case, it's out of the fast path. */
/* SAVE_STATE records the current read position (buffer->cur) in the
   local saved_cur; RESTORE_STATE rewinds buffer->cur to that saved
   position.  Both rely on locals named buffer and saved_cur being in
   scope at the point of use.  */
204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
207 /* Skips any escaped newlines introduced by NEXT, which is either a
208 '?' or a '\\'. Returns the next character, which will also have
209 been placed in buffer->read_ahead. This routine performs
210 preprocessing stages 1 and 2 of the ISO C standard. */
212 skip_escaped_newlines (pfile
, next
)
216 cpp_buffer
*buffer
= pfile
->buffer
;
218 /* Only do this if we apply stages 1 and 2. */
219 if (!buffer
->from_stage3
)
222 const unsigned char *saved_cur
;
227 if (buffer
->cur
== buffer
->rlimit
)
233 next1
= *buffer
->cur
++;
234 if (next1
!= '?' || buffer
->cur
== buffer
->rlimit
)
240 next1
= *buffer
->cur
++;
241 if (!_cpp_trigraph_map
[next1
]
242 || !trigraph_ok (pfile
, next1
))
248 /* We have a full trigraph here. */
249 next
= _cpp_trigraph_map
[next1
];
250 if (next
!= '\\' || buffer
->cur
== buffer
->rlimit
)
255 /* We have a backslash, and room for at least one more character. */
259 next1
= *buffer
->cur
++;
260 if (!is_nvspace (next1
))
264 while (buffer
->cur
< buffer
->rlimit
);
266 if (!is_vspace (next1
))
272 if (space
&& !pfile
->state
.lexing_comment
)
273 cpp_warning (pfile
, "backslash and newline separated by space");
275 next
= handle_newline (pfile
, next1
);
277 cpp_pedwarn (pfile
, "backslash-newline at end of file");
279 while (next
== '\\' || next
== '?');
282 buffer
->read_ahead
= next
;
286 /* Obtain the next character, after trigraph conversion and skipping
287 an arbitrary string of escaped newlines. The common case of no
288 trigraphs or escaped newlines falls through quickly. */
290 get_effective_char (pfile
)
293 cpp_buffer
*buffer
= pfile
->buffer
;
294 cppchar_t next
= EOF
;
296 if (buffer
->cur
< buffer
->rlimit
)
298 next
= *buffer
->cur
++;
300 /* '?' can introduce trigraphs (and therefore backslash); '\\'
301 can introduce escaped newlines, which we want to skip, or
302 UCNs, which, depending upon lexer state, we will handle in
304 if (next
== '?' || next
== '\\')
305 next
= skip_escaped_newlines (pfile
, next
);
308 buffer
->read_ahead
= next
;
312 /* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 non-zero if comment terminated by EOF, zero otherwise. */
316 skip_block_comment (pfile
)
319 cpp_buffer
*buffer
= pfile
->buffer
;
320 cppchar_t c
= EOF
, prevc
= EOF
;
322 pfile
->state
.lexing_comment
= 1;
323 while (buffer
->cur
!= buffer
->rlimit
)
325 prevc
= c
, c
= *buffer
->cur
++;
328 /* FIXME: For speed, create a new character class of characters
329 of interest inside block comments. */
330 if (c
== '?' || c
== '\\')
331 c
= skip_escaped_newlines (pfile
, c
);
333 /* People like decorating comments with '*', so check for '/'
334 instead for efficiency. */
340 /* Warn about potential nested comments, but not if the '/'
341 comes immediately before the true comment delimiter.
342 Don't bother to get it right across escaped newlines. */
343 if (CPP_OPTION (pfile
, warn_comments
)
344 && buffer
->cur
!= buffer
->rlimit
)
346 prevc
= c
, c
= *buffer
->cur
++;
347 if (c
== '*' && buffer
->cur
!= buffer
->rlimit
)
349 prevc
= c
, c
= *buffer
->cur
++;
351 cpp_warning_with_line (pfile
, pfile
->line
,
352 CPP_BUF_COL (buffer
) - 2,
353 "\"/*\" within comment");
358 else if (is_vspace (c
))
360 prevc
= c
, c
= handle_newline (pfile
, c
);
364 adjust_column (pfile
);
367 pfile
->state
.lexing_comment
= 0;
368 buffer
->read_ahead
= EOF
;
369 return c
!= '/' || prevc
!= '*';
372 /* Skip a C++ line comment. Handles escaped newlines. Returns
373 non-zero if a multiline comment. The following new line, if any,
374 is left in buffer->read_ahead. */
376 skip_line_comment (pfile
)
379 cpp_buffer
*buffer
= pfile
->buffer
;
380 unsigned int orig_line
= pfile
->line
;
383 pfile
->state
.lexing_comment
= 1;
387 if (buffer
->cur
== buffer
->rlimit
)
391 if (c
== '?' || c
== '\\')
392 c
= skip_escaped_newlines (pfile
, c
);
394 while (!is_vspace (c
));
396 pfile
->state
.lexing_comment
= 0;
397 buffer
->read_ahead
= c
; /* Leave any newline for caller. */
398 return orig_line
!= pfile
->line
;
401 /* pfile->buffer->cur is one beyond the \t character. Update
402 col_adjust so we track the column correctly. */
404 adjust_column (pfile
)
407 cpp_buffer
*buffer
= pfile
->buffer
;
408 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
410 /* Round it up to multiple of the tabstop, but subtract 1 since the
411 tab itself occupies a character position. */
412 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
413 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
416 /* Skips whitespace, saving the next non-whitespace character.
417 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
418 tokens might be assigned an incorrect column. */
420 skip_whitespace (pfile
, c
)
424 cpp_buffer
*buffer
= pfile
->buffer
;
425 unsigned int warned
= 0;
429 /* Horizontal space always OK. */
433 adjust_column (pfile
);
434 /* Just \f \v or \0 left. */
439 cpp_warning (pfile
, "null character(s) ignored");
443 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
444 cpp_pedwarn_with_line (pfile
, pfile
->line
,
445 CPP_BUF_COL (buffer
),
446 "%s in preprocessing directive",
447 c
== '\f' ? "form feed" : "vertical tab");
450 if (buffer
->cur
== buffer
->rlimit
)
454 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
455 while (is_nvspace (c
));
457 /* Remember the next character. */
458 buffer
->read_ahead
= c
;
461 /* See if the characters of a number token are valid in a name (no
464 name_p (pfile
, string
)
466 const cpp_string
*string
;
470 for (i
= 0; i
< string
->len
; i
++)
471 if (!is_idchar (string
->text
[i
]))
477 /* Parse an identifier, skipping embedded backslash-newlines. This is
478 a critical inner loop. The common case is an identifier which has
479 not been split by backslash-newline, does not contain a dollar
480 sign, and has already been scanned (roughly 10:1 ratio of
481 seen:unseen identifiers in normal code; the distribution is
482 Poisson-like). Second most common case is a new identifier, not
483 split and no dollar sign. The other possibilities are rare and
484 have been relegated to parse_identifier_slow. */
486 static cpp_hashnode
*
487 parse_identifier (pfile
)
490 cpp_hashnode
*result
;
491 const U_CHAR
*cur
, *rlimit
;
493 /* Fast-path loop. Skim over a normal identifier.
494 N.B. ISIDNUM does not include $. */
495 cur
= pfile
->buffer
->cur
- 1;
496 rlimit
= pfile
->buffer
->rlimit
;
499 while (cur
< rlimit
&& ISIDNUM (*cur
));
501 /* Check for slow-path cases. */
502 if (cur
< rlimit
&& (*cur
== '?' || *cur
== '\\' || *cur
== '$'))
503 result
= parse_identifier_slow (pfile
, cur
);
506 const U_CHAR
*base
= pfile
->buffer
->cur
- 1;
507 result
= (cpp_hashnode
*)
508 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
509 pfile
->buffer
->cur
= cur
;
512 /* Rarely, identifiers require diagnostics when lexed.
513 XXX Has to be forced out of the fast path. */
514 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
515 && !pfile
->state
.skipping
, 0))
517 /* It is allowed to poison the same identifier twice. */
518 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
519 cpp_error (pfile
, "attempt to use poisoned \"%s\"",
522 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
523 replacement list of a variadic macro. */
524 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
525 && !pfile
->state
.va_args_ok
)
527 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
533 /* Slow path. This handles identifiers which have been split, and
534 identifiers which contain dollar signs. The part of the identifier
535 from PFILE->buffer->cur-1 to CUR has already been scanned. */
536 static cpp_hashnode
*
537 parse_identifier_slow (pfile
, cur
)
541 cpp_buffer
*buffer
= pfile
->buffer
;
542 const U_CHAR
*base
= buffer
->cur
- 1;
543 struct obstack
*stack
= &pfile
->hash_table
->stack
;
544 unsigned int c
, saw_dollar
= 0, len
;
546 /* Copy the part of the token which is known to be okay. */
547 obstack_grow (stack
, base
, cur
- base
);
549 /* Now process the part which isn't. We are looking at one of
550 '$', '\\', or '?' on entry to this loop. */
555 while (is_idchar (c
))
557 obstack_1grow (stack
, c
);
563 if (buffer
->cur
== buffer
->rlimit
)
569 /* Potential escaped newline? */
570 if (c
!= '?' && c
!= '\\')
572 c
= skip_escaped_newlines (pfile
, c
);
574 while (is_idchar (c
));
576 /* Remember the next character. */
577 buffer
->read_ahead
= c
;
579 /* $ is not an identifier character in the standard, but is commonly
580 accepted as an extension. Don't warn about it in skipped
581 conditional blocks. */
582 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->state
.skipping
)
583 cpp_pedwarn (pfile
, "'$' character(s) in identifier");
585 /* Identifiers are null-terminated. */
586 len
= obstack_object_size (stack
);
587 obstack_1grow (stack
, '\0');
589 return (cpp_hashnode
*)
590 ht_lookup (pfile
->hash_table
, obstack_finish (stack
), len
, HT_ALLOCED
);
593 /* Parse a number, skipping embedded backslash-newlines. */
595 parse_number (pfile
, number
, c
, leading_period
)
601 cpp_buffer
*buffer
= pfile
->buffer
;
602 cpp_pool
*pool
= &pfile
->ident_pool
;
603 unsigned char *dest
, *limit
;
605 dest
= POOL_FRONT (pool
);
606 limit
= POOL_LIMIT (pool
);
608 /* Place a leading period. */
612 limit
= _cpp_next_chunk (pool
, 0, &dest
);
620 /* Need room for terminating null. */
621 if (dest
+ 1 >= limit
)
622 limit
= _cpp_next_chunk (pool
, 0, &dest
);
626 if (buffer
->cur
== buffer
->rlimit
)
631 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
633 /* Potential escaped newline? */
634 if (c
!= '?' && c
!= '\\')
636 c
= skip_escaped_newlines (pfile
, c
);
638 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
640 /* Remember the next character. */
641 buffer
->read_ahead
= c
;
643 /* Null-terminate the number. */
646 number
->text
= POOL_FRONT (pool
);
647 number
->len
= dest
- number
->text
;
648 POOL_COMMIT (pool
, number
->len
+ 1);
651 /* Subroutine of parse_string. Emits error for unterminated strings. */
653 unterminated (pfile
, term
)
657 cpp_error (pfile
, "missing terminating %c character", term
);
659 if (term
== '\"' && pfile
->mls_line
&& pfile
->mls_line
!= pfile
->line
)
661 cpp_error_with_line (pfile
, pfile
->mls_line
, pfile
->mls_col
,
662 "possible start of unterminated string literal");
667 /* Subroutine of parse_string. */
669 unescaped_terminator_p (pfile
, dest
)
671 const unsigned char *dest
;
673 const unsigned char *start
, *temp
;
675 /* In #include-style directives, terminators are not escapable. */
676 if (pfile
->state
.angled_headers
)
679 start
= POOL_FRONT (&pfile
->ident_pool
);
681 /* An odd number of consecutive backslashes represents an escaped
683 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
686 return ((dest
- temp
) & 1) == 0;
689 /* Parses a string, character constant, or angle-bracketed header file
690 name. Handles embedded trigraphs and escaped newlines. The stored
691 string is guaranteed NUL-terminated, but it is not guaranteed that
692 this is the first NUL since embedded NULs are preserved.
694 Multi-line strings are allowed, but they are deprecated. */
696 parse_string (pfile
, token
, terminator
)
699 cppchar_t terminator
;
701 cpp_buffer
*buffer
= pfile
->buffer
;
702 cpp_pool
*pool
= &pfile
->ident_pool
;
703 unsigned char *dest
, *limit
;
705 bool warned_nulls
= false, warned_multi
= false;
707 dest
= POOL_FRONT (pool
);
708 limit
= POOL_LIMIT (pool
);
712 if (buffer
->cur
== buffer
->rlimit
)
718 /* We need space for the terminating NUL. */
720 limit
= _cpp_next_chunk (pool
, 0, &dest
);
724 unterminated (pfile
, terminator
);
728 /* Handle trigraphs, escaped newlines etc. */
729 if (c
== '?' || c
== '\\')
730 c
= skip_escaped_newlines (pfile
, c
);
732 if (c
== terminator
&& unescaped_terminator_p (pfile
, dest
))
737 else if (is_vspace (c
))
739 /* In assembly language, silently terminate string and
740 character literals at end of line. This is a kludge
741 around not knowing where comments are. */
742 if (CPP_OPTION (pfile
, lang
) == CLK_ASM
&& terminator
!= '>')
745 /* Character constants and header names may not extend over
746 multiple lines. In Standard C, neither may strings.
747 Unfortunately, we accept multiline strings as an
748 extension, except in #include family directives. */
749 if (terminator
!= '"' || pfile
->state
.angled_headers
)
751 unterminated (pfile
, terminator
);
758 cpp_pedwarn (pfile
, "multi-line string literals are deprecated");
761 if (pfile
->mls_line
== 0)
763 pfile
->mls_line
= token
->line
;
764 pfile
->mls_col
= token
->col
;
767 c
= handle_newline (pfile
, c
);
771 else if (c
== '\0' && !warned_nulls
)
774 cpp_warning (pfile
, "null character(s) preserved in literal");
780 /* Remember the next character. */
781 buffer
->read_ahead
= c
;
784 token
->val
.str
.text
= POOL_FRONT (pool
);
785 token
->val
.str
.len
= dest
- token
->val
.str
.text
;
786 POOL_COMMIT (pool
, token
->val
.str
.len
+ 1);
789 /* The stored comment includes the comment start and any terminator. */
791 save_comment (pfile
, token
, from
)
794 const unsigned char *from
;
796 unsigned char *buffer
;
799 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
800 /* C++ comments probably (not definitely) have moved past a new
801 line, which we don't want to save in the comment. */
802 if (pfile
->buffer
->read_ahead
!= EOF
)
804 buffer
= _cpp_pool_alloc (&pfile
->ident_pool
, len
);
806 token
->type
= CPP_COMMENT
;
807 token
->val
.str
.len
= len
;
808 token
->val
.str
.text
= buffer
;
811 memcpy (buffer
+ 1, from
, len
- 1);
814 /* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
815 want to avoid stepping back when lexing %:%X. */
817 lex_percent (pfile
, result
)
821 cpp_buffer
*buffer
= pfile
->buffer
;
824 result
->type
= CPP_MOD
;
825 /* Parsing %:%X could leave an extra character. */
826 if (buffer
->extra_char
== EOF
)
827 c
= get_effective_char (pfile
);
830 c
= buffer
->read_ahead
= buffer
->extra_char
;
831 buffer
->extra_char
= EOF
;
835 ACCEPT_CHAR (CPP_MOD_EQ
);
836 else if (CPP_OPTION (pfile
, digraphs
))
840 result
->flags
|= DIGRAPH
;
841 ACCEPT_CHAR (CPP_HASH
);
842 if (get_effective_char (pfile
) == '%')
844 buffer
->extra_char
= get_effective_char (pfile
);
845 if (buffer
->extra_char
== ':')
847 buffer
->extra_char
= EOF
;
848 ACCEPT_CHAR (CPP_PASTE
);
851 /* We'll catch the extra_char when we're called back. */
852 buffer
->read_ahead
= '%';
857 result
->flags
|= DIGRAPH
;
858 ACCEPT_CHAR (CPP_CLOSE_BRACE
);
863 /* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
864 want to avoid stepping back when lexing '...' or '.123'. In the
865 latter case we should also set a flag for parse_number. */
867 lex_dot (pfile
, result
)
871 cpp_buffer
*buffer
= pfile
->buffer
;
874 /* Parsing ..X could leave an extra character. */
875 if (buffer
->extra_char
== EOF
)
876 c
= get_effective_char (pfile
);
879 c
= buffer
->read_ahead
= buffer
->extra_char
;
880 buffer
->extra_char
= EOF
;
883 /* All known character sets have 0...9 contiguous. */
884 if (c
>= '0' && c
<= '9')
886 result
->type
= CPP_NUMBER
;
887 parse_number (pfile
, &result
->val
.str
, c
, 1);
891 result
->type
= CPP_DOT
;
894 buffer
->extra_char
= get_effective_char (pfile
);
895 if (buffer
->extra_char
== '.')
897 buffer
->extra_char
= EOF
;
898 ACCEPT_CHAR (CPP_ELLIPSIS
);
901 /* We'll catch the extra_char when we're called back. */
902 buffer
->read_ahead
= '.';
904 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
905 ACCEPT_CHAR (CPP_DOT_STAR
);
909 /* Allocate COUNT tokens for RUN. */
911 _cpp_init_tokenrun (run
, count
)
915 run
->base
= xnewvec (cpp_token
, count
);
916 run
->limit
= run
->base
+ count
;
920 /* Returns the next tokenrun, or creates one if there is none. */
925 if (run
->next
== NULL
)
927 run
->next
= xnew (tokenrun
);
928 run
->next
->prev
= run
;
929 _cpp_init_tokenrun (run
->next
, 250);
935 /* Allocate a single token that is invalidated at the same time as the
936 rest of the tokens on the line. Has its line and col set to the
937 same as the last lexed token, so that diagnostics appear in the
940 _cpp_temp_token (pfile
)
943 cpp_token
*old
, *result
;
945 old
= pfile
->cur_token
- 1;
946 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
948 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
949 pfile
->cur_token
= pfile
->cur_run
->base
;
952 result
= pfile
->cur_token
++;
953 result
->line
= old
->line
;
954 result
->col
= old
->col
;
958 /* Lex a token into RESULT (external interface). Takes care of issues
959 like directive handling, token lookahead, multiple include
960 optimisation and skipping. */
962 _cpp_lex_token (pfile
)
969 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
971 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
972 pfile
->cur_token
= pfile
->cur_run
->base
;
975 if (pfile
->lookaheads
)
978 result
= pfile
->cur_token
++;
981 result
= _cpp_lex_direct (pfile
);
983 if (result
->flags
& BOL
)
985 /* Is this a directive? If _cpp_handle_directive returns
986 false, it is an assembler #. */
987 if (result
->type
== CPP_HASH
988 && !pfile
->state
.parsing_args
989 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
991 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
992 (*pfile
->cb
.line_change
)(pfile
, result
, pfile
->state
.parsing_args
);
995 /* We don't skip tokens in directives. */
996 if (pfile
->state
.in_directive
)
999 /* Outside a directive, invalidate controlling macros. At file
1000 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1001 get here and MI optimisation works. */
1002 pfile
->mi_valid
= false;
1004 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
1011 /* Lex a token into pfile->cur_token, which is also incremented, to
1012 get diagnostics pointing to the correct location.
1014 Does not handle issues such as token lookahead, multiple-include
1015 optimisation, directives, skipping etc. This function is only
1016 suitable for use by _cpp_lex_token, and in special cases like
1017 lex_expansion_token which doesn't care for any of these issues.
1019 When meeting a newline, returns CPP_EOF if parsing a directive,
1020 otherwise returns to the start of the token buffer if permissible.
1021 Returns the location of the lexed token. */
1023 _cpp_lex_direct (pfile
)
1028 const unsigned char *comment_start
;
1029 cpp_token
*result
= pfile
->cur_token
++;
1032 buffer
= pfile
->buffer
;
1033 result
->flags
= buffer
->saved_flags
;
1034 buffer
->saved_flags
= 0;
1036 result
->line
= pfile
->line
;
1039 c
= buffer
->read_ahead
;
1040 if (c
== EOF
&& buffer
->cur
< buffer
->rlimit
)
1042 result
->col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
1043 buffer
->read_ahead
= EOF
;
1049 buffer
->saved_flags
= BOL
;
1050 if (!pfile
->state
.parsing_args
&& !pfile
->state
.in_directive
)
1052 if (buffer
->cur
!= buffer
->line_base
)
1054 /* Non-empty files should end in a newline. Don't warn
1055 for command line and _Pragma buffers. */
1056 if (!buffer
->from_stage3
)
1057 cpp_pedwarn (pfile
, "no newline at end of file");
1058 handle_newline (pfile
, '\n');
1061 /* Don't pop the last buffer. */
1064 unsigned char stop
= buffer
->return_at_eof
;
1066 _cpp_pop_buffer (pfile
);
1071 result
->type
= CPP_EOF
;
1074 case ' ': case '\t': case '\f': case '\v': case '\0':
1075 skip_whitespace (pfile
, c
);
1076 result
->flags
|= PREV_WHITE
;
1079 case '\n': case '\r':
1080 handle_newline (pfile
, c
);
1081 buffer
->saved_flags
= BOL
;
1082 if (! pfile
->state
.in_directive
)
1084 if (pfile
->state
.parsing_args
== 2)
1085 buffer
->saved_flags
|= PREV_WHITE
;
1086 if (!pfile
->keep_tokens
)
1088 pfile
->cur_run
= &pfile
->base_run
;
1089 result
= pfile
->base_run
.base
;
1090 pfile
->cur_token
= result
+ 1;
1094 result
->type
= CPP_EOF
;
1099 /* These could start an escaped newline, or '?' a trigraph. Let
1100 skip_escaped_newlines do all the work. */
1102 unsigned int line
= pfile
->line
;
1104 c
= skip_escaped_newlines (pfile
, c
);
1105 if (line
!= pfile
->line
)
1106 /* We had at least one escaped newline of some sort, and the
1107 next character is in buffer->read_ahead. Update the
1108 token's line and column. */
1109 goto update_tokens_line
;
1111 /* We are either the original '?' or '\\', or a trigraph. */
1112 result
->type
= CPP_QUERY
;
1113 buffer
->read_ahead
= EOF
;
1121 case '0': case '1': case '2': case '3': case '4':
1122 case '5': case '6': case '7': case '8': case '9':
1123 result
->type
= CPP_NUMBER
;
1124 parse_number (pfile
, &result
->val
.str
, c
, 0);
1128 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1130 /* Fall through... */
1133 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1134 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1135 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1136 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1138 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1139 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1140 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1141 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1143 result
->type
= CPP_NAME
;
1144 result
->val
.node
= parse_identifier (pfile
);
1146 /* 'L' may introduce wide characters or strings. */
1147 if (result
->val
.node
== pfile
->spec_nodes
.n_L
)
1149 c
= buffer
->read_ahead
;
1150 if (c
== EOF
&& buffer
->cur
< buffer
->rlimit
)
1152 if (c
== '\'' || c
== '"')
1155 ACCEPT_CHAR (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
1159 /* Convert named operators to their proper types. */
1160 else if (result
->val
.node
->flags
& NODE_OPERATOR
)
1162 result
->flags
|= NAMED_OP
;
1163 result
->type
= result
->val
.node
->value
.operator;
1169 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
1171 parse_string (pfile
, result
, c
);
1175 /* A potential block or line comment. */
1176 comment_start
= buffer
->cur
;
1177 result
->type
= CPP_DIV
;
1178 c
= get_effective_char (pfile
);
1180 ACCEPT_CHAR (CPP_DIV_EQ
);
1181 if (c
!= '/' && c
!= '*')
1186 if (skip_block_comment (pfile
))
1187 cpp_error (pfile
, "unterminated comment");
1191 if (!CPP_OPTION (pfile
, cplusplus_comments
)
1192 && !CPP_IN_SYSTEM_HEADER (pfile
))
1195 /* Warn about comments only if pedantically GNUC89, and not
1196 in system headers. */
1197 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1198 && ! buffer
->warned_cplusplus_comments
)
1201 "C++ style comments are not allowed in ISO C89");
1203 "(this will be reported only once per input file)");
1204 buffer
->warned_cplusplus_comments
= 1;
1207 /* Skip_line_comment updates buffer->read_ahead. */
1208 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1209 cpp_warning (pfile
, "multi-line comment");
1212 /* Skipping the comment has updated buffer->read_ahead. */
1213 if (!pfile
->state
.save_comments
)
1215 result
->flags
|= PREV_WHITE
;
1216 goto update_tokens_line
;
1219 /* Save the comment as a token in its own right. */
1220 save_comment (pfile
, result
, comment_start
);
1221 /* Don't do MI optimisation. */
1225 if (pfile
->state
.angled_headers
)
1227 result
->type
= CPP_HEADER_NAME
;
1228 c
= '>'; /* terminator. */
1232 result
->type
= CPP_LESS
;
1233 c
= get_effective_char (pfile
);
1235 ACCEPT_CHAR (CPP_LESS_EQ
);
1238 ACCEPT_CHAR (CPP_LSHIFT
);
1239 if (get_effective_char (pfile
) == '=')
1240 ACCEPT_CHAR (CPP_LSHIFT_EQ
);
1242 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1244 ACCEPT_CHAR (CPP_MIN
);
1245 if (get_effective_char (pfile
) == '=')
1246 ACCEPT_CHAR (CPP_MIN_EQ
);
1248 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1250 ACCEPT_CHAR (CPP_OPEN_SQUARE
);
1251 result
->flags
|= DIGRAPH
;
1253 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1255 ACCEPT_CHAR (CPP_OPEN_BRACE
);
1256 result
->flags
|= DIGRAPH
;
1261 result
->type
= CPP_GREATER
;
1262 c
= get_effective_char (pfile
);
1264 ACCEPT_CHAR (CPP_GREATER_EQ
);
1267 ACCEPT_CHAR (CPP_RSHIFT
);
1268 if (get_effective_char (pfile
) == '=')
1269 ACCEPT_CHAR (CPP_RSHIFT_EQ
);
1271 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1273 ACCEPT_CHAR (CPP_MAX
);
1274 if (get_effective_char (pfile
) == '=')
1275 ACCEPT_CHAR (CPP_MAX_EQ
);
1280 lex_percent (pfile
, result
);
1284 lex_dot (pfile
, result
);
1288 result
->type
= CPP_PLUS
;
1289 c
= get_effective_char (pfile
);
1291 ACCEPT_CHAR (CPP_PLUS_EQ
);
1293 ACCEPT_CHAR (CPP_PLUS_PLUS
);
1297 result
->type
= CPP_MINUS
;
1298 c
= get_effective_char (pfile
);
1301 ACCEPT_CHAR (CPP_DEREF
);
1302 if (CPP_OPTION (pfile
, cplusplus
)
1303 && get_effective_char (pfile
) == '*')
1304 ACCEPT_CHAR (CPP_DEREF_STAR
);
1307 ACCEPT_CHAR (CPP_MINUS_EQ
);
1309 ACCEPT_CHAR (CPP_MINUS_MINUS
);
1313 result
->type
= CPP_MULT
;
1314 if (get_effective_char (pfile
) == '=')
1315 ACCEPT_CHAR (CPP_MULT_EQ
);
1319 result
->type
= CPP_EQ
;
1320 if (get_effective_char (pfile
) == '=')
1321 ACCEPT_CHAR (CPP_EQ_EQ
);
1325 result
->type
= CPP_NOT
;
1326 if (get_effective_char (pfile
) == '=')
1327 ACCEPT_CHAR (CPP_NOT_EQ
);
1331 result
->type
= CPP_AND
;
1332 c
= get_effective_char (pfile
);
1334 ACCEPT_CHAR (CPP_AND_EQ
);
1336 ACCEPT_CHAR (CPP_AND_AND
);
1340 result
->type
= CPP_HASH
;
1341 if (get_effective_char (pfile
) == '#')
1342 ACCEPT_CHAR (CPP_PASTE
);
1346 result
->type
= CPP_OR
;
1347 c
= get_effective_char (pfile
);
1349 ACCEPT_CHAR (CPP_OR_EQ
);
1351 ACCEPT_CHAR (CPP_OR_OR
);
1355 result
->type
= CPP_XOR
;
1356 if (get_effective_char (pfile
) == '=')
1357 ACCEPT_CHAR (CPP_XOR_EQ
);
1361 result
->type
= CPP_COLON
;
1362 c
= get_effective_char (pfile
);
1363 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1364 ACCEPT_CHAR (CPP_SCOPE
);
1365 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1367 result
->flags
|= DIGRAPH
;
1368 ACCEPT_CHAR (CPP_CLOSE_SQUARE
);
1372 case '~': result
->type
= CPP_COMPL
; break;
1373 case ',': result
->type
= CPP_COMMA
; break;
1374 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1375 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1376 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1377 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1378 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1379 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1380 case ';': result
->type
= CPP_SEMICOLON
; break;
1382 /* @ is a punctuator in Objective C. */
1383 case '@': result
->type
= CPP_ATSIGN
; break;
1387 result
->type
= CPP_OTHER
;
1395 /* An upper bound on the number of bytes needed to spell a token,
1396 including preceding whitespace. */
/* TOKEN is only read.  NOTE(review): the declaration of len and the
   final return statement are not visible in this listing.  */
1398 cpp_token_len (token
)
1399 const cpp_token
*token
;
/* The spelling category of TOKEN decides where the length comes from.  */
1403 switch (TOKEN_SPELL (token
))
/* Any category not listed below starts from a length of zero.  */
1405 default: len
= 0; break;
/* String-spelled tokens use the stored string length.  */
1406 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
/* Identifier-spelled tokens use NODE_LEN of the node held by the token.  */
1407 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1409 /* 1 for whitespace, 4 for comment delimiters. */
/* NOTE(review): the closing line of the header comment below is not
   visible in this listing.  */
1413 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1414 already contain enough space to hold the token's spelling.
1415 Returns a pointer to the character after the last character
1418 cpp_spell_token (pfile
, token
, buffer
)
1419 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1420 const cpp_token
*token
;
1421 unsigned char *buffer
;
/* Each spelling category is written out differently.  */
1423 switch (TOKEN_SPELL (token
))
1425 case SPELL_OPERATOR
:
1427 const unsigned char *spelling
;
/* Tokens flagged DIGRAPH take their text from digraph_spellings,
   indexed by the distance of the token type from CPP_FIRST_DIGRAPH.  */
1430 if (token
->flags
& DIGRAPH
)
1432 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
/* NOTE(review): the statement taken for NAMED_OP tokens is not visible
   in this listing.  */
1433 else if (token
->flags
& NAMED_OP
)
/* Ordinary operators are spelled by their TOKEN_NAME.  */
1436 spelling
= TOKEN_NAME (token
);
/* Step through the chosen spelling up to its terminating NUL.  */
1438 while ((c
= *spelling
++) != '\0')
/* Identifier: copy the bytes of the node name and advance BUFFER past
   them.  */
1445 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1446 buffer
+= NODE_LEN (token
->val
.node
);
/* LEFT and RIGHT are the delimiters written around the text and TAG is
   a prefix character; a NUL value means nothing is written for it.  */
1451 int left
, right
, tag
;
1452 switch (token
->type
)
1454 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1455 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1456 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1457 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1458 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1459 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* Output order: tag, opening delimiter, stored text, closing
   delimiter.  */
1461 if (tag
) *buffer
++ = tag
;
1462 if (left
) *buffer
++ = left
;
1463 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1464 buffer
+= token
->val
.str
.len
;
1465 if (right
) *buffer
++ = right
;
/* A token spelled as a single character stores it in val.c.  */
1470 *buffer
++ = token
->val
.c
;
/* Report the token by name through cpp_ice.  */
1474 cpp_ice (pfile
, "Unspellable token %s", TOKEN_NAME (token
));
1481 /* Returns a token as a null-terminated string. The string is
1482 temporary, and automatically freed later. Useful for diagnostics. */
/* NOTE(review): the statements after the call to cpp_spell_token are
   not visible in this listing.  */
1484 cpp_token_as_text (pfile
, token
)
1486 const cpp_token
*token
;
/* Size the buffer with cpp_token_len and take it from the ident_pool
   of PFILE.  */
1488 unsigned int len
= cpp_token_len (token
);
1489 unsigned char *start
= _cpp_pool_alloc (&pfile
->ident_pool
, len
), *end
;
/* END receives the value returned by cpp_spell_token for the spelling
   written at START.  */
1491 end
= cpp_spell_token (pfile
, token
, start
);
1497 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* Indexes token_spellings by TYPE and returns the name field of that
   entry, cast to const char *.  */
1499 cpp_type2name (type
)
1500 enum cpp_ttype type
;
1502 return (const char *) token_spellings
[type
].name
;
1505 /* Writes the spelling of token to FP, without any preceding space.
1506 Separated from cpp_spell_token for efficiency - to avoid stdio
1507 double-buffering. */
/* The cases below mirror those of cpp_spell_token, writing to FP
   instead of a memory buffer.  */
1509 cpp_output_token (token
, fp
)
1510 const cpp_token
*token
;
1513 switch (TOKEN_SPELL (token
))
1515 case SPELL_OPERATOR
:
1517 const unsigned char *spelling
;
/* Tokens flagged DIGRAPH take their text from digraph_spellings,
   indexed by the distance of the token type from CPP_FIRST_DIGRAPH.  */
1519 if (token
->flags
& DIGRAPH
)
1521 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
/* NOTE(review): the statement taken for NAMED_OP tokens is not visible
   in this listing.  */
1522 else if (token
->flags
& NAMED_OP
)
1525 spelling
= TOKEN_NAME (token
);
/* The whole operator spelling goes out in one ufputs call.  */
1527 ufputs (spelling
, fp
);
/* Identifier: the node name is passed to ufputs.  */
1533 ufputs (NODE_NAME (token
->val
.node
), fp
);
/* LEFT and RIGHT are the delimiters written around the text and TAG is
   a prefix character; a NUL value means nothing is written for it.  */
1538 int left
, right
, tag
;
1539 switch (token
->type
)
1541 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1542 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1543 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1544 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1545 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1546 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* The text is written with fwrite using the stored length rather than
   as a NUL-terminated string.  */
1548 if (tag
) putc (tag
, fp
);
1549 if (left
) putc (left
, fp
);
1550 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1551 if (right
) putc (right
, fp
);
/* A token spelled as a single character stores it in val.c.  */
1556 putc (token
->val
.c
, fp
);
1560 /* An error, most probably. */
1565 /* Compare two tokens. */
/* NOTE(review): several case labels and the end of the function are
   not visible in this listing.  */
1567 _cpp_equiv_tokens (a
, b
)
1568 const cpp_token
*a
, *b
;
/* Values are only compared when type and flags both match; the value
   field compared then depends on the spelling category of A.  */
1570 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1571 switch (TOKEN_SPELL (a
))
1573 default: /* Keep compiler happy. */
1574 case SPELL_OPERATOR
:
1577 return a
->val
.c
== b
->val
.c
; /* Character. */
/* Argument numbers matter only for CPP_MACRO_ARG tokens; any other
   type compares equal here.  */
1579 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
/* The val.node fields are compared directly.  */
1581 return a
->val
.node
== b
->val
.node
;
/* Strings need equal lengths and, via memcmp, matching text.  */
1583 return (a
->val
.str
.len
== b
->val
.str
.len
1584 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
/* NOTE(review): the closing line of the header comment below, the
   switch statement and most case labels are not visible in this
   listing.  */
1591 /* Determine whether two tokens can be pasted together, and if so,
1592 what the resulting token is. Returns CPP_EOF if the tokens cannot
1593 be pasted, or the appropriate type for the merged token if they
1596 cpp_can_paste (pfile
, token1
, token2
, digraph
)
1598 const cpp_token
*token1
, *token2
;
1601 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1602 int cxx
= CPP_OPTION (pfile
, cplusplus
);
1604 /* Treat named operators as if they were ordinary NAMEs. */
1605 if (token1
->flags
& NAMED_OP
)
1607 if (token2
->flags
& NAMED_OP
)
/* Shortcut for operators that gain a trailing = sign: the result type
   is A moved by the distance from CPP_EQ to CPP_EQ_EQ.  */
1610 if ((int) a
<= (int) CPP_LAST_EQ
&& b
== CPP_EQ
)
1611 return (enum cpp_ttype
) ((int) a
+ ((int) CPP_EQ_EQ
- (int) CPP_EQ
));
/* Pastes yielding CPP_RSHIFT, CPP_MAX (only when CXX) and
   CPP_RSHIFT_EQ.  */
1616 if (b
== a
) return CPP_RSHIFT
;
1617 if (b
== CPP_QUERY
&& cxx
) return CPP_MAX
;
1618 if (b
== CPP_GREATER_EQ
) return CPP_RSHIFT_EQ
;
/* Pastes yielding CPP_LSHIFT, CPP_MIN (only when CXX), CPP_LSHIFT_EQ
   and, when digraphs are enabled, the two opening digraphs.  */
1621 if (b
== a
) return CPP_LSHIFT
;
1622 if (b
== CPP_QUERY
&& cxx
) return CPP_MIN
;
1623 if (b
== CPP_LESS_EQ
) return CPP_LSHIFT_EQ
;
1624 if (CPP_OPTION (pfile
, digraphs
))
1627 {*digraph
= 1; return CPP_OPEN_SQUARE
;} /* <: digraph */
1629 {*digraph
= 1; return CPP_OPEN_BRACE
;} /* <% digraph */
/* Operators that double up when pasted with themselves.  */
1633 case CPP_PLUS
: if (b
== a
) return CPP_PLUS_PLUS
; break;
1634 case CPP_AND
: if (b
== a
) return CPP_AND_AND
; break;
1635 case CPP_OR
: if (b
== a
) return CPP_OR_OR
; break;
1638 if (b
== a
) return CPP_MINUS_MINUS
;
1639 if (b
== CPP_GREATER
) return CPP_DEREF
;
1642 if (b
== a
&& cxx
) return CPP_SCOPE
;
1643 if (b
== CPP_GREATER
&& CPP_OPTION (pfile
, digraphs
))
1644 {*digraph
= 1; return CPP_CLOSE_SQUARE
;} /* :> digraph */
1648 if (CPP_OPTION (pfile
, digraphs
))
1650 if (b
== CPP_GREATER
)
1651 {*digraph
= 1; return CPP_CLOSE_BRACE
;} /* %> digraph */
1653 {*digraph
= 1; return CPP_HASH
;} /* %: digraph */
/* The two pointer-to-member results are produced only when CXX is
   set.  */
1657 if (b
== CPP_MULT
&& cxx
) return CPP_DEREF_STAR
;
1660 if (b
== CPP_MULT
&& cxx
) return CPP_DOT_STAR
;
1661 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
/* Two tokens of the same type form CPP_PASTE only when both agree on
   the DIGRAPH flag; *DIGRAPH then carries that flag from TOKEN1.  */
1665 if (b
== a
&& (token1
->flags
& DIGRAPH
) == (token2
->flags
& DIGRAPH
))
1667 {*digraph
= (token1
->flags
& DIGRAPH
); return CPP_PASTE
;}
1671 if (b
== CPP_NAME
) return CPP_NAME
;
/* The next three results depend on conditions whose first halves are
   not visible in this listing; the wide results require TOKEN1 to be
   the n_L node of PFILE.  */
1673 && name_p (pfile
, &token2
->val
.str
)) return CPP_NAME
;
1675 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WCHAR
;
1677 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WSTRING
;
/* A number followed by a number, a name or a dot is still a number.  */
1681 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
1682 if (b
== CPP_NAME
) return CPP_NUMBER
;
1683 if (b
== CPP_DOT
) return CPP_NUMBER
;
1684 /* Numbers cannot have length zero, so this is safe. */
/* VALID_SIGN is applied to the last character of the spelling of
   TOKEN1.  */
1685 if ((b
== CPP_PLUS
|| b
== CPP_MINUS
)
1686 && VALID_SIGN ('+', token1
->val
.str
.text
[token1
->val
.str
.len
- 1]))
1697 /* Returns nonzero if a space should be inserted to avoid an
1698 accidental token paste for output. For simplicity, it is
1699 conservative, and occasionally advises a space where one is not
1700 needed, e.g. "." and ".2". */
/* NOTE(review): the declaration of c, the switch statement and the
   actions taken for NAMED_OP tokens are not visible in this listing.  */
1703 cpp_avoid_paste (pfile
, token1
, token2
)
1705 const cpp_token
*token1
, *token2
;
1707 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1710 if (token1
->flags
& NAMED_OP
)
1712 if (token2
->flags
& NAMED_OP
)
/* C is set to the first character of the spelling of the second token
   when that token is a digraph or an operator.  */
1716 if (token2
->flags
& DIGRAPH
)
1717 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1718 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1719 c
= token_spellings
[b
].name
[0];
1721 /* Quickly get everything that can paste with an '='. */
1722 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* For each type of first token, the following characters (or token
   types) that would merge with it.  */
1727 case CPP_GREATER
: return c
== '>' || c
== '?';
1728 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1729 case CPP_PLUS
: return c
== '+';
1730 case CPP_MINUS
: return c
== '-' || c
== '>';
1731 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1732 case CPP_MOD
: return c
== ':' || c
== '>';
1733 case CPP_AND
: return c
== '&';
1734 case CPP_OR
: return c
== '|';
1735 case CPP_COLON
: return c
== ':' || c
== '>';
1736 case CPP_DEREF
: return c
== '*';
1737 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1738 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1739 case CPP_NAME
: return ((b
== CPP_NUMBER
1740 && name_p (pfile
, &token2
->val
.str
))
1742 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1743 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1744 || c
== '.' || c
== '+' || c
== '-');
/* A CPP_OTHER token matters only with the objc option set, when it is
   an at-sign followed by a name or a string.  */
1745 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1746 && token1
->val
.c
== '@'
1747 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1754 /* Output all the remaining tokens on the current line, and a newline
1755 character, to FP. Leading whitespace is removed. If there are
1756 macros, special token padding is not performed. */
1758 cpp_output_line (pfile
, fp
)
1762 const cpp_token
*token
;
/* Tokens are fetched one at a time until a CPP_EOF token is returned.  */
1764 token
= cpp_get_token (pfile
);
1765 while (token
->type
!= CPP_EOF
)
/* The current token is written before the next one is fetched, so the
   PREV_WHITE test below looks at the token that follows it.  */
1767 cpp_output_token (token
, fp
);
1768 token
= cpp_get_token (pfile
);
/* NOTE(review): the action taken when PREV_WHITE is set is not visible
   in this listing.  */
1769 if (token
->flags
& PREV_WHITE
)
1776 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): the function header and the statements after the last
   test are not visible in this listing.  */
/* Lower-case a to f give 10 to 15.  */
1781 if (c
>= 'a' && c
<= 'f')
1782 return c
- 'a' + 10;
/* Upper-case A to F give 10 to 15 as well.  */
1783 if (c
>= 'A' && c
<= 'F')
1784 return c
- 'A' + 10;
/* Decimal digits are tested last.  */
1785 if (c
>= '0' && c
<= '9')
1790 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1791 failure if cpplib is not parsing C++ or C99. Such failure is
1792 silent, and no variables are updated. Otherwise returns 0, and
1793 warns if -Wtraditional.
1795 [lex.charset]: The character designated by the universal character
1796 name \UNNNNNNNN is that character whose character short name in
1797 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1798 universal character name \uNNNN is that character whose character
1799 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1800 for a universal character name is less than 0x20 or in the range
1801 0x7F-0x9F (inclusive), or if the universal character name
1802 designates a character in the basic source character set, then the
1803 program is ill-formed.
1805 We assume that wchar_t is Unicode, so we don't need to do any
1806 mapping. Is this ever wrong?
1808 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1809 LIMIT is the end of the string or charconst. PSTR is updated to
1810 point after the UCS on return, and the UCS is written into PC. */
1813 maybe_read_ucs (pfile
, pstr
, limit
, pc
)
1815 const unsigned char **pstr
;
1816 const unsigned char *limit
;
/* P walks the digits, CODE accumulates their value and C starts out as
   the character found at PC.  */
1819 const unsigned char *p
= *pstr
;
1820 unsigned int code
= 0;
1821 unsigned int c
= *pc
, length
;
1823 /* Only attempt to interpret a UCS for C++ and C99. */
1824 if (! (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
)))
1827 if (CPP_WTRADITIONAL (pfile
))
1828 cpp_warning (pfile
, "the meaning of '\\%c' varies with -traditional", c
);
/* A lower-case u introduces four hex digits, anything else eight.  */
1830 length
= (c
== 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT.  */
1832 if ((size_t) (limit
- p
) < length
)
1834 cpp_error (pfile
, "incomplete universal-character-name");
1835 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts down as digits are consumed, so it is zero only when
   every digit was read.  */
1840 for (; length
; length
--, p
++)
1844 code
= (code
<< 4) + hex_digit_value (c
);
1848 "non-hex digit '%c' in universal-character-name", c
);
1849 /* We shouldn't skip in case there are multibyte chars. */
1855 #ifdef TARGET_EBCDIC
1856 cpp_error (pfile
, "universal-character-name on EBCDIC target");
1857 code
= 0x3f; /* EBCDIC invalid character */
1859 /* True extended characters are OK. */
/* Values with the top bit set and the range 0xD800 to 0xDFFF are
   excluded by the two visible tests; the first part of this condition
   is not visible in this listing.  */
1861 && !(code
& 0x80000000)
1862 && !(code
>= 0xD800 && code
<= 0xDFFF))
1864 /* The standard permits $, @ and ` to be specified as UCNs. We use
1865 hex escapes so that this also works with EBCDIC hosts. */
1866 else if (code
== 0x24 || code
== 0x40 || code
== 0x60)
1868 /* Don't give another error if one occurred above. */
1869 else if (length
== 0)
1870 cpp_error (pfile
, "universal-character-name out of range");
1878 /* Interpret an escape sequence, and return its value. PSTR points to
1879 the input pointer, which is just after the backslash. LIMIT is how
1880 much text we have. MASK is a bitmask for the precision for the
1881 destination type (char or wchar_t). TRADITIONAL, if true, does not
1882 interpret escapes that did not exist in traditional C.
1884 Handles all relevant diagnostics. */
/* NOTE(review): the switch statement, several case labels and the
   final statements are not visible in this listing.  */
1887 cpp_parse_escape (pfile
, pstr
, limit
, mask
, traditional
)
1889 const unsigned char **pstr
;
1890 const unsigned char *limit
;
1891 unsigned HOST_WIDE_INT mask
;
/* C is the first character at *PSTR; the local STR is left pointing
   just past it.  */
1895 const unsigned char *str
= *pstr
;
1896 unsigned int c
= *str
++;
/* These four stand for themselves.  */
1900 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the TARGET_ character constants.  */
1901 case 'b': c
= TARGET_BS
; break;
1902 case 'f': c
= TARGET_FF
; break;
1903 case 'n': c
= TARGET_NEWLINE
; break;
1904 case 'r': c
= TARGET_CR
; break;
1905 case 't': c
= TARGET_TAB
; break;
1906 case 'v': c
= TARGET_VT
; break;
1908 case '(': case '{': case '[': case '%':
1909 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1910 '\%' is used to prevent SCCS from getting confused. */
/* These count as unknown exactly when CPP_PEDANTIC holds.  */
1911 unknown
= CPP_PEDANTIC (pfile
);
1915 if (CPP_WTRADITIONAL (pfile
))
1916 cpp_warning (pfile
, "the meaning of '\\a' varies with -traditional");
1922 if (CPP_PEDANTIC (pfile
))
1923 cpp_pedwarn (pfile
, "non-ISO-standard escape sequence, '\\%c'", c
);
/* maybe_read_ucs works on the local STR and on C; its return value
   becomes UNKNOWN.  */
1928 unknown
= maybe_read_ucs (pfile
, &str
, limit
, &c
);
1932 if (CPP_WTRADITIONAL (pfile
))
1933 cpp_warning (pfile
, "the meaning of '\\x' varies with -traditional");
/* Hex escape: I accumulates the value and OVERFLOW collects bits lost
   by the shifts.  */
1937 unsigned int i
= 0, overflow
= 0;
1938 int digits_found
= 0;
/* Shifting I left then right by four bits drops its top four bits; any
   difference from I means the coming shift would lose data.  */
1946 overflow
|= i
^ (i
<< 4 >> 4);
1947 i
= (i
<< 4) + hex_digit_value (c
);
1952 cpp_error (pfile
, "\\x used with no following hex digits");
/* Complain if bits were lost or the value does not fit in MASK.  */
1954 if (overflow
| (i
!= (i
& mask
)))
1956 cpp_pedwarn (pfile
, "hex escape sequence out of range");
/* Octal escape: the first digit is already in C.  */
1963 case '0': case '1': case '2': case '3':
1964 case '4': case '5': case '6': case '7':
1966 unsigned int i
= c
- '0';
/* Further digits are taken only while input remains and the
   incremented COUNT stays below three.  */
1969 while (str
< limit
&& ++count
< 3)
1972 if (c
< '0' || c
> '7')
1975 i
= (i
<< 3) + c
- '0';
1978 if (i
!= (i
& mask
))
1980 cpp_pedwarn (pfile
, "octal escape sequence out of range");
/* Two forms of the unknown-escape warning: one prints the character
   itself, the other its octal code.  The test choosing between them is
   not visible in this listing.  */
1995 cpp_pedwarn (pfile
, "unknown escape sequence '\\%c'", c
);
1997 cpp_pedwarn (pfile
, "unknown escape sequence: '\\%03o'", c
);
2001 cpp_pedwarn (pfile
, "escape sequence out of range for character");
/* When not already defined, the MAX_ widths default to CHAR_TYPE_SIZE
   and WCHAR_TYPE_SIZE respectively.  */
2007 #ifndef MAX_CHAR_TYPE_SIZE
2008 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
2011 #ifndef MAX_WCHAR_TYPE_SIZE
2012 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
2015 /* Interpret a (possibly wide) character constant in TOKEN.
2016 WARN_MULTI warns about multi-character charconsts, if not
2017 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
2018 that did not exist in traditional C. PCHARS_SEEN points to a
2019 variable that is filled in with the number of characters seen. */
/* NOTE(review): the loop header, several else branches and the final
   return are not visible in this listing.  */
2021 cpp_interpret_charconst (pfile
, token
, warn_multi
, traditional
, pchars_seen
)
2023 const cpp_token
*token
;
2026 unsigned int *pchars_seen
;
/* STR and LIMIT bracket the stored text of the token.  */
2028 const unsigned char *str
= token
->val
.str
.text
;
2029 const unsigned char *limit
= str
+ token
->val
.str
.len
;
2030 unsigned int chars_seen
= 0;
2031 unsigned int width
, max_chars
, c
;
2032 unsigned HOST_WIDE_INT mask
;
2033 HOST_WIDE_INT result
= 0;
2035 #ifdef MULTIBYTE_CHARS
2036 (void) local_mbtowc (NULL
, NULL
, 0);
2039 /* Width in bits. */
2040 if (token
->type
== CPP_CHAR
)
2041 width
= MAX_CHAR_TYPE_SIZE
;
2043 width
= MAX_WCHAR_TYPE_SIZE
;
/* MASK gets the low WIDTH bits set when WIDTH is narrower than a
   HOST_WIDE_INT.  */
2045 if (width
< HOST_BITS_PER_WIDE_INT
)
2046 mask
= ((unsigned HOST_WIDE_INT
) 1 << width
) - 1;
/* MAX_CHARS is how many WIDTH-bit characters fit in a HOST_WIDE_INT.  */
2049 max_chars
= HOST_BITS_PER_WIDE_INT
/ width
;
2053 #ifdef MULTIBYTE_CHARS
2057 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
2060 cpp_warning (pfile
, "ignoring invalid multibyte character");
/* cpp_parse_escape is given the address of STR, so it can advance
   it.  */
2073 c
= cpp_parse_escape (pfile
, &str
, limit
, mask
, traditional
);
2075 #ifdef MAP_CHARACTER
2077 c
= MAP_CHARACTER (c
);
2080 /* Merge character into result; ignore excess chars. */
2081 if (++chars_seen
<= max_chars
)
/* Earlier characters move up by WIDTH bits and the new one fills the
   low bits.  */
2083 if (width
< HOST_BITS_PER_WIDE_INT
)
2084 result
= (result
<< width
) | (c
& mask
);
2090 if (chars_seen
== 0)
2091 cpp_error (pfile
, "empty character constant");
2092 else if (chars_seen
> max_chars
)
/* CHARS_SEEN is clamped to MAX_CHARS before the warning.  */
2094 chars_seen
= max_chars
;
2095 cpp_warning (pfile
, "character constant too long");
2097 else if (chars_seen
> 1 && !traditional
&& warn_multi
)
2098 cpp_warning (pfile
, "multi-character character constant");
2100 /* If char type is signed, sign-extend the constant. The
2101 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2102 if (token
->type
== CPP_CHAR
&& chars_seen
)
/* NBITS is the number of bits taken up by the characters kept.  The
   MASK declared here is an unsigned int that shadows the outer MASK.
   NOTE(review): if NBITS can exceed HOST_BITS_PER_INT the shift count
   below goes negative - confirm the bound.  */
2104 unsigned int nbits
= chars_seen
* width
;
2105 unsigned int mask
= (unsigned int) ~0 >> (HOST_BITS_PER_INT
- nbits
);
/* The test is true when __CHAR_UNSIGNED__ is a macro or when the top
   bit of the NBITS-bit value is clear.  */
2107 if (pfile
->spec_nodes
.n__CHAR_UNSIGNED__
->type
== NT_MACRO
2108 || ((result
>> (nbits
- 1)) & 1) == 0)
2114 *pchars_seen
= chars_seen
;
2118 /* Memory buffers. */
/* DEFAULT_ALIGNMENT is the offset of member u within struct dummy; the
   definition of that struct is not visible in this listing.
   CPP_ALIGN rounds SIZE up to a multiple of ALIGN; the mask arithmetic
   is only correct when ALIGN is a power of two.  */
2130 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2131 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2133 /* Create a new allocation buffer. */
/* NOTE(review): the function header, the local declarations and the
   return are not visible in this listing.  */
/* LEN is rounded up to DEFAULT_ALIGNMENT first.  */
2143 len
= CPP_ALIGN (len
, DEFAULT_ALIGNMENT
);
/* One xmalloc block holds LEN data bytes followed by the _cpp_buff
   descriptor, which therefore sits at BASE + LEN.  */
2145 base
= xmalloc (len
+ sizeof (_cpp_buff
));
2146 result
= (_cpp_buff
*) (base
+ len
);
2147 result
->base
= base
;
/* LIMIT points at the descriptor itself, just past the data area.  */
2149 result
->limit
= base
+ len
;
2150 result
->next
= NULL
;
2154 /* Place a chain of unwanted allocation buffers on the free list. */
2156 _cpp_release_buff (pfile
, buff
)
/* END starts at BUFF.  NOTE(review): the statements that move END
   along the chain are not visible in this listing.  */
2160 _cpp_buff
*end
= buff
;
/* The old free list is hooked on after END and BUFF becomes the new
   head of the list.  */
2164 end
->next
= pfile
->free_buffs
;
2165 pfile
->free_buffs
= buff
;
2168 /* Return a free buffer of size at least MIN_SIZE. */
/* NOTE(review): the statements between the size test and the two final
   assignments, and the return, are not visible in this listing.  */
2170 _cpp_get_buff (pfile
, min_size
)
2172 unsigned int min_size
;
2174 _cpp_buff
*result
, **p
;
/* P walks the links of the free list, starting at the free_buffs field
   of PFILE.  */
2176 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
/* A fresh buffer is made when the list is empty or nothing follows the
   entry at *P.  */
2178 if (*p
== NULL
|| (*p
)->next
== NULL
)
2179 return new_buff (min_size
);
/* The candidate is the entry after *P.  NOTE(review): as visible here
   the head entry itself is never a candidate, and the size test is
   strict, so a buffer of exactly MIN_SIZE bytes is passed over -
   confirm both are intended.  */
2180 result
= (*p
)->next
;
2181 if ((unsigned int) (result
->limit
- result
->base
) > min_size
)
/* The chosen buffer is detached from whatever followed it and its CUR
   is reset to BASE.  */
2186 result
->next
= NULL
;
2187 result
->cur
= result
->base
;
/* NOTE(review): the closing line of the header comment below and the
   return statement are not visible in this listing.  In the visible
   code the requested size is MIN_EXTRA plus twice the bytes between
   CUR and LIMIT of BUFF, and exactly those bytes are copied to the
   base of the new buffer.  */
2191 /* Return a buffer chained on the end of BUFF. Copy to it the
2192 uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2195 _cpp_extend_buff (pfile
, buff
, min_extra
)
2198 unsigned int min_extra
;
2200 unsigned int size
= min_extra
+ (buff
->limit
- buff
->cur
) * 2;
2202 buff
->next
= _cpp_get_buff (pfile
, size
);
2203 memcpy (buff
->next
->base
, buff
->cur
, buff
->limit
- buff
->cur
);
2207 /* Free a chain of buffers starting at BUFF. */
2209 _cpp_free_buff (buff
)
/* NEXT carries the walk forward; the loop ends at a null link.
   NOTE(review): the loop body is not visible in this listing.  */
2214 for (; buff
; buff
= next
)
/* Tests whether CHUNK can be used for a request of SIZE bytes from
   POOL: it must be non-null, must not be the chunk recorded in the
   locked field of POOL, and must span at least twice SIZE bytes.
   NOTE(review): the closing line of the comment inside the body is not
   visible in this listing.  */
2222 chunk_suitable (pool
, chunk
, size
)
2227 /* Being at least twice SIZE means we can use memcpy in
2228 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2230 return (chunk
&& pool
->locked
!= chunk
2231 && (unsigned int) (chunk
->limit
- chunk
->base
) >= size
* 2);
2234 /* Returns the end of the new pool. PTR points to a char in the old
2235 pool, and is updated to point to the same char in the new pool. */
/* NOTE(review): the statement that makes the chosen chunk current is
   not visible in this listing.  */
2237 _cpp_next_chunk (pool
, len
, ptr
)
2240 unsigned char **ptr
;
/* The first candidate is the chunk linked after the current one.  */
2242 cpp_chunk
*chunk
= pool
->cur
->next
;
2244 /* LEN is the minimum size we want in the new pool. */
2245 len
+= POOL_ROOM (pool
);
2246 if (! chunk_suitable (pool
, chunk
, len
))
/* An unsuitable candidate is replaced by a fresh chunk, which is
   linked in right after the current chunk.  */
2248 chunk
= new_chunk (POOL_SIZE (pool
) * 2 + len
);
2250 chunk
->next
= pool
->cur
->next
;
2251 pool
->cur
->next
= chunk
;
2254 /* Update the pointer before changing chunk's front. */
/* *PTR moves by the distance from the front of the pool to the base of
   the chosen chunk.  */
2256 *ptr
+= chunk
->base
- POOL_FRONT (pool
);
/* POOL_ROOM bytes starting at the front of the pool are copied to the
   base of the chunk, and its front is reset to its base.  */
2258 memcpy (chunk
->base
, POOL_FRONT (pool
), POOL_ROOM (pool
));
2259 chunk
->front
= chunk
->base
;
2262 return POOL_LIMIT (pool
);
/* Builds a chunk with SIZE bytes of data, SIZE being aligned with
   POOL_ALIGN to DEFAULT_ALIGNMENT first.  NOTE(review): the function
   header and the return are not visible in this listing.  */
2269 unsigned char *base
;
2272 size
= POOL_ALIGN (size
, DEFAULT_ALIGNMENT
);
2273 base
= (unsigned char *) xmalloc (size
+ sizeof (cpp_chunk
));
2274 /* Put the chunk descriptor at the end. Then chunk overruns will
2275 cause obvious chaos. */
2276 result
= (cpp_chunk
*) (base
+ size
);
/* BASE and FRONT both start at the beginning of the data area; LIMIT
   is the address of the descriptor itself.  */
2277 result
->base
= base
;
2278 result
->front
= base
;
2279 result
->limit
= base
+ size
;
/* Sets up POOL with the given alignment and one first chunk of SIZE
   bytes.  NOTE(review): the conditions guarding the DEFAULT_ALIGNMENT
   assignment and the self-link, the action taken on a bad alignment,
   and any use of TEMP are not visible in this listing.  */
2286 _cpp_init_pool (pool
, size
, align
, temp
)
2288 unsigned int size
, align
, temp
;
2291 align
= DEFAULT_ALIGNMENT
;
/* For nonzero ALIGN this is true exactly when ALIGN is not a power of
   two.  */
2292 if (align
& (align
- 1))
2294 pool
->align
= align
;
2295 pool
->first
= new_chunk (size
);
2296 pool
->cur
= pool
->first
;
/* The current chunk is linked to itself.  */
2300 pool
->cur
->next
= pool
->cur
;
/* Increments the lock count of POOL.  Only the call that finds the
   count at zero records the current chunk in the locked field.  */
2304 _cpp_lock_pool (pool
)
2307 if (pool
->locks
++ == 0)
2308 pool
->locked
= pool
->cur
;
/* Decrements the lock count of POOL.  NOTE(review): the action taken
   when the count reaches zero is not visible in this listing.  */
2312 _cpp_unlock_pool (pool
)
2315 if (--pool
->locks
== 0)
/* Walks the chunks of POOL starting from the first one.  NOTE(review):
   the loop body is not visible in this listing.  */
2320 _cpp_free_pool (pool
)
2323 cpp_chunk
*chunk
= pool
->first
, *next
;
/* The walk stops at a null link or once it is back at the first
   chunk.  */
2331 while (chunk
&& chunk
!= pool
->first
);
2334 /* Reserve LEN bytes from a memory pool. */
2336 _cpp_pool_reserve (pool
, len
)
/* LEN is aligned with POOL_ALIGN to the alignment stored in POOL.  */
2340 len
= POOL_ALIGN (len
, pool
->align
);
/* When the aligned LEN exceeds POOL_ROOM, _cpp_next_chunk is called
   with a null PTR argument.  */
2341 if (len
> (unsigned int) POOL_ROOM (pool
))
2342 _cpp_next_chunk (pool
, len
, 0);
/* The result is the front of the pool; nothing is committed here.  */
2344 return POOL_FRONT (pool
);
2347 /* Allocate LEN bytes from a memory pool. */
2349 _cpp_pool_alloc (pool
, len
)
2353 unsigned char *result
= _cpp_pool_reserve (pool
, len
);
2355 POOL_COMMIT (pool
, len
);