1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
37 enum spell_type category
;
38 const unsigned char *name
;
/* Spellings of the digraph tokens.  The table is indexed by a token's
   type minus CPP_FIRST_DIGRAPH (see the lookups in cpp_spell_token,
   cpp_output_token and cpp_avoid_paste), so the order of the entries
   must match the order of the corresponding token types.  */
41 static const unsigned char *const digraph_spellings
[] =
42 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
/* OP and TK each expand to one token_spelling initializer (category,
   then name): OP yields category SPELL_OPERATOR with spelling S, while
   TK yields category S with the stringized enumerator E as the name.
   NOTE(review): TTYPE_TABLE is defined elsewhere; presumably it is a
   list of OP/TK invocations, one per token type -- confirm.  */
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { s, U #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
/* Accessors into token_spellings for the token pointed to by TOKEN:
   TOKEN_SPELL gives the spelling category of its type and TOKEN_NAME
   the name string recorded for its type.  */
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static cpp_hashnode
*lex_identifier (cpp_reader
*, const uchar
*);
57 static void lex_number (cpp_reader
*, cpp_string
*);
58 static bool forms_identifier_p (cpp_reader
*, int);
59 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
60 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
61 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
62 unsigned int, enum cpp_ttype
);
63 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
64 static int name_p (cpp_reader
*, const cpp_string
*);
65 static tokenrun
*next_tokenrun (tokenrun
*);
67 static _cpp_buff
*new_buff (size_t);
72 Compares the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
75 cpp_ideq (const cpp_token
*token
, const char *string
)
77 if (token
->type
!= CPP_NAME
)
80 return !ustrcmp (NODE_NAME (token
->val
.node
), (const uchar
*) string
);
83 /* Record a note TYPE at byte POS into the current cleaned logical
86 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
88 if (buffer
->notes_used
== buffer
->notes_cap
)
90 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
91 buffer
->notes
= xrealloc (buffer
->notes
,
92 buffer
->notes_cap
* sizeof (_cpp_line_note
));
95 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
96 buffer
->notes
[buffer
->notes_used
].type
= type
;
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
103 _cpp_clean_line (cpp_reader
*pfile
)
109 buffer
= pfile
->buffer
;
110 buffer
->cur_note
= buffer
->notes_used
= 0;
111 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
112 buffer
->need_line
= false;
113 s
= buffer
->next_line
- 1;
115 if (!buffer
->from_stage3
)
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
123 if (c
== '\n' || c
== '\r')
127 if (s
== buffer
->rlimit
)
130 /* DOS line ending? */
131 if (c
== '\r' && s
[1] == '\n')
134 if (s
== buffer
->rlimit
)
137 /* check for escaped newline */
139 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
141 if (p
== buffer
->next_line
|| p
[-1] != '\\')
144 /* Have an escaped newline; process it and proceed to
146 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
148 buffer
->next_line
= p
- 1;
151 if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer
, s
, s
[2]);
156 if (CPP_OPTION (pfile
, trigraphs
))
158 /* We do, and that means we have to switch to the
161 *d
= _cpp_trigraph_map
[s
[2]];
174 if (c
== '\n' || c
== '\r')
176 /* Handle DOS line endings. */
177 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
179 if (s
== buffer
->rlimit
)
184 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
186 if (p
== buffer
->next_line
|| p
[-1] != '\\')
189 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
191 buffer
->next_line
= p
- 1;
193 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
196 add_line_note (buffer
, d
, s
[2]);
197 if (CPP_OPTION (pfile
, trigraphs
))
199 *d
= _cpp_trigraph_map
[s
[2]];
209 while (*s
!= '\n' && *s
!= '\r');
212 /* Handle DOS line endings. */
213 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer
, d
+ 1, '\n');
221 buffer
->next_line
= s
+ 1;
224 /* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
227 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change
234 if (note
->type
!= '/')
237 /* If -trigraphs, then this was an escaped newline iff the next note
239 if (CPP_OPTION (pfile
, trigraphs
))
240 return note
[1].pos
== note
->pos
;
242 /* Otherwise, see if this forms an escaped newline. */
244 while (is_nvspace (*p
))
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p
== '\n' && p
< note
[1].pos
);
252 /* Process the notes created by add_line_note as far as the current
255 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
257 cpp_buffer
*buffer
= pfile
->buffer
;
261 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
264 if (note
->pos
> buffer
->cur
)
268 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
270 if (note
->type
== '\\' || note
->type
== ' ')
272 if (note
->type
== ' ' && !in_comment
)
273 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line
, col
,
274 "backslash and newline separated by space");
276 if (buffer
->next_line
> buffer
->rlimit
)
278 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line
, col
,
279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer
->next_line
= buffer
->rlimit
;
284 buffer
->line_base
= note
->pos
;
287 else if (_cpp_trigraph_map
[note
->type
])
289 if (CPP_OPTION (pfile
, warn_trigraphs
)
290 && (!in_comment
|| warn_in_comment (pfile
, note
)))
292 if (CPP_OPTION (pfile
, trigraphs
))
293 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line
, col
,
294 "trigraph ??%c converted to %c",
296 (int) _cpp_trigraph_map
[note
->type
]);
300 (pfile
, CPP_DL_WARNING
, pfile
->line
, col
,
301 "trigraph ??%c ignored, use -trigraphs to enable",
311 /* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 nonzero if comment terminated by EOF, zero otherwise.
315 Buffer->cur points to the initial asterisk of the comment. */
317 _cpp_skip_block_comment (cpp_reader
*pfile
)
319 cpp_buffer
*buffer
= pfile
->buffer
;
320 const uchar
*cur
= buffer
->cur
;
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
338 /* Warn about potential nested comments, but not if the '/'
339 comes immediately before the true comment delimiter.
340 Don't bother to get it right across escaped newlines. */
341 if (CPP_OPTION (pfile
, warn_comments
)
342 && cur
[0] == '*' && cur
[1] != '/')
345 cpp_error_with_line (pfile
, CPP_DL_WARNING
,
346 pfile
->line
, CPP_BUF_COL (buffer
),
347 "\"/*\" within comment");
352 buffer
->cur
= cur
- 1;
353 _cpp_process_line_notes (pfile
, true);
354 if (buffer
->next_line
>= buffer
->rlimit
)
356 _cpp_clean_line (pfile
);
363 _cpp_process_line_notes (pfile
, true);
367 /* Skip a C++ line comment, leaving buffer->cur pointing to the
368 terminating newline. Handles escaped newlines. Returns nonzero
369 if a multiline comment. */
371 skip_line_comment (cpp_reader
*pfile
)
373 cpp_buffer
*buffer
= pfile
->buffer
;
374 unsigned int orig_line
= pfile
->line
;
376 while (*buffer
->cur
!= '\n')
379 _cpp_process_line_notes (pfile
, true);
380 return orig_line
!= pfile
->line
;
383 /* Skips whitespace, saving the next non-whitespace character. */
385 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
387 cpp_buffer
*buffer
= pfile
->buffer
;
388 bool saw_NUL
= false;
392 /* Horizontal space always OK. */
393 if (c
== ' ' || c
== '\t')
395 /* Just \f \v or \0 left. */
398 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
399 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line
,
400 CPP_BUF_COL (buffer
),
401 "%s in preprocessing directive",
402 c
== '\f' ? "form feed" : "vertical tab");
406 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
407 while (is_nvspace (c
));
410 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
415 /* See if the characters of a number token are valid in a name (no
418 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
422 for (i
= 0; i
< string
->len
; i
++)
423 if (!is_idchar (string
->text
[i
]))
429 /* Returns TRUE if the sequence starting at buffer->cur is valid in
430 an identifier. FIRST is TRUE if this starts an identifier. */
432 forms_identifier_p (cpp_reader
*pfile
, int first
)
434 cpp_buffer
*buffer
= pfile
->buffer
;
436 if (*buffer
->cur
== '$')
438 if (!CPP_OPTION (pfile
, dollars_in_ident
))
442 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
444 CPP_OPTION (pfile
, warn_dollars
) = 0;
445 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
451 /* Is this a syntactically valid UCN? */
452 if (0 && *buffer
->cur
== '\\'
453 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
456 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
))
464 /* Lex an identifier starting at BUFFER->CUR - 1. */
465 static cpp_hashnode
*
466 lex_identifier (cpp_reader
*pfile
, const uchar
*base
)
468 cpp_hashnode
*result
;
473 cur
= pfile
->buffer
->cur
;
475 /* N.B. ISIDNUM does not include $. */
476 while (ISIDNUM (*cur
))
479 pfile
->buffer
->cur
= cur
;
481 while (forms_identifier_p (pfile
, false));
483 result
= (cpp_hashnode
*)
484 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
486 /* Rarely, identifiers require diagnostics when lexed. */
487 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
488 && !pfile
->state
.skipping
, 0))
490 /* It is allowed to poison the same identifier twice. */
491 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
492 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
495 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
496 replacement list of a variadic macro. */
497 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
498 && !pfile
->state
.va_args_ok
)
499 cpp_error (pfile
, CPP_DL_PEDWARN
,
500 "__VA_ARGS__ can only appear in the expansion"
501 " of a C99 variadic macro");
507 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
509 lex_number (cpp_reader
*pfile
, cpp_string
*number
)
515 base
= pfile
->buffer
->cur
- 1;
518 cur
= pfile
->buffer
->cur
;
520 /* N.B. ISIDNUM does not include $. */
521 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
524 pfile
->buffer
->cur
= cur
;
526 while (forms_identifier_p (pfile
, false));
528 number
->len
= cur
- base
;
529 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
530 memcpy (dest
, base
, number
->len
);
531 dest
[number
->len
] = '\0';
535 /* Create a token of type TYPE with a literal spelling. */
537 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
538 unsigned int len
, enum cpp_ttype type
)
540 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
542 memcpy (dest
, base
, len
);
545 token
->val
.str
.len
= len
;
546 token
->val
.str
.text
= dest
;
549 /* Lexes a string, character constant, or angle-bracketed header file
550 name. The stored string contains the spelling, including opening
551 quote and leading any leading 'L'. It returns the type of the
552 literal, or CPP_OTHER if it was not properly terminated.
554 The spelling is NUL-terminated, but it is not guaranteed that this
555 is the first NUL since embedded NULs are preserved. */
557 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
559 bool saw_NUL
= false;
561 cppchar_t terminator
;
566 if (terminator
== 'L')
568 if (terminator
== '\"')
569 type
= *base
== 'L' ? CPP_WSTRING
: CPP_STRING
;
570 else if (terminator
== '\'')
571 type
= *base
== 'L' ? CPP_WCHAR
: CPP_CHAR
;
573 terminator
= '>', type
= CPP_HEADER_NAME
;
577 cppchar_t c
= *cur
++;
579 /* In #include-style directives, terminators are not escapable. */
580 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
582 else if (c
== terminator
)
594 if (saw_NUL
&& !pfile
->state
.skipping
)
595 cpp_error (pfile
, CPP_DL_WARNING
,
596 "null character(s) preserved in literal");
598 pfile
->buffer
->cur
= cur
;
599 create_literal (pfile
, token
, base
, cur
- base
, type
);
602 /* The stored comment includes the comment start and any terminator. */
604 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
607 unsigned char *buffer
;
608 unsigned int len
, clen
;
610 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
612 /* C++ comments probably (not definitely) have moved past a new
613 line, which we don't want to save in the comment. */
614 if (is_vspace (pfile
->buffer
->cur
[-1]))
617 /* If we are currently in a directive, then we need to store all
618 C++ comments as C comments internally, and so we need to
619 allocate a little extra space in that case.
621 Note that the only time we encounter a directive here is
622 when we are saving comments in a "#define". */
623 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
625 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
627 token
->type
= CPP_COMMENT
;
628 token
->val
.str
.len
= clen
;
629 token
->val
.str
.text
= buffer
;
632 memcpy (buffer
+ 1, from
, len
- 1);
634 /* Finish conversion to a C comment, if necessary. */
635 if (pfile
->state
.in_directive
&& type
== '/')
638 buffer
[clen
- 2] = '*';
639 buffer
[clen
- 1] = '/';
643 /* Allocate COUNT tokens for RUN. */
645 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
647 run
->base
= xnewvec (cpp_token
, count
);
648 run
->limit
= run
->base
+ count
;
652 /* Returns the next tokenrun, or creates one if there is none. */
654 next_tokenrun (tokenrun
*run
)
656 if (run
->next
== NULL
)
658 run
->next
= xnew (tokenrun
);
659 run
->next
->prev
= run
;
660 _cpp_init_tokenrun (run
->next
, 250);
666 /* Allocate a single token that is invalidated at the same time as the
667 rest of the tokens on the line. Has its line and col set to the
668 same as the last lexed token, so that diagnostics appear in the
671 _cpp_temp_token (cpp_reader
*pfile
)
673 cpp_token
*old
, *result
;
675 old
= pfile
->cur_token
- 1;
676 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
678 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
679 pfile
->cur_token
= pfile
->cur_run
->base
;
682 result
= pfile
->cur_token
++;
683 result
->line
= old
->line
;
684 result
->col
= old
->col
;
688 /* Lex a token into RESULT (external interface). Takes care of issues
689 like directive handling, token lookahead, multiple include
690 optimization and skipping. */
692 _cpp_lex_token (cpp_reader
*pfile
)
698 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
700 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
701 pfile
->cur_token
= pfile
->cur_run
->base
;
704 if (pfile
->lookaheads
)
707 result
= pfile
->cur_token
++;
710 result
= _cpp_lex_direct (pfile
);
712 if (result
->flags
& BOL
)
714 /* Is this a directive? If _cpp_handle_directive returns
715 false, it is an assembler #. */
716 if (result
->type
== CPP_HASH
717 /* 6.10.3 p 11: Directives in a list of macro arguments
718 give undefined behavior. This implementation
719 handles the directive as normal. */
720 && pfile
->state
.parsing_args
!= 1
721 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
723 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
724 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
727 /* We don't skip tokens in directives. */
728 if (pfile
->state
.in_directive
)
731 /* Outside a directive, invalidate controlling macros. At file
732 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
733 get here and MI optimization works. */
734 pfile
->mi_valid
= false;
736 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
743 /* Returns true if a fresh line has been loaded. */
745 _cpp_get_fresh_line (cpp_reader
*pfile
)
749 /* We can't get a new line until we leave the current directive. */
750 if (pfile
->state
.in_directive
)
755 cpp_buffer
*buffer
= pfile
->buffer
;
757 if (!buffer
->need_line
)
760 if (buffer
->next_line
< buffer
->rlimit
)
762 _cpp_clean_line (pfile
);
766 /* First, get out of parsing arguments state. */
767 if (pfile
->state
.parsing_args
)
770 /* End of buffer. Non-empty files should end in a newline. */
771 if (buffer
->buf
!= buffer
->rlimit
772 && buffer
->next_line
> buffer
->rlimit
773 && !buffer
->from_stage3
)
775 /* Only warn once. */
776 buffer
->next_line
= buffer
->rlimit
;
777 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line
- 1,
778 CPP_BUF_COLUMN (buffer
, buffer
->cur
),
779 "no newline at end of file");
782 return_at_eof
= buffer
->return_at_eof
;
783 _cpp_pop_buffer (pfile
);
784 if (pfile
->buffer
== NULL
|| return_at_eof
)
789 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
792 result->type = ELSE_TYPE; \
793 if (*buffer->cur == CHAR) \
794 buffer->cur++, result->type = THEN_TYPE; \
798 /* Lex a token into pfile->cur_token, which is also incremented, to
799 get diagnostics pointing to the correct location.
801 Does not handle issues such as token lookahead, multiple-include
802 optimization, directives, skipping etc. This function is only
803 suitable for use by _cpp_lex_token, and in special cases like
804 lex_expansion_token which doesn't care for any of these issues.
806 When meeting a newline, returns CPP_EOF if parsing a directive,
807 otherwise returns to the start of the token buffer if permissible.
808 Returns the location of the lexed token. */
810 _cpp_lex_direct (cpp_reader
*pfile
)
814 const unsigned char *comment_start
;
815 cpp_token
*result
= pfile
->cur_token
++;
819 buffer
= pfile
->buffer
;
820 if (buffer
->need_line
)
822 if (!_cpp_get_fresh_line (pfile
))
824 result
->type
= CPP_EOF
;
825 if (!pfile
->state
.in_directive
)
827 /* Tell the compiler the line number of the EOF token. */
828 result
->line
= pfile
->line
;
833 if (!pfile
->keep_tokens
)
835 pfile
->cur_run
= &pfile
->base_run
;
836 result
= pfile
->base_run
.base
;
837 pfile
->cur_token
= result
+ 1;
840 if (pfile
->state
.parsing_args
== 2)
841 result
->flags
|= PREV_WHITE
;
843 buffer
= pfile
->buffer
;
845 result
->line
= pfile
->line
;
848 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
849 && !pfile
->overlaid_buffer
)
851 _cpp_process_line_notes (pfile
, false);
852 result
->line
= pfile
->line
;
855 result
->col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
859 case ' ': case '\t': case '\f': case '\v': case '\0':
860 result
->flags
|= PREV_WHITE
;
861 skip_whitespace (pfile
, c
);
866 buffer
->need_line
= true;
869 case '0': case '1': case '2': case '3': case '4':
870 case '5': case '6': case '7': case '8': case '9':
871 result
->type
= CPP_NUMBER
;
872 lex_number (pfile
, &result
->val
.str
);
876 /* 'L' may introduce wide characters or strings. */
877 if (*buffer
->cur
== '\'' || *buffer
->cur
== '"')
879 lex_string (pfile
, result
, buffer
->cur
- 1);
885 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
886 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
887 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
888 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
890 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
891 case 'G': case 'H': case 'I': case 'J': case 'K':
892 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
893 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
895 result
->type
= CPP_NAME
;
896 result
->val
.node
= lex_identifier (pfile
, buffer
->cur
- 1);
898 /* Convert named operators to their proper types. */
899 if (result
->val
.node
->flags
& NODE_OPERATOR
)
901 result
->flags
|= NAMED_OP
;
902 result
->type
= result
->val
.node
->directive_index
;
908 lex_string (pfile
, result
, buffer
->cur
- 1);
912 /* A potential block or line comment. */
913 comment_start
= buffer
->cur
;
918 if (_cpp_skip_block_comment (pfile
))
919 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
921 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
922 || CPP_IN_SYSTEM_HEADER (pfile
)))
924 /* Warn about comments only if pedantically GNUC89, and not
925 in system headers. */
926 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
927 && ! buffer
->warned_cplusplus_comments
)
929 cpp_error (pfile
, CPP_DL_PEDWARN
,
930 "C++ style comments are not allowed in ISO C90");
931 cpp_error (pfile
, CPP_DL_PEDWARN
,
932 "(this will be reported only once per input file)");
933 buffer
->warned_cplusplus_comments
= 1;
936 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
937 cpp_error (pfile
, CPP_DL_WARNING
, "multi-line comment");
942 result
->type
= CPP_DIV_EQ
;
947 result
->type
= CPP_DIV
;
951 if (!pfile
->state
.save_comments
)
953 result
->flags
|= PREV_WHITE
;
954 goto update_tokens_line
;
957 /* Save the comment as a token in its own right. */
958 save_comment (pfile
, result
, comment_start
, c
);
962 if (pfile
->state
.angled_headers
)
964 lex_string (pfile
, result
, buffer
->cur
- 1);
968 result
->type
= CPP_LESS
;
969 if (*buffer
->cur
== '=')
970 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
971 else if (*buffer
->cur
== '<')
974 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
976 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
979 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
981 else if (CPP_OPTION (pfile
, digraphs
))
983 if (*buffer
->cur
== ':')
986 result
->flags
|= DIGRAPH
;
987 result
->type
= CPP_OPEN_SQUARE
;
989 else if (*buffer
->cur
== '%')
992 result
->flags
|= DIGRAPH
;
993 result
->type
= CPP_OPEN_BRACE
;
999 result
->type
= CPP_GREATER
;
1000 if (*buffer
->cur
== '=')
1001 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
1002 else if (*buffer
->cur
== '>')
1005 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1007 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
1010 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
1015 result
->type
= CPP_MOD
;
1016 if (*buffer
->cur
== '=')
1017 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
1018 else if (CPP_OPTION (pfile
, digraphs
))
1020 if (*buffer
->cur
== ':')
1023 result
->flags
|= DIGRAPH
;
1024 result
->type
= CPP_HASH
;
1025 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
1026 buffer
->cur
+= 2, result
->type
= CPP_PASTE
;
1028 else if (*buffer
->cur
== '>')
1031 result
->flags
|= DIGRAPH
;
1032 result
->type
= CPP_CLOSE_BRACE
;
1038 result
->type
= CPP_DOT
;
1039 if (ISDIGIT (*buffer
->cur
))
1041 result
->type
= CPP_NUMBER
;
1042 lex_number (pfile
, &result
->val
.str
);
1044 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
1045 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
1046 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1047 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
1051 result
->type
= CPP_PLUS
;
1052 if (*buffer
->cur
== '+')
1053 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
1054 else if (*buffer
->cur
== '=')
1055 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
1059 result
->type
= CPP_MINUS
;
1060 if (*buffer
->cur
== '>')
1063 result
->type
= CPP_DEREF
;
1064 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1065 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
1067 else if (*buffer
->cur
== '-')
1068 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
1069 else if (*buffer
->cur
== '=')
1070 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
1074 result
->type
= CPP_AND
;
1075 if (*buffer
->cur
== '&')
1076 buffer
->cur
++, result
->type
= CPP_AND_AND
;
1077 else if (*buffer
->cur
== '=')
1078 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
1082 result
->type
= CPP_OR
;
1083 if (*buffer
->cur
== '|')
1084 buffer
->cur
++, result
->type
= CPP_OR_OR
;
1085 else if (*buffer
->cur
== '=')
1086 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
1090 result
->type
= CPP_COLON
;
1091 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
1092 buffer
->cur
++, result
->type
= CPP_SCOPE
;
1093 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
1096 result
->flags
|= DIGRAPH
;
1097 result
->type
= CPP_CLOSE_SQUARE
;
1101 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1102 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1103 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1104 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1105 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1107 case '?': result
->type
= CPP_QUERY
; break;
1108 case '~': result
->type
= CPP_COMPL
; break;
1109 case ',': result
->type
= CPP_COMMA
; break;
1110 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1111 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1112 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1113 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1114 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1115 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1116 case ';': result
->type
= CPP_SEMICOLON
; break;
1118 /* @ is a punctuator in Objective-C. */
1119 case '@': result
->type
= CPP_ATSIGN
; break;
1124 const uchar
*base
= --buffer
->cur
;
1126 if (forms_identifier_p (pfile
, true))
1128 result
->type
= CPP_NAME
;
1129 result
->val
.node
= lex_identifier (pfile
, base
);
1136 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
1143 /* An upper bound on the number of bytes needed to spell TOKEN.
1144 Does not include preceding whitespace. */
1146 cpp_token_len (const cpp_token
*token
)
1150 switch (TOKEN_SPELL (token
))
1152 default: len
= 4; break;
1153 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
1154 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1160 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1161 already contain enough space to hold the token's spelling.
1162 Returns a pointer to the character after the last character written.
1163 FIXME: Would be nice if we didn't need the PFILE argument. */
1165 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
1166 unsigned char *buffer
)
1168 switch (TOKEN_SPELL (token
))
1170 case SPELL_OPERATOR
:
1172 const unsigned char *spelling
;
1175 if (token
->flags
& DIGRAPH
)
1177 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1178 else if (token
->flags
& NAMED_OP
)
1181 spelling
= TOKEN_NAME (token
);
1183 while ((c
= *spelling
++) != '\0')
1190 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1191 buffer
+= NODE_LEN (token
->val
.node
);
1195 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1196 buffer
+= token
->val
.str
.len
;
1200 cpp_error (pfile
, CPP_DL_ICE
,
1201 "unspellable token %s", TOKEN_NAME (token
));
1208 /* Returns TOKEN spelt as a null-terminated string. The string is
1209 freed when the reader is destroyed. Useful for diagnostics. */
1211 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
1213 unsigned int len
= cpp_token_len (token
) + 1;
1214 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1216 end
= cpp_spell_token (pfile
, token
, start
);
1222 /* Used by C front ends, which really should move to using
1223 cpp_token_as_text. */
1225 cpp_type2name (enum cpp_ttype type
)
1227 return (const char *) token_spellings
[type
].name
;
1230 /* Writes the spelling of token to FP, without any preceding space.
1231 Separated from cpp_spell_token for efficiency - to avoid stdio
1232 double-buffering. */
1234 cpp_output_token (const cpp_token
*token
, FILE *fp
)
1236 switch (TOKEN_SPELL (token
))
1238 case SPELL_OPERATOR
:
1240 const unsigned char *spelling
;
1243 if (token
->flags
& DIGRAPH
)
1245 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1246 else if (token
->flags
& NAMED_OP
)
1249 spelling
= TOKEN_NAME (token
);
1254 while ((c
= *++spelling
) != '\0');
1260 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
1264 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1268 /* An error, most probably. */
1273 /* Compare two tokens. */
1275 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
1277 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1278 switch (TOKEN_SPELL (a
))
1280 default: /* Keep compiler happy. */
1281 case SPELL_OPERATOR
:
1284 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1286 return a
->val
.node
== b
->val
.node
;
1288 return (a
->val
.str
.len
== b
->val
.str
.len
1289 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1296 /* Returns nonzero if a space should be inserted to avoid an
1297 accidental token paste for output. For simplicity, it is
1298 conservative, and occasionally advises a space where one is not
1299 needed, e.g. "." and ".2". */
1301 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
1302 const cpp_token
*token2
)
1304 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1307 if (token1
->flags
& NAMED_OP
)
1309 if (token2
->flags
& NAMED_OP
)
1313 if (token2
->flags
& DIGRAPH
)
1314 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1315 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1316 c
= token_spellings
[b
].name
[0];
1318 /* Quickly get everything that can paste with an '='. */
1319 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1324 case CPP_GREATER
: return c
== '>' || c
== '?';
1325 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1326 case CPP_PLUS
: return c
== '+';
1327 case CPP_MINUS
: return c
== '-' || c
== '>';
1328 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1329 case CPP_MOD
: return c
== ':' || c
== '>';
1330 case CPP_AND
: return c
== '&';
1331 case CPP_OR
: return c
== '|';
1332 case CPP_COLON
: return c
== ':' || c
== '>';
1333 case CPP_DEREF
: return c
== '*';
1334 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1335 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1336 case CPP_NAME
: return ((b
== CPP_NUMBER
1337 && name_p (pfile
, &token2
->val
.str
))
1339 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1340 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1341 || c
== '.' || c
== '+' || c
== '-');
1343 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
1345 || (CPP_OPTION (pfile
, objc
)
1346 && token1
->val
.str
.text
[0] == '@'
1347 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
1354 /* Output all the remaining tokens on the current line, and a newline
1355 character, to FP. Leading whitespace is removed. If there are
1356 macros, special token padding is not performed. */
1358 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
1360 const cpp_token
*token
;
1362 token
= cpp_get_token (pfile
);
1363 while (token
->type
!= CPP_EOF
)
1365 cpp_output_token (token
, fp
);
1366 token
= cpp_get_token (pfile
);
1367 if (token
->flags
& PREV_WHITE
)
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that arbitrary expressions can
   be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1389 /* Create a new allocation buffer. Place the control block at the end
1390 of the buffer, so that buffer overflows will cause immediate chaos. */
1392 new_buff (size_t len
)
1395 unsigned char *base
;
1397 if (len
< MIN_BUFF_SIZE
)
1398 len
= MIN_BUFF_SIZE
;
1399 len
= CPP_ALIGN (len
);
1401 base
= xmalloc (len
+ sizeof (_cpp_buff
));
1402 result
= (_cpp_buff
*) (base
+ len
);
1403 result
->base
= base
;
1405 result
->limit
= base
+ len
;
1406 result
->next
= NULL
;
1410 /* Place a chain of unwanted allocation buffers on the free list. */
1412 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
1414 _cpp_buff
*end
= buff
;
1418 end
->next
= pfile
->free_buffs
;
1419 pfile
->free_buffs
= buff
;
1422 /* Return a free buffer of size at least MIN_SIZE. */
1424 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
1426 _cpp_buff
*result
, **p
;
1428 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
1433 return new_buff (min_size
);
1435 size
= result
->limit
- result
->base
;
1436 /* Return a buffer that's big enough, but don't waste one that's
1438 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
1443 result
->next
= NULL
;
1444 result
->cur
= result
->base
;
1448 /* Creates a new buffer with enough space to hold the uncommitted
1449 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1450 the excess bytes to the new buffer. Chains the new buffer after
1451 BUFF, and returns the new buffer. */
1453 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
1455 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
1456 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
1458 buff
->next
= new_buff
;
1459 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
1463 /* Creates a new buffer with enough space to hold the uncommitted
1464 remaining bytes of the buffer pointed to by BUFF, and at least
1465 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1466 Chains the new buffer before the buffer pointed to by BUFF, and
1467 updates the pointer to point to the new buffer. */
1469 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
1471 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
1472 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
1474 new_buff
= _cpp_get_buff (pfile
, size
);
1475 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
1476 new_buff
->next
= old_buff
;
1480 /* Free a chain of buffers starting at BUFF. */
1482 _cpp_free_buff (_cpp_buff
*buff
)
1486 for (; buff
; buff
= next
)
1493 /* Allocate permanent, unaligned storage of length LEN. */
1495 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
1497 _cpp_buff
*buff
= pfile
->u_buff
;
1498 unsigned char *result
= buff
->cur
;
1500 if (len
> (size_t) (buff
->limit
- result
))
1502 buff
= _cpp_get_buff (pfile
, len
);
1503 buff
->next
= pfile
->u_buff
;
1504 pfile
->u_buff
= buff
;
1508 buff
->cur
= result
+ len
;
/* Allocate permanent, aligned storage of length LEN from a_buff.
   That buffer is used for growing allocations when saving macro
   replacement lists in a #define, and when parsing an answer to an
   assertion in #assert, #unassert or #if (and therefore possibly
   whilst expanding macros).  It therefore must not be used by any
   code that they might call: specifically the lexer and the guts of
   the macro expander.

   All existing other uses clearly fit this restriction: storing
   registered pragmas during initialization.  */
1523 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
1525 _cpp_buff
*buff
= pfile
->a_buff
;
1526 unsigned char *result
= buff
->cur
;
1528 if (len
> (size_t) (buff
->limit
- result
))
1530 buff
= _cpp_get_buff (pfile
, len
);
1531 buff
->next
= pfile
->a_buff
;
1532 pfile
->a_buff
= buff
;
1536 buff
->cur
= result
+ len
;