1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
39 enum spell_type category
;
40 const unsigned char *name
;
43 static const unsigned char *const digraph_spellings
[] =
44 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
46 #define OP(e, s) { SPELL_OPERATOR, U s },
47 #define TK(e, s) { s, U #e },
48 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
55 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
56 static int skip_line_comment (cpp_reader
*);
57 static void skip_whitespace (cpp_reader
*, cppchar_t
);
58 static cpp_hashnode
*lex_identifier (cpp_reader
*, const uchar
*);
59 static void lex_number (cpp_reader
*, cpp_string
*);
60 static bool forms_identifier_p (cpp_reader
*, int);
61 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
62 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
63 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
64 unsigned int, enum cpp_ttype
);
65 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
66 static int name_p (cpp_reader
*, const cpp_string
*);
67 static tokenrun
*next_tokenrun (tokenrun
*);
69 static _cpp_buff
*new_buff (size_t);
74 Compares the token TOKEN to the NUL-terminated string STRING.
75 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
77 cpp_ideq (const cpp_token
*token
, const char *string
)
79 if (token
->type
!= CPP_NAME
)
82 return !ustrcmp (NODE_NAME (token
->val
.node
), (const uchar
*) string
);
85 /* Record a note TYPE at byte POS into the current cleaned logical
88 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
90 if (buffer
->notes_used
== buffer
->notes_cap
)
92 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
93 buffer
->notes
= (_cpp_line_note
*)
94 xrealloc (buffer
->notes
, buffer
->notes_cap
* sizeof (_cpp_line_note
));
97 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
98 buffer
->notes
[buffer
->notes_used
].type
= type
;
102 /* Returns with a logical line that contains no escaped newlines or
103 trigraphs. This is a time-critical inner loop. */
105 _cpp_clean_line (cpp_reader
*pfile
)
111 buffer
= pfile
->buffer
;
112 buffer
->cur_note
= buffer
->notes_used
= 0;
113 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
114 buffer
->need_line
= false;
115 s
= buffer
->next_line
- 1;
117 if (!buffer
->from_stage3
)
126 if (c
== '\n' || c
== '\r')
128 /* Handle DOS line endings. */
129 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
131 if (s
== buffer
->rlimit
)
136 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
138 if (p
== buffer
->next_line
|| p
[-1] != '\\')
141 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
143 buffer
->next_line
= p
- 1;
145 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
147 /* Add a note regardless, for the benefit of -Wtrigraphs. */
148 add_line_note (buffer
, d
, s
[2]);
149 if (CPP_OPTION (pfile
, trigraphs
))
151 *d
= _cpp_trigraph_map
[s
[2]];
161 while (*s
!= '\n' && *s
!= '\r');
164 /* Handle DOS line endings. */
165 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
170 /* A sentinel note that should never be processed. */
171 add_line_note (buffer
, d
+ 1, '\n');
172 buffer
->next_line
= s
+ 1;
175 /* Return true if the trigraph indicated by NOTE should be warned
176 about in a comment. */
178 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
182 /* Within comments we don't warn about trigraphs, unless the
183 trigraph forms an escaped newline, as that may change
185 if (note
->type
!= '/')
188 /* If -trigraphs, then this was an escaped newline iff the next note
190 if (CPP_OPTION (pfile
, trigraphs
))
191 return note
[1].pos
== note
->pos
;
193 /* Otherwise, see if this forms an escaped newline. */
195 while (is_nvspace (*p
))
198 /* There might have been escaped newlines between the trigraph and the
199 newline we found. Hence the position test. */
200 return (*p
== '\n' && p
< note
[1].pos
);
203 /* Process the notes created by add_line_note as far as the current
206 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
208 cpp_buffer
*buffer
= pfile
->buffer
;
212 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
215 if (note
->pos
> buffer
->cur
)
219 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
221 if (note
->type
== '\\' || note
->type
== ' ')
223 if (note
->type
== ' ' && !in_comment
)
224 cpp_error_with_line (pfile
, DL_WARNING
, pfile
->line
, col
,
225 "backslash and newline separated by space");
227 if (buffer
->next_line
> buffer
->rlimit
)
229 cpp_error_with_line (pfile
, DL_PEDWARN
, pfile
->line
, col
,
230 "backslash-newline at end of file");
231 /* Prevent "no newline at end of file" warning. */
232 buffer
->next_line
= buffer
->rlimit
;
235 buffer
->line_base
= note
->pos
;
238 else if (_cpp_trigraph_map
[note
->type
])
240 if (CPP_OPTION (pfile
, warn_trigraphs
)
241 && (!in_comment
|| warn_in_comment (pfile
, note
)))
243 if (CPP_OPTION (pfile
, trigraphs
))
244 cpp_error_with_line (pfile
, DL_WARNING
, pfile
->line
, col
,
245 "trigraph ??%c converted to %c",
247 (int) _cpp_trigraph_map
[note
->type
]);
249 cpp_error_with_line (pfile
, DL_WARNING
, pfile
->line
, col
,
250 "trigraph ??%c ignored",
259 /* Skip a C-style block comment. We find the end of the comment by
260 seeing if an asterisk is before every '/' we encounter. Returns
261 nonzero if comment terminated by EOF, zero otherwise.
263 Buffer->cur points to the initial asterisk of the comment. */
265 _cpp_skip_block_comment (cpp_reader
*pfile
)
267 cpp_buffer
*buffer
= pfile
->buffer
;
271 if (*buffer
->cur
== '/')
278 /* People like decorating comments with '*', so check for '/'
279 instead for efficiency. */
282 if (buffer
->cur
[-2] == '*')
285 /* Warn about potential nested comments, but not if the '/'
286 comes immediately before the true comment delimiter.
287 Don't bother to get it right across escaped newlines. */
288 if (CPP_OPTION (pfile
, warn_comments
)
289 && buffer
->cur
[0] == '*' && buffer
->cur
[1] != '/')
290 cpp_error_with_line (pfile
, DL_WARNING
,
291 pfile
->line
, CPP_BUF_COL (buffer
),
292 "\"/*\" within comment");
297 _cpp_process_line_notes (pfile
, true);
298 if (buffer
->next_line
>= buffer
->rlimit
)
300 _cpp_clean_line (pfile
);
305 _cpp_process_line_notes (pfile
, true);
309 /* Skip a C++ line comment, leaving buffer->cur pointing to the
310 terminating newline. Handles escaped newlines. Returns nonzero
311 if a multiline comment. */
313 skip_line_comment (cpp_reader
*pfile
)
315 cpp_buffer
*buffer
= pfile
->buffer
;
316 unsigned int orig_line
= pfile
->line
;
318 while (*buffer
->cur
!= '\n')
321 _cpp_process_line_notes (pfile
, true);
322 return orig_line
!= pfile
->line
;
325 /* Skips whitespace, saving the next non-whitespace character. */
327 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
329 cpp_buffer
*buffer
= pfile
->buffer
;
330 bool saw_NUL
= false;
334 /* Horizontal space always OK. */
335 if (c
== ' ' || c
== '\t')
337 /* Just \f \v or \0 left. */
340 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
341 cpp_error_with_line (pfile
, DL_PEDWARN
, pfile
->line
,
342 CPP_BUF_COL (buffer
),
343 "%s in preprocessing directive",
344 c
== '\f' ? "form feed" : "vertical tab");
348 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
349 while (is_nvspace (c
));
352 cpp_error (pfile
, DL_WARNING
, "null character(s) ignored");
357 /* See if the characters of a number token are valid in a name (no
360 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
364 for (i
= 0; i
< string
->len
; i
++)
365 if (!is_idchar (string
->text
[i
]))
371 /* Returns TRUE if the sequence starting at buffer->cur is valid in
372 an identifier. FIRST is TRUE if this starts an identifier. */
374 forms_identifier_p (cpp_reader
*pfile
, int first
)
376 cpp_buffer
*buffer
= pfile
->buffer
;
378 if (*buffer
->cur
== '$')
380 if (!CPP_OPTION (pfile
, dollars_in_ident
))
384 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
386 CPP_OPTION (pfile
, warn_dollars
) = 0;
387 cpp_error (pfile
, DL_PEDWARN
, "'$' in identifier or number");
393 /* Is this a syntactically valid UCN? */
394 if (0 && *buffer
->cur
== '\\'
395 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
398 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
))
406 /* Lex an identifier starting at BUFFER->CUR - 1. */
407 static cpp_hashnode
*
408 lex_identifier (cpp_reader
*pfile
, const uchar
*base
)
410 cpp_hashnode
*result
;
415 cur
= pfile
->buffer
->cur
;
417 /* N.B. ISIDNUM does not include $. */
418 while (ISIDNUM (*cur
))
421 pfile
->buffer
->cur
= cur
;
423 while (forms_identifier_p (pfile
, false));
425 result
= (cpp_hashnode
*)
426 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
428 /* Rarely, identifiers require diagnostics when lexed. */
429 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
430 && !pfile
->state
.skipping
, 0))
432 /* It is allowed to poison the same identifier twice. */
433 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
434 cpp_error (pfile
, DL_ERROR
, "attempt to use poisoned \"%s\"",
437 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
438 replacement list of a variadic macro. */
439 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
440 && !pfile
->state
.va_args_ok
)
441 cpp_error (pfile
, DL_PEDWARN
,
442 "__VA_ARGS__ can only appear in the expansion"
443 " of a C99 variadic macro");
449 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
451 lex_number (cpp_reader
*pfile
, cpp_string
*number
)
457 base
= pfile
->buffer
->cur
- 1;
460 cur
= pfile
->buffer
->cur
;
462 /* N.B. ISIDNUM does not include $. */
463 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
466 pfile
->buffer
->cur
= cur
;
468 while (forms_identifier_p (pfile
, false));
470 number
->len
= cur
- base
;
471 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
472 memcpy (dest
, base
, number
->len
);
473 dest
[number
->len
] = '\0';
477 /* Create a token of type TYPE with a literal spelling. */
479 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
480 unsigned int len
, enum cpp_ttype type
)
482 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
484 memcpy (dest
, base
, len
);
487 token
->val
.str
.len
= len
;
488 token
->val
.str
.text
= dest
;
491 /* Lexes a string, character constant, or angle-bracketed header file
492 name. The stored string contains the spelling, including opening
493 quote and any leading 'L'. The token is given the type of the
494 literal, or CPP_OTHER if it was not properly terminated.
496 The spelling is NUL-terminated, but it is not guaranteed that this
497 is the first NUL since embedded NULs are preserved. */
499 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
501 bool saw_NUL
= false;
503 cppchar_t terminator
;
508 if (terminator
== 'L')
510 if (terminator
== '\"')
511 type
= *base
== 'L' ? CPP_WSTRING
: CPP_STRING
;
512 else if (terminator
== '\'')
513 type
= *base
== 'L' ? CPP_WCHAR
: CPP_CHAR
;
515 terminator
= '>', type
= CPP_HEADER_NAME
;
519 cppchar_t c
= *cur
++;
521 /* In #include-style directives, terminators are not escapable. */
522 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
524 else if (c
== terminator
)
536 if (saw_NUL
&& !pfile
->state
.skipping
)
537 cpp_error (pfile
, DL_WARNING
, "null character(s) preserved in literal");
539 pfile
->buffer
->cur
= cur
;
540 create_literal (pfile
, token
, base
, cur
- base
, type
);
543 /* The stored comment includes the comment start and any terminator. */
545 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
548 unsigned char *buffer
;
549 unsigned int len
, clen
;
551 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
553 /* C++ comments probably (not definitely) have moved past a new
554 line, which we don't want to save in the comment. */
555 if (is_vspace (pfile
->buffer
->cur
[-1]))
558 /* If we are currently in a directive, then we need to store all
559 C++ comments as C comments internally, and so we need to
560 allocate a little extra space in that case.
562 Note that the only time we encounter a directive here is
563 when we are saving comments in a "#define". */
564 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
566 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
568 token
->type
= CPP_COMMENT
;
569 token
->val
.str
.len
= clen
;
570 token
->val
.str
.text
= buffer
;
573 memcpy (buffer
+ 1, from
, len
- 1);
575 /* Finish conversion to a C comment, if necessary. */
576 if (pfile
->state
.in_directive
&& type
== '/')
579 buffer
[clen
- 2] = '*';
580 buffer
[clen
- 1] = '/';
584 /* Allocate COUNT tokens for RUN. */
586 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
588 run
->base
= xnewvec (cpp_token
, count
);
589 run
->limit
= run
->base
+ count
;
593 /* Returns the next tokenrun, or creates one if there is none. */
595 next_tokenrun (tokenrun
*run
)
597 if (run
->next
== NULL
)
599 run
->next
= xnew (tokenrun
);
600 run
->next
->prev
= run
;
601 _cpp_init_tokenrun (run
->next
, 250);
607 /* Allocate a single token that is invalidated at the same time as the
608 rest of the tokens on the line. Has its line and col set to the
609 same as the last lexed token, so that diagnostics appear in the
612 _cpp_temp_token (cpp_reader
*pfile
)
614 cpp_token
*old
, *result
;
616 old
= pfile
->cur_token
- 1;
617 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
619 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
620 pfile
->cur_token
= pfile
->cur_run
->base
;
623 result
= pfile
->cur_token
++;
624 result
->line
= old
->line
;
625 result
->col
= old
->col
;
629 /* Lex a token into RESULT (external interface). Takes care of issues
630 like directive handling, token lookahead, multiple include
631 optimization and skipping. */
633 _cpp_lex_token (cpp_reader
*pfile
)
639 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
641 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
642 pfile
->cur_token
= pfile
->cur_run
->base
;
645 if (pfile
->lookaheads
)
648 result
= pfile
->cur_token
++;
651 result
= _cpp_lex_direct (pfile
);
653 if (result
->flags
& BOL
)
655 /* Is this a directive? If _cpp_handle_directive returns
656 false, it is an assembler #. */
657 if (result
->type
== CPP_HASH
658 /* 6.10.3 p 11: Directives in a list of macro arguments
659 gives undefined behavior. This implementation
660 handles the directive as normal. */
661 && pfile
->state
.parsing_args
!= 1
662 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
664 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
665 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
668 /* We don't skip tokens in directives. */
669 if (pfile
->state
.in_directive
)
672 /* Outside a directive, invalidate controlling macros. At file
673 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
674 get here and MI optimization works. */
675 pfile
->mi_valid
= false;
677 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
684 /* Returns true if a fresh line has been loaded. */
686 _cpp_get_fresh_line (cpp_reader
*pfile
)
688 /* We can't get a new line until we leave the current directive. */
689 if (pfile
->state
.in_directive
)
694 cpp_buffer
*buffer
= pfile
->buffer
;
696 if (!buffer
->need_line
)
699 if (buffer
->next_line
< buffer
->rlimit
)
701 _cpp_clean_line (pfile
);
705 /* First, get out of parsing arguments state. */
706 if (pfile
->state
.parsing_args
)
709 /* End of buffer. Non-empty files should end in a newline. */
710 if (buffer
->buf
!= buffer
->rlimit
711 && buffer
->next_line
> buffer
->rlimit
712 && !buffer
->from_stage3
)
714 /* Only warn once. */
715 buffer
->next_line
= buffer
->rlimit
;
716 cpp_error_with_line (pfile
, DL_PEDWARN
, pfile
->line
- 1,
717 CPP_BUF_COLUMN (buffer
, buffer
->cur
),
718 "no newline at end of file");
724 if (buffer
->return_at_eof
)
726 _cpp_pop_buffer (pfile
);
730 _cpp_pop_buffer (pfile
);
734 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
737 result->type = ELSE_TYPE; \
738 if (*buffer->cur == CHAR) \
739 buffer->cur++, result->type = THEN_TYPE; \
743 /* Lex a token into pfile->cur_token, which is also incremented, to
744 get diagnostics pointing to the correct location.
746 Does not handle issues such as token lookahead, multiple-include
747 optimization, directives, skipping etc. This function is only
748 suitable for use by _cpp_lex_token, and in special cases like
749 lex_expansion_token which doesn't care for any of these issues.
751 When meeting a newline, returns CPP_EOF if parsing a directive,
752 otherwise returns to the start of the token buffer if permissible.
753 Returns the location of the lexed token. */
755 _cpp_lex_direct (cpp_reader
*pfile
)
759 const unsigned char *comment_start
;
760 cpp_token
*result
= pfile
->cur_token
++;
764 if (pfile
->buffer
->need_line
)
766 if (!_cpp_get_fresh_line (pfile
))
768 result
->type
= CPP_EOF
;
769 if (!pfile
->state
.in_directive
)
771 /* Tell the compiler the line number of the EOF token. */
772 result
->line
= pfile
->line
;
777 if (!pfile
->keep_tokens
)
779 pfile
->cur_run
= &pfile
->base_run
;
780 result
= pfile
->base_run
.base
;
781 pfile
->cur_token
= result
+ 1;
784 if (pfile
->state
.parsing_args
== 2)
785 result
->flags
|= PREV_WHITE
;
787 buffer
= pfile
->buffer
;
789 result
->line
= pfile
->line
;
792 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
793 && !pfile
->overlaid_buffer
)
795 _cpp_process_line_notes (pfile
, false);
796 result
->line
= pfile
->line
;
799 result
->col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
803 case ' ': case '\t': case '\f': case '\v': case '\0':
804 result
->flags
|= PREV_WHITE
;
805 skip_whitespace (pfile
, c
);
810 buffer
->need_line
= true;
813 case '0': case '1': case '2': case '3': case '4':
814 case '5': case '6': case '7': case '8': case '9':
815 result
->type
= CPP_NUMBER
;
816 lex_number (pfile
, &result
->val
.str
);
820 /* 'L' may introduce wide characters or strings. */
821 if (*buffer
->cur
== '\'' || *buffer
->cur
== '"')
823 lex_string (pfile
, result
, buffer
->cur
- 1);
829 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
830 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
831 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
832 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
834 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
835 case 'G': case 'H': case 'I': case 'J': case 'K':
836 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
837 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
839 result
->type
= CPP_NAME
;
840 result
->val
.node
= lex_identifier (pfile
, buffer
->cur
- 1);
842 /* Convert named operators to their proper types. */
843 if (result
->val
.node
->flags
& NODE_OPERATOR
)
845 result
->flags
|= NAMED_OP
;
846 result
->type
= result
->val
.node
->directive_index
;
852 lex_string (pfile
, result
, buffer
->cur
- 1);
856 /* A potential block or line comment. */
857 comment_start
= buffer
->cur
;
862 if (_cpp_skip_block_comment (pfile
))
863 cpp_error (pfile
, DL_ERROR
, "unterminated comment");
865 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
866 || CPP_IN_SYSTEM_HEADER (pfile
)))
868 /* Warn about comments only if pedantically GNUC89, and not
869 in system headers. */
870 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
871 && ! buffer
->warned_cplusplus_comments
)
873 cpp_error (pfile
, DL_PEDWARN
,
874 "C++ style comments are not allowed in ISO C90");
875 cpp_error (pfile
, DL_PEDWARN
,
876 "(this will be reported only once per input file)");
877 buffer
->warned_cplusplus_comments
= 1;
880 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
881 cpp_error (pfile
, DL_WARNING
, "multi-line comment");
886 result
->type
= CPP_DIV_EQ
;
891 result
->type
= CPP_DIV
;
895 if (!pfile
->state
.save_comments
)
897 result
->flags
|= PREV_WHITE
;
898 goto update_tokens_line
;
901 /* Save the comment as a token in its own right. */
902 save_comment (pfile
, result
, comment_start
, c
);
906 if (pfile
->state
.angled_headers
)
908 lex_string (pfile
, result
, buffer
->cur
- 1);
912 result
->type
= CPP_LESS
;
913 if (*buffer
->cur
== '=')
914 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
915 else if (*buffer
->cur
== '<')
918 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
920 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
923 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
925 else if (CPP_OPTION (pfile
, digraphs
))
927 if (*buffer
->cur
== ':')
930 result
->flags
|= DIGRAPH
;
931 result
->type
= CPP_OPEN_SQUARE
;
933 else if (*buffer
->cur
== '%')
936 result
->flags
|= DIGRAPH
;
937 result
->type
= CPP_OPEN_BRACE
;
943 result
->type
= CPP_GREATER
;
944 if (*buffer
->cur
== '=')
945 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
946 else if (*buffer
->cur
== '>')
949 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
951 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
954 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
959 result
->type
= CPP_MOD
;
960 if (*buffer
->cur
== '=')
961 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
962 else if (CPP_OPTION (pfile
, digraphs
))
964 if (*buffer
->cur
== ':')
967 result
->flags
|= DIGRAPH
;
968 result
->type
= CPP_HASH
;
969 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
970 buffer
->cur
+= 2, result
->type
= CPP_PASTE
;
972 else if (*buffer
->cur
== '>')
975 result
->flags
|= DIGRAPH
;
976 result
->type
= CPP_CLOSE_BRACE
;
982 result
->type
= CPP_DOT
;
983 if (ISDIGIT (*buffer
->cur
))
985 result
->type
= CPP_NUMBER
;
986 lex_number (pfile
, &result
->val
.str
);
988 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
989 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
990 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
991 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
995 result
->type
= CPP_PLUS
;
996 if (*buffer
->cur
== '+')
997 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
998 else if (*buffer
->cur
== '=')
999 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
1003 result
->type
= CPP_MINUS
;
1004 if (*buffer
->cur
== '>')
1007 result
->type
= CPP_DEREF
;
1008 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1009 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
1011 else if (*buffer
->cur
== '-')
1012 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
1013 else if (*buffer
->cur
== '=')
1014 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
1018 result
->type
= CPP_AND
;
1019 if (*buffer
->cur
== '&')
1020 buffer
->cur
++, result
->type
= CPP_AND_AND
;
1021 else if (*buffer
->cur
== '=')
1022 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
1026 result
->type
= CPP_OR
;
1027 if (*buffer
->cur
== '|')
1028 buffer
->cur
++, result
->type
= CPP_OR_OR
;
1029 else if (*buffer
->cur
== '=')
1030 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
1034 result
->type
= CPP_COLON
;
1035 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
1036 buffer
->cur
++, result
->type
= CPP_SCOPE
;
1037 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
1040 result
->flags
|= DIGRAPH
;
1041 result
->type
= CPP_CLOSE_SQUARE
;
1045 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1046 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1047 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1048 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1049 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1051 case '?': result
->type
= CPP_QUERY
; break;
1052 case '~': result
->type
= CPP_COMPL
; break;
1053 case ',': result
->type
= CPP_COMMA
; break;
1054 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1055 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1056 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1057 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1058 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1059 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1060 case ';': result
->type
= CPP_SEMICOLON
; break;
1062 /* @ is a punctuator in Objective-C. */
1063 case '@': result
->type
= CPP_ATSIGN
; break;
1068 const uchar
*base
= --buffer
->cur
;
1070 if (forms_identifier_p (pfile
, true))
1072 result
->type
= CPP_NAME
;
1073 result
->val
.node
= lex_identifier (pfile
, base
);
1080 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
1087 /* An upper bound on the number of bytes needed to spell TOKEN.
1088 Does not include preceding whitespace. */
1090 cpp_token_len (const cpp_token
*token
)
1094 switch (TOKEN_SPELL (token
))
1096 default: len
= 4; break;
1097 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
1098 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1104 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1105 already contain enough space to hold the token's spelling.
1106 Returns a pointer to the character after the last character written.
1107 FIXME: Would be nice if we didn't need the PFILE argument. */
1109 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
1110 unsigned char *buffer
)
1112 switch (TOKEN_SPELL (token
))
1114 case SPELL_OPERATOR
:
1116 const unsigned char *spelling
;
1119 if (token
->flags
& DIGRAPH
)
1121 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1122 else if (token
->flags
& NAMED_OP
)
1125 spelling
= TOKEN_NAME (token
);
1127 while ((c
= *spelling
++) != '\0')
1134 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1135 buffer
+= NODE_LEN (token
->val
.node
);
1139 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1140 buffer
+= token
->val
.str
.len
;
1144 cpp_error (pfile
, DL_ICE
, "unspellable token %s", TOKEN_NAME (token
));
1151 /* Returns TOKEN spelt as a null-terminated string. The string is
1152 freed when the reader is destroyed. Useful for diagnostics. */
1154 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
1156 unsigned int len
= cpp_token_len (token
) + 1;
1157 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1159 end
= cpp_spell_token (pfile
, token
, start
);
1165 /* Used by C front ends, which really should move to using
1166 cpp_token_as_text. */
1168 cpp_type2name (enum cpp_ttype type
)
1170 return (const char *) token_spellings
[type
].name
;
1173 /* Writes the spelling of token to FP, without any preceding space.
1174 Separated from cpp_spell_token for efficiency - to avoid stdio
1175 double-buffering. */
1177 cpp_output_token (const cpp_token
*token
, FILE *fp
)
1179 switch (TOKEN_SPELL (token
))
1181 case SPELL_OPERATOR
:
1183 const unsigned char *spelling
;
1186 if (token
->flags
& DIGRAPH
)
1188 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1189 else if (token
->flags
& NAMED_OP
)
1192 spelling
= TOKEN_NAME (token
);
1197 while ((c
= *++spelling
) != '\0');
1203 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
1207 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1211 /* An error, most probably. */
1216 /* Compare two tokens. */
1218 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
1220 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1221 switch (TOKEN_SPELL (a
))
1223 default: /* Keep compiler happy. */
1224 case SPELL_OPERATOR
:
1227 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1229 return a
->val
.node
== b
->val
.node
;
1231 return (a
->val
.str
.len
== b
->val
.str
.len
1232 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1239 /* Returns nonzero if a space should be inserted to avoid an
1240 accidental token paste for output. For simplicity, it is
1241 conservative, and occasionally advises a space where one is not
1242 needed, e.g. "." and ".2". */
1244 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
1245 const cpp_token
*token2
)
1247 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1250 if (token1
->flags
& NAMED_OP
)
1252 if (token2
->flags
& NAMED_OP
)
1256 if (token2
->flags
& DIGRAPH
)
1257 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1258 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1259 c
= token_spellings
[b
].name
[0];
1261 /* Quickly get everything that can paste with an '='. */
1262 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1267 case CPP_GREATER
: return c
== '>' || c
== '?';
1268 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1269 case CPP_PLUS
: return c
== '+';
1270 case CPP_MINUS
: return c
== '-' || c
== '>';
1271 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1272 case CPP_MOD
: return c
== ':' || c
== '>';
1273 case CPP_AND
: return c
== '&';
1274 case CPP_OR
: return c
== '|';
1275 case CPP_COLON
: return c
== ':' || c
== '>';
1276 case CPP_DEREF
: return c
== '*';
1277 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1278 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1279 case CPP_NAME
: return ((b
== CPP_NUMBER
1280 && name_p (pfile
, &token2
->val
.str
))
1282 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1283 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1284 || c
== '.' || c
== '+' || c
== '-');
1286 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
1288 || (CPP_OPTION (pfile
, objc
)
1289 && token1
->val
.str
.text
[0] == '@'
1290 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
1297 /* Output all the remaining tokens on the current line, and a newline
1298 character, to FP. Leading whitespace is removed. If there are
1299 macros, special token padding is not performed. */
1301 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
1303 const cpp_token
*token
;
1305 token
= cpp_get_token (pfile
);
1306 while (token
->type
!= CPP_EOF
)
1308 cpp_output_token (token
, fp
);
1309 token
= cpp_get_token (pfile
);
1310 if (token
->flags
& PREV_WHITE
)
1317 /* Memory buffers. Changing these three constants can have a dramatic
1318 effect on performance. The values here are reasonable defaults,
1319 but might be tuned. If you adjust them, be sure to test across a
1320 range of uses of cpplib, including heavy nested function-like macro
1321 expansion. Also check the change in peak memory usage (NJAMD is a
1322 good tool for this). */
1323 #define MIN_BUFF_SIZE 8000
1324 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
1325 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1326 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
1328 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1329 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1332 /* Create a new allocation buffer. Place the control block at the end
1333 of the buffer, so that buffer overflows will cause immediate chaos. */
1335 new_buff (size_t len
)
1338 unsigned char *base
;
1340 if (len
< MIN_BUFF_SIZE
)
1341 len
= MIN_BUFF_SIZE
;
1342 len
= CPP_ALIGN (len
);
1344 base
= xmalloc (len
+ sizeof (_cpp_buff
));
1345 result
= (_cpp_buff
*) (base
+ len
);
1346 result
->base
= base
;
1348 result
->limit
= base
+ len
;
1349 result
->next
= NULL
;
1353 /* Place a chain of unwanted allocation buffers on the free list. */
1355 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
1357 _cpp_buff
*end
= buff
;
1361 end
->next
= pfile
->free_buffs
;
1362 pfile
->free_buffs
= buff
;
1365 /* Return a free buffer of size at least MIN_SIZE. */
1367 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
1369 _cpp_buff
*result
, **p
;
1371 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
1376 return new_buff (min_size
);
1378 size
= result
->limit
- result
->base
;
1379 /* Return a buffer that's big enough, but don't waste one that's
1381 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
1386 result
->next
= NULL
;
1387 result
->cur
= result
->base
;
1391 /* Creates a new buffer with enough space to hold the uncommitted
1392 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1393 the excess bytes to the new buffer. Chains the new buffer after
1394 BUFF, and returns the new buffer. */
1396 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
1398 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
1399 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
1401 buff
->next
= new_buff
;
1402 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
1406 /* Creates a new buffer with enough space to hold the uncommitted
1407 remaining bytes of the buffer pointed to by BUFF, and at least
1408 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1409 Chains the new buffer before the buffer pointed to by BUFF, and
1410 updates the pointer to point to the new buffer. */
1412 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
1414 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
1415 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
1417 new_buff
= _cpp_get_buff (pfile
, size
);
1418 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
1419 new_buff
->next
= old_buff
;
1423 /* Free a chain of buffers starting at BUFF. */
1425 _cpp_free_buff (buff
)
1430 for (; buff
; buff
= next
)
1437 /* Allocate permanent, unaligned storage of length LEN. */
1439 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
1441 _cpp_buff
*buff
= pfile
->u_buff
;
1442 unsigned char *result
= buff
->cur
;
1444 if (len
> (size_t) (buff
->limit
- result
))
1446 buff
= _cpp_get_buff (pfile
, len
);
1447 buff
->next
= pfile
->u_buff
;
1448 pfile
->u_buff
= buff
;
1452 buff
->cur
= result
+ len
;
1456 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1457 That buffer is used for growing allocations when saving macro
1458 replacement lists in a #define, and when parsing an answer to an
1459 assertion in #assert, #unassert or #if (and therefore possibly
1460 whilst expanding macros). It therefore must not be used by any
1461 code that they might call: specifically the lexer and the guts of
1464 All existing other uses clearly fit this restriction: storing
1465 registered pragmas during initialization. */
1467 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
1469 _cpp_buff
*buff
= pfile
->a_buff
;
1470 unsigned char *result
= buff
->cur
;
1472 if (len
> (size_t) (buff
->limit
- result
))
1474 buff
= _cpp_get_buff (pfile
, len
);
1475 buff
->next
= pfile
->a_buff
;
1476 pfile
->a_buff
= buff
;
1480 buff
->cur
= result
+ len
;