1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
37 enum spell_type category
;
38 const unsigned char *name
;
41 static const unsigned char *const digraph_spellings
[] =
42 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { s, U #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static cpp_hashnode
*lex_identifier (cpp_reader
*, const uchar
*);
57 static void lex_number (cpp_reader
*, cpp_string
*);
58 static bool forms_identifier_p (cpp_reader
*, int);
59 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
60 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
61 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
62 unsigned int, enum cpp_ttype
);
63 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
64 static int name_p (cpp_reader
*, const cpp_string
*);
65 static tokenrun
*next_tokenrun (tokenrun
*);
67 static _cpp_buff
*new_buff (size_t);
72 Compares the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
75 cpp_ideq (const cpp_token
*token
, const char *string
)
77 if (token
->type
!= CPP_NAME
)
80 return !ustrcmp (NODE_NAME (token
->val
.node
), (const uchar
*) string
);
83 /* Record a note TYPE at byte POS into the current cleaned logical
86 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
88 if (buffer
->notes_used
== buffer
->notes_cap
)
90 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
91 buffer
->notes
= xrealloc (buffer
->notes
,
92 buffer
->notes_cap
* sizeof (_cpp_line_note
));
95 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
96 buffer
->notes
[buffer
->notes_used
].type
= type
;
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
103 _cpp_clean_line (cpp_reader
*pfile
)
109 buffer
= pfile
->buffer
;
110 buffer
->cur_note
= buffer
->notes_used
= 0;
111 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
112 buffer
->need_line
= false;
113 s
= buffer
->next_line
- 1;
115 if (!buffer
->from_stage3
)
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
123 if (c
== '\n' || c
== '\r')
127 if (s
== buffer
->rlimit
)
130 /* DOS line ending? */
131 if (c
== '\r' && s
[1] == '\n')
134 if (s
== buffer
->rlimit
)
137 /* check for escaped newline */
139 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
141 if (p
== buffer
->next_line
|| p
[-1] != '\\')
144 /* Have an escaped newline; process it and proceed to
146 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
148 buffer
->next_line
= p
- 1;
151 if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer
, s
, s
[2]);
156 if (CPP_OPTION (pfile
, trigraphs
))
158 /* We do, and that means we have to switch to the
161 *d
= _cpp_trigraph_map
[s
[2]];
174 if (c
== '\n' || c
== '\r')
176 /* Handle DOS line endings. */
177 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
179 if (s
== buffer
->rlimit
)
184 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
186 if (p
== buffer
->next_line
|| p
[-1] != '\\')
189 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
191 buffer
->next_line
= p
- 1;
193 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
196 add_line_note (buffer
, d
, s
[2]);
197 if (CPP_OPTION (pfile
, trigraphs
))
199 *d
= _cpp_trigraph_map
[s
[2]];
209 while (*s
!= '\n' && *s
!= '\r');
212 /* Handle DOS line endings. */
213 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer
, d
+ 1, '\n');
221 buffer
->next_line
= s
+ 1;
224 /* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
227 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change
234 if (note
->type
!= '/')
237 /* If -trigraphs, then this was an escaped newline iff the next note
239 if (CPP_OPTION (pfile
, trigraphs
))
240 return note
[1].pos
== note
->pos
;
242 /* Otherwise, see if this forms an escaped newline. */
244 while (is_nvspace (*p
))
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p
== '\n' && p
< note
[1].pos
);
252 /* Process the notes created by add_line_note as far as the current
255 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
257 cpp_buffer
*buffer
= pfile
->buffer
;
261 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
264 if (note
->pos
> buffer
->cur
)
268 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
270 if (note
->type
== '\\' || note
->type
== ' ')
272 if (note
->type
== ' ' && !in_comment
)
273 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line
, col
,
274 "backslash and newline separated by space");
276 if (buffer
->next_line
> buffer
->rlimit
)
278 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line
, col
,
279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer
->next_line
= buffer
->rlimit
;
284 buffer
->line_base
= note
->pos
;
285 CPP_INCREMENT_LINE (pfile
, 0);
287 else if (_cpp_trigraph_map
[note
->type
])
289 if (CPP_OPTION (pfile
, warn_trigraphs
)
290 && (!in_comment
|| warn_in_comment (pfile
, note
)))
292 if (CPP_OPTION (pfile
, trigraphs
))
293 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line
, col
,
294 "trigraph ??%c converted to %c",
296 (int) _cpp_trigraph_map
[note
->type
]);
300 (pfile
, CPP_DL_WARNING
, pfile
->line
, col
,
301 "trigraph ??%c ignored, use -trigraphs to enable",
311 /* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 nonzero if comment terminated by EOF, zero otherwise.
315 Buffer->cur points to the initial asterisk of the comment. */
317 _cpp_skip_block_comment (cpp_reader
*pfile
)
319 cpp_buffer
*buffer
= pfile
->buffer
;
320 const uchar
*cur
= buffer
->cur
;
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
338 /* Warn about potential nested comments, but not if the '/'
339 comes immediately before the true comment delimiter.
340 Don't bother to get it right across escaped newlines. */
341 if (CPP_OPTION (pfile
, warn_comments
)
342 && cur
[0] == '*' && cur
[1] != '/')
345 cpp_error_with_line (pfile
, CPP_DL_WARNING
,
346 pfile
->line
, CPP_BUF_COL (buffer
),
347 "\"/*\" within comment");
353 buffer
->cur
= cur
- 1;
354 _cpp_process_line_notes (pfile
, true);
355 if (buffer
->next_line
>= buffer
->rlimit
)
357 _cpp_clean_line (pfile
);
359 cols
= buffer
->next_line
- buffer
->line_base
;
360 CPP_INCREMENT_LINE (pfile
, cols
);
367 _cpp_process_line_notes (pfile
, true);
371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
372 terminating newline. Handles escaped newlines. Returns nonzero
373 if a multiline comment. */
375 skip_line_comment (cpp_reader
*pfile
)
377 cpp_buffer
*buffer
= pfile
->buffer
;
378 unsigned int orig_line
= pfile
->line
;
380 while (*buffer
->cur
!= '\n')
383 _cpp_process_line_notes (pfile
, true);
384 return orig_line
!= pfile
->line
;
387 /* Skips whitespace, saving the next non-whitespace character. */
389 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
391 cpp_buffer
*buffer
= pfile
->buffer
;
392 bool saw_NUL
= false;
396 /* Horizontal space always OK. */
397 if (c
== ' ' || c
== '\t')
399 /* Just \f \v or \0 left. */
402 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
403 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line
,
404 CPP_BUF_COL (buffer
),
405 "%s in preprocessing directive",
406 c
== '\f' ? "form feed" : "vertical tab");
410 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
411 while (is_nvspace (c
));
414 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
419 /* See if the characters of a number token are valid in a name (no
422 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
426 for (i
= 0; i
< string
->len
; i
++)
427 if (!is_idchar (string
->text
[i
]))
433 /* Returns TRUE if the sequence starting at buffer->cur is valid in
434 an identifier. FIRST is TRUE if this starts an identifier. */
436 forms_identifier_p (cpp_reader
*pfile
, int first
)
438 cpp_buffer
*buffer
= pfile
->buffer
;
440 if (*buffer
->cur
== '$')
442 if (!CPP_OPTION (pfile
, dollars_in_ident
))
446 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
448 CPP_OPTION (pfile
, warn_dollars
) = 0;
449 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
455 /* Is this a syntactically valid UCN? */
456 if (0 && *buffer
->cur
== '\\'
457 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
460 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
))
468 /* Lex an identifier starting at BUFFER->CUR - 1. */
469 static cpp_hashnode
*
470 lex_identifier (cpp_reader
*pfile
, const uchar
*base
)
472 cpp_hashnode
*result
;
477 cur
= pfile
->buffer
->cur
;
479 /* N.B. ISIDNUM does not include $. */
480 while (ISIDNUM (*cur
))
483 pfile
->buffer
->cur
= cur
;
485 while (forms_identifier_p (pfile
, false));
487 result
= (cpp_hashnode
*)
488 ht_lookup (pfile
->hash_table
, base
, cur
- base
, HT_ALLOC
);
490 /* Rarely, identifiers require diagnostics when lexed. */
491 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
492 && !pfile
->state
.skipping
, 0))
494 /* It is allowed to poison the same identifier twice. */
495 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
496 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
499 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
500 replacement list of a variadic macro. */
501 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
502 && !pfile
->state
.va_args_ok
)
503 cpp_error (pfile
, CPP_DL_PEDWARN
,
504 "__VA_ARGS__ can only appear in the expansion"
505 " of a C99 variadic macro");
511 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
513 lex_number (cpp_reader
*pfile
, cpp_string
*number
)
519 base
= pfile
->buffer
->cur
- 1;
522 cur
= pfile
->buffer
->cur
;
524 /* N.B. ISIDNUM does not include $. */
525 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
528 pfile
->buffer
->cur
= cur
;
530 while (forms_identifier_p (pfile
, false));
532 number
->len
= cur
- base
;
533 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
534 memcpy (dest
, base
, number
->len
);
535 dest
[number
->len
] = '\0';
539 /* Create a token of type TYPE with a literal spelling. */
541 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
542 unsigned int len
, enum cpp_ttype type
)
544 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
546 memcpy (dest
, base
, len
);
549 token
->val
.str
.len
= len
;
550 token
->val
.str
.text
= dest
;
553 /* Lexes a string, character constant, or angle-bracketed header file
554 name. The stored string contains the spelling, including the opening
555 quote and any leading 'L'. TOKEN is given the type of the
556 literal, or CPP_OTHER if it was not properly terminated.
558 The spelling is NUL-terminated, but it is not guaranteed that this
559 is the first NUL since embedded NULs are preserved. */
561 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
563 bool saw_NUL
= false;
565 cppchar_t terminator
;
570 if (terminator
== 'L')
572 if (terminator
== '\"')
573 type
= *base
== 'L' ? CPP_WSTRING
: CPP_STRING
;
574 else if (terminator
== '\'')
575 type
= *base
== 'L' ? CPP_WCHAR
: CPP_CHAR
;
577 terminator
= '>', type
= CPP_HEADER_NAME
;
581 cppchar_t c
= *cur
++;
583 /* In #include-style directives, terminators are not escapable. */
584 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
586 else if (c
== terminator
)
598 if (saw_NUL
&& !pfile
->state
.skipping
)
599 cpp_error (pfile
, CPP_DL_WARNING
,
600 "null character(s) preserved in literal");
602 pfile
->buffer
->cur
= cur
;
603 create_literal (pfile
, token
, base
, cur
- base
, type
);
606 /* The stored comment includes the comment start and any terminator. */
608 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
611 unsigned char *buffer
;
612 unsigned int len
, clen
;
614 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
616 /* C++ comments probably (not definitely) have moved past a new
617 line, which we don't want to save in the comment. */
618 if (is_vspace (pfile
->buffer
->cur
[-1]))
621 /* If we are currently in a directive, then we need to store all
622 C++ comments as C comments internally, and so we need to
623 allocate a little extra space in that case.
625 Note that the only time we encounter a directive here is
626 when we are saving comments in a "#define". */
627 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
629 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
631 token
->type
= CPP_COMMENT
;
632 token
->val
.str
.len
= clen
;
633 token
->val
.str
.text
= buffer
;
636 memcpy (buffer
+ 1, from
, len
- 1);
638 /* Finish conversion to a C comment, if necessary. */
639 if (pfile
->state
.in_directive
&& type
== '/')
642 buffer
[clen
- 2] = '*';
643 buffer
[clen
- 1] = '/';
647 /* Allocate COUNT tokens for RUN. */
649 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
651 run
->base
= xnewvec (cpp_token
, count
);
652 run
->limit
= run
->base
+ count
;
656 /* Returns the next tokenrun, or creates one if there is none. */
658 next_tokenrun (tokenrun
*run
)
660 if (run
->next
== NULL
)
662 run
->next
= xnew (tokenrun
);
663 run
->next
->prev
= run
;
664 _cpp_init_tokenrun (run
->next
, 250);
670 /* Allocate a single token that is invalidated at the same time as the
671 rest of the tokens on the line. Has its line and col set to the
672 same as the last lexed token, so that diagnostics appear in the
675 _cpp_temp_token (cpp_reader
*pfile
)
677 cpp_token
*old
, *result
;
679 old
= pfile
->cur_token
- 1;
680 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
682 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
683 pfile
->cur_token
= pfile
->cur_run
->base
;
686 result
= pfile
->cur_token
++;
687 result
->src_loc
= old
->src_loc
;
691 /* Lex a token into RESULT (external interface). Takes care of issues
692 like directive handling, token lookahead, multiple include
693 optimization and skipping. */
695 _cpp_lex_token (cpp_reader
*pfile
)
701 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
703 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
704 pfile
->cur_token
= pfile
->cur_run
->base
;
707 if (pfile
->lookaheads
)
710 result
= pfile
->cur_token
++;
713 result
= _cpp_lex_direct (pfile
);
715 if (result
->flags
& BOL
)
717 /* Is this a directive? If _cpp_handle_directive returns
718 false, it is an assembler #. */
719 if (result
->type
== CPP_HASH
720 /* 6.10.3 p 11: Directives in a list of macro arguments
721 give undefined behavior. This implementation
722 handles the directive as normal. */
723 && pfile
->state
.parsing_args
!= 1
724 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
726 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
727 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
730 /* We don't skip tokens in directives. */
731 if (pfile
->state
.in_directive
)
734 /* Outside a directive, invalidate controlling macros. At file
735 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
736 get here and MI optimization works. */
737 pfile
->mi_valid
= false;
739 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
746 /* Returns true if a fresh line has been loaded. */
748 _cpp_get_fresh_line (cpp_reader
*pfile
)
750 /* We can't get a new line until we leave the current directive. */
751 if (pfile
->state
.in_directive
)
756 cpp_buffer
*buffer
= pfile
->buffer
;
758 if (!buffer
->need_line
)
761 if (buffer
->next_line
< buffer
->rlimit
)
763 _cpp_clean_line (pfile
);
767 /* First, get out of parsing arguments state. */
768 if (pfile
->state
.parsing_args
)
771 /* End of buffer. Non-empty files should end in a newline. */
772 if (buffer
->buf
!= buffer
->rlimit
773 && buffer
->next_line
> buffer
->rlimit
774 && !buffer
->from_stage3
)
776 /* Only warn once. */
777 buffer
->next_line
= buffer
->rlimit
;
778 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line
,
779 CPP_BUF_COLUMN (buffer
, buffer
->cur
),
780 "no newline at end of file");
783 _cpp_pop_buffer (pfile
);
784 if (pfile
->buffer
== NULL
)
789 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
792 result->type = ELSE_TYPE; \
793 if (*buffer->cur == CHAR) \
794 buffer->cur++, result->type = THEN_TYPE; \
798 /* Lex a token into pfile->cur_token, which is also incremented, to
799 get diagnostics pointing to the correct location.
801 Does not handle issues such as token lookahead, multiple-include
802 optimization, directives, skipping etc. This function is only
803 suitable for use by _cpp_lex_token, and in special cases like
804 lex_expansion_token which doesn't care for any of these issues.
806 When meeting a newline, returns CPP_EOF if parsing a directive,
807 otherwise returns to the start of the token buffer if permissible.
808 Returns the location of the lexed token. */
810 _cpp_lex_direct (cpp_reader
*pfile
)
814 const unsigned char *comment_start
;
815 cpp_token
*result
= pfile
->cur_token
++;
819 buffer
= pfile
->buffer
;
820 if (buffer
->need_line
)
822 if (!_cpp_get_fresh_line (pfile
))
824 result
->type
= CPP_EOF
;
825 if (!pfile
->state
.in_directive
)
827 /* Tell the compiler the line number of the EOF token. */
828 result
->src_loc
= pfile
->line
;
833 if (!pfile
->keep_tokens
)
835 pfile
->cur_run
= &pfile
->base_run
;
836 result
= pfile
->base_run
.base
;
837 pfile
->cur_token
= result
+ 1;
840 if (pfile
->state
.parsing_args
== 2)
841 result
->flags
|= PREV_WHITE
;
843 buffer
= pfile
->buffer
;
845 result
->src_loc
= pfile
->line
;
848 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
849 && !pfile
->overlaid_buffer
)
851 _cpp_process_line_notes (pfile
, false);
852 result
->src_loc
= pfile
->line
;
856 result
->src_loc
= linemap_position_for_column (pfile
->line_table
,
857 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
861 case ' ': case '\t': case '\f': case '\v': case '\0':
862 result
->flags
|= PREV_WHITE
;
863 skip_whitespace (pfile
, c
);
867 if (buffer
->cur
< buffer
->rlimit
)
868 CPP_INCREMENT_LINE (pfile
, 0);
869 buffer
->need_line
= true;
872 case '0': case '1': case '2': case '3': case '4':
873 case '5': case '6': case '7': case '8': case '9':
874 result
->type
= CPP_NUMBER
;
875 lex_number (pfile
, &result
->val
.str
);
879 /* 'L' may introduce wide characters or strings. */
880 if (*buffer
->cur
== '\'' || *buffer
->cur
== '"')
882 lex_string (pfile
, result
, buffer
->cur
- 1);
888 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
889 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
890 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
891 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
893 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
894 case 'G': case 'H': case 'I': case 'J': case 'K':
895 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
896 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
898 result
->type
= CPP_NAME
;
899 result
->val
.node
= lex_identifier (pfile
, buffer
->cur
- 1);
901 /* Convert named operators to their proper types. */
902 if (result
->val
.node
->flags
& NODE_OPERATOR
)
904 result
->flags
|= NAMED_OP
;
905 result
->type
= result
->val
.node
->directive_index
;
911 lex_string (pfile
, result
, buffer
->cur
- 1);
915 /* A potential block or line comment. */
916 comment_start
= buffer
->cur
;
921 if (_cpp_skip_block_comment (pfile
))
922 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
924 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
925 || cpp_in_system_header (pfile
)))
927 /* Warn about comments only if pedantically GNUC89, and not
928 in system headers. */
929 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
930 && ! buffer
->warned_cplusplus_comments
)
932 cpp_error (pfile
, CPP_DL_PEDWARN
,
933 "C++ style comments are not allowed in ISO C90");
934 cpp_error (pfile
, CPP_DL_PEDWARN
,
935 "(this will be reported only once per input file)");
936 buffer
->warned_cplusplus_comments
= 1;
939 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
940 cpp_error (pfile
, CPP_DL_WARNING
, "multi-line comment");
945 result
->type
= CPP_DIV_EQ
;
950 result
->type
= CPP_DIV
;
954 if (!pfile
->state
.save_comments
)
956 result
->flags
|= PREV_WHITE
;
957 goto update_tokens_line
;
960 /* Save the comment as a token in its own right. */
961 save_comment (pfile
, result
, comment_start
, c
);
965 if (pfile
->state
.angled_headers
)
967 lex_string (pfile
, result
, buffer
->cur
- 1);
971 result
->type
= CPP_LESS
;
972 if (*buffer
->cur
== '=')
973 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
974 else if (*buffer
->cur
== '<')
977 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
979 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
982 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
984 else if (CPP_OPTION (pfile
, digraphs
))
986 if (*buffer
->cur
== ':')
989 result
->flags
|= DIGRAPH
;
990 result
->type
= CPP_OPEN_SQUARE
;
992 else if (*buffer
->cur
== '%')
995 result
->flags
|= DIGRAPH
;
996 result
->type
= CPP_OPEN_BRACE
;
1002 result
->type
= CPP_GREATER
;
1003 if (*buffer
->cur
== '=')
1004 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
1005 else if (*buffer
->cur
== '>')
1008 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1010 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
1013 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
1018 result
->type
= CPP_MOD
;
1019 if (*buffer
->cur
== '=')
1020 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
1021 else if (CPP_OPTION (pfile
, digraphs
))
1023 if (*buffer
->cur
== ':')
1026 result
->flags
|= DIGRAPH
;
1027 result
->type
= CPP_HASH
;
1028 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
1029 buffer
->cur
+= 2, result
->type
= CPP_PASTE
;
1031 else if (*buffer
->cur
== '>')
1034 result
->flags
|= DIGRAPH
;
1035 result
->type
= CPP_CLOSE_BRACE
;
1041 result
->type
= CPP_DOT
;
1042 if (ISDIGIT (*buffer
->cur
))
1044 result
->type
= CPP_NUMBER
;
1045 lex_number (pfile
, &result
->val
.str
);
1047 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
1048 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
1049 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1050 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
1054 result
->type
= CPP_PLUS
;
1055 if (*buffer
->cur
== '+')
1056 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
1057 else if (*buffer
->cur
== '=')
1058 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
1062 result
->type
= CPP_MINUS
;
1063 if (*buffer
->cur
== '>')
1066 result
->type
= CPP_DEREF
;
1067 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1068 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
1070 else if (*buffer
->cur
== '-')
1071 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
1072 else if (*buffer
->cur
== '=')
1073 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
1077 result
->type
= CPP_AND
;
1078 if (*buffer
->cur
== '&')
1079 buffer
->cur
++, result
->type
= CPP_AND_AND
;
1080 else if (*buffer
->cur
== '=')
1081 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
1085 result
->type
= CPP_OR
;
1086 if (*buffer
->cur
== '|')
1087 buffer
->cur
++, result
->type
= CPP_OR_OR
;
1088 else if (*buffer
->cur
== '=')
1089 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
1093 result
->type
= CPP_COLON
;
1094 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
1095 buffer
->cur
++, result
->type
= CPP_SCOPE
;
1096 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
1099 result
->flags
|= DIGRAPH
;
1100 result
->type
= CPP_CLOSE_SQUARE
;
1104 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1105 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1106 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1107 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1108 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1110 case '?': result
->type
= CPP_QUERY
; break;
1111 case '~': result
->type
= CPP_COMPL
; break;
1112 case ',': result
->type
= CPP_COMMA
; break;
1113 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1114 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1115 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1116 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1117 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1118 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1119 case ';': result
->type
= CPP_SEMICOLON
; break;
1121 /* @ is a punctuator in Objective-C. */
1122 case '@': result
->type
= CPP_ATSIGN
; break;
1127 const uchar
*base
= --buffer
->cur
;
1129 if (forms_identifier_p (pfile
, true))
1131 result
->type
= CPP_NAME
;
1132 result
->val
.node
= lex_identifier (pfile
, base
);
1139 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
1146 /* An upper bound on the number of bytes needed to spell TOKEN.
1147 Does not include preceding whitespace. */
1149 cpp_token_len (const cpp_token
*token
)
1153 switch (TOKEN_SPELL (token
))
1155 default: len
= 4; break;
1156 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
1157 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1163 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1164 already contain enough space to hold the token's spelling.
1165 Returns a pointer to the character after the last character written.
1166 FIXME: Would be nice if we didn't need the PFILE argument. */
1168 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
1169 unsigned char *buffer
)
1171 switch (TOKEN_SPELL (token
))
1173 case SPELL_OPERATOR
:
1175 const unsigned char *spelling
;
1178 if (token
->flags
& DIGRAPH
)
1180 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1181 else if (token
->flags
& NAMED_OP
)
1184 spelling
= TOKEN_NAME (token
);
1186 while ((c
= *spelling
++) != '\0')
1193 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1194 buffer
+= NODE_LEN (token
->val
.node
);
1198 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1199 buffer
+= token
->val
.str
.len
;
1203 cpp_error (pfile
, CPP_DL_ICE
,
1204 "unspellable token %s", TOKEN_NAME (token
));
1211 /* Returns TOKEN spelt as a null-terminated string. The string is
1212 freed when the reader is destroyed. Useful for diagnostics. */
1214 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
1216 unsigned int len
= cpp_token_len (token
) + 1;
1217 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1219 end
= cpp_spell_token (pfile
, token
, start
);
1225 /* Used by C front ends, which really should move to using
1226 cpp_token_as_text. */
1228 cpp_type2name (enum cpp_ttype type
)
1230 return (const char *) token_spellings
[type
].name
;
1233 /* Writes the spelling of token to FP, without any preceding space.
1234 Separated from cpp_spell_token for efficiency - to avoid stdio
1235 double-buffering. */
1237 cpp_output_token (const cpp_token
*token
, FILE *fp
)
1239 switch (TOKEN_SPELL (token
))
1241 case SPELL_OPERATOR
:
1243 const unsigned char *spelling
;
1246 if (token
->flags
& DIGRAPH
)
1248 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1249 else if (token
->flags
& NAMED_OP
)
1252 spelling
= TOKEN_NAME (token
);
1257 while ((c
= *++spelling
) != '\0');
1263 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
1267 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1271 /* An error, most probably. */
1276 /* Compare two tokens. */
1278 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
1280 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1281 switch (TOKEN_SPELL (a
))
1283 default: /* Keep compiler happy. */
1284 case SPELL_OPERATOR
:
1287 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1289 return a
->val
.node
== b
->val
.node
;
1291 return (a
->val
.str
.len
== b
->val
.str
.len
1292 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1299 /* Returns nonzero if a space should be inserted to avoid an
1300 accidental token paste for output. For simplicity, it is
1301 conservative, and occasionally advises a space where one is not
1302 needed, e.g. "." and ".2". */
1304 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
1305 const cpp_token
*token2
)
1307 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1310 if (token1
->flags
& NAMED_OP
)
1312 if (token2
->flags
& NAMED_OP
)
1316 if (token2
->flags
& DIGRAPH
)
1317 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1318 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1319 c
= token_spellings
[b
].name
[0];
1321 /* Quickly get everything that can paste with an '='. */
1322 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1327 case CPP_GREATER
: return c
== '>' || c
== '?';
1328 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1329 case CPP_PLUS
: return c
== '+';
1330 case CPP_MINUS
: return c
== '-' || c
== '>';
1331 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1332 case CPP_MOD
: return c
== ':' || c
== '>';
1333 case CPP_AND
: return c
== '&';
1334 case CPP_OR
: return c
== '|';
1335 case CPP_COLON
: return c
== ':' || c
== '>';
1336 case CPP_DEREF
: return c
== '*';
1337 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1338 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1339 case CPP_NAME
: return ((b
== CPP_NUMBER
1340 && name_p (pfile
, &token2
->val
.str
))
1342 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1343 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1344 || c
== '.' || c
== '+' || c
== '-');
1346 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
1348 || (CPP_OPTION (pfile
, objc
)
1349 && token1
->val
.str
.text
[0] == '@'
1350 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
1357 /* Output all the remaining tokens on the current line, and a newline
1358 character, to FP. Leading whitespace is removed. If there are
1359 macros, special token padding is not performed. */
1361 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
1363 const cpp_token
*token
;
1365 token
= cpp_get_token (pfile
);
1366 while (token
->type
!= CPP_EOF
)
1368 cpp_output_token (token
, fp
);
1369 token
= cpp_get_token (pfile
);
1370 if (token
->flags
& PREV_WHITE
)
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   pointer to its limit pointer.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* MIN_EXTRA is parenthesized so that an argument containing a
   low-precedence operator (e.g. ?:) expands as intended.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1392 /* Create a new allocation buffer. Place the control block at the end
1393 of the buffer, so that buffer overflows will cause immediate chaos. */
1395 new_buff (size_t len
)
1398 unsigned char *base
;
1400 if (len
< MIN_BUFF_SIZE
)
1401 len
= MIN_BUFF_SIZE
;
1402 len
= CPP_ALIGN (len
);
1404 base
= xmalloc (len
+ sizeof (_cpp_buff
));
1405 result
= (_cpp_buff
*) (base
+ len
);
1406 result
->base
= base
;
1408 result
->limit
= base
+ len
;
1409 result
->next
= NULL
;
1413 /* Place a chain of unwanted allocation buffers on the free list. */
1415 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
1417 _cpp_buff
*end
= buff
;
1421 end
->next
= pfile
->free_buffs
;
1422 pfile
->free_buffs
= buff
;
1425 /* Return a free buffer of size at least MIN_SIZE. */
1427 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
1429 _cpp_buff
*result
, **p
;
1431 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
1436 return new_buff (min_size
);
1438 size
= result
->limit
- result
->base
;
1439 /* Return a buffer that's big enough, but don't waste one that's
1441 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
1446 result
->next
= NULL
;
1447 result
->cur
= result
->base
;
1451 /* Creates a new buffer with enough space to hold the uncommitted
1452 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1453 the excess bytes to the new buffer. Chains the new buffer after
1454 BUFF, and returns the new buffer. */
1456 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
1458 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
1459 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
1461 buff
->next
= new_buff
;
1462 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
1466 /* Creates a new buffer with enough space to hold the uncommitted
1467 remaining bytes of the buffer pointed to by BUFF, and at least
1468 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1469 Chains the new buffer before the buffer pointed to by BUFF, and
1470 updates the pointer to point to the new buffer. */
1472 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
1474 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
1475 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
1477 new_buff
= _cpp_get_buff (pfile
, size
);
1478 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
1479 new_buff
->next
= old_buff
;
1483 /* Free a chain of buffers starting at BUFF. */
1485 _cpp_free_buff (_cpp_buff
*buff
)
1489 for (; buff
; buff
= next
)
1496 /* Allocate permanent, unaligned storage of length LEN. */
1498 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
1500 _cpp_buff
*buff
= pfile
->u_buff
;
1501 unsigned char *result
= buff
->cur
;
1503 if (len
> (size_t) (buff
->limit
- result
))
1505 buff
= _cpp_get_buff (pfile
, len
);
1506 buff
->next
= pfile
->u_buff
;
1507 pfile
->u_buff
= buff
;
1511 buff
->cur
= result
+ len
;
/* Allocate permanent, aligned storage of length LEN from a_buff.
   That buffer is used for growing allocations when saving macro
   replacement lists in a #define, and when parsing an answer to an
   assertion in #assert, #unassert or #if (and therefore possibly
   whilst expanding macros).  It therefore must not be used by any
   code that they might call: specifically the lexer and the guts of
   the macro expander.

   All existing other uses clearly fit this restriction: storing
   registered pragmas during initialization.  */
1526 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
1528 _cpp_buff
*buff
= pfile
->a_buff
;
1529 unsigned char *result
= buff
->cur
;
1531 if (len
> (size_t) (buff
->limit
- result
))
1533 buff
= _cpp_get_buff (pfile
, len
);
1534 buff
->next
= pfile
->a_buff
;
1535 pfile
->a_buff
= buff
;
1539 buff
->cur
= result
+ len
;