1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
37 enum spell_type category
;
38 const unsigned char *name
;
41 static const unsigned char *const digraph_spellings
[] =
42 { U
"%:", U
"%:%:", U
"<:", U
":>", U
"<%", U
"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { s, U #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static cpp_hashnode
*lex_identifier (cpp_reader
*, const uchar
*);
57 static void lex_number (cpp_reader
*, cpp_string
*);
58 static bool forms_identifier_p (cpp_reader
*, int);
59 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
60 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
61 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
62 unsigned int, enum cpp_ttype
);
63 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
64 static int name_p (cpp_reader
*, const cpp_string
*);
65 static tokenrun
*next_tokenrun (tokenrun
*);
67 static _cpp_buff
*new_buff (size_t);
72 /* Compares the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
75 cpp_ideq (const cpp_token
*token
, const char *string
)
77 if (token
->type
!= CPP_NAME
)
80 return !ustrcmp (NODE_NAME (token
->val
.node
), (const uchar
*) string
);
83 /* Record a note TYPE at byte POS into the current cleaned logical line. */
86 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
88 if (buffer
->notes_used
== buffer
->notes_cap
)
90 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
91 buffer
->notes
= xrealloc (buffer
->notes
,
92 buffer
->notes_cap
* sizeof (_cpp_line_note
));
95 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
96 buffer
->notes
[buffer
->notes_used
].type
= type
;
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
103 _cpp_clean_line (cpp_reader
*pfile
)
109 buffer
= pfile
->buffer
;
110 buffer
->cur_note
= buffer
->notes_used
= 0;
111 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
112 buffer
->need_line
= false;
113 s
= buffer
->next_line
- 1;
115 if (!buffer
->from_stage3
)
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
123 if (c
== '\n' || c
== '\r')
127 if (s
== buffer
->rlimit
)
130 /* DOS line ending? */
131 if (c
== '\r' && s
[1] == '\n')
134 if (s
== buffer
->rlimit
)
137 /* check for escaped newline */
139 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
141 if (p
== buffer
->next_line
|| p
[-1] != '\\')
144 /* Have an escaped newline; process it and proceed to the slow path. */
146 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
148 buffer
->next_line
= p
- 1;
151 if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer
, s
, s
[2]);
156 if (CPP_OPTION (pfile
, trigraphs
))
158 /* We do, and that means we have to switch to the slow path. */
161 *d
= _cpp_trigraph_map
[s
[2]];
174 if (c
== '\n' || c
== '\r')
176 /* Handle DOS line endings. */
177 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
179 if (s
== buffer
->rlimit
)
184 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
186 if (p
== buffer
->next_line
|| p
[-1] != '\\')
189 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
191 buffer
->next_line
= p
- 1;
193 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
196 add_line_note (buffer
, d
, s
[2]);
197 if (CPP_OPTION (pfile
, trigraphs
))
199 *d
= _cpp_trigraph_map
[s
[2]];
209 while (*s
!= '\n' && *s
!= '\r');
212 /* Handle DOS line endings. */
213 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer
, d
+ 1, '\n');
221 buffer
->next_line
= s
+ 1;
224 /* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
227 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change behavior. */
234 if (note
->type
!= '/')
237 /* If -trigraphs, then this was an escaped newline iff the next note is at the same position. */
239 if (CPP_OPTION (pfile
, trigraphs
))
240 return note
[1].pos
== note
->pos
;
242 /* Otherwise, see if this forms an escaped newline. */
244 while (is_nvspace (*p
))
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p
== '\n' && p
< note
[1].pos
);
252 /* Process the notes created by add_line_note as far as the current location (buffer->cur). */
255 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
257 cpp_buffer
*buffer
= pfile
->buffer
;
261 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
264 if (note
->pos
> buffer
->cur
)
268 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
270 if (note
->type
== '\\' || note
->type
== ' ')
272 if (note
->type
== ' ' && !in_comment
)
273 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
274 "backslash and newline separated by space");
276 if (buffer
->next_line
> buffer
->rlimit
)
278 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer
->next_line
= buffer
->rlimit
;
284 buffer
->line_base
= note
->pos
;
285 CPP_INCREMENT_LINE (pfile
, 0);
287 else if (_cpp_trigraph_map
[note
->type
])
289 if (CPP_OPTION (pfile
, warn_trigraphs
)
290 && (!in_comment
|| warn_in_comment (pfile
, note
)))
292 if (CPP_OPTION (pfile
, trigraphs
))
293 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
294 "trigraph ??%c converted to %c",
296 (int) _cpp_trigraph_map
[note
->type
]);
300 (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
301 "trigraph ??%c ignored, use -trigraphs to enable",
311 /* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 nonzero if comment terminated by EOF, zero otherwise.
315 Buffer->cur points to the initial asterisk of the comment. */
317 _cpp_skip_block_comment (cpp_reader
*pfile
)
319 cpp_buffer
*buffer
= pfile
->buffer
;
320 const uchar
*cur
= buffer
->cur
;
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
338 /* Warn about potential nested comments, but not if the '/'
339 comes immediately before the true comment delimiter.
340 Don't bother to get it right across escaped newlines. */
341 if (CPP_OPTION (pfile
, warn_comments
)
342 && cur
[0] == '*' && cur
[1] != '/')
345 cpp_error_with_line (pfile
, CPP_DL_WARNING
,
346 pfile
->line_table
->highest_line
, CPP_BUF_COL (buffer
),
347 "\"/*\" within comment");
353 buffer
->cur
= cur
- 1;
354 _cpp_process_line_notes (pfile
, true);
355 if (buffer
->next_line
>= buffer
->rlimit
)
357 _cpp_clean_line (pfile
);
359 cols
= buffer
->next_line
- buffer
->line_base
;
360 CPP_INCREMENT_LINE (pfile
, cols
);
367 _cpp_process_line_notes (pfile
, true);
371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
372 terminating newline. Handles escaped newlines. Returns nonzero
373 if a multiline comment. */
375 skip_line_comment (cpp_reader
*pfile
)
377 cpp_buffer
*buffer
= pfile
->buffer
;
378 unsigned int orig_line
= pfile
->line_table
->highest_line
;
380 while (*buffer
->cur
!= '\n')
383 _cpp_process_line_notes (pfile
, true);
384 return orig_line
!= pfile
->line_table
->highest_line
;
387 /* Skips whitespace, saving the next non-whitespace character. */
389 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
391 cpp_buffer
*buffer
= pfile
->buffer
;
392 bool saw_NUL
= false;
396 /* Horizontal space always OK. */
397 if (c
== ' ' || c
== '\t')
399 /* Just \f \v or \0 left. */
402 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
403 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
404 CPP_BUF_COL (buffer
),
405 "%s in preprocessing directive",
406 c
== '\f' ? "form feed" : "vertical tab");
410 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
411 while (is_nvspace (c
));
414 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
419 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
422 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
426 for (i
= 0; i
< string
->len
; i
++)
427 if (!is_idchar (string
->text
[i
]))
433 /* Returns TRUE if the sequence starting at buffer->cur is valid in
434 an identifier. FIRST is TRUE if this starts an identifier. */
436 forms_identifier_p (cpp_reader
*pfile
, int first
)
438 cpp_buffer
*buffer
= pfile
->buffer
;
440 if (*buffer
->cur
== '$')
442 if (!CPP_OPTION (pfile
, dollars_in_ident
))
446 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
448 CPP_OPTION (pfile
, warn_dollars
) = 0;
449 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
455 /* Is this a syntactically valid UCN? */
456 if (0 && *buffer
->cur
== '\\'
457 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
460 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
))
468 /* Lex an identifier starting at BUFFER->CUR - 1. */
469 static cpp_hashnode
*
470 lex_identifier (cpp_reader
*pfile
, const uchar
*base
)
472 cpp_hashnode
*result
;
473 const uchar
*cur
, *limit
;
475 unsigned int hash
= HT_HASHSTEP (0, *base
);
477 cur
= pfile
->buffer
->cur
;
480 /* N.B. ISIDNUM does not include $. */
481 while (ISIDNUM (*cur
))
483 hash
= HT_HASHSTEP (hash
, *cur
);
487 pfile
->buffer
->cur
= cur
;
488 if (!forms_identifier_p (pfile
, false))
491 limit
= pfile
->buffer
->cur
;
494 hash
= HT_HASHSTEP (hash
, *cur
);
499 hash
= HT_HASHFINISH (hash
, len
);
501 result
= (cpp_hashnode
*)
502 ht_lookup_with_hash (pfile
->hash_table
, base
, len
, hash
, HT_ALLOC
);
504 /* Rarely, identifiers require diagnostics when lexed. */
505 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
506 && !pfile
->state
.skipping
, 0))
508 /* It is allowed to poison the same identifier twice. */
509 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
510 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
513 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
514 replacement list of a variadic macro. */
515 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
516 && !pfile
->state
.va_args_ok
)
517 cpp_error (pfile
, CPP_DL_PEDWARN
,
518 "__VA_ARGS__ can only appear in the expansion"
519 " of a C99 variadic macro");
525 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
527 lex_number (cpp_reader
*pfile
, cpp_string
*number
)
533 base
= pfile
->buffer
->cur
- 1;
536 cur
= pfile
->buffer
->cur
;
538 /* N.B. ISIDNUM does not include $. */
539 while (ISIDNUM (*cur
) || *cur
== '.' || VALID_SIGN (*cur
, cur
[-1]))
542 pfile
->buffer
->cur
= cur
;
544 while (forms_identifier_p (pfile
, false));
546 number
->len
= cur
- base
;
547 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
548 memcpy (dest
, base
, number
->len
);
549 dest
[number
->len
] = '\0';
553 /* Create a token of type TYPE with a literal spelling. */
555 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
556 unsigned int len
, enum cpp_ttype type
)
558 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
560 memcpy (dest
, base
, len
);
563 token
->val
.str
.len
= len
;
564 token
->val
.str
.text
= dest
;
567 /* Lexes a string, character constant, or angle-bracketed header file
568 name. The stored string contains the spelling, including the opening
569 quote and any leading 'L'. The token's type is set to the type of the
570 literal, or CPP_OTHER if it was not properly terminated.
572 The spelling is NUL-terminated, but it is not guaranteed that this
573 is the first NUL since embedded NULs are preserved. */
575 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
577 bool saw_NUL
= false;
579 cppchar_t terminator
;
584 if (terminator
== 'L')
586 if (terminator
== '\"')
587 type
= *base
== 'L' ? CPP_WSTRING
: CPP_STRING
;
588 else if (terminator
== '\'')
589 type
= *base
== 'L' ? CPP_WCHAR
: CPP_CHAR
;
591 terminator
= '>', type
= CPP_HEADER_NAME
;
595 cppchar_t c
= *cur
++;
597 /* In #include-style directives, terminators are not escapable. */
598 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
600 else if (c
== terminator
)
612 if (saw_NUL
&& !pfile
->state
.skipping
)
613 cpp_error (pfile
, CPP_DL_WARNING
,
614 "null character(s) preserved in literal");
616 pfile
->buffer
->cur
= cur
;
617 create_literal (pfile
, token
, base
, cur
- base
, type
);
620 /* The stored comment includes the comment start and any terminator. */
622 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
625 unsigned char *buffer
;
626 unsigned int len
, clen
;
628 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
630 /* C++ comments probably (not definitely) have moved past a new
631 line, which we don't want to save in the comment. */
632 if (is_vspace (pfile
->buffer
->cur
[-1]))
635 /* If we are currently in a directive, then we need to store all
636 C++ comments as C comments internally, and so we need to
637 allocate a little extra space in that case.
639 Note that the only time we encounter a directive here is
640 when we are saving comments in a "#define". */
641 clen
= (pfile
->state
.in_directive
&& type
== '/') ? len
+ 2 : len
;
643 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
645 token
->type
= CPP_COMMENT
;
646 token
->val
.str
.len
= clen
;
647 token
->val
.str
.text
= buffer
;
650 memcpy (buffer
+ 1, from
, len
- 1);
652 /* Finish conversion to a C comment, if necessary. */
653 if (pfile
->state
.in_directive
&& type
== '/')
656 buffer
[clen
- 2] = '*';
657 buffer
[clen
- 1] = '/';
661 /* Allocate COUNT tokens for RUN. */
663 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
665 run
->base
= XNEWVEC (cpp_token
, count
);
666 run
->limit
= run
->base
+ count
;
670 /* Returns the next tokenrun, or creates one if there is none. */
672 next_tokenrun (tokenrun
*run
)
674 if (run
->next
== NULL
)
676 run
->next
= XNEW (tokenrun
);
677 run
->next
->prev
= run
;
678 _cpp_init_tokenrun (run
->next
, 250);
684 /* Allocate a single token that is invalidated at the same time as the
685 rest of the tokens on the line. Has its line and col set to the
686 same as the last lexed token, so that diagnostics appear in the right place. */
689 _cpp_temp_token (cpp_reader
*pfile
)
691 cpp_token
*old
, *result
;
693 old
= pfile
->cur_token
- 1;
694 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
696 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
697 pfile
->cur_token
= pfile
->cur_run
->base
;
700 result
= pfile
->cur_token
++;
701 result
->src_loc
= old
->src_loc
;
705 /* Lex a token into RESULT (external interface). Takes care of issues
706 like directive handling, token lookahead, multiple include
707 optimization and skipping. */
709 _cpp_lex_token (cpp_reader
*pfile
)
715 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
717 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
718 pfile
->cur_token
= pfile
->cur_run
->base
;
721 if (pfile
->lookaheads
)
724 result
= pfile
->cur_token
++;
727 result
= _cpp_lex_direct (pfile
);
729 if (result
->flags
& BOL
)
731 /* Is this a directive? If _cpp_handle_directive returns
732 false, it is an assembler #. */
733 if (result
->type
== CPP_HASH
734 /* 6.10.3 p 11: Directives in a list of macro arguments
735 gives undefined behavior. This implementation
736 handles the directive as normal. */
737 && pfile
->state
.parsing_args
!= 1
738 && _cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
740 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
741 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
744 /* We don't skip tokens in directives. */
745 if (pfile
->state
.in_directive
)
748 /* Outside a directive, invalidate controlling macros. At file
749 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
750 get here and MI optimization works. */
751 pfile
->mi_valid
= false;
753 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
760 /* Returns true if a fresh line has been loaded. */
762 _cpp_get_fresh_line (cpp_reader
*pfile
)
766 /* We can't get a new line until we leave the current directive. */
767 if (pfile
->state
.in_directive
)
772 cpp_buffer
*buffer
= pfile
->buffer
;
774 if (!buffer
->need_line
)
777 if (buffer
->next_line
< buffer
->rlimit
)
779 _cpp_clean_line (pfile
);
783 /* First, get out of parsing arguments state. */
784 if (pfile
->state
.parsing_args
)
787 /* End of buffer. Non-empty files should end in a newline. */
788 if (buffer
->buf
!= buffer
->rlimit
789 && buffer
->next_line
> buffer
->rlimit
790 && !buffer
->from_stage3
)
792 /* Only warn once. */
793 buffer
->next_line
= buffer
->rlimit
;
794 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
795 CPP_BUF_COLUMN (buffer
, buffer
->cur
),
796 "no newline at end of file");
799 return_at_eof
= buffer
->return_at_eof
;
800 _cpp_pop_buffer (pfile
);
801 if (pfile
->buffer
== NULL
|| return_at_eof
)
806 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
809 result->type = ELSE_TYPE; \
810 if (*buffer->cur == CHAR) \
811 buffer->cur++, result->type = THEN_TYPE; \
815 /* Lex a token into pfile->cur_token, which is also incremented, to
816 get diagnostics pointing to the correct location.
818 Does not handle issues such as token lookahead, multiple-include
819 optimization, directives, skipping etc. This function is only
820 suitable for use by _cpp_lex_token, and in special cases like
821 lex_expansion_token which doesn't care for any of these issues.
823 When meeting a newline, returns CPP_EOF if parsing a directive,
824 otherwise returns to the start of the token buffer if permissible.
825 Returns the location of the lexed token. */
827 _cpp_lex_direct (cpp_reader
*pfile
)
831 const unsigned char *comment_start
;
832 cpp_token
*result
= pfile
->cur_token
++;
836 buffer
= pfile
->buffer
;
837 if (buffer
->need_line
)
839 if (!_cpp_get_fresh_line (pfile
))
841 result
->type
= CPP_EOF
;
842 if (!pfile
->state
.in_directive
)
844 /* Tell the compiler the line number of the EOF token. */
845 result
->src_loc
= pfile
->line_table
->highest_line
;
850 if (!pfile
->keep_tokens
)
852 pfile
->cur_run
= &pfile
->base_run
;
853 result
= pfile
->base_run
.base
;
854 pfile
->cur_token
= result
+ 1;
857 if (pfile
->state
.parsing_args
== 2)
858 result
->flags
|= PREV_WHITE
;
860 buffer
= pfile
->buffer
;
862 result
->src_loc
= pfile
->line_table
->highest_line
;
865 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
866 && !pfile
->overlaid_buffer
)
868 _cpp_process_line_notes (pfile
, false);
869 result
->src_loc
= pfile
->line_table
->highest_line
;
873 LINEMAP_POSITION_FOR_COLUMN (result
->src_loc
, pfile
->line_table
,
874 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
878 case ' ': case '\t': case '\f': case '\v': case '\0':
879 result
->flags
|= PREV_WHITE
;
880 skip_whitespace (pfile
, c
);
884 if (buffer
->cur
< buffer
->rlimit
)
885 CPP_INCREMENT_LINE (pfile
, 0);
886 buffer
->need_line
= true;
889 case '0': case '1': case '2': case '3': case '4':
890 case '5': case '6': case '7': case '8': case '9':
891 result
->type
= CPP_NUMBER
;
892 lex_number (pfile
, &result
->val
.str
);
896 /* 'L' may introduce wide characters or strings. */
897 if (*buffer
->cur
== '\'' || *buffer
->cur
== '"')
899 lex_string (pfile
, result
, buffer
->cur
- 1);
905 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
906 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
907 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
908 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
910 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
911 case 'G': case 'H': case 'I': case 'J': case 'K':
912 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
913 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
915 result
->type
= CPP_NAME
;
916 result
->val
.node
= lex_identifier (pfile
, buffer
->cur
- 1);
918 /* Convert named operators to their proper types. */
919 if (result
->val
.node
->flags
& NODE_OPERATOR
)
921 result
->flags
|= NAMED_OP
;
922 result
->type
= result
->val
.node
->directive_index
;
928 lex_string (pfile
, result
, buffer
->cur
- 1);
932 /* A potential block or line comment. */
933 comment_start
= buffer
->cur
;
938 if (_cpp_skip_block_comment (pfile
))
939 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
941 else if (c
== '/' && (CPP_OPTION (pfile
, cplusplus_comments
)
942 || cpp_in_system_header (pfile
)))
944 /* Warn about comments only if pedantically GNUC89, and not
945 in system headers. */
946 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
947 && ! buffer
->warned_cplusplus_comments
)
949 cpp_error (pfile
, CPP_DL_PEDWARN
,
950 "C++ style comments are not allowed in ISO C90");
951 cpp_error (pfile
, CPP_DL_PEDWARN
,
952 "(this will be reported only once per input file)");
953 buffer
->warned_cplusplus_comments
= 1;
956 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
957 cpp_error (pfile
, CPP_DL_WARNING
, "multi-line comment");
962 result
->type
= CPP_DIV_EQ
;
967 result
->type
= CPP_DIV
;
971 if (!pfile
->state
.save_comments
)
973 result
->flags
|= PREV_WHITE
;
974 goto update_tokens_line
;
977 /* Save the comment as a token in its own right. */
978 save_comment (pfile
, result
, comment_start
, c
);
982 if (pfile
->state
.angled_headers
)
984 lex_string (pfile
, result
, buffer
->cur
- 1);
988 result
->type
= CPP_LESS
;
989 if (*buffer
->cur
== '=')
990 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
991 else if (*buffer
->cur
== '<')
994 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
996 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
999 IF_NEXT_IS ('=', CPP_MIN_EQ
, CPP_MIN
);
1001 else if (CPP_OPTION (pfile
, digraphs
))
1003 if (*buffer
->cur
== ':')
1006 result
->flags
|= DIGRAPH
;
1007 result
->type
= CPP_OPEN_SQUARE
;
1009 else if (*buffer
->cur
== '%')
1012 result
->flags
|= DIGRAPH
;
1013 result
->type
= CPP_OPEN_BRACE
;
1019 result
->type
= CPP_GREATER
;
1020 if (*buffer
->cur
== '=')
1021 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
1022 else if (*buffer
->cur
== '>')
1025 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
1027 else if (*buffer
->cur
== '?' && CPP_OPTION (pfile
, cplusplus
))
1030 IF_NEXT_IS ('=', CPP_MAX_EQ
, CPP_MAX
);
1035 result
->type
= CPP_MOD
;
1036 if (*buffer
->cur
== '=')
1037 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
1038 else if (CPP_OPTION (pfile
, digraphs
))
1040 if (*buffer
->cur
== ':')
1043 result
->flags
|= DIGRAPH
;
1044 result
->type
= CPP_HASH
;
1045 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
1046 buffer
->cur
+= 2, result
->type
= CPP_PASTE
;
1048 else if (*buffer
->cur
== '>')
1051 result
->flags
|= DIGRAPH
;
1052 result
->type
= CPP_CLOSE_BRACE
;
1058 result
->type
= CPP_DOT
;
1059 if (ISDIGIT (*buffer
->cur
))
1061 result
->type
= CPP_NUMBER
;
1062 lex_number (pfile
, &result
->val
.str
);
1064 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
1065 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
1066 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1067 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
1071 result
->type
= CPP_PLUS
;
1072 if (*buffer
->cur
== '+')
1073 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
1074 else if (*buffer
->cur
== '=')
1075 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
1079 result
->type
= CPP_MINUS
;
1080 if (*buffer
->cur
== '>')
1083 result
->type
= CPP_DEREF
;
1084 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
1085 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
1087 else if (*buffer
->cur
== '-')
1088 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
1089 else if (*buffer
->cur
== '=')
1090 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
1094 result
->type
= CPP_AND
;
1095 if (*buffer
->cur
== '&')
1096 buffer
->cur
++, result
->type
= CPP_AND_AND
;
1097 else if (*buffer
->cur
== '=')
1098 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
1102 result
->type
= CPP_OR
;
1103 if (*buffer
->cur
== '|')
1104 buffer
->cur
++, result
->type
= CPP_OR_OR
;
1105 else if (*buffer
->cur
== '=')
1106 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
1110 result
->type
= CPP_COLON
;
1111 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
1112 buffer
->cur
++, result
->type
= CPP_SCOPE
;
1113 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
1116 result
->flags
|= DIGRAPH
;
1117 result
->type
= CPP_CLOSE_SQUARE
;
1121 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
1122 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
1123 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
1124 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
1125 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); break;
1127 case '?': result
->type
= CPP_QUERY
; break;
1128 case '~': result
->type
= CPP_COMPL
; break;
1129 case ',': result
->type
= CPP_COMMA
; break;
1130 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1131 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1132 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1133 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1134 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1135 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1136 case ';': result
->type
= CPP_SEMICOLON
; break;
1138 /* @ is a punctuator in Objective-C. */
1139 case '@': result
->type
= CPP_ATSIGN
; break;
1144 const uchar
*base
= --buffer
->cur
;
1146 if (forms_identifier_p (pfile
, true))
1148 result
->type
= CPP_NAME
;
1149 result
->val
.node
= lex_identifier (pfile
, base
);
1156 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
1163 /* An upper bound on the number of bytes needed to spell TOKEN.
1164 Does not include preceding whitespace. */
1166 cpp_token_len (const cpp_token
*token
)
1170 switch (TOKEN_SPELL (token
))
1172 default: len
= 4; break;
1173 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
1174 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1180 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1181 already contain enough space to hold the token's spelling.
1182 Returns a pointer to the character after the last character written.
1183 FIXME: Would be nice if we didn't need the PFILE argument. */
1185 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
1186 unsigned char *buffer
)
1188 switch (TOKEN_SPELL (token
))
1190 case SPELL_OPERATOR
:
1192 const unsigned char *spelling
;
1195 if (token
->flags
& DIGRAPH
)
1197 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1198 else if (token
->flags
& NAMED_OP
)
1201 spelling
= TOKEN_NAME (token
);
1203 while ((c
= *spelling
++) != '\0')
1210 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1211 buffer
+= NODE_LEN (token
->val
.node
);
1215 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1216 buffer
+= token
->val
.str
.len
;
1220 cpp_error (pfile
, CPP_DL_ICE
,
1221 "unspellable token %s", TOKEN_NAME (token
));
1228 /* Returns TOKEN spelt as a null-terminated string. The string is
1229 freed when the reader is destroyed. Useful for diagnostics. */
1231 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
1233 unsigned int len
= cpp_token_len (token
) + 1;
1234 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
1236 end
= cpp_spell_token (pfile
, token
, start
);
1242 /* Used by C front ends, which really should move to using
1243 cpp_token_as_text. */
1245 cpp_type2name (enum cpp_ttype type
)
1247 return (const char *) token_spellings
[type
].name
;
1250 /* Writes the spelling of token to FP, without any preceding space.
1251 Separated from cpp_spell_token for efficiency - to avoid stdio
1252 double-buffering. */
1254 cpp_output_token (const cpp_token
*token
, FILE *fp
)
1256 switch (TOKEN_SPELL (token
))
1258 case SPELL_OPERATOR
:
1260 const unsigned char *spelling
;
1263 if (token
->flags
& DIGRAPH
)
1265 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1266 else if (token
->flags
& NAMED_OP
)
1269 spelling
= TOKEN_NAME (token
);
1274 while ((c
= *++spelling
) != '\0');
1280 fwrite (NODE_NAME (token
->val
.node
), 1, NODE_LEN (token
->val
.node
), fp
);
1284 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1288 /* An error, most probably. */
1293 /* Compare two tokens. */
1295 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
1297 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1298 switch (TOKEN_SPELL (a
))
1300 default: /* Keep compiler happy. */
1301 case SPELL_OPERATOR
:
1304 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1306 return a
->val
.node
== b
->val
.node
;
1308 return (a
->val
.str
.len
== b
->val
.str
.len
1309 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1316 /* Returns nonzero if a space should be inserted to avoid an
1317 accidental token paste for output. For simplicity, it is
1318 conservative, and occasionally advises a space where one is not
1319 needed, e.g. "." and ".2". */
1321 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
1322 const cpp_token
*token2
)
1324 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1327 if (token1
->flags
& NAMED_OP
)
1329 if (token2
->flags
& NAMED_OP
)
1333 if (token2
->flags
& DIGRAPH
)
1334 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1335 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1336 c
= token_spellings
[b
].name
[0];
1338 /* Quickly get everything that can paste with an '='. */
1339 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1344 case CPP_GREATER
: return c
== '>' || c
== '?';
1345 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1346 case CPP_PLUS
: return c
== '+';
1347 case CPP_MINUS
: return c
== '-' || c
== '>';
1348 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1349 case CPP_MOD
: return c
== ':' || c
== '>';
1350 case CPP_AND
: return c
== '&';
1351 case CPP_OR
: return c
== '|';
1352 case CPP_COLON
: return c
== ':' || c
== '>';
1353 case CPP_DEREF
: return c
== '*';
1354 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1355 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1356 case CPP_NAME
: return ((b
== CPP_NUMBER
1357 && name_p (pfile
, &token2
->val
.str
))
1359 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1360 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1361 || c
== '.' || c
== '+' || c
== '-');
1363 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
1365 || (CPP_OPTION (pfile
, objc
)
1366 && token1
->val
.str
.text
[0] == '@'
1367 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
1374 /* Output all the remaining tokens on the current line, and a newline
1375 character, to FP. Leading whitespace is removed. If there are
1376 macros, special token padding is not performed. */
1378 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
1380 const cpp_token
*token
;
1382 token
= cpp_get_token (pfile
);
1383 while (token
->type
!= CPP_EOF
)
1385 cpp_output_token (token
, fp
);
1386 token
= cpp_get_token (pfile
);
1387 if (token
->flags
& PREV_WHITE
)
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that
   expressions with low-precedence operators expand correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1409 /* Create a new allocation buffer. Place the control block at the end
1410 of the buffer, so that buffer overflows will cause immediate chaos. */
1412 new_buff (size_t len
)
1415 unsigned char *base
;
1417 if (len
< MIN_BUFF_SIZE
)
1418 len
= MIN_BUFF_SIZE
;
1419 len
= CPP_ALIGN (len
);
1421 base
= xmalloc (len
+ sizeof (_cpp_buff
));
1422 result
= (_cpp_buff
*) (base
+ len
);
1423 result
->base
= base
;
1425 result
->limit
= base
+ len
;
1426 result
->next
= NULL
;
1430 /* Place a chain of unwanted allocation buffers on the free list. */
1432 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
1434 _cpp_buff
*end
= buff
;
1438 end
->next
= pfile
->free_buffs
;
1439 pfile
->free_buffs
= buff
;
1442 /* Return a free buffer of size at least MIN_SIZE. */
1444 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
1446 _cpp_buff
*result
, **p
;
1448 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
1453 return new_buff (min_size
);
1455 size
= result
->limit
- result
->base
;
1456 /* Return a buffer that's big enough, but don't waste one that's
1458 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
1463 result
->next
= NULL
;
1464 result
->cur
= result
->base
;
1468 /* Creates a new buffer with enough space to hold the uncommitted
1469 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1470 the excess bytes to the new buffer. Chains the new buffer after
1471 BUFF, and returns the new buffer. */
1473 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
1475 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
1476 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
1478 buff
->next
= new_buff
;
1479 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
1483 /* Creates a new buffer with enough space to hold the uncommitted
1484 remaining bytes of the buffer pointed to by BUFF, and at least
1485 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1486 Chains the new buffer before the buffer pointed to by BUFF, and
1487 updates the pointer to point to the new buffer. */
1489 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
1491 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
1492 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
1494 new_buff
= _cpp_get_buff (pfile
, size
);
1495 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
1496 new_buff
->next
= old_buff
;
1500 /* Free a chain of buffers starting at BUFF. */
1502 _cpp_free_buff (_cpp_buff
*buff
)
1506 for (; buff
; buff
= next
)
1513 /* Allocate permanent, unaligned storage of length LEN. */
1515 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
1517 _cpp_buff
*buff
= pfile
->u_buff
;
1518 unsigned char *result
= buff
->cur
;
1520 if (len
> (size_t) (buff
->limit
- result
))
1522 buff
= _cpp_get_buff (pfile
, len
);
1523 buff
->next
= pfile
->u_buff
;
1524 pfile
->u_buff
= buff
;
1528 buff
->cur
= result
+ len
;
1532 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1533 That buffer is used for growing allocations when saving macro
1534 replacement lists in a #define, and when parsing an answer to an
1535 assertion in #assert, #unassert or #if (and therefore possibly
1536 whilst expanding macros). It therefore must not be used by any
1537 code that they might call: specifically the lexer and the guts of
1540 All existing other uses clearly fit this restriction: storing
1541 registered pragmas during initialization. */
1543 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
1545 _cpp_buff
*buff
= pfile
->a_buff
;
1546 unsigned char *result
= buff
->cur
;
1548 if (len
> (size_t) (buff
->limit
- result
))
1550 buff
= _cpp_get_buff (pfile
, len
);
1551 buff
->next
= pfile
->a_buff
;
1552 pfile
->a_buff
= buff
;
1556 buff
->cur
= result
+ len
;
1560 /* Say which field of TOK is in use. */
1562 enum cpp_token_fld_kind
1563 cpp_token_val_index (cpp_token
*tok
)
1565 switch (TOKEN_SPELL (tok
))
1568 return CPP_TOKEN_FLD_NODE
;
1570 return CPP_TOKEN_FLD_STR
;
1572 if (tok
->type
== CPP_MACRO_ARG
)
1573 return CPP_TOKEN_FLD_ARG_NO
;
1574 else if (tok
->type
== CPP_PADDING
)
1575 return CPP_TOKEN_FLD_SOURCE
;
1576 /* else fall through */
1578 return CPP_TOKEN_FLD_NONE
;