* real.c (encode_ieee_extended): Initialize whole array.
[official-gcc.git] / gcc / cpplex.c
blob3701415aa326d463105c67abadc2672c6207c30e
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "cpphash.h"
/* How the textual spelling of a token is obtained.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Fixed spelling stored in the spelling table.  */
  SPELL_IDENT,		/* Spelled from the identifier node in val.node.  */
  SPELL_LITERAL,	/* Spelled from the string held in val.str.  */
  SPELL_NONE		/* The token has no spelling.  */
};
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57 static void lex_number (cpp_reader *, cpp_string *);
58 static bool forms_identifier_p (cpp_reader *, int);
59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64 static int name_p (cpp_reader *, const cpp_string *);
65 static tokenrun *next_tokenrun (tokenrun *);
67 static _cpp_buff *new_buff (size_t);
70 /* Utility routine:
72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
74 int
75 cpp_ideq (const cpp_token *token, const char *string)
77 if (token->type != CPP_NAME)
78 return 0;
80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
83 /* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
85 static void
86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
88 if (buffer->notes_used == buffer->notes_cap)
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
/* Overview of _cpp_clean_line:

   Prepares the next logical line of PFILE's current buffer, starting
   at buffer->next_line.  S is the read pointer, D the write pointer,
   and P a scratch pointer used to look backwards over non-vertical
   whitespace for a backslash.  A line note is recorded for every
   escaped newline (type '\\', or ' ' when whitespace separated the
   backslash from the newline) and for every trigraph (type is the
   trigraph's third character).  On exit the cleaned line ends in
   '\n', a sentinel note follows it, and buffer->next_line points
   just past the consumed input.  */
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
102 void
103 _cpp_clean_line (cpp_reader *pfile)
105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
/* Reset the per-line note state and make the new line current.  */
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
/* S starts one byte early because both loops pre-increment it.  */
113 s = buffer->next_line - 1;
115 if (!buffer->from_stage3)
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
122 c = *++s;
123 if (c == '\n' || c == '\r')
125 d = (uchar *) s;
127 if (s == buffer->rlimit)
128 goto done;
130 /* DOS line ending? */
131 if (c == '\r' && s[1] == '\n')
132 s++;
134 if (s == buffer->rlimit)
135 goto done;
137 /* check for escaped newline */
138 p = d;
139 while (p != buffer->next_line && is_nvspace (p[-1]))
140 p--;
141 if (p == buffer->next_line || p[-1] != '\\')
142 goto done;
144 /* Have an escaped newline; process it and proceed to
145 the slow path. */
146 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
/* Back D up so that the slow path's pre-increment store overwrites
   the backslash.  */
147 d = p - 2;
148 buffer->next_line = p - 1;
149 break;
151 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer, s, s[2]);
156 if (CPP_OPTION (pfile, trigraphs))
158 /* We do, and that means we have to switch to the
159 slow path. */
160 d = (uchar *) s;
161 *d = _cpp_trigraph_map[s[2]];
162 s += 2;
163 break;
/* Slow path: every byte read through S is stored through D, so the
   text following a removed backslash-newline or a converted trigraph
   is moved down over it.  */
169 for (;;)
171 c = *++s;
172 *++d = c;
174 if (c == '\n' || c == '\r')
176 /* Handle DOS line endings. */
177 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
178 s++;
179 if (s == buffer->rlimit)
180 break;
182 /* Escaped? */
183 p = d;
184 while (p != buffer->next_line && is_nvspace (p[-1]))
185 p--;
186 if (p == buffer->next_line || p[-1] != '\\')
187 break;
189 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
190 d = p - 2;
191 buffer->next_line = p - 1;
193 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
196 add_line_note (buffer, d, s[2]);
197 if (CPP_OPTION (pfile, trigraphs))
199 *d = _cpp_trigraph_map[s[2]];
200 s += 2;
205 else
/* from_stage3 buffers: no escaped-newline or trigraph processing is
   done; only the end of the line is located.  */
208 s++;
209 while (*s != '\n' && *s != '\r');
210 d = (uchar *) s;
212 /* Handle DOS line endings. */
213 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
214 s++;
/* Common exit: D addresses the line terminator of the cleaned line,
   S the last input byte consumed.  */
217 done:
218 *d = '\n';
219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer, d + 1, '\n');
221 buffer->next_line = s + 1;
224 /* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
226 static bool
227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
229 const uchar *p;
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change
233 behavior. */
234 if (note->type != '/')
235 return false;
237 /* If -trigraphs, then this was an escaped newline iff the next note
238 is coincident. */
239 if (CPP_OPTION (pfile, trigraphs))
240 return note[1].pos == note->pos;
242 /* Otherwise, see if this forms an escaped newline. */
243 p = note->pos + 3;
244 while (is_nvspace (*p))
245 p++;
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p == '\n' && p < note[1].pos);
252 /* Process the notes created by add_line_note as far as the current
253 location. */
254 void
255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
257 cpp_buffer *buffer = pfile->buffer;
259 for (;;)
261 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 unsigned int col;
264 if (note->pos > buffer->cur)
265 break;
267 buffer->cur_note++;
268 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
270 if (note->type == '\\' || note->type == ' ')
272 if (note->type == ' ' && !in_comment)
273 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
274 "backslash and newline separated by space");
276 if (buffer->next_line > buffer->rlimit)
278 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer->next_line = buffer->rlimit;
284 buffer->line_base = note->pos;
285 pfile->line++;
287 else if (_cpp_trigraph_map[note->type])
289 if (CPP_OPTION (pfile, warn_trigraphs)
290 && (!in_comment || warn_in_comment (pfile, note)))
292 if (CPP_OPTION (pfile, trigraphs))
293 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
294 "trigraph ??%c converted to %c",
295 note->type,
296 (int) _cpp_trigraph_map[note->type]);
297 else
299 cpp_error_with_line
300 (pfile, DL_WARNING, pfile->line, col,
301 "trigraph ??%c ignored, use -trigraphs to enable",
302 note->type);
306 else
307 abort ();
311 /* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 nonzero if comment terminated by EOF, zero otherwise.
315 Buffer->cur points to the initial asterisk of the comment. */
316 bool
317 _cpp_skip_block_comment (cpp_reader *pfile)
319 cpp_buffer *buffer = pfile->buffer;
320 const uchar *cur = buffer->cur;
321 uchar c;
323 cur++;
324 if (*cur == '/')
325 cur++;
327 for (;;)
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
331 c = *cur++;
333 if (c == '/')
335 if (cur[-2] == '*')
336 break;
338 /* Warn about potential nested comments, but not if the '/'
339 comes immediately before the true comment delimiter.
340 Don't bother to get it right across escaped newlines. */
341 if (CPP_OPTION (pfile, warn_comments)
342 && cur[0] == '*' && cur[1] != '/')
344 buffer->cur = cur;
345 cpp_error_with_line (pfile, DL_WARNING,
346 pfile->line, CPP_BUF_COL (buffer),
347 "\"/*\" within comment");
350 else if (c == '\n')
352 buffer->cur = cur - 1;
353 _cpp_process_line_notes (pfile, true);
354 if (buffer->next_line >= buffer->rlimit)
355 return true;
356 _cpp_clean_line (pfile);
357 pfile->line++;
358 cur = buffer->cur;
362 buffer->cur = cur;
363 _cpp_process_line_notes (pfile, true);
364 return false;
367 /* Skip a C++ line comment, leaving buffer->cur pointing to the
368 terminating newline. Handles escaped newlines. Returns nonzero
369 if a multiline comment. */
370 static int
371 skip_line_comment (cpp_reader *pfile)
373 cpp_buffer *buffer = pfile->buffer;
374 unsigned int orig_line = pfile->line;
376 while (*buffer->cur != '\n')
377 buffer->cur++;
379 _cpp_process_line_notes (pfile, true);
380 return orig_line != pfile->line;
383 /* Skips whitespace, saving the next non-whitespace character. */
384 static void
385 skip_whitespace (cpp_reader *pfile, cppchar_t c)
387 cpp_buffer *buffer = pfile->buffer;
388 bool saw_NUL = false;
392 /* Horizontal space always OK. */
393 if (c == ' ' || c == '\t')
395 /* Just \f \v or \0 left. */
396 else if (c == '\0')
397 saw_NUL = true;
398 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
399 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
400 CPP_BUF_COL (buffer),
401 "%s in preprocessing directive",
402 c == '\f' ? "form feed" : "vertical tab");
404 c = *buffer->cur++;
406 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
407 while (is_nvspace (c));
409 if (saw_NUL)
410 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
412 buffer->cur--;
415 /* See if the characters of a number token are valid in a name (no
416 '.', '+' or '-'). */
417 static int
418 name_p (cpp_reader *pfile, const cpp_string *string)
420 unsigned int i;
422 for (i = 0; i < string->len; i++)
423 if (!is_idchar (string->text[i]))
424 return 0;
426 return 1;
429 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
430 an identifier. FIRST is TRUE if this starts an identifier. */
431 static bool
432 forms_identifier_p (cpp_reader *pfile, int first)
434 cpp_buffer *buffer = pfile->buffer;
436 if (*buffer->cur == '$')
438 if (!CPP_OPTION (pfile, dollars_in_ident))
439 return false;
441 buffer->cur++;
442 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
444 CPP_OPTION (pfile, warn_dollars) = 0;
445 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
448 return true;
451 /* Is this a syntactically valid UCN? */
452 if (0 && *buffer->cur == '\\'
453 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
455 buffer->cur += 2;
456 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
457 return true;
458 buffer->cur -= 2;
461 return false;
464 /* Lex an identifier starting at BUFFER->CUR - 1. */
465 static cpp_hashnode *
466 lex_identifier (cpp_reader *pfile, const uchar *base)
468 cpp_hashnode *result;
469 const uchar *cur;
473 cur = pfile->buffer->cur;
475 /* N.B. ISIDNUM does not include $. */
476 while (ISIDNUM (*cur))
477 cur++;
479 pfile->buffer->cur = cur;
481 while (forms_identifier_p (pfile, false));
483 result = (cpp_hashnode *)
484 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
486 /* Rarely, identifiers require diagnostics when lexed. */
487 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
488 && !pfile->state.skipping, 0))
490 /* It is allowed to poison the same identifier twice. */
491 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
492 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
493 NODE_NAME (result));
495 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
496 replacement list of a variadic macro. */
497 if (result == pfile->spec_nodes.n__VA_ARGS__
498 && !pfile->state.va_args_ok)
499 cpp_error (pfile, DL_PEDWARN,
500 "__VA_ARGS__ can only appear in the expansion"
501 " of a C99 variadic macro");
504 return result;
507 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
508 static void
509 lex_number (cpp_reader *pfile, cpp_string *number)
511 const uchar *cur;
512 const uchar *base;
513 uchar *dest;
515 base = pfile->buffer->cur - 1;
518 cur = pfile->buffer->cur;
520 /* N.B. ISIDNUM does not include $. */
521 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
522 cur++;
524 pfile->buffer->cur = cur;
526 while (forms_identifier_p (pfile, false));
528 number->len = cur - base;
529 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
530 memcpy (dest, base, number->len);
531 dest[number->len] = '\0';
532 number->text = dest;
535 /* Create a token of type TYPE with a literal spelling. */
536 static void
537 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
538 unsigned int len, enum cpp_ttype type)
540 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
542 memcpy (dest, base, len);
543 dest[len] = '\0';
544 token->type = type;
545 token->val.str.len = len;
546 token->val.str.text = dest;
549 /* Lexes a string, character constant, or angle-bracketed header file
550 name. The stored string contains the spelling, including opening
551 quote and leading any leading 'L'. It returns the type of the
552 literal, or CPP_OTHER if it was not properly terminated.
554 The spelling is NUL-terminated, but it is not guaranteed that this
555 is the first NUL since embedded NULs are preserved. */
556 static void
557 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
559 bool saw_NUL = false;
560 const uchar *cur;
561 cppchar_t terminator;
562 enum cpp_ttype type;
564 cur = base;
565 terminator = *cur++;
566 if (terminator == 'L')
567 terminator = *cur++;
568 if (terminator == '\"')
569 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
570 else if (terminator == '\'')
571 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
572 else
573 terminator = '>', type = CPP_HEADER_NAME;
575 for (;;)
577 cppchar_t c = *cur++;
579 /* In #include-style directives, terminators are not escapable. */
580 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
581 cur++;
582 else if (c == terminator)
583 break;
584 else if (c == '\n')
586 cur--;
587 type = CPP_OTHER;
588 break;
590 else if (c == '\0')
591 saw_NUL = true;
594 if (saw_NUL && !pfile->state.skipping)
595 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
597 pfile->buffer->cur = cur;
598 create_literal (pfile, token, base, cur - base, type);
601 /* The stored comment includes the comment start and any terminator. */
602 static void
603 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
604 cppchar_t type)
606 unsigned char *buffer;
607 unsigned int len, clen;
609 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
611 /* C++ comments probably (not definitely) have moved past a new
612 line, which we don't want to save in the comment. */
613 if (is_vspace (pfile->buffer->cur[-1]))
614 len--;
616 /* If we are currently in a directive, then we need to store all
617 C++ comments as C comments internally, and so we need to
618 allocate a little extra space in that case.
620 Note that the only time we encounter a directive here is
621 when we are saving comments in a "#define". */
622 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
624 buffer = _cpp_unaligned_alloc (pfile, clen);
626 token->type = CPP_COMMENT;
627 token->val.str.len = clen;
628 token->val.str.text = buffer;
630 buffer[0] = '/';
631 memcpy (buffer + 1, from, len - 1);
633 /* Finish conversion to a C comment, if necessary. */
634 if (pfile->state.in_directive && type == '/')
636 buffer[1] = '*';
637 buffer[clen - 2] = '*';
638 buffer[clen - 1] = '/';
642 /* Allocate COUNT tokens for RUN. */
643 void
644 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
646 run->base = xnewvec (cpp_token, count);
647 run->limit = run->base + count;
648 run->next = NULL;
651 /* Returns the next tokenrun, or creates one if there is none. */
652 static tokenrun *
653 next_tokenrun (tokenrun *run)
655 if (run->next == NULL)
657 run->next = xnew (tokenrun);
658 run->next->prev = run;
659 _cpp_init_tokenrun (run->next, 250);
662 return run->next;
665 /* Allocate a single token that is invalidated at the same time as the
666 rest of the tokens on the line. Has its line and col set to the
667 same as the last lexed token, so that diagnostics appear in the
668 right place. */
669 cpp_token *
670 _cpp_temp_token (cpp_reader *pfile)
672 cpp_token *old, *result;
674 old = pfile->cur_token - 1;
675 if (pfile->cur_token == pfile->cur_run->limit)
677 pfile->cur_run = next_tokenrun (pfile->cur_run);
678 pfile->cur_token = pfile->cur_run->base;
681 result = pfile->cur_token++;
682 result->line = old->line;
683 result->col = old->col;
684 return result;
687 /* Lex a token into RESULT (external interface). Takes care of issues
688 like directive handling, token lookahead, multiple include
689 optimization and skipping. */
690 const cpp_token *
691 _cpp_lex_token (cpp_reader *pfile)
693 cpp_token *result;
695 for (;;)
697 if (pfile->cur_token == pfile->cur_run->limit)
699 pfile->cur_run = next_tokenrun (pfile->cur_run);
700 pfile->cur_token = pfile->cur_run->base;
703 if (pfile->lookaheads)
705 pfile->lookaheads--;
706 result = pfile->cur_token++;
708 else
709 result = _cpp_lex_direct (pfile);
711 if (result->flags & BOL)
713 /* Is this a directive. If _cpp_handle_directive returns
714 false, it is an assembler #. */
715 if (result->type == CPP_HASH
716 /* 6.10.3 p 11: Directives in a list of macro arguments
717 gives undefined behavior. This implementation
718 handles the directive as normal. */
719 && pfile->state.parsing_args != 1
720 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
721 continue;
722 if (pfile->cb.line_change && !pfile->state.skipping)
723 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
726 /* We don't skip tokens in directives. */
727 if (pfile->state.in_directive)
728 break;
730 /* Outside a directive, invalidate controlling macros. At file
731 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
732 get here and MI optimization works. */
733 pfile->mi_valid = false;
735 if (!pfile->state.skipping || result->type == CPP_EOF)
736 break;
739 return result;
742 /* Returns true if a fresh line has been loaded. */
743 bool
744 _cpp_get_fresh_line (cpp_reader *pfile)
746 /* We can't get a new line until we leave the current directive. */
747 if (pfile->state.in_directive)
748 return false;
750 for (;;)
752 cpp_buffer *buffer = pfile->buffer;
754 if (!buffer->need_line)
755 return true;
757 if (buffer->next_line < buffer->rlimit)
759 _cpp_clean_line (pfile);
760 return true;
763 /* First, get out of parsing arguments state. */
764 if (pfile->state.parsing_args)
765 return false;
767 /* End of buffer. Non-empty files should end in a newline. */
768 if (buffer->buf != buffer->rlimit
769 && buffer->next_line > buffer->rlimit
770 && !buffer->from_stage3)
772 /* Only warn once. */
773 buffer->next_line = buffer->rlimit;
774 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
775 CPP_BUF_COLUMN (buffer, buffer->cur),
776 "no newline at end of file");
779 _cpp_pop_buffer (pfile);
780 if (pfile->buffer == NULL)
781 return false;
/* Helper for two-character operators.  If the next input character
   is CHAR, consume it and give the token THEN_TYPE; otherwise give
   it ELSE_TYPE.  Expects `buffer' and `result' to be in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == (CHAR))			\
	{						\
	  buffer->cur++;				\
	  result->type = (THEN_TYPE);			\
	}						\
      else						\
	result->type = (ELSE_TYPE);			\
    }							\
  while (0)
/* Overview of _cpp_lex_direct:

   Takes the token slot at pfile->cur_token, makes sure a line is
   available, handles pending line notes, then dispatches on the next
   character.  Control returns to the labels below instead of
   recursing: `fresh_line' after a newline, `update_tokens_line' after
   a discarded comment, `skipped_white' after whitespace.  The token's
   flags record BOL (first token on a line), PREV_WHITE (preceded by
   whitespace or a discarded comment), DIGRAPH and NAMED_OP.  */
794 /* Lex a token into pfile->cur_token, which is also incremented, to
795 get diagnostics pointing to the correct location.
797 Does not handle issues such as token lookahead, multiple-include
798 optimization, directives, skipping etc. This function is only
799 suitable for use by _cpp_lex_token, and in special cases like
800 lex_expansion_token which doesn't care for any of these issues.
802 When meeting a newline, returns CPP_EOF if parsing a directive,
803 otherwise returns to the start of the token buffer if permissible.
804 Returns the location of the lexed token. */
805 cpp_token *
806 _cpp_lex_direct (cpp_reader *pfile)
808 cppchar_t c;
809 cpp_buffer *buffer;
810 const unsigned char *comment_start;
811 cpp_token *result = pfile->cur_token++;
813 fresh_line:
814 result->flags = 0;
815 buffer = pfile->buffer;
/* The current line is used up: fetch another, or report CPP_EOF when
   none can be had.  */
816 if (buffer->need_line)
818 if (!_cpp_get_fresh_line (pfile))
820 result->type = CPP_EOF;
821 if (!pfile->state.in_directive)
823 /* Tell the compiler the line number of the EOF token. */
824 result->line = pfile->line;
825 result->flags = BOL;
827 return result;
/* Unless earlier tokens must be kept, start again at the beginning of
   the base token run.  */
829 if (!pfile->keep_tokens)
831 pfile->cur_run = &pfile->base_run;
832 result = pfile->base_run.base;
833 pfile->cur_token = result + 1;
835 result->flags = BOL;
836 if (pfile->state.parsing_args == 2)
837 result->flags |= PREV_WHITE;
/* Re-read the buffer pointer; fetching a line may have changed
   pfile->buffer.  */
839 buffer = pfile->buffer;
840 update_tokens_line:
841 result->line = pfile->line;
843 skipped_white:
/* Handle line notes that have been reached, unless the buffer is
   overlaid, and refresh the token's line since processing notes can
   advance pfile->line.  */
844 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
845 && !pfile->overlaid_buffer)
847 _cpp_process_line_notes (pfile, false);
848 result->line = pfile->line;
850 c = *buffer->cur++;
851 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
853 switch (c)
855 case ' ': case '\t': case '\f': case '\v': case '\0':
856 result->flags |= PREV_WHITE;
857 skip_whitespace (pfile, c);
858 goto skipped_white;
860 case '\n':
861 pfile->line++;
862 buffer->need_line = true;
863 goto fresh_line;
865 case '0': case '1': case '2': case '3': case '4':
866 case '5': case '6': case '7': case '8': case '9':
867 result->type = CPP_NUMBER;
868 lex_number (pfile, &result->val.str);
869 break;
871 case 'L':
872 /* 'L' may introduce wide characters or strings. */
873 if (*buffer->cur == '\'' || *buffer->cur == '"')
875 lex_string (pfile, result, buffer->cur - 1);
876 break;
878 /* Fall through. */
880 case '_':
881 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
882 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
883 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
884 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
885 case 'y': case 'z':
886 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
887 case 'G': case 'H': case 'I': case 'J': case 'K':
888 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
889 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
890 case 'Y': case 'Z':
891 result->type = CPP_NAME;
892 result->val.node = lex_identifier (pfile, buffer->cur - 1);
894 /* Convert named operators to their proper types. */
895 if (result->val.node->flags & NODE_OPERATOR)
897 result->flags |= NAMED_OP;
898 result->type = result->val.node->directive_index;
900 break;
902 case '\'':
903 case '"':
904 lex_string (pfile, result, buffer->cur - 1);
905 break;
907 case '/':
908 /* A potential block or line comment. */
909 comment_start = buffer->cur;
910 c = *buffer->cur;
912 if (c == '*')
914 if (_cpp_skip_block_comment (pfile))
915 cpp_error (pfile, DL_ERROR, "unterminated comment");
917 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
918 || CPP_IN_SYSTEM_HEADER (pfile)))
920 /* Warn about comments only if pedantically GNUC89, and not
921 in system headers. */
922 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
923 && ! buffer->warned_cplusplus_comments)
925 cpp_error (pfile, DL_PEDWARN,
926 "C++ style comments are not allowed in ISO C90");
927 cpp_error (pfile, DL_PEDWARN,
928 "(this will be reported only once per input file)");
929 buffer->warned_cplusplus_comments = 1;
932 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
933 cpp_error (pfile, DL_WARNING, "multi-line comment");
935 else if (c == '=')
937 buffer->cur++;
938 result->type = CPP_DIV_EQ;
939 break;
941 else
943 result->type = CPP_DIV;
944 break;
/* Only the two comment branches reach this point.  A comment that is
   not being saved counts as whitespace, and lexing resumes after
   it.  */
947 if (!pfile->state.save_comments)
949 result->flags |= PREV_WHITE;
950 goto update_tokens_line;
953 /* Save the comment as a token in its own right. */
954 save_comment (pfile, result, comment_start, c);
955 break;
957 case '<':
958 if (pfile->state.angled_headers)
960 lex_string (pfile, result, buffer->cur - 1);
961 break;
964 result->type = CPP_LESS;
965 if (*buffer->cur == '=')
966 buffer->cur++, result->type = CPP_LESS_EQ;
967 else if (*buffer->cur == '<')
969 buffer->cur++;
970 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
972 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
974 buffer->cur++;
975 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
977 else if (CPP_OPTION (pfile, digraphs))
979 if (*buffer->cur == ':')
981 buffer->cur++;
982 result->flags |= DIGRAPH;
983 result->type = CPP_OPEN_SQUARE;
985 else if (*buffer->cur == '%')
987 buffer->cur++;
988 result->flags |= DIGRAPH;
989 result->type = CPP_OPEN_BRACE;
992 break;
994 case '>':
995 result->type = CPP_GREATER;
996 if (*buffer->cur == '=')
997 buffer->cur++, result->type = CPP_GREATER_EQ;
998 else if (*buffer->cur == '>')
1000 buffer->cur++;
1001 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1003 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1005 buffer->cur++;
1006 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1008 break;
1010 case '%':
1011 result->type = CPP_MOD;
1012 if (*buffer->cur == '=')
1013 buffer->cur++, result->type = CPP_MOD_EQ;
1014 else if (CPP_OPTION (pfile, digraphs))
1016 if (*buffer->cur == ':')
1018 buffer->cur++;
1019 result->flags |= DIGRAPH;
1020 result->type = CPP_HASH;
1021 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1022 buffer->cur += 2, result->type = CPP_PASTE;
1024 else if (*buffer->cur == '>')
1026 buffer->cur++;
1027 result->flags |= DIGRAPH;
1028 result->type = CPP_CLOSE_BRACE;
1031 break;
1033 case '.':
1034 result->type = CPP_DOT;
1035 if (ISDIGIT (*buffer->cur))
1037 result->type = CPP_NUMBER;
1038 lex_number (pfile, &result->val.str);
1040 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1041 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1042 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1043 buffer->cur++, result->type = CPP_DOT_STAR;
1044 break;
1046 case '+':
1047 result->type = CPP_PLUS;
1048 if (*buffer->cur == '+')
1049 buffer->cur++, result->type = CPP_PLUS_PLUS;
1050 else if (*buffer->cur == '=')
1051 buffer->cur++, result->type = CPP_PLUS_EQ;
1052 break;
1054 case '-':
1055 result->type = CPP_MINUS;
1056 if (*buffer->cur == '>')
1058 buffer->cur++;
1059 result->type = CPP_DEREF;
1060 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1061 buffer->cur++, result->type = CPP_DEREF_STAR;
1063 else if (*buffer->cur == '-')
1064 buffer->cur++, result->type = CPP_MINUS_MINUS;
1065 else if (*buffer->cur == '=')
1066 buffer->cur++, result->type = CPP_MINUS_EQ;
1067 break;
1069 case '&':
1070 result->type = CPP_AND;
1071 if (*buffer->cur == '&')
1072 buffer->cur++, result->type = CPP_AND_AND;
1073 else if (*buffer->cur == '=')
1074 buffer->cur++, result->type = CPP_AND_EQ;
1075 break;
1077 case '|':
1078 result->type = CPP_OR;
1079 if (*buffer->cur == '|')
1080 buffer->cur++, result->type = CPP_OR_OR;
1081 else if (*buffer->cur == '=')
1082 buffer->cur++, result->type = CPP_OR_EQ;
1083 break;
1085 case ':':
1086 result->type = CPP_COLON;
1087 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1088 buffer->cur++, result->type = CPP_SCOPE;
1089 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1091 buffer->cur++;
1092 result->flags |= DIGRAPH;
1093 result->type = CPP_CLOSE_SQUARE;
1095 break;
1097 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1098 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1099 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1100 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1101 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1103 case '?': result->type = CPP_QUERY; break;
1104 case '~': result->type = CPP_COMPL; break;
1105 case ',': result->type = CPP_COMMA; break;
1106 case '(': result->type = CPP_OPEN_PAREN; break;
1107 case ')': result->type = CPP_CLOSE_PAREN; break;
1108 case '[': result->type = CPP_OPEN_SQUARE; break;
1109 case ']': result->type = CPP_CLOSE_SQUARE; break;
1110 case '{': result->type = CPP_OPEN_BRACE; break;
1111 case '}': result->type = CPP_CLOSE_BRACE; break;
1112 case ';': result->type = CPP_SEMICOLON; break;
1114 /* @ is a punctuator in Objective-C. */
1115 case '@': result->type = CPP_ATSIGN; break;
/* '$' and '\\' are un-read and offered to forms_identifier_p; if it
   accepts them an identifier is lexed, otherwise the character is
   consumed again and handled by the default case below.  */
1117 case '$':
1118 case '\\':
1120 const uchar *base = --buffer->cur;
1122 if (forms_identifier_p (pfile, true))
1124 result->type = CPP_NAME;
1125 result->val.node = lex_identifier (pfile, base);
1126 break;
1128 buffer->cur++;
/* Any other character becomes a one-character CPP_OTHER token.  */
1131 default:
1132 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1133 break;
1136 return result;
1139 /* An upper bound on the number of bytes needed to spell TOKEN.
1140 Does not include preceding whitespace. */
1141 unsigned int
1142 cpp_token_len (const cpp_token *token)
1144 unsigned int len;
1146 switch (TOKEN_SPELL (token))
1148 default: len = 4; break;
1149 case SPELL_LITERAL: len = token->val.str.len; break;
1150 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1153 return len;
1156 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1157 already contain the enough space to hold the token's spelling.
1158 Returns a pointer to the character after the last character written.
1159 FIXME: Would be nice if we didn't need the PFILE argument. */
1160 unsigned char *
1161 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1162 unsigned char *buffer)
1164 switch (TOKEN_SPELL (token))
1166 case SPELL_OPERATOR:
1168 const unsigned char *spelling;
1169 unsigned char c;
1171 if (token->flags & DIGRAPH)
1172 spelling
1173 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1174 else if (token->flags & NAMED_OP)
1175 goto spell_ident;
1176 else
1177 spelling = TOKEN_NAME (token);
1179 while ((c = *spelling++) != '\0')
1180 *buffer++ = c;
1182 break;
1184 spell_ident:
1185 case SPELL_IDENT:
1186 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1187 buffer += NODE_LEN (token->val.node);
1188 break;
1190 case SPELL_LITERAL:
1191 memcpy (buffer, token->val.str.text, token->val.str.len);
1192 buffer += token->val.str.len;
1193 break;
1195 case SPELL_NONE:
1196 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1197 break;
1200 return buffer;
1203 /* Returns TOKEN spelt as a null-terminated string. The string is
1204 freed when the reader is destroyed. Useful for diagnostics. */
1205 unsigned char *
1206 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1208 unsigned int len = cpp_token_len (token) + 1;
1209 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1211 end = cpp_spell_token (pfile, token, start);
1212 end[0] = '\0';
1214 return start;
1217 /* Used by C front ends, which really should move to using
1218 cpp_token_as_text. */
1219 const char *
1220 cpp_type2name (enum cpp_ttype type)
1222 return (const char *) token_spellings[type].name;
1225 /* Writes the spelling of token to FP, without any preceding space.
1226 Separated from cpp_spell_token for efficiency - to avoid stdio
1227 double-buffering. */
1228 void
1229 cpp_output_token (const cpp_token *token, FILE *fp)
1231 switch (TOKEN_SPELL (token))
1233 case SPELL_OPERATOR:
1235 const unsigned char *spelling;
1236 int c;
1238 if (token->flags & DIGRAPH)
1239 spelling
1240 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1241 else if (token->flags & NAMED_OP)
1242 goto spell_ident;
1243 else
1244 spelling = TOKEN_NAME (token);
1246 c = *spelling;
1248 putc (c, fp);
1249 while ((c = *++spelling) != '\0');
1251 break;
1253 spell_ident:
1254 case SPELL_IDENT:
1255 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1256 break;
1258 case SPELL_LITERAL:
1259 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1260 break;
1262 case SPELL_NONE:
1263 /* An error, most probably. */
1264 break;
1268 /* Compare two tokens. */
1270 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1272 if (a->type == b->type && a->flags == b->flags)
1273 switch (TOKEN_SPELL (a))
1275 default: /* Keep compiler happy. */
1276 case SPELL_OPERATOR:
1277 return 1;
1278 case SPELL_NONE:
1279 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1280 case SPELL_IDENT:
1281 return a->val.node == b->val.node;
1282 case SPELL_LITERAL:
1283 return (a->val.str.len == b->val.str.len
1284 && !memcmp (a->val.str.text, b->val.str.text,
1285 a->val.str.len));
1288 return 0;
1291 /* Returns nonzero if a space should be inserted to avoid an
1292 accidental token paste for output. For simplicity, it is
1293 conservative, and occasionally advises a space where one is not
1294 needed, e.g. "." and ".2". */
1296 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1297 const cpp_token *token2)
1299 enum cpp_ttype a = token1->type, b = token2->type;
1300 cppchar_t c;
1302 if (token1->flags & NAMED_OP)
1303 a = CPP_NAME;
1304 if (token2->flags & NAMED_OP)
1305 b = CPP_NAME;
1307 c = EOF;
1308 if (token2->flags & DIGRAPH)
1309 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1310 else if (token_spellings[b].category == SPELL_OPERATOR)
1311 c = token_spellings[b].name[0];
1313 /* Quickly get everything that can paste with an '='. */
1314 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1315 return 1;
1317 switch (a)
1319 case CPP_GREATER: return c == '>' || c == '?';
1320 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1321 case CPP_PLUS: return c == '+';
1322 case CPP_MINUS: return c == '-' || c == '>';
1323 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1324 case CPP_MOD: return c == ':' || c == '>';
1325 case CPP_AND: return c == '&';
1326 case CPP_OR: return c == '|';
1327 case CPP_COLON: return c == ':' || c == '>';
1328 case CPP_DEREF: return c == '*';
1329 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1330 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1331 case CPP_NAME: return ((b == CPP_NUMBER
1332 && name_p (pfile, &token2->val.str))
1333 || b == CPP_NAME
1334 || b == CPP_CHAR || b == CPP_STRING); /* L */
1335 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1336 || c == '.' || c == '+' || c == '-');
1337 /* UCNs */
1338 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1339 && b == CPP_NAME)
1340 || (CPP_OPTION (pfile, objc)
1341 && token1->val.str.text[0] == '@'
1342 && (b == CPP_NAME || b == CPP_STRING)));
1343 default: break;
1346 return 0;
1349 /* Output all the remaining tokens on the current line, and a newline
1350 character, to FP. Leading whitespace is removed. If there are
1351 macros, special token padding is not performed. */
1352 void
1353 cpp_output_line (cpp_reader *pfile, FILE *fp)
1355 const cpp_token *token;
1357 token = cpp_get_token (pfile);
1358 while (token->type != CPP_EOF)
1360 cpp_output_token (token, fp);
1361 token = cpp_get_token (pfile);
1362 if (token->flags & PREV_WHITE)
1363 putc (' ', fp);
1366 putc ('\n', fp);
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer size used.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest buffer size regarded
   as a reasonable fit for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   growing BUFF: MIN_EXTRA plus twice the space between BUFF's cur and
   limit pointers.  Every macro argument is parenthesized so that an
   argument containing a low-precedence operator expands as intended.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1384 /* Create a new allocation buffer. Place the control block at the end
1385 of the buffer, so that buffer overflows will cause immediate chaos. */
1386 static _cpp_buff *
1387 new_buff (size_t len)
1389 _cpp_buff *result;
1390 unsigned char *base;
1392 if (len < MIN_BUFF_SIZE)
1393 len = MIN_BUFF_SIZE;
1394 len = CPP_ALIGN (len);
1396 base = xmalloc (len + sizeof (_cpp_buff));
1397 result = (_cpp_buff *) (base + len);
1398 result->base = base;
1399 result->cur = base;
1400 result->limit = base + len;
1401 result->next = NULL;
1402 return result;
1405 /* Place a chain of unwanted allocation buffers on the free list. */
1406 void
1407 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1409 _cpp_buff *end = buff;
1411 while (end->next)
1412 end = end->next;
1413 end->next = pfile->free_buffs;
1414 pfile->free_buffs = buff;
1417 /* Return a free buffer of size at least MIN_SIZE. */
1418 _cpp_buff *
1419 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1421 _cpp_buff *result, **p;
1423 for (p = &pfile->free_buffs;; p = &(*p)->next)
1425 size_t size;
1427 if (*p == NULL)
1428 return new_buff (min_size);
1429 result = *p;
1430 size = result->limit - result->base;
1431 /* Return a buffer that's big enough, but don't waste one that's
1432 way too big. */
1433 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1434 break;
1437 *p = result->next;
1438 result->next = NULL;
1439 result->cur = result->base;
1440 return result;
1443 /* Creates a new buffer with enough space to hold the uncommitted
1444 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1445 the excess bytes to the new buffer. Chains the new buffer after
1446 BUFF, and returns the new buffer. */
1447 _cpp_buff *
1448 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1450 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1451 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1453 buff->next = new_buff;
1454 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1455 return new_buff;
1458 /* Creates a new buffer with enough space to hold the uncommitted
1459 remaining bytes of the buffer pointed to by BUFF, and at least
1460 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1461 Chains the new buffer before the buffer pointed to by BUFF, and
1462 updates the pointer to point to the new buffer. */
1463 void
1464 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1466 _cpp_buff *new_buff, *old_buff = *pbuff;
1467 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1469 new_buff = _cpp_get_buff (pfile, size);
1470 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1471 new_buff->next = old_buff;
1472 *pbuff = new_buff;
1475 /* Free a chain of buffers starting at BUFF. */
1476 void
1477 _cpp_free_buff (_cpp_buff *buff)
1479 _cpp_buff *next;
1481 for (; buff; buff = next)
1483 next = buff->next;
1484 free (buff->base);
1488 /* Allocate permanent, unaligned storage of length LEN. */
1489 unsigned char *
1490 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1492 _cpp_buff *buff = pfile->u_buff;
1493 unsigned char *result = buff->cur;
1495 if (len > (size_t) (buff->limit - result))
1497 buff = _cpp_get_buff (pfile, len);
1498 buff->next = pfile->u_buff;
1499 pfile->u_buff = buff;
1500 result = buff->cur;
1503 buff->cur = result + len;
1504 return result;
1507 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1508 That buffer is used for growing allocations when saving macro
1509 replacement lists in a #define, and when parsing an answer to an
1510 assertion in #assert, #unassert or #if (and therefore possibly
1511 whilst expanding macros). It therefore must not be used by any
1512 code that they might call: specifically the lexer and the guts of
1513 the macro expander.
1515 All existing other uses clearly fit this restriction: storing
1516 registered pragmas during initialization. */
1517 unsigned char *
1518 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1520 _cpp_buff *buff = pfile->a_buff;
1521 unsigned char *result = buff->cur;
1523 if (len > (size_t) (buff->limit - result))
1525 buff = _cpp_get_buff (pfile, len);
1526 buff->next = pfile->a_buff;
1527 pfile->a_buff = buff;
1528 result = buff->cur;
1531 buff->cur = result + len;
1532 return result;